Diffstat (limited to 'net')
-rw-r--r--  net/6lowpan/6lowpan_i.h | 9
-rw-r--r--  net/6lowpan/core.c | 8
-rw-r--r--  net/6lowpan/debugfs.c | 22
-rw-r--r--  net/6lowpan/iphc.c | 111
-rw-r--r--  net/6lowpan/nhc_udp.c | 2
-rw-r--r--  net/Kconfig | 1
-rw-r--r--  net/Makefile | 1
-rw-r--r--  net/atm/lec.c | 4
-rw-r--r--  net/batman-adv/bat_iv_ogm.c | 171
-rw-r--r--  net/batman-adv/bat_v.c | 21
-rw-r--r--  net/batman-adv/bat_v_ogm.c | 219
-rw-r--r--  net/batman-adv/bitarray.c | 16
-rw-r--r--  net/batman-adv/bitarray.h | 15
-rw-r--r--  net/batman-adv/bridge_loop_avoidance.c | 333
-rw-r--r--  net/batman-adv/bridge_loop_avoidance.h | 43
-rw-r--r--  net/batman-adv/debugfs.c | 21
-rw-r--r--  net/batman-adv/distributed-arp-table.c | 31
-rw-r--r--  net/batman-adv/fragmentation.c | 12
-rw-r--r--  net/batman-adv/gateway_client.c | 12
-rw-r--r--  net/batman-adv/hard-interface.c | 40
-rw-r--r--  net/batman-adv/hard-interface.h | 3
-rw-r--r--  net/batman-adv/hash.h | 6
-rw-r--r--  net/batman-adv/icmp_socket.c | 24
-rw-r--r--  net/batman-adv/main.c | 26
-rw-r--r--  net/batman-adv/main.h | 11
-rw-r--r--  net/batman-adv/multicast.c | 11
-rw-r--r--  net/batman-adv/network-coding.c | 45
-rw-r--r--  net/batman-adv/originator.c | 60
-rw-r--r--  net/batman-adv/originator.h | 2
-rw-r--r--  net/batman-adv/packet.h | 3
-rw-r--r--  net/batman-adv/routing.c | 57
-rw-r--r--  net/batman-adv/routing.h | 6
-rw-r--r--  net/batman-adv/send.c | 16
-rw-r--r--  net/batman-adv/soft-interface.c | 60
-rw-r--r--  net/batman-adv/soft-interface.h | 10
-rw-r--r--  net/batman-adv/sysfs.c | 9
-rw-r--r--  net/batman-adv/translation-table.c | 88
-rw-r--r--  net/batman-adv/types.h | 15
-rw-r--r--  net/bluetooth/6lowpan.c | 93
-rw-r--r--  net/bluetooth/bnep/netdev.c | 2
-rw-r--r--  net/bridge/br_ioctl.c | 5
-rw-r--r--  net/bridge/br_mdb.c | 124
-rw-r--r--  net/bridge/br_multicast.c | 20
-rw-r--r--  net/bridge/br_netfilter_hooks.c | 6
-rw-r--r--  net/bridge/br_netfilter_ipv6.c | 10
-rw-r--r--  net/bridge/br_netlink.c | 78
-rw-r--r--  net/bridge/br_private.h | 22
-rw-r--r--  net/bridge/br_sysfs_br.c | 17
-rw-r--r--  net/bridge/br_vlan.c | 109
-rw-r--r--  net/ceph/auth.c | 8
-rw-r--r--  net/ceph/auth_none.c | 71
-rw-r--r--  net/ceph/auth_none.h | 3
-rw-r--r--  net/ceph/auth_x.c | 21
-rw-r--r--  net/ceph/auth_x.h | 1
-rw-r--r--  net/ceph/osd_client.c | 6
-rw-r--r--  net/core/dev.c | 20
-rw-r--r--  net/core/filter.c | 51
-rw-r--r--  net/core/flow.c | 14
-rw-r--r--  net/core/gen_stats.c | 35
-rw-r--r--  net/core/neighbour.c | 3
-rw-r--r--  net/core/net-procfs.c | 3
-rw-r--r--  net/core/pktgen.c | 1
-rw-r--r--  net/core/rtnetlink.c | 96
-rw-r--r--  net/core/skbuff.c | 247
-rw-r--r--  net/core/sock.c | 53
-rw-r--r--  net/core/sock_diag.c | 2
-rw-r--r--  net/dccp/dccp.h | 6
-rw-r--r--  net/dccp/input.c | 2
-rw-r--r--  net/dccp/ipv4.c | 24
-rw-r--r--  net/dccp/ipv6.c | 24
-rw-r--r--  net/dccp/minisocks.c | 2
-rw-r--r--  net/dccp/options.c | 2
-rw-r--r--  net/dccp/timer.c | 8
-rw-r--r--  net/dsa/dsa.c | 36
-rw-r--r--  net/dsa/slave.c | 100
-rw-r--r--  net/ieee802154/6lowpan/6lowpan_i.h | 14
-rw-r--r--  net/ieee802154/6lowpan/core.c | 6
-rw-r--r--  net/ieee802154/6lowpan/tx.c | 14
-rw-r--r--  net/ieee802154/nl802154.c | 10
-rw-r--r--  net/ipv4/arp.c | 2
-rw-r--r--  net/ipv4/fib_frontend.c | 7
-rw-r--r--  net/ipv4/fou.c | 6
-rw-r--r--  net/ipv4/gre_demux.c | 61
-rw-r--r--  net/ipv4/icmp.c | 18
-rw-r--r--  net/ipv4/inet_connection_sock.c | 6
-rw-r--r--  net/ipv4/inet_diag.c | 9
-rw-r--r--  net/ipv4/inet_hashtables.c | 4
-rw-r--r--  net/ipv4/inet_timewait_sock.c | 10
-rw-r--r--  net/ipv4/ip_forward.c | 6
-rw-r--r--  net/ipv4/ip_fragment.c | 14
-rw-r--r--  net/ipv4/ip_gre.c | 255
-rw-r--r--  net/ipv4/ip_input.c | 42
-rw-r--r--  net/ipv4/ip_sockglue.c | 7
-rw-r--r--  net/ipv4/ip_tunnel.c | 4
-rw-r--r--  net/ipv4/ip_tunnel_core.c | 2
-rw-r--r--  net/ipv4/ip_vti.c | 18
-rw-r--r--  net/ipv4/netfilter/arp_tables.c | 223
-rw-r--r--  net/ipv4/netfilter/ip_tables.c | 250
-rw-r--r--  net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2
-rw-r--r--  net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | 47
-rw-r--r--  net/ipv4/route.c | 10
-rw-r--r--  net/ipv4/syncookies.c | 4
-rw-r--r--  net/ipv4/tcp.c | 47
-rw-r--r--  net/ipv4/tcp_bic.c | 6
-rw-r--r--  net/ipv4/tcp_cdg.c | 34
-rw-r--r--  net/ipv4/tcp_cubic.c | 26
-rw-r--r--  net/ipv4/tcp_fastopen.c | 14
-rw-r--r--  net/ipv4/tcp_htcp.c | 10
-rw-r--r--  net/ipv4/tcp_illinois.c | 21
-rw-r--r--  net/ipv4/tcp_input.c | 163
-rw-r--r--  net/ipv4/tcp_ipv4.c | 52
-rw-r--r--  net/ipv4/tcp_lp.c | 6
-rw-r--r--  net/ipv4/tcp_minisocks.c | 14
-rw-r--r--  net/ipv4/tcp_output.c | 51
-rw-r--r--  net/ipv4/tcp_recovery.c | 4
-rw-r--r--  net/ipv4/tcp_timer.c | 24
-rw-r--r--  net/ipv4/tcp_vegas.c | 6
-rw-r--r--  net/ipv4/tcp_vegas.h | 2
-rw-r--r--  net/ipv4/tcp_veno.c | 7
-rw-r--r--  net/ipv4/tcp_westwood.c | 7
-rw-r--r--  net/ipv4/tcp_yeah.c | 7
-rw-r--r--  net/ipv4/udp.c | 72
-rw-r--r--  net/ipv4/udp_offload.c | 8
-rw-r--r--  net/ipv6/Kconfig | 1
-rw-r--r--  net/ipv6/addrconf.c | 48
-rw-r--r--  net/ipv6/datagram.c | 13
-rw-r--r--  net/ipv6/exthdrs.c | 66
-rw-r--r--  net/ipv6/icmp.c | 45
-rw-r--r--  net/ipv6/ila/ila.h | 79
-rw-r--r--  net/ipv6/ila/ila_common.c | 81
-rw-r--r--  net/ipv6/ila/ila_lwt.c | 54
-rw-r--r--  net/ipv6/ila/ila_xlat.c | 161
-rw-r--r--  net/ipv6/inet6_hashtables.c | 2
-rw-r--r--  net/ipv6/ip6_fib.c | 1
-rw-r--r--  net/ipv6/ip6_flowlabel.c | 6
-rw-r--r--  net/ipv6/ip6_gre.c | 438
-rw-r--r--  net/ipv6/ip6_input.c | 39
-rw-r--r--  net/ipv6/ip6_output.c | 76
-rw-r--r--  net/ipv6/ip6_tunnel.c | 268
-rw-r--r--  net/ipv6/ip6mr.c | 8
-rw-r--r--  net/ipv6/ipv6_sockglue.c | 6
-rw-r--r--  net/ipv6/netfilter/ip6_tables.c | 235
-rw-r--r--  net/ipv6/netfilter/ip6t_SYNPROXY.c | 2
-rw-r--r--  net/ipv6/ping.c | 12
-rw-r--r--  net/ipv6/raw.c | 33
-rw-r--r--  net/ipv6/reassembly.c | 32
-rw-r--r--  net/ipv6/route.c | 42
-rw-r--r--  net/ipv6/syncookies.c | 4
-rw-r--r--  net/ipv6/tcp_ipv6.c | 41
-rw-r--r--  net/ipv6/udp.c | 129
-rw-r--r--  net/irda/irlan/irlan_eth.c | 2
-rw-r--r--  net/l2tp/l2tp_core.c | 4
-rw-r--r--  net/l2tp/l2tp_ip6.c | 33
-rw-r--r--  net/l3mdev/l3mdev.c | 63
-rw-r--r--  net/llc/af_llc.c | 1
-rw-r--r--  net/mac80211/iface.c | 4
-rw-r--r--  net/netfilter/ipvs/ip_vs_conn.c | 51
-rw-r--r--  net/netfilter/ipvs/ip_vs_core.c | 162
-rw-r--r--  net/netfilter/ipvs/ip_vs_ctl.c | 46
-rw-r--r--  net/netfilter/ipvs/ip_vs_nfct.c | 4
-rw-r--r--  net/netfilter/ipvs/ip_vs_pe_sip.c | 15
-rw-r--r--  net/netfilter/nf_conntrack_core.c | 414
-rw-r--r--  net/netfilter/nf_conntrack_expect.c | 83
-rw-r--r--  net/netfilter/nf_conntrack_helper.c | 12
-rw-r--r--  net/netfilter/nf_conntrack_netlink.c | 29
-rw-r--r--  net/netfilter/nf_conntrack_proto_udp.c | 2
-rw-r--r--  net/netfilter/nf_conntrack_proto_udplite.c | 2
-rw-r--r--  net/netfilter/nf_conntrack_standalone.c | 13
-rw-r--r--  net/netfilter/nf_nat_core.c | 39
-rw-r--r--  net/netfilter/nf_tables_api.c | 78
-rw-r--r--  net/netfilter/nfnetlink_cttimeout.c | 6
-rw-r--r--  net/netfilter/nft_ct.c | 30
-rw-r--r--  net/netfilter/nft_rbtree.c | 49
-rw-r--r--  net/nfc/nci/core.c | 117
-rw-r--r--  net/nfc/nci/ntf.c | 2
-rw-r--r--  net/nfc/nci/rsp.c | 23
-rw-r--r--  net/openvswitch/conntrack.c | 8
-rw-r--r--  net/openvswitch/datapath.c | 27
-rw-r--r--  net/qrtr/Kconfig | 24
-rw-r--r--  net/qrtr/Makefile | 2
-rw-r--r--  net/qrtr/qrtr.c | 1007
-rw-r--r--  net/qrtr/qrtr.h | 31
-rw-r--r--  net/qrtr/smd.c | 117
-rw-r--r--  net/rds/tcp.c | 3
-rw-r--r--  net/rds/tcp.h | 4
-rw-r--r--  net/rds/tcp_connect.c | 8
-rw-r--r--  net/rds/tcp_listen.c | 54
-rw-r--r--  net/rds/tcp_recv.c | 14
-rw-r--r--  net/rxrpc/ar-input.c | 4
-rw-r--r--  net/sched/act_api.c | 7
-rw-r--r--  net/sched/act_bpf.c | 5
-rw-r--r--  net/sched/act_connmark.c | 3
-rw-r--r--  net/sched/act_csum.c | 2
-rw-r--r--  net/sched/act_gact.c | 2
-rw-r--r--  net/sched/act_ife.c | 2
-rw-r--r--  net/sched/act_ipt.c | 2
-rw-r--r--  net/sched/act_mirred.c | 2
-rw-r--r--  net/sched/act_nat.c | 2
-rw-r--r--  net/sched/act_pedit.c | 2
-rw-r--r--  net/sched/act_simple.c | 2
-rw-r--r--  net/sched/act_skbedit.c | 2
-rw-r--r--  net/sched/act_vlan.c | 2
-rw-r--r--  net/sched/cls_bpf.c | 2
-rw-r--r--  net/sched/cls_u32.c | 7
-rw-r--r--  net/sched/sch_api.c | 6
-rw-r--r--  net/sched/sch_fq_codel.c | 89
-rw-r--r--  net/sched/sch_generic.c | 44
-rw-r--r--  net/sched/sch_netem.c | 61
-rw-r--r--  net/sctp/chunk.c | 2
-rw-r--r--  net/sctp/input.c | 16
-rw-r--r--  net/sctp/inqueue.c | 2
-rw-r--r--  net/sctp/ipv6.c | 2
-rw-r--r--  net/sctp/sctp_diag.c | 11
-rw-r--r--  net/sctp/sm_sideeffect.c | 7
-rw-r--r--  net/sctp/ulpqueue.c | 25
-rw-r--r--  net/socket.c | 3
-rw-r--r--  net/sunrpc/xprtsock.c | 4
-rw-r--r--  net/switchdev/switchdev.c | 6
-rw-r--r--  net/tipc/core.c | 8
-rw-r--r--  net/tipc/msg.h | 14
-rw-r--r--  net/tipc/node.c | 27
-rw-r--r--  net/tipc/node.h | 6
-rw-r--r--  net/tipc/socket.c | 144
-rw-r--r--  net/tipc/socket.h | 17
-rw-r--r--  net/tipc/subscr.c | 3
-rw-r--r--  net/vmw_vsock/af_vsock.c | 21
-rw-r--r--  net/vmw_vsock/vmci_transport.c | 2
-rw-r--r--  net/xfrm/xfrm_output.c | 3
228 files changed, 6007 insertions, 3570 deletions
diff --git a/net/6lowpan/6lowpan_i.h b/net/6lowpan/6lowpan_i.h
index d16bb4b14aa1..97ecc27aeca6 100644
--- a/net/6lowpan/6lowpan_i.h
+++ b/net/6lowpan/6lowpan_i.h
@@ -3,6 +3,15 @@
 
 #include <linux/netdevice.h>
 
+#include <net/6lowpan.h>
+
+/* caller need to be sure it's dev->type is ARPHRD_6LOWPAN */
+static inline bool lowpan_is_ll(const struct net_device *dev,
+				enum lowpan_lltypes lltype)
+{
+	return lowpan_dev(dev)->lltype == lltype;
+}
+
 #ifdef CONFIG_6LOWPAN_DEBUGFS
 int lowpan_dev_debugfs_init(struct net_device *dev);
 void lowpan_dev_debugfs_exit(struct net_device *dev);
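The new lowpan_is_ll() helper lets link-layer specific code test the 6LoWPAN device type in one call instead of open-coding the lowpan_dev(dev)->lltype comparison. A minimal sketch of a caller; the function names ending in _example_xmit are made up for illustration:

/* Hypothetical caller: branch on the 6LoWPAN link-layer type.
 * As the comment above notes, dev->type must already be ARPHRD_6LOWPAN.
 */
static netdev_tx_t lowpan_example_xmit(struct sk_buff *skb,
				       struct net_device *dev)
{
	if (lowpan_is_ll(dev, LOWPAN_LLTYPE_IEEE802154))
		return lowpan_802154_example_xmit(skb, dev);	/* made up */

	/* BTLE and other link layers share the generic path */
	return lowpan_generic_example_xmit(skb, dev);		/* made up */
}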
diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c
index 34e44c0c0836..7a240b3eaed1 100644
--- a/net/6lowpan/core.c
+++ b/net/6lowpan/core.c
@@ -27,11 +27,11 @@ int lowpan_register_netdevice(struct net_device *dev,
 	dev->mtu = IPV6_MIN_MTU;
 	dev->priv_flags |= IFF_NO_QUEUE;
 
-	lowpan_priv(dev)->lltype = lltype;
+	lowpan_dev(dev)->lltype = lltype;
 
-	spin_lock_init(&lowpan_priv(dev)->ctx.lock);
+	spin_lock_init(&lowpan_dev(dev)->ctx.lock);
 	for (i = 0; i < LOWPAN_IPHC_CTX_TABLE_SIZE; i++)
-		lowpan_priv(dev)->ctx.table[i].id = i;
+		lowpan_dev(dev)->ctx.table[i].id = i;
 
 	ret = register_netdevice(dev);
 	if (ret < 0)
@@ -85,7 +85,7 @@ static int lowpan_event(struct notifier_block *unused,
 	case NETDEV_DOWN:
 		for (i = 0; i < LOWPAN_IPHC_CTX_TABLE_SIZE; i++)
 			clear_bit(LOWPAN_IPHC_CTX_FLAG_ACTIVE,
-				  &lowpan_priv(dev)->ctx.table[i].flags);
+				  &lowpan_dev(dev)->ctx.table[i].flags);
 		break;
 	default:
 		return NOTIFY_DONE;
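Most of the 6LoWPAN churn in this series is the mechanical rename of struct lowpan_priv / lowpan_priv() to struct lowpan_dev / lowpan_dev(). The accessor's behaviour is unchanged; a sketch of its assumed shape (the real definition lives in include/net/6lowpan.h, not in this diff):

/* Assumed shape of the renamed accessor: it still just returns the
 * 6LoWPAN private area embedded in the net_device.
 */
static inline struct lowpan_dev *lowpan_dev(const struct net_device *dev)
{
	return netdev_priv(dev);
}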
diff --git a/net/6lowpan/debugfs.c b/net/6lowpan/debugfs.c
index 0793a8157472..acbaa3db493b 100644
--- a/net/6lowpan/debugfs.c
+++ b/net/6lowpan/debugfs.c
@@ -172,7 +172,7 @@ static const struct file_operations lowpan_ctx_pfx_fops = {
 static int lowpan_dev_debugfs_ctx_init(struct net_device *dev,
 				       struct dentry *ctx, u8 id)
 {
-	struct lowpan_priv *lpriv = lowpan_priv(dev);
+	struct lowpan_dev *ldev = lowpan_dev(dev);
 	struct dentry *dentry, *root;
 	char buf[32];
 
@@ -185,25 +185,25 @@ static int lowpan_dev_debugfs_ctx_init(struct net_device *dev,
 		return -EINVAL;
 
 	dentry = debugfs_create_file("active", 0644, root,
-				     &lpriv->ctx.table[id],
+				     &ldev->ctx.table[id],
 				     &lowpan_ctx_flag_active_fops);
 	if (!dentry)
 		return -EINVAL;
 
 	dentry = debugfs_create_file("compression", 0644, root,
-				     &lpriv->ctx.table[id],
+				     &ldev->ctx.table[id],
 				     &lowpan_ctx_flag_c_fops);
 	if (!dentry)
 		return -EINVAL;
 
 	dentry = debugfs_create_file("prefix", 0644, root,
-				     &lpriv->ctx.table[id],
+				     &ldev->ctx.table[id],
 				     &lowpan_ctx_pfx_fops);
 	if (!dentry)
 		return -EINVAL;
 
 	dentry = debugfs_create_file("prefix_len", 0644, root,
-				     &lpriv->ctx.table[id],
+				     &ldev->ctx.table[id],
 				     &lowpan_ctx_plen_fops);
 	if (!dentry)
 		return -EINVAL;
@@ -247,21 +247,21 @@ static const struct file_operations lowpan_context_fops = {
 
 int lowpan_dev_debugfs_init(struct net_device *dev)
 {
-	struct lowpan_priv *lpriv = lowpan_priv(dev);
+	struct lowpan_dev *ldev = lowpan_dev(dev);
 	struct dentry *contexts, *dentry;
 	int ret, i;
 
 	/* creating the root */
-	lpriv->iface_debugfs = debugfs_create_dir(dev->name, lowpan_debugfs);
-	if (!lpriv->iface_debugfs)
+	ldev->iface_debugfs = debugfs_create_dir(dev->name, lowpan_debugfs);
+	if (!ldev->iface_debugfs)
 		goto fail;
 
-	contexts = debugfs_create_dir("contexts", lpriv->iface_debugfs);
+	contexts = debugfs_create_dir("contexts", ldev->iface_debugfs);
 	if (!contexts)
 		goto remove_root;
 
 	dentry = debugfs_create_file("show", 0644, contexts,
-				     &lowpan_priv(dev)->ctx,
+				     &lowpan_dev(dev)->ctx,
 				     &lowpan_context_fops);
 	if (!dentry)
 		goto remove_root;
@@ -282,7 +282,7 @@ fail:
 
 void lowpan_dev_debugfs_exit(struct net_device *dev)
 {
-	debugfs_remove_recursive(lowpan_priv(dev)->iface_debugfs);
+	debugfs_remove_recursive(lowpan_dev(dev)->iface_debugfs);
 }
 
 int __init lowpan_debugfs_init(void)
diff --git a/net/6lowpan/iphc.c b/net/6lowpan/iphc.c
index 68c80f3c9add..8501dd532fe1 100644
--- a/net/6lowpan/iphc.c
+++ b/net/6lowpan/iphc.c
@@ -53,9 +53,6 @@
53#include <net/6lowpan.h> 53#include <net/6lowpan.h>
54#include <net/ipv6.h> 54#include <net/ipv6.h>
55 55
56/* special link-layer handling */
57#include <net/mac802154.h>
58
59#include "6lowpan_i.h" 56#include "6lowpan_i.h"
60#include "nhc.h" 57#include "nhc.h"
61 58
@@ -156,32 +153,17 @@
156#define LOWPAN_IPHC_CID_DCI(cid) (cid & 0x0f) 153#define LOWPAN_IPHC_CID_DCI(cid) (cid & 0x0f)
157#define LOWPAN_IPHC_CID_SCI(cid) ((cid & 0xf0) >> 4) 154#define LOWPAN_IPHC_CID_SCI(cid) ((cid & 0xf0) >> 4)
158 155
159static inline void iphc_uncompress_eui64_lladdr(struct in6_addr *ipaddr, 156static inline void
160 const void *lladdr) 157lowpan_iphc_uncompress_802154_lladdr(struct in6_addr *ipaddr,
161{ 158 const void *lladdr)
162 /* fe:80::XXXX:XXXX:XXXX:XXXX
163 * \_________________/
164 * hwaddr
165 */
166 ipaddr->s6_addr[0] = 0xFE;
167 ipaddr->s6_addr[1] = 0x80;
168 memcpy(&ipaddr->s6_addr[8], lladdr, EUI64_ADDR_LEN);
169 /* second bit-flip (Universe/Local)
170 * is done according RFC2464
171 */
172 ipaddr->s6_addr[8] ^= 0x02;
173}
174
175static inline void iphc_uncompress_802154_lladdr(struct in6_addr *ipaddr,
176 const void *lladdr)
177{ 159{
178 const struct ieee802154_addr *addr = lladdr; 160 const struct ieee802154_addr *addr = lladdr;
179 u8 eui64[EUI64_ADDR_LEN] = { }; 161 u8 eui64[EUI64_ADDR_LEN];
180 162
181 switch (addr->mode) { 163 switch (addr->mode) {
182 case IEEE802154_ADDR_LONG: 164 case IEEE802154_ADDR_LONG:
183 ieee802154_le64_to_be64(eui64, &addr->extended_addr); 165 ieee802154_le64_to_be64(eui64, &addr->extended_addr);
184 iphc_uncompress_eui64_lladdr(ipaddr, eui64); 166 lowpan_iphc_uncompress_eui64_lladdr(ipaddr, eui64);
185 break; 167 break;
186 case IEEE802154_ADDR_SHORT: 168 case IEEE802154_ADDR_SHORT:
187 /* fe:80::ff:fe00:XXXX 169 /* fe:80::ff:fe00:XXXX
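The EUI-64 helper dropped from iphc.c here (callers now use lowpan_iphc_uncompress_eui64_lladdr()) builds an IPv6 link-local address from an 8-byte interface identifier. For reference, a self-contained sketch of that stateless mapping: the fe80::/64 prefix plus the hardware EUI-64 with the Universal/Local bit inverted, per RFC 4291/RFC 2464. The function name is made up:

#include <linux/string.h>
#include <net/ipv6.h>

/* Illustrative only: map an 8-byte EUI-64 to an IPv6 link-local address
 * the way the relocated helper does.
 */
static inline void example_eui64_to_linklocal(struct in6_addr *ipaddr,
					      const u8 eui64[8])
{
	memset(ipaddr, 0, sizeof(*ipaddr));
	ipaddr->s6_addr[0] = 0xfe;	/* fe80::/64 link-local prefix */
	ipaddr->s6_addr[1] = 0x80;
	memcpy(&ipaddr->s6_addr[8], eui64, 8);
	ipaddr->s6_addr[8] ^= 0x02;	/* invert the Universal/Local bit */
}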
@@ -207,7 +189,7 @@ static inline void iphc_uncompress_802154_lladdr(struct in6_addr *ipaddr,
207static struct lowpan_iphc_ctx * 189static struct lowpan_iphc_ctx *
208lowpan_iphc_ctx_get_by_id(const struct net_device *dev, u8 id) 190lowpan_iphc_ctx_get_by_id(const struct net_device *dev, u8 id)
209{ 191{
210 struct lowpan_iphc_ctx *ret = &lowpan_priv(dev)->ctx.table[id]; 192 struct lowpan_iphc_ctx *ret = &lowpan_dev(dev)->ctx.table[id];
211 193
212 if (!lowpan_iphc_ctx_is_active(ret)) 194 if (!lowpan_iphc_ctx_is_active(ret))
213 return NULL; 195 return NULL;
@@ -219,7 +201,7 @@ static struct lowpan_iphc_ctx *
219lowpan_iphc_ctx_get_by_addr(const struct net_device *dev, 201lowpan_iphc_ctx_get_by_addr(const struct net_device *dev,
220 const struct in6_addr *addr) 202 const struct in6_addr *addr)
221{ 203{
222 struct lowpan_iphc_ctx *table = lowpan_priv(dev)->ctx.table; 204 struct lowpan_iphc_ctx *table = lowpan_dev(dev)->ctx.table;
223 struct lowpan_iphc_ctx *ret = NULL; 205 struct lowpan_iphc_ctx *ret = NULL;
224 struct in6_addr addr_pfx; 206 struct in6_addr addr_pfx;
225 u8 addr_plen; 207 u8 addr_plen;
@@ -263,7 +245,7 @@ static struct lowpan_iphc_ctx *
263lowpan_iphc_ctx_get_by_mcast_addr(const struct net_device *dev, 245lowpan_iphc_ctx_get_by_mcast_addr(const struct net_device *dev,
264 const struct in6_addr *addr) 246 const struct in6_addr *addr)
265{ 247{
266 struct lowpan_iphc_ctx *table = lowpan_priv(dev)->ctx.table; 248 struct lowpan_iphc_ctx *table = lowpan_dev(dev)->ctx.table;
267 struct lowpan_iphc_ctx *ret = NULL; 249 struct lowpan_iphc_ctx *ret = NULL;
268 struct in6_addr addr_mcast, network_pfx = {}; 250 struct in6_addr addr_mcast, network_pfx = {};
269 int i; 251 int i;
@@ -301,9 +283,10 @@ lowpan_iphc_ctx_get_by_mcast_addr(const struct net_device *dev,
301 * 283 *
302 * address_mode is the masked value for sam or dam value 284 * address_mode is the masked value for sam or dam value
303 */ 285 */
304static int uncompress_addr(struct sk_buff *skb, const struct net_device *dev, 286static int lowpan_iphc_uncompress_addr(struct sk_buff *skb,
305 struct in6_addr *ipaddr, u8 address_mode, 287 const struct net_device *dev,
306 const void *lladdr) 288 struct in6_addr *ipaddr,
289 u8 address_mode, const void *lladdr)
307{ 290{
308 bool fail; 291 bool fail;
309 292
@@ -332,12 +315,12 @@ static int uncompress_addr(struct sk_buff *skb, const struct net_device *dev,
332 case LOWPAN_IPHC_SAM_11: 315 case LOWPAN_IPHC_SAM_11:
333 case LOWPAN_IPHC_DAM_11: 316 case LOWPAN_IPHC_DAM_11:
334 fail = false; 317 fail = false;
335 switch (lowpan_priv(dev)->lltype) { 318 switch (lowpan_dev(dev)->lltype) {
336 case LOWPAN_LLTYPE_IEEE802154: 319 case LOWPAN_LLTYPE_IEEE802154:
337 iphc_uncompress_802154_lladdr(ipaddr, lladdr); 320 lowpan_iphc_uncompress_802154_lladdr(ipaddr, lladdr);
338 break; 321 break;
339 default: 322 default:
340 iphc_uncompress_eui64_lladdr(ipaddr, lladdr); 323 lowpan_iphc_uncompress_eui64_lladdr(ipaddr, lladdr);
341 break; 324 break;
342 } 325 }
343 break; 326 break;
@@ -360,11 +343,11 @@ static int uncompress_addr(struct sk_buff *skb, const struct net_device *dev,
360/* Uncompress address function for source context 343/* Uncompress address function for source context
361 * based address(non-multicast). 344 * based address(non-multicast).
362 */ 345 */
363static int uncompress_ctx_addr(struct sk_buff *skb, 346static int lowpan_iphc_uncompress_ctx_addr(struct sk_buff *skb,
364 const struct net_device *dev, 347 const struct net_device *dev,
365 const struct lowpan_iphc_ctx *ctx, 348 const struct lowpan_iphc_ctx *ctx,
366 struct in6_addr *ipaddr, u8 address_mode, 349 struct in6_addr *ipaddr,
367 const void *lladdr) 350 u8 address_mode, const void *lladdr)
368{ 351{
369 bool fail; 352 bool fail;
370 353
@@ -393,12 +376,12 @@ static int uncompress_ctx_addr(struct sk_buff *skb,
393 case LOWPAN_IPHC_SAM_11: 376 case LOWPAN_IPHC_SAM_11:
394 case LOWPAN_IPHC_DAM_11: 377 case LOWPAN_IPHC_DAM_11:
395 fail = false; 378 fail = false;
396 switch (lowpan_priv(dev)->lltype) { 379 switch (lowpan_dev(dev)->lltype) {
397 case LOWPAN_LLTYPE_IEEE802154: 380 case LOWPAN_LLTYPE_IEEE802154:
398 iphc_uncompress_802154_lladdr(ipaddr, lladdr); 381 lowpan_iphc_uncompress_802154_lladdr(ipaddr, lladdr);
399 break; 382 break;
400 default: 383 default:
401 iphc_uncompress_eui64_lladdr(ipaddr, lladdr); 384 lowpan_iphc_uncompress_eui64_lladdr(ipaddr, lladdr);
402 break; 385 break;
403 } 386 }
404 ipv6_addr_prefix_copy(ipaddr, &ctx->pfx, ctx->plen); 387 ipv6_addr_prefix_copy(ipaddr, &ctx->pfx, ctx->plen);
@@ -657,22 +640,24 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev,
657 } 640 }
658 641
659 if (iphc1 & LOWPAN_IPHC_SAC) { 642 if (iphc1 & LOWPAN_IPHC_SAC) {
660 spin_lock_bh(&lowpan_priv(dev)->ctx.lock); 643 spin_lock_bh(&lowpan_dev(dev)->ctx.lock);
661 ci = lowpan_iphc_ctx_get_by_id(dev, LOWPAN_IPHC_CID_SCI(cid)); 644 ci = lowpan_iphc_ctx_get_by_id(dev, LOWPAN_IPHC_CID_SCI(cid));
662 if (!ci) { 645 if (!ci) {
663 spin_unlock_bh(&lowpan_priv(dev)->ctx.lock); 646 spin_unlock_bh(&lowpan_dev(dev)->ctx.lock);
664 return -EINVAL; 647 return -EINVAL;
665 } 648 }
666 649
667 pr_debug("SAC bit is set. Handle context based source address.\n"); 650 pr_debug("SAC bit is set. Handle context based source address.\n");
668 err = uncompress_ctx_addr(skb, dev, ci, &hdr.saddr, 651 err = lowpan_iphc_uncompress_ctx_addr(skb, dev, ci, &hdr.saddr,
669 iphc1 & LOWPAN_IPHC_SAM_MASK, saddr); 652 iphc1 & LOWPAN_IPHC_SAM_MASK,
670 spin_unlock_bh(&lowpan_priv(dev)->ctx.lock); 653 saddr);
654 spin_unlock_bh(&lowpan_dev(dev)->ctx.lock);
671 } else { 655 } else {
672 /* Source address uncompression */ 656 /* Source address uncompression */
673 pr_debug("source address stateless compression\n"); 657 pr_debug("source address stateless compression\n");
674 err = uncompress_addr(skb, dev, &hdr.saddr, 658 err = lowpan_iphc_uncompress_addr(skb, dev, &hdr.saddr,
675 iphc1 & LOWPAN_IPHC_SAM_MASK, saddr); 659 iphc1 & LOWPAN_IPHC_SAM_MASK,
660 saddr);
676 } 661 }
677 662
678 /* Check on error of previous branch */ 663 /* Check on error of previous branch */
@@ -681,10 +666,10 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev,
681 666
682 switch (iphc1 & (LOWPAN_IPHC_M | LOWPAN_IPHC_DAC)) { 667 switch (iphc1 & (LOWPAN_IPHC_M | LOWPAN_IPHC_DAC)) {
683 case LOWPAN_IPHC_M | LOWPAN_IPHC_DAC: 668 case LOWPAN_IPHC_M | LOWPAN_IPHC_DAC:
684 spin_lock_bh(&lowpan_priv(dev)->ctx.lock); 669 spin_lock_bh(&lowpan_dev(dev)->ctx.lock);
685 ci = lowpan_iphc_ctx_get_by_id(dev, LOWPAN_IPHC_CID_DCI(cid)); 670 ci = lowpan_iphc_ctx_get_by_id(dev, LOWPAN_IPHC_CID_DCI(cid));
686 if (!ci) { 671 if (!ci) {
687 spin_unlock_bh(&lowpan_priv(dev)->ctx.lock); 672 spin_unlock_bh(&lowpan_dev(dev)->ctx.lock);
688 return -EINVAL; 673 return -EINVAL;
689 } 674 }
690 675
@@ -693,7 +678,7 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev,
693 err = lowpan_uncompress_multicast_ctx_daddr(skb, ci, 678 err = lowpan_uncompress_multicast_ctx_daddr(skb, ci,
694 &hdr.daddr, 679 &hdr.daddr,
695 iphc1 & LOWPAN_IPHC_DAM_MASK); 680 iphc1 & LOWPAN_IPHC_DAM_MASK);
696 spin_unlock_bh(&lowpan_priv(dev)->ctx.lock); 681 spin_unlock_bh(&lowpan_dev(dev)->ctx.lock);
697 break; 682 break;
698 case LOWPAN_IPHC_M: 683 case LOWPAN_IPHC_M:
699 /* multicast */ 684 /* multicast */
@@ -701,22 +686,24 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev,
701 iphc1 & LOWPAN_IPHC_DAM_MASK); 686 iphc1 & LOWPAN_IPHC_DAM_MASK);
702 break; 687 break;
703 case LOWPAN_IPHC_DAC: 688 case LOWPAN_IPHC_DAC:
704 spin_lock_bh(&lowpan_priv(dev)->ctx.lock); 689 spin_lock_bh(&lowpan_dev(dev)->ctx.lock);
705 ci = lowpan_iphc_ctx_get_by_id(dev, LOWPAN_IPHC_CID_DCI(cid)); 690 ci = lowpan_iphc_ctx_get_by_id(dev, LOWPAN_IPHC_CID_DCI(cid));
706 if (!ci) { 691 if (!ci) {
707 spin_unlock_bh(&lowpan_priv(dev)->ctx.lock); 692 spin_unlock_bh(&lowpan_dev(dev)->ctx.lock);
708 return -EINVAL; 693 return -EINVAL;
709 } 694 }
710 695
711 /* Destination address context based uncompression */ 696 /* Destination address context based uncompression */
712 pr_debug("DAC bit is set. Handle context based destination address.\n"); 697 pr_debug("DAC bit is set. Handle context based destination address.\n");
713 err = uncompress_ctx_addr(skb, dev, ci, &hdr.daddr, 698 err = lowpan_iphc_uncompress_ctx_addr(skb, dev, ci, &hdr.daddr,
714 iphc1 & LOWPAN_IPHC_DAM_MASK, daddr); 699 iphc1 & LOWPAN_IPHC_DAM_MASK,
715 spin_unlock_bh(&lowpan_priv(dev)->ctx.lock); 700 daddr);
701 spin_unlock_bh(&lowpan_dev(dev)->ctx.lock);
716 break; 702 break;
717 default: 703 default:
718 err = uncompress_addr(skb, dev, &hdr.daddr, 704 err = lowpan_iphc_uncompress_addr(skb, dev, &hdr.daddr,
719 iphc1 & LOWPAN_IPHC_DAM_MASK, daddr); 705 iphc1 & LOWPAN_IPHC_DAM_MASK,
706 daddr);
720 pr_debug("dest: stateless compression mode %d dest %pI6c\n", 707 pr_debug("dest: stateless compression mode %d dest %pI6c\n",
721 iphc1 & LOWPAN_IPHC_DAM_MASK, &hdr.daddr); 708 iphc1 & LOWPAN_IPHC_DAM_MASK, &hdr.daddr);
722 break; 709 break;
@@ -736,7 +723,7 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev,
736 return err; 723 return err;
737 } 724 }
738 725
739 switch (lowpan_priv(dev)->lltype) { 726 switch (lowpan_dev(dev)->lltype) {
740 case LOWPAN_LLTYPE_IEEE802154: 727 case LOWPAN_LLTYPE_IEEE802154:
741 if (lowpan_802154_cb(skb)->d_size) 728 if (lowpan_802154_cb(skb)->d_size)
742 hdr.payload_len = htons(lowpan_802154_cb(skb)->d_size - 729 hdr.payload_len = htons(lowpan_802154_cb(skb)->d_size -
@@ -1033,7 +1020,7 @@ int lowpan_header_compress(struct sk_buff *skb, const struct net_device *dev,
1033 skb->data, skb->len); 1020 skb->data, skb->len);
1034 1021
1035 ipv6_daddr_type = ipv6_addr_type(&hdr->daddr); 1022 ipv6_daddr_type = ipv6_addr_type(&hdr->daddr);
1036 spin_lock_bh(&lowpan_priv(dev)->ctx.lock); 1023 spin_lock_bh(&lowpan_dev(dev)->ctx.lock);
1037 if (ipv6_daddr_type & IPV6_ADDR_MULTICAST) 1024 if (ipv6_daddr_type & IPV6_ADDR_MULTICAST)
1038 dci = lowpan_iphc_ctx_get_by_mcast_addr(dev, &hdr->daddr); 1025 dci = lowpan_iphc_ctx_get_by_mcast_addr(dev, &hdr->daddr);
1039 else 1026 else
@@ -1042,15 +1029,15 @@ int lowpan_header_compress(struct sk_buff *skb, const struct net_device *dev,
1042 memcpy(&dci_entry, dci, sizeof(*dci)); 1029 memcpy(&dci_entry, dci, sizeof(*dci));
1043 cid |= dci->id; 1030 cid |= dci->id;
1044 } 1031 }
1045 spin_unlock_bh(&lowpan_priv(dev)->ctx.lock); 1032 spin_unlock_bh(&lowpan_dev(dev)->ctx.lock);
1046 1033
1047 spin_lock_bh(&lowpan_priv(dev)->ctx.lock); 1034 spin_lock_bh(&lowpan_dev(dev)->ctx.lock);
1048 sci = lowpan_iphc_ctx_get_by_addr(dev, &hdr->saddr); 1035 sci = lowpan_iphc_ctx_get_by_addr(dev, &hdr->saddr);
1049 if (sci) { 1036 if (sci) {
1050 memcpy(&sci_entry, sci, sizeof(*sci)); 1037 memcpy(&sci_entry, sci, sizeof(*sci));
1051 cid |= (sci->id << 4); 1038 cid |= (sci->id << 4);
1052 } 1039 }
1053 spin_unlock_bh(&lowpan_priv(dev)->ctx.lock); 1040 spin_unlock_bh(&lowpan_dev(dev)->ctx.lock);
1054 1041
1055 /* if cid is zero it will be compressed */ 1042 /* if cid is zero it will be compressed */
1056 if (cid) { 1043 if (cid) {
diff --git a/net/6lowpan/nhc_udp.c b/net/6lowpan/nhc_udp.c
index 69537a2eaab1..225d91906dfa 100644
--- a/net/6lowpan/nhc_udp.c
+++ b/net/6lowpan/nhc_udp.c
@@ -91,7 +91,7 @@ static int udp_uncompress(struct sk_buff *skb, size_t needed)
 	 * here, we obtain the hint from the remaining size of the
 	 * frame
 	 */
-	switch (lowpan_priv(skb->dev)->lltype) {
+	switch (lowpan_dev(skb->dev)->lltype) {
 	case LOWPAN_LLTYPE_IEEE802154:
 		if (lowpan_802154_cb(skb)->d_size)
 			uh.len = htons(lowpan_802154_cb(skb)->d_size -
diff --git a/net/Kconfig b/net/Kconfig
index a8934d8c8fda..b841c42e5c9b 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -236,6 +236,7 @@ source "net/mpls/Kconfig"
 source "net/hsr/Kconfig"
 source "net/switchdev/Kconfig"
 source "net/l3mdev/Kconfig"
+source "net/qrtr/Kconfig"
 
 config RPS
 	bool
diff --git a/net/Makefile b/net/Makefile
index 81d14119eab5..bdd14553a774 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -78,3 +78,4 @@ endif
 ifneq ($(CONFIG_NET_L3_MASTER_DEV),)
 obj-y += l3mdev/
 endif
+obj-$(CONFIG_QRTR)	+= qrtr/
diff --git a/net/atm/lec.c b/net/atm/lec.c
index cd3b37989057..e574a7e9db6f 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -194,7 +194,7 @@ lec_send(struct atm_vcc *vcc, struct sk_buff *skb)
 static void lec_tx_timeout(struct net_device *dev)
 {
 	pr_info("%s\n", dev->name);
-	dev->trans_start = jiffies;
+	netif_trans_update(dev);
 	netif_wake_queue(dev);
 }
 
@@ -324,7 +324,7 @@ static netdev_tx_t lec_start_xmit(struct sk_buff *skb,
 out:
 	if (entry)
 		lec_arp_put(entry);
-	dev->trans_start = jiffies;
+	netif_trans_update(dev);
 	return NETDEV_TX_OK;
 }
 
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index cb2d1b9b0340..7f98a9d39883 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -32,6 +32,7 @@
32#include <linux/jiffies.h> 32#include <linux/jiffies.h>
33#include <linux/list.h> 33#include <linux/list.h>
34#include <linux/kref.h> 34#include <linux/kref.h>
35#include <linux/lockdep.h>
35#include <linux/netdevice.h> 36#include <linux/netdevice.h>
36#include <linux/pkt_sched.h> 37#include <linux/pkt_sched.h>
37#include <linux/printk.h> 38#include <linux/printk.h>
@@ -175,71 +176,107 @@ unlock:
175} 176}
176 177
177/** 178/**
178 * batadv_iv_ogm_orig_del_if - change the private structures of the orig_node to 179 * batadv_iv_ogm_drop_bcast_own_entry - drop section of bcast_own
179 * exclude the removed interface
180 * @orig_node: the orig_node that has to be changed 180 * @orig_node: the orig_node that has to be changed
181 * @max_if_num: the current amount of interfaces 181 * @max_if_num: the current amount of interfaces
182 * @del_if_num: the index of the interface being removed 182 * @del_if_num: the index of the interface being removed
183 *
184 * Return: 0 on success, a negative error code otherwise.
185 */ 183 */
186static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node, 184static void
187 int max_if_num, int del_if_num) 185batadv_iv_ogm_drop_bcast_own_entry(struct batadv_orig_node *orig_node,
186 int max_if_num, int del_if_num)
188{ 187{
189 int ret = -ENOMEM; 188 size_t chunk_size;
190 size_t chunk_size, if_offset; 189 size_t if_offset;
191 void *data_ptr = NULL; 190 void *data_ptr;
192
193 spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
194 191
195 /* last interface was removed */ 192 lockdep_assert_held(&orig_node->bat_iv.ogm_cnt_lock);
196 if (max_if_num == 0)
197 goto free_bcast_own;
198 193
199 chunk_size = sizeof(unsigned long) * BATADV_NUM_WORDS; 194 chunk_size = sizeof(unsigned long) * BATADV_NUM_WORDS;
200 data_ptr = kmalloc_array(max_if_num, chunk_size, GFP_ATOMIC); 195 data_ptr = kmalloc_array(max_if_num, chunk_size, GFP_ATOMIC);
201 if (!data_ptr) 196 if (!data_ptr)
202 goto unlock; 197 /* use old buffer when new one could not be allocated */
198 data_ptr = orig_node->bat_iv.bcast_own;
203 199
204 /* copy first part */ 200 /* copy first part */
205 memcpy(data_ptr, orig_node->bat_iv.bcast_own, del_if_num * chunk_size); 201 memmove(data_ptr, orig_node->bat_iv.bcast_own, del_if_num * chunk_size);
206 202
207 /* copy second part */ 203 /* copy second part */
208 if_offset = (del_if_num + 1) * chunk_size; 204 if_offset = (del_if_num + 1) * chunk_size;
209 memcpy((char *)data_ptr + del_if_num * chunk_size, 205 memmove((char *)data_ptr + del_if_num * chunk_size,
210 (uint8_t *)orig_node->bat_iv.bcast_own + if_offset, 206 (uint8_t *)orig_node->bat_iv.bcast_own + if_offset,
211 (max_if_num - del_if_num) * chunk_size); 207 (max_if_num - del_if_num) * chunk_size);
212 208
213free_bcast_own: 209 /* bcast_own was shrunk down in new buffer; free old one */
214 kfree(orig_node->bat_iv.bcast_own); 210 if (orig_node->bat_iv.bcast_own != data_ptr) {
215 orig_node->bat_iv.bcast_own = data_ptr; 211 kfree(orig_node->bat_iv.bcast_own);
212 orig_node->bat_iv.bcast_own = data_ptr;
213 }
214}
215
216/**
217 * batadv_iv_ogm_drop_bcast_own_sum_entry - drop section of bcast_own_sum
218 * @orig_node: the orig_node that has to be changed
219 * @max_if_num: the current amount of interfaces
220 * @del_if_num: the index of the interface being removed
221 */
222static void
223batadv_iv_ogm_drop_bcast_own_sum_entry(struct batadv_orig_node *orig_node,
224 int max_if_num, int del_if_num)
225{
226 size_t if_offset;
227 void *data_ptr;
216 228
217 if (max_if_num == 0) 229 lockdep_assert_held(&orig_node->bat_iv.ogm_cnt_lock);
218 goto free_own_sum;
219 230
220 data_ptr = kmalloc_array(max_if_num, sizeof(u8), GFP_ATOMIC); 231 data_ptr = kmalloc_array(max_if_num, sizeof(u8), GFP_ATOMIC);
221 if (!data_ptr) { 232 if (!data_ptr)
222 kfree(orig_node->bat_iv.bcast_own); 233 /* use old buffer when new one could not be allocated */
223 goto unlock; 234 data_ptr = orig_node->bat_iv.bcast_own_sum;
224 }
225 235
226 memcpy(data_ptr, orig_node->bat_iv.bcast_own_sum, 236 memmove(data_ptr, orig_node->bat_iv.bcast_own_sum,
227 del_if_num * sizeof(u8)); 237 del_if_num * sizeof(u8));
228 238
229 if_offset = (del_if_num + 1) * sizeof(u8); 239 if_offset = (del_if_num + 1) * sizeof(u8);
230 memcpy((char *)data_ptr + del_if_num * sizeof(u8), 240 memmove((char *)data_ptr + del_if_num * sizeof(u8),
231 orig_node->bat_iv.bcast_own_sum + if_offset, 241 orig_node->bat_iv.bcast_own_sum + if_offset,
232 (max_if_num - del_if_num) * sizeof(u8)); 242 (max_if_num - del_if_num) * sizeof(u8));
243
244 /* bcast_own_sum was shrunk down in new buffer; free old one */
245 if (orig_node->bat_iv.bcast_own_sum != data_ptr) {
246 kfree(orig_node->bat_iv.bcast_own_sum);
247 orig_node->bat_iv.bcast_own_sum = data_ptr;
248 }
249}
233 250
234free_own_sum: 251/**
235 kfree(orig_node->bat_iv.bcast_own_sum); 252 * batadv_iv_ogm_orig_del_if - change the private structures of the orig_node to
236 orig_node->bat_iv.bcast_own_sum = data_ptr; 253 * exclude the removed interface
254 * @orig_node: the orig_node that has to be changed
255 * @max_if_num: the current amount of interfaces
256 * @del_if_num: the index of the interface being removed
257 *
258 * Return: 0 on success, a negative error code otherwise.
259 */
260static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node,
261 int max_if_num, int del_if_num)
262{
263 spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
264
265 if (max_if_num == 0) {
266 kfree(orig_node->bat_iv.bcast_own);
267 kfree(orig_node->bat_iv.bcast_own_sum);
268 orig_node->bat_iv.bcast_own = NULL;
269 orig_node->bat_iv.bcast_own_sum = NULL;
270 } else {
271 batadv_iv_ogm_drop_bcast_own_entry(orig_node, max_if_num,
272 del_if_num);
273 batadv_iv_ogm_drop_bcast_own_sum_entry(orig_node, max_if_num,
274 del_if_num);
275 }
237 276
238 ret = 0;
239unlock:
240 spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock); 277 spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
241 278
242 return ret; 279 return 0;
243} 280}
244 281
245/** 282/**
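The memcpy() to memmove() switch above matters because the fallback path now shrinks the array in place: when no new buffer can be allocated, data_ptr aliases the old bcast_own buffer, so source and destination overlap. A self-contained sketch of that in-place removal, with hypothetical names:

/* Remove entry del_if_num from a packed array that currently holds
 * max_if_num chunks, shifting the tail down in place.  Overlapping
 * source/destination is why memmove() is required here.
 */
static void example_drop_chunk(u8 *buf, size_t chunk_size,
			       int max_if_num, int del_if_num)
{
	size_t tail = (max_if_num - del_if_num - 1) * chunk_size;

	memmove(buf + del_if_num * chunk_size,
		buf + (del_if_num + 1) * chunk_size,
		tail);
}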
@@ -644,18 +681,12 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
644 unsigned char *skb_buff; 681 unsigned char *skb_buff;
645 unsigned int skb_size; 682 unsigned int skb_size;
646 683
647 if (!kref_get_unless_zero(&if_incoming->refcount))
648 return;
649
650 if (!kref_get_unless_zero(&if_outgoing->refcount))
651 goto out_free_incoming;
652
653 /* own packet should always be scheduled */ 684 /* own packet should always be scheduled */
654 if (!own_packet) { 685 if (!own_packet) {
655 if (!batadv_atomic_dec_not_zero(&bat_priv->batman_queue_left)) { 686 if (!batadv_atomic_dec_not_zero(&bat_priv->batman_queue_left)) {
656 batadv_dbg(BATADV_DBG_BATMAN, bat_priv, 687 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
657 "batman packet queue full\n"); 688 "batman packet queue full\n");
658 goto out_free_outgoing; 689 return;
659 } 690 }
660 } 691 }
661 692
@@ -681,6 +712,8 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
681 forw_packet_aggr->packet_len = packet_len; 712 forw_packet_aggr->packet_len = packet_len;
682 memcpy(skb_buff, packet_buff, packet_len); 713 memcpy(skb_buff, packet_buff, packet_len);
683 714
715 kref_get(&if_incoming->refcount);
716 kref_get(&if_outgoing->refcount);
684 forw_packet_aggr->own = own_packet; 717 forw_packet_aggr->own = own_packet;
685 forw_packet_aggr->if_incoming = if_incoming; 718 forw_packet_aggr->if_incoming = if_incoming;
686 forw_packet_aggr->if_outgoing = if_outgoing; 719 forw_packet_aggr->if_outgoing = if_outgoing;
@@ -710,10 +743,6 @@ out_free_forw_packet:
710out_nomem: 743out_nomem:
711 if (!own_packet) 744 if (!own_packet)
712 atomic_inc(&bat_priv->batman_queue_left); 745 atomic_inc(&bat_priv->batman_queue_left);
713out_free_outgoing:
714 batadv_hardif_put(if_outgoing);
715out_free_incoming:
716 batadv_hardif_put(if_incoming);
717} 746}
718 747
719/* aggregate a new packet into the existing ogm packet */ 748/* aggregate a new packet into the existing ogm packet */
@@ -950,9 +979,15 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
950 list_for_each_entry_rcu(tmp_hard_iface, &batadv_hardif_list, list) { 979 list_for_each_entry_rcu(tmp_hard_iface, &batadv_hardif_list, list) {
951 if (tmp_hard_iface->soft_iface != hard_iface->soft_iface) 980 if (tmp_hard_iface->soft_iface != hard_iface->soft_iface)
952 continue; 981 continue;
982
983 if (!kref_get_unless_zero(&tmp_hard_iface->refcount))
984 continue;
985
953 batadv_iv_ogm_queue_add(bat_priv, *ogm_buff, 986 batadv_iv_ogm_queue_add(bat_priv, *ogm_buff,
954 *ogm_buff_len, hard_iface, 987 *ogm_buff_len, hard_iface,
955 tmp_hard_iface, 1, send_time); 988 tmp_hard_iface, 1, send_time);
989
990 batadv_hardif_put(tmp_hard_iface);
956 } 991 }
957 rcu_read_unlock(); 992 rcu_read_unlock();
958 993
@@ -1133,13 +1168,13 @@ out:
1133 * @if_incoming: interface where the packet was received 1168 * @if_incoming: interface where the packet was received
1134 * @if_outgoing: interface for which the retransmission should be considered 1169 * @if_outgoing: interface for which the retransmission should be considered
1135 * 1170 *
1136 * Return: 1 if the link can be considered bidirectional, 0 otherwise 1171 * Return: true if the link can be considered bidirectional, false otherwise
1137 */ 1172 */
1138static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, 1173static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
1139 struct batadv_orig_node *orig_neigh_node, 1174 struct batadv_orig_node *orig_neigh_node,
1140 struct batadv_ogm_packet *batadv_ogm_packet, 1175 struct batadv_ogm_packet *batadv_ogm_packet,
1141 struct batadv_hard_iface *if_incoming, 1176 struct batadv_hard_iface *if_incoming,
1142 struct batadv_hard_iface *if_outgoing) 1177 struct batadv_hard_iface *if_outgoing)
1143{ 1178{
1144 struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); 1179 struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
1145 struct batadv_neigh_node *neigh_node = NULL, *tmp_neigh_node; 1180 struct batadv_neigh_node *neigh_node = NULL, *tmp_neigh_node;
@@ -1147,9 +1182,10 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
1147 u8 total_count; 1182 u8 total_count;
1148 u8 orig_eq_count, neigh_rq_count, neigh_rq_inv, tq_own; 1183 u8 orig_eq_count, neigh_rq_count, neigh_rq_inv, tq_own;
1149 unsigned int neigh_rq_inv_cube, neigh_rq_max_cube; 1184 unsigned int neigh_rq_inv_cube, neigh_rq_max_cube;
1150 int tq_asym_penalty, inv_asym_penalty, if_num, ret = 0; 1185 int tq_asym_penalty, inv_asym_penalty, if_num;
1151 unsigned int combined_tq; 1186 unsigned int combined_tq;
1152 int tq_iface_penalty; 1187 int tq_iface_penalty;
1188 bool ret = false;
1153 1189
1154 /* find corresponding one hop neighbor */ 1190 /* find corresponding one hop neighbor */
1155 rcu_read_lock(); 1191 rcu_read_lock();
@@ -1261,7 +1297,7 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
1261 * consider it bidirectional 1297 * consider it bidirectional
1262 */ 1298 */
1263 if (batadv_ogm_packet->tq >= BATADV_TQ_TOTAL_BIDRECT_LIMIT) 1299 if (batadv_ogm_packet->tq >= BATADV_TQ_TOTAL_BIDRECT_LIMIT)
1264 ret = 1; 1300 ret = true;
1265 1301
1266out: 1302out:
1267 if (neigh_node) 1303 if (neigh_node)
@@ -1290,9 +1326,9 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
1290 struct batadv_orig_ifinfo *orig_ifinfo = NULL; 1326 struct batadv_orig_ifinfo *orig_ifinfo = NULL;
1291 struct batadv_neigh_node *neigh_node; 1327 struct batadv_neigh_node *neigh_node;
1292 struct batadv_neigh_ifinfo *neigh_ifinfo; 1328 struct batadv_neigh_ifinfo *neigh_ifinfo;
1293 int is_dup; 1329 bool is_dup;
1294 s32 seq_diff; 1330 s32 seq_diff;
1295 int need_update = 0; 1331 bool need_update = false;
1296 int set_mark; 1332 int set_mark;
1297 enum batadv_dup_status ret = BATADV_NO_DUP; 1333 enum batadv_dup_status ret = BATADV_NO_DUP;
1298 u32 seqno = ntohl(batadv_ogm_packet->seqno); 1334 u32 seqno = ntohl(batadv_ogm_packet->seqno);
@@ -1402,7 +1438,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
1402 struct sk_buff *skb_priv; 1438 struct sk_buff *skb_priv;
1403 struct ethhdr *ethhdr; 1439 struct ethhdr *ethhdr;
1404 u8 *prev_sender; 1440 u8 *prev_sender;
1405 int is_bidirect; 1441 bool is_bidirect;
1406 1442
1407 /* create a private copy of the skb, as some functions change tq value 1443 /* create a private copy of the skb, as some functions change tq value
1408 * and/or flags. 1444 * and/or flags.
@@ -1730,8 +1766,13 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset,
1730 if (hard_iface->soft_iface != bat_priv->soft_iface) 1766 if (hard_iface->soft_iface != bat_priv->soft_iface)
1731 continue; 1767 continue;
1732 1768
1769 if (!kref_get_unless_zero(&hard_iface->refcount))
1770 continue;
1771
1733 batadv_iv_ogm_process_per_outif(skb, ogm_offset, orig_node, 1772 batadv_iv_ogm_process_per_outif(skb, ogm_offset, orig_node,
1734 if_incoming, hard_iface); 1773 if_incoming, hard_iface);
1774
1775 batadv_hardif_put(hard_iface);
1735 } 1776 }
1736 rcu_read_unlock(); 1777 rcu_read_unlock();
1737 1778
@@ -1829,9 +1870,8 @@ static void batadv_iv_ogm_orig_print(struct batadv_priv *bat_priv,
1829 int batman_count = 0; 1870 int batman_count = 0;
1830 u32 i; 1871 u32 i;
1831 1872
1832 seq_printf(seq, " %-15s %s (%s/%i) %17s [%10s]: %20s ...\n", 1873 seq_puts(seq,
1833 "Originator", "last-seen", "#", BATADV_TQ_MAX_VALUE, 1874 " Originator last-seen (#/255) Nexthop [outgoingIF]: Potential nexthops ...\n");
1834 "Nexthop", "outgoingIF", "Potential nexthops");
1835 1875
1836 for (i = 0; i < hash->size; i++) { 1876 for (i = 0; i < hash->size; i++) {
1837 head = &hash->table[i]; 1877 head = &hash->table[i];
@@ -1911,8 +1951,7 @@ static void batadv_iv_neigh_print(struct batadv_priv *bat_priv,
1911 struct batadv_hard_iface *hard_iface; 1951 struct batadv_hard_iface *hard_iface;
1912 int batman_count = 0; 1952 int batman_count = 0;
1913 1953
1914 seq_printf(seq, " %10s %-13s %s\n", 1954 seq_puts(seq, " IF Neighbor last-seen\n");
1915 "IF", "Neighbor", "last-seen");
1916 1955
1917 rcu_read_lock(); 1956 rcu_read_lock();
1918 list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { 1957 list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 3315b9a598af..3ff8bd1b7bdc 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -32,10 +32,21 @@
 
 #include "bat_v_elp.h"
 #include "bat_v_ogm.h"
+#include "hard-interface.h"
 #include "hash.h"
 #include "originator.h"
 #include "packet.h"
 
+static void batadv_v_iface_activate(struct batadv_hard_iface *hard_iface)
+{
+	/* B.A.T.M.A.N. V does not use any queuing mechanism, therefore it can
+	 * set the interface as ACTIVE right away, without any risk of race
+	 * condition
+	 */
+	if (hard_iface->if_status == BATADV_IF_TO_BE_ACTIVATED)
+		hard_iface->if_status = BATADV_IF_ACTIVE;
+}
+
 static int batadv_v_iface_enable(struct batadv_hard_iface *hard_iface)
 {
 	int ret;
@@ -151,8 +162,8 @@ static void batadv_v_neigh_print(struct batadv_priv *bat_priv,
 	struct batadv_hard_iface *hard_iface;
 	int batman_count = 0;
 
-	seq_printf(seq, " %-15s %s (%11s) [%10s]\n", "Neighbor",
-		   "last-seen", "throughput", "IF");
+	seq_puts(seq,
+		 " Neighbor        last-seen ( throughput) [        IF]\n");
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
@@ -191,9 +202,8 @@ static void batadv_v_orig_print(struct batadv_priv *bat_priv,
 	int batman_count = 0;
 	u32 i;
 
-	seq_printf(seq, " %-15s %s (%11s) %17s [%10s]: %20s ...\n",
-		   "Originator", "last-seen", "throughput", "Nexthop",
-		   "outgoingIF", "Potential nexthops");
+	seq_puts(seq,
+		 " Originator      last-seen ( throughput)           Nexthop [outgoingIF]:   Potential nexthops ...\n");
 
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
@@ -274,6 +284,7 @@ static bool batadv_v_neigh_is_sob(struct batadv_neigh_node *neigh1,
 
 static struct batadv_algo_ops batadv_batman_v __read_mostly = {
 	.name = "BATMAN_V",
+	.bat_iface_activate = batadv_v_iface_activate,
 	.bat_iface_enable = batadv_v_iface_enable,
 	.bat_iface_disable = batadv_v_iface_disable,
 	.bat_iface_update_mac = batadv_v_iface_update_mac,
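batadv_v_iface_activate() is wired up through the new bat_iface_activate member of batadv_algo_ops, presumably invoked from the hard-interface activation path when an algorithm provides it. A trimmed sketch of how an algorithm would register such ops; every batadv_example_* symbol is a placeholder and the remaining mandatory hooks are omitted:

/* Illustrative registration only; the real B.A.T.M.A.N. V table is the
 * batadv_batman_v structure shown in the hunk above.
 */
static struct batadv_algo_ops batadv_example_ops __read_mostly = {
	.name = "EXAMPLE",
	.bat_iface_activate = batadv_example_iface_activate,	/* optional */
	.bat_iface_enable = batadv_example_iface_enable,
	.bat_iface_disable = batadv_example_iface_disable,
	.bat_iface_update_mac = batadv_example_iface_update_mac,
	/* remaining mandatory hooks omitted for brevity */
};

static int __init batadv_example_init(void)
{
	return batadv_algo_register(&batadv_example_ops);
}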
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index d9bcbe6e7d65..473ebb9a0e73 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -26,6 +26,7 @@
26#include <linux/if_ether.h> 26#include <linux/if_ether.h>
27#include <linux/jiffies.h> 27#include <linux/jiffies.h>
28#include <linux/kernel.h> 28#include <linux/kernel.h>
29#include <linux/kref.h>
29#include <linux/list.h> 30#include <linux/list.h>
30#include <linux/netdevice.h> 31#include <linux/netdevice.h>
31#include <linux/random.h> 32#include <linux/random.h>
@@ -176,6 +177,9 @@ static void batadv_v_ogm_send(struct work_struct *work)
176 if (hard_iface->soft_iface != bat_priv->soft_iface) 177 if (hard_iface->soft_iface != bat_priv->soft_iface)
177 continue; 178 continue;
178 179
180 if (!kref_get_unless_zero(&hard_iface->refcount))
181 continue;
182
179 batadv_dbg(BATADV_DBG_BATMAN, bat_priv, 183 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
180 "Sending own OGM2 packet (originator %pM, seqno %u, throughput %u, TTL %d) on interface %s [%pM]\n", 184 "Sending own OGM2 packet (originator %pM, seqno %u, throughput %u, TTL %d) on interface %s [%pM]\n",
181 ogm_packet->orig, ntohl(ogm_packet->seqno), 185 ogm_packet->orig, ntohl(ogm_packet->seqno),
@@ -185,10 +189,13 @@ static void batadv_v_ogm_send(struct work_struct *work)
185 189
186 /* this skb gets consumed by batadv_v_ogm_send_to_if() */ 190 /* this skb gets consumed by batadv_v_ogm_send_to_if() */
187 skb_tmp = skb_clone(skb, GFP_ATOMIC); 191 skb_tmp = skb_clone(skb, GFP_ATOMIC);
188 if (!skb_tmp) 192 if (!skb_tmp) {
193 batadv_hardif_put(hard_iface);
189 break; 194 break;
195 }
190 196
191 batadv_v_ogm_send_to_if(skb_tmp, hard_iface); 197 batadv_v_ogm_send_to_if(skb_tmp, hard_iface);
198 batadv_hardif_put(hard_iface);
192 } 199 }
193 rcu_read_unlock(); 200 rcu_read_unlock();
194 201
@@ -234,73 +241,6 @@ void batadv_v_ogm_primary_iface_set(struct batadv_hard_iface *primary_iface)
234} 241}
235 242
236/** 243/**
237 * batadv_v_ogm_orig_update - update the originator status based on the received
238 * OGM
239 * @bat_priv: the bat priv with all the soft interface information
240 * @orig_node: the originator to update
241 * @neigh_node: the neighbour the OGM has been received from (to update)
242 * @ogm2: the received OGM
243 * @if_outgoing: the interface where this OGM is going to be forwarded through
244 */
245static void
246batadv_v_ogm_orig_update(struct batadv_priv *bat_priv,
247 struct batadv_orig_node *orig_node,
248 struct batadv_neigh_node *neigh_node,
249 const struct batadv_ogm2_packet *ogm2,
250 struct batadv_hard_iface *if_outgoing)
251{
252 struct batadv_neigh_ifinfo *router_ifinfo = NULL, *neigh_ifinfo = NULL;
253 struct batadv_neigh_node *router = NULL;
254 s32 neigh_seq_diff;
255 u32 neigh_last_seqno;
256 u32 router_last_seqno;
257 u32 router_throughput, neigh_throughput;
258
259 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
260 "Searching and updating originator entry of received packet\n");
261
262 /* if this neighbor already is our next hop there is nothing
263 * to change
264 */
265 router = batadv_orig_router_get(orig_node, if_outgoing);
266 if (router == neigh_node)
267 goto out;
268
269 /* don't consider neighbours with worse throughput.
270 * also switch route if this seqno is BATADV_V_MAX_ORIGDIFF newer than
271 * the last received seqno from our best next hop.
272 */
273 if (router) {
274 router_ifinfo = batadv_neigh_ifinfo_get(router, if_outgoing);
275 neigh_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing);
276
277 /* if these are not allocated, something is wrong. */
278 if (!router_ifinfo || !neigh_ifinfo)
279 goto out;
280
281 neigh_last_seqno = neigh_ifinfo->bat_v.last_seqno;
282 router_last_seqno = router_ifinfo->bat_v.last_seqno;
283 neigh_seq_diff = neigh_last_seqno - router_last_seqno;
284 router_throughput = router_ifinfo->bat_v.throughput;
285 neigh_throughput = neigh_ifinfo->bat_v.throughput;
286
287 if ((neigh_seq_diff < BATADV_OGM_MAX_ORIGDIFF) &&
288 (router_throughput >= neigh_throughput))
289 goto out;
290 }
291
292 batadv_update_route(bat_priv, orig_node, if_outgoing, neigh_node);
293
294out:
295 if (router_ifinfo)
296 batadv_neigh_ifinfo_put(router_ifinfo);
297 if (neigh_ifinfo)
298 batadv_neigh_ifinfo_put(neigh_ifinfo);
299 if (router)
300 batadv_neigh_node_put(router);
301}
302
303/**
304 * batadv_v_forward_penalty - apply a penalty to the throughput metric forwarded 244 * batadv_v_forward_penalty - apply a penalty to the throughput metric forwarded
305 * with B.A.T.M.A.N. V OGMs 245 * with B.A.T.M.A.N. V OGMs
306 * @bat_priv: the bat priv with all the soft interface information 246 * @bat_priv: the bat priv with all the soft interface information
@@ -347,10 +287,12 @@ static u32 batadv_v_forward_penalty(struct batadv_priv *bat_priv,
347} 287}
348 288
349/** 289/**
350 * batadv_v_ogm_forward - forward an OGM to the given outgoing interface 290 * batadv_v_ogm_forward - check conditions and forward an OGM to the given
291 * outgoing interface
351 * @bat_priv: the bat priv with all the soft interface information 292 * @bat_priv: the bat priv with all the soft interface information
352 * @ogm_received: previously received OGM to be forwarded 293 * @ogm_received: previously received OGM to be forwarded
353 * @throughput: throughput to announce, may vary per outgoing interface 294 * @orig_node: the originator which has been updated
295 * @neigh_node: the neigh_node through with the OGM has been received
354 * @if_incoming: the interface on which this OGM was received on 296 * @if_incoming: the interface on which this OGM was received on
355 * @if_outgoing: the interface to which the OGM has to be forwarded to 297 * @if_outgoing: the interface to which the OGM has to be forwarded to
356 * 298 *
@@ -359,28 +301,57 @@ static u32 batadv_v_forward_penalty(struct batadv_priv *bat_priv,
359 */ 301 */
360static void batadv_v_ogm_forward(struct batadv_priv *bat_priv, 302static void batadv_v_ogm_forward(struct batadv_priv *bat_priv,
361 const struct batadv_ogm2_packet *ogm_received, 303 const struct batadv_ogm2_packet *ogm_received,
362 u32 throughput, 304 struct batadv_orig_node *orig_node,
305 struct batadv_neigh_node *neigh_node,
363 struct batadv_hard_iface *if_incoming, 306 struct batadv_hard_iface *if_incoming,
364 struct batadv_hard_iface *if_outgoing) 307 struct batadv_hard_iface *if_outgoing)
365{ 308{
309 struct batadv_neigh_ifinfo *neigh_ifinfo = NULL;
310 struct batadv_orig_ifinfo *orig_ifinfo = NULL;
311 struct batadv_neigh_node *router = NULL;
366 struct batadv_ogm2_packet *ogm_forward; 312 struct batadv_ogm2_packet *ogm_forward;
367 unsigned char *skb_buff; 313 unsigned char *skb_buff;
368 struct sk_buff *skb; 314 struct sk_buff *skb;
369 size_t packet_len; 315 size_t packet_len;
370 u16 tvlv_len; 316 u16 tvlv_len;
371 317
318 /* only forward for specific interfaces, not for the default one. */
319 if (if_outgoing == BATADV_IF_DEFAULT)
320 goto out;
321
322 orig_ifinfo = batadv_orig_ifinfo_new(orig_node, if_outgoing);
323 if (!orig_ifinfo)
324 goto out;
325
326 /* acquire possibly updated router */
327 router = batadv_orig_router_get(orig_node, if_outgoing);
328
329 /* strict rule: forward packets coming from the best next hop only */
330 if (neigh_node != router)
331 goto out;
332
333 /* don't forward the same seqno twice on one interface */
334 if (orig_ifinfo->last_seqno_forwarded == ntohl(ogm_received->seqno))
335 goto out;
336
337 orig_ifinfo->last_seqno_forwarded = ntohl(ogm_received->seqno);
338
372 if (ogm_received->ttl <= 1) { 339 if (ogm_received->ttl <= 1) {
373 batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "ttl exceeded\n"); 340 batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "ttl exceeded\n");
374 return; 341 goto out;
375 } 342 }
376 343
344 neigh_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing);
345 if (!neigh_ifinfo)
346 goto out;
347
377 tvlv_len = ntohs(ogm_received->tvlv_len); 348 tvlv_len = ntohs(ogm_received->tvlv_len);
378 349
379 packet_len = BATADV_OGM2_HLEN + tvlv_len; 350 packet_len = BATADV_OGM2_HLEN + tvlv_len;
380 skb = netdev_alloc_skb_ip_align(if_outgoing->net_dev, 351 skb = netdev_alloc_skb_ip_align(if_outgoing->net_dev,
381 ETH_HLEN + packet_len); 352 ETH_HLEN + packet_len);
382 if (!skb) 353 if (!skb)
383 return; 354 goto out;
384 355
385 skb_reserve(skb, ETH_HLEN); 356 skb_reserve(skb, ETH_HLEN);
386 skb_buff = skb_put(skb, packet_len); 357 skb_buff = skb_put(skb, packet_len);
@@ -388,15 +359,23 @@ static void batadv_v_ogm_forward(struct batadv_priv *bat_priv,
388 359
389 /* apply forward penalty */ 360 /* apply forward penalty */
390 ogm_forward = (struct batadv_ogm2_packet *)skb_buff; 361 ogm_forward = (struct batadv_ogm2_packet *)skb_buff;
391 ogm_forward->throughput = htonl(throughput); 362 ogm_forward->throughput = htonl(neigh_ifinfo->bat_v.throughput);
392 ogm_forward->ttl--; 363 ogm_forward->ttl--;
393 364
394 batadv_dbg(BATADV_DBG_BATMAN, bat_priv, 365 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
395 "Forwarding OGM2 packet on %s: throughput %u, ttl %u, received via %s\n", 366 "Forwarding OGM2 packet on %s: throughput %u, ttl %u, received via %s\n",
396 if_outgoing->net_dev->name, throughput, ogm_forward->ttl, 367 if_outgoing->net_dev->name, ntohl(ogm_forward->throughput),
397 if_incoming->net_dev->name); 368 ogm_forward->ttl, if_incoming->net_dev->name);
398 369
399 batadv_v_ogm_send_to_if(skb, if_outgoing); 370 batadv_v_ogm_send_to_if(skb, if_outgoing);
371
372out:
373 if (orig_ifinfo)
374 batadv_orig_ifinfo_put(orig_ifinfo);
375 if (router)
376 batadv_neigh_node_put(router);
377 if (neigh_ifinfo)
378 batadv_neigh_ifinfo_put(neigh_ifinfo);
400} 379}
401 380
402/** 381/**
@@ -493,8 +472,10 @@ out:
493 * @neigh_node: the neigh_node through with the OGM has been received 472 * @neigh_node: the neigh_node through with the OGM has been received
494 * @if_incoming: the interface where this packet was received 473 * @if_incoming: the interface where this packet was received
495 * @if_outgoing: the interface for which the packet should be considered 474 * @if_outgoing: the interface for which the packet should be considered
475 *
476 * Return: true if the packet should be forwarded, false otherwise
496 */ 477 */
497static void batadv_v_ogm_route_update(struct batadv_priv *bat_priv, 478static bool batadv_v_ogm_route_update(struct batadv_priv *bat_priv,
498 const struct ethhdr *ethhdr, 479 const struct ethhdr *ethhdr,
499 const struct batadv_ogm2_packet *ogm2, 480 const struct batadv_ogm2_packet *ogm2,
500 struct batadv_orig_node *orig_node, 481 struct batadv_orig_node *orig_node,
@@ -503,14 +484,14 @@ static void batadv_v_ogm_route_update(struct batadv_priv *bat_priv,
503 struct batadv_hard_iface *if_outgoing) 484 struct batadv_hard_iface *if_outgoing)
504{ 485{
505 struct batadv_neigh_node *router = NULL; 486 struct batadv_neigh_node *router = NULL;
506 struct batadv_neigh_ifinfo *neigh_ifinfo = NULL;
507 struct batadv_orig_node *orig_neigh_node = NULL; 487 struct batadv_orig_node *orig_neigh_node = NULL;
508 struct batadv_orig_ifinfo *orig_ifinfo = NULL;
509 struct batadv_neigh_node *orig_neigh_router = NULL; 488 struct batadv_neigh_node *orig_neigh_router = NULL;
510 489 struct batadv_neigh_ifinfo *router_ifinfo = NULL, *neigh_ifinfo = NULL;
511 neigh_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing); 490 u32 router_throughput, neigh_throughput;
512 if (!neigh_ifinfo) 491 u32 router_last_seqno;
513 goto out; 492 u32 neigh_last_seqno;
493 s32 neigh_seq_diff;
494 bool forward = false;
514 495
515 orig_neigh_node = batadv_v_ogm_orig_get(bat_priv, ethhdr->h_source); 496 orig_neigh_node = batadv_v_ogm_orig_get(bat_priv, ethhdr->h_source);
516 if (!orig_neigh_node) 497 if (!orig_neigh_node)
@@ -529,47 +510,57 @@ static void batadv_v_ogm_route_update(struct batadv_priv *bat_priv,
529 goto out; 510 goto out;
530 } 511 }
531 512
532 if (router) 513 /* Mark the OGM to be considered for forwarding, and update routes
533 batadv_neigh_node_put(router); 514 * if needed.
515 */
516 forward = true;
534 517
535 /* Update routes, and check if the OGM is from the best next hop */ 518 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
536 batadv_v_ogm_orig_update(bat_priv, orig_node, neigh_node, ogm2, 519 "Searching and updating originator entry of received packet\n");
537 if_outgoing);
538 520
539 orig_ifinfo = batadv_orig_ifinfo_new(orig_node, if_outgoing); 521 /* if this neighbor already is our next hop there is nothing
540 if (!orig_ifinfo) 522 * to change
523 */
524 if (router == neigh_node)
541 goto out; 525 goto out;
542 526
543 /* don't forward the same seqno twice on one interface */ 527 /* don't consider neighbours with worse throughput.
544 if (orig_ifinfo->last_seqno_forwarded == ntohl(ogm2->seqno)) 528 * also switch route if this seqno is BATADV_V_MAX_ORIGDIFF newer than
545 goto out; 529 * the last received seqno from our best next hop.
530 */
531 if (router) {
532 router_ifinfo = batadv_neigh_ifinfo_get(router, if_outgoing);
533 neigh_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing);
546 534
547 /* acquire possibly updated router */ 535 /* if these are not allocated, something is wrong. */
548 router = batadv_orig_router_get(orig_node, if_outgoing); 536 if (!router_ifinfo || !neigh_ifinfo)
537 goto out;
549 538
550 /* strict rule: forward packets coming from the best next hop only */ 539 neigh_last_seqno = neigh_ifinfo->bat_v.last_seqno;
551 if (neigh_node != router) 540 router_last_seqno = router_ifinfo->bat_v.last_seqno;
552 goto out; 541 neigh_seq_diff = neigh_last_seqno - router_last_seqno;
542 router_throughput = router_ifinfo->bat_v.throughput;
543 neigh_throughput = neigh_ifinfo->bat_v.throughput;
553 544
554 /* only forward for specific interface, not for the default one. */ 545 if ((neigh_seq_diff < BATADV_OGM_MAX_ORIGDIFF) &&
555 if (if_outgoing != BATADV_IF_DEFAULT) { 546 (router_throughput >= neigh_throughput))
556 orig_ifinfo->last_seqno_forwarded = ntohl(ogm2->seqno); 547 goto out;
557 batadv_v_ogm_forward(bat_priv, ogm2,
558 neigh_ifinfo->bat_v.throughput,
559 if_incoming, if_outgoing);
560 } 548 }
561 549
550 batadv_update_route(bat_priv, orig_node, if_outgoing, neigh_node);
562out: 551out:
563 if (orig_ifinfo)
564 batadv_orig_ifinfo_put(orig_ifinfo);
565 if (router) 552 if (router)
566 batadv_neigh_node_put(router); 553 batadv_neigh_node_put(router);
567 if (orig_neigh_router) 554 if (orig_neigh_router)
568 batadv_neigh_node_put(orig_neigh_router); 555 batadv_neigh_node_put(orig_neigh_router);
569 if (orig_neigh_node) 556 if (orig_neigh_node)
570 batadv_orig_node_put(orig_neigh_node); 557 batadv_orig_node_put(orig_neigh_node);
558 if (router_ifinfo)
559 batadv_neigh_ifinfo_put(router_ifinfo);
571 if (neigh_ifinfo) 560 if (neigh_ifinfo)
572 batadv_neigh_ifinfo_put(neigh_ifinfo); 561 batadv_neigh_ifinfo_put(neigh_ifinfo);
562
563 return forward;
573} 564}
574 565
575/** 566/**
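
The route-update helper now switches routes only when the candidate neighbour is genuinely better: either its path throughput exceeds the current router's, or its last sequence number has run far enough ahead of the router's (the signed 32-bit subtraction keeps that comparison correct across sequence-number wraparound). A small stand-alone model of the decision, with invented names and a placeholder value for the threshold used in the hunk above:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define OGM_MAX_ORIGDIFF 5      /* placeholder; the real constant lives in main.h */

    /* Should the candidate neighbour replace the current router? */
    static bool should_switch_route(uint32_t router_last_seqno,
                                    uint32_t router_throughput,
                                    uint32_t neigh_last_seqno,
                                    uint32_t neigh_throughput)
    {
            /* signed difference survives u32 wraparound of the seqno space */
            int32_t seq_diff = (int32_t)(neigh_last_seqno - router_last_seqno);

            if (seq_diff >= OGM_MAX_ORIGDIFF)
                    return true;    /* current router has fallen far behind */

            return neigh_throughput > router_throughput;
    }

    int main(void)
    {
            /* wraparound: router stuck just below UINT32_MAX, neighbour wrapped */
            printf("%d\n", should_switch_route(4294967290u, 900, 4u, 800));  /* 1 */
            printf("%d\n", should_switch_route(100, 900, 102, 800));         /* 0 */
            printf("%d\n", should_switch_route(100, 700, 102, 800));         /* 1 */
            return 0;
    }
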
@@ -592,6 +583,7 @@ batadv_v_ogm_process_per_outif(struct batadv_priv *bat_priv,
592 struct batadv_hard_iface *if_outgoing) 583 struct batadv_hard_iface *if_outgoing)
593{ 584{
594 int seqno_age; 585 int seqno_age;
586 bool forward;
595 587
596 /* first, update the metric with according sanity checks */ 588 /* first, update the metric with according sanity checks */
597 seqno_age = batadv_v_ogm_metric_update(bat_priv, ogm2, orig_node, 589 seqno_age = batadv_v_ogm_metric_update(bat_priv, ogm2, orig_node,
@@ -610,8 +602,14 @@ batadv_v_ogm_process_per_outif(struct batadv_priv *bat_priv,
610 ntohs(ogm2->tvlv_len)); 602 ntohs(ogm2->tvlv_len));
611 603
612 /* if the metric update went through, update routes if needed */ 604 /* if the metric update went through, update routes if needed */
613 batadv_v_ogm_route_update(bat_priv, ethhdr, ogm2, orig_node, 605 forward = batadv_v_ogm_route_update(bat_priv, ethhdr, ogm2, orig_node,
614 neigh_node, if_incoming, if_outgoing); 606 neigh_node, if_incoming,
607 if_outgoing);
608
609 /* if the routes have been processed correctly, check and forward */
610 if (forward)
611 batadv_v_ogm_forward(bat_priv, ogm2, orig_node, neigh_node,
612 if_incoming, if_outgoing);
615} 613}
616 614
617/** 615/**
@@ -713,9 +711,14 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset,
713 if (hard_iface->soft_iface != bat_priv->soft_iface) 711 if (hard_iface->soft_iface != bat_priv->soft_iface)
714 continue; 712 continue;
715 713
714 if (!kref_get_unless_zero(&hard_iface->refcount))
715 continue;
716
716 batadv_v_ogm_process_per_outif(bat_priv, ethhdr, ogm_packet, 717 batadv_v_ogm_process_per_outif(bat_priv, ethhdr, ogm_packet,
717 orig_node, neigh_node, 718 orig_node, neigh_node,
718 if_incoming, hard_iface); 719 if_incoming, hard_iface);
720
721 batadv_hardif_put(hard_iface);
719 } 722 }
720 rcu_read_unlock(); 723 rcu_read_unlock();
721out: 724out:
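
The loop over hard interfaces runs under rcu_read_lock(), so before handing an interface to the per-outgoing-interface processing the patch now takes a temporary reference with kref_get_unless_zero() and drops it again afterwards; an interface whose refcount has already reached zero is simply skipped. The "get it only if it is still alive" primitive can be modelled in plain C11 atomics; this is an illustration of the idea, not the kernel implementation:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct object {
            atomic_uint refcount;
    };

    /* Succeeds only while at least one reference is still held elsewhere. */
    static bool get_unless_zero(struct object *obj)
    {
            unsigned int old = atomic_load(&obj->refcount);

            do {
                    if (old == 0)
                            return false;   /* already being torn down */
            } while (!atomic_compare_exchange_weak(&obj->refcount, &old, old + 1));

            return true;
    }

    static void put(struct object *obj)
    {
            if (atomic_fetch_sub(&obj->refcount, 1) == 1)
                    printf("last reference dropped: free the object\n");
    }

    int main(void)
    {
            struct object obj;

            atomic_init(&obj.refcount, 2);
            if (get_unless_zero(&obj))      /* safe to use obj here */
                    put(&obj);
            put(&obj);
            put(&obj);
            return 0;
    }
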
diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c
index b56bb000a0ab..a0c7913837a5 100644
--- a/net/batman-adv/bitarray.c
+++ b/net/batman-adv/bitarray.c
@@ -38,11 +38,11 @@ static void batadv_bitmap_shift_left(unsigned long *seq_bits, s32 n)
38 * the last sequence number 38 * the last sequence number
39 * @set_mark: whether this packet should be marked in seq_bits 39 * @set_mark: whether this packet should be marked in seq_bits
40 * 40 *
41 * Return: 1 if the window was moved (either new or very old), 41 * Return: true if the window was moved (either new or very old),
42 * 0 if the window was not moved/shifted. 42 * false if the window was not moved/shifted.
43 */ 43 */
44int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, s32 seq_num_diff, 44bool batadv_bit_get_packet(void *priv, unsigned long *seq_bits,
45 int set_mark) 45 s32 seq_num_diff, int set_mark)
46{ 46{
47 struct batadv_priv *bat_priv = priv; 47 struct batadv_priv *bat_priv = priv;
48 48
@@ -52,7 +52,7 @@ int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, s32 seq_num_diff,
52 if (seq_num_diff <= 0 && seq_num_diff > -BATADV_TQ_LOCAL_WINDOW_SIZE) { 52 if (seq_num_diff <= 0 && seq_num_diff > -BATADV_TQ_LOCAL_WINDOW_SIZE) {
53 if (set_mark) 53 if (set_mark)
54 batadv_set_bit(seq_bits, -seq_num_diff); 54 batadv_set_bit(seq_bits, -seq_num_diff);
55 return 0; 55 return false;
56 } 56 }
57 57
58 /* sequence number is slightly newer, so we shift the window and 58 /* sequence number is slightly newer, so we shift the window and
@@ -63,7 +63,7 @@ int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, s32 seq_num_diff,
63 63
64 if (set_mark) 64 if (set_mark)
65 batadv_set_bit(seq_bits, 0); 65 batadv_set_bit(seq_bits, 0);
66 return 1; 66 return true;
67 } 67 }
68 68
69 /* sequence number is much newer, probably missed a lot of packets */ 69 /* sequence number is much newer, probably missed a lot of packets */
@@ -75,7 +75,7 @@ int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, s32 seq_num_diff,
75 bitmap_zero(seq_bits, BATADV_TQ_LOCAL_WINDOW_SIZE); 75 bitmap_zero(seq_bits, BATADV_TQ_LOCAL_WINDOW_SIZE);
76 if (set_mark) 76 if (set_mark)
77 batadv_set_bit(seq_bits, 0); 77 batadv_set_bit(seq_bits, 0);
78 return 1; 78 return true;
79 } 79 }
80 80
81 /* received a much older packet. The other host either restarted 81 /* received a much older packet. The other host either restarted
@@ -94,5 +94,5 @@ int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, s32 seq_num_diff,
94 if (set_mark) 94 if (set_mark)
95 batadv_set_bit(seq_bits, 0); 95 batadv_set_bit(seq_bits, 0);
96 96
97 return 1; 97 return true;
98} 98}
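
batadv_bit_get_packet() maintains a sliding window of recently seen sequence numbers: a packet inside the window only sets its bit, a slightly newer one shifts the window forward, and a much newer (or much older) one resets the window, with the boolean return now answering "did the window move?". A compact user-space model of the same bookkeeping, with a 64-bit word standing in for the kernel bitmap and the real window size:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define WINDOW_SIZE 64   /* stand-in for BATADV_TQ_LOCAL_WINDOW_SIZE */

    /* seq_diff = new_seqno - last_seqno; returns true if the window moved. */
    static bool window_track(uint64_t *bits, int32_t seq_diff, bool set_mark)
    {
            if (seq_diff <= 0 && seq_diff > -WINDOW_SIZE) {
                    /* old but still inside the window: just mark it */
                    if (set_mark)
                            *bits |= 1ULL << -seq_diff;
                    return false;
            }

            if (seq_diff > 0 && seq_diff < WINDOW_SIZE) {
                    /* slightly newer: shift the window forward */
                    *bits <<= seq_diff;
                    if (set_mark)
                            *bits |= 1ULL;
                    return true;
            }

            /* much newer or much older: restart the window */
            *bits = set_mark ? 1ULL : 0;
            return true;
    }

    int main(void)
    {
            uint64_t bits = 0;

            printf("%d\n", window_track(&bits, 0, true));    /* 0: in window */
            printf("%d\n", window_track(&bits, 3, true));    /* 1: shifted   */
            printf("%d\n", window_track(&bits, 500, true));  /* 1: restarted */
            return 0;
    }
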
diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h
index 3e41bb80eb81..0e6e9d09078c 100644
--- a/net/batman-adv/bitarray.h
+++ b/net/batman-adv/bitarray.h
@@ -22,6 +22,7 @@
22 22
23#include <linux/bitops.h> 23#include <linux/bitops.h>
24#include <linux/compiler.h> 24#include <linux/compiler.h>
25#include <linux/stddef.h>
25#include <linux/types.h> 26#include <linux/types.h>
26 27
27/** 28/**
@@ -31,17 +32,17 @@
31 * @last_seqno: latest sequence number in seq_bits 32 * @last_seqno: latest sequence number in seq_bits
32 * @curr_seqno: sequence number to test for 33 * @curr_seqno: sequence number to test for
33 * 34 *
34 * Return: 1 if the corresponding bit in the given seq_bits indicates true 35 * Return: true if the corresponding bit in the given seq_bits indicates true
35 * and curr_seqno is within range of last_seqno. Otherwise returns 0. 36 * and curr_seqno is within range of last_seqno. Otherwise returns false.
36 */ 37 */
37static inline int batadv_test_bit(const unsigned long *seq_bits, 38static inline bool batadv_test_bit(const unsigned long *seq_bits,
38 u32 last_seqno, u32 curr_seqno) 39 u32 last_seqno, u32 curr_seqno)
39{ 40{
40 s32 diff; 41 s32 diff;
41 42
42 diff = last_seqno - curr_seqno; 43 diff = last_seqno - curr_seqno;
43 if (diff < 0 || diff >= BATADV_TQ_LOCAL_WINDOW_SIZE) 44 if (diff < 0 || diff >= BATADV_TQ_LOCAL_WINDOW_SIZE)
44 return 0; 45 return false;
45 return test_bit(diff, seq_bits) != 0; 46 return test_bit(diff, seq_bits) != 0;
46} 47}
47 48
@@ -55,7 +56,7 @@ static inline void batadv_set_bit(unsigned long *seq_bits, s32 n)
55 set_bit(n, seq_bits); /* turn the position on */ 56 set_bit(n, seq_bits); /* turn the position on */
56} 57}
57 58
58int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, s32 seq_num_diff, 59bool batadv_bit_get_packet(void *priv, unsigned long *seq_bits,
59 int set_mark); 60 s32 seq_num_diff, int set_mark);
60 61
61#endif /* _NET_BATMAN_ADV_BITARRAY_H_ */ 62#endif /* _NET_BATMAN_ADV_BITARRAY_H_ */
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 0a6c8b824a00..748a9ead7ce5 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -50,6 +50,7 @@
50#include "hash.h" 50#include "hash.h"
51#include "originator.h" 51#include "originator.h"
52#include "packet.h" 52#include "packet.h"
53#include "sysfs.h"
53#include "translation-table.h" 54#include "translation-table.h"
54 55
55static const u8 batadv_announce_mac[4] = {0x43, 0x05, 0x43, 0x05}; 56static const u8 batadv_announce_mac[4] = {0x43, 0x05, 0x43, 0x05};
@@ -100,10 +101,10 @@ static inline u32 batadv_choose_backbone_gw(const void *data, u32 size)
100 * @node: list node of the first entry to compare 101 * @node: list node of the first entry to compare
101 * @data2: pointer to the second backbone gateway 102 * @data2: pointer to the second backbone gateway
102 * 103 *
103 * Return: 1 if the backbones have the same data, 0 otherwise 104 * Return: true if the backbones have the same data, false otherwise
104 */ 105 */
105static int batadv_compare_backbone_gw(const struct hlist_node *node, 106static bool batadv_compare_backbone_gw(const struct hlist_node *node,
106 const void *data2) 107 const void *data2)
107{ 108{
108 const void *data1 = container_of(node, struct batadv_bla_backbone_gw, 109 const void *data1 = container_of(node, struct batadv_bla_backbone_gw,
109 hash_entry); 110 hash_entry);
@@ -111,23 +112,23 @@ static int batadv_compare_backbone_gw(const struct hlist_node *node,
111 const struct batadv_bla_backbone_gw *gw2 = data2; 112 const struct batadv_bla_backbone_gw *gw2 = data2;
112 113
113 if (!batadv_compare_eth(gw1->orig, gw2->orig)) 114 if (!batadv_compare_eth(gw1->orig, gw2->orig))
114 return 0; 115 return false;
115 116
116 if (gw1->vid != gw2->vid) 117 if (gw1->vid != gw2->vid)
117 return 0; 118 return false;
118 119
119 return 1; 120 return true;
120} 121}
121 122
122/** 123/**
123 * batadv_compare_backbone_gw - compare address and vid of two claims 124 * batadv_compare_claim - compare address and vid of two claims
124 * @node: list node of the first entry to compare 125 * @node: list node of the first entry to compare
125 * @data2: pointer to the second claims 126 * @data2: pointer to the second claims
126 * 127 *
127 * Return: 1 if the claim have the same data, 0 otherwise 128 * Return: true if the claims have the same data, false otherwise
128 */ 129 */
129static int batadv_compare_claim(const struct hlist_node *node, 130static bool batadv_compare_claim(const struct hlist_node *node,
130 const void *data2) 131 const void *data2)
131{ 132{
132 const void *data1 = container_of(node, struct batadv_bla_claim, 133 const void *data1 = container_of(node, struct batadv_bla_claim,
133 hash_entry); 134 hash_entry);
@@ -135,12 +136,12 @@ static int batadv_compare_claim(const struct hlist_node *node,
135 const struct batadv_bla_claim *cl2 = data2; 136 const struct batadv_bla_claim *cl2 = data2;
136 137
137 if (!batadv_compare_eth(cl1->addr, cl2->addr)) 138 if (!batadv_compare_eth(cl1->addr, cl2->addr))
138 return 0; 139 return false;
139 140
140 if (cl1->vid != cl2->vid) 141 if (cl1->vid != cl2->vid)
141 return 0; 142 return false;
142 143
143 return 1; 144 return true;
144} 145}
145 146
146/** 147/**
@@ -200,9 +201,9 @@ static void batadv_claim_put(struct batadv_bla_claim *claim)
200 * 201 *
201 * Return: claim if found or NULL otherwise. 202 * Return: claim if found or NULL otherwise.
202 */ 203 */
203static struct batadv_bla_claim 204static struct batadv_bla_claim *
204*batadv_claim_hash_find(struct batadv_priv *bat_priv, 205batadv_claim_hash_find(struct batadv_priv *bat_priv,
205 struct batadv_bla_claim *data) 206 struct batadv_bla_claim *data)
206{ 207{
207 struct batadv_hashtable *hash = bat_priv->bla.claim_hash; 208 struct batadv_hashtable *hash = bat_priv->bla.claim_hash;
208 struct hlist_head *head; 209 struct hlist_head *head;
@@ -407,6 +408,14 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac,
407 ethhdr->h_source, ethhdr->h_dest, 408 ethhdr->h_source, ethhdr->h_dest,
408 BATADV_PRINT_VID(vid)); 409 BATADV_PRINT_VID(vid));
409 break; 410 break;
411 case BATADV_CLAIM_TYPE_LOOPDETECT:
412 ether_addr_copy(ethhdr->h_source, mac);
413 batadv_dbg(BATADV_DBG_BLA, bat_priv,
414 "bla_send_claim(): LOOPDETECT of %pM to %pM on vid %d\n",
415 ethhdr->h_source, ethhdr->h_dest,
416 BATADV_PRINT_VID(vid));
417
418 break;
410 } 419 }
411 420
412 if (vid & BATADV_VLAN_HAS_TAG) 421 if (vid & BATADV_VLAN_HAS_TAG)
@@ -427,6 +436,36 @@ out:
427} 436}
428 437
429/** 438/**
439 * batadv_bla_loopdetect_report - worker for reporting the loop
440 * @work: work queue item
441 *
442 * Throws an uevent, as the loopdetect check function can't do that itself
443 * since throwing uevents may sleep, which the check on the receive path must not do.
444 */
445static void batadv_bla_loopdetect_report(struct work_struct *work)
446{
447 struct batadv_bla_backbone_gw *backbone_gw;
448 struct batadv_priv *bat_priv;
449 char vid_str[6] = { '\0' };
450
451 backbone_gw = container_of(work, struct batadv_bla_backbone_gw,
452 report_work);
453 bat_priv = backbone_gw->bat_priv;
454
455 batadv_info(bat_priv->soft_iface,
456 "Possible loop on VLAN %d detected which can't be handled by BLA - please check your network setup!\n",
457 BATADV_PRINT_VID(backbone_gw->vid));
458 snprintf(vid_str, sizeof(vid_str), "%d",
459 BATADV_PRINT_VID(backbone_gw->vid));
460 vid_str[sizeof(vid_str) - 1] = 0;
461
462 batadv_throw_uevent(bat_priv, BATADV_UEV_BLA, BATADV_UEV_LOOPDETECT,
463 vid_str);
464
465 batadv_backbone_gw_put(backbone_gw);
466}
467
468/**
430 * batadv_bla_get_backbone_gw - finds or creates a backbone gateway 469 * batadv_bla_get_backbone_gw - finds or creates a backbone gateway
431 * @bat_priv: the bat priv with all the soft interface information 470 * @bat_priv: the bat priv with all the soft interface information
432 * @orig: the mac address of the originator 471 * @orig: the mac address of the originator
@@ -464,6 +503,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig,
464 atomic_set(&entry->request_sent, 0); 503 atomic_set(&entry->request_sent, 0);
465 atomic_set(&entry->wait_periods, 0); 504 atomic_set(&entry->wait_periods, 0);
466 ether_addr_copy(entry->orig, orig); 505 ether_addr_copy(entry->orig, orig);
506 INIT_WORK(&entry->report_work, batadv_bla_loopdetect_report);
467 507
468 /* one for the hash, one for returning */ 508 /* one for the hash, one for returning */
469 kref_init(&entry->refcount); 509 kref_init(&entry->refcount);
@@ -735,22 +775,22 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv,
735 * @backbone_addr: originator address of the sender (Ethernet source MAC) 775 * @backbone_addr: originator address of the sender (Ethernet source MAC)
736 * @vid: the VLAN ID of the frame 776 * @vid: the VLAN ID of the frame
737 * 777 *
738 * Return: 1 if handled 778 * Return: true if handled
739 */ 779 */
740static int batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr, 780static bool batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr,
741 u8 *backbone_addr, unsigned short vid) 781 u8 *backbone_addr, unsigned short vid)
742{ 782{
743 struct batadv_bla_backbone_gw *backbone_gw; 783 struct batadv_bla_backbone_gw *backbone_gw;
744 u16 backbone_crc, crc; 784 u16 backbone_crc, crc;
745 785
746 if (memcmp(an_addr, batadv_announce_mac, 4) != 0) 786 if (memcmp(an_addr, batadv_announce_mac, 4) != 0)
747 return 0; 787 return false;
748 788
749 backbone_gw = batadv_bla_get_backbone_gw(bat_priv, backbone_addr, vid, 789 backbone_gw = batadv_bla_get_backbone_gw(bat_priv, backbone_addr, vid,
750 false); 790 false);
751 791
752 if (unlikely(!backbone_gw)) 792 if (unlikely(!backbone_gw))
753 return 1; 793 return true;
754 794
755 /* handle as ANNOUNCE frame */ 795 /* handle as ANNOUNCE frame */
756 backbone_gw->lasttime = jiffies; 796 backbone_gw->lasttime = jiffies;
@@ -783,7 +823,7 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr,
783 } 823 }
784 824
785 batadv_backbone_gw_put(backbone_gw); 825 batadv_backbone_gw_put(backbone_gw);
786 return 1; 826 return true;
787} 827}
788 828
789/** 829/**
@@ -794,29 +834,29 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr,
794 * @ethhdr: ethernet header of a packet 834 * @ethhdr: ethernet header of a packet
795 * @vid: the VLAN ID of the frame 835 * @vid: the VLAN ID of the frame
796 * 836 *
797 * Return: 1 if handled 837 * Return: true if handled
798 */ 838 */
799static int batadv_handle_request(struct batadv_priv *bat_priv, 839static bool batadv_handle_request(struct batadv_priv *bat_priv,
800 struct batadv_hard_iface *primary_if, 840 struct batadv_hard_iface *primary_if,
801 u8 *backbone_addr, struct ethhdr *ethhdr, 841 u8 *backbone_addr, struct ethhdr *ethhdr,
802 unsigned short vid) 842 unsigned short vid)
803{ 843{
804 /* check for REQUEST frame */ 844 /* check for REQUEST frame */
805 if (!batadv_compare_eth(backbone_addr, ethhdr->h_dest)) 845 if (!batadv_compare_eth(backbone_addr, ethhdr->h_dest))
806 return 0; 846 return false;
807 847
808 /* sanity check, this should not happen on a normal switch, 848 /* sanity check, this should not happen on a normal switch,
809 * we ignore it in this case. 849 * we ignore it in this case.
810 */ 850 */
811 if (!batadv_compare_eth(ethhdr->h_dest, primary_if->net_dev->dev_addr)) 851 if (!batadv_compare_eth(ethhdr->h_dest, primary_if->net_dev->dev_addr))
812 return 1; 852 return true;
813 853
814 batadv_dbg(BATADV_DBG_BLA, bat_priv, 854 batadv_dbg(BATADV_DBG_BLA, bat_priv,
815 "handle_request(): REQUEST vid %d (sent by %pM)...\n", 855 "handle_request(): REQUEST vid %d (sent by %pM)...\n",
816 BATADV_PRINT_VID(vid), ethhdr->h_source); 856 BATADV_PRINT_VID(vid), ethhdr->h_source);
817 857
818 batadv_bla_answer_request(bat_priv, primary_if, vid); 858 batadv_bla_answer_request(bat_priv, primary_if, vid);
819 return 1; 859 return true;
820} 860}
821 861
822/** 862/**
@@ -827,12 +867,12 @@ static int batadv_handle_request(struct batadv_priv *bat_priv,
827 * @claim_addr: Client to be unclaimed (ARP sender HW MAC) 867 * @claim_addr: Client to be unclaimed (ARP sender HW MAC)
828 * @vid: the VLAN ID of the frame 868 * @vid: the VLAN ID of the frame
829 * 869 *
830 * Return: 1 if handled 870 * Return: true if handled
831 */ 871 */
832static int batadv_handle_unclaim(struct batadv_priv *bat_priv, 872static bool batadv_handle_unclaim(struct batadv_priv *bat_priv,
833 struct batadv_hard_iface *primary_if, 873 struct batadv_hard_iface *primary_if,
834 u8 *backbone_addr, u8 *claim_addr, 874 u8 *backbone_addr, u8 *claim_addr,
835 unsigned short vid) 875 unsigned short vid)
836{ 876{
837 struct batadv_bla_backbone_gw *backbone_gw; 877 struct batadv_bla_backbone_gw *backbone_gw;
838 878
@@ -845,7 +885,7 @@ static int batadv_handle_unclaim(struct batadv_priv *bat_priv,
845 backbone_gw = batadv_backbone_hash_find(bat_priv, backbone_addr, vid); 885 backbone_gw = batadv_backbone_hash_find(bat_priv, backbone_addr, vid);
846 886
847 if (!backbone_gw) 887 if (!backbone_gw)
848 return 1; 888 return true;
849 889
850 /* this must be an UNCLAIM frame */ 890 /* this must be an UNCLAIM frame */
851 batadv_dbg(BATADV_DBG_BLA, bat_priv, 891 batadv_dbg(BATADV_DBG_BLA, bat_priv,
@@ -854,7 +894,7 @@ static int batadv_handle_unclaim(struct batadv_priv *bat_priv,
854 894
855 batadv_bla_del_claim(bat_priv, claim_addr, vid); 895 batadv_bla_del_claim(bat_priv, claim_addr, vid);
856 batadv_backbone_gw_put(backbone_gw); 896 batadv_backbone_gw_put(backbone_gw);
857 return 1; 897 return true;
858} 898}
859 899
860/** 900/**
@@ -865,12 +905,12 @@ static int batadv_handle_unclaim(struct batadv_priv *bat_priv,
865 * @claim_addr: client mac address to be claimed (ARP sender HW MAC) 905 * @claim_addr: client mac address to be claimed (ARP sender HW MAC)
866 * @vid: the VLAN ID of the frame 906 * @vid: the VLAN ID of the frame
867 * 907 *
868 * Return: 1 if handled 908 * Return: true if handled
869 */ 909 */
870static int batadv_handle_claim(struct batadv_priv *bat_priv, 910static bool batadv_handle_claim(struct batadv_priv *bat_priv,
871 struct batadv_hard_iface *primary_if, 911 struct batadv_hard_iface *primary_if,
872 u8 *backbone_addr, u8 *claim_addr, 912 u8 *backbone_addr, u8 *claim_addr,
873 unsigned short vid) 913 unsigned short vid)
874{ 914{
875 struct batadv_bla_backbone_gw *backbone_gw; 915 struct batadv_bla_backbone_gw *backbone_gw;
876 916
@@ -880,7 +920,7 @@ static int batadv_handle_claim(struct batadv_priv *bat_priv,
880 false); 920 false);
881 921
882 if (unlikely(!backbone_gw)) 922 if (unlikely(!backbone_gw))
883 return 1; 923 return true;
884 924
885 /* this must be a CLAIM frame */ 925 /* this must be a CLAIM frame */
886 batadv_bla_add_claim(bat_priv, claim_addr, vid, backbone_gw); 926 batadv_bla_add_claim(bat_priv, claim_addr, vid, backbone_gw);
@@ -891,7 +931,7 @@ static int batadv_handle_claim(struct batadv_priv *bat_priv,
891 /* TODO: we could call something like tt_local_del() here. */ 931 /* TODO: we could call something like tt_local_del() here. */
892 932
893 batadv_backbone_gw_put(backbone_gw); 933 batadv_backbone_gw_put(backbone_gw);
894 return 1; 934 return true;
895} 935}
896 936
897/** 937/**
@@ -975,12 +1015,12 @@ static int batadv_check_claim_group(struct batadv_priv *bat_priv,
975 * @primary_if: the primary hard interface of this batman soft interface 1015 * @primary_if: the primary hard interface of this batman soft interface
976 * @skb: the frame to be checked 1016 * @skb: the frame to be checked
977 * 1017 *
978 * Return: 1 if it was a claim frame, otherwise return 0 to 1018 * Return: true if it was a claim frame, otherwise return false to
979 * tell the callee that it can use the frame on its own. 1019 * tell the callee that it can use the frame on its own.
980 */ 1020 */
981static int batadv_bla_process_claim(struct batadv_priv *bat_priv, 1021static bool batadv_bla_process_claim(struct batadv_priv *bat_priv,
982 struct batadv_hard_iface *primary_if, 1022 struct batadv_hard_iface *primary_if,
983 struct sk_buff *skb) 1023 struct sk_buff *skb)
984{ 1024{
985 struct batadv_bla_claim_dst *bla_dst, *bla_dst_own; 1025 struct batadv_bla_claim_dst *bla_dst, *bla_dst_own;
986 u8 *hw_src, *hw_dst; 1026 u8 *hw_src, *hw_dst;
@@ -1011,7 +1051,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
1011 vhdr = skb_header_pointer(skb, headlen, VLAN_HLEN, 1051 vhdr = skb_header_pointer(skb, headlen, VLAN_HLEN,
1012 &vhdr_buf); 1052 &vhdr_buf);
1013 if (!vhdr) 1053 if (!vhdr)
1014 return 0; 1054 return false;
1015 1055
1016 proto = vhdr->h_vlan_encapsulated_proto; 1056 proto = vhdr->h_vlan_encapsulated_proto;
1017 headlen += VLAN_HLEN; 1057 headlen += VLAN_HLEN;
@@ -1020,12 +1060,12 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
1020 } 1060 }
1021 1061
1022 if (proto != htons(ETH_P_ARP)) 1062 if (proto != htons(ETH_P_ARP))
1023 return 0; /* not a claim frame */ 1063 return false; /* not a claim frame */
1024 1064
1025 /* this must be a ARP frame. check if it is a claim. */ 1065 /* this must be a ARP frame. check if it is a claim. */
1026 1066
1027 if (unlikely(!pskb_may_pull(skb, headlen + arp_hdr_len(skb->dev)))) 1067 if (unlikely(!pskb_may_pull(skb, headlen + arp_hdr_len(skb->dev))))
1028 return 0; 1068 return false;
1029 1069
1030 /* pskb_may_pull() may have modified the pointers, get ethhdr again */ 1070 /* pskb_may_pull() may have modified the pointers, get ethhdr again */
1031 ethhdr = eth_hdr(skb); 1071 ethhdr = eth_hdr(skb);
@@ -1035,13 +1075,13 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
1035 * IP information 1075 * IP information
1036 */ 1076 */
1037 if (arphdr->ar_hrd != htons(ARPHRD_ETHER)) 1077 if (arphdr->ar_hrd != htons(ARPHRD_ETHER))
1038 return 0; 1078 return false;
1039 if (arphdr->ar_pro != htons(ETH_P_IP)) 1079 if (arphdr->ar_pro != htons(ETH_P_IP))
1040 return 0; 1080 return false;
1041 if (arphdr->ar_hln != ETH_ALEN) 1081 if (arphdr->ar_hln != ETH_ALEN)
1042 return 0; 1082 return false;
1043 if (arphdr->ar_pln != 4) 1083 if (arphdr->ar_pln != 4)
1044 return 0; 1084 return false;
1045 1085
1046 hw_src = (u8 *)arphdr + sizeof(struct arphdr); 1086 hw_src = (u8 *)arphdr + sizeof(struct arphdr);
1047 hw_dst = hw_src + ETH_ALEN + 4; 1087 hw_dst = hw_src + ETH_ALEN + 4;
@@ -1051,14 +1091,18 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
1051 /* check if it is a claim frame in general */ 1091 /* check if it is a claim frame in general */
1052 if (memcmp(bla_dst->magic, bla_dst_own->magic, 1092 if (memcmp(bla_dst->magic, bla_dst_own->magic,
1053 sizeof(bla_dst->magic)) != 0) 1093 sizeof(bla_dst->magic)) != 0)
1054 return 0; 1094 return false;
1055 1095
1056 /* check if there is a claim frame encapsulated deeper in (QinQ) and 1096 /* check if there is a claim frame encapsulated deeper in (QinQ) and
1057 * drop that, as this is not supported by BLA but should also not be 1097 * drop that, as this is not supported by BLA but should also not be
1058 * sent via the mesh. 1098 * sent via the mesh.
1059 */ 1099 */
1060 if (vlan_depth > 1) 1100 if (vlan_depth > 1)
1061 return 1; 1101 return true;
1102
1103 /* Let the loopdetect frames pass onto the mesh in any case. */
1104 if (bla_dst->type == BATADV_CLAIM_TYPE_LOOPDETECT)
1105 return 0;
1062 1106
1063 /* check if it is a claim frame. */ 1107 /* check if it is a claim frame. */
1064 ret = batadv_check_claim_group(bat_priv, primary_if, hw_src, hw_dst, 1108 ret = batadv_check_claim_group(bat_priv, primary_if, hw_src, hw_dst,
@@ -1070,7 +1114,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
1070 hw_dst); 1114 hw_dst);
1071 1115
1072 if (ret < 2) 1116 if (ret < 2)
1073 return ret; 1117 return !!ret;
1074 1118
1075 /* become a backbone gw ourselves on this vlan if not happened yet */ 1119 /* become a backbone gw ourselves on this vlan if not happened yet */
1076 batadv_bla_update_own_backbone_gw(bat_priv, primary_if, vid); 1120 batadv_bla_update_own_backbone_gw(bat_priv, primary_if, vid);
@@ -1080,30 +1124,30 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
1080 case BATADV_CLAIM_TYPE_CLAIM: 1124 case BATADV_CLAIM_TYPE_CLAIM:
1081 if (batadv_handle_claim(bat_priv, primary_if, hw_src, 1125 if (batadv_handle_claim(bat_priv, primary_if, hw_src,
1082 ethhdr->h_source, vid)) 1126 ethhdr->h_source, vid))
1083 return 1; 1127 return true;
1084 break; 1128 break;
1085 case BATADV_CLAIM_TYPE_UNCLAIM: 1129 case BATADV_CLAIM_TYPE_UNCLAIM:
1086 if (batadv_handle_unclaim(bat_priv, primary_if, 1130 if (batadv_handle_unclaim(bat_priv, primary_if,
1087 ethhdr->h_source, hw_src, vid)) 1131 ethhdr->h_source, hw_src, vid))
1088 return 1; 1132 return true;
1089 break; 1133 break;
1090 1134
1091 case BATADV_CLAIM_TYPE_ANNOUNCE: 1135 case BATADV_CLAIM_TYPE_ANNOUNCE:
1092 if (batadv_handle_announce(bat_priv, hw_src, ethhdr->h_source, 1136 if (batadv_handle_announce(bat_priv, hw_src, ethhdr->h_source,
1093 vid)) 1137 vid))
1094 return 1; 1138 return true;
1095 break; 1139 break;
1096 case BATADV_CLAIM_TYPE_REQUEST: 1140 case BATADV_CLAIM_TYPE_REQUEST:
1097 if (batadv_handle_request(bat_priv, primary_if, hw_src, ethhdr, 1141 if (batadv_handle_request(bat_priv, primary_if, hw_src, ethhdr,
1098 vid)) 1142 vid))
1099 return 1; 1143 return true;
1100 break; 1144 break;
1101 } 1145 }
1102 1146
1103 batadv_dbg(BATADV_DBG_BLA, bat_priv, 1147 batadv_dbg(BATADV_DBG_BLA, bat_priv,
1104 "bla_process_claim(): ERROR - this looks like a claim frame, but is useless. eth src %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n", 1148 "bla_process_claim(): ERROR - this looks like a claim frame, but is useless. eth src %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n",
1105 ethhdr->h_source, BATADV_PRINT_VID(vid), hw_src, hw_dst); 1149 ethhdr->h_source, BATADV_PRINT_VID(vid), hw_src, hw_dst);
1106 return 1; 1150 return true;
1107} 1151}
1108 1152
1109/** 1153/**
@@ -1265,6 +1309,26 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv,
1265} 1309}
1266 1310
1267/** 1311/**
1312 * batadv_bla_send_loopdetect - send a loopdetect frame
1313 * @bat_priv: the bat priv with all the soft interface information
1314 * @backbone_gw: the backbone gateway for which a loop should be detected
1315 *
1316 * To detect loops that the bridge loop avoidance can't handle, send a loop
1317 * detection packet on the backbone. Unlike other BLA frames, this frame will
1318 * be allowed on the mesh by other nodes. If it is received on the mesh, this
1319 * indicates that there is a loop.
1320 */
1321static void
1322batadv_bla_send_loopdetect(struct batadv_priv *bat_priv,
1323 struct batadv_bla_backbone_gw *backbone_gw)
1324{
1325 batadv_dbg(BATADV_DBG_BLA, bat_priv, "Send loopdetect frame for vid %d\n",
1326 backbone_gw->vid);
1327 batadv_bla_send_claim(bat_priv, bat_priv->bla.loopdetect_addr,
1328 backbone_gw->vid, BATADV_CLAIM_TYPE_LOOPDETECT);
1329}
1330
1331/**
1268 * batadv_bla_status_update - purge bla interfaces if necessary 1332 * batadv_bla_status_update - purge bla interfaces if necessary
1269 * @net_dev: the soft interface net device 1333 * @net_dev: the soft interface net device
1270 */ 1334 */
@@ -1301,9 +1365,10 @@ static void batadv_bla_periodic_work(struct work_struct *work)
1301 struct batadv_bla_backbone_gw *backbone_gw; 1365 struct batadv_bla_backbone_gw *backbone_gw;
1302 struct batadv_hashtable *hash; 1366 struct batadv_hashtable *hash;
1303 struct batadv_hard_iface *primary_if; 1367 struct batadv_hard_iface *primary_if;
1368 bool send_loopdetect = false;
1304 int i; 1369 int i;
1305 1370
1306 delayed_work = container_of(work, struct delayed_work, work); 1371 delayed_work = to_delayed_work(work);
1307 priv_bla = container_of(delayed_work, struct batadv_priv_bla, work); 1372 priv_bla = container_of(delayed_work, struct batadv_priv_bla, work);
1308 bat_priv = container_of(priv_bla, struct batadv_priv, bla); 1373 bat_priv = container_of(priv_bla, struct batadv_priv, bla);
1309 primary_if = batadv_primary_if_get_selected(bat_priv); 1374 primary_if = batadv_primary_if_get_selected(bat_priv);
@@ -1316,6 +1381,22 @@ static void batadv_bla_periodic_work(struct work_struct *work)
1316 if (!atomic_read(&bat_priv->bridge_loop_avoidance)) 1381 if (!atomic_read(&bat_priv->bridge_loop_avoidance))
1317 goto out; 1382 goto out;
1318 1383
1384 if (atomic_dec_and_test(&bat_priv->bla.loopdetect_next)) {
1385 /* set a new random mac address for the next bridge loop
1386 * detection frames. Set the locally administered bit to avoid
1387 * collisions with users mac addresses.
1388 */
1389 random_ether_addr(bat_priv->bla.loopdetect_addr);
1390 bat_priv->bla.loopdetect_addr[0] = 0xba;
1391 bat_priv->bla.loopdetect_addr[1] = 0xbe;
1392 bat_priv->bla.loopdetect_lasttime = jiffies;
1393 atomic_set(&bat_priv->bla.loopdetect_next,
1394 BATADV_BLA_LOOPDETECT_PERIODS);
1395
1396 /* mark for sending loop detect on all VLANs */
1397 send_loopdetect = true;
1398 }
1399
1319 hash = bat_priv->bla.backbone_hash; 1400 hash = bat_priv->bla.backbone_hash;
1320 if (!hash) 1401 if (!hash)
1321 goto out; 1402 goto out;
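
The periodic worker now counts loopdetect_next down and, every BATADV_BLA_LOOPDETECT_PERIODS runs, generates a fresh random source MAC with a fixed, locally administered ba:be prefix for the next interval's loop-detection frames. In isolation the countdown-and-regenerate step looks roughly like the sketch below; the helper name, the period value and the use of rand() are stand-ins, not the kernel code.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define LOOPDETECT_PERIODS 6   /* placeholder for BATADV_BLA_LOOPDETECT_PERIODS */

    struct bla_state {
            int     loopdetect_next;
            uint8_t loopdetect_addr[6];
    };

    /* Called once per periodic run; returns true when probes should be sent. */
    static bool loopdetect_tick(struct bla_state *st)
    {
            if (--st->loopdetect_next > 0)
                    return false;

            st->loopdetect_next = LOOPDETECT_PERIODS;
            for (int i = 0; i < 6; i++)
                    st->loopdetect_addr[i] = rand() & 0xff;
            /* fixed prefix: recognisable and locally administered */
            st->loopdetect_addr[0] = 0xba;
            st->loopdetect_addr[1] = 0xbe;
            return true;
    }

    int main(void)
    {
            struct bla_state st = { .loopdetect_next = LOOPDETECT_PERIODS };

            for (int run = 1; run <= 12; run++)
                    if (loopdetect_tick(&st))
                            printf("run %d: send loopdetect from %02x:%02x:...\n",
                                   run, st.loopdetect_addr[0], st.loopdetect_addr[1]);
            return 0;
    }
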
@@ -1332,6 +1413,9 @@ static void batadv_bla_periodic_work(struct work_struct *work)
1332 backbone_gw->lasttime = jiffies; 1413 backbone_gw->lasttime = jiffies;
1333 1414
1334 batadv_bla_send_announce(bat_priv, backbone_gw); 1415 batadv_bla_send_announce(bat_priv, backbone_gw);
1416 if (send_loopdetect)
1417 batadv_bla_send_loopdetect(bat_priv,
1418 backbone_gw);
1335 1419
1336 /* request_sent is only set after creation to avoid 1420 /* request_sent is only set after creation to avoid
1337 * problems when we are not yet known as backbone gw 1421 * problems when we are not yet known as backbone gw
@@ -1405,6 +1489,9 @@ int batadv_bla_init(struct batadv_priv *bat_priv)
1405 bat_priv->bla.bcast_duplist[i].entrytime = entrytime; 1489 bat_priv->bla.bcast_duplist[i].entrytime = entrytime;
1406 bat_priv->bla.bcast_duplist_curr = 0; 1490 bat_priv->bla.bcast_duplist_curr = 0;
1407 1491
1492 atomic_set(&bat_priv->bla.loopdetect_next,
1493 BATADV_BLA_LOOPDETECT_PERIODS);
1494
1408 if (bat_priv->bla.claim_hash) 1495 if (bat_priv->bla.claim_hash)
1409 return 0; 1496 return 0;
1410 1497
@@ -1442,15 +1529,16 @@ int batadv_bla_init(struct batadv_priv *bat_priv)
1442 * sent by another host, drop it. We allow equal packets from 1529 * sent by another host, drop it. We allow equal packets from
1443 * the same host however as this might be intended. 1530 * the same host however as this might be intended.
1444 * 1531 *
1445 * Return: 1 if a packet is in the duplicate list, 0 otherwise. 1532 * Return: true if a packet is in the duplicate list, false otherwise.
1446 */ 1533 */
1447int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, 1534bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
1448 struct sk_buff *skb) 1535 struct sk_buff *skb)
1449{ 1536{
1450 int i, curr, ret = 0; 1537 int i, curr;
1451 __be32 crc; 1538 __be32 crc;
1452 struct batadv_bcast_packet *bcast_packet; 1539 struct batadv_bcast_packet *bcast_packet;
1453 struct batadv_bcast_duplist_entry *entry; 1540 struct batadv_bcast_duplist_entry *entry;
1541 bool ret = false;
1454 1542
1455 bcast_packet = (struct batadv_bcast_packet *)skb->data; 1543 bcast_packet = (struct batadv_bcast_packet *)skb->data;
1456 1544
@@ -1478,9 +1566,9 @@ int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
1478 continue; 1566 continue;
1479 1567
1480 /* this entry seems to match: same crc, not too old, 1568 /* this entry seems to match: same crc, not too old,
1481 * and from another gw. therefore return 1 to forbid it. 1569 * and from another gw. therefore return true to forbid it.
1482 */ 1570 */
1483 ret = 1; 1571 ret = true;
1484 goto out; 1572 goto out;
1485 } 1573 }
1486 /* not found, add a new entry (overwrite the oldest entry) 1574 /* not found, add a new entry (overwrite the oldest entry)
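
The duplicate check walks a small fixed-size ring of recently seen broadcasts: an entry counts as a hit when the payload CRC matches, the entry is not too old, and it was recorded for a different originator; otherwise the oldest slot is overwritten with the new packet. A stand-alone model of that ring, with invented sizes and names standing in for the kernel constants:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define DUPLIST_SIZE    16    /* placeholder for BATADV_DUPLIST_SIZE */
    #define DUPLIST_TIMEOUT 500   /* placeholder for BATADV_DUPLIST_TIMEOUT */

    struct dup_entry {
            uint32_t crc;
            uint64_t entrytime;
            uint8_t  orig[6];
    };

    struct duplist {
            struct dup_entry entry[DUPLIST_SIZE];
            int curr;   /* most recently written slot */
    };

    /* Return true if this broadcast was already seen from another originator. */
    static bool duplist_check(struct duplist *dl, uint32_t crc,
                              const uint8_t orig[6], uint64_t now)
    {
            for (int i = 0; i < DUPLIST_SIZE; i++) {
                    int idx = (dl->curr + DUPLIST_SIZE - i) % DUPLIST_SIZE;
                    struct dup_entry *e = &dl->entry[idx];

                    if (now - e->entrytime > DUPLIST_TIMEOUT)
                            continue;               /* too old to count */
                    if (e->crc != crc)
                            continue;               /* different payload */
                    if (memcmp(e->orig, orig, 6) == 0)
                            return false;           /* same sender again: allowed */
                    return true;                    /* same payload, other sender */
            }

            /* not seen recently: remember it, overwriting the oldest slot */
            dl->curr = (dl->curr + 1) % DUPLIST_SIZE;
            dl->entry[dl->curr].crc = crc;
            dl->entry[dl->curr].entrytime = now;
            memcpy(dl->entry[dl->curr].orig, orig, 6);
            return false;
    }

    int main(void)
    {
            struct duplist dl = { 0 };
            uint8_t a[6] = { 1 }, b[6] = { 2 };

            printf("%d\n", duplist_check(&dl, 0xdeadbeef, a, 100));  /* 0: new      */
            printf("%d\n", duplist_check(&dl, 0xdeadbeef, a, 150));  /* 0: same gw  */
            printf("%d\n", duplist_check(&dl, 0xdeadbeef, b, 200));  /* 1: dup      */
            return 0;
    }
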
@@ -1546,21 +1634,21 @@ bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig,
1546 * @orig_node: the orig_node of the frame 1634 * @orig_node: the orig_node of the frame
1547 * @hdr_size: maximum length of the frame 1635 * @hdr_size: maximum length of the frame
1548 * 1636 *
1549 * Return: 1 if the orig_node is also a gateway on the soft interface, otherwise 1637 * Return: true if the orig_node is also a gateway on the soft interface,
1550 * it returns 0. 1638 * otherwise it returns false.
1551 */ 1639 */
1552int batadv_bla_is_backbone_gw(struct sk_buff *skb, 1640bool batadv_bla_is_backbone_gw(struct sk_buff *skb,
1553 struct batadv_orig_node *orig_node, int hdr_size) 1641 struct batadv_orig_node *orig_node, int hdr_size)
1554{ 1642{
1555 struct batadv_bla_backbone_gw *backbone_gw; 1643 struct batadv_bla_backbone_gw *backbone_gw;
1556 unsigned short vid; 1644 unsigned short vid;
1557 1645
1558 if (!atomic_read(&orig_node->bat_priv->bridge_loop_avoidance)) 1646 if (!atomic_read(&orig_node->bat_priv->bridge_loop_avoidance))
1559 return 0; 1647 return false;
1560 1648
1561 /* first, find out the vid. */ 1649 /* first, find out the vid. */
1562 if (!pskb_may_pull(skb, hdr_size + ETH_HLEN)) 1650 if (!pskb_may_pull(skb, hdr_size + ETH_HLEN))
1563 return 0; 1651 return false;
1564 1652
1565 vid = batadv_get_vid(skb, hdr_size); 1653 vid = batadv_get_vid(skb, hdr_size);
1566 1654
@@ -1568,14 +1656,14 @@ int batadv_bla_is_backbone_gw(struct sk_buff *skb,
1568 backbone_gw = batadv_backbone_hash_find(orig_node->bat_priv, 1656 backbone_gw = batadv_backbone_hash_find(orig_node->bat_priv,
1569 orig_node->orig, vid); 1657 orig_node->orig, vid);
1570 if (!backbone_gw) 1658 if (!backbone_gw)
1571 return 0; 1659 return false;
1572 1660
1573 batadv_backbone_gw_put(backbone_gw); 1661 batadv_backbone_gw_put(backbone_gw);
1574 return 1; 1662 return true;
1575} 1663}
1576 1664
1577/** 1665/**
1578 * batadv_bla_init - free all bla structures 1666 * batadv_bla_free - free all bla structures
1579 * @bat_priv: the bat priv with all the soft interface information 1667 * @bat_priv: the bat priv with all the soft interface information
1580 * 1668 *
1581 * for softinterface free or module unload 1669 * for softinterface free or module unload
@@ -1602,6 +1690,55 @@ void batadv_bla_free(struct batadv_priv *bat_priv)
1602} 1690}
1603 1691
1604/** 1692/**
1693 * batadv_bla_loopdetect_check - check and handle a detected loop
1694 * @bat_priv: the bat priv with all the soft interface information
1695 * @skb: the packet to check
1696 * @primary_if: interface where the request came on
1697 * @vid: the VLAN ID of the frame
1698 *
1699 * Checks if this packet is a loop detect frame which has been sent by us,
1700 * throws an uevent and logs the event if that is the case.
1701 *
1702 * Return: true if it is a loop detect frame which is to be dropped, false
1703 * otherwise.
1704 */
1705static bool
1706batadv_bla_loopdetect_check(struct batadv_priv *bat_priv, struct sk_buff *skb,
1707 struct batadv_hard_iface *primary_if,
1708 unsigned short vid)
1709{
1710 struct batadv_bla_backbone_gw *backbone_gw;
1711 struct ethhdr *ethhdr;
1712
1713 ethhdr = eth_hdr(skb);
1714
1715 /* Only check for the MAC address and skip more checks here for
1716 * performance reasons - this function is on the hotpath, after all.
1717 */
1718 if (!batadv_compare_eth(ethhdr->h_source,
1719 bat_priv->bla.loopdetect_addr))
1720 return false;
1721
1722 /* If the packet came too late, don't forward it on the mesh
1723 * but don't consider that as loop. It might be a coincidence.
1724 */
1725 if (batadv_has_timed_out(bat_priv->bla.loopdetect_lasttime,
1726 BATADV_BLA_LOOPDETECT_TIMEOUT))
1727 return true;
1728
1729 backbone_gw = batadv_bla_get_backbone_gw(bat_priv,
1730 primary_if->net_dev->dev_addr,
1731 vid, true);
1732 if (unlikely(!backbone_gw))
1733 return true;
1734
1735 queue_work(batadv_event_workqueue, &backbone_gw->report_work);
1736 /* backbone_gw is unreferenced in the report work function */
1737
1738 return true;
1739}
1740
1741/**
1605 * batadv_bla_rx - check packets coming from the mesh. 1742 * batadv_bla_rx - check packets coming from the mesh.
1606 * @bat_priv: the bat priv with all the soft interface information 1743 * @bat_priv: the bat priv with all the soft interface information
1607 * @skb: the frame to be checked 1744 * @skb: the frame to be checked
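
On the receive side, batadv_bla_loopdetect_check() above only has to recognise its own probe coming back: it compares the Ethernet source against the currently active loop-detection address and, unless the probe is too old, hands the uevent report to a workqueue (where sleeping is allowed) while still consuming the frame. Stripped of the kernel plumbing, the decision reduces to the sketch below; the timeout value and helper names are placeholders.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <time.h>

    #define LOOPDETECT_TIMEOUT 60   /* seconds; placeholder for the kernel constant */

    struct bla_state {
            uint8_t loopdetect_addr[6];
            time_t  loopdetect_lasttime;
    };

    /* Return true if the frame is one of our own probes and must be dropped. */
    static bool loopdetect_check(struct bla_state *st, const uint8_t src[6],
                                 time_t now, bool *report_loop)
    {
            *report_loop = false;

            /* cheap hot-path test: only the source MAC is compared */
            if (memcmp(src, st->loopdetect_addr, 6) != 0)
                    return false;

            /* a very late echo is dropped but not treated as a loop */
            if (now - st->loopdetect_lasttime > LOOPDETECT_TIMEOUT)
                    return true;

            *report_loop = true;   /* kernel: queue_work() so the uevent can sleep */
            return true;
    }

    int main(void)
    {
            struct bla_state st = {
                    .loopdetect_addr = { 0xba, 0xbe, 0x12, 0x34, 0x56, 0x78 },
                    .loopdetect_lasttime = 1000,
            };
            uint8_t probe[6] = { 0xba, 0xbe, 0x12, 0x34, 0x56, 0x78 };
            bool report;

            printf("drop=%d\n", loopdetect_check(&st, probe, 1010, &report));
            printf("loop reported=%d\n", report);
            return 0;
    }
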
@@ -1614,16 +1751,16 @@ void batadv_bla_free(struct batadv_priv *bat_priv)
1614 * 1751 *
1615 * in these cases, the skb is further handled by this function 1752 * in these cases, the skb is further handled by this function
1616 * 1753 *
1617 * Return: 1 if handled, otherwise it returns 0 and the caller shall further 1754 * Return: true if handled, otherwise it returns false and the caller shall
1618 * process the skb. 1755 * further process the skb.
1619 */ 1756 */
1620int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, 1757bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
1621 unsigned short vid, bool is_bcast) 1758 unsigned short vid, bool is_bcast)
1622{ 1759{
1623 struct ethhdr *ethhdr; 1760 struct ethhdr *ethhdr;
1624 struct batadv_bla_claim search_claim, *claim = NULL; 1761 struct batadv_bla_claim search_claim, *claim = NULL;
1625 struct batadv_hard_iface *primary_if; 1762 struct batadv_hard_iface *primary_if;
1626 int ret; 1763 bool ret;
1627 1764
1628 ethhdr = eth_hdr(skb); 1765 ethhdr = eth_hdr(skb);
1629 1766
@@ -1634,6 +1771,9 @@ int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
1634 if (!atomic_read(&bat_priv->bridge_loop_avoidance)) 1771 if (!atomic_read(&bat_priv->bridge_loop_avoidance))
1635 goto allow; 1772 goto allow;
1636 1773
1774 if (batadv_bla_loopdetect_check(bat_priv, skb, primary_if, vid))
1775 goto handled;
1776
1637 if (unlikely(atomic_read(&bat_priv->bla.num_requests))) 1777 if (unlikely(atomic_read(&bat_priv->bla.num_requests)))
1638 /* don't allow broadcasts while requests are in flight */ 1778 /* don't allow broadcasts while requests are in flight */
1639 if (is_multicast_ether_addr(ethhdr->h_dest) && is_bcast) 1779 if (is_multicast_ether_addr(ethhdr->h_dest) && is_bcast)
@@ -1682,12 +1822,12 @@ int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
1682 } 1822 }
1683allow: 1823allow:
1684 batadv_bla_update_own_backbone_gw(bat_priv, primary_if, vid); 1824 batadv_bla_update_own_backbone_gw(bat_priv, primary_if, vid);
1685 ret = 0; 1825 ret = false;
1686 goto out; 1826 goto out;
1687 1827
1688handled: 1828handled:
1689 kfree_skb(skb); 1829 kfree_skb(skb);
1690 ret = 1; 1830 ret = true;
1691 1831
1692out: 1832out:
1693 if (primary_if) 1833 if (primary_if)
@@ -1711,16 +1851,16 @@ out:
1711 * 1851 *
1712 * This call might reallocate skb data. 1852 * This call might reallocate skb data.
1713 * 1853 *
1714 * Return: 1 if handled, otherwise it returns 0 and the caller shall further 1854 * Return: true if handled, otherwise it returns false and the caller shall
1715 * process the skb. 1855 * further process the skb.
1716 */ 1856 */
1717int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, 1857bool batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb,
1718 unsigned short vid) 1858 unsigned short vid)
1719{ 1859{
1720 struct ethhdr *ethhdr; 1860 struct ethhdr *ethhdr;
1721 struct batadv_bla_claim search_claim, *claim = NULL; 1861 struct batadv_bla_claim search_claim, *claim = NULL;
1722 struct batadv_hard_iface *primary_if; 1862 struct batadv_hard_iface *primary_if;
1723 int ret = 0; 1863 bool ret = false;
1724 1864
1725 primary_if = batadv_primary_if_get_selected(bat_priv); 1865 primary_if = batadv_primary_if_get_selected(bat_priv);
1726 if (!primary_if) 1866 if (!primary_if)
@@ -1774,10 +1914,10 @@ int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb,
1774 } 1914 }
1775allow: 1915allow:
1776 batadv_bla_update_own_backbone_gw(bat_priv, primary_if, vid); 1916 batadv_bla_update_own_backbone_gw(bat_priv, primary_if, vid);
1777 ret = 0; 1917 ret = false;
1778 goto out; 1918 goto out;
1779handled: 1919handled:
1780 ret = 1; 1920 ret = true;
1781out: 1921out:
1782 if (primary_if) 1922 if (primary_if)
1783 batadv_hardif_put(primary_if); 1923 batadv_hardif_put(primary_if);
@@ -1815,8 +1955,8 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset)
1815 "Claims announced for the mesh %s (orig %pM, group id %#.4x)\n", 1955 "Claims announced for the mesh %s (orig %pM, group id %#.4x)\n",
1816 net_dev->name, primary_addr, 1956 net_dev->name, primary_addr,
1817 ntohs(bat_priv->bla.claim_dest.group)); 1957 ntohs(bat_priv->bla.claim_dest.group));
1818 seq_printf(seq, " %-17s %-5s %-17s [o] (%-6s)\n", 1958 seq_puts(seq,
1819 "Client", "VID", "Originator", "CRC"); 1959 " Client VID Originator [o] (CRC )\n");
1820 for (i = 0; i < hash->size; i++) { 1960 for (i = 0; i < hash->size; i++) {
1821 head = &hash->table[i]; 1961 head = &hash->table[i];
1822 1962
@@ -1873,8 +2013,7 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset)
1873 "Backbones announced for the mesh %s (orig %pM, group id %#.4x)\n", 2013 "Backbones announced for the mesh %s (orig %pM, group id %#.4x)\n",
1874 net_dev->name, primary_addr, 2014 net_dev->name, primary_addr,
1875 ntohs(bat_priv->bla.claim_dest.group)); 2015 ntohs(bat_priv->bla.claim_dest.group));
1876 seq_printf(seq, " %-17s %-5s %-9s (%-6s)\n", 2016 seq_puts(seq, " Originator VID last seen (CRC )\n");
1877 "Originator", "VID", "last seen", "CRC");
1878 for (i = 0; i < hash->size; i++) { 2017 for (i = 0; i < hash->size; i++) {
1879 head = &hash->table[i]; 2018 head = &hash->table[i];
1880 2019
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index 579f0fa6fe6a..0f01daeb359e 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -27,19 +27,20 @@ struct seq_file;
27struct sk_buff; 27struct sk_buff;
28 28
29#ifdef CONFIG_BATMAN_ADV_BLA 29#ifdef CONFIG_BATMAN_ADV_BLA
30int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, 30bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
31 unsigned short vid, bool is_bcast); 31 unsigned short vid, bool is_bcast);
32int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, 32bool batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb,
33 unsigned short vid); 33 unsigned short vid);
34int batadv_bla_is_backbone_gw(struct sk_buff *skb, 34bool batadv_bla_is_backbone_gw(struct sk_buff *skb,
35 struct batadv_orig_node *orig_node, int hdr_size); 35 struct batadv_orig_node *orig_node,
36 int hdr_size);
36int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset); 37int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset);
37int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, 38int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq,
38 void *offset); 39 void *offset);
39bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig, 40bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig,
40 unsigned short vid); 41 unsigned short vid);
41int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, 42bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
42 struct sk_buff *skb); 43 struct sk_buff *skb);
43void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, 44void batadv_bla_update_orig_address(struct batadv_priv *bat_priv,
44 struct batadv_hard_iface *primary_if, 45 struct batadv_hard_iface *primary_if,
45 struct batadv_hard_iface *oldif); 46 struct batadv_hard_iface *oldif);
@@ -50,24 +51,24 @@ void batadv_bla_free(struct batadv_priv *bat_priv);
50#define BATADV_BLA_CRC_INIT 0 51#define BATADV_BLA_CRC_INIT 0
51#else /* ifdef CONFIG_BATMAN_ADV_BLA */ 52#else /* ifdef CONFIG_BATMAN_ADV_BLA */
52 53
53static inline int batadv_bla_rx(struct batadv_priv *bat_priv, 54static inline bool batadv_bla_rx(struct batadv_priv *bat_priv,
54 struct sk_buff *skb, unsigned short vid, 55 struct sk_buff *skb, unsigned short vid,
55 bool is_bcast) 56 bool is_bcast)
56{ 57{
57 return 0; 58 return false;
58} 59}
59 60
60static inline int batadv_bla_tx(struct batadv_priv *bat_priv, 61static inline bool batadv_bla_tx(struct batadv_priv *bat_priv,
61 struct sk_buff *skb, unsigned short vid) 62 struct sk_buff *skb, unsigned short vid)
62{ 63{
63 return 0; 64 return false;
64} 65}
65 66
66static inline int batadv_bla_is_backbone_gw(struct sk_buff *skb, 67static inline bool batadv_bla_is_backbone_gw(struct sk_buff *skb,
67 struct batadv_orig_node *orig_node, 68 struct batadv_orig_node *orig_node,
68 int hdr_size) 69 int hdr_size)
69{ 70{
70 return 0; 71 return false;
71} 72}
72 73
73static inline int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, 74static inline int batadv_bla_claim_table_seq_print_text(struct seq_file *seq,
@@ -88,11 +89,11 @@ static inline bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv,
88 return false; 89 return false;
89} 90}
90 91
91static inline int 92static inline bool
92batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, 93batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
93 struct sk_buff *skb) 94 struct sk_buff *skb)
94{ 95{
95 return 0; 96 return false;
96} 97}
97 98
98static inline void 99static inline void
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index 48253cf8341b..952900466d88 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -134,7 +134,7 @@ static int batadv_log_release(struct inode *inode, struct file *file)
134 return 0; 134 return 0;
135} 135}
136 136
137static int batadv_log_empty(struct batadv_priv_debug_log *debug_log) 137static bool batadv_log_empty(struct batadv_priv_debug_log *debug_log)
138{ 138{
139 return !(debug_log->log_start - debug_log->log_end); 139 return !(debug_log->log_start - debug_log->log_end);
140} 140}
@@ -365,14 +365,17 @@ static int batadv_nc_nodes_open(struct inode *inode, struct file *file)
365 365
366#define BATADV_DEBUGINFO(_name, _mode, _open) \ 366#define BATADV_DEBUGINFO(_name, _mode, _open) \
367struct batadv_debuginfo batadv_debuginfo_##_name = { \ 367struct batadv_debuginfo batadv_debuginfo_##_name = { \
368 .attr = { .name = __stringify(_name), \ 368 .attr = { \
369 .mode = _mode, }, \ 369 .name = __stringify(_name), \
370 .fops = { .owner = THIS_MODULE, \ 370 .mode = _mode, \
371 .open = _open, \ 371 }, \
372 .read = seq_read, \ 372 .fops = { \
373 .llseek = seq_lseek, \ 373 .owner = THIS_MODULE, \
374 .release = single_release, \ 374 .open = _open, \
375 } \ 375 .read = seq_read, \
376 .llseek = seq_lseek, \
377 .release = single_release, \
378 }, \
376} 379}
377 380
378/* the following attributes are general and therefore they will be directly 381/* the following attributes are general and therefore they will be directly
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index e96d7c745b4a..278800a99c69 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -152,7 +152,7 @@ static void batadv_dat_purge(struct work_struct *work)
152 struct batadv_priv_dat *priv_dat; 152 struct batadv_priv_dat *priv_dat;
153 struct batadv_priv *bat_priv; 153 struct batadv_priv *bat_priv;
154 154
155 delayed_work = container_of(work, struct delayed_work, work); 155 delayed_work = to_delayed_work(work);
156 priv_dat = container_of(delayed_work, struct batadv_priv_dat, work); 156 priv_dat = container_of(delayed_work, struct batadv_priv_dat, work);
157 bat_priv = container_of(priv_dat, struct batadv_priv, dat); 157 bat_priv = container_of(priv_dat, struct batadv_priv, dat);
158 158
@@ -165,14 +165,14 @@ static void batadv_dat_purge(struct work_struct *work)
165 * @node: node in the local table 165 * @node: node in the local table
166 * @data2: second object to compare the node to 166 * @data2: second object to compare the node to
167 * 167 *
168 * Return: 1 if the two entries are the same, 0 otherwise. 168 * Return: true if the two entries are the same, false otherwise.
169 */ 169 */
170static int batadv_compare_dat(const struct hlist_node *node, const void *data2) 170static bool batadv_compare_dat(const struct hlist_node *node, const void *data2)
171{ 171{
172 const void *data1 = container_of(node, struct batadv_dat_entry, 172 const void *data1 = container_of(node, struct batadv_dat_entry,
173 hash_entry); 173 hash_entry);
174 174
175 return memcmp(data1, data2, sizeof(__be32)) == 0 ? 1 : 0; 175 return memcmp(data1, data2, sizeof(__be32)) == 0;
176} 176}
177 177
178/** 178/**
@@ -568,6 +568,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv,
568 * be sent to 568 * be sent to
569 * @bat_priv: the bat priv with all the soft interface information 569 * @bat_priv: the bat priv with all the soft interface information
570 * @ip_dst: ipv4 to look up in the DHT 570 * @ip_dst: ipv4 to look up in the DHT
571 * @vid: VLAN identifier
571 * 572 *
572 * An originator O is selected if and only if its DHT_ID value is one of three 573 * An originator O is selected if and only if its DHT_ID value is one of three
573 * closest values (from the LEFT, with wrap around if needed) then the hash 574 * closest values (from the LEFT, with wrap around if needed) then the hash
@@ -576,7 +577,8 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv,
576 * Return: the candidate array of size BATADV_DAT_CANDIDATE_NUM. 577 * Return: the candidate array of size BATADV_DAT_CANDIDATE_NUM.
577 */ 578 */
578static struct batadv_dat_candidate * 579static struct batadv_dat_candidate *
579batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) 580batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst,
581 unsigned short vid)
580{ 582{
581 int select; 583 int select;
582 batadv_dat_addr_t last_max = BATADV_DAT_ADDR_MAX, ip_key; 584 batadv_dat_addr_t last_max = BATADV_DAT_ADDR_MAX, ip_key;
@@ -592,7 +594,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
592 return NULL; 594 return NULL;
593 595
594 dat.ip = ip_dst; 596 dat.ip = ip_dst;
595 dat.vid = 0; 597 dat.vid = vid;
596 ip_key = (batadv_dat_addr_t)batadv_hash_dat(&dat, 598 ip_key = (batadv_dat_addr_t)batadv_hash_dat(&dat,
597 BATADV_DAT_ADDR_MAX); 599 BATADV_DAT_ADDR_MAX);
598 600
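
Folding the VLAN ID into the DHT key means the same IPv4 address on two different VLANs can now select different candidate originators instead of always colliding on vid 0. A toy version of the key derivation is sketched below; the real code hashes via batadv_hash_dat(), and the FNV-style hash and key-space size here are only illustrative.

    #include <stdint.h>
    #include <stdio.h>

    #define DAT_ADDR_MAX 0xffff   /* stand-in for the batadv_dat_addr_t key space */

    /* Toy FNV-1a over the (ip, vid) pair; the kernel uses its own hash here. */
    static uint16_t dat_key(uint32_t ip_be, uint16_t vid)
    {
            uint32_t h = 2166136261u;
            const uint8_t *p = (const uint8_t *)&ip_be;

            for (int i = 0; i < 4; i++)
                    h = (h ^ p[i]) * 16777619u;
            h = (h ^ (vid & 0xff)) * 16777619u;
            h = (h ^ (vid >> 8)) * 16777619u;

            return (uint16_t)(h % (DAT_ADDR_MAX + 1));
    }

    int main(void)
    {
            uint32_t ip = 0x0a00000a;   /* 10.0.0.10; the value itself is arbitrary */

            /* same IP, different VLANs: different positions in the DHT key space */
            printf("vid 1 -> %u\n", dat_key(ip, 1));
            printf("vid 2 -> %u\n", dat_key(ip, 2));
            return 0;
    }
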
@@ -612,6 +614,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
612 * @bat_priv: the bat priv with all the soft interface information 614 * @bat_priv: the bat priv with all the soft interface information
613 * @skb: payload to send 615 * @skb: payload to send
614 * @ip: the DHT key 616 * @ip: the DHT key
617 * @vid: VLAN identifier
615 * @packet_subtype: unicast4addr packet subtype to use 618 * @packet_subtype: unicast4addr packet subtype to use
616 * 619 *
617 * This function copies the skb with pskb_copy() and is sent as unicast packet 620 * This function copies the skb with pskb_copy() and is sent as unicast packet
@@ -622,7 +625,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
622 */ 625 */
623static bool batadv_dat_send_data(struct batadv_priv *bat_priv, 626static bool batadv_dat_send_data(struct batadv_priv *bat_priv,
624 struct sk_buff *skb, __be32 ip, 627 struct sk_buff *skb, __be32 ip,
625 int packet_subtype) 628 unsigned short vid, int packet_subtype)
626{ 629{
627 int i; 630 int i;
628 bool ret = false; 631 bool ret = false;
@@ -631,7 +634,7 @@ static bool batadv_dat_send_data(struct batadv_priv *bat_priv,
631 struct sk_buff *tmp_skb; 634 struct sk_buff *tmp_skb;
632 struct batadv_dat_candidate *cand; 635 struct batadv_dat_candidate *cand;
633 636
634 cand = batadv_dat_select_candidates(bat_priv, ip); 637 cand = batadv_dat_select_candidates(bat_priv, ip, vid);
635 if (!cand) 638 if (!cand)
636 goto out; 639 goto out;
637 640
@@ -717,7 +720,7 @@ void batadv_dat_status_update(struct net_device *net_dev)
717} 720}
718 721
719/** 722/**
720 * batadv_gw_tvlv_ogm_handler_v1 - process incoming dat tvlv container 723 * batadv_dat_tvlv_ogm_handler_v1 - process incoming dat tvlv container
721 * @bat_priv: the bat priv with all the soft interface information 724 * @bat_priv: the bat priv with all the soft interface information
722 * @orig: the orig_node of the ogm 725 * @orig: the orig_node of the ogm
723 * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags) 726 * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags)
@@ -814,8 +817,8 @@ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset)
814 goto out; 817 goto out;
815 818
816 seq_printf(seq, "Distributed ARP Table (%s):\n", net_dev->name); 819 seq_printf(seq, "Distributed ARP Table (%s):\n", net_dev->name);
817 seq_printf(seq, " %-7s %-9s %4s %11s\n", "IPv4", 820 seq_puts(seq,
818 "MAC", "VID", "last-seen"); 821 " IPv4 MAC VID last-seen\n");
819 822
820 for (i = 0; i < hash->size; i++) { 823 for (i = 0; i < hash->size; i++) {
821 head = &hash->table[i]; 824 head = &hash->table[i];
@@ -1022,7 +1025,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
1022 ret = true; 1025 ret = true;
1023 } else { 1026 } else {
1024 /* Send the request to the DHT */ 1027 /* Send the request to the DHT */
1025 ret = batadv_dat_send_data(bat_priv, skb, ip_dst, 1028 ret = batadv_dat_send_data(bat_priv, skb, ip_dst, vid,
1026 BATADV_P_DAT_DHT_GET); 1029 BATADV_P_DAT_DHT_GET);
1027 } 1030 }
1028out: 1031out:
@@ -1150,8 +1153,8 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv,
1150 /* Send the ARP reply to the candidates for both the IP addresses that 1153 /* Send the ARP reply to the candidates for both the IP addresses that
1151 * the node obtained from the ARP reply 1154 * the node obtained from the ARP reply
1152 */ 1155 */
1153 batadv_dat_send_data(bat_priv, skb, ip_src, BATADV_P_DAT_DHT_PUT); 1156 batadv_dat_send_data(bat_priv, skb, ip_src, vid, BATADV_P_DAT_DHT_PUT);
1154 batadv_dat_send_data(bat_priv, skb, ip_dst, BATADV_P_DAT_DHT_PUT); 1157 batadv_dat_send_data(bat_priv, skb, ip_dst, vid, BATADV_P_DAT_DHT_PUT);
1155} 1158}
1156 1159
1157/** 1160/**
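The distributed-arp-table.c hunks above thread the VLAN identifier down into batadv_dat_select_candidates(), so the DHT key is computed over {ip, vid} instead of {ip, 0}. Below is a minimal userspace sketch of why that matters: the same IPv4 address used on two different VLANs must map to two different candidate keys. toy_dat_key() is a stand-in FNV-style mix, not the kernel's batadv_hash_dat().

/* Toy illustration only: mix the IPv4 address and the VLAN id into one
 * lookup key, so identical addresses on different VLANs no longer collide.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t toy_dat_key(uint32_t ip, unsigned short vid)
{
	unsigned char bytes[6];
	uint32_t h = 2166136261u;
	unsigned int i;

	bytes[0] = ip >> 24;
	bytes[1] = ip >> 16;
	bytes[2] = ip >> 8;
	bytes[3] = ip;
	bytes[4] = vid >> 8;
	bytes[5] = vid;

	for (i = 0; i < sizeof(bytes); i++) {
		h ^= bytes[i];
		h *= 16777619u;
	}

	return h;
}

int main(void)
{
	uint32_t ip = 0x0a000001; /* 10.0.0.1 */

	/* with the vid forced to 0, both lookups used to land on one key */
	printf("vid 10 -> %08x\n", toy_dat_key(ip, 10));
	printf("vid 20 -> %08x\n", toy_dat_key(ip, 20));
	return 0;
}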
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index e6956d0746a2..65536db1bff7 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -407,8 +407,8 @@ static struct sk_buff *batadv_frag_create(struct sk_buff *skb,
407 unsigned int mtu) 407 unsigned int mtu)
408{ 408{
409 struct sk_buff *skb_fragment; 409 struct sk_buff *skb_fragment;
410 unsigned header_size = sizeof(*frag_head); 410 unsigned int header_size = sizeof(*frag_head);
411 unsigned fragment_size = mtu - header_size; 411 unsigned int fragment_size = mtu - header_size;
412 412
413 skb_fragment = netdev_alloc_skb(NULL, mtu + ETH_HLEN); 413 skb_fragment = netdev_alloc_skb(NULL, mtu + ETH_HLEN);
414 if (!skb_fragment) 414 if (!skb_fragment)
@@ -444,15 +444,15 @@ bool batadv_frag_send_packet(struct sk_buff *skb,
444 struct batadv_hard_iface *primary_if = NULL; 444 struct batadv_hard_iface *primary_if = NULL;
445 struct batadv_frag_packet frag_header; 445 struct batadv_frag_packet frag_header;
446 struct sk_buff *skb_fragment; 446 struct sk_buff *skb_fragment;
447 unsigned mtu = neigh_node->if_incoming->net_dev->mtu; 447 unsigned int mtu = neigh_node->if_incoming->net_dev->mtu;
448 unsigned header_size = sizeof(frag_header); 448 unsigned int header_size = sizeof(frag_header);
449 unsigned max_fragment_size, max_packet_size; 449 unsigned int max_fragment_size, max_packet_size;
450 bool ret = false; 450 bool ret = false;
451 451
452 /* To avoid merge and refragmentation at next-hops we never send 452 /* To avoid merge and refragmentation at next-hops we never send
453 * fragments larger than BATADV_FRAG_MAX_FRAG_SIZE 453 * fragments larger than BATADV_FRAG_MAX_FRAG_SIZE
454 */ 454 */
455 mtu = min_t(unsigned, mtu, BATADV_FRAG_MAX_FRAG_SIZE); 455 mtu = min_t(unsigned int, mtu, BATADV_FRAG_MAX_FRAG_SIZE);
456 max_fragment_size = mtu - header_size; 456 max_fragment_size = mtu - header_size;
457 max_packet_size = max_fragment_size * BATADV_FRAG_MAX_FRAGMENTS; 457 max_packet_size = max_fragment_size * BATADV_FRAG_MAX_FRAGMENTS;
458 458
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index c59aff5ccac8..5839c569f769 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -135,8 +135,8 @@ static void batadv_gw_select(struct batadv_priv *bat_priv,
135 135
136 spin_lock_bh(&bat_priv->gw.list_lock); 136 spin_lock_bh(&bat_priv->gw.list_lock);
137 137
138 if (new_gw_node && !kref_get_unless_zero(&new_gw_node->refcount)) 138 if (new_gw_node)
139 new_gw_node = NULL; 139 kref_get(&new_gw_node->refcount);
140 140
141 curr_gw_node = rcu_dereference_protected(bat_priv->gw.curr_gw, 1); 141 curr_gw_node = rcu_dereference_protected(bat_priv->gw.curr_gw, 1);
142 rcu_assign_pointer(bat_priv->gw.curr_gw, new_gw_node); 142 rcu_assign_pointer(bat_priv->gw.curr_gw, new_gw_node);
@@ -440,15 +440,11 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv,
440 if (gateway->bandwidth_down == 0) 440 if (gateway->bandwidth_down == 0)
441 return; 441 return;
442 442
443 if (!kref_get_unless_zero(&orig_node->refcount))
444 return;
445
446 gw_node = kzalloc(sizeof(*gw_node), GFP_ATOMIC); 443 gw_node = kzalloc(sizeof(*gw_node), GFP_ATOMIC);
447 if (!gw_node) { 444 if (!gw_node)
448 batadv_orig_node_put(orig_node);
449 return; 445 return;
450 }
451 446
447 kref_get(&orig_node->refcount);
452 INIT_HLIST_NODE(&gw_node->list); 448 INIT_HLIST_NODE(&gw_node->list);
453 gw_node->orig_node = orig_node; 449 gw_node->orig_node = orig_node;
454 gw_node->bandwidth_down = ntohl(gateway->bandwidth_down); 450 gw_node->bandwidth_down = ntohl(gateway->bandwidth_down);
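The gateway_client.c hunks above, and several later ones, replace kref_get_unless_zero() with plain kref_get() on paths where the caller already holds a counted reference, and conversely add kref_get_unless_zero() on lookup paths where the object may be dying. A sketch of the two acquisition patterns using C11 atomics instead of the kernel's struct kref; obj_get() and obj_get_unless_zero() are illustrative names, not kernel API.

/* obj_get() mirrors kref_get(): only legal while the caller already holds
 * a reference, so the count can never be observed as zero.
 * obj_get_unless_zero() mirrors kref_get_unless_zero(): used on lookup
 * paths where the last reference may be dropped concurrently.
 */
#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>

struct obj {
	atomic_int refcount;
};

static void obj_get(struct obj *o)
{
	int old = atomic_fetch_add(&o->refcount, 1);

	assert(old > 0); /* caller promised it already owns a reference */
}

static bool obj_get_unless_zero(struct obj *o)
{
	int old = atomic_load(&o->refcount);

	do {
		if (old == 0)
			return false; /* object is already being destroyed */
	} while (!atomic_compare_exchange_weak(&o->refcount, &old, old + 1));

	return true;
}

int main(void)
{
	struct obj o;

	atomic_init(&o.refcount, 1);
	obj_get(&o); /* fine: we hold the initial reference */
	return obj_get_unless_zero(&o) ? 0 : 1;
}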
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index b22b2775a0a5..8c2f39962fa5 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -36,7 +36,6 @@
36#include <linux/slab.h> 36#include <linux/slab.h>
37#include <linux/spinlock.h> 37#include <linux/spinlock.h>
38#include <linux/workqueue.h> 38#include <linux/workqueue.h>
39#include <net/net_namespace.h>
40 39
41#include "bridge_loop_avoidance.h" 40#include "bridge_loop_avoidance.h"
42#include "debugfs.h" 41#include "debugfs.h"
@@ -121,6 +120,7 @@ static bool batadv_mutual_parents(const struct net_device *dev1,
121static bool batadv_is_on_batman_iface(const struct net_device *net_dev) 120static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
122{ 121{
123 struct net_device *parent_dev; 122 struct net_device *parent_dev;
123 struct net *net = dev_net(net_dev);
124 bool ret; 124 bool ret;
125 125
126 /* check if this is a batman-adv mesh interface */ 126 /* check if this is a batman-adv mesh interface */
@@ -133,7 +133,7 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
133 return false; 133 return false;
134 134
135 /* recurse over the parent device */ 135 /* recurse over the parent device */
136 parent_dev = __dev_get_by_index(&init_net, dev_get_iflink(net_dev)); 136 parent_dev = __dev_get_by_index(net, dev_get_iflink(net_dev));
137 /* if we got a NULL parent_dev there is something broken.. */ 137 /* if we got a NULL parent_dev there is something broken.. */
138 if (WARN(!parent_dev, "Cannot find parent device")) 138 if (WARN(!parent_dev, "Cannot find parent device"))
139 return false; 139 return false;
@@ -146,22 +146,22 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
146 return ret; 146 return ret;
147} 147}
148 148
149static int batadv_is_valid_iface(const struct net_device *net_dev) 149static bool batadv_is_valid_iface(const struct net_device *net_dev)
150{ 150{
151 if (net_dev->flags & IFF_LOOPBACK) 151 if (net_dev->flags & IFF_LOOPBACK)
152 return 0; 152 return false;
153 153
154 if (net_dev->type != ARPHRD_ETHER) 154 if (net_dev->type != ARPHRD_ETHER)
155 return 0; 155 return false;
156 156
157 if (net_dev->addr_len != ETH_ALEN) 157 if (net_dev->addr_len != ETH_ALEN)
158 return 0; 158 return false;
159 159
160 /* no batman over batman */ 160 /* no batman over batman */
161 if (batadv_is_on_batman_iface(net_dev)) 161 if (batadv_is_on_batman_iface(net_dev))
162 return 0; 162 return false;
163 163
164 return 1; 164 return true;
165} 165}
166 166
167/** 167/**
@@ -236,8 +236,8 @@ static void batadv_primary_if_select(struct batadv_priv *bat_priv,
236 236
237 ASSERT_RTNL(); 237 ASSERT_RTNL();
238 238
239 if (new_hard_iface && !kref_get_unless_zero(&new_hard_iface->refcount)) 239 if (new_hard_iface)
240 new_hard_iface = NULL; 240 kref_get(&new_hard_iface->refcount);
241 241
242 curr_hard_iface = rcu_dereference_protected(bat_priv->primary_if, 1); 242 curr_hard_iface = rcu_dereference_protected(bat_priv->primary_if, 1);
243 rcu_assign_pointer(bat_priv->primary_if, new_hard_iface); 243 rcu_assign_pointer(bat_priv->primary_if, new_hard_iface);
@@ -407,6 +407,9 @@ batadv_hardif_activate_interface(struct batadv_hard_iface *hard_iface)
407 407
408 batadv_update_min_mtu(hard_iface->soft_iface); 408 batadv_update_min_mtu(hard_iface->soft_iface);
409 409
410 if (bat_priv->bat_algo_ops->bat_iface_activate)
411 bat_priv->bat_algo_ops->bat_iface_activate(hard_iface);
412
410out: 413out:
411 if (primary_if) 414 if (primary_if)
412 batadv_hardif_put(primary_if); 415 batadv_hardif_put(primary_if);
@@ -453,7 +456,7 @@ static int batadv_master_del_slave(struct batadv_hard_iface *slave,
453} 456}
454 457
455int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, 458int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
456 const char *iface_name) 459 struct net *net, const char *iface_name)
457{ 460{
458 struct batadv_priv *bat_priv; 461 struct batadv_priv *bat_priv;
459 struct net_device *soft_iface, *master; 462 struct net_device *soft_iface, *master;
@@ -464,13 +467,12 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
464 if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) 467 if (hard_iface->if_status != BATADV_IF_NOT_IN_USE)
465 goto out; 468 goto out;
466 469
467 if (!kref_get_unless_zero(&hard_iface->refcount)) 470 kref_get(&hard_iface->refcount);
468 goto out;
469 471
470 soft_iface = dev_get_by_name(&init_net, iface_name); 472 soft_iface = dev_get_by_name(net, iface_name);
471 473
472 if (!soft_iface) { 474 if (!soft_iface) {
473 soft_iface = batadv_softif_create(iface_name); 475 soft_iface = batadv_softif_create(net, iface_name);
474 476
475 if (!soft_iface) { 477 if (!soft_iface) {
476 ret = -ENOMEM; 478 ret = -ENOMEM;
@@ -519,6 +521,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
519 goto err_upper; 521 goto err_upper;
520 } 522 }
521 523
524 kref_get(&hard_iface->refcount);
522 hard_iface->batman_adv_ptype.type = ethertype; 525 hard_iface->batman_adv_ptype.type = ethertype;
523 hard_iface->batman_adv_ptype.func = batadv_batman_skb_recv; 526 hard_iface->batman_adv_ptype.func = batadv_batman_skb_recv;
524 hard_iface->batman_adv_ptype.dev = hard_iface->net_dev; 527 hard_iface->batman_adv_ptype.dev = hard_iface->net_dev;
@@ -572,8 +575,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
572 struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); 575 struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
573 struct batadv_hard_iface *primary_if = NULL; 576 struct batadv_hard_iface *primary_if = NULL;
574 577
575 if (hard_iface->if_status == BATADV_IF_ACTIVE) 578 batadv_hardif_deactivate_interface(hard_iface);
576 batadv_hardif_deactivate_interface(hard_iface);
577 579
578 if (hard_iface->if_status != BATADV_IF_INACTIVE) 580 if (hard_iface->if_status != BATADV_IF_INACTIVE)
579 goto out; 581 goto out;
@@ -581,6 +583,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
581 batadv_info(hard_iface->soft_iface, "Removing interface: %s\n", 583 batadv_info(hard_iface->soft_iface, "Removing interface: %s\n",
582 hard_iface->net_dev->name); 584 hard_iface->net_dev->name);
583 dev_remove_pack(&hard_iface->batman_adv_ptype); 585 dev_remove_pack(&hard_iface->batman_adv_ptype);
586 batadv_hardif_put(hard_iface);
584 587
585 bat_priv->num_ifaces--; 588 bat_priv->num_ifaces--;
586 batadv_orig_hash_del_if(hard_iface, bat_priv->num_ifaces); 589 batadv_orig_hash_del_if(hard_iface, bat_priv->num_ifaces);
@@ -650,8 +653,7 @@ batadv_hardif_add_interface(struct net_device *net_dev)
650 653
651 ASSERT_RTNL(); 654 ASSERT_RTNL();
652 655
653 ret = batadv_is_valid_iface(net_dev); 656 if (!batadv_is_valid_iface(net_dev))
654 if (ret != 1)
655 goto out; 657 goto out;
656 658
657 dev_hold(net_dev); 659 dev_hold(net_dev);
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index d74f1983f33e..a76724d369bf 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -28,6 +28,7 @@
28#include <linux/types.h> 28#include <linux/types.h>
29 29
30struct net_device; 30struct net_device;
31struct net;
31 32
32enum batadv_hard_if_state { 33enum batadv_hard_if_state {
33 BATADV_IF_NOT_IN_USE, 34 BATADV_IF_NOT_IN_USE,
@@ -55,7 +56,7 @@ bool batadv_is_wifi_iface(int ifindex);
55struct batadv_hard_iface* 56struct batadv_hard_iface*
56batadv_hardif_get_by_netdev(const struct net_device *net_dev); 57batadv_hardif_get_by_netdev(const struct net_device *net_dev);
57int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, 58int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
58 const char *iface_name); 59 struct net *net, const char *iface_name);
59void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface, 60void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
60 enum batadv_hard_if_cleanup autodel); 61 enum batadv_hard_if_cleanup autodel);
61void batadv_hardif_remove_interfaces(void); 62void batadv_hardif_remove_interfaces(void);
diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h
index 9bb57b87447c..cbbf87075f06 100644
--- a/net/batman-adv/hash.h
+++ b/net/batman-adv/hash.h
@@ -32,10 +32,10 @@ struct lock_class_key;
32/* callback to a compare function. should compare 2 element datas for their 32/* callback to a compare function. should compare 2 element datas for their
33 * keys 33 * keys
34 * 34 *
35 * Return: 0 if same and not 0 if not same 35 * Return: true if same and false if not same
36 */ 36 */
37typedef int (*batadv_hashdata_compare_cb)(const struct hlist_node *, 37typedef bool (*batadv_hashdata_compare_cb)(const struct hlist_node *,
38 const void *); 38 const void *);
39 39
40/* the hashfunction 40/* the hashfunction
41 * 41 *
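The hash.h hunk above switches the compare callback to return bool, matching converted callbacks such as batadv_compare_dat() earlier in this diff. A small userspace sketch of that convention follows; compare_u32() and lookup() are made-up stand-ins for the callback and the bucket walk, not the kernel's hash helpers.

/* The callback answers "do these two keys match?" directly as bool,
 * instead of the old 0/1 int convention.
 */
#include <stdbool.h>
#include <stddef.h>
#include <string.h>

typedef bool (*compare_cb)(const void *stored, const void *key);

static bool compare_u32(const void *stored, const void *key)
{
	return memcmp(stored, key, sizeof(unsigned int)) == 0;
}

/* linear walk as a stand-in for the real bucket traversal */
static const void *lookup(const void *table, size_t n, size_t stride,
			  const void *key, compare_cb cmp)
{
	const char *base = table;
	size_t i;

	for (i = 0; i < n; i++)
		if (cmp(base + i * stride, key))
			return base + i * stride;

	return NULL;
}

int main(void)
{
	unsigned int entries[] = { 7, 42, 1337 };
	unsigned int key = 42;

	return lookup(entries, 3, sizeof(entries[0]), &key, compare_u32) ? 0 : 1;
}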
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index 14d0013b387e..777aea10cd8f 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -104,25 +104,21 @@ static int batadv_socket_open(struct inode *inode, struct file *file)
104 104
105static int batadv_socket_release(struct inode *inode, struct file *file) 105static int batadv_socket_release(struct inode *inode, struct file *file)
106{ 106{
107 struct batadv_socket_client *socket_client = file->private_data; 107 struct batadv_socket_client *client = file->private_data;
108 struct batadv_socket_packet *socket_packet; 108 struct batadv_socket_packet *packet, *tmp;
109 struct list_head *list_pos, *list_pos_tmp;
110 109
111 spin_lock_bh(&socket_client->lock); 110 spin_lock_bh(&client->lock);
112 111
113 /* for all packets in the queue ... */ 112 /* for all packets in the queue ... */
114 list_for_each_safe(list_pos, list_pos_tmp, &socket_client->queue_list) { 113 list_for_each_entry_safe(packet, tmp, &client->queue_list, list) {
115 socket_packet = list_entry(list_pos, 114 list_del(&packet->list);
116 struct batadv_socket_packet, list); 115 kfree(packet);
117
118 list_del(list_pos);
119 kfree(socket_packet);
120 } 116 }
121 117
122 batadv_socket_client_hash[socket_client->index] = NULL; 118 batadv_socket_client_hash[client->index] = NULL;
123 spin_unlock_bh(&socket_client->lock); 119 spin_unlock_bh(&client->lock);
124 120
125 kfree(socket_client); 121 kfree(client);
126 module_put(THIS_MODULE); 122 module_put(THIS_MODULE);
127 123
128 return 0; 124 return 0;
@@ -337,7 +333,7 @@ err:
337} 333}
338 334
339/** 335/**
340 * batadv_socket_receive_packet - schedule an icmp packet to be sent to 336 * batadv_socket_add_packet - schedule an icmp packet to be sent to
341 * userspace on an icmp socket. 337 * userspace on an icmp socket.
342 * @socket_client: the socket this packet belongs to 338 * @socket_client: the socket this packet belongs to
343 * @icmph: pointer to the header of the icmp packet 339 * @icmph: pointer to the header of the icmp packet
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index d64ddb961979..5f2974bd1227 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -401,11 +401,19 @@ int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
401 401
402 hard_iface = container_of(ptype, struct batadv_hard_iface, 402 hard_iface = container_of(ptype, struct batadv_hard_iface,
403 batman_adv_ptype); 403 batman_adv_ptype);
404
405 /* Prevent processing a packet received on an interface which is getting
406 * shut down; otherwise the packet may trigger de-reference errors

407 * further down in the receive path.
408 */
409 if (!kref_get_unless_zero(&hard_iface->refcount))
410 goto err_out;
411
404 skb = skb_share_check(skb, GFP_ATOMIC); 412 skb = skb_share_check(skb, GFP_ATOMIC);
405 413
406 /* skb was released by skb_share_check() */ 414 /* skb was released by skb_share_check() */
407 if (!skb) 415 if (!skb)
408 goto err_out; 416 goto err_put;
409 417
410 /* packet should hold at least type and version */ 418 /* packet should hold at least type and version */
411 if (unlikely(!pskb_may_pull(skb, 2))) 419 if (unlikely(!pskb_may_pull(skb, 2)))
@@ -448,6 +456,8 @@ int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
448 if (ret == NET_RX_DROP) 456 if (ret == NET_RX_DROP)
449 kfree_skb(skb); 457 kfree_skb(skb);
450 458
459 batadv_hardif_put(hard_iface);
460
451 /* return NET_RX_SUCCESS in any case as we 461 /* return NET_RX_SUCCESS in any case as we
452 * most probably dropped the packet for 462 * most probably dropped the packet for
453 * routing-logical reasons. 463 * routing-logical reasons.
@@ -456,6 +466,8 @@ int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
456 466
457err_free: 467err_free:
458 kfree_skb(skb); 468 kfree_skb(skb);
469err_put:
470 batadv_hardif_put(hard_iface);
459err_out: 471err_out:
460 return NET_RX_DROP; 472 return NET_RX_DROP;
461} 473}
@@ -663,8 +675,8 @@ static void batadv_tvlv_handler_put(struct batadv_tvlv_handler *tvlv_handler)
663 * 675 *
664 * Return: tvlv handler if found or NULL otherwise. 676 * Return: tvlv handler if found or NULL otherwise.
665 */ 677 */
666static struct batadv_tvlv_handler 678static struct batadv_tvlv_handler *
667*batadv_tvlv_handler_get(struct batadv_priv *bat_priv, u8 type, u8 version) 679batadv_tvlv_handler_get(struct batadv_priv *bat_priv, u8 type, u8 version)
668{ 680{
669 struct batadv_tvlv_handler *tvlv_handler_tmp, *tvlv_handler = NULL; 681 struct batadv_tvlv_handler *tvlv_handler_tmp, *tvlv_handler = NULL;
670 682
@@ -722,8 +734,8 @@ static void batadv_tvlv_container_put(struct batadv_tvlv_container *tvlv)
722 * 734 *
723 * Return: tvlv container if found or NULL otherwise. 735 * Return: tvlv container if found or NULL otherwise.
724 */ 736 */
725static struct batadv_tvlv_container 737static struct batadv_tvlv_container *
726*batadv_tvlv_container_get(struct batadv_priv *bat_priv, u8 type, u8 version) 738batadv_tvlv_container_get(struct batadv_priv *bat_priv, u8 type, u8 version)
727{ 739{
728 struct batadv_tvlv_container *tvlv_tmp, *tvlv = NULL; 740 struct batadv_tvlv_container *tvlv_tmp, *tvlv = NULL;
729 741
@@ -736,9 +748,7 @@ static struct batadv_tvlv_container
736 if (tvlv_tmp->tvlv_hdr.version != version) 748 if (tvlv_tmp->tvlv_hdr.version != version)
737 continue; 749 continue;
738 750
739 if (!kref_get_unless_zero(&tvlv_tmp->refcount)) 751 kref_get(&tvlv_tmp->refcount);
740 continue;
741
742 tvlv = tvlv_tmp; 752 tvlv = tvlv_tmp;
743 break; 753 break;
744 } 754 }
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index db4533631834..76925266deed 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -24,7 +24,7 @@
24#define BATADV_DRIVER_DEVICE "batman-adv" 24#define BATADV_DRIVER_DEVICE "batman-adv"
25 25
26#ifndef BATADV_SOURCE_VERSION 26#ifndef BATADV_SOURCE_VERSION
27#define BATADV_SOURCE_VERSION "2016.1" 27#define BATADV_SOURCE_VERSION "2016.2"
28#endif 28#endif
29 29
30/* B.A.T.M.A.N. parameters */ 30/* B.A.T.M.A.N. parameters */
@@ -120,6 +120,8 @@
120#define BATADV_BLA_BACKBONE_TIMEOUT (BATADV_BLA_PERIOD_LENGTH * 6) 120#define BATADV_BLA_BACKBONE_TIMEOUT (BATADV_BLA_PERIOD_LENGTH * 6)
121#define BATADV_BLA_CLAIM_TIMEOUT (BATADV_BLA_PERIOD_LENGTH * 10) 121#define BATADV_BLA_CLAIM_TIMEOUT (BATADV_BLA_PERIOD_LENGTH * 10)
122#define BATADV_BLA_WAIT_PERIODS 3 122#define BATADV_BLA_WAIT_PERIODS 3
123#define BATADV_BLA_LOOPDETECT_PERIODS 6
124#define BATADV_BLA_LOOPDETECT_TIMEOUT 3000 /* 3 seconds */
123 125
124#define BATADV_DUPLIST_SIZE 16 126#define BATADV_DUPLIST_SIZE 16
125#define BATADV_DUPLIST_TIMEOUT 500 /* 500 ms */ 127#define BATADV_DUPLIST_TIMEOUT 500 /* 500 ms */
@@ -142,10 +144,12 @@ enum batadv_uev_action {
142 BATADV_UEV_ADD = 0, 144 BATADV_UEV_ADD = 0,
143 BATADV_UEV_DEL, 145 BATADV_UEV_DEL,
144 BATADV_UEV_CHANGE, 146 BATADV_UEV_CHANGE,
147 BATADV_UEV_LOOPDETECT,
145}; 148};
146 149
147enum batadv_uev_type { 150enum batadv_uev_type {
148 BATADV_UEV_GW = 0, 151 BATADV_UEV_GW = 0,
152 BATADV_UEV_BLA,
149}; 153};
150 154
151#define BATADV_GW_THRESHOLD 50 155#define BATADV_GW_THRESHOLD 50
@@ -288,7 +292,7 @@ static inline void _batadv_dbg(int type __always_unused,
288 * 292 *
289 * note: can't use ether_addr_equal() as it requires aligned memory 293 * note: can't use ether_addr_equal() as it requires aligned memory
290 * 294 *
291 * Return: 1 if they are the same ethernet addr 295 * Return: true if they are the same ethernet addr
292 */ 296 */
293static inline bool batadv_compare_eth(const void *data1, const void *data2) 297static inline bool batadv_compare_eth(const void *data1, const void *data2)
294{ 298{
@@ -296,7 +300,8 @@ static inline bool batadv_compare_eth(const void *data1, const void *data2)
296} 300}
297 301
298/** 302/**
299 * has_timed_out - compares current time (jiffies) and timestamp + timeout 303 * batadv_has_timed_out - compares current time (jiffies) and timestamp +
304 * timeout
300 * @timestamp: base value to compare with (in jiffies) 305 * @timestamp: base value to compare with (in jiffies)
301 * @timeout: added to base value before comparing (in milliseconds) 306 * @timeout: added to base value before comparing (in milliseconds)
302 * 307 *
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 8caa2c72efa3..c32f24fafe67 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -394,7 +394,8 @@ static int batadv_mcast_forw_mode_check(struct batadv_priv *bat_priv,
394} 394}
395 395
396/** 396/**
397 * batadv_mcast_want_all_ip_count - count nodes with unspecific mcast interest 397 * batadv_mcast_forw_want_all_ip_count - count nodes with unspecific mcast
398 * interest
398 * @bat_priv: the bat priv with all the soft interface information 399 * @bat_priv: the bat priv with all the soft interface information
399 * @ethhdr: ethernet header of a packet 400 * @ethhdr: ethernet header of a packet
400 * 401 *
@@ -433,7 +434,7 @@ batadv_mcast_forw_tt_node_get(struct batadv_priv *bat_priv,
433} 434}
434 435
435/** 436/**
436 * batadv_mcast_want_forw_ipv4_node_get - get a node with an ipv4 flag 437 * batadv_mcast_forw_ipv4_node_get - get a node with an ipv4 flag
437 * @bat_priv: the bat priv with all the soft interface information 438 * @bat_priv: the bat priv with all the soft interface information
438 * 439 *
439 * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_IPV4 flag set and 440 * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_IPV4 flag set and
@@ -460,7 +461,7 @@ batadv_mcast_forw_ipv4_node_get(struct batadv_priv *bat_priv)
460} 461}
461 462
462/** 463/**
463 * batadv_mcast_want_forw_ipv6_node_get - get a node with an ipv6 flag 464 * batadv_mcast_forw_ipv6_node_get - get a node with an ipv6 flag
464 * @bat_priv: the bat priv with all the soft interface information 465 * @bat_priv: the bat priv with all the soft interface information
465 * 466 *
466 * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_IPV6 flag set 467 * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_IPV6 flag set
@@ -487,7 +488,7 @@ batadv_mcast_forw_ipv6_node_get(struct batadv_priv *bat_priv)
487} 488}
488 489
489/** 490/**
490 * batadv_mcast_want_forw_ip_node_get - get a node with an ipv4/ipv6 flag 491 * batadv_mcast_forw_ip_node_get - get a node with an ipv4/ipv6 flag
491 * @bat_priv: the bat priv with all the soft interface information 492 * @bat_priv: the bat priv with all the soft interface information
492 * @ethhdr: an ethernet header to determine the protocol family from 493 * @ethhdr: an ethernet header to determine the protocol family from
493 * 494 *
@@ -511,7 +512,7 @@ batadv_mcast_forw_ip_node_get(struct batadv_priv *bat_priv,
511} 512}
512 513
513/** 514/**
514 * batadv_mcast_want_forw_unsnoop_node_get - get a node with an unsnoopable flag 515 * batadv_mcast_forw_unsnoop_node_get - get a node with an unsnoopable flag
515 * @bat_priv: the bat priv with all the soft interface information 516 * @bat_priv: the bat priv with all the soft interface information
516 * 517 *
517 * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_UNSNOOPABLES flag 518 * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_UNSNOOPABLES flag
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index b41719b6487a..678f06865312 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -510,10 +510,10 @@ static u32 batadv_nc_hash_choose(const void *data, u32 size)
510 * @node: node in the local table 510 * @node: node in the local table
511 * @data2: second object to compare the node to 511 * @data2: second object to compare the node to
512 * 512 *
513 * Return: 1 if the two entry are the same, 0 otherwise 513 * Return: true if the two entry are the same, false otherwise
514 */ 514 */
515static int batadv_nc_hash_compare(const struct hlist_node *node, 515static bool batadv_nc_hash_compare(const struct hlist_node *node,
516 const void *data2) 516 const void *data2)
517{ 517{
518 const struct batadv_nc_path *nc_path1, *nc_path2; 518 const struct batadv_nc_path *nc_path1, *nc_path2;
519 519
@@ -521,15 +521,13 @@ static int batadv_nc_hash_compare(const struct hlist_node *node,
521 nc_path2 = data2; 521 nc_path2 = data2;
522 522
523 /* Return 1 if the two keys are identical */ 523 /* Return 1 if the two keys are identical */
524 if (memcmp(nc_path1->prev_hop, nc_path2->prev_hop, 524 if (!batadv_compare_eth(nc_path1->prev_hop, nc_path2->prev_hop))
525 sizeof(nc_path1->prev_hop)) != 0) 525 return false;
526 return 0;
527 526
528 if (memcmp(nc_path1->next_hop, nc_path2->next_hop, 527 if (!batadv_compare_eth(nc_path1->next_hop, nc_path2->next_hop))
529 sizeof(nc_path1->next_hop)) != 0) 528 return false;
530 return 0;
531 529
532 return 1; 530 return true;
533} 531}
534 532
535/** 533/**
@@ -714,7 +712,7 @@ static void batadv_nc_worker(struct work_struct *work)
714 struct batadv_priv *bat_priv; 712 struct batadv_priv *bat_priv;
715 unsigned long timeout; 713 unsigned long timeout;
716 714
717 delayed_work = container_of(work, struct delayed_work, work); 715 delayed_work = to_delayed_work(work);
718 priv_nc = container_of(delayed_work, struct batadv_priv_nc, work); 716 priv_nc = container_of(delayed_work, struct batadv_priv_nc, work);
719 bat_priv = container_of(priv_nc, struct batadv_priv, nc); 717 bat_priv = container_of(priv_nc, struct batadv_priv, nc);
720 718
@@ -793,10 +791,10 @@ static bool batadv_can_nc_with_orig(struct batadv_priv *bat_priv,
793 * 791 *
794 * Return: the nc_node if found, NULL otherwise. 792 * Return: the nc_node if found, NULL otherwise.
795 */ 793 */
796static struct batadv_nc_node 794static struct batadv_nc_node *
797*batadv_nc_find_nc_node(struct batadv_orig_node *orig_node, 795batadv_nc_find_nc_node(struct batadv_orig_node *orig_node,
798 struct batadv_orig_node *orig_neigh_node, 796 struct batadv_orig_node *orig_neigh_node,
799 bool in_coding) 797 bool in_coding)
800{ 798{
801 struct batadv_nc_node *nc_node, *nc_node_out = NULL; 799 struct batadv_nc_node *nc_node, *nc_node_out = NULL;
802 struct list_head *list; 800 struct list_head *list;
@@ -835,11 +833,11 @@ static struct batadv_nc_node
835 * 833 *
836 * Return: the nc_node if found or created, NULL in case of an error. 834 * Return: the nc_node if found or created, NULL in case of an error.
837 */ 835 */
838static struct batadv_nc_node 836static struct batadv_nc_node *
839*batadv_nc_get_nc_node(struct batadv_priv *bat_priv, 837batadv_nc_get_nc_node(struct batadv_priv *bat_priv,
840 struct batadv_orig_node *orig_node, 838 struct batadv_orig_node *orig_node,
841 struct batadv_orig_node *orig_neigh_node, 839 struct batadv_orig_node *orig_neigh_node,
842 bool in_coding) 840 bool in_coding)
843{ 841{
844 struct batadv_nc_node *nc_node; 842 struct batadv_nc_node *nc_node;
845 spinlock_t *lock; /* Used to lock list selected by "int in_coding" */ 843 spinlock_t *lock; /* Used to lock list selected by "int in_coding" */
@@ -856,8 +854,7 @@ static struct batadv_nc_node
856 if (!nc_node) 854 if (!nc_node)
857 return NULL; 855 return NULL;
858 856
859 if (!kref_get_unless_zero(&orig_neigh_node->refcount)) 857 kref_get(&orig_neigh_node->refcount);
860 goto free;
861 858
862 /* Initialize nc_node */ 859 /* Initialize nc_node */
863 INIT_LIST_HEAD(&nc_node->list); 860 INIT_LIST_HEAD(&nc_node->list);
@@ -884,10 +881,6 @@ static struct batadv_nc_node
884 spin_unlock_bh(lock); 881 spin_unlock_bh(lock);
885 882
886 return nc_node; 883 return nc_node;
887
888free:
889 kfree(nc_node);
890 return NULL;
891} 884}
892 885
893/** 886/**
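Several hunks (network-coding.c above, originator.c and send.c below) replace the open-coded container_of(work, struct delayed_work, work) with the to_delayed_work() helper, which wraps exactly that container_of(). Below is a userspace sketch of the underlying idiom; container_of is re-implemented with offsetof and the struct names are invented for illustration.

/* Given a pointer to an embedded member, recover the enclosing structure. */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct work_item {
	int pending;
};

struct delayed_item {
	long delay_ms;
	struct work_item work; /* embedded member handed to callbacks */
};

static void callback(struct work_item *w)
{
	/* equivalent of to_delayed_work(): walk back to the container */
	struct delayed_item *d = container_of(w, struct delayed_item, work);

	printf("delay = %ld ms\n", d->delay_ms);
}

int main(void)
{
	struct delayed_item item = { .delay_ms = 500 };

	callback(&item.work);
	return 0;
}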
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index e4cbb0753e37..1ff4ee473966 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -54,9 +54,9 @@ static void batadv_purge_orig(struct work_struct *work);
54 * @node: node in the local table 54 * @node: node in the local table
55 * @data2: second object to compare the node to 55 * @data2: second object to compare the node to
56 * 56 *
57 * Return: 1 if they are the same originator 57 * Return: true if they are the same originator
58 */ 58 */
59int batadv_compare_orig(const struct hlist_node *node, const void *data2) 59bool batadv_compare_orig(const struct hlist_node *node, const void *data2)
60{ 60{
61 const void *data1 = container_of(node, struct batadv_orig_node, 61 const void *data1 = container_of(node, struct batadv_orig_node,
62 hash_entry); 62 hash_entry);
@@ -250,7 +250,6 @@ static void batadv_neigh_node_release(struct kref *ref)
250{ 250{
251 struct hlist_node *node_tmp; 251 struct hlist_node *node_tmp;
252 struct batadv_neigh_node *neigh_node; 252 struct batadv_neigh_node *neigh_node;
253 struct batadv_hardif_neigh_node *hardif_neigh;
254 struct batadv_neigh_ifinfo *neigh_ifinfo; 253 struct batadv_neigh_ifinfo *neigh_ifinfo;
255 struct batadv_algo_ops *bao; 254 struct batadv_algo_ops *bao;
256 255
@@ -262,13 +261,7 @@ static void batadv_neigh_node_release(struct kref *ref)
262 batadv_neigh_ifinfo_put(neigh_ifinfo); 261 batadv_neigh_ifinfo_put(neigh_ifinfo);
263 } 262 }
264 263
265 hardif_neigh = batadv_hardif_neigh_get(neigh_node->if_incoming, 264 batadv_hardif_neigh_put(neigh_node->hardif_neigh);
266 neigh_node->addr);
267 if (hardif_neigh) {
268 /* batadv_hardif_neigh_get() increases refcount too */
269 batadv_hardif_neigh_put(hardif_neigh);
270 batadv_hardif_neigh_put(hardif_neigh);
271 }
272 265
273 if (bao->bat_neigh_free) 266 if (bao->bat_neigh_free)
274 bao->bat_neigh_free(neigh_node); 267 bao->bat_neigh_free(neigh_node);
@@ -289,7 +282,7 @@ void batadv_neigh_node_put(struct batadv_neigh_node *neigh_node)
289} 282}
290 283
291/** 284/**
292 * batadv_orig_node_get_router - router to the originator depending on iface 285 * batadv_orig_router_get - router to the originator depending on iface
293 * @orig_node: the orig node for the router 286 * @orig_node: the orig node for the router
294 * @if_outgoing: the interface where the payload packet has been received or 287 * @if_outgoing: the interface where the payload packet has been received or
295 * the OGM should be sent to 288 * the OGM should be sent to
@@ -381,12 +374,8 @@ batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node,
381 if (!orig_ifinfo) 374 if (!orig_ifinfo)
382 goto out; 375 goto out;
383 376
384 if (if_outgoing != BATADV_IF_DEFAULT && 377 if (if_outgoing != BATADV_IF_DEFAULT)
385 !kref_get_unless_zero(&if_outgoing->refcount)) { 378 kref_get(&if_outgoing->refcount);
386 kfree(orig_ifinfo);
387 orig_ifinfo = NULL;
388 goto out;
389 }
390 379
391 reset_time = jiffies - 1; 380 reset_time = jiffies - 1;
392 reset_time -= msecs_to_jiffies(BATADV_RESET_PROTECTION_MS); 381 reset_time -= msecs_to_jiffies(BATADV_RESET_PROTECTION_MS);
@@ -462,11 +451,8 @@ batadv_neigh_ifinfo_new(struct batadv_neigh_node *neigh,
462 if (!neigh_ifinfo) 451 if (!neigh_ifinfo)
463 goto out; 452 goto out;
464 453
465 if (if_outgoing && !kref_get_unless_zero(&if_outgoing->refcount)) { 454 if (if_outgoing)
466 kfree(neigh_ifinfo); 455 kref_get(&if_outgoing->refcount);
467 neigh_ifinfo = NULL;
468 goto out;
469 }
470 456
471 INIT_HLIST_NODE(&neigh_ifinfo->list); 457 INIT_HLIST_NODE(&neigh_ifinfo->list);
472 kref_init(&neigh_ifinfo->refcount); 458 kref_init(&neigh_ifinfo->refcount);
@@ -539,15 +525,11 @@ batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface,
539 if (hardif_neigh) 525 if (hardif_neigh)
540 goto out; 526 goto out;
541 527
542 if (!kref_get_unless_zero(&hard_iface->refcount))
543 goto out;
544
545 hardif_neigh = kzalloc(sizeof(*hardif_neigh), GFP_ATOMIC); 528 hardif_neigh = kzalloc(sizeof(*hardif_neigh), GFP_ATOMIC);
546 if (!hardif_neigh) { 529 if (!hardif_neigh)
547 batadv_hardif_put(hard_iface);
548 goto out; 530 goto out;
549 }
550 531
532 kref_get(&hard_iface->refcount);
551 INIT_HLIST_NODE(&hardif_neigh->list); 533 INIT_HLIST_NODE(&hardif_neigh->list);
552 ether_addr_copy(hardif_neigh->addr, neigh_addr); 534 ether_addr_copy(hardif_neigh->addr, neigh_addr);
553 hardif_neigh->if_incoming = hard_iface; 535 hardif_neigh->if_incoming = hard_iface;
@@ -650,19 +632,19 @@ batadv_neigh_node_new(struct batadv_orig_node *orig_node,
650 if (!neigh_node) 632 if (!neigh_node)
651 goto out; 633 goto out;
652 634
653 if (!kref_get_unless_zero(&hard_iface->refcount)) {
654 kfree(neigh_node);
655 neigh_node = NULL;
656 goto out;
657 }
658
659 INIT_HLIST_NODE(&neigh_node->list); 635 INIT_HLIST_NODE(&neigh_node->list);
660 INIT_HLIST_HEAD(&neigh_node->ifinfo_list); 636 INIT_HLIST_HEAD(&neigh_node->ifinfo_list);
661 spin_lock_init(&neigh_node->ifinfo_lock); 637 spin_lock_init(&neigh_node->ifinfo_lock);
662 638
639 kref_get(&hard_iface->refcount);
663 ether_addr_copy(neigh_node->addr, neigh_addr); 640 ether_addr_copy(neigh_node->addr, neigh_addr);
664 neigh_node->if_incoming = hard_iface; 641 neigh_node->if_incoming = hard_iface;
665 neigh_node->orig_node = orig_node; 642 neigh_node->orig_node = orig_node;
643 neigh_node->last_seen = jiffies;
644
645 /* increment unique neighbor refcount */
646 kref_get(&hardif_neigh->refcount);
647 neigh_node->hardif_neigh = hardif_neigh;
666 648
667 /* extra reference for return */ 649 /* extra reference for return */
668 kref_init(&neigh_node->refcount); 650 kref_init(&neigh_node->refcount);
@@ -672,9 +654,6 @@ batadv_neigh_node_new(struct batadv_orig_node *orig_node,
672 hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list); 654 hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list);
673 spin_unlock_bh(&orig_node->neigh_list_lock); 655 spin_unlock_bh(&orig_node->neigh_list_lock);
674 656
675 /* increment unique neighbor refcount */
676 kref_get(&hardif_neigh->refcount);
677
678 batadv_dbg(BATADV_DBG_BATMAN, orig_node->bat_priv, 657 batadv_dbg(BATADV_DBG_BATMAN, orig_node->bat_priv,
679 "Creating new neighbor %pM for orig_node %pM on interface %s\n", 658 "Creating new neighbor %pM for orig_node %pM on interface %s\n",
680 neigh_addr, orig_node->orig, hard_iface->net_dev->name); 659 neigh_addr, orig_node->orig, hard_iface->net_dev->name);
@@ -1165,6 +1144,9 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv,
1165 if (hard_iface->soft_iface != bat_priv->soft_iface) 1144 if (hard_iface->soft_iface != bat_priv->soft_iface)
1166 continue; 1145 continue;
1167 1146
1147 if (!kref_get_unless_zero(&hard_iface->refcount))
1148 continue;
1149
1168 best_neigh_node = batadv_find_best_neighbor(bat_priv, 1150 best_neigh_node = batadv_find_best_neighbor(bat_priv,
1169 orig_node, 1151 orig_node,
1170 hard_iface); 1152 hard_iface);
@@ -1172,6 +1154,8 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv,
1172 best_neigh_node); 1154 best_neigh_node);
1173 if (best_neigh_node) 1155 if (best_neigh_node)
1174 batadv_neigh_node_put(best_neigh_node); 1156 batadv_neigh_node_put(best_neigh_node);
1157
1158 batadv_hardif_put(hard_iface);
1175 } 1159 }
1176 rcu_read_unlock(); 1160 rcu_read_unlock();
1177 1161
@@ -1222,7 +1206,7 @@ static void batadv_purge_orig(struct work_struct *work)
1222 struct delayed_work *delayed_work; 1206 struct delayed_work *delayed_work;
1223 struct batadv_priv *bat_priv; 1207 struct batadv_priv *bat_priv;
1224 1208
1225 delayed_work = container_of(work, struct delayed_work, work); 1209 delayed_work = to_delayed_work(work);
1226 bat_priv = container_of(delayed_work, struct batadv_priv, orig_work); 1210 bat_priv = container_of(delayed_work, struct batadv_priv, orig_work);
1227 _batadv_purge_orig(bat_priv); 1211 _batadv_purge_orig(bat_priv);
1228 queue_delayed_work(batadv_event_workqueue, 1212 queue_delayed_work(batadv_event_workqueue,
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index 4e8b67f11051..64a8951e5844 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -33,7 +33,7 @@
33 33
34struct seq_file; 34struct seq_file;
35 35
36int batadv_compare_orig(const struct hlist_node *node, const void *data2); 36bool batadv_compare_orig(const struct hlist_node *node, const void *data2);
37int batadv_originator_init(struct batadv_priv *bat_priv); 37int batadv_originator_init(struct batadv_priv *bat_priv);
38void batadv_originator_free(struct batadv_priv *bat_priv); 38void batadv_originator_free(struct batadv_priv *bat_priv);
39void batadv_purge_orig_ref(struct batadv_priv *bat_priv); 39void batadv_purge_orig_ref(struct batadv_priv *bat_priv);
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index 8a8d7ca1a5cf..372128ddb474 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -175,6 +175,7 @@ enum batadv_bla_claimframe {
175 BATADV_CLAIM_TYPE_UNCLAIM = 0x01, 175 BATADV_CLAIM_TYPE_UNCLAIM = 0x01,
176 BATADV_CLAIM_TYPE_ANNOUNCE = 0x02, 176 BATADV_CLAIM_TYPE_ANNOUNCE = 0x02,
177 BATADV_CLAIM_TYPE_REQUEST = 0x03, 177 BATADV_CLAIM_TYPE_REQUEST = 0x03,
178 BATADV_CLAIM_TYPE_LOOPDETECT = 0x04,
178}; 179};
179 180
180/** 181/**
@@ -501,7 +502,7 @@ struct batadv_coded_packet {
501#pragma pack() 502#pragma pack()
502 503
503/** 504/**
504 * struct batadv_unicast_tvlv - generic unicast packet with tvlv payload 505 * struct batadv_unicast_tvlv_packet - generic unicast packet with tvlv payload
505 * @packet_type: batman-adv packet type, part of the general header 506 * @packet_type: batman-adv packet type, part of the general header
506 * @version: batman-adv protocol version, part of the genereal header 507 * @version: batman-adv protocol version, part of the genereal header
507 * @ttl: time to live for this packet, part of the genereal header 508 * @ttl: time to live for this packet, part of the genereal header
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 4dd646a52f1a..ae850f2d11cb 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -100,11 +100,20 @@ static void _batadv_update_route(struct batadv_priv *bat_priv,
100 if (curr_router) 100 if (curr_router)
101 batadv_neigh_node_put(curr_router); 101 batadv_neigh_node_put(curr_router);
102 102
103 spin_lock_bh(&orig_node->neigh_list_lock);
104 /* curr_router used earlier may not be the current orig_ifinfo->router
105 * anymore because it was dereferenced outside of the neigh_list_lock
 106 * protected region. After the new best neighbor has replaced the current
 107 * best neighbor, the reference counter needs to decrease. Consequently,
108 * the code needs to ensure the curr_router variable contains a pointer
109 * to the replaced best neighbor.
110 */
111 curr_router = rcu_dereference_protected(orig_ifinfo->router, true);
112
103 /* increase refcount of new best neighbor */ 113 /* increase refcount of new best neighbor */
104 if (neigh_node && !kref_get_unless_zero(&neigh_node->refcount)) 114 if (neigh_node)
105 neigh_node = NULL; 115 kref_get(&neigh_node->refcount);
106 116
107 spin_lock_bh(&orig_node->neigh_list_lock);
108 rcu_assign_pointer(orig_ifinfo->router, neigh_node); 117 rcu_assign_pointer(orig_ifinfo->router, neigh_node);
109 spin_unlock_bh(&orig_node->neigh_list_lock); 118 spin_unlock_bh(&orig_node->neigh_list_lock);
110 batadv_orig_ifinfo_put(orig_ifinfo); 119 batadv_orig_ifinfo_put(orig_ifinfo);
@@ -154,18 +163,18 @@ out:
154 * doesn't change otherwise. 163 * doesn't change otherwise.
155 * 164 *
156 * Return: 165 * Return:
157 * 0 if the packet is to be accepted. 166 * false if the packet is to be accepted.
158 * 1 if the packet is to be ignored. 167 * true if the packet is to be ignored.
159 */ 168 */
160int batadv_window_protected(struct batadv_priv *bat_priv, s32 seq_num_diff, 169bool batadv_window_protected(struct batadv_priv *bat_priv, s32 seq_num_diff,
161 s32 seq_old_max_diff, unsigned long *last_reset, 170 s32 seq_old_max_diff, unsigned long *last_reset,
162 bool *protection_started) 171 bool *protection_started)
163{ 172{
164 if (seq_num_diff <= -seq_old_max_diff || 173 if (seq_num_diff <= -seq_old_max_diff ||
165 seq_num_diff >= BATADV_EXPECTED_SEQNO_RANGE) { 174 seq_num_diff >= BATADV_EXPECTED_SEQNO_RANGE) {
166 if (!batadv_has_timed_out(*last_reset, 175 if (!batadv_has_timed_out(*last_reset,
167 BATADV_RESET_PROTECTION_MS)) 176 BATADV_RESET_PROTECTION_MS))
168 return 1; 177 return true;
169 178
170 *last_reset = jiffies; 179 *last_reset = jiffies;
171 if (protection_started) 180 if (protection_started)
@@ -174,7 +183,7 @@ int batadv_window_protected(struct batadv_priv *bat_priv, s32 seq_num_diff,
174 "old packet received, start protection\n"); 183 "old packet received, start protection\n");
175 } 184 }
176 185
177 return 0; 186 return false;
178} 187}
179 188
180bool batadv_check_management_packet(struct sk_buff *skb, 189bool batadv_check_management_packet(struct sk_buff *skb,
@@ -709,8 +718,9 @@ out:
709 return ret; 718 return ret;
710} 719}
711 720
712static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, 721static bool batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
713 struct sk_buff *skb, int hdr_len) { 722 struct sk_buff *skb, int hdr_len)
723{
714 struct batadv_unicast_packet *unicast_packet; 724 struct batadv_unicast_packet *unicast_packet;
715 struct batadv_hard_iface *primary_if; 725 struct batadv_hard_iface *primary_if;
716 struct batadv_orig_node *orig_node; 726 struct batadv_orig_node *orig_node;
@@ -721,11 +731,11 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
721 731
722 /* check if there is enough data before accessing it */ 732 /* check if there is enough data before accessing it */
723 if (!pskb_may_pull(skb, hdr_len + ETH_HLEN)) 733 if (!pskb_may_pull(skb, hdr_len + ETH_HLEN))
724 return 0; 734 return false;
725 735
 726 /* create a copy of the skb (in case of re-routing) to modify it. */ 736 /* create a copy of the skb (in case of re-routing) to modify it. */
727 if (skb_cow(skb, sizeof(*unicast_packet)) < 0) 737 if (skb_cow(skb, sizeof(*unicast_packet)) < 0)
728 return 0; 738 return false;
729 739
730 unicast_packet = (struct batadv_unicast_packet *)skb->data; 740 unicast_packet = (struct batadv_unicast_packet *)skb->data;
731 vid = batadv_get_vid(skb, hdr_len); 741 vid = batadv_get_vid(skb, hdr_len);
@@ -749,7 +759,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
749 * table. If not, let the packet go untouched anyway because 759 * table. If not, let the packet go untouched anyway because
750 * there is nothing the node can do 760 * there is nothing the node can do
751 */ 761 */
752 return 1; 762 return true;
753 } 763 }
754 764
755 /* retrieve the TTVN known by this node for the packet destination. This 765 /* retrieve the TTVN known by this node for the packet destination. This
@@ -765,7 +775,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
765 * not be possible to deliver it 775 * not be possible to deliver it
766 */ 776 */
767 if (!orig_node) 777 if (!orig_node)
768 return 0; 778 return false;
769 779
770 curr_ttvn = (u8)atomic_read(&orig_node->last_ttvn); 780 curr_ttvn = (u8)atomic_read(&orig_node->last_ttvn);
771 batadv_orig_node_put(orig_node); 781 batadv_orig_node_put(orig_node);
@@ -776,7 +786,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
776 */ 786 */
777 is_old_ttvn = batadv_seq_before(unicast_packet->ttvn, curr_ttvn); 787 is_old_ttvn = batadv_seq_before(unicast_packet->ttvn, curr_ttvn);
778 if (!is_old_ttvn) 788 if (!is_old_ttvn)
779 return 1; 789 return true;
780 790
781 old_ttvn = unicast_packet->ttvn; 791 old_ttvn = unicast_packet->ttvn;
782 /* the packet was forged based on outdated network information. Its 792 /* the packet was forged based on outdated network information. Its
@@ -789,7 +799,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
789 "Rerouting unicast packet to %pM (dst=%pM): TTVN mismatch old_ttvn=%u new_ttvn=%u\n", 799 "Rerouting unicast packet to %pM (dst=%pM): TTVN mismatch old_ttvn=%u new_ttvn=%u\n",
790 unicast_packet->dest, ethhdr->h_dest, 800 unicast_packet->dest, ethhdr->h_dest,
791 old_ttvn, curr_ttvn); 801 old_ttvn, curr_ttvn);
792 return 1; 802 return true;
793 } 803 }
794 804
795 /* the packet has not been re-routed: either the destination is 805 /* the packet has not been re-routed: either the destination is
@@ -797,14 +807,14 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
797 * it is possible to drop the packet 807 * it is possible to drop the packet
798 */ 808 */
799 if (!batadv_is_my_client(bat_priv, ethhdr->h_dest, vid)) 809 if (!batadv_is_my_client(bat_priv, ethhdr->h_dest, vid))
800 return 0; 810 return false;
801 811
802 /* update the header in order to let the packet be delivered to this 812 /* update the header in order to let the packet be delivered to this
803 * node's soft interface 813 * node's soft interface
804 */ 814 */
805 primary_if = batadv_primary_if_get_selected(bat_priv); 815 primary_if = batadv_primary_if_get_selected(bat_priv);
806 if (!primary_if) 816 if (!primary_if)
807 return 0; 817 return false;
808 818
809 ether_addr_copy(unicast_packet->dest, primary_if->net_dev->dev_addr); 819 ether_addr_copy(unicast_packet->dest, primary_if->net_dev->dev_addr);
810 820
@@ -812,7 +822,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
812 822
813 unicast_packet->ttvn = curr_ttvn; 823 unicast_packet->ttvn = curr_ttvn;
814 824
815 return 1; 825 return true;
816} 826}
817 827
818/** 828/**
@@ -903,7 +913,7 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
903 hdr_size)) 913 hdr_size))
904 goto rx_success; 914 goto rx_success;
905 915
906 batadv_interface_rx(recv_if->soft_iface, skb, recv_if, hdr_size, 916 batadv_interface_rx(recv_if->soft_iface, skb, hdr_size,
907 orig_node); 917 orig_node);
908 918
909rx_success: 919rx_success:
@@ -1113,8 +1123,7 @@ int batadv_recv_bcast_packet(struct sk_buff *skb,
1113 goto rx_success; 1123 goto rx_success;
1114 1124
1115 /* broadcast for me */ 1125 /* broadcast for me */
1116 batadv_interface_rx(recv_if->soft_iface, skb, recv_if, hdr_size, 1126 batadv_interface_rx(recv_if->soft_iface, skb, hdr_size, orig_node);
1117 orig_node);
1118 1127
1119rx_success: 1128rx_success:
1120 ret = NET_RX_SUCCESS; 1129 ret = NET_RX_SUCCESS;
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index 02a5caa84127..05c3ff42e181 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -51,8 +51,8 @@ struct batadv_neigh_node *
51batadv_find_router(struct batadv_priv *bat_priv, 51batadv_find_router(struct batadv_priv *bat_priv,
52 struct batadv_orig_node *orig_node, 52 struct batadv_orig_node *orig_node,
53 struct batadv_hard_iface *recv_if); 53 struct batadv_hard_iface *recv_if);
54int batadv_window_protected(struct batadv_priv *bat_priv, s32 seq_num_diff, 54bool batadv_window_protected(struct batadv_priv *bat_priv, s32 seq_num_diff,
55 s32 seq_old_max_diff, unsigned long *last_reset, 55 s32 seq_old_max_diff, unsigned long *last_reset,
56 bool *protection_started); 56 bool *protection_started);
57 57
58#endif /* _NET_BATMAN_ADV_ROUTING_H_ */ 58#endif /* _NET_BATMAN_ADV_ROUTING_H_ */
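The routing.c and routing.h hunks above convert batadv_window_protected() to return bool: true means the sequence number falls so far outside the expected window that the packet should be ignored while the protection period is still running; once that period expires the window is reset and the packet is accepted again. A userspace sketch of that decision follows, with millisecond timestamps instead of jiffies; window_protected(), EXPECTED_SEQNO_RANGE and RESET_PROTECTION_MS are stand-ins, not the kernel's symbols or values.

#include <stdbool.h>
#include <stdint.h>

#define EXPECTED_SEQNO_RANGE	65536
#define RESET_PROTECTION_MS	30000

static bool window_protected(int32_t seq_num_diff, int32_t seq_old_max_diff,
			     uint64_t now_ms, uint64_t *last_reset_ms)
{
	if (seq_num_diff <= -seq_old_max_diff ||
	    seq_num_diff >= EXPECTED_SEQNO_RANGE) {
		/* still within the protection period: ignore the packet */
		if (now_ms - *last_reset_ms < RESET_PROTECTION_MS)
			return true;

		/* protection expired: restart it and accept the packet */
		*last_reset_ms = now_ms;
	}

	return false;
}

int main(void)
{
	uint64_t last_reset = 0;

	/* a far out-of-window sequence number shortly after a reset: ignore */
	return window_protected(70000, 64, 1000, &last_reset) ? 0 : 1;
}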
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 3ce06e0a91b1..f2f125684ed9 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -26,6 +26,7 @@
26#include <linux/if.h> 26#include <linux/if.h>
27#include <linux/jiffies.h> 27#include <linux/jiffies.h>
28#include <linux/kernel.h> 28#include <linux/kernel.h>
29#include <linux/kref.h>
29#include <linux/list.h> 30#include <linux/list.h>
30#include <linux/netdevice.h> 31#include <linux/netdevice.h>
31#include <linux/printk.h> 32#include <linux/printk.h>
@@ -552,7 +553,7 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
552 struct net_device *soft_iface; 553 struct net_device *soft_iface;
553 struct batadv_priv *bat_priv; 554 struct batadv_priv *bat_priv;
554 555
555 delayed_work = container_of(work, struct delayed_work, work); 556 delayed_work = to_delayed_work(work);
556 forw_packet = container_of(delayed_work, struct batadv_forw_packet, 557 forw_packet = container_of(delayed_work, struct batadv_forw_packet,
557 delayed_work); 558 delayed_work);
558 soft_iface = forw_packet->if_incoming->soft_iface; 559 soft_iface = forw_packet->if_incoming->soft_iface;
@@ -577,10 +578,15 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
577 if (forw_packet->num_packets >= hard_iface->num_bcasts) 578 if (forw_packet->num_packets >= hard_iface->num_bcasts)
578 continue; 579 continue;
579 580
581 if (!kref_get_unless_zero(&hard_iface->refcount))
582 continue;
583
580 /* send a copy of the saved skb */ 584 /* send a copy of the saved skb */
581 skb1 = skb_clone(forw_packet->skb, GFP_ATOMIC); 585 skb1 = skb_clone(forw_packet->skb, GFP_ATOMIC);
582 if (skb1) 586 if (skb1)
583 batadv_send_broadcast_skb(skb1, hard_iface); 587 batadv_send_broadcast_skb(skb1, hard_iface);
588
589 batadv_hardif_put(hard_iface);
584 } 590 }
585 rcu_read_unlock(); 591 rcu_read_unlock();
586 592
@@ -604,7 +610,7 @@ void batadv_send_outstanding_bat_ogm_packet(struct work_struct *work)
604 struct batadv_forw_packet *forw_packet; 610 struct batadv_forw_packet *forw_packet;
605 struct batadv_priv *bat_priv; 611 struct batadv_priv *bat_priv;
606 612
607 delayed_work = container_of(work, struct delayed_work, work); 613 delayed_work = to_delayed_work(work);
608 forw_packet = container_of(delayed_work, struct batadv_forw_packet, 614 forw_packet = container_of(delayed_work, struct batadv_forw_packet,
609 delayed_work); 615 delayed_work);
610 bat_priv = netdev_priv(forw_packet->if_incoming->soft_iface); 616 bat_priv = netdev_priv(forw_packet->if_incoming->soft_iface);
@@ -675,6 +681,9 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
675 681
676 if (pending) { 682 if (pending) {
677 hlist_del(&forw_packet->list); 683 hlist_del(&forw_packet->list);
684 if (!forw_packet->own)
685 atomic_inc(&bat_priv->bcast_queue_left);
686
678 batadv_forw_packet_free(forw_packet); 687 batadv_forw_packet_free(forw_packet);
679 } 688 }
680 } 689 }
@@ -702,6 +711,9 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
702 711
703 if (pending) { 712 if (pending) {
704 hlist_del(&forw_packet->list); 713 hlist_del(&forw_packet->list);
714 if (!forw_packet->own)
715 atomic_inc(&bat_priv->batman_queue_left);
716
705 batadv_forw_packet_free(forw_packet); 717 batadv_forw_packet_free(forw_packet);
706 } 718 }
707 } 719 }
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 0710379491bf..343d2c904399 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -186,7 +186,6 @@ static int batadv_interface_tx(struct sk_buff *skb,
186 struct batadv_priv *bat_priv = netdev_priv(soft_iface); 186 struct batadv_priv *bat_priv = netdev_priv(soft_iface);
187 struct batadv_hard_iface *primary_if = NULL; 187 struct batadv_hard_iface *primary_if = NULL;
188 struct batadv_bcast_packet *bcast_packet; 188 struct batadv_bcast_packet *bcast_packet;
189 __be16 ethertype = htons(ETH_P_BATMAN);
190 static const u8 stp_addr[ETH_ALEN] = {0x01, 0x80, 0xC2, 0x00, 189 static const u8 stp_addr[ETH_ALEN] = {0x01, 0x80, 0xC2, 0x00,
191 0x00, 0x00}; 190 0x00, 0x00};
192 static const u8 ectp_addr[ETH_ALEN] = {0xCF, 0x00, 0x00, 0x00, 191 static const u8 ectp_addr[ETH_ALEN] = {0xCF, 0x00, 0x00, 0x00,
@@ -208,7 +207,7 @@ static int batadv_interface_tx(struct sk_buff *skb,
208 if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) 207 if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
209 goto dropped; 208 goto dropped;
210 209
211 soft_iface->trans_start = jiffies; 210 netif_trans_update(soft_iface);
212 vid = batadv_get_vid(skb, 0); 211 vid = batadv_get_vid(skb, 0);
213 ethhdr = eth_hdr(skb); 212 ethhdr = eth_hdr(skb);
214 213
@@ -216,7 +215,8 @@ static int batadv_interface_tx(struct sk_buff *skb,
216 case ETH_P_8021Q: 215 case ETH_P_8021Q:
217 vhdr = vlan_eth_hdr(skb); 216 vhdr = vlan_eth_hdr(skb);
218 217
219 if (vhdr->h_vlan_encapsulated_proto != ethertype) { 218 /* drop batman-in-batman packets to prevent loops */
219 if (vhdr->h_vlan_encapsulated_proto != htons(ETH_P_BATMAN)) {
220 network_offset += VLAN_HLEN; 220 network_offset += VLAN_HLEN;
221 break; 221 break;
222 } 222 }
@@ -381,13 +381,29 @@ end:
381 return NETDEV_TX_OK; 381 return NETDEV_TX_OK;
382} 382}
383 383
384/**
385 * batadv_interface_rx - receive ethernet frame on local batman-adv interface
386 * @soft_iface: local interface which will receive the ethernet frame
387 * @skb: ethernet frame for @soft_iface
388 * @hdr_size: size of already parsed batman-adv header
389 * @orig_node: originator from which the batman-adv packet was sent
390 *
 391 * Sends an ethernet frame to the receive path of the local @soft_iface.
 392 * skb->data still points to the batman-adv header of size @hdr_size.
393 * The caller has to have parsed this header already and made sure that at least
394 * @hdr_size bytes are still available for pull in @skb.
395 *
396 * The packet may still get dropped. This can happen when the encapsulated
 397 * ethernet frame is invalid or again contains a batman-adv packet. Also,
 398 * unicast packets are dropped directly when they were sent between two
399 * isolated clients.
400 */
384void batadv_interface_rx(struct net_device *soft_iface, 401void batadv_interface_rx(struct net_device *soft_iface,
385 struct sk_buff *skb, struct batadv_hard_iface *recv_if, 402 struct sk_buff *skb, int hdr_size,
386 int hdr_size, struct batadv_orig_node *orig_node) 403 struct batadv_orig_node *orig_node)
387{ 404{
388 struct batadv_bcast_packet *batadv_bcast_packet; 405 struct batadv_bcast_packet *batadv_bcast_packet;
389 struct batadv_priv *bat_priv = netdev_priv(soft_iface); 406 struct batadv_priv *bat_priv = netdev_priv(soft_iface);
390 __be16 ethertype = htons(ETH_P_BATMAN);
391 struct vlan_ethhdr *vhdr; 407 struct vlan_ethhdr *vhdr;
392 struct ethhdr *ethhdr; 408 struct ethhdr *ethhdr;
393 unsigned short vid; 409 unsigned short vid;
@@ -396,10 +412,6 @@ void batadv_interface_rx(struct net_device *soft_iface,
396 batadv_bcast_packet = (struct batadv_bcast_packet *)skb->data; 412 batadv_bcast_packet = (struct batadv_bcast_packet *)skb->data;
397 is_bcast = (batadv_bcast_packet->packet_type == BATADV_BCAST); 413 is_bcast = (batadv_bcast_packet->packet_type == BATADV_BCAST);
398 414
399 /* check if enough space is available for pulling, and pull */
400 if (!pskb_may_pull(skb, hdr_size))
401 goto dropped;
402
403 skb_pull_rcsum(skb, hdr_size); 415 skb_pull_rcsum(skb, hdr_size);
404 skb_reset_mac_header(skb); 416 skb_reset_mac_header(skb);
405 417
@@ -408,14 +420,21 @@ void batadv_interface_rx(struct net_device *soft_iface,
408 */ 420 */
409 nf_reset(skb); 421 nf_reset(skb);
410 422
423 if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
424 goto dropped;
425
411 vid = batadv_get_vid(skb, 0); 426 vid = batadv_get_vid(skb, 0);
412 ethhdr = eth_hdr(skb); 427 ethhdr = eth_hdr(skb);
413 428
414 switch (ntohs(ethhdr->h_proto)) { 429 switch (ntohs(ethhdr->h_proto)) {
415 case ETH_P_8021Q: 430 case ETH_P_8021Q:
431 if (!pskb_may_pull(skb, VLAN_ETH_HLEN))
432 goto dropped;
433
416 vhdr = (struct vlan_ethhdr *)skb->data; 434 vhdr = (struct vlan_ethhdr *)skb->data;
417 435
418 if (vhdr->h_vlan_encapsulated_proto != ethertype) 436 /* drop batman-in-batman packets to prevent loops */
437 if (vhdr->h_vlan_encapsulated_proto != htons(ETH_P_BATMAN))
419 break; 438 break;
420 439
421 /* fall through */ 440 /* fall through */
@@ -424,8 +443,6 @@ void batadv_interface_rx(struct net_device *soft_iface,
424 } 443 }
425 444
426 /* skb->dev & skb->pkt_type are set here */ 445 /* skb->dev & skb->pkt_type are set here */
427 if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
428 goto dropped;
429 skb->protocol = eth_type_trans(skb, soft_iface); 446 skb->protocol = eth_type_trans(skb, soft_iface);
430 447
431 /* should not be necessary anymore as we use skb_pull_rcsum() 448 /* should not be necessary anymore as we use skb_pull_rcsum()
@@ -539,7 +556,7 @@ struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv,
539} 556}
540 557
541/** 558/**
542 * batadv_create_vlan - allocate the needed resources for a new vlan 559 * batadv_softif_create_vlan - allocate the needed resources for a new vlan
543 * @bat_priv: the bat priv with all the soft interface information 560 * @bat_priv: the bat priv with all the soft interface information
544 * @vid: the VLAN identifier 561 * @vid: the VLAN identifier
545 * 562 *
@@ -868,13 +885,14 @@ static int batadv_softif_slave_add(struct net_device *dev,
868 struct net_device *slave_dev) 885 struct net_device *slave_dev)
869{ 886{
870 struct batadv_hard_iface *hard_iface; 887 struct batadv_hard_iface *hard_iface;
888 struct net *net = dev_net(dev);
871 int ret = -EINVAL; 889 int ret = -EINVAL;
872 890
873 hard_iface = batadv_hardif_get_by_netdev(slave_dev); 891 hard_iface = batadv_hardif_get_by_netdev(slave_dev);
874 if (!hard_iface || hard_iface->soft_iface) 892 if (!hard_iface || hard_iface->soft_iface)
875 goto out; 893 goto out;
876 894
877 ret = batadv_hardif_enable_interface(hard_iface, dev->name); 895 ret = batadv_hardif_enable_interface(hard_iface, net, dev->name);
878 896
879out: 897out:
880 if (hard_iface) 898 if (hard_iface)
@@ -955,7 +973,7 @@ static void batadv_softif_init_early(struct net_device *dev)
955 973
956 dev->netdev_ops = &batadv_netdev_ops; 974 dev->netdev_ops = &batadv_netdev_ops;
957 dev->destructor = batadv_softif_free; 975 dev->destructor = batadv_softif_free;
958 dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; 976 dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_NETNS_LOCAL;
959 dev->priv_flags |= IFF_NO_QUEUE; 977 dev->priv_flags |= IFF_NO_QUEUE;
960 978
961 /* can't call min_mtu, because the needed variables 979 /* can't call min_mtu, because the needed variables
@@ -971,7 +989,7 @@ static void batadv_softif_init_early(struct net_device *dev)
971 memset(priv, 0, sizeof(*priv)); 989 memset(priv, 0, sizeof(*priv));
972} 990}
973 991
974struct net_device *batadv_softif_create(const char *name) 992struct net_device *batadv_softif_create(struct net *net, const char *name)
975{ 993{
976 struct net_device *soft_iface; 994 struct net_device *soft_iface;
977 int ret; 995 int ret;
@@ -981,6 +999,8 @@ struct net_device *batadv_softif_create(const char *name)
981 if (!soft_iface) 999 if (!soft_iface)
982 return NULL; 1000 return NULL;
983 1001
1002 dev_net_set(soft_iface, net);
1003
984 soft_iface->rtnl_link_ops = &batadv_link_ops; 1004 soft_iface->rtnl_link_ops = &batadv_link_ops;
985 1005
986 ret = register_netdevice(soft_iface); 1006 ret = register_netdevice(soft_iface);
@@ -1025,12 +1045,12 @@ static void batadv_softif_destroy_netlink(struct net_device *soft_iface,
1025 unregister_netdevice_queue(soft_iface, head); 1045 unregister_netdevice_queue(soft_iface, head);
1026} 1046}
1027 1047
1028int batadv_softif_is_valid(const struct net_device *net_dev) 1048bool batadv_softif_is_valid(const struct net_device *net_dev)
1029{ 1049{
1030 if (net_dev->netdev_ops->ndo_start_xmit == batadv_interface_tx) 1050 if (net_dev->netdev_ops->ndo_start_xmit == batadv_interface_tx)
1031 return 1; 1051 return true;
1032 1052
1033 return 0; 1053 return false;
1034} 1054}
1035 1055
1036struct rtnl_link_ops batadv_link_ops __read_mostly = { 1056struct rtnl_link_ops batadv_link_ops __read_mostly = {
diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h
index 9ae265703d23..ec303ddbf647 100644
--- a/net/batman-adv/soft-interface.h
+++ b/net/batman-adv/soft-interface.h
@@ -20,18 +20,20 @@
20 20
21#include "main.h" 21#include "main.h"
22 22
23#include <linux/types.h>
23#include <net/rtnetlink.h> 24#include <net/rtnetlink.h>
24 25
25struct net_device; 26struct net_device;
27struct net;
26struct sk_buff; 28struct sk_buff;
27 29
28int batadv_skb_head_push(struct sk_buff *skb, unsigned int len); 30int batadv_skb_head_push(struct sk_buff *skb, unsigned int len);
29void batadv_interface_rx(struct net_device *soft_iface, 31void batadv_interface_rx(struct net_device *soft_iface,
30 struct sk_buff *skb, struct batadv_hard_iface *recv_if, 32 struct sk_buff *skb, int hdr_size,
31 int hdr_size, struct batadv_orig_node *orig_node); 33 struct batadv_orig_node *orig_node);
32struct net_device *batadv_softif_create(const char *name); 34struct net_device *batadv_softif_create(struct net *net, const char *name);
33void batadv_softif_destroy_sysfs(struct net_device *soft_iface); 35void batadv_softif_destroy_sysfs(struct net_device *soft_iface);
34int batadv_softif_is_valid(const struct net_device *net_dev); 36bool batadv_softif_is_valid(const struct net_device *net_dev);
35extern struct rtnl_link_ops batadv_link_ops; 37extern struct rtnl_link_ops batadv_link_ops;
36int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid); 38int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid);
37void batadv_softif_vlan_put(struct batadv_softif_vlan *softif_vlan); 39void batadv_softif_vlan_put(struct batadv_softif_vlan *softif_vlan);
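The two hunks above change the receive entry point: batadv_interface_rx() loses the unused recv_if argument, and the pskb_may_pull() check for @hdr_size moves out of the function, so the caller must already have made that many bytes available before handing over the skb; the function itself now only re-checks the inner ethernet and VLAN headers after the pull. A minimal caller-side sketch of that convention (illustrative only; skb, hdr_size and orig_node stand for whatever the routing path has already set up):

	/* the caller guarantees the batman-adv header is pullable */
	if (!pskb_may_pull(skb, hdr_size))
		goto free_skb;

	/* hand the encapsulated frame to the soft interface; ETH_HLEN and
	 * VLAN_ETH_HLEN are re-checked inside batadv_interface_rx()
	 */
	batadv_interface_rx(soft_iface, skb, hdr_size, orig_node);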
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index e7cf51333a36..414b2074165f 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -116,11 +116,13 @@ batadv_kobj_to_vlan(struct batadv_priv *bat_priv, struct kobject *obj)
116static char *batadv_uev_action_str[] = { 116static char *batadv_uev_action_str[] = {
117 "add", 117 "add",
118 "del", 118 "del",
119 "change" 119 "change",
120 "loopdetect",
120}; 121};
121 122
122static char *batadv_uev_type_str[] = { 123static char *batadv_uev_type_str[] = {
123 "gw" 124 "gw",
125 "bla",
124}; 126};
125 127
126/* Use this, if you have customized show and store functions for vlan attrs */ 128/* Use this, if you have customized show and store functions for vlan attrs */
@@ -830,6 +832,7 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj,
830 size_t count) 832 size_t count)
831{ 833{
832 struct net_device *net_dev = batadv_kobj_to_netdev(kobj); 834 struct net_device *net_dev = batadv_kobj_to_netdev(kobj);
835 struct net *net = dev_net(net_dev);
833 struct batadv_hard_iface *hard_iface; 836 struct batadv_hard_iface *hard_iface;
834 int status_tmp = -1; 837 int status_tmp = -1;
835 int ret = count; 838 int ret = count;
@@ -873,7 +876,7 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj,
873 batadv_hardif_disable_interface(hard_iface, 876 batadv_hardif_disable_interface(hard_iface,
874 BATADV_IF_CLEANUP_AUTO); 877 BATADV_IF_CLEANUP_AUTO);
875 878
876 ret = batadv_hardif_enable_interface(hard_iface, buff); 879 ret = batadv_hardif_enable_interface(hard_iface, net, buff);
877 880
878unlock: 881unlock:
879 rtnl_unlock(); 882 rtnl_unlock();
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 0b43e86328a5..feaf492b01ca 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -43,7 +43,6 @@
43#include <linux/stddef.h> 43#include <linux/stddef.h>
44#include <linux/string.h> 44#include <linux/string.h>
45#include <linux/workqueue.h> 45#include <linux/workqueue.h>
46#include <net/net_namespace.h>
47 46
48#include "bridge_loop_avoidance.h" 47#include "bridge_loop_avoidance.h"
49#include "hard-interface.h" 48#include "hard-interface.h"
@@ -76,9 +75,9 @@ static void batadv_tt_global_del(struct batadv_priv *bat_priv,
76 * 75 *
77 * Compare the MAC address and the VLAN ID of the two TT entries and check if 76 * Compare the MAC address and the VLAN ID of the two TT entries and check if
78 * they are the same TT client. 77 * they are the same TT client.
79 * Return: 1 if the two TT clients are the same, 0 otherwise 78 * Return: true if the two TT clients are the same, false otherwise
80 */ 79 */
81static int batadv_compare_tt(const struct hlist_node *node, const void *data2) 80static bool batadv_compare_tt(const struct hlist_node *node, const void *data2)
82{ 81{
83 const void *data1 = container_of(node, struct batadv_tt_common_entry, 82 const void *data1 = container_of(node, struct batadv_tt_common_entry,
84 hash_entry); 83 hash_entry);
@@ -215,6 +214,8 @@ static void batadv_tt_local_entry_release(struct kref *ref)
215 tt_local_entry = container_of(ref, struct batadv_tt_local_entry, 214 tt_local_entry = container_of(ref, struct batadv_tt_local_entry,
216 common.refcount); 215 common.refcount);
217 216
217 batadv_softif_vlan_put(tt_local_entry->vlan);
218
218 kfree_rcu(tt_local_entry, common.rcu); 219 kfree_rcu(tt_local_entry, common.rcu);
219} 220}
220 221
@@ -583,6 +584,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
583 struct batadv_priv *bat_priv = netdev_priv(soft_iface); 584 struct batadv_priv *bat_priv = netdev_priv(soft_iface);
584 struct batadv_tt_local_entry *tt_local; 585 struct batadv_tt_local_entry *tt_local;
585 struct batadv_tt_global_entry *tt_global = NULL; 586 struct batadv_tt_global_entry *tt_global = NULL;
587 struct net *net = dev_net(soft_iface);
586 struct batadv_softif_vlan *vlan; 588 struct batadv_softif_vlan *vlan;
587 struct net_device *in_dev = NULL; 589 struct net_device *in_dev = NULL;
588 struct hlist_head *head; 590 struct hlist_head *head;
@@ -594,7 +596,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
594 u32 match_mark; 596 u32 match_mark;
595 597
596 if (ifindex != BATADV_NULL_IFINDEX) 598 if (ifindex != BATADV_NULL_IFINDEX)
597 in_dev = dev_get_by_index(&init_net, ifindex); 599 in_dev = dev_get_by_index(net, ifindex);
598 600
599 tt_local = batadv_tt_local_hash_find(bat_priv, addr, vid); 601 tt_local = batadv_tt_local_hash_find(bat_priv, addr, vid);
600 602
@@ -673,6 +675,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
673 kref_get(&tt_local->common.refcount); 675 kref_get(&tt_local->common.refcount);
674 tt_local->last_seen = jiffies; 676 tt_local->last_seen = jiffies;
675 tt_local->common.added_at = tt_local->last_seen; 677 tt_local->common.added_at = tt_local->last_seen;
678 tt_local->vlan = vlan;
676 679
677 /* the batman interface mac and multicast addresses should never be 680 /* the batman interface mac and multicast addresses should never be
678 * purged 681 * purged
@@ -991,7 +994,6 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
991 struct batadv_tt_common_entry *tt_common_entry; 994 struct batadv_tt_common_entry *tt_common_entry;
992 struct batadv_tt_local_entry *tt_local; 995 struct batadv_tt_local_entry *tt_local;
993 struct batadv_hard_iface *primary_if; 996 struct batadv_hard_iface *primary_if;
994 struct batadv_softif_vlan *vlan;
995 struct hlist_head *head; 997 struct hlist_head *head;
996 unsigned short vid; 998 unsigned short vid;
997 u32 i; 999 u32 i;
@@ -1008,8 +1010,8 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
1008 seq_printf(seq, 1010 seq_printf(seq,
1009 "Locally retrieved addresses (from %s) announced via TT (TTVN: %u):\n", 1011 "Locally retrieved addresses (from %s) announced via TT (TTVN: %u):\n",
1010 net_dev->name, (u8)atomic_read(&bat_priv->tt.vn)); 1012 net_dev->name, (u8)atomic_read(&bat_priv->tt.vn));
1011 seq_printf(seq, " %-13s %s %-8s %-9s (%-10s)\n", "Client", "VID", 1013 seq_puts(seq,
1012 "Flags", "Last seen", "CRC"); 1014 " Client VID Flags Last seen (CRC )\n");
1013 1015
1014 for (i = 0; i < hash->size; i++) { 1016 for (i = 0; i < hash->size; i++) {
1015 head = &hash->table[i]; 1017 head = &hash->table[i];
@@ -1027,14 +1029,6 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
1027 last_seen_msecs = last_seen_msecs % 1000; 1029 last_seen_msecs = last_seen_msecs % 1000;
1028 1030
1029 no_purge = tt_common_entry->flags & np_flag; 1031 no_purge = tt_common_entry->flags & np_flag;
1030
1031 vlan = batadv_softif_vlan_get(bat_priv, vid);
1032 if (!vlan) {
1033 seq_printf(seq, "Cannot retrieve VLAN %d\n",
1034 BATADV_PRINT_VID(vid));
1035 continue;
1036 }
1037
1038 seq_printf(seq, 1032 seq_printf(seq,
1039 " * %pM %4i [%c%c%c%c%c%c] %3u.%03u (%#.8x)\n", 1033 " * %pM %4i [%c%c%c%c%c%c] %3u.%03u (%#.8x)\n",
1040 tt_common_entry->addr, 1034 tt_common_entry->addr,
@@ -1052,9 +1046,7 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
1052 BATADV_TT_CLIENT_ISOLA) ? 'I' : '.'), 1046 BATADV_TT_CLIENT_ISOLA) ? 'I' : '.'),
1053 no_purge ? 0 : last_seen_secs, 1047 no_purge ? 0 : last_seen_secs,
1054 no_purge ? 0 : last_seen_msecs, 1048 no_purge ? 0 : last_seen_msecs,
1055 vlan->tt.crc); 1049 tt_local->vlan->tt.crc);
1056
1057 batadv_softif_vlan_put(vlan);
1058 } 1050 }
1059 rcu_read_unlock(); 1051 rcu_read_unlock();
1060 } 1052 }
@@ -1099,7 +1091,6 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr,
1099{ 1091{
1100 struct batadv_tt_local_entry *tt_local_entry; 1092 struct batadv_tt_local_entry *tt_local_entry;
1101 u16 flags, curr_flags = BATADV_NO_FLAGS; 1093 u16 flags, curr_flags = BATADV_NO_FLAGS;
1102 struct batadv_softif_vlan *vlan;
1103 void *tt_entry_exists; 1094 void *tt_entry_exists;
1104 1095
1105 tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid); 1096 tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid);
@@ -1139,14 +1130,6 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr,
1139 /* extra call to free the local tt entry */ 1130 /* extra call to free the local tt entry */
1140 batadv_tt_local_entry_put(tt_local_entry); 1131 batadv_tt_local_entry_put(tt_local_entry);
1141 1132
1142 /* decrease the reference held for this vlan */
1143 vlan = batadv_softif_vlan_get(bat_priv, vid);
1144 if (!vlan)
1145 goto out;
1146
1147 batadv_softif_vlan_put(vlan);
1148 batadv_softif_vlan_put(vlan);
1149
1150out: 1133out:
1151 if (tt_local_entry) 1134 if (tt_local_entry)
1152 batadv_tt_local_entry_put(tt_local_entry); 1135 batadv_tt_local_entry_put(tt_local_entry);
@@ -1219,7 +1202,6 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv)
1219 spinlock_t *list_lock; /* protects write access to the hash lists */ 1202 spinlock_t *list_lock; /* protects write access to the hash lists */
1220 struct batadv_tt_common_entry *tt_common_entry; 1203 struct batadv_tt_common_entry *tt_common_entry;
1221 struct batadv_tt_local_entry *tt_local; 1204 struct batadv_tt_local_entry *tt_local;
1222 struct batadv_softif_vlan *vlan;
1223 struct hlist_node *node_tmp; 1205 struct hlist_node *node_tmp;
1224 struct hlist_head *head; 1206 struct hlist_head *head;
1225 u32 i; 1207 u32 i;
@@ -1241,14 +1223,6 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv)
1241 struct batadv_tt_local_entry, 1223 struct batadv_tt_local_entry,
1242 common); 1224 common);
1243 1225
1244 /* decrease the reference held for this vlan */
1245 vlan = batadv_softif_vlan_get(bat_priv,
1246 tt_common_entry->vid);
1247 if (vlan) {
1248 batadv_softif_vlan_put(vlan);
1249 batadv_softif_vlan_put(vlan);
1250 }
1251
1252 batadv_tt_local_entry_put(tt_local); 1226 batadv_tt_local_entry_put(tt_local);
1253 } 1227 }
1254 spin_unlock_bh(list_lock); 1228 spin_unlock_bh(list_lock);
@@ -1706,9 +1680,8 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset)
1706 seq_printf(seq, 1680 seq_printf(seq,
1707 "Globally announced TT entries received via the mesh %s\n", 1681 "Globally announced TT entries received via the mesh %s\n",
1708 net_dev->name); 1682 net_dev->name);
1709 seq_printf(seq, " %-13s %s %s %-15s %s (%-10s) %s\n", 1683 seq_puts(seq,
1710 "Client", "VID", "(TTVN)", "Originator", "(Curr TTVN)", 1684 " Client VID (TTVN) Originator (Curr TTVN) (CRC ) Flags\n");
1711 "CRC", "Flags");
1712 1685
1713 for (i = 0; i < hash->size; i++) { 1686 for (i = 0; i < hash->size; i++) {
1714 head = &hash->table[i]; 1687 head = &hash->table[i];
@@ -2388,19 +2361,19 @@ unlock:
2388 * @entry_ptr: to be checked local tt entry 2361 * @entry_ptr: to be checked local tt entry
2389 * @data_ptr: not used but definition required to satisfy the callback prototype 2362 * @data_ptr: not used but definition required to satisfy the callback prototype
2390 * 2363 *
 2391 * Return: 1 if the entry is valid, 0 otherwise. 2364 * Return: true if the entry is valid, false otherwise.
2392 */ 2365 */
2393static int batadv_tt_local_valid(const void *entry_ptr, const void *data_ptr) 2366static bool batadv_tt_local_valid(const void *entry_ptr, const void *data_ptr)
2394{ 2367{
2395 const struct batadv_tt_common_entry *tt_common_entry = entry_ptr; 2368 const struct batadv_tt_common_entry *tt_common_entry = entry_ptr;
2396 2369
2397 if (tt_common_entry->flags & BATADV_TT_CLIENT_NEW) 2370 if (tt_common_entry->flags & BATADV_TT_CLIENT_NEW)
2398 return 0; 2371 return false;
2399 return 1; 2372 return true;
2400} 2373}
2401 2374
2402static int batadv_tt_global_valid(const void *entry_ptr, 2375static bool batadv_tt_global_valid(const void *entry_ptr,
2403 const void *data_ptr) 2376 const void *data_ptr)
2404{ 2377{
2405 const struct batadv_tt_common_entry *tt_common_entry = entry_ptr; 2378 const struct batadv_tt_common_entry *tt_common_entry = entry_ptr;
2406 const struct batadv_tt_global_entry *tt_global_entry; 2379 const struct batadv_tt_global_entry *tt_global_entry;
@@ -2408,7 +2381,7 @@ static int batadv_tt_global_valid(const void *entry_ptr,
2408 2381
2409 if (tt_common_entry->flags & BATADV_TT_CLIENT_ROAM || 2382 if (tt_common_entry->flags & BATADV_TT_CLIENT_ROAM ||
2410 tt_common_entry->flags & BATADV_TT_CLIENT_TEMP) 2383 tt_common_entry->flags & BATADV_TT_CLIENT_TEMP)
2411 return 0; 2384 return false;
2412 2385
2413 tt_global_entry = container_of(tt_common_entry, 2386 tt_global_entry = container_of(tt_common_entry,
2414 struct batadv_tt_global_entry, 2387 struct batadv_tt_global_entry,
@@ -2430,7 +2403,8 @@ static int batadv_tt_global_valid(const void *entry_ptr,
2430static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, 2403static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv,
2431 struct batadv_hashtable *hash, 2404 struct batadv_hashtable *hash,
2432 void *tvlv_buff, u16 tt_len, 2405 void *tvlv_buff, u16 tt_len,
2433 int (*valid_cb)(const void *, const void *), 2406 bool (*valid_cb)(const void *,
2407 const void *),
2434 void *cb_data) 2408 void *cb_data)
2435{ 2409{
2436 struct batadv_tt_common_entry *tt_common_entry; 2410 struct batadv_tt_common_entry *tt_common_entry;
@@ -2579,11 +2553,11 @@ static void batadv_tt_global_update_crc(struct batadv_priv *bat_priv,
2579 * 2553 *
2580 * Return: true if the TT Request was sent, false otherwise 2554 * Return: true if the TT Request was sent, false otherwise
2581 */ 2555 */
2582static int batadv_send_tt_request(struct batadv_priv *bat_priv, 2556static bool batadv_send_tt_request(struct batadv_priv *bat_priv,
2583 struct batadv_orig_node *dst_orig_node, 2557 struct batadv_orig_node *dst_orig_node,
2584 u8 ttvn, 2558 u8 ttvn,
2585 struct batadv_tvlv_tt_vlan_data *tt_vlan, 2559 struct batadv_tvlv_tt_vlan_data *tt_vlan,
2586 u16 num_vlan, bool full_table) 2560 u16 num_vlan, bool full_table)
2587{ 2561{
2588 struct batadv_tvlv_tt_data *tvlv_tt_data = NULL; 2562 struct batadv_tvlv_tt_data *tvlv_tt_data = NULL;
2589 struct batadv_tt_req_node *tt_req_node = NULL; 2563 struct batadv_tt_req_node *tt_req_node = NULL;
@@ -3227,7 +3201,7 @@ static void batadv_tt_purge(struct work_struct *work)
3227 struct batadv_priv_tt *priv_tt; 3201 struct batadv_priv_tt *priv_tt;
3228 struct batadv_priv *bat_priv; 3202 struct batadv_priv *bat_priv;
3229 3203
3230 delayed_work = container_of(work, struct delayed_work, work); 3204 delayed_work = to_delayed_work(work);
3231 priv_tt = container_of(delayed_work, struct batadv_priv_tt, work); 3205 priv_tt = container_of(delayed_work, struct batadv_priv_tt, work);
3232 bat_priv = container_of(priv_tt, struct batadv_priv, tt); 3206 bat_priv = container_of(priv_tt, struct batadv_priv, tt);
3233 3207
@@ -3309,7 +3283,6 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv)
3309 struct batadv_hashtable *hash = bat_priv->tt.local_hash; 3283 struct batadv_hashtable *hash = bat_priv->tt.local_hash;
3310 struct batadv_tt_common_entry *tt_common; 3284 struct batadv_tt_common_entry *tt_common;
3311 struct batadv_tt_local_entry *tt_local; 3285 struct batadv_tt_local_entry *tt_local;
3312 struct batadv_softif_vlan *vlan;
3313 struct hlist_node *node_tmp; 3286 struct hlist_node *node_tmp;
3314 struct hlist_head *head; 3287 struct hlist_head *head;
3315 spinlock_t *list_lock; /* protects write access to the hash lists */ 3288 spinlock_t *list_lock; /* protects write access to the hash lists */
@@ -3339,13 +3312,6 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv)
3339 struct batadv_tt_local_entry, 3312 struct batadv_tt_local_entry,
3340 common); 3313 common);
3341 3314
3342 /* decrease the reference held for this vlan */
3343 vlan = batadv_softif_vlan_get(bat_priv, tt_common->vid);
3344 if (vlan) {
3345 batadv_softif_vlan_put(vlan);
3346 batadv_softif_vlan_put(vlan);
3347 }
3348
3349 batadv_tt_local_entry_put(tt_local); 3315 batadv_tt_local_entry_put(tt_local);
3350 } 3316 }
3351 spin_unlock_bh(list_lock); 3317 spin_unlock_bh(list_lock);
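The translation-table hunks tie the VLAN reference to the TT entry itself: batadv_tt_local_add() stores the batadv_softif_vlan it looked up in tt_local->vlan, and the single balancing batadv_softif_vlan_put() now sits in batadv_tt_local_entry_release(), which is why the extra get/put pairs in the seq_print, remove, purge and free paths are deleted. A condensed view of the resulting reference lifecycle, pieced together from the hunks above (error handling omitted):

	/* add path (batadv_tt_local_add): the lookup takes the reference */
	vlan = batadv_softif_vlan_get(bat_priv, vid);
	tt_local->vlan = vlan;

	/* release path (batadv_tt_local_entry_release): single balancing put */
	batadv_softif_vlan_put(tt_local_entry->vlan);
	kfree_rcu(tt_local_entry, common.rcu);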
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 9abfb3e73c34..6a577f4f8ba7 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -433,6 +433,7 @@ struct batadv_hardif_neigh_node {
433 * @ifinfo_lock: lock protecting private ifinfo members and list 433 * @ifinfo_lock: lock protecting private ifinfo members and list
434 * @if_incoming: pointer to incoming hard-interface 434 * @if_incoming: pointer to incoming hard-interface
435 * @last_seen: when last packet via this neighbor was received 435 * @last_seen: when last packet via this neighbor was received
436 * @hardif_neigh: hardif_neigh of this neighbor
436 * @refcount: number of contexts the object is used 437 * @refcount: number of contexts the object is used
437 * @rcu: struct used for freeing in an RCU-safe manner 438 * @rcu: struct used for freeing in an RCU-safe manner
438 */ 439 */
@@ -444,6 +445,7 @@ struct batadv_neigh_node {
444 spinlock_t ifinfo_lock; /* protects ifinfo_list and its members */ 445 spinlock_t ifinfo_lock; /* protects ifinfo_list and its members */
445 struct batadv_hard_iface *if_incoming; 446 struct batadv_hard_iface *if_incoming;
446 unsigned long last_seen; 447 unsigned long last_seen;
448 struct batadv_hardif_neigh_node *hardif_neigh;
447 struct kref refcount; 449 struct kref refcount;
448 struct rcu_head rcu; 450 struct rcu_head rcu;
449}; 451};
@@ -655,6 +657,9 @@ struct batadv_priv_tt {
655 * @num_requests: number of bla requests in flight 657 * @num_requests: number of bla requests in flight
656 * @claim_hash: hash table containing mesh nodes this host has claimed 658 * @claim_hash: hash table containing mesh nodes this host has claimed
657 * @backbone_hash: hash table containing all detected backbone gateways 659 * @backbone_hash: hash table containing all detected backbone gateways
660 * @loopdetect_addr: MAC address used for own loopdetection frames
661 * @loopdetect_lasttime: time when the loopdetection frames were sent
662 * @loopdetect_next: how many periods to wait for the next loopdetect process
658 * @bcast_duplist: recently received broadcast packets array (for broadcast 663 * @bcast_duplist: recently received broadcast packets array (for broadcast
659 * duplicate suppression) 664 * duplicate suppression)
660 * @bcast_duplist_curr: index of last broadcast packet added to bcast_duplist 665 * @bcast_duplist_curr: index of last broadcast packet added to bcast_duplist
@@ -666,6 +671,9 @@ struct batadv_priv_bla {
666 atomic_t num_requests; 671 atomic_t num_requests;
667 struct batadv_hashtable *claim_hash; 672 struct batadv_hashtable *claim_hash;
668 struct batadv_hashtable *backbone_hash; 673 struct batadv_hashtable *backbone_hash;
674 u8 loopdetect_addr[ETH_ALEN];
675 unsigned long loopdetect_lasttime;
676 atomic_t loopdetect_next;
669 struct batadv_bcast_duplist_entry bcast_duplist[BATADV_DUPLIST_SIZE]; 677 struct batadv_bcast_duplist_entry bcast_duplist[BATADV_DUPLIST_SIZE];
670 int bcast_duplist_curr; 678 int bcast_duplist_curr;
671 /* protects bcast_duplist & bcast_duplist_curr */ 679 /* protects bcast_duplist & bcast_duplist_curr */
@@ -1010,6 +1018,7 @@ struct batadv_socket_packet {
1010 * resolved 1018 * resolved
1011 * @crc: crc16 checksum over all claims 1019 * @crc: crc16 checksum over all claims
1012 * @crc_lock: lock protecting crc 1020 * @crc_lock: lock protecting crc
1021 * @report_work: work struct for reporting detected loops
1013 * @refcount: number of contexts the object is used 1022 * @refcount: number of contexts the object is used
1014 * @rcu: struct used for freeing in an RCU-safe manner 1023 * @rcu: struct used for freeing in an RCU-safe manner
1015 */ 1024 */
@@ -1023,6 +1032,7 @@ struct batadv_bla_backbone_gw {
1023 atomic_t request_sent; 1032 atomic_t request_sent;
1024 u16 crc; 1033 u16 crc;
1025 spinlock_t crc_lock; /* protects crc */ 1034 spinlock_t crc_lock; /* protects crc */
1035 struct work_struct report_work;
1026 struct kref refcount; 1036 struct kref refcount;
1027 struct rcu_head rcu; 1037 struct rcu_head rcu;
1028}; 1038};
@@ -1073,10 +1083,12 @@ struct batadv_tt_common_entry {
1073 * struct batadv_tt_local_entry - translation table local entry data 1083 * struct batadv_tt_local_entry - translation table local entry data
1074 * @common: general translation table data 1084 * @common: general translation table data
1075 * @last_seen: timestamp used for purging stale tt local entries 1085 * @last_seen: timestamp used for purging stale tt local entries
1086 * @vlan: soft-interface vlan of the entry
1076 */ 1087 */
1077struct batadv_tt_local_entry { 1088struct batadv_tt_local_entry {
1078 struct batadv_tt_common_entry common; 1089 struct batadv_tt_common_entry common;
1079 unsigned long last_seen; 1090 unsigned long last_seen;
1091 struct batadv_softif_vlan *vlan;
1080}; 1092};
1081 1093
1082/** 1094/**
@@ -1250,6 +1262,8 @@ struct batadv_forw_packet {
1250 * struct batadv_algo_ops - mesh algorithm callbacks 1262 * struct batadv_algo_ops - mesh algorithm callbacks
1251 * @list: list node for the batadv_algo_list 1263 * @list: list node for the batadv_algo_list
1252 * @name: name of the algorithm 1264 * @name: name of the algorithm
1265 * @bat_iface_activate: start routing mechanisms when hard-interface is brought
1266 * up
1253 * @bat_iface_enable: init routing info when hard-interface is enabled 1267 * @bat_iface_enable: init routing info when hard-interface is enabled
1254 * @bat_iface_disable: de-init routing info when hard-interface is disabled 1268 * @bat_iface_disable: de-init routing info when hard-interface is disabled
1255 * @bat_iface_update_mac: (re-)init mac addresses of the protocol information 1269 * @bat_iface_update_mac: (re-)init mac addresses of the protocol information
@@ -1277,6 +1291,7 @@ struct batadv_forw_packet {
1277struct batadv_algo_ops { 1291struct batadv_algo_ops {
1278 struct hlist_node list; 1292 struct hlist_node list;
1279 char *name; 1293 char *name;
1294 void (*bat_iface_activate)(struct batadv_hard_iface *hard_iface);
1280 int (*bat_iface_enable)(struct batadv_hard_iface *hard_iface); 1295 int (*bat_iface_enable)(struct batadv_hard_iface *hard_iface);
1281 void (*bat_iface_disable)(struct batadv_hard_iface *hard_iface); 1296 void (*bat_iface_disable)(struct batadv_hard_iface *hard_iface);
1282 void (*bat_iface_update_mac)(struct batadv_hard_iface *hard_iface); 1297 void (*bat_iface_update_mac)(struct batadv_hard_iface *hard_iface);
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 8a4cc2f7f0db..780089d75915 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -68,7 +68,7 @@ struct lowpan_peer {
68 struct in6_addr peer_addr; 68 struct in6_addr peer_addr;
69}; 69};
70 70
71struct lowpan_dev { 71struct lowpan_btle_dev {
72 struct list_head list; 72 struct list_head list;
73 73
74 struct hci_dev *hdev; 74 struct hci_dev *hdev;
@@ -80,18 +80,21 @@ struct lowpan_dev {
80 struct delayed_work notify_peers; 80 struct delayed_work notify_peers;
81}; 81};
82 82
83static inline struct lowpan_dev *lowpan_dev(const struct net_device *netdev) 83static inline struct lowpan_btle_dev *
84lowpan_btle_dev(const struct net_device *netdev)
84{ 85{
85 return (struct lowpan_dev *)lowpan_priv(netdev)->priv; 86 return (struct lowpan_btle_dev *)lowpan_dev(netdev)->priv;
86} 87}
87 88
88static inline void peer_add(struct lowpan_dev *dev, struct lowpan_peer *peer) 89static inline void peer_add(struct lowpan_btle_dev *dev,
90 struct lowpan_peer *peer)
89{ 91{
90 list_add_rcu(&peer->list, &dev->peers); 92 list_add_rcu(&peer->list, &dev->peers);
91 atomic_inc(&dev->peer_count); 93 atomic_inc(&dev->peer_count);
92} 94}
93 95
94static inline bool peer_del(struct lowpan_dev *dev, struct lowpan_peer *peer) 96static inline bool peer_del(struct lowpan_btle_dev *dev,
97 struct lowpan_peer *peer)
95{ 98{
96 list_del_rcu(&peer->list); 99 list_del_rcu(&peer->list);
97 kfree_rcu(peer, rcu); 100 kfree_rcu(peer, rcu);
@@ -106,7 +109,7 @@ static inline bool peer_del(struct lowpan_dev *dev, struct lowpan_peer *peer)
106 return false; 109 return false;
107} 110}
108 111
109static inline struct lowpan_peer *peer_lookup_ba(struct lowpan_dev *dev, 112static inline struct lowpan_peer *peer_lookup_ba(struct lowpan_btle_dev *dev,
110 bdaddr_t *ba, __u8 type) 113 bdaddr_t *ba, __u8 type)
111{ 114{
112 struct lowpan_peer *peer; 115 struct lowpan_peer *peer;
@@ -134,8 +137,8 @@ static inline struct lowpan_peer *peer_lookup_ba(struct lowpan_dev *dev,
134 return NULL; 137 return NULL;
135} 138}
136 139
137static inline struct lowpan_peer *__peer_lookup_chan(struct lowpan_dev *dev, 140static inline struct lowpan_peer *
138 struct l2cap_chan *chan) 141__peer_lookup_chan(struct lowpan_btle_dev *dev, struct l2cap_chan *chan)
139{ 142{
140 struct lowpan_peer *peer; 143 struct lowpan_peer *peer;
141 144
@@ -147,8 +150,8 @@ static inline struct lowpan_peer *__peer_lookup_chan(struct lowpan_dev *dev,
147 return NULL; 150 return NULL;
148} 151}
149 152
150static inline struct lowpan_peer *__peer_lookup_conn(struct lowpan_dev *dev, 153static inline struct lowpan_peer *
151 struct l2cap_conn *conn) 154__peer_lookup_conn(struct lowpan_btle_dev *dev, struct l2cap_conn *conn)
152{ 155{
153 struct lowpan_peer *peer; 156 struct lowpan_peer *peer;
154 157
@@ -160,7 +163,7 @@ static inline struct lowpan_peer *__peer_lookup_conn(struct lowpan_dev *dev,
160 return NULL; 163 return NULL;
161} 164}
162 165
163static inline struct lowpan_peer *peer_lookup_dst(struct lowpan_dev *dev, 166static inline struct lowpan_peer *peer_lookup_dst(struct lowpan_btle_dev *dev,
164 struct in6_addr *daddr, 167 struct in6_addr *daddr,
165 struct sk_buff *skb) 168 struct sk_buff *skb)
166{ 169{
@@ -220,7 +223,7 @@ static inline struct lowpan_peer *peer_lookup_dst(struct lowpan_dev *dev,
220 223
221static struct lowpan_peer *lookup_peer(struct l2cap_conn *conn) 224static struct lowpan_peer *lookup_peer(struct l2cap_conn *conn)
222{ 225{
223 struct lowpan_dev *entry; 226 struct lowpan_btle_dev *entry;
224 struct lowpan_peer *peer = NULL; 227 struct lowpan_peer *peer = NULL;
225 228
226 rcu_read_lock(); 229 rcu_read_lock();
@@ -236,10 +239,10 @@ static struct lowpan_peer *lookup_peer(struct l2cap_conn *conn)
236 return peer; 239 return peer;
237} 240}
238 241
239static struct lowpan_dev *lookup_dev(struct l2cap_conn *conn) 242static struct lowpan_btle_dev *lookup_dev(struct l2cap_conn *conn)
240{ 243{
241 struct lowpan_dev *entry; 244 struct lowpan_btle_dev *entry;
242 struct lowpan_dev *dev = NULL; 245 struct lowpan_btle_dev *dev = NULL;
243 246
244 rcu_read_lock(); 247 rcu_read_lock();
245 248
@@ -270,10 +273,10 @@ static int iphc_decompress(struct sk_buff *skb, struct net_device *netdev,
270 struct l2cap_chan *chan) 273 struct l2cap_chan *chan)
271{ 274{
272 const u8 *saddr, *daddr; 275 const u8 *saddr, *daddr;
273 struct lowpan_dev *dev; 276 struct lowpan_btle_dev *dev;
274 struct lowpan_peer *peer; 277 struct lowpan_peer *peer;
275 278
276 dev = lowpan_dev(netdev); 279 dev = lowpan_btle_dev(netdev);
277 280
278 rcu_read_lock(); 281 rcu_read_lock();
279 peer = __peer_lookup_chan(dev, chan); 282 peer = __peer_lookup_chan(dev, chan);
@@ -375,7 +378,7 @@ drop:
375/* Packet from BT LE device */ 378/* Packet from BT LE device */
376static int chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb) 379static int chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
377{ 380{
378 struct lowpan_dev *dev; 381 struct lowpan_btle_dev *dev;
379 struct lowpan_peer *peer; 382 struct lowpan_peer *peer;
380 int err; 383 int err;
381 384
@@ -431,15 +434,18 @@ static int setup_header(struct sk_buff *skb, struct net_device *netdev,
431 bdaddr_t *peer_addr, u8 *peer_addr_type) 434 bdaddr_t *peer_addr, u8 *peer_addr_type)
432{ 435{
433 struct in6_addr ipv6_daddr; 436 struct in6_addr ipv6_daddr;
434 struct lowpan_dev *dev; 437 struct ipv6hdr *hdr;
438 struct lowpan_btle_dev *dev;
435 struct lowpan_peer *peer; 439 struct lowpan_peer *peer;
436 bdaddr_t addr, *any = BDADDR_ANY; 440 bdaddr_t addr, *any = BDADDR_ANY;
437 u8 *daddr = any->b; 441 u8 *daddr = any->b;
438 int err, status = 0; 442 int err, status = 0;
439 443
440 dev = lowpan_dev(netdev); 444 hdr = ipv6_hdr(skb);
445
446 dev = lowpan_btle_dev(netdev);
441 447
442 memcpy(&ipv6_daddr, &lowpan_cb(skb)->addr, sizeof(ipv6_daddr)); 448 memcpy(&ipv6_daddr, &hdr->daddr, sizeof(ipv6_daddr));
443 449
444 if (ipv6_addr_is_multicast(&ipv6_daddr)) { 450 if (ipv6_addr_is_multicast(&ipv6_daddr)) {
445 lowpan_cb(skb)->chan = NULL; 451 lowpan_cb(skb)->chan = NULL;
@@ -489,15 +495,9 @@ static int header_create(struct sk_buff *skb, struct net_device *netdev,
489 unsigned short type, const void *_daddr, 495 unsigned short type, const void *_daddr,
490 const void *_saddr, unsigned int len) 496 const void *_saddr, unsigned int len)
491{ 497{
492 struct ipv6hdr *hdr;
493
494 if (type != ETH_P_IPV6) 498 if (type != ETH_P_IPV6)
495 return -EINVAL; 499 return -EINVAL;
496 500
497 hdr = ipv6_hdr(skb);
498
499 memcpy(&lowpan_cb(skb)->addr, &hdr->daddr, sizeof(struct in6_addr));
500
501 return 0; 501 return 0;
502} 502}
503 503
@@ -543,19 +543,19 @@ static int send_pkt(struct l2cap_chan *chan, struct sk_buff *skb,
543static int send_mcast_pkt(struct sk_buff *skb, struct net_device *netdev) 543static int send_mcast_pkt(struct sk_buff *skb, struct net_device *netdev)
544{ 544{
545 struct sk_buff *local_skb; 545 struct sk_buff *local_skb;
546 struct lowpan_dev *entry; 546 struct lowpan_btle_dev *entry;
547 int err = 0; 547 int err = 0;
548 548
549 rcu_read_lock(); 549 rcu_read_lock();
550 550
551 list_for_each_entry_rcu(entry, &bt_6lowpan_devices, list) { 551 list_for_each_entry_rcu(entry, &bt_6lowpan_devices, list) {
552 struct lowpan_peer *pentry; 552 struct lowpan_peer *pentry;
553 struct lowpan_dev *dev; 553 struct lowpan_btle_dev *dev;
554 554
555 if (entry->netdev != netdev) 555 if (entry->netdev != netdev)
556 continue; 556 continue;
557 557
558 dev = lowpan_dev(entry->netdev); 558 dev = lowpan_btle_dev(entry->netdev);
559 559
560 list_for_each_entry_rcu(pentry, &dev->peers, list) { 560 list_for_each_entry_rcu(pentry, &dev->peers, list) {
561 int ret; 561 int ret;
@@ -723,8 +723,8 @@ static void ifdown(struct net_device *netdev)
723 723
724static void do_notify_peers(struct work_struct *work) 724static void do_notify_peers(struct work_struct *work)
725{ 725{
726 struct lowpan_dev *dev = container_of(work, struct lowpan_dev, 726 struct lowpan_btle_dev *dev = container_of(work, struct lowpan_btle_dev,
727 notify_peers.work); 727 notify_peers.work);
728 728
729 netdev_notify_peers(dev->netdev); /* send neighbour adv at startup */ 729 netdev_notify_peers(dev->netdev); /* send neighbour adv at startup */
730} 730}
@@ -766,7 +766,7 @@ static void set_ip_addr_bits(u8 addr_type, u8 *addr)
766} 766}
767 767
768static struct l2cap_chan *add_peer_chan(struct l2cap_chan *chan, 768static struct l2cap_chan *add_peer_chan(struct l2cap_chan *chan,
769 struct lowpan_dev *dev) 769 struct lowpan_btle_dev *dev)
770{ 770{
771 struct lowpan_peer *peer; 771 struct lowpan_peer *peer;
772 772
@@ -803,12 +803,12 @@ static struct l2cap_chan *add_peer_chan(struct l2cap_chan *chan,
803 return peer->chan; 803 return peer->chan;
804} 804}
805 805
806static int setup_netdev(struct l2cap_chan *chan, struct lowpan_dev **dev) 806static int setup_netdev(struct l2cap_chan *chan, struct lowpan_btle_dev **dev)
807{ 807{
808 struct net_device *netdev; 808 struct net_device *netdev;
809 int err = 0; 809 int err = 0;
810 810
811 netdev = alloc_netdev(LOWPAN_PRIV_SIZE(sizeof(struct lowpan_dev)), 811 netdev = alloc_netdev(LOWPAN_PRIV_SIZE(sizeof(struct lowpan_btle_dev)),
812 IFACE_NAME_TEMPLATE, NET_NAME_UNKNOWN, 812 IFACE_NAME_TEMPLATE, NET_NAME_UNKNOWN,
813 netdev_setup); 813 netdev_setup);
814 if (!netdev) 814 if (!netdev)
@@ -820,7 +820,7 @@ static int setup_netdev(struct l2cap_chan *chan, struct lowpan_dev **dev)
820 SET_NETDEV_DEV(netdev, &chan->conn->hcon->hdev->dev); 820 SET_NETDEV_DEV(netdev, &chan->conn->hcon->hdev->dev);
821 SET_NETDEV_DEVTYPE(netdev, &bt_type); 821 SET_NETDEV_DEVTYPE(netdev, &bt_type);
822 822
823 *dev = lowpan_dev(netdev); 823 *dev = lowpan_btle_dev(netdev);
824 (*dev)->netdev = netdev; 824 (*dev)->netdev = netdev;
825 (*dev)->hdev = chan->conn->hcon->hdev; 825 (*dev)->hdev = chan->conn->hcon->hdev;
826 INIT_LIST_HEAD(&(*dev)->peers); 826 INIT_LIST_HEAD(&(*dev)->peers);
@@ -853,7 +853,7 @@ out:
853 853
854static inline void chan_ready_cb(struct l2cap_chan *chan) 854static inline void chan_ready_cb(struct l2cap_chan *chan)
855{ 855{
856 struct lowpan_dev *dev; 856 struct lowpan_btle_dev *dev;
857 857
858 dev = lookup_dev(chan->conn); 858 dev = lookup_dev(chan->conn);
859 859
@@ -890,8 +890,9 @@ static inline struct l2cap_chan *chan_new_conn_cb(struct l2cap_chan *pchan)
890 890
891static void delete_netdev(struct work_struct *work) 891static void delete_netdev(struct work_struct *work)
892{ 892{
893 struct lowpan_dev *entry = container_of(work, struct lowpan_dev, 893 struct lowpan_btle_dev *entry = container_of(work,
894 delete_netdev); 894 struct lowpan_btle_dev,
895 delete_netdev);
895 896
896 lowpan_unregister_netdev(entry->netdev); 897 lowpan_unregister_netdev(entry->netdev);
897 898
@@ -900,8 +901,8 @@ static void delete_netdev(struct work_struct *work)
900 901
901static void chan_close_cb(struct l2cap_chan *chan) 902static void chan_close_cb(struct l2cap_chan *chan)
902{ 903{
903 struct lowpan_dev *entry; 904 struct lowpan_btle_dev *entry;
904 struct lowpan_dev *dev = NULL; 905 struct lowpan_btle_dev *dev = NULL;
905 struct lowpan_peer *peer; 906 struct lowpan_peer *peer;
906 int err = -ENOENT; 907 int err = -ENOENT;
907 bool last = false, remove = true; 908 bool last = false, remove = true;
@@ -921,7 +922,7 @@ static void chan_close_cb(struct l2cap_chan *chan)
921 spin_lock(&devices_lock); 922 spin_lock(&devices_lock);
922 923
923 list_for_each_entry_rcu(entry, &bt_6lowpan_devices, list) { 924 list_for_each_entry_rcu(entry, &bt_6lowpan_devices, list) {
924 dev = lowpan_dev(entry->netdev); 925 dev = lowpan_btle_dev(entry->netdev);
925 peer = __peer_lookup_chan(dev, chan); 926 peer = __peer_lookup_chan(dev, chan);
926 if (peer) { 927 if (peer) {
927 last = peer_del(dev, peer); 928 last = peer_del(dev, peer);
@@ -1131,7 +1132,7 @@ static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type,
1131 1132
1132static void disconnect_all_peers(void) 1133static void disconnect_all_peers(void)
1133{ 1134{
1134 struct lowpan_dev *entry; 1135 struct lowpan_btle_dev *entry;
1135 struct lowpan_peer *peer, *tmp_peer, *new_peer; 1136 struct lowpan_peer *peer, *tmp_peer, *new_peer;
1136 struct list_head peers; 1137 struct list_head peers;
1137 1138
@@ -1291,7 +1292,7 @@ static ssize_t lowpan_control_write(struct file *fp,
1291 1292
1292static int lowpan_control_show(struct seq_file *f, void *ptr) 1293static int lowpan_control_show(struct seq_file *f, void *ptr)
1293{ 1294{
1294 struct lowpan_dev *entry; 1295 struct lowpan_btle_dev *entry;
1295 struct lowpan_peer *peer; 1296 struct lowpan_peer *peer;
1296 1297
1297 spin_lock(&devices_lock); 1298 spin_lock(&devices_lock);
@@ -1322,7 +1323,7 @@ static const struct file_operations lowpan_control_fops = {
1322 1323
1323static void disconnect_devices(void) 1324static void disconnect_devices(void)
1324{ 1325{
1325 struct lowpan_dev *entry, *tmp, *new_dev; 1326 struct lowpan_btle_dev *entry, *tmp, *new_dev;
1326 struct list_head devices; 1327 struct list_head devices;
1327 1328
1328 INIT_LIST_HEAD(&devices); 1329 INIT_LIST_HEAD(&devices);
@@ -1360,7 +1361,7 @@ static int device_event(struct notifier_block *unused,
1360 unsigned long event, void *ptr) 1361 unsigned long event, void *ptr)
1361{ 1362{
1362 struct net_device *netdev = netdev_notifier_info_to_dev(ptr); 1363 struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
1363 struct lowpan_dev *entry; 1364 struct lowpan_btle_dev *entry;
1364 1365
1365 if (netdev->type != ARPHRD_6LOWPAN) 1366 if (netdev->type != ARPHRD_6LOWPAN)
1366 return NOTIFY_DONE; 1367 return NOTIFY_DONE;
diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c
index 6ceb5d36a32b..f4fcb4a9d5c1 100644
--- a/net/bluetooth/bnep/netdev.c
+++ b/net/bluetooth/bnep/netdev.c
@@ -188,7 +188,7 @@ static netdev_tx_t bnep_net_xmit(struct sk_buff *skb,
188 * So we have to queue them and wake up session thread which is sleeping 188 * So we have to queue them and wake up session thread which is sleeping
189 * on the sk_sleep(sk). 189 * on the sk_sleep(sk).
190 */ 190 */
191 dev->trans_start = jiffies; 191 netif_trans_update(dev);
192 skb_queue_tail(&sk->sk_write_queue, skb); 192 skb_queue_tail(&sk->sk_write_queue, skb);
193 wake_up_interruptible(sk_sleep(sk)); 193 wake_up_interruptible(sk_sleep(sk));
194 194
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index f8fc6241469a..d99b2009771a 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -21,18 +21,19 @@
21#include <asm/uaccess.h> 21#include <asm/uaccess.h>
22#include "br_private.h" 22#include "br_private.h"
23 23
24/* called with RTNL */
25static int get_bridge_ifindices(struct net *net, int *indices, int num) 24static int get_bridge_ifindices(struct net *net, int *indices, int num)
26{ 25{
27 struct net_device *dev; 26 struct net_device *dev;
28 int i = 0; 27 int i = 0;
29 28
30 for_each_netdev(net, dev) { 29 rcu_read_lock();
30 for_each_netdev_rcu(net, dev) {
31 if (i >= num) 31 if (i >= num)
32 break; 32 break;
33 if (dev->priv_flags & IFF_EBRIDGE) 33 if (dev->priv_flags & IFF_EBRIDGE)
34 indices[i++] = dev->ifindex; 34 indices[i++] = dev->ifindex;
35 } 35 }
36 rcu_read_unlock();
36 37
37 return i; 38 return i;
38} 39}
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 253bc77eda3b..7dbc80d01eb0 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -61,6 +61,19 @@ static void __mdb_entry_fill_flags(struct br_mdb_entry *e, unsigned char flags)
61 e->flags |= MDB_FLAGS_OFFLOAD; 61 e->flags |= MDB_FLAGS_OFFLOAD;
62} 62}
63 63
64static void __mdb_entry_to_br_ip(struct br_mdb_entry *entry, struct br_ip *ip)
65{
66 memset(ip, 0, sizeof(struct br_ip));
67 ip->vid = entry->vid;
68 ip->proto = entry->addr.proto;
69 if (ip->proto == htons(ETH_P_IP))
70 ip->u.ip4 = entry->addr.u.ip4;
71#if IS_ENABLED(CONFIG_IPV6)
72 else
73 ip->u.ip6 = entry->addr.u.ip6;
74#endif
75}
76
64static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, 77static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
65 struct net_device *dev) 78 struct net_device *dev)
66{ 79{
@@ -243,9 +256,45 @@ static inline size_t rtnl_mdb_nlmsg_size(void)
243 + nla_total_size(sizeof(struct br_mdb_entry)); 256 + nla_total_size(sizeof(struct br_mdb_entry));
244} 257}
245 258
246static void __br_mdb_notify(struct net_device *dev, struct br_mdb_entry *entry, 259struct br_mdb_complete_info {
247 int type, struct net_bridge_port_group *pg) 260 struct net_bridge_port *port;
261 struct br_ip ip;
262};
263
264static void br_mdb_complete(struct net_device *dev, int err, void *priv)
248{ 265{
266 struct br_mdb_complete_info *data = priv;
267 struct net_bridge_port_group __rcu **pp;
268 struct net_bridge_port_group *p;
269 struct net_bridge_mdb_htable *mdb;
270 struct net_bridge_mdb_entry *mp;
271 struct net_bridge_port *port = data->port;
272 struct net_bridge *br = port->br;
273
274 if (err)
275 goto err;
276
277 spin_lock_bh(&br->multicast_lock);
278 mdb = mlock_dereference(br->mdb, br);
279 mp = br_mdb_ip_get(mdb, &data->ip);
280 if (!mp)
281 goto out;
282 for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL;
283 pp = &p->next) {
284 if (p->port != port)
285 continue;
286 p->flags |= MDB_PG_FLAGS_OFFLOAD;
287 }
288out:
289 spin_unlock_bh(&br->multicast_lock);
290err:
291 kfree(priv);
292}
293
294static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p,
295 struct br_mdb_entry *entry, int type)
296{
297 struct br_mdb_complete_info *complete_info;
249 struct switchdev_obj_port_mdb mdb = { 298 struct switchdev_obj_port_mdb mdb = {
250 .obj = { 299 .obj = {
251 .id = SWITCHDEV_OBJ_ID_PORT_MDB, 300 .id = SWITCHDEV_OBJ_ID_PORT_MDB,
@@ -268,9 +317,14 @@ static void __br_mdb_notify(struct net_device *dev, struct br_mdb_entry *entry,
268 317
269 mdb.obj.orig_dev = port_dev; 318 mdb.obj.orig_dev = port_dev;
270 if (port_dev && type == RTM_NEWMDB) { 319 if (port_dev && type == RTM_NEWMDB) {
271 err = switchdev_port_obj_add(port_dev, &mdb.obj); 320 complete_info = kmalloc(sizeof(*complete_info), GFP_ATOMIC);
272 if (!err && pg) 321 if (complete_info) {
273 pg->flags |= MDB_PG_FLAGS_OFFLOAD; 322 complete_info->port = p;
323 __mdb_entry_to_br_ip(entry, &complete_info->ip);
324 mdb.obj.complete_priv = complete_info;
325 mdb.obj.complete = br_mdb_complete;
326 switchdev_port_obj_add(port_dev, &mdb.obj);
327 }
274 } else if (port_dev && type == RTM_DELMDB) { 328 } else if (port_dev && type == RTM_DELMDB) {
275 switchdev_port_obj_del(port_dev, &mdb.obj); 329 switchdev_port_obj_del(port_dev, &mdb.obj);
276 } 330 }
@@ -291,21 +345,21 @@ errout:
291 rtnl_set_sk_err(net, RTNLGRP_MDB, err); 345 rtnl_set_sk_err(net, RTNLGRP_MDB, err);
292} 346}
293 347
294void br_mdb_notify(struct net_device *dev, struct net_bridge_port_group *pg, 348void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
295 int type) 349 struct br_ip *group, int type, u8 flags)
296{ 350{
297 struct br_mdb_entry entry; 351 struct br_mdb_entry entry;
298 352
299 memset(&entry, 0, sizeof(entry)); 353 memset(&entry, 0, sizeof(entry));
300 entry.ifindex = pg->port->dev->ifindex; 354 entry.ifindex = port->dev->ifindex;
301 entry.addr.proto = pg->addr.proto; 355 entry.addr.proto = group->proto;
302 entry.addr.u.ip4 = pg->addr.u.ip4; 356 entry.addr.u.ip4 = group->u.ip4;
303#if IS_ENABLED(CONFIG_IPV6) 357#if IS_ENABLED(CONFIG_IPV6)
304 entry.addr.u.ip6 = pg->addr.u.ip6; 358 entry.addr.u.ip6 = group->u.ip6;
305#endif 359#endif
306 entry.vid = pg->addr.vid; 360 entry.vid = group->vid;
307 __mdb_entry_fill_flags(&entry, pg->flags); 361 __mdb_entry_fill_flags(&entry, flags);
308 __br_mdb_notify(dev, &entry, type, pg); 362 __br_mdb_notify(dev, port, &entry, type);
309} 363}
310 364
311static int nlmsg_populate_rtr_fill(struct sk_buff *skb, 365static int nlmsg_populate_rtr_fill(struct sk_buff *skb,
@@ -450,8 +504,7 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh,
450} 504}
451 505
452static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, 506static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
453 struct br_ip *group, unsigned char state, 507 struct br_ip *group, unsigned char state)
454 struct net_bridge_port_group **pg)
455{ 508{
456 struct net_bridge_mdb_entry *mp; 509 struct net_bridge_mdb_entry *mp;
457 struct net_bridge_port_group *p; 510 struct net_bridge_port_group *p;
@@ -482,7 +535,6 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
482 if (unlikely(!p)) 535 if (unlikely(!p))
483 return -ENOMEM; 536 return -ENOMEM;
484 rcu_assign_pointer(*pp, p); 537 rcu_assign_pointer(*pp, p);
485 *pg = p;
486 if (state == MDB_TEMPORARY) 538 if (state == MDB_TEMPORARY)
487 mod_timer(&p->timer, now + br->multicast_membership_interval); 539 mod_timer(&p->timer, now + br->multicast_membership_interval);
488 540
@@ -490,8 +542,7 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
490} 542}
491 543
492static int __br_mdb_add(struct net *net, struct net_bridge *br, 544static int __br_mdb_add(struct net *net, struct net_bridge *br,
493 struct br_mdb_entry *entry, 545 struct br_mdb_entry *entry)
494 struct net_bridge_port_group **pg)
495{ 546{
496 struct br_ip ip; 547 struct br_ip ip;
497 struct net_device *dev; 548 struct net_device *dev;
@@ -509,18 +560,10 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br,
509 if (!p || p->br != br || p->state == BR_STATE_DISABLED) 560 if (!p || p->br != br || p->state == BR_STATE_DISABLED)
510 return -EINVAL; 561 return -EINVAL;
511 562
512 memset(&ip, 0, sizeof(ip)); 563 __mdb_entry_to_br_ip(entry, &ip);
513 ip.vid = entry->vid;
514 ip.proto = entry->addr.proto;
515 if (ip.proto == htons(ETH_P_IP))
516 ip.u.ip4 = entry->addr.u.ip4;
517#if IS_ENABLED(CONFIG_IPV6)
518 else
519 ip.u.ip6 = entry->addr.u.ip6;
520#endif
521 564
522 spin_lock_bh(&br->multicast_lock); 565 spin_lock_bh(&br->multicast_lock);
523 ret = br_mdb_add_group(br, p, &ip, entry->state, pg); 566 ret = br_mdb_add_group(br, p, &ip, entry->state);
524 spin_unlock_bh(&br->multicast_lock); 567 spin_unlock_bh(&br->multicast_lock);
525 return ret; 568 return ret;
526} 569}
@@ -528,7 +571,6 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br,
528static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) 571static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
529{ 572{
530 struct net *net = sock_net(skb->sk); 573 struct net *net = sock_net(skb->sk);
531 struct net_bridge_port_group *pg;
532 struct net_bridge_vlan_group *vg; 574 struct net_bridge_vlan_group *vg;
533 struct net_device *dev, *pdev; 575 struct net_device *dev, *pdev;
534 struct br_mdb_entry *entry; 576 struct br_mdb_entry *entry;
@@ -558,15 +600,15 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
558 if (br_vlan_enabled(br) && vg && entry->vid == 0) { 600 if (br_vlan_enabled(br) && vg && entry->vid == 0) {
559 list_for_each_entry(v, &vg->vlan_list, vlist) { 601 list_for_each_entry(v, &vg->vlan_list, vlist) {
560 entry->vid = v->vid; 602 entry->vid = v->vid;
561 err = __br_mdb_add(net, br, entry, &pg); 603 err = __br_mdb_add(net, br, entry);
562 if (err) 604 if (err)
563 break; 605 break;
564 __br_mdb_notify(dev, entry, RTM_NEWMDB, pg); 606 __br_mdb_notify(dev, p, entry, RTM_NEWMDB);
565 } 607 }
566 } else { 608 } else {
567 err = __br_mdb_add(net, br, entry, &pg); 609 err = __br_mdb_add(net, br, entry);
568 if (!err) 610 if (!err)
569 __br_mdb_notify(dev, entry, RTM_NEWMDB, pg); 611 __br_mdb_notify(dev, p, entry, RTM_NEWMDB);
570 } 612 }
571 613
572 return err; 614 return err;
@@ -584,15 +626,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
584 if (!netif_running(br->dev) || br->multicast_disabled) 626 if (!netif_running(br->dev) || br->multicast_disabled)
585 return -EINVAL; 627 return -EINVAL;
586 628
587 memset(&ip, 0, sizeof(ip)); 629 __mdb_entry_to_br_ip(entry, &ip);
588 ip.vid = entry->vid;
589 ip.proto = entry->addr.proto;
590 if (ip.proto == htons(ETH_P_IP))
591 ip.u.ip4 = entry->addr.u.ip4;
592#if IS_ENABLED(CONFIG_IPV6)
593 else
594 ip.u.ip6 = entry->addr.u.ip6;
595#endif
596 630
597 spin_lock_bh(&br->multicast_lock); 631 spin_lock_bh(&br->multicast_lock);
598 mdb = mlock_dereference(br->mdb, br); 632 mdb = mlock_dereference(br->mdb, br);
@@ -662,12 +696,12 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
662 entry->vid = v->vid; 696 entry->vid = v->vid;
663 err = __br_mdb_del(br, entry); 697 err = __br_mdb_del(br, entry);
664 if (!err) 698 if (!err)
665 __br_mdb_notify(dev, entry, RTM_DELMDB, NULL); 699 __br_mdb_notify(dev, p, entry, RTM_DELMDB);
666 } 700 }
667 } else { 701 } else {
668 err = __br_mdb_del(br, entry); 702 err = __br_mdb_del(br, entry);
669 if (!err) 703 if (!err)
670 __br_mdb_notify(dev, entry, RTM_DELMDB, NULL); 704 __br_mdb_notify(dev, p, entry, RTM_DELMDB);
671 } 705 }
672 706
673 return err; 707 return err;
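Taken together, the br_mdb.c hunks make MDB offload flagging asynchronous: __br_mdb_notify() attaches a br_mdb_complete_info to the switchdev object instead of flipping the flag inline, and br_mdb_complete() later looks the group up again under multicast_lock and sets MDB_PG_FLAGS_OFFLOAD only once the driver has confirmed the add. The add-side flow, condensed from the hunks above:

	complete_info = kmalloc(sizeof(*complete_info), GFP_ATOMIC);
	if (complete_info) {
		complete_info->port = p;
		__mdb_entry_to_br_ip(entry, &complete_info->ip);
		mdb.obj.complete_priv = complete_info;
		mdb.obj.complete = br_mdb_complete;	/* sets MDB_PG_FLAGS_OFFLOAD on success */
		switchdev_port_obj_add(port_dev, &mdb.obj);
	}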
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index a4c15df2b792..6852f3c7009c 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -283,7 +283,8 @@ static void br_multicast_del_pg(struct net_bridge *br,
283 rcu_assign_pointer(*pp, p->next); 283 rcu_assign_pointer(*pp, p->next);
284 hlist_del_init(&p->mglist); 284 hlist_del_init(&p->mglist);
285 del_timer(&p->timer); 285 del_timer(&p->timer);
286 br_mdb_notify(br->dev, p, RTM_DELMDB); 286 br_mdb_notify(br->dev, p->port, &pg->addr, RTM_DELMDB,
287 p->flags);
287 call_rcu_bh(&p->rcu, br_multicast_free_pg); 288 call_rcu_bh(&p->rcu, br_multicast_free_pg);
288 289
289 if (!mp->ports && !mp->mglist && 290 if (!mp->ports && !mp->mglist &&
@@ -705,7 +706,7 @@ static int br_multicast_add_group(struct net_bridge *br,
705 if (unlikely(!p)) 706 if (unlikely(!p))
706 goto err; 707 goto err;
707 rcu_assign_pointer(*pp, p); 708 rcu_assign_pointer(*pp, p);
708 br_mdb_notify(br->dev, p, RTM_NEWMDB); 709 br_mdb_notify(br->dev, port, group, RTM_NEWMDB, 0);
709 710
710found: 711found:
711 mod_timer(&p->timer, now + br->multicast_membership_interval); 712 mod_timer(&p->timer, now + br->multicast_membership_interval);
@@ -1278,6 +1279,7 @@ static int br_ip4_multicast_query(struct net_bridge *br,
1278 struct br_ip saddr; 1279 struct br_ip saddr;
1279 unsigned long max_delay; 1280 unsigned long max_delay;
1280 unsigned long now = jiffies; 1281 unsigned long now = jiffies;
1282 unsigned int offset = skb_transport_offset(skb);
1281 __be32 group; 1283 __be32 group;
1282 int err = 0; 1284 int err = 0;
1283 1285
@@ -1288,14 +1290,14 @@ static int br_ip4_multicast_query(struct net_bridge *br,
1288 1290
1289 group = ih->group; 1291 group = ih->group;
1290 1292
1291 if (skb->len == sizeof(*ih)) { 1293 if (skb->len == offset + sizeof(*ih)) {
1292 max_delay = ih->code * (HZ / IGMP_TIMER_SCALE); 1294 max_delay = ih->code * (HZ / IGMP_TIMER_SCALE);
1293 1295
1294 if (!max_delay) { 1296 if (!max_delay) {
1295 max_delay = 10 * HZ; 1297 max_delay = 10 * HZ;
1296 group = 0; 1298 group = 0;
1297 } 1299 }
1298 } else if (skb->len >= sizeof(*ih3)) { 1300 } else if (skb->len >= offset + sizeof(*ih3)) {
1299 ih3 = igmpv3_query_hdr(skb); 1301 ih3 = igmpv3_query_hdr(skb);
1300 if (ih3->nsrcs) 1302 if (ih3->nsrcs)
1301 goto out; 1303 goto out;
@@ -1356,6 +1358,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
1356 struct br_ip saddr; 1358 struct br_ip saddr;
1357 unsigned long max_delay; 1359 unsigned long max_delay;
1358 unsigned long now = jiffies; 1360 unsigned long now = jiffies;
1361 unsigned int offset = skb_transport_offset(skb);
1359 const struct in6_addr *group = NULL; 1362 const struct in6_addr *group = NULL;
1360 bool is_general_query; 1363 bool is_general_query;
1361 int err = 0; 1364 int err = 0;
@@ -1365,8 +1368,8 @@ static int br_ip6_multicast_query(struct net_bridge *br,
1365 (port && port->state == BR_STATE_DISABLED)) 1368 (port && port->state == BR_STATE_DISABLED))
1366 goto out; 1369 goto out;
1367 1370
1368 if (skb->len == sizeof(*mld)) { 1371 if (skb->len == offset + sizeof(*mld)) {
1369 if (!pskb_may_pull(skb, sizeof(*mld))) { 1372 if (!pskb_may_pull(skb, offset + sizeof(*mld))) {
1370 err = -EINVAL; 1373 err = -EINVAL;
1371 goto out; 1374 goto out;
1372 } 1375 }
@@ -1375,7 +1378,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
1375 if (max_delay) 1378 if (max_delay)
1376 group = &mld->mld_mca; 1379 group = &mld->mld_mca;
1377 } else { 1380 } else {
1378 if (!pskb_may_pull(skb, sizeof(*mld2q))) { 1381 if (!pskb_may_pull(skb, offset + sizeof(*mld2q))) {
1379 err = -EINVAL; 1382 err = -EINVAL;
1380 goto out; 1383 goto out;
1381 } 1384 }
@@ -1461,7 +1464,8 @@ br_multicast_leave_group(struct net_bridge *br,
1461 hlist_del_init(&p->mglist); 1464 hlist_del_init(&p->mglist);
1462 del_timer(&p->timer); 1465 del_timer(&p->timer);
1463 call_rcu_bh(&p->rcu, br_multicast_free_pg); 1466 call_rcu_bh(&p->rcu, br_multicast_free_pg);
1464 br_mdb_notify(br->dev, p, RTM_DELMDB); 1467 br_mdb_notify(br->dev, port, group, RTM_DELMDB,
1468 p->flags);
1465 1469
1466 if (!mp->ports && !mp->mglist && 1470 if (!mp->ports && !mp->mglist &&
1467 netif_running(br->dev)) 1471 netif_running(br->dev))
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 44114a94c576..2d25979273a6 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -217,13 +217,13 @@ static int br_validate_ipv4(struct net *net, struct sk_buff *skb)
217 217
218 len = ntohs(iph->tot_len); 218 len = ntohs(iph->tot_len);
219 if (skb->len < len) { 219 if (skb->len < len) {
220 IP_INC_STATS_BH(net, IPSTATS_MIB_INTRUNCATEDPKTS); 220 __IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS);
221 goto drop; 221 goto drop;
222 } else if (len < (iph->ihl*4)) 222 } else if (len < (iph->ihl*4))
223 goto inhdr_error; 223 goto inhdr_error;
224 224
225 if (pskb_trim_rcsum(skb, len)) { 225 if (pskb_trim_rcsum(skb, len)) {
226 IP_INC_STATS_BH(net, IPSTATS_MIB_INDISCARDS); 226 __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS);
227 goto drop; 227 goto drop;
228 } 228 }
229 229
@@ -236,7 +236,7 @@ static int br_validate_ipv4(struct net *net, struct sk_buff *skb)
236 return 0; 236 return 0;
237 237
238inhdr_error: 238inhdr_error:
239 IP_INC_STATS_BH(net, IPSTATS_MIB_INHDRERRORS); 239 __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
240drop: 240drop:
241 return -1; 241 return -1;
242} 242}
diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c
index d61f56efc8dc..5e59a8457e7b 100644
--- a/net/bridge/br_netfilter_ipv6.c
+++ b/net/bridge/br_netfilter_ipv6.c
@@ -122,13 +122,13 @@ int br_validate_ipv6(struct net *net, struct sk_buff *skb)
122 122
123 if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { 123 if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
124 if (pkt_len + ip6h_len > skb->len) { 124 if (pkt_len + ip6h_len > skb->len) {
125 IP6_INC_STATS_BH(net, idev, 125 __IP6_INC_STATS(net, idev,
126 IPSTATS_MIB_INTRUNCATEDPKTS); 126 IPSTATS_MIB_INTRUNCATEDPKTS);
127 goto drop; 127 goto drop;
128 } 128 }
129 if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) { 129 if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) {
130 IP6_INC_STATS_BH(net, idev, 130 __IP6_INC_STATS(net, idev,
131 IPSTATS_MIB_INDISCARDS); 131 IPSTATS_MIB_INDISCARDS);
132 goto drop; 132 goto drop;
133 } 133 }
134 } 134 }
@@ -142,7 +142,7 @@ int br_validate_ipv6(struct net *net, struct sk_buff *skb)
142 return 0; 142 return 0;
143 143
144inhdr_error: 144inhdr_error:
145 IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS); 145 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
146drop: 146drop:
147 return -1; 147 return -1;
148} 148}
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 6bae1125e36d..a5343c7232bf 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -850,6 +850,7 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = {
850 [IFLA_BR_NF_CALL_IP6TABLES] = { .type = NLA_U8 }, 850 [IFLA_BR_NF_CALL_IP6TABLES] = { .type = NLA_U8 },
851 [IFLA_BR_NF_CALL_ARPTABLES] = { .type = NLA_U8 }, 851 [IFLA_BR_NF_CALL_ARPTABLES] = { .type = NLA_U8 },
852 [IFLA_BR_VLAN_DEFAULT_PVID] = { .type = NLA_U16 }, 852 [IFLA_BR_VLAN_DEFAULT_PVID] = { .type = NLA_U16 },
853 [IFLA_BR_VLAN_STATS_ENABLED] = { .type = NLA_U8 },
853}; 854};
854 855
855static int br_changelink(struct net_device *brdev, struct nlattr *tb[], 856static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
@@ -921,6 +922,14 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
921 if (err) 922 if (err)
922 return err; 923 return err;
923 } 924 }
925
926 if (data[IFLA_BR_VLAN_STATS_ENABLED]) {
927 __u8 vlan_stats = nla_get_u8(data[IFLA_BR_VLAN_STATS_ENABLED]);
928
929 err = br_vlan_set_stats(br, vlan_stats);
930 if (err)
931 return err;
932 }
924#endif 933#endif
925 934
926 if (data[IFLA_BR_GROUP_FWD_MASK]) { 935 if (data[IFLA_BR_GROUP_FWD_MASK]) {
@@ -1082,6 +1091,7 @@ static size_t br_get_size(const struct net_device *brdev)
1082#ifdef CONFIG_BRIDGE_VLAN_FILTERING 1091#ifdef CONFIG_BRIDGE_VLAN_FILTERING
1083 nla_total_size(sizeof(__be16)) + /* IFLA_BR_VLAN_PROTOCOL */ 1092 nla_total_size(sizeof(__be16)) + /* IFLA_BR_VLAN_PROTOCOL */
1084 nla_total_size(sizeof(u16)) + /* IFLA_BR_VLAN_DEFAULT_PVID */ 1093 nla_total_size(sizeof(u16)) + /* IFLA_BR_VLAN_DEFAULT_PVID */
1094 nla_total_size(sizeof(u8)) + /* IFLA_BR_VLAN_STATS_ENABLED */
1085#endif 1095#endif
1086 nla_total_size(sizeof(u16)) + /* IFLA_BR_GROUP_FWD_MASK */ 1096 nla_total_size(sizeof(u16)) + /* IFLA_BR_GROUP_FWD_MASK */
1087 nla_total_size(sizeof(struct ifla_bridge_id)) + /* IFLA_BR_ROOT_ID */ 1097 nla_total_size(sizeof(struct ifla_bridge_id)) + /* IFLA_BR_ROOT_ID */
@@ -1167,7 +1177,8 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
1167 1177
1168#ifdef CONFIG_BRIDGE_VLAN_FILTERING 1178#ifdef CONFIG_BRIDGE_VLAN_FILTERING
1169 if (nla_put_be16(skb, IFLA_BR_VLAN_PROTOCOL, br->vlan_proto) || 1179 if (nla_put_be16(skb, IFLA_BR_VLAN_PROTOCOL, br->vlan_proto) ||
1170 nla_put_u16(skb, IFLA_BR_VLAN_DEFAULT_PVID, br->default_pvid)) 1180 nla_put_u16(skb, IFLA_BR_VLAN_DEFAULT_PVID, br->default_pvid) ||
1181 nla_put_u8(skb, IFLA_BR_VLAN_STATS_ENABLED, br->vlan_stats_enabled))
1171 return -EMSGSIZE; 1182 return -EMSGSIZE;
1172#endif 1183#endif
1173#ifdef CONFIG_BRIDGE_IGMP_SNOOPING 1184#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
@@ -1223,6 +1234,69 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
1223 return 0; 1234 return 0;
1224} 1235}
1225 1236
1237static size_t br_get_linkxstats_size(const struct net_device *dev)
1238{
1239 struct net_bridge *br = netdev_priv(dev);
1240 struct net_bridge_vlan_group *vg;
1241 struct net_bridge_vlan *v;
1242 int numvls = 0;
1243
1244 vg = br_vlan_group(br);
1245 if (!vg)
1246 return 0;
1247
1248 /* we need to count all, even placeholder entries */
1249 list_for_each_entry(v, &vg->vlan_list, vlist)
1250 numvls++;
1251
1252 /* account for the vlans and the link xstats type nest attribute */
1253 return numvls * nla_total_size(sizeof(struct bridge_vlan_xstats)) +
1254 nla_total_size(0);
1255}
1256
1257static int br_fill_linkxstats(struct sk_buff *skb, const struct net_device *dev,
1258 int *prividx)
1259{
1260 struct net_bridge *br = netdev_priv(dev);
1261 struct net_bridge_vlan_group *vg;
1262 struct net_bridge_vlan *v;
1263 struct nlattr *nest;
1264 int vl_idx = 0;
1265
1266 vg = br_vlan_group(br);
1267 if (!vg)
1268 goto out;
1269 nest = nla_nest_start(skb, LINK_XSTATS_TYPE_BRIDGE);
1270 if (!nest)
1271 return -EMSGSIZE;
1272 list_for_each_entry(v, &vg->vlan_list, vlist) {
1273 struct bridge_vlan_xstats vxi;
1274 struct br_vlan_stats stats;
1275
1276 if (vl_idx++ < *prividx)
1277 continue;
1278 memset(&vxi, 0, sizeof(vxi));
1279 vxi.vid = v->vid;
1280 br_vlan_get_stats(v, &stats);
1281 vxi.rx_bytes = stats.rx_bytes;
1282 vxi.rx_packets = stats.rx_packets;
1283 vxi.tx_bytes = stats.tx_bytes;
1284 vxi.tx_packets = stats.tx_packets;
1285
1286 if (nla_put(skb, BRIDGE_XSTATS_VLAN, sizeof(vxi), &vxi))
1287 goto nla_put_failure;
1288 }
1289 nla_nest_end(skb, nest);
1290 *prividx = 0;
1291out:
1292 return 0;
1293
1294nla_put_failure:
1295 nla_nest_end(skb, nest);
1296 *prividx = vl_idx;
1297
1298 return -EMSGSIZE;
1299}
1226 1300
1227static struct rtnl_af_ops br_af_ops __read_mostly = { 1301static struct rtnl_af_ops br_af_ops __read_mostly = {
1228 .family = AF_BRIDGE, 1302 .family = AF_BRIDGE,
@@ -1241,6 +1315,8 @@ struct rtnl_link_ops br_link_ops __read_mostly = {
1241 .dellink = br_dev_delete, 1315 .dellink = br_dev_delete,
1242 .get_size = br_get_size, 1316 .get_size = br_get_size,
1243 .fill_info = br_fill_info, 1317 .fill_info = br_fill_info,
1318 .fill_linkxstats = br_fill_linkxstats,
1319 .get_linkxstats_size = br_get_linkxstats_size,
1244 1320
1245 .slave_maxtype = IFLA_BRPORT_MAX, 1321 .slave_maxtype = IFLA_BRPORT_MAX,
1246 .slave_policy = br_port_policy, 1322 .slave_policy = br_port_policy,
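
Note: br_fill_linkxstats() above dumps one nested attribute per VLAN and uses *prividx to resume a dump that ran out of skb space. A generic sketch of that resume pattern is below; the item type and attribute number are hypothetical, and the resume point is chosen so the entry that failed to fit is retried on the next pass.

#include <linux/list.h>
#include <net/netlink.h>

struct entry {				/* hypothetical item type */
	struct list_head list;
	u32 value;
};

static int dump_entries(struct sk_buff *skb, struct list_head *head,
			int *prividx)
{
	struct entry *e;
	int idx = 0;

	list_for_each_entry(e, head, list) {
		if (idx < *prividx) {	/* already emitted in a prior pass */
			idx++;
			continue;
		}
		if (nla_put_u32(skb, 1 /* hypothetical attr type */, e->value)) {
			*prividx = idx;	/* retry this entry next time */
			return -EMSGSIZE;
		}
		idx++;
	}
	*prividx = 0;			/* full dump done, start over next time */
	return 0;
}
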
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 1b5d145dfcbf..c7fb5d7a7218 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -77,12 +77,21 @@ struct bridge_mcast_querier {
77}; 77};
78#endif 78#endif
79 79
80struct br_vlan_stats {
81 u64 rx_bytes;
82 u64 rx_packets;
83 u64 tx_bytes;
84 u64 tx_packets;
85 struct u64_stats_sync syncp;
86};
87
80/** 88/**
81 * struct net_bridge_vlan - per-vlan entry 89 * struct net_bridge_vlan - per-vlan entry
82 * 90 *
83 * @vnode: rhashtable member 91 * @vnode: rhashtable member
84 * @vid: VLAN id 92 * @vid: VLAN id
85 * @flags: bridge vlan flags 93 * @flags: bridge vlan flags
94 * @stats: per-cpu VLAN statistics
86 * @br: if MASTER flag set, this points to a bridge struct 95 * @br: if MASTER flag set, this points to a bridge struct
87 * @port: if MASTER flag unset, this points to a port struct 96 * @port: if MASTER flag unset, this points to a port struct
88 * @refcnt: if MASTER flag set, this is bumped for each port referencing it 97 * @refcnt: if MASTER flag set, this is bumped for each port referencing it
@@ -100,6 +109,7 @@ struct net_bridge_vlan {
100 struct rhash_head vnode; 109 struct rhash_head vnode;
101 u16 vid; 110 u16 vid;
102 u16 flags; 111 u16 flags;
112 struct br_vlan_stats __percpu *stats;
103 union { 113 union {
104 struct net_bridge *br; 114 struct net_bridge *br;
105 struct net_bridge_port *port; 115 struct net_bridge_port *port;
@@ -342,6 +352,7 @@ struct net_bridge
342#ifdef CONFIG_BRIDGE_VLAN_FILTERING 352#ifdef CONFIG_BRIDGE_VLAN_FILTERING
343 struct net_bridge_vlan_group __rcu *vlgrp; 353 struct net_bridge_vlan_group __rcu *vlgrp;
344 u8 vlan_enabled; 354 u8 vlan_enabled;
355 u8 vlan_stats_enabled;
345 __be16 vlan_proto; 356 __be16 vlan_proto;
346 u16 default_pvid; 357 u16 default_pvid;
347#endif 358#endif
@@ -560,8 +571,8 @@ br_multicast_new_port_group(struct net_bridge_port *port, struct br_ip *group,
560 unsigned char flags); 571 unsigned char flags);
561void br_mdb_init(void); 572void br_mdb_init(void);
562void br_mdb_uninit(void); 573void br_mdb_uninit(void);
563void br_mdb_notify(struct net_device *dev, struct net_bridge_port_group *pg, 574void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
564 int type); 575 struct br_ip *group, int type, u8 flags);
565void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port, 576void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port,
566 int type); 577 int type);
567 578
@@ -691,6 +702,7 @@ int __br_vlan_filter_toggle(struct net_bridge *br, unsigned long val);
691int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val); 702int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val);
692int __br_vlan_set_proto(struct net_bridge *br, __be16 proto); 703int __br_vlan_set_proto(struct net_bridge *br, __be16 proto);
693int br_vlan_set_proto(struct net_bridge *br, unsigned long val); 704int br_vlan_set_proto(struct net_bridge *br, unsigned long val);
705int br_vlan_set_stats(struct net_bridge *br, unsigned long val);
694int br_vlan_init(struct net_bridge *br); 706int br_vlan_init(struct net_bridge *br);
695int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val); 707int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val);
696int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid); 708int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid);
@@ -699,6 +711,8 @@ int nbp_vlan_delete(struct net_bridge_port *port, u16 vid);
699void nbp_vlan_flush(struct net_bridge_port *port); 711void nbp_vlan_flush(struct net_bridge_port *port);
700int nbp_vlan_init(struct net_bridge_port *port); 712int nbp_vlan_init(struct net_bridge_port *port);
701int nbp_get_num_vlan_infos(struct net_bridge_port *p, u32 filter_mask); 713int nbp_get_num_vlan_infos(struct net_bridge_port *p, u32 filter_mask);
714void br_vlan_get_stats(const struct net_bridge_vlan *v,
715 struct br_vlan_stats *stats);
702 716
703static inline struct net_bridge_vlan_group *br_vlan_group( 717static inline struct net_bridge_vlan_group *br_vlan_group(
704 const struct net_bridge *br) 718 const struct net_bridge *br)
@@ -881,6 +895,10 @@ static inline struct net_bridge_vlan_group *nbp_vlan_group_rcu(
881 return NULL; 895 return NULL;
882} 896}
883 897
898static inline void br_vlan_get_stats(const struct net_bridge_vlan *v,
899 struct br_vlan_stats *stats)
900{
901}
884#endif 902#endif
885 903
886struct nf_br_ops { 904struct nf_br_ops {
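
Note: struct br_vlan_stats above pairs plain u64 counters with a u64_stats_sync so 32-bit hosts can read consistent 64-bit values without a lock. A minimal sketch of the write and read sides follows, using the same helpers the br_vlan.c hunks below use; the struct and function names here are only illustrative.

#include <linux/percpu.h>
#include <linux/u64_stats_sync.h>

struct demo_stats {			/* stand-in for struct br_vlan_stats */
	u64 rx_bytes;
	u64 rx_packets;
	struct u64_stats_sync syncp;
};

/* fast path: per-CPU writer */
static void demo_account_rx(struct demo_stats __percpu *pstats, unsigned int len)
{
	struct demo_stats *stats = this_cpu_ptr(pstats);

	u64_stats_update_begin(&stats->syncp);
	stats->rx_bytes += len;
	stats->rx_packets++;
	u64_stats_update_end(&stats->syncp);
}

/* slow path: reader retries if a writer was active on that CPU */
static void demo_fold_rx(struct demo_stats __percpu *pstats,
			 u64 *bytes, u64 *packets)
{
	int cpu;

	*bytes = *packets = 0;
	for_each_possible_cpu(cpu) {
		const struct demo_stats *s = per_cpu_ptr(pstats, cpu);
		unsigned int start;
		u64 b, p;

		do {
			start = u64_stats_fetch_begin_irq(&s->syncp);
			b = s->rx_bytes;
			p = s->rx_packets;
		} while (u64_stats_fetch_retry_irq(&s->syncp, start));
		*bytes += b;
		*packets += p;
	}
}

Allocation is done with netdev_alloc_pcpu_stats(struct demo_stats), which zeroes the counters and initializes each CPU's syncp, as in the br_vlan_add() hunk further down.
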
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 70bddfd0f3e9..beb47071e38d 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -731,6 +731,22 @@ static ssize_t default_pvid_store(struct device *d,
731 return store_bridge_parm(d, buf, len, br_vlan_set_default_pvid); 731 return store_bridge_parm(d, buf, len, br_vlan_set_default_pvid);
732} 732}
733static DEVICE_ATTR_RW(default_pvid); 733static DEVICE_ATTR_RW(default_pvid);
734
735static ssize_t vlan_stats_enabled_show(struct device *d,
736 struct device_attribute *attr,
737 char *buf)
738{
739 struct net_bridge *br = to_bridge(d);
740 return sprintf(buf, "%u\n", br->vlan_stats_enabled);
741}
742
743static ssize_t vlan_stats_enabled_store(struct device *d,
744 struct device_attribute *attr,
745 const char *buf, size_t len)
746{
747 return store_bridge_parm(d, buf, len, br_vlan_set_stats);
748}
749static DEVICE_ATTR_RW(vlan_stats_enabled);
734#endif 750#endif
735 751
736static struct attribute *bridge_attrs[] = { 752static struct attribute *bridge_attrs[] = {
@@ -778,6 +794,7 @@ static struct attribute *bridge_attrs[] = {
778 &dev_attr_vlan_filtering.attr, 794 &dev_attr_vlan_filtering.attr,
779 &dev_attr_vlan_protocol.attr, 795 &dev_attr_vlan_protocol.attr,
780 &dev_attr_default_pvid.attr, 796 &dev_attr_default_pvid.attr,
797 &dev_attr_vlan_stats_enabled.attr,
781#endif 798#endif
782 NULL 799 NULL
783}; 800};
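
Note: the new vlan_stats_enabled attribute reuses store_bridge_parm(), which parses the written string as an unsigned long and calls the given setter under rtnl. The sketch below is only an approximation of that dispatcher (the real one differs in its capability check and locking details); with this hunk the knob is expected to appear as /sys/class/net/<bridge>/bridge/vlan_stats_enabled.

/* rough approximation of store_bridge_parm(), not its actual body */
static ssize_t store_bridge_parm(struct device *d, const char *buf, size_t len,
				 int (*set)(struct net_bridge *br, unsigned long val))
{
	struct net_bridge *br = to_bridge(d);
	unsigned long val;
	int err;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	err = kstrtoul(buf, 0, &val);
	if (err)
		return err;

	rtnl_lock();
	err = (*set)(br, val);
	rtnl_unlock();

	return err ? err : len;
}
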
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index e001152d6ad1..b6de4f457161 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -162,6 +162,17 @@ static struct net_bridge_vlan *br_vlan_get_master(struct net_bridge *br, u16 vid
162 return masterv; 162 return masterv;
163} 163}
164 164
165static void br_master_vlan_rcu_free(struct rcu_head *rcu)
166{
167 struct net_bridge_vlan *v;
168
169 v = container_of(rcu, struct net_bridge_vlan, rcu);
170 WARN_ON(!br_vlan_is_master(v));
171 free_percpu(v->stats);
172 v->stats = NULL;
173 kfree(v);
174}
175
165static void br_vlan_put_master(struct net_bridge_vlan *masterv) 176static void br_vlan_put_master(struct net_bridge_vlan *masterv)
166{ 177{
167 struct net_bridge_vlan_group *vg; 178 struct net_bridge_vlan_group *vg;
@@ -174,7 +185,7 @@ static void br_vlan_put_master(struct net_bridge_vlan *masterv)
174 rhashtable_remove_fast(&vg->vlan_hash, 185 rhashtable_remove_fast(&vg->vlan_hash,
175 &masterv->vnode, br_vlan_rht_params); 186 &masterv->vnode, br_vlan_rht_params);
176 __vlan_del_list(masterv); 187 __vlan_del_list(masterv);
177 kfree_rcu(masterv, rcu); 188 call_rcu(&masterv->rcu, br_master_vlan_rcu_free);
178 } 189 }
179} 190}
180 191
@@ -230,6 +241,7 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags)
230 if (!masterv) 241 if (!masterv)
231 goto out_filt; 242 goto out_filt;
232 v->brvlan = masterv; 243 v->brvlan = masterv;
244 v->stats = masterv->stats;
233 } 245 }
234 246
235 /* Add the dev mac and count the vlan only if it's usable */ 247 /* Add the dev mac and count the vlan only if it's usable */
@@ -329,6 +341,7 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br,
329 struct net_bridge_vlan_group *vg, 341 struct net_bridge_vlan_group *vg,
330 struct sk_buff *skb) 342 struct sk_buff *skb)
331{ 343{
344 struct br_vlan_stats *stats;
332 struct net_bridge_vlan *v; 345 struct net_bridge_vlan *v;
333 u16 vid; 346 u16 vid;
334 347
@@ -355,18 +368,27 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br,
355 return NULL; 368 return NULL;
356 } 369 }
357 } 370 }
371 if (br->vlan_stats_enabled) {
372 stats = this_cpu_ptr(v->stats);
373 u64_stats_update_begin(&stats->syncp);
374 stats->tx_bytes += skb->len;
375 stats->tx_packets++;
376 u64_stats_update_end(&stats->syncp);
377 }
378
358 if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED) 379 if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED)
359 skb->vlan_tci = 0; 380 skb->vlan_tci = 0;
360
361out: 381out:
362 return skb; 382 return skb;
363} 383}
364 384
365/* Called under RCU */ 385/* Called under RCU */
366static bool __allowed_ingress(struct net_bridge_vlan_group *vg, __be16 proto, 386static bool __allowed_ingress(const struct net_bridge *br,
387 struct net_bridge_vlan_group *vg,
367 struct sk_buff *skb, u16 *vid) 388 struct sk_buff *skb, u16 *vid)
368{ 389{
369 const struct net_bridge_vlan *v; 390 struct br_vlan_stats *stats;
391 struct net_bridge_vlan *v;
370 bool tagged; 392 bool tagged;
371 393
372 BR_INPUT_SKB_CB(skb)->vlan_filtered = true; 394 BR_INPUT_SKB_CB(skb)->vlan_filtered = true;
@@ -375,7 +397,7 @@ static bool __allowed_ingress(struct net_bridge_vlan_group *vg, __be16 proto,
375 * HW accelerated vlan tag. 397 * HW accelerated vlan tag.
376 */ 398 */
377 if (unlikely(!skb_vlan_tag_present(skb) && 399 if (unlikely(!skb_vlan_tag_present(skb) &&
378 skb->protocol == proto)) { 400 skb->protocol == br->vlan_proto)) {
379 skb = skb_vlan_untag(skb); 401 skb = skb_vlan_untag(skb);
380 if (unlikely(!skb)) 402 if (unlikely(!skb))
381 return false; 403 return false;
@@ -383,7 +405,7 @@ static bool __allowed_ingress(struct net_bridge_vlan_group *vg, __be16 proto,
383 405
384 if (!br_vlan_get_tag(skb, vid)) { 406 if (!br_vlan_get_tag(skb, vid)) {
385 /* Tagged frame */ 407 /* Tagged frame */
386 if (skb->vlan_proto != proto) { 408 if (skb->vlan_proto != br->vlan_proto) {
387 /* Protocol-mismatch, empty out vlan_tci for new tag */ 409 /* Protocol-mismatch, empty out vlan_tci for new tag */
388 skb_push(skb, ETH_HLEN); 410 skb_push(skb, ETH_HLEN);
389 skb = vlan_insert_tag_set_proto(skb, skb->vlan_proto, 411 skb = vlan_insert_tag_set_proto(skb, skb->vlan_proto,
@@ -419,7 +441,7 @@ static bool __allowed_ingress(struct net_bridge_vlan_group *vg, __be16 proto,
419 *vid = pvid; 441 *vid = pvid;
420 if (likely(!tagged)) 442 if (likely(!tagged))
421 /* Untagged Frame. */ 443 /* Untagged Frame. */
422 __vlan_hwaccel_put_tag(skb, proto, pvid); 444 __vlan_hwaccel_put_tag(skb, br->vlan_proto, pvid);
423 else 445 else
424 /* Priority-tagged Frame. 446 /* Priority-tagged Frame.
425 * At this point, We know that skb->vlan_tci had 447 * At this point, We know that skb->vlan_tci had
@@ -428,13 +450,24 @@ static bool __allowed_ingress(struct net_bridge_vlan_group *vg, __be16 proto,
428 */ 450 */
429 skb->vlan_tci |= pvid; 451 skb->vlan_tci |= pvid;
430 452
431 return true; 453 /* if stats are disabled we can avoid the lookup */
454 if (!br->vlan_stats_enabled)
455 return true;
432 } 456 }
433
434 /* Frame had a valid vlan tag. See if vlan is allowed */
435 v = br_vlan_find(vg, *vid); 457 v = br_vlan_find(vg, *vid);
436 if (v && br_vlan_should_use(v)) 458 if (!v || !br_vlan_should_use(v))
437 return true; 459 goto drop;
460
461 if (br->vlan_stats_enabled) {
462 stats = this_cpu_ptr(v->stats);
463 u64_stats_update_begin(&stats->syncp);
464 stats->rx_bytes += skb->len;
465 stats->rx_packets++;
466 u64_stats_update_end(&stats->syncp);
467 }
468
469 return true;
470
438drop: 471drop:
439 kfree_skb(skb); 472 kfree_skb(skb);
440 return false; 473 return false;
@@ -452,7 +485,7 @@ bool br_allowed_ingress(const struct net_bridge *br,
452 return true; 485 return true;
453 } 486 }
454 487
455 return __allowed_ingress(vg, br->vlan_proto, skb, vid); 488 return __allowed_ingress(br, vg, skb, vid);
456} 489}
457 490
458/* Called under RCU. */ 491/* Called under RCU. */
@@ -542,6 +575,11 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags)
542 if (!vlan) 575 if (!vlan)
543 return -ENOMEM; 576 return -ENOMEM;
544 577
578 vlan->stats = netdev_alloc_pcpu_stats(struct br_vlan_stats);
579 if (!vlan->stats) {
580 kfree(vlan);
581 return -ENOMEM;
582 }
545 vlan->vid = vid; 583 vlan->vid = vid;
546 vlan->flags = flags | BRIDGE_VLAN_INFO_MASTER; 584 vlan->flags = flags | BRIDGE_VLAN_INFO_MASTER;
547 vlan->flags &= ~BRIDGE_VLAN_INFO_PVID; 585 vlan->flags &= ~BRIDGE_VLAN_INFO_PVID;
@@ -549,8 +587,10 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags)
549 if (flags & BRIDGE_VLAN_INFO_BRENTRY) 587 if (flags & BRIDGE_VLAN_INFO_BRENTRY)
550 atomic_set(&vlan->refcnt, 1); 588 atomic_set(&vlan->refcnt, 1);
551 ret = __vlan_add(vlan, flags); 589 ret = __vlan_add(vlan, flags);
552 if (ret) 590 if (ret) {
591 free_percpu(vlan->stats);
553 kfree(vlan); 592 kfree(vlan);
593 }
554 594
555 return ret; 595 return ret;
556} 596}
@@ -711,6 +751,20 @@ int br_vlan_set_proto(struct net_bridge *br, unsigned long val)
711 return __br_vlan_set_proto(br, htons(val)); 751 return __br_vlan_set_proto(br, htons(val));
712} 752}
713 753
754int br_vlan_set_stats(struct net_bridge *br, unsigned long val)
755{
756 switch (val) {
757 case 0:
758 case 1:
759 br->vlan_stats_enabled = val;
760 break;
761 default:
762 return -EINVAL;
763 }
764
765 return 0;
766}
767
714static bool vlan_default_pvid(struct net_bridge_vlan_group *vg, u16 vid) 768static bool vlan_default_pvid(struct net_bridge_vlan_group *vg, u16 vid)
715{ 769{
716 struct net_bridge_vlan *v; 770 struct net_bridge_vlan *v;
@@ -1000,3 +1054,30 @@ void nbp_vlan_flush(struct net_bridge_port *port)
1000 synchronize_rcu(); 1054 synchronize_rcu();
1001 __vlan_group_free(vg); 1055 __vlan_group_free(vg);
1002} 1056}
1057
1058void br_vlan_get_stats(const struct net_bridge_vlan *v,
1059 struct br_vlan_stats *stats)
1060{
1061 int i;
1062
1063 memset(stats, 0, sizeof(*stats));
1064 for_each_possible_cpu(i) {
1065 u64 rxpackets, rxbytes, txpackets, txbytes;
1066 struct br_vlan_stats *cpu_stats;
1067 unsigned int start;
1068
1069 cpu_stats = per_cpu_ptr(v->stats, i);
1070 do {
1071 start = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
1072 rxpackets = cpu_stats->rx_packets;
1073 rxbytes = cpu_stats->rx_bytes;
1074 txbytes = cpu_stats->tx_bytes;
1075 txpackets = cpu_stats->tx_packets;
1076 } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));
1077
1078 stats->rx_packets += rxpackets;
1079 stats->rx_bytes += rxbytes;
1080 stats->tx_bytes += txbytes;
1081 stats->tx_packets += txpackets;
1082 }
1083}
diff --git a/net/ceph/auth.c b/net/ceph/auth.c
index 6b923bcaa2a4..2bc5965fdd1e 100644
--- a/net/ceph/auth.c
+++ b/net/ceph/auth.c
@@ -293,13 +293,9 @@ int ceph_auth_create_authorizer(struct ceph_auth_client *ac,
293} 293}
294EXPORT_SYMBOL(ceph_auth_create_authorizer); 294EXPORT_SYMBOL(ceph_auth_create_authorizer);
295 295
296void ceph_auth_destroy_authorizer(struct ceph_auth_client *ac, 296void ceph_auth_destroy_authorizer(struct ceph_authorizer *a)
297 struct ceph_authorizer *a)
298{ 297{
299 mutex_lock(&ac->mutex); 298 a->destroy(a);
300 if (ac->ops && ac->ops->destroy_authorizer)
301 ac->ops->destroy_authorizer(ac, a);
302 mutex_unlock(&ac->mutex);
303} 299}
304EXPORT_SYMBOL(ceph_auth_destroy_authorizer); 300EXPORT_SYMBOL(ceph_auth_destroy_authorizer);
305 301
diff --git a/net/ceph/auth_none.c b/net/ceph/auth_none.c
index 8c93fa8d81bc..5f836f02ae36 100644
--- a/net/ceph/auth_none.c
+++ b/net/ceph/auth_none.c
@@ -16,7 +16,6 @@ static void reset(struct ceph_auth_client *ac)
16 struct ceph_auth_none_info *xi = ac->private; 16 struct ceph_auth_none_info *xi = ac->private;
17 17
18 xi->starting = true; 18 xi->starting = true;
19 xi->built_authorizer = false;
20} 19}
21 20
22static void destroy(struct ceph_auth_client *ac) 21static void destroy(struct ceph_auth_client *ac)
@@ -39,6 +38,27 @@ static int should_authenticate(struct ceph_auth_client *ac)
39 return xi->starting; 38 return xi->starting;
40} 39}
41 40
41static int ceph_auth_none_build_authorizer(struct ceph_auth_client *ac,
42 struct ceph_none_authorizer *au)
43{
44 void *p = au->buf;
45 void *const end = p + sizeof(au->buf);
46 int ret;
47
48 ceph_encode_8_safe(&p, end, 1, e_range);
49 ret = ceph_entity_name_encode(ac->name, &p, end);
50 if (ret < 0)
51 return ret;
52
53 ceph_encode_64_safe(&p, end, ac->global_id, e_range);
54 au->buf_len = p - (void *)au->buf;
55 dout("%s built authorizer len %d\n", __func__, au->buf_len);
56 return 0;
57
58e_range:
59 return -ERANGE;
60}
61
42static int build_request(struct ceph_auth_client *ac, void *buf, void *end) 62static int build_request(struct ceph_auth_client *ac, void *buf, void *end)
43{ 63{
44 return 0; 64 return 0;
@@ -57,32 +77,32 @@ static int handle_reply(struct ceph_auth_client *ac, int result,
57 return result; 77 return result;
58} 78}
59 79
80static void ceph_auth_none_destroy_authorizer(struct ceph_authorizer *a)
81{
82 kfree(a);
83}
84
60/* 85/*
61 * build an 'authorizer' with our entity_name and global_id. we can 86 * build an 'authorizer' with our entity_name and global_id. it is
62 * reuse a single static copy since it is identical for all services 87 * identical for all services we connect to.
63 * we connect to.
64 */ 88 */
65static int ceph_auth_none_create_authorizer( 89static int ceph_auth_none_create_authorizer(
66 struct ceph_auth_client *ac, int peer_type, 90 struct ceph_auth_client *ac, int peer_type,
67 struct ceph_auth_handshake *auth) 91 struct ceph_auth_handshake *auth)
68{ 92{
69 struct ceph_auth_none_info *ai = ac->private; 93 struct ceph_none_authorizer *au;
70 struct ceph_none_authorizer *au = &ai->au;
71 void *p, *end;
72 int ret; 94 int ret;
73 95
74 if (!ai->built_authorizer) { 96 au = kmalloc(sizeof(*au), GFP_NOFS);
75 p = au->buf; 97 if (!au)
76 end = p + sizeof(au->buf); 98 return -ENOMEM;
77 ceph_encode_8(&p, 1); 99
78 ret = ceph_entity_name_encode(ac->name, &p, end - 8); 100 au->base.destroy = ceph_auth_none_destroy_authorizer;
79 if (ret < 0) 101
80 goto bad; 102 ret = ceph_auth_none_build_authorizer(ac, au);
81 ceph_decode_need(&p, end, sizeof(u64), bad2); 103 if (ret) {
82 ceph_encode_64(&p, ac->global_id); 104 kfree(au);
83 au->buf_len = p - (void *)au->buf; 105 return ret;
84 ai->built_authorizer = true;
85 dout("built authorizer len %d\n", au->buf_len);
86 } 106 }
87 107
88 auth->authorizer = (struct ceph_authorizer *) au; 108 auth->authorizer = (struct ceph_authorizer *) au;
@@ -92,17 +112,6 @@ static int ceph_auth_none_create_authorizer(
92 auth->authorizer_reply_buf_len = sizeof (au->reply_buf); 112 auth->authorizer_reply_buf_len = sizeof (au->reply_buf);
93 113
94 return 0; 114 return 0;
95
96bad2:
97 ret = -ERANGE;
98bad:
99 return ret;
100}
101
102static void ceph_auth_none_destroy_authorizer(struct ceph_auth_client *ac,
103 struct ceph_authorizer *a)
104{
105 /* nothing to do */
106} 115}
107 116
108static const struct ceph_auth_client_ops ceph_auth_none_ops = { 117static const struct ceph_auth_client_ops ceph_auth_none_ops = {
@@ -114,7 +123,6 @@ static const struct ceph_auth_client_ops ceph_auth_none_ops = {
114 .build_request = build_request, 123 .build_request = build_request,
115 .handle_reply = handle_reply, 124 .handle_reply = handle_reply,
116 .create_authorizer = ceph_auth_none_create_authorizer, 125 .create_authorizer = ceph_auth_none_create_authorizer,
117 .destroy_authorizer = ceph_auth_none_destroy_authorizer,
118}; 126};
119 127
120int ceph_auth_none_init(struct ceph_auth_client *ac) 128int ceph_auth_none_init(struct ceph_auth_client *ac)
@@ -127,7 +135,6 @@ int ceph_auth_none_init(struct ceph_auth_client *ac)
127 return -ENOMEM; 135 return -ENOMEM;
128 136
129 xi->starting = true; 137 xi->starting = true;
130 xi->built_authorizer = false;
131 138
132 ac->protocol = CEPH_AUTH_NONE; 139 ac->protocol = CEPH_AUTH_NONE;
133 ac->private = xi; 140 ac->private = xi;
diff --git a/net/ceph/auth_none.h b/net/ceph/auth_none.h
index 059a3ce4b53f..62021535ae4a 100644
--- a/net/ceph/auth_none.h
+++ b/net/ceph/auth_none.h
@@ -12,6 +12,7 @@
12 */ 12 */
13 13
14struct ceph_none_authorizer { 14struct ceph_none_authorizer {
15 struct ceph_authorizer base;
15 char buf[128]; 16 char buf[128];
16 int buf_len; 17 int buf_len;
17 char reply_buf[0]; 18 char reply_buf[0];
@@ -19,8 +20,6 @@ struct ceph_none_authorizer {
19 20
20struct ceph_auth_none_info { 21struct ceph_auth_none_info {
21 bool starting; 22 bool starting;
22 bool built_authorizer;
23 struct ceph_none_authorizer au; /* we only need one; it's static */
24}; 23};
25 24
26int ceph_auth_none_init(struct ceph_auth_client *ac); 25int ceph_auth_none_init(struct ceph_auth_client *ac);
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 9e43a315e662..a0905f04bd13 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -565,6 +565,14 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,
565 return -EAGAIN; 565 return -EAGAIN;
566} 566}
567 567
568static void ceph_x_destroy_authorizer(struct ceph_authorizer *a)
569{
570 struct ceph_x_authorizer *au = (void *)a;
571
572 ceph_x_authorizer_cleanup(au);
573 kfree(au);
574}
575
568static int ceph_x_create_authorizer( 576static int ceph_x_create_authorizer(
569 struct ceph_auth_client *ac, int peer_type, 577 struct ceph_auth_client *ac, int peer_type,
570 struct ceph_auth_handshake *auth) 578 struct ceph_auth_handshake *auth)
@@ -581,6 +589,8 @@ static int ceph_x_create_authorizer(
581 if (!au) 589 if (!au)
582 return -ENOMEM; 590 return -ENOMEM;
583 591
592 au->base.destroy = ceph_x_destroy_authorizer;
593
584 ret = ceph_x_build_authorizer(ac, th, au); 594 ret = ceph_x_build_authorizer(ac, th, au);
585 if (ret) { 595 if (ret) {
586 kfree(au); 596 kfree(au);
@@ -643,16 +653,6 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
643 return ret; 653 return ret;
644} 654}
645 655
646static void ceph_x_destroy_authorizer(struct ceph_auth_client *ac,
647 struct ceph_authorizer *a)
648{
649 struct ceph_x_authorizer *au = (void *)a;
650
651 ceph_x_authorizer_cleanup(au);
652 kfree(au);
653}
654
655
656static void ceph_x_reset(struct ceph_auth_client *ac) 656static void ceph_x_reset(struct ceph_auth_client *ac)
657{ 657{
658 struct ceph_x_info *xi = ac->private; 658 struct ceph_x_info *xi = ac->private;
@@ -770,7 +770,6 @@ static const struct ceph_auth_client_ops ceph_x_ops = {
770 .create_authorizer = ceph_x_create_authorizer, 770 .create_authorizer = ceph_x_create_authorizer,
771 .update_authorizer = ceph_x_update_authorizer, 771 .update_authorizer = ceph_x_update_authorizer,
772 .verify_authorizer_reply = ceph_x_verify_authorizer_reply, 772 .verify_authorizer_reply = ceph_x_verify_authorizer_reply,
773 .destroy_authorizer = ceph_x_destroy_authorizer,
774 .invalidate_authorizer = ceph_x_invalidate_authorizer, 773 .invalidate_authorizer = ceph_x_invalidate_authorizer,
775 .reset = ceph_x_reset, 774 .reset = ceph_x_reset,
776 .destroy = ceph_x_destroy, 775 .destroy = ceph_x_destroy,
diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h
index 40b1a3cf7397..21a5af904bae 100644
--- a/net/ceph/auth_x.h
+++ b/net/ceph/auth_x.h
@@ -26,6 +26,7 @@ struct ceph_x_ticket_handler {
26 26
27 27
28struct ceph_x_authorizer { 28struct ceph_x_authorizer {
29 struct ceph_authorizer base;
29 struct ceph_crypto_key session_key; 30 struct ceph_crypto_key session_key;
30 struct ceph_buffer *buf; 31 struct ceph_buffer *buf;
31 unsigned int service; 32 unsigned int service;
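
Note: the ceph hunks above turn authorizer destruction into a virtual destructor: each concrete authorizer embeds struct ceph_authorizer as its first member and fills in base.destroy, so ceph_auth_destroy_authorizer() no longer needs the ceph_auth_client (or its mutex) and simply calls a->destroy(a). A stripped-down illustration of the pattern follows; the types are simplified, not the ceph definitions.

#include <linux/slab.h>

struct base_obj {
	void (*destroy)(struct base_obj *obj);	/* set by the concrete type */
};

struct concrete_obj {
	struct base_obj base;			/* must stay the first member */
	char payload[128];
};

static void concrete_destroy(struct base_obj *obj)
{
	/* cast is safe because base is the first member of concrete_obj */
	struct concrete_obj *c = (struct concrete_obj *)obj;

	kfree(c);
}

static struct base_obj *concrete_create(void)
{
	struct concrete_obj *c = kmalloc(sizeof(*c), GFP_NOFS);

	if (!c)
		return NULL;
	c->base.destroy = concrete_destroy;
	return &c->base;
}

/* callers only ever see the base type */
static void generic_destroy(struct base_obj *obj)
{
	obj->destroy(obj);
}
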
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 32355d9d0103..40a53a70efdf 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1087,10 +1087,8 @@ static void put_osd(struct ceph_osd *osd)
1087 dout("put_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref), 1087 dout("put_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref),
1088 atomic_read(&osd->o_ref) - 1); 1088 atomic_read(&osd->o_ref) - 1);
1089 if (atomic_dec_and_test(&osd->o_ref)) { 1089 if (atomic_dec_and_test(&osd->o_ref)) {
1090 struct ceph_auth_client *ac = osd->o_osdc->client->monc.auth;
1091
1092 if (osd->o_auth.authorizer) 1090 if (osd->o_auth.authorizer)
1093 ceph_auth_destroy_authorizer(ac, osd->o_auth.authorizer); 1091 ceph_auth_destroy_authorizer(osd->o_auth.authorizer);
1094 kfree(osd); 1092 kfree(osd);
1095 } 1093 }
1096} 1094}
@@ -2984,7 +2982,7 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
2984 struct ceph_auth_handshake *auth = &o->o_auth; 2982 struct ceph_auth_handshake *auth = &o->o_auth;
2985 2983
2986 if (force_new && auth->authorizer) { 2984 if (force_new && auth->authorizer) {
2987 ceph_auth_destroy_authorizer(ac, auth->authorizer); 2985 ceph_auth_destroy_authorizer(auth->authorizer);
2988 auth->authorizer = NULL; 2986 auth->authorizer = NULL;
2989 } 2987 }
2990 if (!auth->authorizer) { 2988 if (!auth->authorizer) {
diff --git a/net/core/dev.c b/net/core/dev.c
index 6324bc9267f7..12436d1312ca 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1741,7 +1741,7 @@ static inline void net_timestamp_set(struct sk_buff *skb)
1741 __net_timestamp(SKB); \ 1741 __net_timestamp(SKB); \
1742 } \ 1742 } \
1743 1743
1744bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb) 1744bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb)
1745{ 1745{
1746 unsigned int len; 1746 unsigned int len;
1747 1747
@@ -1850,7 +1850,7 @@ static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
1850 * taps currently in use. 1850 * taps currently in use.
1851 */ 1851 */
1852 1852
1853static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) 1853void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1854{ 1854{
1855 struct packet_type *ptype; 1855 struct packet_type *ptype;
1856 struct sk_buff *skb2 = NULL; 1856 struct sk_buff *skb2 = NULL;
@@ -1907,6 +1907,7 @@ out_unlock:
1907 pt_prev->func(skb2, skb->dev, pt_prev, skb->dev); 1907 pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
1908 rcu_read_unlock(); 1908 rcu_read_unlock();
1909} 1909}
1910EXPORT_SYMBOL_GPL(dev_queue_xmit_nit);
1910 1911
1911/** 1912/**
1912 * netif_setup_tc - Handle tc mappings on real_num_tx_queues change 1913 * netif_setup_tc - Handle tc mappings on real_num_tx_queues change
@@ -2815,7 +2816,7 @@ static netdev_features_t harmonize_features(struct sk_buff *skb,
2815 2816
2816 if (skb->ip_summed != CHECKSUM_NONE && 2817 if (skb->ip_summed != CHECKSUM_NONE &&
2817 !can_checksum_protocol(features, type)) { 2818 !can_checksum_protocol(features, type)) {
2818 features &= ~NETIF_F_CSUM_MASK; 2819 features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
2819 } else if (illegal_highdma(skb->dev, skb)) { 2820 } else if (illegal_highdma(skb->dev, skb)) {
2820 features &= ~NETIF_F_SG; 2821 features &= ~NETIF_F_SG;
2821 } 2822 }
@@ -3469,6 +3470,7 @@ u32 rps_cpu_mask __read_mostly;
3469EXPORT_SYMBOL(rps_cpu_mask); 3470EXPORT_SYMBOL(rps_cpu_mask);
3470 3471
3471struct static_key rps_needed __read_mostly; 3472struct static_key rps_needed __read_mostly;
3473EXPORT_SYMBOL(rps_needed);
3472 3474
3473static struct rps_dev_flow * 3475static struct rps_dev_flow *
3474set_rps_cpu(struct net_device *dev, struct sk_buff *skb, 3476set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
@@ -3955,9 +3957,11 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
3955 break; 3957 break;
3956 case TC_ACT_SHOT: 3958 case TC_ACT_SHOT:
3957 qdisc_qstats_cpu_drop(cl->q); 3959 qdisc_qstats_cpu_drop(cl->q);
3960 kfree_skb(skb);
3961 return NULL;
3958 case TC_ACT_STOLEN: 3962 case TC_ACT_STOLEN:
3959 case TC_ACT_QUEUED: 3963 case TC_ACT_QUEUED:
3960 kfree_skb(skb); 3964 consume_skb(skb);
3961 return NULL; 3965 return NULL;
3962 case TC_ACT_REDIRECT: 3966 case TC_ACT_REDIRECT:
3963 /* skb_mac_header check was done by cls/act_bpf, so 3967 /* skb_mac_header check was done by cls/act_bpf, so
@@ -4982,8 +4986,8 @@ bool sk_busy_loop(struct sock *sk, int nonblock)
4982 netpoll_poll_unlock(have); 4986 netpoll_poll_unlock(have);
4983 } 4987 }
4984 if (rc > 0) 4988 if (rc > 0)
4985 NET_ADD_STATS_BH(sock_net(sk), 4989 __NET_ADD_STATS(sock_net(sk),
4986 LINUX_MIB_BUSYPOLLRXPACKETS, rc); 4990 LINUX_MIB_BUSYPOLLRXPACKETS, rc);
4987 local_bh_enable(); 4991 local_bh_enable();
4988 4992
4989 if (rc == LL_FLUSH_FAILED) 4993 if (rc == LL_FLUSH_FAILED)
@@ -6720,6 +6724,10 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
6720 features &= ~NETIF_F_TSO6; 6724 features &= ~NETIF_F_TSO6;
6721 } 6725 }
6722 6726
6727 /* TSO with IPv4 ID mangling requires IPv4 TSO be enabled */
6728 if ((features & NETIF_F_TSO_MANGLEID) && !(features & NETIF_F_TSO))
6729 features &= ~NETIF_F_TSO_MANGLEID;
6730
6723 /* TSO ECN requires that TSO is present as well. */ 6731 /* TSO ECN requires that TSO is present as well. */
6724 if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN) 6732 if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
6725 features &= ~NETIF_F_TSO_ECN; 6733 features &= ~NETIF_F_TSO_ECN;
diff --git a/net/core/filter.c b/net/core/filter.c
index 218e5de8c402..71c2a1f473ad 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1344,6 +1344,21 @@ struct bpf_scratchpad {
1344 1344
1345static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp); 1345static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp);
1346 1346
1347static inline int bpf_try_make_writable(struct sk_buff *skb,
1348 unsigned int write_len)
1349{
1350 int err;
1351
1352 if (!skb_cloned(skb))
1353 return 0;
1354 if (skb_clone_writable(skb, write_len))
1355 return 0;
1356 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1357 if (!err)
1358 bpf_compute_data_end(skb);
1359 return err;
1360}
1361
1347static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) 1362static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
1348{ 1363{
1349 struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp); 1364 struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
@@ -1366,7 +1381,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
1366 */ 1381 */
1367 if (unlikely((u32) offset > 0xffff || len > sizeof(sp->buff))) 1382 if (unlikely((u32) offset > 0xffff || len > sizeof(sp->buff)))
1368 return -EFAULT; 1383 return -EFAULT;
1369 if (unlikely(skb_try_make_writable(skb, offset + len))) 1384 if (unlikely(bpf_try_make_writable(skb, offset + len)))
1370 return -EFAULT; 1385 return -EFAULT;
1371 1386
1372 ptr = skb_header_pointer(skb, offset, len, sp->buff); 1387 ptr = skb_header_pointer(skb, offset, len, sp->buff);
@@ -1444,7 +1459,7 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
1444 return -EINVAL; 1459 return -EINVAL;
1445 if (unlikely((u32) offset > 0xffff)) 1460 if (unlikely((u32) offset > 0xffff))
1446 return -EFAULT; 1461 return -EFAULT;
1447 if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum)))) 1462 if (unlikely(bpf_try_make_writable(skb, offset + sizeof(sum))))
1448 return -EFAULT; 1463 return -EFAULT;
1449 1464
1450 ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); 1465 ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
@@ -1499,7 +1514,7 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
1499 return -EINVAL; 1514 return -EINVAL;
1500 if (unlikely((u32) offset > 0xffff)) 1515 if (unlikely((u32) offset > 0xffff))
1501 return -EFAULT; 1516 return -EFAULT;
1502 if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum)))) 1517 if (unlikely(bpf_try_make_writable(skb, offset + sizeof(sum))))
1503 return -EFAULT; 1518 return -EFAULT;
1504 1519
1505 ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); 1520 ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
@@ -1699,12 +1714,15 @@ static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5)
1699{ 1714{
1700 struct sk_buff *skb = (struct sk_buff *) (long) r1; 1715 struct sk_buff *skb = (struct sk_buff *) (long) r1;
1701 __be16 vlan_proto = (__force __be16) r2; 1716 __be16 vlan_proto = (__force __be16) r2;
1717 int ret;
1702 1718
1703 if (unlikely(vlan_proto != htons(ETH_P_8021Q) && 1719 if (unlikely(vlan_proto != htons(ETH_P_8021Q) &&
1704 vlan_proto != htons(ETH_P_8021AD))) 1720 vlan_proto != htons(ETH_P_8021AD)))
1705 vlan_proto = htons(ETH_P_8021Q); 1721 vlan_proto = htons(ETH_P_8021Q);
1706 1722
1707 return skb_vlan_push(skb, vlan_proto, vlan_tci); 1723 ret = skb_vlan_push(skb, vlan_proto, vlan_tci);
1724 bpf_compute_data_end(skb);
1725 return ret;
1708} 1726}
1709 1727
1710const struct bpf_func_proto bpf_skb_vlan_push_proto = { 1728const struct bpf_func_proto bpf_skb_vlan_push_proto = {
@@ -1720,8 +1738,11 @@ EXPORT_SYMBOL_GPL(bpf_skb_vlan_push_proto);
1720static u64 bpf_skb_vlan_pop(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) 1738static u64 bpf_skb_vlan_pop(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
1721{ 1739{
1722 struct sk_buff *skb = (struct sk_buff *) (long) r1; 1740 struct sk_buff *skb = (struct sk_buff *) (long) r1;
1741 int ret;
1723 1742
1724 return skb_vlan_pop(skb); 1743 ret = skb_vlan_pop(skb);
1744 bpf_compute_data_end(skb);
1745 return ret;
1725} 1746}
1726 1747
1727const struct bpf_func_proto bpf_skb_vlan_pop_proto = { 1748const struct bpf_func_proto bpf_skb_vlan_pop_proto = {
@@ -2066,8 +2087,12 @@ static bool __is_valid_access(int off, int size, enum bpf_access_type type)
2066static bool sk_filter_is_valid_access(int off, int size, 2087static bool sk_filter_is_valid_access(int off, int size,
2067 enum bpf_access_type type) 2088 enum bpf_access_type type)
2068{ 2089{
2069 if (off == offsetof(struct __sk_buff, tc_classid)) 2090 switch (off) {
2091 case offsetof(struct __sk_buff, tc_classid):
2092 case offsetof(struct __sk_buff, data):
2093 case offsetof(struct __sk_buff, data_end):
2070 return false; 2094 return false;
2095 }
2071 2096
2072 if (type == BPF_WRITE) { 2097 if (type == BPF_WRITE) {
2073 switch (off) { 2098 switch (off) {
@@ -2215,6 +2240,20 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
2215 *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, ctx_off); 2240 *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, ctx_off);
2216 break; 2241 break;
2217 2242
2243 case offsetof(struct __sk_buff, data):
2244 *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, data)),
2245 dst_reg, src_reg,
2246 offsetof(struct sk_buff, data));
2247 break;
2248
2249 case offsetof(struct __sk_buff, data_end):
2250 ctx_off -= offsetof(struct __sk_buff, data_end);
2251 ctx_off += offsetof(struct sk_buff, cb);
2252 ctx_off += offsetof(struct bpf_skb_data_end, data_end);
2253 *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(sizeof(void *)),
2254 dst_reg, src_reg, ctx_off);
2255 break;
2256
2218 case offsetof(struct __sk_buff, tc_index): 2257 case offsetof(struct __sk_buff, tc_index):
2219#ifdef CONFIG_NET_SCHED 2258#ifdef CONFIG_NET_SCHED
2220 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2); 2259 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2);
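
Note: the filter.c changes above expose skb->data and a cb-stashed data_end through __sk_buff and recompute data_end whenever a helper may move the header, which is what lets tc BPF programs do direct packet access. A minimal, hedged example of a classifier using those fields is sketched below; it assumes clang with -target bpf, and the section name and return codes are the conventional tc ones.

#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/pkt_cls.h>

#ifndef __section
# define __section(NAME) __attribute__((section(NAME), used))
#endif

/* drop frames too short to hold an Ethernet header */
__section("classifier")
int ethlen_check(struct __sk_buff *skb)
{
	void *data     = (void *)(long)skb->data;
	void *data_end = (void *)(long)skb->data_end;
	struct ethhdr *eth = data;

	/* the verifier insists on an explicit bounds check before any access */
	if ((void *)(eth + 1) > data_end)
		return TC_ACT_SHOT;

	return TC_ACT_OK;
}

char __license[] __section("license") = "GPL";
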
diff --git a/net/core/flow.c b/net/core/flow.c
index 1033725be40b..3937b1b68d5b 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -92,8 +92,11 @@ static void flow_cache_gc_task(struct work_struct *work)
92 list_splice_tail_init(&xfrm->flow_cache_gc_list, &gc_list); 92 list_splice_tail_init(&xfrm->flow_cache_gc_list, &gc_list);
93 spin_unlock_bh(&xfrm->flow_cache_gc_lock); 93 spin_unlock_bh(&xfrm->flow_cache_gc_lock);
94 94
95 list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) 95 list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) {
96 flow_entry_kill(fce, xfrm); 96 flow_entry_kill(fce, xfrm);
97 atomic_dec(&xfrm->flow_cache_gc_count);
98 WARN_ON(atomic_read(&xfrm->flow_cache_gc_count) < 0);
99 }
97} 100}
98 101
99static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp, 102static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
@@ -101,6 +104,7 @@ static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
101 struct netns_xfrm *xfrm) 104 struct netns_xfrm *xfrm)
102{ 105{
103 if (deleted) { 106 if (deleted) {
107 atomic_add(deleted, &xfrm->flow_cache_gc_count);
104 fcp->hash_count -= deleted; 108 fcp->hash_count -= deleted;
105 spin_lock_bh(&xfrm->flow_cache_gc_lock); 109 spin_lock_bh(&xfrm->flow_cache_gc_lock);
106 list_splice_tail(gc_list, &xfrm->flow_cache_gc_list); 110 list_splice_tail(gc_list, &xfrm->flow_cache_gc_list);
@@ -232,6 +236,13 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
232 if (fcp->hash_count > fc->high_watermark) 236 if (fcp->hash_count > fc->high_watermark)
233 flow_cache_shrink(fc, fcp); 237 flow_cache_shrink(fc, fcp);
234 238
239 if (fcp->hash_count > 2 * fc->high_watermark ||
240 atomic_read(&net->xfrm.flow_cache_gc_count) > fc->high_watermark) {
241 atomic_inc(&net->xfrm.flow_cache_genid);
242 flo = ERR_PTR(-ENOBUFS);
243 goto ret_object;
244 }
245
235 fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); 246 fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC);
236 if (fle) { 247 if (fle) {
237 fle->net = net; 248 fle->net = net;
@@ -446,6 +457,7 @@ int flow_cache_init(struct net *net)
446 INIT_WORK(&net->xfrm.flow_cache_gc_work, flow_cache_gc_task); 457 INIT_WORK(&net->xfrm.flow_cache_gc_work, flow_cache_gc_task);
447 INIT_WORK(&net->xfrm.flow_cache_flush_work, flow_cache_flush_task); 458 INIT_WORK(&net->xfrm.flow_cache_flush_work, flow_cache_flush_task);
448 mutex_init(&net->xfrm.flow_flush_sem); 459 mutex_init(&net->xfrm.flow_flush_sem);
460 atomic_set(&net->xfrm.flow_cache_gc_count, 0);
449 461
450 fc->hash_shift = 10; 462 fc->hash_shift = 10;
451 fc->low_watermark = 2 * flow_cache_hash_size(fc); 463 fc->low_watermark = 2 * flow_cache_hash_size(fc);
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index e640462ea8bf..f96ee8b9478d 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -25,9 +25,9 @@
25 25
26 26
27static inline int 27static inline int
28gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size) 28gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size, int padattr)
29{ 29{
30 if (nla_put(d->skb, type, size, buf)) 30 if (nla_put_64bit(d->skb, type, size, buf, padattr))
31 goto nla_put_failure; 31 goto nla_put_failure;
32 return 0; 32 return 0;
33 33
@@ -59,7 +59,8 @@ nla_put_failure:
59 */ 59 */
60int 60int
61gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type, 61gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
62 int xstats_type, spinlock_t *lock, struct gnet_dump *d) 62 int xstats_type, spinlock_t *lock,
63 struct gnet_dump *d, int padattr)
63 __acquires(lock) 64 __acquires(lock)
64{ 65{
65 memset(d, 0, sizeof(*d)); 66 memset(d, 0, sizeof(*d));
@@ -71,16 +72,17 @@ gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
71 d->skb = skb; 72 d->skb = skb;
72 d->compat_tc_stats = tc_stats_type; 73 d->compat_tc_stats = tc_stats_type;
73 d->compat_xstats = xstats_type; 74 d->compat_xstats = xstats_type;
75 d->padattr = padattr;
74 76
75 if (d->tail) 77 if (d->tail)
76 return gnet_stats_copy(d, type, NULL, 0); 78 return gnet_stats_copy(d, type, NULL, 0, padattr);
77 79
78 return 0; 80 return 0;
79} 81}
80EXPORT_SYMBOL(gnet_stats_start_copy_compat); 82EXPORT_SYMBOL(gnet_stats_start_copy_compat);
81 83
82/** 84/**
83 * gnet_stats_start_copy_compat - start dumping procedure in compatibility mode 85 * gnet_stats_start_copy - start dumping procedure in compatibility mode
84 * @skb: socket buffer to put statistics TLVs into 86 * @skb: socket buffer to put statistics TLVs into
85 * @type: TLV type for top level statistic TLV 87 * @type: TLV type for top level statistic TLV
86 * @lock: statistics lock 88 * @lock: statistics lock
@@ -94,9 +96,9 @@ EXPORT_SYMBOL(gnet_stats_start_copy_compat);
94 */ 96 */
95int 97int
96gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock, 98gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
97 struct gnet_dump *d) 99 struct gnet_dump *d, int padattr)
98{ 100{
99 return gnet_stats_start_copy_compat(skb, type, 0, 0, lock, d); 101 return gnet_stats_start_copy_compat(skb, type, 0, 0, lock, d, padattr);
100} 102}
101EXPORT_SYMBOL(gnet_stats_start_copy); 103EXPORT_SYMBOL(gnet_stats_start_copy);
102 104
@@ -169,7 +171,8 @@ gnet_stats_copy_basic(struct gnet_dump *d,
169 memset(&sb, 0, sizeof(sb)); 171 memset(&sb, 0, sizeof(sb));
170 sb.bytes = bstats.bytes; 172 sb.bytes = bstats.bytes;
171 sb.packets = bstats.packets; 173 sb.packets = bstats.packets;
172 return gnet_stats_copy(d, TCA_STATS_BASIC, &sb, sizeof(sb)); 174 return gnet_stats_copy(d, TCA_STATS_BASIC, &sb, sizeof(sb),
175 TCA_STATS_PAD);
173 } 176 }
174 return 0; 177 return 0;
175} 178}
@@ -208,11 +211,13 @@ gnet_stats_copy_rate_est(struct gnet_dump *d,
208 } 211 }
209 212
210 if (d->tail) { 213 if (d->tail) {
211 res = gnet_stats_copy(d, TCA_STATS_RATE_EST, &est, sizeof(est)); 214 res = gnet_stats_copy(d, TCA_STATS_RATE_EST, &est, sizeof(est),
215 TCA_STATS_PAD);
212 if (res < 0 || est.bps == r->bps) 216 if (res < 0 || est.bps == r->bps)
213 return res; 217 return res;
214 /* emit 64bit stats only if needed */ 218 /* emit 64bit stats only if needed */
215 return gnet_stats_copy(d, TCA_STATS_RATE_EST64, r, sizeof(*r)); 219 return gnet_stats_copy(d, TCA_STATS_RATE_EST64, r, sizeof(*r),
220 TCA_STATS_PAD);
216 } 221 }
217 222
218 return 0; 223 return 0;
@@ -286,7 +291,8 @@ gnet_stats_copy_queue(struct gnet_dump *d,
286 291
287 if (d->tail) 292 if (d->tail)
288 return gnet_stats_copy(d, TCA_STATS_QUEUE, 293 return gnet_stats_copy(d, TCA_STATS_QUEUE,
289 &qstats, sizeof(qstats)); 294 &qstats, sizeof(qstats),
295 TCA_STATS_PAD);
290 296
291 return 0; 297 return 0;
292} 298}
@@ -316,7 +322,8 @@ gnet_stats_copy_app(struct gnet_dump *d, void *st, int len)
316 } 322 }
317 323
318 if (d->tail) 324 if (d->tail)
319 return gnet_stats_copy(d, TCA_STATS_APP, st, len); 325 return gnet_stats_copy(d, TCA_STATS_APP, st, len,
326 TCA_STATS_PAD);
320 327
321 return 0; 328 return 0;
322 329
@@ -347,12 +354,12 @@ gnet_stats_finish_copy(struct gnet_dump *d)
347 354
348 if (d->compat_tc_stats) 355 if (d->compat_tc_stats)
349 if (gnet_stats_copy(d, d->compat_tc_stats, &d->tc_stats, 356 if (gnet_stats_copy(d, d->compat_tc_stats, &d->tc_stats,
350 sizeof(d->tc_stats)) < 0) 357 sizeof(d->tc_stats), d->padattr) < 0)
351 return -1; 358 return -1;
352 359
353 if (d->compat_xstats && d->xstats) { 360 if (d->compat_xstats && d->xstats) {
354 if (gnet_stats_copy(d, d->compat_xstats, d->xstats, 361 if (gnet_stats_copy(d, d->compat_xstats, d->xstats,
355 d->xstats_len) < 0) 362 d->xstats_len, d->padattr) < 0)
356 return -1; 363 return -1;
357 } 364 }
358 365
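
Note: gnet_stats_copy() and its callers above switch from nla_put() to nla_put_64bit(), threading through a per-caller pad attribute type (TCA_STATS_PAD here; IFLA_PAD and NDTA_PAD elsewhere in this series). The pad attribute exists to keep 64-bit payloads naturally aligned where unaligned loads are costly; roughly, and this is a sketch of the idea rather than the netlink implementation:

/* sketch only: the 4-byte nlattr header means a u64 payload lands aligned
 * when the current tail is *not* 8-byte aligned; otherwise emit an empty
 * pad attribute (another 4-byte header) first to shift it into place */
static int put_64bit_aligned(struct sk_buff *skb, int attrtype, int len,
			     const void *data, int padattr)
{
	if (IS_ALIGNED((unsigned long)skb_tail_pointer(skb), 8) &&
	    !nla_reserve(skb, padattr, 0))
		return -EMSGSIZE;

	return nla_put(skb, attrtype, len, data);
}
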
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 6a395d440228..29dd8cc22bbf 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1857,7 +1857,8 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1857 ndst.ndts_table_fulls += st->table_fulls; 1857 ndst.ndts_table_fulls += st->table_fulls;
1858 } 1858 }
1859 1859
1860 if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst)) 1860 if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
1861 NDTA_PAD))
1861 goto nla_put_failure; 1862 goto nla_put_failure;
1862 } 1863 }
1863 1864
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 2bf83299600a..14d09345f00d 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -162,7 +162,8 @@ static int softnet_seq_show(struct seq_file *seq, void *v)
162 "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", 162 "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
163 sd->processed, sd->dropped, sd->time_squeeze, 0, 163 sd->processed, sd->dropped, sd->time_squeeze, 0,
164 0, 0, 0, 0, /* was fastroute */ 164 0, 0, 0, 0, /* was fastroute */
165 sd->cpu_collision, sd->received_rps, flow_limit_count); 165 0, /* was cpu_collision */
166 sd->received_rps, flow_limit_count);
166 return 0; 167 return 0;
167} 168}
168 169
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 20999aa596dd..8604ae245960 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3472,7 +3472,6 @@ xmit_more:
3472 pkt_dev->odevname, ret); 3472 pkt_dev->odevname, ret);
3473 pkt_dev->errors++; 3473 pkt_dev->errors++;
3474 /* fallthru */ 3474 /* fallthru */
3475 case NETDEV_TX_LOCKED:
3476 case NETDEV_TX_BUSY: 3475 case NETDEV_TX_BUSY:
3477 /* Retry it next time */ 3476 /* Retry it next time */
3478 atomic_dec(&(pkt_dev->skb->users)); 3477 atomic_dec(&(pkt_dev->skb->users));
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 9efc1f34ef3b..d69c4644f8f2 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -876,7 +876,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
876 + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ 876 + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
877 + nla_total_size(IFALIASZ) /* IFLA_IFALIAS */ 877 + nla_total_size(IFALIASZ) /* IFLA_IFALIAS */
878 + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */ 878 + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */
879 + nla_total_size(sizeof(struct rtnl_link_ifmap)) 879 + nla_total_size_64bit(sizeof(struct rtnl_link_ifmap))
880 + nla_total_size(sizeof(struct rtnl_link_stats)) 880 + nla_total_size(sizeof(struct rtnl_link_stats))
881 + nla_total_size_64bit(sizeof(struct rtnl_link_stats64)) 881 + nla_total_size_64bit(sizeof(struct rtnl_link_stats64))
882 + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ 882 + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
@@ -1173,15 +1173,17 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
1173 1173
1174static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev) 1174static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
1175{ 1175{
1176 struct rtnl_link_ifmap map = { 1176 struct rtnl_link_ifmap map;
1177 .mem_start = dev->mem_start, 1177
1178 .mem_end = dev->mem_end, 1178 memset(&map, 0, sizeof(map));
1179 .base_addr = dev->base_addr, 1179 map.mem_start = dev->mem_start;
1180 .irq = dev->irq, 1180 map.mem_end = dev->mem_end;
1181 .dma = dev->dma, 1181 map.base_addr = dev->base_addr;
1182 .port = dev->if_port, 1182 map.irq = dev->irq;
1183 }; 1183 map.dma = dev->dma;
1184 if (nla_put(skb, IFLA_MAP, sizeof(map), &map)) 1184 map.port = dev->if_port;
1185
1186 if (nla_put_64bit(skb, IFLA_MAP, sizeof(map), &map, IFLA_PAD))
1185 return -EMSGSIZE; 1187 return -EMSGSIZE;
1186 1188
1187 return 0; 1189 return 0;
@@ -3444,13 +3446,21 @@ out:
3444 return err; 3446 return err;
3445} 3447}
3446 3448
3449static bool stats_attr_valid(unsigned int mask, int attrid, int idxattr)
3450{
3451 return (mask & IFLA_STATS_FILTER_BIT(attrid)) &&
3452 (!idxattr || idxattr == attrid);
3453}
3454
3447static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, 3455static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
3448 int type, u32 pid, u32 seq, u32 change, 3456 int type, u32 pid, u32 seq, u32 change,
3449 unsigned int flags, unsigned int filter_mask) 3457 unsigned int flags, unsigned int filter_mask,
3458 int *idxattr, int *prividx)
3450{ 3459{
3451 struct if_stats_msg *ifsm; 3460 struct if_stats_msg *ifsm;
3452 struct nlmsghdr *nlh; 3461 struct nlmsghdr *nlh;
3453 struct nlattr *attr; 3462 struct nlattr *attr;
3463 int s_prividx = *prividx;
3454 3464
3455 ASSERT_RTNL(); 3465 ASSERT_RTNL();
3456 3466
@@ -3462,7 +3472,7 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
3462 ifsm->ifindex = dev->ifindex; 3472 ifsm->ifindex = dev->ifindex;
3463 ifsm->filter_mask = filter_mask; 3473 ifsm->filter_mask = filter_mask;
3464 3474
3465 if (filter_mask & IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK_64)) { 3475 if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_64, *idxattr)) {
3466 struct rtnl_link_stats64 *sp; 3476 struct rtnl_link_stats64 *sp;
3467 3477
3468 attr = nla_reserve_64bit(skb, IFLA_STATS_LINK_64, 3478 attr = nla_reserve_64bit(skb, IFLA_STATS_LINK_64,
@@ -3475,12 +3485,36 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
3475 dev_get_stats(dev, sp); 3485 dev_get_stats(dev, sp);
3476 } 3486 }
3477 3487
3488 if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS, *idxattr)) {
3489 const struct rtnl_link_ops *ops = dev->rtnl_link_ops;
3490
3491 if (ops && ops->fill_linkxstats) {
3492 int err;
3493
3494 *idxattr = IFLA_STATS_LINK_XSTATS;
3495 attr = nla_nest_start(skb,
3496 IFLA_STATS_LINK_XSTATS);
3497 if (!attr)
3498 goto nla_put_failure;
3499
3500 err = ops->fill_linkxstats(skb, dev, prividx);
3501 nla_nest_end(skb, attr);
3502 if (err)
3503 goto nla_put_failure;
3504 *idxattr = 0;
3505 }
3506 }
3507
3478 nlmsg_end(skb, nlh); 3508 nlmsg_end(skb, nlh);
3479 3509
3480 return 0; 3510 return 0;
3481 3511
3482nla_put_failure: 3512nla_put_failure:
3483	nlmsg_cancel(skb, nlh);	3513	/* not a multi message or no progress means a real error */
3514 if (!(flags & NLM_F_MULTI) || s_prividx == *prividx)
3515 nlmsg_cancel(skb, nlh);
3516 else
3517 nlmsg_end(skb, nlh);
3484 3518
3485 return -EMSGSIZE; 3519 return -EMSGSIZE;
3486} 3520}
@@ -3494,17 +3528,28 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev,
3494{ 3528{
3495 size_t size = 0; 3529 size_t size = 0;
3496 3530
3497 if (filter_mask & IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK_64)) 3531 if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_64, 0))
3498 size += nla_total_size_64bit(sizeof(struct rtnl_link_stats64)); 3532 size += nla_total_size_64bit(sizeof(struct rtnl_link_stats64));
3499 3533
3534 if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS, 0)) {
3535 const struct rtnl_link_ops *ops = dev->rtnl_link_ops;
3536
3537 if (ops && ops->get_linkxstats_size) {
3538 size += nla_total_size(ops->get_linkxstats_size(dev));
3539 /* for IFLA_STATS_LINK_XSTATS */
3540 size += nla_total_size(0);
3541 }
3542 }
3543
3500 return size; 3544 return size;
3501} 3545}
3502 3546
3503static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh) 3547static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh)
3504{ 3548{
3505 struct net *net = sock_net(skb->sk); 3549 struct net *net = sock_net(skb->sk);
3506 struct if_stats_msg *ifsm;
3507 struct net_device *dev = NULL; 3550 struct net_device *dev = NULL;
3551 int idxattr = 0, prividx = 0;
3552 struct if_stats_msg *ifsm;
3508 struct sk_buff *nskb; 3553 struct sk_buff *nskb;
3509 u32 filter_mask; 3554 u32 filter_mask;
3510 int err; 3555 int err;
@@ -3528,7 +3573,7 @@ static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh)
3528 3573
3529 err = rtnl_fill_statsinfo(nskb, dev, RTM_NEWSTATS, 3574 err = rtnl_fill_statsinfo(nskb, dev, RTM_NEWSTATS,
3530 NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, 3575 NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
3531 0, filter_mask); 3576 0, filter_mask, &idxattr, &prividx);
3532 if (err < 0) { 3577 if (err < 0) {
3533 /* -EMSGSIZE implies BUG in if_nlmsg_stats_size */ 3578 /* -EMSGSIZE implies BUG in if_nlmsg_stats_size */
3534 WARN_ON(err == -EMSGSIZE); 3579 WARN_ON(err == -EMSGSIZE);
@@ -3542,18 +3587,19 @@ static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh)
3542 3587
3543static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb) 3588static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb)
3544{ 3589{
3590 int h, s_h, err, s_idx, s_idxattr, s_prividx;
3545 struct net *net = sock_net(skb->sk); 3591 struct net *net = sock_net(skb->sk);
3592 unsigned int flags = NLM_F_MULTI;
3546 struct if_stats_msg *ifsm; 3593 struct if_stats_msg *ifsm;
3547 int h, s_h;
3548 int idx = 0, s_idx;
3549 struct net_device *dev;
3550 struct hlist_head *head; 3594 struct hlist_head *head;
3551 unsigned int flags = NLM_F_MULTI; 3595 struct net_device *dev;
3552 u32 filter_mask = 0; 3596 u32 filter_mask = 0;
3553 int err; 3597 int idx = 0;
3554 3598
3555 s_h = cb->args[0]; 3599 s_h = cb->args[0];
3556 s_idx = cb->args[1]; 3600 s_idx = cb->args[1];
3601 s_idxattr = cb->args[2];
3602 s_prividx = cb->args[3];
3557 3603
3558 cb->seq = net->dev_base_seq; 3604 cb->seq = net->dev_base_seq;
3559 3605
@@ -3571,7 +3617,8 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb)
3571 err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS, 3617 err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS,
3572 NETLINK_CB(cb->skb).portid, 3618 NETLINK_CB(cb->skb).portid,
3573 cb->nlh->nlmsg_seq, 0, 3619 cb->nlh->nlmsg_seq, 0,
3574 flags, filter_mask); 3620 flags, filter_mask,
3621 &s_idxattr, &s_prividx);
3575 /* If we ran out of room on the first message, 3622 /* If we ran out of room on the first message,
3576 * we're in trouble 3623 * we're in trouble
3577 */ 3624 */
@@ -3579,13 +3626,16 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb)
3579 3626
3580 if (err < 0) 3627 if (err < 0)
3581 goto out; 3628 goto out;
3582 3629 s_prividx = 0;
3630 s_idxattr = 0;
3583 nl_dump_check_consistent(cb, nlmsg_hdr(skb)); 3631 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
3584cont: 3632cont:
3585 idx++; 3633 idx++;
3586 } 3634 }
3587 } 3635 }
3588out: 3636out:
3637 cb->args[3] = s_prividx;
3638 cb->args[2] = s_idxattr;
3589 cb->args[1] = idx; 3639 cb->args[1] = idx;
3590 cb->args[0] = h; 3640 cb->args[0] = h;
3591 3641
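
Note on the new split-dump plumbing above: rtnl_fill_statsinfo() now records how far it got in *idxattr/*prividx (persisted in cb->args[2]/cb->args[3]), so an oversized IFLA_STATS_LINK_XSTATS nest can be resumed in the next netlink message instead of being dropped. Below is a minimal sketch of a fill_linkxstats callback that cooperates with this, using only the (skb, dev, prividx) signature visible at the call site; the FOO_* attributes and foo_read_counter() are hypothetical and not part of this patch.

#include <linux/netdevice.h>
#include <net/netlink.h>

/* hypothetical per-driver xstats attributes -- illustration only */
enum {
	FOO_XSTATS_UNSPEC,
	FOO_XSTATS_PAD,
	FOO_XSTATS_BASE,		/* FOO_NUM_COUNTERS u64 counters follow */
};
#define FOO_NUM_COUNTERS	16

u64 foo_read_counter(const struct net_device *dev, int i);	/* hypothetical */

static int foo_fill_linkxstats(struct sk_buff *skb,
			       const struct net_device *dev,
			       int *prividx)
{
	int i;

	/* resume at the counter where a previous, overflowed dump stopped */
	for (i = *prividx; i < FOO_NUM_COUNTERS; i++) {
		u64 val = foo_read_counter(dev, i);

		if (nla_put_u64_64bit(skb, FOO_XSTATS_BASE + i, val,
				      FOO_XSTATS_PAD)) {
			*prividx = i;	/* remember progress for the next message */
			return -EMSGSIZE;
		}
	}
	*prividx = 0;			/* done: start from scratch next time */
	return 0;
}
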
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7ff7788b0151..f2b77e549c03 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3080,8 +3080,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
3080 unsigned int headroom; 3080 unsigned int headroom;
3081 unsigned int len = head_skb->len; 3081 unsigned int len = head_skb->len;
3082 __be16 proto; 3082 __be16 proto;
3083 bool csum; 3083 bool csum, sg;
3084 int sg = !!(features & NETIF_F_SG);
3085 int nfrags = skb_shinfo(head_skb)->nr_frags; 3084 int nfrags = skb_shinfo(head_skb)->nr_frags;
3086 int err = -ENOMEM; 3085 int err = -ENOMEM;
3087 int i = 0; 3086 int i = 0;
@@ -3093,15 +3092,19 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
3093 if (unlikely(!proto)) 3092 if (unlikely(!proto))
3094 return ERR_PTR(-EINVAL); 3093 return ERR_PTR(-EINVAL);
3095 3094
3095 sg = !!(features & NETIF_F_SG);
3096 csum = !!can_checksum_protocol(features, proto); 3096 csum = !!can_checksum_protocol(features, proto);
3097 3097
3098 /* GSO partial only requires that we trim off any excess that 3098 /* GSO partial only requires that we trim off any excess that
3099 * doesn't fit into an MSS sized block, so take care of that 3099 * doesn't fit into an MSS sized block, so take care of that
3100 * now. 3100 * now.
3101 */ 3101 */
3102 if (features & NETIF_F_GSO_PARTIAL) { 3102 if (sg && csum && (features & NETIF_F_GSO_PARTIAL)) {
3103 partial_segs = len / mss; 3103 partial_segs = len / mss;
3104 mss *= partial_segs; 3104 if (partial_segs > 1)
3105 mss *= partial_segs;
3106 else
3107 partial_segs = 0;
3105 } 3108 }
3106 3109
3107 headroom = skb_headroom(head_skb); 3110 headroom = skb_headroom(head_skb);
@@ -4622,3 +4625,239 @@ failure:
4622 return NULL; 4625 return NULL;
4623} 4626}
4624EXPORT_SYMBOL(alloc_skb_with_frags); 4627EXPORT_SYMBOL(alloc_skb_with_frags);
4628
4629/* carve out the first off bytes from skb when off < headlen */
4630static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
4631 const int headlen, gfp_t gfp_mask)
4632{
4633 int i;
4634 int size = skb_end_offset(skb);
4635 int new_hlen = headlen - off;
4636 u8 *data;
4637
4638 size = SKB_DATA_ALIGN(size);
4639
4640 if (skb_pfmemalloc(skb))
4641 gfp_mask |= __GFP_MEMALLOC;
4642 data = kmalloc_reserve(size +
4643 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
4644 gfp_mask, NUMA_NO_NODE, NULL);
4645 if (!data)
4646 return -ENOMEM;
4647
4648 size = SKB_WITH_OVERHEAD(ksize(data));
4649
4650 /* Copy real data, and all frags */
4651 skb_copy_from_linear_data_offset(skb, off, data, new_hlen);
4652 skb->len -= off;
4653
4654 memcpy((struct skb_shared_info *)(data + size),
4655 skb_shinfo(skb),
4656 offsetof(struct skb_shared_info,
4657 frags[skb_shinfo(skb)->nr_frags]));
4658 if (skb_cloned(skb)) {
4659 /* drop the old head gracefully */
4660 if (skb_orphan_frags(skb, gfp_mask)) {
4661 kfree(data);
4662 return -ENOMEM;
4663 }
4664 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
4665 skb_frag_ref(skb, i);
4666 if (skb_has_frag_list(skb))
4667 skb_clone_fraglist(skb);
4668 skb_release_data(skb);
4669 } else {
4670	/* we can reuse existing refcount - all we did was
4671 * relocate values
4672 */
4673 skb_free_head(skb);
4674 }
4675
4676 skb->head = data;
4677 skb->data = data;
4678 skb->head_frag = 0;
4679#ifdef NET_SKBUFF_DATA_USES_OFFSET
4680 skb->end = size;
4681#else
4682 skb->end = skb->head + size;
4683#endif
4684 skb_set_tail_pointer(skb, skb_headlen(skb));
4685 skb_headers_offset_update(skb, 0);
4686 skb->cloned = 0;
4687 skb->hdr_len = 0;
4688 skb->nohdr = 0;
4689 atomic_set(&skb_shinfo(skb)->dataref, 1);
4690
4691 return 0;
4692}
4693
4694static int pskb_carve(struct sk_buff *skb, const u32 off, gfp_t gfp);
4695
4696/* carve out the first eat bytes from skb's frag_list. May recurse into
4697 * pskb_carve()
4698 */
4699static int pskb_carve_frag_list(struct sk_buff *skb,
4700 struct skb_shared_info *shinfo, int eat,
4701 gfp_t gfp_mask)
4702{
4703 struct sk_buff *list = shinfo->frag_list;
4704 struct sk_buff *clone = NULL;
4705 struct sk_buff *insp = NULL;
4706
4707 do {
4708 if (!list) {
4709 pr_err("Not enough bytes to eat. Want %d\n", eat);
4710 return -EFAULT;
4711 }
4712 if (list->len <= eat) {
4713 /* Eaten as whole. */
4714 eat -= list->len;
4715 list = list->next;
4716 insp = list;
4717 } else {
4718 /* Eaten partially. */
4719 if (skb_shared(list)) {
4720 clone = skb_clone(list, gfp_mask);
4721 if (!clone)
4722 return -ENOMEM;
4723 insp = list->next;
4724 list = clone;
4725 } else {
4726 /* This may be pulled without problems. */
4727 insp = list;
4728 }
4729 if (pskb_carve(list, eat, gfp_mask) < 0) {
4730 kfree_skb(clone);
4731 return -ENOMEM;
4732 }
4733 break;
4734 }
4735 } while (eat);
4736
4737 /* Free pulled out fragments. */
4738 while ((list = shinfo->frag_list) != insp) {
4739 shinfo->frag_list = list->next;
4740 kfree_skb(list);
4741 }
4742 /* And insert new clone at head. */
4743 if (clone) {
4744 clone->next = list;
4745 shinfo->frag_list = clone;
4746 }
4747 return 0;
4748}
4749
4750/* carve off first len bytes from skb. Split line (off) is in the
4751 * non-linear part of skb
4752 */
4753static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
4754 int pos, gfp_t gfp_mask)
4755{
4756 int i, k = 0;
4757 int size = skb_end_offset(skb);
4758 u8 *data;
4759 const int nfrags = skb_shinfo(skb)->nr_frags;
4760 struct skb_shared_info *shinfo;
4761
4762 size = SKB_DATA_ALIGN(size);
4763
4764 if (skb_pfmemalloc(skb))
4765 gfp_mask |= __GFP_MEMALLOC;
4766 data = kmalloc_reserve(size +
4767 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
4768 gfp_mask, NUMA_NO_NODE, NULL);
4769 if (!data)
4770 return -ENOMEM;
4771
4772 size = SKB_WITH_OVERHEAD(ksize(data));
4773
4774 memcpy((struct skb_shared_info *)(data + size),
4775 skb_shinfo(skb), offsetof(struct skb_shared_info,
4776 frags[skb_shinfo(skb)->nr_frags]));
4777 if (skb_orphan_frags(skb, gfp_mask)) {
4778 kfree(data);
4779 return -ENOMEM;
4780 }
4781 shinfo = (struct skb_shared_info *)(data + size);
4782 for (i = 0; i < nfrags; i++) {
4783 int fsize = skb_frag_size(&skb_shinfo(skb)->frags[i]);
4784
4785 if (pos + fsize > off) {
4786 shinfo->frags[k] = skb_shinfo(skb)->frags[i];
4787
4788 if (pos < off) {
4789 /* Split frag.
4790 * We have two variants in this case:
4791 * 1. Move all the frag to the second
4792	 *    part, if it is possible. E.g.
4793 * this approach is mandatory for TUX,
4794 * where splitting is expensive.
4795	 * 2. Split it accurately. That is what we do here.
4796 */
4797 shinfo->frags[0].page_offset += off - pos;
4798 skb_frag_size_sub(&shinfo->frags[0], off - pos);
4799 }
4800 skb_frag_ref(skb, i);
4801 k++;
4802 }
4803 pos += fsize;
4804 }
4805 shinfo->nr_frags = k;
4806 if (skb_has_frag_list(skb))
4807 skb_clone_fraglist(skb);
4808
4809 if (k == 0) {
4810 /* split line is in frag list */
4811 pskb_carve_frag_list(skb, shinfo, off - pos, gfp_mask);
4812 }
4813 skb_release_data(skb);
4814
4815 skb->head = data;
4816 skb->head_frag = 0;
4817 skb->data = data;
4818#ifdef NET_SKBUFF_DATA_USES_OFFSET
4819 skb->end = size;
4820#else
4821 skb->end = skb->head + size;
4822#endif
4823 skb_reset_tail_pointer(skb);
4824 skb_headers_offset_update(skb, 0);
4825 skb->cloned = 0;
4826 skb->hdr_len = 0;
4827 skb->nohdr = 0;
4828 skb->len -= off;
4829 skb->data_len = skb->len;
4830 atomic_set(&skb_shinfo(skb)->dataref, 1);
4831 return 0;
4832}
4833
4834/* remove len bytes from the beginning of the skb */
4835static int pskb_carve(struct sk_buff *skb, const u32 len, gfp_t gfp)
4836{
4837 int headlen = skb_headlen(skb);
4838
4839 if (len < headlen)
4840 return pskb_carve_inside_header(skb, len, headlen, gfp);
4841 else
4842 return pskb_carve_inside_nonlinear(skb, len, headlen, gfp);
4843}
4844
4845/* Extract to_copy bytes starting at off from skb, and return this in
4846 * a new skb
4847 */
4848struct sk_buff *pskb_extract(struct sk_buff *skb, int off,
4849 int to_copy, gfp_t gfp)
4850{
4851 struct sk_buff *clone = skb_clone(skb, gfp);
4852
4853 if (!clone)
4854 return NULL;
4855
4856 if (pskb_carve(clone, off, gfp) < 0 ||
4857 pskb_trim(clone, to_copy)) {
4858 kfree_skb(clone);
4859 return NULL;
4860 }
4861 return clone;
4862}
4863EXPORT_SYMBOL(pskb_extract);
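
A short usage sketch for the new pskb_extract() helper: it clones the skb, then carves off the leading bytes and trims the clone, leaving the original skb untouched; NULL means the clone or carve failed. The hdr_len/GFP_ATOMIC choices below are illustrative only, not taken from this patch.

#include <linux/skbuff.h>

/* Return the payload that starts hdr_len bytes into skb as a new skb,
 * without modifying the original.
 */
static struct sk_buff *foo_extract_payload(struct sk_buff *skb, int hdr_len)
{
	int payload_len = skb->len - hdr_len;

	if (payload_len <= 0)
		return NULL;

	return pskb_extract(skb, hdr_len, payload_len, GFP_ATOMIC);
}
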
diff --git a/net/core/sock.c b/net/core/sock.c
index e16a5db853c6..08bf97eceeb3 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1655,6 +1655,17 @@ void sock_wfree(struct sk_buff *skb)
1655} 1655}
1656EXPORT_SYMBOL(sock_wfree); 1656EXPORT_SYMBOL(sock_wfree);
1657 1657
1658/* This variant of sock_wfree() is used by TCP,
1659 * since it sets SOCK_USE_WRITE_QUEUE.
1660 */
1661void __sock_wfree(struct sk_buff *skb)
1662{
1663 struct sock *sk = skb->sk;
1664
1665 if (atomic_sub_and_test(skb->truesize, &sk->sk_wmem_alloc))
1666 __sk_free(sk);
1667}
1668
1658void skb_set_owner_w(struct sk_buff *skb, struct sock *sk) 1669void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
1659{ 1670{
1660 skb_orphan(skb); 1671 skb_orphan(skb);
@@ -1677,8 +1688,21 @@ void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
1677} 1688}
1678EXPORT_SYMBOL(skb_set_owner_w); 1689EXPORT_SYMBOL(skb_set_owner_w);
1679 1690
1691/* This helper is used by netem, as it can hold packets in its
1692 * delay queue. We want to allow the owner socket to send more
1693 * packets, as if they were already TX completed by a typical driver.
1694 * But we also want to keep skb->sk set because some packet schedulers
1695 * rely on it (sch_fq for example). So we set skb->truesize to a small
1696 * amount (1) and decrease sk_wmem_alloc accordingly.
1697 */
1680void skb_orphan_partial(struct sk_buff *skb) 1698void skb_orphan_partial(struct sk_buff *skb)
1681{ 1699{
1700 /* If this skb is a TCP pure ACK or already went here,
1701 * we have nothing to do. 2 is already a very small truesize.
1702 */
1703 if (skb->truesize <= 2)
1704 return;
1705
1682 /* TCP stack sets skb->ooo_okay based on sk_wmem_alloc, 1706 /* TCP stack sets skb->ooo_okay based on sk_wmem_alloc,
1683	 * so we do not completely orphan skb, but transfer all	1707	 * so we do not completely orphan skb, but transfer all
1684 * accounted bytes but one, to avoid unexpected reorders. 1708 * accounted bytes but one, to avoid unexpected reorders.
@@ -2019,33 +2043,27 @@ static void __release_sock(struct sock *sk)
2019 __releases(&sk->sk_lock.slock) 2043 __releases(&sk->sk_lock.slock)
2020 __acquires(&sk->sk_lock.slock) 2044 __acquires(&sk->sk_lock.slock)
2021{ 2045{
2022 struct sk_buff *skb = sk->sk_backlog.head; 2046 struct sk_buff *skb, *next;
2023 2047
2024 do { 2048 while ((skb = sk->sk_backlog.head) != NULL) {
2025 sk->sk_backlog.head = sk->sk_backlog.tail = NULL; 2049 sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
2026 bh_unlock_sock(sk);
2027 2050
2028 do { 2051 spin_unlock_bh(&sk->sk_lock.slock);
2029 struct sk_buff *next = skb->next;
2030 2052
2053 do {
2054 next = skb->next;
2031 prefetch(next); 2055 prefetch(next);
2032 WARN_ON_ONCE(skb_dst_is_noref(skb)); 2056 WARN_ON_ONCE(skb_dst_is_noref(skb));
2033 skb->next = NULL; 2057 skb->next = NULL;
2034 sk_backlog_rcv(sk, skb); 2058 sk_backlog_rcv(sk, skb);
2035 2059
2036 /* 2060 cond_resched();
2037 * We are in process context here with softirqs
2038 * disabled, use cond_resched_softirq() to preempt.
2039 * This is safe to do because we've taken the backlog
2040 * queue private:
2041 */
2042 cond_resched_softirq();
2043 2061
2044 skb = next; 2062 skb = next;
2045 } while (skb != NULL); 2063 } while (skb != NULL);
2046 2064
2047 bh_lock_sock(sk); 2065 spin_lock_bh(&sk->sk_lock.slock);
2048 } while ((skb = sk->sk_backlog.head) != NULL); 2066 }
2049 2067
2050 /* 2068 /*
2051	 * Doing the zeroing here guarantees we cannot loop forever	2069	 * Doing the zeroing here guarantees we cannot loop forever
@@ -2054,6 +2072,13 @@ static void __release_sock(struct sock *sk)
2054 sk->sk_backlog.len = 0; 2072 sk->sk_backlog.len = 0;
2055} 2073}
2056 2074
2075void __sk_flush_backlog(struct sock *sk)
2076{
2077 spin_lock_bh(&sk->sk_lock.slock);
2078 __release_sock(sk);
2079 spin_unlock_bh(&sk->sk_lock.slock);
2080}
2081
2057/** 2082/**
2058 * sk_wait_data - wait for data to arrive at sk_receive_queue 2083 * sk_wait_data - wait for data to arrive at sk_receive_queue
2059 * @sk: sock to wait on 2084 * @sk: sock to wait on
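
The new __sk_flush_backlog() above simply runs __release_sock() under the backlog spinlock, so code that holds the socket lock for a long stretch can drain packets queued by softirq instead of letting sk_backlog grow. A rough sketch of the intended call pattern, assuming the caller already owns the socket lock; foo_handle_one() is hypothetical.

#include <net/sock.h>

void foo_handle_one(struct sock *sk, struct sk_buff *skb);	/* hypothetical */

static void foo_process_batch(struct sock *sk, struct sk_buff_head *batch)
{
	struct sk_buff *skb;

	lock_sock(sk);
	while ((skb = __skb_dequeue(batch)) != NULL) {
		foo_handle_one(sk, skb);

		/* don't let the backlog pile up while we hold the lock */
		if (unlikely(sk->sk_backlog.tail))
			__sk_flush_backlog(sk);
	}
	release_sock(sk);
}
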
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index ca9e35bbe13c..6b10573cc9fa 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -120,7 +120,7 @@ static size_t sock_diag_nlmsg_size(void)
120{ 120{
121 return NLMSG_ALIGN(sizeof(struct inet_diag_msg) 121 return NLMSG_ALIGN(sizeof(struct inet_diag_msg)
122 + nla_total_size(sizeof(u8)) /* INET_DIAG_PROTOCOL */ 122 + nla_total_size(sizeof(u8)) /* INET_DIAG_PROTOCOL */
123 + nla_total_size(sizeof(struct tcp_info))); /* INET_DIAG_INFO */ 123 + nla_total_size_64bit(sizeof(struct tcp_info))); /* INET_DIAG_INFO */
124} 124}
125 125
126static void sock_diag_broadcast_destroy_work(struct work_struct *work) 126static void sock_diag_broadcast_destroy_work(struct work_struct *work)
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index b0e28d24e1a7..0c55ffb859bf 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -198,9 +198,9 @@ struct dccp_mib {
198}; 198};
199 199
200DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics); 200DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics);
201#define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field) 201#define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field)
202#define DCCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(dccp_statistics, field) 202#define __DCCP_INC_STATS(field) __SNMP_INC_STATS(dccp_statistics, field)
203#define DCCP_DEC_STATS(field) SNMP_DEC_STATS(dccp_statistics, field) 203#define DCCP_DEC_STATS(field) SNMP_DEC_STATS(dccp_statistics, field)
204 204
205/* 205/*
206 * Checksumming routines 206 * Checksumming routines
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 3bd14e885396..ba347184bda9 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -359,7 +359,7 @@ send_sync:
359 goto discard; 359 goto discard;
360 } 360 }
361 361
362 DCCP_INC_STATS_BH(DCCP_MIB_INERRS); 362 DCCP_INC_STATS(DCCP_MIB_INERRS);
363discard: 363discard:
364 __kfree_skb(skb); 364 __kfree_skb(skb);
365 return 0; 365 return 0;
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index f6d183f8f332..5c7e413a3ae4 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -205,7 +205,7 @@ void dccp_req_err(struct sock *sk, u64 seq)
205 * socket here. 205 * socket here.
206 */ 206 */
207 if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) { 207 if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) {
208 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); 208 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
209 } else { 209 } else {
210 /* 210 /*
211 * Still in RESPOND, just remove it silently. 211 * Still in RESPOND, just remove it silently.
@@ -247,7 +247,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
247 247
248 if (skb->len < offset + sizeof(*dh) || 248 if (skb->len < offset + sizeof(*dh) ||
249 skb->len < offset + __dccp_basic_hdr_len(dh)) { 249 skb->len < offset + __dccp_basic_hdr_len(dh)) {
250 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); 250 __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
251 return; 251 return;
252 } 252 }
253 253
@@ -256,7 +256,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
256 iph->saddr, ntohs(dh->dccph_sport), 256 iph->saddr, ntohs(dh->dccph_sport),
257 inet_iif(skb)); 257 inet_iif(skb));
258 if (!sk) { 258 if (!sk) {
259 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); 259 __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
260 return; 260 return;
261 } 261 }
262 262
@@ -273,7 +273,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
273 * servers this needs to be solved differently. 273 * servers this needs to be solved differently.
274 */ 274 */
275 if (sock_owned_by_user(sk)) 275 if (sock_owned_by_user(sk))
276 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); 276 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
277 277
278 if (sk->sk_state == DCCP_CLOSED) 278 if (sk->sk_state == DCCP_CLOSED)
279 goto out; 279 goto out;
@@ -281,7 +281,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
281 dp = dccp_sk(sk); 281 dp = dccp_sk(sk);
282 if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) && 282 if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) &&
283 !between48(seq, dp->dccps_awl, dp->dccps_awh)) { 283 !between48(seq, dp->dccps_awl, dp->dccps_awh)) {
284 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); 284 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
285 goto out; 285 goto out;
286 } 286 }
287 287
@@ -318,7 +318,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
318 case DCCP_REQUESTING: 318 case DCCP_REQUESTING:
319 case DCCP_RESPOND: 319 case DCCP_RESPOND:
320 if (!sock_owned_by_user(sk)) { 320 if (!sock_owned_by_user(sk)) {
321 DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); 321 __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS);
322 sk->sk_err = err; 322 sk->sk_err = err;
323 323
324 sk->sk_error_report(sk); 324 sk->sk_error_report(sk);
@@ -431,11 +431,11 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
431 return newsk; 431 return newsk;
432 432
433exit_overflow: 433exit_overflow:
434 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 434 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
435exit_nonewsk: 435exit_nonewsk:
436 dst_release(dst); 436 dst_release(dst);
437exit: 437exit:
438 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); 438 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS);
439 return NULL; 439 return NULL;
440put_and_exit: 440put_and_exit:
441 inet_csk_prepare_forced_close(newsk); 441 inet_csk_prepare_forced_close(newsk);
@@ -462,7 +462,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
462 security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); 462 security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
463 rt = ip_route_output_flow(net, &fl4, sk); 463 rt = ip_route_output_flow(net, &fl4, sk);
464 if (IS_ERR(rt)) { 464 if (IS_ERR(rt)) {
465 IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); 465 __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
466 return NULL; 466 return NULL;
467 } 467 }
468 468
@@ -533,8 +533,8 @@ static void dccp_v4_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb)
533 bh_unlock_sock(ctl_sk); 533 bh_unlock_sock(ctl_sk);
534 534
535 if (net_xmit_eval(err) == 0) { 535 if (net_xmit_eval(err) == 0) {
536 DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); 536 DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
537 DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); 537 DCCP_INC_STATS(DCCP_MIB_OUTRSTS);
538 } 538 }
539out: 539out:
540 dst_release(dst); 540 dst_release(dst);
@@ -637,7 +637,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
637drop_and_free: 637drop_and_free:
638 reqsk_free(req); 638 reqsk_free(req);
639drop: 639drop:
640 DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); 640 __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS);
641 return -1; 641 return -1;
642} 642}
643EXPORT_SYMBOL_GPL(dccp_v4_conn_request); 643EXPORT_SYMBOL_GPL(dccp_v4_conn_request);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 8ceb3cebcad4..d176f4e66369 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -80,8 +80,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
80 80
81 if (skb->len < offset + sizeof(*dh) || 81 if (skb->len < offset + sizeof(*dh) ||
82 skb->len < offset + __dccp_basic_hdr_len(dh)) { 82 skb->len < offset + __dccp_basic_hdr_len(dh)) {
83 ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), 83 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
84 ICMP6_MIB_INERRORS); 84 ICMP6_MIB_INERRORS);
85 return; 85 return;
86 } 86 }
87 87
@@ -91,8 +91,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
91 inet6_iif(skb)); 91 inet6_iif(skb));
92 92
93 if (!sk) { 93 if (!sk) {
94 ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), 94 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
95 ICMP6_MIB_INERRORS); 95 ICMP6_MIB_INERRORS);
96 return; 96 return;
97 } 97 }
98 98
@@ -106,7 +106,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
106 106
107 bh_lock_sock(sk); 107 bh_lock_sock(sk);
108 if (sock_owned_by_user(sk)) 108 if (sock_owned_by_user(sk))
109 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); 109 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
110 110
111 if (sk->sk_state == DCCP_CLOSED) 111 if (sk->sk_state == DCCP_CLOSED)
112 goto out; 112 goto out;
@@ -114,7 +114,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
114 dp = dccp_sk(sk); 114 dp = dccp_sk(sk);
115 if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) && 115 if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) &&
116 !between48(seq, dp->dccps_awl, dp->dccps_awh)) { 116 !between48(seq, dp->dccps_awl, dp->dccps_awh)) {
117 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); 117 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
118 goto out; 118 goto out;
119 } 119 }
120 120
@@ -156,7 +156,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
156 case DCCP_RESPOND: /* Cannot happen. 156 case DCCP_RESPOND: /* Cannot happen.
157				   It can, if SYNs are crossed. --ANK */	157				   It can, if SYNs are crossed. --ANK */
158 if (!sock_owned_by_user(sk)) { 158 if (!sock_owned_by_user(sk)) {
159 DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); 159 __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS);
160 sk->sk_err = err; 160 sk->sk_err = err;
161 /* 161 /*
162 * Wake people up to see the error 162 * Wake people up to see the error
@@ -277,8 +277,8 @@ static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb)
277 if (!IS_ERR(dst)) { 277 if (!IS_ERR(dst)) {
278 skb_dst_set(skb, dst); 278 skb_dst_set(skb, dst);
279 ip6_xmit(ctl_sk, skb, &fl6, NULL, 0); 279 ip6_xmit(ctl_sk, skb, &fl6, NULL, 0);
280 DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); 280 DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
281 DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); 281 DCCP_INC_STATS(DCCP_MIB_OUTRSTS);
282 return; 282 return;
283 } 283 }
284 284
@@ -378,7 +378,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
378drop_and_free: 378drop_and_free:
379 reqsk_free(req); 379 reqsk_free(req);
380drop: 380drop:
381 DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); 381 __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS);
382 return -1; 382 return -1;
383} 383}
384 384
@@ -527,11 +527,11 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
527 return newsk; 527 return newsk;
528 528
529out_overflow: 529out_overflow:
530 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 530 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
531out_nonewsk: 531out_nonewsk:
532 dst_release(dst); 532 dst_release(dst);
533out: 533out:
534 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); 534 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS);
535 return NULL; 535 return NULL;
536} 536}
537 537
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 1994f8af646b..53eddf99e4f6 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -127,7 +127,7 @@ struct sock *dccp_create_openreq_child(const struct sock *sk,
127 } 127 }
128 dccp_init_xmit_timers(newsk); 128 dccp_init_xmit_timers(newsk);
129 129
130 DCCP_INC_STATS_BH(DCCP_MIB_PASSIVEOPENS); 130 __DCCP_INC_STATS(DCCP_MIB_PASSIVEOPENS);
131 } 131 }
132 return newsk; 132 return newsk;
133} 133}
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 9bce31886bda..74d29c56c367 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -253,7 +253,7 @@ out_nonsensical_length:
253 return 0; 253 return 0;
254 254
255out_invalid_option: 255out_invalid_option:
256 DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT); 256 DCCP_INC_STATS(DCCP_MIB_INVALIDOPT);
257 rc = DCCP_RESET_CODE_OPTION_ERROR; 257 rc = DCCP_RESET_CODE_OPTION_ERROR;
258out_featneg_failed: 258out_featneg_failed:
259 DCCP_WARN("DCCP(%p): Option %d (len=%d) error=%u\n", sk, opt, len, rc); 259 DCCP_WARN("DCCP(%p): Option %d (len=%d) error=%u\n", sk, opt, len, rc);
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 3ef7acef3ce8..3a2c34027758 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -28,7 +28,7 @@ static void dccp_write_err(struct sock *sk)
28 28
29 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED); 29 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
30 dccp_done(sk); 30 dccp_done(sk);
31 DCCP_INC_STATS_BH(DCCP_MIB_ABORTONTIMEOUT); 31 __DCCP_INC_STATS(DCCP_MIB_ABORTONTIMEOUT);
32} 32}
33 33
34/* A write timeout has occurred. Process the after effects. */ 34/* A write timeout has occurred. Process the after effects. */
@@ -100,7 +100,7 @@ static void dccp_retransmit_timer(struct sock *sk)
100 * total number of retransmissions of clones of original packets. 100 * total number of retransmissions of clones of original packets.
101 */ 101 */
102 if (icsk->icsk_retransmits == 0) 102 if (icsk->icsk_retransmits == 0)
103 DCCP_INC_STATS_BH(DCCP_MIB_TIMEOUTS); 103 __DCCP_INC_STATS(DCCP_MIB_TIMEOUTS);
104 104
105 if (dccp_retransmit_skb(sk) != 0) { 105 if (dccp_retransmit_skb(sk) != 0) {
106 /* 106 /*
@@ -179,7 +179,7 @@ static void dccp_delack_timer(unsigned long data)
179 if (sock_owned_by_user(sk)) { 179 if (sock_owned_by_user(sk)) {
180 /* Try again later. */ 180 /* Try again later. */
181 icsk->icsk_ack.blocked = 1; 181 icsk->icsk_ack.blocked = 1;
182 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); 182 __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
183 sk_reset_timer(sk, &icsk->icsk_delack_timer, 183 sk_reset_timer(sk, &icsk->icsk_delack_timer,
184 jiffies + TCP_DELACK_MIN); 184 jiffies + TCP_DELACK_MIN);
185 goto out; 185 goto out;
@@ -209,7 +209,7 @@ static void dccp_delack_timer(unsigned long data)
209 icsk->icsk_ack.ato = TCP_ATO_MIN; 209 icsk->icsk_ack.ato = TCP_ATO_MIN;
210 } 210 }
211 dccp_send_ack(sk); 211 dccp_send_ack(sk);
212 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKS); 212 __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS);
213 } 213 }
214out: 214out:
215 bh_unlock_sock(sk); 215 bh_unlock_sock(sk);
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index d61ceed912be..eff5dfc2e33f 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -182,7 +182,7 @@ __ATTRIBUTE_GROUPS(dsa_hwmon);
182/* basic switch operations **************************************************/ 182/* basic switch operations **************************************************/
183static int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct net_device *master) 183static int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct net_device *master)
184{ 184{
185 struct dsa_chip_data *cd = ds->pd; 185 struct dsa_chip_data *cd = ds->cd;
186 struct device_node *port_dn; 186 struct device_node *port_dn;
187 struct phy_device *phydev; 187 struct phy_device *phydev;
188 int ret, port, mode; 188 int ret, port, mode;
@@ -219,7 +219,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
219{ 219{
220 struct dsa_switch_driver *drv = ds->drv; 220 struct dsa_switch_driver *drv = ds->drv;
221 struct dsa_switch_tree *dst = ds->dst; 221 struct dsa_switch_tree *dst = ds->dst;
222 struct dsa_chip_data *pd = ds->pd; 222 struct dsa_chip_data *cd = ds->cd;
223 bool valid_name_found = false; 223 bool valid_name_found = false;
224 int index = ds->index; 224 int index = ds->index;
225 int i, ret; 225 int i, ret;
@@ -230,7 +230,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
230 for (i = 0; i < DSA_MAX_PORTS; i++) { 230 for (i = 0; i < DSA_MAX_PORTS; i++) {
231 char *name; 231 char *name;
232 232
233 name = pd->port_names[i]; 233 name = cd->port_names[i];
234 if (name == NULL) 234 if (name == NULL)
235 continue; 235 continue;
236 236
@@ -328,10 +328,10 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
328 if (!(ds->enabled_port_mask & (1 << i))) 328 if (!(ds->enabled_port_mask & (1 << i)))
329 continue; 329 continue;
330 330
331 ret = dsa_slave_create(ds, parent, i, pd->port_names[i]); 331 ret = dsa_slave_create(ds, parent, i, cd->port_names[i]);
332 if (ret < 0) { 332 if (ret < 0) {
333 netdev_err(dst->master_netdev, "[%d]: can't create dsa slave device for port %d(%s): %d\n", 333 netdev_err(dst->master_netdev, "[%d]: can't create dsa slave device for port %d(%s): %d\n",
334 index, i, pd->port_names[i], ret); 334 index, i, cd->port_names[i], ret);
335 ret = 0; 335 ret = 0;
336 } 336 }
337 } 337 }
@@ -379,7 +379,7 @@ static struct dsa_switch *
379dsa_switch_setup(struct dsa_switch_tree *dst, int index, 379dsa_switch_setup(struct dsa_switch_tree *dst, int index,
380 struct device *parent, struct device *host_dev) 380 struct device *parent, struct device *host_dev)
381{ 381{
382 struct dsa_chip_data *pd = dst->pd->chip + index; 382 struct dsa_chip_data *cd = dst->pd->chip + index;
383 struct dsa_switch_driver *drv; 383 struct dsa_switch_driver *drv;
384 struct dsa_switch *ds; 384 struct dsa_switch *ds;
385 int ret; 385 int ret;
@@ -389,7 +389,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
389 /* 389 /*
390 * Probe for switch model. 390 * Probe for switch model.
391 */ 391 */
392 drv = dsa_switch_probe(parent, host_dev, pd->sw_addr, &name, &priv); 392 drv = dsa_switch_probe(parent, host_dev, cd->sw_addr, &name, &priv);
393 if (drv == NULL) { 393 if (drv == NULL) {
394 netdev_err(dst->master_netdev, "[%d]: could not detect attached switch\n", 394 netdev_err(dst->master_netdev, "[%d]: could not detect attached switch\n",
395 index); 395 index);
@@ -408,10 +408,10 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
408 408
409 ds->dst = dst; 409 ds->dst = dst;
410 ds->index = index; 410 ds->index = index;
411 ds->pd = pd; 411 ds->cd = cd;
412 ds->drv = drv; 412 ds->drv = drv;
413 ds->priv = priv; 413 ds->priv = priv;
414 ds->master_dev = host_dev; 414 ds->dev = parent;
415 415
416 ret = dsa_switch_setup_one(ds, parent); 416 ret = dsa_switch_setup_one(ds, parent);
417 if (ret) 417 if (ret)
@@ -424,7 +424,7 @@ static void dsa_switch_destroy(struct dsa_switch *ds)
424{ 424{
425 struct device_node *port_dn; 425 struct device_node *port_dn;
426 struct phy_device *phydev; 426 struct phy_device *phydev;
427 struct dsa_chip_data *cd = ds->pd; 427 struct dsa_chip_data *cd = ds->cd;
428 int port; 428 int port;
429 429
430#ifdef CONFIG_NET_DSA_HWMON 430#ifdef CONFIG_NET_DSA_HWMON
@@ -659,9 +659,6 @@ static int dsa_of_probe(struct device *dev)
659 const char *port_name; 659 const char *port_name;
660 int chip_index, port_index; 660 int chip_index, port_index;
661 const unsigned int *sw_addr, *port_reg; 661 const unsigned int *sw_addr, *port_reg;
662 int gpio;
663 enum of_gpio_flags of_flags;
664 unsigned long flags;
665 u32 eeprom_len; 662 u32 eeprom_len;
666 int ret; 663 int ret;
667 664
@@ -740,19 +737,6 @@ static int dsa_of_probe(struct device *dev)
740 put_device(cd->host_dev); 737 put_device(cd->host_dev);
741 cd->host_dev = &mdio_bus_switch->dev; 738 cd->host_dev = &mdio_bus_switch->dev;
742 } 739 }
743 gpio = of_get_named_gpio_flags(child, "reset-gpios", 0,
744 &of_flags);
745 if (gpio_is_valid(gpio)) {
746 flags = (of_flags == OF_GPIO_ACTIVE_LOW ?
747 GPIOF_ACTIVE_LOW : 0);
748 ret = devm_gpio_request_one(dev, gpio, flags,
749 "switch_reset");
750 if (ret)
751 goto out_free_chip;
752
753 cd->reset = gpio_to_desc(gpio);
754 gpiod_direction_output(cd->reset, 0);
755 }
756 740
757 for_each_available_child_of_node(child, port) { 741 for_each_available_child_of_node(child, port) {
758 port_reg = of_get_property(port, "reg", NULL); 742 port_reg = of_get_property(port, "reg", NULL);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 3b6750f5e68b..152436cdab30 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -50,8 +50,8 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds)
50 ds->slave_mii_bus->read = dsa_slave_phy_read; 50 ds->slave_mii_bus->read = dsa_slave_phy_read;
51 ds->slave_mii_bus->write = dsa_slave_phy_write; 51 ds->slave_mii_bus->write = dsa_slave_phy_write;
52 snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "dsa-%d:%.2x", 52 snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "dsa-%d:%.2x",
53 ds->index, ds->pd->sw_addr); 53 ds->index, ds->cd->sw_addr);
54 ds->slave_mii_bus->parent = ds->master_dev; 54 ds->slave_mii_bus->parent = ds->dev;
55 ds->slave_mii_bus->phy_mask = ~ds->phys_mii_mask; 55 ds->slave_mii_bus->phy_mask = ~ds->phys_mii_mask;
56} 56}
57 57
@@ -615,8 +615,8 @@ static int dsa_slave_get_eeprom_len(struct net_device *dev)
615 struct dsa_slave_priv *p = netdev_priv(dev); 615 struct dsa_slave_priv *p = netdev_priv(dev);
616 struct dsa_switch *ds = p->parent; 616 struct dsa_switch *ds = p->parent;
617 617
618 if (ds->pd->eeprom_len) 618 if (ds->cd->eeprom_len)
619 return ds->pd->eeprom_len; 619 return ds->cd->eeprom_len;
620 620
621 if (ds->drv->get_eeprom_len) 621 if (ds->drv->get_eeprom_len)
622 return ds->drv->get_eeprom_len(ds); 622 return ds->drv->get_eeprom_len(ds);
@@ -666,6 +666,78 @@ static void dsa_slave_get_strings(struct net_device *dev,
666 } 666 }
667} 667}
668 668
669static void dsa_cpu_port_get_ethtool_stats(struct net_device *dev,
670 struct ethtool_stats *stats,
671 uint64_t *data)
672{
673 struct dsa_switch_tree *dst = dev->dsa_ptr;
674 struct dsa_switch *ds = dst->ds[0];
675 s8 cpu_port = dst->cpu_port;
676 int count = 0;
677
678 if (dst->master_ethtool_ops.get_sset_count) {
679 count = dst->master_ethtool_ops.get_sset_count(dev,
680 ETH_SS_STATS);
681 dst->master_ethtool_ops.get_ethtool_stats(dev, stats, data);
682 }
683
684 if (ds->drv->get_ethtool_stats)
685 ds->drv->get_ethtool_stats(ds, cpu_port, data + count);
686}
687
688static int dsa_cpu_port_get_sset_count(struct net_device *dev, int sset)
689{
690 struct dsa_switch_tree *dst = dev->dsa_ptr;
691 struct dsa_switch *ds = dst->ds[0];
692 int count = 0;
693
694 if (dst->master_ethtool_ops.get_sset_count)
695 count += dst->master_ethtool_ops.get_sset_count(dev, sset);
696
697 if (sset == ETH_SS_STATS && ds->drv->get_sset_count)
698 count += ds->drv->get_sset_count(ds);
699
700 return count;
701}
702
703static void dsa_cpu_port_get_strings(struct net_device *dev,
704 uint32_t stringset, uint8_t *data)
705{
706 struct dsa_switch_tree *dst = dev->dsa_ptr;
707 struct dsa_switch *ds = dst->ds[0];
708 s8 cpu_port = dst->cpu_port;
709 int len = ETH_GSTRING_LEN;
710 int mcount = 0, count;
711 unsigned int i;
712 uint8_t pfx[4];
713 uint8_t *ndata;
714
715 snprintf(pfx, sizeof(pfx), "p%.2d", cpu_port);
716 /* We do not want to be NULL-terminated, since this is a prefix */
717 pfx[sizeof(pfx) - 1] = '_';
718
719 if (dst->master_ethtool_ops.get_sset_count) {
720 mcount = dst->master_ethtool_ops.get_sset_count(dev,
721 ETH_SS_STATS);
722 dst->master_ethtool_ops.get_strings(dev, stringset, data);
723 }
724
725 if (stringset == ETH_SS_STATS && ds->drv->get_strings) {
726 ndata = data + mcount * len;
727		/* This function copies ETH_GSTRING_LEN bytes, we will mangle
728		 * the output afterwards to prepend the CPU port prefix we
729 * constructed earlier
730 */
731 ds->drv->get_strings(ds, cpu_port, ndata);
732 count = ds->drv->get_sset_count(ds);
733 for (i = 0; i < count; i++) {
734 memmove(ndata + (i * len + sizeof(pfx)),
735 ndata + i * len, len - sizeof(pfx));
736 memcpy(ndata + i * len, pfx, sizeof(pfx));
737 }
738 }
739}
740
669static void dsa_slave_get_ethtool_stats(struct net_device *dev, 741static void dsa_slave_get_ethtool_stats(struct net_device *dev,
670 struct ethtool_stats *stats, 742 struct ethtool_stats *stats,
671 uint64_t *data) 743 uint64_t *data)
@@ -821,6 +893,8 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
821 .get_eee = dsa_slave_get_eee, 893 .get_eee = dsa_slave_get_eee,
822}; 894};
823 895
896static struct ethtool_ops dsa_cpu_port_ethtool_ops;
897
824static const struct net_device_ops dsa_slave_netdev_ops = { 898static const struct net_device_ops dsa_slave_netdev_ops = {
825 .ndo_open = dsa_slave_open, 899 .ndo_open = dsa_slave_open,
826 .ndo_stop = dsa_slave_close, 900 .ndo_stop = dsa_slave_close,
@@ -925,7 +999,7 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
925 struct net_device *slave_dev) 999 struct net_device *slave_dev)
926{ 1000{
927 struct dsa_switch *ds = p->parent; 1001 struct dsa_switch *ds = p->parent;
928 struct dsa_chip_data *cd = ds->pd; 1002 struct dsa_chip_data *cd = ds->cd;
929 struct device_node *phy_dn, *port_dn; 1003 struct device_node *phy_dn, *port_dn;
930 bool phy_is_fixed = false; 1004 bool phy_is_fixed = false;
931 u32 phy_flags = 0; 1005 u32 phy_flags = 0;
@@ -1038,6 +1112,7 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
1038 int port, char *name) 1112 int port, char *name)
1039{ 1113{
1040 struct net_device *master = ds->dst->master_netdev; 1114 struct net_device *master = ds->dst->master_netdev;
1115 struct dsa_switch_tree *dst = ds->dst;
1041 struct net_device *slave_dev; 1116 struct net_device *slave_dev;
1042 struct dsa_slave_priv *p; 1117 struct dsa_slave_priv *p;
1043 int ret; 1118 int ret;
@@ -1049,6 +1124,19 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
1049 1124
1050 slave_dev->features = master->vlan_features; 1125 slave_dev->features = master->vlan_features;
1051 slave_dev->ethtool_ops = &dsa_slave_ethtool_ops; 1126 slave_dev->ethtool_ops = &dsa_slave_ethtool_ops;
1127 if (master->ethtool_ops != &dsa_cpu_port_ethtool_ops) {
1128 memcpy(&dst->master_ethtool_ops, master->ethtool_ops,
1129 sizeof(struct ethtool_ops));
1130 memcpy(&dsa_cpu_port_ethtool_ops, &dst->master_ethtool_ops,
1131 sizeof(struct ethtool_ops));
1132 dsa_cpu_port_ethtool_ops.get_sset_count =
1133 dsa_cpu_port_get_sset_count;
1134 dsa_cpu_port_ethtool_ops.get_ethtool_stats =
1135 dsa_cpu_port_get_ethtool_stats;
1136 dsa_cpu_port_ethtool_ops.get_strings =
1137 dsa_cpu_port_get_strings;
1138 master->ethtool_ops = &dsa_cpu_port_ethtool_ops;
1139 }
1052 eth_hw_addr_inherit(slave_dev, master); 1140 eth_hw_addr_inherit(slave_dev, master);
1053 slave_dev->priv_flags |= IFF_NO_QUEUE; 1141 slave_dev->priv_flags |= IFF_NO_QUEUE;
1054 slave_dev->netdev_ops = &dsa_slave_netdev_ops; 1142 slave_dev->netdev_ops = &dsa_slave_netdev_ops;
@@ -1059,7 +1147,7 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
1059 NULL); 1147 NULL);
1060 1148
1061 SET_NETDEV_DEV(slave_dev, parent); 1149 SET_NETDEV_DEV(slave_dev, parent);
1062 slave_dev->dev.of_node = ds->pd->port_dn[port]; 1150 slave_dev->dev.of_node = ds->cd->port_dn[port];
1063 slave_dev->vlan_features = master->vlan_features; 1151 slave_dev->vlan_features = master->vlan_features;
1064 1152
1065 p = netdev_priv(slave_dev); 1153 p = netdev_priv(slave_dev);
diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h
index b4e17a7c0df0..5ac778962e4e 100644
--- a/net/ieee802154/6lowpan/6lowpan_i.h
+++ b/net/ieee802154/6lowpan/6lowpan_i.h
@@ -41,24 +41,12 @@ static inline u32 ieee802154_addr_hash(const struct ieee802154_addr *a)
41 return (((__force u64)a->extended_addr) >> 32) ^ 41 return (((__force u64)a->extended_addr) >> 32) ^
42 (((__force u64)a->extended_addr) & 0xffffffff); 42 (((__force u64)a->extended_addr) & 0xffffffff);
43 case IEEE802154_ADDR_SHORT: 43 case IEEE802154_ADDR_SHORT:
44 return (__force u32)(a->short_addr); 44 return (__force u32)(a->short_addr + (a->pan_id << 16));
45 default: 45 default:
46 return 0; 46 return 0;
47 } 47 }
48} 48}
49 49
50/* private device info */
51struct lowpan_dev_info {
52 struct net_device *wdev; /* wpan device ptr */
53 u16 fragment_tag;
54};
55
56static inline struct
57lowpan_dev_info *lowpan_dev_info(const struct net_device *dev)
58{
59 return (struct lowpan_dev_info *)lowpan_priv(dev)->priv;
60}
61
62int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type); 50int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type);
63void lowpan_net_frag_exit(void); 51void lowpan_net_frag_exit(void);
64int lowpan_net_frag_init(void); 52int lowpan_net_frag_init(void);
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index 0023c9048812..dd085db8580e 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -148,7 +148,7 @@ static int lowpan_newlink(struct net *src_net, struct net_device *ldev,
148 return -EBUSY; 148 return -EBUSY;
149 } 149 }
150 150
151 lowpan_dev_info(ldev)->wdev = wdev; 151 lowpan_802154_dev(ldev)->wdev = wdev;
152 /* Set the lowpan hardware address to the wpan hardware address. */ 152 /* Set the lowpan hardware address to the wpan hardware address. */
153 memcpy(ldev->dev_addr, wdev->dev_addr, IEEE802154_ADDR_LEN); 153 memcpy(ldev->dev_addr, wdev->dev_addr, IEEE802154_ADDR_LEN);
154 /* We need headroom for possible wpan_dev_hard_header call and tailroom 154 /* We need headroom for possible wpan_dev_hard_header call and tailroom
@@ -173,7 +173,7 @@ static int lowpan_newlink(struct net *src_net, struct net_device *ldev,
173 173
174static void lowpan_dellink(struct net_device *ldev, struct list_head *head) 174static void lowpan_dellink(struct net_device *ldev, struct list_head *head)
175{ 175{
176 struct net_device *wdev = lowpan_dev_info(ldev)->wdev; 176 struct net_device *wdev = lowpan_802154_dev(ldev)->wdev;
177 177
178 ASSERT_RTNL(); 178 ASSERT_RTNL();
179 179
@@ -184,7 +184,7 @@ static void lowpan_dellink(struct net_device *ldev, struct list_head *head)
184 184
185static struct rtnl_link_ops lowpan_link_ops __read_mostly = { 185static struct rtnl_link_ops lowpan_link_ops __read_mostly = {
186 .kind = "lowpan", 186 .kind = "lowpan",
187 .priv_size = LOWPAN_PRIV_SIZE(sizeof(struct lowpan_dev_info)), 187 .priv_size = LOWPAN_PRIV_SIZE(sizeof(struct lowpan_802154_dev)),
188 .setup = lowpan_setup, 188 .setup = lowpan_setup,
189 .newlink = lowpan_newlink, 189 .newlink = lowpan_newlink,
190 .dellink = lowpan_dellink, 190 .dellink = lowpan_dellink,
diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c
index d4353faced35..e459afd16bb3 100644
--- a/net/ieee802154/6lowpan/tx.c
+++ b/net/ieee802154/6lowpan/tx.c
@@ -84,7 +84,7 @@ static struct sk_buff*
84lowpan_alloc_frag(struct sk_buff *skb, int size, 84lowpan_alloc_frag(struct sk_buff *skb, int size,
85 const struct ieee802154_hdr *master_hdr, bool frag1) 85 const struct ieee802154_hdr *master_hdr, bool frag1)
86{ 86{
87 struct net_device *wdev = lowpan_dev_info(skb->dev)->wdev; 87 struct net_device *wdev = lowpan_802154_dev(skb->dev)->wdev;
88 struct sk_buff *frag; 88 struct sk_buff *frag;
89 int rc; 89 int rc;
90 90
@@ -148,8 +148,8 @@ lowpan_xmit_fragmented(struct sk_buff *skb, struct net_device *ldev,
148 int frag_cap, frag_len, payload_cap, rc; 148 int frag_cap, frag_len, payload_cap, rc;
149 int skb_unprocessed, skb_offset; 149 int skb_unprocessed, skb_offset;
150 150
151 frag_tag = htons(lowpan_dev_info(ldev)->fragment_tag); 151 frag_tag = htons(lowpan_802154_dev(ldev)->fragment_tag);
152 lowpan_dev_info(ldev)->fragment_tag++; 152 lowpan_802154_dev(ldev)->fragment_tag++;
153 153
154 frag_hdr[0] = LOWPAN_DISPATCH_FRAG1 | ((dgram_size >> 8) & 0x07); 154 frag_hdr[0] = LOWPAN_DISPATCH_FRAG1 | ((dgram_size >> 8) & 0x07);
155 frag_hdr[1] = dgram_size & 0xff; 155 frag_hdr[1] = dgram_size & 0xff;
@@ -208,7 +208,7 @@ err:
208static int lowpan_header(struct sk_buff *skb, struct net_device *ldev, 208static int lowpan_header(struct sk_buff *skb, struct net_device *ldev,
209 u16 *dgram_size, u16 *dgram_offset) 209 u16 *dgram_size, u16 *dgram_offset)
210{ 210{
211 struct wpan_dev *wpan_dev = lowpan_dev_info(ldev)->wdev->ieee802154_ptr; 211 struct wpan_dev *wpan_dev = lowpan_802154_dev(ldev)->wdev->ieee802154_ptr;
212 struct ieee802154_addr sa, da; 212 struct ieee802154_addr sa, da;
213 struct ieee802154_mac_cb *cb = mac_cb_init(skb); 213 struct ieee802154_mac_cb *cb = mac_cb_init(skb);
214 struct lowpan_addr_info info; 214 struct lowpan_addr_info info;
@@ -248,8 +248,8 @@ static int lowpan_header(struct sk_buff *skb, struct net_device *ldev,
248 cb->ackreq = wpan_dev->ackreq; 248 cb->ackreq = wpan_dev->ackreq;
249 } 249 }
250 250
251 return wpan_dev_hard_header(skb, lowpan_dev_info(ldev)->wdev, &da, &sa, 251 return wpan_dev_hard_header(skb, lowpan_802154_dev(ldev)->wdev, &da,
252 0); 252 &sa, 0);
253} 253}
254 254
255netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *ldev) 255netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *ldev)
@@ -283,7 +283,7 @@ netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *ldev)
283 max_single = ieee802154_max_payload(&wpan_hdr); 283 max_single = ieee802154_max_payload(&wpan_hdr);
284 284
285 if (skb_tail_pointer(skb) - skb_network_header(skb) <= max_single) { 285 if (skb_tail_pointer(skb) - skb_network_header(skb) <= max_single) {
286 skb->dev = lowpan_dev_info(ldev)->wdev; 286 skb->dev = lowpan_802154_dev(ldev)->wdev;
287 ldev->stats.tx_packets++; 287 ldev->stats.tx_packets++;
288 ldev->stats.tx_bytes += dgram_size; 288 ldev->stats.tx_bytes += dgram_size;
289 return dev_queue_xmit(skb); 289 return dev_queue_xmit(skb);
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index 8035c93dd527..ca207dbf673b 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -1078,6 +1078,11 @@ static int nl802154_set_pan_id(struct sk_buff *skb, struct genl_info *info)
1078 if (netif_running(dev)) 1078 if (netif_running(dev))
1079 return -EBUSY; 1079 return -EBUSY;
1080 1080
1081 if (wpan_dev->lowpan_dev) {
1082 if (netif_running(wpan_dev->lowpan_dev))
1083 return -EBUSY;
1084 }
1085
1081 /* don't change address fields on monitor */ 1086 /* don't change address fields on monitor */
1082 if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR || 1087 if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR ||
1083 !info->attrs[NL802154_ATTR_PAN_ID]) 1088 !info->attrs[NL802154_ATTR_PAN_ID])
@@ -1109,6 +1114,11 @@ static int nl802154_set_short_addr(struct sk_buff *skb, struct genl_info *info)
1109 if (netif_running(dev)) 1114 if (netif_running(dev))
1110 return -EBUSY; 1115 return -EBUSY;
1111 1116
1117 if (wpan_dev->lowpan_dev) {
1118 if (netif_running(wpan_dev->lowpan_dev))
1119 return -EBUSY;
1120 }
1121
1112 /* don't change address fields on monitor */ 1122 /* don't change address fields on monitor */
1113 if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR || 1123 if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR ||
1114 !info->attrs[NL802154_ATTR_SHORT_ADDR]) 1124 !info->attrs[NL802154_ATTR_SHORT_ADDR])
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index c34c7544d1db..89a8cac4726a 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -436,7 +436,7 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
436 if (IS_ERR(rt)) 436 if (IS_ERR(rt))
437 return 1; 437 return 1;
438 if (rt->dst.dev != dev) { 438 if (rt->dst.dev != dev) {
439 NET_INC_STATS_BH(net, LINUX_MIB_ARPFILTER); 439 __NET_INC_STATS(net, LINUX_MIB_ARPFILTER);
440 flag = 1; 440 flag = 1;
441 } 441 }
442 ip_rt_put(rt); 442 ip_rt_put(rt);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 8a9246deccfe..ef2ebeb89d0f 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -110,6 +110,7 @@ struct fib_table *fib_new_table(struct net *net, u32 id)
110 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]); 110 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
111 return tb; 111 return tb;
112} 112}
113EXPORT_SYMBOL_GPL(fib_new_table);
113 114
114/* caller must hold either rtnl or rcu read lock */ 115/* caller must hold either rtnl or rcu read lock */
115struct fib_table *fib_get_table(struct net *net, u32 id) 116struct fib_table *fib_get_table(struct net *net, u32 id)
@@ -904,7 +905,11 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
904 if (ifa->ifa_flags & IFA_F_SECONDARY) { 905 if (ifa->ifa_flags & IFA_F_SECONDARY) {
905 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); 906 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
906 if (!prim) { 907 if (!prim) {
907 pr_warn("%s: bug: prim == NULL\n", __func__); 908 /* if the device has been deleted, we don't perform
909 * address promotion
910 */
911 if (!in_dev->dead)
912 pr_warn("%s: bug: prim == NULL\n", __func__);
908 return; 913 return;
909 } 914 }
910 if (iprim && iprim != prim) { 915 if (iprim && iprim != prim) {
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 7ac5ec87b004..eeec7d60e5fd 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -227,8 +227,6 @@ static int fou_gro_complete(struct sock *sk, struct sk_buff *skb,
227 int err = -ENOSYS; 227 int err = -ENOSYS;
228 const struct net_offload **offloads; 228 const struct net_offload **offloads;
229 229
230 udp_tunnel_gro_complete(skb, nhoff);
231
232 rcu_read_lock(); 230 rcu_read_lock();
233 offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; 231 offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
234 ops = rcu_dereference(offloads[proto]); 232 ops = rcu_dereference(offloads[proto]);
@@ -237,6 +235,8 @@ static int fou_gro_complete(struct sock *sk, struct sk_buff *skb,
237 235
238 err = ops->callbacks.gro_complete(skb, nhoff); 236 err = ops->callbacks.gro_complete(skb, nhoff);
239 237
238 skb_set_inner_mac_header(skb, nhoff);
239
240out_unlock: 240out_unlock:
241 rcu_read_unlock(); 241 rcu_read_unlock();
242 242
@@ -412,6 +412,8 @@ static int gue_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
412 412
413 err = ops->callbacks.gro_complete(skb, nhoff + guehlen); 413 err = ops->callbacks.gro_complete(skb, nhoff + guehlen);
414 414
415 skb_set_inner_mac_header(skb, nhoff + guehlen);
416
415out_unlock: 417out_unlock:
416 rcu_read_unlock(); 418 rcu_read_unlock();
417 return err; 419 return err;
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index d9c552a721fc..d78e2eefc0f7 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -60,6 +60,67 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version)
60} 60}
61EXPORT_SYMBOL_GPL(gre_del_protocol); 61EXPORT_SYMBOL_GPL(gre_del_protocol);
62 62
63/* Fills in tpi and returns header length to be pulled. */
64int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
65 bool *csum_err)
66{
67 const struct gre_base_hdr *greh;
68 __be32 *options;
69 int hdr_len;
70
71 if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
72 return -EINVAL;
73
74 greh = (struct gre_base_hdr *)skb_transport_header(skb);
75 if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
76 return -EINVAL;
77
78 tpi->flags = gre_flags_to_tnl_flags(greh->flags);
79 hdr_len = gre_calc_hlen(tpi->flags);
80
81 if (!pskb_may_pull(skb, hdr_len))
82 return -EINVAL;
83
84 greh = (struct gre_base_hdr *)skb_transport_header(skb);
85 tpi->proto = greh->protocol;
86
87 options = (__be32 *)(greh + 1);
88 if (greh->flags & GRE_CSUM) {
89 if (skb_checksum_simple_validate(skb)) {
90 *csum_err = true;
91 return -EINVAL;
92 }
93
94 skb_checksum_try_convert(skb, IPPROTO_GRE, 0,
95 null_compute_pseudo);
96 options++;
97 }
98
99 if (greh->flags & GRE_KEY) {
100 tpi->key = *options;
101 options++;
102 } else {
103 tpi->key = 0;
104 }
105 if (unlikely(greh->flags & GRE_SEQ)) {
106 tpi->seq = *options;
107 options++;
108 } else {
109 tpi->seq = 0;
110 }
111 /* WCCP version 1 and 2 protocol decoding.
112 * - Change protocol to IP
113 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
114 */
115 if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
116 tpi->proto = htons(ETH_P_IP);
117 if ((*(u8 *)options & 0xF0) != 0x40)
118 hdr_len += 4;
119 }
120 return hdr_len;
121}
122EXPORT_SYMBOL(gre_parse_header);
123
63static int gre_rcv(struct sk_buff *skb) 124static int gre_rcv(struct sk_buff *skb)
64{ 125{
65 const struct gre_protocol *proto; 126 const struct gre_protocol *proto;
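
The new gre_parse_header() helper validates the base header and returns the number of bytes the caller must pull: 4 bytes of base header plus 4 for each optional checksum, key and sequence field (and 4 more when a WCCPv2 redirect header follows). A minimal userspace sketch of the flag-driven part of that length calculation; not part of the patch, with the GRE flag bits redefined locally and the WCCP special case omitted:

/* Illustrative userspace sketch, not part of the patch: how the header
 * length returned by gre_parse_header() grows with the optional fields.
 * The GRE_* constants are redefined here only for the example.
 */
#include <stdio.h>
#include <stdint.h>

#define GRE_CSUM 0x8000
#define GRE_KEY  0x2000
#define GRE_SEQ  0x1000

static int gre_hdr_len(uint16_t flags)
{
	int len = 4;			/* base header: flags + protocol */

	if (flags & GRE_CSUM)
		len += 4;		/* checksum + reserved */
	if (flags & GRE_KEY)
		len += 4;		/* key */
	if (flags & GRE_SEQ)
		len += 4;		/* sequence number */
	return len;
}

int main(void)
{
	printf("plain GRE:          %d bytes\n", gre_hdr_len(0));
	printf("GRE with key + seq: %d bytes\n", gre_hdr_len(GRE_KEY | GRE_SEQ));
	return 0;
}
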
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 6333489771ed..38abe70e595f 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -363,7 +363,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
363 icmp_param->data_len+icmp_param->head_len, 363 icmp_param->data_len+icmp_param->head_len,
364 icmp_param->head_len, 364 icmp_param->head_len,
365 ipc, rt, MSG_DONTWAIT) < 0) { 365 ipc, rt, MSG_DONTWAIT) < 0) {
366 ICMP_INC_STATS_BH(sock_net(sk), ICMP_MIB_OUTERRORS); 366 __ICMP_INC_STATS(sock_net(sk), ICMP_MIB_OUTERRORS);
367 ip_flush_pending_frames(sk); 367 ip_flush_pending_frames(sk);
368 } else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { 368 } else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
369 struct icmphdr *icmph = icmp_hdr(skb); 369 struct icmphdr *icmph = icmp_hdr(skb);
@@ -744,7 +744,7 @@ static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
744 * avoid additional coding at protocol handlers. 744 * avoid additional coding at protocol handlers.
745 */ 745 */
746 if (!pskb_may_pull(skb, iph->ihl * 4 + 8)) { 746 if (!pskb_may_pull(skb, iph->ihl * 4 + 8)) {
747 ICMP_INC_STATS_BH(dev_net(skb->dev), ICMP_MIB_INERRORS); 747 __ICMP_INC_STATS(dev_net(skb->dev), ICMP_MIB_INERRORS);
748 return; 748 return;
749 } 749 }
750 750
@@ -865,7 +865,7 @@ static bool icmp_unreach(struct sk_buff *skb)
865out: 865out:
866 return true; 866 return true;
867out_err: 867out_err:
868 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); 868 __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
869 return false; 869 return false;
870} 870}
871 871
@@ -877,7 +877,7 @@ out_err:
877static bool icmp_redirect(struct sk_buff *skb) 877static bool icmp_redirect(struct sk_buff *skb)
878{ 878{
879 if (skb->len < sizeof(struct iphdr)) { 879 if (skb->len < sizeof(struct iphdr)) {
880 ICMP_INC_STATS_BH(dev_net(skb->dev), ICMP_MIB_INERRORS); 880 __ICMP_INC_STATS(dev_net(skb->dev), ICMP_MIB_INERRORS);
881 return false; 881 return false;
882 } 882 }
883 883
@@ -956,7 +956,7 @@ static bool icmp_timestamp(struct sk_buff *skb)
956 return true; 956 return true;
957 957
958out_err: 958out_err:
959 ICMP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ICMP_MIB_INERRORS); 959 __ICMP_INC_STATS(dev_net(skb_dst(skb)->dev), ICMP_MIB_INERRORS);
960 return false; 960 return false;
961} 961}
962 962
@@ -996,7 +996,7 @@ int icmp_rcv(struct sk_buff *skb)
996 skb_set_network_header(skb, nh); 996 skb_set_network_header(skb, nh);
997 } 997 }
998 998
999 ICMP_INC_STATS_BH(net, ICMP_MIB_INMSGS); 999 __ICMP_INC_STATS(net, ICMP_MIB_INMSGS);
1000 1000
1001 if (skb_checksum_simple_validate(skb)) 1001 if (skb_checksum_simple_validate(skb))
1002 goto csum_error; 1002 goto csum_error;
@@ -1006,7 +1006,7 @@ int icmp_rcv(struct sk_buff *skb)
1006 1006
1007 icmph = icmp_hdr(skb); 1007 icmph = icmp_hdr(skb);
1008 1008
1009 ICMPMSGIN_INC_STATS_BH(net, icmph->type); 1009 ICMPMSGIN_INC_STATS(net, icmph->type);
1010 /* 1010 /*
1011 * 18 is the highest 'known' ICMP type. Anything else is a mystery 1011 * 18 is the highest 'known' ICMP type. Anything else is a mystery
1012 * 1012 *
@@ -1052,9 +1052,9 @@ drop:
1052 kfree_skb(skb); 1052 kfree_skb(skb);
1053 return 0; 1053 return 0;
1054csum_error: 1054csum_error:
1055 ICMP_INC_STATS_BH(net, ICMP_MIB_CSUMERRORS); 1055 __ICMP_INC_STATS(net, ICMP_MIB_CSUMERRORS);
1056error: 1056error:
1057 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); 1057 __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
1058 goto drop; 1058 goto drop;
1059} 1059}
1060 1060
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index ab69da2d2a77..fa8c39804bdb 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -427,7 +427,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk,
427route_err: 427route_err:
428 ip_rt_put(rt); 428 ip_rt_put(rt);
429no_route: 429no_route:
430 IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); 430 __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
431 return NULL; 431 return NULL;
432} 432}
433EXPORT_SYMBOL_GPL(inet_csk_route_req); 433EXPORT_SYMBOL_GPL(inet_csk_route_req);
@@ -466,7 +466,7 @@ route_err:
466 ip_rt_put(rt); 466 ip_rt_put(rt);
467no_route: 467no_route:
468 rcu_read_unlock(); 468 rcu_read_unlock();
469 IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); 469 __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
470 return NULL; 470 return NULL;
471} 471}
472EXPORT_SYMBOL_GPL(inet_csk_route_child_sock); 472EXPORT_SYMBOL_GPL(inet_csk_route_child_sock);
@@ -706,7 +706,9 @@ void inet_csk_destroy_sock(struct sock *sk)
706 706
707 sk_refcnt_debug_release(sk); 707 sk_refcnt_debug_release(sk);
708 708
709 local_bh_disable();
709 percpu_counter_dec(sk->sk_prot->orphan_count); 710 percpu_counter_dec(sk->sk_prot->orphan_count);
711 local_bh_enable();
710 sock_put(sk); 712 sock_put(sk);
711} 713}
712EXPORT_SYMBOL(inet_csk_destroy_sock); 714EXPORT_SYMBOL(inet_csk_destroy_sock);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index ad7956fa659a..25af1243649b 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -220,8 +220,9 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
220 } 220 }
221 221
222 if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) { 222 if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) {
223 attr = nla_reserve(skb, INET_DIAG_INFO, 223 attr = nla_reserve_64bit(skb, INET_DIAG_INFO,
224 handler->idiag_info_size); 224 handler->idiag_info_size,
225 INET_DIAG_PAD);
225 if (!attr) 226 if (!attr)
226 goto errout; 227 goto errout;
227 228
@@ -1078,7 +1079,9 @@ int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk)
1078 } 1079 }
1079 1080
1080 attr = handler->idiag_info_size 1081 attr = handler->idiag_info_size
1081 ? nla_reserve(skb, INET_DIAG_INFO, handler->idiag_info_size) 1082 ? nla_reserve_64bit(skb, INET_DIAG_INFO,
1083 handler->idiag_info_size,
1084 INET_DIAG_PAD)
1082 : NULL; 1085 : NULL;
1083 if (attr) 1086 if (attr)
1084 info = nla_data(attr); 1087 info = nla_data(attr);
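
The switch from nla_reserve() to nla_reserve_64bit() is about alignment: netlink attributes are only guaranteed 4-byte alignment, so when the INET_DIAG_INFO payload carries 64-bit counters, a zero-length INET_DIAG_PAD attribute is inserted first whenever the payload would otherwise start on a non-8-byte boundary. A rough userspace sketch of that decision, not part of the patch, using a hypothetical message offset:

/* Illustrative sketch, not part of the patch: when a pad attribute is
 * needed so that a 64-bit payload starts 8-byte aligned.
 */
#include <stdio.h>

#define NLA_HDRLEN 4

int main(void)
{
	int off = 16;	/* hypothetical tail offset of the netlink message */

	if ((off + NLA_HDRLEN) % 8 != 0) {
		printf("emit empty pad attribute at offset %d\n", off);
		off += NLA_HDRLEN;		/* pad attribute: header only */
	}
	printf("64-bit payload now starts at offset %d (8-byte aligned)\n",
	       off + NLA_HDRLEN);
	return 0;
}
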
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index b76b0d7e59c1..77c20a489218 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -360,7 +360,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
360 __sk_nulls_add_node_rcu(sk, &head->chain); 360 __sk_nulls_add_node_rcu(sk, &head->chain);
361 if (tw) { 361 if (tw) {
362 sk_nulls_del_node_init_rcu((struct sock *)tw); 362 sk_nulls_del_node_init_rcu((struct sock *)tw);
363 NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); 363 __NET_INC_STATS(net, LINUX_MIB_TIMEWAITRECYCLED);
364 } 364 }
365 spin_unlock(lock); 365 spin_unlock(lock);
366 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 366 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
@@ -438,6 +438,7 @@ static int inet_reuseport_add_sock(struct sock *sk,
438 const struct sock *sk2, 438 const struct sock *sk2,
439 bool match_wildcard)) 439 bool match_wildcard))
440{ 440{
441 struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash;
441 struct sock *sk2; 442 struct sock *sk2;
442 kuid_t uid = sock_i_uid(sk); 443 kuid_t uid = sock_i_uid(sk);
443 444
@@ -446,6 +447,7 @@ static int inet_reuseport_add_sock(struct sock *sk,
446 sk2->sk_family == sk->sk_family && 447 sk2->sk_family == sk->sk_family &&
447 ipv6_only_sock(sk2) == ipv6_only_sock(sk) && 448 ipv6_only_sock(sk2) == ipv6_only_sock(sk) &&
448 sk2->sk_bound_dev_if == sk->sk_bound_dev_if && 449 sk2->sk_bound_dev_if == sk->sk_bound_dev_if &&
450 inet_csk(sk2)->icsk_bind_hash == tb &&
449 sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) && 451 sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
450 saddr_same(sk, sk2, false)) 452 saddr_same(sk, sk2, false))
451 return reuseport_add_sock(sk, sk2); 453 return reuseport_add_sock(sk, sk2);
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index c67f9bd7699c..206581674806 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -94,7 +94,7 @@ static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
94} 94}
95 95
96/* 96/*
97 * Enter the time wait state. This is called with locally disabled BH. 97 * Enter the time wait state.
98 * Essentially we whip up a timewait bucket, copy the relevant info into it 98 * Essentially we whip up a timewait bucket, copy the relevant info into it
99 * from the SK, and mess with hash chains and list linkage. 99 * from the SK, and mess with hash chains and list linkage.
100 */ 100 */
@@ -112,7 +112,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
112 */ 112 */
113 bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num, 113 bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num,
114 hashinfo->bhash_size)]; 114 hashinfo->bhash_size)];
115 spin_lock(&bhead->lock); 115 spin_lock_bh(&bhead->lock);
116 tw->tw_tb = icsk->icsk_bind_hash; 116 tw->tw_tb = icsk->icsk_bind_hash;
117 WARN_ON(!icsk->icsk_bind_hash); 117 WARN_ON(!icsk->icsk_bind_hash);
118 inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); 118 inet_twsk_add_bind_node(tw, &tw->tw_tb->owners);
@@ -138,7 +138,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
138 if (__sk_nulls_del_node_init_rcu(sk)) 138 if (__sk_nulls_del_node_init_rcu(sk))
139 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); 139 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
140 140
141 spin_unlock(lock); 141 spin_unlock_bh(lock);
142} 142}
143EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); 143EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
144 144
@@ -147,9 +147,9 @@ static void tw_timer_handler(unsigned long data)
147 struct inet_timewait_sock *tw = (struct inet_timewait_sock *)data; 147 struct inet_timewait_sock *tw = (struct inet_timewait_sock *)data;
148 148
149 if (tw->tw_kill) 149 if (tw->tw_kill)
150 NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED); 150 __NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED);
151 else 151 else
152 NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITED); 152 __NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITED);
153 inet_twsk_kill(tw); 153 inet_twsk_kill(tw);
154} 154}
155 155
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index af18f1e4889e..cbfb1808fcc4 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -65,8 +65,8 @@ static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *s
65{ 65{
66 struct ip_options *opt = &(IPCB(skb)->opt); 66 struct ip_options *opt = &(IPCB(skb)->opt);
67 67
68 IP_INC_STATS_BH(net, IPSTATS_MIB_OUTFORWDATAGRAMS); 68 __IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
69 IP_ADD_STATS_BH(net, IPSTATS_MIB_OUTOCTETS, skb->len); 69 __IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len);
70 70
71 if (unlikely(opt->optlen)) 71 if (unlikely(opt->optlen))
72 ip_forward_options(skb); 72 ip_forward_options(skb);
@@ -157,7 +157,7 @@ sr_failed:
157 157
158too_many_hops: 158too_many_hops:
159 /* Tell the sender its packet died... */ 159 /* Tell the sender its packet died... */
160 IP_INC_STATS_BH(net, IPSTATS_MIB_INHDRERRORS); 160 __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
161 icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0); 161 icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0);
162drop: 162drop:
163 kfree_skb(skb); 163 kfree_skb(skb);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index efbd47d1a531..bbe7f72db9c1 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -204,14 +204,14 @@ static void ip_expire(unsigned long arg)
204 goto out; 204 goto out;
205 205
206 ipq_kill(qp); 206 ipq_kill(qp);
207 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); 207 __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
208 208
209 if (!inet_frag_evicting(&qp->q)) { 209 if (!inet_frag_evicting(&qp->q)) {
210 struct sk_buff *head = qp->q.fragments; 210 struct sk_buff *head = qp->q.fragments;
211 const struct iphdr *iph; 211 const struct iphdr *iph;
212 int err; 212 int err;
213 213
214 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT); 214 __IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT);
215 215
216 if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments) 216 if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments)
217 goto out; 217 goto out;
@@ -291,7 +291,7 @@ static int ip_frag_too_far(struct ipq *qp)
291 struct net *net; 291 struct net *net;
292 292
293 net = container_of(qp->q.net, struct net, ipv4.frags); 293 net = container_of(qp->q.net, struct net, ipv4.frags);
294 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); 294 __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
295 } 295 }
296 296
297 return rc; 297 return rc;
@@ -635,7 +635,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
635 635
636 ip_send_check(iph); 636 ip_send_check(iph);
637 637
638 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); 638 __IP_INC_STATS(net, IPSTATS_MIB_REASMOKS);
639 qp->q.fragments = NULL; 639 qp->q.fragments = NULL;
640 qp->q.fragments_tail = NULL; 640 qp->q.fragments_tail = NULL;
641 return 0; 641 return 0;
@@ -647,7 +647,7 @@ out_nomem:
647out_oversize: 647out_oversize:
648 net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->saddr); 648 net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->saddr);
649out_fail: 649out_fail:
650 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); 650 __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
651 return err; 651 return err;
652} 652}
653 653
@@ -658,7 +658,7 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
658 int vif = l3mdev_master_ifindex_rcu(dev); 658 int vif = l3mdev_master_ifindex_rcu(dev);
659 struct ipq *qp; 659 struct ipq *qp;
660 660
661 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS); 661 __IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS);
662 skb_orphan(skb); 662 skb_orphan(skb);
663 663
664 /* Lookup (or create) queue header */ 664 /* Lookup (or create) queue header */
@@ -675,7 +675,7 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
675 return ret; 675 return ret;
676 } 676 }
677 677
678 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); 678 __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
679 kfree_skb(skb); 679 kfree_skb(skb);
680 return -ENOMEM; 680 return -ENOMEM;
681} 681}
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index eedd829a2f87..2b267e71ebf5 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -122,125 +122,6 @@ static int ipgre_tunnel_init(struct net_device *dev);
122static int ipgre_net_id __read_mostly; 122static int ipgre_net_id __read_mostly;
123static int gre_tap_net_id __read_mostly; 123static int gre_tap_net_id __read_mostly;
124 124
125static int ip_gre_calc_hlen(__be16 o_flags)
126{
127 int addend = 4;
128
129 if (o_flags & TUNNEL_CSUM)
130 addend += 4;
131 if (o_flags & TUNNEL_KEY)
132 addend += 4;
133 if (o_flags & TUNNEL_SEQ)
134 addend += 4;
135 return addend;
136}
137
138static __be16 gre_flags_to_tnl_flags(__be16 flags)
139{
140 __be16 tflags = 0;
141
142 if (flags & GRE_CSUM)
143 tflags |= TUNNEL_CSUM;
144 if (flags & GRE_ROUTING)
145 tflags |= TUNNEL_ROUTING;
146 if (flags & GRE_KEY)
147 tflags |= TUNNEL_KEY;
148 if (flags & GRE_SEQ)
149 tflags |= TUNNEL_SEQ;
150 if (flags & GRE_STRICT)
151 tflags |= TUNNEL_STRICT;
152 if (flags & GRE_REC)
153 tflags |= TUNNEL_REC;
154 if (flags & GRE_VERSION)
155 tflags |= TUNNEL_VERSION;
156
157 return tflags;
158}
159
160static __be16 tnl_flags_to_gre_flags(__be16 tflags)
161{
162 __be16 flags = 0;
163
164 if (tflags & TUNNEL_CSUM)
165 flags |= GRE_CSUM;
166 if (tflags & TUNNEL_ROUTING)
167 flags |= GRE_ROUTING;
168 if (tflags & TUNNEL_KEY)
169 flags |= GRE_KEY;
170 if (tflags & TUNNEL_SEQ)
171 flags |= GRE_SEQ;
172 if (tflags & TUNNEL_STRICT)
173 flags |= GRE_STRICT;
174 if (tflags & TUNNEL_REC)
175 flags |= GRE_REC;
176 if (tflags & TUNNEL_VERSION)
177 flags |= GRE_VERSION;
178
179 return flags;
180}
181
182static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
183 bool *csum_err)
184{
185 const struct gre_base_hdr *greh;
186 __be32 *options;
187 int hdr_len;
188
189 if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
190 return -EINVAL;
191
192 greh = (struct gre_base_hdr *)skb_transport_header(skb);
193 if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
194 return -EINVAL;
195
196 tpi->flags = gre_flags_to_tnl_flags(greh->flags);
197 hdr_len = ip_gre_calc_hlen(tpi->flags);
198
199 if (!pskb_may_pull(skb, hdr_len))
200 return -EINVAL;
201
202 greh = (struct gre_base_hdr *)skb_transport_header(skb);
203 tpi->proto = greh->protocol;
204
205 options = (__be32 *)(greh + 1);
206 if (greh->flags & GRE_CSUM) {
207 if (skb_checksum_simple_validate(skb)) {
208 *csum_err = true;
209 return -EINVAL;
210 }
211
212 skb_checksum_try_convert(skb, IPPROTO_GRE, 0,
213 null_compute_pseudo);
214 options++;
215 }
216
217 if (greh->flags & GRE_KEY) {
218 tpi->key = *options;
219 options++;
220 } else {
221 tpi->key = 0;
222 }
223 if (unlikely(greh->flags & GRE_SEQ)) {
224 tpi->seq = *options;
225 options++;
226 } else {
227 tpi->seq = 0;
228 }
229 /* WCCP version 1 and 2 protocol decoding.
230 * - Change protocol to IP
231 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
232 */
233 if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
234 tpi->proto = htons(ETH_P_IP);
235 if ((*(u8 *)options & 0xF0) != 0x40) {
236 hdr_len += 4;
237 if (!pskb_may_pull(skb, hdr_len))
238 return -EINVAL;
239 }
240 }
241 return iptunnel_pull_header(skb, hdr_len, tpi->proto, false);
242}
243
244static void ipgre_err(struct sk_buff *skb, u32 info, 125static void ipgre_err(struct sk_buff *skb, u32 info,
245 const struct tnl_ptk_info *tpi) 126 const struct tnl_ptk_info *tpi)
246{ 127{
@@ -341,7 +222,7 @@ static void gre_err(struct sk_buff *skb, u32 info)
341 struct tnl_ptk_info tpi; 222 struct tnl_ptk_info tpi;
342 bool csum_err = false; 223 bool csum_err = false;
343 224
344 if (parse_gre_header(skb, &tpi, &csum_err)) { 225 if (gre_parse_header(skb, &tpi, &csum_err) < 0) {
345 if (!csum_err) /* ignore csum errors. */ 226 if (!csum_err) /* ignore csum errors. */
346 return; 227 return;
347 } 228 }
@@ -379,24 +260,22 @@ static __be32 tunnel_id_to_key(__be64 x)
379#endif 260#endif
380} 261}
381 262
382static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) 263static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
264 struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
383{ 265{
384 struct net *net = dev_net(skb->dev);
385 struct metadata_dst *tun_dst = NULL; 266 struct metadata_dst *tun_dst = NULL;
386 struct ip_tunnel_net *itn;
387 const struct iphdr *iph; 267 const struct iphdr *iph;
388 struct ip_tunnel *tunnel; 268 struct ip_tunnel *tunnel;
389 269
390 if (tpi->proto == htons(ETH_P_TEB))
391 itn = net_generic(net, gre_tap_net_id);
392 else
393 itn = net_generic(net, ipgre_net_id);
394
395 iph = ip_hdr(skb); 270 iph = ip_hdr(skb);
396 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags, 271 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
397 iph->saddr, iph->daddr, tpi->key); 272 iph->saddr, iph->daddr, tpi->key);
398 273
399 if (tunnel) { 274 if (tunnel) {
275 if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
276 raw_proto, false) < 0)
277 goto drop;
278
400 skb_pop_mac_header(skb); 279 skb_pop_mac_header(skb);
401 if (tunnel->collect_md) { 280 if (tunnel->collect_md) {
402 __be16 flags; 281 __be16 flags;
@@ -412,13 +291,41 @@ static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
412 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error); 291 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
413 return PACKET_RCVD; 292 return PACKET_RCVD;
414 } 293 }
415 return PACKET_REJECT; 294 return PACKET_NEXT;
295
296drop:
297 kfree_skb(skb);
298 return PACKET_RCVD;
299}
300
301static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
302 int hdr_len)
303{
304 struct net *net = dev_net(skb->dev);
305 struct ip_tunnel_net *itn;
306 int res;
307
308 if (tpi->proto == htons(ETH_P_TEB))
309 itn = net_generic(net, gre_tap_net_id);
310 else
311 itn = net_generic(net, ipgre_net_id);
312
313 res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
314 if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
315 /* ipgre tunnels in collect metadata mode should receive
316 * also ETH_P_TEB traffic.
317 */
318 itn = net_generic(net, ipgre_net_id);
319 res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
320 }
321 return res;
416} 322}
417 323
418static int gre_rcv(struct sk_buff *skb) 324static int gre_rcv(struct sk_buff *skb)
419{ 325{
420 struct tnl_ptk_info tpi; 326 struct tnl_ptk_info tpi;
421 bool csum_err = false; 327 bool csum_err = false;
328 int hdr_len;
422 329
423#ifdef CONFIG_NET_IPGRE_BROADCAST 330#ifdef CONFIG_NET_IPGRE_BROADCAST
424 if (ipv4_is_multicast(ip_hdr(skb)->daddr)) { 331 if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
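
The restructured receive path reports PACKET_NEXT instead of PACKET_REJECT when a lookup finds no tunnel, so ETH_P_TEB frames that miss the gre_tap table get a second lookup in the ipgre table, letting collect-metadata ipgre devices receive them as well. A standalone sketch of that two-step dispatch, not part of the patch, with a stub standing in for __ipgre_rcv():

/* Illustrative sketch, not part of the patch: the two-step lookup done by
 * ipgre_rcv(). try_table() is a toy stand-in for __ipgre_rcv(); here the
 * gre_tap table (0) has no matching tunnel and the ipgre table (1) does.
 */
#include <stdio.h>

enum { PACKET_RCVD, PACKET_REJECT, PACKET_NEXT };

static int try_table(int table)
{
	return table == 0 ? PACKET_NEXT : PACKET_RCVD;
}

int main(void)
{
	int is_teb = 1;
	int res = try_table(is_teb ? 0 : 1);	/* 0 = gre_tap, 1 = ipgre */

	if (res == PACKET_NEXT && is_teb)
		res = try_table(1);		/* second chance: ipgre table */

	printf("%s\n", res == PACKET_RCVD ? "delivered" : "rejected");
	return 0;
}
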
@@ -428,10 +335,11 @@ static int gre_rcv(struct sk_buff *skb)
428 } 335 }
429#endif 336#endif
430 337
431 if (parse_gre_header(skb, &tpi, &csum_err) < 0) 338 hdr_len = gre_parse_header(skb, &tpi, &csum_err);
339 if (hdr_len < 0)
432 goto drop; 340 goto drop;
433 341
434 if (ipgre_rcv(skb, &tpi) == PACKET_RCVD) 342 if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
435 return 0; 343 return 0;
436 344
437 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 345 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
@@ -440,49 +348,6 @@ drop:
440 return 0; 348 return 0;
441} 349}
442 350
443static __sum16 gre_checksum(struct sk_buff *skb)
444{
445 __wsum csum;
446
447 if (skb->ip_summed == CHECKSUM_PARTIAL)
448 csum = lco_csum(skb);
449 else
450 csum = skb_checksum(skb, 0, skb->len, 0);
451 return csum_fold(csum);
452}
453
454static void build_header(struct sk_buff *skb, int hdr_len, __be16 flags,
455 __be16 proto, __be32 key, __be32 seq)
456{
457 struct gre_base_hdr *greh;
458
459 skb_push(skb, hdr_len);
460
461 skb_reset_transport_header(skb);
462 greh = (struct gre_base_hdr *)skb->data;
463 greh->flags = tnl_flags_to_gre_flags(flags);
464 greh->protocol = proto;
465
466 if (flags & (TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_SEQ)) {
467 __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
468
469 if (flags & TUNNEL_SEQ) {
470 *ptr = seq;
471 ptr--;
472 }
473 if (flags & TUNNEL_KEY) {
474 *ptr = key;
475 ptr--;
476 }
477 if (flags & TUNNEL_CSUM &&
478 !(skb_shinfo(skb)->gso_type &
479 (SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) {
480 *ptr = 0;
481 *(__sum16 *)ptr = gre_checksum(skb);
482 }
483 }
484}
485
486static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, 351static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
487 const struct iphdr *tnl_params, 352 const struct iphdr *tnl_params,
488 __be16 proto) 353 __be16 proto)
@@ -493,8 +358,9 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
493 tunnel->o_seqno++; 358 tunnel->o_seqno++;
494 359
495 /* Push GRE header. */ 360 /* Push GRE header. */
496 build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags, 361 gre_build_header(skb, tunnel->tun_hlen,
497 proto, tunnel->parms.o_key, htonl(tunnel->o_seqno)); 362 tunnel->parms.o_flags, proto, tunnel->parms.o_key,
363 htonl(tunnel->o_seqno));
498 364
499 skb_set_inner_protocol(skb, proto); 365 skb_set_inner_protocol(skb, proto);
500 ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol); 366 ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
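
gre_build_header() writes the same layout that gre_parse_header() consumes: the 4-byte base header followed by the optional checksum, key and sequence words, in that order. A self-contained sketch of that on-wire layout, not part of the patch, with local constant definitions and made-up key and sequence values (the real helper also fills in the checksum):

/* Illustrative sketch, not part of the patch: the GRE field ordering that
 * gre_build_header() emits. Constants and values are local to the example.
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

#define GRE_CSUM 0x8000
#define GRE_KEY  0x2000
#define GRE_SEQ  0x1000

static int build_gre(uint8_t *buf, uint16_t flags, uint16_t proto,
		     uint32_t key, uint32_t seq)
{
	uint8_t *p = buf;
	uint16_t f = htons(flags), pr = htons(proto);

	memcpy(p, &f, 2);  p += 2;		/* flags */
	memcpy(p, &pr, 2); p += 2;		/* protocol */
	if (flags & GRE_CSUM) {
		memset(p, 0, 4); p += 4;	/* checksum + reserved */
	}
	if (flags & GRE_KEY) {
		uint32_t k = htonl(key);
		memcpy(p, &k, 4); p += 4;	/* key */
	}
	if (flags & GRE_SEQ) {
		uint32_t s = htonl(seq);
		memcpy(p, &s, 4); p += 4;	/* sequence number */
	}
	return p - buf;
}

int main(void)
{
	uint8_t hdr[16];
	int len = build_gre(hdr, GRE_KEY | GRE_SEQ, 0x6558 /* TEB */, 42, 1);

	printf("GRE header length: %d bytes\n", len);	/* prints 12 */
	return 0;
}
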
@@ -522,7 +388,8 @@ static struct rtable *gre_get_rt(struct sk_buff *skb,
522 return ip_route_output_key(net, fl); 388 return ip_route_output_key(net, fl);
523} 389}
524 390
525static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev) 391static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
392 __be16 proto)
526{ 393{
527 struct ip_tunnel_info *tun_info; 394 struct ip_tunnel_info *tun_info;
528 const struct ip_tunnel_key *key; 395 const struct ip_tunnel_key *key;
@@ -552,7 +419,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
552 fl.saddr); 419 fl.saddr);
553 } 420 }
554 421
555 tunnel_hlen = ip_gre_calc_hlen(key->tun_flags); 422 tunnel_hlen = gre_calc_hlen(key->tun_flags);
556 423
557 min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len 424 min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
558 + tunnel_hlen + sizeof(struct iphdr); 425 + tunnel_hlen + sizeof(struct iphdr);
@@ -571,8 +438,8 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
571 goto err_free_rt; 438 goto err_free_rt;
572 439
573 flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY); 440 flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
574 build_header(skb, tunnel_hlen, flags, htons(ETH_P_TEB), 441 gre_build_header(skb, tunnel_hlen, flags, proto,
575 tunnel_id_to_key(tun_info->key.tun_id), 0); 442 tunnel_id_to_key(tun_info->key.tun_id), 0);
576 443
577 df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; 444 df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
578 445
@@ -612,7 +479,7 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
612 const struct iphdr *tnl_params; 479 const struct iphdr *tnl_params;
613 480
614 if (tunnel->collect_md) { 481 if (tunnel->collect_md) {
615 gre_fb_xmit(skb, dev); 482 gre_fb_xmit(skb, dev, skb->protocol);
616 return NETDEV_TX_OK; 483 return NETDEV_TX_OK;
617 } 484 }
618 485
@@ -654,7 +521,7 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
654 struct ip_tunnel *tunnel = netdev_priv(dev); 521 struct ip_tunnel *tunnel = netdev_priv(dev);
655 522
656 if (tunnel->collect_md) { 523 if (tunnel->collect_md) {
657 gre_fb_xmit(skb, dev); 524 gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
658 return NETDEV_TX_OK; 525 return NETDEV_TX_OK;
659 } 526 }
660 527
@@ -694,8 +561,8 @@ static int ipgre_tunnel_ioctl(struct net_device *dev,
694 if (err) 561 if (err)
695 return err; 562 return err;
696 563
697 p.i_flags = tnl_flags_to_gre_flags(p.i_flags); 564 p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags);
698 p.o_flags = tnl_flags_to_gre_flags(p.o_flags); 565 p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags);
699 566
700 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) 567 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
701 return -EFAULT; 568 return -EFAULT;
@@ -739,7 +606,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
739 606
740 iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph)); 607 iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph));
741 greh = (struct gre_base_hdr *)(iph+1); 608 greh = (struct gre_base_hdr *)(iph+1);
742 greh->flags = tnl_flags_to_gre_flags(t->parms.o_flags); 609 greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
743 greh->protocol = htons(type); 610 greh->protocol = htons(type);
744 611
745 memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); 612 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
@@ -840,7 +707,7 @@ static void __gre_tunnel_init(struct net_device *dev)
840 int t_hlen; 707 int t_hlen;
841 708
842 tunnel = netdev_priv(dev); 709 tunnel = netdev_priv(dev);
843 tunnel->tun_hlen = ip_gre_calc_hlen(tunnel->parms.o_flags); 710 tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
844 tunnel->parms.iph.protocol = IPPROTO_GRE; 711 tunnel->parms.iph.protocol = IPPROTO_GRE;
845 712
846 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen; 713 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
@@ -885,7 +752,7 @@ static int ipgre_tunnel_init(struct net_device *dev)
885 netif_keep_dst(dev); 752 netif_keep_dst(dev);
886 dev->addr_len = 4; 753 dev->addr_len = 4;
887 754
888 if (iph->daddr) { 755 if (iph->daddr && !tunnel->collect_md) {
889#ifdef CONFIG_NET_IPGRE_BROADCAST 756#ifdef CONFIG_NET_IPGRE_BROADCAST
890 if (ipv4_is_multicast(iph->daddr)) { 757 if (ipv4_is_multicast(iph->daddr)) {
891 if (!iph->saddr) 758 if (!iph->saddr)
@@ -894,8 +761,9 @@ static int ipgre_tunnel_init(struct net_device *dev)
894 dev->header_ops = &ipgre_header_ops; 761 dev->header_ops = &ipgre_header_ops;
895 } 762 }
896#endif 763#endif
897 } else 764 } else if (!tunnel->collect_md) {
898 dev->header_ops = &ipgre_header_ops; 765 dev->header_ops = &ipgre_header_ops;
766 }
899 767
900 return ip_tunnel_init(dev); 768 return ip_tunnel_init(dev);
901} 769}
@@ -938,6 +806,11 @@ static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
938 if (flags & (GRE_VERSION|GRE_ROUTING)) 806 if (flags & (GRE_VERSION|GRE_ROUTING))
939 return -EINVAL; 807 return -EINVAL;
940 808
809 if (data[IFLA_GRE_COLLECT_METADATA] &&
810 data[IFLA_GRE_ENCAP_TYPE] &&
811 nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
812 return -EINVAL;
813
941 return 0; 814 return 0;
942} 815}
943 816
@@ -1155,8 +1028,10 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1155 struct ip_tunnel_parm *p = &t->parms; 1028 struct ip_tunnel_parm *p = &t->parms;
1156 1029
1157 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || 1030 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1158 nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) || 1031 nla_put_be16(skb, IFLA_GRE_IFLAGS,
1159 nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) || 1032 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
1033 nla_put_be16(skb, IFLA_GRE_OFLAGS,
1034 gre_tnl_flags_to_gre_flags(p->o_flags)) ||
1160 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || 1035 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1161 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || 1036 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1162 nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) || 1037 nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index e3d782746d9d..4b351af3e67b 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -218,17 +218,17 @@ static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_b
218 protocol = -ret; 218 protocol = -ret;
219 goto resubmit; 219 goto resubmit;
220 } 220 }
221 IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS); 221 __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
222 } else { 222 } else {
223 if (!raw) { 223 if (!raw) {
224 if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 224 if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
225 IP_INC_STATS_BH(net, IPSTATS_MIB_INUNKNOWNPROTOS); 225 __IP_INC_STATS(net, IPSTATS_MIB_INUNKNOWNPROTOS);
226 icmp_send(skb, ICMP_DEST_UNREACH, 226 icmp_send(skb, ICMP_DEST_UNREACH,
227 ICMP_PROT_UNREACH, 0); 227 ICMP_PROT_UNREACH, 0);
228 } 228 }
229 kfree_skb(skb); 229 kfree_skb(skb);
230 } else { 230 } else {
231 IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS); 231 __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
232 consume_skb(skb); 232 consume_skb(skb);
233 } 233 }
234 } 234 }
@@ -273,7 +273,7 @@ static inline bool ip_rcv_options(struct sk_buff *skb)
273 --ANK (980813) 273 --ANK (980813)
274 */ 274 */
275 if (skb_cow(skb, skb_headroom(skb))) { 275 if (skb_cow(skb, skb_headroom(skb))) {
276 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS); 276 __IP_INC_STATS(dev_net(dev), IPSTATS_MIB_INDISCARDS);
277 goto drop; 277 goto drop;
278 } 278 }
279 279
@@ -282,7 +282,7 @@ static inline bool ip_rcv_options(struct sk_buff *skb)
282 opt->optlen = iph->ihl*4 - sizeof(struct iphdr); 282 opt->optlen = iph->ihl*4 - sizeof(struct iphdr);
283 283
284 if (ip_options_compile(dev_net(dev), opt, skb)) { 284 if (ip_options_compile(dev_net(dev), opt, skb)) {
285 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS); 285 __IP_INC_STATS(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
286 goto drop; 286 goto drop;
287 } 287 }
288 288
@@ -313,6 +313,13 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
313 const struct iphdr *iph = ip_hdr(skb); 313 const struct iphdr *iph = ip_hdr(skb);
314 struct rtable *rt; 314 struct rtable *rt;
315 315
316 /* if ingress device is enslaved to an L3 master device pass the
317 * skb to its handler for processing
318 */
319 skb = l3mdev_ip_rcv(skb);
320 if (!skb)
321 return NET_RX_SUCCESS;
322
316 if (net->ipv4.sysctl_ip_early_demux && 323 if (net->ipv4.sysctl_ip_early_demux &&
317 !skb_dst(skb) && 324 !skb_dst(skb) &&
318 !skb->sk && 325 !skb->sk &&
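
The l3mdev hook added above follows a consume-or-continue pattern: the helper either hands the skb to the L3 master device's handler and returns NULL, in which case ip_rcv_finish() stops with NET_RX_SUCCESS, or returns the skb unchanged for normal processing. A toy sketch of that pattern, not part of the patch and not the real l3mdev API:

/* Illustrative sketch, not part of the patch: the consume-or-continue
 * shape of the l3mdev_ip_rcv() call. Types and names are toys.
 */
#include <stdio.h>

struct pkt { int id; int enslaved_to_l3mdev; };

static struct pkt *l3mdev_hook(struct pkt *p)
{
	if (p->enslaved_to_l3mdev)
		return NULL;	/* master device's handler took the packet */
	return p;		/* continue normal receive processing */
}

int main(void)
{
	struct pkt a = { 1, 0 }, b = { 2, 1 };

	printf("pkt 1: %s\n", l3mdev_hook(&a) ? "continue ip_rcv_finish" : "handled by l3mdev");
	printf("pkt 2: %s\n", l3mdev_hook(&b) ? "continue ip_rcv_finish" : "handled by l3mdev");
	return 0;
}
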
@@ -337,7 +344,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
337 iph->tos, skb->dev); 344 iph->tos, skb->dev);
338 if (unlikely(err)) { 345 if (unlikely(err)) {
339 if (err == -EXDEV) 346 if (err == -EXDEV)
340 NET_INC_STATS_BH(net, LINUX_MIB_IPRPFILTER); 347 __NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
341 goto drop; 348 goto drop;
342 } 349 }
343 } 350 }
@@ -358,9 +365,9 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
358 365
359 rt = skb_rtable(skb); 366 rt = skb_rtable(skb);
360 if (rt->rt_type == RTN_MULTICAST) { 367 if (rt->rt_type == RTN_MULTICAST) {
361 IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INMCAST, skb->len); 368 __IP_UPD_PO_STATS(net, IPSTATS_MIB_INMCAST, skb->len);
362 } else if (rt->rt_type == RTN_BROADCAST) { 369 } else if (rt->rt_type == RTN_BROADCAST) {
363 IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INBCAST, skb->len); 370 __IP_UPD_PO_STATS(net, IPSTATS_MIB_INBCAST, skb->len);
364 } else if (skb->pkt_type == PACKET_BROADCAST || 371 } else if (skb->pkt_type == PACKET_BROADCAST ||
365 skb->pkt_type == PACKET_MULTICAST) { 372 skb->pkt_type == PACKET_MULTICAST) {
366 struct in_device *in_dev = __in_dev_get_rcu(skb->dev); 373 struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
@@ -409,11 +416,11 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
409 416
410 417
411 net = dev_net(dev); 418 net = dev_net(dev);
412 IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_IN, skb->len); 419 __IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len);
413 420
414 skb = skb_share_check(skb, GFP_ATOMIC); 421 skb = skb_share_check(skb, GFP_ATOMIC);
415 if (!skb) { 422 if (!skb) {
416 IP_INC_STATS_BH(net, IPSTATS_MIB_INDISCARDS); 423 __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS);
417 goto out; 424 goto out;
418 } 425 }
419 426
@@ -439,9 +446,9 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
439 BUILD_BUG_ON(IPSTATS_MIB_ECT1PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_1); 446 BUILD_BUG_ON(IPSTATS_MIB_ECT1PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_1);
440 BUILD_BUG_ON(IPSTATS_MIB_ECT0PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_0); 447 BUILD_BUG_ON(IPSTATS_MIB_ECT0PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_0);
441 BUILD_BUG_ON(IPSTATS_MIB_CEPKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_CE); 448 BUILD_BUG_ON(IPSTATS_MIB_CEPKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_CE);
442 IP_ADD_STATS_BH(net, 449 __IP_ADD_STATS(net,
443 IPSTATS_MIB_NOECTPKTS + (iph->tos & INET_ECN_MASK), 450 IPSTATS_MIB_NOECTPKTS + (iph->tos & INET_ECN_MASK),
444 max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs)); 451 max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
445 452
446 if (!pskb_may_pull(skb, iph->ihl*4)) 453 if (!pskb_may_pull(skb, iph->ihl*4))
447 goto inhdr_error; 454 goto inhdr_error;
@@ -453,7 +460,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
453 460
454 len = ntohs(iph->tot_len); 461 len = ntohs(iph->tot_len);
455 if (skb->len < len) { 462 if (skb->len < len) {
456 IP_INC_STATS_BH(net, IPSTATS_MIB_INTRUNCATEDPKTS); 463 __IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS);
457 goto drop; 464 goto drop;
458 } else if (len < (iph->ihl*4)) 465 } else if (len < (iph->ihl*4))
459 goto inhdr_error; 466 goto inhdr_error;
@@ -463,7 +470,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
463 * Note this now means skb->len holds ntohs(iph->tot_len). 470 * Note this now means skb->len holds ntohs(iph->tot_len).
464 */ 471 */
465 if (pskb_trim_rcsum(skb, len)) { 472 if (pskb_trim_rcsum(skb, len)) {
466 IP_INC_STATS_BH(net, IPSTATS_MIB_INDISCARDS); 473 __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS);
467 goto drop; 474 goto drop;
468 } 475 }
469 476
@@ -471,6 +478,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
471 478
472 /* Remove any debris in the socket control block */ 479 /* Remove any debris in the socket control block */
473 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); 480 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
481 IPCB(skb)->iif = skb->skb_iif;
474 482
475 /* Must drop socket now because of tproxy. */ 483 /* Must drop socket now because of tproxy. */
476 skb_orphan(skb); 484 skb_orphan(skb);
@@ -480,9 +488,9 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
480 ip_rcv_finish); 488 ip_rcv_finish);
481 489
482csum_error: 490csum_error:
483 IP_INC_STATS_BH(net, IPSTATS_MIB_CSUMERRORS); 491 __IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS);
484inhdr_error: 492inhdr_error:
485 IP_INC_STATS_BH(net, IPSTATS_MIB_INHDRERRORS); 493 __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
486drop: 494drop:
487 kfree_skb(skb); 495 kfree_skb(skb);
488out: 496out:
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index bdb222c0c6a2..5805762d7fc7 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1193,7 +1193,12 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
1193 ipv6_sk_rxinfo(sk); 1193 ipv6_sk_rxinfo(sk);
1194 1194
1195 if (prepare && skb_rtable(skb)) { 1195 if (prepare && skb_rtable(skb)) {
1196 pktinfo->ipi_ifindex = inet_iif(skb); 1196 /* skb->cb is overloaded: prior to this point it is IP{6}CB
1197 * which has interface index (iif) as the first member of the
1198 * underlying inet{6}_skb_parm struct. This code then overlays
1199 * PKTINFO_SKB_CB and in_pktinfo also has iif as the first
1200 * element so the iif is picked up from the prior IPCB
1201 */
1197 pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb); 1202 pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb);
1198 } else { 1203 } else {
1199 pktinfo->ipi_ifindex = 0; 1204 pktinfo->ipi_ifindex = 0;
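
The comment explains why the explicit ipi_ifindex assignment can be dropped: skb->cb is reused across layers, and both inet_skb_parm and in_pktinfo place the interface index as their first member, so the value written earlier is still there when the control block is reinterpreted. A tiny sketch of that first-member overlay, not part of the patch, using toy structs rather than the real kernel types:

/* Illustrative sketch, not part of the patch: the first member of two
 * overlaid structs occupies the same storage, so no explicit copy is
 * needed. Toy structs stand in for inet_skb_parm and in_pktinfo.
 */
#include <stdio.h>

struct toy_skb_parm { int iif;         int flags;    };
struct toy_pktinfo  { int ipi_ifindex; int spec_dst; };

union toy_cb {				/* models the skb->cb area */
	struct toy_skb_parm parm;
	struct toy_pktinfo  info;
};

int main(void)
{
	union toy_cb cb;

	cb.parm.iif = 3;			/* written on the receive path */
	printf("%d\n", cb.info.ipi_ifindex);	/* read back later: prints 3 */
	return 0;
}
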
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 6aad0192443d..a69ed94bda1b 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -326,12 +326,12 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
326 326
327 if (!IS_ERR(rt)) { 327 if (!IS_ERR(rt)) {
328 tdev = rt->dst.dev; 328 tdev = rt->dst.dev;
329 dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
330 fl4.saddr);
331 ip_rt_put(rt); 329 ip_rt_put(rt);
332 } 330 }
333 if (dev->type != ARPHRD_ETHER) 331 if (dev->type != ARPHRD_ETHER)
334 dev->flags |= IFF_POINTOPOINT; 332 dev->flags |= IFF_POINTOPOINT;
333
334 dst_cache_reset(&tunnel->dst_cache);
335 } 335 }
336 336
337 if (!tdev && tunnel->parms.link) 337 if (!tdev && tunnel->parms.link)
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 786fa7ca28e0..9118b0e640ba 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -157,7 +157,7 @@ int iptunnel_handle_offloads(struct sk_buff *skb,
157 } 157 }
158 158
159 if (skb_is_gso(skb)) { 159 if (skb_is_gso(skb)) {
160 err = skb_unclone(skb, GFP_ATOMIC); 160 err = skb_header_unclone(skb, GFP_ATOMIC);
161 if (unlikely(err)) 161 if (unlikely(err))
162 return err; 162 return err;
163 skb_shinfo(skb)->gso_type |= gso_type_mask; 163 skb_shinfo(skb)->gso_type |= gso_type_mask;
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 5cf10b777b7e..a917903d5e97 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -156,6 +156,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
156 struct dst_entry *dst = skb_dst(skb); 156 struct dst_entry *dst = skb_dst(skb);
157 struct net_device *tdev; /* Device to other host */ 157 struct net_device *tdev; /* Device to other host */
158 int err; 158 int err;
159 int mtu;
159 160
160 if (!dst) { 161 if (!dst) {
161 dev->stats.tx_carrier_errors++; 162 dev->stats.tx_carrier_errors++;
@@ -192,6 +193,23 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
192 tunnel->err_count = 0; 193 tunnel->err_count = 0;
193 } 194 }
194 195
196 mtu = dst_mtu(dst);
197 if (skb->len > mtu) {
198 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
199 if (skb->protocol == htons(ETH_P_IP)) {
200 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
201 htonl(mtu));
202 } else {
203 if (mtu < IPV6_MIN_MTU)
204 mtu = IPV6_MIN_MTU;
205
206 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
207 }
208
209 dst_release(dst);
210 goto tx_error;
211 }
212
195 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev))); 213 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
196 skb_dst_set(skb, dst); 214 skb_dst_set(skb, dst);
197 skb->dev = skb_dst(skb)->dev; 215 skb->dev = skb_dst(skb)->dev;
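
vti_xmit() now performs its own path-MTU check before handing the packet to the underlay: an oversized IPv4 packet triggers ICMP_DEST_UNREACH/ICMP_FRAG_NEEDED carrying the path MTU, an oversized IPv6 packet triggers ICMPV6_PKT_TOOBIG with the MTU clamped to the 1280-byte IPv6 minimum. A userspace sketch of just that decision, not part of the patch, with made-up packet sizes and MTUs:

/* Illustrative sketch, not part of the patch: the MTU decision added to
 * vti_xmit(). Sizes and MTUs below are hypothetical.
 */
#include <stdio.h>

#define IPV6_MIN_MTU 1280

static void pmtu_check(int pkt_len, int mtu, int is_ipv6)
{
	if (pkt_len <= mtu) {
		printf("forward %d-byte packet\n", pkt_len);
		return;
	}
	if (!is_ipv6) {
		printf("ICMP_FRAG_NEEDED, mtu=%d\n", mtu);
	} else {
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
		printf("ICMPV6_PKT_TOOBIG, mtu=%d\n", mtu);
	}
}

int main(void)
{
	pmtu_check(1500, 1400, 0);	/* IPv4 over a 1400-byte path */
	pmtu_check(1500, 1200, 1);	/* IPv6 over a 1200-byte path */
	return 0;
}
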
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 60f5161abcb4..2033f929aa66 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -34,27 +34,6 @@ MODULE_LICENSE("GPL");
34MODULE_AUTHOR("David S. Miller <davem@redhat.com>"); 34MODULE_AUTHOR("David S. Miller <davem@redhat.com>");
35MODULE_DESCRIPTION("arptables core"); 35MODULE_DESCRIPTION("arptables core");
36 36
37/*#define DEBUG_ARP_TABLES*/
38/*#define DEBUG_ARP_TABLES_USER*/
39
40#ifdef DEBUG_ARP_TABLES
41#define dprintf(format, args...) pr_debug(format, ## args)
42#else
43#define dprintf(format, args...)
44#endif
45
46#ifdef DEBUG_ARP_TABLES_USER
47#define duprintf(format, args...) pr_debug(format, ## args)
48#else
49#define duprintf(format, args...)
50#endif
51
52#ifdef CONFIG_NETFILTER_DEBUG
53#define ARP_NF_ASSERT(x) WARN_ON(!(x))
54#else
55#define ARP_NF_ASSERT(x)
56#endif
57
58void *arpt_alloc_initial_table(const struct xt_table *info) 37void *arpt_alloc_initial_table(const struct xt_table *info)
59{ 38{
60 return xt_alloc_initial_table(arpt, ARPT); 39 return xt_alloc_initial_table(arpt, ARPT);
@@ -113,36 +92,20 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
113#define FWINV(bool, invflg) ((bool) ^ !!(arpinfo->invflags & (invflg))) 92#define FWINV(bool, invflg) ((bool) ^ !!(arpinfo->invflags & (invflg)))
114 93
115 if (FWINV((arphdr->ar_op & arpinfo->arpop_mask) != arpinfo->arpop, 94 if (FWINV((arphdr->ar_op & arpinfo->arpop_mask) != arpinfo->arpop,
116 ARPT_INV_ARPOP)) { 95 ARPT_INV_ARPOP))
117 dprintf("ARP operation field mismatch.\n");
118 dprintf("ar_op: %04x info->arpop: %04x info->arpop_mask: %04x\n",
119 arphdr->ar_op, arpinfo->arpop, arpinfo->arpop_mask);
120 return 0; 96 return 0;
121 }
122 97
123 if (FWINV((arphdr->ar_hrd & arpinfo->arhrd_mask) != arpinfo->arhrd, 98 if (FWINV((arphdr->ar_hrd & arpinfo->arhrd_mask) != arpinfo->arhrd,
124 ARPT_INV_ARPHRD)) { 99 ARPT_INV_ARPHRD))
125 dprintf("ARP hardware address format mismatch.\n");
126 dprintf("ar_hrd: %04x info->arhrd: %04x info->arhrd_mask: %04x\n",
127 arphdr->ar_hrd, arpinfo->arhrd, arpinfo->arhrd_mask);
128 return 0; 100 return 0;
129 }
130 101
131 if (FWINV((arphdr->ar_pro & arpinfo->arpro_mask) != arpinfo->arpro, 102 if (FWINV((arphdr->ar_pro & arpinfo->arpro_mask) != arpinfo->arpro,
132 ARPT_INV_ARPPRO)) { 103 ARPT_INV_ARPPRO))
133 dprintf("ARP protocol address format mismatch.\n");
134 dprintf("ar_pro: %04x info->arpro: %04x info->arpro_mask: %04x\n",
135 arphdr->ar_pro, arpinfo->arpro, arpinfo->arpro_mask);
136 return 0; 104 return 0;
137 }
138 105
139 if (FWINV((arphdr->ar_hln & arpinfo->arhln_mask) != arpinfo->arhln, 106 if (FWINV((arphdr->ar_hln & arpinfo->arhln_mask) != arpinfo->arhln,
140 ARPT_INV_ARPHLN)) { 107 ARPT_INV_ARPHLN))
141 dprintf("ARP hardware address length mismatch.\n");
142 dprintf("ar_hln: %02x info->arhln: %02x info->arhln_mask: %02x\n",
143 arphdr->ar_hln, arpinfo->arhln, arpinfo->arhln_mask);
144 return 0; 108 return 0;
145 }
146 109
147 src_devaddr = arpptr; 110 src_devaddr = arpptr;
148 arpptr += dev->addr_len; 111 arpptr += dev->addr_len;
@@ -155,49 +118,25 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
155 if (FWINV(arp_devaddr_compare(&arpinfo->src_devaddr, src_devaddr, dev->addr_len), 118 if (FWINV(arp_devaddr_compare(&arpinfo->src_devaddr, src_devaddr, dev->addr_len),
156 ARPT_INV_SRCDEVADDR) || 119 ARPT_INV_SRCDEVADDR) ||
157 FWINV(arp_devaddr_compare(&arpinfo->tgt_devaddr, tgt_devaddr, dev->addr_len), 120 FWINV(arp_devaddr_compare(&arpinfo->tgt_devaddr, tgt_devaddr, dev->addr_len),
158 ARPT_INV_TGTDEVADDR)) { 121 ARPT_INV_TGTDEVADDR))
159 dprintf("Source or target device address mismatch.\n");
160
161 return 0; 122 return 0;
162 }
163 123
164 if (FWINV((src_ipaddr & arpinfo->smsk.s_addr) != arpinfo->src.s_addr, 124 if (FWINV((src_ipaddr & arpinfo->smsk.s_addr) != arpinfo->src.s_addr,
165 ARPT_INV_SRCIP) || 125 ARPT_INV_SRCIP) ||
166 FWINV(((tgt_ipaddr & arpinfo->tmsk.s_addr) != arpinfo->tgt.s_addr), 126 FWINV(((tgt_ipaddr & arpinfo->tmsk.s_addr) != arpinfo->tgt.s_addr),
167 ARPT_INV_TGTIP)) { 127 ARPT_INV_TGTIP))
168 dprintf("Source or target IP address mismatch.\n");
169
170 dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n",
171 &src_ipaddr,
172 &arpinfo->smsk.s_addr,
173 &arpinfo->src.s_addr,
174 arpinfo->invflags & ARPT_INV_SRCIP ? " (INV)" : "");
175 dprintf("TGT: %pI4 Mask: %pI4 Target: %pI4.%s\n",
176 &tgt_ipaddr,
177 &arpinfo->tmsk.s_addr,
178 &arpinfo->tgt.s_addr,
179 arpinfo->invflags & ARPT_INV_TGTIP ? " (INV)" : "");
180 return 0; 128 return 0;
181 }
182 129
183 /* Look for ifname matches. */ 130 /* Look for ifname matches. */
184 ret = ifname_compare(indev, arpinfo->iniface, arpinfo->iniface_mask); 131 ret = ifname_compare(indev, arpinfo->iniface, arpinfo->iniface_mask);
185 132
186 if (FWINV(ret != 0, ARPT_INV_VIA_IN)) { 133 if (FWINV(ret != 0, ARPT_INV_VIA_IN))
187 dprintf("VIA in mismatch (%s vs %s).%s\n",
188 indev, arpinfo->iniface,
189 arpinfo->invflags & ARPT_INV_VIA_IN ? " (INV)" : "");
190 return 0; 134 return 0;
191 }
192 135
193 ret = ifname_compare(outdev, arpinfo->outiface, arpinfo->outiface_mask); 136 ret = ifname_compare(outdev, arpinfo->outiface, arpinfo->outiface_mask);
194 137
195 if (FWINV(ret != 0, ARPT_INV_VIA_OUT)) { 138 if (FWINV(ret != 0, ARPT_INV_VIA_OUT))
196 dprintf("VIA out mismatch (%s vs %s).%s\n",
197 outdev, arpinfo->outiface,
198 arpinfo->invflags & ARPT_INV_VIA_OUT ? " (INV)" : "");
199 return 0; 139 return 0;
200 }
201 140
202 return 1; 141 return 1;
203#undef FWINV 142#undef FWINV
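
With the debug printouts gone, each match test in arp_packet_match() collapses to a single FWINV() expression: a raw mismatch test XORed with the per-field invert flag, so one line covers both normal and negated rules. A small standalone sketch of that idiom, not part of the patch, where a nonzero result means the rule does not match:

/* Illustrative sketch, not part of the patch: the FWINV() idiom.
 * A nonzero return means the rule fails for this packet.
 */
#include <stdio.h>

#define INV_SRCIP 0x01	/* stand-in for ARPT_INV_SRCIP */

static int fwinv(int mismatch, int flag, int invflags)
{
	return mismatch ^ !!(invflags & flag);
}

int main(void)
{
	/* address mismatches, rule not inverted -> rule fails (prints 1) */
	printf("%d\n", fwinv(1, INV_SRCIP, 0));
	/* address mismatches, rule inverted ("! -s ...") -> rule matches (prints 0) */
	printf("%d\n", fwinv(1, INV_SRCIP, INV_SRCIP));
	return 0;
}
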
@@ -205,16 +144,10 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
205 144
206static inline int arp_checkentry(const struct arpt_arp *arp) 145static inline int arp_checkentry(const struct arpt_arp *arp)
207{ 146{
208 if (arp->flags & ~ARPT_F_MASK) { 147 if (arp->flags & ~ARPT_F_MASK)
209 duprintf("Unknown flag bits set: %08X\n",
210 arp->flags & ~ARPT_F_MASK);
211 return 0; 148 return 0;
212 } 149 if (arp->invflags & ~ARPT_INV_MASK)
213 if (arp->invflags & ~ARPT_INV_MASK) {
214 duprintf("Unknown invflag bits set: %08X\n",
215 arp->invflags & ~ARPT_INV_MASK);
216 return 0; 150 return 0;
217 }
218 151
219 return 1; 152 return 1;
220} 153}
@@ -406,11 +339,9 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
406 = (void *)arpt_get_target_c(e); 339 = (void *)arpt_get_target_c(e);
407 int visited = e->comefrom & (1 << hook); 340 int visited = e->comefrom & (1 << hook);
408 341
409 if (e->comefrom & (1 << NF_ARP_NUMHOOKS)) { 342 if (e->comefrom & (1 << NF_ARP_NUMHOOKS))
410 pr_notice("arptables: loop hook %u pos %u %08X.\n",
411 hook, pos, e->comefrom);
412 return 0; 343 return 0;
413 } 344
414 e->comefrom 345 e->comefrom
415 |= ((1 << hook) | (1 << NF_ARP_NUMHOOKS)); 346 |= ((1 << hook) | (1 << NF_ARP_NUMHOOKS));
416 347
@@ -423,12 +354,8 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
423 354
424 if ((strcmp(t->target.u.user.name, 355 if ((strcmp(t->target.u.user.name,
425 XT_STANDARD_TARGET) == 0) && 356 XT_STANDARD_TARGET) == 0) &&
426 t->verdict < -NF_MAX_VERDICT - 1) { 357 t->verdict < -NF_MAX_VERDICT - 1)
427 duprintf("mark_source_chains: bad "
428 "negative verdict (%i)\n",
429 t->verdict);
430 return 0; 358 return 0;
431 }
432 359
433 /* Return: backtrack through the last 360 /* Return: backtrack through the last
434 * big jump. 361 * big jump.
@@ -462,8 +389,6 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
462 XT_STANDARD_TARGET) == 0 && 389 XT_STANDARD_TARGET) == 0 &&
463 newpos >= 0) { 390 newpos >= 0) {
464 /* This a jump; chase it. */ 391 /* This a jump; chase it. */
465 duprintf("Jump rule %u -> %u\n",
466 pos, newpos);
467 e = (struct arpt_entry *) 392 e = (struct arpt_entry *)
468 (entry0 + newpos); 393 (entry0 + newpos);
469 if (!find_jump_target(newinfo, e)) 394 if (!find_jump_target(newinfo, e))
@@ -480,8 +405,7 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
480 pos = newpos; 405 pos = newpos;
481 } 406 }
482 } 407 }
483next: 408next: ;
484 duprintf("Finished chain %u\n", hook);
485 } 409 }
486 return 1; 410 return 1;
487} 411}
@@ -489,7 +413,6 @@ next:
489static inline int check_target(struct arpt_entry *e, const char *name) 413static inline int check_target(struct arpt_entry *e, const char *name)
490{ 414{
491 struct xt_entry_target *t = arpt_get_target(e); 415 struct xt_entry_target *t = arpt_get_target(e);
492 int ret;
493 struct xt_tgchk_param par = { 416 struct xt_tgchk_param par = {
494 .table = name, 417 .table = name,
495 .entryinfo = e, 418 .entryinfo = e,
@@ -499,13 +422,7 @@ static inline int check_target(struct arpt_entry *e, const char *name)
499 .family = NFPROTO_ARP, 422 .family = NFPROTO_ARP,
500 }; 423 };
501 424
502 ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false); 425 return xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false);
503 if (ret < 0) {
504 duprintf("arp_tables: check failed for `%s'.\n",
505 t->u.kernel.target->name);
506 return ret;
507 }
508 return 0;
509} 426}
510 427
511static inline int 428static inline int
@@ -513,17 +430,18 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
513{ 430{
514 struct xt_entry_target *t; 431 struct xt_entry_target *t;
515 struct xt_target *target; 432 struct xt_target *target;
433 unsigned long pcnt;
516 int ret; 434 int ret;
517 435
518 e->counters.pcnt = xt_percpu_counter_alloc(); 436 pcnt = xt_percpu_counter_alloc();
519 if (IS_ERR_VALUE(e->counters.pcnt)) 437 if (IS_ERR_VALUE(pcnt))
520 return -ENOMEM; 438 return -ENOMEM;
439 e->counters.pcnt = pcnt;
521 440
522 t = arpt_get_target(e); 441 t = arpt_get_target(e);
523 target = xt_request_find_target(NFPROTO_ARP, t->u.user.name, 442 target = xt_request_find_target(NFPROTO_ARP, t->u.user.name,
524 t->u.user.revision); 443 t->u.user.revision);
525 if (IS_ERR(target)) { 444 if (IS_ERR(target)) {
526 duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
527 ret = PTR_ERR(target); 445 ret = PTR_ERR(target);
528 goto out; 446 goto out;
529 } 447 }
@@ -569,17 +487,12 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e,
569 487
570 if ((unsigned long)e % __alignof__(struct arpt_entry) != 0 || 488 if ((unsigned long)e % __alignof__(struct arpt_entry) != 0 ||
571 (unsigned char *)e + sizeof(struct arpt_entry) >= limit || 489 (unsigned char *)e + sizeof(struct arpt_entry) >= limit ||
572 (unsigned char *)e + e->next_offset > limit) { 490 (unsigned char *)e + e->next_offset > limit)
573 duprintf("Bad offset %p\n", e);
574 return -EINVAL; 491 return -EINVAL;
575 }
576 492
577 if (e->next_offset 493 if (e->next_offset
578 < sizeof(struct arpt_entry) + sizeof(struct xt_entry_target)) { 494 < sizeof(struct arpt_entry) + sizeof(struct xt_entry_target))
579 duprintf("checking: element %p size %u\n",
580 e, e->next_offset);
581 return -EINVAL; 495 return -EINVAL;
582 }
583 496
584 if (!arp_checkentry(&e->arp)) 497 if (!arp_checkentry(&e->arp))
585 return -EINVAL; 498 return -EINVAL;
@@ -596,12 +509,9 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e,
596 if ((unsigned char *)e - base == hook_entries[h]) 509 if ((unsigned char *)e - base == hook_entries[h])
597 newinfo->hook_entry[h] = hook_entries[h]; 510 newinfo->hook_entry[h] = hook_entries[h];
598 if ((unsigned char *)e - base == underflows[h]) { 511 if ((unsigned char *)e - base == underflows[h]) {
599 if (!check_underflow(e)) { 512 if (!check_underflow(e))
600 pr_debug("Underflows must be unconditional and "
601 "use the STANDARD target with "
602 "ACCEPT/DROP\n");
603 return -EINVAL; 513 return -EINVAL;
604 } 514
605 newinfo->underflow[h] = underflows[h]; 515 newinfo->underflow[h] = underflows[h];
606 } 516 }
607 } 517 }
@@ -646,7 +556,6 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
646 newinfo->underflow[i] = 0xFFFFFFFF; 556 newinfo->underflow[i] = 0xFFFFFFFF;
647 } 557 }
648 558
649 duprintf("translate_table: size %u\n", newinfo->size);
650 i = 0; 559 i = 0;
651 560
652 /* Walk through entries, checking offsets. */ 561 /* Walk through entries, checking offsets. */
@@ -663,31 +572,21 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
663 XT_ERROR_TARGET) == 0) 572 XT_ERROR_TARGET) == 0)
664 ++newinfo->stacksize; 573 ++newinfo->stacksize;
665 } 574 }
666 duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
667 if (ret != 0) 575 if (ret != 0)
668 return ret; 576 return ret;
669 577
670 if (i != repl->num_entries) { 578 if (i != repl->num_entries)
671 duprintf("translate_table: %u not %u entries\n",
672 i, repl->num_entries);
673 return -EINVAL; 579 return -EINVAL;
674 }
675 580
676 /* Check hooks all assigned */ 581 /* Check hooks all assigned */
677 for (i = 0; i < NF_ARP_NUMHOOKS; i++) { 582 for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
678 /* Only hooks which are valid */ 583 /* Only hooks which are valid */
679 if (!(repl->valid_hooks & (1 << i))) 584 if (!(repl->valid_hooks & (1 << i)))
680 continue; 585 continue;
681 if (newinfo->hook_entry[i] == 0xFFFFFFFF) { 586 if (newinfo->hook_entry[i] == 0xFFFFFFFF)
682 duprintf("Invalid hook entry %u %u\n",
683 i, repl->hook_entry[i]);
684 return -EINVAL; 587 return -EINVAL;
685 } 588 if (newinfo->underflow[i] == 0xFFFFFFFF)
686 if (newinfo->underflow[i] == 0xFFFFFFFF) {
687 duprintf("Invalid underflow %u %u\n",
688 i, repl->underflow[i]);
689 return -EINVAL; 589 return -EINVAL;
690 }
691 } 590 }
692 591
693 if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) 592 if (!mark_source_chains(newinfo, repl->valid_hooks, entry0))
@@ -895,11 +794,8 @@ static int get_info(struct net *net, void __user *user,
895 struct xt_table *t; 794 struct xt_table *t;
896 int ret; 795 int ret;
897 796
898 if (*len != sizeof(struct arpt_getinfo)) { 797 if (*len != sizeof(struct arpt_getinfo))
899 duprintf("length %u != %Zu\n", *len,
900 sizeof(struct arpt_getinfo));
901 return -EINVAL; 798 return -EINVAL;
902 }
903 799
904 if (copy_from_user(name, user, sizeof(name)) != 0) 800 if (copy_from_user(name, user, sizeof(name)) != 0)
905 return -EFAULT; 801 return -EFAULT;
@@ -955,33 +851,25 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
955 struct arpt_get_entries get; 851 struct arpt_get_entries get;
956 struct xt_table *t; 852 struct xt_table *t;
957 853
958 if (*len < sizeof(get)) { 854 if (*len < sizeof(get))
959 duprintf("get_entries: %u < %Zu\n", *len, sizeof(get));
960 return -EINVAL; 855 return -EINVAL;
961 }
962 if (copy_from_user(&get, uptr, sizeof(get)) != 0) 856 if (copy_from_user(&get, uptr, sizeof(get)) != 0)
963 return -EFAULT; 857 return -EFAULT;
964 if (*len != sizeof(struct arpt_get_entries) + get.size) { 858 if (*len != sizeof(struct arpt_get_entries) + get.size)
965 duprintf("get_entries: %u != %Zu\n", *len,
966 sizeof(struct arpt_get_entries) + get.size);
967 return -EINVAL; 859 return -EINVAL;
968 } 860
969 get.name[sizeof(get.name) - 1] = '\0'; 861 get.name[sizeof(get.name) - 1] = '\0';
970 862
971 t = xt_find_table_lock(net, NFPROTO_ARP, get.name); 863 t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
972 if (!IS_ERR_OR_NULL(t)) { 864 if (!IS_ERR_OR_NULL(t)) {
973 const struct xt_table_info *private = t->private; 865 const struct xt_table_info *private = t->private;
974 866
975 duprintf("t->private->number = %u\n",
976 private->number);
977 if (get.size == private->size) 867 if (get.size == private->size)
978 ret = copy_entries_to_user(private->size, 868 ret = copy_entries_to_user(private->size,
979 t, uptr->entrytable); 869 t, uptr->entrytable);
980 else { 870 else
981 duprintf("get_entries: I've got %u not %u!\n",
982 private->size, get.size);
983 ret = -EAGAIN; 871 ret = -EAGAIN;
984 } 872
985 module_put(t->me); 873 module_put(t->me);
986 xt_table_unlock(t); 874 xt_table_unlock(t);
987 } else 875 } else
@@ -1019,8 +907,6 @@ static int __do_replace(struct net *net, const char *name,
1019 907
1020 /* You lied! */ 908 /* You lied! */
1021 if (valid_hooks != t->valid_hooks) { 909 if (valid_hooks != t->valid_hooks) {
1022 duprintf("Valid hook crap: %08X vs %08X\n",
1023 valid_hooks, t->valid_hooks);
1024 ret = -EINVAL; 910 ret = -EINVAL;
1025 goto put_module; 911 goto put_module;
1026 } 912 }
@@ -1030,8 +916,6 @@ static int __do_replace(struct net *net, const char *name,
1030 goto put_module; 916 goto put_module;
1031 917
1032 /* Update module usage count based on number of rules */ 918 /* Update module usage count based on number of rules */
1033 duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
1034 oldinfo->number, oldinfo->initial_entries, newinfo->number);
1035 if ((oldinfo->number > oldinfo->initial_entries) || 919 if ((oldinfo->number > oldinfo->initial_entries) ||
1036 (newinfo->number <= oldinfo->initial_entries)) 920 (newinfo->number <= oldinfo->initial_entries))
1037 module_put(t->me); 921 module_put(t->me);
@@ -1101,8 +985,6 @@ static int do_replace(struct net *net, const void __user *user,
1101 if (ret != 0) 985 if (ret != 0)
1102 goto free_newinfo; 986 goto free_newinfo;
1103 987
1104 duprintf("arp_tables: Translated table\n");
1105
1106 ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, 988 ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1107 tmp.num_counters, tmp.counters); 989 tmp.num_counters, tmp.counters);
1108 if (ret) 990 if (ret)
@@ -1200,20 +1082,14 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
1200 unsigned int entry_offset; 1082 unsigned int entry_offset;
1201 int ret, off; 1083 int ret, off;
1202 1084
1203 duprintf("check_compat_entry_size_and_hooks %p\n", e);
1204 if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 || 1085 if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 ||
1205 (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit || 1086 (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit ||
1206 (unsigned char *)e + e->next_offset > limit) { 1087 (unsigned char *)e + e->next_offset > limit)
1207 duprintf("Bad offset %p, limit = %p\n", e, limit);
1208 return -EINVAL; 1088 return -EINVAL;
1209 }
1210 1089
1211 if (e->next_offset < sizeof(struct compat_arpt_entry) + 1090 if (e->next_offset < sizeof(struct compat_arpt_entry) +
1212 sizeof(struct compat_xt_entry_target)) { 1091 sizeof(struct compat_xt_entry_target))
1213 duprintf("checking: element %p size %u\n",
1214 e, e->next_offset);
1215 return -EINVAL; 1092 return -EINVAL;
1216 }
1217 1093
1218 if (!arp_checkentry(&e->arp)) 1094 if (!arp_checkentry(&e->arp))
1219 return -EINVAL; 1095 return -EINVAL;
@@ -1230,8 +1106,6 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
1230 target = xt_request_find_target(NFPROTO_ARP, t->u.user.name, 1106 target = xt_request_find_target(NFPROTO_ARP, t->u.user.name,
1231 t->u.user.revision); 1107 t->u.user.revision);
1232 if (IS_ERR(target)) { 1108 if (IS_ERR(target)) {
1233 duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
1234 t->u.user.name);
1235 ret = PTR_ERR(target); 1109 ret = PTR_ERR(target);
1236 goto out; 1110 goto out;
1237 } 1111 }
@@ -1301,7 +1175,6 @@ static int translate_compat_table(struct xt_table_info **pinfo,
1301 size = compatr->size; 1175 size = compatr->size;
1302 info->number = compatr->num_entries; 1176 info->number = compatr->num_entries;
1303 1177
1304 duprintf("translate_compat_table: size %u\n", info->size);
1305 j = 0; 1178 j = 0;
1306 xt_compat_lock(NFPROTO_ARP); 1179 xt_compat_lock(NFPROTO_ARP);
1307 xt_compat_init_offsets(NFPROTO_ARP, compatr->num_entries); 1180 xt_compat_init_offsets(NFPROTO_ARP, compatr->num_entries);
@@ -1316,11 +1189,8 @@ static int translate_compat_table(struct xt_table_info **pinfo,
1316 } 1189 }
1317 1190
1318 ret = -EINVAL; 1191 ret = -EINVAL;
1319 if (j != compatr->num_entries) { 1192 if (j != compatr->num_entries)
1320 duprintf("translate_compat_table: %u not %u entries\n",
1321 j, compatr->num_entries);
1322 goto out_unlock; 1193 goto out_unlock;
1323 }
1324 1194
1325 ret = -ENOMEM; 1195 ret = -ENOMEM;
1326 newinfo = xt_alloc_table_info(size); 1196 newinfo = xt_alloc_table_info(size);
@@ -1411,8 +1281,6 @@ static int compat_do_replace(struct net *net, void __user *user,
1411 if (ret != 0) 1281 if (ret != 0)
1412 goto free_newinfo; 1282 goto free_newinfo;
1413 1283
1414 duprintf("compat_do_replace: Translated table\n");
1415
1416 ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, 1284 ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1417 tmp.num_counters, compat_ptr(tmp.counters)); 1285 tmp.num_counters, compat_ptr(tmp.counters));
1418 if (ret) 1286 if (ret)
@@ -1445,7 +1313,6 @@ static int compat_do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user,
1445 break; 1313 break;
1446 1314
1447 default: 1315 default:
1448 duprintf("do_arpt_set_ctl: unknown request %i\n", cmd);
1449 ret = -EINVAL; 1316 ret = -EINVAL;
1450 } 1317 }
1451 1318
@@ -1528,17 +1395,13 @@ static int compat_get_entries(struct net *net,
1528 struct compat_arpt_get_entries get; 1395 struct compat_arpt_get_entries get;
1529 struct xt_table *t; 1396 struct xt_table *t;
1530 1397
1531 if (*len < sizeof(get)) { 1398 if (*len < sizeof(get))
1532 duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
1533 return -EINVAL; 1399 return -EINVAL;
1534 }
1535 if (copy_from_user(&get, uptr, sizeof(get)) != 0) 1400 if (copy_from_user(&get, uptr, sizeof(get)) != 0)
1536 return -EFAULT; 1401 return -EFAULT;
1537 if (*len != sizeof(struct compat_arpt_get_entries) + get.size) { 1402 if (*len != sizeof(struct compat_arpt_get_entries) + get.size)
1538 duprintf("compat_get_entries: %u != %zu\n",
1539 *len, sizeof(get) + get.size);
1540 return -EINVAL; 1403 return -EINVAL;
1541 } 1404
1542 get.name[sizeof(get.name) - 1] = '\0'; 1405 get.name[sizeof(get.name) - 1] = '\0';
1543 1406
1544 xt_compat_lock(NFPROTO_ARP); 1407 xt_compat_lock(NFPROTO_ARP);
@@ -1547,16 +1410,13 @@ static int compat_get_entries(struct net *net,
1547 const struct xt_table_info *private = t->private; 1410 const struct xt_table_info *private = t->private;
1548 struct xt_table_info info; 1411 struct xt_table_info info;
1549 1412
1550 duprintf("t->private->number = %u\n", private->number);
1551 ret = compat_table_info(private, &info); 1413 ret = compat_table_info(private, &info);
1552 if (!ret && get.size == info.size) { 1414 if (!ret && get.size == info.size) {
1553 ret = compat_copy_entries_to_user(private->size, 1415 ret = compat_copy_entries_to_user(private->size,
1554 t, uptr->entrytable); 1416 t, uptr->entrytable);
1555 } else if (!ret) { 1417 } else if (!ret)
1556 duprintf("compat_get_entries: I've got %u not %u!\n",
1557 private->size, get.size);
1558 ret = -EAGAIN; 1418 ret = -EAGAIN;
1559 } 1419
1560 xt_compat_flush_offsets(NFPROTO_ARP); 1420 xt_compat_flush_offsets(NFPROTO_ARP);
1561 module_put(t->me); 1421 module_put(t->me);
1562 xt_table_unlock(t); 1422 xt_table_unlock(t);
@@ -1608,7 +1468,6 @@ static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned
1608 break; 1468 break;
1609 1469
1610 default: 1470 default:
1611 duprintf("do_arpt_set_ctl: unknown request %i\n", cmd);
1612 ret = -EINVAL; 1471 ret = -EINVAL;
1613 } 1472 }
1614 1473
@@ -1651,7 +1510,6 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
1651 } 1510 }
1652 1511
1653 default: 1512 default:
1654 duprintf("do_arpt_get_ctl: unknown request %i\n", cmd);
1655 ret = -EINVAL; 1513 ret = -EINVAL;
1656 } 1514 }
1657 1515
@@ -1696,7 +1554,6 @@ int arpt_register_table(struct net *net,
1696 memcpy(loc_cpu_entry, repl->entries, repl->size); 1554 memcpy(loc_cpu_entry, repl->entries, repl->size);
1697 1555
1698 ret = translate_table(newinfo, loc_cpu_entry, repl); 1556 ret = translate_table(newinfo, loc_cpu_entry, repl);
1699 duprintf("arpt_register_table: translate table gives %d\n", ret);
1700 if (ret != 0) 1557 if (ret != 0)
1701 goto out_free; 1558 goto out_free;
1702 1559
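Note on the arp_tables.c hunks above: they all apply one pattern — each duprintf() call is deleted and the error check that carried it collapses to a bare return -EINVAL without braces. duprintf() is a compile-time debug hook rather than runtime logging; unless a developer edits the source to define the debug symbol, it expands to nothing, so no observable behaviour is lost. The ip_tables.c copy of the macro is visible in the next file's hunks; arp_tables.c used the same construction (its guard symbol is not shown in these hunks), roughly:

    #ifdef DEBUG_IP_FIREWALL_USER
    #define duprintf(format, args...) pr_info(format , ## args)
    #else
    #define duprintf(format, args...)      /* default: expands to nothing */
    #endif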
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 735d1ee8c1ab..54906e0e8e0c 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -35,34 +35,12 @@ MODULE_LICENSE("GPL");
35MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); 35MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
36MODULE_DESCRIPTION("IPv4 packet filter"); 36MODULE_DESCRIPTION("IPv4 packet filter");
37 37
38/*#define DEBUG_IP_FIREWALL*/
39/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
40/*#define DEBUG_IP_FIREWALL_USER*/
41
42#ifdef DEBUG_IP_FIREWALL
43#define dprintf(format, args...) pr_info(format , ## args)
44#else
45#define dprintf(format, args...)
46#endif
47
48#ifdef DEBUG_IP_FIREWALL_USER
49#define duprintf(format, args...) pr_info(format , ## args)
50#else
51#define duprintf(format, args...)
52#endif
53
54#ifdef CONFIG_NETFILTER_DEBUG 38#ifdef CONFIG_NETFILTER_DEBUG
55#define IP_NF_ASSERT(x) WARN_ON(!(x)) 39#define IP_NF_ASSERT(x) WARN_ON(!(x))
56#else 40#else
57#define IP_NF_ASSERT(x) 41#define IP_NF_ASSERT(x)
58#endif 42#endif
59 43
60#if 0
61/* All the better to debug you with... */
62#define static
63#define inline
64#endif
65
66void *ipt_alloc_initial_table(const struct xt_table *info) 44void *ipt_alloc_initial_table(const struct xt_table *info)
67{ 45{
68 return xt_alloc_initial_table(ipt, IPT); 46 return xt_alloc_initial_table(ipt, IPT);
@@ -85,52 +63,28 @@ ip_packet_match(const struct iphdr *ip,
85 if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr, 63 if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
86 IPT_INV_SRCIP) || 64 IPT_INV_SRCIP) ||
87 FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr, 65 FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
88 IPT_INV_DSTIP)) { 66 IPT_INV_DSTIP))
89 dprintf("Source or dest mismatch.\n");
90
91 dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n",
92 &ip->saddr, &ipinfo->smsk.s_addr, &ipinfo->src.s_addr,
93 ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
94 dprintf("DST: %pI4 Mask: %pI4 Target: %pI4.%s\n",
95 &ip->daddr, &ipinfo->dmsk.s_addr, &ipinfo->dst.s_addr,
96 ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
97 return false; 67 return false;
98 }
99 68
100 ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask); 69 ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask);
101 70
102 if (FWINV(ret != 0, IPT_INV_VIA_IN)) { 71 if (FWINV(ret != 0, IPT_INV_VIA_IN))
103 dprintf("VIA in mismatch (%s vs %s).%s\n",
104 indev, ipinfo->iniface,
105 ipinfo->invflags & IPT_INV_VIA_IN ? " (INV)" : "");
106 return false; 72 return false;
107 }
108 73
109 ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask); 74 ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask);
110 75
111 if (FWINV(ret != 0, IPT_INV_VIA_OUT)) { 76 if (FWINV(ret != 0, IPT_INV_VIA_OUT))
112 dprintf("VIA out mismatch (%s vs %s).%s\n",
113 outdev, ipinfo->outiface,
114 ipinfo->invflags & IPT_INV_VIA_OUT ? " (INV)" : "");
115 return false; 77 return false;
116 }
117 78
118 /* Check specific protocol */ 79 /* Check specific protocol */
119 if (ipinfo->proto && 80 if (ipinfo->proto &&
120 FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) { 81 FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO))
121 dprintf("Packet protocol %hi does not match %hi.%s\n",
122 ip->protocol, ipinfo->proto,
123 ipinfo->invflags & IPT_INV_PROTO ? " (INV)" : "");
124 return false; 82 return false;
125 }
126 83
127 /* If we have a fragment rule but the packet is not a fragment 84 /* If we have a fragment rule but the packet is not a fragment
128 * then we return zero */ 85 * then we return zero */
129 if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) { 86 if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG))
130 dprintf("Fragment rule but not fragment.%s\n",
131 ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
132 return false; 87 return false;
133 }
134 88
135 return true; 89 return true;
136} 90}
@@ -138,16 +92,10 @@ ip_packet_match(const struct iphdr *ip,
138static bool 92static bool
139ip_checkentry(const struct ipt_ip *ip) 93ip_checkentry(const struct ipt_ip *ip)
140{ 94{
141 if (ip->flags & ~IPT_F_MASK) { 95 if (ip->flags & ~IPT_F_MASK)
142 duprintf("Unknown flag bits set: %08X\n",
143 ip->flags & ~IPT_F_MASK);
144 return false; 96 return false;
145 } 97 if (ip->invflags & ~IPT_INV_MASK)
146 if (ip->invflags & ~IPT_INV_MASK) {
147 duprintf("Unknown invflag bits set: %08X\n",
148 ip->invflags & ~IPT_INV_MASK);
149 return false; 98 return false;
150 }
151 return true; 99 return true;
152} 100}
153 101
@@ -346,10 +294,6 @@ ipt_do_table(struct sk_buff *skb,
346 294
347 e = get_entry(table_base, private->hook_entry[hook]); 295 e = get_entry(table_base, private->hook_entry[hook]);
348 296
349 pr_debug("Entering %s(hook %u), UF %p\n",
350 table->name, hook,
351 get_entry(table_base, private->underflow[hook]));
352
353 do { 297 do {
354 const struct xt_entry_target *t; 298 const struct xt_entry_target *t;
355 const struct xt_entry_match *ematch; 299 const struct xt_entry_match *ematch;
@@ -396,22 +340,15 @@ ipt_do_table(struct sk_buff *skb,
396 if (stackidx == 0) { 340 if (stackidx == 0) {
397 e = get_entry(table_base, 341 e = get_entry(table_base,
398 private->underflow[hook]); 342 private->underflow[hook]);
399 pr_debug("Underflow (this is normal) "
400 "to %p\n", e);
401 } else { 343 } else {
402 e = jumpstack[--stackidx]; 344 e = jumpstack[--stackidx];
403 pr_debug("Pulled %p out from pos %u\n",
404 e, stackidx);
405 e = ipt_next_entry(e); 345 e = ipt_next_entry(e);
406 } 346 }
407 continue; 347 continue;
408 } 348 }
409 if (table_base + v != ipt_next_entry(e) && 349 if (table_base + v != ipt_next_entry(e) &&
410 !(e->ip.flags & IPT_F_GOTO)) { 350 !(e->ip.flags & IPT_F_GOTO))
411 jumpstack[stackidx++] = e; 351 jumpstack[stackidx++] = e;
412 pr_debug("Pushed %p into pos %u\n",
413 e, stackidx - 1);
414 }
415 352
416 e = get_entry(table_base, v); 353 e = get_entry(table_base, v);
417 continue; 354 continue;
@@ -429,18 +366,13 @@ ipt_do_table(struct sk_buff *skb,
429 /* Verdict */ 366 /* Verdict */
430 break; 367 break;
431 } while (!acpar.hotdrop); 368 } while (!acpar.hotdrop);
432 pr_debug("Exiting %s; sp at %u\n", __func__, stackidx);
433 369
434 xt_write_recseq_end(addend); 370 xt_write_recseq_end(addend);
435 local_bh_enable(); 371 local_bh_enable();
436 372
437#ifdef DEBUG_ALLOW_ALL
438 return NF_ACCEPT;
439#else
440 if (acpar.hotdrop) 373 if (acpar.hotdrop)
441 return NF_DROP; 374 return NF_DROP;
442 else return verdict; 375 else return verdict;
443#endif
444} 376}
445 377
446static bool find_jump_target(const struct xt_table_info *t, 378static bool find_jump_target(const struct xt_table_info *t,
@@ -480,11 +412,9 @@ mark_source_chains(const struct xt_table_info *newinfo,
480 = (void *)ipt_get_target_c(e); 412 = (void *)ipt_get_target_c(e);
481 int visited = e->comefrom & (1 << hook); 413 int visited = e->comefrom & (1 << hook);
482 414
483 if (e->comefrom & (1 << NF_INET_NUMHOOKS)) { 415 if (e->comefrom & (1 << NF_INET_NUMHOOKS))
484 pr_err("iptables: loop hook %u pos %u %08X.\n",
485 hook, pos, e->comefrom);
486 return 0; 416 return 0;
487 } 417
488 e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); 418 e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
489 419
490 /* Unconditional return/END. */ 420 /* Unconditional return/END. */
@@ -496,26 +426,13 @@ mark_source_chains(const struct xt_table_info *newinfo,
496 426
497 if ((strcmp(t->target.u.user.name, 427 if ((strcmp(t->target.u.user.name,
498 XT_STANDARD_TARGET) == 0) && 428 XT_STANDARD_TARGET) == 0) &&
499 t->verdict < -NF_MAX_VERDICT - 1) { 429 t->verdict < -NF_MAX_VERDICT - 1)
500 duprintf("mark_source_chains: bad "
501 "negative verdict (%i)\n",
502 t->verdict);
503 return 0; 430 return 0;
504 }
505 431
506 /* Return: backtrack through the last 432 /* Return: backtrack through the last
507 big jump. */ 433 big jump. */
508 do { 434 do {
509 e->comefrom ^= (1<<NF_INET_NUMHOOKS); 435 e->comefrom ^= (1<<NF_INET_NUMHOOKS);
510#ifdef DEBUG_IP_FIREWALL_USER
511 if (e->comefrom
512 & (1 << NF_INET_NUMHOOKS)) {
513 duprintf("Back unset "
514 "on hook %u "
515 "rule %u\n",
516 hook, pos);
517 }
518#endif
519 oldpos = pos; 436 oldpos = pos;
520 pos = e->counters.pcnt; 437 pos = e->counters.pcnt;
521 e->counters.pcnt = 0; 438 e->counters.pcnt = 0;
@@ -543,8 +460,6 @@ mark_source_chains(const struct xt_table_info *newinfo,
543 XT_STANDARD_TARGET) == 0 && 460 XT_STANDARD_TARGET) == 0 &&
544 newpos >= 0) { 461 newpos >= 0) {
545 /* This a jump; chase it. */ 462 /* This a jump; chase it. */
546 duprintf("Jump rule %u -> %u\n",
547 pos, newpos);
548 e = (struct ipt_entry *) 463 e = (struct ipt_entry *)
549 (entry0 + newpos); 464 (entry0 + newpos);
550 if (!find_jump_target(newinfo, e)) 465 if (!find_jump_target(newinfo, e))
@@ -561,8 +476,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
561 pos = newpos; 476 pos = newpos;
562 } 477 }
563 } 478 }
564next: 479next: ;
565 duprintf("Finished chain %u\n", hook);
566 } 480 }
567 return 1; 481 return 1;
568} 482}
@@ -584,18 +498,12 @@ static int
584check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) 498check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
585{ 499{
586 const struct ipt_ip *ip = par->entryinfo; 500 const struct ipt_ip *ip = par->entryinfo;
587 int ret;
588 501
589 par->match = m->u.kernel.match; 502 par->match = m->u.kernel.match;
590 par->matchinfo = m->data; 503 par->matchinfo = m->data;
591 504
592 ret = xt_check_match(par, m->u.match_size - sizeof(*m), 505 return xt_check_match(par, m->u.match_size - sizeof(*m),
593 ip->proto, ip->invflags & IPT_INV_PROTO); 506 ip->proto, ip->invflags & IPT_INV_PROTO);
594 if (ret < 0) {
595 duprintf("check failed for `%s'.\n", par->match->name);
596 return ret;
597 }
598 return 0;
599} 507}
600 508
601static int 509static int
@@ -606,10 +514,8 @@ find_check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
606 514
607 match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name, 515 match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name,
608 m->u.user.revision); 516 m->u.user.revision);
609 if (IS_ERR(match)) { 517 if (IS_ERR(match))
610 duprintf("find_check_match: `%s' not found\n", m->u.user.name);
611 return PTR_ERR(match); 518 return PTR_ERR(match);
612 }
613 m->u.kernel.match = match; 519 m->u.kernel.match = match;
614 520
615 ret = check_match(m, par); 521 ret = check_match(m, par);
@@ -634,16 +540,9 @@ static int check_target(struct ipt_entry *e, struct net *net, const char *name)
634 .hook_mask = e->comefrom, 540 .hook_mask = e->comefrom,
635 .family = NFPROTO_IPV4, 541 .family = NFPROTO_IPV4,
636 }; 542 };
637 int ret;
638 543
639 ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 544 return xt_check_target(&par, t->u.target_size - sizeof(*t),
640 e->ip.proto, e->ip.invflags & IPT_INV_PROTO); 545 e->ip.proto, e->ip.invflags & IPT_INV_PROTO);
641 if (ret < 0) {
642 duprintf("check failed for `%s'.\n",
643 t->u.kernel.target->name);
644 return ret;
645 }
646 return 0;
647} 546}
648 547
649static int 548static int
@@ -656,10 +555,12 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
656 unsigned int j; 555 unsigned int j;
657 struct xt_mtchk_param mtpar; 556 struct xt_mtchk_param mtpar;
658 struct xt_entry_match *ematch; 557 struct xt_entry_match *ematch;
558 unsigned long pcnt;
659 559
660 e->counters.pcnt = xt_percpu_counter_alloc(); 560 pcnt = xt_percpu_counter_alloc();
661 if (IS_ERR_VALUE(e->counters.pcnt)) 561 if (IS_ERR_VALUE(pcnt))
662 return -ENOMEM; 562 return -ENOMEM;
563 e->counters.pcnt = pcnt;
663 564
664 j = 0; 565 j = 0;
665 mtpar.net = net; 566 mtpar.net = net;
@@ -678,7 +579,6 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
678 target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name, 579 target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name,
679 t->u.user.revision); 580 t->u.user.revision);
680 if (IS_ERR(target)) { 581 if (IS_ERR(target)) {
681 duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
682 ret = PTR_ERR(target); 582 ret = PTR_ERR(target);
683 goto cleanup_matches; 583 goto cleanup_matches;
684 } 584 }
@@ -732,17 +632,12 @@ check_entry_size_and_hooks(struct ipt_entry *e,
732 632
733 if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 || 633 if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 ||
734 (unsigned char *)e + sizeof(struct ipt_entry) >= limit || 634 (unsigned char *)e + sizeof(struct ipt_entry) >= limit ||
735 (unsigned char *)e + e->next_offset > limit) { 635 (unsigned char *)e + e->next_offset > limit)
736 duprintf("Bad offset %p\n", e);
737 return -EINVAL; 636 return -EINVAL;
738 }
739 637
740 if (e->next_offset 638 if (e->next_offset
741 < sizeof(struct ipt_entry) + sizeof(struct xt_entry_target)) { 639 < sizeof(struct ipt_entry) + sizeof(struct xt_entry_target))
742 duprintf("checking: element %p size %u\n",
743 e, e->next_offset);
744 return -EINVAL; 640 return -EINVAL;
745 }
746 641
747 if (!ip_checkentry(&e->ip)) 642 if (!ip_checkentry(&e->ip))
748 return -EINVAL; 643 return -EINVAL;
@@ -759,12 +654,9 @@ check_entry_size_and_hooks(struct ipt_entry *e,
759 if ((unsigned char *)e - base == hook_entries[h]) 654 if ((unsigned char *)e - base == hook_entries[h])
760 newinfo->hook_entry[h] = hook_entries[h]; 655 newinfo->hook_entry[h] = hook_entries[h];
761 if ((unsigned char *)e - base == underflows[h]) { 656 if ((unsigned char *)e - base == underflows[h]) {
762 if (!check_underflow(e)) { 657 if (!check_underflow(e))
763 pr_debug("Underflows must be unconditional and "
764 "use the STANDARD target with "
765 "ACCEPT/DROP\n");
766 return -EINVAL; 658 return -EINVAL;
767 } 659
768 newinfo->underflow[h] = underflows[h]; 660 newinfo->underflow[h] = underflows[h];
769 } 661 }
770 } 662 }
@@ -816,7 +708,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
816 newinfo->underflow[i] = 0xFFFFFFFF; 708 newinfo->underflow[i] = 0xFFFFFFFF;
817 } 709 }
818 710
819 duprintf("translate_table: size %u\n", newinfo->size);
820 i = 0; 711 i = 0;
821 /* Walk through entries, checking offsets. */ 712 /* Walk through entries, checking offsets. */
822 xt_entry_foreach(iter, entry0, newinfo->size) { 713 xt_entry_foreach(iter, entry0, newinfo->size) {
@@ -833,27 +724,18 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
833 ++newinfo->stacksize; 724 ++newinfo->stacksize;
834 } 725 }
835 726
836 if (i != repl->num_entries) { 727 if (i != repl->num_entries)
837 duprintf("translate_table: %u not %u entries\n",
838 i, repl->num_entries);
839 return -EINVAL; 728 return -EINVAL;
840 }
841 729
842 /* Check hooks all assigned */ 730 /* Check hooks all assigned */
843 for (i = 0; i < NF_INET_NUMHOOKS; i++) { 731 for (i = 0; i < NF_INET_NUMHOOKS; i++) {
844 /* Only hooks which are valid */ 732 /* Only hooks which are valid */
845 if (!(repl->valid_hooks & (1 << i))) 733 if (!(repl->valid_hooks & (1 << i)))
846 continue; 734 continue;
847 if (newinfo->hook_entry[i] == 0xFFFFFFFF) { 735 if (newinfo->hook_entry[i] == 0xFFFFFFFF)
848 duprintf("Invalid hook entry %u %u\n",
849 i, repl->hook_entry[i]);
850 return -EINVAL; 736 return -EINVAL;
851 } 737 if (newinfo->underflow[i] == 0xFFFFFFFF)
852 if (newinfo->underflow[i] == 0xFFFFFFFF) {
853 duprintf("Invalid underflow %u %u\n",
854 i, repl->underflow[i]);
855 return -EINVAL; 738 return -EINVAL;
856 }
857 } 739 }
858 740
859 if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) 741 if (!mark_source_chains(newinfo, repl->valid_hooks, entry0))
@@ -1081,11 +963,8 @@ static int get_info(struct net *net, void __user *user,
1081 struct xt_table *t; 963 struct xt_table *t;
1082 int ret; 964 int ret;
1083 965
1084 if (*len != sizeof(struct ipt_getinfo)) { 966 if (*len != sizeof(struct ipt_getinfo))
1085 duprintf("length %u != %zu\n", *len,
1086 sizeof(struct ipt_getinfo));
1087 return -EINVAL; 967 return -EINVAL;
1088 }
1089 968
1090 if (copy_from_user(name, user, sizeof(name)) != 0) 969 if (copy_from_user(name, user, sizeof(name)) != 0)
1091 return -EFAULT; 970 return -EFAULT;
@@ -1143,31 +1022,23 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr,
1143 struct ipt_get_entries get; 1022 struct ipt_get_entries get;
1144 struct xt_table *t; 1023 struct xt_table *t;
1145 1024
1146 if (*len < sizeof(get)) { 1025 if (*len < sizeof(get))
1147 duprintf("get_entries: %u < %zu\n", *len, sizeof(get));
1148 return -EINVAL; 1026 return -EINVAL;
1149 }
1150 if (copy_from_user(&get, uptr, sizeof(get)) != 0) 1027 if (copy_from_user(&get, uptr, sizeof(get)) != 0)
1151 return -EFAULT; 1028 return -EFAULT;
1152 if (*len != sizeof(struct ipt_get_entries) + get.size) { 1029 if (*len != sizeof(struct ipt_get_entries) + get.size)
1153 duprintf("get_entries: %u != %zu\n",
1154 *len, sizeof(get) + get.size);
1155 return -EINVAL; 1030 return -EINVAL;
1156 }
1157 get.name[sizeof(get.name) - 1] = '\0'; 1031 get.name[sizeof(get.name) - 1] = '\0';
1158 1032
1159 t = xt_find_table_lock(net, AF_INET, get.name); 1033 t = xt_find_table_lock(net, AF_INET, get.name);
1160 if (!IS_ERR_OR_NULL(t)) { 1034 if (!IS_ERR_OR_NULL(t)) {
1161 const struct xt_table_info *private = t->private; 1035 const struct xt_table_info *private = t->private;
1162 duprintf("t->private->number = %u\n", private->number);
1163 if (get.size == private->size) 1036 if (get.size == private->size)
1164 ret = copy_entries_to_user(private->size, 1037 ret = copy_entries_to_user(private->size,
1165 t, uptr->entrytable); 1038 t, uptr->entrytable);
1166 else { 1039 else
1167 duprintf("get_entries: I've got %u not %u!\n",
1168 private->size, get.size);
1169 ret = -EAGAIN; 1040 ret = -EAGAIN;
1170 } 1041
1171 module_put(t->me); 1042 module_put(t->me);
1172 xt_table_unlock(t); 1043 xt_table_unlock(t);
1173 } else 1044 } else
@@ -1203,8 +1074,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1203 1074
1204 /* You lied! */ 1075 /* You lied! */
1205 if (valid_hooks != t->valid_hooks) { 1076 if (valid_hooks != t->valid_hooks) {
1206 duprintf("Valid hook crap: %08X vs %08X\n",
1207 valid_hooks, t->valid_hooks);
1208 ret = -EINVAL; 1077 ret = -EINVAL;
1209 goto put_module; 1078 goto put_module;
1210 } 1079 }
@@ -1214,8 +1083,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1214 goto put_module; 1083 goto put_module;
1215 1084
1216 /* Update module usage count based on number of rules */ 1085 /* Update module usage count based on number of rules */
1217 duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
1218 oldinfo->number, oldinfo->initial_entries, newinfo->number);
1219 if ((oldinfo->number > oldinfo->initial_entries) || 1086 if ((oldinfo->number > oldinfo->initial_entries) ||
1220 (newinfo->number <= oldinfo->initial_entries)) 1087 (newinfo->number <= oldinfo->initial_entries))
1221 module_put(t->me); 1088 module_put(t->me);
@@ -1284,8 +1151,6 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
1284 if (ret != 0) 1151 if (ret != 0)
1285 goto free_newinfo; 1152 goto free_newinfo;
1286 1153
1287 duprintf("Translated table\n");
1288
1289 ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, 1154 ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1290 tmp.num_counters, tmp.counters); 1155 tmp.num_counters, tmp.counters);
1291 if (ret) 1156 if (ret)
@@ -1411,11 +1276,9 @@ compat_find_calc_match(struct xt_entry_match *m,
1411 1276
1412 match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name, 1277 match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name,
1413 m->u.user.revision); 1278 m->u.user.revision);
1414 if (IS_ERR(match)) { 1279 if (IS_ERR(match))
1415 duprintf("compat_check_calc_match: `%s' not found\n",
1416 m->u.user.name);
1417 return PTR_ERR(match); 1280 return PTR_ERR(match);
1418 } 1281
1419 m->u.kernel.match = match; 1282 m->u.kernel.match = match;
1420 *size += xt_compat_match_offset(match); 1283 *size += xt_compat_match_offset(match);
1421 return 0; 1284 return 0;
@@ -1447,20 +1310,14 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
1447 unsigned int j; 1310 unsigned int j;
1448 int ret, off; 1311 int ret, off;
1449 1312
1450 duprintf("check_compat_entry_size_and_hooks %p\n", e);
1451 if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 || 1313 if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 ||
1452 (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit || 1314 (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit ||
1453 (unsigned char *)e + e->next_offset > limit) { 1315 (unsigned char *)e + e->next_offset > limit)
1454 duprintf("Bad offset %p, limit = %p\n", e, limit);
1455 return -EINVAL; 1316 return -EINVAL;
1456 }
1457 1317
1458 if (e->next_offset < sizeof(struct compat_ipt_entry) + 1318 if (e->next_offset < sizeof(struct compat_ipt_entry) +
1459 sizeof(struct compat_xt_entry_target)) { 1319 sizeof(struct compat_xt_entry_target))
1460 duprintf("checking: element %p size %u\n",
1461 e, e->next_offset);
1462 return -EINVAL; 1320 return -EINVAL;
1463 }
1464 1321
1465 if (!ip_checkentry(&e->ip)) 1322 if (!ip_checkentry(&e->ip))
1466 return -EINVAL; 1323 return -EINVAL;
@@ -1484,8 +1341,6 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
1484 target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name, 1341 target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name,
1485 t->u.user.revision); 1342 t->u.user.revision);
1486 if (IS_ERR(target)) { 1343 if (IS_ERR(target)) {
1487 duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
1488 t->u.user.name);
1489 ret = PTR_ERR(target); 1344 ret = PTR_ERR(target);
1490 goto release_matches; 1345 goto release_matches;
1491 } 1346 }
@@ -1567,7 +1422,6 @@ translate_compat_table(struct net *net,
1567 size = compatr->size; 1422 size = compatr->size;
1568 info->number = compatr->num_entries; 1423 info->number = compatr->num_entries;
1569 1424
1570 duprintf("translate_compat_table: size %u\n", info->size);
1571 j = 0; 1425 j = 0;
1572 xt_compat_lock(AF_INET); 1426 xt_compat_lock(AF_INET);
1573 xt_compat_init_offsets(AF_INET, compatr->num_entries); 1427 xt_compat_init_offsets(AF_INET, compatr->num_entries);
@@ -1582,11 +1436,8 @@ translate_compat_table(struct net *net,
1582 } 1436 }
1583 1437
1584 ret = -EINVAL; 1438 ret = -EINVAL;
1585 if (j != compatr->num_entries) { 1439 if (j != compatr->num_entries)
1586 duprintf("translate_compat_table: %u not %u entries\n",
1587 j, compatr->num_entries);
1588 goto out_unlock; 1440 goto out_unlock;
1589 }
1590 1441
1591 ret = -ENOMEM; 1442 ret = -ENOMEM;
1592 newinfo = xt_alloc_table_info(size); 1443 newinfo = xt_alloc_table_info(size);
@@ -1683,8 +1534,6 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
1683 if (ret != 0) 1534 if (ret != 0)
1684 goto free_newinfo; 1535 goto free_newinfo;
1685 1536
1686 duprintf("compat_do_replace: Translated table\n");
1687
1688 ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, 1537 ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1689 tmp.num_counters, compat_ptr(tmp.counters)); 1538 tmp.num_counters, compat_ptr(tmp.counters));
1690 if (ret) 1539 if (ret)
@@ -1718,7 +1567,6 @@ compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user,
1718 break; 1567 break;
1719 1568
1720 default: 1569 default:
1721 duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
1722 ret = -EINVAL; 1570 ret = -EINVAL;
1723 } 1571 }
1724 1572
@@ -1768,19 +1616,15 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
1768 struct compat_ipt_get_entries get; 1616 struct compat_ipt_get_entries get;
1769 struct xt_table *t; 1617 struct xt_table *t;
1770 1618
1771 if (*len < sizeof(get)) { 1619 if (*len < sizeof(get))
1772 duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
1773 return -EINVAL; 1620 return -EINVAL;
1774 }
1775 1621
1776 if (copy_from_user(&get, uptr, sizeof(get)) != 0) 1622 if (copy_from_user(&get, uptr, sizeof(get)) != 0)
1777 return -EFAULT; 1623 return -EFAULT;
1778 1624
1779 if (*len != sizeof(struct compat_ipt_get_entries) + get.size) { 1625 if (*len != sizeof(struct compat_ipt_get_entries) + get.size)
1780 duprintf("compat_get_entries: %u != %zu\n",
1781 *len, sizeof(get) + get.size);
1782 return -EINVAL; 1626 return -EINVAL;
1783 } 1627
1784 get.name[sizeof(get.name) - 1] = '\0'; 1628 get.name[sizeof(get.name) - 1] = '\0';
1785 1629
1786 xt_compat_lock(AF_INET); 1630 xt_compat_lock(AF_INET);
@@ -1788,16 +1632,13 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
1788 if (!IS_ERR_OR_NULL(t)) { 1632 if (!IS_ERR_OR_NULL(t)) {
1789 const struct xt_table_info *private = t->private; 1633 const struct xt_table_info *private = t->private;
1790 struct xt_table_info info; 1634 struct xt_table_info info;
1791 duprintf("t->private->number = %u\n", private->number);
1792 ret = compat_table_info(private, &info); 1635 ret = compat_table_info(private, &info);
1793 if (!ret && get.size == info.size) { 1636 if (!ret && get.size == info.size)
1794 ret = compat_copy_entries_to_user(private->size, 1637 ret = compat_copy_entries_to_user(private->size,
1795 t, uptr->entrytable); 1638 t, uptr->entrytable);
1796 } else if (!ret) { 1639 else if (!ret)
1797 duprintf("compat_get_entries: I've got %u not %u!\n",
1798 private->size, get.size);
1799 ret = -EAGAIN; 1640 ret = -EAGAIN;
1800 } 1641
1801 xt_compat_flush_offsets(AF_INET); 1642 xt_compat_flush_offsets(AF_INET);
1802 module_put(t->me); 1643 module_put(t->me);
1803 xt_table_unlock(t); 1644 xt_table_unlock(t);
@@ -1850,7 +1691,6 @@ do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1850 break; 1691 break;
1851 1692
1852 default: 1693 default:
1853 duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
1854 ret = -EINVAL; 1694 ret = -EINVAL;
1855 } 1695 }
1856 1696
@@ -1902,7 +1742,6 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1902 } 1742 }
1903 1743
1904 default: 1744 default:
1905 duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
1906 ret = -EINVAL; 1745 ret = -EINVAL;
1907 } 1746 }
1908 1747
@@ -2004,7 +1843,6 @@ icmp_match(const struct sk_buff *skb, struct xt_action_param *par)
2004 /* We've been asked to examine this packet, and we 1843 /* We've been asked to examine this packet, and we
2005 * can't. Hence, no choice but to drop. 1844 * can't. Hence, no choice but to drop.
2006 */ 1845 */
2007 duprintf("Dropping evil ICMP tinygram.\n");
2008 par->hotdrop = true; 1846 par->hotdrop = true;
2009 return false; 1847 return false;
2010 } 1848 }
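Note on the ip_tables.c hunks above: one of them is more than debug-printk removal. find_check_entry() now stores the result of xt_percpu_counter_alloc() in a local unsigned long, tests that with IS_ERR_VALUE(), and only then assigns it to e->counters.pcnt. The counter field is a 64-bit quantity, while IS_ERR_VALUE() is written in terms of unsigned long; one historical form of the macro reads roughly:

    /* error codes occupy the top 4095 values of an unsigned long */
    #define IS_ERR_VALUE(x) unlikely((x) >= (unsigned long)-MAX_ERRNO)

so applying it to the wider counter field relies on how the error encoding survives the integer conversions, which is fragile and, with later forms of the macro, can provoke cast warnings on 32-bit builds. Testing the allocator's return value in its native type keeps the -ENOMEM check well defined.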
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index e3c46e8e2762..ae1a71a97132 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -360,7 +360,7 @@ static int ipv4_init_net(struct net *net)
360 360
361 in->ctl_table[0].data = &nf_conntrack_max; 361 in->ctl_table[0].data = &nf_conntrack_max;
362 in->ctl_table[1].data = &net->ct.count; 362 in->ctl_table[1].data = &net->ct.count;
363 in->ctl_table[2].data = &net->ct.htable_size; 363 in->ctl_table[2].data = &nf_conntrack_htable_size;
364 in->ctl_table[3].data = &net->ct.sysctl_checksum; 364 in->ctl_table[3].data = &net->ct.sysctl_checksum;
365 in->ctl_table[4].data = &net->ct.sysctl_log_invalid; 365 in->ctl_table[4].data = &net->ct.sysctl_log_invalid;
366#endif 366#endif
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index f0dfe92a00d6..c6f3c406f707 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -31,15 +31,14 @@ struct ct_iter_state {
31 31
32static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) 32static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
33{ 33{
34 struct net *net = seq_file_net(seq);
35 struct ct_iter_state *st = seq->private; 34 struct ct_iter_state *st = seq->private;
36 struct hlist_nulls_node *n; 35 struct hlist_nulls_node *n;
37 36
38 for (st->bucket = 0; 37 for (st->bucket = 0;
39 st->bucket < net->ct.htable_size; 38 st->bucket < nf_conntrack_htable_size;
40 st->bucket++) { 39 st->bucket++) {
41 n = rcu_dereference( 40 n = rcu_dereference(
42 hlist_nulls_first_rcu(&net->ct.hash[st->bucket])); 41 hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket]));
43 if (!is_a_nulls(n)) 42 if (!is_a_nulls(n))
44 return n; 43 return n;
45 } 44 }
@@ -49,17 +48,16 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
49static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, 48static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
50 struct hlist_nulls_node *head) 49 struct hlist_nulls_node *head)
51{ 50{
52 struct net *net = seq_file_net(seq);
53 struct ct_iter_state *st = seq->private; 51 struct ct_iter_state *st = seq->private;
54 52
55 head = rcu_dereference(hlist_nulls_next_rcu(head)); 53 head = rcu_dereference(hlist_nulls_next_rcu(head));
56 while (is_a_nulls(head)) { 54 while (is_a_nulls(head)) {
57 if (likely(get_nulls_value(head) == st->bucket)) { 55 if (likely(get_nulls_value(head) == st->bucket)) {
58 if (++st->bucket >= net->ct.htable_size) 56 if (++st->bucket >= nf_conntrack_htable_size)
59 return NULL; 57 return NULL;
60 } 58 }
61 head = rcu_dereference( 59 head = rcu_dereference(
62 hlist_nulls_first_rcu(&net->ct.hash[st->bucket])); 60 hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket]));
63 } 61 }
64 return head; 62 return head;
65} 63}
@@ -114,6 +112,23 @@ static inline void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
114} 112}
115#endif 113#endif
116 114
115static bool ct_seq_should_skip(const struct nf_conn *ct,
116 const struct net *net,
117 const struct nf_conntrack_tuple_hash *hash)
118{
119 /* we only want to print DIR_ORIGINAL */
120 if (NF_CT_DIRECTION(hash))
121 return true;
122
123 if (nf_ct_l3num(ct) != AF_INET)
124 return true;
125
126 if (!net_eq(nf_ct_net(ct), net))
127 return true;
128
129 return false;
130}
131
117static int ct_seq_show(struct seq_file *s, void *v) 132static int ct_seq_show(struct seq_file *s, void *v)
118{ 133{
119 struct nf_conntrack_tuple_hash *hash = v; 134 struct nf_conntrack_tuple_hash *hash = v;
@@ -123,14 +138,15 @@ static int ct_seq_show(struct seq_file *s, void *v)
123 int ret = 0; 138 int ret = 0;
124 139
125 NF_CT_ASSERT(ct); 140 NF_CT_ASSERT(ct);
126 if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) 141 if (ct_seq_should_skip(ct, seq_file_net(s), hash))
127 return 0; 142 return 0;
128 143
144 if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
145 return 0;
129 146
130 /* we only want to print DIR_ORIGINAL */ 147 /* check if we raced w. object reuse */
131 if (NF_CT_DIRECTION(hash)) 148 if (!nf_ct_is_confirmed(ct) ||
132 goto release; 149 ct_seq_should_skip(ct, seq_file_net(s), hash))
133 if (nf_ct_l3num(ct) != AF_INET)
134 goto release; 150 goto release;
135 151
136 l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); 152 l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
@@ -220,13 +236,12 @@ struct ct_expect_iter_state {
220 236
221static struct hlist_node *ct_expect_get_first(struct seq_file *seq) 237static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
222{ 238{
223 struct net *net = seq_file_net(seq);
224 struct ct_expect_iter_state *st = seq->private; 239 struct ct_expect_iter_state *st = seq->private;
225 struct hlist_node *n; 240 struct hlist_node *n;
226 241
227 for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { 242 for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
228 n = rcu_dereference( 243 n = rcu_dereference(
229 hlist_first_rcu(&net->ct.expect_hash[st->bucket])); 244 hlist_first_rcu(&nf_ct_expect_hash[st->bucket]));
230 if (n) 245 if (n)
231 return n; 246 return n;
232 } 247 }
@@ -236,7 +251,6 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
236static struct hlist_node *ct_expect_get_next(struct seq_file *seq, 251static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
237 struct hlist_node *head) 252 struct hlist_node *head)
238{ 253{
239 struct net *net = seq_file_net(seq);
240 struct ct_expect_iter_state *st = seq->private; 254 struct ct_expect_iter_state *st = seq->private;
241 255
242 head = rcu_dereference(hlist_next_rcu(head)); 256 head = rcu_dereference(hlist_next_rcu(head));
@@ -244,7 +258,7 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
244 if (++st->bucket >= nf_ct_expect_hsize) 258 if (++st->bucket >= nf_ct_expect_hsize)
245 return NULL; 259 return NULL;
246 head = rcu_dereference( 260 head = rcu_dereference(
247 hlist_first_rcu(&net->ct.expect_hash[st->bucket])); 261 hlist_first_rcu(&nf_ct_expect_hash[st->bucket]));
248 } 262 }
249 return head; 263 return head;
250} 264}
@@ -285,6 +299,9 @@ static int exp_seq_show(struct seq_file *s, void *v)
285 299
286 exp = hlist_entry(n, struct nf_conntrack_expect, hnode); 300 exp = hlist_entry(n, struct nf_conntrack_expect, hnode);
287 301
302 if (!net_eq(nf_ct_net(exp->master), seq_file_net(s)))
303 return 0;
304
288 if (exp->tuple.src.l3num != AF_INET) 305 if (exp->tuple.src.l3num != AF_INET)
289 return 0; 306 return 0;
290 307
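Note on the ct_seq_show() hunk above: the direction / l3num / namespace filtering moves into ct_seq_should_skip() and runs twice — once before taking a reference, and again together with nf_ct_is_confirmed() after atomic_inc_not_zero() succeeds. Conntrack objects come from an RCU-typesafe slab, so between the lockless hash walk and the refcount bump an entry can be freed and recycled for a different connection; the second check is what the "check if we raced w. object reuse" comment refers to. The shape of the pattern, with hypothetical names standing in for the conntrack specifics:

    if (should_skip(obj))                        /* cheap pre-filter, no reference held */
            return 0;
    if (unlikely(!atomic_inc_not_zero(&obj->refcnt)))
            return 0;                            /* object already being freed */
    if (!still_valid(obj) || should_skip(obj))
            goto release;                        /* slab object was recycled under us */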
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 60398a9370e7..a1f2830d8110 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -915,11 +915,11 @@ static int ip_error(struct sk_buff *skb)
915 if (!IN_DEV_FORWARD(in_dev)) { 915 if (!IN_DEV_FORWARD(in_dev)) {
916 switch (rt->dst.error) { 916 switch (rt->dst.error) {
917 case EHOSTUNREACH: 917 case EHOSTUNREACH:
918 IP_INC_STATS_BH(net, IPSTATS_MIB_INADDRERRORS); 918 __IP_INC_STATS(net, IPSTATS_MIB_INADDRERRORS);
919 break; 919 break;
920 920
921 case ENETUNREACH: 921 case ENETUNREACH:
922 IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES); 922 __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
923 break; 923 break;
924 } 924 }
925 goto out; 925 goto out;
@@ -934,7 +934,7 @@ static int ip_error(struct sk_buff *skb)
934 break; 934 break;
935 case ENETUNREACH: 935 case ENETUNREACH:
936 code = ICMP_NET_UNREACH; 936 code = ICMP_NET_UNREACH;
937 IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES); 937 __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
938 break; 938 break;
939 case EACCES: 939 case EACCES:
940 code = ICMP_PKT_FILTERED; 940 code = ICMP_PKT_FILTERED;
@@ -2146,6 +2146,7 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
2146 unsigned int flags = 0; 2146 unsigned int flags = 0;
2147 struct fib_result res; 2147 struct fib_result res;
2148 struct rtable *rth; 2148 struct rtable *rth;
2149 int master_idx;
2149 int orig_oif; 2150 int orig_oif;
2150 int err = -ENETUNREACH; 2151 int err = -ENETUNREACH;
2151 2152
@@ -2155,6 +2156,9 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
2155 2156
2156 orig_oif = fl4->flowi4_oif; 2157 orig_oif = fl4->flowi4_oif;
2157 2158
2159 master_idx = l3mdev_master_ifindex_by_index(net, fl4->flowi4_oif);
2160 if (master_idx)
2161 fl4->flowi4_oif = master_idx;
2158 fl4->flowi4_iif = LOOPBACK_IFINDEX; 2162 fl4->flowi4_iif = LOOPBACK_IFINDEX;
2159 fl4->flowi4_tos = tos & IPTOS_RT_MASK; 2163 fl4->flowi4_tos = tos & IPTOS_RT_MASK;
2160 fl4->flowi4_scope = ((tos & RTO_ONLINK) ? 2164 fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 4c04f09338e3..e3c4043c27de 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -312,11 +312,11 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
312 312
313 mss = __cookie_v4_check(ip_hdr(skb), th, cookie); 313 mss = __cookie_v4_check(ip_hdr(skb), th, cookie);
314 if (mss == 0) { 314 if (mss == 0) {
315 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED); 315 __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
316 goto out; 316 goto out;
317 } 317 }
318 318
319 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV); 319 __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV);
320 320
321 /* check for timestamp cookie support */ 321 /* check for timestamp cookie support */
322 memset(&tcp_opt, 0, sizeof(tcp_opt)); 322 memset(&tcp_opt, 0, sizeof(tcp_opt));
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4d73858991af..5c7ed147449c 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -430,14 +430,15 @@ EXPORT_SYMBOL(tcp_init_sock);
430 430
431static void tcp_tx_timestamp(struct sock *sk, u16 tsflags, struct sk_buff *skb) 431static void tcp_tx_timestamp(struct sock *sk, u16 tsflags, struct sk_buff *skb)
432{ 432{
433 if (sk->sk_tsflags || tsflags) { 433 if (tsflags) {
434 struct skb_shared_info *shinfo = skb_shinfo(skb); 434 struct skb_shared_info *shinfo = skb_shinfo(skb);
435 struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); 435 struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
436 436
437 sock_tx_timestamp(sk, tsflags, &shinfo->tx_flags); 437 sock_tx_timestamp(sk, tsflags, &shinfo->tx_flags);
438 if (shinfo->tx_flags & SKBTX_ANY_TSTAMP) 438 if (tsflags & SOF_TIMESTAMPING_TX_ACK)
439 tcb->txstamp_ack = 1;
440 if (tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK)
439 shinfo->tskey = TCP_SKB_CB(skb)->seq + skb->len - 1; 441 shinfo->tskey = TCP_SKB_CB(skb)->seq + skb->len - 1;
440 tcb->txstamp_ack = !!(shinfo->tx_flags & SKBTX_ACK_TSTAMP);
441 } 442 }
442} 443}
443 444
@@ -908,7 +909,8 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
908 int copy, i; 909 int copy, i;
909 bool can_coalesce; 910 bool can_coalesce;
910 911
911 if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) { 912 if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0 ||
913 !tcp_skb_can_collapse_to(skb)) {
912new_segment: 914new_segment:
913 if (!sk_stream_memory_free(sk)) 915 if (!sk_stream_memory_free(sk))
914 goto wait_for_sndbuf; 916 goto wait_for_sndbuf;
@@ -1082,6 +1084,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
1082 struct sockcm_cookie sockc; 1084 struct sockcm_cookie sockc;
1083 int flags, err, copied = 0; 1085 int flags, err, copied = 0;
1084 int mss_now = 0, size_goal, copied_syn = 0; 1086 int mss_now = 0, size_goal, copied_syn = 0;
1087 bool process_backlog = false;
1085 bool sg; 1088 bool sg;
1086 long timeo; 1089 long timeo;
1087 1090
@@ -1134,11 +1137,12 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
1134 /* This should be in poll */ 1137 /* This should be in poll */
1135 sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); 1138 sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
1136 1139
1137 mss_now = tcp_send_mss(sk, &size_goal, flags);
1138
1139 /* Ok commence sending. */ 1140 /* Ok commence sending. */
1140 copied = 0; 1141 copied = 0;
1141 1142
1143restart:
1144 mss_now = tcp_send_mss(sk, &size_goal, flags);
1145
1142 err = -EPIPE; 1146 err = -EPIPE;
1143 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) 1147 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
1144 goto out_err; 1148 goto out_err;
@@ -1156,7 +1160,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
1156 copy = max - skb->len; 1160 copy = max - skb->len;
1157 } 1161 }
1158 1162
1159 if (copy <= 0) { 1163 if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
1160new_segment: 1164new_segment:
1161 /* Allocate new segment. If the interface is SG, 1165 /* Allocate new segment. If the interface is SG,
1162 * allocate skb fitting to single page. 1166 * allocate skb fitting to single page.
@@ -1164,6 +1168,10 @@ new_segment:
1164 if (!sk_stream_memory_free(sk)) 1168 if (!sk_stream_memory_free(sk))
1165 goto wait_for_sndbuf; 1169 goto wait_for_sndbuf;
1166 1170
1171 if (process_backlog && sk_flush_backlog(sk)) {
1172 process_backlog = false;
1173 goto restart;
1174 }
1167 skb = sk_stream_alloc_skb(sk, 1175 skb = sk_stream_alloc_skb(sk,
1168 select_size(sk, sg), 1176 select_size(sk, sg),
1169 sk->sk_allocation, 1177 sk->sk_allocation,
@@ -1171,6 +1179,7 @@ new_segment:
1171 if (!skb) 1179 if (!skb)
1172 goto wait_for_memory; 1180 goto wait_for_memory;
1173 1181
1182 process_backlog = true;
1174 /* 1183 /*
1175 * Check whether we can use HW checksum. 1184 * Check whether we can use HW checksum.
1176 */ 1185 */
@@ -1250,6 +1259,8 @@ new_segment:
1250 copied += copy; 1259 copied += copy;
1251 if (!msg_data_left(msg)) { 1260 if (!msg_data_left(msg)) {
1252 tcp_tx_timestamp(sk, sockc.tsflags, skb); 1261 tcp_tx_timestamp(sk, sockc.tsflags, skb);
1262 if (unlikely(flags & MSG_EOR))
1263 TCP_SKB_CB(skb)->eor = 1;
1253 goto out; 1264 goto out;
1254 } 1265 }
1255 1266
@@ -1443,14 +1454,10 @@ static void tcp_prequeue_process(struct sock *sk)
1443 struct sk_buff *skb; 1454 struct sk_buff *skb;
1444 struct tcp_sock *tp = tcp_sk(sk); 1455 struct tcp_sock *tp = tcp_sk(sk);
1445 1456
1446 NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPPREQUEUED); 1457 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUED);
1447 1458
1448 /* RX process wants to run with disabled BHs, though it is not
1449 * necessary */
1450 local_bh_disable();
1451 while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) 1459 while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
1452 sk_backlog_rcv(sk, skb); 1460 sk_backlog_rcv(sk, skb);
1453 local_bh_enable();
1454 1461
1455 /* Clear memory counter. */ 1462 /* Clear memory counter. */
1456 tp->ucopy.memory = 0; 1463 tp->ucopy.memory = 0;
@@ -1777,7 +1784,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
1777 1784
1778 chunk = len - tp->ucopy.len; 1785 chunk = len - tp->ucopy.len;
1779 if (chunk != 0) { 1786 if (chunk != 0) {
1780 NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk); 1787 NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
1781 len -= chunk; 1788 len -= chunk;
1782 copied += chunk; 1789 copied += chunk;
1783 } 1790 }
@@ -1789,7 +1796,7 @@ do_prequeue:
1789 1796
1790 chunk = len - tp->ucopy.len; 1797 chunk = len - tp->ucopy.len;
1791 if (chunk != 0) { 1798 if (chunk != 0) {
1792 NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); 1799 NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
1793 len -= chunk; 1800 len -= chunk;
1794 copied += chunk; 1801 copied += chunk;
1795 } 1802 }
@@ -1875,7 +1882,7 @@ skip_copy:
1875 tcp_prequeue_process(sk); 1882 tcp_prequeue_process(sk);
1876 1883
1877 if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) { 1884 if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) {
1878 NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); 1885 NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
1879 len -= chunk; 1886 len -= chunk;
1880 copied += chunk; 1887 copied += chunk;
1881 } 1888 }
@@ -2065,13 +2072,13 @@ void tcp_close(struct sock *sk, long timeout)
2065 sk->sk_prot->disconnect(sk, 0); 2072 sk->sk_prot->disconnect(sk, 0);
2066 } else if (data_was_unread) { 2073 } else if (data_was_unread) {
2067 /* Unread data was tossed, zap the connection. */ 2074 /* Unread data was tossed, zap the connection. */
2068 NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE); 2075 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
2069 tcp_set_state(sk, TCP_CLOSE); 2076 tcp_set_state(sk, TCP_CLOSE);
2070 tcp_send_active_reset(sk, sk->sk_allocation); 2077 tcp_send_active_reset(sk, sk->sk_allocation);
2071 } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { 2078 } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
2072 /* Check zero linger _after_ checking for unread data. */ 2079 /* Check zero linger _after_ checking for unread data. */
2073 sk->sk_prot->disconnect(sk, 0); 2080 sk->sk_prot->disconnect(sk, 0);
2074 NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONDATA); 2081 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
2075 } else if (tcp_close_state(sk)) { 2082 } else if (tcp_close_state(sk)) {
2076 /* We FIN if the application ate all the data before 2083 /* We FIN if the application ate all the data before
2077 * zapping the connection. 2084 * zapping the connection.
@@ -2148,7 +2155,7 @@ adjudge_to_death:
2148 if (tp->linger2 < 0) { 2155 if (tp->linger2 < 0) {
2149 tcp_set_state(sk, TCP_CLOSE); 2156 tcp_set_state(sk, TCP_CLOSE);
2150 tcp_send_active_reset(sk, GFP_ATOMIC); 2157 tcp_send_active_reset(sk, GFP_ATOMIC);
2151 NET_INC_STATS_BH(sock_net(sk), 2158 __NET_INC_STATS(sock_net(sk),
2152 LINUX_MIB_TCPABORTONLINGER); 2159 LINUX_MIB_TCPABORTONLINGER);
2153 } else { 2160 } else {
2154 const int tmo = tcp_fin_time(sk); 2161 const int tmo = tcp_fin_time(sk);
@@ -2167,7 +2174,7 @@ adjudge_to_death:
2167 if (tcp_check_oom(sk, 0)) { 2174 if (tcp_check_oom(sk, 0)) {
2168 tcp_set_state(sk, TCP_CLOSE); 2175 tcp_set_state(sk, TCP_CLOSE);
2169 tcp_send_active_reset(sk, GFP_ATOMIC); 2176 tcp_send_active_reset(sk, GFP_ATOMIC);
2170 NET_INC_STATS_BH(sock_net(sk), 2177 __NET_INC_STATS(sock_net(sk),
2171 LINUX_MIB_TCPABORTONMEMORY); 2178 LINUX_MIB_TCPABORTONMEMORY);
2172 } 2179 }
2173 } 2180 }
@@ -3091,7 +3098,7 @@ void tcp_done(struct sock *sk)
3091 struct request_sock *req = tcp_sk(sk)->fastopen_rsk; 3098 struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
3092 3099
3093 if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) 3100 if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
3094 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS); 3101 TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
3095 3102
3096 tcp_set_state(sk, TCP_CLOSE); 3103 tcp_set_state(sk, TCP_CLOSE);
3097 tcp_clear_xmit_timers(sk); 3104 tcp_clear_xmit_timers(sk);
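Note on the tcp.c hunks above: two of them cooperate to implement MSG_EOR. When the flag is passed, the skb that ends the write gets TCP_SKB_CB(skb)->eor set, and both do_tcp_sendpages() and tcp_sendmsg() now refuse to append later data to such an skb, starting a new segment instead. The helper they call is not part of this diff; it is presumably a one-line test on that bit, along the lines of:

    static inline bool tcp_skb_can_collapse_to(const struct sk_buff *skb)
    {
            /* an skb that ends a record (MSG_EOR) must not be appended to */
            return likely(!TCP_SKB_CB(skb)->eor);
    }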
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index fd1405d37c14..36087bca9f48 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -197,15 +197,15 @@ static void bictcp_state(struct sock *sk, u8 new_state)
197/* Track delayed acknowledgment ratio using sliding window 197/* Track delayed acknowledgment ratio using sliding window
198 * ratio = (15*ratio + sample) / 16 198 * ratio = (15*ratio + sample) / 16
199 */ 199 */
200static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt) 200static void bictcp_acked(struct sock *sk, const struct ack_sample *sample)
201{ 201{
202 const struct inet_connection_sock *icsk = inet_csk(sk); 202 const struct inet_connection_sock *icsk = inet_csk(sk);
203 203
204 if (icsk->icsk_ca_state == TCP_CA_Open) { 204 if (icsk->icsk_ca_state == TCP_CA_Open) {
205 struct bictcp *ca = inet_csk_ca(sk); 205 struct bictcp *ca = inet_csk_ca(sk);
206 206
207 cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT; 207 ca->delayed_ack += sample->pkts_acked -
208 ca->delayed_ack += cnt; 208 (ca->delayed_ack >> ACK_RATIO_SHIFT);
209 } 209 }
210} 210}
211 211
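Note on the congestion-control hunks: bictcp_acked() above, like tcp_cdg_acked() in the next file, now receives a single const struct ack_sample * instead of separate count and RTT arguments. The structure itself is defined elsewhere in this series; judging from the fields used in these hunks it is essentially:

    struct ack_sample {
            u32 pkts_acked;         /* what used to be the cnt / num_acked argument */
            s32 rtt_us;             /* what used to be the rtt / rtt_us argument */
    };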
diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c
index 167b6a3e1b98..03725b294286 100644
--- a/net/ipv4/tcp_cdg.c
+++ b/net/ipv4/tcp_cdg.c
@@ -155,11 +155,11 @@ static void tcp_cdg_hystart_update(struct sock *sk)
155 155
156 ca->last_ack = now_us; 156 ca->last_ack = now_us;
157 if (after(now_us, ca->round_start + base_owd)) { 157 if (after(now_us, ca->round_start + base_owd)) {
158 NET_INC_STATS_BH(sock_net(sk), 158 NET_INC_STATS(sock_net(sk),
159 LINUX_MIB_TCPHYSTARTTRAINDETECT); 159 LINUX_MIB_TCPHYSTARTTRAINDETECT);
160 NET_ADD_STATS_BH(sock_net(sk), 160 NET_ADD_STATS(sock_net(sk),
161 LINUX_MIB_TCPHYSTARTTRAINCWND, 161 LINUX_MIB_TCPHYSTARTTRAINCWND,
162 tp->snd_cwnd); 162 tp->snd_cwnd);
163 tp->snd_ssthresh = tp->snd_cwnd; 163 tp->snd_ssthresh = tp->snd_cwnd;
164 return; 164 return;
165 } 165 }
@@ -174,11 +174,11 @@ static void tcp_cdg_hystart_update(struct sock *sk)
174 125U); 174 125U);
175 175
176 if (ca->rtt.min > thresh) { 176 if (ca->rtt.min > thresh) {
177 NET_INC_STATS_BH(sock_net(sk), 177 NET_INC_STATS(sock_net(sk),
178 LINUX_MIB_TCPHYSTARTDELAYDETECT); 178 LINUX_MIB_TCPHYSTARTDELAYDETECT);
179 NET_ADD_STATS_BH(sock_net(sk), 179 NET_ADD_STATS(sock_net(sk),
180 LINUX_MIB_TCPHYSTARTDELAYCWND, 180 LINUX_MIB_TCPHYSTARTDELAYCWND,
181 tp->snd_cwnd); 181 tp->snd_cwnd);
182 tp->snd_ssthresh = tp->snd_cwnd; 182 tp->snd_ssthresh = tp->snd_cwnd;
183 } 183 }
184 } 184 }
@@ -294,12 +294,12 @@ static void tcp_cdg_cong_avoid(struct sock *sk, u32 ack, u32 acked)
294 ca->shadow_wnd = max(ca->shadow_wnd, ca->shadow_wnd + incr); 294 ca->shadow_wnd = max(ca->shadow_wnd, ca->shadow_wnd + incr);
295} 295}
296 296
297static void tcp_cdg_acked(struct sock *sk, u32 num_acked, s32 rtt_us) 297static void tcp_cdg_acked(struct sock *sk, const struct ack_sample *sample)
298{ 298{
299 struct cdg *ca = inet_csk_ca(sk); 299 struct cdg *ca = inet_csk_ca(sk);
300 struct tcp_sock *tp = tcp_sk(sk); 300 struct tcp_sock *tp = tcp_sk(sk);
301 301
302 if (rtt_us <= 0) 302 if (sample->rtt_us <= 0)
303 return; 303 return;
304 304
305 /* A heuristic for filtering delayed ACKs, adapted from: 305 /* A heuristic for filtering delayed ACKs, adapted from:
@@ -307,20 +307,20 @@ static void tcp_cdg_acked(struct sock *sk, u32 num_acked, s32 rtt_us)
307 * delay and rate based TCP mechanisms." TR 100219A. CAIA, 2010. 307 * delay and rate based TCP mechanisms." TR 100219A. CAIA, 2010.
308 */ 308 */
309 if (tp->sacked_out == 0) { 309 if (tp->sacked_out == 0) {
310 if (num_acked == 1 && ca->delack) { 310 if (sample->pkts_acked == 1 && ca->delack) {
311 /* A delayed ACK is only used for the minimum if it is 311 /* A delayed ACK is only used for the minimum if it is
312 * provenly lower than an existing non-zero minimum. 312 * provenly lower than an existing non-zero minimum.
313 */ 313 */
314 ca->rtt.min = min(ca->rtt.min, rtt_us); 314 ca->rtt.min = min(ca->rtt.min, sample->rtt_us);
315 ca->delack--; 315 ca->delack--;
316 return; 316 return;
317 } else if (num_acked > 1 && ca->delack < 5) { 317 } else if (sample->pkts_acked > 1 && ca->delack < 5) {
318 ca->delack++; 318 ca->delack++;
319 } 319 }
320 } 320 }
321 321
322 ca->rtt.min = min_not_zero(ca->rtt.min, rtt_us); 322 ca->rtt.min = min_not_zero(ca->rtt.min, sample->rtt_us);
323 ca->rtt.max = max(ca->rtt.max, rtt_us); 323 ca->rtt.max = max(ca->rtt.max, sample->rtt_us);
324} 324}
325 325
326static u32 tcp_cdg_ssthresh(struct sock *sk) 326static u32 tcp_cdg_ssthresh(struct sock *sk)
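The same ack_sample conversion applied to CAIA Delay-Gradient; the heuristic itself is unchanged. When nothing is SACKed, a single-packet ACK is treated as a possibly delayed ACK and may only lower an existing non-zero minimum, while multi-packet ACKs top up a small budget (capped at 5) of delayed ACKs to expect. A self-contained sketch of that filter, again with a simplified ack_sample:

    /* delayed-ACK filter sketch -- illustrative, not net/ipv4/tcp_cdg.c */
    #include <stdio.h>
    #include <stdint.h>

    struct ack_sample { uint32_t pkts_acked; int32_t rtt_us; };  /* simplified */

    struct rtt_minmax { int32_t min, max; };

    static void cdg_filter(struct rtt_minmax *rtt, uint32_t *delack,
                           uint32_t sacked_out, const struct ack_sample *s)
    {
        if (s->rtt_us <= 0)                  /* dup ACK: no usable sample */
            return;

        if (sacked_out == 0) {
            if (s->pkts_acked == 1 && *delack) {
                /* likely a delayed ACK: only accept it if it lowers an
                 * existing non-zero minimum */
                if (rtt->min && s->rtt_us < rtt->min)
                    rtt->min = s->rtt_us;
                (*delack)--;
                return;
            } else if (s->pkts_acked > 1 && *delack < 5) {
                (*delack)++;                 /* expect another delayed ACK */
            }
        }
        rtt->min = rtt->min ? (s->rtt_us < rtt->min ? s->rtt_us : rtt->min)
                            : s->rtt_us;     /* min_not_zero() */
        rtt->max = s->rtt_us > rtt->max ? s->rtt_us : rtt->max;
    }

    int main(void)
    {
        struct rtt_minmax rtt = { 0, 0 };
        uint32_t delack = 0;
        struct ack_sample a = { 2, 40000 }, b = { 1, 55000 };

        cdg_filter(&rtt, &delack, 0, &a);    /* normal sample: 40 ms */
        cdg_filter(&rtt, &delack, 0, &b);    /* lone ACK, higher RTT: ignored */
        printf("min=%d max=%d delack=%u\n", rtt.min, rtt.max, (unsigned)delack);
        return 0;
    }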
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 448c2615fece..c99230efcd52 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -402,11 +402,11 @@ static void hystart_update(struct sock *sk, u32 delay)
402 ca->last_ack = now; 402 ca->last_ack = now;
403 if ((s32)(now - ca->round_start) > ca->delay_min >> 4) { 403 if ((s32)(now - ca->round_start) > ca->delay_min >> 4) {
404 ca->found |= HYSTART_ACK_TRAIN; 404 ca->found |= HYSTART_ACK_TRAIN;
405 NET_INC_STATS_BH(sock_net(sk), 405 NET_INC_STATS(sock_net(sk),
406 LINUX_MIB_TCPHYSTARTTRAINDETECT); 406 LINUX_MIB_TCPHYSTARTTRAINDETECT);
407 NET_ADD_STATS_BH(sock_net(sk), 407 NET_ADD_STATS(sock_net(sk),
408 LINUX_MIB_TCPHYSTARTTRAINCWND, 408 LINUX_MIB_TCPHYSTARTTRAINCWND,
409 tp->snd_cwnd); 409 tp->snd_cwnd);
410 tp->snd_ssthresh = tp->snd_cwnd; 410 tp->snd_ssthresh = tp->snd_cwnd;
411 } 411 }
412 } 412 }
@@ -423,11 +423,11 @@ static void hystart_update(struct sock *sk, u32 delay)
423 if (ca->curr_rtt > ca->delay_min + 423 if (ca->curr_rtt > ca->delay_min +
424 HYSTART_DELAY_THRESH(ca->delay_min >> 3)) { 424 HYSTART_DELAY_THRESH(ca->delay_min >> 3)) {
425 ca->found |= HYSTART_DELAY; 425 ca->found |= HYSTART_DELAY;
426 NET_INC_STATS_BH(sock_net(sk), 426 NET_INC_STATS(sock_net(sk),
427 LINUX_MIB_TCPHYSTARTDELAYDETECT); 427 LINUX_MIB_TCPHYSTARTDELAYDETECT);
428 NET_ADD_STATS_BH(sock_net(sk), 428 NET_ADD_STATS(sock_net(sk),
429 LINUX_MIB_TCPHYSTARTDELAYCWND, 429 LINUX_MIB_TCPHYSTARTDELAYCWND,
430 tp->snd_cwnd); 430 tp->snd_cwnd);
431 tp->snd_ssthresh = tp->snd_cwnd; 431 tp->snd_ssthresh = tp->snd_cwnd;
432 } 432 }
433 } 433 }
@@ -437,21 +437,21 @@ static void hystart_update(struct sock *sk, u32 delay)
437/* Track delayed acknowledgment ratio using sliding window 437/* Track delayed acknowledgment ratio using sliding window
438 * ratio = (15*ratio + sample) / 16 438 * ratio = (15*ratio + sample) / 16
439 */ 439 */
440static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) 440static void bictcp_acked(struct sock *sk, const struct ack_sample *sample)
441{ 441{
442 const struct tcp_sock *tp = tcp_sk(sk); 442 const struct tcp_sock *tp = tcp_sk(sk);
443 struct bictcp *ca = inet_csk_ca(sk); 443 struct bictcp *ca = inet_csk_ca(sk);
444 u32 delay; 444 u32 delay;
445 445
446 /* Some calls are for duplicates without timetamps */ 446 /* Some calls are for duplicates without timetamps */
447 if (rtt_us < 0) 447 if (sample->rtt_us < 0)
448 return; 448 return;
449 449
450 /* Discard delay samples right after fast recovery */ 450 /* Discard delay samples right after fast recovery */
451 if (ca->epoch_start && (s32)(tcp_time_stamp - ca->epoch_start) < HZ) 451 if (ca->epoch_start && (s32)(tcp_time_stamp - ca->epoch_start) < HZ)
452 return; 452 return;
453 453
454 delay = (rtt_us << 3) / USEC_PER_MSEC; 454 delay = (sample->rtt_us << 3) / USEC_PER_MSEC;
455 if (delay == 0) 455 if (delay == 0)
456 delay = 1; 456 delay = 1;
457 457
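Aside from the ack_sample plumbing, the delay computation at the end of this hunk is easy to misread: the sample arrives in microseconds and HyStart keeps delays in 1/8-millisecond units, hence delay = (rtt_us << 3) / USEC_PER_MSEC, floored to 1 so very small RTTs still register. A quick check of the conversion (USEC_PER_MSEC is the usual 1000):

    /* unit-conversion check -- illustrative */
    #include <stdio.h>
    #define USEC_PER_MSEC 1000L

    int main(void)
    {
        long rtt_us = 25000;                         /* 25 ms RTT sample */
        long delay = (rtt_us << 3) / USEC_PER_MSEC;  /* 1/8-ms units */

        if (delay == 0)                              /* sub-125us RTTs still count */
            delay = 1;
        printf("%ld us -> %ld (i.e. %.3f ms)\n", rtt_us, delay, delay / 8.0);
        return 0;
    }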
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index cffd8f9ed1a9..54d9f9b0120f 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -255,9 +255,9 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
255 spin_lock(&fastopenq->lock); 255 spin_lock(&fastopenq->lock);
256 req1 = fastopenq->rskq_rst_head; 256 req1 = fastopenq->rskq_rst_head;
257 if (!req1 || time_after(req1->rsk_timer.expires, jiffies)) { 257 if (!req1 || time_after(req1->rsk_timer.expires, jiffies)) {
258 __NET_INC_STATS(sock_net(sk),
259 LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
258 spin_unlock(&fastopenq->lock); 260 spin_unlock(&fastopenq->lock);
259 NET_INC_STATS_BH(sock_net(sk),
260 LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
261 return false; 261 return false;
262 } 262 }
263 fastopenq->rskq_rst_head = req1->dl_next; 263 fastopenq->rskq_rst_head = req1->dl_next;
@@ -282,7 +282,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
282 struct sock *child; 282 struct sock *child;
283 283
284 if (foc->len == 0) /* Client requests a cookie */ 284 if (foc->len == 0) /* Client requests a cookie */
285 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD); 285 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD);
286 286
287 if (!((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) && 287 if (!((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) &&
288 (syn_data || foc->len >= 0) && 288 (syn_data || foc->len >= 0) &&
@@ -311,13 +311,13 @@ fastopen:
311 child = tcp_fastopen_create_child(sk, skb, dst, req); 311 child = tcp_fastopen_create_child(sk, skb, dst, req);
312 if (child) { 312 if (child) {
313 foc->len = -1; 313 foc->len = -1;
314 NET_INC_STATS_BH(sock_net(sk), 314 NET_INC_STATS(sock_net(sk),
315 LINUX_MIB_TCPFASTOPENPASSIVE); 315 LINUX_MIB_TCPFASTOPENPASSIVE);
316 return child; 316 return child;
317 } 317 }
318 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL); 318 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
319 } else if (foc->len > 0) /* Client presents an invalid cookie */ 319 } else if (foc->len > 0) /* Client presents an invalid cookie */
320 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL); 320 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
321 321
322 valid_foc.exp = foc->exp; 322 valid_foc.exp = foc->exp;
323 *foc = valid_foc; 323 *foc = valid_foc;
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 82f0d9ed60f5..4a4d8e76738f 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -99,7 +99,7 @@ static inline void measure_rtt(struct sock *sk, u32 srtt)
99} 99}
100 100
101static void measure_achieved_throughput(struct sock *sk, 101static void measure_achieved_throughput(struct sock *sk,
102 u32 pkts_acked, s32 rtt) 102 const struct ack_sample *sample)
103{ 103{
104 const struct inet_connection_sock *icsk = inet_csk(sk); 104 const struct inet_connection_sock *icsk = inet_csk(sk);
105 const struct tcp_sock *tp = tcp_sk(sk); 105 const struct tcp_sock *tp = tcp_sk(sk);
@@ -107,10 +107,10 @@ static void measure_achieved_throughput(struct sock *sk,
107 u32 now = tcp_time_stamp; 107 u32 now = tcp_time_stamp;
108 108
109 if (icsk->icsk_ca_state == TCP_CA_Open) 109 if (icsk->icsk_ca_state == TCP_CA_Open)
110 ca->pkts_acked = pkts_acked; 110 ca->pkts_acked = sample->pkts_acked;
111 111
112 if (rtt > 0) 112 if (sample->rtt_us > 0)
113 measure_rtt(sk, usecs_to_jiffies(rtt)); 113 measure_rtt(sk, usecs_to_jiffies(sample->rtt_us));
114 114
115 if (!use_bandwidth_switch) 115 if (!use_bandwidth_switch)
116 return; 116 return;
@@ -122,7 +122,7 @@ static void measure_achieved_throughput(struct sock *sk,
122 return; 122 return;
123 } 123 }
124 124
125 ca->packetcount += pkts_acked; 125 ca->packetcount += sample->pkts_acked;
126 126
127 if (ca->packetcount >= tp->snd_cwnd - (ca->alpha >> 7 ? : 1) && 127 if (ca->packetcount >= tp->snd_cwnd - (ca->alpha >> 7 ? : 1) &&
128 now - ca->lasttime >= ca->minRTT && 128 now - ca->lasttime >= ca->minRTT &&
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 2ab9bbb6faff..c8e6d86be114 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -82,30 +82,31 @@ static void tcp_illinois_init(struct sock *sk)
82} 82}
83 83
84/* Measure RTT for each ack. */ 84/* Measure RTT for each ack. */
85static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked, s32 rtt) 85static void tcp_illinois_acked(struct sock *sk, const struct ack_sample *sample)
86{ 86{
87 struct illinois *ca = inet_csk_ca(sk); 87 struct illinois *ca = inet_csk_ca(sk);
88 s32 rtt_us = sample->rtt_us;
88 89
89 ca->acked = pkts_acked; 90 ca->acked = sample->pkts_acked;
90 91
91 /* dup ack, no rtt sample */ 92 /* dup ack, no rtt sample */
92 if (rtt < 0) 93 if (rtt_us < 0)
93 return; 94 return;
94 95
95 /* ignore bogus values, this prevents wraparound in alpha math */ 96 /* ignore bogus values, this prevents wraparound in alpha math */
96 if (rtt > RTT_MAX) 97 if (rtt_us > RTT_MAX)
97 rtt = RTT_MAX; 98 rtt_us = RTT_MAX;
98 99
99 /* keep track of minimum RTT seen so far */ 100 /* keep track of minimum RTT seen so far */
100 if (ca->base_rtt > rtt) 101 if (ca->base_rtt > rtt_us)
101 ca->base_rtt = rtt; 102 ca->base_rtt = rtt_us;
102 103
103 /* and max */ 104 /* and max */
104 if (ca->max_rtt < rtt) 105 if (ca->max_rtt < rtt_us)
105 ca->max_rtt = rtt; 106 ca->max_rtt = rtt_us;
106 107
107 ++ca->cnt_rtt; 108 ++ca->cnt_rtt;
108 ca->sum_rtt += rtt; 109 ca->sum_rtt += rtt_us;
109} 110}
110 111
111/* Maximum queuing delay */ 112/* Maximum queuing delay */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 967520dbe0bf..d6c8f4cd0800 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -869,7 +869,7 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
869 else 869 else
870 mib_idx = LINUX_MIB_TCPSACKREORDER; 870 mib_idx = LINUX_MIB_TCPSACKREORDER;
871 871
872 NET_INC_STATS_BH(sock_net(sk), mib_idx); 872 NET_INC_STATS(sock_net(sk), mib_idx);
873#if FASTRETRANS_DEBUG > 1 873#if FASTRETRANS_DEBUG > 1
874 pr_debug("Disorder%d %d %u f%u s%u rr%d\n", 874 pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
875 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state, 875 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
@@ -1062,7 +1062,7 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
1062 if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) { 1062 if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
1063 dup_sack = true; 1063 dup_sack = true;
1064 tcp_dsack_seen(tp); 1064 tcp_dsack_seen(tp);
1065 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKRECV); 1065 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECV);
1066 } else if (num_sacks > 1) { 1066 } else if (num_sacks > 1) {
1067 u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq); 1067 u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq);
1068 u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq); 1068 u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq);
@@ -1071,7 +1071,7 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
1071 !before(start_seq_0, start_seq_1)) { 1071 !before(start_seq_0, start_seq_1)) {
1072 dup_sack = true; 1072 dup_sack = true;
1073 tcp_dsack_seen(tp); 1073 tcp_dsack_seen(tp);
1074 NET_INC_STATS_BH(sock_net(sk), 1074 NET_INC_STATS(sock_net(sk),
1075 LINUX_MIB_TCPDSACKOFORECV); 1075 LINUX_MIB_TCPDSACKOFORECV);
1076 } 1076 }
1077 } 1077 }
@@ -1289,7 +1289,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
1289 1289
1290 if (skb->len > 0) { 1290 if (skb->len > 0) {
1291 BUG_ON(!tcp_skb_pcount(skb)); 1291 BUG_ON(!tcp_skb_pcount(skb));
1292 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTED); 1292 NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTED);
1293 return false; 1293 return false;
1294 } 1294 }
1295 1295
@@ -1303,6 +1303,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
1303 } 1303 }
1304 1304
1305 TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags; 1305 TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
1306 TCP_SKB_CB(prev)->eor = TCP_SKB_CB(skb)->eor;
1306 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) 1307 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
1307 TCP_SKB_CB(prev)->end_seq++; 1308 TCP_SKB_CB(prev)->end_seq++;
1308 1309
@@ -1313,7 +1314,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
1313 tcp_unlink_write_queue(skb, sk); 1314 tcp_unlink_write_queue(skb, sk);
1314 sk_wmem_free_skb(sk, skb); 1315 sk_wmem_free_skb(sk, skb);
1315 1316
1316 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKMERGED); 1317 NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKMERGED);
1317 1318
1318 return true; 1319 return true;
1319} 1320}
@@ -1368,6 +1369,9 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
1368 if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) 1369 if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
1369 goto fallback; 1370 goto fallback;
1370 1371
1372 if (!tcp_skb_can_collapse_to(prev))
1373 goto fallback;
1374
1371 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) && 1375 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
1372 !before(end_seq, TCP_SKB_CB(skb)->end_seq); 1376 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
1373 1377
@@ -1469,7 +1473,7 @@ noop:
1469 return skb; 1473 return skb;
1470 1474
1471fallback: 1475fallback:
1472 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK); 1476 NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK);
1473 return NULL; 1477 return NULL;
1474} 1478}
1475 1479
@@ -1657,7 +1661,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1657 mib_idx = LINUX_MIB_TCPSACKDISCARD; 1661 mib_idx = LINUX_MIB_TCPSACKDISCARD;
1658 } 1662 }
1659 1663
1660 NET_INC_STATS_BH(sock_net(sk), mib_idx); 1664 NET_INC_STATS(sock_net(sk), mib_idx);
1661 if (i == 0) 1665 if (i == 0)
1662 first_sack_index = -1; 1666 first_sack_index = -1;
1663 continue; 1667 continue;
@@ -1909,7 +1913,7 @@ void tcp_enter_loss(struct sock *sk)
1909 skb = tcp_write_queue_head(sk); 1913 skb = tcp_write_queue_head(sk);
1910 is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED); 1914 is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED);
1911 if (is_reneg) { 1915 if (is_reneg) {
1912 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSACKRENEGING); 1916 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
1913 tp->sacked_out = 0; 1917 tp->sacked_out = 0;
1914 tp->fackets_out = 0; 1918 tp->fackets_out = 0;
1915 } 1919 }
@@ -2395,7 +2399,7 @@ static bool tcp_try_undo_recovery(struct sock *sk)
2395 else 2399 else
2396 mib_idx = LINUX_MIB_TCPFULLUNDO; 2400 mib_idx = LINUX_MIB_TCPFULLUNDO;
2397 2401
2398 NET_INC_STATS_BH(sock_net(sk), mib_idx); 2402 NET_INC_STATS(sock_net(sk), mib_idx);
2399 } 2403 }
2400 if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) { 2404 if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
2401 /* Hold old state until something *above* high_seq 2405 /* Hold old state until something *above* high_seq
@@ -2417,7 +2421,7 @@ static bool tcp_try_undo_dsack(struct sock *sk)
2417 if (tp->undo_marker && !tp->undo_retrans) { 2421 if (tp->undo_marker && !tp->undo_retrans) {
2418 DBGUNDO(sk, "D-SACK"); 2422 DBGUNDO(sk, "D-SACK");
2419 tcp_undo_cwnd_reduction(sk, false); 2423 tcp_undo_cwnd_reduction(sk, false);
2420 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO); 2424 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
2421 return true; 2425 return true;
2422 } 2426 }
2423 return false; 2427 return false;
@@ -2432,10 +2436,10 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
2432 tcp_undo_cwnd_reduction(sk, true); 2436 tcp_undo_cwnd_reduction(sk, true);
2433 2437
2434 DBGUNDO(sk, "partial loss"); 2438 DBGUNDO(sk, "partial loss");
2435 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO); 2439 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
2436 if (frto_undo) 2440 if (frto_undo)
2437 NET_INC_STATS_BH(sock_net(sk), 2441 NET_INC_STATS(sock_net(sk),
2438 LINUX_MIB_TCPSPURIOUSRTOS); 2442 LINUX_MIB_TCPSPURIOUSRTOS);
2439 inet_csk(sk)->icsk_retransmits = 0; 2443 inet_csk(sk)->icsk_retransmits = 0;
2440 if (frto_undo || tcp_is_sack(tp)) 2444 if (frto_undo || tcp_is_sack(tp))
2441 tcp_set_ca_state(sk, TCP_CA_Open); 2445 tcp_set_ca_state(sk, TCP_CA_Open);
@@ -2559,7 +2563,7 @@ static void tcp_mtup_probe_failed(struct sock *sk)
2559 2563
2560 icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1; 2564 icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1;
2561 icsk->icsk_mtup.probe_size = 0; 2565 icsk->icsk_mtup.probe_size = 0;
2562 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMTUPFAIL); 2566 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPFAIL);
2563} 2567}
2564 2568
2565static void tcp_mtup_probe_success(struct sock *sk) 2569static void tcp_mtup_probe_success(struct sock *sk)
@@ -2579,7 +2583,7 @@ static void tcp_mtup_probe_success(struct sock *sk)
2579 icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size; 2583 icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
2580 icsk->icsk_mtup.probe_size = 0; 2584 icsk->icsk_mtup.probe_size = 0;
2581 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); 2585 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
2582 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMTUPSUCCESS); 2586 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPSUCCESS);
2583} 2587}
2584 2588
2585/* Do a simple retransmit without using the backoff mechanisms in 2589/* Do a simple retransmit without using the backoff mechanisms in
@@ -2643,7 +2647,7 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
2643 else 2647 else
2644 mib_idx = LINUX_MIB_TCPSACKRECOVERY; 2648 mib_idx = LINUX_MIB_TCPSACKRECOVERY;
2645 2649
2646 NET_INC_STATS_BH(sock_net(sk), mib_idx); 2650 NET_INC_STATS(sock_net(sk), mib_idx);
2647 2651
2648 tp->prior_ssthresh = 0; 2652 tp->prior_ssthresh = 0;
2649 tcp_init_undo(tp); 2653 tcp_init_undo(tp);
@@ -2736,7 +2740,7 @@ static bool tcp_try_undo_partial(struct sock *sk, const int acked)
2736 2740
2737 DBGUNDO(sk, "partial recovery"); 2741 DBGUNDO(sk, "partial recovery");
2738 tcp_undo_cwnd_reduction(sk, true); 2742 tcp_undo_cwnd_reduction(sk, true);
2739 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO); 2743 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
2740 tcp_try_keep_open(sk); 2744 tcp_try_keep_open(sk);
2741 return true; 2745 return true;
2742 } 2746 }
@@ -3087,8 +3091,7 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
3087 return; 3091 return;
3088 3092
3089 shinfo = skb_shinfo(skb); 3093 shinfo = skb_shinfo(skb);
3090 if ((shinfo->tx_flags & SKBTX_ACK_TSTAMP) && 3094 if (!before(shinfo->tskey, prior_snd_una) &&
3091 !before(shinfo->tskey, prior_snd_una) &&
3092 before(shinfo->tskey, tcp_sk(sk)->snd_una)) 3095 before(shinfo->tskey, tcp_sk(sk)->snd_una))
3093 __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK); 3096 __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
3094} 3097}
@@ -3245,8 +3248,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3245 tcp_rearm_rto(sk); 3248 tcp_rearm_rto(sk);
3246 } 3249 }
3247 3250
3248 if (icsk->icsk_ca_ops->pkts_acked) 3251 if (icsk->icsk_ca_ops->pkts_acked) {
3249 icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked, ca_rtt_us); 3252 struct ack_sample sample = { .pkts_acked = pkts_acked,
3253 .rtt_us = ca_rtt_us };
3254
3255 icsk->icsk_ca_ops->pkts_acked(sk, &sample);
3256 }
3250 3257
3251#if FASTRETRANS_DEBUG > 0 3258#if FASTRETRANS_DEBUG > 0
3252 WARN_ON((int)tp->sacked_out < 0); 3259 WARN_ON((int)tp->sacked_out < 0);
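This is the producer side of the ack_sample change seen in the congestion-control modules above: tcp_clean_rtx_queue() packages pkts_acked and ca_rtt_us into one struct and hands the modules a pointer, so later fields can be added without touching every pkts_acked() implementation again. A minimal sketch of that callback shape (simplified struct and ops table, not the kernel's tcp_congestion_ops):

    /* pkts_acked hook sketch -- illustrative, not the kernel's tcp_congestion_ops */
    #include <stdio.h>
    #include <stdint.h>

    struct ack_sample { uint32_t pkts_acked; int32_t rtt_us; };  /* simplified */

    struct ca_ops {
        /* one pointer argument: new fields extend the sample, not the hook */
        void (*pkts_acked)(void *ca_priv, const struct ack_sample *sample);
    };

    static void demo_pkts_acked(void *ca_priv, const struct ack_sample *sample)
    {
        uint32_t *acked_total = ca_priv;

        if (sample->rtt_us < 0)          /* duplicate ACK: no RTT sample */
            return;
        *acked_total += sample->pkts_acked;
    }

    int main(void)
    {
        struct ca_ops ops = { .pkts_acked = demo_pkts_acked };
        uint32_t total = 0;

        if (ops.pkts_acked) {
            struct ack_sample sample = { .pkts_acked = 3, .rtt_us = 42000 };

            ops.pkts_acked(&total, &sample);
        }
        printf("acked=%u\n", (unsigned)total);
        return 0;
    }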
@@ -3352,9 +3359,10 @@ static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
3352{ 3359{
3353 u32 delta = ack - tp->snd_una; 3360 u32 delta = ack - tp->snd_una;
3354 3361
3355 u64_stats_update_begin(&tp->syncp); 3362 sock_owned_by_me((struct sock *)tp);
3363 u64_stats_update_begin_raw(&tp->syncp);
3356 tp->bytes_acked += delta; 3364 tp->bytes_acked += delta;
3357 u64_stats_update_end(&tp->syncp); 3365 u64_stats_update_end_raw(&tp->syncp);
3358 tp->snd_una = ack; 3366 tp->snd_una = ack;
3359} 3367}
3360 3368
@@ -3363,9 +3371,10 @@ static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
3363{ 3371{
3364 u32 delta = seq - tp->rcv_nxt; 3372 u32 delta = seq - tp->rcv_nxt;
3365 3373
3366 u64_stats_update_begin(&tp->syncp); 3374 sock_owned_by_me((struct sock *)tp);
3375 u64_stats_update_begin_raw(&tp->syncp);
3367 tp->bytes_received += delta; 3376 tp->bytes_received += delta;
3368 u64_stats_update_end(&tp->syncp); 3377 u64_stats_update_end_raw(&tp->syncp);
3369 tp->rcv_nxt = seq; 3378 tp->rcv_nxt = seq;
3370} 3379}
3371 3380
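bytes_acked and bytes_received are u64 counters guarded by a u64_stats seqcount on 32-bit builds. The switch to the _raw begin/end variants drops the writer-side preemption protection, which is only sound because both updaters now assert sock_owned_by_me(): the socket lock already serializes writers. A userspace sketch of the seqcount idea the raw variant leans on, assuming a single writer:

    /* u64_stats seqcount sketch -- illustrative; assumes one writer, which is
     * what sock_owned_by_me() asserts in the kernel code above */
    #include <stdio.h>
    #include <stdint.h>

    struct stats {
        unsigned int seq;            /* odd while an update is in flight */
        uint64_t bytes_acked;
    };

    static void update_begin_raw(struct stats *s) { s->seq++; }  /* no preempt guard */
    static void update_end_raw(struct stats *s)   { s->seq++; }

    static uint64_t read_bytes_acked(const struct stats *s)
    {
        unsigned int start;
        uint64_t val;

        do {                             /* retry if a writer was active */
            start = s->seq;
            val = s->bytes_acked;
        } while (start & 1 || start != s->seq);
        return val;
    }

    int main(void)
    {
        struct stats s = { 0, 0 };

        update_begin_raw(&s);
        s.bytes_acked += 1460;           /* delta = ack - snd_una */
        update_end_raw(&s);
        printf("bytes_acked=%llu\n", (unsigned long long)read_bytes_acked(&s));
        return 0;
    }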
@@ -3431,7 +3440,7 @@ bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
3431 s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time); 3440 s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
3432 3441
3433 if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) { 3442 if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
3434 NET_INC_STATS_BH(net, mib_idx); 3443 NET_INC_STATS(net, mib_idx);
3435 return true; /* rate-limited: don't send yet! */ 3444 return true; /* rate-limited: don't send yet! */
3436 } 3445 }
3437 } 3446 }
@@ -3464,7 +3473,7 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
3464 challenge_count = 0; 3473 challenge_count = 0;
3465 } 3474 }
3466 if (++challenge_count <= sysctl_tcp_challenge_ack_limit) { 3475 if (++challenge_count <= sysctl_tcp_challenge_ack_limit) {
3467 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK); 3476 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
3468 tcp_send_ack(sk); 3477 tcp_send_ack(sk);
3469 } 3478 }
3470} 3479}
@@ -3513,8 +3522,8 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
3513 tcp_set_ca_state(sk, TCP_CA_CWR); 3522 tcp_set_ca_state(sk, TCP_CA_CWR);
3514 tcp_end_cwnd_reduction(sk); 3523 tcp_end_cwnd_reduction(sk);
3515 tcp_try_keep_open(sk); 3524 tcp_try_keep_open(sk);
3516 NET_INC_STATS_BH(sock_net(sk), 3525 NET_INC_STATS(sock_net(sk),
3517 LINUX_MIB_TCPLOSSPROBERECOVERY); 3526 LINUX_MIB_TCPLOSSPROBERECOVERY);
3518 } else if (!(flag & (FLAG_SND_UNA_ADVANCED | 3527 } else if (!(flag & (FLAG_SND_UNA_ADVANCED |
3519 FLAG_NOT_DUP | FLAG_DATA_SACKED))) { 3528 FLAG_NOT_DUP | FLAG_DATA_SACKED))) {
3520 /* Pure dupack: original and TLP probe arrived; no loss */ 3529 /* Pure dupack: original and TLP probe arrived; no loss */
@@ -3618,14 +3627,14 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3618 3627
3619 tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE); 3628 tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
3620 3629
3621 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPACKS); 3630 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS);
3622 } else { 3631 } else {
3623 u32 ack_ev_flags = CA_ACK_SLOWPATH; 3632 u32 ack_ev_flags = CA_ACK_SLOWPATH;
3624 3633
3625 if (ack_seq != TCP_SKB_CB(skb)->end_seq) 3634 if (ack_seq != TCP_SKB_CB(skb)->end_seq)
3626 flag |= FLAG_DATA; 3635 flag |= FLAG_DATA;
3627 else 3636 else
3628 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPUREACKS); 3637 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS);
3629 3638
3630 flag |= tcp_ack_update_window(sk, skb, ack, ack_seq); 3639 flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
3631 3640
@@ -4128,7 +4137,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
4128 else 4137 else
4129 mib_idx = LINUX_MIB_TCPDSACKOFOSENT; 4138 mib_idx = LINUX_MIB_TCPDSACKOFOSENT;
4130 4139
4131 NET_INC_STATS_BH(sock_net(sk), mib_idx); 4140 NET_INC_STATS(sock_net(sk), mib_idx);
4132 4141
4133 tp->rx_opt.dsack = 1; 4142 tp->rx_opt.dsack = 1;
4134 tp->duplicate_sack[0].start_seq = seq; 4143 tp->duplicate_sack[0].start_seq = seq;
@@ -4152,7 +4161,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
4152 4161
4153 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && 4162 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
4154 before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { 4163 before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4155 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); 4164 NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4156 tcp_enter_quickack_mode(sk); 4165 tcp_enter_quickack_mode(sk);
4157 4166
4158 if (tcp_is_sack(tp) && sysctl_tcp_dsack) { 4167 if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
@@ -4302,7 +4311,7 @@ static bool tcp_try_coalesce(struct sock *sk,
4302 4311
4303 atomic_add(delta, &sk->sk_rmem_alloc); 4312 atomic_add(delta, &sk->sk_rmem_alloc);
4304 sk_mem_charge(sk, delta); 4313 sk_mem_charge(sk, delta);
4305 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE); 4314 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
4306 TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq; 4315 TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
4307 TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq; 4316 TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
4308 TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags; 4317 TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;
@@ -4390,7 +4399,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
4390 tcp_ecn_check_ce(tp, skb); 4399 tcp_ecn_check_ce(tp, skb);
4391 4400
4392 if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) { 4401 if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
4393 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFODROP); 4402 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP);
4394 tcp_drop(sk, skb); 4403 tcp_drop(sk, skb);
4395 return; 4404 return;
4396 } 4405 }
@@ -4399,7 +4408,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
4399 tp->pred_flags = 0; 4408 tp->pred_flags = 0;
4400 inet_csk_schedule_ack(sk); 4409 inet_csk_schedule_ack(sk);
4401 4410
4402 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOQUEUE); 4411 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
4403 SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n", 4412 SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
4404 tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); 4413 tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
4405 4414
@@ -4454,7 +4463,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
4454 if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) { 4463 if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
4455 if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { 4464 if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4456 /* All the bits are present. Drop. */ 4465 /* All the bits are present. Drop. */
4457 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOMERGE); 4466 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
4458 tcp_drop(sk, skb); 4467 tcp_drop(sk, skb);
4459 skb = NULL; 4468 skb = NULL;
4460 tcp_dsack_set(sk, seq, end_seq); 4469 tcp_dsack_set(sk, seq, end_seq);
@@ -4493,7 +4502,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
4493 __skb_unlink(skb1, &tp->out_of_order_queue); 4502 __skb_unlink(skb1, &tp->out_of_order_queue);
4494 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, 4503 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4495 TCP_SKB_CB(skb1)->end_seq); 4504 TCP_SKB_CB(skb1)->end_seq);
4496 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOMERGE); 4505 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
4497 tcp_drop(sk, skb1); 4506 tcp_drop(sk, skb1);
4498 } 4507 }
4499 4508
@@ -4608,14 +4617,12 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
4608 4617
4609 __set_current_state(TASK_RUNNING); 4618 __set_current_state(TASK_RUNNING);
4610 4619
4611 local_bh_enable();
4612 if (!skb_copy_datagram_msg(skb, 0, tp->ucopy.msg, chunk)) { 4620 if (!skb_copy_datagram_msg(skb, 0, tp->ucopy.msg, chunk)) {
4613 tp->ucopy.len -= chunk; 4621 tp->ucopy.len -= chunk;
4614 tp->copied_seq += chunk; 4622 tp->copied_seq += chunk;
4615 eaten = (chunk == skb->len); 4623 eaten = (chunk == skb->len);
4616 tcp_rcv_space_adjust(sk); 4624 tcp_rcv_space_adjust(sk);
4617 } 4625 }
4618 local_bh_disable();
4619 } 4626 }
4620 4627
4621 if (eaten <= 0) { 4628 if (eaten <= 0) {
@@ -4658,7 +4665,7 @@ queue_and_out:
4658 4665
4659 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { 4666 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
4660 /* A retransmit, 2nd most common case. Force an immediate ack. */ 4667 /* A retransmit, 2nd most common case. Force an immediate ack. */
4661 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); 4668 NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4662 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); 4669 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
4663 4670
4664out_of_window: 4671out_of_window:
@@ -4704,7 +4711,7 @@ static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
4704 4711
4705 __skb_unlink(skb, list); 4712 __skb_unlink(skb, list);
4706 __kfree_skb(skb); 4713 __kfree_skb(skb);
4707 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED); 4714 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
4708 4715
4709 return next; 4716 return next;
4710} 4717}
@@ -4863,7 +4870,7 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
4863 bool res = false; 4870 bool res = false;
4864 4871
4865 if (!skb_queue_empty(&tp->out_of_order_queue)) { 4872 if (!skb_queue_empty(&tp->out_of_order_queue)) {
4866 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_OFOPRUNED); 4873 NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
4867 __skb_queue_purge(&tp->out_of_order_queue); 4874 __skb_queue_purge(&tp->out_of_order_queue);
4868 4875
4869 /* Reset SACK state. A conforming SACK implementation will 4876 /* Reset SACK state. A conforming SACK implementation will
@@ -4892,7 +4899,7 @@ static int tcp_prune_queue(struct sock *sk)
4892 4899
4893 SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq); 4900 SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq);
4894 4901
4895 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PRUNECALLED); 4902 NET_INC_STATS(sock_net(sk), LINUX_MIB_PRUNECALLED);
4896 4903
4897 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) 4904 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
4898 tcp_clamp_window(sk); 4905 tcp_clamp_window(sk);
@@ -4922,7 +4929,7 @@ static int tcp_prune_queue(struct sock *sk)
4922 * drop receive data on the floor. It will get retransmitted 4929 * drop receive data on the floor. It will get retransmitted
4923 * and hopefully then we'll have sufficient space. 4930 * and hopefully then we'll have sufficient space.
4924 */ 4931 */
4925 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_RCVPRUNED); 4932 NET_INC_STATS(sock_net(sk), LINUX_MIB_RCVPRUNED);
4926 4933
4927 /* Massive buffer overcommit. */ 4934 /* Massive buffer overcommit. */
4928 tp->pred_flags = 0; 4935 tp->pred_flags = 0;
@@ -5131,7 +5138,6 @@ static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
5131 int chunk = skb->len - hlen; 5138 int chunk = skb->len - hlen;
5132 int err; 5139 int err;
5133 5140
5134 local_bh_enable();
5135 if (skb_csum_unnecessary(skb)) 5141 if (skb_csum_unnecessary(skb))
5136 err = skb_copy_datagram_msg(skb, hlen, tp->ucopy.msg, chunk); 5142 err = skb_copy_datagram_msg(skb, hlen, tp->ucopy.msg, chunk);
5137 else 5143 else
@@ -5143,32 +5149,9 @@ static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
5143 tcp_rcv_space_adjust(sk); 5149 tcp_rcv_space_adjust(sk);
5144 } 5150 }
5145 5151
5146 local_bh_disable();
5147 return err; 5152 return err;
5148} 5153}
5149 5154
5150static __sum16 __tcp_checksum_complete_user(struct sock *sk,
5151 struct sk_buff *skb)
5152{
5153 __sum16 result;
5154
5155 if (sock_owned_by_user(sk)) {
5156 local_bh_enable();
5157 result = __tcp_checksum_complete(skb);
5158 local_bh_disable();
5159 } else {
5160 result = __tcp_checksum_complete(skb);
5161 }
5162 return result;
5163}
5164
5165static inline bool tcp_checksum_complete_user(struct sock *sk,
5166 struct sk_buff *skb)
5167{
5168 return !skb_csum_unnecessary(skb) &&
5169 __tcp_checksum_complete_user(sk, skb);
5170}
5171
5172/* Does PAWS and seqno based validation of an incoming segment, flags will 5155/* Does PAWS and seqno based validation of an incoming segment, flags will
5173 * play significant role here. 5156 * play significant role here.
5174 */ 5157 */
@@ -5181,7 +5164,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
5181 if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && 5164 if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
5182 tcp_paws_discard(sk, skb)) { 5165 tcp_paws_discard(sk, skb)) {
5183 if (!th->rst) { 5166 if (!th->rst) {
5184 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); 5167 NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
5185 if (!tcp_oow_rate_limited(sock_net(sk), skb, 5168 if (!tcp_oow_rate_limited(sock_net(sk), skb,
5186 LINUX_MIB_TCPACKSKIPPEDPAWS, 5169 LINUX_MIB_TCPACKSKIPPEDPAWS,
5187 &tp->last_oow_ack_time)) 5170 &tp->last_oow_ack_time))
@@ -5233,8 +5216,8 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
5233 if (th->syn) { 5216 if (th->syn) {
5234syn_challenge: 5217syn_challenge:
5235 if (syn_inerr) 5218 if (syn_inerr)
5236 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); 5219 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
5237 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE); 5220 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE);
5238 tcp_send_challenge_ack(sk, skb); 5221 tcp_send_challenge_ack(sk, skb);
5239 goto discard; 5222 goto discard;
5240 } 5223 }
@@ -5349,7 +5332,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
5349 tcp_data_snd_check(sk); 5332 tcp_data_snd_check(sk);
5350 return; 5333 return;
5351 } else { /* Header too small */ 5334 } else { /* Header too small */
5352 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); 5335 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
5353 goto discard; 5336 goto discard;
5354 } 5337 }
5355 } else { 5338 } else {
@@ -5377,12 +5360,13 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
5377 5360
5378 __skb_pull(skb, tcp_header_len); 5361 __skb_pull(skb, tcp_header_len);
5379 tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); 5362 tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
5380 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER); 5363 NET_INC_STATS(sock_net(sk),
5364 LINUX_MIB_TCPHPHITSTOUSER);
5381 eaten = 1; 5365 eaten = 1;
5382 } 5366 }
5383 } 5367 }
5384 if (!eaten) { 5368 if (!eaten) {
5385 if (tcp_checksum_complete_user(sk, skb)) 5369 if (tcp_checksum_complete(skb))
5386 goto csum_error; 5370 goto csum_error;
5387 5371
5388 if ((int)skb->truesize > sk->sk_forward_alloc) 5372 if ((int)skb->truesize > sk->sk_forward_alloc)
@@ -5399,7 +5383,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
5399 5383
5400 tcp_rcv_rtt_measure_ts(sk, skb); 5384 tcp_rcv_rtt_measure_ts(sk, skb);
5401 5385
5402 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS); 5386 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
5403 5387
5404 /* Bulk data transfer: receiver */ 5388 /* Bulk data transfer: receiver */
5405 eaten = tcp_queue_rcv(sk, skb, tcp_header_len, 5389 eaten = tcp_queue_rcv(sk, skb, tcp_header_len,
@@ -5426,7 +5410,7 @@ no_ack:
5426 } 5410 }
5427 5411
5428slow_path: 5412slow_path:
5429 if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb)) 5413 if (len < (th->doff << 2) || tcp_checksum_complete(skb))
5430 goto csum_error; 5414 goto csum_error;
5431 5415
5432 if (!th->ack && !th->rst && !th->syn) 5416 if (!th->ack && !th->rst && !th->syn)
@@ -5456,8 +5440,8 @@ step5:
5456 return; 5440 return;
5457 5441
5458csum_error: 5442csum_error:
5459 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS); 5443 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
5460 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); 5444 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
5461 5445
5462discard: 5446discard:
5463 tcp_drop(sk, skb); 5447 tcp_drop(sk, skb);
@@ -5549,12 +5533,14 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
5549 break; 5533 break;
5550 } 5534 }
5551 tcp_rearm_rto(sk); 5535 tcp_rearm_rto(sk);
5552 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL); 5536 NET_INC_STATS(sock_net(sk),
5537 LINUX_MIB_TCPFASTOPENACTIVEFAIL);
5553 return true; 5538 return true;
5554 } 5539 }
5555 tp->syn_data_acked = tp->syn_data; 5540 tp->syn_data_acked = tp->syn_data;
5556 if (tp->syn_data_acked) 5541 if (tp->syn_data_acked)
5557 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE); 5542 NET_INC_STATS(sock_net(sk),
5543 LINUX_MIB_TCPFASTOPENACTIVE);
5558 5544
5559 tcp_fastopen_add_skb(sk, synack); 5545 tcp_fastopen_add_skb(sk, synack);
5560 5546
@@ -5589,7 +5575,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5589 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && 5575 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
5590 !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp, 5576 !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
5591 tcp_time_stamp)) { 5577 tcp_time_stamp)) {
5592 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSACTIVEREJECTED); 5578 NET_INC_STATS(sock_net(sk),
5579 LINUX_MIB_PAWSACTIVEREJECTED);
5593 goto reset_and_undo; 5580 goto reset_and_undo;
5594 } 5581 }
5595 5582
@@ -5958,7 +5945,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
5958 (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && 5945 (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
5959 after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) { 5946 after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
5960 tcp_done(sk); 5947 tcp_done(sk);
5961 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA); 5948 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
5962 return 1; 5949 return 1;
5963 } 5950 }
5964 5951
@@ -6015,7 +6002,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
6015 if (sk->sk_shutdown & RCV_SHUTDOWN) { 6002 if (sk->sk_shutdown & RCV_SHUTDOWN) {
6016 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && 6003 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
6017 after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) { 6004 after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
6018 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA); 6005 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
6019 tcp_reset(sk); 6006 tcp_reset(sk);
6020 return 1; 6007 return 1;
6021 } 6008 }
@@ -6153,10 +6140,10 @@ static bool tcp_syn_flood_action(const struct sock *sk,
6153 if (net->ipv4.sysctl_tcp_syncookies) { 6140 if (net->ipv4.sysctl_tcp_syncookies) {
6154 msg = "Sending cookies"; 6141 msg = "Sending cookies";
6155 want_cookie = true; 6142 want_cookie = true;
6156 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); 6143 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
6157 } else 6144 } else
6158#endif 6145#endif
6159 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); 6146 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
6160 6147
6161 if (!queue->synflood_warned && 6148 if (!queue->synflood_warned &&
6162 net->ipv4.sysctl_tcp_syncookies != 2 && 6149 net->ipv4.sysctl_tcp_syncookies != 2 &&
@@ -6217,7 +6204,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
6217 * timeout. 6204 * timeout.
6218 */ 6205 */
6219 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) { 6206 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
6220 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 6207 NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
6221 goto drop; 6208 goto drop;
6222 } 6209 }
6223 6210
@@ -6264,7 +6251,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
6264 if (dst && strict && 6251 if (dst && strict &&
6265 !tcp_peer_is_proven(req, dst, true, 6252 !tcp_peer_is_proven(req, dst, true,
6266 tmp_opt.saw_tstamp)) { 6253 tmp_opt.saw_tstamp)) {
6267 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); 6254 NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
6268 goto drop_and_release; 6255 goto drop_and_release;
6269 } 6256 }
6270 } 6257 }
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index d2a5763e5abc..8219d0d8dc83 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -320,7 +320,7 @@ void tcp_req_err(struct sock *sk, u32 seq, bool abort)
320 * an established socket here. 320 * an established socket here.
321 */ 321 */
322 if (seq != tcp_rsk(req)->snt_isn) { 322 if (seq != tcp_rsk(req)->snt_isn) {
323 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); 323 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
324 } else if (abort) { 324 } else if (abort) {
325 /* 325 /*
326 * Still in SYN_RECV, just remove it silently. 326 * Still in SYN_RECV, just remove it silently.
@@ -372,7 +372,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
372 th->dest, iph->saddr, ntohs(th->source), 372 th->dest, iph->saddr, ntohs(th->source),
373 inet_iif(icmp_skb)); 373 inet_iif(icmp_skb));
374 if (!sk) { 374 if (!sk) {
375 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); 375 __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
376 return; 376 return;
377 } 377 }
378 if (sk->sk_state == TCP_TIME_WAIT) { 378 if (sk->sk_state == TCP_TIME_WAIT) {
@@ -396,13 +396,13 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
396 */ 396 */
397 if (sock_owned_by_user(sk)) { 397 if (sock_owned_by_user(sk)) {
398 if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)) 398 if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
399 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); 399 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
400 } 400 }
401 if (sk->sk_state == TCP_CLOSE) 401 if (sk->sk_state == TCP_CLOSE)
402 goto out; 402 goto out;
403 403
404 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { 404 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
405 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); 405 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
406 goto out; 406 goto out;
407 } 407 }
408 408
@@ -413,7 +413,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
413 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; 413 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
414 if (sk->sk_state != TCP_LISTEN && 414 if (sk->sk_state != TCP_LISTEN &&
415 !between(seq, snd_una, tp->snd_nxt)) { 415 !between(seq, snd_una, tp->snd_nxt)) {
416 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); 416 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
417 goto out; 417 goto out;
418 } 418 }
419 419
@@ -692,13 +692,15 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
692 offsetof(struct inet_timewait_sock, tw_bound_dev_if)); 692 offsetof(struct inet_timewait_sock, tw_bound_dev_if));
693 693
694 arg.tos = ip_hdr(skb)->tos; 694 arg.tos = ip_hdr(skb)->tos;
695 local_bh_disable();
695 ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), 696 ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
696 skb, &TCP_SKB_CB(skb)->header.h4.opt, 697 skb, &TCP_SKB_CB(skb)->header.h4.opt,
697 ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, 698 ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
698 &arg, arg.iov[0].iov_len); 699 &arg, arg.iov[0].iov_len);
699 700
700 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); 701 __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
701 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); 702 __TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
703 local_bh_enable();
702 704
703#ifdef CONFIG_TCP_MD5SIG 705#ifdef CONFIG_TCP_MD5SIG
704out: 706out:
@@ -774,12 +776,14 @@ static void tcp_v4_send_ack(struct net *net,
774 if (oif) 776 if (oif)
775 arg.bound_dev_if = oif; 777 arg.bound_dev_if = oif;
776 arg.tos = tos; 778 arg.tos = tos;
779 local_bh_disable();
777 ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), 780 ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
778 skb, &TCP_SKB_CB(skb)->header.h4.opt, 781 skb, &TCP_SKB_CB(skb)->header.h4.opt,
779 ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, 782 ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
780 &arg, arg.iov[0].iov_len); 783 &arg, arg.iov[0].iov_len);
781 784
782 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); 785 __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
786 local_bh_enable();
783} 787}
784 788
785static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) 789static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
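tcp_v4_send_reset() and tcp_v4_send_ack() can now be reached from process context as well, yet they use a per-CPU control socket via this_cpu_ptr() and the non-BH-safe __TCP_INC_STATS(), so the sequence is bracketed with local_bh_disable()/local_bh_enable() to keep softirq processing off this CPU while the per-CPU state is in use. The pattern, reduced to an illustrative stub:

    /* per-CPU resource under local_bh_disable() -- illustrative pattern only */
    #include <stdio.h>

    struct reply_sock { unsigned long out_segs, out_rsts; };

    static struct reply_sock percpu_sk;      /* stand-in for the per-CPU socket */

    static void local_bh_disable(void) { /* kernel: blocks softirqs on this CPU */ }
    static void local_bh_enable(void)  { }

    static void send_reset(void)
    {
        local_bh_disable();
        /* safe to touch the per-CPU socket and use the __ (non-safe)
         * counter increments while softirqs cannot run on this CPU */
        percpu_sk.out_segs++;                /* __TCP_INC_STATS(OUTSEGS) */
        percpu_sk.out_rsts++;                /* __TCP_INC_STATS(OUTRSTS) */
        local_bh_enable();
    }

    int main(void)
    {
        send_reset();
        printf("outsegs=%lu outrsts=%lu\n", percpu_sk.out_segs, percpu_sk.out_rsts);
        return 0;
    }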
@@ -1151,12 +1155,12 @@ static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
1151 return false; 1155 return false;
1152 1156
1153 if (hash_expected && !hash_location) { 1157 if (hash_expected && !hash_location) {
1154 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); 1158 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1155 return true; 1159 return true;
1156 } 1160 }
1157 1161
1158 if (!hash_expected && hash_location) { 1162 if (!hash_expected && hash_location) {
1159 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); 1163 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1160 return true; 1164 return true;
1161 } 1165 }
1162 1166
@@ -1342,7 +1346,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1342 return newsk; 1346 return newsk;
1343 1347
1344exit_overflow: 1348exit_overflow:
1345 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 1349 NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1346exit_nonewsk: 1350exit_nonewsk:
1347 dst_release(dst); 1351 dst_release(dst);
1348exit: 1352exit:
@@ -1432,8 +1436,8 @@ discard:
1432 return 0; 1436 return 0;
1433 1437
1434csum_err: 1438csum_err:
1435 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS); 1439 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1436 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); 1440 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1437 goto discard; 1441 goto discard;
1438} 1442}
1439EXPORT_SYMBOL(tcp_v4_do_rcv); 1443EXPORT_SYMBOL(tcp_v4_do_rcv);
@@ -1506,16 +1510,16 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
1506 1510
1507 __skb_queue_tail(&tp->ucopy.prequeue, skb); 1511 __skb_queue_tail(&tp->ucopy.prequeue, skb);
1508 tp->ucopy.memory += skb->truesize; 1512 tp->ucopy.memory += skb->truesize;
1509 if (tp->ucopy.memory > sk->sk_rcvbuf) { 1513 if (skb_queue_len(&tp->ucopy.prequeue) >= 32 ||
1514 tp->ucopy.memory + atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) {
1510 struct sk_buff *skb1; 1515 struct sk_buff *skb1;
1511 1516
1512 BUG_ON(sock_owned_by_user(sk)); 1517 BUG_ON(sock_owned_by_user(sk));
1518 __NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUEDROPPED,
1519 skb_queue_len(&tp->ucopy.prequeue));
1513 1520
1514 while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) { 1521 while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
1515 sk_backlog_rcv(sk, skb1); 1522 sk_backlog_rcv(sk, skb1);
1516 NET_INC_STATS_BH(sock_net(sk),
1517 LINUX_MIB_TCPPREQUEUEDROPPED);
1518 }
1519 1523
1520 tp->ucopy.memory = 0; 1524 tp->ucopy.memory = 0;
1521 } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { 1525 } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
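The prequeue overflow test changes shape here: instead of comparing only the prequeued memory against sk_rcvbuf, the queue is flushed once it holds 32 or more skbs, or once prequeued memory plus already-charged receive memory would exceed sk_rcvbuf, and the drop counter is bumped once with the whole queue length before the flush loop rather than per skb inside it. A tiny sketch of the new condition (the buffer sizes below are made-up examples):

    /* prequeue flush condition sketch -- illustrative */
    #include <stdio.h>
    #include <stdbool.h>

    static bool must_flush(unsigned int qlen, unsigned int prequeue_mem,
                           unsigned int rmem_alloc, unsigned int rcvbuf)
    {
        return qlen >= 32 || prequeue_mem + rmem_alloc > rcvbuf;
    }

    int main(void)
    {
        /* short queue but receive buffer nearly full: flush anyway */
        printf("%d\n", must_flush(4, 16384, 212992 - 8192, 212992));
        /* long queue with plenty of room: flush because of the 32-skb cap */
        printf("%d\n", must_flush(40, 60000, 0, 212992));
        return 0;
    }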
@@ -1547,7 +1551,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
1547 goto discard_it; 1551 goto discard_it;
1548 1552
1549 /* Count it even if it's bad */ 1553 /* Count it even if it's bad */
1550 TCP_INC_STATS_BH(net, TCP_MIB_INSEGS); 1554 __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1551 1555
1552 if (!pskb_may_pull(skb, sizeof(struct tcphdr))) 1556 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1553 goto discard_it; 1557 goto discard_it;
@@ -1629,7 +1633,7 @@ process:
1629 } 1633 }
1630 } 1634 }
1631 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { 1635 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1632 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); 1636 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1633 goto discard_and_relse; 1637 goto discard_and_relse;
1634 } 1638 }
1635 1639
@@ -1662,7 +1666,7 @@ process:
1662 } else if (unlikely(sk_add_backlog(sk, skb, 1666 } else if (unlikely(sk_add_backlog(sk, skb,
1663 sk->sk_rcvbuf + sk->sk_sndbuf))) { 1667 sk->sk_rcvbuf + sk->sk_sndbuf))) {
1664 bh_unlock_sock(sk); 1668 bh_unlock_sock(sk);
1665 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); 1669 __NET_INC_STATS(net, LINUX_MIB_TCPBACKLOGDROP);
1666 goto discard_and_relse; 1670 goto discard_and_relse;
1667 } 1671 }
1668 bh_unlock_sock(sk); 1672 bh_unlock_sock(sk);
@@ -1679,9 +1683,9 @@ no_tcp_socket:
1679 1683
1680 if (tcp_checksum_complete(skb)) { 1684 if (tcp_checksum_complete(skb)) {
1681csum_error: 1685csum_error:
1682 TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS); 1686 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1683bad_packet: 1687bad_packet:
1684 TCP_INC_STATS_BH(net, TCP_MIB_INERRS); 1688 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1685 } else { 1689 } else {
1686 tcp_v4_send_reset(NULL, skb); 1690 tcp_v4_send_reset(NULL, skb);
1687 } 1691 }
@@ -1835,7 +1839,9 @@ void tcp_v4_destroy_sock(struct sock *sk)
1835 tcp_free_fastopen_req(tp); 1839 tcp_free_fastopen_req(tp);
1836 tcp_saved_syn_free(tp); 1840 tcp_saved_syn_free(tp);
1837 1841
1842 local_bh_disable();
1838 sk_sockets_allocated_dec(sk); 1843 sk_sockets_allocated_dec(sk);
1844 local_bh_enable();
1839 1845
1840 if (mem_cgroup_sockets_enabled && sk->sk_memcg) 1846 if (mem_cgroup_sockets_enabled && sk->sk_memcg)
1841 sock_release_memcg(sk); 1847 sock_release_memcg(sk);
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index 1e70fa8fa793..c67ece1390c2 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -260,13 +260,13 @@ static void tcp_lp_rtt_sample(struct sock *sk, u32 rtt)
260 * newReno in increase case. 260 * newReno in increase case.
261 * We work it out by following the idea from TCP-LP's paper directly 261 * We work it out by following the idea from TCP-LP's paper directly
262 */ 262 */
263static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, s32 rtt_us) 263static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample)
264{ 264{
265 struct tcp_sock *tp = tcp_sk(sk); 265 struct tcp_sock *tp = tcp_sk(sk);
266 struct lp *lp = inet_csk_ca(sk); 266 struct lp *lp = inet_csk_ca(sk);
267 267
268 if (rtt_us > 0) 268 if (sample->rtt_us > 0)
269 tcp_lp_rtt_sample(sk, rtt_us); 269 tcp_lp_rtt_sample(sk, sample->rtt_us);
270 270
271 /* calc inference */ 271 /* calc inference */
272 if (tcp_time_stamp > tp->rx_opt.rcv_tsecr) 272 if (tcp_time_stamp > tp->rx_opt.rcv_tsecr)
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 4c53e7c86586..4b95ec4ed2c8 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -235,7 +235,7 @@ kill:
235 } 235 }
236 236
237 if (paws_reject) 237 if (paws_reject)
238 NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_PAWSESTABREJECTED); 238 __NET_INC_STATS(twsk_net(tw), LINUX_MIB_PAWSESTABREJECTED);
239 239
240 if (!th->rst) { 240 if (!th->rst) {
241 /* In this case we must reset the TIMEWAIT timer. 241 /* In this case we must reset the TIMEWAIT timer.
@@ -337,7 +337,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
337 * socket up. We've got bigger problems than 337 * socket up. We've got bigger problems than
338 * non-graceful socket closings. 338 * non-graceful socket closings.
339 */ 339 */
340 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEWAITOVERFLOW); 340 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEWAITOVERFLOW);
341 } 341 }
342 342
343 tcp_update_metrics(sk); 343 tcp_update_metrics(sk);
@@ -545,7 +545,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
545 newtp->rack.mstamp.v64 = 0; 545 newtp->rack.mstamp.v64 = 0;
546 newtp->rack.advanced = 0; 546 newtp->rack.advanced = 0;
547 547
548 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS); 548 __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
549 } 549 }
550 return newsk; 550 return newsk;
551} 551}
@@ -710,7 +710,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
710 &tcp_rsk(req)->last_oow_ack_time)) 710 &tcp_rsk(req)->last_oow_ack_time))
711 req->rsk_ops->send_ack(sk, skb, req); 711 req->rsk_ops->send_ack(sk, skb, req);
712 if (paws_reject) 712 if (paws_reject)
713 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); 713 __NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
714 return NULL; 714 return NULL;
715 } 715 }
716 716
@@ -729,7 +729,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
729 * "fourth, check the SYN bit" 729 * "fourth, check the SYN bit"
730 */ 730 */
731 if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) { 731 if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) {
732 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS); 732 __TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
733 goto embryonic_reset; 733 goto embryonic_reset;
734 } 734 }
735 735
@@ -752,7 +752,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
752 if (req->num_timeout < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && 752 if (req->num_timeout < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
753 TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { 753 TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
754 inet_rsk(req)->acked = 1; 754 inet_rsk(req)->acked = 1;
755 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP); 755 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP);
756 return NULL; 756 return NULL;
757 } 757 }
758 758
@@ -791,7 +791,7 @@ embryonic_reset:
791 } 791 }
792 if (!fastopen) { 792 if (!fastopen) {
793 inet_csk_reqsk_queue_drop(sk, req); 793 inet_csk_reqsk_queue_drop(sk, req);
794 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS); 794 __NET_INC_STATS(sock_net(sk), LINUX_MIB_EMBRYONICRSTS);
795 } 795 }
796 return NULL; 796 return NULL;
797} 797}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9d3b4b364652..8daefd8b1b49 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -949,7 +949,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
949 949
950 skb_orphan(skb); 950 skb_orphan(skb);
951 skb->sk = sk; 951 skb->sk = sk;
952 skb->destructor = skb_is_tcp_pure_ack(skb) ? sock_wfree : tcp_wfree; 952 skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree;
953 skb_set_hash_from_sk(skb, sk); 953 skb_set_hash_from_sk(skb, sk);
954 atomic_add(skb->truesize, &sk->sk_wmem_alloc); 954 atomic_add(skb->truesize, &sk->sk_wmem_alloc);
955 955
@@ -1111,11 +1111,17 @@ static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int de
1111 tcp_verify_left_out(tp); 1111 tcp_verify_left_out(tp);
1112} 1112}
1113 1113
1114static bool tcp_has_tx_tstamp(const struct sk_buff *skb)
1115{
1116 return TCP_SKB_CB(skb)->txstamp_ack ||
1117 (skb_shinfo(skb)->tx_flags & SKBTX_ANY_TSTAMP);
1118}
1119
1114static void tcp_fragment_tstamp(struct sk_buff *skb, struct sk_buff *skb2) 1120static void tcp_fragment_tstamp(struct sk_buff *skb, struct sk_buff *skb2)
1115{ 1121{
1116 struct skb_shared_info *shinfo = skb_shinfo(skb); 1122 struct skb_shared_info *shinfo = skb_shinfo(skb);
1117 1123
1118 if (unlikely(shinfo->tx_flags & SKBTX_ANY_TSTAMP) && 1124 if (unlikely(tcp_has_tx_tstamp(skb)) &&
1119 !before(shinfo->tskey, TCP_SKB_CB(skb2)->seq)) { 1125 !before(shinfo->tskey, TCP_SKB_CB(skb2)->seq)) {
1120 struct skb_shared_info *shinfo2 = skb_shinfo(skb2); 1126 struct skb_shared_info *shinfo2 = skb_shinfo(skb2);
1121 u8 tsflags = shinfo->tx_flags & SKBTX_ANY_TSTAMP; 1127 u8 tsflags = shinfo->tx_flags & SKBTX_ANY_TSTAMP;
@@ -1128,6 +1134,12 @@ static void tcp_fragment_tstamp(struct sk_buff *skb, struct sk_buff *skb2)
1128 } 1134 }
1129} 1135}
1130 1136
1137static void tcp_skb_fragment_eor(struct sk_buff *skb, struct sk_buff *skb2)
1138{
1139 TCP_SKB_CB(skb2)->eor = TCP_SKB_CB(skb)->eor;
1140 TCP_SKB_CB(skb)->eor = 0;
1141}
1142
1131/* Function to create two new TCP segments. Shrinks the given segment 1143/* Function to create two new TCP segments. Shrinks the given segment
1132 * to the specified size and appends a new segment with the rest of the 1144 * to the specified size and appends a new segment with the rest of the
1133 * packet to the list. This won't be called frequently, I hope. 1145 * packet to the list. This won't be called frequently, I hope.
@@ -1173,6 +1185,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
1173 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH); 1185 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1174 TCP_SKB_CB(buff)->tcp_flags = flags; 1186 TCP_SKB_CB(buff)->tcp_flags = flags;
1175 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked; 1187 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
1188 tcp_skb_fragment_eor(skb, buff);
1176 1189
1177 if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) { 1190 if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
1178 /* Copy and checksum data tail into the new buffer. */ 1191 /* Copy and checksum data tail into the new buffer. */
@@ -1733,6 +1746,8 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1733 /* This packet was never sent out yet, so no SACK bits. */ 1746 /* This packet was never sent out yet, so no SACK bits. */
1734 TCP_SKB_CB(buff)->sacked = 0; 1747 TCP_SKB_CB(buff)->sacked = 0;
1735 1748
1749 tcp_skb_fragment_eor(skb, buff);
1750
1736 buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL; 1751 buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
1737 skb_split(skb, buff, len); 1752 skb_split(skb, buff, len);
1738 tcp_fragment_tstamp(skb, buff); 1753 tcp_fragment_tstamp(skb, buff);
@@ -2206,14 +2221,13 @@ bool tcp_schedule_loss_probe(struct sock *sk)
2206/* Thanks to skb fast clones, we can detect if a prior transmit of 2221/* Thanks to skb fast clones, we can detect if a prior transmit of
2207 * a packet is still in a qdisc or driver queue. 2222 * a packet is still in a qdisc or driver queue.
2208 * In this case, there is very little point doing a retransmit ! 2223 * In this case, there is very little point doing a retransmit !
2209 * Note: This is called from BH context only.
2210 */ 2224 */
2211static bool skb_still_in_host_queue(const struct sock *sk, 2225static bool skb_still_in_host_queue(const struct sock *sk,
2212 const struct sk_buff *skb) 2226 const struct sk_buff *skb)
2213{ 2227{
2214 if (unlikely(skb_fclone_busy(sk, skb))) { 2228 if (unlikely(skb_fclone_busy(sk, skb))) {
2215 NET_INC_STATS_BH(sock_net(sk), 2229 NET_INC_STATS(sock_net(sk),
2216 LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); 2230 LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
2217 return true; 2231 return true;
2218 } 2232 }
2219 return false; 2233 return false;
@@ -2275,7 +2289,7 @@ void tcp_send_loss_probe(struct sock *sk)
2275 tp->tlp_high_seq = tp->snd_nxt; 2289 tp->tlp_high_seq = tp->snd_nxt;
2276 2290
2277probe_sent: 2291probe_sent:
2278 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSPROBES); 2292 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSPROBES);
2279 /* Reset s.t. tcp_rearm_rto will restart timer from now */ 2293 /* Reset s.t. tcp_rearm_rto will restart timer from now */
2280 inet_csk(sk)->icsk_pending = 0; 2294 inet_csk(sk)->icsk_pending = 0;
2281rearm_timer: 2295rearm_timer:
@@ -2446,13 +2460,12 @@ u32 __tcp_select_window(struct sock *sk)
2446void tcp_skb_collapse_tstamp(struct sk_buff *skb, 2460void tcp_skb_collapse_tstamp(struct sk_buff *skb,
2447 const struct sk_buff *next_skb) 2461 const struct sk_buff *next_skb)
2448{ 2462{
2449 const struct skb_shared_info *next_shinfo = skb_shinfo(next_skb); 2463 if (unlikely(tcp_has_tx_tstamp(next_skb))) {
2450 u8 tsflags = next_shinfo->tx_flags & SKBTX_ANY_TSTAMP; 2464 const struct skb_shared_info *next_shinfo =
2451 2465 skb_shinfo(next_skb);
2452 if (unlikely(tsflags)) {
2453 struct skb_shared_info *shinfo = skb_shinfo(skb); 2466 struct skb_shared_info *shinfo = skb_shinfo(skb);
2454 2467
2455 shinfo->tx_flags |= tsflags; 2468 shinfo->tx_flags |= next_shinfo->tx_flags & SKBTX_ANY_TSTAMP;
2456 shinfo->tskey = next_shinfo->tskey; 2469 shinfo->tskey = next_shinfo->tskey;
2457 TCP_SKB_CB(skb)->txstamp_ack |= 2470 TCP_SKB_CB(skb)->txstamp_ack |=
2458 TCP_SKB_CB(next_skb)->txstamp_ack; 2471 TCP_SKB_CB(next_skb)->txstamp_ack;
@@ -2494,6 +2507,7 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
2494 * packet counting does not break. 2507 * packet counting does not break.
2495 */ 2508 */
2496 TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS; 2509 TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS;
2510 TCP_SKB_CB(skb)->eor = TCP_SKB_CB(next_skb)->eor;
2497 2511
2498 /* changed transmit queue under us so clear hints */ 2512 /* changed transmit queue under us so clear hints */
2499 tcp_clear_retrans_hints_partial(tp); 2513 tcp_clear_retrans_hints_partial(tp);
@@ -2545,6 +2559,9 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
2545 if (!tcp_can_collapse(sk, skb)) 2559 if (!tcp_can_collapse(sk, skb))
2546 break; 2560 break;
2547 2561
2562 if (!tcp_skb_can_collapse_to(to))
2563 break;
2564
2548 space -= skb->len; 2565 space -= skb->len;
2549 2566
2550 if (first) { 2567 if (first) {
@@ -2656,7 +2673,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
2656 /* Update global TCP statistics. */ 2673 /* Update global TCP statistics. */
2657 TCP_ADD_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS, segs); 2674 TCP_ADD_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS, segs);
2658 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) 2675 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
2659 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); 2676 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
2660 tp->total_retrans += segs; 2677 tp->total_retrans += segs;
2661 } 2678 }
2662 return err; 2679 return err;
@@ -2681,7 +2698,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
2681 tp->retrans_stamp = tcp_skb_timestamp(skb); 2698 tp->retrans_stamp = tcp_skb_timestamp(skb);
2682 2699
2683 } else if (err != -EBUSY) { 2700 } else if (err != -EBUSY) {
2684 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); 2701 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
2685 } 2702 }
2686 2703
2687 if (tp->undo_retrans < 0) 2704 if (tp->undo_retrans < 0)
@@ -2805,7 +2822,7 @@ begin_fwd:
2805 if (tcp_retransmit_skb(sk, skb, segs)) 2822 if (tcp_retransmit_skb(sk, skb, segs))
2806 return; 2823 return;
2807 2824
2808 NET_INC_STATS_BH(sock_net(sk), mib_idx); 2825 NET_INC_STATS(sock_net(sk), mib_idx);
2809 2826
2810 if (tcp_in_cwnd_reduction(sk)) 2827 if (tcp_in_cwnd_reduction(sk))
2811 tp->prr_out += tcp_skb_pcount(skb); 2828 tp->prr_out += tcp_skb_pcount(skb);
@@ -3042,7 +3059,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
3042 th->window = htons(min(req->rsk_rcv_wnd, 65535U)); 3059 th->window = htons(min(req->rsk_rcv_wnd, 65535U));
3043 tcp_options_write((__be32 *)(th + 1), NULL, &opts); 3060 tcp_options_write((__be32 *)(th + 1), NULL, &opts);
3044 th->doff = (tcp_header_size >> 2); 3061 th->doff = (tcp_header_size >> 2);
3045 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_OUTSEGS); 3062 __TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
3046 3063
3047#ifdef CONFIG_TCP_MD5SIG 3064#ifdef CONFIG_TCP_MD5SIG
3048 /* Okay, we have all we need - do the md5 hash if needed */ 3065 /* Okay, we have all we need - do the md5 hash if needed */
@@ -3540,8 +3557,8 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
3540 tcp_rsk(req)->txhash = net_tx_rndhash(); 3557 tcp_rsk(req)->txhash = net_tx_rndhash();
3541 res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL); 3558 res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL);
3542 if (!res) { 3559 if (!res) {
3543 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); 3560 __TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
3544 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); 3561 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
3545 } 3562 }
3546 return res; 3563 return res;
3547} 3564}
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index 5353085fd0b2..e36df4fcfeba 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -65,8 +65,8 @@ int tcp_rack_mark_lost(struct sock *sk)
65 if (scb->sacked & TCPCB_SACKED_RETRANS) { 65 if (scb->sacked & TCPCB_SACKED_RETRANS) {
66 scb->sacked &= ~TCPCB_SACKED_RETRANS; 66 scb->sacked &= ~TCPCB_SACKED_RETRANS;
67 tp->retrans_out -= tcp_skb_pcount(skb); 67 tp->retrans_out -= tcp_skb_pcount(skb);
68 NET_INC_STATS_BH(sock_net(sk), 68 NET_INC_STATS(sock_net(sk),
69 LINUX_MIB_TCPLOSTRETRANSMIT); 69 LINUX_MIB_TCPLOSTRETRANSMIT);
70 } 70 }
71 } else if (!(scb->sacked & TCPCB_RETRANS)) { 71 } else if (!(scb->sacked & TCPCB_RETRANS)) {
72 /* Original data are sent sequentially so stop early 72 /* Original data are sent sequentially so stop early
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 373b03e78aaa..debdd8b33e69 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -30,7 +30,7 @@ static void tcp_write_err(struct sock *sk)
30 sk->sk_error_report(sk); 30 sk->sk_error_report(sk);
31 31
32 tcp_done(sk); 32 tcp_done(sk);
33 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT); 33 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT);
34} 34}
35 35
36/* Do not allow orphaned sockets to eat all our resources. 36/* Do not allow orphaned sockets to eat all our resources.
@@ -68,7 +68,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
68 if (do_reset) 68 if (do_reset)
69 tcp_send_active_reset(sk, GFP_ATOMIC); 69 tcp_send_active_reset(sk, GFP_ATOMIC);
70 tcp_done(sk); 70 tcp_done(sk);
71 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); 71 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY);
72 return 1; 72 return 1;
73 } 73 }
74 return 0; 74 return 0;
@@ -162,8 +162,8 @@ static int tcp_write_timeout(struct sock *sk)
162 if (tp->syn_fastopen || tp->syn_data) 162 if (tp->syn_fastopen || tp->syn_data)
163 tcp_fastopen_cache_set(sk, 0, NULL, true, 0); 163 tcp_fastopen_cache_set(sk, 0, NULL, true, 0);
164 if (tp->syn_data && icsk->icsk_retransmits == 1) 164 if (tp->syn_data && icsk->icsk_retransmits == 1)
165 NET_INC_STATS_BH(sock_net(sk), 165 NET_INC_STATS(sock_net(sk),
166 LINUX_MIB_TCPFASTOPENACTIVEFAIL); 166 LINUX_MIB_TCPFASTOPENACTIVEFAIL);
167 } 167 }
168 retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; 168 retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
169 syn_set = true; 169 syn_set = true;
@@ -178,8 +178,8 @@ static int tcp_write_timeout(struct sock *sk)
178 tp->bytes_acked <= tp->rx_opt.mss_clamp) { 178 tp->bytes_acked <= tp->rx_opt.mss_clamp) {
179 tcp_fastopen_cache_set(sk, 0, NULL, true, 0); 179 tcp_fastopen_cache_set(sk, 0, NULL, true, 0);
180 if (icsk->icsk_retransmits == net->ipv4.sysctl_tcp_retries1) 180 if (icsk->icsk_retransmits == net->ipv4.sysctl_tcp_retries1)
181 NET_INC_STATS_BH(sock_net(sk), 181 NET_INC_STATS(sock_net(sk),
182 LINUX_MIB_TCPFASTOPENACTIVEFAIL); 182 LINUX_MIB_TCPFASTOPENACTIVEFAIL);
183 } 183 }
184 /* Black hole detection */ 184 /* Black hole detection */
185 tcp_mtu_probing(icsk, sk); 185 tcp_mtu_probing(icsk, sk);
@@ -209,6 +209,7 @@ static int tcp_write_timeout(struct sock *sk)
209 return 0; 209 return 0;
210} 210}
211 211
212/* Called with BH disabled */
212void tcp_delack_timer_handler(struct sock *sk) 213void tcp_delack_timer_handler(struct sock *sk)
213{ 214{
214 struct tcp_sock *tp = tcp_sk(sk); 215 struct tcp_sock *tp = tcp_sk(sk);
@@ -228,7 +229,7 @@ void tcp_delack_timer_handler(struct sock *sk)
228 if (!skb_queue_empty(&tp->ucopy.prequeue)) { 229 if (!skb_queue_empty(&tp->ucopy.prequeue)) {
229 struct sk_buff *skb; 230 struct sk_buff *skb;
230 231
231 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSCHEDULERFAILED); 232 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSCHEDULERFAILED);
232 233
233 while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) 234 while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
234 sk_backlog_rcv(sk, skb); 235 sk_backlog_rcv(sk, skb);
@@ -248,7 +249,7 @@ void tcp_delack_timer_handler(struct sock *sk)
248 icsk->icsk_ack.ato = TCP_ATO_MIN; 249 icsk->icsk_ack.ato = TCP_ATO_MIN;
249 } 250 }
250 tcp_send_ack(sk); 251 tcp_send_ack(sk);
251 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKS); 252 __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS);
252 } 253 }
253 254
254out: 255out:
@@ -265,7 +266,7 @@ static void tcp_delack_timer(unsigned long data)
265 tcp_delack_timer_handler(sk); 266 tcp_delack_timer_handler(sk);
266 } else { 267 } else {
267 inet_csk(sk)->icsk_ack.blocked = 1; 268 inet_csk(sk)->icsk_ack.blocked = 1;
268 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); 269 __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
 269 /* delegate our work to tcp_release_cb() */ 270 /* delegate our work to tcp_release_cb() */
270 if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags)) 271 if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags))
271 sock_hold(sk); 272 sock_hold(sk);
@@ -431,7 +432,7 @@ void tcp_retransmit_timer(struct sock *sk)
431 } else { 432 } else {
432 mib_idx = LINUX_MIB_TCPTIMEOUTS; 433 mib_idx = LINUX_MIB_TCPTIMEOUTS;
433 } 434 }
434 NET_INC_STATS_BH(sock_net(sk), mib_idx); 435 __NET_INC_STATS(sock_net(sk), mib_idx);
435 } 436 }
436 437
437 tcp_enter_loss(sk); 438 tcp_enter_loss(sk);
@@ -493,6 +494,7 @@ out_reset_timer:
493out:; 494out:;
494} 495}
495 496
497/* Called with BH disabled */
496void tcp_write_timer_handler(struct sock *sk) 498void tcp_write_timer_handler(struct sock *sk)
497{ 499{
498 struct inet_connection_sock *icsk = inet_csk(sk); 500 struct inet_connection_sock *icsk = inet_csk(sk);
@@ -549,7 +551,7 @@ void tcp_syn_ack_timeout(const struct request_sock *req)
549{ 551{
550 struct net *net = read_pnet(&inet_rsk(req)->ireq_net); 552 struct net *net = read_pnet(&inet_rsk(req)->ireq_net);
551 553
552 NET_INC_STATS_BH(net, LINUX_MIB_TCPTIMEOUTS); 554 __NET_INC_STATS(net, LINUX_MIB_TCPTIMEOUTS);
553} 555}
554EXPORT_SYMBOL(tcp_syn_ack_timeout); 556EXPORT_SYMBOL(tcp_syn_ack_timeout);
555 557
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 13951c4087d4..4c4bac1b5eab 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -107,16 +107,16 @@ EXPORT_SYMBOL_GPL(tcp_vegas_init);
107 * o min-filter RTT samples from a much longer window (forever for now) 107 * o min-filter RTT samples from a much longer window (forever for now)
108 * to find the propagation delay (baseRTT) 108 * to find the propagation delay (baseRTT)
109 */ 109 */
110void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us) 110void tcp_vegas_pkts_acked(struct sock *sk, const struct ack_sample *sample)
111{ 111{
112 struct vegas *vegas = inet_csk_ca(sk); 112 struct vegas *vegas = inet_csk_ca(sk);
113 u32 vrtt; 113 u32 vrtt;
114 114
115 if (rtt_us < 0) 115 if (sample->rtt_us < 0)
116 return; 116 return;
117 117
118 /* Never allow zero rtt or baseRTT */ 118 /* Never allow zero rtt or baseRTT */
119 vrtt = rtt_us + 1; 119 vrtt = sample->rtt_us + 1;
120 120
121 /* Filter to find propagation delay: */ 121 /* Filter to find propagation delay: */
122 if (vrtt < vegas->baseRTT) 122 if (vrtt < vegas->baseRTT)
diff --git a/net/ipv4/tcp_vegas.h b/net/ipv4/tcp_vegas.h
index ef9da5306c68..248cfc0ff9ae 100644
--- a/net/ipv4/tcp_vegas.h
+++ b/net/ipv4/tcp_vegas.h
@@ -17,7 +17,7 @@ struct vegas {
17 17
18void tcp_vegas_init(struct sock *sk); 18void tcp_vegas_init(struct sock *sk);
19void tcp_vegas_state(struct sock *sk, u8 ca_state); 19void tcp_vegas_state(struct sock *sk, u8 ca_state);
20void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us); 20void tcp_vegas_pkts_acked(struct sock *sk, const struct ack_sample *sample);
21void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event); 21void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event);
22size_t tcp_vegas_get_info(struct sock *sk, u32 ext, int *attr, 22size_t tcp_vegas_get_info(struct sock *sk, u32 ext, int *attr,
23 union tcp_cc_info *info); 23 union tcp_cc_info *info);
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 0d094b995cd9..40171e163cff 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -69,16 +69,17 @@ static void tcp_veno_init(struct sock *sk)
69} 69}
70 70
71/* Do rtt sampling needed for Veno. */ 71/* Do rtt sampling needed for Veno. */
72static void tcp_veno_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us) 72static void tcp_veno_pkts_acked(struct sock *sk,
73 const struct ack_sample *sample)
73{ 74{
74 struct veno *veno = inet_csk_ca(sk); 75 struct veno *veno = inet_csk_ca(sk);
75 u32 vrtt; 76 u32 vrtt;
76 77
77 if (rtt_us < 0) 78 if (sample->rtt_us < 0)
78 return; 79 return;
79 80
80 /* Never allow zero rtt or baseRTT */ 81 /* Never allow zero rtt or baseRTT */
81 vrtt = rtt_us + 1; 82 vrtt = sample->rtt_us + 1;
82 83
83 /* Filter to find propagation delay: */ 84 /* Filter to find propagation delay: */
84 if (vrtt < veno->basertt) 85 if (vrtt < veno->basertt)
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index c10732e39837..4b03a2e2a050 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -99,12 +99,13 @@ static void westwood_filter(struct westwood *w, u32 delta)
99 * Called after processing group of packets. 99 * Called after processing group of packets.
100 * but all westwood needs is the last sample of srtt. 100 * but all westwood needs is the last sample of srtt.
101 */ 101 */
102static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt, s32 rtt) 102static void tcp_westwood_pkts_acked(struct sock *sk,
103 const struct ack_sample *sample)
103{ 104{
104 struct westwood *w = inet_csk_ca(sk); 105 struct westwood *w = inet_csk_ca(sk);
105 106
106 if (rtt > 0) 107 if (sample->rtt_us > 0)
107 w->rtt = usecs_to_jiffies(rtt); 108 w->rtt = usecs_to_jiffies(sample->rtt_us);
108} 109}
109 110
110/* 111/*
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 3e6a472e6b88..028eb046ea40 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -56,15 +56,16 @@ static void tcp_yeah_init(struct sock *sk)
56 tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128); 56 tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128);
57} 57}
58 58
59static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, s32 rtt_us) 59static void tcp_yeah_pkts_acked(struct sock *sk,
60 const struct ack_sample *sample)
60{ 61{
61 const struct inet_connection_sock *icsk = inet_csk(sk); 62 const struct inet_connection_sock *icsk = inet_csk(sk);
62 struct yeah *yeah = inet_csk_ca(sk); 63 struct yeah *yeah = inet_csk_ca(sk);
63 64
64 if (icsk->icsk_ca_state == TCP_CA_Open) 65 if (icsk->icsk_ca_state == TCP_CA_Open)
65 yeah->pkts_acked = pkts_acked; 66 yeah->pkts_acked = sample->pkts_acked;
66 67
67 tcp_vegas_pkts_acked(sk, pkts_acked, rtt_us); 68 tcp_vegas_pkts_acked(sk, sample);
68} 69}
69 70
70static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked) 71static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked)
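A minimal sketch, assuming only the two ack_sample fields these hunks actually read (pkts_acked and rtt_us), of what a congestion-control pkts_acked hook looks like under the new signature; the module name and its private state are hypothetical, not part of this patch:

/* Hedged example: mirrors the vegas/veno pattern above of skipping
 * invalid RTT samples before recording anything.
 */
#include <net/tcp.h>

struct example_ca {
	u32 last_rtt_us;	/* most recent valid RTT sample */
	u32 acked;		/* packets acked so far */
};

static void example_pkts_acked(struct sock *sk, const struct ack_sample *sample)
{
	struct example_ca *ca = inet_csk_ca(sk);

	if (sample->rtt_us < 0)		/* no valid RTT for this ACK */
		return;

	ca->last_rtt_us = sample->rtt_us;
	ca->acked += sample->pkts_acked;
}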
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 76ea0a8be090..f67f52ba4809 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -688,7 +688,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
688 iph->saddr, uh->source, skb->dev->ifindex, udptable, 688 iph->saddr, uh->source, skb->dev->ifindex, udptable,
689 NULL); 689 NULL);
690 if (!sk) { 690 if (!sk) {
691 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); 691 __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
692 return; /* No socket for error */ 692 return; /* No socket for error */
693 } 693 }
694 694
@@ -882,13 +882,13 @@ send:
882 err = ip_send_skb(sock_net(sk), skb); 882 err = ip_send_skb(sock_net(sk), skb);
883 if (err) { 883 if (err) {
884 if (err == -ENOBUFS && !inet->recverr) { 884 if (err == -ENOBUFS && !inet->recverr) {
885 UDP_INC_STATS_USER(sock_net(sk), 885 UDP_INC_STATS(sock_net(sk),
886 UDP_MIB_SNDBUFERRORS, is_udplite); 886 UDP_MIB_SNDBUFERRORS, is_udplite);
887 err = 0; 887 err = 0;
888 } 888 }
889 } else 889 } else
890 UDP_INC_STATS_USER(sock_net(sk), 890 UDP_INC_STATS(sock_net(sk),
891 UDP_MIB_OUTDATAGRAMS, is_udplite); 891 UDP_MIB_OUTDATAGRAMS, is_udplite);
892 return err; 892 return err;
893} 893}
894 894
@@ -1157,8 +1157,8 @@ out:
1157 * seems like overkill. 1157 * seems like overkill.
1158 */ 1158 */
1159 if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { 1159 if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
1160 UDP_INC_STATS_USER(sock_net(sk), 1160 UDP_INC_STATS(sock_net(sk),
1161 UDP_MIB_SNDBUFERRORS, is_udplite); 1161 UDP_MIB_SNDBUFERRORS, is_udplite);
1162 } 1162 }
1163 return err; 1163 return err;
1164 1164
@@ -1242,10 +1242,10 @@ static unsigned int first_packet_length(struct sock *sk)
1242 spin_lock_bh(&rcvq->lock); 1242 spin_lock_bh(&rcvq->lock);
1243 while ((skb = skb_peek(rcvq)) != NULL && 1243 while ((skb = skb_peek(rcvq)) != NULL &&
1244 udp_lib_checksum_complete(skb)) { 1244 udp_lib_checksum_complete(skb)) {
1245 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, 1245 __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS,
1246 IS_UDPLITE(sk)); 1246 IS_UDPLITE(sk));
1247 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, 1247 __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
1248 IS_UDPLITE(sk)); 1248 IS_UDPLITE(sk));
1249 atomic_inc(&sk->sk_drops); 1249 atomic_inc(&sk->sk_drops);
1250 __skb_unlink(skb, rcvq); 1250 __skb_unlink(skb, rcvq);
1251 __skb_queue_tail(&list_kill, skb); 1251 __skb_queue_tail(&list_kill, skb);
@@ -1352,16 +1352,16 @@ try_again:
1352 trace_kfree_skb(skb, udp_recvmsg); 1352 trace_kfree_skb(skb, udp_recvmsg);
1353 if (!peeked) { 1353 if (!peeked) {
1354 atomic_inc(&sk->sk_drops); 1354 atomic_inc(&sk->sk_drops);
1355 UDP_INC_STATS_USER(sock_net(sk), 1355 UDP_INC_STATS(sock_net(sk),
1356 UDP_MIB_INERRORS, is_udplite); 1356 UDP_MIB_INERRORS, is_udplite);
1357 } 1357 }
1358 skb_free_datagram_locked(sk, skb); 1358 skb_free_datagram_locked(sk, skb);
1359 return err; 1359 return err;
1360 } 1360 }
1361 1361
1362 if (!peeked) 1362 if (!peeked)
1363 UDP_INC_STATS_USER(sock_net(sk), 1363 UDP_INC_STATS(sock_net(sk),
1364 UDP_MIB_INDATAGRAMS, is_udplite); 1364 UDP_MIB_INDATAGRAMS, is_udplite);
1365 1365
1366 sock_recv_ts_and_drops(msg, sk, skb); 1366 sock_recv_ts_and_drops(msg, sk, skb);
1367 1367
@@ -1386,8 +1386,8 @@ try_again:
1386csum_copy_err: 1386csum_copy_err:
1387 slow = lock_sock_fast(sk); 1387 slow = lock_sock_fast(sk);
1388 if (!skb_kill_datagram(sk, skb, flags)) { 1388 if (!skb_kill_datagram(sk, skb, flags)) {
1389 UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); 1389 UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
1390 UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); 1390 UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
1391 } 1391 }
1392 unlock_sock_fast(sk, slow); 1392 unlock_sock_fast(sk, slow);
1393 1393
@@ -1514,9 +1514,9 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1514 1514
1515 /* Note that an ENOMEM error is charged twice */ 1515 /* Note that an ENOMEM error is charged twice */
1516 if (rc == -ENOMEM) 1516 if (rc == -ENOMEM)
1517 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, 1517 UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS,
1518 is_udplite); 1518 is_udplite);
1519 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); 1519 UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
1520 kfree_skb(skb); 1520 kfree_skb(skb);
1521 trace_udp_fail_queue_rcv_skb(rc, sk); 1521 trace_udp_fail_queue_rcv_skb(rc, sk);
1522 return -1; 1522 return -1;
@@ -1580,9 +1580,9 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1580 1580
1581 ret = encap_rcv(sk, skb); 1581 ret = encap_rcv(sk, skb);
1582 if (ret <= 0) { 1582 if (ret <= 0) {
1583 UDP_INC_STATS_BH(sock_net(sk), 1583 __UDP_INC_STATS(sock_net(sk),
1584 UDP_MIB_INDATAGRAMS, 1584 UDP_MIB_INDATAGRAMS,
1585 is_udplite); 1585 is_udplite);
1586 return -ret; 1586 return -ret;
1587 } 1587 }
1588 } 1588 }
@@ -1633,8 +1633,8 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1633 1633
1634 udp_csum_pull_header(skb); 1634 udp_csum_pull_header(skb);
1635 if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { 1635 if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
1636 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, 1636 __UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS,
1637 is_udplite); 1637 is_udplite);
1638 goto drop; 1638 goto drop;
1639 } 1639 }
1640 1640
@@ -1653,9 +1653,9 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1653 return rc; 1653 return rc;
1654 1654
1655csum_error: 1655csum_error:
1656 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); 1656 __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
1657drop: 1657drop:
1658 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); 1658 __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
1659 atomic_inc(&sk->sk_drops); 1659 atomic_inc(&sk->sk_drops);
1660 kfree_skb(skb); 1660 kfree_skb(skb);
1661 return -1; 1661 return -1;
@@ -1715,10 +1715,10 @@ start_lookup:
1715 1715
1716 if (unlikely(!nskb)) { 1716 if (unlikely(!nskb)) {
1717 atomic_inc(&sk->sk_drops); 1717 atomic_inc(&sk->sk_drops);
1718 UDP_INC_STATS_BH(net, UDP_MIB_RCVBUFERRORS, 1718 __UDP_INC_STATS(net, UDP_MIB_RCVBUFERRORS,
1719 IS_UDPLITE(sk)); 1719 IS_UDPLITE(sk));
1720 UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, 1720 __UDP_INC_STATS(net, UDP_MIB_INERRORS,
1721 IS_UDPLITE(sk)); 1721 IS_UDPLITE(sk));
1722 continue; 1722 continue;
1723 } 1723 }
1724 if (udp_queue_rcv_skb(sk, nskb) > 0) 1724 if (udp_queue_rcv_skb(sk, nskb) > 0)
@@ -1736,8 +1736,8 @@ start_lookup:
1736 consume_skb(skb); 1736 consume_skb(skb);
1737 } else { 1737 } else {
1738 kfree_skb(skb); 1738 kfree_skb(skb);
1739 UDP_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI, 1739 __UDP_INC_STATS(net, UDP_MIB_IGNOREDMULTI,
1740 proto == IPPROTO_UDPLITE); 1740 proto == IPPROTO_UDPLITE);
1741 } 1741 }
1742 return 0; 1742 return 0;
1743} 1743}
@@ -1851,7 +1851,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
1851 if (udp_lib_checksum_complete(skb)) 1851 if (udp_lib_checksum_complete(skb))
1852 goto csum_error; 1852 goto csum_error;
1853 1853
1854 UDP_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); 1854 __UDP_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
1855 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 1855 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
1856 1856
1857 /* 1857 /*
@@ -1878,9 +1878,9 @@ csum_error:
1878 proto == IPPROTO_UDPLITE ? "Lite" : "", 1878 proto == IPPROTO_UDPLITE ? "Lite" : "",
1879 &saddr, ntohs(uh->source), &daddr, ntohs(uh->dest), 1879 &saddr, ntohs(uh->source), &daddr, ntohs(uh->dest),
1880 ulen); 1880 ulen);
1881 UDP_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); 1881 __UDP_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
1882drop: 1882drop:
1883 UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); 1883 __UDP_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
1884 kfree_skb(skb); 1884 kfree_skb(skb);
1885 return 0; 1885 return 0;
1886} 1886}
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 097060def7f0..6b7459c92bb2 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -350,6 +350,11 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
350 350
351 uh->len = newlen; 351 uh->len = newlen;
352 352
353 /* Set encapsulation before calling into inner gro_complete() functions
354 * to make them set up the inner offsets.
355 */
356 skb->encapsulation = 1;
357
353 rcu_read_lock(); 358 rcu_read_lock();
354 sk = (*lookup)(skb, uh->source, uh->dest); 359 sk = (*lookup)(skb, uh->source, uh->dest);
355 if (sk && udp_sk(sk)->gro_complete) 360 if (sk && udp_sk(sk)->gro_complete)
@@ -360,9 +365,6 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
360 if (skb->remcsum_offload) 365 if (skb->remcsum_offload)
361 skb_shinfo(skb)->gso_type |= SKB_GSO_TUNNEL_REMCSUM; 366 skb_shinfo(skb)->gso_type |= SKB_GSO_TUNNEL_REMCSUM;
362 367
363 skb->encapsulation = 1;
364 skb_set_inner_mac_header(skb, nhoff + sizeof(struct udphdr));
365
366 return err; 368 return err;
367} 369}
368EXPORT_SYMBOL(udp_gro_complete); 370EXPORT_SYMBOL(udp_gro_complete);
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 11e875ffd7ac..3f8411328de5 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -218,6 +218,7 @@ config IPV6_GRE
218 tristate "IPv6: GRE tunnel" 218 tristate "IPv6: GRE tunnel"
219 select IPV6_TUNNEL 219 select IPV6_TUNNEL
220 select NET_IP_TUNNEL 220 select NET_IP_TUNNEL
221 depends on NET_IPGRE_DEMUX
221 ---help--- 222 ---help---
222 Tunneling means encapsulating data of one protocol type within 223 Tunneling means encapsulating data of one protocol type within
223 another protocol and sending it over a channel that understands the 224 another protocol and sending it over a channel that understands the
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index f5a77a9dd34e..47f837a58e0a 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3175,35 +3175,9 @@ static void addrconf_gre_config(struct net_device *dev)
3175} 3175}
3176#endif 3176#endif
3177 3177
3178#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
3179/* If the host route is cached on the addr struct make sure it is associated
3180 * with the proper table. e.g., enslavement can change and if so the cached
3181 * host route needs to move to the new table.
3182 */
3183static void l3mdev_check_host_rt(struct inet6_dev *idev,
3184 struct inet6_ifaddr *ifp)
3185{
3186 if (ifp->rt) {
3187 u32 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
3188
3189 if (tb_id != ifp->rt->rt6i_table->tb6_id) {
3190 ip6_del_rt(ifp->rt);
3191 ifp->rt = NULL;
3192 }
3193 }
3194}
3195#else
3196static void l3mdev_check_host_rt(struct inet6_dev *idev,
3197 struct inet6_ifaddr *ifp)
3198{
3199}
3200#endif
3201
3202static int fixup_permanent_addr(struct inet6_dev *idev, 3178static int fixup_permanent_addr(struct inet6_dev *idev,
3203 struct inet6_ifaddr *ifp) 3179 struct inet6_ifaddr *ifp)
3204{ 3180{
3205 l3mdev_check_host_rt(idev, ifp);
3206
3207 if (!ifp->rt) { 3181 if (!ifp->rt) {
3208 struct rt6_info *rt; 3182 struct rt6_info *rt;
3209 3183
@@ -3303,6 +3277,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
3303 break; 3277 break;
3304 3278
3305 if (event == NETDEV_UP) { 3279 if (event == NETDEV_UP) {
3280 /* restore routes for permanent addresses */
3281 addrconf_permanent_addr(dev);
3282
3306 if (!addrconf_qdisc_ok(dev)) { 3283 if (!addrconf_qdisc_ok(dev)) {
3307 /* device is not ready yet. */ 3284 /* device is not ready yet. */
3308 pr_info("ADDRCONF(NETDEV_UP): %s: link is not ready\n", 3285 pr_info("ADDRCONF(NETDEV_UP): %s: link is not ready\n",
@@ -3336,9 +3313,6 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
3336 run_pending = 1; 3313 run_pending = 1;
3337 } 3314 }
3338 3315
3339 /* restore routes for permanent addresses */
3340 addrconf_permanent_addr(dev);
3341
3342 switch (dev->type) { 3316 switch (dev->type) {
3343#if IS_ENABLED(CONFIG_IPV6_SIT) 3317#if IS_ENABLED(CONFIG_IPV6_SIT)
3344 case ARPHRD_SIT: 3318 case ARPHRD_SIT:
@@ -3555,6 +3529,8 @@ restart:
3555 3529
3556 INIT_LIST_HEAD(&del_list); 3530 INIT_LIST_HEAD(&del_list);
3557 list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) { 3531 list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
3532 struct rt6_info *rt = NULL;
3533
3558 addrconf_del_dad_work(ifa); 3534 addrconf_del_dad_work(ifa);
3559 3535
3560 write_unlock_bh(&idev->lock); 3536 write_unlock_bh(&idev->lock);
@@ -3567,6 +3543,9 @@ restart:
3567 ifa->state = 0; 3543 ifa->state = 0;
3568 if (!(ifa->flags & IFA_F_NODAD)) 3544 if (!(ifa->flags & IFA_F_NODAD))
3569 ifa->flags |= IFA_F_TENTATIVE; 3545 ifa->flags |= IFA_F_TENTATIVE;
3546
3547 rt = ifa->rt;
3548 ifa->rt = NULL;
3570 } else { 3549 } else {
3571 state = ifa->state; 3550 state = ifa->state;
3572 ifa->state = INET6_IFADDR_STATE_DEAD; 3551 ifa->state = INET6_IFADDR_STATE_DEAD;
@@ -3577,6 +3556,9 @@ restart:
3577 3556
3578 spin_unlock_bh(&ifa->lock); 3557 spin_unlock_bh(&ifa->lock);
3579 3558
3559 if (rt)
3560 ip6_del_rt(rt);
3561
3580 if (state != INET6_IFADDR_STATE_DEAD) { 3562 if (state != INET6_IFADDR_STATE_DEAD) {
3581 __ipv6_ifa_notify(RTM_DELADDR, ifa); 3563 __ipv6_ifa_notify(RTM_DELADDR, ifa);
3582 inet6addr_notifier_call_chain(NETDEV_DOWN, ifa); 3564 inet6addr_notifier_call_chain(NETDEV_DOWN, ifa);
@@ -5344,10 +5326,10 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
5344 if (rt) 5326 if (rt)
5345 ip6_del_rt(rt); 5327 ip6_del_rt(rt);
5346 } 5328 }
5347 dst_hold(&ifp->rt->dst); 5329 if (ifp->rt) {
5348 5330 dst_hold(&ifp->rt->dst);
5349 ip6_del_rt(ifp->rt); 5331 ip6_del_rt(ifp->rt);
5350 5332 }
5351 rt_genid_bump_ipv6(net); 5333 rt_genid_bump_ipv6(net);
5352 break; 5334 break;
5353 } 5335 }
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index ea9ee5cce5cf..00d0c2903173 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -727,14 +727,13 @@ EXPORT_SYMBOL_GPL(ip6_datagram_recv_ctl);
727 727
728int ip6_datagram_send_ctl(struct net *net, struct sock *sk, 728int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
729 struct msghdr *msg, struct flowi6 *fl6, 729 struct msghdr *msg, struct flowi6 *fl6,
730 struct ipv6_txoptions *opt, 730 struct ipcm6_cookie *ipc6, struct sockcm_cookie *sockc)
731 int *hlimit, int *tclass, int *dontfrag,
732 struct sockcm_cookie *sockc)
733{ 731{
734 struct in6_pktinfo *src_info; 732 struct in6_pktinfo *src_info;
735 struct cmsghdr *cmsg; 733 struct cmsghdr *cmsg;
736 struct ipv6_rt_hdr *rthdr; 734 struct ipv6_rt_hdr *rthdr;
737 struct ipv6_opt_hdr *hdr; 735 struct ipv6_opt_hdr *hdr;
736 struct ipv6_txoptions *opt = ipc6->opt;
738 int len; 737 int len;
739 int err = 0; 738 int err = 0;
740 739
@@ -953,8 +952,8 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
953 goto exit_f; 952 goto exit_f;
954 } 953 }
955 954
956 *hlimit = *(int *)CMSG_DATA(cmsg); 955 ipc6->hlimit = *(int *)CMSG_DATA(cmsg);
957 if (*hlimit < -1 || *hlimit > 0xff) { 956 if (ipc6->hlimit < -1 || ipc6->hlimit > 0xff) {
958 err = -EINVAL; 957 err = -EINVAL;
959 goto exit_f; 958 goto exit_f;
960 } 959 }
@@ -974,7 +973,7 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
974 goto exit_f; 973 goto exit_f;
975 974
976 err = 0; 975 err = 0;
977 *tclass = tc; 976 ipc6->tclass = tc;
978 977
979 break; 978 break;
980 } 979 }
@@ -992,7 +991,7 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
992 goto exit_f; 991 goto exit_f;
993 992
994 err = 0; 993 err = 0;
995 *dontfrag = df; 994 ipc6->dontfrag = df;
996 995
997 break; 996 break;
998 } 997 }
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index ea7c4d64a00a..8de5dd7aaa05 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -258,8 +258,8 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
258 if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || 258 if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
259 !pskb_may_pull(skb, (skb_transport_offset(skb) + 259 !pskb_may_pull(skb, (skb_transport_offset(skb) +
260 ((skb_transport_header(skb)[1] + 1) << 3)))) { 260 ((skb_transport_header(skb)[1] + 1) << 3)))) {
261 IP6_INC_STATS_BH(dev_net(dst->dev), ip6_dst_idev(dst), 261 __IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
262 IPSTATS_MIB_INHDRERRORS); 262 IPSTATS_MIB_INHDRERRORS);
263 kfree_skb(skb); 263 kfree_skb(skb);
264 return -1; 264 return -1;
265 } 265 }
@@ -280,8 +280,8 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
280 return 1; 280 return 1;
281 } 281 }
282 282
283 IP6_INC_STATS_BH(dev_net(dst->dev), 283 __IP6_INC_STATS(dev_net(dst->dev),
284 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); 284 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
285 return -1; 285 return -1;
286} 286}
287 287
@@ -309,8 +309,8 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
309 if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || 309 if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
310 !pskb_may_pull(skb, (skb_transport_offset(skb) + 310 !pskb_may_pull(skb, (skb_transport_offset(skb) +
311 ((skb_transport_header(skb)[1] + 1) << 3)))) { 311 ((skb_transport_header(skb)[1] + 1) << 3)))) {
312 IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), 312 __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
313 IPSTATS_MIB_INHDRERRORS); 313 IPSTATS_MIB_INHDRERRORS);
314 kfree_skb(skb); 314 kfree_skb(skb);
315 return -1; 315 return -1;
316 } 316 }
@@ -319,8 +319,8 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
319 319
320 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) || 320 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) ||
321 skb->pkt_type != PACKET_HOST) { 321 skb->pkt_type != PACKET_HOST) {
322 IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), 322 __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
323 IPSTATS_MIB_INADDRERRORS); 323 IPSTATS_MIB_INADDRERRORS);
324 kfree_skb(skb); 324 kfree_skb(skb);
325 return -1; 325 return -1;
326 } 326 }
@@ -334,8 +334,8 @@ looped_back:
334 * processed by own 334 * processed by own
335 */ 335 */
336 if (!addr) { 336 if (!addr) {
337 IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), 337 __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
338 IPSTATS_MIB_INADDRERRORS); 338 IPSTATS_MIB_INADDRERRORS);
339 kfree_skb(skb); 339 kfree_skb(skb);
340 return -1; 340 return -1;
341 } 341 }
@@ -360,8 +360,8 @@ looped_back:
360 goto unknown_rh; 360 goto unknown_rh;
361 /* Silently discard invalid RTH type 2 */ 361 /* Silently discard invalid RTH type 2 */
362 if (hdr->hdrlen != 2 || hdr->segments_left != 1) { 362 if (hdr->hdrlen != 2 || hdr->segments_left != 1) {
363 IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), 363 __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
364 IPSTATS_MIB_INHDRERRORS); 364 IPSTATS_MIB_INHDRERRORS);
365 kfree_skb(skb); 365 kfree_skb(skb);
366 return -1; 366 return -1;
367 } 367 }
@@ -379,8 +379,8 @@ looped_back:
379 n = hdr->hdrlen >> 1; 379 n = hdr->hdrlen >> 1;
380 380
381 if (hdr->segments_left > n) { 381 if (hdr->segments_left > n) {
382 IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), 382 __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
383 IPSTATS_MIB_INHDRERRORS); 383 IPSTATS_MIB_INHDRERRORS);
384 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, 384 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
385 ((&hdr->segments_left) - 385 ((&hdr->segments_left) -
386 skb_network_header(skb))); 386 skb_network_header(skb)));
@@ -393,8 +393,8 @@ looped_back:
393 if (skb_cloned(skb)) { 393 if (skb_cloned(skb)) {
394 /* the copy is a forwarded packet */ 394 /* the copy is a forwarded packet */
395 if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { 395 if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
396 IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), 396 __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
397 IPSTATS_MIB_OUTDISCARDS); 397 IPSTATS_MIB_OUTDISCARDS);
398 kfree_skb(skb); 398 kfree_skb(skb);
399 return -1; 399 return -1;
400 } 400 }
@@ -416,14 +416,14 @@ looped_back:
416 if (xfrm6_input_addr(skb, (xfrm_address_t *)addr, 416 if (xfrm6_input_addr(skb, (xfrm_address_t *)addr,
417 (xfrm_address_t *)&ipv6_hdr(skb)->saddr, 417 (xfrm_address_t *)&ipv6_hdr(skb)->saddr,
418 IPPROTO_ROUTING) < 0) { 418 IPPROTO_ROUTING) < 0) {
419 IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), 419 __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
420 IPSTATS_MIB_INADDRERRORS); 420 IPSTATS_MIB_INADDRERRORS);
421 kfree_skb(skb); 421 kfree_skb(skb);
422 return -1; 422 return -1;
423 } 423 }
424 if (!ipv6_chk_home_addr(dev_net(skb_dst(skb)->dev), addr)) { 424 if (!ipv6_chk_home_addr(dev_net(skb_dst(skb)->dev), addr)) {
425 IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), 425 __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
426 IPSTATS_MIB_INADDRERRORS); 426 IPSTATS_MIB_INADDRERRORS);
427 kfree_skb(skb); 427 kfree_skb(skb);
428 return -1; 428 return -1;
429 } 429 }
@@ -434,8 +434,8 @@ looped_back:
434 } 434 }
435 435
436 if (ipv6_addr_is_multicast(addr)) { 436 if (ipv6_addr_is_multicast(addr)) {
437 IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), 437 __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
438 IPSTATS_MIB_INADDRERRORS); 438 IPSTATS_MIB_INADDRERRORS);
439 kfree_skb(skb); 439 kfree_skb(skb);
440 return -1; 440 return -1;
441 } 441 }
@@ -454,8 +454,8 @@ looped_back:
454 454
455 if (skb_dst(skb)->dev->flags&IFF_LOOPBACK) { 455 if (skb_dst(skb)->dev->flags&IFF_LOOPBACK) {
456 if (ipv6_hdr(skb)->hop_limit <= 1) { 456 if (ipv6_hdr(skb)->hop_limit <= 1) {
457 IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), 457 __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
458 IPSTATS_MIB_INHDRERRORS); 458 IPSTATS_MIB_INHDRERRORS);
459 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 459 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
460 0); 460 0);
461 kfree_skb(skb); 461 kfree_skb(skb);
@@ -470,7 +470,7 @@ looped_back:
470 return -1; 470 return -1;
471 471
472unknown_rh: 472unknown_rh:
473 IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS); 473 __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS);
474 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, 474 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
475 (&hdr->type) - skb_network_header(skb)); 475 (&hdr->type) - skb_network_header(skb));
476 return -1; 476 return -1;
@@ -568,28 +568,28 @@ static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
568 if (nh[optoff + 1] != 4 || (optoff & 3) != 2) { 568 if (nh[optoff + 1] != 4 || (optoff & 3) != 2) {
569 net_dbg_ratelimited("ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", 569 net_dbg_ratelimited("ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n",
570 nh[optoff+1]); 570 nh[optoff+1]);
571 IP6_INC_STATS_BH(net, ipv6_skb_idev(skb), 571 __IP6_INC_STATS(net, ipv6_skb_idev(skb),
572 IPSTATS_MIB_INHDRERRORS); 572 IPSTATS_MIB_INHDRERRORS);
573 goto drop; 573 goto drop;
574 } 574 }
575 575
576 pkt_len = ntohl(*(__be32 *)(nh + optoff + 2)); 576 pkt_len = ntohl(*(__be32 *)(nh + optoff + 2));
577 if (pkt_len <= IPV6_MAXPLEN) { 577 if (pkt_len <= IPV6_MAXPLEN) {
578 IP6_INC_STATS_BH(net, ipv6_skb_idev(skb), 578 __IP6_INC_STATS(net, ipv6_skb_idev(skb),
579 IPSTATS_MIB_INHDRERRORS); 579 IPSTATS_MIB_INHDRERRORS);
580 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2); 580 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2);
581 return false; 581 return false;
582 } 582 }
583 if (ipv6_hdr(skb)->payload_len) { 583 if (ipv6_hdr(skb)->payload_len) {
584 IP6_INC_STATS_BH(net, ipv6_skb_idev(skb), 584 __IP6_INC_STATS(net, ipv6_skb_idev(skb),
585 IPSTATS_MIB_INHDRERRORS); 585 IPSTATS_MIB_INHDRERRORS);
586 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff); 586 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff);
587 return false; 587 return false;
588 } 588 }
589 589
590 if (pkt_len > skb->len - sizeof(struct ipv6hdr)) { 590 if (pkt_len > skb->len - sizeof(struct ipv6hdr)) {
591 IP6_INC_STATS_BH(net, ipv6_skb_idev(skb), 591 __IP6_INC_STATS(net, ipv6_skb_idev(skb),
592 IPSTATS_MIB_INTRUNCATEDPKTS); 592 IPSTATS_MIB_INTRUNCATEDPKTS);
593 goto drop; 593 goto drop;
594 } 594 }
595 595
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 6b573ebe49de..4527285fcaa2 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -401,10 +401,10 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
401 struct flowi6 fl6; 401 struct flowi6 fl6;
402 struct icmpv6_msg msg; 402 struct icmpv6_msg msg;
403 struct sockcm_cookie sockc_unused = {0}; 403 struct sockcm_cookie sockc_unused = {0};
404 struct ipcm6_cookie ipc6;
404 int iif = 0; 405 int iif = 0;
405 int addr_type = 0; 406 int addr_type = 0;
406 int len; 407 int len;
407 int hlimit;
408 int err = 0; 408 int err = 0;
409 u32 mark = IP6_REPLY_MARK(net, skb->mark); 409 u32 mark = IP6_REPLY_MARK(net, skb->mark);
410 410
@@ -446,6 +446,8 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
446 446
447 if (__ipv6_addr_needs_scope_id(addr_type)) 447 if (__ipv6_addr_needs_scope_id(addr_type))
448 iif = skb->dev->ifindex; 448 iif = skb->dev->ifindex;
449 else
450 iif = l3mdev_master_ifindex(skb->dev);
449 451
450 /* 452 /*
451 * Must not send error if the source does not uniquely 453 * Must not send error if the source does not uniquely
@@ -500,14 +502,14 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
500 else if (!fl6.flowi6_oif) 502 else if (!fl6.flowi6_oif)
501 fl6.flowi6_oif = np->ucast_oif; 503 fl6.flowi6_oif = np->ucast_oif;
502 504
503 if (!fl6.flowi6_oif)
504 fl6.flowi6_oif = l3mdev_master_ifindex(skb->dev);
505
506 dst = icmpv6_route_lookup(net, skb, sk, &fl6); 505 dst = icmpv6_route_lookup(net, skb, sk, &fl6);
507 if (IS_ERR(dst)) 506 if (IS_ERR(dst))
508 goto out; 507 goto out;
509 508
510 hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); 509 ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
510 ipc6.tclass = np->tclass;
511 ipc6.dontfrag = np->dontfrag;
512 ipc6.opt = NULL;
511 513
512 msg.skb = skb; 514 msg.skb = skb;
513 msg.offset = skb_network_offset(skb); 515 msg.offset = skb_network_offset(skb);
@@ -526,9 +528,9 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
526 528
527 err = ip6_append_data(sk, icmpv6_getfrag, &msg, 529 err = ip6_append_data(sk, icmpv6_getfrag, &msg,
528 len + sizeof(struct icmp6hdr), 530 len + sizeof(struct icmp6hdr),
529 sizeof(struct icmp6hdr), hlimit, 531 sizeof(struct icmp6hdr),
530 np->tclass, NULL, &fl6, (struct rt6_info *)dst, 532 &ipc6, &fl6, (struct rt6_info *)dst,
531 MSG_DONTWAIT, np->dontfrag, &sockc_unused); 533 MSG_DONTWAIT, &sockc_unused);
532 if (err) { 534 if (err) {
533 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS); 535 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
534 ip6_flush_pending_frames(sk); 536 ip6_flush_pending_frames(sk);
@@ -563,9 +565,8 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
563 struct flowi6 fl6; 565 struct flowi6 fl6;
564 struct icmpv6_msg msg; 566 struct icmpv6_msg msg;
565 struct dst_entry *dst; 567 struct dst_entry *dst;
568 struct ipcm6_cookie ipc6;
566 int err = 0; 569 int err = 0;
567 int hlimit;
568 u8 tclass;
569 u32 mark = IP6_REPLY_MARK(net, skb->mark); 570 u32 mark = IP6_REPLY_MARK(net, skb->mark);
570 struct sockcm_cookie sockc_unused = {0}; 571 struct sockcm_cookie sockc_unused = {0};
571 572
@@ -607,22 +608,24 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
607 if (IS_ERR(dst)) 608 if (IS_ERR(dst))
608 goto out; 609 goto out;
609 610
610 hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
611
612 idev = __in6_dev_get(skb->dev); 611 idev = __in6_dev_get(skb->dev);
613 612
614 msg.skb = skb; 613 msg.skb = skb;
615 msg.offset = 0; 614 msg.offset = 0;
616 msg.type = ICMPV6_ECHO_REPLY; 615 msg.type = ICMPV6_ECHO_REPLY;
617 616
618 tclass = ipv6_get_dsfield(ipv6_hdr(skb)); 617 ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
618 ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
619 ipc6.dontfrag = np->dontfrag;
620 ipc6.opt = NULL;
621
619 err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), 622 err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
620 sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl6, 623 sizeof(struct icmp6hdr), &ipc6, &fl6,
621 (struct rt6_info *)dst, MSG_DONTWAIT, 624 (struct rt6_info *)dst, MSG_DONTWAIT,
622 np->dontfrag, &sockc_unused); 625 &sockc_unused);
623 626
624 if (err) { 627 if (err) {
625 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS); 628 __ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
626 ip6_flush_pending_frames(sk); 629 ip6_flush_pending_frames(sk);
627 } else { 630 } else {
628 err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, 631 err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
@@ -674,7 +677,7 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
674 return; 677 return;
675 678
676out: 679out:
677 ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); 680 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
678} 681}
679 682
680/* 683/*
@@ -710,7 +713,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
710 skb_set_network_header(skb, nh); 713 skb_set_network_header(skb, nh);
711 } 714 }
712 715
713 ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INMSGS); 716 __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
714 717
715 saddr = &ipv6_hdr(skb)->saddr; 718 saddr = &ipv6_hdr(skb)->saddr;
716 daddr = &ipv6_hdr(skb)->daddr; 719 daddr = &ipv6_hdr(skb)->daddr;
@@ -728,7 +731,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
728 731
729 type = hdr->icmp6_type; 732 type = hdr->icmp6_type;
730 733
731 ICMP6MSGIN_INC_STATS_BH(dev_net(dev), idev, type); 734 ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
732 735
733 switch (type) { 736 switch (type) {
734 case ICMPV6_ECHO_REQUEST: 737 case ICMPV6_ECHO_REQUEST:
@@ -812,9 +815,9 @@ static int icmpv6_rcv(struct sk_buff *skb)
812 return 0; 815 return 0;
813 816
814csum_error: 817csum_error:
815 ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS); 818 __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
816discard_it: 819discard_it:
817 ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS); 820 __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
818drop_no_count: 821drop_no_count:
819 kfree_skb(skb); 822 kfree_skb(skb);
820 return 0; 823 return 0;
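A minimal sketch, assuming only the ipcm6_cookie fields these hunks initialize (hlimit, tclass, dontfrag, opt), of the setup a converted caller performs before ip6_append_data(); the helper function itself is hypothetical, not part of this patch:

/* Hedged example: collects the values that used to be passed as separate
 * hlimit/tclass/dontfrag arguments into one ipcm6_cookie.
 */
#include <net/ipv6.h>

static void example_init_ipc6(struct sock *sk, struct ipcm6_cookie *ipc6,
			      struct flowi6 *fl6, struct dst_entry *dst)
{
	struct ipv6_pinfo *np = inet6_sk(sk);

	ipc6->hlimit = ip6_sk_dst_hoplimit(np, fl6, dst);
	ipc6->tclass = np->tclass;
	ipc6->dontfrag = np->dontfrag;
	ipc6->opt = NULL;		/* no tx options in this sketch */
}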
diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h
index 28542cb2b387..d08fd2d48a78 100644
--- a/net/ipv6/ila/ila.h
+++ b/net/ipv6/ila/ila.h
@@ -23,10 +23,76 @@
23#include <net/protocol.h> 23#include <net/protocol.h>
24#include <uapi/linux/ila.h> 24#include <uapi/linux/ila.h>
25 25
26struct ila_locator {
27 union {
28 __u8 v8[8];
29 __be16 v16[4];
30 __be32 v32[2];
31 __be64 v64;
32 };
33};
34
35struct ila_identifier {
36 union {
37 struct {
38#if defined(__LITTLE_ENDIAN_BITFIELD)
39 u8 __space:4;
40 u8 csum_neutral:1;
41 u8 type:3;
42#elif defined(__BIG_ENDIAN_BITFIELD)
43 u8 type:3;
44 u8 csum_neutral:1;
45 u8 __space:4;
46#else
47#error "Adjust your <asm/byteorder.h> defines"
48#endif
49 u8 __space2[7];
50 };
51 __u8 v8[8];
52 __be16 v16[4];
53 __be32 v32[2];
54 __be64 v64;
55 };
56};
57
58enum {
59 ILA_ATYPE_IID = 0,
60 ILA_ATYPE_LUID,
61 ILA_ATYPE_VIRT_V4,
62 ILA_ATYPE_VIRT_UNI_V6,
63 ILA_ATYPE_VIRT_MULTI_V6,
64 ILA_ATYPE_RSVD_1,
65 ILA_ATYPE_RSVD_2,
66 ILA_ATYPE_RSVD_3,
67};
68
69#define CSUM_NEUTRAL_FLAG htonl(0x10000000)
70
71struct ila_addr {
72 union {
73 struct in6_addr addr;
74 struct {
75 struct ila_locator loc;
76 struct ila_identifier ident;
77 };
78 };
79};
80
81static inline struct ila_addr *ila_a2i(struct in6_addr *addr)
82{
83 return (struct ila_addr *)addr;
84}
85
86static inline bool ila_addr_is_ila(struct ila_addr *iaddr)
87{
88 return (iaddr->ident.type != ILA_ATYPE_IID);
89}
90
26struct ila_params { 91struct ila_params {
27 __be64 locator; 92 struct ila_locator locator;
28 __be64 locator_match; 93 struct ila_locator locator_match;
29 __wsum csum_diff; 94 __wsum csum_diff;
95 u8 csum_mode;
30}; 96};
31 97
32static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to) 98static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to)
@@ -38,7 +104,14 @@ static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to)
38 return csum_partial(diff, sizeof(diff), 0); 104 return csum_partial(diff, sizeof(diff), 0);
39} 105}
40 106
41void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p); 107static inline bool ila_csum_neutral_set(struct ila_identifier ident)
108{
109 return !!(ident.csum_neutral);
110}
111
112void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p);
113
114void ila_init_saved_csum(struct ila_params *p);
42 115
43int ila_lwt_init(void); 116int ila_lwt_init(void);
44void ila_lwt_fini(void); 117void ila_lwt_fini(void);
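A minimal sketch, using only the helpers added in this header (ila_a2i, ila_addr_is_ila, ila_csum_neutral_set), of how a destination address might be classified before translation; the wrapper function is hypothetical, not part of this patch:

/* Hedged example: reinterprets the IPv6 destination as an ila_addr
 * overlay and checks the identifier type and C-bit, as ila_build_state()
 * does further below.
 */
#include <net/ipv6.h>

static bool example_dest_is_translatable(struct ipv6hdr *ip6h)
{
	struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);

	if (!ila_addr_is_ila(iaddr))	/* identifier type is plain IID */
		return false;

	/* addresses with the C-bit set take the checksum-neutral path */
	return !ila_csum_neutral_set(iaddr->ident);
}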
diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c
index 30613050e4ca..0e94042d1289 100644
--- a/net/ipv6/ila/ila_common.c
+++ b/net/ipv6/ila/ila_common.c
@@ -15,20 +15,52 @@
15 15
16static __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p) 16static __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p)
17{ 17{
18 if (*(__be64 *)&ip6h->daddr == p->locator_match) 18 struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
19
20 if (p->locator_match.v64)
19 return p->csum_diff; 21 return p->csum_diff;
20 else 22 else
21 return compute_csum_diff8((__be32 *)&ip6h->daddr, 23 return compute_csum_diff8((__be32 *)&iaddr->loc,
24 (__be32 *)&p->locator);
25}
26
27static void ila_csum_do_neutral(struct ila_addr *iaddr,
28 struct ila_params *p)
29{
30 __sum16 *adjust = (__force __sum16 *)&iaddr->ident.v16[3];
31 __wsum diff, fval;
32
33 /* Check if checksum adjust value has been cached */
34 if (p->locator_match.v64) {
35 diff = p->csum_diff;
36 } else {
37 diff = compute_csum_diff8((__be32 *)iaddr,
22 (__be32 *)&p->locator); 38 (__be32 *)&p->locator);
39 }
40
41 fval = (__force __wsum)(ila_csum_neutral_set(iaddr->ident) ?
42 ~CSUM_NEUTRAL_FLAG : CSUM_NEUTRAL_FLAG);
43
44 diff = csum_add(diff, fval);
45
46 *adjust = ~csum_fold(csum_add(diff, csum_unfold(*adjust)));
47
48 /* Flip the csum-neutral bit. Either we are doing a SIR->ILA
 49 * translation with ILA_CSUM_NEUTRAL_MAP as the csum_mode
50 * and the C-bit is not set, or we are doing an ILA-SIR
 51 * translation and the C-bit is set.
52 */
53 iaddr->ident.csum_neutral ^= 1;
23} 54}
24 55
25void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p) 56static void ila_csum_adjust_transport(struct sk_buff *skb,
57 struct ila_params *p)
26{ 58{
27 __wsum diff; 59 __wsum diff;
28 struct ipv6hdr *ip6h = ipv6_hdr(skb); 60 struct ipv6hdr *ip6h = ipv6_hdr(skb);
61 struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
29 size_t nhoff = sizeof(struct ipv6hdr); 62 size_t nhoff = sizeof(struct ipv6hdr);
30 63
31 /* First update checksum */
32 switch (ip6h->nexthdr) { 64 switch (ip6h->nexthdr) {
33 case NEXTHDR_TCP: 65 case NEXTHDR_TCP:
34 if (likely(pskb_may_pull(skb, nhoff + sizeof(struct tcphdr)))) { 66 if (likely(pskb_may_pull(skb, nhoff + sizeof(struct tcphdr)))) {
@@ -68,7 +100,46 @@ void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p)
68 } 100 }
69 101
70 /* Now change destination address */ 102 /* Now change destination address */
71 *(__be64 *)&ip6h->daddr = p->locator; 103 iaddr->loc = p->locator;
104}
105
106void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p)
107{
108 struct ipv6hdr *ip6h = ipv6_hdr(skb);
109 struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
110
111 /* First deal with the transport checksum */
112 if (ila_csum_neutral_set(iaddr->ident)) {
 113 /* C-bit is set in the identifier, indicating that this
 114 * is an ILA address being translated back to a SIR address.
115 * Perform (receiver) checksum-neutral translation.
116 */
117 ila_csum_do_neutral(iaddr, p);
118 } else {
119 switch (p->csum_mode) {
120 case ILA_CSUM_ADJUST_TRANSPORT:
121 ila_csum_adjust_transport(skb, p);
122 break;
123 case ILA_CSUM_NEUTRAL_MAP:
124 ila_csum_do_neutral(iaddr, p);
125 break;
126 case ILA_CSUM_NO_ACTION:
127 break;
128 }
129 }
130
131 /* Now change destination address */
132 iaddr->loc = p->locator;
133}
134
135void ila_init_saved_csum(struct ila_params *p)
136{
137 if (!p->locator_match.v64)
138 return;
139
140 p->csum_diff = compute_csum_diff8(
141 (__be32 *)&p->locator_match,
142 (__be32 *)&p->locator);
72} 143}
73 144
74static int __init ila_init(void) 145static int __init ila_init(void)
diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index 9db3621b2126..1dfb64166d7d 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -26,7 +26,7 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
26 if (skb->protocol != htons(ETH_P_IPV6)) 26 if (skb->protocol != htons(ETH_P_IPV6))
27 goto drop; 27 goto drop;
28 28
29 update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate)); 29 ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate));
30 30
31 return dst->lwtstate->orig_output(net, sk, skb); 31 return dst->lwtstate->orig_output(net, sk, skb);
32 32
@@ -42,7 +42,7 @@ static int ila_input(struct sk_buff *skb)
42 if (skb->protocol != htons(ETH_P_IPV6)) 42 if (skb->protocol != htons(ETH_P_IPV6))
43 goto drop; 43 goto drop;
44 44
45 update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate)); 45 ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate));
46 46
47 return dst->lwtstate->orig_input(skb); 47 return dst->lwtstate->orig_input(skb);
48 48
@@ -53,6 +53,7 @@ drop:
53 53
54static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { 54static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
55 [ILA_ATTR_LOCATOR] = { .type = NLA_U64, }, 55 [ILA_ATTR_LOCATOR] = { .type = NLA_U64, },
56 [ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, },
56}; 57};
57 58
58static int ila_build_state(struct net_device *dev, struct nlattr *nla, 59static int ila_build_state(struct net_device *dev, struct nlattr *nla,
@@ -64,11 +65,28 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla,
64 size_t encap_len = sizeof(*p); 65 size_t encap_len = sizeof(*p);
65 struct lwtunnel_state *newts; 66 struct lwtunnel_state *newts;
66 const struct fib6_config *cfg6 = cfg; 67 const struct fib6_config *cfg6 = cfg;
68 struct ila_addr *iaddr;
67 int ret; 69 int ret;
68 70
69 if (family != AF_INET6) 71 if (family != AF_INET6)
70 return -EINVAL; 72 return -EINVAL;
71 73
74 if (cfg6->fc_dst_len < sizeof(struct ila_locator) + 1) {
75 /* Need to have full locator and at least type field
76 * included in destination
77 */
78 return -EINVAL;
79 }
80
81 iaddr = (struct ila_addr *)&cfg6->fc_dst;
82
83 if (!ila_addr_is_ila(iaddr) || ila_csum_neutral_set(iaddr->ident)) {
84 /* Don't allow translation for a non-ILA address or checksum
85 * neutral flag to be set.
86 */
87 return -EINVAL;
88 }
89
72 ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, 90 ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla,
73 ila_nl_policy); 91 ila_nl_policy);
74 if (ret < 0) 92 if (ret < 0)
@@ -84,16 +102,19 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla,
84 newts->len = encap_len; 102 newts->len = encap_len;
85 p = ila_params_lwtunnel(newts); 103 p = ila_params_lwtunnel(newts);
86 104
87 p->locator = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]); 105 p->locator.v64 = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]);
88 106
89 if (cfg6->fc_dst_len > sizeof(__be64)) { 107 /* Precompute checksum difference for translation since we
90 /* Precompute checksum difference for translation since we 108 * know both the old locator and the new one.
91 * know both the old locator and the new one. 109 */
92 */ 110 p->locator_match = iaddr->loc;
93 p->locator_match = *(__be64 *)&cfg6->fc_dst; 111 p->csum_diff = compute_csum_diff8(
94 p->csum_diff = compute_csum_diff8( 112 (__be32 *)&p->locator_match, (__be32 *)&p->locator);
95 (__be32 *)&p->locator_match, (__be32 *)&p->locator); 113
96 } 114 if (tb[ILA_ATTR_CSUM_MODE])
115 p->csum_mode = nla_get_u8(tb[ILA_ATTR_CSUM_MODE]);
116
117 ila_init_saved_csum(p);
97 118
98 newts->type = LWTUNNEL_ENCAP_ILA; 119 newts->type = LWTUNNEL_ENCAP_ILA;
99 newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT | 120 newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT |
@@ -109,9 +130,11 @@ static int ila_fill_encap_info(struct sk_buff *skb,
109{ 130{
110 struct ila_params *p = ila_params_lwtunnel(lwtstate); 131 struct ila_params *p = ila_params_lwtunnel(lwtstate);
111 132
112 if (nla_put_u64_64bit(skb, ILA_ATTR_LOCATOR, (__force u64)p->locator, 133 if (nla_put_u64_64bit(skb, ILA_ATTR_LOCATOR, (__force u64)p->locator.v64,
113 ILA_ATTR_PAD)) 134 ILA_ATTR_PAD))
114 goto nla_put_failure; 135 goto nla_put_failure;
136 if (nla_put_u8(skb, ILA_ATTR_CSUM_MODE, (__force u8)p->csum_mode))
137 goto nla_put_failure;
115 138
116 return 0; 139 return 0;
117 140
@@ -121,8 +144,9 @@ nla_put_failure:
121 144
122static int ila_encap_nlsize(struct lwtunnel_state *lwtstate) 145static int ila_encap_nlsize(struct lwtunnel_state *lwtstate)
123{ 146{
124 /* No encapsulation overhead */ 147 return nla_total_size_64bit(sizeof(u64)) + /* ILA_ATTR_LOCATOR */
125 return 0; 148 nla_total_size(sizeof(u8)) + /* ILA_ATTR_CSUM_MODE */
149 0;
126} 150}
127 151
128static int ila_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) 152static int ila_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
@@ -130,7 +154,7 @@ static int ila_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
130 struct ila_params *a_p = ila_params_lwtunnel(a); 154 struct ila_params *a_p = ila_params_lwtunnel(a);
131 struct ila_params *b_p = ila_params_lwtunnel(b); 155 struct ila_params *b_p = ila_params_lwtunnel(b);
132 156
133 return (a_p->locator != b_p->locator); 157 return (a_p->locator.v64 != b_p->locator.v64);
134} 158}
135 159
136static const struct lwtunnel_encap_ops ila_encap_ops = { 160static const struct lwtunnel_encap_ops ila_encap_ops = {
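
[Editor's note] The ila_lwt hunks build encap state only when the route destination really is an ILA address: the prefix must cover the whole 64-bit locator plus the start of the identifier, and the checksum-neutral (C) bit must not already be set. The sketch below is a simplified mirror of the address split used throughout these files (high 64 bits locator, low 64 bits identifier carrying a type field and the C flag); the exact bit positions and all names are assumptions for illustration, not the kernel's struct ila_addr.

/* Simplified mirror of the ILA address split: locator / identifier,
 * with the C flag near the top of the identifier.  Bit positions here
 * are an assumption for illustration only.
 */
#include <stdint.h>
#include <stdio.h>

struct ila_addr_example {
	uint64_t loc;	/* locator half of the IPv6 address */
	uint64_t ident;	/* identifier: type bits, C flag, then IID */
};

static int csum_neutral_bit(const struct ila_addr_example *a)
{
	/* Assume the C flag sits right after a 3-bit type field in the
	 * most significant byte of the identifier.
	 */
	return (int)((a->ident >> 60) & 0x1);
}

int main(void)
{
	struct ila_addr_example a = {
		.loc = 0x20010db800990000ULL,
		.ident = 0x1000000000001234ULL,	/* type=0, C=1, iid=0x1234 */
	};

	printf("locator=%016llx ident=%016llx C=%d\n",
	       (unsigned long long)a.loc,
	       (unsigned long long)a.ident,
	       csum_neutral_bit(&a));
	return 0;
}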
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 0e9e579410da..a90e57229c6c 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -11,13 +11,11 @@
11 11
12struct ila_xlat_params { 12struct ila_xlat_params {
13 struct ila_params ip; 13 struct ila_params ip;
14 __be64 identifier;
15 int ifindex; 14 int ifindex;
16 unsigned int dir;
17}; 15};
18 16
19struct ila_map { 17struct ila_map {
20 struct ila_xlat_params p; 18 struct ila_xlat_params xp;
21 struct rhash_head node; 19 struct rhash_head node;
22 struct ila_map __rcu *next; 20 struct ila_map __rcu *next;
23 struct rcu_head rcu; 21 struct rcu_head rcu;
@@ -66,31 +64,29 @@ static __always_inline void __ila_hash_secret_init(void)
66 net_get_random_once(&hashrnd, sizeof(hashrnd)); 64 net_get_random_once(&hashrnd, sizeof(hashrnd));
67} 65}
68 66
69static inline u32 ila_identifier_hash(__be64 identifier) 67static inline u32 ila_locator_hash(struct ila_locator loc)
70{ 68{
71 u32 *v = (u32 *)&identifier; 69 u32 *v = (u32 *)loc.v32;
72 70
73 return jhash_2words(v[0], v[1], hashrnd); 71 return jhash_2words(v[0], v[1], hashrnd);
74} 72}
75 73
76static inline spinlock_t *ila_get_lock(struct ila_net *ilan, __be64 identifier) 74static inline spinlock_t *ila_get_lock(struct ila_net *ilan,
75 struct ila_locator loc)
77{ 76{
78 return &ilan->locks[ila_identifier_hash(identifier) & ilan->locks_mask]; 77 return &ilan->locks[ila_locator_hash(loc) & ilan->locks_mask];
79} 78}
80 79
81static inline int ila_cmp_wildcards(struct ila_map *ila, __be64 loc, 80static inline int ila_cmp_wildcards(struct ila_map *ila,
82 int ifindex, unsigned int dir) 81 struct ila_addr *iaddr, int ifindex)
83{ 82{
84 return (ila->p.ip.locator_match && ila->p.ip.locator_match != loc) || 83 return (ila->xp.ifindex && ila->xp.ifindex != ifindex);
85 (ila->p.ifindex && ila->p.ifindex != ifindex) ||
86 !(ila->p.dir & dir);
87} 84}
88 85
89static inline int ila_cmp_params(struct ila_map *ila, struct ila_xlat_params *p) 86static inline int ila_cmp_params(struct ila_map *ila,
87 struct ila_xlat_params *xp)
90{ 88{
91 return (ila->p.ip.locator_match != p->ip.locator_match) || 89 return (ila->xp.ifindex != xp->ifindex);
92 (ila->p.ifindex != p->ifindex) ||
93 (ila->p.dir != p->dir);
94} 90}
95 91
96static int ila_cmpfn(struct rhashtable_compare_arg *arg, 92static int ila_cmpfn(struct rhashtable_compare_arg *arg,
@@ -98,17 +94,14 @@ static int ila_cmpfn(struct rhashtable_compare_arg *arg,
98{ 94{
99 const struct ila_map *ila = obj; 95 const struct ila_map *ila = obj;
100 96
101 return (ila->p.identifier != *(__be64 *)arg->key); 97 return (ila->xp.ip.locator_match.v64 != *(__be64 *)arg->key);
102} 98}
103 99
104static inline int ila_order(struct ila_map *ila) 100static inline int ila_order(struct ila_map *ila)
105{ 101{
106 int score = 0; 102 int score = 0;
107 103
108 if (ila->p.ip.locator_match) 104 if (ila->xp.ifindex)
109 score += 1 << 0;
110
111 if (ila->p.ifindex)
112 score += 1 << 1; 105 score += 1 << 1;
113 106
114 return score; 107 return score;
@@ -117,7 +110,7 @@ static inline int ila_order(struct ila_map *ila)
117static const struct rhashtable_params rht_params = { 110static const struct rhashtable_params rht_params = {
118 .nelem_hint = 1024, 111 .nelem_hint = 1024,
119 .head_offset = offsetof(struct ila_map, node), 112 .head_offset = offsetof(struct ila_map, node),
120 .key_offset = offsetof(struct ila_map, p.identifier), 113 .key_offset = offsetof(struct ila_map, xp.ip.locator_match),
121 .key_len = sizeof(u64), /* identifier */ 114 .key_len = sizeof(u64), /* identifier */
122 .max_size = 1048576, 115 .max_size = 1048576,
123 .min_size = 256, 116 .min_size = 256,
@@ -136,50 +129,45 @@ static struct genl_family ila_nl_family = {
136}; 129};
137 130
138static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { 131static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
139 [ILA_ATTR_IDENTIFIER] = { .type = NLA_U64, },
140 [ILA_ATTR_LOCATOR] = { .type = NLA_U64, }, 132 [ILA_ATTR_LOCATOR] = { .type = NLA_U64, },
141 [ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, }, 133 [ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, },
142 [ILA_ATTR_IFINDEX] = { .type = NLA_U32, }, 134 [ILA_ATTR_IFINDEX] = { .type = NLA_U32, },
143 [ILA_ATTR_DIR] = { .type = NLA_U32, }, 135 [ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, },
144}; 136};
145 137
146static int parse_nl_config(struct genl_info *info, 138static int parse_nl_config(struct genl_info *info,
147 struct ila_xlat_params *p) 139 struct ila_xlat_params *xp)
148{ 140{
149 memset(p, 0, sizeof(*p)); 141 memset(xp, 0, sizeof(*xp));
150
151 if (info->attrs[ILA_ATTR_IDENTIFIER])
152 p->identifier = (__force __be64)nla_get_u64(
153 info->attrs[ILA_ATTR_IDENTIFIER]);
154 142
155 if (info->attrs[ILA_ATTR_LOCATOR]) 143 if (info->attrs[ILA_ATTR_LOCATOR])
156 p->ip.locator = (__force __be64)nla_get_u64( 144 xp->ip.locator.v64 = (__force __be64)nla_get_u64(
157 info->attrs[ILA_ATTR_LOCATOR]); 145 info->attrs[ILA_ATTR_LOCATOR]);
158 146
159 if (info->attrs[ILA_ATTR_LOCATOR_MATCH]) 147 if (info->attrs[ILA_ATTR_LOCATOR_MATCH])
160 p->ip.locator_match = (__force __be64)nla_get_u64( 148 xp->ip.locator_match.v64 = (__force __be64)nla_get_u64(
161 info->attrs[ILA_ATTR_LOCATOR_MATCH]); 149 info->attrs[ILA_ATTR_LOCATOR_MATCH]);
162 150
163 if (info->attrs[ILA_ATTR_IFINDEX]) 151 if (info->attrs[ILA_ATTR_CSUM_MODE])
164 p->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]); 152 xp->ip.csum_mode = nla_get_u8(info->attrs[ILA_ATTR_CSUM_MODE]);
165 153
166 if (info->attrs[ILA_ATTR_DIR]) 154 if (info->attrs[ILA_ATTR_IFINDEX])
167 p->dir = nla_get_u32(info->attrs[ILA_ATTR_DIR]); 155 xp->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]);
168 156
169 return 0; 157 return 0;
170} 158}
171 159
172/* Must be called with rcu readlock */ 160/* Must be called with rcu readlock */
173static inline struct ila_map *ila_lookup_wildcards(__be64 id, __be64 loc, 161static inline struct ila_map *ila_lookup_wildcards(struct ila_addr *iaddr,
174 int ifindex, 162 int ifindex,
175 unsigned int dir,
176 struct ila_net *ilan) 163 struct ila_net *ilan)
177{ 164{
178 struct ila_map *ila; 165 struct ila_map *ila;
179 166
180 ila = rhashtable_lookup_fast(&ilan->rhash_table, &id, rht_params); 167 ila = rhashtable_lookup_fast(&ilan->rhash_table, &iaddr->loc,
168 rht_params);
181 while (ila) { 169 while (ila) {
182 if (!ila_cmp_wildcards(ila, loc, ifindex, dir)) 170 if (!ila_cmp_wildcards(ila, iaddr, ifindex))
183 return ila; 171 return ila;
184 ila = rcu_access_pointer(ila->next); 172 ila = rcu_access_pointer(ila->next);
185 } 173 }
@@ -188,15 +176,16 @@ static inline struct ila_map *ila_lookup_wildcards(__be64 id, __be64 loc,
188} 176}
189 177
190/* Must be called with rcu readlock */ 178/* Must be called with rcu readlock */
191static inline struct ila_map *ila_lookup_by_params(struct ila_xlat_params *p, 179static inline struct ila_map *ila_lookup_by_params(struct ila_xlat_params *xp,
192 struct ila_net *ilan) 180 struct ila_net *ilan)
193{ 181{
194 struct ila_map *ila; 182 struct ila_map *ila;
195 183
196 ila = rhashtable_lookup_fast(&ilan->rhash_table, &p->identifier, 184 ila = rhashtable_lookup_fast(&ilan->rhash_table,
185 &xp->ip.locator_match,
197 rht_params); 186 rht_params);
198 while (ila) { 187 while (ila) {
199 if (!ila_cmp_params(ila, p)) 188 if (!ila_cmp_params(ila, xp))
200 return ila; 189 return ila;
201 ila = rcu_access_pointer(ila->next); 190 ila = rcu_access_pointer(ila->next);
202 } 191 }
@@ -221,14 +210,14 @@ static void ila_free_cb(void *ptr, void *arg)
221 } 210 }
222} 211}
223 212
224static int ila_xlat_addr(struct sk_buff *skb, int dir); 213static int ila_xlat_addr(struct sk_buff *skb);
225 214
226static unsigned int 215static unsigned int
227ila_nf_input(void *priv, 216ila_nf_input(void *priv,
228 struct sk_buff *skb, 217 struct sk_buff *skb,
229 const struct nf_hook_state *state) 218 const struct nf_hook_state *state)
230{ 219{
231 ila_xlat_addr(skb, ILA_DIR_IN); 220 ila_xlat_addr(skb);
232 return NF_ACCEPT; 221 return NF_ACCEPT;
233} 222}
234 223
@@ -241,11 +230,11 @@ static struct nf_hook_ops ila_nf_hook_ops[] __read_mostly = {
241 }, 230 },
242}; 231};
243 232
244static int ila_add_mapping(struct net *net, struct ila_xlat_params *p) 233static int ila_add_mapping(struct net *net, struct ila_xlat_params *xp)
245{ 234{
246 struct ila_net *ilan = net_generic(net, ila_net_id); 235 struct ila_net *ilan = net_generic(net, ila_net_id);
247 struct ila_map *ila, *head; 236 struct ila_map *ila, *head;
248 spinlock_t *lock = ila_get_lock(ilan, p->identifier); 237 spinlock_t *lock = ila_get_lock(ilan, xp->ip.locator_match);
249 int err = 0, order; 238 int err = 0, order;
250 239
251 if (!ilan->hooks_registered) { 240 if (!ilan->hooks_registered) {
@@ -264,22 +253,16 @@ static int ila_add_mapping(struct net *net, struct ila_xlat_params *p)
264 if (!ila) 253 if (!ila)
265 return -ENOMEM; 254 return -ENOMEM;
266 255
267 ila->p = *p; 256 ila_init_saved_csum(&xp->ip);
268 257
269 if (p->ip.locator_match) { 258 ila->xp = *xp;
270 /* Precompute checksum difference for translation since we
271 * know both the old identifier and the new one.
272 */
273 ila->p.ip.csum_diff = compute_csum_diff8(
274 (__be32 *)&p->ip.locator_match,
275 (__be32 *)&p->ip.locator);
276 }
277 259
278 order = ila_order(ila); 260 order = ila_order(ila);
279 261
280 spin_lock(lock); 262 spin_lock(lock);
281 263
282 head = rhashtable_lookup_fast(&ilan->rhash_table, &p->identifier, 264 head = rhashtable_lookup_fast(&ilan->rhash_table,
265 &xp->ip.locator_match,
283 rht_params); 266 rht_params);
284 if (!head) { 267 if (!head) {
285 /* New entry for the rhash_table */ 268 /* New entry for the rhash_table */
@@ -289,7 +272,7 @@ static int ila_add_mapping(struct net *net, struct ila_xlat_params *p)
289 struct ila_map *tila = head, *prev = NULL; 272 struct ila_map *tila = head, *prev = NULL;
290 273
291 do { 274 do {
292 if (!ila_cmp_params(tila, p)) { 275 if (!ila_cmp_params(tila, xp)) {
293 err = -EEXIST; 276 err = -EEXIST;
294 goto out; 277 goto out;
295 } 278 }
@@ -326,23 +309,23 @@ out:
326 return err; 309 return err;
327} 310}
328 311
329static int ila_del_mapping(struct net *net, struct ila_xlat_params *p) 312static int ila_del_mapping(struct net *net, struct ila_xlat_params *xp)
330{ 313{
331 struct ila_net *ilan = net_generic(net, ila_net_id); 314 struct ila_net *ilan = net_generic(net, ila_net_id);
332 struct ila_map *ila, *head, *prev; 315 struct ila_map *ila, *head, *prev;
333 spinlock_t *lock = ila_get_lock(ilan, p->identifier); 316 spinlock_t *lock = ila_get_lock(ilan, xp->ip.locator_match);
334 int err = -ENOENT; 317 int err = -ENOENT;
335 318
336 spin_lock(lock); 319 spin_lock(lock);
337 320
338 head = rhashtable_lookup_fast(&ilan->rhash_table, 321 head = rhashtable_lookup_fast(&ilan->rhash_table,
339 &p->identifier, rht_params); 322 &xp->ip.locator_match, rht_params);
340 ila = head; 323 ila = head;
341 324
342 prev = NULL; 325 prev = NULL;
343 326
344 while (ila) { 327 while (ila) {
345 if (ila_cmp_params(ila, p)) { 328 if (ila_cmp_params(ila, xp)) {
346 prev = ila; 329 prev = ila;
347 ila = rcu_dereference_protected(ila->next, 330 ila = rcu_dereference_protected(ila->next,
348 lockdep_is_held(lock)); 331 lockdep_is_held(lock));
@@ -404,31 +387,28 @@ static int ila_nl_cmd_add_mapping(struct sk_buff *skb, struct genl_info *info)
404static int ila_nl_cmd_del_mapping(struct sk_buff *skb, struct genl_info *info) 387static int ila_nl_cmd_del_mapping(struct sk_buff *skb, struct genl_info *info)
405{ 388{
406 struct net *net = genl_info_net(info); 389 struct net *net = genl_info_net(info);
407 struct ila_xlat_params p; 390 struct ila_xlat_params xp;
408 int err; 391 int err;
409 392
410 err = parse_nl_config(info, &p); 393 err = parse_nl_config(info, &xp);
411 if (err) 394 if (err)
412 return err; 395 return err;
413 396
414 ila_del_mapping(net, &p); 397 ila_del_mapping(net, &xp);
415 398
416 return 0; 399 return 0;
417} 400}
418 401
419static int ila_fill_info(struct ila_map *ila, struct sk_buff *msg) 402static int ila_fill_info(struct ila_map *ila, struct sk_buff *msg)
420{ 403{
421 if (nla_put_u64_64bit(msg, ILA_ATTR_IDENTIFIER, 404 if (nla_put_u64_64bit(msg, ILA_ATTR_LOCATOR,
422 (__force u64)ila->p.identifier, 405 (__force u64)ila->xp.ip.locator.v64,
423 ILA_ATTR_PAD) ||
424 nla_put_u64_64bit(msg, ILA_ATTR_LOCATOR,
425 (__force u64)ila->p.ip.locator,
426 ILA_ATTR_PAD) || 406 ILA_ATTR_PAD) ||
427 nla_put_u64_64bit(msg, ILA_ATTR_LOCATOR_MATCH, 407 nla_put_u64_64bit(msg, ILA_ATTR_LOCATOR_MATCH,
428 (__force u64)ila->p.ip.locator_match, 408 (__force u64)ila->xp.ip.locator_match.v64,
429 ILA_ATTR_PAD) || 409 ILA_ATTR_PAD) ||
430 nla_put_s32(msg, ILA_ATTR_IFINDEX, ila->p.ifindex) || 410 nla_put_s32(msg, ILA_ATTR_IFINDEX, ila->xp.ifindex) ||
431 nla_put_u32(msg, ILA_ATTR_DIR, ila->p.dir)) 411 nla_put_u32(msg, ILA_ATTR_CSUM_MODE, ila->xp.ip.csum_mode))
432 return -1; 412 return -1;
433 413
434 return 0; 414 return 0;
@@ -460,11 +440,11 @@ static int ila_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info)
460 struct net *net = genl_info_net(info); 440 struct net *net = genl_info_net(info);
461 struct ila_net *ilan = net_generic(net, ila_net_id); 441 struct ila_net *ilan = net_generic(net, ila_net_id);
462 struct sk_buff *msg; 442 struct sk_buff *msg;
463 struct ila_xlat_params p; 443 struct ila_xlat_params xp;
464 struct ila_map *ila; 444 struct ila_map *ila;
465 int ret; 445 int ret;
466 446
467 ret = parse_nl_config(info, &p); 447 ret = parse_nl_config(info, &xp);
468 if (ret) 448 if (ret)
469 return ret; 449 return ret;
470 450
@@ -474,7 +454,7 @@ static int ila_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info)
474 454
475 rcu_read_lock(); 455 rcu_read_lock();
476 456
477 ila = ila_lookup_by_params(&p, ilan); 457 ila = ila_lookup_by_params(&xp, ilan);
478 if (ila) { 458 if (ila) {
479 ret = ila_dump_info(ila, 459 ret = ila_dump_info(ila,
480 info->snd_portid, 460 info->snd_portid,
@@ -617,45 +597,32 @@ static struct pernet_operations ila_net_ops = {
617 .size = sizeof(struct ila_net), 597 .size = sizeof(struct ila_net),
618}; 598};
619 599
620static int ila_xlat_addr(struct sk_buff *skb, int dir) 600static int ila_xlat_addr(struct sk_buff *skb)
621{ 601{
622 struct ila_map *ila; 602 struct ila_map *ila;
623 struct ipv6hdr *ip6h = ipv6_hdr(skb); 603 struct ipv6hdr *ip6h = ipv6_hdr(skb);
624 struct net *net = dev_net(skb->dev); 604 struct net *net = dev_net(skb->dev);
625 struct ila_net *ilan = net_generic(net, ila_net_id); 605 struct ila_net *ilan = net_generic(net, ila_net_id);
626 __be64 identifier, locator_match; 606 struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
627 size_t nhoff;
628 607
629 /* Assumes skb contains a valid IPv6 header that is pulled */ 608 /* Assumes skb contains a valid IPv6 header that is pulled */
630 609
631 identifier = *(__be64 *)&ip6h->daddr.in6_u.u6_addr8[8]; 610 if (!ila_addr_is_ila(iaddr)) {
632 locator_match = *(__be64 *)&ip6h->daddr.in6_u.u6_addr8[0]; 611 /* Type indicates this is not an ILA address */
633 nhoff = sizeof(struct ipv6hdr); 612 return 0;
613 }
634 614
635 rcu_read_lock(); 615 rcu_read_lock();
636 616
637 ila = ila_lookup_wildcards(identifier, locator_match, 617 ila = ila_lookup_wildcards(iaddr, skb->dev->ifindex, ilan);
638 skb->dev->ifindex, dir, ilan);
639 if (ila) 618 if (ila)
640 update_ipv6_locator(skb, &ila->p.ip); 619 ila_update_ipv6_locator(skb, &ila->xp.ip);
641 620
642 rcu_read_unlock(); 621 rcu_read_unlock();
643 622
644 return 0; 623 return 0;
645} 624}
646 625
647int ila_xlat_incoming(struct sk_buff *skb)
648{
649 return ila_xlat_addr(skb, ILA_DIR_IN);
650}
651EXPORT_SYMBOL(ila_xlat_incoming);
652
653int ila_xlat_outgoing(struct sk_buff *skb)
654{
655 return ila_xlat_addr(skb, ILA_DIR_OUT);
656}
657EXPORT_SYMBOL(ila_xlat_outgoing);
658
659int ila_xlat_init(void) 626int ila_xlat_init(void)
660{ 627{
661 int ret; 628 int ret;
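
[Editor's note] The ila_xlat rework re-keys the mapping table on the 64-bit locator being matched instead of a separate identifier, and picks a per-bucket lock by hashing the locator's two 32-bit halves (jhash_2words in the hunks above). The sketch below shows that bucketing scheme in isolation; mix32() is a toy stand-in for jhash_2words and every name here is illustrative.

/* Toy sketch: hash a 64-bit locator into one of N lock/bucket slots by
 * mixing its two 32-bit halves.  mix32() stands in for jhash_2words().
 */
#include <stdint.h>
#include <stdio.h>

#define NLOCKS 8	/* must be a power of two so the mask works */

static uint32_t mix32(uint32_t a, uint32_t b, uint32_t seed)
{
	uint32_t h = seed ^ a;

	h ^= h >> 16;
	h *= 0x85ebca6bu;
	h ^= b;
	h ^= h >> 13;
	h *= 0xc2b2ae35u;
	h ^= h >> 16;
	return h;
}

static unsigned int locator_bucket(uint64_t loc, uint32_t seed)
{
	uint32_t lo = (uint32_t)loc;
	uint32_t hi = (uint32_t)(loc >> 32);

	return mix32(lo, hi, seed) & (NLOCKS - 1);
}

int main(void)
{
	uint64_t loc = 0x20010db800990000ULL;

	printf("locator %016llx -> bucket %u\n",
	       (unsigned long long)loc, locator_bucket(loc, 0x12345678u));
	return 0;
}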
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index f1678388fb0d..00cf28ad4565 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -222,7 +222,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
222 __sk_nulls_add_node_rcu(sk, &head->chain); 222 __sk_nulls_add_node_rcu(sk, &head->chain);
223 if (tw) { 223 if (tw) {
224 sk_nulls_del_node_init_rcu((struct sock *)tw); 224 sk_nulls_del_node_init_rcu((struct sock *)tw);
225 NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); 225 __NET_INC_STATS(net, LINUX_MIB_TIMEWAITRECYCLED);
226 } 226 }
227 spin_unlock(lock); 227 spin_unlock(lock);
228 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 228 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index ea071fad67a0..1bcef2369d64 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -240,6 +240,7 @@ struct fib6_table *fib6_new_table(struct net *net, u32 id)
240 240
241 return tb; 241 return tb;
242} 242}
243EXPORT_SYMBOL_GPL(fib6_new_table);
243 244
244struct fib6_table *fib6_get_table(struct net *net, u32 id) 245struct fib6_table *fib6_get_table(struct net *net, u32 id)
245{ 246{
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 35d3ddc328f8..b912f0dbaf72 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -373,7 +373,7 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
373 struct msghdr msg; 373 struct msghdr msg;
374 struct flowi6 flowi6; 374 struct flowi6 flowi6;
375 struct sockcm_cookie sockc_junk; 375 struct sockcm_cookie sockc_junk;
376 int junk; 376 struct ipcm6_cookie ipc6;
377 377
378 err = -ENOMEM; 378 err = -ENOMEM;
379 fl->opt = kmalloc(sizeof(*fl->opt) + olen, GFP_KERNEL); 379 fl->opt = kmalloc(sizeof(*fl->opt) + olen, GFP_KERNEL);
@@ -390,8 +390,8 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
390 msg.msg_control = (void *)(fl->opt+1); 390 msg.msg_control = (void *)(fl->opt+1);
391 memset(&flowi6, 0, sizeof(flowi6)); 391 memset(&flowi6, 0, sizeof(flowi6));
392 392
393 err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt, 393 ipc6.opt = fl->opt;
394 &junk, &junk, &junk, &sockc_junk); 394 err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, &ipc6, &sockc_junk);
395 if (err) 395 if (err)
396 goto done; 396 goto done;
397 err = -EINVAL; 397 err = -EINVAL;
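
[Editor's note] The flow-label hunk above, and the ip6_output.c hunks further down, replace the scatter of per-call arguments (hop limit, traffic class, dontfrag, tx options) with a single ipcm6_cookie that callers fill once. The sketch below shows that consolidation pattern with a simplified stand-in struct; the fields mirror the ones visible in the diff, while the "-1 means unset, fall back to the socket default" convention is an assumption made for illustration.

/* Sketch of gathering per-send IPv6 parameters into one cookie.  The
 * struct and defaulting convention are illustrative stand-ins.
 */
#include <stdio.h>

struct ipcm6_cookie_example {
	int hlimit;	/* hop limit, -1 = use socket default */
	int tclass;	/* traffic class, -1 = use socket default */
	int dontfrag;	/* -1 = use socket default */
	void *opt;	/* tx options, NULL if none */
};

static void send_one(const struct ipcm6_cookie_example *ipc6,
		     int sk_default_hlimit, int sk_default_tclass)
{
	int hlimit = ipc6->hlimit >= 0 ? ipc6->hlimit : sk_default_hlimit;
	int tclass = ipc6->tclass >= 0 ? ipc6->tclass : sk_default_tclass;

	printf("sending with hlimit=%d tclass=%d dontfrag=%d\n",
	       hlimit, tclass, ipc6->dontfrag > 0);
}

int main(void)
{
	struct ipcm6_cookie_example ipc6 = {
		.hlimit = -1, .tclass = -1, .dontfrag = -1, .opt = NULL,
	};

	send_one(&ipc6, 64, 0);
	return 0;
}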
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index ca5a2c5675c5..ee62ec469ab3 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -54,6 +54,7 @@
54#include <net/ip6_fib.h> 54#include <net/ip6_fib.h>
55#include <net/ip6_route.h> 55#include <net/ip6_route.h>
56#include <net/ip6_tunnel.h> 56#include <net/ip6_tunnel.h>
57#include <net/gre.h>
57 58
58 59
59static bool log_ecn_error = true; 60static bool log_ecn_error = true;
@@ -342,7 +343,7 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
342 goto failed_free; 343 goto failed_free;
343 344
344 /* Can use a lockless transmit, unless we generate output sequences */ 345 /* Can use a lockless transmit, unless we generate output sequences */
345 if (!(nt->parms.o_flags & GRE_SEQ)) 346 if (!(nt->parms.o_flags & TUNNEL_SEQ))
346 dev->features |= NETIF_F_LLTX; 347 dev->features |= NETIF_F_LLTX;
347 348
348 dev_hold(dev); 349 dev_hold(dev);
@@ -443,137 +444,41 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
443 t->err_time = jiffies; 444 t->err_time = jiffies;
444} 445}
445 446
446static int ip6gre_rcv(struct sk_buff *skb) 447static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
447{ 448{
448 const struct ipv6hdr *ipv6h; 449 const struct ipv6hdr *ipv6h;
449 u8 *h;
450 __be16 flags;
451 __sum16 csum = 0;
452 __be32 key = 0;
453 u32 seqno = 0;
454 struct ip6_tnl *tunnel; 450 struct ip6_tnl *tunnel;
455 int offset = 4;
456 __be16 gre_proto;
457 int err;
458
459 if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
460 goto drop;
461 451
462 ipv6h = ipv6_hdr(skb); 452 ipv6h = ipv6_hdr(skb);
463 h = skb->data;
464 flags = *(__be16 *)h;
465
466 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
467 /* - Version must be 0.
468 - We do not support routing headers.
469 */
470 if (flags&(GRE_VERSION|GRE_ROUTING))
471 goto drop;
472
473 if (flags&GRE_CSUM) {
474 csum = skb_checksum_simple_validate(skb);
475 offset += 4;
476 }
477 if (flags&GRE_KEY) {
478 key = *(__be32 *)(h + offset);
479 offset += 4;
480 }
481 if (flags&GRE_SEQ) {
482 seqno = ntohl(*(__be32 *)(h + offset));
483 offset += 4;
484 }
485 }
486
487 gre_proto = *(__be16 *)(h + 2);
488
489 tunnel = ip6gre_tunnel_lookup(skb->dev, 453 tunnel = ip6gre_tunnel_lookup(skb->dev,
490 &ipv6h->saddr, &ipv6h->daddr, key, 454 &ipv6h->saddr, &ipv6h->daddr, tpi->key,
491 gre_proto); 455 tpi->proto);
492 if (tunnel) { 456 if (tunnel) {
493 struct pcpu_sw_netstats *tstats; 457 ip6_tnl_rcv(tunnel, skb, tpi, NULL, false);
494 458
495 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) 459 return PACKET_RCVD;
496 goto drop; 460 }
497
498 if (!ip6_tnl_rcv_ctl(tunnel, &ipv6h->daddr, &ipv6h->saddr)) {
499 tunnel->dev->stats.rx_dropped++;
500 goto drop;
501 }
502
503 skb->protocol = gre_proto;
504 /* WCCP version 1 and 2 protocol decoding.
505 * - Change protocol to IPv6
506 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
507 */
508 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
509 skb->protocol = htons(ETH_P_IPV6);
510 if ((*(h + offset) & 0xF0) != 0x40)
511 offset += 4;
512 }
513
514 skb->mac_header = skb->network_header;
515 __pskb_pull(skb, offset);
516 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
517
518 if (((flags&GRE_CSUM) && csum) ||
519 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
520 tunnel->dev->stats.rx_crc_errors++;
521 tunnel->dev->stats.rx_errors++;
522 goto drop;
523 }
524 if (tunnel->parms.i_flags&GRE_SEQ) {
525 if (!(flags&GRE_SEQ) ||
526 (tunnel->i_seqno &&
527 (s32)(seqno - tunnel->i_seqno) < 0)) {
528 tunnel->dev->stats.rx_fifo_errors++;
529 tunnel->dev->stats.rx_errors++;
530 goto drop;
531 }
532 tunnel->i_seqno = seqno + 1;
533 }
534
535 /* Warning: All skb pointers will be invalidated! */
536 if (tunnel->dev->type == ARPHRD_ETHER) {
537 if (!pskb_may_pull(skb, ETH_HLEN)) {
538 tunnel->dev->stats.rx_length_errors++;
539 tunnel->dev->stats.rx_errors++;
540 goto drop;
541 }
542
543 ipv6h = ipv6_hdr(skb);
544 skb->protocol = eth_type_trans(skb, tunnel->dev);
545 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
546 }
547
548 __skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
549 461
550 skb_reset_network_header(skb); 462 return PACKET_REJECT;
463}
551 464
552 err = IP6_ECN_decapsulate(ipv6h, skb); 465static int gre_rcv(struct sk_buff *skb)
553 if (unlikely(err)) { 466{
554 if (log_ecn_error) 467 struct tnl_ptk_info tpi;
555 net_info_ratelimited("non-ECT from %pI6 with dsfield=%#x\n", 468 bool csum_err = false;
556 &ipv6h->saddr, 469 int hdr_len;
557 ipv6_get_dsfield(ipv6h));
558 if (err > 1) {
559 ++tunnel->dev->stats.rx_frame_errors;
560 ++tunnel->dev->stats.rx_errors;
561 goto drop;
562 }
563 }
564 470
565 tstats = this_cpu_ptr(tunnel->dev->tstats); 471 hdr_len = gre_parse_header(skb, &tpi, &csum_err);
566 u64_stats_update_begin(&tstats->syncp); 472 if (hdr_len < 0)
567 tstats->rx_packets++; 473 goto drop;
568 tstats->rx_bytes += skb->len;
569 u64_stats_update_end(&tstats->syncp);
570 474
571 netif_rx(skb); 475 if (iptunnel_pull_header(skb, hdr_len, tpi.proto, false))
476 goto drop;
572 477
478 if (ip6gre_rcv(skb, &tpi) == PACKET_RCVD)
573 return 0; 479 return 0;
574 }
575 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
576 480
481 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
577drop: 482drop:
578 kfree_skb(skb); 483 kfree_skb(skb);
579 return 0; 484 return 0;
@@ -584,199 +489,40 @@ struct ipv6_tel_txoption {
584 __u8 dst_opt[8]; 489 __u8 dst_opt[8];
585}; 490};
586 491
587static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit) 492static int gre_handle_offloads(struct sk_buff *skb, bool csum)
588{ 493{
589 memset(opt, 0, sizeof(struct ipv6_tel_txoption)); 494 return iptunnel_handle_offloads(skb,
590 495 csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
591 opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT;
592 opt->dst_opt[3] = 1;
593 opt->dst_opt[4] = encap_limit;
594 opt->dst_opt[5] = IPV6_TLV_PADN;
595 opt->dst_opt[6] = 1;
596
597 opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt;
598 opt->ops.opt_nflen = 8;
599} 496}
600 497
601static __sum16 gre6_checksum(struct sk_buff *skb) 498static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
602{ 499 struct net_device *dev, __u8 dsfield,
603 __wsum csum; 500 struct flowi6 *fl6, int encap_limit,
604 501 __u32 *pmtu, __be16 proto)
605 if (skb->ip_summed == CHECKSUM_PARTIAL)
606 csum = lco_csum(skb);
607 else
608 csum = skb_checksum(skb, sizeof(struct ipv6hdr),
609 skb->len - sizeof(struct ipv6hdr), 0);
610 return csum_fold(csum);
611}
612
613static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
614 struct net_device *dev,
615 __u8 dsfield,
616 struct flowi6 *fl6,
617 int encap_limit,
618 __u32 *pmtu)
619{ 502{
620 struct ip6_tnl *tunnel = netdev_priv(dev); 503 struct ip6_tnl *tunnel = netdev_priv(dev);
621 struct net *net = tunnel->net; 504 __be16 protocol = (dev->type == ARPHRD_ETHER) ?
622 struct net_device *tdev; /* Device to other host */ 505 htons(ETH_P_TEB) : proto;
623 struct ipv6hdr *ipv6h; /* Our new IP header */
624 unsigned int min_headroom = 0; /* The extra header space needed */
625 int gre_hlen;
626 struct ipv6_tel_txoption opt;
627 int mtu;
628 struct dst_entry *dst = NULL, *ndst = NULL;
629 struct net_device_stats *stats = &tunnel->dev->stats;
630 int err = -1;
631 u8 proto;
632 __be16 protocol;
633 506
634 if (dev->type == ARPHRD_ETHER) 507 if (dev->type == ARPHRD_ETHER)
635 IPCB(skb)->flags = 0; 508 IPCB(skb)->flags = 0;
636 509
637 if (dev->header_ops && dev->type == ARPHRD_IP6GRE) { 510 if (dev->header_ops && dev->type == ARPHRD_IP6GRE)
638 gre_hlen = 0; 511 fl6->daddr = ((struct ipv6hdr *)skb->data)->daddr;
639 ipv6h = (struct ipv6hdr *)skb->data; 512 else
640 fl6->daddr = ipv6h->daddr;
641 } else {
642 gre_hlen = tunnel->hlen;
643 fl6->daddr = tunnel->parms.raddr; 513 fl6->daddr = tunnel->parms.raddr;
644 }
645
646 if (!fl6->flowi6_mark)
647 dst = dst_cache_get(&tunnel->dst_cache);
648
649 if (!dst) {
650 dst = ip6_route_output(net, NULL, fl6);
651
652 if (dst->error)
653 goto tx_err_link_failure;
654 dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0);
655 if (IS_ERR(dst)) {
656 err = PTR_ERR(dst);
657 dst = NULL;
658 goto tx_err_link_failure;
659 }
660 ndst = dst;
661 }
662
663 tdev = dst->dev;
664 514
665 if (tdev == dev) { 515 if (tunnel->parms.o_flags & TUNNEL_SEQ)
666 stats->collisions++; 516 tunnel->o_seqno++;
667 net_warn_ratelimited("%s: Local routing loop detected!\n",
668 tunnel->parms.name);
669 goto tx_err_dst_release;
670 }
671
672 mtu = dst_mtu(dst) - sizeof(*ipv6h);
673 if (encap_limit >= 0) {
674 min_headroom += 8;
675 mtu -= 8;
676 }
677 if (mtu < IPV6_MIN_MTU)
678 mtu = IPV6_MIN_MTU;
679 if (skb_dst(skb))
680 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
681 if (skb->len > mtu && !skb_is_gso(skb)) {
682 *pmtu = mtu;
683 err = -EMSGSIZE;
684 goto tx_err_dst_release;
685 }
686
687 if (tunnel->err_count > 0) {
688 if (time_before(jiffies,
689 tunnel->err_time + IP6TUNNEL_ERR_TIMEO)) {
690 tunnel->err_count--;
691
692 dst_link_failure(skb);
693 } else
694 tunnel->err_count = 0;
695 }
696
697 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
698
699 min_headroom += LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
700
701 if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
702 int head_delta = SKB_DATA_ALIGN(min_headroom -
703 skb_headroom(skb) +
704 16);
705
706 err = pskb_expand_head(skb, max_t(int, head_delta, 0),
707 0, GFP_ATOMIC);
708 if (min_headroom > dev->needed_headroom)
709 dev->needed_headroom = min_headroom;
710 if (unlikely(err))
711 goto tx_err_dst_release;
712 }
713
714 if (!fl6->flowi6_mark && ndst)
715 dst_cache_set_ip6(&tunnel->dst_cache, ndst, &fl6->saddr);
716 skb_dst_set(skb, dst);
717 517
718 proto = NEXTHDR_GRE; 518 /* Push GRE header. */
719 if (encap_limit >= 0) { 519 gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
720 init_tel_txopt(&opt, encap_limit); 520 protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno));
721 ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
722 }
723
724 err = iptunnel_handle_offloads(skb,
725 (tunnel->parms.o_flags & GRE_CSUM) ?
726 SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
727 if (err)
728 goto tx_err_dst_release;
729
730 skb_push(skb, gre_hlen);
731 skb_reset_network_header(skb);
732 skb_set_transport_header(skb, sizeof(*ipv6h));
733
734 /*
735 * Push down and install the IP header.
736 */
737 ipv6h = ipv6_hdr(skb);
738 ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield),
739 ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6));
740 ipv6h->hop_limit = tunnel->parms.hop_limit;
741 ipv6h->nexthdr = proto;
742 ipv6h->saddr = fl6->saddr;
743 ipv6h->daddr = fl6->daddr;
744
745 ((__be16 *)(ipv6h + 1))[0] = tunnel->parms.o_flags;
746 protocol = (dev->type == ARPHRD_ETHER) ?
747 htons(ETH_P_TEB) : skb->protocol;
748 ((__be16 *)(ipv6h + 1))[1] = protocol;
749
750 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
751 __be32 *ptr = (__be32 *)(((u8 *)ipv6h) + tunnel->hlen - 4);
752
753 if (tunnel->parms.o_flags&GRE_SEQ) {
754 ++tunnel->o_seqno;
755 *ptr = htonl(tunnel->o_seqno);
756 ptr--;
757 }
758 if (tunnel->parms.o_flags&GRE_KEY) {
759 *ptr = tunnel->parms.o_key;
760 ptr--;
761 }
762 if ((tunnel->parms.o_flags & GRE_CSUM) &&
763 !(skb_shinfo(skb)->gso_type &
764 (SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) {
765 *ptr = 0;
766 *(__sum16 *)ptr = gre6_checksum(skb);
767 }
768 }
769 521
770 skb_set_inner_protocol(skb, protocol); 522 skb_set_inner_protocol(skb, protocol);
771 523
772 ip6tunnel_xmit(NULL, skb, dev); 524 return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
773 return 0; 525 NEXTHDR_GRE);
774tx_err_link_failure:
775 stats->tx_carrier_errors++;
776 dst_link_failure(skb);
777tx_err_dst_release:
778 dst_release(dst);
779 return err;
780} 526}
781 527
782static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) 528static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
@@ -795,7 +541,6 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
795 encap_limit = t->parms.encap_limit; 541 encap_limit = t->parms.encap_limit;
796 542
797 memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); 543 memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
798 fl6.flowi6_proto = IPPROTO_GRE;
799 544
800 dsfield = ipv4_get_dsfield(iph); 545 dsfield = ipv4_get_dsfield(iph);
801 546
@@ -805,7 +550,12 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
805 if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) 550 if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
806 fl6.flowi6_mark = skb->mark; 551 fl6.flowi6_mark = skb->mark;
807 552
808 err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); 553 err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
554 if (err)
555 return -1;
556
557 err = __gre6_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
558 skb->protocol);
809 if (err != 0) { 559 if (err != 0) {
810 /* XXX: send ICMP error even if DF is not set. */ 560 /* XXX: send ICMP error even if DF is not set. */
811 if (err == -EMSGSIZE) 561 if (err == -EMSGSIZE)
@@ -845,7 +595,6 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
845 encap_limit = t->parms.encap_limit; 595 encap_limit = t->parms.encap_limit;
846 596
847 memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); 597 memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
848 fl6.flowi6_proto = IPPROTO_GRE;
849 598
850 dsfield = ipv6_get_dsfield(ipv6h); 599 dsfield = ipv6_get_dsfield(ipv6h);
851 if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) 600 if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
@@ -855,7 +604,11 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
855 if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) 604 if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
856 fl6.flowi6_mark = skb->mark; 605 fl6.flowi6_mark = skb->mark;
857 606
858 err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); 607 if (gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)))
608 return -1;
609
610 err = __gre6_xmit(skb, dev, dsfield, &fl6, encap_limit,
611 &mtu, skb->protocol);
859 if (err != 0) { 612 if (err != 0) {
860 if (err == -EMSGSIZE) 613 if (err == -EMSGSIZE)
861 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 614 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
@@ -899,7 +652,11 @@ static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev)
899 memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); 652 memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
900 fl6.flowi6_proto = skb->protocol; 653 fl6.flowi6_proto = skb->protocol;
901 654
902 err = ip6gre_xmit2(skb, dev, 0, &fl6, encap_limit, &mtu); 655 err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
656 if (err)
657 return err;
658
659 err = __gre6_xmit(skb, dev, 0, &fl6, encap_limit, &mtu, skb->protocol);
903 660
904 return err; 661 return err;
905} 662}
@@ -943,7 +700,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
943 struct net_device *dev = t->dev; 700 struct net_device *dev = t->dev;
944 struct __ip6_tnl_parm *p = &t->parms; 701 struct __ip6_tnl_parm *p = &t->parms;
945 struct flowi6 *fl6 = &t->fl.u.ip6; 702 struct flowi6 *fl6 = &t->fl.u.ip6;
946 int addend = sizeof(struct ipv6hdr) + 4; 703 int t_hlen;
947 704
948 if (dev->type != ARPHRD_ETHER) { 705 if (dev->type != ARPHRD_ETHER) {
949 memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); 706 memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
@@ -970,16 +727,11 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
970 else 727 else
971 dev->flags &= ~IFF_POINTOPOINT; 728 dev->flags &= ~IFF_POINTOPOINT;
972 729
973 /* Precalculate GRE options length */ 730 t->tun_hlen = gre_calc_hlen(t->parms.o_flags);
974 if (t->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) { 731
975 if (t->parms.o_flags&GRE_CSUM) 732 t->hlen = t->tun_hlen;
976 addend += 4; 733
977 if (t->parms.o_flags&GRE_KEY) 734 t_hlen = t->hlen + sizeof(struct ipv6hdr);
978 addend += 4;
979 if (t->parms.o_flags&GRE_SEQ)
980 addend += 4;
981 }
982 t->hlen = addend;
983 735
984 if (p->flags & IP6_TNL_F_CAP_XMIT) { 736 if (p->flags & IP6_TNL_F_CAP_XMIT) {
985 int strict = (ipv6_addr_type(&p->raddr) & 737 int strict = (ipv6_addr_type(&p->raddr) &
@@ -993,10 +745,11 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
993 return; 745 return;
994 746
995 if (rt->dst.dev) { 747 if (rt->dst.dev) {
996 dev->hard_header_len = rt->dst.dev->hard_header_len + addend; 748 dev->hard_header_len = rt->dst.dev->hard_header_len +
749 t_hlen;
997 750
998 if (set_mtu) { 751 if (set_mtu) {
999 dev->mtu = rt->dst.dev->mtu - addend; 752 dev->mtu = rt->dst.dev->mtu - t_hlen;
1000 if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) 753 if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1001 dev->mtu -= 8; 754 dev->mtu -= 8;
1002 if (dev->type == ARPHRD_ETHER) 755 if (dev->type == ARPHRD_ETHER)
@@ -1042,8 +795,8 @@ static void ip6gre_tnl_parm_from_user(struct __ip6_tnl_parm *p,
1042 p->link = u->link; 795 p->link = u->link;
1043 p->i_key = u->i_key; 796 p->i_key = u->i_key;
1044 p->o_key = u->o_key; 797 p->o_key = u->o_key;
1045 p->i_flags = u->i_flags; 798 p->i_flags = gre_flags_to_tnl_flags(u->i_flags);
1046 p->o_flags = u->o_flags; 799 p->o_flags = gre_flags_to_tnl_flags(u->o_flags);
1047 memcpy(p->name, u->name, sizeof(u->name)); 800 memcpy(p->name, u->name, sizeof(u->name));
1048} 801}
1049 802
@@ -1060,8 +813,8 @@ static void ip6gre_tnl_parm_to_user(struct ip6_tnl_parm2 *u,
1060 u->link = p->link; 813 u->link = p->link;
1061 u->i_key = p->i_key; 814 u->i_key = p->i_key;
1062 u->o_key = p->o_key; 815 u->o_key = p->o_key;
1063 u->i_flags = p->i_flags; 816 u->i_flags = gre_tnl_flags_to_gre_flags(p->i_flags);
1064 u->o_flags = p->o_flags; 817 u->o_flags = gre_tnl_flags_to_gre_flags(p->o_flags);
1065 memcpy(u->name, p->name, sizeof(u->name)); 818 memcpy(u->name, p->name, sizeof(u->name));
1066} 819}
1067 820
@@ -1075,6 +828,8 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
1075 struct net *net = t->net; 828 struct net *net = t->net;
1076 struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); 829 struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
1077 830
831 memset(&p1, 0, sizeof(p1));
832
1078 switch (cmd) { 833 switch (cmd) {
1079 case SIOCGETTUNNEL: 834 case SIOCGETTUNNEL:
1080 if (dev == ign->fb_tunnel_dev) { 835 if (dev == ign->fb_tunnel_dev) {
@@ -1174,15 +929,6 @@ done:
1174 return err; 929 return err;
1175} 930}
1176 931
1177static int ip6gre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1178{
1179 if (new_mtu < 68 ||
1180 new_mtu > 0xFFF8 - dev->hard_header_len)
1181 return -EINVAL;
1182 dev->mtu = new_mtu;
1183 return 0;
1184}
1185
1186static int ip6gre_header(struct sk_buff *skb, struct net_device *dev, 932static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
1187 unsigned short type, 933 unsigned short type,
1188 const void *daddr, const void *saddr, unsigned int len) 934 const void *daddr, const void *saddr, unsigned int len)
@@ -1226,7 +972,7 @@ static const struct net_device_ops ip6gre_netdev_ops = {
1226 .ndo_uninit = ip6gre_tunnel_uninit, 972 .ndo_uninit = ip6gre_tunnel_uninit,
1227 .ndo_start_xmit = ip6gre_tunnel_xmit, 973 .ndo_start_xmit = ip6gre_tunnel_xmit,
1228 .ndo_do_ioctl = ip6gre_tunnel_ioctl, 974 .ndo_do_ioctl = ip6gre_tunnel_ioctl,
1229 .ndo_change_mtu = ip6gre_tunnel_change_mtu, 975 .ndo_change_mtu = ip6_tnl_change_mtu,
1230 .ndo_get_stats64 = ip_tunnel_get_stats64, 976 .ndo_get_stats64 = ip_tunnel_get_stats64,
1231 .ndo_get_iflink = ip6_tnl_get_iflink, 977 .ndo_get_iflink = ip6_tnl_get_iflink,
1232}; 978};
@@ -1242,17 +988,11 @@ static void ip6gre_dev_free(struct net_device *dev)
1242 988
1243static void ip6gre_tunnel_setup(struct net_device *dev) 989static void ip6gre_tunnel_setup(struct net_device *dev)
1244{ 990{
1245 struct ip6_tnl *t;
1246
1247 dev->netdev_ops = &ip6gre_netdev_ops; 991 dev->netdev_ops = &ip6gre_netdev_ops;
1248 dev->destructor = ip6gre_dev_free; 992 dev->destructor = ip6gre_dev_free;
1249 993
1250 dev->type = ARPHRD_IP6GRE; 994 dev->type = ARPHRD_IP6GRE;
1251 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr) + 4; 995
1252 dev->mtu = ETH_DATA_LEN - sizeof(struct ipv6hdr) - 4;
1253 t = netdev_priv(dev);
1254 if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1255 dev->mtu -= 8;
1256 dev->flags |= IFF_NOARP; 996 dev->flags |= IFF_NOARP;
1257 dev->addr_len = sizeof(struct in6_addr); 997 dev->addr_len = sizeof(struct in6_addr);
1258 netif_keep_dst(dev); 998 netif_keep_dst(dev);
@@ -1262,6 +1002,7 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
1262{ 1002{
1263 struct ip6_tnl *tunnel; 1003 struct ip6_tnl *tunnel;
1264 int ret; 1004 int ret;
1005 int t_hlen;
1265 1006
1266 tunnel = netdev_priv(dev); 1007 tunnel = netdev_priv(dev);
1267 1008
@@ -1280,6 +1021,17 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
1280 return ret; 1021 return ret;
1281 } 1022 }
1282 1023
1024 tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
1025
1026 tunnel->hlen = tunnel->tun_hlen;
1027
1028 t_hlen = tunnel->hlen + sizeof(struct ipv6hdr);
1029
1030 dev->hard_header_len = LL_MAX_HEADER + t_hlen;
1031 dev->mtu = ETH_DATA_LEN - t_hlen;
1032 if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1033 dev->mtu -= 8;
1034
1283 return 0; 1035 return 0;
1284} 1036}
1285 1037
@@ -1318,7 +1070,7 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev)
1318 1070
1319 1071
1320static struct inet6_protocol ip6gre_protocol __read_mostly = { 1072static struct inet6_protocol ip6gre_protocol __read_mostly = {
1321 .handler = ip6gre_rcv, 1073 .handler = gre_rcv,
1322 .err_handler = ip6gre_err, 1074 .err_handler = ip6gre_err,
1323 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, 1075 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
1324}; 1076};
@@ -1462,10 +1214,12 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
1462 parms->link = nla_get_u32(data[IFLA_GRE_LINK]); 1214 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1463 1215
1464 if (data[IFLA_GRE_IFLAGS]) 1216 if (data[IFLA_GRE_IFLAGS])
1465 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]); 1217 parms->i_flags = gre_flags_to_tnl_flags(
1218 nla_get_be16(data[IFLA_GRE_IFLAGS]));
1466 1219
1467 if (data[IFLA_GRE_OFLAGS]) 1220 if (data[IFLA_GRE_OFLAGS])
1468 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]); 1221 parms->o_flags = gre_flags_to_tnl_flags(
1222 nla_get_be16(data[IFLA_GRE_OFLAGS]));
1469 1223
1470 if (data[IFLA_GRE_IKEY]) 1224 if (data[IFLA_GRE_IKEY])
1471 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]); 1225 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
@@ -1514,7 +1268,7 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = {
1514 .ndo_start_xmit = ip6gre_tunnel_xmit, 1268 .ndo_start_xmit = ip6gre_tunnel_xmit,
1515 .ndo_set_mac_address = eth_mac_addr, 1269 .ndo_set_mac_address = eth_mac_addr,
1516 .ndo_validate_addr = eth_validate_addr, 1270 .ndo_validate_addr = eth_validate_addr,
1517 .ndo_change_mtu = ip6gre_tunnel_change_mtu, 1271 .ndo_change_mtu = ip6_tnl_change_mtu,
1518 .ndo_get_stats64 = ip_tunnel_get_stats64, 1272 .ndo_get_stats64 = ip_tunnel_get_stats64,
1519 .ndo_get_iflink = ip6_tnl_get_iflink, 1273 .ndo_get_iflink = ip6_tnl_get_iflink,
1520}; 1274};
@@ -1560,7 +1314,7 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
1560 dev->features |= GRE6_FEATURES; 1314 dev->features |= GRE6_FEATURES;
1561 dev->hw_features |= GRE6_FEATURES; 1315 dev->hw_features |= GRE6_FEATURES;
1562 1316
1563 if (!(nt->parms.o_flags & GRE_SEQ)) { 1317 if (!(nt->parms.o_flags & TUNNEL_SEQ)) {
1564 /* TCP segmentation offload is not supported when we 1318 /* TCP segmentation offload is not supported when we
1565 * generate output sequences. 1319 * generate output sequences.
1566 */ 1320 */
@@ -1657,8 +1411,10 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1657 struct __ip6_tnl_parm *p = &t->parms; 1411 struct __ip6_tnl_parm *p = &t->parms;
1658 1412
1659 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || 1413 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1660 nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) || 1414 nla_put_be16(skb, IFLA_GRE_IFLAGS,
1661 nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) || 1415 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
1416 nla_put_be16(skb, IFLA_GRE_OFLAGS,
1417 gre_tnl_flags_to_gre_flags(p->o_flags)) ||
1662 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || 1418 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1663 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || 1419 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1664 nla_put_in6_addr(skb, IFLA_GRE_LOCAL, &p->laddr) || 1420 nla_put_in6_addr(skb, IFLA_GRE_LOCAL, &p->laddr) ||
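
[Editor's note] The ip6_gre rewrite drops its private header construction in favour of the shared GRE helpers, so the tunnel header length is derived from the option flags instead of being open-coded: 4 bytes of base header plus 4 bytes for each of checksum, key and sequence number, which is exactly what the removed addend arithmetic computed. The sketch below reproduces that calculation and the derived device limits from the hunks above (hard_header_len = LL_MAX_HEADER + t_hlen, mtu = ETH_DATA_LEN - t_hlen); the flag bits and names are illustrative, not the kernel's TUNNEL_* definitions.

/* Sketch: GRE header length as a function of option flags, plus the
 * derived header room and MTU.  Flag values and names are stand-ins.
 */
#include <stdio.h>

#define EX_TUNNEL_CSUM	0x01
#define EX_TUNNEL_KEY	0x02
#define EX_TUNNEL_SEQ	0x04

#define EX_ETH_DATA_LEN		1500
#define EX_IPV6_HDR_LEN		40
#define EX_LL_MAX_HEADER	128	/* placeholder for LL_MAX_HEADER */

static int gre_hlen(unsigned int o_flags)
{
	int hlen = 4;			/* base GRE header */

	if (o_flags & EX_TUNNEL_CSUM)
		hlen += 4;
	if (o_flags & EX_TUNNEL_KEY)
		hlen += 4;
	if (o_flags & EX_TUNNEL_SEQ)
		hlen += 4;
	return hlen;
}

int main(void)
{
	unsigned int o_flags = EX_TUNNEL_KEY | EX_TUNNEL_SEQ;
	int t_hlen = gre_hlen(o_flags) + EX_IPV6_HDR_LEN;

	printf("tunnel header: %d bytes\n", t_hlen);
	printf("hard_header_len >= %d, mtu = %d\n",
	       EX_LL_MAX_HEADER + t_hlen, EX_ETH_DATA_LEN - t_hlen);
	return 0;
}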
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index c05c425c2389..f185cbcda114 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -49,6 +49,13 @@
49 49
50int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 50int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
51{ 51{
52 /* if ingress device is enslaved to an L3 master device pass the
53 * skb to its handler for processing
54 */
55 skb = l3mdev_ip6_rcv(skb);
56 if (!skb)
57 return NET_RX_SUCCESS;
58
52 if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { 59 if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
53 const struct inet6_protocol *ipprot; 60 const struct inet6_protocol *ipprot;
54 61
@@ -78,11 +85,11 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
78 85
79 idev = __in6_dev_get(skb->dev); 86 idev = __in6_dev_get(skb->dev);
80 87
81 IP6_UPD_PO_STATS_BH(net, idev, IPSTATS_MIB_IN, skb->len); 88 __IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_IN, skb->len);
82 89
83 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL || 90 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL ||
84 !idev || unlikely(idev->cnf.disable_ipv6)) { 91 !idev || unlikely(idev->cnf.disable_ipv6)) {
85 IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDISCARDS); 92 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
86 goto drop; 93 goto drop;
87 } 94 }
88 95
@@ -109,10 +116,10 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
109 if (hdr->version != 6) 116 if (hdr->version != 6)
110 goto err; 117 goto err;
111 118
112 IP6_ADD_STATS_BH(net, idev, 119 __IP6_ADD_STATS(net, idev,
113 IPSTATS_MIB_NOECTPKTS + 120 IPSTATS_MIB_NOECTPKTS +
114 (ipv6_get_dsfield(hdr) & INET_ECN_MASK), 121 (ipv6_get_dsfield(hdr) & INET_ECN_MASK),
115 max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs)); 122 max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
116 /* 123 /*
117 * RFC4291 2.5.3 124 * RFC4291 2.5.3
118 * A packet received on an interface with a destination address 125 * A packet received on an interface with a destination address
@@ -169,12 +176,12 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
169 /* pkt_len may be zero if Jumbo payload option is present */ 176 /* pkt_len may be zero if Jumbo payload option is present */
170 if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { 177 if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
171 if (pkt_len + sizeof(struct ipv6hdr) > skb->len) { 178 if (pkt_len + sizeof(struct ipv6hdr) > skb->len) {
172 IP6_INC_STATS_BH(net, 179 __IP6_INC_STATS(net,
173 idev, IPSTATS_MIB_INTRUNCATEDPKTS); 180 idev, IPSTATS_MIB_INTRUNCATEDPKTS);
174 goto drop; 181 goto drop;
175 } 182 }
176 if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) { 183 if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) {
177 IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS); 184 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
178 goto drop; 185 goto drop;
179 } 186 }
180 hdr = ipv6_hdr(skb); 187 hdr = ipv6_hdr(skb);
@@ -182,7 +189,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
182 189
183 if (hdr->nexthdr == NEXTHDR_HOP) { 190 if (hdr->nexthdr == NEXTHDR_HOP) {
184 if (ipv6_parse_hopopts(skb) < 0) { 191 if (ipv6_parse_hopopts(skb) < 0) {
185 IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS); 192 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
186 rcu_read_unlock(); 193 rcu_read_unlock();
187 return NET_RX_DROP; 194 return NET_RX_DROP;
188 } 195 }
@@ -197,7 +204,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
197 net, NULL, skb, dev, NULL, 204 net, NULL, skb, dev, NULL,
198 ip6_rcv_finish); 205 ip6_rcv_finish);
199err: 206err:
200 IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS); 207 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
201drop: 208drop:
202 rcu_read_unlock(); 209 rcu_read_unlock();
203 kfree_skb(skb); 210 kfree_skb(skb);
@@ -259,18 +266,18 @@ resubmit:
259 if (ret > 0) 266 if (ret > 0)
260 goto resubmit; 267 goto resubmit;
261 else if (ret == 0) 268 else if (ret == 0)
262 IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS); 269 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDELIVERS);
263 } else { 270 } else {
264 if (!raw) { 271 if (!raw) {
265 if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { 272 if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
266 IP6_INC_STATS_BH(net, idev, 273 __IP6_INC_STATS(net, idev,
267 IPSTATS_MIB_INUNKNOWNPROTOS); 274 IPSTATS_MIB_INUNKNOWNPROTOS);
268 icmpv6_send(skb, ICMPV6_PARAMPROB, 275 icmpv6_send(skb, ICMPV6_PARAMPROB,
269 ICMPV6_UNK_NEXTHDR, nhoff); 276 ICMPV6_UNK_NEXTHDR, nhoff);
270 } 277 }
271 kfree_skb(skb); 278 kfree_skb(skb);
272 } else { 279 } else {
273 IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS); 280 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDELIVERS);
274 consume_skb(skb); 281 consume_skb(skb);
275 } 282 }
276 } 283 }
@@ -278,7 +285,7 @@ resubmit:
278 return 0; 285 return 0;
279 286
280discard: 287discard:
281 IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDISCARDS); 288 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
282 rcu_read_unlock(); 289 rcu_read_unlock();
283 kfree_skb(skb); 290 kfree_skb(skb);
284 return 0; 291 return 0;
@@ -297,7 +304,7 @@ int ip6_mc_input(struct sk_buff *skb)
297 const struct ipv6hdr *hdr; 304 const struct ipv6hdr *hdr;
298 bool deliver; 305 bool deliver;
299 306
300 IP6_UPD_PO_STATS_BH(dev_net(skb_dst(skb)->dev), 307 __IP6_UPD_PO_STATS(dev_net(skb_dst(skb)->dev),
301 ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INMCAST, 308 ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INMCAST,
302 skb->len); 309 skb->len);
303 310
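
[Editor's note] The ip6_input hunk gives an L3 master device (such as a VRF) a chance to take over the skb before early demux and routing: the hook may hand back the same buffer, a substitute, or NULL when it consumed the packet. The sketch below shows that "hook may steal the buffer" control flow in isolation; the types and the hook itself are stand-ins, not the kernel's l3mdev API.

/* Sketch of a receive hook that may consume or replace the buffer.
 * Everything here is an illustrative stand-in.
 */
#include <stdio.h>
#include <stdlib.h>

struct pkt { int len; };

/* Stand-in for the master-device hook: returns the packet unchanged,
 * returns a substitute, or frees it and returns NULL (consumed).
 */
static struct pkt *master_dev_hook(struct pkt *p)
{
	if (p->len == 0) {		/* pretend the master device ate it */
		free(p);
		return NULL;
	}
	return p;
}

static int rcv_finish(struct pkt *p)
{
	p = master_dev_hook(p);
	if (!p)
		return 0;		/* already handled, stop here */

	printf("continue normal input, len=%d\n", p->len);
	free(p);
	return 0;
}

int main(void)
{
	struct pkt *p = malloc(sizeof(*p));

	if (!p)
		return 1;
	p->len = 64;
	return rcv_finish(p);
}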
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 171518e3ca21..cbf127ae7c67 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -395,8 +395,8 @@ int ip6_forward(struct sk_buff *skb)
395 goto drop; 395 goto drop;
396 396
397 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { 397 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
398 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), 398 __IP6_INC_STATS(net, ip6_dst_idev(dst),
399 IPSTATS_MIB_INDISCARDS); 399 IPSTATS_MIB_INDISCARDS);
400 goto drop; 400 goto drop;
401 } 401 }
402 402
@@ -427,8 +427,8 @@ int ip6_forward(struct sk_buff *skb)
427 /* Force OUTPUT device used as source address */ 427 /* Force OUTPUT device used as source address */
428 skb->dev = dst->dev; 428 skb->dev = dst->dev;
429 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); 429 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
430 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), 430 __IP6_INC_STATS(net, ip6_dst_idev(dst),
431 IPSTATS_MIB_INHDRERRORS); 431 IPSTATS_MIB_INHDRERRORS);
432 432
433 kfree_skb(skb); 433 kfree_skb(skb);
434 return -ETIMEDOUT; 434 return -ETIMEDOUT;
@@ -441,15 +441,15 @@ int ip6_forward(struct sk_buff *skb)
441 if (proxied > 0) 441 if (proxied > 0)
442 return ip6_input(skb); 442 return ip6_input(skb);
443 else if (proxied < 0) { 443 else if (proxied < 0) {
444 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), 444 __IP6_INC_STATS(net, ip6_dst_idev(dst),
445 IPSTATS_MIB_INDISCARDS); 445 IPSTATS_MIB_INDISCARDS);
446 goto drop; 446 goto drop;
447 } 447 }
448 } 448 }
449 449
450 if (!xfrm6_route_forward(skb)) { 450 if (!xfrm6_route_forward(skb)) {
451 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), 451 __IP6_INC_STATS(net, ip6_dst_idev(dst),
452 IPSTATS_MIB_INDISCARDS); 452 IPSTATS_MIB_INDISCARDS);
453 goto drop; 453 goto drop;
454 } 454 }
455 dst = skb_dst(skb); 455 dst = skb_dst(skb);
@@ -505,17 +505,17 @@ int ip6_forward(struct sk_buff *skb)
505 /* Again, force OUTPUT device used as source address */ 505 /* Again, force OUTPUT device used as source address */
506 skb->dev = dst->dev; 506 skb->dev = dst->dev;
507 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 507 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
508 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), 508 __IP6_INC_STATS(net, ip6_dst_idev(dst),
509 IPSTATS_MIB_INTOOBIGERRORS); 509 IPSTATS_MIB_INTOOBIGERRORS);
510 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), 510 __IP6_INC_STATS(net, ip6_dst_idev(dst),
511 IPSTATS_MIB_FRAGFAILS); 511 IPSTATS_MIB_FRAGFAILS);
512 kfree_skb(skb); 512 kfree_skb(skb);
513 return -EMSGSIZE; 513 return -EMSGSIZE;
514 } 514 }
515 515
516 if (skb_cow(skb, dst->dev->hard_header_len)) { 516 if (skb_cow(skb, dst->dev->hard_header_len)) {
517 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), 517 __IP6_INC_STATS(net, ip6_dst_idev(dst),
518 IPSTATS_MIB_OUTDISCARDS); 518 IPSTATS_MIB_OUTDISCARDS);
519 goto drop; 519 goto drop;
520 } 520 }
521 521
@@ -525,14 +525,14 @@ int ip6_forward(struct sk_buff *skb)
525 525
526 hdr->hop_limit--; 526 hdr->hop_limit--;
527 527
528 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); 528 __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
529 IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len); 529 __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
530 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, 530 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
531 net, NULL, skb, skb->dev, dst->dev, 531 net, NULL, skb, skb->dev, dst->dev,
532 ip6_forward_finish); 532 ip6_forward_finish);
533 533
534error: 534error:
535 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS); 535 __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
536drop: 536drop:
537 kfree_skb(skb); 537 kfree_skb(skb);
538 return -EINVAL; 538 return -EINVAL;
@@ -1182,12 +1182,12 @@ static void ip6_append_data_mtu(unsigned int *mtu,
1182} 1182}
1183 1183
1184static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, 1184static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1185 struct inet6_cork *v6_cork, 1185 struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
1186 int hlimit, int tclass, struct ipv6_txoptions *opt,
1187 struct rt6_info *rt, struct flowi6 *fl6) 1186 struct rt6_info *rt, struct flowi6 *fl6)
1188{ 1187{
1189 struct ipv6_pinfo *np = inet6_sk(sk); 1188 struct ipv6_pinfo *np = inet6_sk(sk);
1190 unsigned int mtu; 1189 unsigned int mtu;
1190 struct ipv6_txoptions *opt = ipc6->opt;
1191 1191
1192 /* 1192 /*
1193 * setup for corking 1193 * setup for corking
@@ -1229,8 +1229,8 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1229 dst_hold(&rt->dst); 1229 dst_hold(&rt->dst);
1230 cork->base.dst = &rt->dst; 1230 cork->base.dst = &rt->dst;
1231 cork->fl.u.ip6 = *fl6; 1231 cork->fl.u.ip6 = *fl6;
1232 v6_cork->hop_limit = hlimit; 1232 v6_cork->hop_limit = ipc6->hlimit;
1233 v6_cork->tclass = tclass; 1233 v6_cork->tclass = ipc6->tclass;
1234 if (rt->dst.flags & DST_XFRM_TUNNEL) 1234 if (rt->dst.flags & DST_XFRM_TUNNEL)
1235 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? 1235 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1236 rt->dst.dev->mtu : dst_mtu(&rt->dst); 1236 rt->dst.dev->mtu : dst_mtu(&rt->dst);
@@ -1258,7 +1258,7 @@ static int __ip6_append_data(struct sock *sk,
1258 int getfrag(void *from, char *to, int offset, 1258 int getfrag(void *from, char *to, int offset,
1259 int len, int odd, struct sk_buff *skb), 1259 int len, int odd, struct sk_buff *skb),
1260 void *from, int length, int transhdrlen, 1260 void *from, int length, int transhdrlen,
1261 unsigned int flags, int dontfrag, 1261 unsigned int flags, struct ipcm6_cookie *ipc6,
1262 const struct sockcm_cookie *sockc) 1262 const struct sockcm_cookie *sockc)
1263{ 1263{
1264 struct sk_buff *skb, *skb_prev = NULL; 1264 struct sk_buff *skb, *skb_prev = NULL;
@@ -1298,7 +1298,7 @@ static int __ip6_append_data(struct sock *sk,
1298 sizeof(struct frag_hdr) : 0) + 1298 sizeof(struct frag_hdr) : 0) +
1299 rt->rt6i_nfheader_len; 1299 rt->rt6i_nfheader_len;
1300 1300
1301 if (cork->length + length > mtu - headersize && dontfrag && 1301 if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
1302 (sk->sk_protocol == IPPROTO_UDP || 1302 (sk->sk_protocol == IPPROTO_UDP ||
1303 sk->sk_protocol == IPPROTO_RAW)) { 1303 sk->sk_protocol == IPPROTO_RAW)) {
1304 ipv6_local_rxpmtu(sk, fl6, mtu - headersize + 1304 ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
@@ -1564,9 +1564,9 @@ error:
1564int ip6_append_data(struct sock *sk, 1564int ip6_append_data(struct sock *sk,
1565 int getfrag(void *from, char *to, int offset, int len, 1565 int getfrag(void *from, char *to, int offset, int len,
1566 int odd, struct sk_buff *skb), 1566 int odd, struct sk_buff *skb),
1567 void *from, int length, int transhdrlen, int hlimit, 1567 void *from, int length, int transhdrlen,
1568 int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, 1568 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1569 struct rt6_info *rt, unsigned int flags, int dontfrag, 1569 struct rt6_info *rt, unsigned int flags,
1570 const struct sockcm_cookie *sockc) 1570 const struct sockcm_cookie *sockc)
1571{ 1571{
1572 struct inet_sock *inet = inet_sk(sk); 1572 struct inet_sock *inet = inet_sk(sk);
@@ -1580,12 +1580,12 @@ int ip6_append_data(struct sock *sk,
1580 /* 1580 /*
1581 * setup for corking 1581 * setup for corking
1582 */ 1582 */
1583 err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit, 1583 err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1584 tclass, opt, rt, fl6); 1584 ipc6, rt, fl6);
1585 if (err) 1585 if (err)
1586 return err; 1586 return err;
1587 1587
1588 exthdrlen = (opt ? opt->opt_flen : 0); 1588 exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1589 length += exthdrlen; 1589 length += exthdrlen;
1590 transhdrlen += exthdrlen; 1590 transhdrlen += exthdrlen;
1591 } else { 1591 } else {
@@ -1595,8 +1595,7 @@ int ip6_append_data(struct sock *sk,
1595 1595
1596 return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base, 1596 return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1597 &np->cork, sk_page_frag(sk), getfrag, 1597 &np->cork, sk_page_frag(sk), getfrag,
1598 from, length, transhdrlen, flags, dontfrag, 1598 from, length, transhdrlen, flags, ipc6, sockc);
1599 sockc);
1600} 1599}
1601EXPORT_SYMBOL_GPL(ip6_append_data); 1600EXPORT_SYMBOL_GPL(ip6_append_data);
1602 1601
@@ -1752,15 +1751,14 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
1752 int getfrag(void *from, char *to, int offset, 1751 int getfrag(void *from, char *to, int offset,
1753 int len, int odd, struct sk_buff *skb), 1752 int len, int odd, struct sk_buff *skb),
1754 void *from, int length, int transhdrlen, 1753 void *from, int length, int transhdrlen,
1755 int hlimit, int tclass, 1754 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1756 struct ipv6_txoptions *opt, struct flowi6 *fl6,
1757 struct rt6_info *rt, unsigned int flags, 1755 struct rt6_info *rt, unsigned int flags,
1758 int dontfrag, const struct sockcm_cookie *sockc) 1756 const struct sockcm_cookie *sockc)
1759{ 1757{
1760 struct inet_cork_full cork; 1758 struct inet_cork_full cork;
1761 struct inet6_cork v6_cork; 1759 struct inet6_cork v6_cork;
1762 struct sk_buff_head queue; 1760 struct sk_buff_head queue;
1763 int exthdrlen = (opt ? opt->opt_flen : 0); 1761 int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1764 int err; 1762 int err;
1765 1763
1766 if (flags & MSG_PROBE) 1764 if (flags & MSG_PROBE)
@@ -1772,17 +1770,17 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
1772 cork.base.addr = 0; 1770 cork.base.addr = 0;
1773 cork.base.opt = NULL; 1771 cork.base.opt = NULL;
1774 v6_cork.opt = NULL; 1772 v6_cork.opt = NULL;
1775 err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6); 1773 err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
1776 if (err) 1774 if (err)
1777 return ERR_PTR(err); 1775 return ERR_PTR(err);
1778 1776
1779 if (dontfrag < 0) 1777 if (ipc6->dontfrag < 0)
1780 dontfrag = inet6_sk(sk)->dontfrag; 1778 ipc6->dontfrag = inet6_sk(sk)->dontfrag;
1781 1779
1782 err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork, 1780 err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
1783 &current->task_frag, getfrag, from, 1781 &current->task_frag, getfrag, from,
1784 length + exthdrlen, transhdrlen + exthdrlen, 1782 length + exthdrlen, transhdrlen + exthdrlen,
1785 flags, dontfrag, sockc); 1783 flags, ipc6, sockc);
1786 if (err) { 1784 if (err) {
1787 __ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork); 1785 __ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
1788 return ERR_PTR(err); 1786 return ERR_PTR(err);
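
Sketch (not part of the patch): the ip6_output.c hunks above make two mechanical changes: the IP6_INC_STATS_BH()/IP6_ADD_STATS_BH() calls become __IP6_INC_STATS()/__IP6_ADD_STATS(), and ip6_setup_cork(), __ip6_append_data(), ip6_append_data() and ip6_make_skb() trade their separate hlimit/tclass/opt/dontfrag arguments for a single struct ipcm6_cookie. A minimal caller-side sketch of the new shape, modeled on the ping.c hunk later in this patch (np, fl6, dst, rt, sk, getfrag, from, len and sockc are assumed context, as in that caller):

	struct ipcm6_cookie ipc6;

	/* Fill the cookie once; its fields replace the old scalar arguments. */
	ipc6.hlimit   = ip6_sk_dst_hoplimit(np, &fl6, dst);
	ipc6.tclass   = np->tclass;
	ipc6.dontfrag = np->dontfrag;
	ipc6.opt      = NULL;              /* no sticky tx options in this sketch */

	err = ip6_append_data(sk, getfrag, from, len, 0 /* transhdrlen */,
			      &ipc6, &fl6, rt, MSG_DONTWAIT, &sockc);
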
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 1f20345cbc97..e79330f214bd 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -238,6 +238,7 @@ static void ip6_dev_free(struct net_device *dev)
238{ 238{
239 struct ip6_tnl *t = netdev_priv(dev); 239 struct ip6_tnl *t = netdev_priv(dev);
240 240
241 gro_cells_destroy(&t->gro_cells);
241 dst_cache_destroy(&t->dst_cache); 242 dst_cache_destroy(&t->dst_cache);
242 free_percpu(dev->tstats); 243 free_percpu(dev->tstats);
243 free_netdev(dev); 244 free_netdev(dev);
@@ -753,97 +754,157 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
753} 754}
754EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl); 755EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl);
755 756
756/** 757static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
757 * ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally 758 const struct tnl_ptk_info *tpi,
758 * @skb: received socket buffer 759 struct metadata_dst *tun_dst,
759 * @protocol: ethernet protocol ID 760 int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
760 * @dscp_ecn_decapsulate: the function to decapsulate DSCP code and ECN 761 const struct ipv6hdr *ipv6h,
761 * 762 struct sk_buff *skb),
762 * Return: 0 763 bool log_ecn_err)
763 **/
764
765static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
766 __u8 ipproto,
767 int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
768 const struct ipv6hdr *ipv6h,
769 struct sk_buff *skb))
770{ 764{
771 struct ip6_tnl *t; 765 struct pcpu_sw_netstats *tstats;
772 const struct ipv6hdr *ipv6h = ipv6_hdr(skb); 766 const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
773 u8 tproto;
774 int err; 767 int err;
775 768
776 rcu_read_lock(); 769 if ((!(tpi->flags & TUNNEL_CSUM) &&
777 t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr); 770 (tunnel->parms.i_flags & TUNNEL_CSUM)) ||
778 if (t) { 771 ((tpi->flags & TUNNEL_CSUM) &&
779 struct pcpu_sw_netstats *tstats; 772 !(tunnel->parms.i_flags & TUNNEL_CSUM))) {
773 tunnel->dev->stats.rx_crc_errors++;
774 tunnel->dev->stats.rx_errors++;
775 goto drop;
776 }
780 777
781 tproto = ACCESS_ONCE(t->parms.proto); 778 if (tunnel->parms.i_flags & TUNNEL_SEQ) {
782 if (tproto != ipproto && tproto != 0) { 779 if (!(tpi->flags & TUNNEL_SEQ) ||
783 rcu_read_unlock(); 780 (tunnel->i_seqno &&
784 goto discard; 781 (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
782 tunnel->dev->stats.rx_fifo_errors++;
783 tunnel->dev->stats.rx_errors++;
784 goto drop;
785 } 785 }
786 tunnel->i_seqno = ntohl(tpi->seq) + 1;
787 }
786 788
787 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { 789 skb->protocol = tpi->proto;
788 rcu_read_unlock();
789 goto discard;
790 }
791 790
792 if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) { 791 /* Warning: All skb pointers will be invalidated! */
793 t->dev->stats.rx_dropped++; 792 if (tunnel->dev->type == ARPHRD_ETHER) {
794 rcu_read_unlock(); 793 if (!pskb_may_pull(skb, ETH_HLEN)) {
795 goto discard; 794 tunnel->dev->stats.rx_length_errors++;
795 tunnel->dev->stats.rx_errors++;
796 goto drop;
796 } 797 }
797 skb->mac_header = skb->network_header; 798
798 skb_reset_network_header(skb); 799 ipv6h = ipv6_hdr(skb);
799 skb->protocol = htons(protocol); 800 skb->protocol = eth_type_trans(skb, tunnel->dev);
800 memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); 801 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
801 802 } else {
802 __skb_tunnel_rx(skb, t->dev, t->net); 803 skb->dev = tunnel->dev;
803 804 }
804 err = dscp_ecn_decapsulate(t, ipv6h, skb); 805
805 if (unlikely(err)) { 806 skb_reset_network_header(skb);
806 if (log_ecn_error) 807 memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
807 net_info_ratelimited("non-ECT from %pI6 with dsfield=%#x\n", 808
808 &ipv6h->saddr, 809 __skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
809 ipv6_get_dsfield(ipv6h)); 810
810 if (err > 1) { 811 err = dscp_ecn_decapsulate(tunnel, ipv6h, skb);
811 ++t->dev->stats.rx_frame_errors; 812 if (unlikely(err)) {
812 ++t->dev->stats.rx_errors; 813 if (log_ecn_err)
813 rcu_read_unlock(); 814 net_info_ratelimited("non-ECT from %pI6 with DS=%#x\n",
814 goto discard; 815 &ipv6h->saddr,
815 } 816 ipv6_get_dsfield(ipv6h));
817 if (err > 1) {
818 ++tunnel->dev->stats.rx_frame_errors;
819 ++tunnel->dev->stats.rx_errors;
820 goto drop;
816 } 821 }
822 }
817 823
818 tstats = this_cpu_ptr(t->dev->tstats); 824 tstats = this_cpu_ptr(tunnel->dev->tstats);
819 u64_stats_update_begin(&tstats->syncp); 825 u64_stats_update_begin(&tstats->syncp);
820 tstats->rx_packets++; 826 tstats->rx_packets++;
821 tstats->rx_bytes += skb->len; 827 tstats->rx_bytes += skb->len;
822 u64_stats_update_end(&tstats->syncp); 828 u64_stats_update_end(&tstats->syncp);
823 829
824 netif_rx(skb); 830 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
825 831
826 rcu_read_unlock(); 832 gro_cells_receive(&tunnel->gro_cells, skb);
827 return 0; 833 return 0;
834
835drop:
836 kfree_skb(skb);
837 return 0;
838}
839
840int ip6_tnl_rcv(struct ip6_tnl *t, struct sk_buff *skb,
841 const struct tnl_ptk_info *tpi,
842 struct metadata_dst *tun_dst,
843 bool log_ecn_err)
844{
845 return __ip6_tnl_rcv(t, skb, tpi, NULL, ip6ip6_dscp_ecn_decapsulate,
846 log_ecn_err);
847}
848EXPORT_SYMBOL(ip6_tnl_rcv);
849
850static const struct tnl_ptk_info tpi_v6 = {
851 /* no tunnel info required for ipxip6. */
852 .proto = htons(ETH_P_IPV6),
853};
854
855static const struct tnl_ptk_info tpi_v4 = {
856 /* no tunnel info required for ipxip6. */
857 .proto = htons(ETH_P_IP),
858};
859
860static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
861 const struct tnl_ptk_info *tpi,
862 int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
863 const struct ipv6hdr *ipv6h,
864 struct sk_buff *skb))
865{
866 struct ip6_tnl *t;
867 const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
868 int ret = -1;
869
870 rcu_read_lock();
871 t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr);
872
873 if (t) {
874 u8 tproto = ACCESS_ONCE(t->parms.proto);
875
876 if (tproto != ipproto && tproto != 0)
877 goto drop;
878 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
879 goto drop;
880 if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr))
881 goto drop;
882 if (iptunnel_pull_header(skb, 0, tpi->proto, false))
883 goto drop;
884 ret = __ip6_tnl_rcv(t, skb, tpi, NULL, dscp_ecn_decapsulate,
885 log_ecn_error);
828 } 886 }
887
829 rcu_read_unlock(); 888 rcu_read_unlock();
830 return 1;
831 889
832discard: 890 return ret;
891
892drop:
893 rcu_read_unlock();
833 kfree_skb(skb); 894 kfree_skb(skb);
834 return 0; 895 return 0;
835} 896}
836 897
837static int ip4ip6_rcv(struct sk_buff *skb) 898static int ip4ip6_rcv(struct sk_buff *skb)
838{ 899{
839 return ip6_tnl_rcv(skb, ETH_P_IP, IPPROTO_IPIP, 900 return ipxip6_rcv(skb, IPPROTO_IPIP, &tpi_v4,
840 ip4ip6_dscp_ecn_decapsulate); 901 ip4ip6_dscp_ecn_decapsulate);
841} 902}
842 903
843static int ip6ip6_rcv(struct sk_buff *skb) 904static int ip6ip6_rcv(struct sk_buff *skb)
844{ 905{
845 return ip6_tnl_rcv(skb, ETH_P_IPV6, IPPROTO_IPV6, 906 return ipxip6_rcv(skb, IPPROTO_IPV6, &tpi_v6,
846 ip6ip6_dscp_ecn_decapsulate); 907 ip6ip6_dscp_ecn_decapsulate);
847} 908}
848 909
849struct ipv6_tel_txoption { 910struct ipv6_tel_txoption {
@@ -918,13 +979,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
918EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl); 979EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl);
919 980
920/** 981/**
921 * ip6_tnl_xmit2 - encapsulate packet and send 982 * ip6_tnl_xmit - encapsulate packet and send
922 * @skb: the outgoing socket buffer 983 * @skb: the outgoing socket buffer
923 * @dev: the outgoing tunnel device 984 * @dev: the outgoing tunnel device
924 * @dsfield: dscp code for outer header 985 * @dsfield: dscp code for outer header
925 * @fl: flow of tunneled packet 986 * @fl6: flow of tunneled packet
926 * @encap_limit: encapsulation limit 987 * @encap_limit: encapsulation limit
927 * @pmtu: Path MTU is stored if packet is too big 988 * @pmtu: Path MTU is stored if packet is too big
989 * @proto: next header value
928 * 990 *
929 * Description: 991 * Description:
930 * Build new header and do some sanity checks on the packet before sending 992 * Build new header and do some sanity checks on the packet before sending
@@ -936,12 +998,9 @@ EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl);
936 * %-EMSGSIZE message too big. return mtu in this case. 998 * %-EMSGSIZE message too big. return mtu in this case.
937 **/ 999 **/
938 1000
939static int ip6_tnl_xmit2(struct sk_buff *skb, 1001int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
940 struct net_device *dev, 1002 struct flowi6 *fl6, int encap_limit, __u32 *pmtu,
941 __u8 dsfield, 1003 __u8 proto)
942 struct flowi6 *fl6,
943 int encap_limit,
944 __u32 *pmtu)
945{ 1004{
946 struct ip6_tnl *t = netdev_priv(dev); 1005 struct ip6_tnl *t = netdev_priv(dev);
947 struct net *net = t->net; 1006 struct net *net = t->net;
@@ -952,7 +1011,6 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
952 struct net_device *tdev; 1011 struct net_device *tdev;
953 int mtu; 1012 int mtu;
954 unsigned int max_headroom = sizeof(struct ipv6hdr); 1013 unsigned int max_headroom = sizeof(struct ipv6hdr);
955 u8 proto;
956 int err = -1; 1014 int err = -1;
957 1015
958 /* NBMA tunnel */ 1016 /* NBMA tunnel */
@@ -1014,12 +1072,23 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
1014 mtu = IPV6_MIN_MTU; 1072 mtu = IPV6_MIN_MTU;
1015 if (skb_dst(skb)) 1073 if (skb_dst(skb))
1016 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); 1074 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
1017 if (skb->len > mtu) { 1075 if (skb->len > mtu && !skb_is_gso(skb)) {
1018 *pmtu = mtu; 1076 *pmtu = mtu;
1019 err = -EMSGSIZE; 1077 err = -EMSGSIZE;
1020 goto tx_err_dst_release; 1078 goto tx_err_dst_release;
1021 } 1079 }
1022 1080
1081 if (t->err_count > 0) {
1082 if (time_before(jiffies,
1083 t->err_time + IP6TUNNEL_ERR_TIMEO)) {
1084 t->err_count--;
1085
1086 dst_link_failure(skb);
1087 } else {
1088 t->err_count = 0;
1089 }
1090 }
1091
1023 skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev))); 1092 skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));
1024 1093
1025 /* 1094 /*
@@ -1045,9 +1114,6 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
1045 dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr); 1114 dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr);
1046 skb_dst_set(skb, dst); 1115 skb_dst_set(skb, dst);
1047 1116
1048 skb->transport_header = skb->network_header;
1049
1050 proto = fl6->flowi6_proto;
1051 if (encap_limit >= 0) { 1117 if (encap_limit >= 0) {
1052 init_tel_txopt(&opt, encap_limit); 1118 init_tel_txopt(&opt, encap_limit);
1053 ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); 1119 ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
@@ -1058,6 +1124,11 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
1058 skb->encapsulation = 1; 1124 skb->encapsulation = 1;
1059 } 1125 }
1060 1126
1127 max_headroom = LL_RESERVED_SPACE(dst->dev) + sizeof(struct ipv6hdr)
1128 + dst->header_len;
1129 if (max_headroom > dev->needed_headroom)
1130 dev->needed_headroom = max_headroom;
1131
1061 skb_push(skb, sizeof(struct ipv6hdr)); 1132 skb_push(skb, sizeof(struct ipv6hdr));
1062 skb_reset_network_header(skb); 1133 skb_reset_network_header(skb);
1063 ipv6h = ipv6_hdr(skb); 1134 ipv6h = ipv6_hdr(skb);
@@ -1076,6 +1147,7 @@ tx_err_dst_release:
1076 dst_release(dst); 1147 dst_release(dst);
1077 return err; 1148 return err;
1078} 1149}
1150EXPORT_SYMBOL(ip6_tnl_xmit);
1079 1151
1080static inline int 1152static inline int
1081ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) 1153ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -1099,7 +1171,6 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
1099 encap_limit = t->parms.encap_limit; 1171 encap_limit = t->parms.encap_limit;
1100 1172
1101 memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); 1173 memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
1102 fl6.flowi6_proto = IPPROTO_IPIP;
1103 1174
1104 dsfield = ipv4_get_dsfield(iph); 1175 dsfield = ipv4_get_dsfield(iph);
1105 1176
@@ -1109,7 +1180,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
1109 if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) 1180 if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
1110 fl6.flowi6_mark = skb->mark; 1181 fl6.flowi6_mark = skb->mark;
1111 1182
1112 err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); 1183 err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
1184 IPPROTO_IPIP);
1113 if (err != 0) { 1185 if (err != 0) {
1114 /* XXX: send ICMP error even if DF is not set. */ 1186 /* XXX: send ICMP error even if DF is not set. */
1115 if (err == -EMSGSIZE) 1187 if (err == -EMSGSIZE)
@@ -1153,7 +1225,6 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
1153 encap_limit = t->parms.encap_limit; 1225 encap_limit = t->parms.encap_limit;
1154 1226
1155 memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); 1227 memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
1156 fl6.flowi6_proto = IPPROTO_IPV6;
1157 1228
1158 dsfield = ipv6_get_dsfield(ipv6h); 1229 dsfield = ipv6_get_dsfield(ipv6h);
1159 if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) 1230 if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
@@ -1163,7 +1234,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
1163 if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) 1234 if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
1164 fl6.flowi6_mark = skb->mark; 1235 fl6.flowi6_mark = skb->mark;
1165 1236
1166 err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); 1237 err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
1238 IPPROTO_IPV6);
1167 if (err != 0) { 1239 if (err != 0) {
1168 if (err == -EMSGSIZE) 1240 if (err == -EMSGSIZE)
1169 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 1241 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
@@ -1174,7 +1246,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
1174} 1246}
1175 1247
1176static netdev_tx_t 1248static netdev_tx_t
1177ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) 1249ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev)
1178{ 1250{
1179 struct ip6_tnl *t = netdev_priv(dev); 1251 struct ip6_tnl *t = netdev_priv(dev);
1180 struct net_device_stats *stats = &t->dev->stats; 1252 struct net_device_stats *stats = &t->dev->stats;
@@ -1370,6 +1442,8 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1370 struct net *net = t->net; 1442 struct net *net = t->net;
1371 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 1443 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1372 1444
1445 memset(&p1, 0, sizeof(p1));
1446
1373 switch (cmd) { 1447 switch (cmd) {
1374 case SIOCGETTUNNEL: 1448 case SIOCGETTUNNEL:
1375 if (dev == ip6n->fb_tnl_dev) { 1449 if (dev == ip6n->fb_tnl_dev) {
@@ -1464,8 +1538,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1464 * %-EINVAL if mtu too small 1538 * %-EINVAL if mtu too small
1465 **/ 1539 **/
1466 1540
1467static int 1541int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
1468ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
1469{ 1542{
1470 struct ip6_tnl *tnl = netdev_priv(dev); 1543 struct ip6_tnl *tnl = netdev_priv(dev);
1471 1544
@@ -1481,6 +1554,7 @@ ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
1481 dev->mtu = new_mtu; 1554 dev->mtu = new_mtu;
1482 return 0; 1555 return 0;
1483} 1556}
1557EXPORT_SYMBOL(ip6_tnl_change_mtu);
1484 1558
1485int ip6_tnl_get_iflink(const struct net_device *dev) 1559int ip6_tnl_get_iflink(const struct net_device *dev)
1486{ 1560{
@@ -1493,7 +1567,7 @@ EXPORT_SYMBOL(ip6_tnl_get_iflink);
1493static const struct net_device_ops ip6_tnl_netdev_ops = { 1567static const struct net_device_ops ip6_tnl_netdev_ops = {
1494 .ndo_init = ip6_tnl_dev_init, 1568 .ndo_init = ip6_tnl_dev_init,
1495 .ndo_uninit = ip6_tnl_dev_uninit, 1569 .ndo_uninit = ip6_tnl_dev_uninit,
1496 .ndo_start_xmit = ip6_tnl_xmit, 1570 .ndo_start_xmit = ip6_tnl_start_xmit,
1497 .ndo_do_ioctl = ip6_tnl_ioctl, 1571 .ndo_do_ioctl = ip6_tnl_ioctl,
1498 .ndo_change_mtu = ip6_tnl_change_mtu, 1572 .ndo_change_mtu = ip6_tnl_change_mtu,
1499 .ndo_get_stats = ip6_get_stats, 1573 .ndo_get_stats = ip6_get_stats,
@@ -1549,13 +1623,25 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
1549 return -ENOMEM; 1623 return -ENOMEM;
1550 1624
1551 ret = dst_cache_init(&t->dst_cache, GFP_KERNEL); 1625 ret = dst_cache_init(&t->dst_cache, GFP_KERNEL);
1552 if (ret) { 1626 if (ret)
1553 free_percpu(dev->tstats); 1627 goto free_stats;
1554 dev->tstats = NULL; 1628
1555 return ret; 1629 ret = gro_cells_init(&t->gro_cells, dev);
1556 } 1630 if (ret)
1631 goto destroy_dst;
1632
1633 t->hlen = 0;
1634 t->tun_hlen = 0;
1557 1635
1558 return 0; 1636 return 0;
1637
1638destroy_dst:
1639 dst_cache_destroy(&t->dst_cache);
1640free_stats:
1641 free_percpu(dev->tstats);
1642 dev->tstats = NULL;
1643
1644 return ret;
1559} 1645}
1560 1646
1561/** 1647/**
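
Sketch (not part of the patch): the ip6_tunnel.c hunks split the receive path into a tpi-aware __ip6_tnl_rcv() plus an exported ip6_tnl_rcv(), feed accepted packets through gro_cells_receive(), and turn the static ip6_tnl_xmit2() into an exported ip6_tnl_xmit() that takes the next-header value as an explicit argument instead of reading fl6.flowi6_proto. Below is a hypothetical external caller of the new transmit helper, with an invented "foo6" name and error handling copied from the ip4ip6_tnl_xmit()/ip6ip6_tnl_xmit() hunks above:

	static inline int foo6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
	{
		struct ip6_tnl *t = netdev_priv(dev);
		struct flowi6 fl6;
		__u32 mtu;
		int err;

		memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));

		/* proto is now passed explicitly; dsfield 0 keeps the sketch short */
		err = ip6_tnl_xmit(skb, dev, 0, &fl6, t->parms.encap_limit, &mtu,
				   IPPROTO_IPV6);
		if (err != 0) {
			if (err == -EMSGSIZE)
				icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -1;
		}
		return 0;
	}
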
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index bf678324fd52..f2e2013f8346 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1984,10 +1984,10 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1984 1984
1985static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 1985static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
1986{ 1986{
1987 IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), 1987 __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
1988 IPSTATS_MIB_OUTFORWDATAGRAMS); 1988 IPSTATS_MIB_OUTFORWDATAGRAMS);
1989 IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), 1989 __IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
1990 IPSTATS_MIB_OUTOCTETS, skb->len); 1990 IPSTATS_MIB_OUTOCTETS, skb->len);
1991 return dst_output(net, sk, skb); 1991 return dst_output(net, sk, skb);
1992} 1992}
1993 1993
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 4ff4b29894eb..a9895e15ee9c 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -473,7 +473,7 @@ sticky_done:
473 struct msghdr msg; 473 struct msghdr msg;
474 struct flowi6 fl6; 474 struct flowi6 fl6;
475 struct sockcm_cookie sockc_junk; 475 struct sockcm_cookie sockc_junk;
476 int junk; 476 struct ipcm6_cookie ipc6;
477 477
478 memset(&fl6, 0, sizeof(fl6)); 478 memset(&fl6, 0, sizeof(fl6));
479 fl6.flowi6_oif = sk->sk_bound_dev_if; 479 fl6.flowi6_oif = sk->sk_bound_dev_if;
@@ -503,9 +503,9 @@ sticky_done:
503 503
504 msg.msg_controllen = optlen; 504 msg.msg_controllen = optlen;
505 msg.msg_control = (void *)(opt+1); 505 msg.msg_control = (void *)(opt+1);
506 ipc6.opt = opt;
506 507
507 retv = ip6_datagram_send_ctl(net, sk, &msg, &fl6, opt, &junk, 508 retv = ip6_datagram_send_ctl(net, sk, &msg, &fl6, &ipc6, &sockc_junk);
508 &junk, &junk, &sockc_junk);
509 if (retv) 509 if (retv)
510 goto done; 510 goto done;
511update: 511update:
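
Sketch (not part of the patch): in ipv6_sockglue.c the IPV6_2292PKTOPTIONS handler stops passing three throwaway "&junk" out-parameters; ip6_datagram_send_ctl() now reports the parsed hop limit, traffic class and dontfrag setting through a struct ipcm6_cookie, with ipc6.opt pointing at the option buffer being filled. The shape of the new contract, with the "unset" sentinels taken from the rawv6_sendmsg() hunk further down (net, sk, msg, fl6, opt and sockc are assumed to be set up as in those callers):

	struct ipcm6_cookie ipc6;

	ipc6.hlimit   = -1;     /* a value below 0 means "not given via cmsg" */
	ipc6.tclass   = -1;
	ipc6.dontfrag = -1;
	ipc6.opt      = opt;    /* parsed IPV6_* ancillary data lands here */

	err = ip6_datagram_send_ctl(net, sk, msg, &fl6, &ipc6, &sockc);
	if (err < 0)
		return err;
	/* fields still below 0 afterwards fall back to the socket defaults */
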
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 73e606c719ef..63e06c3dd319 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -39,34 +39,12 @@ MODULE_LICENSE("GPL");
39MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); 39MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
40MODULE_DESCRIPTION("IPv6 packet filter"); 40MODULE_DESCRIPTION("IPv6 packet filter");
41 41
42/*#define DEBUG_IP_FIREWALL*/
43/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
44/*#define DEBUG_IP_FIREWALL_USER*/
45
46#ifdef DEBUG_IP_FIREWALL
47#define dprintf(format, args...) pr_info(format , ## args)
48#else
49#define dprintf(format, args...)
50#endif
51
52#ifdef DEBUG_IP_FIREWALL_USER
53#define duprintf(format, args...) pr_info(format , ## args)
54#else
55#define duprintf(format, args...)
56#endif
57
58#ifdef CONFIG_NETFILTER_DEBUG 42#ifdef CONFIG_NETFILTER_DEBUG
59#define IP_NF_ASSERT(x) WARN_ON(!(x)) 43#define IP_NF_ASSERT(x) WARN_ON(!(x))
60#else 44#else
61#define IP_NF_ASSERT(x) 45#define IP_NF_ASSERT(x)
62#endif 46#endif
63 47
64#if 0
65/* All the better to debug you with... */
66#define static
67#define inline
68#endif
69
70void *ip6t_alloc_initial_table(const struct xt_table *info) 48void *ip6t_alloc_initial_table(const struct xt_table *info)
71{ 49{
72 return xt_alloc_initial_table(ip6t, IP6T); 50 return xt_alloc_initial_table(ip6t, IP6T);
@@ -100,35 +78,18 @@ ip6_packet_match(const struct sk_buff *skb,
100 if (FWINV(ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk, 78 if (FWINV(ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk,
101 &ip6info->src), IP6T_INV_SRCIP) || 79 &ip6info->src), IP6T_INV_SRCIP) ||
102 FWINV(ipv6_masked_addr_cmp(&ipv6->daddr, &ip6info->dmsk, 80 FWINV(ipv6_masked_addr_cmp(&ipv6->daddr, &ip6info->dmsk,
103 &ip6info->dst), IP6T_INV_DSTIP)) { 81 &ip6info->dst), IP6T_INV_DSTIP))
104 dprintf("Source or dest mismatch.\n");
105/*
106 dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr,
107 ipinfo->smsk.s_addr, ipinfo->src.s_addr,
108 ipinfo->invflags & IP6T_INV_SRCIP ? " (INV)" : "");
109 dprintf("DST: %u. Mask: %u. Target: %u.%s\n", ip->daddr,
110 ipinfo->dmsk.s_addr, ipinfo->dst.s_addr,
111 ipinfo->invflags & IP6T_INV_DSTIP ? " (INV)" : "");*/
112 return false; 82 return false;
113 }
114 83
115 ret = ifname_compare_aligned(indev, ip6info->iniface, ip6info->iniface_mask); 84 ret = ifname_compare_aligned(indev, ip6info->iniface, ip6info->iniface_mask);
116 85
117 if (FWINV(ret != 0, IP6T_INV_VIA_IN)) { 86 if (FWINV(ret != 0, IP6T_INV_VIA_IN))
118 dprintf("VIA in mismatch (%s vs %s).%s\n",
119 indev, ip6info->iniface,
120 ip6info->invflags & IP6T_INV_VIA_IN ? " (INV)" : "");
121 return false; 87 return false;
122 }
123 88
124 ret = ifname_compare_aligned(outdev, ip6info->outiface, ip6info->outiface_mask); 89 ret = ifname_compare_aligned(outdev, ip6info->outiface, ip6info->outiface_mask);
125 90
126 if (FWINV(ret != 0, IP6T_INV_VIA_OUT)) { 91 if (FWINV(ret != 0, IP6T_INV_VIA_OUT))
127 dprintf("VIA out mismatch (%s vs %s).%s\n",
128 outdev, ip6info->outiface,
129 ip6info->invflags & IP6T_INV_VIA_OUT ? " (INV)" : "");
130 return false; 92 return false;
131 }
132 93
133/* ... might want to do something with class and flowlabel here ... */ 94/* ... might want to do something with class and flowlabel here ... */
134 95
@@ -145,11 +106,6 @@ ip6_packet_match(const struct sk_buff *skb,
145 } 106 }
146 *fragoff = _frag_off; 107 *fragoff = _frag_off;
147 108
148 dprintf("Packet protocol %hi ?= %s%hi.\n",
149 protohdr,
150 ip6info->invflags & IP6T_INV_PROTO ? "!":"",
151 ip6info->proto);
152
153 if (ip6info->proto == protohdr) { 109 if (ip6info->proto == protohdr) {
154 if (ip6info->invflags & IP6T_INV_PROTO) 110 if (ip6info->invflags & IP6T_INV_PROTO)
155 return false; 111 return false;
@@ -169,16 +125,11 @@ ip6_packet_match(const struct sk_buff *skb,
169static bool 125static bool
170ip6_checkentry(const struct ip6t_ip6 *ipv6) 126ip6_checkentry(const struct ip6t_ip6 *ipv6)
171{ 127{
172 if (ipv6->flags & ~IP6T_F_MASK) { 128 if (ipv6->flags & ~IP6T_F_MASK)
173 duprintf("Unknown flag bits set: %08X\n",
174 ipv6->flags & ~IP6T_F_MASK);
175 return false; 129 return false;
176 } 130 if (ipv6->invflags & ~IP6T_INV_MASK)
177 if (ipv6->invflags & ~IP6T_INV_MASK) {
178 duprintf("Unknown invflag bits set: %08X\n",
179 ipv6->invflags & ~IP6T_INV_MASK);
180 return false; 131 return false;
181 } 132
182 return true; 133 return true;
183} 134}
184 135
@@ -446,13 +397,9 @@ ip6t_do_table(struct sk_buff *skb,
446 xt_write_recseq_end(addend); 397 xt_write_recseq_end(addend);
447 local_bh_enable(); 398 local_bh_enable();
448 399
449#ifdef DEBUG_ALLOW_ALL
450 return NF_ACCEPT;
451#else
452 if (acpar.hotdrop) 400 if (acpar.hotdrop)
453 return NF_DROP; 401 return NF_DROP;
454 else return verdict; 402 else return verdict;
455#endif
456} 403}
457 404
458static bool find_jump_target(const struct xt_table_info *t, 405static bool find_jump_target(const struct xt_table_info *t,
@@ -492,11 +439,9 @@ mark_source_chains(const struct xt_table_info *newinfo,
492 = (void *)ip6t_get_target_c(e); 439 = (void *)ip6t_get_target_c(e);
493 int visited = e->comefrom & (1 << hook); 440 int visited = e->comefrom & (1 << hook);
494 441
495 if (e->comefrom & (1 << NF_INET_NUMHOOKS)) { 442 if (e->comefrom & (1 << NF_INET_NUMHOOKS))
496 pr_err("iptables: loop hook %u pos %u %08X.\n",
497 hook, pos, e->comefrom);
498 return 0; 443 return 0;
499 } 444
500 e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); 445 e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
501 446
502 /* Unconditional return/END. */ 447 /* Unconditional return/END. */
@@ -508,26 +453,13 @@ mark_source_chains(const struct xt_table_info *newinfo,
508 453
509 if ((strcmp(t->target.u.user.name, 454 if ((strcmp(t->target.u.user.name,
510 XT_STANDARD_TARGET) == 0) && 455 XT_STANDARD_TARGET) == 0) &&
511 t->verdict < -NF_MAX_VERDICT - 1) { 456 t->verdict < -NF_MAX_VERDICT - 1)
512 duprintf("mark_source_chains: bad "
513 "negative verdict (%i)\n",
514 t->verdict);
515 return 0; 457 return 0;
516 }
517 458
518 /* Return: backtrack through the last 459 /* Return: backtrack through the last
519 big jump. */ 460 big jump. */
520 do { 461 do {
521 e->comefrom ^= (1<<NF_INET_NUMHOOKS); 462 e->comefrom ^= (1<<NF_INET_NUMHOOKS);
522#ifdef DEBUG_IP_FIREWALL_USER
523 if (e->comefrom
524 & (1 << NF_INET_NUMHOOKS)) {
525 duprintf("Back unset "
526 "on hook %u "
527 "rule %u\n",
528 hook, pos);
529 }
530#endif
531 oldpos = pos; 463 oldpos = pos;
532 pos = e->counters.pcnt; 464 pos = e->counters.pcnt;
533 e->counters.pcnt = 0; 465 e->counters.pcnt = 0;
@@ -555,8 +487,6 @@ mark_source_chains(const struct xt_table_info *newinfo,
555 XT_STANDARD_TARGET) == 0 && 487 XT_STANDARD_TARGET) == 0 &&
556 newpos >= 0) { 488 newpos >= 0) {
557 /* This a jump; chase it. */ 489 /* This a jump; chase it. */
558 duprintf("Jump rule %u -> %u\n",
559 pos, newpos);
560 e = (struct ip6t_entry *) 490 e = (struct ip6t_entry *)
561 (entry0 + newpos); 491 (entry0 + newpos);
562 if (!find_jump_target(newinfo, e)) 492 if (!find_jump_target(newinfo, e))
@@ -573,8 +503,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
573 pos = newpos; 503 pos = newpos;
574 } 504 }
575 } 505 }
576next: 506next: ;
577 duprintf("Finished chain %u\n", hook);
578 } 507 }
579 return 1; 508 return 1;
580} 509}
@@ -595,19 +524,12 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net)
595static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) 524static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
596{ 525{
597 const struct ip6t_ip6 *ipv6 = par->entryinfo; 526 const struct ip6t_ip6 *ipv6 = par->entryinfo;
598 int ret;
599 527
600 par->match = m->u.kernel.match; 528 par->match = m->u.kernel.match;
601 par->matchinfo = m->data; 529 par->matchinfo = m->data;
602 530
603 ret = xt_check_match(par, m->u.match_size - sizeof(*m), 531 return xt_check_match(par, m->u.match_size - sizeof(*m),
604 ipv6->proto, ipv6->invflags & IP6T_INV_PROTO); 532 ipv6->proto, ipv6->invflags & IP6T_INV_PROTO);
605 if (ret < 0) {
606 duprintf("ip_tables: check failed for `%s'.\n",
607 par.match->name);
608 return ret;
609 }
610 return 0;
611} 533}
612 534
613static int 535static int
@@ -618,10 +540,9 @@ find_check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
618 540
619 match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name, 541 match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name,
620 m->u.user.revision); 542 m->u.user.revision);
621 if (IS_ERR(match)) { 543 if (IS_ERR(match))
622 duprintf("find_check_match: `%s' not found\n", m->u.user.name);
623 return PTR_ERR(match); 544 return PTR_ERR(match);
624 } 545
625 m->u.kernel.match = match; 546 m->u.kernel.match = match;
626 547
627 ret = check_match(m, par); 548 ret = check_match(m, par);
@@ -646,17 +567,11 @@ static int check_target(struct ip6t_entry *e, struct net *net, const char *name)
646 .hook_mask = e->comefrom, 567 .hook_mask = e->comefrom,
647 .family = NFPROTO_IPV6, 568 .family = NFPROTO_IPV6,
648 }; 569 };
649 int ret;
650 570
651 t = ip6t_get_target(e); 571 t = ip6t_get_target(e);
652 ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 572 return xt_check_target(&par, t->u.target_size - sizeof(*t),
653 e->ipv6.proto, e->ipv6.invflags & IP6T_INV_PROTO); 573 e->ipv6.proto,
654 if (ret < 0) { 574 e->ipv6.invflags & IP6T_INV_PROTO);
655 duprintf("ip_tables: check failed for `%s'.\n",
656 t->u.kernel.target->name);
657 return ret;
658 }
659 return 0;
660} 575}
661 576
662static int 577static int
@@ -669,10 +584,12 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
669 unsigned int j; 584 unsigned int j;
670 struct xt_mtchk_param mtpar; 585 struct xt_mtchk_param mtpar;
671 struct xt_entry_match *ematch; 586 struct xt_entry_match *ematch;
587 unsigned long pcnt;
672 588
673 e->counters.pcnt = xt_percpu_counter_alloc(); 589 pcnt = xt_percpu_counter_alloc();
674 if (IS_ERR_VALUE(e->counters.pcnt)) 590 if (IS_ERR_VALUE(pcnt))
675 return -ENOMEM; 591 return -ENOMEM;
592 e->counters.pcnt = pcnt;
676 593
677 j = 0; 594 j = 0;
678 mtpar.net = net; 595 mtpar.net = net;
@@ -691,7 +608,6 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
691 target = xt_request_find_target(NFPROTO_IPV6, t->u.user.name, 608 target = xt_request_find_target(NFPROTO_IPV6, t->u.user.name,
692 t->u.user.revision); 609 t->u.user.revision);
693 if (IS_ERR(target)) { 610 if (IS_ERR(target)) {
694 duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
695 ret = PTR_ERR(target); 611 ret = PTR_ERR(target);
696 goto cleanup_matches; 612 goto cleanup_matches;
697 } 613 }
@@ -744,17 +660,12 @@ check_entry_size_and_hooks(struct ip6t_entry *e,
744 660
745 if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0 || 661 if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0 ||
746 (unsigned char *)e + sizeof(struct ip6t_entry) >= limit || 662 (unsigned char *)e + sizeof(struct ip6t_entry) >= limit ||
747 (unsigned char *)e + e->next_offset > limit) { 663 (unsigned char *)e + e->next_offset > limit)
748 duprintf("Bad offset %p\n", e);
749 return -EINVAL; 664 return -EINVAL;
750 }
751 665
752 if (e->next_offset 666 if (e->next_offset
753 < sizeof(struct ip6t_entry) + sizeof(struct xt_entry_target)) { 667 < sizeof(struct ip6t_entry) + sizeof(struct xt_entry_target))
754 duprintf("checking: element %p size %u\n",
755 e, e->next_offset);
756 return -EINVAL; 668 return -EINVAL;
757 }
758 669
759 if (!ip6_checkentry(&e->ipv6)) 670 if (!ip6_checkentry(&e->ipv6))
760 return -EINVAL; 671 return -EINVAL;
@@ -771,12 +682,9 @@ check_entry_size_and_hooks(struct ip6t_entry *e,
771 if ((unsigned char *)e - base == hook_entries[h]) 682 if ((unsigned char *)e - base == hook_entries[h])
772 newinfo->hook_entry[h] = hook_entries[h]; 683 newinfo->hook_entry[h] = hook_entries[h];
773 if ((unsigned char *)e - base == underflows[h]) { 684 if ((unsigned char *)e - base == underflows[h]) {
774 if (!check_underflow(e)) { 685 if (!check_underflow(e))
775 pr_debug("Underflows must be unconditional and "
776 "use the STANDARD target with "
777 "ACCEPT/DROP\n");
778 return -EINVAL; 686 return -EINVAL;
779 } 687
780 newinfo->underflow[h] = underflows[h]; 688 newinfo->underflow[h] = underflows[h];
781 } 689 }
782 } 690 }
@@ -828,7 +736,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
828 newinfo->underflow[i] = 0xFFFFFFFF; 736 newinfo->underflow[i] = 0xFFFFFFFF;
829 } 737 }
830 738
831 duprintf("translate_table: size %u\n", newinfo->size);
832 i = 0; 739 i = 0;
833 /* Walk through entries, checking offsets. */ 740 /* Walk through entries, checking offsets. */
834 xt_entry_foreach(iter, entry0, newinfo->size) { 741 xt_entry_foreach(iter, entry0, newinfo->size) {
@@ -845,27 +752,18 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
845 ++newinfo->stacksize; 752 ++newinfo->stacksize;
846 } 753 }
847 754
848 if (i != repl->num_entries) { 755 if (i != repl->num_entries)
849 duprintf("translate_table: %u not %u entries\n",
850 i, repl->num_entries);
851 return -EINVAL; 756 return -EINVAL;
852 }
853 757
854 /* Check hooks all assigned */ 758 /* Check hooks all assigned */
855 for (i = 0; i < NF_INET_NUMHOOKS; i++) { 759 for (i = 0; i < NF_INET_NUMHOOKS; i++) {
856 /* Only hooks which are valid */ 760 /* Only hooks which are valid */
857 if (!(repl->valid_hooks & (1 << i))) 761 if (!(repl->valid_hooks & (1 << i)))
858 continue; 762 continue;
859 if (newinfo->hook_entry[i] == 0xFFFFFFFF) { 763 if (newinfo->hook_entry[i] == 0xFFFFFFFF)
860 duprintf("Invalid hook entry %u %u\n",
861 i, repl->hook_entry[i]);
862 return -EINVAL; 764 return -EINVAL;
863 } 765 if (newinfo->underflow[i] == 0xFFFFFFFF)
864 if (newinfo->underflow[i] == 0xFFFFFFFF) {
865 duprintf("Invalid underflow %u %u\n",
866 i, repl->underflow[i]);
867 return -EINVAL; 766 return -EINVAL;
868 }
869 } 767 }
870 768
871 if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) 769 if (!mark_source_chains(newinfo, repl->valid_hooks, entry0))
@@ -1093,11 +991,8 @@ static int get_info(struct net *net, void __user *user,
1093 struct xt_table *t; 991 struct xt_table *t;
1094 int ret; 992 int ret;
1095 993
1096 if (*len != sizeof(struct ip6t_getinfo)) { 994 if (*len != sizeof(struct ip6t_getinfo))
1097 duprintf("length %u != %zu\n", *len,
1098 sizeof(struct ip6t_getinfo));
1099 return -EINVAL; 995 return -EINVAL;
1100 }
1101 996
1102 if (copy_from_user(name, user, sizeof(name)) != 0) 997 if (copy_from_user(name, user, sizeof(name)) != 0)
1103 return -EFAULT; 998 return -EFAULT;
@@ -1155,31 +1050,24 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr,
1155 struct ip6t_get_entries get; 1050 struct ip6t_get_entries get;
1156 struct xt_table *t; 1051 struct xt_table *t;
1157 1052
1158 if (*len < sizeof(get)) { 1053 if (*len < sizeof(get))
1159 duprintf("get_entries: %u < %zu\n", *len, sizeof(get));
1160 return -EINVAL; 1054 return -EINVAL;
1161 }
1162 if (copy_from_user(&get, uptr, sizeof(get)) != 0) 1055 if (copy_from_user(&get, uptr, sizeof(get)) != 0)
1163 return -EFAULT; 1056 return -EFAULT;
1164 if (*len != sizeof(struct ip6t_get_entries) + get.size) { 1057 if (*len != sizeof(struct ip6t_get_entries) + get.size)
1165 duprintf("get_entries: %u != %zu\n",
1166 *len, sizeof(get) + get.size);
1167 return -EINVAL; 1058 return -EINVAL;
1168 } 1059
1169 get.name[sizeof(get.name) - 1] = '\0'; 1060 get.name[sizeof(get.name) - 1] = '\0';
1170 1061
1171 t = xt_find_table_lock(net, AF_INET6, get.name); 1062 t = xt_find_table_lock(net, AF_INET6, get.name);
1172 if (!IS_ERR_OR_NULL(t)) { 1063 if (!IS_ERR_OR_NULL(t)) {
1173 struct xt_table_info *private = t->private; 1064 struct xt_table_info *private = t->private;
1174 duprintf("t->private->number = %u\n", private->number);
1175 if (get.size == private->size) 1065 if (get.size == private->size)
1176 ret = copy_entries_to_user(private->size, 1066 ret = copy_entries_to_user(private->size,
1177 t, uptr->entrytable); 1067 t, uptr->entrytable);
1178 else { 1068 else
1179 duprintf("get_entries: I've got %u not %u!\n",
1180 private->size, get.size);
1181 ret = -EAGAIN; 1069 ret = -EAGAIN;
1182 } 1070
1183 module_put(t->me); 1071 module_put(t->me);
1184 xt_table_unlock(t); 1072 xt_table_unlock(t);
1185 } else 1073 } else
@@ -1215,8 +1103,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1215 1103
1216 /* You lied! */ 1104 /* You lied! */
1217 if (valid_hooks != t->valid_hooks) { 1105 if (valid_hooks != t->valid_hooks) {
1218 duprintf("Valid hook crap: %08X vs %08X\n",
1219 valid_hooks, t->valid_hooks);
1220 ret = -EINVAL; 1106 ret = -EINVAL;
1221 goto put_module; 1107 goto put_module;
1222 } 1108 }
@@ -1226,8 +1112,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1226 goto put_module; 1112 goto put_module;
1227 1113
1228 /* Update module usage count based on number of rules */ 1114 /* Update module usage count based on number of rules */
1229 duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
1230 oldinfo->number, oldinfo->initial_entries, newinfo->number);
1231 if ((oldinfo->number > oldinfo->initial_entries) || 1115 if ((oldinfo->number > oldinfo->initial_entries) ||
1232 (newinfo->number <= oldinfo->initial_entries)) 1116 (newinfo->number <= oldinfo->initial_entries))
1233 module_put(t->me); 1117 module_put(t->me);
@@ -1296,8 +1180,6 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
1296 if (ret != 0) 1180 if (ret != 0)
1297 goto free_newinfo; 1181 goto free_newinfo;
1298 1182
1299 duprintf("ip_tables: Translated table\n");
1300
1301 ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, 1183 ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1302 tmp.num_counters, tmp.counters); 1184 tmp.num_counters, tmp.counters);
1303 if (ret) 1185 if (ret)
@@ -1422,11 +1304,9 @@ compat_find_calc_match(struct xt_entry_match *m,
1422 1304
1423 match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name, 1305 match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name,
1424 m->u.user.revision); 1306 m->u.user.revision);
1425 if (IS_ERR(match)) { 1307 if (IS_ERR(match))
1426 duprintf("compat_check_calc_match: `%s' not found\n",
1427 m->u.user.name);
1428 return PTR_ERR(match); 1308 return PTR_ERR(match);
1429 } 1309
1430 m->u.kernel.match = match; 1310 m->u.kernel.match = match;
1431 *size += xt_compat_match_offset(match); 1311 *size += xt_compat_match_offset(match);
1432 return 0; 1312 return 0;
@@ -1458,20 +1338,14 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
1458 unsigned int j; 1338 unsigned int j;
1459 int ret, off; 1339 int ret, off;
1460 1340
1461 duprintf("check_compat_entry_size_and_hooks %p\n", e);
1462 if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 || 1341 if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 ||
1463 (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit || 1342 (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit ||
1464 (unsigned char *)e + e->next_offset > limit) { 1343 (unsigned char *)e + e->next_offset > limit)
1465 duprintf("Bad offset %p, limit = %p\n", e, limit);
1466 return -EINVAL; 1344 return -EINVAL;
1467 }
1468 1345
1469 if (e->next_offset < sizeof(struct compat_ip6t_entry) + 1346 if (e->next_offset < sizeof(struct compat_ip6t_entry) +
1470 sizeof(struct compat_xt_entry_target)) { 1347 sizeof(struct compat_xt_entry_target))
1471 duprintf("checking: element %p size %u\n",
1472 e, e->next_offset);
1473 return -EINVAL; 1348 return -EINVAL;
1474 }
1475 1349
1476 if (!ip6_checkentry(&e->ipv6)) 1350 if (!ip6_checkentry(&e->ipv6))
1477 return -EINVAL; 1351 return -EINVAL;
@@ -1495,8 +1369,6 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
1495 target = xt_request_find_target(NFPROTO_IPV6, t->u.user.name, 1369 target = xt_request_find_target(NFPROTO_IPV6, t->u.user.name,
1496 t->u.user.revision); 1370 t->u.user.revision);
1497 if (IS_ERR(target)) { 1371 if (IS_ERR(target)) {
1498 duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
1499 t->u.user.name);
1500 ret = PTR_ERR(target); 1372 ret = PTR_ERR(target);
1501 goto release_matches; 1373 goto release_matches;
1502 } 1374 }
@@ -1575,7 +1447,6 @@ translate_compat_table(struct net *net,
1575 size = compatr->size; 1447 size = compatr->size;
1576 info->number = compatr->num_entries; 1448 info->number = compatr->num_entries;
1577 1449
1578 duprintf("translate_compat_table: size %u\n", info->size);
1579 j = 0; 1450 j = 0;
1580 xt_compat_lock(AF_INET6); 1451 xt_compat_lock(AF_INET6);
1581 xt_compat_init_offsets(AF_INET6, compatr->num_entries); 1452 xt_compat_init_offsets(AF_INET6, compatr->num_entries);
@@ -1590,11 +1461,8 @@ translate_compat_table(struct net *net,
1590 } 1461 }
1591 1462
1592 ret = -EINVAL; 1463 ret = -EINVAL;
1593 if (j != compatr->num_entries) { 1464 if (j != compatr->num_entries)
1594 duprintf("translate_compat_table: %u not %u entries\n",
1595 j, compatr->num_entries);
1596 goto out_unlock; 1465 goto out_unlock;
1597 }
1598 1466
1599 ret = -ENOMEM; 1467 ret = -ENOMEM;
1600 newinfo = xt_alloc_table_info(size); 1468 newinfo = xt_alloc_table_info(size);
@@ -1685,8 +1553,6 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
1685 if (ret != 0) 1553 if (ret != 0)
1686 goto free_newinfo; 1554 goto free_newinfo;
1687 1555
1688 duprintf("compat_do_replace: Translated table\n");
1689
1690 ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, 1556 ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1691 tmp.num_counters, compat_ptr(tmp.counters)); 1557 tmp.num_counters, compat_ptr(tmp.counters));
1692 if (ret) 1558 if (ret)
@@ -1720,7 +1586,6 @@ compat_do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user,
1720 break; 1586 break;
1721 1587
1722 default: 1588 default:
1723 duprintf("do_ip6t_set_ctl: unknown request %i\n", cmd);
1724 ret = -EINVAL; 1589 ret = -EINVAL;
1725 } 1590 }
1726 1591
@@ -1770,19 +1635,15 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
1770 struct compat_ip6t_get_entries get; 1635 struct compat_ip6t_get_entries get;
1771 struct xt_table *t; 1636 struct xt_table *t;
1772 1637
1773 if (*len < sizeof(get)) { 1638 if (*len < sizeof(get))
1774 duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
1775 return -EINVAL; 1639 return -EINVAL;
1776 }
1777 1640
1778 if (copy_from_user(&get, uptr, sizeof(get)) != 0) 1641 if (copy_from_user(&get, uptr, sizeof(get)) != 0)
1779 return -EFAULT; 1642 return -EFAULT;
1780 1643
1781 if (*len != sizeof(struct compat_ip6t_get_entries) + get.size) { 1644 if (*len != sizeof(struct compat_ip6t_get_entries) + get.size)
1782 duprintf("compat_get_entries: %u != %zu\n",
1783 *len, sizeof(get) + get.size);
1784 return -EINVAL; 1645 return -EINVAL;
1785 } 1646
1786 get.name[sizeof(get.name) - 1] = '\0'; 1647 get.name[sizeof(get.name) - 1] = '\0';
1787 1648
1788 xt_compat_lock(AF_INET6); 1649 xt_compat_lock(AF_INET6);
@@ -1790,16 +1651,13 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
1790 if (!IS_ERR_OR_NULL(t)) { 1651 if (!IS_ERR_OR_NULL(t)) {
1791 const struct xt_table_info *private = t->private; 1652 const struct xt_table_info *private = t->private;
1792 struct xt_table_info info; 1653 struct xt_table_info info;
1793 duprintf("t->private->number = %u\n", private->number);
1794 ret = compat_table_info(private, &info); 1654 ret = compat_table_info(private, &info);
1795 if (!ret && get.size == info.size) { 1655 if (!ret && get.size == info.size)
1796 ret = compat_copy_entries_to_user(private->size, 1656 ret = compat_copy_entries_to_user(private->size,
1797 t, uptr->entrytable); 1657 t, uptr->entrytable);
1798 } else if (!ret) { 1658 else if (!ret)
1799 duprintf("compat_get_entries: I've got %u not %u!\n",
1800 private->size, get.size);
1801 ret = -EAGAIN; 1659 ret = -EAGAIN;
1802 } 1660
1803 xt_compat_flush_offsets(AF_INET6); 1661 xt_compat_flush_offsets(AF_INET6);
1804 module_put(t->me); 1662 module_put(t->me);
1805 xt_table_unlock(t); 1663 xt_table_unlock(t);
@@ -1852,7 +1710,6 @@ do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1852 break; 1710 break;
1853 1711
1854 default: 1712 default:
1855 duprintf("do_ip6t_set_ctl: unknown request %i\n", cmd);
1856 ret = -EINVAL; 1713 ret = -EINVAL;
1857 } 1714 }
1858 1715
@@ -1904,7 +1761,6 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1904 } 1761 }
1905 1762
1906 default: 1763 default:
1907 duprintf("do_ip6t_get_ctl: unknown request %i\n", cmd);
1908 ret = -EINVAL; 1764 ret = -EINVAL;
1909 } 1765 }
1910 1766
@@ -2006,7 +1862,6 @@ icmp6_match(const struct sk_buff *skb, struct xt_action_param *par)
2006 /* We've been asked to examine this packet, and we 1862 /* We've been asked to examine this packet, and we
2007 * can't. Hence, no choice but to drop. 1863 * can't. Hence, no choice but to drop.
2008 */ 1864 */
2009 duprintf("Dropping evil ICMP tinygram.\n");
2010 par->hotdrop = true; 1865 par->hotdrop = true;
2011 return false; 1866 return false;
2012 } 1867 }
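
Sketch (not part of the patch): the ip6_tables.c hunks are almost entirely removal of the dprintf()/duprintf() debug scaffolding and of the DEBUG_ALLOW_ALL escape hatch, so the affected error paths collapse to bare "return -EINVAL" / "return false" and check_match()/check_target() return the xt_check_*() result directly. The one change that is more than log removal is in find_check_entry(): the return value of xt_percpu_counter_alloc() is first kept in a local unsigned long and tested with IS_ERR_VALUE() before being stored into e->counters.pcnt, presumably so the error check runs on a native machine word rather than on the wider counter field. The pattern as it appears in the hunk:

	unsigned long pcnt;

	pcnt = xt_percpu_counter_alloc();
	if (IS_ERR_VALUE(pcnt))         /* allocation failure is encoded as an error value */
		return -ENOMEM;
	e->counters.pcnt = pcnt;
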
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index 5d778dd11f66..06bed74cf5ee 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -60,7 +60,7 @@ synproxy_send_tcp(struct net *net,
60 fl6.fl6_dport = nth->dest; 60 fl6.fl6_dport = nth->dest;
61 security_skb_classify_flow((struct sk_buff *)skb, flowi6_to_flowi(&fl6)); 61 security_skb_classify_flow((struct sk_buff *)skb, flowi6_to_flowi(&fl6));
62 dst = ip6_route_output(net, NULL, &fl6); 62 dst = ip6_route_output(net, NULL, &fl6);
63 if (dst == NULL || dst->error) { 63 if (dst->error) {
64 dst_release(dst); 64 dst_release(dst);
65 goto free_nskb; 65 goto free_nskb;
66 } 66 }
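
Sketch (not part of the patch): the ip6t_SYNPROXY.c change drops the "dst == NULL" half of the test because ip6_route_output() never returns NULL; on failure it returns a dst entry whose ->error field is set, so checking dst->error alone covers the error case. The surviving path:

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {               /* failure is signalled via dst->error, never NULL */
		dst_release(dst);
		goto free_nskb;
	}
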
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index da1cff79e447..3ee3e444a66b 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -58,11 +58,11 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
58 int iif = 0; 58 int iif = 0;
59 struct flowi6 fl6; 59 struct flowi6 fl6;
60 int err; 60 int err;
61 int hlimit;
62 struct dst_entry *dst; 61 struct dst_entry *dst;
63 struct rt6_info *rt; 62 struct rt6_info *rt;
64 struct pingfakehdr pfh; 63 struct pingfakehdr pfh;
65 struct sockcm_cookie junk = {0}; 64 struct sockcm_cookie junk = {0};
65 struct ipcm6_cookie ipc6;
66 66
67 pr_debug("ping_v6_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num); 67 pr_debug("ping_v6_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
68 68
@@ -139,13 +139,15 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
139 pfh.wcheck = 0; 139 pfh.wcheck = 0;
140 pfh.family = AF_INET6; 140 pfh.family = AF_INET6;
141 141
142 hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); 142 ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
143 ipc6.tclass = np->tclass;
144 ipc6.dontfrag = np->dontfrag;
145 ipc6.opt = NULL;
143 146
144 lock_sock(sk); 147 lock_sock(sk);
145 err = ip6_append_data(sk, ping_getfrag, &pfh, len, 148 err = ip6_append_data(sk, ping_getfrag, &pfh, len,
146 0, hlimit, 149 0, &ipc6, &fl6, rt,
147 np->tclass, NULL, &fl6, rt, 150 MSG_DONTWAIT, &junk);
148 MSG_DONTWAIT, np->dontfrag, &junk);
149 151
150 if (err) { 152 if (err) {
151 ICMP6_INC_STATS(sock_net(sk), rt->rt6i_idev, 153 ICMP6_INC_STATS(sock_net(sk), rt->rt6i_idev,
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index b07ce21983aa..896350df6423 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -746,10 +746,8 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
746 struct raw6_frag_vec rfv; 746 struct raw6_frag_vec rfv;
747 struct flowi6 fl6; 747 struct flowi6 fl6;
748 struct sockcm_cookie sockc; 748 struct sockcm_cookie sockc;
749 struct ipcm6_cookie ipc6;
749 int addr_len = msg->msg_namelen; 750 int addr_len = msg->msg_namelen;
750 int hlimit = -1;
751 int tclass = -1;
752 int dontfrag = -1;
753 u16 proto; 751 u16 proto;
754 int err; 752 int err;
755 753
@@ -770,6 +768,11 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
770 768
771 fl6.flowi6_mark = sk->sk_mark; 769 fl6.flowi6_mark = sk->sk_mark;
772 770
771 ipc6.hlimit = -1;
772 ipc6.tclass = -1;
773 ipc6.dontfrag = -1;
774 ipc6.opt = NULL;
775
773 if (sin6) { 776 if (sin6) {
774 if (addr_len < SIN6_LEN_RFC2133) 777 if (addr_len < SIN6_LEN_RFC2133)
775 return -EINVAL; 778 return -EINVAL;
@@ -827,10 +830,9 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
827 opt = &opt_space; 830 opt = &opt_space;
828 memset(opt, 0, sizeof(struct ipv6_txoptions)); 831 memset(opt, 0, sizeof(struct ipv6_txoptions));
829 opt->tot_len = sizeof(struct ipv6_txoptions); 832 opt->tot_len = sizeof(struct ipv6_txoptions);
833 ipc6.opt = opt;
830 834
831 err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, 835 err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6, &sockc);
832 &hlimit, &tclass, &dontfrag,
833 &sockc);
834 if (err < 0) { 836 if (err < 0) {
835 fl6_sock_release(flowlabel); 837 fl6_sock_release(flowlabel);
836 return err; 838 return err;
@@ -846,7 +848,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
846 if (!opt) { 848 if (!opt) {
847 opt = txopt_get(np); 849 opt = txopt_get(np);
848 opt_to_free = opt; 850 opt_to_free = opt;
849 } 851 }
850 if (flowlabel) 852 if (flowlabel)
851 opt = fl6_merge_options(&opt_space, flowlabel, opt); 853 opt = fl6_merge_options(&opt_space, flowlabel, opt);
852 opt = ipv6_fixup_options(&opt_space, opt); 854 opt = ipv6_fixup_options(&opt_space, opt);
@@ -881,14 +883,14 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
881 err = PTR_ERR(dst); 883 err = PTR_ERR(dst);
882 goto out; 884 goto out;
883 } 885 }
884 if (hlimit < 0) 886 if (ipc6.hlimit < 0)
885 hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); 887 ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
886 888
887 if (tclass < 0) 889 if (ipc6.tclass < 0)
888 tclass = np->tclass; 890 ipc6.tclass = np->tclass;
889 891
890 if (dontfrag < 0) 892 if (ipc6.dontfrag < 0)
891 dontfrag = np->dontfrag; 893 ipc6.dontfrag = np->dontfrag;
892 894
893 if (msg->msg_flags&MSG_CONFIRM) 895 if (msg->msg_flags&MSG_CONFIRM)
894 goto do_confirm; 896 goto do_confirm;
@@ -897,10 +899,11 @@ back_from_confirm:
897 if (inet->hdrincl) 899 if (inet->hdrincl)
898 err = rawv6_send_hdrinc(sk, msg, len, &fl6, &dst, msg->msg_flags); 900 err = rawv6_send_hdrinc(sk, msg, len, &fl6, &dst, msg->msg_flags);
899 else { 901 else {
902 ipc6.opt = opt;
900 lock_sock(sk); 903 lock_sock(sk);
901 err = ip6_append_data(sk, raw6_getfrag, &rfv, 904 err = ip6_append_data(sk, raw6_getfrag, &rfv,
902 len, 0, hlimit, tclass, opt, &fl6, (struct rt6_info *)dst, 905 len, 0, &ipc6, &fl6, (struct rt6_info *)dst,
903 msg->msg_flags, dontfrag, &sockc); 906 msg->msg_flags, &sockc);
904 907
905 if (err) 908 if (err)
906 ip6_flush_pending_frames(sk); 909 ip6_flush_pending_frames(sk);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index e2ea31175ef9..2160d5d009cb 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -145,12 +145,12 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
145 if (!dev) 145 if (!dev)
146 goto out_rcu_unlock; 146 goto out_rcu_unlock;
147 147
148 IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); 148 __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
149 149
150 if (inet_frag_evicting(&fq->q)) 150 if (inet_frag_evicting(&fq->q))
151 goto out_rcu_unlock; 151 goto out_rcu_unlock;
152 152
153 IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); 153 __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
154 154
155 /* Don't send error if the first segment did not arrive. */ 155 /* Don't send error if the first segment did not arrive. */
156 if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !fq->q.fragments) 156 if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !fq->q.fragments)
@@ -223,8 +223,8 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
223 ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1))); 223 ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
224 224
225 if ((unsigned int)end > IPV6_MAXPLEN) { 225 if ((unsigned int)end > IPV6_MAXPLEN) {
226 IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), 226 __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
227 IPSTATS_MIB_INHDRERRORS); 227 IPSTATS_MIB_INHDRERRORS);
228 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, 228 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
229 ((u8 *)&fhdr->frag_off - 229 ((u8 *)&fhdr->frag_off -
230 skb_network_header(skb))); 230 skb_network_header(skb)));
@@ -258,8 +258,8 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
258 /* RFC2460 says always send parameter problem in 258 /* RFC2460 says always send parameter problem in
259 * this case. -DaveM 259 * this case. -DaveM
260 */ 260 */
261 IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), 261 __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
262 IPSTATS_MIB_INHDRERRORS); 262 IPSTATS_MIB_INHDRERRORS);
263 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, 263 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
264 offsetof(struct ipv6hdr, payload_len)); 264 offsetof(struct ipv6hdr, payload_len));
265 return -1; 265 return -1;
@@ -361,8 +361,8 @@ found:
361discard_fq: 361discard_fq:
362 inet_frag_kill(&fq->q, &ip6_frags); 362 inet_frag_kill(&fq->q, &ip6_frags);
363err: 363err:
364 IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), 364 __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
365 IPSTATS_MIB_REASMFAILS); 365 IPSTATS_MIB_REASMFAILS);
366 kfree_skb(skb); 366 kfree_skb(skb);
367 return -1; 367 return -1;
368} 368}
@@ -500,7 +500,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
500 skb_network_header_len(head)); 500 skb_network_header_len(head));
501 501
502 rcu_read_lock(); 502 rcu_read_lock();
503 IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); 503 __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
504 rcu_read_unlock(); 504 rcu_read_unlock();
505 fq->q.fragments = NULL; 505 fq->q.fragments = NULL;
506 fq->q.fragments_tail = NULL; 506 fq->q.fragments_tail = NULL;
@@ -513,7 +513,7 @@ out_oom:
513 net_dbg_ratelimited("ip6_frag_reasm: no memory for reassembly\n"); 513 net_dbg_ratelimited("ip6_frag_reasm: no memory for reassembly\n");
514out_fail: 514out_fail:
515 rcu_read_lock(); 515 rcu_read_lock();
516 IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); 516 __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
517 rcu_read_unlock(); 517 rcu_read_unlock();
518 return -1; 518 return -1;
519} 519}
@@ -528,7 +528,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
528 if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED) 528 if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED)
529 goto fail_hdr; 529 goto fail_hdr;
530 530
531 IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS); 531 __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS);
532 532
533 /* Jumbo payload inhibits frag. header */ 533 /* Jumbo payload inhibits frag. header */
534 if (hdr->payload_len == 0) 534 if (hdr->payload_len == 0)
@@ -544,8 +544,8 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
544 if (!(fhdr->frag_off & htons(0xFFF9))) { 544 if (!(fhdr->frag_off & htons(0xFFF9))) {
545 /* It is not a fragmented frame */ 545 /* It is not a fragmented frame */
546 skb->transport_header += sizeof(struct frag_hdr); 546 skb->transport_header += sizeof(struct frag_hdr);
547 IP6_INC_STATS_BH(net, 547 __IP6_INC_STATS(net,
548 ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS); 548 ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS);
549 549
550 IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb); 550 IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);
551 IP6CB(skb)->flags |= IP6SKB_FRAGMENTED; 551 IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
@@ -566,13 +566,13 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
566 return ret; 566 return ret;
567 } 567 }
568 568
569 IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS); 569 __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS);
570 kfree_skb(skb); 570 kfree_skb(skb);
571 return -1; 571 return -1;
572 572
573fail_hdr: 573fail_hdr:
574 IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), 574 __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
575 IPSTATS_MIB_INHDRERRORS); 575 IPSTATS_MIB_INHDRERRORS);
576 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb)); 576 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb));
577 return -1; 577 return -1;
578} 578}
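The stat updates in reassembly.c (and in syncookies.c, tcp_ipv6.c and udp.c further below) follow a consistent rename of the SNMP helpers visible throughout this diff: the old *_INC_STATS_BH() callers become either __*_INC_STATS() (where bottom halves are known to be disabled) or plain *_INC_STATS() (where they are not), and the *_INC_STATS_USER() variants collapse into the plain names. A toy user-space sketch of the two-variant convention, purely illustrative and not the kernel's per-CPU implementation:

#include <pthread.h>
#include <stdio.h>

static unsigned long reasm_fails;
static pthread_mutex_t stats_lock = PTHREAD_MUTEX_INITIALIZER;

/* Cheap variant: the caller guarantees exclusion (kernel: BH disabled),
 * so a bare increment suffices.  Plays the role of the "__" names. */
static void stats_inc_locked(unsigned long *ctr)
{
	(*ctr)++;
}

/* General variant: pays for its own exclusion, safe from any context. */
static void stats_inc(unsigned long *ctr)
{
	pthread_mutex_lock(&stats_lock);
	(*ctr)++;
	pthread_mutex_unlock(&stats_lock);
}

int main(void)
{
	pthread_mutex_lock(&stats_lock);
	stats_inc_locked(&reasm_fails);	/* fast path, exclusion already held */
	pthread_mutex_unlock(&stats_lock);

	stats_inc(&reasm_fails);	/* safe anywhere */
	printf("%lu\n", reasm_fails);
	return 0;
}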
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d916d6ab9ad2..c42fa1deb152 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1190,7 +1190,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1190 struct dst_entry *dst; 1190 struct dst_entry *dst;
1191 bool any_src; 1191 bool any_src;
1192 1192
1193 dst = l3mdev_rt6_dst_by_oif(net, fl6); 1193 dst = l3mdev_get_rt6_dst(net, fl6);
1194 if (dst) 1194 if (dst)
1195 return dst; 1195 return dst;
1196 1196
@@ -1769,6 +1769,37 @@ static int ip6_convert_metrics(struct mx6_config *mxc,
1769 return -EINVAL; 1769 return -EINVAL;
1770} 1770}
1771 1771
1772static struct rt6_info *ip6_nh_lookup_table(struct net *net,
1773 struct fib6_config *cfg,
1774 const struct in6_addr *gw_addr)
1775{
1776 struct flowi6 fl6 = {
1777 .flowi6_oif = cfg->fc_ifindex,
1778 .daddr = *gw_addr,
1779 .saddr = cfg->fc_prefsrc,
1780 };
1781 struct fib6_table *table;
1782 struct rt6_info *rt;
1783 int flags = 0;
1784
1785 table = fib6_get_table(net, cfg->fc_table);
1786 if (!table)
1787 return NULL;
1788
1789 if (!ipv6_addr_any(&cfg->fc_prefsrc))
1790 flags |= RT6_LOOKUP_F_HAS_SADDR;
1791
1792 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
1793
1794 /* if table lookup failed, fall back to full lookup */
1795 if (rt == net->ipv6.ip6_null_entry) {
1796 ip6_rt_put(rt);
1797 rt = NULL;
1798 }
1799
1800 return rt;
1801}
1802
1772static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg) 1803static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
1773{ 1804{
1774 struct net *net = cfg->fc_nlinfo.nl_net; 1805 struct net *net = cfg->fc_nlinfo.nl_net;
@@ -1940,7 +1971,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
1940 rt->rt6i_gateway = *gw_addr; 1971 rt->rt6i_gateway = *gw_addr;
1941 1972
1942 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { 1973 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1943 struct rt6_info *grt; 1974 struct rt6_info *grt = NULL;
1944 1975
1945 /* IPv6 strictly inhibits using not link-local 1976 /* IPv6 strictly inhibits using not link-local
1946 addresses as nexthop address. 1977 addresses as nexthop address.
@@ -1952,7 +1983,12 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
1952 if (!(gwa_type & IPV6_ADDR_UNICAST)) 1983 if (!(gwa_type & IPV6_ADDR_UNICAST))
1953 goto out; 1984 goto out;
1954 1985
1955 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1); 1986 if (cfg->fc_table)
1987 grt = ip6_nh_lookup_table(net, cfg, gw_addr);
1988
1989 if (!grt)
1990 grt = rt6_lookup(net, gw_addr, NULL,
1991 cfg->fc_ifindex, 1);
1956 1992
1957 err = -EHOSTUNREACH; 1993 err = -EHOSTUNREACH;
1958 if (!grt) 1994 if (!grt)
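ip6_nh_lookup_table() above lets a route that names an explicit table (cfg->fc_table) resolve its gateway within that same table first; only when the scoped lookup misses (or returns the null entry) does ip6_route_info_create() fall back to the old full rt6_lookup(). A compact stand-alone sketch of that scoped-lookup-with-fallback shape, with placeholder names and data:

#include <stdio.h>

struct route { const char *via; };

/* Pretend table-scoped lookup: only table 100 knows this gateway. */
static const struct route *lookup_in_table(int table, unsigned int gw)
{
	static const struct route vrf_route = { .via = "table 100" };

	if (table == 100 && gw == 0x0a000001)
		return &vrf_route;
	return NULL;			/* scoped lookup missed */
}

/* Full lookup, used as the fallback. */
static const struct route *lookup_main(unsigned int gw)
{
	static const struct route main_route = { .via = "main table" };

	(void)gw;
	return &main_route;
}

static const struct route *resolve_gateway(int cfg_table, unsigned int gw)
{
	const struct route *rt = NULL;

	if (cfg_table)			/* an explicit table was requested */
		rt = lookup_in_table(cfg_table, gw);
	if (!rt)			/* fall back to the full lookup */
		rt = lookup_main(gw);
	return rt;
}

int main(void)
{
	printf("%s\n", resolve_gateway(100, 0x0a000001)->via);
	printf("%s\n", resolve_gateway(0, 0x0a000001)->via);
	return 0;
}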
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index aab91fa86c5e..59c483937aec 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -155,11 +155,11 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
155 155
156 mss = __cookie_v6_check(ipv6_hdr(skb), th, cookie); 156 mss = __cookie_v6_check(ipv6_hdr(skb), th, cookie);
157 if (mss == 0) { 157 if (mss == 0) {
158 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED); 158 __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
159 goto out; 159 goto out;
160 } 160 }
161 161
162 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV); 162 __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV);
163 163
164 /* check for timestamp cookie support */ 164 /* check for timestamp cookie support */
165 memset(&tcp_opt, 0, sizeof(tcp_opt)); 165 memset(&tcp_opt, 0, sizeof(tcp_opt));
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 800265c7fd3f..c4efaa97280c 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -336,8 +336,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
336 skb->dev->ifindex); 336 skb->dev->ifindex);
337 337
338 if (!sk) { 338 if (!sk) {
339 ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), 339 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
340 ICMP6_MIB_INERRORS); 340 ICMP6_MIB_INERRORS);
341 return; 341 return;
342 } 342 }
343 343
@@ -352,13 +352,13 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
352 352
353 bh_lock_sock(sk); 353 bh_lock_sock(sk);
354 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG) 354 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
355 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); 355 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
356 356
357 if (sk->sk_state == TCP_CLOSE) 357 if (sk->sk_state == TCP_CLOSE)
358 goto out; 358 goto out;
359 359
360 if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) { 360 if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
361 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); 361 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
362 goto out; 362 goto out;
363 } 363 }
364 364
@@ -368,7 +368,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
368 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; 368 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
369 if (sk->sk_state != TCP_LISTEN && 369 if (sk->sk_state != TCP_LISTEN &&
370 !between(seq, snd_una, tp->snd_nxt)) { 370 !between(seq, snd_una, tp->snd_nxt)) {
371 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); 371 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
372 goto out; 372 goto out;
373 } 373 }
374 374
@@ -649,12 +649,12 @@ static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
649 return false; 649 return false;
650 650
651 if (hash_expected && !hash_location) { 651 if (hash_expected && !hash_location) {
652 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); 652 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
653 return true; 653 return true;
654 } 654 }
655 655
656 if (!hash_expected && hash_location) { 656 if (!hash_expected && hash_location) {
657 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); 657 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
658 return true; 658 return true;
659 } 659 }
660 660
@@ -810,8 +810,13 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
810 fl6.flowi6_proto = IPPROTO_TCP; 810 fl6.flowi6_proto = IPPROTO_TCP;
811 if (rt6_need_strict(&fl6.daddr) && !oif) 811 if (rt6_need_strict(&fl6.daddr) && !oif)
812 fl6.flowi6_oif = tcp_v6_iif(skb); 812 fl6.flowi6_oif = tcp_v6_iif(skb);
813 else 813 else {
814 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
815 oif = skb->skb_iif;
816
814 fl6.flowi6_oif = oif; 817 fl6.flowi6_oif = oif;
818 }
819
815 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); 820 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
816 fl6.fl6_dport = t1->dest; 821 fl6.fl6_dport = t1->dest;
817 fl6.fl6_sport = t1->source; 822 fl6.fl6_sport = t1->source;
@@ -825,9 +830,9 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
825 if (!IS_ERR(dst)) { 830 if (!IS_ERR(dst)) {
826 skb_dst_set(buff, dst); 831 skb_dst_set(buff, dst);
827 ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass); 832 ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass);
828 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); 833 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
829 if (rst) 834 if (rst)
830 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); 835 TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
831 return; 836 return;
832 } 837 }
833 838
@@ -1165,7 +1170,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
1165 return newsk; 1170 return newsk;
1166 1171
1167out_overflow: 1172out_overflow:
1168 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 1173 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1169out_nonewsk: 1174out_nonewsk:
1170 dst_release(dst); 1175 dst_release(dst);
1171out: 1176out:
@@ -1276,8 +1281,8 @@ discard:
1276 kfree_skb(skb); 1281 kfree_skb(skb);
1277 return 0; 1282 return 0;
1278csum_err: 1283csum_err:
1279 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS); 1284 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1280 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); 1285 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1281 goto discard; 1286 goto discard;
1282 1287
1283 1288
@@ -1359,7 +1364,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
1359 /* 1364 /*
1360 * Count it even if it's bad. 1365 * Count it even if it's bad.
1361 */ 1366 */
1362 TCP_INC_STATS_BH(net, TCP_MIB_INSEGS); 1367 __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1363 1368
1364 if (!pskb_may_pull(skb, sizeof(struct tcphdr))) 1369 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1365 goto discard_it; 1370 goto discard_it;
@@ -1421,7 +1426,7 @@ process:
1421 } 1426 }
1422 } 1427 }
1423 if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) { 1428 if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
1424 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); 1429 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1425 goto discard_and_relse; 1430 goto discard_and_relse;
1426 } 1431 }
1427 1432
@@ -1454,7 +1459,7 @@ process:
1454 } else if (unlikely(sk_add_backlog(sk, skb, 1459 } else if (unlikely(sk_add_backlog(sk, skb,
1455 sk->sk_rcvbuf + sk->sk_sndbuf))) { 1460 sk->sk_rcvbuf + sk->sk_sndbuf))) {
1456 bh_unlock_sock(sk); 1461 bh_unlock_sock(sk);
1457 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); 1462 __NET_INC_STATS(net, LINUX_MIB_TCPBACKLOGDROP);
1458 goto discard_and_relse; 1463 goto discard_and_relse;
1459 } 1464 }
1460 bh_unlock_sock(sk); 1465 bh_unlock_sock(sk);
@@ -1472,9 +1477,9 @@ no_tcp_socket:
1472 1477
1473 if (tcp_checksum_complete(skb)) { 1478 if (tcp_checksum_complete(skb)) {
1474csum_error: 1479csum_error:
1475 TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS); 1480 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1476bad_packet: 1481bad_packet:
1477 TCP_INC_STATS_BH(net, TCP_MIB_INERRS); 1482 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1478 } else { 1483 } else {
1479 tcp_v6_send_reset(NULL, skb); 1484 tcp_v6_send_reset(NULL, skb);
1480 } 1485 }
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 8d8b2cd8ec5b..aca06094110f 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -423,24 +423,22 @@ try_again:
423 if (!peeked) { 423 if (!peeked) {
424 atomic_inc(&sk->sk_drops); 424 atomic_inc(&sk->sk_drops);
425 if (is_udp4) 425 if (is_udp4)
426 UDP_INC_STATS_USER(sock_net(sk), 426 UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
427 UDP_MIB_INERRORS, 427 is_udplite);
428 is_udplite);
429 else 428 else
430 UDP6_INC_STATS_USER(sock_net(sk), 429 UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
431 UDP_MIB_INERRORS, 430 is_udplite);
432 is_udplite);
433 } 431 }
434 skb_free_datagram_locked(sk, skb); 432 skb_free_datagram_locked(sk, skb);
435 return err; 433 return err;
436 } 434 }
437 if (!peeked) { 435 if (!peeked) {
438 if (is_udp4) 436 if (is_udp4)
439 UDP_INC_STATS_USER(sock_net(sk), 437 UDP_INC_STATS(sock_net(sk), UDP_MIB_INDATAGRAMS,
440 UDP_MIB_INDATAGRAMS, is_udplite); 438 is_udplite);
441 else 439 else
442 UDP6_INC_STATS_USER(sock_net(sk), 440 UDP6_INC_STATS(sock_net(sk), UDP_MIB_INDATAGRAMS,
443 UDP_MIB_INDATAGRAMS, is_udplite); 441 is_udplite);
444 } 442 }
445 443
446 sock_recv_ts_and_drops(msg, sk, skb); 444 sock_recv_ts_and_drops(msg, sk, skb);
@@ -487,15 +485,15 @@ csum_copy_err:
487 slow = lock_sock_fast(sk); 485 slow = lock_sock_fast(sk);
488 if (!skb_kill_datagram(sk, skb, flags)) { 486 if (!skb_kill_datagram(sk, skb, flags)) {
489 if (is_udp4) { 487 if (is_udp4) {
490 UDP_INC_STATS_USER(sock_net(sk), 488 UDP_INC_STATS(sock_net(sk),
491 UDP_MIB_CSUMERRORS, is_udplite); 489 UDP_MIB_CSUMERRORS, is_udplite);
492 UDP_INC_STATS_USER(sock_net(sk), 490 UDP_INC_STATS(sock_net(sk),
493 UDP_MIB_INERRORS, is_udplite); 491 UDP_MIB_INERRORS, is_udplite);
494 } else { 492 } else {
495 UDP6_INC_STATS_USER(sock_net(sk), 493 UDP6_INC_STATS(sock_net(sk),
496 UDP_MIB_CSUMERRORS, is_udplite); 494 UDP_MIB_CSUMERRORS, is_udplite);
497 UDP6_INC_STATS_USER(sock_net(sk), 495 UDP6_INC_STATS(sock_net(sk),
498 UDP_MIB_INERRORS, is_udplite); 496 UDP_MIB_INERRORS, is_udplite);
499 } 497 }
500 } 498 }
501 unlock_sock_fast(sk, slow); 499 unlock_sock_fast(sk, slow);
@@ -523,8 +521,8 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
523 sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, 521 sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
524 inet6_iif(skb), udptable, skb); 522 inet6_iif(skb), udptable, skb);
525 if (!sk) { 523 if (!sk) {
526 ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), 524 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
527 ICMP6_MIB_INERRORS); 525 ICMP6_MIB_INERRORS);
528 return; 526 return;
529 } 527 }
530 528
@@ -572,9 +570,9 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
572 570
573 /* Note that an ENOMEM error is charged twice */ 571 /* Note that an ENOMEM error is charged twice */
574 if (rc == -ENOMEM) 572 if (rc == -ENOMEM)
575 UDP6_INC_STATS_BH(sock_net(sk), 573 UDP6_INC_STATS(sock_net(sk),
576 UDP_MIB_RCVBUFERRORS, is_udplite); 574 UDP_MIB_RCVBUFERRORS, is_udplite);
577 UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); 575 UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
578 kfree_skb(skb); 576 kfree_skb(skb);
579 return -1; 577 return -1;
580 } 578 }
@@ -630,9 +628,9 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
630 628
631 ret = encap_rcv(sk, skb); 629 ret = encap_rcv(sk, skb);
632 if (ret <= 0) { 630 if (ret <= 0) {
633 UDP_INC_STATS_BH(sock_net(sk), 631 __UDP_INC_STATS(sock_net(sk),
634 UDP_MIB_INDATAGRAMS, 632 UDP_MIB_INDATAGRAMS,
635 is_udplite); 633 is_udplite);
636 return -ret; 634 return -ret;
637 } 635 }
638 } 636 }
@@ -666,8 +664,8 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
666 664
667 udp_csum_pull_header(skb); 665 udp_csum_pull_header(skb);
668 if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { 666 if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
669 UDP6_INC_STATS_BH(sock_net(sk), 667 __UDP6_INC_STATS(sock_net(sk),
670 UDP_MIB_RCVBUFERRORS, is_udplite); 668 UDP_MIB_RCVBUFERRORS, is_udplite);
671 goto drop; 669 goto drop;
672 } 670 }
673 671
@@ -686,9 +684,9 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
686 return rc; 684 return rc;
687 685
688csum_error: 686csum_error:
689 UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); 687 __UDP6_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
690drop: 688drop:
691 UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); 689 __UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
692 atomic_inc(&sk->sk_drops); 690 atomic_inc(&sk->sk_drops);
693 kfree_skb(skb); 691 kfree_skb(skb);
694 return -1; 692 return -1;
@@ -771,10 +769,10 @@ start_lookup:
771 nskb = skb_clone(skb, GFP_ATOMIC); 769 nskb = skb_clone(skb, GFP_ATOMIC);
772 if (unlikely(!nskb)) { 770 if (unlikely(!nskb)) {
773 atomic_inc(&sk->sk_drops); 771 atomic_inc(&sk->sk_drops);
774 UDP6_INC_STATS_BH(net, UDP_MIB_RCVBUFERRORS, 772 __UDP6_INC_STATS(net, UDP_MIB_RCVBUFERRORS,
775 IS_UDPLITE(sk)); 773 IS_UDPLITE(sk));
776 UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, 774 __UDP6_INC_STATS(net, UDP_MIB_INERRORS,
777 IS_UDPLITE(sk)); 775 IS_UDPLITE(sk));
778 continue; 776 continue;
779 } 777 }
780 778
@@ -793,8 +791,8 @@ start_lookup:
793 consume_skb(skb); 791 consume_skb(skb);
794 } else { 792 } else {
795 kfree_skb(skb); 793 kfree_skb(skb);
796 UDP6_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI, 794 __UDP6_INC_STATS(net, UDP_MIB_IGNOREDMULTI,
797 proto == IPPROTO_UDPLITE); 795 proto == IPPROTO_UDPLITE);
798 } 796 }
799 return 0; 797 return 0;
800} 798}
@@ -887,7 +885,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
887 if (udp_lib_checksum_complete(skb)) 885 if (udp_lib_checksum_complete(skb))
888 goto csum_error; 886 goto csum_error;
889 887
890 UDP6_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); 888 __UDP6_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
891 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); 889 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
892 890
893 kfree_skb(skb); 891 kfree_skb(skb);
@@ -901,9 +899,9 @@ short_packet:
901 daddr, ntohs(uh->dest)); 899 daddr, ntohs(uh->dest));
902 goto discard; 900 goto discard;
903csum_error: 901csum_error:
904 UDP6_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); 902 __UDP6_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
905discard: 903discard:
906 UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); 904 __UDP6_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
907 kfree_skb(skb); 905 kfree_skb(skb);
908 return 0; 906 return 0;
909} 907}
@@ -1015,13 +1013,14 @@ send:
1015 err = ip6_send_skb(skb); 1013 err = ip6_send_skb(skb);
1016 if (err) { 1014 if (err) {
1017 if (err == -ENOBUFS && !inet6_sk(sk)->recverr) { 1015 if (err == -ENOBUFS && !inet6_sk(sk)->recverr) {
1018 UDP6_INC_STATS_USER(sock_net(sk), 1016 UDP6_INC_STATS(sock_net(sk),
1019 UDP_MIB_SNDBUFERRORS, is_udplite); 1017 UDP_MIB_SNDBUFERRORS, is_udplite);
1020 err = 0; 1018 err = 0;
1021 } 1019 }
1022 } else 1020 } else {
1023 UDP6_INC_STATS_USER(sock_net(sk), 1021 UDP6_INC_STATS(sock_net(sk),
1024 UDP_MIB_OUTDATAGRAMS, is_udplite); 1022 UDP_MIB_OUTDATAGRAMS, is_udplite);
1023 }
1025 return err; 1024 return err;
1026} 1025}
1027 1026
@@ -1065,11 +1064,9 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
1065 struct ip6_flowlabel *flowlabel = NULL; 1064 struct ip6_flowlabel *flowlabel = NULL;
1066 struct flowi6 fl6; 1065 struct flowi6 fl6;
1067 struct dst_entry *dst; 1066 struct dst_entry *dst;
1067 struct ipcm6_cookie ipc6;
1068 int addr_len = msg->msg_namelen; 1068 int addr_len = msg->msg_namelen;
1069 int ulen = len; 1069 int ulen = len;
1070 int hlimit = -1;
1071 int tclass = -1;
1072 int dontfrag = -1;
1073 int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; 1070 int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
1074 int err; 1071 int err;
1075 int connected = 0; 1072 int connected = 0;
@@ -1077,6 +1074,10 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
1077 int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); 1074 int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
1078 struct sockcm_cookie sockc; 1075 struct sockcm_cookie sockc;
1079 1076
1077 ipc6.hlimit = -1;
1078 ipc6.tclass = -1;
1079 ipc6.dontfrag = -1;
1080
1080 /* destination address check */ 1081 /* destination address check */
1081 if (sin6) { 1082 if (sin6) {
1082 if (addr_len < offsetof(struct sockaddr, sa_data)) 1083 if (addr_len < offsetof(struct sockaddr, sa_data))
@@ -1201,10 +1202,9 @@ do_udp_sendmsg:
1201 opt = &opt_space; 1202 opt = &opt_space;
1202 memset(opt, 0, sizeof(struct ipv6_txoptions)); 1203 memset(opt, 0, sizeof(struct ipv6_txoptions));
1203 opt->tot_len = sizeof(*opt); 1204 opt->tot_len = sizeof(*opt);
1205 ipc6.opt = opt;
1204 1206
1205 err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, 1207 err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6, &sockc);
1206 &hlimit, &tclass, &dontfrag,
1207 &sockc);
1208 if (err < 0) { 1208 if (err < 0) {
1209 fl6_sock_release(flowlabel); 1209 fl6_sock_release(flowlabel);
1210 return err; 1210 return err;
@@ -1225,6 +1225,7 @@ do_udp_sendmsg:
1225 if (flowlabel) 1225 if (flowlabel)
1226 opt = fl6_merge_options(&opt_space, flowlabel, opt); 1226 opt = fl6_merge_options(&opt_space, flowlabel, opt);
1227 opt = ipv6_fixup_options(&opt_space, opt); 1227 opt = ipv6_fixup_options(&opt_space, opt);
1228 ipc6.opt = opt;
1228 1229
1229 fl6.flowi6_proto = sk->sk_protocol; 1230 fl6.flowi6_proto = sk->sk_protocol;
1230 if (!ipv6_addr_any(daddr)) 1231 if (!ipv6_addr_any(daddr))
@@ -1254,11 +1255,11 @@ do_udp_sendmsg:
1254 goto out; 1255 goto out;
1255 } 1256 }
1256 1257
1257 if (hlimit < 0) 1258 if (ipc6.hlimit < 0)
1258 hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); 1259 ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
1259 1260
1260 if (tclass < 0) 1261 if (ipc6.tclass < 0)
1261 tclass = np->tclass; 1262 ipc6.tclass = np->tclass;
1262 1263
1263 if (msg->msg_flags&MSG_CONFIRM) 1264 if (msg->msg_flags&MSG_CONFIRM)
1264 goto do_confirm; 1265 goto do_confirm;
@@ -1269,9 +1270,9 @@ back_from_confirm:
1269 struct sk_buff *skb; 1270 struct sk_buff *skb;
1270 1271
1271 skb = ip6_make_skb(sk, getfrag, msg, ulen, 1272 skb = ip6_make_skb(sk, getfrag, msg, ulen,
1272 sizeof(struct udphdr), hlimit, tclass, opt, 1273 sizeof(struct udphdr), &ipc6,
1273 &fl6, (struct rt6_info *)dst, 1274 &fl6, (struct rt6_info *)dst,
1274 msg->msg_flags, dontfrag, &sockc); 1275 msg->msg_flags, &sockc);
1275 err = PTR_ERR(skb); 1276 err = PTR_ERR(skb);
1276 if (!IS_ERR_OR_NULL(skb)) 1277 if (!IS_ERR_OR_NULL(skb))
1277 err = udp_v6_send_skb(skb, &fl6); 1278 err = udp_v6_send_skb(skb, &fl6);
@@ -1292,14 +1293,12 @@ back_from_confirm:
1292 up->pending = AF_INET6; 1293 up->pending = AF_INET6;
1293 1294
1294do_append_data: 1295do_append_data:
1295 if (dontfrag < 0) 1296 if (ipc6.dontfrag < 0)
1296 dontfrag = np->dontfrag; 1297 ipc6.dontfrag = np->dontfrag;
1297 up->len += ulen; 1298 up->len += ulen;
1298 err = ip6_append_data(sk, getfrag, msg, ulen, 1299 err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr),
1299 sizeof(struct udphdr), hlimit, tclass, opt, &fl6, 1300 &ipc6, &fl6, (struct rt6_info *)dst,
1300 (struct rt6_info *)dst, 1301 corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, &sockc);
1301 corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag,
1302 &sockc);
1303 if (err) 1302 if (err)
1304 udp_v6_flush_pending_frames(sk); 1303 udp_v6_flush_pending_frames(sk);
1305 else if (!corkreq) 1304 else if (!corkreq)
@@ -1342,8 +1341,8 @@ out:
1342 * seems like overkill. 1341 * seems like overkill.
1343 */ 1342 */
1344 if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { 1343 if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
1345 UDP6_INC_STATS_USER(sock_net(sk), 1344 UDP6_INC_STATS(sock_net(sk),
1346 UDP_MIB_SNDBUFERRORS, is_udplite); 1345 UDP_MIB_SNDBUFERRORS, is_udplite);
1347 } 1346 }
1348 return err; 1347 return err;
1349 1348
diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c
index fcfbe579434a..d8b7267280c3 100644
--- a/net/irda/irlan/irlan_eth.c
+++ b/net/irda/irlan/irlan_eth.c
@@ -181,7 +181,7 @@ static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb,
181 skb = new_skb; 181 skb = new_skb;
182 } 182 }
183 183
184 dev->trans_start = jiffies; 184 netif_trans_update(dev);
185 185
186 len = skb->len; 186 len = skb->len;
187 /* Now queue the packet in the transport layer */ 187 /* Now queue the packet in the transport layer */
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index afca2eb4dfa7..6edfa9980314 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1376,9 +1376,9 @@ static int l2tp_tunnel_sock_create(struct net *net,
1376 memcpy(&udp_conf.peer_ip6, cfg->peer_ip6, 1376 memcpy(&udp_conf.peer_ip6, cfg->peer_ip6,
1377 sizeof(udp_conf.peer_ip6)); 1377 sizeof(udp_conf.peer_ip6));
1378 udp_conf.use_udp6_tx_checksums = 1378 udp_conf.use_udp6_tx_checksums =
1379 cfg->udp6_zero_tx_checksums; 1379 ! cfg->udp6_zero_tx_checksums;
1380 udp_conf.use_udp6_rx_checksums = 1380 udp_conf.use_udp6_rx_checksums =
1381 cfg->udp6_zero_rx_checksums; 1381 ! cfg->udp6_zero_rx_checksums;
1382 } else 1382 } else
1383#endif 1383#endif
1384 { 1384 {
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 46e07267e503..c6f5df1bed12 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -495,10 +495,8 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
495 struct dst_entry *dst = NULL; 495 struct dst_entry *dst = NULL;
496 struct flowi6 fl6; 496 struct flowi6 fl6;
497 struct sockcm_cookie sockc_unused = {0}; 497 struct sockcm_cookie sockc_unused = {0};
498 struct ipcm6_cookie ipc6;
498 int addr_len = msg->msg_namelen; 499 int addr_len = msg->msg_namelen;
499 int hlimit = -1;
500 int tclass = -1;
501 int dontfrag = -1;
502 int transhdrlen = 4; /* zero session-id */ 500 int transhdrlen = 4; /* zero session-id */
503 int ulen = len + transhdrlen; 501 int ulen = len + transhdrlen;
504 int err; 502 int err;
@@ -520,6 +518,10 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
520 518
521 fl6.flowi6_mark = sk->sk_mark; 519 fl6.flowi6_mark = sk->sk_mark;
522 520
521 ipc6.hlimit = -1;
522 ipc6.tclass = -1;
523 ipc6.dontfrag = -1;
524
523 if (lsa) { 525 if (lsa) {
524 if (addr_len < SIN6_LEN_RFC2133) 526 if (addr_len < SIN6_LEN_RFC2133)
525 return -EINVAL; 527 return -EINVAL;
@@ -564,11 +566,11 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
564 opt = &opt_space; 566 opt = &opt_space;
565 memset(opt, 0, sizeof(struct ipv6_txoptions)); 567 memset(opt, 0, sizeof(struct ipv6_txoptions));
566 opt->tot_len = sizeof(struct ipv6_txoptions); 568 opt->tot_len = sizeof(struct ipv6_txoptions);
569 ipc6.opt = opt;
567 570
568 err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, 571 err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6,
569 &hlimit, &tclass, &dontfrag, 572 &sockc_unused);
570 &sockc_unused); 573 if (err < 0) {
571 if (err < 0) {
572 fl6_sock_release(flowlabel); 574 fl6_sock_release(flowlabel);
573 return err; 575 return err;
574 } 576 }
@@ -588,6 +590,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
588 if (flowlabel) 590 if (flowlabel)
589 opt = fl6_merge_options(&opt_space, flowlabel, opt); 591 opt = fl6_merge_options(&opt_space, flowlabel, opt);
590 opt = ipv6_fixup_options(&opt_space, opt); 592 opt = ipv6_fixup_options(&opt_space, opt);
593 ipc6.opt = opt;
591 594
592 fl6.flowi6_proto = sk->sk_protocol; 595 fl6.flowi6_proto = sk->sk_protocol;
593 if (!ipv6_addr_any(daddr)) 596 if (!ipv6_addr_any(daddr))
@@ -612,14 +615,14 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
612 goto out; 615 goto out;
613 } 616 }
614 617
615 if (hlimit < 0) 618 if (ipc6.hlimit < 0)
616 hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); 619 ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
617 620
618 if (tclass < 0) 621 if (ipc6.tclass < 0)
619 tclass = np->tclass; 622 ipc6.tclass = np->tclass;
620 623
621 if (dontfrag < 0) 624 if (ipc6.dontfrag < 0)
622 dontfrag = np->dontfrag; 625 ipc6.dontfrag = np->dontfrag;
623 626
624 if (msg->msg_flags & MSG_CONFIRM) 627 if (msg->msg_flags & MSG_CONFIRM)
625 goto do_confirm; 628 goto do_confirm;
@@ -627,9 +630,9 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
627back_from_confirm: 630back_from_confirm:
628 lock_sock(sk); 631 lock_sock(sk);
629 err = ip6_append_data(sk, ip_generic_getfrag, msg, 632 err = ip6_append_data(sk, ip_generic_getfrag, msg,
630 ulen, transhdrlen, hlimit, tclass, opt, 633 ulen, transhdrlen, &ipc6,
631 &fl6, (struct rt6_info *)dst, 634 &fl6, (struct rt6_info *)dst,
632 msg->msg_flags, dontfrag, &sockc_unused); 635 msg->msg_flags, &sockc_unused);
633 if (err) 636 if (err)
634 ip6_flush_pending_frames(sk); 637 ip6_flush_pending_frames(sk);
635 else if (!(msg->msg_flags & MSG_MORE)) 638 else if (!(msg->msg_flags & MSG_MORE))
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index e925037fa0df..6651a78e100c 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -97,3 +97,66 @@ u32 l3mdev_fib_table_by_index(struct net *net, int ifindex)
97 return tb_id; 97 return tb_id;
98} 98}
99EXPORT_SYMBOL_GPL(l3mdev_fib_table_by_index); 99EXPORT_SYMBOL_GPL(l3mdev_fib_table_by_index);
100
101/**
102 * l3mdev_get_rt6_dst - IPv6 route lookup based on flow. Returns
103 * cached route for L3 master device if relevant
104 * to flow
105 * @net: network namespace for device index lookup
106 * @fl6: IPv6 flow struct for lookup
107 */
108
109struct dst_entry *l3mdev_get_rt6_dst(struct net *net,
110 const struct flowi6 *fl6)
111{
112 struct dst_entry *dst = NULL;
113 struct net_device *dev;
114
115 if (fl6->flowi6_oif) {
116 rcu_read_lock();
117
118 dev = dev_get_by_index_rcu(net, fl6->flowi6_oif);
119 if (dev && netif_is_l3_slave(dev))
120 dev = netdev_master_upper_dev_get_rcu(dev);
121
122 if (dev && netif_is_l3_master(dev) &&
123 dev->l3mdev_ops->l3mdev_get_rt6_dst)
124 dst = dev->l3mdev_ops->l3mdev_get_rt6_dst(dev, fl6);
125
126 rcu_read_unlock();
127 }
128
129 return dst;
130}
131EXPORT_SYMBOL_GPL(l3mdev_get_rt6_dst);
132
133/**
134 * l3mdev_get_saddr - get source address for a flow based on an interface
135 * enslaved to an L3 master device
136 * @net: network namespace for device index lookup
137 * @ifindex: Interface index
138 * @fl4: IPv4 flow struct
139 */
140
141int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4)
142{
143 struct net_device *dev;
144 int rc = 0;
145
146 if (ifindex) {
147 rcu_read_lock();
148
149 dev = dev_get_by_index_rcu(net, ifindex);
150 if (dev && netif_is_l3_slave(dev))
151 dev = netdev_master_upper_dev_get_rcu(dev);
152
153 if (dev && netif_is_l3_master(dev) &&
154 dev->l3mdev_ops->l3mdev_get_saddr)
155 rc = dev->l3mdev_ops->l3mdev_get_saddr(dev, fl4);
156
157 rcu_read_unlock();
158 }
159
160 return rc;
161}
162EXPORT_SYMBOL_GPL(l3mdev_get_saddr);
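Both new l3mdev helpers above share one shape: resolve the flow's interface, walk from an L3 slave up to its master, and delegate to the master's optional l3mdev_ops callback when one is provided, otherwise fall through to normal routing. A stand-alone sketch of that delegation pattern with invented names (the real callbacks are the l3mdev_get_rt6_dst and l3mdev_get_saddr members used above):

#include <stdio.h>

struct ops { int (*get_saddr)(int ifindex); };

struct device {
	int ifindex;
	struct device *master;		/* NULL when not enslaved */
	const struct ops *ops;		/* set only on L3 master devices */
};

static int delegate_get_saddr(struct device *dev)
{
	if (dev->master)		/* walk slave -> master */
		dev = dev->master;
	if (dev->ops && dev->ops->get_saddr)
		return dev->ops->get_saddr(dev->ifindex);
	return 0;			/* no master involved, use the normal path */
}

static int vrf_get_saddr(int ifindex) { return 100 + ifindex; }

int main(void)
{
	const struct ops vrf_ops = { .get_saddr = vrf_get_saddr };
	struct device master = { .ifindex = 10, .ops = &vrf_ops };
	struct device slave  = { .ifindex = 3, .master = &master };

	printf("%d\n", delegate_get_saddr(&slave));	/* prints 110 */
	return 0;
}

The ip6_route_output_flags() change earlier in this diff (l3mdev_rt6_dst_by_oif -> l3mdev_get_rt6_dst) is the caller-side half of the same rename.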
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index b3c52e3f689a..8ae3ed97d95c 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -626,6 +626,7 @@ static void llc_cmsg_rcv(struct msghdr *msg, struct sk_buff *skb)
626 if (llc->cmsg_flags & LLC_CMSG_PKTINFO) { 626 if (llc->cmsg_flags & LLC_CMSG_PKTINFO) {
627 struct llc_pktinfo info; 627 struct llc_pktinfo info;
628 628
629 memset(&info, 0, sizeof(info));
629 info.lpi_ifindex = llc_sk(skb->sk)->dev->ifindex; 630 info.lpi_ifindex = llc_sk(skb->sk)->dev->ifindex;
630 llc_pdu_decode_dsap(skb, &info.lpi_sap); 631 llc_pdu_decode_dsap(skb, &info.lpi_sap);
631 llc_pdu_decode_da(skb, info.lpi_mac); 632 llc_pdu_decode_da(skb, info.lpi_mac);
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 6a33f0b4d839..c59af3eb9fa4 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1761,7 +1761,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
1761 1761
1762 ret = dev_alloc_name(ndev, ndev->name); 1762 ret = dev_alloc_name(ndev, ndev->name);
1763 if (ret < 0) { 1763 if (ret < 0) {
1764 free_netdev(ndev); 1764 ieee80211_if_free(ndev);
1765 return ret; 1765 return ret;
1766 } 1766 }
1767 1767
@@ -1847,7 +1847,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
1847 1847
1848 ret = register_netdevice(ndev); 1848 ret = register_netdevice(ndev);
1849 if (ret) { 1849 if (ret) {
1850 free_netdev(ndev); 1850 ieee80211_if_free(ndev);
1851 return ret; 1851 return ret;
1852 } 1852 }
1853 } 1853 }
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 85ca189bdc3d..2cb3c626cd43 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -104,6 +104,7 @@ static inline void ct_write_unlock_bh(unsigned int key)
104 spin_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); 104 spin_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
105} 105}
106 106
107static void ip_vs_conn_expire(unsigned long data);
107 108
108/* 109/*
109 * Returns hash value for IPVS connection entry 110 * Returns hash value for IPVS connection entry
@@ -453,10 +454,16 @@ ip_vs_conn_out_get_proto(struct netns_ipvs *ipvs, int af,
453} 454}
454EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto); 455EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto);
455 456
457static void __ip_vs_conn_put_notimer(struct ip_vs_conn *cp)
458{
459 __ip_vs_conn_put(cp);
460 ip_vs_conn_expire((unsigned long)cp);
461}
462
456/* 463/*
457 * Put back the conn and restart its timer with its timeout 464 * Put back the conn and restart its timer with its timeout
458 */ 465 */
459void ip_vs_conn_put(struct ip_vs_conn *cp) 466static void __ip_vs_conn_put_timer(struct ip_vs_conn *cp)
460{ 467{
461 unsigned long t = (cp->flags & IP_VS_CONN_F_ONE_PACKET) ? 468 unsigned long t = (cp->flags & IP_VS_CONN_F_ONE_PACKET) ?
462 0 : cp->timeout; 469 0 : cp->timeout;
@@ -465,6 +472,16 @@ void ip_vs_conn_put(struct ip_vs_conn *cp)
465 __ip_vs_conn_put(cp); 472 __ip_vs_conn_put(cp);
466} 473}
467 474
475void ip_vs_conn_put(struct ip_vs_conn *cp)
476{
477 if ((cp->flags & IP_VS_CONN_F_ONE_PACKET) &&
478 (atomic_read(&cp->refcnt) == 1) &&
479 !timer_pending(&cp->timer))
480 /* expire connection immediately */
481 __ip_vs_conn_put_notimer(cp);
482 else
483 __ip_vs_conn_put_timer(cp);
484}
468 485
469/* 486/*
470 * Fill a no_client_port connection with a client port number 487 * Fill a no_client_port connection with a client port number
@@ -819,7 +836,8 @@ static void ip_vs_conn_expire(unsigned long data)
819 if (cp->control) 836 if (cp->control)
820 ip_vs_control_del(cp); 837 ip_vs_control_del(cp);
821 838
822 if (cp->flags & IP_VS_CONN_F_NFCT) { 839 if ((cp->flags & IP_VS_CONN_F_NFCT) &&
840 !(cp->flags & IP_VS_CONN_F_ONE_PACKET)) {
823 /* Do not access conntracks during subsys cleanup 841 /* Do not access conntracks during subsys cleanup
824 * because nf_conntrack_find_get can not be used after 842 * because nf_conntrack_find_get can not be used after
825 * conntrack cleanup for the net. 843 * conntrack cleanup for the net.
@@ -834,7 +852,10 @@ static void ip_vs_conn_expire(unsigned long data)
834 ip_vs_unbind_dest(cp); 852 ip_vs_unbind_dest(cp);
835 if (cp->flags & IP_VS_CONN_F_NO_CPORT) 853 if (cp->flags & IP_VS_CONN_F_NO_CPORT)
836 atomic_dec(&ip_vs_conn_no_cport_cnt); 854 atomic_dec(&ip_vs_conn_no_cport_cnt);
837 call_rcu(&cp->rcu_head, ip_vs_conn_rcu_free); 855 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
856 ip_vs_conn_rcu_free(&cp->rcu_head);
857 else
858 call_rcu(&cp->rcu_head, ip_vs_conn_rcu_free);
838 atomic_dec(&ipvs->conn_count); 859 atomic_dec(&ipvs->conn_count);
839 return; 860 return;
840 } 861 }
@@ -850,7 +871,7 @@ static void ip_vs_conn_expire(unsigned long data)
850 if (ipvs->sync_state & IP_VS_STATE_MASTER) 871 if (ipvs->sync_state & IP_VS_STATE_MASTER)
851 ip_vs_sync_conn(ipvs, cp, sysctl_sync_threshold(ipvs)); 872 ip_vs_sync_conn(ipvs, cp, sysctl_sync_threshold(ipvs));
852 873
853 ip_vs_conn_put(cp); 874 __ip_vs_conn_put_timer(cp);
854} 875}
855 876
856/* Modify timer, so that it expires as soon as possible. 877/* Modify timer, so that it expires as soon as possible.
@@ -1240,6 +1261,16 @@ static inline int todrop_entry(struct ip_vs_conn *cp)
1240 return 1; 1261 return 1;
1241} 1262}
1242 1263
1264static inline bool ip_vs_conn_ops_mode(struct ip_vs_conn *cp)
1265{
1266 struct ip_vs_service *svc;
1267
1268 if (!cp->dest)
1269 return false;
1270 svc = rcu_dereference(cp->dest->svc);
1271 return svc && (svc->flags & IP_VS_SVC_F_ONEPACKET);
1272}
1273
1243/* Called from keventd and must protect itself from softirqs */ 1274/* Called from keventd and must protect itself from softirqs */
1244void ip_vs_random_dropentry(struct netns_ipvs *ipvs) 1275void ip_vs_random_dropentry(struct netns_ipvs *ipvs)
1245{ 1276{
@@ -1254,11 +1285,16 @@ void ip_vs_random_dropentry(struct netns_ipvs *ipvs)
1254 unsigned int hash = prandom_u32() & ip_vs_conn_tab_mask; 1285 unsigned int hash = prandom_u32() & ip_vs_conn_tab_mask;
1255 1286
1256 hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { 1287 hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
1257 if (cp->flags & IP_VS_CONN_F_TEMPLATE)
1258 /* connection template */
1259 continue;
1260 if (cp->ipvs != ipvs) 1288 if (cp->ipvs != ipvs)
1261 continue; 1289 continue;
1290 if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
1291 if (atomic_read(&cp->n_control) ||
1292 !ip_vs_conn_ops_mode(cp))
1293 continue;
1294 else
1295 /* connection template of OPS */
1296 goto try_drop;
1297 }
1262 if (cp->protocol == IPPROTO_TCP) { 1298 if (cp->protocol == IPPROTO_TCP) {
1263 switch(cp->state) { 1299 switch(cp->state) {
1264 case IP_VS_TCP_S_SYN_RECV: 1300 case IP_VS_TCP_S_SYN_RECV:
@@ -1286,6 +1322,7 @@ void ip_vs_random_dropentry(struct netns_ipvs *ipvs)
1286 continue; 1322 continue;
1287 } 1323 }
1288 } else { 1324 } else {
1325try_drop:
1289 if (!todrop_entry(cp)) 1326 if (!todrop_entry(cp))
1290 continue; 1327 continue;
1291 } 1328 }
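The ip_vs_conn.c changes above make one-packet-scheduling (OPS) connections short-lived objects: ip_vs_conn_put() now tears the connection down immediately when the last reference is dropped and no timer is pending, instead of re-arming the expire timer, and OPS templates become eligible for random drop. A small stand-alone sketch of that release decision, with made-up names:

#include <stdbool.h>
#include <stdio.h>

struct entry {
	int refcnt;
	bool one_shot;		/* stands in for IP_VS_CONN_F_ONE_PACKET */
	bool timer_pending;
};

static void expire_now(struct entry *e)
{
	(void)e;
	printf("freed immediately\n");
}

static void rearm_timer(struct entry *e)
{
	(void)e;
	printf("timer re-armed\n");
}

/* Mirrors the new ip_vs_conn_put(): a one-shot entry whose last
 * reference goes away, with no timer pending, is expired on the spot. */
static void entry_put(struct entry *e)
{
	if (e->one_shot && e->refcnt == 1 && !e->timer_pending)
		expire_now(e);
	else
		rearm_timer(e);
}

int main(void)
{
	struct entry ops = { .refcnt = 1, .one_shot = true };
	struct entry tcp = { .refcnt = 2, .timer_pending = true };

	entry_put(&ops);
	entry_put(&tcp);
	return 0;
}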
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index b9a4082afa3a..1207f20d24e4 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -68,6 +68,7 @@ EXPORT_SYMBOL(ip_vs_conn_put);
68#ifdef CONFIG_IP_VS_DEBUG 68#ifdef CONFIG_IP_VS_DEBUG
69EXPORT_SYMBOL(ip_vs_get_debug_level); 69EXPORT_SYMBOL(ip_vs_get_debug_level);
70#endif 70#endif
71EXPORT_SYMBOL(ip_vs_new_conn_out);
71 72
72static int ip_vs_net_id __read_mostly; 73static int ip_vs_net_id __read_mostly;
73/* netns cnt used for uniqueness */ 74/* netns cnt used for uniqueness */
@@ -611,7 +612,10 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
611 ret = cp->packet_xmit(skb, cp, pd->pp, iph); 612 ret = cp->packet_xmit(skb, cp, pd->pp, iph);
612 /* do not touch skb anymore */ 613 /* do not touch skb anymore */
613 614
614 atomic_inc(&cp->in_pkts); 615 if ((cp->flags & IP_VS_CONN_F_ONE_PACKET) && cp->control)
616 atomic_inc(&cp->control->in_pkts);
617 else
618 atomic_inc(&cp->in_pkts);
615 ip_vs_conn_put(cp); 619 ip_vs_conn_put(cp);
616 return ret; 620 return ret;
617 } 621 }
@@ -1100,6 +1104,143 @@ static inline bool is_new_conn_expected(const struct ip_vs_conn *cp,
1100 } 1104 }
1101} 1105}
1102 1106
1107/* Generic function to create new connections for outgoing RS packets
1108 *
1109 * Pre-requisites for successful connection creation:
1110 * 1) Virtual Service is NOT fwmark based:
1111 * In fwmark-VS actual vaddr and vport are unknown to IPVS
1112 * 2) Real Server and Virtual Service were NOT configured without port:
1113 * This is to allow match of different VS to the same RS ip-addr
1114 */
1115struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc,
1116 struct ip_vs_dest *dest,
1117 struct sk_buff *skb,
1118 const struct ip_vs_iphdr *iph,
1119 __be16 dport,
1120 __be16 cport)
1121{
1122 struct ip_vs_conn_param param;
1123 struct ip_vs_conn *ct = NULL, *cp = NULL;
1124 const union nf_inet_addr *vaddr, *daddr, *caddr;
1125 union nf_inet_addr snet;
1126 __be16 vport;
1127 unsigned int flags;
1128
1129 EnterFunction(12);
1130 vaddr = &svc->addr;
1131 vport = svc->port;
1132 daddr = &iph->saddr;
1133 caddr = &iph->daddr;
1134
1135 /* check pre-requisites are satisfied */
1136 if (svc->fwmark)
1137 return NULL;
1138 if (!vport || !dport)
1139 return NULL;
1140
1141 /* for persistent service first create connection template */
1142 if (svc->flags & IP_VS_SVC_F_PERSISTENT) {
1143 /* apply netmask the same way ingress-side does */
1144#ifdef CONFIG_IP_VS_IPV6
1145 if (svc->af == AF_INET6)
1146 ipv6_addr_prefix(&snet.in6, &caddr->in6,
1147 (__force __u32)svc->netmask);
1148 else
1149#endif
1150 snet.ip = caddr->ip & svc->netmask;
1151 /* fill params and create template if not existent */
1152 if (ip_vs_conn_fill_param_persist(svc, skb, iph->protocol,
1153 &snet, 0, vaddr,
1154 vport, &param) < 0)
1155 return NULL;
1156 ct = ip_vs_ct_in_get(&param);
1157 if (!ct) {
1158 ct = ip_vs_conn_new(&param, dest->af, daddr, dport,
1159 IP_VS_CONN_F_TEMPLATE, dest, 0);
1160 if (!ct) {
1161 kfree(param.pe_data);
1162 return NULL;
1163 }
1164 ct->timeout = svc->timeout;
1165 } else {
1166 kfree(param.pe_data);
1167 }
1168 }
1169
1170 /* connection flags */
1171 flags = ((svc->flags & IP_VS_SVC_F_ONEPACKET) &&
1172 iph->protocol == IPPROTO_UDP) ? IP_VS_CONN_F_ONE_PACKET : 0;
1173 /* create connection */
1174 ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol,
1175 caddr, cport, vaddr, vport, &param);
1176 cp = ip_vs_conn_new(&param, dest->af, daddr, dport, flags, dest, 0);
1177 if (!cp) {
1178 if (ct)
1179 ip_vs_conn_put(ct);
1180 return NULL;
1181 }
1182 if (ct) {
1183 ip_vs_control_add(cp, ct);
1184 ip_vs_conn_put(ct);
1185 }
1186 ip_vs_conn_stats(cp, svc);
1187
1188 /* return connection (will be used to handle outgoing packet) */
1189 IP_VS_DBG_BUF(6, "New connection RS-initiated:%c c:%s:%u v:%s:%u "
1190 "d:%s:%u conn->flags:%X conn->refcnt:%d\n",
1191 ip_vs_fwd_tag(cp),
1192 IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
1193 IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
1194 IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
1195 cp->flags, atomic_read(&cp->refcnt));
1196 LeaveFunction(12);
1197 return cp;
1198}
1199
1200/* Handle outgoing packets which are considered requests initiated by
1201 * real servers, so that subsequent responses from external client can be
1202 * routed to the right real server.
1203 * Used also for outgoing responses in OPS mode.
1204 *
1205 * Connection management is handled by persistent-engine specific callback.
1206 */
1207static struct ip_vs_conn *__ip_vs_rs_conn_out(unsigned int hooknum,
1208 struct netns_ipvs *ipvs,
1209 int af, struct sk_buff *skb,
1210 const struct ip_vs_iphdr *iph)
1211{
1212 struct ip_vs_dest *dest;
1213 struct ip_vs_conn *cp = NULL;
1214 __be16 _ports[2], *pptr;
1215
1216 if (hooknum == NF_INET_LOCAL_IN)
1217 return NULL;
1218
1219 pptr = frag_safe_skb_hp(skb, iph->len,
1220 sizeof(_ports), _ports, iph);
1221 if (!pptr)
1222 return NULL;
1223
1224 rcu_read_lock();
1225 dest = ip_vs_find_real_service(ipvs, af, iph->protocol,
1226 &iph->saddr, pptr[0]);
1227 if (dest) {
1228 struct ip_vs_service *svc;
1229 struct ip_vs_pe *pe;
1230
1231 svc = rcu_dereference(dest->svc);
1232 if (svc) {
1233 pe = rcu_dereference(svc->pe);
1234 if (pe && pe->conn_out)
1235 cp = pe->conn_out(svc, dest, skb, iph,
1236 pptr[0], pptr[1]);
1237 }
1238 }
1239 rcu_read_unlock();
1240
1241 return cp;
1242}
1243
1103/* Handle response packets: rewrite addresses and send away... 1244/* Handle response packets: rewrite addresses and send away...
1104 */ 1245 */
1105static unsigned int 1246static unsigned int
@@ -1245,6 +1386,22 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
1245 1386
1246 if (likely(cp)) 1387 if (likely(cp))
1247 return handle_response(af, skb, pd, cp, &iph, hooknum); 1388 return handle_response(af, skb, pd, cp, &iph, hooknum);
1389
1390 /* Check for real-server-started requests */
1391 if (atomic_read(&ipvs->conn_out_counter)) {
1392 /* Currently only for UDP:
1393 * connection oriented protocols typically use
1394 * ephemeral ports for outgoing connections, so
1395 * related incoming responses would not match any VS
1396 */
1397 if (pp->protocol == IPPROTO_UDP) {
1398 cp = __ip_vs_rs_conn_out(hooknum, ipvs, af, skb, &iph);
1399 if (likely(cp))
1400 return handle_response(af, skb, pd, cp, &iph,
1401 hooknum);
1402 }
1403 }
1404
1248 if (sysctl_nat_icmp_send(ipvs) && 1405 if (sysctl_nat_icmp_send(ipvs) &&
1249 (pp->protocol == IPPROTO_TCP || 1406 (pp->protocol == IPPROTO_TCP ||
1250 pp->protocol == IPPROTO_UDP || 1407 pp->protocol == IPPROTO_UDP ||
@@ -1837,6 +1994,9 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
1837 1994
1838 if (ipvs->sync_state & IP_VS_STATE_MASTER) 1995 if (ipvs->sync_state & IP_VS_STATE_MASTER)
1839 ip_vs_sync_conn(ipvs, cp, pkts); 1996 ip_vs_sync_conn(ipvs, cp, pkts);
1997 else if ((cp->flags & IP_VS_CONN_F_ONE_PACKET) && cp->control)
1998 /* increment is done inside ip_vs_sync_conn too */
1999 atomic_inc(&cp->control->in_pkts);
1840 2000
1841 ip_vs_conn_put(cp); 2001 ip_vs_conn_put(cp);
1842 return ret; 2002 return ret;
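The ip_vs_out() addition above creates connections for packets that real servers originate themselves (currently UDP only, e.g. SIP), so that later replies from external clients are matched to the right real server. The per-packet cost is gated by conn_out_counter: the extra lookup only runs while at least one configured service has a persistence engine providing a conn_out callback. A rough sketch of that counter gate, with invented names:

#include <stdio.h>

/* Stand-in for ipvs->conn_out_counter: number of services whose
 * persistence engine registered a conn_out method. */
static int conn_out_capable_services;

static int rs_initiated_lookup(unsigned int saddr, unsigned int sport)
{
	printf("building connection for real server %#x:%u\n", saddr, sport);
	return 1;
}

static int handle_outgoing_udp(unsigned int saddr, unsigned int sport)
{
	/* Common case: no service registered conn_out, skip the lookup. */
	if (!conn_out_capable_services)
		return 0;
	return rs_initiated_lookup(saddr, sport);
}

int main(void)
{
	handle_outgoing_udp(0x0a000002, 5060);	/* gate closed: no-op */
	conn_out_capable_services++;		/* e.g. a SIP service was added */
	handle_outgoing_udp(0x0a000002, 5060);	/* gate open: lookup runs */
	return 0;
}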
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index f35ebc02fa5c..c3c809b2e712 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -567,6 +567,36 @@ bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
567 return false; 567 return false;
568} 568}
569 569
570/* Find real service record by <proto,addr,port>.
571 * In case of multiple records with the same <proto,addr,port>, only
572 * the first found record is returned.
573 *
574 * To be called under RCU lock.
575 */
576struct ip_vs_dest *ip_vs_find_real_service(struct netns_ipvs *ipvs, int af,
577 __u16 protocol,
578 const union nf_inet_addr *daddr,
579 __be16 dport)
580{
581 unsigned int hash;
582 struct ip_vs_dest *dest;
583
584 /* Check for "full" addressed entries */
585 hash = ip_vs_rs_hashkey(af, daddr, dport);
586
587 hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
588 if (dest->port == dport &&
589 dest->af == af &&
590 ip_vs_addr_equal(af, &dest->addr, daddr) &&
591 (dest->protocol == protocol || dest->vfwmark)) {
592 /* HIT */
593 return dest;
594 }
595 }
596
597 return NULL;
598}
599
570/* Lookup destination by {addr,port} in the given service 600/* Lookup destination by {addr,port} in the given service
571 * Called under RCU lock. 601 * Called under RCU lock.
572 */ 602 */
@@ -1253,6 +1283,8 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
1253 atomic_inc(&ipvs->ftpsvc_counter); 1283 atomic_inc(&ipvs->ftpsvc_counter);
1254 else if (svc->port == 0) 1284 else if (svc->port == 0)
1255 atomic_inc(&ipvs->nullsvc_counter); 1285 atomic_inc(&ipvs->nullsvc_counter);
1286 if (svc->pe && svc->pe->conn_out)
1287 atomic_inc(&ipvs->conn_out_counter);
1256 1288
1257 ip_vs_start_estimator(ipvs, &svc->stats); 1289 ip_vs_start_estimator(ipvs, &svc->stats);
1258 1290
@@ -1293,6 +1325,7 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1293 struct ip_vs_scheduler *sched = NULL, *old_sched; 1325 struct ip_vs_scheduler *sched = NULL, *old_sched;
1294 struct ip_vs_pe *pe = NULL, *old_pe = NULL; 1326 struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1295 int ret = 0; 1327 int ret = 0;
1328 bool new_pe_conn_out, old_pe_conn_out;
1296 1329
1297 /* 1330 /*
1298 * Lookup the scheduler, by 'u->sched_name' 1331 * Lookup the scheduler, by 'u->sched_name'
@@ -1355,8 +1388,16 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1355 svc->netmask = u->netmask; 1388 svc->netmask = u->netmask;
1356 1389
1357 old_pe = rcu_dereference_protected(svc->pe, 1); 1390 old_pe = rcu_dereference_protected(svc->pe, 1);
1358 if (pe != old_pe) 1391 if (pe != old_pe) {
1359 rcu_assign_pointer(svc->pe, pe); 1392 rcu_assign_pointer(svc->pe, pe);
1393 /* check for optional methods in new pe */
1394 new_pe_conn_out = (pe && pe->conn_out) ? true : false;
1395 old_pe_conn_out = (old_pe && old_pe->conn_out) ? true : false;
1396 if (new_pe_conn_out && !old_pe_conn_out)
1397 atomic_inc(&svc->ipvs->conn_out_counter);
1398 if (old_pe_conn_out && !new_pe_conn_out)
1399 atomic_dec(&svc->ipvs->conn_out_counter);
1400 }
1360 1401
1361out: 1402out:
1362 ip_vs_scheduler_put(old_sched); 1403 ip_vs_scheduler_put(old_sched);
@@ -1389,6 +1430,8 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
1389 1430
1390 /* Unbind persistence engine, keep svc->pe */ 1431 /* Unbind persistence engine, keep svc->pe */
1391 old_pe = rcu_dereference_protected(svc->pe, 1); 1432 old_pe = rcu_dereference_protected(svc->pe, 1);
1433 if (old_pe && old_pe->conn_out)
1434 atomic_dec(&ipvs->conn_out_counter);
1392 ip_vs_pe_put(old_pe); 1435 ip_vs_pe_put(old_pe);
1393 1436
1394 /* 1437 /*
@@ -3969,6 +4012,7 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
3969 (unsigned long) ipvs); 4012 (unsigned long) ipvs);
3970 atomic_set(&ipvs->ftpsvc_counter, 0); 4013 atomic_set(&ipvs->ftpsvc_counter, 0);
3971 atomic_set(&ipvs->nullsvc_counter, 0); 4014 atomic_set(&ipvs->nullsvc_counter, 0);
4015 atomic_set(&ipvs->conn_out_counter, 0);
3972 4016
3973 /* procfs stats */ 4017 /* procfs stats */
3974 ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); 4018 ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c
index 30434fb133df..f04fd8df210b 100644
--- a/net/netfilter/ipvs/ip_vs_nfct.c
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -93,6 +93,10 @@ ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin)
93 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) 93 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
94 return; 94 return;
95 95
96 /* Never alter conntrack for OPS conns (no reply is expected) */
97 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
98 return;
99
96 /* Alter reply only in original direction */ 100 /* Alter reply only in original direction */
97 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) 101 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
98 return; 102 return;
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index 0a6eb5c0d9e9..d07ef9e31c12 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -143,6 +143,20 @@ static int ip_vs_sip_show_pe_data(const struct ip_vs_conn *cp, char *buf)
143 return cp->pe_data_len; 143 return cp->pe_data_len;
144} 144}
145 145
146static struct ip_vs_conn *
147ip_vs_sip_conn_out(struct ip_vs_service *svc,
148 struct ip_vs_dest *dest,
149 struct sk_buff *skb,
150 const struct ip_vs_iphdr *iph,
151 __be16 dport,
152 __be16 cport)
153{
154 if (likely(iph->protocol == IPPROTO_UDP))
155 return ip_vs_new_conn_out(svc, dest, skb, iph, dport, cport);
156 /* currently no need to handle other than UDP */
157 return NULL;
158}
159
146static struct ip_vs_pe ip_vs_sip_pe = 160static struct ip_vs_pe ip_vs_sip_pe =
147{ 161{
148 .name = "sip", 162 .name = "sip",
@@ -153,6 +167,7 @@ static struct ip_vs_pe ip_vs_sip_pe =
153 .ct_match = ip_vs_sip_ct_match, 167 .ct_match = ip_vs_sip_ct_match,
154 .hashkey_raw = ip_vs_sip_hashkey_raw, 168 .hashkey_raw = ip_vs_sip_hashkey_raw,
155 .show_pe_data = ip_vs_sip_show_pe_data, 169 .show_pe_data = ip_vs_sip_show_pe_data,
170 .conn_out = ip_vs_sip_conn_out,
156}; 171};
157 172
158static int __init ip_vs_sip_init(void) 173static int __init ip_vs_sip_init(void)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 2fd607408998..566c64e3ec50 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -54,6 +54,7 @@
54#include <net/netfilter/nf_nat.h> 54#include <net/netfilter/nf_nat.h>
55#include <net/netfilter/nf_nat_core.h> 55#include <net/netfilter/nf_nat_core.h>
56#include <net/netfilter/nf_nat_helper.h> 56#include <net/netfilter/nf_nat_helper.h>
57#include <net/netns/hash.h>
57 58
58#define NF_CONNTRACK_VERSION "0.5.0" 59#define NF_CONNTRACK_VERSION "0.5.0"
59 60
@@ -68,7 +69,12 @@ EXPORT_SYMBOL_GPL(nf_conntrack_locks);
68__cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock); 69__cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock);
69EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock); 70EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
70 71
72struct hlist_nulls_head *nf_conntrack_hash __read_mostly;
73EXPORT_SYMBOL_GPL(nf_conntrack_hash);
74
75static __read_mostly struct kmem_cache *nf_conntrack_cachep;
71static __read_mostly spinlock_t nf_conntrack_locks_all_lock; 76static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
77static __read_mostly seqcount_t nf_conntrack_generation;
72static __read_mostly bool nf_conntrack_locks_all; 78static __read_mostly bool nf_conntrack_locks_all;
73 79
74void nf_conntrack_lock(spinlock_t *lock) __acquires(lock) 80void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
@@ -107,7 +113,7 @@ static bool nf_conntrack_double_lock(struct net *net, unsigned int h1,
107 spin_lock_nested(&nf_conntrack_locks[h1], 113 spin_lock_nested(&nf_conntrack_locks[h1],
108 SINGLE_DEPTH_NESTING); 114 SINGLE_DEPTH_NESTING);
109 } 115 }
110 if (read_seqcount_retry(&net->ct.generation, sequence)) { 116 if (read_seqcount_retry(&nf_conntrack_generation, sequence)) {
111 nf_conntrack_double_unlock(h1, h2); 117 nf_conntrack_double_unlock(h1, h2);
112 return true; 118 return true;
113 } 119 }
@@ -141,43 +147,43 @@ EXPORT_SYMBOL_GPL(nf_conntrack_max);
141DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked); 147DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
142EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked); 148EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
143 149
144unsigned int nf_conntrack_hash_rnd __read_mostly; 150static unsigned int nf_conntrack_hash_rnd __read_mostly;
145EXPORT_SYMBOL_GPL(nf_conntrack_hash_rnd);
146 151
147static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple) 152static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
153 const struct net *net)
148{ 154{
149 unsigned int n; 155 unsigned int n;
156 u32 seed;
157
158 get_random_once(&nf_conntrack_hash_rnd, sizeof(nf_conntrack_hash_rnd));
150 159
151 /* The direction must be ignored, so we hash everything up to the 160 /* The direction must be ignored, so we hash everything up to the
152 * destination ports (which is a multiple of 4) and treat the last 161 * destination ports (which is a multiple of 4) and treat the last
153 * three bytes manually. 162 * three bytes manually.
154 */ 163 */
164 seed = nf_conntrack_hash_rnd ^ net_hash_mix(net);
155 n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32); 165 n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
156 return jhash2((u32 *)tuple, n, nf_conntrack_hash_rnd ^ 166 return jhash2((u32 *)tuple, n, seed ^
157 (((__force __u16)tuple->dst.u.all << 16) | 167 (((__force __u16)tuple->dst.u.all << 16) |
158 tuple->dst.protonum)); 168 tuple->dst.protonum));
159} 169}
160 170
161static u32 __hash_bucket(u32 hash, unsigned int size) 171static u32 scale_hash(u32 hash)
162{
163 return reciprocal_scale(hash, size);
164}
165
166static u32 hash_bucket(u32 hash, const struct net *net)
167{ 172{
168 return __hash_bucket(hash, net->ct.htable_size); 173 return reciprocal_scale(hash, nf_conntrack_htable_size);
169} 174}
170 175
171static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple, 176static u32 __hash_conntrack(const struct net *net,
172 unsigned int size) 177 const struct nf_conntrack_tuple *tuple,
178 unsigned int size)
173{ 179{
174 return __hash_bucket(hash_conntrack_raw(tuple), size); 180 return reciprocal_scale(hash_conntrack_raw(tuple, net), size);
175} 181}
176 182
177static inline u_int32_t hash_conntrack(const struct net *net, 183static u32 hash_conntrack(const struct net *net,
178 const struct nf_conntrack_tuple *tuple) 184 const struct nf_conntrack_tuple *tuple)
179{ 185{
180 return __hash_conntrack(tuple, net->ct.htable_size); 186 return scale_hash(hash_conntrack_raw(tuple, net));
181} 187}
182 188
183bool 189bool
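Note: hash_conntrack_raw() now takes the namespace and folds a per-namespace value (net_hash_mix()) into a lazily initialised random seed, and scale_hash()/reciprocal_scale() map the full 32-bit hash onto the table size with a multiply-and-shift instead of a modulo. A minimal userspace model of the same idea follows; the mixing function is a simple stand-in, not jhash2(), and the seed initialisation only imitates get_random_once().

#include <stdint.h>
#include <stdio.h>

/* Lazily initialised global seed, mimicking get_random_once(). */
static uint32_t hash_rnd;
static int hash_rnd_ready;

static uint32_t get_seed_once(void)
{
    if (!hash_rnd_ready) {
        hash_rnd = 0x9e3779b9u;      /* stand-in for real random bytes */
        hash_rnd_ready = 1;
    }
    return hash_rnd;
}

/* Simple 32-bit mixer standing in for jhash2(). */
static uint32_t mix(uint32_t h, uint32_t v)
{
    h ^= v;
    h *= 0x85ebca6bu;
    h ^= h >> 13;
    return h;
}

/* reciprocal_scale(): map a full-range 32-bit hash onto [0, size)
 * with a multiply+shift instead of a modulo. */
static uint32_t reciprocal_scale(uint32_t hash, uint32_t size)
{
    return (uint32_t)(((uint64_t)hash * size) >> 32);
}

/* Mixing in a per-namespace value keeps two namespaces from sharing
 * bucket patterns even though they now share one table. */
static uint32_t tuple_hash(uint32_t src, uint32_t dst, uint16_t dport,
                           uint32_t net_mix)
{
    uint32_t seed = get_seed_once() ^ net_mix;
    uint32_t h = mix(seed, src);

    h = mix(h, dst);
    return mix(h, dport);
}

int main(void)
{
    uint32_t htable_size = 16384;
    uint32_t h = tuple_hash(0x0a000001, 0x0a000002, 53, /*net_mix=*/0x1234);

    printf("bucket %u of %u\n", reciprocal_scale(h, htable_size), htable_size);
    return 0;
}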
@@ -358,7 +364,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
358 } 364 }
359 rcu_read_lock(); 365 rcu_read_lock();
360 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); 366 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
361 if (l4proto && l4proto->destroy) 367 if (l4proto->destroy)
362 l4proto->destroy(ct); 368 l4proto->destroy(ct);
363 369
364 rcu_read_unlock(); 370 rcu_read_unlock();
@@ -393,7 +399,7 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct)
393 399
394 local_bh_disable(); 400 local_bh_disable();
395 do { 401 do {
396 sequence = read_seqcount_begin(&net->ct.generation); 402 sequence = read_seqcount_begin(&nf_conntrack_generation);
397 hash = hash_conntrack(net, 403 hash = hash_conntrack(net,
398 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); 404 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
399 reply_hash = hash_conntrack(net, 405 reply_hash = hash_conntrack(net,
@@ -445,7 +451,8 @@ static void death_by_timeout(unsigned long ul_conntrack)
445static inline bool 451static inline bool
446nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, 452nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
447 const struct nf_conntrack_tuple *tuple, 453 const struct nf_conntrack_tuple *tuple,
448 const struct nf_conntrack_zone *zone) 454 const struct nf_conntrack_zone *zone,
455 const struct net *net)
449{ 456{
450 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); 457 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
451 458
@@ -454,7 +461,8 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
454 */ 461 */
455 return nf_ct_tuple_equal(tuple, &h->tuple) && 462 return nf_ct_tuple_equal(tuple, &h->tuple) &&
456 nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h)) && 463 nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h)) &&
457 nf_ct_is_confirmed(ct); 464 nf_ct_is_confirmed(ct) &&
465 net_eq(net, nf_ct_net(ct));
458} 466}
459 467
460/* 468/*
@@ -467,21 +475,23 @@ ____nf_conntrack_find(struct net *net, const struct nf_conntrack_zone *zone,
467 const struct nf_conntrack_tuple *tuple, u32 hash) 475 const struct nf_conntrack_tuple *tuple, u32 hash)
468{ 476{
469 struct nf_conntrack_tuple_hash *h; 477 struct nf_conntrack_tuple_hash *h;
478 struct hlist_nulls_head *ct_hash;
470 struct hlist_nulls_node *n; 479 struct hlist_nulls_node *n;
471 unsigned int bucket = hash_bucket(hash, net); 480 unsigned int bucket, sequence;
472 481
473 /* Disable BHs the entire time since we normally need to disable them
474 * at least once for the stats anyway.
475 */
476 local_bh_disable();
477begin: 482begin:
478 hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[bucket], hnnode) { 483 do {
479 if (nf_ct_key_equal(h, tuple, zone)) { 484 sequence = read_seqcount_begin(&nf_conntrack_generation);
480 NF_CT_STAT_INC(net, found); 485 bucket = scale_hash(hash);
481 local_bh_enable(); 486 ct_hash = nf_conntrack_hash;
487 } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
488
489 hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[bucket], hnnode) {
490 if (nf_ct_key_equal(h, tuple, zone, net)) {
491 NF_CT_STAT_INC_ATOMIC(net, found);
482 return h; 492 return h;
483 } 493 }
484 NF_CT_STAT_INC(net, searched); 494 NF_CT_STAT_INC_ATOMIC(net, searched);
485 } 495 }
486 /* 496 /*
487 * if the nulls value we got at the end of this lookup is 497 * if the nulls value we got at the end of this lookup is
@@ -489,10 +499,9 @@ begin:
489 * We probably met an item that was moved to another chain. 499 * We probably met an item that was moved to another chain.
490 */ 500 */
491 if (get_nulls_value(n) != bucket) { 501 if (get_nulls_value(n) != bucket) {
492 NF_CT_STAT_INC(net, search_restart); 502 NF_CT_STAT_INC_ATOMIC(net, search_restart);
493 goto begin; 503 goto begin;
494 } 504 }
495 local_bh_enable();
496 505
497 return NULL; 506 return NULL;
498} 507}
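Note: with one table shared by all namespaces, lookups drop the local_bh_disable() section. The bucket index and table pointer are sampled under the global nf_conntrack_generation seqcount (retrying if a resize ran concurrently), the key comparison additionally requires net_eq(), and the nulls value at the end of the chain still forces a restart if the entry migrated to another chain. The retry part can be modelled in userspace with a plain generation counter; this is a rough single-threaded sketch, not the kernel seqcount API.

#include <stdint.h>
#include <stdio.h>

/* Writer bumps gen to odd before a resize and back to even after,
 * like write_seqcount_begin()/end(). */
static volatile unsigned int generation;
static unsigned int table_size = 1024;

static unsigned int read_gen(void)
{
    unsigned int g;

    do {
        g = generation;
    } while (g & 1);          /* writer in progress, wait for an even value */
    return g;
}

static int gen_retry(unsigned int g)
{
    return generation != g;   /* changed (or became odd) since we sampled */
}

static unsigned int scale(uint32_t hash, unsigned int size)
{
    return (unsigned int)(((uint64_t)hash * size) >> 32);
}

/* Sample a consistent (bucket, size) pair, retrying across resizes. */
static unsigned int stable_bucket(uint32_t hash)
{
    unsigned int g, bucket;

    do {
        g = read_gen();
        bucket = scale(hash, table_size);
    } while (gen_retry(g));
    return bucket;
}

int main(void)
{
    printf("bucket=%u\n", stable_bucket(0xdeadbeefu));
    return 0;
}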
@@ -514,7 +523,7 @@ begin:
514 !atomic_inc_not_zero(&ct->ct_general.use))) 523 !atomic_inc_not_zero(&ct->ct_general.use)))
515 h = NULL; 524 h = NULL;
516 else { 525 else {
517 if (unlikely(!nf_ct_key_equal(h, tuple, zone))) { 526 if (unlikely(!nf_ct_key_equal(h, tuple, zone, net))) {
518 nf_ct_put(ct); 527 nf_ct_put(ct);
519 goto begin; 528 goto begin;
520 } 529 }
@@ -530,7 +539,7 @@ nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
530 const struct nf_conntrack_tuple *tuple) 539 const struct nf_conntrack_tuple *tuple)
531{ 540{
532 return __nf_conntrack_find_get(net, zone, tuple, 541 return __nf_conntrack_find_get(net, zone, tuple,
533 hash_conntrack_raw(tuple)); 542 hash_conntrack_raw(tuple, net));
534} 543}
535EXPORT_SYMBOL_GPL(nf_conntrack_find_get); 544EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
536 545
@@ -538,12 +547,10 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct,
538 unsigned int hash, 547 unsigned int hash,
539 unsigned int reply_hash) 548 unsigned int reply_hash)
540{ 549{
541 struct net *net = nf_ct_net(ct);
542
543 hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, 550 hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
544 &net->ct.hash[hash]); 551 &nf_conntrack_hash[hash]);
545 hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode, 552 hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode,
546 &net->ct.hash[reply_hash]); 553 &nf_conntrack_hash[reply_hash]);
547} 554}
548 555
549int 556int
@@ -560,7 +567,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
560 567
561 local_bh_disable(); 568 local_bh_disable();
562 do { 569 do {
563 sequence = read_seqcount_begin(&net->ct.generation); 570 sequence = read_seqcount_begin(&nf_conntrack_generation);
564 hash = hash_conntrack(net, 571 hash = hash_conntrack(net,
565 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); 572 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
566 reply_hash = hash_conntrack(net, 573 reply_hash = hash_conntrack(net,
@@ -568,17 +575,14 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
568 } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence)); 575 } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
569 576
570 /* See if there's one in the list already, including reverse */ 577 /* See if there's one in the list already, including reverse */
571 hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode) 578 hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode)
572 if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, 579 if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
573 &h->tuple) && 580 zone, net))
574 nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
575 NF_CT_DIRECTION(h)))
576 goto out; 581 goto out;
577 hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode) 582
578 if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, 583 hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode)
579 &h->tuple) && 584 if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
580 nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone, 585 zone, net))
581 NF_CT_DIRECTION(h)))
582 goto out; 586 goto out;
583 587
584 add_timer(&ct->timeout); 588 add_timer(&ct->timeout);
@@ -599,6 +603,62 @@ out:
599} 603}
600EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert); 604EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
601 605
606static inline void nf_ct_acct_update(struct nf_conn *ct,
607 enum ip_conntrack_info ctinfo,
608 unsigned int len)
609{
610 struct nf_conn_acct *acct;
611
612 acct = nf_conn_acct_find(ct);
613 if (acct) {
614 struct nf_conn_counter *counter = acct->counter;
615
616 atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
617 atomic64_add(len, &counter[CTINFO2DIR(ctinfo)].bytes);
618 }
619}
620
621static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
622 const struct nf_conn *loser_ct)
623{
624 struct nf_conn_acct *acct;
625
626 acct = nf_conn_acct_find(loser_ct);
627 if (acct) {
628 struct nf_conn_counter *counter = acct->counter;
629 unsigned int bytes;
630
631 /* u32 should be fine since we must have seen one packet. */
632 bytes = atomic64_read(&counter[CTINFO2DIR(ctinfo)].bytes);
633 nf_ct_acct_update(ct, ctinfo, bytes);
634 }
635}
636
637/* Resolve race on insertion if this protocol allows this. */
638static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
639 enum ip_conntrack_info ctinfo,
640 struct nf_conntrack_tuple_hash *h)
641{
642 /* This is the conntrack entry already in hashes that won race. */
643 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
644 struct nf_conntrack_l4proto *l4proto;
645
646 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
647 if (l4proto->allow_clash &&
648 !nf_ct_is_dying(ct) &&
649 atomic_inc_not_zero(&ct->ct_general.use)) {
650 nf_ct_acct_merge(ct, ctinfo, (struct nf_conn *)skb->nfct);
651 nf_conntrack_put(skb->nfct);
652 /* Assign conntrack already in hashes to this skbuff. Don't
653 * modify skb->nfctinfo to ensure consistent stateful filtering.
654 */
655 skb->nfct = &ct->ct_general;
656 return NF_ACCEPT;
657 }
658 NF_CT_STAT_INC(net, drop);
659 return NF_DROP;
660}
661
602/* Confirm a connection given skb; places it in hash table */ 662/* Confirm a connection given skb; places it in hash table */
603int 663int
604__nf_conntrack_confirm(struct sk_buff *skb) 664__nf_conntrack_confirm(struct sk_buff *skb)
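Note: nf_ct_resolve_clash() above handles two packets of the same flow racing through the unconfirmed path (typical for a UDP request and its answer): if the protocol sets allow_clash and the entry that won the insert race is still alive, the loser's packet/byte counters are merged into the winner with nf_ct_acct_merge() and the skb is re-attached to the winning conntrack instead of being dropped. A minimal sketch of that bookkeeping, using plain C structures rather than the kernel's:

#include <stdint.h>
#include <stdio.h>

struct counters {
    uint64_t packets;
    uint64_t bytes;
};

struct conn {
    int dying;
    int allow_clash;            /* set per protocol, e.g. UDP */
    struct counters acct[2];    /* one slot per direction */
};

static void acct_update(struct conn *ct, int dir, unsigned int len)
{
    ct->acct[dir].packets++;
    ct->acct[dir].bytes += len;
}

/* Fold the loser's accounting into the winner, then report whether
 * the packet can be accepted against the winner. */
static int resolve_clash(struct conn *winner, const struct conn *loser, int dir)
{
    if (!winner->allow_clash || winner->dying)
        return 0;                           /* NF_DROP in the kernel */

    acct_update(winner, dir, (unsigned int)loser->acct[dir].bytes);
    return 1;                               /* NF_ACCEPT */
}

int main(void)
{
    struct conn winner = { .allow_clash = 1 };
    struct conn loser  = { 0 };

    acct_update(&loser, 0, 120);            /* the racing packet */
    printf("verdict=%d bytes=%llu\n",
           resolve_clash(&winner, &loser, 0),
           (unsigned long long)winner.acct[0].bytes);
    return 0;
}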
@@ -613,6 +673,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
613 enum ip_conntrack_info ctinfo; 673 enum ip_conntrack_info ctinfo;
614 struct net *net; 674 struct net *net;
615 unsigned int sequence; 675 unsigned int sequence;
676 int ret = NF_DROP;
616 677
617 ct = nf_ct_get(skb, &ctinfo); 678 ct = nf_ct_get(skb, &ctinfo);
618 net = nf_ct_net(ct); 679 net = nf_ct_net(ct);
@@ -628,10 +689,10 @@ __nf_conntrack_confirm(struct sk_buff *skb)
628 local_bh_disable(); 689 local_bh_disable();
629 690
630 do { 691 do {
631 sequence = read_seqcount_begin(&net->ct.generation); 692 sequence = read_seqcount_begin(&nf_conntrack_generation);
632 /* reuse the hash saved before */ 693 /* reuse the hash saved before */
633 hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev; 694 hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
634 hash = hash_bucket(hash, net); 695 hash = scale_hash(hash);
635 reply_hash = hash_conntrack(net, 696 reply_hash = hash_conntrack(net,
636 &ct->tuplehash[IP_CT_DIR_REPLY].tuple); 697 &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
637 698
@@ -655,23 +716,22 @@ __nf_conntrack_confirm(struct sk_buff *skb)
655 */ 716 */
656 nf_ct_del_from_dying_or_unconfirmed_list(ct); 717 nf_ct_del_from_dying_or_unconfirmed_list(ct);
657 718
658 if (unlikely(nf_ct_is_dying(ct))) 719 if (unlikely(nf_ct_is_dying(ct))) {
659 goto out; 720 nf_ct_add_to_dying_list(ct);
721 goto dying;
722 }
660 723
661 /* See if there's one in the list already, including reverse: 724 /* See if there's one in the list already, including reverse:
662 NAT could have grabbed it without realizing, since we're 725 NAT could have grabbed it without realizing, since we're
663 not in the hash. If there is, we lost race. */ 726 not in the hash. If there is, we lost race. */
664 hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode) 727 hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode)
665 if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, 728 if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
666 &h->tuple) && 729 zone, net))
667 nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
668 NF_CT_DIRECTION(h)))
669 goto out; 730 goto out;
670 hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode) 731
671 if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, 732 hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode)
672 &h->tuple) && 733 if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
673 nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone, 734 zone, net))
674 NF_CT_DIRECTION(h)))
675 goto out; 735 goto out;
676 736
677 /* Timer relative to confirmation time, not original 737 /* Timer relative to confirmation time, not original
@@ -710,10 +770,12 @@ __nf_conntrack_confirm(struct sk_buff *skb)
710 770
711out: 771out:
712 nf_ct_add_to_dying_list(ct); 772 nf_ct_add_to_dying_list(ct);
773 ret = nf_ct_resolve_clash(net, skb, ctinfo, h);
774dying:
713 nf_conntrack_double_unlock(hash, reply_hash); 775 nf_conntrack_double_unlock(hash, reply_hash);
714 NF_CT_STAT_INC(net, insert_failed); 776 NF_CT_STAT_INC(net, insert_failed);
715 local_bh_enable(); 777 local_bh_enable();
716 return NF_DROP; 778 return ret;
717} 779}
718EXPORT_SYMBOL_GPL(__nf_conntrack_confirm); 780EXPORT_SYMBOL_GPL(__nf_conntrack_confirm);
719 781
@@ -726,29 +788,31 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
726 struct net *net = nf_ct_net(ignored_conntrack); 788 struct net *net = nf_ct_net(ignored_conntrack);
727 const struct nf_conntrack_zone *zone; 789 const struct nf_conntrack_zone *zone;
728 struct nf_conntrack_tuple_hash *h; 790 struct nf_conntrack_tuple_hash *h;
791 struct hlist_nulls_head *ct_hash;
792 unsigned int hash, sequence;
729 struct hlist_nulls_node *n; 793 struct hlist_nulls_node *n;
730 struct nf_conn *ct; 794 struct nf_conn *ct;
731 unsigned int hash;
732 795
733 zone = nf_ct_zone(ignored_conntrack); 796 zone = nf_ct_zone(ignored_conntrack);
734 hash = hash_conntrack(net, tuple);
735 797
736 /* Disable BHs the entire time since we need to disable them at 798 rcu_read_lock();
737 * least once for the stats anyway. 799 do {
738 */ 800 sequence = read_seqcount_begin(&nf_conntrack_generation);
739 rcu_read_lock_bh(); 801 hash = hash_conntrack(net, tuple);
740 hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) { 802 ct_hash = nf_conntrack_hash;
803 } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
804
805 hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) {
741 ct = nf_ct_tuplehash_to_ctrack(h); 806 ct = nf_ct_tuplehash_to_ctrack(h);
742 if (ct != ignored_conntrack && 807 if (ct != ignored_conntrack &&
743 nf_ct_tuple_equal(tuple, &h->tuple) && 808 nf_ct_key_equal(h, tuple, zone, net)) {
744 nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h))) { 809 NF_CT_STAT_INC_ATOMIC(net, found);
745 NF_CT_STAT_INC(net, found); 810 rcu_read_unlock();
746 rcu_read_unlock_bh();
747 return 1; 811 return 1;
748 } 812 }
749 NF_CT_STAT_INC(net, searched); 813 NF_CT_STAT_INC_ATOMIC(net, searched);
750 } 814 }
751 rcu_read_unlock_bh(); 815 rcu_read_unlock();
752 816
753 return 0; 817 return 0;
754} 818}
@@ -762,71 +826,63 @@ static noinline int early_drop(struct net *net, unsigned int _hash)
762{ 826{
763 /* Use oldest entry, which is roughly LRU */ 827 /* Use oldest entry, which is roughly LRU */
764 struct nf_conntrack_tuple_hash *h; 828 struct nf_conntrack_tuple_hash *h;
765 struct nf_conn *ct = NULL, *tmp; 829 struct nf_conn *tmp;
766 struct hlist_nulls_node *n; 830 struct hlist_nulls_node *n;
767 unsigned int i = 0, cnt = 0; 831 unsigned int i, hash, sequence;
768 int dropped = 0; 832 struct nf_conn *ct = NULL;
769 unsigned int hash, sequence;
770 spinlock_t *lockp; 833 spinlock_t *lockp;
834 bool ret = false;
835
836 i = 0;
771 837
772 local_bh_disable(); 838 local_bh_disable();
773restart: 839restart:
774 sequence = read_seqcount_begin(&net->ct.generation); 840 sequence = read_seqcount_begin(&nf_conntrack_generation);
775 hash = hash_bucket(_hash, net); 841 for (; i < NF_CT_EVICTION_RANGE; i++) {
776 for (; i < net->ct.htable_size; i++) { 842 hash = scale_hash(_hash++);
777 lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS]; 843 lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS];
778 nf_conntrack_lock(lockp); 844 nf_conntrack_lock(lockp);
779 if (read_seqcount_retry(&net->ct.generation, sequence)) { 845 if (read_seqcount_retry(&nf_conntrack_generation, sequence)) {
780 spin_unlock(lockp); 846 spin_unlock(lockp);
781 goto restart; 847 goto restart;
782 } 848 }
783 hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], 849 hlist_nulls_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash],
784 hnnode) { 850 hnnode) {
785 tmp = nf_ct_tuplehash_to_ctrack(h); 851 tmp = nf_ct_tuplehash_to_ctrack(h);
786 if (!test_bit(IPS_ASSURED_BIT, &tmp->status) && 852
787 !nf_ct_is_dying(tmp) && 853 if (test_bit(IPS_ASSURED_BIT, &tmp->status) ||
788 atomic_inc_not_zero(&tmp->ct_general.use)) { 854 !net_eq(nf_ct_net(tmp), net) ||
855 nf_ct_is_dying(tmp))
856 continue;
857
858 if (atomic_inc_not_zero(&tmp->ct_general.use)) {
789 ct = tmp; 859 ct = tmp;
790 break; 860 break;
791 } 861 }
792 cnt++;
793 } 862 }
794 863
795 hash = (hash + 1) % net->ct.htable_size;
796 spin_unlock(lockp); 864 spin_unlock(lockp);
797 865 if (ct)
798 if (ct || cnt >= NF_CT_EVICTION_RANGE)
799 break; 866 break;
800
801 } 867 }
868
802 local_bh_enable(); 869 local_bh_enable();
803 870
804 if (!ct) 871 if (!ct)
805 return dropped; 872 return false;
806 873
807 if (del_timer(&ct->timeout)) { 874 /* kill only if in same netns -- might have moved due to
875 * SLAB_DESTROY_BY_RCU rules
876 */
877 if (net_eq(nf_ct_net(ct), net) && del_timer(&ct->timeout)) {
808 if (nf_ct_delete(ct, 0, 0)) { 878 if (nf_ct_delete(ct, 0, 0)) {
809 dropped = 1;
810 NF_CT_STAT_INC_ATOMIC(net, early_drop); 879 NF_CT_STAT_INC_ATOMIC(net, early_drop);
880 ret = true;
811 } 881 }
812 } 882 }
813 nf_ct_put(ct);
814 return dropped;
815}
816
817void init_nf_conntrack_hash_rnd(void)
818{
819 unsigned int rand;
820 883
821 /* 884 nf_ct_put(ct);
822 * Why not initialize nf_conntrack_rnd in a "init()" function ? 885 return ret;
823 * Because there isn't enough entropy when system initializing,
824 * and we initialize it as late as possible.
825 */
826 do {
827 get_random_bytes(&rand, sizeof(rand));
828 } while (!rand);
829 cmpxchg(&nf_conntrack_hash_rnd, 0, rand);
830} 886}
831 887
832static struct nf_conn * 888static struct nf_conn *
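Note: early_drop() no longer walks up to the whole table. Starting from the packet's hash it probes at most NF_CT_EVICTION_RANGE buckets, skips assured, dying and foreign-namespace entries, and, because the slab uses SLAB_DESTROY_BY_RCU (objects can be recycled under a reader), it re-checks the namespace before actually killing the candidate it pinned. A simplified sketch of the bounded probe; the data layout here is illustrative only.

#include <stdio.h>

#define EVICTION_RANGE 8
#define TABLE_SIZE     32

struct entry {
    int in_use;
    int assured;        /* never evict established, assured flows */
    int dying;
    int netns_id;
};

static struct entry table[TABLE_SIZE];

/* Probe a bounded window of buckets after `start` and return the first
 * entry that is safe to evict for this namespace, or -1 if none. */
static int pick_victim(unsigned int start, int netns_id)
{
    unsigned int i;

    for (i = 0; i < EVICTION_RANGE; i++) {
        unsigned int b = (start + i) % TABLE_SIZE;
        struct entry *e = &table[b];

        if (!e->in_use || e->assured || e->dying || e->netns_id != netns_id)
            continue;
        return (int)b;
    }
    return -1;
}

int main(void)
{
    table[5] = (struct entry){ .in_use = 1, .assured = 1, .netns_id = 1 };
    table[6] = (struct entry){ .in_use = 1, .netns_id = 2 };
    table[7] = (struct entry){ .in_use = 1, .netns_id = 1 };

    printf("victim bucket: %d\n", pick_victim(4, /*netns_id=*/1));
    return 0;
}

Bounding the scan keeps the drop path O(EVICTION_RANGE) instead of O(table size) when the table is full of assured entries.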
@@ -838,12 +894,6 @@ __nf_conntrack_alloc(struct net *net,
838{ 894{
839 struct nf_conn *ct; 895 struct nf_conn *ct;
840 896
841 if (unlikely(!nf_conntrack_hash_rnd)) {
842 init_nf_conntrack_hash_rnd();
843 /* recompute the hash as nf_conntrack_hash_rnd is initialized */
844 hash = hash_conntrack_raw(orig);
845 }
846
847 /* We don't want any race condition at early drop stage */ 897 /* We don't want any race condition at early drop stage */
848 atomic_inc(&net->ct.count); 898 atomic_inc(&net->ct.count);
849 899
@@ -860,7 +910,7 @@ __nf_conntrack_alloc(struct net *net,
860 * Do not use kmem_cache_zalloc(), as this cache uses 910 * Do not use kmem_cache_zalloc(), as this cache uses
861 * SLAB_DESTROY_BY_RCU. 911 * SLAB_DESTROY_BY_RCU.
862 */ 912 */
863 ct = kmem_cache_alloc(net->ct.nf_conntrack_cachep, gfp); 913 ct = kmem_cache_alloc(nf_conntrack_cachep, gfp);
864 if (ct == NULL) 914 if (ct == NULL)
865 goto out; 915 goto out;
866 916
@@ -887,7 +937,7 @@ __nf_conntrack_alloc(struct net *net,
887 atomic_set(&ct->ct_general.use, 0); 937 atomic_set(&ct->ct_general.use, 0);
888 return ct; 938 return ct;
889out_free: 939out_free:
890 kmem_cache_free(net->ct.nf_conntrack_cachep, ct); 940 kmem_cache_free(nf_conntrack_cachep, ct);
891out: 941out:
892 atomic_dec(&net->ct.count); 942 atomic_dec(&net->ct.count);
893 return ERR_PTR(-ENOMEM); 943 return ERR_PTR(-ENOMEM);
@@ -914,7 +964,7 @@ void nf_conntrack_free(struct nf_conn *ct)
914 964
915 nf_ct_ext_destroy(ct); 965 nf_ct_ext_destroy(ct);
916 nf_ct_ext_free(ct); 966 nf_ct_ext_free(ct);
917 kmem_cache_free(net->ct.nf_conntrack_cachep, ct); 967 kmem_cache_free(nf_conntrack_cachep, ct);
918 smp_mb__before_atomic(); 968 smp_mb__before_atomic();
919 atomic_dec(&net->ct.count); 969 atomic_dec(&net->ct.count);
920} 970}
@@ -1061,7 +1111,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
1061 1111
1062 /* look for tuple match */ 1112 /* look for tuple match */
1063 zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); 1113 zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
1064 hash = hash_conntrack_raw(&tuple); 1114 hash = hash_conntrack_raw(&tuple, net);
1065 h = __nf_conntrack_find_get(net, zone, &tuple, hash); 1115 h = __nf_conntrack_find_get(net, zone, &tuple, hash);
1066 if (!h) { 1116 if (!h) {
1067 h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto, 1117 h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
@@ -1270,17 +1320,8 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
1270 } 1320 }
1271 1321
1272acct: 1322acct:
1273 if (do_acct) { 1323 if (do_acct)
1274 struct nf_conn_acct *acct; 1324 nf_ct_acct_update(ct, ctinfo, skb->len);
1275
1276 acct = nf_conn_acct_find(ct);
1277 if (acct) {
1278 struct nf_conn_counter *counter = acct->counter;
1279
1280 atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
1281 atomic64_add(skb->len, &counter[CTINFO2DIR(ctinfo)].bytes);
1282 }
1283 }
1284} 1325}
1285EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct); 1326EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
1286 1327
@@ -1289,18 +1330,8 @@ bool __nf_ct_kill_acct(struct nf_conn *ct,
1289 const struct sk_buff *skb, 1330 const struct sk_buff *skb,
1290 int do_acct) 1331 int do_acct)
1291{ 1332{
1292 if (do_acct) { 1333 if (do_acct)
1293 struct nf_conn_acct *acct; 1334 nf_ct_acct_update(ct, ctinfo, skb->len);
1294
1295 acct = nf_conn_acct_find(ct);
1296 if (acct) {
1297 struct nf_conn_counter *counter = acct->counter;
1298
1299 atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
1300 atomic64_add(skb->len - skb_network_offset(skb),
1301 &counter[CTINFO2DIR(ctinfo)].bytes);
1302 }
1303 }
1304 1335
1305 if (del_timer(&ct->timeout)) { 1336 if (del_timer(&ct->timeout)) {
1306 ct->timeout.function((unsigned long)ct); 1337 ct->timeout.function((unsigned long)ct);
@@ -1396,16 +1427,17 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
1396 int cpu; 1427 int cpu;
1397 spinlock_t *lockp; 1428 spinlock_t *lockp;
1398 1429
1399 for (; *bucket < net->ct.htable_size; (*bucket)++) { 1430 for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
1400 lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS]; 1431 lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
1401 local_bh_disable(); 1432 local_bh_disable();
1402 nf_conntrack_lock(lockp); 1433 nf_conntrack_lock(lockp);
1403 if (*bucket < net->ct.htable_size) { 1434 if (*bucket < nf_conntrack_htable_size) {
1404 hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) { 1435 hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnnode) {
1405 if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) 1436 if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
1406 continue; 1437 continue;
1407 ct = nf_ct_tuplehash_to_ctrack(h); 1438 ct = nf_ct_tuplehash_to_ctrack(h);
1408 if (iter(ct, data)) 1439 if (net_eq(nf_ct_net(ct), net) &&
1440 iter(ct, data))
1409 goto found; 1441 goto found;
1410 } 1442 }
1411 } 1443 }
@@ -1443,6 +1475,9 @@ void nf_ct_iterate_cleanup(struct net *net,
1443 1475
1444 might_sleep(); 1476 might_sleep();
1445 1477
1478 if (atomic_read(&net->ct.count) == 0)
1479 return;
1480
1446 while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) { 1481 while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
1447 /* Time to push up daises... */ 1482 /* Time to push up daises... */
1448 if (del_timer(&ct->timeout)) 1483 if (del_timer(&ct->timeout))
@@ -1494,6 +1529,8 @@ void nf_conntrack_cleanup_end(void)
1494 while (untrack_refs() > 0) 1529 while (untrack_refs() > 0)
1495 schedule(); 1530 schedule();
1496 1531
1532 nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
1533
1497#ifdef CONFIG_NF_CONNTRACK_ZONES 1534#ifdef CONFIG_NF_CONNTRACK_ZONES
1498 nf_ct_extend_unregister(&nf_ct_zone_extend); 1535 nf_ct_extend_unregister(&nf_ct_zone_extend);
1499#endif 1536#endif
@@ -1544,15 +1581,12 @@ i_see_dead_people:
1544 } 1581 }
1545 1582
1546 list_for_each_entry(net, net_exit_list, exit_list) { 1583 list_for_each_entry(net, net_exit_list, exit_list) {
1547 nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
1548 nf_conntrack_proto_pernet_fini(net); 1584 nf_conntrack_proto_pernet_fini(net);
1549 nf_conntrack_helper_pernet_fini(net); 1585 nf_conntrack_helper_pernet_fini(net);
1550 nf_conntrack_ecache_pernet_fini(net); 1586 nf_conntrack_ecache_pernet_fini(net);
1551 nf_conntrack_tstamp_pernet_fini(net); 1587 nf_conntrack_tstamp_pernet_fini(net);
1552 nf_conntrack_acct_pernet_fini(net); 1588 nf_conntrack_acct_pernet_fini(net);
1553 nf_conntrack_expect_pernet_fini(net); 1589 nf_conntrack_expect_pernet_fini(net);
1554 kmem_cache_destroy(net->ct.nf_conntrack_cachep);
1555 kfree(net->ct.slabname);
1556 free_percpu(net->ct.stat); 1590 free_percpu(net->ct.stat);
1557 free_percpu(net->ct.pcpu_lists); 1591 free_percpu(net->ct.pcpu_lists);
1558 } 1592 }
@@ -1607,7 +1641,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
1607 1641
1608 local_bh_disable(); 1642 local_bh_disable();
1609 nf_conntrack_all_lock(); 1643 nf_conntrack_all_lock();
1610 write_seqcount_begin(&init_net.ct.generation); 1644 write_seqcount_begin(&nf_conntrack_generation);
1611 1645
1612 /* Lookups in the old hash might happen in parallel, which means we 1646 /* Lookups in the old hash might happen in parallel, which means we
1613 * might get false negatives during connection lookup. New connections 1647 * might get false negatives during connection lookup. New connections
@@ -1615,26 +1649,28 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
1615 * though since that required taking the locks. 1649 * though since that required taking the locks.
1616 */ 1650 */
1617 1651
1618 for (i = 0; i < init_net.ct.htable_size; i++) { 1652 for (i = 0; i < nf_conntrack_htable_size; i++) {
1619 while (!hlist_nulls_empty(&init_net.ct.hash[i])) { 1653 while (!hlist_nulls_empty(&nf_conntrack_hash[i])) {
1620 h = hlist_nulls_entry(init_net.ct.hash[i].first, 1654 h = hlist_nulls_entry(nf_conntrack_hash[i].first,
1621 struct nf_conntrack_tuple_hash, hnnode); 1655 struct nf_conntrack_tuple_hash, hnnode);
1622 ct = nf_ct_tuplehash_to_ctrack(h); 1656 ct = nf_ct_tuplehash_to_ctrack(h);
1623 hlist_nulls_del_rcu(&h->hnnode); 1657 hlist_nulls_del_rcu(&h->hnnode);
1624 bucket = __hash_conntrack(&h->tuple, hashsize); 1658 bucket = __hash_conntrack(nf_ct_net(ct),
1659 &h->tuple, hashsize);
1625 hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]); 1660 hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
1626 } 1661 }
1627 } 1662 }
1628 old_size = init_net.ct.htable_size; 1663 old_size = nf_conntrack_htable_size;
1629 old_hash = init_net.ct.hash; 1664 old_hash = nf_conntrack_hash;
1630 1665
1631 init_net.ct.htable_size = nf_conntrack_htable_size = hashsize; 1666 nf_conntrack_hash = hash;
1632 init_net.ct.hash = hash; 1667 nf_conntrack_htable_size = hashsize;
1633 1668
1634 write_seqcount_end(&init_net.ct.generation); 1669 write_seqcount_end(&nf_conntrack_generation);
1635 nf_conntrack_all_unlock(); 1670 nf_conntrack_all_unlock();
1636 local_bh_enable(); 1671 local_bh_enable();
1637 1672
1673 synchronize_net();
1638 nf_ct_free_hashtable(old_hash, old_size); 1674 nf_ct_free_hashtable(old_hash, old_size);
1639 return 0; 1675 return 0;
1640} 1676}
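Note: the resize path above rehashes every entry into the new table with __hash_conntrack(net, tuple, new_size) while holding all bucket locks and the global generation seqcount, then swaps the table pointer and size and only frees the old array after synchronize_net(). The rehash loop itself is the standard "pop from old bucket, push onto new bucket" pattern; a compact userspace sketch under assumed simple types:

#include <stdint.h>
#include <stdio.h>

struct node {
    uint32_t hash;          /* full 32-bit hash, kept with the entry */
    struct node *next;
};

static unsigned int scale(uint32_t hash, unsigned int size)
{
    return (unsigned int)(((uint64_t)hash * size) >> 32);
}

/* Move every entry from old[] into new_tbl[], recomputing its bucket
 * for the new size. Runs with writers excluded in the real code. */
static void rehash(struct node **old, unsigned int old_size,
                   struct node **new_tbl, unsigned int new_size)
{
    unsigned int i;

    for (i = 0; i < old_size; i++) {
        while (old[i]) {
            struct node *n = old[i];
            unsigned int b = scale(n->hash, new_size);

            old[i] = n->next;           /* unlink from old bucket */
            n->next = new_tbl[b];       /* push onto new bucket */
            new_tbl[b] = n;
        }
    }
}

int main(void)
{
    struct node a = { .hash = 0x12345678u };
    struct node *old[4] = { &a, NULL, NULL, NULL };
    struct node *new_tbl[8] = { NULL };

    rehash(old, 4, new_tbl, 8);
    printf("entry landed in new bucket %u\n", scale(a.hash, 8));
    return 0;
}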
@@ -1655,7 +1691,10 @@ EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or);
1655int nf_conntrack_init_start(void) 1691int nf_conntrack_init_start(void)
1656{ 1692{
1657 int max_factor = 8; 1693 int max_factor = 8;
1658 int i, ret, cpu; 1694 int ret = -ENOMEM;
1695 int i, cpu;
1696
1697 seqcount_init(&nf_conntrack_generation);
1659 1698
1660 for (i = 0; i < CONNTRACK_LOCKS; i++) 1699 for (i = 0; i < CONNTRACK_LOCKS; i++)
1661 spin_lock_init(&nf_conntrack_locks[i]); 1700 spin_lock_init(&nf_conntrack_locks[i]);
@@ -1682,8 +1721,19 @@ int nf_conntrack_init_start(void)
1682 * entries. */ 1721 * entries. */
1683 max_factor = 4; 1722 max_factor = 4;
1684 } 1723 }
1724
1725 nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, 1);
1726 if (!nf_conntrack_hash)
1727 return -ENOMEM;
1728
1685 nf_conntrack_max = max_factor * nf_conntrack_htable_size; 1729 nf_conntrack_max = max_factor * nf_conntrack_htable_size;
1686 1730
1731 nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
1732 sizeof(struct nf_conn), 0,
1733 SLAB_DESTROY_BY_RCU, NULL);
1734 if (!nf_conntrack_cachep)
1735 goto err_cachep;
1736
1687 printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n", 1737 printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n",
1688 NF_CONNTRACK_VERSION, nf_conntrack_htable_size, 1738 NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
1689 nf_conntrack_max); 1739 nf_conntrack_max);
@@ -1760,6 +1810,9 @@ err_tstamp:
1760err_acct: 1810err_acct:
1761 nf_conntrack_expect_fini(); 1811 nf_conntrack_expect_fini();
1762err_expect: 1812err_expect:
1813 kmem_cache_destroy(nf_conntrack_cachep);
1814err_cachep:
1815 nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
1763 return ret; 1816 return ret;
1764} 1817}
1765 1818
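Note: nf_conntrack_init_start() now owns the global hash table and the single nf_conn slab cache, so its error path grows matching labels: each failure point unwinds only what was already set up, in reverse order. That goto-label unwind is the usual kernel C idiom; a small sketch under assumed resource names (plain malloc/free stand in for the hash table and slab cache):

#include <stdio.h>
#include <stdlib.h>

static void *ct_hash;
static void *ct_cachep;

static void *alloc_hash(void)  { return malloc(64); }
static void *alloc_cache(void) { return malloc(64); }

static int conntrack_init(void)
{
    ct_hash = alloc_hash();
    if (!ct_hash)
        return -1;

    ct_cachep = alloc_cache();
    if (!ct_cachep)
        goto err_cachep;

    /* ... later steps each jump to a label that frees everything above ... */
    return 0;

err_cachep:
    free(ct_hash);          /* unwind in reverse order of setup */
    ct_hash = NULL;
    return -1;
}

int main(void)
{
    printf("init: %s\n", conntrack_init() == 0 ? "ok" : "failed");
    return 0;
}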
@@ -1783,7 +1836,6 @@ int nf_conntrack_init_net(struct net *net)
1783 int cpu; 1836 int cpu;
1784 1837
1785 atomic_set(&net->ct.count, 0); 1838 atomic_set(&net->ct.count, 0);
1786 seqcount_init(&net->ct.generation);
1787 1839
1788 net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu); 1840 net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu);
1789 if (!net->ct.pcpu_lists) 1841 if (!net->ct.pcpu_lists)
@@ -1801,24 +1853,6 @@ int nf_conntrack_init_net(struct net *net)
1801 if (!net->ct.stat) 1853 if (!net->ct.stat)
1802 goto err_pcpu_lists; 1854 goto err_pcpu_lists;
1803 1855
1804 net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net);
1805 if (!net->ct.slabname)
1806 goto err_slabname;
1807
1808 net->ct.nf_conntrack_cachep = kmem_cache_create(net->ct.slabname,
1809 sizeof(struct nf_conn), 0,
1810 SLAB_DESTROY_BY_RCU, NULL);
1811 if (!net->ct.nf_conntrack_cachep) {
1812 printk(KERN_ERR "Unable to create nf_conn slab cache\n");
1813 goto err_cache;
1814 }
1815
1816 net->ct.htable_size = nf_conntrack_htable_size;
1817 net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1);
1818 if (!net->ct.hash) {
1819 printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
1820 goto err_hash;
1821 }
1822 ret = nf_conntrack_expect_pernet_init(net); 1856 ret = nf_conntrack_expect_pernet_init(net);
1823 if (ret < 0) 1857 if (ret < 0)
1824 goto err_expect; 1858 goto err_expect;
@@ -1850,12 +1884,6 @@ err_tstamp:
1850err_acct: 1884err_acct:
1851 nf_conntrack_expect_pernet_fini(net); 1885 nf_conntrack_expect_pernet_fini(net);
1852err_expect: 1886err_expect:
1853 nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
1854err_hash:
1855 kmem_cache_destroy(net->ct.nf_conntrack_cachep);
1856err_cache:
1857 kfree(net->ct.slabname);
1858err_slabname:
1859 free_percpu(net->ct.stat); 1887 free_percpu(net->ct.stat);
1860err_pcpu_lists: 1888err_pcpu_lists:
1861 free_percpu(net->ct.pcpu_lists); 1889 free_percpu(net->ct.pcpu_lists);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 278927ab0948..9e3693128313 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -24,6 +24,7 @@
24#include <linux/moduleparam.h> 24#include <linux/moduleparam.h>
25#include <linux/export.h> 25#include <linux/export.h>
26#include <net/net_namespace.h> 26#include <net/net_namespace.h>
27#include <net/netns/hash.h>
27 28
28#include <net/netfilter/nf_conntrack.h> 29#include <net/netfilter/nf_conntrack.h>
29#include <net/netfilter/nf_conntrack_core.h> 30#include <net/netfilter/nf_conntrack_core.h>
@@ -35,9 +36,13 @@
35unsigned int nf_ct_expect_hsize __read_mostly; 36unsigned int nf_ct_expect_hsize __read_mostly;
36EXPORT_SYMBOL_GPL(nf_ct_expect_hsize); 37EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);
37 38
39struct hlist_head *nf_ct_expect_hash __read_mostly;
40EXPORT_SYMBOL_GPL(nf_ct_expect_hash);
41
38unsigned int nf_ct_expect_max __read_mostly; 42unsigned int nf_ct_expect_max __read_mostly;
39 43
40static struct kmem_cache *nf_ct_expect_cachep __read_mostly; 44static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
45static unsigned int nf_ct_expect_hashrnd __read_mostly;
41 46
42/* nf_conntrack_expect helper functions */ 47/* nf_conntrack_expect helper functions */
43void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, 48void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
@@ -72,21 +77,32 @@ static void nf_ct_expectation_timed_out(unsigned long ul_expect)
72 nf_ct_expect_put(exp); 77 nf_ct_expect_put(exp);
73} 78}
74 79
75static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple) 80static unsigned int nf_ct_expect_dst_hash(const struct net *n, const struct nf_conntrack_tuple *tuple)
76{ 81{
77 unsigned int hash; 82 unsigned int hash, seed;
78 83
79 if (unlikely(!nf_conntrack_hash_rnd)) { 84 get_random_once(&nf_ct_expect_hashrnd, sizeof(nf_ct_expect_hashrnd));
80 init_nf_conntrack_hash_rnd(); 85
81 } 86 seed = nf_ct_expect_hashrnd ^ net_hash_mix(n);
82 87
83 hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all), 88 hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
84 (((tuple->dst.protonum ^ tuple->src.l3num) << 16) | 89 (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
85 (__force __u16)tuple->dst.u.all) ^ nf_conntrack_hash_rnd); 90 (__force __u16)tuple->dst.u.all) ^ seed);
86 91
87 return reciprocal_scale(hash, nf_ct_expect_hsize); 92 return reciprocal_scale(hash, nf_ct_expect_hsize);
88} 93}
89 94
95static bool
96nf_ct_exp_equal(const struct nf_conntrack_tuple *tuple,
97 const struct nf_conntrack_expect *i,
98 const struct nf_conntrack_zone *zone,
99 const struct net *net)
100{
101 return nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
102 net_eq(net, nf_ct_net(i->master)) &&
103 nf_ct_zone_equal_any(i->master, zone);
104}
105
90struct nf_conntrack_expect * 106struct nf_conntrack_expect *
91__nf_ct_expect_find(struct net *net, 107__nf_ct_expect_find(struct net *net,
92 const struct nf_conntrack_zone *zone, 108 const struct nf_conntrack_zone *zone,
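Note: expectations get the same treatment as the main table. nf_ct_expect_dst_hash() mixes a dedicated nf_ct_expect_hashrnd with net_hash_mix(), and nf_ct_exp_equal() bundles the masked tuple comparison, the namespace check and the zone check into one predicate used by both lookup paths. A sketch of a masked tuple match with a namespace field, with illustrative types only (not the kernel's tuple layout):

#include <stdint.h>
#include <stdio.h>

struct tuple {
    uint32_t saddr, daddr;
    uint16_t sport, dport;
};

struct expect {
    struct tuple tuple;   /* expected flow */
    struct tuple mask;    /* which bits must match (all-ones = exact) */
    int netns_id;
    int zone;
};

/* Compare two tuples only on the bits selected by the mask,
 * the way the masked expectation match narrows the comparison. */
static int tuple_mask_cmp(const struct tuple *t, const struct tuple *exp,
                          const struct tuple *mask)
{
    return ((t->saddr ^ exp->saddr) & mask->saddr) == 0 &&
           ((t->daddr ^ exp->daddr) & mask->daddr) == 0 &&
           ((t->sport ^ exp->sport) & mask->sport) == 0 &&
           ((t->dport ^ exp->dport) & mask->dport) == 0;
}

static int exp_equal(const struct tuple *t, const struct expect *e,
                     int netns_id, int zone)
{
    return tuple_mask_cmp(t, &e->tuple, &e->mask) &&
           e->netns_id == netns_id &&
           e->zone == zone;
}

int main(void)
{
    struct expect e = {
        .tuple = { .daddr = 0x0a000002, .dport = 5060 },
        .mask  = { .daddr = 0xffffffff, .dport = 0xffff }, /* dst only */
        .netns_id = 1,
    };
    struct tuple pkt = { .saddr = 0x0a000009, .daddr = 0x0a000002,
                         .sport = 40000, .dport = 5060 };

    printf("match=%d\n", exp_equal(&pkt, &e, 1, 0));
    return 0;
}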
@@ -98,10 +114,9 @@ __nf_ct_expect_find(struct net *net,
98 if (!net->ct.expect_count) 114 if (!net->ct.expect_count)
99 return NULL; 115 return NULL;
100 116
101 h = nf_ct_expect_dst_hash(tuple); 117 h = nf_ct_expect_dst_hash(net, tuple);
102 hlist_for_each_entry_rcu(i, &net->ct.expect_hash[h], hnode) { 118 hlist_for_each_entry_rcu(i, &nf_ct_expect_hash[h], hnode) {
103 if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) && 119 if (nf_ct_exp_equal(tuple, i, zone, net))
104 nf_ct_zone_equal_any(i->master, zone))
105 return i; 120 return i;
106 } 121 }
107 return NULL; 122 return NULL;
@@ -139,11 +154,10 @@ nf_ct_find_expectation(struct net *net,
139 if (!net->ct.expect_count) 154 if (!net->ct.expect_count)
140 return NULL; 155 return NULL;
141 156
142 h = nf_ct_expect_dst_hash(tuple); 157 h = nf_ct_expect_dst_hash(net, tuple);
143 hlist_for_each_entry(i, &net->ct.expect_hash[h], hnode) { 158 hlist_for_each_entry(i, &nf_ct_expect_hash[h], hnode) {
144 if (!(i->flags & NF_CT_EXPECT_INACTIVE) && 159 if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
145 nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) && 160 nf_ct_exp_equal(tuple, i, zone, net)) {
146 nf_ct_zone_equal_any(i->master, zone)) {
147 exp = i; 161 exp = i;
148 break; 162 break;
149 } 163 }
@@ -223,6 +237,7 @@ static inline int expect_clash(const struct nf_conntrack_expect *a,
223 } 237 }
224 238
225 return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask) && 239 return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask) &&
240 net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) &&
226 nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master)); 241 nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master));
227} 242}
228 243
@@ -232,6 +247,7 @@ static inline int expect_matches(const struct nf_conntrack_expect *a,
232 return a->master == b->master && a->class == b->class && 247 return a->master == b->master && a->class == b->class &&
233 nf_ct_tuple_equal(&a->tuple, &b->tuple) && 248 nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
234 nf_ct_tuple_mask_equal(&a->mask, &b->mask) && 249 nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
250 net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) &&
235 nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master)); 251 nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master));
236} 252}
237 253
@@ -342,7 +358,7 @@ static int nf_ct_expect_insert(struct nf_conntrack_expect *exp)
342 struct nf_conn_help *master_help = nfct_help(exp->master); 358 struct nf_conn_help *master_help = nfct_help(exp->master);
343 struct nf_conntrack_helper *helper; 359 struct nf_conntrack_helper *helper;
344 struct net *net = nf_ct_exp_net(exp); 360 struct net *net = nf_ct_exp_net(exp);
345 unsigned int h = nf_ct_expect_dst_hash(&exp->tuple); 361 unsigned int h = nf_ct_expect_dst_hash(net, &exp->tuple);
346 362
347 /* two references : one for hash insert, one for the timer */ 363 /* two references : one for hash insert, one for the timer */
348 atomic_add(2, &exp->use); 364 atomic_add(2, &exp->use);
@@ -350,7 +366,7 @@ static int nf_ct_expect_insert(struct nf_conntrack_expect *exp)
350 hlist_add_head(&exp->lnode, &master_help->expectations); 366 hlist_add_head(&exp->lnode, &master_help->expectations);
351 master_help->expecting[exp->class]++; 367 master_help->expecting[exp->class]++;
352 368
353 hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]); 369 hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
354 net->ct.expect_count++; 370 net->ct.expect_count++;
355 371
356 setup_timer(&exp->timeout, nf_ct_expectation_timed_out, 372 setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
@@ -401,8 +417,8 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
401 ret = -ESHUTDOWN; 417 ret = -ESHUTDOWN;
402 goto out; 418 goto out;
403 } 419 }
404 h = nf_ct_expect_dst_hash(&expect->tuple); 420 h = nf_ct_expect_dst_hash(net, &expect->tuple);
405 hlist_for_each_entry_safe(i, next, &net->ct.expect_hash[h], hnode) { 421 hlist_for_each_entry_safe(i, next, &nf_ct_expect_hash[h], hnode) {
406 if (expect_matches(i, expect)) { 422 if (expect_matches(i, expect)) {
407 if (del_timer(&i->timeout)) { 423 if (del_timer(&i->timeout)) {
408 nf_ct_unlink_expect(i); 424 nf_ct_unlink_expect(i);
@@ -468,12 +484,11 @@ struct ct_expect_iter_state {
468 484
469static struct hlist_node *ct_expect_get_first(struct seq_file *seq) 485static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
470{ 486{
471 struct net *net = seq_file_net(seq);
472 struct ct_expect_iter_state *st = seq->private; 487 struct ct_expect_iter_state *st = seq->private;
473 struct hlist_node *n; 488 struct hlist_node *n;
474 489
475 for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { 490 for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
476 n = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket])); 491 n = rcu_dereference(hlist_first_rcu(&nf_ct_expect_hash[st->bucket]));
477 if (n) 492 if (n)
478 return n; 493 return n;
479 } 494 }
@@ -483,14 +498,13 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
483static struct hlist_node *ct_expect_get_next(struct seq_file *seq, 498static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
484 struct hlist_node *head) 499 struct hlist_node *head)
485{ 500{
486 struct net *net = seq_file_net(seq);
487 struct ct_expect_iter_state *st = seq->private; 501 struct ct_expect_iter_state *st = seq->private;
488 502
489 head = rcu_dereference(hlist_next_rcu(head)); 503 head = rcu_dereference(hlist_next_rcu(head));
490 while (head == NULL) { 504 while (head == NULL) {
491 if (++st->bucket >= nf_ct_expect_hsize) 505 if (++st->bucket >= nf_ct_expect_hsize)
492 return NULL; 506 return NULL;
493 head = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket])); 507 head = rcu_dereference(hlist_first_rcu(&nf_ct_expect_hash[st->bucket]));
494 } 508 }
495 return head; 509 return head;
496} 510}
@@ -623,28 +637,13 @@ module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400);
623 637
624int nf_conntrack_expect_pernet_init(struct net *net) 638int nf_conntrack_expect_pernet_init(struct net *net)
625{ 639{
626 int err = -ENOMEM;
627
628 net->ct.expect_count = 0; 640 net->ct.expect_count = 0;
629 net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0); 641 return exp_proc_init(net);
630 if (net->ct.expect_hash == NULL)
631 goto err1;
632
633 err = exp_proc_init(net);
634 if (err < 0)
635 goto err2;
636
637 return 0;
638err2:
639 nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
640err1:
641 return err;
642} 642}
643 643
644void nf_conntrack_expect_pernet_fini(struct net *net) 644void nf_conntrack_expect_pernet_fini(struct net *net)
645{ 645{
646 exp_proc_remove(net); 646 exp_proc_remove(net);
647 nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
648} 647}
649 648
650int nf_conntrack_expect_init(void) 649int nf_conntrack_expect_init(void)
@@ -660,6 +659,13 @@ int nf_conntrack_expect_init(void)
660 0, 0, NULL); 659 0, 0, NULL);
661 if (!nf_ct_expect_cachep) 660 if (!nf_ct_expect_cachep)
662 return -ENOMEM; 661 return -ENOMEM;
662
663 nf_ct_expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0);
664 if (!nf_ct_expect_hash) {
665 kmem_cache_destroy(nf_ct_expect_cachep);
666 return -ENOMEM;
667 }
668
663 return 0; 669 return 0;
664} 670}
665 671
@@ -667,4 +673,5 @@ void nf_conntrack_expect_fini(void)
667{ 673{
668 rcu_barrier(); /* Wait for call_rcu() before destroy */ 674 rcu_barrier(); /* Wait for call_rcu() before destroy */
669 kmem_cache_destroy(nf_ct_expect_cachep); 675 kmem_cache_destroy(nf_ct_expect_cachep);
676 nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_hsize);
670} 677}
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 3b40ec575cd5..f703adb7e5f7 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -38,10 +38,10 @@ unsigned int nf_ct_helper_hsize __read_mostly;
38EXPORT_SYMBOL_GPL(nf_ct_helper_hsize); 38EXPORT_SYMBOL_GPL(nf_ct_helper_hsize);
39static unsigned int nf_ct_helper_count __read_mostly; 39static unsigned int nf_ct_helper_count __read_mostly;
40 40
41static bool nf_ct_auto_assign_helper __read_mostly = true; 41static bool nf_ct_auto_assign_helper __read_mostly = false;
42module_param_named(nf_conntrack_helper, nf_ct_auto_assign_helper, bool, 0644); 42module_param_named(nf_conntrack_helper, nf_ct_auto_assign_helper, bool, 0644);
43MODULE_PARM_DESC(nf_conntrack_helper, 43MODULE_PARM_DESC(nf_conntrack_helper,
44 "Enable automatic conntrack helper assignment (default 1)"); 44 "Enable automatic conntrack helper assignment (default 0)");
45 45
46#ifdef CONFIG_SYSCTL 46#ifdef CONFIG_SYSCTL
47static struct ctl_table helper_sysctl_table[] = { 47static struct ctl_table helper_sysctl_table[] = {
@@ -400,7 +400,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
400 spin_lock_bh(&nf_conntrack_expect_lock); 400 spin_lock_bh(&nf_conntrack_expect_lock);
401 for (i = 0; i < nf_ct_expect_hsize; i++) { 401 for (i = 0; i < nf_ct_expect_hsize; i++) {
402 hlist_for_each_entry_safe(exp, next, 402 hlist_for_each_entry_safe(exp, next,
403 &net->ct.expect_hash[i], hnode) { 403 &nf_ct_expect_hash[i], hnode) {
404 struct nf_conn_help *help = nfct_help(exp->master); 404 struct nf_conn_help *help = nfct_help(exp->master);
405 if ((rcu_dereference_protected( 405 if ((rcu_dereference_protected(
406 help->helper, 406 help->helper,
@@ -424,10 +424,10 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
424 spin_unlock_bh(&pcpu->lock); 424 spin_unlock_bh(&pcpu->lock);
425 } 425 }
426 local_bh_disable(); 426 local_bh_disable();
427 for (i = 0; i < net->ct.htable_size; i++) { 427 for (i = 0; i < nf_conntrack_htable_size; i++) {
428 nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); 428 nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
429 if (i < net->ct.htable_size) { 429 if (i < nf_conntrack_htable_size) {
430 hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode) 430 hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode)
431 unhelp(h, me); 431 unhelp(h, me);
432 } 432 }
433 spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); 433 spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 294a8e28cec4..a18d1ceabad5 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -824,19 +824,22 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
824 last = (struct nf_conn *)cb->args[1]; 824 last = (struct nf_conn *)cb->args[1];
825 825
826 local_bh_disable(); 826 local_bh_disable();
827 for (; cb->args[0] < net->ct.htable_size; cb->args[0]++) { 827 for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) {
828restart: 828restart:
829 lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS]; 829 lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS];
830 nf_conntrack_lock(lockp); 830 nf_conntrack_lock(lockp);
831 if (cb->args[0] >= net->ct.htable_size) { 831 if (cb->args[0] >= nf_conntrack_htable_size) {
832 spin_unlock(lockp); 832 spin_unlock(lockp);
833 goto out; 833 goto out;
834 } 834 }
835 hlist_nulls_for_each_entry(h, n, &net->ct.hash[cb->args[0]], 835 hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[cb->args[0]],
836 hnnode) { 836 hnnode) {
837 if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) 837 if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
838 continue; 838 continue;
839 ct = nf_ct_tuplehash_to_ctrack(h); 839 ct = nf_ct_tuplehash_to_ctrack(h);
840 if (!net_eq(net, nf_ct_net(ct)))
841 continue;
842
840 /* Dump entries of a given L3 protocol number. 843 /* Dump entries of a given L3 protocol number.
841 * If it is not specified, ie. l3proto == 0, 844 * If it is not specified, ie. l3proto == 0,
842 * then dump everything. */ 845 * then dump everything. */
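Note: because every namespace now shares the same hash array, the ctnetlink dumpers skip entries whose conntrack belongs to a different namespace with net_eq(); the extra test is one pointer comparison per entry. A trivial sketch of filtering a shared bucket chain by owner (illustrative structures only):

#include <stdio.h>

struct ct {
    int netns_id;
    struct ct *next;
};

/* Count only the entries owned by `netns_id` while walking a chain that
 * interleaves connections from several namespaces. */
static int dump_chain(const struct ct *head, int netns_id)
{
    int dumped = 0;

    for (; head; head = head->next) {
        if (head->netns_id != netns_id)
            continue;           /* foreign namespace, skip */
        dumped++;
    }
    return dumped;
}

int main(void)
{
    struct ct c = { .netns_id = 2, .next = NULL };
    struct ct b = { .netns_id = 1, .next = &c };
    struct ct a = { .netns_id = 1, .next = &b };

    printf("dumped %d entries for netns 1\n", dump_chain(&a, 1));
    return 0;
}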
@@ -2629,10 +2632,14 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
2629 last = (struct nf_conntrack_expect *)cb->args[1]; 2632 last = (struct nf_conntrack_expect *)cb->args[1];
2630 for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) { 2633 for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) {
2631restart: 2634restart:
2632 hlist_for_each_entry(exp, &net->ct.expect_hash[cb->args[0]], 2635 hlist_for_each_entry(exp, &nf_ct_expect_hash[cb->args[0]],
2633 hnode) { 2636 hnode) {
2634 if (l3proto && exp->tuple.src.l3num != l3proto) 2637 if (l3proto && exp->tuple.src.l3num != l3proto)
2635 continue; 2638 continue;
2639
2640 if (!net_eq(nf_ct_net(exp->master), net))
2641 continue;
2642
2636 if (cb->args[1]) { 2643 if (cb->args[1]) {
2637 if (exp != last) 2644 if (exp != last)
2638 continue; 2645 continue;
@@ -2883,8 +2890,12 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
2883 spin_lock_bh(&nf_conntrack_expect_lock); 2890 spin_lock_bh(&nf_conntrack_expect_lock);
2884 for (i = 0; i < nf_ct_expect_hsize; i++) { 2891 for (i = 0; i < nf_ct_expect_hsize; i++) {
2885 hlist_for_each_entry_safe(exp, next, 2892 hlist_for_each_entry_safe(exp, next,
2886 &net->ct.expect_hash[i], 2893 &nf_ct_expect_hash[i],
2887 hnode) { 2894 hnode) {
2895
2896 if (!net_eq(nf_ct_exp_net(exp), net))
2897 continue;
2898
2888 m_help = nfct_help(exp->master); 2899 m_help = nfct_help(exp->master);
2889 if (!strcmp(m_help->helper->name, name) && 2900 if (!strcmp(m_help->helper->name, name) &&
2890 del_timer(&exp->timeout)) { 2901 del_timer(&exp->timeout)) {
@@ -2901,8 +2912,12 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
2901 spin_lock_bh(&nf_conntrack_expect_lock); 2912 spin_lock_bh(&nf_conntrack_expect_lock);
2902 for (i = 0; i < nf_ct_expect_hsize; i++) { 2913 for (i = 0; i < nf_ct_expect_hsize; i++) {
2903 hlist_for_each_entry_safe(exp, next, 2914 hlist_for_each_entry_safe(exp, next,
2904 &net->ct.expect_hash[i], 2915 &nf_ct_expect_hash[i],
2905 hnode) { 2916 hnode) {
2917
2918 if (!net_eq(nf_ct_exp_net(exp), net))
2919 continue;
2920
2906 if (del_timer(&exp->timeout)) { 2921 if (del_timer(&exp->timeout)) {
2907 nf_ct_unlink_expect_report(exp, 2922 nf_ct_unlink_expect_report(exp,
2908 NETLINK_CB(skb).portid, 2923 NETLINK_CB(skb).portid,
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 478f92f834b6..4fd040575ffe 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -309,6 +309,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly =
309 .l3proto = PF_INET, 309 .l3proto = PF_INET,
310 .l4proto = IPPROTO_UDP, 310 .l4proto = IPPROTO_UDP,
311 .name = "udp", 311 .name = "udp",
312 .allow_clash = true,
312 .pkt_to_tuple = udp_pkt_to_tuple, 313 .pkt_to_tuple = udp_pkt_to_tuple,
313 .invert_tuple = udp_invert_tuple, 314 .invert_tuple = udp_invert_tuple,
314 .print_tuple = udp_print_tuple, 315 .print_tuple = udp_print_tuple,
@@ -341,6 +342,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly =
341 .l3proto = PF_INET6, 342 .l3proto = PF_INET6,
342 .l4proto = IPPROTO_UDP, 343 .l4proto = IPPROTO_UDP,
343 .name = "udp", 344 .name = "udp",
345 .allow_clash = true,
344 .pkt_to_tuple = udp_pkt_to_tuple, 346 .pkt_to_tuple = udp_pkt_to_tuple,
345 .invert_tuple = udp_invert_tuple, 347 .invert_tuple = udp_invert_tuple,
346 .print_tuple = udp_print_tuple, 348 .print_tuple = udp_print_tuple,
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index 1ac8ee13a873..9d692f5adb94 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -274,6 +274,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly =
274 .l3proto = PF_INET, 274 .l3proto = PF_INET,
275 .l4proto = IPPROTO_UDPLITE, 275 .l4proto = IPPROTO_UDPLITE,
276 .name = "udplite", 276 .name = "udplite",
277 .allow_clash = true,
277 .pkt_to_tuple = udplite_pkt_to_tuple, 278 .pkt_to_tuple = udplite_pkt_to_tuple,
278 .invert_tuple = udplite_invert_tuple, 279 .invert_tuple = udplite_invert_tuple,
279 .print_tuple = udplite_print_tuple, 280 .print_tuple = udplite_print_tuple,
@@ -306,6 +307,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly =
306 .l3proto = PF_INET6, 307 .l3proto = PF_INET6,
307 .l4proto = IPPROTO_UDPLITE, 308 .l4proto = IPPROTO_UDPLITE,
308 .name = "udplite", 309 .name = "udplite",
310 .allow_clash = true,
309 .pkt_to_tuple = udplite_pkt_to_tuple, 311 .pkt_to_tuple = udplite_pkt_to_tuple,
310 .invert_tuple = udplite_invert_tuple, 312 .invert_tuple = udplite_invert_tuple,
311 .print_tuple = udplite_print_tuple, 313 .print_tuple = udplite_print_tuple,
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 0f1a45bcacb2..f87e84ebcec3 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -54,14 +54,13 @@ struct ct_iter_state {
54 54
55static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) 55static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
56{ 56{
57 struct net *net = seq_file_net(seq);
58 struct ct_iter_state *st = seq->private; 57 struct ct_iter_state *st = seq->private;
59 struct hlist_nulls_node *n; 58 struct hlist_nulls_node *n;
60 59
61 for (st->bucket = 0; 60 for (st->bucket = 0;
62 st->bucket < net->ct.htable_size; 61 st->bucket < nf_conntrack_htable_size;
63 st->bucket++) { 62 st->bucket++) {
64 n = rcu_dereference(hlist_nulls_first_rcu(&net->ct.hash[st->bucket])); 63 n = rcu_dereference(hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket]));
65 if (!is_a_nulls(n)) 64 if (!is_a_nulls(n))
66 return n; 65 return n;
67 } 66 }
@@ -71,18 +70,17 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
71static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, 70static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
72 struct hlist_nulls_node *head) 71 struct hlist_nulls_node *head)
73{ 72{
74 struct net *net = seq_file_net(seq);
75 struct ct_iter_state *st = seq->private; 73 struct ct_iter_state *st = seq->private;
76 74
77 head = rcu_dereference(hlist_nulls_next_rcu(head)); 75 head = rcu_dereference(hlist_nulls_next_rcu(head));
78 while (is_a_nulls(head)) { 76 while (is_a_nulls(head)) {
79 if (likely(get_nulls_value(head) == st->bucket)) { 77 if (likely(get_nulls_value(head) == st->bucket)) {
80 if (++st->bucket >= net->ct.htable_size) 78 if (++st->bucket >= nf_conntrack_htable_size)
81 return NULL; 79 return NULL;
82 } 80 }
83 head = rcu_dereference( 81 head = rcu_dereference(
84 hlist_nulls_first_rcu( 82 hlist_nulls_first_rcu(
85 &net->ct.hash[st->bucket])); 83 &nf_conntrack_hash[st->bucket]));
86 } 84 }
87 return head; 85 return head;
88} 86}
@@ -458,7 +456,7 @@ static struct ctl_table nf_ct_sysctl_table[] = {
458 }, 456 },
459 { 457 {
460 .procname = "nf_conntrack_buckets", 458 .procname = "nf_conntrack_buckets",
461 .data = &init_net.ct.htable_size, 459 .data = &nf_conntrack_htable_size,
462 .maxlen = sizeof(unsigned int), 460 .maxlen = sizeof(unsigned int),
463 .mode = 0444, 461 .mode = 0444,
464 .proc_handler = proc_dointvec, 462 .proc_handler = proc_dointvec,
@@ -512,7 +510,6 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
512 goto out_kmemdup; 510 goto out_kmemdup;
513 511
514 table[1].data = &net->ct.count; 512 table[1].data = &net->ct.count;
515 table[2].data = &net->ct.htable_size;
516 table[3].data = &net->ct.sysctl_checksum; 513 table[3].data = &net->ct.sysctl_checksum;
517 table[4].data = &net->ct.sysctl_log_invalid; 514 table[4].data = &net->ct.sysctl_log_invalid;
518 515
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 06a9f45771ab..6877a396f8fc 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -38,6 +38,9 @@ static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO]
38static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO] 38static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO]
39 __read_mostly; 39 __read_mostly;
40 40
41static struct hlist_head *nf_nat_bysource __read_mostly;
42static unsigned int nf_nat_htable_size __read_mostly;
43static unsigned int nf_nat_hash_rnd __read_mostly;
41 44
42inline const struct nf_nat_l3proto * 45inline const struct nf_nat_l3proto *
43__nf_nat_l3proto_find(u8 family) 46__nf_nat_l3proto_find(u8 family)
@@ -118,15 +121,17 @@ EXPORT_SYMBOL(nf_xfrm_me_harder);
118 121
119/* We keep an extra hash for each conntrack, for fast searching. */ 122/* We keep an extra hash for each conntrack, for fast searching. */
120static inline unsigned int 123static inline unsigned int
121hash_by_src(const struct net *net, const struct nf_conntrack_tuple *tuple) 124hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)
122{ 125{
123 unsigned int hash; 126 unsigned int hash;
124 127
128 get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd));
129
125 /* Original src, to ensure we map it consistently if poss. */ 130 /* Original src, to ensure we map it consistently if poss. */
126 hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32), 131 hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32),
127 tuple->dst.protonum ^ nf_conntrack_hash_rnd); 132 tuple->dst.protonum ^ nf_nat_hash_rnd ^ net_hash_mix(n));
128 133
129 return reciprocal_scale(hash, net->ct.nat_htable_size); 134 return reciprocal_scale(hash, nf_nat_htable_size);
130} 135}
131 136
132/* Is this tuple already taken? (not by us) */ 137/* Is this tuple already taken? (not by us) */
@@ -196,9 +201,10 @@ find_appropriate_src(struct net *net,
196 const struct nf_conn_nat *nat; 201 const struct nf_conn_nat *nat;
197 const struct nf_conn *ct; 202 const struct nf_conn *ct;
198 203
199 hlist_for_each_entry_rcu(nat, &net->ct.nat_bysource[h], bysource) { 204 hlist_for_each_entry_rcu(nat, &nf_nat_bysource[h], bysource) {
200 ct = nat->ct; 205 ct = nat->ct;
201 if (same_src(ct, tuple) && 206 if (same_src(ct, tuple) &&
207 net_eq(net, nf_ct_net(ct)) &&
202 nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) { 208 nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) {
203 /* Copy source part from reply tuple. */ 209 /* Copy source part from reply tuple. */
204 nf_ct_invert_tuplepr(result, 210 nf_ct_invert_tuplepr(result,
@@ -431,7 +437,7 @@ nf_nat_setup_info(struct nf_conn *ct,
431 nat = nfct_nat(ct); 437 nat = nfct_nat(ct);
432 nat->ct = ct; 438 nat->ct = ct;
433 hlist_add_head_rcu(&nat->bysource, 439 hlist_add_head_rcu(&nat->bysource,
434 &net->ct.nat_bysource[srchash]); 440 &nf_nat_bysource[srchash]);
435 spin_unlock_bh(&nf_nat_lock); 441 spin_unlock_bh(&nf_nat_lock);
436 } 442 }
437 443
@@ -819,27 +825,14 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct,
819} 825}
820#endif 826#endif
821 827
822static int __net_init nf_nat_net_init(struct net *net)
823{
824 /* Leave them the same for the moment. */
825 net->ct.nat_htable_size = net->ct.htable_size;
826 net->ct.nat_bysource = nf_ct_alloc_hashtable(&net->ct.nat_htable_size, 0);
827 if (!net->ct.nat_bysource)
828 return -ENOMEM;
829 return 0;
830}
831
832static void __net_exit nf_nat_net_exit(struct net *net) 828static void __net_exit nf_nat_net_exit(struct net *net)
833{ 829{
834 struct nf_nat_proto_clean clean = {}; 830 struct nf_nat_proto_clean clean = {};
835 831
836 nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean, 0, 0); 832 nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean, 0, 0);
837 synchronize_rcu();
838 nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size);
839} 833}
840 834
841static struct pernet_operations nf_nat_net_ops = { 835static struct pernet_operations nf_nat_net_ops = {
842 .init = nf_nat_net_init,
843 .exit = nf_nat_net_exit, 836 .exit = nf_nat_net_exit,
844}; 837};
845 838
@@ -852,8 +845,16 @@ static int __init nf_nat_init(void)
852{ 845{
853 int ret; 846 int ret;
854 847
848 /* Leave them the same for the moment. */
849 nf_nat_htable_size = nf_conntrack_htable_size;
850
851 nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0);
852 if (!nf_nat_bysource)
853 return -ENOMEM;
854
855 ret = nf_ct_extend_register(&nat_extend); 855 ret = nf_ct_extend_register(&nat_extend);
856 if (ret < 0) { 856 if (ret < 0) {
857 nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
857 printk(KERN_ERR "nf_nat_core: Unable to register extension\n"); 858 printk(KERN_ERR "nf_nat_core: Unable to register extension\n");
858 return ret; 859 return ret;
859 } 860 }
@@ -877,6 +878,7 @@ static int __init nf_nat_init(void)
877 return 0; 878 return 0;
878 879
879 cleanup_extend: 880 cleanup_extend:
881 nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
880 nf_ct_extend_unregister(&nat_extend); 882 nf_ct_extend_unregister(&nat_extend);
881 return ret; 883 return ret;
882} 884}
@@ -895,6 +897,7 @@ static void __exit nf_nat_cleanup(void)
895 for (i = 0; i < NFPROTO_NUMPROTO; i++) 897 for (i = 0; i < NFPROTO_NUMPROTO; i++)
896 kfree(nf_nat_l4protos[i]); 898 kfree(nf_nat_l4protos[i]);
897 synchronize_net(); 899 synchronize_net();
900 nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
898} 901}
899 902
900MODULE_LICENSE("GPL"); 903MODULE_LICENSE("GPL");
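The bysource table likewise becomes a single global hash, so the per-net distinction moves into the hash input (net_hash_mix()) and into an explicit net_eq() check on each candidate in find_appropriate_src(). The seeding idiom used by hash_by_src() is worth calling out on its own; the sketch below is generic, and only the get_random_once()/jhash-style calls are taken from the hunk.

/* lazily seed a hash key exactly once; later callers need no extra locking */
static unsigned int lazily_seeded_hash(const void *key, unsigned int len)
{
        static u32 seed __read_mostly;

        get_random_once(&seed, sizeof(seed));
        return jhash(key, len, seed);
}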
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 7a85a9dd37ad..4d292b933b5c 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2317,7 +2317,7 @@ nft_select_set_ops(const struct nlattr * const nla[],
2317static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { 2317static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
2318 [NFTA_SET_TABLE] = { .type = NLA_STRING }, 2318 [NFTA_SET_TABLE] = { .type = NLA_STRING },
2319 [NFTA_SET_NAME] = { .type = NLA_STRING, 2319 [NFTA_SET_NAME] = { .type = NLA_STRING,
2320 .len = IFNAMSIZ - 1 }, 2320 .len = NFT_SET_MAXNAMELEN - 1 },
2321 [NFTA_SET_FLAGS] = { .type = NLA_U32 }, 2321 [NFTA_SET_FLAGS] = { .type = NLA_U32 },
2322 [NFTA_SET_KEY_TYPE] = { .type = NLA_U32 }, 2322 [NFTA_SET_KEY_TYPE] = { .type = NLA_U32 },
2323 [NFTA_SET_KEY_LEN] = { .type = NLA_U32 }, 2323 [NFTA_SET_KEY_LEN] = { .type = NLA_U32 },
@@ -2401,7 +2401,7 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
2401 unsigned long *inuse; 2401 unsigned long *inuse;
2402 unsigned int n = 0, min = 0; 2402 unsigned int n = 0, min = 0;
2403 2403
2404 p = strnchr(name, IFNAMSIZ, '%'); 2404 p = strnchr(name, NFT_SET_MAXNAMELEN, '%');
2405 if (p != NULL) { 2405 if (p != NULL) {
2406 if (p[1] != 'd' || strchr(p + 2, '%')) 2406 if (p[1] != 'd' || strchr(p + 2, '%'))
2407 return -EINVAL; 2407 return -EINVAL;
@@ -2696,7 +2696,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
2696 struct nft_table *table; 2696 struct nft_table *table;
2697 struct nft_set *set; 2697 struct nft_set *set;
2698 struct nft_ctx ctx; 2698 struct nft_ctx ctx;
2699 char name[IFNAMSIZ]; 2699 char name[NFT_SET_MAXNAMELEN];
2700 unsigned int size; 2700 unsigned int size;
2701 bool create; 2701 bool create;
2702 u64 timeout; 2702 u64 timeout;
@@ -3375,6 +3375,22 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem)
3375} 3375}
3376EXPORT_SYMBOL_GPL(nft_set_elem_destroy); 3376EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
3377 3377
3378static int nft_setelem_parse_flags(const struct nft_set *set,
3379 const struct nlattr *attr, u32 *flags)
3380{
3381 if (attr == NULL)
3382 return 0;
3383
3384 *flags = ntohl(nla_get_be32(attr));
3385 if (*flags & ~NFT_SET_ELEM_INTERVAL_END)
3386 return -EINVAL;
3387 if (!(set->flags & NFT_SET_INTERVAL) &&
3388 *flags & NFT_SET_ELEM_INTERVAL_END)
3389 return -EINVAL;
3390
3391 return 0;
3392}
3393
3378static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, 3394static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
3379 const struct nlattr *attr) 3395 const struct nlattr *attr)
3380{ 3396{
@@ -3388,8 +3404,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
3388 struct nft_data data; 3404 struct nft_data data;
3389 enum nft_registers dreg; 3405 enum nft_registers dreg;
3390 struct nft_trans *trans; 3406 struct nft_trans *trans;
3407 u32 flags = 0;
3391 u64 timeout; 3408 u64 timeout;
3392 u32 flags;
3393 u8 ulen; 3409 u8 ulen;
3394 int err; 3410 int err;
3395 3411
@@ -3403,17 +3419,11 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
3403 3419
3404 nft_set_ext_prepare(&tmpl); 3420 nft_set_ext_prepare(&tmpl);
3405 3421
3406 flags = 0; 3422 err = nft_setelem_parse_flags(set, nla[NFTA_SET_ELEM_FLAGS], &flags);
3407 if (nla[NFTA_SET_ELEM_FLAGS] != NULL) { 3423 if (err < 0)
3408 flags = ntohl(nla_get_be32(nla[NFTA_SET_ELEM_FLAGS])); 3424 return err;
3409 if (flags & ~NFT_SET_ELEM_INTERVAL_END) 3425 if (flags != 0)
3410 return -EINVAL; 3426 nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
3411 if (!(set->flags & NFT_SET_INTERVAL) &&
3412 flags & NFT_SET_ELEM_INTERVAL_END)
3413 return -EINVAL;
3414 if (flags != 0)
3415 nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
3416 }
3417 3427
3418 if (set->flags & NFT_SET_MAP) { 3428 if (set->flags & NFT_SET_MAP) {
3419 if (nla[NFTA_SET_ELEM_DATA] == NULL && 3429 if (nla[NFTA_SET_ELEM_DATA] == NULL &&
@@ -3582,9 +3592,13 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
3582 const struct nlattr *attr) 3592 const struct nlattr *attr)
3583{ 3593{
3584 struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; 3594 struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
3595 struct nft_set_ext_tmpl tmpl;
3585 struct nft_data_desc desc; 3596 struct nft_data_desc desc;
3586 struct nft_set_elem elem; 3597 struct nft_set_elem elem;
3598 struct nft_set_ext *ext;
3587 struct nft_trans *trans; 3599 struct nft_trans *trans;
3600 u32 flags = 0;
3601 void *priv;
3588 int err; 3602 int err;
3589 3603
3590 err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, 3604 err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
@@ -3596,6 +3610,14 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
3596 if (nla[NFTA_SET_ELEM_KEY] == NULL) 3610 if (nla[NFTA_SET_ELEM_KEY] == NULL)
3597 goto err1; 3611 goto err1;
3598 3612
3613 nft_set_ext_prepare(&tmpl);
3614
3615 err = nft_setelem_parse_flags(set, nla[NFTA_SET_ELEM_FLAGS], &flags);
3616 if (err < 0)
3617 return err;
3618 if (flags != 0)
3619 nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
3620
3599 err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &desc, 3621 err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &desc,
3600 nla[NFTA_SET_ELEM_KEY]); 3622 nla[NFTA_SET_ELEM_KEY]);
3601 if (err < 0) 3623 if (err < 0)
@@ -3605,24 +3627,40 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
3605 if (desc.type != NFT_DATA_VALUE || desc.len != set->klen) 3627 if (desc.type != NFT_DATA_VALUE || desc.len != set->klen)
3606 goto err2; 3628 goto err2;
3607 3629
3630 nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, desc.len);
3631
3632 err = -ENOMEM;
3633 elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data, NULL, 0,
3634 GFP_KERNEL);
3635 if (elem.priv == NULL)
3636 goto err2;
3637
3638 ext = nft_set_elem_ext(set, elem.priv);
3639 if (flags)
3640 *nft_set_ext_flags(ext) = flags;
3641
3608 trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set); 3642 trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set);
3609 if (trans == NULL) { 3643 if (trans == NULL) {
3610 err = -ENOMEM; 3644 err = -ENOMEM;
3611 goto err2; 3645 goto err3;
3612 } 3646 }
3613 3647
3614 elem.priv = set->ops->deactivate(set, &elem); 3648 priv = set->ops->deactivate(set, &elem);
3615 if (elem.priv == NULL) { 3649 if (priv == NULL) {
3616 err = -ENOENT; 3650 err = -ENOENT;
3617 goto err3; 3651 goto err4;
3618 } 3652 }
3653 kfree(elem.priv);
3654 elem.priv = priv;
3619 3655
3620 nft_trans_elem(trans) = elem; 3656 nft_trans_elem(trans) = elem;
3621 list_add_tail(&trans->list, &ctx->net->nft.commit_list); 3657 list_add_tail(&trans->list, &ctx->net->nft.commit_list);
3622 return 0; 3658 return 0;
3623 3659
3624err3: 3660err4:
3625 kfree(trans); 3661 kfree(trans);
3662err3:
3663 kfree(elem.priv);
3626err2: 3664err2:
3627 nft_data_uninit(&elem.key.val, desc.type); 3665 nft_data_uninit(&elem.key.val, desc.type);
3628err1: 3666err1:
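Two things fall out of this hunk: element-flag validation is factored into nft_setelem_parse_flags() so the add and delete paths agree, and deletion now builds a temporary element carrying the interval-end flag so ->deactivate() can tell range starts from range ends. The validation rule, restated as a hedged stand-alone check (the constants are the real uapi ones, the helper name here is illustrative only):

static int check_elem_flags(u32 set_flags, u32 elem_flags)
{
        /* the only element flag defined is NFT_SET_ELEM_INTERVAL_END ... */
        if (elem_flags & ~NFT_SET_ELEM_INTERVAL_END)
                return -EINVAL;
        /* ... and it is only meaningful on interval sets */
        if ((elem_flags & NFT_SET_ELEM_INTERVAL_END) &&
            !(set_flags & NFT_SET_INTERVAL))
                return -EINVAL;
        return 0;
}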
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 2671b9deb103..3c84f14326f5 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -306,10 +306,10 @@ static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout)
306 int i; 306 int i;
307 307
308 local_bh_disable(); 308 local_bh_disable();
309 for (i = 0; i < net->ct.htable_size; i++) { 309 for (i = 0; i < nf_conntrack_htable_size; i++) {
310 nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); 310 nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
311 if (i < net->ct.htable_size) { 311 if (i < nf_conntrack_htable_size) {
312 hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode) 312 hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode)
313 untimeout(h, timeout); 313 untimeout(h, timeout);
314 } 314 }
315 spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); 315 spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 25998facefd0..137e308d5b24 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -198,6 +198,14 @@ static void nft_ct_set_eval(const struct nft_expr *expr,
198 } 198 }
199 break; 199 break;
200#endif 200#endif
201#ifdef CONFIG_NF_CONNTRACK_LABELS
202 case NFT_CT_LABELS:
203 nf_connlabels_replace(ct,
204 &regs->data[priv->sreg],
205 &regs->data[priv->sreg],
206 NF_CT_LABELS_MAX_SIZE / sizeof(u32));
207 break;
208#endif
201 default: 209 default:
202 break; 210 break;
203 } 211 }
@@ -365,6 +373,16 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
365 len = FIELD_SIZEOF(struct nf_conn, mark); 373 len = FIELD_SIZEOF(struct nf_conn, mark);
366 break; 374 break;
367#endif 375#endif
376#ifdef CONFIG_NF_CONNTRACK_LABELS
377 case NFT_CT_LABELS:
378 if (tb[NFTA_CT_DIRECTION])
379 return -EINVAL;
380 len = NF_CT_LABELS_MAX_SIZE;
381 err = nf_connlabels_get(ctx->net, (len * BITS_PER_BYTE) - 1);
382 if (err)
383 return err;
384 break;
385#endif
368 default: 386 default:
369 return -EOPNOTSUPP; 387 return -EOPNOTSUPP;
370 } 388 }
@@ -384,6 +402,18 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
384static void nft_ct_destroy(const struct nft_ctx *ctx, 402static void nft_ct_destroy(const struct nft_ctx *ctx,
385 const struct nft_expr *expr) 403 const struct nft_expr *expr)
386{ 404{
405 struct nft_ct *priv = nft_expr_priv(expr);
406
407 switch (priv->key) {
408#ifdef CONFIG_NF_CONNTRACK_LABELS
409 case NFT_CT_LABELS:
410 nf_connlabels_put(ctx->net);
411 break;
412#endif
413 default:
414 break;
415 }
416
387 nft_ct_l3proto_module_put(ctx->afi->family); 417 nft_ct_l3proto_module_put(ctx->afi->family);
388} 418}
389 419
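Setting connection labels from nftables requires the connlabels extension to be sized and refcounted for the lifetime of the expression, which is why ->init() takes a reference and the new ->destroy() switch drops it. A hedged sketch of that bracket, using the real helpers behind an illustrative pair of wrappers:

static int labels_hold_sketch(struct net *net)
{
        /* ask for enough label bits to cover the whole labels area */
        return nf_connlabels_get(net, NF_CT_LABELS_MAX_SIZE * BITS_PER_BYTE - 1);
}

static void labels_release_sketch(struct net *net)
{
        nf_connlabels_put(net);
}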
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
index 1c30f41cff5b..f762094af7c1 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_rbtree.c
@@ -29,6 +29,17 @@ struct nft_rbtree_elem {
29 struct nft_set_ext ext; 29 struct nft_set_ext ext;
30}; 30};
31 31
32static bool nft_rbtree_interval_end(const struct nft_rbtree_elem *rbe)
33{
34 return nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) &&
35 (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END);
36}
37
38static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
39 const struct nft_rbtree_elem *interval)
40{
41 return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0;
42}
32 43
33static bool nft_rbtree_lookup(const struct nft_set *set, const u32 *key, 44static bool nft_rbtree_lookup(const struct nft_set *set, const u32 *key,
34 const struct nft_set_ext **ext) 45 const struct nft_set_ext **ext)
@@ -37,6 +48,7 @@ static bool nft_rbtree_lookup(const struct nft_set *set, const u32 *key,
37 const struct nft_rbtree_elem *rbe, *interval = NULL; 48 const struct nft_rbtree_elem *rbe, *interval = NULL;
38 const struct rb_node *parent; 49 const struct rb_node *parent;
39 u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); 50 u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
51 const void *this;
40 int d; 52 int d;
41 53
42 spin_lock_bh(&nft_rbtree_lock); 54 spin_lock_bh(&nft_rbtree_lock);
@@ -44,9 +56,16 @@ static bool nft_rbtree_lookup(const struct nft_set *set, const u32 *key,
44 while (parent != NULL) { 56 while (parent != NULL) {
45 rbe = rb_entry(parent, struct nft_rbtree_elem, node); 57 rbe = rb_entry(parent, struct nft_rbtree_elem, node);
46 58
47 d = memcmp(nft_set_ext_key(&rbe->ext), key, set->klen); 59 this = nft_set_ext_key(&rbe->ext);
60 d = memcmp(this, key, set->klen);
48 if (d < 0) { 61 if (d < 0) {
49 parent = parent->rb_left; 62 parent = parent->rb_left;
63 /* In case of adjacent ranges, we always see the high
64 * part of the range in first place, before the low one.
65 * So don't update interval if the keys are equal.
66 */
67 if (interval && nft_rbtree_equal(set, this, interval))
68 continue;
50 interval = rbe; 69 interval = rbe;
51 } else if (d > 0) 70 } else if (d > 0)
52 parent = parent->rb_right; 71 parent = parent->rb_right;
@@ -56,9 +75,7 @@ found:
56 parent = parent->rb_left; 75 parent = parent->rb_left;
57 continue; 76 continue;
58 } 77 }
59 if (nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) && 78 if (nft_rbtree_interval_end(rbe))
60 *nft_set_ext_flags(&rbe->ext) &
61 NFT_SET_ELEM_INTERVAL_END)
62 goto out; 79 goto out;
63 spin_unlock_bh(&nft_rbtree_lock); 80 spin_unlock_bh(&nft_rbtree_lock);
64 81
@@ -98,9 +115,16 @@ static int __nft_rbtree_insert(const struct nft_set *set,
98 else if (d > 0) 115 else if (d > 0)
99 p = &parent->rb_right; 116 p = &parent->rb_right;
100 else { 117 else {
101 if (nft_set_elem_active(&rbe->ext, genmask)) 118 if (nft_set_elem_active(&rbe->ext, genmask)) {
102 return -EEXIST; 119 if (nft_rbtree_interval_end(rbe) &&
103 p = &parent->rb_left; 120 !nft_rbtree_interval_end(new))
121 p = &parent->rb_left;
122 else if (!nft_rbtree_interval_end(rbe) &&
123 nft_rbtree_interval_end(new))
124 p = &parent->rb_right;
125 else
126 return -EEXIST;
127 }
104 } 128 }
105 } 129 }
106 rb_link_node(&new->node, parent, p); 130 rb_link_node(&new->node, parent, p);
@@ -145,7 +169,7 @@ static void *nft_rbtree_deactivate(const struct nft_set *set,
145{ 169{
146 const struct nft_rbtree *priv = nft_set_priv(set); 170 const struct nft_rbtree *priv = nft_set_priv(set);
147 const struct rb_node *parent = priv->root.rb_node; 171 const struct rb_node *parent = priv->root.rb_node;
148 struct nft_rbtree_elem *rbe; 172 struct nft_rbtree_elem *rbe, *this = elem->priv;
149 u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); 173 u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
150 int d; 174 int d;
151 175
@@ -163,6 +187,15 @@ static void *nft_rbtree_deactivate(const struct nft_set *set,
163 parent = parent->rb_left; 187 parent = parent->rb_left;
164 continue; 188 continue;
165 } 189 }
190 if (nft_rbtree_interval_end(rbe) &&
191 !nft_rbtree_interval_end(this)) {
192 parent = parent->rb_left;
193 continue;
194 } else if (!nft_rbtree_interval_end(rbe) &&
195 nft_rbtree_interval_end(this)) {
196 parent = parent->rb_right;
197 continue;
198 }
166 nft_set_elem_change_active(set, &rbe->ext); 199 nft_set_elem_change_active(set, &rbe->ext);
167 return rbe; 200 return rbe;
168 } 201 }
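The rbtree backend now distinguishes interval-start from interval-end elements that share a key instead of rejecting them as duplicates, which is what allows adjacent ranges and what deactivate() needs the flag for. A one-line restatement of the equal-key rule, purely illustrative:

/* with equal keys, only two elements of the same kind collide (-EEXIST);
 * an end element and a start element are kept as separate tree nodes */
static bool equal_keys_collide(bool existing_is_end, bool new_is_end)
{
        return existing_is_end == new_is_end;
}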
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index fbb7a2b57b44..61fff422424f 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -64,18 +64,26 @@ struct nci_conn_info *nci_get_conn_info_by_conn_id(struct nci_dev *ndev,
64 return NULL; 64 return NULL;
65} 65}
66 66
67int nci_get_conn_info_by_id(struct nci_dev *ndev, u8 id) 67int nci_get_conn_info_by_dest_type_params(struct nci_dev *ndev, u8 dest_type,
68 struct dest_spec_params *params)
68{ 69{
69 struct nci_conn_info *conn_info; 70 struct nci_conn_info *conn_info;
70 71
71 list_for_each_entry(conn_info, &ndev->conn_info_list, list) { 72 list_for_each_entry(conn_info, &ndev->conn_info_list, list) {
72 if (conn_info->id == id) 73 if (conn_info->dest_type == dest_type) {
73 return conn_info->conn_id; 74 if (!params)
75 return conn_info->conn_id;
76 if (conn_info) {
77 if (params->id == conn_info->dest_params->id &&
78 params->protocol == conn_info->dest_params->protocol)
79 return conn_info->conn_id;
80 }
81 }
74 } 82 }
75 83
76 return -EINVAL; 84 return -EINVAL;
77} 85}
78EXPORT_SYMBOL(nci_get_conn_info_by_id); 86EXPORT_SYMBOL(nci_get_conn_info_by_dest_type_params);
79 87
80/* ---- NCI requests ---- */ 88/* ---- NCI requests ---- */
81 89
@@ -392,6 +400,83 @@ int nci_core_init(struct nci_dev *ndev)
392} 400}
393EXPORT_SYMBOL(nci_core_init); 401EXPORT_SYMBOL(nci_core_init);
394 402
403struct nci_loopback_data {
404 u8 conn_id;
405 struct sk_buff *data;
406};
407
408static void nci_send_data_req(struct nci_dev *ndev, unsigned long opt)
409{
410 struct nci_loopback_data *data = (struct nci_loopback_data *)opt;
411
412 nci_send_data(ndev, data->conn_id, data->data);
413}
414
415static void nci_nfcc_loopback_cb(void *context, struct sk_buff *skb, int err)
416{
417 struct nci_dev *ndev = (struct nci_dev *)context;
418 struct nci_conn_info *conn_info;
419
420 conn_info = nci_get_conn_info_by_conn_id(ndev, ndev->cur_conn_id);
421 if (!conn_info) {
422 nci_req_complete(ndev, NCI_STATUS_REJECTED);
423 return;
424 }
425
426 conn_info->rx_skb = skb;
427
428 nci_req_complete(ndev, NCI_STATUS_OK);
429}
430
431int nci_nfcc_loopback(struct nci_dev *ndev, void *data, size_t data_len,
432 struct sk_buff **resp)
433{
434 int r;
435 struct nci_loopback_data loopback_data;
436 struct nci_conn_info *conn_info;
437 struct sk_buff *skb;
438 int conn_id = nci_get_conn_info_by_dest_type_params(ndev,
439 NCI_DESTINATION_NFCC_LOOPBACK, NULL);
440
441 if (conn_id < 0) {
442 r = nci_core_conn_create(ndev, NCI_DESTINATION_NFCC_LOOPBACK,
443 0, 0, NULL);
444 if (r != NCI_STATUS_OK)
445 return r;
446
447 conn_id = nci_get_conn_info_by_dest_type_params(ndev,
448 NCI_DESTINATION_NFCC_LOOPBACK,
449 NULL);
450 }
451
452 conn_info = nci_get_conn_info_by_conn_id(ndev, conn_id);
453 if (!conn_info)
454 return -EPROTO;
455
456 /* store cb and context to be used on receiving data */
457 conn_info->data_exchange_cb = nci_nfcc_loopback_cb;
458 conn_info->data_exchange_cb_context = ndev;
459
460 skb = nci_skb_alloc(ndev, NCI_DATA_HDR_SIZE + data_len, GFP_KERNEL);
461 if (!skb)
462 return -ENOMEM;
463
464 skb_reserve(skb, NCI_DATA_HDR_SIZE);
465 memcpy(skb_put(skb, data_len), data, data_len);
466
467 loopback_data.conn_id = conn_id;
468 loopback_data.data = skb;
469
470 ndev->cur_conn_id = conn_id;
471 r = nci_request(ndev, nci_send_data_req, (unsigned long)&loopback_data,
472 msecs_to_jiffies(NCI_DATA_TIMEOUT));
473 if (r == NCI_STATUS_OK && resp)
474 *resp = conn_info->rx_skb;
475
476 return r;
477}
478EXPORT_SYMBOL(nci_nfcc_loopback);
479
395static int nci_open_device(struct nci_dev *ndev) 480static int nci_open_device(struct nci_dev *ndev)
396{ 481{
397 int rc = 0; 482 int rc = 0;
@@ -610,9 +695,6 @@ int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type,
610 struct nci_core_conn_create_cmd *cmd; 695 struct nci_core_conn_create_cmd *cmd;
611 struct core_conn_create_data data; 696 struct core_conn_create_data data;
612 697
613 if (!number_destination_params)
614 return -EINVAL;
615
616 data.length = params_len + sizeof(struct nci_core_conn_create_cmd); 698 data.length = params_len + sizeof(struct nci_core_conn_create_cmd);
617 cmd = kzalloc(data.length, GFP_KERNEL); 699 cmd = kzalloc(data.length, GFP_KERNEL);
618 if (!cmd) 700 if (!cmd)
@@ -620,17 +702,23 @@ int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type,
620 702
621 cmd->destination_type = destination_type; 703 cmd->destination_type = destination_type;
622 cmd->number_destination_params = number_destination_params; 704 cmd->number_destination_params = number_destination_params;
623 memcpy(cmd->params, params, params_len);
624 705
625 data.cmd = cmd; 706 data.cmd = cmd;
626 707
627 if (params->length > 0) 708 if (params) {
628 ndev->cur_id = params->value[DEST_SPEC_PARAMS_ID_INDEX]; 709 memcpy(cmd->params, params, params_len);
629 else 710 if (params->length > 0)
630 ndev->cur_id = 0; 711 memcpy(&ndev->cur_params,
712 &params->value[DEST_SPEC_PARAMS_ID_INDEX],
713 sizeof(struct dest_spec_params));
714 else
715 ndev->cur_params.id = 0;
716 } else {
717 ndev->cur_params.id = 0;
718 }
719 ndev->cur_dest_type = destination_type;
631 720
632 r = __nci_request(ndev, nci_core_conn_create_req, 721 r = __nci_request(ndev, nci_core_conn_create_req, (unsigned long)&data,
633 (unsigned long)&data,
634 msecs_to_jiffies(NCI_CMD_TIMEOUT)); 722 msecs_to_jiffies(NCI_CMD_TIMEOUT));
635 kfree(cmd); 723 kfree(cmd);
636 return r; 724 return r;
@@ -646,6 +734,7 @@ static void nci_core_conn_close_req(struct nci_dev *ndev, unsigned long opt)
646 734
647int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id) 735int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id)
648{ 736{
737 ndev->cur_conn_id = conn_id;
649 return __nci_request(ndev, nci_core_conn_close_req, conn_id, 738 return __nci_request(ndev, nci_core_conn_close_req, conn_id,
650 msecs_to_jiffies(NCI_CMD_TIMEOUT)); 739 msecs_to_jiffies(NCI_CMD_TIMEOUT));
651} 740}
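nci_nfcc_loopback() wraps the whole sequence of creating the NFCC loopback connection (if it does not exist yet), sending a data frame on it and handing back the echoed sk_buff. A hedged usage sketch from a caller's point of view; the test pattern, the error policy and the function name nci_selftest_sketch are illustrative only:

static int nci_selftest_sketch(struct nci_dev *ndev)
{
        u8 pattern[] = { 0x01, 0x02, 0x03, 0x04 };
        struct sk_buff *resp = NULL;
        int r;

        r = nci_nfcc_loopback(ndev, pattern, sizeof(pattern), &resp);
        if (r)
                return r;

        /* resp holds the frame echoed back by the controller */
        kfree_skb(resp);
        return 0;
}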
diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c
index 2ada2b39e355..1e8c1a12aaec 100644
--- a/net/nfc/nci/ntf.c
+++ b/net/nfc/nci/ntf.c
@@ -734,7 +734,7 @@ static void nci_nfcee_discover_ntf_packet(struct nci_dev *ndev,
734 * “HCI Access”, even if the HCI Network contains multiple NFCEEs. 734 * “HCI Access”, even if the HCI Network contains multiple NFCEEs.
735 */ 735 */
736 ndev->hci_dev->nfcee_id = nfcee_ntf->nfcee_id; 736 ndev->hci_dev->nfcee_id = nfcee_ntf->nfcee_id;
737 ndev->cur_id = nfcee_ntf->nfcee_id; 737 ndev->cur_params.id = nfcee_ntf->nfcee_id;
738 738
739 nci_req_complete(ndev, status); 739 nci_req_complete(ndev, status);
740} 740}
diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c
index 9b6eb913d801..e3bbf1937d0e 100644
--- a/net/nfc/nci/rsp.c
+++ b/net/nfc/nci/rsp.c
@@ -226,7 +226,7 @@ static void nci_core_conn_create_rsp_packet(struct nci_dev *ndev,
226 struct sk_buff *skb) 226 struct sk_buff *skb)
227{ 227{
228 __u8 status = skb->data[0]; 228 __u8 status = skb->data[0];
229 struct nci_conn_info *conn_info; 229 struct nci_conn_info *conn_info = NULL;
230 struct nci_core_conn_create_rsp *rsp; 230 struct nci_core_conn_create_rsp *rsp;
231 231
232 pr_debug("status 0x%x\n", status); 232 pr_debug("status 0x%x\n", status);
@@ -241,7 +241,17 @@ static void nci_core_conn_create_rsp_packet(struct nci_dev *ndev,
241 goto exit; 241 goto exit;
242 } 242 }
243 243
244 conn_info->id = ndev->cur_id; 244 conn_info->dest_params = devm_kzalloc(&ndev->nfc_dev->dev,
245 sizeof(struct dest_spec_params),
246 GFP_KERNEL);
247 if (!conn_info->dest_params) {
248 status = NCI_STATUS_REJECTED;
249 goto free_conn_info;
250 }
251
252 conn_info->dest_type = ndev->cur_dest_type;
253 conn_info->dest_params->id = ndev->cur_params.id;
254 conn_info->dest_params->protocol = ndev->cur_params.protocol;
245 conn_info->conn_id = rsp->conn_id; 255 conn_info->conn_id = rsp->conn_id;
246 256
247 /* Note: data_exchange_cb and data_exchange_cb_context need to 257 /* Note: data_exchange_cb and data_exchange_cb_context need to
@@ -251,7 +261,7 @@ static void nci_core_conn_create_rsp_packet(struct nci_dev *ndev,
251 INIT_LIST_HEAD(&conn_info->list); 261 INIT_LIST_HEAD(&conn_info->list);
252 list_add(&conn_info->list, &ndev->conn_info_list); 262 list_add(&conn_info->list, &ndev->conn_info_list);
253 263
254 if (ndev->cur_id == ndev->hci_dev->nfcee_id) 264 if (ndev->cur_params.id == ndev->hci_dev->nfcee_id)
255 ndev->hci_dev->conn_info = conn_info; 265 ndev->hci_dev->conn_info = conn_info;
256 266
257 conn_info->conn_id = rsp->conn_id; 267 conn_info->conn_id = rsp->conn_id;
@@ -259,7 +269,11 @@ static void nci_core_conn_create_rsp_packet(struct nci_dev *ndev,
259 atomic_set(&conn_info->credits_cnt, rsp->credits_cnt); 269 atomic_set(&conn_info->credits_cnt, rsp->credits_cnt);
260 } 270 }
261 271
272free_conn_info:
273 if (status == NCI_STATUS_REJECTED)
274 devm_kfree(&ndev->nfc_dev->dev, conn_info);
262exit: 275exit:
276
263 nci_req_complete(ndev, status); 277 nci_req_complete(ndev, status);
264} 278}
265 279
@@ -271,7 +285,8 @@ static void nci_core_conn_close_rsp_packet(struct nci_dev *ndev,
271 285
272 pr_debug("status 0x%x\n", status); 286 pr_debug("status 0x%x\n", status);
273 if (status == NCI_STATUS_OK) { 287 if (status == NCI_STATUS_OK) {
274 conn_info = nci_get_conn_info_by_conn_id(ndev, ndev->cur_id); 288 conn_info = nci_get_conn_info_by_conn_id(ndev,
289 ndev->cur_conn_id);
275 if (conn_info) { 290 if (conn_info) {
276 list_del(&conn_info->list); 291 list_del(&conn_info->list);
277 devm_kfree(&ndev->nfc_dev->dev, conn_info); 292 devm_kfree(&ndev->nfc_dev->dev, conn_info);
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 9741a76c7405..9f0bc49fa969 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -439,20 +439,12 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
439 u8 protonum; 439 u8 protonum;
440 440
441 l3proto = __nf_ct_l3proto_find(l3num); 441 l3proto = __nf_ct_l3proto_find(l3num);
442 if (!l3proto) {
443 pr_debug("ovs_ct_find_existing: Can't get l3proto\n");
444 return NULL;
445 }
446 if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff, 442 if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff,
447 &protonum) <= 0) { 443 &protonum) <= 0) {
448 pr_debug("ovs_ct_find_existing: Can't get protonum\n"); 444 pr_debug("ovs_ct_find_existing: Can't get protonum\n");
449 return NULL; 445 return NULL;
450 } 446 }
451 l4proto = __nf_ct_l4proto_find(l3num, protonum); 447 l4proto = __nf_ct_l4proto_find(l3num, protonum);
452 if (!l4proto) {
453 pr_debug("ovs_ct_find_existing: Can't get l4proto\n");
454 return NULL;
455 }
456 if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num, 448 if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
457 protonum, net, &tuple, l3proto, l4proto)) { 449 protonum, net, &tuple, l3proto, l4proto)) {
458 pr_debug("ovs_ct_find_existing: Can't get tuple\n"); 450 pr_debug("ovs_ct_find_existing: Can't get tuple\n");
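The dropped NULL checks rest on a property of the conntrack lookup helpers rather than on anything shown in this hunk, so it is stated here explicitly as an assumption: __nf_ct_l3proto_find() and __nf_ct_l4proto_find() fall back to the generic protocol handlers when nothing specific is registered, so they never return NULL.

/* illustrative wrapper only; the never-NULL property is the assumption above */
static const struct nf_conntrack_l4proto *find_l4_sketch(u16 l3num, u8 protonum)
{
        return __nf_ct_l4proto_find(l3num, protonum);
}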
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 22d9a5316304..856bd8dba676 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -738,9 +738,9 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
738 len += nla_total_size(acts->orig_len); 738 len += nla_total_size(acts->orig_len);
739 739
740 return len 740 return len
741 + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */ 741 + nla_total_size_64bit(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
742 + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */ 742 + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
743 + nla_total_size(8); /* OVS_FLOW_ATTR_USED */ 743 + nla_total_size_64bit(8); /* OVS_FLOW_ATTR_USED */
744} 744}
745 745
746/* Called with ovs_mutex or RCU read lock. */ 746/* Called with ovs_mutex or RCU read lock. */
@@ -759,7 +759,9 @@ static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
759 return -EMSGSIZE; 759 return -EMSGSIZE;
760 760
761 if (stats.n_packets && 761 if (stats.n_packets &&
762 nla_put(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats)) 762 nla_put_64bit(skb, OVS_FLOW_ATTR_STATS,
763 sizeof(struct ovs_flow_stats), &stats,
764 OVS_FLOW_ATTR_PAD))
763 return -EMSGSIZE; 765 return -EMSGSIZE;
764 766
765 if ((u8)ntohs(tcp_flags) && 767 if ((u8)ntohs(tcp_flags) &&
@@ -1435,8 +1437,8 @@ static size_t ovs_dp_cmd_msg_size(void)
1435 size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header)); 1437 size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
1436 1438
1437 msgsize += nla_total_size(IFNAMSIZ); 1439 msgsize += nla_total_size(IFNAMSIZ);
1438 msgsize += nla_total_size(sizeof(struct ovs_dp_stats)); 1440 msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
1439 msgsize += nla_total_size(sizeof(struct ovs_dp_megaflow_stats)); 1441 msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats));
1440 msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */ 1442 msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
1441 1443
1442 return msgsize; 1444 return msgsize;
@@ -1463,13 +1465,13 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
1463 goto nla_put_failure; 1465 goto nla_put_failure;
1464 1466
1465 get_dp_stats(dp, &dp_stats, &dp_megaflow_stats); 1467 get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
1466 if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), 1468 if (nla_put_64bit(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
1467 &dp_stats)) 1469 &dp_stats, OVS_DP_ATTR_PAD))
1468 goto nla_put_failure; 1470 goto nla_put_failure;
1469 1471
1470 if (nla_put(skb, OVS_DP_ATTR_MEGAFLOW_STATS, 1472 if (nla_put_64bit(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
1471 sizeof(struct ovs_dp_megaflow_stats), 1473 sizeof(struct ovs_dp_megaflow_stats),
1472 &dp_megaflow_stats)) 1474 &dp_megaflow_stats, OVS_DP_ATTR_PAD))
1473 goto nla_put_failure; 1475 goto nla_put_failure;
1474 1476
1475 if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features)) 1477 if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
@@ -1838,8 +1840,9 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
1838 goto nla_put_failure; 1840 goto nla_put_failure;
1839 1841
1840 ovs_vport_get_stats(vport, &vport_stats); 1842 ovs_vport_get_stats(vport, &vport_stats);
1841 if (nla_put(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats), 1843 if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS,
1842 &vport_stats)) 1844 sizeof(struct ovs_vport_stats), &vport_stats,
1845 OVS_VPORT_ATTR_PAD))
1843 goto nla_put_failure; 1846 goto nla_put_failure;
1844 1847
1845 if (ovs_vport_get_upcall_portids(vport, skb)) 1848 if (ovs_vport_get_upcall_portids(vport, skb))
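The switch from nla_put() to nla_put_64bit() (and from nla_total_size() to nla_total_size_64bit() in the size estimates) is about alignment: when a 64-bit payload would otherwise start at a 32-bit-only aligned offset, a zero-length pad attribute is emitted first so user space can read the u64 counters without unaligned accesses. A minimal hedged sketch of the idiom, reusing the attribute names from the hunk:

static int put_dp_stats_sketch(struct sk_buff *skb,
                               const struct ovs_dp_stats *stats)
{
        /* OVS_DP_ATTR_PAD is inserted automatically only when needed */
        return nla_put_64bit(skb, OVS_DP_ATTR_STATS, sizeof(*stats), stats,
                             OVS_DP_ATTR_PAD);
}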
diff --git a/net/qrtr/Kconfig b/net/qrtr/Kconfig
new file mode 100644
index 000000000000..673fd1f86ebe
--- /dev/null
+++ b/net/qrtr/Kconfig
@@ -0,0 +1,24 @@
1# Qualcomm IPC Router configuration
2#
3
4config QRTR
5 tristate "Qualcomm IPC Router support"
6 depends on ARCH_QCOM || COMPILE_TEST
7 ---help---
8 Say Y if you intend to use Qualcomm IPC router protocol. The
9 protocol is used to communicate with services provided by other
10 hardware blocks in the system.
11
12 In order to do service lookups, a userspace daemon is required to
13 maintain a service listing.
14
15if QRTR
16
17config QRTR_SMD
18 tristate "SMD IPC Router channels"
19 depends on QCOM_SMD || COMPILE_TEST
20 ---help---
21 Say Y here to support SMD based ipcrouter channels. SMD is the
22 most common transport for IPC Router.
23
24endif # QRTR
diff --git a/net/qrtr/Makefile b/net/qrtr/Makefile
new file mode 100644
index 000000000000..6c00dc623b7e
--- /dev/null
+++ b/net/qrtr/Makefile
@@ -0,0 +1,2 @@
1obj-$(CONFIG_QRTR) := qrtr.o
2obj-$(CONFIG_QRTR_SMD) += smd.o
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
new file mode 100644
index 000000000000..c985ecbe9bd6
--- /dev/null
+++ b/net/qrtr/qrtr.c
@@ -0,0 +1,1007 @@
1/*
2 * Copyright (c) 2015, Sony Mobile Communications Inc.
3 * Copyright (c) 2013, The Linux Foundation. All rights reserved.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 and
7 * only version 2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 */
14#include <linux/module.h>
15#include <linux/netlink.h>
16#include <linux/qrtr.h>
17#include <linux/termios.h> /* For TIOCINQ/OUTQ */
18
19#include <net/sock.h>
20
21#include "qrtr.h"
22
23#define QRTR_PROTO_VER 1
24
25/* auto-bind range */
26#define QRTR_MIN_EPH_SOCKET 0x4000
27#define QRTR_MAX_EPH_SOCKET 0x7fff
28
29enum qrtr_pkt_type {
30 QRTR_TYPE_DATA = 1,
31 QRTR_TYPE_HELLO = 2,
32 QRTR_TYPE_BYE = 3,
33 QRTR_TYPE_NEW_SERVER = 4,
34 QRTR_TYPE_DEL_SERVER = 5,
35 QRTR_TYPE_DEL_CLIENT = 6,
36 QRTR_TYPE_RESUME_TX = 7,
37 QRTR_TYPE_EXIT = 8,
38 QRTR_TYPE_PING = 9,
39};
40
41/**
42 * struct qrtr_hdr - (I|R)PCrouter packet header
43 * @version: protocol version
44 * @type: packet type; one of QRTR_TYPE_*
45 * @src_node_id: source node
46 * @src_port_id: source port
47 * @confirm_rx: boolean; whether a resume-tx packet should be sent in reply
48 * @size: length of packet, excluding this header
49 * @dst_node_id: destination node
50 * @dst_port_id: destination port
51 */
52struct qrtr_hdr {
53 __le32 version;
54 __le32 type;
55 __le32 src_node_id;
56 __le32 src_port_id;
57 __le32 confirm_rx;
58 __le32 size;
59 __le32 dst_node_id;
60 __le32 dst_port_id;
61} __packed;
62
63#define QRTR_HDR_SIZE sizeof(struct qrtr_hdr)
64#define QRTR_NODE_BCAST ((unsigned int)-1)
65#define QRTR_PORT_CTRL ((unsigned int)-2)
66
67struct qrtr_sock {
68 /* WARNING: sk must be the first member */
69 struct sock sk;
70 struct sockaddr_qrtr us;
71 struct sockaddr_qrtr peer;
72};
73
74static inline struct qrtr_sock *qrtr_sk(struct sock *sk)
75{
76 BUILD_BUG_ON(offsetof(struct qrtr_sock, sk) != 0);
77 return container_of(sk, struct qrtr_sock, sk);
78}
79
80static unsigned int qrtr_local_nid = -1;
81
82/* for node ids */
83static RADIX_TREE(qrtr_nodes, GFP_KERNEL);
84/* broadcast list */
85static LIST_HEAD(qrtr_all_nodes);
86/* lock for qrtr_nodes, qrtr_all_nodes and node reference */
87static DEFINE_MUTEX(qrtr_node_lock);
88
89/* local port allocation management */
90static DEFINE_IDR(qrtr_ports);
91static DEFINE_MUTEX(qrtr_port_lock);
92
93/**
94 * struct qrtr_node - endpoint node
95 * @ep_lock: lock for endpoint management and callbacks
96 * @ep: endpoint
97 * @ref: reference count for node
98 * @nid: node id
99 * @rx_queue: receive queue
100 * @work: scheduled work struct for recv work
101 * @item: list item for broadcast list
102 */
103struct qrtr_node {
104 struct mutex ep_lock;
105 struct qrtr_endpoint *ep;
106 struct kref ref;
107 unsigned int nid;
108
109 struct sk_buff_head rx_queue;
110 struct work_struct work;
111 struct list_head item;
112};
113
114/* Release node resources and free the node.
115 *
116 * Do not call directly, use qrtr_node_release. To be used with
117 * kref_put_mutex. As such, the node mutex is expected to be locked on call.
118 */
119static void __qrtr_node_release(struct kref *kref)
120{
121 struct qrtr_node *node = container_of(kref, struct qrtr_node, ref);
122
123 if (node->nid != QRTR_EP_NID_AUTO)
124 radix_tree_delete(&qrtr_nodes, node->nid);
125
126 list_del(&node->item);
127 mutex_unlock(&qrtr_node_lock);
128
129 skb_queue_purge(&node->rx_queue);
130 kfree(node);
131}
132
133/* Increment reference to node. */
134static struct qrtr_node *qrtr_node_acquire(struct qrtr_node *node)
135{
136 if (node)
137 kref_get(&node->ref);
138 return node;
139}
140
141/* Decrement reference to node and release as necessary. */
142static void qrtr_node_release(struct qrtr_node *node)
143{
144 if (!node)
145 return;
146 kref_put_mutex(&node->ref, __qrtr_node_release, &qrtr_node_lock);
147}
148
149/* Pass an outgoing packet socket buffer to the endpoint driver. */
150static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb)
151{
152 int rc = -ENODEV;
153
154 mutex_lock(&node->ep_lock);
155 if (node->ep)
156 rc = node->ep->xmit(node->ep, skb);
157 else
158 kfree_skb(skb);
159 mutex_unlock(&node->ep_lock);
160
161 return rc;
162}
163
164/* Lookup node by id.
165 *
166 * callers must release with qrtr_node_release()
167 */
168static struct qrtr_node *qrtr_node_lookup(unsigned int nid)
169{
170 struct qrtr_node *node;
171
172 mutex_lock(&qrtr_node_lock);
173 node = radix_tree_lookup(&qrtr_nodes, nid);
174 node = qrtr_node_acquire(node);
175 mutex_unlock(&qrtr_node_lock);
176
177 return node;
178}
179
180/* Assign node id to node.
181 *
182 * This is mostly useful for automatic node id assignment, based on
183 * the source id in the incoming packet.
184 */
185static void qrtr_node_assign(struct qrtr_node *node, unsigned int nid)
186{
187 if (node->nid != QRTR_EP_NID_AUTO || nid == QRTR_EP_NID_AUTO)
188 return;
189
190 mutex_lock(&qrtr_node_lock);
191 radix_tree_insert(&qrtr_nodes, nid, node);
192 node->nid = nid;
193 mutex_unlock(&qrtr_node_lock);
194}
195
196/**
197 * qrtr_endpoint_post() - post incoming data
198 * @ep: endpoint handle
199 * @data: data pointer
200 * @len: size of data in bytes
201 *
202 * Return: 0 on success; negative error code on failure
203 */
204int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
205{
206 struct qrtr_node *node = ep->node;
207 const struct qrtr_hdr *phdr = data;
208 struct sk_buff *skb;
209 unsigned int psize;
210 unsigned int size;
211 unsigned int type;
212 unsigned int ver;
213 unsigned int dst;
214
215 if (len < QRTR_HDR_SIZE || len & 3)
216 return -EINVAL;
217
218 ver = le32_to_cpu(phdr->version);
219 size = le32_to_cpu(phdr->size);
220 type = le32_to_cpu(phdr->type);
221 dst = le32_to_cpu(phdr->dst_port_id);
222
223 psize = (size + 3) & ~3;
224
225 if (ver != QRTR_PROTO_VER)
226 return -EINVAL;
227
228 if (len != psize + QRTR_HDR_SIZE)
229 return -EINVAL;
230
231 if (dst != QRTR_PORT_CTRL && type != QRTR_TYPE_DATA)
232 return -EINVAL;
233
234 skb = netdev_alloc_skb(NULL, len);
235 if (!skb)
236 return -ENOMEM;
237
238 skb_reset_transport_header(skb);
239 memcpy(skb_put(skb, len), data, len);
240
241 skb_queue_tail(&node->rx_queue, skb);
242 schedule_work(&node->work);
243
244 return 0;
245}
246EXPORT_SYMBOL_GPL(qrtr_endpoint_post);
247
248/* Allocate and construct a resume-tx packet. */
249static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node,
250 u32 dst_node, u32 port)
251{
252 const int pkt_len = 20;
253 struct qrtr_hdr *hdr;
254 struct sk_buff *skb;
255 u32 *buf;
256
257 skb = alloc_skb(QRTR_HDR_SIZE + pkt_len, GFP_KERNEL);
258 if (!skb)
259 return NULL;
260 skb_reset_transport_header(skb);
261
262 hdr = (struct qrtr_hdr *)skb_put(skb, QRTR_HDR_SIZE);
263 hdr->version = cpu_to_le32(QRTR_PROTO_VER);
264 hdr->type = cpu_to_le32(QRTR_TYPE_RESUME_TX);
265 hdr->src_node_id = cpu_to_le32(src_node);
266 hdr->src_port_id = cpu_to_le32(QRTR_PORT_CTRL);
267 hdr->confirm_rx = cpu_to_le32(0);
268 hdr->size = cpu_to_le32(pkt_len);
269 hdr->dst_node_id = cpu_to_le32(dst_node);
270 hdr->dst_port_id = cpu_to_le32(QRTR_PORT_CTRL);
271
272 buf = (u32 *)skb_put(skb, pkt_len);
273 memset(buf, 0, pkt_len);
274 buf[0] = cpu_to_le32(QRTR_TYPE_RESUME_TX);
275 buf[1] = cpu_to_le32(src_node);
276 buf[2] = cpu_to_le32(port);
277
278 return skb;
279}
280
281static struct qrtr_sock *qrtr_port_lookup(int port);
282static void qrtr_port_put(struct qrtr_sock *ipc);
283
284/* Handle and route a received packet.
285 *
286 * This will auto-reply with resume-tx packet as necessary.
287 */
288static void qrtr_node_rx_work(struct work_struct *work)
289{
290 struct qrtr_node *node = container_of(work, struct qrtr_node, work);
291 struct sk_buff *skb;
292
293 while ((skb = skb_dequeue(&node->rx_queue)) != NULL) {
294 const struct qrtr_hdr *phdr;
295 u32 dst_node, dst_port;
296 struct qrtr_sock *ipc;
297 u32 src_node;
298 int confirm;
299
300 phdr = (const struct qrtr_hdr *)skb_transport_header(skb);
301 src_node = le32_to_cpu(phdr->src_node_id);
302 dst_node = le32_to_cpu(phdr->dst_node_id);
303 dst_port = le32_to_cpu(phdr->dst_port_id);
304 confirm = !!phdr->confirm_rx;
305
306 qrtr_node_assign(node, src_node);
307
308 ipc = qrtr_port_lookup(dst_port);
309 if (!ipc) {
310 kfree_skb(skb);
311 } else {
312 if (sock_queue_rcv_skb(&ipc->sk, skb))
313 kfree_skb(skb);
314
315 qrtr_port_put(ipc);
316 }
317
318 if (confirm) {
319 skb = qrtr_alloc_resume_tx(dst_node, node->nid, dst_port);
320 if (!skb)
321 break;
322 if (qrtr_node_enqueue(node, skb))
323 break;
324 }
325 }
326}
327
328/**
329 * qrtr_endpoint_register() - register a new endpoint
330 * @ep: endpoint to register
331 * @nid: desired node id; may be QRTR_EP_NID_AUTO for auto-assignment
332 * Return: 0 on success; negative error code on failure
333 *
334 * The specified endpoint must have the xmit function pointer set on call.
335 */
336int qrtr_endpoint_register(struct qrtr_endpoint *ep, unsigned int nid)
337{
338 struct qrtr_node *node;
339
340 if (!ep || !ep->xmit)
341 return -EINVAL;
342
343 node = kzalloc(sizeof(*node), GFP_KERNEL);
344 if (!node)
345 return -ENOMEM;
346
347 INIT_WORK(&node->work, qrtr_node_rx_work);
348 kref_init(&node->ref);
349 mutex_init(&node->ep_lock);
350 skb_queue_head_init(&node->rx_queue);
351 node->nid = QRTR_EP_NID_AUTO;
352 node->ep = ep;
353
354 qrtr_node_assign(node, nid);
355
356 mutex_lock(&qrtr_node_lock);
357 list_add(&node->item, &qrtr_all_nodes);
358 mutex_unlock(&qrtr_node_lock);
359 ep->node = node;
360
361 return 0;
362}
363EXPORT_SYMBOL_GPL(qrtr_endpoint_register);
364
365/**
366 * qrtr_endpoint_unregister - unregister endpoint
367 * @ep: endpoint to unregister
368 */
369void qrtr_endpoint_unregister(struct qrtr_endpoint *ep)
370{
371 struct qrtr_node *node = ep->node;
372
373 mutex_lock(&node->ep_lock);
374 node->ep = NULL;
375 mutex_unlock(&node->ep_lock);
376
377 qrtr_node_release(node);
378 ep->node = NULL;
379}
380EXPORT_SYMBOL_GPL(qrtr_endpoint_unregister);
381
382/* Lookup socket by port.
383 *
384 * Callers must release with qrtr_port_put()
385 */
386static struct qrtr_sock *qrtr_port_lookup(int port)
387{
388 struct qrtr_sock *ipc;
389
390 if (port == QRTR_PORT_CTRL)
391 port = 0;
392
393 mutex_lock(&qrtr_port_lock);
394 ipc = idr_find(&qrtr_ports, port);
395 if (ipc)
396 sock_hold(&ipc->sk);
397 mutex_unlock(&qrtr_port_lock);
398
399 return ipc;
400}
401
402/* Release acquired socket. */
403static void qrtr_port_put(struct qrtr_sock *ipc)
404{
405 sock_put(&ipc->sk);
406}
407
408/* Remove port assignment. */
409static void qrtr_port_remove(struct qrtr_sock *ipc)
410{
411 int port = ipc->us.sq_port;
412
413 if (port == QRTR_PORT_CTRL)
414 port = 0;
415
416 __sock_put(&ipc->sk);
417
418 mutex_lock(&qrtr_port_lock);
419 idr_remove(&qrtr_ports, port);
420 mutex_unlock(&qrtr_port_lock);
421}
422
423/* Assign port number to socket.
424 *
425 * Specify port in the integer pointed to by port, and it will be adjusted
426 * on return as necessary.
427 *
428 * Port may be:
429 * 0: Assign ephemeral port in [QRTR_MIN_EPH_SOCKET, QRTR_MAX_EPH_SOCKET]
430 * <QRTR_MIN_EPH_SOCKET: Specified; requires CAP_NET_ADMIN
431 * >QRTR_MIN_EPH_SOCKET: Specified; available to all
432 */
433static int qrtr_port_assign(struct qrtr_sock *ipc, int *port)
434{
435 int rc;
436
437 mutex_lock(&qrtr_port_lock);
438 if (!*port) {
439 rc = idr_alloc(&qrtr_ports, ipc,
440 QRTR_MIN_EPH_SOCKET, QRTR_MAX_EPH_SOCKET + 1,
441 GFP_ATOMIC);
442 if (rc >= 0)
443 *port = rc;
444 } else if (*port < QRTR_MIN_EPH_SOCKET && !capable(CAP_NET_ADMIN)) {
445 rc = -EACCES;
446 } else if (*port == QRTR_PORT_CTRL) {
447 rc = idr_alloc(&qrtr_ports, ipc, 0, 1, GFP_ATOMIC);
448 } else {
449 rc = idr_alloc(&qrtr_ports, ipc, *port, *port + 1, GFP_ATOMIC);
450 if (rc >= 0)
451 *port = rc;
452 }
453 mutex_unlock(&qrtr_port_lock);
454
455 if (rc == -ENOSPC)
456 return -EADDRINUSE;
457 else if (rc < 0)
458 return rc;
459
460 sock_hold(&ipc->sk);
461
462 return 0;
463}
464
465/* Bind socket to address.
466 *
467 * Socket should be locked upon call.
468 */
469static int __qrtr_bind(struct socket *sock,
470 const struct sockaddr_qrtr *addr, int zapped)
471{
472 struct qrtr_sock *ipc = qrtr_sk(sock->sk);
473 struct sock *sk = sock->sk;
474 int port;
475 int rc;
476
477 /* rebinding ok */
478 if (!zapped && addr->sq_port == ipc->us.sq_port)
479 return 0;
480
481 port = addr->sq_port;
482 rc = qrtr_port_assign(ipc, &port);
483 if (rc)
484 return rc;
485
486 /* unbind previous, if any */
487 if (!zapped)
488 qrtr_port_remove(ipc);
489 ipc->us.sq_port = port;
490
491 sock_reset_flag(sk, SOCK_ZAPPED);
492
493 return 0;
494}
495
496/* Auto bind to an ephemeral port. */
497static int qrtr_autobind(struct socket *sock)
498{
499 struct sock *sk = sock->sk;
500 struct sockaddr_qrtr addr;
501
502 if (!sock_flag(sk, SOCK_ZAPPED))
503 return 0;
504
505 addr.sq_family = AF_QIPCRTR;
506 addr.sq_node = qrtr_local_nid;
507 addr.sq_port = 0;
508
509 return __qrtr_bind(sock, &addr, 1);
510}
511
512/* Bind socket to specified sockaddr. */
513static int qrtr_bind(struct socket *sock, struct sockaddr *saddr, int len)
514{
515 DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, saddr);
516 struct qrtr_sock *ipc = qrtr_sk(sock->sk);
517 struct sock *sk = sock->sk;
518 int rc;
519
520 if (len < sizeof(*addr) || addr->sq_family != AF_QIPCRTR)
521 return -EINVAL;
522
523 if (addr->sq_node != ipc->us.sq_node)
524 return -EINVAL;
525
526 lock_sock(sk);
527 rc = __qrtr_bind(sock, addr, sock_flag(sk, SOCK_ZAPPED));
528 release_sock(sk);
529
530 return rc;
531}
532
533/* Queue packet to local peer socket. */
534static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb)
535{
536 const struct qrtr_hdr *phdr;
537 struct qrtr_sock *ipc;
538
539 phdr = (const struct qrtr_hdr *)skb_transport_header(skb);
540
541 ipc = qrtr_port_lookup(le32_to_cpu(phdr->dst_port_id));
542 if (!ipc || &ipc->sk == skb->sk) { /* do not send to self */
543 kfree_skb(skb);
544 return -ENODEV;
545 }
546
547 if (sock_queue_rcv_skb(&ipc->sk, skb)) {
548 qrtr_port_put(ipc);
549 kfree_skb(skb);
550 return -ENOSPC;
551 }
552
553 qrtr_port_put(ipc);
554
555 return 0;
556}
557
558/* Queue packet for broadcast. */
559static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb)
560{
561 struct sk_buff *skbn;
562
563 mutex_lock(&qrtr_node_lock);
564 list_for_each_entry(node, &qrtr_all_nodes, item) {
565 skbn = skb_clone(skb, GFP_KERNEL);
566 if (!skbn)
567 break;
568 skb_set_owner_w(skbn, skb->sk);
569 qrtr_node_enqueue(node, skbn);
570 }
571 mutex_unlock(&qrtr_node_lock);
572
573 qrtr_local_enqueue(node, skb);
574
575 return 0;
576}
577
578static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
579{
580 DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name);
581 int (*enqueue_fn)(struct qrtr_node *, struct sk_buff *);
582 struct qrtr_sock *ipc = qrtr_sk(sock->sk);
583 struct sock *sk = sock->sk;
584 struct qrtr_node *node;
585 struct qrtr_hdr *hdr;
586 struct sk_buff *skb;
587 size_t plen;
588 int rc;
589
590 if (msg->msg_flags & ~(MSG_DONTWAIT))
591 return -EINVAL;
592
593 if (len > 65535)
594 return -EMSGSIZE;
595
596 lock_sock(sk);
597
598 if (addr) {
599 if (msg->msg_namelen < sizeof(*addr)) {
600 release_sock(sk);
601 return -EINVAL;
602 }
603
604 if (addr->sq_family != AF_QIPCRTR) {
605 release_sock(sk);
606 return -EINVAL;
607 }
608
609 rc = qrtr_autobind(sock);
610 if (rc) {
611 release_sock(sk);
612 return rc;
613 }
614 } else if (sk->sk_state == TCP_ESTABLISHED) {
615 addr = &ipc->peer;
616 } else {
617 release_sock(sk);
618 return -ENOTCONN;
619 }
620
621 node = NULL;
622 if (addr->sq_node == QRTR_NODE_BCAST) {
623 enqueue_fn = qrtr_bcast_enqueue;
624 } else if (addr->sq_node == ipc->us.sq_node) {
625 enqueue_fn = qrtr_local_enqueue;
626 } else {
627 enqueue_fn = qrtr_node_enqueue;
628 node = qrtr_node_lookup(addr->sq_node);
629 if (!node) {
630 release_sock(sk);
631 return -ECONNRESET;
632 }
633 }
634
635 plen = (len + 3) & ~3;
636 skb = sock_alloc_send_skb(sk, plen + QRTR_HDR_SIZE,
637 msg->msg_flags & MSG_DONTWAIT, &rc);
638 if (!skb)
639 goto out_node;
640
641 skb_reset_transport_header(skb);
642 skb_put(skb, len + QRTR_HDR_SIZE);
643
644 hdr = (struct qrtr_hdr *)skb_transport_header(skb);
645 hdr->version = cpu_to_le32(QRTR_PROTO_VER);
646 hdr->src_node_id = cpu_to_le32(ipc->us.sq_node);
647 hdr->src_port_id = cpu_to_le32(ipc->us.sq_port);
648 hdr->confirm_rx = cpu_to_le32(0);
649 hdr->size = cpu_to_le32(len);
650 hdr->dst_node_id = cpu_to_le32(addr->sq_node);
651 hdr->dst_port_id = cpu_to_le32(addr->sq_port);
652
653 rc = skb_copy_datagram_from_iter(skb, QRTR_HDR_SIZE,
654 &msg->msg_iter, len);
655 if (rc) {
656 kfree_skb(skb);
657 goto out_node;
658 }
659
660 if (plen != len) {
661 skb_pad(skb, plen - len);
662 skb_put(skb, plen - len);
663 }
664
665 if (ipc->us.sq_port == QRTR_PORT_CTRL) {
666 if (len < 4) {
667 rc = -EINVAL;
668 kfree_skb(skb);
669 goto out_node;
670 }
671
672 /* control messages already require the type as 'command' */
673 skb_copy_bits(skb, QRTR_HDR_SIZE, &hdr->type, 4);
674 } else {
675 hdr->type = cpu_to_le32(QRTR_TYPE_DATA);
676 }
677
678 rc = enqueue_fn(node, skb);
679 if (rc >= 0)
680 rc = len;
681
682out_node:
683 qrtr_node_release(node);
684 release_sock(sk);
685
686 return rc;
687}
688
689static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg,
690 size_t size, int flags)
691{
692 DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name);
693 const struct qrtr_hdr *phdr;
694 struct sock *sk = sock->sk;
695 struct sk_buff *skb;
696 int copied, rc;
697
698 lock_sock(sk);
699
700 if (sock_flag(sk, SOCK_ZAPPED)) {
701 release_sock(sk);
702 return -EADDRNOTAVAIL;
703 }
704
705 skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
706 flags & MSG_DONTWAIT, &rc);
707 if (!skb) {
708 release_sock(sk);
709 return rc;
710 }
711
712 phdr = (const struct qrtr_hdr *)skb_transport_header(skb);
713 copied = le32_to_cpu(phdr->size);
714 if (copied > size) {
715 copied = size;
716 msg->msg_flags |= MSG_TRUNC;
717 }
718
719 rc = skb_copy_datagram_msg(skb, QRTR_HDR_SIZE, msg, copied);
720 if (rc < 0)
721 goto out;
722 rc = copied;
723
724 if (addr) {
725 addr->sq_family = AF_QIPCRTR;
726 addr->sq_node = le32_to_cpu(phdr->src_node_id);
727 addr->sq_port = le32_to_cpu(phdr->src_port_id);
728 msg->msg_namelen = sizeof(*addr);
729 }
730
731out:
732 skb_free_datagram(sk, skb);
733 release_sock(sk);
734
735 return rc;
736}
737
738static int qrtr_connect(struct socket *sock, struct sockaddr *saddr,
739 int len, int flags)
740{
741 DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, saddr);
742 struct qrtr_sock *ipc = qrtr_sk(sock->sk);
743 struct sock *sk = sock->sk;
744 int rc;
745
746 if (len < sizeof(*addr) || addr->sq_family != AF_QIPCRTR)
747 return -EINVAL;
748
749 lock_sock(sk);
750
751 sk->sk_state = TCP_CLOSE;
752 sock->state = SS_UNCONNECTED;
753
754 rc = qrtr_autobind(sock);
755 if (rc) {
756 release_sock(sk);
757 return rc;
758 }
759
760 ipc->peer = *addr;
761 sock->state = SS_CONNECTED;
762 sk->sk_state = TCP_ESTABLISHED;
763
764 release_sock(sk);
765
766 return 0;
767}
768
769static int qrtr_getname(struct socket *sock, struct sockaddr *saddr,
770 int *len, int peer)
771{
772 struct qrtr_sock *ipc = qrtr_sk(sock->sk);
773 struct sockaddr_qrtr qaddr;
774 struct sock *sk = sock->sk;
775
776 lock_sock(sk);
777 if (peer) {
778 if (sk->sk_state != TCP_ESTABLISHED) {
779 release_sock(sk);
780 return -ENOTCONN;
781 }
782
783 qaddr = ipc->peer;
784 } else {
785 qaddr = ipc->us;
786 }
787 release_sock(sk);
788
789 *len = sizeof(qaddr);
790 qaddr.sq_family = AF_QIPCRTR;
791
792 memcpy(saddr, &qaddr, sizeof(qaddr));
793
794 return 0;
795}
796
797static int qrtr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
798{
799 void __user *argp = (void __user *)arg;
800 struct qrtr_sock *ipc = qrtr_sk(sock->sk);
801 struct sock *sk = sock->sk;
802 struct sockaddr_qrtr *sq;
803 struct sk_buff *skb;
804 struct ifreq ifr;
805 long len = 0;
806 int rc = 0;
807
808 lock_sock(sk);
809
810 switch (cmd) {
811 case TIOCOUTQ:
812 len = sk->sk_sndbuf - sk_wmem_alloc_get(sk);
813 if (len < 0)
814 len = 0;
815 rc = put_user(len, (int __user *)argp);
816 break;
817 case TIOCINQ:
818 skb = skb_peek(&sk->sk_receive_queue);
819 if (skb)
820 len = skb->len - QRTR_HDR_SIZE;
821 rc = put_user(len, (int __user *)argp);
822 break;
823 case SIOCGIFADDR:
824 if (copy_from_user(&ifr, argp, sizeof(ifr))) {
825 rc = -EFAULT;
826 break;
827 }
828
829 sq = (struct sockaddr_qrtr *)&ifr.ifr_addr;
830 *sq = ipc->us;
831 if (copy_to_user(argp, &ifr, sizeof(ifr))) {
832 rc = -EFAULT;
833 break;
834 }
835 break;
836 case SIOCGSTAMP:
837 rc = sock_get_timestamp(sk, argp);
838 break;
839 case SIOCADDRT:
840 case SIOCDELRT:
841 case SIOCSIFADDR:
842 case SIOCGIFDSTADDR:
843 case SIOCSIFDSTADDR:
844 case SIOCGIFBRDADDR:
845 case SIOCSIFBRDADDR:
846 case SIOCGIFNETMASK:
847 case SIOCSIFNETMASK:
848 rc = -EINVAL;
849 break;
850 default:
851 rc = -ENOIOCTLCMD;
852 break;
853 }
854
855 release_sock(sk);
856
857 return rc;
858}
859
860static int qrtr_release(struct socket *sock)
861{
862 struct sock *sk = sock->sk;
863 struct qrtr_sock *ipc;
864
865 if (!sk)
866 return 0;
867
868 lock_sock(sk);
869
870 ipc = qrtr_sk(sk);
871 sk->sk_shutdown = SHUTDOWN_MASK;
872 if (!sock_flag(sk, SOCK_DEAD))
873 sk->sk_state_change(sk);
874
875 sock_set_flag(sk, SOCK_DEAD);
876 sock->sk = NULL;
877
878 if (!sock_flag(sk, SOCK_ZAPPED))
879 qrtr_port_remove(ipc);
880
881 skb_queue_purge(&sk->sk_receive_queue);
882
883 release_sock(sk);
884 sock_put(sk);
885
886 return 0;
887}
888
889static const struct proto_ops qrtr_proto_ops = {
890 .owner = THIS_MODULE,
891 .family = AF_QIPCRTR,
892 .bind = qrtr_bind,
893 .connect = qrtr_connect,
894 .socketpair = sock_no_socketpair,
895 .accept = sock_no_accept,
896 .listen = sock_no_listen,
897 .sendmsg = qrtr_sendmsg,
898 .recvmsg = qrtr_recvmsg,
899 .getname = qrtr_getname,
900 .ioctl = qrtr_ioctl,
901 .poll = datagram_poll,
902 .shutdown = sock_no_shutdown,
903 .setsockopt = sock_no_setsockopt,
904 .getsockopt = sock_no_getsockopt,
905 .release = qrtr_release,
906 .mmap = sock_no_mmap,
907 .sendpage = sock_no_sendpage,
908};
909
910static struct proto qrtr_proto = {
911 .name = "QIPCRTR",
912 .owner = THIS_MODULE,
913 .obj_size = sizeof(struct qrtr_sock),
914};
915
916static int qrtr_create(struct net *net, struct socket *sock,
917 int protocol, int kern)
918{
919 struct qrtr_sock *ipc;
920 struct sock *sk;
921
922 if (sock->type != SOCK_DGRAM)
923 return -EPROTOTYPE;
924
925 sk = sk_alloc(net, AF_QIPCRTR, GFP_KERNEL, &qrtr_proto, kern);
926 if (!sk)
927 return -ENOMEM;
928
929 sock_set_flag(sk, SOCK_ZAPPED);
930
931 sock_init_data(sock, sk);
932 sock->ops = &qrtr_proto_ops;
933
934 ipc = qrtr_sk(sk);
935 ipc->us.sq_family = AF_QIPCRTR;
936 ipc->us.sq_node = qrtr_local_nid;
937 ipc->us.sq_port = 0;
938
939 return 0;
940}
941
942static const struct nla_policy qrtr_policy[IFA_MAX + 1] = {
943 [IFA_LOCAL] = { .type = NLA_U32 },
944};
945
946static int qrtr_addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
947{
948 struct nlattr *tb[IFA_MAX + 1];
949 struct ifaddrmsg *ifm;
950 int rc;
951
952 if (!netlink_capable(skb, CAP_NET_ADMIN))
953 return -EPERM;
954
955 if (!netlink_capable(skb, CAP_SYS_ADMIN))
956 return -EPERM;
957
958 ASSERT_RTNL();
959
960 rc = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, qrtr_policy);
961 if (rc < 0)
962 return rc;
963
964 ifm = nlmsg_data(nlh);
965 if (!tb[IFA_LOCAL])
966 return -EINVAL;
967
968 qrtr_local_nid = nla_get_u32(tb[IFA_LOCAL]);
969 return 0;
970}
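A hedged user-space sketch of driving the handler above: an RTM_NEWADDR request over NETLINK_ROUTE whose ifaddrmsg family is AF_QIPCRTR and which carries a single IFA_LOCAL u32. This is not part of the patch; the AF_QIPCRTR value and the message layout are assumptions mirroring the kernel code above, and the caller needs both CAP_NET_ADMIN and CAP_SYS_ADMIN as checked in qrtr_addr_doit().

/* Hedged sketch: set the local QIPCRTR node id via RTM_NEWADDR + IFA_LOCAL,
 * matching what qrtr_addr_doit() parses above.  The netlink ACK is not read
 * back and error handling is minimal.
 */
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/if_addr.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef AF_QIPCRTR
#define AF_QIPCRTR 42			/* assumption: matches the kernel's value */
#endif

static int qrtr_set_local_nid(unsigned int nid)
{
	struct sockaddr_nl sa = { .nl_family = AF_NETLINK };
	struct {
		struct nlmsghdr  nh;
		struct ifaddrmsg ifa;
		struct rtattr    rta;
		unsigned int     local;	/* IFA_LOCAL payload (u32) */
	} req;
	int fd, ret;

	memset(&req, 0, sizeof(req));
	req.nh.nlmsg_len   = sizeof(req);
	req.nh.nlmsg_type  = RTM_NEWADDR;
	req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
	req.ifa.ifa_family = AF_QIPCRTR;	/* routes the request to PF_QIPCRTR */
	req.rta.rta_type   = IFA_LOCAL;
	req.rta.rta_len    = RTA_LENGTH(sizeof(req.local));
	req.local          = nid;

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (fd < 0)
		return -1;
	ret = sendto(fd, &req, sizeof(req), 0,
		     (struct sockaddr *)&sa, sizeof(sa));
	close(fd);
	return ret < 0 ? -1 : 0;
}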
971
972static const struct net_proto_family qrtr_family = {
973 .owner = THIS_MODULE,
974 .family = AF_QIPCRTR,
975 .create = qrtr_create,
976};
977
978static int __init qrtr_proto_init(void)
979{
980 int rc;
981
982 rc = proto_register(&qrtr_proto, 1);
983 if (rc)
984 return rc;
985
986 rc = sock_register(&qrtr_family);
987 if (rc) {
988 proto_unregister(&qrtr_proto);
989 return rc;
990 }
991
992 rtnl_register(PF_QIPCRTR, RTM_NEWADDR, qrtr_addr_doit, NULL, NULL);
993
994 return 0;
995}
996module_init(qrtr_proto_init);
997
998static void __exit qrtr_proto_fini(void)
999{
1000 rtnl_unregister(PF_QIPCRTR, RTM_NEWADDR);
1001 sock_unregister(qrtr_family.family);
1002 proto_unregister(&qrtr_proto);
1003}
1004module_exit(qrtr_proto_fini);
1005
1006MODULE_DESCRIPTION("Qualcomm IPC-router driver");
1007MODULE_LICENSE("GPL v2");
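Before the header below, a hedged sketch of how user space might exercise the AF_QIPCRTR datagram sockets implemented by the file above. struct sockaddr_qrtr is re-declared locally to mirror the fields the code uses (sq_family/sq_node/sq_port); the AF_QIPCRTR value, the remote node id and the service port are illustrative assumptions, not values taken from the patch.

/* Hedged user-space sketch of the AF_QIPCRTR datagram API implemented above.
 * The address structure mirrors the kernel's sockaddr_qrtr fields; node 0 and
 * port 42 are purely illustrative.
 */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef AF_QIPCRTR
#define AF_QIPCRTR 42			/* assumption: matches the kernel's value */
#endif

struct sockaddr_qrtr {			/* assumed layout, mirroring the code above */
	unsigned short sq_family;
	unsigned int   sq_node;
	unsigned int   sq_port;
};

int main(void)
{
	struct sockaddr_qrtr remote = {
		.sq_family = AF_QIPCRTR,
		.sq_node   = 0,		/* illustrative remote node */
		.sq_port   = 42,	/* illustrative remote service port */
	};
	char buf[128] = "hello";
	int fd;

	fd = socket(AF_QIPCRTR, SOCK_DGRAM, 0);
	if (fd < 0) {
		perror("socket");
		return 1;
	}

	/* sending typically auto-binds a local port (qrtr_autobind(), as in
	 * qrtr_connect() above)
	 */
	if (sendto(fd, buf, strlen(buf), 0,
		   (struct sockaddr *)&remote, sizeof(remote)) < 0)
		perror("sendto");

	if (recvfrom(fd, buf, sizeof(buf), 0, NULL, NULL) < 0)
		perror("recvfrom");

	close(fd);
	return 0;
}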
diff --git a/net/qrtr/qrtr.h b/net/qrtr/qrtr.h
new file mode 100644
index 000000000000..2b848718f8fe
--- /dev/null
+++ b/net/qrtr/qrtr.h
@@ -0,0 +1,31 @@
1#ifndef __QRTR_H_
2#define __QRTR_H_
3
4#include <linux/types.h>
5
6struct sk_buff;
7
8/* endpoint node id auto assignment */
9#define QRTR_EP_NID_AUTO (-1)
10
11/**
12 * struct qrtr_endpoint - endpoint handle
13 * @xmit: Callback for outgoing packets
14 *
15 * The socket buffer passed to the xmit function becomes owned by the endpoint
16 * driver. As such, when the driver is done with the buffer, it should
17 * call kfree_skb() on failure, or consume_skb() on success.
18 */
19struct qrtr_endpoint {
20 int (*xmit)(struct qrtr_endpoint *ep, struct sk_buff *skb);
21 /* private: not for endpoint use */
22 struct qrtr_node *node;
23};
24
25int qrtr_endpoint_register(struct qrtr_endpoint *ep, unsigned int nid);
26
27void qrtr_endpoint_unregister(struct qrtr_endpoint *ep);
28
29int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len);
30
31#endif
diff --git a/net/qrtr/smd.c b/net/qrtr/smd.c
new file mode 100644
index 000000000000..84ebce73aa23
--- /dev/null
+++ b/net/qrtr/smd.c
@@ -0,0 +1,117 @@
1/*
2 * Copyright (c) 2015, Sony Mobile Communications Inc.
3 * Copyright (c) 2013, The Linux Foundation. All rights reserved.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 and
7 * only version 2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 */
14
15#include <linux/module.h>
16#include <linux/skbuff.h>
17#include <linux/soc/qcom/smd.h>
18
19#include "qrtr.h"
20
21struct qrtr_smd_dev {
22 struct qrtr_endpoint ep;
23 struct qcom_smd_channel *channel;
24};
25
26/* from smd to qrtr */
27static int qcom_smd_qrtr_callback(struct qcom_smd_device *sdev,
28 const void *data, size_t len)
29{
30 struct qrtr_smd_dev *qdev = dev_get_drvdata(&sdev->dev);
31 int rc;
32
33 if (!qdev)
34 return -EAGAIN;
35
36 rc = qrtr_endpoint_post(&qdev->ep, data, len);
37 if (rc == -EINVAL) {
38 dev_err(&sdev->dev, "invalid ipcrouter packet\n");
39 /* return 0 to let smd drop the packet */
40 rc = 0;
41 }
42
43 return rc;
44}
45
46/* from qrtr to smd */
47static int qcom_smd_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb)
48{
49 struct qrtr_smd_dev *qdev = container_of(ep, struct qrtr_smd_dev, ep);
50 int rc;
51
52 rc = skb_linearize(skb);
53 if (rc)
54 goto out;
55
56 rc = qcom_smd_send(qdev->channel, skb->data, skb->len);
57
58out:
59 if (rc)
60 kfree_skb(skb);
61 else
62 consume_skb(skb);
63 return rc;
64}
65
66static int qcom_smd_qrtr_probe(struct qcom_smd_device *sdev)
67{
68 struct qrtr_smd_dev *qdev;
69 int rc;
70
71 qdev = devm_kzalloc(&sdev->dev, sizeof(*qdev), GFP_KERNEL);
72 if (!qdev)
73 return -ENOMEM;
74
75 qdev->channel = sdev->channel;
76 qdev->ep.xmit = qcom_smd_qrtr_send;
77
78 rc = qrtr_endpoint_register(&qdev->ep, QRTR_EP_NID_AUTO);
79 if (rc)
80 return rc;
81
82 dev_set_drvdata(&sdev->dev, qdev);
83
84 dev_dbg(&sdev->dev, "Qualcomm SMD QRTR driver probed\n");
85
86 return 0;
87}
88
89static void qcom_smd_qrtr_remove(struct qcom_smd_device *sdev)
90{
91 struct qrtr_smd_dev *qdev = dev_get_drvdata(&sdev->dev);
92
93 qrtr_endpoint_unregister(&qdev->ep);
94
95 dev_set_drvdata(&sdev->dev, NULL);
96}
97
98static const struct qcom_smd_id qcom_smd_qrtr_smd_match[] = {
99 { "IPCRTR" },
100 {}
101};
102
103static struct qcom_smd_driver qcom_smd_qrtr_driver = {
104 .probe = qcom_smd_qrtr_probe,
105 .remove = qcom_smd_qrtr_remove,
106 .callback = qcom_smd_qrtr_callback,
107 .smd_match_table = qcom_smd_qrtr_smd_match,
108 .driver = {
109 .name = "qcom_smd_qrtr",
110 .owner = THIS_MODULE,
111 },
112};
113
114module_qcom_smd_driver(qcom_smd_qrtr_driver);
115
116MODULE_DESCRIPTION("Qualcomm IPC-Router SMD interface driver");
117MODULE_LICENSE("GPL v2");
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 61ed2a8764ba..86187dad1440 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -127,7 +127,7 @@ void rds_tcp_restore_callbacks(struct socket *sock,
127 127
128/* 128/*
129 * This is the only path that sets tc->t_sock. Send and receive trust that 129 * This is the only path that sets tc->t_sock. Send and receive trust that
130 * it is set. The RDS_CONN_CONNECTED bit protects those paths from being 130 * it is set. The RDS_CONN_UP bit protects those paths from being
131 * called while it isn't set. 131 * called while it isn't set.
132 */ 132 */
133void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn) 133void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn)
@@ -216,6 +216,7 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
216 if (!tc) 216 if (!tc)
217 return -ENOMEM; 217 return -ENOMEM;
218 218
219 mutex_init(&tc->t_conn_lock);
219 tc->t_sock = NULL; 220 tc->t_sock = NULL;
220 tc->t_tinc = NULL; 221 tc->t_tinc = NULL;
221 tc->t_tinc_hdr_rem = sizeof(struct rds_header); 222 tc->t_tinc_hdr_rem = sizeof(struct rds_header);
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index 64f873c0c6b6..41c228300525 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -12,6 +12,10 @@ struct rds_tcp_connection {
12 12
13 struct list_head t_tcp_node; 13 struct list_head t_tcp_node;
14 struct rds_connection *conn; 14 struct rds_connection *conn;
15 /* t_conn_lock synchronizes the connection establishment between
16 * rds_tcp_accept_one and rds_tcp_conn_connect
17 */
18 struct mutex t_conn_lock;
15 struct socket *t_sock; 19 struct socket *t_sock;
16 void *t_orig_write_space; 20 void *t_orig_write_space;
17 void *t_orig_data_ready; 21 void *t_orig_data_ready;
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index 5cb16875c460..49a3fcfed360 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -78,7 +78,14 @@ int rds_tcp_conn_connect(struct rds_connection *conn)
78 struct socket *sock = NULL; 78 struct socket *sock = NULL;
79 struct sockaddr_in src, dest; 79 struct sockaddr_in src, dest;
80 int ret; 80 int ret;
81 struct rds_tcp_connection *tc = conn->c_transport_data;
82
83 mutex_lock(&tc->t_conn_lock);
81 84
85 if (rds_conn_up(conn)) {
86 mutex_unlock(&tc->t_conn_lock);
87 return 0;
88 }
82 ret = sock_create_kern(rds_conn_net(conn), PF_INET, 89 ret = sock_create_kern(rds_conn_net(conn), PF_INET,
83 SOCK_STREAM, IPPROTO_TCP, &sock); 90 SOCK_STREAM, IPPROTO_TCP, &sock);
84 if (ret < 0) 91 if (ret < 0)
@@ -120,6 +127,7 @@ int rds_tcp_conn_connect(struct rds_connection *conn)
120 } 127 }
121 128
122out: 129out:
130 mutex_unlock(&tc->t_conn_lock);
123 if (sock) 131 if (sock)
124 sock_release(sock); 132 sock_release(sock);
125 return ret; 133 return ret;
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 0936a4a32b47..be263cdf268b 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -76,7 +76,9 @@ int rds_tcp_accept_one(struct socket *sock)
76 struct rds_connection *conn; 76 struct rds_connection *conn;
77 int ret; 77 int ret;
78 struct inet_sock *inet; 78 struct inet_sock *inet;
79 struct rds_tcp_connection *rs_tcp; 79 struct rds_tcp_connection *rs_tcp = NULL;
80 int conn_state;
81 struct sock *nsk;
80 82
81 ret = sock_create_kern(sock_net(sock->sk), sock->sk->sk_family, 83 ret = sock_create_kern(sock_net(sock->sk), sock->sk->sk_family,
82 sock->sk->sk_type, sock->sk->sk_protocol, 84 sock->sk->sk_type, sock->sk->sk_protocol,
@@ -115,28 +117,44 @@ int rds_tcp_accept_one(struct socket *sock)
115 * rds_tcp_state_change() will do that cleanup 117 * rds_tcp_state_change() will do that cleanup
116 */ 118 */
117 rs_tcp = (struct rds_tcp_connection *)conn->c_transport_data; 119 rs_tcp = (struct rds_tcp_connection *)conn->c_transport_data;
118 if (rs_tcp->t_sock &&
119 ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr)) {
120 struct sock *nsk = new_sock->sk;
121
122 nsk->sk_user_data = NULL;
123 nsk->sk_prot->disconnect(nsk, 0);
124 tcp_done(nsk);
125 new_sock = NULL;
126 ret = 0;
127 goto out;
128 } else if (rs_tcp->t_sock) {
129 rds_tcp_restore_callbacks(rs_tcp->t_sock, rs_tcp);
130 conn->c_outgoing = 0;
131 }
132
133 rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING); 120 rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING);
121 mutex_lock(&rs_tcp->t_conn_lock);
122 conn_state = rds_conn_state(conn);
123 if (conn_state != RDS_CONN_CONNECTING && conn_state != RDS_CONN_UP)
124 goto rst_nsk;
125 if (rs_tcp->t_sock) {
126 /* Need to resolve a duelling SYN between peers.
127 * We have an outstanding SYN to this peer, which may
128 * potentially have transitioned to the RDS_CONN_UP state,
129 * so we must quiesce any send threads before resetting
130 * c_transport_data.
131 */
132 wait_event(conn->c_waitq,
133 !test_bit(RDS_IN_XMIT, &conn->c_flags));
134 if (ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr)) {
135 goto rst_nsk;
136 } else if (rs_tcp->t_sock) {
137 rds_tcp_restore_callbacks(rs_tcp->t_sock, rs_tcp);
138 conn->c_outgoing = 0;
139 }
140 }
134 rds_tcp_set_callbacks(new_sock, conn); 141 rds_tcp_set_callbacks(new_sock, conn);
135 rds_connect_complete(conn); 142 rds_connect_complete(conn); /* marks RDS_CONN_UP */
143 new_sock = NULL;
144 ret = 0;
145 goto out;
146rst_nsk:
147 /* reset the newly returned accept sock and bail */
148 nsk = new_sock->sk;
149 rds_tcp_stats_inc(s_tcp_listen_closed_stale);
150 nsk->sk_user_data = NULL;
151 nsk->sk_prot->disconnect(nsk, 0);
152 tcp_done(nsk);
136 new_sock = NULL; 153 new_sock = NULL;
137 ret = 0; 154 ret = 0;
138
139out: 155out:
156 if (rs_tcp)
157 mutex_unlock(&rs_tcp->t_conn_lock);
140 if (new_sock) 158 if (new_sock)
141 sock_release(new_sock); 159 sock_release(new_sock);
142 return ret; 160 return ret;
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index 27a992154804..d75d8b56a9e3 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -207,22 +207,14 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
207 } 207 }
208 208
209 if (left && tc->t_tinc_data_rem) { 209 if (left && tc->t_tinc_data_rem) {
210 clone = skb_clone(skb, arg->gfp); 210 to_copy = min(tc->t_tinc_data_rem, left);
211
212 clone = pskb_extract(skb, offset, to_copy, arg->gfp);
211 if (!clone) { 213 if (!clone) {
212 desc->error = -ENOMEM; 214 desc->error = -ENOMEM;
213 goto out; 215 goto out;
214 } 216 }
215 217
216 to_copy = min(tc->t_tinc_data_rem, left);
217 if (!pskb_pull(clone, offset) ||
218 pskb_trim(clone, to_copy)) {
219 pr_warn("rds_tcp_data_recv: pull/trim failed "
220 "left %zu data_rem %zu skb_len %d\n",
221 left, tc->t_tinc_data_rem, skb->len);
222 kfree_skb(clone);
223 desc->error = -ENOMEM;
224 goto out;
225 }
226 skb_queue_tail(&tinc->ti_skb_list, clone); 218 skb_queue_tail(&tinc->ti_skb_list, clone);
227 219
228 rdsdebug("skb %p data %p len %d off %u to_copy %zu -> " 220 rdsdebug("skb %p data %p len %d off %u to_copy %zu -> "
diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c
index 01e038146b7c..6ff97412a0bb 100644
--- a/net/rxrpc/ar-input.c
+++ b/net/rxrpc/ar-input.c
@@ -698,12 +698,12 @@ void rxrpc_data_ready(struct sock *sk)
698 if (skb_checksum_complete(skb)) { 698 if (skb_checksum_complete(skb)) {
699 rxrpc_free_skb(skb); 699 rxrpc_free_skb(skb);
700 rxrpc_put_local(local); 700 rxrpc_put_local(local);
701 UDP_INC_STATS_BH(&init_net, UDP_MIB_INERRORS, 0); 701 __UDP_INC_STATS(&init_net, UDP_MIB_INERRORS, 0);
702 _leave(" [CSUM failed]"); 702 _leave(" [CSUM failed]");
703 return; 703 return;
704 } 704 }
705 705
706 UDP_INC_STATS_BH(&init_net, UDP_MIB_INDATAGRAMS, 0); 706 __UDP_INC_STATS(&init_net, UDP_MIB_INDATAGRAMS, 0);
707 707
708 /* The socket buffer we have is owned by UDP, with UDP's data all over 708 /* The socket buffer we have is owned by UDP, with UDP's data all over
709 * it, but we really want our own data there. 709 * it, but we really want our own data there.
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 96066665e376..336774a535c3 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -657,12 +657,15 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
657 if (compat_mode) { 657 if (compat_mode) {
658 if (a->type == TCA_OLD_COMPAT) 658 if (a->type == TCA_OLD_COMPAT)
659 err = gnet_stats_start_copy_compat(skb, 0, 659 err = gnet_stats_start_copy_compat(skb, 0,
660 TCA_STATS, TCA_XSTATS, &p->tcfc_lock, &d); 660 TCA_STATS,
661 TCA_XSTATS,
662 &p->tcfc_lock, &d,
663 TCA_PAD);
661 else 664 else
662 return 0; 665 return 0;
663 } else 666 } else
664 err = gnet_stats_start_copy(skb, TCA_ACT_STATS, 667 err = gnet_stats_start_copy(skb, TCA_ACT_STATS,
665 &p->tcfc_lock, &d); 668 &p->tcfc_lock, &d, TCA_ACT_PAD);
666 669
667 if (err < 0) 670 if (err < 0)
668 goto errout; 671 goto errout;
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 8c9f1f0459ab..c7123e01c2ca 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -53,9 +53,11 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
53 filter = rcu_dereference(prog->filter); 53 filter = rcu_dereference(prog->filter);
54 if (at_ingress) { 54 if (at_ingress) {
55 __skb_push(skb, skb->mac_len); 55 __skb_push(skb, skb->mac_len);
56 bpf_compute_data_end(skb);
56 filter_res = BPF_PROG_RUN(filter, skb); 57 filter_res = BPF_PROG_RUN(filter, skb);
57 __skb_pull(skb, skb->mac_len); 58 __skb_pull(skb, skb->mac_len);
58 } else { 59 } else {
60 bpf_compute_data_end(skb);
59 filter_res = BPF_PROG_RUN(filter, skb); 61 filter_res = BPF_PROG_RUN(filter, skb);
60 } 62 }
61 rcu_read_unlock(); 63 rcu_read_unlock();
@@ -156,7 +158,8 @@ static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *act,
156 tm.lastuse = jiffies_to_clock_t(jiffies - prog->tcf_tm.lastuse); 158 tm.lastuse = jiffies_to_clock_t(jiffies - prog->tcf_tm.lastuse);
157 tm.expires = jiffies_to_clock_t(prog->tcf_tm.expires); 159 tm.expires = jiffies_to_clock_t(prog->tcf_tm.expires);
158 160
159 if (nla_put(skb, TCA_ACT_BPF_TM, sizeof(tm), &tm)) 161 if (nla_put_64bit(skb, TCA_ACT_BPF_TM, sizeof(tm), &tm,
162 TCA_ACT_BPF_PAD))
160 goto nla_put_failure; 163 goto nla_put_failure;
161 164
162 return skb->len; 165 return skb->len;
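A note on the recurring change in this and the following act_*/cls_*/sch_* files: stat and timestamp attributes move from nla_put() to nla_put_64bit(), and gnet_stats_start_copy*() gains a pad-attribute argument (TCA_PAD, TCA_ACT_PAD, TCA_ACT_BPF_PAD, ...). The extra argument names an attribute type that netlink may emit as a zero-payload pad in front of a 64-bit-wide payload, keeping it naturally aligned for user space. A hedged illustration of the resulting layout, reusing the call from act_bpf.c above; the wire-format comment is my assumption, not kernel code:

/* from act_bpf.c above -- the extra argument names the pad attribute type */
if (nla_put_64bit(skb, TCA_ACT_BPF_TM, sizeof(tm), &tm, TCA_ACT_BPF_PAD))
	goto nla_put_failure;

/* conceptual wire layout when padding is needed (assumption):
 *   [nlattr: type=TCA_ACT_BPF_PAD, len=4]            <- header only, no payload
 *   [nlattr: type=TCA_ACT_BPF_TM,  len=4+sizeof(tm)] [tm, now 8-byte aligned]
 */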
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index c0ed93ce2391..2ba700c765e0 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -163,7 +163,8 @@ static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a,
163 t.install = jiffies_to_clock_t(jiffies - ci->tcf_tm.install); 163 t.install = jiffies_to_clock_t(jiffies - ci->tcf_tm.install);
164 t.lastuse = jiffies_to_clock_t(jiffies - ci->tcf_tm.lastuse); 164 t.lastuse = jiffies_to_clock_t(jiffies - ci->tcf_tm.lastuse);
165 t.expires = jiffies_to_clock_t(ci->tcf_tm.expires); 165 t.expires = jiffies_to_clock_t(ci->tcf_tm.expires);
166 if (nla_put(skb, TCA_CONNMARK_TM, sizeof(t), &t)) 166 if (nla_put_64bit(skb, TCA_CONNMARK_TM, sizeof(t), &t,
167 TCA_CONNMARK_PAD))
167 goto nla_put_failure; 168 goto nla_put_failure;
168 169
169 return skb->len; 170 return skb->len;
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index d22426cdebc0..28e934ed038a 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -549,7 +549,7 @@ static int tcf_csum_dump(struct sk_buff *skb,
549 t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); 549 t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
550 t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); 550 t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
551 t.expires = jiffies_to_clock_t(p->tcf_tm.expires); 551 t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
552 if (nla_put(skb, TCA_CSUM_TM, sizeof(t), &t)) 552 if (nla_put_64bit(skb, TCA_CSUM_TM, sizeof(t), &t, TCA_CSUM_PAD))
553 goto nla_put_failure; 553 goto nla_put_failure;
554 554
555 return skb->len; 555 return skb->len;
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 887fc1f209ff..1a6e09fbb2a5 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -177,7 +177,7 @@ static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
177 t.install = jiffies_to_clock_t(jiffies - gact->tcf_tm.install); 177 t.install = jiffies_to_clock_t(jiffies - gact->tcf_tm.install);
178 t.lastuse = jiffies_to_clock_t(jiffies - gact->tcf_tm.lastuse); 178 t.lastuse = jiffies_to_clock_t(jiffies - gact->tcf_tm.lastuse);
179 t.expires = jiffies_to_clock_t(gact->tcf_tm.expires); 179 t.expires = jiffies_to_clock_t(gact->tcf_tm.expires);
180 if (nla_put(skb, TCA_GACT_TM, sizeof(t), &t)) 180 if (nla_put_64bit(skb, TCA_GACT_TM, sizeof(t), &t, TCA_GACT_PAD))
181 goto nla_put_failure; 181 goto nla_put_failure;
182 return skb->len; 182 return skb->len;
183 183
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index c589a9ba506a..556f44c9c454 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -550,7 +550,7 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind,
550 t.install = jiffies_to_clock_t(jiffies - ife->tcf_tm.install); 550 t.install = jiffies_to_clock_t(jiffies - ife->tcf_tm.install);
551 t.lastuse = jiffies_to_clock_t(jiffies - ife->tcf_tm.lastuse); 551 t.lastuse = jiffies_to_clock_t(jiffies - ife->tcf_tm.lastuse);
552 t.expires = jiffies_to_clock_t(ife->tcf_tm.expires); 552 t.expires = jiffies_to_clock_t(ife->tcf_tm.expires);
553 if (nla_put(skb, TCA_IFE_TM, sizeof(t), &t)) 553 if (nla_put_64bit(skb, TCA_IFE_TM, sizeof(t), &t, TCA_IFE_PAD))
554 goto nla_put_failure; 554 goto nla_put_failure;
555 555
556 if (!is_zero_ether_addr(ife->eth_dst)) { 556 if (!is_zero_ether_addr(ife->eth_dst)) {
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 350e134cffb3..1464f6a09446 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -275,7 +275,7 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
275 tm.install = jiffies_to_clock_t(jiffies - ipt->tcf_tm.install); 275 tm.install = jiffies_to_clock_t(jiffies - ipt->tcf_tm.install);
276 tm.lastuse = jiffies_to_clock_t(jiffies - ipt->tcf_tm.lastuse); 276 tm.lastuse = jiffies_to_clock_t(jiffies - ipt->tcf_tm.lastuse);
277 tm.expires = jiffies_to_clock_t(ipt->tcf_tm.expires); 277 tm.expires = jiffies_to_clock_t(ipt->tcf_tm.expires);
278 if (nla_put(skb, TCA_IPT_TM, sizeof (tm), &tm)) 278 if (nla_put_64bit(skb, TCA_IPT_TM, sizeof(tm), &tm, TCA_IPT_PAD))
279 goto nla_put_failure; 279 goto nla_put_failure;
280 kfree(t); 280 kfree(t);
281 return skb->len; 281 return skb->len;
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index e8a760cf7775..dea57c1ec90c 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -214,7 +214,7 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, i
214 t.install = jiffies_to_clock_t(jiffies - m->tcf_tm.install); 214 t.install = jiffies_to_clock_t(jiffies - m->tcf_tm.install);
215 t.lastuse = jiffies_to_clock_t(jiffies - m->tcf_tm.lastuse); 215 t.lastuse = jiffies_to_clock_t(jiffies - m->tcf_tm.lastuse);
216 t.expires = jiffies_to_clock_t(m->tcf_tm.expires); 216 t.expires = jiffies_to_clock_t(m->tcf_tm.expires);
217 if (nla_put(skb, TCA_MIRRED_TM, sizeof(t), &t)) 217 if (nla_put_64bit(skb, TCA_MIRRED_TM, sizeof(t), &t, TCA_MIRRED_PAD))
218 goto nla_put_failure; 218 goto nla_put_failure;
219 return skb->len; 219 return skb->len;
220 220
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 0f65cdfbfb1d..c0a879f940de 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -267,7 +267,7 @@ static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a,
267 t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); 267 t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
268 t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); 268 t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
269 t.expires = jiffies_to_clock_t(p->tcf_tm.expires); 269 t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
270 if (nla_put(skb, TCA_NAT_TM, sizeof(t), &t)) 270 if (nla_put_64bit(skb, TCA_NAT_TM, sizeof(t), &t, TCA_NAT_PAD))
271 goto nla_put_failure; 271 goto nla_put_failure;
272 272
273 return skb->len; 273 return skb->len;
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 429c3ab65142..c6e18f230af6 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -203,7 +203,7 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
203 t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); 203 t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
204 t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); 204 t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
205 t.expires = jiffies_to_clock_t(p->tcf_tm.expires); 205 t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
206 if (nla_put(skb, TCA_PEDIT_TM, sizeof(t), &t)) 206 if (nla_put_64bit(skb, TCA_PEDIT_TM, sizeof(t), &t, TCA_PEDIT_PAD))
207 goto nla_put_failure; 207 goto nla_put_failure;
208 kfree(opt); 208 kfree(opt);
209 return skb->len; 209 return skb->len;
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 75b2be13fbcc..2057fd56d74c 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -155,7 +155,7 @@ static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
155 t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install); 155 t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install);
156 t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse); 156 t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse);
157 t.expires = jiffies_to_clock_t(d->tcf_tm.expires); 157 t.expires = jiffies_to_clock_t(d->tcf_tm.expires);
158 if (nla_put(skb, TCA_DEF_TM, sizeof(t), &t)) 158 if (nla_put_64bit(skb, TCA_DEF_TM, sizeof(t), &t, TCA_DEF_PAD))
159 goto nla_put_failure; 159 goto nla_put_failure;
160 return skb->len; 160 return skb->len;
161 161
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index cfcdbdc00c9b..51b24998904f 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -167,7 +167,7 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
167 t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install); 167 t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install);
168 t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse); 168 t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse);
169 t.expires = jiffies_to_clock_t(d->tcf_tm.expires); 169 t.expires = jiffies_to_clock_t(d->tcf_tm.expires);
170 if (nla_put(skb, TCA_SKBEDIT_TM, sizeof(t), &t)) 170 if (nla_put_64bit(skb, TCA_SKBEDIT_TM, sizeof(t), &t, TCA_SKBEDIT_PAD))
171 goto nla_put_failure; 171 goto nla_put_failure;
172 return skb->len; 172 return skb->len;
173 173
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index bab8ae0cefc0..c1682ab9bc7e 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -175,7 +175,7 @@ static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a,
175 t.install = jiffies_to_clock_t(jiffies - v->tcf_tm.install); 175 t.install = jiffies_to_clock_t(jiffies - v->tcf_tm.install);
176 t.lastuse = jiffies_to_clock_t(jiffies - v->tcf_tm.lastuse); 176 t.lastuse = jiffies_to_clock_t(jiffies - v->tcf_tm.lastuse);
177 t.expires = jiffies_to_clock_t(v->tcf_tm.expires); 177 t.expires = jiffies_to_clock_t(v->tcf_tm.expires);
178 if (nla_put(skb, TCA_VLAN_TM, sizeof(t), &t)) 178 if (nla_put_64bit(skb, TCA_VLAN_TM, sizeof(t), &t, TCA_VLAN_PAD))
179 goto nla_put_failure; 179 goto nla_put_failure;
180 return skb->len; 180 return skb->len;
181 181
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 425fe6a0eda3..7b342c779da7 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -96,9 +96,11 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
96 if (at_ingress) { 96 if (at_ingress) {
97 /* It is safe to push/pull even if skb_shared() */ 97 /* It is safe to push/pull even if skb_shared() */
98 __skb_push(skb, skb->mac_len); 98 __skb_push(skb, skb->mac_len);
99 bpf_compute_data_end(skb);
99 filter_res = BPF_PROG_RUN(prog->filter, skb); 100 filter_res = BPF_PROG_RUN(prog->filter, skb);
100 __skb_pull(skb, skb->mac_len); 101 __skb_pull(skb, skb->mac_len);
101 } else { 102 } else {
103 bpf_compute_data_end(skb);
102 filter_res = BPF_PROG_RUN(prog->filter, skb); 104 filter_res = BPF_PROG_RUN(prog->filter, skb);
103 } 105 }
104 106
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 563cdad76448..e64877a3c084 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -1140,9 +1140,10 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
1140 gpf->kcnts[i] += pf->kcnts[i]; 1140 gpf->kcnts[i] += pf->kcnts[i];
1141 } 1141 }
1142 1142
1143 if (nla_put(skb, TCA_U32_PCNT, 1143 if (nla_put_64bit(skb, TCA_U32_PCNT,
1144 sizeof(struct tc_u32_pcnt) + n->sel.nkeys*sizeof(u64), 1144 sizeof(struct tc_u32_pcnt) +
1145 gpf)) { 1145 n->sel.nkeys * sizeof(u64),
1146 gpf, TCA_U32_PAD)) {
1146 kfree(gpf); 1147 kfree(gpf);
1147 goto nla_put_failure; 1148 goto nla_put_failure;
1148 } 1149 }
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 3b180ff72f79..64f71a2155f3 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1365,7 +1365,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
1365 goto nla_put_failure; 1365 goto nla_put_failure;
1366 1366
1367 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, 1367 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1368 qdisc_root_sleeping_lock(q), &d) < 0) 1368 qdisc_root_sleeping_lock(q), &d,
1369 TCA_PAD) < 0)
1369 goto nla_put_failure; 1370 goto nla_put_failure;
1370 1371
1371 if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0) 1372 if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
@@ -1679,7 +1680,8 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1679 goto nla_put_failure; 1680 goto nla_put_failure;
1680 1681
1681 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, 1682 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1682 qdisc_root_sleeping_lock(q), &d) < 0) 1683 qdisc_root_sleeping_lock(q), &d,
1684 TCA_PAD) < 0)
1683 goto nla_put_failure; 1685 goto nla_put_failure;
1684 1686
1685 if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0) 1687 if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index a5e420b3d4ab..bb8bd9314629 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -59,8 +59,12 @@ struct fq_codel_sched_data {
59 u32 flows_cnt; /* number of flows */ 59 u32 flows_cnt; /* number of flows */
60 u32 perturbation; /* hash perturbation */ 60 u32 perturbation; /* hash perturbation */
61 u32 quantum; /* psched_mtu(qdisc_dev(sch)); */ 61 u32 quantum; /* psched_mtu(qdisc_dev(sch)); */
62 u32 drop_batch_size;
63 u32 memory_limit;
62 struct codel_params cparams; 64 struct codel_params cparams;
63 struct codel_stats cstats; 65 struct codel_stats cstats;
66 u32 memory_usage;
67 u32 drop_overmemory;
64 u32 drop_overlimit; 68 u32 drop_overlimit;
65 u32 new_flow_count; 69 u32 new_flow_count;
66 70
@@ -135,17 +139,21 @@ static inline void flow_queue_add(struct fq_codel_flow *flow,
135 skb->next = NULL; 139 skb->next = NULL;
136} 140}
137 141
138static unsigned int fq_codel_drop(struct Qdisc *sch) 142static unsigned int fq_codel_drop(struct Qdisc *sch, unsigned int max_packets)
139{ 143{
140 struct fq_codel_sched_data *q = qdisc_priv(sch); 144 struct fq_codel_sched_data *q = qdisc_priv(sch);
141 struct sk_buff *skb; 145 struct sk_buff *skb;
142 unsigned int maxbacklog = 0, idx = 0, i, len; 146 unsigned int maxbacklog = 0, idx = 0, i, len;
143 struct fq_codel_flow *flow; 147 struct fq_codel_flow *flow;
148 unsigned int threshold;
149 unsigned int mem = 0;
144 150
145 /* Queue is full! Find the fat flow and drop packet from it. 151 /* Queue is full! Find the fat flow and drop packet(s) from it.
146 * This might sound expensive, but with 1024 flows, we scan 152 * This might sound expensive, but with 1024 flows, we scan
147 * 4KB of memory, and we dont need to handle a complex tree 153 * 4KB of memory, and we dont need to handle a complex tree
148 * in fast path (packet queue/enqueue) with many cache misses. 154 * in fast path (packet queue/enqueue) with many cache misses.
155 * In stress mode, we'll try to drop 64 packets from the flow,
156 * amortizing this linear lookup to one cache line per drop.
149 */ 157 */
150 for (i = 0; i < q->flows_cnt; i++) { 158 for (i = 0; i < q->flows_cnt; i++) {
151 if (q->backlogs[i] > maxbacklog) { 159 if (q->backlogs[i] > maxbacklog) {
@@ -153,15 +161,26 @@ static unsigned int fq_codel_drop(struct Qdisc *sch)
153 idx = i; 161 idx = i;
154 } 162 }
155 } 163 }
164
165 /* Our goal is to drop half of this fat flow backlog */
166 threshold = maxbacklog >> 1;
167
156 flow = &q->flows[idx]; 168 flow = &q->flows[idx];
157 skb = dequeue_head(flow); 169 len = 0;
158 len = qdisc_pkt_len(skb); 170 i = 0;
171 do {
172 skb = dequeue_head(flow);
173 len += qdisc_pkt_len(skb);
174 mem += skb->truesize;
175 kfree_skb(skb);
176 } while (++i < max_packets && len < threshold);
177
178 flow->dropped += i;
159 q->backlogs[idx] -= len; 179 q->backlogs[idx] -= len;
160 sch->q.qlen--; 180 q->memory_usage -= mem;
161 qdisc_qstats_drop(sch); 181 sch->qstats.drops += i;
162 qdisc_qstats_backlog_dec(sch, skb); 182 sch->qstats.backlog -= len;
163 kfree_skb(skb); 183 sch->q.qlen -= i;
164 flow->dropped++;
165 return idx; 184 return idx;
166} 185}
167 186
@@ -170,16 +189,17 @@ static unsigned int fq_codel_qdisc_drop(struct Qdisc *sch)
170 unsigned int prev_backlog; 189 unsigned int prev_backlog;
171 190
172 prev_backlog = sch->qstats.backlog; 191 prev_backlog = sch->qstats.backlog;
173 fq_codel_drop(sch); 192 fq_codel_drop(sch, 1U);
174 return prev_backlog - sch->qstats.backlog; 193 return prev_backlog - sch->qstats.backlog;
175} 194}
176 195
177static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) 196static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
178{ 197{
179 struct fq_codel_sched_data *q = qdisc_priv(sch); 198 struct fq_codel_sched_data *q = qdisc_priv(sch);
180 unsigned int idx, prev_backlog; 199 unsigned int idx, prev_backlog, prev_qlen;
181 struct fq_codel_flow *flow; 200 struct fq_codel_flow *flow;
182 int uninitialized_var(ret); 201 int uninitialized_var(ret);
202 bool memory_limited;
183 203
184 idx = fq_codel_classify(skb, sch, &ret); 204 idx = fq_codel_classify(skb, sch, &ret);
185 if (idx == 0) { 205 if (idx == 0) {
@@ -202,20 +222,29 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
202 flow->deficit = q->quantum; 222 flow->deficit = q->quantum;
203 flow->dropped = 0; 223 flow->dropped = 0;
204 } 224 }
205 if (++sch->q.qlen <= sch->limit) 225 q->memory_usage += skb->truesize;
226 memory_limited = q->memory_usage > q->memory_limit;
227 if (++sch->q.qlen <= sch->limit && !memory_limited)
206 return NET_XMIT_SUCCESS; 228 return NET_XMIT_SUCCESS;
207 229
208 prev_backlog = sch->qstats.backlog; 230 prev_backlog = sch->qstats.backlog;
209 q->drop_overlimit++; 231 prev_qlen = sch->q.qlen;
210 /* Return Congestion Notification only if we dropped a packet 232
211 * from this flow. 233 /* fq_codel_drop() is quite expensive, as it performs a linear search
234 * in q->backlogs[] to find a fat flow.
235 * So instead of dropping a single packet, drop half of its backlog
236 * with a 64 packets limit to not add a too big cpu spike here.
212 */ 237 */
213 if (fq_codel_drop(sch) == idx) 238 ret = fq_codel_drop(sch, q->drop_batch_size);
214 return NET_XMIT_CN; 239
240 q->drop_overlimit += prev_qlen - sch->q.qlen;
241 if (memory_limited)
242 q->drop_overmemory += prev_qlen - sch->q.qlen;
243 /* As we dropped packet(s), better let upper stack know this */
244 qdisc_tree_reduce_backlog(sch, prev_qlen - sch->q.qlen,
245 prev_backlog - sch->qstats.backlog);
215 246
216 /* As we dropped a packet, better let upper stack know this */ 247 return ret == idx ? NET_XMIT_CN : NET_XMIT_SUCCESS;
217 qdisc_tree_reduce_backlog(sch, 1, prev_backlog - sch->qstats.backlog);
218 return NET_XMIT_SUCCESS;
219} 248}
220 249
221/* This is the specific function called from codel_dequeue() 250/* This is the specific function called from codel_dequeue()
@@ -289,6 +318,7 @@ begin:
289 list_del_init(&flow->flowchain); 318 list_del_init(&flow->flowchain);
290 goto begin; 319 goto begin;
291 } 320 }
321 q->memory_usage -= skb->truesize;
292 qdisc_bstats_update(sch, skb); 322 qdisc_bstats_update(sch, skb);
293 flow->deficit -= qdisc_pkt_len(skb); 323 flow->deficit -= qdisc_pkt_len(skb);
294 /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0, 324 /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0,
@@ -335,6 +365,8 @@ static const struct nla_policy fq_codel_policy[TCA_FQ_CODEL_MAX + 1] = {
335 [TCA_FQ_CODEL_FLOWS] = { .type = NLA_U32 }, 365 [TCA_FQ_CODEL_FLOWS] = { .type = NLA_U32 },
336 [TCA_FQ_CODEL_QUANTUM] = { .type = NLA_U32 }, 366 [TCA_FQ_CODEL_QUANTUM] = { .type = NLA_U32 },
337 [TCA_FQ_CODEL_CE_THRESHOLD] = { .type = NLA_U32 }, 367 [TCA_FQ_CODEL_CE_THRESHOLD] = { .type = NLA_U32 },
368 [TCA_FQ_CODEL_DROP_BATCH_SIZE] = { .type = NLA_U32 },
369 [TCA_FQ_CODEL_MEMORY_LIMIT] = { .type = NLA_U32 },
338}; 370};
339 371
340static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt) 372static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
@@ -386,7 +418,14 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
386 if (tb[TCA_FQ_CODEL_QUANTUM]) 418 if (tb[TCA_FQ_CODEL_QUANTUM])
387 q->quantum = max(256U, nla_get_u32(tb[TCA_FQ_CODEL_QUANTUM])); 419 q->quantum = max(256U, nla_get_u32(tb[TCA_FQ_CODEL_QUANTUM]));
388 420
389 while (sch->q.qlen > sch->limit) { 421 if (tb[TCA_FQ_CODEL_DROP_BATCH_SIZE])
422 q->drop_batch_size = min(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE]));
423
424 if (tb[TCA_FQ_CODEL_MEMORY_LIMIT])
425 q->memory_limit = min(1U << 31, nla_get_u32(tb[TCA_FQ_CODEL_MEMORY_LIMIT]));
426
427 while (sch->q.qlen > sch->limit ||
428 q->memory_usage > q->memory_limit) {
390 struct sk_buff *skb = fq_codel_dequeue(sch); 429 struct sk_buff *skb = fq_codel_dequeue(sch);
391 430
392 q->cstats.drop_len += qdisc_pkt_len(skb); 431 q->cstats.drop_len += qdisc_pkt_len(skb);
@@ -431,6 +470,8 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
431 470
432 sch->limit = 10*1024; 471 sch->limit = 10*1024;
433 q->flows_cnt = 1024; 472 q->flows_cnt = 1024;
473 q->memory_limit = 32 << 20; /* 32 MBytes */
474 q->drop_batch_size = 64;
434 q->quantum = psched_mtu(qdisc_dev(sch)); 475 q->quantum = psched_mtu(qdisc_dev(sch));
435 q->perturbation = prandom_u32(); 476 q->perturbation = prandom_u32();
436 INIT_LIST_HEAD(&q->new_flows); 477 INIT_LIST_HEAD(&q->new_flows);
@@ -489,6 +530,10 @@ static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb)
489 q->cparams.ecn) || 530 q->cparams.ecn) ||
490 nla_put_u32(skb, TCA_FQ_CODEL_QUANTUM, 531 nla_put_u32(skb, TCA_FQ_CODEL_QUANTUM,
491 q->quantum) || 532 q->quantum) ||
533 nla_put_u32(skb, TCA_FQ_CODEL_DROP_BATCH_SIZE,
534 q->drop_batch_size) ||
535 nla_put_u32(skb, TCA_FQ_CODEL_MEMORY_LIMIT,
536 q->memory_limit) ||
492 nla_put_u32(skb, TCA_FQ_CODEL_FLOWS, 537 nla_put_u32(skb, TCA_FQ_CODEL_FLOWS,
493 q->flows_cnt)) 538 q->flows_cnt))
494 goto nla_put_failure; 539 goto nla_put_failure;
@@ -517,6 +562,8 @@ static int fq_codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
517 st.qdisc_stats.ecn_mark = q->cstats.ecn_mark; 562 st.qdisc_stats.ecn_mark = q->cstats.ecn_mark;
518 st.qdisc_stats.new_flow_count = q->new_flow_count; 563 st.qdisc_stats.new_flow_count = q->new_flow_count;
519 st.qdisc_stats.ce_mark = q->cstats.ce_mark; 564 st.qdisc_stats.ce_mark = q->cstats.ce_mark;
565 st.qdisc_stats.memory_usage = q->memory_usage;
566 st.qdisc_stats.drop_overmemory = q->drop_overmemory;
520 567
521 list_for_each(pos, &q->new_flows) 568 list_for_each(pos, &q->new_flows)
522 st.qdisc_stats.new_flows_len++; 569 st.qdisc_stats.new_flows_len++;
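To put numbers on the amortization claimed in the fq_codel_drop() comment above (my arithmetic, using the defaults set in fq_codel_init()): 1024 flows with 4-byte backlog counters mean each fat-flow search scans 1024 * 4 = 4096 bytes, and with drop_batch_size defaulting to 64 that one search is charged against up to 64 dropped packets, i.e. 4096 / 64 = 64 bytes of scanning per drop, roughly one cache line. The 32 MByte memory_limit default, tracked via skb->truesize, then bounds worst-case memory use independently of the packet-count limit.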
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 80742edea96f..269dd71b3828 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -108,35 +108,6 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
108 return skb; 108 return skb;
109} 109}
110 110
111static inline int handle_dev_cpu_collision(struct sk_buff *skb,
112 struct netdev_queue *dev_queue,
113 struct Qdisc *q)
114{
115 int ret;
116
117 if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) {
118 /*
119 * Same CPU holding the lock. It may be a transient
120 * configuration error, when hard_start_xmit() recurses. We
121 * detect it by checking xmit owner and drop the packet when
122 * deadloop is detected. Return OK to try the next skb.
123 */
124 kfree_skb_list(skb);
125 net_warn_ratelimited("Dead loop on netdevice %s, fix it urgently!\n",
126 dev_queue->dev->name);
127 ret = qdisc_qlen(q);
128 } else {
129 /*
130 * Another cpu is holding lock, requeue & delay xmits for
131 * some time.
132 */
133 __this_cpu_inc(softnet_data.cpu_collision);
134 ret = dev_requeue_skb(skb, q);
135 }
136
137 return ret;
138}
139
140/* 111/*
141 * Transmit possibly several skbs, and handle the return status as 112 * Transmit possibly several skbs, and handle the return status as
142 * required. Holding the __QDISC___STATE_RUNNING bit guarantees that 113 * required. Holding the __QDISC___STATE_RUNNING bit guarantees that
@@ -174,9 +145,6 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
174 if (dev_xmit_complete(ret)) { 145 if (dev_xmit_complete(ret)) {
175 /* Driver sent out skb successfully or skb was consumed */ 146 /* Driver sent out skb successfully or skb was consumed */
176 ret = qdisc_qlen(q); 147 ret = qdisc_qlen(q);
177 } else if (ret == NETDEV_TX_LOCKED) {
178 /* Driver try lock failed */
179 ret = handle_dev_cpu_collision(skb, txq, q);
180 } else { 148 } else {
181 /* Driver returned NETDEV_TX_BUSY - requeue skb */ 149 /* Driver returned NETDEV_TX_BUSY - requeue skb */
182 if (unlikely(ret != NETDEV_TX_BUSY)) 150 if (unlikely(ret != NETDEV_TX_BUSY))
@@ -259,13 +227,12 @@ unsigned long dev_trans_start(struct net_device *dev)
259 227
260 if (is_vlan_dev(dev)) 228 if (is_vlan_dev(dev))
261 dev = vlan_dev_real_dev(dev); 229 dev = vlan_dev_real_dev(dev);
262 res = dev->trans_start; 230 res = netdev_get_tx_queue(dev, 0)->trans_start;
263 for (i = 0; i < dev->num_tx_queues; i++) { 231 for (i = 1; i < dev->num_tx_queues; i++) {
264 val = netdev_get_tx_queue(dev, i)->trans_start; 232 val = netdev_get_tx_queue(dev, i)->trans_start;
265 if (val && time_after(val, res)) 233 if (val && time_after(val, res))
266 res = val; 234 res = val;
267 } 235 }
268 dev->trans_start = res;
269 236
270 return res; 237 return res;
271} 238}
@@ -288,10 +255,7 @@ static void dev_watchdog(unsigned long arg)
288 struct netdev_queue *txq; 255 struct netdev_queue *txq;
289 256
290 txq = netdev_get_tx_queue(dev, i); 257 txq = netdev_get_tx_queue(dev, i);
291 /* 258 trans_start = txq->trans_start;
292 * old device drivers set dev->trans_start
293 */
294 trans_start = txq->trans_start ? : dev->trans_start;
295 if (netif_xmit_stopped(txq) && 259 if (netif_xmit_stopped(txq) &&
296 time_after(jiffies, (trans_start + 260 time_after(jiffies, (trans_start +
297 dev->watchdog_timeo))) { 261 dev->watchdog_timeo))) {
@@ -807,7 +771,7 @@ void dev_activate(struct net_device *dev)
807 transition_one_qdisc(dev, dev_ingress_queue(dev), NULL); 771 transition_one_qdisc(dev, dev_ingress_queue(dev), NULL);
808 772
809 if (need_watchdog) { 773 if (need_watchdog) {
810 dev->trans_start = jiffies; 774 netif_trans_update(dev);
811 dev_watchdog_up(dev); 775 dev_watchdog_up(dev);
812 } 776 }
813} 777}
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 491d6fd6430c..205bed00dd34 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -395,6 +395,25 @@ static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
395 sch->q.qlen++; 395 sch->q.qlen++;
396} 396}
397 397
398/* netem can't properly corrupt a megapacket (like we get from GSO), so instead
399 * when we statistically choose to corrupt one, we instead segment it, returning
400 * the first packet to be corrupted, and re-enqueue the remaining frames
401 */
402static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch)
403{
404 struct sk_buff *segs;
405 netdev_features_t features = netif_skb_features(skb);
406
407 segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
408
409 if (IS_ERR_OR_NULL(segs)) {
410 qdisc_reshape_fail(skb, sch);
411 return NULL;
412 }
413 consume_skb(skb);
414 return segs;
415}
416
398/* 417/*
399 * Insert one skb into qdisc. 418 * Insert one skb into qdisc.
400 * Note: parent depends on return value to account for queue length. 419 * Note: parent depends on return value to account for queue length.
@@ -407,7 +426,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
407 /* We don't fill cb now as skb_unshare() may invalidate it */ 426 /* We don't fill cb now as skb_unshare() may invalidate it */
408 struct netem_skb_cb *cb; 427 struct netem_skb_cb *cb;
409 struct sk_buff *skb2; 428 struct sk_buff *skb2;
429 struct sk_buff *segs = NULL;
430 unsigned int len = 0, last_len, prev_len = qdisc_pkt_len(skb);
431 int nb = 0;
410 int count = 1; 432 int count = 1;
433 int rc = NET_XMIT_SUCCESS;
411 434
412 /* Random duplication */ 435 /* Random duplication */
413 if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) 436 if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
@@ -453,10 +476,23 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
453 * do it now in software before we mangle it. 476 * do it now in software before we mangle it.
454 */ 477 */
455 if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) { 478 if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
479 if (skb_is_gso(skb)) {
480 segs = netem_segment(skb, sch);
481 if (!segs)
482 return NET_XMIT_DROP;
483 } else {
484 segs = skb;
485 }
486
487 skb = segs;
488 segs = segs->next;
489
456 if (!(skb = skb_unshare(skb, GFP_ATOMIC)) || 490 if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
457 (skb->ip_summed == CHECKSUM_PARTIAL && 491 (skb->ip_summed == CHECKSUM_PARTIAL &&
458 skb_checksum_help(skb))) 492 skb_checksum_help(skb))) {
459 return qdisc_drop(skb, sch); 493 rc = qdisc_drop(skb, sch);
494 goto finish_segs;
495 }
460 496
461 skb->data[prandom_u32() % skb_headlen(skb)] ^= 497 skb->data[prandom_u32() % skb_headlen(skb)] ^=
462 1<<(prandom_u32() % 8); 498 1<<(prandom_u32() % 8);
@@ -516,6 +552,27 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
516 sch->qstats.requeues++; 552 sch->qstats.requeues++;
517 } 553 }
518 554
555finish_segs:
556 if (segs) {
557 while (segs) {
558 skb2 = segs->next;
559 segs->next = NULL;
560 qdisc_skb_cb(segs)->pkt_len = segs->len;
561 last_len = segs->len;
562 rc = qdisc_enqueue(segs, sch);
563 if (rc != NET_XMIT_SUCCESS) {
564 if (net_xmit_drop_count(rc))
565 qdisc_qstats_drop(sch);
566 } else {
567 nb++;
568 len += last_len;
569 }
570 segs = skb2;
571 }
572 sch->q.qlen += nb;
573 if (nb > 1)
574 qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
575 }
519 return NET_XMIT_SUCCESS; 576 return NET_XMIT_SUCCESS;
520} 577}
521 578
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 958ef5f33f4b..1eb94bf18ef4 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -239,7 +239,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
239 offset = 0; 239 offset = 0;
240 240
241 if ((whole > 1) || (whole && over)) 241 if ((whole > 1) || (whole && over))
242 SCTP_INC_STATS_USER(sock_net(asoc->base.sk), SCTP_MIB_FRAGUSRMSGS); 242 SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_FRAGUSRMSGS);
243 243
244 /* Create chunks for all the full sized DATA chunks. */ 244 /* Create chunks for all the full sized DATA chunks. */
245 for (i = 0, len = first_len; i < whole; i++) { 245 for (i = 0, len = first_len; i < whole; i++) {
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 00b8445364e3..a701527a9480 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -84,7 +84,7 @@ static inline int sctp_rcv_checksum(struct net *net, struct sk_buff *skb)
84 84
85 if (val != cmp) { 85 if (val != cmp) {
86 /* CRC failure, dump it. */ 86 /* CRC failure, dump it. */
87 SCTP_INC_STATS_BH(net, SCTP_MIB_CHECKSUMERRORS); 87 __SCTP_INC_STATS(net, SCTP_MIB_CHECKSUMERRORS);
88 return -1; 88 return -1;
89 } 89 }
90 return 0; 90 return 0;
@@ -122,7 +122,7 @@ int sctp_rcv(struct sk_buff *skb)
122 if (skb->pkt_type != PACKET_HOST) 122 if (skb->pkt_type != PACKET_HOST)
123 goto discard_it; 123 goto discard_it;
124 124
125 SCTP_INC_STATS_BH(net, SCTP_MIB_INSCTPPACKS); 125 __SCTP_INC_STATS(net, SCTP_MIB_INSCTPPACKS);
126 126
127 if (skb_linearize(skb)) 127 if (skb_linearize(skb))
128 goto discard_it; 128 goto discard_it;
@@ -208,7 +208,7 @@ int sctp_rcv(struct sk_buff *skb)
208 */ 208 */
209 if (!asoc) { 209 if (!asoc) {
210 if (sctp_rcv_ootb(skb)) { 210 if (sctp_rcv_ootb(skb)) {
211 SCTP_INC_STATS_BH(net, SCTP_MIB_OUTOFBLUES); 211 __SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES);
212 goto discard_release; 212 goto discard_release;
213 } 213 }
214 } 214 }
@@ -264,9 +264,9 @@ int sctp_rcv(struct sk_buff *skb)
264 skb = NULL; /* sctp_chunk_free already freed the skb */ 264 skb = NULL; /* sctp_chunk_free already freed the skb */
265 goto discard_release; 265 goto discard_release;
266 } 266 }
267 SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_BACKLOG); 267 __SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_BACKLOG);
268 } else { 268 } else {
269 SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_SOFTIRQ); 269 __SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_SOFTIRQ);
270 sctp_inq_push(&chunk->rcvr->inqueue, chunk); 270 sctp_inq_push(&chunk->rcvr->inqueue, chunk);
271 } 271 }
272 272
@@ -281,7 +281,7 @@ int sctp_rcv(struct sk_buff *skb)
281 return 0; 281 return 0;
282 282
283discard_it: 283discard_it:
284 SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_DISCARDS); 284 __SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_DISCARDS);
285 kfree_skb(skb); 285 kfree_skb(skb);
286 return 0; 286 return 0;
287 287
@@ -532,7 +532,7 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb,
532 * servers this needs to be solved differently. 532 * servers this needs to be solved differently.
533 */ 533 */
534 if (sock_owned_by_user(sk)) 534 if (sock_owned_by_user(sk))
535 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); 535 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
536 536
537 *app = asoc; 537 *app = asoc;
538 *tpp = transport; 538 *tpp = transport;
@@ -589,7 +589,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
589 skb->network_header = saveip; 589 skb->network_header = saveip;
590 skb->transport_header = savesctp; 590 skb->transport_header = savesctp;
591 if (!sk) { 591 if (!sk) {
592 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); 592 __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
593 return; 593 return;
594 } 594 }
595 /* Warning: The sock lock is held. Remember to call 595 /* Warning: The sock lock is held. Remember to call
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index b335ffcef0b9..9d87bba0ff1d 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -89,10 +89,12 @@ void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *chunk)
89 * Eventually, we should clean up inqueue to not rely 89 * Eventually, we should clean up inqueue to not rely
90 * on the BH related data structures. 90 * on the BH related data structures.
91 */ 91 */
92 local_bh_disable();
92 list_add_tail(&chunk->list, &q->in_chunk_list); 93 list_add_tail(&chunk->list, &q->in_chunk_list);
93 if (chunk->asoc) 94 if (chunk->asoc)
94 chunk->asoc->stats.ipackets++; 95 chunk->asoc->stats.ipackets++;
95 q->immediate.func(&q->immediate); 96 q->immediate.func(&q->immediate);
97 local_bh_enable();
96} 98}
97 99
98/* Peek at the next chunk on the inqeue. */ 100/* Peek at the next chunk on the inqeue. */
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index ce46f1c7f133..0657d18a85bf 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -162,7 +162,7 @@ static void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
162 skb->network_header = saveip; 162 skb->network_header = saveip;
163 skb->transport_header = savesctp; 163 skb->transport_header = savesctp;
164 if (!sk) { 164 if (!sk) {
165 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_INERRORS); 165 __ICMP6_INC_STATS(net, idev, ICMP6_MIB_INERRORS);
166 goto out; 166 goto out;
167 } 167 }
168 168
diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c
index bb2d8d9608e9..8e3e769dc9ea 100644
--- a/net/sctp/sctp_diag.c
+++ b/net/sctp/sctp_diag.c
@@ -145,7 +145,11 @@ static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc,
145 else 145 else
146 amt = sk_wmem_alloc_get(sk); 146 amt = sk_wmem_alloc_get(sk);
147 mem[SK_MEMINFO_WMEM_ALLOC] = amt; 147 mem[SK_MEMINFO_WMEM_ALLOC] = amt;
148 mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk); 148 if (asoc && asoc->ep->rcvbuf_policy)
149 amt = atomic_read(&asoc->rmem_alloc);
150 else
151 amt = sk_rmem_alloc_get(sk);
152 mem[SK_MEMINFO_RMEM_ALLOC] = amt;
149 mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf; 153 mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
150 mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf; 154 mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf;
151 mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc; 155 mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
@@ -161,8 +165,9 @@ static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc,
161 if (ext & (1 << (INET_DIAG_INFO - 1))) { 165 if (ext & (1 << (INET_DIAG_INFO - 1))) {
162 struct nlattr *attr; 166 struct nlattr *attr;
163 167
164 attr = nla_reserve(skb, INET_DIAG_INFO, 168 attr = nla_reserve_64bit(skb, INET_DIAG_INFO,
165 sizeof(struct sctp_info)); 169 sizeof(struct sctp_info),
170 INET_DIAG_PAD);
166 if (!attr) 171 if (!attr)
167 goto errout; 172 goto errout;
168 173
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index e8f0112f9b28..aa3712259368 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -1741,10 +1741,9 @@ out:
1741 } else if (local_cork) 1741 } else if (local_cork)
1742 error = sctp_outq_uncork(&asoc->outqueue, gfp); 1742 error = sctp_outq_uncork(&asoc->outqueue, gfp);
1743 1743
1744 if (sp->pending_data_ready) { 1744 if (sp->data_ready_signalled)
1745 sk->sk_data_ready(sk); 1745 sp->data_ready_signalled = 0;
1746 sp->pending_data_ready = 0; 1746
1747 }
1748 return error; 1747 return error;
1749nomem: 1748nomem:
1750 error = -ENOMEM; 1749 error = -ENOMEM;
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index ec12a8920e5f..ec166d2bd2d9 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -194,6 +194,7 @@ static int sctp_ulpq_clear_pd(struct sctp_ulpq *ulpq)
194int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) 194int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
195{ 195{
196 struct sock *sk = ulpq->asoc->base.sk; 196 struct sock *sk = ulpq->asoc->base.sk;
197 struct sctp_sock *sp = sctp_sk(sk);
197 struct sk_buff_head *queue, *skb_list; 198 struct sk_buff_head *queue, *skb_list;
198 struct sk_buff *skb = sctp_event2skb(event); 199 struct sk_buff *skb = sctp_event2skb(event);
199 int clear_pd = 0; 200 int clear_pd = 0;
@@ -211,7 +212,7 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
211 sk_incoming_cpu_update(sk); 212 sk_incoming_cpu_update(sk);
212 } 213 }
213 /* Check if the user wishes to receive this event. */ 214 /* Check if the user wishes to receive this event. */
214 if (!sctp_ulpevent_is_enabled(event, &sctp_sk(sk)->subscribe)) 215 if (!sctp_ulpevent_is_enabled(event, &sp->subscribe))
215 goto out_free; 216 goto out_free;
216 217
217 /* If we are in partial delivery mode, post to the lobby until 218 /* If we are in partial delivery mode, post to the lobby until
@@ -219,7 +220,7 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
219 * the association the cause of the partial delivery. 220 * the association the cause of the partial delivery.
220 */ 221 */
221 222
222 if (atomic_read(&sctp_sk(sk)->pd_mode) == 0) { 223 if (atomic_read(&sp->pd_mode) == 0) {
223 queue = &sk->sk_receive_queue; 224 queue = &sk->sk_receive_queue;
224 } else { 225 } else {
225 if (ulpq->pd_mode) { 226 if (ulpq->pd_mode) {
@@ -231,7 +232,7 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
231 if ((event->msg_flags & MSG_NOTIFICATION) || 232 if ((event->msg_flags & MSG_NOTIFICATION) ||
232 (SCTP_DATA_NOT_FRAG == 233 (SCTP_DATA_NOT_FRAG ==
233 (event->msg_flags & SCTP_DATA_FRAG_MASK))) 234 (event->msg_flags & SCTP_DATA_FRAG_MASK)))
234 queue = &sctp_sk(sk)->pd_lobby; 235 queue = &sp->pd_lobby;
235 else { 236 else {
236 clear_pd = event->msg_flags & MSG_EOR; 237 clear_pd = event->msg_flags & MSG_EOR;
237 queue = &sk->sk_receive_queue; 238 queue = &sk->sk_receive_queue;
@@ -242,10 +243,10 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
242 * can queue this to the receive queue instead 243 * can queue this to the receive queue instead
243 * of the lobby. 244 * of the lobby.
244 */ 245 */
245 if (sctp_sk(sk)->frag_interleave) 246 if (sp->frag_interleave)
246 queue = &sk->sk_receive_queue; 247 queue = &sk->sk_receive_queue;
247 else 248 else
248 queue = &sctp_sk(sk)->pd_lobby; 249 queue = &sp->pd_lobby;
249 } 250 }
250 } 251 }
251 252
@@ -264,8 +265,10 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
264 if (clear_pd) 265 if (clear_pd)
265 sctp_ulpq_clear_pd(ulpq); 266 sctp_ulpq_clear_pd(ulpq);
266 267
267 if (queue == &sk->sk_receive_queue) 268 if (queue == &sk->sk_receive_queue && !sp->data_ready_signalled) {
268 sctp_sk(sk)->pending_data_ready = 1; 269 sp->data_ready_signalled = 1;
270 sk->sk_data_ready(sk);
271 }
269 return 1; 272 return 1;
270 273
271out_free: 274out_free:
@@ -1126,11 +1129,13 @@ void sctp_ulpq_abort_pd(struct sctp_ulpq *ulpq, gfp_t gfp)
1126{ 1129{
1127 struct sctp_ulpevent *ev = NULL; 1130 struct sctp_ulpevent *ev = NULL;
1128 struct sock *sk; 1131 struct sock *sk;
1132 struct sctp_sock *sp;
1129 1133
1130 if (!ulpq->pd_mode) 1134 if (!ulpq->pd_mode)
1131 return; 1135 return;
1132 1136
1133 sk = ulpq->asoc->base.sk; 1137 sk = ulpq->asoc->base.sk;
1138 sp = sctp_sk(sk);
1134 if (sctp_ulpevent_type_enabled(SCTP_PARTIAL_DELIVERY_EVENT, 1139 if (sctp_ulpevent_type_enabled(SCTP_PARTIAL_DELIVERY_EVENT,
1135 &sctp_sk(sk)->subscribe)) 1140 &sctp_sk(sk)->subscribe))
1136 ev = sctp_ulpevent_make_pdapi(ulpq->asoc, 1141 ev = sctp_ulpevent_make_pdapi(ulpq->asoc,
@@ -1140,6 +1145,8 @@ void sctp_ulpq_abort_pd(struct sctp_ulpq *ulpq, gfp_t gfp)
1140 __skb_queue_tail(&sk->sk_receive_queue, sctp_event2skb(ev)); 1145 __skb_queue_tail(&sk->sk_receive_queue, sctp_event2skb(ev));
1141 1146
1142 /* If there is data waiting, send it up the socket now. */ 1147 /* If there is data waiting, send it up the socket now. */
1143 if (sctp_ulpq_clear_pd(ulpq) || ev) 1148 if ((sctp_ulpq_clear_pd(ulpq) || ev) && !sp->data_ready_signalled) {
1144 sctp_sk(sk)->pending_data_ready = 1; 1149 sp->data_ready_signalled = 1;
1150 sk->sk_data_ready(sk);
1151 }
1145} 1152}
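The two SCTP hunks above implement a deferred wakeup: ulpqueue.c sets data_ready_signalled and calls sk_data_ready() only for the first event queued in a burst, and sm_sideeffect.c clears the flag once that burst of commands has been processed. A minimal sketch of the pattern, with hypothetical names (this is not the kernel code, only the shape of it):

/* Sketch of the "signal once per burst" idea used above.
 * All names here are illustrative. */
struct rx_sock {
	int data_ready_signalled;            /* cleared when the burst is done */
	void (*data_ready)(struct rx_sock *sk);
};

static void enqueue_event(struct rx_sock *sk)
{
	/* ... add the event to the receive queue ... */
	if (!sk->data_ready_signalled) {     /* first event of this burst */
		sk->data_ready_signalled = 1;
		sk->data_ready(sk);          /* wake the reader exactly once */
	}
}

static void burst_done(struct rx_sock *sk)
{
	/* end of command/side-effect processing: allow the next burst to signal */
	sk->data_ready_signalled = 0;
}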
diff --git a/net/socket.c b/net/socket.c
index 5dbb0bbe12a7..7789d79609dd 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -600,9 +600,6 @@ void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
600 if (tsflags & SOF_TIMESTAMPING_TX_SCHED) 600 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
601 flags |= SKBTX_SCHED_TSTAMP; 601 flags |= SKBTX_SCHED_TSTAMP;
602 602
603 if (tsflags & SOF_TIMESTAMPING_TX_ACK)
604 flags |= SKBTX_ACK_TSTAMP;
605
606 *tx_flags = flags; 603 *tx_flags = flags;
607} 604}
608EXPORT_SYMBOL(__sock_tx_timestamp); 605EXPORT_SYMBOL(__sock_tx_timestamp);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index d0756ac5c0f2..a6c68dc086af 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1018,11 +1018,11 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt,
1018 1018
1019 /* Suck it into the iovec, verify checksum if not done by hw. */ 1019 /* Suck it into the iovec, verify checksum if not done by hw. */
1020 if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) { 1020 if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) {
1021 UDPX_INC_STATS_BH(sk, UDP_MIB_INERRORS); 1021 __UDPX_INC_STATS(sk, UDP_MIB_INERRORS);
1022 goto out_unlock; 1022 goto out_unlock;
1023 } 1023 }
1024 1024
1025 UDPX_INC_STATS_BH(sk, UDP_MIB_INDATAGRAMS); 1025 __UDPX_INC_STATS(sk, UDP_MIB_INDATAGRAMS);
1026 1026
1027 xprt_adjust_cwnd(xprt, task, copied); 1027 xprt_adjust_cwnd(xprt, task, copied);
1028 xprt_complete_rqst(task, copied); 1028 xprt_complete_rqst(task, copied);
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 2b9b98f1c2ff..b7e01d88bdc5 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -305,6 +305,8 @@ static void switchdev_port_attr_set_deferred(struct net_device *dev,
305 if (err && err != -EOPNOTSUPP) 305 if (err && err != -EOPNOTSUPP)
306 netdev_err(dev, "failed (err=%d) to set attribute (id=%d)\n", 306 netdev_err(dev, "failed (err=%d) to set attribute (id=%d)\n",
307 err, attr->id); 307 err, attr->id);
308 if (attr->complete)
309 attr->complete(dev, err, attr->complete_priv);
308} 310}
309 311
310static int switchdev_port_attr_set_defer(struct net_device *dev, 312static int switchdev_port_attr_set_defer(struct net_device *dev,
@@ -434,6 +436,8 @@ static void switchdev_port_obj_add_deferred(struct net_device *dev,
434 if (err && err != -EOPNOTSUPP) 436 if (err && err != -EOPNOTSUPP)
435 netdev_err(dev, "failed (err=%d) to add object (id=%d)\n", 437 netdev_err(dev, "failed (err=%d) to add object (id=%d)\n",
436 err, obj->id); 438 err, obj->id);
439 if (obj->complete)
440 obj->complete(dev, err, obj->complete_priv);
437} 441}
438 442
439static int switchdev_port_obj_add_defer(struct net_device *dev, 443static int switchdev_port_obj_add_defer(struct net_device *dev,
@@ -502,6 +506,8 @@ static void switchdev_port_obj_del_deferred(struct net_device *dev,
502 if (err && err != -EOPNOTSUPP) 506 if (err && err != -EOPNOTSUPP)
503 netdev_err(dev, "failed (err=%d) to del object (id=%d)\n", 507 netdev_err(dev, "failed (err=%d) to del object (id=%d)\n",
504 err, obj->id); 508 err, obj->id);
509 if (obj->complete)
510 obj->complete(dev, err, obj->complete_priv);
505} 511}
506 512
507static int switchdev_port_obj_del_defer(struct net_device *dev, 513static int switchdev_port_obj_del_defer(struct net_device *dev,
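The three switchdev hunks invoke an optional completion callback after a deferred attribute or object operation has run, passing the final error code and a caller-supplied cookie (the complete / complete_priv fields in the diff). A hedged sketch of how a caller might wait on such a deferred add; only the callback fields come from the patch, the rest (the my_ctx type, the exact defer flag) is illustrative:

#include <linux/completion.h>
#include <linux/netdevice.h>

/* Illustrative only: the caller provides a callback so it can observe
 * the result of a switchdev operation that runs from deferred context. */
struct my_ctx {
	struct completion done;
	int err;
};

static void my_obj_complete(struct net_device *dev, int err, void *priv)
{
	struct my_ctx *ctx = priv;

	ctx->err = err;                /* error code from the deferred handler */
	complete(&ctx->done);
}

/* Usage sketch (assuming the object is queued with the defer flag set):
 *	obj.complete      = my_obj_complete;
 *	obj.complete_priv = &ctx;
 *	switchdev_port_obj_add(dev, &obj);
 *	wait_for_completion(&ctx.done);
 */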
diff --git a/net/tipc/core.c b/net/tipc/core.c
index e2bdb07a49a2..fe1b062c4f18 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -112,11 +112,9 @@ static int __init tipc_init(void)
112 112
113 pr_info("Activated (version " TIPC_MOD_VER ")\n"); 113 pr_info("Activated (version " TIPC_MOD_VER ")\n");
114 114
115 sysctl_tipc_rmem[0] = TIPC_CONN_OVERLOAD_LIMIT >> 4 << 115 sysctl_tipc_rmem[0] = RCVBUF_MIN;
116 TIPC_LOW_IMPORTANCE; 116 sysctl_tipc_rmem[1] = RCVBUF_DEF;
117 sysctl_tipc_rmem[1] = TIPC_CONN_OVERLOAD_LIMIT >> 4 << 117 sysctl_tipc_rmem[2] = RCVBUF_MAX;
118 TIPC_CRITICAL_IMPORTANCE;
119 sysctl_tipc_rmem[2] = TIPC_CONN_OVERLOAD_LIMIT;
120 118
121 err = tipc_netlink_start(); 119 err = tipc_netlink_start();
122 if (err) 120 if (err)
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 58bf51541813..024da8af91f0 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -743,16 +743,26 @@ static inline void msg_set_msgcnt(struct tipc_msg *m, u16 n)
743 msg_set_bits(m, 9, 16, 0xffff, n); 743 msg_set_bits(m, 9, 16, 0xffff, n);
744} 744}
745 745
746static inline u32 msg_bcast_tag(struct tipc_msg *m) 746static inline u32 msg_conn_ack(struct tipc_msg *m)
747{ 747{
748 return msg_bits(m, 9, 16, 0xffff); 748 return msg_bits(m, 9, 16, 0xffff);
749} 749}
750 750
751static inline void msg_set_bcast_tag(struct tipc_msg *m, u32 n) 751static inline void msg_set_conn_ack(struct tipc_msg *m, u32 n)
752{ 752{
753 msg_set_bits(m, 9, 16, 0xffff, n); 753 msg_set_bits(m, 9, 16, 0xffff, n);
754} 754}
755 755
756static inline u32 msg_adv_win(struct tipc_msg *m)
757{
758 return msg_bits(m, 9, 0, 0xffff);
759}
760
761static inline void msg_set_adv_win(struct tipc_msg *m, u32 n)
762{
763 msg_set_bits(m, 9, 0, 0xffff, n);
764}
765
756static inline u32 msg_max_pkt(struct tipc_msg *m) 766static inline u32 msg_max_pkt(struct tipc_msg *m)
757{ 767{
758 return msg_bits(m, 9, 16, 0xffff) * 4; 768 return msg_bits(m, 9, 16, 0xffff) * 4;
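In the msg.h hunk, header word 9 is repurposed: the old bcast_tag accessors become msg_conn_ack() in bits 16..31 and a new msg_adv_win() field occupies bits 0..15, so a single 32-bit word carries both the acknowledged block count and the advertised window. A stand-alone sketch of the same get/set arithmetic (plain C; the kernel's msg_bits()/msg_set_bits() helpers additionally handle network byte order):

#include <stdint.h>
#include <stdio.h>

/* Word 9 layout per the accessors above: conn_ack in bits 16..31,
 * adv_win in bits 0..15. */
static uint32_t get_bits(uint32_t w, int pos, uint32_t mask)
{
	return (w >> pos) & mask;
}

static uint32_t set_bits(uint32_t w, int pos, uint32_t mask, uint32_t val)
{
	w &= ~(mask << pos);
	return w | ((val & mask) << pos);
}

int main(void)
{
	uint32_t word9 = 0;

	word9 = set_bits(word9, 16, 0xffff, 5);    /* conn_ack = 5 blocks   */
	word9 = set_bits(word9, 0, 0xffff, 128);   /* adv_win  = 128 blocks */
	printf("ack=%u win=%u\n",
	       (unsigned)get_bits(word9, 16, 0xffff),
	       (unsigned)get_bits(word9, 0, 0xffff));
	return 0;
}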
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 68d9f7b8485c..d903f560e2fd 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * net/tipc/node.c: TIPC node management routines 2 * net/tipc/node.c: TIPC node management routines
3 * 3 *
4 * Copyright (c) 2000-2006, 2012-2015, Ericsson AB 4 * Copyright (c) 2000-2006, 2012-2016, Ericsson AB
5 * Copyright (c) 2005-2006, 2010-2014, Wind River Systems 5 * Copyright (c) 2005-2006, 2010-2014, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
@@ -191,6 +191,20 @@ int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel)
191 tipc_node_put(n); 191 tipc_node_put(n);
192 return mtu; 192 return mtu;
193} 193}
194
195u16 tipc_node_get_capabilities(struct net *net, u32 addr)
196{
197 struct tipc_node *n;
198 u16 caps;
199
200 n = tipc_node_find(net, addr);
201 if (unlikely(!n))
202 return TIPC_NODE_CAPABILITIES;
203 caps = n->capabilities;
204 tipc_node_put(n);
205 return caps;
206}
207
194/* 208/*
195 * A trivial power-of-two bitmask technique is used for speed, since this 209 * A trivial power-of-two bitmask technique is used for speed, since this
196 * operation is done for every incoming TIPC packet. The number of hash table 210 * operation is done for every incoming TIPC packet. The number of hash table
@@ -304,8 +318,11 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
304 318
305 spin_lock_bh(&tn->node_list_lock); 319 spin_lock_bh(&tn->node_list_lock);
306 n = tipc_node_find(net, addr); 320 n = tipc_node_find(net, addr);
307 if (n) 321 if (n) {
322 /* Same node may come back with new capabilities */
323 n->capabilities = capabilities;
308 goto exit; 324 goto exit;
325 }
309 n = kzalloc(sizeof(*n), GFP_ATOMIC); 326 n = kzalloc(sizeof(*n), GFP_ATOMIC);
310 if (!n) { 327 if (!n) {
311 pr_warn("Node creation failed, no memory\n"); 328 pr_warn("Node creation failed, no memory\n");
@@ -554,6 +571,7 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id,
554 *slot1 = bearer_id; 571 *slot1 = bearer_id;
555 tipc_node_fsm_evt(n, SELF_ESTABL_CONTACT_EVT); 572 tipc_node_fsm_evt(n, SELF_ESTABL_CONTACT_EVT);
556 n->action_flags |= TIPC_NOTIFY_NODE_UP; 573 n->action_flags |= TIPC_NOTIFY_NODE_UP;
574 tipc_link_set_active(nl, true);
557 tipc_bcast_add_peer(n->net, nl, xmitq); 575 tipc_bcast_add_peer(n->net, nl, xmitq);
558 return; 576 return;
559 } 577 }
@@ -1451,6 +1469,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
1451 int bearer_id = b->identity; 1469 int bearer_id = b->identity;
1452 struct tipc_link_entry *le; 1470 struct tipc_link_entry *le;
1453 u16 bc_ack = msg_bcast_ack(hdr); 1471 u16 bc_ack = msg_bcast_ack(hdr);
1472 u32 self = tipc_own_addr(net);
1454 int rc = 0; 1473 int rc = 0;
1455 1474
1456 __skb_queue_head_init(&xmitq); 1475 __skb_queue_head_init(&xmitq);
@@ -1467,6 +1486,10 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
1467 return tipc_node_bc_rcv(net, skb, bearer_id); 1486 return tipc_node_bc_rcv(net, skb, bearer_id);
1468 } 1487 }
1469 1488
1489 /* Discard unicast link messages destined for another node */
1490 if (unlikely(!msg_short(hdr) && (msg_destnode(hdr) != self)))
1491 goto discard;
1492
1470 /* Locate neighboring node that sent packet */ 1493 /* Locate neighboring node that sent packet */
1471 n = tipc_node_find(net, msg_prevnode(hdr)); 1494 n = tipc_node_find(net, msg_prevnode(hdr));
1472 if (unlikely(!n)) 1495 if (unlikely(!n))
diff --git a/net/tipc/node.h b/net/tipc/node.h
index f39d9d06e8bb..8264b3d97dc4 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -45,10 +45,11 @@
45/* Optional capabilities supported by this code version 45/* Optional capabilities supported by this code version
46 */ 46 */
47enum { 47enum {
48 TIPC_BCAST_SYNCH = (1 << 1) 48 TIPC_BCAST_SYNCH = (1 << 1),
49 TIPC_BLOCK_FLOWCTL = (2 << 1)
49}; 50};
50 51
51#define TIPC_NODE_CAPABILITIES TIPC_BCAST_SYNCH 52#define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | TIPC_BLOCK_FLOWCTL)
52#define INVALID_BEARER_ID -1 53#define INVALID_BEARER_ID -1
53 54
54void tipc_node_stop(struct net *net); 55void tipc_node_stop(struct net *net);
@@ -70,6 +71,7 @@ void tipc_node_broadcast(struct net *net, struct sk_buff *skb);
70int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port); 71int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port);
71void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port); 72void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port);
72int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel); 73int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel);
74u16 tipc_node_get_capabilities(struct net *net, u32 addr);
73int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb); 75int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb);
74int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb); 76int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb);
75int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info); 77int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info);
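For reference, the two capability flags in the updated enum land on distinct bits, so the combined mask is well formed:

/*   TIPC_BCAST_SYNCH       = (1 << 1) = 0x2
 *   TIPC_BLOCK_FLOWCTL     = (2 << 1) = 0x4
 *   TIPC_NODE_CAPABILITIES = 0x2 | 0x4 = 0x6
 * tipc_node_get_capabilities() returns this full mask when the peer node
 * is not (yet) known, as shown in the node.c hunk above. */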
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 3eeb50a27b89..12628890c219 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -96,8 +96,11 @@ struct tipc_sock {
96 uint conn_timeout; 96 uint conn_timeout;
97 atomic_t dupl_rcvcnt; 97 atomic_t dupl_rcvcnt;
98 bool link_cong; 98 bool link_cong;
99 uint sent_unacked; 99 u16 snt_unacked;
100 uint rcv_unacked; 100 u16 snd_win;
101 u16 peer_caps;
102 u16 rcv_unacked;
103 u16 rcv_win;
101 struct sockaddr_tipc remote; 104 struct sockaddr_tipc remote;
102 struct rhash_head node; 105 struct rhash_head node;
103 struct rcu_head rcu; 106 struct rcu_head rcu;
@@ -227,9 +230,29 @@ static struct tipc_sock *tipc_sk(const struct sock *sk)
227 return container_of(sk, struct tipc_sock, sk); 230 return container_of(sk, struct tipc_sock, sk);
228} 231}
229 232
230static int tsk_conn_cong(struct tipc_sock *tsk) 233static bool tsk_conn_cong(struct tipc_sock *tsk)
231{ 234{
232 return tsk->sent_unacked >= TIPC_FLOWCTRL_WIN; 235 return tsk->snt_unacked >= tsk->snd_win;
236}
237
238/* tsk_blocks(): translate a buffer size in bytes to number of
239 * advertisable blocks, taking into account the ratio truesize(len)/len
240 * We can trust that this ratio is always < 4 for len >= FLOWCTL_BLK_SZ
241 */
242static u16 tsk_adv_blocks(int len)
243{
244 return len / FLOWCTL_BLK_SZ / 4;
245}
246
247/* tsk_inc(): increment counter for sent or received data
248 * - If block based flow control is not supported by peer we
249 * fall back to message based ditto, incrementing the counter
250 */
251static u16 tsk_inc(struct tipc_sock *tsk, int msglen)
252{
253 if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
254 return ((msglen / FLOWCTL_BLK_SZ) + 1);
255 return 1;
233} 256}
234 257
235/** 258/**
@@ -377,9 +400,12 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
377 sk->sk_write_space = tipc_write_space; 400 sk->sk_write_space = tipc_write_space;
378 sk->sk_destruct = tipc_sock_destruct; 401 sk->sk_destruct = tipc_sock_destruct;
379 tsk->conn_timeout = CONN_TIMEOUT_DEFAULT; 402 tsk->conn_timeout = CONN_TIMEOUT_DEFAULT;
380 tsk->sent_unacked = 0;
381 atomic_set(&tsk->dupl_rcvcnt, 0); 403 atomic_set(&tsk->dupl_rcvcnt, 0);
382 404
405 /* Start out with safe limits until we receive an advertised window */
406 tsk->snd_win = tsk_adv_blocks(RCVBUF_MIN);
407 tsk->rcv_win = tsk->snd_win;
408
383 if (sock->state == SS_READY) { 409 if (sock->state == SS_READY) {
384 tsk_set_unreturnable(tsk, true); 410 tsk_set_unreturnable(tsk, true);
385 if (sock->type == SOCK_DGRAM) 411 if (sock->type == SOCK_DGRAM)
@@ -775,7 +801,7 @@ static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb)
775 struct sock *sk = &tsk->sk; 801 struct sock *sk = &tsk->sk;
776 struct tipc_msg *hdr = buf_msg(skb); 802 struct tipc_msg *hdr = buf_msg(skb);
777 int mtyp = msg_type(hdr); 803 int mtyp = msg_type(hdr);
778 int conn_cong; 804 bool conn_cong;
779 805
780 /* Ignore if connection cannot be validated: */ 806 /* Ignore if connection cannot be validated: */
781 if (!tsk_peer_msg(tsk, hdr)) 807 if (!tsk_peer_msg(tsk, hdr))
@@ -789,7 +815,9 @@ static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb)
789 return; 815 return;
790 } else if (mtyp == CONN_ACK) { 816 } else if (mtyp == CONN_ACK) {
791 conn_cong = tsk_conn_cong(tsk); 817 conn_cong = tsk_conn_cong(tsk);
792 tsk->sent_unacked -= msg_msgcnt(hdr); 818 tsk->snt_unacked -= msg_conn_ack(hdr);
819 if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
820 tsk->snd_win = msg_adv_win(hdr);
793 if (conn_cong) 821 if (conn_cong)
794 sk->sk_write_space(sk); 822 sk->sk_write_space(sk);
795 } else if (mtyp != CONN_PROBE_REPLY) { 823 } else if (mtyp != CONN_PROBE_REPLY) {
@@ -1020,12 +1048,14 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
1020 u32 dnode; 1048 u32 dnode;
1021 uint mtu, send, sent = 0; 1049 uint mtu, send, sent = 0;
1022 struct iov_iter save; 1050 struct iov_iter save;
1051 int hlen = MIN_H_SIZE;
1023 1052
1024 /* Handle implied connection establishment */ 1053 /* Handle implied connection establishment */
1025 if (unlikely(dest)) { 1054 if (unlikely(dest)) {
1026 rc = __tipc_sendmsg(sock, m, dsz); 1055 rc = __tipc_sendmsg(sock, m, dsz);
1056 hlen = msg_hdr_sz(mhdr);
1027 if (dsz && (dsz == rc)) 1057 if (dsz && (dsz == rc))
1028 tsk->sent_unacked = 1; 1058 tsk->snt_unacked = tsk_inc(tsk, dsz + hlen);
1029 return rc; 1059 return rc;
1030 } 1060 }
1031 if (dsz > (uint)INT_MAX) 1061 if (dsz > (uint)INT_MAX)
@@ -1054,7 +1084,7 @@ next:
1054 if (likely(!tsk_conn_cong(tsk))) { 1084 if (likely(!tsk_conn_cong(tsk))) {
1055 rc = tipc_node_xmit(net, &pktchain, dnode, portid); 1085 rc = tipc_node_xmit(net, &pktchain, dnode, portid);
1056 if (likely(!rc)) { 1086 if (likely(!rc)) {
1057 tsk->sent_unacked++; 1087 tsk->snt_unacked += tsk_inc(tsk, send + hlen);
1058 sent += send; 1088 sent += send;
1059 if (sent == dsz) 1089 if (sent == dsz)
1060 return dsz; 1090 return dsz;
@@ -1118,6 +1148,13 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
1118 sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv); 1148 sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv);
1119 tipc_node_add_conn(net, peer_node, tsk->portid, peer_port); 1149 tipc_node_add_conn(net, peer_node, tsk->portid, peer_port);
1120 tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid); 1150 tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid);
1151 tsk->peer_caps = tipc_node_get_capabilities(net, peer_node);
1152 if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
1153 return;
1154
1155 /* Fall back to message based flow control */
1156 tsk->rcv_win = FLOWCTL_MSG_WIN;
1157 tsk->snd_win = FLOWCTL_MSG_WIN;
1121} 1158}
1122 1159
1123/** 1160/**
@@ -1214,7 +1251,7 @@ static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
1214 return 0; 1251 return 0;
1215} 1252}
1216 1253
1217static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack) 1254static void tipc_sk_send_ack(struct tipc_sock *tsk)
1218{ 1255{
1219 struct net *net = sock_net(&tsk->sk); 1256 struct net *net = sock_net(&tsk->sk);
1220 struct sk_buff *skb = NULL; 1257 struct sk_buff *skb = NULL;
@@ -1230,7 +1267,14 @@ static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack)
1230 if (!skb) 1267 if (!skb)
1231 return; 1268 return;
1232 msg = buf_msg(skb); 1269 msg = buf_msg(skb);
1233 msg_set_msgcnt(msg, ack); 1270 msg_set_conn_ack(msg, tsk->rcv_unacked);
1271 tsk->rcv_unacked = 0;
1272
1273 /* Adjust to and advertize the correct window limit */
1274 if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) {
1275 tsk->rcv_win = tsk_adv_blocks(tsk->sk.sk_rcvbuf);
1276 msg_set_adv_win(msg, tsk->rcv_win);
1277 }
1234 tipc_node_xmit_skb(net, skb, dnode, msg_link_selector(msg)); 1278 tipc_node_xmit_skb(net, skb, dnode, msg_link_selector(msg));
1235} 1279}
1236 1280
@@ -1288,7 +1332,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buf_len,
1288 long timeo; 1332 long timeo;
1289 unsigned int sz; 1333 unsigned int sz;
1290 u32 err; 1334 u32 err;
1291 int res; 1335 int res, hlen;
1292 1336
1293 /* Catch invalid receive requests */ 1337 /* Catch invalid receive requests */
1294 if (unlikely(!buf_len)) 1338 if (unlikely(!buf_len))
@@ -1313,6 +1357,7 @@ restart:
1313 buf = skb_peek(&sk->sk_receive_queue); 1357 buf = skb_peek(&sk->sk_receive_queue);
1314 msg = buf_msg(buf); 1358 msg = buf_msg(buf);
1315 sz = msg_data_sz(msg); 1359 sz = msg_data_sz(msg);
1360 hlen = msg_hdr_sz(msg);
1316 err = msg_errcode(msg); 1361 err = msg_errcode(msg);
1317 1362
1318 /* Discard an empty non-errored message & try again */ 1363 /* Discard an empty non-errored message & try again */
@@ -1335,7 +1380,7 @@ restart:
1335 sz = buf_len; 1380 sz = buf_len;
1336 m->msg_flags |= MSG_TRUNC; 1381 m->msg_flags |= MSG_TRUNC;
1337 } 1382 }
1338 res = skb_copy_datagram_msg(buf, msg_hdr_sz(msg), m, sz); 1383 res = skb_copy_datagram_msg(buf, hlen, m, sz);
1339 if (res) 1384 if (res)
1340 goto exit; 1385 goto exit;
1341 res = sz; 1386 res = sz;
@@ -1347,15 +1392,15 @@ restart:
1347 res = -ECONNRESET; 1392 res = -ECONNRESET;
1348 } 1393 }
1349 1394
1350 /* Consume received message (optional) */ 1395 if (unlikely(flags & MSG_PEEK))
1351 if (likely(!(flags & MSG_PEEK))) { 1396 goto exit;
1352 if ((sock->state != SS_READY) && 1397
1353 (++tsk->rcv_unacked >= TIPC_CONNACK_INTV)) { 1398 if (likely(sock->state != SS_READY)) {
1354 tipc_sk_send_ack(tsk, tsk->rcv_unacked); 1399 tsk->rcv_unacked += tsk_inc(tsk, hlen + sz);
1355 tsk->rcv_unacked = 0; 1400 if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4)))
1356 } 1401 tipc_sk_send_ack(tsk);
1357 tsk_advance_rx_queue(sk);
1358 } 1402 }
1403 tsk_advance_rx_queue(sk);
1359exit: 1404exit:
1360 release_sock(sk); 1405 release_sock(sk);
1361 return res; 1406 return res;
@@ -1384,7 +1429,7 @@ static int tipc_recv_stream(struct socket *sock, struct msghdr *m,
1384 int sz_to_copy, target, needed; 1429 int sz_to_copy, target, needed;
1385 int sz_copied = 0; 1430 int sz_copied = 0;
1386 u32 err; 1431 u32 err;
1387 int res = 0; 1432 int res = 0, hlen;
1388 1433
1389 /* Catch invalid receive attempts */ 1434 /* Catch invalid receive attempts */
1390 if (unlikely(!buf_len)) 1435 if (unlikely(!buf_len))
@@ -1410,6 +1455,7 @@ restart:
1410 buf = skb_peek(&sk->sk_receive_queue); 1455 buf = skb_peek(&sk->sk_receive_queue);
1411 msg = buf_msg(buf); 1456 msg = buf_msg(buf);
1412 sz = msg_data_sz(msg); 1457 sz = msg_data_sz(msg);
1458 hlen = msg_hdr_sz(msg);
1413 err = msg_errcode(msg); 1459 err = msg_errcode(msg);
1414 1460
1415 /* Discard an empty non-errored message & try again */ 1461 /* Discard an empty non-errored message & try again */
@@ -1434,8 +1480,7 @@ restart:
1434 needed = (buf_len - sz_copied); 1480 needed = (buf_len - sz_copied);
1435 sz_to_copy = (sz <= needed) ? sz : needed; 1481 sz_to_copy = (sz <= needed) ? sz : needed;
1436 1482
1437 res = skb_copy_datagram_msg(buf, msg_hdr_sz(msg) + offset, 1483 res = skb_copy_datagram_msg(buf, hlen + offset, m, sz_to_copy);
1438 m, sz_to_copy);
1439 if (res) 1484 if (res)
1440 goto exit; 1485 goto exit;
1441 1486
@@ -1457,20 +1502,18 @@ restart:
1457 res = -ECONNRESET; 1502 res = -ECONNRESET;
1458 } 1503 }
1459 1504
1460 /* Consume received message (optional) */ 1505 if (unlikely(flags & MSG_PEEK))
1461 if (likely(!(flags & MSG_PEEK))) { 1506 goto exit;
1462 if (unlikely(++tsk->rcv_unacked >= TIPC_CONNACK_INTV)) { 1507
1463 tipc_sk_send_ack(tsk, tsk->rcv_unacked); 1508 tsk->rcv_unacked += tsk_inc(tsk, hlen + sz);
1464 tsk->rcv_unacked = 0; 1509 if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4)))
1465 } 1510 tipc_sk_send_ack(tsk);
1466 tsk_advance_rx_queue(sk); 1511 tsk_advance_rx_queue(sk);
1467 }
1468 1512
1469 /* Loop around if more data is required */ 1513 /* Loop around if more data is required */
1470 if ((sz_copied < buf_len) && /* didn't get all requested data */ 1514 if ((sz_copied < buf_len) && /* didn't get all requested data */
1471 (!skb_queue_empty(&sk->sk_receive_queue) || 1515 (!skb_queue_empty(&sk->sk_receive_queue) ||
1472 (sz_copied < target)) && /* and more is ready or required */ 1516 (sz_copied < target)) && /* and more is ready or required */
1473 (!(flags & MSG_PEEK)) && /* and aren't just peeking at data */
1474 (!err)) /* and haven't reached a FIN */ 1517 (!err)) /* and haven't reached a FIN */
1475 goto restart; 1518 goto restart;
1476 1519
@@ -1602,30 +1645,33 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
1602/** 1645/**
1603 * rcvbuf_limit - get proper overload limit of socket receive queue 1646 * rcvbuf_limit - get proper overload limit of socket receive queue
1604 * @sk: socket 1647 * @sk: socket
1605 * @buf: message 1648 * @skb: message
1606 * 1649 *
1607 * For all connection oriented messages, irrespective of importance, 1650 * For connection oriented messages, irrespective of importance,
1608 * the default overload value (i.e. 67MB) is set as limit. 1651 * default queue limit is 2 MB.
1609 * 1652 *
1610 * For all connectionless messages, by default new queue limits are 1653 * For connectionless messages, queue limits are based on message
1611 * as belows: 1654 * importance as follows:
1612 * 1655 *
1613 * TIPC_LOW_IMPORTANCE (4 MB) 1656 * TIPC_LOW_IMPORTANCE (2 MB)
1614 * TIPC_MEDIUM_IMPORTANCE (8 MB) 1657 * TIPC_MEDIUM_IMPORTANCE (4 MB)
1615 * TIPC_HIGH_IMPORTANCE (16 MB) 1658 * TIPC_HIGH_IMPORTANCE (8 MB)
1616 * TIPC_CRITICAL_IMPORTANCE (32 MB) 1659 * TIPC_CRITICAL_IMPORTANCE (16 MB)
1617 * 1660 *
1618 * Returns overload limit according to corresponding message importance 1661 * Returns overload limit according to corresponding message importance
1619 */ 1662 */
1620static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf) 1663static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
1621{ 1664{
1622 struct tipc_msg *msg = buf_msg(buf); 1665 struct tipc_sock *tsk = tipc_sk(sk);
1666 struct tipc_msg *hdr = buf_msg(skb);
1667
1668 if (unlikely(!msg_connected(hdr)))
1669 return sk->sk_rcvbuf << msg_importance(hdr);
1623 1670
1624 if (msg_connected(msg)) 1671 if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
1625 return sysctl_tipc_rmem[2]; 1672 return sk->sk_rcvbuf;
1626 1673
1627 return sk->sk_rcvbuf >> TIPC_CRITICAL_IMPORTANCE << 1674 return FLOWCTL_MSG_LIM;
1628 msg_importance(msg);
1629} 1675}
1630 1676
1631/** 1677/**
@@ -1748,7 +1794,7 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
1748 1794
1749 /* Try backlog, compensating for double-counted bytes */ 1795 /* Try backlog, compensating for double-counted bytes */
1750 dcnt = &tipc_sk(sk)->dupl_rcvcnt; 1796 dcnt = &tipc_sk(sk)->dupl_rcvcnt;
1751 if (sk->sk_backlog.len) 1797 if (!sk->sk_backlog.len)
1752 atomic_set(dcnt, 0); 1798 atomic_set(dcnt, 0);
1753 lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt); 1799 lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt);
1754 if (likely(!sk_add_backlog(sk, skb, lim))) 1800 if (likely(!sk_add_backlog(sk, skb, lim)))
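The socket.c changes replace the fixed one-ack-per-256-messages scheme with accounting in 1 KiB blocks: the sender charges tsk_inc() blocks per message against the advertised window (tsk->snd_win), and the receiver sends an ack once a quarter of its window (tsk->rcv_win / 4) has been consumed. A small worked example using the constants from the socket.h hunk below; the helpers mirror tsk_adv_blocks() and tsk_inc() for a block-capable peer, and the message size is arbitrary:

#include <stdio.h>

#define FLOWCTL_BLK_SZ 1024                   /* from the socket.h hunk below */
#define RCVBUF_MIN     (FLOWCTL_BLK_SZ * 512)

/* bytes -> advertisable blocks; the patch notes truesize/len < 4
 * for messages of at least one block, hence the extra divide by 4 */
static int adv_blocks(int len)    { return len / FLOWCTL_BLK_SZ / 4; }

/* blocks charged for one message when the peer supports TIPC_BLOCK_FLOWCTL */
static int msg_blocks(int msglen) { return msglen / FLOWCTL_BLK_SZ + 1; }

int main(void)
{
	int win  = adv_blocks(RCVBUF_MIN);   /* initial window: 128 blocks     */
	int cost = msg_blocks(5000);         /* a 5000-byte message: 5 blocks  */

	printf("window=%d blocks, message costs %d blocks\n", win, cost);
	printf("receiver acks after %d blocks consumed\n", win / 4);
	return 0;
}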
diff --git a/net/tipc/socket.h b/net/tipc/socket.h
index 4241f22069dc..06fb5944cf76 100644
--- a/net/tipc/socket.h
+++ b/net/tipc/socket.h
@@ -1,6 +1,6 @@
1/* net/tipc/socket.h: Include file for TIPC socket code 1/* net/tipc/socket.h: Include file for TIPC socket code
2 * 2 *
3 * Copyright (c) 2014-2015, Ericsson AB 3 * Copyright (c) 2014-2016, Ericsson AB
4 * All rights reserved. 4 * All rights reserved.
5 * 5 *
6 * Redistribution and use in source and binary forms, with or without 6 * Redistribution and use in source and binary forms, with or without
@@ -38,10 +38,17 @@
38#include <net/sock.h> 38#include <net/sock.h>
39#include <net/genetlink.h> 39#include <net/genetlink.h>
40 40
41#define TIPC_CONNACK_INTV 256 41/* Compatibility values for deprecated message based flow control */
42#define TIPC_FLOWCTRL_WIN (TIPC_CONNACK_INTV * 2) 42#define FLOWCTL_MSG_WIN 512
43#define TIPC_CONN_OVERLOAD_LIMIT ((TIPC_FLOWCTRL_WIN * 2 + 1) * \ 43#define FLOWCTL_MSG_LIM ((FLOWCTL_MSG_WIN * 2 + 1) * SKB_TRUESIZE(MAX_MSG_SIZE))
44 SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) 44
45#define FLOWCTL_BLK_SZ 1024
46
47/* Socket receive buffer sizes */
48#define RCVBUF_MIN (FLOWCTL_BLK_SZ * 512)
49#define RCVBUF_DEF (FLOWCTL_BLK_SZ * 1024 * 2)
50#define RCVBUF_MAX (FLOWCTL_BLK_SZ * 1024 * 16)
51
45int tipc_socket_init(void); 52int tipc_socket_init(void);
46void tipc_socket_stop(void); 53void tipc_socket_stop(void);
47void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq); 54void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq);
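The new receive-buffer constants evaluate to 512 KiB, 2 MiB and 16 MiB, which is also what tipc_init() above now loads into sysctl_tipc_rmem[0..2]; FLOWCTL_MSG_LIM keeps the old message-count based overload limit for peers that do not advertise TIPC_BLOCK_FLOWCTL. Worked values (illustrative arithmetic only):

/* RCVBUF_MIN = 1024 * 512       =   524288 bytes (512 KiB)
 * RCVBUF_DEF = 1024 * 1024 * 2  =  2097152 bytes (2 MiB)
 * RCVBUF_MAX = 1024 * 1024 * 16 = 16777216 bytes (16 MiB)
 * FLOWCTL_MSG_LIM = (512 * 2 + 1) * SKB_TRUESIZE(MAX_MSG_SIZE),
 * i.e. 1025 truesize-adjusted maximum-size messages, matching the
 * legacy message-based scheme it replaces. */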
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 79de588c7bd6..0dd02244e21d 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -326,8 +326,7 @@ static void tipc_subscrb_rcv_cb(struct net *net, int conid,
326 return tipc_subscrp_cancel(s, subscriber); 326 return tipc_subscrp_cancel(s, subscriber);
327 } 327 }
328 328
329 if (s) 329 tipc_subscrp_subscribe(net, s, subscriber, swap);
330 tipc_subscrp_subscribe(net, s, subscriber, swap);
331} 330}
332 331
333/* Handle one request to establish a new subscriber */ 332/* Handle one request to establish a new subscriber */
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 3dce53ebea92..b5f1221f48d4 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1808,27 +1808,8 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
1808 else if (sk->sk_shutdown & RCV_SHUTDOWN) 1808 else if (sk->sk_shutdown & RCV_SHUTDOWN)
1809 err = 0; 1809 err = 0;
1810 1810
1811 if (copied > 0) { 1811 if (copied > 0)
1812 /* We only do these additional bookkeeping/notification steps
1813 * if we actually copied something out of the queue pair
1814 * instead of just peeking ahead.
1815 */
1816
1817 if (!(flags & MSG_PEEK)) {
1818 /* If the other side has shutdown for sending and there
1819 * is nothing more to read, then modify the socket
1820 * state.
1821 */
1822 if (vsk->peer_shutdown & SEND_SHUTDOWN) {
1823 if (vsock_stream_has_data(vsk) <= 0) {
1824 sk->sk_state = SS_UNCONNECTED;
1825 sock_set_flag(sk, SOCK_DONE);
1826 sk->sk_state_change(sk);
1827 }
1828 }
1829 }
1830 err = copied; 1812 err = copied;
1831 }
1832 1813
1833out: 1814out:
1834 release_sock(sk); 1815 release_sock(sk);
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 56214736fe88..4120b7a538be 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -2051,7 +2051,7 @@ static u32 vmci_transport_get_local_cid(void)
2051 return vmci_get_context_id(); 2051 return vmci_get_context_id();
2052} 2052}
2053 2053
2054static struct vsock_transport vmci_transport = { 2054static const struct vsock_transport vmci_transport = {
2055 .init = vmci_transport_socket_init, 2055 .init = vmci_transport_socket_init,
2056 .destruct = vmci_transport_destruct, 2056 .destruct = vmci_transport_destruct,
2057 .release = vmci_transport_release, 2057 .release = vmci_transport_release,
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index ff4a91fcab9f..637387bbaaea 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -99,6 +99,9 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
99 99
100 skb_dst_force(skb); 100 skb_dst_force(skb);
101 101
102 /* Inner headers are invalid now. */
103 skb->encapsulation = 0;
104
102 err = x->type->output(x, skb); 105 err = x->type->output(x, skb);
103 if (err == -EINPROGRESS) 106 if (err == -EINPROGRESS)
104 goto out; 107 goto out;