aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-11-17 16:52:59 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2015-11-17 16:52:59 -0500
commit7f151f1d8abb7d5930b49d4796b463dca1673cb7 (patch)
treef995b6444729c105fe0a123b8240ef3dc3f1bf4a /net
parenta18ab2f6cb79eeccedea61b8c7bf71d24e087d42 (diff)
parente7523a497d48a9921983a80670f7a02dc4639d41 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller: 1) Fix list tests in netfilter ingress support, from Florian Westphal. 2) Fix reversal of input and output interfaces in ingress hook invocation, from Pablo Neira Ayuso. 3) We have a use after free in r8169, caught by Dave Jones, fixed by Francois Romieu. 4) Splice use-after-free fix in AF_UNIX from Hannes Frederic Sowa. 5) Three ipv6 route handling bug fixes from Martin KaFai Lau: a) Don't create clone routes not managed by the fib6 tree b) Don't forget to check expiration of DST_NOCACHE routes. c) Handle rt->dst.from == NULL properly. 6) Several AF_PACKET fixes wrt transport header setting and SKB protocol setting, from Daniel Borkmann. 7) Fix thunder driver crash on shutdown, from Pavel Fedin. 8) Several Mellanox driver fixes (max MTU calculations, use of correct DMA unmap in TX path, etc.) from Saeed Mahameed, Tariq Toukan, Doron Tsur, Achiad Shochat, Eran Ben Elisha, and Noa Osherovich. 9) Several mv88e6060 DSA driver fixes (wrong bit definitions for certain registers, etc.) from Neil Armstrong. 10) Make sure to disable preemption while updating per-cpu stats of ip tunnels, from Jason A. Donenfeld. 11) Various ARM64 bpf JIT fixes, from Yang Shi. 12) Flush icache properly in ARM JITs, from Daniel Borkmann. 13) Fix masking of RX and TX interrupts in ravb driver, from Masaru Nagai. 14) Fix netdev feature propagation for devices not implementing ->ndo_set_features(). From Nikolay Aleksandrov. 15) Big endian fix in vmxnet3 driver, from Shrikrishna Khare. 16) RAW socket code increments incorrect SNMP counters, fix from Ben Cartwright-Cox. 17) IPv6 multicast SNMP counters are bumped twice, fix from Neil Horman. 18) Fix handling of VLAN headers on stacked devices when REORDER is disabled. From Vlad Yasevich. 19) Fix SKB leaks and use-after-free in ipvlan and macvlan drivers, from Sabrina Dubroca. 
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (83 commits) MAINTAINERS: Update Mellanox's Eth NIC driver entries net/core: revert "net: fix __netdev_update_features return.." and add comment af_unix: take receive queue lock while appending new skb rtnetlink: fix frame size warning in rtnl_fill_ifinfo net: use skb_clone to avoid alloc_pages failure. packet: Use PAGE_ALIGNED macro packet: Don't check frames_per_block against negative values net: phy: Use interrupts when available in NOLINK state phy: marvell: Add support for 88E1540 PHY arm64: bpf: make BPF prologue and epilogue align with ARM64 AAPCS macvlan: fix leak in macvlan_handle_frame ipvlan: fix use after free of skb ipvlan: fix leak in ipvlan_rcv_frame vlan: Do not put vlan headers back on bridge and macvlan ports vlan: Fix untag operations of stacked vlans with REORDER_HEADER off via-velocity: unconditionally drop frames with bad l2 length ipg: Remove ipg driver dl2k: Add support for IP1000A-based cards snmp: Remove duplicate OUTMCAST stat increment net: thunder: Check for driver data in nicvf_remove() ...
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan_core.c4
-rw-r--r--net/bridge/br_stp.c2
-rw-r--r--net/bridge/br_stp_if.c2
-rw-r--r--net/core/dev.c18
-rw-r--r--net/core/neighbour.c2
-rw-r--r--net/core/rtnetlink.c274
-rw-r--r--net/core/skbuff.c3
-rw-r--r--net/ipv4/inet_connection_sock.c4
-rw-r--r--net/ipv4/netfilter/nf_nat_pptp.c2
-rw-r--r--net/ipv4/raw.c8
-rw-r--r--net/ipv4/tcp.c21
-rw-r--r--net/ipv4/tcp_diag.c2
-rw-r--r--net/ipv4/tcp_ipv4.c14
-rw-r--r--net/ipv6/mcast.c2
-rw-r--r--net/ipv6/route.c22
-rw-r--r--net/ipv6/tcp_ipv6.c19
-rw-r--r--net/netfilter/Kconfig6
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_gen.h17
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ip.c14
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ipmac.c64
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_port.c18
-rw-r--r--net/netfilter/ipset/ip_set_core.c14
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h26
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c5
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c16
-rw-r--r--net/netfilter/nfnetlink_log.c2
-rw-r--r--net/netfilter/nft_counter.c49
-rw-r--r--net/netfilter/nft_dynset.c5
-rw-r--r--net/packet/af_packet.c92
-rw-r--r--net/sctp/auth.c4
-rw-r--r--net/unix/af_unix.c24
31 files changed, 437 insertions, 318 deletions
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 496b27588493..e2ed69850489 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -30,7 +30,9 @@ bool vlan_do_receive(struct sk_buff **skbp)
30 skb->pkt_type = PACKET_HOST; 30 skb->pkt_type = PACKET_HOST;
31 } 31 }
32 32
33 if (!(vlan_dev_priv(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR)) { 33 if (!(vlan_dev_priv(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR) &&
34 !netif_is_macvlan_port(vlan_dev) &&
35 !netif_is_bridge_port(vlan_dev)) {
34 unsigned int offset = skb->data - skb_mac_header(skb); 36 unsigned int offset = skb->data - skb_mac_header(skb);
35 37
36 /* 38 /*
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index f7e8dee64fc8..5f3f64553179 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -48,7 +48,7 @@ void br_set_state(struct net_bridge_port *p, unsigned int state)
48 48
49 p->state = state; 49 p->state = state;
50 err = switchdev_port_attr_set(p->dev, &attr); 50 err = switchdev_port_attr_set(p->dev, &attr);
51 if (err) 51 if (err && err != -EOPNOTSUPP)
52 br_warn(p->br, "error setting offload STP state on port %u(%s)\n", 52 br_warn(p->br, "error setting offload STP state on port %u(%s)\n",
53 (unsigned int) p->port_no, p->dev->name); 53 (unsigned int) p->port_no, p->dev->name);
54} 54}
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index fa53d7a89f48..5396ff08af32 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -50,7 +50,7 @@ void br_init_port(struct net_bridge_port *p)
50 p->config_pending = 0; 50 p->config_pending = 0;
51 51
52 err = switchdev_port_attr_set(p->dev, &attr); 52 err = switchdev_port_attr_set(p->dev, &attr);
53 if (err) 53 if (err && err != -EOPNOTSUPP)
54 netdev_err(p->dev, "failed to set HW ageing time\n"); 54 netdev_err(p->dev, "failed to set HW ageing time\n");
55} 55}
56 56
diff --git a/net/core/dev.c b/net/core/dev.c
index ab9b8d0d115e..ae00b894e675 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2403,17 +2403,20 @@ static void skb_warn_bad_offload(const struct sk_buff *skb)
2403{ 2403{
2404 static const netdev_features_t null_features = 0; 2404 static const netdev_features_t null_features = 0;
2405 struct net_device *dev = skb->dev; 2405 struct net_device *dev = skb->dev;
2406 const char *driver = ""; 2406 const char *name = "";
2407 2407
2408 if (!net_ratelimit()) 2408 if (!net_ratelimit())
2409 return; 2409 return;
2410 2410
2411 if (dev && dev->dev.parent) 2411 if (dev) {
2412 driver = dev_driver_string(dev->dev.parent); 2412 if (dev->dev.parent)
2413 2413 name = dev_driver_string(dev->dev.parent);
2414 else
2415 name = netdev_name(dev);
2416 }
2414 WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d " 2417 WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d "
2415 "gso_type=%d ip_summed=%d\n", 2418 "gso_type=%d ip_summed=%d\n",
2416 driver, dev ? &dev->features : &null_features, 2419 name, dev ? &dev->features : &null_features,
2417 skb->sk ? &skb->sk->sk_route_caps : &null_features, 2420 skb->sk ? &skb->sk->sk_route_caps : &null_features,
2418 skb->len, skb->data_len, skb_shinfo(skb)->gso_size, 2421 skb->len, skb->data_len, skb_shinfo(skb)->gso_size,
2419 skb_shinfo(skb)->gso_type, skb->ip_summed); 2422 skb_shinfo(skb)->gso_type, skb->ip_summed);
@@ -6426,11 +6429,16 @@ int __netdev_update_features(struct net_device *dev)
6426 6429
6427 if (dev->netdev_ops->ndo_set_features) 6430 if (dev->netdev_ops->ndo_set_features)
6428 err = dev->netdev_ops->ndo_set_features(dev, features); 6431 err = dev->netdev_ops->ndo_set_features(dev, features);
6432 else
6433 err = 0;
6429 6434
6430 if (unlikely(err < 0)) { 6435 if (unlikely(err < 0)) {
6431 netdev_err(dev, 6436 netdev_err(dev,
6432 "set_features() failed (%d); wanted %pNF, left %pNF\n", 6437 "set_features() failed (%d); wanted %pNF, left %pNF\n",
6433 err, &features, &dev->features); 6438 err, &features, &dev->features);
6439 /* return non-0 since some features might have changed and
6440 * it's better to fire a spurious notification than miss it
6441 */
6434 return -1; 6442 return -1;
6435 } 6443 }
6436 6444
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 1aa8437ed6c4..e6af42da28d9 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -857,7 +857,7 @@ static void neigh_probe(struct neighbour *neigh)
857 struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue); 857 struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
858 /* keep skb alive even if arp_queue overflows */ 858 /* keep skb alive even if arp_queue overflows */
859 if (skb) 859 if (skb)
860 skb = skb_copy(skb, GFP_ATOMIC); 860 skb = skb_clone(skb, GFP_ATOMIC);
861 write_unlock(&neigh->lock); 861 write_unlock(&neigh->lock);
862 neigh->ops->solicit(neigh, skb); 862 neigh->ops->solicit(neigh, skb);
863 atomic_inc(&neigh->probes); 863 atomic_inc(&neigh->probes);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 504bd17b7456..34ba7a08876d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1045,15 +1045,156 @@ static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev)
1045 return 0; 1045 return 0;
1046} 1046}
1047 1047
1048static noinline_for_stack int rtnl_fill_stats(struct sk_buff *skb,
1049 struct net_device *dev)
1050{
1051 const struct rtnl_link_stats64 *stats;
1052 struct rtnl_link_stats64 temp;
1053 struct nlattr *attr;
1054
1055 stats = dev_get_stats(dev, &temp);
1056
1057 attr = nla_reserve(skb, IFLA_STATS,
1058 sizeof(struct rtnl_link_stats));
1059 if (!attr)
1060 return -EMSGSIZE;
1061
1062 copy_rtnl_link_stats(nla_data(attr), stats);
1063
1064 attr = nla_reserve(skb, IFLA_STATS64,
1065 sizeof(struct rtnl_link_stats64));
1066 if (!attr)
1067 return -EMSGSIZE;
1068
1069 copy_rtnl_link_stats64(nla_data(attr), stats);
1070
1071 return 0;
1072}
1073
1074static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
1075 struct net_device *dev,
1076 int vfs_num,
1077 struct nlattr *vfinfo)
1078{
1079 struct ifla_vf_rss_query_en vf_rss_query_en;
1080 struct ifla_vf_link_state vf_linkstate;
1081 struct ifla_vf_spoofchk vf_spoofchk;
1082 struct ifla_vf_tx_rate vf_tx_rate;
1083 struct ifla_vf_stats vf_stats;
1084 struct ifla_vf_trust vf_trust;
1085 struct ifla_vf_vlan vf_vlan;
1086 struct ifla_vf_rate vf_rate;
1087 struct nlattr *vf, *vfstats;
1088 struct ifla_vf_mac vf_mac;
1089 struct ifla_vf_info ivi;
1090
1091 /* Not all SR-IOV capable drivers support the
1092 * spoofcheck and "RSS query enable" query. Preset to
1093 * -1 so the user space tool can detect that the driver
1094 * didn't report anything.
1095 */
1096 ivi.spoofchk = -1;
1097 ivi.rss_query_en = -1;
1098 ivi.trusted = -1;
1099 memset(ivi.mac, 0, sizeof(ivi.mac));
1100 /* The default value for VF link state is "auto"
1101 * IFLA_VF_LINK_STATE_AUTO which equals zero
1102 */
1103 ivi.linkstate = 0;
1104 if (dev->netdev_ops->ndo_get_vf_config(dev, vfs_num, &ivi))
1105 return 0;
1106
1107 vf_mac.vf =
1108 vf_vlan.vf =
1109 vf_rate.vf =
1110 vf_tx_rate.vf =
1111 vf_spoofchk.vf =
1112 vf_linkstate.vf =
1113 vf_rss_query_en.vf =
1114 vf_trust.vf = ivi.vf;
1115
1116 memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
1117 vf_vlan.vlan = ivi.vlan;
1118 vf_vlan.qos = ivi.qos;
1119 vf_tx_rate.rate = ivi.max_tx_rate;
1120 vf_rate.min_tx_rate = ivi.min_tx_rate;
1121 vf_rate.max_tx_rate = ivi.max_tx_rate;
1122 vf_spoofchk.setting = ivi.spoofchk;
1123 vf_linkstate.link_state = ivi.linkstate;
1124 vf_rss_query_en.setting = ivi.rss_query_en;
1125 vf_trust.setting = ivi.trusted;
1126 vf = nla_nest_start(skb, IFLA_VF_INFO);
1127 if (!vf) {
1128 nla_nest_cancel(skb, vfinfo);
1129 return -EMSGSIZE;
1130 }
1131 if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) ||
1132 nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) ||
1133 nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate),
1134 &vf_rate) ||
1135 nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate),
1136 &vf_tx_rate) ||
1137 nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk),
1138 &vf_spoofchk) ||
1139 nla_put(skb, IFLA_VF_LINK_STATE, sizeof(vf_linkstate),
1140 &vf_linkstate) ||
1141 nla_put(skb, IFLA_VF_RSS_QUERY_EN,
1142 sizeof(vf_rss_query_en),
1143 &vf_rss_query_en) ||
1144 nla_put(skb, IFLA_VF_TRUST,
1145 sizeof(vf_trust), &vf_trust))
1146 return -EMSGSIZE;
1147 memset(&vf_stats, 0, sizeof(vf_stats));
1148 if (dev->netdev_ops->ndo_get_vf_stats)
1149 dev->netdev_ops->ndo_get_vf_stats(dev, vfs_num,
1150 &vf_stats);
1151 vfstats = nla_nest_start(skb, IFLA_VF_STATS);
1152 if (!vfstats) {
1153 nla_nest_cancel(skb, vf);
1154 nla_nest_cancel(skb, vfinfo);
1155 return -EMSGSIZE;
1156 }
1157 if (nla_put_u64(skb, IFLA_VF_STATS_RX_PACKETS,
1158 vf_stats.rx_packets) ||
1159 nla_put_u64(skb, IFLA_VF_STATS_TX_PACKETS,
1160 vf_stats.tx_packets) ||
1161 nla_put_u64(skb, IFLA_VF_STATS_RX_BYTES,
1162 vf_stats.rx_bytes) ||
1163 nla_put_u64(skb, IFLA_VF_STATS_TX_BYTES,
1164 vf_stats.tx_bytes) ||
1165 nla_put_u64(skb, IFLA_VF_STATS_BROADCAST,
1166 vf_stats.broadcast) ||
1167 nla_put_u64(skb, IFLA_VF_STATS_MULTICAST,
1168 vf_stats.multicast))
1169 return -EMSGSIZE;
1170 nla_nest_end(skb, vfstats);
1171 nla_nest_end(skb, vf);
1172 return 0;
1173}
1174
1175static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
1176{
1177 struct rtnl_link_ifmap map = {
1178 .mem_start = dev->mem_start,
1179 .mem_end = dev->mem_end,
1180 .base_addr = dev->base_addr,
1181 .irq = dev->irq,
1182 .dma = dev->dma,
1183 .port = dev->if_port,
1184 };
1185 if (nla_put(skb, IFLA_MAP, sizeof(map), &map))
1186 return -EMSGSIZE;
1187
1188 return 0;
1189}
1190
1048static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, 1191static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
1049 int type, u32 pid, u32 seq, u32 change, 1192 int type, u32 pid, u32 seq, u32 change,
1050 unsigned int flags, u32 ext_filter_mask) 1193 unsigned int flags, u32 ext_filter_mask)
1051{ 1194{
1052 struct ifinfomsg *ifm; 1195 struct ifinfomsg *ifm;
1053 struct nlmsghdr *nlh; 1196 struct nlmsghdr *nlh;
1054 struct rtnl_link_stats64 temp; 1197 struct nlattr *af_spec;
1055 const struct rtnl_link_stats64 *stats;
1056 struct nlattr *attr, *af_spec;
1057 struct rtnl_af_ops *af_ops; 1198 struct rtnl_af_ops *af_ops;
1058 struct net_device *upper_dev = netdev_master_upper_dev_get(dev); 1199 struct net_device *upper_dev = netdev_master_upper_dev_get(dev);
1059 1200
@@ -1096,18 +1237,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
1096 nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down)) 1237 nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down))
1097 goto nla_put_failure; 1238 goto nla_put_failure;
1098 1239
1099 if (1) { 1240 if (rtnl_fill_link_ifmap(skb, dev))
1100 struct rtnl_link_ifmap map = { 1241 goto nla_put_failure;
1101 .mem_start = dev->mem_start,
1102 .mem_end = dev->mem_end,
1103 .base_addr = dev->base_addr,
1104 .irq = dev->irq,
1105 .dma = dev->dma,
1106 .port = dev->if_port,
1107 };
1108 if (nla_put(skb, IFLA_MAP, sizeof(map), &map))
1109 goto nla_put_failure;
1110 }
1111 1242
1112 if (dev->addr_len) { 1243 if (dev->addr_len) {
1113 if (nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr) || 1244 if (nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr) ||
@@ -1124,128 +1255,27 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
1124 if (rtnl_phys_switch_id_fill(skb, dev)) 1255 if (rtnl_phys_switch_id_fill(skb, dev))
1125 goto nla_put_failure; 1256 goto nla_put_failure;
1126 1257
1127 attr = nla_reserve(skb, IFLA_STATS, 1258 if (rtnl_fill_stats(skb, dev))
1128 sizeof(struct rtnl_link_stats));
1129 if (attr == NULL)
1130 goto nla_put_failure;
1131
1132 stats = dev_get_stats(dev, &temp);
1133 copy_rtnl_link_stats(nla_data(attr), stats);
1134
1135 attr = nla_reserve(skb, IFLA_STATS64,
1136 sizeof(struct rtnl_link_stats64));
1137 if (attr == NULL)
1138 goto nla_put_failure; 1259 goto nla_put_failure;
1139 copy_rtnl_link_stats64(nla_data(attr), stats);
1140 1260
1141 if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF) && 1261 if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF) &&
1142 nla_put_u32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent))) 1262 nla_put_u32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent)))
1143 goto nla_put_failure; 1263 goto nla_put_failure;
1144 1264
1145 if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent 1265 if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent &&
1146 && (ext_filter_mask & RTEXT_FILTER_VF)) { 1266 ext_filter_mask & RTEXT_FILTER_VF) {
1147 int i; 1267 int i;
1148 1268 struct nlattr *vfinfo;
1149 struct nlattr *vfinfo, *vf, *vfstats;
1150 int num_vfs = dev_num_vf(dev->dev.parent); 1269 int num_vfs = dev_num_vf(dev->dev.parent);
1151 1270
1152 vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST); 1271 vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
1153 if (!vfinfo) 1272 if (!vfinfo)
1154 goto nla_put_failure; 1273 goto nla_put_failure;
1155 for (i = 0; i < num_vfs; i++) { 1274 for (i = 0; i < num_vfs; i++) {
1156 struct ifla_vf_info ivi; 1275 if (rtnl_fill_vfinfo(skb, dev, i, vfinfo))
1157 struct ifla_vf_mac vf_mac;
1158 struct ifla_vf_vlan vf_vlan;
1159 struct ifla_vf_rate vf_rate;
1160 struct ifla_vf_tx_rate vf_tx_rate;
1161 struct ifla_vf_spoofchk vf_spoofchk;
1162 struct ifla_vf_link_state vf_linkstate;
1163 struct ifla_vf_rss_query_en vf_rss_query_en;
1164 struct ifla_vf_stats vf_stats;
1165 struct ifla_vf_trust vf_trust;
1166
1167 /*
1168 * Not all SR-IOV capable drivers support the
1169 * spoofcheck and "RSS query enable" query. Preset to
1170 * -1 so the user space tool can detect that the driver
1171 * didn't report anything.
1172 */
1173 ivi.spoofchk = -1;
1174 ivi.rss_query_en = -1;
1175 ivi.trusted = -1;
1176 memset(ivi.mac, 0, sizeof(ivi.mac));
1177 /* The default value for VF link state is "auto"
1178 * IFLA_VF_LINK_STATE_AUTO which equals zero
1179 */
1180 ivi.linkstate = 0;
1181 if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi))
1182 break;
1183 vf_mac.vf =
1184 vf_vlan.vf =
1185 vf_rate.vf =
1186 vf_tx_rate.vf =
1187 vf_spoofchk.vf =
1188 vf_linkstate.vf =
1189 vf_rss_query_en.vf =
1190 vf_trust.vf = ivi.vf;
1191
1192 memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
1193 vf_vlan.vlan = ivi.vlan;
1194 vf_vlan.qos = ivi.qos;
1195 vf_tx_rate.rate = ivi.max_tx_rate;
1196 vf_rate.min_tx_rate = ivi.min_tx_rate;
1197 vf_rate.max_tx_rate = ivi.max_tx_rate;
1198 vf_spoofchk.setting = ivi.spoofchk;
1199 vf_linkstate.link_state = ivi.linkstate;
1200 vf_rss_query_en.setting = ivi.rss_query_en;
1201 vf_trust.setting = ivi.trusted;
1202 vf = nla_nest_start(skb, IFLA_VF_INFO);
1203 if (!vf) {
1204 nla_nest_cancel(skb, vfinfo);
1205 goto nla_put_failure;
1206 }
1207 if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) ||
1208 nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) ||
1209 nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate),
1210 &vf_rate) ||
1211 nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate),
1212 &vf_tx_rate) ||
1213 nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk),
1214 &vf_spoofchk) ||
1215 nla_put(skb, IFLA_VF_LINK_STATE, sizeof(vf_linkstate),
1216 &vf_linkstate) ||
1217 nla_put(skb, IFLA_VF_RSS_QUERY_EN,
1218 sizeof(vf_rss_query_en),
1219 &vf_rss_query_en) ||
1220 nla_put(skb, IFLA_VF_TRUST,
1221 sizeof(vf_trust), &vf_trust))
1222 goto nla_put_failure; 1276 goto nla_put_failure;
1223 memset(&vf_stats, 0, sizeof(vf_stats));
1224 if (dev->netdev_ops->ndo_get_vf_stats)
1225 dev->netdev_ops->ndo_get_vf_stats(dev, i,
1226 &vf_stats);
1227 vfstats = nla_nest_start(skb, IFLA_VF_STATS);
1228 if (!vfstats) {
1229 nla_nest_cancel(skb, vf);
1230 nla_nest_cancel(skb, vfinfo);
1231 goto nla_put_failure;
1232 }
1233 if (nla_put_u64(skb, IFLA_VF_STATS_RX_PACKETS,
1234 vf_stats.rx_packets) ||
1235 nla_put_u64(skb, IFLA_VF_STATS_TX_PACKETS,
1236 vf_stats.tx_packets) ||
1237 nla_put_u64(skb, IFLA_VF_STATS_RX_BYTES,
1238 vf_stats.rx_bytes) ||
1239 nla_put_u64(skb, IFLA_VF_STATS_TX_BYTES,
1240 vf_stats.tx_bytes) ||
1241 nla_put_u64(skb, IFLA_VF_STATS_BROADCAST,
1242 vf_stats.broadcast) ||
1243 nla_put_u64(skb, IFLA_VF_STATS_MULTICAST,
1244 vf_stats.multicast))
1245 goto nla_put_failure;
1246 nla_nest_end(skb, vfstats);
1247 nla_nest_end(skb, vf);
1248 } 1277 }
1278
1249 nla_nest_end(skb, vfinfo); 1279 nla_nest_end(skb, vfinfo);
1250 } 1280 }
1251 1281
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index aa41e6dd6429..152b9c70e252 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4268,7 +4268,8 @@ static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
4268 return NULL; 4268 return NULL;
4269 } 4269 }
4270 4270
4271 memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN); 4271 memmove(skb->data - ETH_HLEN, skb->data - skb->mac_len,
4272 2 * ETH_ALEN);
4272 skb->mac_header += VLAN_HLEN; 4273 skb->mac_header += VLAN_HLEN;
4273 return skb; 4274 return skb;
4274} 4275}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 1feb15f23de8..46b9c887bede 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -563,7 +563,7 @@ static void reqsk_timer_handler(unsigned long data)
563 int max_retries, thresh; 563 int max_retries, thresh;
564 u8 defer_accept; 564 u8 defer_accept;
565 565
566 if (sk_listener->sk_state != TCP_LISTEN) 566 if (sk_state_load(sk_listener) != TCP_LISTEN)
567 goto drop; 567 goto drop;
568 568
569 max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; 569 max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
@@ -749,7 +749,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
749 * It is OK, because this socket enters to hash table only 749 * It is OK, because this socket enters to hash table only
750 * after validation is complete. 750 * after validation is complete.
751 */ 751 */
752 sk->sk_state = TCP_LISTEN; 752 sk_state_store(sk, TCP_LISTEN);
753 if (!sk->sk_prot->get_port(sk, inet->inet_num)) { 753 if (!sk->sk_prot->get_port(sk, inet->inet_num)) {
754 inet->inet_sport = htons(inet->inet_num); 754 inet->inet_sport = htons(inet->inet_num);
755 755
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 657d2307f031..b3ca21b2ba9b 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -45,7 +45,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
45 struct net *net = nf_ct_net(ct); 45 struct net *net = nf_ct_net(ct);
46 const struct nf_conn *master = ct->master; 46 const struct nf_conn *master = ct->master;
47 struct nf_conntrack_expect *other_exp; 47 struct nf_conntrack_expect *other_exp;
48 struct nf_conntrack_tuple t; 48 struct nf_conntrack_tuple t = {};
49 const struct nf_ct_pptp_master *ct_pptp_info; 49 const struct nf_ct_pptp_master *ct_pptp_info;
50 const struct nf_nat_pptp *nat_pptp_info; 50 const struct nf_nat_pptp *nat_pptp_info;
51 struct nf_nat_range range; 51 struct nf_nat_range range;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 8c0d0bdc2a7c..63e5be0abd86 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -406,10 +406,12 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
406 ip_select_ident(net, skb, NULL); 406 ip_select_ident(net, skb, NULL);
407 407
408 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); 408 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
409 skb->transport_header += iphlen;
410 if (iph->protocol == IPPROTO_ICMP &&
411 length >= iphlen + sizeof(struct icmphdr))
412 icmp_out_count(net, ((struct icmphdr *)
413 skb_transport_header(skb))->type);
409 } 414 }
410 if (iph->protocol == IPPROTO_ICMP)
411 icmp_out_count(net, ((struct icmphdr *)
412 skb_transport_header(skb))->type);
413 415
414 err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, 416 err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
415 net, sk, skb, NULL, rt->dst.dev, 417 net, sk, skb, NULL, rt->dst.dev,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0cfa7c0c1e80..c1728771cf89 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -451,11 +451,14 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
451 unsigned int mask; 451 unsigned int mask;
452 struct sock *sk = sock->sk; 452 struct sock *sk = sock->sk;
453 const struct tcp_sock *tp = tcp_sk(sk); 453 const struct tcp_sock *tp = tcp_sk(sk);
454 int state;
454 455
455 sock_rps_record_flow(sk); 456 sock_rps_record_flow(sk);
456 457
457 sock_poll_wait(file, sk_sleep(sk), wait); 458 sock_poll_wait(file, sk_sleep(sk), wait);
458 if (sk->sk_state == TCP_LISTEN) 459
460 state = sk_state_load(sk);
461 if (state == TCP_LISTEN)
459 return inet_csk_listen_poll(sk); 462 return inet_csk_listen_poll(sk);
460 463
461 /* Socket is not locked. We are protected from async events 464 /* Socket is not locked. We are protected from async events
@@ -492,14 +495,14 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
492 * NOTE. Check for TCP_CLOSE is added. The goal is to prevent 495 * NOTE. Check for TCP_CLOSE is added. The goal is to prevent
493 * blocking on fresh not-connected or disconnected socket. --ANK 496 * blocking on fresh not-connected or disconnected socket. --ANK
494 */ 497 */
495 if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == TCP_CLOSE) 498 if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
496 mask |= POLLHUP; 499 mask |= POLLHUP;
497 if (sk->sk_shutdown & RCV_SHUTDOWN) 500 if (sk->sk_shutdown & RCV_SHUTDOWN)
498 mask |= POLLIN | POLLRDNORM | POLLRDHUP; 501 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
499 502
500 /* Connected or passive Fast Open socket? */ 503 /* Connected or passive Fast Open socket? */
501 if (sk->sk_state != TCP_SYN_SENT && 504 if (state != TCP_SYN_SENT &&
502 (sk->sk_state != TCP_SYN_RECV || tp->fastopen_rsk)) { 505 (state != TCP_SYN_RECV || tp->fastopen_rsk)) {
503 int target = sock_rcvlowat(sk, 0, INT_MAX); 506 int target = sock_rcvlowat(sk, 0, INT_MAX);
504 507
505 if (tp->urg_seq == tp->copied_seq && 508 if (tp->urg_seq == tp->copied_seq &&
@@ -507,9 +510,6 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
507 tp->urg_data) 510 tp->urg_data)
508 target++; 511 target++;
509 512
510 /* Potential race condition. If read of tp below will
511 * escape above sk->sk_state, we can be illegally awaken
512 * in SYN_* states. */
513 if (tp->rcv_nxt - tp->copied_seq >= target) 513 if (tp->rcv_nxt - tp->copied_seq >= target)
514 mask |= POLLIN | POLLRDNORM; 514 mask |= POLLIN | POLLRDNORM;
515 515
@@ -1934,7 +1934,7 @@ void tcp_set_state(struct sock *sk, int state)
1934 /* Change state AFTER socket is unhashed to avoid closed 1934 /* Change state AFTER socket is unhashed to avoid closed
1935 * socket sitting in hash tables. 1935 * socket sitting in hash tables.
1936 */ 1936 */
1937 sk->sk_state = state; 1937 sk_state_store(sk, state);
1938 1938
1939#ifdef STATE_TRACE 1939#ifdef STATE_TRACE
1940 SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]); 1940 SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]);
@@ -2644,7 +2644,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
2644 if (sk->sk_type != SOCK_STREAM) 2644 if (sk->sk_type != SOCK_STREAM)
2645 return; 2645 return;
2646 2646
2647 info->tcpi_state = sk->sk_state; 2647 info->tcpi_state = sk_state_load(sk);
2648
2648 info->tcpi_ca_state = icsk->icsk_ca_state; 2649 info->tcpi_ca_state = icsk->icsk_ca_state;
2649 info->tcpi_retransmits = icsk->icsk_retransmits; 2650 info->tcpi_retransmits = icsk->icsk_retransmits;
2650 info->tcpi_probes = icsk->icsk_probes_out; 2651 info->tcpi_probes = icsk->icsk_probes_out;
@@ -2672,7 +2673,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
2672 info->tcpi_snd_mss = tp->mss_cache; 2673 info->tcpi_snd_mss = tp->mss_cache;
2673 info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss; 2674 info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss;
2674 2675
2675 if (sk->sk_state == TCP_LISTEN) { 2676 if (info->tcpi_state == TCP_LISTEN) {
2676 info->tcpi_unacked = sk->sk_ack_backlog; 2677 info->tcpi_unacked = sk->sk_ack_backlog;
2677 info->tcpi_sacked = sk->sk_max_ack_backlog; 2678 info->tcpi_sacked = sk->sk_max_ack_backlog;
2678 } else { 2679 } else {
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 479f34946177..b31604086edd 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -21,7 +21,7 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
21{ 21{
22 struct tcp_info *info = _info; 22 struct tcp_info *info = _info;
23 23
24 if (sk->sk_state == TCP_LISTEN) { 24 if (sk_state_load(sk) == TCP_LISTEN) {
25 r->idiag_rqueue = sk->sk_ack_backlog; 25 r->idiag_rqueue = sk->sk_ack_backlog;
26 r->idiag_wqueue = sk->sk_max_ack_backlog; 26 r->idiag_wqueue = sk->sk_max_ack_backlog;
27 } else if (sk->sk_type == SOCK_STREAM) { 27 } else if (sk->sk_type == SOCK_STREAM) {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 950e28c0cdf2..ba09016d1bfd 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2158,6 +2158,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
2158 __u16 destp = ntohs(inet->inet_dport); 2158 __u16 destp = ntohs(inet->inet_dport);
2159 __u16 srcp = ntohs(inet->inet_sport); 2159 __u16 srcp = ntohs(inet->inet_sport);
2160 int rx_queue; 2160 int rx_queue;
2161 int state;
2161 2162
2162 if (icsk->icsk_pending == ICSK_TIME_RETRANS || 2163 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2163 icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || 2164 icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
@@ -2175,17 +2176,18 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
2175 timer_expires = jiffies; 2176 timer_expires = jiffies;
2176 } 2177 }
2177 2178
2178 if (sk->sk_state == TCP_LISTEN) 2179 state = sk_state_load(sk);
2180 if (state == TCP_LISTEN)
2179 rx_queue = sk->sk_ack_backlog; 2181 rx_queue = sk->sk_ack_backlog;
2180 else 2182 else
2181 /* 2183 /* Because we don't lock the socket,
2182 * because we dont lock socket, we might find a transient negative value 2184 * we might find a transient negative value.
2183 */ 2185 */
2184 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); 2186 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2185 2187
2186 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " 2188 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2187 "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d", 2189 "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
2188 i, src, srcp, dest, destp, sk->sk_state, 2190 i, src, srcp, dest, destp, state,
2189 tp->write_seq - tp->snd_una, 2191 tp->write_seq - tp->snd_una,
2190 rx_queue, 2192 rx_queue,
2191 timer_active, 2193 timer_active,
@@ -2199,8 +2201,8 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
2199 jiffies_to_clock_t(icsk->icsk_ack.ato), 2201 jiffies_to_clock_t(icsk->icsk_ack.ato),
2200 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, 2202 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2201 tp->snd_cwnd, 2203 tp->snd_cwnd,
2202 sk->sk_state == TCP_LISTEN ? 2204 state == TCP_LISTEN ?
2203 (fastopenq ? fastopenq->max_qlen : 0) : 2205 fastopenq->max_qlen :
2204 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)); 2206 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
2205} 2207}
2206 2208
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 124338a39e29..5ee56d0a8699 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1651,7 +1651,6 @@ out:
1651 if (!err) { 1651 if (!err) {
1652 ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT); 1652 ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT);
1653 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); 1653 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1654 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, payload_len);
1655 } else { 1654 } else {
1656 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); 1655 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
1657 } 1656 }
@@ -2015,7 +2014,6 @@ out:
2015 if (!err) { 2014 if (!err) {
2016 ICMP6MSGOUT_INC_STATS(net, idev, type); 2015 ICMP6MSGOUT_INC_STATS(net, idev, type);
2017 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); 2016 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
2018 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, full_len);
2019 } else 2017 } else
2020 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); 2018 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
2021 2019
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c8bc9b4ac328..6f01fe122abd 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -404,6 +404,14 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
404 } 404 }
405} 405}
406 406
407static bool __rt6_check_expired(const struct rt6_info *rt)
408{
409 if (rt->rt6i_flags & RTF_EXPIRES)
410 return time_after(jiffies, rt->dst.expires);
411 else
412 return false;
413}
414
407static bool rt6_check_expired(const struct rt6_info *rt) 415static bool rt6_check_expired(const struct rt6_info *rt)
408{ 416{
409 if (rt->rt6i_flags & RTF_EXPIRES) { 417 if (rt->rt6i_flags & RTF_EXPIRES) {
@@ -1252,7 +1260,8 @@ static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1252 1260
1253static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie) 1261static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1254{ 1262{
1255 if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK && 1263 if (!__rt6_check_expired(rt) &&
1264 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1256 rt6_check((struct rt6_info *)(rt->dst.from), cookie)) 1265 rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1257 return &rt->dst; 1266 return &rt->dst;
1258 else 1267 else
@@ -1272,7 +1281,8 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1272 1281
1273 rt6_dst_from_metrics_check(rt); 1282 rt6_dst_from_metrics_check(rt);
1274 1283
1275 if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE)) 1284 if (rt->rt6i_flags & RTF_PCPU ||
1285 (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
1276 return rt6_dst_from_check(rt, cookie); 1286 return rt6_dst_from_check(rt, cookie);
1277 else 1287 else
1278 return rt6_check(rt, cookie); 1288 return rt6_check(rt, cookie);
@@ -1322,6 +1332,12 @@ static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1322 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires); 1332 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1323} 1333}
1324 1334
1335static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1336{
1337 return !(rt->rt6i_flags & RTF_CACHE) &&
1338 (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1339}
1340
1325static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, 1341static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1326 const struct ipv6hdr *iph, u32 mtu) 1342 const struct ipv6hdr *iph, u32 mtu)
1327{ 1343{
@@ -1335,7 +1351,7 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1335 if (mtu >= dst_mtu(dst)) 1351 if (mtu >= dst_mtu(dst))
1336 return; 1352 return;
1337 1353
1338 if (rt6->rt6i_flags & RTF_CACHE) { 1354 if (!rt6_cache_allowed_for_pmtu(rt6)) {
1339 rt6_do_update_pmtu(rt6, mtu); 1355 rt6_do_update_pmtu(rt6, mtu);
1340 } else { 1356 } else {
1341 const struct in6_addr *daddr, *saddr; 1357 const struct in6_addr *daddr, *saddr;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5baa8e754e41..c5429a636f1a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1690,6 +1690,8 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1690 const struct tcp_sock *tp = tcp_sk(sp); 1690 const struct tcp_sock *tp = tcp_sk(sp);
1691 const struct inet_connection_sock *icsk = inet_csk(sp); 1691 const struct inet_connection_sock *icsk = inet_csk(sp);
1692 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq; 1692 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1693 int rx_queue;
1694 int state;
1693 1695
1694 dest = &sp->sk_v6_daddr; 1696 dest = &sp->sk_v6_daddr;
1695 src = &sp->sk_v6_rcv_saddr; 1697 src = &sp->sk_v6_rcv_saddr;
@@ -1710,6 +1712,15 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1710 timer_expires = jiffies; 1712 timer_expires = jiffies;
1711 } 1713 }
1712 1714
1715 state = sk_state_load(sp);
1716 if (state == TCP_LISTEN)
1717 rx_queue = sp->sk_ack_backlog;
1718 else
1719 /* Because we don't lock the socket,
1720 * we might find a transient negative value.
1721 */
1722 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
1723
1713 seq_printf(seq, 1724 seq_printf(seq,
1714 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 1725 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1715 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n", 1726 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
@@ -1718,9 +1729,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1718 src->s6_addr32[2], src->s6_addr32[3], srcp, 1729 src->s6_addr32[2], src->s6_addr32[3], srcp,
1719 dest->s6_addr32[0], dest->s6_addr32[1], 1730 dest->s6_addr32[0], dest->s6_addr32[1],
1720 dest->s6_addr32[2], dest->s6_addr32[3], destp, 1731 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1721 sp->sk_state, 1732 state,
1722 tp->write_seq-tp->snd_una, 1733 tp->write_seq - tp->snd_una,
1723 (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq), 1734 rx_queue,
1724 timer_active, 1735 timer_active,
1725 jiffies_delta_to_clock_t(timer_expires - jiffies), 1736 jiffies_delta_to_clock_t(timer_expires - jiffies),
1726 icsk->icsk_retransmits, 1737 icsk->icsk_retransmits,
@@ -1732,7 +1743,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1732 jiffies_to_clock_t(icsk->icsk_ack.ato), 1743 jiffies_to_clock_t(icsk->icsk_ack.ato),
1733 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, 1744 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
1734 tp->snd_cwnd, 1745 tp->snd_cwnd,
1735 sp->sk_state == TCP_LISTEN ? 1746 state == TCP_LISTEN ?
1736 fastopenq->max_qlen : 1747 fastopenq->max_qlen :
1737 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh) 1748 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
1738 ); 1749 );
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index e22349ea7256..4692782b5280 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -869,7 +869,7 @@ config NETFILTER_XT_TARGET_TEE
869 depends on IPV6 || IPV6=n 869 depends on IPV6 || IPV6=n
870 depends on !NF_CONNTRACK || NF_CONNTRACK 870 depends on !NF_CONNTRACK || NF_CONNTRACK
871 select NF_DUP_IPV4 871 select NF_DUP_IPV4
872 select NF_DUP_IPV6 if IP6_NF_IPTABLES 872 select NF_DUP_IPV6 if IP6_NF_IPTABLES != n
873 ---help--- 873 ---help---
874 This option adds a "TEE" target with which a packet can be cloned and 874 This option adds a "TEE" target with which a packet can be cloned and
875 this clone be rerouted to another nexthop. 875 this clone be rerouted to another nexthop.
@@ -882,7 +882,7 @@ config NETFILTER_XT_TARGET_TPROXY
882 depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n 882 depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
883 depends on IP_NF_MANGLE 883 depends on IP_NF_MANGLE
884 select NF_DEFRAG_IPV4 884 select NF_DEFRAG_IPV4
885 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES 885 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
886 help 886 help
887 This option adds a `TPROXY' target, which is somewhat similar to 887 This option adds a `TPROXY' target, which is somewhat similar to
888 REDIRECT. It can only be used in the mangle table and is useful 888 REDIRECT. It can only be used in the mangle table and is useful
@@ -1375,7 +1375,7 @@ config NETFILTER_XT_MATCH_SOCKET
1375 depends on IPV6 || IPV6=n 1375 depends on IPV6 || IPV6=n
1376 depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n 1376 depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
1377 select NF_DEFRAG_IPV4 1377 select NF_DEFRAG_IPV4
1378 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES 1378 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
1379 help 1379 help
1380 This option adds a `socket' match, which can be used to match 1380 This option adds a `socket' match, which can be used to match
1381 packets for which a TCP or UDP socket lookup finds a valid socket. 1381 packets for which a TCP or UDP socket lookup finds a valid socket.
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index d05e759ed0fa..b0bc475f641e 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -33,7 +33,7 @@
33#define mtype_gc IPSET_TOKEN(MTYPE, _gc) 33#define mtype_gc IPSET_TOKEN(MTYPE, _gc)
34#define mtype MTYPE 34#define mtype MTYPE
35 35
36#define get_ext(set, map, id) ((map)->extensions + (set)->dsize * (id)) 36#define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id)))
37 37
38static void 38static void
39mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) 39mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
@@ -67,12 +67,9 @@ mtype_destroy(struct ip_set *set)
67 del_timer_sync(&map->gc); 67 del_timer_sync(&map->gc);
68 68
69 ip_set_free(map->members); 69 ip_set_free(map->members);
70 if (set->dsize) { 70 if (set->dsize && set->extensions & IPSET_EXT_DESTROY)
71 if (set->extensions & IPSET_EXT_DESTROY) 71 mtype_ext_cleanup(set);
72 mtype_ext_cleanup(set); 72 ip_set_free(map);
73 ip_set_free(map->extensions);
74 }
75 kfree(map);
76 73
77 set->data = NULL; 74 set->data = NULL;
78} 75}
@@ -92,16 +89,14 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
92{ 89{
93 const struct mtype *map = set->data; 90 const struct mtype *map = set->data;
94 struct nlattr *nested; 91 struct nlattr *nested;
92 size_t memsize = sizeof(*map) + map->memsize;
95 93
96 nested = ipset_nest_start(skb, IPSET_ATTR_DATA); 94 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
97 if (!nested) 95 if (!nested)
98 goto nla_put_failure; 96 goto nla_put_failure;
99 if (mtype_do_head(skb, map) || 97 if (mtype_do_head(skb, map) ||
100 nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || 98 nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
101 nla_put_net32(skb, IPSET_ATTR_MEMSIZE, 99 nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)))
102 htonl(sizeof(*map) +
103 map->memsize +
104 set->dsize * map->elements)))
105 goto nla_put_failure; 100 goto nla_put_failure;
106 if (unlikely(ip_set_put_flags(skb, set))) 101 if (unlikely(ip_set_put_flags(skb, set)))
107 goto nla_put_failure; 102 goto nla_put_failure;
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 64a564334418..4783efff0bde 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -41,7 +41,6 @@ MODULE_ALIAS("ip_set_bitmap:ip");
41/* Type structure */ 41/* Type structure */
42struct bitmap_ip { 42struct bitmap_ip {
43 void *members; /* the set members */ 43 void *members; /* the set members */
44 void *extensions; /* data extensions */
45 u32 first_ip; /* host byte order, included in range */ 44 u32 first_ip; /* host byte order, included in range */
46 u32 last_ip; /* host byte order, included in range */ 45 u32 last_ip; /* host byte order, included in range */
47 u32 elements; /* number of max elements in the set */ 46 u32 elements; /* number of max elements in the set */
@@ -49,6 +48,8 @@ struct bitmap_ip {
49 size_t memsize; /* members size */ 48 size_t memsize; /* members size */
50 u8 netmask; /* subnet netmask */ 49 u8 netmask; /* subnet netmask */
51 struct timer_list gc; /* garbage collection */ 50 struct timer_list gc; /* garbage collection */
51 unsigned char extensions[0] /* data extensions */
52 __aligned(__alignof__(u64));
52}; 53};
53 54
54/* ADT structure for generic function args */ 55/* ADT structure for generic function args */
@@ -224,13 +225,6 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
224 map->members = ip_set_alloc(map->memsize); 225 map->members = ip_set_alloc(map->memsize);
225 if (!map->members) 226 if (!map->members)
226 return false; 227 return false;
227 if (set->dsize) {
228 map->extensions = ip_set_alloc(set->dsize * elements);
229 if (!map->extensions) {
230 kfree(map->members);
231 return false;
232 }
233 }
234 map->first_ip = first_ip; 228 map->first_ip = first_ip;
235 map->last_ip = last_ip; 229 map->last_ip = last_ip;
236 map->elements = elements; 230 map->elements = elements;
@@ -316,13 +310,13 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
316 pr_debug("hosts %u, elements %llu\n", 310 pr_debug("hosts %u, elements %llu\n",
317 hosts, (unsigned long long)elements); 311 hosts, (unsigned long long)elements);
318 312
319 map = kzalloc(sizeof(*map), GFP_KERNEL); 313 set->dsize = ip_set_elem_len(set, tb, 0, 0);
314 map = ip_set_alloc(sizeof(*map) + elements * set->dsize);
320 if (!map) 315 if (!map)
321 return -ENOMEM; 316 return -ENOMEM;
322 317
323 map->memsize = bitmap_bytes(0, elements - 1); 318 map->memsize = bitmap_bytes(0, elements - 1);
324 set->variant = &bitmap_ip; 319 set->variant = &bitmap_ip;
325 set->dsize = ip_set_elem_len(set, tb, 0);
326 if (!init_map_ip(set, map, first_ip, last_ip, 320 if (!init_map_ip(set, map, first_ip, last_ip,
327 elements, hosts, netmask)) { 321 elements, hosts, netmask)) {
328 kfree(map); 322 kfree(map);
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 1430535118fb..29dde208381d 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -47,24 +47,26 @@ enum {
47/* Type structure */ 47/* Type structure */
48struct bitmap_ipmac { 48struct bitmap_ipmac {
49 void *members; /* the set members */ 49 void *members; /* the set members */
50 void *extensions; /* MAC + data extensions */
51 u32 first_ip; /* host byte order, included in range */ 50 u32 first_ip; /* host byte order, included in range */
52 u32 last_ip; /* host byte order, included in range */ 51 u32 last_ip; /* host byte order, included in range */
53 u32 elements; /* number of max elements in the set */ 52 u32 elements; /* number of max elements in the set */
54 size_t memsize; /* members size */ 53 size_t memsize; /* members size */
55 struct timer_list gc; /* garbage collector */ 54 struct timer_list gc; /* garbage collector */
55 unsigned char extensions[0] /* MAC + data extensions */
56 __aligned(__alignof__(u64));
56}; 57};
57 58
58/* ADT structure for generic function args */ 59/* ADT structure for generic function args */
59struct bitmap_ipmac_adt_elem { 60struct bitmap_ipmac_adt_elem {
61 unsigned char ether[ETH_ALEN] __aligned(2);
60 u16 id; 62 u16 id;
61 unsigned char *ether; 63 u16 add_mac;
62}; 64};
63 65
64struct bitmap_ipmac_elem { 66struct bitmap_ipmac_elem {
65 unsigned char ether[ETH_ALEN]; 67 unsigned char ether[ETH_ALEN];
66 unsigned char filled; 68 unsigned char filled;
67} __attribute__ ((aligned)); 69} __aligned(__alignof__(u64));
68 70
69static inline u32 71static inline u32
70ip_to_id(const struct bitmap_ipmac *m, u32 ip) 72ip_to_id(const struct bitmap_ipmac *m, u32 ip)
@@ -72,11 +74,11 @@ ip_to_id(const struct bitmap_ipmac *m, u32 ip)
72 return ip - m->first_ip; 74 return ip - m->first_ip;
73} 75}
74 76
75static inline struct bitmap_ipmac_elem * 77#define get_elem(extensions, id, dsize) \
76get_elem(void *extensions, u16 id, size_t dsize) 78 (struct bitmap_ipmac_elem *)(extensions + (id) * (dsize))
77{ 79
78 return (struct bitmap_ipmac_elem *)(extensions + id * dsize); 80#define get_const_elem(extensions, id, dsize) \
79} 81 (const struct bitmap_ipmac_elem *)(extensions + (id) * (dsize))
80 82
81/* Common functions */ 83/* Common functions */
82 84
@@ -88,10 +90,9 @@ bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e,
88 90
89 if (!test_bit(e->id, map->members)) 91 if (!test_bit(e->id, map->members))
90 return 0; 92 return 0;
91 elem = get_elem(map->extensions, e->id, dsize); 93 elem = get_const_elem(map->extensions, e->id, dsize);
92 if (elem->filled == MAC_FILLED) 94 if (e->add_mac && elem->filled == MAC_FILLED)
93 return !e->ether || 95 return ether_addr_equal(e->ether, elem->ether);
94 ether_addr_equal(e->ether, elem->ether);
95 /* Trigger kernel to fill out the ethernet address */ 96 /* Trigger kernel to fill out the ethernet address */
96 return -EAGAIN; 97 return -EAGAIN;
97} 98}
@@ -103,7 +104,7 @@ bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map, size_t dsize)
103 104
104 if (!test_bit(id, map->members)) 105 if (!test_bit(id, map->members))
105 return 0; 106 return 0;
106 elem = get_elem(map->extensions, id, dsize); 107 elem = get_const_elem(map->extensions, id, dsize);
107 /* Timer not started for the incomplete elements */ 108 /* Timer not started for the incomplete elements */
108 return elem->filled == MAC_FILLED; 109 return elem->filled == MAC_FILLED;
109} 110}
@@ -133,7 +134,7 @@ bitmap_ipmac_add_timeout(unsigned long *timeout,
133 * and we can reuse it later when MAC is filled out, 134 * and we can reuse it later when MAC is filled out,
134 * possibly by the kernel 135 * possibly by the kernel
135 */ 136 */
136 if (e->ether) 137 if (e->add_mac)
137 ip_set_timeout_set(timeout, t); 138 ip_set_timeout_set(timeout, t);
138 else 139 else
139 *timeout = t; 140 *timeout = t;
@@ -150,7 +151,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
150 elem = get_elem(map->extensions, e->id, dsize); 151 elem = get_elem(map->extensions, e->id, dsize);
151 if (test_bit(e->id, map->members)) { 152 if (test_bit(e->id, map->members)) {
152 if (elem->filled == MAC_FILLED) { 153 if (elem->filled == MAC_FILLED) {
153 if (e->ether && 154 if (e->add_mac &&
154 (flags & IPSET_FLAG_EXIST) && 155 (flags & IPSET_FLAG_EXIST) &&
155 !ether_addr_equal(e->ether, elem->ether)) { 156 !ether_addr_equal(e->ether, elem->ether)) {
156 /* memcpy isn't atomic */ 157 /* memcpy isn't atomic */
@@ -159,7 +160,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
159 ether_addr_copy(elem->ether, e->ether); 160 ether_addr_copy(elem->ether, e->ether);
160 } 161 }
161 return IPSET_ADD_FAILED; 162 return IPSET_ADD_FAILED;
162 } else if (!e->ether) 163 } else if (!e->add_mac)
163 /* Already added without ethernet address */ 164 /* Already added without ethernet address */
164 return IPSET_ADD_FAILED; 165 return IPSET_ADD_FAILED;
165 /* Fill the MAC address and trigger the timer activation */ 166 /* Fill the MAC address and trigger the timer activation */
@@ -168,7 +169,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
168 ether_addr_copy(elem->ether, e->ether); 169 ether_addr_copy(elem->ether, e->ether);
169 elem->filled = MAC_FILLED; 170 elem->filled = MAC_FILLED;
170 return IPSET_ADD_START_STORED_TIMEOUT; 171 return IPSET_ADD_START_STORED_TIMEOUT;
171 } else if (e->ether) { 172 } else if (e->add_mac) {
172 /* We can store MAC too */ 173 /* We can store MAC too */
173 ether_addr_copy(elem->ether, e->ether); 174 ether_addr_copy(elem->ether, e->ether);
174 elem->filled = MAC_FILLED; 175 elem->filled = MAC_FILLED;
@@ -191,7 +192,7 @@ bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map,
191 u32 id, size_t dsize) 192 u32 id, size_t dsize)
192{ 193{
193 const struct bitmap_ipmac_elem *elem = 194 const struct bitmap_ipmac_elem *elem =
194 get_elem(map->extensions, id, dsize); 195 get_const_elem(map->extensions, id, dsize);
195 196
196 return nla_put_ipaddr4(skb, IPSET_ATTR_IP, 197 return nla_put_ipaddr4(skb, IPSET_ATTR_IP,
197 htonl(map->first_ip + id)) || 198 htonl(map->first_ip + id)) ||
@@ -213,7 +214,7 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
213{ 214{
214 struct bitmap_ipmac *map = set->data; 215 struct bitmap_ipmac *map = set->data;
215 ipset_adtfn adtfn = set->variant->adt[adt]; 216 ipset_adtfn adtfn = set->variant->adt[adt];
216 struct bitmap_ipmac_adt_elem e = { .id = 0 }; 217 struct bitmap_ipmac_adt_elem e = { .id = 0, .add_mac = 1 };
217 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); 218 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
218 u32 ip; 219 u32 ip;
219 220
@@ -231,7 +232,7 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
231 return -EINVAL; 232 return -EINVAL;
232 233
233 e.id = ip_to_id(map, ip); 234 e.id = ip_to_id(map, ip);
234 e.ether = eth_hdr(skb)->h_source; 235 memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);
235 236
236 return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); 237 return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
237} 238}
@@ -265,11 +266,10 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[],
265 return -IPSET_ERR_BITMAP_RANGE; 266 return -IPSET_ERR_BITMAP_RANGE;
266 267
267 e.id = ip_to_id(map, ip); 268 e.id = ip_to_id(map, ip);
268 if (tb[IPSET_ATTR_ETHER]) 269 if (tb[IPSET_ATTR_ETHER]) {
269 e.ether = nla_data(tb[IPSET_ATTR_ETHER]); 270 memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN);
270 else 271 e.add_mac = 1;
271 e.ether = NULL; 272 }
272
273 ret = adtfn(set, &e, &ext, &ext, flags); 273 ret = adtfn(set, &e, &ext, &ext, flags);
274 274
275 return ip_set_eexist(ret, flags) ? 0 : ret; 275 return ip_set_eexist(ret, flags) ? 0 : ret;
@@ -300,13 +300,6 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
300 map->members = ip_set_alloc(map->memsize); 300 map->members = ip_set_alloc(map->memsize);
301 if (!map->members) 301 if (!map->members)
302 return false; 302 return false;
303 if (set->dsize) {
304 map->extensions = ip_set_alloc(set->dsize * elements);
305 if (!map->extensions) {
306 kfree(map->members);
307 return false;
308 }
309 }
310 map->first_ip = first_ip; 303 map->first_ip = first_ip;
311 map->last_ip = last_ip; 304 map->last_ip = last_ip;
312 map->elements = elements; 305 map->elements = elements;
@@ -361,14 +354,15 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
361 if (elements > IPSET_BITMAP_MAX_RANGE + 1) 354 if (elements > IPSET_BITMAP_MAX_RANGE + 1)
362 return -IPSET_ERR_BITMAP_RANGE_SIZE; 355 return -IPSET_ERR_BITMAP_RANGE_SIZE;
363 356
364 map = kzalloc(sizeof(*map), GFP_KERNEL); 357 set->dsize = ip_set_elem_len(set, tb,
358 sizeof(struct bitmap_ipmac_elem),
359 __alignof__(struct bitmap_ipmac_elem));
360 map = ip_set_alloc(sizeof(*map) + elements * set->dsize);
365 if (!map) 361 if (!map)
366 return -ENOMEM; 362 return -ENOMEM;
367 363
368 map->memsize = bitmap_bytes(0, elements - 1); 364 map->memsize = bitmap_bytes(0, elements - 1);
369 set->variant = &bitmap_ipmac; 365 set->variant = &bitmap_ipmac;
370 set->dsize = ip_set_elem_len(set, tb,
371 sizeof(struct bitmap_ipmac_elem));
372 if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { 366 if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
373 kfree(map); 367 kfree(map);
374 return -ENOMEM; 368 return -ENOMEM;
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 5338ccd5da46..7f0c733358a4 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -35,12 +35,13 @@ MODULE_ALIAS("ip_set_bitmap:port");
35/* Type structure */ 35/* Type structure */
36struct bitmap_port { 36struct bitmap_port {
37 void *members; /* the set members */ 37 void *members; /* the set members */
38 void *extensions; /* data extensions */
39 u16 first_port; /* host byte order, included in range */ 38 u16 first_port; /* host byte order, included in range */
40 u16 last_port; /* host byte order, included in range */ 39 u16 last_port; /* host byte order, included in range */
41 u32 elements; /* number of max elements in the set */ 40 u32 elements; /* number of max elements in the set */
42 size_t memsize; /* members size */ 41 size_t memsize; /* members size */
43 struct timer_list gc; /* garbage collection */ 42 struct timer_list gc; /* garbage collection */
43 unsigned char extensions[0] /* data extensions */
44 __aligned(__alignof__(u64));
44}; 45};
45 46
46/* ADT structure for generic function args */ 47/* ADT structure for generic function args */
@@ -209,13 +210,6 @@ init_map_port(struct ip_set *set, struct bitmap_port *map,
209 map->members = ip_set_alloc(map->memsize); 210 map->members = ip_set_alloc(map->memsize);
210 if (!map->members) 211 if (!map->members)
211 return false; 212 return false;
212 if (set->dsize) {
213 map->extensions = ip_set_alloc(set->dsize * map->elements);
214 if (!map->extensions) {
215 kfree(map->members);
216 return false;
217 }
218 }
219 map->first_port = first_port; 213 map->first_port = first_port;
220 map->last_port = last_port; 214 map->last_port = last_port;
221 set->timeout = IPSET_NO_TIMEOUT; 215 set->timeout = IPSET_NO_TIMEOUT;
@@ -232,6 +226,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
232{ 226{
233 struct bitmap_port *map; 227 struct bitmap_port *map;
234 u16 first_port, last_port; 228 u16 first_port, last_port;
229 u32 elements;
235 230
236 if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || 231 if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
237 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT_TO) || 232 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT_TO) ||
@@ -248,14 +243,15 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
248 last_port = tmp; 243 last_port = tmp;
249 } 244 }
250 245
251 map = kzalloc(sizeof(*map), GFP_KERNEL); 246 elements = last_port - first_port + 1;
247 set->dsize = ip_set_elem_len(set, tb, 0, 0);
248 map = ip_set_alloc(sizeof(*map) + elements * set->dsize);
252 if (!map) 249 if (!map)
253 return -ENOMEM; 250 return -ENOMEM;
254 251
255 map->elements = last_port - first_port + 1; 252 map->elements = elements;
256 map->memsize = bitmap_bytes(0, map->elements); 253 map->memsize = bitmap_bytes(0, map->elements);
257 set->variant = &bitmap_port; 254 set->variant = &bitmap_port;
258 set->dsize = ip_set_elem_len(set, tb, 0);
259 if (!init_map_port(set, map, first_port, last_port)) { 255 if (!init_map_port(set, map, first_port, last_port)) {
260 kfree(map); 256 kfree(map);
261 return -ENOMEM; 257 return -ENOMEM;
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 69ab9c2634e1..54f3d7cb23e6 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -364,25 +364,27 @@ add_extension(enum ip_set_ext_id id, u32 flags, struct nlattr *tb[])
364} 364}
365 365
366size_t 366size_t
367ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len) 367ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len,
368 size_t align)
368{ 369{
369 enum ip_set_ext_id id; 370 enum ip_set_ext_id id;
370 size_t offset = len;
371 u32 cadt_flags = 0; 371 u32 cadt_flags = 0;
372 372
373 if (tb[IPSET_ATTR_CADT_FLAGS]) 373 if (tb[IPSET_ATTR_CADT_FLAGS])
374 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); 374 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
375 if (cadt_flags & IPSET_FLAG_WITH_FORCEADD) 375 if (cadt_flags & IPSET_FLAG_WITH_FORCEADD)
376 set->flags |= IPSET_CREATE_FLAG_FORCEADD; 376 set->flags |= IPSET_CREATE_FLAG_FORCEADD;
377 if (!align)
378 align = 1;
377 for (id = 0; id < IPSET_EXT_ID_MAX; id++) { 379 for (id = 0; id < IPSET_EXT_ID_MAX; id++) {
378 if (!add_extension(id, cadt_flags, tb)) 380 if (!add_extension(id, cadt_flags, tb))
379 continue; 381 continue;
380 offset = ALIGN(offset, ip_set_extensions[id].align); 382 len = ALIGN(len, ip_set_extensions[id].align);
381 set->offset[id] = offset; 383 set->offset[id] = len;
382 set->extensions |= ip_set_extensions[id].type; 384 set->extensions |= ip_set_extensions[id].type;
383 offset += ip_set_extensions[id].len; 385 len += ip_set_extensions[id].len;
384 } 386 }
385 return offset; 387 return ALIGN(len, align);
386} 388}
387EXPORT_SYMBOL_GPL(ip_set_elem_len); 389EXPORT_SYMBOL_GPL(ip_set_elem_len);
388 390
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 691b54fcaf2a..e5336ab36d67 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -72,8 +72,9 @@ struct hbucket {
72 DECLARE_BITMAP(used, AHASH_MAX_TUNED); 72 DECLARE_BITMAP(used, AHASH_MAX_TUNED);
73 u8 size; /* size of the array */ 73 u8 size; /* size of the array */
74 u8 pos; /* position of the first free entry */ 74 u8 pos; /* position of the first free entry */
75 unsigned char value[0]; /* the array of the values */ 75 unsigned char value[0] /* the array of the values */
76} __attribute__ ((aligned)); 76 __aligned(__alignof__(u64));
77};
77 78
78/* The hash table: the table size stored here in order to make resizing easy */ 79/* The hash table: the table size stored here in order to make resizing easy */
79struct htable { 80struct htable {
@@ -475,7 +476,7 @@ static void
475mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize) 476mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
476{ 477{
477 struct htable *t; 478 struct htable *t;
478 struct hbucket *n; 479 struct hbucket *n, *tmp;
479 struct mtype_elem *data; 480 struct mtype_elem *data;
480 u32 i, j, d; 481 u32 i, j, d;
481#ifdef IP_SET_HASH_WITH_NETS 482#ifdef IP_SET_HASH_WITH_NETS
@@ -510,9 +511,14 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
510 } 511 }
511 } 512 }
512 if (d >= AHASH_INIT_SIZE) { 513 if (d >= AHASH_INIT_SIZE) {
513 struct hbucket *tmp = kzalloc(sizeof(*tmp) + 514 if (d >= n->size) {
514 (n->size - AHASH_INIT_SIZE) * dsize, 515 rcu_assign_pointer(hbucket(t, i), NULL);
515 GFP_ATOMIC); 516 kfree_rcu(n, rcu);
517 continue;
518 }
519 tmp = kzalloc(sizeof(*tmp) +
520 (n->size - AHASH_INIT_SIZE) * dsize,
521 GFP_ATOMIC);
516 if (!tmp) 522 if (!tmp)
517 /* Still try to delete expired elements */ 523 /* Still try to delete expired elements */
518 continue; 524 continue;
@@ -522,7 +528,7 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
522 continue; 528 continue;
523 data = ahash_data(n, j, dsize); 529 data = ahash_data(n, j, dsize);
524 memcpy(tmp->value + d * dsize, data, dsize); 530 memcpy(tmp->value + d * dsize, data, dsize);
525 set_bit(j, tmp->used); 531 set_bit(d, tmp->used);
526 d++; 532 d++;
527 } 533 }
528 tmp->pos = d; 534 tmp->pos = d;
@@ -1323,12 +1329,14 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
1323#endif 1329#endif
1324 set->variant = &IPSET_TOKEN(HTYPE, 4_variant); 1330 set->variant = &IPSET_TOKEN(HTYPE, 4_variant);
1325 set->dsize = ip_set_elem_len(set, tb, 1331 set->dsize = ip_set_elem_len(set, tb,
1326 sizeof(struct IPSET_TOKEN(HTYPE, 4_elem))); 1332 sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)),
1333 __alignof__(struct IPSET_TOKEN(HTYPE, 4_elem)));
1327#ifndef IP_SET_PROTO_UNDEF 1334#ifndef IP_SET_PROTO_UNDEF
1328 } else { 1335 } else {
1329 set->variant = &IPSET_TOKEN(HTYPE, 6_variant); 1336 set->variant = &IPSET_TOKEN(HTYPE, 6_variant);
1330 set->dsize = ip_set_elem_len(set, tb, 1337 set->dsize = ip_set_elem_len(set, tb,
1331 sizeof(struct IPSET_TOKEN(HTYPE, 6_elem))); 1338 sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)),
1339 __alignof__(struct IPSET_TOKEN(HTYPE, 6_elem)));
1332 } 1340 }
1333#endif 1341#endif
1334 if (tb[IPSET_ATTR_TIMEOUT]) { 1342 if (tb[IPSET_ATTR_TIMEOUT]) {
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 5a30ce6e8c90..bbede95c9f68 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -31,7 +31,7 @@ struct set_elem {
31 struct rcu_head rcu; 31 struct rcu_head rcu;
32 struct list_head list; 32 struct list_head list;
33 ip_set_id_t id; 33 ip_set_id_t id;
34}; 34} __aligned(__alignof__(u64));
35 35
36struct set_adt_elem { 36struct set_adt_elem {
37 ip_set_id_t id; 37 ip_set_id_t id;
@@ -618,7 +618,8 @@ list_set_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
618 size = IP_SET_LIST_MIN_SIZE; 618 size = IP_SET_LIST_MIN_SIZE;
619 619
620 set->variant = &set_variant; 620 set->variant = &set_variant;
621 set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem)); 621 set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem),
622 __alignof__(struct set_elem));
622 if (!init_list_set(net, set, size)) 623 if (!init_list_set(net, set, size))
623 return -ENOMEM; 624 return -ENOMEM;
624 if (tb[IPSET_ATTR_TIMEOUT]) { 625 if (tb[IPSET_ATTR_TIMEOUT]) {
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 1e24fff53e4b..f57b4dcdb233 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1176,6 +1176,7 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
1176 struct ip_vs_protocol *pp; 1176 struct ip_vs_protocol *pp;
1177 struct ip_vs_proto_data *pd; 1177 struct ip_vs_proto_data *pd;
1178 struct ip_vs_conn *cp; 1178 struct ip_vs_conn *cp;
1179 struct sock *sk;
1179 1180
1180 EnterFunction(11); 1181 EnterFunction(11);
1181 1182
@@ -1183,13 +1184,12 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
1183 if (skb->ipvs_property) 1184 if (skb->ipvs_property)
1184 return NF_ACCEPT; 1185 return NF_ACCEPT;
1185 1186
1187 sk = skb_to_full_sk(skb);
1186 /* Bad... Do not break raw sockets */ 1188 /* Bad... Do not break raw sockets */
1187 if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT && 1189 if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT &&
1188 af == AF_INET)) { 1190 af == AF_INET)) {
1189 struct sock *sk = skb->sk;
1190 struct inet_sock *inet = inet_sk(skb->sk);
1191 1191
1192 if (inet && sk->sk_family == PF_INET && inet->nodefrag) 1192 if (sk->sk_family == PF_INET && inet_sk(sk)->nodefrag)
1193 return NF_ACCEPT; 1193 return NF_ACCEPT;
1194 } 1194 }
1195 1195
@@ -1681,6 +1681,7 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
1681 struct ip_vs_conn *cp; 1681 struct ip_vs_conn *cp;
1682 int ret, pkts; 1682 int ret, pkts;
1683 int conn_reuse_mode; 1683 int conn_reuse_mode;
1684 struct sock *sk;
1684 1685
1685 /* Already marked as IPVS request or reply? */ 1686 /* Already marked as IPVS request or reply? */
1686 if (skb->ipvs_property) 1687 if (skb->ipvs_property)
@@ -1708,12 +1709,11 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
1708 ip_vs_fill_iph_skb(af, skb, false, &iph); 1709 ip_vs_fill_iph_skb(af, skb, false, &iph);
1709 1710
1710 /* Bad... Do not break raw sockets */ 1711 /* Bad... Do not break raw sockets */
1711 if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT && 1712 sk = skb_to_full_sk(skb);
1713 if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT &&
1712 af == AF_INET)) { 1714 af == AF_INET)) {
1713 struct sock *sk = skb->sk;
1714 struct inet_sock *inet = inet_sk(skb->sk);
1715 1715
1716 if (inet && sk->sk_family == PF_INET && inet->nodefrag) 1716 if (sk->sk_family == PF_INET && inet_sk(sk)->nodefrag)
1717 return NF_ACCEPT; 1717 return NF_ACCEPT;
1718 } 1718 }
1719 1719
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 06eb48fceb42..740cce4685ac 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -825,7 +825,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
825 struct net *net = sock_net(ctnl); 825 struct net *net = sock_net(ctnl);
826 struct nfnl_log_net *log = nfnl_log_pernet(net); 826 struct nfnl_log_net *log = nfnl_log_pernet(net);
827 int ret = 0; 827 int ret = 0;
828 u16 flags; 828 u16 flags = 0;
829 829
830 if (nfula[NFULA_CFG_CMD]) { 830 if (nfula[NFULA_CFG_CMD]) {
831 u_int8_t pf = nfmsg->nfgen_family; 831 u_int8_t pf = nfmsg->nfgen_family;
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index 1067fb4c1ffa..c7808fc19719 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -47,27 +47,34 @@ static void nft_counter_eval(const struct nft_expr *expr,
47 local_bh_enable(); 47 local_bh_enable();
48} 48}
49 49
50static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr) 50static void nft_counter_fetch(const struct nft_counter_percpu __percpu *counter,
51 struct nft_counter *total)
51{ 52{
52 struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); 53 const struct nft_counter_percpu *cpu_stats;
53 struct nft_counter_percpu *cpu_stats;
54 struct nft_counter total;
55 u64 bytes, packets; 54 u64 bytes, packets;
56 unsigned int seq; 55 unsigned int seq;
57 int cpu; 56 int cpu;
58 57
59 memset(&total, 0, sizeof(total)); 58 memset(total, 0, sizeof(*total));
60 for_each_possible_cpu(cpu) { 59 for_each_possible_cpu(cpu) {
61 cpu_stats = per_cpu_ptr(priv->counter, cpu); 60 cpu_stats = per_cpu_ptr(counter, cpu);
62 do { 61 do {
63 seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp); 62 seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
64 bytes = cpu_stats->counter.bytes; 63 bytes = cpu_stats->counter.bytes;
65 packets = cpu_stats->counter.packets; 64 packets = cpu_stats->counter.packets;
66 } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq)); 65 } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq));
67 66
68 total.packets += packets; 67 total->packets += packets;
69 total.bytes += bytes; 68 total->bytes += bytes;
70 } 69 }
70}
71
72static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr)
73{
74 struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
75 struct nft_counter total;
76
77 nft_counter_fetch(priv->counter, &total);
71 78
72 if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes)) || 79 if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes)) ||
73 nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.packets))) 80 nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.packets)))
@@ -118,6 +125,31 @@ static void nft_counter_destroy(const struct nft_ctx *ctx,
118 free_percpu(priv->counter); 125 free_percpu(priv->counter);
119} 126}
120 127
128static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
129{
130 struct nft_counter_percpu_priv *priv = nft_expr_priv(src);
131 struct nft_counter_percpu_priv *priv_clone = nft_expr_priv(dst);
132 struct nft_counter_percpu __percpu *cpu_stats;
133 struct nft_counter_percpu *this_cpu;
134 struct nft_counter total;
135
136 nft_counter_fetch(priv->counter, &total);
137
138 cpu_stats = __netdev_alloc_pcpu_stats(struct nft_counter_percpu,
139 GFP_ATOMIC);
140 if (cpu_stats == NULL)
141 return ENOMEM;
142
143 preempt_disable();
144 this_cpu = this_cpu_ptr(cpu_stats);
145 this_cpu->counter.packets = total.packets;
146 this_cpu->counter.bytes = total.bytes;
147 preempt_enable();
148
149 priv_clone->counter = cpu_stats;
150 return 0;
151}
152
121static struct nft_expr_type nft_counter_type; 153static struct nft_expr_type nft_counter_type;
122static const struct nft_expr_ops nft_counter_ops = { 154static const struct nft_expr_ops nft_counter_ops = {
123 .type = &nft_counter_type, 155 .type = &nft_counter_type,
@@ -126,6 +158,7 @@ static const struct nft_expr_ops nft_counter_ops = {
126 .init = nft_counter_init, 158 .init = nft_counter_init,
127 .destroy = nft_counter_destroy, 159 .destroy = nft_counter_destroy,
128 .dump = nft_counter_dump, 160 .dump = nft_counter_dump,
161 .clone = nft_counter_clone,
129}; 162};
130 163
131static struct nft_expr_type nft_counter_type __read_mostly = { 164static struct nft_expr_type nft_counter_type __read_mostly = {
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 513a8ef60a59..9dec3bd1b63c 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -50,8 +50,9 @@ static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr,
50 } 50 }
51 51
52 ext = nft_set_elem_ext(set, elem); 52 ext = nft_set_elem_ext(set, elem);
53 if (priv->expr != NULL) 53 if (priv->expr != NULL &&
54 nft_expr_clone(nft_set_ext_expr(ext), priv->expr); 54 nft_expr_clone(nft_set_ext_expr(ext), priv->expr) < 0)
55 return NULL;
55 56
56 return elem; 57 return elem;
57} 58}
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index af399cac5205..1cf928fb573e 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1741,6 +1741,20 @@ static void fanout_release(struct sock *sk)
1741 kfree_rcu(po->rollover, rcu); 1741 kfree_rcu(po->rollover, rcu);
1742} 1742}
1743 1743
1744static bool packet_extra_vlan_len_allowed(const struct net_device *dev,
1745 struct sk_buff *skb)
1746{
1747 /* Earlier code assumed this would be a VLAN pkt, double-check
1748 * this now that we have the actual packet in hand. We can only
1749 * do this check on Ethernet devices.
1750 */
1751 if (unlikely(dev->type != ARPHRD_ETHER))
1752 return false;
1753
1754 skb_reset_mac_header(skb);
1755 return likely(eth_hdr(skb)->h_proto == htons(ETH_P_8021Q));
1756}
1757
1744static const struct proto_ops packet_ops; 1758static const struct proto_ops packet_ops;
1745 1759
1746static const struct proto_ops packet_ops_spkt; 1760static const struct proto_ops packet_ops_spkt;
@@ -1902,18 +1916,10 @@ retry:
1902 goto retry; 1916 goto retry;
1903 } 1917 }
1904 1918
1905 if (len > (dev->mtu + dev->hard_header_len + extra_len)) { 1919 if (len > (dev->mtu + dev->hard_header_len + extra_len) &&
1906 /* Earlier code assumed this would be a VLAN pkt, 1920 !packet_extra_vlan_len_allowed(dev, skb)) {
1907 * double-check this now that we have the actual 1921 err = -EMSGSIZE;
1908 * packet in hand. 1922 goto out_unlock;
1909 */
1910 struct ethhdr *ehdr;
1911 skb_reset_mac_header(skb);
1912 ehdr = eth_hdr(skb);
1913 if (ehdr->h_proto != htons(ETH_P_8021Q)) {
1914 err = -EMSGSIZE;
1915 goto out_unlock;
1916 }
1917 } 1923 }
1918 1924
1919 skb->protocol = proto; 1925 skb->protocol = proto;
@@ -2332,6 +2338,15 @@ static bool ll_header_truncated(const struct net_device *dev, int len)
2332 return false; 2338 return false;
2333} 2339}
2334 2340
2341static void tpacket_set_protocol(const struct net_device *dev,
2342 struct sk_buff *skb)
2343{
2344 if (dev->type == ARPHRD_ETHER) {
2345 skb_reset_mac_header(skb);
2346 skb->protocol = eth_hdr(skb)->h_proto;
2347 }
2348}
2349
2335static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, 2350static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
2336 void *frame, struct net_device *dev, int size_max, 2351 void *frame, struct net_device *dev, int size_max,
2337 __be16 proto, unsigned char *addr, int hlen) 2352 __be16 proto, unsigned char *addr, int hlen)
@@ -2368,8 +2383,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
2368 skb_reserve(skb, hlen); 2383 skb_reserve(skb, hlen);
2369 skb_reset_network_header(skb); 2384 skb_reset_network_header(skb);
2370 2385
2371 if (!packet_use_direct_xmit(po))
2372 skb_probe_transport_header(skb, 0);
2373 if (unlikely(po->tp_tx_has_off)) { 2386 if (unlikely(po->tp_tx_has_off)) {
2374 int off_min, off_max, off; 2387 int off_min, off_max, off;
2375 off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll); 2388 off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
@@ -2415,6 +2428,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
2415 dev->hard_header_len); 2428 dev->hard_header_len);
2416 if (unlikely(err)) 2429 if (unlikely(err))
2417 return err; 2430 return err;
2431 if (!skb->protocol)
2432 tpacket_set_protocol(dev, skb);
2418 2433
2419 data += dev->hard_header_len; 2434 data += dev->hard_header_len;
2420 to_write -= dev->hard_header_len; 2435 to_write -= dev->hard_header_len;
@@ -2449,6 +2464,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
2449 len = ((to_write > len_max) ? len_max : to_write); 2464 len = ((to_write > len_max) ? len_max : to_write);
2450 } 2465 }
2451 2466
2467 skb_probe_transport_header(skb, 0);
2468
2452 return tp_len; 2469 return tp_len;
2453} 2470}
2454 2471
@@ -2493,12 +2510,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
2493 if (unlikely(!(dev->flags & IFF_UP))) 2510 if (unlikely(!(dev->flags & IFF_UP)))
2494 goto out_put; 2511 goto out_put;
2495 2512
2496 reserve = dev->hard_header_len + VLAN_HLEN; 2513 if (po->sk.sk_socket->type == SOCK_RAW)
2514 reserve = dev->hard_header_len;
2497 size_max = po->tx_ring.frame_size 2515 size_max = po->tx_ring.frame_size
2498 - (po->tp_hdrlen - sizeof(struct sockaddr_ll)); 2516 - (po->tp_hdrlen - sizeof(struct sockaddr_ll));
2499 2517
2500 if (size_max > dev->mtu + reserve) 2518 if (size_max > dev->mtu + reserve + VLAN_HLEN)
2501 size_max = dev->mtu + reserve; 2519 size_max = dev->mtu + reserve + VLAN_HLEN;
2502 2520
2503 do { 2521 do {
2504 ph = packet_current_frame(po, &po->tx_ring, 2522 ph = packet_current_frame(po, &po->tx_ring,
@@ -2525,18 +2543,10 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
2525 tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto, 2543 tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
2526 addr, hlen); 2544 addr, hlen);
2527 if (likely(tp_len >= 0) && 2545 if (likely(tp_len >= 0) &&
2528 tp_len > dev->mtu + dev->hard_header_len) { 2546 tp_len > dev->mtu + reserve &&
2529 struct ethhdr *ehdr; 2547 !packet_extra_vlan_len_allowed(dev, skb))
2530 /* Earlier code assumed this would be a VLAN pkt, 2548 tp_len = -EMSGSIZE;
2531 * double-check this now that we have the actual
2532 * packet in hand.
2533 */
2534 2549
2535 skb_reset_mac_header(skb);
2536 ehdr = eth_hdr(skb);
2537 if (ehdr->h_proto != htons(ETH_P_8021Q))
2538 tp_len = -EMSGSIZE;
2539 }
2540 if (unlikely(tp_len < 0)) { 2550 if (unlikely(tp_len < 0)) {
2541 if (po->tp_loss) { 2551 if (po->tp_loss) {
2542 __packet_set_status(po, ph, 2552 __packet_set_status(po, ph,
@@ -2765,18 +2775,10 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
2765 2775
2766 sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); 2776 sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
2767 2777
2768 if (!gso_type && (len > dev->mtu + reserve + extra_len)) { 2778 if (!gso_type && (len > dev->mtu + reserve + extra_len) &&
2769 /* Earlier code assumed this would be a VLAN pkt, 2779 !packet_extra_vlan_len_allowed(dev, skb)) {
2770 * double-check this now that we have the actual 2780 err = -EMSGSIZE;
2771 * packet in hand. 2781 goto out_free;
2772 */
2773 struct ethhdr *ehdr;
2774 skb_reset_mac_header(skb);
2775 ehdr = eth_hdr(skb);
2776 if (ehdr->h_proto != htons(ETH_P_8021Q)) {
2777 err = -EMSGSIZE;
2778 goto out_free;
2779 }
2780 } 2782 }
2781 2783
2782 skb->protocol = proto; 2784 skb->protocol = proto;
@@ -2807,8 +2809,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
2807 len += vnet_hdr_len; 2809 len += vnet_hdr_len;
2808 } 2810 }
2809 2811
2810 if (!packet_use_direct_xmit(po)) 2812 skb_probe_transport_header(skb, reserve);
2811 skb_probe_transport_header(skb, reserve); 2813
2812 if (unlikely(extra_len == 4)) 2814 if (unlikely(extra_len == 4))
2813 skb->no_fcs = 1; 2815 skb->no_fcs = 1;
2814 2816
@@ -4107,7 +4109,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
4107 err = -EINVAL; 4109 err = -EINVAL;
4108 if (unlikely((int)req->tp_block_size <= 0)) 4110 if (unlikely((int)req->tp_block_size <= 0))
4109 goto out; 4111 goto out;
4110 if (unlikely(req->tp_block_size & (PAGE_SIZE - 1))) 4112 if (unlikely(!PAGE_ALIGNED(req->tp_block_size)))
4111 goto out; 4113 goto out;
4112 if (po->tp_version >= TPACKET_V3 && 4114 if (po->tp_version >= TPACKET_V3 &&
4113 (int)(req->tp_block_size - 4115 (int)(req->tp_block_size -
@@ -4119,8 +4121,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
4119 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1))) 4121 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
4120 goto out; 4122 goto out;
4121 4123
4122 rb->frames_per_block = req->tp_block_size/req->tp_frame_size; 4124 rb->frames_per_block = req->tp_block_size / req->tp_frame_size;
4123 if (unlikely(rb->frames_per_block <= 0)) 4125 if (unlikely(rb->frames_per_block == 0))
4124 goto out; 4126 goto out;
4125 if (unlikely((rb->frames_per_block * req->tp_block_nr) != 4127 if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
4126 req->tp_frame_nr)) 4128 req->tp_frame_nr))
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 4f15b7d730e1..1543e39f47c3 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -809,8 +809,8 @@ int sctp_auth_ep_set_hmacs(struct sctp_endpoint *ep,
809 if (!has_sha1) 809 if (!has_sha1)
810 return -EINVAL; 810 return -EINVAL;
811 811
812 memcpy(ep->auth_hmacs_list->hmac_ids, &hmacs->shmac_idents[0], 812 for (i = 0; i < hmacs->shmac_num_idents; i++)
813 hmacs->shmac_num_idents * sizeof(__u16)); 813 ep->auth_hmacs_list->hmac_ids[i] = htons(hmacs->shmac_idents[i]);
814 ep->auth_hmacs_list->param_hdr.length = htons(sizeof(sctp_paramhdr_t) + 814 ep->auth_hmacs_list->param_hdr.length = htons(sizeof(sctp_paramhdr_t) +
815 hmacs->shmac_num_idents * sizeof(__u16)); 815 hmacs->shmac_num_idents * sizeof(__u16));
816 return 0; 816 return 0;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index aaa0b58d6aba..955ec152cb71 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -441,6 +441,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
441 if (state == TCP_LISTEN) 441 if (state == TCP_LISTEN)
442 unix_release_sock(skb->sk, 1); 442 unix_release_sock(skb->sk, 1);
443 /* passed fds are erased in the kfree_skb hook */ 443 /* passed fds are erased in the kfree_skb hook */
444 UNIXCB(skb).consumed = skb->len;
444 kfree_skb(skb); 445 kfree_skb(skb);
445 } 446 }
446 447
@@ -1799,6 +1800,7 @@ alloc_skb:
1799 * this - does no harm 1800 * this - does no harm
1800 */ 1801 */
1801 consume_skb(newskb); 1802 consume_skb(newskb);
1803 newskb = NULL;
1802 } 1804 }
1803 1805
1804 if (skb_append_pagefrags(skb, page, offset, size)) { 1806 if (skb_append_pagefrags(skb, page, offset, size)) {
@@ -1811,8 +1813,11 @@ alloc_skb:
1811 skb->truesize += size; 1813 skb->truesize += size;
1812 atomic_add(size, &sk->sk_wmem_alloc); 1814 atomic_add(size, &sk->sk_wmem_alloc);
1813 1815
1814 if (newskb) 1816 if (newskb) {
1817 spin_lock(&other->sk_receive_queue.lock);
1815 __skb_queue_tail(&other->sk_receive_queue, newskb); 1818 __skb_queue_tail(&other->sk_receive_queue, newskb);
1819 spin_unlock(&other->sk_receive_queue.lock);
1820 }
1816 1821
1817 unix_state_unlock(other); 1822 unix_state_unlock(other);
1818 mutex_unlock(&unix_sk(other)->readlock); 1823 mutex_unlock(&unix_sk(other)->readlock);
@@ -2072,6 +2077,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
2072 2077
2073 do { 2078 do {
2074 int chunk; 2079 int chunk;
2080 bool drop_skb;
2075 struct sk_buff *skb, *last; 2081 struct sk_buff *skb, *last;
2076 2082
2077 unix_state_lock(sk); 2083 unix_state_lock(sk);
@@ -2152,7 +2158,11 @@ unlock:
2152 } 2158 }
2153 2159
2154 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size); 2160 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2161 skb_get(skb);
2155 chunk = state->recv_actor(skb, skip, chunk, state); 2162 chunk = state->recv_actor(skb, skip, chunk, state);
2163 drop_skb = !unix_skb_len(skb);
2164 /* skb is only safe to use if !drop_skb */
2165 consume_skb(skb);
2156 if (chunk < 0) { 2166 if (chunk < 0) {
2157 if (copied == 0) 2167 if (copied == 0)
2158 copied = -EFAULT; 2168 copied = -EFAULT;
@@ -2161,6 +2171,18 @@ unlock:
2161 copied += chunk; 2171 copied += chunk;
2162 size -= chunk; 2172 size -= chunk;
2163 2173
2174 if (drop_skb) {
2175 /* the skb was touched by a concurrent reader;
2176 * we should not expect anything from this skb
2177 * anymore and assume it invalid - we can be
2178 * sure it was dropped from the socket queue
2179 *
2180 * let's report a short read
2181 */
2182 err = 0;
2183 break;
2184 }
2185
2164 /* Mark read part of skb as used */ 2186 /* Mark read part of skb as used */
2165 if (!(flags & MSG_PEEK)) { 2187 if (!(flags & MSG_PEEK)) {
2166 UNIXCB(skb).consumed += chunk; 2188 UNIXCB(skb).consumed += chunk;