Diffstat (limited to 'net/core')
-rw-r--r--  net/core/Makefile               3
-rw-r--r--  net/core/dev.c                 48
-rw-r--r--  net/core/devlink.c            738
-rw-r--r--  net/core/dst.c                 10
-rw-r--r--  net/core/dst_cache.c          168
-rw-r--r--  net/core/ethtool.c            638
-rw-r--r--  net/core/filter.c             292
-rw-r--r--  net/core/flow_dissector.c      58
-rw-r--r--  net/core/gen_estimator.c        2
-rw-r--r--  net/core/gen_stats.c            1
-rw-r--r--  net/core/hwbm.c                87
-rw-r--r--  net/core/lwtunnel.c            37
-rw-r--r--  net/core/net-sysfs.c           18
-rw-r--r--  net/core/netclassid_cgroup.c    1
-rw-r--r--  net/core/netprio_cgroup.c       1
-rw-r--r--  net/core/pktgen.c               4
-rw-r--r--  net/core/rtnetlink.c          144
-rw-r--r--  net/core/skbuff.c             194
-rw-r--r--  net/core/sock.c                16
19 files changed, 2247 insertions, 213 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index 0b835de04de3..d6508c2ddca5 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -24,3 +24,6 @@ obj-$(CONFIG_NET_PTP_CLASSIFY) += ptp_classifier.o
 obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o
 obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o
 obj-$(CONFIG_LWTUNNEL) += lwtunnel.o
+obj-$(CONFIG_DST_CACHE) += dst_cache.o
+obj-$(CONFIG_HWBM) += hwbm.o
+obj-$(CONFIG_NET_DEVLINK) += devlink.o
diff --git a/net/core/dev.c b/net/core/dev.c
index 8cba3d852f25..5c925ac50b95 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2802,7 +2802,7 @@ static netdev_features_t harmonize_features(struct sk_buff *skb,
 
 	if (skb->ip_summed != CHECKSUM_NONE &&
 	    !can_checksum_protocol(features, type)) {
-		features &= ~NETIF_F_CSUM_MASK;
+		features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
 	} else if (illegal_highdma(skb->dev, skb)) {
 		features &= ~NETIF_F_SG;
 	}
@@ -3829,8 +3829,14 @@ static void net_tx_action(struct softirq_action *h)
 				trace_consume_skb(skb);
 			else
 				trace_kfree_skb(skb, net_tx_action);
-			__kfree_skb(skb);
+
+			if (skb->fclone != SKB_FCLONE_UNAVAILABLE)
+				__kfree_skb(skb);
+			else
+				__kfree_skb_defer(skb);
 		}
+
+		__kfree_skb_flush();
 	}
 
 	if (sd->output_queue) {
@@ -4154,7 +4160,10 @@ ncls:
 			ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 	} else {
 drop:
-		atomic_long_inc(&skb->dev->rx_dropped);
+		if (!deliver_exact)
+			atomic_long_inc(&skb->dev->rx_dropped);
+		else
+			atomic_long_inc(&skb->dev->rx_nohandler);
 		kfree_skb(skb);
 		/* Jamal, now you will not able to escape explaining
 		 * me how you were going to use this. :-)
@@ -4429,7 +4438,8 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 		NAPI_GRO_CB(skb)->same_flow = 0;
 		NAPI_GRO_CB(skb)->flush = 0;
 		NAPI_GRO_CB(skb)->free = 0;
-		NAPI_GRO_CB(skb)->udp_mark = 0;
+		NAPI_GRO_CB(skb)->encap_mark = 0;
+		NAPI_GRO_CB(skb)->is_fou = 0;
 		NAPI_GRO_CB(skb)->gro_remcsum_start = 0;
 
 		/* Setup for GRO checksum validation */
@@ -5152,6 +5162,7 @@ static void net_rx_action(struct softirq_action *h)
 		}
 	}
 
+	__kfree_skb_flush();
 	local_irq_disable();
 
 	list_splice_tail_init(&sd->poll_list, &list);
@@ -5379,12 +5390,12 @@ void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter)
 {
 	struct netdev_adjacent *lower;
 
-	lower = list_entry((*iter)->next, struct netdev_adjacent, list);
+	lower = list_entry(*iter, struct netdev_adjacent, list);
 
 	if (&lower->list == &dev->adj_list.lower)
 		return NULL;
 
-	*iter = &lower->list;
+	*iter = lower->list.next;
 
 	return lower->dev;
 }
@@ -6435,6 +6446,7 @@ EXPORT_SYMBOL(dev_get_phys_port_id);
  * dev_get_phys_port_name - Get device physical port name
  * @dev: device
  * @name: port name
+ * @len: limit of bytes to copy to name
  *
  * Get device physical port name
  */
@@ -7253,24 +7265,31 @@ void netdev_run_todo(void)
 	}
 }
 
-/* Convert net_device_stats to rtnl_link_stats64. They have the same
- * fields in the same order, with only the type differing.
+/* Convert net_device_stats to rtnl_link_stats64. rtnl_link_stats64 has
+ * all the same fields in the same order as net_device_stats, with only
+ * the type differing, but rtnl_link_stats64 may have additional fields
+ * at the end for newer counters.
  */
 void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
 			     const struct net_device_stats *netdev_stats)
 {
 #if BITS_PER_LONG == 64
-	BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats));
-	memcpy(stats64, netdev_stats, sizeof(*stats64));
+	BUILD_BUG_ON(sizeof(*stats64) < sizeof(*netdev_stats));
+	memcpy(stats64, netdev_stats, sizeof(*netdev_stats));
+	/* zero out counters that only exist in rtnl_link_stats64 */
+	memset((char *)stats64 + sizeof(*netdev_stats), 0,
+	       sizeof(*stats64) - sizeof(*netdev_stats));
 #else
-	size_t i, n = sizeof(*stats64) / sizeof(u64);
+	size_t i, n = sizeof(*netdev_stats) / sizeof(unsigned long);
 	const unsigned long *src = (const unsigned long *)netdev_stats;
 	u64 *dst = (u64 *)stats64;
 
-	BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) !=
-		     sizeof(*stats64) / sizeof(u64));
+	BUILD_BUG_ON(n > sizeof(*stats64) / sizeof(u64));
 	for (i = 0; i < n; i++)
 		dst[i] = src[i];
+	/* zero out counters that only exist in rtnl_link_stats64 */
+	memset((char *)stats64 + n * sizeof(u64), 0,
+	       sizeof(*stats64) - n * sizeof(u64));
 #endif
 }
 EXPORT_SYMBOL(netdev_stats_to_stats64);
@@ -7300,6 +7319,7 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
 	}
 	storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
 	storage->tx_dropped += atomic_long_read(&dev->tx_dropped);
+	storage->rx_nohandler += atomic_long_read(&dev->rx_nohandler);
 	return storage;
 }
 EXPORT_SYMBOL(dev_get_stats);
@@ -7422,8 +7442,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
 	setup(dev);
 
-	if (!dev->tx_queue_len)
+	if (!dev->tx_queue_len) {
 		dev->priv_flags |= IFF_NO_QUEUE;
+		dev->tx_queue_len = 1;
+	}
 
 	dev->num_tx_queues = txqs;
 	dev->real_num_tx_queues = txqs;
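
The netdev_stats_to_stats64() change above relaxes the old "identical structs" assumption: rtnl_link_stats64 may now be strictly larger than net_device_stats, so the helper copies the common prefix and zeroes the new trailing counters (such as rx_nohandler). A minimal, userspace-compilable sketch of that contract follows; old_stats/new_stats are toy stand-ins for the two kernel structs, not real types:

#include <stdint.h>
#include <string.h>

struct old_stats { unsigned long rx_packets, rx_dropped; };
struct new_stats { uint64_t rx_packets, rx_dropped, rx_nohandler; };

static void stats_to_stats64(struct new_stats *dst, const struct old_stats *src)
{
	size_t i, n = sizeof(*src) / sizeof(unsigned long);
	const unsigned long *s = (const unsigned long *)src;
	uint64_t *d = (uint64_t *)dst;

	for (i = 0; i < n; i++)		/* widen each legacy counter */
		d[i] = s[i];
	/* zero the counters that exist only in the wider struct */
	memset((char *)dst + n * sizeof(uint64_t), 0,
	       sizeof(*dst) - n * sizeof(uint64_t));
}
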
diff --git a/net/core/devlink.c b/net/core/devlink.c
new file mode 100644
index 000000000000..590fa561cb7f
--- /dev/null
+++ b/net/core/devlink.c
@@ -0,0 +1,738 @@
+/*
+ * net/core/devlink.c - Network physical/parent device Netlink interface
+ *
+ * Heavily inspired by net/wireless/
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/gfp.h>
+#include <linux/device.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <rdma/ib_verbs.h>
+#include <net/netlink.h>
+#include <net/genetlink.h>
+#include <net/rtnetlink.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+#include <net/devlink.h>
+
+static LIST_HEAD(devlink_list);
+
+/* devlink_mutex
+ *
+ * An overall lock guarding every operation coming from userspace.
+ * It also guards devlink devices list and it is taken when
+ * driver registers/unregisters it.
+ */
+static DEFINE_MUTEX(devlink_mutex);
+
+/* devlink_port_mutex
+ *
+ * Shared lock to guard lists of ports in all devlink devices.
+ */
+static DEFINE_MUTEX(devlink_port_mutex);
+
+static struct net *devlink_net(const struct devlink *devlink)
+{
+	return read_pnet(&devlink->_net);
+}
+
+static void devlink_net_set(struct devlink *devlink, struct net *net)
+{
+	write_pnet(&devlink->_net, net);
+}
+
+static struct devlink *devlink_get_from_attrs(struct net *net,
+					      struct nlattr **attrs)
+{
+	struct devlink *devlink;
+	char *busname;
+	char *devname;
+
+	if (!attrs[DEVLINK_ATTR_BUS_NAME] || !attrs[DEVLINK_ATTR_DEV_NAME])
+		return ERR_PTR(-EINVAL);
+
+	busname = nla_data(attrs[DEVLINK_ATTR_BUS_NAME]);
+	devname = nla_data(attrs[DEVLINK_ATTR_DEV_NAME]);
+
+	list_for_each_entry(devlink, &devlink_list, list) {
+		if (strcmp(devlink->dev->bus->name, busname) == 0 &&
+		    strcmp(dev_name(devlink->dev), devname) == 0 &&
+		    net_eq(devlink_net(devlink), net))
+			return devlink;
+	}
+
+	return ERR_PTR(-ENODEV);
+}
+
+static struct devlink *devlink_get_from_info(struct genl_info *info)
+{
+	return devlink_get_from_attrs(genl_info_net(info), info->attrs);
+}
+
+static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink,
+						      int port_index)
+{
+	struct devlink_port *devlink_port;
+
+	list_for_each_entry(devlink_port, &devlink->port_list, list) {
+		if (devlink_port->index == port_index)
+			return devlink_port;
+	}
+	return NULL;
+}
+
+static bool devlink_port_index_exists(struct devlink *devlink, int port_index)
+{
+	return devlink_port_get_by_index(devlink, port_index);
+}
+
+static struct devlink_port *devlink_port_get_from_attrs(struct devlink *devlink,
+							struct nlattr **attrs)
+{
+	if (attrs[DEVLINK_ATTR_PORT_INDEX]) {
+		u32 port_index = nla_get_u32(attrs[DEVLINK_ATTR_PORT_INDEX]);
+		struct devlink_port *devlink_port;
+
+		devlink_port = devlink_port_get_by_index(devlink, port_index);
+		if (!devlink_port)
+			return ERR_PTR(-ENODEV);
+		return devlink_port;
+	}
+	return ERR_PTR(-EINVAL);
+}
+
+static struct devlink_port *devlink_port_get_from_info(struct devlink *devlink,
+						       struct genl_info *info)
+{
+	return devlink_port_get_from_attrs(devlink, info->attrs);
+}
+
+#define DEVLINK_NL_FLAG_NEED_PORT	BIT(0)
+
+static int devlink_nl_pre_doit(const struct genl_ops *ops,
+			       struct sk_buff *skb, struct genl_info *info)
+{
+	struct devlink *devlink;
+
+	mutex_lock(&devlink_mutex);
+	devlink = devlink_get_from_info(info);
+	if (IS_ERR(devlink)) {
+		mutex_unlock(&devlink_mutex);
+		return PTR_ERR(devlink);
+	}
+	info->user_ptr[0] = devlink;
+	if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) {
+		struct devlink_port *devlink_port;
+
+		mutex_lock(&devlink_port_mutex);
+		devlink_port = devlink_port_get_from_info(devlink, info);
+		if (IS_ERR(devlink_port)) {
+			mutex_unlock(&devlink_port_mutex);
+			mutex_unlock(&devlink_mutex);
+			return PTR_ERR(devlink_port);
+		}
+		info->user_ptr[1] = devlink_port;
+	}
+	return 0;
+}
+
+static void devlink_nl_post_doit(const struct genl_ops *ops,
+				 struct sk_buff *skb, struct genl_info *info)
+{
+	if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT)
+		mutex_unlock(&devlink_port_mutex);
+	mutex_unlock(&devlink_mutex);
+}
+
+static struct genl_family devlink_nl_family = {
+	.id		= GENL_ID_GENERATE,
+	.name		= DEVLINK_GENL_NAME,
+	.version	= DEVLINK_GENL_VERSION,
+	.maxattr	= DEVLINK_ATTR_MAX,
+	.netnsok	= true,
+	.pre_doit	= devlink_nl_pre_doit,
+	.post_doit	= devlink_nl_post_doit,
+};
+
+enum devlink_multicast_groups {
+	DEVLINK_MCGRP_CONFIG,
+};
+
+static const struct genl_multicast_group devlink_nl_mcgrps[] = {
+	[DEVLINK_MCGRP_CONFIG] = { .name = DEVLINK_GENL_MCGRP_CONFIG_NAME },
+};
+
+static int devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink)
+{
+	if (nla_put_string(msg, DEVLINK_ATTR_BUS_NAME, devlink->dev->bus->name))
+		return -EMSGSIZE;
+	if (nla_put_string(msg, DEVLINK_ATTR_DEV_NAME, dev_name(devlink->dev)))
+		return -EMSGSIZE;
+	return 0;
+}
+
+static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink,
+			   enum devlink_command cmd, u32 portid,
+			   u32 seq, int flags)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (devlink_nl_put_handle(msg, devlink))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+static void devlink_notify(struct devlink *devlink, enum devlink_command cmd)
+{
+	struct sk_buff *msg;
+	int err;
+
+	WARN_ON(cmd != DEVLINK_CMD_NEW && cmd != DEVLINK_CMD_DEL);
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return;
+
+	err = devlink_nl_fill(msg, devlink, cmd, 0, 0, 0);
+	if (err) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+				msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
+				struct devlink_port *devlink_port,
+				enum devlink_command cmd, u32 portid,
+				u32 seq, int flags)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (devlink_nl_put_handle(msg, devlink))
+		goto nla_put_failure;
+	if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
+		goto nla_put_failure;
+	if (nla_put_u16(msg, DEVLINK_ATTR_PORT_TYPE, devlink_port->type))
+		goto nla_put_failure;
+	if (devlink_port->desired_type != DEVLINK_PORT_TYPE_NOTSET &&
+	    nla_put_u16(msg, DEVLINK_ATTR_PORT_DESIRED_TYPE,
+			devlink_port->desired_type))
+		goto nla_put_failure;
+	if (devlink_port->type == DEVLINK_PORT_TYPE_ETH) {
+		struct net_device *netdev = devlink_port->type_dev;
+
+		if (netdev &&
+		    (nla_put_u32(msg, DEVLINK_ATTR_PORT_NETDEV_IFINDEX,
+				 netdev->ifindex) ||
+		     nla_put_string(msg, DEVLINK_ATTR_PORT_NETDEV_NAME,
+				    netdev->name)))
+			goto nla_put_failure;
+	}
+	if (devlink_port->type == DEVLINK_PORT_TYPE_IB) {
+		struct ib_device *ibdev = devlink_port->type_dev;
+
+		if (ibdev &&
+		    nla_put_string(msg, DEVLINK_ATTR_PORT_IBDEV_NAME,
+				   ibdev->name))
+			goto nla_put_failure;
+	}
+	if (devlink_port->split &&
+	    nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_GROUP,
+			devlink_port->split_group))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+static void devlink_port_notify(struct devlink_port *devlink_port,
+				enum devlink_command cmd)
+{
+	struct devlink *devlink = devlink_port->devlink;
+	struct sk_buff *msg;
+	int err;
+
+	if (!devlink_port->registered)
+		return;
+
+	WARN_ON(cmd != DEVLINK_CMD_PORT_NEW && cmd != DEVLINK_CMD_PORT_DEL);
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return;
+
+	err = devlink_nl_port_fill(msg, devlink, devlink_port, cmd, 0, 0, 0);
+	if (err) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+				msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+static int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct sk_buff *msg;
+	int err;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	err = devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW,
+			      info->snd_portid, info->snd_seq, 0);
+	if (err) {
+		nlmsg_free(msg);
+		return err;
+	}
+
+	return genlmsg_reply(msg, info);
+}
+
+static int devlink_nl_cmd_get_dumpit(struct sk_buff *msg,
+				     struct netlink_callback *cb)
+{
+	struct devlink *devlink;
+	int start = cb->args[0];
+	int idx = 0;
+	int err;
+
+	mutex_lock(&devlink_mutex);
+	list_for_each_entry(devlink, &devlink_list, list) {
+		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+			continue;
+		if (idx < start) {
+			idx++;
+			continue;
+		}
+		err = devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW,
+				      NETLINK_CB(cb->skb).portid,
+				      cb->nlh->nlmsg_seq, NLM_F_MULTI);
+		if (err)
+			goto out;
+		idx++;
+	}
+out:
+	mutex_unlock(&devlink_mutex);
+
+	cb->args[0] = idx;
+	return msg->len;
+}
+
+static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb,
+					struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_port *devlink_port = info->user_ptr[1];
+	struct sk_buff *msg;
+	int err;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	err = devlink_nl_port_fill(msg, devlink, devlink_port,
+				   DEVLINK_CMD_PORT_NEW,
+				   info->snd_portid, info->snd_seq, 0);
+	if (err) {
+		nlmsg_free(msg);
+		return err;
+	}
+
+	return genlmsg_reply(msg, info);
+}
+
+static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg,
+					  struct netlink_callback *cb)
+{
+	struct devlink *devlink;
+	struct devlink_port *devlink_port;
+	int start = cb->args[0];
+	int idx = 0;
+	int err;
+
+	mutex_lock(&devlink_mutex);
+	mutex_lock(&devlink_port_mutex);
+	list_for_each_entry(devlink, &devlink_list, list) {
+		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+			continue;
+		list_for_each_entry(devlink_port, &devlink->port_list, list) {
+			if (idx < start) {
+				idx++;
+				continue;
+			}
+			err = devlink_nl_port_fill(msg, devlink, devlink_port,
+						   DEVLINK_CMD_NEW,
+						   NETLINK_CB(cb->skb).portid,
+						   cb->nlh->nlmsg_seq,
+						   NLM_F_MULTI);
+			if (err)
+				goto out;
+			idx++;
+		}
+	}
+out:
+	mutex_unlock(&devlink_port_mutex);
+	mutex_unlock(&devlink_mutex);
+
+	cb->args[0] = idx;
+	return msg->len;
+}
+
+static int devlink_port_type_set(struct devlink *devlink,
+				 struct devlink_port *devlink_port,
+				 enum devlink_port_type port_type)
+
+{
+	int err;
+
+	if (devlink->ops && devlink->ops->port_type_set) {
+		if (port_type == DEVLINK_PORT_TYPE_NOTSET)
+			return -EINVAL;
+		err = devlink->ops->port_type_set(devlink_port, port_type);
+		if (err)
+			return err;
+		devlink_port->desired_type = port_type;
+		devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+		return 0;
+	}
+	return -EOPNOTSUPP;
+}
+
+static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb,
+					struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_port *devlink_port = info->user_ptr[1];
+	int err;
+
+	if (info->attrs[DEVLINK_ATTR_PORT_TYPE]) {
+		enum devlink_port_type port_type;
+
+		port_type = nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_TYPE]);
+		err = devlink_port_type_set(devlink, devlink_port, port_type);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+static int devlink_port_split(struct devlink *devlink,
+			      u32 port_index, u32 count)
+
+{
+	if (devlink->ops && devlink->ops->port_split)
+		return devlink->ops->port_split(devlink, port_index, count);
+	return -EOPNOTSUPP;
+}
+
+static int devlink_nl_cmd_port_split_doit(struct sk_buff *skb,
+					  struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	u32 port_index;
+	u32 count;
+
+	if (!info->attrs[DEVLINK_ATTR_PORT_INDEX] ||
+	    !info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT])
+		return -EINVAL;
+
+	port_index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
+	count = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT]);
+	return devlink_port_split(devlink, port_index, count);
+}
+
+static int devlink_port_unsplit(struct devlink *devlink, u32 port_index)
+
+{
+	if (devlink->ops && devlink->ops->port_unsplit)
+		return devlink->ops->port_unsplit(devlink, port_index);
+	return -EOPNOTSUPP;
+}
+
+static int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb,
+					    struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	u32 port_index;
+
+	if (!info->attrs[DEVLINK_ATTR_PORT_INDEX])
+		return -EINVAL;
+
+	port_index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
+	return devlink_port_unsplit(devlink, port_index);
+}
+
+static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
+	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
+	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
+	[DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32 },
+	[DEVLINK_ATTR_PORT_TYPE] = { .type = NLA_U16 },
+	[DEVLINK_ATTR_PORT_SPLIT_COUNT] = { .type = NLA_U32 },
+};
+
+static const struct genl_ops devlink_nl_ops[] = {
+	{
+		.cmd = DEVLINK_CMD_GET,
+		.doit = devlink_nl_cmd_get_doit,
+		.dumpit = devlink_nl_cmd_get_dumpit,
+		.policy = devlink_nl_policy,
+		/* can be retrieved by unprivileged users */
+	},
+	{
+		.cmd = DEVLINK_CMD_PORT_GET,
+		.doit = devlink_nl_cmd_port_get_doit,
+		.dumpit = devlink_nl_cmd_port_get_dumpit,
+		.policy = devlink_nl_policy,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
+		/* can be retrieved by unprivileged users */
+	},
+	{
+		.cmd = DEVLINK_CMD_PORT_SET,
+		.doit = devlink_nl_cmd_port_set_doit,
+		.policy = devlink_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
+	},
+	{
+		.cmd = DEVLINK_CMD_PORT_SPLIT,
+		.doit = devlink_nl_cmd_port_split_doit,
+		.policy = devlink_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = DEVLINK_CMD_PORT_UNSPLIT,
+		.doit = devlink_nl_cmd_port_unsplit_doit,
+		.policy = devlink_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+};
+
+/**
+ *	devlink_alloc - Allocate new devlink instance resources
+ *
+ *	@ops: ops
+ *	@priv_size: size of user private data
+ *
+ *	Allocate new devlink instance resources, including devlink index
+ *	and name.
+ */
+struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
+{
+	struct devlink *devlink;
+
+	devlink = kzalloc(sizeof(*devlink) + priv_size, GFP_KERNEL);
+	if (!devlink)
+		return NULL;
+	devlink->ops = ops;
+	devlink_net_set(devlink, &init_net);
+	INIT_LIST_HEAD(&devlink->port_list);
+	return devlink;
+}
+EXPORT_SYMBOL_GPL(devlink_alloc);
+
+/**
+ *	devlink_register - Register devlink instance
+ *
+ *	@devlink: devlink
+ */
+int devlink_register(struct devlink *devlink, struct device *dev)
+{
+	mutex_lock(&devlink_mutex);
+	devlink->dev = dev;
+	list_add_tail(&devlink->list, &devlink_list);
+	devlink_notify(devlink, DEVLINK_CMD_NEW);
+	mutex_unlock(&devlink_mutex);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_register);
+
+/**
+ *	devlink_unregister - Unregister devlink instance
+ *
+ *	@devlink: devlink
+ */
+void devlink_unregister(struct devlink *devlink)
+{
+	mutex_lock(&devlink_mutex);
+	devlink_notify(devlink, DEVLINK_CMD_DEL);
+	list_del(&devlink->list);
+	mutex_unlock(&devlink_mutex);
+}
+EXPORT_SYMBOL_GPL(devlink_unregister);
+
+/**
+ *	devlink_free - Free devlink instance resources
+ *
+ *	@devlink: devlink
+ */
+void devlink_free(struct devlink *devlink)
+{
+	kfree(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_free);
+
+/**
+ *	devlink_port_register - Register devlink port
+ *
+ *	@devlink: devlink
+ *	@devlink_port: devlink port
+ *	@port_index
+ *
+ *	Register devlink port with provided port index. User can use
+ *	any indexing, even hw-related one. devlink_port structure
+ *	is convenient to be embedded inside user driver private structure.
+ *	Note that the caller should take care of zeroing the devlink_port
+ *	structure.
+ */
+int devlink_port_register(struct devlink *devlink,
+			  struct devlink_port *devlink_port,
+			  unsigned int port_index)
+{
+	mutex_lock(&devlink_port_mutex);
+	if (devlink_port_index_exists(devlink, port_index)) {
+		mutex_unlock(&devlink_port_mutex);
+		return -EEXIST;
+	}
+	devlink_port->devlink = devlink;
+	devlink_port->index = port_index;
+	devlink_port->type = DEVLINK_PORT_TYPE_NOTSET;
+	devlink_port->registered = true;
+	list_add_tail(&devlink_port->list, &devlink->port_list);
+	mutex_unlock(&devlink_port_mutex);
+	devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_port_register);
+
+/**
+ *	devlink_port_unregister - Unregister devlink port
+ *
+ *	@devlink_port: devlink port
+ */
+void devlink_port_unregister(struct devlink_port *devlink_port)
+{
+	devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
+	mutex_lock(&devlink_port_mutex);
+	list_del(&devlink_port->list);
+	mutex_unlock(&devlink_port_mutex);
+}
+EXPORT_SYMBOL_GPL(devlink_port_unregister);
+
+static void __devlink_port_type_set(struct devlink_port *devlink_port,
+				    enum devlink_port_type type,
+				    void *type_dev)
+{
+	devlink_port->type = type;
+	devlink_port->type_dev = type_dev;
+	devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+}
+
+/**
+ *	devlink_port_type_eth_set - Set port type to Ethernet
+ *
+ *	@devlink_port: devlink port
+ *	@netdev: related netdevice
+ */
+void devlink_port_type_eth_set(struct devlink_port *devlink_port,
+			       struct net_device *netdev)
+{
+	return __devlink_port_type_set(devlink_port,
+				       DEVLINK_PORT_TYPE_ETH, netdev);
+}
+EXPORT_SYMBOL_GPL(devlink_port_type_eth_set);
+
+/**
+ *	devlink_port_type_ib_set - Set port type to InfiniBand
+ *
+ *	@devlink_port: devlink port
+ *	@ibdev: related IB device
+ */
+void devlink_port_type_ib_set(struct devlink_port *devlink_port,
+			      struct ib_device *ibdev)
+{
+	return __devlink_port_type_set(devlink_port,
+				       DEVLINK_PORT_TYPE_IB, ibdev);
+}
+EXPORT_SYMBOL_GPL(devlink_port_type_ib_set);
+
+/**
+ *	devlink_port_type_clear - Clear port type
+ *
+ *	@devlink_port: devlink port
+ */
+void devlink_port_type_clear(struct devlink_port *devlink_port)
+{
+	return __devlink_port_type_set(devlink_port,
+				       DEVLINK_PORT_TYPE_NOTSET, NULL);
+}
+EXPORT_SYMBOL_GPL(devlink_port_type_clear);
+
+/**
+ *	devlink_port_split_set - Set port is split
+ *
+ *	@devlink_port: devlink port
+ *	@split_group: split group - identifies group split port is part of
+ */
+void devlink_port_split_set(struct devlink_port *devlink_port,
+			    u32 split_group)
+{
+	devlink_port->split = true;
+	devlink_port->split_group = split_group;
+	devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+}
+EXPORT_SYMBOL_GPL(devlink_port_split_set);
+
+static int __init devlink_module_init(void)
+{
+	return genl_register_family_with_ops_groups(&devlink_nl_family,
+						    devlink_nl_ops,
+						    devlink_nl_mcgrps);
+}
+
+static void __exit devlink_module_exit(void)
+{
+	genl_unregister_family(&devlink_nl_family);
+}
+
+module_init(devlink_module_init);
+module_exit(devlink_module_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>");
+MODULE_DESCRIPTION("Network physical device Netlink interface");
+MODULE_ALIAS_GENL_FAMILY(DEVLINK_GENL_NAME);
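
Taken together, the new devlink core expects a driver to allocate an instance, register it against its struct device, then register ports and annotate their types. A hedged sketch of that driver-side flow under the API introduced above (my_probe, my_priv and my_devlink_ops are hypothetical driver names; error handling is abbreviated):

#include <linux/device.h>
#include <linux/netdevice.h>
#include <net/devlink.h>

struct my_priv {
	struct devlink_port port;
};

static const struct devlink_ops my_devlink_ops = {
	/* optionally: .port_type_set, .port_split, .port_unsplit */
};

static int my_probe(struct device *dev, struct net_device *netdev)
{
	struct devlink *dl;
	struct my_priv *priv;
	int err;

	dl = devlink_alloc(&my_devlink_ops, sizeof(*priv));
	if (!dl)
		return -ENOMEM;
	priv = devlink_priv(dl);

	err = devlink_register(dl, dev);
	if (err)
		goto err_free;

	/* driver-chosen port index; the embedded devlink_port is
	 * already zeroed here because devlink_alloc() uses kzalloc() */
	err = devlink_port_register(dl, &priv->port, 0);
	if (err)
		goto err_unregister;

	devlink_port_type_eth_set(&priv->port, netdev);
	return 0;

err_unregister:
	devlink_unregister(dl);
err_free:
	devlink_free(dl);
	return err;
}
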
diff --git a/net/core/dst.c b/net/core/dst.c
index a1656e3b8d72..b5cbbe07f786 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -265,7 +265,7 @@ again:
 	lwtstate_put(dst->lwtstate);
 
 	if (dst->flags & DST_METADATA)
-		kfree(dst);
+		metadata_dst_free((struct metadata_dst *)dst);
 	else
 		kmem_cache_free(dst->ops->kmem_cachep, dst);
 
@@ -395,6 +395,14 @@ struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags)
 }
 EXPORT_SYMBOL_GPL(metadata_dst_alloc);
 
+void metadata_dst_free(struct metadata_dst *md_dst)
+{
+#ifdef CONFIG_DST_CACHE
+	dst_cache_destroy(&md_dst->u.tun_info.dst_cache);
+#endif
+	kfree(md_dst);
+}
+
 struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags)
 {
 	int cpu;
diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c
new file mode 100644
index 000000000000..554d36449231
--- /dev/null
+++ b/net/core/dst_cache.c
@@ -0,0 +1,168 @@
+/*
+ * net/core/dst_cache.c - dst entry cache
+ *
+ * Copyright (c) 2016 Paolo Abeni <pabeni@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <net/dst_cache.h>
+#include <net/route.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ip6_fib.h>
+#endif
+#include <uapi/linux/in.h>
+
+struct dst_cache_pcpu {
+	unsigned long refresh_ts;
+	struct dst_entry *dst;
+	u32 cookie;
+	union {
+		struct in_addr in_saddr;
+		struct in6_addr in6_saddr;
+	};
+};
+
+static void dst_cache_per_cpu_dst_set(struct dst_cache_pcpu *dst_cache,
+				      struct dst_entry *dst, u32 cookie)
+{
+	dst_release(dst_cache->dst);
+	if (dst)
+		dst_hold(dst);
+
+	dst_cache->cookie = cookie;
+	dst_cache->dst = dst;
+}
+
+static struct dst_entry *dst_cache_per_cpu_get(struct dst_cache *dst_cache,
+					       struct dst_cache_pcpu *idst)
+{
+	struct dst_entry *dst;
+
+	dst = idst->dst;
+	if (!dst)
+		goto fail;
+
+	/* the cache already hold a dst reference; it can't go away */
+	dst_hold(dst);
+
+	if (unlikely(!time_after(idst->refresh_ts, dst_cache->reset_ts) ||
+		     (dst->obsolete && !dst->ops->check(dst, idst->cookie)))) {
+		dst_cache_per_cpu_dst_set(idst, NULL, 0);
+		dst_release(dst);
+		goto fail;
+	}
+	return dst;
+
+fail:
+	idst->refresh_ts = jiffies;
+	return NULL;
+}
+
+struct dst_entry *dst_cache_get(struct dst_cache *dst_cache)
+{
+	if (!dst_cache->cache)
+		return NULL;
+
+	return dst_cache_per_cpu_get(dst_cache, this_cpu_ptr(dst_cache->cache));
+}
+EXPORT_SYMBOL_GPL(dst_cache_get);
+
+struct rtable *dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr)
+{
+	struct dst_cache_pcpu *idst;
+	struct dst_entry *dst;
+
+	if (!dst_cache->cache)
+		return NULL;
+
+	idst = this_cpu_ptr(dst_cache->cache);
+	dst = dst_cache_per_cpu_get(dst_cache, idst);
+	if (!dst)
+		return NULL;
+
+	*saddr = idst->in_saddr.s_addr;
+	return container_of(dst, struct rtable, dst);
+}
+EXPORT_SYMBOL_GPL(dst_cache_get_ip4);
+
+void dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst,
+		       __be32 saddr)
+{
+	struct dst_cache_pcpu *idst;
+
+	if (!dst_cache->cache)
+		return;
+
+	idst = this_cpu_ptr(dst_cache->cache);
+	dst_cache_per_cpu_dst_set(idst, dst, 0);
+	idst->in_saddr.s_addr = saddr;
+}
+EXPORT_SYMBOL_GPL(dst_cache_set_ip4);
+
+#if IS_ENABLED(CONFIG_IPV6)
+void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
+		       const struct in6_addr *addr)
+{
+	struct dst_cache_pcpu *idst;
+
+	if (!dst_cache->cache)
+		return;
+
+	idst = this_cpu_ptr(dst_cache->cache);
+	dst_cache_per_cpu_dst_set(this_cpu_ptr(dst_cache->cache), dst,
+				  rt6_get_cookie((struct rt6_info *)dst));
+	idst->in6_saddr = *addr;
+}
+EXPORT_SYMBOL_GPL(dst_cache_set_ip6);
+
+struct dst_entry *dst_cache_get_ip6(struct dst_cache *dst_cache,
+				    struct in6_addr *saddr)
+{
+	struct dst_cache_pcpu *idst;
+	struct dst_entry *dst;
+
+	if (!dst_cache->cache)
+		return NULL;
+
+	idst = this_cpu_ptr(dst_cache->cache);
+	dst = dst_cache_per_cpu_get(dst_cache, idst);
+	if (!dst)
+		return NULL;
+
+	*saddr = idst->in6_saddr;
+	return dst;
+}
+EXPORT_SYMBOL_GPL(dst_cache_get_ip6);
+#endif
+
+int dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp)
+{
+	dst_cache->cache = alloc_percpu_gfp(struct dst_cache_pcpu,
+					    gfp | __GFP_ZERO);
+	if (!dst_cache->cache)
+		return -ENOMEM;
+
+	dst_cache_reset(dst_cache);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dst_cache_init);
+
+void dst_cache_destroy(struct dst_cache *dst_cache)
+{
+	int i;
+
+	if (!dst_cache->cache)
+		return;
+
+	for_each_possible_cpu(i)
+		dst_release(per_cpu_ptr(dst_cache->cache, i)->dst);
+
+	free_percpu(dst_cache->cache);
+}
+EXPORT_SYMBOL_GPL(dst_cache_destroy);
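
The dst_cache API above is aimed at tunnel transmit paths: a per-cpu route cache that is revalidated on every hit and can be invalidated in bulk via dst_cache_reset(). A hedged sketch of the intended IPv4 usage (my_tunnel and my_tunnel_route are hypothetical; the cache itself would be set up once with dst_cache_init() at device creation):

#include <linux/err.h>
#include <net/dst_cache.h>
#include <net/net_namespace.h>
#include <net/route.h>

struct my_tunnel {
	struct dst_cache dcache;	/* dst_cache_init() at setup time */
	__be32 daddr;
};

static struct rtable *my_tunnel_route(struct my_tunnel *t, struct flowi4 *fl4)
{
	struct rtable *rt;
	__be32 saddr;

	/* fast path: per-cpu cached route, revalidated by dst_cache_get_ip4() */
	rt = dst_cache_get_ip4(&t->dcache, &saddr);
	if (rt) {
		fl4->saddr = saddr;
		return rt;
	}

	/* slow path: full FIB lookup, then prime the cache */
	fl4->daddr = t->daddr;
	rt = ip_route_output_key(&init_net, fl4);
	if (IS_ERR(rt))
		return rt;

	dst_cache_set_ip4(&t->dcache, &rt->dst, fl4->saddr);
	return rt;
}
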
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index daf04709dd3c..f426c5ad6149 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -98,6 +98,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
 	[NETIF_F_RXALL_BIT] =		 "rx-all",
 	[NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload",
 	[NETIF_F_BUSY_POLL_BIT] =	 "busy-poll",
+	[NETIF_F_HW_TC_BIT] =		 "hw-tc-offload",
 };
 
 static const char
@@ -386,43 +387,461 @@ static int __ethtool_set_flags(struct net_device *dev, u32 data)
 	return 0;
 }
 
-int __ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static void convert_legacy_u32_to_link_mode(unsigned long *dst, u32 legacy_u32)
 {
+	bitmap_zero(dst, __ETHTOOL_LINK_MODE_MASK_NBITS);
+	dst[0] = legacy_u32;
+}
+
+/* return false if src had higher bits set. lower bits always updated. */
+static bool convert_link_mode_to_legacy_u32(u32 *legacy_u32,
+					    const unsigned long *src)
+{
+	bool retval = true;
+
+	/* TODO: following test will soon always be true */
+	if (__ETHTOOL_LINK_MODE_MASK_NBITS > 32) {
+		__ETHTOOL_DECLARE_LINK_MODE_MASK(ext);
+
+		bitmap_zero(ext, __ETHTOOL_LINK_MODE_MASK_NBITS);
+		bitmap_fill(ext, 32);
+		bitmap_complement(ext, ext, __ETHTOOL_LINK_MODE_MASK_NBITS);
+		if (bitmap_intersects(ext, src,
+				      __ETHTOOL_LINK_MODE_MASK_NBITS)) {
+			/* src mask goes beyond bit 31 */
+			retval = false;
+		}
+	}
+	*legacy_u32 = src[0];
+	return retval;
+}
+
+/* return false if legacy contained non-0 deprecated fields
+ * transceiver/maxtxpkt/maxrxpkt. rest of ksettings always updated
+ */
+static bool
+convert_legacy_settings_to_link_ksettings(
+	struct ethtool_link_ksettings *link_ksettings,
+	const struct ethtool_cmd *legacy_settings)
+{
+	bool retval = true;
+
+	memset(link_ksettings, 0, sizeof(*link_ksettings));
+
+	/* This is used to tell users that driver is still using these
+	 * deprecated legacy fields, and they should not use
+	 * %ETHTOOL_GLINKSETTINGS/%ETHTOOL_SLINKSETTINGS
+	 */
+	if (legacy_settings->transceiver ||
+	    legacy_settings->maxtxpkt ||
+	    legacy_settings->maxrxpkt)
+		retval = false;
+
+	convert_legacy_u32_to_link_mode(
+		link_ksettings->link_modes.supported,
+		legacy_settings->supported);
+	convert_legacy_u32_to_link_mode(
+		link_ksettings->link_modes.advertising,
+		legacy_settings->advertising);
+	convert_legacy_u32_to_link_mode(
+		link_ksettings->link_modes.lp_advertising,
+		legacy_settings->lp_advertising);
+	link_ksettings->base.speed
+		= ethtool_cmd_speed(legacy_settings);
+	link_ksettings->base.duplex
+		= legacy_settings->duplex;
+	link_ksettings->base.port
+		= legacy_settings->port;
+	link_ksettings->base.phy_address
+		= legacy_settings->phy_address;
+	link_ksettings->base.autoneg
+		= legacy_settings->autoneg;
+	link_ksettings->base.mdio_support
+		= legacy_settings->mdio_support;
+	link_ksettings->base.eth_tp_mdix
+		= legacy_settings->eth_tp_mdix;
+	link_ksettings->base.eth_tp_mdix_ctrl
+		= legacy_settings->eth_tp_mdix_ctrl;
+	return retval;
+}
+
+/* return false if ksettings link modes had higher bits
+ * set. legacy_settings always updated (best effort)
+ */
+static bool
+convert_link_ksettings_to_legacy_settings(
+	struct ethtool_cmd *legacy_settings,
+	const struct ethtool_link_ksettings *link_ksettings)
+{
+	bool retval = true;
+
+	memset(legacy_settings, 0, sizeof(*legacy_settings));
+	/* this also clears the deprecated fields in legacy structure:
+	 * __u8	transceiver;
+	 * __u32	maxtxpkt;
+	 * __u32	maxrxpkt;
+	 */
+
+	retval &= convert_link_mode_to_legacy_u32(
+		&legacy_settings->supported,
+		link_ksettings->link_modes.supported);
+	retval &= convert_link_mode_to_legacy_u32(
+		&legacy_settings->advertising,
+		link_ksettings->link_modes.advertising);
+	retval &= convert_link_mode_to_legacy_u32(
+		&legacy_settings->lp_advertising,
+		link_ksettings->link_modes.lp_advertising);
+	ethtool_cmd_speed_set(legacy_settings, link_ksettings->base.speed);
+	legacy_settings->duplex
+		= link_ksettings->base.duplex;
+	legacy_settings->port
+		= link_ksettings->base.port;
+	legacy_settings->phy_address
+		= link_ksettings->base.phy_address;
+	legacy_settings->autoneg
+		= link_ksettings->base.autoneg;
+	legacy_settings->mdio_support
+		= link_ksettings->base.mdio_support;
+	legacy_settings->eth_tp_mdix
+		= link_ksettings->base.eth_tp_mdix;
+	legacy_settings->eth_tp_mdix_ctrl
+		= link_ksettings->base.eth_tp_mdix_ctrl;
+	return retval;
+}
+
+/* number of 32-bit words to store the user's link mode bitmaps */
+#define __ETHTOOL_LINK_MODE_MASK_NU32			\
+	DIV_ROUND_UP(__ETHTOOL_LINK_MODE_MASK_NBITS, 32)
+
+/* layout of the struct passed from/to userland */
+struct ethtool_link_usettings {
+	struct ethtool_link_settings base;
+	struct {
+		__u32 supported[__ETHTOOL_LINK_MODE_MASK_NU32];
+		__u32 advertising[__ETHTOOL_LINK_MODE_MASK_NU32];
+		__u32 lp_advertising[__ETHTOOL_LINK_MODE_MASK_NU32];
+	} link_modes;
+};
+
+/* Internal kernel helper to query a device ethtool_link_settings.
+ *
+ * Backward compatibility note: for compatibility with legacy drivers
+ * that implement only the ethtool_cmd API, this has to work with both
+ * drivers implementing get_link_ksettings API and drivers
+ * implementing get_settings API. When drivers implement get_settings
+ * and report ethtool_cmd deprecated fields
+ * (transceiver/maxrxpkt/maxtxpkt), these fields are silently ignored
+ * because the resulting struct ethtool_link_settings does not report them.
+ */
+int __ethtool_get_link_ksettings(struct net_device *dev,
+				 struct ethtool_link_ksettings *link_ksettings)
+{
+	int err;
+	struct ethtool_cmd cmd;
+
 	ASSERT_RTNL();
 
+	if (dev->ethtool_ops->get_link_ksettings) {
+		memset(link_ksettings, 0, sizeof(*link_ksettings));
+		return dev->ethtool_ops->get_link_ksettings(dev,
+							    link_ksettings);
+	}
+
+	/* driver doesn't support %ethtool_link_ksettings API. revert to
+	 * legacy %ethtool_cmd API, unless it's not supported either.
+	 * TODO: remove when ethtool_ops::get_settings disappears internally
+	 */
 	if (!dev->ethtool_ops->get_settings)
 		return -EOPNOTSUPP;
 
-	memset(cmd, 0, sizeof(struct ethtool_cmd));
-	cmd->cmd = ETHTOOL_GSET;
-	return dev->ethtool_ops->get_settings(dev, cmd);
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.cmd = ETHTOOL_GSET;
+	err = dev->ethtool_ops->get_settings(dev, &cmd);
+	if (err < 0)
+		return err;
+
+	/* we ignore deprecated fields transceiver/maxrxpkt/maxtxpkt
+	 */
+	convert_legacy_settings_to_link_ksettings(link_ksettings, &cmd);
+	return err;
 }
-EXPORT_SYMBOL(__ethtool_get_settings);
+EXPORT_SYMBOL(__ethtool_get_link_ksettings);
 
-static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
+/* convert ethtool_link_usettings in user space to a kernel internal
+ * ethtool_link_ksettings. return 0 on success, errno on error.
+ */
+static int load_link_ksettings_from_user(struct ethtool_link_ksettings *to,
+					 const void __user *from)
 {
-	int err;
-	struct ethtool_cmd cmd;
+	struct ethtool_link_usettings link_usettings;
+
+	if (copy_from_user(&link_usettings, from, sizeof(link_usettings)))
+		return -EFAULT;
+
+	memcpy(&to->base, &link_usettings.base, sizeof(to->base));
+	bitmap_from_u32array(to->link_modes.supported,
+			     __ETHTOOL_LINK_MODE_MASK_NBITS,
+			     link_usettings.link_modes.supported,
+			     __ETHTOOL_LINK_MODE_MASK_NU32);
+	bitmap_from_u32array(to->link_modes.advertising,
+			     __ETHTOOL_LINK_MODE_MASK_NBITS,
+			     link_usettings.link_modes.advertising,
+			     __ETHTOOL_LINK_MODE_MASK_NU32);
+	bitmap_from_u32array(to->link_modes.lp_advertising,
+			     __ETHTOOL_LINK_MODE_MASK_NBITS,
+			     link_usettings.link_modes.lp_advertising,
+			     __ETHTOOL_LINK_MODE_MASK_NU32);
+
+	return 0;
+}
+
+/* convert a kernel internal ethtool_link_ksettings to
+ * ethtool_link_usettings in user space. return 0 on success, errno on
+ * error.
+ */
+static int
+store_link_ksettings_for_user(void __user *to,
+			      const struct ethtool_link_ksettings *from)
+{
+	struct ethtool_link_usettings link_usettings;
+
+	memcpy(&link_usettings.base, &from->base, sizeof(link_usettings));
+	bitmap_to_u32array(link_usettings.link_modes.supported,
+			   __ETHTOOL_LINK_MODE_MASK_NU32,
+			   from->link_modes.supported,
+			   __ETHTOOL_LINK_MODE_MASK_NBITS);
+	bitmap_to_u32array(link_usettings.link_modes.advertising,
+			   __ETHTOOL_LINK_MODE_MASK_NU32,
+			   from->link_modes.advertising,
+			   __ETHTOOL_LINK_MODE_MASK_NBITS);
+	bitmap_to_u32array(link_usettings.link_modes.lp_advertising,
+			   __ETHTOOL_LINK_MODE_MASK_NU32,
+			   from->link_modes.lp_advertising,
+			   __ETHTOOL_LINK_MODE_MASK_NBITS);
+
+	if (copy_to_user(to, &link_usettings, sizeof(link_usettings)))
+		return -EFAULT;
+
+	return 0;
+}
+
+/* Query device for its ethtool_link_settings.
+ *
+ * Backward compatibility note: this function must fail when driver
+ * does not implement ethtool::get_link_ksettings, even if legacy
+ * ethtool_ops::get_settings is implemented. This tells new versions
+ * of ethtool that they should use the legacy API %ETHTOOL_GSET for
+ * this driver, so that they can correctly access the ethtool_cmd
+ * deprecated fields (transceiver/maxrxpkt/maxtxpkt), until no driver
+ * implements ethtool_ops::get_settings anymore.
+ */
+static int ethtool_get_link_ksettings(struct net_device *dev,
+				      void __user *useraddr)
+{
+	int err = 0;
+	struct ethtool_link_ksettings link_ksettings;
 
-	err = __ethtool_get_settings(dev, &cmd);
+	ASSERT_RTNL();
+
+	if (!dev->ethtool_ops->get_link_ksettings)
+		return -EOPNOTSUPP;
+
+	/* handle bitmap nbits handshake */
+	if (copy_from_user(&link_ksettings.base, useraddr,
+			   sizeof(link_ksettings.base)))
+		return -EFAULT;
+
+	if (__ETHTOOL_LINK_MODE_MASK_NU32
+	    != link_ksettings.base.link_mode_masks_nwords) {
+		/* wrong link mode nbits requested */
+		memset(&link_ksettings, 0, sizeof(link_ksettings));
+		link_ksettings.base.cmd = ETHTOOL_GLINKSETTINGS;
+		/* send back number of words required as negative val */
+		compiletime_assert(__ETHTOOL_LINK_MODE_MASK_NU32 <= S8_MAX,
+				   "need too many bits for link modes!");
+		link_ksettings.base.link_mode_masks_nwords
+			= -((s8)__ETHTOOL_LINK_MODE_MASK_NU32);
+
+		/* copy the base fields back to user, not the link
+		 * mode bitmaps
+		 */
+		if (copy_to_user(useraddr, &link_ksettings.base,
+				 sizeof(link_ksettings.base)))
+			return -EFAULT;
+
+		return 0;
+	}
+
+	/* handshake successful: user/kernel agree on
+	 * link_mode_masks_nwords
+	 */
+
+	memset(&link_ksettings, 0, sizeof(link_ksettings));
+	err = dev->ethtool_ops->get_link_ksettings(dev, &link_ksettings);
 	if (err < 0)
 		return err;
 
+	/* make sure we tell the right values to user */
+	link_ksettings.base.cmd = ETHTOOL_GLINKSETTINGS;
+	link_ksettings.base.link_mode_masks_nwords
+		= __ETHTOOL_LINK_MODE_MASK_NU32;
+
+	return store_link_ksettings_for_user(useraddr, &link_ksettings);
+}
+
+/* Update device ethtool_link_settings.
+ *
+ * Backward compatibility note: this function must fail when driver
+ * does not implement ethtool::set_link_ksettings, even if legacy
+ * ethtool_ops::set_settings is implemented. This tells new versions
+ * of ethtool that they should use the legacy API %ETHTOOL_SSET for
+ * this driver, so that they can correctly update the ethtool_cmd
+ * deprecated fields (transceiver/maxrxpkt/maxtxpkt), until no driver
+ * implements ethtool_ops::get_settings anymore.
+ */
+static int ethtool_set_link_ksettings(struct net_device *dev,
+				      void __user *useraddr)
+{
+	int err;
+	struct ethtool_link_ksettings link_ksettings;
+
+	ASSERT_RTNL();
+
+	if (!dev->ethtool_ops->set_link_ksettings)
+		return -EOPNOTSUPP;
+
+	/* make sure nbits field has expected value */
+	if (copy_from_user(&link_ksettings.base, useraddr,
+			   sizeof(link_ksettings.base)))
+		return -EFAULT;
+
+	if (__ETHTOOL_LINK_MODE_MASK_NU32
+	    != link_ksettings.base.link_mode_masks_nwords)
+		return -EINVAL;
+
+	/* copy the whole structure, now that we know it has expected
+	 * format
+	 */
+	err = load_link_ksettings_from_user(&link_ksettings, useraddr);
+	if (err)
+		return err;
+
+	/* re-check nwords field, just in case */
+	if (__ETHTOOL_LINK_MODE_MASK_NU32
+	    != link_ksettings.base.link_mode_masks_nwords)
+		return -EINVAL;
+
+	return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings);
+}
+
+static void
+warn_incomplete_ethtool_legacy_settings_conversion(const char *details)
+{
+	char name[sizeof(current->comm)];
+
+	pr_info_once("warning: `%s' uses legacy ethtool link settings API, %s\n",
+		     get_task_comm(name, current), details);
+}
+
+/* Query device for its ethtool_cmd settings.
+ *
+ * Backward compatibility note: for compatibility with legacy ethtool,
+ * this has to work with both drivers implementing get_link_ksettings
+ * API and drivers implementing get_settings API. When drivers
+ * implement get_link_ksettings and report higher link mode bits, a
+ * kernel warning is logged once (with name of 1st driver/device) to
+ * recommend user to upgrade ethtool, but the command is successful
+ * (only the lower link mode bits reported back to user).
+ */
+static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_cmd cmd;
+
+	ASSERT_RTNL();
+
+	if (dev->ethtool_ops->get_link_ksettings) {
+		/* First, use link_ksettings API if it is supported */
+		int err;
+		struct ethtool_link_ksettings link_ksettings;
+
+		memset(&link_ksettings, 0, sizeof(link_ksettings));
+		err = dev->ethtool_ops->get_link_ksettings(dev,
+							   &link_ksettings);
+		if (err < 0)
+			return err;
+		if (!convert_link_ksettings_to_legacy_settings(&cmd,
+							       &link_ksettings))
+			warn_incomplete_ethtool_legacy_settings_conversion(
+				"link modes are only partially reported");
+
+		/* send a sensible cmd tag back to user */
+		cmd.cmd = ETHTOOL_GSET;
+	} else {
+		/* driver doesn't support %ethtool_link_ksettings
+		 * API. revert to legacy %ethtool_cmd API, unless it's
+		 * not supported either.
+		 */
+		int err;
+
+		if (!dev->ethtool_ops->get_settings)
+			return -EOPNOTSUPP;
+
+		memset(&cmd, 0, sizeof(cmd));
+		cmd.cmd = ETHTOOL_GSET;
+		err = dev->ethtool_ops->get_settings(dev, &cmd);
+		if (err < 0)
+			return err;
+	}
+
 	if (copy_to_user(useraddr, &cmd, sizeof(cmd)))
 		return -EFAULT;
+
 	return 0;
 }
 
+/* Update device link settings with given ethtool_cmd.
+ *
+ * Backward compatibility note: for compatibility with legacy ethtool,
+ * this has to work with both drivers implementing set_link_ksettings
+ * API and drivers implementing set_settings API. When drivers
+ * implement set_link_ksettings and user's request updates deprecated
+ * ethtool_cmd fields (transceiver/maxrxpkt/maxtxpkt), a kernel
+ * warning is logged once (with name of 1st driver/device) to
+ * recommend user to upgrade ethtool, and the request is rejected.
+ */
 static int ethtool_set_settings(struct net_device *dev, void __user *useraddr)
 {
 	struct ethtool_cmd cmd;
 
-	if (!dev->ethtool_ops->set_settings)
-		return -EOPNOTSUPP;
+	ASSERT_RTNL();
 
 	if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
 		return -EFAULT;
 
+	/* first, try new %ethtool_link_ksettings API. */
+	if (dev->ethtool_ops->set_link_ksettings) {
+		struct ethtool_link_ksettings link_ksettings;
+
+		if (!convert_legacy_settings_to_link_ksettings(&link_ksettings,
+							       &cmd))
+			return -EINVAL;
+
+		link_ksettings.base.cmd = ETHTOOL_SLINKSETTINGS;
+		link_ksettings.base.link_mode_masks_nwords
+			= __ETHTOOL_LINK_MODE_MASK_NU32;
+		return dev->ethtool_ops->set_link_ksettings(dev,
+							    &link_ksettings);
+	}
+
+	/* legacy %ethtool_cmd API */
+
+	/* TODO: return -EOPNOTSUPP when ethtool_ops::get_settings
+	 * disappears internally
+	 */
+
+	if (!dev->ethtool_ops->set_settings)
+		return -EOPNOTSUPP;
+
 	return dev->ethtool_ops->set_settings(dev, &cmd);
 }
 
@@ -632,7 +1051,7 @@ static int ethtool_copy_validate_indir(u32 *indir, void __user *useraddr,
 		return 0;
 }
 
-u8 netdev_rss_key[NETDEV_RSS_KEY_LEN];
+u8 netdev_rss_key[NETDEV_RSS_KEY_LEN] __read_mostly;
 
 void netdev_rss_key_fill(void *buffer, size_t len)
 {
@@ -642,6 +1061,37 @@ void netdev_rss_key_fill(void *buffer, size_t len)
 }
 EXPORT_SYMBOL(netdev_rss_key_fill);
 
+static int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max)
+{
+	u32 dev_size, current_max = 0;
+	u32 *indir;
+	int ret;
+
+	if (!dev->ethtool_ops->get_rxfh_indir_size ||
+	    !dev->ethtool_ops->get_rxfh)
+		return -EOPNOTSUPP;
+	dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev);
+	if (dev_size == 0)
+		return -EOPNOTSUPP;
+
+	indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER);
+	if (!indir)
+		return -ENOMEM;
+
+	ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL, NULL);
+	if (ret)
+		goto out;
+
+	while (dev_size--)
+		current_max = max(current_max, indir[dev_size]);
+
+	*max = current_max;
+
+out:
+	kfree(indir);
+	return ret;
+}
+
 static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
 						     void __user *useraddr)
 {
@@ -738,6 +1188,14 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
 	}
 
 	ret = ops->set_rxfh(dev, indir, NULL, ETH_RSS_HASH_NO_CHANGE);
+	if (ret)
+		goto out;
+
+	/* indicate whether rxfh was set to default */
+	if (user_size == 0)
+		dev->priv_flags &= ~IFF_RXFH_CONFIGURED;
+	else
+		dev->priv_flags |= IFF_RXFH_CONFIGURED;
 
 out:
 	kfree(indir);
@@ -897,6 +1355,14 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
 	}
 
 	ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc);
+	if (ret)
+		goto out;
+
+	/* indicate whether rxfh was set to default */
+	if (rxfh.indir_size == 0)
+		dev->priv_flags &= ~IFF_RXFH_CONFIGURED;
+	else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE)
+		dev->priv_flags |= IFF_RXFH_CONFIGURED;
 
 out:
 	kfree(rss_config);
@@ -1227,14 +1693,31 @@ static noinline_for_stack int ethtool_get_channels(struct net_device *dev,
 static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
 						   void __user *useraddr)
 {
-	struct ethtool_channels channels;
+	struct ethtool_channels channels, max;
+	u32 max_rx_in_use = 0;
 
-	if (!dev->ethtool_ops->set_channels)
+	if (!dev->ethtool_ops->set_channels || !dev->ethtool_ops->get_channels)
 		return -EOPNOTSUPP;
 
 	if (copy_from_user(&channels, useraddr, sizeof(channels)))
 		return -EFAULT;
 
+	dev->ethtool_ops->get_channels(dev, &max);
+
+	/* ensure new counts are within the maximums */
+	if ((channels.rx_count > max.max_rx) ||
+	    (channels.tx_count > max.max_tx) ||
+	    (channels.combined_count > max.max_combined) ||
+	    (channels.other_count > max.max_other))
+		return -EINVAL;
+
+	/* ensure the new Rx count fits within the configured Rx flow
+	 * indirection table settings */
+	if (netif_is_rxfh_configured(dev) &&
+	    !ethtool_get_max_rxfh_channel(dev, &max_rx_in_use) &&
+	    (channels.combined_count + channels.rx_count) <= max_rx_in_use)
+		return -EINVAL;
+
 	return dev->ethtool_ops->set_channels(dev, &channels);
 }
 
@@ -1823,13 +2306,121 @@ out:
1823 return ret; 2306 return ret;
1824} 2307}
1825 2308
2309static int ethtool_get_per_queue_coalesce(struct net_device *dev,
2310 void __user *useraddr,
2311 struct ethtool_per_queue_op *per_queue_opt)
2312{
2313 u32 bit;
2314 int ret;
2315 DECLARE_BITMAP(queue_mask, MAX_NUM_QUEUE);
2316
2317 if (!dev->ethtool_ops->get_per_queue_coalesce)
2318 return -EOPNOTSUPP;
2319
2320 useraddr += sizeof(*per_queue_opt);
2321
2322 bitmap_from_u32array(queue_mask,
2323 MAX_NUM_QUEUE,
2324 per_queue_opt->queue_mask,
2325 DIV_ROUND_UP(MAX_NUM_QUEUE, 32));
2326
2327 for_each_set_bit(bit, queue_mask, MAX_NUM_QUEUE) {
2328 struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE };
2329
2330 ret = dev->ethtool_ops->get_per_queue_coalesce(dev, bit, &coalesce);
2331 if (ret != 0)
2332 return ret;
2333 if (copy_to_user(useraddr, &coalesce, sizeof(coalesce)))
2334 return -EFAULT;
2335 useraddr += sizeof(coalesce);
2336 }
2337
2338 return 0;
2339}
2340
2341static int ethtool_set_per_queue_coalesce(struct net_device *dev,
2342 void __user *useraddr,
2343 struct ethtool_per_queue_op *per_queue_opt)
2344{
2345 u32 bit;
2346 int i, ret = 0;
2347 int n_queue;
2348 struct ethtool_coalesce *backup = NULL, *tmp = NULL;
2349 DECLARE_BITMAP(queue_mask, MAX_NUM_QUEUE);
2350
2351 if ((!dev->ethtool_ops->set_per_queue_coalesce) ||
2352 (!dev->ethtool_ops->get_per_queue_coalesce))
2353 return -EOPNOTSUPP;
2354
2355 useraddr += sizeof(*per_queue_opt);
2356
2357 bitmap_from_u32array(queue_mask,
2358 MAX_NUM_QUEUE,
2359 per_queue_opt->queue_mask,
2360 DIV_ROUND_UP(MAX_NUM_QUEUE, 32));
2361 n_queue = bitmap_weight(queue_mask, MAX_NUM_QUEUE);
2362 tmp = backup = kmalloc_array(n_queue, sizeof(*backup), GFP_KERNEL);
2363 if (!backup)
2364 return -ENOMEM;
2365
2366 for_each_set_bit(bit, queue_mask, MAX_NUM_QUEUE) {
2367 struct ethtool_coalesce coalesce;
2368
2369 ret = dev->ethtool_ops->get_per_queue_coalesce(dev, bit, tmp);
2370 if (ret != 0)
2371 goto roll_back;
2372
2373 tmp++;
2374
2375 if (copy_from_user(&coalesce, useraddr, sizeof(coalesce))) {
2376 ret = -EFAULT;
2377 goto roll_back;
2378 }
2379
2380 ret = dev->ethtool_ops->set_per_queue_coalesce(dev, bit, &coalesce);
2381 if (ret != 0)
2382 goto roll_back;
2383
2384 useraddr += sizeof(coalesce);
2385 }
2386
2387roll_back:
2388 if (ret != 0) {
2389 tmp = backup;
2390 for_each_set_bit(i, queue_mask, bit) {
2391 dev->ethtool_ops->set_per_queue_coalesce(dev, i, tmp);
2392 tmp++;
2393 }
2394 }
2395 kfree(backup);
2396
2397 return ret;
2398}
2399
2400static int ethtool_set_per_queue(struct net_device *dev, void __user *useraddr)
2401{
2402 struct ethtool_per_queue_op per_queue_opt;
2403
2404 if (copy_from_user(&per_queue_opt, useraddr, sizeof(per_queue_opt)))
2405 return -EFAULT;
2406
2407 switch (per_queue_opt.sub_command) {
2408 case ETHTOOL_GCOALESCE:
2409 return ethtool_get_per_queue_coalesce(dev, useraddr, &per_queue_opt);
2410 case ETHTOOL_SCOALESCE:
2411 return ethtool_set_per_queue_coalesce(dev, useraddr, &per_queue_opt);
2412 default:
2413 return -EOPNOTSUPP;
2414 }
2415}
2416
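
An ETHTOOL_PERQUEUE request is a struct ethtool_per_queue_op header (cmd, sub_command, queue_mask) followed immediately by one sub-command payload per set bit of queue_mask, in ascending bit order; both paths above step through that region with useraddr += sizeof(coalesce). A rough user-space sketch of laying out a get request for two queues (buffer management is the caller's, names are local to the example):

    /* Sketch: ETHTOOL_PERQUEUE + ETHTOOL_GCOALESCE for queues 0 and 3.
     * Layout assumed from the kernel walk above: header, then one
     * struct ethtool_coalesce per set bit of queue_mask.
     */
    static struct ethtool_coalesce *build_perq_get(unsigned char *buf)
    {
            struct ethtool_per_queue_op *op = (struct ethtool_per_queue_op *)buf;

            memset(buf, 0, sizeof(*op) + 2 * sizeof(struct ethtool_coalesce));
            op->cmd = ETHTOOL_PERQUEUE;
            op->sub_command = ETHTOOL_GCOALESCE;
            op->queue_mask[0] = (1u << 0) | (1u << 3);  /* queues 0 and 3 */

            /* after ioctl(SIOCETHTOOL): [0] is queue 0, [1] is queue 3 */
            return (struct ethtool_coalesce *)(op + 1);
    }
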
1826/* The main entry point in this file. Called from net/core/dev_ioctl.c */ 2417/* The main entry point in this file. Called from net/core/dev_ioctl.c */
1827 2418
1828int dev_ethtool(struct net *net, struct ifreq *ifr) 2419int dev_ethtool(struct net *net, struct ifreq *ifr)
1829{ 2420{
1830 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); 2421 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
1831 void __user *useraddr = ifr->ifr_data; 2422 void __user *useraddr = ifr->ifr_data;
1832 u32 ethcmd; 2423 u32 ethcmd, sub_cmd;
1833 int rc; 2424 int rc;
1834 netdev_features_t old_features; 2425 netdev_features_t old_features;
1835 2426
@@ -1839,8 +2430,14 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1839 if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd))) 2430 if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
1840 return -EFAULT; 2431 return -EFAULT;
1841 2432
2433 if (ethcmd == ETHTOOL_PERQUEUE) {
2434 if (copy_from_user(&sub_cmd, useraddr + sizeof(ethcmd), sizeof(sub_cmd)))
2435 return -EFAULT;
2436 } else {
2437 sub_cmd = ethcmd;
2438 }
1842 /* Allow some commands to be done by anyone */ 2439 /* Allow some commands to be done by anyone */
1843 switch (ethcmd) { 2440 switch (sub_cmd) {
1844 case ETHTOOL_GSET: 2441 case ETHTOOL_GSET:
1845 case ETHTOOL_GDRVINFO: 2442 case ETHTOOL_GDRVINFO:
1846 case ETHTOOL_GMSGLVL: 2443 case ETHTOOL_GMSGLVL:
@@ -2070,6 +2667,15 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
2070 case ETHTOOL_GPHYSTATS: 2667 case ETHTOOL_GPHYSTATS:
2071 rc = ethtool_get_phy_stats(dev, useraddr); 2668 rc = ethtool_get_phy_stats(dev, useraddr);
2072 break; 2669 break;
2670 case ETHTOOL_PERQUEUE:
2671 rc = ethtool_set_per_queue(dev, useraddr);
2672 break;
2673 case ETHTOOL_GLINKSETTINGS:
2674 rc = ethtool_get_link_ksettings(dev, useraddr);
2675 break;
2676 case ETHTOOL_SLINKSETTINGS:
2677 rc = ethtool_set_link_ksettings(dev, useraddr);
2678 break;
2073 default: 2679 default:
2074 rc = -EOPNOTSUPP; 2680 rc = -EOPNOTSUPP;
2075 } 2681 }
diff --git a/net/core/filter.c b/net/core/filter.c
index 94d26201080d..ca7f832b2980 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -530,12 +530,14 @@ do_pass:
530 *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP); 530 *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
531 break; 531 break;
532 532
533 /* RET_K, RET_A are remapped into 2 insns. */ 533 /* RET_K is remapped into 2 insns. RET_A case doesn't need an
534 * extra mov as BPF_REG_0 is already mapped into BPF_REG_A.
535 */
534 case BPF_RET | BPF_A: 536 case BPF_RET | BPF_A:
535 case BPF_RET | BPF_K: 537 case BPF_RET | BPF_K:
536 *insn++ = BPF_MOV32_RAW(BPF_RVAL(fp->code) == BPF_K ? 538 if (BPF_RVAL(fp->code) == BPF_K)
537 BPF_K : BPF_X, BPF_REG_0, 539 *insn++ = BPF_MOV32_RAW(BPF_K, BPF_REG_0,
538 BPF_REG_A, fp->k); 540 0, fp->k);
539 *insn = BPF_EXIT_INSN(); 541 *insn = BPF_EXIT_INSN();
540 break; 542 break;
541 543
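
Since the converter aliases the classic accumulator A onto eBPF r0 (BPF_REG_A is BPF_REG_0), only RET_K still needs a move before the exit. Purely as an illustration of the two translations emitted above (not in-tree code):

    /* classic "RET 42"  ->  r0 = 42; exit */
    struct bpf_insn ret_k[] = {
            BPF_MOV32_RAW(BPF_K, BPF_REG_0, 0, 42),
            BPF_EXIT_INSN(),
    };

    /* classic "RET A"   ->  exit (A already lives in r0) */
    struct bpf_insn ret_a[] = {
            BPF_EXIT_INSN(),
    };
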
@@ -1147,7 +1149,8 @@ void bpf_prog_destroy(struct bpf_prog *fp)
1147} 1149}
1148EXPORT_SYMBOL_GPL(bpf_prog_destroy); 1150EXPORT_SYMBOL_GPL(bpf_prog_destroy);
1149 1151
1150static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk) 1152static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk,
1153 bool locked)
1151{ 1154{
1152 struct sk_filter *fp, *old_fp; 1155 struct sk_filter *fp, *old_fp;
1153 1156
@@ -1163,10 +1166,8 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
1163 return -ENOMEM; 1166 return -ENOMEM;
1164 } 1167 }
1165 1168
1166 old_fp = rcu_dereference_protected(sk->sk_filter, 1169 old_fp = rcu_dereference_protected(sk->sk_filter, locked);
1167 sock_owned_by_user(sk));
1168 rcu_assign_pointer(sk->sk_filter, fp); 1170 rcu_assign_pointer(sk->sk_filter, fp);
1169
1170 if (old_fp) 1171 if (old_fp)
1171 sk_filter_uncharge(sk, old_fp); 1172 sk_filter_uncharge(sk, old_fp);
1172 1173
@@ -1181,7 +1182,7 @@ static int __reuseport_attach_prog(struct bpf_prog *prog, struct sock *sk)
1181 if (bpf_prog_size(prog->len) > sysctl_optmem_max) 1182 if (bpf_prog_size(prog->len) > sysctl_optmem_max)
1182 return -ENOMEM; 1183 return -ENOMEM;
1183 1184
1184 if (sk_unhashed(sk)) { 1185 if (sk_unhashed(sk) && sk->sk_reuseport) {
1185 err = reuseport_alloc(sk); 1186 err = reuseport_alloc(sk);
1186 if (err) 1187 if (err)
1187 return err; 1188 return err;
@@ -1245,7 +1246,8 @@ struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk)
1245 * occurs or there is insufficient memory for the filter a negative 1246 * occurs or there is insufficient memory for the filter a negative
1246 * errno code is returned. On success the return is zero. 1247 * errno code is returned. On success the return is zero.
1247 */ 1248 */
1248int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) 1249int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk,
1250 bool locked)
1249{ 1251{
1250 struct bpf_prog *prog = __get_filter(fprog, sk); 1252 struct bpf_prog *prog = __get_filter(fprog, sk);
1251 int err; 1253 int err;
@@ -1253,7 +1255,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
1253 if (IS_ERR(prog)) 1255 if (IS_ERR(prog))
1254 return PTR_ERR(prog); 1256 return PTR_ERR(prog);
1255 1257
1256 err = __sk_attach_prog(prog, sk); 1258 err = __sk_attach_prog(prog, sk, locked);
1257 if (err < 0) { 1259 if (err < 0) {
1258 __bpf_prog_release(prog); 1260 __bpf_prog_release(prog);
1259 return err; 1261 return err;
@@ -1261,7 +1263,12 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
1261 1263
1262 return 0; 1264 return 0;
1263} 1265}
1264EXPORT_SYMBOL_GPL(sk_attach_filter); 1266EXPORT_SYMBOL_GPL(__sk_attach_filter);
1267
1268int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
1269{
1270 return __sk_attach_filter(fprog, sk, sock_owned_by_user(sk));
1271}
1265 1272
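
The locked argument simply feeds rcu_dereference_protected(), so a caller that serializes by RTNL rather than by the socket lock can now attach without tripping lockdep. A sketch of such a call site (the tun-style driver context is an assumption here):

    /* Sketch: attach while holding RTNL instead of lock_sock(). */
    static int attach_under_rtnl(struct sock *sk, struct sock_fprog *fprog)
    {
            ASSERT_RTNL();
            return __sk_attach_filter(fprog, sk, lockdep_rtnl_is_held());
    }
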
1266int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk) 1273int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
1267{ 1274{
@@ -1307,7 +1314,7 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
1307 if (IS_ERR(prog)) 1314 if (IS_ERR(prog))
1308 return PTR_ERR(prog); 1315 return PTR_ERR(prog);
1309 1316
1310 err = __sk_attach_prog(prog, sk); 1317 err = __sk_attach_prog(prog, sk, sock_owned_by_user(sk));
1311 if (err < 0) { 1318 if (err < 0) {
1312 bpf_prog_put(prog); 1319 bpf_prog_put(prog);
1313 return err; 1320 return err;
@@ -1333,18 +1340,25 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
1333 return 0; 1340 return 0;
1334} 1341}
1335 1342
1336#define BPF_LDST_LEN 16U 1343struct bpf_scratchpad {
1344 union {
1345 __be32 diff[MAX_BPF_STACK / sizeof(__be32)];
1346 u8 buff[MAX_BPF_STACK];
1347 };
1348};
1349
1350static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp);
1337 1351
1338static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) 1352static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
1339{ 1353{
1354 struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
1340 struct sk_buff *skb = (struct sk_buff *) (long) r1; 1355 struct sk_buff *skb = (struct sk_buff *) (long) r1;
1341 int offset = (int) r2; 1356 int offset = (int) r2;
1342 void *from = (void *) (long) r3; 1357 void *from = (void *) (long) r3;
1343 unsigned int len = (unsigned int) r4; 1358 unsigned int len = (unsigned int) r4;
1344 char buf[BPF_LDST_LEN];
1345 void *ptr; 1359 void *ptr;
1346 1360
1347 if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM))) 1361 if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
1348 return -EINVAL; 1362 return -EINVAL;
1349 1363
1350 /* bpf verifier guarantees that: 1364 /* bpf verifier guarantees that:
@@ -1355,14 +1369,12 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
1355 * 1369 *
1356 * so check for invalid 'offset' and too large 'len' 1370 * so check for invalid 'offset' and too large 'len'
1357 */ 1371 */
1358 if (unlikely((u32) offset > 0xffff || len > sizeof(buf))) 1372 if (unlikely((u32) offset > 0xffff || len > sizeof(sp->buff)))
1359 return -EFAULT; 1373 return -EFAULT;
1360 1374 if (unlikely(skb_try_make_writable(skb, offset + len)))
1361 if (unlikely(skb_cloned(skb) &&
1362 !skb_clone_writable(skb, offset + len)))
1363 return -EFAULT; 1375 return -EFAULT;
1364 1376
1365 ptr = skb_header_pointer(skb, offset, len, buf); 1377 ptr = skb_header_pointer(skb, offset, len, sp->buff);
1366 if (unlikely(!ptr)) 1378 if (unlikely(!ptr))
1367 return -EFAULT; 1379 return -EFAULT;
1368 1380
@@ -1371,17 +1383,19 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
1371 1383
1372 memcpy(ptr, from, len); 1384 memcpy(ptr, from, len);
1373 1385
1374 if (ptr == buf) 1386 if (ptr == sp->buff)
1375 /* skb_store_bits cannot return -EFAULT here */ 1387 /* skb_store_bits cannot return -EFAULT here */
1376 skb_store_bits(skb, offset, ptr, len); 1388 skb_store_bits(skb, offset, ptr, len);
1377 1389
1378 if (flags & BPF_F_RECOMPUTE_CSUM) 1390 if (flags & BPF_F_RECOMPUTE_CSUM)
1379 skb_postpush_rcsum(skb, ptr, len); 1391 skb_postpush_rcsum(skb, ptr, len);
1392 if (flags & BPF_F_INVALIDATE_HASH)
1393 skb_clear_hash(skb);
1380 1394
1381 return 0; 1395 return 0;
1382} 1396}
1383 1397
1384const struct bpf_func_proto bpf_skb_store_bytes_proto = { 1398static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
1385 .func = bpf_skb_store_bytes, 1399 .func = bpf_skb_store_bytes,
1386 .gpl_only = false, 1400 .gpl_only = false,
1387 .ret_type = RET_INTEGER, 1401 .ret_type = RET_INTEGER,
@@ -1400,7 +1414,7 @@ static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
1400 unsigned int len = (unsigned int) r4; 1414 unsigned int len = (unsigned int) r4;
1401 void *ptr; 1415 void *ptr;
1402 1416
1403 if (unlikely((u32) offset > 0xffff || len > BPF_LDST_LEN)) 1417 if (unlikely((u32) offset > 0xffff || len > MAX_BPF_STACK))
1404 return -EFAULT; 1418 return -EFAULT;
1405 1419
1406 ptr = skb_header_pointer(skb, offset, len, to); 1420 ptr = skb_header_pointer(skb, offset, len, to);
@@ -1412,7 +1426,7 @@ static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
1412 return 0; 1426 return 0;
1413} 1427}
1414 1428
1415const struct bpf_func_proto bpf_skb_load_bytes_proto = { 1429static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
1416 .func = bpf_skb_load_bytes, 1430 .func = bpf_skb_load_bytes,
1417 .gpl_only = false, 1431 .gpl_only = false,
1418 .ret_type = RET_INTEGER, 1432 .ret_type = RET_INTEGER,
@@ -1432,9 +1446,7 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
1432 return -EINVAL; 1446 return -EINVAL;
1433 if (unlikely((u32) offset > 0xffff)) 1447 if (unlikely((u32) offset > 0xffff))
1434 return -EFAULT; 1448 return -EFAULT;
1435 1449 if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum))))
1436 if (unlikely(skb_cloned(skb) &&
1437 !skb_clone_writable(skb, offset + sizeof(sum))))
1438 return -EFAULT; 1450 return -EFAULT;
1439 1451
1440 ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); 1452 ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
@@ -1442,6 +1454,12 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
1442 return -EFAULT; 1454 return -EFAULT;
1443 1455
1444 switch (flags & BPF_F_HDR_FIELD_MASK) { 1456 switch (flags & BPF_F_HDR_FIELD_MASK) {
1457 case 0:
1458 if (unlikely(from != 0))
1459 return -EINVAL;
1460
1461 csum_replace_by_diff(ptr, to);
1462 break;
1445 case 2: 1463 case 2:
1446 csum_replace2(ptr, from, to); 1464 csum_replace2(ptr, from, to);
1447 break; 1465 break;
@@ -1459,7 +1477,7 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
1459 return 0; 1477 return 0;
1460} 1478}
1461 1479
1462const struct bpf_func_proto bpf_l3_csum_replace_proto = { 1480static const struct bpf_func_proto bpf_l3_csum_replace_proto = {
1463 .func = bpf_l3_csum_replace, 1481 .func = bpf_l3_csum_replace,
1464 .gpl_only = false, 1482 .gpl_only = false,
1465 .ret_type = RET_INTEGER, 1483 .ret_type = RET_INTEGER,
@@ -1474,23 +1492,31 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
1474{ 1492{
1475 struct sk_buff *skb = (struct sk_buff *) (long) r1; 1493 struct sk_buff *skb = (struct sk_buff *) (long) r1;
1476 bool is_pseudo = flags & BPF_F_PSEUDO_HDR; 1494 bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
1495 bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
1477 int offset = (int) r2; 1496 int offset = (int) r2;
1478 __sum16 sum, *ptr; 1497 __sum16 sum, *ptr;
1479 1498
1480 if (unlikely(flags & ~(BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK))) 1499 if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_PSEUDO_HDR |
1500 BPF_F_HDR_FIELD_MASK)))
1481 return -EINVAL; 1501 return -EINVAL;
1482 if (unlikely((u32) offset > 0xffff)) 1502 if (unlikely((u32) offset > 0xffff))
1483 return -EFAULT; 1503 return -EFAULT;
1484 1504 if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum))))
1485 if (unlikely(skb_cloned(skb) &&
1486 !skb_clone_writable(skb, offset + sizeof(sum))))
1487 return -EFAULT; 1505 return -EFAULT;
1488 1506
1489 ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); 1507 ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
1490 if (unlikely(!ptr)) 1508 if (unlikely(!ptr))
1491 return -EFAULT; 1509 return -EFAULT;
1510 if (is_mmzero && !*ptr)
1511 return 0;
1492 1512
1493 switch (flags & BPF_F_HDR_FIELD_MASK) { 1513 switch (flags & BPF_F_HDR_FIELD_MASK) {
1514 case 0:
1515 if (unlikely(from != 0))
1516 return -EINVAL;
1517
1518 inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo);
1519 break;
1494 case 2: 1520 case 2:
1495 inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo); 1521 inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
1496 break; 1522 break;
@@ -1501,6 +1527,8 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
1501 return -EINVAL; 1527 return -EINVAL;
1502 } 1528 }
1503 1529
1530 if (is_mmzero && !*ptr)
1531 *ptr = CSUM_MANGLED_0;
1504 if (ptr == &sum) 1532 if (ptr == &sum)
1505 /* skb_store_bits guaranteed to not return -EFAULT here */ 1533 /* skb_store_bits guaranteed to not return -EFAULT here */
1506 skb_store_bits(skb, offset, ptr, sizeof(sum)); 1534 skb_store_bits(skb, offset, ptr, sizeof(sum));
@@ -1508,7 +1536,7 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
1508 return 0; 1536 return 0;
1509} 1537}
1510 1538
1511const struct bpf_func_proto bpf_l4_csum_replace_proto = { 1539static const struct bpf_func_proto bpf_l4_csum_replace_proto = {
1512 .func = bpf_l4_csum_replace, 1540 .func = bpf_l4_csum_replace,
1513 .gpl_only = false, 1541 .gpl_only = false,
1514 .ret_type = RET_INTEGER, 1542 .ret_type = RET_INTEGER,
@@ -1519,6 +1547,45 @@ const struct bpf_func_proto bpf_l4_csum_replace_proto = {
1519 .arg5_type = ARG_ANYTHING, 1547 .arg5_type = ARG_ANYTHING,
1520}; 1548};
1521 1549
1550static u64 bpf_csum_diff(u64 r1, u64 from_size, u64 r3, u64 to_size, u64 seed)
1551{
1552 struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
1553 u64 diff_size = from_size + to_size;
1554 __be32 *from = (__be32 *) (long) r1;
1555 __be32 *to = (__be32 *) (long) r3;
1556 int i, j = 0;
1557
1558 /* This is quite flexible, some examples:
1559 *
1560 * from_size == 0, to_size > 0, seed := csum --> pushing data
1561 * from_size > 0, to_size == 0, seed := csum --> pulling data
1562 * from_size > 0, to_size > 0, seed := 0 --> diffing data
1563 *
1564 * Even for diffing, from_size and to_size don't need to be equal.
1565 */
1566 if (unlikely(((from_size | to_size) & (sizeof(__be32) - 1)) ||
1567 diff_size > sizeof(sp->diff)))
1568 return -EINVAL;
1569
1570 for (i = 0; i < from_size / sizeof(__be32); i++, j++)
1571 sp->diff[j] = ~from[i];
1572 for (i = 0; i < to_size / sizeof(__be32); i++, j++)
1573 sp->diff[j] = to[i];
1574
1575 return csum_partial(sp->diff, diff_size, seed);
1576}
1577
1578static const struct bpf_func_proto bpf_csum_diff_proto = {
1579 .func = bpf_csum_diff,
1580 .gpl_only = false,
1581 .ret_type = RET_INTEGER,
1582 .arg1_type = ARG_PTR_TO_STACK,
1583 .arg2_type = ARG_CONST_STACK_SIZE_OR_ZERO,
1584 .arg3_type = ARG_PTR_TO_STACK,
1585 .arg4_type = ARG_CONST_STACK_SIZE_OR_ZERO,
1586 .arg5_type = ARG_ANYTHING,
1587};
1588
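
Combined with the new case-0 ("replace by diff") branches in bpf_l3/l4_csum_replace above, bpf_csum_diff lets a program rewrite several header bytes and repair the checksums from one computed delta. A hedged tc-program sketch (assumes no IPv4 options and samples/bpf-style declarations of the bpf_*() helper wrappers):

    /* Sketch: rewrite the IPv4 daddr, fix IP and TCP checksums. */
    #include <stddef.h>
    #include <linux/if_ether.h>
    #include <linux/ip.h>
    #include <linux/tcp.h>

    static int rewrite_daddr(struct __sk_buff *skb, __be32 new_ip)
    {
            int ip_off = ETH_HLEN + offsetof(struct iphdr, daddr);
            int l3_off = ETH_HLEN + offsetof(struct iphdr, check);
            int l4_off = ETH_HLEN + sizeof(struct iphdr) +
                         offsetof(struct tcphdr, check);
            __be32 old_ip, sum;

            bpf_skb_load_bytes(skb, ip_off, &old_ip, sizeof(old_ip));
            sum = bpf_csum_diff(&old_ip, sizeof(old_ip),
                                &new_ip, sizeof(new_ip), 0);

            bpf_skb_store_bytes(skb, ip_off, &new_ip, sizeof(new_ip), 0);
            bpf_l3_csum_replace(skb, l3_off, 0, sum, 0);
            /* daddr is part of the TCP pseudo-header as well */
            bpf_l4_csum_replace(skb, l4_off, 0, sum, BPF_F_PSEUDO_HDR);
            return 0;
    }
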
1522static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5) 1589static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
1523{ 1590{
1524 struct sk_buff *skb = (struct sk_buff *) (long) r1, *skb2; 1591 struct sk_buff *skb = (struct sk_buff *) (long) r1, *skb2;
@@ -1543,11 +1610,10 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
1543 } 1610 }
1544 1611
1545 skb2->dev = dev; 1612 skb2->dev = dev;
1546 skb_sender_cpu_clear(skb2);
1547 return dev_queue_xmit(skb2); 1613 return dev_queue_xmit(skb2);
1548} 1614}
1549 1615
1550const struct bpf_func_proto bpf_clone_redirect_proto = { 1616static const struct bpf_func_proto bpf_clone_redirect_proto = {
1551 .func = bpf_clone_redirect, 1617 .func = bpf_clone_redirect,
1552 .gpl_only = false, 1618 .gpl_only = false,
1553 .ret_type = RET_INTEGER, 1619 .ret_type = RET_INTEGER,
@@ -1596,11 +1662,10 @@ int skb_do_redirect(struct sk_buff *skb)
1596 } 1662 }
1597 1663
1598 skb->dev = dev; 1664 skb->dev = dev;
1599 skb_sender_cpu_clear(skb);
1600 return dev_queue_xmit(skb); 1665 return dev_queue_xmit(skb);
1601} 1666}
1602 1667
1603const struct bpf_func_proto bpf_redirect_proto = { 1668static const struct bpf_func_proto bpf_redirect_proto = {
1604 .func = bpf_redirect, 1669 .func = bpf_redirect,
1605 .gpl_only = false, 1670 .gpl_only = false,
1606 .ret_type = RET_INTEGER, 1671 .ret_type = RET_INTEGER,
@@ -1622,14 +1687,7 @@ static const struct bpf_func_proto bpf_get_cgroup_classid_proto = {
1622 1687
1623static u64 bpf_get_route_realm(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) 1688static u64 bpf_get_route_realm(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
1624{ 1689{
1625#ifdef CONFIG_IP_ROUTE_CLASSID 1690 return dst_tclassid((struct sk_buff *) (unsigned long) r1);
1626 const struct dst_entry *dst;
1627
1628 dst = skb_dst((struct sk_buff *) (unsigned long) r1);
1629 if (dst)
1630 return dst->tclassid;
1631#endif
1632 return 0;
1633} 1691}
1634 1692
1635static const struct bpf_func_proto bpf_get_route_realm_proto = { 1693static const struct bpf_func_proto bpf_get_route_realm_proto = {
@@ -1682,6 +1740,13 @@ bool bpf_helper_changes_skb_data(void *func)
1682 return true; 1740 return true;
1683 if (func == bpf_skb_vlan_pop) 1741 if (func == bpf_skb_vlan_pop)
1684 return true; 1742 return true;
1743 if (func == bpf_skb_store_bytes)
1744 return true;
1745 if (func == bpf_l3_csum_replace)
1746 return true;
1747 if (func == bpf_l4_csum_replace)
1748 return true;
1749
1685 return false; 1750 return false;
1686} 1751}
1687 1752
@@ -1703,12 +1768,16 @@ static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
1703 return -EPROTO; 1768 return -EPROTO;
1704 if (unlikely(size != sizeof(struct bpf_tunnel_key))) { 1769 if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
1705 switch (size) { 1770 switch (size) {
1771 case offsetof(struct bpf_tunnel_key, tunnel_label):
1772 case offsetof(struct bpf_tunnel_key, tunnel_ext):
1773 goto set_compat;
1706 case offsetof(struct bpf_tunnel_key, remote_ipv6[1]): 1774 case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
1707 /* Fixup deprecated structure layouts here, so we have 1775 /* Fixup deprecated structure layouts here, so we have
1708 * a common path later on. 1776 * a common path later on.
1709 */ 1777 */
1710 if (ip_tunnel_info_af(info) != AF_INET) 1778 if (ip_tunnel_info_af(info) != AF_INET)
1711 return -EINVAL; 1779 return -EINVAL;
1780set_compat:
1712 to = (struct bpf_tunnel_key *)compat; 1781 to = (struct bpf_tunnel_key *)compat;
1713 break; 1782 break;
1714 default: 1783 default:
@@ -1720,11 +1789,13 @@ static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
1720 to->tunnel_tos = info->key.tos; 1789 to->tunnel_tos = info->key.tos;
1721 to->tunnel_ttl = info->key.ttl; 1790 to->tunnel_ttl = info->key.ttl;
1722 1791
1723 if (flags & BPF_F_TUNINFO_IPV6) 1792 if (flags & BPF_F_TUNINFO_IPV6) {
1724 memcpy(to->remote_ipv6, &info->key.u.ipv6.src, 1793 memcpy(to->remote_ipv6, &info->key.u.ipv6.src,
1725 sizeof(to->remote_ipv6)); 1794 sizeof(to->remote_ipv6));
1726 else 1795 to->tunnel_label = be32_to_cpu(info->key.label);
1796 } else {
1727 to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src); 1797 to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src);
1798 }
1728 1799
1729 if (unlikely(size != sizeof(struct bpf_tunnel_key))) 1800 if (unlikely(size != sizeof(struct bpf_tunnel_key)))
1730 memcpy((void *)(long) r2, to, size); 1801 memcpy((void *)(long) r2, to, size);
@@ -1732,7 +1803,7 @@ static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
1732 return 0; 1803 return 0;
1733} 1804}
1734 1805
1735const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = { 1806static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
1736 .func = bpf_skb_get_tunnel_key, 1807 .func = bpf_skb_get_tunnel_key,
1737 .gpl_only = false, 1808 .gpl_only = false,
1738 .ret_type = RET_INTEGER, 1809 .ret_type = RET_INTEGER,
@@ -1742,6 +1813,32 @@ const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
1742 .arg4_type = ARG_ANYTHING, 1813 .arg4_type = ARG_ANYTHING,
1743}; 1814};
1744 1815
1816static u64 bpf_skb_get_tunnel_opt(u64 r1, u64 r2, u64 size, u64 r4, u64 r5)
1817{
1818 struct sk_buff *skb = (struct sk_buff *) (long) r1;
1819 u8 *to = (u8 *) (long) r2;
1820 const struct ip_tunnel_info *info = skb_tunnel_info(skb);
1821
1822 if (unlikely(!info ||
1823 !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT)))
1824 return -ENOENT;
1825 if (unlikely(size < info->options_len))
1826 return -ENOMEM;
1827
1828 ip_tunnel_info_opts_get(to, info);
1829
1830 return info->options_len;
1831}
1832
1833static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = {
1834 .func = bpf_skb_get_tunnel_opt,
1835 .gpl_only = false,
1836 .ret_type = RET_INTEGER,
1837 .arg1_type = ARG_PTR_TO_CTX,
1838 .arg2_type = ARG_PTR_TO_STACK,
1839 .arg3_type = ARG_CONST_STACK_SIZE,
1840};
1841
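
The helper copies the raw tunnel metadata options (a Geneve TLV, the VXLAN-GBP extension, and so on) into a stack buffer and returns their length, or -ENOENT when none were received. A hedged sketch of the read side on a collect_md device:

    /* Sketch: ingress program on a collect_md tunnel. */
    static int read_tun_opts(struct __sk_buff *skb)
    {
            unsigned char opts[64];         /* must cover options_len */
            int len;

            len = bpf_skb_get_tunnel_opt(skb, opts, sizeof(opts));
            if (len < 0)
                    return len;             /* -ENOENT / -ENOMEM */
            /* parse the first 'len' bytes of opts[] here */
            return 0;
    }
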
1745static struct metadata_dst __percpu *md_dst; 1842static struct metadata_dst __percpu *md_dst;
1746 1843
1747static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) 1844static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
@@ -1752,10 +1849,13 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
1752 u8 compat[sizeof(struct bpf_tunnel_key)]; 1849 u8 compat[sizeof(struct bpf_tunnel_key)];
1753 struct ip_tunnel_info *info; 1850 struct ip_tunnel_info *info;
1754 1851
1755 if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6))) 1852 if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
1853 BPF_F_DONT_FRAGMENT)))
1756 return -EINVAL; 1854 return -EINVAL;
1757 if (unlikely(size != sizeof(struct bpf_tunnel_key))) { 1855 if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
1758 switch (size) { 1856 switch (size) {
1857 case offsetof(struct bpf_tunnel_key, tunnel_label):
1858 case offsetof(struct bpf_tunnel_key, tunnel_ext):
1759 case offsetof(struct bpf_tunnel_key, remote_ipv6[1]): 1859 case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
1760 /* Fixup deprecated structure layouts here, so we have 1860 /* Fixup deprecated structure layouts here, so we have
1761 * a common path later on. 1861 * a common path later on.
@@ -1768,6 +1868,9 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
1768 return -EINVAL; 1868 return -EINVAL;
1769 } 1869 }
1770 } 1870 }
1871 if (unlikely((!(flags & BPF_F_TUNINFO_IPV6) && from->tunnel_label) ||
1872 from->tunnel_ext))
1873 return -EINVAL;
1771 1874
1772 skb_dst_drop(skb); 1875 skb_dst_drop(skb);
1773 dst_hold((struct dst_entry *) md); 1876 dst_hold((struct dst_entry *) md);
@@ -1776,7 +1879,10 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
1776 info = &md->u.tun_info; 1879 info = &md->u.tun_info;
1777 info->mode = IP_TUNNEL_INFO_TX; 1880 info->mode = IP_TUNNEL_INFO_TX;
1778 1881
1779 info->key.tun_flags = TUNNEL_KEY; 1882 info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;
1883 if (flags & BPF_F_DONT_FRAGMENT)
1884 info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
1885
1780 info->key.tun_id = cpu_to_be64(from->tunnel_id); 1886 info->key.tun_id = cpu_to_be64(from->tunnel_id);
1781 info->key.tos = from->tunnel_tos; 1887 info->key.tos = from->tunnel_tos;
1782 info->key.ttl = from->tunnel_ttl; 1888 info->key.ttl = from->tunnel_ttl;
@@ -1785,14 +1891,18 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
1785 info->mode |= IP_TUNNEL_INFO_IPV6; 1891 info->mode |= IP_TUNNEL_INFO_IPV6;
1786 memcpy(&info->key.u.ipv6.dst, from->remote_ipv6, 1892 memcpy(&info->key.u.ipv6.dst, from->remote_ipv6,
1787 sizeof(from->remote_ipv6)); 1893 sizeof(from->remote_ipv6));
1894 info->key.label = cpu_to_be32(from->tunnel_label) &
1895 IPV6_FLOWLABEL_MASK;
1788 } else { 1896 } else {
1789 info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4); 1897 info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
1898 if (flags & BPF_F_ZERO_CSUM_TX)
1899 info->key.tun_flags &= ~TUNNEL_CSUM;
1790 } 1900 }
1791 1901
1792 return 0; 1902 return 0;
1793} 1903}
1794 1904
1795const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = { 1905static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
1796 .func = bpf_skb_set_tunnel_key, 1906 .func = bpf_skb_set_tunnel_key,
1797 .gpl_only = false, 1907 .gpl_only = false,
1798 .ret_type = RET_INTEGER, 1908 .ret_type = RET_INTEGER,
@@ -1802,17 +1912,53 @@ const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
1802 .arg4_type = ARG_ANYTHING, 1912 .arg4_type = ARG_ANYTHING,
1803}; 1913};
1804 1914
1805static const struct bpf_func_proto *bpf_get_skb_set_tunnel_key_proto(void) 1915static u64 bpf_skb_set_tunnel_opt(u64 r1, u64 r2, u64 size, u64 r4, u64 r5)
1916{
1917 struct sk_buff *skb = (struct sk_buff *) (long) r1;
1918 u8 *from = (u8 *) (long) r2;
1919 struct ip_tunnel_info *info = skb_tunnel_info(skb);
1920 const struct metadata_dst *md = this_cpu_ptr(md_dst);
1921
1922 if (unlikely(info != &md->u.tun_info || (size & (sizeof(u32) - 1))))
1923 return -EINVAL;
1924 if (unlikely(size > IP_TUNNEL_OPTS_MAX))
1925 return -ENOMEM;
1926
1927 ip_tunnel_info_opts_set(info, from, size);
1928
1929 return 0;
1930}
1931
1932static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = {
1933 .func = bpf_skb_set_tunnel_opt,
1934 .gpl_only = false,
1935 .ret_type = RET_INTEGER,
1936 .arg1_type = ARG_PTR_TO_CTX,
1937 .arg2_type = ARG_PTR_TO_STACK,
1938 .arg3_type = ARG_CONST_STACK_SIZE,
1939};
1940
1941static const struct bpf_func_proto *
1942bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
1806{ 1943{
1807 if (!md_dst) { 1944 if (!md_dst) {
1808 /* race is not possible, since it's called from 1945 /* Race is not possible, since it's called from verifier
1809 * verifier that is holding verifier mutex 1946 * that is holding verifier mutex.
1810 */ 1947 */
1811 md_dst = metadata_dst_alloc_percpu(0, GFP_KERNEL); 1948 md_dst = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX,
1949 GFP_KERNEL);
1812 if (!md_dst) 1950 if (!md_dst)
1813 return NULL; 1951 return NULL;
1814 } 1952 }
1815 return &bpf_skb_set_tunnel_key_proto; 1953
1954 switch (which) {
1955 case BPF_FUNC_skb_set_tunnel_key:
1956 return &bpf_skb_set_tunnel_key_proto;
1957 case BPF_FUNC_skb_set_tunnel_opt:
1958 return &bpf_skb_set_tunnel_opt_proto;
1959 default:
1960 return NULL;
1961 }
1816} 1962}
1817 1963
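
On transmit the two setters pair up: bpf_skb_set_tunnel_key() installs the per-cpu metadata dst first, and only then does bpf_skb_set_tunnel_opt() accept the options (it checks that the skb's tun_info is the one set up above). A hedged egress sketch with illustrative addresses and payload:

    /* Sketch: IPv4 tunnel key, then 8 bytes of options. */
    static int set_tun(struct __sk_buff *skb)
    {
            struct bpf_tunnel_key key = {};
            __u32 opt[2] = { 0xdeadbeef, 0 };       /* illustrative */

            key.tunnel_id = 42;
            key.remote_ipv4 = 0xac100164;           /* 172.16.1.100 */
            key.tunnel_ttl = 64;

            if (bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
                                       BPF_F_ZERO_CSUM_TX) < 0)
                    return -1;
            return bpf_skb_set_tunnel_opt(skb, opt, sizeof(opt));
    }
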
1818static const struct bpf_func_proto * 1964static const struct bpf_func_proto *
@@ -1849,6 +1995,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
1849 return &bpf_skb_store_bytes_proto; 1995 return &bpf_skb_store_bytes_proto;
1850 case BPF_FUNC_skb_load_bytes: 1996 case BPF_FUNC_skb_load_bytes:
1851 return &bpf_skb_load_bytes_proto; 1997 return &bpf_skb_load_bytes_proto;
1998 case BPF_FUNC_csum_diff:
1999 return &bpf_csum_diff_proto;
1852 case BPF_FUNC_l3_csum_replace: 2000 case BPF_FUNC_l3_csum_replace:
1853 return &bpf_l3_csum_replace_proto; 2001 return &bpf_l3_csum_replace_proto;
1854 case BPF_FUNC_l4_csum_replace: 2002 case BPF_FUNC_l4_csum_replace:
@@ -1864,7 +2012,11 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
1864 case BPF_FUNC_skb_get_tunnel_key: 2012 case BPF_FUNC_skb_get_tunnel_key:
1865 return &bpf_skb_get_tunnel_key_proto; 2013 return &bpf_skb_get_tunnel_key_proto;
1866 case BPF_FUNC_skb_set_tunnel_key: 2014 case BPF_FUNC_skb_set_tunnel_key:
1867 return bpf_get_skb_set_tunnel_key_proto(); 2015 return bpf_get_skb_set_tunnel_proto(func_id);
2016 case BPF_FUNC_skb_get_tunnel_opt:
2017 return &bpf_skb_get_tunnel_opt_proto;
2018 case BPF_FUNC_skb_set_tunnel_opt:
2019 return bpf_get_skb_set_tunnel_proto(func_id);
1868 case BPF_FUNC_redirect: 2020 case BPF_FUNC_redirect:
1869 return &bpf_redirect_proto; 2021 return &bpf_redirect_proto;
1870 case BPF_FUNC_get_route_realm: 2022 case BPF_FUNC_get_route_realm:
@@ -1913,16 +2065,14 @@ static bool sk_filter_is_valid_access(int off, int size,
1913static bool tc_cls_act_is_valid_access(int off, int size, 2065static bool tc_cls_act_is_valid_access(int off, int size,
1914 enum bpf_access_type type) 2066 enum bpf_access_type type)
1915{ 2067{
1916 if (off == offsetof(struct __sk_buff, tc_classid))
1917 return type == BPF_WRITE ? true : false;
1918
1919 if (type == BPF_WRITE) { 2068 if (type == BPF_WRITE) {
1920 switch (off) { 2069 switch (off) {
1921 case offsetof(struct __sk_buff, mark): 2070 case offsetof(struct __sk_buff, mark):
1922 case offsetof(struct __sk_buff, tc_index): 2071 case offsetof(struct __sk_buff, tc_index):
1923 case offsetof(struct __sk_buff, priority): 2072 case offsetof(struct __sk_buff, priority):
1924 case offsetof(struct __sk_buff, cb[0]) ... 2073 case offsetof(struct __sk_buff, cb[0]) ...
1925 offsetof(struct __sk_buff, cb[4]): 2074 offsetof(struct __sk_buff, cb[4]):
2075 case offsetof(struct __sk_buff, tc_classid):
1926 break; 2076 break;
1927 default: 2077 default:
1928 return false; 2078 return false;
@@ -2039,8 +2189,10 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
2039 ctx_off -= offsetof(struct __sk_buff, tc_classid); 2189 ctx_off -= offsetof(struct __sk_buff, tc_classid);
2040 ctx_off += offsetof(struct sk_buff, cb); 2190 ctx_off += offsetof(struct sk_buff, cb);
2041 ctx_off += offsetof(struct qdisc_skb_cb, tc_classid); 2191 ctx_off += offsetof(struct qdisc_skb_cb, tc_classid);
2042 WARN_ON(type != BPF_WRITE); 2192 if (type == BPF_WRITE)
2043 *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg, ctx_off); 2193 *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg, ctx_off);
2194 else
2195 *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, ctx_off);
2044 break; 2196 break;
2045 2197
2046 case offsetof(struct __sk_buff, tc_index): 2198 case offsetof(struct __sk_buff, tc_index):
@@ -2103,7 +2255,7 @@ static int __init register_sk_filter_ops(void)
2103} 2255}
2104late_initcall(register_sk_filter_ops); 2256late_initcall(register_sk_filter_ops);
2105 2257
2106int sk_detach_filter(struct sock *sk) 2258int __sk_detach_filter(struct sock *sk, bool locked)
2107{ 2259{
2108 int ret = -ENOENT; 2260 int ret = -ENOENT;
2109 struct sk_filter *filter; 2261 struct sk_filter *filter;
@@ -2111,8 +2263,7 @@ int sk_detach_filter(struct sock *sk)
2111 if (sock_flag(sk, SOCK_FILTER_LOCKED)) 2263 if (sock_flag(sk, SOCK_FILTER_LOCKED))
2112 return -EPERM; 2264 return -EPERM;
2113 2265
2114 filter = rcu_dereference_protected(sk->sk_filter, 2266 filter = rcu_dereference_protected(sk->sk_filter, locked);
2115 sock_owned_by_user(sk));
2116 if (filter) { 2267 if (filter) {
2117 RCU_INIT_POINTER(sk->sk_filter, NULL); 2268 RCU_INIT_POINTER(sk->sk_filter, NULL);
2118 sk_filter_uncharge(sk, filter); 2269 sk_filter_uncharge(sk, filter);
@@ -2121,7 +2272,12 @@ int sk_detach_filter(struct sock *sk)
2121 2272
2122 return ret; 2273 return ret;
2123} 2274}
2124EXPORT_SYMBOL_GPL(sk_detach_filter); 2275EXPORT_SYMBOL_GPL(__sk_detach_filter);
2276
2277int sk_detach_filter(struct sock *sk)
2278{
2279 return __sk_detach_filter(sk, sock_owned_by_user(sk));
2280}
2125 2281
2126int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, 2282int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
2127 unsigned int len) 2283 unsigned int len)
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 12e700332010..a669dea146c6 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -19,25 +19,12 @@
19#include <net/flow_dissector.h> 19#include <net/flow_dissector.h>
20#include <scsi/fc/fc_fcoe.h> 20#include <scsi/fc/fc_fcoe.h>
21 21
22static bool dissector_uses_key(const struct flow_dissector *flow_dissector,
23 enum flow_dissector_key_id key_id)
24{
25 return flow_dissector->used_keys & (1 << key_id);
26}
27
28static void dissector_set_key(struct flow_dissector *flow_dissector, 22static void dissector_set_key(struct flow_dissector *flow_dissector,
29 enum flow_dissector_key_id key_id) 23 enum flow_dissector_key_id key_id)
30{ 24{
31 flow_dissector->used_keys |= (1 << key_id); 25 flow_dissector->used_keys |= (1 << key_id);
32} 26}
33 27
34static void *skb_flow_dissector_target(struct flow_dissector *flow_dissector,
35 enum flow_dissector_key_id key_id,
36 void *target_container)
37{
38 return ((char *) target_container) + flow_dissector->offset[key_id];
39}
40
41void skb_flow_dissector_init(struct flow_dissector *flow_dissector, 28void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
42 const struct flow_dissector_key *key, 29 const struct flow_dissector_key *key,
43 unsigned int key_count) 30 unsigned int key_count)
@@ -178,15 +165,16 @@ ip:
178 165
179 ip_proto = iph->protocol; 166 ip_proto = iph->protocol;
180 167
181 if (!dissector_uses_key(flow_dissector, 168 if (dissector_uses_key(flow_dissector,
182 FLOW_DISSECTOR_KEY_IPV4_ADDRS)) 169 FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
183 break; 170 key_addrs = skb_flow_dissector_target(flow_dissector,
171 FLOW_DISSECTOR_KEY_IPV4_ADDRS,
172 target_container);
184 173
185 key_addrs = skb_flow_dissector_target(flow_dissector, 174 memcpy(&key_addrs->v4addrs, &iph->saddr,
186 FLOW_DISSECTOR_KEY_IPV4_ADDRS, target_container); 175 sizeof(key_addrs->v4addrs));
187 memcpy(&key_addrs->v4addrs, &iph->saddr, 176 key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
188 sizeof(key_addrs->v4addrs)); 177 }
189 key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
190 178
191 if (ip_is_fragment(iph)) { 179 if (ip_is_fragment(iph)) {
192 key_control->flags |= FLOW_DIS_IS_FRAGMENT; 180 key_control->flags |= FLOW_DIS_IS_FRAGMENT;
@@ -219,13 +207,12 @@ ipv6:
219 207
220 if (dissector_uses_key(flow_dissector, 208 if (dissector_uses_key(flow_dissector,
221 FLOW_DISSECTOR_KEY_IPV6_ADDRS)) { 209 FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
222 struct flow_dissector_key_ipv6_addrs *key_ipv6_addrs; 210 key_addrs = skb_flow_dissector_target(flow_dissector,
223 211 FLOW_DISSECTOR_KEY_IPV6_ADDRS,
224 key_ipv6_addrs = skb_flow_dissector_target(flow_dissector, 212 target_container);
225 FLOW_DISSECTOR_KEY_IPV6_ADDRS,
226 target_container);
227 213
228 memcpy(key_ipv6_addrs, &iph->saddr, sizeof(*key_ipv6_addrs)); 214 memcpy(&key_addrs->v6addrs, &iph->saddr,
215 sizeof(key_addrs->v6addrs));
229 key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 216 key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
230 } 217 }
231 218
@@ -339,8 +326,11 @@ mpls:
339 } 326 }
340 327
341 case htons(ETH_P_FCOE): 328 case htons(ETH_P_FCOE):
342 key_control->thoff = (u16)(nhoff + FCOE_HEADER_LEN); 329 if ((hlen - nhoff) < FCOE_HEADER_LEN)
343 /* fall through */ 330 goto out_bad;
331
332 nhoff += FCOE_HEADER_LEN;
333 goto out_good;
344 default: 334 default:
345 goto out_bad; 335 goto out_bad;
346 } 336 }
@@ -447,13 +437,12 @@ ip_proto_again:
447 key_control->flags |= FLOW_DIS_IS_FRAGMENT; 437 key_control->flags |= FLOW_DIS_IS_FRAGMENT;
448 438
449 nhoff += sizeof(_fh); 439 nhoff += sizeof(_fh);
440 ip_proto = fh->nexthdr;
450 441
451 if (!(fh->frag_off & htons(IP6_OFFSET))) { 442 if (!(fh->frag_off & htons(IP6_OFFSET))) {
452 key_control->flags |= FLOW_DIS_FIRST_FRAG; 443 key_control->flags |= FLOW_DIS_FIRST_FRAG;
453 if (flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG) { 444 if (flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG)
454 ip_proto = fh->nexthdr;
455 goto ip_proto_again; 445 goto ip_proto_again;
456 }
457 } 446 }
458 goto out_good; 447 goto out_good;
459 } 448 }
@@ -740,6 +729,11 @@ u32 __skb_get_poff(const struct sk_buff *skb, void *data,
740{ 729{
741 u32 poff = keys->control.thoff; 730 u32 poff = keys->control.thoff;
742 731
732 /* skip L4 headers for fragments after the first */
733 if ((keys->control.flags & FLOW_DIS_IS_FRAGMENT) &&
734 !(keys->control.flags & FLOW_DIS_FIRST_FRAG))
735 return poff;
736
743 switch (keys->basic.ip_proto) { 737 switch (keys->basic.ip_proto) {
744 case IPPROTO_TCP: { 738 case IPPROTO_TCP: {
745 /* access doff as u8 to avoid unaligned access */ 739 /* access doff as u8 to avoid unaligned access */
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 92d886f4adcb..4573d81093fe 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -191,6 +191,7 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats
191/** 191/**
192 * gen_new_estimator - create a new rate estimator 192 * gen_new_estimator - create a new rate estimator
193 * @bstats: basic statistics 193 * @bstats: basic statistics
194 * @cpu_bstats: bstats per cpu
194 * @rate_est: rate estimator statistics 195 * @rate_est: rate estimator statistics
195 * @stats_lock: statistics lock 196 * @stats_lock: statistics lock
196 * @opt: rate estimator configuration TLV 197 * @opt: rate estimator configuration TLV
@@ -287,6 +288,7 @@ EXPORT_SYMBOL(gen_kill_estimator);
287/** 288/**
288 * gen_replace_estimator - replace rate estimator configuration 289 * gen_replace_estimator - replace rate estimator configuration
289 * @bstats: basic statistics 290 * @bstats: basic statistics
291 * @cpu_bstats: bstats per cpu
290 * @rate_est: rate estimator statistics 292 * @rate_est: rate estimator statistics
291 * @stats_lock: statistics lock 293 * @stats_lock: statistics lock
292 * @opt: rate estimator configuration TLV 294 * @opt: rate estimator configuration TLV
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 1e2f46a69d50..e640462ea8bf 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -140,6 +140,7 @@ EXPORT_SYMBOL(__gnet_stats_copy_basic);
140/** 140/**
141 * gnet_stats_copy_basic - copy basic statistics into statistic TLV 141 * gnet_stats_copy_basic - copy basic statistics into statistic TLV
142 * @d: dumping handle 142 * @d: dumping handle
143 * @cpu: copy statistic per cpu
143 * @b: basic statistics 144 * @b: basic statistics
144 * 145 *
145 * Appends the basic statistics to the top level TLV created by 146 * Appends the basic statistics to the top level TLV created by
diff --git a/net/core/hwbm.c b/net/core/hwbm.c
new file mode 100644
index 000000000000..941c28486896
--- /dev/null
+++ b/net/core/hwbm.c
@@ -0,0 +1,87 @@
1/* Support for hardware buffer manager.
2 *
3 * Copyright (C) 2016 Marvell
4 *
5 * Gregory CLEMENT <gregory.clement@free-electrons.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 */
12#include <linux/kernel.h>
13#include <linux/printk.h>
14#include <linux/skbuff.h>
15#include <net/hwbm.h>
16
17void hwbm_buf_free(struct hwbm_pool *bm_pool, void *buf)
18{
19 if (likely(bm_pool->frag_size <= PAGE_SIZE))
20 skb_free_frag(buf);
21 else
22 kfree(buf);
23}
24EXPORT_SYMBOL_GPL(hwbm_buf_free);
25
26/* Refill processing for HW buffer management */
27int hwbm_pool_refill(struct hwbm_pool *bm_pool, gfp_t gfp)
28{
29 int frag_size = bm_pool->frag_size;
30 void *buf;
31
32 if (likely(frag_size <= PAGE_SIZE))
33 buf = netdev_alloc_frag(frag_size);
34 else
35 buf = kmalloc(frag_size, gfp);
36
37 if (!buf)
38 return -ENOMEM;
39
40 if (bm_pool->construct &&
41 bm_pool->construct(bm_pool, buf)) {
42 hwbm_buf_free(bm_pool, buf);
43 return -ENOMEM;
44 }
45
46 return 0;
47}
48EXPORT_SYMBOL_GPL(hwbm_pool_refill);
49
50int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num, gfp_t gfp)
51{
52 int err, i;
53 unsigned long flags;
54
55 spin_lock_irqsave(&bm_pool->lock, flags);
56 if (bm_pool->buf_num == bm_pool->size) {
57 pr_warn("pool already filled\n");
58 spin_unlock_irqrestore(&bm_pool->lock, flags);
59 return bm_pool->buf_num;
60 }
61 if (buf_num + bm_pool->buf_num > bm_pool->size) {
62 pr_warn("cannot allocate %d buffers for pool\n", buf_num);
63 spin_unlock_irqrestore(&bm_pool->lock, flags);
64 return 0;
65 }
66 if ((buf_num + bm_pool->buf_num) < bm_pool->buf_num) {
67 pr_warn("Adding %d buffers to the %d current buffers will overflow\n",
68 buf_num, bm_pool->buf_num);
69 spin_unlock_irqrestore(&bm_pool->lock, flags);
70 return 0;
71 }
72
73 for (i = 0; i < buf_num; i++) {
74 err = hwbm_pool_refill(bm_pool, gfp);
75 if (err < 0)
76 break;
77 }
78
79 /* Update BM driver with number of buffers added to pool */
80 bm_pool->buf_num += i;
81
82 pr_debug("hwpm pool: %d of %d buffers added\n", i, buf_num);
83 spin_unlock_irqrestore(&bm_pool->lock, flags);
84
85 return i;
86}
87EXPORT_SYMBOL_GPL(hwbm_pool_add);
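
A driver owning a hardware buffer manager initializes the pool fields and calls hwbm_pool_add() once at setup; hwbm_pool_refill() then serves the RX path. A hedged driver-side sketch (foo_construct(), which would hand the buffer's DMA address to the hardware, is entirely hypothetical):

    /* Sketch: pool setup in a hypothetical driver. */
    static int foo_construct(struct hwbm_pool *p, void *buf)
    {
            /* map buf and push its DMA address to the HW pool */
            return 0;
    }

    static int foo_bm_pool_init(struct hwbm_pool *pool, int nbufs)
    {
            spin_lock_init(&pool->lock);
            pool->size = nbufs;
            pool->frag_size = SKB_DATA_ALIGN(2048) +
                              SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
            pool->construct = foo_construct;

            return hwbm_pool_add(pool, nbufs, GFP_KERNEL) == nbufs ?
                   0 : -ENOMEM;
    }
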
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index 299cfc24d888..669ecc9f884e 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -27,6 +27,31 @@
27#include <net/rtnetlink.h> 27#include <net/rtnetlink.h>
28#include <net/ip6_fib.h> 28#include <net/ip6_fib.h>
29 29
30#ifdef CONFIG_MODULES
31
32static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
33{
34 /* Only lwt encaps implemented without using an interface for
35 * the encap need to return a string here.
36 */
37 switch (encap_type) {
38 case LWTUNNEL_ENCAP_MPLS:
39 return "MPLS";
40 case LWTUNNEL_ENCAP_ILA:
41 return "ILA";
42 case LWTUNNEL_ENCAP_IP6:
43 case LWTUNNEL_ENCAP_IP:
44 case LWTUNNEL_ENCAP_NONE:
45 case __LWTUNNEL_ENCAP_MAX:
46 /* should not have got here */
47 WARN_ON(1);
48 break;
49 }
50 return NULL;
51}
52
53#endif /* CONFIG_MODULES */
54
30struct lwtunnel_state *lwtunnel_state_alloc(int encap_len) 55struct lwtunnel_state *lwtunnel_state_alloc(int encap_len)
31{ 56{
32 struct lwtunnel_state *lws; 57 struct lwtunnel_state *lws;
@@ -85,6 +110,18 @@ int lwtunnel_build_state(struct net_device *dev, u16 encap_type,
85 ret = -EOPNOTSUPP; 110 ret = -EOPNOTSUPP;
86 rcu_read_lock(); 111 rcu_read_lock();
87 ops = rcu_dereference(lwtun_encaps[encap_type]); 112 ops = rcu_dereference(lwtun_encaps[encap_type]);
113#ifdef CONFIG_MODULES
114 if (!ops) {
115 const char *encap_type_str = lwtunnel_encap_str(encap_type);
116
117 if (encap_type_str) {
118 rcu_read_unlock();
119 request_module("rtnl-lwt-%s", encap_type_str);
120 rcu_read_lock();
121 ops = rcu_dereference(lwtun_encaps[encap_type]);
122 }
123 }
124#endif
88 if (likely(ops && ops->build_state)) 125 if (likely(ops && ops->build_state))
89 ret = ops->build_state(dev, encap, family, cfg, lws); 126 ret = ops->build_state(dev, encap, family, cfg, lws);
90 rcu_read_unlock(); 127 rcu_read_unlock();
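
The request_module() above resolves via a module alias of the form "rtnl-lwt-<NAME>", so configuring, say, an MPLS encap route can pull in the encap module automatically. The alias an encap module declares, assuming the MODULE_ALIAS_RTNL_LWT() helper introduced alongside this change:

    /* In the encap module, e.g. net/mpls/mpls_iptunnel.c: */
    #include <net/lwtunnel.h>

    MODULE_ALIAS_RTNL_LWT(MPLS);    /* => MODULE_ALIAS("rtnl-lwt-MPLS") */
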
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index b6c8a6629b39..2b3f76fe65f4 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -29,7 +29,6 @@
29 29
30#ifdef CONFIG_SYSFS 30#ifdef CONFIG_SYSFS
31static const char fmt_hex[] = "%#x\n"; 31static const char fmt_hex[] = "%#x\n";
32static const char fmt_long_hex[] = "%#lx\n";
33static const char fmt_dec[] = "%d\n"; 32static const char fmt_dec[] = "%d\n";
34static const char fmt_ulong[] = "%lu\n"; 33static const char fmt_ulong[] = "%lu\n";
35static const char fmt_u64[] = "%llu\n"; 34static const char fmt_u64[] = "%llu\n";
@@ -199,9 +198,10 @@ static ssize_t speed_show(struct device *dev,
199 return restart_syscall(); 198 return restart_syscall();
200 199
201 if (netif_running(netdev)) { 200 if (netif_running(netdev)) {
202 struct ethtool_cmd cmd; 201 struct ethtool_link_ksettings cmd;
203 if (!__ethtool_get_settings(netdev, &cmd)) 202
204 ret = sprintf(buf, fmt_dec, ethtool_cmd_speed(&cmd)); 203 if (!__ethtool_get_link_ksettings(netdev, &cmd))
204 ret = sprintf(buf, fmt_dec, cmd.base.speed);
205 } 205 }
206 rtnl_unlock(); 206 rtnl_unlock();
207 return ret; 207 return ret;
@@ -218,10 +218,12 @@ static ssize_t duplex_show(struct device *dev,
218 return restart_syscall(); 218 return restart_syscall();
219 219
220 if (netif_running(netdev)) { 220 if (netif_running(netdev)) {
221 struct ethtool_cmd cmd; 221 struct ethtool_link_ksettings cmd;
222 if (!__ethtool_get_settings(netdev, &cmd)) { 222
223 if (!__ethtool_get_link_ksettings(netdev, &cmd)) {
223 const char *duplex; 224 const char *duplex;
224 switch (cmd.duplex) { 225
226 switch (cmd.base.duplex) {
225 case DUPLEX_HALF: 227 case DUPLEX_HALF:
226 duplex = "half"; 228 duplex = "half";
227 break; 229 break;
@@ -574,6 +576,7 @@ NETSTAT_ENTRY(tx_heartbeat_errors);
574NETSTAT_ENTRY(tx_window_errors); 576NETSTAT_ENTRY(tx_window_errors);
575NETSTAT_ENTRY(rx_compressed); 577NETSTAT_ENTRY(rx_compressed);
576NETSTAT_ENTRY(tx_compressed); 578NETSTAT_ENTRY(tx_compressed);
579NETSTAT_ENTRY(rx_nohandler);
577 580
578static struct attribute *netstat_attrs[] = { 581static struct attribute *netstat_attrs[] = {
579 &dev_attr_rx_packets.attr, 582 &dev_attr_rx_packets.attr,
@@ -599,6 +602,7 @@ static struct attribute *netstat_attrs[] = {
599 &dev_attr_tx_window_errors.attr, 602 &dev_attr_tx_window_errors.attr,
600 &dev_attr_rx_compressed.attr, 603 &dev_attr_rx_compressed.attr,
601 &dev_attr_tx_compressed.attr, 604 &dev_attr_tx_compressed.attr,
605 &dev_attr_rx_nohandler.attr,
602 NULL 606 NULL
603}; 607};
604 608
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 0260c84ed83c..11fce17274f6 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -9,7 +9,6 @@
9 * Authors: Thomas Graf <tgraf@suug.ch> 9 * Authors: Thomas Graf <tgraf@suug.ch>
10 */ 10 */
11 11
12#include <linux/module.h>
13#include <linux/slab.h> 12#include <linux/slab.h>
14#include <linux/cgroup.h> 13#include <linux/cgroup.h>
15#include <linux/fdtable.h> 14#include <linux/fdtable.h>
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index f1efbc39ef6b..2ec86fc552df 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -11,7 +11,6 @@
11 11
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13 13
14#include <linux/module.h>
15#include <linux/slab.h> 14#include <linux/slab.h>
16#include <linux/types.h> 15#include <linux/types.h>
17#include <linux/string.h> 16#include <linux/string.h>
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 1474cfd2dc1c..20999aa596dd 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2856,7 +2856,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
2856 *vlan_encapsulated_proto = htons(ETH_P_IP); 2856 *vlan_encapsulated_proto = htons(ETH_P_IP);
2857 } 2857 }
2858 2858
2859 skb_set_mac_header(skb, 0); 2859 skb_reset_mac_header(skb);
2860 skb_set_network_header(skb, skb->len); 2860 skb_set_network_header(skb, skb->len);
2861 iph = (struct iphdr *) skb_put(skb, sizeof(struct iphdr)); 2861 iph = (struct iphdr *) skb_put(skb, sizeof(struct iphdr));
2862 2862
@@ -2983,7 +2983,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
2983 *vlan_encapsulated_proto = htons(ETH_P_IPV6); 2983 *vlan_encapsulated_proto = htons(ETH_P_IPV6);
2984 } 2984 }
2985 2985
2986 skb_set_mac_header(skb, 0); 2986 skb_reset_mac_header(skb);
2987 skb_set_network_header(skb, skb->len); 2987 skb_set_network_header(skb, skb->len);
2988 iph = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr)); 2988 iph = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));
2989 2989
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d735e854f916..65763c29f845 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -804,6 +804,8 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
804 804
805 a->rx_compressed = b->rx_compressed; 805 a->rx_compressed = b->rx_compressed;
806 a->tx_compressed = b->tx_compressed; 806 a->tx_compressed = b->tx_compressed;
807
808 a->rx_nohandler = b->rx_nohandler;
807} 809}
808 810
809static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b) 811static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b)
@@ -893,6 +895,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
893 + nla_total_size(4) /* IFLA_PROMISCUITY */ 895 + nla_total_size(4) /* IFLA_PROMISCUITY */
894 + nla_total_size(4) /* IFLA_NUM_TX_QUEUES */ 896 + nla_total_size(4) /* IFLA_NUM_TX_QUEUES */
895 + nla_total_size(4) /* IFLA_NUM_RX_QUEUES */ 897 + nla_total_size(4) /* IFLA_NUM_RX_QUEUES */
898 + nla_total_size(4) /* IFLA_MAX_GSO_SEGS */
899 + nla_total_size(4) /* IFLA_MAX_GSO_SIZE */
896 + nla_total_size(1) /* IFLA_OPERSTATE */ 900 + nla_total_size(1) /* IFLA_OPERSTATE */
897 + nla_total_size(1) /* IFLA_LINKMODE */ 901 + nla_total_size(1) /* IFLA_LINKMODE */
898 + nla_total_size(4) /* IFLA_CARRIER_CHANGES */ 902 + nla_total_size(4) /* IFLA_CARRIER_CHANGES */
@@ -905,6 +909,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
905 + rtnl_link_get_af_size(dev, ext_filter_mask) /* IFLA_AF_SPEC */ 909 + rtnl_link_get_af_size(dev, ext_filter_mask) /* IFLA_AF_SPEC */
906 + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */ 910 + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */
907 + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */ 911 + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */
912 + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */
908 + nla_total_size(1); /* IFLA_PROTO_DOWN */ 913 + nla_total_size(1); /* IFLA_PROTO_DOWN */
909 914
910} 915}
@@ -1175,14 +1180,16 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
1175 1180
1176static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev) 1181static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
1177{ 1182{
1178 struct rtnl_link_ifmap map = { 1183 struct rtnl_link_ifmap map;
1179 .mem_start = dev->mem_start, 1184
1180 .mem_end = dev->mem_end, 1185 memset(&map, 0, sizeof(map));
1181 .base_addr = dev->base_addr, 1186 map.mem_start = dev->mem_start;
1182 .irq = dev->irq, 1187 map.mem_end = dev->mem_end;
1183 .dma = dev->dma, 1188 map.base_addr = dev->base_addr;
1184 .port = dev->if_port, 1189 map.irq = dev->irq;
1185 }; 1190 map.dma = dev->dma;
1191 map.port = dev->if_port;
1192
1186 if (nla_put(skb, IFLA_MAP, sizeof(map), &map)) 1193 if (nla_put(skb, IFLA_MAP, sizeof(map), &map))
1187 return -EMSGSIZE; 1194 return -EMSGSIZE;
1188 1195
@@ -1221,6 +1228,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
1221 nla_put_u32(skb, IFLA_GROUP, dev->group) || 1228 nla_put_u32(skb, IFLA_GROUP, dev->group) ||
1222 nla_put_u32(skb, IFLA_PROMISCUITY, dev->promiscuity) || 1229 nla_put_u32(skb, IFLA_PROMISCUITY, dev->promiscuity) ||
1223 nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) || 1230 nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) ||
1231 nla_put_u32(skb, IFLA_GSO_MAX_SEGS, dev->gso_max_segs) ||
1232 nla_put_u32(skb, IFLA_GSO_MAX_SIZE, dev->gso_max_size) ||
1224#ifdef CONFIG_RPS 1233#ifdef CONFIG_RPS
1225 nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) || 1234 nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) ||
1226#endif 1235#endif
@@ -1387,15 +1396,8 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
1387 [IFLA_VF_RSS_QUERY_EN] = { .len = sizeof(struct ifla_vf_rss_query_en) }, 1396 [IFLA_VF_RSS_QUERY_EN] = { .len = sizeof(struct ifla_vf_rss_query_en) },
1388 [IFLA_VF_STATS] = { .type = NLA_NESTED }, 1397 [IFLA_VF_STATS] = { .type = NLA_NESTED },
1389 [IFLA_VF_TRUST] = { .len = sizeof(struct ifla_vf_trust) }, 1398 [IFLA_VF_TRUST] = { .len = sizeof(struct ifla_vf_trust) },
1390}; 1399 [IFLA_VF_IB_NODE_GUID] = { .len = sizeof(struct ifla_vf_guid) },
1391 1400 [IFLA_VF_IB_PORT_GUID] = { .len = sizeof(struct ifla_vf_guid) },
1392static const struct nla_policy ifla_vf_stats_policy[IFLA_VF_STATS_MAX + 1] = {
1393 [IFLA_VF_STATS_RX_PACKETS] = { .type = NLA_U64 },
1394 [IFLA_VF_STATS_TX_PACKETS] = { .type = NLA_U64 },
1395 [IFLA_VF_STATS_RX_BYTES] = { .type = NLA_U64 },
1396 [IFLA_VF_STATS_TX_BYTES] = { .type = NLA_U64 },
1397 [IFLA_VF_STATS_BROADCAST] = { .type = NLA_U64 },
1398 [IFLA_VF_STATS_MULTICAST] = { .type = NLA_U64 },
1399}; 1401};
1400 1402
1401static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = { 1403static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
@@ -1412,6 +1414,58 @@ static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
1412 [IFLA_PORT_RESPONSE] = { .type = NLA_U16, }, 1414 [IFLA_PORT_RESPONSE] = { .type = NLA_U16, },
1413}; 1415};
1414 1416
1417static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla)
1418{
1419 const struct rtnl_link_ops *ops = NULL;
1420 struct nlattr *linfo[IFLA_INFO_MAX + 1];
1421
1422 if (nla_parse_nested(linfo, IFLA_INFO_MAX, nla, ifla_info_policy) < 0)
1423 return NULL;
1424
1425 if (linfo[IFLA_INFO_KIND]) {
1426 char kind[MODULE_NAME_LEN];
1427
1428 nla_strlcpy(kind, linfo[IFLA_INFO_KIND], sizeof(kind));
1429 ops = rtnl_link_ops_get(kind);
1430 }
1431
1432 return ops;
1433}
1434
1435static bool link_master_filtered(struct net_device *dev, int master_idx)
1436{
1437 struct net_device *master;
1438
1439 if (!master_idx)
1440 return false;
1441
1442 master = netdev_master_upper_dev_get(dev);
1443 if (!master || master->ifindex != master_idx)
1444 return true;
1445
1446 return false;
1447}
1448
1449static bool link_kind_filtered(const struct net_device *dev,
1450 const struct rtnl_link_ops *kind_ops)
1451{
1452 if (kind_ops && dev->rtnl_link_ops != kind_ops)
1453 return true;
1454
1455 return false;
1456}
1457
1458static bool link_dump_filtered(struct net_device *dev,
1459 int master_idx,
1460 const struct rtnl_link_ops *kind_ops)
1461{
1462 if (link_master_filtered(dev, master_idx) ||
1463 link_kind_filtered(dev, kind_ops))
1464 return true;
1465
1466 return false;
1467}
1468
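
A dump request can now carry IFLA_MASTER and/or IFLA_LINKINFO, and only matching devices are returned; replies carry NLM_F_DUMP_FILTERED so userspace can tell filtering took place. A hedged raw-netlink sketch of such a request (every field here is 4-byte aligned, so no explicit attribute padding is needed):

    #include <linux/netlink.h>
    #include <linux/rtnetlink.h>
    #include <linux/if_link.h>

    /* Sketch: RTM_GETLINK dump limited to ports of master ifindex 4. */
    static struct {
            struct nlmsghdr  nlh;
            struct ifinfomsg ifm;
            struct nlattr    nla;
            __u32            master_idx;
    } req = {
            .nlh.nlmsg_len   = sizeof(req),
            .nlh.nlmsg_type  = RTM_GETLINK,
            .nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
            .nla.nla_type    = IFLA_MASTER,
            .nla.nla_len     = NLA_HDRLEN + sizeof(__u32),
            .master_idx      = 4,           /* e.g. a bridge's ifindex */
    };
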
1415static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) 1469static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
1416{ 1470{
1417 struct net *net = sock_net(skb->sk); 1471 struct net *net = sock_net(skb->sk);
@@ -1421,6 +1475,9 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
1421 struct hlist_head *head; 1475 struct hlist_head *head;
1422 struct nlattr *tb[IFLA_MAX+1]; 1476 struct nlattr *tb[IFLA_MAX+1];
1423 u32 ext_filter_mask = 0; 1477 u32 ext_filter_mask = 0;
1478 const struct rtnl_link_ops *kind_ops = NULL;
1479 unsigned int flags = NLM_F_MULTI;
1480 int master_idx = 0;
1424 int err; 1481 int err;
1425 int hdrlen; 1482 int hdrlen;
1426 1483
@@ -1443,18 +1500,29 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
1443 1500
1444 if (tb[IFLA_EXT_MASK]) 1501 if (tb[IFLA_EXT_MASK])
1445 ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); 1502 ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
1503
1504 if (tb[IFLA_MASTER])
1505 master_idx = nla_get_u32(tb[IFLA_MASTER]);
1506
1507 if (tb[IFLA_LINKINFO])
1508 kind_ops = linkinfo_to_kind_ops(tb[IFLA_LINKINFO]);
1509
1510 if (master_idx || kind_ops)
1511 flags |= NLM_F_DUMP_FILTERED;
1446 } 1512 }
1447 1513
1448 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { 1514 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1449 idx = 0; 1515 idx = 0;
1450 head = &net->dev_index_head[h]; 1516 head = &net->dev_index_head[h];
1451 hlist_for_each_entry(dev, head, index_hlist) { 1517 hlist_for_each_entry(dev, head, index_hlist) {
1518 if (link_dump_filtered(dev, master_idx, kind_ops))
1519 continue;
1452 if (idx < s_idx) 1520 if (idx < s_idx)
1453 goto cont; 1521 goto cont;
1454 err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, 1522 err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
1455 NETLINK_CB(cb->skb).portid, 1523 NETLINK_CB(cb->skb).portid,
1456 cb->nlh->nlmsg_seq, 0, 1524 cb->nlh->nlmsg_seq, 0,
1457 NLM_F_MULTI, 1525 flags,
1458 ext_filter_mask); 1526 ext_filter_mask);
1459 /* If we ran out of room on the first message, 1527 /* If we ran out of room on the first message,
1460 * we're in trouble 1528 * we're in trouble
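Editor's note: the new NLM_F_DUMP_FILTERED path above can be exercised from userspace by appending IFLA_MASTER to an RTM_GETLINK dump request. Below is a minimal, illustrative sketch using the standard rtnetlink(7) request layout; it assumes a Linux host with these uapi headers, the master ifindex value (4) is invented, and reading/parsing the RTM_NEWLINK replies is omitted.

/* sketch: dump only the links enslaved to a given master ifindex */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

int main(void)
{
	struct {
		struct nlmsghdr nlh;
		struct ifinfomsg ifm;
		char attrbuf[64];
	} req;
	struct rtattr *rta;
	__u32 master_idx = 4;	/* hypothetical bridge/bond ifindex */
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

	if (fd < 0)
		return 1;
	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifm));
	req.nlh.nlmsg_type = RTM_GETLINK;
	req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
	req.ifm.ifi_family = AF_UNSPEC;
	/* append IFLA_MASTER so the kernel filters the dump for us */
	rta = (struct rtattr *)((char *)&req + NLMSG_ALIGN(req.nlh.nlmsg_len));
	rta->rta_type = IFLA_MASTER;
	rta->rta_len = RTA_LENGTH(sizeof(master_idx));
	memcpy(RTA_DATA(rta), &master_idx, sizeof(master_idx));
	req.nlh.nlmsg_len = NLMSG_ALIGN(req.nlh.nlmsg_len) + RTA_LENGTH(sizeof(master_idx));
	if (send(fd, &req, req.nlh.nlmsg_len, 0) < 0)
		perror("send");
	/* replies (one RTM_NEWLINK per matching device) would be read here */
	close(fd);
	return 0;
}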
@@ -1534,6 +1602,22 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
1534 return 0; 1602 return 0;
1535} 1603}
1536 1604
1605static int handle_infiniband_guid(struct net_device *dev, struct ifla_vf_guid *ivt,
1606 int guid_type)
1607{
1608 const struct net_device_ops *ops = dev->netdev_ops;
1609
1610 return ops->ndo_set_vf_guid(dev, ivt->vf, ivt->guid, guid_type);
1611}
1612
1613static int handle_vf_guid(struct net_device *dev, struct ifla_vf_guid *ivt, int guid_type)
1614{
1615 if (dev->type != ARPHRD_INFINIBAND)
1616 return -EOPNOTSUPP;
1617
1618 return handle_infiniband_guid(dev, ivt, guid_type);
1619}
1620
1537static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) 1621static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
1538{ 1622{
1539 const struct net_device_ops *ops = dev->netdev_ops; 1623 const struct net_device_ops *ops = dev->netdev_ops;
@@ -1636,6 +1720,24 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
1636 return err; 1720 return err;
1637 } 1721 }
1638 1722
1723 if (tb[IFLA_VF_IB_NODE_GUID]) {
1724 struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_NODE_GUID]);
1725
1726 if (!ops->ndo_set_vf_guid)
1727 return -EOPNOTSUPP;
1728
1729 return handle_vf_guid(dev, ivt, IFLA_VF_IB_NODE_GUID);
1730 }
1731
1732 if (tb[IFLA_VF_IB_PORT_GUID]) {
1733 struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_PORT_GUID]);
1734
1735 if (!ops->ndo_set_vf_guid)
1736 return -EOPNOTSUPP;
1737
1738 return handle_vf_guid(dev, ivt, IFLA_VF_IB_PORT_GUID);
1739 }
1740
1639 return err; 1741 return err;
1640} 1742}
1641 1743
@@ -2911,6 +3013,7 @@ int ndo_dflt_fdb_dump(struct sk_buff *skb,
2911 nlmsg_populate_fdb(skb, cb, dev, &idx, &dev->mc); 3013 nlmsg_populate_fdb(skb, cb, dev, &idx, &dev->mc);
2912out: 3014out:
2913 netif_addr_unlock_bh(dev); 3015 netif_addr_unlock_bh(dev);
3016 cb->args[1] = err;
2914 return idx; 3017 return idx;
2915} 3018}
2916EXPORT_SYMBOL(ndo_dflt_fdb_dump); 3019EXPORT_SYMBOL(ndo_dflt_fdb_dump);
@@ -2944,6 +3047,7 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
2944 ops = br_dev->netdev_ops; 3047 ops = br_dev->netdev_ops;
2945 } 3048 }
2946 3049
3050 cb->args[1] = 0;
2947 for_each_netdev(net, dev) { 3051 for_each_netdev(net, dev) {
2948 if (brport_idx && (dev->ifindex != brport_idx)) 3052 if (brport_idx && (dev->ifindex != brport_idx))
2949 continue; 3053 continue;
@@ -2971,12 +3075,16 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
2971 idx = cops->ndo_fdb_dump(skb, cb, br_dev, dev, 3075 idx = cops->ndo_fdb_dump(skb, cb, br_dev, dev,
2972 idx); 3076 idx);
2973 } 3077 }
3078 if (cb->args[1] == -EMSGSIZE)
3079 break;
2974 3080
2975 if (dev->netdev_ops->ndo_fdb_dump) 3081 if (dev->netdev_ops->ndo_fdb_dump)
2976 idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, NULL, 3082 idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, NULL,
2977 idx); 3083 idx);
2978 else 3084 else
2979 idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); 3085 idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx);
3086 if (cb->args[1] == -EMSGSIZE)
3087 break;
2980 3088
2981 cops = NULL; 3089 cops = NULL;
2982 } 3090 }
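Editor's note: the -EMSGSIZE handling above threads a stop reason through the netlink callback's scratch space (cb->args[1]) so the outer per-device loop can bail out once the message is full. A condensed userspace analog of that pattern, with all names invented for illustration (this is not kernel API):

#include <stdio.h>
#include <string.h>
#include <errno.h>

struct dump_cb { long args[2]; };	/* mimics netlink_callback::args */

static int fill_one(char *buf, size_t room, const char *name,
		    struct dump_cb *cb)
{
	size_t need = strlen(name) + 1;

	if (need > room) {
		cb->args[1] = -EMSGSIZE;	/* remember why we stopped */
		return -1;
	}
	memcpy(buf, name, need);
	return (int)need;
}

int main(void)
{
	const char *devs[] = { "eth0", "eth1", "longdevicename0" };
	char msg[12];				/* deliberately tiny "skb" */
	size_t off = 0, i;
	struct dump_cb cb = { { 0, 0 } };

	for (i = 0; i < 3; i++) {
		int n = fill_one(msg + off, sizeof(msg) - off, devs[i], &cb);
		if (cb.args[1] == -EMSGSIZE)
			break;			/* resume in the next dump round */
		if (n > 0)
			off += (size_t)n;
	}
	printf("filled %zu bytes, stop reason %ld\n", off, cb.args[1]);
	return 0;
}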
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 5bf88f58bee7..e561f9f07d6d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -349,8 +349,16 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
349} 349}
350EXPORT_SYMBOL(build_skb); 350EXPORT_SYMBOL(build_skb);
351 351
352#define NAPI_SKB_CACHE_SIZE 64
353
354struct napi_alloc_cache {
355 struct page_frag_cache page;
356 size_t skb_count;
357 void *skb_cache[NAPI_SKB_CACHE_SIZE];
358};
359
352static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache); 360static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
353static DEFINE_PER_CPU(struct page_frag_cache, napi_alloc_cache); 361static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
354 362
355static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) 363static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
356{ 364{
@@ -380,9 +388,9 @@ EXPORT_SYMBOL(netdev_alloc_frag);
380 388
381static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) 389static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
382{ 390{
383 struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache); 391 struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
384 392
385 return __alloc_page_frag(nc, fragsz, gfp_mask); 393 return __alloc_page_frag(&nc->page, fragsz, gfp_mask);
386} 394}
387 395
388void *napi_alloc_frag(unsigned int fragsz) 396void *napi_alloc_frag(unsigned int fragsz)
@@ -476,7 +484,7 @@ EXPORT_SYMBOL(__netdev_alloc_skb);
476struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, 484struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
477 gfp_t gfp_mask) 485 gfp_t gfp_mask)
478{ 486{
479 struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache); 487 struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
480 struct sk_buff *skb; 488 struct sk_buff *skb;
481 void *data; 489 void *data;
482 490
@@ -496,7 +504,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
496 if (sk_memalloc_socks()) 504 if (sk_memalloc_socks())
497 gfp_mask |= __GFP_MEMALLOC; 505 gfp_mask |= __GFP_MEMALLOC;
498 506
499 data = __alloc_page_frag(nc, len, gfp_mask); 507 data = __alloc_page_frag(&nc->page, len, gfp_mask);
500 if (unlikely(!data)) 508 if (unlikely(!data))
501 return NULL; 509 return NULL;
502 510
@@ -507,7 +515,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
507 } 515 }
508 516
509 /* use OR instead of assignment to avoid clearing of bits in mask */ 517 /* use OR instead of assignment to avoid clearing of bits in mask */
510 if (nc->pfmemalloc) 518 if (nc->page.pfmemalloc)
511 skb->pfmemalloc = 1; 519 skb->pfmemalloc = 1;
512 skb->head_frag = 1; 520 skb->head_frag = 1;
513 521
@@ -749,6 +757,73 @@ void consume_skb(struct sk_buff *skb)
749} 757}
750EXPORT_SYMBOL(consume_skb); 758EXPORT_SYMBOL(consume_skb);
751 759
760void __kfree_skb_flush(void)
761{
762 struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
763
764 /* flush skb_cache if it contains objects */

765 if (nc->skb_count) {
766 kmem_cache_free_bulk(skbuff_head_cache, nc->skb_count,
767 nc->skb_cache);
768 nc->skb_count = 0;
769 }
770}
771
772static inline void _kfree_skb_defer(struct sk_buff *skb)
773{
774 struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
775
776 /* drop skb->head and call any destructors for the packet */
777 skb_release_all(skb);
778
779 /* record the skb on the CPU-local list */
780 nc->skb_cache[nc->skb_count++] = skb;
781
782#ifdef CONFIG_SLUB
783 /* SLUB writes into objects when freeing */
784 prefetchw(skb);
785#endif
786
787 /* flush skb_cache once it is full */
788 if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) {
789 kmem_cache_free_bulk(skbuff_head_cache, NAPI_SKB_CACHE_SIZE,
790 nc->skb_cache);
791 nc->skb_count = 0;
792 }
793}
794void __kfree_skb_defer(struct sk_buff *skb)
795{
796 _kfree_skb_defer(skb);
797}
798
799void napi_consume_skb(struct sk_buff *skb, int budget)
800{
801 if (unlikely(!skb))
802 return;
803
804 /* A zero budget indicates a non-NAPI context called us, like netpoll */
805 if (unlikely(!budget)) {
806 dev_consume_skb_any(skb);
807 return;
808 }
809
810 if (likely(atomic_read(&skb->users) == 1))
811 smp_rmb();
812 else if (likely(!atomic_dec_and_test(&skb->users)))
813 return;
814 /* if we reach here, the SKB is ready to be freed */
815 trace_consume_skb(skb);
816
817 /* fclone'd SKBs come from a different slab cache; don't defer them */
818 if (skb->fclone != SKB_FCLONE_UNAVAILABLE) {
819 __kfree_skb(skb);
820 return;
821 }
822
823 _kfree_skb_defer(skb);
824}
825EXPORT_SYMBOL(napi_consume_skb);
826
752/* Make sure a field is enclosed inside headers_start/headers_end section */ 827/* Make sure a field is enclosed inside headers_start/headers_end section */
753#define CHECK_SKB_FIELD(field) \ 828#define CHECK_SKB_FIELD(field) \
754 BUILD_BUG_ON(offsetof(struct sk_buff, field) < \ 829 BUILD_BUG_ON(offsetof(struct sk_buff, field) < \
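Editor's note: kmem_cache_free_bulk() amortizes per-object slab overhead, which is what the NAPI_SKB_CACHE batching above buys. The same idea in plain, runnable C, with free() standing in for the slab bulk free; CACHE_SIZE mirrors NAPI_SKB_CACHE_SIZE but is otherwise arbitrary here:

#include <stdlib.h>

#define CACHE_SIZE 64

struct free_cache {
	size_t count;
	void *slots[CACHE_SIZE];
};

static void cache_flush(struct free_cache *c)
{
	size_t i;

	for (i = 0; i < c->count; i++)
		free(c->slots[i]);	/* one bulk pass, better locality */
	c->count = 0;
}

static void cache_defer(struct free_cache *c, void *obj)
{
	c->slots[c->count++] = obj;
	if (c->count == CACHE_SIZE)	/* flush when the batch is full */
		cache_flush(c);
}

int main(void)
{
	struct free_cache c = { 0 };
	int i;

	for (i = 0; i < 200; i++)
		cache_defer(&c, malloc(256));
	cache_flush(&c);		/* mirrors __kfree_skb_flush() */
	return 0;
}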
@@ -1843,6 +1918,7 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
1843 struct splice_pipe_desc *spd, struct sock *sk) 1918 struct splice_pipe_desc *spd, struct sock *sk)
1844{ 1919{
1845 int seg; 1920 int seg;
1921 struct sk_buff *iter;
1846 1922
1847 /* map the linear part : 1923 /* map the linear part :
1848 * If skb->head_frag is set, this 'linear' part is backed by a 1924 * If skb->head_frag is set, this 'linear' part is backed by a
@@ -1869,6 +1945,19 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
1869 return true; 1945 return true;
1870 } 1946 }
1871 1947
1948 skb_walk_frags(skb, iter) {
1949 if (*offset >= iter->len) {
1950 *offset -= iter->len;
1951 continue;
1952 }
1953 /* __skb_splice_bits() only fails if the output has no room
1954 * left, so no point in going over the frag_list for the error
1955 * case.
1956 */
1957 if (__skb_splice_bits(iter, pipe, offset, len, spd, sk))
1958 return true;
1959 }
1960
1872 return false; 1961 return false;
1873} 1962}
1874 1963
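Editor's note: the subtle part of the recursion added above is the shared offset/length bookkeeping: whole fragments that fall before the requested offset are skipped by decrementing *offset, and anything consumed resets the offset for later parts. A standalone model of that traversal (types and names invented; the real function additionally stops early when the pipe runs out of room):

#include <stdio.h>
#include <string.h>

struct buf {
	const char *data;
	unsigned int len;
	struct buf *frags;	/* singly linked "frag list" */
	struct buf *next;
};

static void splice_bits(const struct buf *b, unsigned int *offset,
			unsigned int *len, char *out, unsigned int *outlen)
{
	const struct buf *it;

	if (*offset < b->len) {
		unsigned int take = b->len - *offset;

		if (take > *len)
			take = *len;
		memcpy(out + *outlen, b->data + *offset, take);
		*outlen += take;
		*len -= take;
		*offset = 0;	/* later parts start from their beginning */
	} else {
		*offset -= b->len;	/* skip the whole linear part */
	}
	for (it = b->frags; it && *len; it = it->next) {
		if (*offset >= it->len) {
			*offset -= it->len;	/* fragment skipped entirely */
			continue;
		}
		splice_bits(it, offset, len, out, outlen);	/* recurse */
	}
}

int main(void)
{
	struct buf f2 = { "world", 5, NULL, NULL };
	struct buf f1 = { "_", 1, NULL, &f2 };
	struct buf top = { "hello", 5, &f1, NULL };
	char out[16];
	unsigned int off = 3, want = 6, n = 0;

	splice_bits(&top, &off, &want, out, &n);
	printf("%.*s\n", (int)n, out);	/* "lo_wor": 6 bytes from offset 3 */
	return 0;
}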
@@ -1895,9 +1984,7 @@ ssize_t skb_socket_splice(struct sock *sk,
1895 1984
1896/* 1985/*
1897 * Map data from the skb to a pipe. Should handle both the linear part, 1986 * Map data from the skb to a pipe. Should handle both the linear part,
1898 * the fragments, and the frag list. It does NOT handle frag lists within 1987 * the fragments, and the frag list.
1899 * the frag list, if such a thing exists. We'd probably need to recurse to
1900 * handle that cleanly.
1901 */ 1988 */
1902int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, 1989int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
1903 struct pipe_inode_info *pipe, unsigned int tlen, 1990 struct pipe_inode_info *pipe, unsigned int tlen,
@@ -1916,29 +2003,10 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
1916 .ops = &nosteal_pipe_buf_ops, 2003 .ops = &nosteal_pipe_buf_ops,
1917 .spd_release = sock_spd_release, 2004 .spd_release = sock_spd_release,
1918 }; 2005 };
1919 struct sk_buff *frag_iter;
1920 int ret = 0; 2006 int ret = 0;
1921 2007
1922 /* 2008 __skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk);
1923 * __skb_splice_bits() only fails if the output has no room left,
1924 * so no point in going over the frag_list for the error case.
1925 */
1926 if (__skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk))
1927 goto done;
1928 else if (!tlen)
1929 goto done;
1930
1931 /*
1932 * now see if we have a frag_list to map
1933 */
1934 skb_walk_frags(skb, frag_iter) {
1935 if (!tlen)
1936 break;
1937 if (__skb_splice_bits(frag_iter, pipe, &offset, &tlen, &spd, sk))
1938 break;
1939 }
1940 2009
1941done:
1942 if (spd.nr_pages) 2010 if (spd.nr_pages)
1943 ret = splice_cb(sk, pipe, &spd); 2011 ret = splice_cb(sk, pipe, &spd);
1944 2012
@@ -2948,6 +3016,24 @@ int skb_append_pagefrags(struct sk_buff *skb, struct page *page,
2948EXPORT_SYMBOL_GPL(skb_append_pagefrags); 3016EXPORT_SYMBOL_GPL(skb_append_pagefrags);
2949 3017
2950/** 3018/**
3019 * skb_push_rcsum - push skb and update receive checksum
3020 * @skb: buffer to update
3021 * @len: length of data pushed
3022 *
3023 * This function performs an skb_push on the packet and updates
3024 * the CHECKSUM_COMPLETE checksum. It should be used on
3025 * the receive path instead of skb_push() unless you know
3026 * that the checksum difference is zero (e.g., a valid IP header)
3027 * or you are setting ip_summed to CHECKSUM_NONE.
3028 */
3029static unsigned char *skb_push_rcsum(struct sk_buff *skb, unsigned len)
3030{
3031 skb_push(skb, len);
3032 skb_postpush_rcsum(skb, skb->data, len);
3033 return skb->data;
3034}
3035
3036/**
2951 * skb_pull_rcsum - pull skb and update receive checksum 3037 * skb_pull_rcsum - pull skb and update receive checksum
2952 * @skb: buffer to update 3038 * @skb: buffer to update
2953 * @len: length of data pulled 3039 * @len: length of data pulled
@@ -3006,8 +3092,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
3006 if (unlikely(!proto)) 3092 if (unlikely(!proto))
3007 return ERR_PTR(-EINVAL); 3093 return ERR_PTR(-EINVAL);
3008 3094
3009 csum = !head_skb->encap_hdr_csum && 3095 csum = !!can_checksum_protocol(features, proto);
3010 !!can_checksum_protocol(features, proto);
3011 3096
3012 headroom = skb_headroom(head_skb); 3097 headroom = skb_headroom(head_skb);
3013 pos = skb_headlen(head_skb); 3098 pos = skb_headlen(head_skb);
@@ -3100,13 +3185,15 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
3100 if (nskb->len == len + doffset) 3185 if (nskb->len == len + doffset)
3101 goto perform_csum_check; 3186 goto perform_csum_check;
3102 3187
3103 if (!sg && !nskb->remcsum_offload) { 3188 if (!sg) {
3104 nskb->ip_summed = CHECKSUM_NONE; 3189 if (!nskb->remcsum_offload)
3105 nskb->csum = skb_copy_and_csum_bits(head_skb, offset, 3190 nskb->ip_summed = CHECKSUM_NONE;
3106 skb_put(nskb, len), 3191 SKB_GSO_CB(nskb)->csum =
3107 len, 0); 3192 skb_copy_and_csum_bits(head_skb, offset,
3193 skb_put(nskb, len),
3194 len, 0);
3108 SKB_GSO_CB(nskb)->csum_start = 3195 SKB_GSO_CB(nskb)->csum_start =
3109 skb_headroom(nskb) + doffset; 3196 skb_headroom(nskb) + doffset;
3110 continue; 3197 continue;
3111 } 3198 }
3112 3199
@@ -3172,12 +3259,19 @@ skip_fraglist:
3172 nskb->truesize += nskb->data_len; 3259 nskb->truesize += nskb->data_len;
3173 3260
3174perform_csum_check: 3261perform_csum_check:
3175 if (!csum && !nskb->remcsum_offload) { 3262 if (!csum) {
3176 nskb->csum = skb_checksum(nskb, doffset, 3263 if (skb_has_shared_frag(nskb)) {
3177 nskb->len - doffset, 0); 3264 err = __skb_linearize(nskb);
3178 nskb->ip_summed = CHECKSUM_NONE; 3265 if (err)
3266 goto err;
3267 }
3268 if (!nskb->remcsum_offload)
3269 nskb->ip_summed = CHECKSUM_NONE;
3270 SKB_GSO_CB(nskb)->csum =
3271 skb_checksum(nskb, doffset,
3272 nskb->len - doffset, 0);
3179 SKB_GSO_CB(nskb)->csum_start = 3273 SKB_GSO_CB(nskb)->csum_start =
3180 skb_headroom(nskb) + doffset; 3274 skb_headroom(nskb) + doffset;
3181 } 3275 }
3182 } while ((offset += len) < head_skb->len); 3276 } while ((offset += len) < head_skb->len);
3183 3277
@@ -4084,9 +4178,9 @@ struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb,
4084 if (!pskb_may_pull(skb_chk, offset)) 4178 if (!pskb_may_pull(skb_chk, offset))
4085 goto err; 4179 goto err;
4086 4180
4087 __skb_pull(skb_chk, offset); 4181 skb_pull_rcsum(skb_chk, offset);
4088 ret = skb_chkf(skb_chk); 4182 ret = skb_chkf(skb_chk);
4089 __skb_push(skb_chk, offset); 4183 skb_push_rcsum(skb_chk, offset);
4090 4184
4091 if (ret) 4185 if (ret)
4092 goto err; 4186 goto err;
@@ -4219,7 +4313,6 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
4219 skb->skb_iif = 0; 4313 skb->skb_iif = 0;
4220 skb->ignore_df = 0; 4314 skb->ignore_df = 0;
4221 skb_dst_drop(skb); 4315 skb_dst_drop(skb);
4222 skb_sender_cpu_clear(skb);
4223 secpath_reset(skb); 4316 secpath_reset(skb);
4224 nf_reset(skb); 4317 nf_reset(skb);
4225 nf_reset_trace(skb); 4318 nf_reset_trace(skb);
@@ -4409,15 +4502,16 @@ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
4409 __skb_push(skb, offset); 4502 __skb_push(skb, offset);
4410 err = __vlan_insert_tag(skb, skb->vlan_proto, 4503 err = __vlan_insert_tag(skb, skb->vlan_proto,
4411 skb_vlan_tag_get(skb)); 4504 skb_vlan_tag_get(skb));
4412 if (err) 4505 if (err) {
4506 __skb_pull(skb, offset);
4413 return err; 4507 return err;
4508 }
4509
4414 skb->protocol = skb->vlan_proto; 4510 skb->protocol = skb->vlan_proto;
4415 skb->mac_len += VLAN_HLEN; 4511 skb->mac_len += VLAN_HLEN;
4416 __skb_pull(skb, offset);
4417 4512
4418 if (skb->ip_summed == CHECKSUM_COMPLETE) 4513 skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN);
4419 skb->csum = csum_add(skb->csum, csum_partial(skb->data 4514 __skb_pull(skb, offset);
4420 + (2 * ETH_ALEN), VLAN_HLEN, 0));
4421 } 4515 }
4422 __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); 4516 __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
4423 return 0; 4517 return 0;
diff --git a/net/core/sock.c b/net/core/sock.c
index 6c1c8bc93412..7e73c26b6bb4 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -221,7 +221,8 @@ static const char *const af_family_key_strings[AF_MAX+1] = {
221 "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , 221 "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" ,
222 "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , 222 "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" ,
223 "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" , 223 "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" ,
224 "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_MAX" 224 "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_KCM" ,
225 "sk_lock-AF_MAX"
225}; 226};
226static const char *const af_family_slock_key_strings[AF_MAX+1] = { 227static const char *const af_family_slock_key_strings[AF_MAX+1] = {
227 "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , 228 "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" ,
@@ -237,7 +238,8 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = {
237 "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , 238 "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
238 "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , 239 "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" ,
239 "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" , 240 "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" ,
240 "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_MAX" 241 "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_KCM" ,
242 "slock-AF_MAX"
241}; 243};
242static const char *const af_family_clock_key_strings[AF_MAX+1] = { 244static const char *const af_family_clock_key_strings[AF_MAX+1] = {
243 "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" , 245 "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" ,
@@ -253,7 +255,8 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = {
253 "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , 255 "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" ,
254 "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , 256 "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" ,
255 "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" , 257 "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" ,
256 "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_MAX" 258 "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_KCM" ,
259 "clock-AF_MAX"
257}; 260};
258 261
259/* 262/*
@@ -987,6 +990,10 @@ set_rcvbuf:
987 sk->sk_incoming_cpu = val; 990 sk->sk_incoming_cpu = val;
988 break; 991 break;
989 992
993 case SO_CNX_ADVICE:
994 if (val == 1)
995 dst_negative_advice(sk);
996 break;
990 default: 997 default:
991 ret = -ENOPROTOOPT; 998 ret = -ENOPROTOOPT;
992 break; 999 break;
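Editor's note: SO_CNX_ADVICE lets an application hint that its current path is performing badly; a value of 1 triggers dst_negative_advice() and a fresh route lookup on the next use. A minimal usage sketch; the fallback define (53, the asm-generic value) is only for userspace headers that predate the option:

#include <stdio.h>
#include <sys/socket.h>

#ifndef SO_CNX_ADVICE
#define SO_CNX_ADVICE 53
#endif

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	int val = 1;

	if (fd < 0)
		return 1;
	/* e.g. after the application notices repeated timeouts */
	if (setsockopt(fd, SOL_SOCKET, SO_CNX_ADVICE, &val, sizeof(val)) < 0)
		perror("setsockopt(SO_CNX_ADVICE)");
	return 0;
}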
@@ -1531,6 +1538,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
1531 newsk = NULL; 1538 newsk = NULL;
1532 goto out; 1539 goto out;
1533 } 1540 }
1541 RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
1534 1542
1535 newsk->sk_err = 0; 1543 newsk->sk_err = 0;
1536 newsk->sk_priority = 0; 1544 newsk->sk_priority = 0;
@@ -1903,7 +1911,7 @@ EXPORT_SYMBOL(sock_cmsg_send);
1903bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp) 1911bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
1904{ 1912{
1905 if (pfrag->page) { 1913 if (pfrag->page) {
1906 if (atomic_read(&pfrag->page->_count) == 1) { 1914 if (page_ref_count(pfrag->page) == 1) {
1907 pfrag->offset = 0; 1915 pfrag->offset = 0;
1908 return true; 1916 return true;
1909 } 1917 }