Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/Makefile | 3
-rw-r--r-- | net/core/dev.c | 48
-rw-r--r-- | net/core/devlink.c | 738
-rw-r--r-- | net/core/dst.c | 10
-rw-r--r-- | net/core/dst_cache.c | 168
-rw-r--r-- | net/core/ethtool.c | 638
-rw-r--r-- | net/core/filter.c | 292
-rw-r--r-- | net/core/flow_dissector.c | 58
-rw-r--r-- | net/core/gen_estimator.c | 2
-rw-r--r-- | net/core/gen_stats.c | 1
-rw-r--r-- | net/core/hwbm.c | 87
-rw-r--r-- | net/core/lwtunnel.c | 37
-rw-r--r-- | net/core/net-sysfs.c | 18
-rw-r--r-- | net/core/netclassid_cgroup.c | 1
-rw-r--r-- | net/core/netprio_cgroup.c | 1
-rw-r--r-- | net/core/pktgen.c | 4
-rw-r--r-- | net/core/rtnetlink.c | 144
-rw-r--r-- | net/core/skbuff.c | 194
-rw-r--r-- | net/core/sock.c | 16
19 files changed, 2247 insertions, 213 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index 0b835de04de3..d6508c2ddca5 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -24,3 +24,6 @@ obj-$(CONFIG_NET_PTP_CLASSIFY) += ptp_classifier.o
24 | obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o | 24 | obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o |
25 | obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o | 25 | obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o |
26 | obj-$(CONFIG_LWTUNNEL) += lwtunnel.o | 26 | obj-$(CONFIG_LWTUNNEL) += lwtunnel.o |
27 | obj-$(CONFIG_DST_CACHE) += dst_cache.o | ||
28 | obj-$(CONFIG_HWBM) += hwbm.o | ||
29 | obj-$(CONFIG_NET_DEVLINK) += devlink.o | ||
diff --git a/net/core/dev.c b/net/core/dev.c
index 8cba3d852f25..5c925ac50b95 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2802,7 +2802,7 @@ static netdev_features_t harmonize_features(struct sk_buff *skb,
2802 | 2802 | ||
2803 | if (skb->ip_summed != CHECKSUM_NONE && | 2803 | if (skb->ip_summed != CHECKSUM_NONE && |
2804 | !can_checksum_protocol(features, type)) { | 2804 | !can_checksum_protocol(features, type)) { |
2805 | features &= ~NETIF_F_CSUM_MASK; | 2805 | features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); |
2806 | } else if (illegal_highdma(skb->dev, skb)) { | 2806 | } else if (illegal_highdma(skb->dev, skb)) { |
2807 | features &= ~NETIF_F_SG; | 2807 | features &= ~NETIF_F_SG; |
2808 | } | 2808 | } |
@@ -3829,8 +3829,14 @@ static void net_tx_action(struct softirq_action *h)
3829 | trace_consume_skb(skb); | 3829 | trace_consume_skb(skb); |
3830 | else | 3830 | else |
3831 | trace_kfree_skb(skb, net_tx_action); | 3831 | trace_kfree_skb(skb, net_tx_action); |
3832 | __kfree_skb(skb); | 3832 | |
3833 | if (skb->fclone != SKB_FCLONE_UNAVAILABLE) | ||
3834 | __kfree_skb(skb); | ||
3835 | else | ||
3836 | __kfree_skb_defer(skb); | ||
3833 | } | 3837 | } |
3838 | |||
3839 | __kfree_skb_flush(); | ||
3834 | } | 3840 | } |
3835 | 3841 | ||
3836 | if (sd->output_queue) { | 3842 | if (sd->output_queue) { |
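The net_tx_action() change above is the consumer side of the new bulk-free scheme: skbs that are not fclones are parked with __kfree_skb_defer(), which only queues them on a per-cpu list, and __kfree_skb_flush() later returns the whole batch to the slab in one call. A minimal sketch of the same pairing in a hypothetical softirq-context completion loop (example_tx_complete is illustrative, not part of the patch):

	static void example_tx_complete(struct sk_buff_head *done)
	{
		struct sk_buff *skb;

		/* cheap per-skb cost: just queue on the per-cpu bulk-free list */
		while ((skb = __skb_dequeue(done)) != NULL)
			__kfree_skb_defer(skb);

		/* one batched slab free for everything queued above */
		__kfree_skb_flush();
	}

Note the fclone check in the hunk: fclones come from a different kmem cache, so they keep taking the regular __kfree_skb() path.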
@@ -4154,7 +4160,10 @@ ncls:
4154 | ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); | 4160 | ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); |
4155 | } else { | 4161 | } else { |
4156 | drop: | 4162 | drop: |
4157 | atomic_long_inc(&skb->dev->rx_dropped); | 4163 | if (!deliver_exact) |
4164 | atomic_long_inc(&skb->dev->rx_dropped); | ||
4165 | else | ||
4166 | atomic_long_inc(&skb->dev->rx_nohandler); | ||
4158 | kfree_skb(skb); | 4167 | kfree_skb(skb); |
4159 | /* Jamal, now you will not able to escape explaining | 4168 | /* Jamal, now you will not able to escape explaining |
4160 | * me how you were going to use this. :-) | 4169 | * me how you were going to use this. :-) |
@@ -4429,7 +4438,8 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
4429 | NAPI_GRO_CB(skb)->same_flow = 0; | 4438 | NAPI_GRO_CB(skb)->same_flow = 0; |
4430 | NAPI_GRO_CB(skb)->flush = 0; | 4439 | NAPI_GRO_CB(skb)->flush = 0; |
4431 | NAPI_GRO_CB(skb)->free = 0; | 4440 | NAPI_GRO_CB(skb)->free = 0; |
4432 | NAPI_GRO_CB(skb)->udp_mark = 0; | 4441 | NAPI_GRO_CB(skb)->encap_mark = 0; |
4442 | NAPI_GRO_CB(skb)->is_fou = 0; | ||
4433 | NAPI_GRO_CB(skb)->gro_remcsum_start = 0; | 4443 | NAPI_GRO_CB(skb)->gro_remcsum_start = 0; |
4434 | 4444 | ||
4435 | /* Setup for GRO checksum validation */ | 4445 | /* Setup for GRO checksum validation */ |
@@ -5152,6 +5162,7 @@ static void net_rx_action(struct softirq_action *h)
5152 | } | 5162 | } |
5153 | } | 5163 | } |
5154 | 5164 | ||
5165 | __kfree_skb_flush(); | ||
5155 | local_irq_disable(); | 5166 | local_irq_disable(); |
5156 | 5167 | ||
5157 | list_splice_tail_init(&sd->poll_list, &list); | 5168 | list_splice_tail_init(&sd->poll_list, &list); |
@@ -5379,12 +5390,12 @@ void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter)
5379 | { | 5390 | { |
5380 | struct netdev_adjacent *lower; | 5391 | struct netdev_adjacent *lower; |
5381 | 5392 | ||
5382 | lower = list_entry((*iter)->next, struct netdev_adjacent, list); | 5393 | lower = list_entry(*iter, struct netdev_adjacent, list); |
5383 | 5394 | ||
5384 | if (&lower->list == &dev->adj_list.lower) | 5395 | if (&lower->list == &dev->adj_list.lower) |
5385 | return NULL; | 5396 | return NULL; |
5386 | 5397 | ||
5387 | *iter = &lower->list; | 5398 | *iter = lower->list.next; |
5388 | 5399 | ||
5389 | return lower->dev; | 5400 | return lower->dev; |
5390 | } | 5401 | } |
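The netdev_lower_get_next() fix changes the iterator contract: *iter now points at the entry about to be returned and is advanced to ->next afterwards, instead of being dereferenced one step ahead. Callers keep the same shape; a hedged usage sketch, assuming the companion netdev_for_each_lower_dev macro in netdevice.h is updated to seed iter accordingly:

	struct net_device *lower;
	struct list_head *iter;

	ASSERT_RTNL();
	netdev_for_each_lower_dev(dev, lower, iter)
		netdev_info(dev, "lower device: %s\n", lower->name);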
@@ -6435,6 +6446,7 @@ EXPORT_SYMBOL(dev_get_phys_port_id);
6435 | * dev_get_phys_port_name - Get device physical port name | 6446 | * dev_get_phys_port_name - Get device physical port name |
6436 | * @dev: device | 6447 | * @dev: device |
6437 | * @name: port name | 6448 | * @name: port name |
6449 | * @len: limit of bytes to copy to name | ||
6438 | * | 6450 | * |
6439 | * Get device physical port name | 6451 | * Get device physical port name |
6440 | */ | 6452 | */ |
@@ -7253,24 +7265,31 @@ void netdev_run_todo(void)
7253 | } | 7265 | } |
7254 | } | 7266 | } |
7255 | 7267 | ||
7256 | /* Convert net_device_stats to rtnl_link_stats64. They have the same | 7268 | /* Convert net_device_stats to rtnl_link_stats64. rtnl_link_stats64 has |
7257 | * fields in the same order, with only the type differing. | 7269 | * all the same fields in the same order as net_device_stats, with only |
7270 | * the type differing, but rtnl_link_stats64 may have additional fields | ||
7271 | * at the end for newer counters. | ||
7258 | */ | 7272 | */ |
7259 | void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, | 7273 | void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, |
7260 | const struct net_device_stats *netdev_stats) | 7274 | const struct net_device_stats *netdev_stats) |
7261 | { | 7275 | { |
7262 | #if BITS_PER_LONG == 64 | 7276 | #if BITS_PER_LONG == 64 |
7263 | BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats)); | 7277 | BUILD_BUG_ON(sizeof(*stats64) < sizeof(*netdev_stats)); |
7264 | memcpy(stats64, netdev_stats, sizeof(*stats64)); | 7278 | memcpy(stats64, netdev_stats, sizeof(*stats64)); |
7279 | /* zero out counters that only exist in rtnl_link_stats64 */ | ||
7280 | memset((char *)stats64 + sizeof(*netdev_stats), 0, | ||
7281 | sizeof(*stats64) - sizeof(*netdev_stats)); | ||
7265 | #else | 7282 | #else |
7266 | size_t i, n = sizeof(*stats64) / sizeof(u64); | 7283 | size_t i, n = sizeof(*netdev_stats) / sizeof(unsigned long); |
7267 | const unsigned long *src = (const unsigned long *)netdev_stats; | 7284 | const unsigned long *src = (const unsigned long *)netdev_stats; |
7268 | u64 *dst = (u64 *)stats64; | 7285 | u64 *dst = (u64 *)stats64; |
7269 | 7286 | ||
7270 | BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) != | 7287 | BUILD_BUG_ON(n > sizeof(*stats64) / sizeof(u64)); |
7271 | sizeof(*stats64) / sizeof(u64)); | ||
7272 | for (i = 0; i < n; i++) | 7288 | for (i = 0; i < n; i++) |
7273 | dst[i] = src[i]; | 7289 | dst[i] = src[i]; |
7290 | /* zero out counters that only exist in rtnl_link_stats64 */ | ||
7291 | memset((char *)stats64 + n * sizeof(u64), 0, | ||
7292 | sizeof(*stats64) - n * sizeof(u64)); | ||
7274 | #endif | 7293 | #endif |
7275 | } | 7294 | } |
7276 | EXPORT_SYMBOL(netdev_stats_to_stats64); | 7295 | EXPORT_SYMBOL(netdev_stats_to_stats64); |
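The conversion now tolerates rtnl_link_stats64 growing new trailing counters (such as rx_nohandler below): the shared prefix is copied, the tail is zeroed. A standalone toy model of the 32-bit branch, with illustrative two-field structs rather than the kernel's (each unsigned long widens to a u64, and fields that exist only in the 64-bit struct come back as zero):

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	struct legacy_stats { unsigned long rx_packets, tx_packets; };
	struct stats64 { uint64_t rx_packets, tx_packets, rx_nohandler; };

	static void to_stats64(struct stats64 *dst, const struct legacy_stats *src)
	{
		size_t i, n = sizeof(*src) / sizeof(unsigned long);
		const unsigned long *s = (const unsigned long *)src;
		uint64_t *d = (uint64_t *)dst;

		for (i = 0; i < n; i++)
			d[i] = s[i];
		/* zero the counters that exist only in the 64-bit struct */
		memset((char *)dst + n * sizeof(uint64_t), 0,
		       sizeof(*dst) - n * sizeof(uint64_t));
	}

	int main(void)
	{
		struct legacy_stats src = { .rx_packets = 1, .tx_packets = 2 };
		struct stats64 dst;

		to_stats64(&dst, &src);
		printf("%llu %llu %llu\n",	/* prints: 1 2 0 */
		       (unsigned long long)dst.rx_packets,
		       (unsigned long long)dst.tx_packets,
		       (unsigned long long)dst.rx_nohandler);
		return 0;
	}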
@@ -7300,6 +7319,7 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
7300 | } | 7319 | } |
7301 | storage->rx_dropped += atomic_long_read(&dev->rx_dropped); | 7320 | storage->rx_dropped += atomic_long_read(&dev->rx_dropped); |
7302 | storage->tx_dropped += atomic_long_read(&dev->tx_dropped); | 7321 | storage->tx_dropped += atomic_long_read(&dev->tx_dropped); |
7322 | storage->rx_nohandler += atomic_long_read(&dev->rx_nohandler); | ||
7303 | return storage; | 7323 | return storage; |
7304 | } | 7324 | } |
7305 | EXPORT_SYMBOL(dev_get_stats); | 7325 | EXPORT_SYMBOL(dev_get_stats); |
@@ -7422,8 +7442,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
7422 | dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; | 7442 | dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; |
7423 | setup(dev); | 7443 | setup(dev); |
7424 | 7444 | ||
7425 | if (!dev->tx_queue_len) | 7445 | if (!dev->tx_queue_len) { |
7426 | dev->priv_flags |= IFF_NO_QUEUE; | 7446 | dev->priv_flags |= IFF_NO_QUEUE; |
7447 | dev->tx_queue_len = 1; | ||
7448 | } | ||
7427 | 7449 | ||
7428 | dev->num_tx_queues = txqs; | 7450 | dev->num_tx_queues = txqs; |
7429 | dev->real_num_tx_queues = txqs; | 7451 | dev->real_num_tx_queues = txqs; |
diff --git a/net/core/devlink.c b/net/core/devlink.c
new file mode 100644
index 000000000000..590fa561cb7f
--- /dev/null
+++ b/net/core/devlink.c
@@ -0,0 +1,738 @@
1 | /* | ||
2 | * net/core/devlink.c - Network physical/parent device Netlink interface | ||
3 | * | ||
4 | * Heavily inspired by net/wireless/ | ||
5 | * Copyright (c) 2016 Mellanox Technologies. All rights reserved. | ||
6 | * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License as published by | ||
10 | * the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | */ | ||
13 | |||
14 | #include <linux/kernel.h> | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/types.h> | ||
17 | #include <linux/slab.h> | ||
18 | #include <linux/gfp.h> | ||
19 | #include <linux/device.h> | ||
20 | #include <linux/list.h> | ||
21 | #include <linux/netdevice.h> | ||
22 | #include <rdma/ib_verbs.h> | ||
23 | #include <net/netlink.h> | ||
24 | #include <net/genetlink.h> | ||
25 | #include <net/rtnetlink.h> | ||
26 | #include <net/net_namespace.h> | ||
27 | #include <net/sock.h> | ||
28 | #include <net/devlink.h> | ||
29 | |||
30 | static LIST_HEAD(devlink_list); | ||
31 | |||
32 | /* devlink_mutex | ||
33 | * | ||
34 | * An overall lock guarding every operation coming from userspace. | ||
35 | * It also guards devlink devices list and it is taken when | ||
36 | * driver registers/unregisters it. | ||
37 | */ | ||
38 | static DEFINE_MUTEX(devlink_mutex); | ||
39 | |||
40 | /* devlink_port_mutex | ||
41 | * | ||
42 | * Shared lock to guard lists of ports in all devlink devices. | ||
43 | */ | ||
44 | static DEFINE_MUTEX(devlink_port_mutex); | ||
45 | |||
46 | static struct net *devlink_net(const struct devlink *devlink) | ||
47 | { | ||
48 | return read_pnet(&devlink->_net); | ||
49 | } | ||
50 | |||
51 | static void devlink_net_set(struct devlink *devlink, struct net *net) | ||
52 | { | ||
53 | write_pnet(&devlink->_net, net); | ||
54 | } | ||
55 | |||
56 | static struct devlink *devlink_get_from_attrs(struct net *net, | ||
57 | struct nlattr **attrs) | ||
58 | { | ||
59 | struct devlink *devlink; | ||
60 | char *busname; | ||
61 | char *devname; | ||
62 | |||
63 | if (!attrs[DEVLINK_ATTR_BUS_NAME] || !attrs[DEVLINK_ATTR_DEV_NAME]) | ||
64 | return ERR_PTR(-EINVAL); | ||
65 | |||
66 | busname = nla_data(attrs[DEVLINK_ATTR_BUS_NAME]); | ||
67 | devname = nla_data(attrs[DEVLINK_ATTR_DEV_NAME]); | ||
68 | |||
69 | list_for_each_entry(devlink, &devlink_list, list) { | ||
70 | if (strcmp(devlink->dev->bus->name, busname) == 0 && | ||
71 | strcmp(dev_name(devlink->dev), devname) == 0 && | ||
72 | net_eq(devlink_net(devlink), net)) | ||
73 | return devlink; | ||
74 | } | ||
75 | |||
76 | return ERR_PTR(-ENODEV); | ||
77 | } | ||
78 | |||
79 | static struct devlink *devlink_get_from_info(struct genl_info *info) | ||
80 | { | ||
81 | return devlink_get_from_attrs(genl_info_net(info), info->attrs); | ||
82 | } | ||
83 | |||
84 | static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink, | ||
85 | int port_index) | ||
86 | { | ||
87 | struct devlink_port *devlink_port; | ||
88 | |||
89 | list_for_each_entry(devlink_port, &devlink->port_list, list) { | ||
90 | if (devlink_port->index == port_index) | ||
91 | return devlink_port; | ||
92 | } | ||
93 | return NULL; | ||
94 | } | ||
95 | |||
96 | static bool devlink_port_index_exists(struct devlink *devlink, int port_index) | ||
97 | { | ||
98 | return devlink_port_get_by_index(devlink, port_index); | ||
99 | } | ||
100 | |||
101 | static struct devlink_port *devlink_port_get_from_attrs(struct devlink *devlink, | ||
102 | struct nlattr **attrs) | ||
103 | { | ||
104 | if (attrs[DEVLINK_ATTR_PORT_INDEX]) { | ||
105 | u32 port_index = nla_get_u32(attrs[DEVLINK_ATTR_PORT_INDEX]); | ||
106 | struct devlink_port *devlink_port; | ||
107 | |||
108 | devlink_port = devlink_port_get_by_index(devlink, port_index); | ||
109 | if (!devlink_port) | ||
110 | return ERR_PTR(-ENODEV); | ||
111 | return devlink_port; | ||
112 | } | ||
113 | return ERR_PTR(-EINVAL); | ||
114 | } | ||
115 | |||
116 | static struct devlink_port *devlink_port_get_from_info(struct devlink *devlink, | ||
117 | struct genl_info *info) | ||
118 | { | ||
119 | return devlink_port_get_from_attrs(devlink, info->attrs); | ||
120 | } | ||
121 | |||
122 | #define DEVLINK_NL_FLAG_NEED_PORT BIT(0) | ||
123 | |||
124 | static int devlink_nl_pre_doit(const struct genl_ops *ops, | ||
125 | struct sk_buff *skb, struct genl_info *info) | ||
126 | { | ||
127 | struct devlink *devlink; | ||
128 | |||
129 | mutex_lock(&devlink_mutex); | ||
130 | devlink = devlink_get_from_info(info); | ||
131 | if (IS_ERR(devlink)) { | ||
132 | mutex_unlock(&devlink_mutex); | ||
133 | return PTR_ERR(devlink); | ||
134 | } | ||
135 | info->user_ptr[0] = devlink; | ||
136 | if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) { | ||
137 | struct devlink_port *devlink_port; | ||
138 | |||
139 | mutex_lock(&devlink_port_mutex); | ||
140 | devlink_port = devlink_port_get_from_info(devlink, info); | ||
141 | if (IS_ERR(devlink_port)) { | ||
142 | mutex_unlock(&devlink_port_mutex); | ||
143 | mutex_unlock(&devlink_mutex); | ||
144 | return PTR_ERR(devlink_port); | ||
145 | } | ||
146 | info->user_ptr[1] = devlink_port; | ||
147 | } | ||
148 | return 0; | ||
149 | } | ||
150 | |||
151 | static void devlink_nl_post_doit(const struct genl_ops *ops, | ||
152 | struct sk_buff *skb, struct genl_info *info) | ||
153 | { | ||
154 | if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) | ||
155 | mutex_unlock(&devlink_port_mutex); | ||
156 | mutex_unlock(&devlink_mutex); | ||
157 | } | ||
158 | |||
159 | static struct genl_family devlink_nl_family = { | ||
160 | .id = GENL_ID_GENERATE, | ||
161 | .name = DEVLINK_GENL_NAME, | ||
162 | .version = DEVLINK_GENL_VERSION, | ||
163 | .maxattr = DEVLINK_ATTR_MAX, | ||
164 | .netnsok = true, | ||
165 | .pre_doit = devlink_nl_pre_doit, | ||
166 | .post_doit = devlink_nl_post_doit, | ||
167 | }; | ||
168 | |||
169 | enum devlink_multicast_groups { | ||
170 | DEVLINK_MCGRP_CONFIG, | ||
171 | }; | ||
172 | |||
173 | static const struct genl_multicast_group devlink_nl_mcgrps[] = { | ||
174 | [DEVLINK_MCGRP_CONFIG] = { .name = DEVLINK_GENL_MCGRP_CONFIG_NAME }, | ||
175 | }; | ||
176 | |||
177 | static int devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink) | ||
178 | { | ||
179 | if (nla_put_string(msg, DEVLINK_ATTR_BUS_NAME, devlink->dev->bus->name)) | ||
180 | return -EMSGSIZE; | ||
181 | if (nla_put_string(msg, DEVLINK_ATTR_DEV_NAME, dev_name(devlink->dev))) | ||
182 | return -EMSGSIZE; | ||
183 | return 0; | ||
184 | } | ||
185 | |||
186 | static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink, | ||
187 | enum devlink_command cmd, u32 portid, | ||
188 | u32 seq, int flags) | ||
189 | { | ||
190 | void *hdr; | ||
191 | |||
192 | hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); | ||
193 | if (!hdr) | ||
194 | return -EMSGSIZE; | ||
195 | |||
196 | if (devlink_nl_put_handle(msg, devlink)) | ||
197 | goto nla_put_failure; | ||
198 | |||
199 | genlmsg_end(msg, hdr); | ||
200 | return 0; | ||
201 | |||
202 | nla_put_failure: | ||
203 | genlmsg_cancel(msg, hdr); | ||
204 | return -EMSGSIZE; | ||
205 | } | ||
206 | |||
207 | static void devlink_notify(struct devlink *devlink, enum devlink_command cmd) | ||
208 | { | ||
209 | struct sk_buff *msg; | ||
210 | int err; | ||
211 | |||
212 | WARN_ON(cmd != DEVLINK_CMD_NEW && cmd != DEVLINK_CMD_DEL); | ||
213 | |||
214 | msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); | ||
215 | if (!msg) | ||
216 | return; | ||
217 | |||
218 | err = devlink_nl_fill(msg, devlink, cmd, 0, 0, 0); | ||
219 | if (err) { | ||
220 | nlmsg_free(msg); | ||
221 | return; | ||
222 | } | ||
223 | |||
224 | genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), | ||
225 | msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); | ||
226 | } | ||
227 | |||
228 | static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, | ||
229 | struct devlink_port *devlink_port, | ||
230 | enum devlink_command cmd, u32 portid, | ||
231 | u32 seq, int flags) | ||
232 | { | ||
233 | void *hdr; | ||
234 | |||
235 | hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); | ||
236 | if (!hdr) | ||
237 | return -EMSGSIZE; | ||
238 | |||
239 | if (devlink_nl_put_handle(msg, devlink)) | ||
240 | goto nla_put_failure; | ||
241 | if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index)) | ||
242 | goto nla_put_failure; | ||
243 | if (nla_put_u16(msg, DEVLINK_ATTR_PORT_TYPE, devlink_port->type)) | ||
244 | goto nla_put_failure; | ||
245 | if (devlink_port->desired_type != DEVLINK_PORT_TYPE_NOTSET && | ||
246 | nla_put_u16(msg, DEVLINK_ATTR_PORT_DESIRED_TYPE, | ||
247 | devlink_port->desired_type)) | ||
248 | goto nla_put_failure; | ||
249 | if (devlink_port->type == DEVLINK_PORT_TYPE_ETH) { | ||
250 | struct net_device *netdev = devlink_port->type_dev; | ||
251 | |||
252 | if (netdev && | ||
253 | (nla_put_u32(msg, DEVLINK_ATTR_PORT_NETDEV_IFINDEX, | ||
254 | netdev->ifindex) || | ||
255 | nla_put_string(msg, DEVLINK_ATTR_PORT_NETDEV_NAME, | ||
256 | netdev->name))) | ||
257 | goto nla_put_failure; | ||
258 | } | ||
259 | if (devlink_port->type == DEVLINK_PORT_TYPE_IB) { | ||
260 | struct ib_device *ibdev = devlink_port->type_dev; | ||
261 | |||
262 | if (ibdev && | ||
263 | nla_put_string(msg, DEVLINK_ATTR_PORT_IBDEV_NAME, | ||
264 | ibdev->name)) | ||
265 | goto nla_put_failure; | ||
266 | } | ||
267 | if (devlink_port->split && | ||
268 | nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_GROUP, | ||
269 | devlink_port->split_group)) | ||
270 | goto nla_put_failure; | ||
271 | |||
272 | genlmsg_end(msg, hdr); | ||
273 | return 0; | ||
274 | |||
275 | nla_put_failure: | ||
276 | genlmsg_cancel(msg, hdr); | ||
277 | return -EMSGSIZE; | ||
278 | } | ||
279 | |||
280 | static void devlink_port_notify(struct devlink_port *devlink_port, | ||
281 | enum devlink_command cmd) | ||
282 | { | ||
283 | struct devlink *devlink = devlink_port->devlink; | ||
284 | struct sk_buff *msg; | ||
285 | int err; | ||
286 | |||
287 | if (!devlink_port->registered) | ||
288 | return; | ||
289 | |||
290 | WARN_ON(cmd != DEVLINK_CMD_PORT_NEW && cmd != DEVLINK_CMD_PORT_DEL); | ||
291 | |||
292 | msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); | ||
293 | if (!msg) | ||
294 | return; | ||
295 | |||
296 | err = devlink_nl_port_fill(msg, devlink, devlink_port, cmd, 0, 0, 0); | ||
297 | if (err) { | ||
298 | nlmsg_free(msg); | ||
299 | return; | ||
300 | } | ||
301 | |||
302 | genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), | ||
303 | msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); | ||
304 | } | ||
305 | |||
306 | static int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info) | ||
307 | { | ||
308 | struct devlink *devlink = info->user_ptr[0]; | ||
309 | struct sk_buff *msg; | ||
310 | int err; | ||
311 | |||
312 | msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); | ||
313 | if (!msg) | ||
314 | return -ENOMEM; | ||
315 | |||
316 | err = devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW, | ||
317 | info->snd_portid, info->snd_seq, 0); | ||
318 | if (err) { | ||
319 | nlmsg_free(msg); | ||
320 | return err; | ||
321 | } | ||
322 | |||
323 | return genlmsg_reply(msg, info); | ||
324 | } | ||
325 | |||
326 | static int devlink_nl_cmd_get_dumpit(struct sk_buff *msg, | ||
327 | struct netlink_callback *cb) | ||
328 | { | ||
329 | struct devlink *devlink; | ||
330 | int start = cb->args[0]; | ||
331 | int idx = 0; | ||
332 | int err; | ||
333 | |||
334 | mutex_lock(&devlink_mutex); | ||
335 | list_for_each_entry(devlink, &devlink_list, list) { | ||
336 | if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) | ||
337 | continue; | ||
338 | if (idx < start) { | ||
339 | idx++; | ||
340 | continue; | ||
341 | } | ||
342 | err = devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW, | ||
343 | NETLINK_CB(cb->skb).portid, | ||
344 | cb->nlh->nlmsg_seq, NLM_F_MULTI); | ||
345 | if (err) | ||
346 | goto out; | ||
347 | idx++; | ||
348 | } | ||
349 | out: | ||
350 | mutex_unlock(&devlink_mutex); | ||
351 | |||
352 | cb->args[0] = idx; | ||
353 | return msg->len; | ||
354 | } | ||
355 | |||
356 | static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb, | ||
357 | struct genl_info *info) | ||
358 | { | ||
359 | struct devlink *devlink = info->user_ptr[0]; | ||
360 | struct devlink_port *devlink_port = info->user_ptr[1]; | ||
361 | struct sk_buff *msg; | ||
362 | int err; | ||
363 | |||
364 | msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); | ||
365 | if (!msg) | ||
366 | return -ENOMEM; | ||
367 | |||
368 | err = devlink_nl_port_fill(msg, devlink, devlink_port, | ||
369 | DEVLINK_CMD_PORT_NEW, | ||
370 | info->snd_portid, info->snd_seq, 0); | ||
371 | if (err) { | ||
372 | nlmsg_free(msg); | ||
373 | return err; | ||
374 | } | ||
375 | |||
376 | return genlmsg_reply(msg, info); | ||
377 | } | ||
378 | |||
379 | static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg, | ||
380 | struct netlink_callback *cb) | ||
381 | { | ||
382 | struct devlink *devlink; | ||
383 | struct devlink_port *devlink_port; | ||
384 | int start = cb->args[0]; | ||
385 | int idx = 0; | ||
386 | int err; | ||
387 | |||
388 | mutex_lock(&devlink_mutex); | ||
389 | mutex_lock(&devlink_port_mutex); | ||
390 | list_for_each_entry(devlink, &devlink_list, list) { | ||
391 | if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) | ||
392 | continue; | ||
393 | list_for_each_entry(devlink_port, &devlink->port_list, list) { | ||
394 | if (idx < start) { | ||
395 | idx++; | ||
396 | continue; | ||
397 | } | ||
398 | err = devlink_nl_port_fill(msg, devlink, devlink_port, | ||
399 | DEVLINK_CMD_NEW, | ||
400 | NETLINK_CB(cb->skb).portid, | ||
401 | cb->nlh->nlmsg_seq, | ||
402 | NLM_F_MULTI); | ||
403 | if (err) | ||
404 | goto out; | ||
405 | idx++; | ||
406 | } | ||
407 | } | ||
408 | out: | ||
409 | mutex_unlock(&devlink_port_mutex); | ||
410 | mutex_unlock(&devlink_mutex); | ||
411 | |||
412 | cb->args[0] = idx; | ||
413 | return msg->len; | ||
414 | } | ||
415 | |||
416 | static int devlink_port_type_set(struct devlink *devlink, | ||
417 | struct devlink_port *devlink_port, | ||
418 | enum devlink_port_type port_type) | ||
419 | |||
420 | { | ||
421 | int err; | ||
422 | |||
423 | if (devlink->ops && devlink->ops->port_type_set) { | ||
424 | if (port_type == DEVLINK_PORT_TYPE_NOTSET) | ||
425 | return -EINVAL; | ||
426 | err = devlink->ops->port_type_set(devlink_port, port_type); | ||
427 | if (err) | ||
428 | return err; | ||
429 | devlink_port->desired_type = port_type; | ||
430 | devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); | ||
431 | return 0; | ||
432 | } | ||
433 | return -EOPNOTSUPP; | ||
434 | } | ||
435 | |||
436 | static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb, | ||
437 | struct genl_info *info) | ||
438 | { | ||
439 | struct devlink *devlink = info->user_ptr[0]; | ||
440 | struct devlink_port *devlink_port = info->user_ptr[1]; | ||
441 | int err; | ||
442 | |||
443 | if (info->attrs[DEVLINK_ATTR_PORT_TYPE]) { | ||
444 | enum devlink_port_type port_type; | ||
445 | |||
446 | port_type = nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_TYPE]); | ||
447 | err = devlink_port_type_set(devlink, devlink_port, port_type); | ||
448 | if (err) | ||
449 | return err; | ||
450 | } | ||
451 | return 0; | ||
452 | } | ||
453 | |||
454 | static int devlink_port_split(struct devlink *devlink, | ||
455 | u32 port_index, u32 count) | ||
456 | |||
457 | { | ||
458 | if (devlink->ops && devlink->ops->port_split) | ||
459 | return devlink->ops->port_split(devlink, port_index, count); | ||
460 | return -EOPNOTSUPP; | ||
461 | } | ||
462 | |||
463 | static int devlink_nl_cmd_port_split_doit(struct sk_buff *skb, | ||
464 | struct genl_info *info) | ||
465 | { | ||
466 | struct devlink *devlink = info->user_ptr[0]; | ||
467 | u32 port_index; | ||
468 | u32 count; | ||
469 | |||
470 | if (!info->attrs[DEVLINK_ATTR_PORT_INDEX] || | ||
471 | !info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT]) | ||
472 | return -EINVAL; | ||
473 | |||
474 | port_index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]); | ||
475 | count = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT]); | ||
476 | return devlink_port_split(devlink, port_index, count); | ||
477 | } | ||
478 | |||
479 | static int devlink_port_unsplit(struct devlink *devlink, u32 port_index) | ||
480 | |||
481 | { | ||
482 | if (devlink->ops && devlink->ops->port_unsplit) | ||
483 | return devlink->ops->port_unsplit(devlink, port_index); | ||
484 | return -EOPNOTSUPP; | ||
485 | } | ||
486 | |||
487 | static int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb, | ||
488 | struct genl_info *info) | ||
489 | { | ||
490 | struct devlink *devlink = info->user_ptr[0]; | ||
491 | u32 port_index; | ||
492 | |||
493 | if (!info->attrs[DEVLINK_ATTR_PORT_INDEX]) | ||
494 | return -EINVAL; | ||
495 | |||
496 | port_index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]); | ||
497 | return devlink_port_unsplit(devlink, port_index); | ||
498 | } | ||
499 | |||
500 | static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { | ||
501 | [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING }, | ||
502 | [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING }, | ||
503 | [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32 }, | ||
504 | [DEVLINK_ATTR_PORT_TYPE] = { .type = NLA_U16 }, | ||
505 | [DEVLINK_ATTR_PORT_SPLIT_COUNT] = { .type = NLA_U32 }, | ||
506 | }; | ||
507 | |||
508 | static const struct genl_ops devlink_nl_ops[] = { | ||
509 | { | ||
510 | .cmd = DEVLINK_CMD_GET, | ||
511 | .doit = devlink_nl_cmd_get_doit, | ||
512 | .dumpit = devlink_nl_cmd_get_dumpit, | ||
513 | .policy = devlink_nl_policy, | ||
514 | /* can be retrieved by unprivileged users */ | ||
515 | }, | ||
516 | { | ||
517 | .cmd = DEVLINK_CMD_PORT_GET, | ||
518 | .doit = devlink_nl_cmd_port_get_doit, | ||
519 | .dumpit = devlink_nl_cmd_port_get_dumpit, | ||
520 | .policy = devlink_nl_policy, | ||
521 | .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, | ||
522 | /* can be retrieved by unprivileged users */ | ||
523 | }, | ||
524 | { | ||
525 | .cmd = DEVLINK_CMD_PORT_SET, | ||
526 | .doit = devlink_nl_cmd_port_set_doit, | ||
527 | .policy = devlink_nl_policy, | ||
528 | .flags = GENL_ADMIN_PERM, | ||
529 | .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, | ||
530 | }, | ||
531 | { | ||
532 | .cmd = DEVLINK_CMD_PORT_SPLIT, | ||
533 | .doit = devlink_nl_cmd_port_split_doit, | ||
534 | .policy = devlink_nl_policy, | ||
535 | .flags = GENL_ADMIN_PERM, | ||
536 | }, | ||
537 | { | ||
538 | .cmd = DEVLINK_CMD_PORT_UNSPLIT, | ||
539 | .doit = devlink_nl_cmd_port_unsplit_doit, | ||
540 | .policy = devlink_nl_policy, | ||
541 | .flags = GENL_ADMIN_PERM, | ||
542 | }, | ||
543 | }; | ||
544 | |||
545 | /** | ||
546 | * devlink_alloc - Allocate new devlink instance resources | ||
547 | * | ||
548 | * @ops: ops | ||
549 | * @priv_size: size of user private data | ||
550 | * | ||
551 | * Allocate new devlink instance resources, including devlink index | ||
552 | * and name. | ||
553 | */ | ||
554 | struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size) | ||
555 | { | ||
556 | struct devlink *devlink; | ||
557 | |||
558 | devlink = kzalloc(sizeof(*devlink) + priv_size, GFP_KERNEL); | ||
559 | if (!devlink) | ||
560 | return NULL; | ||
561 | devlink->ops = ops; | ||
562 | devlink_net_set(devlink, &init_net); | ||
563 | INIT_LIST_HEAD(&devlink->port_list); | ||
564 | return devlink; | ||
565 | } | ||
566 | EXPORT_SYMBOL_GPL(devlink_alloc); | ||
567 | |||
568 | /** | ||
569 | * devlink_register - Register devlink instance | ||
570 | * | ||
571 | * @devlink: devlink | ||
572 | */ | ||
573 | int devlink_register(struct devlink *devlink, struct device *dev) | ||
574 | { | ||
575 | mutex_lock(&devlink_mutex); | ||
576 | devlink->dev = dev; | ||
577 | list_add_tail(&devlink->list, &devlink_list); | ||
578 | devlink_notify(devlink, DEVLINK_CMD_NEW); | ||
579 | mutex_unlock(&devlink_mutex); | ||
580 | return 0; | ||
581 | } | ||
582 | EXPORT_SYMBOL_GPL(devlink_register); | ||
583 | |||
584 | /** | ||
585 | * devlink_unregister - Unregister devlink instance | ||
586 | * | ||
587 | * @devlink: devlink | ||
588 | */ | ||
589 | void devlink_unregister(struct devlink *devlink) | ||
590 | { | ||
591 | mutex_lock(&devlink_mutex); | ||
592 | devlink_notify(devlink, DEVLINK_CMD_DEL); | ||
593 | list_del(&devlink->list); | ||
594 | mutex_unlock(&devlink_mutex); | ||
595 | } | ||
596 | EXPORT_SYMBOL_GPL(devlink_unregister); | ||
597 | |||
598 | /** | ||
599 | * devlink_free - Free devlink instance resources | ||
600 | * | ||
601 | * @devlink: devlink | ||
602 | */ | ||
603 | void devlink_free(struct devlink *devlink) | ||
604 | { | ||
605 | kfree(devlink); | ||
606 | } | ||
607 | EXPORT_SYMBOL_GPL(devlink_free); | ||
608 | |||
609 | /** | ||
610 | * devlink_port_register - Register devlink port | ||
611 | * | ||
612 | * @devlink: devlink | ||
613 | * @devlink_port: devlink port | ||
614 | * @port_index: port index | ||
615 | * | ||
616 | * Register devlink port with provided port index. User can use | ||
617 | * any indexing, even hw-related one. devlink_port structure | ||
618 | * is convenient to be embedded inside user driver private structure. | ||
619 | * Note that the caller should take care of zeroing the devlink_port | ||
620 | * structure. | ||
621 | */ | ||
622 | int devlink_port_register(struct devlink *devlink, | ||
623 | struct devlink_port *devlink_port, | ||
624 | unsigned int port_index) | ||
625 | { | ||
626 | mutex_lock(&devlink_port_mutex); | ||
627 | if (devlink_port_index_exists(devlink, port_index)) { | ||
628 | mutex_unlock(&devlink_port_mutex); | ||
629 | return -EEXIST; | ||
630 | } | ||
631 | devlink_port->devlink = devlink; | ||
632 | devlink_port->index = port_index; | ||
633 | devlink_port->type = DEVLINK_PORT_TYPE_NOTSET; | ||
634 | devlink_port->registered = true; | ||
635 | list_add_tail(&devlink_port->list, &devlink->port_list); | ||
636 | mutex_unlock(&devlink_port_mutex); | ||
637 | devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); | ||
638 | return 0; | ||
639 | } | ||
640 | EXPORT_SYMBOL_GPL(devlink_port_register); | ||
641 | |||
642 | /** | ||
643 | * devlink_port_unregister - Unregister devlink port | ||
644 | * | ||
645 | * @devlink_port: devlink port | ||
646 | */ | ||
647 | void devlink_port_unregister(struct devlink_port *devlink_port) | ||
648 | { | ||
649 | devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL); | ||
650 | mutex_lock(&devlink_port_mutex); | ||
651 | list_del(&devlink_port->list); | ||
652 | mutex_unlock(&devlink_port_mutex); | ||
653 | } | ||
654 | EXPORT_SYMBOL_GPL(devlink_port_unregister); | ||
655 | |||
656 | static void __devlink_port_type_set(struct devlink_port *devlink_port, | ||
657 | enum devlink_port_type type, | ||
658 | void *type_dev) | ||
659 | { | ||
660 | devlink_port->type = type; | ||
661 | devlink_port->type_dev = type_dev; | ||
662 | devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); | ||
663 | } | ||
664 | |||
665 | /** | ||
666 | * devlink_port_type_eth_set - Set port type to Ethernet | ||
667 | * | ||
668 | * @devlink_port: devlink port | ||
669 | * @netdev: related netdevice | ||
670 | */ | ||
671 | void devlink_port_type_eth_set(struct devlink_port *devlink_port, | ||
672 | struct net_device *netdev) | ||
673 | { | ||
674 | return __devlink_port_type_set(devlink_port, | ||
675 | DEVLINK_PORT_TYPE_ETH, netdev); | ||
676 | } | ||
677 | EXPORT_SYMBOL_GPL(devlink_port_type_eth_set); | ||
678 | |||
679 | /** | ||
680 | * devlink_port_type_ib_set - Set port type to InfiniBand | ||
681 | * | ||
682 | * @devlink_port: devlink port | ||
683 | * @ibdev: related IB device | ||
684 | */ | ||
685 | void devlink_port_type_ib_set(struct devlink_port *devlink_port, | ||
686 | struct ib_device *ibdev) | ||
687 | { | ||
688 | return __devlink_port_type_set(devlink_port, | ||
689 | DEVLINK_PORT_TYPE_IB, ibdev); | ||
690 | } | ||
691 | EXPORT_SYMBOL_GPL(devlink_port_type_ib_set); | ||
692 | |||
693 | /** | ||
694 | * devlink_port_type_clear - Clear port type | ||
695 | * | ||
696 | * @devlink_port: devlink port | ||
697 | */ | ||
698 | void devlink_port_type_clear(struct devlink_port *devlink_port) | ||
699 | { | ||
700 | return __devlink_port_type_set(devlink_port, | ||
701 | DEVLINK_PORT_TYPE_NOTSET, NULL); | ||
702 | } | ||
703 | EXPORT_SYMBOL_GPL(devlink_port_type_clear); | ||
704 | |||
705 | /** | ||
706 | * devlink_port_split_set - Set port is split | ||
707 | * | ||
708 | * @devlink_port: devlink port | ||
709 | * @split_group: split group - identifies group split port is part of | ||
710 | */ | ||
711 | void devlink_port_split_set(struct devlink_port *devlink_port, | ||
712 | u32 split_group) | ||
713 | { | ||
714 | devlink_port->split = true; | ||
715 | devlink_port->split_group = split_group; | ||
716 | devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); | ||
717 | } | ||
718 | EXPORT_SYMBOL_GPL(devlink_port_split_set); | ||
719 | |||
720 | static int __init devlink_module_init(void) | ||
721 | { | ||
722 | return genl_register_family_with_ops_groups(&devlink_nl_family, | ||
723 | devlink_nl_ops, | ||
724 | devlink_nl_mcgrps); | ||
725 | } | ||
726 | |||
727 | static void __exit devlink_module_exit(void) | ||
728 | { | ||
729 | genl_unregister_family(&devlink_nl_family); | ||
730 | } | ||
731 | |||
732 | module_init(devlink_module_init); | ||
733 | module_exit(devlink_module_exit); | ||
734 | |||
735 | MODULE_LICENSE("GPL v2"); | ||
736 | MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>"); | ||
737 | MODULE_DESCRIPTION("Network physical device Netlink interface"); | ||
738 | MODULE_ALIAS_GENL_FAMILY(DEVLINK_GENL_NAME); | ||
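That is the whole of the new devlink core. From a driver's point of view the intended calling sequence is alloc, register, then per-port registration; a hedged probe-path sketch (struct my_priv, my_ops, my_probe and the devlink_priv() accessor from the companion include/net/devlink.h header are assumptions of this example):

	#include <net/devlink.h>

	/* devlink_alloc() kzallocs the private area, which also satisfies
	 * devlink_port_register()'s requirement of a zeroed devlink_port.
	 */
	struct my_priv {
		struct devlink_port port;
	};

	static const struct devlink_ops my_ops = {
		/* optionally: .port_type_set, .port_split, .port_unsplit */
	};

	static int my_probe(struct device *dev)
	{
		struct devlink *devlink;
		struct my_priv *priv;
		int err;

		devlink = devlink_alloc(&my_ops, sizeof(*priv));
		if (!devlink)
			return -ENOMEM;
		priv = devlink_priv(devlink);

		err = devlink_register(devlink, dev);
		if (err)
			goto err_register;

		err = devlink_port_register(devlink, &priv->port, 0);
		if (err)
			goto err_port;

		/* later, once a netdev is bound to the port:
		 * devlink_port_type_eth_set(&priv->port, netdev);
		 */
		return 0;

	err_port:
		devlink_unregister(devlink);
	err_register:
		devlink_free(devlink);
		return err;
	}

Teardown runs in reverse: devlink_port_unregister(), devlink_unregister(), devlink_free().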
diff --git a/net/core/dst.c b/net/core/dst.c
index a1656e3b8d72..b5cbbe07f786 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -265,7 +265,7 @@ again:
265 | lwtstate_put(dst->lwtstate); | 265 | lwtstate_put(dst->lwtstate); |
266 | 266 | ||
267 | if (dst->flags & DST_METADATA) | 267 | if (dst->flags & DST_METADATA) |
268 | kfree(dst); | 268 | metadata_dst_free((struct metadata_dst *)dst); |
269 | else | 269 | else |
270 | kmem_cache_free(dst->ops->kmem_cachep, dst); | 270 | kmem_cache_free(dst->ops->kmem_cachep, dst); |
271 | 271 | ||
@@ -395,6 +395,14 @@ struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags)
395 | } | 395 | } |
396 | EXPORT_SYMBOL_GPL(metadata_dst_alloc); | 396 | EXPORT_SYMBOL_GPL(metadata_dst_alloc); |
397 | 397 | ||
398 | void metadata_dst_free(struct metadata_dst *md_dst) | ||
399 | { | ||
400 | #ifdef CONFIG_DST_CACHE | ||
401 | dst_cache_destroy(&md_dst->u.tun_info.dst_cache); | ||
402 | #endif | ||
403 | kfree(md_dst); | ||
404 | } | ||
405 | |||
398 | struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags) | 406 | struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags) |
399 | { | 407 | { |
400 | int cpu; | 408 | int cpu; |
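metadata_dst_free() exists because a metadata dst can now embed a dst_cache (the new file below), whose per-cpu dst references must be dropped before the memory goes away; a plain kfree() would leak them. The expected pairing, sketched:

	struct metadata_dst *md_dst;

	md_dst = metadata_dst_alloc(0, GFP_KERNEL);
	if (!md_dst)
		return -ENOMEM;
	/* ... hand md_dst->u.tun_info to the tunnel datapath ... */
	metadata_dst_free(md_dst);	/* drains the embedded dst_cache, then kfree */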
diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c
new file mode 100644
index 000000000000..554d36449231
--- /dev/null
+++ b/net/core/dst_cache.c
@@ -0,0 +1,168 @@
1 | /* | ||
2 | * net/core/dst_cache.c - dst entry cache | ||
3 | * | ||
4 | * Copyright (c) 2016 Paolo Abeni <pabeni@redhat.com> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/percpu.h> | ||
14 | #include <net/dst_cache.h> | ||
15 | #include <net/route.h> | ||
16 | #if IS_ENABLED(CONFIG_IPV6) | ||
17 | #include <net/ip6_fib.h> | ||
18 | #endif | ||
19 | #include <uapi/linux/in.h> | ||
20 | |||
21 | struct dst_cache_pcpu { | ||
22 | unsigned long refresh_ts; | ||
23 | struct dst_entry *dst; | ||
24 | u32 cookie; | ||
25 | union { | ||
26 | struct in_addr in_saddr; | ||
27 | struct in6_addr in6_saddr; | ||
28 | }; | ||
29 | }; | ||
30 | |||
31 | static void dst_cache_per_cpu_dst_set(struct dst_cache_pcpu *dst_cache, | ||
32 | struct dst_entry *dst, u32 cookie) | ||
33 | { | ||
34 | dst_release(dst_cache->dst); | ||
35 | if (dst) | ||
36 | dst_hold(dst); | ||
37 | |||
38 | dst_cache->cookie = cookie; | ||
39 | dst_cache->dst = dst; | ||
40 | } | ||
41 | |||
42 | static struct dst_entry *dst_cache_per_cpu_get(struct dst_cache *dst_cache, | ||
43 | struct dst_cache_pcpu *idst) | ||
44 | { | ||
45 | struct dst_entry *dst; | ||
46 | |||
47 | dst = idst->dst; | ||
48 | if (!dst) | ||
49 | goto fail; | ||
50 | |||
51 | /* the cache already hold a dst reference; it can't go away */ | ||
52 | dst_hold(dst); | ||
53 | |||
54 | if (unlikely(!time_after(idst->refresh_ts, dst_cache->reset_ts) || | ||
55 | (dst->obsolete && !dst->ops->check(dst, idst->cookie)))) { | ||
56 | dst_cache_per_cpu_dst_set(idst, NULL, 0); | ||
57 | dst_release(dst); | ||
58 | goto fail; | ||
59 | } | ||
60 | return dst; | ||
61 | |||
62 | fail: | ||
63 | idst->refresh_ts = jiffies; | ||
64 | return NULL; | ||
65 | } | ||
66 | |||
67 | struct dst_entry *dst_cache_get(struct dst_cache *dst_cache) | ||
68 | { | ||
69 | if (!dst_cache->cache) | ||
70 | return NULL; | ||
71 | |||
72 | return dst_cache_per_cpu_get(dst_cache, this_cpu_ptr(dst_cache->cache)); | ||
73 | } | ||
74 | EXPORT_SYMBOL_GPL(dst_cache_get); | ||
75 | |||
76 | struct rtable *dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr) | ||
77 | { | ||
78 | struct dst_cache_pcpu *idst; | ||
79 | struct dst_entry *dst; | ||
80 | |||
81 | if (!dst_cache->cache) | ||
82 | return NULL; | ||
83 | |||
84 | idst = this_cpu_ptr(dst_cache->cache); | ||
85 | dst = dst_cache_per_cpu_get(dst_cache, idst); | ||
86 | if (!dst) | ||
87 | return NULL; | ||
88 | |||
89 | *saddr = idst->in_saddr.s_addr; | ||
90 | return container_of(dst, struct rtable, dst); | ||
91 | } | ||
92 | EXPORT_SYMBOL_GPL(dst_cache_get_ip4); | ||
93 | |||
94 | void dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst, | ||
95 | __be32 saddr) | ||
96 | { | ||
97 | struct dst_cache_pcpu *idst; | ||
98 | |||
99 | if (!dst_cache->cache) | ||
100 | return; | ||
101 | |||
102 | idst = this_cpu_ptr(dst_cache->cache); | ||
103 | dst_cache_per_cpu_dst_set(idst, dst, 0); | ||
104 | idst->in_saddr.s_addr = saddr; | ||
105 | } | ||
106 | EXPORT_SYMBOL_GPL(dst_cache_set_ip4); | ||
107 | |||
108 | #if IS_ENABLED(CONFIG_IPV6) | ||
109 | void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst, | ||
110 | const struct in6_addr *addr) | ||
111 | { | ||
112 | struct dst_cache_pcpu *idst; | ||
113 | |||
114 | if (!dst_cache->cache) | ||
115 | return; | ||
116 | |||
117 | idst = this_cpu_ptr(dst_cache->cache); | ||
118 | dst_cache_per_cpu_dst_set(this_cpu_ptr(dst_cache->cache), dst, | ||
119 | rt6_get_cookie((struct rt6_info *)dst)); | ||
120 | idst->in6_saddr = *addr; | ||
121 | } | ||
122 | EXPORT_SYMBOL_GPL(dst_cache_set_ip6); | ||
123 | |||
124 | struct dst_entry *dst_cache_get_ip6(struct dst_cache *dst_cache, | ||
125 | struct in6_addr *saddr) | ||
126 | { | ||
127 | struct dst_cache_pcpu *idst; | ||
128 | struct dst_entry *dst; | ||
129 | |||
130 | if (!dst_cache->cache) | ||
131 | return NULL; | ||
132 | |||
133 | idst = this_cpu_ptr(dst_cache->cache); | ||
134 | dst = dst_cache_per_cpu_get(dst_cache, idst); | ||
135 | if (!dst) | ||
136 | return NULL; | ||
137 | |||
138 | *saddr = idst->in6_saddr; | ||
139 | return dst; | ||
140 | } | ||
141 | EXPORT_SYMBOL_GPL(dst_cache_get_ip6); | ||
142 | #endif | ||
143 | |||
144 | int dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp) | ||
145 | { | ||
146 | dst_cache->cache = alloc_percpu_gfp(struct dst_cache_pcpu, | ||
147 | gfp | __GFP_ZERO); | ||
148 | if (!dst_cache->cache) | ||
149 | return -ENOMEM; | ||
150 | |||
151 | dst_cache_reset(dst_cache); | ||
152 | return 0; | ||
153 | } | ||
154 | EXPORT_SYMBOL_GPL(dst_cache_init); | ||
155 | |||
156 | void dst_cache_destroy(struct dst_cache *dst_cache) | ||
157 | { | ||
158 | int i; | ||
159 | |||
160 | if (!dst_cache->cache) | ||
161 | return; | ||
162 | |||
163 | for_each_possible_cpu(i) | ||
164 | dst_release(per_cpu_ptr(dst_cache->cache, i)->dst); | ||
165 | |||
166 | free_percpu(dst_cache->cache); | ||
167 | } | ||
168 | EXPORT_SYMBOL_GPL(dst_cache_destroy); | ||
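The cache is designed to sit in front of the route lookup on a tunnel transmit path: hit the per-cpu entry while it is still valid, fall back to a full lookup and repopulate otherwise. A hedged IPv4 usage sketch (my_tnl_get_rt and the flowi4 preparation are illustrative; the dst_cache is assumed to have been set up with dst_cache_init()):

	#include <net/dst_cache.h>
	#include <net/route.h>

	static struct rtable *my_tnl_get_rt(struct net *net,
					    struct dst_cache *cache,
					    struct flowi4 *fl4)
	{
		struct rtable *rt;
		__be32 saddr;

		rt = dst_cache_get_ip4(cache, &saddr);	/* fast path, per-cpu */
		if (rt) {
			fl4->saddr = saddr;
			return rt;
		}

		rt = ip_route_output_key(net, fl4);	/* slow path */
		if (!IS_ERR(rt))
			dst_cache_set_ip4(cache, &rt->dst, fl4->saddr);
		return rt;
	}

Both paths hand back a referenced dst, so the caller's release logic stays uniform; when tunnel parameters change, dst_cache_reset() invalidates every per-cpu slot at once by bumping reset_ts.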
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index daf04709dd3c..f426c5ad6149 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -98,6 +98,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
98 | [NETIF_F_RXALL_BIT] = "rx-all", | 98 | [NETIF_F_RXALL_BIT] = "rx-all", |
99 | [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload", | 99 | [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload", |
100 | [NETIF_F_BUSY_POLL_BIT] = "busy-poll", | 100 | [NETIF_F_BUSY_POLL_BIT] = "busy-poll", |
101 | [NETIF_F_HW_TC_BIT] = "hw-tc-offload", | ||
101 | }; | 102 | }; |
102 | 103 | ||
103 | static const char | 104 | static const char |
@@ -386,43 +387,461 @@ static int __ethtool_set_flags(struct net_device *dev, u32 data)
386 | return 0; | 387 | return 0; |
387 | } | 388 | } |
388 | 389 | ||
389 | int __ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) | 390 | static void convert_legacy_u32_to_link_mode(unsigned long *dst, u32 legacy_u32) |
390 | { | 391 | { |
392 | bitmap_zero(dst, __ETHTOOL_LINK_MODE_MASK_NBITS); | ||
393 | dst[0] = legacy_u32; | ||
394 | } | ||
395 | |||
396 | /* return false if src had higher bits set. lower bits always updated. */ | ||
397 | static bool convert_link_mode_to_legacy_u32(u32 *legacy_u32, | ||
398 | const unsigned long *src) | ||
399 | { | ||
400 | bool retval = true; | ||
401 | |||
402 | /* TODO: following test will soon always be true */ | ||
403 | if (__ETHTOOL_LINK_MODE_MASK_NBITS > 32) { | ||
404 | __ETHTOOL_DECLARE_LINK_MODE_MASK(ext); | ||
405 | |||
406 | bitmap_zero(ext, __ETHTOOL_LINK_MODE_MASK_NBITS); | ||
407 | bitmap_fill(ext, 32); | ||
408 | bitmap_complement(ext, ext, __ETHTOOL_LINK_MODE_MASK_NBITS); | ||
409 | if (bitmap_intersects(ext, src, | ||
410 | __ETHTOOL_LINK_MODE_MASK_NBITS)) { | ||
411 | /* src mask goes beyond bit 31 */ | ||
412 | retval = false; | ||
413 | } | ||
414 | } | ||
415 | *legacy_u32 = src[0]; | ||
416 | return retval; | ||
417 | } | ||
418 | |||
419 | /* return false if legacy contained non-0 deprecated fields | ||
420 | * transceiver/maxtxpkt/maxrxpkt. rest of ksettings always updated | ||
421 | */ | ||
422 | static bool | ||
423 | convert_legacy_settings_to_link_ksettings( | ||
424 | struct ethtool_link_ksettings *link_ksettings, | ||
425 | const struct ethtool_cmd *legacy_settings) | ||
426 | { | ||
427 | bool retval = true; | ||
428 | |||
429 | memset(link_ksettings, 0, sizeof(*link_ksettings)); | ||
430 | |||
431 | /* This is used to tell users that driver is still using these | ||
432 | * deprecated legacy fields, and they should not use | ||
433 | * %ETHTOOL_GLINKSETTINGS/%ETHTOOL_SLINKSETTINGS | ||
434 | */ | ||
435 | if (legacy_settings->transceiver || | ||
436 | legacy_settings->maxtxpkt || | ||
437 | legacy_settings->maxrxpkt) | ||
438 | retval = false; | ||
439 | |||
440 | convert_legacy_u32_to_link_mode( | ||
441 | link_ksettings->link_modes.supported, | ||
442 | legacy_settings->supported); | ||
443 | convert_legacy_u32_to_link_mode( | ||
444 | link_ksettings->link_modes.advertising, | ||
445 | legacy_settings->advertising); | ||
446 | convert_legacy_u32_to_link_mode( | ||
447 | link_ksettings->link_modes.lp_advertising, | ||
448 | legacy_settings->lp_advertising); | ||
449 | link_ksettings->base.speed | ||
450 | = ethtool_cmd_speed(legacy_settings); | ||
451 | link_ksettings->base.duplex | ||
452 | = legacy_settings->duplex; | ||
453 | link_ksettings->base.port | ||
454 | = legacy_settings->port; | ||
455 | link_ksettings->base.phy_address | ||
456 | = legacy_settings->phy_address; | ||
457 | link_ksettings->base.autoneg | ||
458 | = legacy_settings->autoneg; | ||
459 | link_ksettings->base.mdio_support | ||
460 | = legacy_settings->mdio_support; | ||
461 | link_ksettings->base.eth_tp_mdix | ||
462 | = legacy_settings->eth_tp_mdix; | ||
463 | link_ksettings->base.eth_tp_mdix_ctrl | ||
464 | = legacy_settings->eth_tp_mdix_ctrl; | ||
465 | return retval; | ||
466 | } | ||
467 | |||
468 | /* return false if ksettings link modes had higher bits | ||
469 | * set. legacy_settings always updated (best effort) | ||
470 | */ | ||
471 | static bool | ||
472 | convert_link_ksettings_to_legacy_settings( | ||
473 | struct ethtool_cmd *legacy_settings, | ||
474 | const struct ethtool_link_ksettings *link_ksettings) | ||
475 | { | ||
476 | bool retval = true; | ||
477 | |||
478 | memset(legacy_settings, 0, sizeof(*legacy_settings)); | ||
479 | /* this also clears the deprecated fields in legacy structure: | ||
480 | * __u8 transceiver; | ||
481 | * __u32 maxtxpkt; | ||
482 | * __u32 maxrxpkt; | ||
483 | */ | ||
484 | |||
485 | retval &= convert_link_mode_to_legacy_u32( | ||
486 | &legacy_settings->supported, | ||
487 | link_ksettings->link_modes.supported); | ||
488 | retval &= convert_link_mode_to_legacy_u32( | ||
489 | &legacy_settings->advertising, | ||
490 | link_ksettings->link_modes.advertising); | ||
491 | retval &= convert_link_mode_to_legacy_u32( | ||
492 | &legacy_settings->lp_advertising, | ||
493 | link_ksettings->link_modes.lp_advertising); | ||
494 | ethtool_cmd_speed_set(legacy_settings, link_ksettings->base.speed); | ||
495 | legacy_settings->duplex | ||
496 | = link_ksettings->base.duplex; | ||
497 | legacy_settings->port | ||
498 | = link_ksettings->base.port; | ||
499 | legacy_settings->phy_address | ||
500 | = link_ksettings->base.phy_address; | ||
501 | legacy_settings->autoneg | ||
502 | = link_ksettings->base.autoneg; | ||
503 | legacy_settings->mdio_support | ||
504 | = link_ksettings->base.mdio_support; | ||
505 | legacy_settings->eth_tp_mdix | ||
506 | = link_ksettings->base.eth_tp_mdix; | ||
507 | legacy_settings->eth_tp_mdix_ctrl | ||
508 | = link_ksettings->base.eth_tp_mdix_ctrl; | ||
509 | return retval; | ||
510 | } | ||
511 | |||
512 | /* number of 32-bit words to store the user's link mode bitmaps */ | ||
513 | #define __ETHTOOL_LINK_MODE_MASK_NU32 \ | ||
514 | DIV_ROUND_UP(__ETHTOOL_LINK_MODE_MASK_NBITS, 32) | ||
515 | |||
516 | /* layout of the struct passed from/to userland */ | ||
517 | struct ethtool_link_usettings { | ||
518 | struct ethtool_link_settings base; | ||
519 | struct { | ||
520 | __u32 supported[__ETHTOOL_LINK_MODE_MASK_NU32]; | ||
521 | __u32 advertising[__ETHTOOL_LINK_MODE_MASK_NU32]; | ||
522 | __u32 lp_advertising[__ETHTOOL_LINK_MODE_MASK_NU32]; | ||
523 | } link_modes; | ||
524 | }; | ||
525 | |||
526 | /* Internal kernel helper to query a device ethtool_link_settings. | ||
527 | * | ||
528 | * Backward compatibility note: for compatibility with legacy drivers | ||
529 | * that implement only the ethtool_cmd API, this has to work with both | ||
530 | * drivers implementing get_link_ksettings API and drivers | ||
531 | * implementing get_settings API. When drivers implement get_settings | ||
532 | * and report ethtool_cmd deprecated fields | ||
533 | * (transceiver/maxrxpkt/maxtxpkt), these fields are silently ignored | ||
534 | * because the resulting struct ethtool_link_settings does not report them. | ||
535 | */ | ||
536 | int __ethtool_get_link_ksettings(struct net_device *dev, | ||
537 | struct ethtool_link_ksettings *link_ksettings) | ||
538 | { | ||
539 | int err; | ||
540 | struct ethtool_cmd cmd; | ||
541 | |||
391 | ASSERT_RTNL(); | 542 | ASSERT_RTNL(); |
392 | 543 | ||
544 | if (dev->ethtool_ops->get_link_ksettings) { | ||
545 | memset(link_ksettings, 0, sizeof(*link_ksettings)); | ||
546 | return dev->ethtool_ops->get_link_ksettings(dev, | ||
547 | link_ksettings); | ||
548 | } | ||
549 | |||
550 | /* driver doesn't support %ethtool_link_ksettings API. revert to | ||
551 | * legacy %ethtool_cmd API, unless it's not supported either. | ||
552 | * TODO: remove when ethtool_ops::get_settings disappears internally | ||
553 | */ | ||
393 | if (!dev->ethtool_ops->get_settings) | 554 | if (!dev->ethtool_ops->get_settings) |
394 | return -EOPNOTSUPP; | 555 | return -EOPNOTSUPP; |
395 | 556 | ||
396 | memset(cmd, 0, sizeof(struct ethtool_cmd)); | 557 | memset(&cmd, 0, sizeof(cmd)); |
397 | cmd->cmd = ETHTOOL_GSET; | 558 | cmd.cmd = ETHTOOL_GSET; |
398 | return dev->ethtool_ops->get_settings(dev, cmd); | 559 | err = dev->ethtool_ops->get_settings(dev, &cmd); |
560 | if (err < 0) | ||
561 | return err; | ||
562 | |||
563 | /* we ignore deprecated fields transceiver/maxrxpkt/maxtxpkt | ||
564 | */ | ||
565 | convert_legacy_settings_to_link_ksettings(link_ksettings, &cmd); | ||
566 | return err; | ||
399 | } | 567 | } |
400 | EXPORT_SYMBOL(__ethtool_get_settings); | 568 | EXPORT_SYMBOL(__ethtool_get_link_ksettings); |
401 | 569 | ||
402 | static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) | 570 | /* convert ethtool_link_usettings in user space to a kernel internal |
571 | * ethtool_link_ksettings. return 0 on success, errno on error. | ||
572 | */ | ||
573 | static int load_link_ksettings_from_user(struct ethtool_link_ksettings *to, | ||
574 | const void __user *from) | ||
403 | { | 575 | { |
404 | int err; | 576 | struct ethtool_link_usettings link_usettings; |
405 | struct ethtool_cmd cmd; | 577 | |
578 | if (copy_from_user(&link_usettings, from, sizeof(link_usettings))) | ||
579 | return -EFAULT; | ||
580 | |||
581 | memcpy(&to->base, &link_usettings.base, sizeof(to->base)); | ||
582 | bitmap_from_u32array(to->link_modes.supported, | ||
583 | __ETHTOOL_LINK_MODE_MASK_NBITS, | ||
584 | link_usettings.link_modes.supported, | ||
585 | __ETHTOOL_LINK_MODE_MASK_NU32); | ||
586 | bitmap_from_u32array(to->link_modes.advertising, | ||
587 | __ETHTOOL_LINK_MODE_MASK_NBITS, | ||
588 | link_usettings.link_modes.advertising, | ||
589 | __ETHTOOL_LINK_MODE_MASK_NU32); | ||
590 | bitmap_from_u32array(to->link_modes.lp_advertising, | ||
591 | __ETHTOOL_LINK_MODE_MASK_NBITS, | ||
592 | link_usettings.link_modes.lp_advertising, | ||
593 | __ETHTOOL_LINK_MODE_MASK_NU32); | ||
594 | |||
595 | return 0; | ||
596 | } | ||
597 | |||
598 | /* convert a kernel internal ethtool_link_ksettings to | ||
599 | * ethtool_link_usettings in user space. return 0 on success, errno on | ||
600 | * error. | ||
601 | */ | ||
602 | static int | ||
603 | store_link_ksettings_for_user(void __user *to, | ||
604 | const struct ethtool_link_ksettings *from) | ||
605 | { | ||
606 | struct ethtool_link_usettings link_usettings; | ||
607 | |||
608 | memcpy(&link_usettings.base, &from->base, sizeof(link_usettings)); | ||
609 | bitmap_to_u32array(link_usettings.link_modes.supported, | ||
610 | __ETHTOOL_LINK_MODE_MASK_NU32, | ||
611 | from->link_modes.supported, | ||
612 | __ETHTOOL_LINK_MODE_MASK_NBITS); | ||
613 | bitmap_to_u32array(link_usettings.link_modes.advertising, | ||
614 | __ETHTOOL_LINK_MODE_MASK_NU32, | ||
615 | from->link_modes.advertising, | ||
616 | __ETHTOOL_LINK_MODE_MASK_NBITS); | ||
617 | bitmap_to_u32array(link_usettings.link_modes.lp_advertising, | ||
618 | __ETHTOOL_LINK_MODE_MASK_NU32, | ||
619 | from->link_modes.lp_advertising, | ||
620 | __ETHTOOL_LINK_MODE_MASK_NBITS); | ||
621 | |||
622 | if (copy_to_user(to, &link_usettings, sizeof(link_usettings))) | ||
623 | return -EFAULT; | ||
624 | |||
625 | return 0; | ||
626 | } | ||
627 | |||
628 | /* Query device for its ethtool_link_settings. | ||
629 | * | ||
630 | * Backward compatibility note: this function must fail when driver | ||
631 | * does not implement ethtool::get_link_ksettings, even if legacy | ||
632 | * ethtool_ops::get_settings is implemented. This tells new versions | ||
633 | * of ethtool that they should use the legacy API %ETHTOOL_GSET for | ||
634 | * this driver, so that they can correctly access the ethtool_cmd | ||
635 | * deprecated fields (transceiver/maxrxpkt/maxtxpkt), until no driver | ||
636 | * implements ethtool_ops::get_settings anymore. | ||
637 | */ | ||
638 | static int ethtool_get_link_ksettings(struct net_device *dev, | ||
639 | void __user *useraddr) | ||
640 | { | ||
641 | int err = 0; | ||
642 | struct ethtool_link_ksettings link_ksettings; | ||
406 | 643 | ||
407 | err = __ethtool_get_settings(dev, &cmd); | 644 | ASSERT_RTNL(); |
645 | |||
646 | if (!dev->ethtool_ops->get_link_ksettings) | ||
647 | return -EOPNOTSUPP; | ||
648 | |||
649 | /* handle bitmap nbits handshake */ | ||
650 | if (copy_from_user(&link_ksettings.base, useraddr, | ||
651 | sizeof(link_ksettings.base))) | ||
652 | return -EFAULT; | ||
653 | |||
654 | if (__ETHTOOL_LINK_MODE_MASK_NU32 | ||
655 | != link_ksettings.base.link_mode_masks_nwords) { | ||
656 | /* wrong link mode nbits requested */ | ||
657 | memset(&link_ksettings, 0, sizeof(link_ksettings)); | ||
658 | link_ksettings.base.cmd = ETHTOOL_GLINKSETTINGS; | ||
659 | /* send back number of words required as negative val */ | ||
660 | compiletime_assert(__ETHTOOL_LINK_MODE_MASK_NU32 <= S8_MAX, | ||
661 | "need too many bits for link modes!"); | ||
662 | link_ksettings.base.link_mode_masks_nwords | ||
663 | = -((s8)__ETHTOOL_LINK_MODE_MASK_NU32); | ||
664 | |||
665 | /* copy the base fields back to user, not the link | ||
666 | * mode bitmaps | ||
667 | */ | ||
668 | if (copy_to_user(useraddr, &link_ksettings.base, | ||
669 | sizeof(link_ksettings.base))) | ||
670 | return -EFAULT; | ||
671 | |||
672 | return 0; | ||
673 | } | ||
674 | |||
675 | /* handshake successful: user/kernel agree on | ||
676 | * link_mode_masks_nwords | ||
677 | */ | ||
678 | |||
679 | memset(&link_ksettings, 0, sizeof(link_ksettings)); | ||
680 | err = dev->ethtool_ops->get_link_ksettings(dev, &link_ksettings); | ||
408 | if (err < 0) | 681 | if (err < 0) |
409 | return err; | 682 | return err; |
410 | 683 | ||
684 | /* make sure we tell the right values to user */ | ||
685 | link_ksettings.base.cmd = ETHTOOL_GLINKSETTINGS; | ||
686 | link_ksettings.base.link_mode_masks_nwords | ||
687 | = __ETHTOOL_LINK_MODE_MASK_NU32; | ||
688 | |||
689 | return store_link_ksettings_for_user(useraddr, &link_ksettings); | ||
690 | } | ||
691 | |||
692 | /* Update device ethtool_link_settings. | ||
693 | * | ||
694 | * Backward compatibility note: this function must fail when driver | ||
695 | * does not implement ethtool::set_link_ksettings, even if legacy | ||
696 | * ethtool_ops::set_settings is implemented. This tells new versions | ||
697 | * of ethtool that they should use the legacy API %ETHTOOL_SSET for | ||
698 | * this driver, so that they can correctly update the ethtool_cmd | ||
699 | * deprecated fields (transceiver/maxrxpkt/maxtxpkt), until no driver | ||
700 | * implements ethtool_ops::get_settings anymore. | ||
701 | */ | ||
702 | static int ethtool_set_link_ksettings(struct net_device *dev, | ||
703 | void __user *useraddr) | ||
704 | { | ||
705 | int err; | ||
706 | struct ethtool_link_ksettings link_ksettings; | ||
707 | |||
708 | ASSERT_RTNL(); | ||
709 | |||
710 | if (!dev->ethtool_ops->set_link_ksettings) | ||
711 | return -EOPNOTSUPP; | ||
712 | |||
713 | /* make sure the nbits field has the expected value */ | ||
714 | if (copy_from_user(&link_ksettings.base, useraddr, | ||
715 | sizeof(link_ksettings.base))) | ||
716 | return -EFAULT; | ||
717 | |||
718 | if (__ETHTOOL_LINK_MODE_MASK_NU32 | ||
719 | != link_ksettings.base.link_mode_masks_nwords) | ||
720 | return -EINVAL; | ||
721 | |||
722 | /* copy the whole structure, now that we know it has expected | ||
723 | * format | ||
724 | */ | ||
725 | err = load_link_ksettings_from_user(&link_ksettings, useraddr); | ||
726 | if (err) | ||
727 | return err; | ||
728 | |||
729 | /* re-check nwords field, just in case */ | ||
730 | if (__ETHTOOL_LINK_MODE_MASK_NU32 | ||
731 | != link_ksettings.base.link_mode_masks_nwords) | ||
732 | return -EINVAL; | ||
733 | |||
734 | return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings); | ||
735 | } | ||
736 | |||
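On the driver side, the new ops work on kernel-sized link-mode bitmaps instead of fixed u32 masks. A hedged sketch of a minimal get_link_ksettings implementation for a hypothetical fixed-gigabit device "foo", using the link-mode helpers introduced alongside this API:

    static int foo_get_link_ksettings(struct net_device *dev,
                                      struct ethtool_link_ksettings *cmd)
    {
            /* the core zeroes *cmd before calling, so only set what applies */
            ethtool_link_ksettings_zero_link_mode(cmd, supported);
            ethtool_link_ksettings_add_link_mode(cmd, supported, 1000baseT_Full);
            ethtool_link_ksettings_add_link_mode(cmd, supported, TP);
            ethtool_link_ksettings_zero_link_mode(cmd, advertising);
            ethtool_link_ksettings_add_link_mode(cmd, advertising, 1000baseT_Full);

            cmd->base.speed = SPEED_1000;
            cmd->base.duplex = DUPLEX_FULL;
            cmd->base.port = PORT_TP;
            cmd->base.autoneg = AUTONEG_DISABLE;
            return 0;
    }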
737 | static void | ||
738 | warn_incomplete_ethtool_legacy_settings_conversion(const char *details) | ||
739 | { | ||
740 | char name[sizeof(current->comm)]; | ||
741 | |||
742 | pr_info_once("warning: `%s' uses legacy ethtool link settings API, %s\n", | ||
743 | get_task_comm(name, current), details); | ||
744 | } | ||
745 | |||
746 | /* Query device for its ethtool_cmd settings. | ||
747 | * | ||
748 | * Backward compatibility note: for compatibility with legacy ethtool, | ||
749 | * this has to work with both drivers implementing get_link_ksettings | ||
750 | * API and drivers implementing get_settings API. When drivers | ||
751 | * implement get_link_ksettings and report higher link mode bits, a | ||
752 | * kernel warning is logged once (with the name of the first | ||
753 | * process affected) recommending an ethtool upgrade, but the command | ||
754 | * succeeds (only the lower link mode bits are reported back to user). | ||
755 | */ | ||
756 | static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) | ||
757 | { | ||
758 | struct ethtool_cmd cmd; | ||
759 | |||
760 | ASSERT_RTNL(); | ||
761 | |||
762 | if (dev->ethtool_ops->get_link_ksettings) { | ||
763 | /* First, use link_ksettings API if it is supported */ | ||
764 | int err; | ||
765 | struct ethtool_link_ksettings link_ksettings; | ||
766 | |||
767 | memset(&link_ksettings, 0, sizeof(link_ksettings)); | ||
768 | err = dev->ethtool_ops->get_link_ksettings(dev, | ||
769 | &link_ksettings); | ||
770 | if (err < 0) | ||
771 | return err; | ||
772 | if (!convert_link_ksettings_to_legacy_settings(&cmd, | ||
773 | &link_ksettings)) | ||
774 | warn_incomplete_ethtool_legacy_settings_conversion( | ||
775 | "link modes are only partially reported"); | ||
776 | |||
777 | /* send a sensible cmd tag back to user */ | ||
778 | cmd.cmd = ETHTOOL_GSET; | ||
779 | } else { | ||
780 | /* driver doesn't support %ethtool_link_ksettings | ||
781 | * API. Revert to the legacy %ethtool_cmd API, unless it's | ||
782 | * not supported either. | ||
783 | */ | ||
784 | int err; | ||
785 | |||
786 | if (!dev->ethtool_ops->get_settings) | ||
787 | return -EOPNOTSUPP; | ||
788 | |||
789 | memset(&cmd, 0, sizeof(cmd)); | ||
790 | cmd.cmd = ETHTOOL_GSET; | ||
791 | err = dev->ethtool_ops->get_settings(dev, &cmd); | ||
792 | if (err < 0) | ||
793 | return err; | ||
794 | } | ||
795 | |||
411 | if (copy_to_user(useraddr, &cmd, sizeof(cmd))) | 796 | if (copy_to_user(useraddr, &cmd, sizeof(cmd))) |
412 | return -EFAULT; | 797 | return -EFAULT; |
798 | |||
413 | return 0; | 799 | return 0; |
414 | } | 800 | } |
415 | 801 | ||
802 | /* Update device link settings with given ethtool_cmd. | ||
803 | * | ||
804 | * Backward compatibility note: for compatibility with legacy ethtool, | ||
805 | * this has to work with both drivers implementing set_link_ksettings | ||
806 | * API and drivers implementing set_settings API. When drivers | ||
807 | * implement set_link_ksettings and user's request updates deprecated | ||
808 | * ethtool_cmd fields (transceiver/maxrxpkt/maxtxpkt), a kernel | ||
809 | * warning is logged once (with the name of the first process | ||
810 | * affected) recommending an ethtool upgrade, and the request is rejected. | ||
811 | */ | ||
416 | static int ethtool_set_settings(struct net_device *dev, void __user *useraddr) | 812 | static int ethtool_set_settings(struct net_device *dev, void __user *useraddr) |
417 | { | 813 | { |
418 | struct ethtool_cmd cmd; | 814 | struct ethtool_cmd cmd; |
419 | 815 | ||
420 | if (!dev->ethtool_ops->set_settings) | 816 | ASSERT_RTNL(); |
421 | return -EOPNOTSUPP; | ||
422 | 817 | ||
423 | if (copy_from_user(&cmd, useraddr, sizeof(cmd))) | 818 | if (copy_from_user(&cmd, useraddr, sizeof(cmd))) |
424 | return -EFAULT; | 819 | return -EFAULT; |
425 | 820 | ||
821 | /* First, try the new %ethtool_link_ksettings API. */ | ||
822 | if (dev->ethtool_ops->set_link_ksettings) { | ||
823 | struct ethtool_link_ksettings link_ksettings; | ||
824 | |||
825 | if (!convert_legacy_settings_to_link_ksettings(&link_ksettings, | ||
826 | &cmd)) | ||
827 | return -EINVAL; | ||
828 | |||
829 | link_ksettings.base.cmd = ETHTOOL_SLINKSETTINGS; | ||
830 | link_ksettings.base.link_mode_masks_nwords | ||
831 | = __ETHTOOL_LINK_MODE_MASK_NU32; | ||
832 | return dev->ethtool_ops->set_link_ksettings(dev, | ||
833 | &link_ksettings); | ||
834 | } | ||
835 | |||
836 | /* legacy %ethtool_cmd API */ | ||
837 | |||
838 | /* TODO: return -EOPNOTSUPP when ethtool_ops::get_settings | ||
839 | * disappears internally | ||
840 | */ | ||
841 | |||
842 | if (!dev->ethtool_ops->set_settings) | ||
843 | return -EOPNOTSUPP; | ||
844 | |||
426 | return dev->ethtool_ops->set_settings(dev, &cmd); | 845 | return dev->ethtool_ops->set_settings(dev, &cmd); |
427 | } | 846 | } |
428 | 847 | ||
@@ -632,7 +1051,7 @@ static int ethtool_copy_validate_indir(u32 *indir, void __user *useraddr, | |||
632 | return 0; | 1051 | return 0; |
633 | } | 1052 | } |
634 | 1053 | ||
635 | u8 netdev_rss_key[NETDEV_RSS_KEY_LEN]; | 1054 | u8 netdev_rss_key[NETDEV_RSS_KEY_LEN] __read_mostly; |
636 | 1055 | ||
637 | void netdev_rss_key_fill(void *buffer, size_t len) | 1056 | void netdev_rss_key_fill(void *buffer, size_t len) |
638 | { | 1057 | { |
@@ -642,6 +1061,37 @@ void netdev_rss_key_fill(void *buffer, size_t len) | |||
642 | } | 1061 | } |
643 | EXPORT_SYMBOL(netdev_rss_key_fill); | 1062 | EXPORT_SYMBOL(netdev_rss_key_fill); |
644 | 1063 | ||
1064 | static int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max) | ||
1065 | { | ||
1066 | u32 dev_size, current_max = 0; | ||
1067 | u32 *indir; | ||
1068 | int ret; | ||
1069 | |||
1070 | if (!dev->ethtool_ops->get_rxfh_indir_size || | ||
1071 | !dev->ethtool_ops->get_rxfh) | ||
1072 | return -EOPNOTSUPP; | ||
1073 | dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev); | ||
1074 | if (dev_size == 0) | ||
1075 | return -EOPNOTSUPP; | ||
1076 | |||
1077 | indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER); | ||
1078 | if (!indir) | ||
1079 | return -ENOMEM; | ||
1080 | |||
1081 | ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL, NULL); | ||
1082 | if (ret) | ||
1083 | goto out; | ||
1084 | |||
1085 | while (dev_size--) | ||
1086 | current_max = max(current_max, indir[dev_size]); | ||
1087 | |||
1088 | *max = current_max; | ||
1089 | |||
1090 | out: | ||
1091 | kfree(indir); | ||
1092 | return ret; | ||
1093 | } | ||
1094 | |||
645 | static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, | 1095 | static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, |
646 | void __user *useraddr) | 1096 | void __user *useraddr) |
647 | { | 1097 | { |
@@ -738,6 +1188,14 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, | |||
738 | } | 1188 | } |
739 | 1189 | ||
740 | ret = ops->set_rxfh(dev, indir, NULL, ETH_RSS_HASH_NO_CHANGE); | 1190 | ret = ops->set_rxfh(dev, indir, NULL, ETH_RSS_HASH_NO_CHANGE); |
1191 | if (ret) | ||
1192 | goto out; | ||
1193 | |||
1194 | /* indicate whether rxfh was set to default */ | ||
1195 | if (user_size == 0) | ||
1196 | dev->priv_flags &= ~IFF_RXFH_CONFIGURED; | ||
1197 | else | ||
1198 | dev->priv_flags |= IFF_RXFH_CONFIGURED; | ||
741 | 1199 | ||
742 | out: | 1200 | out: |
743 | kfree(indir); | 1201 | kfree(indir); |
@@ -897,6 +1355,14 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, | |||
897 | } | 1355 | } |
898 | 1356 | ||
899 | ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc); | 1357 | ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc); |
1358 | if (ret) | ||
1359 | goto out; | ||
1360 | |||
1361 | /* indicate whether rxfh was set to default */ | ||
1362 | if (rxfh.indir_size == 0) | ||
1363 | dev->priv_flags &= ~IFF_RXFH_CONFIGURED; | ||
1364 | else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) | ||
1365 | dev->priv_flags |= IFF_RXFH_CONFIGURED; | ||
900 | 1366 | ||
901 | out: | 1367 | out: |
902 | kfree(rss_config); | 1368 | kfree(rss_config); |
@@ -1227,14 +1693,31 @@ static noinline_for_stack int ethtool_get_channels(struct net_device *dev, | |||
1227 | static noinline_for_stack int ethtool_set_channels(struct net_device *dev, | 1693 | static noinline_for_stack int ethtool_set_channels(struct net_device *dev, |
1228 | void __user *useraddr) | 1694 | void __user *useraddr) |
1229 | { | 1695 | { |
1230 | struct ethtool_channels channels; | 1696 | struct ethtool_channels channels, max; |
1697 | u32 max_rx_in_use = 0; | ||
1231 | 1698 | ||
1232 | if (!dev->ethtool_ops->set_channels) | 1699 | if (!dev->ethtool_ops->set_channels || !dev->ethtool_ops->get_channels) |
1233 | return -EOPNOTSUPP; | 1700 | return -EOPNOTSUPP; |
1234 | 1701 | ||
1235 | if (copy_from_user(&channels, useraddr, sizeof(channels))) | 1702 | if (copy_from_user(&channels, useraddr, sizeof(channels))) |
1236 | return -EFAULT; | 1703 | return -EFAULT; |
1237 | 1704 | ||
1705 | dev->ethtool_ops->get_channels(dev, &max); | ||
1706 | |||
1707 | /* ensure new counts are within the maximums */ | ||
1708 | if ((channels.rx_count > max.max_rx) || | ||
1709 | (channels.tx_count > max.max_tx) || | ||
1710 | (channels.combined_count > max.max_combined) || | ||
1711 | (channels.other_count > max.max_other)) | ||
1712 | return -EINVAL; | ||
1713 | |||
1714 | /* ensure the new Rx count fits within the configured Rx flow | ||
1715 | * indirection table settings */ | ||
1716 | if (netif_is_rxfh_configured(dev) && | ||
1717 | !ethtool_get_max_rxfh_channel(dev, &max_rx_in_use) && | ||
1718 | (channels.combined_count + channels.rx_count) <= max_rx_in_use) | ||
1719 | return -EINVAL; | ||
1720 | |||
1238 | return dev->ethtool_ops->set_channels(dev, &channels); | 1721 | return dev->ethtool_ops->set_channels(dev, &channels); |
1239 | } | 1722 | } |
1240 | 1723 | ||
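A worked check of the RXFH constraint above, with hypothetical values: the indirection table stores RX queue indices, so the new RX queue total must be strictly greater than the highest index still referenced.

    /* names local to this example */
    static bool shrink_breaks_rxfh(u32 rx_count, u32 combined_count,
                                   u32 max_rx_in_use)
    {
            /* e.g. a table spread over queues {0,1,2,3} gives
             * max_rx_in_use == 3: shrinking to a total of 3 RX queues
             * only keeps indices 0..2 and is rejected, 4 or more is fine
             */
            return (rx_count + combined_count) <= max_rx_in_use;
    }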
@@ -1823,13 +2306,121 @@ out: | |||
1823 | return ret; | 2306 | return ret; |
1824 | } | 2307 | } |
1825 | 2308 | ||
2309 | static int ethtool_get_per_queue_coalesce(struct net_device *dev, | ||
2310 | void __user *useraddr, | ||
2311 | struct ethtool_per_queue_op *per_queue_opt) | ||
2312 | { | ||
2313 | u32 bit; | ||
2314 | int ret; | ||
2315 | DECLARE_BITMAP(queue_mask, MAX_NUM_QUEUE); | ||
2316 | |||
2317 | if (!dev->ethtool_ops->get_per_queue_coalesce) | ||
2318 | return -EOPNOTSUPP; | ||
2319 | |||
2320 | useraddr += sizeof(*per_queue_opt); | ||
2321 | |||
2322 | bitmap_from_u32array(queue_mask, | ||
2323 | MAX_NUM_QUEUE, | ||
2324 | per_queue_opt->queue_mask, | ||
2325 | DIV_ROUND_UP(MAX_NUM_QUEUE, 32)); | ||
2326 | |||
2327 | for_each_set_bit(bit, queue_mask, MAX_NUM_QUEUE) { | ||
2328 | struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE }; | ||
2329 | |||
2330 | ret = dev->ethtool_ops->get_per_queue_coalesce(dev, bit, &coalesce); | ||
2331 | if (ret != 0) | ||
2332 | return ret; | ||
2333 | if (copy_to_user(useraddr, &coalesce, sizeof(coalesce))) | ||
2334 | return -EFAULT; | ||
2335 | useraddr += sizeof(coalesce); | ||
2336 | } | ||
2337 | |||
2338 | return 0; | ||
2339 | } | ||
2340 | |||
2341 | static int ethtool_set_per_queue_coalesce(struct net_device *dev, | ||
2342 | void __user *useraddr, | ||
2343 | struct ethtool_per_queue_op *per_queue_opt) | ||
2344 | { | ||
2345 | u32 bit; | ||
2346 | int i, ret = 0; | ||
2347 | int n_queue; | ||
2348 | struct ethtool_coalesce *backup = NULL, *tmp = NULL; | ||
2349 | DECLARE_BITMAP(queue_mask, MAX_NUM_QUEUE); | ||
2350 | |||
2351 | if ((!dev->ethtool_ops->set_per_queue_coalesce) || | ||
2352 | (!dev->ethtool_ops->get_per_queue_coalesce)) | ||
2353 | return -EOPNOTSUPP; | ||
2354 | |||
2355 | useraddr += sizeof(*per_queue_opt); | ||
2356 | |||
2357 | bitmap_from_u32array(queue_mask, | ||
2358 | MAX_NUM_QUEUE, | ||
2359 | per_queue_opt->queue_mask, | ||
2360 | DIV_ROUND_UP(MAX_NUM_QUEUE, 32)); | ||
2361 | n_queue = bitmap_weight(queue_mask, MAX_NUM_QUEUE); | ||
2362 | tmp = backup = kmalloc_array(n_queue, sizeof(*backup), GFP_KERNEL); | ||
2363 | if (!backup) | ||
2364 | return -ENOMEM; | ||
2365 | |||
2366 | for_each_set_bit(bit, queue_mask, MAX_NUM_QUEUE) { | ||
2367 | struct ethtool_coalesce coalesce; | ||
2368 | |||
2369 | ret = dev->ethtool_ops->get_per_queue_coalesce(dev, bit, tmp); | ||
2370 | if (ret != 0) | ||
2371 | goto roll_back; | ||
2372 | |||
2373 | tmp++; | ||
2374 | |||
2375 | if (copy_from_user(&coalesce, useraddr, sizeof(coalesce))) { | ||
2376 | ret = -EFAULT; | ||
2377 | goto roll_back; | ||
2378 | } | ||
2379 | |||
2380 | ret = dev->ethtool_ops->set_per_queue_coalesce(dev, bit, &coalesce); | ||
2381 | if (ret != 0) | ||
2382 | goto roll_back; | ||
2383 | |||
2384 | useraddr += sizeof(coalesce); | ||
2385 | } | ||
2386 | |||
2387 | roll_back: | ||
2388 | if (ret != 0) { | ||
2389 | tmp = backup; | ||
2390 | for_each_set_bit(i, queue_mask, bit) { | ||
2391 | dev->ethtool_ops->set_per_queue_coalesce(dev, i, tmp); | ||
2392 | tmp++; | ||
2393 | } | ||
2394 | } | ||
2395 | kfree(backup); | ||
2396 | |||
2397 | return ret; | ||
2398 | } | ||
2399 | |||
2400 | static int ethtool_set_per_queue(struct net_device *dev, void __user *useraddr) | ||
2401 | { | ||
2402 | struct ethtool_per_queue_op per_queue_opt; | ||
2403 | |||
2404 | if (copy_from_user(&per_queue_opt, useraddr, sizeof(per_queue_opt))) | ||
2405 | return -EFAULT; | ||
2406 | |||
2407 | switch (per_queue_opt.sub_command) { | ||
2408 | case ETHTOOL_GCOALESCE: | ||
2409 | return ethtool_get_per_queue_coalesce(dev, useraddr, &per_queue_opt); | ||
2410 | case ETHTOOL_SCOALESCE: | ||
2411 | return ethtool_set_per_queue_coalesce(dev, useraddr, &per_queue_opt); | ||
2412 | default: | ||
2413 | return -EOPNOTSUPP; | ||
2414 | } | ||
2415 | } | ||
2416 | |||
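On the wire, an ETHTOOL_PERQUEUE request is the fixed header followed by one struct ethtool_coalesce per bit set in queue_mask, in ascending queue order. A hypothetical userspace sketch querying queues 0 and 2, error handling elided:

    #include <stdlib.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <net/if.h>
    #include <linux/ethtool.h>
    #include <linux/sockios.h>

    static int query_queue_coalesce(int fd, const char *ifname)
    {
            size_t len = sizeof(struct ethtool_per_queue_op) +
                         2 * sizeof(struct ethtool_coalesce);
            struct ethtool_per_queue_op *op = calloc(1, len);
            struct ifreq ifr;
            int ret;

            if (!op)
                    return -1;
            op->cmd = ETHTOOL_PERQUEUE;
            op->sub_command = ETHTOOL_GCOALESCE;
            op->queue_mask[0] = (1 << 0) | (1 << 2);

            memset(&ifr, 0, sizeof(ifr));
            strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
            ifr.ifr_data = (void *)op;

            ret = ioctl(fd, SIOCETHTOOL, &ifr);
            /* on success, op->data holds two struct ethtool_coalesce:
             * queue 0 first, then queue 2
             */
            free(op);
            return ret;
    }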
1826 | /* The main entry point in this file. Called from net/core/dev_ioctl.c */ | 2417 | /* The main entry point in this file. Called from net/core/dev_ioctl.c */ |
1827 | 2418 | ||
1828 | int dev_ethtool(struct net *net, struct ifreq *ifr) | 2419 | int dev_ethtool(struct net *net, struct ifreq *ifr) |
1829 | { | 2420 | { |
1830 | struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); | 2421 | struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); |
1831 | void __user *useraddr = ifr->ifr_data; | 2422 | void __user *useraddr = ifr->ifr_data; |
1832 | u32 ethcmd; | 2423 | u32 ethcmd, sub_cmd; |
1833 | int rc; | 2424 | int rc; |
1834 | netdev_features_t old_features; | 2425 | netdev_features_t old_features; |
1835 | 2426 | ||
@@ -1839,8 +2430,14 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) | |||
1839 | if (copy_from_user(ðcmd, useraddr, sizeof(ethcmd))) | 2430 | if (copy_from_user(ðcmd, useraddr, sizeof(ethcmd))) |
1840 | return -EFAULT; | 2431 | return -EFAULT; |
1841 | 2432 | ||
2433 | if (ethcmd == ETHTOOL_PERQUEUE) { | ||
2434 | if (copy_from_user(&sub_cmd, useraddr + sizeof(ethcmd), sizeof(sub_cmd))) | ||
2435 | return -EFAULT; | ||
2436 | } else { | ||
2437 | sub_cmd = ethcmd; | ||
2438 | } | ||
1842 | /* Allow some commands to be done by anyone */ | 2439 | /* Allow some commands to be done by anyone */ |
1843 | switch (ethcmd) { | 2440 | switch (sub_cmd) { |
1844 | case ETHTOOL_GSET: | 2441 | case ETHTOOL_GSET: |
1845 | case ETHTOOL_GDRVINFO: | 2442 | case ETHTOOL_GDRVINFO: |
1846 | case ETHTOOL_GMSGLVL: | 2443 | case ETHTOOL_GMSGLVL: |
@@ -2070,6 +2667,15 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) | |||
2070 | case ETHTOOL_GPHYSTATS: | 2667 | case ETHTOOL_GPHYSTATS: |
2071 | rc = ethtool_get_phy_stats(dev, useraddr); | 2668 | rc = ethtool_get_phy_stats(dev, useraddr); |
2072 | break; | 2669 | break; |
2670 | case ETHTOOL_PERQUEUE: | ||
2671 | rc = ethtool_set_per_queue(dev, useraddr); | ||
2672 | break; | ||
2673 | case ETHTOOL_GLINKSETTINGS: | ||
2674 | rc = ethtool_get_link_ksettings(dev, useraddr); | ||
2675 | break; | ||
2676 | case ETHTOOL_SLINKSETTINGS: | ||
2677 | rc = ethtool_set_link_ksettings(dev, useraddr); | ||
2678 | break; | ||
2073 | default: | 2679 | default: |
2074 | rc = -EOPNOTSUPP; | 2680 | rc = -EOPNOTSUPP; |
2075 | } | 2681 | } |
diff --git a/net/core/filter.c b/net/core/filter.c index 94d26201080d..ca7f832b2980 100644 --- a/net/core/filter.c +++ b/net/core/filter.c | |||
@@ -530,12 +530,14 @@ do_pass: | |||
530 | *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP); | 530 | *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP); |
531 | break; | 531 | break; |
532 | 532 | ||
533 | /* RET_K, RET_A are remapped into 2 insns. */ | 533 | /* RET_K is remapped into 2 insns. RET_A case doesn't need an
534 | * extra mov as BPF_REG_0 is already mapped into BPF_REG_A. | ||
535 | */ | ||
534 | case BPF_RET | BPF_A: | 536 | case BPF_RET | BPF_A: |
535 | case BPF_RET | BPF_K: | 537 | case BPF_RET | BPF_K: |
536 | *insn++ = BPF_MOV32_RAW(BPF_RVAL(fp->code) == BPF_K ? | 538 | if (BPF_RVAL(fp->code) == BPF_K) |
537 | BPF_K : BPF_X, BPF_REG_0, | 539 | *insn++ = BPF_MOV32_RAW(BPF_K, BPF_REG_0, |
538 | BPF_REG_A, fp->k); | 540 | 0, fp->k); |
539 | *insn = BPF_EXIT_INSN(); | 541 | *insn = BPF_EXIT_INSN(); |
540 | break; | 542 | break; |
541 | 543 | ||
@@ -1147,7 +1149,8 @@ void bpf_prog_destroy(struct bpf_prog *fp) | |||
1147 | } | 1149 | } |
1148 | EXPORT_SYMBOL_GPL(bpf_prog_destroy); | 1150 | EXPORT_SYMBOL_GPL(bpf_prog_destroy); |
1149 | 1151 | ||
1150 | static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk) | 1152 | static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk, |
1153 | bool locked) | ||
1151 | { | 1154 | { |
1152 | struct sk_filter *fp, *old_fp; | 1155 | struct sk_filter *fp, *old_fp; |
1153 | 1156 | ||
@@ -1163,10 +1166,8 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk) | |||
1163 | return -ENOMEM; | 1166 | return -ENOMEM; |
1164 | } | 1167 | } |
1165 | 1168 | ||
1166 | old_fp = rcu_dereference_protected(sk->sk_filter, | 1169 | old_fp = rcu_dereference_protected(sk->sk_filter, locked); |
1167 | sock_owned_by_user(sk)); | ||
1168 | rcu_assign_pointer(sk->sk_filter, fp); | 1170 | rcu_assign_pointer(sk->sk_filter, fp); |
1169 | |||
1170 | if (old_fp) | 1171 | if (old_fp) |
1171 | sk_filter_uncharge(sk, old_fp); | 1172 | sk_filter_uncharge(sk, old_fp); |
1172 | 1173 | ||
@@ -1181,7 +1182,7 @@ static int __reuseport_attach_prog(struct bpf_prog *prog, struct sock *sk) | |||
1181 | if (bpf_prog_size(prog->len) > sysctl_optmem_max) | 1182 | if (bpf_prog_size(prog->len) > sysctl_optmem_max) |
1182 | return -ENOMEM; | 1183 | return -ENOMEM; |
1183 | 1184 | ||
1184 | if (sk_unhashed(sk)) { | 1185 | if (sk_unhashed(sk) && sk->sk_reuseport) { |
1185 | err = reuseport_alloc(sk); | 1186 | err = reuseport_alloc(sk); |
1186 | if (err) | 1187 | if (err) |
1187 | return err; | 1188 | return err; |
@@ -1245,7 +1246,8 @@ struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk) | |||
1245 | * occurs or there is insufficient memory for the filter a negative | 1246 | * occurs or there is insufficient memory for the filter a negative |
1246 | * errno code is returned. On success the return is zero. | 1247 | * errno code is returned. On success the return is zero. |
1247 | */ | 1248 | */ |
1248 | int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) | 1249 | int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk, |
1250 | bool locked) | ||
1249 | { | 1251 | { |
1250 | struct bpf_prog *prog = __get_filter(fprog, sk); | 1252 | struct bpf_prog *prog = __get_filter(fprog, sk); |
1251 | int err; | 1253 | int err; |
@@ -1253,7 +1255,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) | |||
1253 | if (IS_ERR(prog)) | 1255 | if (IS_ERR(prog)) |
1254 | return PTR_ERR(prog); | 1256 | return PTR_ERR(prog); |
1255 | 1257 | ||
1256 | err = __sk_attach_prog(prog, sk); | 1258 | err = __sk_attach_prog(prog, sk, locked); |
1257 | if (err < 0) { | 1259 | if (err < 0) { |
1258 | __bpf_prog_release(prog); | 1260 | __bpf_prog_release(prog); |
1259 | return err; | 1261 | return err; |
@@ -1261,7 +1263,12 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) | |||
1261 | 1263 | ||
1262 | return 0; | 1264 | return 0; |
1263 | } | 1265 | } |
1264 | EXPORT_SYMBOL_GPL(sk_attach_filter); | 1266 | EXPORT_SYMBOL_GPL(__sk_attach_filter); |
1267 | |||
1268 | int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) | ||
1269 | { | ||
1270 | return __sk_attach_filter(fprog, sk, sock_owned_by_user(sk)); | ||
1271 | } | ||
1265 | 1272 | ||
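The new "locked" parameter exists so callers that serialize sk_filter with something other than the socket lock can still satisfy the rcu_dereference_protected() condition in __sk_attach_prog(). A hedged sketch of such a caller, mirroring the RTNL-protected usage this change targets:

    /* hypothetical caller holding RTNL instead of the socket lock */
    static int attach_under_rtnl(struct sock_fprog *fprog, struct sock *sk)
    {
            ASSERT_RTNL();
            return __sk_attach_filter(fprog, sk, lockdep_rtnl_is_held());
    }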
1266 | int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk) | 1273 | int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk) |
1267 | { | 1274 | { |
@@ -1307,7 +1314,7 @@ int sk_attach_bpf(u32 ufd, struct sock *sk) | |||
1307 | if (IS_ERR(prog)) | 1314 | if (IS_ERR(prog)) |
1308 | return PTR_ERR(prog); | 1315 | return PTR_ERR(prog); |
1309 | 1316 | ||
1310 | err = __sk_attach_prog(prog, sk); | 1317 | err = __sk_attach_prog(prog, sk, sock_owned_by_user(sk)); |
1311 | if (err < 0) { | 1318 | if (err < 0) { |
1312 | bpf_prog_put(prog); | 1319 | bpf_prog_put(prog); |
1313 | return err; | 1320 | return err; |
@@ -1333,18 +1340,25 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk) | |||
1333 | return 0; | 1340 | return 0; |
1334 | } | 1341 | } |
1335 | 1342 | ||
1336 | #define BPF_LDST_LEN 16U | 1343 | struct bpf_scratchpad { |
1344 | union { | ||
1345 | __be32 diff[MAX_BPF_STACK / sizeof(__be32)]; | ||
1346 | u8 buff[MAX_BPF_STACK]; | ||
1347 | }; | ||
1348 | }; | ||
1349 | |||
1350 | static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp); | ||
1337 | 1351 | ||
1338 | static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) | 1352 | static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) |
1339 | { | 1353 | { |
1354 | struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp); | ||
1340 | struct sk_buff *skb = (struct sk_buff *) (long) r1; | 1355 | struct sk_buff *skb = (struct sk_buff *) (long) r1; |
1341 | int offset = (int) r2; | 1356 | int offset = (int) r2; |
1342 | void *from = (void *) (long) r3; | 1357 | void *from = (void *) (long) r3; |
1343 | unsigned int len = (unsigned int) r4; | 1358 | unsigned int len = (unsigned int) r4; |
1344 | char buf[BPF_LDST_LEN]; | ||
1345 | void *ptr; | 1359 | void *ptr; |
1346 | 1360 | ||
1347 | if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM))) | 1361 | if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH))) |
1348 | return -EINVAL; | 1362 | return -EINVAL; |
1349 | 1363 | ||
1350 | /* bpf verifier guarantees that: | 1364 | /* bpf verifier guarantees that: |
@@ -1355,14 +1369,12 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) | |||
1355 | * | 1369 | * |
1356 | * so check for invalid 'offset' and too large 'len' | 1370 | * so check for invalid 'offset' and too large 'len' |
1357 | */ | 1371 | */ |
1358 | if (unlikely((u32) offset > 0xffff || len > sizeof(buf))) | 1372 | if (unlikely((u32) offset > 0xffff || len > sizeof(sp->buff))) |
1359 | return -EFAULT; | 1373 | return -EFAULT; |
1360 | 1374 | if (unlikely(skb_try_make_writable(skb, offset + len))) | |
1361 | if (unlikely(skb_cloned(skb) && | ||
1362 | !skb_clone_writable(skb, offset + len))) | ||
1363 | return -EFAULT; | 1375 | return -EFAULT; |
1364 | 1376 | ||
1365 | ptr = skb_header_pointer(skb, offset, len, buf); | 1377 | ptr = skb_header_pointer(skb, offset, len, sp->buff); |
1366 | if (unlikely(!ptr)) | 1378 | if (unlikely(!ptr)) |
1367 | return -EFAULT; | 1379 | return -EFAULT; |
1368 | 1380 | ||
@@ -1371,17 +1383,19 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) | |||
1371 | 1383 | ||
1372 | memcpy(ptr, from, len); | 1384 | memcpy(ptr, from, len); |
1373 | 1385 | ||
1374 | if (ptr == buf) | 1386 | if (ptr == sp->buff) |
1375 | /* skb_store_bits cannot return -EFAULT here */ | 1387 | /* skb_store_bits cannot return -EFAULT here */ |
1376 | skb_store_bits(skb, offset, ptr, len); | 1388 | skb_store_bits(skb, offset, ptr, len); |
1377 | 1389 | ||
1378 | if (flags & BPF_F_RECOMPUTE_CSUM) | 1390 | if (flags & BPF_F_RECOMPUTE_CSUM) |
1379 | skb_postpush_rcsum(skb, ptr, len); | 1391 | skb_postpush_rcsum(skb, ptr, len); |
1392 | if (flags & BPF_F_INVALIDATE_HASH) | ||
1393 | skb_clear_hash(skb); | ||
1380 | 1394 | ||
1381 | return 0; | 1395 | return 0; |
1382 | } | 1396 | } |
1383 | 1397 | ||
1384 | const struct bpf_func_proto bpf_skb_store_bytes_proto = { | 1398 | static const struct bpf_func_proto bpf_skb_store_bytes_proto = { |
1385 | .func = bpf_skb_store_bytes, | 1399 | .func = bpf_skb_store_bytes, |
1386 | .gpl_only = false, | 1400 | .gpl_only = false, |
1387 | .ret_type = RET_INTEGER, | 1401 | .ret_type = RET_INTEGER, |
@@ -1400,7 +1414,7 @@ static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) | |||
1400 | unsigned int len = (unsigned int) r4; | 1414 | unsigned int len = (unsigned int) r4; |
1401 | void *ptr; | 1415 | void *ptr; |
1402 | 1416 | ||
1403 | if (unlikely((u32) offset > 0xffff || len > BPF_LDST_LEN)) | 1417 | if (unlikely((u32) offset > 0xffff || len > MAX_BPF_STACK)) |
1404 | return -EFAULT; | 1418 | return -EFAULT; |
1405 | 1419 | ||
1406 | ptr = skb_header_pointer(skb, offset, len, to); | 1420 | ptr = skb_header_pointer(skb, offset, len, to); |
@@ -1412,7 +1426,7 @@ static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) | |||
1412 | return 0; | 1426 | return 0; |
1413 | } | 1427 | } |
1414 | 1428 | ||
1415 | const struct bpf_func_proto bpf_skb_load_bytes_proto = { | 1429 | static const struct bpf_func_proto bpf_skb_load_bytes_proto = { |
1416 | .func = bpf_skb_load_bytes, | 1430 | .func = bpf_skb_load_bytes, |
1417 | .gpl_only = false, | 1431 | .gpl_only = false, |
1418 | .ret_type = RET_INTEGER, | 1432 | .ret_type = RET_INTEGER, |
@@ -1432,9 +1446,7 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) | |||
1432 | return -EINVAL; | 1446 | return -EINVAL; |
1433 | if (unlikely((u32) offset > 0xffff)) | 1447 | if (unlikely((u32) offset > 0xffff)) |
1434 | return -EFAULT; | 1448 | return -EFAULT; |
1435 | 1449 | if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum)))) | |
1436 | if (unlikely(skb_cloned(skb) && | ||
1437 | !skb_clone_writable(skb, offset + sizeof(sum)))) | ||
1438 | return -EFAULT; | 1450 | return -EFAULT; |
1439 | 1451 | ||
1440 | ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); | 1452 | ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); |
@@ -1442,6 +1454,12 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) | |||
1442 | return -EFAULT; | 1454 | return -EFAULT; |
1443 | 1455 | ||
1444 | switch (flags & BPF_F_HDR_FIELD_MASK) { | 1456 | switch (flags & BPF_F_HDR_FIELD_MASK) { |
1457 | case 0: | ||
1458 | if (unlikely(from != 0)) | ||
1459 | return -EINVAL; | ||
1460 | |||
1461 | csum_replace_by_diff(ptr, to); | ||
1462 | break; | ||
1445 | case 2: | 1463 | case 2: |
1446 | csum_replace2(ptr, from, to); | 1464 | csum_replace2(ptr, from, to); |
1447 | break; | 1465 | break; |
@@ -1459,7 +1477,7 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) | |||
1459 | return 0; | 1477 | return 0; |
1460 | } | 1478 | } |
1461 | 1479 | ||
1462 | const struct bpf_func_proto bpf_l3_csum_replace_proto = { | 1480 | static const struct bpf_func_proto bpf_l3_csum_replace_proto = { |
1463 | .func = bpf_l3_csum_replace, | 1481 | .func = bpf_l3_csum_replace, |
1464 | .gpl_only = false, | 1482 | .gpl_only = false, |
1465 | .ret_type = RET_INTEGER, | 1483 | .ret_type = RET_INTEGER, |
@@ -1474,23 +1492,31 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) | |||
1474 | { | 1492 | { |
1475 | struct sk_buff *skb = (struct sk_buff *) (long) r1; | 1493 | struct sk_buff *skb = (struct sk_buff *) (long) r1; |
1476 | bool is_pseudo = flags & BPF_F_PSEUDO_HDR; | 1494 | bool is_pseudo = flags & BPF_F_PSEUDO_HDR; |
1495 | bool is_mmzero = flags & BPF_F_MARK_MANGLED_0; | ||
1477 | int offset = (int) r2; | 1496 | int offset = (int) r2; |
1478 | __sum16 sum, *ptr; | 1497 | __sum16 sum, *ptr; |
1479 | 1498 | ||
1480 | if (unlikely(flags & ~(BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK))) | 1499 | if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_PSEUDO_HDR | |
1500 | BPF_F_HDR_FIELD_MASK))) | ||
1481 | return -EINVAL; | 1501 | return -EINVAL; |
1482 | if (unlikely((u32) offset > 0xffff)) | 1502 | if (unlikely((u32) offset > 0xffff)) |
1483 | return -EFAULT; | 1503 | return -EFAULT; |
1484 | 1504 | if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum)))) | |
1485 | if (unlikely(skb_cloned(skb) && | ||
1486 | !skb_clone_writable(skb, offset + sizeof(sum)))) | ||
1487 | return -EFAULT; | 1505 | return -EFAULT; |
1488 | 1506 | ||
1489 | ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); | 1507 | ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); |
1490 | if (unlikely(!ptr)) | 1508 | if (unlikely(!ptr)) |
1491 | return -EFAULT; | 1509 | return -EFAULT; |
1510 | if (is_mmzero && !*ptr) | ||
1511 | return 0; | ||
1492 | 1512 | ||
1493 | switch (flags & BPF_F_HDR_FIELD_MASK) { | 1513 | switch (flags & BPF_F_HDR_FIELD_MASK) { |
1514 | case 0: | ||
1515 | if (unlikely(from != 0)) | ||
1516 | return -EINVAL; | ||
1517 | |||
1518 | inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo); | ||
1519 | break; | ||
1494 | case 2: | 1520 | case 2: |
1495 | inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo); | 1521 | inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo); |
1496 | break; | 1522 | break; |
@@ -1501,6 +1527,8 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) | |||
1501 | return -EINVAL; | 1527 | return -EINVAL; |
1502 | } | 1528 | } |
1503 | 1529 | ||
1530 | if (is_mmzero && !*ptr) | ||
1531 | *ptr = CSUM_MANGLED_0; | ||
1504 | if (ptr == &sum) | 1532 | if (ptr == &sum) |
1505 | /* skb_store_bits guaranteed to not return -EFAULT here */ | 1533 | /* skb_store_bits guaranteed to not return -EFAULT here */ |
1506 | skb_store_bits(skb, offset, ptr, sizeof(sum)); | 1534 | skb_store_bits(skb, offset, ptr, sizeof(sum)); |
@@ -1508,7 +1536,7 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) | |||
1508 | return 0; | 1536 | return 0; |
1509 | } | 1537 | } |
1510 | 1538 | ||
1511 | const struct bpf_func_proto bpf_l4_csum_replace_proto = { | 1539 | static const struct bpf_func_proto bpf_l4_csum_replace_proto = { |
1512 | .func = bpf_l4_csum_replace, | 1540 | .func = bpf_l4_csum_replace, |
1513 | .gpl_only = false, | 1541 | .gpl_only = false, |
1514 | .ret_type = RET_INTEGER, | 1542 | .ret_type = RET_INTEGER, |
@@ -1519,6 +1547,45 @@ const struct bpf_func_proto bpf_l4_csum_replace_proto = { | |||
1519 | .arg5_type = ARG_ANYTHING, | 1547 | .arg5_type = ARG_ANYTHING, |
1520 | }; | 1548 | }; |
1521 | 1549 | ||
1550 | static u64 bpf_csum_diff(u64 r1, u64 from_size, u64 r3, u64 to_size, u64 seed) | ||
1551 | { | ||
1552 | struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp); | ||
1553 | u64 diff_size = from_size + to_size; | ||
1554 | __be32 *from = (__be32 *) (long) r1; | ||
1555 | __be32 *to = (__be32 *) (long) r3; | ||
1556 | int i, j = 0; | ||
1557 | |||
1558 | /* This is quite flexible, some examples: | ||
1559 | * | ||
1560 | * from_size == 0, to_size > 0, seed := csum --> pushing data | ||
1561 | * from_size > 0, to_size == 0, seed := csum --> pulling data | ||
1562 | * from_size > 0, to_size > 0, seed := 0 --> diffing data | ||
1563 | * | ||
1564 | * Even for diffing, from_size and to_size don't need to be equal. | ||
1565 | */ | ||
1566 | if (unlikely(((from_size | to_size) & (sizeof(__be32) - 1)) || | ||
1567 | diff_size > sizeof(sp->diff))) | ||
1568 | return -EINVAL; | ||
1569 | |||
1570 | for (i = 0; i < from_size / sizeof(__be32); i++, j++) | ||
1571 | sp->diff[j] = ~from[i]; | ||
1572 | for (i = 0; i < to_size / sizeof(__be32); i++, j++) | ||
1573 | sp->diff[j] = to[i]; | ||
1574 | |||
1575 | return csum_partial(sp->diff, diff_size, seed); | ||
1576 | } | ||
1577 | |||
1578 | static const struct bpf_func_proto bpf_csum_diff_proto = { | ||
1579 | .func = bpf_csum_diff, | ||
1580 | .gpl_only = false, | ||
1581 | .ret_type = RET_INTEGER, | ||
1582 | .arg1_type = ARG_PTR_TO_STACK, | ||
1583 | .arg2_type = ARG_CONST_STACK_SIZE_OR_ZERO, | ||
1584 | .arg3_type = ARG_PTR_TO_STACK, | ||
1585 | .arg4_type = ARG_CONST_STACK_SIZE_OR_ZERO, | ||
1586 | .arg5_type = ARG_ANYTHING, | ||
1587 | }; | ||
1588 | |||
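The three modes in the comment above compose with the new field-size-0 cases of the csum_replace helpers. A hedged BPF-C fragment, with helper wrappers assumed as in samples/bpf and offsets assuming a plain Ethernet/IPv4 packet, rewriting the IPv4 destination address:

    static int rewrite_dst(struct __sk_buff *skb, __be32 old_ip, __be32 new_ip)
    {
            __u32 off = ETH_HLEN;
            /* "diffing" mode: from and to both non-empty, seed 0 */
            __s64 diff = bpf_csum_diff(&old_ip, 4, &new_ip, 4, 0);

            bpf_skb_store_bytes(skb, off + offsetof(struct iphdr, daddr),
                                &new_ip, 4, 0);
            /* field size 0 applies a pre-computed diff; "from" must be 0 */
            return bpf_l3_csum_replace(skb, off + offsetof(struct iphdr, check),
                                       0, diff, 0);
    }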
1522 | static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5) | 1589 | static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5) |
1523 | { | 1590 | { |
1524 | struct sk_buff *skb = (struct sk_buff *) (long) r1, *skb2; | 1591 | struct sk_buff *skb = (struct sk_buff *) (long) r1, *skb2; |
@@ -1543,11 +1610,10 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5) | |||
1543 | } | 1610 | } |
1544 | 1611 | ||
1545 | skb2->dev = dev; | 1612 | skb2->dev = dev; |
1546 | skb_sender_cpu_clear(skb2); | ||
1547 | return dev_queue_xmit(skb2); | 1613 | return dev_queue_xmit(skb2); |
1548 | } | 1614 | } |
1549 | 1615 | ||
1550 | const struct bpf_func_proto bpf_clone_redirect_proto = { | 1616 | static const struct bpf_func_proto bpf_clone_redirect_proto = { |
1551 | .func = bpf_clone_redirect, | 1617 | .func = bpf_clone_redirect, |
1552 | .gpl_only = false, | 1618 | .gpl_only = false, |
1553 | .ret_type = RET_INTEGER, | 1619 | .ret_type = RET_INTEGER, |
@@ -1596,11 +1662,10 @@ int skb_do_redirect(struct sk_buff *skb) | |||
1596 | } | 1662 | } |
1597 | 1663 | ||
1598 | skb->dev = dev; | 1664 | skb->dev = dev; |
1599 | skb_sender_cpu_clear(skb); | ||
1600 | return dev_queue_xmit(skb); | 1665 | return dev_queue_xmit(skb); |
1601 | } | 1666 | } |
1602 | 1667 | ||
1603 | const struct bpf_func_proto bpf_redirect_proto = { | 1668 | static const struct bpf_func_proto bpf_redirect_proto = { |
1604 | .func = bpf_redirect, | 1669 | .func = bpf_redirect, |
1605 | .gpl_only = false, | 1670 | .gpl_only = false, |
1606 | .ret_type = RET_INTEGER, | 1671 | .ret_type = RET_INTEGER, |
@@ -1622,14 +1687,7 @@ static const struct bpf_func_proto bpf_get_cgroup_classid_proto = { | |||
1622 | 1687 | ||
1623 | static u64 bpf_get_route_realm(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) | 1688 | static u64 bpf_get_route_realm(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) |
1624 | { | 1689 | { |
1625 | #ifdef CONFIG_IP_ROUTE_CLASSID | 1690 | return dst_tclassid((struct sk_buff *) (unsigned long) r1); |
1626 | const struct dst_entry *dst; | ||
1627 | |||
1628 | dst = skb_dst((struct sk_buff *) (unsigned long) r1); | ||
1629 | if (dst) | ||
1630 | return dst->tclassid; | ||
1631 | #endif | ||
1632 | return 0; | ||
1633 | } | 1691 | } |
1634 | 1692 | ||
1635 | static const struct bpf_func_proto bpf_get_route_realm_proto = { | 1693 | static const struct bpf_func_proto bpf_get_route_realm_proto = { |
@@ -1682,6 +1740,13 @@ bool bpf_helper_changes_skb_data(void *func) | |||
1682 | return true; | 1740 | return true; |
1683 | if (func == bpf_skb_vlan_pop) | 1741 | if (func == bpf_skb_vlan_pop) |
1684 | return true; | 1742 | return true; |
1743 | if (func == bpf_skb_store_bytes) | ||
1744 | return true; | ||
1745 | if (func == bpf_l3_csum_replace) | ||
1746 | return true; | ||
1747 | if (func == bpf_l4_csum_replace) | ||
1748 | return true; | ||
1749 | |||
1685 | return false; | 1750 | return false; |
1686 | } | 1751 | } |
1687 | 1752 | ||
@@ -1703,12 +1768,16 @@ static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) | |||
1703 | return -EPROTO; | 1768 | return -EPROTO; |
1704 | if (unlikely(size != sizeof(struct bpf_tunnel_key))) { | 1769 | if (unlikely(size != sizeof(struct bpf_tunnel_key))) { |
1705 | switch (size) { | 1770 | switch (size) { |
1771 | case offsetof(struct bpf_tunnel_key, tunnel_label): | ||
1772 | case offsetof(struct bpf_tunnel_key, tunnel_ext): | ||
1773 | goto set_compat; | ||
1706 | case offsetof(struct bpf_tunnel_key, remote_ipv6[1]): | 1774 | case offsetof(struct bpf_tunnel_key, remote_ipv6[1]): |
1707 | /* Fixup deprecated structure layouts here, so we have | 1775 | /* Fixup deprecated structure layouts here, so we have |
1708 | * a common path later on. | 1776 | * a common path later on. |
1709 | */ | 1777 | */ |
1710 | if (ip_tunnel_info_af(info) != AF_INET) | 1778 | if (ip_tunnel_info_af(info) != AF_INET) |
1711 | return -EINVAL; | 1779 | return -EINVAL; |
1780 | set_compat: | ||
1712 | to = (struct bpf_tunnel_key *)compat; | 1781 | to = (struct bpf_tunnel_key *)compat; |
1713 | break; | 1782 | break; |
1714 | default: | 1783 | default: |
@@ -1720,11 +1789,13 @@ static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) | |||
1720 | to->tunnel_tos = info->key.tos; | 1789 | to->tunnel_tos = info->key.tos; |
1721 | to->tunnel_ttl = info->key.ttl; | 1790 | to->tunnel_ttl = info->key.ttl; |
1722 | 1791 | ||
1723 | if (flags & BPF_F_TUNINFO_IPV6) | 1792 | if (flags & BPF_F_TUNINFO_IPV6) { |
1724 | memcpy(to->remote_ipv6, &info->key.u.ipv6.src, | 1793 | memcpy(to->remote_ipv6, &info->key.u.ipv6.src, |
1725 | sizeof(to->remote_ipv6)); | 1794 | sizeof(to->remote_ipv6)); |
1726 | else | 1795 | to->tunnel_label = be32_to_cpu(info->key.label); |
1796 | } else { | ||
1727 | to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src); | 1797 | to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src); |
1798 | } | ||
1728 | 1799 | ||
1729 | if (unlikely(size != sizeof(struct bpf_tunnel_key))) | 1800 | if (unlikely(size != sizeof(struct bpf_tunnel_key))) |
1730 | memcpy((void *)(long) r2, to, size); | 1801 | memcpy((void *)(long) r2, to, size); |
@@ -1732,7 +1803,7 @@ static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) | |||
1732 | return 0; | 1803 | return 0; |
1733 | } | 1804 | } |
1734 | 1805 | ||
1735 | const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = { | 1806 | static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = { |
1736 | .func = bpf_skb_get_tunnel_key, | 1807 | .func = bpf_skb_get_tunnel_key, |
1737 | .gpl_only = false, | 1808 | .gpl_only = false, |
1738 | .ret_type = RET_INTEGER, | 1809 | .ret_type = RET_INTEGER, |
@@ -1742,6 +1813,32 @@ const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = { | |||
1742 | .arg4_type = ARG_ANYTHING, | 1813 | .arg4_type = ARG_ANYTHING, |
1743 | }; | 1814 | }; |
1744 | 1815 | ||
1816 | static u64 bpf_skb_get_tunnel_opt(u64 r1, u64 r2, u64 size, u64 r4, u64 r5) | ||
1817 | { | ||
1818 | struct sk_buff *skb = (struct sk_buff *) (long) r1; | ||
1819 | u8 *to = (u8 *) (long) r2; | ||
1820 | const struct ip_tunnel_info *info = skb_tunnel_info(skb); | ||
1821 | |||
1822 | if (unlikely(!info || | ||
1823 | !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))) | ||
1824 | return -ENOENT; | ||
1825 | if (unlikely(size < info->options_len)) | ||
1826 | return -ENOMEM; | ||
1827 | |||
1828 | ip_tunnel_info_opts_get(to, info); | ||
1829 | |||
1830 | return info->options_len; | ||
1831 | } | ||
1832 | |||
1833 | static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = { | ||
1834 | .func = bpf_skb_get_tunnel_opt, | ||
1835 | .gpl_only = false, | ||
1836 | .ret_type = RET_INTEGER, | ||
1837 | .arg1_type = ARG_PTR_TO_CTX, | ||
1838 | .arg2_type = ARG_PTR_TO_STACK, | ||
1839 | .arg3_type = ARG_CONST_STACK_SIZE, | ||
1840 | }; | ||
1841 | |||
1745 | static struct metadata_dst __percpu *md_dst; | 1842 | static struct metadata_dst __percpu *md_dst; |
1746 | 1843 | ||
1747 | static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) | 1844 | static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) |
@@ -1752,10 +1849,13 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) | |||
1752 | u8 compat[sizeof(struct bpf_tunnel_key)]; | 1849 | u8 compat[sizeof(struct bpf_tunnel_key)]; |
1753 | struct ip_tunnel_info *info; | 1850 | struct ip_tunnel_info *info; |
1754 | 1851 | ||
1755 | if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6))) | 1852 | if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX | |
1853 | BPF_F_DONT_FRAGMENT))) | ||
1756 | return -EINVAL; | 1854 | return -EINVAL; |
1757 | if (unlikely(size != sizeof(struct bpf_tunnel_key))) { | 1855 | if (unlikely(size != sizeof(struct bpf_tunnel_key))) { |
1758 | switch (size) { | 1856 | switch (size) { |
1857 | case offsetof(struct bpf_tunnel_key, tunnel_label): | ||
1858 | case offsetof(struct bpf_tunnel_key, tunnel_ext): | ||
1759 | case offsetof(struct bpf_tunnel_key, remote_ipv6[1]): | 1859 | case offsetof(struct bpf_tunnel_key, remote_ipv6[1]): |
1760 | /* Fixup deprecated structure layouts here, so we have | 1860 | /* Fixup deprecated structure layouts here, so we have |
1761 | * a common path later on. | 1861 | * a common path later on. |
@@ -1768,6 +1868,9 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) | |||
1768 | return -EINVAL; | 1868 | return -EINVAL; |
1769 | } | 1869 | } |
1770 | } | 1870 | } |
1871 | if (unlikely((!(flags & BPF_F_TUNINFO_IPV6) && from->tunnel_label) || | ||
1872 | from->tunnel_ext)) | ||
1873 | return -EINVAL; | ||
1771 | 1874 | ||
1772 | skb_dst_drop(skb); | 1875 | skb_dst_drop(skb); |
1773 | dst_hold((struct dst_entry *) md); | 1876 | dst_hold((struct dst_entry *) md); |
@@ -1776,7 +1879,10 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) | |||
1776 | info = &md->u.tun_info; | 1879 | info = &md->u.tun_info; |
1777 | info->mode = IP_TUNNEL_INFO_TX; | 1880 | info->mode = IP_TUNNEL_INFO_TX; |
1778 | 1881 | ||
1779 | info->key.tun_flags = TUNNEL_KEY; | 1882 | info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE; |
1883 | if (flags & BPF_F_DONT_FRAGMENT) | ||
1884 | info->key.tun_flags |= TUNNEL_DONT_FRAGMENT; | ||
1885 | |||
1780 | info->key.tun_id = cpu_to_be64(from->tunnel_id); | 1886 | info->key.tun_id = cpu_to_be64(from->tunnel_id); |
1781 | info->key.tos = from->tunnel_tos; | 1887 | info->key.tos = from->tunnel_tos; |
1782 | info->key.ttl = from->tunnel_ttl; | 1888 | info->key.ttl = from->tunnel_ttl; |
@@ -1785,14 +1891,18 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) | |||
1785 | info->mode |= IP_TUNNEL_INFO_IPV6; | 1891 | info->mode |= IP_TUNNEL_INFO_IPV6; |
1786 | memcpy(&info->key.u.ipv6.dst, from->remote_ipv6, | 1892 | memcpy(&info->key.u.ipv6.dst, from->remote_ipv6, |
1787 | sizeof(from->remote_ipv6)); | 1893 | sizeof(from->remote_ipv6)); |
1894 | info->key.label = cpu_to_be32(from->tunnel_label) & | ||
1895 | IPV6_FLOWLABEL_MASK; | ||
1788 | } else { | 1896 | } else { |
1789 | info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4); | 1897 | info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4); |
1898 | if (flags & BPF_F_ZERO_CSUM_TX) | ||
1899 | info->key.tun_flags &= ~TUNNEL_CSUM; | ||
1790 | } | 1900 | } |
1791 | 1901 | ||
1792 | return 0; | 1902 | return 0; |
1793 | } | 1903 | } |
1794 | 1904 | ||
1795 | const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = { | 1905 | static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = { |
1796 | .func = bpf_skb_set_tunnel_key, | 1906 | .func = bpf_skb_set_tunnel_key, |
1797 | .gpl_only = false, | 1907 | .gpl_only = false, |
1798 | .ret_type = RET_INTEGER, | 1908 | .ret_type = RET_INTEGER, |
@@ -1802,17 +1912,53 @@ const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = { | |||
1802 | .arg4_type = ARG_ANYTHING, | 1912 | .arg4_type = ARG_ANYTHING, |
1803 | }; | 1913 | }; |
1804 | 1914 | ||
1805 | static const struct bpf_func_proto *bpf_get_skb_set_tunnel_key_proto(void) | 1915 | static u64 bpf_skb_set_tunnel_opt(u64 r1, u64 r2, u64 size, u64 r4, u64 r5) |
1916 | { | ||
1917 | struct sk_buff *skb = (struct sk_buff *) (long) r1; | ||
1918 | u8 *from = (u8 *) (long) r2; | ||
1919 | struct ip_tunnel_info *info = skb_tunnel_info(skb); | ||
1920 | const struct metadata_dst *md = this_cpu_ptr(md_dst); | ||
1921 | |||
1922 | if (unlikely(info != &md->u.tun_info || (size & (sizeof(u32) - 1)))) | ||
1923 | return -EINVAL; | ||
1924 | if (unlikely(size > IP_TUNNEL_OPTS_MAX)) | ||
1925 | return -ENOMEM; | ||
1926 | |||
1927 | ip_tunnel_info_opts_set(info, from, size); | ||
1928 | |||
1929 | return 0; | ||
1930 | } | ||
1931 | |||
1932 | static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = { | ||
1933 | .func = bpf_skb_set_tunnel_opt, | ||
1934 | .gpl_only = false, | ||
1935 | .ret_type = RET_INTEGER, | ||
1936 | .arg1_type = ARG_PTR_TO_CTX, | ||
1937 | .arg2_type = ARG_PTR_TO_STACK, | ||
1938 | .arg3_type = ARG_CONST_STACK_SIZE, | ||
1939 | }; | ||
1940 | |||
1941 | static const struct bpf_func_proto * | ||
1942 | bpf_get_skb_set_tunnel_proto(enum bpf_func_id which) | ||
1806 | { | 1943 | { |
1807 | if (!md_dst) { | 1944 | if (!md_dst) { |
1808 | /* race is not possible, since it's called from | 1945 | /* Race is not possible, since it's called from verifier |
1809 | * verifier that is holding verifier mutex | 1946 | * that is holding verifier mutex. |
1810 | */ | 1947 | */ |
1811 | md_dst = metadata_dst_alloc_percpu(0, GFP_KERNEL); | 1948 | md_dst = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX, |
1949 | GFP_KERNEL); | ||
1812 | if (!md_dst) | 1950 | if (!md_dst) |
1813 | return NULL; | 1951 | return NULL; |
1814 | } | 1952 | } |
1815 | return &bpf_skb_set_tunnel_key_proto; | 1953 | |
1954 | switch (which) { | ||
1955 | case BPF_FUNC_skb_set_tunnel_key: | ||
1956 | return &bpf_skb_set_tunnel_key_proto; | ||
1957 | case BPF_FUNC_skb_set_tunnel_opt: | ||
1958 | return &bpf_skb_set_tunnel_opt_proto; | ||
1959 | default: | ||
1960 | return NULL; | ||
1961 | } | ||
1816 | } | 1962 | } |
1817 | 1963 | ||
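Taken together, the set-side helpers let a tc egress program on a collect-metadata tunnel device populate key and options in one pass. A hedged fragment; the 4-byte option payload and addresses are purely illustrative:

    static int set_tunnel(struct __sk_buff *skb)
    {
            struct bpf_tunnel_key key = {};
            __u32 opt = 0xdeadbeef;         /* hypothetical option blob */

            key.tunnel_id = 42;
            key.remote_ipv4 = 0xac100164;   /* 172.16.1.100 */
            key.tunnel_ttl = 64;

            /* TUNNEL_CSUM is now set by default; opt out explicitly */
            if (bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
                                       BPF_F_ZERO_CSUM_TX) < 0)
                    return TC_ACT_SHOT;
            /* must follow set_tunnel_key, which installs the metadata dst */
            if (bpf_skb_set_tunnel_opt(skb, &opt, sizeof(opt)) < 0)
                    return TC_ACT_SHOT;

            return TC_ACT_OK;
    }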
1818 | static const struct bpf_func_proto * | 1964 | static const struct bpf_func_proto * |
@@ -1849,6 +1995,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) | |||
1849 | return &bpf_skb_store_bytes_proto; | 1995 | return &bpf_skb_store_bytes_proto; |
1850 | case BPF_FUNC_skb_load_bytes: | 1996 | case BPF_FUNC_skb_load_bytes: |
1851 | return &bpf_skb_load_bytes_proto; | 1997 | return &bpf_skb_load_bytes_proto; |
1998 | case BPF_FUNC_csum_diff: | ||
1999 | return &bpf_csum_diff_proto; | ||
1852 | case BPF_FUNC_l3_csum_replace: | 2000 | case BPF_FUNC_l3_csum_replace: |
1853 | return &bpf_l3_csum_replace_proto; | 2001 | return &bpf_l3_csum_replace_proto; |
1854 | case BPF_FUNC_l4_csum_replace: | 2002 | case BPF_FUNC_l4_csum_replace: |
@@ -1864,7 +2012,11 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) | |||
1864 | case BPF_FUNC_skb_get_tunnel_key: | 2012 | case BPF_FUNC_skb_get_tunnel_key: |
1865 | return &bpf_skb_get_tunnel_key_proto; | 2013 | return &bpf_skb_get_tunnel_key_proto; |
1866 | case BPF_FUNC_skb_set_tunnel_key: | 2014 | case BPF_FUNC_skb_set_tunnel_key: |
1867 | return bpf_get_skb_set_tunnel_key_proto(); | 2015 | return bpf_get_skb_set_tunnel_proto(func_id); |
2016 | case BPF_FUNC_skb_get_tunnel_opt: | ||
2017 | return &bpf_skb_get_tunnel_opt_proto; | ||
2018 | case BPF_FUNC_skb_set_tunnel_opt: | ||
2019 | return bpf_get_skb_set_tunnel_proto(func_id); | ||
1868 | case BPF_FUNC_redirect: | 2020 | case BPF_FUNC_redirect: |
1869 | return &bpf_redirect_proto; | 2021 | return &bpf_redirect_proto; |
1870 | case BPF_FUNC_get_route_realm: | 2022 | case BPF_FUNC_get_route_realm: |
@@ -1913,16 +2065,14 @@ static bool sk_filter_is_valid_access(int off, int size, | |||
1913 | static bool tc_cls_act_is_valid_access(int off, int size, | 2065 | static bool tc_cls_act_is_valid_access(int off, int size, |
1914 | enum bpf_access_type type) | 2066 | enum bpf_access_type type) |
1915 | { | 2067 | { |
1916 | if (off == offsetof(struct __sk_buff, tc_classid)) | ||
1917 | return type == BPF_WRITE ? true : false; | ||
1918 | |||
1919 | if (type == BPF_WRITE) { | 2068 | if (type == BPF_WRITE) { |
1920 | switch (off) { | 2069 | switch (off) { |
1921 | case offsetof(struct __sk_buff, mark): | 2070 | case offsetof(struct __sk_buff, mark): |
1922 | case offsetof(struct __sk_buff, tc_index): | 2071 | case offsetof(struct __sk_buff, tc_index): |
1923 | case offsetof(struct __sk_buff, priority): | 2072 | case offsetof(struct __sk_buff, priority): |
1924 | case offsetof(struct __sk_buff, cb[0]) ... | 2073 | case offsetof(struct __sk_buff, cb[0]) ... |
1925 | offsetof(struct __sk_buff, cb[4]): | 2074 | offsetof(struct __sk_buff, cb[4]): |
2075 | case offsetof(struct __sk_buff, tc_classid): | ||
1926 | break; | 2076 | break; |
1927 | default: | 2077 | default: |
1928 | return false; | 2078 | return false; |
@@ -2039,8 +2189,10 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, | |||
2039 | ctx_off -= offsetof(struct __sk_buff, tc_classid); | 2189 | ctx_off -= offsetof(struct __sk_buff, tc_classid); |
2040 | ctx_off += offsetof(struct sk_buff, cb); | 2190 | ctx_off += offsetof(struct sk_buff, cb); |
2041 | ctx_off += offsetof(struct qdisc_skb_cb, tc_classid); | 2191 | ctx_off += offsetof(struct qdisc_skb_cb, tc_classid); |
2042 | WARN_ON(type != BPF_WRITE); | 2192 | if (type == BPF_WRITE) |
2043 | *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg, ctx_off); | 2193 | *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg, ctx_off); |
2194 | else | ||
2195 | *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, ctx_off); | ||
2044 | break; | 2196 | break; |
2045 | 2197 | ||
2046 | case offsetof(struct __sk_buff, tc_index): | 2198 | case offsetof(struct __sk_buff, tc_index): |
@@ -2103,7 +2255,7 @@ static int __init register_sk_filter_ops(void) | |||
2103 | } | 2255 | } |
2104 | late_initcall(register_sk_filter_ops); | 2256 | late_initcall(register_sk_filter_ops); |
2105 | 2257 | ||
2106 | int sk_detach_filter(struct sock *sk) | 2258 | int __sk_detach_filter(struct sock *sk, bool locked) |
2107 | { | 2259 | { |
2108 | int ret = -ENOENT; | 2260 | int ret = -ENOENT; |
2109 | struct sk_filter *filter; | 2261 | struct sk_filter *filter; |
@@ -2111,8 +2263,7 @@ int sk_detach_filter(struct sock *sk) | |||
2111 | if (sock_flag(sk, SOCK_FILTER_LOCKED)) | 2263 | if (sock_flag(sk, SOCK_FILTER_LOCKED)) |
2112 | return -EPERM; | 2264 | return -EPERM; |
2113 | 2265 | ||
2114 | filter = rcu_dereference_protected(sk->sk_filter, | 2266 | filter = rcu_dereference_protected(sk->sk_filter, locked); |
2115 | sock_owned_by_user(sk)); | ||
2116 | if (filter) { | 2267 | if (filter) { |
2117 | RCU_INIT_POINTER(sk->sk_filter, NULL); | 2268 | RCU_INIT_POINTER(sk->sk_filter, NULL); |
2118 | sk_filter_uncharge(sk, filter); | 2269 | sk_filter_uncharge(sk, filter); |
@@ -2121,7 +2272,12 @@ int sk_detach_filter(struct sock *sk) | |||
2121 | 2272 | ||
2122 | return ret; | 2273 | return ret; |
2123 | } | 2274 | } |
2124 | EXPORT_SYMBOL_GPL(sk_detach_filter); | 2275 | EXPORT_SYMBOL_GPL(__sk_detach_filter); |
2276 | |||
2277 | int sk_detach_filter(struct sock *sk) | ||
2278 | { | ||
2279 | return __sk_detach_filter(sk, sock_owned_by_user(sk)); | ||
2280 | } | ||
2125 | 2281 | ||
2126 | int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, | 2282 | int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, |
2127 | unsigned int len) | 2283 | unsigned int len) |
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 12e700332010..a669dea146c6 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c | |||
@@ -19,25 +19,12 @@ | |||
19 | #include <net/flow_dissector.h> | 19 | #include <net/flow_dissector.h> |
20 | #include <scsi/fc/fc_fcoe.h> | 20 | #include <scsi/fc/fc_fcoe.h> |
21 | 21 | ||
22 | static bool dissector_uses_key(const struct flow_dissector *flow_dissector, | ||
23 | enum flow_dissector_key_id key_id) | ||
24 | { | ||
25 | return flow_dissector->used_keys & (1 << key_id); | ||
26 | } | ||
27 | |||
28 | static void dissector_set_key(struct flow_dissector *flow_dissector, | 22 | static void dissector_set_key(struct flow_dissector *flow_dissector, |
29 | enum flow_dissector_key_id key_id) | 23 | enum flow_dissector_key_id key_id) |
30 | { | 24 | { |
31 | flow_dissector->used_keys |= (1 << key_id); | 25 | flow_dissector->used_keys |= (1 << key_id); |
32 | } | 26 | } |
33 | 27 | ||
34 | static void *skb_flow_dissector_target(struct flow_dissector *flow_dissector, | ||
35 | enum flow_dissector_key_id key_id, | ||
36 | void *target_container) | ||
37 | { | ||
38 | return ((char *) target_container) + flow_dissector->offset[key_id]; | ||
39 | } | ||
40 | |||
41 | void skb_flow_dissector_init(struct flow_dissector *flow_dissector, | 28 | void skb_flow_dissector_init(struct flow_dissector *flow_dissector, |
42 | const struct flow_dissector_key *key, | 29 | const struct flow_dissector_key *key, |
43 | unsigned int key_count) | 30 | unsigned int key_count) |
@@ -178,15 +165,16 @@ ip: | |||
178 | 165 | ||
179 | ip_proto = iph->protocol; | 166 | ip_proto = iph->protocol; |
180 | 167 | ||
181 | if (!dissector_uses_key(flow_dissector, | 168 | if (dissector_uses_key(flow_dissector, |
182 | FLOW_DISSECTOR_KEY_IPV4_ADDRS)) | 169 | FLOW_DISSECTOR_KEY_IPV4_ADDRS)) { |
183 | break; | 170 | key_addrs = skb_flow_dissector_target(flow_dissector, |
171 | FLOW_DISSECTOR_KEY_IPV4_ADDRS, | ||
172 | target_container); | ||
184 | 173 | ||
185 | key_addrs = skb_flow_dissector_target(flow_dissector, | 174 | memcpy(&key_addrs->v4addrs, &iph->saddr, |
186 | FLOW_DISSECTOR_KEY_IPV4_ADDRS, target_container); | 175 | sizeof(key_addrs->v4addrs)); |
187 | memcpy(&key_addrs->v4addrs, &iph->saddr, | 176 | key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; |
188 | sizeof(key_addrs->v4addrs)); | 177 | } |
189 | key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; | ||
190 | 178 | ||
191 | if (ip_is_fragment(iph)) { | 179 | if (ip_is_fragment(iph)) { |
192 | key_control->flags |= FLOW_DIS_IS_FRAGMENT; | 180 | key_control->flags |= FLOW_DIS_IS_FRAGMENT; |
@@ -219,13 +207,12 @@ ipv6: | |||
219 | 207 | ||
220 | if (dissector_uses_key(flow_dissector, | 208 | if (dissector_uses_key(flow_dissector, |
221 | FLOW_DISSECTOR_KEY_IPV6_ADDRS)) { | 209 | FLOW_DISSECTOR_KEY_IPV6_ADDRS)) { |
222 | struct flow_dissector_key_ipv6_addrs *key_ipv6_addrs; | 210 | key_addrs = skb_flow_dissector_target(flow_dissector, |
223 | 211 | FLOW_DISSECTOR_KEY_IPV6_ADDRS, | |
224 | key_ipv6_addrs = skb_flow_dissector_target(flow_dissector, | 212 | target_container); |
225 | FLOW_DISSECTOR_KEY_IPV6_ADDRS, | ||
226 | target_container); | ||
227 | 213 | ||
228 | memcpy(key_ipv6_addrs, &iph->saddr, sizeof(*key_ipv6_addrs)); | 214 | memcpy(&key_addrs->v6addrs, &iph->saddr, |
215 | sizeof(key_addrs->v6addrs)); | ||
229 | key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; | 216 | key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; |
230 | } | 217 | } |
231 | 218 | ||
@@ -339,8 +326,11 @@ mpls: | |||
339 | } | 326 | } |
340 | 327 | ||
341 | case htons(ETH_P_FCOE): | 328 | case htons(ETH_P_FCOE): |
342 | key_control->thoff = (u16)(nhoff + FCOE_HEADER_LEN); | 329 | if ((hlen - nhoff) < FCOE_HEADER_LEN) |
343 | /* fall through */ | 330 | goto out_bad; |
331 | |||
332 | nhoff += FCOE_HEADER_LEN; | ||
333 | goto out_good; | ||
344 | default: | 334 | default: |
345 | goto out_bad; | 335 | goto out_bad; |
346 | } | 336 | } |
@@ -447,13 +437,12 @@ ip_proto_again: | |||
447 | key_control->flags |= FLOW_DIS_IS_FRAGMENT; | 437 | key_control->flags |= FLOW_DIS_IS_FRAGMENT; |
448 | 438 | ||
449 | nhoff += sizeof(_fh); | 439 | nhoff += sizeof(_fh); |
440 | ip_proto = fh->nexthdr; | ||
450 | 441 | ||
451 | if (!(fh->frag_off & htons(IP6_OFFSET))) { | 442 | if (!(fh->frag_off & htons(IP6_OFFSET))) { |
452 | key_control->flags |= FLOW_DIS_FIRST_FRAG; | 443 | key_control->flags |= FLOW_DIS_FIRST_FRAG; |
453 | if (flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG) { | 444 | if (flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG) |
454 | ip_proto = fh->nexthdr; | ||
455 | goto ip_proto_again; | 445 | goto ip_proto_again; |
456 | } | ||
457 | } | 446 | } |
458 | goto out_good; | 447 | goto out_good; |
459 | } | 448 | } |
@@ -740,6 +729,11 @@ u32 __skb_get_poff(const struct sk_buff *skb, void *data, | |||
740 | { | 729 | { |
741 | u32 poff = keys->control.thoff; | 730 | u32 poff = keys->control.thoff; |
742 | 731 | ||
732 | /* skip L4 headers for fragments after the first */ | ||
733 | if ((keys->control.flags & FLOW_DIS_IS_FRAGMENT) && | ||
734 | !(keys->control.flags & FLOW_DIS_FIRST_FRAG)) | ||
735 | return poff; | ||
736 | |||
743 | switch (keys->basic.ip_proto) { | 737 | switch (keys->basic.ip_proto) { |
744 | case IPPROTO_TCP: { | 738 | case IPPROTO_TCP: { |
745 | /* access doff as u8 to avoid unaligned access */ | 739 | /* access doff as u8 to avoid unaligned access */ |
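
A note on the pattern the flow_dissector hunks above converge on: a consumer first declares, key by key, where dissected data should land inside its own container struct, then registers that table with skb_flow_dissector_init(); dissector_uses_key() and skb_flow_dissector_target() later test for and locate those slots. A minimal sketch, where every my_* name is an illustrative assumption rather than anything from this patch:

#include <net/flow_dissector.h>

struct my_flow_keys {
	struct flow_dissector_key_control control;
	struct flow_dissector_key_basic basic;
	struct flow_dissector_key_ipv4_addrs v4addrs;
};

static const struct flow_dissector_key my_dissector_keys[] = {
	{
		.key_id = FLOW_DISSECTOR_KEY_CONTROL,
		.offset = offsetof(struct my_flow_keys, control),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_BASIC,
		.offset = offsetof(struct my_flow_keys, basic),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
		.offset = offsetof(struct my_flow_keys, v4addrs),
	},
};

static struct flow_dissector my_dissector;

static int __init my_dissector_init(void)
{
	/* builds the used_keys bitmap consulted by dissector_uses_key() */
	skb_flow_dissector_init(&my_dissector, my_dissector_keys,
				ARRAY_SIZE(my_dissector_keys));
	return 0;
}
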
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 92d886f4adcb..4573d81093fe 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c | |||
@@ -191,6 +191,7 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats | |||
191 | /** | 191 | /** |
192 | * gen_new_estimator - create a new rate estimator | 192 | * gen_new_estimator - create a new rate estimator |
193 | * @bstats: basic statistics | 193 | * @bstats: basic statistics |
194 | * @cpu_bstats: bstats per cpu | ||
194 | * @rate_est: rate estimator statistics | 195 | * @rate_est: rate estimator statistics |
195 | * @stats_lock: statistics lock | 196 | * @stats_lock: statistics lock |
196 | * @opt: rate estimator configuration TLV | 197 | * @opt: rate estimator configuration TLV |
@@ -287,6 +288,7 @@ EXPORT_SYMBOL(gen_kill_estimator); | |||
287 | /** | 288 | /** |
288 | * gen_replace_estimator - replace rate estimator configuration | 289 | * gen_replace_estimator - replace rate estimator configuration |
289 | * @bstats: basic statistics | 290 | * @bstats: basic statistics |
291 | * @cpu_bstats: bstats per cpu | ||
290 | * @rate_est: rate estimator statistics | 292 | * @rate_est: rate estimator statistics |
291 | * @stats_lock: statistics lock | 293 | * @stats_lock: statistics lock |
292 | * @opt: rate estimator configuration TLV | 294 | * @opt: rate estimator configuration TLV |
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index 1e2f46a69d50..e640462ea8bf 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c | |||
@@ -140,6 +140,7 @@ EXPORT_SYMBOL(__gnet_stats_copy_basic); | |||
140 | /** | 140 | /** |
141 | * gnet_stats_copy_basic - copy basic statistics into statistic TLV | 141 | * gnet_stats_copy_basic - copy basic statistics into statistic TLV |
142 | * @d: dumping handle | 142 | * @d: dumping handle |
143 | * @cpu: per-cpu statistics to copy | ||
143 | * @b: basic statistics | 144 | * @b: basic statistics |
144 | * | 145 | * |
145 | * Appends the basic statistics to the top level TLV created by | 146 | * Appends the basic statistics to the top level TLV created by |
diff --git a/net/core/hwbm.c b/net/core/hwbm.c new file mode 100644 index 000000000000..941c28486896 --- /dev/null +++ b/net/core/hwbm.c | |||
@@ -0,0 +1,90 @@ | |||
1 | /* Support for hardware buffer manager. | ||
2 | * | ||
3 | * Copyright (C) 2016 Marvell | ||
4 | * | ||
5 | * Gregory CLEMENT <gregory.clement@free-electrons.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | */ | ||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/printk.h> | ||
14 | #include <linux/skbuff.h> | ||
15 | #include <net/hwbm.h> | ||
16 | |||
17 | void hwbm_buf_free(struct hwbm_pool *bm_pool, void *buf) | ||
18 | { | ||
19 | if (likely(bm_pool->frag_size <= PAGE_SIZE)) | ||
20 | skb_free_frag(buf); | ||
21 | else | ||
22 | kfree(buf); | ||
23 | } | ||
24 | EXPORT_SYMBOL_GPL(hwbm_buf_free); | ||
25 | |||
26 | /* Refill processing for HW buffer management */ | ||
27 | int hwbm_pool_refill(struct hwbm_pool *bm_pool, gfp_t gfp) | ||
28 | { | ||
29 | int frag_size = bm_pool->frag_size; | ||
30 | void *buf; | ||
31 | |||
32 | if (likely(frag_size <= PAGE_SIZE)) | ||
33 | buf = netdev_alloc_frag(frag_size); | ||
34 | else | ||
35 | buf = kmalloc(frag_size, gfp); | ||
36 | |||
37 | if (!buf) | ||
38 | return -ENOMEM; | ||
39 | |||
40 | if (bm_pool->construct) | ||
41 | if (bm_pool->construct(bm_pool, buf)) { | ||
42 | hwbm_buf_free(bm_pool, buf); | ||
43 | return -ENOMEM; | ||
44 | } | ||
45 | |||
46 | return 0; | ||
47 | } | ||
48 | EXPORT_SYMBOL_GPL(hwbm_pool_refill); | ||
49 | |||
50 | int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num, gfp_t gfp) | ||
51 | { | ||
52 | int err, i; | ||
53 | unsigned long flags; | ||
54 | |||
55 | spin_lock_irqsave(&bm_pool->lock, flags); | ||
56 | if (bm_pool->buf_num == bm_pool->size) { | ||
57 | pr_warn("pool already filled\n"); | ||
58 | spin_unlock_irqrestore(&bm_pool->lock, flags); | ||
59 | return bm_pool->buf_num; | ||
60 | } | ||
61 | |||
62 | if (buf_num + bm_pool->buf_num > bm_pool->size) { | ||
63 | pr_warn("cannot allocate %d buffers for pool\n", | ||
64 | buf_num); | ||
65 | spin_unlock_irqrestore(&bm_pool->lock, flags); | ||
66 | return 0; | ||
67 | } | ||
68 | |||
69 | if ((buf_num + bm_pool->buf_num) < bm_pool->buf_num) { | ||
70 | pr_warn("Adding %d buffers to the %d current buffers will overflow\n", | ||
71 | buf_num, bm_pool->buf_num); | ||
72 | spin_unlock_irqrestore(&bm_pool->lock, flags); | ||
73 | return 0; | ||
74 | } | ||
75 | |||
76 | for (i = 0; i < buf_num; i++) { | ||
77 | err = hwbm_pool_refill(bm_pool, gfp); | ||
78 | if (err < 0) | ||
79 | break; | ||
80 | } | ||
81 | |||
82 | /* Update BM driver with number of buffers added to pool */ | ||
83 | bm_pool->buf_num += i; | ||
84 | |||
85 | pr_debug("hwbm pool: %d of %d buffers added\n", i, buf_num); | ||
86 | spin_unlock_irqrestore(&bm_pool->lock, flags); | ||
87 | |||
88 | return i; | ||
89 | } | ||
90 | EXPORT_SYMBOL_GPL(hwbm_pool_add); | ||
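
A usage sketch for the helper API above, loosely modeled on a buffer-manager driver seeding its pool at probe time; my_construct() and the sizing constants are assumptions for illustration, not part of the patch:

#include <linux/skbuff.h>
#include <net/hwbm.h>

static int my_construct(struct hwbm_pool *bm_pool, void *buf)
{
	/* map buf and hand its DMA address to the hardware pool here;
	 * returning nonzero makes hwbm_pool_refill() free buf and fail
	 */
	return 0;
}

static int my_pool_setup(struct hwbm_pool *pool)
{
	spin_lock_init(&pool->lock);
	pool->size = 1024;	/* pool capacity, in buffers */
	pool->frag_size = SKB_DATA_ALIGN(1600) +
			  SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
	pool->construct = my_construct;

	/* hwbm_pool_add() returns how many buffers it managed to add */
	return hwbm_pool_add(pool, pool->size, GFP_KERNEL) == pool->size ?
	       0 : -ENOMEM;
}
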
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index 299cfc24d888..669ecc9f884e 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c | |||
@@ -27,6 +27,31 @@ | |||
27 | #include <net/rtnetlink.h> | 27 | #include <net/rtnetlink.h> |
28 | #include <net/ip6_fib.h> | 28 | #include <net/ip6_fib.h> |
29 | 29 | ||
30 | #ifdef CONFIG_MODULES | ||
31 | |||
32 | static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type) | ||
33 | { | ||
34 | /* Only lwt encaps implemented without using an interface for | ||
35 | * the encap need to return a string here. | ||
36 | */ | ||
37 | switch (encap_type) { | ||
38 | case LWTUNNEL_ENCAP_MPLS: | ||
39 | return "MPLS"; | ||
40 | case LWTUNNEL_ENCAP_ILA: | ||
41 | return "ILA"; | ||
42 | case LWTUNNEL_ENCAP_IP6: | ||
43 | case LWTUNNEL_ENCAP_IP: | ||
44 | case LWTUNNEL_ENCAP_NONE: | ||
45 | case __LWTUNNEL_ENCAP_MAX: | ||
46 | /* should not have got here */ | ||
47 | WARN_ON(1); | ||
48 | break; | ||
49 | } | ||
50 | return NULL; | ||
51 | } | ||
52 | |||
53 | #endif /* CONFIG_MODULES */ | ||
54 | |||
30 | struct lwtunnel_state *lwtunnel_state_alloc(int encap_len) | 55 | struct lwtunnel_state *lwtunnel_state_alloc(int encap_len) |
31 | { | 56 | { |
32 | struct lwtunnel_state *lws; | 57 | struct lwtunnel_state *lws; |
@@ -85,6 +110,18 @@ int lwtunnel_build_state(struct net_device *dev, u16 encap_type, | |||
85 | ret = -EOPNOTSUPP; | 110 | ret = -EOPNOTSUPP; |
86 | rcu_read_lock(); | 111 | rcu_read_lock(); |
87 | ops = rcu_dereference(lwtun_encaps[encap_type]); | 112 | ops = rcu_dereference(lwtun_encaps[encap_type]); |
113 | #ifdef CONFIG_MODULES | ||
114 | if (!ops) { | ||
115 | const char *encap_type_str = lwtunnel_encap_str(encap_type); | ||
116 | |||
117 | if (encap_type_str) { | ||
118 | rcu_read_unlock(); | ||
119 | request_module("rtnl-lwt-%s", encap_type_str); | ||
120 | rcu_read_lock(); | ||
121 | ops = rcu_dereference(lwtun_encaps[encap_type]); | ||
122 | } | ||
123 | } | ||
124 | #endif | ||
88 | if (likely(ops && ops->build_state)) | 125 | if (likely(ops && ops->build_state)) |
89 | ret = ops->build_state(dev, encap, family, cfg, lws); | 126 | ret = ops->build_state(dev, encap, family, cfg, lws); |
90 | rcu_read_unlock(); | 127 | rcu_read_unlock(); |
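
The request_module("rtnl-lwt-%s", ...) call above can only succeed if encap modules advertise a matching alias. A sketch of the module side, assuming the MODULE_ALIAS_RTNL_LWT() helper introduced alongside this change expands to MODULE_ALIAS("rtnl-lwt-<type>"), and with my_build_state()/my_output() standing in for the module's real handlers:

#include <linux/module.h>
#include <net/lwtunnel.h>

static int my_build_state(struct net_device *dev, struct nlattr *encap,
			  unsigned int family, const void *cfg,
			  struct lwtunnel_state **ts);
static int my_output(struct net *net, struct sock *sk, struct sk_buff *skb);

static const struct lwtunnel_encap_ops my_encap_ops = {
	.build_state = my_build_state,
	.output = my_output,
};

static int __init my_encap_init(void)
{
	return lwtunnel_encap_add_ops(&my_encap_ops, LWTUNNEL_ENCAP_MPLS);
}
module_init(my_encap_init);

/* makes modprobe resolve "rtnl-lwt-MPLS" to this module */
MODULE_ALIAS_RTNL_LWT(MPLS);
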
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index b6c8a6629b39..2b3f76fe65f4 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c | |||
@@ -29,7 +29,6 @@ | |||
29 | 29 | ||
30 | #ifdef CONFIG_SYSFS | 30 | #ifdef CONFIG_SYSFS |
31 | static const char fmt_hex[] = "%#x\n"; | 31 | static const char fmt_hex[] = "%#x\n"; |
32 | static const char fmt_long_hex[] = "%#lx\n"; | ||
33 | static const char fmt_dec[] = "%d\n"; | 32 | static const char fmt_dec[] = "%d\n"; |
34 | static const char fmt_ulong[] = "%lu\n"; | 33 | static const char fmt_ulong[] = "%lu\n"; |
35 | static const char fmt_u64[] = "%llu\n"; | 34 | static const char fmt_u64[] = "%llu\n"; |
@@ -199,9 +198,10 @@ static ssize_t speed_show(struct device *dev, | |||
199 | return restart_syscall(); | 198 | return restart_syscall(); |
200 | 199 | ||
201 | if (netif_running(netdev)) { | 200 | if (netif_running(netdev)) { |
202 | struct ethtool_cmd cmd; | 201 | struct ethtool_link_ksettings cmd; |
203 | if (!__ethtool_get_settings(netdev, &cmd)) | 202 | |
204 | ret = sprintf(buf, fmt_dec, ethtool_cmd_speed(&cmd)); | 203 | if (!__ethtool_get_link_ksettings(netdev, &cmd)) |
204 | ret = sprintf(buf, fmt_dec, cmd.base.speed); | ||
205 | } | 205 | } |
206 | rtnl_unlock(); | 206 | rtnl_unlock(); |
207 | return ret; | 207 | return ret; |
@@ -218,10 +218,12 @@ static ssize_t duplex_show(struct device *dev, | |||
218 | return restart_syscall(); | 218 | return restart_syscall(); |
219 | 219 | ||
220 | if (netif_running(netdev)) { | 220 | if (netif_running(netdev)) { |
221 | struct ethtool_cmd cmd; | 221 | struct ethtool_link_ksettings cmd; |
222 | if (!__ethtool_get_settings(netdev, &cmd)) { | 222 | |
223 | if (!__ethtool_get_link_ksettings(netdev, &cmd)) { | ||
223 | const char *duplex; | 224 | const char *duplex; |
224 | switch (cmd.duplex) { | 225 | |
226 | switch (cmd.base.duplex) { | ||
225 | case DUPLEX_HALF: | 227 | case DUPLEX_HALF: |
226 | duplex = "half"; | 228 | duplex = "half"; |
227 | break; | 229 | break; |
@@ -574,6 +576,7 @@ NETSTAT_ENTRY(tx_heartbeat_errors); | |||
574 | NETSTAT_ENTRY(tx_window_errors); | 576 | NETSTAT_ENTRY(tx_window_errors); |
575 | NETSTAT_ENTRY(rx_compressed); | 577 | NETSTAT_ENTRY(rx_compressed); |
576 | NETSTAT_ENTRY(tx_compressed); | 578 | NETSTAT_ENTRY(tx_compressed); |
579 | NETSTAT_ENTRY(rx_nohandler); | ||
577 | 580 | ||
578 | static struct attribute *netstat_attrs[] = { | 581 | static struct attribute *netstat_attrs[] = { |
579 | &dev_attr_rx_packets.attr, | 582 | &dev_attr_rx_packets.attr, |
@@ -599,6 +602,7 @@ static struct attribute *netstat_attrs[] = { | |||
599 | &dev_attr_tx_window_errors.attr, | 602 | &dev_attr_tx_window_errors.attr, |
600 | &dev_attr_rx_compressed.attr, | 603 | &dev_attr_rx_compressed.attr, |
601 | &dev_attr_tx_compressed.attr, | 604 | &dev_attr_tx_compressed.attr, |
605 | &dev_attr_rx_nohandler.attr, | ||
602 | NULL | 606 | NULL |
603 | }; | 607 | }; |
604 | 608 | ||
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 0260c84ed83c..11fce17274f6 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c | |||
@@ -9,7 +9,6 @@ | |||
9 | * Authors: Thomas Graf <tgraf@suug.ch> | 9 | * Authors: Thomas Graf <tgraf@suug.ch> |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #include <linux/module.h> | ||
13 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
14 | #include <linux/cgroup.h> | 13 | #include <linux/cgroup.h> |
15 | #include <linux/fdtable.h> | 14 | #include <linux/fdtable.h> |
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index f1efbc39ef6b..2ec86fc552df 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c | |||
@@ -11,7 +11,6 @@ | |||
11 | 11 | ||
12 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | 12 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
13 | 13 | ||
14 | #include <linux/module.h> | ||
15 | #include <linux/slab.h> | 14 | #include <linux/slab.h> |
16 | #include <linux/types.h> | 15 | #include <linux/types.h> |
17 | #include <linux/string.h> | 16 | #include <linux/string.h> |
diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 1474cfd2dc1c..20999aa596dd 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c | |||
@@ -2856,7 +2856,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, | |||
2856 | *vlan_encapsulated_proto = htons(ETH_P_IP); | 2856 | *vlan_encapsulated_proto = htons(ETH_P_IP); |
2857 | } | 2857 | } |
2858 | 2858 | ||
2859 | skb_set_mac_header(skb, 0); | 2859 | skb_reset_mac_header(skb); |
2860 | skb_set_network_header(skb, skb->len); | 2860 | skb_set_network_header(skb, skb->len); |
2861 | iph = (struct iphdr *) skb_put(skb, sizeof(struct iphdr)); | 2861 | iph = (struct iphdr *) skb_put(skb, sizeof(struct iphdr)); |
2862 | 2862 | ||
@@ -2983,7 +2983,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, | |||
2983 | *vlan_encapsulated_proto = htons(ETH_P_IPV6); | 2983 | *vlan_encapsulated_proto = htons(ETH_P_IPV6); |
2984 | } | 2984 | } |
2985 | 2985 | ||
2986 | skb_set_mac_header(skb, 0); | 2986 | skb_reset_mac_header(skb); |
2987 | skb_set_network_header(skb, skb->len); | 2987 | skb_set_network_header(skb, skb->len); |
2988 | iph = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr)); | 2988 | iph = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr)); |
2989 | 2989 | ||
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d735e854f916..65763c29f845 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c | |||
@@ -804,6 +804,8 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a, | |||
804 | 804 | ||
805 | a->rx_compressed = b->rx_compressed; | 805 | a->rx_compressed = b->rx_compressed; |
806 | a->tx_compressed = b->tx_compressed; | 806 | a->tx_compressed = b->tx_compressed; |
807 | |||
808 | a->rx_nohandler = b->rx_nohandler; | ||
807 | } | 809 | } |
808 | 810 | ||
809 | static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b) | 811 | static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b) |
@@ -893,6 +895,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, | |||
893 | + nla_total_size(4) /* IFLA_PROMISCUITY */ | 895 | + nla_total_size(4) /* IFLA_PROMISCUITY */ |
894 | + nla_total_size(4) /* IFLA_NUM_TX_QUEUES */ | 896 | + nla_total_size(4) /* IFLA_NUM_TX_QUEUES */ |
895 | + nla_total_size(4) /* IFLA_NUM_RX_QUEUES */ | 897 | + nla_total_size(4) /* IFLA_NUM_RX_QUEUES */ |
898 | + nla_total_size(4) /* IFLA_MAX_GSO_SEGS */ | ||
899 | + nla_total_size(4) /* IFLA_MAX_GSO_SIZE */ | ||
896 | + nla_total_size(1) /* IFLA_OPERSTATE */ | 900 | + nla_total_size(1) /* IFLA_OPERSTATE */ |
897 | + nla_total_size(1) /* IFLA_LINKMODE */ | 901 | + nla_total_size(1) /* IFLA_LINKMODE */ |
898 | + nla_total_size(4) /* IFLA_CARRIER_CHANGES */ | 902 | + nla_total_size(4) /* IFLA_CARRIER_CHANGES */ |
@@ -905,6 +909,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, | |||
905 | + rtnl_link_get_af_size(dev, ext_filter_mask) /* IFLA_AF_SPEC */ | 909 | + rtnl_link_get_af_size(dev, ext_filter_mask) /* IFLA_AF_SPEC */ |
906 | + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */ | 910 | + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */ |
907 | + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */ | 911 | + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */ |
912 | + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */ | ||
908 | + nla_total_size(1); /* IFLA_PROTO_DOWN */ | 913 | + nla_total_size(1); /* IFLA_PROTO_DOWN */ |
909 | 914 | ||
910 | } | 915 | } |
@@ -1175,14 +1180,16 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, | |||
1175 | 1180 | ||
1176 | static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev) | 1181 | static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev) |
1177 | { | 1182 | { |
1178 | struct rtnl_link_ifmap map = { | 1183 | struct rtnl_link_ifmap map; |
1179 | .mem_start = dev->mem_start, | 1184 | |
1180 | .mem_end = dev->mem_end, | 1185 | memset(&map, 0, sizeof(map)); |
1181 | .base_addr = dev->base_addr, | 1186 | map.mem_start = dev->mem_start; |
1182 | .irq = dev->irq, | 1187 | map.mem_end = dev->mem_end; |
1183 | .dma = dev->dma, | 1188 | map.base_addr = dev->base_addr; |
1184 | .port = dev->if_port, | 1189 | map.irq = dev->irq; |
1185 | }; | 1190 | map.dma = dev->dma; |
1191 | map.port = dev->if_port; | ||
1192 | |||
1186 | if (nla_put(skb, IFLA_MAP, sizeof(map), &map)) | 1193 | if (nla_put(skb, IFLA_MAP, sizeof(map), &map)) |
1187 | return -EMSGSIZE; | 1194 | return -EMSGSIZE; |
1188 | 1195 | ||
@@ -1221,6 +1228,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, | |||
1221 | nla_put_u32(skb, IFLA_GROUP, dev->group) || | 1228 | nla_put_u32(skb, IFLA_GROUP, dev->group) || |
1222 | nla_put_u32(skb, IFLA_PROMISCUITY, dev->promiscuity) || | 1229 | nla_put_u32(skb, IFLA_PROMISCUITY, dev->promiscuity) || |
1223 | nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) || | 1230 | nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) || |
1231 | nla_put_u32(skb, IFLA_GSO_MAX_SEGS, dev->gso_max_segs) || | ||
1232 | nla_put_u32(skb, IFLA_GSO_MAX_SIZE, dev->gso_max_size) || | ||
1224 | #ifdef CONFIG_RPS | 1233 | #ifdef CONFIG_RPS |
1225 | nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) || | 1234 | nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) || |
1226 | #endif | 1235 | #endif |
@@ -1387,15 +1396,8 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { | |||
1387 | [IFLA_VF_RSS_QUERY_EN] = { .len = sizeof(struct ifla_vf_rss_query_en) }, | 1396 | [IFLA_VF_RSS_QUERY_EN] = { .len = sizeof(struct ifla_vf_rss_query_en) }, |
1388 | [IFLA_VF_STATS] = { .type = NLA_NESTED }, | 1397 | [IFLA_VF_STATS] = { .type = NLA_NESTED }, |
1389 | [IFLA_VF_TRUST] = { .len = sizeof(struct ifla_vf_trust) }, | 1398 | [IFLA_VF_TRUST] = { .len = sizeof(struct ifla_vf_trust) }, |
1390 | }; | 1399 | [IFLA_VF_IB_NODE_GUID] = { .len = sizeof(struct ifla_vf_guid) }, |
1391 | 1400 | [IFLA_VF_IB_PORT_GUID] = { .len = sizeof(struct ifla_vf_guid) }, | |
1392 | static const struct nla_policy ifla_vf_stats_policy[IFLA_VF_STATS_MAX + 1] = { | ||
1393 | [IFLA_VF_STATS_RX_PACKETS] = { .type = NLA_U64 }, | ||
1394 | [IFLA_VF_STATS_TX_PACKETS] = { .type = NLA_U64 }, | ||
1395 | [IFLA_VF_STATS_RX_BYTES] = { .type = NLA_U64 }, | ||
1396 | [IFLA_VF_STATS_TX_BYTES] = { .type = NLA_U64 }, | ||
1397 | [IFLA_VF_STATS_BROADCAST] = { .type = NLA_U64 }, | ||
1398 | [IFLA_VF_STATS_MULTICAST] = { .type = NLA_U64 }, | ||
1399 | }; | 1401 | }; |
1400 | 1402 | ||
1401 | static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = { | 1403 | static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = { |
@@ -1412,6 +1414,58 @@ static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = { | |||
1412 | [IFLA_PORT_RESPONSE] = { .type = NLA_U16, }, | 1414 | [IFLA_PORT_RESPONSE] = { .type = NLA_U16, }, |
1413 | }; | 1415 | }; |
1414 | 1416 | ||
1417 | static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla) | ||
1418 | { | ||
1419 | const struct rtnl_link_ops *ops = NULL; | ||
1420 | struct nlattr *linfo[IFLA_INFO_MAX + 1]; | ||
1421 | |||
1422 | if (nla_parse_nested(linfo, IFLA_INFO_MAX, nla, ifla_info_policy) < 0) | ||
1423 | return NULL; | ||
1424 | |||
1425 | if (linfo[IFLA_INFO_KIND]) { | ||
1426 | char kind[MODULE_NAME_LEN]; | ||
1427 | |||
1428 | nla_strlcpy(kind, linfo[IFLA_INFO_KIND], sizeof(kind)); | ||
1429 | ops = rtnl_link_ops_get(kind); | ||
1430 | } | ||
1431 | |||
1432 | return ops; | ||
1433 | } | ||
1434 | |||
1435 | static bool link_master_filtered(struct net_device *dev, int master_idx) | ||
1436 | { | ||
1437 | struct net_device *master; | ||
1438 | |||
1439 | if (!master_idx) | ||
1440 | return false; | ||
1441 | |||
1442 | master = netdev_master_upper_dev_get(dev); | ||
1443 | if (!master || master->ifindex != master_idx) | ||
1444 | return true; | ||
1445 | |||
1446 | return false; | ||
1447 | } | ||
1448 | |||
1449 | static bool link_kind_filtered(const struct net_device *dev, | ||
1450 | const struct rtnl_link_ops *kind_ops) | ||
1451 | { | ||
1452 | if (kind_ops && dev->rtnl_link_ops != kind_ops) | ||
1453 | return true; | ||
1454 | |||
1455 | return false; | ||
1456 | } | ||
1457 | |||
1458 | static bool link_dump_filtered(struct net_device *dev, | ||
1459 | int master_idx, | ||
1460 | const struct rtnl_link_ops *kind_ops) | ||
1461 | { | ||
1462 | if (link_master_filtered(dev, master_idx) || | ||
1463 | link_kind_filtered(dev, kind_ops)) | ||
1464 | return true; | ||
1465 | |||
1466 | return false; | ||
1467 | } | ||
1468 | |||
1415 | static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) | 1469 | static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) |
1416 | { | 1470 | { |
1417 | struct net *net = sock_net(skb->sk); | 1471 | struct net *net = sock_net(skb->sk); |
@@ -1421,6 +1475,9 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) | |||
1421 | struct hlist_head *head; | 1475 | struct hlist_head *head; |
1422 | struct nlattr *tb[IFLA_MAX+1]; | 1476 | struct nlattr *tb[IFLA_MAX+1]; |
1423 | u32 ext_filter_mask = 0; | 1477 | u32 ext_filter_mask = 0; |
1478 | const struct rtnl_link_ops *kind_ops = NULL; | ||
1479 | unsigned int flags = NLM_F_MULTI; | ||
1480 | int master_idx = 0; | ||
1424 | int err; | 1481 | int err; |
1425 | int hdrlen; | 1482 | int hdrlen; |
1426 | 1483 | ||
@@ -1443,18 +1500,29 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) | |||
1443 | 1500 | ||
1444 | if (tb[IFLA_EXT_MASK]) | 1501 | if (tb[IFLA_EXT_MASK]) |
1445 | ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); | 1502 | ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); |
1503 | |||
1504 | if (tb[IFLA_MASTER]) | ||
1505 | master_idx = nla_get_u32(tb[IFLA_MASTER]); | ||
1506 | |||
1507 | if (tb[IFLA_LINKINFO]) | ||
1508 | kind_ops = linkinfo_to_kind_ops(tb[IFLA_LINKINFO]); | ||
1509 | |||
1510 | if (master_idx || kind_ops) | ||
1511 | flags |= NLM_F_DUMP_FILTERED; | ||
1446 | } | 1512 | } |
1447 | 1513 | ||
1448 | for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { | 1514 | for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { |
1449 | idx = 0; | 1515 | idx = 0; |
1450 | head = &net->dev_index_head[h]; | 1516 | head = &net->dev_index_head[h]; |
1451 | hlist_for_each_entry(dev, head, index_hlist) { | 1517 | hlist_for_each_entry(dev, head, index_hlist) { |
1518 | if (link_dump_filtered(dev, master_idx, kind_ops)) | ||
1519 | continue; | ||
1452 | if (idx < s_idx) | 1520 | if (idx < s_idx) |
1453 | goto cont; | 1521 | goto cont; |
1454 | err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, | 1522 | err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, |
1455 | NETLINK_CB(cb->skb).portid, | 1523 | NETLINK_CB(cb->skb).portid, |
1456 | cb->nlh->nlmsg_seq, 0, | 1524 | cb->nlh->nlmsg_seq, 0, |
1457 | NLM_F_MULTI, | 1525 | flags, |
1458 | ext_filter_mask); | 1526 | ext_filter_mask); |
1459 | /* If we ran out of room on the first message, | 1527 | /* If we ran out of room on the first message, |
1460 | * we're in trouble | 1528 | * we're in trouble |
@@ -1534,6 +1602,22 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) | |||
1534 | return 0; | 1602 | return 0; |
1535 | } | 1603 | } |
1536 | 1604 | ||
1605 | static int handle_infiniband_guid(struct net_device *dev, struct ifla_vf_guid *ivt, | ||
1606 | int guid_type) | ||
1607 | { | ||
1608 | const struct net_device_ops *ops = dev->netdev_ops; | ||
1609 | |||
1610 | return ops->ndo_set_vf_guid(dev, ivt->vf, ivt->guid, guid_type); | ||
1611 | } | ||
1612 | |||
1613 | static int handle_vf_guid(struct net_device *dev, struct ifla_vf_guid *ivt, int guid_type) | ||
1614 | { | ||
1615 | if (dev->type != ARPHRD_INFINIBAND) | ||
1616 | return -EOPNOTSUPP; | ||
1617 | |||
1618 | return handle_infiniband_guid(dev, ivt, guid_type); | ||
1619 | } | ||
1620 | |||
1537 | static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) | 1621 | static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) |
1538 | { | 1622 | { |
1539 | const struct net_device_ops *ops = dev->netdev_ops; | 1623 | const struct net_device_ops *ops = dev->netdev_ops; |
@@ -1636,6 +1720,24 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) | |||
1636 | return err; | 1720 | return err; |
1637 | } | 1721 | } |
1638 | 1722 | ||
1723 | if (tb[IFLA_VF_IB_NODE_GUID]) { | ||
1724 | struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_NODE_GUID]); | ||
1725 | |||
1726 | if (!ops->ndo_set_vf_guid) | ||
1727 | return -EOPNOTSUPP; | ||
1728 | |||
1729 | return handle_vf_guid(dev, ivt, IFLA_VF_IB_NODE_GUID); | ||
1730 | } | ||
1731 | |||
1732 | if (tb[IFLA_VF_IB_PORT_GUID]) { | ||
1733 | struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_PORT_GUID]); | ||
1734 | |||
1735 | if (!ops->ndo_set_vf_guid) | ||
1736 | return -EOPNOTSUPP; | ||
1737 | |||
1738 | return handle_vf_guid(dev, ivt, IFLA_VF_IB_PORT_GUID); | ||
1739 | } | ||
1740 | |||
1639 | return err; | 1741 | return err; |
1640 | } | 1742 | } |
1641 | 1743 | ||
@@ -2911,6 +3013,7 @@ int ndo_dflt_fdb_dump(struct sk_buff *skb, | |||
2911 | nlmsg_populate_fdb(skb, cb, dev, &idx, &dev->mc); | 3013 | nlmsg_populate_fdb(skb, cb, dev, &idx, &dev->mc); |
2912 | out: | 3014 | out: |
2913 | netif_addr_unlock_bh(dev); | 3015 | netif_addr_unlock_bh(dev); |
3016 | cb->args[1] = err; | ||
2914 | return idx; | 3017 | return idx; |
2915 | } | 3018 | } |
2916 | EXPORT_SYMBOL(ndo_dflt_fdb_dump); | 3019 | EXPORT_SYMBOL(ndo_dflt_fdb_dump); |
@@ -2944,6 +3047,7 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) | |||
2944 | ops = br_dev->netdev_ops; | 3047 | ops = br_dev->netdev_ops; |
2945 | } | 3048 | } |
2946 | 3049 | ||
3050 | cb->args[1] = 0; | ||
2947 | for_each_netdev(net, dev) { | 3051 | for_each_netdev(net, dev) { |
2948 | if (brport_idx && (dev->ifindex != brport_idx)) | 3052 | if (brport_idx && (dev->ifindex != brport_idx)) |
2949 | continue; | 3053 | continue; |
@@ -2971,12 +3075,16 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) | |||
2971 | idx = cops->ndo_fdb_dump(skb, cb, br_dev, dev, | 3075 | idx = cops->ndo_fdb_dump(skb, cb, br_dev, dev, |
2972 | idx); | 3076 | idx); |
2973 | } | 3077 | } |
3078 | if (cb->args[1] == -EMSGSIZE) | ||
3079 | break; | ||
2974 | 3080 | ||
2975 | if (dev->netdev_ops->ndo_fdb_dump) | 3081 | if (dev->netdev_ops->ndo_fdb_dump) |
2976 | idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, NULL, | 3082 | idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, NULL, |
2977 | idx); | 3083 | idx); |
2978 | else | 3084 | else |
2979 | idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); | 3085 | idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); |
3086 | if (cb->args[1] == -EMSGSIZE) | ||
3087 | break; | ||
2980 | 3088 | ||
2981 | cops = NULL; | 3089 | cops = NULL; |
2982 | } | 3090 | } |
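
The master/kind filtering added to rtnl_dump_ifinfo() above is driven from userspace by appending attributes to an RTM_GETLINK dump request. A raw-netlink sketch, with the master ifindex value assumed and error handling elided; fd is an AF_NETLINK/NETLINK_ROUTE socket:

#include <linux/rtnetlink.h>
#include <string.h>
#include <sys/socket.h>

/* dump only links enslaved to the master with the given ifindex */
static int request_filtered_dump(int fd, __u32 master_ifindex)
{
	struct {
		struct nlmsghdr nlh;
		struct ifinfomsg ifm;
		struct rtattr rta;	/* IFLA_MASTER */
		__u32 master_idx;
	} req;

	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len = sizeof(req);
	req.nlh.nlmsg_type = RTM_GETLINK;
	req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
	req.ifm.ifi_family = AF_UNSPEC;
	req.rta.rta_type = IFLA_MASTER;
	req.rta.rta_len = RTA_LENGTH(sizeof(__u32));
	req.master_idx = master_ifindex;

	return send(fd, &req, sizeof(req), 0);
}

Replies to such a request carry NLM_F_DUMP_FILTERED in nlmsg_flags, per the flags handling above.
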
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5bf88f58bee7..e561f9f07d6d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c | |||
@@ -349,8 +349,16 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) | |||
349 | } | 349 | } |
350 | EXPORT_SYMBOL(build_skb); | 350 | EXPORT_SYMBOL(build_skb); |
351 | 351 | ||
352 | #define NAPI_SKB_CACHE_SIZE 64 | ||
353 | |||
354 | struct napi_alloc_cache { | ||
355 | struct page_frag_cache page; | ||
356 | size_t skb_count; | ||
357 | void *skb_cache[NAPI_SKB_CACHE_SIZE]; | ||
358 | }; | ||
359 | |||
352 | static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache); | 360 | static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache); |
353 | static DEFINE_PER_CPU(struct page_frag_cache, napi_alloc_cache); | 361 | static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache); |
354 | 362 | ||
355 | static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) | 363 | static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) |
356 | { | 364 | { |
@@ -380,9 +388,9 @@ EXPORT_SYMBOL(netdev_alloc_frag); | |||
380 | 388 | ||
381 | static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) | 389 | static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) |
382 | { | 390 | { |
383 | struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache); | 391 | struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); |
384 | 392 | ||
385 | return __alloc_page_frag(nc, fragsz, gfp_mask); | 393 | return __alloc_page_frag(&nc->page, fragsz, gfp_mask); |
386 | } | 394 | } |
387 | 395 | ||
388 | void *napi_alloc_frag(unsigned int fragsz) | 396 | void *napi_alloc_frag(unsigned int fragsz) |
@@ -476,7 +484,7 @@ EXPORT_SYMBOL(__netdev_alloc_skb); | |||
476 | struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, | 484 | struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, |
477 | gfp_t gfp_mask) | 485 | gfp_t gfp_mask) |
478 | { | 486 | { |
479 | struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache); | 487 | struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); |
480 | struct sk_buff *skb; | 488 | struct sk_buff *skb; |
481 | void *data; | 489 | void *data; |
482 | 490 | ||
@@ -496,7 +504,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, | |||
496 | if (sk_memalloc_socks()) | 504 | if (sk_memalloc_socks()) |
497 | gfp_mask |= __GFP_MEMALLOC; | 505 | gfp_mask |= __GFP_MEMALLOC; |
498 | 506 | ||
499 | data = __alloc_page_frag(nc, len, gfp_mask); | 507 | data = __alloc_page_frag(&nc->page, len, gfp_mask); |
500 | if (unlikely(!data)) | 508 | if (unlikely(!data)) |
501 | return NULL; | 509 | return NULL; |
502 | 510 | ||
@@ -507,7 +515,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, | |||
507 | } | 515 | } |
508 | 516 | ||
509 | /* use OR instead of assignment to avoid clearing of bits in mask */ | 517 | /* use OR instead of assignment to avoid clearing of bits in mask */ |
510 | if (nc->pfmemalloc) | 518 | if (nc->page.pfmemalloc) |
511 | skb->pfmemalloc = 1; | 519 | skb->pfmemalloc = 1; |
512 | skb->head_frag = 1; | 520 | skb->head_frag = 1; |
513 | 521 | ||
@@ -749,6 +757,73 @@ void consume_skb(struct sk_buff *skb) | |||
749 | } | 757 | } |
750 | EXPORT_SYMBOL(consume_skb); | 758 | EXPORT_SYMBOL(consume_skb); |
751 | 759 | ||
760 | void __kfree_skb_flush(void) | ||
761 | { | ||
762 | struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); | ||
763 | |||
764 | /* flush skb_cache if it contains any objects */ | ||
765 | if (nc->skb_count) { | ||
766 | kmem_cache_free_bulk(skbuff_head_cache, nc->skb_count, | ||
767 | nc->skb_cache); | ||
768 | nc->skb_count = 0; | ||
769 | } | ||
770 | } | ||
771 | |||
772 | static inline void _kfree_skb_defer(struct sk_buff *skb) | ||
773 | { | ||
774 | struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); | ||
775 | |||
776 | /* drop skb->head and call any destructors for packet */ | ||
777 | skb_release_all(skb); | ||
778 | |||
779 | /* record skb to CPU local list */ | ||
780 | nc->skb_cache[nc->skb_count++] = skb; | ||
781 | |||
782 | #ifdef CONFIG_SLUB | ||
783 | /* SLUB writes into objects when freeing */ | ||
784 | prefetchw(skb); | ||
785 | #endif | ||
786 | |||
787 | /* flush skb_cache if it is filled */ | ||
788 | if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) { | ||
789 | kmem_cache_free_bulk(skbuff_head_cache, NAPI_SKB_CACHE_SIZE, | ||
790 | nc->skb_cache); | ||
791 | nc->skb_count = 0; | ||
792 | } | ||
793 | } | ||
794 | void __kfree_skb_defer(struct sk_buff *skb) | ||
795 | { | ||
796 | _kfree_skb_defer(skb); | ||
797 | } | ||
798 | |||
799 | void napi_consume_skb(struct sk_buff *skb, int budget) | ||
800 | { | ||
801 | if (unlikely(!skb)) | ||
802 | return; | ||
803 | |||
804 | /* Zero budget indicates a non-NAPI context called us, like netpoll */ | ||
805 | if (unlikely(!budget)) { | ||
806 | dev_consume_skb_any(skb); | ||
807 | return; | ||
808 | } | ||
809 | |||
810 | if (likely(atomic_read(&skb->users) == 1)) | ||
811 | smp_rmb(); | ||
812 | else if (likely(!atomic_dec_and_test(&skb->users))) | ||
813 | return; | ||
814 | /* if we reach here, the SKB is ready to free */ | ||
815 | trace_consume_skb(skb); | ||
816 | |||
817 | /* fclone'd SKBs come from their own cache and can't be deferred */ | ||
818 | if (skb->fclone != SKB_FCLONE_UNAVAILABLE) { | ||
819 | __kfree_skb(skb); | ||
820 | return; | ||
821 | } | ||
822 | |||
823 | _kfree_skb_defer(skb); | ||
824 | } | ||
825 | EXPORT_SYMBOL(napi_consume_skb); | ||
826 | |||
752 | /* Make sure a field is enclosed inside headers_start/headers_end section */ | 827 | /* Make sure a field is enclosed inside headers_start/headers_end section */ |
753 | #define CHECK_SKB_FIELD(field) \ | 828 | #define CHECK_SKB_FIELD(field) \ |
754 | BUILD_BUG_ON(offsetof(struct sk_buff, field) < \ | 829 | BUILD_BUG_ON(offsetof(struct sk_buff, field) < \ |
@@ -1843,6 +1918,7 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, | |||
1843 | struct splice_pipe_desc *spd, struct sock *sk) | 1918 | struct splice_pipe_desc *spd, struct sock *sk) |
1844 | { | 1919 | { |
1845 | int seg; | 1920 | int seg; |
1921 | struct sk_buff *iter; | ||
1846 | 1922 | ||
1847 | /* map the linear part : | 1923 | /* map the linear part : |
1848 | * If skb->head_frag is set, this 'linear' part is backed by a | 1924 | * If skb->head_frag is set, this 'linear' part is backed by a |
@@ -1869,6 +1945,19 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, | |||
1869 | return true; | 1945 | return true; |
1870 | } | 1946 | } |
1871 | 1947 | ||
1948 | skb_walk_frags(skb, iter) { | ||
1949 | if (*offset >= iter->len) { | ||
1950 | *offset -= iter->len; | ||
1951 | continue; | ||
1952 | } | ||
1953 | /* __skb_splice_bits() only fails if the output has no room | ||
1954 | * left, so no point in going over the frag_list for the error | ||
1955 | * case. | ||
1956 | */ | ||
1957 | if (__skb_splice_bits(iter, pipe, offset, len, spd, sk)) | ||
1958 | return true; | ||
1959 | } | ||
1960 | |||
1872 | return false; | 1961 | return false; |
1873 | } | 1962 | } |
1874 | 1963 | ||
@@ -1895,9 +1984,7 @@ ssize_t skb_socket_splice(struct sock *sk, | |||
1895 | 1984 | ||
1896 | /* | 1985 | /* |
1897 | * Map data from the skb to a pipe. Should handle both the linear part, | 1986 | * Map data from the skb to a pipe. Should handle both the linear part, |
1898 | * the fragments, and the frag list. It does NOT handle frag lists within | 1987 | * the fragments, and the frag list. |
1899 | * the frag list, if such a thing exists. We'd probably need to recurse to | ||
1900 | * handle that cleanly. | ||
1901 | */ | 1988 | */ |
1902 | int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, | 1989 | int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, |
1903 | struct pipe_inode_info *pipe, unsigned int tlen, | 1990 | struct pipe_inode_info *pipe, unsigned int tlen, |
@@ -1916,29 +2003,10 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, | |||
1916 | .ops = &nosteal_pipe_buf_ops, | 2003 | .ops = &nosteal_pipe_buf_ops, |
1917 | .spd_release = sock_spd_release, | 2004 | .spd_release = sock_spd_release, |
1918 | }; | 2005 | }; |
1919 | struct sk_buff *frag_iter; | ||
1920 | int ret = 0; | 2006 | int ret = 0; |
1921 | 2007 | ||
1922 | /* | 2008 | __skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk); |
1923 | * __skb_splice_bits() only fails if the output has no room left, | ||
1924 | * so no point in going over the frag_list for the error case. | ||
1925 | */ | ||
1926 | if (__skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk)) | ||
1927 | goto done; | ||
1928 | else if (!tlen) | ||
1929 | goto done; | ||
1930 | |||
1931 | /* | ||
1932 | * now see if we have a frag_list to map | ||
1933 | */ | ||
1934 | skb_walk_frags(skb, frag_iter) { | ||
1935 | if (!tlen) | ||
1936 | break; | ||
1937 | if (__skb_splice_bits(frag_iter, pipe, &offset, &tlen, &spd, sk)) | ||
1938 | break; | ||
1939 | } | ||
1940 | 2009 | ||
1941 | done: | ||
1942 | if (spd.nr_pages) | 2010 | if (spd.nr_pages) |
1943 | ret = splice_cb(sk, pipe, &spd); | 2011 | ret = splice_cb(sk, pipe, &spd); |
1944 | 2012 | ||
@@ -2948,6 +3016,24 @@ int skb_append_pagefrags(struct sk_buff *skb, struct page *page, | |||
2948 | EXPORT_SYMBOL_GPL(skb_append_pagefrags); | 3016 | EXPORT_SYMBOL_GPL(skb_append_pagefrags); |
2949 | 3017 | ||
2950 | /** | 3018 | /** |
3019 | * skb_push_rcsum - push skb and update receive checksum | ||
3020 | * @skb: buffer to update | ||
3021 | * @len: length of data pushed | ||
3022 | * | ||
3023 | * This function performs an skb_push on the packet and updates | ||
3024 | * the CHECKSUM_COMPLETE checksum. It should be used on | ||
3025 | * receive path processing instead of skb_push unless you know | ||
3026 | * that the checksum difference is zero (e.g., a valid IP header) | ||
3027 | * or you are setting ip_summed to CHECKSUM_NONE. | ||
3028 | */ | ||
3029 | static unsigned char *skb_push_rcsum(struct sk_buff *skb, unsigned len) | ||
3030 | { | ||
3031 | skb_push(skb, len); | ||
3032 | skb_postpush_rcsum(skb, skb->data, len); | ||
3033 | return skb->data; | ||
3034 | } | ||
3035 | |||
3036 | /** | ||
2951 | * skb_pull_rcsum - pull skb and update receive checksum | 3037 | * skb_pull_rcsum - pull skb and update receive checksum |
2952 | * @skb: buffer to update | 3038 | * @skb: buffer to update |
2953 | * @len: length of data pulled | 3039 | * @len: length of data pulled |
@@ -3006,8 +3092,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, | |||
3006 | if (unlikely(!proto)) | 3092 | if (unlikely(!proto)) |
3007 | return ERR_PTR(-EINVAL); | 3093 | return ERR_PTR(-EINVAL); |
3008 | 3094 | ||
3009 | csum = !head_skb->encap_hdr_csum && | 3095 | csum = !!can_checksum_protocol(features, proto); |
3010 | !!can_checksum_protocol(features, proto); | ||
3011 | 3096 | ||
3012 | headroom = skb_headroom(head_skb); | 3097 | headroom = skb_headroom(head_skb); |
3013 | pos = skb_headlen(head_skb); | 3098 | pos = skb_headlen(head_skb); |
@@ -3100,13 +3185,15 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, | |||
3100 | if (nskb->len == len + doffset) | 3185 | if (nskb->len == len + doffset) |
3101 | goto perform_csum_check; | 3186 | goto perform_csum_check; |
3102 | 3187 | ||
3103 | if (!sg && !nskb->remcsum_offload) { | 3188 | if (!sg) { |
3104 | nskb->ip_summed = CHECKSUM_NONE; | 3189 | if (!nskb->remcsum_offload) |
3105 | nskb->csum = skb_copy_and_csum_bits(head_skb, offset, | 3190 | nskb->ip_summed = CHECKSUM_NONE; |
3106 | skb_put(nskb, len), | 3191 | SKB_GSO_CB(nskb)->csum = |
3107 | len, 0); | 3192 | skb_copy_and_csum_bits(head_skb, offset, |
3193 | skb_put(nskb, len), | ||
3194 | len, 0); | ||
3108 | SKB_GSO_CB(nskb)->csum_start = | 3195 | SKB_GSO_CB(nskb)->csum_start = |
3109 | skb_headroom(nskb) + doffset; | 3196 | skb_headroom(nskb) + doffset; |
3110 | continue; | 3197 | continue; |
3111 | } | 3198 | } |
3112 | 3199 | ||
@@ -3172,12 +3259,19 @@ skip_fraglist: | |||
3172 | nskb->truesize += nskb->data_len; | 3259 | nskb->truesize += nskb->data_len; |
3173 | 3260 | ||
3174 | perform_csum_check: | 3261 | perform_csum_check: |
3175 | if (!csum && !nskb->remcsum_offload) { | 3262 | if (!csum) { |
3176 | nskb->csum = skb_checksum(nskb, doffset, | 3263 | if (skb_has_shared_frag(nskb)) { |
3177 | nskb->len - doffset, 0); | 3264 | err = __skb_linearize(nskb); |
3178 | nskb->ip_summed = CHECKSUM_NONE; | 3265 | if (err) |
3266 | goto err; | ||
3267 | } | ||
3268 | if (!nskb->remcsum_offload) | ||
3269 | nskb->ip_summed = CHECKSUM_NONE; | ||
3270 | SKB_GSO_CB(nskb)->csum = | ||
3271 | skb_checksum(nskb, doffset, | ||
3272 | nskb->len - doffset, 0); | ||
3179 | SKB_GSO_CB(nskb)->csum_start = | 3273 | SKB_GSO_CB(nskb)->csum_start = |
3180 | skb_headroom(nskb) + doffset; | 3274 | skb_headroom(nskb) + doffset; |
3181 | } | 3275 | } |
3182 | } while ((offset += len) < head_skb->len); | 3276 | } while ((offset += len) < head_skb->len); |
3183 | 3277 | ||
@@ -4084,9 +4178,9 @@ struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb, | |||
4084 | if (!pskb_may_pull(skb_chk, offset)) | 4178 | if (!pskb_may_pull(skb_chk, offset)) |
4085 | goto err; | 4179 | goto err; |
4086 | 4180 | ||
4087 | __skb_pull(skb_chk, offset); | 4181 | skb_pull_rcsum(skb_chk, offset); |
4088 | ret = skb_chkf(skb_chk); | 4182 | ret = skb_chkf(skb_chk); |
4089 | __skb_push(skb_chk, offset); | 4183 | skb_push_rcsum(skb_chk, offset); |
4090 | 4184 | ||
4091 | if (ret) | 4185 | if (ret) |
4092 | goto err; | 4186 | goto err; |
@@ -4219,7 +4313,6 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) | |||
4219 | skb->skb_iif = 0; | 4313 | skb->skb_iif = 0; |
4220 | skb->ignore_df = 0; | 4314 | skb->ignore_df = 0; |
4221 | skb_dst_drop(skb); | 4315 | skb_dst_drop(skb); |
4222 | skb_sender_cpu_clear(skb); | ||
4223 | secpath_reset(skb); | 4316 | secpath_reset(skb); |
4224 | nf_reset(skb); | 4317 | nf_reset(skb); |
4225 | nf_reset_trace(skb); | 4318 | nf_reset_trace(skb); |
@@ -4409,15 +4502,16 @@ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) | |||
4409 | __skb_push(skb, offset); | 4502 | __skb_push(skb, offset); |
4410 | err = __vlan_insert_tag(skb, skb->vlan_proto, | 4503 | err = __vlan_insert_tag(skb, skb->vlan_proto, |
4411 | skb_vlan_tag_get(skb)); | 4504 | skb_vlan_tag_get(skb)); |
4412 | if (err) | 4505 | if (err) { |
4506 | __skb_pull(skb, offset); | ||
4413 | return err; | 4507 | return err; |
4508 | } | ||
4509 | |||
4414 | skb->protocol = skb->vlan_proto; | 4510 | skb->protocol = skb->vlan_proto; |
4415 | skb->mac_len += VLAN_HLEN; | 4511 | skb->mac_len += VLAN_HLEN; |
4416 | __skb_pull(skb, offset); | ||
4417 | 4512 | ||
4418 | if (skb->ip_summed == CHECKSUM_COMPLETE) | 4513 | skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN); |
4419 | skb->csum = csum_add(skb->csum, csum_partial(skb->data | 4514 | __skb_pull(skb, offset); |
4420 | + (2 * ETH_ALEN), VLAN_HLEN, 0)); | ||
4421 | } | 4515 | } |
4422 | __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); | 4516 | __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); |
4423 | return 0; | 4517 | return 0; |
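
A sketch of the intended caller of napi_consume_skb(): a driver TX-completion loop running from NAPI poll. Everything prefixed my_ is a placeholder for driver-private code:

/* assumed driver-private helpers, declared for illustration only */
struct my_tx_ring;
bool my_desc_done(struct my_tx_ring *ring);
struct sk_buff *my_next_completed_skb(struct my_tx_ring *ring);

static void my_clean_tx_ring(struct my_tx_ring *ring, int budget)
{
	while (my_desc_done(ring)) {
		struct sk_buff *skb = my_next_completed_skb(ring);

		/* budget != 0: NAPI context, so non-fclone heads are
		 * batched per CPU and released in one
		 * kmem_cache_free_bulk() when __kfree_skb_flush() runs;
		 * budget == 0 (e.g. netpoll) falls back to
		 * dev_consume_skb_any()
		 */
		napi_consume_skb(skb, budget);
	}
}
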
diff --git a/net/core/sock.c b/net/core/sock.c index 6c1c8bc93412..7e73c26b6bb4 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
@@ -221,7 +221,8 @@ static const char *const af_family_key_strings[AF_MAX+1] = { | |||
221 | "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , | 221 | "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , |
222 | "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , | 222 | "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , |
223 | "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" , | 223 | "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" , |
224 | "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_MAX" | 224 | "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_KCM" , |
225 | "sk_lock-AF_MAX" | ||
225 | }; | 226 | }; |
226 | static const char *const af_family_slock_key_strings[AF_MAX+1] = { | 227 | static const char *const af_family_slock_key_strings[AF_MAX+1] = { |
227 | "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , | 228 | "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , |
@@ -237,7 +238,8 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = { | |||
237 | "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , | 238 | "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , |
238 | "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , | 239 | "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , |
239 | "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" , | 240 | "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" , |
240 | "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_MAX" | 241 | "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_KCM" , |
242 | "slock-AF_MAX" | ||
241 | }; | 243 | }; |
242 | static const char *const af_family_clock_key_strings[AF_MAX+1] = { | 244 | static const char *const af_family_clock_key_strings[AF_MAX+1] = { |
243 | "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" , | 245 | "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" , |
@@ -253,7 +255,8 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = { | |||
253 | "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , | 255 | "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , |
254 | "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , | 256 | "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , |
255 | "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" , | 257 | "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" , |
256 | "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_MAX" | 258 | "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_KCM" , |
259 | "clock-AF_MAX" | ||
257 | }; | 260 | }; |
258 | 261 | ||
259 | /* | 262 | /* |
@@ -987,6 +990,10 @@ set_rcvbuf: | |||
987 | sk->sk_incoming_cpu = val; | 990 | sk->sk_incoming_cpu = val; |
988 | break; | 991 | break; |
989 | 992 | ||
993 | case SO_CNX_ADVICE: | ||
994 | if (val == 1) | ||
995 | dst_negative_advice(sk); | ||
996 | break; | ||
990 | default: | 997 | default: |
991 | ret = -ENOPROTOOPT; | 998 | ret = -ENOPROTOOPT; |
992 | break; | 999 | break; |
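
From userspace the new SO_CNX_ADVICE option is a one-way "this connection is suffering" hint; val == 1 is the only value acted on, triggering dst_negative_advice() so a better route can be looked up. A sketch, with the fallback define assuming the asm-generic constant:

#include <sys/socket.h>

#ifndef SO_CNX_ADVICE
#define SO_CNX_ADVICE 53	/* asm-generic value; differs on some arches */
#endif

static void advise_bad_connection(int sock_fd)
{
	int val = 1;

	setsockopt(sock_fd, SOL_SOCKET, SO_CNX_ADVICE, &val, sizeof(val));
}
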
@@ -1531,6 +1538,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) | |||
1531 | newsk = NULL; | 1538 | newsk = NULL; |
1532 | goto out; | 1539 | goto out; |
1533 | } | 1540 | } |
1541 | RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL); | ||
1534 | 1542 | ||
1535 | newsk->sk_err = 0; | 1543 | newsk->sk_err = 0; |
1536 | newsk->sk_priority = 0; | 1544 | newsk->sk_priority = 0; |
@@ -1903,7 +1911,7 @@ EXPORT_SYMBOL(sock_cmsg_send); | |||
1903 | bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp) | 1911 | bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp) |
1904 | { | 1912 | { |
1905 | if (pfrag->page) { | 1913 | if (pfrag->page) { |
1906 | if (atomic_read(&pfrag->page->_count) == 1) { | 1914 | if (page_ref_count(pfrag->page) == 1) { |
1907 | pfrag->offset = 0; | 1915 | pfrag->offset = 0; |
1908 | return true; | 1916 | return true; |
1909 | } | 1917 | } |