aboutsummaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c2
-rw-r--r--net/core/netprio_cgroup.c260
-rw-r--r--net/core/skbuff.c6
3 files changed, 118 insertions, 150 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 47838509f5fd..d0cbc93fcf32 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3549,6 +3549,8 @@ static int napi_gro_complete(struct sk_buff *skb)
3549 struct list_head *head = &offload_base; 3549 struct list_head *head = &offload_base;
3550 int err = -ENOENT; 3550 int err = -ENOENT;
3551 3551
3552 BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb));
3553
3552 if (NAPI_GRO_CB(skb)->count == 1) { 3554 if (NAPI_GRO_CB(skb)->count == 1) {
3553 skb_shinfo(skb)->gso_size = 0; 3555 skb_shinfo(skb)->gso_size = 0;
3554 goto out; 3556 goto out;
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 847c02b197b0..5e67defe2cb0 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -27,11 +27,7 @@
27 27
28#include <linux/fdtable.h> 28#include <linux/fdtable.h>
29 29
30#define PRIOIDX_SZ 128 30#define PRIOMAP_MIN_SZ 128
31
32static unsigned long prioidx_map[PRIOIDX_SZ];
33static DEFINE_SPINLOCK(prioidx_map_lock);
34static atomic_t max_prioidx = ATOMIC_INIT(0);
35 31
36static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgrp) 32static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgrp)
37{ 33{
@@ -39,136 +35,157 @@ static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgr
39 struct cgroup_netprio_state, css); 35 struct cgroup_netprio_state, css);
40} 36}
41 37
42static int get_prioidx(u32 *prio) 38/*
43{ 39 * Extend @dev->priomap so that it's large enough to accomodate
44 unsigned long flags; 40 * @target_idx. @dev->priomap.priomap_len > @target_idx after successful
45 u32 prioidx; 41 * return. Must be called under rtnl lock.
46 42 */
47 spin_lock_irqsave(&prioidx_map_lock, flags); 43static int extend_netdev_table(struct net_device *dev, u32 target_idx)
48 prioidx = find_first_zero_bit(prioidx_map, sizeof(unsigned long) * PRIOIDX_SZ);
49 if (prioidx == sizeof(unsigned long) * PRIOIDX_SZ) {
50 spin_unlock_irqrestore(&prioidx_map_lock, flags);
51 return -ENOSPC;
52 }
53 set_bit(prioidx, prioidx_map);
54 if (atomic_read(&max_prioidx) < prioidx)
55 atomic_set(&max_prioidx, prioidx);
56 spin_unlock_irqrestore(&prioidx_map_lock, flags);
57 *prio = prioidx;
58 return 0;
59}
60
61static void put_prioidx(u32 idx)
62{ 44{
63 unsigned long flags; 45 struct netprio_map *old, *new;
64 46 size_t new_sz, new_len;
65 spin_lock_irqsave(&prioidx_map_lock, flags);
66 clear_bit(idx, prioidx_map);
67 spin_unlock_irqrestore(&prioidx_map_lock, flags);
68}
69 47
70static int extend_netdev_table(struct net_device *dev, u32 new_len) 48 /* is the existing priomap large enough? */
71{ 49 old = rtnl_dereference(dev->priomap);
72 size_t new_size = sizeof(struct netprio_map) + 50 if (old && old->priomap_len > target_idx)
73 ((sizeof(u32) * new_len)); 51 return 0;
74 struct netprio_map *new_priomap = kzalloc(new_size, GFP_KERNEL);
75 struct netprio_map *old_priomap;
76 52
77 old_priomap = rtnl_dereference(dev->priomap); 53 /*
54 * Determine the new size. Let's keep it power-of-two. We start
55 * from PRIOMAP_MIN_SZ and double it until it's large enough to
56 * accommodate @target_idx.
57 */
58 new_sz = PRIOMAP_MIN_SZ;
59 while (true) {
60 new_len = (new_sz - offsetof(struct netprio_map, priomap)) /
61 sizeof(new->priomap[0]);
62 if (new_len > target_idx)
63 break;
64 new_sz *= 2;
65 /* overflowed? */
66 if (WARN_ON(new_sz < PRIOMAP_MIN_SZ))
67 return -ENOSPC;
68 }
78 69
79 if (!new_priomap) { 70 /* allocate & copy */
71 new = kzalloc(new_sz, GFP_KERNEL);
72 if (!new) {
80 pr_warn("Unable to alloc new priomap!\n"); 73 pr_warn("Unable to alloc new priomap!\n");
81 return -ENOMEM; 74 return -ENOMEM;
82 } 75 }
83 76
84 if (old_priomap) 77 if (old)
85 memcpy(new_priomap->priomap, old_priomap->priomap, 78 memcpy(new->priomap, old->priomap,
86 old_priomap->priomap_len * 79 old->priomap_len * sizeof(old->priomap[0]));
87 sizeof(old_priomap->priomap[0]));
88 80
89 new_priomap->priomap_len = new_len; 81 new->priomap_len = new_len;
90 82
91 rcu_assign_pointer(dev->priomap, new_priomap); 83 /* install the new priomap */
92 if (old_priomap) 84 rcu_assign_pointer(dev->priomap, new);
93 kfree_rcu(old_priomap, rcu); 85 if (old)
86 kfree_rcu(old, rcu);
94 return 0; 87 return 0;
95} 88}
96 89
97static int write_update_netdev_table(struct net_device *dev) 90/**
91 * netprio_prio - return the effective netprio of a cgroup-net_device pair
92 * @cgrp: cgroup part of the target pair
93 * @dev: net_device part of the target pair
94 *
95 * Should be called under RCU read or rtnl lock.
96 */
97static u32 netprio_prio(struct cgroup *cgrp, struct net_device *dev)
98{
99 struct netprio_map *map = rcu_dereference_rtnl(dev->priomap);
100
101 if (map && cgrp->id < map->priomap_len)
102 return map->priomap[cgrp->id];
103 return 0;
104}
105
106/**
107 * netprio_set_prio - set netprio on a cgroup-net_device pair
108 * @cgrp: cgroup part of the target pair
109 * @dev: net_device part of the target pair
110 * @prio: prio to set
111 *
112 * Set netprio to @prio on @cgrp-@dev pair. Should be called under rtnl
113 * lock and may fail under memory pressure for non-zero @prio.
114 */
115static int netprio_set_prio(struct cgroup *cgrp, struct net_device *dev,
116 u32 prio)
98{ 117{
99 int ret = 0;
100 u32 max_len;
101 struct netprio_map *map; 118 struct netprio_map *map;
119 int ret;
102 120
103 max_len = atomic_read(&max_prioidx) + 1; 121 /* avoid extending priomap for zero writes */
104 map = rtnl_dereference(dev->priomap); 122 map = rtnl_dereference(dev->priomap);
105 if (!map || map->priomap_len < max_len) 123 if (!prio && (!map || map->priomap_len <= cgrp->id))
106 ret = extend_netdev_table(dev, max_len); 124 return 0;
107 125
108 return ret; 126 ret = extend_netdev_table(dev, cgrp->id);
127 if (ret)
128 return ret;
129
130 map = rtnl_dereference(dev->priomap);
131 map->priomap[cgrp->id] = prio;
132 return 0;
109} 133}
110 134
111static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp) 135static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp)
112{ 136{
113 struct cgroup_netprio_state *cs; 137 struct cgroup_netprio_state *cs;
114 int ret = -EINVAL;
115 138
116 cs = kzalloc(sizeof(*cs), GFP_KERNEL); 139 cs = kzalloc(sizeof(*cs), GFP_KERNEL);
117 if (!cs) 140 if (!cs)
118 return ERR_PTR(-ENOMEM); 141 return ERR_PTR(-ENOMEM);
119 142
120 if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx)
121 goto out;
122
123 ret = get_prioidx(&cs->prioidx);
124 if (ret < 0) {
125 pr_warn("No space in priority index array\n");
126 goto out;
127 }
128
129 return &cs->css; 143 return &cs->css;
130out:
131 kfree(cs);
132 return ERR_PTR(ret);
133} 144}
134 145
135static void cgrp_destroy(struct cgroup *cgrp) 146static int cgrp_css_online(struct cgroup *cgrp)
136{ 147{
137 struct cgroup_netprio_state *cs; 148 struct cgroup *parent = cgrp->parent;
138 struct net_device *dev; 149 struct net_device *dev;
139 struct netprio_map *map; 150 int ret = 0;
151
152 if (!parent)
153 return 0;
140 154
141 cs = cgrp_netprio_state(cgrp);
142 rtnl_lock(); 155 rtnl_lock();
156 /*
157 * Inherit prios from the parent. As all prios are set during
158 * onlining, there is no need to clear them on offline.
159 */
143 for_each_netdev(&init_net, dev) { 160 for_each_netdev(&init_net, dev) {
144 map = rtnl_dereference(dev->priomap); 161 u32 prio = netprio_prio(parent, dev);
145 if (map && cs->prioidx < map->priomap_len) 162
146 map->priomap[cs->prioidx] = 0; 163 ret = netprio_set_prio(cgrp, dev, prio);
164 if (ret)
165 break;
147 } 166 }
148 rtnl_unlock(); 167 rtnl_unlock();
149 put_prioidx(cs->prioidx); 168 return ret;
150 kfree(cs); 169}
170
171static void cgrp_css_free(struct cgroup *cgrp)
172{
173 kfree(cgrp_netprio_state(cgrp));
151} 174}
152 175
153static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft) 176static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft)
154{ 177{
155 return (u64)cgrp_netprio_state(cgrp)->prioidx; 178 return cgrp->id;
156} 179}
157 180
158static int read_priomap(struct cgroup *cont, struct cftype *cft, 181static int read_priomap(struct cgroup *cont, struct cftype *cft,
159 struct cgroup_map_cb *cb) 182 struct cgroup_map_cb *cb)
160{ 183{
161 struct net_device *dev; 184 struct net_device *dev;
162 u32 prioidx = cgrp_netprio_state(cont)->prioidx;
163 u32 priority;
164 struct netprio_map *map;
165 185
166 rcu_read_lock(); 186 rcu_read_lock();
167 for_each_netdev_rcu(&init_net, dev) { 187 for_each_netdev_rcu(&init_net, dev)
168 map = rcu_dereference(dev->priomap); 188 cb->fill(cb, dev->name, netprio_prio(cont, dev));
169 priority = (map && prioidx < map->priomap_len) ? map->priomap[prioidx] : 0;
170 cb->fill(cb, dev->name, priority);
171 }
172 rcu_read_unlock(); 189 rcu_read_unlock();
173 return 0; 190 return 0;
174} 191}
@@ -176,66 +193,24 @@ static int read_priomap(struct cgroup *cont, struct cftype *cft,
176static int write_priomap(struct cgroup *cgrp, struct cftype *cft, 193static int write_priomap(struct cgroup *cgrp, struct cftype *cft,
177 const char *buffer) 194 const char *buffer)
178{ 195{
179 char *devname = kstrdup(buffer, GFP_KERNEL); 196 char devname[IFNAMSIZ + 1];
180 int ret = -EINVAL;
181 u32 prioidx = cgrp_netprio_state(cgrp)->prioidx;
182 unsigned long priority;
183 char *priostr;
184 struct net_device *dev; 197 struct net_device *dev;
185 struct netprio_map *map; 198 u32 prio;
186 199 int ret;
187 if (!devname)
188 return -ENOMEM;
189
190 /*
191 * Minimally sized valid priomap string
192 */
193 if (strlen(devname) < 3)
194 goto out_free_devname;
195
196 priostr = strstr(devname, " ");
197 if (!priostr)
198 goto out_free_devname;
199
200 /*
201 *Separate the devname from the associated priority
202 *and advance the priostr pointer to the priority value
203 */
204 *priostr = '\0';
205 priostr++;
206
207 /*
208 * If the priostr points to NULL, we're at the end of the passed
209 * in string, and its not a valid write
210 */
211 if (*priostr == '\0')
212 goto out_free_devname;
213
214 ret = kstrtoul(priostr, 10, &priority);
215 if (ret < 0)
216 goto out_free_devname;
217 200
218 ret = -ENODEV; 201 if (sscanf(buffer, "%"__stringify(IFNAMSIZ)"s %u", devname, &prio) != 2)
202 return -EINVAL;
219 203
220 dev = dev_get_by_name(&init_net, devname); 204 dev = dev_get_by_name(&init_net, devname);
221 if (!dev) 205 if (!dev)
222 goto out_free_devname; 206 return -ENODEV;
223 207
224 rtnl_lock(); 208 rtnl_lock();
225 ret = write_update_netdev_table(dev);
226 if (ret < 0)
227 goto out_put_dev;
228 209
229 map = rtnl_dereference(dev->priomap); 210 ret = netprio_set_prio(cgrp, dev, prio);
230 if (map)
231 map->priomap[prioidx] = priority;
232 211
233out_put_dev:
234 rtnl_unlock(); 212 rtnl_unlock();
235 dev_put(dev); 213 dev_put(dev);
236
237out_free_devname:
238 kfree(devname);
239 return ret; 214 return ret;
240} 215}
241 216
@@ -276,22 +251,13 @@ static struct cftype ss_files[] = {
276 251
277struct cgroup_subsys net_prio_subsys = { 252struct cgroup_subsys net_prio_subsys = {
278 .name = "net_prio", 253 .name = "net_prio",
279 .create = cgrp_create, 254 .css_alloc = cgrp_css_alloc,
280 .destroy = cgrp_destroy, 255 .css_online = cgrp_css_online,
256 .css_free = cgrp_css_free,
281 .attach = net_prio_attach, 257 .attach = net_prio_attach,
282 .subsys_id = net_prio_subsys_id, 258 .subsys_id = net_prio_subsys_id,
283 .base_cftypes = ss_files, 259 .base_cftypes = ss_files,
284 .module = THIS_MODULE, 260 .module = THIS_MODULE,
285
286 /*
287 * net_prio has artificial limit on the number of cgroups and
288 * disallows nesting making it impossible to co-mount it with other
289 * hierarchical subsystems. Remove the artificially low PRIOIDX_SZ
290 * limit and properly nest configuration such that children follow
291 * their parents' configurations by default and are allowed to
292 * override and remove the following.
293 */
294 .broken_hierarchy = true,
295}; 261};
296 262
297static int netprio_device_event(struct notifier_block *unused, 263static int netprio_device_event(struct notifier_block *unused,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index ac9e44a6ab2b..3ab989b0de42 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3032,7 +3032,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
3032 skb_shinfo(nskb)->gso_size = pinfo->gso_size; 3032 skb_shinfo(nskb)->gso_size = pinfo->gso_size;
3033 pinfo->gso_size = 0; 3033 pinfo->gso_size = 0;
3034 skb_header_release(p); 3034 skb_header_release(p);
3035 nskb->prev = p; 3035 NAPI_GRO_CB(nskb)->last = p;
3036 3036
3037 nskb->data_len += p->len; 3037 nskb->data_len += p->len;
3038 nskb->truesize += p->truesize; 3038 nskb->truesize += p->truesize;
@@ -3058,8 +3058,8 @@ merge:
3058 3058
3059 __skb_pull(skb, offset); 3059 __skb_pull(skb, offset);
3060 3060
3061 p->prev->next = skb; 3061 NAPI_GRO_CB(p)->last->next = skb;
3062 p->prev = skb; 3062 NAPI_GRO_CB(p)->last = skb;
3063 skb_header_release(skb); 3063 skb_header_release(skb);
3064 3064
3065done: 3065done: