Diffstat (limited to 'net/core')
 -rw-r--r--  net/core/dev.c            |   2
 -rw-r--r--  net/core/netprio_cgroup.c | 260
 -rw-r--r--  net/core/skbuff.c         |   6
 3 files changed, 118 insertions, 150 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 47838509f5fd..d0cbc93fcf32 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3549,6 +3549,8 @@ static int napi_gro_complete(struct sk_buff *skb)
         struct list_head *head = &offload_base;
         int err = -ENOENT;
 
+        BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb));
+
         if (NAPI_GRO_CB(skb)->count == 1) {
                 skb_shinfo(skb)->gso_size = 0;
                 goto out;
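
The new BUILD_BUG_ON() turns the size relationship between struct napi_gro_cb and skb->cb[] into a compile-time invariant, which matters now that the GRO control block gains a `last` pointer (see the skbuff.c hunk further down). A minimal sketch of the same negative-array-size check, using illustrative stand-in types rather than the kernel's:

    #include <stddef.h>

    /* stand-ins for napi_gro_cb / sk_buff; the field layout is illustrative only */
    struct example_gro_cb {
            void *frag0;
            unsigned int frag0_len;
            int data_offset;
            void *last;
    };

    struct example_skb {
            char cb[48];    /* sk_buff::cb[] is 48 bytes */
    };

    /* refuses to compile when cond is true: the array size would be negative */
    #define EXAMPLE_BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)]))

    static inline void example_check(void)
    {
            EXAMPLE_BUILD_BUG_ON(sizeof(struct example_gro_cb) >
                                 sizeof(((struct example_skb *)0)->cb));
    }

If the control block ever outgrows cb[], the build fails instead of the skb silently being corrupted at runtime.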
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 847c02b197b0..5e67defe2cb0 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -27,11 +27,7 @@
 
 #include <linux/fdtable.h>
 
-#define PRIOIDX_SZ 128
-
-static unsigned long prioidx_map[PRIOIDX_SZ];
-static DEFINE_SPINLOCK(prioidx_map_lock);
-static atomic_t max_prioidx = ATOMIC_INIT(0);
+#define PRIOMAP_MIN_SZ 128
 
 static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgrp)
 {
@@ -39,136 +35,157 @@ static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgr
                             struct cgroup_netprio_state, css);
 }
 
-static int get_prioidx(u32 *prio)
-{
-        unsigned long flags;
-        u32 prioidx;
-
-        spin_lock_irqsave(&prioidx_map_lock, flags);
-        prioidx = find_first_zero_bit(prioidx_map, sizeof(unsigned long) * PRIOIDX_SZ);
-        if (prioidx == sizeof(unsigned long) * PRIOIDX_SZ) {
-                spin_unlock_irqrestore(&prioidx_map_lock, flags);
-                return -ENOSPC;
-        }
-        set_bit(prioidx, prioidx_map);
-        if (atomic_read(&max_prioidx) < prioidx)
-                atomic_set(&max_prioidx, prioidx);
-        spin_unlock_irqrestore(&prioidx_map_lock, flags);
-        *prio = prioidx;
-        return 0;
-}
-
-static void put_prioidx(u32 idx)
+/*
+ * Extend @dev->priomap so that it's large enough to accomodate
+ * @target_idx. @dev->priomap.priomap_len > @target_idx after successful
+ * return. Must be called under rtnl lock.
+ */
+static int extend_netdev_table(struct net_device *dev, u32 target_idx)
 {
-        unsigned long flags;
-
-        spin_lock_irqsave(&prioidx_map_lock, flags);
-        clear_bit(idx, prioidx_map);
-        spin_unlock_irqrestore(&prioidx_map_lock, flags);
-}
+        struct netprio_map *old, *new;
+        size_t new_sz, new_len;
 
-static int extend_netdev_table(struct net_device *dev, u32 new_len)
-{
-        size_t new_size = sizeof(struct netprio_map) +
-                           ((sizeof(u32) * new_len));
-        struct netprio_map *new_priomap = kzalloc(new_size, GFP_KERNEL);
-        struct netprio_map *old_priomap;
+        /* is the existing priomap large enough? */
+        old = rtnl_dereference(dev->priomap);
+        if (old && old->priomap_len > target_idx)
+                return 0;
 
-        old_priomap = rtnl_dereference(dev->priomap);
+        /*
+         * Determine the new size. Let's keep it power-of-two. We start
+         * from PRIOMAP_MIN_SZ and double it until it's large enough to
+         * accommodate @target_idx.
+         */
+        new_sz = PRIOMAP_MIN_SZ;
+        while (true) {
+                new_len = (new_sz - offsetof(struct netprio_map, priomap)) /
+                        sizeof(new->priomap[0]);
+                if (new_len > target_idx)
+                        break;
+                new_sz *= 2;
+                /* overflowed? */
+                if (WARN_ON(new_sz < PRIOMAP_MIN_SZ))
+                        return -ENOSPC;
+        }
 
-        if (!new_priomap) {
+        /* allocate & copy */
+        new = kzalloc(new_sz, GFP_KERNEL);
+        if (!new) {
                 pr_warn("Unable to alloc new priomap!\n");
                 return -ENOMEM;
         }
 
-        if (old_priomap)
-                memcpy(new_priomap->priomap, old_priomap->priomap,
-                       old_priomap->priomap_len *
-                       sizeof(old_priomap->priomap[0]));
+        if (old)
+                memcpy(new->priomap, old->priomap,
+                       old->priomap_len * sizeof(old->priomap[0]));
 
-        new_priomap->priomap_len = new_len;
+        new->priomap_len = new_len;
 
-        rcu_assign_pointer(dev->priomap, new_priomap);
-        if (old_priomap)
-                kfree_rcu(old_priomap, rcu);
+        /* install the new priomap */
+        rcu_assign_pointer(dev->priomap, new);
+        if (old)
+                kfree_rcu(old, rcu);
         return 0;
 }
 
-static int write_update_netdev_table(struct net_device *dev)
+/**
+ * netprio_prio - return the effective netprio of a cgroup-net_device pair
+ * @cgrp: cgroup part of the target pair
+ * @dev: net_device part of the target pair
+ *
+ * Should be called under RCU read or rtnl lock.
+ */
+static u32 netprio_prio(struct cgroup *cgrp, struct net_device *dev)
+{
+        struct netprio_map *map = rcu_dereference_rtnl(dev->priomap);
+
+        if (map && cgrp->id < map->priomap_len)
+                return map->priomap[cgrp->id];
+        return 0;
+}
+
+/**
+ * netprio_set_prio - set netprio on a cgroup-net_device pair
+ * @cgrp: cgroup part of the target pair
+ * @dev: net_device part of the target pair
+ * @prio: prio to set
+ *
+ * Set netprio to @prio on @cgrp-@dev pair. Should be called under rtnl
+ * lock and may fail under memory pressure for non-zero @prio.
+ */
+static int netprio_set_prio(struct cgroup *cgrp, struct net_device *dev,
+                            u32 prio)
 {
-        int ret = 0;
-        u32 max_len;
         struct netprio_map *map;
+        int ret;
 
-        max_len = atomic_read(&max_prioidx) + 1;
+        /* avoid extending priomap for zero writes */
         map = rtnl_dereference(dev->priomap);
-        if (!map || map->priomap_len < max_len)
-                ret = extend_netdev_table(dev, max_len);
+        if (!prio && (!map || map->priomap_len <= cgrp->id))
+                return 0;
 
-        return ret;
+        ret = extend_netdev_table(dev, cgrp->id);
+        if (ret)
+                return ret;
+
+        map = rtnl_dereference(dev->priomap);
+        map->priomap[cgrp->id] = prio;
+        return 0;
 }
 
-static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp)
+static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp)
 {
         struct cgroup_netprio_state *cs;
-        int ret = -EINVAL;
 
         cs = kzalloc(sizeof(*cs), GFP_KERNEL);
         if (!cs)
                 return ERR_PTR(-ENOMEM);
 
-        if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx)
-                goto out;
-
-        ret = get_prioidx(&cs->prioidx);
-        if (ret < 0) {
-                pr_warn("No space in priority index array\n");
-                goto out;
-        }
-
         return &cs->css;
-out:
-        kfree(cs);
-        return ERR_PTR(ret);
 }
 
-static void cgrp_destroy(struct cgroup *cgrp)
+static int cgrp_css_online(struct cgroup *cgrp)
 {
-        struct cgroup_netprio_state *cs;
+        struct cgroup *parent = cgrp->parent;
         struct net_device *dev;
-        struct netprio_map *map;
+        int ret = 0;
+
+        if (!parent)
+                return 0;
 
-        cs = cgrp_netprio_state(cgrp);
         rtnl_lock();
+        /*
+         * Inherit prios from the parent. As all prios are set during
+         * onlining, there is no need to clear them on offline.
+         */
         for_each_netdev(&init_net, dev) {
-                map = rtnl_dereference(dev->priomap);
-                if (map && cs->prioidx < map->priomap_len)
-                        map->priomap[cs->prioidx] = 0;
+                u32 prio = netprio_prio(parent, dev);
+
+                ret = netprio_set_prio(cgrp, dev, prio);
+                if (ret)
+                        break;
         }
         rtnl_unlock();
-        put_prioidx(cs->prioidx);
-        kfree(cs);
+        return ret;
+}
+
+static void cgrp_css_free(struct cgroup *cgrp)
+{
+        kfree(cgrp_netprio_state(cgrp));
 }
 
 static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft)
 {
-        return (u64)cgrp_netprio_state(cgrp)->prioidx;
+        return cgrp->id;
 }
 
 static int read_priomap(struct cgroup *cont, struct cftype *cft,
                         struct cgroup_map_cb *cb)
 {
         struct net_device *dev;
-        u32 prioidx = cgrp_netprio_state(cont)->prioidx;
-        u32 priority;
-        struct netprio_map *map;
 
         rcu_read_lock();
-        for_each_netdev_rcu(&init_net, dev) {
-                map = rcu_dereference(dev->priomap);
-                priority = (map && prioidx < map->priomap_len) ? map->priomap[prioidx] : 0;
-                cb->fill(cb, dev->name, priority);
-        }
+        for_each_netdev_rcu(&init_net, dev)
+                cb->fill(cb, dev->name, netprio_prio(cont, dev));
         rcu_read_unlock();
         return 0;
 }
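
The sizing loop in the new extend_netdev_table() derives the number of priomap slots from the allocation size rather than the other way around, so the flexible-array header is accounted for automatically and allocations stay power-of-two. A standalone userspace sketch of that calculation, assuming a simplified map type (the names below are illustrative, not the kernel's):

    #include <stdio.h>
    #include <stddef.h>
    #include <stdint.h>

    /* simplified stand-in for struct netprio_map */
    struct prio_map {
            size_t len;             /* number of valid slots */
            uint32_t map[];         /* flexible array of priorities */
    };

    #define MAP_MIN_SZ 128          /* plays the role of PRIOMAP_MIN_SZ */

    int main(void)
    {
            unsigned int target_idx = 100;  /* e.g. a cgroup id */
            size_t sz = MAP_MIN_SZ, len;

            /* double the allocation until it covers target_idx */
            for (;;) {
                    len = (sz - offsetof(struct prio_map, map)) / sizeof(uint32_t);
                    if (len > target_idx)
                            break;
                    sz *= 2;
            }
            printf("alloc %zu bytes -> %zu slots (covers index %u)\n",
                   sz, len, target_idx);
            return 0;
    }

With an 8-byte header on a 64-bit build this prints "alloc 512 bytes -> 126 slots (covers index 100)".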
@@ -176,66 +193,24 @@ static int read_priomap(struct cgroup *cont, struct cftype *cft,
 static int write_priomap(struct cgroup *cgrp, struct cftype *cft,
                          const char *buffer)
 {
-        char *devname = kstrdup(buffer, GFP_KERNEL);
-        int ret = -EINVAL;
-        u32 prioidx = cgrp_netprio_state(cgrp)->prioidx;
-        unsigned long priority;
-        char *priostr;
+        char devname[IFNAMSIZ + 1];
         struct net_device *dev;
-        struct netprio_map *map;
-
-        if (!devname)
-                return -ENOMEM;
-
-        /*
-         * Minimally sized valid priomap string
-         */
-        if (strlen(devname) < 3)
-                goto out_free_devname;
-
-        priostr = strstr(devname, " ");
-        if (!priostr)
-                goto out_free_devname;
-
-        /*
-         *Separate the devname from the associated priority
-         *and advance the priostr pointer to the priority value
-         */
-        *priostr = '\0';
-        priostr++;
-
-        /*
-         * If the priostr points to NULL, we're at the end of the passed
-         * in string, and its not a valid write
-         */
-        if (*priostr == '\0')
-                goto out_free_devname;
-
-        ret = kstrtoul(priostr, 10, &priority);
-        if (ret < 0)
-                goto out_free_devname;
+        u32 prio;
+        int ret;
 
-        ret = -ENODEV;
+        if (sscanf(buffer, "%"__stringify(IFNAMSIZ)"s %u", devname, &prio) != 2)
+                return -EINVAL;
 
         dev = dev_get_by_name(&init_net, devname);
         if (!dev)
-                goto out_free_devname;
+                return -ENODEV;
 
         rtnl_lock();
-        ret = write_update_netdev_table(dev);
-        if (ret < 0)
-                goto out_put_dev;
 
-        map = rtnl_dereference(dev->priomap);
-        if (map)
-                map->priomap[prioidx] = priority;
+        ret = netprio_set_prio(cgrp, dev, prio);
 
-out_put_dev:
         rtnl_unlock();
         dev_put(dev);
-
-out_free_devname:
-        kfree(devname);
         return ret;
 }
 
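write_priomap() now parses the "<ifname> <prio>" input with a single sscanf(), using a string field width to bound the copy into devname[]. A userspace sketch of the same pattern, with IFNAMSIZ hardcoded to the kernel's 16 and local stringify macros standing in for __stringify():

    #include <stdio.h>

    #define IFNAMSIZ 16             /* matches include/uapi/linux/if.h */
    #define STR(x)  #x
    #define XSTR(x) STR(x)          /* expand the macro, then stringify it */

    int main(void)
    {
            const char *input = "eth0 5";
            char devname[IFNAMSIZ + 1];
            unsigned int prio;

            /* "%16s %u": at most IFNAMSIZ chars land in devname, plus the NUL */
            if (sscanf(input, "%" XSTR(IFNAMSIZ) "s %u", devname, &prio) != 2)
                    return 1;

            printf("dev=%s prio=%u\n", devname, prio);
            return 0;
    }
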
@@ -276,22 +251,13 @@ static struct cftype ss_files[] = {
 
 struct cgroup_subsys net_prio_subsys = {
         .name = "net_prio",
-        .create = cgrp_create,
-        .destroy = cgrp_destroy,
+        .css_alloc = cgrp_css_alloc,
+        .css_online = cgrp_css_online,
+        .css_free = cgrp_css_free,
         .attach = net_prio_attach,
         .subsys_id = net_prio_subsys_id,
         .base_cftypes = ss_files,
         .module = THIS_MODULE,
-
-        /*
-         * net_prio has artificial limit on the number of cgroups and
-         * disallows nesting making it impossible to co-mount it with other
-         * hierarchical subsystems. Remove the artificially low PRIOIDX_SZ
-         * limit and properly nest configuration such that children follow
-         * their parents' configurations by default and are allowed to
-         * override and remove the following.
-         */
-        .broken_hierarchy = true,
 };
 
 static int netprio_device_event(struct notifier_block *unused,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index ac9e44a6ab2b..3ab989b0de42 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3032,7 +3032,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
                 skb_shinfo(nskb)->gso_size = pinfo->gso_size;
                 pinfo->gso_size = 0;
                 skb_header_release(p);
-                nskb->prev = p;
+                NAPI_GRO_CB(nskb)->last = p;
 
                 nskb->data_len += p->len;
                 nskb->truesize += p->truesize;
@@ -3058,8 +3058,8 @@ merge:
 
         __skb_pull(skb, offset);
 
-        p->prev->next = skb;
-        p->prev = skb;
+        NAPI_GRO_CB(p)->last->next = skb;
+        NAPI_GRO_CB(p)->last = skb;
         skb_header_release(skb);
 
 done:
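
The skbuff.c change stops overloading skb->prev as the tail of the GRO frag list and keeps the tail in the per-skb GRO control block instead, so code that treats prev/next as an ordinary doubly-linked list can no longer be confused by it. A simplified sketch of the O(1) tail append this supports, using stand-in types rather than sk_buff and napi_gro_cb:

    #include <stdio.h>

    /* stand-in for an skb with a GRO-style control block */
    struct pkt {
            struct pkt *next;       /* frag chain */
            struct pkt *last;       /* tail pointer, meaningful on the head only */
            int id;
    };

    static void gro_init_head(struct pkt *head)
    {
            head->next = NULL;
            head->last = head;      /* empty chain: the tail is the head itself */
    }

    /* append p after the current tail in O(1), as the merge path now does */
    static void gro_append(struct pkt *head, struct pkt *p)
    {
            p->next = NULL;
            head->last->next = p;
            head->last = p;
    }

    int main(void)
    {
            struct pkt head = { .id = 0 }, a = { .id = 1 }, b = { .id = 2 };
            const struct pkt *it;

            gro_init_head(&head);
            gro_append(&head, &a);
            gro_append(&head, &b);

            for (it = head.next; it; it = it->next)
                    printf("frag %d\n", it->id);
            return 0;
    }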