author		Daniel Borkmann <daniel@iogearbox.net>	2017-08-22 19:47:54 -0400
committer	David S. Miller <davem@davemloft.net>	2017-08-23 00:26:29 -0400
commit		af4d045ceeca04946d89453206269aea6c338a8e (patch)
tree		253eb5960b63588ed6a7f3fa4128c8507d8f4f77 /kernel/bpf/devmap.c
parent		e4a8e817d3cb2a5108f8bb2e47e81eb25a2c5e30 (diff)
bpf: minor cleanups for dev_map
Some minor code cleanups. While going over the code I also noticed
that the per-CPU flush bitmap is currently only accounted for a
single CPU, so fix that up as well.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'kernel/bpf/devmap.c')
-rw-r--r--	kernel/bpf/devmap.c	100
1 file changed, 41 insertions(+), 59 deletions(-)
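
To make the accounting fix concrete before reading the diff: dtab->flush_needed is allocated with __alloc_percpu(), i.e. there is one flush bitmap per possible CPU, so the map's charged memory has to include the bitmap size num_possible_cpus() times rather than once. Below is a small, self-contained sketch of that arithmetic in plain userspace C; the max_entries value and CPU count are made up for illustration and are not part of the patch.

/* Illustration only (not kernel code): why the flush bitmap must be
 * charged once per possible CPU. Map size and CPU count are
 * hypothetical values chosen for the example.
 */
#include <stdint.h>
#include <stdio.h>

#define BITS_PER_LONG		(8 * sizeof(unsigned long))
#define BITS_TO_LONGS(n)	(((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

int main(void)
{
	uint32_t max_entries = 4096;	/* hypothetical map size */
	uint64_t possible_cpus = 8;	/* hypothetical CPU count */
	uint64_t slots  = (uint64_t)max_entries * sizeof(void *);
	uint64_t bitmap = BITS_TO_LONGS(max_entries) * sizeof(unsigned long);

	/* before the patch: the bitmap was charged only once */
	printf("old cost: %llu bytes\n", (unsigned long long)(slots + bitmap));
	/* after the patch: one bitmap per possible CPU is charged */
	printf("new cost: %llu bytes\n",
	       (unsigned long long)(slots + bitmap * possible_cpus));
	return 0;
}

With these example numbers on a 64-bit machine the bitmap is 512 bytes per CPU, so the bitmap part of the charge becomes 8 x 512 bytes = 4 KiB instead of just 512 bytes.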
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index fa08181d1c3d..bfecabfd4974 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -48,30 +48,30 @@
  * calls will fail at this point.
  */
 #include <linux/bpf.h>
-#include <linux/jhash.h>
 #include <linux/filter.h>
-#include <linux/rculist_nulls.h>
-#include "percpu_freelist.h"
-#include "bpf_lru_list.h"
-#include "map_in_map.h"
 
 struct bpf_dtab_netdev {
 	struct net_device *dev;
-	int key;
-	struct rcu_head rcu;
 	struct bpf_dtab *dtab;
+	unsigned int bit;
+	struct rcu_head rcu;
 };
 
 struct bpf_dtab {
 	struct bpf_map map;
 	struct bpf_dtab_netdev **netdev_map;
-	unsigned long int __percpu *flush_needed;
+	unsigned long __percpu *flush_needed;
 	struct list_head list;
 };
 
 static DEFINE_SPINLOCK(dev_map_lock);
 static LIST_HEAD(dev_map_list);
 
+static u64 dev_map_bitmap_size(const union bpf_attr *attr)
+{
+	return BITS_TO_LONGS(attr->max_entries) * sizeof(unsigned long);
+}
+
 static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 {
 	struct bpf_dtab *dtab;
@@ -95,11 +95,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 	dtab->map.map_flags = attr->map_flags;
 	dtab->map.numa_node = bpf_map_attr_numa_node(attr);
 
-	err = -ENOMEM;
-
 	/* make sure page count doesn't overflow */
 	cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *);
-	cost += BITS_TO_LONGS(attr->max_entries) * sizeof(unsigned long);
+	cost += dev_map_bitmap_size(attr) * num_possible_cpus();
 	if (cost >= U32_MAX - PAGE_SIZE)
 		goto free_dtab;
 
@@ -110,12 +108,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 	if (err)
 		goto free_dtab;
 
-	err = -ENOMEM;
 	/* A per cpu bitfield with a bit per possible net device */
-	dtab->flush_needed = __alloc_percpu(
-				BITS_TO_LONGS(attr->max_entries) *
-					sizeof(unsigned long),
-				__alignof__(unsigned long));
+	dtab->flush_needed = __alloc_percpu(dev_map_bitmap_size(attr),
+					    __alignof__(unsigned long));
 	if (!dtab->flush_needed)
 		goto free_dtab;
 
@@ -128,12 +123,12 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 	spin_lock(&dev_map_lock);
 	list_add_tail_rcu(&dtab->list, &dev_map_list);
 	spin_unlock(&dev_map_lock);
-	return &dtab->map;
 
+	return &dtab->map;
 free_dtab:
 	free_percpu(dtab->flush_needed);
 	kfree(dtab);
-	return ERR_PTR(err);
+	return ERR_PTR(-ENOMEM);
 }
 
 static void dev_map_free(struct bpf_map *map)
@@ -178,9 +173,6 @@ static void dev_map_free(struct bpf_map *map)
 		kfree(dev);
 	}
 
-	/* At this point bpf program is detached and all pending operations
-	 * _must_ be complete
-	 */
 	free_percpu(dtab->flush_needed);
 	bpf_map_area_free(dtab->netdev_map);
 	kfree(dtab);
@@ -190,7 +182,7 @@ static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
 	u32 index = key ? *(u32 *)key : U32_MAX;
-	u32 *next = (u32 *)next_key;
+	u32 *next = next_key;
 
 	if (index >= dtab->map.max_entries) {
 		*next = 0;
@@ -199,29 +191,16 @@ static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 
 	if (index == dtab->map.max_entries - 1)
 		return -ENOENT;
-
 	*next = index + 1;
 	return 0;
 }
 
-void __dev_map_insert_ctx(struct bpf_map *map, u32 key)
+void __dev_map_insert_ctx(struct bpf_map *map, u32 bit)
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
 	unsigned long *bitmap = this_cpu_ptr(dtab->flush_needed);
 
-	__set_bit(key, bitmap);
-}
-
-struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
-{
-	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
-	struct bpf_dtab_netdev *dev;
-
-	if (key >= map->max_entries)
-		return NULL;
-
-	dev = READ_ONCE(dtab->netdev_map[key]);
-	return dev ? dev->dev : NULL;
+	__set_bit(bit, bitmap);
 }
 
 /* __dev_map_flush is called from xdp_do_flush_map() which _must_ be signaled
@@ -248,7 +227,6 @@ void __dev_map_flush(struct bpf_map *map)
 			continue;
 
 		netdev = dev->dev;
-
 		__clear_bit(bit, bitmap);
 		if (unlikely(!netdev || !netdev->netdev_ops->ndo_xdp_flush))
 			continue;
@@ -261,43 +239,49 @@ void __dev_map_flush(struct bpf_map *map)
  * update happens in parallel here a dev_put wont happen until after reading the
  * ifindex.
  */
-static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
+struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
 	struct bpf_dtab_netdev *dev;
-	u32 i = *(u32 *)key;
 
-	if (i >= map->max_entries)
+	if (key >= map->max_entries)
 		return NULL;
 
-	dev = READ_ONCE(dtab->netdev_map[i]);
-	return dev ? &dev->dev->ifindex : NULL;
+	dev = READ_ONCE(dtab->netdev_map[key]);
+	return dev ? dev->dev : NULL;
+}
+
+static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
+{
+	struct net_device *dev = __dev_map_lookup_elem(map, *(u32 *)key);
+
+	return dev ? &dev->ifindex : NULL;
 }
 
-static void dev_map_flush_old(struct bpf_dtab_netdev *old_dev)
+static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
 {
-	if (old_dev->dev->netdev_ops->ndo_xdp_flush) {
-		struct net_device *fl = old_dev->dev;
+	if (dev->dev->netdev_ops->ndo_xdp_flush) {
+		struct net_device *fl = dev->dev;
 		unsigned long *bitmap;
 		int cpu;
 
 		for_each_online_cpu(cpu) {
-			bitmap = per_cpu_ptr(old_dev->dtab->flush_needed, cpu);
-			__clear_bit(old_dev->key, bitmap);
+			bitmap = per_cpu_ptr(dev->dtab->flush_needed, cpu);
+			__clear_bit(dev->bit, bitmap);
 
-			fl->netdev_ops->ndo_xdp_flush(old_dev->dev);
+			fl->netdev_ops->ndo_xdp_flush(dev->dev);
 		}
 	}
 }
 
 static void __dev_map_entry_free(struct rcu_head *rcu)
 {
-	struct bpf_dtab_netdev *old_dev;
+	struct bpf_dtab_netdev *dev;
 
-	old_dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
-	dev_map_flush_old(old_dev);
-	dev_put(old_dev->dev);
-	kfree(old_dev);
+	dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
+	dev_map_flush_old(dev);
+	dev_put(dev->dev);
+	kfree(dev);
 }
 
 static int dev_map_delete_elem(struct bpf_map *map, void *key)
@@ -309,8 +293,8 @@ static int dev_map_delete_elem(struct bpf_map *map, void *key)
 	if (k >= map->max_entries)
 		return -EINVAL;
 
-	/* Use synchronize_rcu() here to ensure any rcu critical sections
-	 * have completed, but this does not guarantee a flush has happened
+	/* Use call_rcu() here to ensure any rcu critical sections have
+	 * completed, but this does not guarantee a flush has happened
 	 * yet. Because driver side rcu_read_lock/unlock only protects the
 	 * running XDP program. However, for pending flush operations the
 	 * dev and ctx are stored in another per cpu map. And additionally,
@@ -334,10 +318,8 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
 
 	if (unlikely(map_flags > BPF_EXIST))
 		return -EINVAL;
-
 	if (unlikely(i >= dtab->map.max_entries))
 		return -E2BIG;
-
 	if (unlikely(map_flags == BPF_NOEXIST))
 		return -EEXIST;
 
@@ -355,7 +337,7 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
 			return -EINVAL;
 		}
 
-		dev->key = i;
+		dev->bit = i;
 		dev->dtab = dtab;
 	}
 
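
One design note on the lookup split in the last large hunk: the lookup now has two layers. __dev_map_lookup_elem() is used from the XDP redirect path inside the kernel and returns the struct net_device itself, while dev_map_lookup_elem(), the map operation reachable via the bpf(2) syscall, becomes a thin wrapper that only exposes the device's ifindex to userspace. Condensed from the new code above, with the rest of the file elided:

struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
{
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	struct bpf_dtab_netdev *dev;

	if (key >= map->max_entries)
		return NULL;

	dev = READ_ONCE(dtab->netdev_map[key]);
	return dev ? dev->dev : NULL;	/* redirect path gets the net_device */
}

static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct net_device *dev = __dev_map_lookup_elem(map, *(u32 *)key);

	return dev ? &dev->ifindex : NULL;	/* syscall path only sees the ifindex */
}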