author     John Fastabend <john.fastabend@gmail.com>   2017-07-17 12:30:02 -0400
committer  David S. Miller <davem@davemloft.net>        2017-07-17 12:48:06 -0400
commit     2ddf71e23cc246e95af72a6deed67b4a50a7b81c (patch)
tree       b5153dd5672bf7f6af6311df36b6037eb652b7c2 /kernel/bpf/devmap.c
parent     11393cc9b9be2a1f61559e6fb9c27bc8fa20b1ff (diff)
net: add notifier hooks for devmap bpf map
The BPF devmap holds a refcnt on the net_device structure while the device
is in the map. We need to do this to ensure we don't lose the dev reference
on driver unload.
However, it's not very convenient to have to manually unload the map when
destroying a net device, so add notifier handlers to do the cleanup
automatically. This creates a race between update/delete operations
(from the BPF syscall or from BPF programs) and the netdev unregister hook.
Unfortunately, the best options I could come up with are either to live with
requiring manual removal of entries from the map before removing the net
device, OR to add a mutex in devmap to ensure the map is not modified while
we are removing a device. The fallout is that BPF programs can no longer
update/delete the map from the program side, because the mutex may sleep and
that cannot be done from inside an RCU critical section. This is not a real
problem, though, because I have not come up with any use case where updating
the map from a program is actually useful in practice. If/when a compelling
use case comes up we may need to revisit this.
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
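
For illustration of the manual alternative mentioned in the commit message, the sketch below shows what a user-space loader would otherwise have to do before a net device is removed: walk the devmap and delete every slot so no stale net_device reference is left behind. This is not part of the patch; the map fd, the __u32 index keys, and the libbpf-style bpf_map_delete_elem() wrapper are assumptions used only for illustration.

#include <errno.h>
#include <stdio.h>
#include <linux/types.h>
#include <bpf/bpf.h>    /* assumed libbpf wrapper around the bpf(2) syscall */

/* Hypothetical manual cleanup made unnecessary by the notifier in this
 * patch: drop every devmap slot before the corresponding device goes away.
 */
static void devmap_manual_cleanup(int map_fd, __u32 max_entries)
{
        __u32 key;

        for (key = 0; key < max_entries; key++) {
                /* Empty slots report ENOENT, which is fine to ignore. */
                if (bpf_map_delete_elem(map_fd, &key) && errno != ENOENT)
                        perror("bpf_map_delete_elem");
        }
}

With this patch applied, the NETDEV_UNREGISTER notifier added below performs the equivalent removal automatically under dev_map_list_mutex.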
Diffstat (limited to 'kernel/bpf/devmap.c')
-rw-r--r--   kernel/bpf/devmap.c | 73
1 file changed, 73 insertions, 0 deletions
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index b2ef04a1c86a..899364d097f5 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -34,6 +34,17 @@
  * netdev_map consistent in this case. From the devmap side BPF programs
  * calling into these operations are the same as multiple user space threads
  * making system calls.
+ *
+ * Finally, any of the above may race with a netdev_unregister notifier. The
+ * unregister notifier must search for net devices in the map structure that
+ * contain a reference to the net device and remove them. This is a two step
+ * process (a) dereference the bpf_dtab_netdev object in netdev_map and (b)
+ * check to see if the ifindex is the same as the net_device being removed.
+ * Unfortunately, the xchg() operations do not protect against this. To avoid
+ * potentially removing incorrect objects the dev_map_list_mutex protects
+ * conflicting netdev unregister and BPF syscall operations. Updates and
+ * deletes from a BPF program (done in rcu critical section) are blocked
+ * because of this mutex.
  */
 #include <linux/bpf.h>
 #include <linux/jhash.h>
@@ -54,8 +65,12 @@ struct bpf_dtab {
         struct bpf_map map;
         struct bpf_dtab_netdev **netdev_map;
         unsigned long int __percpu *flush_needed;
+        struct list_head list;
 };
 
+static DEFINE_MUTEX(dev_map_list_mutex);
+static LIST_HEAD(dev_map_list);
+
 static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 {
         struct bpf_dtab *dtab;
@@ -112,6 +127,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
         if (!dtab->netdev_map)
                 goto free_dtab;
 
+        mutex_lock(&dev_map_list_mutex);
+        list_add_tail(&dtab->list, &dev_map_list);
+        mutex_unlock(&dev_map_list_mutex);
         return &dtab->map;
 
 free_dtab:
@@ -146,6 +164,11 @@ static void dev_map_free(struct bpf_map *map)
                 cpu_relax();
         }
 
+        /* Although we should no longer have datapath or bpf syscall operations
+         * at this point we can still race with netdev notifier, hence the
+         * lock.
+         */
+        mutex_lock(&dev_map_list_mutex);
         for (i = 0; i < dtab->map.max_entries; i++) {
                 struct bpf_dtab_netdev *dev;
 
@@ -160,6 +183,8 @@ static void dev_map_free(struct bpf_map *map)
         /* At this point bpf program is detached and all pending operations
          * _must_ be complete
          */
+        list_del(&dtab->list);
+        mutex_unlock(&dev_map_list_mutex);
         free_percpu(dtab->flush_needed);
         bpf_map_area_free(dtab->netdev_map);
         kfree(dtab);
@@ -296,9 +321,11 @@ static int dev_map_delete_elem(struct bpf_map *map, void *key)
          * the driver tear down ensures all soft irqs are complete before
          * removing the net device in the case of dev_put equals zero.
          */
+        mutex_lock(&dev_map_list_mutex);
         old_dev = xchg(&dtab->netdev_map[k], NULL);
         if (old_dev)
                 call_rcu(&old_dev->rcu, __dev_map_entry_free);
+        mutex_unlock(&dev_map_list_mutex);
         return 0;
 }
 
@@ -341,9 +368,11 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
          * Remembering the driver side flush operation will happen before the
          * net device is removed.
          */
+        mutex_lock(&dev_map_list_mutex);
         old_dev = xchg(&dtab->netdev_map[i], dev);
         if (old_dev)
                 call_rcu(&old_dev->rcu, __dev_map_entry_free);
+        mutex_unlock(&dev_map_list_mutex);
 
         return 0;
 }
@@ -356,3 +385,47 @@ const struct bpf_map_ops dev_map_ops = {
         .map_update_elem = dev_map_update_elem,
         .map_delete_elem = dev_map_delete_elem,
 };
+
+static int dev_map_notification(struct notifier_block *notifier,
+                                ulong event, void *ptr)
+{
+        struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+        struct bpf_dtab *dtab;
+        int i;
+
+        switch (event) {
+        case NETDEV_UNREGISTER:
+                mutex_lock(&dev_map_list_mutex);
+                list_for_each_entry(dtab, &dev_map_list, list) {
+                        for (i = 0; i < dtab->map.max_entries; i++) {
+                                struct bpf_dtab_netdev *dev;
+
+                                dev = dtab->netdev_map[i];
+                                if (!dev ||
+                                    dev->dev->ifindex != netdev->ifindex)
+                                        continue;
+                                dev = xchg(&dtab->netdev_map[i], NULL);
+                                if (dev)
+                                        call_rcu(&dev->rcu,
+                                                 __dev_map_entry_free);
+                        }
+                }
+                mutex_unlock(&dev_map_list_mutex);
+                break;
+        default:
+                break;
+        }
+        return NOTIFY_OK;
+}
+
+static struct notifier_block dev_map_notifier = {
+        .notifier_call = dev_map_notification,
+};
+
+static int __init dev_map_init(void)
+{
+        register_netdevice_notifier(&dev_map_notifier);
+        return 0;
+}
+
+subsys_initcall(dev_map_init);
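
As a usage note, the restriction that BPF programs can no longer update or delete devmap entries leaves the intended datapath use untouched: an XDP program only reads the map through the bpf_redirect_map() helper from the same patch series, while user space populates the entries. Below is a minimal sketch under that assumption, using the sample-style bpf_helpers.h and struct bpf_map_def loader conventions of the period; section names, sizes, and the fixed key are illustrative, not part of this patch.

#include <linux/bpf.h>
#include "bpf_helpers.h"        /* assumed: samples/selftests helper macros */

/* Devmap populated from user space with ifindexes; the program never
 * writes to it, which is exactly the usage dev_map_list_mutex still allows.
 */
struct bpf_map_def SEC("maps") tx_port = {
        .type        = BPF_MAP_TYPE_DEVMAP,
        .key_size    = sizeof(__u32),
        .value_size  = sizeof(__u32),
        .max_entries = 64,
};

SEC("xdp_redirect_map")
int xdp_redirect_prog(struct xdp_md *ctx)
{
        __u32 key = 0;  /* slot chosen by the user-space loader */

        /* Returns XDP_REDIRECT on success; the frame is sent out the
         * device stored in slot 0 of tx_port.
         */
        return bpf_redirect_map(&tx_port, key, 0);
}

char _license[] SEC("license") = "GPL";

Because the program side only performs lookups under RCU, it never needs dev_map_list_mutex; entries for a device that disappears are cleared by the NETDEV_UNREGISTER notifier added in this patch.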