diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/bpf/devmap.c | 73 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 2 |
2 files changed, 74 insertions, 1 deletion
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index b2ef04a1c86a..899364d097f5 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c | |||
@@ -34,6 +34,17 @@ | |||
34 | * netdev_map consistent in this case. From the devmap side BPF programs | 34 | * netdev_map consistent in this case. From the devmap side BPF programs |
35 | * calling into these operations are the same as multiple user space threads | 35 | * calling into these operations are the same as multiple user space threads |
36 | * making system calls. | 36 | * making system calls. |
37 | * | ||
38 | * Finally, any of the above may race with a netdev_unregister notifier. The | ||
39 | * unregister notifier must search for net devices in the map structure that | ||
40 | * contain a reference to the net device and remove them. This is a two-step | ||
41 | * process: (a) dereference the bpf_dtab_netdev object in netdev_map and (b) | ||
42 | * check to see if the ifindex is the same as the net_device being removed. | ||
43 | * Unfortunately, the xchg() operations do not protect against this. To avoid | ||
44 | * potentially removing incorrect objects the dev_map_list_mutex protects | ||
45 | * conflicting netdev unregister and BPF syscall operations. Updates and | ||
46 | * deletes from a BPF program (done in rcu critical section) are blocked | ||
47 | * because of this mutex. | ||
37 | */ | 48 | */ |
38 | #include <linux/bpf.h> | 49 | #include <linux/bpf.h> |
39 | #include <linux/jhash.h> | 50 | #include <linux/jhash.h> |
@@ -54,8 +65,12 @@ struct bpf_dtab { | |||
54 | struct bpf_map map; | 65 | struct bpf_map map; |
55 | struct bpf_dtab_netdev **netdev_map; | 66 | struct bpf_dtab_netdev **netdev_map; |
56 | unsigned long int __percpu *flush_needed; | 67 | unsigned long int __percpu *flush_needed; |
68 | struct list_head list; | ||
57 | }; | 69 | }; |
58 | 70 | ||
71 | static DEFINE_MUTEX(dev_map_list_mutex); | ||
72 | static LIST_HEAD(dev_map_list); | ||
73 | |||
59 | static struct bpf_map *dev_map_alloc(union bpf_attr *attr) | 74 | static struct bpf_map *dev_map_alloc(union bpf_attr *attr) |
60 | { | 75 | { |
61 | struct bpf_dtab *dtab; | 76 | struct bpf_dtab *dtab; |
@@ -112,6 +127,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) | |||
112 | if (!dtab->netdev_map) | 127 | if (!dtab->netdev_map) |
113 | goto free_dtab; | 128 | goto free_dtab; |
114 | 129 | ||
130 | mutex_lock(&dev_map_list_mutex); | ||
131 | list_add_tail(&dtab->list, &dev_map_list); | ||
132 | mutex_unlock(&dev_map_list_mutex); | ||
115 | return &dtab->map; | 133 | return &dtab->map; |
116 | 134 | ||
117 | free_dtab: | 135 | free_dtab: |
@@ -146,6 +164,11 @@ static void dev_map_free(struct bpf_map *map) | |||
146 | cpu_relax(); | 164 | cpu_relax(); |
147 | } | 165 | } |
148 | 166 | ||
167 | /* Although we should no longer have datapath or bpf syscall operations | ||
168 | * at this point we can still race with netdev notifier, hence the | ||
169 | * lock. | ||
170 | */ | ||
171 | mutex_lock(&dev_map_list_mutex); | ||
149 | for (i = 0; i < dtab->map.max_entries; i++) { | 172 | for (i = 0; i < dtab->map.max_entries; i++) { |
150 | struct bpf_dtab_netdev *dev; | 173 | struct bpf_dtab_netdev *dev; |
151 | 174 | ||
@@ -160,6 +183,8 @@ static void dev_map_free(struct bpf_map *map) | |||
160 | /* At this point bpf program is detached and all pending operations | 183 | /* At this point bpf program is detached and all pending operations |
161 | * _must_ be complete | 184 | * _must_ be complete |
162 | */ | 185 | */ |
186 | list_del(&dtab->list); | ||
187 | mutex_unlock(&dev_map_list_mutex); | ||
163 | free_percpu(dtab->flush_needed); | 188 | free_percpu(dtab->flush_needed); |
164 | bpf_map_area_free(dtab->netdev_map); | 189 | bpf_map_area_free(dtab->netdev_map); |
165 | kfree(dtab); | 190 | kfree(dtab); |
@@ -296,9 +321,11 @@ static int dev_map_delete_elem(struct bpf_map *map, void *key) | |||
296 | * the driver tear down ensures all soft irqs are complete before | 321 | * the driver tear down ensures all soft irqs are complete before |
297 | * removing the net device in the case of dev_put equals zero. | 322 | * removing the net device in the case of dev_put equals zero. |
298 | */ | 323 | */ |
324 | mutex_lock(&dev_map_list_mutex); | ||
299 | old_dev = xchg(&dtab->netdev_map[k], NULL); | 325 | old_dev = xchg(&dtab->netdev_map[k], NULL); |
300 | if (old_dev) | 326 | if (old_dev) |
301 | call_rcu(&old_dev->rcu, __dev_map_entry_free); | 327 | call_rcu(&old_dev->rcu, __dev_map_entry_free); |
328 | mutex_unlock(&dev_map_list_mutex); | ||
302 | return 0; | 329 | return 0; |
303 | } | 330 | } |
304 | 331 | ||
@@ -341,9 +368,11 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value, | |||
341 | * Remembering the driver side flush operation will happen before the | 368 | * Remembering the driver side flush operation will happen before the |
342 | * net device is removed. | 369 | * net device is removed. |
343 | */ | 370 | */ |
371 | mutex_lock(&dev_map_list_mutex); | ||
344 | old_dev = xchg(&dtab->netdev_map[i], dev); | 372 | old_dev = xchg(&dtab->netdev_map[i], dev); |
345 | if (old_dev) | 373 | if (old_dev) |
346 | call_rcu(&old_dev->rcu, __dev_map_entry_free); | 374 | call_rcu(&old_dev->rcu, __dev_map_entry_free); |
375 | mutex_unlock(&dev_map_list_mutex); | ||
347 | 376 | ||
348 | return 0; | 377 | return 0; |
349 | } | 378 | } |
@@ -356,3 +385,47 @@ const struct bpf_map_ops dev_map_ops = { | |||
356 | .map_update_elem = dev_map_update_elem, | 385 | .map_update_elem = dev_map_update_elem, |
357 | .map_delete_elem = dev_map_delete_elem, | 386 | .map_delete_elem = dev_map_delete_elem, |
358 | }; | 387 | }; |
388 | |||
389 | static int dev_map_notification(struct notifier_block *notifier, | ||
390 | ulong event, void *ptr) | ||
391 | { | ||
392 | struct net_device *netdev = netdev_notifier_info_to_dev(ptr); | ||
393 | struct bpf_dtab *dtab; | ||
394 | int i; | ||
395 | |||
396 | switch (event) { | ||
397 | case NETDEV_UNREGISTER: | ||
398 | mutex_lock(&dev_map_list_mutex); | ||
399 | list_for_each_entry(dtab, &dev_map_list, list) { | ||
400 | for (i = 0; i < dtab->map.max_entries; i++) { | ||
401 | struct bpf_dtab_netdev *dev; | ||
402 | |||
403 | dev = dtab->netdev_map[i]; | ||
404 | if (!dev || | ||
405 | dev->dev->ifindex != netdev->ifindex) | ||
406 | continue; | ||
407 | dev = xchg(&dtab->netdev_map[i], NULL); | ||
408 | if (dev) | ||
409 | call_rcu(&dev->rcu, | ||
410 | __dev_map_entry_free); | ||
411 | } | ||
412 | } | ||
413 | mutex_unlock(&dev_map_list_mutex); | ||
414 | break; | ||
415 | default: | ||
416 | break; | ||
417 | } | ||
418 | return NOTIFY_OK; | ||
419 | } | ||
420 | |||
421 | static struct notifier_block dev_map_notifier = { | ||
422 | .notifier_call = dev_map_notification, | ||
423 | }; | ||
424 | |||
425 | static int __init dev_map_init(void) | ||
426 | { | ||
427 | register_netdevice_notifier(&dev_map_notifier); | ||
428 | return 0; | ||
429 | } | ||
430 | |||
431 | subsys_initcall(dev_map_init); | ||
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index df05d65f0c87..ebe9b38ff522 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c | |||
@@ -1281,7 +1281,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) | |||
1281 | * for now. | 1281 | * for now. |
1282 | */ | 1282 | */ |
1283 | case BPF_MAP_TYPE_DEVMAP: | 1283 | case BPF_MAP_TYPE_DEVMAP: |
1284 | if (func_id == BPF_FUNC_map_lookup_elem) | 1284 | if (func_id != BPF_FUNC_redirect_map) |
1285 | goto error; | 1285 | goto error; |
1286 | break; | 1286 | break; |
1287 | case BPF_MAP_TYPE_ARRAY_OF_MAPS: | 1287 | case BPF_MAP_TYPE_ARRAY_OF_MAPS: |