author	Jakub Kicinski <jakub.kicinski@netronome.com>	2018-01-11 23:29:09 -0500
committer	Daniel Borkmann <daniel@iogearbox.net>	2018-01-14 17:36:30 -0500
commit	a38845729ea3985db5d2544ec3ef3dc8f6313a27 (patch)
tree	bff003aa4bf56742db6579dca6338bedfe882fd7 /kernel/bpf/syscall.c
parent	5bc2d55c6a69ef9efd11740359974b08ea11f1d7 (diff)
bpf: offload: add map offload infrastructure
BPF map offload follows a similar path to program offload. At creation time users may specify the ifindex of the device on which they want to create the map. The map will be validated by the kernel's .map_alloc_check callback and the device driver will be called for the actual allocation. The map will have an empty set of operations associated with it (save for the alloc and free callbacks). The real device callbacks are kept in map->offload->dev_ops because they have slightly different signatures. Map operations are called in process context, so the driver may communicate with the HW freely: msleep(), wait(), etc.

Map alloc and free callbacks are muxed via the existing .ndo_bpf, and are always called with the rtnl lock held. Maps and programs are guaranteed to be destroyed before .ndo_uninit (i.e. before unregister_netdev() returns). Map callbacks are invoked with bpf_devs_lock *read* locked; drivers must take care of exclusive locking if necessary.

All offload-specific branches are marked with unlikely() (through bpf_map_is_dev_bound()), given that the branch penalty will be negligible compared to the IO anyway, and we don't want to penalize the SW path unnecessarily.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
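For illustration, the creation path described above looks roughly like this from userspace: a non-zero map_ifindex in union bpf_attr is what makes find_and_alloc_map() pick bpf_map_offload_ops. A minimal sketch, assuming a kernel carrying this patch and an offload-capable netdev; the wrapper name create_offloaded_map and the map parameters are made up for the example.

/* Hypothetical helper: create a 4-byte-key/4-byte-value hash map on
 * the device named by ifname. Assumes a kernel with this patch and
 * a driver that accepts the allocation via .ndo_bpf.
 */
#include <linux/bpf.h>
#include <net/if.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int create_offloaded_map(const char *ifname)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_type    = BPF_MAP_TYPE_HASH;
	attr.key_size    = 4;
	attr.value_size  = 4;
	attr.max_entries = 16;
	/* Non-zero ifindex routes allocation to bpf_map_offload_ops */
	attr.map_ifindex = if_nametoindex(ifname);

	/* Returns a map fd, or -1 with errno set if e.g. the driver
	 * rejects the request.
	 */
	return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
}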
Diffstat (limited to 'kernel/bpf/syscall.c')
-rw-r--r--	kernel/bpf/syscall.c	44
1 file changed, 38 insertions(+), 6 deletions(-)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index a3f726bb42ea..c691b9e972e3 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -94,6 +94,11 @@ static int check_uarg_tail_zero(void __user *uaddr,
 	return 0;
 }
 
+const struct bpf_map_ops bpf_map_offload_ops = {
+	.map_alloc = bpf_map_offload_map_alloc,
+	.map_free = bpf_map_offload_map_free,
+};
+
 static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
 {
 	const struct bpf_map_ops *ops;
@@ -111,6 +116,8 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
 		if (err)
 			return ERR_PTR(err);
 	}
+	if (attr->map_ifindex)
+		ops = &bpf_map_offload_ops;
 	map = ops->map_alloc(attr);
 	if (IS_ERR(map))
 		return map;
@@ -208,16 +215,25 @@ static int bpf_map_alloc_id(struct bpf_map *map)
 	return id > 0 ? 0 : id;
 }
 
-static void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
+void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
 {
 	unsigned long flags;
 
+	/* Offloaded maps are removed from the IDR store when their device
+	 * disappears - even if someone holds an fd to them they are unusable,
+	 * the memory is gone, all ops will fail; they are simply waiting for
+	 * refcnt to drop to be freed.
+	 */
+	if (!map->id)
+		return;
+
 	if (do_idr_lock)
 		spin_lock_irqsave(&map_idr_lock, flags);
 	else
 		__acquire(&map_idr_lock);
 
 	idr_remove(&map_idr, map->id);
+	map->id = 0;
 
 	if (do_idr_lock)
 		spin_unlock_irqrestore(&map_idr_lock, flags);
@@ -397,7 +413,7 @@ static int bpf_obj_name_cpy(char *dst, const char *src)
 	return 0;
 }
 
-#define BPF_MAP_CREATE_LAST_FIELD map_name
+#define BPF_MAP_CREATE_LAST_FIELD map_ifindex
 /* called via syscall */
 static int map_create(union bpf_attr *attr)
 {
@@ -585,8 +601,10 @@ static int map_lookup_elem(union bpf_attr *attr)
 	if (!value)
 		goto free_key;
 
-	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
-	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
+	if (bpf_map_is_dev_bound(map)) {
+		err = bpf_map_offload_lookup_elem(map, key, value);
+	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+		   map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
 		err = bpf_percpu_hash_copy(map, key, value);
 	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
 		err = bpf_percpu_array_copy(map, key, value);
@@ -673,7 +691,10 @@ static int map_update_elem(union bpf_attr *attr)
 		goto free_value;
 
 	/* Need to create a kthread, thus must support schedule */
-	if (map->map_type == BPF_MAP_TYPE_CPUMAP) {
+	if (bpf_map_is_dev_bound(map)) {
+		err = bpf_map_offload_update_elem(map, key, value, attr->flags);
+		goto out;
+	} else if (map->map_type == BPF_MAP_TYPE_CPUMAP) {
 		err = map->ops->map_update_elem(map, key, value, attr->flags);
 		goto out;
 	}
@@ -750,6 +771,11 @@ static int map_delete_elem(union bpf_attr *attr)
 		goto err_put;
 	}
 
+	if (bpf_map_is_dev_bound(map)) {
+		err = bpf_map_offload_delete_elem(map, key);
+		goto out;
+	}
+
 	preempt_disable();
 	__this_cpu_inc(bpf_prog_active);
 	rcu_read_lock();
@@ -757,7 +783,7 @@ static int map_delete_elem(union bpf_attr *attr)
 	rcu_read_unlock();
 	__this_cpu_dec(bpf_prog_active);
 	preempt_enable();
-
+out:
 	if (!err)
 		trace_bpf_map_delete_elem(map, ufd, key);
 	kfree(key);
@@ -807,9 +833,15 @@ static int map_get_next_key(union bpf_attr *attr)
 	if (!next_key)
 		goto free_key;
 
+	if (bpf_map_is_dev_bound(map)) {
+		err = bpf_map_offload_get_next_key(map, key, next_key);
+		goto out;
+	}
+
 	rcu_read_lock();
 	err = map->ops->map_get_next_key(map, key, next_key);
 	rcu_read_unlock();
+out:
 	if (err)
 		goto free_next_key;
 
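The driver-facing half of this infrastructure lives outside this file. A rough sketch of what a driver implementation might look like, assuming the struct bpf_offloaded_map / struct bpf_map_dev_ops types and the BPF_OFFLOAD_MAP_ALLOC / BPF_OFFLOAD_MAP_FREE .ndo_bpf commands introduced by this series; all mydrv_* symbols are hypothetical.

/* Sketch of a driver's side of map offload: the core muxes alloc/free
 * through .ndo_bpf() (rtnl held), and per-element ops are looked up in
 * offmap->dev_ops. Element ops run in process context and may sleep.
 */
#include <linux/bpf.h>
#include <linux/netdevice.h>

static int mydrv_map_get_next_key(struct bpf_offloaded_map *offmap,
				  void *key, void *next_key)
{
	/* Talk to the device here; stubbed out in this sketch. */
	return -EOPNOTSUPP;
}

static int mydrv_map_lookup_elem(struct bpf_offloaded_map *offmap,
				 void *key, void *value)
{
	return -EOPNOTSUPP;
}

static int mydrv_map_update_elem(struct bpf_offloaded_map *offmap,
				 void *key, void *value, u64 flags)
{
	return -EOPNOTSUPP;
}

static int mydrv_map_delete_elem(struct bpf_offloaded_map *offmap, void *key)
{
	return -EOPNOTSUPP;
}

static const struct bpf_map_dev_ops mydrv_map_dev_ops = {
	.map_get_next_key = mydrv_map_get_next_key,
	.map_lookup_elem  = mydrv_map_lookup_elem,
	.map_update_elem  = mydrv_map_update_elem,
	.map_delete_elem  = mydrv_map_delete_elem,
};

static int mydrv_ndo_bpf(struct net_device *netdev, struct netdev_bpf *bpf)
{
	switch (bpf->command) {
	case BPF_OFFLOAD_MAP_ALLOC:
		/* rtnl is held; set up device state, publish callbacks */
		bpf->offmap->dev_ops = &mydrv_map_dev_ops;
		return 0;
	case BPF_OFFLOAD_MAP_FREE:
		/* rtnl is held; tear down device state */
		return 0;
	default:
		return -EINVAL;
	}
}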