author		Alexei Starovoitov <ast@kernel.org>	2019-01-31 18:40:09 -0500
committer	Daniel Borkmann <daniel@iogearbox.net>	2019-02-01 14:55:39 -0500
commit		96049f3afd50fe8db69fa0068cdca822e747b1e4 (patch)
tree		b082ac077ea0bb78a073a25e540be72034ce0451 /kernel/bpf
parent		ab963beb9f5db303b4fd7e34e422b96270e5b972 (diff)
bpf: introduce BPF_F_LOCK flag
Introduce the BPF_F_LOCK flag for the map_lookup and map_update syscall commands
and for the map_update() helper function.
In all these cases take the lock of the existing element (whose location was
provided in the BTF description) before copying (in or out) the rest of the
map value.

Implementation details that are part of the uapi:

Array:
The array map takes the element lock for lookup/update.

Hash:
The hash map also takes the lock for lookup/update and tries to avoid the
bucket lock. If the old element exists, it takes the element lock and updates
the element in place. If the element doesn't exist, it allocates a new one and
inserts it into the hash table while holding the bucket lock.
In the rare case the hash map has to take both the bucket lock and the element
lock to update the old value in place.

Cgroup local storage:
It is similar to the array: update in place and lookup are done with the lock
taken.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
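For context only (not part of the commit): a minimal userspace sketch of how
these flags are meant to be used, assuming libbpf's bpf_map_update_elem() and
bpf_map_lookup_elem_flags() wrappers and a map whose value embeds a
BTF-described struct bpf_spin_lock. The struct map_value layout and field
names below are hypothetical.

/* Usage sketch (assumption): userspace side of BPF_F_LOCK.
 * Only struct bpf_spin_lock and BPF_F_LOCK come from the uapi this
 * patch series introduces; everything else is illustrative.
 */
#include <linux/bpf.h>		/* BPF_F_LOCK, struct bpf_spin_lock */
#include <bpf/bpf.h>		/* libbpf map syscall wrappers */

struct map_value {
	struct bpf_spin_lock lock;	/* located via the map's BTF */
	long counter;
};

static int locked_update_and_read(int map_fd, __u32 key)
{
	struct map_value val = { .counter = 42 };
	int err;

	/* kernel takes the element's bpf_spin_lock, then copies in
	 * everything but the lock field
	 */
	err = bpf_map_update_elem(map_fd, &key, &val, BPF_F_LOCK);
	if (err)
		return err;

	/* copy the value out under the element lock; the lock field in
	 * 'val' comes back zeroed (check_and_init_map_lock())
	 */
	return bpf_map_lookup_elem_flags(map_fd, &key, &val, BPF_F_LOCK);
}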
Diffstat (limited to 'kernel/bpf')
-rw-r--r--	kernel/bpf/arraymap.c	24
-rw-r--r--	kernel/bpf/hashtab.c	42
-rw-r--r--	kernel/bpf/helpers.c	16
-rw-r--r--	kernel/bpf/local_storage.c	14
-rw-r--r--	kernel/bpf/syscall.c	25
5 files changed, 107 insertions, 14 deletions
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index d6d979910a2a..c72e0d8e1e65 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -253,8 +253,9 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	u32 index = *(u32 *)key;
+	char *val;
 
-	if (unlikely(map_flags > BPF_EXIST))
+	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
 		/* unknown flags */
 		return -EINVAL;
 
@@ -262,18 +263,25 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
 		/* all elements were pre-allocated, cannot insert a new one */
 		return -E2BIG;
 
-	if (unlikely(map_flags == BPF_NOEXIST))
+	if (unlikely(map_flags & BPF_NOEXIST))
 		/* all elements already exist */
 		return -EEXIST;
 
-	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
+	if (unlikely((map_flags & BPF_F_LOCK) &&
+		     !map_value_has_spin_lock(map)))
+		return -EINVAL;
+
+	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
 		memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
 		       value, map->value_size);
-	else
-		copy_map_value(map,
-			       array->value +
-			       array->elem_size * (index & array->index_mask),
-			       value);
+	} else {
+		val = array->value +
+			array->elem_size * (index & array->index_mask);
+		if (map_flags & BPF_F_LOCK)
+			copy_map_value_locked(map, val, value, false);
+		else
+			copy_map_value(map, val, value);
+	}
 	return 0;
 }
 
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 6d3b22c5ad68..937776531998 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -804,11 +804,11 @@ dec_count:
 static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old,
 		       u64 map_flags)
 {
-	if (l_old && map_flags == BPF_NOEXIST)
+	if (l_old && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST)
 		/* elem already exists */
 		return -EEXIST;
 
-	if (!l_old && map_flags == BPF_EXIST)
+	if (!l_old && (map_flags & ~BPF_F_LOCK) == BPF_EXIST)
 		/* elem doesn't exist, cannot update it */
 		return -ENOENT;
 
@@ -827,7 +827,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	u32 key_size, hash;
 	int ret;
 
-	if (unlikely(map_flags > BPF_EXIST))
+	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
 		/* unknown flags */
 		return -EINVAL;
 
@@ -840,6 +840,28 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	b = __select_bucket(htab, hash);
 	head = &b->head;
 
+	if (unlikely(map_flags & BPF_F_LOCK)) {
+		if (unlikely(!map_value_has_spin_lock(map)))
+			return -EINVAL;
+		/* find an element without taking the bucket lock */
+		l_old = lookup_nulls_elem_raw(head, hash, key, key_size,
+					      htab->n_buckets);
+		ret = check_flags(htab, l_old, map_flags);
+		if (ret)
+			return ret;
+		if (l_old) {
+			/* grab the element lock and update value in place */
+			copy_map_value_locked(map,
+					      l_old->key + round_up(key_size, 8),
+					      value, false);
+			return 0;
+		}
+		/* fall through, grab the bucket lock and lookup again.
+		 * 99.9% chance that the element won't be found,
+		 * but second lookup under lock has to be done.
+		 */
+	}
+
 	/* bpf_map_update_elem() can be called in_irq() */
 	raw_spin_lock_irqsave(&b->lock, flags);
 
@@ -849,6 +871,20 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	if (ret)
 		goto err;
 
+	if (unlikely(l_old && (map_flags & BPF_F_LOCK))) {
+		/* first lookup without the bucket lock didn't find the element,
+		 * but second lookup with the bucket lock found it.
+		 * This case is highly unlikely, but has to be dealt with:
+		 * grab the element lock in addition to the bucket lock
+		 * and update element in place
+		 */
+		copy_map_value_locked(map,
+				      l_old->key + round_up(key_size, 8),
+				      value, false);
+		ret = 0;
+		goto err;
+	}
+
 	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
 				l_old);
 	if (IS_ERR(l_new)) {
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index fbe544761628..a411fc17d265 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -301,6 +301,22 @@ const struct bpf_func_proto bpf_spin_unlock_proto = {
 	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
 };
 
+void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
+			   bool lock_src)
+{
+	struct bpf_spin_lock *lock;
+
+	if (lock_src)
+		lock = src + map->spin_lock_off;
+	else
+		lock = dst + map->spin_lock_off;
+	preempt_disable();
+	____bpf_spin_lock(lock);
+	copy_map_value(map, dst, src);
+	____bpf_spin_unlock(lock);
+	preempt_enable();
+}
+
 #ifdef CONFIG_CGROUPS
 BPF_CALL_0(bpf_get_current_cgroup_id)
 {
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 0295427f06e2..6b572e2de7fb 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -131,7 +131,14 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
 	struct bpf_cgroup_storage *storage;
 	struct bpf_storage_buffer *new;
 
-	if (flags != BPF_ANY && flags != BPF_EXIST)
+	if (unlikely(flags & ~(BPF_F_LOCK | BPF_EXIST | BPF_NOEXIST)))
+		return -EINVAL;
+
+	if (unlikely(flags & BPF_NOEXIST))
+		return -EINVAL;
+
+	if (unlikely((flags & BPF_F_LOCK) &&
+		     !map_value_has_spin_lock(map)))
 		return -EINVAL;
 
 	storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map,
@@ -139,6 +146,11 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
 	if (!storage)
 		return -ENOENT;
 
+	if (flags & BPF_F_LOCK) {
+		copy_map_value_locked(map, storage->buf->data, value, false);
+		return 0;
+	}
+
 	new = kmalloc_node(sizeof(struct bpf_storage_buffer) +
 			   map->value_size,
 			   __GFP_ZERO | GFP_ATOMIC | __GFP_NOWARN,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b29e6dc44650..0834958f1dc4 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -682,7 +682,7 @@ static void *__bpf_copy_key(void __user *ukey, u64 key_size)
 }
 
 /* last field in 'union bpf_attr' used by this command */
-#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
+#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags
 
 static int map_lookup_elem(union bpf_attr *attr)
 {
@@ -698,6 +698,9 @@ static int map_lookup_elem(union bpf_attr *attr)
 	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
 		return -EINVAL;
 
+	if (attr->flags & ~BPF_F_LOCK)
+		return -EINVAL;
+
 	f = fdget(ufd);
 	map = __bpf_map_get(f);
 	if (IS_ERR(map))
@@ -708,6 +711,12 @@ static int map_lookup_elem(union bpf_attr *attr)
 		goto err_put;
 	}
 
+	if ((attr->flags & BPF_F_LOCK) &&
+	    !map_value_has_spin_lock(map)) {
+		err = -EINVAL;
+		goto err_put;
+	}
+
 	key = __bpf_copy_key(ukey, map->key_size);
 	if (IS_ERR(key)) {
 		err = PTR_ERR(key);
@@ -758,7 +767,13 @@ static int map_lookup_elem(union bpf_attr *attr)
 			err = -ENOENT;
 		} else {
 			err = 0;
-			copy_map_value(map, value, ptr);
+			if (attr->flags & BPF_F_LOCK)
+				/* lock 'ptr' and copy everything but lock */
+				copy_map_value_locked(map, value, ptr, true);
+			else
+				copy_map_value(map, value, ptr);
+			/* mask lock, since value wasn't zero inited */
+			check_and_init_map_lock(map, value);
 		}
 		rcu_read_unlock();
 	}
@@ -818,6 +833,12 @@ static int map_update_elem(union bpf_attr *attr)
 		goto err_put;
 	}
 
+	if ((attr->flags & BPF_F_LOCK) &&
+	    !map_value_has_spin_lock(map)) {
+		err = -EINVAL;
+		goto err_put;
+	}
+
 	key = __bpf_copy_key(ukey, map->key_size);
 	if (IS_ERR(key)) {
 		err = PTR_ERR(key);