aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@kernel.org>2019-01-31 18:40:04 -0500
committerDaniel Borkmann <daniel@iogearbox.net>2019-02-01 14:55:38 -0500
commitd83525ca62cf8ebe3271d14c36fb900c294274a2 (patch)
tree14c11f7a76bf1d9778eaa29a37d734818f02e2e0 /kernel
parent1832f4ef5867fd3898d8a6c6c1978b75d76fc246 (diff)
bpf: introduce bpf_spin_lock
Introduce 'struct bpf_spin_lock' and bpf_spin_lock/unlock() helpers to let bpf program serialize access to other variables. Example: struct hash_elem { int cnt; struct bpf_spin_lock lock; }; struct hash_elem * val = bpf_map_lookup_elem(&hash_map, &key); if (val) { bpf_spin_lock(&val->lock); val->cnt++; bpf_spin_unlock(&val->lock); } Restrictions and safety checks: - bpf_spin_lock is only allowed inside HASH and ARRAY maps. - BTF description of the map is mandatory for safety analysis. - bpf program can take one bpf_spin_lock at a time, since two or more can cause deadlocks. - only one 'struct bpf_spin_lock' is allowed per map element. It drastically simplifies implementation yet allows bpf program to use any number of bpf_spin_locks. - when bpf_spin_lock is taken the calls (either bpf2bpf or helpers) are not allowed. - bpf program must bpf_spin_unlock() before return. - bpf program can access 'struct bpf_spin_lock' only via bpf_spin_lock()/bpf_spin_unlock() helpers. - load/store into 'struct bpf_spin_lock lock;' field is not allowed. - to use bpf_spin_lock() helper the BTF description of map value must be a struct and have 'struct bpf_spin_lock anyname;' field at the top level. Nested lock inside another struct is not allowed. - syscall map_lookup doesn't copy bpf_spin_lock field to user space. - syscall map_update and program map_update do not update bpf_spin_lock field. - bpf_spin_lock cannot be on the stack or inside networking packet. bpf_spin_lock can only be inside HASH or ARRAY map value. - bpf_spin_lock is available to root only and to all program types. - bpf_spin_lock is not allowed in inner maps of map-in-map. - ld_abs is not allowed inside spin_lock-ed region. - tracing progs and socket filter progs cannot use bpf_spin_lock due to insufficient preemption checks Implementation details: - cgroup-bpf class of programs can nest with xdp/tc programs. Hence bpf_spin_lock is equivalent to spin_lock_irqsave.
Other solutions to avoid nested bpf_spin_lock are possible. Like making sure that all networking progs run with softirq disabled. spin_lock_irqsave is the simplest and doesn't add overhead to the programs that don't use it. - arch_spinlock_t is used when it's implemented as queued_spin_lock - archs can force their own arch_spinlock_t - on architectures where queued_spin_lock is not available and sizeof(arch_spinlock_t) != sizeof(__u32) trivial lock is used. - presence of bpf_spin_lock inside map value could have been indicated via extra flag during map_create, but specifying it via BTF is cleaner. It provides introspection for map key/value and reduces user mistakes. Next steps: - allow bpf_spin_lock in other map types (like cgroup local storage) - introduce BPF_F_LOCK flag for bpf_map_update() syscall and helper to request kernel to grab bpf_spin_lock before rewriting the value. That will serialize access to map elements. Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Kconfig.locks3
-rw-r--r--kernel/bpf/arraymap.c7
-rw-r--r--kernel/bpf/btf.c42
-rw-r--r--kernel/bpf/core.c2
-rw-r--r--kernel/bpf/hashtab.c21
-rw-r--r--kernel/bpf/helpers.c80
-rw-r--r--kernel/bpf/map_in_map.c5
-rw-r--r--kernel/bpf/syscall.c21
-rw-r--r--kernel/bpf/verifier.c169
9 files changed, 331 insertions, 19 deletions
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
index 84d882f3e299..fbba478ae522 100644
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
@@ -242,6 +242,9 @@ config QUEUED_SPINLOCKS
242 def_bool y if ARCH_USE_QUEUED_SPINLOCKS 242 def_bool y if ARCH_USE_QUEUED_SPINLOCKS
243 depends on SMP 243 depends on SMP
244 244
245config BPF_ARCH_SPINLOCK
246 bool
247
245config ARCH_USE_QUEUED_RWLOCKS 248config ARCH_USE_QUEUED_RWLOCKS
246 bool 249 bool
247 250
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 25632a75d630..d6d979910a2a 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -270,9 +270,10 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
270 memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]), 270 memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
271 value, map->value_size); 271 value, map->value_size);
272 else 272 else
273 memcpy(array->value + 273 copy_map_value(map,
274 array->elem_size * (index & array->index_mask), 274 array->value +
275 value, map->value_size); 275 array->elem_size * (index & array->index_mask),
276 value);
276 return 0; 277 return 0;
277} 278}
278 279
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 3d661f0606fe..7019c1f05cab 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -355,6 +355,11 @@ static bool btf_type_is_struct(const struct btf_type *t)
355 return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION; 355 return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;
356} 356}
357 357
358static bool __btf_type_is_struct(const struct btf_type *t)
359{
360 return BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT;
361}
362
358static bool btf_type_is_array(const struct btf_type *t) 363static bool btf_type_is_array(const struct btf_type *t)
359{ 364{
360 return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY; 365 return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY;
@@ -2045,6 +2050,43 @@ static void btf_struct_log(struct btf_verifier_env *env,
2045 btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t)); 2050 btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t));
2046} 2051}
2047 2052
2053/* find 'struct bpf_spin_lock' in map value.
2054 * return >= 0 offset if found
2055 * and < 0 in case of error
2056 */
2057int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t)
2058{
2059 const struct btf_member *member;
2060 u32 i, off = -ENOENT;
2061
2062 if (!__btf_type_is_struct(t))
2063 return -EINVAL;
2064
2065 for_each_member(i, t, member) {
2066 const struct btf_type *member_type = btf_type_by_id(btf,
2067 member->type);
2068 if (!__btf_type_is_struct(member_type))
2069 continue;
2070 if (member_type->size != sizeof(struct bpf_spin_lock))
2071 continue;
2072 if (strcmp(__btf_name_by_offset(btf, member_type->name_off),
2073 "bpf_spin_lock"))
2074 continue;
2075 if (off != -ENOENT)
2076 /* only one 'struct bpf_spin_lock' is allowed */
2077 return -E2BIG;
2078 off = btf_member_bit_offset(t, member);
2079 if (off % 8)
2080 /* valid C code cannot generate such BTF */
2081 return -EINVAL;
2082 off /= 8;
2083 if (off % __alignof__(struct bpf_spin_lock))
2084 /* valid struct bpf_spin_lock will be 4 byte aligned */
2085 return -EINVAL;
2086 }
2087 return off;
2088}
2089
2048static void btf_struct_seq_show(const struct btf *btf, const struct btf_type *t, 2090static void btf_struct_seq_show(const struct btf *btf, const struct btf_type *t,
2049 u32 type_id, void *data, u8 bits_offset, 2091 u32 type_id, void *data, u8 bits_offset,
2050 struct seq_file *m) 2092 struct seq_file *m)
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index f13c543b7b36..ef88b167959d 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2002,6 +2002,8 @@ const struct bpf_func_proto bpf_map_delete_elem_proto __weak;
2002const struct bpf_func_proto bpf_map_push_elem_proto __weak; 2002const struct bpf_func_proto bpf_map_push_elem_proto __weak;
2003const struct bpf_func_proto bpf_map_pop_elem_proto __weak; 2003const struct bpf_func_proto bpf_map_pop_elem_proto __weak;
2004const struct bpf_func_proto bpf_map_peek_elem_proto __weak; 2004const struct bpf_func_proto bpf_map_peek_elem_proto __weak;
2005const struct bpf_func_proto bpf_spin_lock_proto __weak;
2006const struct bpf_func_proto bpf_spin_unlock_proto __weak;
2005 2007
2006const struct bpf_func_proto bpf_get_prandom_u32_proto __weak; 2008const struct bpf_func_proto bpf_get_prandom_u32_proto __weak;
2007const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak; 2009const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 4b7c76765d9d..6d3b22c5ad68 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -718,21 +718,12 @@ static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab)
718 BITS_PER_LONG == 64; 718 BITS_PER_LONG == 64;
719} 719}
720 720
721static u32 htab_size_value(const struct bpf_htab *htab, bool percpu)
722{
723 u32 size = htab->map.value_size;
724
725 if (percpu || fd_htab_map_needs_adjust(htab))
726 size = round_up(size, 8);
727 return size;
728}
729
730static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, 721static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
731 void *value, u32 key_size, u32 hash, 722 void *value, u32 key_size, u32 hash,
732 bool percpu, bool onallcpus, 723 bool percpu, bool onallcpus,
733 struct htab_elem *old_elem) 724 struct htab_elem *old_elem)
734{ 725{
735 u32 size = htab_size_value(htab, percpu); 726 u32 size = htab->map.value_size;
736 bool prealloc = htab_is_prealloc(htab); 727 bool prealloc = htab_is_prealloc(htab);
737 struct htab_elem *l_new, **pl_new; 728 struct htab_elem *l_new, **pl_new;
738 void __percpu *pptr; 729 void __percpu *pptr;
@@ -770,10 +761,13 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
770 l_new = ERR_PTR(-ENOMEM); 761 l_new = ERR_PTR(-ENOMEM);
771 goto dec_count; 762 goto dec_count;
772 } 763 }
764 check_and_init_map_lock(&htab->map,
765 l_new->key + round_up(key_size, 8));
773 } 766 }
774 767
775 memcpy(l_new->key, key, key_size); 768 memcpy(l_new->key, key, key_size);
776 if (percpu) { 769 if (percpu) {
770 size = round_up(size, 8);
777 if (prealloc) { 771 if (prealloc) {
778 pptr = htab_elem_get_ptr(l_new, key_size); 772 pptr = htab_elem_get_ptr(l_new, key_size);
779 } else { 773 } else {
@@ -791,8 +785,13 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
791 785
792 if (!prealloc) 786 if (!prealloc)
793 htab_elem_set_ptr(l_new, key_size, pptr); 787 htab_elem_set_ptr(l_new, key_size, pptr);
794 } else { 788 } else if (fd_htab_map_needs_adjust(htab)) {
789 size = round_up(size, 8);
795 memcpy(l_new->key + round_up(key_size, 8), value, size); 790 memcpy(l_new->key + round_up(key_size, 8), value, size);
791 } else {
792 copy_map_value(&htab->map,
793 l_new->key + round_up(key_size, 8),
794 value);
796 } 795 }
797 796
798 l_new->hash = hash; 797 l_new->hash = hash;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index a74972b07e74..fbe544761628 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -221,6 +221,86 @@ const struct bpf_func_proto bpf_get_current_comm_proto = {
221 .arg2_type = ARG_CONST_SIZE, 221 .arg2_type = ARG_CONST_SIZE,
222}; 222};
223 223
224#if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK)
225
226static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
227{
228 arch_spinlock_t *l = (void *)lock;
229 union {
230 __u32 val;
231 arch_spinlock_t lock;
232 } u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED };
233
234 compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0");
235 BUILD_BUG_ON(sizeof(*l) != sizeof(__u32));
236 BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32));
237 arch_spin_lock(l);
238}
239
240static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
241{
242 arch_spinlock_t *l = (void *)lock;
243
244 arch_spin_unlock(l);
245}
246
247#else
248
249static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
250{
251 atomic_t *l = (void *)lock;
252
253 BUILD_BUG_ON(sizeof(*l) != sizeof(*lock));
254 do {
255 atomic_cond_read_relaxed(l, !VAL);
256 } while (atomic_xchg(l, 1));
257}
258
259static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
260{
261 atomic_t *l = (void *)lock;
262
263 atomic_set_release(l, 0);
264}
265
266#endif
267
268static DEFINE_PER_CPU(unsigned long, irqsave_flags);
269
270notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
271{
272 unsigned long flags;
273
274 local_irq_save(flags);
275 __bpf_spin_lock(lock);
276 __this_cpu_write(irqsave_flags, flags);
277 return 0;
278}
279
280const struct bpf_func_proto bpf_spin_lock_proto = {
281 .func = bpf_spin_lock,
282 .gpl_only = false,
283 .ret_type = RET_VOID,
284 .arg1_type = ARG_PTR_TO_SPIN_LOCK,
285};
286
287notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
288{
289 unsigned long flags;
290
291 flags = __this_cpu_read(irqsave_flags);
292 __bpf_spin_unlock(lock);
293 local_irq_restore(flags);
294 return 0;
295}
296
297const struct bpf_func_proto bpf_spin_unlock_proto = {
298 .func = bpf_spin_unlock,
299 .gpl_only = false,
300 .ret_type = RET_VOID,
301 .arg1_type = ARG_PTR_TO_SPIN_LOCK,
302};
303
224#ifdef CONFIG_CGROUPS 304#ifdef CONFIG_CGROUPS
225BPF_CALL_0(bpf_get_current_cgroup_id) 305BPF_CALL_0(bpf_get_current_cgroup_id)
226{ 306{
diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
index 52378d3e34b3..583346a0ab29 100644
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
@@ -37,6 +37,11 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
37 return ERR_PTR(-EINVAL); 37 return ERR_PTR(-EINVAL);
38 } 38 }
39 39
40 if (map_value_has_spin_lock(inner_map)) {
41 fdput(f);
42 return ERR_PTR(-ENOTSUPP);
43 }
44
40 inner_map_meta_size = sizeof(*inner_map_meta); 45 inner_map_meta_size = sizeof(*inner_map_meta);
41 /* In some cases verifier needs to access beyond just base map. */ 46 /* In some cases verifier needs to access beyond just base map. */
42 if (inner_map->ops == &array_map_ops) 47 if (inner_map->ops == &array_map_ops)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b155cd17c1bd..ebf0a673cb83 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -463,7 +463,7 @@ int map_check_no_btf(const struct bpf_map *map,
463 return -ENOTSUPP; 463 return -ENOTSUPP;
464} 464}
465 465
466static int map_check_btf(const struct bpf_map *map, const struct btf *btf, 466static int map_check_btf(struct bpf_map *map, const struct btf *btf,
467 u32 btf_key_id, u32 btf_value_id) 467 u32 btf_key_id, u32 btf_value_id)
468{ 468{
469 const struct btf_type *key_type, *value_type; 469 const struct btf_type *key_type, *value_type;
@@ -478,6 +478,21 @@ static int map_check_btf(const struct bpf_map *map, const struct btf *btf,
478 if (!value_type || value_size != map->value_size) 478 if (!value_type || value_size != map->value_size)
479 return -EINVAL; 479 return -EINVAL;
480 480
481 map->spin_lock_off = btf_find_spin_lock(btf, value_type);
482
483 if (map_value_has_spin_lock(map)) {
484 if (map->map_type != BPF_MAP_TYPE_HASH &&
485 map->map_type != BPF_MAP_TYPE_ARRAY)
486 return -ENOTSUPP;
487 if (map->spin_lock_off + sizeof(struct bpf_spin_lock) >
488 map->value_size) {
489 WARN_ONCE(1,
490 "verifier bug spin_lock_off %d value_size %d\n",
491 map->spin_lock_off, map->value_size);
492 return -EFAULT;
493 }
494 }
495
481 if (map->ops->map_check_btf) 496 if (map->ops->map_check_btf)
482 ret = map->ops->map_check_btf(map, btf, key_type, value_type); 497 ret = map->ops->map_check_btf(map, btf, key_type, value_type);
483 498
@@ -542,6 +557,8 @@ static int map_create(union bpf_attr *attr)
542 map->btf = btf; 557 map->btf = btf;
543 map->btf_key_type_id = attr->btf_key_type_id; 558 map->btf_key_type_id = attr->btf_key_type_id;
544 map->btf_value_type_id = attr->btf_value_type_id; 559 map->btf_value_type_id = attr->btf_value_type_id;
560 } else {
561 map->spin_lock_off = -EINVAL;
545 } 562 }
546 563
547 err = security_bpf_map_alloc(map); 564 err = security_bpf_map_alloc(map);
@@ -740,7 +757,7 @@ static int map_lookup_elem(union bpf_attr *attr)
740 err = -ENOENT; 757 err = -ENOENT;
741 } else { 758 } else {
742 err = 0; 759 err = 0;
743 memcpy(value, ptr, value_size); 760 copy_map_value(map, value, ptr);
744 } 761 }
745 rcu_read_unlock(); 762 rcu_read_unlock();
746 } 763 }
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8c1c21cd50b4..38892bdee651 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -213,6 +213,7 @@ struct bpf_call_arg_meta {
213 s64 msize_smax_value; 213 s64 msize_smax_value;
214 u64 msize_umax_value; 214 u64 msize_umax_value;
215 int ptr_id; 215 int ptr_id;
216 int func_id;
216}; 217};
217 218
218static DEFINE_MUTEX(bpf_verifier_lock); 219static DEFINE_MUTEX(bpf_verifier_lock);
@@ -351,6 +352,12 @@ static bool reg_is_refcounted(const struct bpf_reg_state *reg)
351 return type_is_refcounted(reg->type); 352 return type_is_refcounted(reg->type);
352} 353}
353 354
355static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
356{
357 return reg->type == PTR_TO_MAP_VALUE &&
358 map_value_has_spin_lock(reg->map_ptr);
359}
360
354static bool reg_is_refcounted_or_null(const struct bpf_reg_state *reg) 361static bool reg_is_refcounted_or_null(const struct bpf_reg_state *reg)
355{ 362{
356 return type_is_refcounted_or_null(reg->type); 363 return type_is_refcounted_or_null(reg->type);
@@ -712,6 +719,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
712 } 719 }
713 dst_state->speculative = src->speculative; 720 dst_state->speculative = src->speculative;
714 dst_state->curframe = src->curframe; 721 dst_state->curframe = src->curframe;
722 dst_state->active_spin_lock = src->active_spin_lock;
715 for (i = 0; i <= src->curframe; i++) { 723 for (i = 0; i <= src->curframe; i++) {
716 dst = dst_state->frame[i]; 724 dst = dst_state->frame[i];
717 if (!dst) { 725 if (!dst) {
@@ -1483,6 +1491,21 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
1483 if (err) 1491 if (err)
1484 verbose(env, "R%d max value is outside of the array range\n", 1492 verbose(env, "R%d max value is outside of the array range\n",
1485 regno); 1493 regno);
1494
1495 if (map_value_has_spin_lock(reg->map_ptr)) {
1496 u32 lock = reg->map_ptr->spin_lock_off;
1497
1498 /* if any part of struct bpf_spin_lock can be touched by
1499 * load/store reject this program.
1500 * To check that [x1, x2) overlaps with [y1, y2)
1501 * it is sufficient to check x1 < y2 && y1 < x2.
1502 */
1503 if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) &&
1504 lock < reg->umax_value + off + size) {
1505 verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n");
1506 return -EACCES;
1507 }
1508 }
1486 return err; 1509 return err;
1487} 1510}
1488 1511
@@ -2192,6 +2215,91 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
2192 } 2215 }
2193} 2216}
2194 2217
2218/* Implementation details:
2219 * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
2220 * Two bpf_map_lookups (even with the same key) will have different reg->id.
2221 * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after
2222 * value_or_null->value transition, since the verifier only cares about
2223 * the range of access to valid map value pointer and doesn't care about actual
2224 * address of the map element.
2225 * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
2226 * reg->id > 0 after value_or_null->value transition. By doing so
2227 * two bpf_map_lookups will be considered two different pointers that
2228 * point to different bpf_spin_locks.
2229 * The verifier allows taking only one bpf_spin_lock at a time to avoid
2230 * dead-locks.
2231 * Since only one bpf_spin_lock is allowed the checks are simpler than
2232 * reg_is_refcounted() logic. The verifier needs to remember only
2233 * one spin_lock instead of array of acquired_refs.
2234 * cur_state->active_spin_lock remembers which map value element got locked
2235 * and clears it after bpf_spin_unlock.
2236 */
2237static int process_spin_lock(struct bpf_verifier_env *env, int regno,
2238 bool is_lock)
2239{
2240 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
2241 struct bpf_verifier_state *cur = env->cur_state;
2242 bool is_const = tnum_is_const(reg->var_off);
2243 struct bpf_map *map = reg->map_ptr;
2244 u64 val = reg->var_off.value;
2245
2246 if (reg->type != PTR_TO_MAP_VALUE) {
2247 verbose(env, "R%d is not a pointer to map_value\n", regno);
2248 return -EINVAL;
2249 }
2250 if (!is_const) {
2251 verbose(env,
2252 "R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
2253 regno);
2254 return -EINVAL;
2255 }
2256 if (!map->btf) {
2257 verbose(env,
2258 "map '%s' has to have BTF in order to use bpf_spin_lock\n",
2259 map->name);
2260 return -EINVAL;
2261 }
2262 if (!map_value_has_spin_lock(map)) {
2263 if (map->spin_lock_off == -E2BIG)
2264 verbose(env,
2265 "map '%s' has more than one 'struct bpf_spin_lock'\n",
2266 map->name);
2267 else if (map->spin_lock_off == -ENOENT)
2268 verbose(env,
2269 "map '%s' doesn't have 'struct bpf_spin_lock'\n",
2270 map->name);
2271 else
2272 verbose(env,
2273 "map '%s' is not a struct type or bpf_spin_lock is mangled\n",
2274 map->name);
2275 return -EINVAL;
2276 }
2277 if (map->spin_lock_off != val + reg->off) {
2278 verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n",
2279 val + reg->off);
2280 return -EINVAL;
2281 }
2282 if (is_lock) {
2283 if (cur->active_spin_lock) {
2284 verbose(env,
2285 "Locking two bpf_spin_locks are not allowed\n");
2286 return -EINVAL;
2287 }
2288 cur->active_spin_lock = reg->id;
2289 } else {
2290 if (!cur->active_spin_lock) {
2291 verbose(env, "bpf_spin_unlock without taking a lock\n");
2292 return -EINVAL;
2293 }
2294 if (cur->active_spin_lock != reg->id) {
2295 verbose(env, "bpf_spin_unlock of different lock\n");
2296 return -EINVAL;
2297 }
2298 cur->active_spin_lock = 0;
2299 }
2300 return 0;
2301}
2302
2195static bool arg_type_is_mem_ptr(enum bpf_arg_type type) 2303static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
2196{ 2304{
2197 return type == ARG_PTR_TO_MEM || 2305 return type == ARG_PTR_TO_MEM ||
@@ -2268,6 +2376,17 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
2268 return -EFAULT; 2376 return -EFAULT;
2269 } 2377 }
2270 meta->ptr_id = reg->id; 2378 meta->ptr_id = reg->id;
2379 } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
2380 if (meta->func_id == BPF_FUNC_spin_lock) {
2381 if (process_spin_lock(env, regno, true))
2382 return -EACCES;
2383 } else if (meta->func_id == BPF_FUNC_spin_unlock) {
2384 if (process_spin_lock(env, regno, false))
2385 return -EACCES;
2386 } else {
2387 verbose(env, "verifier internal error\n");
2388 return -EFAULT;
2389 }
2271 } else if (arg_type_is_mem_ptr(arg_type)) { 2390 } else if (arg_type_is_mem_ptr(arg_type)) {
2272 expected_type = PTR_TO_STACK; 2391 expected_type = PTR_TO_STACK;
2273 /* One exception here. In case function allows for NULL to be 2392 /* One exception here. In case function allows for NULL to be
@@ -2887,6 +3006,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
2887 return err; 3006 return err;
2888 } 3007 }
2889 3008
3009 meta.func_id = func_id;
2890 /* check args */ 3010 /* check args */
2891 err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta); 3011 err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta);
2892 if (err) 3012 if (err)
@@ -4473,7 +4593,8 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
4473 } else if (reg->type == PTR_TO_SOCKET_OR_NULL) { 4593 } else if (reg->type == PTR_TO_SOCKET_OR_NULL) {
4474 reg->type = PTR_TO_SOCKET; 4594 reg->type = PTR_TO_SOCKET;
4475 } 4595 }
4476 if (is_null || !reg_is_refcounted(reg)) { 4596 if (is_null || !(reg_is_refcounted(reg) ||
4597 reg_may_point_to_spin_lock(reg))) {
4477 /* We don't need id from this point onwards anymore, 4598 /* We don't need id from this point onwards anymore,
4478 * thus we should better reset it, so that state 4599 * thus we should better reset it, so that state
4479 * pruning has chances to take effect. 4600 * pruning has chances to take effect.
@@ -4871,6 +4992,11 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
4871 return err; 4992 return err;
4872 } 4993 }
4873 4994
4995 if (env->cur_state->active_spin_lock) {
4996 verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
4997 return -EINVAL;
4998 }
4999
4874 if (regs[BPF_REG_6].type != PTR_TO_CTX) { 5000 if (regs[BPF_REG_6].type != PTR_TO_CTX) {
4875 verbose(env, 5001 verbose(env,
4876 "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n"); 5002 "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
@@ -5607,8 +5733,11 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
5607 case PTR_TO_MAP_VALUE: 5733 case PTR_TO_MAP_VALUE:
5608 /* If the new min/max/var_off satisfy the old ones and 5734 /* If the new min/max/var_off satisfy the old ones and
5609 * everything else matches, we are OK. 5735 * everything else matches, we are OK.
5610 * We don't care about the 'id' value, because nothing 5736 * 'id' is not compared, since it's only used for maps with
5611 * uses it for PTR_TO_MAP_VALUE (only for ..._OR_NULL) 5737 * bpf_spin_lock inside map element and in such cases if
5738 * the rest of the prog is valid for one map element then
5739 * it's valid for all map elements regardless of the key
5740 * used in bpf_map_lookup()
5612 */ 5741 */
5613 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && 5742 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
5614 range_within(rold, rcur) && 5743 range_within(rold, rcur) &&
@@ -5811,6 +5940,9 @@ static bool states_equal(struct bpf_verifier_env *env,
5811 if (old->speculative && !cur->speculative) 5940 if (old->speculative && !cur->speculative)
5812 return false; 5941 return false;
5813 5942
5943 if (old->active_spin_lock != cur->active_spin_lock)
5944 return false;
5945
5814 /* for states to be equal callsites have to be the same 5946 /* for states to be equal callsites have to be the same
5815 * and all frame states need to be equivalent 5947 * and all frame states need to be equivalent
5816 */ 5948 */
@@ -6229,6 +6361,12 @@ static int do_check(struct bpf_verifier_env *env)
6229 return -EINVAL; 6361 return -EINVAL;
6230 } 6362 }
6231 6363
6364 if (env->cur_state->active_spin_lock &&
6365 (insn->src_reg == BPF_PSEUDO_CALL ||
6366 insn->imm != BPF_FUNC_spin_unlock)) {
6367 verbose(env, "function calls are not allowed while holding a lock\n");
6368 return -EINVAL;
6369 }
6232 if (insn->src_reg == BPF_PSEUDO_CALL) 6370 if (insn->src_reg == BPF_PSEUDO_CALL)
6233 err = check_func_call(env, insn, &env->insn_idx); 6371 err = check_func_call(env, insn, &env->insn_idx);
6234 else 6372 else
@@ -6259,6 +6397,11 @@ static int do_check(struct bpf_verifier_env *env)
6259 return -EINVAL; 6397 return -EINVAL;
6260 } 6398 }
6261 6399
6400 if (env->cur_state->active_spin_lock) {
6401 verbose(env, "bpf_spin_unlock is missing\n");
6402 return -EINVAL;
6403 }
6404
6262 if (state->curframe) { 6405 if (state->curframe) {
6263 /* exit from nested function */ 6406 /* exit from nested function */
6264 env->prev_insn_idx = env->insn_idx; 6407 env->prev_insn_idx = env->insn_idx;
@@ -6356,6 +6499,19 @@ static int check_map_prealloc(struct bpf_map *map)
6356 !(map->map_flags & BPF_F_NO_PREALLOC); 6499 !(map->map_flags & BPF_F_NO_PREALLOC);
6357} 6500}
6358 6501
6502static bool is_tracing_prog_type(enum bpf_prog_type type)
6503{
6504 switch (type) {
6505 case BPF_PROG_TYPE_KPROBE:
6506 case BPF_PROG_TYPE_TRACEPOINT:
6507 case BPF_PROG_TYPE_PERF_EVENT:
6508 case BPF_PROG_TYPE_RAW_TRACEPOINT:
6509 return true;
6510 default:
6511 return false;
6512 }
6513}
6514
6359static int check_map_prog_compatibility(struct bpf_verifier_env *env, 6515static int check_map_prog_compatibility(struct bpf_verifier_env *env,
6360 struct bpf_map *map, 6516 struct bpf_map *map,
6361 struct bpf_prog *prog) 6517 struct bpf_prog *prog)
@@ -6378,6 +6534,13 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
6378 } 6534 }
6379 } 6535 }
6380 6536
6537 if ((is_tracing_prog_type(prog->type) ||
6538 prog->type == BPF_PROG_TYPE_SOCKET_FILTER) &&
6539 map_value_has_spin_lock(map)) {
6540 verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
6541 return -EINVAL;
6542 }
6543
6381 if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) && 6544 if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
6382 !bpf_offload_prog_map_match(prog, map)) { 6545 !bpf_offload_prog_map_match(prog, map)) {
6383 verbose(env, "offload device mismatch between prog and map\n"); 6546 verbose(env, "offload device mismatch between prog and map\n");