author	Alexei Starovoitov <ast@kernel.org>	2019-01-31 18:40:04 -0500
committer	Daniel Borkmann <daniel@iogearbox.net>	2019-02-01 14:55:38 -0500
commit	d83525ca62cf8ebe3271d14c36fb900c294274a2 (patch)
tree	14c11f7a76bf1d9778eaa29a37d734818f02e2e0 /kernel/bpf/helpers.c
parent	1832f4ef5867fd3898d8a6c6c1978b75d76fc246 (diff)
bpf: introduce bpf_spin_lock
Introduce 'struct bpf_spin_lock' and bpf_spin_lock/unlock() helpers to let
bpf program serialize access to other variables.

Example:
struct hash_elem {
	int cnt;
	struct bpf_spin_lock lock;
};
struct hash_elem *val = bpf_map_lookup_elem(&hash_map, &key);
if (val) {
	bpf_spin_lock(&val->lock);
	val->cnt++;
	bpf_spin_unlock(&val->lock);
}

Restrictions and safety checks:
- bpf_spin_lock is only allowed inside HASH and ARRAY maps.
- BTF description of the map is mandatory for safety analysis.
- bpf program can take one bpf_spin_lock at a time, since two or more can
  cause deadlocks.
- only one 'struct bpf_spin_lock' is allowed per map element.
  It drastically simplifies implementation yet allows bpf program to use
  any number of bpf_spin_locks.
- when bpf_spin_lock is taken, calls (either bpf2bpf or helpers) are not allowed.
- bpf program must bpf_spin_unlock() before return.
- bpf program can access 'struct bpf_spin_lock' only via
  bpf_spin_lock()/bpf_spin_unlock() helpers.
- load/store into 'struct bpf_spin_lock lock;' field is not allowed.
- to use bpf_spin_lock() helper the BTF description of map value must be
  a struct and have 'struct bpf_spin_lock anyname;' field at the top level.
  Nested lock inside another struct is not allowed.
- syscall map_lookup doesn't copy bpf_spin_lock field to user space.
- syscall map_update and program map_update do not update bpf_spin_lock field.
- bpf_spin_lock cannot be on the stack or inside a networking packet.
  bpf_spin_lock can only be inside HASH or ARRAY map value.
- bpf_spin_lock is available to root only and to all program types.
- bpf_spin_lock is not allowed in inner maps of map-in-map.
- ld_abs is not allowed inside spin_lock-ed region.
- tracing progs and socket filter progs cannot use bpf_spin_lock due to
  insufficient preemption checks.

Implementation details:
- cgroup-bpf class of programs can nest with xdp/tc programs.
  Hence bpf_spin_lock is equivalent to spin_lock_irqsave.
  Other solutions to avoid nested bpf_spin_lock are possible, like making
  sure that all networking progs run with softirq disabled.
  spin_lock_irqsave is the simplest and doesn't add overhead to the
  programs that don't use it.
- arch_spinlock_t is used when it's implemented as queued_spin_lock.
- archs can force their own arch_spinlock_t.
- on architectures where queued_spin_lock is not available and
  sizeof(arch_spinlock_t) != sizeof(__u32), a trivial lock is used.
- presence of bpf_spin_lock inside map value could have been indicated via
  an extra flag during map_create, but specifying it via BTF is cleaner.
  It provides introspection for map key/value and reduces user mistakes.

Next steps:
- allow bpf_spin_lock in other map types (like cgroup local storage)
- introduce BPF_F_LOCK flag for bpf_map_update() syscall and helper to
  request kernel to grab bpf_spin_lock before rewriting the value.
  That will serialize access to map elements.

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
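For illustration only, here is a minimal sketch (not part of this patch) of a BPF
program that declares a HASH map whose BTF-described value embeds
'struct bpf_spin_lock' at the top level and uses the new helpers under the rules
above. It assumes libbpf's BTF-defined map syntax (__uint/__type, SEC(".maps"))
and declarations from bpf_helpers.h, which postdate this commit; the names
hash_map and count_packets are invented for the example.

/* Hypothetical usage sketch; map/program names and libbpf conventions are
 * assumptions, not part of this patch.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct hash_elem {
	struct bpf_spin_lock lock;	/* must be a top-level field of the value */
	int cnt;
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1024);
	__type(key, __u32);
	__type(value, struct hash_elem);	/* BTF for the value is mandatory */
} hash_map SEC(".maps");

SEC("tc")
int count_packets(struct __sk_buff *skb)
{
	__u32 key = 0;
	struct hash_elem *val;

	val = bpf_map_lookup_elem(&hash_map, &key);
	if (val) {
		bpf_spin_lock(&val->lock);	/* only one lock may be held at a time */
		val->cnt++;			/* no helper/bpf2bpf calls while locked */
		bpf_spin_unlock(&val->lock);	/* must unlock before returning */
	}
	return 0;
}

char _license[] SEC("license") = "GPL";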
Diffstat (limited to 'kernel/bpf/helpers.c')
-rw-r--r--	kernel/bpf/helpers.c	80
1 file changed, 80 insertions, 0 deletions
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index a74972b07e74..fbe544761628 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -221,6 +221,86 @@ const struct bpf_func_proto bpf_get_current_comm_proto = {
 	.arg2_type	= ARG_CONST_SIZE,
 };
 
+#if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK)
+
+static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
+{
+	arch_spinlock_t *l = (void *)lock;
+	union {
+		__u32 val;
+		arch_spinlock_t lock;
+	} u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED };
+
+	compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0");
+	BUILD_BUG_ON(sizeof(*l) != sizeof(__u32));
+	BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32));
+	arch_spin_lock(l);
+}
+
+static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
+{
+	arch_spinlock_t *l = (void *)lock;
+
+	arch_spin_unlock(l);
+}
+
+#else
+
+static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
+{
+	atomic_t *l = (void *)lock;
+
+	BUILD_BUG_ON(sizeof(*l) != sizeof(*lock));
+	do {
+		atomic_cond_read_relaxed(l, !VAL);
+	} while (atomic_xchg(l, 1));
+}
+
+static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
+{
+	atomic_t *l = (void *)lock;
+
+	atomic_set_release(l, 0);
+}
+
+#endif
+
+static DEFINE_PER_CPU(unsigned long, irqsave_flags);
+
+notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__bpf_spin_lock(lock);
+	__this_cpu_write(irqsave_flags, flags);
+	return 0;
+}
+
+const struct bpf_func_proto bpf_spin_lock_proto = {
+	.func		= bpf_spin_lock,
+	.gpl_only	= false,
+	.ret_type	= RET_VOID,
+	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
+};
+
+notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
+{
+	unsigned long flags;
+
+	flags = __this_cpu_read(irqsave_flags);
+	__bpf_spin_unlock(lock);
+	local_irq_restore(flags);
+	return 0;
+}
+
+const struct bpf_func_proto bpf_spin_unlock_proto = {
+	.func		= bpf_spin_unlock,
+	.gpl_only	= false,
+	.ret_type	= RET_VOID,
+	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
+};
+
 #ifdef CONFIG_CGROUPS
 BPF_CALL_0(bpf_get_current_cgroup_id)
 {