author	Alexei Starovoitov <ast@kernel.org>	2019-01-31 18:40:04 -0500
committer	Daniel Borkmann <daniel@iogearbox.net>	2019-02-01 14:55:38 -0500
commit	d83525ca62cf8ebe3271d14c36fb900c294274a2 (patch)
tree	14c11f7a76bf1d9778eaa29a37d734818f02e2e0 /kernel/bpf/helpers.c
parent	1832f4ef5867fd3898d8a6c6c1978b75d76fc246 (diff)
bpf: introduce bpf_spin_lock
Introduce 'struct bpf_spin_lock' and bpf_spin_lock/unlock() helpers to let
bpf program serialize access to other variables.

Example:
struct hash_elem {
	int cnt;
	struct bpf_spin_lock lock;
};
struct hash_elem *val = bpf_map_lookup_elem(&hash_map, &key);
if (val) {
	bpf_spin_lock(&val->lock);
	val->cnt++;
	bpf_spin_unlock(&val->lock);
}

Restrictions and safety checks:
- bpf_spin_lock is only allowed inside HASH and ARRAY maps.
- BTF description of the map is mandatory for safety analysis.
- bpf program can take one bpf_spin_lock at a time, since two or more can
  cause deadlocks.
- only one 'struct bpf_spin_lock' is allowed per map element.
  It drastically simplifies implementation yet allows bpf program to use
  any number of bpf_spin_locks.
- when bpf_spin_lock is taken, calls (either bpf2bpf or helpers) are not allowed.
- bpf program must bpf_spin_unlock() before return.
- bpf program can access 'struct bpf_spin_lock' only via
  bpf_spin_lock()/bpf_spin_unlock() helpers.
- load/store into 'struct bpf_spin_lock lock;' field is not allowed.
- to use bpf_spin_lock() helper the BTF description of map value must be
  a struct and have 'struct bpf_spin_lock anyname;' field at the top level.
  Nested lock inside another struct is not allowed.
- syscall map_lookup doesn't copy bpf_spin_lock field to user space.
- syscall map_update and program map_update do not update bpf_spin_lock field.
- bpf_spin_lock cannot be on the stack or inside a networking packet.
  bpf_spin_lock can only be inside HASH or ARRAY map value.
- bpf_spin_lock is available to root only and to all program types.
- bpf_spin_lock is not allowed in inner maps of map-in-map.
- ld_abs is not allowed inside spin_lock-ed region.
- tracing progs and socket filter progs cannot use bpf_spin_lock due to
  insufficient preemption checks.

Implementation details:
- cgroup-bpf class of programs can nest with xdp/tc programs.
  Hence bpf_spin_lock is equivalent to spin_lock_irqsave.
  Other solutions to avoid nested bpf_spin_lock are possible, like making
  sure that all networking progs run with softirq disabled.
  spin_lock_irqsave is the simplest and doesn't add overhead to the
  programs that don't use it.
- arch_spinlock_t is used when it's implemented as queued_spin_lock.
- archs can force their own arch_spinlock_t.
- on architectures where queued_spin_lock is not available and
  sizeof(arch_spinlock_t) != sizeof(__u32), a trivial lock is used.
- presence of bpf_spin_lock inside map value could have been indicated via
  an extra flag during map_create, but specifying it via BTF is cleaner.
  It provides introspection for map key/value and reduces user mistakes.

Next steps:
- allow bpf_spin_lock in other map types (like cgroup local storage)
- introduce BPF_F_LOCK flag for bpf_map_update() syscall and helper to
  request kernel to grab bpf_spin_lock before rewriting the value.
  That will serialize access to map elements.

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
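For illustration only, here is a minimal sketch (not part of this patch) of a BPF
program that declares a HASH map whose BTF-described value embeds
'struct bpf_spin_lock' at the top level and uses the new helpers under the rules
above. It assumes libbpf's BTF-defined map syntax (__uint/__type, SEC(".maps"))
and declarations from bpf_helpers.h, which postdate this commit; the names
hash_map and count_packets are invented for the example.

/* Hypothetical usage sketch; map/program names and libbpf conventions are
 * assumptions, not part of this patch.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct hash_elem {
	struct bpf_spin_lock lock;	/* must be a top-level field of the value */
	int cnt;
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1024);
	__type(key, __u32);
	__type(value, struct hash_elem);	/* BTF for the value is mandatory */
} hash_map SEC(".maps");

SEC("tc")
int count_packets(struct __sk_buff *skb)
{
	__u32 key = 0;
	struct hash_elem *val;

	val = bpf_map_lookup_elem(&hash_map, &key);
	if (val) {
		bpf_spin_lock(&val->lock);	/* only one lock may be held at a time */
		val->cnt++;			/* no helper/bpf2bpf calls while locked */
		bpf_spin_unlock(&val->lock);	/* must unlock before returning */
	}
	return 0;
}

char _license[] SEC("license") = "GPL";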
Diffstat (limited to 'kernel/bpf/helpers.c')
-rw-r--r--	kernel/bpf/helpers.c	80
1 file changed, 80 insertions, 0 deletions
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index a74972b07e74..fbe544761628 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -221,6 +221,86 @@ const struct bpf_func_proto bpf_get_current_comm_proto = {
 	.arg2_type	= ARG_CONST_SIZE,
 };
 
+#if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK)
+
+static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
+{
+	arch_spinlock_t *l = (void *)lock;
+	union {
+		__u32 val;
+		arch_spinlock_t lock;
+	} u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED };
+
+	compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0");
+	BUILD_BUG_ON(sizeof(*l) != sizeof(__u32));
+	BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32));
+	arch_spin_lock(l);
+}
+
+static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
+{
+	arch_spinlock_t *l = (void *)lock;
+
+	arch_spin_unlock(l);
+}
+
+#else
+
+static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
+{
+	atomic_t *l = (void *)lock;
+
+	BUILD_BUG_ON(sizeof(*l) != sizeof(*lock));
+	do {
+		atomic_cond_read_relaxed(l, !VAL);
+	} while (atomic_xchg(l, 1));
+}
+
+static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
+{
+	atomic_t *l = (void *)lock;
+
+	atomic_set_release(l, 0);
+}
+
+#endif
+
+static DEFINE_PER_CPU(unsigned long, irqsave_flags);
+
+notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__bpf_spin_lock(lock);
+	__this_cpu_write(irqsave_flags, flags);
+	return 0;
+}
+
+const struct bpf_func_proto bpf_spin_lock_proto = {
+	.func		= bpf_spin_lock,
+	.gpl_only	= false,
+	.ret_type	= RET_VOID,
+	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
+};
+
+notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
+{
+	unsigned long flags;
+
+	flags = __this_cpu_read(irqsave_flags);
+	__bpf_spin_unlock(lock);
+	local_irq_restore(flags);
+	return 0;
+}
+
+const struct bpf_func_proto bpf_spin_unlock_proto = {
+	.func		= bpf_spin_unlock,
+	.gpl_only	= false,
+	.ret_type	= RET_VOID,
+	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
+};
+
 #ifdef CONFIG_CGROUPS
 BPF_CALL_0(bpf_get_current_cgroup_id)
 {