author    Linus Torvalds <torvalds@linux-foundation.org>    2019-03-05 11:26:13 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>    2019-03-05 11:26:13 -0500
commit 6456300356433873309a1cae6aa05e77d6b59153 (patch)
tree 3158f04f2ca63a48e4d3021aba31aee8f18221cf /kernel
parent cd2a3bf02625ffad02a6b9f7df758ee36cf12769 (diff)
parent 18a4d8bf250a33c015955f0dec27259780ef6448 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
 "Here we go, another merge window full of networking and #ebpf changes:

   1) Snoop DHCPACKS in batman-adv to learn MAC/IP pairs in the DHCP range without dealing with floods of ARP traffic, from Linus Lüssing.

   2) Throttle buffered multicast packet transmission in mt76, from Felix Fietkau.

   3) Support adaptive interrupt moderation in ice, from Brett Creeley.

   4) A lot of struct_size conversions, from Gustavo A. R. Silva.

   5) Add peek/push/pop commands to bpftool, as well as bash completion, from Stanislav Fomichev.

   6) Optimize sk_msg_clone(), from Vakul Garg.

   7) Add SO_BINDTOIFINDEX, from David Herrmann. (A userspace usage sketch follows the shortlog below.)

   8) Be more conservative with local resends due to local congestion, from Yuchung Cheng.

   9) Allow vetoing of unsupported VXLAN FDBs, from Petr Machata.

  10) Add health buffer support to devlink, from Eran Ben Elisha.

  11) Add TXQ scheduling API to mac80211, from Toke Høiland-Jørgensen.

  12) Add statistics to basic packet scheduler filter, from Cong Wang.

  13) Add GRE tunnel support for mlxsw Spectrum-2, from Nir Dotan.

  14) Lots of new IP tunneling forwarding tests, also from Nir Dotan.

  15) Add 3ad stats to bonding, from Nikolay Aleksandrov.

  16) Lots of probing improvements for bpftool, from Quentin Monnet.

  17) Various nfp driver #ebpf JIT improvements from Jakub Kicinski.

  18) Allow #ebpf programs to access gso_segs from skb shared info, from Eric Dumazet.

  19) Add sock_diag support for AF_XDP sockets, from Björn Töpel.

  20) Support 22260 iwlwifi devices, from Luca Coelho.

  21) Use rbtree for ipv6 defragmentation, from Peter Oskolkov.

  22) Add JMP32 instruction class support to #ebpf, from Jiong Wang.

  23) Add spinlock support to #ebpf, from Alexei Starovoitov.

  24) Support 256-bit keys and TLS 1.3 in ktls, from Dave Watson.

  25) Add device information API to devlink, from Jakub Kicinski.

  26) Add new timestamping socket options which are y2038 safe, from Deepa Dinamani.

  27) Add RX checksum offloading for various sh_eth chips, from Sergei Shtylyov.

  28) Flow offload infrastructure, from Pablo Neira Ayuso.

  29) Numerous cleanups, improvements, and bug fixes to the PHY layer and many drivers from Heiner Kallweit.

  30) Lots of changes to try and make packet scheduler classifiers run lockless as much as possible, from Vlad Buslov.

  31) Support BCM957504 chip in bnxt_en driver, from Erik Burrows.

  32) Add concurrency tests to tc-tests infrastructure, from Vlad Buslov.

  33) Add hwmon support to aquantia, from Heiner Kallweit.

  34) Allow 64-bit values for SO_MAX_PACING_RATE, from Eric Dumazet.

  And I would be remiss if I didn't thank the various major networking subsystem maintainers for integrating much of this work before I even saw it. Alexei Starovoitov, Daniel Borkmann, Pablo Neira Ayuso, Johannes Berg, Kalle Valo, and many others. Thank you!"
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (2207 commits)
  net/sched: avoid unused-label warning
  net: ignore sysctl_devconf_inherit_init_net without SYSCTL
  phy: mdio-mux: fix Kconfig dependencies
  net: phy: use phy_modify_mmd_changed in genphy_c45_an_config_aneg
  net: dsa: mv88e6xxx: add call to mv88e6xxx_ports_cmode_init to probe for new DSA framework
  selftest/net: Remove duplicate header
  sky2: Disable MSI on Dell Inspiron 1545 and Gateway P-79
  net/mlx5e: Update tx reporter status in case channels were successfully opened
  devlink: Add support for direct reporter health state update
  devlink: Update reporter state to error even if recover aborted
  sctp: call iov_iter_revert() after sending ABORT
  team: Free BPF filter when unregistering netdev
  ip6mr: Do not call __IP6_INC_STATS() from preemptible context
  isdn: mISDN: Fix potential NULL pointer dereference of kzalloc
  net: dsa: mv88e6xxx: support in-band signalling on SGMII ports with external PHYs
  cxgb4/chtls: Prefix adapter flags with CXGB4
  net-sysfs: Switch to bitmap_zalloc()
  mellanox: Switch to bitmap_zalloc()
  bpf: add test cases for non-pointer sanitiation logic
  mlxsw: i2c: Extend initialization by querying resources data
  ...
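As a quick illustration of item 7 above, a minimal userspace sketch of SO_BINDTOIFINDEX (not part of this merge's diff; the interface name "eth0" and the fallback option value are assumptions for the example):

#include <net/if.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef SO_BINDTOIFINDEX
#define SO_BINDTOIFINDEX 62	/* asm-generic value; may differ per arch */
#endif

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	int ifindex = if_nametoindex("eth0");	/* "eth0" is a placeholder */

	if (fd < 0 || ifindex == 0)
		return 1;
	/* Like SO_BINDTODEVICE, but takes an ifindex instead of a name,
	 * so no string lookup is needed and unnamed interfaces work too.
	 */
	if (setsockopt(fd, SOL_SOCKET, SO_BINDTOIFINDEX,
		       &ifindex, sizeof(ifindex)) < 0) {
		perror("setsockopt");
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}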
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Kconfig.locks         3
-rw-r--r--  kernel/bpf/arraymap.c       23
-rw-r--r--  kernel/bpf/btf.c           146
-rw-r--r--  kernel/bpf/cgroup.c          3
-rw-r--r--  kernel/bpf/core.c          315
-rw-r--r--  kernel/bpf/disasm.c         34
-rw-r--r--  kernel/bpf/hashtab.c        63
-rw-r--r--  kernel/bpf/helpers.c        96
-rw-r--r--  kernel/bpf/local_storage.c  16
-rw-r--r--  kernel/bpf/map_in_map.c      6
-rw-r--r--  kernel/bpf/offload.c        45
-rw-r--r--  kernel/bpf/syscall.c        84
-rw-r--r--  kernel/bpf/verifier.c      961
-rw-r--r--  kernel/cgroup/cgroup.c       2
-rw-r--r--  kernel/sched/core.c         28
-rw-r--r--  kernel/seccomp.c             2
-rw-r--r--  kernel/sysctl.c             37
17 files changed, 1491 insertions, 373 deletions
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
index 84d882f3e299..fbba478ae522 100644
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
@@ -242,6 +242,9 @@ config QUEUED_SPINLOCKS
 	def_bool y if ARCH_USE_QUEUED_SPINLOCKS
 	depends on SMP
 
+config BPF_ARCH_SPINLOCK
+	bool
+
 config ARCH_USE_QUEUED_RWLOCKS
 	bool
 
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 25632a75d630..c72e0d8e1e65 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -253,8 +253,9 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	u32 index = *(u32 *)key;
+	char *val;
 
-	if (unlikely(map_flags > BPF_EXIST))
+	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
 		/* unknown flags */
 		return -EINVAL;
 
@@ -262,17 +263,25 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
 		/* all elements were pre-allocated, cannot insert a new one */
 		return -E2BIG;
 
-	if (unlikely(map_flags == BPF_NOEXIST))
+	if (unlikely(map_flags & BPF_NOEXIST))
 		/* all elements already exist */
 		return -EEXIST;
 
-	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
+	if (unlikely((map_flags & BPF_F_LOCK) &&
+		     !map_value_has_spin_lock(map)))
+		return -EINVAL;
+
+	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
 		memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
 		       value, map->value_size);
-	else
-		memcpy(array->value +
-		       array->elem_size * (index & array->index_mask),
-		       value, map->value_size);
+	} else {
+		val = array->value +
+			array->elem_size * (index & array->index_mask);
+		if (map_flags & BPF_F_LOCK)
+			copy_map_value_locked(map, val, value, false);
+		else
+			copy_map_value(map, val, value);
+	}
 	return 0;
 }
 
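For context, a minimal userspace sketch of how this new BPF_F_LOCK path is exercised (not part of the patch; it assumes libbpf's bpf_map_update_elem() wrapper and a hypothetical map_value layout declared via BTF):

#include <bpf/bpf.h>		/* libbpf syscall wrappers */
#include <linux/bpf.h>		/* struct bpf_spin_lock, BPF_F_LOCK */

/* Hypothetical value layout of an array map; btf_find_spin_lock()
 * (added below in btf.c) locates the lock member at map-create time.
 */
struct map_value {
	struct bpf_spin_lock lock;
	long counter;
};

int update_locked(int map_fd, __u32 key, long counter)
{
	struct map_value val = { .counter = counter };

	/* BPF_F_LOCK makes the kernel take the element's bpf_spin_lock
	 * around the copy, so a BPF program reading under the same lock
	 * never observes a torn update; the lock field itself is skipped.
	 */
	return bpf_map_update_elem(map_fd, &key, &val, BPF_F_LOCK);
}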
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index c57bd10340ed..bd3921b1514b 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -157,7 +157,7 @@
  *
  */
 
-#define BITS_PER_U64 (sizeof(u64) * BITS_PER_BYTE)
+#define BITS_PER_U128 (sizeof(u64) * BITS_PER_BYTE * 2)
 #define BITS_PER_BYTE_MASK (BITS_PER_BYTE - 1)
 #define BITS_PER_BYTE_MASKED(bits) ((bits) & BITS_PER_BYTE_MASK)
 #define BITS_ROUNDDOWN_BYTES(bits) ((bits) >> 3)
@@ -355,6 +355,11 @@ static bool btf_type_is_struct(const struct btf_type *t)
 	return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;
 }
 
+static bool __btf_type_is_struct(const struct btf_type *t)
+{
+	return BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT;
+}
+
 static bool btf_type_is_array(const struct btf_type *t)
 {
 	return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY;
@@ -525,7 +530,7 @@ const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id)
 
 /*
  * Regular int is not a bit field and it must be either
- * u8/u16/u32/u64.
+ * u8/u16/u32/u64 or __int128.
  */
 static bool btf_type_int_is_regular(const struct btf_type *t)
 {
@@ -538,7 +543,8 @@ static bool btf_type_int_is_regular(const struct btf_type *t)
 	if (BITS_PER_BYTE_MASKED(nr_bits) ||
 	    BTF_INT_OFFSET(int_data) ||
 	    (nr_bytes != sizeof(u8) && nr_bytes != sizeof(u16) &&
-	     nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64))) {
+	     nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64) &&
+	     nr_bytes != (2 * sizeof(u64)))) {
 		return false;
 	}
 
@@ -1063,9 +1069,9 @@ static int btf_int_check_member(struct btf_verifier_env *env,
 	nr_copy_bits = BTF_INT_BITS(int_data) +
 		BITS_PER_BYTE_MASKED(struct_bits_off);
 
-	if (nr_copy_bits > BITS_PER_U64) {
+	if (nr_copy_bits > BITS_PER_U128) {
 		btf_verifier_log_member(env, struct_type, member,
-					"nr_copy_bits exceeds 64");
+					"nr_copy_bits exceeds 128");
 		return -EINVAL;
 	}
 
@@ -1119,9 +1125,9 @@ static int btf_int_check_kflag_member(struct btf_verifier_env *env,
 
 	bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off);
 	nr_copy_bits = nr_bits + BITS_PER_BYTE_MASKED(struct_bits_off);
-	if (nr_copy_bits > BITS_PER_U64) {
+	if (nr_copy_bits > BITS_PER_U128) {
 		btf_verifier_log_member(env, struct_type, member,
-					"nr_copy_bits exceeds 64");
+					"nr_copy_bits exceeds 128");
 		return -EINVAL;
 	}
 
@@ -1168,9 +1174,9 @@ static s32 btf_int_check_meta(struct btf_verifier_env *env,
 
 	nr_bits = BTF_INT_BITS(int_data) + BTF_INT_OFFSET(int_data);
 
-	if (nr_bits > BITS_PER_U64) {
+	if (nr_bits > BITS_PER_U128) {
 		btf_verifier_log_type(env, t, "nr_bits exceeds %zu",
-				      BITS_PER_U64);
+				      BITS_PER_U128);
 		return -EINVAL;
 	}
 
@@ -1211,31 +1217,93 @@ static void btf_int_log(struct btf_verifier_env *env,
 		 btf_int_encoding_str(BTF_INT_ENCODING(int_data)));
 }
 
+static void btf_int128_print(struct seq_file *m, void *data)
+{
+	/* data points to a __int128 number.
+	 * Suppose
+	 *     int128_num = *(__int128 *)data;
+	 * The below formulas shows what upper_num and lower_num represents:
+	 *     upper_num = int128_num >> 64;
+	 *     lower_num = int128_num & 0xffffffffFFFFFFFFULL;
+	 */
+	u64 upper_num, lower_num;
+
+#ifdef __BIG_ENDIAN_BITFIELD
+	upper_num = *(u64 *)data;
+	lower_num = *(u64 *)(data + 8);
+#else
+	upper_num = *(u64 *)(data + 8);
+	lower_num = *(u64 *)data;
+#endif
+	if (upper_num == 0)
+		seq_printf(m, "0x%llx", lower_num);
+	else
+		seq_printf(m, "0x%llx%016llx", upper_num, lower_num);
+}
+
+static void btf_int128_shift(u64 *print_num, u16 left_shift_bits,
+			     u16 right_shift_bits)
+{
+	u64 upper_num, lower_num;
+
+#ifdef __BIG_ENDIAN_BITFIELD
+	upper_num = print_num[0];
+	lower_num = print_num[1];
+#else
+	upper_num = print_num[1];
+	lower_num = print_num[0];
+#endif
+
+	/* shake out un-needed bits by shift/or operations */
+	if (left_shift_bits >= 64) {
+		upper_num = lower_num << (left_shift_bits - 64);
+		lower_num = 0;
+	} else {
+		upper_num = (upper_num << left_shift_bits) |
+			    (lower_num >> (64 - left_shift_bits));
+		lower_num = lower_num << left_shift_bits;
+	}
+
+	if (right_shift_bits >= 64) {
+		lower_num = upper_num >> (right_shift_bits - 64);
+		upper_num = 0;
+	} else {
+		lower_num = (lower_num >> right_shift_bits) |
+			    (upper_num << (64 - right_shift_bits));
+		upper_num = upper_num >> right_shift_bits;
+	}
+
+#ifdef __BIG_ENDIAN_BITFIELD
+	print_num[0] = upper_num;
+	print_num[1] = lower_num;
+#else
+	print_num[0] = lower_num;
+	print_num[1] = upper_num;
+#endif
+}
+
 static void btf_bitfield_seq_show(void *data, u8 bits_offset,
 				  u8 nr_bits, struct seq_file *m)
 {
 	u16 left_shift_bits, right_shift_bits;
 	u8 nr_copy_bytes;
 	u8 nr_copy_bits;
-	u64 print_num;
+	u64 print_num[2] = {};
 
 	nr_copy_bits = nr_bits + bits_offset;
 	nr_copy_bytes = BITS_ROUNDUP_BYTES(nr_copy_bits);
 
-	print_num = 0;
-	memcpy(&print_num, data, nr_copy_bytes);
+	memcpy(print_num, data, nr_copy_bytes);
 
 #ifdef __BIG_ENDIAN_BITFIELD
 	left_shift_bits = bits_offset;
 #else
-	left_shift_bits = BITS_PER_U64 - nr_copy_bits;
+	left_shift_bits = BITS_PER_U128 - nr_copy_bits;
 #endif
-	right_shift_bits = BITS_PER_U64 - nr_bits;
+	right_shift_bits = BITS_PER_U128 - nr_bits;
 
-	print_num <<= left_shift_bits;
-	print_num >>= right_shift_bits;
-
-	seq_printf(m, "0x%llx", print_num);
+	btf_int128_shift(print_num, left_shift_bits, right_shift_bits);
+	btf_int128_print(m, print_num);
 }
 
 
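A standalone illustration of what btf_int128_shift() computes with its two u64 halves, written as a sketch using GCC/Clang's unsigned __int128 on a little-endian host (assumes 16 readable bytes at data; not part of the patch):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Extract a bitfield of nr_bits starting at bits_offset: left-shift to
 * drop the high garbage, then right-shift to drop the low garbage --
 * exactly the left/right shift pair btf_bitfield_seq_show() applies.
 */
static unsigned __int128 extract(const void *data, uint16_t bits_offset,
				 uint16_t nr_bits)
{
	unsigned __int128 v;

	memcpy(&v, data, sizeof(v));		/* kernel copies only nr_copy_bytes */
	v <<= 128 - (nr_bits + bits_offset);
	v >>= 128 - nr_bits;
	return v;
}

int main(void)
{
	unsigned char buf[16] = { 0xff, 0xff, 0x0f };	/* bits 0..19 set */
	unsigned __int128 f = extract(buf, 4, 8);	/* bits 4..11 */

	printf("0x%llx\n", (unsigned long long)f);	/* prints 0xff */
	return 0;
}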
@@ -1250,7 +1318,7 @@ static void btf_int_bits_seq_show(const struct btf *btf,
 
 	/*
 	 * bits_offset is at most 7.
-	 * BTF_INT_OFFSET() cannot exceed 64 bits.
+	 * BTF_INT_OFFSET() cannot exceed 128 bits.
 	 */
 	total_bits_offset = bits_offset + BTF_INT_OFFSET(int_data);
 	data += BITS_ROUNDDOWN_BYTES(total_bits_offset);
@@ -1274,6 +1342,9 @@ static void btf_int_seq_show(const struct btf *btf, const struct btf_type *t,
 	}
 
 	switch (nr_bits) {
+	case 128:
+		btf_int128_print(m, data);
+		break;
 	case 64:
 		if (sign)
 			seq_printf(m, "%lld", *(s64 *)data);
@@ -1980,6 +2051,43 @@ static void btf_struct_log(struct btf_verifier_env *env,
 	btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t));
 }
 
+/* find 'struct bpf_spin_lock' in map value.
+ * return >= 0 offset if found
+ * and < 0 in case of error
+ */
+int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t)
+{
+	const struct btf_member *member;
+	u32 i, off = -ENOENT;
+
+	if (!__btf_type_is_struct(t))
+		return -EINVAL;
+
+	for_each_member(i, t, member) {
+		const struct btf_type *member_type = btf_type_by_id(btf,
+								    member->type);
+		if (!__btf_type_is_struct(member_type))
+			continue;
+		if (member_type->size != sizeof(struct bpf_spin_lock))
+			continue;
+		if (strcmp(__btf_name_by_offset(btf, member_type->name_off),
+			   "bpf_spin_lock"))
+			continue;
+		if (off != -ENOENT)
+			/* only one 'struct bpf_spin_lock' is allowed */
+			return -E2BIG;
+		off = btf_member_bit_offset(t, member);
+		if (off % 8)
+			/* valid C code cannot generate such BTF */
+			return -EINVAL;
+		off /= 8;
+		if (off % __alignof__(struct bpf_spin_lock))
+			/* valid struct bpf_spin_lock will be 4 byte aligned */
+			return -EINVAL;
+	}
+	return off;
+}
+
 static void btf_struct_seq_show(const struct btf *btf, const struct btf_type *t,
 				u32 type_id, void *data, u8 bits_offset,
 				struct seq_file *m)
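To make those rules concrete, hypothetical map-value layouts that btf_find_spin_lock() would accept and reject (illustrative only, not part of the patch):

#include <linux/bpf.h>	/* struct bpf_spin_lock (uapi) */

/* Accepted: exactly one 4-byte-aligned struct bpf_spin_lock member;
 * the returned offset here is 0.
 */
struct good_value {
	struct bpf_spin_lock lock;
	int counter;
};

/* Rejected with -E2BIG: more than one lock in the value. */
struct two_locks {
	struct bpf_spin_lock a;
	struct bpf_spin_lock b;
};

/* Not found (-ENOENT): the scan only looks at direct members, and the
 * inner struct's type name is not "bpf_spin_lock".
 */
struct nested {
	struct { struct bpf_spin_lock l; } inner;
};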
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index d17d05570a3f..4e807973aa80 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -230,6 +230,7 @@ cleanup:
  * @cgrp: The cgroup which descendants to traverse
  * @prog: A program to attach
  * @type: Type of attach operation
+ * @flags: Option flags
  *
  * Must be called with cgroup_mutex held.
  */
@@ -363,7 +364,7 @@ cleanup:
  * Must be called with cgroup_mutex held.
  */
 int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
-			enum bpf_attach_type type, u32 unused_flags)
+			enum bpf_attach_type type)
 {
 	struct list_head *progs = &cgrp->bpf.progs[type];
 	enum bpf_cgroup_storage_type stype;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index f908b9356025..3f08c257858e 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -78,7 +78,7 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns
 	return NULL;
 }
 
-struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
+struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags)
 {
 	gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
 	struct bpf_prog_aux *aux;
@@ -104,6 +104,32 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
 
 	return fp;
 }
+
+struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
+{
+	gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
+	struct bpf_prog *prog;
+	int cpu;
+
+	prog = bpf_prog_alloc_no_stats(size, gfp_extra_flags);
+	if (!prog)
+		return NULL;
+
+	prog->aux->stats = alloc_percpu_gfp(struct bpf_prog_stats, gfp_flags);
+	if (!prog->aux->stats) {
+		kfree(prog->aux);
+		vfree(prog);
+		return NULL;
+	}
+
+	for_each_possible_cpu(cpu) {
+		struct bpf_prog_stats *pstats;
+
+		pstats = per_cpu_ptr(prog->aux->stats, cpu);
+		u64_stats_init(&pstats->syncp);
+	}
+	return prog;
+}
 EXPORT_SYMBOL_GPL(bpf_prog_alloc);
 
 int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog)
@@ -231,7 +257,10 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
 
 void __bpf_prog_free(struct bpf_prog *fp)
 {
-	kfree(fp->aux);
+	if (fp->aux) {
+		free_percpu(fp->aux->stats);
+		kfree(fp->aux);
+	}
 	vfree(fp);
 }
 
@@ -307,15 +336,16 @@ int bpf_prog_calc_tag(struct bpf_prog *fp)
 	return 0;
 }
 
-static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, u32 delta,
-				u32 curr, const bool probe_pass)
+static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, s32 end_old,
+				s32 end_new, u32 curr, const bool probe_pass)
 {
 	const s64 imm_min = S32_MIN, imm_max = S32_MAX;
+	s32 delta = end_new - end_old;
 	s64 imm = insn->imm;
 
-	if (curr < pos && curr + imm + 1 > pos)
+	if (curr < pos && curr + imm + 1 >= end_old)
 		imm += delta;
-	else if (curr > pos + delta && curr + imm + 1 <= pos + delta)
+	else if (curr >= end_new && curr + imm + 1 < end_new)
 		imm -= delta;
 	if (imm < imm_min || imm > imm_max)
 		return -ERANGE;
@@ -324,15 +354,16 @@ static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, u32 delta,
 	return 0;
 }
 
-static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, u32 delta,
-				u32 curr, const bool probe_pass)
+static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, s32 end_old,
+				s32 end_new, u32 curr, const bool probe_pass)
 {
 	const s32 off_min = S16_MIN, off_max = S16_MAX;
+	s32 delta = end_new - end_old;
 	s32 off = insn->off;
 
-	if (curr < pos && curr + off + 1 > pos)
+	if (curr < pos && curr + off + 1 >= end_old)
 		off += delta;
-	else if (curr > pos + delta && curr + off + 1 <= pos + delta)
+	else if (curr >= end_new && curr + off + 1 < end_new)
 		off -= delta;
 	if (off < off_min || off > off_max)
 		return -ERANGE;
@@ -341,10 +372,10 @@ static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, u32 delta,
 	return 0;
 }
 
-static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta,
-			    const bool probe_pass)
+static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, s32 end_old,
+			    s32 end_new, const bool probe_pass)
 {
-	u32 i, insn_cnt = prog->len + (probe_pass ? delta : 0);
+	u32 i, insn_cnt = prog->len + (probe_pass ? end_new - end_old : 0);
 	struct bpf_insn *insn = prog->insnsi;
 	int ret = 0;
 
@@ -356,22 +387,23 @@ static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta,
 		 * do any other adjustments. Therefore skip the patchlet.
 		 */
 		if (probe_pass && i == pos) {
-			i += delta + 1;
-			insn++;
+			i = end_new;
+			insn = prog->insnsi + end_old;
 		}
 		code = insn->code;
-		if (BPF_CLASS(code) != BPF_JMP ||
+		if ((BPF_CLASS(code) != BPF_JMP &&
+		     BPF_CLASS(code) != BPF_JMP32) ||
 		    BPF_OP(code) == BPF_EXIT)
 			continue;
 		/* Adjust offset of jmps if we cross patch boundaries. */
 		if (BPF_OP(code) == BPF_CALL) {
 			if (insn->src_reg != BPF_PSEUDO_CALL)
 				continue;
-			ret = bpf_adj_delta_to_imm(insn, pos, delta, i,
-						   probe_pass);
+			ret = bpf_adj_delta_to_imm(insn, pos, end_old,
						   end_new, i, probe_pass);
 		} else {
-			ret = bpf_adj_delta_to_off(insn, pos, delta, i,
-						   probe_pass);
+			ret = bpf_adj_delta_to_off(insn, pos, end_old,
+						   end_new, i, probe_pass);
 		}
 		if (ret)
 			break;
@@ -421,7 +453,7 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 	 * we afterwards may not fail anymore.
 	 */
 	if (insn_adj_cnt > cnt_max &&
-	    bpf_adj_branches(prog, off, insn_delta, true))
+	    bpf_adj_branches(prog, off, off + 1, off + len, true))
 		return NULL;
 
 	/* Several new instructions need to be inserted. Make room
@@ -453,13 +485,25 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 	 * the ship has sailed to reverse to the original state. An
 	 * overflow cannot happen at this point.
 	 */
-	BUG_ON(bpf_adj_branches(prog_adj, off, insn_delta, false));
+	BUG_ON(bpf_adj_branches(prog_adj, off, off + 1, off + len, false));
 
 	bpf_adj_linfo(prog_adj, off, insn_delta);
 
 	return prog_adj;
 }
 
+int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt)
+{
+	/* Branch offsets can't overflow when program is shrinking, no need
+	 * to call bpf_adj_branches(..., true) here
+	 */
+	memmove(prog->insnsi + off, prog->insnsi + off + cnt,
+		sizeof(struct bpf_insn) * (prog->len - off - cnt));
+	prog->len -= cnt;
+
+	return WARN_ON_ONCE(bpf_adj_branches(prog, off, off + cnt, off, false));
+}
+
 void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp)
 {
 	int i;
@@ -934,6 +978,27 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from,
 		*to++ = BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, off);
 		break;
 
+	case BPF_JMP32 | BPF_JEQ | BPF_K:
+	case BPF_JMP32 | BPF_JNE | BPF_K:
+	case BPF_JMP32 | BPF_JGT | BPF_K:
+	case BPF_JMP32 | BPF_JLT | BPF_K:
+	case BPF_JMP32 | BPF_JGE | BPF_K:
+	case BPF_JMP32 | BPF_JLE | BPF_K:
+	case BPF_JMP32 | BPF_JSGT | BPF_K:
+	case BPF_JMP32 | BPF_JSLT | BPF_K:
+	case BPF_JMP32 | BPF_JSGE | BPF_K:
+	case BPF_JMP32 | BPF_JSLE | BPF_K:
+	case BPF_JMP32 | BPF_JSET | BPF_K:
+		/* Accommodate for extra offset in case of a backjump. */
+		off = from->off;
+		if (off < 0)
+			off -= 2;
+		*to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
+		*to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
+		*to++ = BPF_JMP32_REG(from->code, from->dst_reg, BPF_REG_AX,
+				      off);
+		break;
+
 	case BPF_LD | BPF_IMM | BPF_DW:
 		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm);
 		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
@@ -1130,6 +1195,31 @@ EXPORT_SYMBOL_GPL(__bpf_call_base);
 	INSN_2(JMP, CALL),			\
 	/* Exit instruction. */			\
 	INSN_2(JMP, EXIT),			\
+	/* 32-bit Jump instructions. */		\
+	/* Register based. */			\
+	INSN_3(JMP32, JEQ, X),			\
+	INSN_3(JMP32, JNE, X),			\
+	INSN_3(JMP32, JGT, X),			\
+	INSN_3(JMP32, JLT, X),			\
+	INSN_3(JMP32, JGE, X),			\
+	INSN_3(JMP32, JLE, X),			\
+	INSN_3(JMP32, JSGT, X),			\
+	INSN_3(JMP32, JSLT, X),			\
+	INSN_3(JMP32, JSGE, X),			\
+	INSN_3(JMP32, JSLE, X),			\
+	INSN_3(JMP32, JSET, X),			\
+	/* Immediate based. */			\
+	INSN_3(JMP32, JEQ, K),			\
+	INSN_3(JMP32, JNE, K),			\
+	INSN_3(JMP32, JGT, K),			\
+	INSN_3(JMP32, JLT, K),			\
+	INSN_3(JMP32, JGE, K),			\
+	INSN_3(JMP32, JLE, K),			\
+	INSN_3(JMP32, JSGT, K),			\
+	INSN_3(JMP32, JSLT, K),			\
+	INSN_3(JMP32, JSGE, K),			\
+	INSN_3(JMP32, JSLE, K),			\
+	INSN_3(JMP32, JSET, K),			\
 	/* Jump instructions. */		\
 	/* Register based. */			\
 	INSN_3(JMP, JEQ, X),			\
@@ -1202,8 +1292,9 @@ bool bpf_opcode_in_insntable(u8 code)
 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
 /**
  *	__bpf_prog_run - run eBPF program on a given context
- *	@ctx: is the data we are operating on
+ *	@regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers
  *	@insn: is the array of eBPF instructions
+ *	@stack: is the eBPF storage stack
  *
  * Decode and execute eBPF instructions.
  */
@@ -1390,145 +1481,49 @@ select_insn:
 out:
 		CONT;
 	}
-	/* JMP */
 	JMP_JA:
 		insn += insn->off;
 		CONT;
-	JMP_JEQ_X:
-		if (DST == SRC) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JEQ_K:
-		if (DST == IMM) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JNE_X:
-		if (DST != SRC) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JNE_K:
-		if (DST != IMM) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JGT_X:
-		if (DST > SRC) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JGT_K:
-		if (DST > IMM) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JLT_X:
-		if (DST < SRC) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JLT_K:
-		if (DST < IMM) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JGE_X:
-		if (DST >= SRC) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JGE_K:
-		if (DST >= IMM) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JLE_X:
-		if (DST <= SRC) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JLE_K:
-		if (DST <= IMM) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JSGT_X:
-		if (((s64) DST) > ((s64) SRC)) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JSGT_K:
-		if (((s64) DST) > ((s64) IMM)) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JSLT_X:
-		if (((s64) DST) < ((s64) SRC)) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JSLT_K:
-		if (((s64) DST) < ((s64) IMM)) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JSGE_X:
-		if (((s64) DST) >= ((s64) SRC)) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JSGE_K:
-		if (((s64) DST) >= ((s64) IMM)) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JSLE_X:
-		if (((s64) DST) <= ((s64) SRC)) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JSLE_K:
-		if (((s64) DST) <= ((s64) IMM)) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JSET_X:
-		if (DST & SRC) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JSET_K:
-		if (DST & IMM) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
 	JMP_EXIT:
 		return BPF_R0;
-
+	/* JMP */
+#define COND_JMP(SIGN, OPCODE, CMP_OP)				\
+	JMP_##OPCODE##_X:					\
+		if ((SIGN##64) DST CMP_OP (SIGN##64) SRC) {	\
+			insn += insn->off;			\
+			CONT_JMP;				\
+		}						\
+		CONT;						\
+	JMP32_##OPCODE##_X:					\
+		if ((SIGN##32) DST CMP_OP (SIGN##32) SRC) {	\
+			insn += insn->off;			\
+			CONT_JMP;				\
+		}						\
+		CONT;						\
+	JMP_##OPCODE##_K:					\
+		if ((SIGN##64) DST CMP_OP (SIGN##64) IMM) {	\
+			insn += insn->off;			\
+			CONT_JMP;				\
+		}						\
+		CONT;						\
+	JMP32_##OPCODE##_K:					\
+		if ((SIGN##32) DST CMP_OP (SIGN##32) IMM) {	\
+			insn += insn->off;			\
+			CONT_JMP;				\
+		}						\
+		CONT;
+	COND_JMP(u, JEQ, ==)
+	COND_JMP(u, JNE, !=)
+	COND_JMP(u, JGT, >)
+	COND_JMP(u, JLT, <)
+	COND_JMP(u, JGE, >=)
+	COND_JMP(u, JLE, <=)
+	COND_JMP(u, JSET, &)
+	COND_JMP(s, JSGT, >)
+	COND_JMP(s, JSLT, <)
+	COND_JMP(s, JSGE, >=)
+	COND_JMP(s, JSLE, <=)
+#undef COND_JMP
 	/* STX and ST and LDX*/
 #define LDST(SIZEOP, SIZE)						\
 	STX_MEM_##SIZEOP:						\
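The practical difference between the two jump classes, sketched in plain C (not part of the patch): BPF_JMP compares the full 64-bit registers, while BPF_JMP32 compares only the low 32 bits (the wN sub-registers), matching the (SIGN##32) casts in COND_JMP above:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* r1's low 32 bits are 1, so a 32-bit JGT against 2 is false
	 * even though the full 64-bit comparison is true.
	 */
	uint64_t r1 = 0x100000001ULL, r2 = 2;

	printf("JMP   JGT taken: %d\n", r1 > r2);			/* 1 */
	printf("JMP32 JGT taken: %d\n", (uint32_t)r1 > (uint32_t)r2);	/* 0 */
	return 0;
}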
@@ -2036,6 +2031,8 @@ const struct bpf_func_proto bpf_map_delete_elem_proto __weak;
 const struct bpf_func_proto bpf_map_push_elem_proto __weak;
 const struct bpf_func_proto bpf_map_pop_elem_proto __weak;
 const struct bpf_func_proto bpf_map_peek_elem_proto __weak;
+const struct bpf_func_proto bpf_spin_lock_proto __weak;
+const struct bpf_func_proto bpf_spin_unlock_proto __weak;
 
 const struct bpf_func_proto bpf_get_prandom_u32_proto __weak;
 const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
@@ -2101,6 +2098,10 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to,
 	return -EFAULT;
 }
 
+DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
+EXPORT_SYMBOL(bpf_stats_enabled_key);
+int sysctl_bpf_stats_enabled __read_mostly;
+
 /* All definitions of tracepoints related to BPF. */
 #define CREATE_TRACE_POINTS
 #include <linux/bpf_trace.h>
diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c
index d6b76377cb6e..de73f55e42fd 100644
--- a/kernel/bpf/disasm.c
+++ b/kernel/bpf/disasm.c
@@ -67,7 +67,7 @@ const char *const bpf_class_string[8] = {
 	[BPF_STX]   = "stx",
 	[BPF_ALU]   = "alu",
 	[BPF_JMP]   = "jmp",
-	[BPF_RET]   = "BUG",
+	[BPF_JMP32] = "jmp32",
 	[BPF_ALU64] = "alu64",
 };
 
@@ -136,23 +136,22 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
 		else
 			print_bpf_end_insn(verbose, cbs->private_data, insn);
 	} else if (BPF_OP(insn->code) == BPF_NEG) {
-		verbose(cbs->private_data, "(%02x) r%d = %s-r%d\n",
-			insn->code, insn->dst_reg,
-			class == BPF_ALU ? "(u32) " : "",
+		verbose(cbs->private_data, "(%02x) %c%d = -%c%d\n",
+			insn->code, class == BPF_ALU ? 'w' : 'r',
+			insn->dst_reg, class == BPF_ALU ? 'w' : 'r',
 			insn->dst_reg);
 	} else if (BPF_SRC(insn->code) == BPF_X) {
-		verbose(cbs->private_data, "(%02x) %sr%d %s %sr%d\n",
-			insn->code, class == BPF_ALU ? "(u32) " : "",
+		verbose(cbs->private_data, "(%02x) %c%d %s %c%d\n",
+			insn->code, class == BPF_ALU ? 'w' : 'r',
 			insn->dst_reg,
 			bpf_alu_string[BPF_OP(insn->code) >> 4],
-			class == BPF_ALU ? "(u32) " : "",
+			class == BPF_ALU ? 'w' : 'r',
 			insn->src_reg);
 	} else {
-		verbose(cbs->private_data, "(%02x) %sr%d %s %s%d\n",
-			insn->code, class == BPF_ALU ? "(u32) " : "",
+		verbose(cbs->private_data, "(%02x) %c%d %s %d\n",
+			insn->code, class == BPF_ALU ? 'w' : 'r',
 			insn->dst_reg,
 			bpf_alu_string[BPF_OP(insn->code) >> 4],
-			class == BPF_ALU ? "(u32) " : "",
 			insn->imm);
 	}
 } else if (class == BPF_STX) {
@@ -220,7 +219,7 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
 			verbose(cbs->private_data, "BUG_ld_%02x\n", insn->code);
 			return;
 		}
-	} else if (class == BPF_JMP) {
+	} else if (class == BPF_JMP32 || class == BPF_JMP) {
 		u8 opcode = BPF_OP(insn->code);
 
 		if (opcode == BPF_CALL) {
@@ -244,13 +243,18 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
 	} else if (insn->code == (BPF_JMP | BPF_EXIT)) {
 		verbose(cbs->private_data, "(%02x) exit\n", insn->code);
 	} else if (BPF_SRC(insn->code) == BPF_X) {
-		verbose(cbs->private_data, "(%02x) if r%d %s r%d goto pc%+d\n",
-			insn->code, insn->dst_reg,
+		verbose(cbs->private_data,
+			"(%02x) if %c%d %s %c%d goto pc%+d\n",
+			insn->code, class == BPF_JMP32 ? 'w' : 'r',
+			insn->dst_reg,
 			bpf_jmp_string[BPF_OP(insn->code) >> 4],
+			class == BPF_JMP32 ? 'w' : 'r',
 			insn->src_reg, insn->off);
 	} else {
-		verbose(cbs->private_data, "(%02x) if r%d %s 0x%x goto pc%+d\n",
-			insn->code, insn->dst_reg,
+		verbose(cbs->private_data,
+			"(%02x) if %c%d %s 0x%x goto pc%+d\n",
+			insn->code, class == BPF_JMP32 ? 'w' : 'r',
+			insn->dst_reg,
 			bpf_jmp_string[BPF_OP(insn->code) >> 4],
 			insn->imm, insn->off);
 	}
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index f9274114c88d..fed15cf94dca 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -718,21 +718,12 @@ static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab)
 	       BITS_PER_LONG == 64;
 }
 
-static u32 htab_size_value(const struct bpf_htab *htab, bool percpu)
-{
-	u32 size = htab->map.value_size;
-
-	if (percpu || fd_htab_map_needs_adjust(htab))
-		size = round_up(size, 8);
-	return size;
-}
-
 static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 					 void *value, u32 key_size, u32 hash,
 					 bool percpu, bool onallcpus,
 					 struct htab_elem *old_elem)
 {
-	u32 size = htab_size_value(htab, percpu);
+	u32 size = htab->map.value_size;
 	bool prealloc = htab_is_prealloc(htab);
 	struct htab_elem *l_new, **pl_new;
 	void __percpu *pptr;
@@ -770,10 +761,13 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 			l_new = ERR_PTR(-ENOMEM);
 			goto dec_count;
 		}
+		check_and_init_map_lock(&htab->map,
+					l_new->key + round_up(key_size, 8));
 	}
 
 	memcpy(l_new->key, key, key_size);
 	if (percpu) {
+		size = round_up(size, 8);
 		if (prealloc) {
 			pptr = htab_elem_get_ptr(l_new, key_size);
 		} else {
@@ -791,8 +785,13 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 
 		if (!prealloc)
 			htab_elem_set_ptr(l_new, key_size, pptr);
-	} else {
+	} else if (fd_htab_map_needs_adjust(htab)) {
+		size = round_up(size, 8);
 		memcpy(l_new->key + round_up(key_size, 8), value, size);
+	} else {
+		copy_map_value(&htab->map,
+			       l_new->key + round_up(key_size, 8),
+			       value);
 	}
 
 	l_new->hash = hash;
@@ -805,11 +804,11 @@ dec_count:
 static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old,
 		       u64 map_flags)
 {
-	if (l_old && map_flags == BPF_NOEXIST)
+	if (l_old && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST)
 		/* elem already exists */
 		return -EEXIST;
 
-	if (!l_old && map_flags == BPF_EXIST)
+	if (!l_old && (map_flags & ~BPF_F_LOCK) == BPF_EXIST)
 		/* elem doesn't exist, cannot update it */
 		return -ENOENT;
 
@@ -828,7 +827,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	u32 key_size, hash;
 	int ret;
 
-	if (unlikely(map_flags > BPF_EXIST))
+	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
 		/* unknown flags */
 		return -EINVAL;
 
@@ -841,6 +840,28 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	b = __select_bucket(htab, hash);
 	head = &b->head;
 
+	if (unlikely(map_flags & BPF_F_LOCK)) {
+		if (unlikely(!map_value_has_spin_lock(map)))
+			return -EINVAL;
+		/* find an element without taking the bucket lock */
+		l_old = lookup_nulls_elem_raw(head, hash, key, key_size,
+					      htab->n_buckets);
+		ret = check_flags(htab, l_old, map_flags);
+		if (ret)
+			return ret;
+		if (l_old) {
+			/* grab the element lock and update value in place */
+			copy_map_value_locked(map,
+					      l_old->key + round_up(key_size, 8),
+					      value, false);
+			return 0;
+		}
+		/* fall through, grab the bucket lock and lookup again.
+		 * 99.9% chance that the element won't be found,
+		 * but second lookup under lock has to be done.
+		 */
+	}
+
 	/* bpf_map_update_elem() can be called in_irq() */
 	raw_spin_lock_irqsave(&b->lock, flags);
 
@@ -850,6 +871,20 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	if (ret)
 		goto err;
 
+	if (unlikely(l_old && (map_flags & BPF_F_LOCK))) {
+		/* first lookup without the bucket lock didn't find the element,
+		 * but second lookup with the bucket lock found it.
+		 * This case is highly unlikely, but has to be dealt with:
+		 * grab the element lock in addition to the bucket lock
+		 * and update element in place
+		 */
+		copy_map_value_locked(map,
+				      l_old->key + round_up(key_size, 8),
+				      value, false);
+		ret = 0;
+		goto err;
+	}
+
 	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
 				l_old);
 	if (IS_ERR(l_new)) {
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index a74972b07e74..a411fc17d265 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -221,6 +221,102 @@ const struct bpf_func_proto bpf_get_current_comm_proto = {
 	.arg2_type	= ARG_CONST_SIZE,
 };
 
+#if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK)
+
+static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
+{
+	arch_spinlock_t *l = (void *)lock;
+	union {
+		__u32 val;
+		arch_spinlock_t lock;
+	} u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED };
+
+	compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0");
+	BUILD_BUG_ON(sizeof(*l) != sizeof(__u32));
+	BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32));
+	arch_spin_lock(l);
+}
+
+static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
+{
+	arch_spinlock_t *l = (void *)lock;
+
+	arch_spin_unlock(l);
+}
+
+#else
+
+static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
+{
+	atomic_t *l = (void *)lock;
+
+	BUILD_BUG_ON(sizeof(*l) != sizeof(*lock));
+	do {
+		atomic_cond_read_relaxed(l, !VAL);
+	} while (atomic_xchg(l, 1));
+}
+
+static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
+{
+	atomic_t *l = (void *)lock;
+
+	atomic_set_release(l, 0);
+}
+
+#endif
+
+static DEFINE_PER_CPU(unsigned long, irqsave_flags);
+
+notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__bpf_spin_lock(lock);
+	__this_cpu_write(irqsave_flags, flags);
+	return 0;
+}
+
+const struct bpf_func_proto bpf_spin_lock_proto = {
+	.func		= bpf_spin_lock,
+	.gpl_only	= false,
+	.ret_type	= RET_VOID,
+	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
+};
+
+notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
+{
+	unsigned long flags;
+
+	flags = __this_cpu_read(irqsave_flags);
+	__bpf_spin_unlock(lock);
+	local_irq_restore(flags);
+	return 0;
+}
+
+const struct bpf_func_proto bpf_spin_unlock_proto = {
+	.func		= bpf_spin_unlock,
+	.gpl_only	= false,
+	.ret_type	= RET_VOID,
+	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
+};
+
+void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
+			   bool lock_src)
+{
+	struct bpf_spin_lock *lock;
+
+	if (lock_src)
+		lock = src + map->spin_lock_off;
+	else
+		lock = dst + map->spin_lock_off;
+	preempt_disable();
+	____bpf_spin_lock(lock);
+	copy_map_value(map, dst, src);
+	____bpf_spin_unlock(lock);
+	preempt_enable();
+}
+
 #ifdef CONFIG_CGROUPS
 BPF_CALL_0(bpf_get_current_cgroup_id)
 {
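A sketch of how a BPF program would use the two new helpers (not part of this diff, and written in today's libbpf map-declaration style, which postdates this merge; the section and map names are hypothetical):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct val {
	struct bpf_spin_lock lock;
	long cnt;
};

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, struct val);
} counters SEC(".maps");

SEC("tc")
int count_packets(struct __sk_buff *skb)
{
	__u32 key = 0;
	struct val *v;

	v = bpf_map_lookup_elem(&counters, &key);
	if (!v)
		return 0;
	/* The verifier enforces paired lock/unlock with no helper calls
	 * and only bounded straight-line code in between.
	 */
	bpf_spin_lock(&v->lock);
	v->cnt++;
	bpf_spin_unlock(&v->lock);
	return 0;
}

char _license[] SEC("license") = "GPL";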
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 07a34ef562a0..6b572e2de7fb 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -131,7 +131,14 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
 	struct bpf_cgroup_storage *storage;
 	struct bpf_storage_buffer *new;
 
-	if (flags != BPF_ANY && flags != BPF_EXIST)
+	if (unlikely(flags & ~(BPF_F_LOCK | BPF_EXIST | BPF_NOEXIST)))
+		return -EINVAL;
+
+	if (unlikely(flags & BPF_NOEXIST))
+		return -EINVAL;
+
+	if (unlikely((flags & BPF_F_LOCK) &&
+		     !map_value_has_spin_lock(map)))
 		return -EINVAL;
 
 	storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map,
@@ -139,6 +146,11 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
 	if (!storage)
 		return -ENOENT;
 
+	if (flags & BPF_F_LOCK) {
+		copy_map_value_locked(map, storage->buf->data, value, false);
+		return 0;
+	}
+
 	new = kmalloc_node(sizeof(struct bpf_storage_buffer) +
 			   map->value_size,
 			   __GFP_ZERO | GFP_ATOMIC | __GFP_NOWARN,
@@ -147,6 +159,7 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
 		return -ENOMEM;
 
 	memcpy(&new->data[0], value, map->value_size);
+	check_and_init_map_lock(map, new->data);
 
 	new = xchg(&storage->buf, new);
 	kfree_rcu(new, rcu);
@@ -483,6 +496,7 @@ struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
 		storage->buf = kmalloc_node(size, flags, map->numa_node);
 		if (!storage->buf)
 			goto enomem;
+		check_and_init_map_lock(map, storage->buf->data);
 	} else {
 		storage->percpu_buf = __alloc_percpu_gfp(size, 8, flags);
 		if (!storage->percpu_buf)
diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
index 52378d3e34b3..3dff41403583 100644
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
@@ -37,6 +37,11 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
 		return ERR_PTR(-EINVAL);
 	}
 
+	if (map_value_has_spin_lock(inner_map)) {
+		fdput(f);
+		return ERR_PTR(-ENOTSUPP);
+	}
+
 	inner_map_meta_size = sizeof(*inner_map_meta);
 	/* In some cases verifier needs to access beyond just base map. */
 	if (inner_map->ops == &array_map_ops)
@@ -53,6 +58,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
 	inner_map_meta->value_size = inner_map->value_size;
 	inner_map_meta->map_flags = inner_map->map_flags;
 	inner_map_meta->max_entries = inner_map->max_entries;
+	inner_map_meta->spin_lock_off = inner_map->spin_lock_off;
 
 	/* Misc members not needed in bpf_map_meta_equal() check. */
 	inner_map_meta->ops = inner_map->ops;
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 54cf2b9c44a4..ba635209ae9a 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -35,6 +35,7 @@ static DECLARE_RWSEM(bpf_devs_lock);
 struct bpf_offload_dev {
 	const struct bpf_prog_offload_ops *ops;
 	struct list_head netdevs;
+	void *priv;
 };
 
 struct bpf_offload_netdev {
@@ -173,6 +174,41 @@ int bpf_prog_offload_finalize(struct bpf_verifier_env *env)
 	return ret;
 }
 
+void
+bpf_prog_offload_replace_insn(struct bpf_verifier_env *env, u32 off,
+			      struct bpf_insn *insn)
+{
+	const struct bpf_prog_offload_ops *ops;
+	struct bpf_prog_offload *offload;
+	int ret = -EOPNOTSUPP;
+
+	down_read(&bpf_devs_lock);
+	offload = env->prog->aux->offload;
+	if (offload) {
+		ops = offload->offdev->ops;
+		if (!offload->opt_failed && ops->replace_insn)
+			ret = ops->replace_insn(env, off, insn);
+		offload->opt_failed |= ret;
+	}
+	up_read(&bpf_devs_lock);
+}
+
+void
+bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
+{
+	struct bpf_prog_offload *offload;
+	int ret = -EOPNOTSUPP;
+
+	down_read(&bpf_devs_lock);
+	offload = env->prog->aux->offload;
+	if (offload) {
+		if (!offload->opt_failed && offload->offdev->ops->remove_insns)
+			ret = offload->offdev->ops->remove_insns(env, off, cnt);
+		offload->opt_failed |= ret;
+	}
+	up_read(&bpf_devs_lock);
+}
+
 static void __bpf_prog_offload_destroy(struct bpf_prog *prog)
 {
 	struct bpf_prog_offload *offload = prog->aux->offload;
@@ -634,7 +670,7 @@ unlock:
 EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_unregister);
 
 struct bpf_offload_dev *
-bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops)
+bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops, void *priv)
 {
 	struct bpf_offload_dev *offdev;
 	int err;
@@ -653,6 +689,7 @@ bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops)
 		return ERR_PTR(-ENOMEM);
 
 	offdev->ops = ops;
+	offdev->priv = priv;
 	INIT_LIST_HEAD(&offdev->netdevs);
 
 	return offdev;
@@ -665,3 +702,9 @@ void bpf_offload_dev_destroy(struct bpf_offload_dev *offdev)
 	kfree(offdev);
 }
 EXPORT_SYMBOL_GPL(bpf_offload_dev_destroy);
+
+void *bpf_offload_dev_priv(struct bpf_offload_dev *offdev)
+{
+	return offdev->priv;
+}
+EXPORT_SYMBOL_GPL(bpf_offload_dev_priv);
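A hypothetical driver-side sketch of the new priv argument (the names drv_app, drv_bpf_ops and drv_init are invented for illustration; only bpf_offload_dev_create() and bpf_offload_dev_priv() come from the patch above):

static const struct bpf_prog_offload_ops drv_bpf_ops;	/* callbacks elided */

struct drv_app {
	struct bpf_offload_dev *bpf_dev;
	/* ... other device state ... */
};

static int drv_init(struct drv_app *app)
{
	/* Stash the driver's app state in the offload device at
	 * create time instead of carrying it separately.
	 */
	app->bpf_dev = bpf_offload_dev_create(&drv_bpf_ops, app);
	return PTR_ERR_OR_ZERO(app->bpf_dev);
}

/* Later, anywhere only the offload device is at hand: */
static struct drv_app *drv_app_of(struct bpf_offload_dev *offdev)
{
	return bpf_offload_dev_priv(offdev);
}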
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 84470d1480aa..bc34cf9fe9ee 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -463,7 +463,7 @@ int map_check_no_btf(const struct bpf_map *map,
 	return -ENOTSUPP;
 }
 
-static int map_check_btf(const struct bpf_map *map, const struct btf *btf,
+static int map_check_btf(struct bpf_map *map, const struct btf *btf,
 			 u32 btf_key_id, u32 btf_value_id)
 {
 	const struct btf_type *key_type, *value_type;
@@ -478,6 +478,22 @@ static int map_check_btf(const struct bpf_map *map, const struct btf *btf,
 	if (!value_type || value_size != map->value_size)
 		return -EINVAL;
 
+	map->spin_lock_off = btf_find_spin_lock(btf, value_type);
+
+	if (map_value_has_spin_lock(map)) {
+		if (map->map_type != BPF_MAP_TYPE_HASH &&
+		    map->map_type != BPF_MAP_TYPE_ARRAY &&
+		    map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE)
+			return -ENOTSUPP;
+		if (map->spin_lock_off + sizeof(struct bpf_spin_lock) >
+		    map->value_size) {
+			WARN_ONCE(1,
+				  "verifier bug spin_lock_off %d value_size %d\n",
+				  map->spin_lock_off, map->value_size);
+			return -EFAULT;
+		}
+	}
+
 	if (map->ops->map_check_btf)
 		ret = map->ops->map_check_btf(map, btf, key_type, value_type);
 
@@ -542,6 +558,8 @@ static int map_create(union bpf_attr *attr)
 		map->btf = btf;
 		map->btf_key_type_id = attr->btf_key_type_id;
 		map->btf_value_type_id = attr->btf_value_type_id;
+	} else {
+		map->spin_lock_off = -EINVAL;
 	}
 
 	err = security_bpf_map_alloc(map);
@@ -664,7 +682,7 @@ static void *__bpf_copy_key(void __user *ukey, u64 key_size)
 }
 
 /* last field in 'union bpf_attr' used by this command */
-#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
+#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags
 
 static int map_lookup_elem(union bpf_attr *attr)
 {
@@ -680,6 +698,9 @@ static int map_lookup_elem(union bpf_attr *attr)
 	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
 		return -EINVAL;
 
+	if (attr->flags & ~BPF_F_LOCK)
+		return -EINVAL;
+
 	f = fdget(ufd);
 	map = __bpf_map_get(f);
 	if (IS_ERR(map))
@@ -690,6 +711,12 @@ static int map_lookup_elem(union bpf_attr *attr)
 		goto err_put;
 	}
 
+	if ((attr->flags & BPF_F_LOCK) &&
+	    !map_value_has_spin_lock(map)) {
+		err = -EINVAL;
+		goto err_put;
+	}
+
 	key = __bpf_copy_key(ukey, map->key_size);
 	if (IS_ERR(key)) {
 		err = PTR_ERR(key);
@@ -745,7 +772,13 @@ static int map_lookup_elem(union bpf_attr *attr)
 			err = -ENOENT;
 		} else {
 			err = 0;
-			memcpy(value, ptr, value_size);
+			if (attr->flags & BPF_F_LOCK)
+				/* lock 'ptr' and copy everything but lock */
+				copy_map_value_locked(map, value, ptr, true);
+			else
+				copy_map_value(map, value, ptr);
+			/* mask lock, since value wasn't zero inited */
+			check_and_init_map_lock(map, value);
 		}
 		rcu_read_unlock();
 	}
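Userspace reaches this path by setting BPF_F_LOCK in the lookup attributes. A hedged sketch using libbpf's bpf_map_lookup_elem_flags() wrapper (added to libbpf around the same time; treat the exact helper name as an assumption), reusing the hash_elem layout sketched earlier:

	#include <bpf/bpf.h>

	/* Copies the element under its bpf_spin_lock; the lock bytes in the
	 * returned copy are zeroed by check_and_init_map_lock(), not leaked.
	 */
	int read_locked(int map_fd, int key, struct hash_elem *val)
	{
		return bpf_map_lookup_elem_flags(map_fd, &key, val, BPF_F_LOCK);
	}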
@@ -808,6 +841,12 @@ static int map_update_elem(union bpf_attr *attr)
 		goto err_put;
 	}
 
+	if ((attr->flags & BPF_F_LOCK) &&
+	    !map_value_has_spin_lock(map)) {
+		err = -EINVAL;
+		goto err_put;
+	}
+
 	key = __bpf_copy_key(ukey, map->key_size);
 	if (IS_ERR(key)) {
 		err = PTR_ERR(key);
@@ -1244,24 +1283,54 @@ static int bpf_prog_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
+static void bpf_prog_get_stats(const struct bpf_prog *prog,
+			       struct bpf_prog_stats *stats)
+{
+	u64 nsecs = 0, cnt = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		const struct bpf_prog_stats *st;
+		unsigned int start;
+		u64 tnsecs, tcnt;
+
+		st = per_cpu_ptr(prog->aux->stats, cpu);
+		do {
+			start = u64_stats_fetch_begin_irq(&st->syncp);
+			tnsecs = st->nsecs;
+			tcnt = st->cnt;
+		} while (u64_stats_fetch_retry_irq(&st->syncp, start));
+		nsecs += tnsecs;
+		cnt += tcnt;
+	}
+	stats->nsecs = nsecs;
+	stats->cnt = cnt;
+}
+
 #ifdef CONFIG_PROC_FS
 static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
 {
 	const struct bpf_prog *prog = filp->private_data;
 	char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
+	struct bpf_prog_stats stats;
 
+	bpf_prog_get_stats(prog, &stats);
 	bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
 	seq_printf(m,
 		   "prog_type:\t%u\n"
 		   "prog_jited:\t%u\n"
 		   "prog_tag:\t%s\n"
 		   "memlock:\t%llu\n"
-		   "prog_id:\t%u\n",
+		   "prog_id:\t%u\n"
+		   "run_time_ns:\t%llu\n"
+		   "run_cnt:\t%llu\n",
 		   prog->type,
 		   prog->jited,
 		   prog_tag,
 		   prog->pages * 1ULL << PAGE_SHIFT,
-		   prog->aux->id);
+		   prog->aux->id,
+		   stats.nsecs,
+		   stats.cnt);
 }
 #endif
 
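The same counters that fdinfo prints are exported through bpf_prog_info in the hunk below, so tooling can derive an average cost per invocation. A minimal userspace sketch (run-time stats collection is gated by a sysctl that is not part of this hunk):

	#include <stdio.h>
	#include <bpf/bpf.h>

	static void print_avg_runtime(int prog_fd)
	{
		struct bpf_prog_info info = {};
		__u32 len = sizeof(info);

		if (!bpf_obj_get_info_by_fd(prog_fd, &info, &len) && info.run_cnt)
			printf("avg %llu ns over %llu runs\n",
			       (unsigned long long)(info.run_time_ns / info.run_cnt),
			       (unsigned long long)info.run_cnt);
	}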
@@ -2083,6 +2152,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 	struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info);
 	struct bpf_prog_info info = {};
 	u32 info_len = attr->info.info_len;
+	struct bpf_prog_stats stats;
 	char __user *uinsns;
 	u32 ulen;
 	int err;
@@ -2122,6 +2192,10 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 	if (err)
 		return err;
 
+	bpf_prog_get_stats(prog, &stats);
+	info.run_time_ns = stats.nsecs;
+	info.run_cnt = stats.cnt;
+
 	if (!capable(CAP_SYS_ADMIN)) {
 		info.jited_prog_len = 0;
 		info.xlated_prog_len = 0;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 5fcce2f4209d..a7b96bf0e654 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -213,6 +213,7 @@ struct bpf_call_arg_meta {
 	s64 msize_smax_value;
 	u64 msize_umax_value;
 	int ptr_id;
+	int func_id;
 };
 
 static DEFINE_MUTEX(bpf_verifier_lock);
@@ -330,10 +331,19 @@ static bool type_is_pkt_pointer(enum bpf_reg_type type)
 	       type == PTR_TO_PACKET_META;
 }
 
+static bool type_is_sk_pointer(enum bpf_reg_type type)
+{
+	return type == PTR_TO_SOCKET ||
+		type == PTR_TO_SOCK_COMMON ||
+		type == PTR_TO_TCP_SOCK;
+}
+
 static bool reg_type_may_be_null(enum bpf_reg_type type)
 {
 	return type == PTR_TO_MAP_VALUE_OR_NULL ||
-	       type == PTR_TO_SOCKET_OR_NULL;
+	       type == PTR_TO_SOCKET_OR_NULL ||
+	       type == PTR_TO_SOCK_COMMON_OR_NULL ||
+	       type == PTR_TO_TCP_SOCK_OR_NULL;
 }
 
 static bool type_is_refcounted(enum bpf_reg_type type)
@@ -351,6 +361,12 @@ static bool reg_is_refcounted(const struct bpf_reg_state *reg)
 	return type_is_refcounted(reg->type);
 }
 
+static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
+{
+	return reg->type == PTR_TO_MAP_VALUE &&
+		map_value_has_spin_lock(reg->map_ptr);
+}
+
 static bool reg_is_refcounted_or_null(const struct bpf_reg_state *reg)
 {
 	return type_is_refcounted_or_null(reg->type);
@@ -370,6 +386,12 @@ static bool is_release_function(enum bpf_func_id func_id)
 	return func_id == BPF_FUNC_sk_release;
 }
 
+static bool is_acquire_function(enum bpf_func_id func_id)
+{
+	return func_id == BPF_FUNC_sk_lookup_tcp ||
+		func_id == BPF_FUNC_sk_lookup_udp;
+}
+
 /* string representation of 'enum bpf_reg_type' */
 static const char * const reg_type_str[] = {
 	[NOT_INIT]		= "?",
@@ -385,6 +407,10 @@ static const char * const reg_type_str[] = {
 	[PTR_TO_FLOW_KEYS]	= "flow_keys",
 	[PTR_TO_SOCKET]		= "sock",
 	[PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
+	[PTR_TO_SOCK_COMMON]	= "sock_common",
+	[PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
+	[PTR_TO_TCP_SOCK]	= "tcp_sock",
+	[PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
 };
 
 static char slot_type_char[] = {
389 415
390static char slot_type_char[] = { 416static char slot_type_char[] = {
@@ -611,13 +637,10 @@ static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
 }
 
 /* release function corresponding to acquire_reference_state(). Idempotent. */
-static int __release_reference_state(struct bpf_func_state *state, int ptr_id)
+static int release_reference_state(struct bpf_func_state *state, int ptr_id)
 {
 	int i, last_idx;
 
-	if (!ptr_id)
-		return -EFAULT;
-
 	last_idx = state->acquired_refs - 1;
 	for (i = 0; i < state->acquired_refs; i++) {
 		if (state->refs[i].id == ptr_id) {
@@ -629,21 +652,7 @@ static int __release_reference_state(struct bpf_func_state *state, int ptr_id)
 			return 0;
 		}
 	}
-	return -EFAULT;
-}
-
-/* variation on the above for cases where we expect that there must be an
- * outstanding reference for the specified ptr_id.
- */
-static int release_reference_state(struct bpf_verifier_env *env, int ptr_id)
-{
-	struct bpf_func_state *state = cur_func(env);
-	int err;
-
-	err = __release_reference_state(state, ptr_id);
-	if (WARN_ON_ONCE(err != 0))
-		verbose(env, "verifier internal error: can't release reference\n");
-	return err;
+	return -EINVAL;
 }
 
 static int transfer_reference_state(struct bpf_func_state *dst,
@@ -712,6 +721,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
 	}
 	dst_state->speculative = src->speculative;
 	dst_state->curframe = src->curframe;
+	dst_state->active_spin_lock = src->active_spin_lock;
 	for (i = 0; i <= src->curframe; i++) {
 		dst = dst_state->frame[i];
 		if (!dst) {
@@ -1095,7 +1105,7 @@ static int check_subprogs(struct bpf_verifier_env *env)
 	for (i = 0; i < insn_cnt; i++) {
 		u8 code = insn[i].code;
 
-		if (BPF_CLASS(code) != BPF_JMP)
+		if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
 			goto next;
 		if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
 			goto next;
@@ -1201,6 +1211,10 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
 	case CONST_PTR_TO_MAP:
 	case PTR_TO_SOCKET:
 	case PTR_TO_SOCKET_OR_NULL:
+	case PTR_TO_SOCK_COMMON:
+	case PTR_TO_SOCK_COMMON_OR_NULL:
+	case PTR_TO_TCP_SOCK:
+	case PTR_TO_TCP_SOCK_OR_NULL:
 		return true;
 	default:
 		return false;
@@ -1483,6 +1497,21 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
 	if (err)
 		verbose(env, "R%d max value is outside of the array range\n",
 			regno);
+
+	if (map_value_has_spin_lock(reg->map_ptr)) {
+		u32 lock = reg->map_ptr->spin_lock_off;
+
+		/* if any part of struct bpf_spin_lock can be touched by
+		 * load/store reject this program.
+		 * To check that [x1, x2) overlaps with [y1, y2)
+		 * it is sufficient to check x1 < y2 && y1 < x2.
+		 */
+		if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) &&
+		     lock < reg->umax_value + off + size) {
+			verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n");
+			return -EACCES;
+		}
+	}
 	return err;
 }
 
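A concrete instance of the half-open overlap test above, with illustrative numbers:

	/* Say spin_lock_off = 8 and sizeof(struct bpf_spin_lock) = 4, so the
	 * lock occupies bytes [8, 12) of the value.  A 4-byte load at offset
	 * 10 covers [10, 14): 10 < 12 && 8 < 14 both hold, so it overlaps the
	 * lock and the program is rejected.  A 4-byte load at offset 12
	 * covers [12, 16): 12 < 12 fails, so it is allowed.
	 */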
@@ -1624,6 +1653,7 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
 	struct bpf_reg_state *regs = cur_regs(env);
 	struct bpf_reg_state *reg = &regs[regno];
 	struct bpf_insn_access_aux info = {};
+	bool valid;
 
 	if (reg->smin_value < 0) {
 		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
@@ -1631,15 +1661,31 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
 		return -EACCES;
 	}
 
-	if (!bpf_sock_is_valid_access(off, size, t, &info)) {
-		verbose(env, "invalid bpf_sock access off=%d size=%d\n",
-			off, size);
-		return -EACCES;
+	switch (reg->type) {
+	case PTR_TO_SOCK_COMMON:
+		valid = bpf_sock_common_is_valid_access(off, size, t, &info);
+		break;
+	case PTR_TO_SOCKET:
+		valid = bpf_sock_is_valid_access(off, size, t, &info);
+		break;
+	case PTR_TO_TCP_SOCK:
+		valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
+		break;
+	default:
+		valid = false;
 	}
 
-	env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
 
-	return 0;
+	if (valid) {
+		env->insn_aux_data[insn_idx].ctx_field_size =
+			info.ctx_field_size;
+		return 0;
+	}
+
+	verbose(env, "R%d invalid %s access off=%d size=%d\n",
+		regno, reg_type_str[reg->type], off, size);
+
+	return -EACCES;
 }
 
 static bool __is_pointer_value(bool allow_ptr_leaks,
@@ -1665,8 +1711,14 @@ static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
 {
 	const struct bpf_reg_state *reg = reg_state(env, regno);
 
-	return reg->type == PTR_TO_CTX ||
-	       reg->type == PTR_TO_SOCKET;
+	return reg->type == PTR_TO_CTX;
+}
+
+static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
+{
+	const struct bpf_reg_state *reg = reg_state(env, regno);
+
+	return type_is_sk_pointer(reg->type);
 }
 
 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
@@ -1777,6 +1829,12 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
 	case PTR_TO_SOCKET:
 		pointer_desc = "sock ";
 		break;
+	case PTR_TO_SOCK_COMMON:
+		pointer_desc = "sock_common ";
+		break;
+	case PTR_TO_TCP_SOCK:
+		pointer_desc = "tcp_sock ";
+		break;
 	default:
 		break;
 	}
@@ -1980,11 +2038,14 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 		 * PTR_TO_PACKET[_META,_END]. In the latter
 		 * case, we know the offset is zero.
 		 */
-		if (reg_type == SCALAR_VALUE)
+		if (reg_type == SCALAR_VALUE) {
 			mark_reg_unknown(env, regs, value_regno);
-		else
+		} else {
 			mark_reg_known_zero(env, regs,
 					    value_regno);
+			if (reg_type_may_be_null(reg_type))
+				regs[value_regno].id = ++env->id_gen;
+		}
 		regs[value_regno].type = reg_type;
 	}
 
@@ -2030,9 +2091,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 		err = check_flow_keys_access(env, off, size);
 		if (!err && t == BPF_READ && value_regno >= 0)
 			mark_reg_unknown(env, regs, value_regno);
-	} else if (reg->type == PTR_TO_SOCKET) {
+	} else if (type_is_sk_pointer(reg->type)) {
 		if (t == BPF_WRITE) {
-			verbose(env, "cannot write into socket\n");
+			verbose(env, "R%d cannot write into %s\n",
+				regno, reg_type_str[reg->type]);
 			return -EACCES;
 		}
 		err = check_sock_access(env, insn_idx, regno, off, size, t);
2079 2141
2080 if (is_ctx_reg(env, insn->dst_reg) || 2142 if (is_ctx_reg(env, insn->dst_reg) ||
2081 is_pkt_reg(env, insn->dst_reg) || 2143 is_pkt_reg(env, insn->dst_reg) ||
2082 is_flow_key_reg(env, insn->dst_reg)) { 2144 is_flow_key_reg(env, insn->dst_reg) ||
2145 is_sk_reg(env, insn->dst_reg)) {
2083 verbose(env, "BPF_XADD stores into R%d %s is not allowed\n", 2146 verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
2084 insn->dst_reg, 2147 insn->dst_reg,
2085 reg_type_str[reg_state(env, insn->dst_reg)->type]); 2148 reg_type_str[reg_state(env, insn->dst_reg)->type]);
@@ -2195,6 +2258,91 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
 	}
 }
 
+/* Implementation details:
+ * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
+ * Two bpf_map_lookups (even with the same key) will have different reg->id.
+ * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after
+ * value_or_null->value transition, since the verifier only cares about
+ * the range of access to valid map value pointer and doesn't care about actual
+ * address of the map element.
+ * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
+ * reg->id > 0 after value_or_null->value transition. By doing so
+ * two bpf_map_lookups will be considered two different pointers that
+ * point to different bpf_spin_locks.
+ * The verifier allows taking only one bpf_spin_lock at a time to avoid
+ * dead-locks.
+ * Since only one bpf_spin_lock is allowed the checks are simpler than
+ * reg_is_refcounted() logic. The verifier needs to remember only
+ * one spin_lock instead of array of acquired_refs.
+ * cur_state->active_spin_lock remembers which map value element got locked
+ * and clears it after bpf_spin_unlock.
+ */
+static int process_spin_lock(struct bpf_verifier_env *env, int regno,
+			     bool is_lock)
+{
+	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
+	struct bpf_verifier_state *cur = env->cur_state;
+	bool is_const = tnum_is_const(reg->var_off);
+	struct bpf_map *map = reg->map_ptr;
+	u64 val = reg->var_off.value;
+
+	if (reg->type != PTR_TO_MAP_VALUE) {
+		verbose(env, "R%d is not a pointer to map_value\n", regno);
+		return -EINVAL;
+	}
+	if (!is_const) {
+		verbose(env,
+			"R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
+			regno);
+		return -EINVAL;
+	}
+	if (!map->btf) {
+		verbose(env,
+			"map '%s' has to have BTF in order to use bpf_spin_lock\n",
+			map->name);
+		return -EINVAL;
+	}
+	if (!map_value_has_spin_lock(map)) {
+		if (map->spin_lock_off == -E2BIG)
+			verbose(env,
+				"map '%s' has more than one 'struct bpf_spin_lock'\n",
+				map->name);
+		else if (map->spin_lock_off == -ENOENT)
+			verbose(env,
+				"map '%s' doesn't have 'struct bpf_spin_lock'\n",
+				map->name);
+		else
+			verbose(env,
+				"map '%s' is not a struct type or bpf_spin_lock is mangled\n",
+				map->name);
+		return -EINVAL;
+	}
+	if (map->spin_lock_off != val + reg->off) {
+		verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n",
+			val + reg->off);
+		return -EINVAL;
+	}
+	if (is_lock) {
+		if (cur->active_spin_lock) {
+			verbose(env,
+				"Locking two bpf_spin_locks are not allowed\n");
+			return -EINVAL;
+		}
+		cur->active_spin_lock = reg->id;
+	} else {
+		if (!cur->active_spin_lock) {
+			verbose(env, "bpf_spin_unlock without taking a lock\n");
+			return -EINVAL;
+		}
+		if (cur->active_spin_lock != reg->id) {
+			verbose(env, "bpf_spin_unlock of different lock\n");
+			return -EINVAL;
+		}
+		cur->active_spin_lock = 0;
+	}
+	return 0;
+}
+
 static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
 {
 	return type == ARG_PTR_TO_MEM ||
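For reference, the programming model this function enforces looks roughly like the following BPF C fragment (helper names per this series; the surrounding map definition is assumed):

	struct hash_elem *val = bpf_map_lookup_elem(&hash_map, &key);

	if (val) {
		bpf_spin_lock(&val->lock);	/* only one lock may be held */
		val->cnt++;			/* no helper calls in here   */
		bpf_spin_unlock(&val->lock);	/* must be the same lock     */
	}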
@@ -2261,6 +2409,11 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 		err = check_ctx_reg(env, reg, regno);
 		if (err < 0)
 			return err;
+	} else if (arg_type == ARG_PTR_TO_SOCK_COMMON) {
+		expected_type = PTR_TO_SOCK_COMMON;
+		/* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */
+		if (!type_is_sk_pointer(type))
+			goto err_type;
 	} else if (arg_type == ARG_PTR_TO_SOCKET) {
 		expected_type = PTR_TO_SOCKET;
 		if (type != expected_type)
@@ -2271,6 +2424,17 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 			return -EFAULT;
 		}
 		meta->ptr_id = reg->id;
+	} else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
+		if (meta->func_id == BPF_FUNC_spin_lock) {
+			if (process_spin_lock(env, regno, true))
+				return -EACCES;
+		} else if (meta->func_id == BPF_FUNC_spin_unlock) {
+			if (process_spin_lock(env, regno, false))
+				return -EACCES;
+		} else {
+			verbose(env, "verifier internal error\n");
+			return -EFAULT;
+		}
 	} else if (arg_type_is_mem_ptr(arg_type)) {
 		expected_type = PTR_TO_STACK;
 		/* One exception here. In case function allows for NULL to be
@@ -2664,7 +2828,7 @@ static int release_reference(struct bpf_verifier_env *env,
 	for (i = 0; i <= vstate->curframe; i++)
 		release_reg_references(env, vstate->frame[i], meta->ptr_id);
 
-	return release_reference_state(env, meta->ptr_id);
+	return release_reference_state(cur_func(env), meta->ptr_id);
 }
 
 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
@@ -2890,6 +3054,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 		return err;
 	}
 
+	meta.func_id = func_id;
 	/* check args */
 	err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta);
 	if (err)
@@ -2929,8 +3094,11 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 		}
 	} else if (is_release_function(func_id)) {
 		err = release_reference(env, &meta);
-		if (err)
+		if (err) {
+			verbose(env, "func %s#%d reference has not been acquired before\n",
+				func_id_name(func_id), func_id);
 			return err;
+		}
 	}
 
 	regs = cur_regs(env);
@@ -2972,17 +3140,30 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 		regs[BPF_REG_0].map_ptr = meta.map_ptr;
 		if (fn->ret_type == RET_PTR_TO_MAP_VALUE) {
 			regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
+			if (map_value_has_spin_lock(meta.map_ptr))
+				regs[BPF_REG_0].id = ++env->id_gen;
 		} else {
 			regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
 			regs[BPF_REG_0].id = ++env->id_gen;
 		}
 	} else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
-		int id = acquire_reference_state(env, insn_idx);
-		if (id < 0)
-			return id;
 		mark_reg_known_zero(env, regs, BPF_REG_0);
 		regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
-		regs[BPF_REG_0].id = id;
+		if (is_acquire_function(func_id)) {
+			int id = acquire_reference_state(env, insn_idx);
+
+			if (id < 0)
+				return id;
+			/* For release_reference() */
+			regs[BPF_REG_0].id = id;
+		} else {
+			/* For mark_ptr_or_null_reg() */
+			regs[BPF_REG_0].id = ++env->id_gen;
+		}
+	} else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) {
+		mark_reg_known_zero(env, regs, BPF_REG_0);
+		regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL;
+		regs[BPF_REG_0].id = ++env->id_gen;
 	} else {
 		verbose(env, "unknown return type %d of func %s#%d\n",
 			fn->ret_type, func_id_name(func_id), func_id);
@@ -3242,6 +3423,10 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 	case PTR_TO_PACKET_END:
 	case PTR_TO_SOCKET:
 	case PTR_TO_SOCKET_OR_NULL:
+	case PTR_TO_SOCK_COMMON:
+	case PTR_TO_SOCK_COMMON_OR_NULL:
+	case PTR_TO_TCP_SOCK:
+	case PTR_TO_TCP_SOCK_OR_NULL:
 		verbose(env, "R%d pointer arithmetic on %s prohibited\n",
 			dst, reg_type_str[ptr_reg->type]);
 		return -EACCES;
@@ -4034,11 +4219,50 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
  * 0 - branch will not be taken and fall-through to next insn
  * -1 - unknown. Example: "if (reg < 5)" is unknown when register value range [0,10]
  */
-static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
+static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
+			   bool is_jmp32)
 {
+	struct bpf_reg_state reg_lo;
+	s64 sval;
+
 	if (__is_pointer_value(false, reg))
 		return -1;
 
+	if (is_jmp32) {
+		reg_lo = *reg;
+		reg = &reg_lo;
+		/* For JMP32, only low 32 bits are compared, coerce_reg_to_size
+		 * could truncate high bits and update umin/umax according to
+		 * information of low bits.
+		 */
+		coerce_reg_to_size(reg, 4);
+		/* smin/smax need special handling. For example, after coerce,
+		 * if smin_value is 0x00000000ffffffffLL, the value is -1 when
+		 * used as operand to JMP32. It is a negative number from s32's
+		 * point of view, while it is a positive number when seen as
+		 * s64. The smin/smax are kept as s64, therefore, when used with
+		 * JMP32, they need to be transformed into s32, then sign
+		 * extended back to s64.
+		 *
+		 * Also, smin/smax were copied from umin/umax. If umin/umax has
+		 * different sign bit, then min/max relationship doesn't
+		 * maintain after casting into s32, for this case, set smin/smax
+		 * to safest range.
+		 */
+		if ((reg->umax_value ^ reg->umin_value) &
+		    (1ULL << 31)) {
+			reg->smin_value = S32_MIN;
+			reg->smax_value = S32_MAX;
+		}
+		reg->smin_value = (s64)(s32)reg->smin_value;
+		reg->smax_value = (s64)(s32)reg->smax_value;
+
+		val = (u32)val;
+		sval = (s64)(s32)val;
+	} else {
+		sval = (s64)val;
+	}
+
 	switch (opcode) {
 	case BPF_JEQ:
 		if (tnum_is_const(reg->var_off))
@@ -4061,9 +4285,9 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
 			return 0;
 		break;
 	case BPF_JSGT:
-		if (reg->smin_value > (s64)val)
+		if (reg->smin_value > sval)
 			return 1;
-		else if (reg->smax_value < (s64)val)
+		else if (reg->smax_value < sval)
 			return 0;
 		break;
 	case BPF_JLT:
@@ -4073,9 +4297,9 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
 			return 0;
 		break;
 	case BPF_JSLT:
-		if (reg->smax_value < (s64)val)
+		if (reg->smax_value < sval)
 			return 1;
-		else if (reg->smin_value >= (s64)val)
+		else if (reg->smin_value >= sval)
 			return 0;
 		break;
 	case BPF_JGE:
@@ -4085,9 +4309,9 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
 			return 0;
 		break;
 	case BPF_JSGE:
-		if (reg->smin_value >= (s64)val)
+		if (reg->smin_value >= sval)
 			return 1;
-		else if (reg->smax_value < (s64)val)
+		else if (reg->smax_value < sval)
 			return 0;
 		break;
 	case BPF_JLE:
@@ -4097,9 +4321,9 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
 			return 0;
 		break;
 	case BPF_JSLE:
-		if (reg->smax_value <= (s64)val)
+		if (reg->smax_value <= sval)
 			return 1;
-		else if (reg->smin_value > (s64)val)
+		else if (reg->smin_value > sval)
 			return 0;
 		break;
 	}
@@ -4107,6 +4331,29 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
 	return -1;
 }
 
+/* Generate min value of the high 32-bit from TNUM info. */
+static u64 gen_hi_min(struct tnum var)
+{
+	return var.value & ~0xffffffffULL;
+}
+
+/* Generate max value of the high 32-bit from TNUM info. */
+static u64 gen_hi_max(struct tnum var)
+{
+	return (var.value | var.mask) & ~0xffffffffULL;
+}
+
+/* Return true if VAL is compared with a s64 sign extended from s32, and they
+ * are with the same signedness.
+ */
+static bool cmp_val_with_extended_s64(s64 sval, struct bpf_reg_state *reg)
+{
+	return ((s32)sval >= 0 &&
+		reg->smin_value >= 0 && reg->smax_value <= S32_MAX) ||
+	       ((s32)sval < 0 &&
+		reg->smax_value <= 0 && reg->smin_value >= S32_MIN);
+}
+
 /* Adjusts the register min/max values in the case that the dst_reg is the
  * variable register that we are working on, and src_reg is a constant or we're
  * simply doing a BPF_K check.
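The sign-extension hazard the comments above describe is easy to reproduce with plain C casts; a host-side illustration (not verifier code):

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		int64_t smin = 0x00000000ffffffffLL;	/* positive as s64 */
		/* as a JMP32 operand only the low 32 bits matter: -1 as s32 */
		int64_t coerced = (int64_t)(int32_t)smin;

		printf("%lld -> %lld\n", (long long)smin, (long long)coerced);
		/* prints: 4294967295 -> -1 */
		return 0;
	}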
@@ -4114,8 +4361,10 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
  */
 static void reg_set_min_max(struct bpf_reg_state *true_reg,
 			    struct bpf_reg_state *false_reg, u64 val,
-			    u8 opcode)
+			    u8 opcode, bool is_jmp32)
 {
+	s64 sval;
+
 	/* If the dst_reg is a pointer, we can't learn anything about its
 	 * variable offset from the compare (unless src_reg were a pointer into
 	 * the same object, but we don't bother with that.
@@ -4125,19 +4374,31 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg,
 	if (__is_pointer_value(false, false_reg))
 		return;
 
+	val = is_jmp32 ? (u32)val : val;
+	sval = is_jmp32 ? (s64)(s32)val : (s64)val;
+
 	switch (opcode) {
 	case BPF_JEQ:
-		/* If this is false then we know nothing Jon Snow, but if it is
-		 * true then we know for sure.
-		 */
-		__mark_reg_known(true_reg, val);
-		break;
 	case BPF_JNE:
-		/* If this is true we know nothing Jon Snow, but if it is false
-		 * we know the value for sure;
+	{
+		struct bpf_reg_state *reg =
+			opcode == BPF_JEQ ? true_reg : false_reg;
+
+		/* For BPF_JEQ, if this is false we know nothing Jon Snow, but
+		 * if it is true we know the value for sure. Likewise for
+		 * BPF_JNE.
 		 */
-		__mark_reg_known(false_reg, val);
+		if (is_jmp32) {
+			u64 old_v = reg->var_off.value;
+			u64 hi_mask = ~0xffffffffULL;
+
+			reg->var_off.value = (old_v & hi_mask) | val;
+			reg->var_off.mask &= hi_mask;
+		} else {
+			__mark_reg_known(reg, val);
+		}
 		break;
+	}
 	case BPF_JSET:
 		false_reg->var_off = tnum_and(false_reg->var_off,
 					      tnum_const(~val));
@@ -4145,38 +4406,61 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg,
 		true_reg->var_off = tnum_or(true_reg->var_off,
 					    tnum_const(val));
 		break;
-	case BPF_JGT:
-		false_reg->umax_value = min(false_reg->umax_value, val);
-		true_reg->umin_value = max(true_reg->umin_value, val + 1);
-		break;
-	case BPF_JSGT:
-		false_reg->smax_value = min_t(s64, false_reg->smax_value, val);
-		true_reg->smin_value = max_t(s64, true_reg->smin_value, val + 1);
-		break;
-	case BPF_JLT:
-		false_reg->umin_value = max(false_reg->umin_value, val);
-		true_reg->umax_value = min(true_reg->umax_value, val - 1);
-		break;
-	case BPF_JSLT:
-		false_reg->smin_value = max_t(s64, false_reg->smin_value, val);
-		true_reg->smax_value = min_t(s64, true_reg->smax_value, val - 1);
-		break;
 	case BPF_JGE:
-		false_reg->umax_value = min(false_reg->umax_value, val - 1);
-		true_reg->umin_value = max(true_reg->umin_value, val);
+	case BPF_JGT:
+	{
+		u64 false_umax = opcode == BPF_JGT ? val : val - 1;
+		u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
+
+		if (is_jmp32) {
+			false_umax += gen_hi_max(false_reg->var_off);
+			true_umin += gen_hi_min(true_reg->var_off);
+		}
+		false_reg->umax_value = min(false_reg->umax_value, false_umax);
+		true_reg->umin_value = max(true_reg->umin_value, true_umin);
 		break;
+	}
 	case BPF_JSGE:
-		false_reg->smax_value = min_t(s64, false_reg->smax_value, val - 1);
-		true_reg->smin_value = max_t(s64, true_reg->smin_value, val);
+	case BPF_JSGT:
+	{
+		s64 false_smax = opcode == BPF_JSGT ? sval : sval - 1;
+		s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
+
+		/* If the full s64 was not sign-extended from s32 then don't
+		 * deduct further info.
+		 */
+		if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
+			break;
+		false_reg->smax_value = min(false_reg->smax_value, false_smax);
+		true_reg->smin_value = max(true_reg->smin_value, true_smin);
 		break;
+	}
 	case BPF_JLE:
-		false_reg->umin_value = max(false_reg->umin_value, val + 1);
-		true_reg->umax_value = min(true_reg->umax_value, val);
+	case BPF_JLT:
+	{
+		u64 false_umin = opcode == BPF_JLT ? val : val + 1;
+		u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
+
+		if (is_jmp32) {
+			false_umin += gen_hi_min(false_reg->var_off);
+			true_umax += gen_hi_max(true_reg->var_off);
+		}
+		false_reg->umin_value = max(false_reg->umin_value, false_umin);
+		true_reg->umax_value = min(true_reg->umax_value, true_umax);
 		break;
+	}
 	case BPF_JSLE:
-		false_reg->smin_value = max_t(s64, false_reg->smin_value, val + 1);
-		true_reg->smax_value = min_t(s64, true_reg->smax_value, val);
+	case BPF_JSLT:
+	{
+		s64 false_smin = opcode == BPF_JSLT ? sval : sval + 1;
+		s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
+
+		if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
+			break;
+		false_reg->smin_value = max(false_reg->smin_value, false_smin);
+		true_reg->smax_value = min(true_reg->smax_value, true_smax);
 		break;
+	}
 	default:
 		break;
 	}
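A worked instance of the folded JGT/JGE arms: for "if r1 > 10" with r1 known in [0, 100], the true branch tightens umin to 11 (val + 1) and the false branch caps umax at 10 (val); for JGE the two bounds would be 10 and 9 instead. In the JMP32 case the same 32-bit bounds are first lifted into the right 64-bit range by adding the known high-32-bit pattern obtained from gen_hi_min()/gen_hi_max().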
@@ -4199,24 +4483,34 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg,
  */
 static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
 				struct bpf_reg_state *false_reg, u64 val,
-				u8 opcode)
+				u8 opcode, bool is_jmp32)
 {
+	s64 sval;
+
 	if (__is_pointer_value(false, false_reg))
 		return;
 
+	val = is_jmp32 ? (u32)val : val;
+	sval = is_jmp32 ? (s64)(s32)val : (s64)val;
+
 	switch (opcode) {
 	case BPF_JEQ:
-		/* If this is false then we know nothing Jon Snow, but if it is
-		 * true then we know for sure.
-		 */
-		__mark_reg_known(true_reg, val);
-		break;
 	case BPF_JNE:
-		/* If this is true we know nothing Jon Snow, but if it is false
-		 * we know the value for sure;
-		 */
-		__mark_reg_known(false_reg, val);
+	{
+		struct bpf_reg_state *reg =
+			opcode == BPF_JEQ ? true_reg : false_reg;
+
+		if (is_jmp32) {
+			u64 old_v = reg->var_off.value;
+			u64 hi_mask = ~0xffffffffULL;
+
+			reg->var_off.value = (old_v & hi_mask) | val;
+			reg->var_off.mask &= hi_mask;
+		} else {
+			__mark_reg_known(reg, val);
+		}
 		break;
+	}
 	case BPF_JSET:
 		false_reg->var_off = tnum_and(false_reg->var_off,
 					      tnum_const(~val));
@@ -4224,38 +4518,58 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
 		true_reg->var_off = tnum_or(true_reg->var_off,
 					    tnum_const(val));
 		break;
-	case BPF_JGT:
-		true_reg->umax_value = min(true_reg->umax_value, val - 1);
-		false_reg->umin_value = max(false_reg->umin_value, val);
-		break;
-	case BPF_JSGT:
-		true_reg->smax_value = min_t(s64, true_reg->smax_value, val - 1);
-		false_reg->smin_value = max_t(s64, false_reg->smin_value, val);
-		break;
-	case BPF_JLT:
-		true_reg->umin_value = max(true_reg->umin_value, val + 1);
-		false_reg->umax_value = min(false_reg->umax_value, val);
-		break;
-	case BPF_JSLT:
-		true_reg->smin_value = max_t(s64, true_reg->smin_value, val + 1);
-		false_reg->smax_value = min_t(s64, false_reg->smax_value, val);
-		break;
 	case BPF_JGE:
-		true_reg->umax_value = min(true_reg->umax_value, val);
-		false_reg->umin_value = max(false_reg->umin_value, val + 1);
+	case BPF_JGT:
+	{
+		u64 false_umin = opcode == BPF_JGT ? val : val + 1;
+		u64 true_umax = opcode == BPF_JGT ? val - 1 : val;
+
+		if (is_jmp32) {
+			false_umin += gen_hi_min(false_reg->var_off);
+			true_umax += gen_hi_max(true_reg->var_off);
+		}
+		false_reg->umin_value = max(false_reg->umin_value, false_umin);
+		true_reg->umax_value = min(true_reg->umax_value, true_umax);
 		break;
+	}
 	case BPF_JSGE:
-		true_reg->smax_value = min_t(s64, true_reg->smax_value, val);
-		false_reg->smin_value = max_t(s64, false_reg->smin_value, val + 1);
+	case BPF_JSGT:
+	{
+		s64 false_smin = opcode == BPF_JSGT ? sval : sval + 1;
+		s64 true_smax = opcode == BPF_JSGT ? sval - 1 : sval;
+
+		if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
+			break;
+		false_reg->smin_value = max(false_reg->smin_value, false_smin);
+		true_reg->smax_value = min(true_reg->smax_value, true_smax);
 		break;
+	}
 	case BPF_JLE:
-		true_reg->umin_value = max(true_reg->umin_value, val);
-		false_reg->umax_value = min(false_reg->umax_value, val - 1);
+	case BPF_JLT:
+	{
+		u64 false_umax = opcode == BPF_JLT ? val : val - 1;
+		u64 true_umin = opcode == BPF_JLT ? val + 1 : val;
+
+		if (is_jmp32) {
+			false_umax += gen_hi_max(false_reg->var_off);
+			true_umin += gen_hi_min(true_reg->var_off);
+		}
+		false_reg->umax_value = min(false_reg->umax_value, false_umax);
+		true_reg->umin_value = max(true_reg->umin_value, true_umin);
 		break;
+	}
 	case BPF_JSLE:
-		true_reg->smin_value = max_t(s64, true_reg->smin_value, val);
-		false_reg->smax_value = min_t(s64, false_reg->smax_value, val - 1);
+	case BPF_JSLT:
+	{
+		s64 false_smax = opcode == BPF_JSLT ? sval : sval - 1;
+		s64 true_smin = opcode == BPF_JSLT ? sval + 1 : sval;
+
+		if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
+			break;
+		false_reg->smax_value = min(false_reg->smax_value, false_smax);
+		true_reg->smin_value = max(true_reg->smin_value, true_smin);
 		break;
+	}
 	default:
 		break;
 	}
@@ -4346,8 +4660,13 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
 		}
 	} else if (reg->type == PTR_TO_SOCKET_OR_NULL) {
 		reg->type = PTR_TO_SOCKET;
+	} else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) {
+		reg->type = PTR_TO_SOCK_COMMON;
+	} else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) {
+		reg->type = PTR_TO_TCP_SOCK;
 	}
-	if (is_null || !reg_is_refcounted(reg)) {
+	if (is_null || !(reg_is_refcounted(reg) ||
+			 reg_may_point_to_spin_lock(reg))) {
 		/* We don't need id from this point onwards anymore,
 		 * thus we should better reset it, so that state
 		 * pruning has chances to take effect.
@@ -4369,7 +4688,7 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
 	int i, j;
 
 	if (reg_is_refcounted_or_null(&regs[regno]) && is_null)
-		__release_reference_state(state, id);
+		release_reference_state(state, id);
 
 	for (i = 0; i < MAX_BPF_REG; i++)
 		mark_ptr_or_null_reg(state, &regs[i], id, is_null);
@@ -4393,6 +4712,10 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn,
 	if (BPF_SRC(insn->code) != BPF_X)
 		return false;
 
+	/* Pointers are always 64-bit. */
+	if (BPF_CLASS(insn->code) == BPF_JMP32)
+		return false;
+
 	switch (BPF_OP(insn->code)) {
 	case BPF_JGT:
 		if ((dst_reg->type == PTR_TO_PACKET &&
@@ -4485,16 +4808,18 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 	struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
 	struct bpf_reg_state *dst_reg, *other_branch_regs;
 	u8 opcode = BPF_OP(insn->code);
+	bool is_jmp32;
 	int err;
 
-	if (opcode > BPF_JSLE) {
-		verbose(env, "invalid BPF_JMP opcode %x\n", opcode);
+	/* Only conditional jumps are expected to reach here. */
+	if (opcode == BPF_JA || opcode > BPF_JSLE) {
+		verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
 		return -EINVAL;
 	}
 
 	if (BPF_SRC(insn->code) == BPF_X) {
 		if (insn->imm != 0) {
-			verbose(env, "BPF_JMP uses reserved fields\n");
+			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
 			return -EINVAL;
 		}
 
@@ -4510,7 +4835,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 		}
 	} else {
 		if (insn->src_reg != BPF_REG_0) {
-			verbose(env, "BPF_JMP uses reserved fields\n");
+			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
 			return -EINVAL;
 		}
 	}
@@ -4521,9 +4846,11 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 		return err;
 
 	dst_reg = &regs[insn->dst_reg];
+	is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
 
 	if (BPF_SRC(insn->code) == BPF_K) {
-		int pred = is_branch_taken(dst_reg, insn->imm, opcode);
+		int pred = is_branch_taken(dst_reg, insn->imm, opcode,
+					   is_jmp32);
 
 		if (pred == 1) {
 			/* only follow the goto, ignore fall-through */
@@ -4551,30 +4878,51 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 	 * comparable.
 	 */
 	if (BPF_SRC(insn->code) == BPF_X) {
+		struct bpf_reg_state *src_reg = &regs[insn->src_reg];
+		struct bpf_reg_state lo_reg0 = *dst_reg;
+		struct bpf_reg_state lo_reg1 = *src_reg;
+		struct bpf_reg_state *src_lo, *dst_lo;
+
+		dst_lo = &lo_reg0;
+		src_lo = &lo_reg1;
+		coerce_reg_to_size(dst_lo, 4);
+		coerce_reg_to_size(src_lo, 4);
+
 		if (dst_reg->type == SCALAR_VALUE &&
-		    regs[insn->src_reg].type == SCALAR_VALUE) {
-			if (tnum_is_const(regs[insn->src_reg].var_off))
+		    src_reg->type == SCALAR_VALUE) {
+			if (tnum_is_const(src_reg->var_off) ||
+			    (is_jmp32 && tnum_is_const(src_lo->var_off)))
 				reg_set_min_max(&other_branch_regs[insn->dst_reg],
-						dst_reg, regs[insn->src_reg].var_off.value,
-						opcode);
-			else if (tnum_is_const(dst_reg->var_off))
+						dst_reg,
+						is_jmp32
+						? src_lo->var_off.value
+						: src_reg->var_off.value,
+						opcode, is_jmp32);
+			else if (tnum_is_const(dst_reg->var_off) ||
+				 (is_jmp32 && tnum_is_const(dst_lo->var_off)))
 				reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
-						    &regs[insn->src_reg],
-						    dst_reg->var_off.value, opcode);
-			else if (opcode == BPF_JEQ || opcode == BPF_JNE)
+						    src_reg,
+						    is_jmp32
+						    ? dst_lo->var_off.value
+						    : dst_reg->var_off.value,
+						    opcode, is_jmp32);
+			else if (!is_jmp32 &&
+				 (opcode == BPF_JEQ || opcode == BPF_JNE))
 				/* Comparing for equality, we can combine knowledge */
 				reg_combine_min_max(&other_branch_regs[insn->src_reg],
 						    &other_branch_regs[insn->dst_reg],
-						    &regs[insn->src_reg],
-						    &regs[insn->dst_reg], opcode);
+						    src_reg, dst_reg, opcode);
 		}
 	} else if (dst_reg->type == SCALAR_VALUE) {
 		reg_set_min_max(&other_branch_regs[insn->dst_reg],
-					dst_reg, insn->imm, opcode);
+					dst_reg, insn->imm, opcode, is_jmp32);
 	}
 
-	/* detect if R == 0 where R is returned from bpf_map_lookup_elem() */
-	if (BPF_SRC(insn->code) == BPF_K &&
+	/* detect if R == 0 where R is returned from bpf_map_lookup_elem().
+	 * NOTE: these optimizations below are related with pointer comparison
+	 *       which will never be JMP32.
+	 */
+	if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
 	    insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
 	    reg_type_may_be_null(dst_reg->type)) {
 		/* Mark all identical registers in each branch as either
@@ -4716,6 +5064,11 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
 		return err;
 	}
 
+	if (env->cur_state->active_spin_lock) {
+		verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
+		return -EINVAL;
+	}
+
 	if (regs[BPF_REG_6].type != PTR_TO_CTX) {
 		verbose(env,
 			"at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
@@ -4903,7 +5256,8 @@ peek_stack:
 		goto check_state;
 	t = insn_stack[cur_stack - 1];
 
-	if (BPF_CLASS(insns[t].code) == BPF_JMP) {
+	if (BPF_CLASS(insns[t].code) == BPF_JMP ||
+	    BPF_CLASS(insns[t].code) == BPF_JMP32) {
 		u8 opcode = BPF_OP(insns[t].code);
 
 		if (opcode == BPF_EXIT) {
@@ -5000,13 +5354,14 @@ static int check_btf_func(struct bpf_verifier_env *env,
 			  const union bpf_attr *attr,
 			  union bpf_attr __user *uattr)
 {
-	u32 i, nfuncs, urec_size, min_size, prev_offset;
+	u32 i, nfuncs, urec_size, min_size;
 	u32 krec_size = sizeof(struct bpf_func_info);
 	struct bpf_func_info *krecord;
 	const struct btf_type *type;
 	struct bpf_prog *prog;
 	const struct btf *btf;
 	void __user *urecord;
+	u32 prev_offset = 0;
 	int ret = 0;
 
 	nfuncs = attr->func_info_cnt;
@@ -5450,8 +5805,11 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
 	case PTR_TO_MAP_VALUE:
 		/* If the new min/max/var_off satisfy the old ones and
 		 * everything else matches, we are OK.
-		 * We don't care about the 'id' value, because nothing
-		 * uses it for PTR_TO_MAP_VALUE (only for ..._OR_NULL)
+		 * 'id' is not compared, since it's only used for maps with
+		 * bpf_spin_lock inside map element and in such cases if
+		 * the rest of the prog is valid for one map element then
+		 * it's valid for all map elements regardless of the key
+		 * used in bpf_map_lookup()
 		 */
 		return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
 		       range_within(rold, rcur) &&
@@ -5499,6 +5857,10 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
 	case PTR_TO_FLOW_KEYS:
 	case PTR_TO_SOCKET:
 	case PTR_TO_SOCKET_OR_NULL:
+	case PTR_TO_SOCK_COMMON:
+	case PTR_TO_SOCK_COMMON_OR_NULL:
+	case PTR_TO_TCP_SOCK:
+	case PTR_TO_TCP_SOCK_OR_NULL:
 		/* Only valid matches are exact, which memcmp() above
 		 * would have accepted
 		 */
@@ -5654,6 +6016,9 @@ static bool states_equal(struct bpf_verifier_env *env,
 	if (old->speculative && !cur->speculative)
 		return false;
 
+	if (old->active_spin_lock != cur->active_spin_lock)
+		return false;
+
 	/* for states to be equal callsites have to be the same
 	 * and all frame states need to be equivalent
 	 */
@@ -5816,6 +6181,10 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type)
 	case PTR_TO_CTX:
 	case PTR_TO_SOCKET:
 	case PTR_TO_SOCKET_OR_NULL:
+	case PTR_TO_SOCK_COMMON:
+	case PTR_TO_SOCK_COMMON_OR_NULL:
+	case PTR_TO_TCP_SOCK:
+	case PTR_TO_TCP_SOCK_OR_NULL:
 		return false;
 	default:
 		return true;
@@ -6058,7 +6427,7 @@ static int do_check(struct bpf_verifier_env *env)
 			if (err)
 				return err;
 
-		} else if (class == BPF_JMP) {
+		} else if (class == BPF_JMP || class == BPF_JMP32) {
 			u8 opcode = BPF_OP(insn->code);
 
 			if (opcode == BPF_CALL) {
@@ -6066,11 +6435,18 @@ static int do_check(struct bpf_verifier_env *env)
 				    insn->off != 0 ||
 				    (insn->src_reg != BPF_REG_0 &&
 				     insn->src_reg != BPF_PSEUDO_CALL) ||
-				    insn->dst_reg != BPF_REG_0) {
+				    insn->dst_reg != BPF_REG_0 ||
+				    class == BPF_JMP32) {
 					verbose(env, "BPF_CALL uses reserved fields\n");
 					return -EINVAL;
 				}
 
+				if (env->cur_state->active_spin_lock &&
+				    (insn->src_reg == BPF_PSEUDO_CALL ||
+				     insn->imm != BPF_FUNC_spin_unlock)) {
+					verbose(env, "function calls are not allowed while holding a lock\n");
+					return -EINVAL;
+				}
 				if (insn->src_reg == BPF_PSEUDO_CALL)
 					err = check_func_call(env, insn, &env->insn_idx);
 				else
@@ -6082,7 +6458,8 @@ static int do_check(struct bpf_verifier_env *env)
 				if (BPF_SRC(insn->code) != BPF_K ||
 				    insn->imm != 0 ||
 				    insn->src_reg != BPF_REG_0 ||
-				    insn->dst_reg != BPF_REG_0) {
+				    insn->dst_reg != BPF_REG_0 ||
+				    class == BPF_JMP32) {
 					verbose(env, "BPF_JA uses reserved fields\n");
 					return -EINVAL;
 				}
@@ -6094,11 +6471,17 @@ static int do_check(struct bpf_verifier_env *env)
 				if (BPF_SRC(insn->code) != BPF_K ||
 				    insn->imm != 0 ||
 				    insn->src_reg != BPF_REG_0 ||
-				    insn->dst_reg != BPF_REG_0) {
+				    insn->dst_reg != BPF_REG_0 ||
+				    class == BPF_JMP32) {
 					verbose(env, "BPF_EXIT uses reserved fields\n");
 					return -EINVAL;
 				}
 
+				if (env->cur_state->active_spin_lock) {
+					verbose(env, "bpf_spin_unlock is missing\n");
+					return -EINVAL;
+				}
+
 				if (state->curframe) {
 					/* exit from nested function */
 					env->prev_insn_idx = env->insn_idx;
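Together with the call restriction above, this makes the lock region strictly bracketed. A sketch of a program shape the verifier now rejects (BPF C, illustrative):

	val = bpf_map_lookup_elem(&hash_map, &key);
	if (val) {
		bpf_spin_lock(&val->lock);
		if (val->cnt > 100)
			return 0;	/* BPF_EXIT with the lock held:
					 * "bpf_spin_unlock is missing" */
		bpf_spin_unlock(&val->lock);
	}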
@@ -6196,6 +6579,19 @@ static int check_map_prealloc(struct bpf_map *map)
 	       !(map->map_flags & BPF_F_NO_PREALLOC);
 }
 
+static bool is_tracing_prog_type(enum bpf_prog_type type)
+{
+	switch (type) {
+	case BPF_PROG_TYPE_KPROBE:
+	case BPF_PROG_TYPE_TRACEPOINT:
+	case BPF_PROG_TYPE_PERF_EVENT:
+	case BPF_PROG_TYPE_RAW_TRACEPOINT:
+		return true;
+	default:
+		return false;
+	}
+}
+
 static int check_map_prog_compatibility(struct bpf_verifier_env *env,
 					struct bpf_map *map,
 					struct bpf_prog *prog)
@@ -6218,6 +6614,13 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
 		}
 	}
 
+	if ((is_tracing_prog_type(prog->type) ||
+	     prog->type == BPF_PROG_TYPE_SOCKET_FILTER) &&
+	    map_value_has_spin_lock(map)) {
+		verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
+		return -EINVAL;
+	}
+
 	if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
 	    !bpf_offload_prog_map_match(prog, map)) {
 		verbose(env, "offload device mismatch between prog and map\n");
@@ -6434,6 +6837,153 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
 	return new_prog;
 }
 
+static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
+					      u32 off, u32 cnt)
+{
+	int i, j;
+
+	/* find first prog starting at or after off (first to remove) */
+	for (i = 0; i < env->subprog_cnt; i++)
+		if (env->subprog_info[i].start >= off)
+			break;
+	/* find first prog starting at or after off + cnt (first to stay) */
+	for (j = i; j < env->subprog_cnt; j++)
+		if (env->subprog_info[j].start >= off + cnt)
+			break;
+	/* if j doesn't start exactly at off + cnt, we are just removing
+	 * the front of previous prog
+	 */
+	if (env->subprog_info[j].start != off + cnt)
+		j--;
+
+	if (j > i) {
+		struct bpf_prog_aux *aux = env->prog->aux;
+		int move;
+
+		/* move fake 'exit' subprog as well */
+		move = env->subprog_cnt + 1 - j;
+
+		memmove(env->subprog_info + i,
+			env->subprog_info + j,
+			sizeof(*env->subprog_info) * move);
+		env->subprog_cnt -= j - i;
+
+		/* remove func_info */
+		if (aux->func_info) {
+			move = aux->func_info_cnt - j;
+
+			memmove(aux->func_info + i,
+				aux->func_info + j,
+				sizeof(*aux->func_info) * move);
+			aux->func_info_cnt -= j - i;
+			/* func_info->insn_off is set after all code rewrites,
+			 * in adjust_btf_func() - no need to adjust
+			 */
+		}
+	} else {
+		/* convert i from "first prog to remove" to "first to adjust" */
+		if (env->subprog_info[i].start == off)
+			i++;
+	}
+
+	/* update fake 'exit' subprog as well */
+	for (; i <= env->subprog_cnt; i++)
+		env->subprog_info[i].start -= cnt;
+
+	return 0;
+}
+
+static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
+				      u32 cnt)
+{
+	struct bpf_prog *prog = env->prog;
+	u32 i, l_off, l_cnt, nr_linfo;
+	struct bpf_line_info *linfo;
+
+	nr_linfo = prog->aux->nr_linfo;
+	if (!nr_linfo)
+		return 0;
+
+	linfo = prog->aux->linfo;
+
+	/* find first line info to remove, count lines to be removed */
+	for (i = 0; i < nr_linfo; i++)
+		if (linfo[i].insn_off >= off)
+			break;
+
+	l_off = i;
+	l_cnt = 0;
+	for (; i < nr_linfo; i++)
+		if (linfo[i].insn_off < off + cnt)
+			l_cnt++;
+		else
+			break;
+
+	/* First live insn doesn't match first live linfo, it needs to "inherit"
+	 * last removed linfo.  prog is already modified, so prog->len == off
+	 * means no live instructions after (tail of the program was removed).
+	 */
+	if (prog->len != off && l_cnt &&
+	    (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
+		l_cnt--;
+		linfo[--i].insn_off = off + cnt;
+	}
+
+	/* remove the line info which refer to the removed instructions */
+	if (l_cnt) {
+		memmove(linfo + l_off, linfo + i,
+			sizeof(*linfo) * (nr_linfo - i));
+
+		prog->aux->nr_linfo -= l_cnt;
+		nr_linfo = prog->aux->nr_linfo;
+	}
+
+	/* pull all linfo[i].insn_off >= off + cnt in by cnt */
+	for (i = l_off; i < nr_linfo; i++)
+		linfo[i].insn_off -= cnt;
+
+	/* fix up all subprogs (incl. 'exit') which start >= off */
+	for (i = 0; i <= env->subprog_cnt; i++)
+		if (env->subprog_info[i].linfo_idx > l_off) {
+			/* program may have started in the removed region but
+			 * may not be fully removed
+			 */
+			if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
+				env->subprog_info[i].linfo_idx -= l_cnt;
+			else
+				env->subprog_info[i].linfo_idx = l_off;
+		}
+
+	return 0;
+}
+
+static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
+{
+	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
+	unsigned int orig_prog_len = env->prog->len;
+	int err;
+
+	if (bpf_prog_is_dev_bound(env->prog->aux))
+		bpf_prog_offload_remove_insns(env, off, cnt);
+
+	err = bpf_remove_insns(env->prog, off, cnt);
+	if (err)
+		return err;
+
+	err = adjust_subprog_starts_after_remove(env, off, cnt);
+	if (err)
+		return err;
+
+	err = bpf_adj_linfo_after_remove(env, off, cnt);
+	if (err)
+		return err;
+
+	memmove(aux_data + off, aux_data + off + cnt,
+		sizeof(*aux_data) * (orig_prog_len - off - cnt));
+
+	return 0;
+}
+
 /* The verifier does more data flow analysis than llvm and will not
  * explore branches that are dead at run time. Malicious programs can
  * have dead code too. Therefore replace all dead at-run-time code
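verifier_remove_insns() keeps every view of the program consistent after cutting the window [off, off + cnt): the instruction array (bpf_remove_insns), the subprog boundary table, the BTF line info, and the per-insn aux array via the trailing memmove(). The boundary rule is easiest to see in isolation; a hedged userspace illustration, with starts[] standing in for env->subprog_info[].start:

	/* Illustration only: starts at or past the removed window shift down
	 * by cnt; starts inside the window belong to fully removed subprogs
	 * and are dropped by the memmove() in the real code.
	 */
	static void adjust_starts(unsigned int *starts, int n,
				  unsigned int off, unsigned int cnt)
	{
		for (int i = 0; i < n; i++)
			if (starts[i] >= off + cnt)
				starts[i] -= cnt;
	}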
@@ -6460,6 +7010,91 @@ static void sanitize_dead_code(struct bpf_verifier_env *env)
 		}
 	}
 }
 
+static bool insn_is_cond_jump(u8 code)
+{
+	u8 op;
+
+	if (BPF_CLASS(code) == BPF_JMP32)
+		return true;
+
+	if (BPF_CLASS(code) != BPF_JMP)
+		return false;
+
+	op = BPF_OP(code);
+	return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
+}
+
+static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
+{
+	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
+	struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
+	struct bpf_insn *insn = env->prog->insnsi;
+	const int insn_cnt = env->prog->len;
+	int i;
+
+	for (i = 0; i < insn_cnt; i++, insn++) {
+		if (!insn_is_cond_jump(insn->code))
+			continue;
+
+		if (!aux_data[i + 1].seen)
+			ja.off = insn->off;
+		else if (!aux_data[i + 1 + insn->off].seen)
+			ja.off = 0;
+		else
+			continue;
+
+		if (bpf_prog_is_dev_bound(env->prog->aux))
+			bpf_prog_offload_replace_insn(env, i, &ja);
+
+		memcpy(insn, &ja, sizeof(ja));
+	}
+}
+
+static int opt_remove_dead_code(struct bpf_verifier_env *env)
+{
+	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
+	int insn_cnt = env->prog->len;
+	int i, err;
+
+	for (i = 0; i < insn_cnt; i++) {
+		int j;
+
+		j = 0;
+		while (i + j < insn_cnt && !aux_data[i + j].seen)
+			j++;
+		if (!j)
+			continue;
+
+		err = verifier_remove_insns(env, i, j);
+		if (err)
+			return err;
+		insn_cnt = env->prog->len;
+	}
+
+	return 0;
+}
+
+static int opt_remove_nops(struct bpf_verifier_env *env)
+{
+	const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
+	struct bpf_insn *insn = env->prog->insnsi;
+	int insn_cnt = env->prog->len;
+	int i, err;
+
+	for (i = 0; i < insn_cnt; i++) {
+		if (memcmp(&insn[i], &ja, sizeof(ja)))
+			continue;
+
+		err = verifier_remove_insns(env, i, 1);
+		if (err)
+			return err;
+		insn_cnt--;
+		i--;
+	}
+
+	return 0;
+}
+
 /* convert load instructions that access fields of a context type into a
  * sequence of instructions that access fields of the underlying structure:
  *     struct __sk_buff    -> struct sk_buff
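opt_hard_wire_dead_code_branches() turns any one-sided conditional jump into an unconditional one: if the fall-through insn was never marked `seen`, the branch always fires and keeps its offset; if the target was never seen, the offset collapses to 0, producing the canonical nop (ja +0) that opt_remove_nops() deletes on a later pass. Before/after encodings, sketched with stock filter.h macros:

	struct bpf_insn cond  = BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 7, 5);	/* original branch */
	struct bpf_insn taken = BPF_JMP_IMM(BPF_JA, 0, 0, 5);		/* fall-through dead */
	struct bpf_insn nop   = BPF_JMP_IMM(BPF_JA, 0, 0, 0);		/* target dead */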
@@ -6552,8 +7187,12 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 			convert_ctx_access = ops->convert_ctx_access;
 			break;
 		case PTR_TO_SOCKET:
+		case PTR_TO_SOCK_COMMON:
 			convert_ctx_access = bpf_sock_convert_ctx_access;
 			break;
+		case PTR_TO_TCP_SOCK:
+			convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
+			break;
 		default:
 			continue;
 		}
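The two added cases let loads through the new pointer types be rewritten like other context accesses: PTR_TO_SOCK_COMMON reuses the bpf_sock converter, and PTR_TO_TCP_SOCK gets its own for struct bpf_tcp_sock. From BPF-C this surfaces roughly as below; the skb->sk field and the bpf_tcp_sock() helper come from the tcp-sock patches in this same merge window, and the program type chosen here is an assumption:

	/* Usual <linux/bpf.h> + bpf_helpers.h boilerplate assumed. */
	SEC("cgroup_skb/ingress")
	int read_cwnd(struct __sk_buff *skb)
	{
		struct bpf_sock *sk = skb->sk;	/* sock-common pointer or NULL */
		struct bpf_tcp_sock *tp;

		if (!sk)
			return 1;
		tp = bpf_tcp_sock(sk);		/* PTR_TO_TCP_SOCK or NULL */
		if (!tp)
			return 1;
		return tp->snd_cwnd > 10;	/* load rewritten by the hunk above */
	}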
@@ -6681,7 +7320,12 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 		subprog_end = env->subprog_info[i + 1].start;
 
 		len = subprog_end - subprog_start;
-		func[i] = bpf_prog_alloc(bpf_prog_size(len), GFP_USER);
+		/* BPF_PROG_RUN doesn't call subprogs directly,
+		 * hence main prog stats include the runtime of subprogs.
+		 * subprogs don't have IDs and not reachable via prog_get_next_id
+		 * func[i]->aux->stats will never be accessed and stays NULL
+		 */
+		func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
 		if (!func[i])
 			goto out_free;
 		memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
@@ -7151,7 +7795,8 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
 {
 	struct bpf_verifier_env *env;
 	struct bpf_verifier_log *log;
-	int ret = -EINVAL;
+	int i, len, ret = -EINVAL;
+	bool is_priv;
 
 	/* no program is valid */
 	if (ARRAY_SIZE(bpf_verifier_ops) == 0)
@@ -7165,12 +7810,14 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
 		return -ENOMEM;
 	log = &env->log;
 
+	len = (*prog)->len;
 	env->insn_aux_data =
-		vzalloc(array_size(sizeof(struct bpf_insn_aux_data),
-				   (*prog)->len));
+		vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
 	ret = -ENOMEM;
 	if (!env->insn_aux_data)
 		goto err_free_env;
+	for (i = 0; i < len; i++)
+		env->insn_aux_data[i].orig_idx = i;
 	env->prog = *prog;
 	env->ops = bpf_verifier_ops[env->prog->type];
 
@@ -7198,6 +7845,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
 	if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
 		env->strict_alignment = false;
 
+	is_priv = capable(CAP_SYS_ADMIN);
+	env->allow_ptr_leaks = is_priv;
+
 	ret = replace_map_fd_with_map_ptr(env);
 	if (ret < 0)
 		goto skip_full_check;
@@ -7215,8 +7865,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
 	if (!env->explored_states)
 		goto skip_full_check;
 
-	env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
-
 	ret = check_subprogs(env);
 	if (ret < 0)
 		goto skip_full_check;
@@ -7246,8 +7894,17 @@ skip_full_check:
 		ret = check_max_stack_depth(env);
 
 	/* instruction rewrites happen after this point */
-	if (ret == 0)
-		sanitize_dead_code(env);
+	if (is_priv) {
+		if (ret == 0)
+			opt_hard_wire_dead_code_branches(env);
+		if (ret == 0)
+			ret = opt_remove_dead_code(env);
+		if (ret == 0)
+			ret = opt_remove_nops(env);
+	} else {
+		if (ret == 0)
+			sanitize_dead_code(env);
+	}
 
 	if (ret == 0)
 		/* program is valid, convert *(u32*)(ctx + off) accesses */
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 7fd9f22e406d..cef98502b124 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -5997,7 +5997,7 @@ int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
 	int ret;
 
 	mutex_lock(&cgroup_mutex);
-	ret = __cgroup_bpf_detach(cgrp, prog, type, flags);
+	ret = __cgroup_bpf_detach(cgrp, prog, type);
 	mutex_unlock(&cgroup_mutex);
 	return ret;
 }
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d8d76a65cfdd..7cbb5658be80 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6162,6 +6162,34 @@ void ___might_sleep(const char *file, int line, int preempt_offset)
 	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
 }
 EXPORT_SYMBOL(___might_sleep);
+
+void __cant_sleep(const char *file, int line, int preempt_offset)
+{
+	static unsigned long prev_jiffy;
+
+	if (irqs_disabled())
+		return;
+
+	if (!IS_ENABLED(CONFIG_PREEMPT_COUNT))
+		return;
+
+	if (preempt_count() > preempt_offset)
+		return;
+
+	if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
+		return;
+	prev_jiffy = jiffies;
+
+	printk(KERN_ERR "BUG: assuming atomic context at %s:%d\n", file, line);
+	printk(KERN_ERR "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n",
+	       in_atomic(), irqs_disabled(),
+	       current->pid, current->comm);
+
+	debug_show_held_locks(current);
+	dump_stack();
+	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
+}
+EXPORT_SYMBOL_GPL(__cant_sleep);
 #endif
 
 #ifdef CONFIG_MAGIC_SYSRQ
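__cant_sleep() is the mirror image of ___might_sleep(): it warns, rate-limited to once per HZ, when a section that must stay atomic is entered with preemption enabled, and only under CONFIG_PREEMPT_COUNT, where preempt_count() is meaningful. Callers are expected to go through a file/line wrapper macro, cant_sleep(); that wrapper lives in a kernel.h change not shown in this diff, so treat the name as an assumption. A hedged sketch of the intended use, mirroring the BPF_PROG_RUN call sites this series hardens:

	static unsigned int run_prog_atomically(const struct bpf_prog *prog,
						const void *ctx)
	{
		cant_sleep();			/* warn if we are preemptible here */
		return BPF_PROG_RUN(prog, ctx);
	}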
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index e815781ed751..a43c601ac252 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -267,6 +267,7 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd,
 	 * All filters in the list are evaluated and the lowest BPF return
 	 * value always takes priority (ignoring the DATA).
 	 */
+	preempt_disable();
 	for (; f; f = f->prev) {
 		u32 cur_ret = BPF_PROG_RUN(f->prog, sd);
 
@@ -275,6 +276,7 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd,
 			*match = f;
 		}
 	}
+	preempt_enable();
 	return ret;
 }
 #endif /* CONFIG_SECCOMP_FILTER */
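The seccomp hunks adopt that contract at an existing call site: the whole filter walk now runs inside one preempt_disable()/preempt_enable() pair, so the per-program run-time statistics introduced elsewhere in this merge are accounted without migrating CPUs mid-run. The resulting shape of a BPF_PROG_RUN caller, sketched with a single filter for brevity:

	static u32 run_one(const struct bpf_prog *prog,
			   const struct seccomp_data *sd)
	{
		u32 ret;

		preempt_disable();		/* stats accounting stays on one CPU */
		ret = BPF_PROG_RUN(prog, sd);
		preempt_enable();
		return ret;
	}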
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ba4d9e85feb8..7578e21a711b 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -224,6 +224,11 @@ static int proc_dostring_coredump(struct ctl_table *table, int write,
 #endif
 static int proc_dopipe_max_size(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp, loff_t *ppos);
+#ifdef CONFIG_BPF_SYSCALL
+static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write,
+					  void __user *buffer, size_t *lenp,
+					  loff_t *ppos);
+#endif
 
 #ifdef CONFIG_MAGIC_SYSRQ
 /* Note: sysrq code uses its own private copy */
@@ -1229,6 +1234,15 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &one,
 		.extra2		= &one,
 	},
+	{
+		.procname	= "bpf_stats_enabled",
+		.data		= &sysctl_bpf_stats_enabled,
+		.maxlen		= sizeof(sysctl_bpf_stats_enabled),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax_bpf_stats,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
 #endif
 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
 	{
@@ -3260,6 +3274,29 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
 
 #endif /* CONFIG_PROC_SYSCTL */
 
+#ifdef CONFIG_BPF_SYSCALL
+static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write,
+					  void __user *buffer, size_t *lenp,
+					  loff_t *ppos)
+{
+	int ret, bpf_stats = *(int *)table->data;
+	struct ctl_table tmp = *table;
+
+	if (write && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	tmp.data = &bpf_stats;
+	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+	if (write && !ret) {
+		*(int *)table->data = bpf_stats;
+		if (bpf_stats)
+			static_branch_enable(&bpf_stats_enabled_key);
+		else
+			static_branch_disable(&bpf_stats_enabled_key);
+	}
+	return ret;
+}
+#endif
 /*
  * No sense putting this after each symbol definition, twice,
  * exception granted :-)
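The handler validates the new value through a local copy, then flips the bpf_stats_enabled_key static branch, so the accounting code costs nothing while kernel.bpf_stats_enabled is 0; an administrator turns it on with `sysctl -w kernel.bpf_stats_enabled=1` (or by writing to /proc/sys/kernel/bpf_stats_enabled). On the consumer side, a static key like this typically gates the per-run timing; a hedged sketch, with an illustrative stats struct rather than the per-cpu counters the real series adds:

	struct run_stats { u64 cnt, ns; };	/* illustrative, not from bpf.h */

	static u32 bpf_prog_run_accounted(const struct bpf_prog *prog,
					  const void *ctx,
					  struct run_stats *st)
	{
		u64 start = 0;
		u32 ret;

		if (static_branch_unlikely(&bpf_stats_enabled_key))
			start = sched_clock();
		ret = BPF_PROG_RUN(prog, ctx);
		if (static_branch_unlikely(&bpf_stats_enabled_key)) {
			st->cnt++;
			st->ns += sched_clock() - start;
		}
		return ret;
	}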