path: root/kernel/bpf/syscall.c
author	Linus Torvalds <torvalds@linux-foundation.org>	2018-08-15 18:04:25 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-08-15 18:04:25 -0400
commit	9a76aba02a37718242d7cdc294f0a3901928aa57 (patch)
tree	2040d038f85d2120f21af83b0793efd5af1864e3 /kernel/bpf/syscall.c
parent	0a957467c5fd46142bc9c52758ffc552d4c5e2f7 (diff)
parent	26a1ccc6c117be8e33e0410fce8c5298b0015b99 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
 "Highlights:

  - Gustavo A. R. Silva keeps working on the implicit switch fallthru changes.

  - Support 802.11ax High-Efficiency wireless in cfg80211 et al, From Luca Coelho.

  - Re-enable ASPM in r8169, from Kai-Heng Feng.

  - Add virtual XFRM interfaces, which avoid all of the limitations of existing IPSEC tunnels. From Steffen Klassert.

  - Convert GRO over to use a hash table, so that when we have many flows active we don't traverse a long list during accumulation.

  - Many new self tests for routing, TC, tunnels, etc. Too many contributors to mention them all, but I'm really happy to keep seeing this stuff.

  - Hardware timestamping support for dpaa_eth/fsl-fman from Yangbo Lu.

  - Lots of cleanups and fixes in L2TP code from Guillaume Nault.

  - Add IPSEC offload support to netdevsim, from Shannon Nelson.

  - Add support for slotting with non-uniform distribution to netem packet scheduler, from Yousuk Seung.

  - Add UDP GSO support to mlx5e, from Boris Pismenny.

  - Support offloading of Team LAG in NFP, from John Hurley.

  - Allow TX queue selection to be configured based upon RX queue, from Amritha Nambiar.

  - Support ethtool ring size configuration in aquantia, from Anton Mikaev.

  - Support DSCP and flowlabel per-transport in SCTP, from Xin Long.

  - Support list based batching and stack traversal of SKBs; this is very exciting work. From Edward Cree.

  - Busyloop optimizations in vhost_net, from Toshiaki Makita.

  - Introduce the ETF qdisc, which allows time based transmissions. IGB can offload this in hardware. From Vinicius Costa Gomes.

  - Add parameter support to devlink, from Moshe Shemesh.

  - Several multiplication and division optimizations for BPF JIT in nfp driver, from Jiong Wang.

  - Lots of preparatory work to make more of the packet scheduler layer lockless, when possible, from Vlad Buslov.

  - Add ACK filter and NAT awareness to sch_cake packet scheduler, from Toke Høiland-Jørgensen.

  - Support regions and region snapshots in devlink, from Alex Vesker.

  - Allow XDP programs to be attached to both HW and SW at the same time on a given device, with initial support in nfp. From Jakub Kicinski.

  - Add TLS RX offload and support in mlx5, from Ilya Lesokhin.

  - Use PHYLIB in r8169 driver, from Heiner Kallweit.

  - All sorts of changes to support Spectrum 2 in mlxsw driver, from Ido Schimmel.

  - PTP support in mv88e6xxx DSA driver, from Andrew Lunn.

  - Make TCP_USER_TIMEOUT socket option more accurate, from Jon Maxwell.

  - Support for templates in packet scheduler classifier, from Jiri Pirko.

  - IPV6 support in RDS, from Ka-Cheong Poon.

  - Native tproxy support in nf_tables, from Máté Eckl.

  - Maintain IP fragment queue in an rbtree, but optimize properly for in-order frags. From Peter Oskolkov.

  - Improved handling of ACKs on hole repairs, from Yuchung Cheng"

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1996 commits)
  bpf: test: fix spelling mistake "REUSEEPORT" -> "REUSEPORT"
  hv/netvsc: Fix NULL dereference at single queue mode fallback
  net: filter: mark expected switch fall-through
  xen-netfront: fix warn message as irq device name has '/'
  cxgb4: Add new T5 PCI device ids 0x50af and 0x50b0
  net: dsa: mv88e6xxx: missing unlock on error path
  rds: fix building with IPV6=m
  inet/connection_sock: prefer _THIS_IP_ to current_text_addr
  net: dsa: mv88e6xxx: bitwise vs logical bug
  net: sock_diag: Fix spectre v1 gadget in __sock_diag_cmd()
  ieee802154: hwsim: using right kind of iteration
  net: hns3: Add vlan filter setting by ethtool command -K
  net: hns3: Set tx ring' tc info when netdev is up
  net: hns3: Remove tx ring BD len register in hns3_enet
  net: hns3: Fix desc num set to default when setting channel
  net: hns3: Fix for phy link issue when using marvell phy driver
  net: hns3: Fix for information of phydev lost problem when down/up
  net: hns3: Fix for command format parsing error in hclge_is_all_function_id_zero
  net: hns3: Add support for serdes loopback selftest
  bnxt_en: take coredump_record structure off stack
  ...
Diffstat (limited to 'kernel/bpf/syscall.c')
-rw-r--r--	kernel/bpf/syscall.c	103
1 file changed, 84 insertions(+), 19 deletions(-)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b41c6cf2eb88..8339d81cba1d 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -103,6 +103,7 @@ int bpf_check_uarg_tail_zero(void __user *uaddr,
 const struct bpf_map_ops bpf_map_offload_ops = {
 	.map_alloc = bpf_map_offload_map_alloc,
 	.map_free = bpf_map_offload_map_free,
+	.map_check_btf = map_check_no_btf,
 };
 
 static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
@@ -181,32 +182,60 @@ int bpf_map_precharge_memlock(u32 pages)
 	return 0;
 }
 
-static int bpf_map_charge_memlock(struct bpf_map *map)
+static int bpf_charge_memlock(struct user_struct *user, u32 pages)
 {
-	struct user_struct *user = get_current_user();
-	unsigned long memlock_limit;
+	unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 
-	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+	if (atomic_long_add_return(pages, &user->locked_vm) > memlock_limit) {
+		atomic_long_sub(pages, &user->locked_vm);
+		return -EPERM;
+	}
+	return 0;
+}
 
-	atomic_long_add(map->pages, &user->locked_vm);
+static void bpf_uncharge_memlock(struct user_struct *user, u32 pages)
+{
+	atomic_long_sub(pages, &user->locked_vm);
+}
 
-	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
-		atomic_long_sub(map->pages, &user->locked_vm);
+static int bpf_map_init_memlock(struct bpf_map *map)
+{
+	struct user_struct *user = get_current_user();
+	int ret;
+
+	ret = bpf_charge_memlock(user, map->pages);
+	if (ret) {
 		free_uid(user);
-		return -EPERM;
+		return ret;
 	}
 	map->user = user;
-	return 0;
+	return ret;
 }
 
-static void bpf_map_uncharge_memlock(struct bpf_map *map)
+static void bpf_map_release_memlock(struct bpf_map *map)
 {
 	struct user_struct *user = map->user;
-
-	atomic_long_sub(map->pages, &user->locked_vm);
+	bpf_uncharge_memlock(user, map->pages);
 	free_uid(user);
 }
 
+int bpf_map_charge_memlock(struct bpf_map *map, u32 pages)
+{
+	int ret;
+
+	ret = bpf_charge_memlock(map->user, pages);
+	if (ret)
+		return ret;
+	map->pages += pages;
+	return ret;
+}
+
+void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages)
+{
+	bpf_uncharge_memlock(map->user, pages);
+	map->pages -= pages;
+}
+
 static int bpf_map_alloc_id(struct bpf_map *map)
 {
 	int id;
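Note: this hunk also exports the pair bpf_map_charge_memlock()/bpf_map_uncharge_memlock(), so a map implementation can account additional pages against the owner's RLIMIT_MEMLOCK after the map has been created. Below is a minimal sketch of how such a caller might use them; the function name, the grow logic, and the kvzalloc() buffer are illustrative assumptions, only the two charge/uncharge calls come from this patch.

/* Hypothetical example: grow a map-private buffer and keep the
 * memlock accounting in sync.  Only bpf_map_charge_memlock() and
 * bpf_map_uncharge_memlock() are taken from this patch; the rest
 * is a sketch.
 */
static int example_map_grow(struct bpf_map *map, u32 extra_pages)
{
	void *buf;
	int err;

	/* Charge the new pages against map->user before allocating. */
	err = bpf_map_charge_memlock(map, extra_pages);
	if (err)
		return err;	/* -EPERM if RLIMIT_MEMLOCK would be exceeded */

	buf = kvzalloc((size_t)extra_pages << PAGE_SHIFT, GFP_USER);
	if (!buf) {
		/* Roll the accounting back on allocation failure. */
		bpf_map_uncharge_memlock(map, extra_pages);
		return -ENOMEM;
	}

	/* ... hand 'buf' to the map implementation ... */
	kvfree(buf);	/* placeholder so the sketch stays self-contained */
	return 0;
}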
@@ -256,7 +285,7 @@ static void bpf_map_free_deferred(struct work_struct *work)
 {
 	struct bpf_map *map = container_of(work, struct bpf_map, work);
 
-	bpf_map_uncharge_memlock(map);
+	bpf_map_release_memlock(map);
 	security_bpf_map_free(map);
 	/* implementation dependent freeing */
 	map->ops->map_free(map);
@@ -427,6 +456,34 @@ static int bpf_obj_name_cpy(char *dst, const char *src)
 	return 0;
 }
 
+int map_check_no_btf(const struct bpf_map *map,
+		     const struct btf_type *key_type,
+		     const struct btf_type *value_type)
+{
+	return -ENOTSUPP;
+}
+
+static int map_check_btf(const struct bpf_map *map, const struct btf *btf,
+			 u32 btf_key_id, u32 btf_value_id)
+{
+	const struct btf_type *key_type, *value_type;
+	u32 key_size, value_size;
+	int ret = 0;
+
+	key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
+	if (!key_type || key_size != map->key_size)
+		return -EINVAL;
+
+	value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
+	if (!value_type || value_size != map->value_size)
+		return -EINVAL;
+
+	if (map->ops->map_check_btf)
+		ret = map->ops->map_check_btf(map, key_type, value_type);
+
+	return ret;
+}
+
 #define BPF_MAP_CREATE_LAST_FIELD btf_value_type_id
 /* called via syscall */
 static int map_create(union bpf_attr *attr)
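With this hunk the generic map_check_btf() verifies that the BTF key and value sizes match map->key_size/map->value_size and then defers to the optional per-map ->map_check_btf callback; map types that never accept BTF (such as the offload ops updated earlier in this diff) point the callback at map_check_no_btf. The sketch below shows what a map-specific callback could look like; the "integer keys only" policy is purely an illustrative assumption, only the callback signature comes from this patch.

/* Illustrative ->map_check_btf callback: accept only integer-typed
 * keys.  The signature matches map_check_no_btf() above; the policy
 * enforced here is an assumption for the example.
 */
static int example_map_check_btf(const struct bpf_map *map,
				 const struct btf_type *key_type,
				 const struct btf_type *value_type)
{
	if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
		return -EINVAL;

	/* Any value layout is acceptable for this example. */
	return 0;
}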
@@ -461,8 +518,7 @@ static int map_create(union bpf_attr *attr)
 	atomic_set(&map->refcnt, 1);
 	atomic_set(&map->usercnt, 1);
 
-	if (bpf_map_support_seq_show(map) &&
-	    (attr->btf_key_type_id || attr->btf_value_type_id)) {
+	if (attr->btf_key_type_id || attr->btf_value_type_id) {
 		struct btf *btf;
 
 		if (!attr->btf_key_type_id || !attr->btf_value_type_id) {
@@ -476,8 +532,8 @@ static int map_create(union bpf_attr *attr)
 			goto free_map_nouncharge;
 		}
 
-		err = map->ops->map_check_btf(map, btf, attr->btf_key_type_id,
-					      attr->btf_value_type_id);
+		err = map_check_btf(map, btf, attr->btf_key_type_id,
+				    attr->btf_value_type_id);
 		if (err) {
 			btf_put(btf);
 			goto free_map_nouncharge;
@@ -492,7 +548,7 @@ static int map_create(union bpf_attr *attr)
 	if (err)
 		goto free_map_nouncharge;
 
-	err = bpf_map_charge_memlock(map);
+	err = bpf_map_init_memlock(map);
 	if (err)
 		goto free_map_sec;
 
@@ -515,7 +571,7 @@ static int map_create(union bpf_attr *attr)
 	return err;
 
 free_map:
-	bpf_map_uncharge_memlock(map);
+	bpf_map_release_memlock(map);
 free_map_sec:
 	security_bpf_map_free(map);
 free_map_nouncharge:
@@ -656,6 +712,8 @@ static int map_lookup_elem(union bpf_attr *attr)
 		err = bpf_fd_array_map_lookup_elem(map, key, value);
 	} else if (IS_FD_HASH(map)) {
 		err = bpf_fd_htab_map_lookup_elem(map, key, value);
+	} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
+		err = bpf_fd_reuseport_array_lookup_elem(map, key, value);
 	} else {
 		rcu_read_lock();
 		ptr = map->ops->map_lookup_elem(map, key);
@@ -762,6 +820,10 @@ static int map_update_elem(union bpf_attr *attr)
 		err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
 						  attr->flags);
 		rcu_read_unlock();
+	} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
+		/* rcu_read_lock() is not needed */
+		err = bpf_fd_reuseport_array_update_elem(map, key, value,
+							 attr->flags);
 	} else {
 		rcu_read_lock();
 		err = map->ops->map_update_elem(map, key, value, attr->flags);
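These two hunks route lookup and update on BPF_MAP_TYPE_REUSEPORT_SOCKARRAY maps to the dedicated reuseport-array helpers (updates take a socket fd from userspace and do their own locking, hence no rcu_read_lock() here). A rough userspace sketch of populating such a map through libbpf's classic syscall wrappers follows; the key/value sizes, the single-slot map, and the overall flow are assumptions for illustration, and error handling is minimal.

/* Userspace sketch (assumed flow): place a SO_REUSEPORT listener's fd
 * into slot 0 of a BPF_MAP_TYPE_REUSEPORT_SOCKARRAY so a reuseport BPF
 * program can later select it.
 */
#include <linux/bpf.h>
#include <bpf/bpf.h>

int populate_reuseport_array(int listen_fd)
{
	__u32 key = 0;			/* slot index */
	__u64 value = listen_fd;	/* socket fd; the kernel stores the sock */
	int map_fd;

	map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
				sizeof(key), sizeof(value),
				1 /* max_entries */, 0 /* flags */);
	if (map_fd < 0)
		return map_fd;

	return bpf_map_update_elem(map_fd, &key, &value, BPF_ANY);
}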
@@ -929,6 +991,9 @@ static void free_used_maps(struct bpf_prog_aux *aux)
 {
 	int i;
 
+	if (aux->cgroup_storage)
+		bpf_cgroup_storage_release(aux->prog, aux->cgroup_storage);
+
 	for (i = 0; i < aux->used_map_cnt; i++)
 		bpf_map_put(aux->used_maps[i]);
 