author    Roman Gushchin <guro@fb.com>  2019-05-29 21:03:58 -0400
committer Alexei Starovoitov <ast@kernel.org>  2019-05-31 19:52:56 -0400
commit    b936ca643ade11f265fa10e5fb71c20d9c5243f1 (patch)
tree      fab6cc685363f4f9901fecf6335a8d8e6974f8b3 /kernel/bpf/syscall.c
parent    3539b96e041c06e4317082816d90ec09160aeb11 (diff)
bpf: rework memlock-based memory accounting for maps
In order to unify the existing memlock charging code with the
memcg-based memory accounting, which will be added later, let's
rework the current scheme.

Currently the following design is used:
  1) .alloc() callback optionally checks if the allocation will likely
     succeed using bpf_map_precharge_memlock()
  2) .alloc() performs actual allocations
  3) .alloc() callback calculates map cost and sets map.memory.pages
  4) map_create() calls bpf_map_init_memlock() which sets
     map.memory.user and performs actual charging; in case of failure
     the map is destroyed
  <map is in use>
  1) bpf_map_free_deferred() calls bpf_map_release_memlock(), which
     performs uncharge and releases the user
  2) .map_free() callback releases the memory

The scheme can be simplified and made more robust:
  1) .alloc() calculates map cost and calls bpf_map_charge_init()
  2) bpf_map_charge_init() sets map.memory.user and performs the
     actual charge
  3) .alloc() performs actual allocations
  <map is in use>
  1) .map_free() callback releases the memory
  2) bpf_map_charge_finish() performs uncharge and releases the user

The new scheme also allows reusing the bpf_map_charge_init()/finish()
functions for memcg-based accounting. Because charges are performed
before actual allocations and uncharges after freeing the memory,
no bogus memory pressure can be created.

In cases where the map structure is not available (e.g. it has not
been created yet, or has already been destroyed), an on-stack
bpf_map_memory structure is used. The charge can be transferred with
the bpf_map_charge_move() function.

Signed-off-by: Roman Gushchin <guro@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
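To make the new ordering concrete, here is a minimal sketch of a
hypothetical map type's .alloc() callback (struct example_map, its
cost formula and the omitted overflow checks are made up for this
sketch; the bpf_map_charge_*() helpers and bpf_map_area_alloc() are
the real ones):

	struct example_map {
		struct bpf_map map;
		/* map-type-specific state would live here */
	};

	static struct bpf_map *example_map_alloc(union bpf_attr *attr)
	{
		struct bpf_map_memory mem;	/* on-stack charge holder */
		struct example_map *emap;
		u64 cost;
		int ret;

		/* 1) calculate the map cost and charge it up front
		 *    (real callers also guard against u32 overflow)
		 */
		cost = sizeof(*emap) + (u64)attr->max_entries * attr->value_size;
		ret = bpf_map_charge_init(&mem, round_up(cost, PAGE_SIZE) >> PAGE_SHIFT);
		if (ret)
			return ERR_PTR(ret);

		/* 2) perform the actual allocation only after charging */
		emap = bpf_map_area_alloc(sizeof(*emap), bpf_map_attr_numa_node(attr));
		if (!emap) {
			/* nothing was allocated: uncharge and release the user */
			bpf_map_charge_finish(&mem);
			return ERR_PTR(-ENOMEM);
		}

		/* 3) hand the charge over from the stack to the map */
		bpf_map_charge_move(&emap->map.memory, &mem);
		return &emap->map;
	}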
Diffstat (limited to 'kernel/bpf/syscall.c')
-rw-r--r--  kernel/bpf/syscall.c | 69 +++++++++++++++++++++++++++++++++------------------------------------
1 file changed, 33 insertions(+), 36 deletions(-)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 8289a2ce14fc..4a5ebad99154 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -188,19 +188,6 @@ void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr)
 	map->numa_node = bpf_map_attr_numa_node(attr);
 }
 
-int bpf_map_precharge_memlock(u32 pages)
-{
-	struct user_struct *user = get_current_user();
-	unsigned long memlock_limit, cur;
-
-	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-	cur = atomic_long_read(&user->locked_vm);
-	free_uid(user);
-	if (cur + pages > memlock_limit)
-		return -EPERM;
-	return 0;
-}
-
 static int bpf_charge_memlock(struct user_struct *user, u32 pages)
 {
 	unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
@@ -214,29 +201,40 @@ static int bpf_charge_memlock(struct user_struct *user, u32 pages)
 
 static void bpf_uncharge_memlock(struct user_struct *user, u32 pages)
 {
-	atomic_long_sub(pages, &user->locked_vm);
+	if (user)
+		atomic_long_sub(pages, &user->locked_vm);
 }
 
-static int bpf_map_init_memlock(struct bpf_map *map)
+int bpf_map_charge_init(struct bpf_map_memory *mem, u32 pages)
 {
 	struct user_struct *user = get_current_user();
 	int ret;
 
-	ret = bpf_charge_memlock(user, map->memory.pages);
+	ret = bpf_charge_memlock(user, pages);
 	if (ret) {
 		free_uid(user);
 		return ret;
 	}
-	map->memory.user = user;
-	return ret;
+
+	mem->pages = pages;
+	mem->user = user;
+
+	return 0;
 }
 
-static void bpf_map_release_memlock(struct bpf_map *map)
+void bpf_map_charge_finish(struct bpf_map_memory *mem)
 {
-	struct user_struct *user = map->memory.user;
+	bpf_uncharge_memlock(mem->user, mem->pages);
+	free_uid(mem->user);
+}
 
-	bpf_uncharge_memlock(user, map->memory.pages);
-	free_uid(user);
+void bpf_map_charge_move(struct bpf_map_memory *dst,
+			 struct bpf_map_memory *src)
+{
+	*dst = *src;
+
+	/* Make sure src will not be used for the redundant uncharging. */
+	memset(src, 0, sizeof(struct bpf_map_memory));
 }
 
 int bpf_map_charge_memlock(struct bpf_map *map, u32 pages)
@@ -304,11 +302,13 @@ void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
 static void bpf_map_free_deferred(struct work_struct *work)
 {
 	struct bpf_map *map = container_of(work, struct bpf_map, work);
+	struct bpf_map_memory mem;
 
-	bpf_map_release_memlock(map);
+	bpf_map_charge_move(&mem, &map->memory);
 	security_bpf_map_free(map);
 	/* implementation dependent freeing */
 	map->ops->map_free(map);
+	bpf_map_charge_finish(&mem);
 }
 
 static void bpf_map_put_uref(struct bpf_map *map)
@@ -550,6 +550,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
 static int map_create(union bpf_attr *attr)
 {
 	int numa_node = bpf_map_attr_numa_node(attr);
+	struct bpf_map_memory mem;
 	struct bpf_map *map;
 	int f_flags;
 	int err;
@@ -574,7 +575,7 @@ static int map_create(union bpf_attr *attr)
 
 	err = bpf_obj_name_cpy(map->name, attr->map_name);
 	if (err)
-		goto free_map_nouncharge;
+		goto free_map;
 
 	atomic_set(&map->refcnt, 1);
 	atomic_set(&map->usercnt, 1);
@@ -584,20 +585,20 @@ static int map_create(union bpf_attr *attr)
 
 		if (!attr->btf_value_type_id) {
 			err = -EINVAL;
-			goto free_map_nouncharge;
+			goto free_map;
 		}
 
 		btf = btf_get_by_fd(attr->btf_fd);
 		if (IS_ERR(btf)) {
 			err = PTR_ERR(btf);
-			goto free_map_nouncharge;
+			goto free_map;
 		}
 
 		err = map_check_btf(map, btf, attr->btf_key_type_id,
 				    attr->btf_value_type_id);
 		if (err) {
 			btf_put(btf);
-			goto free_map_nouncharge;
+			goto free_map;
 		}
 
 		map->btf = btf;
@@ -609,15 +610,11 @@ static int map_create(union bpf_attr *attr)
 
 	err = security_bpf_map_alloc(map);
 	if (err)
-		goto free_map_nouncharge;
-
-	err = bpf_map_init_memlock(map);
-	if (err)
-		goto free_map_sec;
+		goto free_map;
 
 	err = bpf_map_alloc_id(map);
 	if (err)
-		goto free_map;
+		goto free_map_sec;
 
 	err = bpf_map_new_fd(map, f_flags);
 	if (err < 0) {
@@ -633,13 +630,13 @@ static int map_create(union bpf_attr *attr)
 
 	return err;
 
-free_map:
-	bpf_map_release_memlock(map);
 free_map_sec:
 	security_bpf_map_free(map);
-free_map_nouncharge:
+free_map:
 	btf_put(map->btf);
+	bpf_map_charge_move(&mem, &map->memory);
 	map->ops->map_free(map);
+	bpf_map_charge_finish(&mem);
 	return err;
 }
 
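A side note on the move semantics: bpf_map_charge_move() zeroes its
source, and bpf_uncharge_memlock() now tolerates a NULL user
(free_uid() does as well), so calling bpf_map_charge_finish() on an
already-moved bpf_map_memory is a harmless no-op rather than a double
uncharge — this is the "redundant uncharging" the in-code comment
guards against. A sketch of the resulting idiom (the surrounding
setup code is illustrative, not from this patch):

	struct bpf_map_memory mem;
	int err;

	err = bpf_map_charge_init(&mem, pages);	/* charge first */
	if (err)
		return err;

	if (some_setup_step_fails)
		goto out;			/* mem still owns the charge */

	bpf_map_charge_move(&map->memory, &mem);	/* mem is now zeroed */
out:
	bpf_map_charge_finish(&mem);	/* no-op if the charge was moved */
	return err;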