author     Roman Gushchin <guro@fb.com>          2019-05-29 21:03:58 -0400
committer  Alexei Starovoitov <ast@kernel.org>   2019-05-31 19:52:56 -0400
commit     b936ca643ade11f265fa10e5fb71c20d9c5243f1 (patch)
tree       fab6cc685363f4f9901fecf6335a8d8e6974f8b3
parent     3539b96e041c06e4317082816d90ec09160aeb11 (diff)
bpf: rework memlock-based memory accounting for maps
In order to unify the existing memlock charging code with the memcg-based
memory accounting, which will be added later, let's rework the current
scheme.

Currently the following design is used:
  1) .alloc() callback optionally checks if the allocation will likely
     succeed using bpf_map_precharge_memlock()
  2) .alloc() performs actual allocations
  3) .alloc() callback calculates map cost and sets map.memory.pages
  4) map_create() calls bpf_map_init_memlock() which sets map.memory.user
     and performs actual charging; in case of failure the map is destroyed
  <map is in use>
  1) bpf_map_free_deferred() calls bpf_map_release_memlock(), which
     performs uncharge and releases the user
  2) .map_free() callback releases the memory

The scheme can be simplified and made more robust:
  1) .alloc() calculates map cost and calls bpf_map_charge_init()
  2) bpf_map_charge_init() sets map.memory.user and performs actual charge
  3) .alloc() performs actual allocations
  <map is in use>
  1) .map_free() callback releases the memory
  2) bpf_map_charge_finish() performs uncharge and releases the user

The new scheme also allows reusing the bpf_map_charge_init()/finish()
functions for memcg-based accounting. Because charges are performed before
actual allocations, and uncharges after freeing the memory, no bogus memory
pressure can be created.

In cases when the map structure is not available (e.g. it is not yet
created, or is already destroyed), an on-stack bpf_map_memory structure is
used. The charge can be transferred with the bpf_map_charge_move()
function.

Signed-off-by: Roman Gushchin <guro@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
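For illustration only, the sketch below shows roughly how a map type's
->map_alloc() path looks under the new scheme. The map type and the names
example_map/example_map_alloc are made up; only the helpers
(bpf_map_charge_init(), bpf_map_charge_finish(), bpf_map_charge_move())
and the overall pattern come from this patch, see e.g. the arraymap.c hunk
below:

    /* Hypothetical map type, for illustration only; not part of the patch. */
    struct example_map {
            struct bpf_map map;
    };

    static struct bpf_map *example_map_alloc(union bpf_attr *attr)
    {
            int numa_node = bpf_map_attr_numa_node(attr);
            struct bpf_map_memory mem;
            struct example_map *m;
            u64 cost;
            int ret;

            cost = sizeof(*m) + (u64)attr->max_entries * attr->value_size;
            if (cost >= U32_MAX - PAGE_SIZE)
                    return ERR_PTR(-E2BIG);

            /* 1) charge memlock up front, before any allocation */
            ret = bpf_map_charge_init(&mem,
                                      round_up(cost, PAGE_SIZE) >> PAGE_SHIFT);
            if (ret < 0)
                    return ERR_PTR(ret);

            /* 2) perform the actual allocation */
            m = bpf_map_area_alloc(cost, numa_node);
            if (!m) {
                    /* the charge was taken before the allocation: undo it */
                    bpf_map_charge_finish(&mem);
                    return ERR_PTR(-ENOMEM);
            }

            bpf_map_init_from_attr(&m->map, attr);
            /* 3) transfer the on-stack charge to the now-existing map */
            bpf_map_charge_move(&m->map.memory, &mem);

            return &m->map;
    }

On the release side the order is mirrored: bpf_map_free_deferred() moves the
charge to an on-stack bpf_map_memory, lets ->map_free() release the memory,
and only then calls bpf_map_charge_finish(), so the uncharge always happens
after the memory is actually freed (see the syscall.c hunks below).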
-rw-r--r--   include/linux/bpf.h             5
-rw-r--r--   kernel/bpf/arraymap.c          10
-rw-r--r--   kernel/bpf/cpumap.c             8
-rw-r--r--   kernel/bpf/devmap.c            13
-rw-r--r--   kernel/bpf/hashtab.c           11
-rw-r--r--   kernel/bpf/local_storage.c      9
-rw-r--r--   kernel/bpf/lpm_trie.c           5
-rw-r--r--   kernel/bpf/queue_stack_maps.c   9
-rw-r--r--   kernel/bpf/reuseport_array.c    9
-rw-r--r--   kernel/bpf/stackmap.c          30
-rw-r--r--   kernel/bpf/syscall.c           69
-rw-r--r--   kernel/bpf/xskmap.c             9
-rw-r--r--   net/core/bpf_sk_storage.c       8
-rw-r--r--   net/core/sock_map.c             5
14 files changed, 112 insertions, 88 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 2e7c1c40d949..3c8f24f402bf 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -650,9 +650,12 @@ struct bpf_map *__bpf_map_get(struct fd f);
 struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref);
 void bpf_map_put_with_uref(struct bpf_map *map);
 void bpf_map_put(struct bpf_map *map);
-int bpf_map_precharge_memlock(u32 pages);
 int bpf_map_charge_memlock(struct bpf_map *map, u32 pages);
 void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages);
+int bpf_map_charge_init(struct bpf_map_memory *mem, u32 pages);
+void bpf_map_charge_finish(struct bpf_map_memory *mem);
+void bpf_map_charge_move(struct bpf_map_memory *dst,
+                         struct bpf_map_memory *src);
 void *bpf_map_area_alloc(size_t size, int numa_node);
 void bpf_map_area_free(void *base);
 void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 8fda24e78193..3552da4407d9 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -83,6 +83,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
         u32 elem_size, index_mask, max_entries;
         bool unpriv = !capable(CAP_SYS_ADMIN);
         u64 cost, array_size, mask64;
+        struct bpf_map_memory mem;
         struct bpf_array *array;
 
         elem_size = round_up(attr->value_size, 8);
@@ -125,23 +126,26 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
         }
         cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
 
-        ret = bpf_map_precharge_memlock(cost);
+        ret = bpf_map_charge_init(&mem, cost);
         if (ret < 0)
                 return ERR_PTR(ret);
 
         /* allocate all map elements and zero-initialize them */
         array = bpf_map_area_alloc(array_size, numa_node);
-        if (!array)
+        if (!array) {
+                bpf_map_charge_finish(&mem);
                 return ERR_PTR(-ENOMEM);
+        }
         array->index_mask = index_mask;
         array->map.unpriv_array = unpriv;
 
         /* copy mandatory map attributes */
         bpf_map_init_from_attr(&array->map, attr);
-        array->map.memory.pages = cost;
+        bpf_map_charge_move(&array->map.memory, &mem);
         array->elem_size = elem_size;
 
         if (percpu && bpf_array_alloc_percpu(array)) {
+                bpf_map_charge_finish(&array->map.memory);
                 bpf_map_area_free(array);
                 return ERR_PTR(-ENOMEM);
         }
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 035268add724..c633c8d68023 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -108,10 +108,10 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
         cost += cpu_map_bitmap_size(attr) * num_possible_cpus();
         if (cost >= U32_MAX - PAGE_SIZE)
                 goto free_cmap;
-        cmap->map.memory.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
 
         /* Notice returns -EPERM on if map size is larger than memlock limit */
-        ret = bpf_map_precharge_memlock(cmap->map.memory.pages);
+        ret = bpf_map_charge_init(&cmap->map.memory,
+                                  round_up(cost, PAGE_SIZE) >> PAGE_SHIFT);
         if (ret) {
                 err = ret;
                 goto free_cmap;
@@ -121,7 +121,7 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
         cmap->flush_needed = __alloc_percpu(cpu_map_bitmap_size(attr),
                                             __alignof__(unsigned long));
         if (!cmap->flush_needed)
-                goto free_cmap;
+                goto free_charge;
 
         /* Alloc array for possible remote "destination" CPUs */
         cmap->cpu_map = bpf_map_area_alloc(cmap->map.max_entries *
@@ -133,6 +133,8 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
         return &cmap->map;
 free_percpu:
         free_percpu(cmap->flush_needed);
+free_charge:
+        bpf_map_charge_finish(&cmap->map.memory);
 free_cmap:
         kfree(cmap);
         return ERR_PTR(err);
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index f6c57efb1d0d..371bd880ed58 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -111,10 +111,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
         if (cost >= U32_MAX - PAGE_SIZE)
                 goto free_dtab;
 
-        dtab->map.memory.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
-
-        /* if map size is larger than memlock limit, reject it early */
-        err = bpf_map_precharge_memlock(dtab->map.memory.pages);
+        /* if map size is larger than memlock limit, reject it */
+        err = bpf_map_charge_init(&dtab->map.memory,
+                                  round_up(cost, PAGE_SIZE) >> PAGE_SHIFT);
         if (err)
                 goto free_dtab;
 
@@ -125,19 +124,21 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
                                               __alignof__(unsigned long),
                                               GFP_KERNEL | __GFP_NOWARN);
         if (!dtab->flush_needed)
-                goto free_dtab;
+                goto free_charge;
 
         dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries *
                                               sizeof(struct bpf_dtab_netdev *),
                                               dtab->map.numa_node);
         if (!dtab->netdev_map)
-                goto free_dtab;
+                goto free_charge;
 
         spin_lock(&dev_map_lock);
         list_add_tail_rcu(&dtab->list, &dev_map_list);
         spin_unlock(&dev_map_lock);
 
         return &dtab->map;
+free_charge:
+        bpf_map_charge_finish(&dtab->map.memory);
 free_dtab:
         free_percpu(dtab->flush_needed);
         kfree(dtab);
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 15bf228d2e98..b0bdc7b040ad 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -364,10 +364,9 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
                 /* make sure page count doesn't overflow */
                 goto free_htab;
 
-        htab->map.memory.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
-
-        /* if map size is larger than memlock limit, reject it early */
-        err = bpf_map_precharge_memlock(htab->map.memory.pages);
+        /* if map size is larger than memlock limit, reject it */
+        err = bpf_map_charge_init(&htab->map.memory,
+                                  round_up(cost, PAGE_SIZE) >> PAGE_SHIFT);
         if (err)
                 goto free_htab;
 
@@ -376,7 +375,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
                                            sizeof(struct bucket),
                                            htab->map.numa_node);
         if (!htab->buckets)
-                goto free_htab;
+                goto free_charge;
 
         if (htab->map.map_flags & BPF_F_ZERO_SEED)
                 htab->hashrnd = 0;
@@ -409,6 +408,8 @@ free_prealloc:
         prealloc_destroy(htab);
 free_buckets:
         bpf_map_area_free(htab->buckets);
+free_charge:
+        bpf_map_charge_finish(&htab->map.memory);
 free_htab:
         kfree(htab);
         return ERR_PTR(err);
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 574325276650..e49bfd4f4f6d 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -272,6 +272,7 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
 {
         int numa_node = bpf_map_attr_numa_node(attr);
         struct bpf_cgroup_storage_map *map;
+        struct bpf_map_memory mem;
         u32 pages;
         int ret;
 
@@ -294,16 +295,18 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
 
         pages = round_up(sizeof(struct bpf_cgroup_storage_map), PAGE_SIZE) >>
                 PAGE_SHIFT;
-        ret = bpf_map_precharge_memlock(pages);
+        ret = bpf_map_charge_init(&mem, pages);
         if (ret < 0)
                 return ERR_PTR(ret);
 
         map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map),
                            __GFP_ZERO | GFP_USER, numa_node);
-        if (!map)
+        if (!map) {
+                bpf_map_charge_finish(&mem);
                 return ERR_PTR(-ENOMEM);
+        }
 
-        map->map.memory.pages = pages;
+        bpf_map_charge_move(&map->map.memory, &mem);
 
         /* copy mandatory map attributes */
         bpf_map_init_from_attr(&map->map, attr);
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 8e423a582760..6345a8d2dcd0 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -578,9 +578,8 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
                 goto out_err;
         }
 
-        trie->map.memory.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
-
-        ret = bpf_map_precharge_memlock(trie->map.memory.pages);
+        ret = bpf_map_charge_init(&trie->map.memory,
+                                  round_up(cost, PAGE_SIZE) >> PAGE_SHIFT);
         if (ret)
                 goto out_err;
 
diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
index 8a510e71d486..224cb0fd8f03 100644
--- a/kernel/bpf/queue_stack_maps.c
+++ b/kernel/bpf/queue_stack_maps.c
@@ -67,6 +67,7 @@ static int queue_stack_map_alloc_check(union bpf_attr *attr)
 static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
 {
         int ret, numa_node = bpf_map_attr_numa_node(attr);
+        struct bpf_map_memory mem = {0};
         struct bpf_queue_stack *qs;
         u64 size, queue_size, cost;
 
@@ -77,19 +78,21 @@ static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
 
         cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
 
-        ret = bpf_map_precharge_memlock(cost);
+        ret = bpf_map_charge_init(&mem, cost);
         if (ret < 0)
                 return ERR_PTR(ret);
 
         qs = bpf_map_area_alloc(queue_size, numa_node);
-        if (!qs)
+        if (!qs) {
+                bpf_map_charge_finish(&mem);
                 return ERR_PTR(-ENOMEM);
+        }
 
         memset(qs, 0, sizeof(*qs));
 
         bpf_map_init_from_attr(&qs->map, attr);
 
-        qs->map.memory.pages = cost;
+        bpf_map_charge_move(&qs->map.memory, &mem);
         qs->size = size;
 
         raw_spin_lock_init(&qs->lock);
diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c
index 819515242739..5c6e25b1b9b1 100644
--- a/kernel/bpf/reuseport_array.c
+++ b/kernel/bpf/reuseport_array.c
@@ -151,6 +151,7 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr)
 {
         int err, numa_node = bpf_map_attr_numa_node(attr);
         struct reuseport_array *array;
+        struct bpf_map_memory mem;
         u64 cost, array_size;
 
         if (!capable(CAP_SYS_ADMIN))
@@ -165,18 +166,20 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr)
                 return ERR_PTR(-ENOMEM);
         cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
 
-        err = bpf_map_precharge_memlock(cost);
+        err = bpf_map_charge_init(&mem, cost);
         if (err)
                 return ERR_PTR(err);
 
         /* allocate all map elements and zero-initialize them */
         array = bpf_map_area_alloc(array_size, numa_node);
-        if (!array)
+        if (!array) {
+                bpf_map_charge_finish(&mem);
                 return ERR_PTR(-ENOMEM);
+        }
 
         /* copy mandatory map attributes */
         bpf_map_init_from_attr(&array->map, attr);
-        array->map.memory.pages = cost;
+        bpf_map_charge_move(&array->map.memory, &mem);
 
         return &array->map;
 }
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 08d4efff73ac..8da24ca65d97 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -89,6 +89,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
 {
         u32 value_size = attr->value_size;
         struct bpf_stack_map *smap;
+        struct bpf_map_memory mem;
         u64 cost, n_buckets;
         int err;
 
@@ -118,38 +119,41 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
         cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap);
         if (cost >= U32_MAX - PAGE_SIZE)
                 return ERR_PTR(-E2BIG);
+        cost += n_buckets * (value_size + sizeof(struct stack_map_bucket));
+        if (cost >= U32_MAX - PAGE_SIZE)
+                return ERR_PTR(-E2BIG);
+
+        err = bpf_map_charge_init(&mem,
+                                  round_up(cost, PAGE_SIZE) >> PAGE_SHIFT);
+        if (err)
+                return ERR_PTR(err);
 
         smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr));
-        if (!smap)
+        if (!smap) {
+                bpf_map_charge_finish(&mem);
                 return ERR_PTR(-ENOMEM);
-
-        err = -E2BIG;
-        cost += n_buckets * (value_size + sizeof(struct stack_map_bucket));
-        if (cost >= U32_MAX - PAGE_SIZE)
-                goto free_smap;
+        }
 
         bpf_map_init_from_attr(&smap->map, attr);
         smap->map.value_size = value_size;
         smap->n_buckets = n_buckets;
-        smap->map.memory.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
-
-        err = bpf_map_precharge_memlock(smap->map.memory.pages);
-        if (err)
-                goto free_smap;
 
         err = get_callchain_buffers(sysctl_perf_event_max_stack);
         if (err)
-                goto free_smap;
+                goto free_charge;
 
         err = prealloc_elems_and_freelist(smap);
         if (err)
                 goto put_buffers;
 
+        bpf_map_charge_move(&smap->map.memory, &mem);
+
         return &smap->map;
 
 put_buffers:
         put_callchain_buffers();
-free_smap:
+free_charge:
+        bpf_map_charge_finish(&mem);
         bpf_map_area_free(smap);
         return ERR_PTR(err);
 }
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 8289a2ce14fc..4a5ebad99154 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -188,19 +188,6 @@ void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr)
         map->numa_node = bpf_map_attr_numa_node(attr);
 }
 
-int bpf_map_precharge_memlock(u32 pages)
-{
-        struct user_struct *user = get_current_user();
-        unsigned long memlock_limit, cur;
-
-        memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-        cur = atomic_long_read(&user->locked_vm);
-        free_uid(user);
-        if (cur + pages > memlock_limit)
-                return -EPERM;
-        return 0;
-}
-
 static int bpf_charge_memlock(struct user_struct *user, u32 pages)
 {
         unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
@@ -214,29 +201,40 @@ static int bpf_charge_memlock(struct user_struct *user, u32 pages)
 
 static void bpf_uncharge_memlock(struct user_struct *user, u32 pages)
 {
-        atomic_long_sub(pages, &user->locked_vm);
+        if (user)
+                atomic_long_sub(pages, &user->locked_vm);
 }
 
-static int bpf_map_init_memlock(struct bpf_map *map)
+int bpf_map_charge_init(struct bpf_map_memory *mem, u32 pages)
 {
         struct user_struct *user = get_current_user();
         int ret;
 
-        ret = bpf_charge_memlock(user, map->memory.pages);
+        ret = bpf_charge_memlock(user, pages);
         if (ret) {
                 free_uid(user);
                 return ret;
         }
-        map->memory.user = user;
-        return ret;
+
+        mem->pages = pages;
+        mem->user = user;
+
+        return 0;
 }
 
-static void bpf_map_release_memlock(struct bpf_map *map)
+void bpf_map_charge_finish(struct bpf_map_memory *mem)
 {
-        struct user_struct *user = map->memory.user;
+        bpf_uncharge_memlock(mem->user, mem->pages);
+        free_uid(mem->user);
+}
 
-        bpf_uncharge_memlock(user, map->memory.pages);
-        free_uid(user);
+void bpf_map_charge_move(struct bpf_map_memory *dst,
+                         struct bpf_map_memory *src)
+{
+        *dst = *src;
+
+        /* Make sure src will not be used for the redundant uncharging. */
+        memset(src, 0, sizeof(struct bpf_map_memory));
 }
 
 int bpf_map_charge_memlock(struct bpf_map *map, u32 pages)
@@ -304,11 +302,13 @@ void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
 static void bpf_map_free_deferred(struct work_struct *work)
 {
         struct bpf_map *map = container_of(work, struct bpf_map, work);
+        struct bpf_map_memory mem;
 
-        bpf_map_release_memlock(map);
+        bpf_map_charge_move(&mem, &map->memory);
         security_bpf_map_free(map);
         /* implementation dependent freeing */
         map->ops->map_free(map);
+        bpf_map_charge_finish(&mem);
 }
 
 static void bpf_map_put_uref(struct bpf_map *map)
@@ -550,6 +550,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
 static int map_create(union bpf_attr *attr)
 {
         int numa_node = bpf_map_attr_numa_node(attr);
+        struct bpf_map_memory mem;
         struct bpf_map *map;
         int f_flags;
         int err;
@@ -574,7 +575,7 @@ static int map_create(union bpf_attr *attr)
 
         err = bpf_obj_name_cpy(map->name, attr->map_name);
         if (err)
-                goto free_map_nouncharge;
+                goto free_map;
 
         atomic_set(&map->refcnt, 1);
         atomic_set(&map->usercnt, 1);
@@ -584,20 +585,20 @@ static int map_create(union bpf_attr *attr)
 
                 if (!attr->btf_value_type_id) {
                         err = -EINVAL;
-                        goto free_map_nouncharge;
+                        goto free_map;
                 }
 
                 btf = btf_get_by_fd(attr->btf_fd);
                 if (IS_ERR(btf)) {
                         err = PTR_ERR(btf);
-                        goto free_map_nouncharge;
+                        goto free_map;
                 }
 
                 err = map_check_btf(map, btf, attr->btf_key_type_id,
                                     attr->btf_value_type_id);
                 if (err) {
                         btf_put(btf);
-                        goto free_map_nouncharge;
+                        goto free_map;
                 }
 
                 map->btf = btf;
@@ -609,15 +610,11 @@ static int map_create(union bpf_attr *attr)
 
         err = security_bpf_map_alloc(map);
         if (err)
-                goto free_map_nouncharge;
-
-        err = bpf_map_init_memlock(map);
-        if (err)
-                goto free_map_sec;
+                goto free_map;
 
         err = bpf_map_alloc_id(map);
         if (err)
-                goto free_map;
+                goto free_map_sec;
 
         err = bpf_map_new_fd(map, f_flags);
         if (err < 0) {
@@ -633,13 +630,13 @@ static int map_create(union bpf_attr *attr)
 
         return err;
 
-free_map:
-        bpf_map_release_memlock(map);
 free_map_sec:
         security_bpf_map_free(map);
-free_map_nouncharge:
+free_map:
         btf_put(map->btf);
+        bpf_map_charge_move(&mem, &map->memory);
         map->ops->map_free(map);
+        bpf_map_charge_finish(&mem);
         return err;
 }
 
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
index f816ee1a0fa0..a329dab7c7a4 100644
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
@@ -40,10 +40,9 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
         if (cost >= U32_MAX - PAGE_SIZE)
                 goto free_m;
 
-        m->map.memory.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
-
         /* Notice returns -EPERM on if map size is larger than memlock limit */
-        err = bpf_map_precharge_memlock(m->map.memory.pages);
+        err = bpf_map_charge_init(&m->map.memory,
+                                  round_up(cost, PAGE_SIZE) >> PAGE_SHIFT);
         if (err)
                 goto free_m;
 
@@ -51,7 +50,7 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
 
         m->flush_list = alloc_percpu(struct list_head);
         if (!m->flush_list)
-                goto free_m;
+                goto free_charge;
 
         for_each_possible_cpu(cpu)
                 INIT_LIST_HEAD(per_cpu_ptr(m->flush_list, cpu));
@@ -65,6 +64,8 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
 
 free_percpu:
         free_percpu(m->flush_list);
+free_charge:
+        bpf_map_charge_finish(&m->map.memory);
 free_m:
         kfree(m);
         return ERR_PTR(err);
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index 92581c3ff220..621a0b07ff11 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -640,13 +640,16 @@ static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
         cost = sizeof(*smap->buckets) * nbuckets + sizeof(*smap);
         pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
 
-        ret = bpf_map_precharge_memlock(pages);
-        if (ret < 0)
+        ret = bpf_map_charge_init(&smap->map.memory, pages);
+        if (ret < 0) {
+                kfree(smap);
                 return ERR_PTR(ret);
+        }
 
         smap->buckets = kvcalloc(sizeof(*smap->buckets), nbuckets,
                                  GFP_USER | __GFP_NOWARN);
         if (!smap->buckets) {
+                bpf_map_charge_finish(&smap->map.memory);
                 kfree(smap);
                 return ERR_PTR(-ENOMEM);
         }
@@ -659,7 +662,6 @@ static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
         smap->elem_size = sizeof(struct bpf_sk_storage_elem) + attr->value_size;
         smap->cache_idx = (unsigned int)atomic_inc_return(&cache_idx) %
                 BPF_SK_STORAGE_CACHE_SIZE;
-        smap->map.memory.pages = pages;
 
         return &smap->map;
 }
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 4eb5b6a1b29f..1028c922a149 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -49,8 +49,8 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
                 goto free_stab;
         }
 
-        stab->map.memory.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
-        err = bpf_map_precharge_memlock(stab->map.memory.pages);
+        err = bpf_map_charge_init(&stab->map.memory,
+                                  round_up(cost, PAGE_SIZE) >> PAGE_SHIFT);
         if (err)
                 goto free_stab;
 
@@ -60,6 +60,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
         if (stab->sks)
                 return &stab->map;
         err = -ENOMEM;
+        bpf_map_charge_finish(&stab->map.memory);
 free_stab:
         kfree(stab);
         return ERR_PTR(err);