Diffstat (limited to 'kernel/bpf/hashtab.c')
-rw-r--r--  kernel/bpf/hashtab.c | 103
1 file changed, 59 insertions(+), 44 deletions(-)
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 3905d4bc5b80..b76828f23b49 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -227,7 +227,7 @@ static int alloc_extra_elems(struct bpf_htab *htab)
 }
 
 /* Called from syscall */
-static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
+static int htab_map_alloc_check(union bpf_attr *attr)
 {
 	bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 		       attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
@@ -241,9 +241,6 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
 	bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
 	int numa_node = bpf_map_attr_numa_node(attr);
-	struct bpf_htab *htab;
-	int err, i;
-	u64 cost;
 
 	BUILD_BUG_ON(offsetof(struct htab_elem, htab) !=
 		     offsetof(struct htab_elem, hash_node.pprev));
@@ -254,40 +251,68 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 		/* LRU implementation is much complicated than other
 		 * maps. Hence, limit to CAP_SYS_ADMIN for now.
 		 */
-		return ERR_PTR(-EPERM);
+		return -EPERM;
 
 	if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK)
 		/* reserved bits should not be used */
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 
 	if (!lru && percpu_lru)
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 
 	if (lru && !prealloc)
-		return ERR_PTR(-ENOTSUPP);
+		return -ENOTSUPP;
 
 	if (numa_node != NUMA_NO_NODE && (percpu || percpu_lru))
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
+
+	/* check sanity of attributes.
+	 * value_size == 0 may be allowed in the future to use map as a set
+	 */
+	if (attr->max_entries == 0 || attr->key_size == 0 ||
+	    attr->value_size == 0)
+		return -EINVAL;
+
+	if (attr->key_size > MAX_BPF_STACK)
+		/* eBPF programs initialize keys on stack, so they cannot be
+		 * larger than max stack size
+		 */
+		return -E2BIG;
+
+	if (attr->value_size >= KMALLOC_MAX_SIZE -
+	    MAX_BPF_STACK - sizeof(struct htab_elem))
+		/* if value_size is bigger, the user space won't be able to
+		 * access the elements via bpf syscall. This check also makes
+		 * sure that the elem_size doesn't overflow and it's
+		 * kmalloc-able later in htab_map_update_elem()
+		 */
+		return -E2BIG;
+
+	return 0;
+}
+
+static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
+{
+	bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+		       attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
+	bool lru = (attr->map_type == BPF_MAP_TYPE_LRU_HASH ||
+		    attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
+	/* percpu_lru means each cpu has its own LRU list.
+	 * it is different from BPF_MAP_TYPE_PERCPU_HASH where
+	 * the map's value itself is percpu. percpu_lru has
+	 * nothing to do with the map's value.
+	 */
+	bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
+	bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
+	struct bpf_htab *htab;
+	int err, i;
+	u64 cost;
 
 	htab = kzalloc(sizeof(*htab), GFP_USER);
 	if (!htab)
 		return ERR_PTR(-ENOMEM);
 
-	/* mandatory map attributes */
-	htab->map.map_type = attr->map_type;
-	htab->map.key_size = attr->key_size;
-	htab->map.value_size = attr->value_size;
-	htab->map.max_entries = attr->max_entries;
-	htab->map.map_flags = attr->map_flags;
-	htab->map.numa_node = numa_node;
-
-	/* check sanity of attributes.
-	 * value_size == 0 may be allowed in the future to use map as a set
-	 */
-	err = -EINVAL;
-	if (htab->map.max_entries == 0 || htab->map.key_size == 0 ||
-	    htab->map.value_size == 0)
-		goto free_htab;
+	bpf_map_init_from_attr(&htab->map, attr);
 
 	if (percpu_lru) {
 		/* ensure each CPU's lru list has >=1 elements.
@@ -304,22 +329,6 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	/* hash table size must be power of 2 */
 	htab->n_buckets = roundup_pow_of_two(htab->map.max_entries);
 
-	err = -E2BIG;
-	if (htab->map.key_size > MAX_BPF_STACK)
-		/* eBPF programs initialize keys on stack, so they cannot be
-		 * larger than max stack size
-		 */
-		goto free_htab;
-
-	if (htab->map.value_size >= KMALLOC_MAX_SIZE -
-	    MAX_BPF_STACK - sizeof(struct htab_elem))
-		/* if value_size is bigger, the user space won't be able to
-		 * access the elements via bpf syscall. This check also makes
-		 * sure that the elem_size doesn't overflow and it's
-		 * kmalloc-able later in htab_map_update_elem()
-		 */
-		goto free_htab;
-
 	htab->elem_size = sizeof(struct htab_elem) +
 			  round_up(htab->map.key_size, 8);
 	if (percpu)
@@ -327,6 +336,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	else
 		htab->elem_size += round_up(htab->map.value_size, 8);
 
+	err = -E2BIG;
 	/* prevent zero size kmalloc and check for u32 overflow */
 	if (htab->n_buckets == 0 ||
 	    htab->n_buckets > U32_MAX / sizeof(struct bucket))
@@ -1143,6 +1153,7 @@ static void htab_map_free(struct bpf_map *map)
 }
 
 const struct bpf_map_ops htab_map_ops = {
+	.map_alloc_check = htab_map_alloc_check,
 	.map_alloc = htab_map_alloc,
 	.map_free = htab_map_free,
 	.map_get_next_key = htab_map_get_next_key,
@@ -1153,6 +1164,7 @@ const struct bpf_map_ops htab_map_ops = {
 };
 
 const struct bpf_map_ops htab_lru_map_ops = {
+	.map_alloc_check = htab_map_alloc_check,
 	.map_alloc = htab_map_alloc,
 	.map_free = htab_map_free,
 	.map_get_next_key = htab_map_get_next_key,
@@ -1236,6 +1248,7 @@ int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
 }
 
 const struct bpf_map_ops htab_percpu_map_ops = {
+	.map_alloc_check = htab_map_alloc_check,
 	.map_alloc = htab_map_alloc,
 	.map_free = htab_map_free,
 	.map_get_next_key = htab_map_get_next_key,
@@ -1245,6 +1258,7 @@ const struct bpf_map_ops htab_percpu_map_ops = {
 };
 
 const struct bpf_map_ops htab_lru_percpu_map_ops = {
+	.map_alloc_check = htab_map_alloc_check,
 	.map_alloc = htab_map_alloc,
 	.map_free = htab_map_free,
 	.map_get_next_key = htab_map_get_next_key,
@@ -1253,11 +1267,11 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
 	.map_delete_elem = htab_lru_map_delete_elem,
 };
 
-static struct bpf_map *fd_htab_map_alloc(union bpf_attr *attr)
+static int fd_htab_map_alloc_check(union bpf_attr *attr)
 {
 	if (attr->value_size != sizeof(u32))
-		return ERR_PTR(-EINVAL);
-	return htab_map_alloc(attr);
+		return -EINVAL;
+	return htab_map_alloc_check(attr);
 }
 
 static void fd_htab_map_free(struct bpf_map *map)
@@ -1328,7 +1342,7 @@ static struct bpf_map *htab_of_map_alloc(union bpf_attr *attr)
 	if (IS_ERR(inner_map_meta))
 		return inner_map_meta;
 
-	map = fd_htab_map_alloc(attr);
+	map = htab_map_alloc(attr);
 	if (IS_ERR(map)) {
 		bpf_map_meta_free(inner_map_meta);
 		return map;
@@ -1372,6 +1386,7 @@ static void htab_of_map_free(struct bpf_map *map)
 }
 
 const struct bpf_map_ops htab_of_maps_map_ops = {
+	.map_alloc_check = fd_htab_map_alloc_check,
 	.map_alloc = htab_of_map_alloc,
 	.map_free = htab_of_map_free,
 	.map_get_next_key = htab_map_get_next_key,
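
For context on the .map_alloc_check hook wired up above: it lets the generic map-creation path validate attributes before any memory is committed, while .map_alloc still performs the actual allocation. A minimal sketch of how a caller could drive the two callbacks; the wrapper below is hypothetical and only the ops->map_alloc_check() and ops->map_alloc() signatures are taken from this patch (the real consumer is the map-creation code in kernel/bpf/syscall.c):

/* Hypothetical caller, for illustration only. */
static struct bpf_map *example_alloc_map(const struct bpf_map_ops *ops,
					 union bpf_attr *attr)
{
	struct bpf_map *map;
	int err;

	if (ops->map_alloc_check) {
		/* reject bad attributes before committing any memory */
		err = ops->map_alloc_check(attr);
		if (err)
			return ERR_PTR(err);
	}

	/* attributes look sane; now do the real (possibly large) allocation */
	map = ops->map_alloc(attr);
	return map;
}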