 include/linux/bpf.h      | 10
 include/uapi/linux/bpf.h | 10
 kernel/bpf/arraymap.c    |  7
 kernel/bpf/devmap.c      |  9
 kernel/bpf/hashtab.c     | 19
 kernel/bpf/lpm_trie.c    |  9
 kernel/bpf/sockmap.c     | 10
 kernel/bpf/stackmap.c    |  8
 kernel/bpf/syscall.c     | 14
 9 files changed, 73 insertions(+), 23 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 1cc6c5ff61ec..55b88e329804 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -51,6 +51,7 @@ struct bpf_map {
 	u32 map_flags;
 	u32 pages;
 	u32 id;
+	int numa_node;
 	struct user_struct *user;
 	const struct bpf_map_ops *ops;
 	struct work_struct work;
@@ -264,7 +265,7 @@ struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref);
 void bpf_map_put_with_uref(struct bpf_map *map);
 void bpf_map_put(struct bpf_map *map);
 int bpf_map_precharge_memlock(u32 pages);
-void *bpf_map_area_alloc(size_t size);
+void *bpf_map_area_alloc(size_t size, int numa_node);
 void bpf_map_area_free(void *base);
 
 extern int sysctl_unprivileged_bpf_disabled;
@@ -316,6 +317,13 @@ struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
 void __dev_map_insert_ctx(struct bpf_map *map, u32 index);
 void __dev_map_flush(struct bpf_map *map);
 
+/* Return map's numa specified by userspace */
+static inline int bpf_map_attr_numa_node(const union bpf_attr *attr)
+{
+	return (attr->map_flags & BPF_F_NUMA_NODE) ?
+		attr->numa_node : NUMA_NO_NODE;
+}
+
 #else
 static inline struct bpf_prog *bpf_prog_get(u32 ufd)
 {
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 5ecbe812a2cc..843818dff96d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -165,6 +165,7 @@ enum bpf_attach_type {
 #define BPF_NOEXIST	1 /* create new element if it didn't exist */
 #define BPF_EXIST	2 /* update existing element */
 
+/* flags for BPF_MAP_CREATE command */
 #define BPF_F_NO_PREALLOC	(1U << 0)
 /* Instead of having one common LRU list in the
  * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list
@@ -173,6 +174,8 @@ enum bpf_attach_type {
  * across different LRU lists.
  */
 #define BPF_F_NO_COMMON_LRU	(1U << 1)
+/* Specify numa node during map creation */
+#define BPF_F_NUMA_NODE		(1U << 2)
 
 union bpf_attr {
 	struct { /* anonymous struct used by BPF_MAP_CREATE command */
@@ -180,8 +183,13 @@ union bpf_attr {
 		__u32	key_size;	/* size of key in bytes */
 		__u32	value_size;	/* size of value in bytes */
 		__u32	max_entries;	/* max number of entries in a map */
-		__u32	map_flags;	/* prealloc or not */
+		__u32	map_flags;	/* BPF_MAP_CREATE related
+					 * flags defined above.
+					 */
 		__u32	inner_map_fd;	/* fd pointing to the inner map */
+		__u32	numa_node;	/* numa node (effective only if
+					 * BPF_F_NUMA_NODE is set).
+					 */
 	};
 
 	struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
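From the userspace side, the two additions above (the BPF_F_NUMA_NODE flag and the numa_node attribute field) are all that is needed to request placement on a specific node. The snippet below is a minimal usage sketch, not part of the patch; it assumes kernel headers that already carry these definitions and elides error handling.

/* Minimal usage sketch (not part of this patch): create a hash map whose
 * memory is allocated on NUMA node 0.  Assumes <linux/bpf.h> already
 * defines BPF_F_NUMA_NODE and the numa_node field.
 */
#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int bpf_create_map_on_node(unsigned int node)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_type    = BPF_MAP_TYPE_HASH;
	attr.key_size    = sizeof(__u32);
	attr.value_size  = sizeof(__u64);
	attr.max_entries = 1024;
	attr.map_flags   = BPF_F_NUMA_NODE;	/* makes numa_node effective */
	attr.numa_node   = node;		/* ignored when the flag is clear */

	return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
}

Leaving BPF_F_NUMA_NODE clear preserves the old behavior: bpf_map_attr_numa_node() then returns NUMA_NO_NODE and the allocators fall back to their default placement.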
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index d771a3872500..96e9c5c1dfc9 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -49,13 +49,15 @@ static int bpf_array_alloc_percpu(struct bpf_array *array)
 static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 {
 	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
+	int numa_node = bpf_map_attr_numa_node(attr);
 	struct bpf_array *array;
 	u64 array_size;
 	u32 elem_size;
 
 	/* check sanity of attributes */
 	if (attr->max_entries == 0 || attr->key_size != 4 ||
-	    attr->value_size == 0 || attr->map_flags)
+	    attr->value_size == 0 || attr->map_flags & ~BPF_F_NUMA_NODE ||
+	    (percpu && numa_node != NUMA_NO_NODE))
 		return ERR_PTR(-EINVAL);
 
 	if (attr->value_size > KMALLOC_MAX_SIZE)
@@ -77,7 +79,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 		return ERR_PTR(-ENOMEM);
 
 	/* allocate all map elements and zero-initialize them */
-	array = bpf_map_area_alloc(array_size);
+	array = bpf_map_area_alloc(array_size, numa_node);
 	if (!array)
 		return ERR_PTR(-ENOMEM);
 
@@ -87,6 +89,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 	array->map.value_size = attr->value_size;
 	array->map.max_entries = attr->max_entries;
 	array->map.map_flags = attr->map_flags;
+	array->map.numa_node = numa_node;
 	array->elem_size = elem_size;
 
 	if (!percpu)
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 18a72a8add43..67f4f00ce33a 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -80,7 +80,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 
 	/* check sanity of attributes */
 	if (attr->max_entries == 0 || attr->key_size != 4 ||
-	    attr->value_size != 4 || attr->map_flags)
+	    attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
 		return ERR_PTR(-EINVAL);
 
 	dtab = kzalloc(sizeof(*dtab), GFP_USER);
@@ -93,6 +93,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 	dtab->map.value_size = attr->value_size;
 	dtab->map.max_entries = attr->max_entries;
 	dtab->map.map_flags = attr->map_flags;
+	dtab->map.numa_node = bpf_map_attr_numa_node(attr);
 
 	err = -ENOMEM;
 
@@ -119,7 +120,8 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 		goto free_dtab;
 
 	dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries *
-					      sizeof(struct bpf_dtab_netdev *));
+					      sizeof(struct bpf_dtab_netdev *),
+					      dtab->map.numa_node);
 	if (!dtab->netdev_map)
 		goto free_dtab;
 
@@ -344,7 +346,8 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
 	if (!ifindex) {
 		dev = NULL;
 	} else {
-		dev = kmalloc(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN);
+		dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN,
+				   map->numa_node);
 		if (!dev)
 			return -ENOMEM;
 
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 4fb463172aa8..47ae748c3a49 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -18,6 +18,9 @@
 #include "bpf_lru_list.h"
 #include "map_in_map.h"
 
+#define HTAB_CREATE_FLAG_MASK \
+	(BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE)
+
 struct bucket {
 	struct hlist_nulls_head head;
 	raw_spinlock_t lock;
@@ -138,7 +141,8 @@ static int prealloc_init(struct bpf_htab *htab)
 	if (!htab_is_percpu(htab) && !htab_is_lru(htab))
 		num_entries += num_possible_cpus();
 
-	htab->elems = bpf_map_area_alloc(htab->elem_size * num_entries);
+	htab->elems = bpf_map_area_alloc(htab->elem_size * num_entries,
+					 htab->map.numa_node);
 	if (!htab->elems)
 		return -ENOMEM;
 
@@ -233,6 +237,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	 */
 	bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
 	bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
+	int numa_node = bpf_map_attr_numa_node(attr);
 	struct bpf_htab *htab;
 	int err, i;
 	u64 cost;
@@ -248,7 +253,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 		 */
 		return ERR_PTR(-EPERM);
 
-	if (attr->map_flags & ~(BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU))
+	if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK)
 		/* reserved bits should not be used */
 		return ERR_PTR(-EINVAL);
 
@@ -258,6 +263,9 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	if (lru && !prealloc)
 		return ERR_PTR(-ENOTSUPP);
 
+	if (numa_node != NUMA_NO_NODE && (percpu || percpu_lru))
+		return ERR_PTR(-EINVAL);
+
 	htab = kzalloc(sizeof(*htab), GFP_USER);
 	if (!htab)
 		return ERR_PTR(-ENOMEM);
@@ -268,6 +276,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	htab->map.value_size = attr->value_size;
 	htab->map.max_entries = attr->max_entries;
 	htab->map.map_flags = attr->map_flags;
+	htab->map.numa_node = numa_node;
 
 	/* check sanity of attributes.
 	 * value_size == 0 may be allowed in the future to use map as a set
@@ -346,7 +355,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 
 	err = -ENOMEM;
 	htab->buckets = bpf_map_area_alloc(htab->n_buckets *
-					   sizeof(struct bucket));
+					   sizeof(struct bucket),
+					   htab->map.numa_node);
 	if (!htab->buckets)
 		goto free_htab;
 
@@ -689,7 +699,8 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 			atomic_dec(&htab->count);
 			return ERR_PTR(-E2BIG);
 		}
-		l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN);
+		l_new = kmalloc_node(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN,
+				     htab->map.numa_node);
 		if (!l_new)
 			return ERR_PTR(-ENOMEM);
 	}
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index b09185f0f17d..1b767844a76f 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -244,7 +244,8 @@ static struct lpm_trie_node *lpm_trie_node_alloc(const struct lpm_trie *trie,
 	if (value)
 		size += trie->map.value_size;
 
-	node = kmalloc(size, GFP_ATOMIC | __GFP_NOWARN);
+	node = kmalloc_node(size, GFP_ATOMIC | __GFP_NOWARN,
+			    trie->map.numa_node);
 	if (!node)
 		return NULL;
 
@@ -405,6 +406,8 @@ static int trie_delete_elem(struct bpf_map *map, void *key)
 #define LPM_KEY_SIZE_MAX	LPM_KEY_SIZE(LPM_DATA_SIZE_MAX)
 #define LPM_KEY_SIZE_MIN	LPM_KEY_SIZE(LPM_DATA_SIZE_MIN)
 
+#define LPM_CREATE_FLAG_MASK	(BPF_F_NO_PREALLOC | BPF_F_NUMA_NODE)
+
 static struct bpf_map *trie_alloc(union bpf_attr *attr)
 {
 	struct lpm_trie *trie;
@@ -416,7 +419,8 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
 
 	/* check sanity of attributes */
 	if (attr->max_entries == 0 ||
-	    attr->map_flags != BPF_F_NO_PREALLOC ||
+	    !(attr->map_flags & BPF_F_NO_PREALLOC) ||
+	    attr->map_flags & ~LPM_CREATE_FLAG_MASK ||
 	    attr->key_size < LPM_KEY_SIZE_MIN ||
 	    attr->key_size > LPM_KEY_SIZE_MAX ||
 	    attr->value_size < LPM_VAL_SIZE_MIN ||
@@ -433,6 +437,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
 	trie->map.value_size = attr->value_size;
 	trie->map.max_entries = attr->max_entries;
 	trie->map.map_flags = attr->map_flags;
+	trie->map.numa_node = bpf_map_attr_numa_node(attr);
 	trie->data_size = attr->key_size -
 			  offsetof(struct bpf_lpm_trie_key, data);
 	trie->max_prefixlen = trie->data_size * 8;
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 39de541fbcdc..78b2bb9370ac 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -443,7 +443,9 @@ static struct smap_psock *smap_init_psock(struct sock *sock,
 {
 	struct smap_psock *psock;
 
-	psock = kzalloc(sizeof(struct smap_psock), GFP_ATOMIC | __GFP_NOWARN);
+	psock = kzalloc_node(sizeof(struct smap_psock),
+			     GFP_ATOMIC | __GFP_NOWARN,
+			     stab->map.numa_node);
 	if (!psock)
 		return ERR_PTR(-ENOMEM);
 
@@ -465,7 +467,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
 
 	/* check sanity of attributes */
 	if (attr->max_entries == 0 || attr->key_size != 4 ||
-	    attr->value_size != 4 || attr->map_flags)
+	    attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
 		return ERR_PTR(-EINVAL);
 
 	if (attr->value_size > KMALLOC_MAX_SIZE)
@@ -481,6 +483,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
 	stab->map.value_size = attr->value_size;
 	stab->map.max_entries = attr->max_entries;
 	stab->map.map_flags = attr->map_flags;
+	stab->map.numa_node = bpf_map_attr_numa_node(attr);
 
 	/* make sure page count doesn't overflow */
 	cost = (u64) stab->map.max_entries * sizeof(struct sock *);
@@ -495,7 +498,8 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
 		goto free_stab;
 
 	stab->sock_map = bpf_map_area_alloc(stab->map.max_entries *
-					    sizeof(struct sock *));
+					    sizeof(struct sock *),
+					    stab->map.numa_node);
 	if (!stab->sock_map)
 		goto free_stab;
 
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 31147d730abf..135be433e9a0 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -31,7 +31,8 @@ static int prealloc_elems_and_freelist(struct bpf_stack_map *smap)
 	u32 elem_size = sizeof(struct stack_map_bucket) + smap->map.value_size;
 	int err;
 
-	smap->elems = bpf_map_area_alloc(elem_size * smap->map.max_entries);
+	smap->elems = bpf_map_area_alloc(elem_size * smap->map.max_entries,
+					 smap->map.numa_node);
 	if (!smap->elems)
 		return -ENOMEM;
 
@@ -59,7 +60,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
 	if (!capable(CAP_SYS_ADMIN))
 		return ERR_PTR(-EPERM);
 
-	if (attr->map_flags)
+	if (attr->map_flags & ~BPF_F_NUMA_NODE)
 		return ERR_PTR(-EINVAL);
 
 	/* check sanity of attributes */
@@ -75,7 +76,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
 	if (cost >= U32_MAX - PAGE_SIZE)
 		return ERR_PTR(-E2BIG);
 
-	smap = bpf_map_area_alloc(cost);
+	smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr));
 	if (!smap)
 		return ERR_PTR(-ENOMEM);
 
@@ -91,6 +92,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
 	smap->map.map_flags = attr->map_flags;
 	smap->n_buckets = n_buckets;
 	smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
+	smap->map.numa_node = bpf_map_attr_numa_node(attr);
 
 	err = bpf_map_precharge_memlock(smap->map.pages);
 	if (err)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b8cb1b3c9bfb..9378f3ba2cbf 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -105,7 +105,7 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
 	return map;
 }
 
-void *bpf_map_area_alloc(size_t size)
+void *bpf_map_area_alloc(size_t size, int numa_node)
 {
 	/* We definitely need __GFP_NORETRY, so OOM killer doesn't
 	 * trigger under memory pressure as we really just want to
@@ -115,12 +115,13 @@ void *bpf_map_area_alloc(size_t size)
 	void *area;
 
 	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
-		area = kmalloc(size, GFP_USER | flags);
+		area = kmalloc_node(size, GFP_USER | flags, numa_node);
 		if (area != NULL)
 			return area;
 	}
 
-	return __vmalloc(size, GFP_KERNEL | flags, PAGE_KERNEL);
+	return __vmalloc_node_flags_caller(size, numa_node, GFP_KERNEL | flags,
+					   __builtin_return_address(0));
 }
 
 void bpf_map_area_free(void *area)
@@ -309,10 +310,11 @@ int bpf_map_new_fd(struct bpf_map *map)
 		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
 		   sizeof(attr->CMD##_LAST_FIELD)) != NULL
 
-#define BPF_MAP_CREATE_LAST_FIELD inner_map_fd
+#define BPF_MAP_CREATE_LAST_FIELD numa_node
 /* called via syscall */
 static int map_create(union bpf_attr *attr)
 {
+	int numa_node = bpf_map_attr_numa_node(attr);
 	struct bpf_map *map;
 	int err;
 
@@ -320,6 +322,10 @@ static int map_create(union bpf_attr *attr)
 	if (err)
 		return -EINVAL;
 
+	if (numa_node != NUMA_NO_NODE &&
+	    (numa_node >= nr_node_ids || !node_online(numa_node)))
+		return -EINVAL;
+
 	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
 	map = find_and_alloc_map(attr);
 	if (IS_ERR(map))
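map_create() above rejects a numa_node that is out of range or offline with -EINVAL, so callers are expected to pass a node that is currently online. As a usage note, and purely as an assumption about how a caller might do that (not part of the patch), the sketch below picks the node of the CPU the calling thread runs on via libnuma and reuses the hypothetical bpf_create_map_on_node() helper from the earlier sketch.

/* Sketch only: choose the local NUMA node before BPF_MAP_CREATE.
 * Assumes libnuma (link with -lnuma) and the bpf_create_map_on_node()
 * helper shown earlier; falls back to node 0 when NUMA is unavailable.
 */
#define _GNU_SOURCE		/* for sched_getcpu() */
#include <numa.h>
#include <sched.h>
#include <stdio.h>

int main(void)
{
	int node = 0, fd;

	if (numa_available() != -1)
		node = numa_node_of_cpu(sched_getcpu());
	if (node < 0)
		node = 0;

	fd = bpf_create_map_on_node(node);
	if (fd < 0)
		perror("BPF_MAP_CREATE");	/* e.g. EINVAL for an offline node */
	return fd < 0 ? 1 : 0;
}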