author    Martin KaFai Lau <kafai@fb.com>    2017-08-18 14:28:00 -0400
committer David S. Miller <davem@davemloft.net>    2017-08-20 00:35:43 -0400
commit    96eabe7a40aa17e613cf3db2c742ee8b1fc764d0 (patch)
tree      6364b3b63eeb2707e74c3bd770b01342016d936a /kernel/bpf/sockmap.c
parent    bd76b87962833f6e55264030a227be0f090b1286 (diff)
bpf: Allow selecting numa node during map creation
The current map creation API does not allow providing a numa-node
preference.  The memory usually comes from wherever the map-creation
process is running.  Performance is not ideal if the bpf_prog is known
to always run in a numa node different from the map-creation process.

One use case is sharding on CPU to different LRU maps (i.e. an array
of LRU maps).  Here is the result of map_perf_test on the
INNER_LRU_HASH_PREALLOC test if we force the lru map used by CPU0 to
be allocated from a remote numa node:

[ The machine has 20 cores.  CPU0-9 at node 0.  CPU10-19 at node 1 ]

># taskset -c 10 ./map_perf_test 512 8 1260000 8000000
5:inner_lru_hash_map_perf pre-alloc 1628380 events per sec
4:inner_lru_hash_map_perf pre-alloc 1626396 events per sec
3:inner_lru_hash_map_perf pre-alloc 1626144 events per sec
6:inner_lru_hash_map_perf pre-alloc 1621657 events per sec
2:inner_lru_hash_map_perf pre-alloc 1621534 events per sec
1:inner_lru_hash_map_perf pre-alloc 1620292 events per sec
7:inner_lru_hash_map_perf pre-alloc 1613305 events per sec
0:inner_lru_hash_map_perf pre-alloc 1239150 events per sec  #<<<

After specifying the numa node:
># taskset -c 10 ./map_perf_test 512 8 1260000 8000000
5:inner_lru_hash_map_perf pre-alloc 1629627 events per sec
3:inner_lru_hash_map_perf pre-alloc 1628057 events per sec
1:inner_lru_hash_map_perf pre-alloc 1623054 events per sec
6:inner_lru_hash_map_perf pre-alloc 1616033 events per sec
2:inner_lru_hash_map_perf pre-alloc 1614630 events per sec
4:inner_lru_hash_map_perf pre-alloc 1612651 events per sec
7:inner_lru_hash_map_perf pre-alloc 1609337 events per sec
0:inner_lru_hash_map_perf pre-alloc 1619340 events per sec  #<<<

This patch adds one field, numa_node, to the bpf_attr.  Since numa
node 0 is a valid node, a new flag, BPF_F_NUMA_NODE, is also added.
The numa_node field is honored if and only if the BPF_F_NUMA_NODE
flag is set.

Numa node selection is not supported for percpu maps.

This patch does not convert every kmalloc; e.g. 'htab = kzalloc()' is
left unchanged since the object is small enough to stay in the cache.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
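As a usage illustration (not part of the patch), a user-space program
can opt into the new attribute through the raw bpf(2) syscall.  The
wrapper below is a hypothetical sketch; BPF_F_NUMA_NODE, the
numa_node field, and the BPF_MAP_CREATE command are the real
interfaces described above, and building it requires uapi headers
from a kernel with this patch applied.

/* Sketch only, not part of this patch: create a map on a chosen
 * NUMA node using the numa_node attr field and BPF_F_NUMA_NODE
 * flag added by this patch.
 */
#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int create_map_on_node(enum bpf_map_type type, __u32 key_size,
			      __u32 value_size, __u32 max_entries,
			      __u32 node)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_type    = type;
	attr.key_size    = key_size;
	attr.value_size  = value_size;
	attr.max_entries = max_entries;
	/* numa_node is honored if and only if BPF_F_NUMA_NODE is
	 * set, because node 0 is itself a valid node.
	 */
	attr.map_flags   = BPF_F_NUMA_NODE;
	attr.numa_node   = node;

	return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
}

/* e.g. an LRU hash on node 1, as in the benchmark above:
 * int fd = create_map_on_node(BPF_MAP_TYPE_LRU_HASH, 4, 4, 1260000, 1);
 */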
Diffstat (limited to 'kernel/bpf/sockmap.c')
-rw-r--r--    kernel/bpf/sockmap.c    10
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 39de541fbcdc..78b2bb9370ac 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -443,7 +443,9 @@ static struct smap_psock *smap_init_psock(struct sock *sock,
 {
 	struct smap_psock *psock;
 
-	psock = kzalloc(sizeof(struct smap_psock), GFP_ATOMIC | __GFP_NOWARN);
+	psock = kzalloc_node(sizeof(struct smap_psock),
+			     GFP_ATOMIC | __GFP_NOWARN,
+			     stab->map.numa_node);
 	if (!psock)
 		return ERR_PTR(-ENOMEM);
 
@@ -465,7 +467,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
 
 	/* check sanity of attributes */
 	if (attr->max_entries == 0 || attr->key_size != 4 ||
-	    attr->value_size != 4 || attr->map_flags)
+	    attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
 		return ERR_PTR(-EINVAL);
 
 	if (attr->value_size > KMALLOC_MAX_SIZE)
@@ -481,6 +483,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
 	stab->map.value_size = attr->value_size;
 	stab->map.max_entries = attr->max_entries;
 	stab->map.map_flags = attr->map_flags;
+	stab->map.numa_node = bpf_map_attr_numa_node(attr);
 
 	/* make sure page count doesn't overflow */
 	cost = (u64) stab->map.max_entries * sizeof(struct sock *);
@@ -495,7 +498,8 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
 		goto free_stab;
 
 	stab->sock_map = bpf_map_area_alloc(stab->map.max_entries *
-					    sizeof(struct sock *));
+					    sizeof(struct sock *),
+					    stab->map.numa_node);
 	if (!stab->sock_map)
 		goto free_stab;
 