diff options
author | Martin KaFai Lau <kafai@fb.com> | 2017-08-18 14:28:00 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2017-08-20 00:35:43 -0400 |
commit | 96eabe7a40aa17e613cf3db2c742ee8b1fc764d0 (patch) | |
tree | 6364b3b63eeb2707e74c3bd770b01342016d936a /include/uapi/linux/bpf.h | |
parent | bd76b87962833f6e55264030a227be0f090b1286 (diff) |
bpf: Allow selecting numa node during map creation
The current map creation API does not allow specifying a numa-node
preference. The memory usually comes from where the map-creation-process
is running. The performance is not ideal if the bpf_prog is known to
always run in a numa node different from the map-creation-process.
One of the use cases is sharding on CPU to different LRU maps (i.e.
an array of LRU maps). Here is the test result of map_perf_test on
the INNER_LRU_HASH_PREALLOC test if we force the lru map used by
CPU0 to be allocated from a remote numa node:
[ The machine has 20 cores. CPU0-9 at node 0. CPU10-19 at node 1 ]
># taskset -c 10 ./map_perf_test 512 8 1260000 8000000
5:inner_lru_hash_map_perf pre-alloc 1628380 events per sec
4:inner_lru_hash_map_perf pre-alloc 1626396 events per sec
3:inner_lru_hash_map_perf pre-alloc 1626144 events per sec
6:inner_lru_hash_map_perf pre-alloc 1621657 events per sec
2:inner_lru_hash_map_perf pre-alloc 1621534 events per sec
1:inner_lru_hash_map_perf pre-alloc 1620292 events per sec
7:inner_lru_hash_map_perf pre-alloc 1613305 events per sec
0:inner_lru_hash_map_perf pre-alloc 1239150 events per sec #<<<
After specifying numa node:
># taskset -c 10 ./map_perf_test 512 8 1260000 8000000
5:inner_lru_hash_map_perf pre-alloc 1629627 events per sec
3:inner_lru_hash_map_perf pre-alloc 1628057 events per sec
1:inner_lru_hash_map_perf pre-alloc 1623054 events per sec
6:inner_lru_hash_map_perf pre-alloc 1616033 events per sec
2:inner_lru_hash_map_perf pre-alloc 1614630 events per sec
4:inner_lru_hash_map_perf pre-alloc 1612651 events per sec
7:inner_lru_hash_map_perf pre-alloc 1609337 events per sec
0:inner_lru_hash_map_perf pre-alloc 1619340 events per sec #<<<
This patch adds one field, numa_node, to the bpf_attr. Since numa node 0
is a valid node, a new flag BPF_F_NUMA_NODE is also added. The numa_node
field is honored if and only if the BPF_F_NUMA_NODE flag is set.
Numa node selection is not supported for percpu maps.
This patch does not change all the kmalloc calls. For example,
'htab = kzalloc()' is not changed since the object
is small enough to stay in the cache.
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/uapi/linux/bpf.h')
-rw-r--r-- | include/uapi/linux/bpf.h | 10 |
1 files changed, 9 insertions, 1 deletions
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 5ecbe812a2cc..843818dff96d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h | |||
@@ -165,6 +165,7 @@ enum bpf_attach_type { | |||
165 | #define BPF_NOEXIST 1 /* create new element if it didn't exist */ | 165 | #define BPF_NOEXIST 1 /* create new element if it didn't exist */ |
166 | #define BPF_EXIST 2 /* update existing element */ | 166 | #define BPF_EXIST 2 /* update existing element */ |
167 | 167 | ||
168 | /* flags for BPF_MAP_CREATE command */ | ||
168 | #define BPF_F_NO_PREALLOC (1U << 0) | 169 | #define BPF_F_NO_PREALLOC (1U << 0) |
169 | /* Instead of having one common LRU list in the | 170 | /* Instead of having one common LRU list in the |
170 | * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list | 171 | * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list |
@@ -173,6 +174,8 @@ enum bpf_attach_type { | |||
173 | * across different LRU lists. | 174 | * across different LRU lists. |
174 | */ | 175 | */ |
175 | #define BPF_F_NO_COMMON_LRU (1U << 1) | 176 | #define BPF_F_NO_COMMON_LRU (1U << 1) |
177 | /* Specify numa node during map creation */ | ||
178 | #define BPF_F_NUMA_NODE (1U << 2) | ||
176 | 179 | ||
177 | union bpf_attr { | 180 | union bpf_attr { |
178 | struct { /* anonymous struct used by BPF_MAP_CREATE command */ | 181 | struct { /* anonymous struct used by BPF_MAP_CREATE command */ |
@@ -180,8 +183,13 @@ union bpf_attr { | |||
180 | __u32 key_size; /* size of key in bytes */ | 183 | __u32 key_size; /* size of key in bytes */ |
181 | __u32 value_size; /* size of value in bytes */ | 184 | __u32 value_size; /* size of value in bytes */ |
182 | __u32 max_entries; /* max number of entries in a map */ | 185 | __u32 max_entries; /* max number of entries in a map */ |
183 | __u32 map_flags; /* prealloc or not */ | 186 | __u32 map_flags; /* BPF_MAP_CREATE related |
187 | * flags defined above. | ||
188 | */ | ||
184 | __u32 inner_map_fd; /* fd pointing to the inner map */ | 189 | __u32 inner_map_fd; /* fd pointing to the inner map */ |
190 | __u32 numa_node; /* numa node (effective only if | ||
191 | * BPF_F_NUMA_NODE is set). | ||
192 | */ | ||
185 | }; | 193 | }; |
186 | 194 | ||
187 | struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ | 195 | struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ |