path: root/kernel
author	Alexei Starovoitov <ast@fb.com>	2016-02-02 01:39:54 -0500
committer	David S. Miller <davem@davemloft.net>	2016-02-06 03:34:36 -0500
commit	a10423b87a7eae75da79ce80a8d9475047a674ee (patch)
tree	2a8296c94fc90056e7a8d2b2dd3b31d4d3d73d5b /kernel
parent	824bd0ce6c7c43a9e1e210abf124958e54d88342 (diff)
bpf: introduce BPF_MAP_TYPE_PERCPU_ARRAY map
The primary use case is a latency histogram, where a bpf program computes the latency of block requests or other events and stores the histogram in an array of 64 elements. All cpus are constantly running, so a normal increment is not accurate and bpf_xadd causes cache ping-pong; this per-cpu approach allows the fastest collision-free counters.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
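For context, the intended usage reads roughly like the sketch below, written in the style of the samples/bpf programs of that era and assuming the samples/bpf build environment (bpf_helpers.h, kernel headers, -D__KERNEL__). The kprobe attach points, map names and the crude bucketing are illustrative, not part of this patch.

#include <linux/types.h>
#include <linux/version.h>
#include <linux/ptrace.h>
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"

/* start timestamp per in-flight request, keyed by request pointer */
struct bpf_map_def SEC("maps") start_ts = {
	.type = BPF_MAP_TYPE_HASH,
	.key_size = sizeof(long),
	.value_size = sizeof(u64),
	.max_entries = 4096,
};

/* 64-slot latency histogram; each slot holds one 64-bit counter per cpu */
struct bpf_map_def SEC("maps") lat_hist = {
	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
	.key_size = sizeof(u32),
	.value_size = sizeof(u64),
	.max_entries = 64,
};

SEC("kprobe/blk_start_request")
int bpf_prog1(struct pt_regs *ctx)
{
	long rq = PT_REGS_PARM1(ctx);
	u64 ts = bpf_ktime_get_ns();

	bpf_map_update_elem(&start_ts, &rq, &ts, BPF_ANY);
	return 0;
}

SEC("kprobe/blk_account_io_completion")
int bpf_prog2(struct pt_regs *ctx)
{
	long rq = PT_REGS_PARM1(ctx);
	u64 *tsp, delta, *cnt;
	u32 slot;

	tsp = bpf_map_lookup_elem(&start_ts, &rq);
	if (!tsp)
		return 0;
	delta = bpf_ktime_get_ns() - *tsp;
	bpf_map_delete_elem(&start_ts, &rq);

	/* crude linear bucketing into 64 slots of ~1ms each;
	 * a real program would use a log2 scale
	 */
	slot = delta >> 20;
	if (slot > 63)
		slot = 63;

	cnt = bpf_map_lookup_elem(&lat_hist, &slot);
	if (cnt)
		(*cnt)++;	/* plain increment on this cpu's copy, no bpf_xadd */
	return 0;
}

char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;

Because percpu_array_map_lookup_elem() returns this_cpu_ptr() of the element, the plain increment above only ever touches the local cpu's counter, which is exactly why neither bpf_xadd nor its cache-line ping-pong is needed.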
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/bpf/arraymap.c	102
1 file changed, 91 insertions(+), 11 deletions(-)
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 89ebbc4d1164..b9bf1d7949ca 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -17,11 +17,39 @@
 #include <linux/filter.h>
 #include <linux/perf_event.h>
 
+static void bpf_array_free_percpu(struct bpf_array *array)
+{
+	int i;
+
+	for (i = 0; i < array->map.max_entries; i++)
+		free_percpu(array->pptrs[i]);
+}
+
+static int bpf_array_alloc_percpu(struct bpf_array *array)
+{
+	void __percpu *ptr;
+	int i;
+
+	for (i = 0; i < array->map.max_entries; i++) {
+		ptr = __alloc_percpu_gfp(array->elem_size, 8,
+					 GFP_USER | __GFP_NOWARN);
+		if (!ptr) {
+			bpf_array_free_percpu(array);
+			return -ENOMEM;
+		}
+		array->pptrs[i] = ptr;
+	}
+
+	return 0;
+}
+
 /* Called from syscall */
 static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 {
+	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
 	struct bpf_array *array;
-	u32 elem_size, array_size;
+	u64 array_size;
+	u32 elem_size;
 
 	/* check sanity of attributes */
 	if (attr->max_entries == 0 || attr->key_size != 4 ||
@@ -36,12 +64,16 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 
 	elem_size = round_up(attr->value_size, 8);
 
-	/* check round_up into zero and u32 overflow */
-	if (elem_size == 0 ||
-	    attr->max_entries > (U32_MAX - PAGE_SIZE - sizeof(*array)) / elem_size)
+	array_size = sizeof(*array);
+	if (percpu)
+		array_size += (u64) attr->max_entries * sizeof(void *);
+	else
+		array_size += (u64) attr->max_entries * elem_size;
+
+	/* make sure there is no u32 overflow later in round_up() */
+	if (array_size >= U32_MAX - PAGE_SIZE)
 		return ERR_PTR(-ENOMEM);
 
-	array_size = sizeof(*array) + attr->max_entries * elem_size;
 
 	/* allocate all map elements and zero-initialize them */
 	array = kzalloc(array_size, GFP_USER | __GFP_NOWARN);
@@ -52,12 +84,25 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 	}
 
 	/* copy mandatory map attributes */
+	array->map.map_type = attr->map_type;
 	array->map.key_size = attr->key_size;
 	array->map.value_size = attr->value_size;
 	array->map.max_entries = attr->max_entries;
-	array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT;
 	array->elem_size = elem_size;
 
+	if (!percpu)
+		goto out;
+
+	array_size += (u64) attr->max_entries * elem_size * num_possible_cpus();
+
+	if (array_size >= U32_MAX - PAGE_SIZE ||
+	    elem_size > PCPU_MIN_UNIT_SIZE || bpf_array_alloc_percpu(array)) {
+		kvfree(array);
+		return ERR_PTR(-ENOMEM);
+	}
+out:
+	array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT;
+
 	return &array->map;
 }
 
@@ -67,12 +112,24 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	u32 index = *(u32 *)key;
 
-	if (index >= array->map.max_entries)
+	if (unlikely(index >= array->map.max_entries))
 		return NULL;
 
 	return array->value + array->elem_size * index;
 }
 
+/* Called from eBPF program */
+static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	u32 index = *(u32 *)key;
+
+	if (unlikely(index >= array->map.max_entries))
+		return NULL;
+
+	return this_cpu_ptr(array->pptrs[index]);
+}
+
 /* Called from syscall */
 static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 {
@@ -99,19 +156,24 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	u32 index = *(u32 *)key;
 
-	if (map_flags > BPF_EXIST)
+	if (unlikely(map_flags > BPF_EXIST))
 		/* unknown flags */
 		return -EINVAL;
 
-	if (index >= array->map.max_entries)
+	if (unlikely(index >= array->map.max_entries))
 		/* all elements were pre-allocated, cannot insert a new one */
 		return -E2BIG;
 
-	if (map_flags == BPF_NOEXIST)
+	if (unlikely(map_flags == BPF_NOEXIST))
 		/* all elements already exist */
 		return -EEXIST;
 
-	memcpy(array->value + array->elem_size * index, value, map->value_size);
+	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
+		memcpy(this_cpu_ptr(array->pptrs[index]),
+		       value, map->value_size);
+	else
+		memcpy(array->value + array->elem_size * index,
+		       value, map->value_size);
 	return 0;
 }
 
@@ -133,6 +195,9 @@ static void array_map_free(struct bpf_map *map)
 	 */
 	synchronize_rcu();
 
+	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
+		bpf_array_free_percpu(array);
+
 	kvfree(array);
 }
 
@@ -150,9 +215,24 @@ static struct bpf_map_type_list array_type __read_mostly = {
 	.type = BPF_MAP_TYPE_ARRAY,
 };
 
+static const struct bpf_map_ops percpu_array_ops = {
+	.map_alloc = array_map_alloc,
+	.map_free = array_map_free,
+	.map_get_next_key = array_map_get_next_key,
+	.map_lookup_elem = percpu_array_map_lookup_elem,
+	.map_update_elem = array_map_update_elem,
+	.map_delete_elem = array_map_delete_elem,
+};
+
+static struct bpf_map_type_list percpu_array_type __read_mostly = {
+	.ops = &percpu_array_ops,
+	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
+};
+
 static int __init register_array_map(void)
 {
 	bpf_register_map_type(&array_type);
+	bpf_register_map_type(&percpu_array_type);
 	return 0;
 }
 late_initcall(register_array_map);
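
From user space, a map of this type is created through the bpf(2) syscall exactly like an ordinary array map, only with the new map_type. A minimal sketch, assuming installed uapi headers that already define BPF_MAP_TYPE_PERCPU_ARRAY; value_size and max_entries are chosen to match the histogram example above, while key_size must be 4 as enforced in array_map_alloc():

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

int main(void)
{
	union bpf_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.map_type    = BPF_MAP_TYPE_PERCPU_ARRAY;
	attr.key_size    = 4;	/* array maps require 4-byte keys */
	attr.value_size  = 8;	/* one u64 counter per slot */
	attr.max_entries = 64;	/* 64 latency buckets */

	fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
	if (fd < 0) {
		perror("BPF_MAP_CREATE");
		return 1;
	}
	printf("per-cpu array map created, fd=%d\n", fd);
	return 0;
}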