diff options
Diffstat (limited to 'kernel/bpf/stackmap.c')
| -rw-r--r-- | kernel/bpf/stackmap.c | 86 |
1 file changed, 68 insertions, 18 deletions
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index f0a02c344358..499d9e933f8e 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c | |||
| @@ -10,9 +10,10 @@ | |||
| 10 | #include <linux/vmalloc.h> | 10 | #include <linux/vmalloc.h> |
| 11 | #include <linux/stacktrace.h> | 11 | #include <linux/stacktrace.h> |
| 12 | #include <linux/perf_event.h> | 12 | #include <linux/perf_event.h> |
| 13 | #include "percpu_freelist.h" | ||
| 13 | 14 | ||
| 14 | struct stack_map_bucket { | 15 | struct stack_map_bucket { |
| 15 | struct rcu_head rcu; | 16 | struct pcpu_freelist_node fnode; |
| 16 | u32 hash; | 17 | u32 hash; |
| 17 | u32 nr; | 18 | u32 nr; |
| 18 | u64 ip[]; | 19 | u64 ip[]; |
| @@ -20,10 +21,34 @@ struct stack_map_bucket { | |||
| 20 | 21 | ||
| 21 | struct bpf_stack_map { | 22 | struct bpf_stack_map { |
| 22 | struct bpf_map map; | 23 | struct bpf_map map; |
| 24 | void *elems; | ||
| 25 | struct pcpu_freelist freelist; | ||
| 23 | u32 n_buckets; | 26 | u32 n_buckets; |
| 24 | struct stack_map_bucket __rcu *buckets[]; | 27 | struct stack_map_bucket *buckets[]; |
| 25 | }; | 28 | }; |
| 26 | 29 | ||
| 30 | static int prealloc_elems_and_freelist(struct bpf_stack_map *smap) | ||
| 31 | { | ||
| 32 | u32 elem_size = sizeof(struct stack_map_bucket) + smap->map.value_size; | ||
| 33 | int err; | ||
| 34 | |||
| 35 | smap->elems = vzalloc(elem_size * smap->map.max_entries); | ||
| 36 | if (!smap->elems) | ||
| 37 | return -ENOMEM; | ||
| 38 | |||
| 39 | err = pcpu_freelist_init(&smap->freelist); | ||
| 40 | if (err) | ||
| 41 | goto free_elems; | ||
| 42 | |||
| 43 | pcpu_freelist_populate(&smap->freelist, smap->elems, elem_size, | ||
| 44 | smap->map.max_entries); | ||
| 45 | return 0; | ||
| 46 | |||
| 47 | free_elems: | ||
| 48 | vfree(smap->elems); | ||
| 49 | return err; | ||
| 50 | } | ||
| 51 | |||
| 27 | /* Called from syscall */ | 52 | /* Called from syscall */ |
| 28 | static struct bpf_map *stack_map_alloc(union bpf_attr *attr) | 53 | static struct bpf_map *stack_map_alloc(union bpf_attr *attr) |
| 29 | { | 54 | { |
| @@ -70,12 +95,22 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) | |||
| 70 | smap->n_buckets = n_buckets; | 95 | smap->n_buckets = n_buckets; |
| 71 | smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; | 96 | smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; |
| 72 | 97 | ||
| 98 | err = bpf_map_precharge_memlock(smap->map.pages); | ||
| 99 | if (err) | ||
| 100 | goto free_smap; | ||
| 101 | |||
| 73 | err = get_callchain_buffers(); | 102 | err = get_callchain_buffers(); |
| 74 | if (err) | 103 | if (err) |
| 75 | goto free_smap; | 104 | goto free_smap; |
| 76 | 105 | ||
| 106 | err = prealloc_elems_and_freelist(smap); | ||
| 107 | if (err) | ||
| 108 | goto put_buffers; | ||
| 109 | |||
| 77 | return &smap->map; | 110 | return &smap->map; |
| 78 | 111 | ||
| 112 | put_buffers: | ||
| 113 | put_callchain_buffers(); | ||
| 79 | free_smap: | 114 | free_smap: |
| 80 | kvfree(smap); | 115 | kvfree(smap); |
| 81 | return ERR_PTR(err); | 116 | return ERR_PTR(err); |
| @@ -121,7 +156,7 @@ static u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5) | |||
| 121 | ips = trace->ip + skip + init_nr; | 156 | ips = trace->ip + skip + init_nr; |
| 122 | hash = jhash2((u32 *)ips, trace_len / sizeof(u32), 0); | 157 | hash = jhash2((u32 *)ips, trace_len / sizeof(u32), 0); |
| 123 | id = hash & (smap->n_buckets - 1); | 158 | id = hash & (smap->n_buckets - 1); |
| 124 | bucket = rcu_dereference(smap->buckets[id]); | 159 | bucket = READ_ONCE(smap->buckets[id]); |
| 125 | 160 | ||
| 126 | if (bucket && bucket->hash == hash) { | 161 | if (bucket && bucket->hash == hash) { |
| 127 | if (flags & BPF_F_FAST_STACK_CMP) | 162 | if (flags & BPF_F_FAST_STACK_CMP) |
| @@ -135,19 +170,18 @@ static u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5) | |||
| 135 | if (bucket && !(flags & BPF_F_REUSE_STACKID)) | 170 | if (bucket && !(flags & BPF_F_REUSE_STACKID)) |
| 136 | return -EEXIST; | 171 | return -EEXIST; |
| 137 | 172 | ||
| 138 | new_bucket = kmalloc(sizeof(struct stack_map_bucket) + map->value_size, | 173 | new_bucket = (struct stack_map_bucket *) |
| 139 | GFP_ATOMIC | __GFP_NOWARN); | 174 | pcpu_freelist_pop(&smap->freelist); |
| 140 | if (unlikely(!new_bucket)) | 175 | if (unlikely(!new_bucket)) |
| 141 | return -ENOMEM; | 176 | return -ENOMEM; |
| 142 | 177 | ||
| 143 | memcpy(new_bucket->ip, ips, trace_len); | 178 | memcpy(new_bucket->ip, ips, trace_len); |
| 144 | memset(new_bucket->ip + trace_len / 8, 0, map->value_size - trace_len); | ||
| 145 | new_bucket->hash = hash; | 179 | new_bucket->hash = hash; |
| 146 | new_bucket->nr = trace_nr; | 180 | new_bucket->nr = trace_nr; |
| 147 | 181 | ||
| 148 | old_bucket = xchg(&smap->buckets[id], new_bucket); | 182 | old_bucket = xchg(&smap->buckets[id], new_bucket); |
| 149 | if (old_bucket) | 183 | if (old_bucket) |
| 150 | kfree_rcu(old_bucket, rcu); | 184 | pcpu_freelist_push(&smap->freelist, &old_bucket->fnode); |
| 151 | return id; | 185 | return id; |
| 152 | } | 186 | } |
| 153 | 187 | ||
| @@ -160,17 +194,34 @@ const struct bpf_func_proto bpf_get_stackid_proto = { | |||
| 160 | .arg3_type = ARG_ANYTHING, | 194 | .arg3_type = ARG_ANYTHING, |
| 161 | }; | 195 | }; |
| 162 | 196 | ||
| 163 | /* Called from syscall or from eBPF program */ | 197 | /* Called from eBPF program */ |
| 164 | static void *stack_map_lookup_elem(struct bpf_map *map, void *key) | 198 | static void *stack_map_lookup_elem(struct bpf_map *map, void *key) |
| 165 | { | 199 | { |
| 200 | return NULL; | ||
| 201 | } | ||
| 202 | |||
| 203 | /* Called from syscall */ | ||
| 204 | int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value) | ||
| 205 | { | ||
| 166 | struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); | 206 | struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); |
| 167 | struct stack_map_bucket *bucket; | 207 | struct stack_map_bucket *bucket, *old_bucket; |
| 168 | u32 id = *(u32 *)key; | 208 | u32 id = *(u32 *)key, trace_len; |
| 169 | 209 | ||
| 170 | if (unlikely(id >= smap->n_buckets)) | 210 | if (unlikely(id >= smap->n_buckets)) |
| 171 | return NULL; | 211 | return -ENOENT; |
| 172 | bucket = rcu_dereference(smap->buckets[id]); | 212 | |
| 173 | return bucket ? bucket->ip : NULL; | 213 | bucket = xchg(&smap->buckets[id], NULL); |
| 214 | if (!bucket) | ||
| 215 | return -ENOENT; | ||
| 216 | |||
| 217 | trace_len = bucket->nr * sizeof(u64); | ||
| 218 | memcpy(value, bucket->ip, trace_len); | ||
| 219 | memset(value + trace_len, 0, map->value_size - trace_len); | ||
| 220 | |||
| 221 | old_bucket = xchg(&smap->buckets[id], bucket); | ||
| 222 | if (old_bucket) | ||
| 223 | pcpu_freelist_push(&smap->freelist, &old_bucket->fnode); | ||
| 224 | return 0; | ||
| 174 | } | 225 | } |
| 175 | 226 | ||
| 176 | static int stack_map_get_next_key(struct bpf_map *map, void *key, void *next_key) | 227 | static int stack_map_get_next_key(struct bpf_map *map, void *key, void *next_key) |
| @@ -196,7 +247,7 @@ static int stack_map_delete_elem(struct bpf_map *map, void *key) | |||
| 196 | 247 | ||
| 197 | old_bucket = xchg(&smap->buckets[id], NULL); | 248 | old_bucket = xchg(&smap->buckets[id], NULL); |
| 198 | if (old_bucket) { | 249 | if (old_bucket) { |
| 199 | kfree_rcu(old_bucket, rcu); | 250 | pcpu_freelist_push(&smap->freelist, &old_bucket->fnode); |
| 200 | return 0; | 251 | return 0; |
| 201 | } else { | 252 | } else { |
| 202 | return -ENOENT; | 253 | return -ENOENT; |
| @@ -207,13 +258,12 @@ static int stack_map_delete_elem(struct bpf_map *map, void *key) | |||
| 207 | static void stack_map_free(struct bpf_map *map) | 258 | static void stack_map_free(struct bpf_map *map) |
| 208 | { | 259 | { |
| 209 | struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); | 260 | struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); |
| 210 | int i; | ||
| 211 | 261 | ||
| 262 | /* wait for bpf programs to complete before freeing stack map */ | ||
| 212 | synchronize_rcu(); | 263 | synchronize_rcu(); |
| 213 | 264 | ||
| 214 | for (i = 0; i < smap->n_buckets; i++) | 265 | vfree(smap->elems); |
| 215 | if (smap->buckets[i]) | 266 | pcpu_freelist_destroy(&smap->freelist); |
| 216 | kfree_rcu(smap->buckets[i], rcu); | ||
| 217 | kvfree(smap); | 267 | kvfree(smap); |
| 218 | put_callchain_buffers(); | 268 | put_callchain_buffers(); |
| 219 | } | 269 | } |
