diff options
author | Martin KaFai Lau <kafai@fb.com> | 2017-06-05 15:15:50 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2017-06-06 15:41:23 -0400 |
commit | bd5f5f4ecb78e2698dad655645b6d6a2f7012a8c (patch) | |
tree | 3d3bc441f14e2494ada5a37143f5fd190075f540 /kernel/bpf/syscall.c | |
parent | b16d9aa4c2b90af8d2c3201e245150f8c430c3bc (diff) |
bpf: Add BPF_MAP_GET_FD_BY_ID
Add BPF_MAP_GET_FD_BY_ID command to allow a user to get an fd
from a bpf_map's ID.
bpf_map_inc_not_zero() is added and is called with map_idr_lock
held.
__bpf_map_put() is also added which has the 'bool do_idr_lock'
param to decide if the map_idr_lock should be acquired when
freeing the map->id.
In the error path of bpf_map_inc_not_zero(), it may have to
call __bpf_map_put(map, false) which does not need
to take the map_idr_lock when freeing the map->id.
It is currently limited to CAP_SYS_ADMIN, a restriction we can
consider lifting in followup patches.
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@fb.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'kernel/bpf/syscall.c')
-rw-r--r-- | kernel/bpf/syscall.c | 95 |
1 files changed, 85 insertions, 10 deletions
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index dc6253bb8ebb..1802bb9c47d9 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c | |||
@@ -135,11 +135,19 @@ static int bpf_map_alloc_id(struct bpf_map *map) | |||
135 | return id > 0 ? 0 : id; | 135 | return id > 0 ? 0 : id; |
136 | } | 136 | } |
137 | 137 | ||
138 | static void bpf_map_free_id(struct bpf_map *map) | 138 | static void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock) |
139 | { | 139 | { |
140 | spin_lock_bh(&map_idr_lock); | 140 | if (do_idr_lock) |
141 | spin_lock_bh(&map_idr_lock); | ||
142 | else | ||
143 | __acquire(&map_idr_lock); | ||
144 | |||
141 | idr_remove(&map_idr, map->id); | 145 | idr_remove(&map_idr, map->id); |
142 | spin_unlock_bh(&map_idr_lock); | 146 | |
147 | if (do_idr_lock) | ||
148 | spin_unlock_bh(&map_idr_lock); | ||
149 | else | ||
150 | __release(&map_idr_lock); | ||
143 | } | 151 | } |
144 | 152 | ||
145 | /* called from workqueue */ | 153 | /* called from workqueue */ |
@@ -163,16 +171,21 @@ static void bpf_map_put_uref(struct bpf_map *map) | |||
163 | /* decrement map refcnt and schedule it for freeing via workqueue | 171 | /* decrement map refcnt and schedule it for freeing via workqueue |
164 | * (unrelying map implementation ops->map_free() might sleep) | 172 | * (unrelying map implementation ops->map_free() might sleep) |
165 | */ | 173 | */ |
166 | void bpf_map_put(struct bpf_map *map) | 174 | static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock) |
167 | { | 175 | { |
168 | if (atomic_dec_and_test(&map->refcnt)) { | 176 | if (atomic_dec_and_test(&map->refcnt)) { |
169 | /* bpf_map_free_id() must be called first */ | 177 | /* bpf_map_free_id() must be called first */ |
170 | bpf_map_free_id(map); | 178 | bpf_map_free_id(map, do_idr_lock); |
171 | INIT_WORK(&map->work, bpf_map_free_deferred); | 179 | INIT_WORK(&map->work, bpf_map_free_deferred); |
172 | schedule_work(&map->work); | 180 | schedule_work(&map->work); |
173 | } | 181 | } |
174 | } | 182 | } |
175 | 183 | ||
184 | void bpf_map_put(struct bpf_map *map) | ||
185 | { | ||
186 | __bpf_map_put(map, true); | ||
187 | } | ||
188 | |||
176 | void bpf_map_put_with_uref(struct bpf_map *map) | 189 | void bpf_map_put_with_uref(struct bpf_map *map) |
177 | { | 190 | { |
178 | bpf_map_put_uref(map); | 191 | bpf_map_put_uref(map); |
@@ -271,15 +284,20 @@ static int map_create(union bpf_attr *attr) | |||
271 | goto free_map; | 284 | goto free_map; |
272 | 285 | ||
273 | err = bpf_map_new_fd(map); | 286 | err = bpf_map_new_fd(map); |
274 | if (err < 0) | 287 | if (err < 0) { |
275 | /* failed to allocate fd */ | 288 | /* failed to allocate fd. |
276 | goto free_id; | 289 | * bpf_map_put() is needed because the above |
290 | * bpf_map_alloc_id() has published the map | ||
291 | * to the userspace and the userspace may | ||
292 | * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID. | ||
293 | */ | ||
294 | bpf_map_put(map); | ||
295 | return err; | ||
296 | } | ||
277 | 297 | ||
278 | trace_bpf_map_create(map, err); | 298 | trace_bpf_map_create(map, err); |
279 | return err; | 299 | return err; |
280 | 300 | ||
281 | free_id: | ||
282 | bpf_map_free_id(map); | ||
283 | free_map: | 301 | free_map: |
284 | bpf_map_uncharge_memlock(map); | 302 | bpf_map_uncharge_memlock(map); |
285 | free_map_nouncharge: | 303 | free_map_nouncharge: |
@@ -331,6 +349,28 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd) | |||
331 | return map; | 349 | return map; |
332 | } | 350 | } |
333 | 351 | ||
352 | /* map_idr_lock should have been held */ | ||
353 | static struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map, | ||
354 | bool uref) | ||
355 | { | ||
356 | int refold; | ||
357 | |||
358 | refold = __atomic_add_unless(&map->refcnt, 1, 0); | ||
359 | |||
360 | if (refold >= BPF_MAX_REFCNT) { | ||
361 | __bpf_map_put(map, false); | ||
362 | return ERR_PTR(-EBUSY); | ||
363 | } | ||
364 | |||
365 | if (!refold) | ||
366 | return ERR_PTR(-ENOENT); | ||
367 | |||
368 | if (uref) | ||
369 | atomic_inc(&map->usercnt); | ||
370 | |||
371 | return map; | ||
372 | } | ||
373 | |||
334 | int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value) | 374 | int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value) |
335 | { | 375 | { |
336 | return -ENOTSUPP; | 376 | return -ENOTSUPP; |
@@ -1167,6 +1207,38 @@ static int bpf_prog_get_fd_by_id(const union bpf_attr *attr) | |||
1167 | return fd; | 1207 | return fd; |
1168 | } | 1208 | } |
1169 | 1209 | ||
1210 | #define BPF_MAP_GET_FD_BY_ID_LAST_FIELD map_id | ||
1211 | |||
1212 | static int bpf_map_get_fd_by_id(const union bpf_attr *attr) | ||
1213 | { | ||
1214 | struct bpf_map *map; | ||
1215 | u32 id = attr->map_id; | ||
1216 | int fd; | ||
1217 | |||
1218 | if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID)) | ||
1219 | return -EINVAL; | ||
1220 | |||
1221 | if (!capable(CAP_SYS_ADMIN)) | ||
1222 | return -EPERM; | ||
1223 | |||
1224 | spin_lock_bh(&map_idr_lock); | ||
1225 | map = idr_find(&map_idr, id); | ||
1226 | if (map) | ||
1227 | map = bpf_map_inc_not_zero(map, true); | ||
1228 | else | ||
1229 | map = ERR_PTR(-ENOENT); | ||
1230 | spin_unlock_bh(&map_idr_lock); | ||
1231 | |||
1232 | if (IS_ERR(map)) | ||
1233 | return PTR_ERR(map); | ||
1234 | |||
1235 | fd = bpf_map_new_fd(map); | ||
1236 | if (fd < 0) | ||
1237 | bpf_map_put(map); | ||
1238 | |||
1239 | return fd; | ||
1240 | } | ||
1241 | |||
1170 | SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) | 1242 | SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) |
1171 | { | 1243 | { |
1172 | union bpf_attr attr = {}; | 1244 | union bpf_attr attr = {}; |
@@ -1255,6 +1327,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz | |||
1255 | case BPF_PROG_GET_FD_BY_ID: | 1327 | case BPF_PROG_GET_FD_BY_ID: |
1256 | err = bpf_prog_get_fd_by_id(&attr); | 1328 | err = bpf_prog_get_fd_by_id(&attr); |
1257 | break; | 1329 | break; |
1330 | case BPF_MAP_GET_FD_BY_ID: | ||
1331 | err = bpf_map_get_fd_by_id(&attr); | ||
1332 | break; | ||
1258 | default: | 1333 | default: |
1259 | err = -EINVAL; | 1334 | err = -EINVAL; |
1260 | break; | 1335 | break; |