diff options
| author | Alexei Starovoitov <ast@plumgrid.com> | 2014-09-26 03:16:59 -0400 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2014-09-26 15:05:14 -0400 |
| commit | db20fd2b01087bdfbe30bce314a198eefedcc42e (patch) | |
| tree | c43a4e3edb46deae542c8e79d74d24703ffbdcca /kernel/bpf | |
| parent | 749730ce42a2121e1c88350d69478bff3994b10a (diff) | |
bpf: add lookup/update/delete/iterate methods to BPF maps
'maps' are generic storage of different types for sharing data between the kernel
and user space.
The maps are accessed from user space via BPF syscall, which has commands:
- create a map with given type and attributes
fd = bpf(BPF_MAP_CREATE, union bpf_attr *attr, u32 size)
returns fd or negative error
- lookup key in a given map referenced by fd
err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size)
using attr->map_fd, attr->key, attr->value
returns zero and stores found elem into value or negative error
- create or update key/value pair in a given map
err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size)
using attr->map_fd, attr->key, attr->value
returns zero or negative error
- find and delete element by key in a given map
err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size)
using attr->map_fd, attr->key
- iterate map elements (based on input key return next_key)
err = bpf(BPF_MAP_GET_NEXT_KEY, union bpf_attr *attr, u32 size)
using attr->map_fd, attr->key, attr->next_key
- close(fd) deletes the map
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'kernel/bpf')
| -rw-r--r-- | kernel/bpf/syscall.c | 235 |
1 files changed, 235 insertions, 0 deletions
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 428a0e23adc0..f94349ecaf61 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include <linux/syscalls.h> | 13 | #include <linux/syscalls.h> |
| 14 | #include <linux/slab.h> | 14 | #include <linux/slab.h> |
| 15 | #include <linux/anon_inodes.h> | 15 | #include <linux/anon_inodes.h> |
| 16 | #include <linux/file.h> | ||
| 16 | 17 | ||
| 17 | static LIST_HEAD(bpf_map_types); | 18 | static LIST_HEAD(bpf_map_types); |
| 18 | 19 | ||
| @@ -111,6 +112,228 @@ free_map: | |||
| 111 | return err; | 112 | return err; |
| 112 | } | 113 | } |
| 113 | 114 | ||
| 115 | /* Map an fdget() result to the bpf_map kept in its file->private_data. | ||
| 116 | * Returns ERR_PTR(-EBADF) when @f carries no file, and ERR_PTR(-EINVAL), | ||
| 117 | * after fdput(f), when the file does not use bpf_map_fops. | ||
| 118 | * On success @f is left untouched; the caller finishes with fdput(f). | ||
| 119 | */ | ||
| 120 | struct bpf_map *bpf_map_get(struct fd f) | ||
| 121 | { | ||
| 122 | struct file *file = f.file; | ||
| 123 | |||
| 124 | if (!file) | ||
| 125 | return ERR_PTR(-EBADF); | ||
| 126 | |||
| 127 | if (file->f_op == &bpf_map_fops) | ||
| 128 | return file->private_data; | ||
| 129 | |||
| 130 | /* wrong kind of file: release it before reporting the error */ | ||
| 131 | fdput(f); | ||
| 132 | return ERR_PTR(-EINVAL); | ||
| 133 | } | ||
| 134 | |||
| 135 | /* user pointers travel in __aligned_u64 fields; narrow one back to a __user pointer */ | ||
| 136 | static void __user *u64_to_ptr(__u64 val) | ||
| 137 | { | ||
| 138 | return (void __user *)(unsigned long)val; | ||
| 139 | } | ||
| 140 | |||
| 141 | /* last field in 'union bpf_attr' used by this command */ | ||
| 142 | #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value | ||
| 143 | |||
| 144 | /* BPF_MAP_LOOKUP_ELEM: look up attr->key in the map referenced by | ||
| 145 | * attr->map_fd and copy the stored value to the user buffer attr->value. | ||
| 146 | * | ||
| 147 | * Returns 0 on success, -EINVAL when CHECK_ATTR() rejects @attr, the | ||
| 148 | * error reported by bpf_map_get() for an unusable fd, -ENOMEM when a | ||
| 149 | * kernel buffer cannot be allocated, -EFAULT when a user copy fails, | ||
| 150 | * or -ESRCH when the map has no element for the key. | ||
| 151 | */ | ||
| 152 | static int map_lookup_elem(union bpf_attr *attr) | ||
| 153 | { | ||
| 154 | void __user *ukey = u64_to_ptr(attr->key); | ||
| 155 | void __user *uvalue = u64_to_ptr(attr->value); | ||
| 156 | int ufd = attr->map_fd; | ||
| 157 | struct bpf_map *map; | ||
| 158 | void *key, *value, *elem; | ||
| 159 | struct fd f; | ||
| 160 | int err; | ||
| 161 | |||
| 162 | if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) | ||
| 163 | return -EINVAL; | ||
| 164 | |||
| 165 | /* take the fd reference only after validation, so that the early | ||
| 166 | * return above cannot leak it | ||
| 167 | */ | ||
| 168 | f = fdget(ufd); | ||
| 169 | map = bpf_map_get(f); | ||
| 170 | if (IS_ERR(map)) | ||
| 171 | return PTR_ERR(map); | ||
| 172 | |||
| 173 | err = -ENOMEM; | ||
| 174 | key = kmalloc(map->key_size, GFP_USER); | ||
| 175 | if (!key) | ||
| 176 | goto err_put; | ||
| 177 | |||
| 178 | err = -EFAULT; | ||
| 179 | if (copy_from_user(key, ukey, map->key_size) != 0) | ||
| 180 | goto free_key; | ||
| 181 | |||
| 182 | err = -ENOMEM; | ||
| 183 | value = kmalloc(map->value_size, GFP_USER); | ||
| 184 | if (!value) | ||
| 185 | goto free_key; | ||
| 186 | |||
| 187 | /* copy_to_user() may fault and sleep, which is not allowed inside an | ||
| 188 | * RCU read-side critical section: snapshot the element into a kernel | ||
| 189 | * buffer under the lock and copy it out after unlocking | ||
| 190 | */ | ||
| 191 | rcu_read_lock(); | ||
| 192 | elem = map->ops->map_lookup_elem(map, key); | ||
| 193 | if (elem) | ||
| 194 | memcpy(value, elem, map->value_size); | ||
| 195 | rcu_read_unlock(); | ||
| 196 | |||
| 197 | err = -ESRCH; | ||
| 198 | if (!elem) | ||
| 199 | goto free_value; | ||
| 200 | |||
| 201 | err = -EFAULT; | ||
| 202 | if (copy_to_user(uvalue, value, map->value_size) != 0) | ||
| 203 | goto free_value; | ||
| 204 | |||
| 205 | err = 0; | ||
| 206 | |||
| 207 | free_value: | ||
| 208 | kfree(value); | ||
| 209 | free_key: | ||
| 210 | kfree(key); | ||
| 211 | err_put: | ||
| 212 | fdput(f); | ||
| 213 | return err; | ||
| 214 | } | ||
| 190 | |||
| 191 | /* last field in 'union bpf_attr' used by this command */ | ||
| 192 | #define BPF_MAP_UPDATE_ELEM_LAST_FIELD value | ||
| 193 | |||
| 194 | /* BPF_MAP_UPDATE_ELEM: copy the key and value from the user buffers | ||
| 195 | * attr->key and attr->value and hand them to the map's map_update_elem() | ||
| 196 | * callback for the map referenced by attr->map_fd. | ||
| 197 | * | ||
| 198 | * Returns -EINVAL when CHECK_ATTR() rejects @attr, the error reported by | ||
| 199 | * bpf_map_get() for an unusable fd, -ENOMEM when a kernel buffer cannot | ||
| 200 | * be allocated, -EFAULT when a user copy fails, and otherwise whatever | ||
| 201 | * the map's map_update_elem() callback returns. | ||
| 202 | */ | ||
| 203 | static int map_update_elem(union bpf_attr *attr) | ||
| 204 | { | ||
| 205 | void __user *ukey = u64_to_ptr(attr->key); | ||
| 206 | void __user *uvalue = u64_to_ptr(attr->value); | ||
| 207 | int ufd = attr->map_fd; | ||
| 208 | struct bpf_map *map; | ||
| 209 | void *key, *value; | ||
| 210 | struct fd f; | ||
| 211 | int err; | ||
| 212 | |||
| 213 | if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM)) | ||
| 214 | return -EINVAL; | ||
| 215 | |||
| 216 | /* take the fd reference only after validation, so that the early | ||
| 217 | * return above cannot leak it | ||
| 218 | */ | ||
| 219 | f = fdget(ufd); | ||
| 220 | map = bpf_map_get(f); | ||
| 221 | if (IS_ERR(map)) | ||
| 222 | return PTR_ERR(map); | ||
| 223 | |||
| 224 | err = -ENOMEM; | ||
| 225 | key = kmalloc(map->key_size, GFP_USER); | ||
| 226 | if (!key) | ||
| 227 | goto err_put; | ||
| 228 | |||
| 229 | err = -EFAULT; | ||
| 230 | if (copy_from_user(key, ukey, map->key_size) != 0) | ||
| 231 | goto free_key; | ||
| 232 | |||
| 233 | err = -ENOMEM; | ||
| 234 | value = kmalloc(map->value_size, GFP_USER); | ||
| 235 | if (!value) | ||
| 236 | goto free_key; | ||
| 237 | |||
| 238 | err = -EFAULT; | ||
| 239 | if (copy_from_user(value, uvalue, map->value_size) != 0) | ||
| 240 | goto free_value; | ||
| 241 | |||
| 242 | /* eBPF programs that use maps run under rcu_read_lock(), and the map | ||
| 243 | * accessors rely on that, so take the same lock here | ||
| 244 | */ | ||
| 245 | rcu_read_lock(); | ||
| 246 | err = map->ops->map_update_elem(map, key, value); | ||
| 247 | rcu_read_unlock(); | ||
| 248 | |||
| 249 | free_value: | ||
| 250 | kfree(value); | ||
| 251 | free_key: | ||
| 252 | kfree(key); | ||
| 253 | err_put: | ||
| 254 | fdput(f); | ||
| 255 | return err; | ||
| 256 | } | ||
| 243 | |||
| 244 | /* last field in 'union bpf_attr' used by this command */ | ||
| 245 | #define BPF_MAP_DELETE_ELEM_LAST_FIELD key | ||
| 246 | |||
| 247 | /* BPF_MAP_DELETE_ELEM: copy the key from the user buffer attr->key and | ||
| 248 | * hand it to the map's map_delete_elem() callback for the map referenced | ||
| 249 | * by attr->map_fd. | ||
| 250 | * | ||
| 251 | * Returns -EINVAL when CHECK_ATTR() rejects @attr, the error reported by | ||
| 252 | * bpf_map_get() for an unusable fd, -ENOMEM when the key buffer cannot be | ||
| 253 | * allocated, -EFAULT when the key cannot be copied from user space, and | ||
| 254 | * otherwise whatever the map's map_delete_elem() callback returns. | ||
| 255 | */ | ||
| 256 | static int map_delete_elem(union bpf_attr *attr) | ||
| 257 | { | ||
| 258 | void __user *ukey = u64_to_ptr(attr->key); | ||
| 259 | int ufd = attr->map_fd; | ||
| 260 | struct bpf_map *map; | ||
| 261 | struct fd f; | ||
| 262 | void *key; | ||
| 263 | int err; | ||
| 264 | |||
| 265 | if (CHECK_ATTR(BPF_MAP_DELETE_ELEM)) | ||
| 266 | return -EINVAL; | ||
| 267 | |||
| 268 | /* take the fd reference only after validation, so that the early | ||
| 269 | * return above cannot leak it | ||
| 270 | */ | ||
| 271 | f = fdget(ufd); | ||
| 272 | map = bpf_map_get(f); | ||
| 273 | if (IS_ERR(map)) | ||
| 274 | return PTR_ERR(map); | ||
| 275 | |||
| 276 | err = -ENOMEM; | ||
| 277 | key = kmalloc(map->key_size, GFP_USER); | ||
| 278 | if (!key) | ||
| 279 | goto err_put; | ||
| 280 | |||
| 281 | err = -EFAULT; | ||
| 282 | if (copy_from_user(key, ukey, map->key_size) != 0) | ||
| 283 | goto free_key; | ||
| 284 | |||
| 285 | /* the map callback is invoked under rcu_read_lock() */ | ||
| 286 | rcu_read_lock(); | ||
| 287 | err = map->ops->map_delete_elem(map, key); | ||
| 288 | rcu_read_unlock(); | ||
| 289 | |||
| 290 | free_key: | ||
| 291 | kfree(key); | ||
| 292 | err_put: | ||
| 293 | fdput(f); | ||
| 294 | return err; | ||
| 295 | } | ||
| 281 | |||
| 282 | /* last field in 'union bpf_attr' used by this command */ | ||
| 283 | #define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key | ||
| 284 | |||
| 285 | /* BPF_MAP_GET_NEXT_KEY: copy the key from the user buffer attr->key, ask | ||
| 286 | * the map's map_get_next_key() callback for the key that follows it in | ||
| 287 | * the map referenced by attr->map_fd, and copy the result to the user | ||
| 288 | * buffer attr->next_key. | ||
| 289 | * | ||
| 290 | * Returns 0 on success, -EINVAL when CHECK_ATTR() rejects @attr, the | ||
| 291 | * error reported by bpf_map_get() for an unusable fd, -ENOMEM when a | ||
| 292 | * kernel buffer cannot be allocated, -EFAULT when a user copy fails, or | ||
| 293 | * the non-zero value returned by the map's map_get_next_key() callback. | ||
| 294 | */ | ||
| 295 | static int map_get_next_key(union bpf_attr *attr) | ||
| 296 | { | ||
| 297 | void __user *ukey = u64_to_ptr(attr->key); | ||
| 298 | void __user *unext_key = u64_to_ptr(attr->next_key); | ||
| 299 | int ufd = attr->map_fd; | ||
| 300 | struct bpf_map *map; | ||
| 301 | void *key, *next_key; | ||
| 302 | struct fd f; | ||
| 303 | int err; | ||
| 304 | |||
| 305 | if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY)) | ||
| 306 | return -EINVAL; | ||
| 307 | |||
| 308 | /* take the fd reference only after validation, so that the early | ||
| 309 | * return above cannot leak it | ||
| 310 | */ | ||
| 311 | f = fdget(ufd); | ||
| 312 | map = bpf_map_get(f); | ||
| 313 | if (IS_ERR(map)) | ||
| 314 | return PTR_ERR(map); | ||
| 315 | |||
| 316 | err = -ENOMEM; | ||
| 317 | key = kmalloc(map->key_size, GFP_USER); | ||
| 318 | if (!key) | ||
| 319 | goto err_put; | ||
| 320 | |||
| 321 | err = -EFAULT; | ||
| 322 | if (copy_from_user(key, ukey, map->key_size) != 0) | ||
| 323 | goto free_key; | ||
| 324 | |||
| 325 | err = -ENOMEM; | ||
| 326 | next_key = kmalloc(map->key_size, GFP_USER); | ||
| 327 | if (!next_key) | ||
| 328 | goto free_key; | ||
| 329 | |||
| 330 | /* the callback fills the kernel buffer next_key under rcu_read_lock(); | ||
| 331 | * the copy to user space happens only after the lock is dropped | ||
| 332 | */ | ||
| 333 | rcu_read_lock(); | ||
| 334 | err = map->ops->map_get_next_key(map, key, next_key); | ||
| 335 | rcu_read_unlock(); | ||
| 336 | if (err) | ||
| 337 | goto free_next_key; | ||
| 338 | |||
| 339 | err = -EFAULT; | ||
| 340 | if (copy_to_user(unext_key, next_key, map->key_size) != 0) | ||
| 341 | goto free_next_key; | ||
| 342 | |||
| 343 | err = 0; | ||
| 344 | |||
| 345 | free_next_key: | ||
| 346 | kfree(next_key); | ||
| 347 | free_key: | ||
| 348 | kfree(key); | ||
| 349 | err_put: | ||
| 350 | fdput(f); | ||
| 351 | return err; | ||
| 352 | } | ||
| 336 | |||
| 114 | SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) | 337 | SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) |
| 115 | { | 338 | { |
| 116 | union bpf_attr attr = {}; | 339 | union bpf_attr attr = {}; |
| @@ -160,6 +383,18 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz | |||
| 160 | case BPF_MAP_CREATE: | 383 | case BPF_MAP_CREATE: |
| 161 | err = map_create(&attr); | 384 | err = map_create(&attr); |
| 162 | break; | 385 | break; |
| 386 | case BPF_MAP_LOOKUP_ELEM: | ||
| 387 | err = map_lookup_elem(&attr); | ||
| 388 | break; | ||
| 389 | case BPF_MAP_UPDATE_ELEM: | ||
| 390 | err = map_update_elem(&attr); | ||
| 391 | break; | ||
| 392 | case BPF_MAP_DELETE_ELEM: | ||
| 393 | err = map_delete_elem(&attr); | ||
| 394 | break; | ||
| 395 | case BPF_MAP_GET_NEXT_KEY: | ||
| 396 | err = map_get_next_key(&attr); | ||
| 397 | break; | ||
| 163 | default: | 398 | default: |
| 164 | err = -EINVAL; | 399 | err = -EINVAL; |
| 165 | break; | 400 | break; |
