aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/bpf/syscall.c
diff options
context:
space:
mode:
author Daniel Borkmann <daniel@iogearbox.net> 2019-04-09 17:20:06 -0400
committer Alexei Starovoitov <ast@kernel.org> 2019-04-09 20:05:46 -0400
commit 87df15de441bd4add7876ef584da8cabdd9a042a (patch)
tree aef0470fd5ccbb5f524148be5dc2b0e2bee093e3 /kernel/bpf/syscall.c
parent 591fe9888d7809d9ee5c828020b6c6ae27c37229 (diff)
bpf: add syscall side map freeze support
This patch adds a new BPF_MAP_FREEZE command which allows "freezing" the map globally as read-only / immutable from the syscall side. Map permission handling has been refactored into map_get_sys_perms(), which drops FMODE_CAN_WRITE in the case of a locked map. The main use case is to allow for setting up .rodata sections from the BPF ELF which are loaded into the kernel, meaning the BPF loader first allocates the map, sets up the map value by copying the .rodata section into it and, once complete, calls BPF_MAP_FREEZE on the map fd to prevent further modifications. Right now BPF_MAP_FREEZE only takes the map fd as an argument, while the remaining bpf_attr members are required to be zero. I didn't add write-only locking here as a counterpart since I don't have a concrete use-case for it on my side, and I think it probably makes more sense to wait until there actually is one. In that case bpf_attr can be extended as usual with a flag field and/or others, where flag 0 means that we lock the map read-only; hence this doesn't prevent adding further extensions to BPF_MAP_FREEZE upon need. A map creation flag like BPF_F_WRONCE was not considered for a couple of reasons: i) in the case of a generic implementation, a map can consist of more than just one element, thus there could be multiple map updates needed to set the map into a state where it can then be made immutable, ii) WRONCE indicates an exact one-time write before it is then set immutable. A generic implementation would set a bit atomically on map update entry (if unset), indicating that every subsequent update from then onwards will need to bail out there. However, map updates can fail, so upon failure that flag would need to be unset again and the update attempt would need to be repeated for it to be eventually made immutable. While this can be made race-free, this approach feels less clean and, in combination with reason i), it's not generic enough.
A dedicated BPF_MAP_FREEZE command directly sets the flag, and the caller has the guarantee that the map is immutable from the syscall side upon successful return for any future syscall invocations that would alter the map state, which is also more intuitive from an API point of view. A command name such as BPF_MAP_LOCK has been avoided as it's too close to BPF map spin locks (which already have the BPF_F_LOCK flag). BPF_MAP_FREEZE is so far only enabled for privileged users. Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Martin KaFai Lau <kafai@fb.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'kernel/bpf/syscall.c')
-rw-r--r-- kernel/bpf/syscall.c 66
1 files changed, 54 insertions, 12 deletions
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 0c9276b54c88..b3ce516e5a20 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -355,6 +355,18 @@ static int bpf_map_release(struct inode *inode, struct file *filp)
355 return 0; 355 return 0;
356} 356}
357 357
/* Compute the file mode to use for syscall-side permission checks on a map.
 *
 * Starts from the f_mode of the file behind @f and clears FMODE_CAN_WRITE
 * when map->frozen is set; all other mode bits are passed through
 * unchanged.
 *
 * @map: map whose frozen flag is consulted (read with READ_ONCE()).
 * @f:   fd whose f.file->f_mode supplies the base mode; f.file is
 *       dereferenced unconditionally, so it must be non-NULL.
 *
 * Returns the resulting fmode_t.
 */
358static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f)
359{
360 fmode_t mode = f.file->f_mode;
361
362 /* Our file permissions may have been overridden by global
363 * map permissions facing syscall side.
364 */
365 if (READ_ONCE(map->frozen))
366 mode &= ~FMODE_CAN_WRITE;
367 return mode;
368}
369
358#ifdef CONFIG_PROC_FS 370#ifdef CONFIG_PROC_FS
359static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) 371static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
360{ 372{
@@ -376,14 +388,16 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
376 "max_entries:\t%u\n" 388 "max_entries:\t%u\n"
377 "map_flags:\t%#x\n" 389 "map_flags:\t%#x\n"
378 "memlock:\t%llu\n" 390 "memlock:\t%llu\n"
379 "map_id:\t%u\n", 391 "map_id:\t%u\n"
392 "frozen:\t%u\n",
380 map->map_type, 393 map->map_type,
381 map->key_size, 394 map->key_size,
382 map->value_size, 395 map->value_size,
383 map->max_entries, 396 map->max_entries,
384 map->map_flags, 397 map->map_flags,
385 map->pages * 1ULL << PAGE_SHIFT, 398 map->pages * 1ULL << PAGE_SHIFT,
386 map->id); 399 map->id,
400 READ_ONCE(map->frozen));
387 401
388 if (owner_prog_type) { 402 if (owner_prog_type) {
389 seq_printf(m, "owner_prog_type:\t%u\n", 403 seq_printf(m, "owner_prog_type:\t%u\n",
@@ -727,8 +741,7 @@ static int map_lookup_elem(union bpf_attr *attr)
727 map = __bpf_map_get(f); 741 map = __bpf_map_get(f);
728 if (IS_ERR(map)) 742 if (IS_ERR(map))
729 return PTR_ERR(map); 743 return PTR_ERR(map);
730 744 if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
731 if (!(f.file->f_mode & FMODE_CAN_READ)) {
732 err = -EPERM; 745 err = -EPERM;
733 goto err_put; 746 goto err_put;
734 } 747 }
@@ -857,8 +870,7 @@ static int map_update_elem(union bpf_attr *attr)
857 map = __bpf_map_get(f); 870 map = __bpf_map_get(f);
858 if (IS_ERR(map)) 871 if (IS_ERR(map))
859 return PTR_ERR(map); 872 return PTR_ERR(map);
860 873 if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
861 if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
862 err = -EPERM; 874 err = -EPERM;
863 goto err_put; 875 goto err_put;
864 } 876 }
@@ -969,8 +981,7 @@ static int map_delete_elem(union bpf_attr *attr)
969 map = __bpf_map_get(f); 981 map = __bpf_map_get(f);
970 if (IS_ERR(map)) 982 if (IS_ERR(map))
971 return PTR_ERR(map); 983 return PTR_ERR(map);
972 984 if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
973 if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
974 err = -EPERM; 985 err = -EPERM;
975 goto err_put; 986 goto err_put;
976 } 987 }
@@ -1021,8 +1032,7 @@ static int map_get_next_key(union bpf_attr *attr)
1021 map = __bpf_map_get(f); 1032 map = __bpf_map_get(f);
1022 if (IS_ERR(map)) 1033 if (IS_ERR(map))
1023 return PTR_ERR(map); 1034 return PTR_ERR(map);
1024 1035 if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
1025 if (!(f.file->f_mode & FMODE_CAN_READ)) {
1026 err = -EPERM; 1036 err = -EPERM;
1027 goto err_put; 1037 goto err_put;
1028 } 1038 }
@@ -1089,8 +1099,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
1089 map = __bpf_map_get(f); 1099 map = __bpf_map_get(f);
1090 if (IS_ERR(map)) 1100 if (IS_ERR(map))
1091 return PTR_ERR(map); 1101 return PTR_ERR(map);
1092 1102 if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
1093 if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
1094 err = -EPERM; 1103 err = -EPERM;
1095 goto err_put; 1104 goto err_put;
1096 } 1105 }
@@ -1132,6 +1141,36 @@ err_put:
1132 return err; 1141 return err;
1133} 1142}
1134 1143
1144#define BPF_MAP_FREEZE_LAST_FIELD map_fd
1145
/* Handler for the BPF_MAP_FREEZE command: set map->frozen on the map
 * referred to by attr->map_fd.
 *
 * @attr: syscall attributes; only map_fd is read here, after the
 *        CHECK_ATTR(BPF_MAP_FREEZE) validation.
 *
 * Returns 0 on success, or a negative error:
 *   -EINVAL        if CHECK_ATTR(BPF_MAP_FREEZE) rejects @attr,
 *   PTR_ERR(map)   if __bpf_map_get() fails for the fd,
 *   -EBUSY         if the map is already frozen,
 *   -EPERM         if the caller lacks CAP_SYS_ADMIN.
 */
1146static int map_freeze(const union bpf_attr *attr)
1147{
1148 int err = 0, ufd = attr->map_fd;
1149 struct bpf_map *map;
1150 struct fd f;
1151
1152 if (CHECK_ATTR(BPF_MAP_FREEZE))
1153 return -EINVAL;
1154
1155 f = fdget(ufd);
1156 map = __bpf_map_get(f);
 /* NOTE(review): this path returns without fdput(f); presumably
  * __bpf_map_get() drops the fd reference itself on failure - confirm.
  */
1157 if (IS_ERR(map))
1158 return PTR_ERR(map);
 /* The frozen check runs before the capability check, so an
  * already-frozen map yields -EBUSY regardless of capabilities.
  */
1159 if (READ_ONCE(map->frozen)) {
1160 err = -EBUSY;
1161 goto err_put;
1162 }
1163 if (!capable(CAP_SYS_ADMIN)) {
1164 err = -EPERM;
1165 goto err_put;
1166 }
1167
1168 WRITE_ONCE(map->frozen, true);
 /* Shared exit for success and error: err is still 0 on success. */
1169err_put:
1170 fdput(f);
1171 return err;
1172}
1173
1135static const struct bpf_prog_ops * const bpf_prog_types[] = { 1174static const struct bpf_prog_ops * const bpf_prog_types[] = {
1136#define BPF_PROG_TYPE(_id, _name) \ 1175#define BPF_PROG_TYPE(_id, _name) \
1137 [_id] = & _name ## _prog_ops, 1176 [_id] = & _name ## _prog_ops,
@@ -2735,6 +2774,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
2735 case BPF_MAP_GET_NEXT_KEY: 2774 case BPF_MAP_GET_NEXT_KEY:
2736 err = map_get_next_key(&attr); 2775 err = map_get_next_key(&attr);
2737 break; 2776 break;
2777 case BPF_MAP_FREEZE:
2778 err = map_freeze(&attr);
2779 break;
2738 case BPF_PROG_LOAD: 2780 case BPF_PROG_LOAD:
2739 err = bpf_prog_load(&attr, uattr); 2781 err = bpf_prog_load(&attr, uattr);
2740 break; 2782 break;