author     Daniel Borkmann <daniel@iogearbox.net>    2015-11-24 15:28:15 -0500
committer  David S. Miller <davem@davemloft.net>     2015-11-25 12:14:09 -0500
commit     c9da161c6517ba12154059d3b965c2cbaf16f90f (patch)
tree       a9a86696004c560029bf6fd33648471bdcafc346
parent     19cebbcb04c8277bb8a7905957c8af11967c4e28 (diff)
bpf: fix clearing on persistent program array maps
Currently, when map file descriptors point to program arrays, there is still the issue that we unconditionally flush program array contents via bpf_fd_array_map_clear() in bpf_map_release(). This happens when such a file descriptor is released and is independent of the map's refcount.

Having this flush independent of the refcount is for a reason: there can be arbitrarily complex dependency chains among tail calls, including circular ones (direct or indirect, with the nesting limit determined at runtime), and we need to make sure that the map drops all references to the eBPF programs it holds, so that the map's refcount can eventually drop to zero and initiate its freeing. A walk of the whole dependency graph would not be possible for various reasons, one being complexity and another one inconsistency, i.e. new programs can be added to parts of the graph at any time, so there's no guaranteed consistent state for the time of such a walk.

Now, the program array pinning itself works, but the issue is that each derived file descriptor would nevertheless, on close, call unconditionally into bpf_fd_array_map_clear(). Instead, keep track of users and postpone this flush until the last reference to a user is dropped. As this only concerns a subset of references (e.g. a prog array could hold a program that itself has a reference on the prog array holding it, etc.), we need to track them separately.

Short analysis of the refcounting: at map creation time usercnt will be one, so there's no change in behaviour for bpf_map_release(), if unpinned. If we already fail in map_create(), we are immediately freed, and no file descriptor has been made public yet. In bpf_obj_pin_user(), we need to probe for a possible map in bpf_fd_probe_obj() already with a usercnt reference, that is, before we drop the reference on the fd with fdput(). Therefore, if actual pinning fails, we need to drop that reference again in bpf_any_put(); otherwise we keep holding it. When the last reference on the inode drops, the bpf_any_put() in bpf_evict_inode() will take care of dropping the usercnt again. In the bpf_obj_get_user() case, bpf_any_get() will grab a reference on the usercnt, still at a time when we hold the reference on the path. Should we later fail to grab a new file descriptor, bpf_any_put() will drop it; otherwise we hold it until bpf_map_release() time.

Joint work with Alexei.

Fixes: b2197755b263 ("bpf: add support for persistent maps/progs")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--  include/linux/bpf.h    |  5
-rw-r--r--  kernel/bpf/inode.c     |  6
-rw-r--r--  kernel/bpf/syscall.c   | 36
-rw-r--r--  kernel/bpf/verifier.c  |  3
4 files changed, 33 insertions(+), 17 deletions(-)
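
To make the two-counter scheme above concrete, here is a minimal userspace sketch of the intended lifetime rules, compilable with any C11 compiler. It is a toy model under stated assumptions, not kernel code: toy_map, toy_map_put(), toy_map_put_uref() and toy_map_inc() are hypothetical stand-ins for struct bpf_map and the helpers this patch introduces, and toy_flush() stands in for bpf_fd_array_map_clear().

/* Toy model of the refcnt/usercnt split: refcnt controls when the map is
 * freed, usercnt controls when a prog array's entries are flushed.
 * Build: cc -std=c11 -o toy toy.c
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct toy_map {
        atomic_int refcnt;   /* all references: fds, pins, programs */
        atomic_int usercnt;  /* user-facing references: fds and pins only */
        bool is_prog_array;
};

static void toy_flush(struct toy_map *map)
{
        printf("flushing prog array entries\n");
}

static void toy_map_put(struct toy_map *map)
{
        if (atomic_fetch_sub(&map->refcnt, 1) == 1) {
                printf("last refcnt gone: freeing map\n");
                free(map);
        }
}

static void toy_map_put_uref(struct toy_map *map)
{
        /* Flush only when the last *user* reference goes away, not on
         * every single fd close -- the behavioural change of this patch. */
        if (atomic_fetch_sub(&map->usercnt, 1) == 1 && map->is_prog_array)
                toy_flush(map);
}

static void toy_map_inc(struct toy_map *map, bool uref)
{
        atomic_fetch_add(&map->refcnt, 1);
        if (uref)
                atomic_fetch_add(&map->usercnt, 1);
}

int main(void)
{
        struct toy_map *map = calloc(1, sizeof(*map));

        map->is_prog_array = true;
        atomic_init(&map->refcnt, 1);   /* reference held by the map fd */
        atomic_init(&map->usercnt, 1);

        toy_map_inc(map, true);         /* pinning takes a uref as well */

        toy_map_put_uref(map);          /* close(map_fd): no flush yet, */
        toy_map_put(map);               /* the pin still holds a uref   */

        toy_map_put_uref(map);          /* unpin: last uref -> flush    */
        toy_map_put(map);               /* last refcnt -> free          */
        return 0;
}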
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index de464e6683b6..83d1926c61e4 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -40,6 +40,7 @@ struct bpf_map {
         struct user_struct *user;
         const struct bpf_map_ops *ops;
         struct work_struct work;
+        atomic_t usercnt;
 };
 
 struct bpf_map_type_list {
@@ -167,8 +168,10 @@ struct bpf_prog *bpf_prog_get(u32 ufd);
167void bpf_prog_put(struct bpf_prog *prog); 168void bpf_prog_put(struct bpf_prog *prog);
168void bpf_prog_put_rcu(struct bpf_prog *prog); 169void bpf_prog_put_rcu(struct bpf_prog *prog);
169 170
170struct bpf_map *bpf_map_get(u32 ufd); 171struct bpf_map *bpf_map_get_with_uref(u32 ufd);
171struct bpf_map *__bpf_map_get(struct fd f); 172struct bpf_map *__bpf_map_get(struct fd f);
173void bpf_map_inc(struct bpf_map *map, bool uref);
174void bpf_map_put_with_uref(struct bpf_map *map);
172void bpf_map_put(struct bpf_map *map); 175void bpf_map_put(struct bpf_map *map);
173 176
174extern int sysctl_unprivileged_bpf_disabled; 177extern int sysctl_unprivileged_bpf_disabled;
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index be6d726e31c9..5a8a797d50b7 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -34,7 +34,7 @@ static void *bpf_any_get(void *raw, enum bpf_type type)
                 atomic_inc(&((struct bpf_prog *)raw)->aux->refcnt);
                 break;
         case BPF_TYPE_MAP:
-                atomic_inc(&((struct bpf_map *)raw)->refcnt);
+                bpf_map_inc(raw, true);
                 break;
         default:
                 WARN_ON_ONCE(1);
@@ -51,7 +51,7 @@ static void bpf_any_put(void *raw, enum bpf_type type)
                 bpf_prog_put(raw);
                 break;
         case BPF_TYPE_MAP:
-                bpf_map_put(raw);
+                bpf_map_put_with_uref(raw);
                 break;
         default:
                 WARN_ON_ONCE(1);
@@ -64,7 +64,7 @@ static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type)
         void *raw;
 
         *type = BPF_TYPE_MAP;
-        raw = bpf_map_get(ufd);
+        raw = bpf_map_get_with_uref(ufd);
         if (IS_ERR(raw)) {
                 *type = BPF_TYPE_PROG;
                 raw = bpf_prog_get(ufd);
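
The "short analysis" from the commit message, as it plays out in this file, can be condensed into a runnable trace of the usercnt reference taken and released around pinning. Everything below is hypothetical shorthand, not kernel code; any_get()/any_put() merely mirror what bpf_any_get()/bpf_any_put() do to a map's usercnt per the description above.

#include <stdbool.h>
#include <stdio.h>

static int usercnt = 1;   /* taken at map_create() time */

static void any_get(void) { usercnt++; printf("get: usercnt=%d\n", usercnt); }
static void any_put(void) { usercnt--; printf("put: usercnt=%d\n", usercnt); }

int main(void)
{
        bool pin_failed = false;   /* flip to trace the error path */

        /* bpf_obj_pin_user(): bpf_fd_probe_obj() takes a uref before the
         * fd reference is dropped with fdput(). */
        any_get();

        if (pin_failed)
                any_put();         /* failed pin must return the reference */
        else
                any_put();         /* later: bpf_evict_inode() -> bpf_any_put() */

        any_put();                 /* bpf_map_release() on the original fd */
        printf("usercnt=%d -> prog array flushed\n", usercnt);
        return 0;
}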
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 0d3313d02a7e..4a8f3c1d7da6 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -82,6 +82,14 @@ static void bpf_map_free_deferred(struct work_struct *work)
         map->ops->map_free(map);
 }
 
+static void bpf_map_put_uref(struct bpf_map *map)
+{
+        if (atomic_dec_and_test(&map->usercnt)) {
+                if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
+                        bpf_fd_array_map_clear(map);
+        }
+}
+
 /* decrement map refcnt and schedule it for freeing via workqueue
  * (unrelying map implementation ops->map_free() might sleep)
  */
@@ -93,17 +101,15 @@ void bpf_map_put(struct bpf_map *map)
         }
 }
 
-static int bpf_map_release(struct inode *inode, struct file *filp)
+void bpf_map_put_with_uref(struct bpf_map *map)
 {
-        struct bpf_map *map = filp->private_data;
-
-        if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
-                /* prog_array stores refcnt-ed bpf_prog pointers
-                 * release them all when user space closes prog_array_fd
-                 */
-                bpf_fd_array_map_clear(map);
-
+        bpf_map_put_uref(map);
         bpf_map_put(map);
+}
+
+static int bpf_map_release(struct inode *inode, struct file *filp)
+{
+        bpf_map_put_with_uref(filp->private_data);
         return 0;
 }
 
@@ -142,6 +148,7 @@ static int map_create(union bpf_attr *attr)
                 return PTR_ERR(map);
 
         atomic_set(&map->refcnt, 1);
+        atomic_set(&map->usercnt, 1);
 
         err = bpf_map_charge_memlock(map);
         if (err)
@@ -174,7 +181,14 @@ struct bpf_map *__bpf_map_get(struct fd f)
         return f.file->private_data;
 }
 
-struct bpf_map *bpf_map_get(u32 ufd)
+void bpf_map_inc(struct bpf_map *map, bool uref)
+{
+        atomic_inc(&map->refcnt);
+        if (uref)
+                atomic_inc(&map->usercnt);
+}
+
+struct bpf_map *bpf_map_get_with_uref(u32 ufd)
 {
         struct fd f = fdget(ufd);
         struct bpf_map *map;
@@ -183,7 +197,7 @@ struct bpf_map *bpf_map_get(u32 ufd)
         if (IS_ERR(map))
                 return map;
 
-        atomic_inc(&map->refcnt);
+        bpf_map_inc(map, true);
         fdput(f);
 
         return map;
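
From user space, the visible effect of the fix is that a pinned prog array keeps its contents across close() of the creating fd. Below is a minimal sketch against the bpf(2) syscall, under these assumptions: a kernel with persistent-map support and this fix, a bpffs mounted at /sys/fs/bpf, sufficient privileges (typically root), and a pin path chosen purely for illustration.

#include <linux/bpf.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>

static int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr)
{
        return syscall(__NR_bpf, cmd, attr, sizeof(*attr));
}

int main(void)
{
        union bpf_attr attr;
        int map_fd;

        /* Create a small program array; values are program fds. */
        memset(&attr, 0, sizeof(attr));
        attr.map_type = BPF_MAP_TYPE_PROG_ARRAY;
        attr.key_size = sizeof(__u32);
        attr.value_size = sizeof(__u32);
        attr.max_entries = 4;
        map_fd = sys_bpf(BPF_MAP_CREATE, &attr);
        if (map_fd < 0) {
                perror("BPF_MAP_CREATE");
                return 1;
        }

        /* Pin it; the pin takes its own user reference on the map. */
        memset(&attr, 0, sizeof(attr));
        attr.pathname = (__u64)(unsigned long)"/sys/fs/bpf/prog_array";
        attr.bpf_fd = map_fd;
        if (sys_bpf(BPF_OBJ_PIN, &attr) < 0) {
                perror("BPF_OBJ_PIN");
                return 1;
        }

        /* With this fix, close() drops only one user reference; the
         * array's tail-call entries stay alive behind the pin. */
        close(map_fd);
        return 0;
}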
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c6073056badf..a7945d10b378 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2021,8 +2021,7 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
                          * will be used by the valid program until it's unloaded
                          * and all maps are released in free_bpf_prog_info()
                          */
-                        atomic_inc(&map->refcnt);
-
+                        bpf_map_inc(map, false);
                         fdput(f);
 next_insn:
                         insn++;
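
The uref=false here is the crux: a program stored in a prog array may itself hold a reference on that very array, so if verifier-taken references also bumped usercnt, such a cycle could keep usercnt above zero forever and the flush would never trigger. A counter-only sketch of the intended sequencing (hypothetical, not kernel code):

#include <stdatomic.h>
#include <stdio.h>

int main(void)
{
        atomic_int refcnt, usercnt;

        atomic_init(&refcnt, 1);    /* the user's map fd */
        atomic_init(&usercnt, 1);

        /* A program wired into the array takes only a refcnt reference,
         * mirroring bpf_map_inc(map, false) in the verifier. */
        atomic_fetch_add(&refcnt, 1);

        /* User closes the last fd: usercnt hits zero, the flush runs,
         * and the programs it releases drop their refs on the map. */
        if (atomic_fetch_sub(&usercnt, 1) == 1) {
                printf("last user reference gone: flush clears the array\n");
                atomic_fetch_sub(&refcnt, 1);   /* freed prog drops its ref */
        }
        if (atomic_fetch_sub(&refcnt, 1) == 1)
                printf("last refcnt gone: map freed\n");
        return 0;
}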