author    Daniel Borkmann <daniel@iogearbox.net>    2018-08-17 17:26:14 -0400
committer Alexei Starovoitov <ast@kernel.org>       2018-08-17 18:56:23 -0400
commit    f6069b9aa9934ede26f41ac0781fce241279ad43 (patch)
tree      df9009742722dc032801dcf2f1250042935da580
parent    a85da34e97f553d27b520757ffc9b31232694a78 (diff)
bpf: fix redirect to map under tail calls
Commits 109980b894e9 ("bpf: don't select potentially stale ri->map from buggy xdp progs") and 7c3001313396 ("bpf: fix ri->map_owner pointer on bpf_prog_realloc") tried to mitigate the case where buggy programs using the bpf_redirect_map() helper leave stale maps behind. The idea was to add a map_owner cookie to the per-CPU struct redirect_info, set to prog->aux by the program making the helper call, as proof that the map is not stale since that program implicitly holds a reference to it. The owner cookie could later be compared against the program calling into BPF, and the redirect would only proceed with processing the map if they matched.

In (obvious) hindsight, this approach breaks down when tail calls are involved: the original caller's prog->aux pointer does not have to match the one from a program in the tail call chain, so the xdp buffer is dropped instead of redirected.

Fix the issue differently, which also allows removing the related work in the fast path: once the lifetime of a redirect map comes to its end, use its map free callback, where we wait on synchronize_rcu() for current outstanding xdp buffers and remove such a map pointer from the per-CPU redirect info if found to be present. At that point no program is using this map anymore, so we simply invalidate the map pointers to NULL iff they previously pointed to that instance, while making sure that the redirect path only reads out the map once.

Fixes: 97f91a7cf04f ("bpf: add bpf_redirect_map helper routine")
Fixes: 109980b894e9 ("bpf: don't select potentially stale ri->map from buggy xdp progs")
Reported-by: Sebastiano Miano <sebastiano.miano@polito.it>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
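Purely as illustration (not part of the patch), a minimal sketch of the scenario the commit describes: an entry XDP program tail-calls a second program, and only the tail-called program invokes bpf_redirect_map(). Under the old scheme, ri->map_owner was set to the aux pointer of the tail-called program while xdp_do_redirect() compared it against the program attached to the device, so the redirect was rejected with -EFAULT and the packet dropped. Map names, sizes and section layout below are made up for the example.

// SPDX-License-Identifier: GPL-2.0
/* Illustrative only: entry program tail-calls into a redirector
 * which performs the actual bpf_redirect_map() call.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_DEVMAP);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
	__uint(max_entries, 8);
} tx_ports SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
	__uint(max_entries, 1);
} jmp_table SEC(".maps");

SEC("xdp")
int xdp_redirector(struct xdp_md *ctx)
{
	/* Redirect out of the device stored at devmap index 0. With the
	 * old map_owner check, ri->map_owner pointed at this program's
	 * aux rather than the entry program's, causing the drop.
	 */
	return bpf_redirect_map(&tx_ports, 0, 0);
}

SEC("xdp")
int xdp_entry(struct xdp_md *ctx)
{
	/* Jump into the redirector; execution does not return here if
	 * the tail call succeeds.
	 */
	bpf_tail_call(ctx, &jmp_table, 0);
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";

User space would populate jmp_table index 0 with the redirector program's fd and the devmap slot with an egress ifindex; only xdp_entry is attached to the device, which is why the old prog->aux comparison could not match.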
-rw-r--r--  include/linux/filter.h       3
-rw-r--r--  include/trace/events/xdp.h   5
-rw-r--r--  kernel/bpf/cpumap.c          2
-rw-r--r--  kernel/bpf/devmap.c          1
-rw-r--r--  kernel/bpf/verifier.c       21
-rw-r--r--  kernel/bpf/xskmap.c          1
-rw-r--r--  net/core/filter.c           68
7 files changed, 38 insertions(+), 63 deletions(-)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 5d565c50bcb2..6791a0ac0139 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -543,7 +543,6 @@ struct bpf_redirect_info {
 	u32 flags;
 	struct bpf_map *map;
 	struct bpf_map *map_to_flush;
-	unsigned long map_owner;
 	u32 kern_flags;
 };
 
@@ -781,6 +780,8 @@ static inline bool bpf_dump_raw_ok(void)
 struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 				       const struct bpf_insn *patch, u32 len);
 
+void bpf_clear_redirect_map(struct bpf_map *map);
+
 static inline bool xdp_return_frame_no_direct(void)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index 1ecf4c67fcf7..e95cb86b65cf 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -147,9 +147,8 @@ struct _bpf_dtab_netdev {
 
 #define devmap_ifindex(fwd, map)				\
 	(!fwd ? 0 :						\
-	 (!map ? 0 :						\
-	  ((map->map_type == BPF_MAP_TYPE_DEVMAP) ?		\
-	   ((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0)))
+	 ((map->map_type == BPF_MAP_TYPE_DEVMAP) ?		\
+	  ((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0))
 
 #define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx)	\
 	trace_xdp_redirect_map(dev, xdp, devmap_ifindex(fwd, map),	\
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 620bc5024d7d..24aac0d0f412 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -479,6 +479,8 @@ static void cpu_map_free(struct bpf_map *map)
 	 * It does __not__ ensure pending flush operations (if any) are
 	 * complete.
 	 */
+
+	bpf_clear_redirect_map(map);
 	synchronize_rcu();
 
 	/* To ensure all pending flush operations have completed wait for flush
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index ac1df79f3788..141710b82a6c 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -161,6 +161,7 @@ static void dev_map_free(struct bpf_map *map)
 	list_del_rcu(&dtab->list);
 	spin_unlock(&dev_map_lock);
 
+	bpf_clear_redirect_map(map);
 	synchronize_rcu();
 
 	/* To ensure all pending flush operations have completed wait for flush
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index ca90679a7fe5..92246117d2b0 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5844,27 +5844,6 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 			goto patch_call_imm;
 		}
 
-		if (insn->imm == BPF_FUNC_redirect_map) {
-			/* Note, we cannot use prog directly as imm as subsequent
-			 * rewrites would still change the prog pointer. The only
-			 * stable address we can use is aux, which also works with
-			 * prog clones during blinding.
-			 */
-			u64 addr = (unsigned long)prog->aux;
-			struct bpf_insn r4_ld[] = {
-				BPF_LD_IMM64(BPF_REG_4, addr),
-				*insn,
-			};
-			cnt = ARRAY_SIZE(r4_ld);
-
-			new_prog = bpf_patch_insn_data(env, i + delta, r4_ld, cnt);
-			if (!new_prog)
-				return -ENOMEM;
-
-			delta += cnt - 1;
-			env->prog = prog = new_prog;
-			insn = new_prog->insnsi + i + delta;
-		}
 patch_call_imm:
 		fn = env->ops->get_func_proto(insn->imm, env->prog);
 		/* all functions that have prototype and verifier allowed
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
index 4ddf61e158f6..9f8463afda9c 100644
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
@@ -75,6 +75,7 @@ static void xsk_map_free(struct bpf_map *map)
 	struct xsk_map *m = container_of(map, struct xsk_map, map);
 	int i;
 
+	bpf_clear_redirect_map(map);
 	synchronize_net();
 
 	for (i = 0; i < map->max_entries; i++) {
diff --git a/net/core/filter.c b/net/core/filter.c
index fd423ce3da34..c25eb36f1320 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3246,31 +3246,33 @@ static void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
 	}
 }
 
-static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog,
-				   unsigned long aux)
+void bpf_clear_redirect_map(struct bpf_map *map)
 {
-	return (unsigned long)xdp_prog->aux != aux;
+	struct bpf_redirect_info *ri;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		ri = per_cpu_ptr(&bpf_redirect_info, cpu);
+		/* Avoid polluting remote cacheline due to writes if
+		 * not needed. Once we pass this test, we need the
+		 * cmpxchg() to make sure it hasn't been changed in
+		 * the meantime by remote CPU.
+		 */
+		if (unlikely(READ_ONCE(ri->map) == map))
+			cmpxchg(&ri->map, map, NULL);
+	}
 }
 
 static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
-			       struct bpf_prog *xdp_prog)
+			       struct bpf_prog *xdp_prog, struct bpf_map *map)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
-	unsigned long map_owner = ri->map_owner;
-	struct bpf_map *map = ri->map;
 	u32 index = ri->ifindex;
 	void *fwd = NULL;
 	int err;
 
 	ri->ifindex = 0;
-	ri->map = NULL;
-	ri->map_owner = 0;
-
-	if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
-		err = -EFAULT;
-		map = NULL;
-		goto err;
-	}
+	WRITE_ONCE(ri->map, NULL);
 
 	fwd = __xdp_map_lookup_elem(map, index);
 	if (!fwd) {
@@ -3296,12 +3298,13 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
 			    struct bpf_prog *xdp_prog)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+	struct bpf_map *map = READ_ONCE(ri->map);
 	struct net_device *fwd;
 	u32 index = ri->ifindex;
 	int err;
 
-	if (ri->map)
-		return xdp_do_redirect_map(dev, xdp, xdp_prog);
+	if (map)
+		return xdp_do_redirect_map(dev, xdp, xdp_prog, map);
 
 	fwd = dev_get_by_index_rcu(dev_net(dev), index);
 	ri->ifindex = 0;
@@ -3325,24 +3328,17 @@ EXPORT_SYMBOL_GPL(xdp_do_redirect);
 static int xdp_do_generic_redirect_map(struct net_device *dev,
 				       struct sk_buff *skb,
 				       struct xdp_buff *xdp,
-				       struct bpf_prog *xdp_prog)
+				       struct bpf_prog *xdp_prog,
+				       struct bpf_map *map)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
-	unsigned long map_owner = ri->map_owner;
-	struct bpf_map *map = ri->map;
 	u32 index = ri->ifindex;
 	void *fwd = NULL;
 	int err = 0;
 
 	ri->ifindex = 0;
-	ri->map = NULL;
-	ri->map_owner = 0;
+	WRITE_ONCE(ri->map, NULL);
 
-	if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
-		err = -EFAULT;
-		map = NULL;
-		goto err;
-	}
 	fwd = __xdp_map_lookup_elem(map, index);
 	if (unlikely(!fwd)) {
 		err = -EINVAL;
@@ -3379,13 +3375,14 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
 			    struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+	struct bpf_map *map = READ_ONCE(ri->map);
 	u32 index = ri->ifindex;
 	struct net_device *fwd;
 	int err = 0;
 
-	if (ri->map)
-		return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog);
-
+	if (map)
+		return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog,
+						   map);
 	ri->ifindex = 0;
 	fwd = dev_get_by_index_rcu(dev_net(dev), index);
 	if (unlikely(!fwd)) {
@@ -3416,8 +3413,7 @@ BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
 
 	ri->ifindex = ifindex;
 	ri->flags = flags;
-	ri->map = NULL;
-	ri->map_owner = 0;
+	WRITE_ONCE(ri->map, NULL);
 
 	return XDP_REDIRECT;
 }
@@ -3430,8 +3426,8 @@ static const struct bpf_func_proto bpf_xdp_redirect_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
-BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags,
-	   unsigned long, map_owner)
+BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex,
+	   u64, flags)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
 
@@ -3440,15 +3436,11 @@ BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags
 
 	ri->ifindex = ifindex;
 	ri->flags = flags;
-	ri->map = map;
-	ri->map_owner = map_owner;
+	WRITE_ONCE(ri->map, map);
 
 	return XDP_REDIRECT;
 }
 
-/* Note, arg4 is hidden from users and populated by the verifier
- * with the right pointer.
- */
 static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
 	.func		= bpf_xdp_redirect_map,
 	.gpl_only	= false,