aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Daniel Borkmann <dborkman@redhat.com> 2014-09-02 16:53:44 -0400
committer David S. Miller <davem@davemloft.net> 2014-09-05 15:02:48 -0400
commit 60a3b2253c413cf601783b070507d7dd6620c954 (patch)
tree d5682002b80cfeb75cb765df5ba097e7c889e9fe
parent 4a804c01635a43ed073893532c058fbaa1f5154e (diff)
net: bpf: make eBPF interpreter images read-only
With eBPF being extended further and exposure to user space on its way, hardening the memory range the interpreter uses to steer its command flow seems appropriate. This patch moves the to-be-interpreted bytecode to read-only pages. If we execute a corrupted BPF interpreter image for some reason, e.g. one caused by an attacker who got past a verifier stage, it would not only provide arbitrary read/write memory access but arbitrary function calls as well. After setting up the BPF interpreter image, its contents do not change until destruction time, thus we can set up the image on pages made immutable in order to mitigate modifications to that code. The idea is derived from commit 314beb9bcabf ("x86: bpf_jit_comp: secure bpf jit against spraying attacks"). This is possible because bpf_prog is not part of sk_filter anymore. After setup, bpf_prog cannot be altered during its lifetime. This prevents any modifications to the entire bpf_prog structure (incl. function/JIT image pointer). Every eBPF program (including classic BPF programs that are migrated) has to call bpf_prog_select_runtime() to select either the interpreter or a JIT image as a last setup step, and they are all freed via bpf_prog_free(), including non-JIT ones. Therefore, we can easily integrate this into the eBPF lifetime; plus, since we directly allocate a bpf_prog, we have no performance penalty. Tested with seccomp and the test_bpf test suite in JIT/non-JIT mode, and by manual inspection of kernel_page_tables. Brad Spengler proposed the same idea via Twitter during development of this patch. Joint work with Hannes Frederic Sowa. Suggested-by: Brad Spengler <spender@grsecurity.net> Signed-off-by: Daniel Borkmann <dborkman@redhat.com> Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org> Cc: Alexei Starovoitov <ast@plumgrid.com> Cc: Kees Cook <keescook@chromium.org> Acked-by: Alexei Starovoitov <ast@plumgrid.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- arch/arm/net/bpf_jit_32.c 3
-rw-r--r-- arch/mips/net/bpf_jit.c 3
-rw-r--r-- arch/powerpc/net/bpf_jit_comp.c 3
-rw-r--r-- arch/s390/net/bpf_jit_comp.c 2
-rw-r--r-- arch/sparc/net/bpf_jit_comp.c 3
-rw-r--r-- arch/x86/net/bpf_jit_comp.c 18
-rw-r--r-- include/linux/filter.h 49
-rw-r--r-- kernel/bpf/core.c 80
-rw-r--r-- kernel/seccomp.c 7
-rw-r--r-- lib/test_bpf.c 2
-rw-r--r-- net/core/filter.c 6
11 files changed, 144 insertions, 32 deletions
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index a37b989a2f91..a76623bcf722 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -930,5 +930,6 @@ void bpf_jit_free(struct bpf_prog *fp)
930{ 930{
931 if (fp->jited) 931 if (fp->jited)
932 module_free(NULL, fp->bpf_func); 932 module_free(NULL, fp->bpf_func);
933 kfree(fp); 933
934 bpf_prog_unlock_free(fp);
934} 935}
diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index 05a56619ece2..cfa83cf2447d 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -1427,5 +1427,6 @@ void bpf_jit_free(struct bpf_prog *fp)
1427{ 1427{
1428 if (fp->jited) 1428 if (fp->jited)
1429 module_free(NULL, fp->bpf_func); 1429 module_free(NULL, fp->bpf_func);
1430 kfree(fp); 1430
1431 bpf_prog_unlock_free(fp);
1431} 1432}
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 3afa6f4c1957..40c53ff59124 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -697,5 +697,6 @@ void bpf_jit_free(struct bpf_prog *fp)
697{ 697{
698 if (fp->jited) 698 if (fp->jited)
699 module_free(NULL, fp->bpf_func); 699 module_free(NULL, fp->bpf_func);
700 kfree(fp); 700
701 bpf_prog_unlock_free(fp);
701} 702}
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 61e45b7c04d7..f2833c5b218a 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -887,5 +887,5 @@ void bpf_jit_free(struct bpf_prog *fp)
887 module_free(NULL, header); 887 module_free(NULL, header);
888 888
889free_filter: 889free_filter:
890 kfree(fp); 890 bpf_prog_unlock_free(fp);
891} 891}
diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c
index 1f76c22a6a75..f7a736b645e8 100644
--- a/arch/sparc/net/bpf_jit_comp.c
+++ b/arch/sparc/net/bpf_jit_comp.c
@@ -812,5 +812,6 @@ void bpf_jit_free(struct bpf_prog *fp)
812{ 812{
813 if (fp->jited) 813 if (fp->jited)
814 module_free(NULL, fp->bpf_func); 814 module_free(NULL, fp->bpf_func);
815 kfree(fp); 815
816 bpf_prog_unlock_free(fp);
816} 817}
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index b08a98c59530..39ccfbb4a723 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -972,23 +972,17 @@ out:
972 kfree(addrs); 972 kfree(addrs);
973} 973}
974 974
975static void bpf_jit_free_deferred(struct work_struct *work) 975void bpf_jit_free(struct bpf_prog *fp)
976{ 976{
977 struct bpf_prog *fp = container_of(work, struct bpf_prog, work);
978 unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; 977 unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
979 struct bpf_binary_header *header = (void *)addr; 978 struct bpf_binary_header *header = (void *)addr;
980 979
980 if (!fp->jited)
981 goto free_filter;
982
981 set_memory_rw(addr, header->pages); 983 set_memory_rw(addr, header->pages);
982 module_free(NULL, header); 984 module_free(NULL, header);
983 kfree(fp);
984}
985 985
986void bpf_jit_free(struct bpf_prog *fp) 986free_filter:
987{ 987 bpf_prog_unlock_free(fp);
988 if (fp->jited) {
989 INIT_WORK(&fp->work, bpf_jit_free_deferred);
990 schedule_work(&fp->work);
991 } else {
992 kfree(fp);
993 }
994} 988}
diff --git a/include/linux/filter.h b/include/linux/filter.h
index a5227ab8ccb1..c78994593355 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -9,6 +9,11 @@
9#include <linux/skbuff.h> 9#include <linux/skbuff.h>
10#include <linux/workqueue.h> 10#include <linux/workqueue.h>
11#include <uapi/linux/filter.h> 11#include <uapi/linux/filter.h>
12#include <asm/cacheflush.h>
13
14struct sk_buff;
15struct sock;
16struct seccomp_data;
12 17
13/* Internally used and optimized filter representation with extended 18/* Internally used and optimized filter representation with extended
14 * instruction set based on top of classic BPF. 19 * instruction set based on top of classic BPF.
@@ -320,20 +325,23 @@ struct sock_fprog_kern {
320 struct sock_filter *filter; 325 struct sock_filter *filter;
321}; 326};
322 327
323struct sk_buff; 328struct bpf_work_struct {
324struct sock; 329 struct bpf_prog *prog;
325struct seccomp_data; 330 struct work_struct work;
331};
326 332
327struct bpf_prog { 333struct bpf_prog {
334 u32 pages; /* Number of allocated pages */
328 u32 jited:1, /* Is our filter JIT'ed? */ 335 u32 jited:1, /* Is our filter JIT'ed? */
329 len:31; /* Number of filter blocks */ 336 len:31; /* Number of filter blocks */
330 struct sock_fprog_kern *orig_prog; /* Original BPF program */ 337 struct sock_fprog_kern *orig_prog; /* Original BPF program */
338 struct bpf_work_struct *work; /* Deferred free work struct */
331 unsigned int (*bpf_func)(const struct sk_buff *skb, 339 unsigned int (*bpf_func)(const struct sk_buff *skb,
332 const struct bpf_insn *filter); 340 const struct bpf_insn *filter);
341 /* Instructions for interpreter */
333 union { 342 union {
334 struct sock_filter insns[0]; 343 struct sock_filter insns[0];
335 struct bpf_insn insnsi[0]; 344 struct bpf_insn insnsi[0];
336 struct work_struct work;
337 }; 345 };
338}; 346};
339 347
@@ -353,6 +361,26 @@ static inline unsigned int bpf_prog_size(unsigned int proglen)
353 361
354#define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0])) 362#define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0]))
355 363
364#ifdef CONFIG_DEBUG_SET_MODULE_RONX
365static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
366{
367 set_memory_ro((unsigned long)fp, fp->pages);
368}
369
370static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
371{
372 set_memory_rw((unsigned long)fp, fp->pages);
373}
374#else
375static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
376{
377}
378
379static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
380{
381}
382#endif /* CONFIG_DEBUG_SET_MODULE_RONX */
383
356int sk_filter(struct sock *sk, struct sk_buff *skb); 384int sk_filter(struct sock *sk, struct sk_buff *skb);
357 385
358void bpf_prog_select_runtime(struct bpf_prog *fp); 386void bpf_prog_select_runtime(struct bpf_prog *fp);
@@ -361,6 +389,17 @@ void bpf_prog_free(struct bpf_prog *fp);
361int bpf_convert_filter(struct sock_filter *prog, int len, 389int bpf_convert_filter(struct sock_filter *prog, int len,
362 struct bpf_insn *new_prog, int *new_len); 390 struct bpf_insn *new_prog, int *new_len);
363 391
392struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags);
393struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
394 gfp_t gfp_extra_flags);
395void __bpf_prog_free(struct bpf_prog *fp);
396
397static inline void bpf_prog_unlock_free(struct bpf_prog *fp)
398{
399 bpf_prog_unlock_ro(fp);
400 __bpf_prog_free(fp);
401}
402
364int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog); 403int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog);
365void bpf_prog_destroy(struct bpf_prog *fp); 404void bpf_prog_destroy(struct bpf_prog *fp);
366 405
@@ -450,7 +489,7 @@ static inline void bpf_jit_compile(struct bpf_prog *fp)
450 489
451static inline void bpf_jit_free(struct bpf_prog *fp) 490static inline void bpf_jit_free(struct bpf_prog *fp)
452{ 491{
453 kfree(fp); 492 bpf_prog_unlock_free(fp);
454} 493}
455#endif /* CONFIG_BPF_JIT */ 494#endif /* CONFIG_BPF_JIT */
456 495
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 7f0dbcbb34af..b54bb2c2e494 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -22,6 +22,7 @@
22 */ 22 */
23#include <linux/filter.h> 23#include <linux/filter.h>
24#include <linux/skbuff.h> 24#include <linux/skbuff.h>
25#include <linux/vmalloc.h>
25#include <asm/unaligned.h> 26#include <asm/unaligned.h>
26 27
27/* Registers */ 28/* Registers */
@@ -63,6 +64,67 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns
63 return NULL; 64 return NULL;
64} 65}
65 66
67struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
68{
69 gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO |
70 gfp_extra_flags;
71 struct bpf_work_struct *ws;
72 struct bpf_prog *fp;
73
74 size = round_up(size, PAGE_SIZE);
75 fp = __vmalloc(size, gfp_flags, PAGE_KERNEL);
76 if (fp == NULL)
77 return NULL;
78
79 ws = kmalloc(sizeof(*ws), GFP_KERNEL | gfp_extra_flags);
80 if (ws == NULL) {
81 vfree(fp);
82 return NULL;
83 }
84
85 fp->pages = size / PAGE_SIZE;
86 fp->work = ws;
87
88 return fp;
89}
90EXPORT_SYMBOL_GPL(bpf_prog_alloc);
91
92struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
93 gfp_t gfp_extra_flags)
94{
95 gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO |
96 gfp_extra_flags;
97 struct bpf_prog *fp;
98
99 BUG_ON(fp_old == NULL);
100
101 size = round_up(size, PAGE_SIZE);
102 if (size <= fp_old->pages * PAGE_SIZE)
103 return fp_old;
104
105 fp = __vmalloc(size, gfp_flags, PAGE_KERNEL);
106 if (fp != NULL) {
107 memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE);
108 fp->pages = size / PAGE_SIZE;
109
110 /* We keep fp->work from fp_old around in the new
111 * reallocated structure.
112 */
113 fp_old->work = NULL;
114 __bpf_prog_free(fp_old);
115 }
116
117 return fp;
118}
119EXPORT_SYMBOL_GPL(bpf_prog_realloc);
120
121void __bpf_prog_free(struct bpf_prog *fp)
122{
123 kfree(fp->work);
124 vfree(fp);
125}
126EXPORT_SYMBOL_GPL(__bpf_prog_free);
127
66/* Base function for offset calculation. Needs to go into .text section, 128/* Base function for offset calculation. Needs to go into .text section,
67 * therefore keeping it non-static as well; will also be used by JITs 129 * therefore keeping it non-static as well; will also be used by JITs
68 * anyway later on, so do not let the compiler omit it. 130 * anyway later on, so do not let the compiler omit it.
@@ -523,12 +585,26 @@ void bpf_prog_select_runtime(struct bpf_prog *fp)
523 585
524 /* Probe if internal BPF can be JITed */ 586 /* Probe if internal BPF can be JITed */
525 bpf_int_jit_compile(fp); 587 bpf_int_jit_compile(fp);
588 /* Lock whole bpf_prog as read-only */
589 bpf_prog_lock_ro(fp);
526} 590}
527EXPORT_SYMBOL_GPL(bpf_prog_select_runtime); 591EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
528 592
529/* free internal BPF program */ 593static void bpf_prog_free_deferred(struct work_struct *work)
594{
595 struct bpf_work_struct *ws;
596
597 ws = container_of(work, struct bpf_work_struct, work);
598 bpf_jit_free(ws->prog);
599}
600
601/* Free internal BPF program */
530void bpf_prog_free(struct bpf_prog *fp) 602void bpf_prog_free(struct bpf_prog *fp)
531{ 603{
532 bpf_jit_free(fp); 604 struct bpf_work_struct *ws = fp->work;
605
606 INIT_WORK(&ws->work, bpf_prog_free_deferred);
607 ws->prog = fp;
608 schedule_work(&ws->work);
533} 609}
534EXPORT_SYMBOL_GPL(bpf_prog_free); 610EXPORT_SYMBOL_GPL(bpf_prog_free);
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 44eb005c6695..84922befea84 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -395,16 +395,15 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
395 if (!filter) 395 if (!filter)
396 goto free_prog; 396 goto free_prog;
397 397
398 filter->prog = kzalloc(bpf_prog_size(new_len), 398 filter->prog = bpf_prog_alloc(bpf_prog_size(new_len), __GFP_NOWARN);
399 GFP_KERNEL|__GFP_NOWARN);
400 if (!filter->prog) 399 if (!filter->prog)
401 goto free_filter; 400 goto free_filter;
402 401
403 ret = bpf_convert_filter(fp, fprog->len, filter->prog->insnsi, &new_len); 402 ret = bpf_convert_filter(fp, fprog->len, filter->prog->insnsi, &new_len);
404 if (ret) 403 if (ret)
405 goto free_filter_prog; 404 goto free_filter_prog;
406 kfree(fp);
407 405
406 kfree(fp);
408 atomic_set(&filter->usage, 1); 407 atomic_set(&filter->usage, 1);
409 filter->prog->len = new_len; 408 filter->prog->len = new_len;
410 409
@@ -413,7 +412,7 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
413 return filter; 412 return filter;
414 413
415free_filter_prog: 414free_filter_prog:
416 kfree(filter->prog); 415 __bpf_prog_free(filter->prog);
417free_filter: 416free_filter:
418 kfree(filter); 417 kfree(filter);
419free_prog: 418free_prog:
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 8c66c6aace04..9a67456ba29a 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -1836,7 +1836,7 @@ static struct bpf_prog *generate_filter(int which, int *err)
1836 break; 1836 break;
1837 1837
1838 case INTERNAL: 1838 case INTERNAL:
1839 fp = kzalloc(bpf_prog_size(flen), GFP_KERNEL); 1839 fp = bpf_prog_alloc(bpf_prog_size(flen), 0);
1840 if (fp == NULL) { 1840 if (fp == NULL) {
1841 pr_cont("UNEXPECTED_FAIL no memory left\n"); 1841 pr_cont("UNEXPECTED_FAIL no memory left\n");
1842 *err = -ENOMEM; 1842 *err = -ENOMEM;
diff --git a/net/core/filter.c b/net/core/filter.c
index d814b8a89d0f..37f8eb06fdee 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -933,7 +933,7 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
933 933
934 /* Expand fp for appending the new filter representation. */ 934 /* Expand fp for appending the new filter representation. */
935 old_fp = fp; 935 old_fp = fp;
936 fp = krealloc(old_fp, bpf_prog_size(new_len), GFP_KERNEL); 936 fp = bpf_prog_realloc(old_fp, bpf_prog_size(new_len), 0);
937 if (!fp) { 937 if (!fp) {
938 /* The old_fp is still around in case we couldn't 938 /* The old_fp is still around in case we couldn't
939 * allocate new memory, so uncharge on that one. 939 * allocate new memory, so uncharge on that one.
@@ -1013,7 +1013,7 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog)
1013 if (fprog->filter == NULL) 1013 if (fprog->filter == NULL)
1014 return -EINVAL; 1014 return -EINVAL;
1015 1015
1016 fp = kmalloc(bpf_prog_size(fprog->len), GFP_KERNEL); 1016 fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
1017 if (!fp) 1017 if (!fp)
1018 return -ENOMEM; 1018 return -ENOMEM;
1019 1019
@@ -1069,7 +1069,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
1069 if (fprog->filter == NULL) 1069 if (fprog->filter == NULL)
1070 return -EINVAL; 1070 return -EINVAL;
1071 1071
1072 prog = kmalloc(bpf_fsize, GFP_KERNEL); 1072 prog = bpf_prog_alloc(bpf_fsize, 0);
1073 if (!prog) 1073 if (!prog)
1074 return -ENOMEM; 1074 return -ENOMEM;
1075 1075