author     Alexei Starovoitov <ast@kernel.org>      2018-01-09 13:04:29 -0500
committer  Daniel Borkmann <daniel@iogearbox.net>   2018-01-09 16:25:26 -0500
commit     290af86629b25ffd1ed6232c4e9107da031705cb
tree       0d4c514627f1b748a9a8887b5386290b9d140fc0
parent     be95a845cc4402272994ce290e3ad928aff06cb9
bpf: introduce BPF_JIT_ALWAYS_ON config
The BPF interpreter has been used as part of the Spectre v2 attack (CVE-2017-5715).

A quote from the Google Project Zero blog:

"At this point, it would normally be necessary to locate gadgets in the host kernel code that can be used to actually leak data by reading from an attacker-controlled location, shifting and masking the result appropriately and then using the result of that as offset to an attacker-controlled address for a load. But piecing gadgets together and figuring out which ones work in a speculation context seems annoying. So instead, we decided to use the eBPF interpreter, which is built into the host kernel - while there is no legitimate way to invoke it from inside a VM, the presence of the code in the host kernel's text section is sufficient to make it usable for the attack, just like with ordinary ROP gadgets."

To make the attacker's job harder, introduce the BPF_JIT_ALWAYS_ON config option, which removes the interpreter from the kernel in favor of JIT-only mode.

So far the eBPF JIT is supported by: x64, arm64, arm32, sparc64, s390, powerpc64, mips64.

The start of the JITed program is randomized and its code page is marked read-only. In addition, "constant blinding" can be turned on with net.core.bpf_jit_harden.

v2->v3:
- move __bpf_prog_ret0 under ifdef (Daniel)

v1->v2:
- fix init order, test_bpf and cBPF (Daniel's feedback)
- fix offloaded bpf (Jakub's feedback)
- add 'return 0' dummy in case something can invoke prog->bpf_func
- retarget bpf tree. For bpf-next the patch would need one extra hunk.
  It will be sent when the trees are merged back to net-next.

Considered doing:
  int bpf_jit_enable __read_mostly = BPF_EBPF_JIT_DEFAULT;
but it seems better to land the patch as-is and in bpf-next remove the
bpf_jit_enable global variable from all JITs, consolidate it in one place,
and remove this jit_init() function.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
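As a reading aid, below is a small, self-contained userspace C sketch of the runtime-selection behavior this patch enforces in bpf_prog_select_runtime(): with BPF_JIT_ALWAYS_ON the interpreter is compiled out, bpf_func starts out as a dummy that returns 0, and setup fails with -ENOTSUPP when the JIT cannot produce an image. This is not part of the patch; the names fake_prog, select_runtime, fake_jit_compile, and prog_ret0 are made up for illustration.

/*
 * Userspace sketch only -- models the selection logic, not kernel code.
 * ENOTSUPP is a kernel-internal errno value (524); it is not exported
 * to userspace, so it is defined here for the demo.
 */
#include <stdio.h>
#include <stdbool.h>

#ifndef ENOTSUPP
#define ENOTSUPP 524
#endif

#define CONFIG_BPF_JIT_ALWAYS_ON 1	/* comment out to model the other build */

struct fake_prog {
	bool jited;				/* set by the (fake) JIT on success */
	unsigned int (*bpf_func)(const void *ctx);
};

#ifdef CONFIG_BPF_JIT_ALWAYS_ON
/* Stand-in for __bpf_prog_ret0: a dummy that must never really run. */
static unsigned int prog_ret0(const void *ctx)
{
	(void)ctx;
	return 0;
}
#else
/* Stand-in for the interpreter entry point. */
static unsigned int prog_interpreter(const void *ctx)
{
	(void)ctx;
	return 42;
}
#endif

/* Models a JIT that may or may not handle the program/architecture. */
static void fake_jit_compile(struct fake_prog *prog, bool jit_available)
{
	if (jit_available)
		prog->jited = true;	/* a real JIT would install its image here */
}

/* Models the two build variants of bpf_prog_select_runtime(). */
static int select_runtime(struct fake_prog *prog, bool jit_available)
{
#ifdef CONFIG_BPF_JIT_ALWAYS_ON
	prog->bpf_func = prog_ret0;		/* dummy, never a valid fallback */
#else
	prog->bpf_func = prog_interpreter;	/* interpreter fallback allowed */
#endif
	fake_jit_compile(prog, jit_available);
#ifdef CONFIG_BPF_JIT_ALWAYS_ON
	if (!prog->jited)
		return -ENOTSUPP;		/* refuse to fall back to interpretation */
#endif
	return 0;
}

int main(void)
{
	struct fake_prog with_jit = { 0 }, without_jit = { 0 };

	printf("JIT available:   err=%d\n", select_runtime(&with_jit, true));
	printf("JIT unavailable: err=%d\n", select_runtime(&without_jit, false));
	return 0;
}

Design-wise, keeping a "return 0" dummy in bpf_func rather than a NULL pointer mirrors the safety net mentioned in the changelog above: something that still invokes prog->bpf_func after a failed JIT gets a harmless result instead of a crash.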
-rw-r--r--  init/Kconfig                |  7
-rw-r--r--  kernel/bpf/core.c           | 19
-rw-r--r--  lib/test_bpf.c              | 11
-rw-r--r--  net/core/filter.c           |  6
-rw-r--r--  net/core/sysctl_net_core.c  |  6
-rw-r--r--  net/socket.c                |  9

6 files changed, 50 insertions(+), 8 deletions(-)
diff --git a/init/Kconfig b/init/Kconfig
index 2934249fba46..5e2a4a391ba9 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1392,6 +1392,13 @@ config BPF_SYSCALL
 	  Enable the bpf() system call that allows to manipulate eBPF
 	  programs and maps via file descriptors.
 
+config BPF_JIT_ALWAYS_ON
+	bool "Permanently enable BPF JIT and remove BPF interpreter"
+	depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT
+	help
+	  Enables BPF JIT and removes BPF interpreter to avoid
+	  speculative execution of BPF instructions by the interpreter
+
 config USERFAULTFD
 	bool "Enable userfaultfd() system call"
 	select ANON_INODES
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 86b50aa26ee8..51ec2dda7f08 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -767,6 +767,7 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 }
 EXPORT_SYMBOL_GPL(__bpf_call_base);
 
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
 /**
  * __bpf_prog_run - run eBPF program on a given context
  * @ctx: is the data we are operating on
@@ -1317,6 +1318,14 @@ EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
 EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
 };
 
+#else
+static unsigned int __bpf_prog_ret0(const void *ctx,
+				     const struct bpf_insn *insn)
+{
+	return 0;
+}
+#endif
+
 bool bpf_prog_array_compatible(struct bpf_array *array,
 			       const struct bpf_prog *fp)
 {
@@ -1364,9 +1373,13 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
  */
 struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 {
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
 	u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
 
 	fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
+#else
+	fp->bpf_func = __bpf_prog_ret0;
+#endif
 
 	/* eBPF JITs can rewrite the program in case constant
 	 * blinding is active. However, in case of error during
@@ -1376,6 +1389,12 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 	 */
 	if (!bpf_prog_is_dev_bound(fp->aux)) {
 		fp = bpf_int_jit_compile(fp);
+#ifdef CONFIG_BPF_JIT_ALWAYS_ON
+		if (!fp->jited) {
+			*err = -ENOTSUPP;
+			return fp;
+		}
+#endif
 	} else {
 		*err = bpf_prog_offload_compile(fp);
 		if (*err)
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 9e9748089270..f369889e521d 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -6250,9 +6250,8 @@ static struct bpf_prog *generate_filter(int which, int *err)
 				return NULL;
 			}
 		}
-		/* We don't expect to fail. */
 		if (*err) {
-			pr_cont("FAIL to attach err=%d len=%d\n",
+			pr_cont("FAIL to prog_create err=%d len=%d\n",
 				*err, fprog.len);
 			return NULL;
 		}
@@ -6276,6 +6275,10 @@ static struct bpf_prog *generate_filter(int which, int *err)
 		 * checks.
 		 */
 		fp = bpf_prog_select_runtime(fp, err);
+		if (*err) {
+			pr_cont("FAIL to select_runtime err=%d\n", *err);
+			return NULL;
+		}
 		break;
 	}
 
@@ -6461,8 +6464,8 @@ static __init int test_bpf(void)
 				pass_cnt++;
 				continue;
 			}
-
-			return err;
+			err_cnt++;
+			continue;
 		}
 
 		pr_cont("jited:%u ", fp->jited);
diff --git a/net/core/filter.c b/net/core/filter.c
index 6a85e67fafce..d339ef170df6 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1054,11 +1054,9 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
 		 */
 		goto out_err_free;
 
-	/* We are guaranteed to never error here with cBPF to eBPF
-	 * transitions, since there's no issue with type compatibility
-	 * checks on program arrays.
-	 */
 	fp = bpf_prog_select_runtime(fp, &err);
+	if (err)
+		goto out_err_free;
 
 	kfree(old_prog);
 	return fp;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index cbc3dde4cfcc..a47ad6cd41c0 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -325,7 +325,13 @@ static struct ctl_table net_core_table[] = {
 		.data		= &bpf_jit_enable,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
 		.proc_handler	= proc_dointvec
+#else
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one,
+		.extra2		= &one,
+#endif
 	},
 # ifdef CONFIG_HAVE_EBPF_JIT
 	{
diff --git a/net/socket.c b/net/socket.c
index 05f361faec45..78acd6ce74c7 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2619,6 +2619,15 @@ out_fs:
 
 core_initcall(sock_init);	/* early initcall */
 
+static int __init jit_init(void)
+{
+#ifdef CONFIG_BPF_JIT_ALWAYS_ON
+	bpf_jit_enable = 1;
+#endif
+	return 0;
+}
+pure_initcall(jit_init);
+
 #ifdef CONFIG_PROC_FS
 void socket_seq_show(struct seq_file *seq)
 {