From a98406e22c12e514bac28fec0a49dc793edaf3a8 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 23 Aug 2014 17:03:28 +0200 Subject: random32: improvements to prandom_bytes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch addresses a couple of minor items, mostly addesssing prandom_bytes(): 1) prandom_bytes{,_state}() should use size_t for length arguments, 2) We can use put_unaligned() when filling the array instead of open coding it [ perhaps some archs will further benefit from their own arch specific implementation when GCC cannot make up for it ], 3) Fix a typo, 4) Better use unsigned int as type for getting the arch seed, 5) Make use of prandom_u32_max() for timer slack. Regarding the change to put_unaligned(), callers of prandom_bytes() which internally invoke prandom_bytes_state(), don't bother as they expect the array to be filled randomly and don't have any control of the internal state what-so-ever (that's also why we have periodic reseeding there, etc), so they really don't care. Now for the direct callers of prandom_bytes_state(), which are solely located in test cases for MTD devices, that is, drivers/mtd/tests/{oobtest.c,pagetest.c,subpagetest.c}: These tests basically fill a test write-vector through prandom_bytes_state() with an a-priori defined seed each time and write that to a MTD device. Later on, they set up a read-vector and read back that blocks from the device. So in the verification phase, the write-vector is being re-setup [ so same seed and prandom_bytes_state() called ], and then memcmp()'ed against the read-vector to check if the data is the same. Akinobu, Lothar and I also tested this patch and it runs through the 3 relevant MTD test cases w/o any errors on the nandsim device (simulator for MTD devs) for x86_64, ppc64, ARM (i.MX28, i.MX53 and i.MX6): # modprobe nandsim first_id_byte=0x20 second_id_byte=0xac \ third_id_byte=0x00 fourth_id_byte=0x15 # modprobe mtd_oobtest dev=0 # modprobe mtd_pagetest dev=0 # modprobe mtd_subpagetest dev=0 We also don't have any users depending directly on a particular result of the PRNG (except the PRNG self-test itself), and that's just fine as it e.g. allowed us easily to do things like upgrading from taus88 to taus113. Signed-off-by: Daniel Borkmann Tested-by: Akinobu Mita Tested-by: Lothar Waßmann Cc: Hannes Frederic Sowa Signed-off-by: David S. Miller --- lib/random32.c | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) (limited to 'lib') diff --git a/lib/random32.c b/lib/random32.c index c9b6bf3afe0c..0bee183fa18f 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -37,6 +37,7 @@ #include #include #include +#include #ifdef CONFIG_RANDOM32_SELFTEST static void __init prandom_state_selftest(void); @@ -96,27 +97,23 @@ EXPORT_SYMBOL(prandom_u32); * This is used for pseudo-randomness with no outside seeding. * For more random results, use prandom_bytes(). */ -void prandom_bytes_state(struct rnd_state *state, void *buf, int bytes) +void prandom_bytes_state(struct rnd_state *state, void *buf, size_t bytes) { - unsigned char *p = buf; - int i; - - for (i = 0; i < round_down(bytes, sizeof(u32)); i += sizeof(u32)) { - u32 random = prandom_u32_state(state); - int j; + u8 *ptr = buf; - for (j = 0; j < sizeof(u32); j++) { - p[i + j] = random; - random >>= BITS_PER_BYTE; - } + while (bytes >= sizeof(u32)) { + put_unaligned(prandom_u32_state(state), (u32 *) ptr); + ptr += sizeof(u32); + bytes -= sizeof(u32); } - if (i < bytes) { - u32 random = prandom_u32_state(state); - for (; i < bytes; i++) { - p[i] = random; - random >>= BITS_PER_BYTE; - } + if (bytes > 0) { + u32 rem = prandom_u32_state(state); + do { + *ptr++ = (u8) rem; + bytes--; + rem >>= BITS_PER_BYTE; + } while (bytes > 0); } } EXPORT_SYMBOL(prandom_bytes_state); @@ -126,7 +123,7 @@ EXPORT_SYMBOL(prandom_bytes_state); * @buf: where to copy the pseudo-random bytes to * @bytes: the requested number of bytes */ -void prandom_bytes(void *buf, int bytes) +void prandom_bytes(void *buf, size_t bytes) { struct rnd_state *state = &get_cpu_var(net_rand_state); @@ -137,7 +134,7 @@ EXPORT_SYMBOL(prandom_bytes); static void prandom_warmup(struct rnd_state *state) { - /* Calling RNG ten times to satify recurrence condition */ + /* Calling RNG ten times to satisfy recurrence condition */ prandom_u32_state(state); prandom_u32_state(state); prandom_u32_state(state); @@ -152,7 +149,7 @@ static void prandom_warmup(struct rnd_state *state) static u32 __extract_hwseed(void) { - u32 val = 0; + unsigned int val = 0; (void)(arch_get_random_seed_int(&val) || arch_get_random_int(&val)); @@ -228,7 +225,7 @@ static void __prandom_timer(unsigned long dontcare) prandom_seed(entropy); /* reseed every ~60 seconds, in [40 .. 80) interval with slack */ - expires = 40 + (prandom_u32() % 40); + expires = 40 + prandom_u32_max(40); seed_timer.expires = jiffies + msecs_to_jiffies(expires * MSEC_PER_SEC); add_timer(&seed_timer); -- cgit v1.2.2 From 72b603ee8cfc6be587f301568d79ce38e7ed735d Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 25 Aug 2014 12:27:02 -0700 Subject: bpf: x86: add missing 'shift by register' instructions to x64 eBPF JIT 'shift by register' operations are supported by eBPF interpreter, but were accidently left out of x64 JIT compiler. Fix it and add a testcase. Reported-by: Brendan Gregg Signed-off-by: Alexei Starovoitov Fixes: 622582786c9e ("net: filter: x86: internal BPF JIT") Signed-off-by: David S. Miller --- lib/test_bpf.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'lib') diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 89e0345733bd..8c66c6aace04 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -1341,6 +1341,44 @@ static struct bpf_test tests[] = { { }, { { 0, -1 } } }, + { + "INT: shifts by register", + .u.insns_int = { + BPF_MOV64_IMM(R0, -1234), + BPF_MOV64_IMM(R1, 1), + BPF_ALU32_REG(BPF_RSH, R0, R1), + BPF_JMP_IMM(BPF_JEQ, R0, 0x7ffffd97, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(R2, 1), + BPF_ALU64_REG(BPF_LSH, R0, R2), + BPF_MOV32_IMM(R4, -1234), + BPF_JMP_REG(BPF_JEQ, R0, R4, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_AND, R4, 63), + BPF_ALU64_REG(BPF_LSH, R0, R4), /* R0 <= 46 */ + BPF_MOV64_IMM(R3, 47), + BPF_ALU64_REG(BPF_ARSH, R0, R3), + BPF_JMP_IMM(BPF_JEQ, R0, -617, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(R2, 1), + BPF_ALU64_REG(BPF_LSH, R4, R2), /* R4 = 46 << 1 */ + BPF_JMP_IMM(BPF_JEQ, R4, 92, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(R4, 4), + BPF_ALU64_REG(BPF_LSH, R4, R4), /* R4 = 4 << 4 */ + BPF_JMP_IMM(BPF_JEQ, R4, 64, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(R4, 5), + BPF_ALU32_REG(BPF_LSH, R4, R4), /* R4 = 5 << 5 */ + BPF_JMP_IMM(BPF_JEQ, R4, 160, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(R0, -1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -1 } } + }, { "INT: DIV + ABS", .u.insns_int = { -- cgit v1.2.2 From 940001762ac514810e305aab356983829e5fa82a Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Wed, 3 Sep 2014 09:22:36 +0800 Subject: lib/rhashtable: allow user to set the minimum shifts of shrinking Although rhashtable library allows user to specify a quiet big size for user's created hash table, the table may be shrunk to a very small size - HASH_MIN_SIZE(4) after object is removed from the table at the first time. Subsequently, even if the total amount of objects saved in the table is quite lower than user's initial setting in a long time, the hash table size is still dynamically adjusted by rhashtable_shrink() or rhashtable_expand() each time object is inserted or removed from the table. However, as synchronize_rcu() has to be called when table is shrunk or expanded by the two functions, we should permit user to set the minimum table size through configuring the minimum number of shifts according to user specific requirement, avoiding these expensive actions of shrinking or expanding because of calling synchronize_rcu(). Signed-off-by: Ying Xue Acked-by: Thomas Graf Signed-off-by: David S. Miller --- lib/rhashtable.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index a2c78810ebc1..8dfec3f26d4c 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -298,7 +298,7 @@ int rhashtable_shrink(struct rhashtable *ht, gfp_t flags) ASSERT_RHT_MUTEX(ht); - if (tbl->size <= HASH_MIN_SIZE) + if (ht->shift <= ht->p.min_shift) return 0; ntbl = bucket_table_alloc(tbl->size / 2, flags); @@ -506,9 +506,10 @@ void *rhashtable_lookup_compare(const struct rhashtable *ht, u32 hash, } EXPORT_SYMBOL_GPL(rhashtable_lookup_compare); -static size_t rounded_hashtable_size(unsigned int nelem) +static size_t rounded_hashtable_size(struct rhashtable_params *params) { - return max(roundup_pow_of_two(nelem * 4 / 3), HASH_MIN_SIZE); + return max(roundup_pow_of_two(params->nelem_hint * 4 / 3), + 1UL << params->min_shift); } /** @@ -566,8 +567,11 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) (!params->key_len && !params->obj_hashfn)) return -EINVAL; + params->min_shift = max_t(size_t, params->min_shift, + ilog2(HASH_MIN_SIZE)); + if (params->nelem_hint) - size = rounded_hashtable_size(params->nelem_hint); + size = rounded_hashtable_size(params); tbl = bucket_table_alloc(size, GFP_KERNEL); if (tbl == NULL) -- cgit v1.2.2 From 60a3b2253c413cf601783b070507d7dd6620c954 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 2 Sep 2014 22:53:44 +0200 Subject: net: bpf: make eBPF interpreter images read-only With eBPF getting more extended and exposure to user space is on it's way, hardening the memory range the interpreter uses to steer its command flow seems appropriate. This patch moves the to be interpreted bytecode to read-only pages. In case we execute a corrupted BPF interpreter image for some reason e.g. caused by an attacker which got past a verifier stage, it would not only provide arbitrary read/write memory access but arbitrary function calls as well. After setting up the BPF interpreter image, its contents do not change until destruction time, thus we can setup the image on immutable made pages in order to mitigate modifications to that code. The idea is derived from commit 314beb9bcabf ("x86: bpf_jit_comp: secure bpf jit against spraying attacks"). This is possible because bpf_prog is not part of sk_filter anymore. After setup bpf_prog cannot be altered during its life-time. This prevents any modifications to the entire bpf_prog structure (incl. function/JIT image pointer). Every eBPF program (including classic BPF that are migrated) have to call bpf_prog_select_runtime() to select either interpreter or a JIT image as a last setup step, and they all are being freed via bpf_prog_free(), including non-JIT. Therefore, we can easily integrate this into the eBPF life-time, plus since we directly allocate a bpf_prog, we have no performance penalty. Tested with seccomp and test_bpf testsuite in JIT/non-JIT mode and manual inspection of kernel_page_tables. Brad Spengler proposed the same idea via Twitter during development of this patch. Joint work with Hannes Frederic Sowa. Suggested-by: Brad Spengler Signed-off-by: Daniel Borkmann Signed-off-by: Hannes Frederic Sowa Cc: Alexei Starovoitov Cc: Kees Cook Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- lib/test_bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 8c66c6aace04..9a67456ba29a 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -1836,7 +1836,7 @@ static struct bpf_prog *generate_filter(int which, int *err) break; case INTERNAL: - fp = kzalloc(bpf_prog_size(flen), GFP_KERNEL); + fp = bpf_prog_alloc(bpf_prog_size(flen), 0); if (fp == NULL) { pr_cont("UNEXPECTED_FAIL no memory left\n"); *err = -ENOMEM; -- cgit v1.2.2 From 02ab695bb37ee9ad515df0d0790d5977505dd04a Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 4 Sep 2014 22:17:17 -0700 Subject: net: filter: add "load 64-bit immediate" eBPF instruction add BPF_LD_IMM64 instruction to load 64-bit immediate value into a register. All previous instructions were 8-byte. This is first 16-byte instruction. Two consecutive 'struct bpf_insn' blocks are interpreted as single instruction: insn[0].code = BPF_LD | BPF_DW | BPF_IMM insn[0].dst_reg = destination register insn[0].imm = lower 32-bit insn[1].code = 0 insn[1].imm = upper 32-bit All unused fields must be zero. Classic BPF has similar instruction: BPF_LD | BPF_W | BPF_IMM which loads 32-bit immediate value into a register. x64 JITs it as single 'movabsq %rax, imm64' arm64 may JIT as sequence of four 'movk x0, #imm16, lsl #shift' insn Note that old eBPF programs are binary compatible with new interpreter. It helps eBPF programs load 64-bit constant into a register with one instruction instead of using two registers and 4 instructions: BPF_MOV32_IMM(R1, imm32) BPF_ALU64_IMM(BPF_LSH, R1, 32) BPF_MOV32_IMM(R2, imm32) BPF_ALU64_REG(BPF_OR, R1, R2) User space generated programs will use this instruction to load constants only. To tell kernel that user space needs a pointer the _pseudo_ variant of this instruction may be added later, which will use extra bits of encoding to indicate what type of pointer user space is asking kernel to provide. For example 'off' or 'src_reg' fields can be used for such purpose. src_reg = 1 could mean that user space is asking kernel to validate and load in-kernel map pointer. src_reg = 2 could mean that user space needs readonly data section pointer src_reg = 3 could mean that user space needs a pointer to per-cpu local data All such future pseudo instructions will not be carrying the actual pointer as part of the instruction, but rather will be treated as a request to kernel to provide one. The kernel will verify the request_for_a_pointer, then will drop _pseudo_ marking and will store actual internal pointer inside the instruction, so the end result is the interpreter and JITs never see pseudo BPF_LD_IMM64 insns and only operate on generic BPF_LD_IMM64 that loads 64-bit immediate into a register. User space never operates on direct pointers and verifier can easily recognize request_for_pointer vs other instructions. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- lib/test_bpf.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'lib') diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 9a67456ba29a..413890815d3e 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -1735,6 +1735,27 @@ static struct bpf_test tests[] = { { }, { { 1, 0 } }, }, + { + "load 64-bit immediate", + .u.insns_int = { + BPF_LD_IMM64(R1, 0x567800001234L), + BPF_MOV64_REG(R2, R1), + BPF_MOV64_REG(R3, R2), + BPF_ALU64_IMM(BPF_RSH, R2, 32), + BPF_ALU64_IMM(BPF_LSH, R3, 32), + BPF_ALU64_IMM(BPF_RSH, R3, 32), + BPF_ALU64_IMM(BPF_MOV, R0, 0), + BPF_JMP_IMM(BPF_JEQ, R2, 0x5678, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, R3, 0x1234, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, }; static struct net_device dev; -- cgit v1.2.2 From 25ee7327d04bc3ff41a7a5ac42d74226f8d60ac6 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 19 Sep 2014 13:53:51 -0700 Subject: net: bpf: fix compiler warnings in test_bpf old gcc 4.2 used by avr32 architecture produces warnings: lib/test_bpf.c:1741: warning: integer constant is too large for 'long' type lib/test_bpf.c:1741: warning: integer constant is too large for 'long' type lib/test_bpf.c: In function '__run_one': lib/test_bpf.c:1897: warning: 'ret' may be used uninitialized in this function silence these warnings. Fixes: 02ab695bb37e ("net: filter: add "load 64-bit immediate" eBPF instruction") Reported-by: Fengguang Wu Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- lib/test_bpf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 413890815d3e..23e070bcf72d 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -1738,7 +1738,7 @@ static struct bpf_test tests[] = { { "load 64-bit immediate", .u.insns_int = { - BPF_LD_IMM64(R1, 0x567800001234L), + BPF_LD_IMM64(R1, 0x567800001234LL), BPF_MOV64_REG(R2, R1), BPF_MOV64_REG(R3, R2), BPF_ALU64_IMM(BPF_RSH, R2, 32), @@ -1894,7 +1894,7 @@ static int __run_one(const struct bpf_prog *fp, const void *data, int runs, u64 *duration) { u64 start, finish; - int ret, i; + int ret = 0, i; start = ktime_to_us(ktime_get()); -- cgit v1.2.2 From 3c731eba48e1b0650decfc91a839b80f0e05ce8f Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 26 Sep 2014 00:17:07 -0700 Subject: bpf: mini eBPF library, test stubs and verifier testsuite 1. the library includes a trivial set of BPF syscall wrappers: int bpf_create_map(int key_size, int value_size, int max_entries); int bpf_update_elem(int fd, void *key, void *value); int bpf_lookup_elem(int fd, void *key, void *value); int bpf_delete_elem(int fd, void *key); int bpf_get_next_key(int fd, void *key, void *next_key); int bpf_prog_load(enum bpf_prog_type prog_type, const struct sock_filter_int *insns, int insn_len, const char *license); bpf_prog_load() stores verifier log into global bpf_log_buf[] array and BPF_*() macros to build instructions 2. test stubs configure eBPF infra with 'unspec' map and program types. These are fake types used by user space testsuite only. 3. verifier tests valid and invalid programs and expects predefined error log messages from kernel. 40 tests so far. $ sudo ./test_verifier #0 add+sub+mul OK #1 unreachable OK #2 unreachable2 OK #3 out of range jump OK #4 out of range jump2 OK #5 test1 ld_imm64 OK ... Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- lib/Kconfig.debug | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index a28590083622..3ac43f34437b 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1672,7 +1672,8 @@ config TEST_BPF against the BPF interpreter or BPF JIT compiler depending on the current setting. This is in particular useful for BPF JIT compiler development, but also to run regression tests against changes in - the interpreter code. + the interpreter code. It also enables test stubs for eBPF maps and + verifier used by user space verifier testsuite. If unsure, say N. -- cgit v1.2.2