aboutsummaryrefslogtreecommitdiffstats
path: root/mm/slab.c
diff options
context:
space:
mode:
authorThomas Garnier <thgarnie@google.com>2016-05-19 20:10:37 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2016-05-19 22:12:14 -0400
commitc7ce4f60ac199fb3521c5fcd64da21cee801ec2b (patch)
tree1fe6cb7f40f9907181c9ada69ff5348dfc5fbec4 /mm/slab.c
parent81ae6d03952c1bfb96e1a716809bd65e7cd14360 (diff)
mm: SLAB freelist randomization
Provides an optional config (CONFIG_SLAB_FREELIST_RANDOM) to randomize the SLAB freelist. The list is randomized during initialization of a new set of pages. The order on different freelist sizes is pre-computed at boot for performance. Each kmem_cache has its own randomized freelist. Before pre-computed lists are available freelists are generated dynamically. This security feature reduces the predictability of the kernel SLAB allocator against heap overflows rendering attacks much less stable. For example this attack against SLUB (also applicable against SLAB) would be affected: https://jon.oberheide.org/blog/2010/09/10/linux-kernel-can-slub-overflow/ Also, since v4.6 the freelist was moved at the end of the SLAB. It means a controllable heap is opened to new attacks not yet publicly discussed. A kernel heap overflow can be transformed to multiple use-after-free. This feature makes this type of attack harder too. To generate entropy, we use get_random_bytes_arch because 0 bits of entropy is available in the boot stage. In the worse case this function will fallback to the get_random_bytes sub API. We also generate a shift random number to shift pre-computed freelist for each new set of pages. The config option name is not specific to the SLAB as this approach will be extended to other allocators like SLUB. Performance results highlighted no major changes: Hackbench (running 90 10 times): Before average: 0.0698 After average: 0.0663 (-5.01%) slab_test 1 run on boot. Difference only seen on the 2048 size test being the worse case scenario covered by freelist randomization. New slab pages are constantly being created on the 10000 allocations. Variance should be mainly due to getting new pages every few allocations. Before: Single thread testing ===================== 1. Kmalloc: Repeatedly allocate then free test 10000 times kmalloc(8) -> 99 cycles kfree -> 112 cycles 10000 times kmalloc(16) -> 109 cycles kfree -> 140 cycles 10000 times kmalloc(32) -> 129 cycles kfree -> 137 cycles 10000 times kmalloc(64) -> 141 cycles kfree -> 141 cycles 10000 times kmalloc(128) -> 152 cycles kfree -> 148 cycles 10000 times kmalloc(256) -> 195 cycles kfree -> 167 cycles 10000 times kmalloc(512) -> 257 cycles kfree -> 199 cycles 10000 times kmalloc(1024) -> 393 cycles kfree -> 251 cycles 10000 times kmalloc(2048) -> 649 cycles kfree -> 228 cycles 10000 times kmalloc(4096) -> 806 cycles kfree -> 370 cycles 10000 times kmalloc(8192) -> 814 cycles kfree -> 411 cycles 10000 times kmalloc(16384) -> 892 cycles kfree -> 455 cycles 2. Kmalloc: alloc/free test 10000 times kmalloc(8)/kfree -> 121 cycles 10000 times kmalloc(16)/kfree -> 121 cycles 10000 times kmalloc(32)/kfree -> 121 cycles 10000 times kmalloc(64)/kfree -> 121 cycles 10000 times kmalloc(128)/kfree -> 121 cycles 10000 times kmalloc(256)/kfree -> 119 cycles 10000 times kmalloc(512)/kfree -> 119 cycles 10000 times kmalloc(1024)/kfree -> 119 cycles 10000 times kmalloc(2048)/kfree -> 119 cycles 10000 times kmalloc(4096)/kfree -> 121 cycles 10000 times kmalloc(8192)/kfree -> 119 cycles 10000 times kmalloc(16384)/kfree -> 119 cycles After: Single thread testing ===================== 1. Kmalloc: Repeatedly allocate then free test 10000 times kmalloc(8) -> 130 cycles kfree -> 86 cycles 10000 times kmalloc(16) -> 118 cycles kfree -> 86 cycles 10000 times kmalloc(32) -> 121 cycles kfree -> 85 cycles 10000 times kmalloc(64) -> 176 cycles kfree -> 102 cycles 10000 times kmalloc(128) -> 178 cycles kfree -> 100 cycles 10000 times kmalloc(256) -> 205 cycles kfree -> 109 cycles 10000 times kmalloc(512) -> 262 cycles kfree -> 136 cycles 10000 times kmalloc(1024) -> 342 cycles kfree -> 157 cycles 10000 times kmalloc(2048) -> 701 cycles kfree -> 238 cycles 10000 times kmalloc(4096) -> 803 cycles kfree -> 364 cycles 10000 times kmalloc(8192) -> 835 cycles kfree -> 404 cycles 10000 times kmalloc(16384) -> 896 cycles kfree -> 441 cycles 2. Kmalloc: alloc/free test 10000 times kmalloc(8)/kfree -> 121 cycles 10000 times kmalloc(16)/kfree -> 121 cycles 10000 times kmalloc(32)/kfree -> 123 cycles 10000 times kmalloc(64)/kfree -> 142 cycles 10000 times kmalloc(128)/kfree -> 121 cycles 10000 times kmalloc(256)/kfree -> 119 cycles 10000 times kmalloc(512)/kfree -> 119 cycles 10000 times kmalloc(1024)/kfree -> 119 cycles 10000 times kmalloc(2048)/kfree -> 119 cycles 10000 times kmalloc(4096)/kfree -> 119 cycles 10000 times kmalloc(8192)/kfree -> 119 cycles 10000 times kmalloc(16384)/kfree -> 119 cycles [akpm@linux-foundation.org: propagate gfp_t into cache_random_seq_create()] Signed-off-by: Thomas Garnier <thgarnie@google.com> Acked-by: Christoph Lameter <cl@linux.com> Cc: Pekka Enberg <penberg@kernel.org> Cc: David Rientjes <rientjes@google.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Kees Cook <keescook@chromium.org> Cc: Greg Thelen <gthelen@google.com> Cc: Laura Abbott <labbott@fedoraproject.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/slab.c')
-rw-r--r--mm/slab.c167
1 files changed, 165 insertions, 2 deletions
diff --git a/mm/slab.c b/mm/slab.c
index 8133ebea77a4..1ee26a0d358f 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1243,6 +1243,61 @@ static void __init set_up_node(struct kmem_cache *cachep, int index)
1243 } 1243 }
1244} 1244}
1245 1245
1246#ifdef CONFIG_SLAB_FREELIST_RANDOM
1247static void freelist_randomize(struct rnd_state *state, freelist_idx_t *list,
1248 size_t count)
1249{
1250 size_t i;
1251 unsigned int rand;
1252
1253 for (i = 0; i < count; i++)
1254 list[i] = i;
1255
1256 /* Fisher-Yates shuffle */
1257 for (i = count - 1; i > 0; i--) {
1258 rand = prandom_u32_state(state);
1259 rand %= (i + 1);
1260 swap(list[i], list[rand]);
1261 }
1262}
1263
1264/* Create a random sequence per cache */
1265static int cache_random_seq_create(struct kmem_cache *cachep, gfp_t gfp)
1266{
1267 unsigned int seed, count = cachep->num;
1268 struct rnd_state state;
1269
1270 if (count < 2)
1271 return 0;
1272
1273 /* If it fails, we will just use the global lists */
1274 cachep->random_seq = kcalloc(count, sizeof(freelist_idx_t), gfp);
1275 if (!cachep->random_seq)
1276 return -ENOMEM;
1277
1278 /* Get best entropy at this stage */
1279 get_random_bytes_arch(&seed, sizeof(seed));
1280 prandom_seed_state(&state, seed);
1281
1282 freelist_randomize(&state, cachep->random_seq, count);
1283 return 0;
1284}
1285
1286/* Destroy the per-cache random freelist sequence */
1287static void cache_random_seq_destroy(struct kmem_cache *cachep)
1288{
1289 kfree(cachep->random_seq);
1290 cachep->random_seq = NULL;
1291}
1292#else
1293static inline int cache_random_seq_create(struct kmem_cache *cachep, gfp_t gfp)
1294{
1295 return 0;
1296}
1297static inline void cache_random_seq_destroy(struct kmem_cache *cachep) { }
1298#endif /* CONFIG_SLAB_FREELIST_RANDOM */
1299
1300
1246/* 1301/*
1247 * Initialisation. Called after the page allocator have been initialised and 1302 * Initialisation. Called after the page allocator have been initialised and
1248 * before smp_init(). 1303 * before smp_init().
@@ -2374,6 +2429,8 @@ void __kmem_cache_release(struct kmem_cache *cachep)
2374 int i; 2429 int i;
2375 struct kmem_cache_node *n; 2430 struct kmem_cache_node *n;
2376 2431
2432 cache_random_seq_destroy(cachep);
2433
2377 free_percpu(cachep->cpu_cache); 2434 free_percpu(cachep->cpu_cache);
2378 2435
2379 /* NUMA: free the node structures */ 2436 /* NUMA: free the node structures */
@@ -2480,15 +2537,115 @@ static void cache_init_objs_debug(struct kmem_cache *cachep, struct page *page)
2480#endif 2537#endif
2481} 2538}
2482 2539
2540#ifdef CONFIG_SLAB_FREELIST_RANDOM
2541/* Hold information during a freelist initialization */
2542union freelist_init_state {
2543 struct {
2544 unsigned int pos;
2545 freelist_idx_t *list;
2546 unsigned int count;
2547 unsigned int rand;
2548 };
2549 struct rnd_state rnd_state;
2550};
2551
2552/*
2553 * Initialize the state based on the randomization methode available.
2554 * return true if the pre-computed list is available, false otherwize.
2555 */
2556static bool freelist_state_initialize(union freelist_init_state *state,
2557 struct kmem_cache *cachep,
2558 unsigned int count)
2559{
2560 bool ret;
2561 unsigned int rand;
2562
2563 /* Use best entropy available to define a random shift */
2564 get_random_bytes_arch(&rand, sizeof(rand));
2565
2566 /* Use a random state if the pre-computed list is not available */
2567 if (!cachep->random_seq) {
2568 prandom_seed_state(&state->rnd_state, rand);
2569 ret = false;
2570 } else {
2571 state->list = cachep->random_seq;
2572 state->count = count;
2573 state->pos = 0;
2574 state->rand = rand;
2575 ret = true;
2576 }
2577 return ret;
2578}
2579
2580/* Get the next entry on the list and randomize it using a random shift */
2581static freelist_idx_t next_random_slot(union freelist_init_state *state)
2582{
2583 return (state->list[state->pos++] + state->rand) % state->count;
2584}
2585
2586/*
2587 * Shuffle the freelist initialization state based on pre-computed lists.
2588 * return true if the list was successfully shuffled, false otherwise.
2589 */
2590static bool shuffle_freelist(struct kmem_cache *cachep, struct page *page)
2591{
2592 unsigned int objfreelist = 0, i, count = cachep->num;
2593 union freelist_init_state state;
2594 bool precomputed;
2595
2596 if (count < 2)
2597 return false;
2598
2599 precomputed = freelist_state_initialize(&state, cachep, count);
2600
2601 /* Take a random entry as the objfreelist */
2602 if (OBJFREELIST_SLAB(cachep)) {
2603 if (!precomputed)
2604 objfreelist = count - 1;
2605 else
2606 objfreelist = next_random_slot(&state);
2607 page->freelist = index_to_obj(cachep, page, objfreelist) +
2608 obj_offset(cachep);
2609 count--;
2610 }
2611
2612 /*
2613 * On early boot, generate the list dynamically.
2614 * Later use a pre-computed list for speed.
2615 */
2616 if (!precomputed) {
2617 freelist_randomize(&state.rnd_state, page->freelist, count);
2618 } else {
2619 for (i = 0; i < count; i++)
2620 set_free_obj(page, i, next_random_slot(&state));
2621 }
2622
2623 if (OBJFREELIST_SLAB(cachep))
2624 set_free_obj(page, cachep->num - 1, objfreelist);
2625
2626 return true;
2627}
2628#else
2629static inline bool shuffle_freelist(struct kmem_cache *cachep,
2630 struct page *page)
2631{
2632 return false;
2633}
2634#endif /* CONFIG_SLAB_FREELIST_RANDOM */
2635
2483static void cache_init_objs(struct kmem_cache *cachep, 2636static void cache_init_objs(struct kmem_cache *cachep,
2484 struct page *page) 2637 struct page *page)
2485{ 2638{
2486 int i; 2639 int i;
2487 void *objp; 2640 void *objp;
2641 bool shuffled;
2488 2642
2489 cache_init_objs_debug(cachep, page); 2643 cache_init_objs_debug(cachep, page);
2490 2644
2491 if (OBJFREELIST_SLAB(cachep)) { 2645 /* Try to randomize the freelist if enabled */
2646 shuffled = shuffle_freelist(cachep, page);
2647
2648 if (!shuffled && OBJFREELIST_SLAB(cachep)) {
2492 page->freelist = index_to_obj(cachep, page, cachep->num - 1) + 2649 page->freelist = index_to_obj(cachep, page, cachep->num - 1) +
2493 obj_offset(cachep); 2650 obj_offset(cachep);
2494 } 2651 }
@@ -2502,7 +2659,8 @@ static void cache_init_objs(struct kmem_cache *cachep,
2502 kasan_poison_object_data(cachep, objp); 2659 kasan_poison_object_data(cachep, objp);
2503 } 2660 }
2504 2661
2505 set_free_obj(page, i, i); 2662 if (!shuffled)
2663 set_free_obj(page, i, i);
2506 } 2664 }
2507} 2665}
2508 2666
@@ -3841,6 +3999,10 @@ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
3841 int shared = 0; 3999 int shared = 0;
3842 int batchcount = 0; 4000 int batchcount = 0;
3843 4001
4002 err = cache_random_seq_create(cachep, gfp);
4003 if (err)
4004 goto end;
4005
3844 if (!is_root_cache(cachep)) { 4006 if (!is_root_cache(cachep)) {
3845 struct kmem_cache *root = memcg_root_cache(cachep); 4007 struct kmem_cache *root = memcg_root_cache(cachep);
3846 limit = root->limit; 4008 limit = root->limit;
@@ -3894,6 +4056,7 @@ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
3894 batchcount = (limit + 1) / 2; 4056 batchcount = (limit + 1) / 2;
3895skip_setup: 4057skip_setup:
3896 err = do_tune_cpucache(cachep, limit, batchcount, shared, gfp); 4058 err = do_tune_cpucache(cachep, limit, batchcount, shared, gfp);
4059end:
3897 if (err) 4060 if (err)
3898 pr_err("enable_cpucache failed for %s, error %d\n", 4061 pr_err("enable_cpucache failed for %s, error %d\n",
3899 cachep->name, -err); 4062 cachep->name, -err);