aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/futex.c
diff options
context:
space:
mode:
authorDavidlohr Bueso <davidlohr@hp.com>2014-01-12 18:31:23 -0500
committerIngo Molnar <mingo@kernel.org>2014-01-13 05:45:18 -0500
commita52b89ebb6d4499be38780db8d176c5d3a6fbc17 (patch)
tree2b180da9e52aee4d4c3b784c63597af6a4e3a509 /kernel/futex.c
parent0d00c7b20c7716ce08399570ea48813ecf001aa8 (diff)
futexes: Increase hash table size for better performance
Currently, the futex global hash table suffers from its fixed, smallish (for today's standards) size of 256 entries, as well as its lack of NUMA awareness. Large systems, using many futexes, can be prone to high amounts of collisions; where these futexes hash to the same bucket and lead to extra contention on the same hb->lock. Furthermore, cacheline bouncing is a reality when we have multiple hb->locks residing on the same cacheline and different futexes hash to adjacent buckets. This patch keeps the current static size of 16 entries for small systems, or otherwise, 256 * ncpus (or larger as we need to round the number to a power of 2). Note that this number of CPUs accounts for all CPUs that can ever be available in the system, taking into consideration things like hotpluging. While we do impose extra overhead at bootup by making the hash table larger, this is a one time thing, and does not shadow the benefits of this patch. Furthermore, as suggested by tglx, by cache aligning the hash buckets we can avoid access across cacheline boundaries and also avoid massive cache line bouncing if multiple cpus are hammering away at different hash buckets which happen to reside in the same cache line. Also, similar to other core kernel components (pid, dcache, tcp), by using alloc_large_system_hash() we benefit from its NUMA awareness and thus the table is distributed among the nodes instead of in a single one. For a custom microbenchmark that pounds on the uaddr hashing -- making the wait path fail at futex_wait_setup() returning -EWOULDBLOCK for large amounts of futexes, we can see the following benefits on a 80-core, 8-socket 1Tb server: +---------+--------------------+------------------------+-----------------------+-------------------------------+ | threads | baseline (ops/sec) | aligned-only (ops/sec) | large table (ops/sec) | large table+aligned (ops/sec) | +---------+--------------------+------------------------+-----------------------+-------------------------------+ |     512 |              32426 | 50531  (+55.8%)        | 255274  (+687.2%)     | 292553  (+802.2%)             | |     256 |              65360 | 99588  (+52.3%)        | 443563  (+578.6%)     | 508088  (+677.3%)             | |     128 |             125635 | 200075 (+59.2%)        | 742613  (+491.1%)     | 835452  (+564.9%)             | |      80 |             193559 | 323425 (+67.1%)        | 1028147 (+431.1%)     | 1130304 (+483.9%)             | |      64 |             247667 | 443740 (+79.1%)        | 997300  (+302.6%)     | 1145494 (+362.5%)             | |      32 |             628412 | 721401 (+14.7%)        | 965996  (+53.7%)      | 1122115 (+78.5%)              | +---------+--------------------+------------------------+-----------------------+-------------------------------+ Reviewed-by: Darren Hart <dvhart@linux.intel.com> Reviewed-by: Peter Zijlstra <peterz@infradead.org> Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Reviewed-by: Waiman Long <Waiman.Long@hp.com> Reviewed-and-tested-by: Jason Low <jason.low2@hp.com> Reviewed-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Davidlohr Bueso <davidlohr@hp.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Jeff Mahoney <jeffm@suse.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Scott Norton <scott.norton@hp.com> Cc: Tom Vaden <tom.vaden@hp.com> Cc: Aswin Chandramouleeswaran <aswin@hp.com> Link: http://lkml.kernel.org/r/1389569486-25487-3-git-send-email-davidlohr@hp.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/futex.c')
-rw-r--r--kernel/futex.c26
1 files changed, 19 insertions, 7 deletions
diff --git a/kernel/futex.c b/kernel/futex.c
index 085f5fa0b342..577481d5c59d 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -63,6 +63,7 @@
63#include <linux/sched/rt.h> 63#include <linux/sched/rt.h>
64#include <linux/hugetlb.h> 64#include <linux/hugetlb.h>
65#include <linux/freezer.h> 65#include <linux/freezer.h>
66#include <linux/bootmem.h>
66 67
67#include <asm/futex.h> 68#include <asm/futex.h>
68 69
@@ -70,8 +71,6 @@
70 71
71int __read_mostly futex_cmpxchg_enabled; 72int __read_mostly futex_cmpxchg_enabled;
72 73
73#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
74
75/* 74/*
76 * Futex flags used to encode options to functions and preserve them across 75 * Futex flags used to encode options to functions and preserve them across
77 * restarts. 76 * restarts.
@@ -149,9 +148,11 @@ static const struct futex_q futex_q_init = {
149struct futex_hash_bucket { 148struct futex_hash_bucket {
150 spinlock_t lock; 149 spinlock_t lock;
151 struct plist_head chain; 150 struct plist_head chain;
152}; 151} ____cacheline_aligned_in_smp;
153 152
154static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS]; 153static unsigned long __read_mostly futex_hashsize;
154
155static struct futex_hash_bucket *futex_queues;
155 156
156/* 157/*
157 * We hash on the keys returned from get_futex_key (see below). 158 * We hash on the keys returned from get_futex_key (see below).
@@ -161,7 +162,7 @@ static struct futex_hash_bucket *hash_futex(union futex_key *key)
161 u32 hash = jhash2((u32*)&key->both.word, 162 u32 hash = jhash2((u32*)&key->both.word,
162 (sizeof(key->both.word)+sizeof(key->both.ptr))/4, 163 (sizeof(key->both.word)+sizeof(key->both.ptr))/4,
163 key->both.offset); 164 key->both.offset);
164 return &futex_queues[hash & ((1 << FUTEX_HASHBITS)-1)]; 165 return &futex_queues[hash & (futex_hashsize - 1)];
165} 166}
166 167
167/* 168/*
@@ -2719,7 +2720,18 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
2719static int __init futex_init(void) 2720static int __init futex_init(void)
2720{ 2721{
2721 u32 curval; 2722 u32 curval;
2722 int i; 2723 unsigned long i;
2724
2725#if CONFIG_BASE_SMALL
2726 futex_hashsize = 16;
2727#else
2728 futex_hashsize = roundup_pow_of_two(256 * num_possible_cpus());
2729#endif
2730
2731 futex_queues = alloc_large_system_hash("futex", sizeof(*futex_queues),
2732 futex_hashsize, 0,
2733 futex_hashsize < 256 ? HASH_SMALL : 0,
2734 NULL, NULL, futex_hashsize, futex_hashsize);
2723 2735
2724 /* 2736 /*
2725 * This will fail and we want it. Some arch implementations do 2737 * This will fail and we want it. Some arch implementations do
@@ -2734,7 +2746,7 @@ static int __init futex_init(void)
2734 if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT) 2746 if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
2735 futex_cmpxchg_enabled = 1; 2747 futex_cmpxchg_enabled = 1;
2736 2748
2737 for (i = 0; i < ARRAY_SIZE(futex_queues); i++) { 2749 for (i = 0; i < futex_hashsize; i++) {
2738 plist_head_init(&futex_queues[i].chain); 2750 plist_head_init(&futex_queues[i].chain);
2739 spin_lock_init(&futex_queues[i].lock); 2751 spin_lock_init(&futex_queues[i].lock);
2740 } 2752 }