aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorShaohua Li <shli@kernel.org>2013-10-14 21:05:01 -0400
committerJens Axboe <axboe@kernel.dk>2013-10-25 06:55:59 -0400
commite26b53d0b287056646a0dffce8bc6b0f053f3823 (patch)
tree99f7f824f8123d441de534960fd85dda5d2e5385
parent098faf5805c80f951ce5e8b4a6842382ad793c38 (diff)
percpu_ida: make percpu_ida percpu size/batch configurable
Make percpu_ida percpu size/batch configurable. The block-mq-tag will use it. After block-mq uses percpu_ida to manage tags, performance is improved. My test is done in a 2 sockets machine, 12 process cross the 2 sockets. So if there is lock contention or ipi, should be stressed heavily. Testing is done for null-blk. hw_queue_depth nopatch iops patch iops 64 ~800k/s ~1470k/s 2048 ~4470k/s ~4340k/s Cc: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Shaohua Li <shli@fusionio.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--include/linux/percpu_ida.h18
-rw-r--r--lib/percpu_ida.c28
2 files changed, 28 insertions, 18 deletions
diff --git a/include/linux/percpu_ida.h b/include/linux/percpu_ida.h
index 0b23edbee309..56c14033e7e7 100644
--- a/include/linux/percpu_ida.h
+++ b/include/linux/percpu_ida.h
@@ -16,6 +16,8 @@ struct percpu_ida {
16 * percpu_ida_init() 16 * percpu_ida_init()
17 */ 17 */
18 unsigned nr_tags; 18 unsigned nr_tags;
19 unsigned percpu_max_size;
20 unsigned percpu_batch_size;
19 21
20 struct percpu_ida_cpu __percpu *tag_cpu; 22 struct percpu_ida_cpu __percpu *tag_cpu;
21 23
@@ -51,10 +53,24 @@ struct percpu_ida {
51 } ____cacheline_aligned_in_smp; 53 } ____cacheline_aligned_in_smp;
52}; 54};
53 55
56/*
57 * Number of tags we move between the percpu freelist and the global freelist at
58 * a time
59 */
60#define IDA_DEFAULT_PCPU_BATCH_MOVE 32U
61/* Max size of percpu freelist, */
62#define IDA_DEFAULT_PCPU_SIZE ((IDA_DEFAULT_PCPU_BATCH_MOVE * 3) / 2)
63
54int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp); 64int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp);
55void percpu_ida_free(struct percpu_ida *pool, unsigned tag); 65void percpu_ida_free(struct percpu_ida *pool, unsigned tag);
56 66
57void percpu_ida_destroy(struct percpu_ida *pool); 67void percpu_ida_destroy(struct percpu_ida *pool);
58int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags); 68int __percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags,
69 unsigned long max_size, unsigned long batch_size);
70static inline int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags)
71{
72 return __percpu_ida_init(pool, nr_tags, IDA_DEFAULT_PCPU_SIZE,
73 IDA_DEFAULT_PCPU_BATCH_MOVE);
74}
59 75
60#endif /* __PERCPU_IDA_H__ */ 76#endif /* __PERCPU_IDA_H__ */
diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c
index bab1ba2a4c71..a601d4259e13 100644
--- a/lib/percpu_ida.c
+++ b/lib/percpu_ida.c
@@ -30,15 +30,6 @@
30#include <linux/spinlock.h> 30#include <linux/spinlock.h>
31#include <linux/percpu_ida.h> 31#include <linux/percpu_ida.h>
32 32
33/*
34 * Number of tags we move between the percpu freelist and the global freelist at
35 * a time
36 */
37#define IDA_PCPU_BATCH_MOVE 32U
38
39/* Max size of percpu freelist, */
40#define IDA_PCPU_SIZE ((IDA_PCPU_BATCH_MOVE * 3) / 2)
41
42struct percpu_ida_cpu { 33struct percpu_ida_cpu {
43 /* 34 /*
44 * Even though this is percpu, we need a lock for tag stealing by remote 35 * Even though this is percpu, we need a lock for tag stealing by remote
@@ -78,7 +69,7 @@ static inline void steal_tags(struct percpu_ida *pool,
78 struct percpu_ida_cpu *remote; 69 struct percpu_ida_cpu *remote;
79 70
80 for (cpus_have_tags = cpumask_weight(&pool->cpus_have_tags); 71 for (cpus_have_tags = cpumask_weight(&pool->cpus_have_tags);
81 cpus_have_tags * IDA_PCPU_SIZE > pool->nr_tags / 2; 72 cpus_have_tags * pool->percpu_max_size > pool->nr_tags / 2;
82 cpus_have_tags--) { 73 cpus_have_tags--) {
83 cpu = cpumask_next(cpu, &pool->cpus_have_tags); 74 cpu = cpumask_next(cpu, &pool->cpus_have_tags);
84 75
@@ -123,7 +114,7 @@ static inline void alloc_global_tags(struct percpu_ida *pool,
123{ 114{
124 move_tags(tags->freelist, &tags->nr_free, 115 move_tags(tags->freelist, &tags->nr_free,
125 pool->freelist, &pool->nr_free, 116 pool->freelist, &pool->nr_free,
126 min(pool->nr_free, IDA_PCPU_BATCH_MOVE)); 117 min(pool->nr_free, pool->percpu_batch_size));
127} 118}
128 119
129static inline unsigned alloc_local_tag(struct percpu_ida *pool, 120static inline unsigned alloc_local_tag(struct percpu_ida *pool,
@@ -245,17 +236,17 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag)
245 wake_up(&pool->wait); 236 wake_up(&pool->wait);
246 } 237 }
247 238
248 if (nr_free == IDA_PCPU_SIZE) { 239 if (nr_free == pool->percpu_max_size) {
249 spin_lock(&pool->lock); 240 spin_lock(&pool->lock);
250 241
251 /* 242 /*
252 * Global lock held and irqs disabled, don't need percpu 243 * Global lock held and irqs disabled, don't need percpu
253 * lock 244 * lock
254 */ 245 */
255 if (tags->nr_free == IDA_PCPU_SIZE) { 246 if (tags->nr_free == pool->percpu_max_size) {
256 move_tags(pool->freelist, &pool->nr_free, 247 move_tags(pool->freelist, &pool->nr_free,
257 tags->freelist, &tags->nr_free, 248 tags->freelist, &tags->nr_free,
258 IDA_PCPU_BATCH_MOVE); 249 pool->percpu_batch_size);
259 250
260 wake_up(&pool->wait); 251 wake_up(&pool->wait);
261 } 252 }
@@ -292,7 +283,8 @@ EXPORT_SYMBOL_GPL(percpu_ida_destroy);
292 * Allocation is percpu, but sharding is limited by nr_tags - for best 283 * Allocation is percpu, but sharding is limited by nr_tags - for best
293 * performance, the workload should not span more cpus than nr_tags / 128. 284 * performance, the workload should not span more cpus than nr_tags / 128.
294 */ 285 */
295int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags) 286int __percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags,
287 unsigned long max_size, unsigned long batch_size)
296{ 288{
297 unsigned i, cpu, order; 289 unsigned i, cpu, order;
298 290
@@ -301,6 +293,8 @@ int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags)
301 init_waitqueue_head(&pool->wait); 293 init_waitqueue_head(&pool->wait);
302 spin_lock_init(&pool->lock); 294 spin_lock_init(&pool->lock);
303 pool->nr_tags = nr_tags; 295 pool->nr_tags = nr_tags;
296 pool->percpu_max_size = max_size;
297 pool->percpu_batch_size = batch_size;
304 298
305 /* Guard against overflow */ 299 /* Guard against overflow */
306 if (nr_tags > (unsigned) INT_MAX + 1) { 300 if (nr_tags > (unsigned) INT_MAX + 1) {
@@ -319,7 +313,7 @@ int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags)
319 pool->nr_free = nr_tags; 313 pool->nr_free = nr_tags;
320 314
321 pool->tag_cpu = __alloc_percpu(sizeof(struct percpu_ida_cpu) + 315 pool->tag_cpu = __alloc_percpu(sizeof(struct percpu_ida_cpu) +
322 IDA_PCPU_SIZE * sizeof(unsigned), 316 pool->percpu_max_size * sizeof(unsigned),
323 sizeof(unsigned)); 317 sizeof(unsigned));
324 if (!pool->tag_cpu) 318 if (!pool->tag_cpu)
325 goto err; 319 goto err;
@@ -332,4 +326,4 @@ err:
332 percpu_ida_destroy(pool); 326 percpu_ida_destroy(pool);
333 return -ENOMEM; 327 return -ENOMEM;
334} 328}
335EXPORT_SYMBOL_GPL(percpu_ida_init); 329EXPORT_SYMBOL_GPL(__percpu_ida_init);