diff options
author | Shaohua Li <shli@kernel.org> | 2013-10-14 21:05:01 -0400 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2013-10-25 06:55:59 -0400 |
commit | e26b53d0b287056646a0dffce8bc6b0f053f3823 (patch) | |
tree | 99f7f824f8123d441de534960fd85dda5d2e5385 | |
parent | 098faf5805c80f951ce5e8b4a6842382ad793c38 (diff) |
percpu_ida: make percpu_ida percpu size/batch configurable
Make percpu_ida percpu size/batch configurable. The block-mq-tag will
use it.
After block-mq uses percpu_ida to manage tags, performance is improved.
My test is done on a 2-socket machine, with 12 processes spread across the 2 sockets.
So any lock contention or IPI overhead should be stressed heavily.
Testing is done for null-blk.
hw_queue_depth nopatch iops patch iops
64 ~800k/s ~1470k/s
2048 ~4470k/s ~4340k/s
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Shaohua Li <shli@fusionio.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r-- | include/linux/percpu_ida.h | 18 | ||||
-rw-r--r-- | lib/percpu_ida.c | 28 |
2 files changed, 28 insertions, 18 deletions
diff --git a/include/linux/percpu_ida.h b/include/linux/percpu_ida.h index 0b23edbee309..56c14033e7e7 100644 --- a/include/linux/percpu_ida.h +++ b/include/linux/percpu_ida.h | |||
@@ -16,6 +16,8 @@ struct percpu_ida { | |||
16 | * percpu_ida_init() | 16 | * percpu_ida_init() |
17 | */ | 17 | */ |
18 | unsigned nr_tags; | 18 | unsigned nr_tags; |
19 | unsigned percpu_max_size; | ||
20 | unsigned percpu_batch_size; | ||
19 | 21 | ||
20 | struct percpu_ida_cpu __percpu *tag_cpu; | 22 | struct percpu_ida_cpu __percpu *tag_cpu; |
21 | 23 | ||
@@ -51,10 +53,24 @@ struct percpu_ida { | |||
51 | } ____cacheline_aligned_in_smp; | 53 | } ____cacheline_aligned_in_smp; |
52 | }; | 54 | }; |
53 | 55 | ||
56 | /* | ||
57 | * Number of tags we move between the percpu freelist and the global freelist at | ||
58 | * a time | ||
59 | */ | ||
60 | #define IDA_DEFAULT_PCPU_BATCH_MOVE 32U | ||
61 | /* Max size of percpu freelist, */ | ||
62 | #define IDA_DEFAULT_PCPU_SIZE ((IDA_DEFAULT_PCPU_BATCH_MOVE * 3) / 2) | ||
63 | |||
54 | int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp); | 64 | int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp); |
55 | void percpu_ida_free(struct percpu_ida *pool, unsigned tag); | 65 | void percpu_ida_free(struct percpu_ida *pool, unsigned tag); |
56 | 66 | ||
57 | void percpu_ida_destroy(struct percpu_ida *pool); | 67 | void percpu_ida_destroy(struct percpu_ida *pool); |
58 | int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags); | 68 | int __percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags, |
69 | unsigned long max_size, unsigned long batch_size); | ||
70 | static inline int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags) | ||
71 | { | ||
72 | return __percpu_ida_init(pool, nr_tags, IDA_DEFAULT_PCPU_SIZE, | ||
73 | IDA_DEFAULT_PCPU_BATCH_MOVE); | ||
74 | } | ||
59 | 75 | ||
60 | #endif /* __PERCPU_IDA_H__ */ | 76 | #endif /* __PERCPU_IDA_H__ */ |
diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c index bab1ba2a4c71..a601d4259e13 100644 --- a/lib/percpu_ida.c +++ b/lib/percpu_ida.c | |||
@@ -30,15 +30,6 @@ | |||
30 | #include <linux/spinlock.h> | 30 | #include <linux/spinlock.h> |
31 | #include <linux/percpu_ida.h> | 31 | #include <linux/percpu_ida.h> |
32 | 32 | ||
33 | /* | ||
34 | * Number of tags we move between the percpu freelist and the global freelist at | ||
35 | * a time | ||
36 | */ | ||
37 | #define IDA_PCPU_BATCH_MOVE 32U | ||
38 | |||
39 | /* Max size of percpu freelist, */ | ||
40 | #define IDA_PCPU_SIZE ((IDA_PCPU_BATCH_MOVE * 3) / 2) | ||
41 | |||
42 | struct percpu_ida_cpu { | 33 | struct percpu_ida_cpu { |
43 | /* | 34 | /* |
44 | * Even though this is percpu, we need a lock for tag stealing by remote | 35 | * Even though this is percpu, we need a lock for tag stealing by remote |
@@ -78,7 +69,7 @@ static inline void steal_tags(struct percpu_ida *pool, | |||
78 | struct percpu_ida_cpu *remote; | 69 | struct percpu_ida_cpu *remote; |
79 | 70 | ||
80 | for (cpus_have_tags = cpumask_weight(&pool->cpus_have_tags); | 71 | for (cpus_have_tags = cpumask_weight(&pool->cpus_have_tags); |
81 | cpus_have_tags * IDA_PCPU_SIZE > pool->nr_tags / 2; | 72 | cpus_have_tags * pool->percpu_max_size > pool->nr_tags / 2; |
82 | cpus_have_tags--) { | 73 | cpus_have_tags--) { |
83 | cpu = cpumask_next(cpu, &pool->cpus_have_tags); | 74 | cpu = cpumask_next(cpu, &pool->cpus_have_tags); |
84 | 75 | ||
@@ -123,7 +114,7 @@ static inline void alloc_global_tags(struct percpu_ida *pool, | |||
123 | { | 114 | { |
124 | move_tags(tags->freelist, &tags->nr_free, | 115 | move_tags(tags->freelist, &tags->nr_free, |
125 | pool->freelist, &pool->nr_free, | 116 | pool->freelist, &pool->nr_free, |
126 | min(pool->nr_free, IDA_PCPU_BATCH_MOVE)); | 117 | min(pool->nr_free, pool->percpu_batch_size)); |
127 | } | 118 | } |
128 | 119 | ||
129 | static inline unsigned alloc_local_tag(struct percpu_ida *pool, | 120 | static inline unsigned alloc_local_tag(struct percpu_ida *pool, |
@@ -245,17 +236,17 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag) | |||
245 | wake_up(&pool->wait); | 236 | wake_up(&pool->wait); |
246 | } | 237 | } |
247 | 238 | ||
248 | if (nr_free == IDA_PCPU_SIZE) { | 239 | if (nr_free == pool->percpu_max_size) { |
249 | spin_lock(&pool->lock); | 240 | spin_lock(&pool->lock); |
250 | 241 | ||
251 | /* | 242 | /* |
252 | * Global lock held and irqs disabled, don't need percpu | 243 | * Global lock held and irqs disabled, don't need percpu |
253 | * lock | 244 | * lock |
254 | */ | 245 | */ |
255 | if (tags->nr_free == IDA_PCPU_SIZE) { | 246 | if (tags->nr_free == pool->percpu_max_size) { |
256 | move_tags(pool->freelist, &pool->nr_free, | 247 | move_tags(pool->freelist, &pool->nr_free, |
257 | tags->freelist, &tags->nr_free, | 248 | tags->freelist, &tags->nr_free, |
258 | IDA_PCPU_BATCH_MOVE); | 249 | pool->percpu_batch_size); |
259 | 250 | ||
260 | wake_up(&pool->wait); | 251 | wake_up(&pool->wait); |
261 | } | 252 | } |
@@ -292,7 +283,8 @@ EXPORT_SYMBOL_GPL(percpu_ida_destroy); | |||
292 | * Allocation is percpu, but sharding is limited by nr_tags - for best | 283 | * Allocation is percpu, but sharding is limited by nr_tags - for best |
293 | * performance, the workload should not span more cpus than nr_tags / 128. | 284 | * performance, the workload should not span more cpus than nr_tags / 128. |
294 | */ | 285 | */ |
295 | int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags) | 286 | int __percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags, |
287 | unsigned long max_size, unsigned long batch_size) | ||
296 | { | 288 | { |
297 | unsigned i, cpu, order; | 289 | unsigned i, cpu, order; |
298 | 290 | ||
@@ -301,6 +293,8 @@ int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags) | |||
301 | init_waitqueue_head(&pool->wait); | 293 | init_waitqueue_head(&pool->wait); |
302 | spin_lock_init(&pool->lock); | 294 | spin_lock_init(&pool->lock); |
303 | pool->nr_tags = nr_tags; | 295 | pool->nr_tags = nr_tags; |
296 | pool->percpu_max_size = max_size; | ||
297 | pool->percpu_batch_size = batch_size; | ||
304 | 298 | ||
305 | /* Guard against overflow */ | 299 | /* Guard against overflow */ |
306 | if (nr_tags > (unsigned) INT_MAX + 1) { | 300 | if (nr_tags > (unsigned) INT_MAX + 1) { |
@@ -319,7 +313,7 @@ int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags) | |||
319 | pool->nr_free = nr_tags; | 313 | pool->nr_free = nr_tags; |
320 | 314 | ||
321 | pool->tag_cpu = __alloc_percpu(sizeof(struct percpu_ida_cpu) + | 315 | pool->tag_cpu = __alloc_percpu(sizeof(struct percpu_ida_cpu) + |
322 | IDA_PCPU_SIZE * sizeof(unsigned), | 316 | pool->percpu_max_size * sizeof(unsigned), |
323 | sizeof(unsigned)); | 317 | sizeof(unsigned)); |
324 | if (!pool->tag_cpu) | 318 | if (!pool->tag_cpu) |
325 | goto err; | 319 | goto err; |
@@ -332,4 +326,4 @@ err: | |||
332 | percpu_ida_destroy(pool); | 326 | percpu_ida_destroy(pool); |
333 | return -ENOMEM; | 327 | return -ENOMEM; |
334 | } | 328 | } |
335 | EXPORT_SYMBOL_GPL(percpu_ida_init); | 329 | EXPORT_SYMBOL_GPL(__percpu_ida_init); |