author	Waiman Long <longman@redhat.com>	2019-05-20 10:14:47 -0400
committer	Thomas Gleixner <tglx@linutronix.de>	2019-06-14 08:51:14 -0400
commit	634d61f45d6f668fe7e468b62d00ae469a583ca2 (patch)
tree	c0a85cfb076ba93b00da054c11cd9cd3ae9b4969
parent	d86998b17a01050c0232231fa481e65ef8171ca6 (diff)
debugobjects: Percpu pool lookahead freeing/allocation
Most workloads will allocate a bunch of memory objects, work on them and then free all or most of them. So just having a percpu free pool may not reduce the pool_lock contention significantly if a large number of objects are being used.

To help those situations, we are now doing lookahead allocation and freeing of the debug objects into and out of the percpu free pool. This will hopefully reduce the number of times the pool_lock needs to be taken and hence its contention level.

Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Yang Shi <yang.shi@linux.alibaba.com>
Cc: "Joel Fernandes (Google)" <joel@joelfernandes.org>
Cc: Qian Cai <cai@gmx.us>
Cc: Zhong Jiang <zhongjiang@huawei.com>
Link: https://lkml.kernel.org/r/20190520141450.7575-3-longman@redhat.com
-rw-r--r--	lib/debugobjects.c	75
1 file changed, 69 insertions(+), 6 deletions(-)
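Before the diff itself, the following is a minimal user-space sketch of the batching idea described above, not the kernel code: a shared free list guarded by a lock plus a thread-local cache that is refilled and drained in batches, so the lock is taken roughly once per batch of operations instead of once per object. All names here (obj, obj_alloc, obj_free, BATCH, LOCAL_MAX) are illustrative only.

/*
 * Sketch only: shared pool + per-thread cache with batch refill/drain.
 * Stands in for obj_pool/percpu_obj_pool; not the debugobjects code.
 */
#include <pthread.h>

#define BATCH		16	/* objects moved per lock acquisition */
#define LOCAL_MAX	64	/* capacity of the per-thread cache */

struct obj { struct obj *next; };

static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
static struct obj *global_pool;			/* shared free list */

static __thread struct obj *local_pool;		/* per-thread free list */
static __thread int local_cnt;

static struct obj *obj_alloc(void)
{
	struct obj *o;

	/* Fast path: take from the thread-local cache, no lock. */
	if (local_pool) {
		o = local_pool;
		local_pool = o->next;
		local_cnt--;
		return o;
	}

	/* Slow path: take one object plus a lookahead batch under the lock. */
	pthread_mutex_lock(&pool_lock);
	o = global_pool;
	if (o) {
		int i;

		global_pool = o->next;
		for (i = 0; i < BATCH && global_pool; i++) {
			struct obj *extra = global_pool;

			global_pool = extra->next;
			extra->next = local_pool;
			local_pool = extra;
			local_cnt++;
		}
	}
	pthread_mutex_unlock(&pool_lock);
	return o;
}

static void obj_free(struct obj *o)
{
	/* Fast path: park the object in the thread-local cache, no lock. */
	if (local_cnt < LOCAL_MAX) {
		o->next = local_pool;
		local_pool = o;
		local_cnt++;
		return;
	}

	/* Cache full: return this object plus a batch under the lock. */
	pthread_mutex_lock(&pool_lock);
	o->next = global_pool;
	global_pool = o;
	while (local_cnt > LOCAL_MAX - BATCH) {
		struct obj *extra = local_pool;

		local_pool = extra->next;
		extra->next = global_pool;
		global_pool = extra;
		local_cnt--;
	}
	pthread_mutex_unlock(&pool_lock);
}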
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 38c23b528f6f..714459a8dc10 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -26,6 +26,7 @@
 #define ODEBUG_POOL_SIZE	1024
 #define ODEBUG_POOL_MIN_LEVEL	256
 #define ODEBUG_POOL_PERCPU_SIZE	64
+#define ODEBUG_BATCH_SIZE	16
 
 #define ODEBUG_CHUNK_SHIFT	PAGE_SHIFT
 #define ODEBUG_CHUNK_SIZE	(1 << ODEBUG_CHUNK_SHIFT)
@@ -203,11 +204,10 @@ static struct debug_obj *__alloc_object(struct hlist_head *list)
 static struct debug_obj *
 alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr)
 {
-	struct debug_percpu_free *percpu_pool;
+	struct debug_percpu_free *percpu_pool = this_cpu_ptr(&percpu_obj_pool);
 	struct debug_obj *obj;
 
 	if (likely(obj_cache)) {
-		percpu_pool = this_cpu_ptr(&percpu_obj_pool);
 		obj = __alloc_object(&percpu_pool->free_objs);
 		if (obj) {
 			percpu_pool->obj_free--;
@@ -219,10 +219,32 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr)
 	obj = __alloc_object(&obj_pool);
 	if (obj) {
 		obj_pool_used++;
+		obj_pool_free--;
+
+		/*
+		 * Looking ahead, allocate one batch of debug objects and
+		 * put them into the percpu free pool.
+		 */
+		if (likely(obj_cache)) {
+			int i;
+
+			for (i = 0; i < ODEBUG_BATCH_SIZE; i++) {
+				struct debug_obj *obj2;
+
+				obj2 = __alloc_object(&obj_pool);
+				if (!obj2)
+					break;
+				hlist_add_head(&obj2->node,
+					       &percpu_pool->free_objs);
+				percpu_pool->obj_free++;
+				obj_pool_used++;
+				obj_pool_free--;
+			}
+		}
+
 		if (obj_pool_used > obj_pool_max_used)
 			obj_pool_max_used = obj_pool_used;
 
-		obj_pool_free--;
 		if (obj_pool_free < obj_pool_min_free)
 			obj_pool_min_free = obj_pool_free;
 	}
@@ -288,22 +310,39 @@ static void free_obj_work(struct work_struct *work)
 
 static bool __free_object(struct debug_obj *obj)
 {
+	struct debug_obj *objs[ODEBUG_BATCH_SIZE];
+	struct debug_percpu_free *percpu_pool;
+	int lookahead_count = 0;
 	unsigned long flags;
 	bool work;
-	struct debug_percpu_free *percpu_pool;
 
 	local_irq_save(flags);
+	if (!obj_cache)
+		goto free_to_obj_pool;
+
 	/*
 	 * Try to free it into the percpu pool first.
 	 */
 	percpu_pool = this_cpu_ptr(&percpu_obj_pool);
-	if (obj_cache && percpu_pool->obj_free < ODEBUG_POOL_PERCPU_SIZE) {
+	if (percpu_pool->obj_free < ODEBUG_POOL_PERCPU_SIZE) {
 		hlist_add_head(&obj->node, &percpu_pool->free_objs);
 		percpu_pool->obj_free++;
 		local_irq_restore(flags);
 		return false;
 	}
 
+	/*
+	 * As the percpu pool is full, look ahead and pull out a batch
+	 * of objects from the percpu pool and free them as well.
+	 */
+	for (; lookahead_count < ODEBUG_BATCH_SIZE; lookahead_count++) {
+		objs[lookahead_count] = __alloc_object(&percpu_pool->free_objs);
+		if (!objs[lookahead_count])
+			break;
+		percpu_pool->obj_free--;
+	}
+
+free_to_obj_pool:
 	raw_spin_lock(&pool_lock);
 	work = (obj_pool_free > debug_objects_pool_size) && obj_cache;
 	obj_pool_used--;
@@ -311,9 +350,25 @@ static bool __free_object(struct debug_obj *obj)
 	if (work) {
 		obj_nr_tofree++;
 		hlist_add_head(&obj->node, &obj_to_free);
+		if (lookahead_count) {
+			obj_nr_tofree += lookahead_count;
+			obj_pool_used -= lookahead_count;
+			while (lookahead_count) {
+				hlist_add_head(&objs[--lookahead_count]->node,
+					       &obj_to_free);
+			}
+		}
 	} else {
 		obj_pool_free++;
 		hlist_add_head(&obj->node, &obj_pool);
+		if (lookahead_count) {
+			obj_pool_free += lookahead_count;
+			obj_pool_used -= lookahead_count;
+			while (lookahead_count) {
+				hlist_add_head(&objs[--lookahead_count]->node,
+					       &obj_pool);
+			}
+		}
 	}
 	raw_spin_unlock(&pool_lock);
 	local_irq_restore(flags);
@@ -1228,7 +1283,7 @@ free:
  */
 void __init debug_objects_mem_init(void)
 {
-	int cpu;
+	int cpu, extras;
 
 	if (!debug_objects_enabled)
 		return;
@@ -1253,4 +1308,12 @@ void __init debug_objects_mem_init(void)
 		pr_warn("out of memory.\n");
 	} else
 		debug_objects_selftest();
+
+	/*
+	 * Increase the thresholds for allocating and freeing objects
+	 * according to the number of possible CPUs available in the system.
+	 */
+	extras = num_possible_cpus() * ODEBUG_BATCH_SIZE;
+	debug_objects_pool_size += extras;
+	debug_objects_pool_min_level += extras;
 }
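As a rough illustration of the final hunk (the CPU count here is hypothetical): on a system with 8 possible CPUs, extras = 8 * ODEBUG_BATCH_SIZE = 8 * 16 = 128, so 128 objects are added to both debug_objects_pool_size and debug_objects_pool_min_level, scaling the refill and free thresholds with the number of per-CPU caches that can each hold a batch.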