author	Linus Torvalds <torvalds@linux-foundation.org>	2014-03-31 18:07:43 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-03-31 18:07:43 -0400
commit	cf6fafcf0588093fadf0c8cd69bebe3b5df136c7 (patch)
tree	1aefe138b60f9b74ed64a80c2ce4e22a0749a8ca
parent	1ce235faa8fefa4eb7199cad890944c1d2ba1b3e (diff)
parent	21ddfd38ee9aac804d22beaceed4c7b903cca234 (diff)

Merge branch 'for-3.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu
Pull percpu changes from Tejun Heo:
 "Percpu allocation is now popular enough for the extremely naive range
  allocator to cause scalability issues.  The existing allocator
  linearly scanned the allocation map on both the alloc and free paths
  without making use of hints or anything else.  Al reimplemented the
  range allocator so that the free path can use binary search instead
  of a linear scan, and the alloc path uses simple hinting to avoid
  scanning in common cases.  Combined, the new allocator resolves the
  scalability issue the percpu allocator was showing during a container
  benchmark workload."

* 'for-3.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu:
  percpu: renew the max_contig if we merge the head and previous block
  percpu: allocation size should be even
  percpu: speed alloc_pcpu_area() up
  percpu: store offsets instead of lengths in ->map[]
  perpcu: fold pcpu_split_block() into the only caller
-rw-r--r--	mm/percpu.c	208
1 file changed, 112 insertions(+), 96 deletions(-)
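
Before reading the diff itself, it helps to know the core change of
representation in chunk->map[]: entries used to store signed area lengths
(negative meaning "in use"); they now store each area's byte offset, with the
low bit of the offset doubling as the in-use flag and a sentinel entry at
map[map_used] carrying the unit size. A minimal userspace sketch of that
encoding follows; the helper names are illustrative and do not appear in the
kernel.

#include <assert.h>
#include <stdio.h>

/*
 * Sketch of the new map encoding: map[i] is the byte offset of area i,
 * low bit set means "in use".  map[map_used] is a sentinel holding the
 * unit size (with the in-use bit set), so the size of area i is always
 * (map[i + 1] & ~1) - (map[i] & ~1).
 */
static int area_offset(const int *map, int i) { return map[i] & ~1; }
static int area_in_use(const int *map, int i) { return map[i] & 1; }
static int area_size(const int *map, int i)
{
	return area_offset(map, i + 1) - area_offset(map, i);
}

int main(void)
{
	/* a 4096-byte unit: [0..512) in use, [512..4096) free */
	int map[] = { 0 | 1, 512, 4096 | 1 };
	int map_used = 2;	/* entries before the sentinel */

	for (int i = 0; i < map_used; i++)
		printf("area %d: off=%d size=%d %s\n",
		       i, area_offset(map, i), area_size(map, i),
		       area_in_use(map, i) ? "used" : "free");
	assert(area_size(map, 0) == 512 && area_size(map, 1) == 3584);
	return 0;
}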
diff --git a/mm/percpu.c b/mm/percpu.c
index 036cfe07050f..63e24fb4387b 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -102,10 +102,11 @@ struct pcpu_chunk {
 	int			free_size;	/* free bytes in the chunk */
 	int			contig_hint;	/* max contiguous size hint */
 	void			*base_addr;	/* base address of this chunk */
-	int			map_used;	/* # of map entries used */
+	int			map_used;	/* # of map entries used before the sentry */
 	int			map_alloc;	/* # of map entries allocated */
 	int			*map;		/* allocation map */
 	void			*data;		/* chunk data */
+	int			first_free;	/* no free below this */
 	bool			immutable;	/* no [de]population allowed */
 	unsigned long		populated[];	/* populated bitmap */
 };
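
The new first_free field is a scan hint: every map entry below that index is
known to be in use, so pcpu_alloc_area() can start scanning there instead of
at index 0, and pcpu_free_area() pulls it back whenever an earlier area is
freed. A tiny sketch of the invariant, with illustrative names:

#include <assert.h>

/* Invariant sketch: every entry below first_free has the in-use bit set. */
static void check_first_free(const int *map, int map_used, int first_free)
{
	for (int i = 0; i < first_free && i < map_used; i++)
		assert(map[i] & 1);
}

int main(void)
{
	/* areas at 0 and 512 in use, 1024 free, sentinel at 4096 */
	int map[] = { 0 | 1, 512 | 1, 1024, 4096 | 1 };
	check_first_free(map, 3, 2);
	return 0;
}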
@@ -356,11 +357,11 @@ static int pcpu_need_to_extend(struct pcpu_chunk *chunk)
 {
 	int new_alloc;
 
-	if (chunk->map_alloc >= chunk->map_used + 2)
+	if (chunk->map_alloc >= chunk->map_used + 3)
 		return 0;
 
 	new_alloc = PCPU_DFL_MAP_ALLOC;
-	while (new_alloc < chunk->map_used + 2)
+	while (new_alloc < chunk->map_used + 3)
 		new_alloc *= 2;
 
 	return new_alloc;
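
The headroom requirement grows from 2 to 3 here because a worst-case
allocation now splits one free area into head, allocation and tail, adding
two map entries, while map_used no longer counts the sentinel entry, which
needs one slot of its own. A sketch of the arithmetic, with illustrative
names:

#include <assert.h>

static int slots_needed(int map_used, int head, int tail)
{
	int nr_extra = !!head + !!tail;		/* up to 2 new entries */
	return map_used + nr_extra + 1;		/* +1 for the sentinel slot */
}

int main(void)
{
	/* worst case (head and tail both non-zero) gives map_used + 3,
	 * matching the new check above */
	assert(slots_needed(10, 8, 8) == 13);
	return 0;
}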
@@ -418,48 +419,6 @@ out_unlock:
 }
 
 /**
- * pcpu_split_block - split a map block
- * @chunk: chunk of interest
- * @i: index of map block to split
- * @head: head size in bytes (can be 0)
- * @tail: tail size in bytes (can be 0)
- *
- * Split the @i'th map block into two or three blocks.  If @head is
- * non-zero, @head bytes block is inserted before block @i moving it
- * to @i+1 and reducing its size by @head bytes.
- *
- * If @tail is non-zero, the target block, which can be @i or @i+1
- * depending on @head, is reduced by @tail bytes and @tail byte block
- * is inserted after the target block.
- *
- * @chunk->map must have enough free slots to accommodate the split.
- *
- * CONTEXT:
- * pcpu_lock.
- */
-static void pcpu_split_block(struct pcpu_chunk *chunk, int i,
-			     int head, int tail)
-{
-	int nr_extra = !!head + !!tail;
-
-	BUG_ON(chunk->map_alloc < chunk->map_used + nr_extra);
-
-	/* insert new subblocks */
-	memmove(&chunk->map[i + nr_extra], &chunk->map[i],
-		sizeof(chunk->map[0]) * (chunk->map_used - i));
-	chunk->map_used += nr_extra;
-
-	if (head) {
-		chunk->map[i + 1] = chunk->map[i] - head;
-		chunk->map[i++] = head;
-	}
-	if (tail) {
-		chunk->map[i++] -= tail;
-		chunk->map[i] = tail;
-	}
-}
-
-/**
  * pcpu_alloc_area - allocate area from a pcpu_chunk
  * @chunk: chunk of interest
  * @size: wanted size in bytes
@@ -483,19 +442,27 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align)
 	int oslot = pcpu_chunk_slot(chunk);
 	int max_contig = 0;
 	int i, off;
+	bool seen_free = false;
+	int *p;
 
-	for (i = 0, off = 0; i < chunk->map_used; off += abs(chunk->map[i++])) {
-		bool is_last = i + 1 == chunk->map_used;
+	for (i = chunk->first_free, p = chunk->map + i; i < chunk->map_used; i++, p++) {
 		int head, tail;
+		int this_size;
+
+		off = *p;
+		if (off & 1)
+			continue;
 
 		/* extra for alignment requirement */
 		head = ALIGN(off, align) - off;
-		BUG_ON(i == 0 && head != 0);
 
-		if (chunk->map[i] < 0)
-			continue;
-		if (chunk->map[i] < head + size) {
-			max_contig = max(chunk->map[i], max_contig);
+		this_size = (p[1] & ~1) - off;
+		if (this_size < head + size) {
+			if (!seen_free) {
+				chunk->first_free = i;
+				seen_free = true;
+			}
+			max_contig = max(this_size, max_contig);
 			continue;
 		}
 
@@ -505,44 +472,59 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align)
 		 * than sizeof(int), which is very small but isn't too
 		 * uncommon for percpu allocations.
 		 */
-		if (head && (head < sizeof(int) || chunk->map[i - 1] > 0)) {
-			if (chunk->map[i - 1] > 0)
-				chunk->map[i - 1] += head;
-			else {
-				chunk->map[i - 1] -= head;
+		if (head && (head < sizeof(int) || !(p[-1] & 1))) {
+			*p = off += head;
+			if (p[-1] & 1)
 				chunk->free_size -= head;
-			}
-			chunk->map[i] -= head;
-			off += head;
+			else
+				max_contig = max(*p - p[-1], max_contig);
+			this_size -= head;
 			head = 0;
 		}
 
 		/* if tail is small, just keep it around */
-		tail = chunk->map[i] - head - size;
-		if (tail < sizeof(int))
+		tail = this_size - head - size;
+		if (tail < sizeof(int)) {
 			tail = 0;
+			size = this_size - head;
+		}
 
 		/* split if warranted */
 		if (head || tail) {
-			pcpu_split_block(chunk, i, head, tail);
+			int nr_extra = !!head + !!tail;
+
+			/* insert new subblocks */
+			memmove(p + nr_extra + 1, p + 1,
+				sizeof(chunk->map[0]) * (chunk->map_used - i));
+			chunk->map_used += nr_extra;
+
 			if (head) {
-				i++;
-				off += head;
-				max_contig = max(chunk->map[i - 1], max_contig);
+				if (!seen_free) {
+					chunk->first_free = i;
+					seen_free = true;
+				}
+				*++p = off += head;
+				++i;
+				max_contig = max(head, max_contig);
+			}
+			if (tail) {
+				p[1] = off + size;
+				max_contig = max(tail, max_contig);
 			}
-			if (tail)
-				max_contig = max(chunk->map[i + 1], max_contig);
 		}
 
+		if (!seen_free)
+			chunk->first_free = i + 1;
+
 		/* update hint and mark allocated */
-		if (is_last)
+		if (i + 1 == chunk->map_used)
 			chunk->contig_hint = max_contig; /* fully scanned */
 		else
 			chunk->contig_hint = max(chunk->contig_hint,
 						 max_contig);
 
-		chunk->free_size -= chunk->map[i];
-		chunk->map[i] = -chunk->map[i];
+		chunk->free_size -= size;
+		*p |= 1;
 
 		pcpu_chunk_relocate(chunk, oslot);
 		return off;
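
The allocation path above now skips in-use areas by testing the low bit,
resumes scanning at first_free, performs the former pcpu_split_block() inline
with a single memmove, and marks the winning area allocated by setting the
low bit of its offset. A condensed, self-contained sketch of the
split-and-mark step under the new representation (no alignment, hint or
capacity handling; names are illustrative):

#include <assert.h>
#include <string.h>

/*
 * Sketch of splitting free area i into [alloc][tail] in an offset-based
 * map (low bit = in use): one memmove opens a slot, the tail keeps the
 * free encoding, and the allocated area gets its in-use bit set.
 */
static int alloc_from_area(int *map, int *map_used, int i, int size)
{
	int off = map[i];		/* even: area i is free */
	int this_size = (map[i + 1] & ~1) - off;
	int tail = this_size - size;

	assert(!(off & 1) && this_size >= size);
	if (tail) {			/* keep a free tail entry */
		memmove(&map[i + 2], &map[i + 1],
			sizeof(map[0]) * (*map_used - i));
		(*map_used)++;
		map[i + 1] = off + size;	/* tail starts here, free */
	}
	map[i] |= 1;			/* mark the area in use */
	return off;
}

int main(void)
{
	int map[8] = { 0, 4096 | 1 };	/* one free area + sentinel */
	int map_used = 1;
	int off = alloc_from_area(map, &map_used, 0, 512);

	assert(off == 0 && map[0] == 1 && map[1] == 512 && map_used == 2);
	return 0;
}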
@@ -570,34 +552,50 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align)
 static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme)
 {
 	int oslot = pcpu_chunk_slot(chunk);
-	int i, off;
-
-	for (i = 0, off = 0; i < chunk->map_used; off += abs(chunk->map[i++]))
-		if (off == freeme)
-			break;
+	int off = 0;
+	unsigned i, j;
+	int to_free = 0;
+	int *p;
+
+	freeme |= 1;	/* we are searching for <given offset, in use> pair */
+
+	i = 0;
+	j = chunk->map_used;
+	while (i != j) {
+		unsigned k = (i + j) / 2;
+		off = chunk->map[k];
+		if (off < freeme)
+			i = k + 1;
+		else if (off > freeme)
+			j = k;
+		else
+			i = j = k;
+	}
 	BUG_ON(off != freeme);
-	BUG_ON(chunk->map[i] > 0);
 
-	chunk->map[i] = -chunk->map[i];
-	chunk->free_size += chunk->map[i];
+	if (i < chunk->first_free)
+		chunk->first_free = i;
 
+	p = chunk->map + i;
+	*p = off &= ~1;
+	chunk->free_size += (p[1] & ~1) - off;
+
+	/* merge with next? */
+	if (!(p[1] & 1))
+		to_free++;
 	/* merge with previous? */
-	if (i > 0 && chunk->map[i - 1] >= 0) {
-		chunk->map[i - 1] += chunk->map[i];
-		chunk->map_used--;
-		memmove(&chunk->map[i], &chunk->map[i + 1],
-			(chunk->map_used - i) * sizeof(chunk->map[0]));
+	if (i > 0 && !(p[-1] & 1)) {
+		to_free++;
 		i--;
+		p--;
 	}
-	/* merge with next? */
-	if (i + 1 < chunk->map_used && chunk->map[i + 1] >= 0) {
-		chunk->map[i] += chunk->map[i + 1];
-		chunk->map_used--;
-		memmove(&chunk->map[i + 1], &chunk->map[i + 2],
-			(chunk->map_used - (i + 1)) * sizeof(chunk->map[0]));
+	if (to_free) {
+		chunk->map_used -= to_free;
+		memmove(p + 1, p + 1 + to_free,
+			(chunk->map_used - i) * sizeof(chunk->map[0]));
 	}
 
-	chunk->contig_hint = max(chunk->map[i], chunk->contig_hint);
+	chunk->contig_hint = max(chunk->map[i + 1] - chunk->map[i] - 1, chunk->contig_hint);
 	pcpu_chunk_relocate(chunk, oslot);
 }
 
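
Freeing no longer walks the map: because map[] holds sorted offsets and an
allocated area stores offset | 1, the free path can OR the in-use bit into
the requested offset and binary-search for that exact value. A small
standalone sketch of the lookup (illustrative names; the kernel uses BUG_ON
where this uses assert):

#include <assert.h>

/*
 * Sketch of the free-path lookup: offsets in map[] are sorted, and an
 * allocated area stores (offset | 1), so searching for (freeme | 1)
 * finds exactly the in-use entry at that offset.
 */
static unsigned find_allocated(const int *map, unsigned map_used, int freeme)
{
	unsigned i = 0, j = map_used;

	freeme |= 1;	/* <given offset, in use> pair */
	while (i != j) {
		unsigned k = (i + j) / 2;

		if (map[k] < freeme)
			i = k + 1;
		else if (map[k] > freeme)
			j = k;
		else
			i = j = k;
	}
	assert(map[i] == freeme);
	return i;
}

int main(void)
{
	/* areas at 0 (used), 512 (used), 1024 (free), sentinel at 4096 */
	int map[] = { 0 | 1, 512 | 1, 1024, 4096 | 1 };

	assert(find_allocated(map, 3, 512) == 1);
	return 0;
}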
@@ -617,7 +615,9 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
 	}
 
 	chunk->map_alloc = PCPU_DFL_MAP_ALLOC;
-	chunk->map[chunk->map_used++] = pcpu_unit_size;
+	chunk->map[0] = 0;
+	chunk->map[1] = pcpu_unit_size | 1;
+	chunk->map_used = 1;
 
 	INIT_LIST_HEAD(&chunk->list);
 	chunk->free_size = pcpu_unit_size;
@@ -713,6 +713,16 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved)
 	unsigned long flags;
 	void __percpu *ptr;
 
+	/*
+	 * We want the lowest bit of offset available for in-use/free
+	 * indicator, so force >= 16bit alignment and make size even.
+	 */
+	if (unlikely(align < 2))
+		align = 2;
+
+	if (unlikely(size & 1))
+		size++;
+
 	if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) {
 		WARN(true, "illegal size (%zu) or align (%zu) for "
 		     "percpu allocation\n", size, align);
@@ -1343,9 +1353,13 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 	}
 	schunk->contig_hint = schunk->free_size;
 
-	schunk->map[schunk->map_used++] = -ai->static_size;
+	schunk->map[0] = 1;
+	schunk->map[1] = ai->static_size;
+	schunk->map_used = 1;
 	if (schunk->free_size)
-		schunk->map[schunk->map_used++] = schunk->free_size;
+		schunk->map[++schunk->map_used] = 1 | (ai->static_size + schunk->free_size);
+	else
+		schunk->map[1] |= 1;
 
 	/* init dynamic chunk if necessary */
 	if (dyn_size) {
@@ -1358,8 +1372,10 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 		bitmap_fill(dchunk->populated, pcpu_unit_pages);
 
 		dchunk->contig_hint = dchunk->free_size = dyn_size;
-		dchunk->map[dchunk->map_used++] = -pcpu_reserved_chunk_limit;
-		dchunk->map[dchunk->map_used++] = dchunk->free_size;
+		dchunk->map[0] = 1;
+		dchunk->map[1] = pcpu_reserved_chunk_limit;
+		dchunk->map[2] = (pcpu_reserved_chunk_limit + dchunk->free_size) | 1;
+		dchunk->map_used = 2;
 	}
 
 	/* link the first chunk in */
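
To make the bootstrap maps above concrete, here is what they hold under
assumed example sizes; the numbers are purely illustrative and not taken from
the patch:

#include <assert.h>

int main(void)
{
	/* schunk with static_size = 8192 and free_size = 0:
	 * one area, [0..8192) in use; map[1] doubles as the sentinel. */
	int smap[] = { 0 | 1, 8192 | 1 };
	int smap_used = 1;

	/* dchunk with pcpu_reserved_chunk_limit = 12288, dyn_size = 4096:
	 * [0..12288) marked in use (covers the static + reserved space),
	 * [12288..16384) free, sentinel carries the in-use bit. */
	int dmap[] = { 0 | 1, 12288, (12288 + 4096) | 1 };
	int dmap_used = 2;

	assert(smap[smap_used] & 1);	/* sentinel is "in use" */
	assert(!(dmap[1] & 1));		/* the dynamic area starts free */
	(void)dmap_used;
	return 0;
}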