Diffstat (limited to 'mm/percpu.c')
-rw-r--r--  mm/percpu.c | 526 ++++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 427 insertions(+), 99 deletions(-)
diff --git a/mm/percpu.c b/mm/percpu.c
index da997f9800bd..014bab65e0ff 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -76,6 +76,10 @@ | |||
76 | 76 | ||
77 | #define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */ | 77 | #define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */ |
78 | #define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */ | 78 | #define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */ |
79 | #define PCPU_ATOMIC_MAP_MARGIN_LOW 32 | ||
80 | #define PCPU_ATOMIC_MAP_MARGIN_HIGH 64 | ||
81 | #define PCPU_EMPTY_POP_PAGES_LOW 2 | ||
82 | #define PCPU_EMPTY_POP_PAGES_HIGH 4 | ||
79 | 83 | ||
80 | #ifdef CONFIG_SMP | 84 | #ifdef CONFIG_SMP |
81 | /* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */ | 85 | /* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */ |
@@ -102,12 +106,16 @@ struct pcpu_chunk { | |||
102 | int free_size; /* free bytes in the chunk */ | 106 | int free_size; /* free bytes in the chunk */ |
103 | int contig_hint; /* max contiguous size hint */ | 107 | int contig_hint; /* max contiguous size hint */ |
104 | void *base_addr; /* base address of this chunk */ | 108 | void *base_addr; /* base address of this chunk */ |
109 | |||
105 | int map_used; /* # of map entries used before the sentry */ | 110 | int map_used; /* # of map entries used before the sentry */ |
106 | int map_alloc; /* # of map entries allocated */ | 111 | int map_alloc; /* # of map entries allocated */ |
107 | int *map; /* allocation map */ | 112 | int *map; /* allocation map */ |
113 | struct work_struct map_extend_work;/* async ->map[] extension */ | ||
114 | |||
108 | void *data; /* chunk data */ | 115 | void *data; /* chunk data */ |
109 | int first_free; /* no free below this */ | 116 | int first_free; /* no free below this */ |
110 | bool immutable; /* no [de]population allowed */ | 117 | bool immutable; /* no [de]population allowed */ |
118 | int nr_populated; /* # of populated pages */ | ||
111 | unsigned long populated[]; /* populated bitmap */ | 119 | unsigned long populated[]; /* populated bitmap */ |
112 | }; | 120 | }; |
113 | 121 | ||
@@ -151,38 +159,33 @@ static struct pcpu_chunk *pcpu_first_chunk; | |||
151 | static struct pcpu_chunk *pcpu_reserved_chunk; | 159 | static struct pcpu_chunk *pcpu_reserved_chunk; |
152 | static int pcpu_reserved_chunk_limit; | 160 | static int pcpu_reserved_chunk_limit; |
153 | 161 | ||
162 | static DEFINE_SPINLOCK(pcpu_lock); /* all internal data structures */ | ||
163 | static DEFINE_MUTEX(pcpu_alloc_mutex); /* chunk create/destroy, [de]pop */ | ||
164 | |||
165 | static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */ | ||
166 | |||
154 | /* | 167 | /* |
155 | * Synchronization rules. | 168 | * The number of empty populated pages, protected by pcpu_lock. The |
156 | * | 169 | * reserved chunk doesn't contribute to the count. |
157 | * There are two locks - pcpu_alloc_mutex and pcpu_lock. The former | ||
158 | * protects allocation/reclaim paths, chunks, populated bitmap and | ||
159 | * vmalloc mapping. The latter is a spinlock and protects the index | ||
160 | * data structures - chunk slots, chunks and area maps in chunks. | ||
161 | * | ||
162 | * During allocation, pcpu_alloc_mutex is kept locked all the time and | ||
163 | * pcpu_lock is grabbed and released as necessary. All actual memory | ||
164 | * allocations are done using GFP_KERNEL with pcpu_lock released. In | ||
165 | * general, percpu memory can't be allocated with irq off but | ||
166 | * irqsave/restore are still used in alloc path so that it can be used | ||
167 | * from early init path - sched_init() specifically. | ||
168 | * | ||
169 | * Free path accesses and alters only the index data structures, so it | ||
170 | * can be safely called from atomic context. When memory needs to be | ||
171 | * returned to the system, free path schedules reclaim_work which | ||
172 | * grabs both pcpu_alloc_mutex and pcpu_lock, unlinks chunks to be | ||
173 | * reclaimed, release both locks and frees the chunks. Note that it's | ||
174 | * necessary to grab both locks to remove a chunk from circulation as | ||
175 | * allocation path might be referencing the chunk with only | ||
176 | * pcpu_alloc_mutex locked. | ||
177 | */ | 170 | */ |
178 | static DEFINE_MUTEX(pcpu_alloc_mutex); /* protects whole alloc and reclaim */ | 171 | static int pcpu_nr_empty_pop_pages; |
179 | static DEFINE_SPINLOCK(pcpu_lock); /* protects index data structures */ | ||
180 | 172 | ||
181 | static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */ | 173 | /* |
174 | * Balance work is used to populate or destroy chunks asynchronously. We | ||
175 | * try to keep the number of populated free pages between | ||
176 | * PCPU_EMPTY_POP_PAGES_LOW and HIGH for atomic allocations and at most one | ||
177 | * empty chunk. | ||
178 | */ | ||
179 | static void pcpu_balance_workfn(struct work_struct *work); | ||
180 | static DECLARE_WORK(pcpu_balance_work, pcpu_balance_workfn); | ||
181 | static bool pcpu_async_enabled __read_mostly; | ||
182 | static bool pcpu_atomic_alloc_failed; | ||
182 | 183 | ||
183 | /* reclaim work to release fully free chunks, scheduled from free path */ | 184 | static void pcpu_schedule_balance_work(void) |
184 | static void pcpu_reclaim(struct work_struct *work); | 185 | { |
185 | static DECLARE_WORK(pcpu_reclaim_work, pcpu_reclaim); | 186 | if (pcpu_async_enabled) |
187 | schedule_work(&pcpu_balance_work); | ||
188 | } | ||
186 | 189 | ||
187 | static bool pcpu_addr_in_first_chunk(void *addr) | 190 | static bool pcpu_addr_in_first_chunk(void *addr) |
188 | { | 191 | { |
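
The replacement comments above split the old synchronization rules in two: pcpu_lock (irq-safe) now guards only the index structures and pcpu_nr_empty_pop_pages, while pcpu_alloc_mutex serializes chunk creation/destruction and page [de]population. A minimal sketch of the resulting lock ordering, built from the helpers introduced later in this patch (the wrapper function itself is hypothetical):

/*
 * Sketch only: mutex outside, spinlock inside.  pcpu_populate_chunk()
 * may sleep, so it runs under the mutex with the spinlock dropped; the
 * bookkeeping update touches only index data and runs under the
 * irq-safe spinlock.
 */
static void example_populate(struct pcpu_chunk *chunk, int rs, int re)
{
        mutex_lock(&pcpu_alloc_mutex);

        if (!pcpu_populate_chunk(chunk, rs, re)) {
                spin_lock_irq(&pcpu_lock);
                pcpu_chunk_populated(chunk, rs, re);
                spin_unlock_irq(&pcpu_lock);
        }

        mutex_unlock(&pcpu_alloc_mutex);
}
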
@@ -315,6 +318,38 @@ static void pcpu_mem_free(void *ptr, size_t size) | |||
315 | } | 318 | } |
316 | 319 | ||
317 | /** | 320 | /** |
321 | * pcpu_count_occupied_pages - count the number of pages an area occupies | ||
322 | * @chunk: chunk of interest | ||
323 | * @i: index of the area in question | ||
324 | * | ||
325 | * Count the number of pages chunk's @i'th area occupies. When the area's | ||
326 | * start and/or end address isn't aligned to page boundary, the straddled | ||
327 | * page is included in the count iff the rest of the page is free. | ||
328 | */ | ||
329 | static int pcpu_count_occupied_pages(struct pcpu_chunk *chunk, int i) | ||
330 | { | ||
331 | int off = chunk->map[i] & ~1; | ||
332 | int end = chunk->map[i + 1] & ~1; | ||
333 | |||
334 | if (!PAGE_ALIGNED(off) && i > 0) { | ||
335 | int prev = chunk->map[i - 1]; | ||
336 | |||
337 | if (!(prev & 1) && prev <= round_down(off, PAGE_SIZE)) | ||
338 | off = round_down(off, PAGE_SIZE); | ||
339 | } | ||
340 | |||
341 | if (!PAGE_ALIGNED(end) && i + 1 < chunk->map_used) { | ||
342 | int next = chunk->map[i + 1]; | ||
343 | int nend = chunk->map[i + 2] & ~1; | ||
344 | |||
345 | if (!(next & 1) && nend >= round_up(end, PAGE_SIZE)) | ||
346 | end = round_up(end, PAGE_SIZE); | ||
347 | } | ||
348 | |||
349 | return max_t(int, PFN_DOWN(end) - PFN_UP(off), 0); | ||
350 | } | ||
351 | |||
352 | /** | ||
318 | * pcpu_chunk_relocate - put chunk in the appropriate chunk slot | 353 | * pcpu_chunk_relocate - put chunk in the appropriate chunk slot |
319 | * @chunk: chunk of interest | 354 | * @chunk: chunk of interest |
320 | * @oslot: the previous slot it was on | 355 | * @oslot: the previous slot it was on |
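
A worked example of the straddled-page rule described in the comment above (illustrative offsets, 4K pages assumed):

/*
 * Area @i spans [0x0c00, 0x2400): it fully covers page 1 and straddles
 * pages 0 and 2.
 *
 *   - both neighbours allocated: off stays 0x0c00, end stays 0x2400,
 *     so PFN_DOWN(0x2400) - PFN_UP(0x0c00) = 2 - 1 = 1 page;
 *   - previous area free and starting at/below 0x0000, next area free
 *     and reaching 0x3000: off becomes 0x0000, end becomes 0x3000,
 *     giving 3 - 0 = 3 pages.
 *
 * A straddled page is attributed to this area only when the rest of
 * that page is free.
 */
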
@@ -342,9 +377,14 @@ static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot) | |||
342 | /** | 377 | /** |
343 | * pcpu_need_to_extend - determine whether chunk area map needs to be extended | 378 | * pcpu_need_to_extend - determine whether chunk area map needs to be extended |
344 | * @chunk: chunk of interest | 379 | * @chunk: chunk of interest |
380 | * @is_atomic: the allocation context | ||
345 | * | 381 | * |
346 | * Determine whether area map of @chunk needs to be extended to | 382 | * Determine whether area map of @chunk needs to be extended. If |
347 | * accommodate a new allocation. | 383 | * @is_atomic, only the amount necessary for a new allocation is |
384 | * considered; however, async extension is scheduled if the left amount is | ||
385 | * low. If !@is_atomic, it aims for more empty space. Combined, this | ||
386 | * ensures that the map is likely to have enough available space to | ||
387 | * accommodate atomic allocations which can't extend maps directly. | ||
348 | * | 388 | * |
349 | * CONTEXT: | 389 | * CONTEXT: |
350 | * pcpu_lock. | 390 | * pcpu_lock. |
@@ -353,15 +393,26 @@ static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot) | |||
353 | * New target map allocation length if extension is necessary, 0 | 393 | * New target map allocation length if extension is necessary, 0 |
354 | * otherwise. | 394 | * otherwise. |
355 | */ | 395 | */ |
356 | static int pcpu_need_to_extend(struct pcpu_chunk *chunk) | 396 | static int pcpu_need_to_extend(struct pcpu_chunk *chunk, bool is_atomic) |
357 | { | 397 | { |
358 | int new_alloc; | 398 | int margin, new_alloc; |
399 | |||
400 | if (is_atomic) { | ||
401 | margin = 3; | ||
402 | |||
403 | if (chunk->map_alloc < | ||
404 | chunk->map_used + PCPU_ATOMIC_MAP_MARGIN_LOW && | ||
405 | pcpu_async_enabled) | ||
406 | schedule_work(&chunk->map_extend_work); | ||
407 | } else { | ||
408 | margin = PCPU_ATOMIC_MAP_MARGIN_HIGH; | ||
409 | } | ||
359 | 410 | ||
360 | if (chunk->map_alloc >= chunk->map_used + 3) | 411 | if (chunk->map_alloc >= chunk->map_used + margin) |
361 | return 0; | 412 | return 0; |
362 | 413 | ||
363 | new_alloc = PCPU_DFL_MAP_ALLOC; | 414 | new_alloc = PCPU_DFL_MAP_ALLOC; |
364 | while (new_alloc < chunk->map_used + 3) | 415 | while (new_alloc < chunk->map_used + margin) |
365 | new_alloc *= 2; | 416 | new_alloc *= 2; |
366 | 417 | ||
367 | return new_alloc; | 418 | return new_alloc; |
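
A hedged numeric example of how the two margins above behave (values invented):

/*
 * Suppose chunk->map_used == 120 and chunk->map_alloc == 128.
 *
 *   is_atomic:  margin = 3, so 128 >= 123 and 0 is returned (no
 *               synchronous extension), but 128 < 120 +
 *               PCPU_ATOMIC_MAP_MARGIN_LOW (152), so map_extend_work
 *               is scheduled to grow the map asynchronously.
 *   !is_atomic: margin = PCPU_ATOMIC_MAP_MARGIN_HIGH (64), so
 *               128 < 184 and new_alloc doubles from 16 until it
 *               reaches 256, which is returned as the new target.
 */
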
@@ -418,11 +469,76 @@ out_unlock: | |||
418 | return 0; | 469 | return 0; |
419 | } | 470 | } |
420 | 471 | ||
472 | static void pcpu_map_extend_workfn(struct work_struct *work) | ||
473 | { | ||
474 | struct pcpu_chunk *chunk = container_of(work, struct pcpu_chunk, | ||
475 | map_extend_work); | ||
476 | int new_alloc; | ||
477 | |||
478 | spin_lock_irq(&pcpu_lock); | ||
479 | new_alloc = pcpu_need_to_extend(chunk, false); | ||
480 | spin_unlock_irq(&pcpu_lock); | ||
481 | |||
482 | if (new_alloc) | ||
483 | pcpu_extend_area_map(chunk, new_alloc); | ||
484 | } | ||
485 | |||
486 | /** | ||
487 | * pcpu_fit_in_area - try to fit the requested allocation in a candidate area | ||
488 | * @chunk: chunk the candidate area belongs to | ||
489 | * @off: the offset to the start of the candidate area | ||
490 | * @this_size: the size of the candidate area | ||
491 | * @size: the size of the target allocation | ||
492 | * @align: the alignment of the target allocation | ||
493 | * @pop_only: only allocate from already populated region | ||
494 | * | ||
495 | * We're trying to allocate @size bytes aligned at @align. @chunk's area | ||
496 | * at @off sized @this_size is a candidate. This function determines | ||
497 | * whether the target allocation fits in the candidate area and returns the | ||
498 | * number of bytes to pad after @off. If the target area doesn't fit, -1 | ||
499 | * is returned. | ||
500 | * | ||
501 | * If @pop_only is %true, this function only considers the already | ||
502 | * populated part of the candidate area. | ||
503 | */ | ||
504 | static int pcpu_fit_in_area(struct pcpu_chunk *chunk, int off, int this_size, | ||
505 | int size, int align, bool pop_only) | ||
506 | { | ||
507 | int cand_off = off; | ||
508 | |||
509 | while (true) { | ||
510 | int head = ALIGN(cand_off, align) - off; | ||
511 | int page_start, page_end, rs, re; | ||
512 | |||
513 | if (this_size < head + size) | ||
514 | return -1; | ||
515 | |||
516 | if (!pop_only) | ||
517 | return head; | ||
518 | |||
519 | /* | ||
520 | * If the first unpopulated page is beyond the end of the | ||
521 | * allocation, the whole allocation is populated; | ||
522 | * otherwise, retry from the end of the unpopulated area. | ||
523 | */ | ||
524 | page_start = PFN_DOWN(head + off); | ||
525 | page_end = PFN_UP(head + off + size); | ||
526 | |||
527 | rs = page_start; | ||
528 | pcpu_next_unpop(chunk, &rs, &re, PFN_UP(off + this_size)); | ||
529 | if (rs >= page_end) | ||
530 | return head; | ||
531 | cand_off = re * PAGE_SIZE; | ||
532 | } | ||
533 | } | ||
534 | |||
421 | /** | 535 | /** |
422 | * pcpu_alloc_area - allocate area from a pcpu_chunk | 536 | * pcpu_alloc_area - allocate area from a pcpu_chunk |
423 | * @chunk: chunk of interest | 537 | * @chunk: chunk of interest |
424 | * @size: wanted size in bytes | 538 | * @size: wanted size in bytes |
425 | * @align: wanted align | 539 | * @align: wanted align |
540 | * @pop_only: allocate only from the populated area | ||
541 | * @occ_pages_p: out param for the number of pages the area occupies | ||
426 | * | 542 | * |
427 | * Try to allocate @size bytes area aligned at @align from @chunk. | 543 | * Try to allocate @size bytes area aligned at @align from @chunk. |
428 | * Note that this function only allocates the offset. It doesn't | 544 | * Note that this function only allocates the offset. It doesn't |
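
To illustrate the @pop_only retry loop in pcpu_fit_in_area() above, a hypothetical layout (4K pages):

/*
 * Candidate area: off = 0, this_size = 0x5000 (5 pages); request:
 * size = 0x2000, align = 0x1000; pages 0-1 unpopulated, pages 2-4
 * populated; pop_only = true.
 *
 *   1st pass: head = 0, pages [0, 2) would be needed but are
 *             unpopulated -> cand_off = 0x2000.
 *   2nd pass: head = 0x2000, 0x5000 >= 0x2000 + 0x2000, pages [2, 4)
 *             are populated -> return 0x2000.
 *
 * The allocation is placed past the unpopulated hole rather than
 * failing the candidate area outright.
 */
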
@@ -437,7 +553,8 @@ out_unlock: | |||
437 | * Allocated offset in @chunk on success, -1 if no matching area is | 553 | * Allocated offset in @chunk on success, -1 if no matching area is |
438 | * found. | 554 | * found. |
439 | */ | 555 | */ |
440 | static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align) | 556 | static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align, |
557 | bool pop_only, int *occ_pages_p) | ||
441 | { | 558 | { |
442 | int oslot = pcpu_chunk_slot(chunk); | 559 | int oslot = pcpu_chunk_slot(chunk); |
443 | int max_contig = 0; | 560 | int max_contig = 0; |
@@ -453,11 +570,11 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align) | |||
453 | if (off & 1) | 570 | if (off & 1) |
454 | continue; | 571 | continue; |
455 | 572 | ||
456 | /* extra for alignment requirement */ | ||
457 | head = ALIGN(off, align) - off; | ||
458 | |||
459 | this_size = (p[1] & ~1) - off; | 573 | this_size = (p[1] & ~1) - off; |
460 | if (this_size < head + size) { | 574 | |
575 | head = pcpu_fit_in_area(chunk, off, this_size, size, align, | ||
576 | pop_only); | ||
577 | if (head < 0) { | ||
461 | if (!seen_free) { | 578 | if (!seen_free) { |
462 | chunk->first_free = i; | 579 | chunk->first_free = i; |
463 | seen_free = true; | 580 | seen_free = true; |
@@ -526,6 +643,7 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align) | |||
526 | chunk->free_size -= size; | 643 | chunk->free_size -= size; |
527 | *p |= 1; | 644 | *p |= 1; |
528 | 645 | ||
646 | *occ_pages_p = pcpu_count_occupied_pages(chunk, i); | ||
529 | pcpu_chunk_relocate(chunk, oslot); | 647 | pcpu_chunk_relocate(chunk, oslot); |
530 | return off; | 648 | return off; |
531 | } | 649 | } |
@@ -541,6 +659,7 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align) | |||
541 | * pcpu_free_area - free area to a pcpu_chunk | 659 | * pcpu_free_area - free area to a pcpu_chunk |
542 | * @chunk: chunk of interest | 660 | * @chunk: chunk of interest |
543 | * @freeme: offset of area to free | 661 | * @freeme: offset of area to free |
662 | * @occ_pages_p: out param for the number of pages the area occupies | ||
544 | * | 663 | * |
545 | * Free area starting from @freeme to @chunk. Note that this function | 664 | * Free area starting from @freeme to @chunk. Note that this function |
546 | * only modifies the allocation map. It doesn't depopulate or unmap | 665 | * only modifies the allocation map. It doesn't depopulate or unmap |
@@ -549,7 +668,8 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align) | |||
549 | * CONTEXT: | 668 | * CONTEXT: |
550 | * pcpu_lock. | 669 | * pcpu_lock. |
551 | */ | 670 | */ |
552 | static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme) | 671 | static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme, |
672 | int *occ_pages_p) | ||
553 | { | 673 | { |
554 | int oslot = pcpu_chunk_slot(chunk); | 674 | int oslot = pcpu_chunk_slot(chunk); |
555 | int off = 0; | 675 | int off = 0; |
@@ -580,6 +700,8 @@ static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme) | |||
580 | *p = off &= ~1; | 700 | *p = off &= ~1; |
581 | chunk->free_size += (p[1] & ~1) - off; | 701 | chunk->free_size += (p[1] & ~1) - off; |
582 | 702 | ||
703 | *occ_pages_p = pcpu_count_occupied_pages(chunk, i); | ||
704 | |||
583 | /* merge with next? */ | 705 | /* merge with next? */ |
584 | if (!(p[1] & 1)) | 706 | if (!(p[1] & 1)) |
585 | to_free++; | 707 | to_free++; |
@@ -620,6 +742,7 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void) | |||
620 | chunk->map_used = 1; | 742 | chunk->map_used = 1; |
621 | 743 | ||
622 | INIT_LIST_HEAD(&chunk->list); | 744 | INIT_LIST_HEAD(&chunk->list); |
745 | INIT_WORK(&chunk->map_extend_work, pcpu_map_extend_workfn); | ||
623 | chunk->free_size = pcpu_unit_size; | 746 | chunk->free_size = pcpu_unit_size; |
624 | chunk->contig_hint = pcpu_unit_size; | 747 | chunk->contig_hint = pcpu_unit_size; |
625 | 748 | ||
@@ -634,6 +757,50 @@ static void pcpu_free_chunk(struct pcpu_chunk *chunk) | |||
634 | pcpu_mem_free(chunk, pcpu_chunk_struct_size); | 757 | pcpu_mem_free(chunk, pcpu_chunk_struct_size); |
635 | } | 758 | } |
636 | 759 | ||
760 | /** | ||
761 | * pcpu_chunk_populated - post-population bookkeeping | ||
762 | * @chunk: pcpu_chunk which got populated | ||
763 | * @page_start: the start page | ||
764 | * @page_end: the end page | ||
765 | * | ||
766 | * Pages in [@page_start,@page_end) have been populated to @chunk. Update | ||
767 | * the bookkeeping information accordingly. Must be called after each | ||
768 | * successful population. | ||
769 | */ | ||
770 | static void pcpu_chunk_populated(struct pcpu_chunk *chunk, | ||
771 | int page_start, int page_end) | ||
772 | { | ||
773 | int nr = page_end - page_start; | ||
774 | |||
775 | lockdep_assert_held(&pcpu_lock); | ||
776 | |||
777 | bitmap_set(chunk->populated, page_start, nr); | ||
778 | chunk->nr_populated += nr; | ||
779 | pcpu_nr_empty_pop_pages += nr; | ||
780 | } | ||
781 | |||
782 | /** | ||
783 | * pcpu_chunk_depopulated - post-depopulation bookkeeping | ||
784 | * @chunk: pcpu_chunk which got depopulated | ||
785 | * @page_start: the start page | ||
786 | * @page_end: the end page | ||
787 | * | ||
788 | * Pages in [@page_start,@page_end) have been depopulated from @chunk. | ||
789 | * Update the bookkeeping information accordingly. Must be called after | ||
790 | * each successful depopulation. | ||
791 | */ | ||
792 | static void pcpu_chunk_depopulated(struct pcpu_chunk *chunk, | ||
793 | int page_start, int page_end) | ||
794 | { | ||
795 | int nr = page_end - page_start; | ||
796 | |||
797 | lockdep_assert_held(&pcpu_lock); | ||
798 | |||
799 | bitmap_clear(chunk->populated, page_start, nr); | ||
800 | chunk->nr_populated -= nr; | ||
801 | pcpu_nr_empty_pop_pages -= nr; | ||
802 | } | ||
803 | |||
637 | /* | 804 | /* |
638 | * Chunk management implementation. | 805 | * Chunk management implementation. |
639 | * | 806 | * |
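
A short note on how these two helpers interact with the counters used elsewhere in the patch (illustrative numbers):

/*
 * Populating pages [2, 6) of a chunk bumps both chunk->nr_populated
 * and pcpu_nr_empty_pop_pages by 4 - the new pages are populated but
 * still empty.  When pcpu_alloc() later carves an area out of them it
 * subtracts the pages that area occupies (occ_pages), and free_percpu()
 * adds them back, so pcpu_nr_empty_pop_pages always tracks pages that
 * are both populated and free.
 */
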
@@ -695,21 +862,23 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) | |||
695 | * @size: size of area to allocate in bytes | 862 | * @size: size of area to allocate in bytes |
696 | * @align: alignment of area (max PAGE_SIZE) | 863 | * @align: alignment of area (max PAGE_SIZE) |
697 | * @reserved: allocate from the reserved chunk if available | 864 | * @reserved: allocate from the reserved chunk if available |
865 | * @gfp: allocation flags | ||
698 | * | 866 | * |
699 | * Allocate percpu area of @size bytes aligned at @align. | 867 | * Allocate percpu area of @size bytes aligned at @align. If @gfp doesn't |
700 | * | 868 | * contain %GFP_KERNEL, the allocation is atomic. |
701 | * CONTEXT: | ||
702 | * Does GFP_KERNEL allocation. | ||
703 | * | 869 | * |
704 | * RETURNS: | 870 | * RETURNS: |
705 | * Percpu pointer to the allocated area on success, NULL on failure. | 871 | * Percpu pointer to the allocated area on success, NULL on failure. |
706 | */ | 872 | */ |
707 | static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved) | 873 | static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, |
874 | gfp_t gfp) | ||
708 | { | 875 | { |
709 | static int warn_limit = 10; | 876 | static int warn_limit = 10; |
710 | struct pcpu_chunk *chunk; | 877 | struct pcpu_chunk *chunk; |
711 | const char *err; | 878 | const char *err; |
712 | int slot, off, new_alloc; | 879 | bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL; |
880 | int occ_pages = 0; | ||
881 | int slot, off, new_alloc, cpu, ret; | ||
713 | unsigned long flags; | 882 | unsigned long flags; |
714 | void __percpu *ptr; | 883 | void __percpu *ptr; |
715 | 884 | ||
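
The new @gfp handling hinges on the is_atomic test initialized above; a small sketch of how it classifies common flag combinations (the helper name is invented, not part of the patch):

/* atomic unless every bit of GFP_KERNEL is present in @gfp */
static bool pcpu_gfp_is_atomic(gfp_t gfp)
{
        return (gfp & GFP_KERNEL) != GFP_KERNEL;
}

/*
 *   GFP_KERNEL -> false: may sleep, may extend maps and populate pages
 *   GFP_NOWAIT -> true:  served only from already populated, already
 *   GFP_ATOMIC -> true:  mapped space; fails rather than blocks
 */
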
@@ -728,7 +897,6 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved) | |||
728 | return NULL; | 897 | return NULL; |
729 | } | 898 | } |
730 | 899 | ||
731 | mutex_lock(&pcpu_alloc_mutex); | ||
732 | spin_lock_irqsave(&pcpu_lock, flags); | 900 | spin_lock_irqsave(&pcpu_lock, flags); |
733 | 901 | ||
734 | /* serve reserved allocations from the reserved chunk if available */ | 902 | /* serve reserved allocations from the reserved chunk if available */ |
@@ -740,16 +908,18 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved) | |||
740 | goto fail_unlock; | 908 | goto fail_unlock; |
741 | } | 909 | } |
742 | 910 | ||
743 | while ((new_alloc = pcpu_need_to_extend(chunk))) { | 911 | while ((new_alloc = pcpu_need_to_extend(chunk, is_atomic))) { |
744 | spin_unlock_irqrestore(&pcpu_lock, flags); | 912 | spin_unlock_irqrestore(&pcpu_lock, flags); |
745 | if (pcpu_extend_area_map(chunk, new_alloc) < 0) { | 913 | if (is_atomic || |
914 | pcpu_extend_area_map(chunk, new_alloc) < 0) { | ||
746 | err = "failed to extend area map of reserved chunk"; | 915 | err = "failed to extend area map of reserved chunk"; |
747 | goto fail_unlock_mutex; | 916 | goto fail; |
748 | } | 917 | } |
749 | spin_lock_irqsave(&pcpu_lock, flags); | 918 | spin_lock_irqsave(&pcpu_lock, flags); |
750 | } | 919 | } |
751 | 920 | ||
752 | off = pcpu_alloc_area(chunk, size, align); | 921 | off = pcpu_alloc_area(chunk, size, align, is_atomic, |
922 | &occ_pages); | ||
753 | if (off >= 0) | 923 | if (off >= 0) |
754 | goto area_found; | 924 | goto area_found; |
755 | 925 | ||
@@ -764,13 +934,15 @@ restart: | |||
764 | if (size > chunk->contig_hint) | 934 | if (size > chunk->contig_hint) |
765 | continue; | 935 | continue; |
766 | 936 | ||
767 | new_alloc = pcpu_need_to_extend(chunk); | 937 | new_alloc = pcpu_need_to_extend(chunk, is_atomic); |
768 | if (new_alloc) { | 938 | if (new_alloc) { |
939 | if (is_atomic) | ||
940 | continue; | ||
769 | spin_unlock_irqrestore(&pcpu_lock, flags); | 941 | spin_unlock_irqrestore(&pcpu_lock, flags); |
770 | if (pcpu_extend_area_map(chunk, | 942 | if (pcpu_extend_area_map(chunk, |
771 | new_alloc) < 0) { | 943 | new_alloc) < 0) { |
772 | err = "failed to extend area map"; | 944 | err = "failed to extend area map"; |
773 | goto fail_unlock_mutex; | 945 | goto fail; |
774 | } | 946 | } |
775 | spin_lock_irqsave(&pcpu_lock, flags); | 947 | spin_lock_irqsave(&pcpu_lock, flags); |
776 | /* | 948 | /* |
@@ -780,74 +952,134 @@ restart: | |||
780 | goto restart; | 952 | goto restart; |
781 | } | 953 | } |
782 | 954 | ||
783 | off = pcpu_alloc_area(chunk, size, align); | 955 | off = pcpu_alloc_area(chunk, size, align, is_atomic, |
956 | &occ_pages); | ||
784 | if (off >= 0) | 957 | if (off >= 0) |
785 | goto area_found; | 958 | goto area_found; |
786 | } | 959 | } |
787 | } | 960 | } |
788 | 961 | ||
789 | /* hmmm... no space left, create a new chunk */ | ||
790 | spin_unlock_irqrestore(&pcpu_lock, flags); | 962 | spin_unlock_irqrestore(&pcpu_lock, flags); |
791 | 963 | ||
792 | chunk = pcpu_create_chunk(); | 964 | /* |
793 | if (!chunk) { | 965 | * No space left. Create a new chunk. We don't want multiple |
794 | err = "failed to allocate new chunk"; | 966 | * tasks to create chunks simultaneously. Serialize and create iff |
795 | goto fail_unlock_mutex; | 967 | * there's still no empty chunk after grabbing the mutex. |
968 | */ | ||
969 | if (is_atomic) | ||
970 | goto fail; | ||
971 | |||
972 | mutex_lock(&pcpu_alloc_mutex); | ||
973 | |||
974 | if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) { | ||
975 | chunk = pcpu_create_chunk(); | ||
976 | if (!chunk) { | ||
977 | mutex_unlock(&pcpu_alloc_mutex); | ||
978 | err = "failed to allocate new chunk"; | ||
979 | goto fail; | ||
980 | } | ||
981 | |||
982 | spin_lock_irqsave(&pcpu_lock, flags); | ||
983 | pcpu_chunk_relocate(chunk, -1); | ||
984 | } else { | ||
985 | spin_lock_irqsave(&pcpu_lock, flags); | ||
796 | } | 986 | } |
797 | 987 | ||
798 | spin_lock_irqsave(&pcpu_lock, flags); | 988 | mutex_unlock(&pcpu_alloc_mutex); |
799 | pcpu_chunk_relocate(chunk, -1); | ||
800 | goto restart; | 989 | goto restart; |
801 | 990 | ||
802 | area_found: | 991 | area_found: |
803 | spin_unlock_irqrestore(&pcpu_lock, flags); | 992 | spin_unlock_irqrestore(&pcpu_lock, flags); |
804 | 993 | ||
805 | /* populate, map and clear the area */ | 994 | /* populate if not all pages are already there */ |
806 | if (pcpu_populate_chunk(chunk, off, size)) { | 995 | if (!is_atomic) { |
807 | spin_lock_irqsave(&pcpu_lock, flags); | 996 | int page_start, page_end, rs, re; |
808 | pcpu_free_area(chunk, off); | 997 | |
809 | err = "failed to populate"; | 998 | mutex_lock(&pcpu_alloc_mutex); |
810 | goto fail_unlock; | 999 | |
1000 | page_start = PFN_DOWN(off); | ||
1001 | page_end = PFN_UP(off + size); | ||
1002 | |||
1003 | pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) { | ||
1004 | WARN_ON(chunk->immutable); | ||
1005 | |||
1006 | ret = pcpu_populate_chunk(chunk, rs, re); | ||
1007 | |||
1008 | spin_lock_irqsave(&pcpu_lock, flags); | ||
1009 | if (ret) { | ||
1010 | mutex_unlock(&pcpu_alloc_mutex); | ||
1011 | pcpu_free_area(chunk, off, &occ_pages); | ||
1012 | err = "failed to populate"; | ||
1013 | goto fail_unlock; | ||
1014 | } | ||
1015 | pcpu_chunk_populated(chunk, rs, re); | ||
1016 | spin_unlock_irqrestore(&pcpu_lock, flags); | ||
1017 | } | ||
1018 | |||
1019 | mutex_unlock(&pcpu_alloc_mutex); | ||
811 | } | 1020 | } |
812 | 1021 | ||
813 | mutex_unlock(&pcpu_alloc_mutex); | 1022 | if (chunk != pcpu_reserved_chunk) |
1023 | pcpu_nr_empty_pop_pages -= occ_pages; | ||
1024 | |||
1025 | if (pcpu_nr_empty_pop_pages < PCPU_EMPTY_POP_PAGES_LOW) | ||
1026 | pcpu_schedule_balance_work(); | ||
1027 | |||
1028 | /* clear the areas and return address relative to base address */ | ||
1029 | for_each_possible_cpu(cpu) | ||
1030 | memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size); | ||
814 | 1031 | ||
815 | /* return address relative to base address */ | ||
816 | ptr = __addr_to_pcpu_ptr(chunk->base_addr + off); | 1032 | ptr = __addr_to_pcpu_ptr(chunk->base_addr + off); |
817 | kmemleak_alloc_percpu(ptr, size); | 1033 | kmemleak_alloc_percpu(ptr, size); |
818 | return ptr; | 1034 | return ptr; |
819 | 1035 | ||
820 | fail_unlock: | 1036 | fail_unlock: |
821 | spin_unlock_irqrestore(&pcpu_lock, flags); | 1037 | spin_unlock_irqrestore(&pcpu_lock, flags); |
822 | fail_unlock_mutex: | 1038 | fail: |
823 | mutex_unlock(&pcpu_alloc_mutex); | 1039 | if (!is_atomic && warn_limit) { |
824 | if (warn_limit) { | 1040 | pr_warning("PERCPU: allocation failed, size=%zu align=%zu atomic=%d, %s\n", |
825 | pr_warning("PERCPU: allocation failed, size=%zu align=%zu, " | 1041 | size, align, is_atomic, err); |
826 | "%s\n", size, align, err); | ||
827 | dump_stack(); | 1042 | dump_stack(); |
828 | if (!--warn_limit) | 1043 | if (!--warn_limit) |
829 | pr_info("PERCPU: limit reached, disable warning\n"); | 1044 | pr_info("PERCPU: limit reached, disable warning\n"); |
830 | } | 1045 | } |
1046 | if (is_atomic) { | ||
1047 | /* see the flag handling in pcpu_balance_workfn() */ | ||
1048 | pcpu_atomic_alloc_failed = true; | ||
1049 | pcpu_schedule_balance_work(); | ||
1050 | } | ||
831 | return NULL; | 1051 | return NULL; |
832 | } | 1052 | } |
833 | 1053 | ||
834 | /** | 1054 | /** |
835 | * __alloc_percpu - allocate dynamic percpu area | 1055 | * __alloc_percpu_gfp - allocate dynamic percpu area |
836 | * @size: size of area to allocate in bytes | 1056 | * @size: size of area to allocate in bytes |
837 | * @align: alignment of area (max PAGE_SIZE) | 1057 | * @align: alignment of area (max PAGE_SIZE) |
1058 | * @gfp: allocation flags | ||
838 | * | 1059 | * |
839 | * Allocate zero-filled percpu area of @size bytes aligned at @align. | 1060 | * Allocate zero-filled percpu area of @size bytes aligned at @align. If |
840 | * Might sleep. Might trigger writeouts. | 1061 | * @gfp doesn't contain %GFP_KERNEL, the allocation doesn't block and can |
841 | * | 1062 | * be called from any context but is a lot more likely to fail. |
842 | * CONTEXT: | ||
843 | * Does GFP_KERNEL allocation. | ||
844 | * | 1063 | * |
845 | * RETURNS: | 1064 | * RETURNS: |
846 | * Percpu pointer to the allocated area on success, NULL on failure. | 1065 | * Percpu pointer to the allocated area on success, NULL on failure. |
847 | */ | 1066 | */ |
1067 | void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp) | ||
1068 | { | ||
1069 | return pcpu_alloc(size, align, false, gfp); | ||
1070 | } | ||
1071 | EXPORT_SYMBOL_GPL(__alloc_percpu_gfp); | ||
1072 | |||
1073 | /** | ||
1074 | * __alloc_percpu - allocate dynamic percpu area | ||
1075 | * @size: size of area to allocate in bytes | ||
1076 | * @align: alignment of area (max PAGE_SIZE) | ||
1077 | * | ||
1078 | * Equivalent to __alloc_percpu_gfp(size, align, %GFP_KERNEL). | ||
1079 | */ | ||
848 | void __percpu *__alloc_percpu(size_t size, size_t align) | 1080 | void __percpu *__alloc_percpu(size_t size, size_t align) |
849 | { | 1081 | { |
850 | return pcpu_alloc(size, align, false); | 1082 | return pcpu_alloc(size, align, false, GFP_KERNEL); |
851 | } | 1083 | } |
852 | EXPORT_SYMBOL_GPL(__alloc_percpu); | 1084 | EXPORT_SYMBOL_GPL(__alloc_percpu); |
853 | 1085 | ||
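
A minimal usage sketch for the new entry point, assuming a caller that cannot sleep (the struct and function names are invented for illustration):

struct foo_stats {
        u64     hits;
        u64     misses;
};

/* called under a spinlock, so GFP_KERNEL is not an option */
static struct foo_stats __percpu *foo_stats_alloc(void)
{
        /*
         * Atomic percpu allocations are served only from already
         * populated space and are best-effort; the caller must handle
         * NULL and, if needed, retry later from a sleepable context.
         */
        return __alloc_percpu_gfp(sizeof(struct foo_stats),
                                  __alignof__(struct foo_stats),
                                  GFP_NOWAIT);
}

The result is freed with free_percpu() exactly as for __alloc_percpu().
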
@@ -869,44 +1101,121 @@ EXPORT_SYMBOL_GPL(__alloc_percpu); | |||
869 | */ | 1101 | */ |
870 | void __percpu *__alloc_reserved_percpu(size_t size, size_t align) | 1102 | void __percpu *__alloc_reserved_percpu(size_t size, size_t align) |
871 | { | 1103 | { |
872 | return pcpu_alloc(size, align, true); | 1104 | return pcpu_alloc(size, align, true, GFP_KERNEL); |
873 | } | 1105 | } |
874 | 1106 | ||
875 | /** | 1107 | /** |
876 | * pcpu_reclaim - reclaim fully free chunks, workqueue function | 1108 | * pcpu_balance_workfn - manage the amount of free chunks and populated pages |
877 | * @work: unused | 1109 | * @work: unused |
878 | * | 1110 | * |
879 | * Reclaim all fully free chunks except for the first one. | 1111 | * Reclaim all fully free chunks except for the first one. |
880 | * | ||
881 | * CONTEXT: | ||
882 | * workqueue context. | ||
883 | */ | 1112 | */ |
884 | static void pcpu_reclaim(struct work_struct *work) | 1113 | static void pcpu_balance_workfn(struct work_struct *work) |
885 | { | 1114 | { |
886 | LIST_HEAD(todo); | 1115 | LIST_HEAD(to_free); |
887 | struct list_head *head = &pcpu_slot[pcpu_nr_slots - 1]; | 1116 | struct list_head *free_head = &pcpu_slot[pcpu_nr_slots - 1]; |
888 | struct pcpu_chunk *chunk, *next; | 1117 | struct pcpu_chunk *chunk, *next; |
1118 | int slot, nr_to_pop, ret; | ||
889 | 1119 | ||
1120 | /* | ||
1121 | * There's no reason to keep around multiple unused chunks and VM | ||
1122 | * areas can be scarce. Destroy all free chunks except for one. | ||
1123 | */ | ||
890 | mutex_lock(&pcpu_alloc_mutex); | 1124 | mutex_lock(&pcpu_alloc_mutex); |
891 | spin_lock_irq(&pcpu_lock); | 1125 | spin_lock_irq(&pcpu_lock); |
892 | 1126 | ||
893 | list_for_each_entry_safe(chunk, next, head, list) { | 1127 | list_for_each_entry_safe(chunk, next, free_head, list) { |
894 | WARN_ON(chunk->immutable); | 1128 | WARN_ON(chunk->immutable); |
895 | 1129 | ||
896 | /* spare the first one */ | 1130 | /* spare the first one */ |
897 | if (chunk == list_first_entry(head, struct pcpu_chunk, list)) | 1131 | if (chunk == list_first_entry(free_head, struct pcpu_chunk, list)) |
898 | continue; | 1132 | continue; |
899 | 1133 | ||
900 | list_move(&chunk->list, &todo); | 1134 | list_move(&chunk->list, &to_free); |
901 | } | 1135 | } |
902 | 1136 | ||
903 | spin_unlock_irq(&pcpu_lock); | 1137 | spin_unlock_irq(&pcpu_lock); |
904 | 1138 | ||
905 | list_for_each_entry_safe(chunk, next, &todo, list) { | 1139 | list_for_each_entry_safe(chunk, next, &to_free, list) { |
906 | pcpu_depopulate_chunk(chunk, 0, pcpu_unit_size); | 1140 | int rs, re; |
1141 | |||
1142 | pcpu_for_each_pop_region(chunk, rs, re, 0, pcpu_unit_pages) { | ||
1143 | pcpu_depopulate_chunk(chunk, rs, re); | ||
1144 | spin_lock_irq(&pcpu_lock); | ||
1145 | pcpu_chunk_depopulated(chunk, rs, re); | ||
1146 | spin_unlock_irq(&pcpu_lock); | ||
1147 | } | ||
907 | pcpu_destroy_chunk(chunk); | 1148 | pcpu_destroy_chunk(chunk); |
908 | } | 1149 | } |
909 | 1150 | ||
1151 | /* | ||
1152 | * Ensure there are certain number of free populated pages for | ||
1153 | * atomic allocs. Fill up from the most packed so that atomic | ||
1154 | * allocs don't increase fragmentation. If atomic allocation | ||
1155 | * failed previously, always populate the maximum amount. This | ||
1156 | * should prevent atomic allocs larger than PAGE_SIZE from keeping | ||
1157 | * failing indefinitely; however, large atomic allocs are not | ||
1158 | * something we support properly and can be highly unreliable and | ||
1159 | * inefficient. | ||
1160 | */ | ||
1161 | retry_pop: | ||
1162 | if (pcpu_atomic_alloc_failed) { | ||
1163 | nr_to_pop = PCPU_EMPTY_POP_PAGES_HIGH; | ||
1164 | /* best effort anyway, don't worry about synchronization */ | ||
1165 | pcpu_atomic_alloc_failed = false; | ||
1166 | } else { | ||
1167 | nr_to_pop = clamp(PCPU_EMPTY_POP_PAGES_HIGH - | ||
1168 | pcpu_nr_empty_pop_pages, | ||
1169 | 0, PCPU_EMPTY_POP_PAGES_HIGH); | ||
1170 | } | ||
1171 | |||
1172 | for (slot = pcpu_size_to_slot(PAGE_SIZE); slot < pcpu_nr_slots; slot++) { | ||
1173 | int nr_unpop = 0, rs, re; | ||
1174 | |||
1175 | if (!nr_to_pop) | ||
1176 | break; | ||
1177 | |||
1178 | spin_lock_irq(&pcpu_lock); | ||
1179 | list_for_each_entry(chunk, &pcpu_slot[slot], list) { | ||
1180 | nr_unpop = pcpu_unit_pages - chunk->nr_populated; | ||
1181 | if (nr_unpop) | ||
1182 | break; | ||
1183 | } | ||
1184 | spin_unlock_irq(&pcpu_lock); | ||
1185 | |||
1186 | if (!nr_unpop) | ||
1187 | continue; | ||
1188 | |||
1189 | /* @chunk can't go away while pcpu_alloc_mutex is held */ | ||
1190 | pcpu_for_each_unpop_region(chunk, rs, re, 0, pcpu_unit_pages) { | ||
1191 | int nr = min(re - rs, nr_to_pop); | ||
1192 | |||
1193 | ret = pcpu_populate_chunk(chunk, rs, rs + nr); | ||
1194 | if (!ret) { | ||
1195 | nr_to_pop -= nr; | ||
1196 | spin_lock_irq(&pcpu_lock); | ||
1197 | pcpu_chunk_populated(chunk, rs, rs + nr); | ||
1198 | spin_unlock_irq(&pcpu_lock); | ||
1199 | } else { | ||
1200 | nr_to_pop = 0; | ||
1201 | } | ||
1202 | |||
1203 | if (!nr_to_pop) | ||
1204 | break; | ||
1205 | } | ||
1206 | } | ||
1207 | |||
1208 | if (nr_to_pop) { | ||
1209 | /* ran out of chunks to populate, create a new one and retry */ | ||
1210 | chunk = pcpu_create_chunk(); | ||
1211 | if (chunk) { | ||
1212 | spin_lock_irq(&pcpu_lock); | ||
1213 | pcpu_chunk_relocate(chunk, -1); | ||
1214 | spin_unlock_irq(&pcpu_lock); | ||
1215 | goto retry_pop; | ||
1216 | } | ||
1217 | } | ||
1218 | |||
910 | mutex_unlock(&pcpu_alloc_mutex); | 1219 | mutex_unlock(&pcpu_alloc_mutex); |
911 | } | 1220 | } |
912 | 1221 | ||
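
A quick numeric illustration of the repopulation target computed in the function above, using the constants added at the top of the file:

/*
 * With PCPU_EMPTY_POP_PAGES_LOW = 2 and PCPU_EMPTY_POP_PAGES_HIGH = 4:
 *
 *   pcpu_nr_empty_pop_pages == 1 -> nr_to_pop = clamp(4 - 1, 0, 4) = 3
 *   pcpu_nr_empty_pop_pages == 6 -> nr_to_pop = clamp(4 - 6, 0, 4) = 0
 *   pcpu_atomic_alloc_failed set -> nr_to_pop forced to 4 regardless
 *
 * pcpu_alloc() schedules this work whenever the counter drops below
 * PCPU_EMPTY_POP_PAGES_LOW, so the pool normally floats between two
 * and four empty populated pages.
 */
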
@@ -924,7 +1233,7 @@ void free_percpu(void __percpu *ptr) | |||
924 | void *addr; | 1233 | void *addr; |
925 | struct pcpu_chunk *chunk; | 1234 | struct pcpu_chunk *chunk; |
926 | unsigned long flags; | 1235 | unsigned long flags; |
927 | int off; | 1236 | int off, occ_pages; |
928 | 1237 | ||
929 | if (!ptr) | 1238 | if (!ptr) |
930 | return; | 1239 | return; |
@@ -938,7 +1247,10 @@ void free_percpu(void __percpu *ptr) | |||
938 | chunk = pcpu_chunk_addr_search(addr); | 1247 | chunk = pcpu_chunk_addr_search(addr); |
939 | off = addr - chunk->base_addr; | 1248 | off = addr - chunk->base_addr; |
940 | 1249 | ||
941 | pcpu_free_area(chunk, off); | 1250 | pcpu_free_area(chunk, off, &occ_pages); |
1251 | |||
1252 | if (chunk != pcpu_reserved_chunk) | ||
1253 | pcpu_nr_empty_pop_pages += occ_pages; | ||
942 | 1254 | ||
943 | /* if there are more than one fully free chunks, wake up grim reaper */ | 1255 | /* if there are more than one fully free chunks, wake up grim reaper */ |
944 | if (chunk->free_size == pcpu_unit_size) { | 1256 | if (chunk->free_size == pcpu_unit_size) { |
@@ -946,7 +1258,7 @@ void free_percpu(void __percpu *ptr) | |||
946 | 1258 | ||
947 | list_for_each_entry(pos, &pcpu_slot[pcpu_nr_slots - 1], list) | 1259 | list_for_each_entry(pos, &pcpu_slot[pcpu_nr_slots - 1], list) |
948 | if (pos != chunk) { | 1260 | if (pos != chunk) { |
949 | schedule_work(&pcpu_reclaim_work); | 1261 | pcpu_schedule_balance_work(); |
950 | break; | 1262 | break; |
951 | } | 1263 | } |
952 | } | 1264 | } |
@@ -1336,11 +1648,13 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, | |||
1336 | */ | 1648 | */ |
1337 | schunk = memblock_virt_alloc(pcpu_chunk_struct_size, 0); | 1649 | schunk = memblock_virt_alloc(pcpu_chunk_struct_size, 0); |
1338 | INIT_LIST_HEAD(&schunk->list); | 1650 | INIT_LIST_HEAD(&schunk->list); |
1651 | INIT_WORK(&schunk->map_extend_work, pcpu_map_extend_workfn); | ||
1339 | schunk->base_addr = base_addr; | 1652 | schunk->base_addr = base_addr; |
1340 | schunk->map = smap; | 1653 | schunk->map = smap; |
1341 | schunk->map_alloc = ARRAY_SIZE(smap); | 1654 | schunk->map_alloc = ARRAY_SIZE(smap); |
1342 | schunk->immutable = true; | 1655 | schunk->immutable = true; |
1343 | bitmap_fill(schunk->populated, pcpu_unit_pages); | 1656 | bitmap_fill(schunk->populated, pcpu_unit_pages); |
1657 | schunk->nr_populated = pcpu_unit_pages; | ||
1344 | 1658 | ||
1345 | if (ai->reserved_size) { | 1659 | if (ai->reserved_size) { |
1346 | schunk->free_size = ai->reserved_size; | 1660 | schunk->free_size = ai->reserved_size; |
@@ -1364,11 +1678,13 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, | |||
1364 | if (dyn_size) { | 1678 | if (dyn_size) { |
1365 | dchunk = memblock_virt_alloc(pcpu_chunk_struct_size, 0); | 1679 | dchunk = memblock_virt_alloc(pcpu_chunk_struct_size, 0); |
1366 | INIT_LIST_HEAD(&dchunk->list); | 1680 | INIT_LIST_HEAD(&dchunk->list); |
1681 | INIT_WORK(&dchunk->map_extend_work, pcpu_map_extend_workfn); | ||
1367 | dchunk->base_addr = base_addr; | 1682 | dchunk->base_addr = base_addr; |
1368 | dchunk->map = dmap; | 1683 | dchunk->map = dmap; |
1369 | dchunk->map_alloc = ARRAY_SIZE(dmap); | 1684 | dchunk->map_alloc = ARRAY_SIZE(dmap); |
1370 | dchunk->immutable = true; | 1685 | dchunk->immutable = true; |
1371 | bitmap_fill(dchunk->populated, pcpu_unit_pages); | 1686 | bitmap_fill(dchunk->populated, pcpu_unit_pages); |
1687 | dchunk->nr_populated = pcpu_unit_pages; | ||
1372 | 1688 | ||
1373 | dchunk->contig_hint = dchunk->free_size = dyn_size; | 1689 | dchunk->contig_hint = dchunk->free_size = dyn_size; |
1374 | dchunk->map[0] = 1; | 1690 | dchunk->map[0] = 1; |
@@ -1379,6 +1695,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, | |||
1379 | 1695 | ||
1380 | /* link the first chunk in */ | 1696 | /* link the first chunk in */ |
1381 | pcpu_first_chunk = dchunk ?: schunk; | 1697 | pcpu_first_chunk = dchunk ?: schunk; |
1698 | pcpu_nr_empty_pop_pages += | ||
1699 | pcpu_count_occupied_pages(pcpu_first_chunk, 1); | ||
1382 | pcpu_chunk_relocate(pcpu_first_chunk, -1); | 1700 | pcpu_chunk_relocate(pcpu_first_chunk, -1); |
1383 | 1701 | ||
1384 | /* we're done */ | 1702 | /* we're done */ |
@@ -1932,8 +2250,6 @@ void __init setup_per_cpu_areas(void) | |||
1932 | 2250 | ||
1933 | if (pcpu_setup_first_chunk(ai, fc) < 0) | 2251 | if (pcpu_setup_first_chunk(ai, fc) < 0) |
1934 | panic("Failed to initialize percpu areas."); | 2252 | panic("Failed to initialize percpu areas."); |
1935 | |||
1936 | pcpu_free_alloc_info(ai); | ||
1937 | } | 2253 | } |
1938 | 2254 | ||
1939 | #endif /* CONFIG_SMP */ | 2255 | #endif /* CONFIG_SMP */ |
@@ -1967,3 +2283,15 @@ void __init percpu_init_late(void) | |||
1967 | spin_unlock_irqrestore(&pcpu_lock, flags); | 2283 | spin_unlock_irqrestore(&pcpu_lock, flags); |
1968 | } | 2284 | } |
1969 | } | 2285 | } |
2286 | |||
2287 | /* | ||
2288 | * Percpu allocator is initialized early during boot when neither slab or | ||
2289 | * workqueue is available. Plug async management until everything is up | ||
2290 | * and running. | ||
2291 | */ | ||
2292 | static int __init percpu_enable_async(void) | ||
2293 | { | ||
2294 | pcpu_async_enabled = true; | ||
2295 | return 0; | ||
2296 | } | ||
2297 | subsys_initcall(percpu_enable_async); | ||