diff options
-rw-r--r-- | arch/sparc/kernel/smp_64.c | 42 | ||||
-rw-r--r-- | include/linux/percpu.h | 3 | ||||
-rw-r--r-- | mm/percpu.c | 604 |
3 files changed, 400 insertions, 249 deletions
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index ccad7b20ae75..f2f22ee97a7a 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c | |||
@@ -1415,19 +1415,6 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, | |||
1415 | #endif | 1415 | #endif |
1416 | } | 1416 | } |
1417 | 1417 | ||
1418 | static size_t pcpur_size __initdata; | ||
1419 | static void **pcpur_ptrs __initdata; | ||
1420 | |||
1421 | static struct page * __init pcpur_get_page(unsigned int cpu, int pageno) | ||
1422 | { | ||
1423 | size_t off = (size_t)pageno << PAGE_SHIFT; | ||
1424 | |||
1425 | if (off >= pcpur_size) | ||
1426 | return NULL; | ||
1427 | |||
1428 | return virt_to_page(pcpur_ptrs[cpu] + off); | ||
1429 | } | ||
1430 | |||
1431 | #define PCPU_CHUNK_SIZE (4UL * 1024UL * 1024UL) | 1418 | #define PCPU_CHUNK_SIZE (4UL * 1024UL * 1024UL) |
1432 | 1419 | ||
1433 | static void __init pcpu_map_range(unsigned long start, unsigned long end, | 1420 | static void __init pcpu_map_range(unsigned long start, unsigned long end, |
@@ -1491,25 +1478,26 @@ void __init setup_per_cpu_areas(void) | |||
1491 | size_t dyn_size, static_size = __per_cpu_end - __per_cpu_start; | 1478 | size_t dyn_size, static_size = __per_cpu_end - __per_cpu_start; |
1492 | static struct vm_struct vm; | 1479 | static struct vm_struct vm; |
1493 | unsigned long delta, cpu; | 1480 | unsigned long delta, cpu; |
1494 | size_t pcpu_unit_size; | 1481 | size_t size_sum, pcpu_unit_size; |
1495 | size_t ptrs_size; | 1482 | size_t ptrs_size; |
1483 | void **ptrs; | ||
1496 | 1484 | ||
1497 | pcpur_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + | 1485 | size_sum = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + |
1498 | PERCPU_DYNAMIC_RESERVE); | 1486 | PERCPU_DYNAMIC_RESERVE); |
1499 | dyn_size = pcpur_size - static_size - PERCPU_MODULE_RESERVE; | 1487 | dyn_size = size_sum - static_size - PERCPU_MODULE_RESERVE; |
1500 | 1488 | ||
1501 | 1489 | ||
1502 | ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpur_ptrs[0])); | 1490 | ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(ptrs[0])); |
1503 | pcpur_ptrs = alloc_bootmem(ptrs_size); | 1491 | ptrs = alloc_bootmem(ptrs_size); |
1504 | 1492 | ||
1505 | for_each_possible_cpu(cpu) { | 1493 | for_each_possible_cpu(cpu) { |
1506 | pcpur_ptrs[cpu] = pcpu_alloc_bootmem(cpu, PCPU_CHUNK_SIZE, | 1494 | ptrs[cpu] = pcpu_alloc_bootmem(cpu, PCPU_CHUNK_SIZE, |
1507 | PCPU_CHUNK_SIZE); | 1495 | PCPU_CHUNK_SIZE); |
1508 | 1496 | ||
1509 | free_bootmem(__pa(pcpur_ptrs[cpu] + pcpur_size), | 1497 | free_bootmem(__pa(ptrs[cpu] + size_sum), |
1510 | PCPU_CHUNK_SIZE - pcpur_size); | 1498 | PCPU_CHUNK_SIZE - size_sum); |
1511 | 1499 | ||
1512 | memcpy(pcpur_ptrs[cpu], __per_cpu_load, static_size); | 1500 | memcpy(ptrs[cpu], __per_cpu_load, static_size); |
1513 | } | 1501 | } |
1514 | 1502 | ||
1515 | /* allocate address and map */ | 1503 | /* allocate address and map */ |
@@ -1523,14 +1511,14 @@ void __init setup_per_cpu_areas(void) | |||
1523 | 1511 | ||
1524 | start += cpu * PCPU_CHUNK_SIZE; | 1512 | start += cpu * PCPU_CHUNK_SIZE; |
1525 | end = start + PCPU_CHUNK_SIZE; | 1513 | end = start + PCPU_CHUNK_SIZE; |
1526 | pcpu_map_range(start, end, virt_to_page(pcpur_ptrs[cpu])); | 1514 | pcpu_map_range(start, end, virt_to_page(ptrs[cpu])); |
1527 | } | 1515 | } |
1528 | 1516 | ||
1529 | pcpu_unit_size = pcpu_setup_first_chunk(pcpur_get_page, static_size, | 1517 | pcpu_unit_size = pcpu_setup_first_chunk(static_size, |
1530 | PERCPU_MODULE_RESERVE, dyn_size, | 1518 | PERCPU_MODULE_RESERVE, dyn_size, |
1531 | PCPU_CHUNK_SIZE, vm.addr); | 1519 | PCPU_CHUNK_SIZE, vm.addr); |
1532 | 1520 | ||
1533 | free_bootmem(__pa(pcpur_ptrs), ptrs_size); | 1521 | free_bootmem(__pa(ptrs), ptrs_size); |
1534 | 1522 | ||
1535 | delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; | 1523 | delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; |
1536 | for_each_possible_cpu(cpu) { | 1524 | for_each_possible_cpu(cpu) { |
diff --git a/include/linux/percpu.h b/include/linux/percpu.h index ec64357e1762..63c8b7a23e66 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h | |||
@@ -58,13 +58,12 @@ | |||
58 | 58 | ||
59 | extern void *pcpu_base_addr; | 59 | extern void *pcpu_base_addr; |
60 | 60 | ||
61 | typedef struct page * (*pcpu_get_page_fn_t)(unsigned int cpu, int pageno); | ||
62 | typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size); | 61 | typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size); |
63 | typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size); | 62 | typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size); |
64 | typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); | 63 | typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); |
65 | typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); | 64 | typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); |
66 | 65 | ||
67 | extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, | 66 | extern size_t __init pcpu_setup_first_chunk( |
68 | size_t static_size, size_t reserved_size, | 67 | size_t static_size, size_t reserved_size, |
69 | ssize_t dyn_size, size_t unit_size, | 68 | ssize_t dyn_size, size_t unit_size, |
70 | void *base_addr); | 69 | void *base_addr); |
diff --git a/mm/percpu.c b/mm/percpu.c index 639fce4d2caf..21756814d99f 100644 --- a/mm/percpu.c +++ b/mm/percpu.c | |||
@@ -94,8 +94,7 @@ struct pcpu_chunk { | |||
94 | int map_alloc; /* # of map entries allocated */ | 94 | int map_alloc; /* # of map entries allocated */ |
95 | int *map; /* allocation map */ | 95 | int *map; /* allocation map */ |
96 | bool immutable; /* no [de]population allowed */ | 96 | bool immutable; /* no [de]population allowed */ |
97 | struct page **page; /* points to page array */ | 97 | unsigned long populated[]; /* populated bitmap */ |
98 | struct page *page_ar[]; /* #cpus * UNIT_PAGES */ | ||
99 | }; | 98 | }; |
100 | 99 | ||
101 | static int pcpu_unit_pages __read_mostly; | 100 | static int pcpu_unit_pages __read_mostly; |
@@ -129,9 +128,9 @@ static int pcpu_reserved_chunk_limit; | |||
129 | * Synchronization rules. | 128 | * Synchronization rules. |
130 | * | 129 | * |
131 | * There are two locks - pcpu_alloc_mutex and pcpu_lock. The former | 130 | * There are two locks - pcpu_alloc_mutex and pcpu_lock. The former |
132 | * protects allocation/reclaim paths, chunks and chunk->page arrays. | 131 | * protects allocation/reclaim paths, chunks, populated bitmap and |
133 | * The latter is a spinlock and protects the index data structures - | 132 | * vmalloc mapping. The latter is a spinlock and protects the index |
134 | * chunk slots, chunks and area maps in chunks. | 133 | * data structures - chunk slots, chunks and area maps in chunks. |
135 | * | 134 | * |
136 | * During allocation, pcpu_alloc_mutex is kept locked all the time and | 135 | * During allocation, pcpu_alloc_mutex is kept locked all the time and |
137 | * pcpu_lock is grabbed and released as necessary. All actual memory | 136 | * pcpu_lock is grabbed and released as necessary. All actual memory |
@@ -188,16 +187,13 @@ static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk, | |||
188 | (pcpu_page_idx(cpu, page_idx) << PAGE_SHIFT); | 187 | (pcpu_page_idx(cpu, page_idx) << PAGE_SHIFT); |
189 | } | 188 | } |
190 | 189 | ||
191 | static struct page **pcpu_chunk_pagep(struct pcpu_chunk *chunk, | 190 | static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk, |
192 | unsigned int cpu, int page_idx) | 191 | unsigned int cpu, int page_idx) |
193 | { | 192 | { |
194 | return &chunk->page[pcpu_page_idx(cpu, page_idx)]; | 193 | /* must not be used on pre-mapped chunk */ |
195 | } | 194 | WARN_ON(chunk->immutable); |
196 | 195 | ||
197 | static bool pcpu_chunk_page_occupied(struct pcpu_chunk *chunk, | 196 | return vmalloc_to_page((void *)pcpu_chunk_addr(chunk, cpu, page_idx)); |
198 | int page_idx) | ||
199 | { | ||
200 | return *pcpu_chunk_pagep(chunk, 0, page_idx) != NULL; | ||
201 | } | 197 | } |
202 | 198 | ||
203 | /* set the pointer to a chunk in a page struct */ | 199 | /* set the pointer to a chunk in a page struct */ |
@@ -212,6 +208,34 @@ static struct pcpu_chunk *pcpu_get_page_chunk(struct page *page) | |||
212 | return (struct pcpu_chunk *)page->index; | 208 | return (struct pcpu_chunk *)page->index; |
213 | } | 209 | } |
214 | 210 | ||
211 | static void pcpu_next_unpop(struct pcpu_chunk *chunk, int *rs, int *re, int end) | ||
212 | { | ||
213 | *rs = find_next_zero_bit(chunk->populated, end, *rs); | ||
214 | *re = find_next_bit(chunk->populated, end, *rs + 1); | ||
215 | } | ||
216 | |||
217 | static void pcpu_next_pop(struct pcpu_chunk *chunk, int *rs, int *re, int end) | ||
218 | { | ||
219 | *rs = find_next_bit(chunk->populated, end, *rs); | ||
220 | *re = find_next_zero_bit(chunk->populated, end, *rs + 1); | ||
221 | } | ||
222 | |||
223 | /* | ||
224 | * (Un)populated page region iterators. Iterate over (un)populated | ||
225 | * page regions between @start and @end in @chunk. @rs and @re should | ||
226 | * be integer variables and will be set to start and end page index of | ||
227 | * the current region. | ||
228 | */ | ||
229 | #define pcpu_for_each_unpop_region(chunk, rs, re, start, end) \ | ||
230 | for ((rs) = (start), pcpu_next_unpop((chunk), &(rs), &(re), (end)); \ | ||
231 | (rs) < (re); \ | ||
232 | (rs) = (re) + 1, pcpu_next_unpop((chunk), &(rs), &(re), (end))) | ||
233 | |||
234 | #define pcpu_for_each_pop_region(chunk, rs, re, start, end) \ | ||
235 | for ((rs) = (start), pcpu_next_pop((chunk), &(rs), &(re), (end)); \ | ||
236 | (rs) < (re); \ | ||
237 | (rs) = (re) + 1, pcpu_next_pop((chunk), &(rs), &(re), (end))) | ||
238 | |||
215 | /** | 239 | /** |
216 | * pcpu_mem_alloc - allocate memory | 240 | * pcpu_mem_alloc - allocate memory |
217 | * @size: bytes to allocate | 241 | * @size: bytes to allocate |
@@ -545,42 +569,197 @@ static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme) | |||
545 | } | 569 | } |
546 | 570 | ||
547 | /** | 571 | /** |
548 | * pcpu_unmap - unmap pages out of a pcpu_chunk | 572 | * pcpu_get_pages_and_bitmap - get temp pages array and bitmap |
573 | * @chunk: chunk of interest | ||
574 | * @bitmapp: output parameter for bitmap | ||
575 | * @may_alloc: may allocate the array | ||
576 | * | ||
577 | * Returns pointer to array of pointers to struct page and bitmap, | ||
578 | * both of which can be indexed with pcpu_page_idx(). The returned | ||
579 | * array is cleared to zero and *@bitmapp is copied from | ||
580 | * @chunk->populated. Note that there is only one array and bitmap | ||
581 | * and access exclusion is the caller's responsibility. | ||
582 | * | ||
583 | * CONTEXT: | ||
584 | * pcpu_alloc_mutex and does GFP_KERNEL allocation if @may_alloc. | ||
585 | * Otherwise, don't care. | ||
586 | * | ||
587 | * RETURNS: | ||
588 | * Pointer to temp pages array on success, NULL on failure. | ||
589 | */ | ||
590 | static struct page **pcpu_get_pages_and_bitmap(struct pcpu_chunk *chunk, | ||
591 | unsigned long **bitmapp, | ||
592 | bool may_alloc) | ||
593 | { | ||
594 | static struct page **pages; | ||
595 | static unsigned long *bitmap; | ||
596 | size_t pages_size = num_possible_cpus() * pcpu_unit_pages * | ||
597 | sizeof(pages[0]); | ||
598 | size_t bitmap_size = BITS_TO_LONGS(pcpu_unit_pages) * | ||
599 | sizeof(unsigned long); | ||
600 | |||
601 | if (!pages || !bitmap) { | ||
602 | if (may_alloc && !pages) | ||
603 | pages = pcpu_mem_alloc(pages_size); | ||
604 | if (may_alloc && !bitmap) | ||
605 | bitmap = pcpu_mem_alloc(bitmap_size); | ||
606 | if (!pages || !bitmap) | ||
607 | return NULL; | ||
608 | } | ||
609 | |||
610 | memset(pages, 0, pages_size); | ||
611 | bitmap_copy(bitmap, chunk->populated, pcpu_unit_pages); | ||
612 | |||
613 | *bitmapp = bitmap; | ||
614 | return pages; | ||
615 | } | ||
616 | |||
617 | /** | ||
618 | * pcpu_free_pages - free pages which were allocated for @chunk | ||
619 | * @chunk: chunk pages were allocated for | ||
620 | * @pages: array of pages to be freed, indexed by pcpu_page_idx() | ||
621 | * @populated: populated bitmap | ||
622 | * @page_start: page index of the first page to be freed | ||
623 | * @page_end: page index of the last page to be freed + 1 | ||
624 | * | ||
625 | * Free pages [@page_start,@page_end) in @pages for all units. | ||
626 | * The pages were allocated for @chunk. | ||
627 | */ | ||
628 | static void pcpu_free_pages(struct pcpu_chunk *chunk, | ||
629 | struct page **pages, unsigned long *populated, | ||
630 | int page_start, int page_end) | ||
631 | { | ||
632 | unsigned int cpu; | ||
633 | int i; | ||
634 | |||
635 | for_each_possible_cpu(cpu) { | ||
636 | for (i = page_start; i < page_end; i++) { | ||
637 | struct page *page = pages[pcpu_page_idx(cpu, i)]; | ||
638 | |||
639 | if (page) | ||
640 | __free_page(page); | ||
641 | } | ||
642 | } | ||
643 | } | ||
644 | |||
645 | /** | ||
646 | * pcpu_alloc_pages - allocates pages for @chunk | ||
647 | * @chunk: target chunk | ||
648 | * @pages: array to put the allocated pages into, indexed by pcpu_page_idx() | ||
649 | * @populated: populated bitmap | ||
650 | * @page_start: page index of the first page to be allocated | ||
651 | * @page_end: page index of the last page to be allocated + 1 | ||
652 | * | ||
653 | * Allocate pages [@page_start,@page_end) into @pages for all units. | ||
654 | * The allocation is for @chunk. Percpu core doesn't care about the | ||
655 | * content of @pages and will pass it verbatim to pcpu_map_pages(). | ||
656 | */ | ||
657 | static int pcpu_alloc_pages(struct pcpu_chunk *chunk, | ||
658 | struct page **pages, unsigned long *populated, | ||
659 | int page_start, int page_end) | ||
660 | { | ||
661 | const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD; | ||
662 | unsigned int cpu; | ||
663 | int i; | ||
664 | |||
665 | for_each_possible_cpu(cpu) { | ||
666 | for (i = page_start; i < page_end; i++) { | ||
667 | struct page **pagep = &pages[pcpu_page_idx(cpu, i)]; | ||
668 | |||
669 | *pagep = alloc_pages_node(cpu_to_node(cpu), gfp, 0); | ||
670 | if (!*pagep) { | ||
671 | pcpu_free_pages(chunk, pages, populated, | ||
672 | page_start, page_end); | ||
673 | return -ENOMEM; | ||
674 | } | ||
675 | } | ||
676 | } | ||
677 | return 0; | ||
678 | } | ||
679 | |||
680 | /** | ||
681 | * pcpu_pre_unmap_flush - flush cache prior to unmapping | ||
682 | * @chunk: chunk the regions to be flushed belongs to | ||
683 | * @page_start: page index of the first page to be flushed | ||
684 | * @page_end: page index of the last page to be flushed + 1 | ||
685 | * | ||
686 | * Pages in [@page_start,@page_end) of @chunk are about to be | ||
687 | * unmapped. Flush cache. As each flushing trial can be very | ||
688 | * expensive, issue flush on the whole region at once rather than | ||
689 | * doing it for each cpu. This could be an overkill but is more | ||
690 | * scalable. | ||
691 | */ | ||
692 | static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk, | ||
693 | int page_start, int page_end) | ||
694 | { | ||
695 | unsigned int last = num_possible_cpus() - 1; | ||
696 | |||
697 | flush_cache_vunmap(pcpu_chunk_addr(chunk, 0, page_start), | ||
698 | pcpu_chunk_addr(chunk, last, page_end)); | ||
699 | } | ||
700 | |||
701 | static void __pcpu_unmap_pages(unsigned long addr, int nr_pages) | ||
702 | { | ||
703 | unmap_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT); | ||
704 | } | ||
705 | |||
706 | /** | ||
707 | * pcpu_unmap_pages - unmap pages out of a pcpu_chunk | ||
549 | * @chunk: chunk of interest | 708 | * @chunk: chunk of interest |
709 | * @pages: pages array which can be used to pass information to free | ||
710 | * @populated: populated bitmap | ||
550 | * @page_start: page index of the first page to unmap | 711 | * @page_start: page index of the first page to unmap |
551 | * @page_end: page index of the last page to unmap + 1 | 712 | * @page_end: page index of the last page to unmap + 1 |
552 | * @flush_tlb: whether to flush tlb or not | ||
553 | * | 713 | * |
554 | * For each cpu, unmap pages [@page_start,@page_end) out of @chunk. | 714 | * For each cpu, unmap pages [@page_start,@page_end) out of @chunk. |
555 | * If @flush is true, vcache is flushed before unmapping and tlb | 715 | * Corresponding elements in @pages were cleared by the caller and can |
556 | * after. | 716 | * be used to carry information to pcpu_free_pages() which will be |
717 | * called after all unmaps are finished. The caller should call | ||
718 | * proper pre/post flush functions. | ||
557 | */ | 719 | */ |
558 | static void pcpu_unmap(struct pcpu_chunk *chunk, int page_start, int page_end, | 720 | static void pcpu_unmap_pages(struct pcpu_chunk *chunk, |
559 | bool flush_tlb) | 721 | struct page **pages, unsigned long *populated, |
722 | int page_start, int page_end) | ||
560 | { | 723 | { |
561 | unsigned int last = num_possible_cpus() - 1; | ||
562 | unsigned int cpu; | 724 | unsigned int cpu; |
725 | int i; | ||
563 | 726 | ||
564 | /* unmap must not be done on immutable chunk */ | 727 | for_each_possible_cpu(cpu) { |
565 | WARN_ON(chunk->immutable); | 728 | for (i = page_start; i < page_end; i++) { |
729 | struct page *page; | ||
566 | 730 | ||
567 | /* | 731 | page = pcpu_chunk_page(chunk, cpu, i); |
568 | * Each flushing trial can be very expensive, issue flush on | 732 | WARN_ON(!page); |
569 | * the whole region at once rather than doing it for each cpu. | 733 | pages[pcpu_page_idx(cpu, i)] = page; |
570 | * This could be an overkill but is more scalable. | 734 | } |
571 | */ | 735 | __pcpu_unmap_pages(pcpu_chunk_addr(chunk, cpu, page_start), |
572 | flush_cache_vunmap(pcpu_chunk_addr(chunk, 0, page_start), | 736 | page_end - page_start); |
573 | pcpu_chunk_addr(chunk, last, page_end)); | 737 | } |
574 | 738 | ||
575 | for_each_possible_cpu(cpu) | 739 | for (i = page_start; i < page_end; i++) |
576 | unmap_kernel_range_noflush( | 740 | __clear_bit(i, populated); |
577 | pcpu_chunk_addr(chunk, cpu, page_start), | 741 | } |
578 | (page_end - page_start) << PAGE_SHIFT); | 742 | |
579 | 743 | /** | |
580 | /* ditto as flush_cache_vunmap() */ | 744 | * pcpu_post_unmap_tlb_flush - flush TLB after unmapping |
581 | if (flush_tlb) | 745 | * @chunk: pcpu_chunk the regions to be flushed belong to |
582 | flush_tlb_kernel_range(pcpu_chunk_addr(chunk, 0, page_start), | 746 | * @page_start: page index of the first page to be flushed |
583 | pcpu_chunk_addr(chunk, last, page_end)); | 747 | * @page_end: page index of the last page to be flushed + 1 |
748 | * | ||
749 | * Pages [@page_start,@page_end) of @chunk have been unmapped. Flush | ||
750 | * TLB for the regions. This can be skipped if the area is to be | ||
751 | * returned to vmalloc as vmalloc will handle TLB flushing lazily. | ||
752 | * | ||
753 | * As with pcpu_pre_unmap_flush(), TLB flushing also is done at once | ||
754 | * for the whole region. | ||
755 | */ | ||
756 | static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk, | ||
757 | int page_start, int page_end) | ||
758 | { | ||
759 | unsigned int last = num_possible_cpus() - 1; | ||
760 | |||
761 | flush_tlb_kernel_range(pcpu_chunk_addr(chunk, 0, page_start), | ||
762 | pcpu_chunk_addr(chunk, last, page_end)); | ||
584 | } | 763 | } |
585 | 764 | ||
586 | static int __pcpu_map_pages(unsigned long addr, struct page **pages, | 765 | static int __pcpu_map_pages(unsigned long addr, struct page **pages, |
@@ -591,35 +770,76 @@ static int __pcpu_map_pages(unsigned long addr, struct page **pages, | |||
591 | } | 770 | } |
592 | 771 | ||
593 | /** | 772 | /** |
594 | * pcpu_map - map pages into a pcpu_chunk | 773 | * pcpu_map_pages - map pages into a pcpu_chunk |
595 | * @chunk: chunk of interest | 774 | * @chunk: chunk of interest |
775 | * @pages: pages array containing pages to be mapped | ||
776 | * @populated: populated bitmap | ||
596 | * @page_start: page index of the first page to map | 777 | * @page_start: page index of the first page to map |
597 | * @page_end: page index of the last page to map + 1 | 778 | * @page_end: page index of the last page to map + 1 |
598 | * | 779 | * |
599 | * For each cpu, map pages [@page_start,@page_end) into @chunk. | 780 | * For each cpu, map pages [@page_start,@page_end) into @chunk. The |
600 | * vcache is flushed afterwards. | 781 | * caller is responsible for calling pcpu_post_map_flush() after all |
782 | * mappings are complete. | ||
783 | * | ||
784 | * This function is responsible for setting corresponding bits in | ||
785 | * @chunk->populated bitmap and whatever is necessary for reverse | ||
786 | * lookup (addr -> chunk). | ||
601 | */ | 787 | */ |
602 | static int pcpu_map(struct pcpu_chunk *chunk, int page_start, int page_end) | 788 | static int pcpu_map_pages(struct pcpu_chunk *chunk, |
789 | struct page **pages, unsigned long *populated, | ||
790 | int page_start, int page_end) | ||
603 | { | 791 | { |
604 | unsigned int last = num_possible_cpus() - 1; | 792 | unsigned int cpu, tcpu; |
605 | unsigned int cpu; | 793 | int i, err; |
606 | int err; | ||
607 | |||
608 | /* map must not be done on immutable chunk */ | ||
609 | WARN_ON(chunk->immutable); | ||
610 | 794 | ||
611 | for_each_possible_cpu(cpu) { | 795 | for_each_possible_cpu(cpu) { |
612 | err = __pcpu_map_pages(pcpu_chunk_addr(chunk, cpu, page_start), | 796 | err = __pcpu_map_pages(pcpu_chunk_addr(chunk, cpu, page_start), |
613 | pcpu_chunk_pagep(chunk, cpu, page_start), | 797 | &pages[pcpu_page_idx(cpu, page_start)], |
614 | page_end - page_start); | 798 | page_end - page_start); |
615 | if (err < 0) | 799 | if (err < 0) |
616 | return err; | 800 | goto err; |
617 | } | 801 | } |
618 | 802 | ||
803 | /* mapping successful, link chunk and mark populated */ | ||
804 | for (i = page_start; i < page_end; i++) { | ||
805 | for_each_possible_cpu(cpu) | ||
806 | pcpu_set_page_chunk(pages[pcpu_page_idx(cpu, i)], | ||
807 | chunk); | ||
808 | __set_bit(i, populated); | ||
809 | } | ||
810 | |||
811 | return 0; | ||
812 | |||
813 | err: | ||
814 | for_each_possible_cpu(tcpu) { | ||
815 | if (tcpu == cpu) | ||
816 | break; | ||
817 | __pcpu_unmap_pages(pcpu_chunk_addr(chunk, tcpu, page_start), | ||
818 | page_end - page_start); | ||
819 | } | ||
820 | return err; | ||
821 | } | ||
822 | |||
823 | /** | ||
824 | * pcpu_post_map_flush - flush cache after mapping | ||
825 | * @chunk: pcpu_chunk the regions to be flushed belong to | ||
826 | * @page_start: page index of the first page to be flushed | ||
827 | * @page_end: page index of the last page to be flushed + 1 | ||
828 | * | ||
829 | * Pages [@page_start,@page_end) of @chunk have been mapped. Flush | ||
830 | * cache. | ||
831 | * | ||
832 | * As with pcpu_pre_unmap_flush(), cache flushing also is done at once | ||
833 | * for the whole region. | ||
834 | */ | ||
835 | static void pcpu_post_map_flush(struct pcpu_chunk *chunk, | ||
836 | int page_start, int page_end) | ||
837 | { | ||
838 | unsigned int last = num_possible_cpus() - 1; | ||
839 | |||
619 | /* flush at once, please read comments in pcpu_unmap() */ | 840 | /* flush at once, please read comments in pcpu_unmap() */ |
620 | flush_cache_vmap(pcpu_chunk_addr(chunk, 0, page_start), | 841 | flush_cache_vmap(pcpu_chunk_addr(chunk, 0, page_start), |
621 | pcpu_chunk_addr(chunk, last, page_end)); | 842 | pcpu_chunk_addr(chunk, last, page_end)); |
622 | return 0; | ||
623 | } | 843 | } |
624 | 844 | ||
625 | /** | 845 | /** |
@@ -636,39 +856,45 @@ static int pcpu_map(struct pcpu_chunk *chunk, int page_start, int page_end) | |||
636 | * CONTEXT: | 856 | * CONTEXT: |
637 | * pcpu_alloc_mutex. | 857 | * pcpu_alloc_mutex. |
638 | */ | 858 | */ |
639 | static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size, | 859 | static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size) |
640 | bool flush) | ||
641 | { | 860 | { |
642 | int page_start = PFN_DOWN(off); | 861 | int page_start = PFN_DOWN(off); |
643 | int page_end = PFN_UP(off + size); | 862 | int page_end = PFN_UP(off + size); |
644 | int unmap_start = -1; | 863 | struct page **pages; |
645 | int uninitialized_var(unmap_end); | 864 | unsigned long *populated; |
646 | unsigned int cpu; | 865 | int rs, re; |
647 | int i; | 866 | |
867 | /* quick path, check whether it's empty already */ | ||
868 | pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) { | ||
869 | if (rs == page_start && re == page_end) | ||
870 | return; | ||
871 | break; | ||
872 | } | ||
648 | 873 | ||
649 | for (i = page_start; i < page_end; i++) { | 874 | /* immutable chunks can't be depopulated */ |
650 | for_each_possible_cpu(cpu) { | 875 | WARN_ON(chunk->immutable); |
651 | struct page **pagep = pcpu_chunk_pagep(chunk, cpu, i); | ||
652 | 876 | ||
653 | if (!*pagep) | 877 | /* |
654 | continue; | 878 | * If control reaches here, there must have been at least one |
879 | * successful population attempt so the temp pages array must | ||
880 | * be available now. | ||
881 | */ | ||
882 | pages = pcpu_get_pages_and_bitmap(chunk, &populated, false); | ||
883 | BUG_ON(!pages); | ||
655 | 884 | ||
656 | __free_page(*pagep); | 885 | /* unmap and free */ |
886 | pcpu_pre_unmap_flush(chunk, page_start, page_end); | ||
657 | 887 | ||
658 | /* | 888 | pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) |
659 | * If it's partial depopulation, it might get | 889 | pcpu_unmap_pages(chunk, pages, populated, rs, re); |
660 | * populated or depopulated again. Mark the | ||
661 | * page gone. | ||
662 | */ | ||
663 | *pagep = NULL; | ||
664 | 890 | ||
665 | unmap_start = unmap_start < 0 ? i : unmap_start; | 891 | /* no need to flush tlb, vmalloc will handle it lazily */ |
666 | unmap_end = i + 1; | 892 | |
667 | } | 893 | pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) |
668 | } | 894 | pcpu_free_pages(chunk, pages, populated, rs, re); |
669 | 895 | ||
670 | if (unmap_start >= 0) | 896 | /* commit new bitmap */ |
671 | pcpu_unmap(chunk, unmap_start, unmap_end, flush); | 897 | bitmap_copy(chunk->populated, populated, pcpu_unit_pages); |
672 | } | 898 | } |
673 | 899 | ||
674 | /** | 900 | /** |
@@ -685,50 +911,61 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size, | |||
685 | */ | 911 | */ |
686 | static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size) | 912 | static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size) |
687 | { | 913 | { |
688 | const gfp_t alloc_mask = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD; | ||
689 | int page_start = PFN_DOWN(off); | 914 | int page_start = PFN_DOWN(off); |
690 | int page_end = PFN_UP(off + size); | 915 | int page_end = PFN_UP(off + size); |
691 | int map_start = -1; | 916 | int free_end = page_start, unmap_end = page_start; |
692 | int uninitialized_var(map_end); | 917 | struct page **pages; |
918 | unsigned long *populated; | ||
693 | unsigned int cpu; | 919 | unsigned int cpu; |
694 | int i; | 920 | int rs, re, rc; |
695 | 921 | ||
696 | for (i = page_start; i < page_end; i++) { | 922 | /* quick path, check whether all pages are already there */ |
697 | if (pcpu_chunk_page_occupied(chunk, i)) { | 923 | pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) { |
698 | if (map_start >= 0) { | 924 | if (rs == page_start && re == page_end) |
699 | if (pcpu_map(chunk, map_start, map_end)) | 925 | goto clear; |
700 | goto err; | 926 | break; |
701 | map_start = -1; | 927 | } |
702 | } | ||
703 | continue; | ||
704 | } | ||
705 | 928 | ||
706 | map_start = map_start < 0 ? i : map_start; | 929 | /* need to allocate and map pages, this chunk can't be immutable */ |
707 | map_end = i + 1; | 930 | WARN_ON(chunk->immutable); |
708 | 931 | ||
709 | for_each_possible_cpu(cpu) { | 932 | pages = pcpu_get_pages_and_bitmap(chunk, &populated, true); |
710 | struct page **pagep = pcpu_chunk_pagep(chunk, cpu, i); | 933 | if (!pages) |
934 | return -ENOMEM; | ||
711 | 935 | ||
712 | *pagep = alloc_pages_node(cpu_to_node(cpu), | 936 | /* alloc and map */ |
713 | alloc_mask, 0); | 937 | pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) { |
714 | if (!*pagep) | 938 | rc = pcpu_alloc_pages(chunk, pages, populated, rs, re); |
715 | goto err; | 939 | if (rc) |
716 | pcpu_set_page_chunk(*pagep, chunk); | 940 | goto err_free; |
717 | } | 941 | free_end = re; |
718 | } | 942 | } |
719 | 943 | ||
720 | if (map_start >= 0 && pcpu_map(chunk, map_start, map_end)) | 944 | pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) { |
721 | goto err; | 945 | rc = pcpu_map_pages(chunk, pages, populated, rs, re); |
946 | if (rc) | ||
947 | goto err_unmap; | ||
948 | unmap_end = re; | ||
949 | } | ||
950 | pcpu_post_map_flush(chunk, page_start, page_end); | ||
722 | 951 | ||
952 | /* commit new bitmap */ | ||
953 | bitmap_copy(chunk->populated, populated, pcpu_unit_pages); | ||
954 | clear: | ||
723 | for_each_possible_cpu(cpu) | 955 | for_each_possible_cpu(cpu) |
724 | memset(chunk->vm->addr + cpu * pcpu_unit_size + off, 0, | 956 | memset(chunk->vm->addr + cpu * pcpu_unit_size + off, 0, |
725 | size); | 957 | size); |
726 | |||
727 | return 0; | 958 | return 0; |
728 | err: | 959 | |
729 | /* likely under heavy memory pressure, give memory back */ | 960 | err_unmap: |
730 | pcpu_depopulate_chunk(chunk, off, size, true); | 961 | pcpu_pre_unmap_flush(chunk, page_start, unmap_end); |
731 | return -ENOMEM; | 962 | pcpu_for_each_unpop_region(chunk, rs, re, page_start, unmap_end) |
963 | pcpu_unmap_pages(chunk, pages, populated, rs, re); | ||
964 | pcpu_post_unmap_tlb_flush(chunk, page_start, unmap_end); | ||
965 | err_free: | ||
966 | pcpu_for_each_unpop_region(chunk, rs, re, page_start, free_end) | ||
967 | pcpu_free_pages(chunk, pages, populated, rs, re); | ||
968 | return rc; | ||
732 | } | 969 | } |
733 | 970 | ||
734 | static void free_pcpu_chunk(struct pcpu_chunk *chunk) | 971 | static void free_pcpu_chunk(struct pcpu_chunk *chunk) |
@@ -752,7 +989,6 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void) | |||
752 | chunk->map = pcpu_mem_alloc(PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0])); | 989 | chunk->map = pcpu_mem_alloc(PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0])); |
753 | chunk->map_alloc = PCPU_DFL_MAP_ALLOC; | 990 | chunk->map_alloc = PCPU_DFL_MAP_ALLOC; |
754 | chunk->map[chunk->map_used++] = pcpu_unit_size; | 991 | chunk->map[chunk->map_used++] = pcpu_unit_size; |
755 | chunk->page = chunk->page_ar; | ||
756 | 992 | ||
757 | chunk->vm = get_vm_area(pcpu_chunk_size, GFP_KERNEL); | 993 | chunk->vm = get_vm_area(pcpu_chunk_size, GFP_KERNEL); |
758 | if (!chunk->vm) { | 994 | if (!chunk->vm) { |
@@ -933,7 +1169,7 @@ static void pcpu_reclaim(struct work_struct *work) | |||
933 | mutex_unlock(&pcpu_alloc_mutex); | 1169 | mutex_unlock(&pcpu_alloc_mutex); |
934 | 1170 | ||
935 | list_for_each_entry_safe(chunk, next, &todo, list) { | 1171 | list_for_each_entry_safe(chunk, next, &todo, list) { |
936 | pcpu_depopulate_chunk(chunk, 0, pcpu_unit_size, false); | 1172 | pcpu_depopulate_chunk(chunk, 0, pcpu_unit_size); |
937 | free_pcpu_chunk(chunk); | 1173 | free_pcpu_chunk(chunk); |
938 | } | 1174 | } |
939 | } | 1175 | } |
@@ -981,7 +1217,6 @@ EXPORT_SYMBOL_GPL(free_percpu); | |||
981 | 1217 | ||
982 | /** | 1218 | /** |
983 | * pcpu_setup_first_chunk - initialize the first percpu chunk | 1219 | * pcpu_setup_first_chunk - initialize the first percpu chunk |
984 | * @get_page_fn: callback to fetch page pointer | ||
985 | * @static_size: the size of static percpu area in bytes | 1220 | * @static_size: the size of static percpu area in bytes |
986 | * @reserved_size: the size of reserved percpu area in bytes, 0 for none | 1221 | * @reserved_size: the size of reserved percpu area in bytes, 0 for none |
987 | * @dyn_size: free size for dynamic allocation in bytes, -1 for auto | 1222 | * @dyn_size: free size for dynamic allocation in bytes, -1 for auto |
@@ -992,14 +1227,6 @@ EXPORT_SYMBOL_GPL(free_percpu); | |||
992 | * perpcu area. This function is to be called from arch percpu area | 1227 | * perpcu area. This function is to be called from arch percpu area |
993 | * setup path. | 1228 | * setup path. |
994 | * | 1229 | * |
995 | * @get_page_fn() should return pointer to percpu page given cpu | ||
996 | * number and page number. It should at least return enough pages to | ||
997 | * cover the static area. The returned pages for static area should | ||
998 | * have been initialized with valid data. It can also return pages | ||
999 | * after the static area. NULL return indicates end of pages for the | ||
1000 | * cpu. Note that @get_page_fn() must return the same number of pages | ||
1001 | * for all cpus. | ||
1002 | * | ||
1003 | * @reserved_size, if non-zero, specifies the amount of bytes to | 1230 | * @reserved_size, if non-zero, specifies the amount of bytes to |
1004 | * reserve after the static area in the first chunk. This reserves | 1231 | * reserve after the static area in the first chunk. This reserves |
1005 | * the first chunk such that it's available only through reserved | 1232 | * the first chunk such that it's available only through reserved |
@@ -1031,8 +1258,7 @@ EXPORT_SYMBOL_GPL(free_percpu); | |||
1031 | * The determined pcpu_unit_size which can be used to initialize | 1258 | * The determined pcpu_unit_size which can be used to initialize |
1032 | * percpu access. | 1259 | * percpu access. |
1033 | */ | 1260 | */ |
1034 | size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, | 1261 | size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, |
1035 | size_t static_size, size_t reserved_size, | ||
1036 | ssize_t dyn_size, size_t unit_size, | 1262 | ssize_t dyn_size, size_t unit_size, |
1037 | void *base_addr) | 1263 | void *base_addr) |
1038 | { | 1264 | { |
@@ -1041,8 +1267,7 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, | |||
1041 | size_t size_sum = static_size + reserved_size + | 1267 | size_t size_sum = static_size + reserved_size + |
1042 | (dyn_size >= 0 ? dyn_size : 0); | 1268 | (dyn_size >= 0 ? dyn_size : 0); |
1043 | struct pcpu_chunk *schunk, *dchunk = NULL; | 1269 | struct pcpu_chunk *schunk, *dchunk = NULL; |
1044 | unsigned int cpu; | 1270 | int i; |
1045 | int i, nr_pages; | ||
1046 | 1271 | ||
1047 | /* santiy checks */ | 1272 | /* santiy checks */ |
1048 | BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC || | 1273 | BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC || |
@@ -1056,8 +1281,8 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, | |||
1056 | pcpu_unit_pages = unit_size >> PAGE_SHIFT; | 1281 | pcpu_unit_pages = unit_size >> PAGE_SHIFT; |
1057 | pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; | 1282 | pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; |
1058 | pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; | 1283 | pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; |
1059 | pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) | 1284 | pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) + |
1060 | + num_possible_cpus() * pcpu_unit_pages * sizeof(struct page *); | 1285 | BITS_TO_LONGS(pcpu_unit_pages) * sizeof(unsigned long); |
1061 | 1286 | ||
1062 | if (dyn_size < 0) | 1287 | if (dyn_size < 0) |
1063 | dyn_size = pcpu_unit_size - static_size - reserved_size; | 1288 | dyn_size = pcpu_unit_size - static_size - reserved_size; |
@@ -1087,8 +1312,8 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, | |||
1087 | schunk->vm = &first_vm; | 1312 | schunk->vm = &first_vm; |
1088 | schunk->map = smap; | 1313 | schunk->map = smap; |
1089 | schunk->map_alloc = ARRAY_SIZE(smap); | 1314 | schunk->map_alloc = ARRAY_SIZE(smap); |
1090 | schunk->page = schunk->page_ar; | ||
1091 | schunk->immutable = true; | 1315 | schunk->immutable = true; |
1316 | bitmap_fill(schunk->populated, pcpu_unit_pages); | ||
1092 | 1317 | ||
1093 | if (reserved_size) { | 1318 | if (reserved_size) { |
1094 | schunk->free_size = reserved_size; | 1319 | schunk->free_size = reserved_size; |
@@ -1106,38 +1331,19 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, | |||
1106 | 1331 | ||
1107 | /* init dynamic chunk if necessary */ | 1332 | /* init dynamic chunk if necessary */ |
1108 | if (dyn_size) { | 1333 | if (dyn_size) { |
1109 | dchunk = alloc_bootmem(sizeof(struct pcpu_chunk)); | 1334 | dchunk = alloc_bootmem(pcpu_chunk_struct_size); |
1110 | INIT_LIST_HEAD(&dchunk->list); | 1335 | INIT_LIST_HEAD(&dchunk->list); |
1111 | dchunk->vm = &first_vm; | 1336 | dchunk->vm = &first_vm; |
1112 | dchunk->map = dmap; | 1337 | dchunk->map = dmap; |
1113 | dchunk->map_alloc = ARRAY_SIZE(dmap); | 1338 | dchunk->map_alloc = ARRAY_SIZE(dmap); |
1114 | dchunk->page = schunk->page_ar; /* share page map with schunk */ | ||
1115 | dchunk->immutable = true; | 1339 | dchunk->immutable = true; |
1340 | bitmap_fill(dchunk->populated, pcpu_unit_pages); | ||
1116 | 1341 | ||
1117 | dchunk->contig_hint = dchunk->free_size = dyn_size; | 1342 | dchunk->contig_hint = dchunk->free_size = dyn_size; |
1118 | dchunk->map[dchunk->map_used++] = -pcpu_reserved_chunk_limit; | 1343 | dchunk->map[dchunk->map_used++] = -pcpu_reserved_chunk_limit; |
1119 | dchunk->map[dchunk->map_used++] = dchunk->free_size; | 1344 | dchunk->map[dchunk->map_used++] = dchunk->free_size; |
1120 | } | 1345 | } |
1121 | 1346 | ||
1122 | /* assign pages */ | ||
1123 | nr_pages = -1; | ||
1124 | for_each_possible_cpu(cpu) { | ||
1125 | for (i = 0; i < pcpu_unit_pages; i++) { | ||
1126 | struct page *page = get_page_fn(cpu, i); | ||
1127 | |||
1128 | if (!page) | ||
1129 | break; | ||
1130 | *pcpu_chunk_pagep(schunk, cpu, i) = page; | ||
1131 | } | ||
1132 | |||
1133 | BUG_ON(i < PFN_UP(static_size)); | ||
1134 | |||
1135 | if (nr_pages < 0) | ||
1136 | nr_pages = i; | ||
1137 | else | ||
1138 | BUG_ON(nr_pages != i); | ||
1139 | } | ||
1140 | |||
1141 | /* link the first chunk in */ | 1347 | /* link the first chunk in */ |
1142 | pcpu_first_chunk = dchunk ?: schunk; | 1348 | pcpu_first_chunk = dchunk ?: schunk; |
1143 | pcpu_chunk_relocate(pcpu_first_chunk, -1); | 1349 | pcpu_chunk_relocate(pcpu_first_chunk, -1); |
@@ -1160,23 +1366,6 @@ static size_t pcpu_calc_fc_sizes(size_t static_size, size_t reserved_size, | |||
1160 | return size_sum; | 1366 | return size_sum; |
1161 | } | 1367 | } |
1162 | 1368 | ||
1163 | /* | ||
1164 | * Embedding first chunk setup helper. | ||
1165 | */ | ||
1166 | static void *pcpue_ptr __initdata; | ||
1167 | static size_t pcpue_size __initdata; | ||
1168 | static size_t pcpue_unit_size __initdata; | ||
1169 | |||
1170 | static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) | ||
1171 | { | ||
1172 | size_t off = (size_t)pageno << PAGE_SHIFT; | ||
1173 | |||
1174 | if (off >= pcpue_size) | ||
1175 | return NULL; | ||
1176 | |||
1177 | return virt_to_page(pcpue_ptr + cpu * pcpue_unit_size + off); | ||
1178 | } | ||
1179 | |||
1180 | /** | 1369 | /** |
1181 | * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem | 1370 | * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem |
1182 | * @static_size: the size of static percpu area in bytes | 1371 | * @static_size: the size of static percpu area in bytes |
@@ -1207,18 +1396,19 @@ static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) | |||
1207 | ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, | 1396 | ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, |
1208 | ssize_t dyn_size) | 1397 | ssize_t dyn_size) |
1209 | { | 1398 | { |
1210 | size_t chunk_size; | 1399 | size_t size_sum, unit_size, chunk_size; |
1400 | void *base; | ||
1211 | unsigned int cpu; | 1401 | unsigned int cpu; |
1212 | 1402 | ||
1213 | /* determine parameters and allocate */ | 1403 | /* determine parameters and allocate */ |
1214 | pcpue_size = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); | 1404 | size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); |
1215 | 1405 | ||
1216 | pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); | 1406 | unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); |
1217 | chunk_size = pcpue_unit_size * num_possible_cpus(); | 1407 | chunk_size = unit_size * num_possible_cpus(); |
1218 | 1408 | ||
1219 | pcpue_ptr = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE, | 1409 | base = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE, |
1220 | __pa(MAX_DMA_ADDRESS)); | 1410 | __pa(MAX_DMA_ADDRESS)); |
1221 | if (!pcpue_ptr) { | 1411 | if (!base) { |
1222 | pr_warning("PERCPU: failed to allocate %zu bytes for " | 1412 | pr_warning("PERCPU: failed to allocate %zu bytes for " |
1223 | "embedding\n", chunk_size); | 1413 | "embedding\n", chunk_size); |
1224 | return -ENOMEM; | 1414 | return -ENOMEM; |
@@ -1226,33 +1416,18 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, | |||
1226 | 1416 | ||
1227 | /* return the leftover and copy */ | 1417 | /* return the leftover and copy */ |
1228 | for_each_possible_cpu(cpu) { | 1418 | for_each_possible_cpu(cpu) { |
1229 | void *ptr = pcpue_ptr + cpu * pcpue_unit_size; | 1419 | void *ptr = base + cpu * unit_size; |
1230 | 1420 | ||
1231 | free_bootmem(__pa(ptr + pcpue_size), | 1421 | free_bootmem(__pa(ptr + size_sum), unit_size - size_sum); |
1232 | pcpue_unit_size - pcpue_size); | ||
1233 | memcpy(ptr, __per_cpu_load, static_size); | 1422 | memcpy(ptr, __per_cpu_load, static_size); |
1234 | } | 1423 | } |
1235 | 1424 | ||
1236 | /* we're ready, commit */ | 1425 | /* we're ready, commit */ |
1237 | pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n", | 1426 | pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n", |
1238 | pcpue_size >> PAGE_SHIFT, pcpue_ptr, static_size); | 1427 | size_sum >> PAGE_SHIFT, base, static_size); |
1239 | 1428 | ||
1240 | return pcpu_setup_first_chunk(pcpue_get_page, static_size, | 1429 | return pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, |
1241 | reserved_size, dyn_size, | 1430 | unit_size, base); |
1242 | pcpue_unit_size, pcpue_ptr); | ||
1243 | } | ||
1244 | |||
1245 | /* | ||
1246 | * 4k page first chunk setup helper. | ||
1247 | */ | ||
1248 | static struct page **pcpu4k_pages __initdata; | ||
1249 | static int pcpu4k_unit_pages __initdata; | ||
1250 | |||
1251 | static struct page * __init pcpu4k_get_page(unsigned int cpu, int pageno) | ||
1252 | { | ||
1253 | if (pageno < pcpu4k_unit_pages) | ||
1254 | return pcpu4k_pages[cpu * pcpu4k_unit_pages + pageno]; | ||
1255 | return NULL; | ||
1256 | } | 1431 | } |
1257 | 1432 | ||
1258 | /** | 1433 | /** |
@@ -1279,23 +1454,25 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, | |||
1279 | pcpu_fc_populate_pte_fn_t populate_pte_fn) | 1454 | pcpu_fc_populate_pte_fn_t populate_pte_fn) |
1280 | { | 1455 | { |
1281 | static struct vm_struct vm; | 1456 | static struct vm_struct vm; |
1457 | int unit_pages; | ||
1282 | size_t pages_size; | 1458 | size_t pages_size; |
1459 | struct page **pages; | ||
1283 | unsigned int cpu; | 1460 | unsigned int cpu; |
1284 | int i, j; | 1461 | int i, j; |
1285 | ssize_t ret; | 1462 | ssize_t ret; |
1286 | 1463 | ||
1287 | pcpu4k_unit_pages = PFN_UP(max_t(size_t, static_size + reserved_size, | 1464 | unit_pages = PFN_UP(max_t(size_t, static_size + reserved_size, |
1288 | PCPU_MIN_UNIT_SIZE)); | 1465 | PCPU_MIN_UNIT_SIZE)); |
1289 | 1466 | ||
1290 | /* unaligned allocations can't be freed, round up to page size */ | 1467 | /* unaligned allocations can't be freed, round up to page size */ |
1291 | pages_size = PFN_ALIGN(pcpu4k_unit_pages * num_possible_cpus() * | 1468 | pages_size = PFN_ALIGN(unit_pages * num_possible_cpus() * |
1292 | sizeof(pcpu4k_pages[0])); | 1469 | sizeof(pages[0])); |
1293 | pcpu4k_pages = alloc_bootmem(pages_size); | 1470 | pages = alloc_bootmem(pages_size); |
1294 | 1471 | ||
1295 | /* allocate pages */ | 1472 | /* allocate pages */ |
1296 | j = 0; | 1473 | j = 0; |
1297 | for_each_possible_cpu(cpu) | 1474 | for_each_possible_cpu(cpu) |
1298 | for (i = 0; i < pcpu4k_unit_pages; i++) { | 1475 | for (i = 0; i < unit_pages; i++) { |
1299 | void *ptr; | 1476 | void *ptr; |
1300 | 1477 | ||
1301 | ptr = alloc_fn(cpu, PAGE_SIZE); | 1478 | ptr = alloc_fn(cpu, PAGE_SIZE); |
@@ -1304,25 +1481,24 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, | |||
1304 | "4k page for cpu%u\n", cpu); | 1481 | "4k page for cpu%u\n", cpu); |
1305 | goto enomem; | 1482 | goto enomem; |
1306 | } | 1483 | } |
1307 | pcpu4k_pages[j++] = virt_to_page(ptr); | 1484 | pages[j++] = virt_to_page(ptr); |
1308 | } | 1485 | } |
1309 | 1486 | ||
1310 | /* allocate vm area, map the pages and copy static data */ | 1487 | /* allocate vm area, map the pages and copy static data */ |
1311 | vm.flags = VM_ALLOC; | 1488 | vm.flags = VM_ALLOC; |
1312 | vm.size = num_possible_cpus() * pcpu4k_unit_pages << PAGE_SHIFT; | 1489 | vm.size = num_possible_cpus() * unit_pages << PAGE_SHIFT; |
1313 | vm_area_register_early(&vm, PAGE_SIZE); | 1490 | vm_area_register_early(&vm, PAGE_SIZE); |
1314 | 1491 | ||
1315 | for_each_possible_cpu(cpu) { | 1492 | for_each_possible_cpu(cpu) { |
1316 | unsigned long unit_addr = (unsigned long)vm.addr + | 1493 | unsigned long unit_addr = (unsigned long)vm.addr + |
1317 | (cpu * pcpu4k_unit_pages << PAGE_SHIFT); | 1494 | (cpu * unit_pages << PAGE_SHIFT); |
1318 | 1495 | ||
1319 | for (i = 0; i < pcpu4k_unit_pages; i++) | 1496 | for (i = 0; i < unit_pages; i++) |
1320 | populate_pte_fn(unit_addr + (i << PAGE_SHIFT)); | 1497 | populate_pte_fn(unit_addr + (i << PAGE_SHIFT)); |
1321 | 1498 | ||
1322 | /* pte already populated, the following shouldn't fail */ | 1499 | /* pte already populated, the following shouldn't fail */ |
1323 | ret = __pcpu_map_pages(unit_addr, | 1500 | ret = __pcpu_map_pages(unit_addr, &pages[cpu * unit_pages], |
1324 | &pcpu4k_pages[cpu * pcpu4k_unit_pages], | 1501 | unit_pages); |
1325 | pcpu4k_unit_pages); | ||
1326 | if (ret < 0) | 1502 | if (ret < 0) |
1327 | panic("failed to map percpu area, err=%zd\n", ret); | 1503 | panic("failed to map percpu area, err=%zd\n", ret); |
1328 | 1504 | ||
@@ -1340,19 +1516,18 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, | |||
1340 | 1516 | ||
1341 | /* we're ready, commit */ | 1517 | /* we're ready, commit */ |
1342 | pr_info("PERCPU: %d 4k pages per cpu, static data %zu bytes\n", | 1518 | pr_info("PERCPU: %d 4k pages per cpu, static data %zu bytes\n", |
1343 | pcpu4k_unit_pages, static_size); | 1519 | unit_pages, static_size); |
1344 | 1520 | ||
1345 | ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, | 1521 | ret = pcpu_setup_first_chunk(static_size, reserved_size, -1, |
1346 | reserved_size, -1, | 1522 | unit_pages << PAGE_SHIFT, vm.addr); |
1347 | pcpu4k_unit_pages << PAGE_SHIFT, vm.addr); | ||
1348 | goto out_free_ar; | 1523 | goto out_free_ar; |
1349 | 1524 | ||
1350 | enomem: | 1525 | enomem: |
1351 | while (--j >= 0) | 1526 | while (--j >= 0) |
1352 | free_fn(page_address(pcpu4k_pages[j]), PAGE_SIZE); | 1527 | free_fn(page_address(pages[j]), PAGE_SIZE); |
1353 | ret = -ENOMEM; | 1528 | ret = -ENOMEM; |
1354 | out_free_ar: | 1529 | out_free_ar: |
1355 | free_bootmem(__pa(pcpu4k_pages), pages_size); | 1530 | free_bootmem(__pa(pages), pages_size); |
1356 | return ret; | 1531 | return ret; |
1357 | } | 1532 | } |
1358 | 1533 | ||
@@ -1370,16 +1545,6 @@ static size_t pcpul_unit_size; | |||
1370 | static struct pcpul_ent *pcpul_map; | 1545 | static struct pcpul_ent *pcpul_map; |
1371 | static struct vm_struct pcpul_vm; | 1546 | static struct vm_struct pcpul_vm; |
1372 | 1547 | ||
1373 | static struct page * __init pcpul_get_page(unsigned int cpu, int pageno) | ||
1374 | { | ||
1375 | size_t off = (size_t)pageno << PAGE_SHIFT; | ||
1376 | |||
1377 | if (off >= pcpul_size) | ||
1378 | return NULL; | ||
1379 | |||
1380 | return virt_to_page(pcpul_map[cpu].ptr + off); | ||
1381 | } | ||
1382 | |||
1383 | /** | 1548 | /** |
1384 | * pcpu_lpage_first_chunk - remap the first percpu chunk using large page | 1549 | * pcpu_lpage_first_chunk - remap the first percpu chunk using large page |
1385 | * @static_size: the size of static percpu area in bytes | 1550 | * @static_size: the size of static percpu area in bytes |
@@ -1475,9 +1640,8 @@ ssize_t __init pcpu_lpage_first_chunk(size_t static_size, size_t reserved_size, | |||
1475 | pr_info("PERCPU: Remapped at %p with large pages, static data " | 1640 | pr_info("PERCPU: Remapped at %p with large pages, static data " |
1476 | "%zu bytes\n", pcpul_vm.addr, static_size); | 1641 | "%zu bytes\n", pcpul_vm.addr, static_size); |
1477 | 1642 | ||
1478 | ret = pcpu_setup_first_chunk(pcpul_get_page, static_size, | 1643 | ret = pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, |
1479 | reserved_size, dyn_size, pcpul_unit_size, | 1644 | pcpul_unit_size, pcpul_vm.addr); |
1480 | pcpul_vm.addr); | ||
1481 | 1645 | ||
1482 | /* sort pcpul_map array for pcpu_lpage_remapped() */ | 1646 | /* sort pcpul_map array for pcpu_lpage_remapped() */ |
1483 | for (i = 0; i < num_possible_cpus() - 1; i++) | 1647 | for (i = 0; i < num_possible_cpus() - 1; i++) |