diff options
author | Tejun Heo <tj@kernel.org> | 2009-02-23 21:57:21 -0500 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2009-02-23 21:57:21 -0500 |
commit | 8d408b4be37bc49c9086531f2ebe411cf5731746 (patch) | |
tree | 559a532a04b24dd164ec2c72ab545b30a5a604ef /mm/percpu.c | |
parent | d9b55eeb1d55ef2dc5a4fdbff9604c2c68cb5649 (diff) |
percpu: give more latitude to arch specific first chunk initialization
Impact: more latitude for first percpu chunk allocation
The first percpu chunk serves the kernel static percpu area and may or
may not contain extra room for further dynamic allocation.
Initialization of the first chunk needs to be done before normal
memory allocation service is up, so it has its own init path -
pcpu_setup_static().
It seems archs need more latitude while initializing the first chunk,
for example to take advantage of large page mapping. This patch makes
the following changes to allow this.
* Define PERCPU_DYNAMIC_RESERVE to give archs a hint about how much space
to reserve in the first chunk for further dynamic allocation.
* Rename pcpu_setup_static() to pcpu_setup_first_chunk().
* Make pcpu_setup_first_chunk() much more flexible by fetching page
pointers via a callback and adding optional @unit_size, @free_size and
@base_addr arguments, which allow archs to selectively customize parts of
chunk initialization to their liking.
Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'mm/percpu.c')
-rw-r--r-- | mm/percpu.c | 149 |
1 files changed, 116 insertions, 33 deletions
diff --git a/mm/percpu.c b/mm/percpu.c index d9e6e5d1dbd..9ac01980cce 100644 --- a/mm/percpu.c +++ b/mm/percpu.c | |||
@@ -48,8 +48,8 @@ | |||
48 | * - define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() to translate | 48 | * - define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() to translate |
49 | * regular address to percpu pointer and back | 49 | * regular address to percpu pointer and back |
50 | * | 50 | * |
51 | * - use pcpu_setup_static() during percpu area initialization to | 51 | * - use pcpu_setup_first_chunk() during percpu area initialization to |
52 | * setup kernel static percpu area | 52 | * setup the first chunk containing the kernel static percpu area |
53 | */ | 53 | */ |
54 | 54 | ||
55 | #include <linux/bitmap.h> | 55 | #include <linux/bitmap.h> |
@@ -67,7 +67,6 @@ | |||
67 | #include <asm/cacheflush.h> | 67 | #include <asm/cacheflush.h> |
68 | #include <asm/tlbflush.h> | 68 | #include <asm/tlbflush.h> |
69 | 69 | ||
70 | #define PCPU_MIN_UNIT_PAGES 16 /* max alloc size in pages */ | ||
71 | #define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */ | 70 | #define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */ |
72 | #define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */ | 71 | #define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */ |
73 | 72 | ||
@@ -80,6 +79,7 @@ struct pcpu_chunk { | |||
80 | int map_used; /* # of map entries used */ | 79 | int map_used; /* # of map entries used */ |
81 | int map_alloc; /* # of map entries allocated */ | 80 | int map_alloc; /* # of map entries allocated */ |
82 | int *map; /* allocation map */ | 81 | int *map; /* allocation map */ |
82 | bool immutable; /* no [de]population allowed */ | ||
83 | struct page *page[]; /* #cpus * UNIT_PAGES */ | 83 | struct page *page[]; /* #cpus * UNIT_PAGES */ |
84 | }; | 84 | }; |
85 | 85 | ||
@@ -521,6 +521,9 @@ static void pcpu_unmap(struct pcpu_chunk *chunk, int page_start, int page_end, | |||
521 | unsigned int last = num_possible_cpus() - 1; | 521 | unsigned int last = num_possible_cpus() - 1; |
522 | unsigned int cpu; | 522 | unsigned int cpu; |
523 | 523 | ||
524 | /* unmap must not be done on immutable chunk */ | ||
525 | WARN_ON(chunk->immutable); | ||
526 | |||
524 | /* | 527 | /* |
525 | * Each flushing trial can be very expensive, issue flush on | 528 | * Each flushing trial can be very expensive, issue flush on |
526 | * the whole region at once rather than doing it for each cpu. | 529 | * the whole region at once rather than doing it for each cpu. |
@@ -602,6 +605,9 @@ static int pcpu_map(struct pcpu_chunk *chunk, int page_start, int page_end) | |||
602 | unsigned int cpu; | 605 | unsigned int cpu; |
603 | int err; | 606 | int err; |
604 | 607 | ||
608 | /* map must not be done on immutable chunk */ | ||
609 | WARN_ON(chunk->immutable); | ||
610 | |||
605 | for_each_possible_cpu(cpu) { | 611 | for_each_possible_cpu(cpu) { |
606 | err = map_kernel_range_noflush( | 612 | err = map_kernel_range_noflush( |
607 | pcpu_chunk_addr(chunk, cpu, page_start), | 613 | pcpu_chunk_addr(chunk, cpu, page_start), |
@@ -727,8 +733,7 @@ void *__alloc_percpu(size_t size, size_t align) | |||
727 | struct pcpu_chunk *chunk; | 733 | struct pcpu_chunk *chunk; |
728 | int slot, off; | 734 | int slot, off; |
729 | 735 | ||
730 | if (unlikely(!size || size > PCPU_MIN_UNIT_PAGES * PAGE_SIZE || | 736 | if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) { |
731 | align > PAGE_SIZE)) { | ||
732 | WARN(true, "illegal size (%zu) or align (%zu) for " | 737 | WARN(true, "illegal size (%zu) or align (%zu) for " |
733 | "percpu allocation\n", size, align); | 738 | "percpu allocation\n", size, align); |
734 | return NULL; | 739 | return NULL; |
@@ -776,6 +781,7 @@ EXPORT_SYMBOL_GPL(__alloc_percpu); | |||
776 | 781 | ||
777 | static void pcpu_kill_chunk(struct pcpu_chunk *chunk) | 782 | static void pcpu_kill_chunk(struct pcpu_chunk *chunk) |
778 | { | 783 | { |
784 | WARN_ON(chunk->immutable); | ||
779 | pcpu_depopulate_chunk(chunk, 0, pcpu_unit_size, false); | 785 | pcpu_depopulate_chunk(chunk, 0, pcpu_unit_size, false); |
780 | list_del(&chunk->list); | 786 | list_del(&chunk->list); |
781 | rb_erase(&chunk->rb_node, &pcpu_addr_root); | 787 | rb_erase(&chunk->rb_node, &pcpu_addr_root); |
@@ -821,33 +827,73 @@ void free_percpu(void *ptr) | |||
821 | EXPORT_SYMBOL_GPL(free_percpu); | 827 | EXPORT_SYMBOL_GPL(free_percpu); |
822 | 828 | ||
823 | /** | 829 | /** |
824 | * pcpu_setup_static - initialize kernel static percpu area | 830 | * pcpu_setup_first_chunk - initialize the first percpu chunk |
825 | * @populate_pte_fn: callback to allocate pagetable | 831 | * @get_page_fn: callback to fetch page pointer |
826 | * @pages: num_possible_cpus() * PFN_UP(cpu_size) pages | 832 | * @static_size: the size of static percpu area in bytes |
827 | * @cpu_size: the size of static percpu area in bytes | 833 | * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, 0 for auto |
828 | * | 834 | * @free_size: free size in bytes, 0 for auto |
829 | * Initialize kernel static percpu area. The caller should allocate | 835 | * @base_addr: mapped address, NULL for auto |
830 | * all the necessary pages and pass them in @pages. | 836 | * @populate_pte_fn: callback to allocate pagetable, NULL if unnecessary |
831 | * @populate_pte_fn() is called on each page to be used for percpu | 837 | * |
832 | * mapping and is responsible for making sure all the necessary page | 838 | * Initialize the first percpu chunk which contains the kernel static |
833 | * tables for the page is allocated. | 839 | * perpcu area. This function is to be called from arch percpu area |
840 | * setup path. The first two parameters are mandatory. The rest are | ||
841 | * optional. | ||
842 | * | ||
843 | * @get_page_fn() should return pointer to percpu page given cpu | ||
844 | * number and page number. It should at least return enough pages to | ||
845 | * cover the static area. The returned pages for static area should | ||
846 | * have been initialized with valid data. If @unit_size is specified, | ||
847 | * it can also return pages after the static area. NULL return | ||
848 | * indicates end of pages for the cpu. Note that @get_page_fn() must | ||
849 | * return the same number of pages for all cpus. | ||
850 | * | ||
851 | * @unit_size, if non-zero, determines unit size and must be aligned | ||
852 | * to PAGE_SIZE and equal to or larger than @static_size + @free_size. | ||
853 | * | ||
854 | * @free_size determines the number of free bytes after the static | ||
855 | * area in the first chunk. If zero, whatever left is available. | ||
856 | * Specifying non-zero value make percpu leave the area after | ||
857 | * @static_size + @free_size alone. | ||
858 | * | ||
859 | * Non-null @base_addr means that the caller already allocated virtual | ||
860 | * region for the first chunk and mapped it. percpu must not mess | ||
861 | * with the chunk. Note that @base_addr with 0 @unit_size or non-NULL | ||
862 | * @populate_pte_fn doesn't make any sense. | ||
863 | * | ||
864 | * @populate_pte_fn is used to populate the pagetable. NULL means the | ||
865 | * caller already populated the pagetable. | ||
834 | * | 866 | * |
835 | * RETURNS: | 867 | * RETURNS: |
836 | * The determined pcpu_unit_size which can be used to initialize | 868 | * The determined pcpu_unit_size which can be used to initialize |
837 | * percpu access. | 869 | * percpu access. |
838 | */ | 870 | */ |
839 | size_t __init pcpu_setup_static(pcpu_populate_pte_fn_t populate_pte_fn, | 871 | size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, |
840 | struct page **pages, size_t cpu_size) | 872 | size_t static_size, size_t unit_size, |
873 | size_t free_size, void *base_addr, | ||
874 | pcpu_populate_pte_fn_t populate_pte_fn) | ||
841 | { | 875 | { |
842 | static struct vm_struct static_vm; | 876 | static struct vm_struct static_vm; |
843 | struct pcpu_chunk *static_chunk; | 877 | struct pcpu_chunk *static_chunk; |
844 | int nr_cpu_pages = DIV_ROUND_UP(cpu_size, PAGE_SIZE); | ||
845 | unsigned int cpu; | 878 | unsigned int cpu; |
879 | int nr_pages; | ||
846 | int err, i; | 880 | int err, i; |
847 | 881 | ||
848 | pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_PAGES, PFN_UP(cpu_size)); | 882 | /* santiy checks */ |
883 | BUG_ON(!static_size); | ||
884 | BUG_ON(!unit_size && free_size); | ||
885 | BUG_ON(unit_size && unit_size < static_size + free_size); | ||
886 | BUG_ON(unit_size & ~PAGE_MASK); | ||
887 | BUG_ON(base_addr && !unit_size); | ||
888 | BUG_ON(base_addr && populate_pte_fn); | ||
849 | 889 | ||
850 | pcpu_static_size = cpu_size; | 890 | if (unit_size) |
891 | pcpu_unit_pages = unit_size >> PAGE_SHIFT; | ||
892 | else | ||
893 | pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_SIZE >> PAGE_SHIFT, | ||
894 | PFN_UP(static_size)); | ||
895 | |||
896 | pcpu_static_size = static_size; | ||
851 | pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; | 897 | pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; |
852 | pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; | 898 | pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; |
853 | pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) | 899 | pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) |
@@ -862,29 +908,66 @@ size_t __init pcpu_setup_static(pcpu_populate_pte_fn_t populate_pte_fn, | |||
862 | for (i = 0; i < pcpu_nr_slots; i++) | 908 | for (i = 0; i < pcpu_nr_slots; i++) |
863 | INIT_LIST_HEAD(&pcpu_slot[i]); | 909 | INIT_LIST_HEAD(&pcpu_slot[i]); |
864 | 910 | ||
865 | /* init and register vm area */ | ||
866 | static_vm.flags = VM_ALLOC; | ||
867 | static_vm.size = pcpu_chunk_size; | ||
868 | vm_area_register_early(&static_vm, PAGE_SIZE); | ||
869 | |||
870 | /* init static_chunk */ | 911 | /* init static_chunk */ |
871 | static_chunk = alloc_bootmem(pcpu_chunk_struct_size); | 912 | static_chunk = alloc_bootmem(pcpu_chunk_struct_size); |
872 | INIT_LIST_HEAD(&static_chunk->list); | 913 | INIT_LIST_HEAD(&static_chunk->list); |
873 | static_chunk->vm = &static_vm; | 914 | static_chunk->vm = &static_vm; |
874 | static_chunk->free_size = pcpu_unit_size - pcpu_static_size; | 915 | |
916 | if (free_size) | ||
917 | static_chunk->free_size = free_size; | ||
918 | else | ||
919 | static_chunk->free_size = pcpu_unit_size - pcpu_static_size; | ||
920 | |||
875 | static_chunk->contig_hint = static_chunk->free_size; | 921 | static_chunk->contig_hint = static_chunk->free_size; |
876 | 922 | ||
877 | /* assign pages and map them */ | 923 | /* allocate vm address */ |
924 | static_vm.flags = VM_ALLOC; | ||
925 | static_vm.size = pcpu_chunk_size; | ||
926 | |||
927 | if (!base_addr) | ||
928 | vm_area_register_early(&static_vm, PAGE_SIZE); | ||
929 | else { | ||
930 | /* | ||
931 | * Pages already mapped. No need to remap into | ||
932 | * vmalloc area. In this case the static chunk can't | ||
933 | * be mapped or unmapped by percpu and is marked | ||
934 | * immutable. | ||
935 | */ | ||
936 | static_vm.addr = base_addr; | ||
937 | static_chunk->immutable = true; | ||
938 | } | ||
939 | |||
940 | /* assign pages */ | ||
941 | nr_pages = -1; | ||
878 | for_each_possible_cpu(cpu) { | 942 | for_each_possible_cpu(cpu) { |
879 | for (i = 0; i < nr_cpu_pages; i++) { | 943 | for (i = 0; i < pcpu_unit_pages; i++) { |
880 | *pcpu_chunk_pagep(static_chunk, cpu, i) = *pages++; | 944 | struct page *page = get_page_fn(cpu, i); |
881 | populate_pte_fn(pcpu_chunk_addr(static_chunk, cpu, i)); | 945 | |
946 | if (!page) | ||
947 | break; | ||
948 | *pcpu_chunk_pagep(static_chunk, cpu, i) = page; | ||
882 | } | 949 | } |
950 | |||
951 | BUG_ON(i < PFN_UP(pcpu_static_size)); | ||
952 | |||
953 | if (nr_pages < 0) | ||
954 | nr_pages = i; | ||
955 | else | ||
956 | BUG_ON(nr_pages != i); | ||
883 | } | 957 | } |
884 | 958 | ||
885 | err = pcpu_map(static_chunk, 0, nr_cpu_pages); | 959 | /* map them */ |
886 | if (err) | 960 | if (populate_pte_fn) { |
887 | panic("failed to setup static percpu area, err=%d\n", err); | 961 | for_each_possible_cpu(cpu) |
962 | for (i = 0; i < nr_pages; i++) | ||
963 | populate_pte_fn(pcpu_chunk_addr(static_chunk, | ||
964 | cpu, i)); | ||
965 | |||
966 | err = pcpu_map(static_chunk, 0, nr_pages); | ||
967 | if (err) | ||
968 | panic("failed to setup static percpu area, err=%d\n", | ||
969 | err); | ||
970 | } | ||
888 | 971 | ||
889 | /* link static_chunk in */ | 972 | /* link static_chunk in */ |
890 | pcpu_chunk_relocate(static_chunk, -1); | 973 | pcpu_chunk_relocate(static_chunk, -1); |