Diffstat (limited to 'mm/percpu.c')

 mm/percpu.c | 149 ++++++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 116 insertions(+), 33 deletions(-)
diff --git a/mm/percpu.c b/mm/percpu.c
index d9e6e5d1dbd4..9ac01980cce0 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -48,8 +48,8 @@
  * - define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() to translate
  *   regular address to percpu pointer and back
  *
- * - use pcpu_setup_static() during percpu area initialization to
- *   setup kernel static percpu area
+ * - use pcpu_setup_first_chunk() during percpu area initialization to
+ *   setup the first chunk containing the kernel static percpu area
  */
 
 #include <linux/bitmap.h>
@@ -67,7 +67,6 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
-#define PCPU_MIN_UNIT_PAGES		16	/* max alloc size in pages */
 #define PCPU_SLOT_BASE_SHIFT		5	/* 1-31 shares the same slot */
 #define PCPU_DFL_MAP_ALLOC		16	/* start a map with 16 ents */
@@ -80,6 +79,7 @@ struct pcpu_chunk {
 	int			map_used;	/* # of map entries used */
 	int			map_alloc;	/* # of map entries allocated */
 	int			*map;		/* allocation map */
+	bool			immutable;	/* no [de]population allowed */
 	struct page		*page[];	/* #cpus * UNIT_PAGES */
 };
 
@@ -521,6 +521,9 @@ static void pcpu_unmap(struct pcpu_chunk *chunk, int page_start, int page_end,
 	unsigned int last = num_possible_cpus() - 1;
 	unsigned int cpu;
 
+	/* unmap must not be done on immutable chunk */
+	WARN_ON(chunk->immutable);
+
 	/*
 	 * Each flushing trial can be very expensive, issue flush on
 	 * the whole region at once rather than doing it for each cpu.
@@ -602,6 +605,9 @@ static int pcpu_map(struct pcpu_chunk *chunk, int page_start, int page_end)
 	unsigned int cpu;
 	int err;
 
+	/* map must not be done on immutable chunk */
+	WARN_ON(chunk->immutable);
+
 	for_each_possible_cpu(cpu) {
 		err = map_kernel_range_noflush(
 				pcpu_chunk_addr(chunk, cpu, page_start),
@@ -727,8 +733,7 @@ void *__alloc_percpu(size_t size, size_t align)
 	struct pcpu_chunk *chunk;
 	int slot, off;
 
-	if (unlikely(!size || size > PCPU_MIN_UNIT_PAGES * PAGE_SIZE ||
-		     align > PAGE_SIZE)) {
+	if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) {
 		WARN(true, "illegal size (%zu) or align (%zu) for "
 		     "percpu allocation\n", size, align);
 		return NULL;
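
The rewritten check above caps a dynamic percpu allocation at PCPU_MIN_UNIT_SIZE bytes and PAGE_SIZE alignment. A minimal caller-side sketch of what passes this check; struct foo_stats and foo_stats_init() are hypothetical names, not anything in this patch:

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/percpu.h>

/* hypothetical per-cpu counters; well under PCPU_MIN_UNIT_SIZE */
struct foo_stats {
	unsigned long	events;
	unsigned long	errors;
};

static struct foo_stats *foo_stats;	/* percpu pointer */

static int __init foo_stats_init(void)
{
	/* size <= PCPU_MIN_UNIT_SIZE and align <= PAGE_SIZE pass the check */
	foo_stats = __alloc_percpu(sizeof(struct foo_stats),
				   __alignof__(struct foo_stats));
	return foo_stats ? 0 : -ENOMEM;
}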
@@ -776,6 +781,7 @@ EXPORT_SYMBOL_GPL(__alloc_percpu);
 
 static void pcpu_kill_chunk(struct pcpu_chunk *chunk)
 {
+	WARN_ON(chunk->immutable);
 	pcpu_depopulate_chunk(chunk, 0, pcpu_unit_size, false);
 	list_del(&chunk->list);
 	rb_erase(&chunk->rb_node, &pcpu_addr_root);
@@ -821,33 +827,73 @@ void free_percpu(void *ptr)
 EXPORT_SYMBOL_GPL(free_percpu);
 
 /**
- * pcpu_setup_static - initialize kernel static percpu area
- * @populate_pte_fn: callback to allocate pagetable
- * @pages: num_possible_cpus() * PFN_UP(cpu_size) pages
- * @cpu_size: the size of static percpu area in bytes
- *
- * Initialize kernel static percpu area.  The caller should allocate
- * all the necessary pages and pass them in @pages.
- * @populate_pte_fn() is called on each page to be used for percpu
- * mapping and is responsible for making sure all the necessary page
- * tables for the page is allocated.
+ * pcpu_setup_first_chunk - initialize the first percpu chunk
+ * @get_page_fn: callback to fetch page pointer
+ * @static_size: the size of static percpu area in bytes
+ * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, 0 for auto
+ * @free_size: free size in bytes, 0 for auto
+ * @base_addr: mapped address, NULL for auto
+ * @populate_pte_fn: callback to allocate pagetable, NULL if unnecessary
+ *
+ * Initialize the first percpu chunk which contains the kernel static
+ * percpu area.  This function is to be called from the arch percpu
+ * area setup path.  The first two parameters are mandatory.  The rest
+ * are optional.
+ *
+ * @get_page_fn() should return the pointer to the percpu page given
+ * cpu number and page number.  It should at least return enough pages
+ * to cover the static area.  The returned pages for the static area
+ * should have been initialized with valid data.  If @unit_size is
+ * specified, it can also return pages after the static area.  NULL
+ * return indicates end of pages for the cpu.  Note that @get_page_fn()
+ * must return the same number of pages for all cpus.
+ *
+ * @unit_size, if non-zero, determines unit size and must be aligned
+ * to PAGE_SIZE and equal to or larger than @static_size + @free_size.
+ *
+ * @free_size determines the number of free bytes after the static
+ * area in the first chunk.  If zero, whatever is left is available.
+ * Specifying a non-zero value makes percpu leave the area after
+ * @static_size + @free_size alone.
+ *
+ * Non-null @base_addr means that the caller already allocated the
+ * virtual region for the first chunk and mapped it.  percpu must not
+ * mess with the chunk.  Note that @base_addr with 0 @unit_size or
+ * non-NULL @populate_pte_fn doesn't make any sense.
+ *
+ * @populate_pte_fn is used to populate the pagetable.  NULL means the
+ * caller already populated the pagetable.
  *
  * RETURNS:
  * The determined pcpu_unit_size which can be used to initialize
  * percpu access.
  */
-size_t __init pcpu_setup_static(pcpu_populate_pte_fn_t populate_pte_fn,
-				struct page **pages, size_t cpu_size)
+size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
+				     size_t static_size, size_t unit_size,
+				     size_t free_size, void *base_addr,
+				     pcpu_populate_pte_fn_t populate_pte_fn)
 {
 	static struct vm_struct static_vm;
 	struct pcpu_chunk *static_chunk;
-	int nr_cpu_pages = DIV_ROUND_UP(cpu_size, PAGE_SIZE);
 	unsigned int cpu;
+	int nr_pages;
 	int err, i;
 
-	pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_PAGES, PFN_UP(cpu_size));
+	/* sanity checks */
+	BUG_ON(!static_size);
+	BUG_ON(!unit_size && free_size);
+	BUG_ON(unit_size && unit_size < static_size + free_size);
+	BUG_ON(unit_size & ~PAGE_MASK);
+	BUG_ON(base_addr && !unit_size);
+	BUG_ON(base_addr && populate_pte_fn);
 
-	pcpu_static_size = cpu_size;
+	if (unit_size)
+		pcpu_unit_pages = unit_size >> PAGE_SHIFT;
+	else
+		pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_SIZE >> PAGE_SHIFT,
+					PFN_UP(static_size));
+
+	pcpu_static_size = static_size;
 	pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT;
 	pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size;
 	pcpu_chunk_struct_size = sizeof(struct pcpu_chunk)
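
The kernel-doc above spells out which parameter combinations are legal, and the new sanity checks encode the same rules. A sketch of the simplest legal call, with @unit_size, @free_size and @base_addr all left on auto and only the two mandatory parameters plus a PTE populator supplied; the pcpu4k_* names are hypothetical stand-ins for an arch's own helpers:

#include <linux/bootmem.h>
#include <linux/percpu.h>
#include <asm/sections.h>

/* hypothetical page array filled by the arch before the call */
static struct page **pcpu4k_pages __initdata;
static int pcpu4k_nr_pages __initdata;

static struct page * __init pcpu4k_get_page(unsigned int cpu, int pageno)
{
	if (pageno >= pcpu4k_nr_pages)
		return NULL;	/* end of pages for this cpu */
	return pcpu4k_pages[cpu * pcpu4k_nr_pages + pageno];
}

static void __init pcpu4k_populate_pte(unsigned long addr)
{
	/* arch specific: make sure page tables for @addr are allocated */
}

void __init setup_per_cpu_areas(void)
{
	size_t static_size = __per_cpu_end - __per_cpu_start;
	size_t unit_size;

	/* @unit_size, @free_size and @base_addr all left on "auto" */
	unit_size = pcpu_setup_first_chunk(pcpu4k_get_page, static_size,
					   0, 0, NULL, pcpu4k_populate_pte);
	/* the arch would now derive its per-cpu offsets from unit_size */
}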
@@ -862,29 +908,66 @@ size_t __init pcpu_setup_static(pcpu_populate_pte_fn_t populate_pte_fn,
 	for (i = 0; i < pcpu_nr_slots; i++)
 		INIT_LIST_HEAD(&pcpu_slot[i]);
 
-	/* init and register vm area */
-	static_vm.flags = VM_ALLOC;
-	static_vm.size = pcpu_chunk_size;
-	vm_area_register_early(&static_vm, PAGE_SIZE);
-
 	/* init static_chunk */
 	static_chunk = alloc_bootmem(pcpu_chunk_struct_size);
 	INIT_LIST_HEAD(&static_chunk->list);
 	static_chunk->vm = &static_vm;
-	static_chunk->free_size = pcpu_unit_size - pcpu_static_size;
+
+	if (free_size)
+		static_chunk->free_size = free_size;
+	else
+		static_chunk->free_size = pcpu_unit_size - pcpu_static_size;
+
 	static_chunk->contig_hint = static_chunk->free_size;
 
-	/* assign pages and map them */
+	/* allocate vm address */
+	static_vm.flags = VM_ALLOC;
+	static_vm.size = pcpu_chunk_size;
+
+	if (!base_addr)
+		vm_area_register_early(&static_vm, PAGE_SIZE);
+	else {
+		/*
+		 * Pages already mapped.  No need to remap into
+		 * vmalloc area.  In this case the static chunk can't
+		 * be mapped or unmapped by percpu and is marked
+		 * immutable.
+		 */
+		static_vm.addr = base_addr;
+		static_chunk->immutable = true;
+	}
+
+	/* assign pages */
+	nr_pages = -1;
 	for_each_possible_cpu(cpu) {
-		for (i = 0; i < nr_cpu_pages; i++) {
-			*pcpu_chunk_pagep(static_chunk, cpu, i) = *pages++;
-			populate_pte_fn(pcpu_chunk_addr(static_chunk, cpu, i));
+		for (i = 0; i < pcpu_unit_pages; i++) {
+			struct page *page = get_page_fn(cpu, i);
+
+			if (!page)
+				break;
+			*pcpu_chunk_pagep(static_chunk, cpu, i) = page;
 		}
+
+		BUG_ON(i < PFN_UP(pcpu_static_size));
+
+		if (nr_pages < 0)
+			nr_pages = i;
+		else
+			BUG_ON(nr_pages != i);
 	}
 
-	err = pcpu_map(static_chunk, 0, nr_cpu_pages);
-	if (err)
-		panic("failed to setup static percpu area, err=%d\n", err);
+	/* map them */
+	if (populate_pte_fn) {
+		for_each_possible_cpu(cpu)
+			for (i = 0; i < nr_pages; i++)
+				populate_pte_fn(pcpu_chunk_addr(static_chunk,
+								cpu, i));
+
+		err = pcpu_map(static_chunk, 0, nr_pages);
+		if (err)
+			panic("failed to setup static percpu area, err=%d\n",
+			      err);
+	}
 
 	/* link static_chunk in */
 	pcpu_chunk_relocate(static_chunk, -1);
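
At the other extreme, a caller that has already allocated and mapped the whole first chunk (say, out of bootmem through the linear mapping) passes its own @base_addr with a fixed @unit_size and a NULL @populate_pte_fn, and the chunk comes back marked immutable so percpu never maps or unmaps it. A hedged sketch of that path as an alternative arch implementation; all pcpul_* names are hypothetical, and PERCPU_MODULE_RESERVE stands in for whatever free space the arch wants to keep:

#include <linux/bootmem.h>
#include <linux/percpu.h>
#include <linux/string.h>
#include <asm/sections.h>

/* hypothetical: first chunk lives in the linear mapping */
static void *pcpul_base __initdata;
static size_t pcpul_unit_size __initdata;

static struct page * __init pcpul_get_page(unsigned int cpu, int pageno)
{
	/* linearly mapped, so the backing page is directly computable */
	return virt_to_page(pcpul_base + cpu * pcpul_unit_size +
			    ((size_t)pageno << PAGE_SHIFT));
}

void __init setup_per_cpu_areas(void)
{
	size_t static_size = __per_cpu_end - __per_cpu_start;
	unsigned int cpu;

	pcpul_unit_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE);
	pcpul_base = alloc_bootmem(num_possible_cpus() * pcpul_unit_size);

	/* seed each cpu's unit with the static percpu data */
	for_each_possible_cpu(cpu)
		memcpy(pcpul_base + cpu * pcpul_unit_size, __per_cpu_start,
		       static_size);

	/*
	 * Already mapped: pass @base_addr and a fixed @unit_size with no
	 * @populate_pte_fn, which satisfies the new sanity checks and
	 * marks the first chunk immutable.
	 */
	pcpu_setup_first_chunk(pcpul_get_page, static_size, pcpul_unit_size,
			       0, pcpul_base, NULL);
}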