 arch/x86/kernel/setup_percpu.c |   8
 include/linux/percpu.h         |  10
 kernel/module.c                |   2
 mm/percpu.c                    | 153
 4 files changed, 144 insertions(+), 29 deletions(-)
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 38e2b2a470a5..dd4eabc747c8 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -217,7 +217,7 @@ proceed:
 	pr_info("PERCPU: Remapped at %p with large pages, static data "
 		"%zu bytes\n", vm.addr, static_size);
 
-	ret = pcpu_setup_first_chunk(pcpur_get_page, static_size, PMD_SIZE,
+	ret = pcpu_setup_first_chunk(pcpur_get_page, static_size, 0, PMD_SIZE,
 				     pcpur_size - static_size, vm.addr, NULL);
 	goto out_free_ar;
 
@@ -297,7 +297,7 @@ static ssize_t __init setup_pcpu_embed(size_t static_size)
 	pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n",
 		pcpue_size >> PAGE_SHIFT, pcpue_ptr, static_size);
 
-	return pcpu_setup_first_chunk(pcpue_get_page, static_size,
+	return pcpu_setup_first_chunk(pcpue_get_page, static_size, 0,
 				      pcpue_unit_size, dyn_size,
 				      pcpue_ptr, NULL);
 }
@@ -356,8 +356,8 @@ static ssize_t __init setup_pcpu_4k(size_t static_size)
 	pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n",
 		pcpu4k_nr_static_pages, static_size);
 
-	ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, -1, -1, NULL,
-				     pcpu4k_populate_pte);
+	ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, 0, -1, -1,
+				     NULL, pcpu4k_populate_pte);
 	goto out_free_ar;
 
 enomem:
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index a96fc53bbd62..8ff15153ae20 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -117,10 +117,10 @@ typedef struct page * (*pcpu_get_page_fn_t)(unsigned int cpu, int pageno);
 typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr);
 
 extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
-				size_t static_size,
+				size_t static_size, size_t reserved_size,
 				ssize_t unit_size, ssize_t dyn_size,
 				void *base_addr,
 				pcpu_populate_pte_fn_t populate_pte_fn);
 
 /*
  * Use this to get to a cpu's version of the per-cpu object
@@ -129,6 +129,8 @@ extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
  */
 #define per_cpu_ptr(ptr, cpu)	SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu)))
 
+extern void *__alloc_reserved_percpu(size_t size, size_t align);
+
 #else /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */
 
 struct percpu_data {
diff --git a/kernel/module.c b/kernel/module.c
index 1f0657ae555b..f0e04d6b67d8 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -381,7 +381,7 @@ static void *percpu_modalloc(unsigned long size, unsigned long align,
 		align = PAGE_SIZE;
 	}
 
-	ptr = __alloc_percpu(size, align);
+	ptr = __alloc_reserved_percpu(size, align);
 	if (!ptr)
 		printk(KERN_WARNING
 		       "Could not allocate %lu bytes percpu data\n", size);
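As a usage sketch of the new entry point from a loader-style caller (illustrative only; template, size and align below are stand-in names, not from the patch), a reserved allocation is addressed and freed like any other percpu pointer:

	void *ptr;
	unsigned int cpu;

	ptr = __alloc_reserved_percpu(size, align);
	if (!ptr)
		return NULL;			/* reserved area exhausted */

	/* copy the module's static percpu image into each cpu's copy */
	for_each_possible_cpu(cpu)
		memcpy(per_cpu_ptr(ptr, cpu), template, size);

	/* ... later, on module unload ... */
	free_percpu(ptr);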
diff --git a/mm/percpu.c b/mm/percpu.c
index 5b47d9fe65f5..ef8e169b7731 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -94,6 +94,11 @@ static size_t pcpu_chunk_struct_size __read_mostly;
 void *pcpu_base_addr __read_mostly;
 EXPORT_SYMBOL_GPL(pcpu_base_addr);
 
+/* optional reserved chunk, only accessible for reserved allocations */
+static struct pcpu_chunk *pcpu_reserved_chunk;
+/* offset limit of the reserved chunk */
+static int pcpu_reserved_chunk_limit;
+
 /*
  * One mutex to rule them all.
  *
@@ -201,13 +206,14 @@ static void *pcpu_realloc(void *p, size_t size, size_t new_size)
  *
  * This function is called after an allocation or free changed @chunk.
  * New slot according to the changed state is determined and @chunk is
- * moved to the slot.
+ * moved to the slot. Note that the reserved chunk is never put on
+ * chunk slots.
  */
 static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot)
 {
 	int nslot = pcpu_chunk_slot(chunk);
 
-	if (oslot != nslot) {
+	if (chunk != pcpu_reserved_chunk && oslot != nslot) {
 		if (oslot < nslot)
 			list_move(&chunk->list, &pcpu_slot[nslot]);
 		else
@@ -255,6 +261,15 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
 	struct rb_node *n, *parent;
 	struct pcpu_chunk *chunk;
 
+	/* is it in the reserved chunk? */
+	if (pcpu_reserved_chunk) {
+		void *start = pcpu_reserved_chunk->vm->addr;
+
+		if (addr >= start && addr < start + pcpu_reserved_chunk_limit)
+			return pcpu_reserved_chunk;
+	}
+
+	/* nah... search the regular ones */
 	n = *pcpu_chunk_rb_search(addr, &parent);
 	if (!n) {
 		/* no exactly matching chunk, the parent is the closest */
@@ -713,9 +728,10 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void)
 }
 
 /**
- * __alloc_percpu - allocate percpu area
+ * pcpu_alloc - the percpu allocator
  * @size: size of area to allocate in bytes
  * @align: alignment of area (max PAGE_SIZE)
+ * @reserved: allocate from the reserved chunk if available
  *
  * Allocate percpu area of @size bytes aligned at @align. Might
  * sleep. Might trigger writeouts.
@@ -723,7 +739,7 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void)
  * RETURNS:
  * Percpu pointer to the allocated area on success, NULL on failure.
  */
-void *__alloc_percpu(size_t size, size_t align)
+static void *pcpu_alloc(size_t size, size_t align, bool reserved)
 {
 	void *ptr = NULL;
 	struct pcpu_chunk *chunk;
@@ -737,7 +753,18 @@ void *__alloc_percpu(size_t size, size_t align)
 
 	mutex_lock(&pcpu_mutex);
 
-	/* allocate area */
+	/* serve reserved allocations from the reserved chunk if available */
+	if (reserved && pcpu_reserved_chunk) {
+		chunk = pcpu_reserved_chunk;
+		if (size > chunk->contig_hint)
+			goto out_unlock;
+		off = pcpu_alloc_area(chunk, size, align);
+		if (off >= 0)
+			goto area_found;
+		goto out_unlock;
+	}
+
+	/* search through normal chunks */
 	for (slot = pcpu_size_to_slot(size); slot < pcpu_nr_slots; slot++) {
 		list_for_each_entry(chunk, &pcpu_slot[slot], list) {
 			if (size > chunk->contig_hint)
@@ -773,8 +800,41 @@ out_unlock:
 	mutex_unlock(&pcpu_mutex);
 	return ptr;
 }
+
+/**
+ * __alloc_percpu - allocate dynamic percpu area
+ * @size: size of area to allocate in bytes
+ * @align: alignment of area (max PAGE_SIZE)
+ *
+ * Allocate percpu area of @size bytes aligned at @align. Might
+ * sleep. Might trigger writeouts.
+ *
+ * RETURNS:
+ * Percpu pointer to the allocated area on success, NULL on failure.
+ */
+void *__alloc_percpu(size_t size, size_t align)
+{
+	return pcpu_alloc(size, align, false);
+}
 EXPORT_SYMBOL_GPL(__alloc_percpu);
 
+/**
+ * __alloc_reserved_percpu - allocate reserved percpu area
+ * @size: size of area to allocate in bytes
+ * @align: alignment of area (max PAGE_SIZE)
+ *
+ * Allocate percpu area of @size bytes aligned at @align from reserved
+ * percpu area if arch has set it up; otherwise, allocation is served
+ * from the same dynamic area. Might sleep. Might trigger writeouts.
+ *
+ * RETURNS:
+ * Percpu pointer to the allocated area on success, NULL on failure.
+ */
+void *__alloc_reserved_percpu(size_t size, size_t align)
+{
+	return pcpu_alloc(size, align, true);
+}
+
 static void pcpu_kill_chunk(struct pcpu_chunk *chunk)
 {
 	WARN_ON(chunk->immutable);
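A brief behavioural note on the two wrappers above (a sketch, assuming a first chunk set up as in this patch): both funnel into pcpu_alloc(), and when the arch passed reserved_size == 0 there is no pcpu_reserved_chunk, so the reserved variant falls through to the same dynamic-chunk search as __alloc_percpu():

	/* illustrative; sizes and alignments are arbitrary */
	void *dyn  = __alloc_percpu(64, 8);		/* dynamic chunks only */
	void *rsvd = __alloc_reserved_percpu(64, 8);	/* reserved chunk, or the
							 * dynamic area if none */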
@@ -826,6 +886,7 @@ EXPORT_SYMBOL_GPL(free_percpu);
  * pcpu_setup_first_chunk - initialize the first percpu chunk
  * @get_page_fn: callback to fetch page pointer
  * @static_size: the size of static percpu area in bytes
+ * @reserved_size: the size of reserved percpu area in bytes
  * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto
  * @dyn_size: free size for dynamic allocation in bytes, -1 for auto
  * @base_addr: mapped address, NULL for auto
@@ -844,14 +905,22 @@ EXPORT_SYMBOL_GPL(free_percpu);
  * indicates end of pages for the cpu. Note that @get_page_fn() must
  * return the same number of pages for all cpus.
  *
+ * @reserved_size, if non-zero, specifies the amount of bytes to
+ * reserve after the static area in the first chunk. This reserves
+ * the first chunk such that it's available only through reserved
+ * percpu allocation. This is primarily used to serve module percpu
+ * static areas on architectures where the addressing model has
+ * limited offset range for symbol relocations to guarantee module
+ * percpu symbols fall inside the relocatable range.
+ *
  * @unit_size, if non-negative, specifies unit size and must be
  * aligned to PAGE_SIZE and equal to or larger than @static_size +
- * @dyn_size.
+ * @reserved_size + @dyn_size.
  *
  * @dyn_size, if non-negative, limits the number of bytes available
  * for dynamic allocation in the first chunk. Specifying non-negative
  * value make percpu leave alone the area beyond @static_size +
- * @dyn_size.
+ * @reserved_size + @dyn_size.
  *
  * Non-null @base_addr means that the caller already allocated virtual
  * region for the first chunk and mapped it. percpu must not mess
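A worked example of the size relation above, using hypothetical numbers that do not appear in the patch:

	/*
	 * Suppose static_size = 44 KB, reserved_size = 8 KB and
	 * dyn_size = 20 KB.  An explicit unit_size must satisfy
	 *
	 *	unit_size >= static_size + reserved_size + dyn_size
	 *	           = 44 KB + 8 KB + 20 KB = 72 KB
	 *
	 * and be a multiple of PAGE_SIZE.  With unit_size == -1 the
	 * unit is instead sized from static_size + reserved_size,
	 * rounded up to whole pages but never below PCPU_MIN_UNIT_SIZE,
	 * and dyn_size == -1 then hands the dynamic area whatever is
	 * left of the unit.
	 */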
@@ -861,28 +930,36 @@ EXPORT_SYMBOL_GPL(free_percpu);
  * @populate_pte_fn is used to populate the pagetable. NULL means the
  * caller already populated the pagetable.
  *
+ * If the first chunk ends up with both reserved and dynamic areas, it
+ * is served by two chunks - one to serve the core static and reserved
+ * areas and the other for the dynamic area. They share the same vm
+ * and page map but uses different area allocation map to stay away
+ * from each other. The latter chunk is circulated in the chunk slots
+ * and available for dynamic allocation like any other chunks.
+ *
  * RETURNS:
  * The determined pcpu_unit_size which can be used to initialize
  * percpu access.
  */
 size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
-				     size_t static_size,
+				     size_t static_size, size_t reserved_size,
 				     ssize_t unit_size, ssize_t dyn_size,
 				     void *base_addr,
 				     pcpu_populate_pte_fn_t populate_pte_fn)
 {
 	static struct vm_struct first_vm;
-	static int smap[2];
-	struct pcpu_chunk *schunk;
+	static int smap[2], dmap[2];
+	struct pcpu_chunk *schunk, *dchunk = NULL;
 	unsigned int cpu;
 	int nr_pages;
 	int err, i;
 
 	/* santiy checks */
-	BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC);
+	BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC ||
+		     ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC);
 	BUG_ON(!static_size);
 	if (unit_size >= 0) {
-		BUG_ON(unit_size < static_size +
+		BUG_ON(unit_size < static_size + reserved_size +
 		       (dyn_size >= 0 ? dyn_size : 0));
 		BUG_ON(unit_size & ~PAGE_MASK);
 	} else {
@@ -895,7 +972,7 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
 		pcpu_unit_pages = unit_size >> PAGE_SHIFT;
 	else
 		pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_SIZE >> PAGE_SHIFT,
-					PFN_UP(static_size));
+					PFN_UP(static_size + reserved_size));
 
 	pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT;
 	pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size;
@@ -903,7 +980,7 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
 		+ num_possible_cpus() * pcpu_unit_pages * sizeof(struct page *);
 
 	if (dyn_size < 0)
-		dyn_size = pcpu_unit_size - static_size;
+		dyn_size = pcpu_unit_size - static_size - reserved_size;
 
 	/*
 	 * Allocate chunk slots. The additional last slot is for
@@ -914,20 +991,49 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
 	for (i = 0; i < pcpu_nr_slots; i++)
 		INIT_LIST_HEAD(&pcpu_slot[i]);
 
-	/* init static chunk */
+	/*
+	 * Initialize static chunk. If reserved_size is zero, the
+	 * static chunk covers static area + dynamic allocation area
+	 * in the first chunk. If reserved_size is not zero, it
+	 * covers static area + reserved area (mostly used for module
+	 * static percpu allocation).
+	 */
 	schunk = alloc_bootmem(pcpu_chunk_struct_size);
 	INIT_LIST_HEAD(&schunk->list);
 	schunk->vm = &first_vm;
 	schunk->map = smap;
 	schunk->map_alloc = ARRAY_SIZE(smap);
 	schunk->page = schunk->page_ar;
-	schunk->free_size = dyn_size;
+
+	if (reserved_size) {
+		schunk->free_size = reserved_size;
+		pcpu_reserved_chunk = schunk;	/* not for dynamic alloc */
+	} else {
+		schunk->free_size = dyn_size;
+		dyn_size = 0;			/* dynamic area covered */
+	}
 	schunk->contig_hint = schunk->free_size;
 
 	schunk->map[schunk->map_used++] = -static_size;
 	if (schunk->free_size)
 		schunk->map[schunk->map_used++] = schunk->free_size;
 
+	pcpu_reserved_chunk_limit = static_size + schunk->free_size;
+
+	/* init dynamic chunk if necessary */
+	if (dyn_size) {
+		dchunk = alloc_bootmem(sizeof(struct pcpu_chunk));
+		INIT_LIST_HEAD(&dchunk->list);
+		dchunk->vm = &first_vm;
+		dchunk->map = dmap;
+		dchunk->map_alloc = ARRAY_SIZE(dmap);
+		dchunk->page = schunk->page_ar;	/* share page map with schunk */
+
+		dchunk->contig_hint = dchunk->free_size = dyn_size;
+		dchunk->map[dchunk->map_used++] = -pcpu_reserved_chunk_limit;
+		dchunk->map[dchunk->map_used++] = dchunk->free_size;
+	}
+
 	/* allocate vm address */
 	first_vm.flags = VM_ALLOC;
 	first_vm.size = pcpu_chunk_size;
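The resulting split of the first chunk when reserved_size is non-zero can be pictured as follows (widths schematic; this only summarizes the assignments above):

	/*
	 *	+--------------+---------------+---------------------+
	 *	| static area  | reserved area | dynamic area        |
	 *	+--------------+---------------+---------------------+
	 *	|<- pcpu_reserved_chunk_limit ->|
	 *
	 * schunk (== pcpu_reserved_chunk) covers static + reserved and
	 * has map { -static_size, reserved_size }; dchunk covers the
	 * dynamic area with map { -pcpu_reserved_chunk_limit, dyn_size }.
	 * Both share first_vm and schunk->page_ar, but only dchunk is
	 * linked into the chunk slots and the address rbtree.
	 */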
@@ -937,12 +1043,14 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
 	else {
 		/*
 		 * Pages already mapped. No need to remap into
-		 * vmalloc area. In this case the static chunk can't
-		 * be mapped or unmapped by percpu and is marked
+		 * vmalloc area. In this case the first chunks can't
+		 * be mapped or unmapped by percpu and are marked
 		 * immutable.
 		 */
 		first_vm.addr = base_addr;
 		schunk->immutable = true;
+		if (dchunk)
+			dchunk->immutable = true;
 	}
 
 	/* assign pages */
948 /* assign pages */ 1056 /* assign pages */
@@ -978,8 +1086,13 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
978 } 1086 }
979 1087
980 /* link the first chunk in */ 1088 /* link the first chunk in */
981 pcpu_chunk_relocate(schunk, -1); 1089 if (!dchunk) {
982 pcpu_chunk_addr_insert(schunk); 1090 pcpu_chunk_relocate(schunk, -1);
1091 pcpu_chunk_addr_insert(schunk);
1092 } else {
1093 pcpu_chunk_relocate(dchunk, -1);
1094 pcpu_chunk_addr_insert(dchunk);
1095 }
983 1096
984 /* we're done */ 1097 /* we're done */
985 pcpu_base_addr = (void *)pcpu_chunk_addr(schunk, 0, 0); 1098 pcpu_base_addr = (void *)pcpu_chunk_addr(schunk, 0, 0);