author		Tejun Heo <tj@kernel.org>	2009-03-06 00:33:59 -0500
committer	Tejun Heo <tj@kernel.org>	2009-03-06 00:33:59 -0500
commit		edcb463997ed7b2ffa3bac76e3e75957318f2e01 (patch)
tree		d1eefb78736404993b0de4a049392f9af578e29e /mm
parent		3e24aa58907c62bc79d1094e941a374568f62522 (diff)
percpu, module: implement reserved allocation and use it for module percpu variables
Impact: add reserved allocation functionality and use it for module percpu variables

This patch implements reserved allocation from the first chunk. When setting up the first chunk, the arch can ask to set aside a certain number of bytes right after the core static area, which is then available only through a separate reserved allocator. This will be used primarily for module static percpu variables on architectures with a limited relocation range, to ensure that module percpu symbols are inside the relocatable range.

If a reserved area is requested, the first chunk becomes reserved and isn't available for regular allocation. If the first chunk also includes a piggy-back dynamic allocation area, a separate chunk mapping the same region is created to serve dynamic allocation. The first one is called the static first chunk and the second the dynamic first chunk. Although they share the page map, their different area map initializations guarantee that they serve disjoint areas according to their purposes.

If the arch doesn't set up a reserved area, reserved allocation is handled like any other allocation.

Signed-off-by: Tejun Heo <tj@kernel.org>
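To make the resulting interface concrete, here is a minimal, hypothetical caller sketch (not part of the patch). It uses only __alloc_percpu(), __alloc_reserved_percpu() and free_percpu() as provided by this series; the function name, the example sizes and the access loop are illustrative assumptions.

#include <linux/init.h>
#include <linux/errno.h>
#include <linux/cpumask.h>
#include <linux/percpu.h>

/* hypothetical core-kernel user of the two allocation paths */
static void *reserved_area;	/* from the reserved first chunk, if set up */
static int *dyn_counter;	/* from the regular dynamic chunks */

static int __init percpu_alloc_example(void)
{
	unsigned int cpu;

	/*
	 * Served from the reserved chunk when the arch passed a non-zero
	 * reserved_size to pcpu_setup_first_chunk(); otherwise the request
	 * simply falls back to the dynamic area.
	 */
	reserved_area = __alloc_reserved_percpu(64, sizeof(long));
	if (!reserved_area)
		return -ENOMEM;

	/* ordinary dynamic allocation, never served from the reserved chunk */
	dyn_counter = __alloc_percpu(sizeof(int), __alignof__(int));
	if (!dyn_counter) {
		free_percpu(reserved_area);
		return -ENOMEM;
	}

	/* each possible CPU gets its own copy; per_cpu_ptr() addresses it */
	for_each_possible_cpu(cpu)
		*per_cpu_ptr(dyn_counter, cpu) = 0;

	return 0;
}

In-tree, the intended user of __alloc_reserved_percpu() is the module loader, which uses it to place module static percpu variables inside the reserved, relocation-friendly region; ordinary modules keep declaring their variables with DEFINE_PER_CPU() as before.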
Diffstat (limited to 'mm')
-rw-r--r--	mm/percpu.c	153
1 file changed, 133 insertions, 20 deletions
diff --git a/mm/percpu.c b/mm/percpu.c
index 5b47d9fe65f5..ef8e169b7731 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -94,6 +94,11 @@ static size_t pcpu_chunk_struct_size __read_mostly;
 void *pcpu_base_addr __read_mostly;
 EXPORT_SYMBOL_GPL(pcpu_base_addr);
 
+/* optional reserved chunk, only accessible for reserved allocations */
+static struct pcpu_chunk *pcpu_reserved_chunk;
+/* offset limit of the reserved chunk */
+static int pcpu_reserved_chunk_limit;
+
 /*
  * One mutex to rule them all.
  *
@@ -201,13 +206,14 @@ static void *pcpu_realloc(void *p, size_t size, size_t new_size)
  *
  * This function is called after an allocation or free changed @chunk.
  * New slot according to the changed state is determined and @chunk is
- * moved to the slot.
+ * moved to the slot. Note that the reserved chunk is never put on
+ * chunk slots.
  */
 static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot)
 {
 	int nslot = pcpu_chunk_slot(chunk);
 
-	if (oslot != nslot) {
+	if (chunk != pcpu_reserved_chunk && oslot != nslot) {
 		if (oslot < nslot)
 			list_move(&chunk->list, &pcpu_slot[nslot]);
 		else
@@ -255,6 +261,15 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
 	struct rb_node *n, *parent;
 	struct pcpu_chunk *chunk;
 
+	/* is it in the reserved chunk? */
+	if (pcpu_reserved_chunk) {
+		void *start = pcpu_reserved_chunk->vm->addr;
+
+		if (addr >= start && addr < start + pcpu_reserved_chunk_limit)
+			return pcpu_reserved_chunk;
+	}
+
+	/* nah... search the regular ones */
 	n = *pcpu_chunk_rb_search(addr, &parent);
 	if (!n) {
 		/* no exactly matching chunk, the parent is the closest */
@@ -713,9 +728,10 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void)
 }
 
 /**
- * __alloc_percpu - allocate percpu area
+ * pcpu_alloc - the percpu allocator
  * @size: size of area to allocate in bytes
  * @align: alignment of area (max PAGE_SIZE)
+ * @reserved: allocate from the reserved chunk if available
  *
  * Allocate percpu area of @size bytes aligned at @align. Might
 * sleep. Might trigger writeouts.
@@ -723,7 +739,7 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void)
  * RETURNS:
  * Percpu pointer to the allocated area on success, NULL on failure.
  */
-void *__alloc_percpu(size_t size, size_t align)
+static void *pcpu_alloc(size_t size, size_t align, bool reserved)
 {
 	void *ptr = NULL;
 	struct pcpu_chunk *chunk;
@@ -737,7 +753,18 @@ void *__alloc_percpu(size_t size, size_t align)
 
 	mutex_lock(&pcpu_mutex);
 
-	/* allocate area */
+	/* serve reserved allocations from the reserved chunk if available */
+	if (reserved && pcpu_reserved_chunk) {
+		chunk = pcpu_reserved_chunk;
+		if (size > chunk->contig_hint)
+			goto out_unlock;
+		off = pcpu_alloc_area(chunk, size, align);
+		if (off >= 0)
+			goto area_found;
+		goto out_unlock;
+	}
+
+	/* search through normal chunks */
 	for (slot = pcpu_size_to_slot(size); slot < pcpu_nr_slots; slot++) {
 		list_for_each_entry(chunk, &pcpu_slot[slot], list) {
 			if (size > chunk->contig_hint)
@@ -773,8 +800,41 @@ out_unlock:
 	mutex_unlock(&pcpu_mutex);
 	return ptr;
 }
+
+/**
+ * __alloc_percpu - allocate dynamic percpu area
+ * @size: size of area to allocate in bytes
+ * @align: alignment of area (max PAGE_SIZE)
+ *
+ * Allocate percpu area of @size bytes aligned at @align. Might
+ * sleep. Might trigger writeouts.
+ *
+ * RETURNS:
+ * Percpu pointer to the allocated area on success, NULL on failure.
+ */
+void *__alloc_percpu(size_t size, size_t align)
+{
+	return pcpu_alloc(size, align, false);
+}
 EXPORT_SYMBOL_GPL(__alloc_percpu);
 
+/**
+ * __alloc_reserved_percpu - allocate reserved percpu area
+ * @size: size of area to allocate in bytes
+ * @align: alignment of area (max PAGE_SIZE)
+ *
+ * Allocate percpu area of @size bytes aligned at @align from reserved
+ * percpu area if arch has set it up; otherwise, allocation is served
+ * from the same dynamic area. Might sleep. Might trigger writeouts.
+ *
+ * RETURNS:
+ * Percpu pointer to the allocated area on success, NULL on failure.
+ */
+void *__alloc_reserved_percpu(size_t size, size_t align)
+{
+	return pcpu_alloc(size, align, true);
+}
+
 static void pcpu_kill_chunk(struct pcpu_chunk *chunk)
 {
 	WARN_ON(chunk->immutable);
@@ -826,6 +886,7 @@ EXPORT_SYMBOL_GPL(free_percpu);
  * pcpu_setup_first_chunk - initialize the first percpu chunk
  * @get_page_fn: callback to fetch page pointer
  * @static_size: the size of static percpu area in bytes
+ * @reserved_size: the size of reserved percpu area in bytes
  * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto
  * @dyn_size: free size for dynamic allocation in bytes, -1 for auto
  * @base_addr: mapped address, NULL for auto
@@ -844,14 +905,22 @@ EXPORT_SYMBOL_GPL(free_percpu);
  * indicates end of pages for the cpu. Note that @get_page_fn() must
  * return the same number of pages for all cpus.
  *
+ * @reserved_size, if non-zero, specifies the amount of bytes to
+ * reserve after the static area in the first chunk. This reserves
+ * the first chunk such that it's available only through reserved
+ * percpu allocation. This is primarily used to serve module percpu
+ * static areas on architectures where the addressing model has
+ * limited offset range for symbol relocations to guarantee module
+ * percpu symbols fall inside the relocatable range.
+ *
  * @unit_size, if non-negative, specifies unit size and must be
  * aligned to PAGE_SIZE and equal to or larger than @static_size +
- * @dyn_size.
+ * @reserved_size + @dyn_size.
  *
  * @dyn_size, if non-negative, limits the number of bytes available
  * for dynamic allocation in the first chunk. Specifying non-negative
 * value make percpu leave alone the area beyond @static_size +
- * @dyn_size.
+ * @reserved_size + @dyn_size.
  *
  * Non-null @base_addr means that the caller already allocated virtual
  * region for the first chunk and mapped it. percpu must not mess
@@ -861,28 +930,36 @@ EXPORT_SYMBOL_GPL(free_percpu);
  * @populate_pte_fn is used to populate the pagetable. NULL means the
  * caller already populated the pagetable.
  *
+ * If the first chunk ends up with both reserved and dynamic areas, it
+ * is served by two chunks - one to serve the core static and reserved
+ * areas and the other for the dynamic area. They share the same vm
+ * and page map but uses different area allocation map to stay away
+ * from each other. The latter chunk is circulated in the chunk slots
+ * and available for dynamic allocation like any other chunks.
+ *
  * RETURNS:
  * The determined pcpu_unit_size which can be used to initialize
  * percpu access.
  */
 size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
-				     size_t static_size,
+				     size_t static_size, size_t reserved_size,
 				     ssize_t unit_size, ssize_t dyn_size,
 				     void *base_addr,
 				     pcpu_populate_pte_fn_t populate_pte_fn)
 {
 	static struct vm_struct first_vm;
-	static int smap[2];
-	struct pcpu_chunk *schunk;
+	static int smap[2], dmap[2];
+	struct pcpu_chunk *schunk, *dchunk = NULL;
 	unsigned int cpu;
 	int nr_pages;
 	int err, i;
 
 	/* santiy checks */
-	BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC);
+	BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC ||
+		     ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC);
 	BUG_ON(!static_size);
 	if (unit_size >= 0) {
-		BUG_ON(unit_size < static_size +
+		BUG_ON(unit_size < static_size + reserved_size +
 		       (dyn_size >= 0 ? dyn_size : 0));
 		BUG_ON(unit_size & ~PAGE_MASK);
 	} else {
@@ -895,7 +972,7 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
 		pcpu_unit_pages = unit_size >> PAGE_SHIFT;
 	else
 		pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_SIZE >> PAGE_SHIFT,
-					PFN_UP(static_size));
+					PFN_UP(static_size + reserved_size));
 
 	pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT;
 	pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size;
@@ -903,7 +980,7 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
 		+ num_possible_cpus() * pcpu_unit_pages * sizeof(struct page *);
 
 	if (dyn_size < 0)
-		dyn_size = pcpu_unit_size - static_size;
+		dyn_size = pcpu_unit_size - static_size - reserved_size;
 
 	/*
 	 * Allocate chunk slots. The additional last slot is for
@@ -914,20 +991,49 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
 	for (i = 0; i < pcpu_nr_slots; i++)
 		INIT_LIST_HEAD(&pcpu_slot[i]);
 
-	/* init static chunk */
+	/*
+	 * Initialize static chunk. If reserved_size is zero, the
+	 * static chunk covers static area + dynamic allocation area
+	 * in the first chunk. If reserved_size is not zero, it
+	 * covers static area + reserved area (mostly used for module
+	 * static percpu allocation).
+	 */
 	schunk = alloc_bootmem(pcpu_chunk_struct_size);
 	INIT_LIST_HEAD(&schunk->list);
 	schunk->vm = &first_vm;
 	schunk->map = smap;
 	schunk->map_alloc = ARRAY_SIZE(smap);
 	schunk->page = schunk->page_ar;
-	schunk->free_size = dyn_size;
+
+	if (reserved_size) {
+		schunk->free_size = reserved_size;
+		pcpu_reserved_chunk = schunk;	/* not for dynamic alloc */
+	} else {
+		schunk->free_size = dyn_size;
+		dyn_size = 0;			/* dynamic area covered */
+	}
 	schunk->contig_hint = schunk->free_size;
 
 	schunk->map[schunk->map_used++] = -static_size;
 	if (schunk->free_size)
 		schunk->map[schunk->map_used++] = schunk->free_size;
 
+	pcpu_reserved_chunk_limit = static_size + schunk->free_size;
+
+	/* init dynamic chunk if necessary */
+	if (dyn_size) {
+		dchunk = alloc_bootmem(sizeof(struct pcpu_chunk));
+		INIT_LIST_HEAD(&dchunk->list);
+		dchunk->vm = &first_vm;
+		dchunk->map = dmap;
+		dchunk->map_alloc = ARRAY_SIZE(dmap);
+		dchunk->page = schunk->page_ar;	/* share page map with schunk */
+
+		dchunk->contig_hint = dchunk->free_size = dyn_size;
+		dchunk->map[dchunk->map_used++] = -pcpu_reserved_chunk_limit;
+		dchunk->map[dchunk->map_used++] = dchunk->free_size;
+	}
+
 	/* allocate vm address */
 	first_vm.flags = VM_ALLOC;
 	first_vm.size = pcpu_chunk_size;
@@ -937,12 +1043,14 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
 	else {
 		/*
 		 * Pages already mapped. No need to remap into
-		 * vmalloc area. In this case the static chunk can't
-		 * be mapped or unmapped by percpu and is marked
+		 * vmalloc area. In this case the first chunks can't
+		 * be mapped or unmapped by percpu and are marked
 		 * immutable.
 		 */
 		first_vm.addr = base_addr;
 		schunk->immutable = true;
+		if (dchunk)
+			dchunk->immutable = true;
 	}
 
 	/* assign pages */
@@ -978,8 +1086,13 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
 	}
 
 	/* link the first chunk in */
-	pcpu_chunk_relocate(schunk, -1);
-	pcpu_chunk_addr_insert(schunk);
+	if (!dchunk) {
+		pcpu_chunk_relocate(schunk, -1);
+		pcpu_chunk_addr_insert(schunk);
+	} else {
+		pcpu_chunk_relocate(dchunk, -1);
+		pcpu_chunk_addr_insert(dchunk);
+	}
 
 	/* we're done */
 	pcpu_base_addr = (void *)pcpu_chunk_addr(schunk, 0, 0);
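For context on where @reserved_size fits, the following is a hypothetical arch-side setup sketch. Only the pcpu_setup_first_chunk() argument list, its return value and the -1/NULL "auto" conventions come from the kerneldoc in the hunks above; the callback stubs and their assumed signatures, the __per_cpu_start/__per_cpu_end usage and the 8k reserve are illustrative assumptions, not taken from this patch.

#include <linux/init.h>
#include <linux/mm.h>
#include <linux/percpu.h>
#include <asm/sections.h>

/* stub with the assumed pcpu_get_page_fn_t signature: return the page
 * backing @pageno of @cpu's unit, or NULL past the end of the unit */
static struct page * __init example_get_page(unsigned int cpu, int pageno)
{
	return NULL;	/* placeholder; a real arch returns its pre-allocated pages */
}

/* stub with the assumed pcpu_populate_pte_fn_t signature */
static void __init example_populate_pte(unsigned long addr)
{
	/* a real arch would make sure a pte backs @addr here */
}

void __init example_setup_per_cpu_areas(void)
{
	size_t static_size = __per_cpu_end - __per_cpu_start;
	size_t unit_size;

	/*
	 * Set aside 8k right after the static area for module static
	 * percpu variables and let percpu choose the unit size, the
	 * dynamic size and the base address (-1 / -1 / NULL = auto).
	 */
	unit_size = pcpu_setup_first_chunk(example_get_page, static_size,
					   8 << 10, -1, -1, NULL,
					   example_populate_pte);

	/* an arch would now derive its per-cpu offsets from pcpu_base_addr
	 * and the returned unit_size for its percpu accessors */
	(void)unit_size;
}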