diff options
Diffstat (limited to 'mm/percpu.c')
-rw-r--r-- | mm/percpu.c | 153 |
1 files changed, 133 insertions, 20 deletions
diff --git a/mm/percpu.c b/mm/percpu.c index 5b47d9fe65f5..ef8e169b7731 100644 --- a/mm/percpu.c +++ b/mm/percpu.c | |||
@@ -94,6 +94,11 @@ static size_t pcpu_chunk_struct_size __read_mostly; | |||
94 | void *pcpu_base_addr __read_mostly; | 94 | void *pcpu_base_addr __read_mostly; |
95 | EXPORT_SYMBOL_GPL(pcpu_base_addr); | 95 | EXPORT_SYMBOL_GPL(pcpu_base_addr); |
96 | 96 | ||
97 | /* optional reserved chunk, only accessible for reserved allocations */ | ||
98 | static struct pcpu_chunk *pcpu_reserved_chunk; | ||
99 | /* offset limit of the reserved chunk */ | ||
100 | static int pcpu_reserved_chunk_limit; | ||
101 | |||
97 | /* | 102 | /* |
98 | * One mutex to rule them all. | 103 | * One mutex to rule them all. |
99 | * | 104 | * |
@@ -201,13 +206,14 @@ static void *pcpu_realloc(void *p, size_t size, size_t new_size) | |||
201 | * | 206 | * |
202 | * This function is called after an allocation or free changed @chunk. | 207 | * This function is called after an allocation or free changed @chunk. |
203 | * New slot according to the changed state is determined and @chunk is | 208 | * New slot according to the changed state is determined and @chunk is |
204 | * moved to the slot. | 209 | * moved to the slot. Note that the reserved chunk is never put on |
210 | * chunk slots. | ||
205 | */ | 211 | */ |
206 | static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot) | 212 | static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot) |
207 | { | 213 | { |
208 | int nslot = pcpu_chunk_slot(chunk); | 214 | int nslot = pcpu_chunk_slot(chunk); |
209 | 215 | ||
210 | if (oslot != nslot) { | 216 | if (chunk != pcpu_reserved_chunk && oslot != nslot) { |
211 | if (oslot < nslot) | 217 | if (oslot < nslot) |
212 | list_move(&chunk->list, &pcpu_slot[nslot]); | 218 | list_move(&chunk->list, &pcpu_slot[nslot]); |
213 | else | 219 | else |
@@ -255,6 +261,15 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) | |||
255 | struct rb_node *n, *parent; | 261 | struct rb_node *n, *parent; |
256 | struct pcpu_chunk *chunk; | 262 | struct pcpu_chunk *chunk; |
257 | 263 | ||
264 | /* is it in the reserved chunk? */ | ||
265 | if (pcpu_reserved_chunk) { | ||
266 | void *start = pcpu_reserved_chunk->vm->addr; | ||
267 | |||
268 | if (addr >= start && addr < start + pcpu_reserved_chunk_limit) | ||
269 | return pcpu_reserved_chunk; | ||
270 | } | ||
271 | |||
272 | /* nah... search the regular ones */ | ||
258 | n = *pcpu_chunk_rb_search(addr, &parent); | 273 | n = *pcpu_chunk_rb_search(addr, &parent); |
259 | if (!n) { | 274 | if (!n) { |
260 | /* no exactly matching chunk, the parent is the closest */ | 275 | /* no exactly matching chunk, the parent is the closest */ |
@@ -713,9 +728,10 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void) | |||
713 | } | 728 | } |
714 | 729 | ||
715 | /** | 730 | /** |
716 | * __alloc_percpu - allocate percpu area | 731 | * pcpu_alloc - the percpu allocator |
717 | * @size: size of area to allocate in bytes | 732 | * @size: size of area to allocate in bytes |
718 | * @align: alignment of area (max PAGE_SIZE) | 733 | * @align: alignment of area (max PAGE_SIZE) |
734 | * @reserved: allocate from the reserved chunk if available | ||
719 | * | 735 | * |
720 | * Allocate percpu area of @size bytes aligned at @align. Might | 736 | * Allocate percpu area of @size bytes aligned at @align. Might |
721 | * sleep. Might trigger writeouts. | 737 | * sleep. Might trigger writeouts. |
@@ -723,7 +739,7 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void) | |||
723 | * RETURNS: | 739 | * RETURNS: |
724 | * Percpu pointer to the allocated area on success, NULL on failure. | 740 | * Percpu pointer to the allocated area on success, NULL on failure. |
725 | */ | 741 | */ |
726 | void *__alloc_percpu(size_t size, size_t align) | 742 | static void *pcpu_alloc(size_t size, size_t align, bool reserved) |
727 | { | 743 | { |
728 | void *ptr = NULL; | 744 | void *ptr = NULL; |
729 | struct pcpu_chunk *chunk; | 745 | struct pcpu_chunk *chunk; |
@@ -737,7 +753,18 @@ void *__alloc_percpu(size_t size, size_t align) | |||
737 | 753 | ||
738 | mutex_lock(&pcpu_mutex); | 754 | mutex_lock(&pcpu_mutex); |
739 | 755 | ||
740 | /* allocate area */ | 756 | /* serve reserved allocations from the reserved chunk if available */ |
757 | if (reserved && pcpu_reserved_chunk) { | ||
758 | chunk = pcpu_reserved_chunk; | ||
759 | if (size > chunk->contig_hint) | ||
760 | goto out_unlock; | ||
761 | off = pcpu_alloc_area(chunk, size, align); | ||
762 | if (off >= 0) | ||
763 | goto area_found; | ||
764 | goto out_unlock; | ||
765 | } | ||
766 | |||
767 | /* search through normal chunks */ | ||
741 | for (slot = pcpu_size_to_slot(size); slot < pcpu_nr_slots; slot++) { | 768 | for (slot = pcpu_size_to_slot(size); slot < pcpu_nr_slots; slot++) { |
742 | list_for_each_entry(chunk, &pcpu_slot[slot], list) { | 769 | list_for_each_entry(chunk, &pcpu_slot[slot], list) { |
743 | if (size > chunk->contig_hint) | 770 | if (size > chunk->contig_hint) |
@@ -773,8 +800,41 @@ out_unlock: | |||
773 | mutex_unlock(&pcpu_mutex); | 800 | mutex_unlock(&pcpu_mutex); |
774 | return ptr; | 801 | return ptr; |
775 | } | 802 | } |
803 | |||
804 | /** | ||
805 | * __alloc_percpu - allocate dynamic percpu area | ||
806 | * @size: size of area to allocate in bytes | ||
807 | * @align: alignment of area (max PAGE_SIZE) | ||
808 | * | ||
809 | * Allocate percpu area of @size bytes aligned at @align. Might | ||
810 | * sleep. Might trigger writeouts. | ||
811 | * | ||
812 | * RETURNS: | ||
813 | * Percpu pointer to the allocated area on success, NULL on failure. | ||
814 | */ | ||
815 | void *__alloc_percpu(size_t size, size_t align) | ||
816 | { | ||
817 | return pcpu_alloc(size, align, false); | ||
818 | } | ||
776 | EXPORT_SYMBOL_GPL(__alloc_percpu); | 819 | EXPORT_SYMBOL_GPL(__alloc_percpu); |
777 | 820 | ||
821 | /** | ||
822 | * __alloc_reserved_percpu - allocate reserved percpu area | ||
823 | * @size: size of area to allocate in bytes | ||
824 | * @align: alignment of area (max PAGE_SIZE) | ||
825 | * | ||
826 | * Allocate percpu area of @size bytes aligned at @align from reserved | ||
827 | * percpu area if arch has set it up; otherwise, allocation is served | ||
828 | * from the same dynamic area. Might sleep. Might trigger writeouts. | ||
829 | * | ||
830 | * RETURNS: | ||
831 | * Percpu pointer to the allocated area on success, NULL on failure. | ||
832 | */ | ||
833 | void *__alloc_reserved_percpu(size_t size, size_t align) | ||
834 | { | ||
835 | return pcpu_alloc(size, align, true); | ||
836 | } | ||
837 | |||
778 | static void pcpu_kill_chunk(struct pcpu_chunk *chunk) | 838 | static void pcpu_kill_chunk(struct pcpu_chunk *chunk) |
779 | { | 839 | { |
780 | WARN_ON(chunk->immutable); | 840 | WARN_ON(chunk->immutable); |
@@ -826,6 +886,7 @@ EXPORT_SYMBOL_GPL(free_percpu); | |||
826 | * pcpu_setup_first_chunk - initialize the first percpu chunk | 886 | * pcpu_setup_first_chunk - initialize the first percpu chunk |
827 | * @get_page_fn: callback to fetch page pointer | 887 | * @get_page_fn: callback to fetch page pointer |
828 | * @static_size: the size of static percpu area in bytes | 888 | * @static_size: the size of static percpu area in bytes |
889 | * @reserved_size: the size of reserved percpu area in bytes | ||
829 | * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto | 890 | * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto |
830 | * @dyn_size: free size for dynamic allocation in bytes, -1 for auto | 891 | * @dyn_size: free size for dynamic allocation in bytes, -1 for auto |
831 | * @base_addr: mapped address, NULL for auto | 892 | * @base_addr: mapped address, NULL for auto |
@@ -844,14 +905,22 @@ EXPORT_SYMBOL_GPL(free_percpu); | |||
844 | * indicates end of pages for the cpu. Note that @get_page_fn() must | 905 | * indicates end of pages for the cpu. Note that @get_page_fn() must |
845 | * return the same number of pages for all cpus. | 906 | * return the same number of pages for all cpus. |
846 | * | 907 | * |
908 | * @reserved_size, if non-zero, specifies the amount of bytes to | ||
909 | * reserve after the static area in the first chunk. This reserves | ||
910 | * the first chunk such that it's available only through reserved | ||
911 | * percpu allocation. This is primarily used to serve module percpu | ||
912 | * static areas on architectures where the addressing model has | ||
913 | * limited offset range for symbol relocations to guarantee module | ||
914 | * percpu symbols fall inside the relocatable range. | ||
915 | * | ||
847 | * @unit_size, if non-negative, specifies unit size and must be | 916 | * @unit_size, if non-negative, specifies unit size and must be |
848 | * aligned to PAGE_SIZE and equal to or larger than @static_size + | 917 | * aligned to PAGE_SIZE and equal to or larger than @static_size + |
849 | * @dyn_size. | 918 | * @reserved_size + @dyn_size. |
850 | * | 919 | * |
851 | * @dyn_size, if non-negative, limits the number of bytes available | 920 | * @dyn_size, if non-negative, limits the number of bytes available |
852 | * for dynamic allocation in the first chunk. Specifying a non-negative | 921 | * for dynamic allocation in the first chunk. Specifying a non-negative |
853 | * value makes percpu leave alone the area beyond @static_size + | 922 | * value makes percpu leave alone the area beyond @static_size + |
854 | * @dyn_size. | 923 | * @reserved_size + @dyn_size. |
855 | * | 924 | * |
856 | * Non-null @base_addr means that the caller already allocated virtual | 925 | * Non-null @base_addr means that the caller already allocated virtual |
857 | * region for the first chunk and mapped it. percpu must not mess | 926 | * region for the first chunk and mapped it. percpu must not mess |
@@ -861,28 +930,36 @@ EXPORT_SYMBOL_GPL(free_percpu); | |||
861 | * @populate_pte_fn is used to populate the pagetable. NULL means the | 930 | * @populate_pte_fn is used to populate the pagetable. NULL means the |
862 | * caller already populated the pagetable. | 931 | * caller already populated the pagetable. |
863 | * | 932 | * |
933 | * If the first chunk ends up with both reserved and dynamic areas, it | ||
934 | * is served by two chunks - one to serve the core static and reserved | ||
935 | * areas and the other for the dynamic area. They share the same vm | ||
936 | * and page map but use different area allocation maps to stay away | ||
937 | * from each other. The latter chunk is circulated in the chunk slots | ||
938 | * and available for dynamic allocation like any other chunks. | ||
939 | * | ||
864 | * RETURNS: | 940 | * RETURNS: |
865 | * The determined pcpu_unit_size which can be used to initialize | 941 | * The determined pcpu_unit_size which can be used to initialize |
866 | * percpu access. | 942 | * percpu access. |
867 | */ | 943 | */ |
868 | size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, | 944 | size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, |
869 | size_t static_size, | 945 | size_t static_size, size_t reserved_size, |
870 | ssize_t unit_size, ssize_t dyn_size, | 946 | ssize_t unit_size, ssize_t dyn_size, |
871 | void *base_addr, | 947 | void *base_addr, |
872 | pcpu_populate_pte_fn_t populate_pte_fn) | 948 | pcpu_populate_pte_fn_t populate_pte_fn) |
873 | { | 949 | { |
874 | static struct vm_struct first_vm; | 950 | static struct vm_struct first_vm; |
875 | static int smap[2]; | 951 | static int smap[2], dmap[2]; |
876 | struct pcpu_chunk *schunk; | 952 | struct pcpu_chunk *schunk, *dchunk = NULL; |
877 | unsigned int cpu; | 953 | unsigned int cpu; |
878 | int nr_pages; | 954 | int nr_pages; |
879 | int err, i; | 955 | int err, i; |
880 | 956 | ||
881 | /* sanity checks */ | 957 | /* sanity checks */ |
882 | BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC); | 958 | BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC || |
959 | ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC); | ||
883 | BUG_ON(!static_size); | 960 | BUG_ON(!static_size); |
884 | if (unit_size >= 0) { | 961 | if (unit_size >= 0) { |
885 | BUG_ON(unit_size < static_size + | 962 | BUG_ON(unit_size < static_size + reserved_size + |
886 | (dyn_size >= 0 ? dyn_size : 0)); | 963 | (dyn_size >= 0 ? dyn_size : 0)); |
887 | BUG_ON(unit_size & ~PAGE_MASK); | 964 | BUG_ON(unit_size & ~PAGE_MASK); |
888 | } else { | 965 | } else { |
@@ -895,7 +972,7 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, | |||
895 | pcpu_unit_pages = unit_size >> PAGE_SHIFT; | 972 | pcpu_unit_pages = unit_size >> PAGE_SHIFT; |
896 | else | 973 | else |
897 | pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_SIZE >> PAGE_SHIFT, | 974 | pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_SIZE >> PAGE_SHIFT, |
898 | PFN_UP(static_size)); | 975 | PFN_UP(static_size + reserved_size)); |
899 | 976 | ||
900 | pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; | 977 | pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; |
901 | pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; | 978 | pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; |
@@ -903,7 +980,7 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, | |||
903 | + num_possible_cpus() * pcpu_unit_pages * sizeof(struct page *); | 980 | + num_possible_cpus() * pcpu_unit_pages * sizeof(struct page *); |
904 | 981 | ||
905 | if (dyn_size < 0) | 982 | if (dyn_size < 0) |
906 | dyn_size = pcpu_unit_size - static_size; | 983 | dyn_size = pcpu_unit_size - static_size - reserved_size; |
907 | 984 | ||
908 | /* | 985 | /* |
909 | * Allocate chunk slots. The additional last slot is for | 986 | * Allocate chunk slots. The additional last slot is for |
@@ -914,20 +991,49 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, | |||
914 | for (i = 0; i < pcpu_nr_slots; i++) | 991 | for (i = 0; i < pcpu_nr_slots; i++) |
915 | INIT_LIST_HEAD(&pcpu_slot[i]); | 992 | INIT_LIST_HEAD(&pcpu_slot[i]); |
916 | 993 | ||
917 | /* init static chunk */ | 994 | /* |
995 | * Initialize static chunk. If reserved_size is zero, the | ||
996 | * static chunk covers static area + dynamic allocation area | ||
997 | * in the first chunk. If reserved_size is not zero, it | ||
998 | * covers static area + reserved area (mostly used for module | ||
999 | * static percpu allocation). | ||
1000 | */ | ||
918 | schunk = alloc_bootmem(pcpu_chunk_struct_size); | 1001 | schunk = alloc_bootmem(pcpu_chunk_struct_size); |
919 | INIT_LIST_HEAD(&schunk->list); | 1002 | INIT_LIST_HEAD(&schunk->list); |
920 | schunk->vm = &first_vm; | 1003 | schunk->vm = &first_vm; |
921 | schunk->map = smap; | 1004 | schunk->map = smap; |
922 | schunk->map_alloc = ARRAY_SIZE(smap); | 1005 | schunk->map_alloc = ARRAY_SIZE(smap); |
923 | schunk->page = schunk->page_ar; | 1006 | schunk->page = schunk->page_ar; |
924 | schunk->free_size = dyn_size; | 1007 | |
1008 | if (reserved_size) { | ||
1009 | schunk->free_size = reserved_size; | ||
1010 | pcpu_reserved_chunk = schunk; /* not for dynamic alloc */ | ||
1011 | } else { | ||
1012 | schunk->free_size = dyn_size; | ||
1013 | dyn_size = 0; /* dynamic area covered */ | ||
1014 | } | ||
925 | schunk->contig_hint = schunk->free_size; | 1015 | schunk->contig_hint = schunk->free_size; |
926 | 1016 | ||
927 | schunk->map[schunk->map_used++] = -static_size; | 1017 | schunk->map[schunk->map_used++] = -static_size; |
928 | if (schunk->free_size) | 1018 | if (schunk->free_size) |
929 | schunk->map[schunk->map_used++] = schunk->free_size; | 1019 | schunk->map[schunk->map_used++] = schunk->free_size; |
930 | 1020 | ||
1021 | pcpu_reserved_chunk_limit = static_size + schunk->free_size; | ||
1022 | |||
1023 | /* init dynamic chunk if necessary */ | ||
1024 | if (dyn_size) { | ||
1025 | dchunk = alloc_bootmem(sizeof(struct pcpu_chunk)); | ||
1026 | INIT_LIST_HEAD(&dchunk->list); | ||
1027 | dchunk->vm = &first_vm; | ||
1028 | dchunk->map = dmap; | ||
1029 | dchunk->map_alloc = ARRAY_SIZE(dmap); | ||
1030 | dchunk->page = schunk->page_ar; /* share page map with schunk */ | ||
1031 | |||
1032 | dchunk->contig_hint = dchunk->free_size = dyn_size; | ||
1033 | dchunk->map[dchunk->map_used++] = -pcpu_reserved_chunk_limit; | ||
1034 | dchunk->map[dchunk->map_used++] = dchunk->free_size; | ||
1035 | } | ||
1036 | |||
931 | /* allocate vm address */ | 1037 | /* allocate vm address */ |
932 | first_vm.flags = VM_ALLOC; | 1038 | first_vm.flags = VM_ALLOC; |
933 | first_vm.size = pcpu_chunk_size; | 1039 | first_vm.size = pcpu_chunk_size; |
@@ -937,12 +1043,14 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, | |||
937 | else { | 1043 | else { |
938 | /* | 1044 | /* |
939 | * Pages already mapped. No need to remap into | 1045 | * Pages already mapped. No need to remap into |
940 | * vmalloc area. In this case the static chunk can't | 1046 | * vmalloc area. In this case the first chunks can't |
941 | * be mapped or unmapped by percpu and is marked | 1047 | * be mapped or unmapped by percpu and are marked |
942 | * immutable. | 1048 | * immutable. |
943 | */ | 1049 | */ |
944 | first_vm.addr = base_addr; | 1050 | first_vm.addr = base_addr; |
945 | schunk->immutable = true; | 1051 | schunk->immutable = true; |
1052 | if (dchunk) | ||
1053 | dchunk->immutable = true; | ||
946 | } | 1054 | } |
947 | 1055 | ||
948 | /* assign pages */ | 1056 | /* assign pages */ |
@@ -978,8 +1086,13 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, | |||
978 | } | 1086 | } |
979 | 1087 | ||
980 | /* link the first chunk in */ | 1088 | /* link the first chunk in */ |
981 | pcpu_chunk_relocate(schunk, -1); | 1089 | if (!dchunk) { |
982 | pcpu_chunk_addr_insert(schunk); | 1090 | pcpu_chunk_relocate(schunk, -1); |
1091 | pcpu_chunk_addr_insert(schunk); | ||
1092 | } else { | ||
1093 | pcpu_chunk_relocate(dchunk, -1); | ||
1094 | pcpu_chunk_addr_insert(dchunk); | ||
1095 | } | ||
983 | 1096 | ||
984 | /* we're done */ | 1097 | /* we're done */ |
985 | pcpu_base_addr = (void *)pcpu_chunk_addr(schunk, 0, 0); | 1098 | pcpu_base_addr = (void *)pcpu_chunk_addr(schunk, 0, 0); |