diff options
Diffstat (limited to 'mm/percpu.c')
-rw-r--r-- | mm/percpu.c | 68 |
1 files changed, 44 insertions, 24 deletions
diff --git a/mm/percpu.c b/mm/percpu.c index bf80e55dbed7..716eb4acf2fc 100644 --- a/mm/percpu.c +++ b/mm/percpu.c | |||
@@ -116,9 +116,9 @@ static int pcpu_atom_size __read_mostly; | |||
116 | static int pcpu_nr_slots __read_mostly; | 116 | static int pcpu_nr_slots __read_mostly; |
117 | static size_t pcpu_chunk_struct_size __read_mostly; | 117 | static size_t pcpu_chunk_struct_size __read_mostly; |
118 | 118 | ||
119 | /* cpus with the lowest and highest unit numbers */ | 119 | /* cpus with the lowest and highest unit addresses */ |
120 | static unsigned int pcpu_first_unit_cpu __read_mostly; | 120 | static unsigned int pcpu_low_unit_cpu __read_mostly; |
121 | static unsigned int pcpu_last_unit_cpu __read_mostly; | 121 | static unsigned int pcpu_high_unit_cpu __read_mostly; |
122 | 122 | ||
123 | /* the address of the first chunk which starts with the kernel static area */ | 123 | /* the address of the first chunk which starts with the kernel static area */ |
124 | void *pcpu_base_addr __read_mostly; | 124 | void *pcpu_base_addr __read_mostly; |
@@ -273,11 +273,11 @@ static void __maybe_unused pcpu_next_pop(struct pcpu_chunk *chunk, | |||
273 | (rs) = (re) + 1, pcpu_next_pop((chunk), &(rs), &(re), (end))) | 273 | (rs) = (re) + 1, pcpu_next_pop((chunk), &(rs), &(re), (end))) |
274 | 274 | ||
275 | /** | 275 | /** |
276 | * pcpu_mem_alloc - allocate memory | 276 | * pcpu_mem_zalloc - allocate memory |
277 | * @size: bytes to allocate | 277 | * @size: bytes to allocate |
278 | * | 278 | * |
279 | * Allocate @size bytes. If @size is smaller than PAGE_SIZE, | 279 | * Allocate @size bytes. If @size is smaller than PAGE_SIZE, |
280 | * kzalloc() is used; otherwise, vmalloc() is used. The returned | 280 | * kzalloc() is used; otherwise, vzalloc() is used. The returned |
281 | * memory is always zeroed. | 281 | * memory is always zeroed. |
282 | * | 282 | * |
283 | * CONTEXT: | 283 | * CONTEXT: |
@@ -286,7 +286,7 @@ static void __maybe_unused pcpu_next_pop(struct pcpu_chunk *chunk, | |||
286 | * RETURNS: | 286 | * RETURNS: |
287 | * Pointer to the allocated area on success, NULL on failure. | 287 | * Pointer to the allocated area on success, NULL on failure. |
288 | */ | 288 | */ |
289 | static void *pcpu_mem_alloc(size_t size) | 289 | static void *pcpu_mem_zalloc(size_t size) |
290 | { | 290 | { |
291 | if (WARN_ON_ONCE(!slab_is_available())) | 291 | if (WARN_ON_ONCE(!slab_is_available())) |
292 | return NULL; | 292 | return NULL; |
@@ -302,7 +302,7 @@ static void *pcpu_mem_alloc(size_t size) | |||
302 | * @ptr: memory to free | 302 | * @ptr: memory to free |
303 | * @size: size of the area | 303 | * @size: size of the area |
304 | * | 304 | * |
305 | * Free @ptr. @ptr should have been allocated using pcpu_mem_alloc(). | 305 | * Free @ptr. @ptr should have been allocated using pcpu_mem_zalloc(). |
306 | */ | 306 | */ |
307 | static void pcpu_mem_free(void *ptr, size_t size) | 307 | static void pcpu_mem_free(void *ptr, size_t size) |
308 | { | 308 | { |
@@ -384,7 +384,7 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc) | |||
384 | size_t old_size = 0, new_size = new_alloc * sizeof(new[0]); | 384 | size_t old_size = 0, new_size = new_alloc * sizeof(new[0]); |
385 | unsigned long flags; | 385 | unsigned long flags; |
386 | 386 | ||
387 | new = pcpu_mem_alloc(new_size); | 387 | new = pcpu_mem_zalloc(new_size); |
388 | if (!new) | 388 | if (!new) |
389 | return -ENOMEM; | 389 | return -ENOMEM; |
390 | 390 | ||
@@ -604,11 +604,12 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void) | |||
604 | { | 604 | { |
605 | struct pcpu_chunk *chunk; | 605 | struct pcpu_chunk *chunk; |
606 | 606 | ||
607 | chunk = pcpu_mem_alloc(pcpu_chunk_struct_size); | 607 | chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size); |
608 | if (!chunk) | 608 | if (!chunk) |
609 | return NULL; | 609 | return NULL; |
610 | 610 | ||
611 | chunk->map = pcpu_mem_alloc(PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0])); | 611 | chunk->map = pcpu_mem_zalloc(PCPU_DFL_MAP_ALLOC * |
612 | sizeof(chunk->map[0])); | ||
612 | if (!chunk->map) { | 613 | if (!chunk->map) { |
613 | kfree(chunk); | 614 | kfree(chunk); |
614 | return NULL; | 615 | return NULL; |
@@ -977,6 +978,17 @@ bool is_kernel_percpu_address(unsigned long addr) | |||
977 | * address. The caller is responsible for ensuring @addr stays valid | 978 | * address. The caller is responsible for ensuring @addr stays valid |
978 | * until this function finishes. | 979 | * until this function finishes. |
979 | * | 980 | * |
981 | * percpu allocator has special setup for the first chunk, which currently | ||
982 | * supports either embedding in linear address space or vmalloc mapping, | ||
983 | * and, from the second one, the backing allocator (currently either vm or | ||
984 | * km) provides translation. | ||
985 | * | ||
986 | * The addr can be tranlated simply without checking if it falls into the | ||
987 | * first chunk. But the current code reflects better how percpu allocator | ||
988 | * actually works, and the verification can discover both bugs in percpu | ||
989 | * allocator itself and per_cpu_ptr_to_phys() callers. So we keep current | ||
990 | * code. | ||
991 | * | ||
980 | * RETURNS: | 992 | * RETURNS: |
981 | * The physical address for @addr. | 993 | * The physical address for @addr. |
982 | */ | 994 | */ |
@@ -984,19 +996,19 @@ phys_addr_t per_cpu_ptr_to_phys(void *addr) | |||
984 | { | 996 | { |
985 | void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr); | 997 | void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr); |
986 | bool in_first_chunk = false; | 998 | bool in_first_chunk = false; |
987 | unsigned long first_start, first_end; | 999 | unsigned long first_low, first_high; |
988 | unsigned int cpu; | 1000 | unsigned int cpu; |
989 | 1001 | ||
990 | /* | 1002 | /* |
991 | * The following test on first_start/end isn't strictly | 1003 | * The following test on unit_low/high isn't strictly |
992 | * necessary but will speed up lookups of addresses which | 1004 | * necessary but will speed up lookups of addresses which |
993 | * aren't in the first chunk. | 1005 | * aren't in the first chunk. |
994 | */ | 1006 | */ |
995 | first_start = pcpu_chunk_addr(pcpu_first_chunk, pcpu_first_unit_cpu, 0); | 1007 | first_low = pcpu_chunk_addr(pcpu_first_chunk, pcpu_low_unit_cpu, 0); |
996 | first_end = pcpu_chunk_addr(pcpu_first_chunk, pcpu_last_unit_cpu, | 1008 | first_high = pcpu_chunk_addr(pcpu_first_chunk, pcpu_high_unit_cpu, |
997 | pcpu_unit_pages); | 1009 | pcpu_unit_pages); |
998 | if ((unsigned long)addr >= first_start && | 1010 | if ((unsigned long)addr >= first_low && |
999 | (unsigned long)addr < first_end) { | 1011 | (unsigned long)addr < first_high) { |
1000 | for_each_possible_cpu(cpu) { | 1012 | for_each_possible_cpu(cpu) { |
1001 | void *start = per_cpu_ptr(base, cpu); | 1013 | void *start = per_cpu_ptr(base, cpu); |
1002 | 1014 | ||
@@ -1011,9 +1023,11 @@ phys_addr_t per_cpu_ptr_to_phys(void *addr) | |||
1011 | if (!is_vmalloc_addr(addr)) | 1023 | if (!is_vmalloc_addr(addr)) |
1012 | return __pa(addr); | 1024 | return __pa(addr); |
1013 | else | 1025 | else |
1014 | return page_to_phys(vmalloc_to_page(addr)); | 1026 | return page_to_phys(vmalloc_to_page(addr)) + |
1027 | offset_in_page(addr); | ||
1015 | } else | 1028 | } else |
1016 | return page_to_phys(pcpu_addr_to_page(addr)); | 1029 | return page_to_phys(pcpu_addr_to_page(addr)) + |
1030 | offset_in_page(addr); | ||
1017 | } | 1031 | } |
1018 | 1032 | ||
1019 | /** | 1033 | /** |
@@ -1233,7 +1247,9 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, | |||
1233 | 1247 | ||
1234 | for (cpu = 0; cpu < nr_cpu_ids; cpu++) | 1248 | for (cpu = 0; cpu < nr_cpu_ids; cpu++) |
1235 | unit_map[cpu] = UINT_MAX; | 1249 | unit_map[cpu] = UINT_MAX; |
1236 | pcpu_first_unit_cpu = NR_CPUS; | 1250 | |
1251 | pcpu_low_unit_cpu = NR_CPUS; | ||
1252 | pcpu_high_unit_cpu = NR_CPUS; | ||
1237 | 1253 | ||
1238 | for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) { | 1254 | for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) { |
1239 | const struct pcpu_group_info *gi = &ai->groups[group]; | 1255 | const struct pcpu_group_info *gi = &ai->groups[group]; |
@@ -1253,9 +1269,13 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, | |||
1253 | unit_map[cpu] = unit + i; | 1269 | unit_map[cpu] = unit + i; |
1254 | unit_off[cpu] = gi->base_offset + i * ai->unit_size; | 1270 | unit_off[cpu] = gi->base_offset + i * ai->unit_size; |
1255 | 1271 | ||
1256 | if (pcpu_first_unit_cpu == NR_CPUS) | 1272 | /* determine low/high unit_cpu */ |
1257 | pcpu_first_unit_cpu = cpu; | 1273 | if (pcpu_low_unit_cpu == NR_CPUS || |
1258 | pcpu_last_unit_cpu = cpu; | 1274 | unit_off[cpu] < unit_off[pcpu_low_unit_cpu]) |
1275 | pcpu_low_unit_cpu = cpu; | ||
1276 | if (pcpu_high_unit_cpu == NR_CPUS || | ||
1277 | unit_off[cpu] > unit_off[pcpu_high_unit_cpu]) | ||
1278 | pcpu_high_unit_cpu = cpu; | ||
1259 | } | 1279 | } |
1260 | } | 1280 | } |
1261 | pcpu_nr_units = unit; | 1281 | pcpu_nr_units = unit; |
@@ -1889,7 +1909,7 @@ void __init percpu_init_late(void) | |||
1889 | 1909 | ||
1890 | BUILD_BUG_ON(size > PAGE_SIZE); | 1910 | BUILD_BUG_ON(size > PAGE_SIZE); |
1891 | 1911 | ||
1892 | map = pcpu_mem_alloc(size); | 1912 | map = pcpu_mem_zalloc(size); |
1893 | BUG_ON(!map); | 1913 | BUG_ON(!map); |
1894 | 1914 | ||
1895 | spin_lock_irqsave(&pcpu_lock, flags); | 1915 | spin_lock_irqsave(&pcpu_lock, flags); |