-rw-r--r--  include/linux/percpu.h | 20
-rw-r--r--  init/main.c            |  1
-rw-r--r--  mm/percpu.c            | 85
3 files changed, 64 insertions(+), 42 deletions(-)
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index d3a38d687104..b8b9084527b1 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -45,6 +45,16 @@
 #define PCPU_MIN_UNIT_SIZE		PFN_ALIGN(64 << 10)
 
 /*
+ * Percpu allocator can serve percpu allocations before slab is
+ * initialized which allows slab to depend on the percpu allocator.
+ * The following two parameters decide how much resource to
+ * preallocate for this.  Keep PERCPU_DYNAMIC_RESERVE equal to or
+ * larger than PERCPU_DYNAMIC_EARLY_SIZE.
+ */
+#define PERCPU_DYNAMIC_EARLY_SLOTS	128
+#define PERCPU_DYNAMIC_EARLY_SIZE	(12 << 10)
+
+/*
  * PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy
  * back on the first chunk for dynamic percpu allocation if arch is
  * manually allocating and mapping it for faster access (as a part of
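The two new constants bound what early callers may consume: at most PERCPU_DYNAMIC_EARLY_SIZE (12KB) of dynamic percpu space per unit, tracked by at most PERCPU_DYNAMIC_EARLY_SLOTS (128) area-map entries per chunk. A minimal sketch of the kind of caller this enables (the function name is hypothetical; alloc_percpu() is the normal dynamic percpu API):

/* Hypothetical early-boot user: runs after setup_per_cpu_areas() but
 * before kmem_cache_init(), i.e. while slab is still offline. */
static int __percpu *early_counters;

void __init early_percpu_user_init(void)
{
	/* must fit within the 12KB early budget and 128 map slots */
	early_counters = alloc_percpu(int);
	BUG_ON(!early_counters);
}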
@@ -104,16 +114,11 @@ extern struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
 							     int nr_units);
 extern void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai);
 
-extern struct pcpu_alloc_info * __init pcpu_build_alloc_info(
-				size_t reserved_size, ssize_t dyn_size,
-				size_t atom_size,
-				pcpu_fc_cpu_distance_fn_t cpu_distance_fn);
-
 extern int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 					 void *base_addr);
 
 #ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK
-extern int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size,
+extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
 				size_t atom_size,
 				pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
 				pcpu_fc_alloc_fn_t alloc_fn,
@@ -140,6 +145,7 @@ extern bool is_kernel_percpu_address(unsigned long addr);
 #ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
 extern void __init setup_per_cpu_areas(void);
 #endif
+extern void __init percpu_init_late(void);
 
 #else /* CONFIG_SMP */
 
@@ -153,6 +159,8 @@ static inline bool is_kernel_percpu_address(unsigned long addr)
 
 static inline void __init setup_per_cpu_areas(void) { }
 
+static inline void __init percpu_init_late(void) { }
+
 static inline void *pcpu_lpage_remapped(void *kaddr)
 {
 	return NULL;
diff --git a/init/main.c b/init/main.c
index a42fdf4aeba9..4ddb53f04f2a 100644
--- a/init/main.c
+++ b/init/main.c
@@ -532,6 +532,7 @@ static void __init mm_init(void)
 	page_cgroup_init_flatmem();
 	mem_init();
 	kmem_cache_init();
+	percpu_init_late();
 	pgtable_cache_init();
 	vmalloc_init();
 }
diff --git a/mm/percpu.c b/mm/percpu.c
index 6470e7710231..e61dc2cc5873 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -282,6 +282,9 @@ static void __maybe_unused pcpu_next_pop(struct pcpu_chunk *chunk,
  */
 static void *pcpu_mem_alloc(size_t size)
 {
+	if (WARN_ON_ONCE(!slab_is_available()))
+		return NULL;
+
 	if (size <= PAGE_SIZE)
 		return kzalloc(size, GFP_KERNEL);
 	else {
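For context, here is how the new guard sits in the complete helper; the body below is reconstructed from mm/percpu.c of this period rather than quoted from the patch, so treat the vmalloc branch details as approximate:

static void *pcpu_mem_alloc(size_t size)
{
	if (WARN_ON_ONCE(!slab_is_available()))
		return NULL;

	if (size <= PAGE_SIZE)
		return kzalloc(size, GFP_KERNEL);
	else {
		void *ptr = vmalloc(size);
		if (ptr)
			memset(ptr, 0, size);
		return ptr;
	}
}

The WARN_ON_ONCE() matters because percpu allocations are now legal before slab is up: any internal path that would still reach slab too early warns once and fails the allocation instead of misbehaving silently.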
@@ -392,13 +395,6 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc)
 	old_size = chunk->map_alloc * sizeof(chunk->map[0]);
 	memcpy(new, chunk->map, old_size);
 
-	/*
-	 * map_alloc < PCPU_DFL_MAP_ALLOC indicates that the chunk is
-	 * one of the first chunks and still using static map.
-	 */
-	if (chunk->map_alloc >= PCPU_DFL_MAP_ALLOC)
-		old = chunk->map;
-
 	chunk->map_alloc = new_alloc;
 	chunk->map = new;
 	new = NULL;
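Why the special case can go: the removed guard kept the first chunks' maps, which lived in static storage, away from the free path, since they must never reach kfree() or vfree(). With percpu_init_late() (added at the end of this patch) migrating those maps to properly allocated memory right after slab comes up, every chunk->map becomes freeable through the normal path. For reference, the matching free helper of the same period (reconstructed, not part of this patch):

static void pcpu_mem_free(void *ptr, size_t size)
{
	if (size <= PAGE_SIZE)
		kfree(ptr);
	else
		vfree(ptr);
}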
@@ -604,7 +600,7 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
 {
 	struct pcpu_chunk *chunk;
 
-	chunk = kzalloc(pcpu_chunk_struct_size, GFP_KERNEL);
+	chunk = pcpu_mem_alloc(pcpu_chunk_struct_size);
 	if (!chunk)
 		return NULL;
 
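pcpu_chunk_struct_size includes the per-chunk populated-page bitmap and can exceed PAGE_SIZE on configurations with large units, so routing it through pcpu_mem_alloc() trades a potentially high-order kzalloc() for an automatic vmalloc() fallback. The sizing formula below is reconstructed from pcpu_setup_first_chunk() of this period and is an assumption, not part of this patch:

	/* set during pcpu_setup_first_chunk(); grows with unit size */
	pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) +
		BITS_TO_LONGS(pcpu_unit_pages) * sizeof(unsigned long);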
@@ -1013,20 +1009,6 @@ phys_addr_t per_cpu_ptr_to_phys(void *addr)
 		return page_to_phys(pcpu_addr_to_page(addr));
 }
 
-static inline size_t pcpu_calc_fc_sizes(size_t static_size,
-					size_t reserved_size,
-					ssize_t *dyn_sizep)
-{
-	size_t size_sum;
-
-	size_sum = PFN_ALIGN(static_size + reserved_size +
-			     (*dyn_sizep >= 0 ? *dyn_sizep : 0));
-	if (*dyn_sizep != 0)
-		*dyn_sizep = size_sum - static_size - reserved_size;
-
-	return size_sum;
-}
-
 /**
  * pcpu_alloc_alloc_info - allocate percpu allocation info
  * @nr_groups: the number of groups
@@ -1085,7 +1067,7 @@ void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai)
 /**
  * pcpu_build_alloc_info - build alloc_info considering distances between CPUs
  * @reserved_size: the size of reserved percpu area in bytes
- * @dyn_size: free size for dynamic allocation in bytes, -1 for auto
+ * @dyn_size: minimum free size for dynamic allocation in bytes
  * @atom_size: allocation atom size
  * @cpu_distance_fn: callback to determine distance between cpus, optional
 *
@@ -1103,8 +1085,8 @@ void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai)
  * On success, pointer to the new allocation_info is returned.  On
  * failure, ERR_PTR value is returned.
  */
-struct pcpu_alloc_info * __init pcpu_build_alloc_info(
-				size_t reserved_size, ssize_t dyn_size,
+static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
+				size_t reserved_size, size_t dyn_size,
 				size_t atom_size,
 				pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
 {
@@ -1123,13 +1105,17 @@ struct pcpu_alloc_info * __init pcpu_build_alloc_info(
 	memset(group_map, 0, sizeof(group_map));
 	memset(group_cnt, 0, sizeof(group_cnt));
 
+	/* calculate size_sum and ensure dyn_size is enough for early alloc */
+	size_sum = PFN_ALIGN(static_size + reserved_size +
+			    max_t(size_t, dyn_size, PERCPU_DYNAMIC_EARLY_SIZE));
+	dyn_size = size_sum - static_size - reserved_size;
+
 	/*
 	 * Determine min_unit_size, alloc_size and max_upa such that
 	 * alloc_size is multiple of atom_size and is the smallest
 	 * which can accomodate 4k aligned segments which are equal to
 	 * or larger than min_unit_size.
 	 */
-	size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size);
 	min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);
 
 	alloc_size = roundup(min_unit_size, atom_size);
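The removed pcpu_calc_fc_sizes() is folded into its only remaining caller, with the PERCPU_DYNAMIC_EARLY_SIZE floor applied in the same step. A standalone worked example with hypothetical sizes (userspace C; PFN_ALIGN_UP stands in for the kernel's PFN_ALIGN):

#include <assert.h>
#include <stddef.h>

#define PAGE_SZ		4096UL
#define PFN_ALIGN_UP(x)	(((x) + PAGE_SZ - 1) & ~(PAGE_SZ - 1))
#define DYN_EARLY_SIZE	(12UL << 10)	/* PERCPU_DYNAMIC_EARLY_SIZE */

int main(void)
{
	size_t static_size = 45UL << 10;	/* hypothetical */
	size_t reserved_size = 8UL << 10;	/* hypothetical */
	size_t dyn_size = 0;			/* caller wants the minimum */

	/* raise the request to the early floor, then page-align the sum */
	size_t want = dyn_size > DYN_EARLY_SIZE ? dyn_size : DYN_EARLY_SIZE;
	size_t size_sum = PFN_ALIGN_UP(static_size + reserved_size + want);

	/* alignment slack is handed to the dynamic area */
	dyn_size = size_sum - static_size - reserved_size;

	assert(size_sum == (68UL << 10));	/* 65KB rounded up to 17 pages */
	assert(dyn_size == (15UL << 10));	/* 12KB floor + 3KB slack */
	return 0;
}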
@@ -1350,7 +1336,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 				  void *base_addr)
 {
 	static char cpus_buf[4096] __initdata;
-	static int smap[2], dmap[2];
+	static int smap[PERCPU_DYNAMIC_EARLY_SLOTS] __initdata;
+	static int dmap[PERCPU_DYNAMIC_EARLY_SLOTS] __initdata;
 	size_t dyn_size = ai->dyn_size;
 	size_t size_sum = ai->static_size + ai->reserved_size + dyn_size;
 	struct pcpu_chunk *schunk, *dchunk = NULL;
@@ -1373,14 +1360,13 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 	} while (0)
 
 	/* sanity checks */
-	BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC ||
-		     ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC);
 	PCPU_SETUP_BUG_ON(ai->nr_groups <= 0);
 	PCPU_SETUP_BUG_ON(!ai->static_size);
 	PCPU_SETUP_BUG_ON(!base_addr);
 	PCPU_SETUP_BUG_ON(ai->unit_size < size_sum);
 	PCPU_SETUP_BUG_ON(ai->unit_size & ~PAGE_MASK);
 	PCPU_SETUP_BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE);
+	PCPU_SETUP_BUG_ON(ai->dyn_size < PERCPU_DYNAMIC_EARLY_SIZE);
 	PCPU_SETUP_BUG_ON(pcpu_verify_alloc_info(ai) < 0);
 
 	/* process group information and build config tables accordingly */
@@ -1532,7 +1518,7 @@ early_param("percpu_alloc", percpu_alloc_setup);
 /**
  * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem
  * @reserved_size: the size of reserved percpu area in bytes
- * @dyn_size: free size for dynamic allocation in bytes, -1 for auto
+ * @dyn_size: minimum free size for dynamic allocation in bytes
  * @atom_size: allocation atom size
  * @cpu_distance_fn: callback to determine distance between cpus, optional
  * @alloc_fn: function to allocate percpu page
@@ -1553,10 +1539,7 @@ early_param("percpu_alloc", percpu_alloc_setup);
  * vmalloc space is not orders of magnitude larger than distances
  * between node memory addresses (ie. 32bit NUMA machines).
  *
- * When @dyn_size is positive, dynamic area might be larger than
- * specified to fill page alignment.  When @dyn_size is auto,
- * @dyn_size is just big enough to fill page alignment after static
- * and reserved areas.
+ * @dyn_size specifies the minimum dynamic area size.
  *
  * If the needed size is smaller than the minimum or specified unit
  * size, the leftover is returned using @free_fn.
@@ -1564,7 +1547,7 @@ early_param("percpu_alloc", percpu_alloc_setup);
  * RETURNS:
  * 0 on success, -errno on failure.
  */
-int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size,
+int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
 				  size_t atom_size,
 				  pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
 				  pcpu_fc_alloc_fn_t alloc_fn,
@@ -1695,7 +1678,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
 
 	snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10);
 
-	ai = pcpu_build_alloc_info(reserved_size, -1, PAGE_SIZE, NULL);
+	ai = pcpu_build_alloc_info(reserved_size, 0, PAGE_SIZE, NULL);
 	if (IS_ERR(ai))
 		return PTR_ERR(ai);
 	BUG_ON(ai->nr_groups != 1);
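With dyn_size now unsigned, the old "-1 for auto" convention is gone: a caller that only wants the mandatory minimum passes 0, and pcpu_build_alloc_info() raises that to PERCPU_DYNAMIC_EARLY_SIZE plus any page-alignment slack. Both call shapes, for comparison (the first is the removed line above):

	/* before: ssize_t argument, -1 meant "size the dynamic area automatically" */
	ai = pcpu_build_alloc_info(reserved_size, -1, PAGE_SIZE, NULL);

	/* after: size_t minimum; 0 yields the 12KB early floor plus slack */
	ai = pcpu_build_alloc_info(reserved_size, 0, PAGE_SIZE, NULL);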
@@ -1821,3 +1804,33 @@ void __init setup_per_cpu_areas(void)
 		__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
 }
 #endif	/* CONFIG_HAVE_SETUP_PER_CPU_AREA */
+
+/*
+ * First and reserved chunks are initialized with temporary allocation
+ * map in initdata so that they can be used before slab is online.
+ * This function is called after slab is brought up and replaces those
+ * with properly allocated maps.
+ */
+void __init percpu_init_late(void)
+{
+	struct pcpu_chunk *target_chunks[] =
+		{ pcpu_first_chunk, pcpu_reserved_chunk, NULL };
+	struct pcpu_chunk *chunk;
+	unsigned long flags;
+	int i;
+
+	for (i = 0; (chunk = target_chunks[i]); i++) {
+		int *map;
+		const size_t size = PERCPU_DYNAMIC_EARLY_SLOTS * sizeof(map[0]);
+
+		BUILD_BUG_ON(size > PAGE_SIZE);
+
+		map = pcpu_mem_alloc(size);
+		BUG_ON(!map);
+
+		spin_lock_irqsave(&pcpu_lock, flags);
+		memcpy(map, chunk->map, size);
+		chunk->map = map;
+		spin_unlock_irqrestore(&pcpu_lock, flags);
+	}
+}
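Taken together with the init/main.c hunk, the first chunk's area map now has a two-phase lifetime. A simplified ordering sketch (not literal kernel code; the sequence follows start_kernel()/mm_init() as patched above):

static void __init boot_order_sketch(void)
{
	setup_per_cpu_areas();	/* first chunk born with __initdata smap[]/dmap[] */
	/* percpu allocation already works here, within the early budget */
	kmem_cache_init();	/* slab_is_available() becomes true */
	percpu_init_late();	/* maps copied into pcpu_mem_alloc()'d memory */
	/* from here pcpu_extend_area_map() may grow and free maps normally */
}

The BUILD_BUG_ON(size > PAGE_SIZE) in the loop keeps each replacement map within a single kzalloc()'d page, which is presumably why PERCPU_DYNAMIC_EARLY_SLOTS is capped at 128 (128 * sizeof(int) is only 512 bytes of __initdata per array, discarded after boot).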