-rw-r--r--  arch/sparc/kernel/smp_64.c     |  24
-rw-r--r--  arch/x86/kernel/setup_percpu.c |  38
-rw-r--r--  include/linux/percpu.h         |  42
-rw-r--r--  mm/percpu.c                    | 529
4 files changed, 389 insertions, 244 deletions
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 9856d866b77b..a42a4a744d14 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1475,17 +1475,29 @@ static void __init pcpu_map_range(unsigned long start, unsigned long end, | |||
1475 | 1475 | ||
1476 | void __init setup_per_cpu_areas(void) | 1476 | void __init setup_per_cpu_areas(void) |
1477 | { | 1477 | { |
1478 | size_t dyn_size, static_size = __per_cpu_end - __per_cpu_start; | ||
1479 | static struct vm_struct vm; | 1478 | static struct vm_struct vm; |
1479 | struct pcpu_alloc_info *ai; | ||
1480 | unsigned long delta, cpu; | 1480 | unsigned long delta, cpu; |
1481 | size_t size_sum, pcpu_unit_size; | 1481 | size_t size_sum, pcpu_unit_size; |
1482 | size_t ptrs_size; | 1482 | size_t ptrs_size; |
1483 | void **ptrs; | 1483 | void **ptrs; |
1484 | 1484 | ||
1485 | size_sum = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + | 1485 | ai = pcpu_alloc_alloc_info(1, nr_cpu_ids); |
1486 | |||
1487 | ai->static_size = __per_cpu_end - __per_cpu_start; | ||
1488 | ai->reserved_size = PERCPU_MODULE_RESERVE; | ||
1489 | |||
1490 | size_sum = PFN_ALIGN(ai->static_size + ai->reserved_size + | ||
1486 | PERCPU_DYNAMIC_RESERVE); | 1491 | PERCPU_DYNAMIC_RESERVE); |
1487 | dyn_size = size_sum - static_size - PERCPU_MODULE_RESERVE; | ||
1488 | 1492 | ||
1493 | ai->dyn_size = size_sum - ai->static_size - ai->reserved_size; | ||
1494 | ai->unit_size = PCPU_CHUNK_SIZE; | ||
1495 | ai->atom_size = PCPU_CHUNK_SIZE; | ||
1496 | ai->alloc_size = PCPU_CHUNK_SIZE; | ||
1497 | ai->groups[0].nr_units = nr_cpu_ids; | ||
1498 | |||
1499 | for_each_possible_cpu(cpu) | ||
1500 | ai->groups[0].cpu_map[cpu] = cpu; | ||
1489 | 1501 | ||
1490 | ptrs_size = PFN_ALIGN(nr_cpu_ids * sizeof(ptrs[0])); | 1502 | ptrs_size = PFN_ALIGN(nr_cpu_ids * sizeof(ptrs[0])); |
1491 | ptrs = alloc_bootmem(ptrs_size); | 1503 | ptrs = alloc_bootmem(ptrs_size); |
@@ -1497,7 +1509,7 @@ void __init setup_per_cpu_areas(void) | |||
1497 | free_bootmem(__pa(ptrs[cpu] + size_sum), | 1509 | free_bootmem(__pa(ptrs[cpu] + size_sum), |
1498 | PCPU_CHUNK_SIZE - size_sum); | 1510 | PCPU_CHUNK_SIZE - size_sum); |
1499 | 1511 | ||
1500 | memcpy(ptrs[cpu], __per_cpu_load, static_size); | 1512 | memcpy(ptrs[cpu], __per_cpu_load, ai->static_size); |
1501 | } | 1513 | } |
1502 | 1514 | ||
1503 | /* allocate address and map */ | 1515 | /* allocate address and map */ |
@@ -1514,9 +1526,7 @@ void __init setup_per_cpu_areas(void) | |||
1514 | pcpu_map_range(start, end, virt_to_page(ptrs[cpu])); | 1526 | pcpu_map_range(start, end, virt_to_page(ptrs[cpu])); |
1515 | } | 1527 | } |
1516 | 1528 | ||
1517 | pcpu_unit_size = pcpu_setup_first_chunk(static_size, | 1529 | pcpu_unit_size = pcpu_setup_first_chunk(ai, vm.addr); |
1518 | PERCPU_MODULE_RESERVE, dyn_size, | ||
1519 | PCPU_CHUNK_SIZE, vm.addr, NULL); | ||
1520 | 1530 | ||
1521 | free_bootmem(__pa(ptrs), ptrs_size); | 1531 | free_bootmem(__pa(ptrs), ptrs_size); |
1522 | 1532 | ||
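The sparc hunk derives everything from a single page-aligned sum: ai->dyn_size is whatever remains of size_sum after the static and reserved areas, so it silently absorbs the alignment padding on top of PERCPU_DYNAMIC_RESERVE. A stand-alone sketch of that bookkeeping (plain C with invented sizes; the real inputs come from the linker symbols and the PERCPU_* constants):

#include <stdio.h>

#define PAGE_SIZE 8192UL			/* sparc64 uses 8K pages */
#define PFN_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int main(void)
{
	/* invented stand-ins for __per_cpu_end - __per_cpu_start and the
	 * PERCPU_MODULE_RESERVE / PERCPU_DYNAMIC_RESERVE constants */
	unsigned long static_size = 52000;
	unsigned long reserved_size = 8 << 10;
	unsigned long dyn_reserve = 20 << 10;

	unsigned long size_sum = PFN_ALIGN(static_size + reserved_size +
					   dyn_reserve);
	unsigned long dyn_size = size_sum - static_size - reserved_size;

	/* dyn_size = dyn_reserve plus whatever the alignment added */
	printf("size_sum=%lu dyn_size=%lu\n", size_sum, dyn_size);
	return 0;
}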
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 660cde133141..db5f9c49fec5 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -161,9 +161,7 @@ static ssize_t __init setup_pcpu_lpage(bool chosen) | |||
161 | { | 161 | { |
162 | size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; | 162 | size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; |
163 | size_t dyn_size = reserve - PERCPU_FIRST_CHUNK_RESERVE; | 163 | size_t dyn_size = reserve - PERCPU_FIRST_CHUNK_RESERVE; |
164 | size_t unit_map_size, unit_size; | 164 | struct pcpu_alloc_info *ai; |
165 | int *unit_map; | ||
166 | int nr_units; | ||
167 | ssize_t ret; | 165 | ssize_t ret; |
168 | 166 | ||
169 | /* on non-NUMA, embedding is better */ | 167 | /* on non-NUMA, embedding is better */ |
@@ -177,26 +175,22 @@ static ssize_t __init setup_pcpu_lpage(bool chosen) | |||
177 | } | 175 | } |
178 | 176 | ||
179 | /* allocate and build unit_map */ | 177 | /* allocate and build unit_map */ |
180 | unit_map_size = nr_cpu_ids * sizeof(int); | 178 | ai = pcpu_build_alloc_info(PERCPU_FIRST_CHUNK_RESERVE, dyn_size, |
181 | unit_map = alloc_bootmem_nopanic(unit_map_size); | 179 | PMD_SIZE, pcpu_lpage_cpu_distance); |
182 | if (!unit_map) { | 180 | if (IS_ERR(ai)) { |
183 | pr_warning("PERCPU: failed to allocate unit_map\n"); | 181 | pr_warning("PERCPU: failed to build unit_map (%ld)\n", |
184 | return -ENOMEM; | 182 | PTR_ERR(ai)); |
183 | return PTR_ERR(ai); | ||
185 | } | 184 | } |
186 | 185 | ||
187 | ret = pcpu_lpage_build_unit_map(PERCPU_FIRST_CHUNK_RESERVE, | ||
188 | &dyn_size, &unit_size, PMD_SIZE, | ||
189 | unit_map, pcpu_lpage_cpu_distance); | ||
190 | if (ret < 0) { | ||
191 | pr_warning("PERCPU: failed to build unit_map\n"); | ||
192 | goto out_free; | ||
193 | } | ||
194 | nr_units = ret; | ||
195 | |||
196 | /* do the parameters look okay? */ | 186 | /* do the parameters look okay? */ |
197 | if (!chosen) { | 187 | if (!chosen) { |
198 | size_t vm_size = VMALLOC_END - VMALLOC_START; | 188 | size_t vm_size = VMALLOC_END - VMALLOC_START; |
199 | size_t tot_size = nr_units * unit_size; | 189 | size_t tot_size = 0; |
190 | int group; | ||
191 | |||
192 | for (group = 0; group < ai->nr_groups; group++) | ||
193 | tot_size += ai->unit_size * ai->groups[group].nr_units; | ||
200 | 194 | ||
201 | /* don't consume more than 20% of vmalloc area */ | 195 | /* don't consume more than 20% of vmalloc area */ |
202 | if (tot_size > vm_size / 5) { | 196 | if (tot_size > vm_size / 5) { |
@@ -207,12 +201,10 @@ static ssize_t __init setup_pcpu_lpage(bool chosen) | |||
207 | } | 201 | } |
208 | } | 202 | } |
209 | 203 | ||
210 | ret = pcpu_lpage_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, dyn_size, | 204 | ret = pcpu_lpage_first_chunk(ai, pcpu_fc_alloc, pcpu_fc_free, |
211 | unit_size, PMD_SIZE, unit_map, nr_units, | 205 | pcpul_map); |
212 | pcpu_fc_alloc, pcpu_fc_free, pcpul_map); | ||
213 | out_free: | 206 | out_free: |
214 | if (ret < 0) | 207 | pcpu_free_alloc_info(ai); |
215 | free_bootmem(__pa(unit_map), unit_map_size); | ||
216 | return ret; | 208 | return ret; |
217 | } | 209 | } |
218 | #else | 210 | #else |
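Besides moving the unit_map work into pcpu_build_alloc_info(), the x86 hunk switches error reporting to the kernel's ERR_PTR convention, where a -errno value is encoded in the returned pointer itself and recovered with PTR_ERR(). A minimal user-space model of that convention (a sketch; the real helpers live in <linux/err.h>, and the -12 stands in for -ENOMEM):

#include <stdio.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	/* errno values map into the top, never-mappable page of the
	 * address space, so a plain range check distinguishes them */
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

static void *build_alloc_info(int fail)
{
	static int dummy_ai;			/* invented stand-in */

	return fail ? ERR_PTR(-12L /* -ENOMEM */) : (void *)&dummy_ai;
}

int main(void)
{
	void *ai = build_alloc_info(1);

	if (IS_ERR(ai)) {
		printf("PERCPU: failed to build unit_map (%ld)\n",
		       PTR_ERR(ai));
		return 1;
	}
	return 0;
}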
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 570fb18de2ba..77b86be8ce4f 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -59,6 +59,25 @@ | |||
59 | extern void *pcpu_base_addr; | 59 | extern void *pcpu_base_addr; |
60 | extern const int *pcpu_unit_map; | 60 | extern const int *pcpu_unit_map; |
61 | 61 | ||
62 | struct pcpu_group_info { | ||
63 | int nr_units; /* aligned # of units */ | ||
64 | unsigned long base_offset; /* base address offset */ | ||
65 | unsigned int *cpu_map; /* unit->cpu map, empty | ||
66 | * entries contain NR_CPUS */ | ||
67 | }; | ||
68 | |||
69 | struct pcpu_alloc_info { | ||
70 | size_t static_size; | ||
71 | size_t reserved_size; | ||
72 | size_t dyn_size; | ||
73 | size_t unit_size; | ||
74 | size_t atom_size; | ||
75 | size_t alloc_size; | ||
76 | size_t __ai_size; /* internal, don't use */ | ||
77 | int nr_groups; /* 0 if grouping unnecessary */ | ||
78 | struct pcpu_group_info groups[]; | ||
79 | }; | ||
80 | |||
62 | enum pcpu_fc { | 81 | enum pcpu_fc { |
63 | PCPU_FC_AUTO, | 82 | PCPU_FC_AUTO, |
64 | PCPU_FC_EMBED, | 83 | PCPU_FC_EMBED, |
@@ -78,18 +97,17 @@ typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); | |||
78 | typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to); | 97 | typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to); |
79 | typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); | 98 | typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); |
80 | 99 | ||
81 | #ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK | 100 | extern struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups, |
82 | extern int __init pcpu_lpage_build_unit_map( | 101 | int nr_units); |
83 | size_t reserved_size, ssize_t *dyn_sizep, | 102 | extern void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai); |
84 | size_t *unit_sizep, size_t lpage_size, | 103 | |
85 | int *unit_map, | 104 | extern struct pcpu_alloc_info * __init pcpu_build_alloc_info( |
105 | size_t reserved_size, ssize_t dyn_size, | ||
106 | size_t atom_size, | ||
86 | pcpu_fc_cpu_distance_fn_t cpu_distance_fn); | 107 | pcpu_fc_cpu_distance_fn_t cpu_distance_fn); |
87 | #endif | ||
88 | 108 | ||
89 | extern size_t __init pcpu_setup_first_chunk( | 109 | extern size_t __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, |
90 | size_t static_size, size_t reserved_size, | 110 | void *base_addr); |
91 | size_t dyn_size, size_t unit_size, | ||
92 | void *base_addr, const int *unit_map); | ||
93 | 111 | ||
94 | #ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK | 112 | #ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK |
95 | extern ssize_t __init pcpu_embed_first_chunk( | 113 | extern ssize_t __init pcpu_embed_first_chunk( |
@@ -106,9 +124,7 @@ extern ssize_t __init pcpu_page_first_chunk( | |||
106 | 124 | ||
107 | #ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK | 125 | #ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK |
108 | extern ssize_t __init pcpu_lpage_first_chunk( | 126 | extern ssize_t __init pcpu_lpage_first_chunk( |
109 | size_t reserved_size, size_t dyn_size, | 127 | const struct pcpu_alloc_info *ai, |
110 | size_t unit_size, size_t lpage_size, | ||
111 | const int *unit_map, int nr_units, | ||
112 | pcpu_fc_alloc_fn_t alloc_fn, | 128 | pcpu_fc_alloc_fn_t alloc_fn, |
113 | pcpu_fc_free_fn_t free_fn, | 129 | pcpu_fc_free_fn_t free_fn, |
114 | pcpu_fc_map_fn_t map_fn); | 130 | pcpu_fc_map_fn_t map_fn); |
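Note how pcpu_alloc_info ends in a flexible array of pcpu_group_info while the per-unit cpu_map lives in the same allocation, reached through groups[0].cpu_map. A stand-alone model of that single-block layout, patterned after pcpu_alloc_alloc_info() in the mm/percpu.c hunk below (malloc stands in for the bootmem allocator; sizes and CPU counts are invented):

#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS 16

struct pcpu_group_info {
	int nr_units;
	unsigned long base_offset;
	unsigned int *cpu_map;
};

struct pcpu_alloc_info {
	size_t static_size, reserved_size, dyn_size;
	size_t unit_size, atom_size, alloc_size;
	int nr_groups;
	struct pcpu_group_info groups[];
};

#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((size_t)(a) - 1))

static struct pcpu_alloc_info *alloc_info(int nr_groups, int nr_units)
{
	/* one block: struct + groups[] array + shared cpu_map tail */
	size_t base = ALIGN_UP(sizeof(struct pcpu_alloc_info) +
			       nr_groups * sizeof(struct pcpu_group_info),
			       _Alignof(unsigned int));
	struct pcpu_alloc_info *ai;
	int unit;

	ai = calloc(1, base + nr_units * sizeof(unsigned int));
	if (!ai)
		return NULL;
	ai->groups[0].cpu_map = (unsigned int *)((char *)ai + base);
	for (unit = 0; unit < nr_units; unit++)
		ai->groups[0].cpu_map[unit] = NR_CPUS;	/* empty entry */
	ai->nr_groups = nr_groups;
	return ai;
}

int main(void)
{
	struct pcpu_alloc_info *ai = alloc_info(1, 4);
	int cpu;

	if (!ai)
		return 1;
	ai->groups[0].nr_units = 4;
	for (cpu = 0; cpu < 4; cpu++)
		ai->groups[0].cpu_map[cpu] = cpu;	/* identity map */
	printf("group 0: %d units, cpu_map[3]=%u\n",
	       ai->groups[0].nr_units, ai->groups[0].cpu_map[3]);
	free(ai);
	return 0;
}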
diff --git a/mm/percpu.c b/mm/percpu.c
index 2b9c4b2a2fc0..99f7fa682722 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -58,6 +58,7 @@ | |||
58 | 58 | ||
59 | #include <linux/bitmap.h> | 59 | #include <linux/bitmap.h> |
60 | #include <linux/bootmem.h> | 60 | #include <linux/bootmem.h> |
61 | #include <linux/err.h> | ||
61 | #include <linux/list.h> | 62 | #include <linux/list.h> |
62 | #include <linux/log2.h> | 63 | #include <linux/log2.h> |
63 | #include <linux/mm.h> | 64 | #include <linux/mm.h> |
@@ -1245,53 +1246,108 @@ static inline size_t pcpu_calc_fc_sizes(size_t static_size, | |||
1245 | return size_sum; | 1246 | return size_sum; |
1246 | } | 1247 | } |
1247 | 1248 | ||
1248 | #ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK | ||
1249 | /** | 1249 | /** |
1250 | * pcpu_lpage_build_unit_map - build unit_map for large page remapping | 1250 | * pcpu_alloc_alloc_info - allocate percpu allocation info |
1251 | * @nr_groups: the number of groups | ||
1252 | * @nr_units: the number of units | ||
1253 | * | ||
1254 | * Allocate ai which is large enough for @nr_groups groups containing | ||
1255 | * @nr_units units. The returned ai's groups[0].cpu_map points to the | ||
1256 | * cpu_map array which is long enough for @nr_units and filled with | ||
1257 | * NR_CPUS. It's the caller's responsibility to initialize the | ||
1258 | * cpu_map pointers of the other groups. | ||
1259 | * | ||
1260 | * RETURNS: | ||
1261 | * Pointer to the allocated pcpu_alloc_info on success, NULL on | ||
1262 | * failure. | ||
1263 | */ | ||
1264 | struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups, | ||
1265 | int nr_units) | ||
1266 | { | ||
1267 | struct pcpu_alloc_info *ai; | ||
1268 | size_t base_size, ai_size; | ||
1269 | void *ptr; | ||
1270 | int unit; | ||
1271 | |||
1272 | base_size = ALIGN(sizeof(*ai) + nr_groups * sizeof(ai->groups[0]), | ||
1273 | __alignof__(ai->groups[0].cpu_map[0])); | ||
1274 | ai_size = base_size + nr_units * sizeof(ai->groups[0].cpu_map[0]); | ||
1275 | |||
1276 | ptr = alloc_bootmem_nopanic(PFN_ALIGN(ai_size)); | ||
1277 | if (!ptr) | ||
1278 | return NULL; | ||
1279 | ai = ptr; | ||
1280 | ptr += base_size; | ||
1281 | |||
1282 | ai->groups[0].cpu_map = ptr; | ||
1283 | |||
1284 | for (unit = 0; unit < nr_units; unit++) | ||
1285 | ai->groups[0].cpu_map[unit] = NR_CPUS; | ||
1286 | |||
1287 | ai->nr_groups = nr_groups; | ||
1288 | ai->__ai_size = PFN_ALIGN(ai_size); | ||
1289 | |||
1290 | return ai; | ||
1291 | } | ||
1292 | |||
1293 | /** | ||
1294 | * pcpu_free_alloc_info - free percpu allocation info | ||
1295 | * @ai: pcpu_alloc_info to free | ||
1296 | * | ||
1297 | * Free @ai which was allocated by pcpu_alloc_alloc_info(). | ||
1298 | */ | ||
1299 | void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai) | ||
1300 | { | ||
1301 | free_bootmem(__pa(ai), ai->__ai_size); | ||
1302 | } | ||
1303 | |||
1304 | /** | ||
1305 | * pcpu_build_alloc_info - build alloc_info considering distances between CPUs | ||
1251 | * @reserved_size: the size of reserved percpu area in bytes | 1306 | * @reserved_size: the size of reserved percpu area in bytes |
1252 | * @dyn_sizep: in/out parameter for dynamic size, -1 for auto | 1307 | * @dyn_size: free size for dynamic allocation in bytes, -1 for auto |
1253 | * @unit_sizep: out parameter for unit size | 1308 | * @atom_size: allocation atom size |
1254 | * @unit_map: unit_map to be filled | 1309 | * @cpu_distance_fn: callback to determine distance between cpus, optional |
1255 | * @cpu_distance_fn: callback to determine distance between cpus | ||
1256 | * | 1310 | * |
1257 | * This function builds cpu -> unit map and determine other parameters | 1311 | * This function determines grouping of units, their mappings to cpus |
1258 | * considering needed percpu size, large page size and distances | 1312 | * and other parameters considering needed percpu size, allocation |
1259 | * between CPUs in NUMA. | 1313 | * atom size and distances between CPUs. |
1260 | * | 1314 | * |
1261 | * CPUs which are of LOCAL_DISTANCE both ways are grouped together and | 1315 | * Groups are always multiples of atom size and CPUs which are of
1262 | * may share units in the same large page. The returned configuration | 1316 | * LOCAL_DISTANCE both ways are grouped together and share space for |
1263 | * is guaranteed to have CPUs on different nodes on different large | 1317 | * units in the same group. The returned configuration is guaranteed |
1264 | * pages and >=75% usage of allocated virtual address space. | 1318 | * to have CPUs on different nodes on different groups and >=75% usage |
1319 | * of allocated virtual address space. | ||
1265 | * | 1320 | * |
1266 | * RETURNS: | 1321 | * RETURNS: |
1267 | * On success, fills in @unit_map, sets *@dyn_sizep, *@unit_sizep and | 1322 | * On success, pointer to the new allocation_info is returned. On |
1268 | * returns the number of units to be allocated. -errno on failure. | 1323 | * failure, ERR_PTR value is returned. |
1269 | */ | 1324 | */ |
1270 | int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, | 1325 | struct pcpu_alloc_info * __init pcpu_build_alloc_info( |
1271 | size_t *unit_sizep, size_t lpage_size, | 1326 | size_t reserved_size, ssize_t dyn_size, |
1272 | int *unit_map, | 1327 | size_t atom_size, |
1273 | pcpu_fc_cpu_distance_fn_t cpu_distance_fn) | 1328 | pcpu_fc_cpu_distance_fn_t cpu_distance_fn) |
1274 | { | 1329 | { |
1275 | static int group_map[NR_CPUS] __initdata; | 1330 | static int group_map[NR_CPUS] __initdata; |
1276 | static int group_cnt[NR_CPUS] __initdata; | 1331 | static int group_cnt[NR_CPUS] __initdata; |
1277 | const size_t static_size = __per_cpu_end - __per_cpu_start; | 1332 | const size_t static_size = __per_cpu_end - __per_cpu_start; |
1278 | int group_cnt_max = 0; | 1333 | int group_cnt_max = 0, nr_groups = 1, nr_units = 0; |
1279 | size_t size_sum, min_unit_size, alloc_size; | 1334 | size_t size_sum, min_unit_size, alloc_size; |
1280 | int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */ | 1335 | int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */ |
1281 | int last_allocs; | 1336 | int last_allocs, group, unit; |
1282 | unsigned int cpu, tcpu; | 1337 | unsigned int cpu, tcpu; |
1283 | int group, unit; | 1338 | struct pcpu_alloc_info *ai; |
1339 | unsigned int *cpu_map; | ||
1284 | 1340 | ||
1285 | /* | 1341 | /* |
1286 | * Determine min_unit_size, alloc_size and max_upa such that | 1342 | * Determine min_unit_size, alloc_size and max_upa such that |
1287 | * alloc_size is multiple of lpage_size and is the smallest | 1343 | * alloc_size is multiple of atom_size and is the smallest |
1288 | * which can accommodate 4k aligned segments which are equal to | 1344 | * which can accommodate 4k aligned segments which are equal to
1289 | * or larger than min_unit_size. | 1345 | * or larger than min_unit_size. |
1290 | */ | 1346 | */ |
1291 | size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, dyn_sizep); | 1347 | size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); |
1292 | min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); | 1348 | min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); |
1293 | 1349 | ||
1294 | alloc_size = roundup(min_unit_size, lpage_size); | 1350 | alloc_size = roundup(min_unit_size, atom_size); |
1295 | upa = alloc_size / min_unit_size; | 1351 | upa = alloc_size / min_unit_size; |
1296 | while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) | 1352 | while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) |
1297 | upa--; | 1353 | upa--; |
@@ -1304,10 +1360,11 @@ int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, | |||
1304 | for_each_possible_cpu(tcpu) { | 1360 | for_each_possible_cpu(tcpu) { |
1305 | if (cpu == tcpu) | 1361 | if (cpu == tcpu) |
1306 | break; | 1362 | break; |
1307 | if (group_map[tcpu] == group && | 1363 | if (group_map[tcpu] == group && cpu_distance_fn && |
1308 | (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE || | 1364 | (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE || |
1309 | cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) { | 1365 | cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) { |
1310 | group++; | 1366 | group++; |
1367 | nr_groups = max(nr_groups, group + 1); | ||
1311 | goto next_group; | 1368 | goto next_group; |
1312 | } | 1369 | } |
1313 | } | 1370 | } |
@@ -1328,7 +1385,7 @@ int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, | |||
1328 | if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) | 1385 | if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) |
1329 | continue; | 1386 | continue; |
1330 | 1387 | ||
1331 | for (group = 0; group_cnt[group]; group++) { | 1388 | for (group = 0; group < nr_groups; group++) { |
1332 | int this_allocs = DIV_ROUND_UP(group_cnt[group], upa); | 1389 | int this_allocs = DIV_ROUND_UP(group_cnt[group], upa); |
1333 | allocs += this_allocs; | 1390 | allocs += this_allocs; |
1334 | wasted += this_allocs * upa - group_cnt[group]; | 1391 | wasted += this_allocs * upa - group_cnt[group]; |
@@ -1348,75 +1405,122 @@ int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, | |||
1348 | last_allocs = allocs; | 1405 | last_allocs = allocs; |
1349 | best_upa = upa; | 1406 | best_upa = upa; |
1350 | } | 1407 | } |
1351 | *unit_sizep = alloc_size / best_upa; | 1408 | upa = best_upa; |
1409 | |||
1410 | /* allocate and fill alloc_info */ | ||
1411 | for (group = 0; group < nr_groups; group++) | ||
1412 | nr_units += roundup(group_cnt[group], upa); | ||
1413 | |||
1414 | ai = pcpu_alloc_alloc_info(nr_groups, nr_units); | ||
1415 | if (!ai) | ||
1416 | return ERR_PTR(-ENOMEM); | ||
1417 | cpu_map = ai->groups[0].cpu_map; | ||
1418 | |||
1419 | for (group = 0; group < nr_groups; group++) { | ||
1420 | ai->groups[group].cpu_map = cpu_map; | ||
1421 | cpu_map += roundup(group_cnt[group], upa); | ||
1422 | } | ||
1423 | |||
1424 | ai->static_size = static_size; | ||
1425 | ai->reserved_size = reserved_size; | ||
1426 | ai->dyn_size = dyn_size; | ||
1427 | ai->unit_size = alloc_size / upa; | ||
1428 | ai->atom_size = atom_size; | ||
1429 | ai->alloc_size = alloc_size; | ||
1430 | |||
1431 | for (group = 0, unit = 0; group_cnt[group]; group++) { | ||
1432 | struct pcpu_group_info *gi = &ai->groups[group]; | ||
1433 | |||
1434 | /* | ||
1435 | * Initialize base_offset as if all groups are located | ||
1436 | * back-to-back. The caller should update this to | ||
1437 | * reflect actual allocation. | ||
1438 | */ | ||
1439 | gi->base_offset = unit * ai->unit_size; | ||
1352 | 1440 | ||
1353 | /* assign units to cpus accordingly */ | ||
1354 | unit = 0; | ||
1355 | for (group = 0; group_cnt[group]; group++) { | ||
1356 | for_each_possible_cpu(cpu) | 1441 | for_each_possible_cpu(cpu) |
1357 | if (group_map[cpu] == group) | 1442 | if (group_map[cpu] == group) |
1358 | unit_map[cpu] = unit++; | 1443 | gi->cpu_map[gi->nr_units++] = cpu; |
1359 | unit = roundup(unit, best_upa); | 1444 | gi->nr_units = roundup(gi->nr_units, upa); |
1445 | unit += gi->nr_units; | ||
1360 | } | 1446 | } |
1447 | BUG_ON(unit != nr_units); | ||
1361 | 1448 | ||
1362 | return unit; /* unit contains aligned number of units */ | 1449 | return ai; |
1363 | } | 1450 | } |
1364 | 1451 | ||
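The core of pcpu_build_alloc_info() is the units-per-alloc search above: upa starts at the most units that fit in one atom and is walked down until the atom splits into equal, page-aligned units (the later loop then weighs candidate upa values by wasted space per group). A stand-alone sketch of the fitting step with invented sizes:

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long size_sum = 45056;		/* static+reserved+dyn, invented */
	unsigned long atom_size = 2UL << 20;	/* e.g. one 2M large page */
	unsigned long min_unit_size = size_sum;	/* >= PCPU_MIN_UNIT_SIZE assumed */
	unsigned long alloc_size =
		((min_unit_size + atom_size - 1) / atom_size) * atom_size;
	unsigned long upa = alloc_size / min_unit_size;

	/* walk upa down until units divide the alloc evenly and each
	 * unit lands on a page boundary */
	while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
		upa--;

	printf("upa=%lu unit_size=%lu\n", upa, alloc_size / upa);	/* 32, 65536 */
	return 0;
}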
1365 | static bool __init pcpul_unit_to_cpu(int unit, const int *unit_map, | 1452 | /** |
1366 | unsigned int *cpup); | 1453 | * pcpu_dump_alloc_info - print out information about pcpu_alloc_info |
1367 | 1454 | * @lvl: loglevel | |
1368 | static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, | 1455 | * @ai: allocation info to dump |
1369 | size_t reserved_size, size_t dyn_size, | 1456 | * |
1370 | size_t unit_size, size_t lpage_size, | 1457 | * Print out information about @ai using loglevel @lvl. |
1371 | const int *unit_map, int nr_units) | 1458 | */ |
1459 | static void pcpu_dump_alloc_info(const char *lvl, | ||
1460 | const struct pcpu_alloc_info *ai) | ||
1372 | { | 1461 | { |
1373 | int width = 1, v = nr_units; | 1462 | int group_width = 1, cpu_width = 1, width; |
1374 | char empty_str[] = "--------"; | 1463 | char empty_str[] = "--------"; |
1375 | int upl, lpl; /* units per lpage, lpage per line */ | 1464 | int alloc = 0, alloc_end = 0; |
1376 | unsigned int cpu; | 1465 | int group, v; |
1377 | int lpage, unit; | 1466 | int upa, apl; /* units per alloc, allocs per line */ |
1467 | |||
1468 | v = ai->nr_groups; | ||
1469 | while (v /= 10) | ||
1470 | group_width++; | ||
1378 | 1471 | ||
1472 | v = num_possible_cpus(); | ||
1379 | while (v /= 10) | 1473 | while (v /= 10) |
1380 | width++; | 1474 | cpu_width++; |
1381 | empty_str[min_t(int, width, sizeof(empty_str) - 1)] = '\0'; | 1475 | empty_str[min_t(int, cpu_width, sizeof(empty_str) - 1)] = '\0'; |
1382 | 1476 | ||
1383 | upl = max_t(int, lpage_size / unit_size, 1); | 1477 | upa = ai->alloc_size / ai->unit_size; |
1384 | lpl = rounddown_pow_of_two(max_t(int, 60 / (upl * (width + 1) + 2), 1)); | 1478 | width = upa * (cpu_width + 1) + group_width + 3; |
1479 | apl = rounddown_pow_of_two(max(60 / width, 1)); | ||
1385 | 1480 | ||
1386 | printk("%spcpu-lpage: sta/res/dyn=%zu/%zu/%zu unit=%zu lpage=%zu", lvl, | 1481 | printk("%spcpu-alloc: s%zu r%zu d%zu u%zu alloc=%zu*%zu", |
1387 | static_size, reserved_size, dyn_size, unit_size, lpage_size); | 1482 | lvl, ai->static_size, ai->reserved_size, ai->dyn_size, |
1483 | ai->unit_size, ai->alloc_size / ai->atom_size, ai->atom_size); | ||
1388 | 1484 | ||
1389 | for (lpage = 0, unit = 0; unit < nr_units; unit++) { | 1485 | for (group = 0; group < ai->nr_groups; group++) { |
1390 | if (!(unit % upl)) { | 1486 | const struct pcpu_group_info *gi = &ai->groups[group]; |
1391 | if (!(lpage++ % lpl)) { | 1487 | int unit = 0, unit_end = 0; |
1488 | |||
1489 | BUG_ON(gi->nr_units % upa); | ||
1490 | for (alloc_end += gi->nr_units / upa; | ||
1491 | alloc < alloc_end; alloc++) { | ||
1492 | if (!(alloc % apl)) { | ||
1392 | printk("\n"); | 1493 | printk("\n"); |
1393 | printk("%spcpu-lpage: ", lvl); | 1494 | printk("%spcpu-alloc: ", lvl); |
1394 | } else | 1495 | } |
1395 | printk("| "); | 1496 | printk("[%0*d] ", group_width, group); |
1497 | |||
1498 | for (unit_end += upa; unit < unit_end; unit++) | ||
1499 | if (gi->cpu_map[unit] != NR_CPUS) | ||
1500 | printk("%0*d ", cpu_width, | ||
1501 | gi->cpu_map[unit]); | ||
1502 | else | ||
1503 | printk("%s ", empty_str); | ||
1396 | } | 1504 | } |
1397 | if (pcpul_unit_to_cpu(unit, unit_map, &cpu)) | ||
1398 | printk("%0*d ", width, cpu); | ||
1399 | else | ||
1400 | printk("%s ", empty_str); | ||
1401 | } | 1505 | } |
1402 | printk("\n"); | 1506 | printk("\n"); |
1403 | } | 1507 | } |
1404 | #endif | ||
1405 | 1508 | ||
1406 | /** | 1509 | /** |
1407 | * pcpu_setup_first_chunk - initialize the first percpu chunk | 1510 | * pcpu_setup_first_chunk - initialize the first percpu chunk |
1408 | * @static_size: the size of static percpu area in bytes | 1511 | * @ai: pcpu_alloc_info describing how the percpu area is shaped
1409 | * @reserved_size: the size of reserved percpu area in bytes, 0 for none | ||
1410 | * @dyn_size: free size for dynamic allocation in bytes | ||
1411 | * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE | ||
1412 | * @base_addr: mapped address | 1512 | * @base_addr: mapped address |
1413 | * @unit_map: cpu -> unit map, NULL for sequential mapping | ||
1414 | * | 1513 | * |
1415 | * Initialize the first percpu chunk which contains the kernel static | 1514 | * Initialize the first percpu chunk which contains the kernel static |
1416 | * percpu area. This function is to be called from arch percpu area | 1515 | * percpu area. This function is to be called from arch percpu area |
1417 | * setup path. | 1516 | * setup path. |
1418 | * | 1517 | * |
1419 | * @reserved_size, if non-zero, specifies the amount of bytes to | 1518 | * @ai contains all information necessary to initialize the first |
1519 | * chunk and prime the dynamic percpu allocator. | ||
1520 | * | ||
1521 | * @ai->static_size is the size of static percpu area. | ||
1522 | * | ||
1523 | * @ai->reserved_size, if non-zero, specifies the amount of bytes to | ||
1420 | * reserve after the static area in the first chunk. This reserves | 1524 | * reserve after the static area in the first chunk. This reserves |
1421 | * the first chunk such that it's available only through reserved | 1525 | * the first chunk such that it's available only through reserved |
1422 | * percpu allocation. This is primarily used to serve module percpu | 1526 | * percpu allocation. This is primarily used to serve module percpu |
@@ -1424,13 +1528,26 @@ static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, | |||
1424 | * limited offset range for symbol relocations to guarantee module | 1528 | * limited offset range for symbol relocations to guarantee module |
1425 | * percpu symbols fall inside the relocatable range. | 1529 | * percpu symbols fall inside the relocatable range. |
1426 | * | 1530 | * |
1427 | * @dyn_size determines the number of bytes available for dynamic | 1531 | * @ai->dyn_size determines the number of bytes available for dynamic |
1428 | * allocation in the first chunk. The area between @static_size + | 1532 | * allocation in the first chunk. The area between @ai->static_size + |
1429 | * @reserved_size + @dyn_size and @unit_size is unused. | 1533 | * @ai->reserved_size + @ai->dyn_size and @ai->unit_size is unused. |
1430 | * | 1534 | * |
1431 | * @unit_size specifies unit size and must be aligned to PAGE_SIZE and | 1535 | * @ai->unit_size specifies unit size and must be aligned to PAGE_SIZE |
1432 | * equal to or larger than @static_size + @reserved_size + if | 1536 | * and equal to or larger than @ai->static_size + @ai->reserved_size + |
1433 | * non-negative, @dyn_size. | 1537 | * @ai->dyn_size. |
1538 | * | ||
1539 | * @ai->atom_size is the allocation atom size and used as alignment | ||
1540 | * for vm areas. | ||
1541 | * | ||
1542 | * @ai->alloc_size is the allocation size and always multiple of | ||
1543 | * @ai->atom_size. This is larger than @ai->atom_size if | ||
1544 | * @ai->unit_size is larger than @ai->atom_size. | ||
1545 | * | ||
1546 | * @ai->nr_groups and @ai->groups describe virtual memory layout of | ||
1547 | * percpu areas. Units which should be colocated are put into the | ||
1548 | * same group. Dynamic VM areas will be allocated according to these | ||
1549 | * groupings. If @ai->nr_groups is zero, a single group containing | ||
1550 | * all units is assumed. | ||
1434 | * | 1551 | * |
1435 | * The caller should have mapped the first chunk at @base_addr and | 1552 | * The caller should have mapped the first chunk at @base_addr and |
1436 | * copied static data to each unit. | 1553 | * copied static data to each unit. |
@@ -1446,70 +1563,63 @@ static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, | |||
1446 | * The determined pcpu_unit_size which can be used to initialize | 1563 | * The determined pcpu_unit_size which can be used to initialize |
1447 | * percpu access. | 1564 | * percpu access. |
1448 | */ | 1565 | */ |
1449 | size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, | 1566 | size_t __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, |
1450 | size_t dyn_size, size_t unit_size, | 1567 | void *base_addr) |
1451 | void *base_addr, const int *unit_map) | ||
1452 | { | 1568 | { |
1453 | static struct vm_struct first_vm; | 1569 | static struct vm_struct first_vm; |
1454 | static int smap[2], dmap[2]; | 1570 | static int smap[2], dmap[2]; |
1455 | size_t size_sum = static_size + reserved_size + dyn_size; | 1571 | size_t dyn_size = ai->dyn_size; |
1572 | size_t size_sum = ai->static_size + ai->reserved_size + dyn_size; | ||
1456 | struct pcpu_chunk *schunk, *dchunk = NULL; | 1573 | struct pcpu_chunk *schunk, *dchunk = NULL; |
1457 | unsigned int cpu, tcpu; | 1574 | unsigned int cpu; |
1458 | int i; | 1575 | int *unit_map; |
1576 | int group, unit, i; | ||
1459 | 1577 | ||
1460 | /* sanity checks */ | 1578 | /* sanity checks */ |
1461 | BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC || | 1579 | BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC || |
1462 | ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC); | 1580 | ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC); |
1463 | BUG_ON(!static_size); | 1581 | BUG_ON(ai->nr_groups <= 0); |
1582 | BUG_ON(!ai->static_size); | ||
1464 | BUG_ON(!base_addr); | 1583 | BUG_ON(!base_addr); |
1465 | BUG_ON(unit_size < size_sum); | 1584 | BUG_ON(ai->unit_size < size_sum); |
1466 | BUG_ON(unit_size & ~PAGE_MASK); | 1585 | BUG_ON(ai->unit_size & ~PAGE_MASK); |
1467 | BUG_ON(unit_size < PCPU_MIN_UNIT_SIZE); | 1586 | BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE); |
1587 | |||
1588 | pcpu_dump_alloc_info(KERN_DEBUG, ai); | ||
1468 | 1589 | ||
1469 | /* determine number of units and verify and initialize pcpu_unit_map */ | 1590 | /* determine number of units and verify and initialize pcpu_unit_map */ |
1470 | if (unit_map) { | 1591 | unit_map = alloc_bootmem(nr_cpu_ids * sizeof(unit_map[0])); |
1471 | int first_unit = INT_MAX, last_unit = INT_MIN; | ||
1472 | |||
1473 | for_each_possible_cpu(cpu) { | ||
1474 | int unit = unit_map[cpu]; | ||
1475 | |||
1476 | BUG_ON(unit < 0); | ||
1477 | for_each_possible_cpu(tcpu) { | ||
1478 | if (tcpu == cpu) | ||
1479 | break; | ||
1480 | /* the mapping should be one-to-one */ | ||
1481 | BUG_ON(unit_map[tcpu] == unit); | ||
1482 | } | ||
1483 | 1592 | ||
1484 | if (unit < first_unit) { | 1593 | for (cpu = 0; cpu < nr_cpu_ids; cpu++) |
1485 | pcpu_first_unit_cpu = cpu; | 1594 | unit_map[cpu] = NR_CPUS; |
1486 | first_unit = unit; | 1595 | pcpu_first_unit_cpu = NR_CPUS; |
1487 | } | ||
1488 | if (unit > last_unit) { | ||
1489 | pcpu_last_unit_cpu = cpu; | ||
1490 | last_unit = unit; | ||
1491 | } | ||
1492 | } | ||
1493 | pcpu_nr_units = last_unit + 1; | ||
1494 | pcpu_unit_map = unit_map; | ||
1495 | } else { | ||
1496 | int *identity_map; | ||
1497 | 1596 | ||
1498 | /* #units == #cpus, identity mapped */ | 1597 | for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) { |
1499 | identity_map = alloc_bootmem(nr_cpu_ids * | 1598 | const struct pcpu_group_info *gi = &ai->groups[group]; |
1500 | sizeof(identity_map[0])); | ||
1501 | 1599 | ||
1502 | for_each_possible_cpu(cpu) | 1600 | for (i = 0; i < gi->nr_units; i++) { |
1503 | identity_map[cpu] = cpu; | 1601 | cpu = gi->cpu_map[i]; |
1602 | if (cpu == NR_CPUS) | ||
1603 | continue; | ||
1504 | 1604 | ||
1505 | pcpu_first_unit_cpu = 0; | 1605 | BUG_ON(cpu > nr_cpu_ids || !cpu_possible(cpu)); |
1506 | pcpu_last_unit_cpu = pcpu_nr_units - 1; | 1606 | BUG_ON(unit_map[cpu] != NR_CPUS); |
1507 | pcpu_nr_units = nr_cpu_ids; | 1607 | |
1508 | pcpu_unit_map = identity_map; | 1608 | unit_map[cpu] = unit + i; |
1609 | if (pcpu_first_unit_cpu == NR_CPUS) | ||
1610 | pcpu_first_unit_cpu = cpu; | ||
1611 | } | ||
1509 | } | 1612 | } |
1613 | pcpu_last_unit_cpu = cpu; | ||
1614 | pcpu_nr_units = unit; | ||
1615 | |||
1616 | for_each_possible_cpu(cpu) | ||
1617 | BUG_ON(unit_map[cpu] == NR_CPUS); | ||
1618 | |||
1619 | pcpu_unit_map = unit_map; | ||
1510 | 1620 | ||
1511 | /* determine basic parameters */ | 1621 | /* determine basic parameters */ |
1512 | pcpu_unit_pages = unit_size >> PAGE_SHIFT; | 1622 | pcpu_unit_pages = ai->unit_size >> PAGE_SHIFT; |
1513 | pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; | 1623 | pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; |
1514 | pcpu_chunk_size = pcpu_nr_units * pcpu_unit_size; | 1624 | pcpu_chunk_size = pcpu_nr_units * pcpu_unit_size; |
1515 | pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) + | 1625 | pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) + |
@@ -1543,17 +1653,17 @@ size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, | |||
1543 | schunk->immutable = true; | 1653 | schunk->immutable = true; |
1544 | bitmap_fill(schunk->populated, pcpu_unit_pages); | 1654 | bitmap_fill(schunk->populated, pcpu_unit_pages); |
1545 | 1655 | ||
1546 | if (reserved_size) { | 1656 | if (ai->reserved_size) { |
1547 | schunk->free_size = reserved_size; | 1657 | schunk->free_size = ai->reserved_size; |
1548 | pcpu_reserved_chunk = schunk; | 1658 | pcpu_reserved_chunk = schunk; |
1549 | pcpu_reserved_chunk_limit = static_size + reserved_size; | 1659 | pcpu_reserved_chunk_limit = ai->static_size + ai->reserved_size; |
1550 | } else { | 1660 | } else { |
1551 | schunk->free_size = dyn_size; | 1661 | schunk->free_size = dyn_size; |
1552 | dyn_size = 0; /* dynamic area covered */ | 1662 | dyn_size = 0; /* dynamic area covered */ |
1553 | } | 1663 | } |
1554 | schunk->contig_hint = schunk->free_size; | 1664 | schunk->contig_hint = schunk->free_size; |
1555 | 1665 | ||
1556 | schunk->map[schunk->map_used++] = -static_size; | 1666 | schunk->map[schunk->map_used++] = -ai->static_size; |
1557 | if (schunk->free_size) | 1667 | if (schunk->free_size) |
1558 | schunk->map[schunk->map_used++] = schunk->free_size; | 1668 | schunk->map[schunk->map_used++] = schunk->free_size; |
1559 | 1669 | ||
@@ -1643,44 +1753,47 @@ early_param("percpu_alloc", percpu_alloc_setup); | |||
1643 | */ | 1753 | */ |
1644 | ssize_t __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size) | 1754 | ssize_t __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size) |
1645 | { | 1755 | { |
1646 | const size_t static_size = __per_cpu_end - __per_cpu_start; | 1756 | struct pcpu_alloc_info *ai; |
1647 | size_t size_sum, unit_size, chunk_size; | 1757 | size_t size_sum, chunk_size; |
1648 | void *base; | 1758 | void *base; |
1649 | unsigned int cpu; | 1759 | int unit; |
1760 | ssize_t ret; | ||
1650 | 1761 | ||
1651 | /* determine parameters and allocate */ | 1762 | ai = pcpu_build_alloc_info(reserved_size, dyn_size, PAGE_SIZE, NULL); |
1652 | size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); | 1763 | if (IS_ERR(ai)) |
1764 | return PTR_ERR(ai); | ||
1765 | BUG_ON(ai->nr_groups != 1); | ||
1766 | BUG_ON(ai->groups[0].nr_units != num_possible_cpus()); | ||
1653 | 1767 | ||
1654 | unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); | 1768 | size_sum = ai->static_size + ai->reserved_size + ai->dyn_size; |
1655 | chunk_size = unit_size * nr_cpu_ids; | 1769 | chunk_size = ai->unit_size * num_possible_cpus(); |
1656 | 1770 | ||
1657 | base = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE, | 1771 | base = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE, |
1658 | __pa(MAX_DMA_ADDRESS)); | 1772 | __pa(MAX_DMA_ADDRESS)); |
1659 | if (!base) { | 1773 | if (!base) { |
1660 | pr_warning("PERCPU: failed to allocate %zu bytes for " | 1774 | pr_warning("PERCPU: failed to allocate %zu bytes for " |
1661 | "embedding\n", chunk_size); | 1775 | "embedding\n", chunk_size); |
1662 | return -ENOMEM; | 1776 | ret = -ENOMEM; |
1777 | goto out_free_ai; | ||
1663 | } | 1778 | } |
1664 | 1779 | ||
1665 | /* return the leftover and copy */ | 1780 | /* return the leftover and copy */ |
1666 | for (cpu = 0; cpu < nr_cpu_ids; cpu++) { | 1781 | for (unit = 0; unit < num_possible_cpus(); unit++) { |
1667 | void *ptr = base + cpu * unit_size; | 1782 | void *ptr = base + unit * ai->unit_size; |
1668 | 1783 | ||
1669 | if (cpu_possible(cpu)) { | 1784 | free_bootmem(__pa(ptr + size_sum), ai->unit_size - size_sum); |
1670 | free_bootmem(__pa(ptr + size_sum), | 1785 | memcpy(ptr, __per_cpu_load, ai->static_size); |
1671 | unit_size - size_sum); | ||
1672 | memcpy(ptr, __per_cpu_load, static_size); | ||
1673 | } else | ||
1674 | free_bootmem(__pa(ptr), unit_size); | ||
1675 | } | 1786 | } |
1676 | 1787 | ||
1677 | /* we're ready, commit */ | 1788 | /* we're ready, commit */ |
1678 | pr_info("PERCPU: Embedded %zu pages/cpu @%p s%zu r%zu d%zu u%zu\n", | 1789 | pr_info("PERCPU: Embedded %zu pages/cpu @%p s%zu r%zu d%zu u%zu\n", |
1679 | PFN_DOWN(size_sum), base, static_size, reserved_size, dyn_size, | 1790 | PFN_DOWN(size_sum), base, ai->static_size, ai->reserved_size, |
1680 | unit_size); | 1791 | ai->dyn_size, ai->unit_size); |
1681 | 1792 | ||
1682 | return pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, | 1793 | ret = pcpu_setup_first_chunk(ai, base); |
1683 | unit_size, base, NULL); | 1794 | out_free_ai: |
1795 | pcpu_free_alloc_info(ai); | ||
1796 | return ret; | ||
1684 | } | 1797 | } |
1685 | #endif /* CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK || | 1798 | #endif /* CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK || |
1686 | !CONFIG_HAVE_SETUP_PER_CPU_AREA */ | 1799 | !CONFIG_HAVE_SETUP_PER_CPU_AREA */ |
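Since pcpu_build_alloc_info() returns a single group covering exactly the possible CPUs (the BUG_ONs above check this), the embed path no longer needs a cpu_possible() branch: every unit keeps [0, size_sum) and returns the tail to the boot allocator. A sketch of that trimming with invented sizes:

#include <stdio.h>

int main(void)
{
	unsigned long unit_size = 64 << 10;	/* invented, page aligned */
	unsigned long size_sum  = 45 << 10;	/* static + reserved + dyn */
	int nr_units = 4, unit;			/* == num_possible_cpus() */

	for (unit = 0; unit < nr_units; unit++) {
		unsigned long base = unit * unit_size;

		/* keep the used head, give the tail back to bootmem */
		printf("unit %d: keep [%#lx,%#lx) free [%#lx,%#lx)\n",
		       unit, base, base + size_sum,
		       base + size_sum, base + unit_size);
	}
	return 0;
}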
@@ -1709,31 +1822,34 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, | |||
1709 | pcpu_fc_populate_pte_fn_t populate_pte_fn) | 1822 | pcpu_fc_populate_pte_fn_t populate_pte_fn) |
1710 | { | 1823 | { |
1711 | static struct vm_struct vm; | 1824 | static struct vm_struct vm; |
1712 | const size_t static_size = __per_cpu_end - __per_cpu_start; | 1825 | struct pcpu_alloc_info *ai; |
1713 | ssize_t dyn_size = -1; | ||
1714 | size_t size_sum, unit_size; | ||
1715 | char psize_str[16]; | 1826 | char psize_str[16]; |
1716 | int unit_pages; | 1827 | int unit_pages; |
1717 | size_t pages_size; | 1828 | size_t pages_size; |
1718 | struct page **pages; | 1829 | struct page **pages; |
1719 | unsigned int cpu; | 1830 | int unit, i, j; |
1720 | int i, j; | ||
1721 | ssize_t ret; | 1831 | ssize_t ret; |
1722 | 1832 | ||
1723 | snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10); | 1833 | snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10); |
1724 | 1834 | ||
1725 | size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); | 1835 | ai = pcpu_build_alloc_info(reserved_size, -1, PAGE_SIZE, NULL); |
1726 | unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); | 1836 | if (IS_ERR(ai)) |
1727 | unit_pages = unit_size >> PAGE_SHIFT; | 1837 | return PTR_ERR(ai); |
1838 | BUG_ON(ai->nr_groups != 1); | ||
1839 | BUG_ON(ai->groups[0].nr_units != num_possible_cpus()); | ||
1840 | |||
1841 | unit_pages = ai->unit_size >> PAGE_SHIFT; | ||
1728 | 1842 | ||
1729 | /* unaligned allocations can't be freed, round up to page size */ | 1843 | /* unaligned allocations can't be freed, round up to page size */ |
1730 | pages_size = PFN_ALIGN(unit_pages * nr_cpu_ids * sizeof(pages[0])); | 1844 | pages_size = PFN_ALIGN(unit_pages * num_possible_cpus() * |
1845 | sizeof(pages[0])); | ||
1731 | pages = alloc_bootmem(pages_size); | 1846 | pages = alloc_bootmem(pages_size); |
1732 | 1847 | ||
1733 | /* allocate pages */ | 1848 | /* allocate pages */ |
1734 | j = 0; | 1849 | j = 0; |
1735 | for_each_possible_cpu(cpu) | 1850 | for (unit = 0; unit < num_possible_cpus(); unit++) |
1736 | for (i = 0; i < unit_pages; i++) { | 1851 | for (i = 0; i < unit_pages; i++) { |
1852 | unsigned int cpu = ai->groups[0].cpu_map[unit]; | ||
1737 | void *ptr; | 1853 | void *ptr; |
1738 | 1854 | ||
1739 | ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE); | 1855 | ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE); |
@@ -1747,18 +1863,18 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, | |||
1747 | 1863 | ||
1748 | /* allocate vm area, map the pages and copy static data */ | 1864 | /* allocate vm area, map the pages and copy static data */ |
1749 | vm.flags = VM_ALLOC; | 1865 | vm.flags = VM_ALLOC; |
1750 | vm.size = nr_cpu_ids * unit_size; | 1866 | vm.size = num_possible_cpus() * ai->unit_size; |
1751 | vm_area_register_early(&vm, PAGE_SIZE); | 1867 | vm_area_register_early(&vm, PAGE_SIZE); |
1752 | 1868 | ||
1753 | for_each_possible_cpu(cpu) { | 1869 | for (unit = 0; unit < num_possible_cpus(); unit++) { |
1754 | unsigned long unit_addr = | 1870 | unsigned long unit_addr = |
1755 | (unsigned long)vm.addr + cpu * unit_size; | 1871 | (unsigned long)vm.addr + unit * ai->unit_size; |
1756 | 1872 | ||
1757 | for (i = 0; i < unit_pages; i++) | 1873 | for (i = 0; i < unit_pages; i++) |
1758 | populate_pte_fn(unit_addr + (i << PAGE_SHIFT)); | 1874 | populate_pte_fn(unit_addr + (i << PAGE_SHIFT)); |
1759 | 1875 | ||
1760 | /* pte already populated, the following shouldn't fail */ | 1876 | /* pte already populated, the following shouldn't fail */ |
1761 | ret = __pcpu_map_pages(unit_addr, &pages[cpu * unit_pages], | 1877 | ret = __pcpu_map_pages(unit_addr, &pages[unit * unit_pages], |
1762 | unit_pages); | 1878 | unit_pages); |
1763 | if (ret < 0) | 1879 | if (ret < 0) |
1764 | panic("failed to map percpu area, err=%zd\n", ret); | 1880 | panic("failed to map percpu area, err=%zd\n", ret); |
@@ -1772,16 +1888,15 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, | |||
1772 | */ | 1888 | */ |
1773 | 1889 | ||
1774 | /* copy static data */ | 1890 | /* copy static data */ |
1775 | memcpy((void *)unit_addr, __per_cpu_load, static_size); | 1891 | memcpy((void *)unit_addr, __per_cpu_load, ai->static_size); |
1776 | } | 1892 | } |
1777 | 1893 | ||
1778 | /* we're ready, commit */ | 1894 | /* we're ready, commit */ |
1779 | pr_info("PERCPU: %d %s pages/cpu @%p s%zu r%zu d%zu\n", | 1895 | pr_info("PERCPU: %d %s pages/cpu @%p s%zu r%zu d%zu\n", |
1780 | unit_pages, psize_str, vm.addr, static_size, reserved_size, | 1896 | unit_pages, psize_str, vm.addr, ai->static_size, |
1781 | dyn_size); | 1897 | ai->reserved_size, ai->dyn_size); |
1782 | 1898 | ||
1783 | ret = pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, | 1899 | ret = pcpu_setup_first_chunk(ai, vm.addr); |
1784 | unit_size, vm.addr, NULL); | ||
1785 | goto out_free_ar; | 1900 | goto out_free_ar; |
1786 | 1901 | ||
1787 | enomem: | 1902 | enomem: |
@@ -1790,6 +1905,7 @@ enomem: | |||
1790 | ret = -ENOMEM; | 1905 | ret = -ENOMEM; |
1791 | out_free_ar: | 1906 | out_free_ar: |
1792 | free_bootmem(__pa(pages), pages_size); | 1907 | free_bootmem(__pa(pages), pages_size); |
1908 | pcpu_free_alloc_info(ai); | ||
1793 | return ret; | 1909 | return ret; |
1794 | } | 1910 | } |
1795 | #endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */ | 1911 | #endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */ |
@@ -1805,38 +1921,50 @@ static size_t pcpul_lpage_size; | |||
1805 | static int pcpul_nr_lpages; | 1921 | static int pcpul_nr_lpages; |
1806 | static struct pcpul_ent *pcpul_map; | 1922 | static struct pcpul_ent *pcpul_map; |
1807 | 1923 | ||
1808 | static bool __init pcpul_unit_to_cpu(int unit, const int *unit_map, | 1924 | static bool __init pcpul_unit_to_cpu(int unit, const struct pcpu_alloc_info *ai, |
1809 | unsigned int *cpup) | 1925 | unsigned int *cpup) |
1810 | { | 1926 | { |
1811 | unsigned int cpu; | 1927 | int group, cunit; |
1812 | 1928 | ||
1813 | for_each_possible_cpu(cpu) | 1929 | for (group = 0, cunit = 0; group < ai->nr_groups; group++) { |
1814 | if (unit_map[cpu] == unit) { | 1930 | const struct pcpu_group_info *gi = &ai->groups[group]; |
1931 | |||
1932 | if (unit < cunit + gi->nr_units) { | ||
1815 | if (cpup) | 1933 | if (cpup) |
1816 | *cpup = cpu; | 1934 | *cpup = gi->cpu_map[unit - cunit]; |
1817 | return true; | 1935 | return true; |
1818 | } | 1936 | } |
1937 | cunit += gi->nr_units; | ||
1938 | } | ||
1819 | 1939 | ||
1820 | return false; | 1940 | return false; |
1821 | } | 1941 | } |
1822 | 1942 | ||
1943 | static int __init pcpul_cpu_to_unit(int cpu, const struct pcpu_alloc_info *ai) | ||
1944 | { | ||
1945 | int group, unit, i; | ||
1946 | |||
1947 | for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) { | ||
1948 | const struct pcpu_group_info *gi = &ai->groups[group]; | ||
1949 | |||
1950 | for (i = 0; i < gi->nr_units; i++) | ||
1951 | if (gi->cpu_map[i] == cpu) | ||
1952 | return unit + i; | ||
1953 | } | ||
1954 | BUG(); | ||
1955 | } | ||
1956 | |||
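The two helpers above replace flat unit_map scans with group walks: pcpul_unit_to_cpu() locates the group whose unit range contains the unit, and pcpul_cpu_to_unit() accumulates group sizes until it finds the CPU. A stand-alone model of both lookups over an invented two-group layout:

#include <stdio.h>

struct group_info {
	int nr_units;
	unsigned int cpu_map[4];
};

/* two groups of four units each; cpus interleaved across groups */
static const struct group_info groups[] = {
	{ 4, { 0, 2, 4, 6 } },
	{ 4, { 1, 3, 5, 7 } },
};

static int unit_to_cpu(int unit, unsigned int *cpup)
{
	int g, cunit = 0;

	for (g = 0; g < 2; g++) {
		if (unit < cunit + groups[g].nr_units) {
			if (cpup)
				*cpup = groups[g].cpu_map[unit - cunit];
			return 1;
		}
		cunit += groups[g].nr_units;
	}
	return 0;
}

static int cpu_to_unit(unsigned int cpu)
{
	int g, unit = 0, i;

	for (g = 0; g < 2; g++) {
		for (i = 0; i < groups[g].nr_units; i++)
			if (groups[g].cpu_map[i] == cpu)
				return unit + i;
		unit += groups[g].nr_units;
	}
	return -1;
}

int main(void)
{
	unsigned int cpu;

	if (unit_to_cpu(5, &cpu))			/* group 1, slot 1 */
		printf("unit 5 -> cpu %u\n", cpu);	/* cpu 3 */
	printf("cpu 3 -> unit %d\n", cpu_to_unit(3));	/* unit 5 */
	return 0;
}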
1823 | /** | 1957 | /** |
1824 | * pcpu_lpage_first_chunk - remap the first percpu chunk using large page | 1958 | * pcpu_lpage_first_chunk - remap the first percpu chunk using large page |
1825 | * @reserved_size: the size of reserved percpu area in bytes | 1959 | * @ai: pcpu_alloc_info |
1826 | * @dyn_size: free size for dynamic allocation in bytes | ||
1827 | * @unit_size: unit size in bytes | ||
1828 | * @lpage_size: the size of a large page | ||
1829 | * @unit_map: cpu -> unit mapping | ||
1830 | * @nr_units: the number of units | ||
1831 | * @alloc_fn: function to allocate percpu lpage, always called with lpage_size | 1960 | * @alloc_fn: function to allocate percpu lpage, always called with lpage_size |
1832 | * @free_fn: function to free percpu memory, @size <= lpage_size | 1961 | * @free_fn: function to free percpu memory, @size <= lpage_size |
1833 | * @map_fn: function to map percpu lpage, always called with lpage_size | 1962 | * @map_fn: function to map percpu lpage, always called with lpage_size |
1834 | * | 1963 | * |
1835 | * This allocator uses large page to build and map the first chunk. | 1964 | * This allocator uses large page to build and map the first chunk. |
1836 | * Unlike other helpers, the caller should always specify @dyn_size | 1965 | * Unlike other helpers, the caller should provide a fully initialized
1837 | * and @unit_size. These parameters along with @unit_map and | 1966 | * @ai. This can be done using pcpu_build_alloc_info(). This two-
1838 | * @nr_units can be determined using pcpu_lpage_build_unit_map(). | 1967 | * stage initialization is to allow arch code to evaluate the
1839 | * This two stage initialization is to allow arch code to evaluate the | ||
1840 | * parameters before committing to it. | 1968 | * parameters before committing to it. |
1841 | * | 1969 | * |
1842 | * Large pages are allocated as directed by @unit_map and other | 1970 | * Large pages are allocated as directed by @unit_map and other |
@@ -1852,27 +1980,26 @@ static bool __init pcpul_unit_to_cpu(int unit, const int *unit_map, | |||
1852 | * The determined pcpu_unit_size which can be used to initialize | 1980 | * The determined pcpu_unit_size which can be used to initialize |
1853 | * percpu access on success, -errno on failure. | 1981 | * percpu access on success, -errno on failure. |
1854 | */ | 1982 | */ |
1855 | ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t dyn_size, | 1983 | ssize_t __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai, |
1856 | size_t unit_size, size_t lpage_size, | ||
1857 | const int *unit_map, int nr_units, | ||
1858 | pcpu_fc_alloc_fn_t alloc_fn, | 1984 | pcpu_fc_alloc_fn_t alloc_fn, |
1859 | pcpu_fc_free_fn_t free_fn, | 1985 | pcpu_fc_free_fn_t free_fn, |
1860 | pcpu_fc_map_fn_t map_fn) | 1986 | pcpu_fc_map_fn_t map_fn) |
1861 | { | 1987 | { |
1862 | static struct vm_struct vm; | 1988 | static struct vm_struct vm; |
1863 | const size_t static_size = __per_cpu_end - __per_cpu_start; | 1989 | const size_t lpage_size = ai->atom_size; |
1864 | size_t chunk_size = unit_size * nr_units; | 1990 | size_t chunk_size, map_size; |
1865 | size_t map_size; | ||
1866 | unsigned int cpu; | 1991 | unsigned int cpu; |
1867 | ssize_t ret; | 1992 | ssize_t ret; |
1868 | int i, j, unit; | 1993 | int i, j, unit, nr_units; |
1869 | 1994 | ||
1870 | pcpul_lpage_dump_cfg(KERN_DEBUG, static_size, reserved_size, dyn_size, | 1995 | nr_units = 0; |
1871 | unit_size, lpage_size, unit_map, nr_units); | 1996 | for (i = 0; i < ai->nr_groups; i++) |
1997 | nr_units += ai->groups[i].nr_units; | ||
1872 | 1998 | ||
1999 | chunk_size = ai->unit_size * nr_units; | ||
1873 | BUG_ON(chunk_size % lpage_size); | 2000 | BUG_ON(chunk_size % lpage_size); |
1874 | 2001 | ||
1875 | pcpul_size = static_size + reserved_size + dyn_size; | 2002 | pcpul_size = ai->static_size + ai->reserved_size + ai->dyn_size; |
1876 | pcpul_lpage_size = lpage_size; | 2003 | pcpul_lpage_size = lpage_size; |
1877 | pcpul_nr_lpages = chunk_size / lpage_size; | 2004 | pcpul_nr_lpages = chunk_size / lpage_size; |
1878 | 2005 | ||
@@ -1883,13 +2010,13 @@ ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t dyn_size, | |||
1883 | /* allocate all pages */ | 2010 | /* allocate all pages */ |
1884 | for (i = 0; i < pcpul_nr_lpages; i++) { | 2011 | for (i = 0; i < pcpul_nr_lpages; i++) { |
1885 | size_t offset = i * lpage_size; | 2012 | size_t offset = i * lpage_size; |
1886 | int first_unit = offset / unit_size; | 2013 | int first_unit = offset / ai->unit_size; |
1887 | int last_unit = (offset + lpage_size - 1) / unit_size; | 2014 | int last_unit = (offset + lpage_size - 1) / ai->unit_size; |
1888 | void *ptr; | 2015 | void *ptr; |
1889 | 2016 | ||
1890 | /* find out which cpu is mapped to this unit */ | 2017 | /* find out which cpu is mapped to this unit */ |
1891 | for (unit = first_unit; unit <= last_unit; unit++) | 2018 | for (unit = first_unit; unit <= last_unit; unit++) |
1892 | if (pcpul_unit_to_cpu(unit, unit_map, &cpu)) | 2019 | if (pcpul_unit_to_cpu(unit, ai, &cpu)) |
1893 | goto found; | 2020 | goto found; |
1894 | continue; | 2021 | continue; |
1895 | found: | 2022 | found: |
@@ -1905,12 +2032,12 @@ ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t dyn_size, | |||
1905 | 2032 | ||
1906 | /* return unused holes */ | 2033 | /* return unused holes */ |
1907 | for (unit = 0; unit < nr_units; unit++) { | 2034 | for (unit = 0; unit < nr_units; unit++) { |
1908 | size_t start = unit * unit_size; | 2035 | size_t start = unit * ai->unit_size; |
1909 | size_t end = start + unit_size; | 2036 | size_t end = start + ai->unit_size; |
1910 | size_t off, next; | 2037 | size_t off, next; |
1911 | 2038 | ||
1912 | /* don't free used part of occupied unit */ | 2039 | /* don't free used part of occupied unit */ |
1913 | if (pcpul_unit_to_cpu(unit, unit_map, NULL)) | 2040 | if (pcpul_unit_to_cpu(unit, ai, NULL)) |
1914 | start += pcpul_size; | 2041 | start += pcpul_size; |
1915 | 2042 | ||
1916 | /* unit can span more than one page, punch the holes */ | 2043 | /* unit can span more than one page, punch the holes */ |
@@ -1925,7 +2052,7 @@ ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t dyn_size, | |||
1925 | /* allocate address, map and copy */ | 2052 | /* allocate address, map and copy */ |
1926 | vm.flags = VM_ALLOC; | 2053 | vm.flags = VM_ALLOC; |
1927 | vm.size = chunk_size; | 2054 | vm.size = chunk_size; |
1928 | vm_area_register_early(&vm, unit_size); | 2055 | vm_area_register_early(&vm, ai->unit_size); |
1929 | 2056 | ||
1930 | for (i = 0; i < pcpul_nr_lpages; i++) { | 2057 | for (i = 0; i < pcpul_nr_lpages; i++) { |
1931 | if (!pcpul_map[i].ptr) | 2058 | if (!pcpul_map[i].ptr) |
@@ -1935,15 +2062,15 @@ ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t dyn_size, | |||
1935 | } | 2062 | } |
1936 | 2063 | ||
1937 | for_each_possible_cpu(cpu) | 2064 | for_each_possible_cpu(cpu) |
1938 | memcpy(vm.addr + unit_map[cpu] * unit_size, __per_cpu_load, | 2065 | memcpy(vm.addr + pcpul_cpu_to_unit(cpu, ai) * ai->unit_size, |
1939 | static_size); | 2066 | __per_cpu_load, ai->static_size); |
1940 | 2067 | ||
1941 | /* we're ready, commit */ | 2068 | /* we're ready, commit */ |
1942 | pr_info("PERCPU: large pages @%p s%zu r%zu d%zu u%zu\n", | 2069 | pr_info("PERCPU: large pages @%p s%zu r%zu d%zu u%zu\n", |
1943 | vm.addr, static_size, reserved_size, dyn_size, unit_size); | 2070 | vm.addr, ai->static_size, ai->reserved_size, ai->dyn_size, |
2071 | ai->unit_size); | ||
1944 | 2072 | ||
1945 | ret = pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, | 2073 | ret = pcpu_setup_first_chunk(ai, vm.addr); |
1946 | unit_size, vm.addr, unit_map); | ||
1947 | 2074 | ||
1948 | /* | 2075 | /* |
1949 | * Sort pcpul_map array for pcpu_lpage_remapped(). Unmapped | 2076 | * Sort pcpul_map array for pcpu_lpage_remapped(). Unmapped |