diff options
| -rw-r--r-- | arch/sparc/kernel/smp_64.c | 24 | ||||
| -rw-r--r-- | arch/x86/kernel/setup_percpu.c | 38 | ||||
| -rw-r--r-- | include/linux/percpu.h | 42 | ||||
| -rw-r--r-- | mm/percpu.c | 529 |
4 files changed, 389 insertions, 244 deletions
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 9856d866b77b..a42a4a744d14 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c | |||
| @@ -1475,17 +1475,29 @@ static void __init pcpu_map_range(unsigned long start, unsigned long end, | |||
| 1475 | 1475 | ||
| 1476 | void __init setup_per_cpu_areas(void) | 1476 | void __init setup_per_cpu_areas(void) |
| 1477 | { | 1477 | { |
| 1478 | size_t dyn_size, static_size = __per_cpu_end - __per_cpu_start; | ||
| 1479 | static struct vm_struct vm; | 1478 | static struct vm_struct vm; |
| 1479 | struct pcpu_alloc_info *ai; | ||
| 1480 | unsigned long delta, cpu; | 1480 | unsigned long delta, cpu; |
| 1481 | size_t size_sum, pcpu_unit_size; | 1481 | size_t size_sum, pcpu_unit_size; |
| 1482 | size_t ptrs_size; | 1482 | size_t ptrs_size; |
| 1483 | void **ptrs; | 1483 | void **ptrs; |
| 1484 | 1484 | ||
| 1485 | size_sum = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + | 1485 | ai = pcpu_alloc_alloc_info(1, nr_cpu_ids); |
| 1486 | |||
| 1487 | ai->static_size = __per_cpu_end - __per_cpu_start; | ||
| 1488 | ai->reserved_size = PERCPU_MODULE_RESERVE; | ||
| 1489 | |||
| 1490 | size_sum = PFN_ALIGN(ai->static_size + ai->reserved_size + | ||
| 1486 | PERCPU_DYNAMIC_RESERVE); | 1491 | PERCPU_DYNAMIC_RESERVE); |
| 1487 | dyn_size = size_sum - static_size - PERCPU_MODULE_RESERVE; | ||
| 1488 | 1492 | ||
| 1493 | ai->dyn_size = size_sum - ai->static_size - ai->reserved_size; | ||
| 1494 | ai->unit_size = PCPU_CHUNK_SIZE; | ||
| 1495 | ai->atom_size = PCPU_CHUNK_SIZE; | ||
| 1496 | ai->alloc_size = PCPU_CHUNK_SIZE; | ||
| 1497 | ai->groups[0].nr_units = nr_cpu_ids; | ||
| 1498 | |||
| 1499 | for_each_possible_cpu(cpu) | ||
| 1500 | ai->groups[0].cpu_map[cpu] = cpu; | ||
| 1489 | 1501 | ||
| 1490 | ptrs_size = PFN_ALIGN(nr_cpu_ids * sizeof(ptrs[0])); | 1502 | ptrs_size = PFN_ALIGN(nr_cpu_ids * sizeof(ptrs[0])); |
| 1491 | ptrs = alloc_bootmem(ptrs_size); | 1503 | ptrs = alloc_bootmem(ptrs_size); |
| @@ -1497,7 +1509,7 @@ void __init setup_per_cpu_areas(void) | |||
| 1497 | free_bootmem(__pa(ptrs[cpu] + size_sum), | 1509 | free_bootmem(__pa(ptrs[cpu] + size_sum), |
| 1498 | PCPU_CHUNK_SIZE - size_sum); | 1510 | PCPU_CHUNK_SIZE - size_sum); |
| 1499 | 1511 | ||
| 1500 | memcpy(ptrs[cpu], __per_cpu_load, static_size); | 1512 | memcpy(ptrs[cpu], __per_cpu_load, ai->static_size); |
| 1501 | } | 1513 | } |
| 1502 | 1514 | ||
| 1503 | /* allocate address and map */ | 1515 | /* allocate address and map */ |
| @@ -1514,9 +1526,7 @@ void __init setup_per_cpu_areas(void) | |||
| 1514 | pcpu_map_range(start, end, virt_to_page(ptrs[cpu])); | 1526 | pcpu_map_range(start, end, virt_to_page(ptrs[cpu])); |
| 1515 | } | 1527 | } |
| 1516 | 1528 | ||
| 1517 | pcpu_unit_size = pcpu_setup_first_chunk(static_size, | 1529 | pcpu_unit_size = pcpu_setup_first_chunk(ai, vm.addr); |
| 1518 | PERCPU_MODULE_RESERVE, dyn_size, | ||
| 1519 | PCPU_CHUNK_SIZE, vm.addr, NULL); | ||
| 1520 | 1530 | ||
| 1521 | free_bootmem(__pa(ptrs), ptrs_size); | 1531 | free_bootmem(__pa(ptrs), ptrs_size); |
| 1522 | 1532 | ||
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 660cde133141..db5f9c49fec5 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
| @@ -161,9 +161,7 @@ static ssize_t __init setup_pcpu_lpage(bool chosen) | |||
| 161 | { | 161 | { |
| 162 | size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; | 162 | size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; |
| 163 | size_t dyn_size = reserve - PERCPU_FIRST_CHUNK_RESERVE; | 163 | size_t dyn_size = reserve - PERCPU_FIRST_CHUNK_RESERVE; |
| 164 | size_t unit_map_size, unit_size; | 164 | struct pcpu_alloc_info *ai; |
| 165 | int *unit_map; | ||
| 166 | int nr_units; | ||
| 167 | ssize_t ret; | 165 | ssize_t ret; |
| 168 | 166 | ||
| 169 | /* on non-NUMA, embedding is better */ | 167 | /* on non-NUMA, embedding is better */ |
| @@ -177,26 +175,22 @@ static ssize_t __init setup_pcpu_lpage(bool chosen) | |||
| 177 | } | 175 | } |
| 178 | 176 | ||
| 179 | /* allocate and build unit_map */ | 177 | /* allocate and build unit_map */ |
| 180 | unit_map_size = nr_cpu_ids * sizeof(int); | 178 | ai = pcpu_build_alloc_info(PERCPU_FIRST_CHUNK_RESERVE, dyn_size, |
| 181 | unit_map = alloc_bootmem_nopanic(unit_map_size); | 179 | PMD_SIZE, pcpu_lpage_cpu_distance); |
| 182 | if (!unit_map) { | 180 | if (IS_ERR(ai)) { |
| 183 | pr_warning("PERCPU: failed to allocate unit_map\n"); | 181 | pr_warning("PERCPU: failed to build unit_map (%ld)\n", |
| 184 | return -ENOMEM; | 182 | PTR_ERR(ai)); |
| 183 | return PTR_ERR(ai); | ||
| 185 | } | 184 | } |
| 186 | 185 | ||
| 187 | ret = pcpu_lpage_build_unit_map(PERCPU_FIRST_CHUNK_RESERVE, | ||
| 188 | &dyn_size, &unit_size, PMD_SIZE, | ||
| 189 | unit_map, pcpu_lpage_cpu_distance); | ||
| 190 | if (ret < 0) { | ||
| 191 | pr_warning("PERCPU: failed to build unit_map\n"); | ||
| 192 | goto out_free; | ||
| 193 | } | ||
| 194 | nr_units = ret; | ||
| 195 | |||
| 196 | /* do the parameters look okay? */ | 186 | /* do the parameters look okay? */ |
| 197 | if (!chosen) { | 187 | if (!chosen) { |
| 198 | size_t vm_size = VMALLOC_END - VMALLOC_START; | 188 | size_t vm_size = VMALLOC_END - VMALLOC_START; |
| 199 | size_t tot_size = nr_units * unit_size; | 189 | size_t tot_size = 0; |
| 190 | int group; | ||
| 191 | |||
| 192 | for (group = 0; group < ai->nr_groups; group++) | ||
| 193 | tot_size += ai->unit_size * ai->groups[group].nr_units; | ||
| 200 | 194 | ||
| 201 | /* don't consume more than 20% of vmalloc area */ | 195 | /* don't consume more than 20% of vmalloc area */ |
| 202 | if (tot_size > vm_size / 5) { | 196 | if (tot_size > vm_size / 5) { |
| @@ -207,12 +201,10 @@ static ssize_t __init setup_pcpu_lpage(bool chosen) | |||
| 207 | } | 201 | } |
| 208 | } | 202 | } |
| 209 | 203 | ||
| 210 | ret = pcpu_lpage_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, dyn_size, | 204 | ret = pcpu_lpage_first_chunk(ai, pcpu_fc_alloc, pcpu_fc_free, |
| 211 | unit_size, PMD_SIZE, unit_map, nr_units, | 205 | pcpul_map); |
| 212 | pcpu_fc_alloc, pcpu_fc_free, pcpul_map); | ||
| 213 | out_free: | 206 | out_free: |
| 214 | if (ret < 0) | 207 | pcpu_free_alloc_info(ai); |
| 215 | free_bootmem(__pa(unit_map), unit_map_size); | ||
| 216 | return ret; | 208 | return ret; |
| 217 | } | 209 | } |
| 218 | #else | 210 | #else |
diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 570fb18de2ba..77b86be8ce4f 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h | |||
| @@ -59,6 +59,25 @@ | |||
| 59 | extern void *pcpu_base_addr; | 59 | extern void *pcpu_base_addr; |
| 60 | extern const int *pcpu_unit_map; | 60 | extern const int *pcpu_unit_map; |
| 61 | 61 | ||
| 62 | struct pcpu_group_info { | ||
| 63 | int nr_units; /* aligned # of units */ | ||
| 64 | unsigned long base_offset; /* base address offset */ | ||
| 65 | unsigned int *cpu_map; /* unit->cpu map, empty | ||
| 66 | * entries contain NR_CPUS */ | ||
| 67 | }; | ||
| 68 | |||
| 69 | struct pcpu_alloc_info { | ||
| 70 | size_t static_size; | ||
| 71 | size_t reserved_size; | ||
| 72 | size_t dyn_size; | ||
| 73 | size_t unit_size; | ||
| 74 | size_t atom_size; | ||
| 75 | size_t alloc_size; | ||
| 76 | size_t __ai_size; /* internal, don't use */ | ||
| 77 | int nr_groups; /* 0 if grouping unnecessary */ | ||
| 78 | struct pcpu_group_info groups[]; | ||
| 79 | }; | ||
| 80 | |||
| 62 | enum pcpu_fc { | 81 | enum pcpu_fc { |
| 63 | PCPU_FC_AUTO, | 82 | PCPU_FC_AUTO, |
| 64 | PCPU_FC_EMBED, | 83 | PCPU_FC_EMBED, |
| @@ -78,18 +97,17 @@ typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); | |||
| 78 | typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to); | 97 | typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to); |
| 79 | typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); | 98 | typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); |
| 80 | 99 | ||
| 81 | #ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK | 100 | extern struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups, |
| 82 | extern int __init pcpu_lpage_build_unit_map( | 101 | int nr_units); |
| 83 | size_t reserved_size, ssize_t *dyn_sizep, | 102 | extern void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai); |
| 84 | size_t *unit_sizep, size_t lpage_size, | 103 | |
| 85 | int *unit_map, | 104 | extern struct pcpu_alloc_info * __init pcpu_build_alloc_info( |
| 105 | size_t reserved_size, ssize_t dyn_size, | ||
| 106 | size_t atom_size, | ||
| 86 | pcpu_fc_cpu_distance_fn_t cpu_distance_fn); | 107 | pcpu_fc_cpu_distance_fn_t cpu_distance_fn); |
| 87 | #endif | ||
| 88 | 108 | ||
| 89 | extern size_t __init pcpu_setup_first_chunk( | 109 | extern size_t __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, |
| 90 | size_t static_size, size_t reserved_size, | 110 | void *base_addr); |
| 91 | size_t dyn_size, size_t unit_size, | ||
| 92 | void *base_addr, const int *unit_map); | ||
| 93 | 111 | ||
| 94 | #ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK | 112 | #ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK |
| 95 | extern ssize_t __init pcpu_embed_first_chunk( | 113 | extern ssize_t __init pcpu_embed_first_chunk( |
| @@ -106,9 +124,7 @@ extern ssize_t __init pcpu_page_first_chunk( | |||
| 106 | 124 | ||
| 107 | #ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK | 125 | #ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK |
| 108 | extern ssize_t __init pcpu_lpage_first_chunk( | 126 | extern ssize_t __init pcpu_lpage_first_chunk( |
| 109 | size_t reserved_size, size_t dyn_size, | 127 | const struct pcpu_alloc_info *ai, |
| 110 | size_t unit_size, size_t lpage_size, | ||
| 111 | const int *unit_map, int nr_units, | ||
| 112 | pcpu_fc_alloc_fn_t alloc_fn, | 128 | pcpu_fc_alloc_fn_t alloc_fn, |
| 113 | pcpu_fc_free_fn_t free_fn, | 129 | pcpu_fc_free_fn_t free_fn, |
| 114 | pcpu_fc_map_fn_t map_fn); | 130 | pcpu_fc_map_fn_t map_fn); |
diff --git a/mm/percpu.c b/mm/percpu.c index 2b9c4b2a2fc0..99f7fa682722 100644 --- a/mm/percpu.c +++ b/mm/percpu.c | |||
| @@ -58,6 +58,7 @@ | |||
| 58 | 58 | ||
| 59 | #include <linux/bitmap.h> | 59 | #include <linux/bitmap.h> |
| 60 | #include <linux/bootmem.h> | 60 | #include <linux/bootmem.h> |
| 61 | #include <linux/err.h> | ||
| 61 | #include <linux/list.h> | 62 | #include <linux/list.h> |
| 62 | #include <linux/log2.h> | 63 | #include <linux/log2.h> |
| 63 | #include <linux/mm.h> | 64 | #include <linux/mm.h> |
| @@ -1245,53 +1246,108 @@ static inline size_t pcpu_calc_fc_sizes(size_t static_size, | |||
| 1245 | return size_sum; | 1246 | return size_sum; |
| 1246 | } | 1247 | } |
| 1247 | 1248 | ||
| 1248 | #ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK | ||
| 1249 | /** | 1249 | /** |
| 1250 | * pcpu_lpage_build_unit_map - build unit_map for large page remapping | 1250 | * pcpu_alloc_alloc_info - allocate percpu allocation info |
| 1251 | * @nr_groups: the number of groups | ||
| 1252 | * @nr_units: the number of units | ||
| 1253 | * | ||
| 1254 | * Allocate ai which is large enough for @nr_groups groups containing | ||
| 1255 | * @nr_units units. The returned ai's groups[0].cpu_map points to the | ||
| 1256 | * cpu_map array which is long enough for @nr_units and filled with | ||
| 1257 | * NR_CPUS. It's the caller's responsibility to initialize cpu_map | ||
| 1258 | * pointer of other groups. | ||
| 1259 | * | ||
| 1260 | * RETURNS: | ||
| 1261 | * Pointer to the allocated pcpu_alloc_info on success, NULL on | ||
| 1262 | * failure. | ||
| 1263 | */ | ||
| 1264 | struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups, | ||
| 1265 | int nr_units) | ||
| 1266 | { | ||
| 1267 | struct pcpu_alloc_info *ai; | ||
| 1268 | size_t base_size, ai_size; | ||
| 1269 | void *ptr; | ||
| 1270 | int unit; | ||
| 1271 | |||
| 1272 | base_size = ALIGN(sizeof(*ai) + nr_groups * sizeof(ai->groups[0]), | ||
| 1273 | __alignof__(ai->groups[0].cpu_map[0])); | ||
| 1274 | ai_size = base_size + nr_units * sizeof(ai->groups[0].cpu_map[0]); | ||
| 1275 | |||
| 1276 | ptr = alloc_bootmem_nopanic(PFN_ALIGN(ai_size)); | ||
| 1277 | if (!ptr) | ||
| 1278 | return NULL; | ||
| 1279 | ai = ptr; | ||
| 1280 | ptr += base_size; | ||
| 1281 | |||
| 1282 | ai->groups[0].cpu_map = ptr; | ||
| 1283 | |||
| 1284 | for (unit = 0; unit < nr_units; unit++) | ||
| 1285 | ai->groups[0].cpu_map[unit] = NR_CPUS; | ||
| 1286 | |||
| 1287 | ai->nr_groups = nr_groups; | ||
| 1288 | ai->__ai_size = PFN_ALIGN(ai_size); | ||
| 1289 | |||
| 1290 | return ai; | ||
| 1291 | } | ||
| 1292 | |||
| 1293 | /** | ||
| 1294 | * pcpu_free_alloc_info - free percpu allocation info | ||
| 1295 | * @ai: pcpu_alloc_info to free | ||
| 1296 | * | ||
| 1297 | * Free @ai which was allocated by pcpu_alloc_alloc_info(). | ||
| 1298 | */ | ||
| 1299 | void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai) | ||
| 1300 | { | ||
| 1301 | free_bootmem(__pa(ai), ai->__ai_size); | ||
| 1302 | } | ||
| 1303 | |||
| 1304 | /** | ||
| 1305 | * pcpu_build_alloc_info - build alloc_info considering distances between CPUs | ||
| 1251 | * @reserved_size: the size of reserved percpu area in bytes | 1306 | * @reserved_size: the size of reserved percpu area in bytes |
| 1252 | * @dyn_sizep: in/out parameter for dynamic size, -1 for auto | 1307 | * @dyn_size: free size for dynamic allocation in bytes, -1 for auto |
| 1253 | * @unit_sizep: out parameter for unit size | 1308 | * @atom_size: allocation atom size |
| 1254 | * @unit_map: unit_map to be filled | 1309 | * @cpu_distance_fn: callback to determine distance between cpus, optional |
| 1255 | * @cpu_distance_fn: callback to determine distance between cpus | ||
| 1256 | * | 1310 | * |
| 1257 | * This function builds cpu -> unit map and determine other parameters | 1311 | * This function determines grouping of units, their mappings to cpus |
| 1258 | * considering needed percpu size, large page size and distances | 1312 | * and other parameters considering needed percpu size, allocation |
| 1259 | * between CPUs in NUMA. | 1313 | * atom size and distances between CPUs. |
| 1260 | * | 1314 | * |
| 1261 | CPUs which are of LOCAL_DISTANCE both ways are grouped together and | 1315 | * Groups are always multiples of atom size and CPUs which are of |
| 1262 | * may share units in the same large page. The returned configuration | 1316 | * LOCAL_DISTANCE both ways are grouped together and share space for |
| 1263 | * is guaranteed to have CPUs on different nodes on different large | 1317 | * units in the same group. The returned configuration is guaranteed |
| 1264 | * pages and >=75% usage of allocated virtual address space. | 1318 | * to have CPUs on different nodes on different groups and >=75% usage |
| 1319 | * of allocated virtual address space. | ||
| 1265 | * | 1320 | * |
| 1266 | * RETURNS: | 1321 | * RETURNS: |
| 1267 | * On success, fills in @unit_map, sets *@dyn_sizep, *@unit_sizep and | 1322 | * On success, pointer to the new allocation_info is returned. On |
| 1268 | * returns the number of units to be allocated. -errno on failure. | 1323 | * failure, ERR_PTR value is returned. |
| 1269 | */ | 1324 | */ |
| 1270 | int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, | 1325 | struct pcpu_alloc_info * __init pcpu_build_alloc_info( |
| 1271 | size_t *unit_sizep, size_t lpage_size, | 1326 | size_t reserved_size, ssize_t dyn_size, |
| 1272 | int *unit_map, | 1327 | size_t atom_size, |
| 1273 | pcpu_fc_cpu_distance_fn_t cpu_distance_fn) | 1328 | pcpu_fc_cpu_distance_fn_t cpu_distance_fn) |
| 1274 | { | 1329 | { |
| 1275 | static int group_map[NR_CPUS] __initdata; | 1330 | static int group_map[NR_CPUS] __initdata; |
| 1276 | static int group_cnt[NR_CPUS] __initdata; | 1331 | static int group_cnt[NR_CPUS] __initdata; |
| 1277 | const size_t static_size = __per_cpu_end - __per_cpu_start; | 1332 | const size_t static_size = __per_cpu_end - __per_cpu_start; |
| 1278 | int group_cnt_max = 0; | 1333 | int group_cnt_max = 0, nr_groups = 1, nr_units = 0; |
| 1279 | size_t size_sum, min_unit_size, alloc_size; | 1334 | size_t size_sum, min_unit_size, alloc_size; |
| 1280 | int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */ | 1335 | int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */ |
| 1281 | int last_allocs; | 1336 | int last_allocs, group, unit; |
| 1282 | unsigned int cpu, tcpu; | 1337 | unsigned int cpu, tcpu; |
| 1283 | int group, unit; | 1338 | struct pcpu_alloc_info *ai; |
| 1339 | unsigned int *cpu_map; | ||
| 1284 | 1340 | ||
| 1285 | /* | 1341 | /* |
| 1286 | * Determine min_unit_size, alloc_size and max_upa such that | 1342 | * Determine min_unit_size, alloc_size and max_upa such that |
| 1287 | * alloc_size is multiple of lpage_size and is the smallest | 1343 | * alloc_size is multiple of atom_size and is the smallest |
| 1288 | which can accommodate 4k aligned segments which are equal to | 1344 | * which can accommodate 4k aligned segments which are equal to |
| 1289 | * or larger than min_unit_size. | 1345 | * or larger than min_unit_size. |
| 1290 | */ | 1346 | */ |
| 1291 | size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, dyn_sizep); | 1347 | size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); |
| 1292 | min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); | 1348 | min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); |
| 1293 | 1349 | ||
| 1294 | alloc_size = roundup(min_unit_size, lpage_size); | 1350 | alloc_size = roundup(min_unit_size, atom_size); |
| 1295 | upa = alloc_size / min_unit_size; | 1351 | upa = alloc_size / min_unit_size; |
| 1296 | while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) | 1352 | while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) |
| 1297 | upa--; | 1353 | upa--; |
| @@ -1304,10 +1360,11 @@ int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, | |||
| 1304 | for_each_possible_cpu(tcpu) { | 1360 | for_each_possible_cpu(tcpu) { |
| 1305 | if (cpu == tcpu) | 1361 | if (cpu == tcpu) |
| 1306 | break; | 1362 | break; |
| 1307 | if (group_map[tcpu] == group && | 1363 | if (group_map[tcpu] == group && cpu_distance_fn && |
| 1308 | (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE || | 1364 | (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE || |
| 1309 | cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) { | 1365 | cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) { |
| 1310 | group++; | 1366 | group++; |
| 1367 | nr_groups = max(nr_groups, group + 1); | ||
| 1311 | goto next_group; | 1368 | goto next_group; |
| 1312 | } | 1369 | } |
| 1313 | } | 1370 | } |
| @@ -1328,7 +1385,7 @@ int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, | |||
| 1328 | if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) | 1385 | if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) |
| 1329 | continue; | 1386 | continue; |
| 1330 | 1387 | ||
| 1331 | for (group = 0; group_cnt[group]; group++) { | 1388 | for (group = 0; group < nr_groups; group++) { |
| 1332 | int this_allocs = DIV_ROUND_UP(group_cnt[group], upa); | 1389 | int this_allocs = DIV_ROUND_UP(group_cnt[group], upa); |
| 1333 | allocs += this_allocs; | 1390 | allocs += this_allocs; |
| 1334 | wasted += this_allocs * upa - group_cnt[group]; | 1391 | wasted += this_allocs * upa - group_cnt[group]; |
| @@ -1348,75 +1405,122 @@ int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, | |||
| 1348 | last_allocs = allocs; | 1405 | last_allocs = allocs; |
| 1349 | best_upa = upa; | 1406 | best_upa = upa; |
| 1350 | } | 1407 | } |
| 1351 | *unit_sizep = alloc_size / best_upa; | 1408 | upa = best_upa; |
| 1409 | |||
| 1410 | /* allocate and fill alloc_info */ | ||
| 1411 | for (group = 0; group < nr_groups; group++) | ||
| 1412 | nr_units += roundup(group_cnt[group], upa); | ||
| 1413 | |||
| 1414 | ai = pcpu_alloc_alloc_info(nr_groups, nr_units); | ||
| 1415 | if (!ai) | ||
| 1416 | return ERR_PTR(-ENOMEM); | ||
| 1417 | cpu_map = ai->groups[0].cpu_map; | ||
| 1418 | |||
| 1419 | for (group = 0; group < nr_groups; group++) { | ||
| 1420 | ai->groups[group].cpu_map = cpu_map; | ||
| 1421 | cpu_map += roundup(group_cnt[group], upa); | ||
| 1422 | } | ||
| 1423 | |||
| 1424 | ai->static_size = static_size; | ||
| 1425 | ai->reserved_size = reserved_size; | ||
| 1426 | ai->dyn_size = dyn_size; | ||
| 1427 | ai->unit_size = alloc_size / upa; | ||
| 1428 | ai->atom_size = atom_size; | ||
| 1429 | ai->alloc_size = alloc_size; | ||
| 1430 | |||
| 1431 | for (group = 0, unit = 0; group_cnt[group]; group++) { | ||
| 1432 | struct pcpu_group_info *gi = &ai->groups[group]; | ||
| 1433 | |||
| 1434 | /* | ||
| 1435 | * Initialize base_offset as if all groups are located | ||
| 1436 | * back-to-back. The caller should update this to | ||
| 1437 | * reflect actual allocation. | ||
| 1438 | */ | ||
| 1439 | gi->base_offset = unit * ai->unit_size; | ||
| 1352 | 1440 | ||
| 1353 | /* assign units to cpus accordingly */ | ||
| 1354 | unit = 0; | ||
| 1355 | for (group = 0; group_cnt[group]; group++) { | ||
| 1356 | for_each_possible_cpu(cpu) | 1441 | for_each_possible_cpu(cpu) |
| 1357 | if (group_map[cpu] == group) | 1442 | if (group_map[cpu] == group) |
| 1358 | unit_map[cpu] = unit++; | 1443 | gi->cpu_map[gi->nr_units++] = cpu; |
| 1359 | unit = roundup(unit, best_upa); | 1444 | gi->nr_units = roundup(gi->nr_units, upa); |
| 1445 | unit += gi->nr_units; | ||
| 1360 | } | 1446 | } |
| 1447 | BUG_ON(unit != nr_units); | ||
| 1361 | 1448 | ||
| 1362 | return unit; /* unit contains aligned number of units */ | 1449 | return ai; |
| 1363 | } | 1450 | } |
| 1364 | 1451 | ||
| 1365 | static bool __init pcpul_unit_to_cpu(int unit, const int *unit_map, | 1452 | /** |
| 1366 | unsigned int *cpup); | 1453 | * pcpu_dump_alloc_info - print out information about pcpu_alloc_info |
| 1367 | 1454 | * @lvl: loglevel | |
| 1368 | static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, | 1455 | * @ai: allocation info to dump |
| 1369 | size_t reserved_size, size_t dyn_size, | 1456 | * |
| 1370 | size_t unit_size, size_t lpage_size, | 1457 | * Print out information about @ai using loglevel @lvl. |
| 1371 | const int *unit_map, int nr_units) | 1458 | */ |
| 1459 | static void pcpu_dump_alloc_info(const char *lvl, | ||
| 1460 | const struct pcpu_alloc_info *ai) | ||
| 1372 | { | 1461 | { |
| 1373 | int width = 1, v = nr_units; | 1462 | int group_width = 1, cpu_width = 1, width; |
| 1374 | char empty_str[] = "--------"; | 1463 | char empty_str[] = "--------"; |
| 1375 | int upl, lpl; /* units per lpage, lpage per line */ | 1464 | int alloc = 0, alloc_end = 0; |
| 1376 | unsigned int cpu; | 1465 | int group, v; |
| 1377 | int lpage, unit; | 1466 | int upa, apl; /* units per alloc, allocs per line */ |
| 1467 | |||
| 1468 | v = ai->nr_groups; | ||
| 1469 | while (v /= 10) | ||
| 1470 | group_width++; | ||
| 1378 | 1471 | ||
| 1472 | v = num_possible_cpus(); | ||
| 1379 | while (v /= 10) | 1473 | while (v /= 10) |
| 1380 | width++; | 1474 | cpu_width++; |
| 1381 | empty_str[min_t(int, width, sizeof(empty_str) - 1)] = '\0'; | 1475 | empty_str[min_t(int, cpu_width, sizeof(empty_str) - 1)] = '\0'; |
| 1382 | 1476 | ||
| 1383 | upl = max_t(int, lpage_size / unit_size, 1); | 1477 | upa = ai->alloc_size / ai->unit_size; |
| 1384 | lpl = rounddown_pow_of_two(max_t(int, 60 / (upl * (width + 1) + 2), 1)); | 1478 | width = upa * (cpu_width + 1) + group_width + 3; |
| 1479 | apl = rounddown_pow_of_two(max(60 / width, 1)); | ||
| 1385 | 1480 | ||
| 1386 | printk("%spcpu-lpage: sta/res/dyn=%zu/%zu/%zu unit=%zu lpage=%zu", lvl, | 1481 | printk("%spcpu-alloc: s%zu r%zu d%zu u%zu alloc=%zu*%zu", |
| 1387 | static_size, reserved_size, dyn_size, unit_size, lpage_size); | 1482 | lvl, ai->static_size, ai->reserved_size, ai->dyn_size, |
| 1483 | ai->unit_size, ai->alloc_size / ai->atom_size, ai->atom_size); | ||
| 1388 | 1484 | ||
| 1389 | for (lpage = 0, unit = 0; unit < nr_units; unit++) { | 1485 | for (group = 0; group < ai->nr_groups; group++) { |
| 1390 | if (!(unit % upl)) { | 1486 | const struct pcpu_group_info *gi = &ai->groups[group]; |
| 1391 | if (!(lpage++ % lpl)) { | 1487 | int unit = 0, unit_end = 0; |
| 1488 | |||
| 1489 | BUG_ON(gi->nr_units % upa); | ||
| 1490 | for (alloc_end += gi->nr_units / upa; | ||
| 1491 | alloc < alloc_end; alloc++) { | ||
| 1492 | if (!(alloc % apl)) { | ||
| 1392 | printk("\n"); | 1493 | printk("\n"); |
| 1393 | printk("%spcpu-lpage: ", lvl); | 1494 | printk("%spcpu-alloc: ", lvl); |
| 1394 | } else | 1495 | } |
| 1395 | printk("| "); | 1496 | printk("[%0*d] ", group_width, group); |
| 1497 | |||
| 1498 | for (unit_end += upa; unit < unit_end; unit++) | ||
| 1499 | if (gi->cpu_map[unit] != NR_CPUS) | ||
| 1500 | printk("%0*d ", cpu_width, | ||
| 1501 | gi->cpu_map[unit]); | ||
| 1502 | else | ||
| 1503 | printk("%s ", empty_str); | ||
| 1396 | } | 1504 | } |
| 1397 | if (pcpul_unit_to_cpu(unit, unit_map, &cpu)) | ||
| 1398 | printk("%0*d ", width, cpu); | ||
| 1399 | else | ||
| 1400 | printk("%s ", empty_str); | ||
| 1401 | } | 1505 | } |
| 1402 | printk("\n"); | 1506 | printk("\n"); |
| 1403 | } | 1507 | } |
| 1404 | #endif | ||
| 1405 | 1508 | ||
| 1406 | /** | 1509 | /** |
| 1407 | * pcpu_setup_first_chunk - initialize the first percpu chunk | 1510 | * pcpu_setup_first_chunk - initialize the first percpu chunk |
| 1408 | * @static_size: the size of static percpu area in bytes | 1511 | * @ai: pcpu_alloc_info describing how the percpu area is shaped |
| 1409 | * @reserved_size: the size of reserved percpu area in bytes, 0 for none | ||
| 1410 | * @dyn_size: free size for dynamic allocation in bytes | ||
| 1411 | * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE | ||
| 1412 | * @base_addr: mapped address | 1512 | * @base_addr: mapped address |
| 1413 | * @unit_map: cpu -> unit map, NULL for sequential mapping | ||
| 1414 | * | 1513 | * |
| 1415 | * Initialize the first percpu chunk which contains the kernel static | 1514 | * Initialize the first percpu chunk which contains the kernel static |
| 1416 | percpu area. This function is to be called from arch percpu area | 1515 | * percpu area. This function is to be called from arch percpu area |
| 1417 | * setup path. | 1516 | * setup path. |
| 1418 | * | 1517 | * |
| 1419 | * @reserved_size, if non-zero, specifies the amount of bytes to | 1518 | * @ai contains all information necessary to initialize the first |
| 1519 | * chunk and prime the dynamic percpu allocator. | ||
| 1520 | * | ||
| 1521 | * @ai->static_size is the size of static percpu area. | ||
| 1522 | * | ||
| 1523 | * @ai->reserved_size, if non-zero, specifies the amount of bytes to | ||
| 1420 | * reserve after the static area in the first chunk. This reserves | 1524 | * reserve after the static area in the first chunk. This reserves |
| 1421 | * the first chunk such that it's available only through reserved | 1525 | * the first chunk such that it's available only through reserved |
| 1422 | * percpu allocation. This is primarily used to serve module percpu | 1526 | * percpu allocation. This is primarily used to serve module percpu |
| @@ -1424,13 +1528,26 @@ static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, | |||
| 1424 | * limited offset range for symbol relocations to guarantee module | 1528 | * limited offset range for symbol relocations to guarantee module |
| 1425 | * percpu symbols fall inside the relocatable range. | 1529 | * percpu symbols fall inside the relocatable range. |
| 1426 | * | 1530 | * |
| 1427 | * @dyn_size determines the number of bytes available for dynamic | 1531 | * @ai->dyn_size determines the number of bytes available for dynamic |
| 1428 | * allocation in the first chunk. The area between @static_size + | 1532 | * allocation in the first chunk. The area between @ai->static_size + |
| 1429 | * @reserved_size + @dyn_size and @unit_size is unused. | 1533 | * @ai->reserved_size + @ai->dyn_size and @ai->unit_size is unused. |
| 1430 | * | 1534 | * |
| 1431 | * @unit_size specifies unit size and must be aligned to PAGE_SIZE and | 1535 | * @ai->unit_size specifies unit size and must be aligned to PAGE_SIZE |
| 1432 | * equal to or larger than @static_size + @reserved_size + if | 1536 | * and equal to or larger than @ai->static_size + @ai->reserved_size + |
| 1433 | * non-negative, @dyn_size. | 1537 | * @ai->dyn_size. |
| 1538 | * | ||
| 1539 | * @ai->atom_size is the allocation atom size and used as alignment | ||
| 1540 | * for vm areas. | ||
| 1541 | * | ||
| 1542 | * @ai->alloc_size is the allocation size and always multiple of | ||
| 1543 | * @ai->atom_size. This is larger than @ai->atom_size if | ||
| 1544 | * @ai->unit_size is larger than @ai->atom_size. | ||
| 1545 | * | ||
| 1546 | * @ai->nr_groups and @ai->groups describe virtual memory layout of | ||
| 1547 | * percpu areas. Units which should be colocated are put into the | ||
| 1548 | * same group. Dynamic VM areas will be allocated according to these | ||
| 1549 | * groupings. If @ai->nr_groups is zero, a single group containing | ||
| 1550 | * all units is assumed. | ||
| 1434 | * | 1551 | * |
| 1435 | * The caller should have mapped the first chunk at @base_addr and | 1552 | * The caller should have mapped the first chunk at @base_addr and |
| 1436 | * copied static data to each unit. | 1553 | * copied static data to each unit. |
| @@ -1446,70 +1563,63 @@ static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, | |||
| 1446 | * The determined pcpu_unit_size which can be used to initialize | 1563 | * The determined pcpu_unit_size which can be used to initialize |
| 1447 | * percpu access. | 1564 | * percpu access. |
| 1448 | */ | 1565 | */ |
| 1449 | size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, | 1566 | size_t __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, |
| 1450 | size_t dyn_size, size_t unit_size, | 1567 | void *base_addr) |
| 1451 | void *base_addr, const int *unit_map) | ||
| 1452 | { | 1568 | { |
| 1453 | static struct vm_struct first_vm; | 1569 | static struct vm_struct first_vm; |
| 1454 | static int smap[2], dmap[2]; | 1570 | static int smap[2], dmap[2]; |
| 1455 | size_t size_sum = static_size + reserved_size + dyn_size; | 1571 | size_t dyn_size = ai->dyn_size; |
| 1572 | size_t size_sum = ai->static_size + ai->reserved_size + dyn_size; | ||
| 1456 | struct pcpu_chunk *schunk, *dchunk = NULL; | 1573 | struct pcpu_chunk *schunk, *dchunk = NULL; |
| 1457 | unsigned int cpu, tcpu; | 1574 | unsigned int cpu; |
| 1458 | int i; | 1575 | int *unit_map; |
| 1576 | int group, unit, i; | ||
| 1459 | 1577 | ||
| 1460 | /* sanity checks */ | 1578 | /* sanity checks */ |
| 1461 | BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC || | 1579 | BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC || |
| 1462 | ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC); | 1580 | ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC); |
| 1463 | BUG_ON(!static_size); | 1581 | BUG_ON(ai->nr_groups <= 0); |
| 1582 | BUG_ON(!ai->static_size); | ||
| 1464 | BUG_ON(!base_addr); | 1583 | BUG_ON(!base_addr); |
| 1465 | BUG_ON(unit_size < size_sum); | 1584 | BUG_ON(ai->unit_size < size_sum); |
| 1466 | BUG_ON(unit_size & ~PAGE_MASK); | 1585 | BUG_ON(ai->unit_size & ~PAGE_MASK); |
| 1467 | BUG_ON(unit_size < PCPU_MIN_UNIT_SIZE); | 1586 | BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE); |
| 1587 | |||
| 1588 | pcpu_dump_alloc_info(KERN_DEBUG, ai); | ||
| 1468 | 1589 | ||
| 1469 | /* determine number of units and verify and initialize pcpu_unit_map */ | 1590 | /* determine number of units and verify and initialize pcpu_unit_map */ |
| 1470 | if (unit_map) { | 1591 | unit_map = alloc_bootmem(nr_cpu_ids * sizeof(unit_map[0])); |
| 1471 | int first_unit = INT_MAX, last_unit = INT_MIN; | ||
| 1472 | |||
| 1473 | for_each_possible_cpu(cpu) { | ||
| 1474 | int unit = unit_map[cpu]; | ||
| 1475 | |||
| 1476 | BUG_ON(unit < 0); | ||
| 1477 | for_each_possible_cpu(tcpu) { | ||
| 1478 | if (tcpu == cpu) | ||
| 1479 | break; | ||
| 1480 | /* the mapping should be one-to-one */ | ||
| 1481 | BUG_ON(unit_map[tcpu] == unit); | ||
| 1482 | } | ||
| 1483 | 1592 | ||
| 1484 | if (unit < first_unit) { | 1593 | for (cpu = 0; cpu < nr_cpu_ids; cpu++) |
| 1485 | pcpu_first_unit_cpu = cpu; | 1594 | unit_map[cpu] = NR_CPUS; |
| 1486 | first_unit = unit; | 1595 | pcpu_first_unit_cpu = NR_CPUS; |
| 1487 | } | ||
| 1488 | if (unit > last_unit) { | ||
| 1489 | pcpu_last_unit_cpu = cpu; | ||
| 1490 | last_unit = unit; | ||
| 1491 | } | ||
| 1492 | } | ||
| 1493 | pcpu_nr_units = last_unit + 1; | ||
| 1494 | pcpu_unit_map = unit_map; | ||
| 1495 | } else { | ||
| 1496 | int *identity_map; | ||
| 1497 | 1596 | ||
| 1498 | /* #units == #cpus, identity mapped */ | 1597 | for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) { |
| 1499 | identity_map = alloc_bootmem(nr_cpu_ids * | 1598 | const struct pcpu_group_info *gi = &ai->groups[group]; |
| 1500 | sizeof(identity_map[0])); | ||
| 1501 | 1599 | ||
| 1502 | for_each_possible_cpu(cpu) | 1600 | for (i = 0; i < gi->nr_units; i++) { |
| 1503 | identity_map[cpu] = cpu; | 1601 | cpu = gi->cpu_map[i]; |
| 1602 | if (cpu == NR_CPUS) | ||
| 1603 | continue; | ||
| 1504 | 1604 | ||
| 1505 | pcpu_first_unit_cpu = 0; | 1605 | BUG_ON(cpu > nr_cpu_ids || !cpu_possible(cpu)); |
| 1506 | pcpu_last_unit_cpu = pcpu_nr_units - 1; | 1606 | BUG_ON(unit_map[cpu] != NR_CPUS); |
| 1507 | pcpu_nr_units = nr_cpu_ids; | 1607 | |
| 1508 | pcpu_unit_map = identity_map; | 1608 | unit_map[cpu] = unit + i; |
| 1609 | if (pcpu_first_unit_cpu == NR_CPUS) | ||
| 1610 | pcpu_first_unit_cpu = cpu; | ||
| 1611 | } | ||
| 1509 | } | 1612 | } |
| 1613 | pcpu_last_unit_cpu = cpu; | ||
| 1614 | pcpu_nr_units = unit; | ||
| 1615 | |||
| 1616 | for_each_possible_cpu(cpu) | ||
| 1617 | BUG_ON(unit_map[cpu] == NR_CPUS); | ||
| 1618 | |||
| 1619 | pcpu_unit_map = unit_map; | ||
| 1510 | 1620 | ||
| 1511 | /* determine basic parameters */ | 1621 | /* determine basic parameters */ |
| 1512 | pcpu_unit_pages = unit_size >> PAGE_SHIFT; | 1622 | pcpu_unit_pages = ai->unit_size >> PAGE_SHIFT; |
| 1513 | pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; | 1623 | pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; |
| 1514 | pcpu_chunk_size = pcpu_nr_units * pcpu_unit_size; | 1624 | pcpu_chunk_size = pcpu_nr_units * pcpu_unit_size; |
| 1515 | pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) + | 1625 | pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) + |
| @@ -1543,17 +1653,17 @@ size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, | |||
| 1543 | schunk->immutable = true; | 1653 | schunk->immutable = true; |
| 1544 | bitmap_fill(schunk->populated, pcpu_unit_pages); | 1654 | bitmap_fill(schunk->populated, pcpu_unit_pages); |
| 1545 | 1655 | ||
| 1546 | if (reserved_size) { | 1656 | if (ai->reserved_size) { |
| 1547 | schunk->free_size = reserved_size; | 1657 | schunk->free_size = ai->reserved_size; |
| 1548 | pcpu_reserved_chunk = schunk; | 1658 | pcpu_reserved_chunk = schunk; |
| 1549 | pcpu_reserved_chunk_limit = static_size + reserved_size; | 1659 | pcpu_reserved_chunk_limit = ai->static_size + ai->reserved_size; |
| 1550 | } else { | 1660 | } else { |
| 1551 | schunk->free_size = dyn_size; | 1661 | schunk->free_size = dyn_size; |
| 1552 | dyn_size = 0; /* dynamic area covered */ | 1662 | dyn_size = 0; /* dynamic area covered */ |
| 1553 | } | 1663 | } |
| 1554 | schunk->contig_hint = schunk->free_size; | 1664 | schunk->contig_hint = schunk->free_size; |
| 1555 | 1665 | ||
| 1556 | schunk->map[schunk->map_used++] = -static_size; | 1666 | schunk->map[schunk->map_used++] = -ai->static_size; |
| 1557 | if (schunk->free_size) | 1667 | if (schunk->free_size) |
| 1558 | schunk->map[schunk->map_used++] = schunk->free_size; | 1668 | schunk->map[schunk->map_used++] = schunk->free_size; |
| 1559 | 1669 | ||
| @@ -1643,44 +1753,47 @@ early_param("percpu_alloc", percpu_alloc_setup); | |||
| 1643 | */ | 1753 | */ |
| 1644 | ssize_t __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size) | 1754 | ssize_t __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size) |
| 1645 | { | 1755 | { |
| 1646 | const size_t static_size = __per_cpu_end - __per_cpu_start; | 1756 | struct pcpu_alloc_info *ai; |
| 1647 | size_t size_sum, unit_size, chunk_size; | 1757 | size_t size_sum, chunk_size; |
| 1648 | void *base; | 1758 | void *base; |
| 1649 | unsigned int cpu; | 1759 | int unit; |
| 1760 | ssize_t ret; | ||
| 1650 | 1761 | ||
| 1651 | /* determine parameters and allocate */ | 1762 | ai = pcpu_build_alloc_info(reserved_size, dyn_size, PAGE_SIZE, NULL); |
| 1652 | size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); | 1763 | if (IS_ERR(ai)) |
| 1764 | return PTR_ERR(ai); | ||
| 1765 | BUG_ON(ai->nr_groups != 1); | ||
| 1766 | BUG_ON(ai->groups[0].nr_units != num_possible_cpus()); | ||
| 1653 | 1767 | ||
| 1654 | unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); | 1768 | size_sum = ai->static_size + ai->reserved_size + ai->dyn_size; |
| 1655 | chunk_size = unit_size * nr_cpu_ids; | 1769 | chunk_size = ai->unit_size * num_possible_cpus(); |
| 1656 | 1770 | ||
| 1657 | base = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE, | 1771 | base = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE, |
| 1658 | __pa(MAX_DMA_ADDRESS)); | 1772 | __pa(MAX_DMA_ADDRESS)); |
| 1659 | if (!base) { | 1773 | if (!base) { |
| 1660 | pr_warning("PERCPU: failed to allocate %zu bytes for " | 1774 | pr_warning("PERCPU: failed to allocate %zu bytes for " |
| 1661 | "embedding\n", chunk_size); | 1775 | "embedding\n", chunk_size); |
| 1662 | return -ENOMEM; | 1776 | ret = -ENOMEM; |
| 1777 | goto out_free_ai; | ||
| 1663 | } | 1778 | } |
| 1664 | 1779 | ||
| 1665 | /* return the leftover and copy */ | 1780 | /* return the leftover and copy */ |
| 1666 | for (cpu = 0; cpu < nr_cpu_ids; cpu++) { | 1781 | for (unit = 0; unit < num_possible_cpus(); unit++) { |
| 1667 | void *ptr = base + cpu * unit_size; | 1782 | void *ptr = base + unit * ai->unit_size; |
| 1668 | 1783 | ||
| 1669 | if (cpu_possible(cpu)) { | 1784 | free_bootmem(__pa(ptr + size_sum), ai->unit_size - size_sum); |
| 1670 | free_bootmem(__pa(ptr + size_sum), | 1785 | memcpy(ptr, __per_cpu_load, ai->static_size); |
| 1671 | unit_size - size_sum); | ||
| 1672 | memcpy(ptr, __per_cpu_load, static_size); | ||
| 1673 | } else | ||
| 1674 | free_bootmem(__pa(ptr), unit_size); | ||
| 1675 | } | 1786 | } |
| 1676 | 1787 | ||
| 1677 | /* we're ready, commit */ | 1788 | /* we're ready, commit */ |
| 1678 | pr_info("PERCPU: Embedded %zu pages/cpu @%p s%zu r%zu d%zu u%zu\n", | 1789 | pr_info("PERCPU: Embedded %zu pages/cpu @%p s%zu r%zu d%zu u%zu\n", |
| 1679 | PFN_DOWN(size_sum), base, static_size, reserved_size, dyn_size, | 1790 | PFN_DOWN(size_sum), base, ai->static_size, ai->reserved_size, |
| 1680 | unit_size); | 1791 | ai->dyn_size, ai->unit_size); |
| 1681 | 1792 | ||
| 1682 | return pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, | 1793 | ret = pcpu_setup_first_chunk(ai, base); |
| 1683 | unit_size, base, NULL); | 1794 | out_free_ai: |
| 1795 | pcpu_free_alloc_info(ai); | ||
| 1796 | return ret; | ||
| 1684 | } | 1797 | } |
| 1685 | #endif /* CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK || | 1798 | #endif /* CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK || |
| 1686 | !CONFIG_HAVE_SETUP_PER_CPU_AREA */ | 1799 | !CONFIG_HAVE_SETUP_PER_CPU_AREA */ |
| @@ -1709,31 +1822,34 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, | |||
| 1709 | pcpu_fc_populate_pte_fn_t populate_pte_fn) | 1822 | pcpu_fc_populate_pte_fn_t populate_pte_fn) |
| 1710 | { | 1823 | { |
| 1711 | static struct vm_struct vm; | 1824 | static struct vm_struct vm; |
| 1712 | const size_t static_size = __per_cpu_end - __per_cpu_start; | 1825 | struct pcpu_alloc_info *ai; |
| 1713 | ssize_t dyn_size = -1; | ||
| 1714 | size_t size_sum, unit_size; | ||
| 1715 | char psize_str[16]; | 1826 | char psize_str[16]; |
| 1716 | int unit_pages; | 1827 | int unit_pages; |
| 1717 | size_t pages_size; | 1828 | size_t pages_size; |
| 1718 | struct page **pages; | 1829 | struct page **pages; |
| 1719 | unsigned int cpu; | 1830 | int unit, i, j; |
| 1720 | int i, j; | ||
| 1721 | ssize_t ret; | 1831 | ssize_t ret; |
| 1722 | 1832 | ||
| 1723 | snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10); | 1833 | snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10); |
| 1724 | 1834 | ||
| 1725 | size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); | 1835 | ai = pcpu_build_alloc_info(reserved_size, -1, PAGE_SIZE, NULL); |
| 1726 | unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); | 1836 | if (IS_ERR(ai)) |
| 1727 | unit_pages = unit_size >> PAGE_SHIFT; | 1837 | return PTR_ERR(ai); |
| 1838 | BUG_ON(ai->nr_groups != 1); | ||
| 1839 | BUG_ON(ai->groups[0].nr_units != num_possible_cpus()); | ||
| 1840 | |||
| 1841 | unit_pages = ai->unit_size >> PAGE_SHIFT; | ||
| 1728 | 1842 | ||
| 1729 | /* unaligned allocations can't be freed, round up to page size */ | 1843 | /* unaligned allocations can't be freed, round up to page size */ |
| 1730 | pages_size = PFN_ALIGN(unit_pages * nr_cpu_ids * sizeof(pages[0])); | 1844 | pages_size = PFN_ALIGN(unit_pages * num_possible_cpus() * |
| 1845 | sizeof(pages[0])); | ||
| 1731 | pages = alloc_bootmem(pages_size); | 1846 | pages = alloc_bootmem(pages_size); |
| 1732 | 1847 | ||
| 1733 | /* allocate pages */ | 1848 | /* allocate pages */ |
| 1734 | j = 0; | 1849 | j = 0; |
| 1735 | for_each_possible_cpu(cpu) | 1850 | for (unit = 0; unit < num_possible_cpus(); unit++) |
| 1736 | for (i = 0; i < unit_pages; i++) { | 1851 | for (i = 0; i < unit_pages; i++) { |
| 1852 | unsigned int cpu = ai->groups[0].cpu_map[unit]; | ||
| 1737 | void *ptr; | 1853 | void *ptr; |
| 1738 | 1854 | ||
| 1739 | ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE); | 1855 | ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE); |
| @@ -1747,18 +1863,18 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, | |||
| 1747 | 1863 | ||
| 1748 | /* allocate vm area, map the pages and copy static data */ | 1864 | /* allocate vm area, map the pages and copy static data */ |
| 1749 | vm.flags = VM_ALLOC; | 1865 | vm.flags = VM_ALLOC; |
| 1750 | vm.size = nr_cpu_ids * unit_size; | 1866 | vm.size = num_possible_cpus() * ai->unit_size; |
| 1751 | vm_area_register_early(&vm, PAGE_SIZE); | 1867 | vm_area_register_early(&vm, PAGE_SIZE); |
| 1752 | 1868 | ||
| 1753 | for_each_possible_cpu(cpu) { | 1869 | for (unit = 0; unit < num_possible_cpus(); unit++) { |
| 1754 | unsigned long unit_addr = | 1870 | unsigned long unit_addr = |
| 1755 | (unsigned long)vm.addr + cpu * unit_size; | 1871 | (unsigned long)vm.addr + unit * ai->unit_size; |
| 1756 | 1872 | ||
| 1757 | for (i = 0; i < unit_pages; i++) | 1873 | for (i = 0; i < unit_pages; i++) |
| 1758 | populate_pte_fn(unit_addr + (i << PAGE_SHIFT)); | 1874 | populate_pte_fn(unit_addr + (i << PAGE_SHIFT)); |
| 1759 | 1875 | ||
| 1760 | /* pte already populated, the following shouldn't fail */ | 1876 | /* pte already populated, the following shouldn't fail */ |
| 1761 | ret = __pcpu_map_pages(unit_addr, &pages[cpu * unit_pages], | 1877 | ret = __pcpu_map_pages(unit_addr, &pages[unit * unit_pages], |
| 1762 | unit_pages); | 1878 | unit_pages); |
| 1763 | if (ret < 0) | 1879 | if (ret < 0) |
| 1764 | panic("failed to map percpu area, err=%zd\n", ret); | 1880 | panic("failed to map percpu area, err=%zd\n", ret); |
| @@ -1772,16 +1888,15 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, | |||
| 1772 | */ | 1888 | */ |
| 1773 | 1889 | ||
| 1774 | /* copy static data */ | 1890 | /* copy static data */ |
| 1775 | memcpy((void *)unit_addr, __per_cpu_load, static_size); | 1891 | memcpy((void *)unit_addr, __per_cpu_load, ai->static_size); |
| 1776 | } | 1892 | } |
| 1777 | 1893 | ||
| 1778 | /* we're ready, commit */ | 1894 | /* we're ready, commit */ |
| 1779 | pr_info("PERCPU: %d %s pages/cpu @%p s%zu r%zu d%zu\n", | 1895 | pr_info("PERCPU: %d %s pages/cpu @%p s%zu r%zu d%zu\n", |
| 1780 | unit_pages, psize_str, vm.addr, static_size, reserved_size, | 1896 | unit_pages, psize_str, vm.addr, ai->static_size, |
| 1781 | dyn_size); | 1897 | ai->reserved_size, ai->dyn_size); |
| 1782 | 1898 | ||
| 1783 | ret = pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, | 1899 | ret = pcpu_setup_first_chunk(ai, vm.addr); |
| 1784 | unit_size, vm.addr, NULL); | ||
| 1785 | goto out_free_ar; | 1900 | goto out_free_ar; |
| 1786 | 1901 | ||
| 1787 | enomem: | 1902 | enomem: |
| @@ -1790,6 +1905,7 @@ enomem: | |||
| 1790 | ret = -ENOMEM; | 1905 | ret = -ENOMEM; |
| 1791 | out_free_ar: | 1906 | out_free_ar: |
| 1792 | free_bootmem(__pa(pages), pages_size); | 1907 | free_bootmem(__pa(pages), pages_size); |
| 1908 | pcpu_free_alloc_info(ai); | ||
| 1793 | return ret; | 1909 | return ret; |
| 1794 | } | 1910 | } |
| 1795 | #endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */ | 1911 | #endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */ |
| @@ -1805,38 +1921,50 @@ static size_t pcpul_lpage_size; | |||
| 1805 | static int pcpul_nr_lpages; | 1921 | static int pcpul_nr_lpages; |
| 1806 | static struct pcpul_ent *pcpul_map; | 1922 | static struct pcpul_ent *pcpul_map; |
| 1807 | 1923 | ||
| 1808 | static bool __init pcpul_unit_to_cpu(int unit, const int *unit_map, | 1924 | static bool __init pcpul_unit_to_cpu(int unit, const struct pcpu_alloc_info *ai, |
| 1809 | unsigned int *cpup) | 1925 | unsigned int *cpup) |
| 1810 | { | 1926 | { |
| 1811 | unsigned int cpu; | 1927 | int group, cunit; |
| 1812 | 1928 | ||
| 1813 | for_each_possible_cpu(cpu) | 1929 | for (group = 0, cunit = 0; group < ai->nr_groups; group++) { |
| 1814 | if (unit_map[cpu] == unit) { | 1930 | const struct pcpu_group_info *gi = &ai->groups[group]; |
| 1931 | |||
| 1932 | if (unit < cunit + gi->nr_units) { | ||
| 1815 | if (cpup) | 1933 | if (cpup) |
| 1816 | *cpup = cpu; | 1934 | *cpup = gi->cpu_map[unit - cunit]; |
| 1817 | return true; | 1935 | return true; |
| 1818 | } | 1936 | } |
| 1937 | cunit += gi->nr_units; | ||
| 1938 | } | ||
| 1819 | 1939 | ||
| 1820 | return false; | 1940 | return false; |
| 1821 | } | 1941 | } |
| 1822 | 1942 | ||
| 1943 | static int __init pcpul_cpu_to_unit(int cpu, const struct pcpu_alloc_info *ai) | ||
| 1944 | { | ||
| 1945 | int group, unit, i; | ||
| 1946 | |||
| 1947 | for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) { | ||
| 1948 | const struct pcpu_group_info *gi = &ai->groups[group]; | ||
| 1949 | |||
| 1950 | for (i = 0; i < gi->nr_units; i++) | ||
| 1951 | if (gi->cpu_map[i] == cpu) | ||
| 1952 | return unit + i; | ||
| 1953 | } | ||
| 1954 | BUG(); | ||
| 1955 | } | ||
| 1956 | |||
| 1823 | /** | 1957 | /** |
| 1824 | * pcpu_lpage_first_chunk - remap the first percpu chunk using large page | 1958 | * pcpu_lpage_first_chunk - remap the first percpu chunk using large page |
| 1825 | * @reserved_size: the size of reserved percpu area in bytes | 1959 | * @ai: pcpu_alloc_info |
| 1826 | * @dyn_size: free size for dynamic allocation in bytes | ||
| 1827 | * @unit_size: unit size in bytes | ||
| 1828 | * @lpage_size: the size of a large page | ||
| 1829 | * @unit_map: cpu -> unit mapping | ||
| 1830 | * @nr_units: the number of units | ||
| 1831 | * @alloc_fn: function to allocate percpu lpage, always called with lpage_size | 1960 | * @alloc_fn: function to allocate percpu lpage, always called with lpage_size |
| 1832 | * @free_fn: function to free percpu memory, @size <= lpage_size | 1961 | * @free_fn: function to free percpu memory, @size <= lpage_size |
| 1833 | * @map_fn: function to map percpu lpage, always called with lpage_size | 1962 | * @map_fn: function to map percpu lpage, always called with lpage_size |
| 1834 | * | 1963 | * |
| 1835 | * This allocator uses large page to build and map the first chunk. | 1964 | * This allocator uses large page to build and map the first chunk. |
| 1836 | * Unlike other helpers, the caller should always specify @dyn_size | 1965 | * Unlike other helpers, the caller should provide fully initialized |
| 1837 | * and @unit_size. These parameters along with @unit_map and | 1966 | * @ai. This can be done using pcpu_build_alloc_info(). This two |
| 1838 | * @nr_units can be determined using pcpu_lpage_build_unit_map(). | 1967 | * stage initialization is to allow arch code to evaluate the |
| 1839 | * This two stage initialization is to allow arch code to evaluate the | ||
| 1840 | * parameters before committing to it. | 1968 | * parameters before committing to it. |
| 1841 | * | 1969 | * |
| 1842 | * Large pages are allocated as directed by @unit_map and other | 1970 | * Large pages are allocated as directed by @unit_map and other |
| @@ -1852,27 +1980,26 @@ static bool __init pcpul_unit_to_cpu(int unit, const int *unit_map, | |||
| 1852 | * The determined pcpu_unit_size which can be used to initialize | 1980 | * The determined pcpu_unit_size which can be used to initialize |
| 1853 | * percpu access on success, -errno on failure. | 1981 | * percpu access on success, -errno on failure. |
| 1854 | */ | 1982 | */ |
| 1855 | ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t dyn_size, | 1983 | ssize_t __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai, |
| 1856 | size_t unit_size, size_t lpage_size, | ||
| 1857 | const int *unit_map, int nr_units, | ||
| 1858 | pcpu_fc_alloc_fn_t alloc_fn, | 1984 | pcpu_fc_alloc_fn_t alloc_fn, |
| 1859 | pcpu_fc_free_fn_t free_fn, | 1985 | pcpu_fc_free_fn_t free_fn, |
| 1860 | pcpu_fc_map_fn_t map_fn) | 1986 | pcpu_fc_map_fn_t map_fn) |
| 1861 | { | 1987 | { |
| 1862 | static struct vm_struct vm; | 1988 | static struct vm_struct vm; |
| 1863 | const size_t static_size = __per_cpu_end - __per_cpu_start; | 1989 | const size_t lpage_size = ai->atom_size; |
| 1864 | size_t chunk_size = unit_size * nr_units; | 1990 | size_t chunk_size, map_size; |
| 1865 | size_t map_size; | ||
| 1866 | unsigned int cpu; | 1991 | unsigned int cpu; |
| 1867 | ssize_t ret; | 1992 | ssize_t ret; |
| 1868 | int i, j, unit; | 1993 | int i, j, unit, nr_units; |
| 1869 | 1994 | ||
| 1870 | pcpul_lpage_dump_cfg(KERN_DEBUG, static_size, reserved_size, dyn_size, | 1995 | nr_units = 0; |
| 1871 | unit_size, lpage_size, unit_map, nr_units); | 1996 | for (i = 0; i < ai->nr_groups; i++) |
| 1997 | nr_units += ai->groups[i].nr_units; | ||
| 1872 | 1998 | ||
| 1999 | chunk_size = ai->unit_size * nr_units; | ||
| 1873 | BUG_ON(chunk_size % lpage_size); | 2000 | BUG_ON(chunk_size % lpage_size); |
| 1874 | 2001 | ||
| 1875 | pcpul_size = static_size + reserved_size + dyn_size; | 2002 | pcpul_size = ai->static_size + ai->reserved_size + ai->dyn_size; |
| 1876 | pcpul_lpage_size = lpage_size; | 2003 | pcpul_lpage_size = lpage_size; |
| 1877 | pcpul_nr_lpages = chunk_size / lpage_size; | 2004 | pcpul_nr_lpages = chunk_size / lpage_size; |
| 1878 | 2005 | ||
| @@ -1883,13 +2010,13 @@ ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t dyn_size, | |||
| 1883 | /* allocate all pages */ | 2010 | /* allocate all pages */ |
| 1884 | for (i = 0; i < pcpul_nr_lpages; i++) { | 2011 | for (i = 0; i < pcpul_nr_lpages; i++) { |
| 1885 | size_t offset = i * lpage_size; | 2012 | size_t offset = i * lpage_size; |
| 1886 | int first_unit = offset / unit_size; | 2013 | int first_unit = offset / ai->unit_size; |
| 1887 | int last_unit = (offset + lpage_size - 1) / unit_size; | 2014 | int last_unit = (offset + lpage_size - 1) / ai->unit_size; |
| 1888 | void *ptr; | 2015 | void *ptr; |
| 1889 | 2016 | ||
| 1890 | /* find out which cpu is mapped to this unit */ | 2017 | /* find out which cpu is mapped to this unit */ |
| 1891 | for (unit = first_unit; unit <= last_unit; unit++) | 2018 | for (unit = first_unit; unit <= last_unit; unit++) |
| 1892 | if (pcpul_unit_to_cpu(unit, unit_map, &cpu)) | 2019 | if (pcpul_unit_to_cpu(unit, ai, &cpu)) |
| 1893 | goto found; | 2020 | goto found; |
| 1894 | continue; | 2021 | continue; |
| 1895 | found: | 2022 | found: |
| @@ -1905,12 +2032,12 @@ ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t dyn_size, | |||
| 1905 | 2032 | ||
| 1906 | /* return unused holes */ | 2033 | /* return unused holes */ |
| 1907 | for (unit = 0; unit < nr_units; unit++) { | 2034 | for (unit = 0; unit < nr_units; unit++) { |
| 1908 | size_t start = unit * unit_size; | 2035 | size_t start = unit * ai->unit_size; |
| 1909 | size_t end = start + unit_size; | 2036 | size_t end = start + ai->unit_size; |
| 1910 | size_t off, next; | 2037 | size_t off, next; |
| 1911 | 2038 | ||
| 1912 | /* don't free used part of occupied unit */ | 2039 | /* don't free used part of occupied unit */ |
| 1913 | if (pcpul_unit_to_cpu(unit, unit_map, NULL)) | 2040 | if (pcpul_unit_to_cpu(unit, ai, NULL)) |
| 1914 | start += pcpul_size; | 2041 | start += pcpul_size; |
| 1915 | 2042 | ||
| 1916 | /* unit can span more than one page, punch the holes */ | 2043 | /* unit can span more than one page, punch the holes */ |
| @@ -1925,7 +2052,7 @@ ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t dyn_size, | |||
| 1925 | /* allocate address, map and copy */ | 2052 | /* allocate address, map and copy */ |
| 1926 | vm.flags = VM_ALLOC; | 2053 | vm.flags = VM_ALLOC; |
| 1927 | vm.size = chunk_size; | 2054 | vm.size = chunk_size; |
| 1928 | vm_area_register_early(&vm, unit_size); | 2055 | vm_area_register_early(&vm, ai->unit_size); |
| 1929 | 2056 | ||
| 1930 | for (i = 0; i < pcpul_nr_lpages; i++) { | 2057 | for (i = 0; i < pcpul_nr_lpages; i++) { |
| 1931 | if (!pcpul_map[i].ptr) | 2058 | if (!pcpul_map[i].ptr) |
| @@ -1935,15 +2062,15 @@ ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t dyn_size, | |||
| 1935 | } | 2062 | } |
| 1936 | 2063 | ||
| 1937 | for_each_possible_cpu(cpu) | 2064 | for_each_possible_cpu(cpu) |
| 1938 | memcpy(vm.addr + unit_map[cpu] * unit_size, __per_cpu_load, | 2065 | memcpy(vm.addr + pcpul_cpu_to_unit(cpu, ai) * ai->unit_size, |
| 1939 | static_size); | 2066 | __per_cpu_load, ai->static_size); |
| 1940 | 2067 | ||
| 1941 | /* we're ready, commit */ | 2068 | /* we're ready, commit */ |
| 1942 | pr_info("PERCPU: large pages @%p s%zu r%zu d%zu u%zu\n", | 2069 | pr_info("PERCPU: large pages @%p s%zu r%zu d%zu u%zu\n", |
| 1943 | vm.addr, static_size, reserved_size, dyn_size, unit_size); | 2070 | vm.addr, ai->static_size, ai->reserved_size, ai->dyn_size, |
| 2071 | ai->unit_size); | ||
| 1944 | 2072 | ||
| 1945 | ret = pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, | 2073 | ret = pcpu_setup_first_chunk(ai, vm.addr); |
| 1946 | unit_size, vm.addr, unit_map); | ||
| 1947 | 2074 | ||
| 1948 | /* | 2075 | /* |
| 1949 | * Sort pcpul_map array for pcpu_lpage_remapped(). Unmapped | 2076 | * Sort pcpul_map array for pcpu_lpage_remapped(). Unmapped |
