author		Tejun Heo <tj@kernel.org>	2009-07-03 19:10:59 -0400
committer	Tejun Heo <tj@kernel.org>	2009-07-03 19:10:59 -0400
commit		8c4bfc6e8801616ab2e01c38140b2159b388d2ff (patch)
tree		e29e8bbfae362362554b870371a6187b41f92d82
parent		8f05a6a65d944f2fed4eb384fb58aa8c8e5a9bab (diff)
x86,percpu: generalize lpage first chunk allocator
Generalize and move x86 setup_pcpu_lpage() into
pcpu_lpage_first_chunk(). setup_pcpu_lpage() is now a simple wrapper
around the generalized version. Other than taking size parameters and
using arch-supplied callbacks to allocate, free and map memory,
pcpu_lpage_first_chunk() is identical to the original implementation.
This simplifies arch code and will help convert more archs to the
dynamic percpu allocator.
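
For reference, the arch side now reduces to a single call that passes the
arch's large page size plus allocate/free/map callbacks. A minimal sketch
taken from the new x86 wrapper in this patch (the !chosen auto-sizing check
is elided; all names appear in the diff below):

static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
{
	size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;

	/* PMD_SIZE is the x86 large page size; callbacks are arch-supplied */
	return pcpu_lpage_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE,
				      reserve - PERCPU_FIRST_CHUNK_RESERVE,
				      PMD_SIZE,
				      pcpu_fc_alloc, pcpu_fc_free, pcpul_map);
}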
While at it, factor out pcpu_calc_fc_sizes() which is common to
pcpu_embed_first_chunk() and pcpu_lpage_first_chunk().
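
pcpu_calc_fc_sizes() page-aligns static + reserved + dynamic and, unless the
caller fixed dyn_size at 0, folds the alignment slack back into the dynamic
area. A rough worked example with made-up sizes, assuming 4k pages (the
helper itself appears in the mm/percpu.c hunk below):

	ssize_t dyn_size = 20480;	/* hypothetical dynamic reserve */
	size_t size_sum = pcpu_calc_fc_sizes(45000, 8192, &dyn_size);
	/* 45000 + 8192 + 20480 = 73672, aligned up to 73728 (18 pages);
	   dyn_size becomes 73728 - 45000 - 8192 = 20536 */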
[ Impact: code reorganization and generalization ]
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
-rw-r--r--	arch/x86/include/asm/percpu.h	|   9
-rw-r--r--	arch/x86/kernel/setup_percpu.c	| 169
-rw-r--r--	arch/x86/mm/pageattr.c		|   1
-rw-r--r--	include/linux/percpu.h		|  27
-rw-r--r--	mm/percpu.c			| 209
5 files changed, 244 insertions, 171 deletions
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 103f1ddb0d85..a18c038a3079 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -156,15 +156,6 @@ do { \
 /* We can use this directly for local CPU (faster). */
 DECLARE_PER_CPU(unsigned long, this_cpu_off);
 
-#ifdef CONFIG_NEED_MULTIPLE_NODES
-void *pcpu_lpage_remapped(void *kaddr);
-#else
-static inline void *pcpu_lpage_remapped(void *kaddr)
-{
-	return NULL;
-}
-#endif
-
 #endif /* !__ASSEMBLY__ */
 
 #ifdef CONFIG_SMP
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index ab896b31e80b..4f2e0ac9130b 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -137,44 +137,21 @@ static void __init pcpu_fc_free(void *ptr, size_t size)
 }
 
 /*
- * Large page remap allocator
- *
- * This allocator uses PMD page as unit. A PMD page is allocated for
- * each cpu and each is remapped into vmalloc area using PMD mapping.
- * As PMD page is quite large, only part of it is used for the first
- * chunk. Unused part is returned to the bootmem allocator.
- *
- * So, the PMD pages are mapped twice - once to the physical mapping
- * and to the vmalloc area for the first percpu chunk. The double
- * mapping does add one more PMD TLB entry pressure but still is much
- * better than only using 4k mappings while still being NUMA friendly.
+ * Large page remapping allocator
  */
 #ifdef CONFIG_NEED_MULTIPLE_NODES
-struct pcpul_ent {
-	unsigned int cpu;
-	void *ptr;
-};
-
-static size_t pcpul_size;
-static struct pcpul_ent *pcpul_map;
-static struct vm_struct pcpul_vm;
-
-static struct page * __init pcpul_get_page(unsigned int cpu, int pageno)
+static void __init pcpul_map(void *ptr, size_t size, void *addr)
 {
-	size_t off = (size_t)pageno << PAGE_SHIFT;
+	pmd_t *pmd, pmd_v;
 
-	if (off >= pcpul_size)
-		return NULL;
-
-	return virt_to_page(pcpul_map[cpu].ptr + off);
+	pmd = populate_extra_pmd((unsigned long)addr);
+	pmd_v = pfn_pmd(page_to_pfn(virt_to_page(ptr)), PAGE_KERNEL_LARGE);
+	set_pmd(pmd, pmd_v);
 }
 
 static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
 {
-	size_t map_size, dyn_size;
-	unsigned int cpu;
-	int i, j;
-	ssize_t ret;
+	size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
 
 	if (!chosen) {
 		size_t vm_size = VMALLOC_END - VMALLOC_START;
@@ -198,134 +175,10 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
 		return -EINVAL;
 	}
 
-	/*
-	 * Currently supports only single page. Supporting multiple
-	 * pages won't be too difficult if it ever becomes necessary.
-	 */
-	pcpul_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE +
-			       PERCPU_DYNAMIC_RESERVE);
-	if (pcpul_size > PMD_SIZE) {
-		pr_warning("PERCPU: static data is larger than large page, "
-			   "can't use large page\n");
-		return -EINVAL;
-	}
-	dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE;
-
-	/* allocate pointer array and alloc large pages */
-	map_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpul_map[0]));
-	pcpul_map = alloc_bootmem(map_size);
-
-	for_each_possible_cpu(cpu) {
-		pcpul_map[cpu].cpu = cpu;
-		pcpul_map[cpu].ptr = pcpu_alloc_bootmem(cpu, PMD_SIZE,
-							PMD_SIZE);
-		if (!pcpul_map[cpu].ptr) {
-			pr_warning("PERCPU: failed to allocate large page "
-				   "for cpu%u\n", cpu);
-			goto enomem;
-		}
-
-		/*
-		 * Only use pcpul_size bytes and give back the rest.
-		 *
-		 * Ingo: The 2MB up-rounding bootmem is needed to make
-		 * sure the partial 2MB page is still fully RAM - it's
-		 * not well-specified to have a PAT-incompatible area
-		 * (unmapped RAM, device memory, etc.) in that hole.
-		 */
-		free_bootmem(__pa(pcpul_map[cpu].ptr + pcpul_size),
-			     PMD_SIZE - pcpul_size);
-
-		memcpy(pcpul_map[cpu].ptr, __per_cpu_load, static_size);
-	}
-
-	/* allocate address and map */
-	pcpul_vm.flags = VM_ALLOC;
-	pcpul_vm.size = num_possible_cpus() * PMD_SIZE;
-	vm_area_register_early(&pcpul_vm, PMD_SIZE);
-
-	for_each_possible_cpu(cpu) {
-		pmd_t *pmd, pmd_v;
-
-		pmd = populate_extra_pmd((unsigned long)pcpul_vm.addr +
-					 cpu * PMD_SIZE);
-		pmd_v = pfn_pmd(page_to_pfn(virt_to_page(pcpul_map[cpu].ptr)),
-				PAGE_KERNEL_LARGE);
-		set_pmd(pmd, pmd_v);
-	}
-
-	/* we're ready, commit */
-	pr_info("PERCPU: Remapped at %p with large pages, static data "
-		"%zu bytes\n", pcpul_vm.addr, static_size);
-
-	ret = pcpu_setup_first_chunk(pcpul_get_page, static_size,
-				     PERCPU_FIRST_CHUNK_RESERVE, dyn_size,
-				     PMD_SIZE, pcpul_vm.addr, NULL);
-
-	/* sort pcpul_map array for pcpu_lpage_remapped() */
-	for (i = 0; i < num_possible_cpus() - 1; i++)
-		for (j = i + 1; j < num_possible_cpus(); j++)
-			if (pcpul_map[i].ptr > pcpul_map[j].ptr) {
-				struct pcpul_ent tmp = pcpul_map[i];
-				pcpul_map[i] = pcpul_map[j];
-				pcpul_map[j] = tmp;
-			}
-
-	return ret;
-
-enomem:
-	for_each_possible_cpu(cpu)
-		if (pcpul_map[cpu].ptr)
-			free_bootmem(__pa(pcpul_map[cpu].ptr), pcpul_size);
-	free_bootmem(__pa(pcpul_map), map_size);
-	return -ENOMEM;
-}
-
-/**
- * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area
- * @kaddr: the kernel address in question
- *
- * Determine whether @kaddr falls in the pcpul recycled area. This is
- * used by pageattr to detect VM aliases and break up the pcpu PMD
- * mapping such that the same physical page is not mapped under
- * different attributes.
- *
- * The recycled area is always at the tail of a partially used PMD
- * page.
- *
- * RETURNS:
- * Address of corresponding remapped pcpu address if match is found;
- * otherwise, NULL.
- */
-void *pcpu_lpage_remapped(void *kaddr)
-{
-	void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK);
-	unsigned long offset = (unsigned long)kaddr & ~PMD_MASK;
-	int left = 0, right = num_possible_cpus() - 1;
-	int pos;
-
-	/* pcpul in use at all? */
-	if (!pcpul_map)
-		return NULL;
-
-	/* okay, perform binary search */
-	while (left <= right) {
-		pos = (left + right) / 2;
-
-		if (pcpul_map[pos].ptr < pmd_addr)
-			left = pos + 1;
-		else if (pcpul_map[pos].ptr > pmd_addr)
-			right = pos - 1;
-		else {
-			/* it shouldn't be in the area for the first chunk */
-			WARN_ON(offset < pcpul_size);
-
-			return pcpul_vm.addr +
-			       pcpul_map[pos].cpu * PMD_SIZE + offset;
-		}
-	}
-
-	return NULL;
-}
+	return pcpu_lpage_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE,
+				      reserve - PERCPU_FIRST_CHUNK_RESERVE,
+				      PMD_SIZE,
+				      pcpu_fc_alloc, pcpu_fc_free, pcpul_map);
 }
 #else
 static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 1b734d7a8966..c106f7852424 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -12,6 +12,7 @@
 #include <linux/seq_file.h>
 #include <linux/debugfs.h>
 #include <linux/pfn.h>
+#include <linux/percpu.h>
 
 #include <asm/e820.h>
 #include <asm/processor.h>
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 41b5bfab4195..9f6bfd7d4b92 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -62,6 +62,7 @@ typedef struct page * (*pcpu_get_page_fn_t)(unsigned int cpu, int pageno);
 typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size);
 typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size);
 typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr);
+typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr);
 
 extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
 				size_t static_size, size_t reserved_size,
@@ -79,6 +80,32 @@ extern ssize_t __init pcpu_4k_first_chunk(
 				pcpu_fc_free_fn_t free_fn,
 				pcpu_fc_populate_pte_fn_t populate_pte_fn);
 
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+extern ssize_t __init pcpu_lpage_first_chunk(
+				size_t static_size, size_t reserved_size,
+				ssize_t dyn_size, size_t lpage_size,
+				pcpu_fc_alloc_fn_t alloc_fn,
+				pcpu_fc_free_fn_t free_fn,
+				pcpu_fc_map_fn_t map_fn);
+
+extern void *pcpu_lpage_remapped(void *kaddr);
+#else
+static inline ssize_t __init pcpu_lpage_first_chunk(
+				size_t static_size, size_t reserved_size,
+				ssize_t dyn_size, size_t lpage_size,
+				pcpu_fc_alloc_fn_t alloc_fn,
+				pcpu_fc_free_fn_t free_fn,
+				pcpu_fc_map_fn_t map_fn)
+{
+	return -EINVAL;
+}
+
+static inline void *pcpu_lpage_remapped(void *kaddr)
+{
+	return NULL;
+}
+#endif
+
 /*
  * Use this to get to a cpu's version of the per-cpu object
  * dynamically allocated. Non-atomic access to the current CPU's
diff --git a/mm/percpu.c b/mm/percpu.c
index f3fe7bc7378f..17db527ee2e2 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1190,6 +1190,19 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
 	return pcpu_unit_size;
 }
 
+static size_t pcpu_calc_fc_sizes(size_t static_size, size_t reserved_size,
+				 ssize_t *dyn_sizep)
+{
+	size_t size_sum;
+
+	size_sum = PFN_ALIGN(static_size + reserved_size +
+			     (*dyn_sizep >= 0 ? *dyn_sizep : 0));
+	if (*dyn_sizep != 0)
+		*dyn_sizep = size_sum - static_size - reserved_size;
+
+	return size_sum;
+}
+
 /*
  * Embedding first chunk setup helper.
  */
@@ -1241,10 +1254,7 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size,
 	unsigned int cpu;
 
 	/* determine parameters and allocate */
-	pcpue_size = PFN_ALIGN(static_size + reserved_size +
-			       (dyn_size >= 0 ? dyn_size : 0));
-	if (dyn_size != 0)
-		dyn_size = pcpue_size - static_size - reserved_size;
+	pcpue_size = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size);
 
 	pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE);
 	chunk_size = pcpue_unit_size * num_possible_cpus();
@@ -1391,6 +1401,197 @@ out_free_ar:
 }
 
 /*
+ * Large page remapping first chunk setup helper
+ */
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+struct pcpul_ent {
+	unsigned int cpu;
+	void *ptr;
+};
+
+static size_t pcpul_size;
+static size_t pcpul_unit_size;
+static struct pcpul_ent *pcpul_map;
+static struct vm_struct pcpul_vm;
+
+static struct page * __init pcpul_get_page(unsigned int cpu, int pageno)
+{
+	size_t off = (size_t)pageno << PAGE_SHIFT;
+
+	if (off >= pcpul_size)
+		return NULL;
+
+	return virt_to_page(pcpul_map[cpu].ptr + off);
+}
+
+/**
+ * pcpu_lpage_first_chunk - remap the first percpu chunk using large page
+ * @static_size: the size of static percpu area in bytes
+ * @reserved_size: the size of reserved percpu area in bytes
+ * @dyn_size: free size for dynamic allocation in bytes, -1 for auto
+ * @lpage_size: the size of a large page
+ * @alloc_fn: function to allocate percpu lpage, always called with lpage_size
+ * @free_fn: function to free percpu memory, @size <= lpage_size
+ * @map_fn: function to map percpu lpage, always called with lpage_size
+ *
+ * This allocator uses large page as unit. A large page is allocated
+ * for each cpu and each is remapped into vmalloc area using large
+ * page mapping. As large page can be quite large, only part of it is
+ * used for the first chunk. Unused part is returned to the bootmem
+ * allocator.
+ *
+ * So, the large pages are mapped twice - once to the physical mapping
+ * and to the vmalloc area for the first percpu chunk. The double
+ * mapping does add one more large TLB entry pressure but still is
+ * much better than only using 4k mappings while still being NUMA
+ * friendly.
+ *
+ * RETURNS:
+ * The determined pcpu_unit_size which can be used to initialize
+ * percpu access on success, -errno on failure.
+ */
+ssize_t __init pcpu_lpage_first_chunk(size_t static_size, size_t reserved_size,
+				      ssize_t dyn_size, size_t lpage_size,
+				      pcpu_fc_alloc_fn_t alloc_fn,
+				      pcpu_fc_free_fn_t free_fn,
+				      pcpu_fc_map_fn_t map_fn)
+{
+	size_t size_sum;
+	size_t map_size;
+	unsigned int cpu;
+	int i, j;
+	ssize_t ret;
+
+	/*
+	 * Currently supports only single page. Supporting multiple
+	 * pages won't be too difficult if it ever becomes necessary.
+	 */
+	size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size);
+
+	pcpul_unit_size = lpage_size;
+	pcpul_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);
+	if (pcpul_size > pcpul_unit_size) {
+		pr_warning("PERCPU: static data is larger than large page, "
+			   "can't use large page\n");
+		return -EINVAL;
+	}
+
+	/* allocate pointer array and alloc large pages */
+	map_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpul_map[0]));
+	pcpul_map = alloc_bootmem(map_size);
+
+	for_each_possible_cpu(cpu) {
+		void *ptr;
+
+		ptr = alloc_fn(cpu, lpage_size);
+		if (!ptr) {
+			pr_warning("PERCPU: failed to allocate large page "
+				   "for cpu%u\n", cpu);
+			goto enomem;
+		}
+
+		/*
+		 * Only use pcpul_size bytes and give back the rest.
+		 *
+		 * Ingo: The lpage_size up-rounding bootmem is needed
+		 * to make sure the partial lpage is still fully RAM -
+		 * it's not well-specified to have a incompatible area
+		 * (unmapped RAM, device memory, etc.) in that hole.
+		 */
+		free_fn(ptr + pcpul_size, lpage_size - pcpul_size);
+
+		pcpul_map[cpu].cpu = cpu;
+		pcpul_map[cpu].ptr = ptr;
+
+		memcpy(ptr, __per_cpu_load, static_size);
+	}
+
+	/* allocate address and map */
+	pcpul_vm.flags = VM_ALLOC;
+	pcpul_vm.size = num_possible_cpus() * pcpul_unit_size;
+	vm_area_register_early(&pcpul_vm, pcpul_unit_size);
+
+	for_each_possible_cpu(cpu)
+		map_fn(pcpul_map[cpu].ptr, pcpul_unit_size,
+		       pcpul_vm.addr + cpu * pcpul_unit_size);
+
+	/* we're ready, commit */
+	pr_info("PERCPU: Remapped at %p with large pages, static data "
+		"%zu bytes\n", pcpul_vm.addr, static_size);
+
+	ret = pcpu_setup_first_chunk(pcpul_get_page, static_size,
+				     reserved_size, dyn_size, pcpul_unit_size,
+				     pcpul_vm.addr, NULL);
+
+	/* sort pcpul_map array for pcpu_lpage_remapped() */
+	for (i = 0; i < num_possible_cpus() - 1; i++)
+		for (j = i + 1; j < num_possible_cpus(); j++)
+			if (pcpul_map[i].ptr > pcpul_map[j].ptr) {
+				struct pcpul_ent tmp = pcpul_map[i];
+				pcpul_map[i] = pcpul_map[j];
+				pcpul_map[j] = tmp;
+			}
+
+	return ret;
+
+enomem:
+	for_each_possible_cpu(cpu)
+		if (pcpul_map[cpu].ptr)
+			free_fn(pcpul_map[cpu].ptr, pcpul_size);
+	free_bootmem(__pa(pcpul_map), map_size);
+	return -ENOMEM;
+}
+
+/**
+ * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area
+ * @kaddr: the kernel address in question
+ *
+ * Determine whether @kaddr falls in the pcpul recycled area. This is
+ * used by pageattr to detect VM aliases and break up the pcpu large
+ * page mapping such that the same physical page is not mapped under
+ * different attributes.
+ *
+ * The recycled area is always at the tail of a partially used large
+ * page.
+ *
+ * RETURNS:
+ * Address of corresponding remapped pcpu address if match is found;
+ * otherwise, NULL.
+ */
+void *pcpu_lpage_remapped(void *kaddr)
+{
+	unsigned long unit_mask = pcpul_unit_size - 1;
+	void *lpage_addr = (void *)((unsigned long)kaddr & ~unit_mask);
+	unsigned long offset = (unsigned long)kaddr & unit_mask;
+	int left = 0, right = num_possible_cpus() - 1;
+	int pos;
+
+	/* pcpul in use at all? */
+	if (!pcpul_map)
+		return NULL;
+
+	/* okay, perform binary search */
+	while (left <= right) {
+		pos = (left + right) / 2;
+
+		if (pcpul_map[pos].ptr < lpage_addr)
+			left = pos + 1;
+		else if (pcpul_map[pos].ptr > lpage_addr)
+			right = pos - 1;
+		else {
+			/* it shouldn't be in the area for the first chunk */
+			WARN_ON(offset < pcpul_size);
+
+			return pcpul_vm.addr +
+			       pcpul_map[pos].cpu * pcpul_unit_size + offset;
+		}
+	}
+
+	return NULL;
+}
+#endif
+
+/*
  * Generic percpu area setup.
  *
  * The embedding helper is used because its behavior closely resembles