author     Tejun Heo <tj@kernel.org>    2009-07-03 19:10:59 -0400
committer  Tejun Heo <tj@kernel.org>    2009-07-03 19:10:59 -0400
commit     8f05a6a65d944f2fed4eb384fb58aa8c8e5a9bab
tree       433e2bc9f937778376a9b38e3d9816862292bb51 /mm/percpu.c
parent     d4b95f80399471e4bce5e992700ff7f06ef91f6a
percpu: make 4k first chunk allocator map memory
At first, the percpu first chunk was always set up page-by-page by the generic code. To add other allocators, different parts of the generic initialization were made optional. Now we have three allocators - embed, remap and 4k. embed and remap fully handle allocation and mapping of the first chunk, while 4k still depends on the generic code for those. This makes the generic alloc/map paths specific to 4k and makes the code unnecessarily complicated with optional generic behaviors.

This patch makes the 4k allocator allocate and map memory directly instead of depending on the generic code. The only outwardly visible change is that the dynamic area in the first chunk is now allocated up-front instead of on-demand. This doesn't make any meaningful difference as the area is minimal (usually less than a page, just enough to fill the alignment) with the 4k allocator. Plus, the dynamic area in the first chunk usually gets fully used anyway.

This will allow simplification of pcpu_setup_first_chunk() and removal of the chunk->page array.

[ Impact: no outside visible change other than up-front allocation of dyn area ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'mm/percpu.c')
-rw-r--r--    mm/percpu.c    71
1 file changed, 54 insertions(+), 17 deletions(-)
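As context for the diff below: pcpu_4k_first_chunk() is driven by arch setup code through per-cpu page alloc/free callbacks plus a PTE-population hook, and after this patch it also maps the pages and copies the static data itself. A minimal sketch of such a caller follows; the helper names and bodies (pcpu4k_alloc(), pcpu4k_free(), pcpu4k_populate_pte(), setup_pcpu_4k()) and the PERCPU_FIRST_CHUNK_RESERVE reserve size are illustrative stand-ins for arch-specific code, not part of this patch.

/* Illustrative arch-side caller of pcpu_4k_first_chunk(); the callback
 * bodies below are placeholders, not taken from this patch. */
static void * __init pcpu4k_alloc(unsigned int cpu, size_t size)
{
	/* allocate one unit page for @cpu, e.g. from bootmem */
	return alloc_bootmem_pages(size);
}

static void __init pcpu4k_free(void *ptr, size_t size)
{
	free_bootmem(__pa(ptr), size);
}

static void __init pcpu4k_populate_pte(unsigned long addr)
{
	/* arch hook: make sure page tables cover @addr in the vmalloc area */
}

static ssize_t __init setup_pcpu_4k(size_t static_size)
{
	/* PERCPU_FIRST_CHUNK_RESERVE is an assumed arch policy value */
	return pcpu_4k_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE,
				   pcpu4k_alloc, pcpu4k_free,
				   pcpu4k_populate_pte);
}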
diff --git a/mm/percpu.c b/mm/percpu.c
index 27b0f40a3ea8..f3fe7bc7378f 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -632,6 +632,13 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size,
 	pcpu_unmap(chunk, unmap_start, unmap_end, flush);
 }
 
+static int __pcpu_map_pages(unsigned long addr, struct page **pages,
+			    int nr_pages)
+{
+	return map_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT,
+					PAGE_KERNEL, pages);
+}
+
 /**
  * pcpu_map - map pages into a pcpu_chunk
  * @chunk: chunk of interest
@@ -651,11 +658,9 @@ static int pcpu_map(struct pcpu_chunk *chunk, int page_start, int page_end)
 	WARN_ON(chunk->immutable);
 
 	for_each_possible_cpu(cpu) {
-		err = map_kernel_range_noflush(
-				pcpu_chunk_addr(chunk, cpu, page_start),
-				(page_end - page_start) << PAGE_SHIFT,
-				PAGE_KERNEL,
-				pcpu_chunk_pagep(chunk, cpu, page_start));
+		err = __pcpu_map_pages(pcpu_chunk_addr(chunk, cpu, page_start),
+				       pcpu_chunk_pagep(chunk, cpu, page_start),
+				       page_end - page_start);
 		if (err < 0)
 			return err;
 	}
@@ -1274,12 +1279,12 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size,
  * 4k page first chunk setup helper.
  */
 static struct page **pcpu4k_pages __initdata;
-static int pcpu4k_nr_static_pages __initdata;
+static int pcpu4k_unit_pages __initdata;
 
 static struct page * __init pcpu4k_get_page(unsigned int cpu, int pageno)
 {
-	if (pageno < pcpu4k_nr_static_pages)
-		return pcpu4k_pages[cpu * pcpu4k_nr_static_pages + pageno];
+	if (pageno < pcpu4k_unit_pages)
+		return pcpu4k_pages[cpu * pcpu4k_unit_pages + pageno];
 	return NULL;
 }
 
@@ -1306,22 +1311,24 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size,
 			       pcpu_fc_free_fn_t free_fn,
 			       pcpu_fc_populate_pte_fn_t populate_pte_fn)
 {
+	static struct vm_struct vm;
 	size_t pages_size;
 	unsigned int cpu;
 	int i, j;
 	ssize_t ret;
 
-	pcpu4k_nr_static_pages = PFN_UP(static_size);
+	pcpu4k_unit_pages = PFN_UP(max_t(size_t, static_size + reserved_size,
+					 PCPU_MIN_UNIT_SIZE));
 
 	/* unaligned allocations can't be freed, round up to page size */
-	pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * num_possible_cpus() *
+	pages_size = PFN_ALIGN(pcpu4k_unit_pages * num_possible_cpus() *
 			       sizeof(pcpu4k_pages[0]));
 	pcpu4k_pages = alloc_bootmem(pages_size);
 
-	/* allocate and copy */
+	/* allocate pages */
 	j = 0;
 	for_each_possible_cpu(cpu)
-		for (i = 0; i < pcpu4k_nr_static_pages; i++) {
+		for (i = 0; i < pcpu4k_unit_pages; i++) {
 			void *ptr;
 
 			ptr = alloc_fn(cpu, PAGE_SIZE);
@@ -1330,18 +1337,48 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size,
 				       "4k page for cpu%u\n", cpu);
 				goto enomem;
 			}
-
-			memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE);
 			pcpu4k_pages[j++] = virt_to_page(ptr);
 		}
 
+	/* allocate vm area, map the pages and copy static data */
+	vm.flags = VM_ALLOC;
+	vm.size = num_possible_cpus() * pcpu4k_unit_pages << PAGE_SHIFT;
+	vm_area_register_early(&vm, PAGE_SIZE);
+
+	for_each_possible_cpu(cpu) {
+		unsigned long unit_addr = (unsigned long)vm.addr +
+			(cpu * pcpu4k_unit_pages << PAGE_SHIFT);
+
+		for (i = 0; i < pcpu4k_unit_pages; i++)
+			populate_pte_fn(unit_addr + (i << PAGE_SHIFT));
+
+		/* pte already populated, the following shouldn't fail */
+		ret = __pcpu_map_pages(unit_addr,
+				       &pcpu4k_pages[cpu * pcpu4k_unit_pages],
+				       pcpu4k_unit_pages);
+		if (ret < 0)
+			panic("failed to map percpu area, err=%zd\n", ret);
+
+		/*
+		 * FIXME: Archs with virtual cache should flush local
+		 * cache for the linear mapping here - something
+		 * equivalent to flush_cache_vmap() on the local cpu.
+		 * flush_cache_vmap() can't be used as most supporting
+		 * data structures are not set up yet.
+		 */
+
+		/* copy static data */
+		memcpy((void *)unit_addr, __per_cpu_load, static_size);
+	}
+
 	/* we're ready, commit */
-	pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n",
-		pcpu4k_nr_static_pages, static_size);
+	pr_info("PERCPU: %d 4k pages per cpu, static data %zu bytes\n",
+		pcpu4k_unit_pages, static_size);
 
 	ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size,
 				     reserved_size, -1,
-				     -1, NULL, populate_pte_fn);
+				     pcpu4k_unit_pages << PAGE_SHIFT, vm.addr,
+				     NULL);
 	goto out_free_ar;
 
 enomem:
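A note on the new unit sizing above: the unit now covers the static and reserved areas rounded up to whole pages (and at least PCPU_MIN_UNIT_SIZE), so when static + reserved already exceed that minimum, the up-front dynamic area is just the page-rounding slack. A standalone sketch of that arithmetic, using local stand-ins for the kernel macros and made-up sizes (the real PCPU_MIN_UNIT_SIZE value comes from include/linux/percpu.h; 32k is assumed here purely for illustration):

#include <stdio.h>

/* local stand-ins for the kernel macros used in the hunk above */
#define PAGE_SHIFT		12
#define PAGE_SIZE		(1UL << PAGE_SHIFT)
#define PFN_UP(x)		(((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)
#define PCPU_MIN_UNIT_SIZE	(32UL << 10)	/* assumed value for this sketch */

int main(void)
{
	/* hypothetical sizes: 43k of static percpu data plus an 8k reserve */
	unsigned long static_size = 43UL << 10, reserved_size = 8UL << 10;
	unsigned long want = static_size + reserved_size;
	unsigned long unit_pages =
		PFN_UP(want > PCPU_MIN_UNIT_SIZE ? want : PCPU_MIN_UNIT_SIZE);
	unsigned long unit_size = unit_pages << PAGE_SHIFT;

	/* dynamic area allocated up-front = page-rounding slack */
	printf("unit: %lu pages (%lu bytes), dynamic area: %lu bytes\n",
	       unit_pages, unit_size, unit_size - want);
	return 0;
}

With these made-up numbers the per-cpu unit comes out to 13 pages and the up-front dynamic area to 1024 bytes, i.e. less than a page, consistent with the commit message.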