aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Tejun Heo <tj@kernel.org> 2009-08-14 02:00:53 -0400
committer: Tejun Heo <tj@kernel.org> 2009-08-14 02:00:53 -0400
commit: bcb2107fdbecef3de55d597d23453747af81ba88 (patch)
tree: f5d7095b80a3483f65a58d8eecdb02b06475fa3d
parent: e933a73f48e3b2d40cfa56d81e2646f194b5a66a (diff)
sparc64: use embedding percpu first chunk allocator
sparc64 currently allocates a large page for each cpu and partially remaps it into the vmalloc area, much like the lpage first chunk allocator did. As a 4M page is used for each cpu, this results in a very large unit size and also adds TLB pressure due to the double mapping of pages in the first chunk. This patch converts sparc64 to use the embedding percpu first chunk allocator, which now knows how to handle NUMA configurations. This simplifies the code a lot, doesn't incur any extra TLB pressure, and results in better utilization of the address space.
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- arch/sparc/Kconfig 3
-rw-r--r-- arch/sparc/kernel/smp_64.c 128
2 files changed, 21 insertions, 110 deletions
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 4f6ed0f113f0..fbd1233b392d 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -95,6 +95,9 @@ config AUDIT_ARCH
95config HAVE_SETUP_PER_CPU_AREA 95config HAVE_SETUP_PER_CPU_AREA
96 def_bool y if SPARC64 96 def_bool y if SPARC64
97 97
98config NEED_PER_CPU_EMBED_FIRST_CHUNK
99 def_bool y if SPARC64
100
98config GENERIC_HARDIRQS_NO__DO_IRQ 101config GENERIC_HARDIRQS_NO__DO_IRQ
99 bool 102 bool
100 def_bool y if SPARC64 103 def_bool y if SPARC64
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index b03fd362c629..ff68373ce6d6 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1389,8 +1389,8 @@ void smp_send_stop(void)
1389 * RETURNS: 1389 * RETURNS:
1390 * Pointer to the allocated area on success, NULL on failure. 1390 * Pointer to the allocated area on success, NULL on failure.
1391 */ 1391 */
1392static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, 1392static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
1393 unsigned long align) 1393 size_t align)
1394{ 1394{
1395 const unsigned long goal = __pa(MAX_DMA_ADDRESS); 1395 const unsigned long goal = __pa(MAX_DMA_ADDRESS);
1396#ifdef CONFIG_NEED_MULTIPLE_NODES 1396#ifdef CONFIG_NEED_MULTIPLE_NODES
@@ -1415,123 +1415,31 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
1415#endif 1415#endif
1416} 1416}
1417 1417
1418#define PCPU_CHUNK_SIZE (4UL * 1024UL * 1024UL) 1418static void __init pcpu_free_bootmem(void *ptr, size_t size)
1419
1420static void __init pcpu_map_range(unsigned long start, unsigned long end,
1421 struct page *page)
1422{ 1419{
1423 unsigned long pfn = page_to_pfn(page); 1420 free_bootmem(__pa(ptr), size);
1424 unsigned long pte_base; 1421}
1425
1426 BUG_ON((pfn<<PAGE_SHIFT)&(PCPU_CHUNK_SIZE - 1UL));
1427
1428 pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4U |
1429 _PAGE_CP_4U | _PAGE_CV_4U |
1430 _PAGE_P_4U | _PAGE_W_4U);
1431 if (tlb_type == hypervisor)
1432 pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4V |
1433 _PAGE_CP_4V | _PAGE_CV_4V |
1434 _PAGE_P_4V | _PAGE_W_4V);
1435
1436 while (start < end) {
1437 pgd_t *pgd = pgd_offset_k(start);
1438 unsigned long this_end;
1439 pud_t *pud;
1440 pmd_t *pmd;
1441 pte_t *pte;
1442
1443 pud = pud_offset(pgd, start);
1444 if (pud_none(*pud)) {
1445 pmd_t *new;
1446
1447 new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
1448 pud_populate(&init_mm, pud, new);
1449 }
1450
1451 pmd = pmd_offset(pud, start);
1452 if (!pmd_present(*pmd)) {
1453 pte_t *new;
1454
1455 new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
1456 pmd_populate_kernel(&init_mm, pmd, new);
1457 }
1458
1459 pte = pte_offset_kernel(pmd, start);
1460 this_end = (start + PMD_SIZE) & PMD_MASK;
1461 if (this_end > end)
1462 this_end = end;
1463
1464 while (start < this_end) {
1465 unsigned long paddr = pfn << PAGE_SHIFT;
1466
1467 pte_val(*pte) = (paddr | pte_base);
1468 1422
1469 start += PAGE_SIZE; 1423static int pcpu_cpu_distance(unsigned int from, unsigned int to)
1470 pte++; 1424{
1471 pfn++; 1425 if (cpu_to_node(from) == cpu_to_node(to))
1472 } 1426 return LOCAL_DISTANCE;
1473 } 1427 else
1428 return REMOTE_DISTANCE;
1474} 1429}
1475 1430
1476void __init setup_per_cpu_areas(void) 1431void __init setup_per_cpu_areas(void)
1477{ 1432{
1478 static struct vm_struct vm; 1433 unsigned long delta;
1479 struct pcpu_alloc_info *ai; 1434 unsigned int cpu;
1480 unsigned long delta, cpu;
1481 size_t size_sum;
1482 size_t ptrs_size;
1483 void **ptrs;
1484 int rc; 1435 int rc;
1485 1436
1486 ai = pcpu_alloc_alloc_info(1, nr_cpu_ids); 1437 rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
1487 1438 PERCPU_DYNAMIC_RESERVE, 4 << 20,
1488 ai->static_size = __per_cpu_end - __per_cpu_start; 1439 pcpu_cpu_distance, pcpu_alloc_bootmem,
1489 ai->reserved_size = PERCPU_MODULE_RESERVE; 1440 pcpu_free_bootmem);
1490
1491 size_sum = PFN_ALIGN(ai->static_size + ai->reserved_size +
1492 PERCPU_DYNAMIC_RESERVE);
1493
1494 ai->dyn_size = size_sum - ai->static_size - ai->reserved_size;
1495 ai->unit_size = PCPU_CHUNK_SIZE;
1496 ai->atom_size = PCPU_CHUNK_SIZE;
1497 ai->alloc_size = PCPU_CHUNK_SIZE;
1498 ai->groups[0].nr_units = nr_cpu_ids;
1499
1500 for_each_possible_cpu(cpu)
1501 ai->groups[0].cpu_map[cpu] = cpu;
1502
1503 ptrs_size = PFN_ALIGN(nr_cpu_ids * sizeof(ptrs[0]));
1504 ptrs = alloc_bootmem(ptrs_size);
1505
1506 for_each_possible_cpu(cpu) {
1507 ptrs[cpu] = pcpu_alloc_bootmem(cpu, PCPU_CHUNK_SIZE,
1508 PCPU_CHUNK_SIZE);
1509
1510 free_bootmem(__pa(ptrs[cpu] + size_sum),
1511 PCPU_CHUNK_SIZE - size_sum);
1512
1513 memcpy(ptrs[cpu], __per_cpu_load, ai->static_size);
1514 }
1515
1516 /* allocate address and map */
1517 vm.flags = VM_ALLOC;
1518 vm.size = nr_cpu_ids * PCPU_CHUNK_SIZE;
1519 vm_area_register_early(&vm, PCPU_CHUNK_SIZE);
1520
1521 for_each_possible_cpu(cpu) {
1522 unsigned long start = (unsigned long) vm.addr;
1523 unsigned long end;
1524
1525 start += cpu * PCPU_CHUNK_SIZE;
1526 end = start + PCPU_CHUNK_SIZE;
1527 pcpu_map_range(start, end, virt_to_page(ptrs[cpu]));
1528 }
1529
1530 rc = pcpu_setup_first_chunk(ai, vm.addr);
1531 if (rc) 1441 if (rc)
1532 panic("failed to setup percpu first chunk (%d)", rc); 1442 panic("failed to initialize first chunk (%d)", rc);
1533
1534 free_bootmem(__pa(ptrs), ptrs_size);
1535 1443
1536 delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; 1444 delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
1537 for_each_possible_cpu(cpu) 1445 for_each_possible_cpu(cpu)