author		Tejun Heo <tj@kernel.org>	2009-08-14 02:00:53 -0400
committer	Tejun Heo <tj@kernel.org>	2009-08-14 02:00:53 -0400
commit		bcb2107fdbecef3de55d597d23453747af81ba88 (patch)
tree		f5d7095b80a3483f65a58d8eecdb02b06475fa3d /arch/sparc
parent		e933a73f48e3b2d40cfa56d81e2646f194b5a66a (diff)
sparc64: use embedding percpu first chunk allocator
sparc64 currently allocates a large page for each cpu and partially
remaps it into the vmalloc area, much like what the lpage first chunk
allocator did.  As a 4M page is used for each cpu, this results in a
very large unit size and also adds TLB pressure due to the double
mapping of pages in the first chunk.
This patch converts sparc64 to use the embedding percpu first chunk
allocator, which now knows how to handle NUMA configurations.  This
simplifies the code a lot, doesn't incur any extra TLB pressure, and
results in better utilization of the address space.
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: David S. Miller <davem@davemloft.net>
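
[Editor's note] For context on the NUMA handling mentioned above: the
embedding allocator asks the arch for pairwise cpu distances and clusters
CPUs that report LOCAL_DISTANCE into the same allocation group, so each
group's unit memory can come from its own node (pcpu_alloc_bootmem below
already allocates node-locally).  The standalone sketch that follows
illustrates only that grouping idea; the two-node topology and the greedy
grouping loop are invented for illustration and are not the mm/percpu.c
implementation.

/*
 * Illustration only: how a cpu_distance_fn like the pcpu_cpu_distance()
 * added by this patch can drive group formation.  The fake topology
 * (cpus 0-3 on node 0, cpus 4-7 on node 1) and the greedy grouping
 * below are invented for this sketch; the real logic lives in
 * mm/percpu.c.
 */
#include <stdio.h>

#define LOCAL_DISTANCE	10
#define REMOTE_DISTANCE	20
#define NR_CPUS		8

static int cpu_to_node(unsigned int cpu)
{
	return cpu < 4 ? 0 : 1;	/* hypothetical two-node box */
}

static int pcpu_cpu_distance(unsigned int from, unsigned int to)
{
	if (cpu_to_node(from) == cpu_to_node(to))
		return LOCAL_DISTANCE;
	else
		return REMOTE_DISTANCE;
}

int main(void)
{
	int group_of[NR_CPUS];
	int nr_groups = 0;
	unsigned int cpu, leader;
	int g;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		group_of[cpu] = -1;
		/* join the first existing group whose leader is local */
		for (g = 0; g < nr_groups; g++) {
			for (leader = 0; group_of[leader] != g; leader++)
				;
			if (pcpu_cpu_distance(leader, cpu) == LOCAL_DISTANCE) {
				group_of[cpu] = g;
				break;
			}
		}
		if (group_of[cpu] < 0)	/* no local group yet: start one */
			group_of[cpu] = nr_groups++;
	}

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		printf("cpu%u -> group %d\n", cpu, group_of[cpu]);
	return 0;
}

Run on its own, this prints cpus 0-3 in group 0 and cpus 4-7 in group 1;
with every unit allocated from its group's node, the embed allocator gets
NUMA-local first chunks without the double mapping the old code needed.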
Diffstat (limited to 'arch/sparc')
-rw-r--r--	arch/sparc/Kconfig		3
-rw-r--r--	arch/sparc/kernel/smp_64.c	128
2 files changed, 21 insertions, 110 deletions
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 4f6ed0f113f0..fbd1233b392d 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -95,6 +95,9 @@ config AUDIT_ARCH
 config HAVE_SETUP_PER_CPU_AREA
 	def_bool y if SPARC64
 
+config NEED_PER_CPU_EMBED_FIRST_CHUNK
+	def_bool y if SPARC64
+
 config GENERIC_HARDIRQS_NO__DO_IRQ
 	bool
 	def_bool y if SPARC64
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index b03fd362c629..ff68373ce6d6 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1389,8 +1389,8 @@ void smp_send_stop(void)
  * RETURNS:
  * Pointer to the allocated area on success, NULL on failure.
  */
-static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
-					unsigned long align)
+static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
+					size_t align)
 {
 	const unsigned long goal = __pa(MAX_DMA_ADDRESS);
 #ifdef CONFIG_NEED_MULTIPLE_NODES
@@ -1415,123 +1415,31 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
 #endif
 }
 
-#define PCPU_CHUNK_SIZE (4UL * 1024UL * 1024UL)
-
-static void __init pcpu_map_range(unsigned long start, unsigned long end,
-				  struct page *page)
+static void __init pcpu_free_bootmem(void *ptr, size_t size)
 {
-	unsigned long pfn = page_to_pfn(page);
-	unsigned long pte_base;
-
-	BUG_ON((pfn<<PAGE_SHIFT)&(PCPU_CHUNK_SIZE - 1UL));
-
-	pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4U |
-		    _PAGE_CP_4U | _PAGE_CV_4U |
-		    _PAGE_P_4U | _PAGE_W_4U);
-	if (tlb_type == hypervisor)
-		pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4V |
-			    _PAGE_CP_4V | _PAGE_CV_4V |
-			    _PAGE_P_4V | _PAGE_W_4V);
-
-	while (start < end) {
-		pgd_t *pgd = pgd_offset_k(start);
-		unsigned long this_end;
-		pud_t *pud;
-		pmd_t *pmd;
-		pte_t *pte;
-
-		pud = pud_offset(pgd, start);
-		if (pud_none(*pud)) {
-			pmd_t *new;
-
-			new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
-			pud_populate(&init_mm, pud, new);
-		}
-
-		pmd = pmd_offset(pud, start);
-		if (!pmd_present(*pmd)) {
-			pte_t *new;
-
-			new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
-			pmd_populate_kernel(&init_mm, pmd, new);
-		}
-
-		pte = pte_offset_kernel(pmd, start);
-		this_end = (start + PMD_SIZE) & PMD_MASK;
-		if (this_end > end)
-			this_end = end;
-
-		while (start < this_end) {
-			unsigned long paddr = pfn << PAGE_SHIFT;
-
-			pte_val(*pte) = (paddr | pte_base);
+	free_bootmem(__pa(ptr), size);
+}
 
-			start += PAGE_SIZE;
-			pte++;
-			pfn++;
-		}
-	}
+static int pcpu_cpu_distance(unsigned int from, unsigned int to)
+{
+	if (cpu_to_node(from) == cpu_to_node(to))
+		return LOCAL_DISTANCE;
+	else
+		return REMOTE_DISTANCE;
 }
 
 void __init setup_per_cpu_areas(void)
 {
-	static struct vm_struct vm;
-	struct pcpu_alloc_info *ai;
-	unsigned long delta, cpu;
-	size_t size_sum;
-	size_t ptrs_size;
-	void **ptrs;
+	unsigned long delta;
+	unsigned int cpu;
 	int rc;
 
-	ai = pcpu_alloc_alloc_info(1, nr_cpu_ids);
-
-	ai->static_size = __per_cpu_end - __per_cpu_start;
-	ai->reserved_size = PERCPU_MODULE_RESERVE;
-
-	size_sum = PFN_ALIGN(ai->static_size + ai->reserved_size +
-			     PERCPU_DYNAMIC_RESERVE);
-
-	ai->dyn_size = size_sum - ai->static_size - ai->reserved_size;
-	ai->unit_size = PCPU_CHUNK_SIZE;
-	ai->atom_size = PCPU_CHUNK_SIZE;
-	ai->alloc_size = PCPU_CHUNK_SIZE;
-	ai->groups[0].nr_units = nr_cpu_ids;
-
-	for_each_possible_cpu(cpu)
-		ai->groups[0].cpu_map[cpu] = cpu;
-
-	ptrs_size = PFN_ALIGN(nr_cpu_ids * sizeof(ptrs[0]));
-	ptrs = alloc_bootmem(ptrs_size);
-
-	for_each_possible_cpu(cpu) {
-		ptrs[cpu] = pcpu_alloc_bootmem(cpu, PCPU_CHUNK_SIZE,
-					       PCPU_CHUNK_SIZE);
-
-		free_bootmem(__pa(ptrs[cpu] + size_sum),
-			     PCPU_CHUNK_SIZE - size_sum);
-
-		memcpy(ptrs[cpu], __per_cpu_load, ai->static_size);
-	}
-
-	/* allocate address and map */
-	vm.flags = VM_ALLOC;
-	vm.size = nr_cpu_ids * PCPU_CHUNK_SIZE;
-	vm_area_register_early(&vm, PCPU_CHUNK_SIZE);
-
-	for_each_possible_cpu(cpu) {
-		unsigned long start = (unsigned long) vm.addr;
-		unsigned long end;
-
-		start += cpu * PCPU_CHUNK_SIZE;
-		end = start + PCPU_CHUNK_SIZE;
-		pcpu_map_range(start, end, virt_to_page(ptrs[cpu]));
-	}
-
-	rc = pcpu_setup_first_chunk(ai, vm.addr);
+	rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
+				    PERCPU_DYNAMIC_RESERVE, 4 << 20,
+				    pcpu_cpu_distance, pcpu_alloc_bootmem,
+				    pcpu_free_bootmem);
 	if (rc)
-		panic("failed to setup percpu first chunk (%d)", rc);
-
-	free_bootmem(__pa(ptrs), ptrs_size);
+		panic("failed to initialize first chunk (%d)", rc);
 
 	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
 	for_each_possible_cpu(cpu)
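
[Editor's note] The whole setup now boils down to the single
pcpu_embed_first_chunk() call in the hunk above.  An annotated copy of
that call for reference (annotations mine; parameter roles per the
mm/percpu.c API this patch targets):

	rc = pcpu_embed_first_chunk(
		PERCPU_MODULE_RESERVE,	/* reserved region for module static percpu vars */
		PERCPU_DYNAMIC_RESERVE,	/* minimum space for dynamic percpu allocation */
		4 << 20,		/* atom_size: 4M, matching sparc64's large page */
		pcpu_cpu_distance,	/* NUMA distance callback -> per-node groups */
		pcpu_alloc_bootmem,	/* node-aware allocation (kept from before) */
		pcpu_free_bootmem);	/* return unused space to bootmem */

Keeping atom_size at 4M preserves the one-large-page-per-unit property of
the old code, but because the first chunk now lives in the linear kernel
mapping rather than in a second vmalloc mapping, the TLB no longer holds
two translations for the same pages.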