author		Tejun Heo <tj@kernel.org>	2009-08-14 02:00:53 -0400
committer	Tejun Heo <tj@kernel.org>	2009-08-14 02:00:53 -0400
commit		e933a73f48e3b2d40cfa56d81e2646f194b5a66a
tree		e828fbdac9ff888a3e8e3d750e14f132abd7ffa0
parent		4518e6a0c038b98be4c480e6f4481e8676bd15dd
percpu: kill lpage first chunk allocator
With x86 converted to the embedding allocator, lpage no longer has any
users. Kill it along with the cpa handling code.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jan Beulich <JBeulich@novell.com>
-rw-r--r--	Documentation/kernel-parameters.txt	 10
-rw-r--r--	arch/x86/mm/pageattr.c			 20
-rw-r--r--	include/linux/percpu.h			 16
-rw-r--r--	mm/percpu.c				241
4 files changed, 6 insertions, 281 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index dee9ce2e6cfa..e710093e3d32 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1920,11 +1920,11 @@ and is between 256 and 4096 characters. It is defined in the file
 			See arch/parisc/kernel/pdc_chassis.c
 
 	percpu_alloc=	Select which percpu first chunk allocator to use.
-			Currently supported values are "embed", "page" and
-			"lpage". Archs may support subset or none of the
-			selections. See comments in mm/percpu.c for details
-			on each allocator. This parameter is primarily for
-			debugging and performance comparison.
+			Currently supported values are "embed" and "page".
+			Archs may support subset or none of the selections.
+			See comments in mm/percpu.c for details on each
+			allocator. This parameter is primarily for debugging
+			and performance comparison.
 
 	pf.		[PARIDE]
 			See Documentation/blockdev/paride.txt.
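With "lpage" gone, the only selectors the command line accepts are the two
documented above. For example, an illustrative boot line forcing the
page-based allocator would look like:

        ... root=/dev/sda1 percpu_alloc=page ...

Any other value now falls through to the "unknown allocator" warning in
percpu_alloc_setup() (see the mm/percpu.c hunk below).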
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index dce282f65700..f53cfc7f963d 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -687,7 +687,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
 {
 	struct cpa_data alias_cpa;
 	unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT);
-	unsigned long vaddr, remapped;
+	unsigned long vaddr;
 	int ret;
 
 	if (cpa->pfn >= max_pfn_mapped)
@@ -745,24 +745,6 @@ static int cpa_process_alias(struct cpa_data *cpa)
 	}
 #endif
 
-	/*
-	 * If the PMD page was partially used for per-cpu remapping,
-	 * the recycled area needs to be split and modified. Because
-	 * the area is always proper subset of a PMD page
-	 * cpa->numpages is guaranteed to be 1 for these areas, so
-	 * there's no need to loop over and check for further remaps.
-	 */
-	remapped = (unsigned long)pcpu_lpage_remapped((void *)laddr);
-	if (remapped) {
-		WARN_ON(cpa->numpages > 1);
-		alias_cpa = *cpa;
-		alias_cpa.vaddr = &remapped;
-		alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
-		ret = __change_page_attr_set_clr(&alias_cpa, 0);
-		if (ret)
-			return ret;
-	}
-
 	return 0;
 }
 
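The block deleted above was the only caller of pcpu_lpage_remapped(); the
clone-and-retry pattern it used is the same one cpa_process_alias() still
applies to its remaining aliases. A condensed sketch of that pattern, with
the hypothetical helper name change_attr_at_alias() (the types and calls are
the ones visible in the hunk above):

static int change_attr_at_alias(struct cpa_data *cpa, unsigned long alias_addr)
{
        struct cpa_data alias_cpa = *cpa;       /* clone the original request */

        /* retarget the clone at the alias address, dropping array flags */
        alias_cpa.vaddr = &alias_addr;
        alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);

        /* re-run the attribute change against the alias mapping */
        return __change_page_attr_set_clr(&alias_cpa, 0);
}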
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 25359932740e..878836ca999c 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -82,7 +82,6 @@ enum pcpu_fc {
 	PCPU_FC_AUTO,
 	PCPU_FC_EMBED,
 	PCPU_FC_PAGE,
-	PCPU_FC_LPAGE,
 
 	PCPU_FC_NR,
 };
@@ -95,7 +94,6 @@ typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size,
 typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size);
 typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr);
 typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to);
-typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr);
 
 extern struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
 							      int nr_units);
@@ -124,20 +122,6 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size,
 					pcpu_fc_populate_pte_fn_t populate_pte_fn);
 #endif
 
-#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK
-extern int __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai,
-					 pcpu_fc_alloc_fn_t alloc_fn,
-					 pcpu_fc_free_fn_t free_fn,
-					 pcpu_fc_map_fn_t map_fn);
-
-extern void *pcpu_lpage_remapped(void *kaddr);
-#else
-static inline void *pcpu_lpage_remapped(void *kaddr)
-{
-	return NULL;
-}
-#endif
-
 /*
  * Use this to get to a cpu's version of the per-cpu object
  * dynamically allocated. Non-atomic access to the current CPU's
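After this patch the surviving first chunk entry points keep the signatures
shown above. A minimal sketch of how an arch might wire up the page
allocator, assuming bootmem is still available at this point; the callback
bodies are illustrative stand-ins, not taken from any real arch:

static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
{
        /* cpu is ignored here; a NUMA-aware arch would allocate on its node */
        return __alloc_bootmem(size, align, __pa(MAX_DMA_ADDRESS));
}

static void __init pcpu_fc_free(void *ptr, size_t size)
{
        free_bootmem(__pa(ptr), size);
}

static void __init pcpu_populate_pte(unsigned long addr)
{
        /* an arch would make sure page tables cover addr here */
}

void __init setup_per_cpu_areas(void)
{
        if (pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, pcpu_fc_alloc,
                                  pcpu_fc_free, pcpu_populate_pte) < 0)
                panic("PERCPU: failed to initialize first chunk");
}

Real arch code additionally consults pcpu_chosen_fc to honor percpu_alloc=
and falls back between the embed and page allocators; lpage is simply no
longer one of the choices.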
diff --git a/mm/percpu.c b/mm/percpu.c
index c2826d05505c..77933928107d 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1713,7 +1713,6 @@ const char *pcpu_fc_names[PCPU_FC_NR] __initdata = {
 	[PCPU_FC_AUTO] = "auto",
 	[PCPU_FC_EMBED] = "embed",
 	[PCPU_FC_PAGE] = "page",
-	[PCPU_FC_LPAGE] = "lpage",
 };
 
 enum pcpu_fc pcpu_chosen_fc __initdata = PCPU_FC_AUTO;
@@ -1730,10 +1729,6 @@ static int __init percpu_alloc_setup(char *str)
 	else if (!strcmp(str, "page"))
 		pcpu_chosen_fc = PCPU_FC_PAGE;
 #endif
-#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK
-	else if (!strcmp(str, "lpage"))
-		pcpu_chosen_fc = PCPU_FC_LPAGE;
-#endif
 	else
 		pr_warning("PERCPU: unknown allocator %s specified\n", str);
 
@@ -1970,242 +1965,6 @@ out_free_ar:
 }
 #endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */
 
-#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK
-struct pcpul_ent {
-	void *ptr;
-	void *map_addr;
-};
-
-static size_t pcpul_size;
-static size_t pcpul_lpage_size;
-static int pcpul_nr_lpages;
-static struct pcpul_ent *pcpul_map;
-
-static bool __init pcpul_unit_to_cpu(int unit, const struct pcpu_alloc_info *ai,
-				     unsigned int *cpup)
-{
-	int group, cunit;
-
-	for (group = 0, cunit = 0; group < ai->nr_groups; group++) {
-		const struct pcpu_group_info *gi = &ai->groups[group];
-
-		if (unit < cunit + gi->nr_units) {
-			if (cpup)
-				*cpup = gi->cpu_map[unit - cunit];
-			return true;
-		}
-		cunit += gi->nr_units;
-	}
-
-	return false;
-}
-
-static int __init pcpul_cpu_to_unit(int cpu, const struct pcpu_alloc_info *ai)
-{
-	int group, unit, i;
-
-	for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) {
-		const struct pcpu_group_info *gi = &ai->groups[group];
-
-		for (i = 0; i < gi->nr_units; i++)
-			if (gi->cpu_map[i] == cpu)
-				return unit + i;
-	}
-	BUG();
-}
-
-/**
- * pcpu_lpage_first_chunk - remap the first percpu chunk using large page
- * @ai: pcpu_alloc_info
- * @alloc_fn: function to allocate percpu lpage, always called with lpage_size
- * @free_fn: function to free percpu memory, @size <= lpage_size
- * @map_fn: function to map percpu lpage, always called with lpage_size
- *
- * This allocator uses large page to build and map the first chunk.
- * Unlike other helpers, the caller should provide fully initialized
- * @ai. This can be done using pcpu_build_alloc_info(). This two
- * stage initialization is to allow arch code to evaluate the
- * parameters before committing to it.
- *
- * Large pages are allocated as directed by @unit_map and other
- * parameters and mapped to vmalloc space. Unused holes are returned
- * to the page allocator. Note that these holes end up being actively
- * mapped twice - once to the physical mapping and to the vmalloc area
- * for the first percpu chunk. Depending on architecture, this might
- * cause problem when changing page attributes of the returned area.
- * These double mapped areas can be detected using
- * pcpu_lpage_remapped().
- *
- * RETURNS:
- * 0 on success, -errno on failure.
- */
-int __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai,
-				  pcpu_fc_alloc_fn_t alloc_fn,
-				  pcpu_fc_free_fn_t free_fn,
-				  pcpu_fc_map_fn_t map_fn)
-{
-	static struct vm_struct vm;
-	const size_t lpage_size = ai->atom_size;
-	size_t chunk_size, map_size;
-	unsigned int cpu;
-	int i, j, unit, nr_units, rc;
-
-	nr_units = 0;
-	for (i = 0; i < ai->nr_groups; i++)
-		nr_units += ai->groups[i].nr_units;
-
-	chunk_size = ai->unit_size * nr_units;
-	BUG_ON(chunk_size % lpage_size);
-
-	pcpul_size = ai->static_size + ai->reserved_size + ai->dyn_size;
-	pcpul_lpage_size = lpage_size;
-	pcpul_nr_lpages = chunk_size / lpage_size;
-
-	/* allocate pointer array and alloc large pages */
-	map_size = pcpul_nr_lpages * sizeof(pcpul_map[0]);
-	pcpul_map = alloc_bootmem(map_size);
-
-	/* allocate all pages */
-	for (i = 0; i < pcpul_nr_lpages; i++) {
-		size_t offset = i * lpage_size;
-		int first_unit = offset / ai->unit_size;
-		int last_unit = (offset + lpage_size - 1) / ai->unit_size;
-		void *ptr;
-
-		/* find out which cpu is mapped to this unit */
-		for (unit = first_unit; unit <= last_unit; unit++)
-			if (pcpul_unit_to_cpu(unit, ai, &cpu))
-				goto found;
-		continue;
-	found:
-		ptr = alloc_fn(cpu, lpage_size, lpage_size);
-		if (!ptr) {
-			pr_warning("PERCPU: failed to allocate large page "
-				   "for cpu%u\n", cpu);
-			goto enomem;
-		}
-
-		pcpul_map[i].ptr = ptr;
-	}
-
-	/* return unused holes */
-	for (unit = 0; unit < nr_units; unit++) {
-		size_t start = unit * ai->unit_size;
-		size_t end = start + ai->unit_size;
-		size_t off, next;
-
-		/* don't free used part of occupied unit */
-		if (pcpul_unit_to_cpu(unit, ai, NULL))
-			start += pcpul_size;
-
-		/* unit can span more than one page, punch the holes */
-		for (off = start; off < end; off = next) {
-			void *ptr = pcpul_map[off / lpage_size].ptr;
-			next = min(roundup(off + 1, lpage_size), end);
-			if (ptr)
-				free_fn(ptr + off % lpage_size, next - off);
-		}
-	}
-
-	/* allocate address, map and copy */
-	vm.flags = VM_ALLOC;
-	vm.size = chunk_size;
-	vm_area_register_early(&vm, ai->unit_size);
-
-	for (i = 0; i < pcpul_nr_lpages; i++) {
-		if (!pcpul_map[i].ptr)
-			continue;
-		pcpul_map[i].map_addr = vm.addr + i * lpage_size;
-		map_fn(pcpul_map[i].ptr, lpage_size, pcpul_map[i].map_addr);
-	}
-
-	for_each_possible_cpu(cpu)
-		memcpy(vm.addr + pcpul_cpu_to_unit(cpu, ai) * ai->unit_size,
-		       __per_cpu_load, ai->static_size);
-
-	/* we're ready, commit */
-	pr_info("PERCPU: large pages @%p s%zu r%zu d%zu u%zu\n",
-		vm.addr, ai->static_size, ai->reserved_size, ai->dyn_size,
-		ai->unit_size);
-
-	rc = pcpu_setup_first_chunk(ai, vm.addr);
-
-	/*
-	 * Sort pcpul_map array for pcpu_lpage_remapped(). Unmapped
-	 * lpages are pushed to the end and trimmed.
-	 */
-	for (i = 0; i < pcpul_nr_lpages - 1; i++)
-		for (j = i + 1; j < pcpul_nr_lpages; j++) {
-			struct pcpul_ent tmp;
-
-			if (!pcpul_map[j].ptr)
-				continue;
-			if (pcpul_map[i].ptr &&
-			    pcpul_map[i].ptr < pcpul_map[j].ptr)
-				continue;
-
-			tmp = pcpul_map[i];
-			pcpul_map[i] = pcpul_map[j];
-			pcpul_map[j] = tmp;
-		}
-
-	while (pcpul_nr_lpages && !pcpul_map[pcpul_nr_lpages - 1].ptr)
-		pcpul_nr_lpages--;
-
-	return rc;
-
-enomem:
-	for (i = 0; i < pcpul_nr_lpages; i++)
-		if (pcpul_map[i].ptr)
-			free_fn(pcpul_map[i].ptr, lpage_size);
-	free_bootmem(__pa(pcpul_map), map_size);
-	return -ENOMEM;
-}
-
-/**
- * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area
- * @kaddr: the kernel address in question
- *
- * Determine whether @kaddr falls in the pcpul recycled area. This is
- * used by pageattr to detect VM aliases and break up the pcpu large
- * page mapping such that the same physical page is not mapped under
- * different attributes.
- *
- * The recycled area is always at the tail of a partially used large
- * page.
- *
- * RETURNS:
- * Address of corresponding remapped pcpu address if match is found;
- * otherwise, NULL.
- */
-void *pcpu_lpage_remapped(void *kaddr)
-{
-	unsigned long lpage_mask = pcpul_lpage_size - 1;
-	void *lpage_addr = (void *)((unsigned long)kaddr & ~lpage_mask);
-	unsigned long offset = (unsigned long)kaddr & lpage_mask;
-	int left = 0, right = pcpul_nr_lpages - 1;
-	int pos;
-
-	/* pcpul in use at all? */
-	if (!pcpul_map)
-		return NULL;
-
-	/* okay, perform binary search */
-	while (left <= right) {
-		pos = (left + right) / 2;
-
-		if (pcpul_map[pos].ptr < lpage_addr)
-			left = pos + 1;
-		else if (pcpul_map[pos].ptr > lpage_addr)
-			right = pos - 1;
-		else
-			return pcpul_map[pos].map_addr + offset;
-	}
-
-	return NULL;
-}
-#endif /* CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK */
-
 /*
  * Generic percpu area setup.
  *
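One last note on the removed lookup: pcpu_lpage_remapped() masked the
incoming address down to an lpage boundary and binary searched pcpul_map,
relying on the sort pass in pcpu_lpage_first_chunk() having ordered the
entries by ->ptr. A self-contained illustration of that lookup, with
hypothetical types mirroring the deleted struct pcpul_ent:

#include <stddef.h>
#include <stdint.h>

struct ent {                    /* mirrors the deleted struct pcpul_ent */
        char *ptr;              /* linear address of the backing lpage */
        char *map_addr;         /* where it was remapped in vmalloc space */
};

/* map[] must be sorted by ->ptr; lpage_size must be a power of two */
static void *lookup_remapped(struct ent *map, int nr, size_t lpage_size,
                             void *kaddr)
{
        uintptr_t mask = lpage_size - 1;
        char *lpage_addr = (char *)((uintptr_t)kaddr & ~mask);
        size_t offset = (uintptr_t)kaddr & mask;
        int left = 0, right = nr - 1;

        while (left <= right) {
                int pos = (left + right) / 2;

                if (map[pos].ptr < lpage_addr)
                        left = pos + 1;
                else if (map[pos].ptr > lpage_addr)
                        right = pos - 1;
                else
                        return map[pos].map_addr + offset;
        }
        return NULL;            /* kaddr is not in any remapped lpage */
}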