author	Tejun Heo <tj@kernel.org>	2009-08-14 02:00:53 -0400
committer	Tejun Heo <tj@kernel.org>	2009-08-14 02:00:53 -0400
commit	e933a73f48e3b2d40cfa56d81e2646f194b5a66a
tree	e828fbdac9ff888a3e8e3d750e14f132abd7ffa0
parent	4518e6a0c038b98be4c480e6f4481e8676bd15dd
percpu: kill lpage first chunk allocator
With x86 converted to embedding allocator, lpage doesn't have any user
left.  Kill it along with cpa handling code.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jan Beulich <JBeulich@novell.com>
-rw-r--r--	Documentation/kernel-parameters.txt	10
-rw-r--r--	arch/x86/mm/pageattr.c	20
-rw-r--r--	include/linux/percpu.h	16
-rw-r--r--	mm/percpu.c	241
4 files changed, 6 insertions, 281 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index dee9ce2e6cfa..e710093e3d32 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1920,11 +1920,11 @@ and is between 256 and 4096 characters. It is defined in the file
 			See arch/parisc/kernel/pdc_chassis.c
 
 	percpu_alloc=	Select which percpu first chunk allocator to use.
-			Currently supported values are "embed", "page" and
-			"lpage". Archs may support subset or none of the
-			selections. See comments in mm/percpu.c for details
-			on each allocator. This parameter is primarily for
-			debugging and performance comparison.
+			Currently supported values are "embed" and "page".
+			Archs may support subset or none of the selections.
+			See comments in mm/percpu.c for details on each
+			allocator. This parameter is primarily for debugging
+			and performance comparison.
 
 	pf.		[PARIDE]
 			See Documentation/blockdev/paride.txt.
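After this change, percpu_alloc= only recognizes the two values documented above, and each only where the architecture provides the corresponding first chunk helper; any other string (including the old "lpage") falls through to the "unknown allocator" warning in percpu_alloc_setup() and the kernel keeps the automatic selection. A minimal illustration of the remaining usage (example command lines, not part of the patch):

	percpu_alloc=embed	# build the first chunk with the embedding allocator
	percpu_alloc=page	# map the first chunk page by page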
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index dce282f65700..f53cfc7f963d 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -687,7 +687,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
 {
 	struct cpa_data alias_cpa;
 	unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT);
-	unsigned long vaddr, remapped;
+	unsigned long vaddr;
 	int ret;
 
 	if (cpa->pfn >= max_pfn_mapped)
@@ -745,24 +745,6 @@ static int cpa_process_alias(struct cpa_data *cpa)
 	}
 #endif
 
-	/*
-	 * If the PMD page was partially used for per-cpu remapping,
-	 * the recycled area needs to be split and modified. Because
-	 * the area is always proper subset of a PMD page
-	 * cpa->numpages is guaranteed to be 1 for these areas, so
-	 * there's no need to loop over and check for further remaps.
-	 */
-	remapped = (unsigned long)pcpu_lpage_remapped((void *)laddr);
-	if (remapped) {
-		WARN_ON(cpa->numpages > 1);
-		alias_cpa = *cpa;
-		alias_cpa.vaddr = &remapped;
-		alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
-		ret = __change_page_attr_set_clr(&alias_cpa, 0);
-		if (ret)
-			return ret;
-	}
-
 	return 0;
 }
 
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 25359932740e..878836ca999c 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -82,7 +82,6 @@ enum pcpu_fc {
 	PCPU_FC_AUTO,
 	PCPU_FC_EMBED,
 	PCPU_FC_PAGE,
-	PCPU_FC_LPAGE,
 
 	PCPU_FC_NR,
 };
@@ -95,7 +94,6 @@ typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size,
 typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size);
 typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr);
 typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to);
-typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr);
 
 extern struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
 							      int nr_units);
@@ -124,20 +122,6 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size,
 				pcpu_fc_populate_pte_fn_t populate_pte_fn);
 #endif
 
-#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK
-extern int __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai,
-				pcpu_fc_alloc_fn_t alloc_fn,
-				pcpu_fc_free_fn_t free_fn,
-				pcpu_fc_map_fn_t map_fn);
-
-extern void *pcpu_lpage_remapped(void *kaddr);
-#else
-static inline void *pcpu_lpage_remapped(void *kaddr)
-{
-	return NULL;
-}
-#endif
-
 /*
  * Use this to get to a cpu's version of the per-cpu object
  * dynamically allocated. Non-atomic access to the current CPU's
diff --git a/mm/percpu.c b/mm/percpu.c
index c2826d05505c..77933928107d 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1713,7 +1713,6 @@ const char *pcpu_fc_names[PCPU_FC_NR] __initdata = {
 	[PCPU_FC_AUTO] = "auto",
 	[PCPU_FC_EMBED] = "embed",
 	[PCPU_FC_PAGE] = "page",
-	[PCPU_FC_LPAGE] = "lpage",
 };
 
 enum pcpu_fc pcpu_chosen_fc __initdata = PCPU_FC_AUTO;
@@ -1730,10 +1729,6 @@ static int __init percpu_alloc_setup(char *str)
 	else if (!strcmp(str, "page"))
 		pcpu_chosen_fc = PCPU_FC_PAGE;
 #endif
-#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK
-	else if (!strcmp(str, "lpage"))
-		pcpu_chosen_fc = PCPU_FC_LPAGE;
-#endif
 	else
 		pr_warning("PERCPU: unknown allocator %s specified\n", str);
 
@@ -1970,242 +1965,6 @@ out_free_ar:
 }
 #endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */
 
-#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK
-struct pcpul_ent {
-	void *ptr;
-	void *map_addr;
-};
-
-static size_t pcpul_size;
-static size_t pcpul_lpage_size;
-static int pcpul_nr_lpages;
-static struct pcpul_ent *pcpul_map;
-
-static bool __init pcpul_unit_to_cpu(int unit, const struct pcpu_alloc_info *ai,
-				     unsigned int *cpup)
-{
-	int group, cunit;
-
-	for (group = 0, cunit = 0; group < ai->nr_groups; group++) {
-		const struct pcpu_group_info *gi = &ai->groups[group];
-
-		if (unit < cunit + gi->nr_units) {
-			if (cpup)
-				*cpup = gi->cpu_map[unit - cunit];
-			return true;
-		}
-		cunit += gi->nr_units;
-	}
-
-	return false;
-}
-
-static int __init pcpul_cpu_to_unit(int cpu, const struct pcpu_alloc_info *ai)
-{
-	int group, unit, i;
-
-	for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) {
-		const struct pcpu_group_info *gi = &ai->groups[group];
-
-		for (i = 0; i < gi->nr_units; i++)
-			if (gi->cpu_map[i] == cpu)
-				return unit + i;
-	}
-	BUG();
-}
-
-/**
- * pcpu_lpage_first_chunk - remap the first percpu chunk using large page
- * @ai: pcpu_alloc_info
- * @alloc_fn: function to allocate percpu lpage, always called with lpage_size
- * @free_fn: function to free percpu memory, @size <= lpage_size
- * @map_fn: function to map percpu lpage, always called with lpage_size
- *
- * This allocator uses large page to build and map the first chunk.
- * Unlike other helpers, the caller should provide fully initialized
- * @ai. This can be done using pcpu_build_alloc_info(). This two
- * stage initialization is to allow arch code to evaluate the
- * parameters before committing to it.
- *
- * Large pages are allocated as directed by @unit_map and other
- * parameters and mapped to vmalloc space. Unused holes are returned
- * to the page allocator. Note that these holes end up being actively
- * mapped twice - once to the physical mapping and to the vmalloc area
- * for the first percpu chunk. Depending on architecture, this might
- * cause problem when changing page attributes of the returned area.
- * These double mapped areas can be detected using
- * pcpu_lpage_remapped().
- *
- * RETURNS:
- * 0 on success, -errno on failure.
- */
-int __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai,
-				  pcpu_fc_alloc_fn_t alloc_fn,
-				  pcpu_fc_free_fn_t free_fn,
-				  pcpu_fc_map_fn_t map_fn)
-{
-	static struct vm_struct vm;
-	const size_t lpage_size = ai->atom_size;
-	size_t chunk_size, map_size;
-	unsigned int cpu;
-	int i, j, unit, nr_units, rc;
-
-	nr_units = 0;
-	for (i = 0; i < ai->nr_groups; i++)
-		nr_units += ai->groups[i].nr_units;
-
-	chunk_size = ai->unit_size * nr_units;
-	BUG_ON(chunk_size % lpage_size);
-
-	pcpul_size = ai->static_size + ai->reserved_size + ai->dyn_size;
-	pcpul_lpage_size = lpage_size;
-	pcpul_nr_lpages = chunk_size / lpage_size;
-
-	/* allocate pointer array and alloc large pages */
-	map_size = pcpul_nr_lpages * sizeof(pcpul_map[0]);
-	pcpul_map = alloc_bootmem(map_size);
-
-	/* allocate all pages */
-	for (i = 0; i < pcpul_nr_lpages; i++) {
-		size_t offset = i * lpage_size;
-		int first_unit = offset / ai->unit_size;
-		int last_unit = (offset + lpage_size - 1) / ai->unit_size;
-		void *ptr;
-
-		/* find out which cpu is mapped to this unit */
-		for (unit = first_unit; unit <= last_unit; unit++)
-			if (pcpul_unit_to_cpu(unit, ai, &cpu))
-				goto found;
-		continue;
-	found:
-		ptr = alloc_fn(cpu, lpage_size, lpage_size);
-		if (!ptr) {
-			pr_warning("PERCPU: failed to allocate large page "
-				   "for cpu%u\n", cpu);
-			goto enomem;
-		}
-
-		pcpul_map[i].ptr = ptr;
-	}
-
-	/* return unused holes */
-	for (unit = 0; unit < nr_units; unit++) {
-		size_t start = unit * ai->unit_size;
-		size_t end = start + ai->unit_size;
-		size_t off, next;
-
-		/* don't free used part of occupied unit */
-		if (pcpul_unit_to_cpu(unit, ai, NULL))
-			start += pcpul_size;
-
-		/* unit can span more than one page, punch the holes */
-		for (off = start; off < end; off = next) {
-			void *ptr = pcpul_map[off / lpage_size].ptr;
-			next = min(roundup(off + 1, lpage_size), end);
-			if (ptr)
-				free_fn(ptr + off % lpage_size, next - off);
-		}
-	}
-
-	/* allocate address, map and copy */
-	vm.flags = VM_ALLOC;
-	vm.size = chunk_size;
-	vm_area_register_early(&vm, ai->unit_size);
-
-	for (i = 0; i < pcpul_nr_lpages; i++) {
-		if (!pcpul_map[i].ptr)
-			continue;
-		pcpul_map[i].map_addr = vm.addr + i * lpage_size;
-		map_fn(pcpul_map[i].ptr, lpage_size, pcpul_map[i].map_addr);
-	}
-
-	for_each_possible_cpu(cpu)
-		memcpy(vm.addr + pcpul_cpu_to_unit(cpu, ai) * ai->unit_size,
-		       __per_cpu_load, ai->static_size);
-
-	/* we're ready, commit */
-	pr_info("PERCPU: large pages @%p s%zu r%zu d%zu u%zu\n",
-		vm.addr, ai->static_size, ai->reserved_size, ai->dyn_size,
-		ai->unit_size);
-
-	rc = pcpu_setup_first_chunk(ai, vm.addr);
-
-	/*
-	 * Sort pcpul_map array for pcpu_lpage_remapped(). Unmapped
-	 * lpages are pushed to the end and trimmed.
-	 */
-	for (i = 0; i < pcpul_nr_lpages - 1; i++)
-		for (j = i + 1; j < pcpul_nr_lpages; j++) {
-			struct pcpul_ent tmp;
-
-			if (!pcpul_map[j].ptr)
-				continue;
-			if (pcpul_map[i].ptr &&
-			    pcpul_map[i].ptr < pcpul_map[j].ptr)
-				continue;
-
-			tmp = pcpul_map[i];
-			pcpul_map[i] = pcpul_map[j];
-			pcpul_map[j] = tmp;
-		}
-
-	while (pcpul_nr_lpages && !pcpul_map[pcpul_nr_lpages - 1].ptr)
-		pcpul_nr_lpages--;
-
-	return rc;
-
-enomem:
-	for (i = 0; i < pcpul_nr_lpages; i++)
-		if (pcpul_map[i].ptr)
-			free_fn(pcpul_map[i].ptr, lpage_size);
-	free_bootmem(__pa(pcpul_map), map_size);
-	return -ENOMEM;
-}
-
-/**
- * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area
- * @kaddr: the kernel address in question
- *
- * Determine whether @kaddr falls in the pcpul recycled area. This is
- * used by pageattr to detect VM aliases and break up the pcpu large
- * page mapping such that the same physical page is not mapped under
- * different attributes.
- *
- * The recycled area is always at the tail of a partially used large
- * page.
- *
- * RETURNS:
- * Address of corresponding remapped pcpu address if match is found;
- * otherwise, NULL.
- */
-void *pcpu_lpage_remapped(void *kaddr)
-{
-	unsigned long lpage_mask = pcpul_lpage_size - 1;
-	void *lpage_addr = (void *)((unsigned long)kaddr & ~lpage_mask);
-	unsigned long offset = (unsigned long)kaddr & lpage_mask;
-	int left = 0, right = pcpul_nr_lpages - 1;
-	int pos;
-
-	/* pcpul in use at all? */
-	if (!pcpul_map)
-		return NULL;
-
-	/* okay, perform binary search */
-	while (left <= right) {
-		pos = (left + right) / 2;
-
-		if (pcpul_map[pos].ptr < lpage_addr)
-			left = pos + 1;
-		else if (pcpul_map[pos].ptr > lpage_addr)
-			right = pos - 1;
-		else
-			return pcpul_map[pos].map_addr + offset;
-	}
-
-	return NULL;
-}
-#endif /* CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK */
-
 /*
  * Generic percpu area setup.
  *