author	Linus Torvalds <torvalds@linux-foundation.org>	2011-11-28 16:49:43 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-11-28 16:49:43 -0500
commit	9b5a4d4f65e260a109eaeea8bbc8062a7c58b55e (patch)
tree	ccb231635a020b629f95972dc67268d1156e5c03 /mm
parent	cb3599926e3e7b3678583195effa61a03026ab0e (diff)
parent	67589c71456b0346500629967292dea3802230b6 (diff)
Merge branch 'for-3.2-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu
* 'for-3.2-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu:
  percpu: explain why per_cpu_ptr_to_phys() is more complicated than necessary
  percpu: fix chunk range calculation
  percpu: rename pcpu_mem_alloc to pcpu_mem_zalloc
Diffstat (limited to 'mm')
-rw-r--r--	mm/percpu-vm.c	17
-rw-r--r--	mm/percpu.c	62
2 files changed, 48 insertions, 31 deletions
diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c
index ea534960a04..12a48a88c0d 100644
--- a/mm/percpu-vm.c
+++ b/mm/percpu-vm.c
@@ -50,14 +50,13 @@ static struct page **pcpu_get_pages_and_bitmap(struct pcpu_chunk *chunk,
 
 	if (!pages || !bitmap) {
 		if (may_alloc && !pages)
-			pages = pcpu_mem_alloc(pages_size);
+			pages = pcpu_mem_zalloc(pages_size);
 		if (may_alloc && !bitmap)
-			bitmap = pcpu_mem_alloc(bitmap_size);
+			bitmap = pcpu_mem_zalloc(bitmap_size);
 		if (!pages || !bitmap)
 			return NULL;
 	}
 
-	memset(pages, 0, pages_size);
 	bitmap_copy(bitmap, chunk->populated, pcpu_unit_pages);
 
 	*bitmapp = bitmap;
@@ -143,8 +142,8 @@ static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk,
 				 int page_start, int page_end)
 {
 	flush_cache_vunmap(
-		pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start),
-		pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end));
+		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
+		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
 }
 
 static void __pcpu_unmap_pages(unsigned long addr, int nr_pages)
@@ -206,8 +205,8 @@ static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk,
 				      int page_start, int page_end)
 {
 	flush_tlb_kernel_range(
-		pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start),
-		pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end));
+		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
+		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
 }
 
 static int __pcpu_map_pages(unsigned long addr, struct page **pages,
@@ -284,8 +283,8 @@ static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
 				int page_start, int page_end)
 {
 	flush_cache_vmap(
-		pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start),
-		pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end));
+		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
+		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
 }
 
 /**
diff --git a/mm/percpu.c b/mm/percpu.c
index bf80e55dbed..3bb810a7200 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -116,9 +116,9 @@ static int pcpu_atom_size __read_mostly;
 static int pcpu_nr_slots __read_mostly;
 static size_t pcpu_chunk_struct_size __read_mostly;
 
-/* cpus with the lowest and highest unit numbers */
-static unsigned int pcpu_first_unit_cpu __read_mostly;
-static unsigned int pcpu_last_unit_cpu __read_mostly;
+/* cpus with the lowest and highest unit addresses */
+static unsigned int pcpu_low_unit_cpu __read_mostly;
+static unsigned int pcpu_high_unit_cpu __read_mostly;
 
 /* the address of the first chunk which starts with the kernel static area */
 void *pcpu_base_addr __read_mostly;
@@ -273,11 +273,11 @@ static void __maybe_unused pcpu_next_pop(struct pcpu_chunk *chunk,
 	     (rs) = (re) + 1, pcpu_next_pop((chunk), &(rs), &(re), (end)))
 
 /**
- * pcpu_mem_alloc - allocate memory
+ * pcpu_mem_zalloc - allocate memory
  * @size: bytes to allocate
  *
  * Allocate @size bytes.  If @size is smaller than PAGE_SIZE,
- * kzalloc() is used; otherwise, vmalloc() is used.  The returned
+ * kzalloc() is used; otherwise, vzalloc() is used.  The returned
  * memory is always zeroed.
  *
  * CONTEXT:
@@ -286,7 +286,7 @@ static void __maybe_unused pcpu_next_pop(struct pcpu_chunk *chunk,
  * RETURNS:
  * Pointer to the allocated area on success, NULL on failure.
  */
-static void *pcpu_mem_alloc(size_t size)
+static void *pcpu_mem_zalloc(size_t size)
 {
 	if (WARN_ON_ONCE(!slab_is_available()))
 		return NULL;
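The hunk above only shows the top of the renamed helper. As a minimal sketch, assuming the size-based split described in the comment block (kzalloc() below PAGE_SIZE, vzalloc() otherwise), the complete dispatch looks roughly like this; the exact body in mm/percpu.c may differ:

/* Illustrative sketch only, not the verbatim mm/percpu.c body. */
static void *pcpu_mem_zalloc(size_t size)
{
	/* too early in boot for either allocator */
	if (WARN_ON_ONCE(!slab_is_available()))
		return NULL;

	if (size <= PAGE_SIZE)
		return kzalloc(size, GFP_KERNEL);	/* zeroed slab memory */
	else
		return vzalloc(size);			/* zeroed vmalloc memory */
}

Either way, callers get zeroed memory back, which is why dropping the explicit memset() in pcpu_get_pages_and_bitmap() above is safe.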
@@ -302,7 +302,7 @@ static void *pcpu_mem_alloc(size_t size)
  * @ptr: memory to free
  * @size: size of the area
  *
- * Free @ptr.  @ptr should have been allocated using pcpu_mem_alloc().
+ * Free @ptr.  @ptr should have been allocated using pcpu_mem_zalloc().
  */
 static void pcpu_mem_free(void *ptr, size_t size)
 {
@@ -384,7 +384,7 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc)
 	size_t old_size = 0, new_size = new_alloc * sizeof(new[0]);
 	unsigned long flags;
 
-	new = pcpu_mem_alloc(new_size);
+	new = pcpu_mem_zalloc(new_size);
 	if (!new)
 		return -ENOMEM;
 
@@ -604,11 +604,12 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
 {
 	struct pcpu_chunk *chunk;
 
-	chunk = pcpu_mem_alloc(pcpu_chunk_struct_size);
+	chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size);
 	if (!chunk)
 		return NULL;
 
-	chunk->map = pcpu_mem_alloc(PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0]));
+	chunk->map = pcpu_mem_zalloc(PCPU_DFL_MAP_ALLOC *
+						sizeof(chunk->map[0]));
 	if (!chunk->map) {
 		kfree(chunk);
 		return NULL;
@@ -977,6 +978,17 @@ bool is_kernel_percpu_address(unsigned long addr)
  * address.  The caller is responsible for ensuring @addr stays valid
  * until this function finishes.
  *
+ * percpu allocator has special setup for the first chunk, which currently
+ * supports either embedding in linear address space or vmalloc mapping,
+ * and, from the second one, the backing allocator (currently either vm or
+ * km) provides translation.
+ *
+ * The addr can be translated simply without checking if it falls into the
+ * first chunk. But the current code reflects better how percpu allocator
+ * actually works, and the verification can discover both bugs in percpu
+ * allocator itself and per_cpu_ptr_to_phys() callers. So we keep current
+ * code.
+ *
  * RETURNS:
  * The physical address for @addr.
  */
@@ -984,19 +996,19 @@ phys_addr_t per_cpu_ptr_to_phys(void *addr)
 {
 	void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr);
 	bool in_first_chunk = false;
-	unsigned long first_start, first_end;
+	unsigned long first_low, first_high;
 	unsigned int cpu;
 
 	/*
-	 * The following test on first_start/end isn't strictly
+	 * The following test on unit_low/high isn't strictly
 	 * necessary but will speed up lookups of addresses which
 	 * aren't in the first chunk.
 	 */
-	first_start = pcpu_chunk_addr(pcpu_first_chunk, pcpu_first_unit_cpu, 0);
-	first_end = pcpu_chunk_addr(pcpu_first_chunk, pcpu_last_unit_cpu,
-				    pcpu_unit_pages);
-	if ((unsigned long)addr >= first_start &&
-	    (unsigned long)addr < first_end) {
+	first_low = pcpu_chunk_addr(pcpu_first_chunk, pcpu_low_unit_cpu, 0);
+	first_high = pcpu_chunk_addr(pcpu_first_chunk, pcpu_high_unit_cpu,
+				     pcpu_unit_pages);
+	if ((unsigned long)addr >= first_low &&
+	    (unsigned long)addr < first_high) {
 		for_each_possible_cpu(cpu) {
 			void *start = per_cpu_ptr(base, cpu);
 
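For context, per_cpu_ptr_to_phys() is normally handed an address obtained from per_cpu_ptr(). A hedged usage sketch follows; the variable and wrapper names are made up for illustration, while per_cpu_ptr() and per_cpu_ptr_to_phys() are the real interfaces:

/* Hypothetical caller, for illustration only. */
static DEFINE_PER_CPU(unsigned long, demo_counter);

static phys_addr_t demo_counter_phys(unsigned int cpu)
{
	/* translate this CPU's copy of the variable to a physical address */
	return per_cpu_ptr_to_phys(per_cpu_ptr(&demo_counter, cpu));
}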
@@ -1233,7 +1245,9 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 
 	for (cpu = 0; cpu < nr_cpu_ids; cpu++)
 		unit_map[cpu] = UINT_MAX;
-	pcpu_first_unit_cpu = NR_CPUS;
+
+	pcpu_low_unit_cpu = NR_CPUS;
+	pcpu_high_unit_cpu = NR_CPUS;
 
 	for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) {
 		const struct pcpu_group_info *gi = &ai->groups[group];
@@ -1253,9 +1267,13 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 			unit_map[cpu] = unit + i;
 			unit_off[cpu] = gi->base_offset + i * ai->unit_size;
 
-			if (pcpu_first_unit_cpu == NR_CPUS)
-				pcpu_first_unit_cpu = cpu;
-			pcpu_last_unit_cpu = cpu;
+			/* determine low/high unit_cpu */
+			if (pcpu_low_unit_cpu == NR_CPUS ||
+			    unit_off[cpu] < unit_off[pcpu_low_unit_cpu])
+				pcpu_low_unit_cpu = cpu;
+			if (pcpu_high_unit_cpu == NR_CPUS ||
+			    unit_off[cpu] > unit_off[pcpu_high_unit_cpu])
+				pcpu_high_unit_cpu = cpu;
 		}
 	}
 	pcpu_nr_units = unit;
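The point of this hunk is that unit offsets are not necessarily assigned in ascending order, so the first and last CPUs visited by the loop need not own the lowest and highest unit addresses; the low/high CPUs have to be chosen by comparing unit_off[]. A standalone illustration with made-up offsets (plain userspace C, not kernel code):

#include <stdio.h>

int main(void)
{
	/* made-up unit offsets, indexed by cpu */
	unsigned long unit_off[] = { 0x3000, 0x0000, 0x1000, 0x2000 };
	unsigned int nr = 4, cpu, low = 0, high = 0;

	for (cpu = 1; cpu < nr; cpu++) {
		if (unit_off[cpu] < unit_off[low])
			low = cpu;	/* cpu with the lowest unit address */
		if (unit_off[cpu] > unit_off[high])
			high = cpu;	/* cpu with the highest unit address */
	}

	/* prints: low unit cpu=1, high unit cpu=0 -- neither is first or last */
	printf("low unit cpu=%u, high unit cpu=%u\n", low, high);
	return 0;
}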
@@ -1889,7 +1907,7 @@ void __init percpu_init_late(void)
 
 	BUILD_BUG_ON(size > PAGE_SIZE);
 
-	map = pcpu_mem_alloc(size);
+	map = pcpu_mem_zalloc(size);
 	BUG_ON(!map);
 
 	spin_lock_irqsave(&pcpu_lock, flags);