aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMel Gorman <mel@csn.ul.ie>2007-10-16 04:25:56 -0400
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-10-16 12:43:00 -0400
commit5c0e3066474b57c56ff0d88ca31d95bd14232fee (patch)
tree90c963c62891db4a9039e84e615c01408b09c845
parent46dafbca2bba811665b01d8cedf911204820623c (diff)
Fix corruption of memmap on IA64 SPARSEMEM when mem_section is not a power of 2
There are problems in the use of SPARSEMEM and pageblock flags that cause problems on ia64. The first part of the problem is that the units are incorrect in the SECTION_BLOCKFLAGS_BITS computation. This results in a mem_section's section_mem_map being treated as part of a bitmap, which isn't good. This was evident with an invalid virtual address when mem_init attempted to free bootmem pages while relinquishing control from the bootmem allocator. The second part of the problem occurs because the pageblock flags bitmap is located within the mem_section. The SECTIONS_PER_ROOT computation using sizeof (mem_section) may not yield a power of 2, depending on the size of the bitmap. This means that masks and other such values are no longer based on a power of 2. This issue was seen with SPARSEMEM_EXTREME on ia64. This patch moves the bitmap outside of mem_section and uses a pointer instead in the mem_section. The bitmaps are allocated when the section is being initialised. Note that sparse_early_usemap_alloc() does not use alloc_remap() like sparse_early_mem_map_alloc(). The allocation required for the bitmap on x86, the only architecture that uses alloc_remap(), is typically smaller than a cache line. alloc_remap() pads out allocations to the cache size, which would be a needless waste. Credit to Bob Picco for identifying the original problem and effecting a fix for the SECTION_BLOCKFLAGS_BITS calculation. Credit to Andy Whitcroft for devising the best way of allocating the bitmaps only when required for the section. [wli@holomorphy.com: warning fix] Signed-off-by: Bob Picco <bob.picco@hp.com> Signed-off-by: Andy Whitcroft <apw@shadowen.org> Signed-off-by: Mel Gorman <mel@csn.ul.ie> Cc: "Luck, Tony" <tony.luck@intel.com> Signed-off-by: William Irwin <bill.irwin@oracle.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--include/linux/mmzone.h6
-rw-r--r--mm/sparse.c54
2 files changed, 55 insertions, 5 deletions
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index a8140a9a65e8..9a5d5590bd39 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -748,7 +748,7 @@ extern struct zone *next_zone(struct zone *zone);
748#define PAGE_SECTION_MASK (~(PAGES_PER_SECTION-1)) 748#define PAGE_SECTION_MASK (~(PAGES_PER_SECTION-1))
749 749
750#define SECTION_BLOCKFLAGS_BITS \ 750#define SECTION_BLOCKFLAGS_BITS \
751 ((SECTION_SIZE_BITS - (MAX_ORDER-1)) * NR_PAGEBLOCK_BITS) 751 ((1 << (PFN_SECTION_SHIFT - (MAX_ORDER-1))) * NR_PAGEBLOCK_BITS)
752 752
753#if (MAX_ORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS 753#if (MAX_ORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS
754#error Allocator MAX_ORDER exceeds SECTION_SIZE 754#error Allocator MAX_ORDER exceeds SECTION_SIZE
@@ -769,7 +769,9 @@ struct mem_section {
769 * before using it wrong. 769 * before using it wrong.
770 */ 770 */
771 unsigned long section_mem_map; 771 unsigned long section_mem_map;
772 DECLARE_BITMAP(pageblock_flags, SECTION_BLOCKFLAGS_BITS); 772
773 /* See declaration of similar field in struct zone */
774 unsigned long *pageblock_flags;
773}; 775};
774 776
775#ifdef CONFIG_SPARSEMEM_EXTREME 777#ifdef CONFIG_SPARSEMEM_EXTREME
diff --git a/mm/sparse.c b/mm/sparse.c
index 52843a76feed..1f4dbb867b8a 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -206,7 +206,8 @@ struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pn
206} 206}
207 207
208static int __meminit sparse_init_one_section(struct mem_section *ms, 208static int __meminit sparse_init_one_section(struct mem_section *ms,
209 unsigned long pnum, struct page *mem_map) 209 unsigned long pnum, struct page *mem_map,
210 unsigned long *pageblock_bitmap)
210{ 211{
211 if (!present_section(ms)) 212 if (!present_section(ms))
212 return -EINVAL; 213 return -EINVAL;
@@ -214,6 +215,7 @@ static int __meminit sparse_init_one_section(struct mem_section *ms,
214 ms->section_mem_map &= ~SECTION_MAP_MASK; 215 ms->section_mem_map &= ~SECTION_MAP_MASK;
215 ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum) | 216 ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum) |
216 SECTION_HAS_MEM_MAP; 217 SECTION_HAS_MEM_MAP;
218 ms->pageblock_flags = pageblock_bitmap;
217 219
218 return 1; 220 return 1;
219} 221}
@@ -224,6 +226,38 @@ void *alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size)
224 return NULL; 226 return NULL;
225} 227}
226 228
229static unsigned long usemap_size(void)
230{
231 unsigned long size_bytes;
232 size_bytes = roundup(SECTION_BLOCKFLAGS_BITS, 8) / 8;
233 size_bytes = roundup(size_bytes, sizeof(unsigned long));
234 return size_bytes;
235}
236
237#ifdef CONFIG_MEMORY_HOTPLUG
238static unsigned long *__kmalloc_section_usemap(void)
239{
240 return kmalloc(usemap_size(), GFP_KERNEL);
241}
242#endif /* CONFIG_MEMORY_HOTPLUG */
243
244static unsigned long *sparse_early_usemap_alloc(unsigned long pnum)
245{
246 unsigned long *usemap;
247 struct mem_section *ms = __nr_to_section(pnum);
248 int nid = sparse_early_nid(ms);
249
250 usemap = alloc_bootmem_node(NODE_DATA(nid), usemap_size());
251 if (usemap)
252 return usemap;
253
254 /* Stupid: suppress gcc warning for SPARSEMEM && !NUMA */
255 nid = 0;
256
257 printk(KERN_WARNING "%s: allocation failed\n", __FUNCTION__);
258 return NULL;
259}
260
227#ifndef CONFIG_SPARSEMEM_VMEMMAP 261#ifndef CONFIG_SPARSEMEM_VMEMMAP
228struct page __init *sparse_early_mem_map_populate(unsigned long pnum, int nid) 262struct page __init *sparse_early_mem_map_populate(unsigned long pnum, int nid)
229{ 263{
@@ -268,6 +302,7 @@ void __init sparse_init(void)
268{ 302{
269 unsigned long pnum; 303 unsigned long pnum;
270 struct page *map; 304 struct page *map;
305 unsigned long *usemap;
271 306
272 for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { 307 for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
273 if (!present_section_nr(pnum)) 308 if (!present_section_nr(pnum))
@@ -276,7 +311,13 @@ void __init sparse_init(void)
276 map = sparse_early_mem_map_alloc(pnum); 311 map = sparse_early_mem_map_alloc(pnum);
277 if (!map) 312 if (!map)
278 continue; 313 continue;
279 sparse_init_one_section(__nr_to_section(pnum), pnum, map); 314
315 usemap = sparse_early_usemap_alloc(pnum);
316 if (!usemap)
317 continue;
318
319 sparse_init_one_section(__nr_to_section(pnum), pnum, map,
320 usemap);
280 } 321 }
281} 322}
282 323
@@ -332,6 +373,7 @@ int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
332 struct pglist_data *pgdat = zone->zone_pgdat; 373 struct pglist_data *pgdat = zone->zone_pgdat;
333 struct mem_section *ms; 374 struct mem_section *ms;
334 struct page *memmap; 375 struct page *memmap;
376 unsigned long *usemap;
335 unsigned long flags; 377 unsigned long flags;
336 int ret; 378 int ret;
337 379
@@ -341,6 +383,7 @@ int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
341 */ 383 */
342 sparse_index_init(section_nr, pgdat->node_id); 384 sparse_index_init(section_nr, pgdat->node_id);
343 memmap = __kmalloc_section_memmap(nr_pages); 385 memmap = __kmalloc_section_memmap(nr_pages);
386 usemap = __kmalloc_section_usemap();
344 387
345 pgdat_resize_lock(pgdat, &flags); 388 pgdat_resize_lock(pgdat, &flags);
346 389
@@ -349,9 +392,14 @@ int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
349 ret = -EEXIST; 392 ret = -EEXIST;
350 goto out; 393 goto out;
351 } 394 }
395
396 if (!usemap) {
397 ret = -ENOMEM;
398 goto out;
399 }
352 ms->section_mem_map |= SECTION_MARKED_PRESENT; 400 ms->section_mem_map |= SECTION_MARKED_PRESENT;
353 401
354 ret = sparse_init_one_section(ms, section_nr, memmap); 402 ret = sparse_init_one_section(ms, section_nr, memmap, usemap);
355 403
356out: 404out:
357 pgdat_resize_unlock(pgdat, &flags); 405 pgdat_resize_unlock(pgdat, &flags);