aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristoph Lameter <clameter@sgi.com>2006-12-06 23:31:45 -0500
committerLinus Torvalds <torvalds@woody.osdl.org>2006-12-07 11:39:20 -0500
commit89689ae7f95995723fbcd5c116c47933a3bb8b13 (patch)
tree4d73ff59b557fa1a84c6064406ff101c76ff8adc
parentc0a499c2c42992cff097b38be29d2ba60d2fd99a (diff)
[PATCH] Get rid of zone_table[]
The zone table is mostly not needed. If we have a node in the page flags then we can get to the zone via NODE_DATA() which is much more likely to be already in the cpu cache. In case of SMP and UP NODE_DATA() is a constant pointer which allows us to access an exact replica of zonetable in the node_zones field. In all of the above cases there will be no need at all for the zone table. The only remaining case is if in a NUMA system the node numbers do not fit into the page flags. In that case we make sparse generate a table that maps sections to nodes and use that table to figure out the node number. This table is sized to fit in a single cache line for the known 32 bit NUMA platform which makes it very likely that the information can be obtained without a cache miss. For sparsemem the zone table seems to have been fairly large based on the maximum possible number of sections and the number of zones per node. There is some memory saving by removing zone_table. The main benefit is to reduce the cache footprint of the VM from the frequent lookups of zones. Plus it simplifies the page allocator. [akpm@osdl.org: build fix] Signed-off-by: Christoph Lameter <clameter@sgi.com> Cc: Dave Hansen <haveblue@us.ibm.com> Cc: Andy Whitcroft <apw@shadowen.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--include/linux/mm.h57
-rw-r--r--mm/memory_hotplug.c1
-rw-r--r--mm/page_alloc.c22
-rw-r--r--mm/sparse.c23
4 files changed, 59 insertions, 44 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index d538de901965..ab6e4974f379 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -396,7 +396,9 @@ void split_page(struct page *page, unsigned int order);
396 * We are going to use the flags for the page to node mapping if its in 396 * We are going to use the flags for the page to node mapping if its in
397 * there. This includes the case where there is no node, so it is implicit. 397 * there. This includes the case where there is no node, so it is implicit.
398 */ 398 */
399#define FLAGS_HAS_NODE (NODES_WIDTH > 0 || NODES_SHIFT == 0) 399#if !(NODES_WIDTH > 0 || NODES_SHIFT == 0)
400#define NODE_NOT_IN_PAGE_FLAGS
401#endif
400 402
401#ifndef PFN_SECTION_SHIFT 403#ifndef PFN_SECTION_SHIFT
402#define PFN_SECTION_SHIFT 0 404#define PFN_SECTION_SHIFT 0
@@ -411,13 +413,18 @@ void split_page(struct page *page, unsigned int order);
411#define NODES_PGSHIFT (NODES_PGOFF * (NODES_WIDTH != 0)) 413#define NODES_PGSHIFT (NODES_PGOFF * (NODES_WIDTH != 0))
412#define ZONES_PGSHIFT (ZONES_PGOFF * (ZONES_WIDTH != 0)) 414#define ZONES_PGSHIFT (ZONES_PGOFF * (ZONES_WIDTH != 0))
413 415
414/* NODE:ZONE or SECTION:ZONE is used to lookup the zone from a page. */ 416/* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allocator */
415#if FLAGS_HAS_NODE 417#ifdef NODE_NOT_IN_PAGEFLAGS
416#define ZONETABLE_SHIFT (NODES_SHIFT + ZONES_SHIFT) 418#define ZONEID_SHIFT (SECTIONS_SHIFT + ZONES_SHIFT)
417#else 419#else
418#define ZONETABLE_SHIFT (SECTIONS_SHIFT + ZONES_SHIFT) 420#define ZONEID_SHIFT (NODES_SHIFT + ZONES_SHIFT)
421#endif
422
423#if ZONES_WIDTH > 0
424#define ZONEID_PGSHIFT ZONES_PGSHIFT
425#else
426#define ZONEID_PGSHIFT NODES_PGOFF
419#endif 427#endif
420#define ZONETABLE_PGSHIFT ZONES_PGSHIFT
421 428
422#if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > FLAGS_RESERVED 429#if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > FLAGS_RESERVED
423#error SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > FLAGS_RESERVED 430#error SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > FLAGS_RESERVED
@@ -426,23 +433,25 @@ void split_page(struct page *page, unsigned int order);
426#define ZONES_MASK ((1UL << ZONES_WIDTH) - 1) 433#define ZONES_MASK ((1UL << ZONES_WIDTH) - 1)
427#define NODES_MASK ((1UL << NODES_WIDTH) - 1) 434#define NODES_MASK ((1UL << NODES_WIDTH) - 1)
428#define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1) 435#define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1)
429#define ZONETABLE_MASK ((1UL << ZONETABLE_SHIFT) - 1) 436#define ZONEID_MASK ((1UL << ZONEID_SHIFT) - 1)
430 437
431static inline enum zone_type page_zonenum(struct page *page) 438static inline enum zone_type page_zonenum(struct page *page)
432{ 439{
433 return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; 440 return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK;
434} 441}
435 442
436struct zone; 443/*
437extern struct zone *zone_table[]; 444 * The identification function is only used by the buddy allocator for
438 445 * determining if two pages could be buddies. We are not really
446 * identifying a zone since we could be using the section number
447 * id if we have no node id available in page flags.
448 * We guarantee only that it will return the same value for two
449 * combinable pages in a zone.
450 */
439static inline int page_zone_id(struct page *page) 451static inline int page_zone_id(struct page *page)
440{ 452{
441 return (page->flags >> ZONETABLE_PGSHIFT) & ZONETABLE_MASK; 453 BUILD_BUG_ON(ZONEID_PGSHIFT == 0 && ZONEID_MASK);
442} 454 return (page->flags >> ZONEID_PGSHIFT) & ZONEID_MASK;
443static inline struct zone *page_zone(struct page *page)
444{
445 return zone_table[page_zone_id(page)];
446} 455}
447 456
448static inline unsigned long zone_to_nid(struct zone *zone) 457static inline unsigned long zone_to_nid(struct zone *zone)
@@ -454,13 +463,20 @@ static inline unsigned long zone_to_nid(struct zone *zone)
454#endif 463#endif
455} 464}
456 465
466#ifdef NODE_NOT_IN_PAGE_FLAGS
467extern unsigned long page_to_nid(struct page *page);
468#else
457static inline unsigned long page_to_nid(struct page *page) 469static inline unsigned long page_to_nid(struct page *page)
458{ 470{
459 if (FLAGS_HAS_NODE) 471 return (page->flags >> NODES_PGSHIFT) & NODES_MASK;
460 return (page->flags >> NODES_PGSHIFT) & NODES_MASK;
461 else
462 return zone_to_nid(page_zone(page));
463} 472}
473#endif
474
475static inline struct zone *page_zone(struct page *page)
476{
477 return &NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)];
478}
479
464static inline unsigned long page_to_section(struct page *page) 480static inline unsigned long page_to_section(struct page *page)
465{ 481{
466 return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK; 482 return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK;
@@ -477,6 +493,7 @@ static inline void set_page_node(struct page *page, unsigned long node)
477 page->flags &= ~(NODES_MASK << NODES_PGSHIFT); 493 page->flags &= ~(NODES_MASK << NODES_PGSHIFT);
478 page->flags |= (node & NODES_MASK) << NODES_PGSHIFT; 494 page->flags |= (node & NODES_MASK) << NODES_PGSHIFT;
479} 495}
496
480static inline void set_page_section(struct page *page, unsigned long section) 497static inline void set_page_section(struct page *page, unsigned long section)
481{ 498{
482 page->flags &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT); 499 page->flags &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT);
@@ -947,8 +964,6 @@ extern void mem_init(void);
947extern void show_mem(void); 964extern void show_mem(void);
948extern void si_meminfo(struct sysinfo * val); 965extern void si_meminfo(struct sysinfo * val);
949extern void si_meminfo_node(struct sysinfo *val, int nid); 966extern void si_meminfo_node(struct sysinfo *val, int nid);
950extern void zonetable_add(struct zone *zone, int nid, enum zone_type zid,
951 unsigned long pfn, unsigned long size);
952 967
953#ifdef CONFIG_NUMA 968#ifdef CONFIG_NUMA
954extern void setup_per_cpu_pageset(void); 969extern void setup_per_cpu_pageset(void);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index fd678a662eae..0c055a090f4d 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -72,7 +72,6 @@ static int __add_zone(struct zone *zone, unsigned long phys_start_pfn)
72 return ret; 72 return ret;
73 } 73 }
74 memmap_init_zone(nr_pages, nid, zone_type, phys_start_pfn); 74 memmap_init_zone(nr_pages, nid, zone_type, phys_start_pfn);
75 zonetable_add(zone, nid, zone_type, phys_start_pfn, nr_pages);
76 return 0; 75 return 0;
77} 76}
78 77
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 08360aa111f9..23bc5bcbdcf9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -83,13 +83,6 @@ int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = {
83 83
84EXPORT_SYMBOL(totalram_pages); 84EXPORT_SYMBOL(totalram_pages);
85 85
86/*
87 * Used by page_zone() to look up the address of the struct zone whose
88 * id is encoded in the upper bits of page->flags
89 */
90struct zone *zone_table[1 << ZONETABLE_SHIFT] __read_mostly;
91EXPORT_SYMBOL(zone_table);
92
93static char *zone_names[MAX_NR_ZONES] = { 86static char *zone_names[MAX_NR_ZONES] = {
94 "DMA", 87 "DMA",
95#ifdef CONFIG_ZONE_DMA32 88#ifdef CONFIG_ZONE_DMA32
@@ -1715,20 +1708,6 @@ void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone,
1715 } 1708 }
1716} 1709}
1717 1710
1718#define ZONETABLE_INDEX(x, zone_nr) ((x << ZONES_SHIFT) | zone_nr)
1719void zonetable_add(struct zone *zone, int nid, enum zone_type zid,
1720 unsigned long pfn, unsigned long size)
1721{
1722 unsigned long snum = pfn_to_section_nr(pfn);
1723 unsigned long end = pfn_to_section_nr(pfn + size);
1724
1725 if (FLAGS_HAS_NODE)
1726 zone_table[ZONETABLE_INDEX(nid, zid)] = zone;
1727 else
1728 for (; snum <= end; snum++)
1729 zone_table[ZONETABLE_INDEX(snum, zid)] = zone;
1730}
1731
1732#ifndef __HAVE_ARCH_MEMMAP_INIT 1711#ifndef __HAVE_ARCH_MEMMAP_INIT
1733#define memmap_init(size, nid, zone, start_pfn) \ 1712#define memmap_init(size, nid, zone, start_pfn) \
1734 memmap_init_zone((size), (nid), (zone), (start_pfn)) 1713 memmap_init_zone((size), (nid), (zone), (start_pfn))
@@ -2421,7 +2400,6 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
2421 if (!size) 2400 if (!size)
2422 continue; 2401 continue;
2423 2402
2424 zonetable_add(zone, nid, j, zone_start_pfn, size);
2425 ret = init_currently_empty_zone(zone, zone_start_pfn, size); 2403 ret = init_currently_empty_zone(zone, zone_start_pfn, size);
2426 BUG_ON(ret); 2404 BUG_ON(ret);
2427 zone_start_pfn += size; 2405 zone_start_pfn += size;
diff --git a/mm/sparse.c b/mm/sparse.c
index b3c82ba30012..158d6a2a5263 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -24,6 +24,25 @@ struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
24#endif 24#endif
25EXPORT_SYMBOL(mem_section); 25EXPORT_SYMBOL(mem_section);
26 26
27#ifdef NODE_NOT_IN_PAGE_FLAGS
28/*
29 * If we did not store the node number in the page then we have to
30 * do a lookup in the section_to_node_table in order to find which
31 * node the page belongs to.
32 */
33#if MAX_NUMNODES <= 256
34static u8 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
35#else
36static u16 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
37#endif
38
39unsigned long page_to_nid(struct page *page)
40{
41 return section_to_node_table[page_to_section(page)];
42}
43EXPORT_SYMBOL(page_to_nid);
44#endif
45
27#ifdef CONFIG_SPARSEMEM_EXTREME 46#ifdef CONFIG_SPARSEMEM_EXTREME
28static struct mem_section *sparse_index_alloc(int nid) 47static struct mem_section *sparse_index_alloc(int nid)
29{ 48{
@@ -49,6 +68,10 @@ static int sparse_index_init(unsigned long section_nr, int nid)
49 struct mem_section *section; 68 struct mem_section *section;
50 int ret = 0; 69 int ret = 0;
51 70
71#ifdef NODE_NOT_IN_PAGE_FLAGS
72 section_to_node_table[section_nr] = nid;
73#endif
74
52 if (mem_section[root]) 75 if (mem_section[root])
53 return -EEXIST; 76 return -EEXIST;
54 77