author    Yasunori Goto <y-goto@jp.fujitsu.com>  2006-06-23 05:03:10 -0400
committer Linus Torvalds <torvalds@g5.osdl.org>  2006-06-23 10:42:46 -0400
commit    cca448fe92246fb59efe55ba2e048ded0971a9af (patch)
tree      9663651686508b62a061851927f0b330200c40b1
parent    718127cc3170454f4aa274fdd2f1e01574fecd66 (diff)
[PATCH] wait_table and zonelist initializing for memory hotadd: wait_table initialization
The wait_table is initialized according to zone size at boot time.  But we
cannot know the maximum zone size when memory hotplug is enabled - it can
change later, and resizing the wait_table is hard.  So the kernel allocates
and initializes the wait_table at its maximum size.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--  mm/page_alloc.c | 59
1 file changed, 53 insertions(+), 6 deletions(-)
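The diff below only references the existing boot-time sizing helper by its
signature. For readers without page_alloc.c at hand, here is a hedged
userspace re-statement of that sizing logic as it stood around this kernel
version (a sketch for illustration only, not part of the patch): one hashed
wait queue per PAGES_PER_WAITQUEUE pages, rounded up to a power of two and
clamped to the range [4, 4096].

#include <stdio.h>

#define PAGES_PER_WAITQUEUE 256UL

/* Sketch of the boot-time sizing kept for !CONFIG_MEMORY_HOTPLUG. */
static unsigned long wait_table_hash_nr_entries(unsigned long pages)
{
        unsigned long size = 1;

        pages /= PAGES_PER_WAITQUEUE;
        while (size < pages)
                size <<= 1;
        if (size > 4096UL)      /* cap; the hotplug variant just returns this */
                size = 4096UL;
        return size < 4UL ? 4UL : size;
}

int main(void)
{
        /* 32768 pages (128MB with 4K pages) -> 128 wait-queue heads */
        printf("%lu\n", wait_table_hash_nr_entries(32768UL));
        /* 524544 pages (2G + 1M with 4K pages) -> hits the 4096 ceiling */
        printf("%lu\n", wait_table_hash_nr_entries(524544UL));
        return 0;
}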
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4bc66f6b7718..62564e27b448 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1727,6 +1727,7 @@ void __init build_all_zonelists(void)
  */
 #define PAGES_PER_WAITQUEUE	256
 
+#ifndef CONFIG_MEMORY_HOTPLUG
 static inline unsigned long wait_table_hash_nr_entries(unsigned long pages)
 {
 	unsigned long size = 1;
@@ -1745,6 +1746,29 @@ static inline unsigned long wait_table_hash_nr_entries(unsigned long pages)
 
 	return max(size, 4UL);
 }
+#else
+/*
+ * A zone's size might be changed by hot-add, so it is not possible to determine
+ * a suitable size for its wait_table.  So we use the maximum size now.
+ *
+ * The max wait table size = 4096 x sizeof(wait_queue_head_t).   ie:
+ *
+ *    i386 (preemption config)    : 4096 x 16 = 64Kbyte.
+ *    ia64, x86-64 (no preemption): 4096 x 20 = 80Kbyte.
+ *    ia64, x86-64 (preemption)   : 4096 x 24 = 96Kbyte.
+ *
+ * The maximum entries are prepared when a zone's memory is (512K + 256) pages
+ * or more by the traditional way.  (See above).  It equals:
+ *
+ *    i386, x86-64, powerpc(4K page size) : =  ( 2G + 1M)byte.
+ *    ia64(16K page size)                 : =  ( 8G + 4M)byte.
+ *    powerpc (64K page size)             : =  (32G +16M)byte.
+ */
+static inline unsigned long wait_table_hash_nr_entries(unsigned long pages)
+{
+	return 4096UL;
+}
+#endif
 
 /*
  * This is an integer logarithm so that shifts can be used later
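Checking the figures in that comment: the boot-time sizing reaches the
4096-entry ceiling once pages / PAGES_PER_WAITQUEUE exceeds 2048, i.e. at
2048 * 256 + 256 = 524,544 pages (512K + 256).  With 4KB pages that is
2GB + 1MB of zone memory, with 16KB pages 8GB + 4MB, and with 64KB pages
32GB + 16MB.  The table itself then costs 4096 * sizeof(wait_queue_head_t):
64KB when the entry is 16 bytes, 80KB at 20 bytes, and 96KB at 24 bytes -
the fixed per-zone overhead the hotplug variant accepts.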
@@ -2010,10 +2034,11 @@ void __init setup_per_cpu_pageset(void)
 #endif
 
 static __meminit
-void zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
+int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
 {
 	int i;
 	struct pglist_data *pgdat = zone->zone_pgdat;
+	size_t alloc_size;
 
 	/*
 	 * The per-page waitqueue mechanism uses hashed waitqueues
@@ -2023,12 +2048,32 @@ void zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
 		wait_table_hash_nr_entries(zone_size_pages);
 	zone->wait_table_bits =
 		wait_table_bits(zone->wait_table_hash_nr_entries);
-	zone->wait_table = (wait_queue_head_t *)
-		alloc_bootmem_node(pgdat, zone->wait_table_hash_nr_entries
-					* sizeof(wait_queue_head_t));
+	alloc_size = zone->wait_table_hash_nr_entries
+					* sizeof(wait_queue_head_t);
+
+	if (system_state == SYSTEM_BOOTING) {
+		zone->wait_table = (wait_queue_head_t *)
+			alloc_bootmem_node(pgdat, alloc_size);
+	} else {
+		/*
+		 * This case means that a zone whose size was 0 gets new memory
+		 * via memory hot-add.
+		 * But it may be the case that a new node was hot-added.  In
+		 * this case vmalloc() will not be able to use this new node's
+		 * memory - this wait_table must be initialized to use this new
+		 * node itself as well.
+		 * To use this new node's memory, further consideration will be
+		 * necessary.
+		 */
+		zone->wait_table = (wait_queue_head_t *)vmalloc(alloc_size);
+	}
+	if (!zone->wait_table)
+		return -ENOMEM;
 
 	for(i = 0; i < zone->wait_table_hash_nr_entries; ++i)
 		init_waitqueue_head(zone->wait_table + i);
+
+	return 0;
 }
 
 static __meminit void zone_pcp_init(struct zone *zone)
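With this hunk zone_wait_table_init() can now fail: after boot, bootmem is no
longer available, so a zone that was empty at boot and gains memory through
hot-add gets its table from vmalloc(), which may return NULL (and, as the
comment notes, may not place the table on a newly hot-added node).  The
function therefore returns 0 or -ENOMEM, and the final hunk below makes
init_currently_empty_zone() check and propagate that result.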
@@ -2055,8 +2100,10 @@ __meminit int init_currently_empty_zone(struct zone *zone,
 						unsigned long size)
 {
 	struct pglist_data *pgdat = zone->zone_pgdat;
-
-	zone_wait_table_init(zone, size);
+	int ret;
+	ret = zone_wait_table_init(zone, size);
+	if (ret)
+		return ret;
 	pgdat->nr_zones = zone_idx(zone) + 1;
 
 	zone->zone_start_pfn = zone_start_pfn;