aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYasunori Goto <y-goto@jp.fujitsu.com>2008-04-28 05:13:31 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-04-28 11:58:25 -0400
commit04753278769f3b6c3b79a080edb52f21d83bf6e2 (patch)
tree0dff4088b44016b6d04930b2fc09419412821aa2
parent7f2e9525ba55b1c42ad6c4a5a59d7eb7bdd9be72 (diff)
memory hotplug: register section/node id to free
This patch set is to free pages which are allocated by bootmem for memory-hotremove. Some structures of memory management are allocated by bootmem. ex) memmap, etc. To remove memory physically, some of them must be freed according to circumstance. This patch set makes a basis for freeing those pages, and frees memmaps. My basic idea is to use the remaining members of struct page to remember information about the users of bootmem (section number or node id). When the section is being removed, the kernel can confirm it. With this information, some issues can be solved. 1) When the memmap of a section being removed was allocated on another section by bootmem, it should/can be freed. 2) When the memmap of a section being removed was allocated on the same section, it shouldn't be freed. Because the section has to be logically memory-offlined already and all pages must be isolated from the page allocator. If it were freed, the page allocator might use it even though it will be removed physically soon. 3) When a section being removed holds another section's memmap, the kernel will be able to easily show the user which section should be removed before it. (Not implemented yet) 4) In case 2) above, page isolation will be able to check and skip the memmap's pages during logical memory offline (offline_pages()). The current page isolation code fails in this case because these pages are just reserved pages and it can't distinguish whether these pages can be removed or not. But it will be able to do so with this patch. (Not implemented yet.) 5) The node information such as pgdat has similar issues. But these will be solvable too by this approach. (Not implemented yet, but by remembering the node id in the pages.) Fortunately, the current bootmem allocator just keeps PageReserved flags, and doesn't use any other members of struct page. The users of bootmem don't use them either. This patch: This registers information identifying the node or section id. The kernel can distinguish which node/section uses the pages allocated by bootmem. This is the basis for hot-removing sections or nodes.
Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com> Cc: Badari Pulavarty <pbadari@us.ibm.com> Cc: Yinghai Lu <yhlu.kernel@gmail.com> Cc: Yasunori Goto <y-goto@jp.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--include/linux/memory_hotplug.h27
-rw-r--r--include/linux/mmzone.h1
-rw-r--r--mm/bootmem.c1
-rw-r--r--mm/memory_hotplug.c99
-rw-r--r--mm/sparse.c3
5 files changed, 128 insertions, 3 deletions
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index aca9c65f8d08..73e358612eaf 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -11,6 +11,15 @@ struct pglist_data;
11struct mem_section; 11struct mem_section;
12 12
13#ifdef CONFIG_MEMORY_HOTPLUG 13#ifdef CONFIG_MEMORY_HOTPLUG
14
15/*
16 * Magic number for free bootmem.
17 * The normal smallest mapcount is -1. Here is smaller value than it.
18 */
19#define SECTION_INFO 0xfffffffe
20#define MIX_INFO 0xfffffffd
21#define NODE_INFO 0xfffffffc
22
14/* 23/*
15 * pgdat resizing functions 24 * pgdat resizing functions
16 */ 25 */
@@ -145,6 +154,18 @@ static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat)
145#endif /* CONFIG_NUMA */ 154#endif /* CONFIG_NUMA */
146#endif /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */ 155#endif /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */
147 156
157#ifdef CONFIG_SPARSEMEM_VMEMMAP
158static inline void register_page_bootmem_info_node(struct pglist_data *pgdat)
159{
160}
161static inline void put_page_bootmem(struct page *page)
162{
163}
164#else
165extern void register_page_bootmem_info_node(struct pglist_data *pgdat);
166extern void put_page_bootmem(struct page *page);
167#endif
168
148#else /* ! CONFIG_MEMORY_HOTPLUG */ 169#else /* ! CONFIG_MEMORY_HOTPLUG */
149/* 170/*
150 * Stub functions for when hotplug is off 171 * Stub functions for when hotplug is off
@@ -172,6 +193,10 @@ static inline int mhp_notimplemented(const char *func)
172 return -ENOSYS; 193 return -ENOSYS;
173} 194}
174 195
196static inline void register_page_bootmem_info_node(struct pglist_data *pgdat)
197{
198}
199
175#endif /* ! CONFIG_MEMORY_HOTPLUG */ 200#endif /* ! CONFIG_MEMORY_HOTPLUG */
176 201
177extern int add_memory(int nid, u64 start, u64 size); 202extern int add_memory(int nid, u64 start, u64 size);
@@ -180,5 +205,7 @@ extern int remove_memory(u64 start, u64 size);
180extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn, 205extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
181 int nr_pages); 206 int nr_pages);
182extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms); 207extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms);
208extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
209 unsigned long pnum);
183 210
184#endif /* __LINUX_MEMORY_HOTPLUG_H */ 211#endif /* __LINUX_MEMORY_HOTPLUG_H */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index c3828497f41d..aad98003176f 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -896,6 +896,7 @@ static inline struct mem_section *__nr_to_section(unsigned long nr)
896 return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK]; 896 return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
897} 897}
898extern int __section_nr(struct mem_section* ms); 898extern int __section_nr(struct mem_section* ms);
899extern unsigned long usemap_size(void);
899 900
900/* 901/*
901 * We use the lower bits of the mem_map pointer to store 902 * We use the lower bits of the mem_map pointer to store
diff --git a/mm/bootmem.c b/mm/bootmem.c
index b6791646143e..369624d2789c 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -461,6 +461,7 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
461 461
462unsigned long __init free_all_bootmem_node(pg_data_t *pgdat) 462unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
463{ 463{
464 register_page_bootmem_info_node(pgdat);
464 return free_all_bootmem_core(pgdat); 465 return free_all_bootmem_core(pgdat);
465} 466}
466 467
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index c8b3ca79de2d..cba36ef0d506 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -58,8 +58,105 @@ static void release_memory_resource(struct resource *res)
58 return; 58 return;
59} 59}
60 60
61
62#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE 61#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
62#ifndef CONFIG_SPARSEMEM_VMEMMAP
63static void get_page_bootmem(unsigned long info, struct page *page, int magic)
64{
65 atomic_set(&page->_mapcount, magic);
66 SetPagePrivate(page);
67 set_page_private(page, info);
68 atomic_inc(&page->_count);
69}
70
71void put_page_bootmem(struct page *page)
72{
73 int magic;
74
75 magic = atomic_read(&page->_mapcount);
76 BUG_ON(magic >= -1);
77
78 if (atomic_dec_return(&page->_count) == 1) {
79 ClearPagePrivate(page);
80 set_page_private(page, 0);
81 reset_page_mapcount(page);
82 __free_pages_bootmem(page, 0);
83 }
84
85}
86
87void register_page_bootmem_info_section(unsigned long start_pfn)
88{
89 unsigned long *usemap, mapsize, section_nr, i;
90 struct mem_section *ms;
91 struct page *page, *memmap;
92
93 if (!pfn_valid(start_pfn))
94 return;
95
96 section_nr = pfn_to_section_nr(start_pfn);
97 ms = __nr_to_section(section_nr);
98
99 /* Get section's memmap address */
100 memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
101
102 /*
103 * Get page for the memmap's phys address
104 * XXX: need more consideration for sparse_vmemmap...
105 */
106 page = virt_to_page(memmap);
107 mapsize = sizeof(struct page) * PAGES_PER_SECTION;
108 mapsize = PAGE_ALIGN(mapsize) >> PAGE_SHIFT;
109
110 /* remember memmap's page */
111 for (i = 0; i < mapsize; i++, page++)
112 get_page_bootmem(section_nr, page, SECTION_INFO);
113
114 usemap = __nr_to_section(section_nr)->pageblock_flags;
115 page = virt_to_page(usemap);
116
117 mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT;
118
119 for (i = 0; i < mapsize; i++, page++)
120 get_page_bootmem(section_nr, page, MIX_INFO);
121
122}
123
124void register_page_bootmem_info_node(struct pglist_data *pgdat)
125{
126 unsigned long i, pfn, end_pfn, nr_pages;
127 int node = pgdat->node_id;
128 struct page *page;
129 struct zone *zone;
130
131 nr_pages = PAGE_ALIGN(sizeof(struct pglist_data)) >> PAGE_SHIFT;
132 page = virt_to_page(pgdat);
133
134 for (i = 0; i < nr_pages; i++, page++)
135 get_page_bootmem(node, page, NODE_INFO);
136
137 zone = &pgdat->node_zones[0];
138 for (; zone < pgdat->node_zones + MAX_NR_ZONES - 1; zone++) {
139 if (zone->wait_table) {
140 nr_pages = zone->wait_table_hash_nr_entries
141 * sizeof(wait_queue_head_t);
142 nr_pages = PAGE_ALIGN(nr_pages) >> PAGE_SHIFT;
143 page = virt_to_page(zone->wait_table);
144
145 for (i = 0; i < nr_pages; i++, page++)
146 get_page_bootmem(node, page, NODE_INFO);
147 }
148 }
149
150 pfn = pgdat->node_start_pfn;
151 end_pfn = pfn + pgdat->node_spanned_pages;
152
153 /* register_section info */
154 for (; pfn < end_pfn; pfn += PAGES_PER_SECTION)
155 register_page_bootmem_info_section(pfn);
156
157}
158#endif /* !CONFIG_SPARSEMEM_VMEMMAP */
159
63static int __add_zone(struct zone *zone, unsigned long phys_start_pfn) 160static int __add_zone(struct zone *zone, unsigned long phys_start_pfn)
64{ 161{
65 struct pglist_data *pgdat = zone->zone_pgdat; 162 struct pglist_data *pgdat = zone->zone_pgdat;
diff --git a/mm/sparse.c b/mm/sparse.c
index 186a85bf7912..8903c484389a 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -210,7 +210,6 @@ static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long p
210/* 210/*
211 * Decode mem_map from the coded memmap 211 * Decode mem_map from the coded memmap
212 */ 212 */
213static
214struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum) 213struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum)
215{ 214{
216 /* mask off the extra low bits of information */ 215 /* mask off the extra low bits of information */
@@ -233,7 +232,7 @@ static int __meminit sparse_init_one_section(struct mem_section *ms,
233 return 1; 232 return 1;
234} 233}
235 234
236static unsigned long usemap_size(void) 235unsigned long usemap_size(void)
237{ 236{
238 unsigned long size_bytes; 237 unsigned long size_bytes;
239 size_bytes = roundup(SECTION_BLOCKFLAGS_BITS, 8) / 8; 238 size_bytes = roundup(SECTION_BLOCKFLAGS_BITS, 8) / 8;