diff options
Diffstat (limited to 'mm/memory_hotplug.c')
-rw-r--r-- | mm/memory_hotplug.c | 154 |
1 files changed, 147 insertions, 7 deletions
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 70df5c0d957e..01c9fb97c619 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c | |||
@@ -4,7 +4,6 @@ | |||
4 | * Copyright (C) | 4 | * Copyright (C) |
5 | */ | 5 | */ |
6 | 6 | ||
7 | #include <linux/config.h> | ||
8 | #include <linux/stddef.h> | 7 | #include <linux/stddef.h> |
9 | #include <linux/mm.h> | 8 | #include <linux/mm.h> |
10 | #include <linux/swap.h> | 9 | #include <linux/swap.h> |
@@ -21,12 +20,13 @@ | |||
21 | #include <linux/memory_hotplug.h> | 20 | #include <linux/memory_hotplug.h> |
22 | #include <linux/highmem.h> | 21 | #include <linux/highmem.h> |
23 | #include <linux/vmalloc.h> | 22 | #include <linux/vmalloc.h> |
23 | #include <linux/ioport.h> | ||
24 | 24 | ||
25 | #include <asm/tlbflush.h> | 25 | #include <asm/tlbflush.h> |
26 | 26 | ||
27 | extern void zonetable_add(struct zone *zone, int nid, int zid, unsigned long pfn, | 27 | extern void zonetable_add(struct zone *zone, int nid, int zid, unsigned long pfn, |
28 | unsigned long size); | 28 | unsigned long size); |
29 | static void __add_zone(struct zone *zone, unsigned long phys_start_pfn) | 29 | static int __add_zone(struct zone *zone, unsigned long phys_start_pfn) |
30 | { | 30 | { |
31 | struct pglist_data *pgdat = zone->zone_pgdat; | 31 | struct pglist_data *pgdat = zone->zone_pgdat; |
32 | int nr_pages = PAGES_PER_SECTION; | 32 | int nr_pages = PAGES_PER_SECTION; |
@@ -34,8 +34,15 @@ static void __add_zone(struct zone *zone, unsigned long phys_start_pfn) | |||
34 | int zone_type; | 34 | int zone_type; |
35 | 35 | ||
36 | zone_type = zone - pgdat->node_zones; | 36 | zone_type = zone - pgdat->node_zones; |
37 | if (!populated_zone(zone)) { | ||
38 | int ret = 0; | ||
39 | ret = init_currently_empty_zone(zone, phys_start_pfn, nr_pages); | ||
40 | if (ret < 0) | ||
41 | return ret; | ||
42 | } | ||
37 | memmap_init_zone(nr_pages, nid, zone_type, phys_start_pfn); | 43 | memmap_init_zone(nr_pages, nid, zone_type, phys_start_pfn); |
38 | zonetable_add(zone, nid, zone_type, phys_start_pfn, nr_pages); | 44 | zonetable_add(zone, nid, zone_type, phys_start_pfn, nr_pages); |
45 | return 0; | ||
39 | } | 46 | } |
40 | 47 | ||
41 | extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn, | 48 | extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn, |
@@ -50,7 +57,11 @@ static int __add_section(struct zone *zone, unsigned long phys_start_pfn) | |||
50 | if (ret < 0) | 57 | if (ret < 0) |
51 | return ret; | 58 | return ret; |
52 | 59 | ||
53 | __add_zone(zone, phys_start_pfn); | 60 | ret = __add_zone(zone, phys_start_pfn); |
61 | |||
62 | if (ret < 0) | ||
63 | return ret; | ||
64 | |||
54 | return register_new_memory(__pfn_to_section(phys_start_pfn)); | 65 | return register_new_memory(__pfn_to_section(phys_start_pfn)); |
55 | } | 66 | } |
56 | 67 | ||
@@ -115,7 +126,11 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) | |||
115 | unsigned long i; | 126 | unsigned long i; |
116 | unsigned long flags; | 127 | unsigned long flags; |
117 | unsigned long onlined_pages = 0; | 128 | unsigned long onlined_pages = 0; |
129 | struct resource res; | ||
130 | u64 section_end; | ||
131 | unsigned long start_pfn; | ||
118 | struct zone *zone; | 132 | struct zone *zone; |
133 | int need_zonelists_rebuild = 0; | ||
119 | 134 | ||
120 | /* | 135 | /* |
121 | * This doesn't need a lock to do pfn_to_page(). | 136 | * This doesn't need a lock to do pfn_to_page(). |
@@ -128,15 +143,140 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) | |||
128 | grow_pgdat_span(zone->zone_pgdat, pfn, pfn + nr_pages); | 143 | grow_pgdat_span(zone->zone_pgdat, pfn, pfn + nr_pages); |
129 | pgdat_resize_unlock(zone->zone_pgdat, &flags); | 144 | pgdat_resize_unlock(zone->zone_pgdat, &flags); |
130 | 145 | ||
131 | for (i = 0; i < nr_pages; i++) { | 146 | /* |
132 | struct page *page = pfn_to_page(pfn + i); | 147 | * If this zone is not populated, then it is not in zonelist. |
133 | online_page(page); | 148 | * This means the page allocator ignores this zone. |
134 | onlined_pages++; | 149 | * So, zonelist must be updated after online. |
150 | */ | ||
151 | if (!populated_zone(zone)) | ||
152 | need_zonelists_rebuild = 1; | ||
153 | |||
154 | res.start = (u64)pfn << PAGE_SHIFT; | ||
155 | res.end = res.start + ((u64)nr_pages << PAGE_SHIFT) - 1; | ||
156 | res.flags = IORESOURCE_MEM; /* we just need system ram */ | ||
157 | section_end = res.end; | ||
158 | |||
159 | while (find_next_system_ram(&res) >= 0) { | ||
160 | start_pfn = (unsigned long)(res.start >> PAGE_SHIFT); | ||
161 | nr_pages = (unsigned long) | ||
162 | ((res.end + 1 - res.start) >> PAGE_SHIFT); | ||
163 | |||
164 | if (PageReserved(pfn_to_page(start_pfn))) { | ||
165 | /* this region's page is not onlined now */ | ||
166 | for (i = 0; i < nr_pages; i++) { | ||
167 | struct page *page = pfn_to_page(start_pfn + i); | ||
168 | online_page(page); | ||
169 | onlined_pages++; | ||
170 | } | ||
171 | } | ||
172 | |||
173 | res.start = res.end + 1; | ||
174 | res.end = section_end; | ||
135 | } | 175 | } |
136 | zone->present_pages += onlined_pages; | 176 | zone->present_pages += onlined_pages; |
137 | zone->zone_pgdat->node_present_pages += onlined_pages; | 177 | zone->zone_pgdat->node_present_pages += onlined_pages; |
138 | 178 | ||
139 | setup_per_zone_pages_min(); | 179 | setup_per_zone_pages_min(); |
140 | 180 | ||
181 | if (need_zonelists_rebuild) | ||
182 | build_all_zonelists(); | ||
183 | vm_total_pages = nr_free_pagecache_pages(); | ||
141 | return 0; | 184 | return 0; |
142 | } | 185 | } |
186 | |||
187 | static pg_data_t *hotadd_new_pgdat(int nid, u64 start) | ||
188 | { | ||
189 | struct pglist_data *pgdat; | ||
190 | unsigned long zones_size[MAX_NR_ZONES] = {0}; | ||
191 | unsigned long zholes_size[MAX_NR_ZONES] = {0}; | ||
192 | unsigned long start_pfn = start >> PAGE_SHIFT; | ||
193 | |||
194 | pgdat = arch_alloc_nodedata(nid); | ||
195 | if (!pgdat) | ||
196 | return NULL; | ||
197 | |||
198 | arch_refresh_nodedata(nid, pgdat); | ||
199 | |||
200 | /* we can use NODE_DATA(nid) from here */ | ||
201 | |||
202 | /* init node's zones as empty zones, we don't have any present pages.*/ | ||
203 | free_area_init_node(nid, pgdat, zones_size, start_pfn, zholes_size); | ||
204 | |||
205 | return pgdat; | ||
206 | } | ||
207 | |||
208 | static void rollback_node_hotadd(int nid, pg_data_t *pgdat) | ||
209 | { | ||
210 | arch_refresh_nodedata(nid, NULL); | ||
211 | arch_free_nodedata(pgdat); | ||
212 | return; | ||
213 | } | ||
214 | |||
215 | /* add this memory to iomem resource */ | ||
216 | static void register_memory_resource(u64 start, u64 size) | ||
217 | { | ||
218 | struct resource *res; | ||
219 | |||
220 | res = kzalloc(sizeof(struct resource), GFP_KERNEL); | ||
221 | BUG_ON(!res); | ||
222 | |||
223 | res->name = "System RAM"; | ||
224 | res->start = start; | ||
225 | res->end = start + size - 1; | ||
226 | res->flags = IORESOURCE_MEM; | ||
227 | if (request_resource(&iomem_resource, res) < 0) { | ||
228 | printk("System RAM resource %llx - %llx cannot be added\n", | ||
229 | (unsigned long long)res->start, (unsigned long long)res->end); | ||
230 | kfree(res); | ||
231 | } | ||
232 | } | ||
233 | |||
234 | |||
235 | |||
236 | int add_memory(int nid, u64 start, u64 size) | ||
237 | { | ||
238 | pg_data_t *pgdat = NULL; | ||
239 | int new_pgdat = 0; | ||
240 | int ret; | ||
241 | |||
242 | if (!node_online(nid)) { | ||
243 | pgdat = hotadd_new_pgdat(nid, start); | ||
244 | if (!pgdat) | ||
245 | return -ENOMEM; | ||
246 | new_pgdat = 1; | ||
247 | ret = kswapd_run(nid); | ||
248 | if (ret) | ||
249 | goto error; | ||
250 | } | ||
251 | |||
252 | /* call arch's memory hotadd */ | ||
253 | ret = arch_add_memory(nid, start, size); | ||
254 | |||
255 | if (ret < 0) | ||
256 | goto error; | ||
257 | |||
258 | /* we online node here. we can't roll back from here. */ | ||
259 | node_set_online(nid); | ||
260 | |||
261 | if (new_pgdat) { | ||
262 | ret = register_one_node(nid); | ||
263 | /* | ||
264 | * If sysfs file of new node can't create, cpu on the node | ||
265 | * can't be hot-added. There is no rollback way now. | ||
266 | * So, check by BUG_ON() to catch it reluctantly.. | ||
267 | */ | ||
268 | BUG_ON(ret); | ||
269 | } | ||
270 | |||
271 | /* register this memory as resource */ | ||
272 | register_memory_resource(start, size); | ||
273 | |||
274 | return ret; | ||
275 | error: | ||
276 | /* rollback pgdat allocation and others */ | ||
277 | if (new_pgdat) | ||
278 | rollback_node_hotadd(nid, pgdat); | ||
279 | |||
280 | return ret; | ||
281 | } | ||
282 | EXPORT_SYMBOL_GPL(add_memory); | ||