author     Lai Jiangshan <laijs@cn.fujitsu.com>            2012-12-11 19:03:16 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-12-11 20:22:28 -0500
commit     511c2aba8f07fc45bdcba548cb63f7b8a450c6dc (patch)
tree       6a1a853e60e0004f5895d78231ed1bea33fecaac /mm
parent     fcf07d22f089856631b52a75c35ba3c33b70a1b4 (diff)
mm, memory-hotplug: dynamic configure movable memory and portion memory
Add online_movable and online_kernel for logical memory hotplug. These
are the dynamic, runtime counterparts of the "movablecore" and
"kernelcore" boot options, and are introduced for the same reasons:
o We can configure memory as kernelcore or movablecore after boot.

  When the userspace workload grows and we need more hugepages, we can
  use "online_movable" to add memory and let the system use more
  THP (transparent huge pages); vice versa, we can use "online_kernel"
  when the kernel workload grows.

  This also helps virtualization dynamically configure host/guest
  memory, to save memory and reduce waste.

  Memory capacity on demand.
o When a new node is brought physically online after boot, we can use
  "online_movable" or "online_kernel" to partition it as desired when
  we logically online it.

  This configuration also helps with physical memory migration.

o All the benefits of the existing "movablecore" & "kernelcore" apply.
o It prepares for movable-node, which is important for power saving,
  hardware partitioning and highly available systems (hardware fault
  management). (Note: movable-node itself is not introduced here.)
Behavior:

When a memory block/section is onlined with "online_movable", the
kernel holds no direct references to that block's pages, so the memory
can be removed whenever needed.

When it is onlined with "online_kernel", the kernel can use it.

When it is onlined with "online", the zone type is not changed.
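
For illustration, an onlining request is issued by writing one of these
keywords to the memory block's sysfs state file (assuming the sysfs
plumbing added alongside this change). A minimal sketch in C; the block
number memory32 is an arbitrary example:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* memory32 is an arbitrary example; valid block numbers are
	 * system-dependent. */
	const char *state = "/sys/devices/system/memory/memory32/state";
	/* "online_kernel" or plain "online" are written the same way;
	 * plain "online" keeps the current zone, as described above. */
	const char *mode = "online_movable";
	int fd = open(state, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, mode, strlen(mode)) != (ssize_t)strlen(mode)) {
		perror("write");
		close(fd);
		return 1;
	}
	return close(fd) == 0 ? 0 : 1;
}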
Current constraint:

Only a memory block that is adjacent to ZONE_MOVABLE can be onlined
from ZONE_NORMAL into ZONE_MOVABLE.
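
To make this constraint concrete, below is a toy userspace model of the
three boundary checks performed by move_pfn_range_right() in the diff;
all pfn values are made up for illustration:

#include <assert.h>

/* Toy model of a zone span: [start_pfn, start_pfn + spanned_pages). */
struct toy_zone {
	unsigned long start_pfn;
	unsigned long spanned_pages;
};

/* Mirrors the checks in move_pfn_range_right(): the range moved into
 * ZONE_MOVABLE must overlap z1 (ZONE_NORMAL) and reach its right edge,
 * so both zones stay contiguous after the resize. */
static int can_move_right(const struct toy_zone *z1,
			  unsigned long start_pfn, unsigned long end_pfn)
{
	unsigned long z1_end = z1->start_pfn + z1->spanned_pages;

	if (z1->start_pfn > start_pfn)	/* starts below z1 */
		return 0;
	if (z1_end > end_pfn)		/* does not reach z1's right edge */
		return 0;
	if (start_pfn >= z1_end)	/* does not overlap z1 at all */
		return 0;
	return 1;
}

int main(void)
{
	/* Hypothetical ZONE_NORMAL spanning pfns [0x10000, 0x40000),
	 * with ZONE_MOVABLE assumed to start at 0x40000. */
	struct toy_zone normal = { 0x10000, 0x30000 };

	/* A block ending exactly at the ZONE_MOVABLE boundary is accepted. */
	assert(can_move_right(&normal, 0x38000, 0x40000));
	/* An interior block is rejected: it is not adjacent to ZONE_MOVABLE. */
	assert(!can_move_right(&normal, 0x20000, 0x28000));
	return 0;
}

move_pfn_range_left() applies the mirror-image checks for "online_kernel".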
[akpm@linux-foundation.org: use min_t, cleanups]
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Jiang Liu <jiang.liu@huawei.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: David Rientjes <rientjes@google.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')

-rw-r--r--  mm/memory_hotplug.c | 100
1 file changed, 99 insertions(+), 1 deletion(-)
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 571130ee66d7..5c1f4959e6b4 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -214,6 +214,88 @@ static void grow_zone_span(struct zone *zone, unsigned long start_pfn,
 	zone_span_writeunlock(zone);
 }
 
+static void resize_zone(struct zone *zone, unsigned long start_pfn,
+			unsigned long end_pfn)
+{
+	zone_span_writelock(zone);
+
+	zone->zone_start_pfn = start_pfn;
+	zone->spanned_pages = end_pfn - start_pfn;
+
+	zone_span_writeunlock(zone);
+}
+
+static void fix_zone_id(struct zone *zone, unsigned long start_pfn,
+			unsigned long end_pfn)
+{
+	enum zone_type zid = zone_idx(zone);
+	int nid = zone->zone_pgdat->node_id;
+	unsigned long pfn;
+
+	for (pfn = start_pfn; pfn < end_pfn; pfn++)
+		set_page_links(pfn_to_page(pfn), zid, nid, pfn);
+}
+
+static int move_pfn_range_left(struct zone *z1, struct zone *z2,
+	unsigned long start_pfn, unsigned long end_pfn)
+{
+	unsigned long flags;
+
+	pgdat_resize_lock(z1->zone_pgdat, &flags);
+
+	/* can't move pfns which are higher than @z2 */
+	if (end_pfn > z2->zone_start_pfn + z2->spanned_pages)
+		goto out_fail;
+	/* the moved-out part must be at the leftmost of @z2 */
+	if (start_pfn > z2->zone_start_pfn)
+		goto out_fail;
+	/* must include/overlap */
+	if (end_pfn <= z2->zone_start_pfn)
+		goto out_fail;
+
+	resize_zone(z1, z1->zone_start_pfn, end_pfn);
+	resize_zone(z2, end_pfn, z2->zone_start_pfn + z2->spanned_pages);
+
+	pgdat_resize_unlock(z1->zone_pgdat, &flags);
+
+	fix_zone_id(z1, start_pfn, end_pfn);
+
+	return 0;
+out_fail:
+	pgdat_resize_unlock(z1->zone_pgdat, &flags);
+	return -1;
+}
+
+static int move_pfn_range_right(struct zone *z1, struct zone *z2,
+	unsigned long start_pfn, unsigned long end_pfn)
+{
+	unsigned long flags;
+
+	pgdat_resize_lock(z1->zone_pgdat, &flags);
+
+	/* can't move pfns which are lower than @z1 */
+	if (z1->zone_start_pfn > start_pfn)
+		goto out_fail;
+	/* the moved-out part must be at the rightmost of @z1 */
+	if (z1->zone_start_pfn + z1->spanned_pages > end_pfn)
+		goto out_fail;
+	/* must include/overlap */
+	if (start_pfn >= z1->zone_start_pfn + z1->spanned_pages)
+		goto out_fail;
+
+	resize_zone(z1, z1->zone_start_pfn, start_pfn);
+	resize_zone(z2, start_pfn, z2->zone_start_pfn + z2->spanned_pages);
+
+	pgdat_resize_unlock(z1->zone_pgdat, &flags);
+
+	fix_zone_id(z2, start_pfn, end_pfn);
+
+	return 0;
+out_fail:
+	pgdat_resize_unlock(z1->zone_pgdat, &flags);
+	return -1;
+}
+
 static void grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn,
 			    unsigned long end_pfn)
 {
@@ -508,7 +590,7 @@ static void node_states_set_node(int node, struct memory_notify *arg)
 }
 
 
-int __ref online_pages(unsigned long pfn, unsigned long nr_pages)
+int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_type)
 {
 	unsigned long onlined_pages = 0;
 	struct zone *zone;
@@ -525,6 +607,22 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages)
 	 */
 	zone = page_zone(pfn_to_page(pfn));
 
+	if (online_type == ONLINE_KERNEL && zone_idx(zone) == ZONE_MOVABLE) {
+		if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages)) {
+			unlock_memory_hotplug();
+			return -1;
+		}
+	}
+	if (online_type == ONLINE_MOVABLE && zone_idx(zone) == ZONE_MOVABLE - 1) {
+		if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages)) {
+			unlock_memory_hotplug();
+			return -1;
+		}
+	}
+
+	/* Previous code may have changed the zone of the pfn range */
+	zone = page_zone(pfn_to_page(pfn));
+
 	arg.start_pfn = pfn;
 	arg.nr_pages = nr_pages;
 	node_states_check_changes_online(nr_pages, zone, &arg);