aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Robert Jennings <rcj@linux.vnet.ibm.com> 2009-12-17 09:44:38 -0500
committer Benjamin Herrenschmidt <benh@kernel.crashing.org> 2009-12-17 22:53:36 -0500
commit 925cc71e512a29e2594bcc17dc58d0a0e9c4d524 (patch)
tree 7240ccf6ba713cc180d388a28f00c1a43293bc14
parent 55639353a0035052d9ea6cfe4dde0ac7fcbb2c9f (diff)
mm: Add notifier in pageblock isolation for balloon drivers
Memory balloon drivers can allocate a large amount of memory which is not movable but could be freed to accommodate memory hotplug remove. Prior to calling the memory hotplug notifier chain, the memory in the pageblock is isolated. Currently, if the migrate type is not MIGRATE_MOVABLE, the isolation will not proceed, causing the memory removal for that page range to fail. Rather than failing pageblock isolation if the migratetype is not MIGRATE_MOVABLE, this patch checks if all of the pages in the pageblock, and not on the LRU, are owned by a registered balloon driver (or other entity) using a notifier chain. If all of the non-movable pages are owned by a balloon, they can be freed later through the memory notifier chain and the range can still be isolated in set_migratetype_isolate(). Signed-off-by: Robert Jennings <rcj@linux.vnet.ibm.com> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Ingo Molnar <mingo@elte.hu> Cc: Brian King <brking@linux.vnet.ibm.com> Cc: Paul Mackerras <paulus@samba.org> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Gerald Schaefer <geralds@linux.vnet.ibm.com> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-rw-r--r-- drivers/base/memory.c | 19
-rw-r--r-- include/linux/memory.h | 27
-rw-r--r-- mm/page_alloc.c | 57
3 files changed, 96 insertions, 7 deletions
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index c4c8f2e1dd15..d7d77d4a402c 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -63,6 +63,20 @@ void unregister_memory_notifier(struct notifier_block *nb)
63} 63}
64EXPORT_SYMBOL(unregister_memory_notifier); 64EXPORT_SYMBOL(unregister_memory_notifier);
65 65
66static ATOMIC_NOTIFIER_HEAD(memory_isolate_chain);
67
68int register_memory_isolate_notifier(struct notifier_block *nb)
69{
70 return atomic_notifier_chain_register(&memory_isolate_chain, nb);
71}
72EXPORT_SYMBOL(register_memory_isolate_notifier);
73
74void unregister_memory_isolate_notifier(struct notifier_block *nb)
75{
76 atomic_notifier_chain_unregister(&memory_isolate_chain, nb);
77}
78EXPORT_SYMBOL(unregister_memory_isolate_notifier);
79
66/* 80/*
67 * register_memory - Setup a sysfs device for a memory block 81 * register_memory - Setup a sysfs device for a memory block
68 */ 82 */
@@ -157,6 +171,11 @@ int memory_notify(unsigned long val, void *v)
157 return blocking_notifier_call_chain(&memory_chain, val, v); 171 return blocking_notifier_call_chain(&memory_chain, val, v);
158} 172}
159 173
174int memory_isolate_notify(unsigned long val, void *v)
175{
176 return atomic_notifier_call_chain(&memory_isolate_chain, val, v);
177}
178
160/* 179/*
161 * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is 180 * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is
162 * OK to have direct references to sparsemem variables in here. 181 * OK to have direct references to sparsemem variables in here.
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 37fa19b34ef5..1adfe779eb99 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -50,6 +50,19 @@ struct memory_notify {
50 int status_change_nid; 50 int status_change_nid;
51}; 51};
52 52
53/*
54 * During pageblock isolation, count the number of pages within the
55 * range [start_pfn, start_pfn + nr_pages) which are owned by code
56 * in the notifier chain.
57 */
58#define MEM_ISOLATE_COUNT (1<<0)
59
60struct memory_isolate_notify {
61 unsigned long start_pfn; /* Start of range to check */
62 unsigned int nr_pages; /* # pages in range to check */
63 unsigned int pages_found; /* # pages owned found by callbacks */
64};
65
53struct notifier_block; 66struct notifier_block;
54struct mem_section; 67struct mem_section;
55 68
@@ -76,14 +89,28 @@ static inline int memory_notify(unsigned long val, void *v)
76{ 89{
77 return 0; 90 return 0;
78} 91}
92static inline int register_memory_isolate_notifier(struct notifier_block *nb)
93{
94 return 0;
95}
96static inline void unregister_memory_isolate_notifier(struct notifier_block *nb)
97{
98}
99static inline int memory_isolate_notify(unsigned long val, void *v)
100{
101 return 0;
102}
79#else 103#else
80extern int register_memory_notifier(struct notifier_block *nb); 104extern int register_memory_notifier(struct notifier_block *nb);
81extern void unregister_memory_notifier(struct notifier_block *nb); 105extern void unregister_memory_notifier(struct notifier_block *nb);
106extern int register_memory_isolate_notifier(struct notifier_block *nb);
107extern void unregister_memory_isolate_notifier(struct notifier_block *nb);
82extern int register_new_memory(int, struct mem_section *); 108extern int register_new_memory(int, struct mem_section *);
83extern int unregister_memory_section(struct mem_section *); 109extern int unregister_memory_section(struct mem_section *);
84extern int memory_dev_init(void); 110extern int memory_dev_init(void);
85extern int remove_memory_block(unsigned long, struct mem_section *, int); 111extern int remove_memory_block(unsigned long, struct mem_section *, int);
86extern int memory_notify(unsigned long val, void *v); 112extern int memory_notify(unsigned long val, void *v);
113extern int memory_isolate_notify(unsigned long val, void *v);
87extern struct memory_block *find_memory_block(struct mem_section *); 114extern struct memory_block *find_memory_block(struct mem_section *);
88#define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT) 115#define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT)
89enum mem_add_context { BOOT, HOTPLUG }; 116enum mem_add_context { BOOT, HOTPLUG };
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 74af449b1f1d..998eacc1e4c3 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -48,6 +48,7 @@
48#include <linux/page_cgroup.h> 48#include <linux/page_cgroup.h>
49#include <linux/debugobjects.h> 49#include <linux/debugobjects.h>
50#include <linux/kmemleak.h> 50#include <linux/kmemleak.h>
51#include <linux/memory.h>
51#include <trace/events/kmem.h> 52#include <trace/events/kmem.h>
52 53
53#include <asm/tlbflush.h> 54#include <asm/tlbflush.h>
@@ -5008,23 +5009,65 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags,
5008int set_migratetype_isolate(struct page *page) 5009int set_migratetype_isolate(struct page *page)
5009{ 5010{
5010 struct zone *zone; 5011 struct zone *zone;
5011 unsigned long flags; 5012 struct page *curr_page;
5013 unsigned long flags, pfn, iter;
5014 unsigned long immobile = 0;
5015 struct memory_isolate_notify arg;
5016 int notifier_ret;
5012 int ret = -EBUSY; 5017 int ret = -EBUSY;
5013 int zone_idx; 5018 int zone_idx;
5014 5019
5015 zone = page_zone(page); 5020 zone = page_zone(page);
5016 zone_idx = zone_idx(zone); 5021 zone_idx = zone_idx(zone);
5022
5017 spin_lock_irqsave(&zone->lock, flags); 5023 spin_lock_irqsave(&zone->lock, flags);
5024 if (get_pageblock_migratetype(page) == MIGRATE_MOVABLE ||
5025 zone_idx == ZONE_MOVABLE) {
5026 ret = 0;
5027 goto out;
5028 }
5029
5030 pfn = page_to_pfn(page);
5031 arg.start_pfn = pfn;
5032 arg.nr_pages = pageblock_nr_pages;
5033 arg.pages_found = 0;
5034
5018 /* 5035 /*
5019 * In future, more migrate types will be able to be isolation target. 5036 * It may be possible to isolate a pageblock even if the
5037 * migratetype is not MIGRATE_MOVABLE. The memory isolation
5038 * notifier chain is used by balloon drivers to return the
5039 * number of pages in a range that are held by the balloon
5040 * driver to shrink memory. If all the pages are accounted for
5041 * by balloons, are free, or on the LRU, isolation can continue.
5042 * Later, for example, when memory hotplug notifier runs, these
5043 * pages reported as "can be isolated" should be isolated(freed)
5044 * by the balloon driver through the memory notifier chain.
5020 */ 5045 */
5021 if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE && 5046 notifier_ret = memory_isolate_notify(MEM_ISOLATE_COUNT, &arg);
5022 zone_idx != ZONE_MOVABLE) 5047 notifier_ret = notifier_to_errno(notifier_ret);
5048 if (notifier_ret || !arg.pages_found)
5023 goto out; 5049 goto out;
5024 set_pageblock_migratetype(page, MIGRATE_ISOLATE); 5050
5025 move_freepages_block(zone, page, MIGRATE_ISOLATE); 5051 for (iter = pfn; iter < (pfn + pageblock_nr_pages); iter++) {
5026 ret = 0; 5052 if (!pfn_valid_within(pfn))
5053 continue;
5054
5055 curr_page = pfn_to_page(iter);
5056 if (!page_count(curr_page) || PageLRU(curr_page))
5057 continue;
5058
5059 immobile++;
5060 }
5061
5062 if (arg.pages_found == immobile)
5063 ret = 0;
5064
5027out: 5065out:
5066 if (!ret) {
5067 set_pageblock_migratetype(page, MIGRATE_ISOLATE);
5068 move_freepages_block(zone, page, MIGRATE_ISOLATE);
5069 }
5070
5028 spin_unlock_irqrestore(&zone->lock, flags); 5071 spin_unlock_irqrestore(&zone->lock, flags);
5029 if (!ret) 5072 if (!ret)
5030 drain_all_pages(); 5073 drain_all_pages();