aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYasunori Goto <y-goto@jp.fujitsu.com>2007-10-21 19:41:36 -0400
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-10-22 11:13:17 -0400
commit7b78d335ac15b10bbcb0397c635d7f0d569b0270 (patch)
tree3e49006c0166ff8bcc6e11b401437fc31d875ec8
parent10020ca246c55744dad815ad4f15e1f488ca55a8 (diff)
memory hotplug: rearrange memory hotplug notifier
The current memory notifier still has some defects. (Fortunately, nothing uses it yet.) This patch fixes them and rearranges the notifier. - Pass start_pfn, nr_pages, and the node id (when the node's status changes from/to a memoryless node) to the callback functions. Callbacks can't do anything without this information. - Add a going-online notification. It is necessary for creating per-node structures before the node's pages become available. - Move the GOING_OFFLINE notification to after page isolation. This is a good place for callbacks to return memory such as caches, because the returned pages will not be used again. - Add CANCEL events for rolling back when an error occurs. - Delete the MEM_MAPPING_INVALID notification. It will not be used. - Fix the compile error in (un)register_memory_notifier(). Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--drivers/base/memory.c9
-rw-r--r--include/linux/memory.h27
-rw-r--r--mm/memory_hotplug.c48
3 files changed, 61 insertions, 23 deletions
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index c41d0728efe2..7868707c7eda 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -137,7 +137,7 @@ static ssize_t show_mem_state(struct sys_device *dev, char *buf)
137 return len; 137 return len;
138} 138}
139 139
140static inline int memory_notify(unsigned long val, void *v) 140int memory_notify(unsigned long val, void *v)
141{ 141{
142 return blocking_notifier_call_chain(&memory_chain, val, v); 142 return blocking_notifier_call_chain(&memory_chain, val, v);
143} 143}
@@ -183,7 +183,6 @@ memory_block_action(struct memory_block *mem, unsigned long action)
183 break; 183 break;
184 case MEM_OFFLINE: 184 case MEM_OFFLINE:
185 mem->state = MEM_GOING_OFFLINE; 185 mem->state = MEM_GOING_OFFLINE;
186 memory_notify(MEM_GOING_OFFLINE, NULL);
187 start_paddr = page_to_pfn(first_page) << PAGE_SHIFT; 186 start_paddr = page_to_pfn(first_page) << PAGE_SHIFT;
188 ret = remove_memory(start_paddr, 187 ret = remove_memory(start_paddr,
189 PAGES_PER_SECTION << PAGE_SHIFT); 188 PAGES_PER_SECTION << PAGE_SHIFT);
@@ -191,7 +190,6 @@ memory_block_action(struct memory_block *mem, unsigned long action)
191 mem->state = old_state; 190 mem->state = old_state;
192 break; 191 break;
193 } 192 }
194 memory_notify(MEM_MAPPING_INVALID, NULL);
195 break; 193 break;
196 default: 194 default:
197 printk(KERN_WARNING "%s(%p, %ld) unknown action: %ld\n", 195 printk(KERN_WARNING "%s(%p, %ld) unknown action: %ld\n",
@@ -199,11 +197,6 @@ memory_block_action(struct memory_block *mem, unsigned long action)
199 WARN_ON(1); 197 WARN_ON(1);
200 ret = -EINVAL; 198 ret = -EINVAL;
201 } 199 }
202 /*
203 * For now, only notify on successful memory operations
204 */
205 if (!ret)
206 memory_notify(action, NULL);
207 200
208 return ret; 201 return ret;
209} 202}
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 654ef5544878..ec376e482abb 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -41,18 +41,15 @@ struct memory_block {
41#define MEM_ONLINE (1<<0) /* exposed to userspace */ 41#define MEM_ONLINE (1<<0) /* exposed to userspace */
42#define MEM_GOING_OFFLINE (1<<1) /* exposed to userspace */ 42#define MEM_GOING_OFFLINE (1<<1) /* exposed to userspace */
43#define MEM_OFFLINE (1<<2) /* exposed to userspace */ 43#define MEM_OFFLINE (1<<2) /* exposed to userspace */
44#define MEM_GOING_ONLINE (1<<3)
45#define MEM_CANCEL_ONLINE (1<<4)
46#define MEM_CANCEL_OFFLINE (1<<5)
44 47
45/* 48struct memory_notify {
46 * All of these states are currently kernel-internal for notifying 49 unsigned long start_pfn;
47 * kernel components and architectures. 50 unsigned long nr_pages;
48 * 51 int status_change_nid;
49 * For MEM_MAPPING_INVALID, all notifier chains with priority >0 52};
50 * are called before pfn_to_page() becomes invalid. The priority=0
51 * entry is reserved for the function that actually makes
52 * pfn_to_page() stop working. Any notifiers that want to be called
53 * after that should have priority <0.
54 */
55#define MEM_MAPPING_INVALID (1<<3)
56 53
57struct notifier_block; 54struct notifier_block;
58struct mem_section; 55struct mem_section;
@@ -69,12 +66,18 @@ static inline int register_memory_notifier(struct notifier_block *nb)
69static inline void unregister_memory_notifier(struct notifier_block *nb) 66static inline void unregister_memory_notifier(struct notifier_block *nb)
70{ 67{
71} 68}
69static inline int memory_notify(unsigned long val, void *v)
70{
71 return 0;
72}
72#else 73#else
74extern int register_memory_notifier(struct notifier_block *nb);
75extern void unregister_memory_notifier(struct notifier_block *nb);
73extern int register_new_memory(struct mem_section *); 76extern int register_new_memory(struct mem_section *);
74extern int unregister_memory_section(struct mem_section *); 77extern int unregister_memory_section(struct mem_section *);
75extern int memory_dev_init(void); 78extern int memory_dev_init(void);
76extern int remove_memory_block(unsigned long, struct mem_section *, int); 79extern int remove_memory_block(unsigned long, struct mem_section *, int);
77 80extern int memory_notify(unsigned long val, void *v);
78#define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT) 81#define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT)
79 82
80 83
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 1833879f8438..3a47871a29d9 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -187,7 +187,24 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
187 unsigned long onlined_pages = 0; 187 unsigned long onlined_pages = 0;
188 struct zone *zone; 188 struct zone *zone;
189 int need_zonelists_rebuild = 0; 189 int need_zonelists_rebuild = 0;
190 int nid;
191 int ret;
192 struct memory_notify arg;
193
194 arg.start_pfn = pfn;
195 arg.nr_pages = nr_pages;
196 arg.status_change_nid = -1;
197
198 nid = page_to_nid(pfn_to_page(pfn));
199 if (node_present_pages(nid) == 0)
200 arg.status_change_nid = nid;
190 201
202 ret = memory_notify(MEM_GOING_ONLINE, &arg);
203 ret = notifier_to_errno(ret);
204 if (ret) {
205 memory_notify(MEM_CANCEL_ONLINE, &arg);
206 return ret;
207 }
191 /* 208 /*
192 * This doesn't need a lock to do pfn_to_page(). 209 * This doesn't need a lock to do pfn_to_page().
193 * The section can't be removed here because of the 210 * The section can't be removed here because of the
@@ -222,6 +239,10 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
222 build_all_zonelists(); 239 build_all_zonelists();
223 vm_total_pages = nr_free_pagecache_pages(); 240 vm_total_pages = nr_free_pagecache_pages();
224 writeback_set_ratelimit(); 241 writeback_set_ratelimit();
242
243 if (onlined_pages)
244 memory_notify(MEM_ONLINE, &arg);
245
225 return 0; 246 return 0;
226} 247}
227#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ 248#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
@@ -467,8 +488,9 @@ int offline_pages(unsigned long start_pfn,
467{ 488{
468 unsigned long pfn, nr_pages, expire; 489 unsigned long pfn, nr_pages, expire;
469 long offlined_pages; 490 long offlined_pages;
470 int ret, drain, retry_max; 491 int ret, drain, retry_max, node;
471 struct zone *zone; 492 struct zone *zone;
493 struct memory_notify arg;
472 494
473 BUG_ON(start_pfn >= end_pfn); 495 BUG_ON(start_pfn >= end_pfn);
474 /* at least, alignment against pageblock is necessary */ 496 /* at least, alignment against pageblock is necessary */
@@ -480,11 +502,27 @@ int offline_pages(unsigned long start_pfn,
480 we assume this for now. .*/ 502 we assume this for now. .*/
481 if (!test_pages_in_a_zone(start_pfn, end_pfn)) 503 if (!test_pages_in_a_zone(start_pfn, end_pfn))
482 return -EINVAL; 504 return -EINVAL;
505
506 zone = page_zone(pfn_to_page(start_pfn));
507 node = zone_to_nid(zone);
508 nr_pages = end_pfn - start_pfn;
509
483 /* set above range as isolated */ 510 /* set above range as isolated */
484 ret = start_isolate_page_range(start_pfn, end_pfn); 511 ret = start_isolate_page_range(start_pfn, end_pfn);
485 if (ret) 512 if (ret)
486 return ret; 513 return ret;
487 nr_pages = end_pfn - start_pfn; 514
515 arg.start_pfn = start_pfn;
516 arg.nr_pages = nr_pages;
517 arg.status_change_nid = -1;
518 if (nr_pages >= node_present_pages(node))
519 arg.status_change_nid = node;
520
521 ret = memory_notify(MEM_GOING_OFFLINE, &arg);
522 ret = notifier_to_errno(ret);
523 if (ret)
524 goto failed_removal;
525
488 pfn = start_pfn; 526 pfn = start_pfn;
489 expire = jiffies + timeout; 527 expire = jiffies + timeout;
490 drain = 0; 528 drain = 0;
@@ -539,20 +577,24 @@ repeat:
539 /* reset pagetype flags */ 577 /* reset pagetype flags */
540 start_isolate_page_range(start_pfn, end_pfn); 578 start_isolate_page_range(start_pfn, end_pfn);
541 /* removal success */ 579 /* removal success */
542 zone = page_zone(pfn_to_page(start_pfn));
543 zone->present_pages -= offlined_pages; 580 zone->present_pages -= offlined_pages;
544 zone->zone_pgdat->node_present_pages -= offlined_pages; 581 zone->zone_pgdat->node_present_pages -= offlined_pages;
545 totalram_pages -= offlined_pages; 582 totalram_pages -= offlined_pages;
546 num_physpages -= offlined_pages; 583 num_physpages -= offlined_pages;
584
547 vm_total_pages = nr_free_pagecache_pages(); 585 vm_total_pages = nr_free_pagecache_pages();
548 writeback_set_ratelimit(); 586 writeback_set_ratelimit();
587
588 memory_notify(MEM_OFFLINE, &arg);
549 return 0; 589 return 0;
550 590
551failed_removal: 591failed_removal:
552 printk(KERN_INFO "memory offlining %lx to %lx failed\n", 592 printk(KERN_INFO "memory offlining %lx to %lx failed\n",
553 start_pfn, end_pfn); 593 start_pfn, end_pfn);
594 memory_notify(MEM_CANCEL_OFFLINE, &arg);
554 /* pushback to free area */ 595 /* pushback to free area */
555 undo_isolate_page_range(start_pfn, end_pfn); 596 undo_isolate_page_range(start_pfn, end_pfn);
597
556 return ret; 598 return ret;
557} 599}
558#else 600#else