aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Lee Schermerhorn <lee.schermerhorn@hp.com> 2009-12-14 20:58:35 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2009-12-15 11:53:13 -0500
commit: 4faf8d950ec438c49ae4526b897c30f8a2cad741 (patch)
tree: 2764787c538868e73958e176d46c542008f345c2
parent: 8fe23e057172223fe2048768a4d87ab7de7477bc (diff)
hugetlb: handle memory hot-plug events
Register per node hstate attributes only for nodes with memory, as suggested by David Rientjes.

With Memory Hotplug, memory can be added to a memoryless node and a node with memory can become memoryless. Therefore, add a memory on/off-line notifier callback to [un]register a node's attributes on transition to/from memoryless state.

N.B.: only build, boot, and libhugetlbfs regression testing was done; i.e., no memory hotplug testing.

Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Reviewed-by: Andi Kleen <andi@firstfloor.org>
Acked-by: David Rientjes <rientjes@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Nishanth Aravamudan <nacc@us.ibm.com>
Cc: Adam Litke <agl@us.ibm.com>
Cc: Andy Whitcroft <apw@canonical.com>
Cc: Eric Whitney <eric.whitney@hp.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- Documentation/vm/hugetlbpage.txt 3
-rw-r--r-- drivers/base/node.c 53
2 files changed, 50 insertions, 6 deletions
diff --git a/Documentation/vm/hugetlbpage.txt b/Documentation/vm/hugetlbpage.txt
index 6a8e4667ab38..bc31636973e3 100644
--- a/Documentation/vm/hugetlbpage.txt
+++ b/Documentation/vm/hugetlbpage.txt
@@ -231,7 +231,8 @@ resulting effect on persistent huge page allocation is as follows:
231Per Node Hugepages Attributes 231Per Node Hugepages Attributes
232 232
233A subset of the contents of the root huge page control directory in sysfs, 233A subset of the contents of the root huge page control directory in sysfs,
234described above, has been replicated under each "node" system device in: 234described above, will be replicated under the system device of each
235NUMA node with memory in:
235 236
236 /sys/devices/system/node/node[0-9]*/hugepages/ 237 /sys/devices/system/node/node[0-9]*/hugepages/
237 238
diff --git a/drivers/base/node.c b/drivers/base/node.c
index f502711d28db..9e218a6d4a5b 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -177,8 +177,8 @@ static SYSDEV_ATTR(distance, S_IRUGO, node_read_distance, NULL);
177/* 177/*
178 * hugetlbfs per node attributes registration interface: 178 * hugetlbfs per node attributes registration interface:
179 * When/if hugetlb[fs] subsystem initializes [sometime after this module], 179 * When/if hugetlb[fs] subsystem initializes [sometime after this module],
180 * it will register its per node attributes for all nodes online at that 180 * it will register its per node attributes for all online nodes with
181 * time. It will also call register_hugetlbfs_with_node(), below, to 181 * memory. It will also call register_hugetlbfs_with_node(), below, to
182 * register its attribute registration functions with this node driver. 182 * register its attribute registration functions with this node driver.
183 * Once these hooks have been initialized, the node driver will call into 183 * Once these hooks have been initialized, the node driver will call into
184 * the hugetlb module to [un]register attributes for hot-plugged nodes. 184 * the hugetlb module to [un]register attributes for hot-plugged nodes.
@@ -188,7 +188,8 @@ static node_registration_func_t __hugetlb_unregister_node;
188 188
189static inline void hugetlb_register_node(struct node *node) 189static inline void hugetlb_register_node(struct node *node)
190{ 190{
191 if (__hugetlb_register_node) 191 if (__hugetlb_register_node &&
192 node_state(node->sysdev.id, N_HIGH_MEMORY))
192 __hugetlb_register_node(node); 193 __hugetlb_register_node(node);
193} 194}
194 195
@@ -233,6 +234,7 @@ int register_node(struct node *node, int num, struct node *parent)
233 sysdev_create_file(&node->sysdev, &attr_distance); 234 sysdev_create_file(&node->sysdev, &attr_distance);
234 235
235 scan_unevictable_register_node(node); 236 scan_unevictable_register_node(node);
237
236 hugetlb_register_node(node); 238 hugetlb_register_node(node);
237 } 239 }
238 return error; 240 return error;
@@ -254,7 +256,7 @@ void unregister_node(struct node *node)
254 sysdev_remove_file(&node->sysdev, &attr_distance); 256 sysdev_remove_file(&node->sysdev, &attr_distance);
255 257
256 scan_unevictable_unregister_node(node); 258 scan_unevictable_unregister_node(node);
257 hugetlb_unregister_node(node); 259 hugetlb_unregister_node(node); /* no-op, if memoryless node */
258 260
259 sysdev_unregister(&node->sysdev); 261 sysdev_unregister(&node->sysdev);
260} 262}
@@ -384,8 +386,45 @@ static int link_mem_sections(int nid)
384 } 386 }
385 return err; 387 return err;
386} 388}
389
390/*
391 * Handle per node hstate attribute [un]registration on transitions
392 * to/from memoryless state.
393 */
394
395static int node_memory_callback(struct notifier_block *self,
396 unsigned long action, void *arg)
397{
398 struct memory_notify *mnb = arg;
399 int nid = mnb->status_change_nid;
400
401 switch (action) {
402 case MEM_ONLINE: /* memory successfully brought online */
403 if (nid != NUMA_NO_NODE)
404 hugetlb_register_node(&node_devices[nid]);
405 break;
406 case MEM_OFFLINE: /* or offline */
407 if (nid != NUMA_NO_NODE)
408 hugetlb_unregister_node(&node_devices[nid]);
409 break;
410 case MEM_GOING_ONLINE:
411 case MEM_GOING_OFFLINE:
412 case MEM_CANCEL_ONLINE:
413 case MEM_CANCEL_OFFLINE:
414 default:
415 break;
416 }
417
418 return NOTIFY_OK;
419}
387#else 420#else
388static int link_mem_sections(int nid) { return 0; } 421static int link_mem_sections(int nid) { return 0; }
422
423static inline int node_memory_callback(struct notifier_block *self,
424 unsigned long action, void *arg)
425{
426 return NOTIFY_OK;
427}
389#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ 428#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
390 429
391int register_one_node(int nid) 430int register_one_node(int nid)
@@ -499,13 +538,17 @@ static int node_states_init(void)
499 return err; 538 return err;
500} 539}
501 540
541#define NODE_CALLBACK_PRI 2 /* lower than SLAB */
502static int __init register_node_type(void) 542static int __init register_node_type(void)
503{ 543{
504 int ret; 544 int ret;
505 545
506 ret = sysdev_class_register(&node_class); 546 ret = sysdev_class_register(&node_class);
507 if (!ret) 547 if (!ret) {
508 ret = node_states_init(); 548 ret = node_states_init();
549 hotplug_memory_notifier(node_memory_callback,
550 NODE_CALLBACK_PRI);
551 }
509 552
510 /* 553 /*
511 * Note: we're not going to unregister the node class if we fail 554 * Note: we're not going to unregister the node class if we fail