aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGary Hade <garyhade@us.ibm.com>2009-01-06 17:39:14 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2009-01-06 18:59:00 -0500
commitc04fc586c1a480ba198f03ae7b6cbd7b57380b91 (patch)
tree9d6544a3b62cc01dbcbb1e315b84378b45ba86d2
parentee53a891f47444c53318b98dac947ede963db400 (diff)
mm: show node to memory section relationship with symlinks in sysfs
Show node to memory section relationship with symlinks in sysfs Add /sys/devices/system/node/nodeX/memoryY symlinks for all the memory sections located on nodeX. For example: /sys/devices/system/node/node1/memory135 -> ../../memory/memory135 indicates that memory section 135 resides on node1. Also revises documentation to cover this change as well as updating Documentation/ABI/testing/sysfs-devices-memory to include descriptions of memory hotremove files 'phys_device', 'phys_index', and 'state' that were previously not described there. In addition to it always being a good policy to provide users with the maximum possible amount of physical location information for resources that can be hot-added and/or hot-removed, the following are some (but likely not all) of the user benefits provided by this change. Immediate: - Provides information needed to determine the specific node on which a defective DIMM is located. This will reduce system downtime when the node or defective DIMM is swapped out. - Prevents unintended onlining of a memory section that was previously offlined due to a defective DIMM. This could happen during node hot-add when the user or node hot-add assist script onlines _all_ offlined sections due to user or script inability to identify the specific memory sections located on the hot-added node. The consequences of reintroducing the defective memory could be ugly. - Provides information needed to vary the amount and distribution of memory on specific nodes for testing or debugging purposes. Future: - Will provide information needed to identify the memory sections that need to be offlined prior to physical removal of a specific node. Symlink creation during boot was tested on 2-node x86_64, 2-node ppc64, and 2-node ia64 systems. Symlink creation during physical memory hot-add tested on a 2-node x86_64 system. Signed-off-by: Gary Hade <garyhade@us.ibm.com> Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com> Acked-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--Documentation/ABI/testing/sysfs-devices-memory51
-rw-r--r--Documentation/memory-hotplug.txt16
-rw-r--r--arch/ia64/mm/init.c2
-rw-r--r--arch/powerpc/mm/mem.c2
-rw-r--r--arch/s390/mm/init.c2
-rw-r--r--arch/sh/mm/init.c3
-rw-r--r--arch/x86/mm/init_32.c2
-rw-r--r--arch/x86/mm/init_64.c2
-rw-r--r--drivers/base/memory.c19
-rw-r--r--drivers/base/node.c103
-rw-r--r--include/linux/memory.h6
-rw-r--r--include/linux/memory_hotplug.h2
-rw-r--r--include/linux/node.h13
-rw-r--r--mm/memory_hotplug.c11
14 files changed, 209 insertions, 25 deletions
diff --git a/Documentation/ABI/testing/sysfs-devices-memory b/Documentation/ABI/testing/sysfs-devices-memory
index 7a16fe1e2270..9fe91c02ee40 100644
--- a/Documentation/ABI/testing/sysfs-devices-memory
+++ b/Documentation/ABI/testing/sysfs-devices-memory
@@ -6,7 +6,6 @@ Description:
6 internal state of the kernel memory blocks. Files could be 6 internal state of the kernel memory blocks. Files could be
7 added or removed dynamically to represent hot-add/remove 7 added or removed dynamically to represent hot-add/remove
8 operations. 8 operations.
9
10Users: hotplug memory add/remove tools 9Users: hotplug memory add/remove tools
11 https://w3.opensource.ibm.com/projects/powerpc-utils/ 10 https://w3.opensource.ibm.com/projects/powerpc-utils/
12 11
@@ -19,6 +18,56 @@ Description:
19 This is useful for a user-level agent to determine 18 This is useful for a user-level agent to determine
20 identify removable sections of the memory before attempting 19 identify removable sections of the memory before attempting
21 potentially expensive hot-remove memory operation 20 potentially expensive hot-remove memory operation
21Users: hotplug memory remove tools
22 https://w3.opensource.ibm.com/projects/powerpc-utils/
23
24What: /sys/devices/system/memory/memoryX/phys_device
25Date: September 2008
26Contact: Badari Pulavarty <pbadari@us.ibm.com>
27Description:
28 The file /sys/devices/system/memory/memoryX/phys_device
29 is read-only and is designed to show the name of physical
30 memory device. Implementation is currently incomplete.
22 31
32What: /sys/devices/system/memory/memoryX/phys_index
33Date: September 2008
34Contact: Badari Pulavarty <pbadari@us.ibm.com>
35Description:
36 The file /sys/devices/system/memory/memoryX/phys_index
37 is read-only and contains the section ID in hexadecimal
38 which is equivalent to decimal X contained in the
39 memory section directory name.
40
41What: /sys/devices/system/memory/memoryX/state
42Date: September 2008
43Contact: Badari Pulavarty <pbadari@us.ibm.com>
44Description:
45 The file /sys/devices/system/memory/memoryX/state
46 is read-write. When read, it's contents show the
47 online/offline state of the memory section. When written,
48 root can toggle the the online/offline state of a removable
49 memory section (see removable file description above)
50 using the following commands.
51 # echo online > /sys/devices/system/memory/memoryX/state
52 # echo offline > /sys/devices/system/memory/memoryX/state
53
54 For example, if /sys/devices/system/memory/memory22/removable
55 contains a value of 1 and
56 /sys/devices/system/memory/memory22/state contains the
57 string "online" the following command can be executed by
58 by root to offline that section.
59 # echo offline > /sys/devices/system/memory/memory22/state
23Users: hotplug memory remove tools 60Users: hotplug memory remove tools
24 https://w3.opensource.ibm.com/projects/powerpc-utils/ 61 https://w3.opensource.ibm.com/projects/powerpc-utils/
62
63What: /sys/devices/system/node/nodeX/memoryY
64Date: September 2008
65Contact: Gary Hade <garyhade@us.ibm.com>
66Description:
67 When CONFIG_NUMA is enabled
68 /sys/devices/system/node/nodeX/memoryY is a symbolic link that
69 points to the corresponding /sys/devices/system/memory/memoryY
70 memory section directory. For example, the following symbolic
71 link is created for memory section 9 on node0.
72 /sys/devices/system/node/node0/memory9 -> ../../memory/memory9
73
diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt
index 168117bd6ee8..4c2ecf537a4a 100644
--- a/Documentation/memory-hotplug.txt
+++ b/Documentation/memory-hotplug.txt
@@ -124,7 +124,7 @@ config options.
124 This option can be kernel module too. 124 This option can be kernel module too.
125 125
126-------------------------------- 126--------------------------------
1273 sysfs files for memory hotplug 1274 sysfs files for memory hotplug
128-------------------------------- 128--------------------------------
129All sections have their device information under /sys/devices/system/memory as 129All sections have their device information under /sys/devices/system/memory as
130 130
@@ -138,11 +138,12 @@ For example, assume 1GiB section size. A device for a memory starting at
138(0x100000000 / 1Gib = 4) 138(0x100000000 / 1Gib = 4)
139This device covers address range [0x100000000 ... 0x140000000) 139This device covers address range [0x100000000 ... 0x140000000)
140 140
141Under each section, you can see 3 files. 141Under each section, you can see 4 files.
142 142
143/sys/devices/system/memory/memoryXXX/phys_index 143/sys/devices/system/memory/memoryXXX/phys_index
144/sys/devices/system/memory/memoryXXX/phys_device 144/sys/devices/system/memory/memoryXXX/phys_device
145/sys/devices/system/memory/memoryXXX/state 145/sys/devices/system/memory/memoryXXX/state
146/sys/devices/system/memory/memoryXXX/removable
146 147
147'phys_index' : read-only and contains section id, same as XXX. 148'phys_index' : read-only and contains section id, same as XXX.
148'state' : read-write 149'state' : read-write
@@ -150,10 +151,20 @@ Under each section, you can see 3 files.
150 at write: user can specify "online", "offline" command 151 at write: user can specify "online", "offline" command
151'phys_device': read-only: designed to show the name of physical memory device. 152'phys_device': read-only: designed to show the name of physical memory device.
152 This is not well implemented now. 153 This is not well implemented now.
154'removable' : read-only: contains an integer value indicating
155 whether the memory section is removable or not
156 removable. A value of 1 indicates that the memory
157 section is removable and a value of 0 indicates that
158 it is not removable.
153 159
154NOTE: 160NOTE:
155 These directories/files appear after physical memory hotplug phase. 161 These directories/files appear after physical memory hotplug phase.
156 162
163If CONFIG_NUMA is enabled the
164/sys/devices/system/memory/memoryXXX memory section
165directories can also be accessed via symbolic links located in
166the /sys/devices/system/node/node* directories. For example:
167/sys/devices/system/node/node0/memory9 -> ../../memory/memory9
157 168
158-------------------------------- 169--------------------------------
1594. Physical memory hot-add phase 1704. Physical memory hot-add phase
@@ -365,7 +376,6 @@ node if necessary.
365 - allowing memory hot-add to ZONE_MOVABLE. maybe we need some switch like 376 - allowing memory hot-add to ZONE_MOVABLE. maybe we need some switch like
366 sysctl or new control file. 377 sysctl or new control file.
367 - showing memory section and physical device relationship. 378 - showing memory section and physical device relationship.
368 - showing memory section and node relationship (maybe good for NUMA)
369 - showing memory section is under ZONE_MOVABLE or not 379 - showing memory section is under ZONE_MOVABLE or not
370 - test and make it better memory offlining. 380 - test and make it better memory offlining.
371 - support HugeTLB page migration and offlining. 381 - support HugeTLB page migration and offlining.
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 054bcd9439aa..56e12903973c 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -692,7 +692,7 @@ int arch_add_memory(int nid, u64 start, u64 size)
692 pgdat = NODE_DATA(nid); 692 pgdat = NODE_DATA(nid);
693 693
694 zone = pgdat->node_zones + ZONE_NORMAL; 694 zone = pgdat->node_zones + ZONE_NORMAL;
695 ret = __add_pages(zone, start_pfn, nr_pages); 695 ret = __add_pages(nid, zone, start_pfn, nr_pages);
696 696
697 if (ret) 697 if (ret)
698 printk("%s: Problem encountered in __add_pages() as ret=%d\n", 698 printk("%s: Problem encountered in __add_pages() as ret=%d\n",
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 53b06ebb3f2f..f00f09a77f12 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -132,7 +132,7 @@ int arch_add_memory(int nid, u64 start, u64 size)
132 /* this should work for most non-highmem platforms */ 132 /* this should work for most non-highmem platforms */
133 zone = pgdata->node_zones; 133 zone = pgdata->node_zones;
134 134
135 return __add_pages(zone, start_pfn, nr_pages); 135 return __add_pages(nid, zone, start_pfn, nr_pages);
136} 136}
137#endif /* CONFIG_MEMORY_HOTPLUG */ 137#endif /* CONFIG_MEMORY_HOTPLUG */
138 138
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 158b0d6d7046..f0258ca3b17e 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -183,7 +183,7 @@ int arch_add_memory(int nid, u64 start, u64 size)
183 rc = vmem_add_mapping(start, size); 183 rc = vmem_add_mapping(start, size);
184 if (rc) 184 if (rc)
185 return rc; 185 return rc;
186 rc = __add_pages(zone, PFN_DOWN(start), PFN_DOWN(size)); 186 rc = __add_pages(nid, zone, PFN_DOWN(start), PFN_DOWN(size));
187 if (rc) 187 if (rc)
188 vmem_remove_mapping(start, size); 188 vmem_remove_mapping(start, size);
189 return rc; 189 return rc;
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 6cbef8caeb56..3edf297c829b 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -311,7 +311,8 @@ int arch_add_memory(int nid, u64 start, u64 size)
311 pgdat = NODE_DATA(nid); 311 pgdat = NODE_DATA(nid);
312 312
313 /* We only have ZONE_NORMAL, so this is easy.. */ 313 /* We only have ZONE_NORMAL, so this is easy.. */
314 ret = __add_pages(pgdat->node_zones + ZONE_NORMAL, start_pfn, nr_pages); 314 ret = __add_pages(nid, pgdat->node_zones + ZONE_NORMAL,
315 start_pfn, nr_pages);
315 if (unlikely(ret)) 316 if (unlikely(ret))
316 printk("%s: Failed, __add_pages() == %d\n", __func__, ret); 317 printk("%s: Failed, __add_pages() == %d\n", __func__, ret);
317 318
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index f99a6c6c432e..544d724caeee 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -1079,7 +1079,7 @@ int arch_add_memory(int nid, u64 start, u64 size)
1079 unsigned long start_pfn = start >> PAGE_SHIFT; 1079 unsigned long start_pfn = start >> PAGE_SHIFT;
1080 unsigned long nr_pages = size >> PAGE_SHIFT; 1080 unsigned long nr_pages = size >> PAGE_SHIFT;
1081 1081
1082 return __add_pages(zone, start_pfn, nr_pages); 1082 return __add_pages(nid, zone, start_pfn, nr_pages);
1083} 1083}
1084#endif 1084#endif
1085 1085
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 9f7a0d24d42a..54c437e96541 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -857,7 +857,7 @@ int arch_add_memory(int nid, u64 start, u64 size)
857 if (last_mapped_pfn > max_pfn_mapped) 857 if (last_mapped_pfn > max_pfn_mapped)
858 max_pfn_mapped = last_mapped_pfn; 858 max_pfn_mapped = last_mapped_pfn;
859 859
860 ret = __add_pages(zone, start_pfn, nr_pages); 860 ret = __add_pages(nid, zone, start_pfn, nr_pages);
861 WARN_ON_ONCE(ret); 861 WARN_ON_ONCE(ret);
862 862
863 return ret; 863 return ret;
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 5260e9e0df48..989429cfed88 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -347,8 +347,9 @@ static inline int memory_probe_init(void)
347 * section belongs to... 347 * section belongs to...
348 */ 348 */
349 349
350static int add_memory_block(unsigned long node_id, struct mem_section *section, 350static int add_memory_block(int nid, struct mem_section *section,
351 unsigned long state, int phys_device) 351 unsigned long state, int phys_device,
352 enum mem_add_context context)
352{ 353{
353 struct memory_block *mem = kzalloc(sizeof(*mem), GFP_KERNEL); 354 struct memory_block *mem = kzalloc(sizeof(*mem), GFP_KERNEL);
354 int ret = 0; 355 int ret = 0;
@@ -370,6 +371,10 @@ static int add_memory_block(unsigned long node_id, struct mem_section *section,
370 ret = mem_create_simple_file(mem, phys_device); 371 ret = mem_create_simple_file(mem, phys_device);
371 if (!ret) 372 if (!ret)
372 ret = mem_create_simple_file(mem, removable); 373 ret = mem_create_simple_file(mem, removable);
374 if (!ret) {
375 if (context == HOTPLUG)
376 ret = register_mem_sect_under_node(mem, nid);
377 }
373 378
374 return ret; 379 return ret;
375} 380}
@@ -382,7 +387,7 @@ static int add_memory_block(unsigned long node_id, struct mem_section *section,
382 * 387 *
383 * This could be made generic for all sysdev classes. 388 * This could be made generic for all sysdev classes.
384 */ 389 */
385static struct memory_block *find_memory_block(struct mem_section *section) 390struct memory_block *find_memory_block(struct mem_section *section)
386{ 391{
387 struct kobject *kobj; 392 struct kobject *kobj;
388 struct sys_device *sysdev; 393 struct sys_device *sysdev;
@@ -411,6 +416,7 @@ int remove_memory_block(unsigned long node_id, struct mem_section *section,
411 struct memory_block *mem; 416 struct memory_block *mem;
412 417
413 mem = find_memory_block(section); 418 mem = find_memory_block(section);
419 unregister_mem_sect_under_nodes(mem);
414 mem_remove_simple_file(mem, phys_index); 420 mem_remove_simple_file(mem, phys_index);
415 mem_remove_simple_file(mem, state); 421 mem_remove_simple_file(mem, state);
416 mem_remove_simple_file(mem, phys_device); 422 mem_remove_simple_file(mem, phys_device);
@@ -424,9 +430,9 @@ int remove_memory_block(unsigned long node_id, struct mem_section *section,
424 * need an interface for the VM to add new memory regions, 430 * need an interface for the VM to add new memory regions,
425 * but without onlining it. 431 * but without onlining it.
426 */ 432 */
427int register_new_memory(struct mem_section *section) 433int register_new_memory(int nid, struct mem_section *section)
428{ 434{
429 return add_memory_block(0, section, MEM_OFFLINE, 0); 435 return add_memory_block(nid, section, MEM_OFFLINE, 0, HOTPLUG);
430} 436}
431 437
432int unregister_memory_section(struct mem_section *section) 438int unregister_memory_section(struct mem_section *section)
@@ -458,7 +464,8 @@ int __init memory_dev_init(void)
458 for (i = 0; i < NR_MEM_SECTIONS; i++) { 464 for (i = 0; i < NR_MEM_SECTIONS; i++) {
459 if (!present_section_nr(i)) 465 if (!present_section_nr(i))
460 continue; 466 continue;
461 err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE, 0); 467 err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE,
468 0, BOOT);
462 if (!ret) 469 if (!ret)
463 ret = err; 470 ret = err;
464 } 471 }
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 91636cd8b6c9..43fa90b837ee 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -6,6 +6,7 @@
6#include <linux/module.h> 6#include <linux/module.h>
7#include <linux/init.h> 7#include <linux/init.h>
8#include <linux/mm.h> 8#include <linux/mm.h>
9#include <linux/memory.h>
9#include <linux/node.h> 10#include <linux/node.h>
10#include <linux/hugetlb.h> 11#include <linux/hugetlb.h>
11#include <linux/cpumask.h> 12#include <linux/cpumask.h>
@@ -248,6 +249,105 @@ int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
248 return 0; 249 return 0;
249} 250}
250 251
252#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
253#define page_initialized(page) (page->lru.next)
254
255static int get_nid_for_pfn(unsigned long pfn)
256{
257 struct page *page;
258
259 if (!pfn_valid_within(pfn))
260 return -1;
261 page = pfn_to_page(pfn);
262 if (!page_initialized(page))
263 return -1;
264 return pfn_to_nid(pfn);
265}
266
267/* register memory section under specified node if it spans that node */
268int register_mem_sect_under_node(struct memory_block *mem_blk, int nid)
269{
270 unsigned long pfn, sect_start_pfn, sect_end_pfn;
271
272 if (!mem_blk)
273 return -EFAULT;
274 if (!node_online(nid))
275 return 0;
276 sect_start_pfn = section_nr_to_pfn(mem_blk->phys_index);
277 sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1;
278 for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
279 int page_nid;
280
281 page_nid = get_nid_for_pfn(pfn);
282 if (page_nid < 0)
283 continue;
284 if (page_nid != nid)
285 continue;
286 return sysfs_create_link_nowarn(&node_devices[nid].sysdev.kobj,
287 &mem_blk->sysdev.kobj,
288 kobject_name(&mem_blk->sysdev.kobj));
289 }
290 /* mem section does not span the specified node */
291 return 0;
292}
293
294/* unregister memory section under all nodes that it spans */
295int unregister_mem_sect_under_nodes(struct memory_block *mem_blk)
296{
297 nodemask_t unlinked_nodes;
298 unsigned long pfn, sect_start_pfn, sect_end_pfn;
299
300 if (!mem_blk)
301 return -EFAULT;
302 nodes_clear(unlinked_nodes);
303 sect_start_pfn = section_nr_to_pfn(mem_blk->phys_index);
304 sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1;
305 for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
306 unsigned int nid;
307
308 nid = get_nid_for_pfn(pfn);
309 if (nid < 0)
310 continue;
311 if (!node_online(nid))
312 continue;
313 if (node_test_and_set(nid, unlinked_nodes))
314 continue;
315 sysfs_remove_link(&node_devices[nid].sysdev.kobj,
316 kobject_name(&mem_blk->sysdev.kobj));
317 }
318 return 0;
319}
320
321static int link_mem_sections(int nid)
322{
323 unsigned long start_pfn = NODE_DATA(nid)->node_start_pfn;
324 unsigned long end_pfn = start_pfn + NODE_DATA(nid)->node_spanned_pages;
325 unsigned long pfn;
326 int err = 0;
327
328 for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
329 unsigned long section_nr = pfn_to_section_nr(pfn);
330 struct mem_section *mem_sect;
331 struct memory_block *mem_blk;
332 int ret;
333
334 if (!present_section_nr(section_nr))
335 continue;
336 mem_sect = __nr_to_section(section_nr);
337 mem_blk = find_memory_block(mem_sect);
338 ret = register_mem_sect_under_node(mem_blk, nid);
339 if (!err)
340 err = ret;
341
342 /* discard ref obtained in find_memory_block() */
343 kobject_put(&mem_blk->sysdev.kobj);
344 }
345 return err;
346}
347#else
348static int link_mem_sections(int nid) { return 0; }
349#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
350
251int register_one_node(int nid) 351int register_one_node(int nid)
252{ 352{
253 int error = 0; 353 int error = 0;
@@ -267,6 +367,9 @@ int register_one_node(int nid)
267 if (cpu_to_node(cpu) == nid) 367 if (cpu_to_node(cpu) == nid)
268 register_cpu_under_node(cpu, nid); 368 register_cpu_under_node(cpu, nid);
269 } 369 }
370
371 /* link memory sections under this node */
372 error = link_mem_sections(nid);
270 } 373 }
271 374
272 return error; 375 return error;
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 36c82c9e6ea7..3fdc10806d31 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -79,14 +79,14 @@ static inline int memory_notify(unsigned long val, void *v)
79#else 79#else
80extern int register_memory_notifier(struct notifier_block *nb); 80extern int register_memory_notifier(struct notifier_block *nb);
81extern void unregister_memory_notifier(struct notifier_block *nb); 81extern void unregister_memory_notifier(struct notifier_block *nb);
82extern int register_new_memory(struct mem_section *); 82extern int register_new_memory(int, struct mem_section *);
83extern int unregister_memory_section(struct mem_section *); 83extern int unregister_memory_section(struct mem_section *);
84extern int memory_dev_init(void); 84extern int memory_dev_init(void);
85extern int remove_memory_block(unsigned long, struct mem_section *, int); 85extern int remove_memory_block(unsigned long, struct mem_section *, int);
86extern int memory_notify(unsigned long val, void *v); 86extern int memory_notify(unsigned long val, void *v);
87extern struct memory_block *find_memory_block(struct mem_section *);
87#define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT) 88#define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT)
88 89enum mem_add_context { BOOT, HOTPLUG };
89
90#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ 90#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
91 91
92#ifdef CONFIG_MEMORY_HOTPLUG 92#ifdef CONFIG_MEMORY_HOTPLUG
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 763ba81fc0f0..d95f72e79b82 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -72,7 +72,7 @@ extern void __offline_isolated_pages(unsigned long, unsigned long);
72extern int offline_pages(unsigned long, unsigned long, unsigned long); 72extern int offline_pages(unsigned long, unsigned long, unsigned long);
73 73
74/* reasonably generic interface to expand the physical pages in a zone */ 74/* reasonably generic interface to expand the physical pages in a zone */
75extern int __add_pages(struct zone *zone, unsigned long start_pfn, 75extern int __add_pages(int nid, struct zone *zone, unsigned long start_pfn,
76 unsigned long nr_pages); 76 unsigned long nr_pages);
77extern int __remove_pages(struct zone *zone, unsigned long start_pfn, 77extern int __remove_pages(struct zone *zone, unsigned long start_pfn,
78 unsigned long nr_pages); 78 unsigned long nr_pages);
diff --git a/include/linux/node.h b/include/linux/node.h
index bc001bc225c3..681a697b9a86 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -26,6 +26,7 @@ struct node {
26 struct sys_device sysdev; 26 struct sys_device sysdev;
27}; 27};
28 28
29struct memory_block;
29extern struct node node_devices[]; 30extern struct node node_devices[];
30 31
31extern int register_node(struct node *, int, struct node *); 32extern int register_node(struct node *, int, struct node *);
@@ -35,6 +36,9 @@ extern int register_one_node(int nid);
35extern void unregister_one_node(int nid); 36extern void unregister_one_node(int nid);
36extern int register_cpu_under_node(unsigned int cpu, unsigned int nid); 37extern int register_cpu_under_node(unsigned int cpu, unsigned int nid);
37extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid); 38extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid);
39extern int register_mem_sect_under_node(struct memory_block *mem_blk,
40 int nid);
41extern int unregister_mem_sect_under_nodes(struct memory_block *mem_blk);
38#else 42#else
39static inline int register_one_node(int nid) 43static inline int register_one_node(int nid)
40{ 44{
@@ -52,6 +56,15 @@ static inline int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
52{ 56{
53 return 0; 57 return 0;
54} 58}
59static inline int register_mem_sect_under_node(struct memory_block *mem_blk,
60 int nid)
61{
62 return 0;
63}
64static inline int unregister_mem_sect_under_nodes(struct memory_block *mem_blk)
65{
66 return 0;
67}
55#endif 68#endif
56 69
57#define to_node(sys_device) container_of(sys_device, struct node, sysdev) 70#define to_node(sys_device) container_of(sys_device, struct node, sysdev)
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index b17371185468..2ba38bc07b47 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -216,7 +216,8 @@ static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn)
216 return 0; 216 return 0;
217} 217}
218 218
219static int __meminit __add_section(struct zone *zone, unsigned long phys_start_pfn) 219static int __meminit __add_section(int nid, struct zone *zone,
220 unsigned long phys_start_pfn)
220{ 221{
221 int nr_pages = PAGES_PER_SECTION; 222 int nr_pages = PAGES_PER_SECTION;
222 int ret; 223 int ret;
@@ -234,7 +235,7 @@ static int __meminit __add_section(struct zone *zone, unsigned long phys_start_p
234 if (ret < 0) 235 if (ret < 0)
235 return ret; 236 return ret;
236 237
237 return register_new_memory(__pfn_to_section(phys_start_pfn)); 238 return register_new_memory(nid, __pfn_to_section(phys_start_pfn));
238} 239}
239 240
240#ifdef CONFIG_SPARSEMEM_VMEMMAP 241#ifdef CONFIG_SPARSEMEM_VMEMMAP
@@ -273,8 +274,8 @@ static int __remove_section(struct zone *zone, struct mem_section *ms)
273 * call this function after deciding the zone to which to 274 * call this function after deciding the zone to which to
274 * add the new pages. 275 * add the new pages.
275 */ 276 */
276int __ref __add_pages(struct zone *zone, unsigned long phys_start_pfn, 277int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn,
277 unsigned long nr_pages) 278 unsigned long nr_pages)
278{ 279{
279 unsigned long i; 280 unsigned long i;
280 int err = 0; 281 int err = 0;
@@ -284,7 +285,7 @@ int __ref __add_pages(struct zone *zone, unsigned long phys_start_pfn,
284 end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1); 285 end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1);
285 286
286 for (i = start_sec; i <= end_sec; i++) { 287 for (i = start_sec; i <= end_sec; i++) {
287 err = __add_section(zone, i << PFN_SECTION_SHIFT); 288 err = __add_section(nid, zone, i << PFN_SECTION_SHIFT);
288 289
289 /* 290 /*
290 * EEXIST is finally dealt with by ioresource collision 291 * EEXIST is finally dealt with by ioresource collision