diff options
author | Dave Hansen <haveblue@us.ibm.com> | 2005-10-29 21:16:54 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-10-30 00:40:44 -0400 |
commit | 3947be1969a9ce455ec30f60ef51efb10e4323d1 (patch) | |
tree | 0b4b3b4c268beb7aa88cb685cce48b6bb5053c47 /mm | |
parent | bdc8cb984576ab5b550c8b24c6fa111a873503e3 (diff) |
[PATCH] memory hotplug: sysfs and add/remove functions
This adds generic memory add/remove and supporting functions for memory
hotplug into a new file as well as a memory hotplug kernel config option.
Individual architecture patches will follow.
For now, disable memory hotplug when swsusp is enabled. There's a lot of
churn there right now. We'll fix it up properly once it calms down.
Signed-off-by: Matt Tolentino <matthew.e.tolentino@intel.com>
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig | 8 | ||||
-rw-r--r-- | mm/Makefile | 2 | ||||
-rw-r--r-- | mm/memory_hotplug.c | 178 | ||||
-rw-r--r-- | mm/page_alloc.c | 4 |
4 files changed, 189 insertions, 3 deletions
diff --git a/mm/Kconfig b/mm/Kconfig index f35a550ba4b9..1a4473fcb2ca 100644 --- a/mm/Kconfig +++ b/mm/Kconfig | |||
@@ -112,6 +112,14 @@ config SPARSEMEM_EXTREME | |||
112 | def_bool y | 112 | def_bool y |
113 | depends on SPARSEMEM && !SPARSEMEM_STATIC | 113 | depends on SPARSEMEM && !SPARSEMEM_STATIC |
114 | 114 | ||
115 | # eventually, we can have this option just 'select SPARSEMEM' | ||
116 | config MEMORY_HOTPLUG | ||
117 | bool "Allow for memory hot-add" | ||
118 | depends on SPARSEMEM && HOTPLUG && !SOFTWARE_SUSPEND | ||
119 | |||
120 | comment "Memory hotplug is currently incompatible with Software Suspend" | ||
121 | depends on SPARSEMEM && HOTPLUG && SOFTWARE_SUSPEND | ||
122 | |||
115 | # Heavily threaded applications may benefit from splitting the mm-wide | 123 | # Heavily threaded applications may benefit from splitting the mm-wide |
116 | # page_table_lock, so that faults on different parts of the user address | 124 | # page_table_lock, so that faults on different parts of the user address |
117 | # space can be handled with less contention: split it at this NR_CPUS. | 125 | # space can be handled with less contention: split it at this NR_CPUS. |
diff --git a/mm/Makefile b/mm/Makefile index 4cd69e3ce421..2fa6d2ca9f28 100644 --- a/mm/Makefile +++ b/mm/Makefile | |||
@@ -18,5 +18,5 @@ obj-$(CONFIG_NUMA) += mempolicy.o | |||
18 | obj-$(CONFIG_SPARSEMEM) += sparse.o | 18 | obj-$(CONFIG_SPARSEMEM) += sparse.o |
19 | obj-$(CONFIG_SHMEM) += shmem.o | 19 | obj-$(CONFIG_SHMEM) += shmem.o |
20 | obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o | 20 | obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o |
21 | 21 | obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o | |
22 | obj-$(CONFIG_FS_XIP) += filemap_xip.o | 22 | obj-$(CONFIG_FS_XIP) += filemap_xip.o |
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c new file mode 100644 index 000000000000..855e0fc928b3 --- /dev/null +++ b/mm/memory_hotplug.c | |||
@@ -0,0 +1,178 @@ | |||
1 | /* | ||
2 | * linux/mm/memory_hotplug.c | ||
3 | * | ||
4 | * Copyright (C) | ||
5 | */ | ||
6 | |||
7 | #include <linux/config.h> | ||
8 | #include <linux/stddef.h> | ||
9 | #include <linux/mm.h> | ||
10 | #include <linux/swap.h> | ||
11 | #include <linux/interrupt.h> | ||
12 | #include <linux/pagemap.h> | ||
13 | #include <linux/bootmem.h> | ||
14 | #include <linux/compiler.h> | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/pagevec.h> | ||
17 | #include <linux/slab.h> | ||
18 | #include <linux/sysctl.h> | ||
19 | #include <linux/cpu.h> | ||
20 | #include <linux/memory.h> | ||
21 | #include <linux/memory_hotplug.h> | ||
22 | #include <linux/highmem.h> | ||
23 | #include <linux/vmalloc.h> | ||
24 | |||
25 | #include <asm/tlbflush.h> | ||
26 | |||
27 | static struct page *__kmalloc_section_memmap(unsigned long nr_pages) | ||
28 | { | ||
29 | struct page *page, *ret; | ||
30 | unsigned long memmap_size = sizeof(struct page) * nr_pages; | ||
31 | |||
32 | page = alloc_pages(GFP_KERNEL, get_order(memmap_size)); | ||
33 | if (page) | ||
34 | goto got_map_page; | ||
35 | |||
36 | ret = vmalloc(memmap_size); | ||
37 | if (ret) | ||
38 | goto got_map_ptr; | ||
39 | |||
40 | return NULL; | ||
41 | got_map_page: | ||
42 | ret = (struct page *)pfn_to_kaddr(page_to_pfn(page)); | ||
43 | got_map_ptr: | ||
44 | memset(ret, 0, memmap_size); | ||
45 | |||
46 | return ret; | ||
47 | } | ||
48 | |||
49 | extern void zonetable_add(struct zone *zone, int nid, int zid, unsigned long pfn, | ||
50 | unsigned long size); | ||
51 | static void __add_zone(struct zone *zone, unsigned long phys_start_pfn) | ||
52 | { | ||
53 | struct pglist_data *pgdat = zone->zone_pgdat; | ||
54 | int nr_pages = PAGES_PER_SECTION; | ||
55 | int nid = pgdat->node_id; | ||
56 | int zone_type; | ||
57 | |||
58 | zone_type = zone - pgdat->node_zones; | ||
59 | memmap_init_zone(nr_pages, nid, zone_type, phys_start_pfn); | ||
60 | zonetable_add(zone, nid, zone_type, phys_start_pfn, nr_pages); | ||
61 | } | ||
62 | |||
63 | extern int sparse_add_one_section(struct zone *, unsigned long, | ||
64 | struct page *mem_map); | ||
65 | static int __add_section(struct zone *zone, unsigned long phys_start_pfn) | ||
66 | { | ||
67 | struct pglist_data *pgdat = zone->zone_pgdat; | ||
68 | int nr_pages = PAGES_PER_SECTION; | ||
69 | struct page *memmap; | ||
70 | int ret; | ||
71 | |||
72 | /* | ||
73 | * This can potentially allocate memory, and does its own | ||
74 | * internal locking. | ||
75 | */ | ||
76 | sparse_index_init(pfn_to_section_nr(phys_start_pfn), pgdat->node_id); | ||
77 | |||
78 | pgdat_resize_lock(pgdat, &flags); | ||
79 | memmap = __kmalloc_section_memmap(nr_pages); | ||
80 | ret = sparse_add_one_section(zone, phys_start_pfn, memmap); | ||
81 | pgdat_resize_unlock(pgdat, &flags); | ||
82 | |||
83 | if (ret <= 0) { | ||
84 | /* the mem_map didn't get used */ | ||
85 | if (memmap >= (struct page *)VMALLOC_START && | ||
86 | memmap < (struct page *)VMALLOC_END) | ||
87 | vfree(memmap); | ||
88 | else | ||
89 | free_pages((unsigned long)memmap, | ||
90 | get_order(sizeof(struct page) * nr_pages)); | ||
91 | } | ||
92 | |||
93 | if (ret < 0) | ||
94 | return ret; | ||
95 | |||
96 | __add_zone(zone, phys_start_pfn); | ||
97 | return register_new_memory(__pfn_to_section(phys_start_pfn)); | ||
98 | } | ||
99 | |||
100 | /* | ||
101 | * Reasonably generic function for adding memory. It is | ||
102 | * expected that archs that support memory hotplug will | ||
103 | * call this function after deciding the zone to which to | ||
104 | * add the new pages. | ||
105 | */ | ||
106 | int __add_pages(struct zone *zone, unsigned long phys_start_pfn, | ||
107 | unsigned long nr_pages) | ||
108 | { | ||
109 | unsigned long i; | ||
110 | int err = 0; | ||
111 | |||
112 | for (i = 0; i < nr_pages; i += PAGES_PER_SECTION) { | ||
113 | err = __add_section(zone, phys_start_pfn + i); | ||
114 | |||
115 | if (err) | ||
116 | break; | ||
117 | } | ||
118 | |||
119 | return err; | ||
120 | } | ||
121 | |||
122 | static void grow_zone_span(struct zone *zone, | ||
123 | unsigned long start_pfn, unsigned long end_pfn) | ||
124 | { | ||
125 | unsigned long old_zone_end_pfn; | ||
126 | |||
127 | zone_span_writelock(zone); | ||
128 | |||
129 | old_zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages; | ||
130 | if (start_pfn < zone->zone_start_pfn) | ||
131 | zone->zone_start_pfn = start_pfn; | ||
132 | |||
133 | if (end_pfn > old_zone_end_pfn) | ||
134 | zone->spanned_pages = end_pfn - zone->zone_start_pfn; | ||
135 | |||
136 | zone_span_writeunlock(zone); | ||
137 | } | ||
138 | |||
139 | static void grow_pgdat_span(struct pglist_data *pgdat, | ||
140 | unsigned long start_pfn, unsigned long end_pfn) | ||
141 | { | ||
142 | unsigned long old_pgdat_end_pfn = | ||
143 | pgdat->node_start_pfn + pgdat->node_spanned_pages; | ||
144 | |||
145 | if (start_pfn < pgdat->node_start_pfn) | ||
146 | pgdat->node_start_pfn = start_pfn; | ||
147 | |||
148 | if (end_pfn > old_pgdat_end_pfn) | ||
149 | pgdat->node_spanned_pages = end_pfn - pgdat->node_spanned_pages; | ||
150 | } | ||
151 | |||
152 | int online_pages(unsigned long pfn, unsigned long nr_pages) | ||
153 | { | ||
154 | unsigned long i; | ||
155 | unsigned long flags; | ||
156 | unsigned long onlined_pages = 0; | ||
157 | struct zone *zone; | ||
158 | |||
159 | /* | ||
160 | * This doesn't need a lock to do pfn_to_page(). | ||
161 | * The section can't be removed here because of the | ||
162 | * memory_block->state_sem. | ||
163 | */ | ||
164 | zone = page_zone(pfn_to_page(pfn)); | ||
165 | pgdat_resize_lock(zone->zone_pgdat, &flags); | ||
166 | grow_zone_span(zone, pfn, pfn + nr_pages); | ||
167 | grow_pgdat_span(zone->zone_pgdat, pfn, pfn + nr_pages); | ||
168 | pgdat_resize_unlock(zone->zone_pgdat, &flags); | ||
169 | |||
170 | for (i = 0; i < nr_pages; i++) { | ||
171 | struct page *page = pfn_to_page(pfn + i); | ||
172 | online_page(page); | ||
173 | onlined_pages++; | ||
174 | } | ||
175 | zone->present_pages += onlined_pages; | ||
176 | |||
177 | return 0; | ||
178 | } | ||
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 817635f2ab62..183abf39b445 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -1686,7 +1686,7 @@ static void __init calculate_zone_totalpages(struct pglist_data *pgdat, | |||
1686 | * up by free_all_bootmem() once the early boot process is | 1686 | * up by free_all_bootmem() once the early boot process is |
1687 | * done. Non-atomic initialization, single-pass. | 1687 | * done. Non-atomic initialization, single-pass. |
1688 | */ | 1688 | */ |
1689 | void __init memmap_init_zone(unsigned long size, int nid, unsigned long zone, | 1689 | void __devinit memmap_init_zone(unsigned long size, int nid, unsigned long zone, |
1690 | unsigned long start_pfn) | 1690 | unsigned long start_pfn) |
1691 | { | 1691 | { |
1692 | struct page *page; | 1692 | struct page *page; |
@@ -2407,7 +2407,7 @@ static void setup_per_zone_lowmem_reserve(void) | |||
2407 | * that the pages_{min,low,high} values for each zone are set correctly | 2407 | * that the pages_{min,low,high} values for each zone are set correctly |
2408 | * with respect to min_free_kbytes. | 2408 | * with respect to min_free_kbytes. |
2409 | */ | 2409 | */ |
2410 | static void setup_per_zone_pages_min(void) | 2410 | void setup_per_zone_pages_min(void) |
2411 | { | 2411 | { |
2412 | unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10); | 2412 | unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10); |
2413 | unsigned long lowmem_pages = 0; | 2413 | unsigned long lowmem_pages = 0; |