diff options
author | Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com> | 2013-02-22 19:33:00 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-02-23 20:50:12 -0500 |
commit | 46723bfa540f0a1e494476a1734d03626a0bd1e0 (patch) | |
tree | c8d3ef712dd67b45c9334f04edeec4aa981a2e29 | |
parent | 24d335ca3606b610ec69c66a1e42760c96d89470 (diff) |
memory-hotplug: implement register_page_bootmem_info_section of sparse-vmemmap
To remove a memmap region of sparse-vmemmap that was allocated from bootmem,
the memmap region of sparse-vmemmap needs to be registered by
get_page_bootmem(). So the patch walks the pages of the virtual mapping and
registers those pages with get_page_bootmem().
NOTE: register_page_bootmem_memmap() is not implemented for ia64,
ppc, s390, and sparc. So introduce CONFIG_HAVE_BOOTMEM_INFO_NODE
and revert register_page_bootmem_info_node() when the platform doesn't
support it.
It's implemented by adding a new Kconfig option named
CONFIG_HAVE_BOOTMEM_INFO_NODE, which is automatically selected on
archs that fully support the memory-hotplug feature (currently only
x86_64).
We have 2 config options called MEMORY_HOTPLUG and
MEMORY_HOTREMOVE, used for memory hot-add and hot-remove respectively,
and the code in function register_page_bootmem_info_node() is only
used for collecting information for hot-remove, so place it under
MEMORY_HOTREMOVE.
Besides, page_isolation.c, selected by MEMORY_ISOLATION under
MEMORY_HOTPLUG, is a similar case, so move its select to MEMORY_HOTREMOVE too.
[mhocko@suse.cz: put register_page_bootmem_memmap inside CONFIG_MEMORY_HOTPLUG_SPARSE]
[linfeng@cn.fujitsu.com: introduce CONFIG_HAVE_BOOTMEM_INFO_NODE and revert register_page_bootmem_info_node()]
[mhocko@suse.cz: remove the arch specific functions without any implementation]
[linfeng@cn.fujitsu.com: mm/Kconfig: move auto selects from MEMORY_HOTPLUG to MEMORY_HOTREMOVE as needed]
[rientjes@google.com: fix defined but not used warning]
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
Reviewed-by: Wu Jianguo <wujianguo@huawei.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Jiang Liu <jiang.liu@huawei.com>
Cc: Jianguo Wu <wujianguo@huawei.com>
Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Lin Feng <linfeng@cn.fujitsu.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | arch/ia64/mm/discontig.c | 1 | ||||
-rw-r--r-- | arch/powerpc/mm/init_64.c | 1 | ||||
-rw-r--r-- | arch/sparc/mm/init_64.c | 1 | ||||
-rw-r--r-- | arch/x86/mm/init_64.c | 60 | ||||
-rw-r--r-- | include/linux/memory_hotplug.h | 13 | ||||
-rw-r--r-- | include/linux/mm.h | 3 | ||||
-rw-r--r-- | mm/Kconfig | 10 | ||||
-rw-r--r-- | mm/memory_hotplug.c | 35 |
8 files changed, 111 insertions, 13 deletions
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index c641333cd997..731bf84094b6 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c | |||
@@ -822,4 +822,5 @@ int __meminit vmemmap_populate(struct page *start_page, | |||
822 | { | 822 | { |
823 | return vmemmap_populate_basepages(start_page, size, node); | 823 | return vmemmap_populate_basepages(start_page, size, node); |
824 | } | 824 | } |
825 | |||
825 | #endif | 826 | #endif |
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 95a45293e5ac..42bf082f0124 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c | |||
@@ -297,5 +297,6 @@ int __meminit vmemmap_populate(struct page *start_page, | |||
297 | 297 | ||
298 | return 0; | 298 | return 0; |
299 | } | 299 | } |
300 | |||
300 | #endif /* CONFIG_SPARSEMEM_VMEMMAP */ | 301 | #endif /* CONFIG_SPARSEMEM_VMEMMAP */ |
301 | 302 | ||
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 5c2c6e61facb..59c6fcfdc782 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c | |||
@@ -2235,6 +2235,7 @@ void __meminit vmemmap_populate_print_last(void) | |||
2235 | node_start = 0; | 2235 | node_start = 0; |
2236 | } | 2236 | } |
2237 | } | 2237 | } |
2238 | |||
2238 | #endif /* CONFIG_SPARSEMEM_VMEMMAP */ | 2239 | #endif /* CONFIG_SPARSEMEM_VMEMMAP */ |
2239 | 2240 | ||
2240 | static void prot_init_common(unsigned long page_none, | 2241 | static void prot_init_common(unsigned long page_none, |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index b6dd1c480b30..f17aa76dc1ae 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -1034,6 +1034,66 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node) | |||
1034 | return 0; | 1034 | return 0; |
1035 | } | 1035 | } |
1036 | 1036 | ||
1037 | #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE) | ||
1038 | void register_page_bootmem_memmap(unsigned long section_nr, | ||
1039 | struct page *start_page, unsigned long size) | ||
1040 | { | ||
1041 | unsigned long addr = (unsigned long)start_page; | ||
1042 | unsigned long end = (unsigned long)(start_page + size); | ||
1043 | unsigned long next; | ||
1044 | pgd_t *pgd; | ||
1045 | pud_t *pud; | ||
1046 | pmd_t *pmd; | ||
1047 | unsigned int nr_pages; | ||
1048 | struct page *page; | ||
1049 | |||
1050 | for (; addr < end; addr = next) { | ||
1051 | pte_t *pte = NULL; | ||
1052 | |||
1053 | pgd = pgd_offset_k(addr); | ||
1054 | if (pgd_none(*pgd)) { | ||
1055 | next = (addr + PAGE_SIZE) & PAGE_MASK; | ||
1056 | continue; | ||
1057 | } | ||
1058 | get_page_bootmem(section_nr, pgd_page(*pgd), MIX_SECTION_INFO); | ||
1059 | |||
1060 | pud = pud_offset(pgd, addr); | ||
1061 | if (pud_none(*pud)) { | ||
1062 | next = (addr + PAGE_SIZE) & PAGE_MASK; | ||
1063 | continue; | ||
1064 | } | ||
1065 | get_page_bootmem(section_nr, pud_page(*pud), MIX_SECTION_INFO); | ||
1066 | |||
1067 | if (!cpu_has_pse) { | ||
1068 | next = (addr + PAGE_SIZE) & PAGE_MASK; | ||
1069 | pmd = pmd_offset(pud, addr); | ||
1070 | if (pmd_none(*pmd)) | ||
1071 | continue; | ||
1072 | get_page_bootmem(section_nr, pmd_page(*pmd), | ||
1073 | MIX_SECTION_INFO); | ||
1074 | |||
1075 | pte = pte_offset_kernel(pmd, addr); | ||
1076 | if (pte_none(*pte)) | ||
1077 | continue; | ||
1078 | get_page_bootmem(section_nr, pte_page(*pte), | ||
1079 | SECTION_INFO); | ||
1080 | } else { | ||
1081 | next = pmd_addr_end(addr, end); | ||
1082 | |||
1083 | pmd = pmd_offset(pud, addr); | ||
1084 | if (pmd_none(*pmd)) | ||
1085 | continue; | ||
1086 | |||
1087 | nr_pages = 1 << (get_order(PMD_SIZE)); | ||
1088 | page = pmd_page(*pmd); | ||
1089 | while (nr_pages--) | ||
1090 | get_page_bootmem(section_nr, page++, | ||
1091 | SECTION_INFO); | ||
1092 | } | ||
1093 | } | ||
1094 | } | ||
1095 | #endif | ||
1096 | |||
1037 | void __meminit vmemmap_populate_print_last(void) | 1097 | void __meminit vmemmap_populate_print_last(void) |
1038 | { | 1098 | { |
1039 | if (p_start) { | 1099 | if (p_start) { |
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 31a563bbd936..4d523fe75ba1 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h | |||
@@ -174,17 +174,16 @@ static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat) | |||
174 | #endif /* CONFIG_NUMA */ | 174 | #endif /* CONFIG_NUMA */ |
175 | #endif /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */ | 175 | #endif /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */ |
176 | 176 | ||
177 | #ifdef CONFIG_SPARSEMEM_VMEMMAP | 177 | #ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE |
178 | extern void register_page_bootmem_info_node(struct pglist_data *pgdat); | ||
179 | #else | ||
178 | static inline void register_page_bootmem_info_node(struct pglist_data *pgdat) | 180 | static inline void register_page_bootmem_info_node(struct pglist_data *pgdat) |
179 | { | 181 | { |
180 | } | 182 | } |
181 | static inline void put_page_bootmem(struct page *page) | ||
182 | { | ||
183 | } | ||
184 | #else | ||
185 | extern void register_page_bootmem_info_node(struct pglist_data *pgdat); | ||
186 | extern void put_page_bootmem(struct page *page); | ||
187 | #endif | 183 | #endif |
184 | extern void put_page_bootmem(struct page *page); | ||
185 | extern void get_page_bootmem(unsigned long ingo, struct page *page, | ||
186 | unsigned long type); | ||
188 | 187 | ||
189 | /* | 188 | /* |
190 | * Lock for memory hotplug guarantees 1) all callbacks for memory hotplug | 189 | * Lock for memory hotplug guarantees 1) all callbacks for memory hotplug |
diff --git a/include/linux/mm.h b/include/linux/mm.h index 95db68e34b18..060557b9764f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -1718,7 +1718,8 @@ int vmemmap_populate_basepages(struct page *start_page, | |||
1718 | unsigned long pages, int node); | 1718 | unsigned long pages, int node); |
1719 | int vmemmap_populate(struct page *start_page, unsigned long pages, int node); | 1719 | int vmemmap_populate(struct page *start_page, unsigned long pages, int node); |
1720 | void vmemmap_populate_print_last(void); | 1720 | void vmemmap_populate_print_last(void); |
1721 | 1721 | void register_page_bootmem_memmap(unsigned long section_nr, struct page *map, | |
1722 | unsigned long size); | ||
1722 | 1723 | ||
1723 | enum mf_flags { | 1724 | enum mf_flags { |
1724 | MF_COUNT_INCREASED = 1 << 0, | 1725 | MF_COUNT_INCREASED = 1 << 0, |
diff --git a/mm/Kconfig b/mm/Kconfig index 0b23db9a8791..2c7aea7106f9 100644 --- a/mm/Kconfig +++ b/mm/Kconfig | |||
@@ -162,10 +162,16 @@ config MOVABLE_NODE | |||
162 | Say Y here if you want to hotplug a whole node. | 162 | Say Y here if you want to hotplug a whole node. |
163 | Say N here if you want kernel to use memory on all nodes evenly. | 163 | Say N here if you want kernel to use memory on all nodes evenly. |
164 | 164 | ||
165 | # | ||
166 | # Only be set on architectures that have completely implemented memory hotplug | ||
167 | # feature. If you are not sure, don't touch it. | ||
168 | # | ||
169 | config HAVE_BOOTMEM_INFO_NODE | ||
170 | def_bool n | ||
171 | |||
165 | # eventually, we can have this option just 'select SPARSEMEM' | 172 | # eventually, we can have this option just 'select SPARSEMEM' |
166 | config MEMORY_HOTPLUG | 173 | config MEMORY_HOTPLUG |
167 | bool "Allow for memory hot-add" | 174 | bool "Allow for memory hot-add" |
168 | select MEMORY_ISOLATION | ||
169 | depends on SPARSEMEM || X86_64_ACPI_NUMA | 175 | depends on SPARSEMEM || X86_64_ACPI_NUMA |
170 | depends on HOTPLUG && ARCH_ENABLE_MEMORY_HOTPLUG | 176 | depends on HOTPLUG && ARCH_ENABLE_MEMORY_HOTPLUG |
171 | depends on (IA64 || X86 || PPC_BOOK3S_64 || SUPERH || S390) | 177 | depends on (IA64 || X86 || PPC_BOOK3S_64 || SUPERH || S390) |
@@ -176,6 +182,8 @@ config MEMORY_HOTPLUG_SPARSE | |||
176 | 182 | ||
177 | config MEMORY_HOTREMOVE | 183 | config MEMORY_HOTREMOVE |
178 | bool "Allow for memory hot remove" | 184 | bool "Allow for memory hot remove" |
185 | select MEMORY_ISOLATION | ||
186 | select HAVE_BOOTMEM_INFO_NODE if X86_64 | ||
179 | depends on MEMORY_HOTPLUG && ARCH_ENABLE_MEMORY_HOTREMOVE | 187 | depends on MEMORY_HOTPLUG && ARCH_ENABLE_MEMORY_HOTREMOVE |
180 | depends on MIGRATION | 188 | depends on MIGRATION |
181 | 189 | ||
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 942b43f6d736..6c90d222ec0a 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c | |||
@@ -91,9 +91,8 @@ static void release_memory_resource(struct resource *res) | |||
91 | } | 91 | } |
92 | 92 | ||
93 | #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE | 93 | #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE |
94 | #ifndef CONFIG_SPARSEMEM_VMEMMAP | 94 | void get_page_bootmem(unsigned long info, struct page *page, |
95 | static void get_page_bootmem(unsigned long info, struct page *page, | 95 | unsigned long type) |
96 | unsigned long type) | ||
97 | { | 96 | { |
98 | page->lru.next = (struct list_head *) type; | 97 | page->lru.next = (struct list_head *) type; |
99 | SetPagePrivate(page); | 98 | SetPagePrivate(page); |
@@ -128,6 +127,8 @@ void __ref put_page_bootmem(struct page *page) | |||
128 | 127 | ||
129 | } | 128 | } |
130 | 129 | ||
130 | #ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE | ||
131 | #ifndef CONFIG_SPARSEMEM_VMEMMAP | ||
131 | static void register_page_bootmem_info_section(unsigned long start_pfn) | 132 | static void register_page_bootmem_info_section(unsigned long start_pfn) |
132 | { | 133 | { |
133 | unsigned long *usemap, mapsize, section_nr, i; | 134 | unsigned long *usemap, mapsize, section_nr, i; |
@@ -161,6 +162,32 @@ static void register_page_bootmem_info_section(unsigned long start_pfn) | |||
161 | get_page_bootmem(section_nr, page, MIX_SECTION_INFO); | 162 | get_page_bootmem(section_nr, page, MIX_SECTION_INFO); |
162 | 163 | ||
163 | } | 164 | } |
165 | #else /* CONFIG_SPARSEMEM_VMEMMAP */ | ||
166 | static void register_page_bootmem_info_section(unsigned long start_pfn) | ||
167 | { | ||
168 | unsigned long *usemap, mapsize, section_nr, i; | ||
169 | struct mem_section *ms; | ||
170 | struct page *page, *memmap; | ||
171 | |||
172 | if (!pfn_valid(start_pfn)) | ||
173 | return; | ||
174 | |||
175 | section_nr = pfn_to_section_nr(start_pfn); | ||
176 | ms = __nr_to_section(section_nr); | ||
177 | |||
178 | memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr); | ||
179 | |||
180 | register_page_bootmem_memmap(section_nr, memmap, PAGES_PER_SECTION); | ||
181 | |||
182 | usemap = __nr_to_section(section_nr)->pageblock_flags; | ||
183 | page = virt_to_page(usemap); | ||
184 | |||
185 | mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT; | ||
186 | |||
187 | for (i = 0; i < mapsize; i++, page++) | ||
188 | get_page_bootmem(section_nr, page, MIX_SECTION_INFO); | ||
189 | } | ||
190 | #endif /* !CONFIG_SPARSEMEM_VMEMMAP */ | ||
164 | 191 | ||
165 | void register_page_bootmem_info_node(struct pglist_data *pgdat) | 192 | void register_page_bootmem_info_node(struct pglist_data *pgdat) |
166 | { | 193 | { |
@@ -203,7 +230,7 @@ void register_page_bootmem_info_node(struct pglist_data *pgdat) | |||
203 | register_page_bootmem_info_section(pfn); | 230 | register_page_bootmem_info_section(pfn); |
204 | } | 231 | } |
205 | } | 232 | } |
206 | #endif /* !CONFIG_SPARSEMEM_VMEMMAP */ | 233 | #endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */ |
207 | 234 | ||
208 | static void grow_zone_span(struct zone *zone, unsigned long start_pfn, | 235 | static void grow_zone_span(struct zone *zone, unsigned long start_pfn, |
209 | unsigned long end_pfn) | 236 | unsigned long end_pfn) |