aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com> 2014-08-22 16:27:34 -0400
committer Ingo Molnar <mingo@kernel.org> 2014-09-16 02:55:09 -0400
commit 9661d5bcd058fe15b4138a00d96bd36516134543 (patch)
tree a6312947f6499f060ea2b8b36b09420978c81547
parent 5255e0a79fcc0ff47b387af92bd9ef5729b1b859 (diff)
x86/mm/hotplug: Modify PGD entry when removing memory
When hot-adding or hot-removing memory, sync_global_pgds() is called to synchronize the kernel's PGD entries into the PGD of every process's MM. However, sync_global_pgds() does not work correctly for hot-removal. It first checks whether the target PGD entry is none and, if it is, skips that entry. But after hot-removing memory the PGD entry may be none precisely because free_pud_table() has cleared it. So when sync_global_pgds() is called after hot-removing memory, it must not skip a PGD entry just because it is none; instead it must clear the corresponding PGD entry in every process's MM. Currently sync_global_pgds() does not do this. As a result, hot-adding memory that covers the same range as previously hot-removed memory produces the following call trace: kernel BUG at arch/x86/mm/init_64.c:206! ... [<ffffffff815e0c80>] kernel_physical_mapping_init+0x1b2/0x1d2 [<ffffffff815ced94>] init_memory_mapping+0x1d4/0x380 [<ffffffff8104aebd>] arch_add_memory+0x3d/0xd0 [<ffffffff815d03d9>] add_memory+0xb9/0x1b0 [<ffffffff81352415>] acpi_memory_device_add+0x1af/0x28e [<ffffffff81325dc4>] acpi_bus_device_attach+0x8c/0xf0 [<ffffffff813413b9>] acpi_ns_walk_namespace+0xc8/0x17f [<ffffffff81325d38>] ? acpi_bus_type_and_status+0xb7/0xb7 [<ffffffff81325d38>] ? acpi_bus_type_and_status+0xb7/0xb7 [<ffffffff813418ed>] acpi_walk_namespace+0x95/0xc5 [<ffffffff81326b4c>] acpi_bus_scan+0x9a/0xc2 [<ffffffff81326bff>] acpi_scan_bus_device_check+0x8b/0x12e [<ffffffff81326cb5>] acpi_scan_device_check+0x13/0x15 [<ffffffff81320122>] acpi_os_execute_deferred+0x25/0x32 [<ffffffff8107e02b>] process_one_work+0x17b/0x460 [<ffffffff8107edfb>] worker_thread+0x11b/0x400 [<ffffffff8107ece0>] ? rescuer_thread+0x400/0x400 [<ffffffff81085aef>] kthread+0xcf/0xe0 [<ffffffff81085a20>] ? kthread_create_on_node+0x140/0x140 [<ffffffff815fc76c>] ret_from_fork+0x7c/0xb0 [<ffffffff81085a20>] ? 
kthread_create_on_node+0x140/0x140 This patch clears the PGD entries in every process's MM when sync_global_pgds() is called after hot-removing memory. Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com> Acked-by: Toshi Kani <toshi.kani@hp.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Cc: Tang Chen <tangchen@cn.fujitsu.com> Cc: Gu Zheng <guz.fnst@cn.fujitsu.com> Cc: Zhang Yanfei <zhangyanfei@cn.fujitsu.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- arch/x86/include/asm/pgtable_64.h 3
-rw-r--r-- arch/x86/mm/fault.c 2
-rw-r--r-- arch/x86/mm/init_64.c 27
3 files changed, 22 insertions, 10 deletions
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 5be9063545d2..809abb335627 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -115,7 +115,8 @@ static inline void native_pgd_clear(pgd_t *pgd)
115 native_set_pgd(pgd, native_make_pgd(0)); 115 native_set_pgd(pgd, native_make_pgd(0));
116} 116}
117 117
118extern void sync_global_pgds(unsigned long start, unsigned long end); 118extern void sync_global_pgds(unsigned long start, unsigned long end,
119 int removed);
119 120
120/* 121/*
121 * Conversion functions: convert a page and protection to a page entry, 122 * Conversion functions: convert a page and protection to a page entry,
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a24194681513..d393ac669cc0 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -350,7 +350,7 @@ out:
350 350
351void vmalloc_sync_all(void) 351void vmalloc_sync_all(void)
352{ 352{
353 sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END); 353 sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END, 0);
354} 354}
355 355
356/* 356/*
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 0e996c0a7eff..529625118ff6 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -178,7 +178,7 @@ __setup("noexec32=", nonx32_setup);
178 * When memory was added/removed make sure all the processes MM have 178 * When memory was added/removed make sure all the processes MM have
179 * suitable PGD entries in the local PGD level page. 179 * suitable PGD entries in the local PGD level page.
180 */ 180 */
181void sync_global_pgds(unsigned long start, unsigned long end) 181void sync_global_pgds(unsigned long start, unsigned long end, int removed)
182{ 182{
183 unsigned long address; 183 unsigned long address;
184 184
@@ -186,7 +186,12 @@ void sync_global_pgds(unsigned long start, unsigned long end)
186 const pgd_t *pgd_ref = pgd_offset_k(address); 186 const pgd_t *pgd_ref = pgd_offset_k(address);
187 struct page *page; 187 struct page *page;
188 188
189 if (pgd_none(*pgd_ref)) 189 /*
190 * When it is called after memory hot remove, pgd_none()
191 * returns true. In this case (removed == 1), we must clear
192 * the PGD entries in the local PGD level page.
193 */
194 if (pgd_none(*pgd_ref) && !removed)
190 continue; 195 continue;
191 196
192 spin_lock(&pgd_lock); 197 spin_lock(&pgd_lock);
@@ -199,12 +204,18 @@ void sync_global_pgds(unsigned long start, unsigned long end)
199 pgt_lock = &pgd_page_get_mm(page)->page_table_lock; 204 pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
200 spin_lock(pgt_lock); 205 spin_lock(pgt_lock);
201 206
202 if (pgd_none(*pgd)) 207 if (!pgd_none(*pgd_ref) && !pgd_none(*pgd))
203 set_pgd(pgd, *pgd_ref);
204 else
205 BUG_ON(pgd_page_vaddr(*pgd) 208 BUG_ON(pgd_page_vaddr(*pgd)
206 != pgd_page_vaddr(*pgd_ref)); 209 != pgd_page_vaddr(*pgd_ref));
207 210
211 if (removed) {
212 if (pgd_none(*pgd_ref) && !pgd_none(*pgd))
213 pgd_clear(pgd);
214 } else {
215 if (pgd_none(*pgd))
216 set_pgd(pgd, *pgd_ref);
217 }
218
208 spin_unlock(pgt_lock); 219 spin_unlock(pgt_lock);
209 } 220 }
210 spin_unlock(&pgd_lock); 221 spin_unlock(&pgd_lock);
@@ -633,7 +644,7 @@ kernel_physical_mapping_init(unsigned long start,
633 } 644 }
634 645
635 if (pgd_changed) 646 if (pgd_changed)
636 sync_global_pgds(addr, end - 1); 647 sync_global_pgds(addr, end - 1, 0);
637 648
638 __flush_tlb_all(); 649 __flush_tlb_all();
639 650
@@ -995,7 +1006,7 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct)
995 } 1006 }
996 1007
997 if (pgd_changed) 1008 if (pgd_changed)
998 sync_global_pgds(start, end - 1); 1009 sync_global_pgds(start, end - 1, 1);
999 1010
1000 flush_tlb_all(); 1011 flush_tlb_all();
1001} 1012}
@@ -1342,7 +1353,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
1342 else 1353 else
1343 err = vmemmap_populate_basepages(start, end, node); 1354 err = vmemmap_populate_basepages(start, end, node);
1344 if (!err) 1355 if (!err)
1345 sync_global_pgds(start, end - 1); 1356 sync_global_pgds(start, end - 1, 0);
1346 return err; 1357 return err;
1347} 1358}
1348 1359