author    Linus Torvalds <torvalds@linux-foundation.org>  2014-10-13 20:22:41 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2014-10-13 20:22:41 -0400
commit    df133e8fa8e1d4afa57c84953bf80eaed2b145e0
tree      863d38b50f91629513a414227874a1142d5d21e5
parent    e3438330f58330ec236c861d43f46bef06780e62
parent    beb9147e95a75f41c984d7235cf6d59f3ca2d5db
Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 mm updates from Ingo Molnar:
 "This tree includes the following changes:

   - fix memory hotplug
   - fix hibernation bootup memory layout assumptions
   - fix hyperv numa guest kernel messages
   - remove dead code
   - update documentation"

* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mm: Update memory map description to list hypervisor-reserved area
  x86/mm, hibernate: Do not assume the first e820 area to be RAM
  x86/mm/numa: Drop dead code and rename setup_node_data() to setup_alloc_data()
  x86/mm/hotplug: Modify PGD entry when removing memory
  x86/mm/hotplug: Pass sync_global_pgds() a correct argument in remove_pagetable()
  x86: Remove set_pmd_pfn
 Documentation/x86/x86_64/mm.txt   |  2 +-
 arch/x86/include/asm/numa.h       |  1 -
 arch/x86/include/asm/pgtable_32.h |  3 ---
 arch/x86/include/asm/pgtable_64.h |  3 ++-
 arch/x86/kernel/e820.c            |  7 +++----
 arch/x86/mm/fault.c               |  2 +-
 arch/x86/mm/init_64.c             | 36 ++++++++++++++++++++++------------
 arch/x86/mm/numa.c                | 34 ++++++++++++--------------------
 arch/x86/mm/pgtable_32.c          | 35 -----------------------------------
 mm/page_alloc.c                   |  2 ++
 10 files changed, 47 insertions(+), 78 deletions(-)
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index afe68ddbe6a4..052ee643a32e 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -5,7 +5,7 @@ Virtual memory map with 4 level page tables:
 
 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm
 hole caused by [48:63] sign extension
-ffff800000000000 - ffff80ffffffffff (=40 bits) guard hole
+ffff800000000000 - ffff87ffffffffff (=43 bits) guard hole, reserved for hypervisor
 ffff880000000000 - ffffc7ffffffffff (=64 TB) direct mapping of all phys. memory
 ffffc80000000000 - ffffc8ffffffffff (=40 bits) hole
 ffffc90000000000 - ffffe8ffffffffff (=45 bits) vmalloc/ioremap space
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 4064acae625d..01b493e5a99b 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -9,7 +9,6 @@
 #ifdef CONFIG_NUMA
 
 #define NR_NODE_MEMBLKS		(MAX_NUMNODES*2)
-#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
 
 /*
  * Too small node sizes may confuse the VM badly. Usually they
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 9ee322103c6d..b6c0b404898a 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -32,9 +32,6 @@ static inline void pgtable_cache_init(void) { }
 static inline void check_pgt_cache(void) { }
 void paging_init(void);
 
-extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t);
-
-
 /*
  * Define this if things work differently on an i386 and an i486:
  * it will (on an i486) warn about kernel memory accesses that are
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 3874693c0e53..4572b2f30237 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -116,7 +116,8 @@ static inline void native_pgd_clear(pgd_t *pgd)
 	native_set_pgd(pgd, native_make_pgd(0));
 }
 
-extern void sync_global_pgds(unsigned long start, unsigned long end);
+extern void sync_global_pgds(unsigned long start, unsigned long end,
+			     int removed);
 
 /*
  * Conversion functions: convert a page and protection to a page entry,
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 988c00a1f60d..49f886481615 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -682,15 +682,14 @@ void __init parse_e820_ext(u64 phys_addr, u32 data_len)
  * hibernation (32 bit) or software suspend and suspend to RAM (64 bit).
  *
  * This function requires the e820 map to be sorted and without any
- * overlapping entries and assumes the first e820 area to be RAM.
+ * overlapping entries.
  */
 void __init e820_mark_nosave_regions(unsigned long limit_pfn)
 {
 	int i;
-	unsigned long pfn;
+	unsigned long pfn = 0;
 
-	pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size);
-	for (i = 1; i < e820.nr_map; i++) {
+	for (i = 0; i < e820.nr_map; i++) {
 		struct e820entry *ei = &e820.map[i];
 
 		if (pfn < PFN_UP(ei->addr))
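
The hunk above drops the assumption that the first e820 entry is RAM: pfn is seeded with 0 and the scan starts at entry 0, so the first entry is examined like any other. A minimal standalone sketch of that loop shape (plain userspace C; struct region, the is_ram flag, and the PFN macros are simplified stand-ins for the kernel's e820entry, E820_RAM check, and PFN helpers, not the real ones):

#include <stdio.h>

#define PAGE_SHIFT 12
/* Illustrative stand-ins for the kernel's PFN_UP()/PFN_DOWN() helpers. */
#define PFN_UP(x)   (((x) + (1UL << PAGE_SHIFT) - 1) >> PAGE_SHIFT)
#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)

struct region { unsigned long addr, size; int is_ram; };

/* Walk a sorted, non-overlapping map: a gap below the current entry,
 * or a non-RAM entry, is not saveable.  Starting at pfn = 0 and i = 0
 * means the first entry is checked rather than assumed to be RAM. */
static void mark_nosave(const struct region *map, int n)
{
	unsigned long pfn = 0;
	int i;

	for (i = 0; i < n; i++) {
		if (pfn < PFN_UP(map[i].addr))
			printf("nosave pfns %lu-%lu (gap)\n",
			       pfn, PFN_UP(map[i].addr) - 1);
		if (!map[i].is_ram)
			printf("nosave entry at %#lx (not RAM)\n", map[i].addr);
		pfn = PFN_DOWN(map[i].addr + map[i].size);
	}
}

int main(void)
{
	/* First entry deliberately not RAM: the old code, which seeded
	 * pfn from map[0] and scanned from i = 1, never looked at it. */
	struct region map[] = {
		{ 0x00000, 0x01000, 0 },
		{ 0x01000, 0x9e000, 1 },
	};
	mark_nosave(map, 2);
	return 0;
}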
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 9c5b32e2bdc0..d973e61e450d 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -349,7 +349,7 @@ out:
 
 void vmalloc_sync_all(void)
 {
-	sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END);
+	sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END, 0);
 }
 
 /*
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 5d984769cbd8..4cb8763868fc 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -178,7 +178,7 @@ __setup("noexec32=", nonx32_setup);
  * When memory was added/removed make sure all the processes MM have
  * suitable PGD entries in the local PGD level page.
  */
-void sync_global_pgds(unsigned long start, unsigned long end)
+void sync_global_pgds(unsigned long start, unsigned long end, int removed)
 {
 	unsigned long address;
 
@@ -186,7 +186,12 @@ void sync_global_pgds(unsigned long start, unsigned long end)
 		const pgd_t *pgd_ref = pgd_offset_k(address);
 		struct page *page;
 
-		if (pgd_none(*pgd_ref))
+		/*
+		 * When it is called after memory hot remove, pgd_none()
+		 * returns true. In this case (removed == 1), we must clear
+		 * the PGD entries in the local PGD level page.
+		 */
+		if (pgd_none(*pgd_ref) && !removed)
 			continue;
 
 		spin_lock(&pgd_lock);
@@ -199,12 +204,18 @@ void sync_global_pgds(unsigned long start, unsigned long end)
 			pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
 			spin_lock(pgt_lock);
 
-			if (pgd_none(*pgd))
-				set_pgd(pgd, *pgd_ref);
-			else
+			if (!pgd_none(*pgd_ref) && !pgd_none(*pgd))
 				BUG_ON(pgd_page_vaddr(*pgd)
 				       != pgd_page_vaddr(*pgd_ref));
 
+			if (removed) {
+				if (pgd_none(*pgd_ref) && !pgd_none(*pgd))
+					pgd_clear(pgd);
+			} else {
+				if (pgd_none(*pgd))
+					set_pgd(pgd, *pgd_ref);
+			}
+
 			spin_unlock(pgt_lock);
 		}
 		spin_unlock(&pgd_lock);
@@ -633,7 +644,7 @@ kernel_physical_mapping_init(unsigned long start,
 	}
 
 	if (pgd_changed)
-		sync_global_pgds(addr, end - 1);
+		sync_global_pgds(addr, end - 1, 0);
 
 	__flush_tlb_all();
 
@@ -976,25 +987,26 @@ static void __meminit
 remove_pagetable(unsigned long start, unsigned long end, bool direct)
 {
 	unsigned long next;
+	unsigned long addr;
 	pgd_t *pgd;
 	pud_t *pud;
 	bool pgd_changed = false;
 
-	for (; start < end; start = next) {
-		next = pgd_addr_end(start, end);
+	for (addr = start; addr < end; addr = next) {
+		next = pgd_addr_end(addr, end);
 
-		pgd = pgd_offset_k(start);
+		pgd = pgd_offset_k(addr);
 		if (!pgd_present(*pgd))
 			continue;
 
 		pud = (pud_t *)pgd_page_vaddr(*pgd);
-		remove_pud_table(pud, start, next, direct);
+		remove_pud_table(pud, addr, next, direct);
 		if (free_pud_table(pud, pgd))
 			pgd_changed = true;
 	}
 
 	if (pgd_changed)
-		sync_global_pgds(start, end - 1);
+		sync_global_pgds(start, end - 1, 1);
 
 	flush_tlb_all();
 }
@@ -1341,7 +1353,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 	else
 		err = vmemmap_populate_basepages(start, end, node);
 	if (!err)
-		sync_global_pgds(start, end - 1);
+		sync_global_pgds(start, end - 1, 0);
 	return err;
 }
 
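
The new removed argument makes sync_global_pgds() work in both directions: on hot-add a populated kernel reference entry is copied into each process PGD, and on hot-remove a reference entry that has become empty is cleared from them (previously an empty reference entry was simply skipped, leaving stale copies behind). A minimal standalone model of that dispatch (userspace C; the toy pgd_t and the sync_one() helper are illustrative, not kernel code — 0 plays the role of pgd_none()):

#include <assert.h>
#include <stdio.h>

/* Toy PGD slot: the value 0 stands in for pgd_none(). */
typedef unsigned long pgd_t;

static void sync_one(pgd_t *pgd, const pgd_t *pgd_ref, int removed)
{
	/* If both the reference and the local entry are populated, they
	 * must already agree (the kernel's BUG_ON case above). */
	if (*pgd_ref != 0 && *pgd != 0)
		assert(*pgd == *pgd_ref);

	if (removed) {
		/* Hot-remove: the reference went away, so drop the stale
		 * local copy (pgd_clear() in the real code). */
		if (*pgd_ref == 0 && *pgd != 0)
			*pgd = 0;
	} else {
		/* Hot-add: propagate the reference into an empty local
		 * entry (set_pgd() in the real code). */
		if (*pgd == 0)
			*pgd = *pgd_ref;
	}
}

int main(void)
{
	pgd_t ref = 0, local = 0xabcUL;

	sync_one(&local, &ref, 1);		/* remove path */
	printf("after remove: %#lx\n", local);	/* prints 0 */

	ref = 0xdefUL;
	sync_one(&local, &ref, 0);		/* add path */
	printf("after add: %#lx\n", local);	/* prints 0xdef */
	return 0;
}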
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index a32b706c401a..d221374d5ce8 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -185,8 +185,8 @@ int __init numa_add_memblk(int nid, u64 start, u64 end)
 	return numa_add_memblk_to(nid, start, end, &numa_meminfo);
 }
 
-/* Initialize NODE_DATA for a node on the local memory */
-static void __init setup_node_data(int nid, u64 start, u64 end)
+/* Allocate NODE_DATA for a node on the local memory */
+static void __init alloc_node_data(int nid)
 {
 	const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
 	u64 nd_pa;
@@ -194,18 +194,6 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
 	int tnid;
 
 	/*
-	 * Don't confuse VM with a node that doesn't have the
-	 * minimum amount of memory:
-	 */
-	if (end && (end - start) < NODE_MIN_SIZE)
-		return;
-
-	start = roundup(start, ZONE_ALIGN);
-
-	printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n",
-	       nid, start, end - 1);
-
-	/*
 	 * Allocate node data.  Try node-local memory and then any node.
 	 * Never allocate in DMA zone.
 	 */
@@ -222,7 +210,7 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
 	nd = __va(nd_pa);
 
 	/* report and initialize */
-	printk(KERN_INFO "  NODE_DATA [mem %#010Lx-%#010Lx]\n",
+	printk(KERN_INFO "NODE_DATA(%d) allocated [mem %#010Lx-%#010Lx]\n", nid,
 	       nd_pa, nd_pa + nd_size - 1);
 	tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
 	if (tnid != nid)
@@ -230,9 +218,6 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
 
 	node_data[nid] = nd;
 	memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
-	NODE_DATA(nid)->node_id = nid;
-	NODE_DATA(nid)->node_start_pfn = start >> PAGE_SHIFT;
-	NODE_DATA(nid)->node_spanned_pages = (end - start) >> PAGE_SHIFT;
 
 	node_set_online(nid);
 }
@@ -523,8 +508,17 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
 			end = max(mi->blk[i].end, end);
 		}
 
-		if (start < end)
-			setup_node_data(nid, start, end);
+		if (start >= end)
+			continue;
+
+		/*
+		 * Don't confuse VM with a node that doesn't have the
+		 * minimum amount of memory:
+		 */
+		if (end && (end - start) < NODE_MIN_SIZE)
+			continue;
+
+		alloc_node_data(nid);
 	}
 
 	/* Dump memblock with node info and return. */
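
With the range checks moved into the caller, the renamed alloc_node_data() is a pure allocator and numa_register_memblks() decides which nodes are worth registering. A standalone sketch of the reordered control flow (userspace C; register_node() is a made-up condensation of the caller's loop body, and only the 4 MB NODE_MIN_SIZE value is taken from x86's numa.h):

#include <stdio.h>

typedef unsigned long long u64;

/* 4 MB, as in arch/x86/include/asm/numa.h. */
#define NODE_MIN_SIZE (4ULL << 20)

/* Stand-in for the renamed allocator, which now only allocates. */
static void alloc_node_data(int nid)
{
	printf("NODE_DATA(%d) allocated\n", nid);
}

/* Sketch of the reordered checks: empty and too-small ranges are
 * filtered out before the allocation call is ever made. */
static void register_node(int nid, u64 start, u64 end)
{
	if (start >= end)
		return;		/* node has no memory at all */
	if (end && (end - start) < NODE_MIN_SIZE)
		return;		/* too small, would confuse the VM */
	alloc_node_data(nid);
}

int main(void)
{
	register_node(0, 0x0ULL, 0x100000000ULL);	/* 4 GB: allocated */
	register_node(1, 0x0ULL, 0x1000ULL);		/* 4 KB: skipped  */
	return 0;
}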
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 4dd8cf652579..75cc0978d45d 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -59,41 +59,6 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
 	__flush_tlb_one(vaddr);
 }
 
-/*
- * Associate a large virtual page frame with a given physical page frame
- * and protection flags for that frame. pfn is for the base of the page,
- * vaddr is what the page gets mapped to - both must be properly aligned.
- * The pmd must already be instantiated. Assumes PAE mode.
- */
-void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-
-	if (vaddr & (PMD_SIZE-1)) {		/* vaddr is misaligned */
-		printk(KERN_WARNING "set_pmd_pfn: vaddr misaligned\n");
-		return; /* BUG(); */
-	}
-	if (pfn & (PTRS_PER_PTE-1)) {		/* pfn is misaligned */
-		printk(KERN_WARNING "set_pmd_pfn: pfn misaligned\n");
-		return; /* BUG(); */
-	}
-	pgd = swapper_pg_dir + pgd_index(vaddr);
-	if (pgd_none(*pgd)) {
-		printk(KERN_WARNING "set_pmd_pfn: pgd_none\n");
-		return; /* BUG(); */
-	}
-	pud = pud_offset(pgd, vaddr);
-	pmd = pmd_offset(pud, vaddr);
-	set_pmd(pmd, pfn_pmd(pfn, flags));
-	/*
-	 * It's enough to flush this one mapping.
-	 * (PGE mappings get flushed as well)
-	 */
-	__flush_tlb_one(vaddr);
-}
-
 unsigned long __FIXADDR_TOP = 0xfffff000;
 EXPORT_SYMBOL(__FIXADDR_TOP);
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c9710c9bbee2..736d8e1b6381 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4971,6 +4971,8 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
 	pgdat->node_start_pfn = node_start_pfn;
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 	get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
+	printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n", nid,
+		(u64) start_pfn << PAGE_SHIFT, (u64) (end_pfn << PAGE_SHIFT) - 1);
 #endif
 	calculate_node_totalpages(pgdat, start_pfn, end_pfn,
 					 zones_size, zholes_size);