diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-10-13 20:22:41 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-10-13 20:22:41 -0400 |
commit | df133e8fa8e1d4afa57c84953bf80eaed2b145e0 (patch) | |
tree | 863d38b50f91629513a414227874a1142d5d21e5 | |
parent | e3438330f58330ec236c861d43f46bef06780e62 (diff) | |
parent | beb9147e95a75f41c984d7235cf6d59f3ca2d5db (diff) |
Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 mm updates from Ingo Molnar:
"This tree includes the following changes:
- fix memory hotplug
- fix hibernation bootup memory layout assumptions
- fix hyperv numa guest kernel messages
- remove dead code
- update documentation"
* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/mm: Update memory map description to list hypervisor-reserved area
x86/mm, hibernate: Do not assume the first e820 area to be RAM
x86/mm/numa: Drop dead code and rename setup_node_data() to alloc_node_data()
x86/mm/hotplug: Modify PGD entry when removing memory
x86/mm/hotplug: Pass sync_global_pgds() a correct argument in remove_pagetable()
x86: Remove set_pmd_pfn
-rw-r--r-- | Documentation/x86/x86_64/mm.txt | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/numa.h | 1 | ||||
-rw-r--r-- | arch/x86/include/asm/pgtable_32.h | 3 | ||||
-rw-r--r-- | arch/x86/include/asm/pgtable_64.h | 3 | ||||
-rw-r--r-- | arch/x86/kernel/e820.c | 7 | ||||
-rw-r--r-- | arch/x86/mm/fault.c | 2 | ||||
-rw-r--r-- | arch/x86/mm/init_64.c | 36 | ||||
-rw-r--r-- | arch/x86/mm/numa.c | 34 | ||||
-rw-r--r-- | arch/x86/mm/pgtable_32.c | 35 | ||||
-rw-r--r-- | mm/page_alloc.c | 2 |
10 files changed, 47 insertions(+), 78 deletions(-)
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index afe68ddbe6a4..052ee643a32e 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt | |||
@@ -5,7 +5,7 @@ Virtual memory map with 4 level page tables: | |||
5 | 5 | ||
6 | 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm | 6 | 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm |
7 | hole caused by [48:63] sign extension | 7 | hole caused by [48:63] sign extension |
8 | ffff800000000000 - ffff80ffffffffff (=40 bits) guard hole | 8 | ffff800000000000 - ffff87ffffffffff (=43 bits) guard hole, reserved for hypervisor |
9 | ffff880000000000 - ffffc7ffffffffff (=64 TB) direct mapping of all phys. memory | 9 | ffff880000000000 - ffffc7ffffffffff (=64 TB) direct mapping of all phys. memory |
10 | ffffc80000000000 - ffffc8ffffffffff (=40 bits) hole | 10 | ffffc80000000000 - ffffc8ffffffffff (=40 bits) hole |
11 | ffffc90000000000 - ffffe8ffffffffff (=45 bits) vmalloc/ioremap space | 11 | ffffc90000000000 - ffffe8ffffffffff (=45 bits) vmalloc/ioremap space |
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h index 4064acae625d..01b493e5a99b 100644 --- a/arch/x86/include/asm/numa.h +++ b/arch/x86/include/asm/numa.h | |||
@@ -9,7 +9,6 @@ | |||
9 | #ifdef CONFIG_NUMA | 9 | #ifdef CONFIG_NUMA |
10 | 10 | ||
11 | #define NR_NODE_MEMBLKS (MAX_NUMNODES*2) | 11 | #define NR_NODE_MEMBLKS (MAX_NUMNODES*2) |
12 | #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT)) | ||
13 | 12 | ||
14 | /* | 13 | /* |
15 | * Too small node sizes may confuse the VM badly. Usually they | 14 | * Too small node sizes may confuse the VM badly. Usually they |
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 9ee322103c6d..b6c0b404898a 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h | |||
@@ -32,9 +32,6 @@ static inline void pgtable_cache_init(void) { } | |||
32 | static inline void check_pgt_cache(void) { } | 32 | static inline void check_pgt_cache(void) { } |
33 | void paging_init(void); | 33 | void paging_init(void); |
34 | 34 | ||
35 | extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t); | ||
36 | |||
37 | |||
38 | /* | 35 | /* |
39 | * Define this if things work differently on an i386 and an i486: | 36 | * Define this if things work differently on an i386 and an i486: |
40 | * it will (on an i486) warn about kernel memory accesses that are | 37 | * it will (on an i486) warn about kernel memory accesses that are |
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 3874693c0e53..4572b2f30237 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h | |||
@@ -116,7 +116,8 @@ static inline void native_pgd_clear(pgd_t *pgd) | |||
116 | native_set_pgd(pgd, native_make_pgd(0)); | 116 | native_set_pgd(pgd, native_make_pgd(0)); |
117 | } | 117 | } |
118 | 118 | ||
119 | extern void sync_global_pgds(unsigned long start, unsigned long end); | 119 | extern void sync_global_pgds(unsigned long start, unsigned long end, |
120 | int removed); | ||
120 | 121 | ||
121 | /* | 122 | /* |
122 | * Conversion functions: convert a page and protection to a page entry, | 123 | * Conversion functions: convert a page and protection to a page entry, |
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 988c00a1f60d..49f886481615 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -682,15 +682,14 @@ void __init parse_e820_ext(u64 phys_addr, u32 data_len) | |||
682 | * hibernation (32 bit) or software suspend and suspend to RAM (64 bit). | 682 | * hibernation (32 bit) or software suspend and suspend to RAM (64 bit). |
683 | * | 683 | * |
684 | * This function requires the e820 map to be sorted and without any | 684 | * This function requires the e820 map to be sorted and without any |
685 | * overlapping entries and assumes the first e820 area to be RAM. | 685 | * overlapping entries. |
686 | */ | 686 | */ |
687 | void __init e820_mark_nosave_regions(unsigned long limit_pfn) | 687 | void __init e820_mark_nosave_regions(unsigned long limit_pfn) |
688 | { | 688 | { |
689 | int i; | 689 | int i; |
690 | unsigned long pfn; | 690 | unsigned long pfn = 0; |
691 | 691 | ||
692 | pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size); | 692 | for (i = 0; i < e820.nr_map; i++) { |
693 | for (i = 1; i < e820.nr_map; i++) { | ||
694 | struct e820entry *ei = &e820.map[i]; | 693 | struct e820entry *ei = &e820.map[i]; |
695 | 694 | ||
696 | if (pfn < PFN_UP(ei->addr)) | 695 | if (pfn < PFN_UP(ei->addr)) |
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 9c5b32e2bdc0..d973e61e450d 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -349,7 +349,7 @@ out: | |||
349 | 349 | ||
350 | void vmalloc_sync_all(void) | 350 | void vmalloc_sync_all(void) |
351 | { | 351 | { |
352 | sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END); | 352 | sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END, 0); |
353 | } | 353 | } |
354 | 354 | ||
355 | /* | 355 | /* |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 5d984769cbd8..4cb8763868fc 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -178,7 +178,7 @@ __setup("noexec32=", nonx32_setup); | |||
178 | * When memory was added/removed make sure all the processes MM have | 178 | * When memory was added/removed make sure all the processes MM have |
179 | * suitable PGD entries in the local PGD level page. | 179 | * suitable PGD entries in the local PGD level page. |
180 | */ | 180 | */ |
181 | void sync_global_pgds(unsigned long start, unsigned long end) | 181 | void sync_global_pgds(unsigned long start, unsigned long end, int removed) |
182 | { | 182 | { |
183 | unsigned long address; | 183 | unsigned long address; |
184 | 184 | ||
@@ -186,7 +186,12 @@ void sync_global_pgds(unsigned long start, unsigned long end) | |||
186 | const pgd_t *pgd_ref = pgd_offset_k(address); | 186 | const pgd_t *pgd_ref = pgd_offset_k(address); |
187 | struct page *page; | 187 | struct page *page; |
188 | 188 | ||
189 | if (pgd_none(*pgd_ref)) | 189 | /* |
190 | * When it is called after memory hot remove, pgd_none() | ||
191 | * returns true. In this case (removed == 1), we must clear | ||
192 | * the PGD entries in the local PGD level page. | ||
193 | */ | ||
194 | if (pgd_none(*pgd_ref) && !removed) | ||
190 | continue; | 195 | continue; |
191 | 196 | ||
192 | spin_lock(&pgd_lock); | 197 | spin_lock(&pgd_lock); |
@@ -199,12 +204,18 @@ void sync_global_pgds(unsigned long start, unsigned long end) | |||
199 | pgt_lock = &pgd_page_get_mm(page)->page_table_lock; | 204 | pgt_lock = &pgd_page_get_mm(page)->page_table_lock; |
200 | spin_lock(pgt_lock); | 205 | spin_lock(pgt_lock); |
201 | 206 | ||
202 | if (pgd_none(*pgd)) | 207 | if (!pgd_none(*pgd_ref) && !pgd_none(*pgd)) |
203 | set_pgd(pgd, *pgd_ref); | ||
204 | else | ||
205 | BUG_ON(pgd_page_vaddr(*pgd) | 208 | BUG_ON(pgd_page_vaddr(*pgd) |
206 | != pgd_page_vaddr(*pgd_ref)); | 209 | != pgd_page_vaddr(*pgd_ref)); |
207 | 210 | ||
211 | if (removed) { | ||
212 | if (pgd_none(*pgd_ref) && !pgd_none(*pgd)) | ||
213 | pgd_clear(pgd); | ||
214 | } else { | ||
215 | if (pgd_none(*pgd)) | ||
216 | set_pgd(pgd, *pgd_ref); | ||
217 | } | ||
218 | |||
208 | spin_unlock(pgt_lock); | 219 | spin_unlock(pgt_lock); |
209 | } | 220 | } |
210 | spin_unlock(&pgd_lock); | 221 | spin_unlock(&pgd_lock); |
@@ -633,7 +644,7 @@ kernel_physical_mapping_init(unsigned long start, | |||
633 | } | 644 | } |
634 | 645 | ||
635 | if (pgd_changed) | 646 | if (pgd_changed) |
636 | sync_global_pgds(addr, end - 1); | 647 | sync_global_pgds(addr, end - 1, 0); |
637 | 648 | ||
638 | __flush_tlb_all(); | 649 | __flush_tlb_all(); |
639 | 650 | ||
@@ -976,25 +987,26 @@ static void __meminit | |||
976 | remove_pagetable(unsigned long start, unsigned long end, bool direct) | 987 | remove_pagetable(unsigned long start, unsigned long end, bool direct) |
977 | { | 988 | { |
978 | unsigned long next; | 989 | unsigned long next; |
990 | unsigned long addr; | ||
979 | pgd_t *pgd; | 991 | pgd_t *pgd; |
980 | pud_t *pud; | 992 | pud_t *pud; |
981 | bool pgd_changed = false; | 993 | bool pgd_changed = false; |
982 | 994 | ||
983 | for (; start < end; start = next) { | 995 | for (addr = start; addr < end; addr = next) { |
984 | next = pgd_addr_end(start, end); | 996 | next = pgd_addr_end(addr, end); |
985 | 997 | ||
986 | pgd = pgd_offset_k(start); | 998 | pgd = pgd_offset_k(addr); |
987 | if (!pgd_present(*pgd)) | 999 | if (!pgd_present(*pgd)) |
988 | continue; | 1000 | continue; |
989 | 1001 | ||
990 | pud = (pud_t *)pgd_page_vaddr(*pgd); | 1002 | pud = (pud_t *)pgd_page_vaddr(*pgd); |
991 | remove_pud_table(pud, start, next, direct); | 1003 | remove_pud_table(pud, addr, next, direct); |
992 | if (free_pud_table(pud, pgd)) | 1004 | if (free_pud_table(pud, pgd)) |
993 | pgd_changed = true; | 1005 | pgd_changed = true; |
994 | } | 1006 | } |
995 | 1007 | ||
996 | if (pgd_changed) | 1008 | if (pgd_changed) |
997 | sync_global_pgds(start, end - 1); | 1009 | sync_global_pgds(start, end - 1, 1); |
998 | 1010 | ||
999 | flush_tlb_all(); | 1011 | flush_tlb_all(); |
1000 | } | 1012 | } |
@@ -1341,7 +1353,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) | |||
1341 | else | 1353 | else |
1342 | err = vmemmap_populate_basepages(start, end, node); | 1354 | err = vmemmap_populate_basepages(start, end, node); |
1343 | if (!err) | 1355 | if (!err) |
1344 | sync_global_pgds(start, end - 1); | 1356 | sync_global_pgds(start, end - 1, 0); |
1345 | return err; | 1357 | return err; |
1346 | } | 1358 | } |
1347 | 1359 | ||
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index a32b706c401a..d221374d5ce8 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c | |||
@@ -185,8 +185,8 @@ int __init numa_add_memblk(int nid, u64 start, u64 end) | |||
185 | return numa_add_memblk_to(nid, start, end, &numa_meminfo); | 185 | return numa_add_memblk_to(nid, start, end, &numa_meminfo); |
186 | } | 186 | } |
187 | 187 | ||
188 | /* Initialize NODE_DATA for a node on the local memory */ | 188 | /* Allocate NODE_DATA for a node on the local memory */ |
189 | static void __init setup_node_data(int nid, u64 start, u64 end) | 189 | static void __init alloc_node_data(int nid) |
190 | { | 190 | { |
191 | const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE); | 191 | const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE); |
192 | u64 nd_pa; | 192 | u64 nd_pa; |
@@ -194,18 +194,6 @@ static void __init setup_node_data(int nid, u64 start, u64 end) | |||
194 | int tnid; | 194 | int tnid; |
195 | 195 | ||
196 | /* | 196 | /* |
197 | * Don't confuse VM with a node that doesn't have the | ||
198 | * minimum amount of memory: | ||
199 | */ | ||
200 | if (end && (end - start) < NODE_MIN_SIZE) | ||
201 | return; | ||
202 | |||
203 | start = roundup(start, ZONE_ALIGN); | ||
204 | |||
205 | printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n", | ||
206 | nid, start, end - 1); | ||
207 | |||
208 | /* | ||
209 | * Allocate node data. Try node-local memory and then any node. | 197 | * Allocate node data. Try node-local memory and then any node. |
210 | * Never allocate in DMA zone. | 198 | * Never allocate in DMA zone. |
211 | */ | 199 | */ |
@@ -222,7 +210,7 @@ static void __init setup_node_data(int nid, u64 start, u64 end) | |||
222 | nd = __va(nd_pa); | 210 | nd = __va(nd_pa); |
223 | 211 | ||
224 | /* report and initialize */ | 212 | /* report and initialize */ |
225 | printk(KERN_INFO " NODE_DATA [mem %#010Lx-%#010Lx]\n", | 213 | printk(KERN_INFO "NODE_DATA(%d) allocated [mem %#010Lx-%#010Lx]\n", nid, |
226 | nd_pa, nd_pa + nd_size - 1); | 214 | nd_pa, nd_pa + nd_size - 1); |
227 | tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); | 215 | tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); |
228 | if (tnid != nid) | 216 | if (tnid != nid) |
@@ -230,9 +218,6 @@ static void __init setup_node_data(int nid, u64 start, u64 end) | |||
230 | 218 | ||
231 | node_data[nid] = nd; | 219 | node_data[nid] = nd; |
232 | memset(NODE_DATA(nid), 0, sizeof(pg_data_t)); | 220 | memset(NODE_DATA(nid), 0, sizeof(pg_data_t)); |
233 | NODE_DATA(nid)->node_id = nid; | ||
234 | NODE_DATA(nid)->node_start_pfn = start >> PAGE_SHIFT; | ||
235 | NODE_DATA(nid)->node_spanned_pages = (end - start) >> PAGE_SHIFT; | ||
236 | 221 | ||
237 | node_set_online(nid); | 222 | node_set_online(nid); |
238 | } | 223 | } |
@@ -523,8 +508,17 @@ static int __init numa_register_memblks(struct numa_meminfo *mi) | |||
523 | end = max(mi->blk[i].end, end); | 508 | end = max(mi->blk[i].end, end); |
524 | } | 509 | } |
525 | 510 | ||
526 | if (start < end) | 511 | if (start >= end) |
527 | setup_node_data(nid, start, end); | 512 | continue; |
513 | |||
514 | /* | ||
515 | * Don't confuse VM with a node that doesn't have the | ||
516 | * minimum amount of memory: | ||
517 | */ | ||
518 | if (end && (end - start) < NODE_MIN_SIZE) | ||
519 | continue; | ||
520 | |||
521 | alloc_node_data(nid); | ||
528 | } | 522 | } |
529 | 523 | ||
530 | /* Dump memblock with node info and return. */ | 524 | /* Dump memblock with node info and return. */ |
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 4dd8cf652579..75cc0978d45d 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c | |||
@@ -59,41 +59,6 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval) | |||
59 | __flush_tlb_one(vaddr); | 59 | __flush_tlb_one(vaddr); |
60 | } | 60 | } |
61 | 61 | ||
62 | /* | ||
63 | * Associate a large virtual page frame with a given physical page frame | ||
64 | * and protection flags for that frame. pfn is for the base of the page, | ||
65 | * vaddr is what the page gets mapped to - both must be properly aligned. | ||
66 | * The pmd must already be instantiated. Assumes PAE mode. | ||
67 | */ | ||
68 | void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) | ||
69 | { | ||
70 | pgd_t *pgd; | ||
71 | pud_t *pud; | ||
72 | pmd_t *pmd; | ||
73 | |||
74 | if (vaddr & (PMD_SIZE-1)) { /* vaddr is misaligned */ | ||
75 | printk(KERN_WARNING "set_pmd_pfn: vaddr misaligned\n"); | ||
76 | return; /* BUG(); */ | ||
77 | } | ||
78 | if (pfn & (PTRS_PER_PTE-1)) { /* pfn is misaligned */ | ||
79 | printk(KERN_WARNING "set_pmd_pfn: pfn misaligned\n"); | ||
80 | return; /* BUG(); */ | ||
81 | } | ||
82 | pgd = swapper_pg_dir + pgd_index(vaddr); | ||
83 | if (pgd_none(*pgd)) { | ||
84 | printk(KERN_WARNING "set_pmd_pfn: pgd_none\n"); | ||
85 | return; /* BUG(); */ | ||
86 | } | ||
87 | pud = pud_offset(pgd, vaddr); | ||
88 | pmd = pmd_offset(pud, vaddr); | ||
89 | set_pmd(pmd, pfn_pmd(pfn, flags)); | ||
90 | /* | ||
91 | * It's enough to flush this one mapping. | ||
92 | * (PGE mappings get flushed as well) | ||
93 | */ | ||
94 | __flush_tlb_one(vaddr); | ||
95 | } | ||
96 | |||
97 | unsigned long __FIXADDR_TOP = 0xfffff000; | 62 | unsigned long __FIXADDR_TOP = 0xfffff000; |
98 | EXPORT_SYMBOL(__FIXADDR_TOP); | 63 | EXPORT_SYMBOL(__FIXADDR_TOP); |
99 | 64 | ||
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c9710c9bbee2..736d8e1b6381 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -4971,6 +4971,8 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size, | |||
4971 | pgdat->node_start_pfn = node_start_pfn; | 4971 | pgdat->node_start_pfn = node_start_pfn; |
4972 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP | 4972 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP |
4973 | get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); | 4973 | get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); |
4974 | printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n", nid, | ||
4975 | (u64) start_pfn << PAGE_SHIFT, (u64) (end_pfn << PAGE_SHIFT) - 1); | ||
4974 | #endif | 4976 | #endif |
4975 | calculate_node_totalpages(pgdat, start_pfn, end_pfn, | 4977 | calculate_node_totalpages(pgdat, start_pfn, end_pfn, |
4976 | zones_size, zholes_size); | 4978 | zones_size, zholes_size); |