Diffstat (limited to 'arch/x86/mm')
-rw-r--r--  arch/x86/mm/Makefile          |   2
-rw-r--r--  arch/x86/mm/discontig_32.c    |  26
-rw-r--r--  arch/x86/mm/dump_pagetables.c |   2
-rw-r--r--  arch/x86/mm/highmem_32.c      |   1
-rw-r--r--  arch/x86/mm/init_32.c         |  69
-rw-r--r--  arch/x86/mm/init_64.c         |  74
-rw-r--r--  arch/x86/mm/ioremap.c         |  67
-rw-r--r--  arch/x86/mm/k8topology_64.c   |  38
-rw-r--r--  arch/x86/mm/numa_64.c         |  42
-rw-r--r--  arch/x86/mm/pageattr.c        |  16
-rw-r--r--  arch/x86/mm/pat.c             | 207
-rw-r--r--  arch/x86/mm/pgtable.c         | 276
-rw-r--r--  arch/x86/mm/pgtable_32.c      | 204
-rw-r--r--  arch/x86/mm/srat_64.c         |   2
14 files changed, 695 insertions, 331 deletions
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 20941d2954e2..b7b3e4c7cfc9 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,5 +1,5 @@
 obj-y	:= init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
-	pat.o
+	pat.o pgtable.o
 
 obj-$(CONFIG_X86_32)	+= pgtable_32.o
 
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index 18378850e25a..914ccf983687 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -476,29 +476,3 @@ int memory_add_physaddr_to_nid(u64 addr)
 
 EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 #endif
-
-#ifndef CONFIG_HAVE_ARCH_PARSE_SRAT
-/*
- * XXX FIXME: Make SLIT table parsing available to 32-bit NUMA
- *
- * These stub functions are needed to compile 32-bit NUMA when SRAT is
- * not set. There are functions in srat_64.c for parsing this table
- * and it may be possible to make them common functions.
- */
-void acpi_numa_slit_init (struct acpi_table_slit *slit)
-{
-	printk(KERN_INFO "ACPI: No support for parsing SLIT table\n");
-}
-
-void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa)
-{
-}
-
-void acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma)
-{
-}
-
-void acpi_numa_arch_fixup(void)
-{
-}
-#endif /* CONFIG_HAVE_ARCH_PARSE_SRAT */
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 6791b8334bc6..2c24bea92c66 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -324,7 +324,7 @@ static const struct file_operations ptdump_fops = {
 	.release	= single_release,
 };
 
-int pt_dump_init(void)
+static int pt_dump_init(void)
 {
 	struct dentry *pe;
 
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 9cf33d3ee5bc..165c871ba9af 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -155,4 +155,3 @@ EXPORT_SYMBOL(kmap);
 EXPORT_SYMBOL(kunmap);
 EXPORT_SYMBOL(kmap_atomic);
 EXPORT_SYMBOL(kunmap_atomic);
-EXPORT_SYMBOL(kmap_atomic_to_page);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 9ec62da85fd7..de236e419cb5 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -71,7 +71,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
 	if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
 		pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
 
-		paravirt_alloc_pd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
+		paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
 		set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
 		pud = pud_offset(pgd, 0);
 		BUG_ON(pmd_table != pmd_offset(pud, 0));
@@ -100,7 +100,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
 				(pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
 		}
 
-		paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT);
+		paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
 		set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
 		BUG_ON(page_table != pte_offset_kernel(pmd, 0));
 	}
@@ -227,6 +227,25 @@ static inline int page_kills_ppro(unsigned long pagenr)
 	return 0;
 }
 
+/*
+ * devmem_is_allowed() checks to see if /dev/mem access to a certain address
+ * is valid. The argument is a physical page number.
+ *
+ *
+ * On x86, access has to be given to the first megabyte of ram because that area
+ * contains bios code and data regions used by X and dosemu and similar apps.
+ * Access has to be given to non-kernel-ram areas as well, these contain the PCI
+ * mmio resources as well as potential bios/acpi data regions.
+ */
+int devmem_is_allowed(unsigned long pagenr)
+{
+	if (pagenr <= 256)
+		return 1;
+	if (!page_is_ram(pagenr))
+		return 1;
+	return 0;
+}
+
 #ifdef CONFIG_HIGHMEM
 pte_t *kmap_pte;
 pgprot_t kmap_prot;
@@ -268,47 +287,17 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
 	pkmap_page_table = pte;
 }
 
-static void __meminit free_new_highpage(struct page *page)
-{
-	init_page_count(page);
-	__free_page(page);
-	totalhigh_pages++;
-}
-
 void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
 {
 	if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
 		ClearPageReserved(page);
-		free_new_highpage(page);
+		init_page_count(page);
+		__free_page(page);
+		totalhigh_pages++;
 	} else
 		SetPageReserved(page);
 }
 
-static int __meminit
-add_one_highpage_hotplug(struct page *page, unsigned long pfn)
-{
-	free_new_highpage(page);
-	totalram_pages++;
-#ifdef CONFIG_FLATMEM
-	max_mapnr = max(pfn, max_mapnr);
-#endif
-	num_physpages++;
-
-	return 0;
-}
-
-/*
- * Not currently handling the NUMA case.
- * Assuming single node and all memory that
- * has been added dynamically that would be
- * onlined here is in HIGHMEM.
- */
-void __meminit online_page(struct page *page)
-{
-	ClearPageReserved(page);
-	add_one_highpage_hotplug(page, page_to_pfn(page));
-}
-
 #ifndef CONFIG_NUMA
 static void __init set_highmem_pages_init(int bad_ppro)
 {
@@ -365,7 +354,7 @@ void __init native_pagetable_setup_start(pgd_t *base)
 
 		pte_clear(NULL, va, pte);
 	}
-	paravirt_alloc_pd(&init_mm, __pa(base) >> PAGE_SHIFT);
+	paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT);
 }
 
 void __init native_pagetable_setup_done(pgd_t *base)
@@ -457,7 +446,7 @@ void zap_low_mappings(void)
 	 * Note that "pgd_clear()" doesn't do it for
 	 * us, because pgd_clear() is a no-op on i386.
 	 */
-	for (i = 0; i < USER_PTRS_PER_PGD; i++) {
+	for (i = 0; i < KERNEL_PGD_BOUNDARY; i++) {
 #ifdef CONFIG_X86_PAE
 		set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
 #else
@@ -547,9 +536,9 @@ void __init paging_init(void)
 
 /*
  * Test if the WP bit works in supervisor mode. It isn't supported on 386's
- * and also on some strange 486's (NexGen etc.). All 586+'s are OK. This
- * used to involve black magic jumps to work around some nasty CPU bugs,
- * but fortunately the switch to using exceptions got rid of all that.
+ * and also on some strange 486's. All 586+'s are OK. This used to involve
+ * black magic jumps to work around some nasty CPU bugs, but fortunately the
+ * switch to using exceptions got rid of all that.
  */
 static void __init test_wp_bit(void)
 {
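The devmem_is_allowed() policy added above can be exercised from user space. A minimal sketch (mine, not part of the patch): with this policy in place, a root process can still mmap the legacy BIOS/VGA region in the first megabyte through /dev/mem, while mappings of ordinary kernel RAM above it are refused.

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/dev/mem", O_RDONLY);
	if (fd < 0) {
		perror("open /dev/mem");
		return 1;
	}
	/* 0xA0000 lies in the first megabyte, so pagenr <= 256 allows it */
	unsigned char *p = mmap(NULL, 4096, PROT_READ, MAP_SHARED,
				fd, 0xA0000);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	printf("first byte of legacy video RAM: %02x\n", p[0]);
	munmap(p, 4096);
	close(fd);
	return 0;
}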
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 1ff7906a9a4d..32ba13b0f818 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -135,7 +135,7 @@ static __init void *spp_getpage(void)
 	return ptr;
 }
 
-static __init void
+static void
 set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot)
 {
 	pgd_t *pgd;
@@ -173,7 +173,7 @@ set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot)
 	new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);
 
 	pte = pte_offset_kernel(pmd, vaddr);
-	if (!pte_none(*pte) &&
+	if (!pte_none(*pte) && pte_val(new_pte) &&
 	    pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
 		pte_ERROR(*pte);
 	set_pte(pte, new_pte);
@@ -214,8 +214,7 @@ void __init cleanup_highmap(void)
 }
 
 /* NOTE: this is meant to be run only at boot */
-void __init
-__set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
+void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
 {
 	unsigned long address = __fix_to_virt(idx);
 
@@ -621,15 +620,6 @@ void __init paging_init(void)
 /*
  * Memory hotplug specific functions
  */
-void online_page(struct page *page)
-{
-	ClearPageReserved(page);
-	init_page_count(page);
-	__free_page(page);
-	totalram_pages++;
-	num_physpages++;
-}
-
 #ifdef CONFIG_MEMORY_HOTPLUG
 /*
  * Memory is added always to NORMAL zone. This means you will never get
@@ -664,6 +654,26 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
+/*
+ * devmem_is_allowed() checks to see if /dev/mem access to a certain address
+ * is valid. The argument is a physical page number.
+ *
+ *
+ * On x86, access has to be given to the first megabyte of ram because that area
+ * contains bios code and data regions used by X and dosemu and similar apps.
+ * Access has to be given to non-kernel-ram areas as well, these contain the PCI
+ * mmio resources as well as potential bios/acpi data regions.
+ */
+int devmem_is_allowed(unsigned long pagenr)
+{
+	if (pagenr <= 256)
+		return 1;
+	if (!page_is_ram(pagenr))
+		return 1;
+	return 0;
+}
+
+
 static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel,
 			 kcore_modules, kcore_vsyscall;
 
@@ -791,7 +801,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
 {
 #ifdef CONFIG_NUMA
-	int nid = phys_to_nid(phys);
+	int nid, next_nid;
 #endif
 	unsigned long pfn = phys >> PAGE_SHIFT;
 
@@ -810,10 +820,16 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
 
 	/* Should check here against the e820 map to avoid double free */
 #ifdef CONFIG_NUMA
-	reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT);
+	nid = phys_to_nid(phys);
+	next_nid = phys_to_nid(phys + len - 1);
+	if (nid == next_nid)
+		reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT);
+	else
+		reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
 #else
 	reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
 #endif
+
 	if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
 		dma_reserve += len / PAGE_SIZE;
 		set_dma_reserve(dma_reserve);
@@ -907,6 +923,10 @@ const char *arch_vma_name(struct vm_area_struct *vma)
 /*
  * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
  */
+static long __meminitdata addr_start, addr_end;
+static void __meminitdata *p_start, *p_end;
+static int __meminitdata node_start;
+
 int __meminit
 vmemmap_populate(struct page *start_page, unsigned long size, int node)
 {
@@ -941,12 +961,32 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
 							PAGE_KERNEL_LARGE);
 			set_pmd(pmd, __pmd(pte_val(entry)));
 
-			printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n",
-				addr, addr + PMD_SIZE - 1, p, node);
+			/* check to see if we have contiguous blocks */
+			if (p_end != p || node_start != node) {
+				if (p_start)
+					printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
+						addr_start, addr_end-1, p_start, p_end-1, node_start);
+				addr_start = addr;
+				node_start = node;
+				p_start = p;
+			}
+			addr_end = addr + PMD_SIZE;
+			p_end = p + PMD_SIZE;
 		} else {
 			vmemmap_verify((pte_t *)pmd, node, addr, next);
 		}
 	}
 	return 0;
 }
+
+void __meminit vmemmap_populate_print_last(void)
+{
+	if (p_start) {
+		printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
+			addr_start, addr_end-1, p_start, p_end-1, node_start);
+		p_start = NULL;
+		p_end = NULL;
+		node_start = 0;
+	}
+}
 #endif
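The vmemmap_populate() change above replaces one printk per 2MB PMD with run-length coalescing: contiguous ranges on the same node are batched and flushed either when a discontinuity appears or by vmemmap_populate_print_last() at the end. A stand-alone sketch of the same pattern (illustrative only, with made-up addresses):

#include <stdio.h>

static long addr_start, addr_end;
static int node_start = -1;

static void note_pmd(long addr, long size, int node)
{
	/* flush the pending range on a gap or a node change */
	if (addr != addr_end || node != node_start) {
		if (node_start >= 0)
			printf(" [%lx-%lx] on node %d\n",
			       addr_start, addr_end - 1, node_start);
		addr_start = addr;
		node_start = node;
	}
	addr_end = addr + size;
}

static void note_print_last(void)
{
	if (node_start >= 0) {
		printf(" [%lx-%lx] on node %d\n",
		       addr_start, addr_end - 1, node_start);
		node_start = -1;
	}
}

int main(void)
{
	note_pmd(0x200000, 0x200000, 0);
	note_pmd(0x400000, 0x200000, 0);	/* contiguous: batched */
	note_pmd(0xa00000, 0x200000, 1);	/* gap + node change: flush */
	note_print_last();
	return 0;
}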
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 3a4baf95e24d..71bb3159031a 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -117,8 +117,8 @@ int ioremap_change_attr(unsigned long vaddr, unsigned long size,
  * have to convert them into an offset in a page-aligned mapping, but the
  * caller shouldn't need to know that small detail.
  */
-static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
-			       unsigned long prot_val)
+static void __iomem *__ioremap_caller(resource_size_t phys_addr,
+		unsigned long size, unsigned long prot_val, void *caller)
 {
 	unsigned long pfn, offset, vaddr;
 	resource_size_t last_addr;
@@ -149,7 +149,8 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
 	 * Don't allow anybody to remap normal RAM that we're using..
 	 */
 	for (pfn = phys_addr >> PAGE_SHIFT;
-	     (pfn << PAGE_SHIFT) < last_addr; pfn++) {
+				(pfn << PAGE_SHIFT) < (last_addr & PAGE_MASK);
+				pfn++) {
 
 		int is_ram = page_is_ram(pfn);
 
@@ -176,11 +177,11 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
 	/*
 	 * Do not fallback to certain memory types with certain
	 * requested type:
-	 * - request is uncached, return cannot be write-back
-	 * - request is uncached, return cannot be write-combine
+	 * - request is uc-, return cannot be write-back
+	 * - request is uc-, return cannot be write-combine
 	 * - request is write-combine, return cannot be write-back
 	 */
-	if ((prot_val == _PAGE_CACHE_UC &&
+	if ((prot_val == _PAGE_CACHE_UC_MINUS &&
 	     (new_prot_val == _PAGE_CACHE_WB ||
 	      new_prot_val == _PAGE_CACHE_WC)) ||
 	    (prot_val == _PAGE_CACHE_WC &&
@@ -201,6 +202,9 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
 	default:
 		prot = PAGE_KERNEL_NOCACHE;
 		break;
+	case _PAGE_CACHE_UC_MINUS:
+		prot = PAGE_KERNEL_UC_MINUS;
+		break;
 	case _PAGE_CACHE_WC:
 		prot = PAGE_KERNEL_WC;
 		break;
@@ -212,7 +216,7 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
 	/*
 	 * Ok, go for it..
 	 */
-	area = get_vm_area(size, VM_IOREMAP);
+	area = get_vm_area_caller(size, VM_IOREMAP, caller);
 	if (!area)
 		return NULL;
 	area->phys_addr = phys_addr;
@@ -255,7 +259,17 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
  */
 void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
 {
-	return __ioremap(phys_addr, size, _PAGE_CACHE_UC);
+	/*
+	 * Ideally, this should be:
+	 *	pat_wc_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS;
+	 *
+	 * Till we fix all X drivers to use ioremap_wc(), we will use
+	 * UC MINUS.
+	 */
+	unsigned long val = _PAGE_CACHE_UC_MINUS;
+
+	return __ioremap_caller(phys_addr, size, val,
+				__builtin_return_address(0));
 }
 EXPORT_SYMBOL(ioremap_nocache);
 
@@ -272,7 +286,8 @@ EXPORT_SYMBOL(ioremap_nocache);
 void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size)
 {
 	if (pat_wc_enabled)
-		return __ioremap(phys_addr, size, _PAGE_CACHE_WC);
+		return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC,
+					__builtin_return_address(0));
 	else
 		return ioremap_nocache(phys_addr, size);
 }
@@ -280,7 +295,8 @@ EXPORT_SYMBOL(ioremap_wc);
 
 void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
 {
-	return __ioremap(phys_addr, size, _PAGE_CACHE_WB);
+	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WB,
+				__builtin_return_address(0));
 }
 EXPORT_SYMBOL(ioremap_cache);
 
@@ -336,6 +352,35 @@ void iounmap(volatile void __iomem *addr)
 }
 EXPORT_SYMBOL(iounmap);
 
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+void *xlate_dev_mem_ptr(unsigned long phys)
+{
+	void *addr;
+	unsigned long start = phys & PAGE_MASK;
+
+	/* If page is RAM, we can use __va. Otherwise ioremap and unmap. */
+	if (page_is_ram(start >> PAGE_SHIFT))
+		return __va(phys);
+
+	addr = (void *)ioremap(start, PAGE_SIZE);
+	if (addr)
+		addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK));
+
+	return addr;
+}
+
+void unxlate_dev_mem_ptr(unsigned long phys, void *addr)
+{
+	if (page_is_ram(phys >> PAGE_SHIFT))
+		return;
+
+	iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK));
+	return;
+}
+
 #ifdef CONFIG_X86_32
 
 int __initdata early_ioremap_debug;
@@ -407,7 +452,7 @@ void __init early_ioremap_clear(void)
 
 	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
 	pmd_clear(pmd);
-	paravirt_release_pt(__pa(bm_pte) >> PAGE_SHIFT);
+	paravirt_release_pte(__pa(bm_pte) >> PAGE_SHIFT);
 	__flush_tlb_all();
 }
 
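A hypothetical driver-side sketch of the write-combining path (mine, not from the patch; map_framebuffer and its arguments are placeholders): ioremap_wc() gives a WC mapping when PAT is enabled and silently degrades to ioremap_nocache() otherwise, so callers need no fallback logic of their own.

#include <linux/io.h>
#include <linux/pci.h>

/* Hypothetical helper: map a framebuffer BAR write-combined. */
static void __iomem *map_framebuffer(struct pci_dev *pdev, int bar)
{
	resource_size_t bar_phys = pci_resource_start(pdev, bar);
	unsigned long bar_len = pci_resource_len(pdev, bar);

	/* Falls back to an uncached mapping when PAT is disabled. */
	return ioremap_wc(bar_phys, bar_len);
}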
diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c
index 86808e666f9c..1f476e477844 100644
--- a/arch/x86/mm/k8topology_64.c
+++ b/arch/x86/mm/k8topology_64.c
@@ -13,12 +13,15 @@
 #include <linux/nodemask.h>
 #include <asm/io.h>
 #include <linux/pci_ids.h>
+#include <linux/acpi.h>
 #include <asm/types.h>
 #include <asm/mmzone.h>
 #include <asm/proto.h>
 #include <asm/e820.h>
 #include <asm/pci-direct.h>
 #include <asm/numa.h>
+#include <asm/mpspec.h>
+#include <asm/apic.h>
 
 static __init int find_northbridge(void)
 {
@@ -44,6 +47,30 @@ static __init int find_northbridge(void)
 	return -1;
 }
 
+static __init void early_get_boot_cpu_id(void)
+{
+	/*
+	 * need to get boot_cpu_id so can use that to create apicid_to_node
+	 * in k8_scan_nodes()
+	 */
+	/*
+	 * Find possible boot-time SMP configuration:
+	 */
+	early_find_smp_config();
+#ifdef CONFIG_ACPI
+	/*
+	 * Read APIC information from ACPI tables.
+	 */
+	early_acpi_boot_init();
+#endif
+	/*
+	 * get boot-time SMP configuration:
+	 */
+	if (smp_found_config)
+		early_get_smp_config();
+	early_init_lapic_mapping();
+}
+
 int __init k8_scan_nodes(unsigned long start, unsigned long end)
 {
 	unsigned long prevbase;
@@ -56,6 +83,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
 	unsigned cores;
 	unsigned bits;
 	int j;
+	unsigned apicid_base;
 
 	if (!early_pci_allowed())
 		return -1;
@@ -174,11 +202,19 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
 	/* use the coreid bits from early_identify_cpu */
 	bits = boot_cpu_data.x86_coreid_bits;
 	cores = (1<<bits);
+	apicid_base = 0;
+	/* need to get boot_cpu_id early for system with apicid lifting */
+	early_get_boot_cpu_id();
+	if (boot_cpu_physical_apicid > 0) {
+		printk(KERN_INFO "BSP APIC ID: %02x\n",
+				boot_cpu_physical_apicid);
+		apicid_base = boot_cpu_physical_apicid;
+	}
 
 	for (i = 0; i < 8; i++) {
 		if (nodes[i].start != nodes[i].end) {
 			nodeid = nodeids[i];
-			for (j = 0; j < cores; j++)
+			for (j = apicid_base; j < cores + apicid_base; j++)
 				apicid_to_node[(nodeid << bits) + j] = i;
 			setup_node_bootmem(i, nodes[i].start, nodes[i].end);
 		}
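A worked example of the APIC-ID lifting arithmetic above (my illustration, not from the patch): with dual-core nodes (bits = 1, cores = 2) and a BSP APIC ID lifted to 4, the loop fills apicid_to_node at index (nodeid << bits) + j for j = 4..5, so node 0 owns APIC IDs 4-5 and node 1 owns 6-7.

#include <stdio.h>

int main(void)
{
	unsigned bits = 1, cores = 2, apicid_base = 4;
	unsigned nodeid, j;

	for (nodeid = 0; nodeid < 2; nodeid++)
		for (j = apicid_base; j < cores + apicid_base; j++)
			printf("apicid_to_node[%u] = %u\n",
			       (nodeid << bits) + j, nodeid);
	return 0;	/* prints entries 4,5 -> node 0 and 6,7 -> node 1 */
}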
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 9a6892200b27..c5066d519e5d 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -196,6 +196,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
 	unsigned long bootmap_start, nodedata_phys;
 	void *bootmap;
 	const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE);
+	int nid;
 
 	start = round_up(start, ZONE_ALIGN);
 
@@ -218,9 +219,19 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
 	NODE_DATA(nodeid)->node_start_pfn = start_pfn;
 	NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
 
-	/* Find a place for the bootmem map */
+	/*
+	 * Find a place for the bootmem map.
+	 * nodedata_phys could be on another node, handed out by alloc_bootmem,
+	 * so we need to make sure bootmap_start is not too small; otherwise
+	 * early_node_mem would grab that range with find_e820_area instead
+	 * of alloc_bootmem, which could clash with a reserved range.
+	 */
 	bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
-	bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
+	nid = phys_to_nid(nodedata_phys);
+	if (nid == nodeid)
+		bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
+	else
+		bootmap_start = round_up(start, PAGE_SIZE);
 	/*
 	 * SMP_CACHE_BYTES could be enough, but init_bootmem_node likes
 	 * to use that to align to PAGE_SIZE
@@ -245,10 +256,29 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
 
 	free_bootmem_with_active_regions(nodeid, end);
 
-	reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size,
-			BOOTMEM_DEFAULT);
-	reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
-			bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
+	/*
+	 * Convert the early reservations to bootmem reservations first,
+	 * otherwise early_node_mem could use the early-reserved memory
+	 * of a previous node.
+	 */
+	early_res_to_bootmem(start, end);
+
+	/*
+	 * In some cases early_node_mem could use alloc_bootmem to get a
+	 * range on another node; don't reserve that again.
+	 */
+	if (nid != nodeid)
+		printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid);
+	else
+		reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys,
+					pgdat_size, BOOTMEM_DEFAULT);
+	nid = phys_to_nid(bootmap_start);
+	if (nid != nodeid)
+		printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid);
+	else
+		reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
+				 bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
+
 #ifdef CONFIG_ACPI_NUMA
 	srat_reserve_add_area(nodeid);
 #endif
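A toy model of the placement decision above (illustrative only; phys_to_nid here is a fake two-node layout, not the kernel's): the bootmem map is kept on the node being set up whenever alloc_bootmem happened to place the pg_data_t elsewhere.

#include <stdio.h>

/* toy 2-node layout: node 0 = [0, 4G), node 1 = [4G, 8G) */
static int phys_to_nid(unsigned long long phys)
{
	return phys < (4ULL << 30) ? 0 : 1;
}

int main(void)
{
	int nodeid = 1;
	unsigned long long start = 4ULL << 30;		/* node 1 begins here */
	unsigned long long nodedata_phys = 1ULL << 30;	/* landed on node 0 */
	unsigned long long bootmap_start;

	if (phys_to_nid(nodedata_phys) == nodeid)
		bootmap_start = nodedata_phys + 0x4000;	/* just after pg_data_t */
	else
		bootmap_start = start;			/* keep it on-node */
	printf("bootmap at %#llx (node %d)\n", bootmap_start,
	       phys_to_nid(bootmap_start));
	return 0;
}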
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index f7823a172868..60bcb5b6a37e 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -483,9 +483,7 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 		goto out_unlock;
 
 	pbase = (pte_t *)page_address(base);
-#ifdef CONFIG_X86_32
-	paravirt_alloc_pt(&init_mm, page_to_pfn(base));
-#endif
+	paravirt_alloc_pte(&init_mm, page_to_pfn(base));
 	ref_prot = pte_pgprot(pte_clrhuge(*kpte));
 
 #ifdef CONFIG_X86_64
@@ -779,14 +777,20 @@ static inline int change_page_attr_clear(unsigned long addr, int numpages,
 
 int _set_memory_uc(unsigned long addr, int numpages)
 {
+	/*
+	 * for now UC MINUS. see comments in ioremap_nocache()
+	 */
 	return change_page_attr_set(addr, numpages,
-				    __pgprot(_PAGE_CACHE_UC));
+				    __pgprot(_PAGE_CACHE_UC_MINUS));
 }
 
 int set_memory_uc(unsigned long addr, int numpages)
 {
+	/*
+	 * for now UC MINUS. see comments in ioremap_nocache()
+	 */
 	if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
-			    _PAGE_CACHE_UC, NULL))
+			    _PAGE_CACHE_UC_MINUS, NULL))
 		return -EINVAL;
 
 	return _set_memory_uc(addr, numpages);
@@ -993,7 +997,7 @@ static const struct file_operations dpa_fops = {
 	.release	= single_release,
 };
 
-int __init debug_pagealloc_proc_init(void)
+static int __init debug_pagealloc_proc_init(void)
 {
 	struct dentry *de;
 
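A minimal kernel-module-style sketch (assumed context, not from the patch) of the set_memory_uc() path after this change: the request is now tracked as UC- via reserve_memtype(), and set_memory_wb() releases the memtype again.

#include <linux/gfp.h>
#include <linux/errno.h>
#include <asm/cacheflush.h>

static int demo_uncached_page(void)
{
	unsigned long addr = __get_free_page(GFP_KERNEL);

	if (!addr)
		return -ENOMEM;

	set_memory_uc(addr, 1);	/* requests UC-, tracked via reserve_memtype() */
	/* ... touch the page with caching disabled ... */
	set_memory_wb(addr, 1);	/* restore write-back, free the memtype */

	free_page(addr);
	return 0;
}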
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 72c0f6097402..277446cd30b6 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -11,16 +11,19 @@
 #include <linux/kernel.h>
 #include <linux/gfp.h>
 #include <linux/fs.h>
+#include <linux/bootmem.h>
 
 #include <asm/msr.h>
 #include <asm/tlbflush.h>
 #include <asm/processor.h>
+#include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/pat.h>
 #include <asm/e820.h>
 #include <asm/cacheflush.h>
 #include <asm/fcntl.h>
 #include <asm/mtrr.h>
+#include <asm/io.h>
 
 int pat_wc_enabled = 1;
 
@@ -190,6 +193,21 @@ static int pat_x_mtrr_type(u64 start, u64 end, unsigned long prot,
 	return 0;
 }
 
+/*
+ * req_type typically has one of the following values:
+ * - _PAGE_CACHE_WB
+ * - _PAGE_CACHE_WC
+ * - _PAGE_CACHE_UC_MINUS
+ * - _PAGE_CACHE_UC
+ *
+ * req_type will have a special case value '-1', when the requester wants to
+ * inherit the memory type from mtrr (if WB), existing PAT, defaulting to UC_MINUS.
+ *
+ * If ret_type is NULL, the function will return an error if it cannot reserve the
+ * region with req_type. If ret_type is non-null, the function will return the
+ * available type in ret_type in case of no error. In case of any error
+ * it will return a negative return value.
+ */
 int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 			unsigned long *ret_type)
 {
@@ -200,9 +218,14 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 
 	/* Only track when pat_wc_enabled */
 	if (!pat_wc_enabled) {
-		if (ret_type)
-			*ret_type = req_type;
-
+		/* This is identical to page table setting without PAT */
+		if (ret_type) {
+			if (req_type == -1) {
+				*ret_type = _PAGE_CACHE_WB;
+			} else {
+				*ret_type = req_type;
+			}
+		}
 		return 0;
 	}
 
@@ -214,8 +237,29 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 		return 0;
 	}
 
-	req_type &= _PAGE_CACHE_MASK;
-	err = pat_x_mtrr_type(start, end, req_type, &actual_type);
+	if (req_type == -1) {
+		/*
+		 * Special case where caller wants to inherit from mtrr or
+		 * existing pat mapping, defaulting to UC_MINUS in case of
+		 * no match.
+		 */
+		u8 mtrr_type = mtrr_type_lookup(start, end);
+		if (mtrr_type == 0xFE) { /* MTRR match error */
+			err = -1;
+		}
+
+		if (mtrr_type == MTRR_TYPE_WRBACK) {
+			req_type = _PAGE_CACHE_WB;
+			actual_type = _PAGE_CACHE_WB;
+		} else {
+			req_type = _PAGE_CACHE_UC_MINUS;
+			actual_type = _PAGE_CACHE_UC_MINUS;
+		}
+	} else {
+		req_type &= _PAGE_CACHE_MASK;
+		err = pat_x_mtrr_type(start, end, req_type, &actual_type);
+	}
+
 	if (err) {
 		if (ret_type)
 			*ret_type = actual_type;
@@ -241,7 +285,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 		struct memtype *saved_ptr;
 
 		if (parse->start >= end) {
-			printk("New Entry\n");
+			pr_debug("New Entry\n");
 			list_add(&new_entry->nd, parse->nd.prev);
 			new_entry = NULL;
 			break;
@@ -291,7 +335,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 				break;
 			}
 
-			printk("Overlap at 0x%Lx-0x%Lx\n",
+			pr_debug("Overlap at 0x%Lx-0x%Lx\n",
 				 saved_ptr->start, saved_ptr->end);
 			/* No conflict. Go ahead and add this new entry */
 			list_add(&new_entry->nd, saved_ptr->nd.prev);
@@ -343,8 +387,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 				break;
 			}
 
-			printk("Overlap at 0x%Lx-0x%Lx\n",
-				 saved_ptr->start, saved_ptr->end);
+			pr_debug("Overlap at 0x%Lx-0x%Lx\n",
+				saved_ptr->start, saved_ptr->end);
 			/* No conflict. Go ahead and add this new entry */
 			list_add(&new_entry->nd, &saved_ptr->nd);
 			new_entry = NULL;
@@ -353,7 +397,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 	}
 
 	if (err) {
-		printk(
+		printk(KERN_INFO
 	"reserve_memtype failed 0x%Lx-0x%Lx, track %s, req %s\n",
 			start, end, cattr_name(new_entry->type),
 			cattr_name(req_type));
@@ -365,16 +409,16 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 	if (new_entry) {
 		/* No conflict. Not yet added to the list. Add to the tail */
 		list_add_tail(&new_entry->nd, &memtype_list);
-		printk("New Entry\n");
+		pr_debug("New Entry\n");
 	}
 
 	if (ret_type) {
-		printk(
+		pr_debug(
 	"reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n",
 			start, end, cattr_name(actual_type),
 			cattr_name(req_type), cattr_name(*ret_type));
 	} else {
-		printk(
+		pr_debug(
 	"reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s\n",
 			start, end, cattr_name(actual_type),
 			cattr_name(req_type));
@@ -411,11 +455,142 @@ int free_memtype(u64 start, u64 end)
 	spin_unlock(&memtype_lock);
 
 	if (err) {
-		printk(KERN_DEBUG "%s:%d freeing invalid memtype %Lx-%Lx\n",
+		printk(KERN_INFO "%s:%d freeing invalid memtype %Lx-%Lx\n",
 			current->comm, current->pid, start, end);
 	}
 
-	printk( "free_memtype request 0x%Lx-0x%Lx\n", start, end);
+	pr_debug("free_memtype request 0x%Lx-0x%Lx\n", start, end);
 	return err;
 }
 
+
+/*
+ * /dev/mem mmap interface. The memtype used for mapping varies:
+ * - Use UC for mappings with O_SYNC flag
+ * - Without O_SYNC flag, if there is any conflict in reserve_memtype,
+ *   inherit the memtype from the existing mapping.
+ * - Else use UC_MINUS memtype (for backward compatibility with existing
+ *   X drivers).
+ */
+pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+				unsigned long size, pgprot_t vma_prot)
+{
+	return vma_prot;
+}
+
+#ifdef CONFIG_NONPROMISC_DEVMEM
+/* This check is done in drivers/char/mem.c in case of NONPROMISC_DEVMEM*/
+static inline int range_is_allowed(unsigned long pfn, unsigned long size)
+{
+	return 1;
+}
+#else
+static inline int range_is_allowed(unsigned long pfn, unsigned long size)
+{
+	u64 from = ((u64)pfn) << PAGE_SHIFT;
+	u64 to = from + size;
+	u64 cursor = from;
+
+	while (cursor < to) {
+		if (!devmem_is_allowed(pfn)) {
+			printk(KERN_INFO
+		"Program %s tried to access /dev/mem between %Lx->%Lx.\n",
+				current->comm, from, to);
+			return 0;
+		}
+		cursor += PAGE_SIZE;
+		pfn++;
+	}
+	return 1;
+}
+#endif /* CONFIG_NONPROMISC_DEVMEM */
+
+int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
+				unsigned long size, pgprot_t *vma_prot)
+{
+	u64 offset = ((u64) pfn) << PAGE_SHIFT;
+	unsigned long flags = _PAGE_CACHE_UC_MINUS;
+	int retval;
+
+	if (!range_is_allowed(pfn, size))
+		return 0;
+
+	if (file->f_flags & O_SYNC) {
+		flags = _PAGE_CACHE_UC;
+	}
+
+#ifdef CONFIG_X86_32
+	/*
+	 * On the PPro and successors, the MTRRs are used to set
+	 * memory types for physical addresses outside main memory,
+	 * so blindly setting UC or PWT on those pages is wrong.
+	 * For Pentiums and earlier, the surround logic should disable
+	 * caching for the high addresses through the KEN pin, but
+	 * we maintain the tradition of paranoia in this code.
+	 */
+	if (!pat_wc_enabled &&
+	    ! ( test_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability) ||
+		test_bit(X86_FEATURE_K6_MTRR, boot_cpu_data.x86_capability) ||
+		test_bit(X86_FEATURE_CYRIX_ARR, boot_cpu_data.x86_capability) ||
+		test_bit(X86_FEATURE_CENTAUR_MCR, boot_cpu_data.x86_capability)) &&
+	   (pfn << PAGE_SHIFT) >= __pa(high_memory)) {
+		flags = _PAGE_CACHE_UC;
+	}
+#endif
+
+	/*
+	 * With O_SYNC, we can only take UC mapping. Fail if we cannot.
+	 * Without O_SYNC, we want to get
+	 * - WB for WB-able memory and no other conflicting mappings
+	 * - UC_MINUS for non-WB-able memory with no other conflicting mappings
+	 * - Inherit from conflicting mappings otherwise
+	 */
+	if (flags != _PAGE_CACHE_UC_MINUS) {
+		retval = reserve_memtype(offset, offset + size, flags, NULL);
+	} else {
+		retval = reserve_memtype(offset, offset + size, -1, &flags);
+	}
+
+	if (retval < 0)
+		return 0;
+
+	if (pfn <= max_pfn_mapped &&
+	    ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) {
+		free_memtype(offset, offset + size);
+		printk(KERN_INFO
+		"%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n",
+			current->comm, current->pid,
+			cattr_name(flags),
+			offset, offset + size);
+		return 0;
+	}
+
+	*vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) |
+			     flags);
+	return 1;
+}
+
+void map_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
+{
+	u64 addr = (u64)pfn << PAGE_SHIFT;
+	unsigned long flags;
+	unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK);
+
+	reserve_memtype(addr, addr + size, want_flags, &flags);
+	if (flags != want_flags) {
+		printk(KERN_INFO
+		"%s:%d /dev/mem expected mapping type %s for %Lx-%Lx, got %s\n",
+			current->comm, current->pid,
+			cattr_name(want_flags),
+			addr, addr + size,
+			cattr_name(flags));
+	}
+}
+
+void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
+{
+	u64 addr = (u64)pfn << PAGE_SHIFT;
+
+	free_memtype(addr, addr + size);
+}
+
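An illustrative user-space counterpart to phys_mem_access_prot_allowed() above (mine, not part of the patch): opening /dev/mem with O_SYNC requests a strict UC mapping, while without O_SYNC the kernel picks UC- or inherits a conflicting mapping's type. PHYS_ADDR is a placeholder for some non-RAM MMIO page.

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

#define PHYS_ADDR 0xFED00000UL	/* hypothetical MMIO page */

int main(void)
{
	int fd = open("/dev/mem", O_RDWR | O_SYNC);	/* O_SYNC => UC */
	volatile unsigned int *reg;

	if (fd < 0) {
		perror("open");
		return 1;
	}
	reg = (volatile unsigned int *)mmap(NULL, 4096,
					    PROT_READ | PROT_WRITE,
					    MAP_SHARED, fd, PHYS_ADDR);
	if (reg == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	printf("reg[0] = %08x\n", reg[0]);
	munmap((void *)reg, 4096);
	close(fd);
	return 0;
}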
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c new file mode 100644 index 000000000000..50159764f694 --- /dev/null +++ b/arch/x86/mm/pgtable.c | |||
@@ -0,0 +1,276 @@ | |||
1 | #include <linux/mm.h> | ||
2 | #include <asm/pgalloc.h> | ||
3 | #include <asm/pgtable.h> | ||
4 | #include <asm/tlb.h> | ||
5 | |||
6 | pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) | ||
7 | { | ||
8 | return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); | ||
9 | } | ||
10 | |||
11 | pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) | ||
12 | { | ||
13 | struct page *pte; | ||
14 | |||
15 | #ifdef CONFIG_HIGHPTE | ||
16 | pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0); | ||
17 | #else | ||
18 | pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); | ||
19 | #endif | ||
20 | if (pte) | ||
21 | pgtable_page_ctor(pte); | ||
22 | return pte; | ||
23 | } | ||
24 | |||
25 | void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte) | ||
26 | { | ||
27 | pgtable_page_dtor(pte); | ||
28 | paravirt_release_pte(page_to_pfn(pte)); | ||
29 | tlb_remove_page(tlb, pte); | ||
30 | } | ||
31 | |||
32 | #if PAGETABLE_LEVELS > 2 | ||
33 | void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) | ||
34 | { | ||
35 | paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT); | ||
36 | tlb_remove_page(tlb, virt_to_page(pmd)); | ||
37 | } | ||
38 | |||
39 | #if PAGETABLE_LEVELS > 3 | ||
40 | void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) | ||
41 | { | ||
42 | paravirt_release_pud(__pa(pud) >> PAGE_SHIFT); | ||
43 | tlb_remove_page(tlb, virt_to_page(pud)); | ||
44 | } | ||
45 | #endif /* PAGETABLE_LEVELS > 3 */ | ||
46 | #endif /* PAGETABLE_LEVELS > 2 */ | ||
47 | |||
48 | static inline void pgd_list_add(pgd_t *pgd) | ||
49 | { | ||
50 | struct page *page = virt_to_page(pgd); | ||
51 | |||
52 | list_add(&page->lru, &pgd_list); | ||
53 | } | ||
54 | |||
55 | static inline void pgd_list_del(pgd_t *pgd) | ||
56 | { | ||
57 | struct page *page = virt_to_page(pgd); | ||
58 | |||
59 | list_del(&page->lru); | ||
60 | } | ||
61 | |||
62 | #define UNSHARED_PTRS_PER_PGD \ | ||
63 | (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD) | ||
64 | |||
65 | static void pgd_ctor(void *p) | ||
66 | { | ||
67 | pgd_t *pgd = p; | ||
68 | unsigned long flags; | ||
69 | |||
70 | /* Clear usermode parts of PGD */ | ||
71 | memset(pgd, 0, KERNEL_PGD_BOUNDARY*sizeof(pgd_t)); | ||
72 | |||
73 | spin_lock_irqsave(&pgd_lock, flags); | ||
74 | |||
75 | /* If the pgd points to a shared pagetable level (either the | ||
76 | ptes in non-PAE, or shared PMD in PAE), then just copy the | ||
77 | references from swapper_pg_dir. */ | ||
78 | if (PAGETABLE_LEVELS == 2 || | ||
79 | (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD) || | ||
80 | PAGETABLE_LEVELS == 4) { | ||
81 | clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY, | ||
82 | swapper_pg_dir + KERNEL_PGD_BOUNDARY, | ||
83 | KERNEL_PGD_PTRS); | ||
84 | paravirt_alloc_pmd_clone(__pa(pgd) >> PAGE_SHIFT, | ||
85 | __pa(swapper_pg_dir) >> PAGE_SHIFT, | ||
86 | KERNEL_PGD_BOUNDARY, | ||
87 | KERNEL_PGD_PTRS); | ||
88 | } | ||
89 | |||
90 | /* list required to sync kernel mapping updates */ | ||
91 | if (!SHARED_KERNEL_PMD) | ||
92 | pgd_list_add(pgd); | ||
93 | |||
94 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
95 | } | ||
96 | |||
97 | static void pgd_dtor(void *pgd) | ||
98 | { | ||
99 | unsigned long flags; /* can be called from interrupt context */ | ||
100 | |||
101 | if (SHARED_KERNEL_PMD) | ||
102 | return; | ||
103 | |||
104 | spin_lock_irqsave(&pgd_lock, flags); | ||
105 | pgd_list_del(pgd); | ||
106 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
107 | } | ||
108 | |||
109 | /* | ||
110 | * List of all pgd's needed for non-PAE so it can invalidate entries | ||
111 | * in both cached and uncached pgd's; not needed for PAE since the | ||
112 | * kernel pmd is shared. If PAE were not to share the pmd a similar | ||
113 | * tactic would be needed. This is essentially codepath-based locking | ||
114 | * against pageattr.c; it is the unique case in which a valid change | ||
115 | * of kernel pagetables can't be lazily synchronized by vmalloc faults. | ||
116 | * vmalloc faults work because attached pagetables are never freed. | ||
117 | * -- wli | ||
118 | */ | ||
119 | |||
120 | #ifdef CONFIG_X86_PAE | ||
121 | /* | ||
122 | * Mop up any pmd pages which may still be attached to the pgd. | ||
123 | * Normally they will be freed by munmap/exit_mmap, but any pmd we | ||
124 | * preallocate which never got a corresponding vma will need to be | ||
125 | * freed manually. | ||
126 | */ | ||
127 | static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp) | ||
128 | { | ||
129 | int i; | ||
130 | |||
131 | for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) { | ||
132 | pgd_t pgd = pgdp[i]; | ||
133 | |||
134 | if (pgd_val(pgd) != 0) { | ||
135 | pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd); | ||
136 | |||
137 | pgdp[i] = native_make_pgd(0); | ||
138 | |||
139 | paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT); | ||
140 | pmd_free(mm, pmd); | ||
141 | } | ||
142 | } | ||
143 | } | ||
144 | |||
145 | /* | ||
146 | * In PAE mode, we need to do a cr3 reload (=tlb flush) when | ||
147 | * updating the top-level pagetable entries to guarantee the | ||
148 | * processor notices the update. Since this is expensive, and | ||
149 | * all 4 top-level entries are used almost immediately in a | ||
150 | * new process's life, we just pre-populate them here. | ||
151 | * | ||
152 | * Also, if we're in a paravirt environment where the kernel pmd is | ||
153 | * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate | ||
154 | * and initialize the kernel pmds here. | ||
155 | */ | ||
156 | static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd) | ||
157 | { | ||
158 | pud_t *pud; | ||
159 | unsigned long addr; | ||
160 | int i; | ||
161 | |||
162 | pud = pud_offset(pgd, 0); | ||
163 | for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD; | ||
164 | i++, pud++, addr += PUD_SIZE) { | ||
165 | pmd_t *pmd = pmd_alloc_one(mm, addr); | ||
166 | |||
167 | if (!pmd) { | ||
168 | pgd_mop_up_pmds(mm, pgd); | ||
169 | return 0; | ||
170 | } | ||
171 | |||
172 | if (i >= KERNEL_PGD_BOUNDARY) | ||
173 | memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]), | ||
174 | sizeof(pmd_t) * PTRS_PER_PMD); | ||
175 | |||
176 | pud_populate(mm, pud, pmd); | ||
177 | } | ||
178 | |||
179 | return 1; | ||
180 | } | ||
181 | |||
182 | void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) | ||
183 | { | ||
184 | paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT); | ||
185 | |||
186 | /* Note: almost everything apart from _PAGE_PRESENT is | ||
187 | reserved at the pmd (PDPT) level. */ | ||
188 | set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT)); | ||
189 | |||
190 | /* | ||
191 | * According to Intel App note "TLBs, Paging-Structure Caches, | ||
192 | * and Their Invalidation", April 2007, document 317080-001, | ||
193 | * section 8.1: in PAE mode we explicitly have to flush the | ||
194 | * TLB via cr3 if the top-level pgd is changed... | ||
195 | */ | ||
196 | if (mm == current->active_mm) | ||
197 | write_cr3(read_cr3()); | ||
198 | } | ||
199 | #else /* !CONFIG_X86_PAE */ | ||
200 | /* No need to prepopulate any pagetable entries in non-PAE modes. */ | ||
201 | static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd) | ||
202 | { | ||
203 | return 1; | ||
204 | } | ||
205 | |||
206 | static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgd) | ||
207 | { | ||
208 | } | ||
209 | #endif /* CONFIG_X86_PAE */ | ||
210 | |||
211 | pgd_t *pgd_alloc(struct mm_struct *mm) | ||
212 | { | ||
213 | pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); | ||
214 | |||
215 | /* so that alloc_pmd can use it */ | ||
216 | mm->pgd = pgd; | ||
217 | if (pgd) | ||
218 | pgd_ctor(pgd); | ||
219 | |||
220 | if (pgd && !pgd_prepopulate_pmd(mm, pgd)) { | ||
221 | pgd_dtor(pgd); | ||
222 | free_page((unsigned long)pgd); | ||
223 | pgd = NULL; | ||
224 | } | ||
225 | |||
226 | return pgd; | ||
227 | } | ||
228 | |||
229 | void pgd_free(struct mm_struct *mm, pgd_t *pgd) | ||
230 | { | ||
231 | pgd_mop_up_pmds(mm, pgd); | ||
232 | pgd_dtor(pgd); | ||
233 | free_page((unsigned long)pgd); | ||
234 | } | ||
235 | |||
236 | int ptep_set_access_flags(struct vm_area_struct *vma, | ||
237 | unsigned long address, pte_t *ptep, | ||
238 | pte_t entry, int dirty) | ||
239 | { | ||
240 | int changed = !pte_same(*ptep, entry); | ||
241 | |||
242 | if (changed && dirty) { | ||
243 | *ptep = entry; | ||
244 | pte_update_defer(vma->vm_mm, address, ptep); | ||
245 | flush_tlb_page(vma, address); | ||
246 | } | ||
247 | |||
248 | return changed; | ||
249 | } | ||
250 | |||
251 | int ptep_test_and_clear_young(struct vm_area_struct *vma, | ||
252 | unsigned long addr, pte_t *ptep) | ||
253 | { | ||
254 | int ret = 0; | ||
255 | |||
256 | if (pte_young(*ptep)) | ||
257 | ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, | ||
258 | &ptep->pte); | ||
259 | |||
260 | if (ret) | ||
261 | pte_update(vma->vm_mm, addr, ptep); | ||
262 | |||
263 | return ret; | ||
264 | } | ||
265 | |||
266 | int ptep_clear_flush_young(struct vm_area_struct *vma, | ||
267 | unsigned long address, pte_t *ptep) | ||
268 | { | ||
269 | int young; | ||
270 | |||
271 | young = ptep_test_and_clear_young(vma, address, ptep); | ||
272 | if (young) | ||
273 | flush_tlb_page(vma, address); | ||
274 | |||
275 | return young; | ||
276 | } | ||
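ptep_clear_flush_young() is the flavour page reclaim wants: it samples and resets the accessed bit and also evicts any stale young entry from the TLB. A rough sketch of a reclaim-style caller, loosely modelled on page_referenced_one() in mm/rmap.c:

	/* Count the page as referenced if this mapping's pte was young. */
	if (ptep_clear_flush_young(vma, address, pte))
		referenced++;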
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 6fb9e7c6893f..9ee007be9142 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c | |||
@@ -173,210 +173,6 @@ void reserve_top_address(unsigned long reserve) | |||
173 | __VMALLOC_RESERVE += reserve; | 173 | __VMALLOC_RESERVE += reserve; |
174 | } | 174 | } |
175 | 175 | ||
176 | pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) | ||
177 | { | ||
178 | return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); | ||
179 | } | ||
180 | |||
181 | pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) | ||
182 | { | ||
183 | struct page *pte; | ||
184 | |||
185 | #ifdef CONFIG_HIGHPTE | ||
186 | pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0); | ||
187 | #else | ||
188 | pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); | ||
189 | #endif | ||
190 | if (pte) | ||
191 | pgtable_page_ctor(pte); | ||
192 | return pte; | ||
193 | } | ||
194 | |||
195 | /* | ||
196 | * List of all pgd's needed for non-PAE so it can invalidate entries | ||
197 | * in both cached and uncached pgd's; not needed for PAE since the | ||
198 | * kernel pmd is shared. If PAE were not to share the pmd a similar | ||
199 | * tactic would be needed. This is essentially codepath-based locking | ||
200 | * against pageattr.c; it is the only case in which a valid change | ||
201 | * of kernel pagetables can't be lazily synchronized by vmalloc faults. | ||
202 | * vmalloc faults work because attached pagetables are never freed. | ||
203 | * -- wli | ||
204 | */ | ||
205 | static inline void pgd_list_add(pgd_t *pgd) | ||
206 | { | ||
207 | struct page *page = virt_to_page(pgd); | ||
208 | |||
209 | list_add(&page->lru, &pgd_list); | ||
210 | } | ||
211 | |||
212 | static inline void pgd_list_del(pgd_t *pgd) | ||
213 | { | ||
214 | struct page *page = virt_to_page(pgd); | ||
215 | |||
216 | list_del(&page->lru); | ||
217 | } | ||
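What the list buys us: when a kernel mapping changes at the pgd level, every cached pgd in the system must be fixed up by hand. A hedged sketch of such a walk, loosely modelled on vmalloc_sync_all() in arch/x86/mm/fault.c (vmalloc_sync_one() is that file's per-pgd helper):

	struct page *page;
	unsigned long flags;

	spin_lock_irqsave(&pgd_lock, flags);
	list_for_each_entry(page, &pgd_list, lru) {
		/* Propagate the kernel mapping for 'address' into this pgd. */
		if (!vmalloc_sync_one(page_address(page), address))
			break;
	}
	spin_unlock_irqrestore(&pgd_lock, flags);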
218 | |||
219 | #define UNSHARED_PTRS_PER_PGD \ | ||
220 | (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD) | ||
221 | |||
222 | static void pgd_ctor(void *p) | ||
223 | { | ||
224 | pgd_t *pgd = p; | ||
225 | unsigned long flags; | ||
226 | |||
227 | /* Clear usermode parts of PGD */ | ||
228 | memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); | ||
229 | |||
230 | spin_lock_irqsave(&pgd_lock, flags); | ||
231 | |||
232 | /* If the pgd points to a shared pagetable level (either the | ||
233 | ptes in non-PAE, or shared PMD in PAE), then just copy the | ||
234 | references from swapper_pg_dir. */ | ||
235 | if (PAGETABLE_LEVELS == 2 || | ||
236 | (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD)) { | ||
237 | clone_pgd_range(pgd + USER_PTRS_PER_PGD, | ||
238 | swapper_pg_dir + USER_PTRS_PER_PGD, | ||
239 | KERNEL_PGD_PTRS); | ||
240 | paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT, | ||
241 | __pa(swapper_pg_dir) >> PAGE_SHIFT, | ||
242 | USER_PTRS_PER_PGD, | ||
243 | KERNEL_PGD_PTRS); | ||
244 | } | ||
245 | |||
246 | /* list required to sync kernel mapping updates */ | ||
247 | if (!SHARED_KERNEL_PMD) | ||
248 | pgd_list_add(pgd); | ||
249 | |||
250 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
251 | } | ||
252 | |||
253 | static void pgd_dtor(void *pgd) | ||
254 | { | ||
255 | unsigned long flags; /* can be called from interrupt context */ | ||
256 | |||
257 | if (SHARED_KERNEL_PMD) | ||
258 | return; | ||
259 | |||
260 | spin_lock_irqsave(&pgd_lock, flags); | ||
261 | pgd_list_del(pgd); | ||
262 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
263 | } | ||
264 | |||
265 | #ifdef CONFIG_X86_PAE | ||
266 | /* | ||
267 | * Mop up any pmd pages which may still be attached to the pgd. | ||
268 | * Normally they will be freed by munmap/exit_mmap, but any pmd we | ||
269 | * preallocate which never got a corresponding vma will need to be | ||
270 | * freed manually. | ||
271 | */ | ||
272 | static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp) | ||
273 | { | ||
274 | int i; | ||
275 | |||
276 | for (i = 0; i < UNSHARED_PTRS_PER_PGD; i++) { | ||
277 | pgd_t pgd = pgdp[i]; | ||
278 | |||
279 | if (pgd_val(pgd) != 0) { | ||
280 | pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd); | ||
281 | |||
282 | pgdp[i] = native_make_pgd(0); | ||
283 | |||
284 | paravirt_release_pd(pgd_val(pgd) >> PAGE_SHIFT); | ||
285 | pmd_free(mm, pmd); | ||
286 | } | ||
287 | } | ||
288 | } | ||
289 | |||
290 | /* | ||
291 | * In PAE mode, we need to do a cr3 reload (=tlb flush) when | ||
292 | * updating the top-level pagetable entries to guarantee the | ||
293 | * processor notices the update. Since this is expensive, and | ||
294 | * all 4 top-level entries are used almost immediately in a | ||
295 | * new process's life, we just pre-populate them here. | ||
296 | * | ||
297 | * Also, if we're in a paravirt environment where the kernel pmd is | ||
298 | * not shared between pagetables (!SHARED_KERNEL_PMD), we allocate | ||
299 | * and initialize the kernel pmds here. | ||
300 | */ | ||
301 | static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd) | ||
302 | { | ||
303 | pud_t *pud; | ||
304 | unsigned long addr; | ||
305 | int i; | ||
306 | |||
307 | pud = pud_offset(pgd, 0); | ||
308 | for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD; | ||
309 | i++, pud++, addr += PUD_SIZE) { | ||
310 | pmd_t *pmd = pmd_alloc_one(mm, addr); | ||
311 | |||
312 | if (!pmd) { | ||
313 | pgd_mop_up_pmds(mm, pgd); | ||
314 | return 0; | ||
315 | } | ||
316 | |||
317 | if (i >= USER_PTRS_PER_PGD) | ||
318 | memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]), | ||
319 | sizeof(pmd_t) * PTRS_PER_PMD); | ||
320 | |||
321 | pud_populate(mm, pud, pmd); | ||
322 | } | ||
323 | |||
324 | return 1; | ||
325 | } | ||
326 | #else /* !CONFIG_X86_PAE */ | ||
327 | /* No need to prepopulate any pagetable entries in non-PAE modes. */ | ||
328 | static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd) | ||
329 | { | ||
330 | return 1; | ||
331 | } | ||
332 | |||
333 | static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp) | ||
334 | { | ||
335 | } | ||
336 | #endif /* CONFIG_X86_PAE */ | ||
337 | |||
338 | pgd_t *pgd_alloc(struct mm_struct *mm) | ||
339 | { | ||
340 | pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); | ||
341 | |||
342 | /* so that alloc_pd can use it */ | ||
343 | mm->pgd = pgd; | ||
344 | if (pgd) | ||
345 | pgd_ctor(pgd); | ||
346 | |||
347 | if (pgd && !pgd_prepopulate_pmd(mm, pgd)) { | ||
348 | pgd_dtor(pgd); | ||
349 | free_page((unsigned long)pgd); | ||
350 | pgd = NULL; | ||
351 | } | ||
352 | |||
353 | return pgd; | ||
354 | } | ||
355 | |||
356 | void pgd_free(struct mm_struct *mm, pgd_t *pgd) | ||
357 | { | ||
358 | pgd_mop_up_pmds(mm, pgd); | ||
359 | pgd_dtor(pgd); | ||
360 | free_page((unsigned long)pgd); | ||
361 | } | ||
362 | |||
363 | void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte) | ||
364 | { | ||
365 | pgtable_page_dtor(pte); | ||
366 | paravirt_release_pt(page_to_pfn(pte)); | ||
367 | tlb_remove_page(tlb, pte); | ||
368 | } | ||
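__pte_free_tlb() is not called directly; the generic unmap path reaches it through the mmu_gather batching API so that TLB flushes and page frees can be coalesced. The API has shifted between kernel versions, so treat this as an illustrative sketch only:

	struct mmu_gather *tlb;

	tlb = tlb_gather_mmu(mm, 0);		/* start batching */
	/* ... zap the ptes, then release the now-empty pte page: */
	pte_free_tlb(tlb, ptepage);		/* ends up in __pte_free_tlb() */
	tlb_finish_mmu(tlb, start, end);	/* flush TLBs, free the pages */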
369 | |||
370 | #ifdef CONFIG_X86_PAE | ||
371 | |||
372 | void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) | ||
373 | { | ||
374 | paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); | ||
375 | tlb_remove_page(tlb, virt_to_page(pmd)); | ||
376 | } | ||
377 | |||
378 | #endif | ||
379 | |||
380 | int pmd_bad(pmd_t pmd) | 176 | int pmd_bad(pmd_t pmd) |
381 | { | 177 | { |
382 | WARN_ON_ONCE(pmd_bad_v1(pmd) != pmd_bad_v2(pmd)); | 178 | WARN_ON_ONCE(pmd_bad_v1(pmd) != pmd_bad_v2(pmd)); |
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index fb43d89f46f3..3890234e5b26 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c | |||
@@ -163,7 +163,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) | |||
163 | pxm, apic_id, node); | 163 | pxm, apic_id, node); |
164 | } | 164 | } |
165 | 165 | ||
166 | int update_end_of_memory(unsigned long end) {return -1;} | 166 | static int update_end_of_memory(unsigned long end) {return -1;} |
167 | static int hotadd_enough_memory(struct bootnode *nd) {return 1;} | 167 | static int hotadd_enough_memory(struct bootnode *nd) {return 1;} |
168 | #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE | 168 | #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE |
169 | static inline int save_add_info(void) {return 1;} | 169 | static inline int save_add_info(void) {return 1;} |