aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--arch/x86/mm/Makefile2
-rw-r--r--arch/x86/mm/fault.c11
-rw-r--r--arch/x86/mm/gup.c2
-rw-r--r--arch/x86/mm/init_32.c35
-rw-r--r--arch/x86/mm/init_64.c77
-rw-r--r--arch/x86/mm/iomap_32.c59
-rw-r--r--arch/x86/mm/ioremap.c25
-rw-r--r--arch/x86/mm/numa_32.c35
-rw-r--r--arch/x86/mm/pageattr.c8
-rw-r--r--arch/x86/mm/pat.c240
10 files changed, 432 insertions, 62 deletions
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 59f89b434b45..fea4565ff576 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,7 +1,7 @@
1obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ 1obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
2 pat.o pgtable.o gup.o 2 pat.o pgtable.o gup.o
3 3
4obj-$(CONFIG_X86_32) += pgtable_32.o 4obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o
5 5
6obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o 6obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
7obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o 7obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 31e8730fa246..20ef272c412c 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -413,6 +413,7 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
413 unsigned long error_code) 413 unsigned long error_code)
414{ 414{
415 unsigned long flags = oops_begin(); 415 unsigned long flags = oops_begin();
416 int sig = SIGKILL;
416 struct task_struct *tsk; 417 struct task_struct *tsk;
417 418
418 printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", 419 printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
@@ -423,8 +424,8 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
423 tsk->thread.trap_no = 14; 424 tsk->thread.trap_no = 14;
424 tsk->thread.error_code = error_code; 425 tsk->thread.error_code = error_code;
425 if (__die("Bad pagetable", regs, error_code)) 426 if (__die("Bad pagetable", regs, error_code))
426 regs = NULL; 427 sig = 0;
427 oops_end(flags, regs, SIGKILL); 428 oops_end(flags, regs, sig);
428} 429}
429#endif 430#endif
430 431
@@ -590,6 +591,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
590 int fault; 591 int fault;
591#ifdef CONFIG_X86_64 592#ifdef CONFIG_X86_64
592 unsigned long flags; 593 unsigned long flags;
594 int sig;
593#endif 595#endif
594 596
595 tsk = current; 597 tsk = current;
@@ -849,11 +851,12 @@ no_context:
849 bust_spinlocks(0); 851 bust_spinlocks(0);
850 do_exit(SIGKILL); 852 do_exit(SIGKILL);
851#else 853#else
854 sig = SIGKILL;
852 if (__die("Oops", regs, error_code)) 855 if (__die("Oops", regs, error_code))
853 regs = NULL; 856 sig = 0;
854 /* Executive summary in case the body of the oops scrolled away */ 857 /* Executive summary in case the body of the oops scrolled away */
855 printk(KERN_EMERG "CR2: %016lx\n", address); 858 printk(KERN_EMERG "CR2: %016lx\n", address);
856 oops_end(flags, regs, SIGKILL); 859 oops_end(flags, regs, sig);
857#endif 860#endif
858 861
859/* 862/*
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 4ba373c5b8c8..be54176e9eb2 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -233,7 +233,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
233 len = (unsigned long) nr_pages << PAGE_SHIFT; 233 len = (unsigned long) nr_pages << PAGE_SHIFT;
234 end = start + len; 234 end = start + len;
235 if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, 235 if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
236 start, len))) 236 (void __user *)start, len)))
237 goto slow_irqon; 237 goto slow_irqon;
238 238
239 /* 239 /*
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 8396868e82c5..800e1d94c1b5 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -67,7 +67,7 @@ static unsigned long __meminitdata table_top;
67 67
68static int __initdata after_init_bootmem; 68static int __initdata after_init_bootmem;
69 69
70static __init void *alloc_low_page(unsigned long *phys) 70static __init void *alloc_low_page(void)
71{ 71{
72 unsigned long pfn = table_end++; 72 unsigned long pfn = table_end++;
73 void *adr; 73 void *adr;
@@ -77,7 +77,6 @@ static __init void *alloc_low_page(unsigned long *phys)
77 77
78 adr = __va(pfn * PAGE_SIZE); 78 adr = __va(pfn * PAGE_SIZE);
79 memset(adr, 0, PAGE_SIZE); 79 memset(adr, 0, PAGE_SIZE);
80 *phys = pfn * PAGE_SIZE;
81 return adr; 80 return adr;
82} 81}
83 82
@@ -92,16 +91,17 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
92 pmd_t *pmd_table; 91 pmd_t *pmd_table;
93 92
94#ifdef CONFIG_X86_PAE 93#ifdef CONFIG_X86_PAE
95 unsigned long phys;
96 if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { 94 if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
97 if (after_init_bootmem) 95 if (after_init_bootmem)
98 pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); 96 pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
99 else 97 else
100 pmd_table = (pmd_t *)alloc_low_page(&phys); 98 pmd_table = (pmd_t *)alloc_low_page();
101 paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); 99 paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
102 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); 100 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
103 pud = pud_offset(pgd, 0); 101 pud = pud_offset(pgd, 0);
104 BUG_ON(pmd_table != pmd_offset(pud, 0)); 102 BUG_ON(pmd_table != pmd_offset(pud, 0));
103
104 return pmd_table;
105 } 105 }
106#endif 106#endif
107 pud = pud_offset(pgd, 0); 107 pud = pud_offset(pgd, 0);
@@ -126,10 +126,8 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
126 if (!page_table) 126 if (!page_table)
127 page_table = 127 page_table =
128 (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); 128 (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
129 } else { 129 } else
130 unsigned long phys; 130 page_table = (pte_t *)alloc_low_page();
131 page_table = (pte_t *)alloc_low_page(&phys);
132 }
133 131
134 paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT); 132 paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
135 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); 133 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
@@ -334,7 +332,6 @@ int devmem_is_allowed(unsigned long pagenr)
334 return 0; 332 return 0;
335} 333}
336 334
337#ifdef CONFIG_HIGHMEM
338pte_t *kmap_pte; 335pte_t *kmap_pte;
339pgprot_t kmap_prot; 336pgprot_t kmap_prot;
340 337
@@ -357,6 +354,7 @@ static void __init kmap_init(void)
357 kmap_prot = PAGE_KERNEL; 354 kmap_prot = PAGE_KERNEL;
358} 355}
359 356
357#ifdef CONFIG_HIGHMEM
360static void __init permanent_kmaps_init(pgd_t *pgd_base) 358static void __init permanent_kmaps_init(pgd_t *pgd_base)
361{ 359{
362 unsigned long vaddr; 360 unsigned long vaddr;
@@ -436,7 +434,6 @@ static void __init set_highmem_pages_init(void)
436#endif /* !CONFIG_NUMA */ 434#endif /* !CONFIG_NUMA */
437 435
438#else 436#else
439# define kmap_init() do { } while (0)
440# define permanent_kmaps_init(pgd_base) do { } while (0) 437# define permanent_kmaps_init(pgd_base) do { } while (0)
441# define set_highmem_pages_init() do { } while (0) 438# define set_highmem_pages_init() do { } while (0)
442#endif /* CONFIG_HIGHMEM */ 439#endif /* CONFIG_HIGHMEM */
@@ -970,8 +967,6 @@ void __init mem_init(void)
970 int codesize, reservedpages, datasize, initsize; 967 int codesize, reservedpages, datasize, initsize;
971 int tmp; 968 int tmp;
972 969
973 start_periodic_check_for_corruption();
974
975#ifdef CONFIG_FLATMEM 970#ifdef CONFIG_FLATMEM
976 BUG_ON(!mem_map); 971 BUG_ON(!mem_map);
977#endif 972#endif
@@ -1041,11 +1036,25 @@ void __init mem_init(void)
1041 (unsigned long)&_text, (unsigned long)&_etext, 1036 (unsigned long)&_text, (unsigned long)&_etext,
1042 ((unsigned long)&_etext - (unsigned long)&_text) >> 10); 1037 ((unsigned long)&_etext - (unsigned long)&_text) >> 10);
1043 1038
1039 /*
1040 * Check boundaries twice: Some fundamental inconsistencies can
1041 * be detected at build time already.
1042 */
1043#define __FIXADDR_TOP (-PAGE_SIZE)
1044#ifdef CONFIG_HIGHMEM
1045 BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
1046 BUILD_BUG_ON(VMALLOC_END > PKMAP_BASE);
1047#endif
1048#define high_memory (-128UL << 20)
1049 BUILD_BUG_ON(VMALLOC_START >= VMALLOC_END);
1050#undef high_memory
1051#undef __FIXADDR_TOP
1052
1044#ifdef CONFIG_HIGHMEM 1053#ifdef CONFIG_HIGHMEM
1045 BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START); 1054 BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
1046 BUG_ON(VMALLOC_END > PKMAP_BASE); 1055 BUG_ON(VMALLOC_END > PKMAP_BASE);
1047#endif 1056#endif
1048 BUG_ON(VMALLOC_START > VMALLOC_END); 1057 BUG_ON(VMALLOC_START >= VMALLOC_END);
1049 BUG_ON((unsigned long)high_memory > VMALLOC_START); 1058 BUG_ON((unsigned long)high_memory > VMALLOC_START);
1050 1059
1051 if (boot_cpu_data.wp_works_ok < 0) 1060 if (boot_cpu_data.wp_works_ok < 0)
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index b8e461d49412..9f7a0d24d42a 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -350,8 +350,10 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
350 * pagetable pages as RO. So assume someone who pre-setup 350 * pagetable pages as RO. So assume someone who pre-setup
351 * these mappings are more intelligent. 351 * these mappings are more intelligent.
352 */ 352 */
353 if (pte_val(*pte)) 353 if (pte_val(*pte)) {
354 pages++;
354 continue; 355 continue;
356 }
355 357
356 if (0) 358 if (0)
357 printk(" pte=%p addr=%lx pte=%016lx\n", 359 printk(" pte=%p addr=%lx pte=%016lx\n",
@@ -418,8 +420,10 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
418 * not differ with respect to page frame and 420 * not differ with respect to page frame and
419 * attributes. 421 * attributes.
420 */ 422 */
421 if (page_size_mask & (1 << PG_LEVEL_2M)) 423 if (page_size_mask & (1 << PG_LEVEL_2M)) {
424 pages++;
422 continue; 425 continue;
426 }
423 new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd)); 427 new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd));
424 } 428 }
425 429
@@ -499,8 +503,10 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
499 * not differ with respect to page frame and 503 * not differ with respect to page frame and
500 * attributes. 504 * attributes.
501 */ 505 */
502 if (page_size_mask & (1 << PG_LEVEL_1G)) 506 if (page_size_mask & (1 << PG_LEVEL_1G)) {
507 pages++;
503 continue; 508 continue;
509 }
504 prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud)); 510 prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud));
505 } 511 }
506 512
@@ -665,12 +671,13 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
665 unsigned long last_map_addr = 0; 671 unsigned long last_map_addr = 0;
666 unsigned long page_size_mask = 0; 672 unsigned long page_size_mask = 0;
667 unsigned long start_pfn, end_pfn; 673 unsigned long start_pfn, end_pfn;
674 unsigned long pos;
668 675
669 struct map_range mr[NR_RANGE_MR]; 676 struct map_range mr[NR_RANGE_MR];
670 int nr_range, i; 677 int nr_range, i;
671 int use_pse, use_gbpages; 678 int use_pse, use_gbpages;
672 679
673 printk(KERN_INFO "init_memory_mapping\n"); 680 printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end);
674 681
675 /* 682 /*
676 * Find space for the kernel direct mapping tables. 683 * Find space for the kernel direct mapping tables.
@@ -704,35 +711,50 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
704 711
705 /* head if not big page alignment ?*/ 712 /* head if not big page alignment ?*/
706 start_pfn = start >> PAGE_SHIFT; 713 start_pfn = start >> PAGE_SHIFT;
707 end_pfn = ((start + (PMD_SIZE - 1)) >> PMD_SHIFT) 714 pos = start_pfn << PAGE_SHIFT;
715 end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT)
708 << (PMD_SHIFT - PAGE_SHIFT); 716 << (PMD_SHIFT - PAGE_SHIFT);
709 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); 717 if (start_pfn < end_pfn) {
718 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
719 pos = end_pfn << PAGE_SHIFT;
720 }
710 721
711 /* big page (2M) range*/ 722 /* big page (2M) range*/
712 start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT) 723 start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
713 << (PMD_SHIFT - PAGE_SHIFT); 724 << (PMD_SHIFT - PAGE_SHIFT);
714 end_pfn = ((start + (PUD_SIZE - 1))>>PUD_SHIFT) 725 end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
715 << (PUD_SHIFT - PAGE_SHIFT); 726 << (PUD_SHIFT - PAGE_SHIFT);
716 if (end_pfn > ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT))) 727 if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)))
717 end_pfn = ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT)); 728 end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT));
718 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 729 if (start_pfn < end_pfn) {
719 page_size_mask & (1<<PG_LEVEL_2M)); 730 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
731 page_size_mask & (1<<PG_LEVEL_2M));
732 pos = end_pfn << PAGE_SHIFT;
733 }
720 734
721 /* big page (1G) range */ 735 /* big page (1G) range */
722 start_pfn = end_pfn; 736 start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
723 end_pfn = (end>>PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT); 737 << (PUD_SHIFT - PAGE_SHIFT);
724 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 738 end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
739 if (start_pfn < end_pfn) {
740 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
725 page_size_mask & 741 page_size_mask &
726 ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G))); 742 ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G)));
743 pos = end_pfn << PAGE_SHIFT;
744 }
727 745
728 /* tail is not big page (1G) alignment */ 746 /* tail is not big page (1G) alignment */
729 start_pfn = end_pfn; 747 start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
730 end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); 748 << (PMD_SHIFT - PAGE_SHIFT);
731 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 749 end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
732 page_size_mask & (1<<PG_LEVEL_2M)); 750 if (start_pfn < end_pfn) {
751 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
752 page_size_mask & (1<<PG_LEVEL_2M));
753 pos = end_pfn << PAGE_SHIFT;
754 }
733 755
734 /* tail is not big page (2M) alignment */ 756 /* tail is not big page (2M) alignment */
735 start_pfn = end_pfn; 757 start_pfn = pos>>PAGE_SHIFT;
736 end_pfn = end>>PAGE_SHIFT; 758 end_pfn = end>>PAGE_SHIFT;
737 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); 759 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
738 760
@@ -831,12 +853,12 @@ int arch_add_memory(int nid, u64 start, u64 size)
831 unsigned long nr_pages = size >> PAGE_SHIFT; 853 unsigned long nr_pages = size >> PAGE_SHIFT;
832 int ret; 854 int ret;
833 855
834 last_mapped_pfn = init_memory_mapping(start, start + size-1); 856 last_mapped_pfn = init_memory_mapping(start, start + size);
835 if (last_mapped_pfn > max_pfn_mapped) 857 if (last_mapped_pfn > max_pfn_mapped)
836 max_pfn_mapped = last_mapped_pfn; 858 max_pfn_mapped = last_mapped_pfn;
837 859
838 ret = __add_pages(zone, start_pfn, nr_pages); 860 ret = __add_pages(zone, start_pfn, nr_pages);
839 WARN_ON(1); 861 WARN_ON_ONCE(ret);
840 862
841 return ret; 863 return ret;
842} 864}
@@ -878,8 +900,7 @@ static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel,
878void __init mem_init(void) 900void __init mem_init(void)
879{ 901{
880 long codesize, reservedpages, datasize, initsize; 902 long codesize, reservedpages, datasize, initsize;
881 903 unsigned long absent_pages;
882 start_periodic_check_for_corruption();
883 904
884 pci_iommu_alloc(); 905 pci_iommu_alloc();
885 906
@@ -893,8 +914,9 @@ void __init mem_init(void)
893#else 914#else
894 totalram_pages = free_all_bootmem(); 915 totalram_pages = free_all_bootmem();
895#endif 916#endif
896 reservedpages = max_pfn - totalram_pages - 917
897 absent_pages_in_range(0, max_pfn); 918 absent_pages = absent_pages_in_range(0, max_pfn);
919 reservedpages = max_pfn - totalram_pages - absent_pages;
898 after_bootmem = 1; 920 after_bootmem = 1;
899 921
900 codesize = (unsigned long) &_etext - (unsigned long) &_text; 922 codesize = (unsigned long) &_etext - (unsigned long) &_text;
@@ -911,10 +933,11 @@ void __init mem_init(void)
911 VSYSCALL_END - VSYSCALL_START); 933 VSYSCALL_END - VSYSCALL_START);
912 934
913 printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, " 935 printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, "
914 "%ldk reserved, %ldk data, %ldk init)\n", 936 "%ldk absent, %ldk reserved, %ldk data, %ldk init)\n",
915 (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), 937 (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
916 max_pfn << (PAGE_SHIFT-10), 938 max_pfn << (PAGE_SHIFT-10),
917 codesize >> 10, 939 codesize >> 10,
940 absent_pages << (PAGE_SHIFT-10),
918 reservedpages << (PAGE_SHIFT-10), 941 reservedpages << (PAGE_SHIFT-10),
919 datasize >> 10, 942 datasize >> 10,
920 initsize >> 10); 943 initsize >> 10);
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
new file mode 100644
index 000000000000..d0151d8ce452
--- /dev/null
+++ b/arch/x86/mm/iomap_32.c
@@ -0,0 +1,59 @@
1/*
2 * Copyright © 2008 Ingo Molnar
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free Software Foundation, Inc.,
16 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
17 */
18
19#include <asm/iomap.h>
20#include <linux/module.h>
21
22/* Map 'pfn' using fixed map 'type' and protections 'prot'
23 */
24void *
25iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
26{
27 enum fixed_addresses idx;
28 unsigned long vaddr;
29
30 pagefault_disable();
31
32 idx = type + KM_TYPE_NR*smp_processor_id();
33 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
34 set_pte(kmap_pte-idx, pfn_pte(pfn, prot));
35 arch_flush_lazy_mmu_mode();
36
37 return (void*) vaddr;
38}
39EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn);
40
41void
42iounmap_atomic(void *kvaddr, enum km_type type)
43{
44 unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
45 enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
46
47 /*
48 * Force other mappings to Oops if they'll try to access this pte
49 * without first remap it. Keeping stale mappings around is a bad idea
50 * also, in case the page changes cacheability attributes or becomes
51 * a protected page in a hypervisor.
52 */
53 if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx))
54 kpte_clear_flush(kmap_pte-idx, vaddr);
55
56 arch_flush_lazy_mmu_mode();
57 pagefault_enable();
58}
59EXPORT_SYMBOL_GPL(iounmap_atomic);
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index ae71e11eb3e5..bd85d42819e1 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -223,7 +223,8 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
223 * Check if the request spans more than any BAR in the iomem resource 223 * Check if the request spans more than any BAR in the iomem resource
224 * tree. 224 * tree.
225 */ 225 */
226 WARN_ON(iomem_map_sanity_check(phys_addr, size)); 226 WARN_ONCE(iomem_map_sanity_check(phys_addr, size),
227 KERN_INFO "Info: mapping multiple BARs. Your kernel is fine.");
227 228
228 /* 229 /*
229 * Don't allow anybody to remap normal RAM that we're using.. 230 * Don't allow anybody to remap normal RAM that we're using..
@@ -387,7 +388,7 @@ static void __iomem *ioremap_default(resource_size_t phys_addr,
387 unsigned long size) 388 unsigned long size)
388{ 389{
389 unsigned long flags; 390 unsigned long flags;
390 void *ret; 391 void __iomem *ret;
391 int err; 392 int err;
392 393
393 /* 394 /*
@@ -399,11 +400,11 @@ static void __iomem *ioremap_default(resource_size_t phys_addr,
399 if (err < 0) 400 if (err < 0)
400 return NULL; 401 return NULL;
401 402
402 ret = (void *) __ioremap_caller(phys_addr, size, flags, 403 ret = __ioremap_caller(phys_addr, size, flags,
403 __builtin_return_address(0)); 404 __builtin_return_address(0));
404 405
405 free_memtype(phys_addr, phys_addr + size); 406 free_memtype(phys_addr, phys_addr + size);
406 return (void __iomem *)ret; 407 return ret;
407} 408}
408 409
409void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size, 410void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
@@ -622,7 +623,7 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx)
622 __early_set_fixmap(idx, 0, __pgprot(0)); 623 __early_set_fixmap(idx, 0, __pgprot(0));
623} 624}
624 625
625static void *prev_map[FIX_BTMAPS_SLOTS] __initdata; 626static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata;
626static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata; 627static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata;
627static int __init check_early_ioremap_leak(void) 628static int __init check_early_ioremap_leak(void)
628{ 629{
@@ -645,7 +646,7 @@ static int __init check_early_ioremap_leak(void)
645} 646}
646late_initcall(check_early_ioremap_leak); 647late_initcall(check_early_ioremap_leak);
647 648
648static void __init *__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) 649static void __init __iomem *__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot)
649{ 650{
650 unsigned long offset, last_addr; 651 unsigned long offset, last_addr;
651 unsigned int nrpages; 652 unsigned int nrpages;
@@ -713,23 +714,23 @@ static void __init *__early_ioremap(unsigned long phys_addr, unsigned long size,
713 if (early_ioremap_debug) 714 if (early_ioremap_debug)
714 printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0)); 715 printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0));
715 716
716 prev_map[slot] = (void *) (offset + fix_to_virt(idx0)); 717 prev_map[slot] = (void __iomem *)(offset + fix_to_virt(idx0));
717 return prev_map[slot]; 718 return prev_map[slot];
718} 719}
719 720
720/* Remap an IO device */ 721/* Remap an IO device */
721void __init *early_ioremap(unsigned long phys_addr, unsigned long size) 722void __init __iomem *early_ioremap(unsigned long phys_addr, unsigned long size)
722{ 723{
723 return __early_ioremap(phys_addr, size, PAGE_KERNEL_IO); 724 return __early_ioremap(phys_addr, size, PAGE_KERNEL_IO);
724} 725}
725 726
726/* Remap memory */ 727/* Remap memory */
727void __init *early_memremap(unsigned long phys_addr, unsigned long size) 728void __init __iomem *early_memremap(unsigned long phys_addr, unsigned long size)
728{ 729{
729 return __early_ioremap(phys_addr, size, PAGE_KERNEL); 730 return __early_ioremap(phys_addr, size, PAGE_KERNEL);
730} 731}
731 732
732void __init early_iounmap(void *addr, unsigned long size) 733void __init early_iounmap(void __iomem *addr, unsigned long size)
733{ 734{
734 unsigned long virt_addr; 735 unsigned long virt_addr;
735 unsigned long offset; 736 unsigned long offset;
@@ -779,7 +780,7 @@ void __init early_iounmap(void *addr, unsigned long size)
779 --idx; 780 --idx;
780 --nrpages; 781 --nrpages;
781 } 782 }
782 prev_map[slot] = 0; 783 prev_map[slot] = NULL;
783} 784}
784 785
785void __this_fixmap_does_not_exist(void) 786void __this_fixmap_does_not_exist(void)
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 847c164725f4..8518c678d83f 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -222,6 +222,41 @@ static void __init remap_numa_kva(void)
222 } 222 }
223} 223}
224 224
225#ifdef CONFIG_HIBERNATION
226/**
227 * resume_map_numa_kva - add KVA mapping to the temporary page tables created
228 * during resume from hibernation
229 * @pgd_base - temporary resume page directory
230 */
231void resume_map_numa_kva(pgd_t *pgd_base)
232{
233 int node;
234
235 for_each_online_node(node) {
236 unsigned long start_va, start_pfn, size, pfn;
237
238 start_va = (unsigned long)node_remap_start_vaddr[node];
239 start_pfn = node_remap_start_pfn[node];
240 size = node_remap_size[node];
241
242 printk(KERN_DEBUG "%s: node %d\n", __FUNCTION__, node);
243
244 for (pfn = 0; pfn < size; pfn += PTRS_PER_PTE) {
245 unsigned long vaddr = start_va + (pfn << PAGE_SHIFT);
246 pgd_t *pgd = pgd_base + pgd_index(vaddr);
247 pud_t *pud = pud_offset(pgd, vaddr);
248 pmd_t *pmd = pmd_offset(pud, vaddr);
249
250 set_pmd(pmd, pfn_pmd(start_pfn + pfn,
251 PAGE_KERNEL_LARGE_EXEC));
252
253 printk(KERN_DEBUG "%s: %08lx -> pfn %08lx\n",
254 __FUNCTION__, vaddr, start_pfn + pfn);
255 }
256 }
257}
258#endif
259
225static unsigned long calculate_numa_remap_pages(void) 260static unsigned long calculate_numa_remap_pages(void)
226{ 261{
227 int nid; 262 int nid;
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index f1dc1b75d166..e89d24815f26 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -67,18 +67,18 @@ static void split_page_count(int level)
67 67
68void arch_report_meminfo(struct seq_file *m) 68void arch_report_meminfo(struct seq_file *m)
69{ 69{
70 seq_printf(m, "DirectMap4k: %8lu kB\n", 70 seq_printf(m, "DirectMap4k: %8lu kB\n",
71 direct_pages_count[PG_LEVEL_4K] << 2); 71 direct_pages_count[PG_LEVEL_4K] << 2);
72#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) 72#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
73 seq_printf(m, "DirectMap2M: %8lu kB\n", 73 seq_printf(m, "DirectMap2M: %8lu kB\n",
74 direct_pages_count[PG_LEVEL_2M] << 11); 74 direct_pages_count[PG_LEVEL_2M] << 11);
75#else 75#else
76 seq_printf(m, "DirectMap4M: %8lu kB\n", 76 seq_printf(m, "DirectMap4M: %8lu kB\n",
77 direct_pages_count[PG_LEVEL_2M] << 12); 77 direct_pages_count[PG_LEVEL_2M] << 12);
78#endif 78#endif
79#ifdef CONFIG_X86_64 79#ifdef CONFIG_X86_64
80 if (direct_gbpages) 80 if (direct_gbpages)
81 seq_printf(m, "DirectMap1G: %8lu kB\n", 81 seq_printf(m, "DirectMap1G: %8lu kB\n",
82 direct_pages_count[PG_LEVEL_1G] << 20); 82 direct_pages_count[PG_LEVEL_1G] << 20);
83#endif 83#endif
84} 84}
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 738fd0f24958..541bcc944a5b 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -481,12 +481,16 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
481 return 1; 481 return 1;
482} 482}
483#else 483#else
484/* This check is needed to avoid cache aliasing when PAT is enabled */
484static inline int range_is_allowed(unsigned long pfn, unsigned long size) 485static inline int range_is_allowed(unsigned long pfn, unsigned long size)
485{ 486{
486 u64 from = ((u64)pfn) << PAGE_SHIFT; 487 u64 from = ((u64)pfn) << PAGE_SHIFT;
487 u64 to = from + size; 488 u64 to = from + size;
488 u64 cursor = from; 489 u64 cursor = from;
489 490
491 if (!pat_enabled)
492 return 1;
493
490 while (cursor < to) { 494 while (cursor < to) {
491 if (!devmem_is_allowed(pfn)) { 495 if (!devmem_is_allowed(pfn)) {
492 printk(KERN_INFO 496 printk(KERN_INFO
@@ -592,6 +596,242 @@ void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
592 free_memtype(addr, addr + size); 596 free_memtype(addr, addr + size);
593} 597}
594 598
599/*
600 * Internal interface to reserve a range of physical memory with prot.
601 * Reserved non RAM regions only and after successful reserve_memtype,
602 * this func also keeps identity mapping (if any) in sync with this new prot.
603 */
604static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t vma_prot)
605{
606 int is_ram = 0;
607 int id_sz, ret;
608 unsigned long flags;
609 unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK);
610
611 is_ram = pagerange_is_ram(paddr, paddr + size);
612
613 if (is_ram != 0) {
614 /*
615 * For mapping RAM pages, drivers need to call
616 * set_memory_[uc|wc|wb] directly, for reserve and free, before
617 * setting up the PTE.
618 */
619 WARN_ON_ONCE(1);
620 return 0;
621 }
622
623 ret = reserve_memtype(paddr, paddr + size, want_flags, &flags);
624 if (ret)
625 return ret;
626
627 if (flags != want_flags) {
628 free_memtype(paddr, paddr + size);
629 printk(KERN_ERR
630 "%s:%d map pfn expected mapping type %s for %Lx-%Lx, got %s\n",
631 current->comm, current->pid,
632 cattr_name(want_flags),
633 (unsigned long long)paddr,
634 (unsigned long long)(paddr + size),
635 cattr_name(flags));
636 return -EINVAL;
637 }
638
639 /* Need to keep identity mapping in sync */
640 if (paddr >= __pa(high_memory))
641 return 0;
642
643 id_sz = (__pa(high_memory) < paddr + size) ?
644 __pa(high_memory) - paddr :
645 size;
646
647 if (ioremap_change_attr((unsigned long)__va(paddr), id_sz, flags) < 0) {
648 free_memtype(paddr, paddr + size);
649 printk(KERN_ERR
650 "%s:%d reserve_pfn_range ioremap_change_attr failed %s "
651 "for %Lx-%Lx\n",
652 current->comm, current->pid,
653 cattr_name(flags),
654 (unsigned long long)paddr,
655 (unsigned long long)(paddr + size));
656 return -EINVAL;
657 }
658 return 0;
659}
660
661/*
662 * Internal interface to free a range of physical memory.
663 * Frees non RAM regions only.
664 */
665static void free_pfn_range(u64 paddr, unsigned long size)
666{
667 int is_ram;
668
669 is_ram = pagerange_is_ram(paddr, paddr + size);
670 if (is_ram == 0)
671 free_memtype(paddr, paddr + size);
672}
673
674/*
675 * track_pfn_vma_copy is called when vma that is covering the pfnmap gets
676 * copied through copy_page_range().
677 *
678 * If the vma has a linear pfn mapping for the entire range, we get the prot
679 * from pte and reserve the entire vma range with single reserve_pfn_range call.
680 * Otherwise, we reserve the entire vma range, my ging through the PTEs page
681 * by page to get physical address and protection.
682 */
683int track_pfn_vma_copy(struct vm_area_struct *vma)
684{
685 int retval = 0;
686 unsigned long i, j;
687 u64 paddr;
688 unsigned long prot;
689 unsigned long vma_start = vma->vm_start;
690 unsigned long vma_end = vma->vm_end;
691 unsigned long vma_size = vma_end - vma_start;
692
693 if (!pat_enabled)
694 return 0;
695
696 if (is_linear_pfn_mapping(vma)) {
697 /*
698 * reserve the whole chunk covered by vma. We need the
699 * starting address and protection from pte.
700 */
701 if (follow_phys(vma, vma_start, 0, &prot, &paddr)) {
702 WARN_ON_ONCE(1);
703 return -EINVAL;
704 }
705 return reserve_pfn_range(paddr, vma_size, __pgprot(prot));
706 }
707
708 /* reserve entire vma page by page, using pfn and prot from pte */
709 for (i = 0; i < vma_size; i += PAGE_SIZE) {
710 if (follow_phys(vma, vma_start + i, 0, &prot, &paddr))
711 continue;
712
713 retval = reserve_pfn_range(paddr, PAGE_SIZE, __pgprot(prot));
714 if (retval)
715 goto cleanup_ret;
716 }
717 return 0;
718
719cleanup_ret:
720 /* Reserve error: Cleanup partial reservation and return error */
721 for (j = 0; j < i; j += PAGE_SIZE) {
722 if (follow_phys(vma, vma_start + j, 0, &prot, &paddr))
723 continue;
724
725 free_pfn_range(paddr, PAGE_SIZE);
726 }
727
728 return retval;
729}
730
731/*
732 * track_pfn_vma_new is called when a _new_ pfn mapping is being established
733 * for physical range indicated by pfn and size.
734 *
735 * prot is passed in as a parameter for the new mapping. If the vma has a
736 * linear pfn mapping for the entire range reserve the entire vma range with
737 * single reserve_pfn_range call.
738 * Otherwise, we look t the pfn and size and reserve only the specified range
739 * page by page.
740 *
741 * Note that this function can be called with caller trying to map only a
742 * subrange/page inside the vma.
743 */
744int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot,
745 unsigned long pfn, unsigned long size)
746{
747 int retval = 0;
748 unsigned long i, j;
749 u64 base_paddr;
750 u64 paddr;
751 unsigned long vma_start = vma->vm_start;
752 unsigned long vma_end = vma->vm_end;
753 unsigned long vma_size = vma_end - vma_start;
754
755 if (!pat_enabled)
756 return 0;
757
758 if (is_linear_pfn_mapping(vma)) {
759 /* reserve the whole chunk starting from vm_pgoff */
760 paddr = (u64)vma->vm_pgoff << PAGE_SHIFT;
761 return reserve_pfn_range(paddr, vma_size, prot);
762 }
763
764 /* reserve page by page using pfn and size */
765 base_paddr = (u64)pfn << PAGE_SHIFT;
766 for (i = 0; i < size; i += PAGE_SIZE) {
767 paddr = base_paddr + i;
768 retval = reserve_pfn_range(paddr, PAGE_SIZE, prot);
769 if (retval)
770 goto cleanup_ret;
771 }
772 return 0;
773
774cleanup_ret:
775 /* Reserve error: Cleanup partial reservation and return error */
776 for (j = 0; j < i; j += PAGE_SIZE) {
777 paddr = base_paddr + j;
778 free_pfn_range(paddr, PAGE_SIZE);
779 }
780
781 return retval;
782}
783
784/*
785 * untrack_pfn_vma is called while unmapping a pfnmap for a region.
786 * untrack can be called for a specific region indicated by pfn and size or
787 * can be for the entire vma (in which case size can be zero).
788 */
789void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
790 unsigned long size)
791{
792 unsigned long i;
793 u64 paddr;
794 unsigned long prot;
795 unsigned long vma_start = vma->vm_start;
796 unsigned long vma_end = vma->vm_end;
797 unsigned long vma_size = vma_end - vma_start;
798
799 if (!pat_enabled)
800 return;
801
802 if (is_linear_pfn_mapping(vma)) {
803 /* free the whole chunk starting from vm_pgoff */
804 paddr = (u64)vma->vm_pgoff << PAGE_SHIFT;
805 free_pfn_range(paddr, vma_size);
806 return;
807 }
808
809 if (size != 0 && size != vma_size) {
810 /* free page by page, using pfn and size */
811 paddr = (u64)pfn << PAGE_SHIFT;
812 for (i = 0; i < size; i += PAGE_SIZE) {
813 paddr = paddr + i;
814 free_pfn_range(paddr, PAGE_SIZE);
815 }
816 } else {
817 /* free entire vma, page by page, using the pfn from pte */
818 for (i = 0; i < vma_size; i += PAGE_SIZE) {
819 if (follow_phys(vma, vma_start + i, 0, &prot, &paddr))
820 continue;
821
822 free_pfn_range(paddr, PAGE_SIZE);
823 }
824 }
825}
826
827pgprot_t pgprot_writecombine(pgprot_t prot)
828{
829 if (pat_enabled)
830 return __pgprot(pgprot_val(prot) | _PAGE_CACHE_WC);
831 else
832 return pgprot_noncached(prot);
833}
834
595#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) 835#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT)
596 836
597/* get Nth element of the linked list */ 837/* get Nth element of the linked list */