Diffstat (limited to 'arch')
-rw-r--r--  arch/x86/kernel/head_32.S   |  34
-rw-r--r--  arch/x86/kernel/head_64.S   |   4
-rw-r--r--  arch/x86/mm/init_32.c       |  84
-rw-r--r--  arch/x86/mm/init_64.c       | 110
-rw-r--r--  arch/x86/mm/ioremap.c       |  19
-rw-r--r--  arch/x86/mm/pageattr-test.c |   9
-rw-r--r--  arch/x86/mm/pageattr.c      | 459
-rw-r--r--  arch/x86/mm/pat.c           | 132
8 files changed, 555 insertions(+), 296 deletions(-)
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index a7010c3a377a..e835b4eea70b 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
| @@ -172,10 +172,6 @@ num_subarch_entries = (. - subarch_entries) / 4 | |||
| 172 | * | 172 | * |
| 173 | * Note that the stack is not yet set up! | 173 | * Note that the stack is not yet set up! |
| 174 | */ | 174 | */ |
| 175 | #define PTE_ATTR 0x007 /* PRESENT+RW+USER */ | ||
| 176 | #define PDE_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */ | ||
| 177 | #define PGD_ATTR 0x001 /* PRESENT (no other attributes) */ | ||
| 178 | |||
| 179 | default_entry: | 175 | default_entry: |
| 180 | #ifdef CONFIG_X86_PAE | 176 | #ifdef CONFIG_X86_PAE |
| 181 | 177 | ||
| @@ -196,9 +192,9 @@ default_entry: | |||
| 196 | movl $pa(pg0), %edi | 192 | movl $pa(pg0), %edi |
| 197 | movl %edi, pa(init_pg_tables_start) | 193 | movl %edi, pa(init_pg_tables_start) |
| 198 | movl $pa(swapper_pg_pmd), %edx | 194 | movl $pa(swapper_pg_pmd), %edx |
| 199 | movl $PTE_ATTR, %eax | 195 | movl $PTE_IDENT_ATTR, %eax |
| 200 | 10: | 196 | 10: |
| 201 | leal PDE_ATTR(%edi),%ecx /* Create PMD entry */ | 197 | leal PDE_IDENT_ATTR(%edi),%ecx /* Create PMD entry */ |
| 202 | movl %ecx,(%edx) /* Store PMD entry */ | 198 | movl %ecx,(%edx) /* Store PMD entry */ |
| 203 | /* Upper half already zero */ | 199 | /* Upper half already zero */ |
| 204 | addl $8,%edx | 200 | addl $8,%edx |
| @@ -215,7 +211,7 @@ default_entry: | |||
| 215 | * End condition: we must map up to and including INIT_MAP_BEYOND_END | 211 | * End condition: we must map up to and including INIT_MAP_BEYOND_END |
| 216 | * bytes beyond the end of our own page tables. | 212 | * bytes beyond the end of our own page tables. |
| 217 | */ | 213 | */ |
| 218 | leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp | 214 | leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp |
| 219 | cmpl %ebp,%eax | 215 | cmpl %ebp,%eax |
| 220 | jb 10b | 216 | jb 10b |
| 221 | 1: | 217 | 1: |
| @@ -224,7 +220,7 @@ default_entry: | |||
| 224 | movl %eax, pa(max_pfn_mapped) | 220 | movl %eax, pa(max_pfn_mapped) |
| 225 | 221 | ||
| 226 | /* Do early initialization of the fixmap area */ | 222 | /* Do early initialization of the fixmap area */ |
| 227 | movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax | 223 | movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax |
| 228 | movl %eax,pa(swapper_pg_pmd+0x1000*KPMDS-8) | 224 | movl %eax,pa(swapper_pg_pmd+0x1000*KPMDS-8) |
| 229 | #else /* Not PAE */ | 225 | #else /* Not PAE */ |
| 230 | 226 | ||
| @@ -233,9 +229,9 @@ page_pde_offset = (__PAGE_OFFSET >> 20); | |||
| 233 | movl $pa(pg0), %edi | 229 | movl $pa(pg0), %edi |
| 234 | movl %edi, pa(init_pg_tables_start) | 230 | movl %edi, pa(init_pg_tables_start) |
| 235 | movl $pa(swapper_pg_dir), %edx | 231 | movl $pa(swapper_pg_dir), %edx |
| 236 | movl $PTE_ATTR, %eax | 232 | movl $PTE_IDENT_ATTR, %eax |
| 237 | 10: | 233 | 10: |
| 238 | leal PDE_ATTR(%edi),%ecx /* Create PDE entry */ | 234 | leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */ |
| 239 | movl %ecx,(%edx) /* Store identity PDE entry */ | 235 | movl %ecx,(%edx) /* Store identity PDE entry */ |
| 240 | movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */ | 236 | movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */ |
| 241 | addl $4,%edx | 237 | addl $4,%edx |
| @@ -249,7 +245,7 @@ page_pde_offset = (__PAGE_OFFSET >> 20); | |||
| 249 | * bytes beyond the end of our own page tables; the +0x007 is | 245 | * bytes beyond the end of our own page tables; the +0x007 is |
| 250 | * the attribute bits | 246 | * the attribute bits |
| 251 | */ | 247 | */ |
| 252 | leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp | 248 | leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp |
| 253 | cmpl %ebp,%eax | 249 | cmpl %ebp,%eax |
| 254 | jb 10b | 250 | jb 10b |
| 255 | movl %edi,pa(init_pg_tables_end) | 251 | movl %edi,pa(init_pg_tables_end) |
| @@ -257,7 +253,7 @@ page_pde_offset = (__PAGE_OFFSET >> 20); | |||
| 257 | movl %eax, pa(max_pfn_mapped) | 253 | movl %eax, pa(max_pfn_mapped) |
| 258 | 254 | ||
| 259 | /* Do early initialization of the fixmap area */ | 255 | /* Do early initialization of the fixmap area */ |
| 260 | movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax | 256 | movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax |
| 261 | movl %eax,pa(swapper_pg_dir+0xffc) | 257 | movl %eax,pa(swapper_pg_dir+0xffc) |
| 262 | #endif | 258 | #endif |
| 263 | jmp 3f | 259 | jmp 3f |
| @@ -634,19 +630,19 @@ ENTRY(empty_zero_page) | |||
| 634 | /* Page-aligned for the benefit of paravirt? */ | 630 | /* Page-aligned for the benefit of paravirt? */ |
| 635 | .align PAGE_SIZE_asm | 631 | .align PAGE_SIZE_asm |
| 636 | ENTRY(swapper_pg_dir) | 632 | ENTRY(swapper_pg_dir) |
| 637 | .long pa(swapper_pg_pmd+PGD_ATTR),0 /* low identity map */ | 633 | .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */ |
| 638 | # if KPMDS == 3 | 634 | # if KPMDS == 3 |
| 639 | .long pa(swapper_pg_pmd+PGD_ATTR),0 | 635 | .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 |
| 640 | .long pa(swapper_pg_pmd+PGD_ATTR+0x1000),0 | 636 | .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x1000),0 |
| 641 | .long pa(swapper_pg_pmd+PGD_ATTR+0x2000),0 | 637 | .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x2000),0 |
| 642 | # elif KPMDS == 2 | 638 | # elif KPMDS == 2 |
| 643 | .long 0,0 | 639 | .long 0,0 |
| 644 | .long pa(swapper_pg_pmd+PGD_ATTR),0 | 640 | .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 |
| 645 | .long pa(swapper_pg_pmd+PGD_ATTR+0x1000),0 | 641 | .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x1000),0 |
| 646 | # elif KPMDS == 1 | 642 | # elif KPMDS == 1 |
| 647 | .long 0,0 | 643 | .long 0,0 |
| 648 | .long 0,0 | 644 | .long 0,0 |
| 649 | .long pa(swapper_pg_pmd+PGD_ATTR),0 | 645 | .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 |
| 650 | # else | 646 | # else |
| 651 | # error "Kernel PMDs should be 1, 2 or 3" | 647 | # error "Kernel PMDs should be 1, 2 or 3" |
| 652 | # endif | 648 | # endif |
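The head_32.S hunks above drop the file-local PTE_ATTR/PDE_ATTR/PGD_ATTR defines and switch to shared PTE_IDENT_ATTR/PDE_IDENT_ATTR/PGD_IDENT_ATTR names, provided by a header that is not part of this excerpt. As a rough sketch only, and assuming the new macros keep the values documented in the removed lines, that header would look something like:

/* Hypothetical sketch -- not the header actually added by this series. */
#define PTE_IDENT_ATTR	0x007	/* PRESENT+RW+USER */
#define PDE_IDENT_ATTR	0x067	/* PRESENT+RW+USER+DIRTY+ACCESSED */
#define PGD_IDENT_ATTR	0x001	/* PRESENT (no other attributes) */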
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index db3280afe886..26cfdc1d7c7f 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
| @@ -110,7 +110,7 @@ startup_64: | |||
| 110 | movq %rdi, %rax | 110 | movq %rdi, %rax |
| 111 | shrq $PMD_SHIFT, %rax | 111 | shrq $PMD_SHIFT, %rax |
| 112 | andq $(PTRS_PER_PMD - 1), %rax | 112 | andq $(PTRS_PER_PMD - 1), %rax |
| 113 | leaq __PAGE_KERNEL_LARGE_EXEC(%rdi), %rdx | 113 | leaq __PAGE_KERNEL_IDENT_LARGE_EXEC(%rdi), %rdx |
| 114 | leaq level2_spare_pgt(%rip), %rbx | 114 | leaq level2_spare_pgt(%rip), %rbx |
| 115 | movq %rdx, 0(%rbx, %rax, 8) | 115 | movq %rdx, 0(%rbx, %rax, 8) |
| 116 | ident_complete: | 116 | ident_complete: |
| @@ -374,7 +374,7 @@ NEXT_PAGE(level2_ident_pgt) | |||
| 374 | /* Since I easily can, map the first 1G. | 374 | /* Since I easily can, map the first 1G. |
| 375 | * Don't set NX because code runs from these pages. | 375 | * Don't set NX because code runs from these pages. |
| 376 | */ | 376 | */ |
| 377 | PMDS(0, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD) | 377 | PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) |
| 378 | 378 | ||
| 379 | NEXT_PAGE(level2_kernel_pgt) | 379 | NEXT_PAGE(level2_kernel_pgt) |
| 380 | /* | 380 | /* |
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 6b9a9358b330..c3789bb19308 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
| @@ -195,11 +195,30 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base, | |||
| 195 | pgd_t *pgd; | 195 | pgd_t *pgd; |
| 196 | pmd_t *pmd; | 196 | pmd_t *pmd; |
| 197 | pte_t *pte; | 197 | pte_t *pte; |
| 198 | unsigned pages_2m = 0, pages_4k = 0; | 198 | unsigned pages_2m, pages_4k; |
| 199 | int mapping_iter; | ||
| 200 | |||
| 201 | /* | ||
| 202 | * First iteration will setup identity mapping using large/small pages | ||
| 203 | * based on use_pse, with other attributes same as set by | ||
| 204 | * the early code in head_32.S | ||
| 205 | * | ||
| 206 | * Second iteration will setup the appropriate attributes (NX, GLOBAL..) | ||
| 207 | * as desired for the kernel identity mapping. | ||
| 208 | * | ||
| 209 | * This two pass mechanism conforms to the TLB app note which says: | ||
| 210 | * | ||
| 211 | * "Software should not write to a paging-structure entry in a way | ||
| 212 | * that would change, for any linear address, both the page size | ||
| 213 | * and either the page frame or attributes." | ||
| 214 | */ | ||
| 215 | mapping_iter = 1; | ||
| 199 | 216 | ||
| 200 | if (!cpu_has_pse) | 217 | if (!cpu_has_pse) |
| 201 | use_pse = 0; | 218 | use_pse = 0; |
| 202 | 219 | ||
| 220 | repeat: | ||
| 221 | pages_2m = pages_4k = 0; | ||
| 203 | pfn = start_pfn; | 222 | pfn = start_pfn; |
| 204 | pgd_idx = pgd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET); | 223 | pgd_idx = pgd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET); |
| 205 | pgd = pgd_base + pgd_idx; | 224 | pgd = pgd_base + pgd_idx; |
| @@ -225,6 +244,13 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base, | |||
| 225 | if (use_pse) { | 244 | if (use_pse) { |
| 226 | unsigned int addr2; | 245 | unsigned int addr2; |
| 227 | pgprot_t prot = PAGE_KERNEL_LARGE; | 246 | pgprot_t prot = PAGE_KERNEL_LARGE; |
| 247 | /* | ||
| 248 | * first pass will use the same initial | ||
| 249 | * identity mapping attribute + _PAGE_PSE. | ||
| 250 | */ | ||
| 251 | pgprot_t init_prot = | ||
| 252 | __pgprot(PTE_IDENT_ATTR | | ||
| 253 | _PAGE_PSE); | ||
| 228 | 254 | ||
| 229 | addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE + | 255 | addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE + |
| 230 | PAGE_OFFSET + PAGE_SIZE-1; | 256 | PAGE_OFFSET + PAGE_SIZE-1; |
| @@ -234,7 +260,10 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base, | |||
| 234 | prot = PAGE_KERNEL_LARGE_EXEC; | 260 | prot = PAGE_KERNEL_LARGE_EXEC; |
| 235 | 261 | ||
| 236 | pages_2m++; | 262 | pages_2m++; |
| 237 | set_pmd(pmd, pfn_pmd(pfn, prot)); | 263 | if (mapping_iter == 1) |
| 264 | set_pmd(pmd, pfn_pmd(pfn, init_prot)); | ||
| 265 | else | ||
| 266 | set_pmd(pmd, pfn_pmd(pfn, prot)); | ||
| 238 | 267 | ||
| 239 | pfn += PTRS_PER_PTE; | 268 | pfn += PTRS_PER_PTE; |
| 240 | continue; | 269 | continue; |
| @@ -246,17 +275,43 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base, | |||
| 246 | for (; pte_ofs < PTRS_PER_PTE && pfn < end_pfn; | 275 | for (; pte_ofs < PTRS_PER_PTE && pfn < end_pfn; |
| 247 | pte++, pfn++, pte_ofs++, addr += PAGE_SIZE) { | 276 | pte++, pfn++, pte_ofs++, addr += PAGE_SIZE) { |
| 248 | pgprot_t prot = PAGE_KERNEL; | 277 | pgprot_t prot = PAGE_KERNEL; |
| 278 | /* | ||
| 279 | * first pass will use the same initial | ||
| 280 | * identity mapping attribute. | ||
| 281 | */ | ||
| 282 | pgprot_t init_prot = __pgprot(PTE_IDENT_ATTR); | ||
| 249 | 283 | ||
| 250 | if (is_kernel_text(addr)) | 284 | if (is_kernel_text(addr)) |
| 251 | prot = PAGE_KERNEL_EXEC; | 285 | prot = PAGE_KERNEL_EXEC; |
| 252 | 286 | ||
| 253 | pages_4k++; | 287 | pages_4k++; |
| 254 | set_pte(pte, pfn_pte(pfn, prot)); | 288 | if (mapping_iter == 1) |
| 289 | set_pte(pte, pfn_pte(pfn, init_prot)); | ||
| 290 | else | ||
| 291 | set_pte(pte, pfn_pte(pfn, prot)); | ||
| 255 | } | 292 | } |
| 256 | } | 293 | } |
| 257 | } | 294 | } |
| 258 | update_page_count(PG_LEVEL_2M, pages_2m); | 295 | if (mapping_iter == 1) { |
| 259 | update_page_count(PG_LEVEL_4K, pages_4k); | 296 | /* |
| 297 | * update direct mapping page count only in the first | ||
| 298 | * iteration. | ||
| 299 | */ | ||
| 300 | update_page_count(PG_LEVEL_2M, pages_2m); | ||
| 301 | update_page_count(PG_LEVEL_4K, pages_4k); | ||
| 302 | |||
| 303 | /* | ||
| 304 | * local global flush tlb, which will flush the previous | ||
| 305 | * mappings present in both small and large page TLB's. | ||
| 306 | */ | ||
| 307 | __flush_tlb_all(); | ||
| 308 | |||
| 309 | /* | ||
| 310 | * Second iteration will set the actual desired PTE attributes. | ||
| 311 | */ | ||
| 312 | mapping_iter = 2; | ||
| 313 | goto repeat; | ||
| 314 | } | ||
| 260 | } | 315 | } |
| 261 | 316 | ||
| 262 | /* | 317 | /* |
| @@ -719,7 +774,7 @@ void __init setup_bootmem_allocator(void) | |||
| 719 | after_init_bootmem = 1; | 774 | after_init_bootmem = 1; |
| 720 | } | 775 | } |
| 721 | 776 | ||
| 722 | static void __init find_early_table_space(unsigned long end) | 777 | static void __init find_early_table_space(unsigned long end, int use_pse) |
| 723 | { | 778 | { |
| 724 | unsigned long puds, pmds, ptes, tables, start; | 779 | unsigned long puds, pmds, ptes, tables, start; |
| 725 | 780 | ||
| @@ -729,7 +784,7 @@ static void __init find_early_table_space(unsigned long end) | |||
| 729 | pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; | 784 | pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; |
| 730 | tables += PAGE_ALIGN(pmds * sizeof(pmd_t)); | 785 | tables += PAGE_ALIGN(pmds * sizeof(pmd_t)); |
| 731 | 786 | ||
| 732 | if (cpu_has_pse) { | 787 | if (use_pse) { |
| 733 | unsigned long extra; | 788 | unsigned long extra; |
| 734 | 789 | ||
| 735 | extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); | 790 | extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); |
| @@ -769,12 +824,22 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
| 769 | pgd_t *pgd_base = swapper_pg_dir; | 824 | pgd_t *pgd_base = swapper_pg_dir; |
| 770 | unsigned long start_pfn, end_pfn; | 825 | unsigned long start_pfn, end_pfn; |
| 771 | unsigned long big_page_start; | 826 | unsigned long big_page_start; |
| 827 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
| 828 | /* | ||
| 829 | * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. | ||
| 830 | * This will simplify cpa(), which otherwise needs to support splitting | ||
| 831 | * large pages into small in interrupt context, etc. | ||
| 832 | */ | ||
| 833 | int use_pse = 0; | ||
| 834 | #else | ||
| 835 | int use_pse = cpu_has_pse; | ||
| 836 | #endif | ||
| 772 | 837 | ||
| 773 | /* | 838 | /* |
| 774 | * Find space for the kernel direct mapping tables. | 839 | * Find space for the kernel direct mapping tables. |
| 775 | */ | 840 | */ |
| 776 | if (!after_init_bootmem) | 841 | if (!after_init_bootmem) |
| 777 | find_early_table_space(end); | 842 | find_early_table_space(end, use_pse); |
| 778 | 843 | ||
| 779 | #ifdef CONFIG_X86_PAE | 844 | #ifdef CONFIG_X86_PAE |
| 780 | set_nx(); | 845 | set_nx(); |
| @@ -820,7 +885,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
| 820 | end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); | 885 | end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); |
| 821 | if (start_pfn < end_pfn) | 886 | if (start_pfn < end_pfn) |
| 822 | kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, | 887 | kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, |
| 823 | cpu_has_pse); | 888 | use_pse); |
| 824 | 889 | ||
| 825 | /* tail is not big page alignment ? */ | 890 | /* tail is not big page alignment ? */ |
| 826 | start_pfn = end_pfn; | 891 | start_pfn = end_pfn; |
| @@ -983,7 +1048,6 @@ void __init mem_init(void) | |||
| 983 | if (boot_cpu_data.wp_works_ok < 0) | 1048 | if (boot_cpu_data.wp_works_ok < 0) |
| 984 | test_wp_bit(); | 1049 | test_wp_bit(); |
| 985 | 1050 | ||
| 986 | cpa_init(); | ||
| 987 | save_pg_dir(); | 1051 | save_pg_dir(); |
| 988 | zap_low_mappings(); | 1052 | zap_low_mappings(); |
| 989 | } | 1053 | } |
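The comment added to kernel_physical_mapping_init() above describes the new two-pass scheme: pass one installs the whole mapping with the same attributes head_32.S already used (plus _PAGE_PSE where large pages are chosen), the TLB is flushed, and pass two rewrites every entry with the final attributes (NX, GLOBAL, ...), so a single update never changes the page size together with the frame or attributes. A condensed, hypothetical sketch of that control flow, with write_mapping() standing in for the real pgd/pmd/pte walk:

/* Condensed sketch of the two-pass flow; write_mapping() is a stand-in. */
static void kernel_mapping_two_pass(unsigned long start_pfn, unsigned long end_pfn)
{
	int mapping_iter = 1;			/* pass counter, as in the patch */
	unsigned long pfn;

repeat:
	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
		if (mapping_iter == 1)
			/* pass 1: boot-time identity attributes */
			write_mapping(pfn, __pgprot(PTE_IDENT_ATTR));
		else
			/* pass 2: the attributes we actually want (NX, GLOBAL, ...) */
			write_mapping(pfn, PAGE_KERNEL);
	}

	if (mapping_iter == 1) {
		__flush_tlb_all();	/* drop the pass-one translations */
		mapping_iter = 2;
		goto repeat;		/* same frames and sizes, new attributes */
	}
}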
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 770536ebf7e9..fb30486c82f7 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
| @@ -271,7 +271,8 @@ static __ref void unmap_low_page(void *adr) | |||
| 271 | } | 271 | } |
| 272 | 272 | ||
| 273 | static unsigned long __meminit | 273 | static unsigned long __meminit |
| 274 | phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end) | 274 | phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, |
| 275 | pgprot_t prot) | ||
| 275 | { | 276 | { |
| 276 | unsigned pages = 0; | 277 | unsigned pages = 0; |
| 277 | unsigned long last_map_addr = end; | 278 | unsigned long last_map_addr = end; |
| @@ -289,36 +290,43 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end) | |||
| 289 | break; | 290 | break; |
| 290 | } | 291 | } |
| 291 | 292 | ||
| 293 | /* | ||
| 294 | * We will re-use the existing mapping. | ||
| 295 | * Xen for example has some special requirements, like mapping | ||
| 296 | * pagetable pages as RO. So assume that whoever pre-set up | ||
| 297 | * these mappings knew what they were doing. | ||
| 298 | */ | ||
| 292 | if (pte_val(*pte)) | 299 | if (pte_val(*pte)) |
| 293 | continue; | 300 | continue; |
| 294 | 301 | ||
| 295 | if (0) | 302 | if (0) |
| 296 | printk(" pte=%p addr=%lx pte=%016lx\n", | 303 | printk(" pte=%p addr=%lx pte=%016lx\n", |
| 297 | pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte); | 304 | pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte); |
| 298 | set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL)); | ||
| 299 | last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE; | ||
| 300 | pages++; | 305 | pages++; |
| 306 | set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, prot)); | ||
| 307 | last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE; | ||
| 301 | } | 308 | } |
| 309 | |||
| 302 | update_page_count(PG_LEVEL_4K, pages); | 310 | update_page_count(PG_LEVEL_4K, pages); |
| 303 | 311 | ||
| 304 | return last_map_addr; | 312 | return last_map_addr; |
| 305 | } | 313 | } |
| 306 | 314 | ||
| 307 | static unsigned long __meminit | 315 | static unsigned long __meminit |
| 308 | phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end) | 316 | phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end, |
| 317 | pgprot_t prot) | ||
| 309 | { | 318 | { |
| 310 | pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd); | 319 | pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd); |
| 311 | 320 | ||
| 312 | return phys_pte_init(pte, address, end); | 321 | return phys_pte_init(pte, address, end, prot); |
| 313 | } | 322 | } |
| 314 | 323 | ||
| 315 | static unsigned long __meminit | 324 | static unsigned long __meminit |
| 316 | phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, | 325 | phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, |
| 317 | unsigned long page_size_mask) | 326 | unsigned long page_size_mask, pgprot_t prot) |
| 318 | { | 327 | { |
| 319 | unsigned long pages = 0; | 328 | unsigned long pages = 0; |
| 320 | unsigned long last_map_addr = end; | 329 | unsigned long last_map_addr = end; |
| 321 | unsigned long start = address; | ||
| 322 | 330 | ||
| 323 | int i = pmd_index(address); | 331 | int i = pmd_index(address); |
| 324 | 332 | ||
| @@ -326,6 +334,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, | |||
| 326 | unsigned long pte_phys; | 334 | unsigned long pte_phys; |
| 327 | pmd_t *pmd = pmd_page + pmd_index(address); | 335 | pmd_t *pmd = pmd_page + pmd_index(address); |
| 328 | pte_t *pte; | 336 | pte_t *pte; |
| 337 | pgprot_t new_prot = prot; | ||
| 329 | 338 | ||
| 330 | if (address >= end) { | 339 | if (address >= end) { |
| 331 | if (!after_bootmem) { | 340 | if (!after_bootmem) { |
| @@ -339,27 +348,40 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, | |||
| 339 | if (!pmd_large(*pmd)) { | 348 | if (!pmd_large(*pmd)) { |
| 340 | spin_lock(&init_mm.page_table_lock); | 349 | spin_lock(&init_mm.page_table_lock); |
| 341 | last_map_addr = phys_pte_update(pmd, address, | 350 | last_map_addr = phys_pte_update(pmd, address, |
| 342 | end); | 351 | end, prot); |
| 343 | spin_unlock(&init_mm.page_table_lock); | 352 | spin_unlock(&init_mm.page_table_lock); |
| 353 | continue; | ||
| 344 | } | 354 | } |
| 345 | /* Count entries we're using from level2_ident_pgt */ | 355 | /* |
| 346 | if (start == 0) | 356 | * If we are ok with PG_LEVEL_2M mapping, then we will |
| 347 | pages++; | 357 | * use the existing mapping, |
| 348 | continue; | 358 | * |
| 359 | * Otherwise, we will split the large page mapping but | ||
| 360 | * use the same existing protection bits except for | ||
| 361 | * large page, so that we don't violate Intel's TLB | ||
| 362 | * Application note (317080) which says, while changing | ||
| 363 | * the page sizes, new and old translations should | ||
| 364 | * not differ with respect to page frame and | ||
| 365 | * attributes. | ||
| 366 | */ | ||
| 367 | if (page_size_mask & (1 << PG_LEVEL_2M)) | ||
| 368 | continue; | ||
| 369 | new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd)); | ||
| 349 | } | 370 | } |
| 350 | 371 | ||
| 351 | if (page_size_mask & (1<<PG_LEVEL_2M)) { | 372 | if (page_size_mask & (1<<PG_LEVEL_2M)) { |
| 352 | pages++; | 373 | pages++; |
| 353 | spin_lock(&init_mm.page_table_lock); | 374 | spin_lock(&init_mm.page_table_lock); |
| 354 | set_pte((pte_t *)pmd, | 375 | set_pte((pte_t *)pmd, |
| 355 | pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); | 376 | pfn_pte(address >> PAGE_SHIFT, |
| 377 | __pgprot(pgprot_val(prot) | _PAGE_PSE))); | ||
| 356 | spin_unlock(&init_mm.page_table_lock); | 378 | spin_unlock(&init_mm.page_table_lock); |
| 357 | last_map_addr = (address & PMD_MASK) + PMD_SIZE; | 379 | last_map_addr = (address & PMD_MASK) + PMD_SIZE; |
| 358 | continue; | 380 | continue; |
| 359 | } | 381 | } |
| 360 | 382 | ||
| 361 | pte = alloc_low_page(&pte_phys); | 383 | pte = alloc_low_page(&pte_phys); |
| 362 | last_map_addr = phys_pte_init(pte, address, end); | 384 | last_map_addr = phys_pte_init(pte, address, end, new_prot); |
| 363 | unmap_low_page(pte); | 385 | unmap_low_page(pte); |
| 364 | 386 | ||
| 365 | spin_lock(&init_mm.page_table_lock); | 387 | spin_lock(&init_mm.page_table_lock); |
| @@ -372,12 +394,12 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, | |||
| 372 | 394 | ||
| 373 | static unsigned long __meminit | 395 | static unsigned long __meminit |
| 374 | phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end, | 396 | phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end, |
| 375 | unsigned long page_size_mask) | 397 | unsigned long page_size_mask, pgprot_t prot) |
| 376 | { | 398 | { |
| 377 | pmd_t *pmd = pmd_offset(pud, 0); | 399 | pmd_t *pmd = pmd_offset(pud, 0); |
| 378 | unsigned long last_map_addr; | 400 | unsigned long last_map_addr; |
| 379 | 401 | ||
| 380 | last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask); | 402 | last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask, prot); |
| 381 | __flush_tlb_all(); | 403 | __flush_tlb_all(); |
| 382 | return last_map_addr; | 404 | return last_map_addr; |
| 383 | } | 405 | } |
| @@ -394,6 +416,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, | |||
| 394 | unsigned long pmd_phys; | 416 | unsigned long pmd_phys; |
| 395 | pud_t *pud = pud_page + pud_index(addr); | 417 | pud_t *pud = pud_page + pud_index(addr); |
| 396 | pmd_t *pmd; | 418 | pmd_t *pmd; |
| 419 | pgprot_t prot = PAGE_KERNEL; | ||
| 397 | 420 | ||
| 398 | if (addr >= end) | 421 | if (addr >= end) |
| 399 | break; | 422 | break; |
| @@ -405,10 +428,26 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, | |||
| 405 | } | 428 | } |
| 406 | 429 | ||
| 407 | if (pud_val(*pud)) { | 430 | if (pud_val(*pud)) { |
| 408 | if (!pud_large(*pud)) | 431 | if (!pud_large(*pud)) { |
| 409 | last_map_addr = phys_pmd_update(pud, addr, end, | 432 | last_map_addr = phys_pmd_update(pud, addr, end, |
| 410 | page_size_mask); | 433 | page_size_mask, prot); |
| 411 | continue; | 434 | continue; |
| 435 | } | ||
| 436 | /* | ||
| 437 | * If we are ok with PG_LEVEL_1G mapping, then we will | ||
| 438 | * use the existing mapping. | ||
| 439 | * | ||
| 440 | * Otherwise, we will split the gbpage mapping but use | ||
| 441 | * the same existing protection bits except for large | ||
| 442 | * page, so that we don't violate Intel's TLB | ||
| 443 | * Application note (317080) which says, while changing | ||
| 444 | * the page sizes, new and old translations should | ||
| 445 | * not differ with respect to page frame and | ||
| 446 | * attributes. | ||
| 447 | */ | ||
| 448 | if (page_size_mask & (1 << PG_LEVEL_1G)) | ||
| 449 | continue; | ||
| 450 | prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud)); | ||
| 412 | } | 451 | } |
| 413 | 452 | ||
| 414 | if (page_size_mask & (1<<PG_LEVEL_1G)) { | 453 | if (page_size_mask & (1<<PG_LEVEL_1G)) { |
| @@ -422,7 +461,8 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, | |||
| 422 | } | 461 | } |
| 423 | 462 | ||
| 424 | pmd = alloc_low_page(&pmd_phys); | 463 | pmd = alloc_low_page(&pmd_phys); |
| 425 | last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask); | 464 | last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask, |
| 465 | prot); | ||
| 426 | unmap_low_page(pmd); | 466 | unmap_low_page(pmd); |
| 427 | 467 | ||
| 428 | spin_lock(&init_mm.page_table_lock); | 468 | spin_lock(&init_mm.page_table_lock); |
| @@ -430,6 +470,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, | |||
| 430 | spin_unlock(&init_mm.page_table_lock); | 470 | spin_unlock(&init_mm.page_table_lock); |
| 431 | } | 471 | } |
| 432 | __flush_tlb_all(); | 472 | __flush_tlb_all(); |
| 473 | |||
| 433 | update_page_count(PG_LEVEL_1G, pages); | 474 | update_page_count(PG_LEVEL_1G, pages); |
| 434 | 475 | ||
| 435 | return last_map_addr; | 476 | return last_map_addr; |
| @@ -446,13 +487,14 @@ phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end, | |||
| 446 | return phys_pud_init(pud, addr, end, page_size_mask); | 487 | return phys_pud_init(pud, addr, end, page_size_mask); |
| 447 | } | 488 | } |
| 448 | 489 | ||
| 449 | static void __init find_early_table_space(unsigned long end) | 490 | static void __init find_early_table_space(unsigned long end, int use_pse, |
| 491 | int use_gbpages) | ||
| 450 | { | 492 | { |
| 451 | unsigned long puds, pmds, ptes, tables, start; | 493 | unsigned long puds, pmds, ptes, tables, start; |
| 452 | 494 | ||
| 453 | puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; | 495 | puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; |
| 454 | tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); | 496 | tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); |
| 455 | if (direct_gbpages) { | 497 | if (use_gbpages) { |
| 456 | unsigned long extra; | 498 | unsigned long extra; |
| 457 | extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); | 499 | extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); |
| 458 | pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; | 500 | pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; |
| @@ -460,7 +502,7 @@ static void __init find_early_table_space(unsigned long end) | |||
| 460 | pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; | 502 | pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; |
| 461 | tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); | 503 | tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); |
| 462 | 504 | ||
| 463 | if (cpu_has_pse) { | 505 | if (use_pse) { |
| 464 | unsigned long extra; | 506 | unsigned long extra; |
| 465 | extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); | 507 | extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); |
| 466 | ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; | 508 | ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; |
| @@ -528,6 +570,7 @@ static unsigned long __init kernel_physical_mapping_init(unsigned long start, | |||
| 528 | pgd_populate(&init_mm, pgd, __va(pud_phys)); | 570 | pgd_populate(&init_mm, pgd, __va(pud_phys)); |
| 529 | spin_unlock(&init_mm.page_table_lock); | 571 | spin_unlock(&init_mm.page_table_lock); |
| 530 | } | 572 | } |
| 573 | __flush_tlb_all(); | ||
| 531 | 574 | ||
| 532 | return last_map_addr; | 575 | return last_map_addr; |
| 533 | } | 576 | } |
| @@ -571,6 +614,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
| 571 | 614 | ||
| 572 | struct map_range mr[NR_RANGE_MR]; | 615 | struct map_range mr[NR_RANGE_MR]; |
| 573 | int nr_range, i; | 616 | int nr_range, i; |
| 617 | int use_pse, use_gbpages; | ||
| 574 | 618 | ||
| 575 | printk(KERN_INFO "init_memory_mapping\n"); | 619 | printk(KERN_INFO "init_memory_mapping\n"); |
| 576 | 620 | ||
| @@ -584,9 +628,21 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
| 584 | if (!after_bootmem) | 628 | if (!after_bootmem) |
| 585 | init_gbpages(); | 629 | init_gbpages(); |
| 586 | 630 | ||
| 587 | if (direct_gbpages) | 631 | #ifdef CONFIG_DEBUG_PAGEALLOC |
| 632 | /* | ||
| 633 | * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. | ||
| 634 | * This will simplify cpa(), which otherwise needs to support splitting | ||
| 635 | * large pages into small in interrupt context, etc. | ||
| 636 | */ | ||
| 637 | use_pse = use_gbpages = 0; | ||
| 638 | #else | ||
| 639 | use_pse = cpu_has_pse; | ||
| 640 | use_gbpages = direct_gbpages; | ||
| 641 | #endif | ||
| 642 | |||
| 643 | if (use_gbpages) | ||
| 588 | page_size_mask |= 1 << PG_LEVEL_1G; | 644 | page_size_mask |= 1 << PG_LEVEL_1G; |
| 589 | if (cpu_has_pse) | 645 | if (use_pse) |
| 590 | page_size_mask |= 1 << PG_LEVEL_2M; | 646 | page_size_mask |= 1 << PG_LEVEL_2M; |
| 591 | 647 | ||
| 592 | memset(mr, 0, sizeof(mr)); | 648 | memset(mr, 0, sizeof(mr)); |
| @@ -647,7 +703,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
| 647 | (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k")); | 703 | (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k")); |
| 648 | 704 | ||
| 649 | if (!after_bootmem) | 705 | if (!after_bootmem) |
| 650 | find_early_table_space(end); | 706 | find_early_table_space(end, use_pse, use_gbpages); |
| 651 | 707 | ||
| 652 | for (i = 0; i < nr_range; i++) | 708 | for (i = 0; i < nr_range; i++) |
| 653 | last_map_addr = kernel_physical_mapping_init( | 709 | last_map_addr = kernel_physical_mapping_init( |
| @@ -806,8 +862,6 @@ void __init mem_init(void) | |||
| 806 | reservedpages << (PAGE_SHIFT-10), | 862 | reservedpages << (PAGE_SHIFT-10), |
| 807 | datasize >> 10, | 863 | datasize >> 10, |
| 808 | initsize >> 10); | 864 | initsize >> 10); |
| 809 | |||
| 810 | cpa_init(); | ||
| 811 | } | 865 | } |
| 812 | 866 | ||
| 813 | void free_init_pages(char *what, unsigned long begin, unsigned long end) | 867 | void free_init_pages(char *what, unsigned long begin, unsigned long end) |
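The init_64.c hunks above thread a pgprot_t down through phys_pud_init()/phys_pmd_init()/phys_pte_init() so that, when an existing 1G or 2M mapping has to be split, the smaller entries inherit the old entry's protection bits with only _PAGE_PSE removed, while a level that keeps its large page gets _PAGE_PSE added back onto the same bits. Frame and attributes therefore stay identical across the size change, as the quoted TLB application note requires. The relevant bit manipulation, restated out of context with explanatory comments:

/* splitting: reuse the large entry's bits, minus the PSE (size) bit */
pgprot_t new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd));

/* keeping a 2M page: same bits, with PSE set explicitly */
set_pte((pte_t *)pmd,
	pfn_pte(address >> PAGE_SHIFT,
		__pgprot(pgprot_val(prot) | _PAGE_PSE)));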
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index cac6da54203b..6ab3196d12b4 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
| @@ -83,6 +83,25 @@ int page_is_ram(unsigned long pagenr) | |||
| 83 | return 0; | 83 | return 0; |
| 84 | } | 84 | } |
| 85 | 85 | ||
| 86 | int pagerange_is_ram(unsigned long start, unsigned long end) | ||
| 87 | { | ||
| 88 | int ram_page = 0, not_rampage = 0; | ||
| 89 | unsigned long page_nr; | ||
| 90 | |||
| 91 | for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT); | ||
| 92 | ++page_nr) { | ||
| 93 | if (page_is_ram(page_nr)) | ||
| 94 | ram_page = 1; | ||
| 95 | else | ||
| 96 | not_rampage = 1; | ||
| 97 | |||
| 98 | if (ram_page == not_rampage) | ||
| 99 | return -1; | ||
| 100 | } | ||
| 101 | |||
| 102 | return ram_page; | ||
| 103 | } | ||
| 104 | |||
| 86 | /* | 105 | /* |
| 87 | * Fix up the linear direct mapping of the kernel to avoid cache attribute | 106 | * Fix up the linear direct mapping of the kernel to avoid cache attribute |
| 88 | * conflicts. | 107 | * conflicts. |
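pagerange_is_ram() above returns 1 when every page in [start, end) is RAM, 0 when none of it is, and -1 as soon as it has seen both a RAM and a non-RAM page (a mixed range). A hedged usage sketch; the caller and handle_ram_range() are illustrative and not part of this diff:

/* Illustrative caller only. */
int ret = pagerange_is_ram(phys_addr, phys_addr + size);

if (ret < 0)		/* mixed RAM/non-RAM range: refuse it */
	return -EINVAL;
if (ret)		/* entirely RAM: handled via the kernel direct mapping */
	return handle_ram_range(phys_addr, size);
/* entirely non-RAM: safe to track as a normal memtype reservation */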
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index d4aa503caaa2..e1d106909218 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
| @@ -32,7 +32,7 @@ enum { | |||
| 32 | GPS = (1<<30) | 32 | GPS = (1<<30) |
| 33 | }; | 33 | }; |
| 34 | 34 | ||
| 35 | #define PAGE_TESTBIT __pgprot(_PAGE_UNUSED1) | 35 | #define PAGE_CPA_TEST __pgprot(_PAGE_CPA_TEST) |
| 36 | 36 | ||
| 37 | static int pte_testbit(pte_t pte) | 37 | static int pte_testbit(pte_t pte) |
| 38 | { | 38 | { |
| @@ -118,6 +118,7 @@ static int pageattr_test(void) | |||
| 118 | unsigned int level; | 118 | unsigned int level; |
| 119 | int i, k; | 119 | int i, k; |
| 120 | int err; | 120 | int err; |
| 121 | unsigned long test_addr; | ||
| 121 | 122 | ||
| 122 | if (print) | 123 | if (print) |
| 123 | printk(KERN_INFO "CPA self-test:\n"); | 124 | printk(KERN_INFO "CPA self-test:\n"); |
| @@ -172,7 +173,8 @@ static int pageattr_test(void) | |||
| 172 | continue; | 173 | continue; |
| 173 | } | 174 | } |
| 174 | 175 | ||
| 175 | err = change_page_attr_set(addr[i], len[i], PAGE_TESTBIT); | 176 | test_addr = addr[i]; |
| 177 | err = change_page_attr_set(&test_addr, len[i], PAGE_CPA_TEST, 0); | ||
| 176 | if (err < 0) { | 178 | if (err < 0) { |
| 177 | printk(KERN_ERR "CPA %d failed %d\n", i, err); | 179 | printk(KERN_ERR "CPA %d failed %d\n", i, err); |
| 178 | failed++; | 180 | failed++; |
| @@ -204,7 +206,8 @@ static int pageattr_test(void) | |||
| 204 | failed++; | 206 | failed++; |
| 205 | continue; | 207 | continue; |
| 206 | } | 208 | } |
| 207 | err = change_page_attr_clear(addr[i], len[i], PAGE_TESTBIT); | 209 | test_addr = addr[i]; |
| 210 | err = change_page_attr_clear(&test_addr, len[i], PAGE_CPA_TEST, 0); | ||
| 208 | if (err < 0) { | 211 | if (err < 0) { |
| 209 | printk(KERN_ERR "CPA reverting failed: %d\n", err); | 212 | printk(KERN_ERR "CPA reverting failed: %d\n", err); |
| 210 | failed++; | 213 | failed++; |
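The self-test changes above follow the new change_page_attr_set()/change_page_attr_clear() signatures: the address is now passed by pointer (the non-array CPA path advances *vaddr as it walks the range, so callers hand in a scratch copy) and a trailing 0 selects the non-array mode. A minimal illustration of the changed, pageattr.c-internal calling convention:

/* Sketch of the internal convention after this patch. */
unsigned long test_addr = addr[i];	/* scratch copy: cpa may advance *vaddr */
err = change_page_attr_set(&test_addr, len[i], PAGE_CPA_TEST, 0 /* not an array */);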
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 898fad617abe..a9ec89c3fbca 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
| @@ -25,15 +25,27 @@ | |||
| 25 | * The current flushing context - we pass it instead of 5 arguments: | 25 | * The current flushing context - we pass it instead of 5 arguments: |
| 26 | */ | 26 | */ |
| 27 | struct cpa_data { | 27 | struct cpa_data { |
| 28 | unsigned long vaddr; | 28 | unsigned long *vaddr; |
| 29 | pgprot_t mask_set; | 29 | pgprot_t mask_set; |
| 30 | pgprot_t mask_clr; | 30 | pgprot_t mask_clr; |
| 31 | int numpages; | 31 | int numpages; |
| 32 | int flushtlb; | 32 | int flags; |
| 33 | unsigned long pfn; | 33 | unsigned long pfn; |
| 34 | unsigned force_split : 1; | 34 | unsigned force_split : 1; |
| 35 | int curpage; | ||
| 35 | }; | 36 | }; |
| 36 | 37 | ||
| 38 | /* | ||
| 39 | * Serialize cpa() (for !DEBUG_PAGEALLOC, which uses large identity mappings) | ||
| 40 | * using cpa_lock, so that a cpu with stale large tlb entries cannot | ||
| 41 | * change page attributes in parallel with another cpu that is | ||
| 42 | * splitting a large page entry and changing its attributes. | ||
| 43 | */ | ||
| 44 | static DEFINE_SPINLOCK(cpa_lock); | ||
| 45 | |||
| 46 | #define CPA_FLUSHTLB 1 | ||
| 47 | #define CPA_ARRAY 2 | ||
| 48 | |||
| 37 | #ifdef CONFIG_PROC_FS | 49 | #ifdef CONFIG_PROC_FS |
| 38 | static unsigned long direct_pages_count[PG_LEVEL_NUM]; | 50 | static unsigned long direct_pages_count[PG_LEVEL_NUM]; |
| 39 | 51 | ||
| @@ -190,6 +202,41 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache) | |||
| 190 | } | 202 | } |
| 191 | } | 203 | } |
| 192 | 204 | ||
| 205 | static void cpa_flush_array(unsigned long *start, int numpages, int cache) | ||
| 206 | { | ||
| 207 | unsigned int i, level; | ||
| 208 | unsigned long *addr; | ||
| 209 | |||
| 210 | BUG_ON(irqs_disabled()); | ||
| 211 | |||
| 212 | on_each_cpu(__cpa_flush_range, NULL, 1); | ||
| 213 | |||
| 214 | if (!cache) | ||
| 215 | return; | ||
| 216 | |||
| 217 | /* 4M threshold */ | ||
| 218 | if (numpages >= 1024) { | ||
| 219 | if (boot_cpu_data.x86_model >= 4) | ||
| 220 | wbinvd(); | ||
| 221 | return; | ||
| 222 | } | ||
| 223 | /* | ||
| 224 | * We only need to flush on one CPU, | ||
| 225 | * clflush is a MESI-coherent instruction that | ||
| 226 | * will cause all other CPUs to flush the same | ||
| 227 | * cachelines: | ||
| 228 | */ | ||
| 229 | for (i = 0, addr = start; i < numpages; i++, addr++) { | ||
| 230 | pte_t *pte = lookup_address(*addr, &level); | ||
| 231 | |||
| 232 | /* | ||
| 233 | * Only flush present addresses: | ||
| 234 | */ | ||
| 235 | if (pte && (pte_val(*pte) & _PAGE_PRESENT)) | ||
| 236 | clflush_cache_range((void *) *addr, PAGE_SIZE); | ||
| 237 | } | ||
| 238 | } | ||
| 239 | |||
| 193 | /* | 240 | /* |
| 194 | * Certain areas of memory on x86 require very specific protection flags, | 241 | * Certain areas of memory on x86 require very specific protection flags, |
| 195 | * for example the BIOS area or kernel text. Callers don't always get this | 242 | * for example the BIOS area or kernel text. Callers don't always get this |
| @@ -398,7 +445,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, | |||
| 398 | */ | 445 | */ |
| 399 | new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot)); | 446 | new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot)); |
| 400 | __set_pmd_pte(kpte, address, new_pte); | 447 | __set_pmd_pte(kpte, address, new_pte); |
| 401 | cpa->flushtlb = 1; | 448 | cpa->flags |= CPA_FLUSHTLB; |
| 402 | do_split = 0; | 449 | do_split = 0; |
| 403 | } | 450 | } |
| 404 | 451 | ||
| @@ -408,84 +455,6 @@ out_unlock: | |||
| 408 | return do_split; | 455 | return do_split; |
| 409 | } | 456 | } |
| 410 | 457 | ||
| 411 | static LIST_HEAD(page_pool); | ||
| 412 | static unsigned long pool_size, pool_pages, pool_low; | ||
| 413 | static unsigned long pool_used, pool_failed; | ||
| 414 | |||
| 415 | static void cpa_fill_pool(struct page **ret) | ||
| 416 | { | ||
| 417 | gfp_t gfp = GFP_KERNEL; | ||
| 418 | unsigned long flags; | ||
| 419 | struct page *p; | ||
| 420 | |||
| 421 | /* | ||
| 422 | * Avoid recursion (on debug-pagealloc) and also signal | ||
| 423 | * our priority to get to these pagetables: | ||
| 424 | */ | ||
| 425 | if (current->flags & PF_MEMALLOC) | ||
| 426 | return; | ||
| 427 | current->flags |= PF_MEMALLOC; | ||
| 428 | |||
| 429 | /* | ||
| 430 | * Allocate atomically from atomic contexts: | ||
| 431 | */ | ||
| 432 | if (in_atomic() || irqs_disabled() || debug_pagealloc) | ||
| 433 | gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN; | ||
| 434 | |||
| 435 | while (pool_pages < pool_size || (ret && !*ret)) { | ||
| 436 | p = alloc_pages(gfp, 0); | ||
| 437 | if (!p) { | ||
| 438 | pool_failed++; | ||
| 439 | break; | ||
| 440 | } | ||
| 441 | /* | ||
| 442 | * If the call site needs a page right now, provide it: | ||
| 443 | */ | ||
| 444 | if (ret && !*ret) { | ||
| 445 | *ret = p; | ||
| 446 | continue; | ||
| 447 | } | ||
| 448 | spin_lock_irqsave(&pgd_lock, flags); | ||
| 449 | list_add(&p->lru, &page_pool); | ||
| 450 | pool_pages++; | ||
| 451 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
| 452 | } | ||
| 453 | |||
| 454 | current->flags &= ~PF_MEMALLOC; | ||
| 455 | } | ||
| 456 | |||
| 457 | #define SHIFT_MB (20 - PAGE_SHIFT) | ||
| 458 | #define ROUND_MB_GB ((1 << 10) - 1) | ||
| 459 | #define SHIFT_MB_GB 10 | ||
| 460 | #define POOL_PAGES_PER_GB 16 | ||
| 461 | |||
| 462 | void __init cpa_init(void) | ||
| 463 | { | ||
| 464 | struct sysinfo si; | ||
| 465 | unsigned long gb; | ||
| 466 | |||
| 467 | si_meminfo(&si); | ||
| 468 | /* | ||
| 469 | * Calculate the number of pool pages: | ||
| 470 | * | ||
| 471 | * Convert totalram (nr of pages) to MiB and round to the next | ||
| 472 | * GiB. Shift MiB to Gib and multiply the result by | ||
| 473 | * POOL_PAGES_PER_GB: | ||
| 474 | */ | ||
| 475 | if (debug_pagealloc) { | ||
| 476 | gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB; | ||
| 477 | pool_size = POOL_PAGES_PER_GB * gb; | ||
| 478 | } else { | ||
| 479 | pool_size = 1; | ||
| 480 | } | ||
| 481 | pool_low = pool_size; | ||
| 482 | |||
| 483 | cpa_fill_pool(NULL); | ||
| 484 | printk(KERN_DEBUG | ||
| 485 | "CPA: page pool initialized %lu of %lu pages preallocated\n", | ||
| 486 | pool_pages, pool_size); | ||
| 487 | } | ||
| 488 | |||
| 489 | static int split_large_page(pte_t *kpte, unsigned long address) | 458 | static int split_large_page(pte_t *kpte, unsigned long address) |
| 490 | { | 459 | { |
| 491 | unsigned long flags, pfn, pfninc = 1; | 460 | unsigned long flags, pfn, pfninc = 1; |
| @@ -494,28 +463,15 @@ static int split_large_page(pte_t *kpte, unsigned long address) | |||
| 494 | pgprot_t ref_prot; | 463 | pgprot_t ref_prot; |
| 495 | struct page *base; | 464 | struct page *base; |
| 496 | 465 | ||
| 497 | /* | 466 | if (!debug_pagealloc) |
| 498 | * Get a page from the pool. The pool list is protected by the | 467 | spin_unlock(&cpa_lock); |
| 499 | * pgd_lock, which we have to take anyway for the split | 468 | base = alloc_pages(GFP_KERNEL, 0); |
| 500 | * operation: | 469 | if (!debug_pagealloc) |
| 501 | */ | 470 | spin_lock(&cpa_lock); |
| 502 | spin_lock_irqsave(&pgd_lock, flags); | 471 | if (!base) |
| 503 | if (list_empty(&page_pool)) { | 472 | return -ENOMEM; |
| 504 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
| 505 | base = NULL; | ||
| 506 | cpa_fill_pool(&base); | ||
| 507 | if (!base) | ||
| 508 | return -ENOMEM; | ||
| 509 | spin_lock_irqsave(&pgd_lock, flags); | ||
| 510 | } else { | ||
| 511 | base = list_first_entry(&page_pool, struct page, lru); | ||
| 512 | list_del(&base->lru); | ||
| 513 | pool_pages--; | ||
| 514 | |||
| 515 | if (pool_pages < pool_low) | ||
| 516 | pool_low = pool_pages; | ||
| 517 | } | ||
| 518 | 473 | ||
| 474 | spin_lock_irqsave(&pgd_lock, flags); | ||
| 519 | /* | 475 | /* |
| 520 | * Check for races, another CPU might have split this page | 476 | * Check for races, another CPU might have split this page |
| 521 | * up for us already: | 477 | * up for us already: |
| @@ -572,11 +528,8 @@ out_unlock: | |||
| 572 | * If we dropped out via the lookup_address check under | 528 | * If we dropped out via the lookup_address check under |
| 573 | * pgd_lock then stick the page back into the pool: | 529 | * pgd_lock then stick the page back into the pool: |
| 574 | */ | 530 | */ |
| 575 | if (base) { | 531 | if (base) |
| 576 | list_add(&base->lru, &page_pool); | 532 | __free_page(base); |
| 577 | pool_pages++; | ||
| 578 | } else | ||
| 579 | pool_used++; | ||
| 580 | spin_unlock_irqrestore(&pgd_lock, flags); | 533 | spin_unlock_irqrestore(&pgd_lock, flags); |
| 581 | 534 | ||
| 582 | return 0; | 535 | return 0; |
| @@ -584,11 +537,16 @@ out_unlock: | |||
| 584 | 537 | ||
| 585 | static int __change_page_attr(struct cpa_data *cpa, int primary) | 538 | static int __change_page_attr(struct cpa_data *cpa, int primary) |
| 586 | { | 539 | { |
| 587 | unsigned long address = cpa->vaddr; | 540 | unsigned long address; |
| 588 | int do_split, err; | 541 | int do_split, err; |
| 589 | unsigned int level; | 542 | unsigned int level; |
| 590 | pte_t *kpte, old_pte; | 543 | pte_t *kpte, old_pte; |
| 591 | 544 | ||
| 545 | if (cpa->flags & CPA_ARRAY) | ||
| 546 | address = cpa->vaddr[cpa->curpage]; | ||
| 547 | else | ||
| 548 | address = *cpa->vaddr; | ||
| 549 | |||
| 592 | repeat: | 550 | repeat: |
| 593 | kpte = lookup_address(address, &level); | 551 | kpte = lookup_address(address, &level); |
| 594 | if (!kpte) | 552 | if (!kpte) |
| @@ -600,7 +558,7 @@ repeat: | |||
| 600 | return 0; | 558 | return 0; |
| 601 | WARN(1, KERN_WARNING "CPA: called for zero pte. " | 559 | WARN(1, KERN_WARNING "CPA: called for zero pte. " |
| 602 | "vaddr = %lx cpa->vaddr = %lx\n", address, | 560 | "vaddr = %lx cpa->vaddr = %lx\n", address, |
| 603 | cpa->vaddr); | 561 | *cpa->vaddr); |
| 604 | return -EINVAL; | 562 | return -EINVAL; |
| 605 | } | 563 | } |
| 606 | 564 | ||
| @@ -626,7 +584,7 @@ repeat: | |||
| 626 | */ | 584 | */ |
| 627 | if (pte_val(old_pte) != pte_val(new_pte)) { | 585 | if (pte_val(old_pte) != pte_val(new_pte)) { |
| 628 | set_pte_atomic(kpte, new_pte); | 586 | set_pte_atomic(kpte, new_pte); |
| 629 | cpa->flushtlb = 1; | 587 | cpa->flags |= CPA_FLUSHTLB; |
| 630 | } | 588 | } |
| 631 | cpa->numpages = 1; | 589 | cpa->numpages = 1; |
| 632 | return 0; | 590 | return 0; |
| @@ -650,7 +608,25 @@ repeat: | |||
| 650 | */ | 608 | */ |
| 651 | err = split_large_page(kpte, address); | 609 | err = split_large_page(kpte, address); |
| 652 | if (!err) { | 610 | if (!err) { |
| 653 | cpa->flushtlb = 1; | 611 | /* |
| 612 | * Do a global flush tlb after splitting the large page | ||
| 613 | * and before we do the actual change page attribute in the PTE. | ||
| 614 | * | ||
| 615 | * Without this, we violate the TLB application note, which says | ||
| 616 | * "The TLBs may contain both ordinary and large-page | ||
| 617 | * translations for a 4-KByte range of linear addresses. This | ||
| 618 | * may occur if software modifies the paging structures so that | ||
| 619 | * the page size used for the address range changes. If the two | ||
| 620 | * translations differ with respect to page frame or attributes | ||
| 621 | * (e.g., permissions), processor behavior is undefined and may | ||
| 622 | * be implementation-specific." | ||
| 623 | * | ||
| 624 | * We do this global tlb flush inside the cpa_lock, so that we | ||
| 625 | * don't allow any other cpu, with stale tlb entries, to change | ||
| 626 | * the attributes of a page that falls inside the just-split | ||
| 627 | * large page entry in parallel. | ||
| 628 | */ | ||
| 629 | flush_tlb_all(); | ||
| 654 | goto repeat; | 630 | goto repeat; |
| 655 | } | 631 | } |
| 656 | 632 | ||
| @@ -663,6 +639,7 @@ static int cpa_process_alias(struct cpa_data *cpa) | |||
| 663 | { | 639 | { |
| 664 | struct cpa_data alias_cpa; | 640 | struct cpa_data alias_cpa; |
| 665 | int ret = 0; | 641 | int ret = 0; |
| 642 | unsigned long temp_cpa_vaddr, vaddr; | ||
| 666 | 643 | ||
| 667 | if (cpa->pfn >= max_pfn_mapped) | 644 | if (cpa->pfn >= max_pfn_mapped) |
| 668 | return 0; | 645 | return 0; |
| @@ -675,16 +652,24 @@ static int cpa_process_alias(struct cpa_data *cpa) | |||
| 675 | * No need to redo, when the primary call touched the direct | 652 | * No need to redo, when the primary call touched the direct |
| 676 | * mapping already: | 653 | * mapping already: |
| 677 | */ | 654 | */ |
| 678 | if (!(within(cpa->vaddr, PAGE_OFFSET, | 655 | if (cpa->flags & CPA_ARRAY) |
| 656 | vaddr = cpa->vaddr[cpa->curpage]; | ||
| 657 | else | ||
| 658 | vaddr = *cpa->vaddr; | ||
| 659 | |||
| 660 | if (!(within(vaddr, PAGE_OFFSET, | ||
| 679 | PAGE_OFFSET + (max_low_pfn_mapped << PAGE_SHIFT)) | 661 | PAGE_OFFSET + (max_low_pfn_mapped << PAGE_SHIFT)) |
| 680 | #ifdef CONFIG_X86_64 | 662 | #ifdef CONFIG_X86_64 |
| 681 | || within(cpa->vaddr, PAGE_OFFSET + (1UL<<32), | 663 | || within(vaddr, PAGE_OFFSET + (1UL<<32), |
| 682 | PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)) | 664 | PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)) |
| 683 | #endif | 665 | #endif |
| 684 | )) { | 666 | )) { |
| 685 | 667 | ||
| 686 | alias_cpa = *cpa; | 668 | alias_cpa = *cpa; |
| 687 | alias_cpa.vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT); | 669 | temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT); |
| 670 | alias_cpa.vaddr = &temp_cpa_vaddr; | ||
| 671 | alias_cpa.flags &= ~CPA_ARRAY; | ||
| 672 | |||
| 688 | 673 | ||
| 689 | ret = __change_page_attr_set_clr(&alias_cpa, 0); | 674 | ret = __change_page_attr_set_clr(&alias_cpa, 0); |
| 690 | } | 675 | } |
| @@ -696,7 +681,7 @@ static int cpa_process_alias(struct cpa_data *cpa) | |||
| 696 | * No need to redo, when the primary call touched the high | 681 | * No need to redo, when the primary call touched the high |
| 697 | * mapping already: | 682 | * mapping already: |
| 698 | */ | 683 | */ |
| 699 | if (within(cpa->vaddr, (unsigned long) _text, (unsigned long) _end)) | 684 | if (within(vaddr, (unsigned long) _text, (unsigned long) _end)) |
| 700 | return 0; | 685 | return 0; |
| 701 | 686 | ||
| 702 | /* | 687 | /* |
| @@ -707,8 +692,9 @@ static int cpa_process_alias(struct cpa_data *cpa) | |||
| 707 | return 0; | 692 | return 0; |
| 708 | 693 | ||
| 709 | alias_cpa = *cpa; | 694 | alias_cpa = *cpa; |
| 710 | alias_cpa.vaddr = | 695 | temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base; |
| 711 | (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base; | 696 | alias_cpa.vaddr = &temp_cpa_vaddr; |
| 697 | alias_cpa.flags &= ~CPA_ARRAY; | ||
| 712 | 698 | ||
| 713 | /* | 699 | /* |
| 714 | * The high mapping range is imprecise, so ignore the return value. | 700 | * The high mapping range is imprecise, so ignore the return value. |
| @@ -728,8 +714,15 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) | |||
| 728 | * preservation check. | 714 | * preservation check. |
| 729 | */ | 715 | */ |
| 730 | cpa->numpages = numpages; | 716 | cpa->numpages = numpages; |
| 717 | /* for array changes, we can't use large page */ | ||
| 718 | if (cpa->flags & CPA_ARRAY) | ||
| 719 | cpa->numpages = 1; | ||
| 731 | 720 | ||
| 721 | if (!debug_pagealloc) | ||
| 722 | spin_lock(&cpa_lock); | ||
| 732 | ret = __change_page_attr(cpa, checkalias); | 723 | ret = __change_page_attr(cpa, checkalias); |
| 724 | if (!debug_pagealloc) | ||
| 725 | spin_unlock(&cpa_lock); | ||
| 733 | if (ret) | 726 | if (ret) |
| 734 | return ret; | 727 | return ret; |
| 735 | 728 | ||
| @@ -746,7 +739,11 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) | |||
| 746 | */ | 739 | */ |
| 747 | BUG_ON(cpa->numpages > numpages); | 740 | BUG_ON(cpa->numpages > numpages); |
| 748 | numpages -= cpa->numpages; | 741 | numpages -= cpa->numpages; |
| 749 | cpa->vaddr += cpa->numpages * PAGE_SIZE; | 742 | if (cpa->flags & CPA_ARRAY) |
| 743 | cpa->curpage++; | ||
| 744 | else | ||
| 745 | *cpa->vaddr += cpa->numpages * PAGE_SIZE; | ||
| 746 | |||
| 750 | } | 747 | } |
| 751 | return 0; | 748 | return 0; |
| 752 | } | 749 | } |
| @@ -757,9 +754,9 @@ static inline int cache_attr(pgprot_t attr) | |||
| 757 | (_PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PWT | _PAGE_PCD); | 754 | (_PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PWT | _PAGE_PCD); |
| 758 | } | 755 | } |
| 759 | 756 | ||
| 760 | static int change_page_attr_set_clr(unsigned long addr, int numpages, | 757 | static int change_page_attr_set_clr(unsigned long *addr, int numpages, |
| 761 | pgprot_t mask_set, pgprot_t mask_clr, | 758 | pgprot_t mask_set, pgprot_t mask_clr, |
| 762 | int force_split) | 759 | int force_split, int array) |
| 763 | { | 760 | { |
| 764 | struct cpa_data cpa; | 761 | struct cpa_data cpa; |
| 765 | int ret, cache, checkalias; | 762 | int ret, cache, checkalias; |
| @@ -774,21 +771,38 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages, | |||
| 774 | return 0; | 771 | return 0; |
| 775 | 772 | ||
| 776 | /* Ensure we are PAGE_SIZE aligned */ | 773 | /* Ensure we are PAGE_SIZE aligned */ |
| 777 | if (addr & ~PAGE_MASK) { | 774 | if (!array) { |
| 778 | addr &= PAGE_MASK; | 775 | if (*addr & ~PAGE_MASK) { |
| 779 | /* | 776 | *addr &= PAGE_MASK; |
| 780 | * People should not be passing in unaligned addresses: | 777 | /* |
| 781 | */ | 778 | * People should not be passing in unaligned addresses: |
| 782 | WARN_ON_ONCE(1); | 779 | */ |
| 780 | WARN_ON_ONCE(1); | ||
| 781 | } | ||
| 782 | } else { | ||
| 783 | int i; | ||
| 784 | for (i = 0; i < numpages; i++) { | ||
| 785 | if (addr[i] & ~PAGE_MASK) { | ||
| 786 | addr[i] &= PAGE_MASK; | ||
| 787 | WARN_ON_ONCE(1); | ||
| 788 | } | ||
| 789 | } | ||
| 783 | } | 790 | } |
| 784 | 791 | ||
| 792 | /* Must avoid aliasing mappings in the highmem code */ | ||
| 793 | kmap_flush_unused(); | ||
| 794 | |||
| 785 | cpa.vaddr = addr; | 795 | cpa.vaddr = addr; |
| 786 | cpa.numpages = numpages; | 796 | cpa.numpages = numpages; |
| 787 | cpa.mask_set = mask_set; | 797 | cpa.mask_set = mask_set; |
| 788 | cpa.mask_clr = mask_clr; | 798 | cpa.mask_clr = mask_clr; |
| 789 | cpa.flushtlb = 0; | 799 | cpa.flags = 0; |
| 800 | cpa.curpage = 0; | ||
| 790 | cpa.force_split = force_split; | 801 | cpa.force_split = force_split; |
| 791 | 802 | ||
| 803 | if (array) | ||
| 804 | cpa.flags |= CPA_ARRAY; | ||
| 805 | |||
| 792 | /* No alias checking for _NX bit modifications */ | 806 | /* No alias checking for _NX bit modifications */ |
| 793 | checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX; | 807 | checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX; |
| 794 | 808 | ||
| @@ -797,7 +811,7 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages, | |||
| 797 | /* | 811 | /* |
| 798 | * Check whether we really changed something: | 812 | * Check whether we really changed something: |
| 799 | */ | 813 | */ |
| 800 | if (!cpa.flushtlb) | 814 | if (!(cpa.flags & CPA_FLUSHTLB)) |
| 801 | goto out; | 815 | goto out; |
| 802 | 816 | ||
| 803 | /* | 817 | /* |
| @@ -812,27 +826,30 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages, | |||
| 812 | * error case we fall back to cpa_flush_all (which uses | 826 | * error case we fall back to cpa_flush_all (which uses |
| 813 | * wbindv): | 827 | * wbindv): |
| 814 | */ | 828 | */ |
| 815 | if (!ret && cpu_has_clflush) | 829 | if (!ret && cpu_has_clflush) { |
| 816 | cpa_flush_range(addr, numpages, cache); | 830 | if (cpa.flags & CPA_ARRAY) |
| 817 | else | 831 | cpa_flush_array(addr, numpages, cache); |
| 832 | else | ||
| 833 | cpa_flush_range(*addr, numpages, cache); | ||
| 834 | } else | ||
| 818 | cpa_flush_all(cache); | 835 | cpa_flush_all(cache); |
| 819 | 836 | ||
| 820 | out: | 837 | out: |
| 821 | cpa_fill_pool(NULL); | ||
| 822 | |||
| 823 | return ret; | 838 | return ret; |
| 824 | } | 839 | } |
| 825 | 840 | ||
| 826 | static inline int change_page_attr_set(unsigned long addr, int numpages, | 841 | static inline int change_page_attr_set(unsigned long *addr, int numpages, |
| 827 | pgprot_t mask) | 842 | pgprot_t mask, int array) |
| 828 | { | 843 | { |
| 829 | return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0); | 844 | return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0, |
| 845 | array); | ||
| 830 | } | 846 | } |
| 831 | 847 | ||
| 832 | static inline int change_page_attr_clear(unsigned long addr, int numpages, | 848 | static inline int change_page_attr_clear(unsigned long *addr, int numpages, |
| 833 | pgprot_t mask) | 849 | pgprot_t mask, int array) |
| 834 | { | 850 | { |
| 835 | return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0); | 851 | return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0, |
| 852 | array); | ||
| 836 | } | 853 | } |
| 837 | 854 | ||
| 838 | int _set_memory_uc(unsigned long addr, int numpages) | 855 | int _set_memory_uc(unsigned long addr, int numpages) |
| @@ -840,8 +857,8 @@ int _set_memory_uc(unsigned long addr, int numpages) | |||
| 840 | /* | 857 | /* |
| 841 | * for now UC MINUS. see comments in ioremap_nocache() | 858 | * for now UC MINUS. see comments in ioremap_nocache() |
| 842 | */ | 859 | */ |
| 843 | return change_page_attr_set(addr, numpages, | 860 | return change_page_attr_set(&addr, numpages, |
| 844 | __pgprot(_PAGE_CACHE_UC_MINUS)); | 861 | __pgprot(_PAGE_CACHE_UC_MINUS), 0); |
| 845 | } | 862 | } |
| 846 | 863 | ||
| 847 | int set_memory_uc(unsigned long addr, int numpages) | 864 | int set_memory_uc(unsigned long addr, int numpages) |
| @@ -857,10 +874,48 @@ int set_memory_uc(unsigned long addr, int numpages) | |||
| 857 | } | 874 | } |
| 858 | EXPORT_SYMBOL(set_memory_uc); | 875 | EXPORT_SYMBOL(set_memory_uc); |
| 859 | 876 | ||
| 877 | int set_memory_array_uc(unsigned long *addr, int addrinarray) | ||
| 878 | { | ||
| 879 | unsigned long start; | ||
| 880 | unsigned long end; | ||
| 881 | int i; | ||
| 882 | /* | ||
| 883 | * for now UC MINUS. see comments in ioremap_nocache() | ||
| 884 | */ | ||
| 885 | for (i = 0; i < addrinarray; i++) { | ||
| 886 | start = __pa(addr[i]); | ||
| 887 | for (end = start + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) { | ||
| 888 | if (end != __pa(addr[i + 1])) | ||
| 889 | break; | ||
| 890 | i++; | ||
| 891 | } | ||
| 892 | if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL)) | ||
| 893 | goto out; | ||
| 894 | } | ||
| 895 | |||
| 896 | return change_page_attr_set(addr, addrinarray, | ||
| 897 | __pgprot(_PAGE_CACHE_UC_MINUS), 1); | ||
| 898 | out: | ||
| 899 | for (i = 0; i < addrinarray; i++) { | ||
| 900 | unsigned long tmp = __pa(addr[i]); | ||
| 901 | |||
| 902 | if (tmp == start) | ||
| 903 | break; | ||
| 904 | for (end = tmp + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) { | ||
| 905 | if (end != __pa(addr[i + 1])) | ||
| 906 | break; | ||
| 907 | i++; | ||
| 908 | } | ||
| 909 | free_memtype(tmp, end); | ||
| 910 | } | ||
| 911 | return -EINVAL; | ||
| 912 | } | ||
| 913 | EXPORT_SYMBOL(set_memory_array_uc); | ||
| 914 | |||
| 860 | int _set_memory_wc(unsigned long addr, int numpages) | 915 | int _set_memory_wc(unsigned long addr, int numpages) |
| 861 | { | 916 | { |
| 862 | return change_page_attr_set(addr, numpages, | 917 | return change_page_attr_set(&addr, numpages, |
| 863 | __pgprot(_PAGE_CACHE_WC)); | 918 | __pgprot(_PAGE_CACHE_WC), 0); |
| 864 | } | 919 | } |
| 865 | 920 | ||
| 866 | int set_memory_wc(unsigned long addr, int numpages) | 921 | int set_memory_wc(unsigned long addr, int numpages) |
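set_memory_array_uc() lets a driver flip a set of non-contiguous 4k pages to UC- with one CPA call; the inner loop above coalesces physically contiguous runs so each run needs only one reserve_memtype() entry, and on failure every reservation made so far is unwound. A minimal, hypothetical caller is sketched below (the function name and error handling are illustrative; set_memory_array_uc(), __get_free_page() and free_page() are the real API):

    #include <linux/gfp.h>
    #include <asm/cacheflush.h>

    /* Hypothetical driver helper: map a set of scattered pages uncached. */
    static int example_alloc_uc_pages(unsigned long *addrs, int count)
    {
            int i, ret;

            for (i = 0; i < count; i++) {
                    addrs[i] = __get_free_page(GFP_KERNEL);
                    if (!addrs[i]) {
                            ret = -ENOMEM;
                            goto free;
                    }
            }

            /*
             * One call covers every page; physically contiguous runs share
             * a single reserve_memtype() entry thanks to the coalescing loop.
             */
            ret = set_memory_array_uc(addrs, count);
            if (ret)
                    goto free;

            return 0;

    free:
            while (i--)
                    free_page(addrs[i]);
            return ret;
    }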
| @@ -878,8 +933,8 @@ EXPORT_SYMBOL(set_memory_wc); | |||
| 878 | 933 | ||
| 879 | int _set_memory_wb(unsigned long addr, int numpages) | 934 | int _set_memory_wb(unsigned long addr, int numpages) |
| 880 | { | 935 | { |
| 881 | return change_page_attr_clear(addr, numpages, | 936 | return change_page_attr_clear(&addr, numpages, |
| 882 | __pgprot(_PAGE_CACHE_MASK)); | 937 | __pgprot(_PAGE_CACHE_MASK), 0); |
| 883 | } | 938 | } |
| 884 | 939 | ||
| 885 | int set_memory_wb(unsigned long addr, int numpages) | 940 | int set_memory_wb(unsigned long addr, int numpages) |
| @@ -890,39 +945,59 @@ int set_memory_wb(unsigned long addr, int numpages) | |||
| 890 | } | 945 | } |
| 891 | EXPORT_SYMBOL(set_memory_wb); | 946 | EXPORT_SYMBOL(set_memory_wb); |
| 892 | 947 | ||
| 948 | int set_memory_array_wb(unsigned long *addr, int addrinarray) | ||
| 949 | { | ||
| 950 | int i; | ||
| 951 | |||
| 952 | for (i = 0; i < addrinarray; i++) { | ||
| 953 | unsigned long start = __pa(addr[i]); | ||
| 954 | unsigned long end; | ||
| 955 | |||
| 956 | for (end = start + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) { | ||
| 957 | if (end != __pa(addr[i + 1])) | ||
| 958 | break; | ||
| 959 | i++; | ||
| 960 | } | ||
| 961 | free_memtype(start, end); | ||
| 962 | } | ||
| 963 | return change_page_attr_clear(addr, addrinarray, | ||
| 964 | __pgprot(_PAGE_CACHE_MASK), 1); | ||
| 965 | } | ||
| 966 | EXPORT_SYMBOL(set_memory_array_wb); | ||
| 967 | |||
| 893 | int set_memory_x(unsigned long addr, int numpages) | 968 | int set_memory_x(unsigned long addr, int numpages) |
| 894 | { | 969 | { |
| 895 | return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_NX)); | 970 | return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0); |
| 896 | } | 971 | } |
| 897 | EXPORT_SYMBOL(set_memory_x); | 972 | EXPORT_SYMBOL(set_memory_x); |
| 898 | 973 | ||
| 899 | int set_memory_nx(unsigned long addr, int numpages) | 974 | int set_memory_nx(unsigned long addr, int numpages) |
| 900 | { | 975 | { |
| 901 | return change_page_attr_set(addr, numpages, __pgprot(_PAGE_NX)); | 976 | return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0); |
| 902 | } | 977 | } |
| 903 | EXPORT_SYMBOL(set_memory_nx); | 978 | EXPORT_SYMBOL(set_memory_nx); |
| 904 | 979 | ||
| 905 | int set_memory_ro(unsigned long addr, int numpages) | 980 | int set_memory_ro(unsigned long addr, int numpages) |
| 906 | { | 981 | { |
| 907 | return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_RW)); | 982 | return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0); |
| 908 | } | 983 | } |
| 909 | EXPORT_SYMBOL_GPL(set_memory_ro); | 984 | EXPORT_SYMBOL_GPL(set_memory_ro); |
| 910 | 985 | ||
| 911 | int set_memory_rw(unsigned long addr, int numpages) | 986 | int set_memory_rw(unsigned long addr, int numpages) |
| 912 | { | 987 | { |
| 913 | return change_page_attr_set(addr, numpages, __pgprot(_PAGE_RW)); | 988 | return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0); |
| 914 | } | 989 | } |
| 915 | EXPORT_SYMBOL_GPL(set_memory_rw); | 990 | EXPORT_SYMBOL_GPL(set_memory_rw); |
| 916 | 991 | ||
| 917 | int set_memory_np(unsigned long addr, int numpages) | 992 | int set_memory_np(unsigned long addr, int numpages) |
| 918 | { | 993 | { |
| 919 | return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_PRESENT)); | 994 | return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_PRESENT), 0); |
| 920 | } | 995 | } |
| 921 | 996 | ||
| 922 | int set_memory_4k(unsigned long addr, int numpages) | 997 | int set_memory_4k(unsigned long addr, int numpages) |
| 923 | { | 998 | { |
| 924 | return change_page_attr_set_clr(addr, numpages, __pgprot(0), | 999 | return change_page_attr_set_clr(&addr, numpages, __pgprot(0), |
| 925 | __pgprot(0), 1); | 1000 | __pgprot(0), 1, 0); |
| 926 | } | 1001 | } |
| 927 | 1002 | ||
| 928 | int set_pages_uc(struct page *page, int numpages) | 1003 | int set_pages_uc(struct page *page, int numpages) |
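set_memory_array_wb() is the release half of the pair: it frees the memtype reservation for each contiguous run and then clears the _PAGE_CACHE_MASK bits on all pages in a single array CPA call. A hypothetical teardown matching the allocation sketch above:

    /* Hypothetical teardown counterpart to example_alloc_uc_pages(). */
    static void example_free_uc_pages(unsigned long *addrs, int count)
    {
            int i;

            /* Drop the memtype reservations and clear the cache-attribute bits. */
            set_memory_array_wb(addrs, count);

            for (i = 0; i < count; i++)
                    free_page(addrs[i]);
    }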
| @@ -975,22 +1050,38 @@ int set_pages_rw(struct page *page, int numpages) | |||
| 975 | 1050 | ||
| 976 | static int __set_pages_p(struct page *page, int numpages) | 1051 | static int __set_pages_p(struct page *page, int numpages) |
| 977 | { | 1052 | { |
| 978 | struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page), | 1053 | unsigned long tempaddr = (unsigned long) page_address(page); |
| 1054 | struct cpa_data cpa = { .vaddr = &tempaddr, | ||
| 979 | .numpages = numpages, | 1055 | .numpages = numpages, |
| 980 | .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW), | 1056 | .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW), |
| 981 | .mask_clr = __pgprot(0)}; | 1057 | .mask_clr = __pgprot(0), |
| 1058 | .flags = 0}; | ||
| 982 | 1059 | ||
| 983 | return __change_page_attr_set_clr(&cpa, 1); | 1060 | /* |
| 1061 | * No alias checking needed for setting the present flag. Otherwise, | ||
| 1062 | * we may need to break large pages for 64-bit kernel text | ||
| 1063 | * mappings (this adds to complexity if we want to do this from | ||
| 1064 | * atomic context especially). Let's keep it simple! | ||
| 1065 | */ | ||
| 1066 | return __change_page_attr_set_clr(&cpa, 0); | ||
| 984 | } | 1067 | } |
| 985 | 1068 | ||
| 986 | static int __set_pages_np(struct page *page, int numpages) | 1069 | static int __set_pages_np(struct page *page, int numpages) |
| 987 | { | 1070 | { |
| 988 | struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page), | 1071 | unsigned long tempaddr = (unsigned long) page_address(page); |
| 1072 | struct cpa_data cpa = { .vaddr = &tempaddr, | ||
| 989 | .numpages = numpages, | 1073 | .numpages = numpages, |
| 990 | .mask_set = __pgprot(0), | 1074 | .mask_set = __pgprot(0), |
| 991 | .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW)}; | 1075 | .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW), |
| 1076 | .flags = 0}; | ||
| 992 | 1077 | ||
| 993 | return __change_page_attr_set_clr(&cpa, 1); | 1078 | /* |
| 1079 | * No alias checking needed for clearing the present flag. Otherwise, | ||
| 1080 | * we may need to break large pages for 64-bit kernel text | ||
| 1081 | * mappings (this adds to complexity if we want to do this from | ||
| 1082 | * atomic context especially). Let's keep it simple! | ||
| 1083 | */ | ||
| 1084 | return __change_page_attr_set_clr(&cpa, 0); | ||
| 994 | } | 1085 | } |
| 995 | 1086 | ||
| 996 | void kernel_map_pages(struct page *page, int numpages, int enable) | 1087 | void kernel_map_pages(struct page *page, int numpages, int enable) |
| @@ -1010,11 +1101,8 @@ void kernel_map_pages(struct page *page, int numpages, int enable) | |||
| 1010 | 1101 | ||
| 1011 | /* | 1102 | /* |
| 1012 | * The return value is ignored as the calls cannot fail. | 1103 | * The return value is ignored as the calls cannot fail. |
| 1013 | * Large pages are kept enabled at boot time, and are | 1104 | * Large pages for identity mappings are not used at boot time, |
| 1014 | * split up quickly with DEBUG_PAGEALLOC. If a splitup | 1105 | * so splitting a large page here needs no memory allocation. |
| 1015 | * fails here (due to temporary memory shortage) no damage | ||
| 1016 | * is done because we just keep the largepage intact up | ||
| 1017 | * to the next attempt when it will likely be split up: | ||
| 1018 | */ | 1106 | */ |
| 1019 | if (enable) | 1107 | if (enable) |
| 1020 | __set_pages_p(page, numpages); | 1108 | __set_pages_p(page, numpages); |
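With alias checking skipped, __set_pages_p()/__set_pages_np() only ever toggle 4k identity mappings, which is what lets kernel_map_pages() run safely from the page allocator's atomic paths without the removed page pool. The sketch below is illustrative only (the real callers live in mm/page_alloc.c) and shows how DEBUG_PAGEALLOC drives the present-bit toggle:

    #include <linux/mm.h>

    /* Illustrative only: the actual DEBUG_PAGEALLOC hooks are in mm/page_alloc.c. */
    static void example_unmap_on_free(struct page *page, int order)
    {
            /* Clear _PAGE_PRESENT so any use-after-free faults at once. */
            kernel_map_pages(page, 1 << order, 0);
    }

    static void example_map_on_alloc(struct page *page, int order)
    {
            /* Set _PAGE_PRESENT again before handing the pages out. */
            kernel_map_pages(page, 1 << order, 1);
    }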
| @@ -1026,53 +1114,8 @@ void kernel_map_pages(struct page *page, int numpages, int enable) | |||
| 1026 | * but that can deadlock->flush only current cpu: | 1114 | * but that can deadlock->flush only current cpu: |
| 1027 | */ | 1115 | */ |
| 1028 | __flush_tlb_all(); | 1116 | __flush_tlb_all(); |
| 1029 | |||
| 1030 | /* | ||
| 1031 | * Try to refill the page pool here. We can do this only after | ||
| 1032 | * the tlb flush. | ||
| 1033 | */ | ||
| 1034 | cpa_fill_pool(NULL); | ||
| 1035 | } | 1117 | } |
| 1036 | 1118 | ||
| 1037 | #ifdef CONFIG_DEBUG_FS | ||
| 1038 | static int dpa_show(struct seq_file *m, void *v) | ||
| 1039 | { | ||
| 1040 | seq_puts(m, "DEBUG_PAGEALLOC\n"); | ||
| 1041 | seq_printf(m, "pool_size : %lu\n", pool_size); | ||
| 1042 | seq_printf(m, "pool_pages : %lu\n", pool_pages); | ||
| 1043 | seq_printf(m, "pool_low : %lu\n", pool_low); | ||
| 1044 | seq_printf(m, "pool_used : %lu\n", pool_used); | ||
| 1045 | seq_printf(m, "pool_failed : %lu\n", pool_failed); | ||
| 1046 | |||
| 1047 | return 0; | ||
| 1048 | } | ||
| 1049 | |||
| 1050 | static int dpa_open(struct inode *inode, struct file *filp) | ||
| 1051 | { | ||
| 1052 | return single_open(filp, dpa_show, NULL); | ||
| 1053 | } | ||
| 1054 | |||
| 1055 | static const struct file_operations dpa_fops = { | ||
| 1056 | .open = dpa_open, | ||
| 1057 | .read = seq_read, | ||
| 1058 | .llseek = seq_lseek, | ||
| 1059 | .release = single_release, | ||
| 1060 | }; | ||
| 1061 | |||
| 1062 | static int __init debug_pagealloc_proc_init(void) | ||
| 1063 | { | ||
| 1064 | struct dentry *de; | ||
| 1065 | |||
| 1066 | de = debugfs_create_file("debug_pagealloc", 0600, NULL, NULL, | ||
| 1067 | &dpa_fops); | ||
| 1068 | if (!de) | ||
| 1069 | return -ENOMEM; | ||
| 1070 | |||
| 1071 | return 0; | ||
| 1072 | } | ||
| 1073 | __initcall(debug_pagealloc_proc_init); | ||
| 1074 | #endif | ||
| 1075 | |||
| 1076 | #ifdef CONFIG_HIBERNATION | 1119 | #ifdef CONFIG_HIBERNATION |
| 1077 | 1120 | ||
| 1078 | bool kernel_page_present(struct page *page) | 1121 | bool kernel_page_present(struct page *page) |
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 2a50e0fa64a5..738fd0f24958 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c | |||
| @@ -7,24 +7,24 @@ | |||
| 7 | * Loosely based on earlier PAT patchset from Eric Biederman and Andi Kleen. | 7 | * Loosely based on earlier PAT patchset from Eric Biederman and Andi Kleen. |
| 8 | */ | 8 | */ |
| 9 | 9 | ||
| 10 | #include <linux/mm.h> | 10 | #include <linux/seq_file.h> |
| 11 | #include <linux/bootmem.h> | ||
| 12 | #include <linux/debugfs.h> | ||
| 11 | #include <linux/kernel.h> | 13 | #include <linux/kernel.h> |
| 12 | #include <linux/gfp.h> | 14 | #include <linux/gfp.h> |
| 15 | #include <linux/mm.h> | ||
| 13 | #include <linux/fs.h> | 16 | #include <linux/fs.h> |
| 14 | #include <linux/bootmem.h> | ||
| 15 | #include <linux/debugfs.h> | ||
| 16 | #include <linux/seq_file.h> | ||
| 17 | 17 | ||
| 18 | #include <asm/msr.h> | 18 | #include <asm/cacheflush.h> |
| 19 | #include <asm/tlbflush.h> | ||
| 20 | #include <asm/processor.h> | 19 | #include <asm/processor.h> |
| 21 | #include <asm/page.h> | 20 | #include <asm/tlbflush.h> |
| 22 | #include <asm/pgtable.h> | 21 | #include <asm/pgtable.h> |
| 23 | #include <asm/pat.h> | ||
| 24 | #include <asm/e820.h> | ||
| 25 | #include <asm/cacheflush.h> | ||
| 26 | #include <asm/fcntl.h> | 22 | #include <asm/fcntl.h> |
| 23 | #include <asm/e820.h> | ||
| 27 | #include <asm/mtrr.h> | 24 | #include <asm/mtrr.h> |
| 25 | #include <asm/page.h> | ||
| 26 | #include <asm/msr.h> | ||
| 27 | #include <asm/pat.h> | ||
| 28 | #include <asm/io.h> | 28 | #include <asm/io.h> |
| 29 | 29 | ||
| 30 | #ifdef CONFIG_X86_PAT | 30 | #ifdef CONFIG_X86_PAT |
| @@ -46,6 +46,7 @@ early_param("nopat", nopat); | |||
| 46 | 46 | ||
| 47 | 47 | ||
| 48 | static int debug_enable; | 48 | static int debug_enable; |
| 49 | |||
| 49 | static int __init pat_debug_setup(char *str) | 50 | static int __init pat_debug_setup(char *str) |
| 50 | { | 51 | { |
| 51 | debug_enable = 1; | 52 | debug_enable = 1; |
| @@ -145,14 +146,14 @@ static char *cattr_name(unsigned long flags) | |||
| 145 | */ | 146 | */ |
| 146 | 147 | ||
| 147 | struct memtype { | 148 | struct memtype { |
| 148 | u64 start; | 149 | u64 start; |
| 149 | u64 end; | 150 | u64 end; |
| 150 | unsigned long type; | 151 | unsigned long type; |
| 151 | struct list_head nd; | 152 | struct list_head nd; |
| 152 | }; | 153 | }; |
| 153 | 154 | ||
| 154 | static LIST_HEAD(memtype_list); | 155 | static LIST_HEAD(memtype_list); |
| 155 | static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */ | 156 | static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */ |
| 156 | 157 | ||
| 157 | /* | 158 | /* |
| 158 | * Does intersection of PAT memory type and MTRR memory type and returns | 159 | * Does intersection of PAT memory type and MTRR memory type and returns |
| @@ -180,8 +181,8 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end, unsigned long req_type) | |||
| 180 | return req_type; | 181 | return req_type; |
| 181 | } | 182 | } |
| 182 | 183 | ||
| 183 | static int chk_conflict(struct memtype *new, struct memtype *entry, | 184 | static int |
| 184 | unsigned long *type) | 185 | chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type) |
| 185 | { | 186 | { |
| 186 | if (new->type != entry->type) { | 187 | if (new->type != entry->type) { |
| 187 | if (type) { | 188 | if (type) { |
| @@ -211,6 +212,66 @@ static struct memtype *cached_entry; | |||
| 211 | static u64 cached_start; | 212 | static u64 cached_start; |
| 212 | 213 | ||
| 213 | /* | 214 | /* |
| 215 | * For RAM pages, mark the pages as non WB memory type using | ||
| 216 | * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or | ||
| 217 | * set_memory_wc() on a RAM page at a time before marking it as WB again. | ||
| 218 | * This is ok, because only one driver will be owning the page and | ||
| 219 | * doing set_memory_*() calls. | ||
| 220 | * | ||
| 221 | * For now, we use PageNonWB to track that the RAM page is being mapped | ||
| 222 | * as non WB. In future, we will have to use one more flag | ||
| 223 | * (or some other mechanism in page_struct) to distinguish between | ||
| 224 | * UC and WC mapping. | ||
| 225 | */ | ||
| 226 | static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type, | ||
| 227 | unsigned long *new_type) | ||
| 228 | { | ||
| 229 | struct page *page; | ||
| 230 | u64 pfn, end_pfn; | ||
| 231 | |||
| 232 | for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { | ||
| 233 | page = pfn_to_page(pfn); | ||
| 234 | if (page_mapped(page) || PageNonWB(page)) | ||
| 235 | goto out; | ||
| 236 | |||
| 237 | SetPageNonWB(page); | ||
| 238 | } | ||
| 239 | return 0; | ||
| 240 | |||
| 241 | out: | ||
| 242 | end_pfn = pfn; | ||
| 243 | for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) { | ||
| 244 | page = pfn_to_page(pfn); | ||
| 245 | ClearPageNonWB(page); | ||
| 246 | } | ||
| 247 | |||
| 248 | return -EINVAL; | ||
| 249 | } | ||
| 250 | |||
| 251 | static int free_ram_pages_type(u64 start, u64 end) | ||
| 252 | { | ||
| 253 | struct page *page; | ||
| 254 | u64 pfn, end_pfn; | ||
| 255 | |||
| 256 | for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { | ||
| 257 | page = pfn_to_page(pfn); | ||
| 258 | if (page_mapped(page) || !PageNonWB(page)) | ||
| 259 | goto out; | ||
| 260 | |||
| 261 | ClearPageNonWB(page); | ||
| 262 | } | ||
| 263 | return 0; | ||
| 264 | |||
| 265 | out: | ||
| 266 | end_pfn = pfn; | ||
| 267 | for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) { | ||
| 268 | page = pfn_to_page(pfn); | ||
| 269 | SetPageNonWB(page); | ||
| 270 | } | ||
| 271 | return -EINVAL; | ||
| 272 | } | ||
| 273 | |||
| 274 | /* | ||
| 214 | * req_type typically has one of the: | 275 | * req_type typically has one of the: |
| 215 | * - _PAGE_CACHE_WB | 276 | * - _PAGE_CACHE_WB |
| 216 | * - _PAGE_CACHE_WC | 277 | * - _PAGE_CACHE_WC |
| @@ -226,14 +287,15 @@ static u64 cached_start; | |||
| 226 | * it will return a negative return value. | 287 | * it will return a negative return value. |
| 227 | */ | 288 | */ |
| 228 | int reserve_memtype(u64 start, u64 end, unsigned long req_type, | 289 | int reserve_memtype(u64 start, u64 end, unsigned long req_type, |
| 229 | unsigned long *new_type) | 290 | unsigned long *new_type) |
| 230 | { | 291 | { |
| 231 | struct memtype *new, *entry; | 292 | struct memtype *new, *entry; |
| 232 | unsigned long actual_type; | 293 | unsigned long actual_type; |
| 233 | struct list_head *where; | 294 | struct list_head *where; |
| 295 | int is_range_ram; | ||
| 234 | int err = 0; | 296 | int err = 0; |
| 235 | 297 | ||
| 236 | BUG_ON(start >= end); /* end is exclusive */ | 298 | BUG_ON(start >= end); /* end is exclusive */ |
| 237 | 299 | ||
| 238 | if (!pat_enabled) { | 300 | if (!pat_enabled) { |
| 239 | /* This is identical to page table setting without PAT */ | 301 | /* This is identical to page table setting without PAT */ |
| @@ -266,17 +328,24 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
| 266 | actual_type = _PAGE_CACHE_WB; | 328 | actual_type = _PAGE_CACHE_WB; |
| 267 | else | 329 | else |
| 268 | actual_type = _PAGE_CACHE_UC_MINUS; | 330 | actual_type = _PAGE_CACHE_UC_MINUS; |
| 269 | } else | 331 | } else { |
| 270 | actual_type = pat_x_mtrr_type(start, end, | 332 | actual_type = pat_x_mtrr_type(start, end, |
| 271 | req_type & _PAGE_CACHE_MASK); | 333 | req_type & _PAGE_CACHE_MASK); |
| 334 | } | ||
| 335 | |||
| 336 | is_range_ram = pagerange_is_ram(start, end); | ||
| 337 | if (is_range_ram == 1) | ||
| 338 | return reserve_ram_pages_type(start, end, req_type, new_type); | ||
| 339 | else if (is_range_ram < 0) | ||
| 340 | return -EINVAL; | ||
| 272 | 341 | ||
| 273 | new = kmalloc(sizeof(struct memtype), GFP_KERNEL); | 342 | new = kmalloc(sizeof(struct memtype), GFP_KERNEL); |
| 274 | if (!new) | 343 | if (!new) |
| 275 | return -ENOMEM; | 344 | return -ENOMEM; |
| 276 | 345 | ||
| 277 | new->start = start; | 346 | new->start = start; |
| 278 | new->end = end; | 347 | new->end = end; |
| 279 | new->type = actual_type; | 348 | new->type = actual_type; |
| 280 | 349 | ||
| 281 | if (new_type) | 350 | if (new_type) |
| 282 | *new_type = actual_type; | 351 | *new_type = actual_type; |
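Both reserve_memtype() here and free_memtype() in the hunks below now dispatch on pagerange_is_ram() before touching the memtype list: whole-RAM ranges are tracked via the page flags above, ranges that straddle RAM and non-RAM are rejected, and only non-RAM ranges reach the list. A condensed, illustrative restatement of that front end (reserve_ram_pages_type() is static to pat.c, so this is not buildable outside that file):

    /* Condensed, illustrative view of the new dispatch. */
    static int example_reserve_front_end(u64 start, u64 end,
                                         unsigned long req_type,
                                         unsigned long *new_type)
    {
            int is_range_ram = pagerange_is_ram(start, end);

            if (is_range_ram == 1)          /* whole range is RAM */
                    return reserve_ram_pages_type(start, end, req_type, new_type);
            if (is_range_ram < 0)           /* straddles RAM and non-RAM */
                    return -EINVAL;

            return 0;                       /* non-RAM: fall through to the memtype list */
    }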
| @@ -335,6 +404,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
| 335 | start, end, cattr_name(new->type), cattr_name(req_type)); | 404 | start, end, cattr_name(new->type), cattr_name(req_type)); |
| 336 | kfree(new); | 405 | kfree(new); |
| 337 | spin_unlock(&memtype_lock); | 406 | spin_unlock(&memtype_lock); |
| 407 | |||
| 338 | return err; | 408 | return err; |
| 339 | } | 409 | } |
| 340 | 410 | ||
| @@ -358,6 +428,7 @@ int free_memtype(u64 start, u64 end) | |||
| 358 | { | 428 | { |
| 359 | struct memtype *entry; | 429 | struct memtype *entry; |
| 360 | int err = -EINVAL; | 430 | int err = -EINVAL; |
| 431 | int is_range_ram; | ||
| 361 | 432 | ||
| 362 | if (!pat_enabled) | 433 | if (!pat_enabled) |
| 363 | return 0; | 434 | return 0; |
| @@ -366,6 +437,12 @@ int free_memtype(u64 start, u64 end) | |||
| 366 | if (is_ISA_range(start, end - 1)) | 437 | if (is_ISA_range(start, end - 1)) |
| 367 | return 0; | 438 | return 0; |
| 368 | 439 | ||
| 440 | is_range_ram = pagerange_is_ram(start, end); | ||
| 441 | if (is_range_ram == 1) | ||
| 442 | return free_ram_pages_type(start, end); | ||
| 443 | else if (is_range_ram < 0) | ||
| 444 | return -EINVAL; | ||
| 445 | |||
| 369 | spin_lock(&memtype_lock); | 446 | spin_lock(&memtype_lock); |
| 370 | list_for_each_entry(entry, &memtype_list, nd) { | 447 | list_for_each_entry(entry, &memtype_list, nd) { |
| 371 | if (entry->start == start && entry->end == end) { | 448 | if (entry->start == start && entry->end == end) { |
| @@ -386,6 +463,7 @@ int free_memtype(u64 start, u64 end) | |||
| 386 | } | 463 | } |
| 387 | 464 | ||
| 388 | dprintk("free_memtype request 0x%Lx-0x%Lx\n", start, end); | 465 | dprintk("free_memtype request 0x%Lx-0x%Lx\n", start, end); |
| 466 | |||
| 389 | return err; | 467 | return err; |
| 390 | } | 468 | } |
| 391 | 469 | ||
| @@ -492,9 +570,9 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, | |||
| 492 | 570 | ||
| 493 | void map_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot) | 571 | void map_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot) |
| 494 | { | 572 | { |
| 573 | unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK); | ||
| 495 | u64 addr = (u64)pfn << PAGE_SHIFT; | 574 | u64 addr = (u64)pfn << PAGE_SHIFT; |
| 496 | unsigned long flags; | 575 | unsigned long flags; |
| 497 | unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK); | ||
| 498 | 576 | ||
| 499 | reserve_memtype(addr, addr + size, want_flags, &flags); | 577 | reserve_memtype(addr, addr + size, want_flags, &flags); |
| 500 | if (flags != want_flags) { | 578 | if (flags != want_flags) { |
| @@ -514,7 +592,7 @@ void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot) | |||
| 514 | free_memtype(addr, addr + size); | 592 | free_memtype(addr, addr + size); |
| 515 | } | 593 | } |
| 516 | 594 | ||
| 517 | #if defined(CONFIG_DEBUG_FS) | 595 | #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) |
| 518 | 596 | ||
| 519 | /* get Nth element of the linked list */ | 597 | /* get Nth element of the linked list */ |
| 520 | static struct memtype *memtype_get_idx(loff_t pos) | 598 | static struct memtype *memtype_get_idx(loff_t pos) |
| @@ -537,6 +615,7 @@ static struct memtype *memtype_get_idx(loff_t pos) | |||
| 537 | } | 615 | } |
| 538 | spin_unlock(&memtype_lock); | 616 | spin_unlock(&memtype_lock); |
| 539 | kfree(print_entry); | 617 | kfree(print_entry); |
| 618 | |||
| 540 | return NULL; | 619 | return NULL; |
| 541 | } | 620 | } |
| 542 | 621 | ||
| @@ -567,6 +646,7 @@ static int memtype_seq_show(struct seq_file *seq, void *v) | |||
| 567 | seq_printf(seq, "%s @ 0x%Lx-0x%Lx\n", cattr_name(print_entry->type), | 646 | seq_printf(seq, "%s @ 0x%Lx-0x%Lx\n", cattr_name(print_entry->type), |
| 568 | print_entry->start, print_entry->end); | 647 | print_entry->start, print_entry->end); |
| 569 | kfree(print_entry); | 648 | kfree(print_entry); |
| 649 | |||
| 570 | return 0; | 650 | return 0; |
| 571 | } | 651 | } |
| 572 | 652 | ||
| @@ -598,4 +678,4 @@ static int __init pat_memtype_list_init(void) | |||
| 598 | 678 | ||
| 599 | late_initcall(pat_memtype_list_init); | 679 | late_initcall(pat_memtype_list_init); |
| 600 | 680 | ||
| 601 | #endif /* CONFIG_DEBUG_FS */ | 681 | #endif /* CONFIG_DEBUG_FS && CONFIG_X86_PAT */ |
