aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/mm
diff options
context:
space:
mode:
author Ingo Molnar <mingo@elte.hu> 2009-05-08 04:50:00 -0400
committer Ingo Molnar <mingo@elte.hu> 2009-05-08 04:50:00 -0400
commit f066a155334642b8a206eec625b1925d88c48aeb (patch)
tree cb12975e60b70d1dae3b7397bab955de78a4d01e /arch/x86/mm
parent e7c064889606aab3569669078c69b87b2c527e72 (diff)
parent 33df4db04a79660150e1948e3296eeb451ac121b (diff)
Merge branch 'x86/urgent' into x86/xen
Conflicts:
  arch/frv/include/asm/pgtable.h
  arch/x86/include/asm/required-features.h
  arch/x86/xen/mmu.c

Merge reason: x86/xen was on a .29 base still, move it to a fresher branch and pick up Xen fixes as well, plus resolve conflicts

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- arch/x86/mm/gup.c 16
-rw-r--r-- arch/x86/mm/init.c 18
-rw-r--r-- arch/x86/mm/ioremap.c 33
-rw-r--r-- arch/x86/mm/kmmio.c 2
-rw-r--r-- arch/x86/mm/numa_32.c 2
-rw-r--r-- arch/x86/mm/numa_64.c 3
-rw-r--r-- arch/x86/mm/pageattr.c 127
-rw-r--r-- arch/x86/mm/pat.c 191
-rw-r--r-- arch/x86/mm/pgtable.c 3
-rw-r--r-- arch/x86/mm/srat_32.c 2
-rw-r--r-- arch/x86/mm/srat_64.c 7
11 files changed, 176 insertions, 228 deletions
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index be54176e9eb2..6340cef6798a 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -219,6 +219,22 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
219 return 1; 219 return 1;
220} 220}
221 221
222/**
223 * get_user_pages_fast() - pin user pages in memory
224 * @start: starting user address
225 * @nr_pages: number of pages from start to pin
226 * @write: whether pages will be written to
227 * @pages: array that receives pointers to the pages pinned.
228 * Should be at least nr_pages long.
229 *
230 * Attempt to pin user pages in memory without taking mm->mmap_sem.
231 * If not successful, it will fall back to taking the lock and
232 * calling get_user_pages().
233 *
234 * Returns number of pages pinned. This may be fewer than the number
235 * requested. If nr_pages is 0 or negative, returns 0. If no pages
236 * were pinned, returns -errno.
237 */
222int get_user_pages_fast(unsigned long start, int nr_pages, int write, 238int get_user_pages_fast(unsigned long start, int nr_pages, int write,
223 struct page **pages) 239 struct page **pages)
224{ 240{
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index fd3da1dda1c9..ae4f7b5d7104 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -7,6 +7,7 @@
7#include <asm/page.h> 7#include <asm/page.h>
8#include <asm/page_types.h> 8#include <asm/page_types.h>
9#include <asm/sections.h> 9#include <asm/sections.h>
10#include <asm/setup.h>
10#include <asm/system.h> 11#include <asm/system.h>
11#include <asm/tlbflush.h> 12#include <asm/tlbflush.h>
12 13
@@ -304,8 +305,23 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
304#endif 305#endif
305 306
306#ifdef CONFIG_X86_64 307#ifdef CONFIG_X86_64
307 if (!after_bootmem) 308 if (!after_bootmem && !start) {
309 pud_t *pud;
310 pmd_t *pmd;
311
308 mmu_cr4_features = read_cr4(); 312 mmu_cr4_features = read_cr4();
313
314 /*
315 * _brk_end cannot change anymore, but it and _end may be
316 * located on different 2M pages. cleanup_highmap(), however,
317 * can only consider _end when it runs, so destroy any
318 * mappings beyond _brk_end here.
319 */
320 pud = pud_offset(pgd_offset_k(_brk_end), _brk_end);
321 pmd = pmd_offset(pud, _brk_end - 1);
322 while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1))
323 pmd_clear(pmd);
324 }
309#endif 325#endif
310 __flush_tlb_all(); 326 __flush_tlb_all();
311 327
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 0dfa09d69e80..8a450930834f 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -280,15 +280,16 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
280 return NULL; 280 return NULL;
281 area->phys_addr = phys_addr; 281 area->phys_addr = phys_addr;
282 vaddr = (unsigned long) area->addr; 282 vaddr = (unsigned long) area->addr;
283 if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) { 283
284 if (kernel_map_sync_memtype(phys_addr, size, prot_val)) {
284 free_memtype(phys_addr, phys_addr + size); 285 free_memtype(phys_addr, phys_addr + size);
285 free_vm_area(area); 286 free_vm_area(area);
286 return NULL; 287 return NULL;
287 } 288 }
288 289
289 if (ioremap_change_attr(vaddr, size, prot_val) < 0) { 290 if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) {
290 free_memtype(phys_addr, phys_addr + size); 291 free_memtype(phys_addr, phys_addr + size);
291 vunmap(area->addr); 292 free_vm_area(area);
292 return NULL; 293 return NULL;
293 } 294 }
294 295
@@ -374,7 +375,8 @@ static void __iomem *ioremap_default(resource_size_t phys_addr,
374 * - UC_MINUS for non-WB-able memory with no other conflicting mappings 375 * - UC_MINUS for non-WB-able memory with no other conflicting mappings
375 * - Inherit from confliting mappings otherwise 376 * - Inherit from confliting mappings otherwise
376 */ 377 */
377 err = reserve_memtype(phys_addr, phys_addr + size, -1, &flags); 378 err = reserve_memtype(phys_addr, phys_addr + size,
379 _PAGE_CACHE_WB, &flags);
378 if (err < 0) 380 if (err < 0)
379 return NULL; 381 return NULL;
380 382
@@ -547,7 +549,7 @@ void __init early_ioremap_reset(void)
547} 549}
548 550
549static void __init __early_set_fixmap(enum fixed_addresses idx, 551static void __init __early_set_fixmap(enum fixed_addresses idx,
550 unsigned long phys, pgprot_t flags) 552 phys_addr_t phys, pgprot_t flags)
551{ 553{
552 unsigned long addr = __fix_to_virt(idx); 554 unsigned long addr = __fix_to_virt(idx);
553 pte_t *pte; 555 pte_t *pte;
@@ -566,7 +568,7 @@ static void __init __early_set_fixmap(enum fixed_addresses idx,
566} 568}
567 569
568static inline void __init early_set_fixmap(enum fixed_addresses idx, 570static inline void __init early_set_fixmap(enum fixed_addresses idx,
569 unsigned long phys, pgprot_t prot) 571 phys_addr_t phys, pgprot_t prot)
570{ 572{
571 if (after_paging_init) 573 if (after_paging_init)
572 __set_fixmap(idx, phys, prot); 574 __set_fixmap(idx, phys, prot);
@@ -607,9 +609,10 @@ static int __init check_early_ioremap_leak(void)
607late_initcall(check_early_ioremap_leak); 609late_initcall(check_early_ioremap_leak);
608 610
609static void __init __iomem * 611static void __init __iomem *
610__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) 612__early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot)
611{ 613{
612 unsigned long offset, last_addr; 614 unsigned long offset;
615 resource_size_t last_addr;
613 unsigned int nrpages; 616 unsigned int nrpages;
614 enum fixed_addresses idx0, idx; 617 enum fixed_addresses idx0, idx;
615 int i, slot; 618 int i, slot;
@@ -625,15 +628,15 @@ __early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot)
625 } 628 }
626 629
627 if (slot < 0) { 630 if (slot < 0) {
628 printk(KERN_INFO "early_iomap(%08lx, %08lx) not found slot\n", 631 printk(KERN_INFO "early_iomap(%08llx, %08lx) not found slot\n",
629 phys_addr, size); 632 (u64)phys_addr, size);
630 WARN_ON(1); 633 WARN_ON(1);
631 return NULL; 634 return NULL;
632 } 635 }
633 636
634 if (early_ioremap_debug) { 637 if (early_ioremap_debug) {
635 printk(KERN_INFO "early_ioremap(%08lx, %08lx) [%d] => ", 638 printk(KERN_INFO "early_ioremap(%08llx, %08lx) [%d] => ",
636 phys_addr, size, slot); 639 (u64)phys_addr, size, slot);
637 dump_stack(); 640 dump_stack();
638 } 641 }
639 642
@@ -680,13 +683,15 @@ __early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot)
680} 683}
681 684
682/* Remap an IO device */ 685/* Remap an IO device */
683void __init __iomem *early_ioremap(unsigned long phys_addr, unsigned long size) 686void __init __iomem *
687early_ioremap(resource_size_t phys_addr, unsigned long size)
684{ 688{
685 return __early_ioremap(phys_addr, size, PAGE_KERNEL_IO); 689 return __early_ioremap(phys_addr, size, PAGE_KERNEL_IO);
686} 690}
687 691
688/* Remap memory */ 692/* Remap memory */
689void __init __iomem *early_memremap(unsigned long phys_addr, unsigned long size) 693void __init __iomem *
694early_memremap(resource_size_t phys_addr, unsigned long size)
690{ 695{
691 return __early_ioremap(phys_addr, size, PAGE_KERNEL); 696 return __early_ioremap(phys_addr, size, PAGE_KERNEL);
692} 697}
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 4f115e00486b..50dc802a1c46 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -87,7 +87,7 @@ static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
87{ 87{
88 struct kmmio_probe *p; 88 struct kmmio_probe *p;
89 list_for_each_entry_rcu(p, &kmmio_probes, list) { 89 list_for_each_entry_rcu(p, &kmmio_probes, list) {
90 if (addr >= p->addr && addr <= (p->addr + p->len)) 90 if (addr >= p->addr && addr < (p->addr + p->len))
91 return p; 91 return p;
92 } 92 }
93 return NULL; 93 return NULL;
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 3daefa04ace5..d2530062fe00 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -257,7 +257,7 @@ void resume_map_numa_kva(pgd_t *pgd_base)
257} 257}
258#endif 258#endif
259 259
260static unsigned long calculate_numa_remap_pages(void) 260static __init unsigned long calculate_numa_remap_pages(void)
261{ 261{
262 int nid; 262 int nid;
263 unsigned long size, reserve_pages = 0; 263 unsigned long size, reserve_pages = 0;
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index d73aaa892371..2d05a12029dc 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -188,6 +188,9 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
188 const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); 188 const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
189 int nid; 189 int nid;
190 190
191 if (!end)
192 return;
193
191 start = roundup(start, ZONE_ALIGN); 194 start = roundup(start, ZONE_ALIGN);
192 195
193 printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, 196 printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid,
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 660cac75ae11..b81b41a0481f 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -931,71 +931,94 @@ int _set_memory_uc(unsigned long addr, int numpages)
931 931
932int set_memory_uc(unsigned long addr, int numpages) 932int set_memory_uc(unsigned long addr, int numpages)
933{ 933{
934 int ret;
935
934 /* 936 /*
935 * for now UC MINUS. see comments in ioremap_nocache() 937 * for now UC MINUS. see comments in ioremap_nocache()
936 */ 938 */
937 if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, 939 ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
938 _PAGE_CACHE_UC_MINUS, NULL)) 940 _PAGE_CACHE_UC_MINUS, NULL);
939 return -EINVAL; 941 if (ret)
942 goto out_err;
943
944 ret = _set_memory_uc(addr, numpages);
945 if (ret)
946 goto out_free;
940 947
941 return _set_memory_uc(addr, numpages); 948 return 0;
949
950out_free:
951 free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
952out_err:
953 return ret;
942} 954}
943EXPORT_SYMBOL(set_memory_uc); 955EXPORT_SYMBOL(set_memory_uc);
944 956
945int set_memory_array_uc(unsigned long *addr, int addrinarray) 957int set_memory_array_uc(unsigned long *addr, int addrinarray)
946{ 958{
947 unsigned long start; 959 int i, j;
948 unsigned long end; 960 int ret;
949 int i; 961
950 /* 962 /*
951 * for now UC MINUS. see comments in ioremap_nocache() 963 * for now UC MINUS. see comments in ioremap_nocache()
952 */ 964 */
953 for (i = 0; i < addrinarray; i++) { 965 for (i = 0; i < addrinarray; i++) {
954 start = __pa(addr[i]); 966 ret = reserve_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE,
955 for (end = start + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) { 967 _PAGE_CACHE_UC_MINUS, NULL);
956 if (end != __pa(addr[i + 1])) 968 if (ret)
957 break; 969 goto out_free;
958 i++;
959 }
960 if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL))
961 goto out;
962 } 970 }
963 971
964 return change_page_attr_set(addr, addrinarray, 972 ret = change_page_attr_set(addr, addrinarray,
965 __pgprot(_PAGE_CACHE_UC_MINUS), 1); 973 __pgprot(_PAGE_CACHE_UC_MINUS), 1);
966out: 974 if (ret)
967 for (i = 0; i < addrinarray; i++) { 975 goto out_free;
968 unsigned long tmp = __pa(addr[i]); 976
969 977 return 0;
970 if (tmp == start) 978
971 break; 979out_free:
972 for (end = tmp + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) { 980 for (j = 0; j < i; j++)
973 if (end != __pa(addr[i + 1])) 981 free_memtype(__pa(addr[j]), __pa(addr[j]) + PAGE_SIZE);
974 break; 982
975 i++; 983 return ret;
976 }
977 free_memtype(tmp, end);
978 }
979 return -EINVAL;
980} 984}
981EXPORT_SYMBOL(set_memory_array_uc); 985EXPORT_SYMBOL(set_memory_array_uc);
982 986
983int _set_memory_wc(unsigned long addr, int numpages) 987int _set_memory_wc(unsigned long addr, int numpages)
984{ 988{
985 return change_page_attr_set(&addr, numpages, 989 int ret;
990 ret = change_page_attr_set(&addr, numpages,
991 __pgprot(_PAGE_CACHE_UC_MINUS), 0);
992
993 if (!ret) {
994 ret = change_page_attr_set(&addr, numpages,
986 __pgprot(_PAGE_CACHE_WC), 0); 995 __pgprot(_PAGE_CACHE_WC), 0);
996 }
997 return ret;
987} 998}
988 999
989int set_memory_wc(unsigned long addr, int numpages) 1000int set_memory_wc(unsigned long addr, int numpages)
990{ 1001{
1002 int ret;
1003
991 if (!pat_enabled) 1004 if (!pat_enabled)
992 return set_memory_uc(addr, numpages); 1005 return set_memory_uc(addr, numpages);
993 1006
994 if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, 1007 ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
995 _PAGE_CACHE_WC, NULL)) 1008 _PAGE_CACHE_WC, NULL);
996 return -EINVAL; 1009 if (ret)
1010 goto out_err;
1011
1012 ret = _set_memory_wc(addr, numpages);
1013 if (ret)
1014 goto out_free;
1015
1016 return 0;
997 1017
998 return _set_memory_wc(addr, numpages); 1018out_free:
1019 free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
1020out_err:
1021 return ret;
999} 1022}
1000EXPORT_SYMBOL(set_memory_wc); 1023EXPORT_SYMBOL(set_memory_wc);
1001 1024
@@ -1007,29 +1030,31 @@ int _set_memory_wb(unsigned long addr, int numpages)
1007 1030
1008int set_memory_wb(unsigned long addr, int numpages) 1031int set_memory_wb(unsigned long addr, int numpages)
1009{ 1032{
1010 free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); 1033 int ret;
1034
1035 ret = _set_memory_wb(addr, numpages);
1036 if (ret)
1037 return ret;
1011 1038
1012 return _set_memory_wb(addr, numpages); 1039 free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
1040 return 0;
1013} 1041}
1014EXPORT_SYMBOL(set_memory_wb); 1042EXPORT_SYMBOL(set_memory_wb);
1015 1043
1016int set_memory_array_wb(unsigned long *addr, int addrinarray) 1044int set_memory_array_wb(unsigned long *addr, int addrinarray)
1017{ 1045{
1018 int i; 1046 int i;
1047 int ret;
1019 1048
1020 for (i = 0; i < addrinarray; i++) { 1049 ret = change_page_attr_clear(addr, addrinarray,
1021 unsigned long start = __pa(addr[i]);
1022 unsigned long end;
1023
1024 for (end = start + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) {
1025 if (end != __pa(addr[i + 1]))
1026 break;
1027 i++;
1028 }
1029 free_memtype(start, end);
1030 }
1031 return change_page_attr_clear(addr, addrinarray,
1032 __pgprot(_PAGE_CACHE_MASK), 1); 1050 __pgprot(_PAGE_CACHE_MASK), 1);
1051 if (ret)
1052 return ret;
1053
1054 for (i = 0; i < addrinarray; i++)
1055 free_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE);
1056
1057 return 0;
1033} 1058}
1034EXPORT_SYMBOL(set_memory_array_wb); 1059EXPORT_SYMBOL(set_memory_array_wb);
1035 1060
@@ -1122,6 +1147,8 @@ int set_pages_array_wb(struct page **pages, int addrinarray)
1122 1147
1123 retval = cpa_clear_pages_array(pages, addrinarray, 1148 retval = cpa_clear_pages_array(pages, addrinarray,
1124 __pgprot(_PAGE_CACHE_MASK)); 1149 __pgprot(_PAGE_CACHE_MASK));
1150 if (retval)
1151 return retval;
1125 1152
1126 for (i = 0; i < addrinarray; i++) { 1153 for (i = 0; i < addrinarray; i++) {
1127 start = (unsigned long)page_address(pages[i]); 1154 start = (unsigned long)page_address(pages[i]);
@@ -1129,7 +1156,7 @@ int set_pages_array_wb(struct page **pages, int addrinarray)
1129 free_memtype(start, end); 1156 free_memtype(start, end);
1130 } 1157 }
1131 1158
1132 return retval; 1159 return 0;
1133} 1160}
1134EXPORT_SYMBOL(set_pages_array_wb); 1161EXPORT_SYMBOL(set_pages_array_wb);
1135 1162
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 640339ee4fb2..e6718bb28065 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -31,7 +31,7 @@
31#ifdef CONFIG_X86_PAT 31#ifdef CONFIG_X86_PAT
32int __read_mostly pat_enabled = 1; 32int __read_mostly pat_enabled = 1;
33 33
34void __cpuinit pat_disable(const char *reason) 34static inline void pat_disable(const char *reason)
35{ 35{
36 pat_enabled = 0; 36 pat_enabled = 0;
37 printk(KERN_INFO "%s\n", reason); 37 printk(KERN_INFO "%s\n", reason);
@@ -182,10 +182,10 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end, unsigned long req_type)
182 u8 mtrr_type; 182 u8 mtrr_type;
183 183
184 mtrr_type = mtrr_type_lookup(start, end); 184 mtrr_type = mtrr_type_lookup(start, end);
185 if (mtrr_type == MTRR_TYPE_UNCACHABLE) 185 if (mtrr_type != MTRR_TYPE_WRBACK)
186 return _PAGE_CACHE_UC; 186 return _PAGE_CACHE_UC_MINUS;
187 if (mtrr_type == MTRR_TYPE_WRCOMB) 187
188 return _PAGE_CACHE_WC; 188 return _PAGE_CACHE_WB;
189 } 189 }
190 190
191 return req_type; 191 return req_type;
@@ -352,23 +352,13 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
352 return 0; 352 return 0;
353 } 353 }
354 354
355 if (req_type == -1) { 355 /*
356 /* 356 * Call mtrr_lookup to get the type hint. This is an
357 * Call mtrr_lookup to get the type hint. This is an 357 * optimization for /dev/mem mmap'ers into WB memory (BIOS
358 * optimization for /dev/mem mmap'ers into WB memory (BIOS 358 * tools and ACPI tools). Use WB request for WB memory and use
359 * tools and ACPI tools). Use WB request for WB memory and use 359 * UC_MINUS otherwise.
360 * UC_MINUS otherwise. 360 */
361 */ 361 actual_type = pat_x_mtrr_type(start, end, req_type & _PAGE_CACHE_MASK);
362 u8 mtrr_type = mtrr_type_lookup(start, end);
363
364 if (mtrr_type == MTRR_TYPE_WRBACK)
365 actual_type = _PAGE_CACHE_WB;
366 else
367 actual_type = _PAGE_CACHE_UC_MINUS;
368 } else {
369 actual_type = pat_x_mtrr_type(start, end,
370 req_type & _PAGE_CACHE_MASK);
371 }
372 362
373 if (new_type) 363 if (new_type)
374 *new_type = actual_type; 364 *new_type = actual_type;
@@ -546,9 +536,7 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
546int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, 536int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
547 unsigned long size, pgprot_t *vma_prot) 537 unsigned long size, pgprot_t *vma_prot)
548{ 538{
549 u64 offset = ((u64) pfn) << PAGE_SHIFT; 539 unsigned long flags = _PAGE_CACHE_WB;
550 unsigned long flags = -1;
551 int retval;
552 540
553 if (!range_is_allowed(pfn, size)) 541 if (!range_is_allowed(pfn, size))
554 return 0; 542 return 0;
@@ -576,64 +564,11 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
576 } 564 }
577#endif 565#endif
578 566
579 /*
580 * With O_SYNC, we can only take UC_MINUS mapping. Fail if we cannot.
581 *
582 * Without O_SYNC, we want to get
583 * - WB for WB-able memory and no other conflicting mappings
584 * - UC_MINUS for non-WB-able memory with no other conflicting mappings
585 * - Inherit from confliting mappings otherwise
586 */
587 if (flags != -1) {
588 retval = reserve_memtype(offset, offset + size, flags, NULL);
589 } else {
590 retval = reserve_memtype(offset, offset + size, -1, &flags);
591 }
592
593 if (retval < 0)
594 return 0;
595
596 if (((pfn < max_low_pfn_mapped) ||
597 (pfn >= (1UL<<(32 - PAGE_SHIFT)) && pfn < max_pfn_mapped)) &&
598 ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) {
599 free_memtype(offset, offset + size);
600 printk(KERN_INFO
601 "%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n",
602 current->comm, current->pid,
603 cattr_name(flags),
604 offset, (unsigned long long)(offset + size));
605 return 0;
606 }
607
608 *vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) | 567 *vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) |
609 flags); 568 flags);
610 return 1; 569 return 1;
611} 570}
612 571
613void map_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
614{
615 unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK);
616 u64 addr = (u64)pfn << PAGE_SHIFT;
617 unsigned long flags;
618
619 reserve_memtype(addr, addr + size, want_flags, &flags);
620 if (flags != want_flags) {
621 printk(KERN_INFO
622 "%s:%d /dev/mem expected mapping type %s for %Lx-%Lx, got %s\n",
623 current->comm, current->pid,
624 cattr_name(want_flags),
625 addr, (unsigned long long)(addr + size),
626 cattr_name(flags));
627 }
628}
629
630void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
631{
632 u64 addr = (u64)pfn << PAGE_SHIFT;
633
634 free_memtype(addr, addr + size);
635}
636
637/* 572/*
638 * Change the memory type for the physial address range in kernel identity 573 * Change the memory type for the physial address range in kernel identity
639 * mapping space if that range is a part of identity map. 574 * mapping space if that range is a part of identity map.
@@ -671,8 +606,8 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
671{ 606{
672 int is_ram = 0; 607 int is_ram = 0;
673 int ret; 608 int ret;
674 unsigned long flags;
675 unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK); 609 unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK);
610 unsigned long flags = want_flags;
676 611
677 is_ram = pat_pagerange_is_ram(paddr, paddr + size); 612 is_ram = pat_pagerange_is_ram(paddr, paddr + size);
678 613
@@ -734,29 +669,28 @@ static void free_pfn_range(u64 paddr, unsigned long size)
734 * 669 *
735 * If the vma has a linear pfn mapping for the entire range, we get the prot 670 * If the vma has a linear pfn mapping for the entire range, we get the prot
736 * from pte and reserve the entire vma range with single reserve_pfn_range call. 671 * from pte and reserve the entire vma range with single reserve_pfn_range call.
737 * Otherwise, we reserve the entire vma range, my ging through the PTEs page
738 * by page to get physical address and protection.
739 */ 672 */
740int track_pfn_vma_copy(struct vm_area_struct *vma) 673int track_pfn_vma_copy(struct vm_area_struct *vma)
741{ 674{
742 int retval = 0;
743 unsigned long i, j;
744 resource_size_t paddr; 675 resource_size_t paddr;
745 unsigned long prot; 676 unsigned long prot;
746 unsigned long vma_start = vma->vm_start; 677 unsigned long vma_size = vma->vm_end - vma->vm_start;
747 unsigned long vma_end = vma->vm_end;
748 unsigned long vma_size = vma_end - vma_start;
749 pgprot_t pgprot; 678 pgprot_t pgprot;
750 679
751 if (!pat_enabled) 680 if (!pat_enabled)
752 return 0; 681 return 0;
753 682
683 /*
684 * For now, only handle remap_pfn_range() vmas where
685 * is_linear_pfn_mapping() == TRUE. Handling of
686 * vm_insert_pfn() is TBD.
687 */
754 if (is_linear_pfn_mapping(vma)) { 688 if (is_linear_pfn_mapping(vma)) {
755 /* 689 /*
756 * reserve the whole chunk covered by vma. We need the 690 * reserve the whole chunk covered by vma. We need the
757 * starting address and protection from pte. 691 * starting address and protection from pte.
758 */ 692 */
759 if (follow_phys(vma, vma_start, 0, &prot, &paddr)) { 693 if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
760 WARN_ON_ONCE(1); 694 WARN_ON_ONCE(1);
761 return -EINVAL; 695 return -EINVAL;
762 } 696 }
@@ -764,28 +698,7 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
764 return reserve_pfn_range(paddr, vma_size, &pgprot, 1); 698 return reserve_pfn_range(paddr, vma_size, &pgprot, 1);
765 } 699 }
766 700
767 /* reserve entire vma page by page, using pfn and prot from pte */
768 for (i = 0; i < vma_size; i += PAGE_SIZE) {
769 if (follow_phys(vma, vma_start + i, 0, &prot, &paddr))
770 continue;
771
772 pgprot = __pgprot(prot);
773 retval = reserve_pfn_range(paddr, PAGE_SIZE, &pgprot, 1);
774 if (retval)
775 goto cleanup_ret;
776 }
777 return 0; 701 return 0;
778
779cleanup_ret:
780 /* Reserve error: Cleanup partial reservation and return error */
781 for (j = 0; j < i; j += PAGE_SIZE) {
782 if (follow_phys(vma, vma_start + j, 0, &prot, &paddr))
783 continue;
784
785 free_pfn_range(paddr, PAGE_SIZE);
786 }
787
788 return retval;
789} 702}
790 703
791/* 704/*
@@ -795,50 +708,28 @@ cleanup_ret:
795 * prot is passed in as a parameter for the new mapping. If the vma has a 708 * prot is passed in as a parameter for the new mapping. If the vma has a
796 * linear pfn mapping for the entire range reserve the entire vma range with 709 * linear pfn mapping for the entire range reserve the entire vma range with
797 * single reserve_pfn_range call. 710 * single reserve_pfn_range call.
798 * Otherwise, we look t the pfn and size and reserve only the specified range
799 * page by page.
800 *
801 * Note that this function can be called with caller trying to map only a
802 * subrange/page inside the vma.
803 */ 711 */
804int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot, 712int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
805 unsigned long pfn, unsigned long size) 713 unsigned long pfn, unsigned long size)
806{ 714{
807 int retval = 0;
808 unsigned long i, j;
809 resource_size_t base_paddr;
810 resource_size_t paddr; 715 resource_size_t paddr;
811 unsigned long vma_start = vma->vm_start; 716 unsigned long vma_size = vma->vm_end - vma->vm_start;
812 unsigned long vma_end = vma->vm_end;
813 unsigned long vma_size = vma_end - vma_start;
814 717
815 if (!pat_enabled) 718 if (!pat_enabled)
816 return 0; 719 return 0;
817 720
721 /*
722 * For now, only handle remap_pfn_range() vmas where
723 * is_linear_pfn_mapping() == TRUE. Handling of
724 * vm_insert_pfn() is TBD.
725 */
818 if (is_linear_pfn_mapping(vma)) { 726 if (is_linear_pfn_mapping(vma)) {
819 /* reserve the whole chunk starting from vm_pgoff */ 727 /* reserve the whole chunk starting from vm_pgoff */
820 paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; 728 paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
821 return reserve_pfn_range(paddr, vma_size, prot, 0); 729 return reserve_pfn_range(paddr, vma_size, prot, 0);
822 } 730 }
823 731
824 /* reserve page by page using pfn and size */
825 base_paddr = (resource_size_t)pfn << PAGE_SHIFT;
826 for (i = 0; i < size; i += PAGE_SIZE) {
827 paddr = base_paddr + i;
828 retval = reserve_pfn_range(paddr, PAGE_SIZE, prot, 0);
829 if (retval)
830 goto cleanup_ret;
831 }
832 return 0; 732 return 0;
833
834cleanup_ret:
835 /* Reserve error: Cleanup partial reservation and return error */
836 for (j = 0; j < i; j += PAGE_SIZE) {
837 paddr = base_paddr + j;
838 free_pfn_range(paddr, PAGE_SIZE);
839 }
840
841 return retval;
842} 733}
843 734
844/* 735/*
@@ -849,39 +740,23 @@ cleanup_ret:
849void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn, 740void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
850 unsigned long size) 741 unsigned long size)
851{ 742{
852 unsigned long i;
853 resource_size_t paddr; 743 resource_size_t paddr;
854 unsigned long prot; 744 unsigned long vma_size = vma->vm_end - vma->vm_start;
855 unsigned long vma_start = vma->vm_start;
856 unsigned long vma_end = vma->vm_end;
857 unsigned long vma_size = vma_end - vma_start;
858 745
859 if (!pat_enabled) 746 if (!pat_enabled)
860 return; 747 return;
861 748
749 /*
750 * For now, only handle remap_pfn_range() vmas where
751 * is_linear_pfn_mapping() == TRUE. Handling of
752 * vm_insert_pfn() is TBD.
753 */
862 if (is_linear_pfn_mapping(vma)) { 754 if (is_linear_pfn_mapping(vma)) {
863 /* free the whole chunk starting from vm_pgoff */ 755 /* free the whole chunk starting from vm_pgoff */
864 paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; 756 paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
865 free_pfn_range(paddr, vma_size); 757 free_pfn_range(paddr, vma_size);
866 return; 758 return;
867 } 759 }
868
869 if (size != 0 && size != vma_size) {
870 /* free page by page, using pfn and size */
871 paddr = (resource_size_t)pfn << PAGE_SHIFT;
872 for (i = 0; i < size; i += PAGE_SIZE) {
873 paddr = paddr + i;
874 free_pfn_range(paddr, PAGE_SIZE);
875 }
876 } else {
877 /* free entire vma, page by page, using the pfn from pte */
878 for (i = 0; i < vma_size; i += PAGE_SIZE) {
879 if (follow_phys(vma, vma_start + i, 0, &prot, &paddr))
880 continue;
881
882 free_pfn_range(paddr, PAGE_SIZE);
883 }
884 }
885} 760}
886 761
887pgprot_t pgprot_writecombine(pgprot_t prot) 762pgprot_t pgprot_writecombine(pgprot_t prot)
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 5b7c7c8464fe..7aa03a5389f5 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -345,7 +345,8 @@ void __native_set_fixmap(enum fixed_addresses idx, pte_t pte)
345 fixmaps_set++; 345 fixmaps_set++;
346} 346}
347 347
348void native_set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags) 348void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys,
349 pgprot_t flags)
349{ 350{
350 __native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags)); 351 __native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags));
351} 352}
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index 16ae70fc57e7..29a0e37114f8 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -216,7 +216,7 @@ int __init get_memcfg_from_srat(void)
216 216
217 if (num_memory_chunks == 0) { 217 if (num_memory_chunks == 0) {
218 printk(KERN_WARNING 218 printk(KERN_WARNING
219 "could not finy any ACPI SRAT memory areas.\n"); 219 "could not find any ACPI SRAT memory areas.\n");
220 goto out_fail; 220 goto out_fail;
221 } 221 }
222 222
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index c7d272b8574c..01765955baaf 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -28,6 +28,7 @@ int acpi_numa __initdata;
28static struct acpi_table_slit *acpi_slit; 28static struct acpi_table_slit *acpi_slit;
29 29
30static nodemask_t nodes_parsed __initdata; 30static nodemask_t nodes_parsed __initdata;
31static nodemask_t cpu_nodes_parsed __initdata;
31static struct bootnode nodes[MAX_NUMNODES] __initdata; 32static struct bootnode nodes[MAX_NUMNODES] __initdata;
32static struct bootnode nodes_add[MAX_NUMNODES]; 33static struct bootnode nodes_add[MAX_NUMNODES];
33static int found_add_area __initdata; 34static int found_add_area __initdata;
@@ -141,6 +142,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
141 142
142 apic_id = pa->apic_id; 143 apic_id = pa->apic_id;
143 apicid_to_node[apic_id] = node; 144 apicid_to_node[apic_id] = node;
145 node_set(node, cpu_nodes_parsed);
144 acpi_numa = 1; 146 acpi_numa = 1;
145 printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n", 147 printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
146 pxm, apic_id, node); 148 pxm, apic_id, node);
@@ -174,6 +176,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
174 else 176 else
175 apic_id = pa->apic_id; 177 apic_id = pa->apic_id;
176 apicid_to_node[apic_id] = node; 178 apicid_to_node[apic_id] = node;
179 node_set(node, cpu_nodes_parsed);
177 acpi_numa = 1; 180 acpi_numa = 1;
178 printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n", 181 printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
179 pxm, apic_id, node); 182 pxm, apic_id, node);
@@ -358,6 +361,7 @@ static void __init unparse_node(int node)
358{ 361{
359 int i; 362 int i;
360 node_clear(node, nodes_parsed); 363 node_clear(node, nodes_parsed);
364 node_clear(node, cpu_nodes_parsed);
361 for (i = 0; i < MAX_LOCAL_APIC; i++) { 365 for (i = 0; i < MAX_LOCAL_APIC; i++) {
362 if (apicid_to_node[i] == node) 366 if (apicid_to_node[i] == node)
363 apicid_to_node[i] = NUMA_NO_NODE; 367 apicid_to_node[i] = NUMA_NO_NODE;
@@ -402,7 +406,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
402 return -1; 406 return -1;
403 } 407 }
404 408
405 node_possible_map = nodes_parsed; 409 /* Account for nodes with cpus and no memory */
410 nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed);
406 411
407 /* Finally register nodes */ 412 /* Finally register nodes */
408 for_each_node_mask(i, node_possible_map) 413 for_each_node_mask(i, node_possible_map)