aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/mm
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2009-03-26 13:29:40 -0400
committerIngo Molnar <mingo@elte.hu>2009-03-26 13:29:40 -0400
commit5a54bd1307471c1cd0521402fe65e2057edcab2f (patch)
tree25fb6a543db4ccc11b6d5662ed2e7facfce39ae7 /arch/x86/mm
parentf9f35677d81adb0feedcd6e0e661784805c8facd (diff)
parent8e0ee43bc2c3e19db56a4adaa9a9b04ce885cd84 (diff)
Merge commit 'v2.6.29' into core/header-fixes
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--arch/x86/mm/fault.c8
-rw-r--r--arch/x86/mm/init_64.c2
-rw-r--r--arch/x86/mm/iomap_32.c11
-rw-r--r--arch/x86/mm/ioremap.c19
-rw-r--r--arch/x86/mm/kmmio.c164
-rw-r--r--arch/x86/mm/numa_64.c2
-rw-r--r--arch/x86/mm/pageattr.c39
-rw-r--r--arch/x86/mm/pat.c85
-rw-r--r--arch/x86/mm/testmmiotrace.c70
9 files changed, 257 insertions, 143 deletions
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 90dfae511a41..c76ef1d701c9 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -603,8 +603,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
603 603
604 si_code = SEGV_MAPERR; 604 si_code = SEGV_MAPERR;
605 605
606 if (notify_page_fault(regs))
607 return;
608 if (unlikely(kmmio_fault(regs, address))) 606 if (unlikely(kmmio_fault(regs, address)))
609 return; 607 return;
610 608
@@ -634,6 +632,9 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
634 if (spurious_fault(address, error_code)) 632 if (spurious_fault(address, error_code))
635 return; 633 return;
636 634
635 /* kprobes don't want to hook the spurious faults. */
636 if (notify_page_fault(regs))
637 return;
637 /* 638 /*
638 * Don't take the mm semaphore here. If we fixup a prefetch 639 * Don't take the mm semaphore here. If we fixup a prefetch
639 * fault we could otherwise deadlock. 640 * fault we could otherwise deadlock.
@@ -641,6 +642,9 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
641 goto bad_area_nosemaphore; 642 goto bad_area_nosemaphore;
642 } 643 }
643 644
645 /* kprobes don't want to hook the spurious faults. */
646 if (notify_page_fault(regs))
647 return;
644 648
645 /* 649 /*
646 * It's safe to allow irq's after cr2 has been saved and the 650 * It's safe to allow irq's after cr2 has been saved and the
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index e6d36b490250..b1352250096e 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -714,6 +714,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
714 pos = start_pfn << PAGE_SHIFT; 714 pos = start_pfn << PAGE_SHIFT;
715 end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT) 715 end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT)
716 << (PMD_SHIFT - PAGE_SHIFT); 716 << (PMD_SHIFT - PAGE_SHIFT);
717 if (end_pfn > (end >> PAGE_SHIFT))
718 end_pfn = end >> PAGE_SHIFT;
717 if (start_pfn < end_pfn) { 719 if (start_pfn < end_pfn) {
718 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); 720 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
719 pos = end_pfn << PAGE_SHIFT; 721 pos = end_pfn << PAGE_SHIFT;
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index ca53224fc56c..04102d42ff42 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -20,6 +20,17 @@
20#include <asm/pat.h> 20#include <asm/pat.h>
21#include <linux/module.h> 21#include <linux/module.h>
22 22
23int is_io_mapping_possible(resource_size_t base, unsigned long size)
24{
25#ifndef CONFIG_X86_PAE
26 /* There is no way to map greater than 1 << 32 address without PAE */
27 if (base + size > 0x100000000ULL)
28 return 0;
29#endif
30 return 1;
31}
32EXPORT_SYMBOL_GPL(is_io_mapping_possible);
33
23/* Map 'pfn' using fixed map 'type' and protections 'prot' 34/* Map 'pfn' using fixed map 'type' and protections 'prot'
24 */ 35 */
25void * 36void *
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index af750ab973b6..f45d5e29a72e 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -134,25 +134,6 @@ int page_is_ram(unsigned long pagenr)
134 return 0; 134 return 0;
135} 135}
136 136
137int pagerange_is_ram(unsigned long start, unsigned long end)
138{
139 int ram_page = 0, not_rampage = 0;
140 unsigned long page_nr;
141
142 for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT);
143 ++page_nr) {
144 if (page_is_ram(page_nr))
145 ram_page = 1;
146 else
147 not_rampage = 1;
148
149 if (ram_page == not_rampage)
150 return -1;
151 }
152
153 return ram_page;
154}
155
156/* 137/*
157 * Fix up the linear direct mapping of the kernel to avoid cache attribute 138 * Fix up the linear direct mapping of the kernel to avoid cache attribute
158 * conflicts. 139 * conflicts.
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 93d82038af4b..6a518dd08a36 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -32,11 +32,14 @@ struct kmmio_fault_page {
32 struct list_head list; 32 struct list_head list;
33 struct kmmio_fault_page *release_next; 33 struct kmmio_fault_page *release_next;
34 unsigned long page; /* location of the fault page */ 34 unsigned long page; /* location of the fault page */
35 bool old_presence; /* page presence prior to arming */
36 bool armed;
35 37
36 /* 38 /*
37 * Number of times this page has been registered as a part 39 * Number of times this page has been registered as a part
38 * of a probe. If zero, page is disarmed and this may be freed. 40 * of a probe. If zero, page is disarmed and this may be freed.
39 * Used only by writers (RCU). 41 * Used only by writers (RCU) and post_kmmio_handler().
42 * Protected by kmmio_lock, when linked into kmmio_page_table.
40 */ 43 */
41 int count; 44 int count;
42}; 45};
@@ -105,57 +108,85 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
105 return NULL; 108 return NULL;
106} 109}
107 110
108static void set_page_present(unsigned long addr, bool present, 111static void set_pmd_presence(pmd_t *pmd, bool present, bool *old)
109 unsigned int *pglevel) 112{
113 pmdval_t v = pmd_val(*pmd);
114 *old = !!(v & _PAGE_PRESENT);
115 v &= ~_PAGE_PRESENT;
116 if (present)
117 v |= _PAGE_PRESENT;
118 set_pmd(pmd, __pmd(v));
119}
120
121static void set_pte_presence(pte_t *pte, bool present, bool *old)
122{
123 pteval_t v = pte_val(*pte);
124 *old = !!(v & _PAGE_PRESENT);
125 v &= ~_PAGE_PRESENT;
126 if (present)
127 v |= _PAGE_PRESENT;
128 set_pte_atomic(pte, __pte(v));
129}
130
131static int set_page_presence(unsigned long addr, bool present, bool *old)
110{ 132{
111 pteval_t pteval;
112 pmdval_t pmdval;
113 unsigned int level; 133 unsigned int level;
114 pmd_t *pmd;
115 pte_t *pte = lookup_address(addr, &level); 134 pte_t *pte = lookup_address(addr, &level);
116 135
117 if (!pte) { 136 if (!pte) {
118 pr_err("kmmio: no pte for page 0x%08lx\n", addr); 137 pr_err("kmmio: no pte for page 0x%08lx\n", addr);
119 return; 138 return -1;
120 } 139 }
121 140
122 if (pglevel)
123 *pglevel = level;
124
125 switch (level) { 141 switch (level) {
126 case PG_LEVEL_2M: 142 case PG_LEVEL_2M:
127 pmd = (pmd_t *)pte; 143 set_pmd_presence((pmd_t *)pte, present, old);
128 pmdval = pmd_val(*pmd) & ~_PAGE_PRESENT;
129 if (present)
130 pmdval |= _PAGE_PRESENT;
131 set_pmd(pmd, __pmd(pmdval));
132 break; 144 break;
133
134 case PG_LEVEL_4K: 145 case PG_LEVEL_4K:
135 pteval = pte_val(*pte) & ~_PAGE_PRESENT; 146 set_pte_presence(pte, present, old);
136 if (present)
137 pteval |= _PAGE_PRESENT;
138 set_pte_atomic(pte, __pte(pteval));
139 break; 147 break;
140
141 default: 148 default:
142 pr_err("kmmio: unexpected page level 0x%x.\n", level); 149 pr_err("kmmio: unexpected page level 0x%x.\n", level);
143 return; 150 return -1;
144 } 151 }
145 152
146 __flush_tlb_one(addr); 153 __flush_tlb_one(addr);
154 return 0;
147} 155}
148 156
149/** Mark the given page as not present. Access to it will trigger a fault. */ 157/*
150static void arm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) 158 * Mark the given page as not present. Access to it will trigger a fault.
159 *
160 * Struct kmmio_fault_page is protected by RCU and kmmio_lock, but the
161 * protection is ignored here. RCU read lock is assumed held, so the struct
162 * will not disappear unexpectedly. Furthermore, the caller must guarantee,
163 * that double arming the same virtual address (page) cannot occur.
164 *
165 * Double disarming on the other hand is allowed, and may occur when a fault
166 * and mmiotrace shutdown happen simultaneously.
167 */
168static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
151{ 169{
152 set_page_present(page & PAGE_MASK, false, pglevel); 170 int ret;
171 WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n");
172 if (f->armed) {
173 pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n",
174 f->page, f->count, f->old_presence);
175 }
176 ret = set_page_presence(f->page, false, &f->old_presence);
177 WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page);
178 f->armed = true;
179 return ret;
153} 180}
154 181
155/** Mark the given page as present. */ 182/** Restore the given page to saved presence state. */
156static void disarm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) 183static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
157{ 184{
158 set_page_present(page & PAGE_MASK, true, pglevel); 185 bool tmp;
186 int ret = set_page_presence(f->page, f->old_presence, &tmp);
187 WARN_ONCE(ret < 0,
188 KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page);
189 f->armed = false;
159} 190}
160 191
161/* 192/*
@@ -202,28 +233,32 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
202 233
203 ctx = &get_cpu_var(kmmio_ctx); 234 ctx = &get_cpu_var(kmmio_ctx);
204 if (ctx->active) { 235 if (ctx->active) {
205 disarm_kmmio_fault_page(faultpage->page, NULL);
206 if (addr == ctx->addr) { 236 if (addr == ctx->addr) {
207 /* 237 /*
208 * On SMP we sometimes get recursive probe hits on the 238 * A second fault on the same page means some other
209 * same address. Context is already saved, fall out. 239 * condition needs handling by do_page_fault(), the
240 * page really not being present is the most common.
210 */ 241 */
211 pr_debug("kmmio: duplicate probe hit on CPU %d, for " 242 pr_debug("kmmio: secondary hit for 0x%08lx CPU %d.\n",
212 "address 0x%08lx.\n", 243 addr, smp_processor_id());
213 smp_processor_id(), addr); 244
214 ret = 1; 245 if (!faultpage->old_presence)
215 goto no_kmmio_ctx; 246 pr_info("kmmio: unexpected secondary hit for "
216 } 247 "address 0x%08lx on CPU %d.\n", addr,
217 /* 248 smp_processor_id());
218 * Prevent overwriting already in-flight context. 249 } else {
219 * This should not happen, let's hope disarming at least 250 /*
220 * prevents a panic. 251 * Prevent overwriting already in-flight context.
221 */ 252 * This should not happen, let's hope disarming at
222 pr_emerg("kmmio: recursive probe hit on CPU %d, " 253 * least prevents a panic.
254 */
255 pr_emerg("kmmio: recursive probe hit on CPU %d, "
223 "for address 0x%08lx. Ignoring.\n", 256 "for address 0x%08lx. Ignoring.\n",
224 smp_processor_id(), addr); 257 smp_processor_id(), addr);
225 pr_emerg("kmmio: previous hit was at 0x%08lx.\n", 258 pr_emerg("kmmio: previous hit was at 0x%08lx.\n",
226 ctx->addr); 259 ctx->addr);
260 disarm_kmmio_fault_page(faultpage);
261 }
227 goto no_kmmio_ctx; 262 goto no_kmmio_ctx;
228 } 263 }
229 ctx->active++; 264 ctx->active++;
@@ -244,7 +279,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
244 regs->flags &= ~X86_EFLAGS_IF; 279 regs->flags &= ~X86_EFLAGS_IF;
245 280
246 /* Now we set present bit in PTE and single step. */ 281 /* Now we set present bit in PTE and single step. */
247 disarm_kmmio_fault_page(ctx->fpage->page, NULL); 282 disarm_kmmio_fault_page(ctx->fpage);
248 283
249 /* 284 /*
250 * If another cpu accesses the same page while we are stepping, 285 * If another cpu accesses the same page while we are stepping,
@@ -275,7 +310,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
275 struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); 310 struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
276 311
277 if (!ctx->active) { 312 if (!ctx->active) {
278 pr_debug("kmmio: spurious debug trap on CPU %d.\n", 313 pr_warning("kmmio: spurious debug trap on CPU %d.\n",
279 smp_processor_id()); 314 smp_processor_id());
280 goto out; 315 goto out;
281 } 316 }
@@ -283,7 +318,11 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
283 if (ctx->probe && ctx->probe->post_handler) 318 if (ctx->probe && ctx->probe->post_handler)
284 ctx->probe->post_handler(ctx->probe, condition, regs); 319 ctx->probe->post_handler(ctx->probe, condition, regs);
285 320
286 arm_kmmio_fault_page(ctx->fpage->page, NULL); 321 /* Prevent racing against release_kmmio_fault_page(). */
322 spin_lock(&kmmio_lock);
323 if (ctx->fpage->count)
324 arm_kmmio_fault_page(ctx->fpage);
325 spin_unlock(&kmmio_lock);
287 326
288 regs->flags &= ~X86_EFLAGS_TF; 327 regs->flags &= ~X86_EFLAGS_TF;
289 regs->flags |= ctx->saved_flags; 328 regs->flags |= ctx->saved_flags;
@@ -315,20 +354,24 @@ static int add_kmmio_fault_page(unsigned long page)
315 f = get_kmmio_fault_page(page); 354 f = get_kmmio_fault_page(page);
316 if (f) { 355 if (f) {
317 if (!f->count) 356 if (!f->count)
318 arm_kmmio_fault_page(f->page, NULL); 357 arm_kmmio_fault_page(f);
319 f->count++; 358 f->count++;
320 return 0; 359 return 0;
321 } 360 }
322 361
323 f = kmalloc(sizeof(*f), GFP_ATOMIC); 362 f = kzalloc(sizeof(*f), GFP_ATOMIC);
324 if (!f) 363 if (!f)
325 return -1; 364 return -1;
326 365
327 f->count = 1; 366 f->count = 1;
328 f->page = page; 367 f->page = page;
329 list_add_rcu(&f->list, kmmio_page_list(f->page));
330 368
331 arm_kmmio_fault_page(f->page, NULL); 369 if (arm_kmmio_fault_page(f)) {
370 kfree(f);
371 return -1;
372 }
373
374 list_add_rcu(&f->list, kmmio_page_list(f->page));
332 375
333 return 0; 376 return 0;
334} 377}
@@ -347,7 +390,7 @@ static void release_kmmio_fault_page(unsigned long page,
347 f->count--; 390 f->count--;
348 BUG_ON(f->count < 0); 391 BUG_ON(f->count < 0);
349 if (!f->count) { 392 if (!f->count) {
350 disarm_kmmio_fault_page(f->page, NULL); 393 disarm_kmmio_fault_page(f);
351 f->release_next = *release_list; 394 f->release_next = *release_list;
352 *release_list = f; 395 *release_list = f;
353 } 396 }
@@ -408,23 +451,24 @@ static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
408 451
409static void remove_kmmio_fault_pages(struct rcu_head *head) 452static void remove_kmmio_fault_pages(struct rcu_head *head)
410{ 453{
411 struct kmmio_delayed_release *dr = container_of( 454 struct kmmio_delayed_release *dr =
412 head, 455 container_of(head, struct kmmio_delayed_release, rcu);
413 struct kmmio_delayed_release,
414 rcu);
415 struct kmmio_fault_page *p = dr->release_list; 456 struct kmmio_fault_page *p = dr->release_list;
416 struct kmmio_fault_page **prevp = &dr->release_list; 457 struct kmmio_fault_page **prevp = &dr->release_list;
417 unsigned long flags; 458 unsigned long flags;
459
418 spin_lock_irqsave(&kmmio_lock, flags); 460 spin_lock_irqsave(&kmmio_lock, flags);
419 while (p) { 461 while (p) {
420 if (!p->count) 462 if (!p->count) {
421 list_del_rcu(&p->list); 463 list_del_rcu(&p->list);
422 else 464 prevp = &p->release_next;
465 } else {
423 *prevp = p->release_next; 466 *prevp = p->release_next;
424 prevp = &p->release_next; 467 }
425 p = p->release_next; 468 p = p->release_next;
426 } 469 }
427 spin_unlock_irqrestore(&kmmio_lock, flags); 470 spin_unlock_irqrestore(&kmmio_lock, flags);
471
428 /* This is the real RCU destroy call. */ 472 /* This is the real RCU destroy call. */
429 call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages); 473 call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages);
430} 474}
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 71a14f89f89e..f3516da035d1 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -145,7 +145,7 @@ int __init compute_hash_shift(struct bootnode *nodes, int numnodes,
145 return shift; 145 return shift;
146} 146}
147 147
148int early_pfn_to_nid(unsigned long pfn) 148int __meminit __early_pfn_to_nid(unsigned long pfn)
149{ 149{
150 return phys_to_nid(pfn << PAGE_SHIFT); 150 return phys_to_nid(pfn << PAGE_SHIFT);
151} 151}
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 84ba74820ad6..7233bd7e357b 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -508,18 +508,24 @@ static int split_large_page(pte_t *kpte, unsigned long address)
508#endif 508#endif
509 509
510 /* 510 /*
511 * Install the new, split up pagetable. Important details here: 511 * Install the new, split up pagetable.
512 * 512 *
513 * On Intel the NX bit of all levels must be cleared to make a 513 * We use the standard kernel pagetable protections for the new
514 * page executable. See section 4.13.2 of Intel 64 and IA-32 514 * pagetable protections, the actual ptes set above control the
515 * Architectures Software Developer's Manual). 515 * primary protection behavior:
516 */
517 __set_pmd_pte(kpte, address, mk_pte(base, __pgprot(_KERNPG_TABLE)));
518
519 /*
520 * Intel Atom errata AAH41 workaround.
516 * 521 *
517 * Mark the entry present. The current mapping might be 522 * The real fix should be in hw or in a microcode update, but
518 * set to not present, which we preserved above. 523 * we also probabilistically try to reduce the window of having
524 * a large TLB mixed with 4K TLBs while instruction fetches are
525 * going on.
519 */ 526 */
520 ref_prot = pte_pgprot(pte_mkexec(pte_clrhuge(*kpte))); 527 __flush_tlb_all();
521 pgprot_val(ref_prot) |= _PAGE_PRESENT; 528
522 __set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
523 base = NULL; 529 base = NULL;
524 530
525out_unlock: 531out_unlock:
@@ -575,7 +581,6 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
575 address = cpa->vaddr[cpa->curpage]; 581 address = cpa->vaddr[cpa->curpage];
576 else 582 else
577 address = *cpa->vaddr; 583 address = *cpa->vaddr;
578
579repeat: 584repeat:
580 kpte = lookup_address(address, &level); 585 kpte = lookup_address(address, &level);
581 if (!kpte) 586 if (!kpte)
@@ -812,6 +817,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
812 817
813 vm_unmap_aliases(); 818 vm_unmap_aliases();
814 819
820 /*
821 * If we're called with lazy mmu updates enabled, the
822 * in-memory pte state may be stale. Flush pending updates to
823 * bring them up to date.
824 */
825 arch_flush_lazy_mmu_mode();
826
815 cpa.vaddr = addr; 827 cpa.vaddr = addr;
816 cpa.numpages = numpages; 828 cpa.numpages = numpages;
817 cpa.mask_set = mask_set; 829 cpa.mask_set = mask_set;
@@ -854,6 +866,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
854 } else 866 } else
855 cpa_flush_all(cache); 867 cpa_flush_all(cache);
856 868
869 /*
870 * If we've been called with lazy mmu updates enabled, then
871 * make sure that everything gets flushed out before we
872 * return.
873 */
874 arch_flush_lazy_mmu_mode();
875
857out: 876out:
858 return ret; 877 return ret;
859} 878}
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 7b61036427df..e0ab173b6974 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -11,6 +11,7 @@
11#include <linux/bootmem.h> 11#include <linux/bootmem.h>
12#include <linux/debugfs.h> 12#include <linux/debugfs.h>
13#include <linux/kernel.h> 13#include <linux/kernel.h>
14#include <linux/module.h>
14#include <linux/gfp.h> 15#include <linux/gfp.h>
15#include <linux/mm.h> 16#include <linux/mm.h>
16#include <linux/fs.h> 17#include <linux/fs.h>
@@ -211,6 +212,33 @@ chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type)
211static struct memtype *cached_entry; 212static struct memtype *cached_entry;
212static u64 cached_start; 213static u64 cached_start;
213 214
215static int pat_pagerange_is_ram(unsigned long start, unsigned long end)
216{
217 int ram_page = 0, not_rampage = 0;
218 unsigned long page_nr;
219
220 for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT);
221 ++page_nr) {
222 /*
223 * For legacy reasons, physical address range in the legacy ISA
224 * region is tracked as non-RAM. This will allow users of
225 * /dev/mem to map portions of legacy ISA region, even when
226 * some of those portions are listed(or not even listed) with
227 * different e820 types(RAM/reserved/..)
228 */
229 if (page_nr >= (ISA_END_ADDRESS >> PAGE_SHIFT) &&
230 page_is_ram(page_nr))
231 ram_page = 1;
232 else
233 not_rampage = 1;
234
235 if (ram_page == not_rampage)
236 return -1;
237 }
238
239 return ram_page;
240}
241
214/* 242/*
215 * For RAM pages, mark the pages as non WB memory type using 243 * For RAM pages, mark the pages as non WB memory type using
216 * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or 244 * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or
@@ -336,20 +364,12 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
336 if (new_type) 364 if (new_type)
337 *new_type = actual_type; 365 *new_type = actual_type;
338 366
339 /* 367 is_range_ram = pat_pagerange_is_ram(start, end);
340 * For legacy reasons, some parts of the physical address range in the 368 if (is_range_ram == 1)
341 * legacy 1MB region is treated as non-RAM (even when listed as RAM in 369 return reserve_ram_pages_type(start, end, req_type,
342 * the e820 tables). So we will track the memory attributes of this 370 new_type);
343 * legacy 1MB region using the linear memtype_list always. 371 else if (is_range_ram < 0)
344 */ 372 return -EINVAL;
345 if (end >= ISA_END_ADDRESS) {
346 is_range_ram = pagerange_is_ram(start, end);
347 if (is_range_ram == 1)
348 return reserve_ram_pages_type(start, end, req_type,
349 new_type);
350 else if (is_range_ram < 0)
351 return -EINVAL;
352 }
353 373
354 new = kmalloc(sizeof(struct memtype), GFP_KERNEL); 374 new = kmalloc(sizeof(struct memtype), GFP_KERNEL);
355 if (!new) 375 if (!new)
@@ -446,19 +466,11 @@ int free_memtype(u64 start, u64 end)
446 if (is_ISA_range(start, end - 1)) 466 if (is_ISA_range(start, end - 1))
447 return 0; 467 return 0;
448 468
449 /* 469 is_range_ram = pat_pagerange_is_ram(start, end);
450 * For legacy reasons, some parts of the physical address range in the 470 if (is_range_ram == 1)
451 * legacy 1MB region is treated as non-RAM (even when listed as RAM in 471 return free_ram_pages_type(start, end);
452 * the e820 tables). So we will track the memory attributes of this 472 else if (is_range_ram < 0)
453 * legacy 1MB region using the linear memtype_list always. 473 return -EINVAL;
454 */
455 if (end >= ISA_END_ADDRESS) {
456 is_range_ram = pagerange_is_ram(start, end);
457 if (is_range_ram == 1)
458 return free_ram_pages_type(start, end);
459 else if (is_range_ram < 0)
460 return -EINVAL;
461 }
462 474
463 spin_lock(&memtype_lock); 475 spin_lock(&memtype_lock);
464 list_for_each_entry(entry, &memtype_list, nd) { 476 list_for_each_entry(entry, &memtype_list, nd) {
@@ -626,17 +638,13 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
626 unsigned long flags; 638 unsigned long flags;
627 unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK); 639 unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK);
628 640
629 is_ram = pagerange_is_ram(paddr, paddr + size); 641 is_ram = pat_pagerange_is_ram(paddr, paddr + size);
630 642
631 if (is_ram != 0) { 643 /*
632 /* 644 * reserve_pfn_range() doesn't support RAM pages.
633 * For mapping RAM pages, drivers need to call 645 */
634 * set_memory_[uc|wc|wb] directly, for reserve and free, before 646 if (is_ram != 0)
635 * setting up the PTE. 647 return -EINVAL;
636 */
637 WARN_ON_ONCE(1);
638 return 0;
639 }
640 648
641 ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); 649 ret = reserve_memtype(paddr, paddr + size, want_flags, &flags);
642 if (ret) 650 if (ret)
@@ -693,7 +701,7 @@ static void free_pfn_range(u64 paddr, unsigned long size)
693{ 701{
694 int is_ram; 702 int is_ram;
695 703
696 is_ram = pagerange_is_ram(paddr, paddr + size); 704 is_ram = pat_pagerange_is_ram(paddr, paddr + size);
697 if (is_ram == 0) 705 if (is_ram == 0)
698 free_memtype(paddr, paddr + size); 706 free_memtype(paddr, paddr + size);
699} 707}
@@ -861,6 +869,7 @@ pgprot_t pgprot_writecombine(pgprot_t prot)
861 else 869 else
862 return pgprot_noncached(prot); 870 return pgprot_noncached(prot);
863} 871}
872EXPORT_SYMBOL_GPL(pgprot_writecombine);
864 873
865#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) 874#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT)
866 875
diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c
index ab50a8d7402c..427fd1b56df5 100644
--- a/arch/x86/mm/testmmiotrace.c
+++ b/arch/x86/mm/testmmiotrace.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Written by Pekka Paalanen, 2008 <pq@iki.fi> 2 * Written by Pekka Paalanen, 2008-2009 <pq@iki.fi>
3 */ 3 */
4#include <linux/module.h> 4#include <linux/module.h>
5#include <linux/io.h> 5#include <linux/io.h>
@@ -9,35 +9,74 @@
9 9
10static unsigned long mmio_address; 10static unsigned long mmio_address;
11module_param(mmio_address, ulong, 0); 11module_param(mmio_address, ulong, 0);
12MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB."); 12MODULE_PARM_DESC(mmio_address, " Start address of the mapping of 16 kB "
13 "(or 8 MB if read_far is non-zero).");
14
15static unsigned long read_far = 0x400100;
16module_param(read_far, ulong, 0);
17MODULE_PARM_DESC(read_far, " Offset of a 32-bit read within 8 MB "
18 "(default: 0x400100).");
19
20static unsigned v16(unsigned i)
21{
22 return i * 12 + 7;
23}
24
25static unsigned v32(unsigned i)
26{
27 return i * 212371 + 13;
28}
13 29
14static void do_write_test(void __iomem *p) 30static void do_write_test(void __iomem *p)
15{ 31{
16 unsigned int i; 32 unsigned int i;
33 pr_info(MODULE_NAME ": write test.\n");
17 mmiotrace_printk("Write test.\n"); 34 mmiotrace_printk("Write test.\n");
35
18 for (i = 0; i < 256; i++) 36 for (i = 0; i < 256; i++)
19 iowrite8(i, p + i); 37 iowrite8(i, p + i);
38
20 for (i = 1024; i < (5 * 1024); i += 2) 39 for (i = 1024; i < (5 * 1024); i += 2)
21 iowrite16(i * 12 + 7, p + i); 40 iowrite16(v16(i), p + i);
41
22 for (i = (5 * 1024); i < (16 * 1024); i += 4) 42 for (i = (5 * 1024); i < (16 * 1024); i += 4)
23 iowrite32(i * 212371 + 13, p + i); 43 iowrite32(v32(i), p + i);
24} 44}
25 45
26static void do_read_test(void __iomem *p) 46static void do_read_test(void __iomem *p)
27{ 47{
28 unsigned int i; 48 unsigned int i;
49 unsigned errs[3] = { 0 };
50 pr_info(MODULE_NAME ": read test.\n");
29 mmiotrace_printk("Read test.\n"); 51 mmiotrace_printk("Read test.\n");
52
30 for (i = 0; i < 256; i++) 53 for (i = 0; i < 256; i++)
31 ioread8(p + i); 54 if (ioread8(p + i) != i)
55 ++errs[0];
56
32 for (i = 1024; i < (5 * 1024); i += 2) 57 for (i = 1024; i < (5 * 1024); i += 2)
33 ioread16(p + i); 58 if (ioread16(p + i) != v16(i))
59 ++errs[1];
60
34 for (i = (5 * 1024); i < (16 * 1024); i += 4) 61 for (i = (5 * 1024); i < (16 * 1024); i += 4)
35 ioread32(p + i); 62 if (ioread32(p + i) != v32(i))
63 ++errs[2];
64
65 mmiotrace_printk("Read errors: 8-bit %d, 16-bit %d, 32-bit %d.\n",
66 errs[0], errs[1], errs[2]);
36} 67}
37 68
38static void do_test(void) 69static void do_read_far_test(void __iomem *p)
39{ 70{
40 void __iomem *p = ioremap_nocache(mmio_address, 0x4000); 71 pr_info(MODULE_NAME ": read far test.\n");
72 mmiotrace_printk("Read far test.\n");
73
74 ioread32(p + read_far);
75}
76
77static void do_test(unsigned long size)
78{
79 void __iomem *p = ioremap_nocache(mmio_address, size);
41 if (!p) { 80 if (!p) {
42 pr_err(MODULE_NAME ": could not ioremap, aborting.\n"); 81 pr_err(MODULE_NAME ": could not ioremap, aborting.\n");
43 return; 82 return;
@@ -45,11 +84,15 @@ static void do_test(void)
45 mmiotrace_printk("ioremap returned %p.\n", p); 84 mmiotrace_printk("ioremap returned %p.\n", p);
46 do_write_test(p); 85 do_write_test(p);
47 do_read_test(p); 86 do_read_test(p);
87 if (read_far && read_far < size - 4)
88 do_read_far_test(p);
48 iounmap(p); 89 iounmap(p);
49} 90}
50 91
51static int __init init(void) 92static int __init init(void)
52{ 93{
94 unsigned long size = (read_far) ? (8 << 20) : (16 << 10);
95
53 if (mmio_address == 0) { 96 if (mmio_address == 0) {
54 pr_err(MODULE_NAME ": you have to use the module argument " 97 pr_err(MODULE_NAME ": you have to use the module argument "
55 "mmio_address.\n"); 98 "mmio_address.\n");
@@ -58,10 +101,11 @@ static int __init init(void)
58 return -ENXIO; 101 return -ENXIO;
59 } 102 }
60 103
61 pr_warning(MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx " 104 pr_warning(MODULE_NAME ": WARNING: mapping %lu kB @ 0x%08lx in PCI "
62 "in PCI address space, and writing " 105 "address space, and writing 16 kB of rubbish in there.\n",
63 "rubbish in there.\n", mmio_address); 106 size >> 10, mmio_address);
64 do_test(); 107 do_test(size);
108 pr_info(MODULE_NAME ": All done.\n");
65 return 0; 109 return 0;
66} 110}
67 111