author	Ingo Molnar <mingo@elte.hu>	2009-03-26 13:29:40 -0400
committer	Ingo Molnar <mingo@elte.hu>	2009-03-26 13:29:40 -0400
commit	5a54bd1307471c1cd0521402fe65e2057edcab2f (patch)
tree	25fb6a543db4ccc11b6d5662ed2e7facfce39ae7 /arch/x86/mm
parent	f9f35677d81adb0feedcd6e0e661784805c8facd (diff)
parent	8e0ee43bc2c3e19db56a4adaa9a9b04ce885cd84 (diff)

Merge commit 'v2.6.29' into core/header-fixes
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--	arch/x86/mm/fault.c           |   8
-rw-r--r--	arch/x86/mm/init_64.c         |   2
-rw-r--r--	arch/x86/mm/iomap_32.c        |  11
-rw-r--r--	arch/x86/mm/ioremap.c         |  19
-rw-r--r--	arch/x86/mm/kmmio.c           | 164
-rw-r--r--	arch/x86/mm/numa_64.c         |   2
-rw-r--r--	arch/x86/mm/pageattr.c        |  39
-rw-r--r--	arch/x86/mm/pat.c             |  85
-rw-r--r--	arch/x86/mm/testmmiotrace.c   |  70
9 files changed, 257 insertions(+), 143 deletions(-)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 90dfae511a41..c76ef1d701c9 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -603,8 +603,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 
 	si_code = SEGV_MAPERR;
 
-	if (notify_page_fault(regs))
-		return;
 	if (unlikely(kmmio_fault(regs, address)))
 		return;
 
@@ -634,6 +632,9 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 		if (spurious_fault(address, error_code))
 			return;
 
+		/* kprobes don't want to hook the spurious faults. */
+		if (notify_page_fault(regs))
+			return;
 		/*
 		 * Don't take the mm semaphore here. If we fixup a prefetch
 		 * fault we could otherwise deadlock.
@@ -641,6 +642,9 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 		goto bad_area_nosemaphore;
 	}
 
+	/* kprobes don't want to hook the spurious faults. */
+	if (notify_page_fault(regs))
+		return;
 
 	/*
 	 * It's safe to allow irq's after cr2 has been saved and the
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index e6d36b490250..b1352250096e 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -714,6 +714,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
 	pos = start_pfn << PAGE_SHIFT;
 	end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT)
 			<< (PMD_SHIFT - PAGE_SHIFT);
+	if (end_pfn > (end >> PAGE_SHIFT))
+		end_pfn = end >> PAGE_SHIFT;
 	if (start_pfn < end_pfn) {
 		nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
 		pos = end_pfn << PAGE_SHIFT;
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index ca53224fc56c..04102d42ff42 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -20,6 +20,17 @@
 #include <asm/pat.h>
 #include <linux/module.h>
 
+int is_io_mapping_possible(resource_size_t base, unsigned long size)
+{
+#ifndef CONFIG_X86_PAE
+	/* There is no way to map greater than 1 << 32 address without PAE */
+	if (base + size > 0x100000000ULL)
+		return 0;
+#endif
+	return 1;
+}
+EXPORT_SYMBOL_GPL(is_io_mapping_possible);
+
 /* Map 'pfn' using fixed map 'type' and protections 'prot'
  */
 void *
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index af750ab973b6..f45d5e29a72e 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -134,25 +134,6 @@ int page_is_ram(unsigned long pagenr)
 	return 0;
 }
 
-int pagerange_is_ram(unsigned long start, unsigned long end)
-{
-	int ram_page = 0, not_rampage = 0;
-	unsigned long page_nr;
-
-	for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT);
-	     ++page_nr) {
-		if (page_is_ram(page_nr))
-			ram_page = 1;
-		else
-			not_rampage = 1;
-
-		if (ram_page == not_rampage)
-			return -1;
-	}
-
-	return ram_page;
-}
-
 /*
  * Fix up the linear direct mapping of the kernel to avoid cache attribute
  * conflicts.
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 93d82038af4b..6a518dd08a36 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -32,11 +32,14 @@ struct kmmio_fault_page {
 	struct list_head list;
 	struct kmmio_fault_page *release_next;
 	unsigned long page; /* location of the fault page */
+	bool old_presence; /* page presence prior to arming */
+	bool armed;
 
 	/*
 	 * Number of times this page has been registered as a part
 	 * of a probe. If zero, page is disarmed and this may be freed.
-	 * Used only by writers (RCU).
+	 * Used only by writers (RCU) and post_kmmio_handler().
+	 * Protected by kmmio_lock, when linked into kmmio_page_table.
 	 */
 	int count;
 };
@@ -105,57 +108,85 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
 	return NULL;
 }
 
-static void set_page_present(unsigned long addr, bool present,
-						unsigned int *pglevel)
+static void set_pmd_presence(pmd_t *pmd, bool present, bool *old)
+{
+	pmdval_t v = pmd_val(*pmd);
+	*old = !!(v & _PAGE_PRESENT);
+	v &= ~_PAGE_PRESENT;
+	if (present)
+		v |= _PAGE_PRESENT;
+	set_pmd(pmd, __pmd(v));
+}
+
+static void set_pte_presence(pte_t *pte, bool present, bool *old)
+{
+	pteval_t v = pte_val(*pte);
+	*old = !!(v & _PAGE_PRESENT);
+	v &= ~_PAGE_PRESENT;
+	if (present)
+		v |= _PAGE_PRESENT;
+	set_pte_atomic(pte, __pte(v));
+}
+
+static int set_page_presence(unsigned long addr, bool present, bool *old)
 {
-	pteval_t pteval;
-	pmdval_t pmdval;
 	unsigned int level;
-	pmd_t *pmd;
 	pte_t *pte = lookup_address(addr, &level);
 
 	if (!pte) {
 		pr_err("kmmio: no pte for page 0x%08lx\n", addr);
-		return;
+		return -1;
 	}
 
-	if (pglevel)
-		*pglevel = level;
-
 	switch (level) {
 	case PG_LEVEL_2M:
-		pmd = (pmd_t *)pte;
-		pmdval = pmd_val(*pmd) & ~_PAGE_PRESENT;
-		if (present)
-			pmdval |= _PAGE_PRESENT;
-		set_pmd(pmd, __pmd(pmdval));
+		set_pmd_presence((pmd_t *)pte, present, old);
 		break;
-
 	case PG_LEVEL_4K:
-		pteval = pte_val(*pte) & ~_PAGE_PRESENT;
-		if (present)
-			pteval |= _PAGE_PRESENT;
-		set_pte_atomic(pte, __pte(pteval));
+		set_pte_presence(pte, present, old);
 		break;
-
 	default:
 		pr_err("kmmio: unexpected page level 0x%x.\n", level);
-		return;
+		return -1;
 	}
 
 	__flush_tlb_one(addr);
+	return 0;
 }
 
-/** Mark the given page as not present. Access to it will trigger a fault. */
-static void arm_kmmio_fault_page(unsigned long page, unsigned int *pglevel)
+/*
+ * Mark the given page as not present. Access to it will trigger a fault.
+ *
+ * Struct kmmio_fault_page is protected by RCU and kmmio_lock, but the
+ * protection is ignored here. RCU read lock is assumed held, so the struct
+ * will not disappear unexpectedly. Furthermore, the caller must guarantee,
+ * that double arming the same virtual address (page) cannot occur.
+ *
+ * Double disarming on the other hand is allowed, and may occur when a fault
+ * and mmiotrace shutdown happen simultaneously.
+ */
+static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
 {
-	set_page_present(page & PAGE_MASK, false, pglevel);
+	int ret;
+	WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n");
+	if (f->armed) {
+		pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n",
+					f->page, f->count, f->old_presence);
+	}
+	ret = set_page_presence(f->page, false, &f->old_presence);
+	WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page);
+	f->armed = true;
+	return ret;
 }
 
-/** Mark the given page as present. */
-static void disarm_kmmio_fault_page(unsigned long page, unsigned int *pglevel)
+/** Restore the given page to saved presence state. */
+static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
 {
-	set_page_present(page & PAGE_MASK, true, pglevel);
+	bool tmp;
+	int ret = set_page_presence(f->page, f->old_presence, &tmp);
+	WARN_ONCE(ret < 0,
+			KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page);
+	f->armed = false;
 }
 
 /*
@@ -202,28 +233,32 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
 
 	ctx = &get_cpu_var(kmmio_ctx);
 	if (ctx->active) {
-		disarm_kmmio_fault_page(faultpage->page, NULL);
 		if (addr == ctx->addr) {
 			/*
-			 * On SMP we sometimes get recursive probe hits on the
-			 * same address. Context is already saved, fall out.
+			 * A second fault on the same page means some other
+			 * condition needs handling by do_page_fault(), the
+			 * page really not being present is the most common.
 			 */
-			pr_debug("kmmio: duplicate probe hit on CPU %d, for "
-					"address 0x%08lx.\n",
-					smp_processor_id(), addr);
-			ret = 1;
-			goto no_kmmio_ctx;
-		}
-		/*
-		 * Prevent overwriting already in-flight context.
-		 * This should not happen, let's hope disarming at least
-		 * prevents a panic.
-		 */
-		pr_emerg("kmmio: recursive probe hit on CPU %d, "
+			pr_debug("kmmio: secondary hit for 0x%08lx CPU %d.\n",
+				addr, smp_processor_id());
+
+			if (!faultpage->old_presence)
+				pr_info("kmmio: unexpected secondary hit for "
+					"address 0x%08lx on CPU %d.\n", addr,
+					smp_processor_id());
+		} else {
+			/*
+			 * Prevent overwriting already in-flight context.
+			 * This should not happen, let's hope disarming at
+			 * least prevents a panic.
+			 */
+			pr_emerg("kmmio: recursive probe hit on CPU %d, "
 				"for address 0x%08lx. Ignoring.\n",
 				smp_processor_id(), addr);
 			pr_emerg("kmmio: previous hit was at 0x%08lx.\n",
 				ctx->addr);
+			disarm_kmmio_fault_page(faultpage);
+		}
 		goto no_kmmio_ctx;
 	}
 	ctx->active++;
@@ -244,7 +279,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
 	regs->flags &= ~X86_EFLAGS_IF;
 
 	/* Now we set present bit in PTE and single step. */
-	disarm_kmmio_fault_page(ctx->fpage->page, NULL);
+	disarm_kmmio_fault_page(ctx->fpage);
 
 	/*
 	 * If another cpu accesses the same page while we are stepping,
@@ -275,7 +310,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
 	struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
 
 	if (!ctx->active) {
-		pr_debug("kmmio: spurious debug trap on CPU %d.\n",
+		pr_warning("kmmio: spurious debug trap on CPU %d.\n",
 			smp_processor_id());
 		goto out;
 	}
@@ -283,7 +318,11 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
 	if (ctx->probe && ctx->probe->post_handler)
 		ctx->probe->post_handler(ctx->probe, condition, regs);
 
-	arm_kmmio_fault_page(ctx->fpage->page, NULL);
+	/* Prevent racing against release_kmmio_fault_page(). */
+	spin_lock(&kmmio_lock);
+	if (ctx->fpage->count)
+		arm_kmmio_fault_page(ctx->fpage);
+	spin_unlock(&kmmio_lock);
 
 	regs->flags &= ~X86_EFLAGS_TF;
 	regs->flags |= ctx->saved_flags;
@@ -315,20 +354,24 @@ static int add_kmmio_fault_page(unsigned long page)
 	f = get_kmmio_fault_page(page);
 	if (f) {
 		if (!f->count)
-			arm_kmmio_fault_page(f->page, NULL);
+			arm_kmmio_fault_page(f);
 		f->count++;
 		return 0;
 	}
 
-	f = kmalloc(sizeof(*f), GFP_ATOMIC);
+	f = kzalloc(sizeof(*f), GFP_ATOMIC);
 	if (!f)
 		return -1;
 
 	f->count = 1;
 	f->page = page;
-	list_add_rcu(&f->list, kmmio_page_list(f->page));
 
-	arm_kmmio_fault_page(f->page, NULL);
+	if (arm_kmmio_fault_page(f)) {
+		kfree(f);
+		return -1;
+	}
+
+	list_add_rcu(&f->list, kmmio_page_list(f->page));
 
 	return 0;
 }
@@ -347,7 +390,7 @@ static void release_kmmio_fault_page(unsigned long page,
 	f->count--;
 	BUG_ON(f->count < 0);
 	if (!f->count) {
-		disarm_kmmio_fault_page(f->page, NULL);
+		disarm_kmmio_fault_page(f);
 		f->release_next = *release_list;
 		*release_list = f;
 	}
@@ -408,23 +451,24 @@ static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
 
 static void remove_kmmio_fault_pages(struct rcu_head *head)
 {
-	struct kmmio_delayed_release *dr = container_of(
-						head,
-						struct kmmio_delayed_release,
-						rcu);
+	struct kmmio_delayed_release *dr =
+		container_of(head, struct kmmio_delayed_release, rcu);
 	struct kmmio_fault_page *p = dr->release_list;
 	struct kmmio_fault_page **prevp = &dr->release_list;
 	unsigned long flags;
+
 	spin_lock_irqsave(&kmmio_lock, flags);
 	while (p) {
-		if (!p->count)
+		if (!p->count) {
 			list_del_rcu(&p->list);
-		else
+			prevp = &p->release_next;
+		} else {
 			*prevp = p->release_next;
-		prevp = &p->release_next;
+		}
 		p = p->release_next;
 	}
 	spin_unlock_irqrestore(&kmmio_lock, flags);
+
 	/* This is the real RCU destroy call. */
 	call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages);
 }
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 71a14f89f89e..f3516da035d1 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -145,7 +145,7 @@ int __init compute_hash_shift(struct bootnode *nodes, int numnodes,
 	return shift;
 }
 
-int early_pfn_to_nid(unsigned long pfn)
+int __meminit __early_pfn_to_nid(unsigned long pfn)
 {
 	return phys_to_nid(pfn << PAGE_SHIFT);
 }
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 84ba74820ad6..7233bd7e357b 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -508,18 +508,24 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 #endif
 
 	/*
-	 * Install the new, split up pagetable. Important details here:
+	 * Install the new, split up pagetable.
 	 *
-	 * On Intel the NX bit of all levels must be cleared to make a
-	 * page executable. See section 4.13.2 of Intel 64 and IA-32
-	 * Architectures Software Developer's Manual).
+	 * We use the standard kernel pagetable protections for the new
+	 * pagetable protections, the actual ptes set above control the
+	 * primary protection behavior:
+	 */
+	__set_pmd_pte(kpte, address, mk_pte(base, __pgprot(_KERNPG_TABLE)));
+
+	/*
+	 * Intel Atom errata AAH41 workaround.
 	 *
-	 * Mark the entry present. The current mapping might be
-	 * set to not present, which we preserved above.
+	 * The real fix should be in hw or in a microcode update, but
+	 * we also probabilistically try to reduce the window of having
+	 * a large TLB mixed with 4K TLBs while instruction fetches are
+	 * going on.
 	 */
-	ref_prot = pte_pgprot(pte_mkexec(pte_clrhuge(*kpte)));
-	pgprot_val(ref_prot) |= _PAGE_PRESENT;
-	__set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
+	__flush_tlb_all();
+
 	base = NULL;
 
 out_unlock:
@@ -575,7 +581,6 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
 		address = cpa->vaddr[cpa->curpage];
 	else
 		address = *cpa->vaddr;
-
 repeat:
 	kpte = lookup_address(address, &level);
 	if (!kpte)
@@ -812,6 +817,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 
 	vm_unmap_aliases();
 
+	/*
+	 * If we're called with lazy mmu updates enabled, the
+	 * in-memory pte state may be stale. Flush pending updates to
+	 * bring them up to date.
+	 */
+	arch_flush_lazy_mmu_mode();
+
 	cpa.vaddr = addr;
 	cpa.numpages = numpages;
 	cpa.mask_set = mask_set;
@@ -854,6 +866,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 	} else
 		cpa_flush_all(cache);
 
+	/*
+	 * If we've been called with lazy mmu updates enabled, then
+	 * make sure that everything gets flushed out before we
+	 * return.
+	 */
+	arch_flush_lazy_mmu_mode();
+
 out:
 	return ret;
 }
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 7b61036427df..e0ab173b6974 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -11,6 +11,7 @@
 #include <linux/bootmem.h>
 #include <linux/debugfs.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/gfp.h>
 #include <linux/mm.h>
 #include <linux/fs.h>
@@ -211,6 +212,33 @@ chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type)
 static struct memtype *cached_entry;
 static u64 cached_start;
 
+static int pat_pagerange_is_ram(unsigned long start, unsigned long end)
+{
+	int ram_page = 0, not_rampage = 0;
+	unsigned long page_nr;
+
+	for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT);
+	     ++page_nr) {
+		/*
+		 * For legacy reasons, physical address range in the legacy ISA
+		 * region is tracked as non-RAM. This will allow users of
+		 * /dev/mem to map portions of legacy ISA region, even when
+		 * some of those portions are listed(or not even listed) with
+		 * different e820 types(RAM/reserved/..)
+		 */
+		if (page_nr >= (ISA_END_ADDRESS >> PAGE_SHIFT) &&
+		    page_is_ram(page_nr))
+			ram_page = 1;
+		else
+			not_rampage = 1;
+
+		if (ram_page == not_rampage)
+			return -1;
+	}
+
+	return ram_page;
+}
+
 /*
  * For RAM pages, mark the pages as non WB memory type using
  * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or
@@ -336,20 +364,12 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 	if (new_type)
 		*new_type = actual_type;
 
-	/*
-	 * For legacy reasons, some parts of the physical address range in the
-	 * legacy 1MB region is treated as non-RAM (even when listed as RAM in
-	 * the e820 tables). So we will track the memory attributes of this
-	 * legacy 1MB region using the linear memtype_list always.
-	 */
-	if (end >= ISA_END_ADDRESS) {
-		is_range_ram = pagerange_is_ram(start, end);
-		if (is_range_ram == 1)
-			return reserve_ram_pages_type(start, end, req_type,
-						      new_type);
-		else if (is_range_ram < 0)
-			return -EINVAL;
-	}
+	is_range_ram = pat_pagerange_is_ram(start, end);
+	if (is_range_ram == 1)
+		return reserve_ram_pages_type(start, end, req_type,
+					      new_type);
+	else if (is_range_ram < 0)
+		return -EINVAL;
 
 	new = kmalloc(sizeof(struct memtype), GFP_KERNEL);
 	if (!new)
@@ -446,19 +466,11 @@ int free_memtype(u64 start, u64 end)
 	if (is_ISA_range(start, end - 1))
 		return 0;
 
-	/*
-	 * For legacy reasons, some parts of the physical address range in the
-	 * legacy 1MB region is treated as non-RAM (even when listed as RAM in
-	 * the e820 tables). So we will track the memory attributes of this
-	 * legacy 1MB region using the linear memtype_list always.
-	 */
-	if (end >= ISA_END_ADDRESS) {
-		is_range_ram = pagerange_is_ram(start, end);
-		if (is_range_ram == 1)
-			return free_ram_pages_type(start, end);
-		else if (is_range_ram < 0)
-			return -EINVAL;
-	}
+	is_range_ram = pat_pagerange_is_ram(start, end);
+	if (is_range_ram == 1)
+		return free_ram_pages_type(start, end);
+	else if (is_range_ram < 0)
+		return -EINVAL;
 
 	spin_lock(&memtype_lock);
 	list_for_each_entry(entry, &memtype_list, nd) {
@@ -626,17 +638,13 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
 	unsigned long flags;
 	unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK);
 
-	is_ram = pagerange_is_ram(paddr, paddr + size);
+	is_ram = pat_pagerange_is_ram(paddr, paddr + size);
 
-	if (is_ram != 0) {
-		/*
-		 * For mapping RAM pages, drivers need to call
-		 * set_memory_[uc|wc|wb] directly, for reserve and free, before
-		 * setting up the PTE.
-		 */
-		WARN_ON_ONCE(1);
-		return 0;
-	}
+	/*
+	 * reserve_pfn_range() doesn't support RAM pages.
+	 */
+	if (is_ram != 0)
+		return -EINVAL;
 
 	ret = reserve_memtype(paddr, paddr + size, want_flags, &flags);
 	if (ret)
@@ -693,7 +701,7 @@ static void free_pfn_range(u64 paddr, unsigned long size)
 {
 	int is_ram;
 
-	is_ram = pagerange_is_ram(paddr, paddr + size);
+	is_ram = pat_pagerange_is_ram(paddr, paddr + size);
 	if (is_ram == 0)
 		free_memtype(paddr, paddr + size);
 }
@@ -861,6 +869,7 @@ pgprot_t pgprot_writecombine(pgprot_t prot)
 	else
 		return pgprot_noncached(prot);
 }
+EXPORT_SYMBOL_GPL(pgprot_writecombine);
 
 #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT)
 
diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c
index ab50a8d7402c..427fd1b56df5 100644
--- a/arch/x86/mm/testmmiotrace.c
+++ b/arch/x86/mm/testmmiotrace.c
@@ -1,5 +1,5 @@
 /*
- * Written by Pekka Paalanen, 2008 <pq@iki.fi>
+ * Written by Pekka Paalanen, 2008-2009 <pq@iki.fi>
  */
 #include <linux/module.h>
 #include <linux/io.h>
@@ -9,35 +9,74 @@
 
 static unsigned long mmio_address;
 module_param(mmio_address, ulong, 0);
-MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB.");
+MODULE_PARM_DESC(mmio_address, " Start address of the mapping of 16 kB "
+				"(or 8 MB if read_far is non-zero).");
+
+static unsigned long read_far = 0x400100;
+module_param(read_far, ulong, 0);
+MODULE_PARM_DESC(read_far, " Offset of a 32-bit read within 8 MB "
+				"(default: 0x400100).");
+
+static unsigned v16(unsigned i)
+{
+	return i * 12 + 7;
+}
+
+static unsigned v32(unsigned i)
+{
+	return i * 212371 + 13;
+}
 
 static void do_write_test(void __iomem *p)
 {
 	unsigned int i;
+	pr_info(MODULE_NAME ": write test.\n");
 	mmiotrace_printk("Write test.\n");
+
 	for (i = 0; i < 256; i++)
 		iowrite8(i, p + i);
+
 	for (i = 1024; i < (5 * 1024); i += 2)
-		iowrite16(i * 12 + 7, p + i);
+		iowrite16(v16(i), p + i);
+
 	for (i = (5 * 1024); i < (16 * 1024); i += 4)
-		iowrite32(i * 212371 + 13, p + i);
+		iowrite32(v32(i), p + i);
 }
 
 static void do_read_test(void __iomem *p)
 {
 	unsigned int i;
+	unsigned errs[3] = { 0 };
+	pr_info(MODULE_NAME ": read test.\n");
 	mmiotrace_printk("Read test.\n");
+
 	for (i = 0; i < 256; i++)
-		ioread8(p + i);
+		if (ioread8(p + i) != i)
+			++errs[0];
+
 	for (i = 1024; i < (5 * 1024); i += 2)
-		ioread16(p + i);
+		if (ioread16(p + i) != v16(i))
+			++errs[1];
+
 	for (i = (5 * 1024); i < (16 * 1024); i += 4)
-		ioread32(p + i);
+		if (ioread32(p + i) != v32(i))
+			++errs[2];
+
+	mmiotrace_printk("Read errors: 8-bit %d, 16-bit %d, 32-bit %d.\n",
+			 errs[0], errs[1], errs[2]);
 }
 
-static void do_test(void)
+static void do_read_far_test(void __iomem *p)
 {
-	void __iomem *p = ioremap_nocache(mmio_address, 0x4000);
+	pr_info(MODULE_NAME ": read far test.\n");
+	mmiotrace_printk("Read far test.\n");
+
+	ioread32(p + read_far);
+}
+
+static void do_test(unsigned long size)
+{
+	void __iomem *p = ioremap_nocache(mmio_address, size);
 	if (!p) {
 		pr_err(MODULE_NAME ": could not ioremap, aborting.\n");
 		return;
@@ -45,11 +84,15 @@ static void do_test(void)
 	mmiotrace_printk("ioremap returned %p.\n", p);
 	do_write_test(p);
 	do_read_test(p);
+	if (read_far && read_far < size - 4)
+		do_read_far_test(p);
 	iounmap(p);
 }
 
 static int __init init(void)
 {
+	unsigned long size = (read_far) ? (8 << 20) : (16 << 10);
+
 	if (mmio_address == 0) {
 		pr_err(MODULE_NAME ": you have to use the module argument "
 				"mmio_address.\n");
@@ -58,10 +101,11 @@ static int __init init(void)
 		return -ENXIO;
 	}
 
-	pr_warning(MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx "
-					"in PCI address space, and writing "
-					"rubbish in there.\n", mmio_address);
-	do_test();
+	pr_warning(MODULE_NAME ": WARNING: mapping %lu kB @ 0x%08lx in PCI "
+		"address space, and writing 16 kB of rubbish in there.\n",
+		size >> 10, mmio_address);
+	do_test(size);
+	pr_info(MODULE_NAME ": All done.\n");
 	return 0;
 }
 