aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-02-17 17:27:39 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2009-02-17 17:27:39 -0500
commit35010334aa007480a833401b80922299cb1a15ef (patch)
tree6e318eb89da45d37ea8dceddd1ae6858b74dee1e
parent8ce9a75a307e142a8871c649627555e0e4a1eefb (diff)
parentbe716615fe596ee117292dc615e95f707fb67fd1 (diff)
Merge branch 'x86-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'x86-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: x86, vm86: fix preemption bug x86, olpc: fix model detection without OFW x86, hpet: fix for LS21 + HPET = boot hang x86: CPA avoid repeated lazy mmu flush x86: warn if arch_flush_lazy_mmu_cpu is called in preemptible context x86/paravirt: make arch_flush_lazy_mmu/cpu disable preemption x86, pat: fix warn_on_once() while mapping 0-1MB range with /dev/mem x86/cpa: make sure cpa is safe to call in lazy mmu mode x86, ptrace, mm: fix double-free on race
-rw-r--r--arch/x86/include/asm/page.h1
-rw-r--r--arch/x86/include/asm/paravirt.h17
-rw-r--r--arch/x86/kernel/hpet.c2
-rw-r--r--arch/x86/kernel/olpc.c2
-rw-r--r--arch/x86/kernel/paravirt.c26
-rw-r--r--arch/x86/kernel/ptrace.c16
-rw-r--r--arch/x86/kernel/traps.c10
-rw-r--r--arch/x86/mm/ioremap.c19
-rw-r--r--arch/x86/mm/pageattr.c15
-rw-r--r--arch/x86/mm/pat.c83
-rw-r--r--include/linux/mm.h1
-rw-r--r--mm/mlock.c7
12 files changed, 116 insertions, 83 deletions
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index e9873a2e8695..776579119a00 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -57,7 +57,6 @@ typedef struct { pgdval_t pgd; } pgd_t;
57typedef struct { pgprotval_t pgprot; } pgprot_t; 57typedef struct { pgprotval_t pgprot; } pgprot_t;
58 58
59extern int page_is_ram(unsigned long pagenr); 59extern int page_is_ram(unsigned long pagenr);
60extern int pagerange_is_ram(unsigned long start, unsigned long end);
61extern int devmem_is_allowed(unsigned long pagenr); 60extern int devmem_is_allowed(unsigned long pagenr);
62extern void map_devmem(unsigned long pfn, unsigned long size, 61extern void map_devmem(unsigned long pfn, unsigned long size,
63 pgprot_t vma_prot); 62 pgprot_t vma_prot);
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index c09a14127584..e299287e8e33 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -1352,14 +1352,7 @@ static inline void arch_leave_lazy_cpu_mode(void)
1352 PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave); 1352 PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave);
1353} 1353}
1354 1354
1355static inline void arch_flush_lazy_cpu_mode(void) 1355void arch_flush_lazy_cpu_mode(void);
1356{
1357 if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)) {
1358 arch_leave_lazy_cpu_mode();
1359 arch_enter_lazy_cpu_mode();
1360 }
1361}
1362
1363 1356
1364#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE 1357#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
1365static inline void arch_enter_lazy_mmu_mode(void) 1358static inline void arch_enter_lazy_mmu_mode(void)
@@ -1372,13 +1365,7 @@ static inline void arch_leave_lazy_mmu_mode(void)
1372 PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave); 1365 PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave);
1373} 1366}
1374 1367
1375static inline void arch_flush_lazy_mmu_mode(void) 1368void arch_flush_lazy_mmu_mode(void);
1376{
1377 if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU)) {
1378 arch_leave_lazy_mmu_mode();
1379 arch_enter_lazy_mmu_mode();
1380 }
1381}
1382 1369
1383static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, 1370static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
1384 unsigned long phys, pgprot_t flags) 1371 unsigned long phys, pgprot_t flags)
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 388254f69a2a..a00545fe5cdd 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -269,6 +269,8 @@ static void hpet_set_mode(enum clock_event_mode mode,
269 now = hpet_readl(HPET_COUNTER); 269 now = hpet_readl(HPET_COUNTER);
270 cmp = now + (unsigned long) delta; 270 cmp = now + (unsigned long) delta;
271 cfg = hpet_readl(HPET_Tn_CFG(timer)); 271 cfg = hpet_readl(HPET_Tn_CFG(timer));
272 /* Make sure we use edge triggered interrupts */
273 cfg &= ~HPET_TN_LEVEL;
272 cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | 274 cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
273 HPET_TN_SETVAL | HPET_TN_32BIT; 275 HPET_TN_SETVAL | HPET_TN_32BIT;
274 hpet_writel(cfg, HPET_Tn_CFG(timer)); 276 hpet_writel(cfg, HPET_Tn_CFG(timer));
diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c
index 7a13fac63a1f..4006c522adc7 100644
--- a/arch/x86/kernel/olpc.c
+++ b/arch/x86/kernel/olpc.c
@@ -203,7 +203,7 @@ static void __init platform_detect(void)
203static void __init platform_detect(void) 203static void __init platform_detect(void)
204{ 204{
205 /* stopgap until OFW support is added to the kernel */ 205 /* stopgap until OFW support is added to the kernel */
206 olpc_platform_info.boardrev = 0xc2; 206 olpc_platform_info.boardrev = olpc_board(0xc2);
207} 207}
208#endif 208#endif
209 209
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index e4c8fb608873..c6520a4e85d4 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -268,6 +268,32 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
268 return __get_cpu_var(paravirt_lazy_mode); 268 return __get_cpu_var(paravirt_lazy_mode);
269} 269}
270 270
271void arch_flush_lazy_mmu_mode(void)
272{
273 preempt_disable();
274
275 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
276 WARN_ON(preempt_count() == 1);
277 arch_leave_lazy_mmu_mode();
278 arch_enter_lazy_mmu_mode();
279 }
280
281 preempt_enable();
282}
283
284void arch_flush_lazy_cpu_mode(void)
285{
286 preempt_disable();
287
288 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
289 WARN_ON(preempt_count() == 1);
290 arch_leave_lazy_cpu_mode();
291 arch_enter_lazy_cpu_mode();
292 }
293
294 preempt_enable();
295}
296
271struct pv_info pv_info = { 297struct pv_info pv_info = {
272 .name = "bare hardware", 298 .name = "bare hardware",
273 .paravirt_enabled = 0, 299 .paravirt_enabled = 0,
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 0a5df5f82fb9..5a4c23d89892 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -810,12 +810,16 @@ static void ptrace_bts_untrace(struct task_struct *child)
810 810
811static void ptrace_bts_detach(struct task_struct *child) 811static void ptrace_bts_detach(struct task_struct *child)
812{ 812{
813 if (unlikely(child->bts)) { 813 /*
814 ds_release_bts(child->bts); 814 * Ptrace_detach() races with ptrace_untrace() in case
815 child->bts = NULL; 815 * the child dies and is reaped by another thread.
816 816 *
817 ptrace_bts_free_buffer(child); 817 * We only do the memory accounting at this point and
818 } 818 * leave the buffer deallocation and the bts tracer
819 * release to ptrace_bts_untrace() which will be called
820 * later on with tasklist_lock held.
821 */
822 release_locked_buffer(child->bts_buffer, child->bts_size);
819} 823}
820#else 824#else
821static inline void ptrace_bts_fork(struct task_struct *tsk) {} 825static inline void ptrace_bts_fork(struct task_struct *tsk) {}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 7932338d7cb3..a9e7548e1790 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -99,6 +99,12 @@ static inline void preempt_conditional_sti(struct pt_regs *regs)
99 local_irq_enable(); 99 local_irq_enable();
100} 100}
101 101
102static inline void conditional_cli(struct pt_regs *regs)
103{
104 if (regs->flags & X86_EFLAGS_IF)
105 local_irq_disable();
106}
107
102static inline void preempt_conditional_cli(struct pt_regs *regs) 108static inline void preempt_conditional_cli(struct pt_regs *regs)
103{ 109{
104 if (regs->flags & X86_EFLAGS_IF) 110 if (regs->flags & X86_EFLAGS_IF)
@@ -626,8 +632,10 @@ clear_dr7:
626 632
627#ifdef CONFIG_X86_32 633#ifdef CONFIG_X86_32
628debug_vm86: 634debug_vm86:
635 /* reenable preemption: handle_vm86_trap() might sleep */
636 dec_preempt_count();
629 handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); 637 handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
630 preempt_conditional_cli(regs); 638 conditional_cli(regs);
631 return; 639 return;
632#endif 640#endif
633 641
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index af750ab973b6..f45d5e29a72e 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -134,25 +134,6 @@ int page_is_ram(unsigned long pagenr)
134 return 0; 134 return 0;
135} 135}
136 136
137int pagerange_is_ram(unsigned long start, unsigned long end)
138{
139 int ram_page = 0, not_rampage = 0;
140 unsigned long page_nr;
141
142 for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT);
143 ++page_nr) {
144 if (page_is_ram(page_nr))
145 ram_page = 1;
146 else
147 not_rampage = 1;
148
149 if (ram_page == not_rampage)
150 return -1;
151 }
152
153 return ram_page;
154}
155
156/* 137/*
157 * Fix up the linear direct mapping of the kernel to avoid cache attribute 138 * Fix up the linear direct mapping of the kernel to avoid cache attribute
158 * conflicts. 139 * conflicts.
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 84ba74820ad6..8ca0d8566fc8 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -575,7 +575,6 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
575 address = cpa->vaddr[cpa->curpage]; 575 address = cpa->vaddr[cpa->curpage];
576 else 576 else
577 address = *cpa->vaddr; 577 address = *cpa->vaddr;
578
579repeat: 578repeat:
580 kpte = lookup_address(address, &level); 579 kpte = lookup_address(address, &level);
581 if (!kpte) 580 if (!kpte)
@@ -812,6 +811,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
812 811
813 vm_unmap_aliases(); 812 vm_unmap_aliases();
814 813
814 /*
815 * If we're called with lazy mmu updates enabled, the
816 * in-memory pte state may be stale. Flush pending updates to
817 * bring them up to date.
818 */
819 arch_flush_lazy_mmu_mode();
820
815 cpa.vaddr = addr; 821 cpa.vaddr = addr;
816 cpa.numpages = numpages; 822 cpa.numpages = numpages;
817 cpa.mask_set = mask_set; 823 cpa.mask_set = mask_set;
@@ -854,6 +860,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
854 } else 860 } else
855 cpa_flush_all(cache); 861 cpa_flush_all(cache);
856 862
863 /*
864 * If we've been called with lazy mmu updates enabled, then
865 * make sure that everything gets flushed out before we
866 * return.
867 */
868 arch_flush_lazy_mmu_mode();
869
857out: 870out:
858 return ret; 871 return ret;
859} 872}
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 7b61036427df..aebbf67a79d0 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -211,6 +211,33 @@ chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type)
211static struct memtype *cached_entry; 211static struct memtype *cached_entry;
212static u64 cached_start; 212static u64 cached_start;
213 213
214static int pat_pagerange_is_ram(unsigned long start, unsigned long end)
215{
216 int ram_page = 0, not_rampage = 0;
217 unsigned long page_nr;
218
219 for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT);
220 ++page_nr) {
221 /*
222 * For legacy reasons, physical address range in the legacy ISA
223 * region is tracked as non-RAM. This will allow users of
224 * /dev/mem to map portions of legacy ISA region, even when
225 * some of those portions are listed(or not even listed) with
226 * different e820 types(RAM/reserved/..)
227 */
228 if (page_nr >= (ISA_END_ADDRESS >> PAGE_SHIFT) &&
229 page_is_ram(page_nr))
230 ram_page = 1;
231 else
232 not_rampage = 1;
233
234 if (ram_page == not_rampage)
235 return -1;
236 }
237
238 return ram_page;
239}
240
214/* 241/*
215 * For RAM pages, mark the pages as non WB memory type using 242 * For RAM pages, mark the pages as non WB memory type using
216 * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or 243 * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or
@@ -336,20 +363,12 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
336 if (new_type) 363 if (new_type)
337 *new_type = actual_type; 364 *new_type = actual_type;
338 365
339 /* 366 is_range_ram = pat_pagerange_is_ram(start, end);
340 * For legacy reasons, some parts of the physical address range in the 367 if (is_range_ram == 1)
341 * legacy 1MB region is treated as non-RAM (even when listed as RAM in 368 return reserve_ram_pages_type(start, end, req_type,
342 * the e820 tables). So we will track the memory attributes of this 369 new_type);
343 * legacy 1MB region using the linear memtype_list always. 370 else if (is_range_ram < 0)
344 */ 371 return -EINVAL;
345 if (end >= ISA_END_ADDRESS) {
346 is_range_ram = pagerange_is_ram(start, end);
347 if (is_range_ram == 1)
348 return reserve_ram_pages_type(start, end, req_type,
349 new_type);
350 else if (is_range_ram < 0)
351 return -EINVAL;
352 }
353 372
354 new = kmalloc(sizeof(struct memtype), GFP_KERNEL); 373 new = kmalloc(sizeof(struct memtype), GFP_KERNEL);
355 if (!new) 374 if (!new)
@@ -446,19 +465,11 @@ int free_memtype(u64 start, u64 end)
446 if (is_ISA_range(start, end - 1)) 465 if (is_ISA_range(start, end - 1))
447 return 0; 466 return 0;
448 467
449 /* 468 is_range_ram = pat_pagerange_is_ram(start, end);
450 * For legacy reasons, some parts of the physical address range in the 469 if (is_range_ram == 1)
451 * legacy 1MB region is treated as non-RAM (even when listed as RAM in 470 return free_ram_pages_type(start, end);
452 * the e820 tables). So we will track the memory attributes of this 471 else if (is_range_ram < 0)
453 * legacy 1MB region using the linear memtype_list always. 472 return -EINVAL;
454 */
455 if (end >= ISA_END_ADDRESS) {
456 is_range_ram = pagerange_is_ram(start, end);
457 if (is_range_ram == 1)
458 return free_ram_pages_type(start, end);
459 else if (is_range_ram < 0)
460 return -EINVAL;
461 }
462 473
463 spin_lock(&memtype_lock); 474 spin_lock(&memtype_lock);
464 list_for_each_entry(entry, &memtype_list, nd) { 475 list_for_each_entry(entry, &memtype_list, nd) {
@@ -626,17 +637,13 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
626 unsigned long flags; 637 unsigned long flags;
627 unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK); 638 unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK);
628 639
629 is_ram = pagerange_is_ram(paddr, paddr + size); 640 is_ram = pat_pagerange_is_ram(paddr, paddr + size);
630 641
631 if (is_ram != 0) { 642 /*
632 /* 643 * reserve_pfn_range() doesn't support RAM pages.
633 * For mapping RAM pages, drivers need to call 644 */
634 * set_memory_[uc|wc|wb] directly, for reserve and free, before 645 if (is_ram != 0)
635 * setting up the PTE. 646 return -EINVAL;
636 */
637 WARN_ON_ONCE(1);
638 return 0;
639 }
640 647
641 ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); 648 ret = reserve_memtype(paddr, paddr + size, want_flags, &flags);
642 if (ret) 649 if (ret)
@@ -693,7 +700,7 @@ static void free_pfn_range(u64 paddr, unsigned long size)
693{ 700{
694 int is_ram; 701 int is_ram;
695 702
696 is_ram = pagerange_is_ram(paddr, paddr + size); 703 is_ram = pat_pagerange_is_ram(paddr, paddr + size);
697 if (is_ram == 0) 704 if (is_ram == 0)
698 free_memtype(paddr, paddr + size); 705 free_memtype(paddr, paddr + size);
699} 706}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 323561582c10..7dc04ff5ab89 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1304,5 +1304,6 @@ void vmemmap_populate_print_last(void);
1304 1304
1305extern void *alloc_locked_buffer(size_t size); 1305extern void *alloc_locked_buffer(size_t size);
1306extern void free_locked_buffer(void *buffer, size_t size); 1306extern void free_locked_buffer(void *buffer, size_t size);
1307extern void release_locked_buffer(void *buffer, size_t size);
1307#endif /* __KERNEL__ */ 1308#endif /* __KERNEL__ */
1308#endif /* _LINUX_MM_H */ 1309#endif /* _LINUX_MM_H */
diff --git a/mm/mlock.c b/mm/mlock.c
index 037161d61b4e..cbe9e0581b75 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -660,7 +660,7 @@ void *alloc_locked_buffer(size_t size)
660 return buffer; 660 return buffer;
661} 661}
662 662
663void free_locked_buffer(void *buffer, size_t size) 663void release_locked_buffer(void *buffer, size_t size)
664{ 664{
665 unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; 665 unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
666 666
@@ -670,6 +670,11 @@ void free_locked_buffer(void *buffer, size_t size)
670 current->mm->locked_vm -= pgsz; 670 current->mm->locked_vm -= pgsz;
671 671
672 up_write(&current->mm->mmap_sem); 672 up_write(&current->mm->mmap_sem);
673}
674
675void free_locked_buffer(void *buffer, size_t size)
676{
677 release_locked_buffer(buffer, size);
673 678
674 kfree(buffer); 679 kfree(buffer);
675} 680}