From 8b8f79b927b6b302bb65fb8c56e7a19be5fbdbef Mon Sep 17 00:00:00 2001
From: Marcin Slusarz
Date: Sun, 13 Jun 2010 23:56:54 +0200
Subject: x86, kmmio/mmiotrace: Fix double free of kmmio_fault_pages

After every iounmap, mmiotrace has to free the kmmio_fault_pages, but it
can't do so directly, so it defers freeing via RCU. This usually works,
but when the mmiotraced code calls ioremap/iounmap multiple times without
sleeping in between (so RCU won't kick in and start freeing), ioremap can
return the same virtual address, and on every iounmap mmiotrace will
schedule the same pages for release. It then explodes on the second free.

Fix this by marking kmmio_fault_pages that are already scheduled for
release and not adding them a second time.

Signed-off-by: Marcin Slusarz
Tested-by: Marcin Kocielnicki
Tested-by: Shinpei KATO
Acked-by: Pekka Paalanen
Cc: Stuart Bennett
Cc: Marcin Kocielnicki
Cc: nouveau@lists.freedesktop.org
Cc:
LKML-Reference: <20100613215654.GA3829@joi.lan>
Signed-off-by: Ingo Molnar
---
 arch/x86/mm/kmmio.c         | 16 +++++++++++++---
 arch/x86/mm/testmmiotrace.c | 22 ++++++++++++++++++++++
 2 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 5d0e67fff1a6..e5d5e2ce9f77 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -45,6 +45,8 @@ struct kmmio_fault_page {
 	 * Protected by kmmio_lock, when linked into kmmio_page_table.
 	 */
 	int count;
+
+	bool scheduled_for_release;
 };
 
 struct kmmio_delayed_release {
@@ -398,8 +400,11 @@ static void release_kmmio_fault_page(unsigned long page,
 	BUG_ON(f->count < 0);
 	if (!f->count) {
 		disarm_kmmio_fault_page(f);
-		f->release_next = *release_list;
-		*release_list = f;
+		if (!f->scheduled_for_release) {
+			f->release_next = *release_list;
+			*release_list = f;
+			f->scheduled_for_release = true;
+		}
 	}
 }
 
@@ -471,8 +476,10 @@ static void remove_kmmio_fault_pages(struct rcu_head *head)
 			prevp = &f->release_next;
 		} else {
 			*prevp = f->release_next;
+			f->release_next = NULL;
+			f->scheduled_for_release = false;
 		}
-		f = f->release_next;
+		f = *prevp;
 	}
 	spin_unlock_irqrestore(&kmmio_lock, flags);
 
@@ -510,6 +517,9 @@ void unregister_kmmio_probe(struct kmmio_probe *p)
 	kmmio_count--;
 	spin_unlock_irqrestore(&kmmio_lock, flags);
 
+	if (!release_list)
+		return;
+
 	drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC);
 	if (!drelease) {
 		pr_crit("leaking kmmio_fault_page objects.\n");
diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c
index 8565d944f7cf..38868adf07ea 100644
--- a/arch/x86/mm/testmmiotrace.c
+++ b/arch/x86/mm/testmmiotrace.c
@@ -90,6 +90,27 @@ static void do_test(unsigned long size)
 	iounmap(p);
 }
 
+/*
+ * Tests how mmiotrace behaves in face of multiple ioremap / iounmaps in
+ * a short time. We had a bug in deferred freeing procedure which tried
+ * to free this region multiple times (ioremap can reuse the same address
+ * for many mappings).
+ */
+static void do_test_bulk_ioremapping(void)
+{
+	void __iomem *p;
+	int i;
+
+	for (i = 0; i < 10; ++i) {
+		p = ioremap_nocache(mmio_address, PAGE_SIZE);
+		if (p)
+			iounmap(p);
+	}
+
+	/* Force freeing. If it will crash we will know why. */
+	synchronize_rcu();
+}
+
 static int __init init(void)
 {
 	unsigned long size = (read_far) ? (8 << 20) : (16 << 10);
@@ -104,6 +125,7 @@ static int __init init(void)
 		   "and writing 16 kB of rubbish in there.\n",
 		   size >> 10, mmio_address);
 	do_test(size);
+	do_test_bulk_ioremapping();
 	pr_info("All done.\n");
 	return 0;
 }
-- 
cgit v1.2.2
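[Editorial note: the guard above is a general pattern: an object that can be
"released" repeatedly must be queued on a deferred-free list exactly once.
A minimal user-space sketch of the same idea (illustrative only, not kernel
code; the names merely mirror kmmio.c):

	#include <stdbool.h>
	#include <stddef.h>
	#include <stdlib.h>

	struct fault_page {
		struct fault_page *release_next;
		bool scheduled_for_release;	/* mirrors the new flag in kmmio.c */
	};

	/* Queue f for deferred freeing; a second call becomes a no-op. */
	static void schedule_release(struct fault_page *f, struct fault_page **list)
	{
		if (f->scheduled_for_release)
			return;			/* already queued: prevents the double free */
		f->release_next = *list;
		*list = f;
		f->scheduled_for_release = true;
	}

	int main(void)
	{
		struct fault_page *list = NULL;
		struct fault_page *f = calloc(1, sizeof(*f));

		if (!f)
			return 1;
		schedule_release(f, &list);
		schedule_release(f, &list);	/* the buggy code would queue f twice here */

		while (list) {			/* drain: each node is freed exactly once */
			struct fault_page *next = list->release_next;
			free(list);
			list = next;
		}
		return 0;
	}
]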
From d7a0380dc3e6607d30ccdfc3cfc2ccee0d966716 Mon Sep 17 00:00:00 2001
From: Jiri Slaby
Date: Wed, 16 Jun 2010 22:30:42 +0200
Subject: x86-64, mm: Initialize VDSO earlier on 64 bits

When an initrd is in use and a driver does request_module() in its
module_init (i.e. __initcall or device_initcall), a modprobe process is
created with a VDSO mapping. But the VDSO is itself initialized by an
__initcall, i.e. at the same level (at the same time), so it may not have
been initialized yet (link order matters).

Move the VDSO initialization code earlier by switching it to a level that
runs before rootfs_initcall, where the initrd is loaded as rootfs;
specifically, to subsys_initcall.

Do this for the standard 64-bit path (init_vdso_vars) and for compat
(sysenter_setup), in case people have a 32-bit initrd with ia32 emulation
built in. i386 (pure 32-bit) is not affected, since sysenter_setup() is
called from check_bugs()->identify_boot_cpu() in start_kernel(), before
rest_init()->kernel_thread(kernel_init), and it is kernel_init() that
calls do_basic_setup()->do_initcalls().

What this patch fixes are early modprobe crashes such as:

  Unpacking initramfs...
  Freeing initrd memory: 9324k freed
  modprobe[368]: segfault at 7fff4429c020 ip 00007fef397e160c \
    sp 00007fff442795c0 error 4 in ld-2.11.2.so[7fef397df000+1f000]

Signed-off-by: Jiri Slaby
LKML-Reference: <1276720242-13365-1-git-send-email-jslaby@suse.cz>
Signed-off-by: H. Peter Anvin
---
 arch/x86/vdso/vdso32-setup.c | 2 +-
 arch/x86/vdso/vma.c          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 02b442e92007..36df991985b2 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -374,7 +374,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 
 #ifdef CONFIG_X86_64
 
-__initcall(sysenter_setup);
+subsys_initcall(sysenter_setup);
 
 #ifdef CONFIG_SYSCTL
 /* Register vsyscall32 into the ABI table */
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index ac74869b8140..43456ee17692 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -74,7 +74,7 @@ static int __init init_vdso_vars(void)
 	vdso_enabled = 0;
 	return -ENOMEM;
 }
-__initcall(init_vdso_vars);
+subsys_initcall(init_vdso_vars);
 
 struct linux_binprm;
 
-- 
cgit v1.2.2
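[Editorial note: the fix relies on initcall levels: subsys_initcall (level 4)
runs before rootfs_initcall (where the initrd is unpacked), which in turn
runs before device_initcall (level 6), whereas two entries at the same level
run in link order. A hypothetical built-in driver sketch, not part of the
patch, showing the now-guaranteed ordering:

	#include <linux/init.h>

	static int __init vdso_like_setup(void)
	{
		/* Runs at level 4, before the initrd is unpacked as rootfs. */
		return 0;
	}
	subsys_initcall(vdso_like_setup);

	static int __init driver_probe(void)
	{
		/*
		 * Runs at level 6; by now vdso_like_setup() has completed,
		 * so a request_module() here can safely exec modprobe.
		 */
		return 0;
	}
	device_initcall(driver_probe);
]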
From ffa71f33a820d1ab3f2fc5723819ac60fb76080b Mon Sep 17 00:00:00 2001
From: Kenji Kaneshige
Date: Fri, 18 Jun 2010 12:22:40 +0900
Subject: x86, ioremap: Fix incorrect physical address handling in PAE mode

The current x86 ioremap() does not handle physical addresses higher than
32-bit properly in X86_32 PAE mode. When a physical address higher than
32-bit is passed to ioremap(), the upper 32 bits of the address are
wrongly cleared. Due to this bug, ioremap() can map the wrong address
into the linear address space.

In my case, a 64-bit MMIO region was assigned to a PCI device (an ioat
device) on my system. Because of this ioremap() bug, the wrong physical
address (instead of the MMIO region) was mapped into the linear address
space, and loading the ioatdma driver caused unexpected behavior
(kernel panic, kernel hangup, ...).

Signed-off-by: Kenji Kaneshige
LKML-Reference: <4C1AE680.7090408@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin
---
 arch/x86/mm/ioremap.c   | 12 +++++-------
 include/linux/io.h      |  4 ++--
 include/linux/vmalloc.h |  2 +-
 lib/ioremap.c           | 10 +++++-----
 mm/vmalloc.c            |  2 +-
 5 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 12e4d2d3c110..754cb4cbce66 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -62,8 +62,8 @@ int ioremap_change_attr(unsigned long vaddr, unsigned long size,
 static void __iomem *__ioremap_caller(resource_size_t phys_addr,
 		unsigned long size, unsigned long prot_val, void *caller)
 {
-	unsigned long pfn, offset, vaddr;
-	resource_size_t last_addr;
+	unsigned long offset, vaddr;
+	resource_size_t pfn, last_pfn, last_addr;
 	const resource_size_t unaligned_phys_addr = phys_addr;
 	const unsigned long unaligned_size = size;
 	struct vm_struct *area;
@@ -100,10 +100,8 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
 	/*
 	 * Don't allow anybody to remap normal RAM that we're using..
 	 */
-	for (pfn = phys_addr >> PAGE_SHIFT;
-				(pfn << PAGE_SHIFT) < (last_addr & PAGE_MASK);
-				pfn++) {
-
+	last_pfn = last_addr >> PAGE_SHIFT;
+	for (pfn = phys_addr >> PAGE_SHIFT; pfn < last_pfn; pfn++) {
 		int is_ram = page_is_ram(pfn);
 
 		if (is_ram && pfn_valid(pfn) && !PageReserved(pfn_to_page(pfn)))
@@ -115,7 +113,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
 	 * Mappings have to be page-aligned
 	 */
 	offset = phys_addr & ~PAGE_MASK;
-	phys_addr &= PAGE_MASK;
+	phys_addr &= PHYSICAL_PAGE_MASK;
 	size = PAGE_ALIGN(last_addr+1) - phys_addr;
 
 	retval = reserve_memtype(phys_addr, (u64)phys_addr + size,
diff --git a/include/linux/io.h b/include/linux/io.h
index 6c7f0ba0d5fa..7fd2d2138bf3 100644
--- a/include/linux/io.h
+++ b/include/linux/io.h
@@ -29,10 +29,10 @@ void __iowrite64_copy(void __iomem *to, const void *from, size_t count);
 
 #ifdef CONFIG_MMU
 int ioremap_page_range(unsigned long addr, unsigned long end,
-		       unsigned long phys_addr, pgprot_t prot);
+		       phys_addr_t phys_addr, pgprot_t prot);
 #else
 static inline int ioremap_page_range(unsigned long addr, unsigned long end,
-				     unsigned long phys_addr, pgprot_t prot)
+				     phys_addr_t phys_addr, pgprot_t prot)
 {
 	return 0;
 }
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 227c2a585e4f..de05e96e0a70 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -30,7 +30,7 @@ struct vm_struct {
 	unsigned long		flags;
 	struct page		**pages;
 	unsigned int		nr_pages;
-	unsigned long		phys_addr;
+	phys_addr_t		phys_addr;
 	void			*caller;
 };
diff --git a/lib/ioremap.c b/lib/ioremap.c
index 14c6078f17a2..5730ecd3eb66 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -13,10 +13,10 @@
 #include <asm/pgtable.h>
 
 static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
-		unsigned long end, unsigned long phys_addr, pgprot_t prot)
+		unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
 {
 	pte_t *pte;
-	unsigned long pfn;
+	u64 pfn;
 
 	pfn = phys_addr >> PAGE_SHIFT;
 	pte = pte_alloc_kernel(pmd, addr);
@@ -31,7 +31,7 @@ static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
 }
 
 static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
-		unsigned long end, unsigned long phys_addr, pgprot_t prot)
+		unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -49,7 +49,7 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
 }
 
 static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
-		unsigned long end, unsigned long phys_addr, pgprot_t prot)
+		unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -67,7 +67,7 @@ static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
 }
 
 int ioremap_page_range(unsigned long addr,
-		       unsigned long end, unsigned long phys_addr, pgprot_t prot)
+		       unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
 {
 	pgd_t *pgd;
 	unsigned long start;
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index ae007462b7f6..b7e314b1009f 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2403,7 +2403,7 @@ static int s_show(struct seq_file *m, void *p)
 		seq_printf(m, " pages=%d", v->nr_pages);
 
 	if (v->phys_addr)
-		seq_printf(m, " phys=%lx", v->phys_addr);
+		seq_printf(m, " phys=%llx", (unsigned long long)v->phys_addr);
 
 	if (v->flags & VM_IOREMAP)
 		seq_printf(m, " ioremap");
-- 
cgit v1.2.2

From 35be1b716a475717611b2dc04185e9d80b9cb693 Mon Sep 17 00:00:00 2001
From: Kenji Kaneshige
Date: Fri, 18 Jun 2010 12:23:57 +0900
Subject: x86, ioremap: Fix normal ram range check

The check for normal RAM in the x86 ioremap() code does not work for the
last page frame in the specified physical address range.

Signed-off-by: Kenji Kaneshige
LKML-Reference: <4C1AE6CD.1080704@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin
---
 arch/x86/mm/ioremap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 754cb4cbce66..d41d3a9036ca 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -101,7 +101,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
 	 * Don't allow anybody to remap normal RAM that we're using..
 	 */
 	last_pfn = last_addr >> PAGE_SHIFT;
-	for (pfn = phys_addr >> PAGE_SHIFT; pfn < last_pfn; pfn++) {
+	for (pfn = phys_addr >> PAGE_SHIFT; pfn <= last_pfn; pfn++) {
 		int is_ram = page_is_ram(pfn);
 
 		if (is_ram && pfn_valid(pfn) && !PageReserved(pfn_to_page(pfn)))
-- 
cgit v1.2.2
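[Editorial note: taken together, the two fixes above compute the RAM check
bounds as inclusive page-frame numbers. A small stand-alone sketch of the
arithmetic, with hypothetical values:

	#include <stdio.h>

	#define PAGE_SHIFT 12

	int main(void)
	{
		/* hypothetical 2-page region: bytes [0x1000, 0x2fff] */
		unsigned long long phys_addr = 0x1000, size = 0x2000;
		unsigned long long last_addr = phys_addr + size - 1;	/* inclusive */

		unsigned long long pfn      = phys_addr >> PAGE_SHIFT;	/* 1 */
		unsigned long long last_pfn = last_addr >> PAGE_SHIFT;	/* 2 */

		/* 'pfn < last_pfn' would skip frame 2; '<=' checks both frames */
		for (; pfn <= last_pfn; pfn++)
			printf("checking pfn %llu\n", pfn);
		return 0;
	}
]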
From 92851e2fca48f1893f899963c13b55b61ac6956c Mon Sep 17 00:00:00 2001
From: Andres Salomon
Date: Tue, 20 Jul 2010 15:19:46 -0700
Subject: x86, mm: Create symbolic index into address_markers array

Without this, adding entries into the address_markers array means adding
more and more of an #ifdef maze in pt_dump_init(). By using indices, we
can keep it a bit saner.

Signed-off-by: Andres Salomon
LKML-Reference: <201007202219.o6KMJkUs021052@imap1.linux-foundation.org>
Cc: Jordan Crouse
Signed-off-by: Andrew Morton
Signed-off-by: H. Peter Anvin
---
 arch/x86/mm/dump_pagetables.c | 32 ++++++++++++++++++++++++++------
 1 file changed, 26 insertions(+), 6 deletions(-)

diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index a725b7f760ae..0002a3a33081 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -37,6 +37,28 @@ struct addr_marker {
 	const char *name;
 };
 
+/* indices for address_markers; keep sync'd w/ address_markers below */
+enum address_markers_idx {
+	USER_SPACE_NR = 0,
+#ifdef CONFIG_X86_64
+	KERNEL_SPACE_NR,
+	LOW_KERNEL_NR,
+	VMALLOC_START_NR,
+	VMEMMAP_START_NR,
+	HIGH_KERNEL_NR,
+	MODULES_VADDR_NR,
+	MODULES_END_NR,
+#else
+	KERNEL_SPACE_NR,
+	VMALLOC_START_NR,
+	VMALLOC_END_NR,
+# ifdef CONFIG_HIGHMEM
+	PKMAP_BASE_NR,
+# endif
+	FIXADDR_START_NR,
+#endif
+};
+
 /* Address space markers hints */
 static struct addr_marker address_markers[] = {
 	{ 0, "User Space" },
@@ -331,14 +353,12 @@ static int pt_dump_init(void)
 
 #ifdef CONFIG_X86_32
 	/* Not a compile-time constant on x86-32 */
-	address_markers[2].start_address = VMALLOC_START;
-	address_markers[3].start_address = VMALLOC_END;
+	address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
+	address_markers[VMALLOC_END_NR].start_address = VMALLOC_END;
 # ifdef CONFIG_HIGHMEM
-	address_markers[4].start_address = PKMAP_BASE;
-	address_markers[5].start_address = FIXADDR_START;
-# else
-	address_markers[4].start_address = FIXADDR_START;
+	address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE;
 # endif
+	address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
 #endif
 
 	pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL,
-- 
cgit v1.2.2

From 468c30f2bbdf1ba0fbf16667eade23a46eaa8f06 Mon Sep 17 00:00:00 2001
From: Florian Zumbiehl
Date: Tue, 20 Jul 2010 15:19:47 -0700
Subject: x86, iomap: Fix wrong page aligned size calculation in ioremapping code

x86 early_iounmap(): fix off-by-one error in page alignment of the
allocation size for sizes where size % PAGE_SIZE == 1.

Signed-off-by: Florian Zumbiehl
LKML-Reference: <201007202219.o6KMJlES021058@imap1.linux-foundation.org>
Signed-off-by: Andrew Morton
Signed-off-by: H. Peter Anvin
---
 arch/x86/mm/ioremap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index d41d3a9036ca..3ba6e0608c55 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -611,7 +611,7 @@ void __init early_iounmap(void __iomem *addr, unsigned long size)
 		return;
 	}
 	offset = virt_addr & ~PAGE_MASK;
-	nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT;
+	nrpages = PAGE_ALIGN(offset + size) >> PAGE_SHIFT;
 
 	idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot;
 	while (nrpages > 0) {
-- 
cgit v1.2.2
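[Editorial note: the off-by-one is easy to reproduce with the alignment
arithmetic alone. A stand-alone sketch, assuming 4 KiB pages:

	#include <stdio.h>

	#define PAGE_SIZE	4096UL
	#define PAGE_SHIFT	12
	#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

	int main(void)
	{
		unsigned long offset = 0;
		unsigned long size = PAGE_SIZE + 1;	/* size % PAGE_SIZE == 1 */

		/* old code: rounds 4096 up to 4096 -> 1 page, one too few */
		unsigned long buggy = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT;
		/* fixed code: rounds 4097 up to 8192 -> 2 pages */
		unsigned long fixed = PAGE_ALIGN(offset + size) >> PAGE_SHIFT;

		printf("buggy=%lu pages, fixed=%lu pages\n", buggy, fixed);
		return 0;
	}
]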
From 3f8afb77cd8a672f024e4a16763ef177bc16c8f8 Mon Sep 17 00:00:00 2001
From: Borislav Petkov
Date: Wed, 21 Jul 2010 14:47:05 +0200
Subject: x86, tlb: Clean up and correct used type

smp_processor_id() returns an int, not an unsigned long. Also, since the
function is small enough, there's no need for a local variable caching
its value.

No functional change, just cleanup.

Signed-off-by: Borislav Petkov
LKML-Reference: <20100721124705.GA674@aftab>
Signed-off-by: Ingo Molnar
---
 arch/x86/mm/tlb.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 426f3a1a64d3..c03f14ab6667 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -278,11 +278,9 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
 
 static void do_flush_tlb_all(void *info)
 {
-	unsigned long cpu = smp_processor_id();
-
 	__flush_tlb_all();
 	if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
-		leave_mm(cpu);
+		leave_mm(smp_processor_id());
 }
 
 void flush_tlb_all(void)
-- 
cgit v1.2.2

From 3709c857350976408953831f0cf89d19951394a1 Mon Sep 17 00:00:00 2001
From: Yasuaki Ishimatsu
Date: Thu, 22 Jul 2010 14:57:35 +0900
Subject: x86: Ioremap: fix wrong physical address handling in PAT code

The following two commits fixed a problem where x86 ioremap() did not
handle physical addresses higher than 32-bit properly in X86_32 PAE mode:

  ffa71f33a820d1ab3f2fc5723819ac60fb76080b
  (x86, ioremap: Fix incorrect physical address handling in PAE mode)

  35be1b716a475717611b2dc04185e9d80b9cb693
  (x86, ioremap: Fix normal ram range check)

But these fixes are not enough, since pat_pagerange_is_ram() in the PAT
code has the same problem. This patch fixes it.

Signed-off-by: Yasuaki Ishimatsu
Reviewed-by: Kenji Kaneshige
LKML-Reference: <4C47DDCF.80300@jp.fujitsu.com>
Signed-off-by: Thomas Gleixner
---
 arch/x86/mm/pat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index acc15b23b743..03b48c80c651 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -158,7 +158,7 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end, unsigned long req_type)
 	return req_type;
 }
 
-static int pat_pagerange_is_ram(unsigned long start, unsigned long end)
+static int pat_pagerange_is_ram(resource_size_t start, resource_size_t end)
 {
 	int ram_page = 0, not_rampage = 0;
 	unsigned long page_nr;
-- 
cgit v1.2.2
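[Editorial note: for reference, the class of bug fixed by this series: on
X86_32, unsigned long is 32 bits, so passing a PAE physical address through
it silently drops the upper bits. A stand-alone illustration, where uint32_t
stands in for the 32-bit unsigned long:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t phys = 0x100002000ULL;		/* MMIO address above 4 GiB */
		uint32_t truncated = (uint32_t)phys;	/* what a 32-bit ulong keeps */

		printf("full:      0x%llx\n", (unsigned long long)phys);
		printf("truncated: 0x%x\n", truncated);	/* 0x2000: wrong page */
		return 0;
	}
]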