aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJeremy Fitzhardinge <jeremy@goop.org>2010-12-02 17:31:18 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2010-12-02 17:51:15 -0500
commit64141da587241301ce8638cc945f8b67853156ec (patch)
treebf11cfe53f606a2bda2342c6286ba637c4848e34
parent853ff88324a248a9f5da6e110850223db353ec07 (diff)
vmalloc: eagerly clear ptes on vunmap
On stock 2.6.37-rc4, running: # mount lilith:/export /mnt/lilith # find /mnt/lilith/ -type f -print0 | xargs -0 file crashes the machine fairly quickly under Xen. Often it results in oops messages, but the couple of times I tried just now, it just hung quietly and made Xen print some rude messages: (XEN) mm.c:2389:d80 Bad type (saw 7400000000000001 != exp 3000000000000000) for mfn 1d7058 (pfn 18fa7) (XEN) mm.c:964:d80 Attempt to create linear p.t. with write perms (XEN) mm.c:2389:d80 Bad type (saw 7400000000000010 != exp 1000000000000000) for mfn 1d2e04 (pfn 1d1fb) (XEN) mm.c:2965:d80 Error while pinning mfn 1d2e04 Which means the domain tried to map a pagetable page RW, which would allow it to map arbitrary memory, so Xen stopped it. This is because vm_unmap_ram() left some pages mapped in the vmalloc area after NFS had finished with them, and those pages got recycled as pagetable pages while still having these RW aliases. Removing those mappings immediately removes the Xen-visible aliases, and so it has no problem with those pages being reused as pagetable pages. Deferring the TLB flush doesn't upset Xen because it can flush the TLB itself as needed to maintain its invariants. When unmapping a region in the vmalloc space, clear the ptes immediately. There's no point in deferring this because there's no amortization benefit. The TLBs are left dirty, and they are flushed lazily to amortize the cost of the IPIs. This specific motivation for this patch is an oops-causing regression since 2.6.36 when using NFS under Xen, triggered by the NFS client's use of vm_map_ram() introduced in 56e4ebf877b60 ("NFS: readdir with vmapped pages") . XFS also uses vm_map_ram() and could cause similar problems. Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com> Cc: Nick Piggin <npiggin@kernel.dk> Cc: Bryan Schumaker <bjschuma@netapp.com> Cc: Trond Myklebust <Trond.Myklebust@netapp.com> Cc: Alex Elder <aelder@sgi.com> Cc: Dave Chinner <david@fromorbit.com> Cc: Christoph Hellwig <hch@lst.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--arch/x86/xen/mmu.c2
-rw-r--r--include/linux/vmalloc.h2
-rw-r--r--mm/vmalloc.c28
3 files changed, 17 insertions, 15 deletions
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index a1feff9e59b6..44924e551fde 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2415,8 +2415,6 @@ void __init xen_init_mmu_ops(void)
2415 x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done; 2415 x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
2416 pv_mmu_ops = xen_mmu_ops; 2416 pv_mmu_ops = xen_mmu_ops;
2417 2417
2418 vmap_lazy_unmap = false;
2419
2420 memset(dummy_mapping, 0xff, PAGE_SIZE); 2418 memset(dummy_mapping, 0xff, PAGE_SIZE);
2421} 2419}
2422 2420
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index a03dcf62ca9d..44b54f619ac6 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -7,8 +7,6 @@
7 7
8struct vm_area_struct; /* vma defining user mapping in mm_types.h */ 8struct vm_area_struct; /* vma defining user mapping in mm_types.h */
9 9
10extern bool vmap_lazy_unmap;
11
12/* bits in flags of vmalloc's vm_struct below */ 10/* bits in flags of vmalloc's vm_struct below */
13#define VM_IOREMAP 0x00000001 /* ioremap() and friends */ 11#define VM_IOREMAP 0x00000001 /* ioremap() and friends */
14#define VM_ALLOC 0x00000002 /* vmalloc() */ 12#define VM_ALLOC 0x00000002 /* vmalloc() */
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index a3d66b3dc5cb..eb5cc7d00c5a 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -31,8 +31,6 @@
31#include <asm/tlbflush.h> 31#include <asm/tlbflush.h>
32#include <asm/shmparam.h> 32#include <asm/shmparam.h>
33 33
34bool vmap_lazy_unmap __read_mostly = true;
35
36/*** Page table manipulation functions ***/ 34/*** Page table manipulation functions ***/
37 35
38static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end) 36static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
@@ -503,9 +501,6 @@ static unsigned long lazy_max_pages(void)
503{ 501{
504 unsigned int log; 502 unsigned int log;
505 503
506 if (!vmap_lazy_unmap)
507 return 0;
508
509 log = fls(num_online_cpus()); 504 log = fls(num_online_cpus());
510 505
511 return log * (32UL * 1024 * 1024 / PAGE_SIZE); 506 return log * (32UL * 1024 * 1024 / PAGE_SIZE);
@@ -566,7 +561,6 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
566 if (va->va_end > *end) 561 if (va->va_end > *end)
567 *end = va->va_end; 562 *end = va->va_end;
568 nr += (va->va_end - va->va_start) >> PAGE_SHIFT; 563 nr += (va->va_end - va->va_start) >> PAGE_SHIFT;
569 unmap_vmap_area(va);
570 list_add_tail(&va->purge_list, &valist); 564 list_add_tail(&va->purge_list, &valist);
571 va->flags |= VM_LAZY_FREEING; 565 va->flags |= VM_LAZY_FREEING;
572 va->flags &= ~VM_LAZY_FREE; 566 va->flags &= ~VM_LAZY_FREE;
@@ -611,10 +605,11 @@ static void purge_vmap_area_lazy(void)
611} 605}
612 606
613/* 607/*
614 * Free and unmap a vmap area, caller ensuring flush_cache_vunmap had been 608 * Free a vmap area, caller ensuring that the area has been unmapped
615 * called for the correct range previously. 609 * and flush_cache_vunmap had been called for the correct range
610 * previously.
616 */ 611 */
617static void free_unmap_vmap_area_noflush(struct vmap_area *va) 612static void free_vmap_area_noflush(struct vmap_area *va)
618{ 613{
619 va->flags |= VM_LAZY_FREE; 614 va->flags |= VM_LAZY_FREE;
620 atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr); 615 atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr);
@@ -623,6 +618,16 @@ static void free_unmap_vmap_area_noflush(struct vmap_area *va)
623} 618}
624 619
625/* 620/*
621 * Free and unmap a vmap area, caller ensuring flush_cache_vunmap had been
622 * called for the correct range previously.
623 */
624static void free_unmap_vmap_area_noflush(struct vmap_area *va)
625{
626 unmap_vmap_area(va);
627 free_vmap_area_noflush(va);
628}
629
630/*
626 * Free and unmap a vmap area 631 * Free and unmap a vmap area
627 */ 632 */
628static void free_unmap_vmap_area(struct vmap_area *va) 633static void free_unmap_vmap_area(struct vmap_area *va)
@@ -798,7 +803,7 @@ static void free_vmap_block(struct vmap_block *vb)
798 spin_unlock(&vmap_block_tree_lock); 803 spin_unlock(&vmap_block_tree_lock);
799 BUG_ON(tmp != vb); 804 BUG_ON(tmp != vb);
800 805
801 free_unmap_vmap_area_noflush(vb->va); 806 free_vmap_area_noflush(vb->va);
802 call_rcu(&vb->rcu_head, rcu_free_vb); 807 call_rcu(&vb->rcu_head, rcu_free_vb);
803} 808}
804 809
@@ -936,6 +941,8 @@ static void vb_free(const void *addr, unsigned long size)
936 rcu_read_unlock(); 941 rcu_read_unlock();
937 BUG_ON(!vb); 942 BUG_ON(!vb);
938 943
944 vunmap_page_range((unsigned long)addr, (unsigned long)addr + size);
945
939 spin_lock(&vb->lock); 946 spin_lock(&vb->lock);
940 BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order)); 947 BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order));
941 948
@@ -988,7 +995,6 @@ void vm_unmap_aliases(void)
988 995
989 s = vb->va->va_start + (i << PAGE_SHIFT); 996 s = vb->va->va_start + (i << PAGE_SHIFT);
990 e = vb->va->va_start + (j << PAGE_SHIFT); 997 e = vb->va->va_start + (j << PAGE_SHIFT);
991 vunmap_page_range(s, e);
992 flush = 1; 998 flush = 1;
993 999
994 if (s < start) 1000 if (s < start)