diff options
author | Jan Stancek <jstancek@redhat.com> | 2019-03-05 18:50:08 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-03-06 00:07:21 -0500 |
commit | fc8efd2ddfed3f343c11b693e87140ff358d7ff5 (patch) | |
tree | 699cad1269b49604db80b125390962a69ad68640 | |
parent | a9519defc771d574888ffe01e84747889152ec35 (diff) |
mm/memory.c: do_fault: avoid usage of stale vm_area_struct
LTP testcase mtest06 [1] can trigger a crash on s390x running 5.0.0-rc8.
This is a stress test, where one thread mmaps/writes/munmaps a memory area
and another thread is trying to read from it:
CPU: 0 PID: 2611 Comm: mmap1 Not tainted 5.0.0-rc8+ #51
Hardware name: IBM 2964 N63 400 (z/VM 6.4.0)
Krnl PSW : 0404e00180000000 00000000001ac8d8 (__lock_acquire+0x7/0x7a8)
Call Trace:
([<0000000000000000>] (null))
[<00000000001adae4>] lock_acquire+0xec/0x258
[<000000000080d1ac>] _raw_spin_lock_bh+0x5c/0x98
[<000000000012a780>] page_table_free+0x48/0x1a8
[<00000000002f6e54>] do_fault+0xdc/0x670
[<00000000002fadae>] __handle_mm_fault+0x416/0x5f0
[<00000000002fb138>] handle_mm_fault+0x1b0/0x320
[<00000000001248cc>] do_dat_exception+0x19c/0x2c8
[<000000000080e5ee>] pgm_check_handler+0x19e/0x200
page_table_free() is called with a NULL mm parameter, but because "0" is a
valid address on s390 (see S390_lowcore), it keeps going until it
eventually crashes in lockdep's lock_acquire. This crash has been
reproducible since at least 4.14.
The problem is that "vmf->vma" used in do_fault() can become stale. Because
mmap_sem may be released, other threads can come in, call munmap() and
cause "vma" to be returned to the kmem cache, where it gets
zeroed/re-initialized and re-used:
handle_mm_fault |
__handle_mm_fault |
do_fault |
vma = vmf->vma |
do_read_fault |
__do_fault |
vma->vm_ops->fault(vmf); |
mmap_sem is released |
|
| do_munmap()
| remove_vma_list()
| remove_vma()
| vm_area_free()
| # vma is released
| ...
| # same vma is allocated
| # from kmem cache
| do_mmap()
| vm_area_alloc()
| memset(vma, 0, ...)
|
pte_free(vma->vm_mm, ...); |
page_table_free |
spin_lock_bh(&mm->context.lock);|
<crash> |
Cache mm_struct to avoid using potentially stale "vma".
[1] https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/mem/mtest06/mmap1.c
Link: http://lkml.kernel.org/r/5b3fdf19e2a5be460a384b936f5b56e13733f1b8.1551595137.git.jstancek@redhat.com
Signed-off-by: Jan Stancek <jstancek@redhat.com>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Reviewed-by: Matthew Wilcox <willy@infradead.org>
Acked-by: Rafael Aquini <aquini@redhat.com>
Reviewed-by: Minchan Kim <minchan@kernel.org>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Rik van Riel <riel@surriel.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Souptick Joarder <jrdr.linux@gmail.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | mm/memory.c | 5 |
1 files changed, 4 insertions, 1 deletions
diff --git a/mm/memory.c b/mm/memory.c index 706c4c4a2b8e..47fe250307c7 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -3536,10 +3536,13 @@ static vm_fault_t do_shared_fault(struct vm_fault *vmf) | |||
3536 | * but allow concurrent faults). | 3536 | * but allow concurrent faults). |
3537 | * The mmap_sem may have been released depending on flags and our | 3537 | * The mmap_sem may have been released depending on flags and our |
3538 | * return value. See filemap_fault() and __lock_page_or_retry(). | 3538 | * return value. See filemap_fault() and __lock_page_or_retry(). |
3539 | * If mmap_sem is released, vma may become invalid (for example | ||
3540 | * by other thread calling munmap()). | ||
3539 | */ | 3541 | */ |
3540 | static vm_fault_t do_fault(struct vm_fault *vmf) | 3542 | static vm_fault_t do_fault(struct vm_fault *vmf) |
3541 | { | 3543 | { |
3542 | struct vm_area_struct *vma = vmf->vma; | 3544 | struct vm_area_struct *vma = vmf->vma; |
3545 | struct mm_struct *vm_mm = vma->vm_mm; | ||
3543 | vm_fault_t ret; | 3546 | vm_fault_t ret; |
3544 | 3547 | ||
3545 | /* | 3548 | /* |
@@ -3580,7 +3583,7 @@ static vm_fault_t do_fault(struct vm_fault *vmf) | |||
3580 | 3583 | ||
3581 | /* preallocated pagetable is unused: free it */ | 3584 | /* preallocated pagetable is unused: free it */ |
3582 | if (vmf->prealloc_pte) { | 3585 | if (vmf->prealloc_pte) { |
3583 | pte_free(vma->vm_mm, vmf->prealloc_pte); | 3586 | pte_free(vm_mm, vmf->prealloc_pte); |
3584 | vmf->prealloc_pte = NULL; | 3587 | vmf->prealloc_pte = NULL; |
3585 | } | 3588 | } |
3586 | return ret; | 3589 | return ret; |