aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2011-04-12 17:15:51 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2011-04-12 17:15:51 -0400
commit 95042f9eb78a8d9a17455e2ef263f2f310ecef15 (patch)
tree ac9fe0a5e17c4b94b18b84338ffbeca2cee140cb
parent be85bccaa5aa5a11dcaf85f9e945ffefd253f631 (diff)
vm: fix mlock() on stack guard page
Commit 53a7706d5ed8 ("mlock: do not hold mmap_sem for extended periods of time") changed mlock() to care about the exact number of pages that __get_user_pages() had brought in. Before, it would only care about errors. And that doesn't work, because we also handled one page specially in __mlock_vma_pages_range(), namely the stack guard page. So when that case was handled, the number of pages that the function returned was off by one. In particular, it could be zero, and then the caller would end up not making any progress at all. Rather than try to fix up that off-by-one error for the mlock case specially, this just moves the logic to handle the stack guard page into __get_user_pages() itself, thus making all the counts come out right automatically. Reported-by: Robert Święcki <robert@swiecki.net> Cc: Hugh Dickins <hughd@google.com> Cc: Oleg Nesterov <oleg@redhat.com> Cc: stable@kernel.org Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- mm/memory.c 26
-rw-r--r-- mm/mlock.c 13
2 files changed, 18 insertions, 21 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 9da8cab1b1b0..b623a249918c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1410,6 +1410,13 @@ no_page_table:
1410 return page; 1410 return page;
1411} 1411}
1412 1412
1413static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
1414{
1415 return (vma->vm_flags & VM_GROWSDOWN) &&
1416 (vma->vm_start == addr) &&
1417 !vma_stack_continue(vma->vm_prev, addr);
1418}
1419
1413/** 1420/**
1414 * __get_user_pages() - pin user pages in memory 1421 * __get_user_pages() - pin user pages in memory
1415 * @tsk: task_struct of target task 1422 * @tsk: task_struct of target task
@@ -1488,7 +1495,6 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1488 vma = find_extend_vma(mm, start); 1495 vma = find_extend_vma(mm, start);
1489 if (!vma && in_gate_area(mm, start)) { 1496 if (!vma && in_gate_area(mm, start)) {
1490 unsigned long pg = start & PAGE_MASK; 1497 unsigned long pg = start & PAGE_MASK;
1491 struct vm_area_struct *gate_vma = get_gate_vma(mm);
1492 pgd_t *pgd; 1498 pgd_t *pgd;
1493 pud_t *pud; 1499 pud_t *pud;
1494 pmd_t *pmd; 1500 pmd_t *pmd;
@@ -1513,10 +1519,11 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1513 pte_unmap(pte); 1519 pte_unmap(pte);
1514 return i ? : -EFAULT; 1520 return i ? : -EFAULT;
1515 } 1521 }
1522 vma = get_gate_vma(mm);
1516 if (pages) { 1523 if (pages) {
1517 struct page *page; 1524 struct page *page;
1518 1525
1519 page = vm_normal_page(gate_vma, start, *pte); 1526 page = vm_normal_page(vma, start, *pte);
1520 if (!page) { 1527 if (!page) {
1521 if (!(gup_flags & FOLL_DUMP) && 1528 if (!(gup_flags & FOLL_DUMP) &&
1522 is_zero_pfn(pte_pfn(*pte))) 1529 is_zero_pfn(pte_pfn(*pte)))
@@ -1530,12 +1537,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1530 get_page(page); 1537 get_page(page);
1531 } 1538 }
1532 pte_unmap(pte); 1539 pte_unmap(pte);
1533 if (vmas) 1540 goto next_page;
1534 vmas[i] = gate_vma;
1535 i++;
1536 start += PAGE_SIZE;
1537 nr_pages--;
1538 continue;
1539 } 1541 }
1540 1542
1541 if (!vma || 1543 if (!vma ||
@@ -1549,6 +1551,13 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1549 continue; 1551 continue;
1550 } 1552 }
1551 1553
1554 /*
1555 * If we don't actually want the page itself,
1556 * and it's the stack guard page, just skip it.
1557 */
1558 if (!pages && stack_guard_page(vma, start))
1559 goto next_page;
1560
1552 do { 1561 do {
1553 struct page *page; 1562 struct page *page;
1554 unsigned int foll_flags = gup_flags; 1563 unsigned int foll_flags = gup_flags;
@@ -1631,6 +1640,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1631 flush_anon_page(vma, page, start); 1640 flush_anon_page(vma, page, start);
1632 flush_dcache_page(page); 1641 flush_dcache_page(page);
1633 } 1642 }
1643next_page:
1634 if (vmas) 1644 if (vmas)
1635 vmas[i] = vma; 1645 vmas[i] = vma;
1636 i++; 1646 i++;
diff --git a/mm/mlock.c b/mm/mlock.c
index 2689a08c79af..6b55e3efe0df 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -135,13 +135,6 @@ void munlock_vma_page(struct page *page)
135 } 135 }
136} 136}
137 137
138static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
139{
140 return (vma->vm_flags & VM_GROWSDOWN) &&
141 (vma->vm_start == addr) &&
142 !vma_stack_continue(vma->vm_prev, addr);
143}
144
145/** 138/**
146 * __mlock_vma_pages_range() - mlock a range of pages in the vma. 139 * __mlock_vma_pages_range() - mlock a range of pages in the vma.
147 * @vma: target vma 140 * @vma: target vma
@@ -188,12 +181,6 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
188 if (vma->vm_flags & VM_LOCKED) 181 if (vma->vm_flags & VM_LOCKED)
189 gup_flags |= FOLL_MLOCK; 182 gup_flags |= FOLL_MLOCK;
190 183
191 /* We don't try to access the guard page of a stack vma */
192 if (stack_guard_page(vma, start)) {
193 addr += PAGE_SIZE;
194 nr_pages--;
195 }
196
197 return __get_user_pages(current, mm, addr, nr_pages, gup_flags, 184 return __get_user_pages(current, mm, addr, nr_pages, gup_flags,
198 NULL, NULL, nonblocking); 185 NULL, NULL, nonblocking);
199} 186}