author    Michel Lespinasse <walken@google.com>  2011-01-13 18:46:14 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2011-01-13 20:32:36 -0500
commit    53a7706d5ed8f1a53ba062b318773160cc476dde
tree      a1990d90d5af3686b7a83b2bbc2ae6463971efc5
parent    5fdb2002131cd4e210b9638a4fc932ec7be491d1
mlock: do not hold mmap_sem for extended periods of time
__get_user_pages gets a new 'nonblocking' parameter to signal that the
caller is prepared to re-acquire mmap_sem and retry the operation if
needed.  This is used to split off long operations if they are going to
block on a disk transfer, or when we detect contention on the mmap_sem.

[akpm@linux-foundation.org: remove ref to rwsem_is_contended()]
Signed-off-by: Michel Lespinasse <walken@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--  mm/internal.h |  3
-rw-r--r--  mm/memory.c   | 23
-rw-r--r--  mm/mlock.c    | 40
-rw-r--r--  mm/nommu.c    |  6
4 files changed, 47 insertions(+), 25 deletions(-)
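To make the new calling convention concrete before the diff: the sketch below is illustrative only and not part of this commit. The helper name fault_in_range and its parameters are invented for illustration; it mirrors the do_mlock_pages() loop changed in mm/mlock.c below. The caller passes in a flag that starts out nonzero; if __get_user_pages() has to drop mmap_sem to wait on a fault, it clears the flag and returns the number of pages processed so far, and the caller re-acquires the lock before continuing.

/*
 * Illustrative sketch only -- fault_in_range() is a hypothetical helper,
 * not part of this commit. Assumes the __get_user_pages() prototype from
 * mm/internal.h, plus <linux/mm.h> and <linux/sched.h>.
 */
static long fault_in_range(struct mm_struct *mm, unsigned long start,
			   int nr_pages, unsigned int gup_flags)
{
	unsigned long addr = start;
	int locked = 0;
	long ret = 0;

	while (nr_pages > 0) {
		if (!locked) {
			/* first pass, or __get_user_pages() dropped the lock */
			locked = 1;
			down_read(&mm->mmap_sem);
		}
		ret = __get_user_pages(current, mm, addr, nr_pages,
				       gup_flags, NULL, NULL, &locked);
		if (ret < 0)
			break;	/* real error: return it to the caller */
		/*
		 * 'ret' pages were handled before any blocking fault.
		 * If 'locked' is now 0, mmap_sem was released while the
		 * fault slept on I/O; the loop re-takes it above and
		 * retries from the first page not yet processed.
		 */
		addr += ret * PAGE_SIZE;
		nr_pages -= ret;
	}
	if (locked)
		up_read(&mm->mmap_sem);
	return ret < 0 ? ret : 0;
}

Compare with the do_mlock_pages() changes in mm/mlock.c below, which apply the same pattern with the additional VMA walk.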
diff --git a/mm/internal.h b/mm/internal.h
index dedb0aff673f..bd4f581f624a 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -243,7 +243,8 @@ static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn,
 
 int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		     unsigned long start, int len, unsigned int foll_flags,
-		     struct page **pages, struct vm_area_struct **vmas);
+		     struct page **pages, struct vm_area_struct **vmas,
+		     int *nonblocking);
 
 #define ZONE_RECLAIM_NOSCAN	-2
 #define ZONE_RECLAIM_FULL	-1
diff --git a/mm/memory.c b/mm/memory.c
index 15e1f19a3b10..1bbe9a22429c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1363,7 +1363,8 @@ no_page_table:
 
 int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		     unsigned long start, int nr_pages, unsigned int gup_flags,
-		     struct page **pages, struct vm_area_struct **vmas)
+		     struct page **pages, struct vm_area_struct **vmas,
+		     int *nonblocking)
 {
 	int i;
 	unsigned long vm_flags;
@@ -1463,10 +1464,15 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		cond_resched();
 		while (!(page = follow_page(vma, start, foll_flags))) {
 			int ret;
+			unsigned int fault_flags = 0;
+
+			if (foll_flags & FOLL_WRITE)
+				fault_flags |= FAULT_FLAG_WRITE;
+			if (nonblocking)
+				fault_flags |= FAULT_FLAG_ALLOW_RETRY;
 
 			ret = handle_mm_fault(mm, vma, start,
-				(foll_flags & FOLL_WRITE) ?
-				FAULT_FLAG_WRITE : 0);
+					      fault_flags);
 
 			if (ret & VM_FAULT_ERROR) {
 				if (ret & VM_FAULT_OOM)
@@ -1482,6 +1488,11 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			else
 				tsk->min_flt++;
 
+			if (ret & VM_FAULT_RETRY) {
+				*nonblocking = 0;
+				return i;
+			}
+
 			/*
 			 * The VM_FAULT_WRITE bit tells us that
 			 * do_wp_page has broken COW when necessary,
@@ -1581,7 +1592,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 	if (force)
 		flags |= FOLL_FORCE;
 
-	return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas);
+	return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas,
+				NULL);
 }
 EXPORT_SYMBOL(get_user_pages);
 
@@ -1606,7 +1618,8 @@ struct page *get_dump_page(unsigned long addr)
 	struct page *page;
 
 	if (__get_user_pages(current, current->mm, addr, 1,
-			FOLL_FORCE | FOLL_DUMP | FOLL_GET, &page, &vma) < 1)
+			     FOLL_FORCE | FOLL_DUMP | FOLL_GET, &page, &vma,
+			     NULL) < 1)
 		return NULL;
 	flush_cache_page(vma, addr, page_to_pfn(page));
 	return page;
diff --git a/mm/mlock.c b/mm/mlock.c
index 84da66b7bbf0..13e81ee8be9d 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -155,13 +155,13 @@ static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
  * vma->vm_mm->mmap_sem must be held for at least read.
  */
 static long __mlock_vma_pages_range(struct vm_area_struct *vma,
-				    unsigned long start, unsigned long end)
+				    unsigned long start, unsigned long end,
+				    int *nonblocking)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long addr = start;
 	int nr_pages = (end - start) / PAGE_SIZE;
 	int gup_flags;
-	int ret;
 
 	VM_BUG_ON(start & ~PAGE_MASK);
 	VM_BUG_ON(end & ~PAGE_MASK);
@@ -187,9 +187,8 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
 		nr_pages--;
 	}
 
-	ret = __get_user_pages(current, mm, addr, nr_pages, gup_flags,
-			       NULL, NULL);
-	return max(ret, 0);	/* 0 or negative error code */
+	return __get_user_pages(current, mm, addr, nr_pages, gup_flags,
+				NULL, NULL, nonblocking);
 }
 
 /*
@@ -233,7 +232,7 @@ long mlock_vma_pages_range(struct vm_area_struct *vma,
 			is_vm_hugetlb_page(vma) ||
 			vma == get_gate_vma(current))) {
 
-		__mlock_vma_pages_range(vma, start, end);
+		__mlock_vma_pages_range(vma, start, end, NULL);
 
 		/* Hide errors from mmap() and other callers */
 		return 0;
@@ -429,21 +428,23 @@ static int do_mlock_pages(unsigned long start, size_t len, int ignore_errors)
 	struct mm_struct *mm = current->mm;
 	unsigned long end, nstart, nend;
 	struct vm_area_struct *vma = NULL;
+	int locked = 0;
 	int ret = 0;
 
 	VM_BUG_ON(start & ~PAGE_MASK);
 	VM_BUG_ON(len != PAGE_ALIGN(len));
 	end = start + len;
 
-	down_read(&mm->mmap_sem);
 	for (nstart = start; nstart < end; nstart = nend) {
 		/*
 		 * We want to fault in pages for [nstart; end) address range.
 		 * Find first corresponding VMA.
 		 */
-		if (!vma)
+		if (!locked) {
+			locked = 1;
+			down_read(&mm->mmap_sem);
 			vma = find_vma(mm, nstart);
-		else
+		} else if (nstart >= vma->vm_end)
 			vma = vma->vm_next;
 		if (!vma || vma->vm_start >= end)
 			break;
@@ -457,19 +458,24 @@ static int do_mlock_pages(unsigned long start, size_t len, int ignore_errors)
 		if (nstart < vma->vm_start)
 			nstart = vma->vm_start;
 		/*
-		 * Now fault in a range of pages within the first VMA.
+		 * Now fault in a range of pages. __mlock_vma_pages_range()
+		 * double checks the vma flags, so that it won't mlock pages
+		 * if the vma was already munlocked.
 		 */
-		ret = __mlock_vma_pages_range(vma, nstart, nend);
-		if (ret < 0 && ignore_errors) {
-			ret = 0;
-			continue;	/* continue at next VMA */
-		}
-		if (ret) {
+		ret = __mlock_vma_pages_range(vma, nstart, nend, &locked);
+		if (ret < 0) {
+			if (ignore_errors) {
+				ret = 0;
+				continue;	/* continue at next VMA */
+			}
 			ret = __mlock_posix_error_return(ret);
 			break;
 		}
+		nend = nstart + ret * PAGE_SIZE;
+		ret = 0;
 	}
-	up_read(&mm->mmap_sem);
+	if (locked)
+		up_read(&mm->mmap_sem);
 	return ret;	/* 0 or negative error code */
 }
 
diff --git a/mm/nommu.c b/mm/nommu.c
index ef4045d010d5..f59e1424d3db 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -127,7 +127,8 @@ unsigned int kobjsize(const void *objp)
 
 int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		     unsigned long start, int nr_pages, unsigned int foll_flags,
-		     struct page **pages, struct vm_area_struct **vmas)
+		     struct page **pages, struct vm_area_struct **vmas,
+		     int *retry)
 {
 	struct vm_area_struct *vma;
 	unsigned long vm_flags;
@@ -185,7 +186,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 	if (force)
 		flags |= FOLL_FORCE;
 
-	return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas);
+	return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas,
+				NULL);
 }
 EXPORT_SYMBOL(get_user_pages);
 