author    Nick Piggin <npiggin@suse.de>  2008-10-18 23:26:44 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2008-10-20 11:52:30 -0400
commit    b291f000393f5a0b679012b39d79fbc85c018233
tree      28eb785d4d157d3396e4377294e6054635a4bd90  /mm/memory.c
parent    89e004ea55abe201b29e2d6e35124101f1288ef7
mlock: mlocked pages are unevictable
Make sure that mlocked pages also live on the unevictable LRU, so kswapd
will not scan them over and over again.

This is achieved through various strategies:

1) add yet another page flag--PG_mlocked--to indicate that the page is
   locked for efficient testing in vmscan and, optionally, the fault path.
   This allows early culling of unevictable pages, preventing them from
   getting to page_referenced()/try_to_unmap().  Also allows separate
   accounting of mlock'd pages, as Nick's original patch did.

   Note: Nick's original mlock patch used a PG_mlocked flag.  I had
   removed this in favor of the PG_unevictable flag + an mlock_count
   [new page struct member].  I restored the PG_mlocked flag to
   eliminate the new count field.

2) add the mlock/unevictable infrastructure to mm/mlock.c, with
   internal APIs in mm/internal.h.  This is a rework of Nick's original
   patch to these files, taking into account that mlocked pages are now
   kept on the unevictable LRU list.

3) update vmscan.c:page_evictable() to check PageMlocked() and, if a
   vma is passed in, the vm_flags.  Note that the vma will only be
   passed in for new pages in the fault path, and then only if the
   "cull unevictable pages in fault path" patch is included.

4) add try_to_unlock() to rmap.c to walk a page's rmap and
   ClearPageMlocked() if no other vmas have it mlocked.  Reuses as much
   of try_to_unmap() as possible.  This effectively replaces the use of
   one of the lru list links as an mlock count.  If this mechanism lets
   pages in mlocked vmas leak through without PG_mlocked set [I don't
   know that it does], we should catch them later in try_to_unmap().
   One hopes this will be rare, as it will be relatively expensive.

Original mm/internal.h, mm/rmap.c and mm/mlock.c changes:
Signed-off-by: Nick Piggin <npiggin@suse.de>

splitlru: introduce __get_user_pages():

The new munlock processing needs GUP_FLAGS_IGNORE_VMA_PERMISSIONS,
because the current get_user_pages() can't grab PROT_NONE pages, and
therefore PROT_NONE pages can't be munlocked.

[akpm@linux-foundation.org: fix this for pagemap-pass-mm-into-pagewalkers.patch]
[akpm@linux-foundation.org: untangle patch interdependencies]
[akpm@linux-foundation.org: fix things after out-of-order merging]
[hugh@veritas.com: fix page-flags mess]
[lee.schermerhorn@hp.com: fix munlock page table walk - now requires 'mm']
[kosaki.motohiro@jp.fujitsu.com: build fix]
[kosaki.motohiro@jp.fujitsu.com: fix truncate race and several comments]
[kosaki.motohiro@jp.fujitsu.com: splitlru: introduce __get_user_pages()]
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Cc: Matt Mackall <mpm@selenic.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
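[Editor's note: the flag plumbing described above relies on a small set of
GUP flag bits defined in mm/internal.h, which is outside this diff.  A
minimal sketch, with illustrative values assumed here rather than quoted
from the patch:]

    /*
     * Sketch of the GUP flag bits consumed by __get_user_pages() in the
     * diff below.  The real definitions are added to mm/internal.h by
     * this series; the bit values shown here are assumptions.
     */
    #define GUP_FLAGS_WRITE                  0x1
    #define GUP_FLAGS_FORCE                  0x2
    #define GUP_FLAGS_IGNORE_VMA_PERMISSIONS 0x4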
Diffstat (limited to 'mm/memory.c')
-rw-r--r--  mm/memory.c  56
1 file changed, 49 insertions(+), 7 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index 71cdefd1ef14..9fef7272fb9e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -64,6 +64,8 @@
 
 #include "internal.h"
 
+#include "internal.h"
+
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 /* use the per-pgdat data instead for discontigmem - mbligh */
 unsigned long max_mapnr;
@@ -1129,12 +1131,17 @@ static inline int use_zero_page(struct vm_area_struct *vma)
         return !vma->vm_ops || !vma->vm_ops->fault;
 }
 
-int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
-                unsigned long start, int len, int write, int force,
+
+
+int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+                     unsigned long start, int len, int flags,
                 struct page **pages, struct vm_area_struct **vmas)
 {
         int i;
-        unsigned int vm_flags;
+        unsigned int vm_flags = 0;
+        int write = !!(flags & GUP_FLAGS_WRITE);
+        int force = !!(flags & GUP_FLAGS_FORCE);
+        int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
 
         if (len <= 0)
                 return 0;
@@ -1158,7 +1165,9 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                 pud_t *pud;
                 pmd_t *pmd;
                 pte_t *pte;
-                if (write) /* user gate pages are read-only */
+
+                /* user gate pages are read-only */
+                if (!ignore && write)
                         return i ? : -EFAULT;
                 if (pg > TASK_SIZE)
                         pgd = pgd_offset_k(pg);
@@ -1190,8 +1199,9 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                         continue;
                 }
 
-                if (!vma || (vma->vm_flags & (VM_IO | VM_PFNMAP))
-                                || !(vm_flags & vma->vm_flags))
+                if (!vma ||
+                    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
+                    (!ignore && !(vm_flags & vma->vm_flags)))
                         return i ? : -EFAULT;
 
                 if (is_vm_hugetlb_page(vma)) {
@@ -1266,6 +1276,23 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
         } while (len);
         return i;
 }
+
+int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+                unsigned long start, int len, int write, int force,
+                struct page **pages, struct vm_area_struct **vmas)
+{
+        int flags = 0;
+
+        if (write)
+                flags |= GUP_FLAGS_WRITE;
+        if (force)
+                flags |= GUP_FLAGS_FORCE;
+
+        return __get_user_pages(tsk, mm,
+                                start, len, flags,
+                                pages, vmas);
+}
+
 EXPORT_SYMBOL(get_user_pages);
 
 pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr,
@@ -1858,6 +1885,15 @@ gotten:
         new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
         if (!new_page)
                 goto oom;
+        /*
+         * Don't let another task, with possibly unlocked vma,
+         * keep the mlocked page.
+         */
+        if (vma->vm_flags & VM_LOCKED) {
+                lock_page(old_page);        /* for LRU manipulation */
+                clear_page_mlock(old_page);
+                unlock_page(old_page);
+        }
         cow_user_page(new_page, old_page, address, vma);
         __SetPageUptodate(new_page);
 
@@ -2325,7 +2361,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
         page_add_anon_rmap(page, vma, address);
 
         swap_free(entry);
-        if (vm_swap_full())
+        if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
                 remove_exclusive_swap_page(page);
         unlock_page(page);
 
@@ -2465,6 +2501,12 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                         ret = VM_FAULT_OOM;
                         goto out;
                 }
+                /*
+                 * Don't let another task, with possibly unlocked vma,
+                 * keep the mlocked page.
+                 */
+                if (vma->vm_flags & VM_LOCKED)
+                        clear_page_mlock(vmf.page);
                 copy_user_highpage(page, vmf.page, address, vma);
                 __SetPageUptodate(page);
         } else {
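
[Editor's note: to illustrate the split introduced above: existing callers
keep the old get_user_pages() signature unchanged, while the munlock path
can pass GUP_FLAGS_IGNORE_VMA_PERMISSIONS so that even PROT_NONE pages are
returned, per the commit message.  A hedged usage sketch follows; the real
munlock caller lives in mm/mlock.c and is not part of this diff, so these
call sites are illustrative only:]

    /* Existing callers are unchanged, e.g.: */
    ret = get_user_pages(current, current->mm, addr, 1,
                         1 /* write */, 0 /* force */, &page, NULL);

    /*
     * A munlock-style walker, by contrast, can ignore vma permissions
     * so that PROT_NONE pages are still returned and can have their
     * PG_mlocked cleared:
     */
    ret = __get_user_pages(current, current->mm, addr, 1,
                           GUP_FLAGS_IGNORE_VMA_PERMISSIONS,
                           &page, NULL);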