author     Nick Piggin <npiggin@suse.de>               2008-10-18 23:26:44 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2008-10-20 11:52:30 -0400
commit     b291f000393f5a0b679012b39d79fbc85c018233 (patch)
tree       28eb785d4d157d3396e4377294e6054635a4bd90 /mm/nommu.c
parent     89e004ea55abe201b29e2d6e35124101f1288ef7 (diff)
mlock: mlocked pages are unevictable
Make sure that mlocked pages also live on the unevictable LRU, so kswapd will not scan them over and over again.

This is achieved through various strategies:

1) Add yet another page flag, PG_mlocked, to indicate that the page is mlocked, for efficient testing in vmscan and, optionally, in the fault path. This allows early culling of unevictable pages, preventing them from getting to page_referenced()/try_to_unmap(). It also allows separate accounting of mlock'd pages, as Nick's original patch did.

   Note: Nick's original mlock patch used a PG_mlocked flag. I had removed this in favor of the PG_unevictable flag plus an mlock_count [new page struct member]. I restored the PG_mlocked flag to eliminate the new count field.

2) Add the mlock/unevictable infrastructure to mm/mlock.c, with internal APIs in mm/internal.h. This is a rework of Nick's original patch to these files, taking into account that mlocked pages are now kept on the unevictable LRU list.

3) Update vmscan.c:page_evictable() to check PageMlocked() and, if a vma is passed in, the vma's vm_flags (a rough sketch of this test follows the message). Note that the vma will only be passed in for new pages in the fault path, and then only if the "cull unevictable pages in fault path" patch is included.

4) Add try_to_unlock() to rmap.c to walk a page's rmap and ClearPageMlocked() if no other vmas have it mlocked. This reuses as much of try_to_unmap() as possible.

This effectively replaces the use of one of the lru list links as an mlock count. If this mechanism lets pages in mlocked vmas leak through without PG_mlocked set [I don't know that it does], we should catch them later in try_to_unmap(). One hopes this will be rare, as it will be relatively expensive.

Original mm/internal.h, mm/rmap.c and mm/mlock.c changes:
Signed-off-by: Nick Piggin <npiggin@suse.de>

splitlru: introduce __get_user_pages():

The new munlock processing needs GUP_FLAGS_IGNORE_VMA_PERMISSIONS, because the current get_user_pages() can't grab PROT_NONE pages and therefore PROT_NONE pages can't be munlocked.

[akpm@linux-foundation.org: fix this for pagemap-pass-mm-into-pagewalkers.patch]
[akpm@linux-foundation.org: untangle patch interdependencies]
[akpm@linux-foundation.org: fix things after out-of-order merging]
[hugh@veritas.com: fix page-flags mess]
[lee.schermerhorn@hp.com: fix munlock page table walk - now requires 'mm']
[kosaki.motohiro@jp.fujitsu.com: build fix]
[kosaki.motohiro@jp.fujitsu.com: fix truncate race and several comments]
[kosaki.motohiro@jp.fujitsu.com: splitlru: introduce __get_user_pages()]
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Cc: Matt Mackall <mpm@selenic.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
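The evictability test described in point 3 can be pictured roughly as below. This is an illustrative sketch only, not the patch's actual page_evictable(); the name page_evictable_sketch() is invented here, and the sketch assumes only kernel context plus the PageMlocked() test and the VM_LOCKED vm_flags bit mentioned in the description.

/*
 * Illustrative sketch -- not the patch's actual page_evictable().
 * A page is treated as unevictable if it carries PG_mlocked, or if the
 * covering VMA (passed in only on the new-page fault path) is VM_LOCKED.
 */
static inline int page_evictable_sketch(struct page *page,
					struct vm_area_struct *vma)
{
	if (PageMlocked(page))
		return 0;	/* already culled to the unevictable LRU */
	if (vma && (vma->vm_flags & VM_LOCKED))
		return 0;	/* new page faulted into an mlocked VMA */
	return 1;		/* may live on the normal LRU lists */
}

Pages failing such a test stay off the normal LRU lists, which is what keeps kswapd from repeatedly scanning and rejecting them.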
Diffstat (limited to 'mm/nommu.c')
-rw-r--r--  mm/nommu.c  44
1 file changed, 33 insertions(+), 11 deletions(-)
diff --git a/mm/nommu.c b/mm/nommu.c
index ed75bc962fbe..2696b24f2bb3 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -34,6 +34,8 @@
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 
+#include "internal.h"
+
 void *high_memory;
 struct page *mem_map;
 unsigned long max_mapnr;
@@ -128,20 +130,16 @@ unsigned int kobjsize(const void *objp)
 	return PAGE_SIZE << compound_order(page);
 }
 
-/*
- * get a list of pages in an address range belonging to the specified process
- * and indicate the VMA that covers each page
- * - this is potentially dodgy as we may end incrementing the page count of a
- *   slab page or a secondary page from a compound page
- * - don't permit access to VMAs that don't support it, such as I/O mappings
- */
-int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
-	unsigned long start, int len, int write, int force,
-	struct page **pages, struct vm_area_struct **vmas)
+int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+	unsigned long start, int len, int flags,
+	struct page **pages, struct vm_area_struct **vmas)
 {
 	struct vm_area_struct *vma;
 	unsigned long vm_flags;
 	int i;
+	int write = !!(flags & GUP_FLAGS_WRITE);
+	int force = !!(flags & GUP_FLAGS_FORCE);
+	int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
 
 	/* calculate required read or write permissions.
 	 * - if 'force' is set, we only require the "MAY" flags.
@@ -156,7 +154,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 
 		/* protect what we can, including chardevs */
 		if (vma->vm_flags & (VM_IO | VM_PFNMAP) ||
-		    !(vm_flags & vma->vm_flags))
+		    (!ignore && !(vm_flags & vma->vm_flags)))
 			goto finish_or_fault;
 
 		if (pages) {
@@ -174,6 +172,30 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 finish_or_fault:
 	return i ? : -EFAULT;
 }
+
+
+/*
+ * get a list of pages in an address range belonging to the specified process
+ * and indicate the VMA that covers each page
+ * - this is potentially dodgy as we may end incrementing the page count of a
+ *   slab page or a secondary page from a compound page
+ * - don't permit access to VMAs that don't support it, such as I/O mappings
+ */
+int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+	unsigned long start, int len, int write, int force,
+	struct page **pages, struct vm_area_struct **vmas)
+{
+	int flags = 0;
+
+	if (write)
+		flags |= GUP_FLAGS_WRITE;
+	if (force)
+		flags |= GUP_FLAGS_FORCE;
+
+	return __get_user_pages(tsk, mm,
+				start, len, flags,
+				pages, vmas);
+}
 EXPORT_SYMBOL(get_user_pages);
 
 DEFINE_RWLOCK(vmlist_lock);
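To make the purpose of the new GUP_FLAGS_IGNORE_VMA_PERMISSIONS bit concrete, the sketch below shows how a munlock-style caller might use the new internal entry point to reach pages in a PROT_NONE VMA. The wrapper munlock_lookup_sketch() is a hypothetical example, not code from this patch; only the __get_user_pages() signature and the GUP_FLAGS_* bits visible in the diff above are assumed.

/*
 * Hypothetical caller (not part of this patch): look up the pages backing
 * [start, start + len) even when the VMA is PROT_NONE, which the public
 * get_user_pages() permission check would refuse.
 */
static int munlock_lookup_sketch(struct task_struct *tsk, struct mm_struct *mm,
				 unsigned long start, int len,
				 struct page **pages)
{
	/* no WRITE/FORCE bits: a read-only lookup that only skips the
	 * vm_flags permission test via GUP_FLAGS_IGNORE_VMA_PERMISSIONS */
	return __get_user_pages(tsk, mm, start, len,
				GUP_FLAGS_IGNORE_VMA_PERMISSIONS,
				pages, NULL);
}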