diff options
author | Nick Piggin <npiggin@suse.de> | 2008-10-18 23:26:44 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-10-20 11:52:30 -0400 |
commit | b291f000393f5a0b679012b39d79fbc85c018233 (patch) | |
tree | 28eb785d4d157d3396e4377294e6054635a4bd90 /include | |
parent | 89e004ea55abe201b29e2d6e35124101f1288ef7 (diff) |
mlock: mlocked pages are unevictable
Make sure that mlocked pages also live on the unevictable LRU, so kswapd
will not scan them over and over again.
This is achieved through various strategies:
1) add yet another page flag--PG_mlocked--to indicate that
the page is locked for efficient testing in vmscan and,
optionally, fault path. This allows early culling of
unevictable pages, preventing them from getting to
page_referenced()/try_to_unmap(). Also allows separate
accounting of mlock'd pages, as Nick's original patch
did.
Note: Nick's original mlock patch used a PG_mlocked
flag. I had removed this in favor of the PG_unevictable
flag + an mlock_count [new page struct member]. I
restored the PG_mlocked flag to eliminate the new
count field.
2) add the mlock/unevictable infrastructure to mm/mlock.c,
with internal APIs in mm/internal.h. This is a rework
of Nick's original patch to these files, taking into
account that mlocked pages are now kept on unevictable
LRU list.
3) update vmscan.c:page_evictable() to check PageMlocked()
and, if vma passed in, the vm_flags. Note that the vma
will only be passed in for new pages in the fault path;
and then only if the "cull unevictable pages in fault
path" patch is included.
4) add try_to_munlock() to rmap.c to walk a page's rmap and
ClearPageMlocked() if no other vmas have it mlocked.
Reuses as much of try_to_unmap() as possible. This
effectively replaces the use of one of the lru list links
as an mlock count. If this mechanism lets pages in mlocked
vmas leak through w/o PG_mlocked set [I don't know that it
does], we should catch them later in try_to_unmap(). One
hopes this will be rare, as it will be relatively expensive.
Original mm/internal.h, mm/rmap.c and mm/mlock.c changes:
Signed-off-by: Nick Piggin <npiggin@suse.de>
splitlru: introduce __get_user_pages():
New munlock processing needs GUP_FLAGS_IGNORE_VMA_PERMISSIONS,
because the current get_user_pages() can't grab PROT_NONE pages, and
therefore PROT_NONE pages can't be munlocked.
[akpm@linux-foundation.org: fix this for pagemap-pass-mm-into-pagewalkers.patch]
[akpm@linux-foundation.org: untangle patch interdependencies]
[akpm@linux-foundation.org: fix things after out-of-order merging]
[hugh@veritas.com: fix page-flags mess]
[lee.schermerhorn@hp.com: fix munlock page table walk - now requires 'mm']
[kosaki.motohiro@jp.fujitsu.com: build fix]
[kosaki.motohiro@jp.fujitsu.com: fix truncate race and several comments]
[kosaki.motohiro@jp.fujitsu.com: splitlru: introduce __get_user_pages()]
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Cc: Matt Mackall <mpm@selenic.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/mm.h | 5 | ||||
-rw-r--r-- | include/linux/page-flags.h | 19 | ||||
-rw-r--r-- | include/linux/rmap.h | 14 |
3 files changed, 35 insertions, 3 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h index 40236290e2a..ffee2f74341 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -132,6 +132,11 @@ extern unsigned int kobjsize(const void *objp); | |||
132 | #define VM_RandomReadHint(v) ((v)->vm_flags & VM_RAND_READ) | 132 | #define VM_RandomReadHint(v) ((v)->vm_flags & VM_RAND_READ) |
133 | 133 | ||
134 | /* | 134 | /* |
135 | * special vmas that are non-mergable, non-mlock()able | ||
136 | */ | ||
137 | #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP) | ||
138 | |||
139 | /* | ||
135 | * mapping from the currently active vm_flags protection bits (the | 140 | * mapping from the currently active vm_flags protection bits (the |
136 | * low four bits) to a page protection mask.. | 141 | * low four bits) to a page protection mask.. |
137 | */ | 142 | */ |
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index ec1a1baad34..b12f93a3c34 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h | |||
@@ -96,6 +96,7 @@ enum pageflags { | |||
96 | PG_swapbacked, /* Page is backed by RAM/swap */ | 96 | PG_swapbacked, /* Page is backed by RAM/swap */ |
97 | #ifdef CONFIG_UNEVICTABLE_LRU | 97 | #ifdef CONFIG_UNEVICTABLE_LRU |
98 | PG_unevictable, /* Page is "unevictable" */ | 98 | PG_unevictable, /* Page is "unevictable" */ |
99 | PG_mlocked, /* Page is vma mlocked */ | ||
99 | #endif | 100 | #endif |
100 | #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR | 101 | #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR |
101 | PG_uncached, /* Page has been mapped as uncached */ | 102 | PG_uncached, /* Page has been mapped as uncached */ |
@@ -232,7 +233,17 @@ PAGEFLAG_FALSE(SwapCache) | |||
232 | #ifdef CONFIG_UNEVICTABLE_LRU | 233 | #ifdef CONFIG_UNEVICTABLE_LRU |
233 | PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable) | 234 | PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable) |
234 | TESTCLEARFLAG(Unevictable, unevictable) | 235 | TESTCLEARFLAG(Unevictable, unevictable) |
236 | |||
237 | #define MLOCK_PAGES 1 | ||
238 | PAGEFLAG(Mlocked, mlocked) __CLEARPAGEFLAG(Mlocked, mlocked) | ||
239 | TESTSCFLAG(Mlocked, mlocked) | ||
240 | |||
235 | #else | 241 | #else |
242 | |||
243 | #define MLOCK_PAGES 0 | ||
244 | PAGEFLAG_FALSE(Mlocked) | ||
245 | SETPAGEFLAG_NOOP(Mlocked) TESTCLEARFLAG_FALSE(Mlocked) | ||
246 | |||
236 | PAGEFLAG_FALSE(Unevictable) TESTCLEARFLAG_FALSE(Unevictable) | 247 | PAGEFLAG_FALSE(Unevictable) TESTCLEARFLAG_FALSE(Unevictable) |
237 | SETPAGEFLAG_NOOP(Unevictable) CLEARPAGEFLAG_NOOP(Unevictable) | 248 | SETPAGEFLAG_NOOP(Unevictable) CLEARPAGEFLAG_NOOP(Unevictable) |
238 | __CLEARPAGEFLAG_NOOP(Unevictable) | 249 | __CLEARPAGEFLAG_NOOP(Unevictable) |
@@ -354,15 +365,17 @@ static inline void __ClearPageTail(struct page *page) | |||
354 | #endif /* !PAGEFLAGS_EXTENDED */ | 365 | #endif /* !PAGEFLAGS_EXTENDED */ |
355 | 366 | ||
356 | #ifdef CONFIG_UNEVICTABLE_LRU | 367 | #ifdef CONFIG_UNEVICTABLE_LRU |
357 | #define __PG_UNEVICTABLE (1 << PG_unevictable) | 368 | #define __PG_UNEVICTABLE (1 << PG_unevictable) |
369 | #define __PG_MLOCKED (1 << PG_mlocked) | ||
358 | #else | 370 | #else |
359 | #define __PG_UNEVICTABLE 0 | 371 | #define __PG_UNEVICTABLE 0 |
372 | #define __PG_MLOCKED 0 | ||
360 | #endif | 373 | #endif |
361 | 374 | ||
362 | #define PAGE_FLAGS (1 << PG_lru | 1 << PG_private | 1 << PG_locked | \ | 375 | #define PAGE_FLAGS (1 << PG_lru | 1 << PG_private | 1 << PG_locked | \ |
363 | 1 << PG_buddy | 1 << PG_writeback | \ | 376 | 1 << PG_buddy | 1 << PG_writeback | \ |
364 | 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ | 377 | 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ |
365 | __PG_UNEVICTABLE) | 378 | __PG_UNEVICTABLE | __PG_MLOCKED) |
366 | 379 | ||
367 | /* | 380 | /* |
368 | * Flags checked in bad_page(). Pages on the free list should not have | 381 | * Flags checked in bad_page(). Pages on the free list should not have |
diff --git a/include/linux/rmap.h b/include/linux/rmap.h index fed6f5e0b41..955667e6a52 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h | |||
@@ -117,6 +117,19 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *); | |||
117 | */ | 117 | */ |
118 | int page_mkclean(struct page *); | 118 | int page_mkclean(struct page *); |
119 | 119 | ||
120 | #ifdef CONFIG_UNEVICTABLE_LRU | ||
121 | /* | ||
122 | * called in munlock()/munmap() path to check for other vmas holding | ||
123 | * the page mlocked. | ||
124 | */ | ||
125 | int try_to_munlock(struct page *); | ||
126 | #else | ||
127 | static inline int try_to_munlock(struct page *page) | ||
128 | { | ||
129 | return 0; /* a.k.a. SWAP_SUCCESS */ | ||
130 | } | ||
131 | #endif | ||
132 | |||
120 | #else /* !CONFIG_MMU */ | 133 | #else /* !CONFIG_MMU */ |
121 | 134 | ||
122 | #define anon_vma_init() do {} while (0) | 135 | #define anon_vma_init() do {} while (0) |
@@ -140,5 +153,6 @@ static inline int page_mkclean(struct page *page) | |||
140 | #define SWAP_SUCCESS 0 | 153 | #define SWAP_SUCCESS 0 |
141 | #define SWAP_AGAIN 1 | 154 | #define SWAP_AGAIN 1 |
142 | #define SWAP_FAIL 2 | 155 | #define SWAP_FAIL 2 |
156 | #define SWAP_MLOCK 3 | ||
143 | 157 | ||
144 | #endif /* _LINUX_RMAP_H */ | 158 | #endif /* _LINUX_RMAP_H */ |