-rw-r--r-- | include/linux/mm.h | 5
-rw-r--r-- | include/linux/page-flags.h | 19
-rw-r--r-- | include/linux/rmap.h | 14
-rw-r--r-- | mm/internal.h | 71
-rw-r--r-- | mm/memory.c | 56
-rw-r--r-- | mm/migrate.c | 2
-rw-r--r-- | mm/mlock.c | 394
-rw-r--r-- | mm/mmap.c | 2
-rw-r--r-- | mm/nommu.c | 44
-rw-r--r-- | mm/page_alloc.c | 6
-rw-r--r-- | mm/rmap.c | 257
-rw-r--r-- | mm/swap.c | 2
-rw-r--r-- | mm/vmscan.c | 36
13 files changed, 817 insertions, 91 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 40236290e2ae..ffee2f743418 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -132,6 +132,11 @@ extern unsigned int kobjsize(const void *objp); | |||
132 | #define VM_RandomReadHint(v) ((v)->vm_flags & VM_RAND_READ) | 132 | #define VM_RandomReadHint(v) ((v)->vm_flags & VM_RAND_READ) |
133 | 133 | ||
134 | /* | 134 | /* |
135 | * special vmas that are non-mergable, non-mlock()able | ||
136 | */ | ||
137 | #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP) | ||
138 | |||
139 | /* | ||
135 | * mapping from the currently active vm_flags protection bits (the | 140 | * mapping from the currently active vm_flags protection bits (the |
136 | * low four bits) to a page protection mask.. | 141 | * low four bits) to a page protection mask.. |
137 | */ | 142 | */ |
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index ec1a1baad348..b12f93a3c345 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -96,6 +96,7 @@ enum pageflags { | |||
96 | PG_swapbacked, /* Page is backed by RAM/swap */ | 96 | PG_swapbacked, /* Page is backed by RAM/swap */ |
97 | #ifdef CONFIG_UNEVICTABLE_LRU | 97 | #ifdef CONFIG_UNEVICTABLE_LRU |
98 | PG_unevictable, /* Page is "unevictable" */ | 98 | PG_unevictable, /* Page is "unevictable" */ |
99 | PG_mlocked, /* Page is vma mlocked */ | ||
99 | #endif | 100 | #endif |
100 | #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR | 101 | #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR |
101 | PG_uncached, /* Page has been mapped as uncached */ | 102 | PG_uncached, /* Page has been mapped as uncached */ |
@@ -232,7 +233,17 @@ PAGEFLAG_FALSE(SwapCache) | |||
232 | #ifdef CONFIG_UNEVICTABLE_LRU | 233 | #ifdef CONFIG_UNEVICTABLE_LRU |
233 | PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable) | 234 | PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable) |
234 | TESTCLEARFLAG(Unevictable, unevictable) | 235 | TESTCLEARFLAG(Unevictable, unevictable) |
236 | |||
237 | #define MLOCK_PAGES 1 | ||
238 | PAGEFLAG(Mlocked, mlocked) __CLEARPAGEFLAG(Mlocked, mlocked) | ||
239 | TESTSCFLAG(Mlocked, mlocked) | ||
240 | |||
235 | #else | 241 | #else |
242 | |||
243 | #define MLOCK_PAGES 0 | ||
244 | PAGEFLAG_FALSE(Mlocked) | ||
245 | SETPAGEFLAG_NOOP(Mlocked) TESTCLEARFLAG_FALSE(Mlocked) | ||
246 | |||
236 | PAGEFLAG_FALSE(Unevictable) TESTCLEARFLAG_FALSE(Unevictable) | 247 | PAGEFLAG_FALSE(Unevictable) TESTCLEARFLAG_FALSE(Unevictable) |
237 | SETPAGEFLAG_NOOP(Unevictable) CLEARPAGEFLAG_NOOP(Unevictable) | 248 | SETPAGEFLAG_NOOP(Unevictable) CLEARPAGEFLAG_NOOP(Unevictable) |
238 | __CLEARPAGEFLAG_NOOP(Unevictable) | 249 | __CLEARPAGEFLAG_NOOP(Unevictable) |
@@ -354,15 +365,17 @@ static inline void __ClearPageTail(struct page *page) | |||
354 | #endif /* !PAGEFLAGS_EXTENDED */ | 365 | #endif /* !PAGEFLAGS_EXTENDED */ |
355 | 366 | ||
356 | #ifdef CONFIG_UNEVICTABLE_LRU | 367 | #ifdef CONFIG_UNEVICTABLE_LRU |
357 | #define __PG_UNEVICTABLE (1 << PG_unevictable) | 368 | #define __PG_UNEVICTABLE (1 << PG_unevictable) |
369 | #define __PG_MLOCKED (1 << PG_mlocked) | ||
358 | #else | 370 | #else |
359 | #define __PG_UNEVICTABLE 0 | 371 | #define __PG_UNEVICTABLE 0 |
372 | #define __PG_MLOCKED 0 | ||
360 | #endif | 373 | #endif |
361 | 374 | ||
362 | #define PAGE_FLAGS (1 << PG_lru | 1 << PG_private | 1 << PG_locked | \ | 375 | #define PAGE_FLAGS (1 << PG_lru | 1 << PG_private | 1 << PG_locked | \ |
363 | 1 << PG_buddy | 1 << PG_writeback | \ | 376 | 1 << PG_buddy | 1 << PG_writeback | \ |
364 | 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ | 377 | 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ |
365 | __PG_UNEVICTABLE) | 378 | __PG_UNEVICTABLE | __PG_MLOCKED) |
366 | 379 | ||
367 | /* | 380 | /* |
368 | * Flags checked in bad_page(). Pages on the free list should not have | 381 | * Flags checked in bad_page(). Pages on the free list should not have |
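The new PAGEFLAG(Mlocked, mlocked), __CLEARPAGEFLAG(Mlocked, mlocked) and TESTSCFLAG(Mlocked, mlocked) lines expand to the usual bit-op accessors on page->flags. As a reading aid only (the real definitions come from the macro machinery earlier in this header), the generated functions behave roughly like this sketch:

static inline int PageMlocked(struct page *page)
{
	return test_bit(PG_mlocked, &page->flags);
}

static inline void SetPageMlocked(struct page *page)
{
	set_bit(PG_mlocked, &page->flags);
}

static inline void ClearPageMlocked(struct page *page)
{
	clear_bit(PG_mlocked, &page->flags);
}

static inline void __ClearPageMlocked(struct page *page)
{
	__clear_bit(PG_mlocked, &page->flags);	/* non-atomic variant */
}

static inline int TestSetPageMlocked(struct page *page)
{
	return test_and_set_bit(PG_mlocked, &page->flags);
}

static inline int TestClearPageMlocked(struct page *page)
{
	return test_and_clear_bit(PG_mlocked, &page->flags);
}

The MLOCK_PAGES constant lets callers compile the mlock handling out when CONFIG_UNEVICTABLE_LRU is off, as the mm/rmap.c changes below do.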
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index fed6f5e0b411..955667e6a52d 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -117,6 +117,19 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *); | |||
117 | */ | 117 | */ |
118 | int page_mkclean(struct page *); | 118 | int page_mkclean(struct page *); |
119 | 119 | ||
120 | #ifdef CONFIG_UNEVICTABLE_LRU | ||
121 | /* | ||
122 | * called in munlock()/munmap() path to check for other vmas holding | ||
123 | * the page mlocked. | ||
124 | */ | ||
125 | int try_to_munlock(struct page *); | ||
126 | #else | ||
127 | static inline int try_to_munlock(struct page *page) | ||
128 | { | ||
129 | return 0; /* a.k.a. SWAP_SUCCESS */ | ||
130 | } | ||
131 | #endif | ||
132 | |||
120 | #else /* !CONFIG_MMU */ | 133 | #else /* !CONFIG_MMU */ |
121 | 134 | ||
122 | #define anon_vma_init() do {} while (0) | 135 | #define anon_vma_init() do {} while (0) |
@@ -140,5 +153,6 @@ static inline int page_mkclean(struct page *page) | |||
140 | #define SWAP_SUCCESS 0 | 153 | #define SWAP_SUCCESS 0 |
141 | #define SWAP_AGAIN 1 | 154 | #define SWAP_AGAIN 1 |
142 | #define SWAP_FAIL 2 | 155 | #define SWAP_FAIL 2 |
156 | #define SWAP_MLOCK 3 | ||
143 | 157 | ||
144 | #endif /* _LINUX_RMAP_H */ | 158 | #endif /* _LINUX_RMAP_H */ |
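SWAP_MLOCK is the new "page is mapped in a VM_LOCKED vma" result from try_to_unmap(). A hedged sketch of how a reclaim-side caller is expected to distinguish the four codes (the truncated mm/vmscan.c hunk at the bottom of this patch adds a cull_mlocked path along these lines; the helper name here is made up):

static int page_unmap_disposition(struct page *page)
{
	switch (try_to_unmap(page, 0)) {
	case SWAP_SUCCESS:
		return 0;	/* fully unmapped -- try to free the page */
	case SWAP_AGAIN:
		return 1;	/* missed a mapping -- keep and retry later */
	case SWAP_MLOCK:
		return 2;	/* mlocked -- cull to the unevictable list */
	default:		/* SWAP_FAIL */
		return 3;	/* unswappable -- reactivate */
	}
}

try_to_munlock(), by contrast, returns SWAP_SUCCESS when no other vma holds the page mlocked, which is why the !CONFIG_UNEVICTABLE_LRU stub above simply reports success.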
diff --git a/mm/internal.h b/mm/internal.h
index 3db17b2a1ac6..4ebf0bef9a39 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -61,6 +61,10 @@ static inline unsigned long page_order(struct page *page) | |||
61 | return page_private(page); | 61 | return page_private(page); |
62 | } | 62 | } |
63 | 63 | ||
64 | extern int mlock_vma_pages_range(struct vm_area_struct *vma, | ||
65 | unsigned long start, unsigned long end); | ||
66 | extern void munlock_vma_pages_all(struct vm_area_struct *vma); | ||
67 | |||
64 | #ifdef CONFIG_UNEVICTABLE_LRU | 68 | #ifdef CONFIG_UNEVICTABLE_LRU |
65 | /* | 69 | /* |
66 | * unevictable_migrate_page() called only from migrate_page_copy() to | 70 | * unevictable_migrate_page() called only from migrate_page_copy() to |
@@ -79,6 +83,65 @@ static inline void unevictable_migrate_page(struct page *new, struct page *old) | |||
79 | } | 83 | } |
80 | #endif | 84 | #endif |
81 | 85 | ||
86 | #ifdef CONFIG_UNEVICTABLE_LRU | ||
87 | /* | ||
88 | * Called only in fault path via page_evictable() for a new page | ||
89 | * to determine if it's being mapped into a LOCKED vma. | ||
90 | * If so, mark page as mlocked. | ||
91 | */ | ||
92 | static inline int is_mlocked_vma(struct vm_area_struct *vma, struct page *page) | ||
93 | { | ||
94 | VM_BUG_ON(PageLRU(page)); | ||
95 | |||
96 | if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) | ||
97 | return 0; | ||
98 | |||
99 | SetPageMlocked(page); | ||
100 | return 1; | ||
101 | } | ||
102 | |||
103 | /* | ||
104 | * must be called with vma's mmap_sem held for read, and page locked. | ||
105 | */ | ||
106 | extern void mlock_vma_page(struct page *page); | ||
107 | |||
108 | /* | ||
109 | * Clear the page's PageMlocked(). This can be useful in a situation where | ||
110 | * we want to unconditionally remove a page from the pagecache -- e.g., | ||
111 | * on truncation or freeing. | ||
112 | * | ||
113 | * It is legal to call this function for any page, mlocked or not. | ||
114 | * If called for a page that is still mapped by mlocked vmas, all we do | ||
115 | * is revert to lazy LRU behaviour -- semantics are not broken. | ||
116 | */ | ||
117 | extern void __clear_page_mlock(struct page *page); | ||
118 | static inline void clear_page_mlock(struct page *page) | ||
119 | { | ||
120 | if (unlikely(TestClearPageMlocked(page))) | ||
121 | __clear_page_mlock(page); | ||
122 | } | ||
123 | |||
124 | /* | ||
125 | * mlock_migrate_page - called only from migrate_page_copy() to | ||
126 | * migrate the Mlocked page flag | ||
127 | */ | ||
128 | static inline void mlock_migrate_page(struct page *newpage, struct page *page) | ||
129 | { | ||
130 | if (TestClearPageMlocked(page)) | ||
131 | SetPageMlocked(newpage); | ||
132 | } | ||
133 | |||
134 | |||
135 | #else /* CONFIG_UNEVICTABLE_LRU */ | ||
136 | static inline int is_mlocked_vma(struct vm_area_struct *v, struct page *p) | ||
137 | { | ||
138 | return 0; | ||
139 | } | ||
140 | static inline void clear_page_mlock(struct page *page) { } | ||
141 | static inline void mlock_vma_page(struct page *page) { } | ||
142 | static inline void mlock_migrate_page(struct page *new, struct page *old) { } | ||
143 | |||
144 | #endif /* CONFIG_UNEVICTABLE_LRU */ | ||
82 | 145 | ||
83 | /* | 146 | /* |
84 | * FLATMEM and DISCONTIGMEM configurations use alloc_bootmem_node, | 147 | * FLATMEM and DISCONTIGMEM configurations use alloc_bootmem_node, |
@@ -148,4 +211,12 @@ static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn, | |||
148 | } | 211 | } |
149 | #endif /* CONFIG_SPARSEMEM */ | 212 | #endif /* CONFIG_SPARSEMEM */ |
150 | 213 | ||
214 | #define GUP_FLAGS_WRITE 0x1 | ||
215 | #define GUP_FLAGS_FORCE 0x2 | ||
216 | #define GUP_FLAGS_IGNORE_VMA_PERMISSIONS 0x4 | ||
217 | |||
218 | int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | ||
219 | unsigned long start, int len, int flags, | ||
220 | struct page **pages, struct vm_area_struct **vmas); | ||
221 | |||
151 | #endif | 222 | #endif |
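The comment above describes the intended calling pattern for clear_page_mlock(): a caller that is about to drop a page from the page cache clears any lazily-set PG_mlocked first, so the page does not stay stranded off the normal LRUs. A hypothetical caller, shown only to illustrate the pattern (only clear_page_mlock()/__clear_page_mlock() come from this patch):

static void drop_cached_page(struct page *page)
{
	BUG_ON(!PageLocked(page));

	/* no-op unless PG_mlocked was set; moves the page back to a normal LRU */
	clear_page_mlock(page);

	remove_from_page_cache(page);
	page_cache_release(page);	/* drop the page cache reference */
}

The GUP_FLAGS_* constants and the __get_user_pages() declaration at the end of the hunk are consumed by the mm/memory.c and mm/nommu.c refactors below.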
diff --git a/mm/memory.c b/mm/memory.c
index 71cdefd1ef14..9fef7272fb9e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -64,6 +64,8 @@ | |||
64 | 64 | ||
65 | #include "internal.h" | 65 | #include "internal.h" |
66 | 66 | ||
67 | #include "internal.h" | ||
68 | |||
67 | #ifndef CONFIG_NEED_MULTIPLE_NODES | 69 | #ifndef CONFIG_NEED_MULTIPLE_NODES |
68 | /* use the per-pgdat data instead for discontigmem - mbligh */ | 70 | /* use the per-pgdat data instead for discontigmem - mbligh */ |
69 | unsigned long max_mapnr; | 71 | unsigned long max_mapnr; |
@@ -1129,12 +1131,17 @@ static inline int use_zero_page(struct vm_area_struct *vma) | |||
1129 | return !vma->vm_ops || !vma->vm_ops->fault; | 1131 | return !vma->vm_ops || !vma->vm_ops->fault; |
1130 | } | 1132 | } |
1131 | 1133 | ||
1132 | int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | 1134 | |
1133 | unsigned long start, int len, int write, int force, | 1135 | |
1136 | int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | ||
1137 | unsigned long start, int len, int flags, | ||
1134 | struct page **pages, struct vm_area_struct **vmas) | 1138 | struct page **pages, struct vm_area_struct **vmas) |
1135 | { | 1139 | { |
1136 | int i; | 1140 | int i; |
1137 | unsigned int vm_flags; | 1141 | unsigned int vm_flags = 0; |
1142 | int write = !!(flags & GUP_FLAGS_WRITE); | ||
1143 | int force = !!(flags & GUP_FLAGS_FORCE); | ||
1144 | int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS); | ||
1138 | 1145 | ||
1139 | if (len <= 0) | 1146 | if (len <= 0) |
1140 | return 0; | 1147 | return 0; |
@@ -1158,7 +1165,9 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
1158 | pud_t *pud; | 1165 | pud_t *pud; |
1159 | pmd_t *pmd; | 1166 | pmd_t *pmd; |
1160 | pte_t *pte; | 1167 | pte_t *pte; |
1161 | if (write) /* user gate pages are read-only */ | 1168 | |
1169 | /* user gate pages are read-only */ | ||
1170 | if (!ignore && write) | ||
1162 | return i ? : -EFAULT; | 1171 | return i ? : -EFAULT; |
1163 | if (pg > TASK_SIZE) | 1172 | if (pg > TASK_SIZE) |
1164 | pgd = pgd_offset_k(pg); | 1173 | pgd = pgd_offset_k(pg); |
@@ -1190,8 +1199,9 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
1190 | continue; | 1199 | continue; |
1191 | } | 1200 | } |
1192 | 1201 | ||
1193 | if (!vma || (vma->vm_flags & (VM_IO | VM_PFNMAP)) | 1202 | if (!vma || |
1194 | || !(vm_flags & vma->vm_flags)) | 1203 | (vma->vm_flags & (VM_IO | VM_PFNMAP)) || |
1204 | (!ignore && !(vm_flags & vma->vm_flags))) | ||
1195 | return i ? : -EFAULT; | 1205 | return i ? : -EFAULT; |
1196 | 1206 | ||
1197 | if (is_vm_hugetlb_page(vma)) { | 1207 | if (is_vm_hugetlb_page(vma)) { |
@@ -1266,6 +1276,23 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
1266 | } while (len); | 1276 | } while (len); |
1267 | return i; | 1277 | return i; |
1268 | } | 1278 | } |
1279 | |||
1280 | int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | ||
1281 | unsigned long start, int len, int write, int force, | ||
1282 | struct page **pages, struct vm_area_struct **vmas) | ||
1283 | { | ||
1284 | int flags = 0; | ||
1285 | |||
1286 | if (write) | ||
1287 | flags |= GUP_FLAGS_WRITE; | ||
1288 | if (force) | ||
1289 | flags |= GUP_FLAGS_FORCE; | ||
1290 | |||
1291 | return __get_user_pages(tsk, mm, | ||
1292 | start, len, flags, | ||
1293 | pages, vmas); | ||
1294 | } | ||
1295 | |||
1269 | EXPORT_SYMBOL(get_user_pages); | 1296 | EXPORT_SYMBOL(get_user_pages); |
1270 | 1297 | ||
1271 | pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, | 1298 | pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, |
@@ -1858,6 +1885,15 @@ gotten: | |||
1858 | new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); | 1885 | new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); |
1859 | if (!new_page) | 1886 | if (!new_page) |
1860 | goto oom; | 1887 | goto oom; |
1888 | /* | ||
1889 | * Don't let another task, with possibly unlocked vma, | ||
1890 | * keep the mlocked page. | ||
1891 | */ | ||
1892 | if (vma->vm_flags & VM_LOCKED) { | ||
1893 | lock_page(old_page); /* for LRU manipulation */ | ||
1894 | clear_page_mlock(old_page); | ||
1895 | unlock_page(old_page); | ||
1896 | } | ||
1861 | cow_user_page(new_page, old_page, address, vma); | 1897 | cow_user_page(new_page, old_page, address, vma); |
1862 | __SetPageUptodate(new_page); | 1898 | __SetPageUptodate(new_page); |
1863 | 1899 | ||
@@ -2325,7 +2361,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2325 | page_add_anon_rmap(page, vma, address); | 2361 | page_add_anon_rmap(page, vma, address); |
2326 | 2362 | ||
2327 | swap_free(entry); | 2363 | swap_free(entry); |
2328 | if (vm_swap_full()) | 2364 | if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) |
2329 | remove_exclusive_swap_page(page); | 2365 | remove_exclusive_swap_page(page); |
2330 | unlock_page(page); | 2366 | unlock_page(page); |
2331 | 2367 | ||
@@ -2465,6 +2501,12 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2465 | ret = VM_FAULT_OOM; | 2501 | ret = VM_FAULT_OOM; |
2466 | goto out; | 2502 | goto out; |
2467 | } | 2503 | } |
2504 | /* | ||
2505 | * Don't let another task, with possibly unlocked vma, | ||
2506 | * keep the mlocked page. | ||
2507 | */ | ||
2508 | if (vma->vm_flags & VM_LOCKED) | ||
2509 | clear_page_mlock(vmf.page); | ||
2468 | copy_user_highpage(page, vmf.page, address, vma); | 2510 | copy_user_highpage(page, vmf.page, address, vma); |
2469 | __SetPageUptodate(page); | 2511 | __SetPageUptodate(page); |
2470 | } else { | 2512 | } else { |
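get_user_pages() is now a thin wrapper that translates its write/force arguments into GUP_FLAGS_* bits for __get_user_pages(). A hedged sketch of how an internal caller could use the new entry point directly, for example with GUP_FLAGS_IGNORE_VMA_PERMISSIONS to look up pages in a vma whose protections would otherwise refuse the access (nothing in this diff sets that flag yet; the helper below is illustrative only):

static int peek_user_page(struct mm_struct *mm, unsigned long addr,
			  struct page **pagep)
{
	int flags = GUP_FLAGS_IGNORE_VMA_PERMISSIONS;	/* read-only, no force */

	/* caller must hold mm->mmap_sem for read, as for get_user_pages() */
	return __get_user_pages(current, mm, addr & PAGE_MASK, 1, flags,
				pagep, NULL);
}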
diff --git a/mm/migrate.c b/mm/migrate.c
index b10237d8b459..6802a7a3dfec 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -371,6 +371,8 @@ static void migrate_page_copy(struct page *newpage, struct page *page) | |||
371 | __set_page_dirty_nobuffers(newpage); | 371 | __set_page_dirty_nobuffers(newpage); |
372 | } | 372 | } |
373 | 373 | ||
374 | mlock_migrate_page(newpage, page); | ||
375 | |||
374 | #ifdef CONFIG_SWAP | 376 | #ifdef CONFIG_SWAP |
375 | ClearPageSwapCache(page); | 377 | ClearPageSwapCache(page); |
376 | #endif | 378 | #endif |
diff --git a/mm/mlock.c b/mm/mlock.c
index 01fbe93eff5c..8746fe3f9730 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -8,10 +8,18 @@ | |||
8 | #include <linux/capability.h> | 8 | #include <linux/capability.h> |
9 | #include <linux/mman.h> | 9 | #include <linux/mman.h> |
10 | #include <linux/mm.h> | 10 | #include <linux/mm.h> |
11 | #include <linux/swap.h> | ||
12 | #include <linux/swapops.h> | ||
13 | #include <linux/pagemap.h> | ||
11 | #include <linux/mempolicy.h> | 14 | #include <linux/mempolicy.h> |
12 | #include <linux/syscalls.h> | 15 | #include <linux/syscalls.h> |
13 | #include <linux/sched.h> | 16 | #include <linux/sched.h> |
14 | #include <linux/module.h> | 17 | #include <linux/module.h> |
18 | #include <linux/rmap.h> | ||
19 | #include <linux/mmzone.h> | ||
20 | #include <linux/hugetlb.h> | ||
21 | |||
22 | #include "internal.h" | ||
15 | 23 | ||
16 | int can_do_mlock(void) | 24 | int can_do_mlock(void) |
17 | { | 25 | { |
@@ -23,17 +31,360 @@ int can_do_mlock(void) | |||
23 | } | 31 | } |
24 | EXPORT_SYMBOL(can_do_mlock); | 32 | EXPORT_SYMBOL(can_do_mlock); |
25 | 33 | ||
34 | #ifdef CONFIG_UNEVICTABLE_LRU | ||
35 | /* | ||
36 | * Mlocked pages are marked with PageMlocked() flag for efficient testing | ||
37 | * in vmscan and, possibly, the fault path; and to support semi-accurate | ||
38 | * statistics. | ||
39 | * | ||
40 | * An mlocked page [PageMlocked(page)] is unevictable. As such, it will | ||
41 | * be placed on the LRU "unevictable" list, rather than the [in]active lists. | ||
42 | * The unevictable list is an LRU sibling list to the [in]active lists. | ||
43 | * PageUnevictable is set to indicate the unevictable state. | ||
44 | * | ||
45 | * When lazy mlocking via vmscan, it is important to ensure that the | ||
46 | * vma's VM_LOCKED status is not concurrently being modified, otherwise we | ||
47 | * may have mlocked a page that is being munlocked. So lazy mlock must take | ||
48 | * the mmap_sem for read, and verify that the vma really is locked | ||
49 | * (see mm/rmap.c). | ||
50 | */ | ||
51 | |||
52 | /* | ||
53 | * LRU accounting for clear_page_mlock() | ||
54 | */ | ||
55 | void __clear_page_mlock(struct page *page) | ||
56 | { | ||
57 | VM_BUG_ON(!PageLocked(page)); | ||
58 | |||
59 | if (!page->mapping) { /* truncated ? */ | ||
60 | return; | ||
61 | } | ||
62 | |||
63 | if (!isolate_lru_page(page)) { | ||
64 | putback_lru_page(page); | ||
65 | } else { | ||
66 | /* | ||
67 | * Page not on the LRU yet. Flush all pagevecs and retry. | ||
68 | */ | ||
69 | lru_add_drain_all(); | ||
70 | if (!isolate_lru_page(page)) | ||
71 | putback_lru_page(page); | ||
72 | } | ||
73 | } | ||
74 | |||
75 | /* | ||
76 | * Mark page as mlocked if not already. | ||
77 | * If page on LRU, isolate and putback to move to unevictable list. | ||
78 | */ | ||
79 | void mlock_vma_page(struct page *page) | ||
80 | { | ||
81 | BUG_ON(!PageLocked(page)); | ||
82 | |||
83 | if (!TestSetPageMlocked(page) && !isolate_lru_page(page)) | ||
84 | putback_lru_page(page); | ||
85 | } | ||
86 | |||
87 | /* | ||
88 | * called from munlock()/munmap() path with page supposedly on the LRU. | ||
89 | * | ||
90 | * Note: unlike mlock_vma_page(), we can't just clear the PageMlocked | ||
91 | * [in try_to_munlock()] and then attempt to isolate the page. We must | ||
92 | * isolate the page to keep others from messing with its unevictable | ||
93 | * and mlocked state while trying to munlock. However, we pre-clear the | ||
94 | * mlocked state anyway as we might lose the isolation race and we might | ||
95 | * not get another chance to clear PageMlocked. If we successfully | ||
96 | * isolate the page and try_to_munlock() detects other VM_LOCKED vmas | ||
97 | * mapping the page, it will restore the PageMlocked state, unless the page | ||
98 | * is mapped in a non-linear vma. So, we go ahead and SetPageMlocked(), | ||
99 | * perhaps redundantly. | ||
100 | * If we lose the isolation race, and the page is mapped by other VM_LOCKED | ||
101 | * vmas, we'll detect this in vmscan--via try_to_munlock() or try_to_unmap() | ||
102 | * either of which will restore the PageMlocked state by calling | ||
103 | * mlock_vma_page() above, if it can grab the vma's mmap sem. | ||
104 | */ | ||
105 | static void munlock_vma_page(struct page *page) | ||
106 | { | ||
107 | BUG_ON(!PageLocked(page)); | ||
108 | |||
109 | if (TestClearPageMlocked(page) && !isolate_lru_page(page)) { | ||
110 | try_to_munlock(page); | ||
111 | putback_lru_page(page); | ||
112 | } | ||
113 | } | ||
114 | |||
115 | /* | ||
116 | * mlock a range of pages in the vma. | ||
117 | * | ||
118 | * This takes care of making the pages present too. | ||
119 | * | ||
120 | * vma->vm_mm->mmap_sem must be held for write. | ||
121 | */ | ||
122 | static int __mlock_vma_pages_range(struct vm_area_struct *vma, | ||
123 | unsigned long start, unsigned long end) | ||
124 | { | ||
125 | struct mm_struct *mm = vma->vm_mm; | ||
126 | unsigned long addr = start; | ||
127 | struct page *pages[16]; /* 16 gives a reasonable batch */ | ||
128 | int write = !!(vma->vm_flags & VM_WRITE); | ||
129 | int nr_pages = (end - start) / PAGE_SIZE; | ||
130 | int ret; | ||
131 | |||
132 | VM_BUG_ON(start & ~PAGE_MASK || end & ~PAGE_MASK); | ||
133 | VM_BUG_ON(start < vma->vm_start || end > vma->vm_end); | ||
134 | VM_BUG_ON(!rwsem_is_locked(&vma->vm_mm->mmap_sem)); | ||
135 | |||
136 | lru_add_drain_all(); /* push cached pages to LRU */ | ||
137 | |||
138 | while (nr_pages > 0) { | ||
139 | int i; | ||
140 | |||
141 | cond_resched(); | ||
142 | |||
143 | /* | ||
144 | * get_user_pages makes pages present if we are | ||
145 | * setting mlock. and this extra reference count will | ||
146 | * disable migration of this page. However, page may | ||
147 | * still be truncated out from under us. | ||
148 | */ | ||
149 | ret = get_user_pages(current, mm, addr, | ||
150 | min_t(int, nr_pages, ARRAY_SIZE(pages)), | ||
151 | write, 0, pages, NULL); | ||
152 | /* | ||
153 | * This can happen for, e.g., VM_NONLINEAR regions before | ||
154 | * a page has been allocated and mapped at a given offset, | ||
155 | * or for addresses that map beyond end of a file. | ||
156 | * We'll mlock the pages if/when they get faulted in. | ||
157 | */ | ||
158 | if (ret < 0) | ||
159 | break; | ||
160 | if (ret == 0) { | ||
161 | /* | ||
162 | * We know the vma is there, so the only time | ||
163 | * we cannot get a single page should be an | ||
164 | * error (ret < 0) case. | ||
165 | */ | ||
166 | WARN_ON(1); | ||
167 | break; | ||
168 | } | ||
169 | |||
170 | lru_add_drain(); /* push cached pages to LRU */ | ||
171 | |||
172 | for (i = 0; i < ret; i++) { | ||
173 | struct page *page = pages[i]; | ||
174 | |||
175 | lock_page(page); | ||
176 | /* | ||
177 | * Because we lock page here and migration is blocked | ||
178 | * by the elevated reference, we need only check for | ||
179 | * page truncation (file-cache only). | ||
180 | */ | ||
181 | if (page->mapping) | ||
182 | mlock_vma_page(page); | ||
183 | unlock_page(page); | ||
184 | put_page(page); /* ref from get_user_pages() */ | ||
185 | |||
186 | /* | ||
187 | * here we assume that get_user_pages() has given us | ||
188 | * a list of virtually contiguous pages. | ||
189 | */ | ||
190 | addr += PAGE_SIZE; /* for next get_user_pages() */ | ||
191 | nr_pages--; | ||
192 | } | ||
193 | } | ||
194 | |||
195 | lru_add_drain_all(); /* to update stats */ | ||
196 | |||
197 | return 0; /* count entire vma as locked_vm */ | ||
198 | } | ||
199 | |||
200 | /* | ||
201 | * private structure for munlock page table walk | ||
202 | */ | ||
203 | struct munlock_page_walk { | ||
204 | struct vm_area_struct *vma; | ||
205 | pmd_t *pmd; /* for migration_entry_wait() */ | ||
206 | }; | ||
207 | |||
208 | /* | ||
209 | * munlock normal pages for present ptes | ||
210 | */ | ||
211 | static int __munlock_pte_handler(pte_t *ptep, unsigned long addr, | ||
212 | unsigned long end, struct mm_walk *walk) | ||
213 | { | ||
214 | struct munlock_page_walk *mpw = walk->private; | ||
215 | swp_entry_t entry; | ||
216 | struct page *page; | ||
217 | pte_t pte; | ||
218 | |||
219 | retry: | ||
220 | pte = *ptep; | ||
221 | /* | ||
222 | * If it's a swap pte, we might be racing with page migration. | ||
223 | */ | ||
224 | if (unlikely(!pte_present(pte))) { | ||
225 | if (!is_swap_pte(pte)) | ||
226 | goto out; | ||
227 | entry = pte_to_swp_entry(pte); | ||
228 | if (is_migration_entry(entry)) { | ||
229 | migration_entry_wait(mpw->vma->vm_mm, mpw->pmd, addr); | ||
230 | goto retry; | ||
231 | } | ||
232 | goto out; | ||
233 | } | ||
234 | |||
235 | page = vm_normal_page(mpw->vma, addr, pte); | ||
236 | if (!page) | ||
237 | goto out; | ||
238 | |||
239 | lock_page(page); | ||
240 | if (!page->mapping) { | ||
241 | unlock_page(page); | ||
242 | goto retry; | ||
243 | } | ||
244 | munlock_vma_page(page); | ||
245 | unlock_page(page); | ||
246 | |||
247 | out: | ||
248 | return 0; | ||
249 | } | ||
250 | |||
251 | /* | ||
252 | * Save pmd for pte handler for waiting on migration entries | ||
253 | */ | ||
254 | static int __munlock_pmd_handler(pmd_t *pmd, unsigned long addr, | ||
255 | unsigned long end, struct mm_walk *walk) | ||
256 | { | ||
257 | struct munlock_page_walk *mpw = walk->private; | ||
258 | |||
259 | mpw->pmd = pmd; | ||
260 | return 0; | ||
261 | } | ||
262 | |||
263 | |||
264 | /* | ||
265 | * munlock a range of pages in the vma using standard page table walk. | ||
266 | * | ||
267 | * vma->vm_mm->mmap_sem must be held for write. | ||
268 | */ | ||
269 | static void __munlock_vma_pages_range(struct vm_area_struct *vma, | ||
270 | unsigned long start, unsigned long end) | ||
271 | { | ||
272 | struct mm_struct *mm = vma->vm_mm; | ||
273 | struct munlock_page_walk mpw = { | ||
274 | .vma = vma, | ||
275 | }; | ||
276 | struct mm_walk munlock_page_walk = { | ||
277 | .pmd_entry = __munlock_pmd_handler, | ||
278 | .pte_entry = __munlock_pte_handler, | ||
279 | .private = &mpw, | ||
280 | .mm = mm, | ||
281 | }; | ||
282 | |||
283 | VM_BUG_ON(start & ~PAGE_MASK || end & ~PAGE_MASK); | ||
284 | VM_BUG_ON(!rwsem_is_locked(&vma->vm_mm->mmap_sem)); | ||
285 | VM_BUG_ON(start < vma->vm_start); | ||
286 | VM_BUG_ON(end > vma->vm_end); | ||
287 | |||
288 | lru_add_drain_all(); /* push cached pages to LRU */ | ||
289 | walk_page_range(start, end, &munlock_page_walk); | ||
290 | lru_add_drain_all(); /* to update stats */ | ||
291 | } | ||
292 | |||
293 | #else /* CONFIG_UNEVICTABLE_LRU */ | ||
294 | |||
295 | /* | ||
296 | * Just make pages present if VM_LOCKED. No-op if unlocking. | ||
297 | */ | ||
298 | static int __mlock_vma_pages_range(struct vm_area_struct *vma, | ||
299 | unsigned long start, unsigned long end) | ||
300 | { | ||
301 | if (vma->vm_flags & VM_LOCKED) | ||
302 | make_pages_present(start, end); | ||
303 | return 0; | ||
304 | } | ||
305 | |||
306 | /* | ||
307 | * munlock a range of pages in the vma -- no-op. | ||
308 | */ | ||
309 | static void __munlock_vma_pages_range(struct vm_area_struct *vma, | ||
310 | unsigned long start, unsigned long end) | ||
311 | { | ||
312 | } | ||
313 | #endif /* CONFIG_UNEVICTABLE_LRU */ | ||
314 | |||
315 | /* | ||
316 | * mlock all pages in this vma range. For mmap()/mremap()/... | ||
317 | */ | ||
318 | int mlock_vma_pages_range(struct vm_area_struct *vma, | ||
319 | unsigned long start, unsigned long end) | ||
320 | { | ||
321 | int nr_pages = (end - start) / PAGE_SIZE; | ||
322 | BUG_ON(!(vma->vm_flags & VM_LOCKED)); | ||
323 | |||
324 | /* | ||
325 | * filter unlockable vmas | ||
326 | */ | ||
327 | if (vma->vm_flags & (VM_IO | VM_PFNMAP)) | ||
328 | goto no_mlock; | ||
329 | |||
330 | if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) || | ||
331 | is_vm_hugetlb_page(vma) || | ||
332 | vma == get_gate_vma(current))) | ||
333 | return __mlock_vma_pages_range(vma, start, end); | ||
334 | |||
335 | /* | ||
336 | * User mapped kernel pages or huge pages: | ||
337 | * make these pages present to populate the ptes, but | ||
338 | * fall thru' to reset VM_LOCKED--no need to unlock, and | ||
339 | * return nr_pages so these don't get counted against task's | ||
340 | * locked limit. huge pages are already counted against | ||
341 | * locked vm limit. | ||
342 | */ | ||
343 | make_pages_present(start, end); | ||
344 | |||
345 | no_mlock: | ||
346 | vma->vm_flags &= ~VM_LOCKED; /* and don't come back! */ | ||
347 | return nr_pages; /* pages NOT mlocked */ | ||
348 | } | ||
349 | |||
350 | |||
351 | /* | ||
352 | * munlock all pages in vma. For munmap() and exit(). | ||
353 | */ | ||
354 | void munlock_vma_pages_all(struct vm_area_struct *vma) | ||
355 | { | ||
356 | vma->vm_flags &= ~VM_LOCKED; | ||
357 | __munlock_vma_pages_range(vma, vma->vm_start, vma->vm_end); | ||
358 | } | ||
359 | |||
360 | /* | ||
361 | * mlock_fixup - handle mlock[all]/munlock[all] requests. | ||
362 | * | ||
363 | * Filters out "special" vmas -- VM_LOCKED never gets set for these, and | ||
364 | * munlock is a no-op. However, for some special vmas, we go ahead and | ||
365 | * populate the ptes via make_pages_present(). | ||
366 | * | ||
367 | * For vmas that pass the filters, merge/split as appropriate. | ||
368 | */ | ||
26 | static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev, | 369 | static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev, |
27 | unsigned long start, unsigned long end, unsigned int newflags) | 370 | unsigned long start, unsigned long end, unsigned int newflags) |
28 | { | 371 | { |
29 | struct mm_struct * mm = vma->vm_mm; | 372 | struct mm_struct *mm = vma->vm_mm; |
30 | pgoff_t pgoff; | 373 | pgoff_t pgoff; |
31 | int pages; | 374 | int nr_pages; |
32 | int ret = 0; | 375 | int ret = 0; |
33 | 376 | int lock = newflags & VM_LOCKED; | |
34 | if (newflags == vma->vm_flags) { | 377 | |
35 | *prev = vma; | 378 | if (newflags == vma->vm_flags || |
36 | goto out; | 379 | (vma->vm_flags & (VM_IO | VM_PFNMAP))) |
380 | goto out; /* don't set VM_LOCKED, don't count */ | ||
381 | |||
382 | if ((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) || | ||
383 | is_vm_hugetlb_page(vma) || | ||
384 | vma == get_gate_vma(current)) { | ||
385 | if (lock) | ||
386 | make_pages_present(start, end); | ||
387 | goto out; /* don't set VM_LOCKED, don't count */ | ||
37 | } | 388 | } |
38 | 389 | ||
39 | pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); | 390 | pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); |
@@ -44,8 +395,6 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev, | |||
44 | goto success; | 395 | goto success; |
45 | } | 396 | } |
46 | 397 | ||
47 | *prev = vma; | ||
48 | |||
49 | if (start != vma->vm_start) { | 398 | if (start != vma->vm_start) { |
50 | ret = split_vma(mm, vma, start, 1); | 399 | ret = split_vma(mm, vma, start, 1); |
51 | if (ret) | 400 | if (ret) |
@@ -60,24 +409,31 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev, | |||
60 | 409 | ||
61 | success: | 410 | success: |
62 | /* | 411 | /* |
412 | * Keep track of amount of locked VM. | ||
413 | */ | ||
414 | nr_pages = (end - start) >> PAGE_SHIFT; | ||
415 | if (!lock) | ||
416 | nr_pages = -nr_pages; | ||
417 | mm->locked_vm += nr_pages; | ||
418 | |||
419 | /* | ||
63 | * vm_flags is protected by the mmap_sem held in write mode. | 420 | * vm_flags is protected by the mmap_sem held in write mode. |
64 | * It's okay if try_to_unmap_one unmaps a page just after we | 421 | * It's okay if try_to_unmap_one unmaps a page just after we |
65 | * set VM_LOCKED, make_pages_present below will bring it back. | 422 | * set VM_LOCKED, __mlock_vma_pages_range will bring it back. |
66 | */ | 423 | */ |
67 | vma->vm_flags = newflags; | 424 | vma->vm_flags = newflags; |
68 | 425 | ||
69 | /* | 426 | if (lock) { |
70 | * Keep track of amount of locked VM. | 427 | ret = __mlock_vma_pages_range(vma, start, end); |
71 | */ | 428 | if (ret > 0) { |
72 | pages = (end - start) >> PAGE_SHIFT; | 429 | mm->locked_vm -= ret; |
73 | if (newflags & VM_LOCKED) { | 430 | ret = 0; |
74 | pages = -pages; | 431 | } |
75 | if (!(newflags & VM_IO)) | 432 | } else |
76 | ret = make_pages_present(start, end); | 433 | __munlock_vma_pages_range(vma, start, end); |
77 | } | ||
78 | 434 | ||
79 | mm->locked_vm -= pages; | ||
80 | out: | 435 | out: |
436 | *prev = vma; | ||
81 | return ret; | 437 | return ret; |
82 | } | 438 | } |
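mlock_fixup() is reached from the mlock()/munlock()/mlockall()/munlockall() system calls: locking goes through __mlock_vma_pages_range(), unlocking through __munlock_vma_pages_range(). A minimal userspace exerciser for these paths (a sketch; error handling kept to perror(), and RLIMIT_MEMLOCK may make mlock() fail with ENOMEM or EPERM):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 16 * 4096;
	char *buf = malloc(len);

	if (!buf)
		return 1;
	memset(buf, 0, len);		/* fault the pages in */
	if (mlock(buf, len))		/* pages become PG_mlocked/unevictable */
		perror("mlock");
	if (munlock(buf, len))		/* pages return to the normal LRUs */
		perror("munlock");
	free(buf);
	return 0;
}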
83 | 439 | ||
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -662,8 +662,6 @@ again: remove_next = 1 + (end > next->vm_end); | |||
662 | * If the vma has a ->close operation then the driver probably needs to release | 662 | * If the vma has a ->close operation then the driver probably needs to release |
663 | * per-vma resources, so we don't attempt to merge those. | 663 | * per-vma resources, so we don't attempt to merge those. |
664 | */ | 664 | */ |
665 | #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP) | ||
666 | |||
667 | static inline int is_mergeable_vma(struct vm_area_struct *vma, | 665 | static inline int is_mergeable_vma(struct vm_area_struct *vma, |
668 | struct file *file, unsigned long vm_flags) | 666 | struct file *file, unsigned long vm_flags) |
669 | { | 667 | { |
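This hunk only deletes the local VM_SPECIAL definition, which the include/linux/mm.h hunk above moved into the header so the mlock code can share the same "special vma" filter that vma merging uses. A condensed illustration of the shared test (compare is_mlocked_vma() in the mm/internal.h hunk; the helper name is invented):

static inline int vma_is_mlockable(struct vm_area_struct *vma)
{
	/* plain VM_LOCKED vmas only; IO/PFNMAP/reserved/dontexpand are skipped */
	return (vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) == VM_LOCKED;
}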
diff --git a/mm/nommu.c b/mm/nommu.c
index ed75bc962fbe..2696b24f2bb3 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -34,6 +34,8 @@ | |||
34 | #include <asm/tlb.h> | 34 | #include <asm/tlb.h> |
35 | #include <asm/tlbflush.h> | 35 | #include <asm/tlbflush.h> |
36 | 36 | ||
37 | #include "internal.h" | ||
38 | |||
37 | void *high_memory; | 39 | void *high_memory; |
38 | struct page *mem_map; | 40 | struct page *mem_map; |
39 | unsigned long max_mapnr; | 41 | unsigned long max_mapnr; |
@@ -128,20 +130,16 @@ unsigned int kobjsize(const void *objp) | |||
128 | return PAGE_SIZE << compound_order(page); | 130 | return PAGE_SIZE << compound_order(page); |
129 | } | 131 | } |
130 | 132 | ||
131 | /* | 133 | int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, |
132 | * get a list of pages in an address range belonging to the specified process | 134 | unsigned long start, int len, int flags, |
133 | * and indicate the VMA that covers each page | 135 | struct page **pages, struct vm_area_struct **vmas) |
134 | * - this is potentially dodgy as we may end incrementing the page count of a | ||
135 | * slab page or a secondary page from a compound page | ||
136 | * - don't permit access to VMAs that don't support it, such as I/O mappings | ||
137 | */ | ||
138 | int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | ||
139 | unsigned long start, int len, int write, int force, | ||
140 | struct page **pages, struct vm_area_struct **vmas) | ||
141 | { | 136 | { |
142 | struct vm_area_struct *vma; | 137 | struct vm_area_struct *vma; |
143 | unsigned long vm_flags; | 138 | unsigned long vm_flags; |
144 | int i; | 139 | int i; |
140 | int write = !!(flags & GUP_FLAGS_WRITE); | ||
141 | int force = !!(flags & GUP_FLAGS_FORCE); | ||
142 | int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS); | ||
145 | 143 | ||
146 | /* calculate required read or write permissions. | 144 | /* calculate required read or write permissions. |
147 | * - if 'force' is set, we only require the "MAY" flags. | 145 | * - if 'force' is set, we only require the "MAY" flags. |
@@ -156,7 +154,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
156 | 154 | ||
157 | /* protect what we can, including chardevs */ | 155 | /* protect what we can, including chardevs */ |
158 | if (vma->vm_flags & (VM_IO | VM_PFNMAP) || | 156 | if (vma->vm_flags & (VM_IO | VM_PFNMAP) || |
159 | !(vm_flags & vma->vm_flags)) | 157 | (!ignore && !(vm_flags & vma->vm_flags))) |
160 | goto finish_or_fault; | 158 | goto finish_or_fault; |
161 | 159 | ||
162 | if (pages) { | 160 | if (pages) { |
@@ -174,6 +172,30 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
174 | finish_or_fault: | 172 | finish_or_fault: |
175 | return i ? : -EFAULT; | 173 | return i ? : -EFAULT; |
176 | } | 174 | } |
175 | |||
176 | |||
177 | /* | ||
178 | * get a list of pages in an address range belonging to the specified process | ||
179 | * and indicate the VMA that covers each page | ||
180 | * - this is potentially dodgy as we may end incrementing the page count of a | ||
181 | * slab page or a secondary page from a compound page | ||
182 | * - don't permit access to VMAs that don't support it, such as I/O mappings | ||
183 | */ | ||
184 | int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | ||
185 | unsigned long start, int len, int write, int force, | ||
186 | struct page **pages, struct vm_area_struct **vmas) | ||
187 | { | ||
188 | int flags = 0; | ||
189 | |||
190 | if (write) | ||
191 | flags |= GUP_FLAGS_WRITE; | ||
192 | if (force) | ||
193 | flags |= GUP_FLAGS_FORCE; | ||
194 | |||
195 | return __get_user_pages(tsk, mm, | ||
196 | start, len, flags, | ||
197 | pages, vmas); | ||
198 | } | ||
177 | EXPORT_SYMBOL(get_user_pages); | 199 | EXPORT_SYMBOL(get_user_pages); |
178 | 200 | ||
179 | DEFINE_RWLOCK(vmlist_lock); | 201 | DEFINE_RWLOCK(vmlist_lock); |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4125230a1b2c..5886586fde6c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -616,7 +616,11 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags) | |||
616 | 616 | ||
617 | page->flags &= ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_reclaim | | 617 | page->flags &= ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_reclaim | |
618 | 1 << PG_referenced | 1 << PG_arch_1 | | 618 | 1 << PG_referenced | 1 << PG_arch_1 | |
619 | 1 << PG_owner_priv_1 | 1 << PG_mappedtodisk); | 619 | 1 << PG_owner_priv_1 | 1 << PG_mappedtodisk |
620 | #ifdef CONFIG_UNEVICTABLE_LRU | ||
621 | | 1 << PG_mlocked | ||
622 | #endif | ||
623 | ); | ||
620 | set_page_private(page, 0); | 624 | set_page_private(page, 0); |
621 | set_page_refcounted(page); | 625 | set_page_refcounted(page); |
622 | 626 | ||
diff --git a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -53,6 +53,8 @@ | |||
53 | 53 | ||
54 | #include <asm/tlbflush.h> | 54 | #include <asm/tlbflush.h> |
55 | 55 | ||
56 | #include "internal.h" | ||
57 | |||
56 | struct kmem_cache *anon_vma_cachep; | 58 | struct kmem_cache *anon_vma_cachep; |
57 | 59 | ||
58 | /** | 60 | /** |
@@ -290,6 +292,32 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm, | |||
290 | return NULL; | 292 | return NULL; |
291 | } | 293 | } |
292 | 294 | ||
295 | /** | ||
296 | * page_mapped_in_vma - check whether a page is really mapped in a VMA | ||
297 | * @page: the page to test | ||
298 | * @vma: the VMA to test | ||
299 | * | ||
300 | * Returns 1 if the page is mapped into the page tables of the VMA, 0 | ||
301 | * if the page is not mapped into the page tables of this VMA. Only | ||
302 | * valid for normal file or anonymous VMAs. | ||
303 | */ | ||
304 | static int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma) | ||
305 | { | ||
306 | unsigned long address; | ||
307 | pte_t *pte; | ||
308 | spinlock_t *ptl; | ||
309 | |||
310 | address = vma_address(page, vma); | ||
311 | if (address == -EFAULT) /* out of vma range */ | ||
312 | return 0; | ||
313 | pte = page_check_address(page, vma->vm_mm, address, &ptl, 1); | ||
314 | if (!pte) /* the page is not in this mm */ | ||
315 | return 0; | ||
316 | pte_unmap_unlock(pte, ptl); | ||
317 | |||
318 | return 1; | ||
319 | } | ||
320 | |||
293 | /* | 321 | /* |
294 | * Subfunctions of page_referenced: page_referenced_one called | 322 | * Subfunctions of page_referenced: page_referenced_one called |
295 | * repeatedly from either page_referenced_anon or page_referenced_file. | 323 | * repeatedly from either page_referenced_anon or page_referenced_file. |
@@ -311,10 +339,17 @@ static int page_referenced_one(struct page *page, | |||
311 | if (!pte) | 339 | if (!pte) |
312 | goto out; | 340 | goto out; |
313 | 341 | ||
342 | /* | ||
343 | * Don't want to elevate referenced for mlocked page that gets this far, | ||
344 | * in order that it progresses to try_to_unmap and is moved to the | ||
345 | * unevictable list. | ||
346 | */ | ||
314 | if (vma->vm_flags & VM_LOCKED) { | 347 | if (vma->vm_flags & VM_LOCKED) { |
315 | referenced++; | ||
316 | *mapcount = 1; /* break early from loop */ | 348 | *mapcount = 1; /* break early from loop */ |
317 | } else if (ptep_clear_flush_young_notify(vma, address, pte)) | 349 | goto out_unmap; |
350 | } | ||
351 | |||
352 | if (ptep_clear_flush_young_notify(vma, address, pte)) | ||
318 | referenced++; | 353 | referenced++; |
319 | 354 | ||
320 | /* Pretend the page is referenced if the task has the | 355 | /* Pretend the page is referenced if the task has the |
@@ -323,6 +358,7 @@ static int page_referenced_one(struct page *page, | |||
323 | rwsem_is_locked(&mm->mmap_sem)) | 358 | rwsem_is_locked(&mm->mmap_sem)) |
324 | referenced++; | 359 | referenced++; |
325 | 360 | ||
361 | out_unmap: | ||
326 | (*mapcount)--; | 362 | (*mapcount)--; |
327 | pte_unmap_unlock(pte, ptl); | 363 | pte_unmap_unlock(pte, ptl); |
328 | out: | 364 | out: |
@@ -412,11 +448,6 @@ static int page_referenced_file(struct page *page, | |||
412 | */ | 448 | */ |
413 | if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont)) | 449 | if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont)) |
414 | continue; | 450 | continue; |
415 | if ((vma->vm_flags & (VM_LOCKED|VM_MAYSHARE)) | ||
416 | == (VM_LOCKED|VM_MAYSHARE)) { | ||
417 | referenced++; | ||
418 | break; | ||
419 | } | ||
420 | referenced += page_referenced_one(page, vma, &mapcount); | 451 | referenced += page_referenced_one(page, vma, &mapcount); |
421 | if (!mapcount) | 452 | if (!mapcount) |
422 | break; | 453 | break; |
@@ -739,11 +770,16 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, | |||
739 | * If it's recently referenced (perhaps page_referenced | 770 | * If it's recently referenced (perhaps page_referenced |
740 | * skipped over this mm) then we should reactivate it. | 771 | * skipped over this mm) then we should reactivate it. |
741 | */ | 772 | */ |
742 | if (!migration && ((vma->vm_flags & VM_LOCKED) || | 773 | if (!migration) { |
743 | (ptep_clear_flush_young_notify(vma, address, pte)))) { | 774 | if (vma->vm_flags & VM_LOCKED) { |
744 | ret = SWAP_FAIL; | 775 | ret = SWAP_MLOCK; |
745 | goto out_unmap; | 776 | goto out_unmap; |
746 | } | 777 | } |
778 | if (ptep_clear_flush_young_notify(vma, address, pte)) { | ||
779 | ret = SWAP_FAIL; | ||
780 | goto out_unmap; | ||
781 | } | ||
782 | } | ||
747 | 783 | ||
748 | /* Nuke the page table entry. */ | 784 | /* Nuke the page table entry. */ |
749 | flush_cache_page(vma, address, page_to_pfn(page)); | 785 | flush_cache_page(vma, address, page_to_pfn(page)); |
@@ -824,12 +860,17 @@ out: | |||
824 | * For very sparsely populated VMAs this is a little inefficient - chances are | 860 | * For very sparsely populated VMAs this is a little inefficient - chances are |
825 | * there there won't be many ptes located within the scan cluster. In this case | 861 | * there there won't be many ptes located within the scan cluster. In this case |
826 | * maybe we could scan further - to the end of the pte page, perhaps. | 862 | * maybe we could scan further - to the end of the pte page, perhaps. |
863 | * | ||
864 | * Mlocked pages: check VM_LOCKED under mmap_sem held for read, if we can | ||
865 | * acquire it without blocking. If vma locked, mlock the pages in the cluster, | ||
866 | * rather than unmapping them. If we encounter the "check_page" that vmscan is | ||
867 | * trying to unmap, return SWAP_MLOCK, else default SWAP_AGAIN. | ||
827 | */ | 868 | */ |
828 | #define CLUSTER_SIZE min(32*PAGE_SIZE, PMD_SIZE) | 869 | #define CLUSTER_SIZE min(32*PAGE_SIZE, PMD_SIZE) |
829 | #define CLUSTER_MASK (~(CLUSTER_SIZE - 1)) | 870 | #define CLUSTER_MASK (~(CLUSTER_SIZE - 1)) |
830 | 871 | ||
831 | static void try_to_unmap_cluster(unsigned long cursor, | 872 | static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount, |
832 | unsigned int *mapcount, struct vm_area_struct *vma) | 873 | struct vm_area_struct *vma, struct page *check_page) |
833 | { | 874 | { |
834 | struct mm_struct *mm = vma->vm_mm; | 875 | struct mm_struct *mm = vma->vm_mm; |
835 | pgd_t *pgd; | 876 | pgd_t *pgd; |
@@ -841,6 +882,8 @@ static void try_to_unmap_cluster(unsigned long cursor, | |||
841 | struct page *page; | 882 | struct page *page; |
842 | unsigned long address; | 883 | unsigned long address; |
843 | unsigned long end; | 884 | unsigned long end; |
885 | int ret = SWAP_AGAIN; | ||
886 | int locked_vma = 0; | ||
844 | 887 | ||
845 | address = (vma->vm_start + cursor) & CLUSTER_MASK; | 888 | address = (vma->vm_start + cursor) & CLUSTER_MASK; |
846 | end = address + CLUSTER_SIZE; | 889 | end = address + CLUSTER_SIZE; |
@@ -851,15 +894,26 @@ static void try_to_unmap_cluster(unsigned long cursor, | |||
851 | 894 | ||
852 | pgd = pgd_offset(mm, address); | 895 | pgd = pgd_offset(mm, address); |
853 | if (!pgd_present(*pgd)) | 896 | if (!pgd_present(*pgd)) |
854 | return; | 897 | return ret; |
855 | 898 | ||
856 | pud = pud_offset(pgd, address); | 899 | pud = pud_offset(pgd, address); |
857 | if (!pud_present(*pud)) | 900 | if (!pud_present(*pud)) |
858 | return; | 901 | return ret; |
859 | 902 | ||
860 | pmd = pmd_offset(pud, address); | 903 | pmd = pmd_offset(pud, address); |
861 | if (!pmd_present(*pmd)) | 904 | if (!pmd_present(*pmd)) |
862 | return; | 905 | return ret; |
906 | |||
907 | /* | ||
908 | * MLOCK_PAGES => feature is configured. | ||
909 | * if we can acquire the mmap_sem for read, and vma is VM_LOCKED, | ||
910 | * keep the sem while scanning the cluster for mlocking pages. | ||
911 | */ | ||
912 | if (MLOCK_PAGES && down_read_trylock(&vma->vm_mm->mmap_sem)) { | ||
913 | locked_vma = (vma->vm_flags & VM_LOCKED); | ||
914 | if (!locked_vma) | ||
915 | up_read(&vma->vm_mm->mmap_sem); /* don't need it */ | ||
916 | } | ||
863 | 917 | ||
864 | pte = pte_offset_map_lock(mm, pmd, address, &ptl); | 918 | pte = pte_offset_map_lock(mm, pmd, address, &ptl); |
865 | 919 | ||
@@ -872,6 +926,13 @@ static void try_to_unmap_cluster(unsigned long cursor, | |||
872 | page = vm_normal_page(vma, address, *pte); | 926 | page = vm_normal_page(vma, address, *pte); |
873 | BUG_ON(!page || PageAnon(page)); | 927 | BUG_ON(!page || PageAnon(page)); |
874 | 928 | ||
929 | if (locked_vma) { | ||
930 | mlock_vma_page(page); /* no-op if already mlocked */ | ||
931 | if (page == check_page) | ||
932 | ret = SWAP_MLOCK; | ||
933 | continue; /* don't unmap */ | ||
934 | } | ||
935 | |||
875 | if (ptep_clear_flush_young_notify(vma, address, pte)) | 936 | if (ptep_clear_flush_young_notify(vma, address, pte)) |
876 | continue; | 937 | continue; |
877 | 938 | ||
@@ -893,39 +954,104 @@ static void try_to_unmap_cluster(unsigned long cursor, | |||
893 | (*mapcount)--; | 954 | (*mapcount)--; |
894 | } | 955 | } |
895 | pte_unmap_unlock(pte - 1, ptl); | 956 | pte_unmap_unlock(pte - 1, ptl); |
957 | if (locked_vma) | ||
958 | up_read(&vma->vm_mm->mmap_sem); | ||
959 | return ret; | ||
896 | } | 960 | } |
897 | 961 | ||
898 | static int try_to_unmap_anon(struct page *page, int migration) | 962 | /* |
963 | * common handling for pages mapped in VM_LOCKED vmas | ||
964 | */ | ||
965 | static int try_to_mlock_page(struct page *page, struct vm_area_struct *vma) | ||
966 | { | ||
967 | int mlocked = 0; | ||
968 | |||
969 | if (down_read_trylock(&vma->vm_mm->mmap_sem)) { | ||
970 | if (vma->vm_flags & VM_LOCKED) { | ||
971 | mlock_vma_page(page); | ||
972 | mlocked++; /* really mlocked the page */ | ||
973 | } | ||
974 | up_read(&vma->vm_mm->mmap_sem); | ||
975 | } | ||
976 | return mlocked; | ||
977 | } | ||
978 | |||
979 | /** | ||
980 | * try_to_unmap_anon - unmap or unlock anonymous page using the object-based | ||
981 | * rmap method | ||
982 | * @page: the page to unmap/unlock | ||
983 | * @unlock: request for unlock rather than unmap [unlikely] | ||
984 | * @migration: unmapping for migration - ignored if @unlock | ||
985 | * | ||
986 | * Find all the mappings of a page using the mapping pointer and the vma chains | ||
987 | * contained in the anon_vma struct it points to. | ||
988 | * | ||
989 | * This function is only called from try_to_unmap/try_to_munlock for | ||
990 | * anonymous pages. | ||
991 | * When called from try_to_munlock(), the mmap_sem of the mm containing the vma | ||
992 | * where the page was found will be held for write. So, we won't recheck | ||
993 | * vm_flags for that VMA. That should be OK, because that vma shouldn't be | ||
994 | * 'LOCKED. | ||
995 | */ | ||
996 | static int try_to_unmap_anon(struct page *page, int unlock, int migration) | ||
899 | { | 997 | { |
900 | struct anon_vma *anon_vma; | 998 | struct anon_vma *anon_vma; |
901 | struct vm_area_struct *vma; | 999 | struct vm_area_struct *vma; |
1000 | unsigned int mlocked = 0; | ||
902 | int ret = SWAP_AGAIN; | 1001 | int ret = SWAP_AGAIN; |
903 | 1002 | ||
1003 | if (MLOCK_PAGES && unlikely(unlock)) | ||
1004 | ret = SWAP_SUCCESS; /* default for try_to_munlock() */ | ||
1005 | |||
904 | anon_vma = page_lock_anon_vma(page); | 1006 | anon_vma = page_lock_anon_vma(page); |
905 | if (!anon_vma) | 1007 | if (!anon_vma) |
906 | return ret; | 1008 | return ret; |
907 | 1009 | ||
908 | list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { | 1010 | list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { |
909 | ret = try_to_unmap_one(page, vma, migration); | 1011 | if (MLOCK_PAGES && unlikely(unlock)) { |
910 | if (ret == SWAP_FAIL || !page_mapped(page)) | 1012 | if (!((vma->vm_flags & VM_LOCKED) && |
911 | break; | 1013 | page_mapped_in_vma(page, vma))) |
1014 | continue; /* must visit all unlocked vmas */ | ||
1015 | ret = SWAP_MLOCK; /* saw at least one mlocked vma */ | ||
1016 | } else { | ||
1017 | ret = try_to_unmap_one(page, vma, migration); | ||
1018 | if (ret == SWAP_FAIL || !page_mapped(page)) | ||
1019 | break; | ||
1020 | } | ||
1021 | if (ret == SWAP_MLOCK) { | ||
1022 | mlocked = try_to_mlock_page(page, vma); | ||
1023 | if (mlocked) | ||
1024 | break; /* stop if actually mlocked page */ | ||
1025 | } | ||
912 | } | 1026 | } |
913 | 1027 | ||
914 | page_unlock_anon_vma(anon_vma); | 1028 | page_unlock_anon_vma(anon_vma); |
1029 | |||
1030 | if (mlocked) | ||
1031 | ret = SWAP_MLOCK; /* actually mlocked the page */ | ||
1032 | else if (ret == SWAP_MLOCK) | ||
1033 | ret = SWAP_AGAIN; /* saw VM_LOCKED vma */ | ||
1034 | |||
915 | return ret; | 1035 | return ret; |
916 | } | 1036 | } |
917 | 1037 | ||
918 | /** | 1038 | /** |
919 | * try_to_unmap_file - unmap file page using the object-based rmap method | 1039 | * try_to_unmap_file - unmap/unlock file page using the object-based rmap method |
920 | * @page: the page to unmap | 1040 | * @page: the page to unmap/unlock |
921 | * @migration: migration flag | 1041 | * @unlock: request for unlock rather than unmap [unlikely] |
1042 | * @migration: unmapping for migration - ignored if @unlock | ||
922 | * | 1043 | * |
923 | * Find all the mappings of a page using the mapping pointer and the vma chains | 1044 | * Find all the mappings of a page using the mapping pointer and the vma chains |
924 | * contained in the address_space struct it points to. | 1045 | * contained in the address_space struct it points to. |
925 | * | 1046 | * |
926 | * This function is only called from try_to_unmap for object-based pages. | 1047 | * This function is only called from try_to_unmap/try_to_munlock for |
1048 | * object-based pages. | ||
1049 | * When called from try_to_munlock(), the mmap_sem of the mm containing the vma | ||
1050 | * where the page was found will be held for write. So, we won't recheck | ||
1051 | * vm_flags for that VMA. That should be OK, because that vma shouldn't be | ||
1052 | * 'LOCKED. | ||
927 | */ | 1053 | */ |
928 | static int try_to_unmap_file(struct page *page, int migration) | 1054 | static int try_to_unmap_file(struct page *page, int unlock, int migration) |
929 | { | 1055 | { |
930 | struct address_space *mapping = page->mapping; | 1056 | struct address_space *mapping = page->mapping; |
931 | pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | 1057 | pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); |
@@ -936,20 +1062,44 @@ static int try_to_unmap_file(struct page *page, int migration) | |||
936 | unsigned long max_nl_cursor = 0; | 1062 | unsigned long max_nl_cursor = 0; |
937 | unsigned long max_nl_size = 0; | 1063 | unsigned long max_nl_size = 0; |
938 | unsigned int mapcount; | 1064 | unsigned int mapcount; |
1065 | unsigned int mlocked = 0; | ||
1066 | |||
1067 | if (MLOCK_PAGES && unlikely(unlock)) | ||
1068 | ret = SWAP_SUCCESS; /* default for try_to_munlock() */ | ||
939 | 1069 | ||
940 | spin_lock(&mapping->i_mmap_lock); | 1070 | spin_lock(&mapping->i_mmap_lock); |
941 | vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { | 1071 | vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { |
942 | ret = try_to_unmap_one(page, vma, migration); | 1072 | if (MLOCK_PAGES && unlikely(unlock)) { |
943 | if (ret == SWAP_FAIL || !page_mapped(page)) | 1073 | if (!(vma->vm_flags & VM_LOCKED)) |
944 | goto out; | 1074 | continue; /* must visit all vmas */ |
1075 | ret = SWAP_MLOCK; | ||
1076 | } else { | ||
1077 | ret = try_to_unmap_one(page, vma, migration); | ||
1078 | if (ret == SWAP_FAIL || !page_mapped(page)) | ||
1079 | goto out; | ||
1080 | } | ||
1081 | if (ret == SWAP_MLOCK) { | ||
1082 | mlocked = try_to_mlock_page(page, vma); | ||
1083 | if (mlocked) | ||
1084 | break; /* stop if actually mlocked page */ | ||
1085 | } | ||
945 | } | 1086 | } |
946 | 1087 | ||
1088 | if (mlocked) | ||
1089 | goto out; | ||
1090 | |||
947 | if (list_empty(&mapping->i_mmap_nonlinear)) | 1091 | if (list_empty(&mapping->i_mmap_nonlinear)) |
948 | goto out; | 1092 | goto out; |
949 | 1093 | ||
950 | list_for_each_entry(vma, &mapping->i_mmap_nonlinear, | 1094 | list_for_each_entry(vma, &mapping->i_mmap_nonlinear, |
951 | shared.vm_set.list) { | 1095 | shared.vm_set.list) { |
952 | if ((vma->vm_flags & VM_LOCKED) && !migration) | 1096 | if (MLOCK_PAGES && unlikely(unlock)) { |
1097 | if (!(vma->vm_flags & VM_LOCKED)) | ||
1098 | continue; /* must visit all vmas */ | ||
1099 | ret = SWAP_MLOCK; /* leave mlocked == 0 */ | ||
1100 | goto out; /* no need to look further */ | ||
1101 | } | ||
1102 | if (!MLOCK_PAGES && !migration && (vma->vm_flags & VM_LOCKED)) | ||
953 | continue; | 1103 | continue; |
954 | cursor = (unsigned long) vma->vm_private_data; | 1104 | cursor = (unsigned long) vma->vm_private_data; |
955 | if (cursor > max_nl_cursor) | 1105 | if (cursor > max_nl_cursor) |
@@ -959,7 +1109,7 @@ static int try_to_unmap_file(struct page *page, int migration) | |||
959 | max_nl_size = cursor; | 1109 | max_nl_size = cursor; |
960 | } | 1110 | } |
961 | 1111 | ||
962 | if (max_nl_size == 0) { /* any nonlinears locked or reserved */ | 1112 | if (max_nl_size == 0) { /* all nonlinears locked or reserved ? */ |
963 | ret = SWAP_FAIL; | 1113 | ret = SWAP_FAIL; |
964 | goto out; | 1114 | goto out; |
965 | } | 1115 | } |
@@ -983,12 +1133,16 @@ static int try_to_unmap_file(struct page *page, int migration) | |||
983 | do { | 1133 | do { |
984 | list_for_each_entry(vma, &mapping->i_mmap_nonlinear, | 1134 | list_for_each_entry(vma, &mapping->i_mmap_nonlinear, |
985 | shared.vm_set.list) { | 1135 | shared.vm_set.list) { |
986 | if ((vma->vm_flags & VM_LOCKED) && !migration) | 1136 | if (!MLOCK_PAGES && !migration && |
1137 | (vma->vm_flags & VM_LOCKED)) | ||
987 | continue; | 1138 | continue; |
988 | cursor = (unsigned long) vma->vm_private_data; | 1139 | cursor = (unsigned long) vma->vm_private_data; |
989 | while ( cursor < max_nl_cursor && | 1140 | while ( cursor < max_nl_cursor && |
990 | cursor < vma->vm_end - vma->vm_start) { | 1141 | cursor < vma->vm_end - vma->vm_start) { |
991 | try_to_unmap_cluster(cursor, &mapcount, vma); | 1142 | ret = try_to_unmap_cluster(cursor, &mapcount, |
1143 | vma, page); | ||
1144 | if (ret == SWAP_MLOCK) | ||
1145 | mlocked = 2; /* to return below */ | ||
992 | cursor += CLUSTER_SIZE; | 1146 | cursor += CLUSTER_SIZE; |
993 | vma->vm_private_data = (void *) cursor; | 1147 | vma->vm_private_data = (void *) cursor; |
994 | if ((int)mapcount <= 0) | 1148 | if ((int)mapcount <= 0) |
@@ -1009,6 +1163,10 @@ static int try_to_unmap_file(struct page *page, int migration) | |||
1009 | vma->vm_private_data = NULL; | 1163 | vma->vm_private_data = NULL; |
1010 | out: | 1164 | out: |
1011 | spin_unlock(&mapping->i_mmap_lock); | 1165 | spin_unlock(&mapping->i_mmap_lock); |
1166 | if (mlocked) | ||
1167 | ret = SWAP_MLOCK; /* actually mlocked the page */ | ||
1168 | else if (ret == SWAP_MLOCK) | ||
1169 | ret = SWAP_AGAIN; /* saw VM_LOCKED vma */ | ||
1012 | return ret; | 1170 | return ret; |
1013 | } | 1171 | } |
1014 | 1172 | ||
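Taken together, the new bookkeeping in try_to_unmap_file() reduces to a small translation at the out: label above: one flag records whether the page was actually moved to the mlocked state, while SWAP_MLOCK in ret only records that a VM_LOCKED vma was seen. A standalone restatement of that mapping, as a sketch only (the helper name is invented; the SWAP_* constants are the ones used throughout mm/rmap.c):

    static int fold_unmap_file_result(int ret, int mlocked)
    {
    	if (mlocked)
    		return SWAP_MLOCK;	/* the walk really mlocked the page */
    	if (ret == SWAP_MLOCK)
    		return SWAP_AGAIN;	/* only saw a VM_LOCKED vma; retry later */
    	return ret;			/* SWAP_SUCCESS / SWAP_AGAIN / SWAP_FAIL */
    }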
@@ -1024,6 +1182,7 @@ out: | |||
1024 | * SWAP_SUCCESS - we succeeded in removing all mappings | 1182 | * SWAP_SUCCESS - we succeeded in removing all mappings |
1025 | * SWAP_AGAIN - we missed a mapping, try again later | 1183 | * SWAP_AGAIN - we missed a mapping, try again later |
1026 | * SWAP_FAIL - the page is unswappable | 1184 | * SWAP_FAIL - the page is unswappable |
1185 | * SWAP_MLOCK - page is mlocked. | ||
1027 | */ | 1186 | */ |
1028 | int try_to_unmap(struct page *page, int migration) | 1187 | int try_to_unmap(struct page *page, int migration) |
1029 | { | 1188 | { |
@@ -1032,12 +1191,36 @@ int try_to_unmap(struct page *page, int migration) | |||
1032 | BUG_ON(!PageLocked(page)); | 1191 | BUG_ON(!PageLocked(page)); |
1033 | 1192 | ||
1034 | if (PageAnon(page)) | 1193 | if (PageAnon(page)) |
1035 | ret = try_to_unmap_anon(page, migration); | 1194 | ret = try_to_unmap_anon(page, 0, migration); |
1036 | else | 1195 | else |
1037 | ret = try_to_unmap_file(page, migration); | 1196 | ret = try_to_unmap_file(page, 0, migration); |
1038 | 1197 | if (ret != SWAP_MLOCK && !page_mapped(page)) | |
1039 | if (!page_mapped(page)) | ||
1040 | ret = SWAP_SUCCESS; | 1198 | ret = SWAP_SUCCESS; |
1041 | return ret; | 1199 | return ret; |
1042 | } | 1200 | } |
1043 | 1201 | ||
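Since SWAP_MLOCK is a new possible result of try_to_unmap(), callers have to treat it as "leave the page alone, it belongs on the unevictable list" rather than as an unmap failure. A sketch of such a caller, loosely modelled on the shrink_page_list() hunk further down in this patch (simplified; the function name is invented, and the page is assumed locked, as try_to_unmap() requires):

    static int reclaim_mapped_page(struct page *page)
    {
    	switch (try_to_unmap(page, 0)) {
    	case SWAP_SUCCESS:
    		return 1;			/* all ptes removed; go on and free it */
    	case SWAP_MLOCK:
    		unlock_page(page);
    		putback_lru_page(page);		/* ends up on the unevictable list */
    		return 0;
    	case SWAP_AGAIN:
    	case SWAP_FAIL:
    	default:
    		unlock_page(page);		/* keep the page and retry later */
    		return 0;
    	}
    }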
1202 | #ifdef CONFIG_UNEVICTABLE_LRU | ||
1203 | /** | ||
1204 | * try_to_munlock - try to munlock a page | ||
1205 | * @page: the page to be munlocked | ||
1206 | * | ||
1207 | * Called from munlock code. Checks all of the VMAs mapping the page | ||
1208 | * to make sure nobody else has this page mlocked. The page will be | ||
1209 | * returned with PG_mlocked cleared if no other vmas have it mlocked. | ||
1210 | * | ||
1211 | * Return values are: | ||
1212 | * | ||
1213 | * SWAP_SUCCESS - no vma's holding page mlocked. | ||
1214 | * SWAP_AGAIN - page mapped in mlocked vma -- couldn't acquire mmap sem | ||
1215 | * SWAP_MLOCK - page is now mlocked. | ||
1216 | */ | ||
1217 | int try_to_munlock(struct page *page) | ||
1218 | { | ||
1219 | VM_BUG_ON(!PageLocked(page) || PageLRU(page)); | ||
1220 | |||
1221 | if (PageAnon(page)) | ||
1222 | return try_to_unmap_anon(page, 1, 0); | ||
1223 | else | ||
1224 | return try_to_unmap_file(page, 1, 0); | ||
1225 | } | ||
1226 | #endif | ||
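The callers of try_to_munlock() live in the mm/mlock.c part of this patch, which is not shown here, so the sketch below only illustrates the contract documented above; the helper name is invented. The page has to be locked and already isolated from its LRU list, since try_to_munlock() asserts both.

    static void munlock_isolated_page(struct page *page)
    {
    	switch (try_to_munlock(page)) {
    	case SWAP_SUCCESS:
    		break;	/* no VM_LOCKED vma maps it any more; PG_mlocked is clear */
    	case SWAP_MLOCK:
    		break;	/* another vma still has it mlocked; PG_mlocked is set again */
    	case SWAP_AGAIN:
    	default:
    		break;	/* a mmap_sem could not be taken; nothing more to do here */
    	}
    	unlock_page(page);
    	putback_lru_page(page);	/* consults PG_mlocked via page_evictable() */
    }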
diff --git a/mm/swap.c b/mm/swap.c | |||
@@ -278,7 +278,7 @@ void lru_add_drain(void) | |||
278 | put_cpu(); | 278 | put_cpu(); |
279 | } | 279 | } |
280 | 280 | ||
281 | #ifdef CONFIG_NUMA | 281 | #if defined(CONFIG_NUMA) || defined(CONFIG_UNEVICTABLE_LRU) |
282 | static void lru_add_drain_per_cpu(struct work_struct *dummy) | 282 | static void lru_add_drain_per_cpu(struct work_struct *dummy) |
283 | { | 283 | { |
284 | lru_add_drain(); | 284 | lru_add_drain(); |
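The widened conditional above keeps lru_add_drain_per_cpu(), and with it lru_add_drain_all(), available whenever CONFIG_UNEVICTABLE_LRU is set rather than only on NUMA builds; the mlock/munlock code added elsewhere in this patch presumably needs to flush every CPU's LRU pagevecs. For reference, the function defined just below this hunk in mm/swap.c, unchanged by the patch and shown here only as a reminder:

    /*
     * Returns 0 for success
     */
    int lru_add_drain_all(void)
    {
    	return schedule_on_each_cpu(lru_add_drain_per_cpu);
    }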
diff --git a/mm/vmscan.c b/mm/vmscan.c index dfb342e0db9b..e5aaaad159ef 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -582,11 +582,8 @@ static unsigned long shrink_page_list(struct list_head *page_list, | |||
582 | 582 | ||
583 | sc->nr_scanned++; | 583 | sc->nr_scanned++; |
584 | 584 | ||
585 | if (unlikely(!page_evictable(page, NULL))) { | 585 | if (unlikely(!page_evictable(page, NULL))) |
586 | unlock_page(page); | 586 | goto cull_mlocked; |
587 | putback_lru_page(page); | ||
588 | continue; | ||
589 | } | ||
590 | 587 | ||
591 | if (!sc->may_swap && page_mapped(page)) | 588 | if (!sc->may_swap && page_mapped(page)) |
592 | goto keep_locked; | 589 | goto keep_locked; |
@@ -624,9 +621,19 @@ static unsigned long shrink_page_list(struct list_head *page_list, | |||
624 | * Anonymous process memory has backing store? | 621 | * Anonymous process memory has backing store? |
625 | * Try to allocate it some swap space here. | 622 | * Try to allocate it some swap space here. |
626 | */ | 623 | */ |
627 | if (PageAnon(page) && !PageSwapCache(page)) | 624 | if (PageAnon(page) && !PageSwapCache(page)) { |
625 | switch (try_to_munlock(page)) { | ||
626 | case SWAP_FAIL: /* shouldn't happen */ | ||
627 | case SWAP_AGAIN: | ||
628 | goto keep_locked; | ||
629 | case SWAP_MLOCK: | ||
630 | goto cull_mlocked; | ||
631 | case SWAP_SUCCESS: | ||
632 | ; /* fall thru'; add to swap cache */ | ||
633 | } | ||
628 | if (!add_to_swap(page, GFP_ATOMIC)) | 634 | if (!add_to_swap(page, GFP_ATOMIC)) |
629 | goto activate_locked; | 635 | goto activate_locked; |
636 | } | ||
630 | #endif /* CONFIG_SWAP */ | 637 | #endif /* CONFIG_SWAP */ |
631 | 638 | ||
632 | mapping = page_mapping(page); | 639 | mapping = page_mapping(page); |
@@ -641,6 +648,8 @@ static unsigned long shrink_page_list(struct list_head *page_list, | |||
641 | goto activate_locked; | 648 | goto activate_locked; |
642 | case SWAP_AGAIN: | 649 | case SWAP_AGAIN: |
643 | goto keep_locked; | 650 | goto keep_locked; |
651 | case SWAP_MLOCK: | ||
652 | goto cull_mlocked; | ||
644 | case SWAP_SUCCESS: | 653 | case SWAP_SUCCESS: |
645 | ; /* try to free the page below */ | 654 | ; /* try to free the page below */ |
646 | } | 655 | } |
@@ -731,6 +740,11 @@ free_it: | |||
731 | } | 740 | } |
732 | continue; | 741 | continue; |
733 | 742 | ||
743 | cull_mlocked: | ||
744 | unlock_page(page); | ||
745 | putback_lru_page(page); | ||
746 | continue; | ||
747 | |||
734 | activate_locked: | 748 | activate_locked: |
735 | /* Not a candidate for swapping, so reclaim swap space. */ | 749 | /* Not a candidate for swapping, so reclaim swap space. */ |
736 | if (PageSwapCache(page) && vm_swap_full()) | 750 | if (PageSwapCache(page) && vm_swap_full()) |
@@ -742,7 +756,7 @@ keep_locked: | |||
742 | unlock_page(page); | 756 | unlock_page(page); |
743 | keep: | 757 | keep: |
744 | list_add(&page->lru, &ret_pages); | 758 | list_add(&page->lru, &ret_pages); |
745 | VM_BUG_ON(PageLRU(page)); | 759 | VM_BUG_ON(PageLRU(page) || PageUnevictable(page)); |
746 | } | 760 | } |
747 | list_splice(&ret_pages, page_list); | 761 | list_splice(&ret_pages, page_list); |
748 | if (pagevec_count(&freed_pvec)) | 762 | if (pagevec_count(&freed_pvec)) |
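Putting the last few hunks together: shrink_page_list() now funnels three situations into the same cull_mlocked exit, namely pages already unevictable on entry, anonymous pages found to be mlocked just before swap would be allocated for them, and pages whose unmap walk ran into a VM_LOCKED vma. A heavily condensed sketch of that flow (invented function name; the page is assumed locked on entry; writeback, pageout and freeing are elided):

    static void shrink_one_page(struct page *page)
    {
    	if (unlikely(!page_evictable(page, NULL)))
    		goto cull_mlocked;		/* mapping or page already unevictable */

    	if (PageAnon(page) && !PageSwapCache(page) &&
    	    try_to_munlock(page) == SWAP_MLOCK)
    		goto cull_mlocked;		/* became mlocked while off the LRU */

    	if (page_mapped(page) && try_to_unmap(page, 0) == SWAP_MLOCK)
    		goto cull_mlocked;		/* unmap walk found a VM_LOCKED vma */

    	/* ... add_to_swap(), pageout(), remove from the mapping, free ... */
    	unlock_page(page);
    	return;

    cull_mlocked:
    	unlock_page(page);
    	putback_lru_page(page);			/* goes straight to the unevictable list */
    }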
@@ -2329,12 +2343,13 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) | |||
2329 | * @vma: the VMA in which the page is or will be mapped, may be NULL | 2343 | * @vma: the VMA in which the page is or will be mapped, may be NULL |
2330 | * | 2344 | * |
2331 | * Test whether page is evictable--i.e., should be placed on active/inactive | 2345 | * Test whether page is evictable--i.e., should be placed on active/inactive |
2332 | * lists vs unevictable list. | 2346 | * lists vs unevictable list. The vma argument is !NULL when called from the |
2347 | * fault path to determine how to instantiate a new page. | ||
2333 | * | 2348 | * |
2334 | * Reasons page might not be evictable: | 2349 | * Reasons page might not be evictable: |
2335 | * (1) page's mapping marked unevictable | 2350 | * (1) page's mapping marked unevictable |
2351 | * (2) page is part of an mlocked VMA | ||
2336 | * | 2352 | * |
2337 | * TODO - later patches | ||
2338 | */ | 2353 | */ |
2339 | int page_evictable(struct page *page, struct vm_area_struct *vma) | 2354 | int page_evictable(struct page *page, struct vm_area_struct *vma) |
2340 | { | 2355 | { |
@@ -2342,7 +2357,8 @@ int page_evictable(struct page *page, struct vm_area_struct *vma) | |||
2342 | if (mapping_unevictable(page_mapping(page))) | 2357 | if (mapping_unevictable(page_mapping(page))) |
2343 | return 0; | 2358 | return 0; |
2344 | 2359 | ||
2345 | /* TODO: test page [!]evictable conditions */ | 2360 | if (PageMlocked(page) || (vma && is_mlocked_vma(vma, page))) |
2361 | return 0; | ||
2346 | 2362 | ||
2347 | return 1; | 2363 | return 1; |
2348 | } | 2364 | } |
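With the TODO replaced, page_evictable() now implements both of the tests listed in its comment. The sketch below shows the kind of consumer it is written for, namely choosing a target list when a page is put back; the two list helpers named here are assumptions based on the rest of this patch series rather than on the hunks shown above.

    static void place_on_lru(struct page *page)	/* invented name */
    {
    	if (page_evictable(page, NULL)) {
    		/* usual case: one of the zone's active/inactive LRU lists */
    		lru_cache_add_lru(page, page_lru(page));
    	} else {
    		/* unevictable mapping or PG_mlocked: keep reclaim away from it */
    		add_page_to_unevictable_list(page);
    	}
    }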