aboutsummaryrefslogtreecommitdiffstats
path: root/mm/mlock.c
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org>, 2014-01-23 22:11:50 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org>, 2014-01-23 22:11:50 -0500
commit: 3aacd625f20129f5a41ea3ff3b5353b0e4dabd01 (patch)
tree: 7cf4ea65397f80098b30494df31cfc8f5fa26d63 /mm/mlock.c
parent: 7e21774db5cc9cf8fe93a64a2f0c6cf47db8ab24 (diff)
parent: 2a1d689c9ba42a6066540fb221b6ecbd6298b728 (diff)
Merge branch 'akpm' (incoming from Andrew)
Merge second patch-bomb from Andrew Morton:
 - various misc bits
 - the rest of MM
 - add generic fixmap.h, use it
 - backlight updates
 - dynamic_debug updates
 - printk() updates
 - checkpatch updates
 - binfmt_elf
 - ramfs
 - init/
 - autofs4
 - drivers/rtc
 - nilfs
 - hfsplus
 - Documentation/
 - coredump
 - procfs
 - fork
 - exec
 - kexec
 - kdump
 - partitions
 - rapidio
 - rbtree
 - userns
 - memstick
 - w1
 - decompressors

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (197 commits)
  lib/decompress_unlz4.c: always set an error return code on failures
  romfs: fix returm err while getting inode in fill_super
  drivers/w1/masters/w1-gpio.c: add strong pullup emulation
  drivers/memstick/host/rtsx_pci_ms.c: fix ms card data transfer bug
  userns: relax the posix_acl_valid() checks
  arch/sh/kernel/dwarf.c: use rbtree postorder iteration helper instead of solution using repeated rb_erase()
  fs-ext3-use-rbtree-postorder-iteration-helper-instead-of-opencoding-fix
  fs/ext3: use rbtree postorder iteration helper instead of opencoding
  fs/jffs2: use rbtree postorder iteration helper instead of opencoding
  fs/ext4: use rbtree postorder iteration helper instead of opencoding
  fs/ubifs: use rbtree postorder iteration helper instead of opencoding
  net/netfilter/ipset/ip_set_hash_netiface.c: use rbtree postorder iteration instead of opencoding
  rbtree/test: test rbtree_postorder_for_each_entry_safe()
  rbtree/test: move rb_node to the middle of the test struct
  rapidio: add modular rapidio core build into powerpc and mips branches
  partitions/efi: complete documentation of gpt kernel param purpose
  kdump: add /sys/kernel/vmcoreinfo ABI documentation
  kdump: fix exported size of vmcoreinfo note
  kexec: add sysctl to disable kexec_load
  fs/exec.c: call arch_pick_mmap_layout() only once
  ...
Diffstat (limited to 'mm/mlock.c')
-rw-r--r-- mm/mlock.c | 108
1 files changed, 62 insertions, 46 deletions
diff --git a/mm/mlock.c b/mm/mlock.c
index 10819ed4df3e..4e1a68162285 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -91,6 +91,26 @@ void mlock_vma_page(struct page *page)
91} 91}
92 92
93/* 93/*
94 * Isolate a page from LRU with optional get_page() pin.
95 * Assumes lru_lock already held and page already pinned.
96 */
97static bool __munlock_isolate_lru_page(struct page *page, bool getpage)
98{
99 if (PageLRU(page)) {
100 struct lruvec *lruvec;
101
102 lruvec = mem_cgroup_page_lruvec(page, page_zone(page));
103 if (getpage)
104 get_page(page);
105 ClearPageLRU(page);
106 del_page_from_lru_list(page, lruvec, page_lru(page));
107 return true;
108 }
109
110 return false;
111}
112
113/*
94 * Finish munlock after successful page isolation 114 * Finish munlock after successful page isolation
95 * 115 *
96 * Page must be locked. This is a wrapper for try_to_munlock() 116 * Page must be locked. This is a wrapper for try_to_munlock()
@@ -126,9 +146,9 @@ static void __munlock_isolated_page(struct page *page)
126static void __munlock_isolation_failed(struct page *page) 146static void __munlock_isolation_failed(struct page *page)
127{ 147{
128 if (PageUnevictable(page)) 148 if (PageUnevictable(page))
129 count_vm_event(UNEVICTABLE_PGSTRANDED); 149 __count_vm_event(UNEVICTABLE_PGSTRANDED);
130 else 150 else
131 count_vm_event(UNEVICTABLE_PGMUNLOCKED); 151 __count_vm_event(UNEVICTABLE_PGMUNLOCKED);
132} 152}
133 153
134/** 154/**
@@ -152,28 +172,34 @@ static void __munlock_isolation_failed(struct page *page)
152unsigned int munlock_vma_page(struct page *page) 172unsigned int munlock_vma_page(struct page *page)
153{ 173{
154 unsigned int nr_pages; 174 unsigned int nr_pages;
175 struct zone *zone = page_zone(page);
155 176
156 BUG_ON(!PageLocked(page)); 177 BUG_ON(!PageLocked(page));
157 178
158 if (TestClearPageMlocked(page)) {
159 nr_pages = hpage_nr_pages(page);
160 mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
161 if (!isolate_lru_page(page))
162 __munlock_isolated_page(page);
163 else
164 __munlock_isolation_failed(page);
165 } else {
166 nr_pages = hpage_nr_pages(page);
167 }
168
169 /* 179 /*
170 * Regardless of the original PageMlocked flag, we determine nr_pages 180 * Serialize with any parallel __split_huge_page_refcount() which
171 * after touching the flag. This leaves a possible race with a THP page 181 * might otherwise copy PageMlocked to part of the tail pages before
172 * split, such that a whole THP page was munlocked, but nr_pages == 1. 182 * we clear it in the head page. It also stabilizes hpage_nr_pages().
173 * Returning a smaller mask due to that is OK, the worst that can
174 * happen is subsequent useless scanning of the former tail pages.
175 * The NR_MLOCK accounting can however become broken.
176 */ 183 */
184 spin_lock_irq(&zone->lru_lock);
185
186 nr_pages = hpage_nr_pages(page);
187 if (!TestClearPageMlocked(page))
188 goto unlock_out;
189
190 __mod_zone_page_state(zone, NR_MLOCK, -nr_pages);
191
192 if (__munlock_isolate_lru_page(page, true)) {
193 spin_unlock_irq(&zone->lru_lock);
194 __munlock_isolated_page(page);
195 goto out;
196 }
197 __munlock_isolation_failed(page);
198
199unlock_out:
200 spin_unlock_irq(&zone->lru_lock);
201
202out:
177 return nr_pages - 1; 203 return nr_pages - 1;
178} 204}
179 205
@@ -253,8 +279,8 @@ static int __mlock_posix_error_return(long retval)
253static bool __putback_lru_fast_prepare(struct page *page, struct pagevec *pvec, 279static bool __putback_lru_fast_prepare(struct page *page, struct pagevec *pvec,
254 int *pgrescued) 280 int *pgrescued)
255{ 281{
256 VM_BUG_ON(PageLRU(page)); 282 VM_BUG_ON_PAGE(PageLRU(page), page);
257 VM_BUG_ON(!PageLocked(page)); 283 VM_BUG_ON_PAGE(!PageLocked(page), page);
258 284
259 if (page_mapcount(page) <= 1 && page_evictable(page)) { 285 if (page_mapcount(page) <= 1 && page_evictable(page)) {
260 pagevec_add(pvec, page); 286 pagevec_add(pvec, page);
@@ -310,34 +336,24 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
310 struct page *page = pvec->pages[i]; 336 struct page *page = pvec->pages[i];
311 337
312 if (TestClearPageMlocked(page)) { 338 if (TestClearPageMlocked(page)) {
313 struct lruvec *lruvec;
314 int lru;
315
316 if (PageLRU(page)) {
317 lruvec = mem_cgroup_page_lruvec(page, zone);
318 lru = page_lru(page);
319 /*
320 * We already have pin from follow_page_mask()
321 * so we can spare the get_page() here.
322 */
323 ClearPageLRU(page);
324 del_page_from_lru_list(page, lruvec, lru);
325 } else {
326 __munlock_isolation_failed(page);
327 goto skip_munlock;
328 }
329
330 } else {
331skip_munlock:
332 /* 339 /*
333 * We won't be munlocking this page in the next phase 340 * We already have pin from follow_page_mask()
334 * but we still need to release the follow_page_mask() 341 * so we can spare the get_page() here.
335 * pin. We cannot do it under lru_lock however. If it's
336 * the last pin, __page_cache_release would deadlock.
337 */ 342 */
338 pagevec_add(&pvec_putback, pvec->pages[i]); 343 if (__munlock_isolate_lru_page(page, false))
339 pvec->pages[i] = NULL; 344 continue;
345 else
346 __munlock_isolation_failed(page);
340 } 347 }
348
349 /*
350 * We won't be munlocking this page in the next phase
351 * but we still need to release the follow_page_mask()
352 * pin. We cannot do it under lru_lock however. If it's
353 * the last pin, __page_cache_release() would deadlock.
354 */
355 pagevec_add(&pvec_putback, pvec->pages[i]);
356 pvec->pages[i] = NULL;
341 } 357 }
342 delta_munlocked = -nr + pagevec_count(&pvec_putback); 358 delta_munlocked = -nr + pagevec_count(&pvec_putback);
343 __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked); 359 __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);