-rw-r--r--	include/linux/rmap.h			|   2
-rw-r--r--	include/linux/vm_event_item.h		|   1
-rw-r--r--	include/uapi/asm-generic/mman-common.h	|   1
-rw-r--r--	mm/madvise.c				| 170
-rw-r--r--	mm/rmap.c				|  36
-rw-r--r--	mm/swap_state.c				|   5
-rw-r--r--	mm/vmscan.c				|  14
-rw-r--r--	mm/vmstat.c				|   1
8 files changed, 221 insertions(+), 9 deletions(-)
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 77d1ba57d495..bdf597c4f0be 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -85,6 +85,7 @@ enum ttu_flags {
 	TTU_UNMAP = 1,			/* unmap mode */
 	TTU_MIGRATION = 2,		/* migration mode */
 	TTU_MUNLOCK = 4,		/* munlock mode */
+	TTU_LZFREE = 8,			/* lazy free mode */
 
 	TTU_IGNORE_MLOCK = (1 << 8),	/* ignore mlock */
 	TTU_IGNORE_ACCESS = (1 << 9),	/* don't age */
@@ -311,5 +312,6 @@ static inline int page_mkclean(struct page *page)
 #define SWAP_AGAIN	1
 #define SWAP_FAIL	2
 #define SWAP_MLOCK	3
+#define SWAP_LZFREE	4
 
 #endif	/* _LINUX_RMAP_H */
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index e1f8c993e73b..67c1dbd19c6d 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -25,6 +25,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		FOR_ALL_ZONES(PGALLOC),
 		PGFREE, PGACTIVATE, PGDEACTIVATE,
 		PGFAULT, PGMAJFAULT,
+		PGLAZYFREED,
 		FOR_ALL_ZONES(PGREFILL),
 		FOR_ALL_ZONES(PGSTEAL_KSWAPD),
 		FOR_ALL_ZONES(PGSTEAL_DIRECT),
diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h
index a74dd84bbb6d..0e821e3c3d45 100644
--- a/include/uapi/asm-generic/mman-common.h
+++ b/include/uapi/asm-generic/mman-common.h
@@ -39,6 +39,7 @@
 #define MADV_SEQUENTIAL	2		/* expect sequential page references */
 #define MADV_WILLNEED	3		/* will need these pages */
 #define MADV_DONTNEED	4		/* don't need these pages */
+#define MADV_FREE	5		/* free pages only if memory pressure */
 
 /* common parameters: try to keep these consistent across architectures */
 #define MADV_REMOVE	9		/* remove these pages & resources */
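
From userspace, the new advice value is consumed through the plain madvise(2) entry point. Below is a minimal usage sketch; it is not part of the patch, and the fallback #define mirrors the value this patch adds to mman-common.h (libc headers predating the patch will not define it):

/*
 * Minimal sketch: hand anonymous pages back lazily instead of via
 * MADV_DONTNEED. Assumes a kernel with this patch applied; the fallback
 * value 5 matches the definition added above.
 */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

#ifndef MADV_FREE
#define MADV_FREE 5	/* value introduced by this patch */
#endif

int main(void)
{
	size_t len = 64 * 4096;
	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED)
		return 1;

	memset(p, 0xaa, len);		/* dirty the pages */

	/* Hint: contents are disposable; reclaim may drop them lazily. */
	if (madvise(p, len, MADV_FREE))
		perror("madvise");

	munmap(p, len);
	return 0;
}
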
diff --git a/mm/madvise.c b/mm/madvise.c
index c889fcbb530e..ed137fde4459 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -20,6 +20,9 @@
 #include <linux/backing-dev.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
+#include <linux/mmu_notifier.h>
+
+#include <asm/tlb.h>
 
 /*
  * Any behaviour which results in changes to the vma->vm_flags needs to
@@ -32,6 +35,7 @@ static int madvise_need_mmap_write(int behavior)
 	case MADV_REMOVE:
 	case MADV_WILLNEED:
 	case MADV_DONTNEED:
+	case MADV_FREE:
 		return 0;
 	default:
 		/* be safe, default to 1. list exceptions explicitly */
@@ -256,6 +260,163 @@ static long madvise_willneed(struct vm_area_struct *vma,
 	return 0;
 }
 
+static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
+				unsigned long end, struct mm_walk *walk)
+
+{
+	struct mmu_gather *tlb = walk->private;
+	struct mm_struct *mm = tlb->mm;
+	struct vm_area_struct *vma = walk->vma;
+	spinlock_t *ptl;
+	pte_t *orig_pte, *pte, ptent;
+	struct page *page;
+
+	split_huge_pmd(vma, pmd, addr);
+	if (pmd_trans_unstable(pmd))
+		return 0;
+
+	orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	arch_enter_lazy_mmu_mode();
+	for (; addr != end; pte++, addr += PAGE_SIZE) {
+		ptent = *pte;
+
+		if (!pte_present(ptent))
+			continue;
+
+		page = vm_normal_page(vma, addr, ptent);
+		if (!page)
+			continue;
+
+		/*
+		 * If pmd isn't transhuge but the page is THP and
+		 * is owned by only this process, split it and
+		 * deactivate all pages.
+		 */
+		if (PageTransCompound(page)) {
+			if (page_mapcount(page) != 1)
+				goto out;
+			get_page(page);
+			if (!trylock_page(page)) {
+				put_page(page);
+				goto out;
+			}
+			pte_unmap_unlock(orig_pte, ptl);
+			if (split_huge_page(page)) {
+				unlock_page(page);
+				put_page(page);
+				pte_offset_map_lock(mm, pmd, addr, &ptl);
+				goto out;
+			}
+			put_page(page);
+			unlock_page(page);
+			pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+			pte--;
+			addr -= PAGE_SIZE;
+			continue;
+		}
+
+		VM_BUG_ON_PAGE(PageTransCompound(page), page);
+
+		if (PageSwapCache(page) || PageDirty(page)) {
+			if (!trylock_page(page))
+				continue;
+			/*
+			 * If page is shared with others, we couldn't clear
+			 * PG_dirty of the page.
+			 */
+			if (page_mapcount(page) != 1) {
+				unlock_page(page);
+				continue;
+			}
+
+			if (PageSwapCache(page) && !try_to_free_swap(page)) {
+				unlock_page(page);
+				continue;
+			}
+
+			ClearPageDirty(page);
+			unlock_page(page);
+		}
+
+		if (pte_young(ptent) || pte_dirty(ptent)) {
+			/*
+			 * Some of architecture(ex, PPC) don't update TLB
+			 * with set_pte_at and tlb_remove_tlb_entry so for
+			 * the portability, remap the pte with old|clean
+			 * after pte clearing.
+			 */
+			ptent = ptep_get_and_clear_full(mm, addr, pte,
+							tlb->fullmm);
+
+			ptent = pte_mkold(ptent);
+			ptent = pte_mkclean(ptent);
+			set_pte_at(mm, addr, pte, ptent);
+			tlb_remove_tlb_entry(tlb, pte, addr);
+		}
+	}
+out:
+	arch_leave_lazy_mmu_mode();
+	pte_unmap_unlock(orig_pte, ptl);
+	cond_resched();
+	return 0;
+}
+
+static void madvise_free_page_range(struct mmu_gather *tlb,
+			     struct vm_area_struct *vma,
+			     unsigned long addr, unsigned long end)
+{
+	struct mm_walk free_walk = {
+		.pmd_entry = madvise_free_pte_range,
+		.mm = vma->vm_mm,
+		.private = tlb,
+	};
+
+	tlb_start_vma(tlb, vma);
+	walk_page_range(addr, end, &free_walk);
+	tlb_end_vma(tlb, vma);
+}
+
+static int madvise_free_single_vma(struct vm_area_struct *vma,
+			unsigned long start_addr, unsigned long end_addr)
+{
+	unsigned long start, end;
+	struct mm_struct *mm = vma->vm_mm;
+	struct mmu_gather tlb;
+
+	if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))
+		return -EINVAL;
+
+	/* MADV_FREE works for only anon vma at the moment */
+	if (!vma_is_anonymous(vma))
+		return -EINVAL;
+
+	start = max(vma->vm_start, start_addr);
+	if (start >= vma->vm_end)
+		return -EINVAL;
+	end = min(vma->vm_end, end_addr);
+	if (end <= vma->vm_start)
+		return -EINVAL;
+
+	lru_add_drain();
+	tlb_gather_mmu(&tlb, mm, start, end);
+	update_hiwater_rss(mm);
+
+	mmu_notifier_invalidate_range_start(mm, start, end);
+	madvise_free_page_range(&tlb, vma, start, end);
+	mmu_notifier_invalidate_range_end(mm, start, end);
+	tlb_finish_mmu(&tlb, start, end);
+
+	return 0;
+}
+
+static long madvise_free(struct vm_area_struct *vma,
+			     struct vm_area_struct **prev,
+			     unsigned long start, unsigned long end)
+{
+	*prev = vma;
+	return madvise_free_single_vma(vma, start, end);
+}
+
 /*
  * Application no longer needs these pages. If the pages are dirty,
  * it's OK to just throw them away. The app will be more careful about
@@ -379,6 +540,14 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
 		return madvise_remove(vma, prev, start, end);
 	case MADV_WILLNEED:
 		return madvise_willneed(vma, prev, start, end);
+	case MADV_FREE:
+		/*
+		 * XXX: In this implementation, MADV_FREE works like
+		 * MADV_DONTNEED on swapless system or full swap.
+		 */
+		if (get_nr_swap_pages() > 0)
+			return madvise_free(vma, prev, start, end);
+		/* passthrough */
 	case MADV_DONTNEED:
 		return madvise_dontneed(vma, prev, start, end);
 	default:
@@ -398,6 +567,7 @@ madvise_behavior_valid(int behavior)
 	case MADV_REMOVE:
 	case MADV_WILLNEED:
 	case MADV_DONTNEED:
+	case MADV_FREE:
 #ifdef CONFIG_KSM
 	case MADV_MERGEABLE:
 	case MADV_UNMERGEABLE:
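
A second sketch, of the user-visible semantics implied by the madvise.c and rmap.c changes: the mapping survives MADV_FREE, a later store re-dirties the page and cancels the lazy free for it, and a load from an untouched page returns either the old contents or zeroes depending on whether reclaim discarded the page first. Illustrative only, under the same MADV_FREE fallback assumption as the previous example:

#include <stddef.h>
#include <sys/mman.h>

#ifndef MADV_FREE
#define MADV_FREE 5	/* assumption: value from this patch */
#endif

static void reuse_after_free(char *buf, size_t len)
{
	/* Mark the whole range lazily freeable. */
	madvise(buf, len, MADV_FREE);

	/*
	 * A store makes this page dirty again, so try_to_unmap() will not
	 * see a clean page and the lazy free is cancelled for it: under
	 * memory pressure it is swapped out rather than dropped.
	 */
	buf[0] = 'x';

	/*
	 * Pages in the rest of the range that are never written again stay
	 * discardable; a later read observes either the old bytes (reclaim
	 * has not run yet) or zero-fill (reclaim dropped the page).
	 */
}
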
diff --git a/mm/rmap.c b/mm/rmap.c
index cdc2a885a4cd..68af2e32f7ed 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1411,6 +1411,11 @@ void page_remove_rmap(struct page *page, bool compound)
 	 */
 }
 
+struct rmap_private {
+	enum ttu_flags flags;
+	int lazyfreed;
+};
+
 /*
  * @arg: enum ttu_flags will be passed to this argument
  */
@@ -1422,7 +1427,8 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 	pte_t pteval;
 	spinlock_t *ptl;
 	int ret = SWAP_AGAIN;
-	enum ttu_flags flags = (enum ttu_flags)arg;
+	struct rmap_private *rp = arg;
+	enum ttu_flags flags = rp->flags;
 
 	/* munlock has nothing to gain from examining un-locked vmas */
 	if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
@@ -1514,6 +1520,14 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 		 * See handle_pte_fault() ...
 		 */
 		VM_BUG_ON_PAGE(!PageSwapCache(page), page);
+
+		if (!PageDirty(page) && (flags & TTU_LZFREE)) {
+			/* It's a freeable page by MADV_FREE */
+			dec_mm_counter(mm, MM_ANONPAGES);
+			rp->lazyfreed++;
+			goto discard;
+		}
+
 		if (swap_duplicate(entry) < 0) {
 			set_pte_at(mm, address, pte, pteval);
 			ret = SWAP_FAIL;
@@ -1534,6 +1548,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 	} else
 		dec_mm_counter(mm, mm_counter_file(page));
 
+discard:
 	page_remove_rmap(page, PageHuge(page));
 	page_cache_release(page);
 
@@ -1586,9 +1601,14 @@ static int page_not_mapped(struct page *page)
 int try_to_unmap(struct page *page, enum ttu_flags flags)
 {
 	int ret;
+	struct rmap_private rp = {
+		.flags = flags,
+		.lazyfreed = 0,
+	};
+
 	struct rmap_walk_control rwc = {
 		.rmap_one = try_to_unmap_one,
-		.arg = (void *)flags,
+		.arg = &rp,
 		.done = page_not_mapped,
 		.anon_lock = page_lock_anon_vma_read,
 	};
@@ -1608,8 +1628,11 @@ int try_to_unmap(struct page *page, enum ttu_flags flags)
 
 	ret = rmap_walk(page, &rwc);
 
-	if (ret != SWAP_MLOCK && !page_mapped(page))
+	if (ret != SWAP_MLOCK && !page_mapped(page)) {
 		ret = SWAP_SUCCESS;
+		if (rp.lazyfreed && !PageDirty(page))
+			ret = SWAP_LZFREE;
+	}
 	return ret;
 }
 
@@ -1631,9 +1654,14 @@ int try_to_unmap(struct page *page, enum ttu_flags flags)
 int try_to_munlock(struct page *page)
 {
 	int ret;
+	struct rmap_private rp = {
+		.flags = TTU_MUNLOCK,
+		.lazyfreed = 0,
+	};
+
 	struct rmap_walk_control rwc = {
 		.rmap_one = try_to_unmap_one,
-		.arg = (void *)TTU_MUNLOCK,
+		.arg = &rp,
 		.done = page_not_mapped,
 		.anon_lock = page_lock_anon_vma_read,
 
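
To make the new try_to_unmap() contract concrete, here is a hypothetical caller-side sketch; it is not a function added by this patch (the only real consumer is the shrink_page_list() hunk in mm/vmscan.c below). The idea: pass TTU_LZFREE for pages that went through MADV_FREE, and treat SWAP_LZFREE as "unmapped from every process and still clean, so it can be discarded without writeback":

/* Hypothetical helper, for illustration only. */
static bool can_discard_clean_anon(struct page *page, enum ttu_flags ttu)
{
	switch (try_to_unmap(page, ttu | TTU_LZFREE)) {
	case SWAP_LZFREE:
		return true;	/* clean everywhere: free without swap I/O */
	case SWAP_SUCCESS:
		return false;	/* unmapped, but dirty: must go to swap */
	default:
		return false;	/* SWAP_FAIL, SWAP_AGAIN or SWAP_MLOCK */
	}
}
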
diff --git a/mm/swap_state.c b/mm/swap_state.c
index d783872d746c..676ff2991380 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -185,13 +185,12 @@ int add_to_swap(struct page *page, struct list_head *list)
 	 * deadlock in the swap out path.
 	 */
 	/*
-	 * Add it to the swap cache and mark it dirty
+	 * Add it to the swap cache.
 	 */
 	err = add_to_swap_cache(page, entry,
 			__GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN);
 
-	if (!err) {	/* Success */
-		SetPageDirty(page);
+	if (!err) {
 		return 1;
 	} else {	/* -ENOMEM radix-tree allocation failure */
 		/*
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 983e407afc09..5ac86956ff9d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -906,6 +906,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		int may_enter_fs;
 		enum page_references references = PAGEREF_RECLAIM_CLEAN;
 		bool dirty, writeback;
+		bool lazyfree = false;
+		int ret = SWAP_SUCCESS;
 
 		cond_resched();
 
@@ -1049,6 +1051,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 				goto keep_locked;
 			if (!add_to_swap(page, page_list))
 				goto activate_locked;
+			lazyfree = true;
 			may_enter_fs = 1;
 
 			/* Adding to swap updated mapping */
@@ -1060,14 +1063,17 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		 * processes. Try to unmap it here.
 		 */
 		if (page_mapped(page) && mapping) {
-			switch (try_to_unmap(page,
-					ttu_flags|TTU_BATCH_FLUSH)) {
+			switch (ret = try_to_unmap(page, lazyfree ?
+				(ttu_flags | TTU_BATCH_FLUSH | TTU_LZFREE) :
+				(ttu_flags | TTU_BATCH_FLUSH))) {
 			case SWAP_FAIL:
 				goto activate_locked;
 			case SWAP_AGAIN:
 				goto keep_locked;
 			case SWAP_MLOCK:
 				goto cull_mlocked;
+			case SWAP_LZFREE:
+				goto lazyfree;
 			case SWAP_SUCCESS:
 				; /* try to free the page below */
 			}
@@ -1174,6 +1180,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			}
 		}
 
+lazyfree:
 		if (!mapping || !__remove_mapping(mapping, page, true))
 			goto keep_locked;
 
@@ -1186,6 +1193,9 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		 */
 		__ClearPageLocked(page);
 free_it:
+		if (ret == SWAP_LZFREE)
+			count_vm_event(PGLAZYFREED);
+
 		nr_reclaimed++;
 
 		/*
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 6489086f0753..64bd0aa13f75 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -783,6 +783,7 @@ const char * const vmstat_text[] = {
 
 	"pgfault",
 	"pgmajfault",
+	"pglazyfreed",
 
 	TEXTS_FOR_ZONES("pgrefill")
 	TEXTS_FOR_ZONES("pgsteal_kswapd")
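
Finally, the new event is visible as a "pglazyfreed" line in /proc/vmstat, counting pages that reclaim discarded through the SWAP_LZFREE path above. A small stand-alone reader follows; the field name comes from the vmstat_text[] hunk, the rest is ordinary /proc parsing:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char key[64];
	unsigned long long val;
	FILE *f = fopen("/proc/vmstat", "r");

	if (!f)
		return 1;
	/* Each /proc/vmstat line is "<name> <count>". */
	while (fscanf(f, "%63s %llu", key, &val) == 2) {
		if (!strcmp(key, "pglazyfreed")) {
			printf("pglazyfreed %llu\n", val);
			break;
		}
	}
	fclose(f);
	return 0;
}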