summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--  arch/alpha/include/uapi/asm/mman.h  1
-rw-r--r--  arch/mips/include/uapi/asm/mman.h  1
-rw-r--r--  arch/parisc/include/uapi/asm/mman.h  1
-rw-r--r--  arch/xtensa/include/uapi/asm/mman.h  1
-rw-r--r--  include/linux/swap.h  1
-rw-r--r--  include/uapi/asm-generic/mman-common.h  1
-rw-r--r--  mm/madvise.c  189
-rw-r--r--  mm/vmscan.c  56
8 files changed, 251 insertions, 0 deletions
diff --git a/arch/alpha/include/uapi/asm/mman.h b/arch/alpha/include/uapi/asm/mman.h
index f3258fbf03d0..a18ec7f63888 100644
--- a/arch/alpha/include/uapi/asm/mman.h
+++ b/arch/alpha/include/uapi/asm/mman.h
@@ -69,6 +69,7 @@
69#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */ 69#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */
70 70
71#define MADV_COLD 20 /* deactivate these pages */ 71#define MADV_COLD 20 /* deactivate these pages */
72#define MADV_PAGEOUT 21 /* reclaim these pages */
72 73
73/* compatibility flags */ 74/* compatibility flags */
74#define MAP_FILE 0 75#define MAP_FILE 0
diff --git a/arch/mips/include/uapi/asm/mman.h b/arch/mips/include/uapi/asm/mman.h
index 00ad09fc5eb1..57dc2ac4f8bd 100644
--- a/arch/mips/include/uapi/asm/mman.h
+++ b/arch/mips/include/uapi/asm/mman.h
@@ -96,6 +96,7 @@
96#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */ 96#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */
97 97
98#define MADV_COLD 20 /* deactivate these pages */ 98#define MADV_COLD 20 /* deactivate these pages */
99#define MADV_PAGEOUT 21 /* reclaim these pages */
99 100
100/* compatibility flags */ 101/* compatibility flags */
101#define MAP_FILE 0 102#define MAP_FILE 0
diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h
index eb14e3a7b8f3..6fd8871e4081 100644
--- a/arch/parisc/include/uapi/asm/mman.h
+++ b/arch/parisc/include/uapi/asm/mman.h
@@ -49,6 +49,7 @@
49#define MADV_DOFORK 11 /* do inherit across fork */ 49#define MADV_DOFORK 11 /* do inherit across fork */
50 50
51#define MADV_COLD 20 /* deactivate these pages */ 51#define MADV_COLD 20 /* deactivate these pages */
52#define MADV_PAGEOUT 21 /* reclaim these pages */
52 53
53#define MADV_MERGEABLE 65 /* KSM may merge identical pages */ 54#define MADV_MERGEABLE 65 /* KSM may merge identical pages */
54#define MADV_UNMERGEABLE 66 /* KSM may not merge identical pages */ 55#define MADV_UNMERGEABLE 66 /* KSM may not merge identical pages */
diff --git a/arch/xtensa/include/uapi/asm/mman.h b/arch/xtensa/include/uapi/asm/mman.h
index f926b00ff11f..e5e643752947 100644
--- a/arch/xtensa/include/uapi/asm/mman.h
+++ b/arch/xtensa/include/uapi/asm/mman.h
@@ -104,6 +104,7 @@
104#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */ 104#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */
105 105
106#define MADV_COLD 20 /* deactivate these pages */ 106#define MADV_COLD 20 /* deactivate these pages */
107#define MADV_PAGEOUT 21 /* reclaim these pages */
107 108
108/* compatibility flags */ 109/* compatibility flags */
109#define MAP_FILE 0 110#define MAP_FILE 0
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 0ce997edb8bb..063c0c1e112b 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -365,6 +365,7 @@ extern int vm_swappiness;
365extern int remove_mapping(struct address_space *mapping, struct page *page); 365extern int remove_mapping(struct address_space *mapping, struct page *page);
366extern unsigned long vm_total_pages; 366extern unsigned long vm_total_pages;
367 367
368extern unsigned long reclaim_pages(struct list_head *page_list);
368#ifdef CONFIG_NUMA 369#ifdef CONFIG_NUMA
369extern int node_reclaim_mode; 370extern int node_reclaim_mode;
370extern int sysctl_min_unmapped_ratio; 371extern int sysctl_min_unmapped_ratio;
diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h
index 23431faf0eb6..c160a5354eb6 100644
--- a/include/uapi/asm-generic/mman-common.h
+++ b/include/uapi/asm-generic/mman-common.h
@@ -68,6 +68,7 @@
68#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */ 68#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */
69 69
70#define MADV_COLD 20 /* deactivate these pages */ 70#define MADV_COLD 20 /* deactivate these pages */
71#define MADV_PAGEOUT 21 /* reclaim these pages */
71 72
72/* compatibility flags */ 73/* compatibility flags */
73#define MAP_FILE 0 74#define MAP_FILE 0
diff --git a/mm/madvise.c b/mm/madvise.c
index e1aee62967c3..54c5639774b6 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -44,6 +44,7 @@ static int madvise_need_mmap_write(int behavior)
44 case MADV_WILLNEED: 44 case MADV_WILLNEED:
45 case MADV_DONTNEED: 45 case MADV_DONTNEED:
46 case MADV_COLD: 46 case MADV_COLD:
47 case MADV_PAGEOUT:
47 case MADV_FREE: 48 case MADV_FREE:
48 return 0; 49 return 0;
49 default: 50 default:
@@ -461,6 +462,191 @@ static long madvise_cold(struct vm_area_struct *vma,
461 return 0; 462 return 0;
462} 463}
463 464
/*
 * madvise_pageout_pte_range - pmd-level page-walk callback for MADV_PAGEOUT.
 *
 * For every present page mapped in [addr, end) under @pmd, clear the
 * young/referenced state, try to isolate the page from the LRU onto a local
 * list, and finally hand that list to reclaim_pages().
 *
 * @walk->private must point to a struct mmu_gather: it is dereferenced
 * unconditionally (tlb->mm) before any other check.
 *
 * Returns -EINTR when a fatal signal is pending for current, 0 otherwise
 * (including every path where nothing could be isolated).
 */
465static int madvise_pageout_pte_range(pmd_t *pmd, unsigned long addr,
466 unsigned long end, struct mm_walk *walk)
467{
468 struct mmu_gather *tlb = walk->private;
469 struct mm_struct *mm = tlb->mm;
470 struct vm_area_struct *vma = walk->vma;
471 pte_t *orig_pte, *pte, ptent;
472 spinlock_t *ptl;
/* Pages isolated during this call; consumed by reclaim_pages() below. */
473 LIST_HEAD(page_list);
474 struct page *page;
475
476 if (fatal_signal_pending(current))
477 return -EINTR;
478
/* Huge-pmd handling; the whole section is compiled out without THP. */
479#ifdef CONFIG_TRANSPARENT_HUGEPAGE
480 if (pmd_trans_huge(*pmd)) {
481 pmd_t orig_pmd;
482 unsigned long next = pmd_addr_end(addr, end);
483
484 tlb_change_page_size(tlb, HPAGE_PMD_SIZE);
485 ptl = pmd_trans_huge_lock(pmd, vma);
/* No lock returned: nothing is done for this pmd. */
486 if (!ptl)
487 return 0;
488
489 orig_pmd = *pmd;
490 if (is_huge_zero_pmd(orig_pmd))
491 goto huge_unlock;
492
493 if (unlikely(!pmd_present(orig_pmd))) {
494 VM_BUG_ON(thp_migration_supported() &&
495 !is_pmd_migration_entry(orig_pmd));
496 goto huge_unlock;
497 }
498
499 page = pmd_page(orig_pmd);
/*
 * The requested range covers only part of the huge page: split it, but
 * only when page_mapcount() is exactly 1. The pmd lock is dropped before
 * taking the page lock; the extra reference is held across the split.
 */
500 if (next - addr != HPAGE_PMD_SIZE) {
501 int err;
502
503 if (page_mapcount(page) != 1)
504 goto huge_unlock;
505 get_page(page);
506 spin_unlock(ptl);
507 lock_page(page);
508 err = split_huge_page(page);
509 unlock_page(page);
510 put_page(page);
/* Split succeeded (err == 0): continue with the per-pte path. */
511 if (!err)
512 goto regular_page;
513 return 0;
514 }
515
/* Clear the young bit in the pmd: invalidate, mark old, write back. */
516 if (pmd_young(orig_pmd)) {
517 pmdp_invalidate(vma, addr, pmd);
518 orig_pmd = pmd_mkold(orig_pmd);
519
520 set_pmd_at(mm, addr, pmd, orig_pmd);
521 tlb_remove_tlb_entry(tlb, pmd, addr);
522 }
523
524 ClearPageReferenced(page);
525 test_and_clear_page_young(page);
526
/* Queue the page only when isolate_lru_page() returns 0. */
527 if (!isolate_lru_page(page))
528 list_add(&page->lru, &page_list);
529huge_unlock:
530 spin_unlock(ptl);
/* page_list may be empty here when an early goto skipped isolation. */
531 reclaim_pages(&page_list);
532 return 0;
533 }
534
535 if (pmd_trans_unstable(pmd))
536 return 0;
537regular_page:
538#endif
539 tlb_change_page_size(tlb, PAGE_SIZE);
540 orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
541 flush_tlb_batched_pending(mm);
542 arch_enter_lazy_mmu_mode();
543 for (; addr < end; pte++, addr += PAGE_SIZE) {
544 ptent = *pte;
/* Entries that are not present are skipped. */
545 if (!pte_present(ptent))
546 continue;
547
548 page = vm_normal_page(vma, addr, ptent);
/* vm_normal_page() gave no page for this pte: skip it. */
549 if (!page)
550 continue;
551
552 /*
553 * creating a THP page is expensive so split it only if we
554 * are sure it's worth. Split it if we are only owner.
555 */
556 if (PageTransCompound(page)) {
/* Any bail-out below stops the walk of this pmd entirely (break). */
557 if (page_mapcount(page) != 1)
558 break;
559 get_page(page);
560 if (!trylock_page(page)) {
561 put_page(page);
562 break;
563 }
/* The pte lock is dropped around split_huge_page(). */
564 pte_unmap_unlock(orig_pte, ptl);
565 if (split_huge_page(page)) {
566 unlock_page(page);
567 put_page(page);
/*
 * Split failed: re-take the pte lock only so that the
 * pte_unmap_unlock() after the loop is balanced; the returned
 * pte pointer is not used.
 */
568 pte_offset_map_lock(mm, pmd, addr, &ptl);
569 break;
570 }
571 unlock_page(page);
572 put_page(page);
/*
 * Split succeeded: re-map and re-lock at the same address, then
 * step pte/addr back by one so that the loop increment revisits
 * this address.
 */
573 pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
574 pte--;
575 addr -= PAGE_SIZE;
576 continue;
577 }
578
579 VM_BUG_ON_PAGE(PageTransCompound(page), page);
580
/* Clear the young bit in the pte: clear, mark old, write back. */
581 if (pte_young(ptent)) {
582 ptent = ptep_get_and_clear_full(mm, addr, pte,
583 tlb->fullmm);
584 ptent = pte_mkold(ptent);
585 set_pte_at(mm, addr, pte, ptent);
586 tlb_remove_tlb_entry(tlb, pte, addr);
587 }
588 ClearPageReferenced(page);
589 test_and_clear_page_young(page);
590
/* Queue the page only when isolate_lru_page() returns 0. */
591 if (!isolate_lru_page(page))
592 list_add(&page->lru, &page_list);
593 }
594
595 arch_leave_lazy_mmu_mode();
596 pte_unmap_unlock(orig_pte, ptl);
/* Reclaim is attempted only after the pte lock has been released. */
597 reclaim_pages(&page_list);
598 cond_resched();
599
600 return 0;
601}
602
603static void madvise_pageout_page_range(struct mmu_gather *tlb,
604 struct vm_area_struct *vma,
605 unsigned long addr, unsigned long end)
606{
607 tlb_start_vma(tlb, vma);
608 walk_page_range(vma->vm_mm, addr, end, &cold_walk_ops, NULL);
609 tlb_end_vma(tlb, vma);
610}
611
612static inline bool can_do_pageout(struct vm_area_struct *vma)
613{
614 if (vma_is_anonymous(vma))
615 return true;
616 if (!vma->vm_file)
617 return false;
618 /*
619 * paging out pagecache only for non-anonymous mappings that correspond
620 * to the files the calling process could (if tried) open for writing;
621 * otherwise we'd be including shared non-exclusive mappings, which
622 * opens a side channel.
623 */
624 return inode_owner_or_capable(file_inode(vma->vm_file)) ||
625 inode_permission(file_inode(vma->vm_file), MAY_WRITE) == 0;
626}
627
628static long madvise_pageout(struct vm_area_struct *vma,
629 struct vm_area_struct **prev,
630 unsigned long start_addr, unsigned long end_addr)
631{
632 struct mm_struct *mm = vma->vm_mm;
633 struct mmu_gather tlb;
634
635 *prev = vma;
636 if (!can_madv_lru_vma(vma))
637 return -EINVAL;
638
639 if (!can_do_pageout(vma))
640 return 0;
641
642 lru_add_drain();
643 tlb_gather_mmu(&tlb, mm, start_addr, end_addr);
644 madvise_pageout_page_range(&tlb, vma, start_addr, end_addr);
645 tlb_finish_mmu(&tlb, start_addr, end_addr);
646
647 return 0;
648}
649
464static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, 650static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
465 unsigned long end, struct mm_walk *walk) 651 unsigned long end, struct mm_walk *walk)
466 652
@@ -843,6 +1029,8 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
843 return madvise_willneed(vma, prev, start, end); 1029 return madvise_willneed(vma, prev, start, end);
844 case MADV_COLD: 1030 case MADV_COLD:
845 return madvise_cold(vma, prev, start, end); 1031 return madvise_cold(vma, prev, start, end);
1032 case MADV_PAGEOUT:
1033 return madvise_pageout(vma, prev, start, end);
846 case MADV_FREE: 1034 case MADV_FREE:
847 case MADV_DONTNEED: 1035 case MADV_DONTNEED:
848 return madvise_dontneed_free(vma, prev, start, end, behavior); 1036 return madvise_dontneed_free(vma, prev, start, end, behavior);
@@ -865,6 +1053,7 @@ madvise_behavior_valid(int behavior)
865 case MADV_DONTNEED: 1053 case MADV_DONTNEED:
866 case MADV_FREE: 1054 case MADV_FREE:
867 case MADV_COLD: 1055 case MADV_COLD:
1056 case MADV_PAGEOUT:
868#ifdef CONFIG_KSM 1057#ifdef CONFIG_KSM
869 case MADV_MERGEABLE: 1058 case MADV_MERGEABLE:
870 case MADV_UNMERGEABLE: 1059 case MADV_UNMERGEABLE:
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d8bbaf068c35..e5d52d6a24af 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2145,6 +2145,62 @@ static void shrink_active_list(unsigned long nr_to_scan,
2145 nr_deactivate, nr_rotated, sc->priority, file); 2145 nr_deactivate, nr_rotated, sc->priority, file);
2146} 2146}
2147 2147
2148unsigned long reclaim_pages(struct list_head *page_list)
2149{
2150 int nid = -1;
2151 unsigned long nr_reclaimed = 0;
2152 LIST_HEAD(node_page_list);
2153 struct reclaim_stat dummy_stat;
2154 struct page *page;
2155 struct scan_control sc = {
2156 .gfp_mask = GFP_KERNEL,
2157 .priority = DEF_PRIORITY,
2158 .may_writepage = 1,
2159 .may_unmap = 1,
2160 .may_swap = 1,
2161 };
2162
2163 while (!list_empty(page_list)) {
2164 page = lru_to_page(page_list);
2165 if (nid == -1) {
2166 nid = page_to_nid(page);
2167 INIT_LIST_HEAD(&node_page_list);
2168 }
2169
2170 if (nid == page_to_nid(page)) {
2171 ClearPageActive(page);
2172 list_move(&page->lru, &node_page_list);
2173 continue;
2174 }
2175
2176 nr_reclaimed += shrink_page_list(&node_page_list,
2177 NODE_DATA(nid),
2178 &sc, 0,
2179 &dummy_stat, false);
2180 while (!list_empty(&node_page_list)) {
2181 page = lru_to_page(&node_page_list);
2182 list_del(&page->lru);
2183 putback_lru_page(page);
2184 }
2185
2186 nid = -1;
2187 }
2188
2189 if (!list_empty(&node_page_list)) {
2190 nr_reclaimed += shrink_page_list(&node_page_list,
2191 NODE_DATA(nid),
2192 &sc, 0,
2193 &dummy_stat, false);
2194 while (!list_empty(&node_page_list)) {
2195 page = lru_to_page(&node_page_list);
2196 list_del(&page->lru);
2197 putback_lru_page(page);
2198 }
2199 }
2200
2201 return nr_reclaimed;
2202}
2203
2148/* 2204/*
2149 * The inactive anon list should be small enough that the VM never has 2205 * The inactive anon list should be small enough that the VM never has
2150 * to do too much work. 2206 * to do too much work.