diff options
-rw-r--r-- | arch/alpha/include/uapi/asm/mman.h | 1 | ||||
-rw-r--r-- | arch/mips/include/uapi/asm/mman.h | 1 | ||||
-rw-r--r-- | arch/parisc/include/uapi/asm/mman.h | 1 | ||||
-rw-r--r-- | arch/xtensa/include/uapi/asm/mman.h | 1 | ||||
-rw-r--r-- | include/linux/swap.h | 1 | ||||
-rw-r--r-- | include/uapi/asm-generic/mman-common.h | 1 | ||||
-rw-r--r-- | mm/madvise.c | 189 | ||||
-rw-r--r-- | mm/vmscan.c | 56 |
8 files changed, 251 insertions, 0 deletions
diff --git a/arch/alpha/include/uapi/asm/mman.h b/arch/alpha/include/uapi/asm/mman.h index f3258fbf03d0..a18ec7f63888 100644 --- a/arch/alpha/include/uapi/asm/mman.h +++ b/arch/alpha/include/uapi/asm/mman.h | |||
@@ -69,6 +69,7 @@ | |||
69 | #define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */ | 69 | #define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */ |
70 | 70 | ||
71 | #define MADV_COLD 20 /* deactivate these pages */ | 71 | #define MADV_COLD 20 /* deactivate these pages */ |
72 | #define MADV_PAGEOUT 21 /* reclaim these pages */ | ||
72 | 73 | ||
73 | /* compatibility flags */ | 74 | /* compatibility flags */ |
74 | #define MAP_FILE 0 | 75 | #define MAP_FILE 0 |
diff --git a/arch/mips/include/uapi/asm/mman.h b/arch/mips/include/uapi/asm/mman.h index 00ad09fc5eb1..57dc2ac4f8bd 100644 --- a/arch/mips/include/uapi/asm/mman.h +++ b/arch/mips/include/uapi/asm/mman.h | |||
@@ -96,6 +96,7 @@ | |||
96 | #define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */ | 96 | #define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */ |
97 | 97 | ||
98 | #define MADV_COLD 20 /* deactivate these pages */ | 98 | #define MADV_COLD 20 /* deactivate these pages */ |
99 | #define MADV_PAGEOUT 21 /* reclaim these pages */ | ||
99 | 100 | ||
100 | /* compatibility flags */ | 101 | /* compatibility flags */ |
101 | #define MAP_FILE 0 | 102 | #define MAP_FILE 0 |
diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h index eb14e3a7b8f3..6fd8871e4081 100644 --- a/arch/parisc/include/uapi/asm/mman.h +++ b/arch/parisc/include/uapi/asm/mman.h | |||
@@ -49,6 +49,7 @@ | |||
49 | #define MADV_DOFORK 11 /* do inherit across fork */ | 49 | #define MADV_DOFORK 11 /* do inherit across fork */ |
50 | 50 | ||
51 | #define MADV_COLD 20 /* deactivate these pages */ | 51 | #define MADV_COLD 20 /* deactivate these pages */ |
52 | #define MADV_PAGEOUT 21 /* reclaim these pages */ | ||
52 | 53 | ||
53 | #define MADV_MERGEABLE 65 /* KSM may merge identical pages */ | 54 | #define MADV_MERGEABLE 65 /* KSM may merge identical pages */ |
54 | #define MADV_UNMERGEABLE 66 /* KSM may not merge identical pages */ | 55 | #define MADV_UNMERGEABLE 66 /* KSM may not merge identical pages */ |
diff --git a/arch/xtensa/include/uapi/asm/mman.h b/arch/xtensa/include/uapi/asm/mman.h index f926b00ff11f..e5e643752947 100644 --- a/arch/xtensa/include/uapi/asm/mman.h +++ b/arch/xtensa/include/uapi/asm/mman.h | |||
@@ -104,6 +104,7 @@ | |||
104 | #define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */ | 104 | #define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */ |
105 | 105 | ||
106 | #define MADV_COLD 20 /* deactivate these pages */ | 106 | #define MADV_COLD 20 /* deactivate these pages */ |
107 | #define MADV_PAGEOUT 21 /* reclaim these pages */ | ||
107 | 108 | ||
108 | /* compatibility flags */ | 109 | /* compatibility flags */ |
109 | #define MAP_FILE 0 | 110 | #define MAP_FILE 0 |
diff --git a/include/linux/swap.h b/include/linux/swap.h index 0ce997edb8bb..063c0c1e112b 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h | |||
@@ -365,6 +365,7 @@ extern int vm_swappiness; | |||
365 | extern int remove_mapping(struct address_space *mapping, struct page *page); | 365 | extern int remove_mapping(struct address_space *mapping, struct page *page); |
366 | extern unsigned long vm_total_pages; | 366 | extern unsigned long vm_total_pages; |
367 | 367 | ||
368 | extern unsigned long reclaim_pages(struct list_head *page_list); | ||
368 | #ifdef CONFIG_NUMA | 369 | #ifdef CONFIG_NUMA |
369 | extern int node_reclaim_mode; | 370 | extern int node_reclaim_mode; |
370 | extern int sysctl_min_unmapped_ratio; | 371 | extern int sysctl_min_unmapped_ratio; |
diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h index 23431faf0eb6..c160a5354eb6 100644 --- a/include/uapi/asm-generic/mman-common.h +++ b/include/uapi/asm-generic/mman-common.h | |||
@@ -68,6 +68,7 @@ | |||
68 | #define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */ | 68 | #define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */ |
69 | 69 | ||
70 | #define MADV_COLD 20 /* deactivate these pages */ | 70 | #define MADV_COLD 20 /* deactivate these pages */ |
71 | #define MADV_PAGEOUT 21 /* reclaim these pages */ | ||
71 | 72 | ||
72 | /* compatibility flags */ | 73 | /* compatibility flags */ |
73 | #define MAP_FILE 0 | 74 | #define MAP_FILE 0 |
diff --git a/mm/madvise.c b/mm/madvise.c index e1aee62967c3..54c5639774b6 100644 --- a/mm/madvise.c +++ b/mm/madvise.c | |||
@@ -44,6 +44,7 @@ static int madvise_need_mmap_write(int behavior) | |||
44 | case MADV_WILLNEED: | 44 | case MADV_WILLNEED: |
45 | case MADV_DONTNEED: | 45 | case MADV_DONTNEED: |
46 | case MADV_COLD: | 46 | case MADV_COLD: |
47 | case MADV_PAGEOUT: | ||
47 | case MADV_FREE: | 48 | case MADV_FREE: |
48 | return 0; | 49 | return 0; |
49 | default: | 50 | default: |
@@ -461,6 +462,191 @@ static long madvise_cold(struct vm_area_struct *vma, | |||
461 | return 0; | 462 | return 0; |
462 | } | 463 | } |
463 | 464 | ||
/*
 * MADV_PAGEOUT page-table walker: age (clear accessed bits for) and
 * isolate the pages mapped by this PMD range, then hand the isolated
 * pages to reclaim_pages() for immediate reclaim.
 *
 * walk->private must be the struct mmu_gather driving this walk, so
 * that TLB invalidations are batched.
 *
 * Returns 0 on success, -EINTR if a fatal signal is pending.
 */
static int madvise_pageout_pte_range(pmd_t *pmd, unsigned long addr,
				unsigned long end, struct mm_walk *walk)
{
	struct mmu_gather *tlb = walk->private;
	struct mm_struct *mm = tlb->mm;
	struct vm_area_struct *vma = walk->vma;
	pte_t *orig_pte, *pte, ptent;
	spinlock_t *ptl;
	LIST_HEAD(page_list);	/* pages isolated from their LRU, to reclaim */
	struct page *page;

	if (fatal_signal_pending(current))
		return -EINTR;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	if (pmd_trans_huge(*pmd)) {
		pmd_t orig_pmd;
		unsigned long next = pmd_addr_end(addr, end);

		tlb_change_page_size(tlb, HPAGE_PMD_SIZE);
		ptl = pmd_trans_huge_lock(pmd, vma);
		if (!ptl)
			return 0;

		orig_pmd = *pmd;
		/* The huge zero page is shared read-only; nothing to reclaim. */
		if (is_huge_zero_pmd(orig_pmd))
			goto huge_unlock;

		if (unlikely(!pmd_present(orig_pmd))) {
			/* Non-present huge PMD should only be a migration entry. */
			VM_BUG_ON(thp_migration_supported() &&
					!is_pmd_migration_entry(orig_pmd));
			goto huge_unlock;
		}

		page = pmd_page(orig_pmd);
		if (next - addr != HPAGE_PMD_SIZE) {
			/*
			 * The advised range covers only part of the THP.
			 * Split it — but only if we are the sole mapper —
			 * and retry this range as regular pages.
			 */
			int err;

			if (page_mapcount(page) != 1)
				goto huge_unlock;
			get_page(page);
			spin_unlock(ptl);
			lock_page(page);
			err = split_huge_page(page);
			unlock_page(page);
			put_page(page);
			if (!err)
				goto regular_page;
			return 0;
		}

		if (pmd_young(orig_pmd)) {
			/* Clear the accessed bit so reclaim sees it as cold. */
			pmdp_invalidate(vma, addr, pmd);
			orig_pmd = pmd_mkold(orig_pmd);

			set_pmd_at(mm, addr, pmd, orig_pmd);
			tlb_remove_tlb_entry(tlb, pmd, addr);
		}

		ClearPageReferenced(page);
		test_and_clear_page_young(page);

		/* isolate_lru_page() returns 0 on success. */
		if (!isolate_lru_page(page))
			list_add(&page->lru, &page_list);
huge_unlock:
		spin_unlock(ptl);
		reclaim_pages(&page_list);
		return 0;
	}

	if (pmd_trans_unstable(pmd))
		return 0;
regular_page:
#endif
	tlb_change_page_size(tlb, PAGE_SIZE);
	orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	flush_tlb_batched_pending(mm);
	arch_enter_lazy_mmu_mode();
	for (; addr < end; pte++, addr += PAGE_SIZE) {
		ptent = *pte;
		if (!pte_present(ptent))
			continue;

		page = vm_normal_page(vma, addr, ptent);
		if (!page)
			continue;

		/*
		 * creating a THP page is expensive so split it only if we
		 * are sure it's worth. Split it if we are only owner.
		 */
		if (PageTransCompound(page)) {
			if (page_mapcount(page) != 1)
				break;
			get_page(page);
			if (!trylock_page(page)) {
				put_page(page);
				break;
			}
			/* Drop the PTL across the (sleeping) split. */
			pte_unmap_unlock(orig_pte, ptl);
			if (split_huge_page(page)) {
				unlock_page(page);
				put_page(page);
				/* re-take the PTL before bailing out below */
				pte_offset_map_lock(mm, pmd, addr, &ptl);
				break;
			}
			unlock_page(page);
			put_page(page);
			pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
			/* Step back one slot so we revisit this address. */
			pte--;
			addr -= PAGE_SIZE;
			continue;
		}

		VM_BUG_ON_PAGE(PageTransCompound(page), page);

		if (pte_young(ptent)) {
			/* Clear the accessed bit; flush is batched via tlb. */
			ptent = ptep_get_and_clear_full(mm, addr, pte,
							tlb->fullmm);
			ptent = pte_mkold(ptent);
			set_pte_at(mm, addr, pte, ptent);
			tlb_remove_tlb_entry(tlb, pte, addr);
		}
		ClearPageReferenced(page);
		test_and_clear_page_young(page);

		if (!isolate_lru_page(page))
			list_add(&page->lru, &page_list);
	}

	arch_leave_lazy_mmu_mode();
	pte_unmap_unlock(orig_pte, ptl);
	/* Reclaim everything we managed to isolate from this range. */
	reclaim_pages(&page_list);
	cond_resched();

	return 0;
}
602 | |||
603 | static void madvise_pageout_page_range(struct mmu_gather *tlb, | ||
604 | struct vm_area_struct *vma, | ||
605 | unsigned long addr, unsigned long end) | ||
606 | { | ||
607 | tlb_start_vma(tlb, vma); | ||
608 | walk_page_range(vma->vm_mm, addr, end, &cold_walk_ops, NULL); | ||
609 | tlb_end_vma(tlb, vma); | ||
610 | } | ||
611 | |||
612 | static inline bool can_do_pageout(struct vm_area_struct *vma) | ||
613 | { | ||
614 | if (vma_is_anonymous(vma)) | ||
615 | return true; | ||
616 | if (!vma->vm_file) | ||
617 | return false; | ||
618 | /* | ||
619 | * paging out pagecache only for non-anonymous mappings that correspond | ||
620 | * to the files the calling process could (if tried) open for writing; | ||
621 | * otherwise we'd be including shared non-exclusive mappings, which | ||
622 | * opens a side channel. | ||
623 | */ | ||
624 | return inode_owner_or_capable(file_inode(vma->vm_file)) || | ||
625 | inode_permission(file_inode(vma->vm_file), MAY_WRITE) == 0; | ||
626 | } | ||
627 | |||
628 | static long madvise_pageout(struct vm_area_struct *vma, | ||
629 | struct vm_area_struct **prev, | ||
630 | unsigned long start_addr, unsigned long end_addr) | ||
631 | { | ||
632 | struct mm_struct *mm = vma->vm_mm; | ||
633 | struct mmu_gather tlb; | ||
634 | |||
635 | *prev = vma; | ||
636 | if (!can_madv_lru_vma(vma)) | ||
637 | return -EINVAL; | ||
638 | |||
639 | if (!can_do_pageout(vma)) | ||
640 | return 0; | ||
641 | |||
642 | lru_add_drain(); | ||
643 | tlb_gather_mmu(&tlb, mm, start_addr, end_addr); | ||
644 | madvise_pageout_page_range(&tlb, vma, start_addr, end_addr); | ||
645 | tlb_finish_mmu(&tlb, start_addr, end_addr); | ||
646 | |||
647 | return 0; | ||
648 | } | ||
649 | |||
464 | static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, | 650 | static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, |
465 | unsigned long end, struct mm_walk *walk) | 651 | unsigned long end, struct mm_walk *walk) |
466 | 652 | ||
@@ -843,6 +1029,8 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, | |||
843 | return madvise_willneed(vma, prev, start, end); | 1029 | return madvise_willneed(vma, prev, start, end); |
844 | case MADV_COLD: | 1030 | case MADV_COLD: |
845 | return madvise_cold(vma, prev, start, end); | 1031 | return madvise_cold(vma, prev, start, end); |
1032 | case MADV_PAGEOUT: | ||
1033 | return madvise_pageout(vma, prev, start, end); | ||
846 | case MADV_FREE: | 1034 | case MADV_FREE: |
847 | case MADV_DONTNEED: | 1035 | case MADV_DONTNEED: |
848 | return madvise_dontneed_free(vma, prev, start, end, behavior); | 1036 | return madvise_dontneed_free(vma, prev, start, end, behavior); |
@@ -865,6 +1053,7 @@ madvise_behavior_valid(int behavior) | |||
865 | case MADV_DONTNEED: | 1053 | case MADV_DONTNEED: |
866 | case MADV_FREE: | 1054 | case MADV_FREE: |
867 | case MADV_COLD: | 1055 | case MADV_COLD: |
1056 | case MADV_PAGEOUT: | ||
868 | #ifdef CONFIG_KSM | 1057 | #ifdef CONFIG_KSM |
869 | case MADV_MERGEABLE: | 1058 | case MADV_MERGEABLE: |
870 | case MADV_UNMERGEABLE: | 1059 | case MADV_UNMERGEABLE: |
diff --git a/mm/vmscan.c b/mm/vmscan.c index d8bbaf068c35..e5d52d6a24af 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -2145,6 +2145,62 @@ static void shrink_active_list(unsigned long nr_to_scan, | |||
2145 | nr_deactivate, nr_rotated, sc->priority, file); | 2145 | nr_deactivate, nr_rotated, sc->priority, file); |
2146 | } | 2146 | } |
2147 | 2147 | ||
/*
 * Reclaim a caller-supplied list of isolated pages immediately,
 * batching them per NUMA node for shrink_page_list().  Pages that
 * could not be reclaimed are put back on their LRU lists.
 *
 * Returns the number of pages actually reclaimed.
 */
unsigned long reclaim_pages(struct list_head *page_list)
{
	int nid = -1;	/* node of the batch currently being collected */
	unsigned long nr_reclaimed = 0;
	LIST_HEAD(node_page_list);
	struct reclaim_stat dummy_stat;
	struct page *page;
	struct scan_control sc = {
		.gfp_mask = GFP_KERNEL,
		.priority = DEF_PRIORITY,
		.may_writepage = 1,
		.may_unmap = 1,
		.may_swap = 1,
	};

	while (!list_empty(page_list)) {
		page = lru_to_page(page_list);
		if (nid == -1) {
			/* Start a fresh batch on this page's node. */
			nid = page_to_nid(page);
			INIT_LIST_HEAD(&node_page_list);
		}

		if (nid == page_to_nid(page)) {
			ClearPageActive(page);
			list_move(&page->lru, &node_page_list);
			continue;
		}

		/*
		 * Hit a page from a different node: flush the current
		 * batch.  The mismatched page stays on page_list and
		 * seeds the next batch (nid is reset below).
		 */
		nr_reclaimed += shrink_page_list(&node_page_list,
						NODE_DATA(nid),
						&sc, 0,
						&dummy_stat, false);
		/* Return any unreclaimed pages to their LRU lists. */
		while (!list_empty(&node_page_list)) {
			page = lru_to_page(&node_page_list);
			list_del(&page->lru);
			putback_lru_page(page);
		}

		nid = -1;
	}

	/* Flush the final (possibly only) batch. */
	if (!list_empty(&node_page_list)) {
		nr_reclaimed += shrink_page_list(&node_page_list,
						NODE_DATA(nid),
						&sc, 0,
						&dummy_stat, false);
		while (!list_empty(&node_page_list)) {
			page = lru_to_page(&node_page_list);
			list_del(&page->lru);
			putback_lru_page(page);
		}
	}

	return nr_reclaimed;
}
2203 | |||
2148 | /* | 2204 | /* |
2149 | * The inactive anon list should be small enough that the VM never has | 2205 | * The inactive anon list should be small enough that the VM never has |
2150 | * to do too much work. | 2206 | * to do too much work. |