Diffstat (limited to 'include')
 include/asm-generic/pgtable.h  | 110
 include/linux/huge_mm.h        |  16
 include/linux/hugetlb.h        |   8
 include/linux/mempolicy.h      |   8
 include/linux/migrate.h        |  45
 include/linux/mm.h             |  39
 include/linux/mm_types.h       |  31
 include/linux/mmzone.h         |  13
 include/linux/rmap.h           |  33
 include/linux/sched.h          |  27
 include/linux/vm_event_item.h  |  12
 include/linux/vmstat.h         |   8
 include/trace/events/migrate.h |  51
 include/uapi/linux/mempolicy.h |  15
 14 files changed, 395 insertions(+), 21 deletions(-)
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 284e80831d2c..701beab27aab 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -219,6 +219,10 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
 #define move_pte(pte, prot, old_addr, new_addr)	(pte)
 #endif
 
+#ifndef pte_accessible
+# define pte_accessible(pte)		((void)(pte),1)
+#endif
+
 #ifndef flush_tlb_fix_spurious_fault
 #define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address)
 #endif
@@ -580,6 +584,112 @@ static inline int pmd_trans_unstable(pmd_t *pmd)
 #endif
 }
 
+#ifdef CONFIG_NUMA_BALANCING
+#ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE
+/*
+ * _PAGE_NUMA works identically to _PAGE_PROTNONE (it is actually the
+ * same bit too). It is set only when _PAGE_PRESENT is not set, and
+ * it is never set while _PAGE_PRESENT is set.
+ *
+ * pte/pmd_present() still returns true if pte/pmd_numa returns true.
+ * A page fault triggers on these regions precisely because
+ * _PAGE_PRESENT is not set in the hardware entry.
+ */
+#ifndef pte_numa
+static inline int pte_numa(pte_t pte)
+{
+	return (pte_flags(pte) &
+		(_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA;
+}
+#endif
+
+#ifndef pmd_numa
+static inline int pmd_numa(pmd_t pmd)
+{
+	return (pmd_flags(pmd) &
+		(_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA;
+}
+#endif
+
+/*
+ * pte/pmd_mknonnuma sets the _PAGE_ACCESSED bitflag automatically
+ * because they're called by the NUMA hinting minor page fault. If
+ * we did not set the _PAGE_ACCESSED bitflag here, the TLB miss
+ * handler would be forced to set it later while filling the TLB
+ * after we return to userland. That would trigger a second write
+ * to memory that we optimize away by setting _PAGE_ACCESSED here.
+ */
+#ifndef pte_mknonnuma
+static inline pte_t pte_mknonnuma(pte_t pte)
+{
+	pte = pte_clear_flags(pte, _PAGE_NUMA);
+	return pte_set_flags(pte, _PAGE_PRESENT|_PAGE_ACCESSED);
+}
+#endif
+
+#ifndef pmd_mknonnuma
+static inline pmd_t pmd_mknonnuma(pmd_t pmd)
+{
+	pmd = pmd_clear_flags(pmd, _PAGE_NUMA);
+	return pmd_set_flags(pmd, _PAGE_PRESENT|_PAGE_ACCESSED);
+}
+#endif
+
+#ifndef pte_mknuma
+static inline pte_t pte_mknuma(pte_t pte)
+{
+	pte = pte_set_flags(pte, _PAGE_NUMA);
+	return pte_clear_flags(pte, _PAGE_PRESENT);
+}
+#endif
+
+#ifndef pmd_mknuma
+static inline pmd_t pmd_mknuma(pmd_t pmd)
+{
+	pmd = pmd_set_flags(pmd, _PAGE_NUMA);
+	return pmd_clear_flags(pmd, _PAGE_PRESENT);
+}
+#endif
+#else
+extern int pte_numa(pte_t pte);
+extern int pmd_numa(pmd_t pmd);
+extern pte_t pte_mknonnuma(pte_t pte);
+extern pmd_t pmd_mknonnuma(pmd_t pmd);
+extern pte_t pte_mknuma(pte_t pte);
+extern pmd_t pmd_mknuma(pmd_t pmd);
+#endif /* CONFIG_ARCH_USES_NUMA_PROT_NONE */
+#else
+static inline int pmd_numa(pmd_t pmd)
+{
+	return 0;
+}
+
+static inline int pte_numa(pte_t pte)
+{
+	return 0;
+}
+
+static inline pte_t pte_mknonnuma(pte_t pte)
+{
+	return pte;
+}
+
+static inline pmd_t pmd_mknonnuma(pmd_t pmd)
+{
+	return pmd;
+}
+
+static inline pte_t pte_mknuma(pte_t pte)
+{
+	return pte;
+}
+
+static inline pmd_t pmd_mknuma(pmd_t pmd)
+{
+	return pmd;
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
 #endif /* CONFIG_MMU */
 
 #endif /* !__ASSEMBLY__ */
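The helpers above define both halves of a NUMA hinting fault: the scanner makes an entry non-present with pte_mknuma(), and the fault path later detects it with pte_numa() and repairs it with pte_mknonnuma(). A minimal sketch of that consumer, with locking and reference handling simplified and an illustrative function name (the handler this series actually adds differs in detail):

static int do_numa_page_sketch(struct mm_struct *mm, struct vm_area_struct *vma,
			       unsigned long addr, pte_t entry,
			       pte_t *ptep, pmd_t *pmd)
{
	spinlock_t *ptl = pte_lockptr(mm, pmd);
	struct page *page;
	int target_nid;

	spin_lock(ptl);
	if (unlikely(!pte_same(*ptep, entry))) {
		spin_unlock(ptl);		/* raced with another fault */
		return 0;
	}

	/* Make the pte present again; _PAGE_ACCESSED comes for free. */
	set_pte_at(mm, addr, ptep, pte_mknonnuma(entry));
	update_mmu_cache(vma, addr, ptep);

	page = vm_normal_page(vma, addr, *ptep);
	if (!page) {
		spin_unlock(ptl);
		return 0;
	}
	get_page(page);
	target_nid = mpol_misplaced(page, vma, addr);
	spin_unlock(ptl);

	if (target_nid == -1) {
		put_page(page);			/* already well placed */
		return 0;
	}
	/* Assumption: migrate_misplaced_page() consumes the reference. */
	migrate_misplaced_page(page, target_nid);
	return 0;
}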
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 092dc5305a32..1d76f8ca90f0 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -31,7 +31,8 @@ extern int move_huge_pmd(struct vm_area_struct *vma,
 			 unsigned long new_addr, unsigned long old_end,
 			 pmd_t *old_pmd, pmd_t *new_pmd);
 extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
-			unsigned long addr, pgprot_t newprot);
+			unsigned long addr, pgprot_t newprot,
+			int prot_numa);
 
 enum transparent_hugepage_flag {
 	TRANSPARENT_HUGEPAGE_FLAG,
@@ -111,7 +112,7 @@ extern void __split_huge_page_pmd(struct vm_area_struct *vma,
 #define wait_split_huge_page(__anon_vma, __pmd)			\
 	do {								\
 		pmd_t *____pmd = (__pmd);				\
-		anon_vma_lock(__anon_vma);				\
+		anon_vma_lock_write(__anon_vma);			\
 		anon_vma_unlock(__anon_vma);				\
 		BUG_ON(pmd_trans_splitting(*____pmd) ||			\
 		       pmd_trans_huge(*____pmd));			\
@@ -171,6 +172,10 @@ static inline struct page *compound_trans_head(struct page *page)
 	}
 	return page;
 }
+
+extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
+				 unsigned long addr, pmd_t pmd, pmd_t *pmdp);
+
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
 #define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
@@ -209,6 +214,13 @@ static inline int pmd_trans_huge_lock(pmd_t *pmd,
 {
 	return 0;
 }
+
+static inline int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
+					unsigned long addr, pmd_t pmd, pmd_t *pmdp)
+{
+	return 0;
+}
+
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #endif /* _LINUX_HUGE_MM_H */
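do_huge_pmd_numa_page() is the THP counterpart of the pte-level handler. A rough sketch of how the generic fault path can dispatch to it; this is a simplified stand-in for the series' change to handle_mm_fault(), not a verbatim copy:

static int huge_numa_dispatch_sketch(struct mm_struct *mm,
				     struct vm_area_struct *vma,
				     unsigned long address, pmd_t *pmd)
{
	pmd_t orig_pmd = *pmd;

	barrier();	/* snapshot the pmd; it may change under us */
	if (pmd_trans_huge(orig_pmd) && pmd_numa(orig_pmd))
		return do_huge_pmd_numa_page(mm, vma, address,
					     orig_pmd, pmd);
	return 0;	/* fall through to the pte-level path */
}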
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 3e7fa1acf09c..0c80d3f57a5b 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -87,7 +87,7 @@ struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address,
 				pud_t *pud, int write);
 int pmd_huge(pmd_t pmd);
 int pud_huge(pud_t pmd);
-void hugetlb_change_protection(struct vm_area_struct *vma,
+unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 		unsigned long address, unsigned long end, pgprot_t newprot);
 
 #else /* !CONFIG_HUGETLB_PAGE */
@@ -132,7 +132,11 @@ static inline void copy_huge_page(struct page *dst, struct page *src)
 {
 }
 
-#define hugetlb_change_protection(vma, address, end, newprot)
+static inline unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
+		unsigned long address, unsigned long end, pgprot_t newprot)
+{
+	return 0;
+}
 
 static inline void __unmap_hugepage_range_final(struct mmu_gather *tlb,
 			struct vm_area_struct *vma, unsigned long start,
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index dbd212723b74..9adc270de7ef 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -188,6 +188,8 @@ static inline int vma_migratable(struct vm_area_struct *vma)
 	return 1;
 }
 
+extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long);
+
 #else
 
 struct mempolicy {};
@@ -307,5 +309,11 @@ static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol,
 	return 0;
 }
 
+static inline int mpol_misplaced(struct page *page, struct vm_area_struct *vma,
+				 unsigned long address)
+{
+	return -1; /* no node preference */
+}
+
 #endif /* CONFIG_NUMA */
 #endif
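The mpol_misplaced() contract is: return the node the page should be migrated to, or -1 when the page is fine where it is, which is what the !CONFIG_NUMA stub always answers. A sketch of the simplest useful decision, the MPOL_F_MORON case of migrating toward the faulting CPU's node; this illustrates the interface only, not the real policy code:

static int mpol_misplaced_sketch(struct page *page, struct vm_area_struct *vma,
				 unsigned long addr)
{
	int cur_nid = page_to_nid(page);
	int this_nid = numa_node_id();	/* node of the faulting CPU */

	if (cur_nid == this_nid)
		return -1;		/* already local: no node preference */
	return this_nid;		/* candidate node for migration */
}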
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 0b5865c61efd..1e9f627967a3 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -23,6 +23,15 @@ typedef struct page *new_page_t(struct page *, unsigned long private, int **);
 #define MIGRATEPAGE_BALLOON_SUCCESS	1 /* special ret code for balloon page
 					   * successful migration case.
 					   */
+enum migrate_reason {
+	MR_COMPACTION,
+	MR_MEMORY_FAILURE,
+	MR_MEMORY_HOTPLUG,
+	MR_SYSCALL,		/* also applies to cpusets */
+	MR_MEMPOLICY_MBIND,
+	MR_NUMA_MISPLACED,
+	MR_CMA
+};
 
 #ifdef CONFIG_MIGRATION
 
@@ -32,7 +41,7 @@ extern int migrate_page(struct address_space *,
 			struct page *, struct page *, enum migrate_mode);
 extern int migrate_pages(struct list_head *l, new_page_t x,
 			unsigned long private, bool offlining,
-			enum migrate_mode mode);
+			enum migrate_mode mode, int reason);
 extern int migrate_huge_page(struct page *, new_page_t x,
 			unsigned long private, bool offlining,
 			enum migrate_mode mode);
@@ -54,7 +63,7 @@ static inline void putback_lru_pages(struct list_head *l) {}
 static inline void putback_movable_pages(struct list_head *l) {}
 static inline int migrate_pages(struct list_head *l, new_page_t x,
 		unsigned long private, bool offlining,
-		enum migrate_mode mode) { return -ENOSYS; }
+		enum migrate_mode mode, int reason) { return -ENOSYS; }
 static inline int migrate_huge_page(struct page *page, new_page_t x,
 		unsigned long private, bool offlining,
 		enum migrate_mode mode) { return -ENOSYS; }
@@ -83,4 +92,36 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
 #define fail_migrate_page NULL
 
 #endif /* CONFIG_MIGRATION */
+
+#ifdef CONFIG_NUMA_BALANCING
+extern int migrate_misplaced_page(struct page *page, int node);
+extern bool migrate_ratelimited(int node);
+#else
+static inline int migrate_misplaced_page(struct page *page, int node)
+{
+	return -EAGAIN; /* can't migrate now */
+}
+static inline bool migrate_ratelimited(int node)
+{
+	return false;
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
+#if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
+extern int migrate_misplaced_transhuge_page(struct mm_struct *mm,
+			struct vm_area_struct *vma,
+			pmd_t *pmd, pmd_t entry,
+			unsigned long address,
+			struct page *page, int node);
+#else
+static inline int migrate_misplaced_transhuge_page(struct mm_struct *mm,
+			struct vm_area_struct *vma,
+			pmd_t *pmd, pmd_t entry,
+			unsigned long address,
+			struct page *page, int node)
+{
+	return -EAGAIN;
+}
+#endif /* CONFIG_NUMA_BALANCING && CONFIG_TRANSPARENT_HUGEPAGE */
+
 #endif /* _LINUX_MIGRATE_H */
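A sketch of how a caller is expected to combine the new pieces: consult the rate limiter before trying, then pass MR_NUMA_MISPLACED so the tracepoint added below can attribute the work. The function itself is illustrative; only the helpers it calls come from this series:

static int numa_migrate_sketch(struct list_head *migratepages,
			       new_page_t get_new_page, unsigned long private,
			       int node)
{
	if (migrate_ratelimited(node))
		return -EAGAIN;		/* destination node is too busy */

	/* The new 'reason' argument feeds the mm_migrate_pages tracepoint. */
	return migrate_pages(migratepages, get_new_page, private,
			     false /* offlining */, MIGRATE_ASYNC,
			     MR_NUMA_MISPLACED);
}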
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4af4f0b1be4c..7f4f906190bd 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -693,6 +693,36 @@ static inline int page_to_nid(const struct page *page)
 }
 #endif
 
+#ifdef CONFIG_NUMA_BALANCING
+static inline int page_xchg_last_nid(struct page *page, int nid)
+{
+	return xchg(&page->_last_nid, nid);
+}
+
+static inline int page_last_nid(struct page *page)
+{
+	return page->_last_nid;
+}
+static inline void reset_page_last_nid(struct page *page)
+{
+	page->_last_nid = -1;
+}
+#else
+static inline int page_xchg_last_nid(struct page *page, int nid)
+{
+	return page_to_nid(page);
+}
+
+static inline int page_last_nid(struct page *page)
+{
+	return page_to_nid(page);
+}
+
+static inline void reset_page_last_nid(struct page *page)
+{
+}
+#endif
+
 static inline struct zone *page_zone(const struct page *page)
 {
 	return &NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)];
@@ -1078,6 +1108,9 @@ extern unsigned long move_page_tables(struct vm_area_struct *vma,
 extern unsigned long do_mremap(unsigned long addr,
 			       unsigned long old_len, unsigned long new_len,
 			       unsigned long flags, unsigned long new_addr);
+extern unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
+			       unsigned long end, pgprot_t newprot,
+			       int dirty_accountable, int prot_numa);
 extern int mprotect_fixup(struct vm_area_struct *vma,
 			  struct vm_area_struct **pprev, unsigned long start,
 			  unsigned long end, unsigned long newflags);
@@ -1579,6 +1612,11 @@ static inline pgprot_t vm_get_page_prot(unsigned long vm_flags)
 }
 #endif
 
+#ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE
+unsigned long change_prot_numa(struct vm_area_struct *vma,
+			       unsigned long start, unsigned long end);
+#endif
+
 struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr);
 int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
 			unsigned long pfn, unsigned long size, pgprot_t);
@@ -1600,6 +1638,7 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address,
 #define FOLL_MLOCK	0x40	/* mark page as mlocked */
 #define FOLL_SPLIT	0x80	/* don't return transhuge pages, split them */
 #define FOLL_HWPOISON	0x100	/* check page is hwpoisoned */
+#define FOLL_NUMA	0x200	/* force NUMA hinting page fault */
 
 typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
 			void *data);
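page_xchg_last_nid() exists so fault handlers can atomically test and update which node last referenced a page. A sketch of the kind of two-faults-from-the-same-node filter it enables; the exact heuristic here is an assumption, not the series' final policy:

static bool page_settled_here_sketch(struct page *page, int this_nid)
{
	int last_nid = page_xchg_last_nid(page, this_nid);

	/*
	 * xchg returns the previous value atomically, so two CPUs
	 * faulting the same page cannot both observe the transition.
	 */
	return last_nid == this_nid;
}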
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 7ade2731b5d6..7d9ebb7cc982 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -175,6 +175,10 @@ struct page {
 	 */
 	void *shadow;
 #endif
+
+#ifdef CONFIG_NUMA_BALANCING
+	int _last_nid;
+#endif
 }
 /*
  * The struct page can be forced to be double word aligned so that atomic ops
@@ -411,9 +415,36 @@ struct mm_struct {
 #ifdef CONFIG_CPUMASK_OFFSTACK
 	struct cpumask cpumask_allocation;
 #endif
+#ifdef CONFIG_NUMA_BALANCING
+	/*
+	 * numa_next_scan is the next time when the PTEs will be marked
+	 * pte_numa to gather statistics and migrate pages to new nodes
+	 * if necessary.
+	 */
+	unsigned long numa_next_scan;
+
+	/* numa_next_reset is when the PTE scanner period will be reset */
+	unsigned long numa_next_reset;
+
+	/* Restart point for scanning and setting pte_numa */
+	unsigned long numa_scan_offset;
+
+	/* numa_scan_seq prevents two threads setting pte_numa */
+	int numa_scan_seq;
+
+	/*
+	 * The first node a task was scheduled on. If a task later
+	 * runs on a different node, the PTE scan starts right away.
+	 */
+	int first_nid;
+#endif
 	struct uprobes_state uprobes_state;
 };
 
+/* first_nid will either be a valid NID or one of these values */
+#define NUMA_PTE_SCAN_INIT	-1
+#define NUMA_PTE_SCAN_ACTIVE	-2
+
 static inline void mm_init_cpumask(struct mm_struct *mm)
 {
 #ifdef CONFIG_CPUMASK_OFFSTACK
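A sketch of how first_nid and the two sentinels are meant to interact when a task lands on a node; the trigger condition follows the field comment above, but the details are assumed rather than taken from this series:

static void task_placed_sketch(struct mm_struct *mm, int this_nid)
{
	if (mm->first_nid == NUMA_PTE_SCAN_INIT)
		mm->first_nid = this_nid;	/* remember the first node */
	else if (mm->first_nid != NUMA_PTE_SCAN_ACTIVE &&
		 mm->first_nid != this_nid) {
		/* The task left its first node: make the PTE scan go now. */
		mm->first_nid = NUMA_PTE_SCAN_ACTIVE;
		mm->numa_next_scan = jiffies;
	}
}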
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index cd55dad56aac..4bec5be82cab 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -735,6 +735,19 @@ typedef struct pglist_data {
 	struct task_struct *kswapd;	/* Protected by lock_memory_hotplug() */
 	int kswapd_max_order;
 	enum zone_type classzone_idx;
+#ifdef CONFIG_NUMA_BALANCING
+	/*
+	 * Lock serializing the per destination node AutoNUMA memory
+	 * migration rate limiting data.
+	 */
+	spinlock_t numabalancing_migrate_lock;
+
+	/* Rate limiting time interval */
+	unsigned long numabalancing_migrate_next_window;
+
+	/* Number of pages migrated during the rate limiting time interval */
+	unsigned long numabalancing_migrate_nr_pages;
+#endif
 } pg_data_t;
 
 #define node_present_pages(nid)	(NODE_DATA(nid)->node_present_pages)
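These three fields implement a simple per-destination-node migration budget. A sketch of the window check they support; the window length (one second) and the budget constant are assumed placeholders, not values taken from this series:

#define NUMA_MIGRATE_BUDGET_PAGES 256	/* assumed per-window budget */

static bool numa_migrate_allowed_sketch(pg_data_t *pgdat,
					unsigned long nr_pages)
{
	bool allowed;

	spin_lock(&pgdat->numabalancing_migrate_lock);
	if (time_after(jiffies, pgdat->numabalancing_migrate_next_window)) {
		/* New interval: reset the counter and advance the window. */
		pgdat->numabalancing_migrate_nr_pages = 0;
		pgdat->numabalancing_migrate_next_window = jiffies + HZ;
	}
	allowed = pgdat->numabalancing_migrate_nr_pages + nr_pages <=
		  NUMA_MIGRATE_BUDGET_PAGES;
	if (allowed)
		pgdat->numabalancing_migrate_nr_pages += nr_pages;
	spin_unlock(&pgdat->numabalancing_migrate_lock);

	return allowed;
}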
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index bfe1f4780644..c20635c527a9 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -7,7 +7,7 @@
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
-#include <linux/mutex.h>
+#include <linux/rwsem.h>
 #include <linux/memcontrol.h>
 
 /*
@@ -25,8 +25,8 @@
  * pointing to this anon_vma once its vma list is empty.
  */
 struct anon_vma {
 	struct anon_vma *root;		/* Root of this anon_vma tree */
-	struct mutex mutex;		/* Serialize access to vma list */
+	struct rw_semaphore rwsem;	/* W: modification, R: walking the list */
 	/*
 	 * The refcount is taken on an anon_vma when there is no
 	 * guarantee that the vma of page tables will exist for
@@ -64,7 +64,7 @@ struct anon_vma_chain {
 	struct vm_area_struct *vma;
 	struct anon_vma *anon_vma;
 	struct list_head same_vma;	/* locked by mmap_sem & page_table_lock */
-	struct rb_node rb;		/* locked by anon_vma->mutex */
+	struct rb_node rb;		/* locked by anon_vma->rwsem */
 	unsigned long rb_subtree_last;
 #ifdef CONFIG_DEBUG_VM_RB
 	unsigned long cached_vma_start, cached_vma_last;
@@ -108,26 +108,37 @@ static inline void vma_lock_anon_vma(struct vm_area_struct *vma)
 {
 	struct anon_vma *anon_vma = vma->anon_vma;
 	if (anon_vma)
-		mutex_lock(&anon_vma->root->mutex);
+		down_write(&anon_vma->root->rwsem);
 }
 
 static inline void vma_unlock_anon_vma(struct vm_area_struct *vma)
 {
 	struct anon_vma *anon_vma = vma->anon_vma;
 	if (anon_vma)
-		mutex_unlock(&anon_vma->root->mutex);
+		up_write(&anon_vma->root->rwsem);
 }
 
-static inline void anon_vma_lock(struct anon_vma *anon_vma)
+static inline void anon_vma_lock_write(struct anon_vma *anon_vma)
 {
-	mutex_lock(&anon_vma->root->mutex);
+	down_write(&anon_vma->root->rwsem);
 }
 
 static inline void anon_vma_unlock(struct anon_vma *anon_vma)
 {
-	mutex_unlock(&anon_vma->root->mutex);
+	up_write(&anon_vma->root->rwsem);
 }
 
+static inline void anon_vma_lock_read(struct anon_vma *anon_vma)
+{
+	down_read(&anon_vma->root->rwsem);
+}
+
+static inline void anon_vma_unlock_read(struct anon_vma *anon_vma)
+{
+	up_read(&anon_vma->root->rwsem);
+}
+
+
 /*
  * anon_vma helper functions.
  */
@@ -220,8 +231,8 @@ int try_to_munlock(struct page *);
 /*
  * Called by memory-failure.c to kill processes.
  */
-struct anon_vma *page_lock_anon_vma(struct page *page);
-void page_unlock_anon_vma(struct anon_vma *anon_vma);
+struct anon_vma *page_lock_anon_vma_read(struct page *page);
+void page_unlock_anon_vma_read(struct anon_vma *anon_vma);
 int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
 
 /*
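The point of switching the anon_vma mutex to an rwsem, and of renaming the lock helpers with _read/_write suffixes, is that rmap walkers can now run concurrently with each other. A sketch of a reader-side walk (walk body elided):

static void rmap_walk_read_sketch(struct page *page)
{
	struct anon_vma *anon_vma;

	anon_vma = page_lock_anon_vma_read(page);	/* down_read() inside */
	if (!anon_vma)
		return;		/* not anonymous, or the anon_vma went away */

	/*
	 * ... walk the interval tree here; readers proceed in parallel,
	 * while writers (vma insertion/removal) still take down_write().
	 */

	page_unlock_anon_vma_read(anon_vma);
}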
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2c2f3072beef..b089c92c609b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1527,6 +1527,14 @@ struct task_struct {
 	short il_next;
 	short pref_node_fork;
 #endif
+#ifdef CONFIG_NUMA_BALANCING
+	int numa_scan_seq;
+	int numa_migrate_seq;
+	unsigned int numa_scan_period;
+	u64 node_stamp;			/* migration stamp */
+	struct callback_head numa_work;
+#endif /* CONFIG_NUMA_BALANCING */
+
 	struct rcu_head rcu;
 
 	/*
@@ -1601,6 +1609,18 @@ struct task_struct {
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
 #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
 
+#ifdef CONFIG_NUMA_BALANCING
+extern void task_numa_fault(int node, int pages, bool migrated);
+extern void set_numabalancing_state(bool enabled);
+#else
+static inline void task_numa_fault(int node, int pages, bool migrated)
+{
+}
+static inline void set_numabalancing_state(bool enabled)
+{
+}
+#endif
+
 /*
  * Priority of a process goes from 0..MAX_PRIO-1, valid RT
  * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
@@ -2030,6 +2050,13 @@ enum sched_tunable_scaling {
 };
 extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
 
+extern unsigned int sysctl_numa_balancing_scan_delay;
+extern unsigned int sysctl_numa_balancing_scan_period_min;
+extern unsigned int sysctl_numa_balancing_scan_period_max;
+extern unsigned int sysctl_numa_balancing_scan_period_reset;
+extern unsigned int sysctl_numa_balancing_scan_size;
+extern unsigned int sysctl_numa_balancing_settle_count;
+
 #ifdef CONFIG_SCHED_DEBUG
 extern unsigned int sysctl_sched_migration_cost;
 extern unsigned int sysctl_sched_nr_migrate;
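task_numa_fault() is the channel from the memory-management fault paths back into the scheduler. A sketch of a call site once a hinting fault has been resolved; the argument conventions are inferred from the prototype and are assumptions:

static void report_numa_fault_sketch(struct page *page, int target_nid,
				     int nr_pages, bool migrated)
{
	/*
	 * Credit the node the data ended up on; 'migrated' lets the
	 * scheduler tune numa_scan_period up or down. Only touch the
	 * page when it was not migrated (it may be gone otherwise).
	 */
	int node = migrated ? target_nid : page_to_nid(page);

	task_numa_fault(node, nr_pages, migrated);
}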
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index fe786f07d2bd..fce0a2799d43 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -38,8 +38,18 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY,
 		KSWAPD_SKIP_CONGESTION_WAIT,
 		PAGEOUTRUN, ALLOCSTALL, PGROTATED,
+#ifdef CONFIG_NUMA_BALANCING
+		NUMA_PTE_UPDATES,
+		NUMA_HINT_FAULTS,
+		NUMA_HINT_FAULTS_LOCAL,
+		NUMA_PAGE_MIGRATE,
+#endif
+#ifdef CONFIG_MIGRATION
+		PGMIGRATE_SUCCESS, PGMIGRATE_FAIL,
+#endif
 #ifdef CONFIG_COMPACTION
-		COMPACTBLOCKS, COMPACTPAGES, COMPACTPAGEFAILED,
+		COMPACTMIGRATE_SCANNED, COMPACTFREE_SCANNED,
+		COMPACTISOLATED,
 		COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS,
 #endif
 #ifdef CONFIG_HUGETLB_PAGE
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 92a86b2cce33..a13291f7da88 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -80,6 +80,14 @@ static inline void vm_events_fold_cpu(int cpu)
 
 #endif /* CONFIG_VM_EVENT_COUNTERS */
 
+#ifdef CONFIG_NUMA_BALANCING
+#define count_vm_numa_event(x)     count_vm_event(x)
+#define count_vm_numa_events(x, y) count_vm_events(x, y)
+#else
+#define count_vm_numa_event(x) do {} while (0)
+#define count_vm_numa_events(x, y) do {} while (0)
+#endif /* CONFIG_NUMA_BALANCING */
+
 #define __count_zone_vm_events(item, zone, delta) \
 		__count_vm_events(item##_NORMAL - ZONE_NORMAL + \
 		zone_idx(zone), delta)
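count_vm_numa_event() compiles to nothing when CONFIG_NUMA_BALANCING is off, so fault paths can count unconditionally without #ifdefs at the call site. A sketch of the intended bookkeeping in the hinting fault path (placement assumed):

static void count_hinting_fault_sketch(struct page *page, int this_nid,
				       unsigned long nr_updated)
{
	count_vm_numa_events(NUMA_PTE_UPDATES, nr_updated);
	count_vm_numa_event(NUMA_HINT_FAULTS);
	if (page_to_nid(page) == this_nid)
		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
}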
diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h
new file mode 100644
index 000000000000..ec2a6ccfd7e5
--- /dev/null
+++ b/include/trace/events/migrate.h
@@ -0,0 +1,51 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM migrate
+
+#if !defined(_TRACE_MIGRATE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MIGRATE_H
+
+#define MIGRATE_MODE						\
+	{MIGRATE_ASYNC,		"MIGRATE_ASYNC"},		\
+	{MIGRATE_SYNC_LIGHT,	"MIGRATE_SYNC_LIGHT"},		\
+	{MIGRATE_SYNC,		"MIGRATE_SYNC"}
+
+#define MIGRATE_REASON						\
+	{MR_COMPACTION,		"compaction"},			\
+	{MR_MEMORY_FAILURE,	"memory_failure"},		\
+	{MR_MEMORY_HOTPLUG,	"memory_hotplug"},		\
+	{MR_SYSCALL,		"syscall_or_cpuset"},		\
+	{MR_MEMPOLICY_MBIND,	"mempolicy_mbind"},		\
+	{MR_CMA,		"cma"}
+
+TRACE_EVENT(mm_migrate_pages,
+
+	TP_PROTO(unsigned long succeeded, unsigned long failed,
+		 enum migrate_mode mode, int reason),
+
+	TP_ARGS(succeeded, failed, mode, reason),
+
+	TP_STRUCT__entry(
+		__field(unsigned long,		succeeded)
+		__field(unsigned long,		failed)
+		__field(enum migrate_mode,	mode)
+		__field(int,			reason)
+	),
+
+	TP_fast_assign(
+		__entry->succeeded	= succeeded;
+		__entry->failed		= failed;
+		__entry->mode		= mode;
+		__entry->reason		= reason;
+	),
+
+	TP_printk("nr_succeeded=%lu nr_failed=%lu mode=%s reason=%s",
+		__entry->succeeded,
+		__entry->failed,
+		__print_symbolic(__entry->mode, MIGRATE_MODE),
+		__print_symbolic(__entry->reason, MIGRATE_REASON))
+);
+
+#endif /* _TRACE_MIGRATE_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
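TRACE_EVENT(mm_migrate_pages, ...) generates a trace_mm_migrate_pages() function for the migration core to call. A sketch of the expected call site at the end of migrate_pages(), with illustrative local tallies; note that the MIGRATE_REASON table above carries no string for MR_NUMA_MISPLACED yet, so that reason would print as a raw hex value until an entry is added:

	/* At the end of migrate_pages(), after its retry loop (sketch): */
	count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
	count_vm_events(PGMIGRATE_FAIL, nr_failed);
	trace_mm_migrate_pages(nr_succeeded, nr_failed, mode, reason);

With the TP_printk format above, a resulting trace line looks like:
mm_migrate_pages: nr_succeeded=34 nr_failed=0 mode=MIGRATE_ASYNC reason=mempolicy_mbind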
diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h
index 23e62e0537e2..0d11c3dcd3a1 100644
--- a/include/uapi/linux/mempolicy.h
+++ b/include/uapi/linux/mempolicy.h
@@ -20,6 +20,7 @@ enum {
 	MPOL_PREFERRED,
 	MPOL_BIND,
 	MPOL_INTERLEAVE,
+	MPOL_LOCAL,
 	MPOL_MAX,	/* always last member of enum */
 };
 
@@ -47,9 +48,15 @@ enum mpol_rebind_step {
 
 /* Flags for mbind */
 #define MPOL_MF_STRICT	(1<<0)	/* Verify existing pages in the mapping */
-#define MPOL_MF_MOVE	(1<<1)	/* Move pages owned by this process to conform to mapping */
-#define MPOL_MF_MOVE_ALL (1<<2)	/* Move every page to conform to mapping */
-#define MPOL_MF_INTERNAL (1<<3)	/* Internal flags start here */
+#define MPOL_MF_MOVE	 (1<<1)	/* Move pages owned by this process to conform
+				   to policy */
+#define MPOL_MF_MOVE_ALL (1<<2)	/* Move every page to conform to policy */
+#define MPOL_MF_LAZY	 (1<<3)	/* Modifies '_MOVE': lazy migrate on fault */
+#define MPOL_MF_INTERNAL (1<<4)	/* Internal flags start here */
+
+#define MPOL_MF_VALID	(MPOL_MF_STRICT   |	\
+			 MPOL_MF_MOVE     |	\
+			 MPOL_MF_MOVE_ALL)
 
 /*
  * Internal flags that share the struct mempolicy flags word with
@@ -59,6 +66,8 @@ enum mpol_rebind_step {
 #define MPOL_F_SHARED  (1 << 0)	/* identify shared policies */
 #define MPOL_F_LOCAL   (1 << 1)	/* preferred local allocation */
 #define MPOL_F_REBINDING (1 << 2)	/* identify policies in rebinding */
+#define MPOL_F_MOF	(1 << 3) /* this policy wants migrate on fault */
+#define MPOL_F_MORON	(1 << 4) /* Migrate On pte_numa Reference On Node */
 
 
 #endif /* _UAPI_LINUX_MEMPOLICY_H */
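From userspace, the new MPOL_MF_LAZY flag is defined but deliberately left out of MPOL_MF_VALID in this revision, so only the eager-move flags are usable via mbind(2). A small, runnable example using the long-standing flags (link with -lnuma; assumes a machine with node 0 present):

#include <numaif.h>	/* mbind() and the MPOL_* constants */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	size_t len = 4096 * 16;
	void *buf = aligned_alloc(4096, len);
	unsigned long nodemask = 1UL << 0;	/* node 0 only */

	memset(buf, 0, len);	/* fault the pages in first */
	if (mbind(buf, len, MPOL_BIND, &nodemask, sizeof(nodemask) * 8,
		  MPOL_MF_MOVE | MPOL_MF_STRICT) != 0) {
		perror("mbind");
		return 1;
	}
	printf("pages bound and moved to node 0\n");
	free(buf);
	return 0;
}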