Diffstat (limited to 'include')
 include/asm-generic/pgtable.h  | 110
 include/linux/huge_mm.h        |  16
 include/linux/hugetlb.h        |   8
 include/linux/mempolicy.h      |   8
 include/linux/migrate.h        |  45
 include/linux/mm.h             |  39
 include/linux/mm_types.h       |  31
 include/linux/mmzone.h         |  13
 include/linux/rmap.h           |  33
 include/linux/sched.h          |  27
 include/linux/vm_event_item.h  |  12
 include/linux/vmstat.h         |   8
 include/trace/events/migrate.h |  51
 include/uapi/linux/mempolicy.h |  15
 14 files changed, 395 insertions(+), 21 deletions(-)
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 284e80831d2c..701beab27aab 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -219,6 +219,10 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
 #define move_pte(pte, prot, old_addr, new_addr)	(pte)
 #endif
 
+#ifndef pte_accessible
+# define pte_accessible(pte)		((void)(pte),1)
+#endif
+
 #ifndef flush_tlb_fix_spurious_fault
 #define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address)
 #endif
@@ -580,6 +584,112 @@ static inline int pmd_trans_unstable(pmd_t *pmd)
 #endif
 }
 
+#ifdef CONFIG_NUMA_BALANCING
+#ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE
+/*
+ * _PAGE_NUMA works identically to _PAGE_PROTNONE (it's actually the
+ * same bit too). It's set only when _PAGE_PRESENT is not set and it's
+ * never set if _PAGE_PRESENT is set.
+ *
+ * pte/pmd_present() returns true if pte/pmd_numa returns true. Page
+ * faults trigger on those regions if pte/pmd_numa returns true
+ * (because _PAGE_PRESENT is not set).
+ */
+#ifndef pte_numa
+static inline int pte_numa(pte_t pte)
+{
+	return (pte_flags(pte) &
+		(_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA;
+}
+#endif
+
+#ifndef pmd_numa
+static inline int pmd_numa(pmd_t pmd)
+{
+	return (pmd_flags(pmd) &
+		(_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA;
+}
+#endif
+
+/*
+ * pte/pmd_mknonnuma sets the _PAGE_ACCESSED bitflag automatically
+ * because they're called by the NUMA hinting minor page fault. If we
+ * didn't set the _PAGE_ACCESSED bitflag here, the TLB miss handler
+ * would be forced to set it later while filling the TLB after we
+ * return to userland. That would trigger a second write to memory
+ * that we optimize away by setting _PAGE_ACCESSED here.
+ */
+#ifndef pte_mknonnuma
+static inline pte_t pte_mknonnuma(pte_t pte)
+{
+	pte = pte_clear_flags(pte, _PAGE_NUMA);
+	return pte_set_flags(pte, _PAGE_PRESENT|_PAGE_ACCESSED);
+}
+#endif
+
+#ifndef pmd_mknonnuma
+static inline pmd_t pmd_mknonnuma(pmd_t pmd)
+{
+	pmd = pmd_clear_flags(pmd, _PAGE_NUMA);
+	return pmd_set_flags(pmd, _PAGE_PRESENT|_PAGE_ACCESSED);
+}
+#endif
+
+#ifndef pte_mknuma
+static inline pte_t pte_mknuma(pte_t pte)
+{
+	pte = pte_set_flags(pte, _PAGE_NUMA);
+	return pte_clear_flags(pte, _PAGE_PRESENT);
+}
+#endif
+
+#ifndef pmd_mknuma
+static inline pmd_t pmd_mknuma(pmd_t pmd)
+{
+	pmd = pmd_set_flags(pmd, _PAGE_NUMA);
+	return pmd_clear_flags(pmd, _PAGE_PRESENT);
+}
+#endif
+#else
+extern int pte_numa(pte_t pte);
+extern int pmd_numa(pmd_t pmd);
+extern pte_t pte_mknonnuma(pte_t pte);
+extern pmd_t pmd_mknonnuma(pmd_t pmd);
+extern pte_t pte_mknuma(pte_t pte);
+extern pmd_t pmd_mknuma(pmd_t pmd);
+#endif /* CONFIG_ARCH_USES_NUMA_PROT_NONE */
+#else
+static inline int pmd_numa(pmd_t pmd)
+{
+	return 0;
+}
+
+static inline int pte_numa(pte_t pte)
+{
+	return 0;
+}
+
+static inline pte_t pte_mknonnuma(pte_t pte)
+{
+	return pte;
+}
+
+static inline pmd_t pmd_mknonnuma(pmd_t pmd)
+{
+	return pmd;
+}
+
+static inline pte_t pte_mknuma(pte_t pte)
+{
+	return pte;
+}
+
+static inline pmd_t pmd_mknuma(pmd_t pmd)
+{
+	return pmd;
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
 #endif /* CONFIG_MMU */
 
 #endif /* !__ASSEMBLY__ */
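Reader's note on the hunk above (not part of the commit): these generic helpers are consumed by the page fault path. A minimal sketch, assuming the usual fault-path locking; the handle_numa_hinting_fault() name is hypothetical:

	/* Sketch only: recognise a NUMA hinting fault and make the page
	 * accessible again. pte_mknonnuma() also sets _PAGE_ACCESSED, so
	 * the TLB fill after returning to userland is free. */
	static void handle_numa_hinting_fault(struct vm_area_struct *vma,
					      unsigned long addr, pte_t *ptep)
	{
		pte_t pte = *ptep;

		if (!pte_numa(pte))
			return;	/* an ordinary fault, not a hinting fault */

		pte = pte_mknonnuma(pte);
		set_pte_at(vma->vm_mm, addr, ptep, pte);
		update_mmu_cache(vma, addr, ptep);
	}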
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 092dc5305a32..1d76f8ca90f0 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -31,7 +31,8 @@ extern int move_huge_pmd(struct vm_area_struct *vma,
 			 unsigned long new_addr, unsigned long old_end,
 			 pmd_t *old_pmd, pmd_t *new_pmd);
 extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
-			unsigned long addr, pgprot_t newprot);
+			unsigned long addr, pgprot_t newprot,
+			int prot_numa);
 
 enum transparent_hugepage_flag {
 	TRANSPARENT_HUGEPAGE_FLAG,
@@ -111,7 +112,7 @@ extern void __split_huge_page_pmd(struct vm_area_struct *vma,
 #define wait_split_huge_page(__anon_vma, __pmd)			\
 	do {							\
 		pmd_t *____pmd = (__pmd);			\
-		anon_vma_lock(__anon_vma);			\
+		anon_vma_lock_write(__anon_vma);		\
 		anon_vma_unlock(__anon_vma);			\
 		BUG_ON(pmd_trans_splitting(*____pmd) ||		\
 		       pmd_trans_huge(*____pmd));		\
@@ -171,6 +172,10 @@ static inline struct page *compound_trans_head(struct page *page)
 	}
 	return page;
 }
+
+extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
+				 unsigned long addr, pmd_t pmd, pmd_t *pmdp);
+
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
 #define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
@@ -209,6 +214,13 @@ static inline int pmd_trans_huge_lock(pmd_t *pmd,
 {
 	return 0;
 }
+
+static inline int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
+					unsigned long addr, pmd_t pmd, pmd_t *pmdp)
+{
+	return 0;
+}
+
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #endif /* _LINUX_HUGE_MM_H */
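Reader's note: a huge PMD marked pmd_numa() is not actually missing; it denotes a NUMA hinting fault at PMD granularity. A sketch of the expected dispatch in the fault path (the huge_pmd_fault() wrapper is hypothetical, error handling elided):

	static int huge_pmd_fault(struct mm_struct *mm, struct vm_area_struct *vma,
				  unsigned long address, pmd_t orig_pmd, pmd_t *pmd)
	{
		/* Hand NUMA hinting faults to the handler declared above,
		 * which may also migrate the huge page to a better node. */
		if (pmd_numa(orig_pmd))
			return do_huge_pmd_numa_page(mm, vma, address,
						     orig_pmd, pmd);

		return 0;	/* nothing to do in this sketch */
	}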
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 3e7fa1acf09c..0c80d3f57a5b 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -87,7 +87,7 @@ struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address,
 				pud_t *pud, int write);
 int pmd_huge(pmd_t pmd);
 int pud_huge(pud_t pmd);
-void hugetlb_change_protection(struct vm_area_struct *vma,
+unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 		unsigned long address, unsigned long end, pgprot_t newprot);
 
 #else /* !CONFIG_HUGETLB_PAGE */
@@ -132,7 +132,11 @@ static inline void copy_huge_page(struct page *dst, struct page *src)
 {
 }
 
-#define hugetlb_change_protection(vma, address, end, newprot)
+static inline unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
+		unsigned long address, unsigned long end, pgprot_t newprot)
+{
+	return 0;
+}
 
 static inline void __unmap_hugepage_range_final(struct mmu_gather *tlb,
 			struct vm_area_struct *vma, unsigned long start,
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index dbd212723b74..9adc270de7ef 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -188,6 +188,8 @@ static inline int vma_migratable(struct vm_area_struct *vma)
 	return 1;
 }
 
+extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long);
+
 #else
 
 struct mempolicy {};
@@ -307,5 +309,11 @@ static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol,
 	return 0;
 }
 
+static inline int mpol_misplaced(struct page *page, struct vm_area_struct *vma,
+				 unsigned long address)
+{
+	return -1; /* no node preference */
+}
+
 #endif /* CONFIG_NUMA */
 #endif
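Reader's note: mpol_misplaced() is the policy-layer question "should this page move, and where?". A sketch of the intended use on a hinting fault (the numa_fault_target() helper is hypothetical; the extra same-node check is defensive):

	static int numa_fault_target(struct page *page, struct vm_area_struct *vma,
				     unsigned long address)
	{
		int polnid = mpol_misplaced(page, vma, address);

		/* -1 means "no node preference": leave the page in place. */
		if (polnid == -1 || polnid == page_to_nid(page))
			return -1;

		return polnid;	/* misplaced: candidate destination node */
	}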
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 0b5865c61efd..1e9f627967a3 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -23,6 +23,15 @@ typedef struct page *new_page_t(struct page *, unsigned long private, int **);
 #define MIGRATEPAGE_BALLOON_SUCCESS	1 /* special ret code for balloon page
 					   * successful migration case.
 					   */
+enum migrate_reason {
+	MR_COMPACTION,
+	MR_MEMORY_FAILURE,
+	MR_MEMORY_HOTPLUG,
+	MR_SYSCALL,		/* also applies to cpusets */
+	MR_MEMPOLICY_MBIND,
+	MR_NUMA_MISPLACED,
+	MR_CMA
+};
 
 #ifdef CONFIG_MIGRATION
 
@@ -32,7 +41,7 @@ extern int migrate_page(struct address_space *,
 			struct page *, struct page *, enum migrate_mode);
 extern int migrate_pages(struct list_head *l, new_page_t x,
 			unsigned long private, bool offlining,
-			enum migrate_mode mode);
+			enum migrate_mode mode, int reason);
 extern int migrate_huge_page(struct page *, new_page_t x,
 			unsigned long private, bool offlining,
 			enum migrate_mode mode);
@@ -54,7 +63,7 @@ static inline void putback_lru_pages(struct list_head *l) {}
 static inline void putback_movable_pages(struct list_head *l) {}
 static inline int migrate_pages(struct list_head *l, new_page_t x,
 		unsigned long private, bool offlining,
-		enum migrate_mode mode) { return -ENOSYS; }
+		enum migrate_mode mode, int reason) { return -ENOSYS; }
 static inline int migrate_huge_page(struct page *page, new_page_t x,
 		unsigned long private, bool offlining,
 		enum migrate_mode mode) { return -ENOSYS; }
@@ -83,4 +92,36 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
 #define fail_migrate_page NULL
 
 #endif /* CONFIG_MIGRATION */
+
+#ifdef CONFIG_NUMA_BALANCING
+extern int migrate_misplaced_page(struct page *page, int node);
+extern bool migrate_ratelimited(int node);
+#else
+static inline int migrate_misplaced_page(struct page *page, int node)
+{
+	return -EAGAIN; /* can't migrate now */
+}
+static inline bool migrate_ratelimited(int node)
+{
+	return false;
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
+#if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
+extern int migrate_misplaced_transhuge_page(struct mm_struct *mm,
+			struct vm_area_struct *vma,
+			pmd_t *pmd, pmd_t entry,
+			unsigned long address,
+			struct page *page, int node);
+#else
+static inline int migrate_misplaced_transhuge_page(struct mm_struct *mm,
+			struct vm_area_struct *vma,
+			pmd_t *pmd, pmd_t entry,
+			unsigned long address,
+			struct page *page, int node)
+{
+	return -EAGAIN;
+}
+#endif /* CONFIG_NUMA_BALANCING && CONFIG_TRANSPARENT_HUGEPAGE */
+
 #endif /* _LINUX_MIGRATE_H */
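Reader's note: a sketch of how the two new NUMA-balancing entry points are meant to combine (try_numa_migrate() is hypothetical; the exact return convention of migrate_misplaced_page() is hedged here -- note the !CONFIG_NUMA_BALANCING stub returns -EAGAIN):

	static void try_numa_migrate(struct page *page, int node)
	{
		int ret;

		if (migrate_ratelimited(node))
			return;		/* destination node's rate limit hit */

		ret = migrate_misplaced_page(page, node);
		if (ret)
			/* Counter added by this series in vm_event_item.h */
			count_vm_numa_event(NUMA_PAGE_MIGRATE);
	}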
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4af4f0b1be4c..7f4f906190bd 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -693,6 +693,36 @@ static inline int page_to_nid(const struct page *page)
 }
 #endif
 
+#ifdef CONFIG_NUMA_BALANCING
+static inline int page_xchg_last_nid(struct page *page, int nid)
+{
+	return xchg(&page->_last_nid, nid);
+}
+
+static inline int page_last_nid(struct page *page)
+{
+	return page->_last_nid;
+}
+
+static inline void reset_page_last_nid(struct page *page)
+{
+	page->_last_nid = -1;
+}
+#else
+static inline int page_xchg_last_nid(struct page *page, int nid)
+{
+	return page_to_nid(page);
+}
+
+static inline int page_last_nid(struct page *page)
+{
+	return page_to_nid(page);
+}
+
+static inline void reset_page_last_nid(struct page *page)
+{
+}
+#endif
+
 static inline struct zone *page_zone(const struct page *page)
 {
 	return &NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)];
@@ -1078,6 +1108,9 @@ extern unsigned long move_page_tables(struct vm_area_struct *vma,
 extern unsigned long do_mremap(unsigned long addr,
 			       unsigned long old_len, unsigned long new_len,
 			       unsigned long flags, unsigned long new_addr);
+extern unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
+			      unsigned long end, pgprot_t newprot,
+			      int dirty_accountable, int prot_numa);
 extern int mprotect_fixup(struct vm_area_struct *vma,
 			  struct vm_area_struct **pprev, unsigned long start,
 			  unsigned long end, unsigned long newflags);
@@ -1579,6 +1612,11 @@ static inline pgprot_t vm_get_page_prot(unsigned long vm_flags)
 }
 #endif
 
+#ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE
+unsigned long change_prot_numa(struct vm_area_struct *vma,
+			unsigned long start, unsigned long end);
+#endif
+
 struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr);
 int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
 			unsigned long pfn, unsigned long size, pgprot_t);
@@ -1600,6 +1638,7 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address,
 #define FOLL_MLOCK	0x40	/* mark page as mlocked */
 #define FOLL_SPLIT	0x80	/* don't return transhuge pages, split them */
 #define FOLL_HWPOISON	0x100	/* check page is hwpoisoned */
+#define FOLL_NUMA	0x200	/* force NUMA hinting page fault */
 
 typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
 			void *data);
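Reader's note: page_xchg_last_nid() enables a "two reference" filter, as used by the MPOL_F_MORON policy flag added later in this series -- only treat a page as misplaced once consecutive hinting faults come from the same node. A minimal sketch (helper name hypothetical):

	static bool page_looks_settled_on(struct page *page, int this_nid)
	{
		/* Record who touched the page last, atomically swapping in
		 * the current node and returning the previous one. */
		int last_nid = page_xchg_last_nid(page, this_nid);

		return last_nid == this_nid;
	}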
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 7ade2731b5d6..7d9ebb7cc982 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -175,6 +175,10 @@ struct page {
 	 */
 	void *shadow;
 #endif
+
+#ifdef CONFIG_NUMA_BALANCING
+	int _last_nid;
+#endif
 }
 /*
  * The struct page can be forced to be double word aligned so that atomic ops
@@ -411,9 +415,36 @@ struct mm_struct {
 #ifdef CONFIG_CPUMASK_OFFSTACK
 	struct cpumask cpumask_allocation;
 #endif
+#ifdef CONFIG_NUMA_BALANCING
+	/*
+	 * numa_next_scan is the next time (in jiffies) when the PTEs
+	 * will be marked pte_numa to gather statistics and migrate
+	 * pages to new nodes if necessary.
+	 */
+	unsigned long numa_next_scan;
+
+	/* numa_next_reset is when the PTE scanner period will be reset */
+	unsigned long numa_next_reset;
+
+	/* Restart point for scanning and setting pte_numa */
+	unsigned long numa_scan_offset;
+
+	/* numa_scan_seq prevents two threads from setting pte_numa */
+	int numa_scan_seq;
+
+	/*
+	 * The first node a task was scheduled on. If a task later runs
+	 * on a different node, the delayed PTE scanner starts immediately.
+	 */
+	int first_nid;
+#endif
 	struct uprobes_state uprobes_state;
 };
 
+/* first_nid will either be a valid NID or one of these values */
+#define NUMA_PTE_SCAN_INIT	-1
+#define NUMA_PTE_SCAN_ACTIVE	-2
+
 static inline void mm_init_cpumask(struct mm_struct *mm)
 {
 #ifdef CONFIG_CPUMASK_OFFSTACK
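Reader's note: a sketch of how these per-mm fields gate the PTE scanner (numa_scan_due() is a hypothetical helper; the real work handler presumably updates numa_next_scan with cmpxchg so that only one thread advances the window):

	static bool numa_scan_due(struct mm_struct *mm)
	{
		if (time_before(jiffies, mm->numa_next_scan))
			return false;	/* scan window not open yet */

		/* Open the next window; scanning resumes at numa_scan_offset. */
		mm->numa_next_scan = jiffies +
			msecs_to_jiffies(sysctl_numa_balancing_scan_period_min);
		return true;
	}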
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index cd55dad56aac..4bec5be82cab 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -735,6 +735,19 @@ typedef struct pglist_data {
 	struct task_struct *kswapd;	/* Protected by lock_memory_hotplug() */
 	int kswapd_max_order;
 	enum zone_type classzone_idx;
+#ifdef CONFIG_NUMA_BALANCING
+	/*
+	 * Lock serializing the per destination node AutoNUMA memory
+	 * migration rate limiting data.
+	 */
+	spinlock_t numabalancing_migrate_lock;
+
+	/* Rate limiting time interval */
+	unsigned long numabalancing_migrate_next_window;
+
+	/* Number of pages migrated during the rate limiting time interval */
+	unsigned long numabalancing_migrate_nr_pages;
+#endif
 } pg_data_t;
 
 #define node_present_pages(nid)	(NODE_DATA(nid)->node_present_pages)
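Reader's note: the three fields implement a token-bucket style rate limit on migrations into a node. A sketch under stated assumptions -- the window length (one second) and page budget here are placeholders, not the series' actual tunables:

	static bool numamigrate_would_exceed_limit(pg_data_t *pgdat,
						   unsigned long nr_pages)
	{
		bool exceeded;

		spin_lock(&pgdat->numabalancing_migrate_lock);
		if (time_after(jiffies, pgdat->numabalancing_migrate_next_window)) {
			/* Open a fresh window and reset the page budget. */
			pgdat->numabalancing_migrate_nr_pages = 0;
			pgdat->numabalancing_migrate_next_window = jiffies + HZ;
		}
		pgdat->numabalancing_migrate_nr_pages += nr_pages;
		/* Placeholder budget: ~256MB worth of pages per window. */
		exceeded = pgdat->numabalancing_migrate_nr_pages >
			   (256UL << (20 - PAGE_SHIFT));
		spin_unlock(&pgdat->numabalancing_migrate_lock);

		return exceeded;
	}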
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index bfe1f4780644..c20635c527a9 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -7,7 +7,7 @@
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
-#include <linux/mutex.h>
+#include <linux/rwsem.h>
 #include <linux/memcontrol.h>
 
 /*
@@ -25,8 +25,8 @@
  * pointing to this anon_vma once its vma list is empty.
  */
 struct anon_vma {
 	struct anon_vma *root;		/* Root of this anon_vma tree */
-	struct mutex mutex;		/* Serialize access to vma list */
+	struct rw_semaphore rwsem;	/* W: modification, R: walking the list */
 	/*
 	 * The refcount is taken on an anon_vma when there is no
 	 * guarantee that the vma of page tables will exist for
@@ -64,7 +64,7 @@ struct anon_vma_chain {
 	struct vm_area_struct *vma;
 	struct anon_vma *anon_vma;
 	struct list_head same_vma;   /* locked by mmap_sem & page_table_lock */
-	struct rb_node rb;		/* locked by anon_vma->mutex */
+	struct rb_node rb;		/* locked by anon_vma->rwsem */
 	unsigned long rb_subtree_last;
 #ifdef CONFIG_DEBUG_VM_RB
 	unsigned long cached_vma_start, cached_vma_last;
@@ -108,26 +108,37 @@ static inline void vma_lock_anon_vma(struct vm_area_struct *vma)
 {
 	struct anon_vma *anon_vma = vma->anon_vma;
 	if (anon_vma)
-		mutex_lock(&anon_vma->root->mutex);
+		down_write(&anon_vma->root->rwsem);
 }
 
 static inline void vma_unlock_anon_vma(struct vm_area_struct *vma)
 {
 	struct anon_vma *anon_vma = vma->anon_vma;
 	if (anon_vma)
-		mutex_unlock(&anon_vma->root->mutex);
+		up_write(&anon_vma->root->rwsem);
 }
 
-static inline void anon_vma_lock(struct anon_vma *anon_vma)
+static inline void anon_vma_lock_write(struct anon_vma *anon_vma)
 {
-	mutex_lock(&anon_vma->root->mutex);
+	down_write(&anon_vma->root->rwsem);
 }
 
 static inline void anon_vma_unlock(struct anon_vma *anon_vma)
 {
-	mutex_unlock(&anon_vma->root->mutex);
+	up_write(&anon_vma->root->rwsem);
 }
 
+static inline void anon_vma_lock_read(struct anon_vma *anon_vma)
+{
+	down_read(&anon_vma->root->rwsem);
+}
+
+static inline void anon_vma_unlock_read(struct anon_vma *anon_vma)
+{
+	up_read(&anon_vma->root->rwsem);
+}
+
+
 /*
  * anon_vma helper functions.
  */
@@ -220,8 +231,8 @@ int try_to_munlock(struct page *);
 /*
  * Called by memory-failure.c to kill processes.
  */
-struct anon_vma *page_lock_anon_vma(struct page *page);
-void page_unlock_anon_vma(struct anon_vma *anon_vma);
+struct anon_vma *page_lock_anon_vma_read(struct page *page);
+void page_unlock_anon_vma_read(struct anon_vma *anon_vma);
 int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
 
 /*
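Reader's note: converting the anon_vma mutex to an rwsem lets rmap walkers run concurrently; the write side is reserved for list modification. A sketch of the read-side pattern the renamed helpers imply (walker body elided):

	static void walk_anon_mappings(struct page *page)
	{
		/* Takes anon_vma->root->rwsem for reading, so several
		 * walkers (e.g. migration, try_to_unmap) can proceed
		 * against the same anon_vma at once. */
		struct anon_vma *anon_vma = page_lock_anon_vma_read(page);

		if (!anon_vma)
			return;	/* page no longer mapped anonymously */

		/* ... iterate the interval tree under the read lock ... */

		page_unlock_anon_vma_read(anon_vma);
	}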
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2c2f3072beef..b089c92c609b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1527,6 +1527,14 @@ struct task_struct {
 	short il_next;
 	short pref_node_fork;
 #endif
+#ifdef CONFIG_NUMA_BALANCING
+	int numa_scan_seq;
+	int numa_migrate_seq;
+	unsigned int numa_scan_period;
+	u64 node_stamp;			/* migration stamp */
+	struct callback_head numa_work;
+#endif /* CONFIG_NUMA_BALANCING */
+
 	struct rcu_head rcu;
 
 	/*
@@ -1601,6 +1609,18 @@ struct task_struct {
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
 #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
 
+#ifdef CONFIG_NUMA_BALANCING
+extern void task_numa_fault(int node, int pages, bool migrated);
+extern void set_numabalancing_state(bool enabled);
+#else
+static inline void task_numa_fault(int node, int pages, bool migrated)
+{
+}
+static inline void set_numabalancing_state(bool enabled)
+{
+}
+#endif
+
 /*
  * Priority of a process goes from 0..MAX_PRIO-1, valid RT
  * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
@@ -2030,6 +2050,13 @@ enum sched_tunable_scaling {
 };
 extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
 
+extern unsigned int sysctl_numa_balancing_scan_delay;
+extern unsigned int sysctl_numa_balancing_scan_period_min;
+extern unsigned int sysctl_numa_balancing_scan_period_max;
+extern unsigned int sysctl_numa_balancing_scan_period_reset;
+extern unsigned int sysctl_numa_balancing_scan_size;
+extern unsigned int sysctl_numa_balancing_settle_count;
+
 #ifdef CONFIG_SCHED_DEBUG
 extern unsigned int sysctl_sched_migration_cost;
 extern unsigned int sysctl_sched_nr_migrate;
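Reader's note: task_numa_fault() is the feedback channel from the fault handler to the scheduler. A sketch of the expected call site at the end of a hinting fault (wrapper name hypothetical); the node argument is the node the data ends up on, after any migration:

	static void account_numa_fault(struct page *page, int nr_pages,
				       bool migrated)
	{
		int node = page_to_nid(page);

		/* Stubbed out entirely when CONFIG_NUMA_BALANCING=n. */
		task_numa_fault(node, nr_pages, migrated);
	}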
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index fe786f07d2bd..fce0a2799d43 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -38,8 +38,18 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY,
 		KSWAPD_SKIP_CONGESTION_WAIT,
 		PAGEOUTRUN, ALLOCSTALL, PGROTATED,
+#ifdef CONFIG_NUMA_BALANCING
+		NUMA_PTE_UPDATES,
+		NUMA_HINT_FAULTS,
+		NUMA_HINT_FAULTS_LOCAL,
+		NUMA_PAGE_MIGRATE,
+#endif
+#ifdef CONFIG_MIGRATION
+		PGMIGRATE_SUCCESS, PGMIGRATE_FAIL,
+#endif
 #ifdef CONFIG_COMPACTION
-		COMPACTBLOCKS, COMPACTPAGES, COMPACTPAGEFAILED,
+		COMPACTMIGRATE_SCANNED, COMPACTFREE_SCANNED,
+		COMPACTISOLATED,
 		COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS,
 #endif
 #ifdef CONFIG_HUGETLB_PAGE
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 92a86b2cce33..a13291f7da88 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -80,6 +80,14 @@ static inline void vm_events_fold_cpu(int cpu)
 
 #endif /* CONFIG_VM_EVENT_COUNTERS */
 
+#ifdef CONFIG_NUMA_BALANCING
+#define count_vm_numa_event(x)     count_vm_event(x)
+#define count_vm_numa_events(x, y) count_vm_events(x, y)
+#else
+#define count_vm_numa_event(x) do {} while (0)
+#define count_vm_numa_events(x, y) do {} while (0)
+#endif /* CONFIG_NUMA_BALANCING */
+
 #define __count_zone_vm_events(item, zone, delta) \
 	__count_vm_events(item##_NORMAL - ZONE_NORMAL + \
 	zone_idx(zone), delta)
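Reader's note: the wrappers pair with the NUMA_* events added to vm_event_item.h above and compile away entirely when CONFIG_NUMA_BALANCING is off. A sketch of accounting in a hinting fault (helper name hypothetical):

	static void account_hint_fault(int page_nid, int this_nid, int nr_pages)
	{
		count_vm_numa_events(NUMA_HINT_FAULTS, nr_pages);
		if (page_nid == this_nid)
			count_vm_numa_events(NUMA_HINT_FAULTS_LOCAL, nr_pages);
	}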
diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h
new file mode 100644
index 000000000000..ec2a6ccfd7e5
--- /dev/null
+++ b/include/trace/events/migrate.h
@@ -0,0 +1,51 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM migrate
+
+#if !defined(_TRACE_MIGRATE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MIGRATE_H
+
+#define MIGRATE_MODE						\
+	{MIGRATE_ASYNC,		"MIGRATE_ASYNC"},		\
+	{MIGRATE_SYNC_LIGHT,	"MIGRATE_SYNC_LIGHT"},		\
+	{MIGRATE_SYNC,		"MIGRATE_SYNC"}
+
+#define MIGRATE_REASON						\
+	{MR_COMPACTION,		"compaction"},			\
+	{MR_MEMORY_FAILURE,	"memory_failure"},		\
+	{MR_MEMORY_HOTPLUG,	"memory_hotplug"},		\
+	{MR_SYSCALL,		"syscall_or_cpuset"},		\
+	{MR_MEMPOLICY_MBIND,	"mempolicy_mbind"},		\
+	{MR_CMA,		"cma"}
+
+TRACE_EVENT(mm_migrate_pages,
+
+	TP_PROTO(unsigned long succeeded, unsigned long failed,
+		 enum migrate_mode mode, int reason),
+
+	TP_ARGS(succeeded, failed, mode, reason),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,		succeeded)
+		__field(	unsigned long,		failed)
+		__field(	enum migrate_mode,	mode)
+		__field(	int,			reason)
+	),
+
+	TP_fast_assign(
+		__entry->succeeded	= succeeded;
+		__entry->failed		= failed;
+		__entry->mode		= mode;
+		__entry->reason		= reason;
+	),
+
+	TP_printk("nr_succeeded=%lu nr_failed=%lu mode=%s reason=%s",
+		__entry->succeeded,
+		__entry->failed,
+		__print_symbolic(__entry->mode, MIGRATE_MODE),
+		__print_symbolic(__entry->reason, MIGRATE_REASON))
+);
+
+#endif /* _TRACE_MIGRATE_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
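Reader's note: TRACE_EVENT(mm_migrate_pages, ...) generates trace_mm_migrate_pages(), which migrate_pages() is expected to emit once per call after tallying results. A sketch (wrapper name hypothetical):

	static void report_migration(unsigned long nr_succeeded,
				     unsigned long nr_failed,
				     enum migrate_mode mode, int reason)
	{
		/* e.g. reason == MR_NUMA_MISPLACED for NUMA balancing traffic */
		trace_mm_migrate_pages(nr_succeeded, nr_failed, mode, reason);
	}

One observation: MIGRATE_REASON above does not list MR_NUMA_MISPLACED, so that reason appears to print as a raw value rather than a symbolic name in trace output.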
diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h
index 23e62e0537e2..0d11c3dcd3a1 100644
--- a/include/uapi/linux/mempolicy.h
+++ b/include/uapi/linux/mempolicy.h
@@ -20,6 +20,7 @@ enum {
 	MPOL_PREFERRED,
 	MPOL_BIND,
 	MPOL_INTERLEAVE,
+	MPOL_LOCAL,
 	MPOL_MAX,	/* always last member of enum */
 };
 
@@ -47,9 +48,15 @@ enum mpol_rebind_step {
 
 /* Flags for mbind */
 #define MPOL_MF_STRICT	(1<<0)	/* Verify existing pages in the mapping */
-#define MPOL_MF_MOVE	(1<<1)	/* Move pages owned by this process to conform to mapping */
-#define MPOL_MF_MOVE_ALL (1<<2)	/* Move every page to conform to mapping */
-#define MPOL_MF_INTERNAL (1<<3)	/* Internal flags start here */
+#define MPOL_MF_MOVE	 (1<<1)	/* Move pages owned by this process to conform
+				   to policy */
+#define MPOL_MF_MOVE_ALL (1<<2)	/* Move every page to conform to policy */
+#define MPOL_MF_LAZY	 (1<<3)	/* Modifies '_MOVE:  lazy migrate on fault */
+#define MPOL_MF_INTERNAL (1<<4)	/* Internal flags start here */
+
+#define MPOL_MF_VALID	(MPOL_MF_STRICT   |	\
+			 MPOL_MF_MOVE     |	\
+			 MPOL_MF_MOVE_ALL)
 
 /*
  * Internal flags that share the struct mempolicy flags word with
@@ -59,6 +66,8 @@ enum mpol_rebind_step {
 #define MPOL_F_SHARED  (1 << 0)	/* identify shared policies */
 #define MPOL_F_LOCAL   (1 << 1)	/* preferred local allocation */
 #define MPOL_F_REBINDING (1 << 2)	/* identify policies in rebinding */
+#define MPOL_F_MOF	(1 << 3) /* this policy wants migrate on fault */
+#define MPOL_F_MORON	(1 << 4) /* Migrate On pte_numa Reference On Node */
 
 
 #endif /* _UAPI_LINUX_MEMPOLICY_H */
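Reader's note: from userspace the new MPOL_MF_LAZY flag is requested through mbind(). A hedged sketch of the intended call shape -- note that MPOL_MF_VALID above does not include MPOL_MF_LAZY, so a kernel built from exactly this patch is expected to reject it with EINVAL, and older libnuma headers may not define the flag at all:

	#include <numaif.h>		/* mbind(), MPOL_* */
	#include <stdio.h>
	#include <stdlib.h>

	#ifndef MPOL_MF_LAZY
	#define MPOL_MF_LAZY (1<<3)	/* value from the header above */
	#endif

	int main(void)
	{
		size_t len = 4096 * 16;
		void *buf = aligned_alloc(4096, len);
		unsigned long nodemask = 1UL << 0;	/* node 0 */

		/* Ask for lazy migrate-on-fault instead of moving pages now. */
		if (mbind(buf, len, MPOL_BIND, &nodemask,
			  sizeof(nodemask) * 8,
			  MPOL_MF_MOVE | MPOL_MF_LAZY) != 0)
			perror("mbind");	/* EINVAL expected per MPOL_MF_VALID */

		free(buf);
		return 0;
	}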