author     Linus Torvalds <torvalds@linux-foundation.org>  2012-10-09 03:23:15 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-10-09 03:23:15 -0400
commit     9e2d8656f5e8aa214e66b462680cf86b210b74a8 (patch)
tree       f67d62e896cedf75599ea45f9ecf9999c6ad24cd  /include/linux
parent     1ea4f4f8405cc1ceec23f2d261bc3775785e6712 (diff)
parent     9e695d2ecc8451cc2c1603d60b5c8e7f5581923a (diff)
Merge branch 'akpm' (Andrew's patch-bomb)
Merge patches from Andrew Morton:
 "A few misc things and very nearly all of the MM tree. A tremendous
  amount of stuff (again), including a significant rbtree library rework."

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (160 commits)
  sparc64: Support transparent huge pages.
  mm: thp: Use more portable PMD clearing sequenece in zap_huge_pmd().
  mm: Add and use update_mmu_cache_pmd() in transparent huge page code.
  sparc64: Document PGD and PMD layout.
  sparc64: Eliminate PTE table memory wastage.
  sparc64: Halve the size of PTE tables
  sparc64: Only support 4MB huge pages and 8KB base pages.
  memory-hotplug: suppress "Trying to free nonexistent resource <XXXXXXXXXXXXXXXX-YYYYYYYYYYYYYYYY>" warning
  mm: memcg: clean up mm_match_cgroup() signature
  mm: document PageHuge somewhat
  mm: use %pK for /proc/vmallocinfo
  mm, thp: fix mlock statistics
  mm, thp: fix mapped pages avoiding unevictable list on mlock
  memory-hotplug: update memory block's state and notify userspace
  memory-hotplug: preparation to notify memory block's state at memory hot remove
  mm: avoid section mismatch warning for memblock_type_name
  make GFP_NOTRACK definition unconditional
  cma: decrease cc.nr_migratepages after reclaiming pagelist
  CMA: migrate mlocked pages
  kpageflags: fix wrong KPF_THP on non-huge compound pages
  ...
Diffstat (limited to 'include/linux')
-rw-r--r--  include/linux/atomic.h  25
-rw-r--r--  include/linux/compaction.h  19
-rw-r--r--  include/linux/fs.h  8
-rw-r--r--  include/linux/gfp.h  9
-rw-r--r--  include/linux/huge_mm.h  3
-rw-r--r--  include/linux/interval_tree.h  27
-rw-r--r--  include/linux/interval_tree_generic.h  191
-rw-r--r--  include/linux/memblock.h  3
-rw-r--r--  include/linux/memcontrol.h  14
-rw-r--r--  include/linux/memory_hotplug.h  3
-rw-r--r--  include/linux/mempolicy.h  4
-rw-r--r--  include/linux/mm.h  140
-rw-r--r--  include/linux/mm_types.h  16
-rw-r--r--  include/linux/mman.h  1
-rw-r--r--  include/linux/mmu_notifier.h  60
-rw-r--r--  include/linux/mmzone.h  10
-rw-r--r--  include/linux/oom.h  11
-rw-r--r--  include/linux/page-isolation.h  7
-rw-r--r--  include/linux/pageblock-flags.h  19
-rw-r--r--  include/linux/prio_tree.h  120
-rw-r--r--  include/linux/rbtree.h  119
-rw-r--r--  include/linux/rbtree_augmented.h  223
-rw-r--r--  include/linux/rmap.h  36
-rw-r--r--  include/linux/sched.h  1
-rw-r--r--  include/linux/swap.h  2
-rw-r--r--  include/linux/timerqueue.h  2
-rw-r--r--  include/linux/vm_event_item.h  1
-rw-r--r--  include/linux/vmstat.h  12
28 files changed, 682 insertions, 404 deletions
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 70cfcb2d63c4..5b08a8540ecf 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -86,6 +86,31 @@ static inline int atomic_dec_unless_positive(atomic_t *p)
86} 86}
87#endif 87#endif
88 88
89/*
90 * atomic_dec_if_positive - decrement by 1 if old value positive
91 * @v: pointer of type atomic_t
92 *
93 * The function returns the old value of *v minus 1, even if
94 * the atomic variable, v, was not decremented.
95 */
96#ifndef atomic_dec_if_positive
97static inline int atomic_dec_if_positive(atomic_t *v)
98{
99 int c, old, dec;
100 c = atomic_read(v);
101 for (;;) {
102 dec = c - 1;
103 if (unlikely(dec < 0))
104 break;
105 old = atomic_cmpxchg((v), c, dec);
106 if (likely(old == c))
107 break;
108 c = old;
109 }
110 return dec;
111}
112#endif
113
89#ifndef CONFIG_ARCH_HAS_ATOMIC_OR 114#ifndef CONFIG_ARCH_HAS_ATOMIC_OR
90static inline void atomic_or(int i, atomic_t *v) 115static inline void atomic_or(int i, atomic_t *v)
91{ 116{
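
As a minimal usage sketch of the atomic_dec_if_positive() fallback added above (hypothetical caller, not part of this patch; take_one_slot and available are made-up names): the helper only decrements when the old value is positive and returns the decremented value, so a negative return means nothing was taken.

	/* Hypothetical caller: claim one unit of a counted resource, if any is left. */
	static bool take_one_slot(atomic_t *available)
	{
		/* >= 0: the counter was positive and has now been decremented. */
		return atomic_dec_if_positive(available) >= 0;
	}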
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index ef658147e4e8..6ecb6dc2f303 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -22,8 +22,9 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write,
22extern int fragmentation_index(struct zone *zone, unsigned int order); 22extern int fragmentation_index(struct zone *zone, unsigned int order);
23extern unsigned long try_to_compact_pages(struct zonelist *zonelist, 23extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
24 int order, gfp_t gfp_mask, nodemask_t *mask, 24 int order, gfp_t gfp_mask, nodemask_t *mask,
25 bool sync, bool *contended); 25 bool sync, bool *contended, struct page **page);
26extern int compact_pgdat(pg_data_t *pgdat, int order); 26extern int compact_pgdat(pg_data_t *pgdat, int order);
27extern void reset_isolation_suitable(pg_data_t *pgdat);
27extern unsigned long compaction_suitable(struct zone *zone, int order); 28extern unsigned long compaction_suitable(struct zone *zone, int order);
28 29
29/* Do not skip compaction more than 64 times */ 30/* Do not skip compaction more than 64 times */
@@ -61,10 +62,20 @@ static inline bool compaction_deferred(struct zone *zone, int order)
61 return zone->compact_considered < defer_limit; 62 return zone->compact_considered < defer_limit;
62} 63}
63 64
65/* Returns true if restarting compaction after many failures */
66static inline bool compaction_restarting(struct zone *zone, int order)
67{
68 if (order < zone->compact_order_failed)
69 return false;
70
71 return zone->compact_defer_shift == COMPACT_MAX_DEFER_SHIFT &&
72 zone->compact_considered >= 1UL << zone->compact_defer_shift;
73}
74
64#else 75#else
65static inline unsigned long try_to_compact_pages(struct zonelist *zonelist, 76static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
66 int order, gfp_t gfp_mask, nodemask_t *nodemask, 77 int order, gfp_t gfp_mask, nodemask_t *nodemask,
67 bool sync, bool *contended) 78 bool sync, bool *contended, struct page **page)
68{ 79{
69 return COMPACT_CONTINUE; 80 return COMPACT_CONTINUE;
70} 81}
@@ -74,6 +85,10 @@ static inline int compact_pgdat(pg_data_t *pgdat, int order)
74 return COMPACT_CONTINUE; 85 return COMPACT_CONTINUE;
75} 86}
76 87
88static inline void reset_isolation_suitable(pg_data_t *pgdat)
89{
90}
91
77static inline unsigned long compaction_suitable(struct zone *zone, int order) 92static inline unsigned long compaction_suitable(struct zone *zone, int order)
78{ 93{
79 return COMPACT_SKIPPED; 94 return COMPACT_SKIPPED;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ca6d8c806f47..c617ed024df8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -401,7 +401,7 @@ struct inodes_stat_t {
401#include <linux/cache.h> 401#include <linux/cache.h>
402#include <linux/list.h> 402#include <linux/list.h>
403#include <linux/radix-tree.h> 403#include <linux/radix-tree.h>
404#include <linux/prio_tree.h> 404#include <linux/rbtree.h>
405#include <linux/init.h> 405#include <linux/init.h>
406#include <linux/pid.h> 406#include <linux/pid.h>
407#include <linux/bug.h> 407#include <linux/bug.h>
@@ -669,7 +669,7 @@ struct address_space {
669 struct radix_tree_root page_tree; /* radix tree of all pages */ 669 struct radix_tree_root page_tree; /* radix tree of all pages */
670 spinlock_t tree_lock; /* and lock protecting it */ 670 spinlock_t tree_lock; /* and lock protecting it */
671 unsigned int i_mmap_writable;/* count VM_SHARED mappings */ 671 unsigned int i_mmap_writable;/* count VM_SHARED mappings */
672 struct prio_tree_root i_mmap; /* tree of private and shared mappings */ 672 struct rb_root i_mmap; /* tree of private and shared mappings */
673 struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ 673 struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
674 struct mutex i_mmap_mutex; /* protect tree, count, list */ 674 struct mutex i_mmap_mutex; /* protect tree, count, list */
675 /* Protected by tree_lock together with the radix tree */ 675 /* Protected by tree_lock together with the radix tree */
@@ -741,7 +741,7 @@ int mapping_tagged(struct address_space *mapping, int tag);
741 */ 741 */
742static inline int mapping_mapped(struct address_space *mapping) 742static inline int mapping_mapped(struct address_space *mapping)
743{ 743{
744 return !prio_tree_empty(&mapping->i_mmap) || 744 return !RB_EMPTY_ROOT(&mapping->i_mmap) ||
745 !list_empty(&mapping->i_mmap_nonlinear); 745 !list_empty(&mapping->i_mmap_nonlinear);
746} 746}
747 747
@@ -2552,6 +2552,8 @@ extern int sb_min_blocksize(struct super_block *, int);
2552 2552
2553extern int generic_file_mmap(struct file *, struct vm_area_struct *); 2553extern int generic_file_mmap(struct file *, struct vm_area_struct *);
2554extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); 2554extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
2555extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr,
2556 unsigned long size, pgoff_t pgoff);
2555extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); 2557extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size);
2556int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); 2558int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
2557extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); 2559extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 4883f393f50a..02c1c9710be0 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -30,12 +30,7 @@ struct vm_area_struct;
30#define ___GFP_HARDWALL 0x20000u 30#define ___GFP_HARDWALL 0x20000u
31#define ___GFP_THISNODE 0x40000u 31#define ___GFP_THISNODE 0x40000u
32#define ___GFP_RECLAIMABLE 0x80000u 32#define ___GFP_RECLAIMABLE 0x80000u
33#ifdef CONFIG_KMEMCHECK
34#define ___GFP_NOTRACK 0x200000u 33#define ___GFP_NOTRACK 0x200000u
35#else
36#define ___GFP_NOTRACK 0
37#endif
38#define ___GFP_NO_KSWAPD 0x400000u
39#define ___GFP_OTHER_NODE 0x800000u 34#define ___GFP_OTHER_NODE 0x800000u
40#define ___GFP_WRITE 0x1000000u 35#define ___GFP_WRITE 0x1000000u
41 36
@@ -90,7 +85,6 @@ struct vm_area_struct;
90#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) /* Page is reclaimable */ 85#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) /* Page is reclaimable */
91#define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK) /* Don't track with kmemcheck */ 86#define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK) /* Don't track with kmemcheck */
92 87
93#define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD)
94#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */ 88#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
95#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */ 89#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */
96 90
@@ -120,8 +114,7 @@ struct vm_area_struct;
120 __GFP_MOVABLE) 114 __GFP_MOVABLE)
121#define GFP_IOFS (__GFP_IO | __GFP_FS) 115#define GFP_IOFS (__GFP_IO | __GFP_FS)
122#define GFP_TRANSHUGE (GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ 116#define GFP_TRANSHUGE (GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
123 __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \ 117 __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN)
124 __GFP_NO_KSWAPD)
125 118
126#ifdef CONFIG_NUMA 119#ifdef CONFIG_NUMA
127#define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY) 120#define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 4c59b1131187..b31cb7da0346 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -11,8 +11,7 @@ extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
11extern int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, 11extern int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
12 unsigned long address, pmd_t *pmd, 12 unsigned long address, pmd_t *pmd,
13 pmd_t orig_pmd); 13 pmd_t orig_pmd);
14extern pgtable_t get_pmd_huge_pte(struct mm_struct *mm); 14extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
15extern struct page *follow_trans_huge_pmd(struct mm_struct *mm,
16 unsigned long addr, 15 unsigned long addr,
17 pmd_t *pmd, 16 pmd_t *pmd,
18 unsigned int flags); 17 unsigned int flags);
diff --git a/include/linux/interval_tree.h b/include/linux/interval_tree.h
new file mode 100644
index 000000000000..724556aa3c95
--- /dev/null
+++ b/include/linux/interval_tree.h
@@ -0,0 +1,27 @@
1#ifndef _LINUX_INTERVAL_TREE_H
2#define _LINUX_INTERVAL_TREE_H
3
4#include <linux/rbtree.h>
5
6struct interval_tree_node {
7 struct rb_node rb;
8 unsigned long start; /* Start of interval */
9 unsigned long last; /* Last location _in_ interval */
10 unsigned long __subtree_last;
11};
12
13extern void
14interval_tree_insert(struct interval_tree_node *node, struct rb_root *root);
15
16extern void
17interval_tree_remove(struct interval_tree_node *node, struct rb_root *root);
18
19extern struct interval_tree_node *
20interval_tree_iter_first(struct rb_root *root,
21 unsigned long start, unsigned long last);
22
23extern struct interval_tree_node *
24interval_tree_iter_next(struct interval_tree_node *node,
25 unsigned long start, unsigned long last);
26
27#endif /* _LINUX_INTERVAL_TREE_H */
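
A minimal usage sketch of the new interval tree API declared above (hypothetical caller, not part of the patch; interval_tree_demo is a made-up function). Nodes carry closed intervals [start, last] and live in an ordinary struct rb_root:

	#include <linux/interval_tree.h>
	#include <linux/printk.h>

	static void interval_tree_demo(void)
	{
		struct rb_root root = RB_ROOT;
		struct interval_tree_node a = { .start = 10, .last = 19 };
		struct interval_tree_node b = { .start = 30, .last = 39 };
		struct interval_tree_node *it;

		interval_tree_insert(&a, &root);
		interval_tree_insert(&b, &root);

		/* Visits both nodes: [10,19] and [30,39] each intersect [15,34]. */
		for (it = interval_tree_iter_first(&root, 15, 34); it;
		     it = interval_tree_iter_next(it, 15, 34))
			pr_info("overlap [%lu, %lu]\n", it->start, it->last);

		interval_tree_remove(&a, &root);
		interval_tree_remove(&b, &root);
	}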
diff --git a/include/linux/interval_tree_generic.h b/include/linux/interval_tree_generic.h
new file mode 100644
index 000000000000..58370e1862ad
--- /dev/null
+++ b/include/linux/interval_tree_generic.h
@@ -0,0 +1,191 @@
1/*
2 Interval Trees
3 (C) 2012 Michel Lespinasse <walken@google.com>
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
19 include/linux/interval_tree_generic.h
20*/
21
22#include <linux/rbtree_augmented.h>
23
24/*
25 * Template for implementing interval trees
26 *
27 * ITSTRUCT: struct type of the interval tree nodes
28 * ITRB: name of struct rb_node field within ITSTRUCT
29 * ITTYPE: type of the interval endpoints
30 * ITSUBTREE: name of ITTYPE field within ITSTRUCT holding last-in-subtree
31 * ITSTART(n): start endpoint of ITSTRUCT node n
32 * ITLAST(n): last endpoint of ITSTRUCT node n
33 * ITSTATIC: 'static' or empty
34 * ITPREFIX: prefix to use for the inline tree definitions
35 *
36 * Note - before using this, please consider if non-generic version
37 * (interval_tree.h) would work for you...
38 */
39
40#define INTERVAL_TREE_DEFINE(ITSTRUCT, ITRB, ITTYPE, ITSUBTREE, \
41 ITSTART, ITLAST, ITSTATIC, ITPREFIX) \
42 \
43/* Callbacks for augmented rbtree insert and remove */ \
44 \
45static inline ITTYPE ITPREFIX ## _compute_subtree_last(ITSTRUCT *node) \
46{ \
47 ITTYPE max = ITLAST(node), subtree_last; \
48 if (node->ITRB.rb_left) { \
49 subtree_last = rb_entry(node->ITRB.rb_left, \
50 ITSTRUCT, ITRB)->ITSUBTREE; \
51 if (max < subtree_last) \
52 max = subtree_last; \
53 } \
54 if (node->ITRB.rb_right) { \
55 subtree_last = rb_entry(node->ITRB.rb_right, \
56 ITSTRUCT, ITRB)->ITSUBTREE; \
57 if (max < subtree_last) \
58 max = subtree_last; \
59 } \
60 return max; \
61} \
62 \
63RB_DECLARE_CALLBACKS(static, ITPREFIX ## _augment, ITSTRUCT, ITRB, \
64 ITTYPE, ITSUBTREE, ITPREFIX ## _compute_subtree_last) \
65 \
66/* Insert / remove interval nodes from the tree */ \
67 \
68ITSTATIC void ITPREFIX ## _insert(ITSTRUCT *node, struct rb_root *root) \
69{ \
70 struct rb_node **link = &root->rb_node, *rb_parent = NULL; \
71 ITTYPE start = ITSTART(node), last = ITLAST(node); \
72 ITSTRUCT *parent; \
73 \
74 while (*link) { \
75 rb_parent = *link; \
76 parent = rb_entry(rb_parent, ITSTRUCT, ITRB); \
77 if (parent->ITSUBTREE < last) \
78 parent->ITSUBTREE = last; \
79 if (start < ITSTART(parent)) \
80 link = &parent->ITRB.rb_left; \
81 else \
82 link = &parent->ITRB.rb_right; \
83 } \
84 \
85 node->ITSUBTREE = last; \
86 rb_link_node(&node->ITRB, rb_parent, link); \
87 rb_insert_augmented(&node->ITRB, root, &ITPREFIX ## _augment); \
88} \
89 \
90ITSTATIC void ITPREFIX ## _remove(ITSTRUCT *node, struct rb_root *root) \
91{ \
92 rb_erase_augmented(&node->ITRB, root, &ITPREFIX ## _augment); \
93} \
94 \
95/* \
96 * Iterate over intervals intersecting [start;last] \
97 * \
98 * Note that a node's interval intersects [start;last] iff: \
99 * Cond1: ITSTART(node) <= last \
100 * and \
101 * Cond2: start <= ITLAST(node) \
102 */ \
103 \
104static ITSTRUCT * \
105ITPREFIX ## _subtree_search(ITSTRUCT *node, ITTYPE start, ITTYPE last) \
106{ \
107 while (true) { \
108 /* \
109 * Loop invariant: start <= node->ITSUBTREE \
110 * (Cond2 is satisfied by one of the subtree nodes) \
111 */ \
112 if (node->ITRB.rb_left) { \
113 ITSTRUCT *left = rb_entry(node->ITRB.rb_left, \
114 ITSTRUCT, ITRB); \
115 if (start <= left->ITSUBTREE) { \
116 /* \
117 * Some nodes in left subtree satisfy Cond2. \
118 * Iterate to find the leftmost such node N. \
119 * If it also satisfies Cond1, that's the \
120 * match we are looking for. Otherwise, there \
121 * is no matching interval as nodes to the \
122 * right of N can't satisfy Cond1 either. \
123 */ \
124 node = left; \
125 continue; \
126 } \
127 } \
128 if (ITSTART(node) <= last) { /* Cond1 */ \
129 if (start <= ITLAST(node)) /* Cond2 */ \
130 return node; /* node is leftmost match */ \
131 if (node->ITRB.rb_right) { \
132 node = rb_entry(node->ITRB.rb_right, \
133 ITSTRUCT, ITRB); \
134 if (start <= node->ITSUBTREE) \
135 continue; \
136 } \
137 } \
138 return NULL; /* No match */ \
139 } \
140} \
141 \
142ITSTATIC ITSTRUCT * \
143ITPREFIX ## _iter_first(struct rb_root *root, ITTYPE start, ITTYPE last) \
144{ \
145 ITSTRUCT *node; \
146 \
147 if (!root->rb_node) \
148 return NULL; \
149 node = rb_entry(root->rb_node, ITSTRUCT, ITRB); \
150 if (node->ITSUBTREE < start) \
151 return NULL; \
152 return ITPREFIX ## _subtree_search(node, start, last); \
153} \
154 \
155ITSTATIC ITSTRUCT * \
156ITPREFIX ## _iter_next(ITSTRUCT *node, ITTYPE start, ITTYPE last) \
157{ \
158 struct rb_node *rb = node->ITRB.rb_right, *prev; \
159 \
160 while (true) { \
161 /* \
162 * Loop invariants: \
163 * Cond1: ITSTART(node) <= last \
164 * rb == node->ITRB.rb_right \
165 * \
166 * First, search right subtree if suitable \
167 */ \
168 if (rb) { \
169 ITSTRUCT *right = rb_entry(rb, ITSTRUCT, ITRB); \
170 if (start <= right->ITSUBTREE) \
171 return ITPREFIX ## _subtree_search(right, \
172 start, last); \
173 } \
174 \
175 /* Move up the tree until we come from a node's left child */ \
176 do { \
177 rb = rb_parent(&node->ITRB); \
178 if (!rb) \
179 return NULL; \
180 prev = &node->ITRB; \
181 node = rb_entry(rb, ITSTRUCT, ITRB); \
182 rb = node->ITRB.rb_right; \
183 } while (prev == rb); \
184 \
185 /* Check if the node intersects [start;last] */ \
186 if (last < ITSTART(node)) /* !Cond1 */ \
187 return NULL; \
188 else if (start <= ITLAST(node)) /* Cond2 */ \
189 return node; \
190 } \
191}
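
For reference, the plain interval_tree.h API above is expected to be built by instantiating this template; the sketch below shows roughly what such an instantiation (e.g. a lib/interval_tree.c) would look like, and is not quoted from this diff. Passing an empty ITSTATIC argument yields the non-static definitions that match the extern declarations:

	#include <linux/interval_tree.h>
	#include <linux/interval_tree_generic.h>

	/* Endpoint accessors for the template. */
	#define START(node) ((node)->start)
	#define LAST(node)  ((node)->last)

	/* Empty ITSTATIC argument => extern-visible interval_tree_*() helpers. */
	INTERVAL_TREE_DEFINE(struct interval_tree_node, rb,
			     unsigned long, __subtree_last,
			     START, LAST,, interval_tree)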
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 19dc455b4f3d..569d67d4243e 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -70,8 +70,7 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
70 * @p_end: ptr to ulong for end pfn of the range, can be %NULL 70 * @p_end: ptr to ulong for end pfn of the range, can be %NULL
71 * @p_nid: ptr to int for nid of the range, can be %NULL 71 * @p_nid: ptr to int for nid of the range, can be %NULL
72 * 72 *
73 * Walks over configured memory ranges. Available after early_node_map is 73 * Walks over configured memory ranges.
74 * populated.
75 */ 74 */
76#define for_each_mem_pfn_range(i, nid, p_start, p_end, p_nid) \ 75#define for_each_mem_pfn_range(i, nid, p_start, p_end, p_nid) \
77 for (i = -1, __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid); \ 76 for (i = -1, __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid); \
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 8d9489fdab2e..fd0e6d53836e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -84,14 +84,14 @@ extern struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg);
84extern struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont); 84extern struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont);
85 85
86static inline 86static inline
87int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup) 87bool mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *memcg)
88{ 88{
89 struct mem_cgroup *memcg; 89 struct mem_cgroup *task_memcg;
90 int match; 90 bool match;
91 91
92 rcu_read_lock(); 92 rcu_read_lock();
93 memcg = mem_cgroup_from_task(rcu_dereference((mm)->owner)); 93 task_memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
94 match = __mem_cgroup_same_or_subtree(cgroup, memcg); 94 match = __mem_cgroup_same_or_subtree(memcg, task_memcg);
95 rcu_read_unlock(); 95 rcu_read_unlock();
96 return match; 96 return match;
97} 97}
@@ -258,10 +258,10 @@ static inline struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm
258 return NULL; 258 return NULL;
259} 259}
260 260
261static inline int mm_match_cgroup(struct mm_struct *mm, 261static inline bool mm_match_cgroup(struct mm_struct *mm,
262 struct mem_cgroup *memcg) 262 struct mem_cgroup *memcg)
263{ 263{
264 return 1; 264 return true;
265} 265}
266 266
267static inline int task_in_mem_cgroup(struct task_struct *task, 267static inline int task_in_mem_cgroup(struct task_struct *task,
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 910550f3b70e..95573ec4ee6c 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -10,6 +10,7 @@ struct page;
10struct zone; 10struct zone;
11struct pglist_data; 11struct pglist_data;
12struct mem_section; 12struct mem_section;
13struct memory_block;
13 14
14#ifdef CONFIG_MEMORY_HOTPLUG 15#ifdef CONFIG_MEMORY_HOTPLUG
15 16
@@ -233,6 +234,8 @@ static inline int is_mem_section_removable(unsigned long pfn,
233extern int mem_online_node(int nid); 234extern int mem_online_node(int nid);
234extern int add_memory(int nid, u64 start, u64 size); 235extern int add_memory(int nid, u64 start, u64 size);
235extern int arch_add_memory(int nid, u64 start, u64 size); 236extern int arch_add_memory(int nid, u64 start, u64 size);
237extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
238extern int offline_memory_block(struct memory_block *mem);
236extern int remove_memory(u64 start, u64 size); 239extern int remove_memory(u64 start, u64 size);
237extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn, 240extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
238 int nr_pages); 241 int nr_pages);
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 95b738c7abff..cec569325608 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -188,7 +188,7 @@ struct sp_node {
188 188
189struct shared_policy { 189struct shared_policy {
190 struct rb_root root; 190 struct rb_root root;
191 spinlock_t lock; 191 struct mutex mutex;
192}; 192};
193 193
194void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol); 194void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol);
@@ -239,7 +239,7 @@ extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol,
239/* Check if a vma is migratable */ 239/* Check if a vma is migratable */
240static inline int vma_migratable(struct vm_area_struct *vma) 240static inline int vma_migratable(struct vm_area_struct *vma)
241{ 241{
242 if (vma->vm_flags & (VM_IO|VM_HUGETLB|VM_PFNMAP|VM_RESERVED)) 242 if (vma->vm_flags & (VM_IO | VM_HUGETLB | VM_PFNMAP))
243 return 0; 243 return 0;
244 /* 244 /*
245 * Migration allocates pages in the highest zone. If we cannot 245 * Migration allocates pages in the highest zone. If we cannot
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 311be906b57d..fa0680402738 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -10,7 +10,6 @@
10#include <linux/list.h> 10#include <linux/list.h>
11#include <linux/mmzone.h> 11#include <linux/mmzone.h>
12#include <linux/rbtree.h> 12#include <linux/rbtree.h>
13#include <linux/prio_tree.h>
14#include <linux/atomic.h> 13#include <linux/atomic.h>
15#include <linux/debug_locks.h> 14#include <linux/debug_locks.h>
16#include <linux/mm_types.h> 15#include <linux/mm_types.h>
@@ -21,6 +20,7 @@
21 20
22struct mempolicy; 21struct mempolicy;
23struct anon_vma; 22struct anon_vma;
23struct anon_vma_chain;
24struct file_ra_state; 24struct file_ra_state;
25struct user_struct; 25struct user_struct;
26struct writeback_control; 26struct writeback_control;
@@ -70,6 +70,8 @@ extern unsigned int kobjsize(const void *objp);
70/* 70/*
71 * vm_flags in vm_area_struct, see mm_types.h. 71 * vm_flags in vm_area_struct, see mm_types.h.
72 */ 72 */
73#define VM_NONE 0x00000000
74
73#define VM_READ 0x00000001 /* currently active flags */ 75#define VM_READ 0x00000001 /* currently active flags */
74#define VM_WRITE 0x00000002 76#define VM_WRITE 0x00000002
75#define VM_EXEC 0x00000004 77#define VM_EXEC 0x00000004
@@ -82,16 +84,9 @@ extern unsigned int kobjsize(const void *objp);
82#define VM_MAYSHARE 0x00000080 84#define VM_MAYSHARE 0x00000080
83 85
84#define VM_GROWSDOWN 0x00000100 /* general info on the segment */ 86#define VM_GROWSDOWN 0x00000100 /* general info on the segment */
85#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
86#define VM_GROWSUP 0x00000200
87#else
88#define VM_GROWSUP 0x00000000
89#define VM_NOHUGEPAGE 0x00000200 /* MADV_NOHUGEPAGE marked this vma */
90#endif
91#define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */ 87#define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */
92#define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */ 88#define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */
93 89
94#define VM_EXECUTABLE 0x00001000
95#define VM_LOCKED 0x00002000 90#define VM_LOCKED 0x00002000
96#define VM_IO 0x00004000 /* Memory mapped I/O or similar */ 91#define VM_IO 0x00004000 /* Memory mapped I/O or similar */
97 92
@@ -101,25 +96,34 @@ extern unsigned int kobjsize(const void *objp);
101 96
102#define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */ 97#define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */
103#define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */ 98#define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */
104#define VM_RESERVED 0x00080000 /* Count as reserved_vm like IO */
105#define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */ 99#define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */
106#define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */ 100#define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */
107#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ 101#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */
108#define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */ 102#define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */
109#ifndef CONFIG_TRANSPARENT_HUGEPAGE 103#define VM_ARCH_1 0x01000000 /* Architecture-specific flag */
110#define VM_MAPPED_COPY 0x01000000 /* T if mapped copy of data (nommu mmap) */ 104#define VM_DONTDUMP 0x04000000 /* Do not include in the core dump */
111#else
112#define VM_HUGEPAGE 0x01000000 /* MADV_HUGEPAGE marked this vma */
113#endif
114#define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */
115#define VM_NODUMP 0x04000000 /* Do not include in the core dump */
116 105
117#define VM_CAN_NONLINEAR 0x08000000 /* Has ->fault & does nonlinear pages */
118#define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ 106#define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */
119#define VM_SAO 0x20000000 /* Strong Access Ordering (powerpc) */ 107#define VM_HUGEPAGE 0x20000000 /* MADV_HUGEPAGE marked this vma */
120#define VM_PFN_AT_MMAP 0x40000000 /* PFNMAP vma that is fully mapped at mmap time */ 108#define VM_NOHUGEPAGE 0x40000000 /* MADV_NOHUGEPAGE marked this vma */
121#define VM_MERGEABLE 0x80000000 /* KSM may merge identical pages */ 109#define VM_MERGEABLE 0x80000000 /* KSM may merge identical pages */
122 110
111#if defined(CONFIG_X86)
112# define VM_PAT VM_ARCH_1 /* PAT reserves whole VMA at once (x86) */
113#elif defined(CONFIG_PPC)
114# define VM_SAO VM_ARCH_1 /* Strong Access Ordering (powerpc) */
115#elif defined(CONFIG_PARISC)
116# define VM_GROWSUP VM_ARCH_1
117#elif defined(CONFIG_IA64)
118# define VM_GROWSUP VM_ARCH_1
119#elif !defined(CONFIG_MMU)
120# define VM_MAPPED_COPY VM_ARCH_1 /* T if mapped copy of data (nommu mmap) */
121#endif
122
123#ifndef VM_GROWSUP
124# define VM_GROWSUP VM_NONE
125#endif
126
123/* Bits set in the VMA until the stack is in its final location */ 127/* Bits set in the VMA until the stack is in its final location */
124#define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ) 128#define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ)
125 129
@@ -143,7 +147,7 @@ extern unsigned int kobjsize(const void *objp);
143 * Special vmas that are non-mergable, non-mlock()able. 147 * Special vmas that are non-mergable, non-mlock()able.
144 * Note: mm/huge_memory.c VM_NO_THP depends on this definition. 148 * Note: mm/huge_memory.c VM_NO_THP depends on this definition.
145 */ 149 */
146#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP) 150#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP)
147 151
148/* 152/*
149 * mapping from the currently active vm_flags protection bits (the 153 * mapping from the currently active vm_flags protection bits (the
@@ -157,24 +161,7 @@ extern pgprot_t protection_map[16];
157#define FAULT_FLAG_ALLOW_RETRY 0x08 /* Retry fault if blocking */ 161#define FAULT_FLAG_ALLOW_RETRY 0x08 /* Retry fault if blocking */
158#define FAULT_FLAG_RETRY_NOWAIT 0x10 /* Don't drop mmap_sem and wait when retrying */ 162#define FAULT_FLAG_RETRY_NOWAIT 0x10 /* Don't drop mmap_sem and wait when retrying */
159#define FAULT_FLAG_KILLABLE 0x20 /* The fault task is in SIGKILL killable region */ 163#define FAULT_FLAG_KILLABLE 0x20 /* The fault task is in SIGKILL killable region */
160 164#define FAULT_FLAG_TRIED 0x40 /* second try */
161/*
162 * This interface is used by x86 PAT code to identify a pfn mapping that is
163 * linear over entire vma. This is to optimize PAT code that deals with
164 * marking the physical region with a particular prot. This is not for generic
165 * mm use. Note also that this check will not work if the pfn mapping is
166 * linear for a vma starting at physical address 0. In which case PAT code
167 * falls back to slow path of reserving physical range page by page.
168 */
169static inline int is_linear_pfn_mapping(struct vm_area_struct *vma)
170{
171 return !!(vma->vm_flags & VM_PFN_AT_MMAP);
172}
173
174static inline int is_pfn_mapping(struct vm_area_struct *vma)
175{
176 return !!(vma->vm_flags & VM_PFNMAP);
177}
178 165
179/* 166/*
180 * vm_fault is filled by the the pagefault handler and passed to the vma's 167 * vm_fault is filled by the the pagefault handler and passed to the vma's
@@ -182,8 +169,7 @@ static inline int is_pfn_mapping(struct vm_area_struct *vma)
182 * of VM_FAULT_xxx flags that give details about how the fault was handled. 169 * of VM_FAULT_xxx flags that give details about how the fault was handled.
183 * 170 *
184 * pgoff should be used in favour of virtual_address, if possible. If pgoff 171 * pgoff should be used in favour of virtual_address, if possible. If pgoff
185 * is used, one may set VM_CAN_NONLINEAR in the vma->vm_flags to get nonlinear 172 * is used, one may implement ->remap_pages to get nonlinear mapping support.
186 * mapping support.
187 */ 173 */
188struct vm_fault { 174struct vm_fault {
189 unsigned int flags; /* FAULT_FLAG_xxx flags */ 175 unsigned int flags; /* FAULT_FLAG_xxx flags */
@@ -241,6 +227,9 @@ struct vm_operations_struct {
241 int (*migrate)(struct vm_area_struct *vma, const nodemask_t *from, 227 int (*migrate)(struct vm_area_struct *vma, const nodemask_t *from,
242 const nodemask_t *to, unsigned long flags); 228 const nodemask_t *to, unsigned long flags);
243#endif 229#endif
230 /* called by sys_remap_file_pages() to populate non-linear mapping */
231 int (*remap_pages)(struct vm_area_struct *vma, unsigned long addr,
232 unsigned long size, pgoff_t pgoff);
244}; 233};
245 234
246struct mmu_gather; 235struct mmu_gather;
@@ -249,6 +238,18 @@ struct inode;
249#define page_private(page) ((page)->private) 238#define page_private(page) ((page)->private)
250#define set_page_private(page, v) ((page)->private = (v)) 239#define set_page_private(page, v) ((page)->private = (v))
251 240
241/* It's valid only if the page is free path or free_list */
242static inline void set_freepage_migratetype(struct page *page, int migratetype)
243{
244 page->index = migratetype;
245}
246
247/* It's valid only if the page is free path or free_list */
248static inline int get_freepage_migratetype(struct page *page)
249{
250 return page->index;
251}
252
252/* 253/*
253 * FIXME: take this include out, include page-flags.h in 254 * FIXME: take this include out, include page-flags.h in
254 * files which need it (119 of them) 255 * files which need it (119 of them)
@@ -454,6 +455,7 @@ void put_pages_list(struct list_head *pages);
454 455
455void split_page(struct page *page, unsigned int order); 456void split_page(struct page *page, unsigned int order);
456int split_free_page(struct page *page); 457int split_free_page(struct page *page);
458int capture_free_page(struct page *page, int alloc_order, int migratetype);
457 459
458/* 460/*
459 * Compound pages have a destructor function. Provide a 461 * Compound pages have a destructor function. Provide a
@@ -1071,7 +1073,8 @@ vm_is_stack(struct task_struct *task, struct vm_area_struct *vma, int in_group);
1071 1073
1072extern unsigned long move_page_tables(struct vm_area_struct *vma, 1074extern unsigned long move_page_tables(struct vm_area_struct *vma,
1073 unsigned long old_addr, struct vm_area_struct *new_vma, 1075 unsigned long old_addr, struct vm_area_struct *new_vma,
1074 unsigned long new_addr, unsigned long len); 1076 unsigned long new_addr, unsigned long len,
1077 bool need_rmap_locks);
1075extern unsigned long do_mremap(unsigned long addr, 1078extern unsigned long do_mremap(unsigned long addr,
1076 unsigned long old_len, unsigned long new_len, 1079 unsigned long old_len, unsigned long new_len,
1077 unsigned long flags, unsigned long new_addr); 1080 unsigned long flags, unsigned long new_addr);
@@ -1366,24 +1369,45 @@ extern void zone_pcp_reset(struct zone *zone);
1366extern atomic_long_t mmap_pages_allocated; 1369extern atomic_long_t mmap_pages_allocated;
1367extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t); 1370extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t);
1368 1371
1369/* prio_tree.c */ 1372/* interval_tree.c */
1370void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old); 1373void vma_interval_tree_insert(struct vm_area_struct *node,
1371void vma_prio_tree_insert(struct vm_area_struct *, struct prio_tree_root *); 1374 struct rb_root *root);
1372void vma_prio_tree_remove(struct vm_area_struct *, struct prio_tree_root *); 1375void vma_interval_tree_insert_after(struct vm_area_struct *node,
1373struct vm_area_struct *vma_prio_tree_next(struct vm_area_struct *vma, 1376 struct vm_area_struct *prev,
1374 struct prio_tree_iter *iter); 1377 struct rb_root *root);
1375 1378void vma_interval_tree_remove(struct vm_area_struct *node,
1376#define vma_prio_tree_foreach(vma, iter, root, begin, end) \ 1379 struct rb_root *root);
1377 for (prio_tree_iter_init(iter, root, begin, end), vma = NULL; \ 1380struct vm_area_struct *vma_interval_tree_iter_first(struct rb_root *root,
1378 (vma = vma_prio_tree_next(vma, iter)); ) 1381 unsigned long start, unsigned long last);
1382struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node,
1383 unsigned long start, unsigned long last);
1384
1385#define vma_interval_tree_foreach(vma, root, start, last) \
1386 for (vma = vma_interval_tree_iter_first(root, start, last); \
1387 vma; vma = vma_interval_tree_iter_next(vma, start, last))
1379 1388
1380static inline void vma_nonlinear_insert(struct vm_area_struct *vma, 1389static inline void vma_nonlinear_insert(struct vm_area_struct *vma,
1381 struct list_head *list) 1390 struct list_head *list)
1382{ 1391{
1383 vma->shared.vm_set.parent = NULL; 1392 list_add_tail(&vma->shared.nonlinear, list);
1384 list_add_tail(&vma->shared.vm_set.list, list);
1385} 1393}
1386 1394
1395void anon_vma_interval_tree_insert(struct anon_vma_chain *node,
1396 struct rb_root *root);
1397void anon_vma_interval_tree_remove(struct anon_vma_chain *node,
1398 struct rb_root *root);
1399struct anon_vma_chain *anon_vma_interval_tree_iter_first(
1400 struct rb_root *root, unsigned long start, unsigned long last);
1401struct anon_vma_chain *anon_vma_interval_tree_iter_next(
1402 struct anon_vma_chain *node, unsigned long start, unsigned long last);
1403#ifdef CONFIG_DEBUG_VM_RB
1404void anon_vma_interval_tree_verify(struct anon_vma_chain *node);
1405#endif
1406
1407#define anon_vma_interval_tree_foreach(avc, root, start, last) \
1408 for (avc = anon_vma_interval_tree_iter_first(root, start, last); \
1409 avc; avc = anon_vma_interval_tree_iter_next(avc, start, last))
1410
1387/* mmap.c */ 1411/* mmap.c */
1388extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin); 1412extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin);
1389extern int vma_adjust(struct vm_area_struct *vma, unsigned long start, 1413extern int vma_adjust(struct vm_area_struct *vma, unsigned long start,
@@ -1400,15 +1424,13 @@ extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
1400 struct rb_node **, struct rb_node *); 1424 struct rb_node **, struct rb_node *);
1401extern void unlink_file_vma(struct vm_area_struct *); 1425extern void unlink_file_vma(struct vm_area_struct *);
1402extern struct vm_area_struct *copy_vma(struct vm_area_struct **, 1426extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
1403 unsigned long addr, unsigned long len, pgoff_t pgoff); 1427 unsigned long addr, unsigned long len, pgoff_t pgoff,
1428 bool *need_rmap_locks);
1404extern void exit_mmap(struct mm_struct *); 1429extern void exit_mmap(struct mm_struct *);
1405 1430
1406extern int mm_take_all_locks(struct mm_struct *mm); 1431extern int mm_take_all_locks(struct mm_struct *mm);
1407extern void mm_drop_all_locks(struct mm_struct *mm); 1432extern void mm_drop_all_locks(struct mm_struct *mm);
1408 1433
1409/* From fs/proc/base.c. callers must _not_ hold the mm's exe_file_lock */
1410extern void added_exe_file_vma(struct mm_struct *mm);
1411extern void removed_exe_file_vma(struct mm_struct *mm);
1412extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file); 1434extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file);
1413extern struct file *get_mm_exe_file(struct mm_struct *mm); 1435extern struct file *get_mm_exe_file(struct mm_struct *mm);
1414 1436
@@ -1662,5 +1684,9 @@ static inline unsigned int debug_guardpage_minorder(void) { return 0; }
1662static inline bool page_is_guard(struct page *page) { return false; } 1684static inline bool page_is_guard(struct page *page) { return false; }
1663#endif /* CONFIG_DEBUG_PAGEALLOC */ 1685#endif /* CONFIG_DEBUG_PAGEALLOC */
1664 1686
1687extern void reset_zone_present_pages(void);
1688extern void fixup_zone_present_pages(int nid, unsigned long start_pfn,
1689 unsigned long end_pfn);
1690
1665#endif /* __KERNEL__ */ 1691#endif /* __KERNEL__ */
1666#endif /* _LINUX_MM_H */ 1692#endif /* _LINUX_MM_H */
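
The prio tree to interval tree switch above also changes how callers walk a file's i_mmap tree. The following is a sketch of the call-site pattern only (walk_mapping_at and handle_vma are made-up names; the real conversions live in other hunks of this series):

	#include <linux/fs.h>
	#include <linux/mm.h>

	/* Stand-in for whatever per-vma work a real caller does. */
	static void handle_vma(struct vm_area_struct *vma)
	{
	}

	/* Hypothetical i_mmap walk over all vmas mapping a given page offset. */
	static void walk_mapping_at(struct address_space *mapping, pgoff_t pgoff)
	{
		struct vm_area_struct *vma;

		/*
		 * Old style (removed by this patch):
		 *	struct prio_tree_iter iter;
		 *	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff)
		 *		handle_vma(vma);
		 */

		/* New style: no separate iterator, state lives in the tree nodes. */
		vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff)
			handle_vma(vma);
	}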
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index bf7867200b95..31f8a3af7d94 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -6,7 +6,6 @@
6#include <linux/threads.h> 6#include <linux/threads.h>
7#include <linux/list.h> 7#include <linux/list.h>
8#include <linux/spinlock.h> 8#include <linux/spinlock.h>
9#include <linux/prio_tree.h>
10#include <linux/rbtree.h> 9#include <linux/rbtree.h>
11#include <linux/rwsem.h> 10#include <linux/rwsem.h>
12#include <linux/completion.h> 11#include <linux/completion.h>
@@ -240,18 +239,15 @@ struct vm_area_struct {
240 239
241 /* 240 /*
242 * For areas with an address space and backing store, 241 * For areas with an address space and backing store,
243 * linkage into the address_space->i_mmap prio tree, or 242 * linkage into the address_space->i_mmap interval tree, or
244 * linkage to the list of like vmas hanging off its node, or
245 * linkage of vma in the address_space->i_mmap_nonlinear list. 243 * linkage of vma in the address_space->i_mmap_nonlinear list.
246 */ 244 */
247 union { 245 union {
248 struct { 246 struct {
249 struct list_head list; 247 struct rb_node rb;
250 void *parent; /* aligns with prio_tree_node parent */ 248 unsigned long rb_subtree_last;
251 struct vm_area_struct *head; 249 } linear;
252 } vm_set; 250 struct list_head nonlinear;
253
254 struct raw_prio_tree_node prio_tree_node;
255 } shared; 251 } shared;
256 252
257 /* 253 /*
@@ -349,7 +345,6 @@ struct mm_struct {
349 unsigned long shared_vm; /* Shared pages (files) */ 345 unsigned long shared_vm; /* Shared pages (files) */
350 unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE */ 346 unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE */
351 unsigned long stack_vm; /* VM_GROWSUP/DOWN */ 347 unsigned long stack_vm; /* VM_GROWSUP/DOWN */
352 unsigned long reserved_vm; /* VM_RESERVED|VM_IO pages */
353 unsigned long def_flags; 348 unsigned long def_flags;
354 unsigned long nr_ptes; /* Page table pages */ 349 unsigned long nr_ptes; /* Page table pages */
355 unsigned long start_code, end_code, start_data, end_data; 350 unsigned long start_code, end_code, start_data, end_data;
@@ -394,7 +389,6 @@ struct mm_struct {
394 389
395 /* store ref to file /proc/<pid>/exe symlink points to */ 390 /* store ref to file /proc/<pid>/exe symlink points to */
396 struct file *exe_file; 391 struct file *exe_file;
397 unsigned long num_exe_file_vmas;
398#ifdef CONFIG_MMU_NOTIFIER 392#ifdef CONFIG_MMU_NOTIFIER
399 struct mmu_notifier_mm *mmu_notifier_mm; 393 struct mmu_notifier_mm *mmu_notifier_mm;
400#endif 394#endif
diff --git a/include/linux/mman.h b/include/linux/mman.h
index 8b74e9b1d0ad..77cec2f45cb7 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -86,7 +86,6 @@ calc_vm_flag_bits(unsigned long flags)
86{ 86{
87 return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | 87 return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) |
88 _calc_vm_trans(flags, MAP_DENYWRITE, VM_DENYWRITE ) | 88 _calc_vm_trans(flags, MAP_DENYWRITE, VM_DENYWRITE ) |
89 _calc_vm_trans(flags, MAP_EXECUTABLE, VM_EXECUTABLE) |
90 _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ); 89 _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED );
91} 90}
92#endif /* __KERNEL__ */ 91#endif /* __KERNEL__ */
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 1d1b1e13f79f..bc823c4c028b 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -4,6 +4,7 @@
4#include <linux/list.h> 4#include <linux/list.h>
5#include <linux/spinlock.h> 5#include <linux/spinlock.h>
6#include <linux/mm_types.h> 6#include <linux/mm_types.h>
7#include <linux/srcu.h>
7 8
8struct mmu_notifier; 9struct mmu_notifier;
9struct mmu_notifier_ops; 10struct mmu_notifier_ops;
@@ -245,50 +246,6 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
245 __mmu_notifier_mm_destroy(mm); 246 __mmu_notifier_mm_destroy(mm);
246} 247}
247 248
248/*
249 * These two macros will sometime replace ptep_clear_flush.
250 * ptep_clear_flush is implemented as macro itself, so this also is
251 * implemented as a macro until ptep_clear_flush will converted to an
252 * inline function, to diminish the risk of compilation failure. The
253 * invalidate_page method over time can be moved outside the PT lock
254 * and these two macros can be later removed.
255 */
256#define ptep_clear_flush_notify(__vma, __address, __ptep) \
257({ \
258 pte_t __pte; \
259 struct vm_area_struct *___vma = __vma; \
260 unsigned long ___address = __address; \
261 __pte = ptep_clear_flush(___vma, ___address, __ptep); \
262 mmu_notifier_invalidate_page(___vma->vm_mm, ___address); \
263 __pte; \
264})
265
266#define pmdp_clear_flush_notify(__vma, __address, __pmdp) \
267({ \
268 pmd_t __pmd; \
269 struct vm_area_struct *___vma = __vma; \
270 unsigned long ___address = __address; \
271 VM_BUG_ON(__address & ~HPAGE_PMD_MASK); \
272 mmu_notifier_invalidate_range_start(___vma->vm_mm, ___address, \
273 (__address)+HPAGE_PMD_SIZE);\
274 __pmd = pmdp_clear_flush(___vma, ___address, __pmdp); \
275 mmu_notifier_invalidate_range_end(___vma->vm_mm, ___address, \
276 (__address)+HPAGE_PMD_SIZE); \
277 __pmd; \
278})
279
280#define pmdp_splitting_flush_notify(__vma, __address, __pmdp) \
281({ \
282 struct vm_area_struct *___vma = __vma; \
283 unsigned long ___address = __address; \
284 VM_BUG_ON(__address & ~HPAGE_PMD_MASK); \
285 mmu_notifier_invalidate_range_start(___vma->vm_mm, ___address, \
286 (__address)+HPAGE_PMD_SIZE);\
287 pmdp_splitting_flush(___vma, ___address, __pmdp); \
288 mmu_notifier_invalidate_range_end(___vma->vm_mm, ___address, \
289 (__address)+HPAGE_PMD_SIZE); \
290})
291
292#define ptep_clear_flush_young_notify(__vma, __address, __ptep) \ 249#define ptep_clear_flush_young_notify(__vma, __address, __ptep) \
293({ \ 250({ \
294 int __young; \ 251 int __young; \
@@ -311,14 +268,24 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
311 __young; \ 268 __young; \
312}) 269})
313 270
271/*
272 * set_pte_at_notify() sets the pte _after_ running the notifier.
273 * This is safe to start by updating the secondary MMUs, because the primary MMU
274 * pte invalidate must have already happened with a ptep_clear_flush() before
275 * set_pte_at_notify() has been invoked. Updating the secondary MMUs first is
276 * required when we change both the protection of the mapping from read-only to
277 * read-write and the pfn (like during copy on write page faults). Otherwise the
278 * old page would remain mapped readonly in the secondary MMUs after the new
279 * page is already writable by some CPU through the primary MMU.
280 */
314#define set_pte_at_notify(__mm, __address, __ptep, __pte) \ 281#define set_pte_at_notify(__mm, __address, __ptep, __pte) \
315({ \ 282({ \
316 struct mm_struct *___mm = __mm; \ 283 struct mm_struct *___mm = __mm; \
317 unsigned long ___address = __address; \ 284 unsigned long ___address = __address; \
318 pte_t ___pte = __pte; \ 285 pte_t ___pte = __pte; \
319 \ 286 \
320 set_pte_at(___mm, ___address, __ptep, ___pte); \
321 mmu_notifier_change_pte(___mm, ___address, ___pte); \ 287 mmu_notifier_change_pte(___mm, ___address, ___pte); \
288 set_pte_at(___mm, ___address, __ptep, ___pte); \
322}) 289})
323 290
324#else /* CONFIG_MMU_NOTIFIER */ 291#else /* CONFIG_MMU_NOTIFIER */
@@ -369,9 +336,6 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
369 336
370#define ptep_clear_flush_young_notify ptep_clear_flush_young 337#define ptep_clear_flush_young_notify ptep_clear_flush_young
371#define pmdp_clear_flush_young_notify pmdp_clear_flush_young 338#define pmdp_clear_flush_young_notify pmdp_clear_flush_young
372#define ptep_clear_flush_notify ptep_clear_flush
373#define pmdp_clear_flush_notify pmdp_clear_flush
374#define pmdp_splitting_flush_notify pmdp_splitting_flush
375#define set_pte_at_notify set_pte_at 339#define set_pte_at_notify set_pte_at
376 340
377#endif /* CONFIG_MMU_NOTIFIER */ 341#endif /* CONFIG_MMU_NOTIFIER */
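
The reordering inside set_pte_at_notify() above (notifier callback first, primary pte update second) is what the new comment is about; roughly, the write-protect / copy-on-write sequence it protects looks like this (a heavily simplified sketch, not the actual mm/memory.c code; locking, rmap and accounting are omitted):

	/* Simplified COW-style pte replacement, sketching the notify ordering. */
	static void cow_install_new_pte_sketch(struct mm_struct *mm,
					       struct vm_area_struct *vma,
					       unsigned long address,
					       pte_t *page_table,
					       struct page *new_page)
	{
		pte_t entry = mk_pte(new_page, vma->vm_page_prot);

		/* The old read-only pte is flushed from the primary MMU first. */
		ptep_clear_flush(vma, address, page_table);

		/*
		 * mmu_notifier_change_pte() now runs before set_pte_at(), so
		 * secondary MMUs switch to the new page before any CPU can write
		 * it through the primary MMU; they never keep the old page mapped
		 * read-only while the new one is already writable.
		 */
		set_pte_at_notify(mm, address, page_table, entry);
	}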
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 2daa54f55db7..50aaca81f63d 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -142,6 +142,7 @@ enum zone_stat_item {
142 NUMA_OTHER, /* allocation from other node */ 142 NUMA_OTHER, /* allocation from other node */
143#endif 143#endif
144 NR_ANON_TRANSPARENT_HUGEPAGES, 144 NR_ANON_TRANSPARENT_HUGEPAGES,
145 NR_FREE_CMA_PAGES,
145 NR_VM_ZONE_STAT_ITEMS }; 146 NR_VM_ZONE_STAT_ITEMS };
146 147
147/* 148/*
@@ -217,6 +218,8 @@ struct lruvec {
217#define ISOLATE_UNMAPPED ((__force isolate_mode_t)0x2) 218#define ISOLATE_UNMAPPED ((__force isolate_mode_t)0x2)
218/* Isolate for asynchronous migration */ 219/* Isolate for asynchronous migration */
219#define ISOLATE_ASYNC_MIGRATE ((__force isolate_mode_t)0x4) 220#define ISOLATE_ASYNC_MIGRATE ((__force isolate_mode_t)0x4)
221/* Isolate unevictable pages */
222#define ISOLATE_UNEVICTABLE ((__force isolate_mode_t)0x8)
220 223
221/* LRU Isolation modes. */ 224/* LRU Isolation modes. */
222typedef unsigned __bitwise__ isolate_mode_t; 225typedef unsigned __bitwise__ isolate_mode_t;
@@ -369,8 +372,12 @@ struct zone {
369 spinlock_t lock; 372 spinlock_t lock;
370 int all_unreclaimable; /* All pages pinned */ 373 int all_unreclaimable; /* All pages pinned */
371#if defined CONFIG_COMPACTION || defined CONFIG_CMA 374#if defined CONFIG_COMPACTION || defined CONFIG_CMA
372 /* pfn where the last incremental compaction isolated free pages */ 375 /* Set to true when the PG_migrate_skip bits should be cleared */
376 bool compact_blockskip_flush;
377
378 /* pfns where compaction scanners should start */
373 unsigned long compact_cached_free_pfn; 379 unsigned long compact_cached_free_pfn;
380 unsigned long compact_cached_migrate_pfn;
374#endif 381#endif
375#ifdef CONFIG_MEMORY_HOTPLUG 382#ifdef CONFIG_MEMORY_HOTPLUG
376 /* see spanned/present_pages for more description */ 383 /* see spanned/present_pages for more description */
@@ -704,6 +711,7 @@ typedef struct pglist_data {
704 unsigned long node_spanned_pages; /* total size of physical page 711 unsigned long node_spanned_pages; /* total size of physical page
705 range, including holes */ 712 range, including holes */
706 int node_id; 713 int node_id;
714 nodemask_t reclaim_nodes; /* Nodes allowed to reclaim from */
707 wait_queue_head_t kswapd_wait; 715 wait_queue_head_t kswapd_wait;
708 wait_queue_head_t pfmemalloc_wait; 716 wait_queue_head_t pfmemalloc_wait;
709 struct task_struct *kswapd; /* Protected by lock_memory_hotplug() */ 717 struct task_struct *kswapd; /* Protected by lock_memory_hotplug() */
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 49a3031fda50..d36a8221f58b 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -2,17 +2,6 @@
2#define __INCLUDE_LINUX_OOM_H 2#define __INCLUDE_LINUX_OOM_H
3 3
4/* 4/*
5 * /proc/<pid>/oom_adj is deprecated, see
6 * Documentation/feature-removal-schedule.txt.
7 *
8 * /proc/<pid>/oom_adj set to -17 protects from the oom-killer
9 */
10#define OOM_DISABLE (-17)
11/* inclusive */
12#define OOM_ADJUST_MIN (-16)
13#define OOM_ADJUST_MAX 15
14
15/*
16 * /proc/<pid>/oom_score_adj set to OOM_SCORE_ADJ_MIN disables oom killing for 5 * /proc/<pid>/oom_score_adj set to OOM_SCORE_ADJ_MIN disables oom killing for
17 * pid. 6 * pid.
18 */ 7 */
diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index 105077aa7685..76a9539cfd3f 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -6,6 +6,10 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count);
6void set_pageblock_migratetype(struct page *page, int migratetype); 6void set_pageblock_migratetype(struct page *page, int migratetype);
7int move_freepages_block(struct zone *zone, struct page *page, 7int move_freepages_block(struct zone *zone, struct page *page,
8 int migratetype); 8 int migratetype);
9int move_freepages(struct zone *zone,
10 struct page *start_page, struct page *end_page,
11 int migratetype);
12
9/* 13/*
10 * Changes migrate type in [start_pfn, end_pfn) to be MIGRATE_ISOLATE. 14 * Changes migrate type in [start_pfn, end_pfn) to be MIGRATE_ISOLATE.
11 * If specified range includes migrate types other than MOVABLE or CMA, 15 * If specified range includes migrate types other than MOVABLE or CMA,
@@ -37,6 +41,7 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn);
37 */ 41 */
38int set_migratetype_isolate(struct page *page); 42int set_migratetype_isolate(struct page *page);
39void unset_migratetype_isolate(struct page *page, unsigned migratetype); 43void unset_migratetype_isolate(struct page *page, unsigned migratetype);
40 44struct page *alloc_migrate_target(struct page *page, unsigned long private,
45 int **resultp);
41 46
42#endif 47#endif
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index 19ef95d293ae..eed27f4f4c3e 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -30,6 +30,9 @@ enum pageblock_bits {
30 PB_migrate, 30 PB_migrate,
31 PB_migrate_end = PB_migrate + 3 - 1, 31 PB_migrate_end = PB_migrate + 3 - 1,
32 /* 3 bits required for migrate types */ 32 /* 3 bits required for migrate types */
33#ifdef CONFIG_COMPACTION
34 PB_migrate_skip,/* If set the block is skipped by compaction */
35#endif /* CONFIG_COMPACTION */
33 NR_PAGEBLOCK_BITS 36 NR_PAGEBLOCK_BITS
34}; 37};
35 38
@@ -65,10 +68,22 @@ unsigned long get_pageblock_flags_group(struct page *page,
65void set_pageblock_flags_group(struct page *page, unsigned long flags, 68void set_pageblock_flags_group(struct page *page, unsigned long flags,
66 int start_bitidx, int end_bitidx); 69 int start_bitidx, int end_bitidx);
67 70
71#ifdef CONFIG_COMPACTION
72#define get_pageblock_skip(page) \
73 get_pageblock_flags_group(page, PB_migrate_skip, \
74 PB_migrate_skip + 1)
75#define clear_pageblock_skip(page) \
76 set_pageblock_flags_group(page, 0, PB_migrate_skip, \
77 PB_migrate_skip + 1)
78#define set_pageblock_skip(page) \
79 set_pageblock_flags_group(page, 1, PB_migrate_skip, \
80 PB_migrate_skip + 1)
81#endif /* CONFIG_COMPACTION */
82
68#define get_pageblock_flags(page) \ 83#define get_pageblock_flags(page) \
69 get_pageblock_flags_group(page, 0, NR_PAGEBLOCK_BITS-1) 84 get_pageblock_flags_group(page, 0, PB_migrate_end)
70#define set_pageblock_flags(page, flags) \ 85#define set_pageblock_flags(page, flags) \
71 set_pageblock_flags_group(page, flags, \ 86 set_pageblock_flags_group(page, flags, \
72 0, NR_PAGEBLOCK_BITS-1) 87 0, PB_migrate_end)
73 88
74#endif /* PAGEBLOCK_FLAGS_H */ 89#endif /* PAGEBLOCK_FLAGS_H */
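
A small sketch of how the new per-pageblock skip bit could be consulted and maintained (hypothetical helpers, not from this patch; the real users are the compaction scanners in mm/compaction.c):

	#ifdef CONFIG_COMPACTION
	/* Hypothetical: should a scanner bother with this pageblock at all? */
	static bool pageblock_worth_scanning(struct page *page)
	{
		return !get_pageblock_skip(page);
	}

	/* Hypothetical: remember that scanning this pageblock was futile. */
	static void mark_pageblock_futile(struct page *page)
	{
		set_pageblock_skip(page);
	}
	#endif /* CONFIG_COMPACTION */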
diff --git a/include/linux/prio_tree.h b/include/linux/prio_tree.h
deleted file mode 100644
index db04abb557e0..000000000000
--- a/include/linux/prio_tree.h
+++ /dev/null
@@ -1,120 +0,0 @@
1#ifndef _LINUX_PRIO_TREE_H
2#define _LINUX_PRIO_TREE_H
3
4/*
5 * K&R 2nd ed. A8.3 somewhat obliquely hints that initial sequences of struct
6 * fields with identical types should end up at the same location. We'll use
7 * this until we can scrap struct raw_prio_tree_node.
8 *
9 * Note: all this could be done more elegantly by using unnamed union/struct
10 * fields. However, gcc 2.95.3 and apparently also gcc 3.0.4 don't support this
11 * language extension.
12 */
13
14struct raw_prio_tree_node {
15 struct prio_tree_node *left;
16 struct prio_tree_node *right;
17 struct prio_tree_node *parent;
18};
19
20struct prio_tree_node {
21 struct prio_tree_node *left;
22 struct prio_tree_node *right;
23 struct prio_tree_node *parent;
24 unsigned long start;
25 unsigned long last; /* last location _in_ interval */
26};
27
28struct prio_tree_root {
29 struct prio_tree_node *prio_tree_node;
30 unsigned short index_bits;
31 unsigned short raw;
32 /*
33 * 0: nodes are of type struct prio_tree_node
34 * 1: nodes are of type raw_prio_tree_node
35 */
36};
37
38struct prio_tree_iter {
39 struct prio_tree_node *cur;
40 unsigned long mask;
41 unsigned long value;
42 int size_level;
43
44 struct prio_tree_root *root;
45 pgoff_t r_index;
46 pgoff_t h_index;
47};
48
49static inline void prio_tree_iter_init(struct prio_tree_iter *iter,
50 struct prio_tree_root *root, pgoff_t r_index, pgoff_t h_index)
51{
52 iter->root = root;
53 iter->r_index = r_index;
54 iter->h_index = h_index;
55 iter->cur = NULL;
56}
57
58#define __INIT_PRIO_TREE_ROOT(ptr, _raw) \
59do { \
60 (ptr)->prio_tree_node = NULL; \
61 (ptr)->index_bits = 1; \
62 (ptr)->raw = (_raw); \
63} while (0)
64
65#define INIT_PRIO_TREE_ROOT(ptr) __INIT_PRIO_TREE_ROOT(ptr, 0)
66#define INIT_RAW_PRIO_TREE_ROOT(ptr) __INIT_PRIO_TREE_ROOT(ptr, 1)
67
68#define INIT_PRIO_TREE_NODE(ptr) \
69do { \
70 (ptr)->left = (ptr)->right = (ptr)->parent = (ptr); \
71} while (0)
72
73#define INIT_PRIO_TREE_ITER(ptr) \
74do { \
75 (ptr)->cur = NULL; \
76 (ptr)->mask = 0UL; \
77 (ptr)->value = 0UL; \
78 (ptr)->size_level = 0; \
79} while (0)
80
81#define prio_tree_entry(ptr, type, member) \
82 ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
83
84static inline int prio_tree_empty(const struct prio_tree_root *root)
85{
86 return root->prio_tree_node == NULL;
87}
88
89static inline int prio_tree_root(const struct prio_tree_node *node)
90{
91 return node->parent == node;
92}
93
94static inline int prio_tree_left_empty(const struct prio_tree_node *node)
95{
96 return node->left == node;
97}
98
99static inline int prio_tree_right_empty(const struct prio_tree_node *node)
100{
101 return node->right == node;
102}
103
104
105struct prio_tree_node *prio_tree_replace(struct prio_tree_root *root,
106 struct prio_tree_node *old, struct prio_tree_node *node);
107struct prio_tree_node *prio_tree_insert(struct prio_tree_root *root,
108 struct prio_tree_node *node);
109void prio_tree_remove(struct prio_tree_root *root, struct prio_tree_node *node);
110struct prio_tree_node *prio_tree_next(struct prio_tree_iter *iter);
111
112#define raw_prio_tree_replace(root, old, node) \
113 prio_tree_replace(root, (struct prio_tree_node *) (old), \
114 (struct prio_tree_node *) (node))
115#define raw_prio_tree_insert(root, node) \
116 prio_tree_insert(root, (struct prio_tree_node *) (node))
117#define raw_prio_tree_remove(root, node) \
118 prio_tree_remove(root, (struct prio_tree_node *) (node))
119
120#endif /* _LINUX_PRIO_TREE_H */
diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index 033b507b33b1..0022c1bb1e26 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -23,72 +23,7 @@
23 I know it's not the cleaner way, but in C (not in C++) to get 23 I know it's not the cleaner way, but in C (not in C++) to get
24 performances and genericity... 24 performances and genericity...
25 25
26 Some example of insert and search follows here. The search is a plain 26 See Documentation/rbtree.txt for documentation and samples.
27 normal search over an ordered tree. The insert instead must be implemented
28 in two steps: First, the code must insert the element in order as a red leaf
29 in the tree, and then the support library function rb_insert_color() must
30 be called. Such function will do the not trivial work to rebalance the
31 rbtree, if necessary.
32
33-----------------------------------------------------------------------
34static inline struct page * rb_search_page_cache(struct inode * inode,
35 unsigned long offset)
36{
37 struct rb_node * n = inode->i_rb_page_cache.rb_node;
38 struct page * page;
39
40 while (n)
41 {
42 page = rb_entry(n, struct page, rb_page_cache);
43
44 if (offset < page->offset)
45 n = n->rb_left;
46 else if (offset > page->offset)
47 n = n->rb_right;
48 else
49 return page;
50 }
51 return NULL;
52}
53
54static inline struct page * __rb_insert_page_cache(struct inode * inode,
55 unsigned long offset,
56 struct rb_node * node)
57{
58 struct rb_node ** p = &inode->i_rb_page_cache.rb_node;
59 struct rb_node * parent = NULL;
60 struct page * page;
61
62 while (*p)
63 {
64 parent = *p;
65 page = rb_entry(parent, struct page, rb_page_cache);
66
67 if (offset < page->offset)
68 p = &(*p)->rb_left;
69 else if (offset > page->offset)
70 p = &(*p)->rb_right;
71 else
72 return page;
73 }
74
75 rb_link_node(node, parent, p);
76
77 return NULL;
78}
79
80static inline struct page * rb_insert_page_cache(struct inode * inode,
81 unsigned long offset,
82 struct rb_node * node)
83{
84 struct page * ret;
85 if ((ret = __rb_insert_page_cache(inode, offset, node)))
86 goto out;
87 rb_insert_color(node, &inode->i_rb_page_cache);
88 out:
89 return ret;
90}
91-----------------------------------------------------------------------
92*/ 27*/
93 28
94#ifndef _LINUX_RBTREE_H 29#ifndef _LINUX_RBTREE_H
@@ -97,63 +32,35 @@ static inline struct page * rb_insert_page_cache(struct inode * inode,
97#include <linux/kernel.h> 32#include <linux/kernel.h>
98#include <linux/stddef.h> 33#include <linux/stddef.h>
99 34
100struct rb_node 35struct rb_node {
101{ 36 unsigned long __rb_parent_color;
102 unsigned long rb_parent_color;
103#define RB_RED 0
104#define RB_BLACK 1
105 struct rb_node *rb_right; 37 struct rb_node *rb_right;
106 struct rb_node *rb_left; 38 struct rb_node *rb_left;
107} __attribute__((aligned(sizeof(long)))); 39} __attribute__((aligned(sizeof(long))));
108 /* The alignment might seem pointless, but allegedly CRIS needs it */ 40 /* The alignment might seem pointless, but allegedly CRIS needs it */
109 41
110struct rb_root 42struct rb_root {
111{
112 struct rb_node *rb_node; 43 struct rb_node *rb_node;
113}; 44};
114 45
115 46
116#define rb_parent(r) ((struct rb_node *)((r)->rb_parent_color & ~3)) 47#define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3))
117#define rb_color(r) ((r)->rb_parent_color & 1)
118#define rb_is_red(r) (!rb_color(r))
119#define rb_is_black(r) rb_color(r)
120#define rb_set_red(r) do { (r)->rb_parent_color &= ~1; } while (0)
121#define rb_set_black(r) do { (r)->rb_parent_color |= 1; } while (0)
122
123static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p)
124{
125 rb->rb_parent_color = (rb->rb_parent_color & 3) | (unsigned long)p;
126}
127static inline void rb_set_color(struct rb_node *rb, int color)
128{
129 rb->rb_parent_color = (rb->rb_parent_color & ~1) | color;
130}
131 48
132#define RB_ROOT (struct rb_root) { NULL, } 49#define RB_ROOT (struct rb_root) { NULL, }
133#define rb_entry(ptr, type, member) container_of(ptr, type, member) 50#define rb_entry(ptr, type, member) container_of(ptr, type, member)
134 51
135#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL) 52#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL)
136#define RB_EMPTY_NODE(node) (rb_parent(node) == node) 53
137#define RB_CLEAR_NODE(node) (rb_set_parent(node, node)) 54/* 'empty' nodes are nodes that are known not to be inserted in an rbtree */
55#define RB_EMPTY_NODE(node) \
56 ((node)->__rb_parent_color == (unsigned long)(node))
57#define RB_CLEAR_NODE(node) \
58 ((node)->__rb_parent_color = (unsigned long)(node))
138 59
139static inline void rb_init_node(struct rb_node *rb)
140{
141 rb->rb_parent_color = 0;
142 rb->rb_right = NULL;
143 rb->rb_left = NULL;
144 RB_CLEAR_NODE(rb);
145}
146 60
147extern void rb_insert_color(struct rb_node *, struct rb_root *); 61extern void rb_insert_color(struct rb_node *, struct rb_root *);
148extern void rb_erase(struct rb_node *, struct rb_root *); 62extern void rb_erase(struct rb_node *, struct rb_root *);
149 63
150typedef void (*rb_augment_f)(struct rb_node *node, void *data);
151
152extern void rb_augment_insert(struct rb_node *node,
153 rb_augment_f func, void *data);
154extern struct rb_node *rb_augment_erase_begin(struct rb_node *node);
155extern void rb_augment_erase_end(struct rb_node *node,
156 rb_augment_f func, void *data);
157 64
158/* Find logical next and previous nodes in a tree */ 65/* Find logical next and previous nodes in a tree */
159extern struct rb_node *rb_next(const struct rb_node *); 66extern struct rb_node *rb_next(const struct rb_node *);
@@ -168,7 +75,7 @@ extern void rb_replace_node(struct rb_node *victim, struct rb_node *new,
168static inline void rb_link_node(struct rb_node * node, struct rb_node * parent, 75static inline void rb_link_node(struct rb_node * node, struct rb_node * parent,
169 struct rb_node ** rb_link) 76 struct rb_node ** rb_link)
170{ 77{
171 node->rb_parent_color = (unsigned long )parent; 78 node->__rb_parent_color = (unsigned long)parent;
172 node->rb_left = node->rb_right = NULL; 79 node->rb_left = node->rb_right = NULL;
173 80
174 *rb_link = node; 81 *rb_link = node;
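The insert and search samples deleted above were moved to Documentation/rbtree.txt. As a reminder of the unchanged calling convention (find the link pointer, attach the node as a red leaf with rb_link_node(), then let rb_insert_color() rebalance), here is a minimal sketch; struct thing and its key field are hypothetical, only the rb_* calls are the real API.

struct thing {
	struct rb_node node;
	unsigned long key;
};

/* Insert 'new' into an rbtree ordered by ->key (duplicates go right). */
static void thing_insert(struct rb_root *root, struct thing *new)
{
	struct rb_node **link = &root->rb_node, *parent = NULL;

	while (*link) {
		struct thing *t = rb_entry(*link, struct thing, node);

		parent = *link;
		if (new->key < t->key)
			link = &(*link)->rb_left;
		else
			link = &(*link)->rb_right;
	}

	/* Step 1: link as a red leaf; step 2: let the library rebalance. */
	rb_link_node(&new->node, parent, link);
	rb_insert_color(&new->node, root);
}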
diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h
new file mode 100644
index 000000000000..214caa33433b
--- /dev/null
+++ b/include/linux/rbtree_augmented.h
@@ -0,0 +1,223 @@
1/*
2 Red Black Trees
3 (C) 1999 Andrea Arcangeli <andrea@suse.de>
4 (C) 2002 David Woodhouse <dwmw2@infradead.org>
5 (C) 2012 Michel Lespinasse <walken@google.com>
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
21 linux/include/linux/rbtree_augmented.h
22*/
23
24#ifndef _LINUX_RBTREE_AUGMENTED_H
25#define _LINUX_RBTREE_AUGMENTED_H
26
27#include <linux/rbtree.h>
28
29/*
30 * Please note - only struct rb_augment_callbacks and the prototypes for
31 * rb_insert_augmented() and rb_erase_augmented() are intended to be public.
32 * The rest are implementation details you are not expected to depend on.
33 *
34 * See Documentation/rbtree.txt for documentation and samples.
35 */
36
37struct rb_augment_callbacks {
38 void (*propagate)(struct rb_node *node, struct rb_node *stop);
39 void (*copy)(struct rb_node *old, struct rb_node *new);
40 void (*rotate)(struct rb_node *old, struct rb_node *new);
41};
42
43extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root,
44 void (*augment_rotate)(struct rb_node *old, struct rb_node *new));
45static inline void
46rb_insert_augmented(struct rb_node *node, struct rb_root *root,
47 const struct rb_augment_callbacks *augment)
48{
49 __rb_insert_augmented(node, root, augment->rotate);
50}
51
52#define RB_DECLARE_CALLBACKS(rbstatic, rbname, rbstruct, rbfield, \
53 rbtype, rbaugmented, rbcompute) \
54static inline void \
55rbname ## _propagate(struct rb_node *rb, struct rb_node *stop) \
56{ \
57 while (rb != stop) { \
58 rbstruct *node = rb_entry(rb, rbstruct, rbfield); \
59 rbtype augmented = rbcompute(node); \
60 if (node->rbaugmented == augmented) \
61 break; \
62 node->rbaugmented = augmented; \
63 rb = rb_parent(&node->rbfield); \
64 } \
65} \
66static inline void \
67rbname ## _copy(struct rb_node *rb_old, struct rb_node *rb_new) \
68{ \
69 rbstruct *old = rb_entry(rb_old, rbstruct, rbfield); \
70 rbstruct *new = rb_entry(rb_new, rbstruct, rbfield); \
71 new->rbaugmented = old->rbaugmented; \
72} \
73static void \
74rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new) \
75{ \
76 rbstruct *old = rb_entry(rb_old, rbstruct, rbfield); \
77 rbstruct *new = rb_entry(rb_new, rbstruct, rbfield); \
78 new->rbaugmented = old->rbaugmented; \
79 old->rbaugmented = rbcompute(old); \
80} \
81rbstatic const struct rb_augment_callbacks rbname = { \
82 rbname ## _propagate, rbname ## _copy, rbname ## _rotate \
83};
84
85
86#define RB_RED 0
87#define RB_BLACK 1
88
89#define __rb_parent(pc) ((struct rb_node *)(pc & ~3))
90
91#define __rb_color(pc) ((pc) & 1)
92#define __rb_is_black(pc) __rb_color(pc)
93#define __rb_is_red(pc) (!__rb_color(pc))
94#define rb_color(rb) __rb_color((rb)->__rb_parent_color)
95#define rb_is_red(rb) __rb_is_red((rb)->__rb_parent_color)
96#define rb_is_black(rb) __rb_is_black((rb)->__rb_parent_color)
97
98static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p)
99{
100 rb->__rb_parent_color = rb_color(rb) | (unsigned long)p;
101}
102
103static inline void rb_set_parent_color(struct rb_node *rb,
104 struct rb_node *p, int color)
105{
106 rb->__rb_parent_color = (unsigned long)p | color;
107}
108
109static inline void
110__rb_change_child(struct rb_node *old, struct rb_node *new,
111 struct rb_node *parent, struct rb_root *root)
112{
113 if (parent) {
114 if (parent->rb_left == old)
115 parent->rb_left = new;
116 else
117 parent->rb_right = new;
118 } else
119 root->rb_node = new;
120}
121
122extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
123 void (*augment_rotate)(struct rb_node *old, struct rb_node *new));
124
125static __always_inline void
126rb_erase_augmented(struct rb_node *node, struct rb_root *root,
127 const struct rb_augment_callbacks *augment)
128{
129 struct rb_node *child = node->rb_right, *tmp = node->rb_left;
130 struct rb_node *parent, *rebalance;
131 unsigned long pc;
132
133 if (!tmp) {
134 /*
135 * Case 1: node to erase has no more than 1 child (easy!)
136 *
137 * Note that if there is one child it must be red due to 5)
138 * and node must be black due to 4). We adjust colors locally
139 * so as to bypass __rb_erase_color() later on.
140 */
141 pc = node->__rb_parent_color;
142 parent = __rb_parent(pc);
143 __rb_change_child(node, child, parent, root);
144 if (child) {
145 child->__rb_parent_color = pc;
146 rebalance = NULL;
147 } else
148 rebalance = __rb_is_black(pc) ? parent : NULL;
149 tmp = parent;
150 } else if (!child) {
151 /* Still case 1, but this time the child is node->rb_left */
152 tmp->__rb_parent_color = pc = node->__rb_parent_color;
153 parent = __rb_parent(pc);
154 __rb_change_child(node, tmp, parent, root);
155 rebalance = NULL;
156 tmp = parent;
157 } else {
158 struct rb_node *successor = child, *child2;
159 tmp = child->rb_left;
160 if (!tmp) {
161 /*
162 * Case 2: node's successor is its right child
163 *
164 * (n) (s)
165 * / \ / \
166 * (x) (s) -> (x) (c)
167 * \
168 * (c)
169 */
170 parent = successor;
171 child2 = successor->rb_right;
172 augment->copy(node, successor);
173 } else {
174 /*
175 * Case 3: node's successor is leftmost under
176 * node's right child subtree
177 *
178 * (n) (s)
179 * / \ / \
180 * (x) (y) -> (x) (y)
181 * / /
182 * (p) (p)
183 * / /
184 * (s) (c)
185 * \
186 * (c)
187 */
188 do {
189 parent = successor;
190 successor = tmp;
191 tmp = tmp->rb_left;
192 } while (tmp);
193 parent->rb_left = child2 = successor->rb_right;
194 successor->rb_right = child;
195 rb_set_parent(child, successor);
196 augment->copy(node, successor);
197 augment->propagate(parent, successor);
198 }
199
200 successor->rb_left = tmp = node->rb_left;
201 rb_set_parent(tmp, successor);
202
203 pc = node->__rb_parent_color;
204 tmp = __rb_parent(pc);
205 __rb_change_child(node, successor, tmp, root);
206 if (child2) {
207 successor->__rb_parent_color = pc;
208 rb_set_parent_color(child2, parent, RB_BLACK);
209 rebalance = NULL;
210 } else {
211 unsigned long pc2 = successor->__rb_parent_color;
212 successor->__rb_parent_color = pc;
213 rebalance = __rb_is_black(pc2) ? parent : NULL;
214 }
215 tmp = successor;
216 }
217
218 augment->propagate(tmp, NULL);
219 if (rebalance)
220 __rb_erase_color(rebalance, root, augment->rotate);
221}
222
223#endif /* _LINUX_RBTREE_AUGMENTED_H */
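Illustrative sketch, not part of the patch: RB_DECLARE_CALLBACKS() derives the propagate/copy/rotate callbacks from one per-node compute function. For a node that caches the maximum end point of its subtree (the shape the new interval tree code builds on), usage could look as follows; struct itnode and compute_subtree_last() are hypothetical names.

struct itnode {
	struct rb_node rb;
	unsigned long start, last;	/* the interval itself */
	unsigned long subtree_last;	/* augmented: max ->last below here */
};

/* Recompute the cached maximum from the node and its two children. */
static inline unsigned long compute_subtree_last(struct itnode *node)
{
	unsigned long max = node->last;
	struct itnode *child;

	if (node->rb.rb_left) {
		child = rb_entry(node->rb.rb_left, struct itnode, rb);
		if (child->subtree_last > max)
			max = child->subtree_last;
	}
	if (node->rb.rb_right) {
		child = rb_entry(node->rb.rb_right, struct itnode, rb);
		if (child->subtree_last > max)
			max = child->subtree_last;
	}
	return max;
}

RB_DECLARE_CALLBACKS(static, itnode_augment, struct itnode, rb,
		     unsigned long, subtree_last, compute_subtree_last)

Insertion then pairs rb_link_node() with rb_insert_augmented(&node->rb, root, &itnode_augment), and removal uses rb_erase_augmented() with the same callbacks.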
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 3fce545df394..bfe1f4780644 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -37,14 +37,14 @@ struct anon_vma {
37 atomic_t refcount; 37 atomic_t refcount;
38 38
39 /* 39 /*
40 * NOTE: the LSB of the head.next is set by 40 * NOTE: the LSB of the rb_root.rb_node is set by
41 * mm_take_all_locks() _after_ taking the above lock. So the 41 * mm_take_all_locks() _after_ taking the above lock. So the
42 * head must only be read/written after taking the above lock 42 * rb_root must only be read/written after taking the above lock
43 * to be sure to see a valid next pointer. The LSB bit itself 43 * to be sure to see a valid next pointer. The LSB bit itself
44 * is serialized by a system wide lock only visible to 44 * is serialized by a system wide lock only visible to
45 * mm_take_all_locks() (mm_all_locks_mutex). 45 * mm_take_all_locks() (mm_all_locks_mutex).
46 */ 46 */
47 struct list_head head; /* Chain of private "related" vmas */ 47 struct rb_root rb_root; /* Interval tree of private "related" vmas */
48}; 48};
49 49
50/* 50/*
@@ -57,14 +57,29 @@ struct anon_vma {
57 * with a VMA, or the VMAs associated with an anon_vma. 57 * with a VMA, or the VMAs associated with an anon_vma.
58 * The "same_vma" list contains the anon_vma_chains linking 58 * The "same_vma" list contains the anon_vma_chains linking
59 * all the anon_vmas associated with this VMA. 59 * all the anon_vmas associated with this VMA.
60 * The "same_anon_vma" list contains the anon_vma_chains 60 * The "rb" field indexes on an interval tree the anon_vma_chains
61 * which link all the VMAs associated with this anon_vma. 61 * which link all the VMAs associated with this anon_vma.
62 */ 62 */
63struct anon_vma_chain { 63struct anon_vma_chain {
64 struct vm_area_struct *vma; 64 struct vm_area_struct *vma;
65 struct anon_vma *anon_vma; 65 struct anon_vma *anon_vma;
66 struct list_head same_vma; /* locked by mmap_sem & page_table_lock */ 66 struct list_head same_vma; /* locked by mmap_sem & page_table_lock */
67 struct list_head same_anon_vma; /* locked by anon_vma->mutex */ 67 struct rb_node rb; /* locked by anon_vma->mutex */
68 unsigned long rb_subtree_last;
69#ifdef CONFIG_DEBUG_VM_RB
70 unsigned long cached_vma_start, cached_vma_last;
71#endif
72};
73
74enum ttu_flags {
75 TTU_UNMAP = 0, /* unmap mode */
76 TTU_MIGRATION = 1, /* migration mode */
77 TTU_MUNLOCK = 2, /* munlock mode */
78 TTU_ACTION_MASK = 0xff,
79
80 TTU_IGNORE_MLOCK = (1 << 8), /* ignore mlock */
81 TTU_IGNORE_ACCESS = (1 << 9), /* don't age */
82 TTU_IGNORE_HWPOISON = (1 << 10),/* corrupted page is recoverable */
68}; 83};
69 84
70#ifdef CONFIG_MMU 85#ifdef CONFIG_MMU
@@ -120,7 +135,6 @@ void anon_vma_init(void); /* create anon_vma_cachep */
120int anon_vma_prepare(struct vm_area_struct *); 135int anon_vma_prepare(struct vm_area_struct *);
121void unlink_anon_vmas(struct vm_area_struct *); 136void unlink_anon_vmas(struct vm_area_struct *);
122int anon_vma_clone(struct vm_area_struct *, struct vm_area_struct *); 137int anon_vma_clone(struct vm_area_struct *, struct vm_area_struct *);
123void anon_vma_moveto_tail(struct vm_area_struct *);
124int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *); 138int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *);
125 139
126static inline void anon_vma_merge(struct vm_area_struct *vma, 140static inline void anon_vma_merge(struct vm_area_struct *vma,
@@ -161,16 +175,6 @@ int page_referenced(struct page *, int is_locked,
161int page_referenced_one(struct page *, struct vm_area_struct *, 175int page_referenced_one(struct page *, struct vm_area_struct *,
162 unsigned long address, unsigned int *mapcount, unsigned long *vm_flags); 176 unsigned long address, unsigned int *mapcount, unsigned long *vm_flags);
163 177
164enum ttu_flags {
165 TTU_UNMAP = 0, /* unmap mode */
166 TTU_MIGRATION = 1, /* migration mode */
167 TTU_MUNLOCK = 2, /* munlock mode */
168 TTU_ACTION_MASK = 0xff,
169
170 TTU_IGNORE_MLOCK = (1 << 8), /* ignore mlock */
171 TTU_IGNORE_ACCESS = (1 << 9), /* don't age */
172 TTU_IGNORE_HWPOISON = (1 << 10),/* corrupted page is recoverable */
173};
174#define TTU_ACTION(x) ((x) & TTU_ACTION_MASK) 178#define TTU_ACTION(x) ((x) & TTU_ACTION_MASK)
175 179
176int try_to_unmap(struct page *, enum ttu_flags flags); 180int try_to_unmap(struct page *, enum ttu_flags flags);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9c5612f0374b..c2070e92a9d6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -671,7 +671,6 @@ struct signal_struct {
671 struct rw_semaphore group_rwsem; 671 struct rw_semaphore group_rwsem;
672#endif 672#endif
673 673
674 int oom_adj; /* OOM kill score adjustment (bit shift) */
675 int oom_score_adj; /* OOM kill score adjustment */ 674 int oom_score_adj; /* OOM kill score adjustment */
676 int oom_score_adj_min; /* OOM kill score adjustment minimum value. 675 int oom_score_adj_min; /* OOM kill score adjustment minimum value.
677 * Only settable by CAP_SYS_RESOURCE. */ 676 * Only settable by CAP_SYS_RESOURCE. */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 388e70601413..68df9c17fbbb 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -281,7 +281,7 @@ static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order)
281} 281}
282#endif 282#endif
283 283
284extern int page_evictable(struct page *page, struct vm_area_struct *vma); 284extern int page_evictable(struct page *page);
285extern void check_move_unevictable_pages(struct page **, int nr_pages); 285extern void check_move_unevictable_pages(struct page **, int nr_pages);
286 286
287extern unsigned long scan_unevictable_pages; 287extern unsigned long scan_unevictable_pages;
diff --git a/include/linux/timerqueue.h b/include/linux/timerqueue.h
index 5088727478fd..a520fd70a59f 100644
--- a/include/linux/timerqueue.h
+++ b/include/linux/timerqueue.h
@@ -39,7 +39,7 @@ struct timerqueue_node *timerqueue_getnext(struct timerqueue_head *head)
39 39
40static inline void timerqueue_init(struct timerqueue_node *node) 40static inline void timerqueue_init(struct timerqueue_node *node)
41{ 41{
42 rb_init_node(&node->node); 42 RB_CLEAR_NODE(&node->node);
43} 43}
44 44
45static inline void timerqueue_init_head(struct timerqueue_head *head) 45static inline void timerqueue_init_head(struct timerqueue_head *head)
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 57f7b1091511..3d3114594370 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -52,7 +52,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
52 UNEVICTABLE_PGMUNLOCKED, 52 UNEVICTABLE_PGMUNLOCKED,
53 UNEVICTABLE_PGCLEARED, /* on COW, page truncate */ 53 UNEVICTABLE_PGCLEARED, /* on COW, page truncate */
54 UNEVICTABLE_PGSTRANDED, /* unable to isolate on unlock */ 54 UNEVICTABLE_PGSTRANDED, /* unable to isolate on unlock */
55 UNEVICTABLE_MLOCKFREED,
56#ifdef CONFIG_TRANSPARENT_HUGEPAGE 55#ifdef CONFIG_TRANSPARENT_HUGEPAGE
57 THP_FAULT_ALLOC, 56 THP_FAULT_ALLOC,
58 THP_FAULT_FALLBACK, 57 THP_FAULT_FALLBACK,
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index ad2cfd53dadc..92a86b2cce33 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -198,6 +198,8 @@ extern void __dec_zone_state(struct zone *, enum zone_stat_item);
198void refresh_cpu_vm_stats(int); 198void refresh_cpu_vm_stats(int);
199void refresh_zone_stat_thresholds(void); 199void refresh_zone_stat_thresholds(void);
200 200
201void drain_zonestat(struct zone *zone, struct per_cpu_pageset *);
202
201int calculate_pressure_threshold(struct zone *zone); 203int calculate_pressure_threshold(struct zone *zone);
202int calculate_normal_threshold(struct zone *zone); 204int calculate_normal_threshold(struct zone *zone);
203void set_pgdat_percpu_threshold(pg_data_t *pgdat, 205void set_pgdat_percpu_threshold(pg_data_t *pgdat,
@@ -251,8 +253,18 @@ static inline void __dec_zone_page_state(struct page *page,
251static inline void refresh_cpu_vm_stats(int cpu) { } 253static inline void refresh_cpu_vm_stats(int cpu) { }
252static inline void refresh_zone_stat_thresholds(void) { } 254static inline void refresh_zone_stat_thresholds(void) { }
253 255
256static inline void drain_zonestat(struct zone *zone,
257 struct per_cpu_pageset *pset) { }
254#endif /* CONFIG_SMP */ 258#endif /* CONFIG_SMP */
255 259
260static inline void __mod_zone_freepage_state(struct zone *zone, int nr_pages,
261 int migratetype)
262{
263 __mod_zone_page_state(zone, NR_FREE_PAGES, nr_pages);
264 if (is_migrate_cma(migratetype))
265 __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, nr_pages);
266}
267
256extern const char * const vmstat_text[]; 268extern const char * const vmstat_text[];
257 269
258#endif /* _LINUX_VMSTAT_H */ 270#endif /* _LINUX_VMSTAT_H */
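Illustrative only: the new __mod_zone_freepage_state() helper keeps NR_FREE_PAGES and, for CMA pageblocks, NR_FREE_CMA_PAGES in step. A hypothetical caller freeing or allocating a 2^order block would pass the page count with the appropriate sign.

/* Hypothetical wrappers; only __mod_zone_freepage_state() is real. */
static inline void example_account_free(struct zone *zone, unsigned int order,
					int migratetype)
{
	__mod_zone_freepage_state(zone, 1 << order, migratetype);
}

static inline void example_account_alloc(struct zone *zone, unsigned int order,
					 int migratetype)
{
	/* Allocation is the mirror image: a negative delta. */
	__mod_zone_freepage_state(zone, -(1 << order), migratetype);
}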