Diffstat (limited to 'include')
 include/linux/bootmem.h            | 153
 include/linux/compaction.h         |  16
 include/linux/dma-debug.h          |   6
 include/linux/fsnotify_backend.h   | 118
 include/linux/huge_mm.h            |  23
 include/linux/hugetlb.h            |   7
 include/linux/init_task.h          |   2
 include/linux/ksm.h                |  15
 include/linux/memblock.h           |  54
 include/linux/mempolicy.h          |  32
 include/linux/migrate.h            |   6
 include/linux/mm.h                 |  70
 include/linux/mman.h               |   1
 include/linux/mmzone.h             |  11
 include/linux/posix_acl.h          |  78
 include/linux/rmap.h               |  27
 include/linux/sched.h              |  12
 include/trace/events/compaction.h  |  42
 include/trace/events/migrate.h     |  26
 include/trace/events/sched.h       |  87
 20 files changed, 543 insertions(+), 243 deletions(-)
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index f1f07d31a3af..2fae55def608 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -5,6 +5,7 @@
 #define _LINUX_BOOTMEM_H
 
 #include <linux/mmzone.h>
+#include <linux/mm_types.h>
 #include <asm/dma.h>
 
 /*
@@ -52,7 +53,6 @@ extern void free_bootmem_node(pg_data_t *pgdat,
 			      unsigned long size);
 extern void free_bootmem(unsigned long physaddr, unsigned long size);
 extern void free_bootmem_late(unsigned long physaddr, unsigned long size);
-extern void __free_pages_bootmem(struct page *page, unsigned int order);
 
 /*
  * Flags for reserve_bootmem (also if CONFIG_HAVE_ARCH_BOOTMEM_NODE,
@@ -142,6 +142,157 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
 #define alloc_bootmem_low_pages_node(pgdat, x) \
 	__alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0)
 
+
+#if defined(CONFIG_HAVE_MEMBLOCK) && defined(CONFIG_NO_BOOTMEM)
+
+/* FIXME: use MEMBLOCK_ALLOC_* variants here */
+#define BOOTMEM_ALLOC_ACCESSIBLE	0
+#define BOOTMEM_ALLOC_ANYWHERE		(~(phys_addr_t)0)
+
+/* FIXME: Move to memblock.h at a point where we remove nobootmem.c */
+void *memblock_virt_alloc_try_nid_nopanic(phys_addr_t size,
+		phys_addr_t align, phys_addr_t min_addr,
+		phys_addr_t max_addr, int nid);
+void *memblock_virt_alloc_try_nid(phys_addr_t size, phys_addr_t align,
+		phys_addr_t min_addr, phys_addr_t max_addr, int nid);
+void __memblock_free_early(phys_addr_t base, phys_addr_t size);
+void __memblock_free_late(phys_addr_t base, phys_addr_t size);
+
+static inline void * __init memblock_virt_alloc(
+		phys_addr_t size, phys_addr_t align)
+{
+	return memblock_virt_alloc_try_nid(size, align, BOOTMEM_LOW_LIMIT,
+					   BOOTMEM_ALLOC_ACCESSIBLE,
+					   NUMA_NO_NODE);
+}
+
+static inline void * __init memblock_virt_alloc_nopanic(
+		phys_addr_t size, phys_addr_t align)
+{
+	return memblock_virt_alloc_try_nid_nopanic(size, align,
+						   BOOTMEM_LOW_LIMIT,
+						   BOOTMEM_ALLOC_ACCESSIBLE,
+						   NUMA_NO_NODE);
+}
+
+static inline void * __init memblock_virt_alloc_from_nopanic(
+		phys_addr_t size, phys_addr_t align, phys_addr_t min_addr)
+{
+	return memblock_virt_alloc_try_nid_nopanic(size, align, min_addr,
+						   BOOTMEM_ALLOC_ACCESSIBLE,
+						   NUMA_NO_NODE);
+}
+
+static inline void * __init memblock_virt_alloc_node(
+		phys_addr_t size, int nid)
+{
+	return memblock_virt_alloc_try_nid(size, 0, BOOTMEM_LOW_LIMIT,
+					   BOOTMEM_ALLOC_ACCESSIBLE, nid);
+}
+
+static inline void * __init memblock_virt_alloc_node_nopanic(
+		phys_addr_t size, int nid)
+{
+	return memblock_virt_alloc_try_nid_nopanic(size, 0, BOOTMEM_LOW_LIMIT,
+						   BOOTMEM_ALLOC_ACCESSIBLE,
+						   nid);
+}
+
+static inline void __init memblock_free_early(
+		phys_addr_t base, phys_addr_t size)
+{
+	__memblock_free_early(base, size);
+}
+
+static inline void __init memblock_free_early_nid(
+		phys_addr_t base, phys_addr_t size, int nid)
+{
+	__memblock_free_early(base, size);
+}
+
+static inline void __init memblock_free_late(
+		phys_addr_t base, phys_addr_t size)
+{
+	__memblock_free_late(base, size);
+}
+
+#else
+
+#define BOOTMEM_ALLOC_ACCESSIBLE	0
+
+
+/* Fall back to all the existing bootmem APIs */
+static inline void * __init memblock_virt_alloc(
+		phys_addr_t size, phys_addr_t align)
+{
+	if (!align)
+		align = SMP_CACHE_BYTES;
+	return __alloc_bootmem(size, align, BOOTMEM_LOW_LIMIT);
+}
+
+static inline void * __init memblock_virt_alloc_nopanic(
+		phys_addr_t size, phys_addr_t align)
+{
+	if (!align)
+		align = SMP_CACHE_BYTES;
+	return __alloc_bootmem_nopanic(size, align, BOOTMEM_LOW_LIMIT);
+}
+
+static inline void * __init memblock_virt_alloc_from_nopanic(
+		phys_addr_t size, phys_addr_t align, phys_addr_t min_addr)
+{
+	return __alloc_bootmem_nopanic(size, align, min_addr);
+}
+
+static inline void * __init memblock_virt_alloc_node(
+		phys_addr_t size, int nid)
+{
+	return __alloc_bootmem_node(NODE_DATA(nid), size, SMP_CACHE_BYTES,
+				    BOOTMEM_LOW_LIMIT);
+}
+
+static inline void * __init memblock_virt_alloc_node_nopanic(
+		phys_addr_t size, int nid)
+{
+	return __alloc_bootmem_node_nopanic(NODE_DATA(nid), size,
+					    SMP_CACHE_BYTES,
+					    BOOTMEM_LOW_LIMIT);
+}
+
+static inline void * __init memblock_virt_alloc_try_nid(phys_addr_t size,
+	phys_addr_t align, phys_addr_t min_addr, phys_addr_t max_addr, int nid)
+{
+	return __alloc_bootmem_node_high(NODE_DATA(nid), size, align,
+					 min_addr);
+}
+
+static inline void * __init memblock_virt_alloc_try_nid_nopanic(
+		phys_addr_t size, phys_addr_t align,
+		phys_addr_t min_addr, phys_addr_t max_addr, int nid)
+{
+	return ___alloc_bootmem_node_nopanic(NODE_DATA(nid), size, align,
+					     min_addr, max_addr);
+}
+
+static inline void __init memblock_free_early(
+		phys_addr_t base, phys_addr_t size)
+{
+	free_bootmem(base, size);
+}
+
+static inline void __init memblock_free_early_nid(
+		phys_addr_t base, phys_addr_t size, int nid)
+{
+	free_bootmem_node(NODE_DATA(nid), base, size);
+}
+
+static inline void __init memblock_free_late(
+		phys_addr_t base, phys_addr_t size)
+{
+	free_bootmem_late(base, size);
+}
+#endif /* defined(CONFIG_HAVE_MEMBLOCK) && defined(CONFIG_NO_BOOTMEM) */
+
 #ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP
 extern void *alloc_remap(int nid, unsigned long size);
 #else
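
To make the intended use concrete, here is a minimal sketch of an early-boot caller (everything prefixed example_ is hypothetical, not part of this patch). memblock_virt_alloc() panics if the request cannot be satisfied, so only the _nopanic variants need a NULL check:

/* Hypothetical early-init allocations using the new wrappers. */
static void __init example_early_tables_init(int nid)
{
	void *global_table, *node_table;

	/* Node-agnostic; panics on failure, no NULL check needed. */
	global_table = memblock_virt_alloc(PAGE_SIZE, SMP_CACHE_BYTES);

	/* Node-local attempt that may fail; the caller must check. */
	node_table = memblock_virt_alloc_node_nopanic(PAGE_SIZE, nid);
	if (!node_table)
		pr_warn("example: node %d allocation failed\n", nid);
}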
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 091d72e70d8a..7e1c76e3cd68 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -62,6 +62,22 @@ static inline bool compaction_deferred(struct zone *zone, int order)
 	return zone->compact_considered < defer_limit;
 }
 
+/*
+ * Update defer tracking counters after successful compaction of given order,
+ * which means an allocation either succeeded (alloc_success == true) or is
+ * expected to succeed.
+ */
+static inline void compaction_defer_reset(struct zone *zone, int order,
+		bool alloc_success)
+{
+	if (alloc_success) {
+		zone->compact_considered = 0;
+		zone->compact_defer_shift = 0;
+	}
+	if (order >= zone->compact_order_failed)
+		zone->compact_order_failed = order + 1;
+}
+
 /* Returns true if restarting compaction after many failures */
 static inline bool compaction_restarting(struct zone *zone, int order)
 {
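
A sketch of the intended call site, assuming a hypothetical wrapper example_try_compact_and_alloc() around direct compaction: once an allocation of a given order has been satisfied, the caller resets the deferral window for that order so future attempts are not skipped.

/* Illustrative caller: reset deferred compaction once an allocation
 * of this order has been satisfied by compaction. */
static struct page *example_alloc_after_compaction(struct zone *zone,
						   int order)
{
	struct page *page = example_try_compact_and_alloc(zone, order);

	if (page)
		compaction_defer_reset(zone, order, true);
	return page;
}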
diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index fc0e34ce038f..fe8cb610deac 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -85,6 +85,8 @@ extern void debug_dma_sync_sg_for_device(struct device *dev,
 
 extern void debug_dma_dump_mappings(struct device *dev);
 
+extern void debug_dma_assert_idle(struct page *page);
+
 #else /* CONFIG_DMA_API_DEBUG */
 
 static inline void dma_debug_add_bus(struct bus_type *bus)
@@ -183,6 +185,10 @@ static inline void debug_dma_dump_mappings(struct device *dev)
 {
 }
 
+static inline void debug_dma_assert_idle(struct page *page)
+{
+}
+
 #endif /* CONFIG_DMA_API_DEBUG */
 
 #endif /* __DMA_DEBUG_H */
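
The new assertion is meant to be called by code that is about to overwrite a page that must have no live DMA mapping; a hedged sketch with an illustrative surrounding function (not part of this patch):

/* Illustrative caller: a page about to be copied (e.g. for COW)
 * should not still be mapped for DMA.  With CONFIG_DMA_API_DEBUG=n
 * this compiles away via the stub above. */
static void example_prepare_copy(struct page *src)
{
	debug_dma_assert_idle(src);
	/* ... proceed to copy the page ... */
}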
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 4b2ee8d12f5e..7d8d5e608594 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -15,7 +15,6 @@
 #include <linux/path.h> /* struct path */
 #include <linux/spinlock.h>
 #include <linux/types.h>
-
 #include <linux/atomic.h>
 
 /*
@@ -79,6 +78,7 @@ struct fsnotify_group;
 struct fsnotify_event;
 struct fsnotify_mark;
 struct fsnotify_event_private_data;
+struct fsnotify_fname;
 
 /*
  * Each group much define these ops. The fsnotify infrastructure will call
@@ -94,17 +94,27 @@ struct fsnotify_event_private_data;
  * userspace messages that marks have been removed.
  */
 struct fsnotify_ops {
-	bool (*should_send_event)(struct fsnotify_group *group, struct inode *inode,
-				  struct fsnotify_mark *inode_mark,
-				  struct fsnotify_mark *vfsmount_mark,
-				  __u32 mask, void *data, int data_type);
 	int (*handle_event)(struct fsnotify_group *group,
+			    struct inode *inode,
 			    struct fsnotify_mark *inode_mark,
 			    struct fsnotify_mark *vfsmount_mark,
-			    struct fsnotify_event *event);
+			    u32 mask, void *data, int data_type,
+			    const unsigned char *file_name);
 	void (*free_group_priv)(struct fsnotify_group *group);
 	void (*freeing_mark)(struct fsnotify_mark *mark, struct fsnotify_group *group);
-	void (*free_event_priv)(struct fsnotify_event_private_data *priv);
+	void (*free_event)(struct fsnotify_event *event);
+};
+
+/*
+ * all of the information about the original object we want to now send to
+ * a group. If you want to carry more info from the accessing task to the
+ * listener this structure is where you need to be adding fields.
+ */
+struct fsnotify_event {
+	struct list_head list;
+	/* inode may ONLY be dereferenced during handle_event(). */
+	struct inode *inode;	/* either the inode the event happened to or its parent */
+	u32 mask;		/* the type of access, bitwise OR for FS_* event types */
 };
 
 /*
@@ -148,7 +158,11 @@ struct fsnotify_group {
 	 * a group */
 	struct list_head marks_list;	/* all inode marks for this group */
 
 	struct fasync_struct *fsn_fa;	/* async notification */
+
+	struct fsnotify_event overflow_event;	/* Event we queue when the
+						 * notification list is too
+						 * full */
 
 	/* groups can define private fields here or use the void *private */
 	union {
@@ -177,76 +191,10 @@ struct fsnotify_group {
 	};
 };
 
-/*
- * A single event can be queued in multiple group->notification_lists.
- *
- * each group->notification_list will point to an event_holder which in turns points
- * to the actual event that needs to be sent to userspace.
- *
- * Seemed cheaper to create a refcnt'd event and a small holder for every group
- * than create a different event for every group
- *
- */
-struct fsnotify_event_holder {
-	struct fsnotify_event *event;
-	struct list_head event_list;
-};
-
-/*
- * Inotify needs to tack data onto an event. This struct lets us later find the
- * correct private data of the correct group.
- */
-struct fsnotify_event_private_data {
-	struct fsnotify_group *group;
-	struct list_head event_list;
-};
-
-/*
- * all of the information about the original object we want to now send to
- * a group. If you want to carry more info from the accessing task to the
- * listener this structure is where you need to be adding fields.
- */
-struct fsnotify_event {
-	/*
-	 * If we create an event we are also likely going to need a holder
-	 * to link to a group. So embed one holder in the event. Means only
-	 * one allocation for the common case where we only have one group
-	 */
-	struct fsnotify_event_holder holder;
-	spinlock_t lock;	/* protection for the associated event_holder and private_list */
-	/* to_tell may ONLY be dereferenced during handle_event(). */
-	struct inode *to_tell;	/* either the inode the event happened to or its parent */
-	/*
-	 * depending on the event type we should have either a path or inode
-	 * We hold a reference on path, but NOT on inode. Since we have the ref on
-	 * the path, it may be dereferenced at any point during this object's
-	 * lifetime. That reference is dropped when this object's refcnt hits
-	 * 0. If this event contains an inode instead of a path, the inode may
-	 * ONLY be used during handle_event().
-	 */
-	union {
-		struct path path;
-		struct inode *inode;
-	};
 /* when calling fsnotify tell it if the data is a path or inode */
 #define FSNOTIFY_EVENT_NONE	0
 #define FSNOTIFY_EVENT_PATH	1
 #define FSNOTIFY_EVENT_INODE	2
-	int data_type;		/* which of the above union we have */
-	atomic_t refcnt;	/* how many groups still are using/need to send this event */
-	__u32 mask;		/* the type of access, bitwise OR for FS_* event types */
-
-	u32 sync_cookie;	/* used to corrolate events, namely inotify mv events */
-	const unsigned char *file_name;
-	size_t name_len;
-	struct pid *tgid;
-
-#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
-	__u32 response;	/* userspace answer to question */
-#endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */
-
-	struct list_head private_data_list;	/* groups can store private data here */
-};
 
 /*
  * Inode specific fields in an fsnotify_mark
@@ -370,17 +318,12 @@ extern void fsnotify_put_group(struct fsnotify_group *group);
 extern void fsnotify_destroy_group(struct fsnotify_group *group);
 /* fasync handler function */
 extern int fsnotify_fasync(int fd, struct file *file, int on);
-/* take a reference to an event */
-extern void fsnotify_get_event(struct fsnotify_event *event);
-extern void fsnotify_put_event(struct fsnotify_event *event);
-/* find private data previously attached to an event and unlink it */
-extern struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnotify_group *group,
-									   struct fsnotify_event *event);
-
+/* Free event from memory */
+extern void fsnotify_destroy_event(struct fsnotify_group *group,
+				   struct fsnotify_event *event);
 /* attach the event to the group notification queue */
 extern struct fsnotify_event *fsnotify_add_notify_event(struct fsnotify_group *group,
 							struct fsnotify_event *event,
-							struct fsnotify_event_private_data *priv,
 							struct fsnotify_event *(*merge)(struct list_head *,
 											struct fsnotify_event *));
 /* true if the group notification queue is empty */
@@ -430,15 +373,8 @@ extern void fsnotify_put_mark(struct fsnotify_mark *mark);
 extern void fsnotify_unmount_inodes(struct list_head *list);
 
 /* put here because inotify does some weird stuff when destroying watches */
-extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
-						    void *data, int data_is,
-						    const unsigned char *name,
-						    u32 cookie, gfp_t gfp);
-
-/* fanotify likes to change events after they are on lists... */
-extern struct fsnotify_event *fsnotify_clone_event(struct fsnotify_event *old_event);
-extern int fsnotify_replace_event(struct fsnotify_event_holder *old_holder,
-				  struct fsnotify_event *new_event);
+extern void fsnotify_init_event(struct fsnotify_event *event,
+				struct inode *to_tell, u32 mask);
 
 #else
 
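
For orientation, a hypothetical backend written against the reworked API: the separate should_send_event() callback is gone, so filtering and queueing both happen in handle_event(), which now receives the raw event data instead of a pre-built fsnotify_event. All example_ names are made up for illustration.

static int example_handle_event(struct fsnotify_group *group,
				struct inode *inode,
				struct fsnotify_mark *inode_mark,
				struct fsnotify_mark *vfsmount_mark,
				u32 mask, void *data, int data_type,
				const unsigned char *file_name)
{
	if (!(mask & FS_MODIFY))
		return 0;	/* filtering now lives here */
	/* ... allocate a backend-specific event, fsnotify_init_event(),
	 * and queue it with fsnotify_add_notify_event() ... */
	return 0;
}

static const struct fsnotify_ops example_fsnotify_ops = {
	.handle_event = example_handle_event,
};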
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 91672e2deec3..db512014e061 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -157,6 +157,26 @@ static inline int hpage_nr_pages(struct page *page)
 		return HPAGE_PMD_NR;
 	return 1;
 }
+/*
+ * compound_trans_head() should be used instead of compound_head(),
+ * whenever the "page" passed as parameter could be the tail of a
+ * transparent hugepage that could be undergoing a
+ * __split_huge_page_refcount(). The page structure layout often
+ * changes across releases and it makes extensive use of unions. So if
+ * the page structure layout will change in a way that
+ * page->first_page gets clobbered by __split_huge_page_refcount, the
+ * implementation making use of smp_rmb() will be required.
+ *
+ * Currently we define compound_trans_head as compound_head, because
+ * page->private is in the same union with page->first_page, and
+ * page->private isn't clobbered. However this also means we're
+ * currently leaving dirt into the page->private field of anonymous
+ * pages resulting from a THP split, instead of setting page->private
+ * to zero like for every other page that has PG_private not set. But
+ * anonymous pages don't use page->private so this is not a problem.
+ */
+#if 0
+/* This will be needed if page->private will be clobbered in split_huge_page */
 static inline struct page *compound_trans_head(struct page *page)
 {
 	if (PageTail(page)) {
@@ -174,6 +194,9 @@ static inline struct page *compound_trans_head(struct page *page)
 	}
 	return page;
 }
+#else
+#define compound_trans_head(page) compound_head(page)
+#endif
 
 extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long addr, pmd_t pmd, pmd_t *pmdp);
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index bd7e98752222..d01cc972a1d9 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -31,7 +31,6 @@ struct hugepage_subpool *hugepage_new_subpool(long nr_blocks);
 void hugepage_put_subpool(struct hugepage_subpool *spool);
 
 int PageHuge(struct page *page);
-int PageHeadHuge(struct page *page_head);
 
 void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
 int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
@@ -104,11 +103,6 @@ static inline int PageHuge(struct page *page)
 	return 0;
 }
 
-static inline int PageHeadHuge(struct page *page_head)
-{
-	return 0;
-}
-
 static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
 {
 }
@@ -360,6 +354,7 @@ static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
 
 static inline struct hstate *page_hstate(struct page *page)
 {
+	VM_BUG_ON(!PageHuge(page));
 	return size_to_hstate(PAGE_SIZE << compound_order(page));
 }
 
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index f0e52383a001..1516a8ff8f92 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -41,6 +41,7 @@ extern struct fs_struct init_fs;
 
 #define INIT_SIGNALS(sig) {						\
 	.nr_threads	= 1,						\
+	.thread_head	= LIST_HEAD_INIT(init_task.thread_node),	\
 	.wait_chldexit	= __WAIT_QUEUE_HEAD_INITIALIZER(sig.wait_chldexit),\
 	.shared_pending	= {						\
 		.list = LIST_HEAD_INIT(sig.shared_pending.list),	\
@@ -222,6 +223,7 @@ extern struct task_group root_task_group;
 	[PIDTYPE_SID]  = INIT_PID_LINK(PIDTYPE_SID),			\
 	},								\
 	.thread_group	= LIST_HEAD_INIT(tsk.thread_group),		\
+	.thread_node	= LIST_HEAD_INIT(init_signals.thread_head),	\
 	INIT_IDS							\
 	INIT_PERF_EVENTS(tsk)						\
 	INIT_TRACE_IRQFLAGS						\
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 45c9b6a17bcb..3be6bb18562d 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -73,11 +73,7 @@ static inline void set_page_stable_node(struct page *page,
 struct page *ksm_might_need_to_copy(struct page *page,
 			struct vm_area_struct *vma, unsigned long address);
 
-int page_referenced_ksm(struct page *page,
-			struct mem_cgroup *memcg, unsigned long *vm_flags);
-int try_to_unmap_ksm(struct page *page, enum ttu_flags flags);
-int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *,
-		struct vm_area_struct *, unsigned long, void *), void *arg);
+int rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc);
 void ksm_migrate_page(struct page *newpage, struct page *oldpage);
 
 #else  /* !CONFIG_KSM */
@@ -115,13 +111,8 @@ static inline int page_referenced_ksm(struct page *page,
 	return 0;
 }
 
-static inline int try_to_unmap_ksm(struct page *page, enum ttu_flags flags)
-{
-	return 0;
-}
-
-static inline int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page*,
-		struct vm_area_struct *, unsigned long, void *), void *arg)
+static inline int rmap_walk_ksm(struct page *page,
+		struct rmap_walk_control *rwc)
 {
 	return 0;
 }
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 77c60e52939d..cd0274bebd4c 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -19,9 +19,13 @@
 
 #define INIT_MEMBLOCK_REGIONS	128
 
+/* Definition of memblock flags. */
+#define MEMBLOCK_HOTPLUG	0x1	/* hotpluggable region */
+
 struct memblock_region {
 	phys_addr_t base;
 	phys_addr_t size;
+	unsigned long flags;
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 	int nid;
 #endif
@@ -43,12 +47,17 @@ struct memblock {
 
 extern struct memblock memblock;
 extern int memblock_debug;
+#ifdef CONFIG_MOVABLE_NODE
+/* If movable_node boot option specified */
+extern bool movable_node_enabled;
+#endif /* CONFIG_MOVABLE_NODE */
 
 #define memblock_dbg(fmt, ...) \
 	if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
 
-phys_addr_t memblock_find_in_range_node(phys_addr_t start, phys_addr_t end,
-				phys_addr_t size, phys_addr_t align, int nid);
+phys_addr_t memblock_find_in_range_node(phys_addr_t size, phys_addr_t align,
+					phys_addr_t start, phys_addr_t end,
+					int nid);
 phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end,
 				   phys_addr_t size, phys_addr_t align);
 phys_addr_t get_allocated_memblock_reserved_regions_info(phys_addr_t *addr);
@@ -59,6 +68,28 @@ int memblock_remove(phys_addr_t base, phys_addr_t size);
 int memblock_free(phys_addr_t base, phys_addr_t size);
 int memblock_reserve(phys_addr_t base, phys_addr_t size);
 void memblock_trim_memory(phys_addr_t align);
+int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size);
+int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size);
+#ifdef CONFIG_MOVABLE_NODE
+static inline bool memblock_is_hotpluggable(struct memblock_region *m)
+{
+	return m->flags & MEMBLOCK_HOTPLUG;
+}
+
+static inline bool movable_node_is_enabled(void)
+{
+	return movable_node_enabled;
+}
+#else
+static inline bool memblock_is_hotpluggable(struct memblock_region *m)
+{
+	return false;
+}
+static inline bool movable_node_is_enabled(void)
+{
+	return false;
+}
+#endif
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn,
@@ -87,7 +118,7 @@ void __next_free_mem_range(u64 *idx, int nid, phys_addr_t *out_start,
 /**
  * for_each_free_mem_range - iterate through free memblock areas
  * @i: u64 used as loop variable
- * @nid: node selector, %MAX_NUMNODES for all nodes
+ * @nid: node selector, %NUMA_NO_NODE for all nodes
  * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
 * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
 * @p_nid: ptr to int for nid of the range, can be %NULL
@@ -107,7 +138,7 @@ void __next_free_mem_range_rev(u64 *idx, int nid, phys_addr_t *out_start,
 /**
  * for_each_free_mem_range_reverse - rev-iterate through free memblock areas
  * @i: u64 used as loop variable
- * @nid: node selector, %MAX_NUMNODES for all nodes
+ * @nid: node selector, %NUMA_NO_NODE for all nodes
  * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
 * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
 * @p_nid: ptr to int for nid of the range, can be %NULL
@@ -121,8 +152,21 @@ void __next_free_mem_range_rev(u64 *idx, int nid, phys_addr_t *out_start,
 	     i != (u64)ULLONG_MAX;					\
 	     __next_free_mem_range_rev(&i, nid, p_start, p_end, p_nid))
 
+static inline void memblock_set_region_flags(struct memblock_region *r,
+					     unsigned long flags)
+{
+	r->flags |= flags;
+}
+
+static inline void memblock_clear_region_flags(struct memblock_region *r,
+					       unsigned long flags)
+{
+	r->flags &= ~flags;
+}
+
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-int memblock_set_node(phys_addr_t base, phys_addr_t size, int nid);
+int memblock_set_node(phys_addr_t base, phys_addr_t size,
+		      struct memblock_type *type, int nid);
 
 static inline void memblock_set_region_node(struct memblock_region *r, int nid)
 {
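
A sketch of how the new flag helpers might be used together with the existing for_each_memblock() iterator (the function itself is hypothetical; with CONFIG_MOVABLE_NODE=n both helpers fold to constants and the loop body compiles away):

/* Hypothetical scan: report regions marked MEMBLOCK_HOTPLUG. */
static int __init example_scan_hotplug(void)
{
	struct memblock_region *r;

	if (!movable_node_is_enabled())
		return 0;

	for_each_memblock(memory, r) {
		if (memblock_is_hotpluggable(r))
			pr_info("hotplug region: %pa + %pa\n",
				&r->base, &r->size);
	}
	return 0;
}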
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 9fe426b30a41..5f1ea756aace 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -211,20 +211,8 @@ static inline void mpol_get(struct mempolicy *pol)
 {
 }
 
-static inline struct mempolicy *mpol_dup(struct mempolicy *old)
-{
-	return NULL;
-}
-
 struct shared_policy {};
 
-static inline int mpol_set_shared_policy(struct shared_policy *info,
-					struct vm_area_struct *vma,
-					struct mempolicy *new)
-{
-	return -EINVAL;
-}
-
 static inline void mpol_shared_policy_init(struct shared_policy *sp,
 						struct mempolicy *mpol)
 {
@@ -234,12 +222,6 @@ static inline void mpol_free_shared_policy(struct shared_policy *p)
 {
 }
 
-static inline struct mempolicy *
-mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx)
-{
-	return NULL;
-}
-
 #define vma_policy(vma) NULL
 
 static inline int
@@ -266,10 +248,6 @@ static inline void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
 {
 }
 
-static inline void mpol_fix_fork_child_flag(struct task_struct *p)
-{
-}
-
 static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
 				unsigned long addr, gfp_t gfp_flags,
 				struct mempolicy **mpol, nodemask_t **nodemask)
@@ -284,12 +262,6 @@ static inline bool init_nodemask_of_mempolicy(nodemask_t *m)
 	return false;
 }
 
-static inline bool mempolicy_nodemask_intersects(struct task_struct *tsk,
-			const nodemask_t *mask)
-{
-	return false;
-}
-
 static inline int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
 			const nodemask_t *to, int flags)
 {
@@ -307,10 +279,6 @@ static inline int mpol_parse_str(char *str, struct mempolicy **mpol)
 }
 #endif
 
-static inline void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
-{
-}
-
 static inline int mpol_misplaced(struct page *page, struct vm_area_struct *vma,
 				 unsigned long address)
 {
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index f015c059e159..84a31ad0b791 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -35,16 +35,12 @@ enum migrate_reason {
 
 #ifdef CONFIG_MIGRATION
 
-extern void putback_lru_pages(struct list_head *l);
 extern void putback_movable_pages(struct list_head *l);
 extern int migrate_page(struct address_space *,
 			struct page *, struct page *, enum migrate_mode);
 extern int migrate_pages(struct list_head *l, new_page_t x,
 		unsigned long private, enum migrate_mode mode, int reason);
 
-extern int fail_migrate_page(struct address_space *,
-			struct page *, struct page *);
-
 extern int migrate_prep(void);
 extern int migrate_prep_local(void);
 extern int migrate_vmas(struct mm_struct *mm,
@@ -59,7 +55,6 @@ extern int migrate_page_move_mapping(struct address_space *mapping,
 		int extra_count);
 #else
 
-static inline void putback_lru_pages(struct list_head *l) {}
 static inline void putback_movable_pages(struct list_head *l) {}
 static inline int migrate_pages(struct list_head *l, new_page_t x,
 		unsigned long private, enum migrate_mode mode, int reason)
@@ -86,7 +81,6 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
 
 /* Possible settings for the migrate_page() method in address_operations */
 #define migrate_page NULL
-#define fail_migrate_page NULL
 
 #endif /* CONFIG_MIGRATION */
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 35527173cf50..a512dd836931 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -57,6 +57,15 @@ extern int sysctl_legacy_va_layout;
 extern unsigned long sysctl_user_reserve_kbytes;
 extern unsigned long sysctl_admin_reserve_kbytes;
 
+extern int sysctl_overcommit_memory;
+extern int sysctl_overcommit_ratio;
+extern unsigned long sysctl_overcommit_kbytes;
+
+extern int overcommit_ratio_handler(struct ctl_table *, int, void __user *,
+				    size_t *, loff_t *);
+extern int overcommit_kbytes_handler(struct ctl_table *, int, void __user *,
+				     size_t *, loff_t *);
+
 #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
 
 /* to align the pointer to the (next) page boundary */
@@ -414,15 +423,44 @@ static inline int page_count(struct page *page)
 	return atomic_read(&compound_head(page)->_count);
 }
 
+#ifdef CONFIG_HUGETLB_PAGE
+extern int PageHeadHuge(struct page *page_head);
+#else /* CONFIG_HUGETLB_PAGE */
+static inline int PageHeadHuge(struct page *page_head)
+{
+	return 0;
+}
+#endif /* CONFIG_HUGETLB_PAGE */
+
+static inline bool __compound_tail_refcounted(struct page *page)
+{
+	return !PageSlab(page) && !PageHeadHuge(page);
+}
+
+/*
+ * This takes a head page as parameter and tells if the
+ * tail page reference counting can be skipped.
+ *
+ * For this to be safe, PageSlab and PageHeadHuge must remain true on
+ * any given page where they return true here, until all tail pins
+ * have been released.
+ */
+static inline bool compound_tail_refcounted(struct page *page)
+{
+	VM_BUG_ON(!PageHead(page));
+	return __compound_tail_refcounted(page);
+}
+
 static inline void get_huge_page_tail(struct page *page)
 {
 	/*
-	 * __split_huge_page_refcount() cannot run
-	 * from under us.
+	 * __split_huge_page_refcount() cannot run from under us.
 	 */
+	VM_BUG_ON(!PageTail(page));
 	VM_BUG_ON(page_mapcount(page) < 0);
 	VM_BUG_ON(atomic_read(&page->_count) != 0);
-	atomic_inc(&page->_mapcount);
+	if (compound_tail_refcounted(page->first_page))
+		atomic_inc(&page->_mapcount);
 }
 
 extern bool __get_page_tail(struct page *page);
@@ -846,11 +884,14 @@ static __always_inline void *lowmem_page_address(const struct page *page)
 #endif
 
 #if defined(WANT_PAGE_VIRTUAL)
-#define page_address(page) ((page)->virtual)
-#define set_page_address(page, address)			\
-	do {						\
-		(page)->virtual = (address);		\
-	} while(0)
+static inline void *page_address(const struct page *page)
+{
+	return page->virtual;
+}
+static inline void set_page_address(struct page *page, void *address)
+{
+	page->virtual = address;
+}
 #define page_address_init()  do { } while(0)
 #endif
 
@@ -984,7 +1025,6 @@ extern void pagefault_out_of_memory(void);
  * various contexts.
  */
 #define SHOW_MEM_FILTER_NODES		(0x0001u)	/* disallowed nodes */
-#define SHOW_MEM_FILTER_PAGE_COUNT	(0x0002u)	/* page type count */
 
 extern void show_free_areas(unsigned int flags);
 extern bool skip_free_areas_node(unsigned int flags, int nid);
@@ -1318,6 +1358,7 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a
 
 #if USE_SPLIT_PTE_PTLOCKS
 #if ALLOC_SPLIT_PTLOCKS
+void __init ptlock_cache_init(void);
 extern bool ptlock_alloc(struct page *page);
 extern void ptlock_free(struct page *page);
 
@@ -1326,6 +1367,10 @@ static inline spinlock_t *ptlock_ptr(struct page *page)
 	return page->ptl;
 }
 #else /* ALLOC_SPLIT_PTLOCKS */
+static inline void ptlock_cache_init(void)
+{
+}
+
 static inline bool ptlock_alloc(struct page *page)
 {
 	return true;
@@ -1378,10 +1423,17 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
 {
 	return &mm->page_table_lock;
 }
+static inline void ptlock_cache_init(void) {}
 static inline bool ptlock_init(struct page *page) { return true; }
 static inline void pte_lock_deinit(struct page *page) {}
 #endif /* USE_SPLIT_PTE_PTLOCKS */
 
+static inline void pgtable_init(void)
+{
+	ptlock_cache_init();
+	pgtable_cache_init();
+}
+
 static inline bool pgtable_page_ctor(struct page *page)
 {
 	inc_zone_page_state(page, NR_PAGETABLE);
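
The two overcommit handlers are only declared here; they are implemented in mm/. The expected behaviour is that overcommit_ratio and overcommit_kbytes are mutually exclusive, with a write to one clearing the other. A sketch of that shape (the real implementation may differ):

/* Sketch, not the actual mm/ implementation: accept the new kbytes
 * value and, if it was set, disable the percentage-based knob. */
int overcommit_kbytes_handler(struct ctl_table *table, int write,
			      void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);

	if (ret == 0 && write && sysctl_overcommit_kbytes)
		sysctl_overcommit_ratio = 0;
	return ret;
}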
diff --git a/include/linux/mman.h b/include/linux/mman.h
index 7f7f8dae4b1d..16373c8f5f57 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -9,6 +9,7 @@
 
 extern int sysctl_overcommit_memory;
 extern int sysctl_overcommit_ratio;
+extern unsigned long sysctl_overcommit_kbytes;
 extern struct percpu_counter vm_committed_as;
 
 #ifdef CONFIG_SMP
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index bd791e452ad7..5f2052c83154 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -490,6 +490,12 @@ struct zone {
 	unsigned long		managed_pages;
 
 	/*
+	 * Number of MIGRATE_RESEVE page block. To maintain for just
+	 * optimization. Protected by zone->lock.
+	 */
+	int			nr_migrate_reserve_block;
+
+	/*
 	 * rarely used fields:
 	 */
 	const char		*name;
@@ -758,10 +764,7 @@ typedef struct pglist_data {
 	int kswapd_max_order;
 	enum zone_type classzone_idx;
 #ifdef CONFIG_NUMA_BALANCING
-	/*
-	 * Lock serializing the per destination node AutoNUMA memory
-	 * migration rate limiting data.
-	 */
+	/* Lock serializing the migrate rate limiting window */
 	spinlock_t numabalancing_migrate_lock;
 
 	/* Rate limiting time interval */
diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h
index 7931efe71175..fb616942e4c7 100644
--- a/include/linux/posix_acl.h
+++ b/include/linux/posix_acl.h
@@ -94,78 +94,12 @@ extern int posix_acl_chmod(struct posix_acl **, gfp_t, umode_t);
 extern struct posix_acl *get_posix_acl(struct inode *, int);
 extern int set_posix_acl(struct inode *, int, struct posix_acl *);
 
-#ifdef CONFIG_FS_POSIX_ACL
-static inline struct posix_acl **acl_by_type(struct inode *inode, int type)
-{
-	switch (type) {
-	case ACL_TYPE_ACCESS:
-		return &inode->i_acl;
-	case ACL_TYPE_DEFAULT:
-		return &inode->i_default_acl;
-	default:
-		BUG();
-	}
-}
-
-static inline struct posix_acl *get_cached_acl(struct inode *inode, int type)
-{
-	struct posix_acl **p = acl_by_type(inode, type);
-	struct posix_acl *acl = ACCESS_ONCE(*p);
-	if (acl) {
-		spin_lock(&inode->i_lock);
-		acl = *p;
-		if (acl != ACL_NOT_CACHED)
-			acl = posix_acl_dup(acl);
-		spin_unlock(&inode->i_lock);
-	}
-	return acl;
-}
-
-static inline struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type)
-{
-	return rcu_dereference(*acl_by_type(inode, type));
-}
-
-static inline void set_cached_acl(struct inode *inode,
-				  int type,
-				  struct posix_acl *acl)
-{
-	struct posix_acl **p = acl_by_type(inode, type);
-	struct posix_acl *old;
-	spin_lock(&inode->i_lock);
-	old = *p;
-	rcu_assign_pointer(*p, posix_acl_dup(acl));
-	spin_unlock(&inode->i_lock);
-	if (old != ACL_NOT_CACHED)
-		posix_acl_release(old);
-}
-
-static inline void forget_cached_acl(struct inode *inode, int type)
-{
-	struct posix_acl **p = acl_by_type(inode, type);
-	struct posix_acl *old;
-	spin_lock(&inode->i_lock);
-	old = *p;
-	*p = ACL_NOT_CACHED;
-	spin_unlock(&inode->i_lock);
-	if (old != ACL_NOT_CACHED)
-		posix_acl_release(old);
-}
-
-static inline void forget_all_cached_acls(struct inode *inode)
-{
-	struct posix_acl *old_access, *old_default;
-	spin_lock(&inode->i_lock);
-	old_access = inode->i_acl;
-	old_default = inode->i_default_acl;
-	inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
-	spin_unlock(&inode->i_lock);
-	if (old_access != ACL_NOT_CACHED)
-		posix_acl_release(old_access);
-	if (old_default != ACL_NOT_CACHED)
-		posix_acl_release(old_default);
-}
-#endif
+struct posix_acl **acl_by_type(struct inode *inode, int type);
+struct posix_acl *get_cached_acl(struct inode *inode, int type);
+struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type);
+void set_cached_acl(struct inode *inode, int type, struct posix_acl *acl);
+void forget_cached_acl(struct inode *inode, int type);
+void forget_all_cached_acls(struct inode *inode);
 
 static inline void cache_no_acl(struct inode *inode)
 {
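
The cache helpers are now out-of-line, but their calling convention is unchanged; a hypothetical filesystem ->get_acl() showing the usual cache-first pattern (example_read_acl_from_disk() is made up):

static struct posix_acl *example_get_acl(struct inode *inode, int type)
{
	struct posix_acl *acl = get_cached_acl(inode, type);

	if (acl != ACL_NOT_CACHED)
		return acl;	/* cache hit (possibly a cached NULL) */

	acl = example_read_acl_from_disk(inode, type);	/* hypothetical */
	if (!IS_ERR(acl))
		set_cached_acl(inode, type, acl);
	return acl;
}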
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 6dacb93a6d94..1da693d51255 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -184,13 +184,13 @@ static inline void page_dup_rmap(struct page *page)
 int page_referenced(struct page *, int is_locked,
 			struct mem_cgroup *memcg, unsigned long *vm_flags);
 int page_referenced_one(struct page *, struct vm_area_struct *,
-	unsigned long address, unsigned int *mapcount, unsigned long *vm_flags);
+	unsigned long address, void *arg);
 
 #define TTU_ACTION(x) ((x) & TTU_ACTION_MASK)
 
 int try_to_unmap(struct page *, enum ttu_flags flags);
 int try_to_unmap_one(struct page *, struct vm_area_struct *,
-			unsigned long address, enum ttu_flags flags);
+			unsigned long address, void *arg);
 
 /*
  * Called from mm/filemap_xip.c to unmap empty zero page
@@ -236,10 +236,27 @@ void page_unlock_anon_vma_read(struct anon_vma *anon_vma);
 int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
 
 /*
- * Called by migrate.c to remove migration ptes, but might be used more later.
+ * rmap_walk_control: To control rmap traversing for specific needs
+ *
+ * arg: passed to rmap_one() and invalid_vma()
+ * rmap_one: executed on each vma where page is mapped
+ * done: for checking traversing termination condition
+ * file_nonlinear: for handling file nonlinear mapping
+ * anon_lock: for getting anon_lock by optimized way rather than default
+ * invalid_vma: for skipping uninterested vma
 */
-int rmap_walk(struct page *page, int (*rmap_one)(struct page *,
-		struct vm_area_struct *, unsigned long, void *), void *arg);
+struct rmap_walk_control {
+	void *arg;
+	int (*rmap_one)(struct page *page, struct vm_area_struct *vma,
+					unsigned long addr, void *arg);
+	int (*done)(struct page *page);
+	int (*file_nonlinear)(struct page *, struct address_space *,
+					struct vm_area_struct *vma);
+	struct anon_vma *(*anon_lock)(struct page *page);
+	bool (*invalid_vma)(struct vm_area_struct *vma, void *arg);
+};
+
+int rmap_walk(struct page *page, struct rmap_walk_control *rwc);
 
 #else	/* !CONFIG_MMU */
 
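
A sketch of a caller ported to the new interface: instead of passing a function pointer and an opaque argument, the walker fills in an rmap_walk_control. The example_ names are hypothetical; SWAP_AGAIN is the existing "keep walking" return code from this header.

static int example_referenced_one(struct page *page,
				  struct vm_area_struct *vma,
				  unsigned long address, void *arg)
{
	int *referenced = arg;

	/* ... test and clear the young bit for this mapping ... */
	(*referenced)++;
	return SWAP_AGAIN;	/* continue the walk */
}

static int example_page_referenced(struct page *page)
{
	int referenced = 0;
	struct rmap_walk_control rwc = {
		.rmap_one = example_referenced_one,
		.arg = &referenced,
	};

	rmap_walk(page, &rwc);
	return referenced;
}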
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ffccdad050b5..485234d2fd42 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -549,6 +549,7 @@ struct signal_struct {
 	atomic_t		sigcnt;
 	atomic_t		live;
 	int			nr_threads;
+	struct list_head	thread_head;
 
 	wait_queue_head_t	wait_chldexit;	/* for wait4() */
 
@@ -1271,6 +1272,7 @@ struct task_struct {
 	/* PID/PID hash table linkage. */
 	struct pid_link pids[PIDTYPE_MAX];
 	struct list_head thread_group;
+	struct list_head thread_node;
 
 	struct completion *vfork_done;		/* for vfork() */
 	int __user *set_child_tid;		/* CLONE_CHILD_SETTID */
@@ -2341,6 +2343,16 @@ extern bool current_is_single_threaded(void);
 #define while_each_thread(g, t) \
 	while ((t = next_thread(t)) != g)
 
+#define __for_each_thread(signal, t)	\
+	list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node)
+
+#define for_each_thread(p, t)		\
+	__for_each_thread((p)->signal, t)
+
+/* Careful: this is a double loop, 'break' won't work as expected. */
+#define for_each_process_thread(p, t)	\
+	for_each_process(p) for_each_thread(p, t)
+
 static inline int get_nr_threads(struct task_struct *tsk)
 {
 	return tsk->signal->nr_threads;
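
A hedged sketch of the new iterator in use (the function is illustrative): since __for_each_thread() is built on list_for_each_entry_rcu(), the walk is expected to run under rcu_read_lock() or tasklist_lock, just like the old do/while_each_thread() pairing.

static void example_count_threads(struct task_struct *p)
{
	struct task_struct *t;
	int n = 0;

	rcu_read_lock();
	for_each_thread(p, t)
		n++;
	rcu_read_unlock();

	pr_info("%s: %d threads\n", p->comm, n);
}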
diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h
index fde1b3e94c7d..06f544ef2f6f 100644
--- a/include/trace/events/compaction.h
+++ b/include/trace/events/compaction.h
@@ -67,6 +67,48 @@ TRACE_EVENT(mm_compaction_migratepages,
 		__entry->nr_failed)
 );
 
+TRACE_EVENT(mm_compaction_begin,
+	TP_PROTO(unsigned long zone_start, unsigned long migrate_start,
+		unsigned long free_start, unsigned long zone_end),
+
+	TP_ARGS(zone_start, migrate_start, free_start, zone_end),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, zone_start)
+		__field(unsigned long, migrate_start)
+		__field(unsigned long, free_start)
+		__field(unsigned long, zone_end)
+	),
+
+	TP_fast_assign(
+		__entry->zone_start = zone_start;
+		__entry->migrate_start = migrate_start;
+		__entry->free_start = free_start;
+		__entry->zone_end = zone_end;
+	),
+
+	TP_printk("zone_start=%lu migrate_start=%lu free_start=%lu zone_end=%lu",
+		__entry->zone_start,
+		__entry->migrate_start,
+		__entry->free_start,
+		__entry->zone_end)
+);
+
+TRACE_EVENT(mm_compaction_end,
+	TP_PROTO(int status),
+
+	TP_ARGS(status),
+
+	TP_STRUCT__entry(
+		__field(int, status)
+	),
+
+	TP_fast_assign(
+		__entry->status = status;
+	),
+
+	TP_printk("status=%d", __entry->status)
+);
 
 #endif /* _TRACE_COMPACTION_H */
 
diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h
index ec2a6ccfd7e5..3075ffbb9a83 100644
--- a/include/trace/events/migrate.h
+++ b/include/trace/events/migrate.h
@@ -45,6 +45,32 @@ TRACE_EVENT(mm_migrate_pages,
 		__print_symbolic(__entry->reason, MIGRATE_REASON))
 );
 
+TRACE_EVENT(mm_numa_migrate_ratelimit,
+
+	TP_PROTO(struct task_struct *p, int dst_nid, unsigned long nr_pages),
+
+	TP_ARGS(p, dst_nid, nr_pages),
+
+	TP_STRUCT__entry(
+		__array(char,		comm,	TASK_COMM_LEN)
+		__field(pid_t,		pid)
+		__field(int,		dst_nid)
+		__field(unsigned long,	nr_pages)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+		__entry->pid = p->pid;
+		__entry->dst_nid = dst_nid;
+		__entry->nr_pages = nr_pages;
+	),
+
+	TP_printk("comm=%s pid=%d dst_nid=%d nr_pages=%lu",
+		__entry->comm,
+		__entry->pid,
+		__entry->dst_nid,
+		__entry->nr_pages)
+);
 #endif /* _TRACE_MIGRATE_H */
 
 /* This part must be outside protection */
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 04c308413a5d..67e1bbf83695 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -443,6 +443,93 @@ TRACE_EVENT(sched_process_hang,
 );
 #endif /* CONFIG_DETECT_HUNG_TASK */
 
+DECLARE_EVENT_CLASS(sched_move_task_template,
+
+	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),
+
+	TP_ARGS(tsk, src_cpu, dst_cpu),
+
+	TP_STRUCT__entry(
+		__field(pid_t,	pid)
+		__field(pid_t,	tgid)
+		__field(pid_t,	ngid)
+		__field(int,	src_cpu)
+		__field(int,	src_nid)
+		__field(int,	dst_cpu)
+		__field(int,	dst_nid)
+	),
+
+	TP_fast_assign(
+		__entry->pid		= task_pid_nr(tsk);
+		__entry->tgid		= task_tgid_nr(tsk);
+		__entry->ngid		= task_numa_group_id(tsk);
+		__entry->src_cpu	= src_cpu;
+		__entry->src_nid	= cpu_to_node(src_cpu);
+		__entry->dst_cpu	= dst_cpu;
+		__entry->dst_nid	= cpu_to_node(dst_cpu);
+	),
+
+	TP_printk("pid=%d tgid=%d ngid=%d src_cpu=%d src_nid=%d dst_cpu=%d dst_nid=%d",
+			__entry->pid, __entry->tgid, __entry->ngid,
+			__entry->src_cpu, __entry->src_nid,
+			__entry->dst_cpu, __entry->dst_nid)
+);
+
+/*
+ * Tracks migration of tasks from one runqueue to another. Can be used to
+ * detect if automatic NUMA balancing is bouncing between nodes
+ */
+DEFINE_EVENT(sched_move_task_template, sched_move_numa,
+	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),
+
+	TP_ARGS(tsk, src_cpu, dst_cpu)
+);
+
+DEFINE_EVENT(sched_move_task_template, sched_stick_numa,
+	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),
+
+	TP_ARGS(tsk, src_cpu, dst_cpu)
+);
+
+TRACE_EVENT(sched_swap_numa,
+
+	TP_PROTO(struct task_struct *src_tsk, int src_cpu,
+		 struct task_struct *dst_tsk, int dst_cpu),
+
+	TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu),
+
+	TP_STRUCT__entry(
+		__field(pid_t,	src_pid)
+		__field(pid_t,	src_tgid)
+		__field(pid_t,	src_ngid)
+		__field(int,	src_cpu)
+		__field(int,	src_nid)
+		__field(pid_t,	dst_pid)
+		__field(pid_t,	dst_tgid)
+		__field(pid_t,	dst_ngid)
+		__field(int,	dst_cpu)
+		__field(int,	dst_nid)
+	),
+
+	TP_fast_assign(
+		__entry->src_pid	= task_pid_nr(src_tsk);
+		__entry->src_tgid	= task_tgid_nr(src_tsk);
+		__entry->src_ngid	= task_numa_group_id(src_tsk);
+		__entry->src_cpu	= src_cpu;
+		__entry->src_nid	= cpu_to_node(src_cpu);
+		__entry->dst_pid	= task_pid_nr(dst_tsk);
+		__entry->dst_tgid	= task_tgid_nr(dst_tsk);
+		__entry->dst_ngid	= task_numa_group_id(dst_tsk);
+		__entry->dst_cpu	= dst_cpu;
+		__entry->dst_nid	= cpu_to_node(dst_cpu);
+	),
+
+	TP_printk("src_pid=%d src_tgid=%d src_ngid=%d src_cpu=%d src_nid=%d dst_pid=%d dst_tgid=%d dst_ngid=%d dst_cpu=%d dst_nid=%d",
+			__entry->src_pid, __entry->src_tgid, __entry->src_ngid,
+			__entry->src_cpu, __entry->src_nid,
+			__entry->dst_pid, __entry->dst_tgid, __entry->dst_ngid,
+			__entry->dst_cpu, __entry->dst_nid)
+);
 #endif /* _TRACE_SCHED_H */
 
 /* This part must be outside protection */