aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS1
-rw-r--r--drivers/block/zram/zram_drv.c3
-rw-r--r--fs/notify/fsnotify.c36
-rw-r--r--fs/notify/fsnotify.h4
-rw-r--r--fs/notify/inode_mark.c8
-rw-r--r--fs/notify/mark.c36
-rw-r--r--fs/notify/vfsmount_mark.c8
-rw-r--r--include/linux/bootmem.h1
-rw-r--r--include/linux/mmzone.h9
-rw-r--r--include/linux/page-isolation.h8
-rw-r--r--kernel/panic.c1
-rw-r--r--mm/bootmem.c9
-rw-r--r--mm/compaction.c18
-rw-r--r--mm/internal.h25
-rw-r--r--mm/memory_hotplug.c26
-rw-r--r--mm/nobootmem.c8
-rw-r--r--mm/page_alloc.c68
-rw-r--r--mm/page_isolation.c43
-rw-r--r--mm/slab_common.c4
19 files changed, 240 insertions, 76 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index ea4d0058fd1b..60b1163dba28 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4716,6 +4716,7 @@ L: linux-iio@vger.kernel.org
4716S: Maintained 4716S: Maintained
4717F: drivers/iio/ 4717F: drivers/iio/
4718F: drivers/staging/iio/ 4718F: drivers/staging/iio/
4719F: include/linux/iio/
4719 4720
4720IKANOS/ADI EAGLE ADSL USB DRIVER 4721IKANOS/ADI EAGLE ADSL USB DRIVER
4721M: Matthieu Castet <castet.matthieu@free.fr> 4722M: Matthieu Castet <castet.matthieu@free.fr>
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 2ad0b5bce44b..3920ee45aa59 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -560,7 +560,8 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
560 } 560 }
561 561
562 if (page_zero_filled(uncmem)) { 562 if (page_zero_filled(uncmem)) {
563 kunmap_atomic(user_mem); 563 if (user_mem)
564 kunmap_atomic(user_mem);
564 /* Free memory associated with this sector now. */ 565 /* Free memory associated with this sector now. */
565 bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); 566 bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
566 zram_free_page(zram, index); 567 zram_free_page(zram, index);
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 9d3e9c50066a..89326acd4561 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -229,8 +229,16 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
229 &fsnotify_mark_srcu); 229 &fsnotify_mark_srcu);
230 } 230 }
231 231
232 /*
233 * We need to merge inode & vfsmount mark lists so that inode mark
234 * ignore masks are properly reflected for mount mark notifications.
235 * That's why this traversal is so complicated...
236 */
232 while (inode_node || vfsmount_node) { 237 while (inode_node || vfsmount_node) {
233 inode_group = vfsmount_group = NULL; 238 inode_group = NULL;
239 inode_mark = NULL;
240 vfsmount_group = NULL;
241 vfsmount_mark = NULL;
234 242
235 if (inode_node) { 243 if (inode_node) {
236 inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu), 244 inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu),
@@ -244,21 +252,19 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
244 vfsmount_group = vfsmount_mark->group; 252 vfsmount_group = vfsmount_mark->group;
245 } 253 }
246 254
247 if (inode_group > vfsmount_group) { 255 if (inode_group && vfsmount_group) {
248 /* handle inode */ 256 int cmp = fsnotify_compare_groups(inode_group,
249 ret = send_to_group(to_tell, inode_mark, NULL, mask, 257 vfsmount_group);
250 data, data_is, cookie, file_name); 258 if (cmp > 0) {
251 /* we didn't use the vfsmount_mark */ 259 inode_group = NULL;
252 vfsmount_group = NULL; 260 inode_mark = NULL;
253 } else if (vfsmount_group > inode_group) { 261 } else if (cmp < 0) {
254 ret = send_to_group(to_tell, NULL, vfsmount_mark, mask, 262 vfsmount_group = NULL;
255 data, data_is, cookie, file_name); 263 vfsmount_mark = NULL;
256 inode_group = NULL; 264 }
257 } else {
258 ret = send_to_group(to_tell, inode_mark, vfsmount_mark,
259 mask, data, data_is, cookie,
260 file_name);
261 } 265 }
266 ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask,
267 data, data_is, cookie, file_name);
262 268
263 if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS)) 269 if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS))
264 goto out; 270 goto out;
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 9c0898c4cfe1..3b68b0ae0a97 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -12,6 +12,10 @@ extern void fsnotify_flush_notify(struct fsnotify_group *group);
12/* protects reads of inode and vfsmount marks list */ 12/* protects reads of inode and vfsmount marks list */
13extern struct srcu_struct fsnotify_mark_srcu; 13extern struct srcu_struct fsnotify_mark_srcu;
14 14
15/* compare two groups for sorting of marks lists */
16extern int fsnotify_compare_groups(struct fsnotify_group *a,
17 struct fsnotify_group *b);
18
15extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark, 19extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark,
16 __u32 mask); 20 __u32 mask);
17/* add a mark to an inode */ 21/* add a mark to an inode */
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index e8497144b323..dfbf5447eea4 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -194,6 +194,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
194{ 194{
195 struct fsnotify_mark *lmark, *last = NULL; 195 struct fsnotify_mark *lmark, *last = NULL;
196 int ret = 0; 196 int ret = 0;
197 int cmp;
197 198
198 mark->flags |= FSNOTIFY_MARK_FLAG_INODE; 199 mark->flags |= FSNOTIFY_MARK_FLAG_INODE;
199 200
@@ -219,11 +220,8 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
219 goto out; 220 goto out;
220 } 221 }
221 222
222 if (mark->group->priority < lmark->group->priority) 223 cmp = fsnotify_compare_groups(lmark->group, mark->group);
223 continue; 224 if (cmp < 0)
224
225 if ((mark->group->priority == lmark->group->priority) &&
226 (mark->group < lmark->group))
227 continue; 225 continue;
228 226
229 hlist_add_before_rcu(&mark->i.i_list, &lmark->i.i_list); 227 hlist_add_before_rcu(&mark->i.i_list, &lmark->i.i_list);
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index d90deaa08e78..34c38fabf514 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -210,6 +210,42 @@ void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mas
210} 210}
211 211
212/* 212/*
213 * Sorting function for lists of fsnotify marks.
214 *
215 * Fanotify supports different notification classes (reflected as priority of
216 * notification group). Events shall be passed to notification groups in
217 * decreasing priority order. To achieve this marks in notification lists for
218 * inodes and vfsmounts are sorted so that priorities of corresponding groups
219 * are descending.
220 *
221 * Furthermore correct handling of the ignore mask requires processing inode
222 * and vfsmount marks of each group together. Using the group address as
223 * further sort criterion provides a unique sorting order and thus we can
224 * merge inode and vfsmount lists of marks in linear time and find groups
225 * present in both lists.
226 *
227 * A return value of 1 signifies that b has priority over a.
228 * A return value of 0 signifies that the two marks have to be handled together.
229 * A return value of -1 signifies that a has priority over b.
230 */
231int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
232{
233 if (a == b)
234 return 0;
235 if (!a)
236 return 1;
237 if (!b)
238 return -1;
239 if (a->priority < b->priority)
240 return 1;
241 if (a->priority > b->priority)
242 return -1;
243 if (a < b)
244 return 1;
245 return -1;
246}
247
248/*
213 * Attach an initialized mark to a given group and fs object. 249 * Attach an initialized mark to a given group and fs object.
214 * These marks may be used for the fsnotify backend to determine which 250 * These marks may be used for the fsnotify backend to determine which
215 * event types should be delivered to which group. 251 * event types should be delivered to which group.
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index ac851e8376b1..faefa72a11eb 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -153,6 +153,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
153 struct mount *m = real_mount(mnt); 153 struct mount *m = real_mount(mnt);
154 struct fsnotify_mark *lmark, *last = NULL; 154 struct fsnotify_mark *lmark, *last = NULL;
155 int ret = 0; 155 int ret = 0;
156 int cmp;
156 157
157 mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT; 158 mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT;
158 159
@@ -178,11 +179,8 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
178 goto out; 179 goto out;
179 } 180 }
180 181
181 if (mark->group->priority < lmark->group->priority) 182 cmp = fsnotify_compare_groups(lmark->group, mark->group);
182 continue; 183 if (cmp < 0)
183
184 if ((mark->group->priority == lmark->group->priority) &&
185 (mark->group < lmark->group))
186 continue; 184 continue;
187 185
188 hlist_add_before_rcu(&mark->m.m_list, &lmark->m.m_list); 186 hlist_add_before_rcu(&mark->m.m_list, &lmark->m.m_list);
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 4e2bd4c95b66..0995c2de8162 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -46,6 +46,7 @@ extern unsigned long init_bootmem_node(pg_data_t *pgdat,
46extern unsigned long init_bootmem(unsigned long addr, unsigned long memend); 46extern unsigned long init_bootmem(unsigned long addr, unsigned long memend);
47 47
48extern unsigned long free_all_bootmem(void); 48extern unsigned long free_all_bootmem(void);
49extern void reset_node_managed_pages(pg_data_t *pgdat);
49extern void reset_all_zones_managed_pages(void); 50extern void reset_all_zones_managed_pages(void);
50 51
51extern void free_bootmem_node(pg_data_t *pgdat, 52extern void free_bootmem_node(pg_data_t *pgdat,
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 48bf12ef6620..ffe66e381c04 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -431,6 +431,15 @@ struct zone {
431 */ 431 */
432 int nr_migrate_reserve_block; 432 int nr_migrate_reserve_block;
433 433
434#ifdef CONFIG_MEMORY_ISOLATION
435 /*
436 * Number of isolated pageblock. It is used to solve incorrect
437 * freepage counting problem due to racy retrieving migratetype
438 * of pageblock. Protected by zone->lock.
439 */
440 unsigned long nr_isolate_pageblock;
441#endif
442
434#ifdef CONFIG_MEMORY_HOTPLUG 443#ifdef CONFIG_MEMORY_HOTPLUG
435 /* see spanned/present_pages for more description */ 444 /* see spanned/present_pages for more description */
436 seqlock_t span_seqlock; 445 seqlock_t span_seqlock;
diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index 3fff8e774067..2dc1e1697b45 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -2,6 +2,10 @@
2#define __LINUX_PAGEISOLATION_H 2#define __LINUX_PAGEISOLATION_H
3 3
4#ifdef CONFIG_MEMORY_ISOLATION 4#ifdef CONFIG_MEMORY_ISOLATION
5static inline bool has_isolate_pageblock(struct zone *zone)
6{
7 return zone->nr_isolate_pageblock;
8}
5static inline bool is_migrate_isolate_page(struct page *page) 9static inline bool is_migrate_isolate_page(struct page *page)
6{ 10{
7 return get_pageblock_migratetype(page) == MIGRATE_ISOLATE; 11 return get_pageblock_migratetype(page) == MIGRATE_ISOLATE;
@@ -11,6 +15,10 @@ static inline bool is_migrate_isolate(int migratetype)
11 return migratetype == MIGRATE_ISOLATE; 15 return migratetype == MIGRATE_ISOLATE;
12} 16}
13#else 17#else
18static inline bool has_isolate_pageblock(struct zone *zone)
19{
20 return false;
21}
14static inline bool is_migrate_isolate_page(struct page *page) 22static inline bool is_migrate_isolate_page(struct page *page)
15{ 23{
16 return false; 24 return false;
diff --git a/kernel/panic.c b/kernel/panic.c
index d09dc5c32c67..cf80672b7924 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -244,6 +244,7 @@ static const struct tnt tnts[] = {
244 * 'I' - Working around severe firmware bug. 244 * 'I' - Working around severe firmware bug.
245 * 'O' - Out-of-tree module has been loaded. 245 * 'O' - Out-of-tree module has been loaded.
246 * 'E' - Unsigned module has been loaded. 246 * 'E' - Unsigned module has been loaded.
247 * 'L' - A soft lockup has previously occurred.
247 * 248 *
248 * The string is overwritten by the next call to print_tainted(). 249 * The string is overwritten by the next call to print_tainted().
249 */ 250 */
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 8a000cebb0d7..477be696511d 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -243,13 +243,10 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
243 243
244static int reset_managed_pages_done __initdata; 244static int reset_managed_pages_done __initdata;
245 245
246static inline void __init reset_node_managed_pages(pg_data_t *pgdat) 246void reset_node_managed_pages(pg_data_t *pgdat)
247{ 247{
248 struct zone *z; 248 struct zone *z;
249 249
250 if (reset_managed_pages_done)
251 return;
252
253 for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++) 250 for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
254 z->managed_pages = 0; 251 z->managed_pages = 0;
255} 252}
@@ -258,8 +255,12 @@ void __init reset_all_zones_managed_pages(void)
258{ 255{
259 struct pglist_data *pgdat; 256 struct pglist_data *pgdat;
260 257
258 if (reset_managed_pages_done)
259 return;
260
261 for_each_online_pgdat(pgdat) 261 for_each_online_pgdat(pgdat)
262 reset_node_managed_pages(pgdat); 262 reset_node_managed_pages(pgdat);
263
263 reset_managed_pages_done = 1; 264 reset_managed_pages_done = 1;
264} 265}
265 266
diff --git a/mm/compaction.c b/mm/compaction.c
index ec74cf0123ef..f9792ba3537c 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -479,6 +479,16 @@ isolate_freepages_range(struct compact_control *cc,
479 479
480 block_end_pfn = min(block_end_pfn, end_pfn); 480 block_end_pfn = min(block_end_pfn, end_pfn);
481 481
482 /*
483 * pfn could pass the block_end_pfn if isolated freepage
484 * is more than pageblock order. In this case, we adjust
485 * scanning range to right one.
486 */
487 if (pfn >= block_end_pfn) {
488 block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
489 block_end_pfn = min(block_end_pfn, end_pfn);
490 }
491
482 if (!pageblock_pfn_to_page(pfn, block_end_pfn, cc->zone)) 492 if (!pageblock_pfn_to_page(pfn, block_end_pfn, cc->zone))
483 break; 493 break;
484 494
@@ -1029,8 +1039,12 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
1029 } 1039 }
1030 1040
1031 acct_isolated(zone, cc); 1041 acct_isolated(zone, cc);
1032 /* Record where migration scanner will be restarted */ 1042 /*
1033 cc->migrate_pfn = low_pfn; 1043 * Record where migration scanner will be restarted. If we end up in
1044 * the same pageblock as the free scanner, make the scanners fully
1045 * meet so that compact_finished() terminates compaction.
1046 */
1047 cc->migrate_pfn = (end_pfn <= cc->free_pfn) ? low_pfn : cc->free_pfn;
1034 1048
1035 return cc->nr_migratepages ? ISOLATE_SUCCESS : ISOLATE_NONE; 1049 return cc->nr_migratepages ? ISOLATE_SUCCESS : ISOLATE_NONE;
1036} 1050}
diff --git a/mm/internal.h b/mm/internal.h
index 829304090b90..a4f90ba7068e 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -108,6 +108,31 @@ extern pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address);
108/* 108/*
109 * in mm/page_alloc.c 109 * in mm/page_alloc.c
110 */ 110 */
111
112/*
113 * Locate the struct page for both the matching buddy in our
114 * pair (buddy1) and the combined O(n+1) page they form (page).
115 *
116 * 1) Any buddy B1 will have an order O twin B2 which satisfies
117 * the following equation:
118 * B2 = B1 ^ (1 << O)
119 * For example, if the starting buddy (buddy2) is #8 its order
120 * 1 buddy is #10:
121 * B2 = 8 ^ (1 << 1) = 8 ^ 2 = 10
122 *
123 * 2) Any buddy B will have an order O+1 parent P which
124 * satisfies the following equation:
125 * P = B & ~(1 << O)
126 *
127 * Assumption: *_mem_map is contiguous at least up to MAX_ORDER
128 */
129static inline unsigned long
130__find_buddy_index(unsigned long page_idx, unsigned int order)
131{
132 return page_idx ^ (1 << order);
133}
134
135extern int __isolate_free_page(struct page *page, unsigned int order);
111extern void __free_pages_bootmem(struct page *page, unsigned int order); 136extern void __free_pages_bootmem(struct page *page, unsigned int order);
112extern void prep_compound_page(struct page *page, unsigned long order); 137extern void prep_compound_page(struct page *page, unsigned long order);
113#ifdef CONFIG_MEMORY_FAILURE 138#ifdef CONFIG_MEMORY_FAILURE
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 252e1dbbed86..1bf4807cb21e 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -31,6 +31,7 @@
31#include <linux/stop_machine.h> 31#include <linux/stop_machine.h>
32#include <linux/hugetlb.h> 32#include <linux/hugetlb.h>
33#include <linux/memblock.h> 33#include <linux/memblock.h>
34#include <linux/bootmem.h>
34 35
35#include <asm/tlbflush.h> 36#include <asm/tlbflush.h>
36 37
@@ -1066,6 +1067,16 @@ out:
1066} 1067}
1067#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ 1068#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
1068 1069
1070static void reset_node_present_pages(pg_data_t *pgdat)
1071{
1072 struct zone *z;
1073
1074 for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
1075 z->present_pages = 0;
1076
1077 pgdat->node_present_pages = 0;
1078}
1079
1069/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ 1080/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
1070static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start) 1081static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
1071{ 1082{
@@ -1096,6 +1107,21 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
1096 build_all_zonelists(pgdat, NULL); 1107 build_all_zonelists(pgdat, NULL);
1097 mutex_unlock(&zonelists_mutex); 1108 mutex_unlock(&zonelists_mutex);
1098 1109
1110 /*
1111 * zone->managed_pages is set to an approximate value in
1112 * free_area_init_core(), which will cause
1113 * /sys/device/system/node/nodeX/meminfo has wrong data.
1114 * So reset it to 0 before any memory is onlined.
1115 */
1116 reset_node_managed_pages(pgdat);
1117
1118 /*
1119 * When memory is hot-added, all the memory is in offline state. So
1120 * clear all zones' present_pages because they will be updated in
1121 * online_pages() and offline_pages().
1122 */
1123 reset_node_present_pages(pgdat);
1124
1099 return pgdat; 1125 return pgdat;
1100} 1126}
1101 1127
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index 7c7ab32ee503..90b50468333e 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -145,12 +145,10 @@ static unsigned long __init free_low_memory_core_early(void)
145 145
146static int reset_managed_pages_done __initdata; 146static int reset_managed_pages_done __initdata;
147 147
148static inline void __init reset_node_managed_pages(pg_data_t *pgdat) 148void reset_node_managed_pages(pg_data_t *pgdat)
149{ 149{
150 struct zone *z; 150 struct zone *z;
151 151
152 if (reset_managed_pages_done)
153 return;
154 for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++) 152 for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
155 z->managed_pages = 0; 153 z->managed_pages = 0;
156} 154}
@@ -159,8 +157,12 @@ void __init reset_all_zones_managed_pages(void)
159{ 157{
160 struct pglist_data *pgdat; 158 struct pglist_data *pgdat;
161 159
160 if (reset_managed_pages_done)
161 return;
162
162 for_each_online_pgdat(pgdat) 163 for_each_online_pgdat(pgdat)
163 reset_node_managed_pages(pgdat); 164 reset_node_managed_pages(pgdat);
165
164 reset_managed_pages_done = 1; 166 reset_managed_pages_done = 1;
165} 167}
166 168
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9cd36b822444..616a2c956b4b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -467,29 +467,6 @@ static inline void rmv_page_order(struct page *page)
467} 467}
468 468
469/* 469/*
470 * Locate the struct page for both the matching buddy in our
471 * pair (buddy1) and the combined O(n+1) page they form (page).
472 *
473 * 1) Any buddy B1 will have an order O twin B2 which satisfies
474 * the following equation:
475 * B2 = B1 ^ (1 << O)
476 * For example, if the starting buddy (buddy2) is #8 its order
477 * 1 buddy is #10:
478 * B2 = 8 ^ (1 << 1) = 8 ^ 2 = 10
479 *
480 * 2) Any buddy B will have an order O+1 parent P which
481 * satisfies the following equation:
482 * P = B & ~(1 << O)
483 *
484 * Assumption: *_mem_map is contiguous at least up to MAX_ORDER
485 */
486static inline unsigned long
487__find_buddy_index(unsigned long page_idx, unsigned int order)
488{
489 return page_idx ^ (1 << order);
490}
491
492/*
493 * This function checks whether a page is free && is the buddy 470 * This function checks whether a page is free && is the buddy
494 * we can do coalesce a page and its buddy if 471 * we can do coalesce a page and its buddy if
495 * (a) the buddy is not in a hole && 472 * (a) the buddy is not in a hole &&
@@ -569,6 +546,7 @@ static inline void __free_one_page(struct page *page,
569 unsigned long combined_idx; 546 unsigned long combined_idx;
570 unsigned long uninitialized_var(buddy_idx); 547 unsigned long uninitialized_var(buddy_idx);
571 struct page *buddy; 548 struct page *buddy;
549 int max_order = MAX_ORDER;
572 550
573 VM_BUG_ON(!zone_is_initialized(zone)); 551 VM_BUG_ON(!zone_is_initialized(zone));
574 552
@@ -577,13 +555,24 @@ static inline void __free_one_page(struct page *page,
577 return; 555 return;
578 556
579 VM_BUG_ON(migratetype == -1); 557 VM_BUG_ON(migratetype == -1);
558 if (is_migrate_isolate(migratetype)) {
559 /*
560 * We restrict max order of merging to prevent merge
561 * between freepages on isolate pageblock and normal
562 * pageblock. Without this, pageblock isolation
563 * could cause incorrect freepage accounting.
564 */
565 max_order = min(MAX_ORDER, pageblock_order + 1);
566 } else {
567 __mod_zone_freepage_state(zone, 1 << order, migratetype);
568 }
580 569
581 page_idx = pfn & ((1 << MAX_ORDER) - 1); 570 page_idx = pfn & ((1 << max_order) - 1);
582 571
583 VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page); 572 VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page);
584 VM_BUG_ON_PAGE(bad_range(zone, page), page); 573 VM_BUG_ON_PAGE(bad_range(zone, page), page);
585 574
586 while (order < MAX_ORDER-1) { 575 while (order < max_order - 1) {
587 buddy_idx = __find_buddy_index(page_idx, order); 576 buddy_idx = __find_buddy_index(page_idx, order);
588 buddy = page + (buddy_idx - page_idx); 577 buddy = page + (buddy_idx - page_idx);
589 if (!page_is_buddy(page, buddy, order)) 578 if (!page_is_buddy(page, buddy, order))
@@ -594,9 +583,11 @@ static inline void __free_one_page(struct page *page,
594 */ 583 */
595 if (page_is_guard(buddy)) { 584 if (page_is_guard(buddy)) {
596 clear_page_guard_flag(buddy); 585 clear_page_guard_flag(buddy);
597 set_page_private(page, 0); 586 set_page_private(buddy, 0);
598 __mod_zone_freepage_state(zone, 1 << order, 587 if (!is_migrate_isolate(migratetype)) {
599 migratetype); 588 __mod_zone_freepage_state(zone, 1 << order,
589 migratetype);
590 }
600 } else { 591 } else {
601 list_del(&buddy->lru); 592 list_del(&buddy->lru);
602 zone->free_area[order].nr_free--; 593 zone->free_area[order].nr_free--;
@@ -715,14 +706,12 @@ static void free_pcppages_bulk(struct zone *zone, int count,
715 /* must delete as __free_one_page list manipulates */ 706 /* must delete as __free_one_page list manipulates */
716 list_del(&page->lru); 707 list_del(&page->lru);
717 mt = get_freepage_migratetype(page); 708 mt = get_freepage_migratetype(page);
709 if (unlikely(has_isolate_pageblock(zone)))
710 mt = get_pageblock_migratetype(page);
711
718 /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */ 712 /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
719 __free_one_page(page, page_to_pfn(page), zone, 0, mt); 713 __free_one_page(page, page_to_pfn(page), zone, 0, mt);
720 trace_mm_page_pcpu_drain(page, 0, mt); 714 trace_mm_page_pcpu_drain(page, 0, mt);
721 if (likely(!is_migrate_isolate_page(page))) {
722 __mod_zone_page_state(zone, NR_FREE_PAGES, 1);
723 if (is_migrate_cma(mt))
724 __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1);
725 }
726 } while (--to_free && --batch_free && !list_empty(list)); 715 } while (--to_free && --batch_free && !list_empty(list));
727 } 716 }
728 spin_unlock(&zone->lock); 717 spin_unlock(&zone->lock);
@@ -739,9 +728,11 @@ static void free_one_page(struct zone *zone,
739 if (nr_scanned) 728 if (nr_scanned)
740 __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned); 729 __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
741 730
731 if (unlikely(has_isolate_pageblock(zone) ||
732 is_migrate_isolate(migratetype))) {
733 migratetype = get_pfnblock_migratetype(page, pfn);
734 }
742 __free_one_page(page, pfn, zone, order, migratetype); 735 __free_one_page(page, pfn, zone, order, migratetype);
743 if (unlikely(!is_migrate_isolate(migratetype)))
744 __mod_zone_freepage_state(zone, 1 << order, migratetype);
745 spin_unlock(&zone->lock); 736 spin_unlock(&zone->lock);
746} 737}
747 738
@@ -1484,7 +1475,7 @@ void split_page(struct page *page, unsigned int order)
1484} 1475}
1485EXPORT_SYMBOL_GPL(split_page); 1476EXPORT_SYMBOL_GPL(split_page);
1486 1477
1487static int __isolate_free_page(struct page *page, unsigned int order) 1478int __isolate_free_page(struct page *page, unsigned int order)
1488{ 1479{
1489 unsigned long watermark; 1480 unsigned long watermark;
1490 struct zone *zone; 1481 struct zone *zone;
@@ -6408,13 +6399,12 @@ int alloc_contig_range(unsigned long start, unsigned long end,
6408 6399
6409 /* Make sure the range is really isolated. */ 6400 /* Make sure the range is really isolated. */
6410 if (test_pages_isolated(outer_start, end, false)) { 6401 if (test_pages_isolated(outer_start, end, false)) {
6411 pr_warn("alloc_contig_range test_pages_isolated(%lx, %lx) failed\n", 6402 pr_info("%s: [%lx, %lx) PFNs busy\n",
6412 outer_start, end); 6403 __func__, outer_start, end);
6413 ret = -EBUSY; 6404 ret = -EBUSY;
6414 goto done; 6405 goto done;
6415 } 6406 }
6416 6407
6417
6418 /* Grab isolated pages from freelists. */ 6408 /* Grab isolated pages from freelists. */
6419 outer_end = isolate_freepages_range(&cc, outer_start, end); 6409 outer_end = isolate_freepages_range(&cc, outer_start, end);
6420 if (!outer_end) { 6410 if (!outer_end) {
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index d1473b2e9481..c8778f7e208e 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -60,6 +60,7 @@ out:
60 int migratetype = get_pageblock_migratetype(page); 60 int migratetype = get_pageblock_migratetype(page);
61 61
62 set_pageblock_migratetype(page, MIGRATE_ISOLATE); 62 set_pageblock_migratetype(page, MIGRATE_ISOLATE);
63 zone->nr_isolate_pageblock++;
63 nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE); 64 nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE);
64 65
65 __mod_zone_freepage_state(zone, -nr_pages, migratetype); 66 __mod_zone_freepage_state(zone, -nr_pages, migratetype);
@@ -75,16 +76,54 @@ void unset_migratetype_isolate(struct page *page, unsigned migratetype)
75{ 76{
76 struct zone *zone; 77 struct zone *zone;
77 unsigned long flags, nr_pages; 78 unsigned long flags, nr_pages;
79 struct page *isolated_page = NULL;
80 unsigned int order;
81 unsigned long page_idx, buddy_idx;
82 struct page *buddy;
78 83
79 zone = page_zone(page); 84 zone = page_zone(page);
80 spin_lock_irqsave(&zone->lock, flags); 85 spin_lock_irqsave(&zone->lock, flags);
81 if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE) 86 if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
82 goto out; 87 goto out;
83 nr_pages = move_freepages_block(zone, page, migratetype); 88
84 __mod_zone_freepage_state(zone, nr_pages, migratetype); 89 /*
90 * Because freepage with more than pageblock_order on isolated
91 * pageblock is restricted to merge due to freepage counting problem,
92 * it is possible that there is free buddy page.
93 * move_freepages_block() doesn't care of merge so we need other
94 * approach in order to merge them. Isolation and free will make
95 * these pages to be merged.
96 */
97 if (PageBuddy(page)) {
98 order = page_order(page);
99 if (order >= pageblock_order) {
100 page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
101 buddy_idx = __find_buddy_index(page_idx, order);
102 buddy = page + (buddy_idx - page_idx);
103
104 if (!is_migrate_isolate_page(buddy)) {
105 __isolate_free_page(page, order);
106 set_page_refcounted(page);
107 isolated_page = page;
108 }
109 }
110 }
111
112 /*
113 * If we isolate freepage with more than pageblock_order, there
114 * should be no freepage in the range, so we could avoid costly
115 * pageblock scanning for freepage moving.
116 */
117 if (!isolated_page) {
118 nr_pages = move_freepages_block(zone, page, migratetype);
119 __mod_zone_freepage_state(zone, nr_pages, migratetype);
120 }
85 set_pageblock_migratetype(page, migratetype); 121 set_pageblock_migratetype(page, migratetype);
122 zone->nr_isolate_pageblock--;
86out: 123out:
87 spin_unlock_irqrestore(&zone->lock, flags); 124 spin_unlock_irqrestore(&zone->lock, flags);
125 if (isolated_page)
126 __free_pages(isolated_page, order);
88} 127}
89 128
90static inline struct page * 129static inline struct page *
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 406944207b61..dcdab81bd240 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -259,6 +259,10 @@ struct kmem_cache *find_mergeable(size_t size, size_t align,
259 if (s->size - size >= sizeof(void *)) 259 if (s->size - size >= sizeof(void *))
260 continue; 260 continue;
261 261
262 if (IS_ENABLED(CONFIG_SLAB) && align &&
263 (align > s->align || s->align % align))
264 continue;
265
262 return s; 266 return s;
263 } 267 }
264 return NULL; 268 return NULL;