-rw-r--r--  MAINTAINERS                    |  1
-rw-r--r--  drivers/block/zram/zram_drv.c  |  3
-rw-r--r--  fs/notify/fsnotify.c           | 36
-rw-r--r--  fs/notify/fsnotify.h           |  4
-rw-r--r--  fs/notify/inode_mark.c         |  8
-rw-r--r--  fs/notify/mark.c               | 36
-rw-r--r--  fs/notify/vfsmount_mark.c      |  8
-rw-r--r--  include/linux/bootmem.h        |  1
-rw-r--r--  include/linux/mmzone.h         |  9
-rw-r--r--  include/linux/page-isolation.h |  8
-rw-r--r--  kernel/panic.c                 |  1
-rw-r--r--  mm/bootmem.c                   |  9
-rw-r--r--  mm/compaction.c                | 18
-rw-r--r--  mm/internal.h                  | 25
-rw-r--r--  mm/memory_hotplug.c            | 26
-rw-r--r--  mm/nobootmem.c                 |  8
-rw-r--r--  mm/page_alloc.c                | 68
-rw-r--r--  mm/page_isolation.c            | 43
-rw-r--r--  mm/slab_common.c               |  4
19 files changed, 240 insertions(+), 76 deletions(-)
diff --git a/MAINTAINERS b/MAINTAINERS
index ea4d0058fd1b..60b1163dba28 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4716,6 +4716,7 @@ L: linux-iio@vger.kernel.org
 S:      Maintained
 F:      drivers/iio/
 F:      drivers/staging/iio/
+F:      include/linux/iio/
 
 IKANOS/ADI EAGLE ADSL USB DRIVER
 M:      Matthieu Castet <castet.matthieu@free.fr>
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 2ad0b5bce44b..3920ee45aa59 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -560,7 +560,8 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
        }
 
        if (page_zero_filled(uncmem)) {
-               kunmap_atomic(user_mem);
+               if (user_mem)
+                       kunmap_atomic(user_mem);
                /* Free memory associated with this sector now. */
                bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
                zram_free_page(zram, index);
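
The guard matters because zram_bvec_write() handles partial writes through a bounce buffer: user_mem is unmapped early and set to NULL on that path, so the zero-page branch could previously reach kunmap_atomic(NULL). A minimal userspace sketch of the pattern, with stub names standing in for the kernel calls (not the real zram code):

#include <stdio.h>

/* stand-in for kunmap_atomic(); passing NULL here would be a bug */
static void unmap(void *addr)
{
        printf("unmapped %p\n", addr);
}

static void write_sector(void *user_mem, int is_partial_io)
{
        if (is_partial_io) {
                unmap(user_mem);        /* data already copied to a bounce buffer */
                user_mem = NULL;
        }
        /* ... zero-fill detection runs on the bounce buffer ... */
        if (user_mem)                   /* the fix: only unmap a live mapping */
                unmap(user_mem);
}

int main(void)
{
        int dummy;

        write_sector(&dummy, 0);        /* full-page write: one unmap */
        write_sector(&dummy, 1);        /* partial write: no second unmap */
        return 0;
}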
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 9d3e9c50066a..89326acd4561 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -229,8 +229,16 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
                                              &fsnotify_mark_srcu);
        }
 
+       /*
+        * We need to merge inode & vfsmount mark lists so that inode mark
+        * ignore masks are properly reflected for mount mark notifications.
+        * That's why this traversal is so complicated...
+        */
        while (inode_node || vfsmount_node) {
-               inode_group = vfsmount_group = NULL;
+               inode_group = NULL;
+               inode_mark = NULL;
+               vfsmount_group = NULL;
+               vfsmount_mark = NULL;
 
                if (inode_node) {
                        inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu),
@@ -244,21 +252,19 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
                        vfsmount_group = vfsmount_mark->group;
                }
 
-               if (inode_group > vfsmount_group) {
-                       /* handle inode */
-                       ret = send_to_group(to_tell, inode_mark, NULL, mask,
-                                           data, data_is, cookie, file_name);
-                       /* we didn't use the vfsmount_mark */
-                       vfsmount_group = NULL;
-               } else if (vfsmount_group > inode_group) {
-                       ret = send_to_group(to_tell, NULL, vfsmount_mark, mask,
-                                           data, data_is, cookie, file_name);
-                       inode_group = NULL;
-               } else {
-                       ret = send_to_group(to_tell, inode_mark, vfsmount_mark,
-                                           mask, data, data_is, cookie,
-                                           file_name);
+               if (inode_group && vfsmount_group) {
+                       int cmp = fsnotify_compare_groups(inode_group,
+                                                         vfsmount_group);
+                       if (cmp > 0) {
+                               inode_group = NULL;
+                               inode_mark = NULL;
+                       } else if (cmp < 0) {
+                               vfsmount_group = NULL;
+                               vfsmount_mark = NULL;
+                       }
                }
+               ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask,
+                                   data, data_is, cookie, file_name);
 
                if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS))
                        goto out;
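
The rewritten loop above is a textbook merge of two sorted lists driven by a three-way comparator (fsnotify_compare_groups(), added in fs/notify/mark.c below): when both list heads belong to the same group, its inode and vfsmount marks are delivered together so the ignore mask can be honoured. A self-contained sketch of the traversal, with simplified types and hypothetical names:

#include <stdio.h>

struct group { int priority; };

/* same contract as fsnotify_compare_groups(): >0 means b goes first,
 * <0 means a goes first, 0 means handle both marks together */
static int cmp_groups(const struct group *a, const struct group *b)
{
        if (a == b)
                return 0;
        if (!a)
                return 1;
        if (!b)
                return -1;
        if (a->priority < b->priority)
                return 1;
        if (a->priority > b->priority)
                return -1;
        return (a < b) ? 1 : -1;
}

/* merge two lists already sorted by cmp_groups(), one delivery per round */
static void merge(struct group **la, int na, struct group **lb, int nb)
{
        int i = 0, j = 0;

        while (i < na || j < nb) {
                struct group *a = (i < na) ? la[i] : NULL;
                struct group *b = (j < nb) ? lb[j] : NULL;
                int cmp = cmp_groups(a, b);

                if (cmp > 0)
                        a = NULL;       /* b's group has priority this round */
                else if (cmp < 0)
                        b = NULL;       /* a's group has priority this round */

                /* send_to_group() analogue: both non-NULL iff same group */
                printf("deliver inode=%p mount=%p\n", (void *)a, (void *)b);

                if (a)
                        i++;
                if (b)
                        j++;
        }
}

int main(void)
{
        struct group g1 = { 2 }, g2 = { 1 };
        struct group *inode_list[] = { &g1, &g2 };
        struct group *mount_list[] = { &g2 };

        merge(inode_list, 2, mount_list, 1);    /* g2 handled once, jointly */
        return 0;
}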
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 9c0898c4cfe1..3b68b0ae0a97 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -12,6 +12,10 @@ extern void fsnotify_flush_notify(struct fsnotify_group *group);
 /* protects reads of inode and vfsmount marks list */
 extern struct srcu_struct fsnotify_mark_srcu;
 
+/* compare two groups for sorting of marks lists */
+extern int fsnotify_compare_groups(struct fsnotify_group *a,
+                                   struct fsnotify_group *b);
+
 extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark,
                                                 __u32 mask);
 /* add a mark to an inode */
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index e8497144b323..dfbf5447eea4 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -194,6 +194,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
 {
        struct fsnotify_mark *lmark, *last = NULL;
        int ret = 0;
+       int cmp;
 
        mark->flags |= FSNOTIFY_MARK_FLAG_INODE;
 
@@ -219,11 +220,8 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
                        goto out;
                }
 
-               if (mark->group->priority < lmark->group->priority)
-                       continue;
-
-               if ((mark->group->priority == lmark->group->priority) &&
-                   (mark->group < lmark->group))
+               cmp = fsnotify_compare_groups(lmark->group, mark->group);
+               if (cmp < 0)
                        continue;
 
                hlist_add_before_rcu(&mark->i.i_list, &lmark->i.i_list);
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index d90deaa08e78..34c38fabf514 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -210,6 +210,42 @@ void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mas
 }
 
 /*
+ * Sorting function for lists of fsnotify marks.
+ *
+ * Fanotify supports different notification classes (reflected as priority of
+ * notification group). Events shall be passed to notification groups in
+ * decreasing priority order. To achieve this marks in notification lists for
+ * inodes and vfsmounts are sorted so that priorities of corresponding groups
+ * are descending.
+ *
+ * Furthermore correct handling of the ignore mask requires processing inode
+ * and vfsmount marks of each group together. Using the group address as
+ * further sort criterion provides a unique sorting order and thus we can
+ * merge inode and vfsmount lists of marks in linear time and find groups
+ * present in both lists.
+ *
+ * A return value of 1 signifies that b has priority over a.
+ * A return value of 0 signifies that the two marks have to be handled together.
+ * A return value of -1 signifies that a has priority over b.
+ */
+int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
+{
+       if (a == b)
+               return 0;
+       if (!a)
+               return 1;
+       if (!b)
+               return -1;
+       if (a->priority < b->priority)
+               return 1;
+       if (a->priority > b->priority)
+               return -1;
+       if (a < b)
+               return 1;
+       return -1;
+}
+
+/*
  * Attach an initialized mark to a given group and fs object.
  * These marks may be used for the fsnotify backend to determine which
  * event types should be delivered to which group.
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index ac851e8376b1..faefa72a11eb 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -153,6 +153,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
        struct mount *m = real_mount(mnt);
        struct fsnotify_mark *lmark, *last = NULL;
        int ret = 0;
+       int cmp;
 
        mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT;
 
@@ -178,11 +179,8 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
                        goto out;
                }
 
-               if (mark->group->priority < lmark->group->priority)
-                       continue;
-
-               if ((mark->group->priority == lmark->group->priority) &&
-                   (mark->group < lmark->group))
+               cmp = fsnotify_compare_groups(lmark->group, mark->group);
+               if (cmp < 0)
                        continue;
 
                hlist_add_before_rcu(&mark->m.m_list, &lmark->m.m_list);
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 4e2bd4c95b66..0995c2de8162 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -46,6 +46,7 @@ extern unsigned long init_bootmem_node(pg_data_t *pgdat,
 extern unsigned long init_bootmem(unsigned long addr, unsigned long memend);
 
 extern unsigned long free_all_bootmem(void);
+extern void reset_node_managed_pages(pg_data_t *pgdat);
 extern void reset_all_zones_managed_pages(void);
 
 extern void free_bootmem_node(pg_data_t *pgdat,
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 48bf12ef6620..ffe66e381c04 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -431,6 +431,15 @@ struct zone {
         */
        int                     nr_migrate_reserve_block;
 
+#ifdef CONFIG_MEMORY_ISOLATION
+       /*
+        * Number of isolated pageblock. It is used to solve incorrect
+        * freepage counting problem due to racy retrieving migratetype
+        * of pageblock. Protected by zone->lock.
+        */
+       unsigned long           nr_isolate_pageblock;
+#endif
+
 #ifdef CONFIG_MEMORY_HOTPLUG
        /* see spanned/present_pages for more description */
        seqlock_t               span_seqlock;
diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index 3fff8e774067..2dc1e1697b45 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -2,6 +2,10 @@
 #define __LINUX_PAGEISOLATION_H
 
 #ifdef CONFIG_MEMORY_ISOLATION
+static inline bool has_isolate_pageblock(struct zone *zone)
+{
+       return zone->nr_isolate_pageblock;
+}
 static inline bool is_migrate_isolate_page(struct page *page)
 {
        return get_pageblock_migratetype(page) == MIGRATE_ISOLATE;
@@ -11,6 +15,10 @@ static inline bool is_migrate_isolate(int migratetype)
        return migratetype == MIGRATE_ISOLATE;
 }
 #else
+static inline bool has_isolate_pageblock(struct zone *zone)
+{
+       return false;
+}
 static inline bool is_migrate_isolate_page(struct page *page)
 {
        return false;
diff --git a/kernel/panic.c b/kernel/panic.c
index d09dc5c32c67..cf80672b7924 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -244,6 +244,7 @@ static const struct tnt tnts[] = {
  *  'I' - Working around severe firmware bug.
  *  'O' - Out-of-tree module has been loaded.
  *  'E' - Unsigned module has been loaded.
+ *  'L' - A soft lockup has previously occurred.
  *
  *     The string is overwritten by the next call to print_tainted().
  */
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 8a000cebb0d7..477be696511d 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -243,13 +243,10 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 
 static int reset_managed_pages_done __initdata;
 
-static inline void __init reset_node_managed_pages(pg_data_t *pgdat)
+void reset_node_managed_pages(pg_data_t *pgdat)
 {
        struct zone *z;
 
-       if (reset_managed_pages_done)
-               return;
-
        for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
                z->managed_pages = 0;
 }
@@ -258,8 +255,12 @@ void __init reset_all_zones_managed_pages(void)
 {
        struct pglist_data *pgdat;
 
+       if (reset_managed_pages_done)
+               return;
+
        for_each_online_pgdat(pgdat)
                reset_node_managed_pages(pgdat);
+
        reset_managed_pages_done = 1;
 }
 
diff --git a/mm/compaction.c b/mm/compaction.c
index ec74cf0123ef..f9792ba3537c 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -479,6 +479,16 @@ isolate_freepages_range(struct compact_control *cc,
 
                block_end_pfn = min(block_end_pfn, end_pfn);
 
+               /*
+                * pfn could pass the block_end_pfn if isolated freepage
+                * is more than pageblock order. In this case, we adjust
+                * scanning range to right one.
+                */
+               if (pfn >= block_end_pfn) {
+                       block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
+                       block_end_pfn = min(block_end_pfn, end_pfn);
+               }
+
                if (!pageblock_pfn_to_page(pfn, block_end_pfn, cc->zone))
                        break;
 
@@ -1029,8 +1039,12 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
        }
 
        acct_isolated(zone, cc);
-       /* Record where migration scanner will be restarted */
-       cc->migrate_pfn = low_pfn;
+       /*
+        * Record where migration scanner will be restarted. If we end up in
+        * the same pageblock as the free scanner, make the scanners fully
+        * meet so that compact_finished() terminates compaction.
+        */
+       cc->migrate_pfn = (end_pfn <= cc->free_pfn) ? low_pfn : cc->free_pfn;
 
        return cc->nr_migratepages ? ISOLATE_SUCCESS : ISOLATE_NONE;
 }
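
The first hunk's adjustment is plain round-up arithmetic: when an isolated free page larger than a pageblock pushes pfn past block_end_pfn, the scanner skips ahead to the end of the pageblock that now contains pfn. A quick standalone check of what ALIGN() computes there (pageblock_nr_pages = 512 is an assumed value, matching 4 KiB pages with pageblock order 9):

#include <stdio.h>

/* round x up to the next multiple of the power-of-two a, like ALIGN() */
#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((unsigned long)(a) - 1))

int main(void)
{
        unsigned long pageblock_nr_pages = 512;
        unsigned long end_pfn = 4096;
        unsigned long pfn = 1600;       /* overshot the old block_end_pfn of 1536 */
        unsigned long block_end_pfn = ALIGN_UP(pfn + 1, pageblock_nr_pages);

        if (block_end_pfn > end_pfn)
                block_end_pfn = end_pfn;

        printf("rescan up to pfn %lu\n", block_end_pfn);       /* prints 2048 */
        return 0;
}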
diff --git a/mm/internal.h b/mm/internal.h
index 829304090b90..a4f90ba7068e 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -108,6 +108,31 @@ extern pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address);
 /*
  * in mm/page_alloc.c
  */
+
+/*
+ * Locate the struct page for both the matching buddy in our
+ * pair (buddy1) and the combined O(n+1) page they form (page).
+ *
+ * 1) Any buddy B1 will have an order O twin B2 which satisfies
+ * the following equation:
+ *     B2 = B1 ^ (1 << O)
+ * For example, if the starting buddy (buddy2) is #8 its order
+ * 1 buddy is #10:
+ *     B2 = 8 ^ (1 << 1) = 8 ^ 2 = 10
+ *
+ * 2) Any buddy B will have an order O+1 parent P which
+ * satisfies the following equation:
+ *     P = B & ~(1 << O)
+ *
+ * Assumption: *_mem_map is contiguous at least up to MAX_ORDER
+ */
+static inline unsigned long
+__find_buddy_index(unsigned long page_idx, unsigned int order)
+{
+       return page_idx ^ (1 << order);
+}
+
+extern int __isolate_free_page(struct page *page, unsigned int order);
 extern void __free_pages_bootmem(struct page *page, unsigned int order);
 extern void prep_compound_page(struct page *page, unsigned long order);
 #ifdef CONFIG_MEMORY_FAILURE
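
The equations in the relocated comment are easy to verify; a tiny standalone program reproducing the worked example (the order-1 buddy of page #8 is #10, and their combined parent is #8):

#include <stdio.h>

/* mirrors __find_buddy_index(): the order-O buddy differs in exactly bit O */
static unsigned long find_buddy_index(unsigned long page_idx, unsigned int order)
{
        return page_idx ^ (1UL << order);
}

int main(void)
{
        unsigned long idx = 8;
        unsigned int order = 1;
        unsigned long buddy = find_buddy_index(idx, order);     /* 8 ^ 2 = 10 */
        unsigned long parent = idx & ~(1UL << order);           /* P = B & ~(1 << O) */

        printf("buddy=%lu parent=%lu\n", buddy, parent);        /* buddy=10 parent=8 */
        return 0;
}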
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 252e1dbbed86..1bf4807cb21e 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -31,6 +31,7 @@
 #include <linux/stop_machine.h>
 #include <linux/hugetlb.h>
 #include <linux/memblock.h>
+#include <linux/bootmem.h>
 
 #include <asm/tlbflush.h>
 
@@ -1066,6 +1067,16 @@ out:
 }
 #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
 
+static void reset_node_present_pages(pg_data_t *pgdat)
+{
+       struct zone *z;
+
+       for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
+               z->present_pages = 0;
+
+       pgdat->node_present_pages = 0;
+}
+
 /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
 static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
 {
@@ -1096,6 +1107,21 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
        build_all_zonelists(pgdat, NULL);
        mutex_unlock(&zonelists_mutex);
 
+       /*
+        * zone->managed_pages is set to an approximate value in
+        * free_area_init_core(), which will cause
+        * /sys/device/system/node/nodeX/meminfo has wrong data.
+        * So reset it to 0 before any memory is onlined.
+        */
+       reset_node_managed_pages(pgdat);
+
+       /*
+        * When memory is hot-added, all the memory is in offline state. So
+        * clear all zones' present_pages because they will be updated in
+        * online_pages() and offline_pages().
+        */
+       reset_node_present_pages(pgdat);
+
        return pgdat;
 }
 
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index 7c7ab32ee503..90b50468333e 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -145,12 +145,10 @@ static unsigned long __init free_low_memory_core_early(void)
 
 static int reset_managed_pages_done __initdata;
 
-static inline void __init reset_node_managed_pages(pg_data_t *pgdat)
+void reset_node_managed_pages(pg_data_t *pgdat)
 {
        struct zone *z;
 
-       if (reset_managed_pages_done)
-               return;
        for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
                z->managed_pages = 0;
 }
@@ -159,8 +157,12 @@ void __init reset_all_zones_managed_pages(void)
 {
        struct pglist_data *pgdat;
 
+       if (reset_managed_pages_done)
+               return;
+
        for_each_online_pgdat(pgdat)
                reset_node_managed_pages(pgdat);
+
        reset_managed_pages_done = 1;
 }
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9cd36b822444..616a2c956b4b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -467,29 +467,6 @@ static inline void rmv_page_order(struct page *page)
 }
 
 /*
- * Locate the struct page for both the matching buddy in our
- * pair (buddy1) and the combined O(n+1) page they form (page).
- *
- * 1) Any buddy B1 will have an order O twin B2 which satisfies
- * the following equation:
- *     B2 = B1 ^ (1 << O)
- * For example, if the starting buddy (buddy2) is #8 its order
- * 1 buddy is #10:
- *     B2 = 8 ^ (1 << 1) = 8 ^ 2 = 10
- *
- * 2) Any buddy B will have an order O+1 parent P which
- * satisfies the following equation:
- *     P = B & ~(1 << O)
- *
- * Assumption: *_mem_map is contiguous at least up to MAX_ORDER
- */
-static inline unsigned long
-__find_buddy_index(unsigned long page_idx, unsigned int order)
-{
-       return page_idx ^ (1 << order);
-}
-
-/*
  * This function checks whether a page is free && is the buddy
  * we can do coalesce a page and its buddy if
  * (a) the buddy is not in a hole &&
@@ -569,6 +546,7 @@ static inline void __free_one_page(struct page *page,
        unsigned long combined_idx;
        unsigned long uninitialized_var(buddy_idx);
        struct page *buddy;
+       int max_order = MAX_ORDER;
 
        VM_BUG_ON(!zone_is_initialized(zone));
 
@@ -577,13 +555,24 @@ static inline void __free_one_page(struct page *page,
                return;
 
        VM_BUG_ON(migratetype == -1);
+       if (is_migrate_isolate(migratetype)) {
+               /*
+                * We restrict max order of merging to prevent merge
+                * between freepages on isolate pageblock and normal
+                * pageblock. Without this, pageblock isolation
+                * could cause incorrect freepage accounting.
+                */
+               max_order = min(MAX_ORDER, pageblock_order + 1);
+       } else {
+               __mod_zone_freepage_state(zone, 1 << order, migratetype);
+       }
 
-       page_idx = pfn & ((1 << MAX_ORDER) - 1);
+       page_idx = pfn & ((1 << max_order) - 1);
 
        VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page);
        VM_BUG_ON_PAGE(bad_range(zone, page), page);
 
-       while (order < MAX_ORDER-1) {
+       while (order < max_order - 1) {
                buddy_idx = __find_buddy_index(page_idx, order);
                buddy = page + (buddy_idx - page_idx);
                if (!page_is_buddy(page, buddy, order))
@@ -594,9 +583,11 @@ static inline void __free_one_page(struct page *page,
                 */
                if (page_is_guard(buddy)) {
                        clear_page_guard_flag(buddy);
-                       set_page_private(page, 0);
-                       __mod_zone_freepage_state(zone, 1 << order,
-                                                 migratetype);
+                       set_page_private(buddy, 0);
+                       if (!is_migrate_isolate(migratetype)) {
+                               __mod_zone_freepage_state(zone, 1 << order,
+                                                         migratetype);
+                       }
                } else {
                        list_del(&buddy->lru);
                        zone->free_area[order].nr_free--;
@@ -715,14 +706,12 @@ static void free_pcppages_bulk(struct zone *zone, int count,
                        /* must delete as __free_one_page list manipulates */
                        list_del(&page->lru);
                        mt = get_freepage_migratetype(page);
+                       if (unlikely(has_isolate_pageblock(zone)))
+                               mt = get_pageblock_migratetype(page);
+
                        /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
                        __free_one_page(page, page_to_pfn(page), zone, 0, mt);
                        trace_mm_page_pcpu_drain(page, 0, mt);
-                       if (likely(!is_migrate_isolate_page(page))) {
-                               __mod_zone_page_state(zone, NR_FREE_PAGES, 1);
-                               if (is_migrate_cma(mt))
-                                       __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1);
-                       }
                } while (--to_free && --batch_free && !list_empty(list));
        }
        spin_unlock(&zone->lock);
@@ -739,9 +728,11 @@ static void free_one_page(struct zone *zone,
        if (nr_scanned)
                __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
 
+       if (unlikely(has_isolate_pageblock(zone) ||
+               is_migrate_isolate(migratetype))) {
+               migratetype = get_pfnblock_migratetype(page, pfn);
+       }
        __free_one_page(page, pfn, zone, order, migratetype);
-       if (unlikely(!is_migrate_isolate(migratetype)))
-               __mod_zone_freepage_state(zone, 1 << order, migratetype);
        spin_unlock(&zone->lock);
 }
 
@@ -1484,7 +1475,7 @@ void split_page(struct page *page, unsigned int order)
 }
 EXPORT_SYMBOL_GPL(split_page);
 
-static int __isolate_free_page(struct page *page, unsigned int order)
+int __isolate_free_page(struct page *page, unsigned int order)
 {
        unsigned long watermark;
        struct zone *zone;
@@ -6408,13 +6399,12 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 
        /* Make sure the range is really isolated. */
        if (test_pages_isolated(outer_start, end, false)) {
-               pr_warn("alloc_contig_range test_pages_isolated(%lx, %lx) failed\n",
-                      outer_start, end);
+               pr_info("%s: [%lx, %lx) PFNs busy\n",
+                       __func__, outer_start, end);
                ret = -EBUSY;
                goto done;
        }
 
-
        /* Grab isolated pages from freelists. */
        outer_end = isolate_freepages_range(&cc, outer_start, end);
        if (!outer_end) {
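
On the __free_one_page() change: capping max_order at pageblock_order + 1 forbids exactly the merge step whose buddy lies in the neighbouring pageblock, which may have a different isolation state. A quick arithmetic check (pageblock_order = 9 is an assumption, the x86-64 default with 4 KiB pages):

#include <stdio.h>

int main(void)
{
        unsigned int pageblock_order = 9;
        unsigned long pfn = 512;        /* first pfn of pageblock #1, isolated */
        unsigned long buddy = pfn ^ (1UL << pageblock_order);

        /* merging at order 9 would pair pageblock #1 with pageblock #0;
         * with max_order = pageblock_order + 1 the loop stops before it */
        printf("order-%u buddy of pfn %lu is pfn %lu (pageblock %lu)\n",
               pageblock_order, pfn, buddy, buddy >> pageblock_order);
        return 0;
}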
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index d1473b2e9481..c8778f7e208e 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -60,6 +60,7 @@ out:
                int migratetype = get_pageblock_migratetype(page);
 
                set_pageblock_migratetype(page, MIGRATE_ISOLATE);
+               zone->nr_isolate_pageblock++;
                nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE);
 
                __mod_zone_freepage_state(zone, -nr_pages, migratetype);
@@ -75,16 +76,54 @@ void unset_migratetype_isolate(struct page *page, unsigned migratetype)
 {
        struct zone *zone;
        unsigned long flags, nr_pages;
+       struct page *isolated_page = NULL;
+       unsigned int order;
+       unsigned long page_idx, buddy_idx;
+       struct page *buddy;
 
        zone = page_zone(page);
        spin_lock_irqsave(&zone->lock, flags);
        if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
                goto out;
-       nr_pages = move_freepages_block(zone, page, migratetype);
-       __mod_zone_freepage_state(zone, nr_pages, migratetype);
+
+       /*
+        * Because freepage with more than pageblock_order on isolated
+        * pageblock is restricted to merge due to freepage counting problem,
+        * it is possible that there is free buddy page.
+        * move_freepages_block() doesn't care of merge so we need other
+        * approach in order to merge them. Isolation and free will make
+        * these pages to be merged.
+        */
+       if (PageBuddy(page)) {
+               order = page_order(page);
+               if (order >= pageblock_order) {
+                       page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
+                       buddy_idx = __find_buddy_index(page_idx, order);
+                       buddy = page + (buddy_idx - page_idx);
+
+                       if (!is_migrate_isolate_page(buddy)) {
+                               __isolate_free_page(page, order);
+                               set_page_refcounted(page);
+                               isolated_page = page;
+                       }
+               }
+       }
+
+       /*
+        * If we isolate freepage with more than pageblock_order, there
+        * should be no freepage in the range, so we could avoid costly
+        * pageblock scanning for freepage moving.
+        */
+       if (!isolated_page) {
+               nr_pages = move_freepages_block(zone, page, migratetype);
+               __mod_zone_freepage_state(zone, nr_pages, migratetype);
+       }
        set_pageblock_migratetype(page, migratetype);
+       zone->nr_isolate_pageblock--;
 out:
        spin_unlock_irqrestore(&zone->lock, flags);
+       if (isolated_page)
+               __free_pages(isolated_page, order);
 }
 
 static inline struct page *
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 406944207b61..dcdab81bd240 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -259,6 +259,10 @@ struct kmem_cache *find_mergeable(size_t size, size_t align,
                if (s->size - size >= sizeof(void *))
                        continue;
 
+               if (IS_ENABLED(CONFIG_SLAB) && align &&
+                   (align > s->align || s->align % align))
+                       continue;
+
                return s;
        }
        return NULL;
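
The new test rejects an existing cache whose alignment cannot serve the requested one: the existing alignment must be at least as large and an exact multiple of it, or objects in the merged cache would not land on the requested boundary. A small standalone check of the predicate (a hypothetical rewrite, not the kernel code):

#include <stdio.h>
#include <stdbool.h>

/* mirrors the find_mergeable() guard: true means the candidate cache's
 * alignment is incompatible with the requested one */
static bool align_incompatible(unsigned int requested, unsigned int existing)
{
        return requested && (requested > existing || existing % requested);
}

int main(void)
{
        printf("%d\n", align_incompatible(8, 64));      /* 0: 64 satisfies 8 */
        printf("%d\n", align_incompatible(64, 8));      /* 1: 8 is too small */
        printf("%d\n", align_incompatible(48, 64));     /* 1: 64 % 48 != 0 */
        return 0;
}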