author     Linus Torvalds <torvalds@linux-foundation.org>  2017-01-24 19:54:39 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2017-01-24 19:54:39 -0500
commit     883af14e67e8b8702b5560aa64c888c0cd0bd66c
tree       74e3a6b53f5fad9f7848ab1b9f6921b7012940a4
parent     0263d4ebd94b36280608e296cba39b924b6e832b
parent     aab45453ff5c77200c6da4ac909f7a4392aed17e
Merge branch 'akpm' (patches from Andrew)
Merge fixes from Andrew Morton:
"26 fixes"
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (26 commits)
MAINTAINERS: add Dan Streetman to zbud maintainers
MAINTAINERS: add Dan Streetman to zswap maintainers
mm: do not export ioremap_page_range symbol for external module
mn10300: fix build error of missing fpu_save()
romfs: use different way to generate fsid for BLOCK or MTD
frv: add missing atomic64 operations
mm, page_alloc: fix premature OOM when racing with cpuset mems update
mm, page_alloc: move cpuset seqcount checking to slowpath
mm, page_alloc: fix fast-path race with cpuset update or removal
mm, page_alloc: fix check for NULL preferred_zone
kernel/panic.c: add missing \n
fbdev: color map copying bounds checking
frv: add atomic64_add_unless()
mm/mempolicy.c: do not put mempolicy before using its nodemask
radix-tree: fix private list warnings
Documentation/filesystems/proc.txt: add VmPin
mm, memcg: do not retry precharge charges
proc: add a schedule point in proc_pid_readdir()
mm: alloc_contig: re-allow CMA to compact FS pages
mm/slub.c: trace free objects at KERN_INFO
...
-rw-r--r--  Documentation/filesystems/proc.txt    |  5
-rw-r--r--  MAINTAINERS                           |  2
-rw-r--r--  arch/frv/include/asm/atomic.h         | 35
-rw-r--r--  arch/mn10300/include/asm/switch_to.h  |  2
-rw-r--r--  drivers/base/memory.c                 |  4
-rw-r--r--  drivers/memstick/core/memstick.c      |  2
-rw-r--r--  drivers/video/fbdev/core/fbcmap.c     | 26
-rw-r--r--  fs/Kconfig                            |  1
-rw-r--r--  fs/dax.c                              |  2
-rw-r--r--  fs/ext2/Kconfig                       |  1
-rw-r--r--  fs/ext4/Kconfig                       |  1
-rw-r--r--  fs/proc/base.c                        |  2
-rw-r--r--  fs/romfs/super.c                      | 23
-rw-r--r--  fs/userfaultfd.c                      | 37
-rw-r--r--  include/linux/memory_hotplug.h        |  4
-rw-r--r--  include/linux/mmzone.h                |  6
-rw-r--r--  include/linux/nmi.h                   |  1
-rw-r--r--  kernel/panic.c                        |  2
-rw-r--r--  kernel/watchdog.c                     |  9
-rw-r--r--  kernel/watchdog_hld.c                 |  3
-rw-r--r--  lib/ioremap.c                         |  1
-rw-r--r--  lib/radix-tree.c                      |  2
-rw-r--r--  mm/huge_memory.c                      | 18
-rw-r--r--  mm/memcontrol.c                       |  4
-rw-r--r--  mm/memory_hotplug.c                   | 28
-rw-r--r--  mm/mempolicy.c                        |  2
-rw-r--r--  mm/page_alloc.c                       | 69
-rw-r--r--  mm/slub.c                             | 23
28 files changed, 237 insertions(+), 78 deletions(-)
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 72624a16b792..c94b4675d021 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -212,10 +212,11 @@ asynchronous manner and the value may not be very precise. To see a precise | |||
212 | snapshot of a moment, you can see /proc/<pid>/smaps file and scan page table. | 212 | snapshot of a moment, you can see /proc/<pid>/smaps file and scan page table. |
213 | It's slow but very precise. | 213 | It's slow but very precise. |
214 | 214 | ||
215 | Table 1-2: Contents of the status files (as of 4.1) | 215 | Table 1-2: Contents of the status files (as of 4.8) |
216 | .............................................................................. | 216 | .............................................................................. |
217 | Field Content | 217 | Field Content |
218 | Name filename of the executable | 218 | Name filename of the executable |
219 | Umask file mode creation mask | ||
219 | State state (R is running, S is sleeping, D is sleeping | 220 | State state (R is running, S is sleeping, D is sleeping |
220 | in an uninterruptible wait, Z is zombie, | 221 | in an uninterruptible wait, Z is zombie, |
221 | T is traced or stopped) | 222 | T is traced or stopped) |
@@ -226,7 +227,6 @@ Table 1-2: Contents of the status files (as of 4.1) | |||
226 | TracerPid PID of process tracing this process (0 if not) | 227 | TracerPid PID of process tracing this process (0 if not) |
227 | Uid Real, effective, saved set, and file system UIDs | 228 | Uid Real, effective, saved set, and file system UIDs |
228 | Gid Real, effective, saved set, and file system GIDs | 229 | Gid Real, effective, saved set, and file system GIDs |
229 | Umask file mode creation mask | ||
230 | FDSize number of file descriptor slots currently allocated | 230 | FDSize number of file descriptor slots currently allocated |
231 | Groups supplementary group list | 231 | Groups supplementary group list |
232 | NStgid descendant namespace thread group ID hierarchy | 232 | NStgid descendant namespace thread group ID hierarchy |
@@ -236,6 +236,7 @@ Table 1-2: Contents of the status files (as of 4.1) | |||
236 | VmPeak peak virtual memory size | 236 | VmPeak peak virtual memory size |
237 | VmSize total program size | 237 | VmSize total program size |
238 | VmLck locked memory size | 238 | VmLck locked memory size |
239 | VmPin pinned memory size | ||
239 | VmHWM peak resident set size ("high water mark") | 240 | VmHWM peak resident set size ("high water mark") |
240 | VmRSS size of memory portions. It contains the three | 241 | VmRSS size of memory portions. It contains the three |
241 | following parts (VmRSS = RssAnon + RssFile + RssShmem) | 242 | following parts (VmRSS = RssAnon + RssFile + RssShmem) |
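With VmPin now documented next to the other Vm* rows, the whole table can be checked against a live process. A minimal user-space reader (not part of the patch, just an illustration) that prints the Vm* lines of its own status file:

#include <stdio.h>
#include <string.h>

int main(void)
{
    FILE *f = fopen("/proc/self/status", "r");
    char line[256];

    if (!f) {
        perror("fopen");
        return 1;
    }
    while (fgets(line, sizeof(line), f)) {
        /* VmPeak, VmSize, VmLck, VmPin, VmHWM, VmRSS, ... */
        if (strncmp(line, "Vm", 2) == 0)
            fputs(line, stdout);
    }
    fclose(f);
    return 0;
}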
diff --git a/MAINTAINERS b/MAINTAINERS
index 795942555b4d..50e6f7c561d8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13625,6 +13625,7 @@ F: drivers/net/hamradio/z8530.h | |||
13625 | 13625 | ||
13626 | ZBUD COMPRESSED PAGE ALLOCATOR | 13626 | ZBUD COMPRESSED PAGE ALLOCATOR |
13627 | M: Seth Jennings <sjenning@redhat.com> | 13627 | M: Seth Jennings <sjenning@redhat.com> |
13628 | M: Dan Streetman <ddstreet@ieee.org> | ||
13628 | L: linux-mm@kvack.org | 13629 | L: linux-mm@kvack.org |
13629 | S: Maintained | 13630 | S: Maintained |
13630 | F: mm/zbud.c | 13631 | F: mm/zbud.c |
@@ -13680,6 +13681,7 @@ F: Documentation/vm/zsmalloc.txt | |||
13680 | 13681 | ||
13681 | ZSWAP COMPRESSED SWAP CACHING | 13682 | ZSWAP COMPRESSED SWAP CACHING |
13682 | M: Seth Jennings <sjenning@redhat.com> | 13683 | M: Seth Jennings <sjenning@redhat.com> |
13684 | M: Dan Streetman <ddstreet@ieee.org> | ||
13683 | L: linux-mm@kvack.org | 13685 | L: linux-mm@kvack.org |
13684 | S: Maintained | 13686 | S: Maintained |
13685 | F: mm/zswap.c | 13687 | F: mm/zswap.c |
diff --git a/arch/frv/include/asm/atomic.h b/arch/frv/include/asm/atomic.h
index 1c2a5e264fc7..e93c9494503a 100644
--- a/arch/frv/include/asm/atomic.h
+++ b/arch/frv/include/asm/atomic.h
@@ -139,7 +139,7 @@ static inline void atomic64_dec(atomic64_t *v) | |||
139 | #define atomic64_sub_and_test(i,v) (atomic64_sub_return((i), (v)) == 0) | 139 | #define atomic64_sub_and_test(i,v) (atomic64_sub_return((i), (v)) == 0) |
140 | #define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0) | 140 | #define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0) |
141 | #define atomic64_inc_and_test(v) (atomic64_inc_return((v)) == 0) | 141 | #define atomic64_inc_and_test(v) (atomic64_inc_return((v)) == 0) |
142 | 142 | #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) | |
143 | 143 | ||
144 | #define atomic_cmpxchg(v, old, new) (cmpxchg(&(v)->counter, old, new)) | 144 | #define atomic_cmpxchg(v, old, new) (cmpxchg(&(v)->counter, old, new)) |
145 | #define atomic_xchg(v, new) (xchg(&(v)->counter, new)) | 145 | #define atomic_xchg(v, new) (xchg(&(v)->counter, new)) |
@@ -161,6 +161,39 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u) | |||
161 | return c; | 161 | return c; |
162 | } | 162 | } |
163 | 163 | ||
164 | static inline int atomic64_add_unless(atomic64_t *v, long long i, long long u) | ||
165 | { | ||
166 | long long c, old; | ||
167 | |||
168 | c = atomic64_read(v); | ||
169 | for (;;) { | ||
170 | if (unlikely(c == u)) | ||
171 | break; | ||
172 | old = atomic64_cmpxchg(v, c, c + i); | ||
173 | if (likely(old == c)) | ||
174 | break; | ||
175 | c = old; | ||
176 | } | ||
177 | return c != u; | ||
178 | } | ||
179 | |||
180 | static inline long long atomic64_dec_if_positive(atomic64_t *v) | ||
181 | { | ||
182 | long long c, old, dec; | ||
183 | |||
184 | c = atomic64_read(v); | ||
185 | for (;;) { | ||
186 | dec = c - 1; | ||
187 | if (unlikely(dec < 0)) | ||
188 | break; | ||
189 | old = atomic64_cmpxchg((v), c, dec); | ||
190 | if (likely(old == c)) | ||
191 | break; | ||
192 | c = old; | ||
193 | } | ||
194 | return dec; | ||
195 | } | ||
196 | |||
164 | #define ATOMIC_OP(op) \ | 197 | #define ATOMIC_OP(op) \ |
165 | static inline int atomic_fetch_##op(int i, atomic_t *v) \ | 198 | static inline int atomic_fetch_##op(int i, atomic_t *v) \ |
166 | { \ | 199 | { \ |
diff --git a/arch/mn10300/include/asm/switch_to.h b/arch/mn10300/include/asm/switch_to.h
index 393d311735c8..67e333aa7629 100644
--- a/arch/mn10300/include/asm/switch_to.h
+++ b/arch/mn10300/include/asm/switch_to.h
@@ -16,7 +16,7 @@ | |||
16 | struct task_struct; | 16 | struct task_struct; |
17 | struct thread_struct; | 17 | struct thread_struct; |
18 | 18 | ||
19 | #if !defined(CONFIG_LAZY_SAVE_FPU) | 19 | #if defined(CONFIG_FPU) && !defined(CONFIG_LAZY_SAVE_FPU) |
20 | struct fpu_state_struct; | 20 | struct fpu_state_struct; |
21 | extern asmlinkage void fpu_save(struct fpu_state_struct *); | 21 | extern asmlinkage void fpu_save(struct fpu_state_struct *); |
22 | #define switch_fpu(prev, next) \ | 22 | #define switch_fpu(prev, next) \ |
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 8ab8ea1253e6..dacb6a8418aa 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -408,14 +408,14 @@ static ssize_t show_valid_zones(struct device *dev, | |||
408 | sprintf(buf, "%s", zone->name); | 408 | sprintf(buf, "%s", zone->name); |
409 | 409 | ||
410 | /* MMOP_ONLINE_KERNEL */ | 410 | /* MMOP_ONLINE_KERNEL */ |
411 | zone_shift = zone_can_shift(start_pfn, nr_pages, ZONE_NORMAL); | 411 | zone_can_shift(start_pfn, nr_pages, ZONE_NORMAL, &zone_shift); |
412 | if (zone_shift) { | 412 | if (zone_shift) { |
413 | strcat(buf, " "); | 413 | strcat(buf, " "); |
414 | strcat(buf, (zone + zone_shift)->name); | 414 | strcat(buf, (zone + zone_shift)->name); |
415 | } | 415 | } |
416 | 416 | ||
417 | /* MMOP_ONLINE_MOVABLE */ | 417 | /* MMOP_ONLINE_MOVABLE */ |
418 | zone_shift = zone_can_shift(start_pfn, nr_pages, ZONE_MOVABLE); | 418 | zone_can_shift(start_pfn, nr_pages, ZONE_MOVABLE, &zone_shift); |
419 | if (zone_shift) { | 419 | if (zone_shift) { |
420 | strcat(buf, " "); | 420 | strcat(buf, " "); |
421 | strcat(buf, (zone + zone_shift)->name); | 421 | strcat(buf, (zone + zone_shift)->name); |
diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c
index a0547dbf9806..76382c858c35 100644
--- a/drivers/memstick/core/memstick.c
+++ b/drivers/memstick/core/memstick.c
@@ -330,7 +330,7 @@ static int h_memstick_read_dev_id(struct memstick_dev *card, | |||
330 | struct ms_id_register id_reg; | 330 | struct ms_id_register id_reg; |
331 | 331 | ||
332 | if (!(*mrq)) { | 332 | if (!(*mrq)) { |
333 | memstick_init_req(&card->current_mrq, MS_TPC_READ_REG, NULL, | 333 | memstick_init_req(&card->current_mrq, MS_TPC_READ_REG, &id_reg, |
334 | sizeof(struct ms_id_register)); | 334 | sizeof(struct ms_id_register)); |
335 | *mrq = &card->current_mrq; | 335 | *mrq = &card->current_mrq; |
336 | return 0; | 336 | return 0; |
diff --git a/drivers/video/fbdev/core/fbcmap.c b/drivers/video/fbdev/core/fbcmap.c
index f89245b8ba8e..68a113594808 100644
--- a/drivers/video/fbdev/core/fbcmap.c
+++ b/drivers/video/fbdev/core/fbcmap.c
@@ -163,17 +163,18 @@ void fb_dealloc_cmap(struct fb_cmap *cmap) | |||
163 | 163 | ||
164 | int fb_copy_cmap(const struct fb_cmap *from, struct fb_cmap *to) | 164 | int fb_copy_cmap(const struct fb_cmap *from, struct fb_cmap *to) |
165 | { | 165 | { |
166 | int tooff = 0, fromoff = 0; | 166 | unsigned int tooff = 0, fromoff = 0; |
167 | int size; | 167 | size_t size; |
168 | 168 | ||
169 | if (to->start > from->start) | 169 | if (to->start > from->start) |
170 | fromoff = to->start - from->start; | 170 | fromoff = to->start - from->start; |
171 | else | 171 | else |
172 | tooff = from->start - to->start; | 172 | tooff = from->start - to->start; |
173 | size = to->len - tooff; | 173 | if (fromoff >= from->len || tooff >= to->len) |
174 | if (size > (int) (from->len - fromoff)) | 174 | return -EINVAL; |
175 | size = from->len - fromoff; | 175 | |
176 | if (size <= 0) | 176 | size = min_t(size_t, to->len - tooff, from->len - fromoff); |
177 | if (size == 0) | ||
177 | return -EINVAL; | 178 | return -EINVAL; |
178 | size *= sizeof(u16); | 179 | size *= sizeof(u16); |
179 | 180 | ||
@@ -187,17 +188,18 @@ int fb_copy_cmap(const struct fb_cmap *from, struct fb_cmap *to) | |||
187 | 188 | ||
188 | int fb_cmap_to_user(const struct fb_cmap *from, struct fb_cmap_user *to) | 189 | int fb_cmap_to_user(const struct fb_cmap *from, struct fb_cmap_user *to) |
189 | { | 190 | { |
190 | int tooff = 0, fromoff = 0; | 191 | unsigned int tooff = 0, fromoff = 0; |
191 | int size; | 192 | size_t size; |
192 | 193 | ||
193 | if (to->start > from->start) | 194 | if (to->start > from->start) |
194 | fromoff = to->start - from->start; | 195 | fromoff = to->start - from->start; |
195 | else | 196 | else |
196 | tooff = from->start - to->start; | 197 | tooff = from->start - to->start; |
197 | size = to->len - tooff; | 198 | if (fromoff >= from->len || tooff >= to->len) |
198 | if (size > (int) (from->len - fromoff)) | 199 | return -EINVAL; |
199 | size = from->len - fromoff; | 200 | |
200 | if (size <= 0) | 201 | size = min_t(size_t, to->len - tooff, from->len - fromoff); |
202 | if (size == 0) | ||
201 | return -EINVAL; | 203 | return -EINVAL; |
202 | size *= sizeof(u16); | 204 | size *= sizeof(u16); |
203 | 205 | ||
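Both copy helpers now use unsigned offsets, reject non-overlapping ranges up front, and clamp with min_t(), so the later copy length can no longer come out of signed arithmetic that wrapped. A stand-alone sketch of the fixed clamping logic, with a simplified struct standing in for struct fb_cmap (types and names here are illustrative only):

#include <stdint.h>
#include <stdio.h>

struct cmap {
    uint32_t start;
    uint32_t len;   /* number of u16 entries */
};

/* Return the number of entries to copy, or 0 if the ranges do not overlap. */
static size_t copy_len(const struct cmap *from, const struct cmap *to)
{
    unsigned int tooff = 0, fromoff = 0;

    if (to->start > from->start)
        fromoff = to->start - from->start;
    else
        tooff = from->start - to->start;

    /* Reject disjoint ranges before doing any length subtraction. */
    if (fromoff >= from->len || tooff >= to->len)
        return 0;

    return (to->len - tooff) < (from->len - fromoff) ?
           (to->len - tooff) : (from->len - fromoff);
}

int main(void)
{
    struct cmap from = { .start = 0,   .len = 16 };
    struct cmap to   = { .start = 256, .len = 16 };

    /* Disjoint ranges are refused up front instead of relying on later
     * signed comparisons of already-wrapped values. */
    printf("%zu\n", copy_len(&from, &to));
    return 0;
}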
diff --git a/fs/Kconfig b/fs/Kconfig
index c2a377cdda2b..83eab52fb3f6 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -38,6 +38,7 @@ config FS_DAX | |||
38 | bool "Direct Access (DAX) support" | 38 | bool "Direct Access (DAX) support" |
39 | depends on MMU | 39 | depends on MMU |
40 | depends on !(ARM || MIPS || SPARC) | 40 | depends on !(ARM || MIPS || SPARC) |
41 | select FS_IOMAP | ||
41 | help | 42 | help |
42 | Direct Access (DAX) can be used on memory-backed block devices. | 43 | Direct Access (DAX) can be used on memory-backed block devices. |
43 | If the block device supports DAX and the filesystem supports DAX, | 44 | If the block device supports DAX and the filesystem supports DAX, |
diff --git a/fs/dax.c b/fs/dax.c
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -990,7 +990,6 @@ int __dax_zero_page_range(struct block_device *bdev, sector_t sector,
990 | } | 990 | } |
991 | EXPORT_SYMBOL_GPL(__dax_zero_page_range); | 991 | EXPORT_SYMBOL_GPL(__dax_zero_page_range); |
992 | 992 | ||
993 | #ifdef CONFIG_FS_IOMAP | ||
994 | static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos) | 993 | static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos) |
995 | { | 994 | { |
996 | return iomap->blkno + (((pos & PAGE_MASK) - iomap->offset) >> 9); | 995 | return iomap->blkno + (((pos & PAGE_MASK) - iomap->offset) >> 9); |
@@ -1428,4 +1427,3 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, | |||
1428 | } | 1427 | } |
1429 | EXPORT_SYMBOL_GPL(dax_iomap_pmd_fault); | 1428 | EXPORT_SYMBOL_GPL(dax_iomap_pmd_fault); |
1430 | #endif /* CONFIG_FS_DAX_PMD */ | 1429 | #endif /* CONFIG_FS_DAX_PMD */ |
1431 | #endif /* CONFIG_FS_IOMAP */ | ||
diff --git a/fs/ext2/Kconfig b/fs/ext2/Kconfig
index 36bea5adcaba..c634874e12d9 100644
--- a/fs/ext2/Kconfig
+++ b/fs/ext2/Kconfig
@@ -1,6 +1,5 @@ | |||
1 | config EXT2_FS | 1 | config EXT2_FS |
2 | tristate "Second extended fs support" | 2 | tristate "Second extended fs support" |
3 | select FS_IOMAP if FS_DAX | ||
4 | help | 3 | help |
5 | Ext2 is a standard Linux file system for hard disks. | 4 | Ext2 is a standard Linux file system for hard disks. |
6 | 5 | ||
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index 7b90691e98c4..e38039fd96ff 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -37,7 +37,6 @@ config EXT4_FS | |||
37 | select CRC16 | 37 | select CRC16 |
38 | select CRYPTO | 38 | select CRYPTO |
39 | select CRYPTO_CRC32C | 39 | select CRYPTO_CRC32C |
40 | select FS_IOMAP if FS_DAX | ||
41 | help | 40 | help |
42 | This is the next generation of the ext3 filesystem. | 41 | This is the next generation of the ext3 filesystem. |
43 | 42 | ||
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 8e7e61b28f31..87c9a9aacda3 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3179,6 +3179,8 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx) | |||
3179 | iter.tgid += 1, iter = next_tgid(ns, iter)) { | 3179 | iter.tgid += 1, iter = next_tgid(ns, iter)) { |
3180 | char name[PROC_NUMBUF]; | 3180 | char name[PROC_NUMBUF]; |
3181 | int len; | 3181 | int len; |
3182 | |||
3183 | cond_resched(); | ||
3182 | if (!has_pid_permissions(ns, iter.task, 2)) | 3184 | if (!has_pid_permissions(ns, iter.task, 2)) |
3183 | continue; | 3185 | continue; |
3184 | 3186 | ||
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index d0f8a38dfafa..0186fe6d39f3 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -74,6 +74,7 @@ | |||
74 | #include <linux/highmem.h> | 74 | #include <linux/highmem.h> |
75 | #include <linux/pagemap.h> | 75 | #include <linux/pagemap.h> |
76 | #include <linux/uaccess.h> | 76 | #include <linux/uaccess.h> |
77 | #include <linux/major.h> | ||
77 | #include "internal.h" | 78 | #include "internal.h" |
78 | 79 | ||
79 | static struct kmem_cache *romfs_inode_cachep; | 80 | static struct kmem_cache *romfs_inode_cachep; |
@@ -416,7 +417,22 @@ static void romfs_destroy_inode(struct inode *inode) | |||
416 | static int romfs_statfs(struct dentry *dentry, struct kstatfs *buf) | 417 | static int romfs_statfs(struct dentry *dentry, struct kstatfs *buf) |
417 | { | 418 | { |
418 | struct super_block *sb = dentry->d_sb; | 419 | struct super_block *sb = dentry->d_sb; |
419 | u64 id = huge_encode_dev(sb->s_bdev->bd_dev); | 420 | u64 id = 0; |
421 | |||
422 | /* When calling huge_encode_dev(), | ||
423 | * use sb->s_bdev->bd_dev when, | ||
424 | * - CONFIG_ROMFS_ON_BLOCK defined | ||
425 | * use sb->s_dev when, | ||
426 | * - CONFIG_ROMFS_ON_BLOCK undefined and | ||
427 | * - CONFIG_ROMFS_ON_MTD defined | ||
428 | * leave id as 0 when, | ||
429 | * - CONFIG_ROMFS_ON_BLOCK undefined and | ||
430 | * - CONFIG_ROMFS_ON_MTD undefined | ||
431 | */ | ||
432 | if (sb->s_bdev) | ||
433 | id = huge_encode_dev(sb->s_bdev->bd_dev); | ||
434 | else if (sb->s_dev) | ||
435 | id = huge_encode_dev(sb->s_dev); | ||
420 | 436 | ||
421 | buf->f_type = ROMFS_MAGIC; | 437 | buf->f_type = ROMFS_MAGIC; |
422 | buf->f_namelen = ROMFS_MAXFN; | 438 | buf->f_namelen = ROMFS_MAXFN; |
@@ -489,6 +505,11 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent) | |||
489 | sb->s_flags |= MS_RDONLY | MS_NOATIME; | 505 | sb->s_flags |= MS_RDONLY | MS_NOATIME; |
490 | sb->s_op = &romfs_super_ops; | 506 | sb->s_op = &romfs_super_ops; |
491 | 507 | ||
508 | #ifdef CONFIG_ROMFS_ON_MTD | ||
509 | /* Use same dev ID from the underlying mtdblock device */ | ||
510 | if (sb->s_mtd) | ||
511 | sb->s_dev = MKDEV(MTD_BLOCK_MAJOR, sb->s_mtd->index); | ||
512 | #endif | ||
492 | /* read the image superblock and check it */ | 513 | /* read the image superblock and check it */ |
493 | rsb = kmalloc(512, GFP_KERNEL); | 514 | rsb = kmalloc(512, GFP_KERNEL); |
494 | if (!rsb) | 515 | if (!rsb) |
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index d96e2f30084b..43953e03c356 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -63,6 +63,7 @@ struct userfaultfd_wait_queue { | |||
63 | struct uffd_msg msg; | 63 | struct uffd_msg msg; |
64 | wait_queue_t wq; | 64 | wait_queue_t wq; |
65 | struct userfaultfd_ctx *ctx; | 65 | struct userfaultfd_ctx *ctx; |
66 | bool waken; | ||
66 | }; | 67 | }; |
67 | 68 | ||
68 | struct userfaultfd_wake_range { | 69 | struct userfaultfd_wake_range { |
@@ -86,6 +87,12 @@ static int userfaultfd_wake_function(wait_queue_t *wq, unsigned mode, | |||
86 | if (len && (start > uwq->msg.arg.pagefault.address || | 87 | if (len && (start > uwq->msg.arg.pagefault.address || |
87 | start + len <= uwq->msg.arg.pagefault.address)) | 88 | start + len <= uwq->msg.arg.pagefault.address)) |
88 | goto out; | 89 | goto out; |
90 | WRITE_ONCE(uwq->waken, true); | ||
91 | /* | ||
92 | * The implicit smp_mb__before_spinlock in try_to_wake_up() | ||
93 | * renders uwq->waken visible to other CPUs before the task is | ||
94 | * waken. | ||
95 | */ | ||
89 | ret = wake_up_state(wq->private, mode); | 96 | ret = wake_up_state(wq->private, mode); |
90 | if (ret) | 97 | if (ret) |
91 | /* | 98 | /* |
@@ -264,6 +271,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) | |||
264 | struct userfaultfd_wait_queue uwq; | 271 | struct userfaultfd_wait_queue uwq; |
265 | int ret; | 272 | int ret; |
266 | bool must_wait, return_to_userland; | 273 | bool must_wait, return_to_userland; |
274 | long blocking_state; | ||
267 | 275 | ||
268 | BUG_ON(!rwsem_is_locked(&mm->mmap_sem)); | 276 | BUG_ON(!rwsem_is_locked(&mm->mmap_sem)); |
269 | 277 | ||
@@ -334,10 +342,13 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) | |||
334 | uwq.wq.private = current; | 342 | uwq.wq.private = current; |
335 | uwq.msg = userfault_msg(vmf->address, vmf->flags, reason); | 343 | uwq.msg = userfault_msg(vmf->address, vmf->flags, reason); |
336 | uwq.ctx = ctx; | 344 | uwq.ctx = ctx; |
345 | uwq.waken = false; | ||
337 | 346 | ||
338 | return_to_userland = | 347 | return_to_userland = |
339 | (vmf->flags & (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE)) == | 348 | (vmf->flags & (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE)) == |
340 | (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE); | 349 | (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE); |
350 | blocking_state = return_to_userland ? TASK_INTERRUPTIBLE : | ||
351 | TASK_KILLABLE; | ||
341 | 352 | ||
342 | spin_lock(&ctx->fault_pending_wqh.lock); | 353 | spin_lock(&ctx->fault_pending_wqh.lock); |
343 | /* | 354 | /* |
@@ -350,8 +361,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) | |||
350 | * following the spin_unlock to happen before the list_add in | 361 | * following the spin_unlock to happen before the list_add in |
351 | * __add_wait_queue. | 362 | * __add_wait_queue. |
352 | */ | 363 | */ |
353 | set_current_state(return_to_userland ? TASK_INTERRUPTIBLE : | 364 | set_current_state(blocking_state); |
354 | TASK_KILLABLE); | ||
355 | spin_unlock(&ctx->fault_pending_wqh.lock); | 365 | spin_unlock(&ctx->fault_pending_wqh.lock); |
356 | 366 | ||
357 | must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags, | 367 | must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags, |
@@ -364,6 +374,29 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) | |||
364 | wake_up_poll(&ctx->fd_wqh, POLLIN); | 374 | wake_up_poll(&ctx->fd_wqh, POLLIN); |
365 | schedule(); | 375 | schedule(); |
366 | ret |= VM_FAULT_MAJOR; | 376 | ret |= VM_FAULT_MAJOR; |
377 | |||
378 | /* | ||
379 | * False wakeups can orginate even from rwsem before | ||
380 | * up_read() however userfaults will wait either for a | ||
381 | * targeted wakeup on the specific uwq waitqueue from | ||
382 | * wake_userfault() or for signals or for uffd | ||
383 | * release. | ||
384 | */ | ||
385 | while (!READ_ONCE(uwq.waken)) { | ||
386 | /* | ||
387 | * This needs the full smp_store_mb() | ||
388 | * guarantee as the state write must be | ||
389 | * visible to other CPUs before reading | ||
390 | * uwq.waken from other CPUs. | ||
391 | */ | ||
392 | set_current_state(blocking_state); | ||
393 | if (READ_ONCE(uwq.waken) || | ||
394 | READ_ONCE(ctx->released) || | ||
395 | (return_to_userland ? signal_pending(current) : | ||
396 | fatal_signal_pending(current))) | ||
397 | break; | ||
398 | schedule(); | ||
399 | } | ||
367 | } | 400 | } |
368 | 401 | ||
369 | __set_current_state(TASK_RUNNING); | 402 | __set_current_state(TASK_RUNNING); |
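The new uwq.waken flag means the faulting task no longer assumes that being woken implies wake_userfault() ran; it re-checks the flag (and signals/release) in a loop. That is the same guard-against-spurious-wakeups discipline a user-space condition-variable wait uses; a small pthread analogue (an analogy only, not the kernel mechanism; build with -pthread):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static bool waken;              /* analogue of uwq.waken */

static void *waker(void *arg)
{
    pthread_mutex_lock(&lock);
    waken = true;               /* set the flag before waking, as the patch does */
    pthread_cond_signal(&cond);
    pthread_mutex_unlock(&lock);
    return NULL;
}

int main(void)
{
    pthread_t t;

    pthread_create(&t, NULL, waker, NULL);
    pthread_mutex_lock(&lock);
    while (!waken)              /* spurious wakeups simply re-check the flag */
        pthread_cond_wait(&cond, &lock);
    pthread_mutex_unlock(&lock);
    pthread_join(t, NULL);
    puts("woken for real");
    return 0;
}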
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 01033fadea47..c1784c0b4f35 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -284,7 +284,7 @@ extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, | |||
284 | unsigned long map_offset); | 284 | unsigned long map_offset); |
285 | extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, | 285 | extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, |
286 | unsigned long pnum); | 286 | unsigned long pnum); |
287 | extern int zone_can_shift(unsigned long pfn, unsigned long nr_pages, | 287 | extern bool zone_can_shift(unsigned long pfn, unsigned long nr_pages, |
288 | enum zone_type target); | 288 | enum zone_type target, int *zone_shift); |
289 | 289 | ||
290 | #endif /* __LINUX_MEMORY_HOTPLUG_H */ | 290 | #endif /* __LINUX_MEMORY_HOTPLUG_H */ |
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 36d9896fbc1e..f4aac87adcc3 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -972,12 +972,16 @@ static __always_inline struct zoneref *next_zones_zonelist(struct zoneref *z, | |||
972 | * @zonelist - The zonelist to search for a suitable zone | 972 | * @zonelist - The zonelist to search for a suitable zone |
973 | * @highest_zoneidx - The zone index of the highest zone to return | 973 | * @highest_zoneidx - The zone index of the highest zone to return |
974 | * @nodes - An optional nodemask to filter the zonelist with | 974 | * @nodes - An optional nodemask to filter the zonelist with |
975 | * @zone - The first suitable zone found is returned via this parameter | 975 | * @return - Zoneref pointer for the first suitable zone found (see below) |
976 | * | 976 | * |
977 | * This function returns the first zone at or below a given zone index that is | 977 | * This function returns the first zone at or below a given zone index that is |
978 | * within the allowed nodemask. The zoneref returned is a cursor that can be | 978 | * within the allowed nodemask. The zoneref returned is a cursor that can be |
979 | * used to iterate the zonelist with next_zones_zonelist by advancing it by | 979 | * used to iterate the zonelist with next_zones_zonelist by advancing it by |
980 | * one before calling. | 980 | * one before calling. |
981 | * | ||
982 | * When no eligible zone is found, zoneref->zone is NULL (zoneref itself is | ||
983 | * never NULL). This may happen either genuinely, or due to concurrent nodemask | ||
984 | * update due to cpuset modification. | ||
981 | */ | 985 | */ |
982 | static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, | 986 | static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, |
983 | enum zone_type highest_zoneidx, | 987 | enum zone_type highest_zoneidx, |
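The clarified comment is the contract the page allocator fix below relies on: the returned zoneref is never NULL, but its ->zone member may be, so callers test the member. A reduced illustration of that cursor-with-possibly-empty-payload convention (struct cursor and find_ge() are invented for the example):

#include <stdio.h>

struct cursor {
    const int *item;    /* NULL when nothing was found */
    int index;
};

/* Return a cursor at the first element >= key; the cursor itself is always
 * valid, the caller checks cursor.item instead of a NULL return. */
static struct cursor find_ge(const int *arr, int n, int key)
{
    struct cursor c = { .item = NULL, .index = n };
    int i;

    for (i = 0; i < n; i++) {
        if (arr[i] >= key) {
            c.item = &arr[i];
            c.index = i;
            break;
        }
    }
    return c;
}

int main(void)
{
    const int zones[] = { 1, 3, 5 };
    struct cursor c = find_ge(zones, 3, 7);

    if (!c.item)        /* analogous to checking preferred_zoneref->zone */
        puts("no eligible zone");
    return 0;
}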
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index aacca824a6ae..0a3fadc32693 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -110,6 +110,7 @@ extern int watchdog_user_enabled; | |||
110 | extern int watchdog_thresh; | 110 | extern int watchdog_thresh; |
111 | extern unsigned long watchdog_enabled; | 111 | extern unsigned long watchdog_enabled; |
112 | extern unsigned long *watchdog_cpumask_bits; | 112 | extern unsigned long *watchdog_cpumask_bits; |
113 | extern atomic_t watchdog_park_in_progress; | ||
113 | #ifdef CONFIG_SMP | 114 | #ifdef CONFIG_SMP |
114 | extern int sysctl_softlockup_all_cpu_backtrace; | 115 | extern int sysctl_softlockup_all_cpu_backtrace; |
115 | extern int sysctl_hardlockup_all_cpu_backtrace; | 116 | extern int sysctl_hardlockup_all_cpu_backtrace; |
diff --git a/kernel/panic.c b/kernel/panic.c
index 901c4fb46002..08aa88dde7de 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -249,7 +249,7 @@ void panic(const char *fmt, ...) | |||
249 | * Delay timeout seconds before rebooting the machine. | 249 | * Delay timeout seconds before rebooting the machine. |
250 | * We can't use the "normal" timers since we just panicked. | 250 | * We can't use the "normal" timers since we just panicked. |
251 | */ | 251 | */ |
252 | pr_emerg("Rebooting in %d seconds..", panic_timeout); | 252 | pr_emerg("Rebooting in %d seconds..\n", panic_timeout); |
253 | 253 | ||
254 | for (i = 0; i < panic_timeout * 1000; i += PANIC_TIMER_STEP) { | 254 | for (i = 0; i < panic_timeout * 1000; i += PANIC_TIMER_STEP) { |
255 | touch_nmi_watchdog(); | 255 | touch_nmi_watchdog(); |
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index d4b0fa01cae3..63177be0159e 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -49,6 +49,8 @@ unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask); | |||
49 | #define for_each_watchdog_cpu(cpu) \ | 49 | #define for_each_watchdog_cpu(cpu) \ |
50 | for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask) | 50 | for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask) |
51 | 51 | ||
52 | atomic_t watchdog_park_in_progress = ATOMIC_INIT(0); | ||
53 | |||
52 | /* | 54 | /* |
53 | * The 'watchdog_running' variable is set to 1 when the watchdog threads | 55 | * The 'watchdog_running' variable is set to 1 when the watchdog threads |
54 | * are registered/started and is set to 0 when the watchdog threads are | 56 | * are registered/started and is set to 0 when the watchdog threads are |
@@ -260,6 +262,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) | |||
260 | int duration; | 262 | int duration; |
261 | int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace; | 263 | int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace; |
262 | 264 | ||
265 | if (atomic_read(&watchdog_park_in_progress) != 0) | ||
266 | return HRTIMER_NORESTART; | ||
267 | |||
263 | /* kick the hardlockup detector */ | 268 | /* kick the hardlockup detector */ |
264 | watchdog_interrupt_count(); | 269 | watchdog_interrupt_count(); |
265 | 270 | ||
@@ -467,12 +472,16 @@ static int watchdog_park_threads(void) | |||
467 | { | 472 | { |
468 | int cpu, ret = 0; | 473 | int cpu, ret = 0; |
469 | 474 | ||
475 | atomic_set(&watchdog_park_in_progress, 1); | ||
476 | |||
470 | for_each_watchdog_cpu(cpu) { | 477 | for_each_watchdog_cpu(cpu) { |
471 | ret = kthread_park(per_cpu(softlockup_watchdog, cpu)); | 478 | ret = kthread_park(per_cpu(softlockup_watchdog, cpu)); |
472 | if (ret) | 479 | if (ret) |
473 | break; | 480 | break; |
474 | } | 481 | } |
475 | 482 | ||
483 | atomic_set(&watchdog_park_in_progress, 0); | ||
484 | |||
476 | return ret; | 485 | return ret; |
477 | } | 486 | } |
478 | 487 | ||
diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
index 84016c8aee6b..12b8dd640786 100644
--- a/kernel/watchdog_hld.c
+++ b/kernel/watchdog_hld.c
@@ -84,6 +84,9 @@ static void watchdog_overflow_callback(struct perf_event *event, | |||
84 | /* Ensure the watchdog never gets throttled */ | 84 | /* Ensure the watchdog never gets throttled */ |
85 | event->hw.interrupts = 0; | 85 | event->hw.interrupts = 0; |
86 | 86 | ||
87 | if (atomic_read(&watchdog_park_in_progress) != 0) | ||
88 | return; | ||
89 | |||
87 | if (__this_cpu_read(watchdog_nmi_touch) == true) { | 90 | if (__this_cpu_read(watchdog_nmi_touch) == true) { |
88 | __this_cpu_write(watchdog_nmi_touch, false); | 91 | __this_cpu_write(watchdog_nmi_touch, false); |
89 | return; | 92 | return; |
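Both the hrtimer callback and the perf NMI handler now test watchdog_park_in_progress and return early, so parking the watchdog threads cannot be mistaken for a lockup. The shape of the fix is an atomic gate raised around slow maintenance and polled by fast-path handlers; a user-space sketch with C11 atomics (all names invented):

#include <stdatomic.h>
#include <stdio.h>

static atomic_int park_in_progress;     /* analogue of watchdog_park_in_progress */

static void periodic_check(void)
{
    /* Fast path: skip the check entirely while maintenance is running. */
    if (atomic_load(&park_in_progress) != 0)
        return;
    puts("watchdog check ran");
}

static void park_threads(void)
{
    atomic_store(&park_in_progress, 1);
    periodic_check();   /* a check that fires now is silently skipped */
    atomic_store(&park_in_progress, 0);
}

int main(void)
{
    periodic_check();   /* runs */
    park_threads();     /* the check inside is suppressed */
    periodic_check();   /* runs again */
    return 0;
}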
diff --git a/lib/ioremap.c b/lib/ioremap.c
index 86c8911b0e3a..a3e14ce92a56 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -144,4 +144,3 @@ int ioremap_page_range(unsigned long addr, | |||
144 | 144 | ||
145 | return err; | 145 | return err; |
146 | } | 146 | } |
147 | EXPORT_SYMBOL_GPL(ioremap_page_range); | ||
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 0b92d605fb69..84812a9fb16f 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -769,7 +769,7 @@ static void radix_tree_free_nodes(struct radix_tree_node *node) | |||
769 | struct radix_tree_node *old = child; | 769 | struct radix_tree_node *old = child; |
770 | offset = child->offset + 1; | 770 | offset = child->offset + 1; |
771 | child = child->parent; | 771 | child = child->parent; |
772 | WARN_ON_ONCE(!list_empty(&node->private_list)); | 772 | WARN_ON_ONCE(!list_empty(&old->private_list)); |
773 | radix_tree_node_free(old); | 773 | radix_tree_node_free(old); |
774 | if (old == entry_to_node(node)) | 774 | if (old == entry_to_node(node)) |
775 | return; | 775 | return; |
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9a6bd6c8d55a..5f3ad65c85de 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -783,6 +783,12 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, | |||
783 | 783 | ||
784 | assert_spin_locked(pmd_lockptr(mm, pmd)); | 784 | assert_spin_locked(pmd_lockptr(mm, pmd)); |
785 | 785 | ||
786 | /* | ||
787 | * When we COW a devmap PMD entry, we split it into PTEs, so we should | ||
788 | * not be in this function with `flags & FOLL_COW` set. | ||
789 | */ | ||
790 | WARN_ONCE(flags & FOLL_COW, "mm: In follow_devmap_pmd with FOLL_COW set"); | ||
791 | |||
786 | if (flags & FOLL_WRITE && !pmd_write(*pmd)) | 792 | if (flags & FOLL_WRITE && !pmd_write(*pmd)) |
787 | return NULL; | 793 | return NULL; |
788 | 794 | ||
@@ -1128,6 +1134,16 @@ out_unlock: | |||
1128 | return ret; | 1134 | return ret; |
1129 | } | 1135 | } |
1130 | 1136 | ||
1137 | /* | ||
1138 | * FOLL_FORCE can write to even unwritable pmd's, but only | ||
1139 | * after we've gone through a COW cycle and they are dirty. | ||
1140 | */ | ||
1141 | static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags) | ||
1142 | { | ||
1143 | return pmd_write(pmd) || | ||
1144 | ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd)); | ||
1145 | } | ||
1146 | |||
1131 | struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, | 1147 | struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, |
1132 | unsigned long addr, | 1148 | unsigned long addr, |
1133 | pmd_t *pmd, | 1149 | pmd_t *pmd, |
@@ -1138,7 +1154,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, | |||
1138 | 1154 | ||
1139 | assert_spin_locked(pmd_lockptr(mm, pmd)); | 1155 | assert_spin_locked(pmd_lockptr(mm, pmd)); |
1140 | 1156 | ||
1141 | if (flags & FOLL_WRITE && !pmd_write(*pmd)) | 1157 | if (flags & FOLL_WRITE && !can_follow_write_pmd(*pmd, flags)) |
1142 | goto out; | 1158 | goto out; |
1143 | 1159 | ||
1144 | /* Avoid dumping huge zero page */ | 1160 | /* Avoid dumping huge zero page */ |
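can_follow_write_pmd() applies to huge pages the same rule can_follow_write_pte() already applies to base pages: FOLL_FORCE may write through a read-only entry only once a COW cycle has left it dirty and FOLL_COW is set. The predicate in isolation, with a mock pmd and mock flag values (the numeric values are placeholders, not the kernel's):

#include <stdbool.h>
#include <stdio.h>

#define FOLL_WRITE  0x01
#define FOLL_FORCE  0x10
#define FOLL_COW    0x4000

struct mock_pmd {
    bool write;
    bool dirty;
};

static bool can_follow_write_pmd(struct mock_pmd pmd, unsigned int flags)
{
    return pmd.write ||
           ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd.dirty);
}

int main(void)
{
    struct mock_pmd ro_clean = { .write = false, .dirty = false };
    struct mock_pmd ro_dirty = { .write = false, .dirty = true  };

    /* FOLL_FORCE alone is not enough ... */
    printf("%d\n", can_follow_write_pmd(ro_clean, FOLL_WRITE | FOLL_FORCE));
    /* ... it must be combined with FOLL_COW on a dirty entry. */
    printf("%d\n", can_follow_write_pmd(ro_dirty,
                                        FOLL_WRITE | FOLL_FORCE | FOLL_COW));
    return 0;
}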
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a63a8f832664..b822e158b319 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4353,9 +4353,9 @@ static int mem_cgroup_do_precharge(unsigned long count) | |||
4353 | return ret; | 4353 | return ret; |
4354 | } | 4354 | } |
4355 | 4355 | ||
4356 | /* Try charges one by one with reclaim */ | 4356 | /* Try charges one by one with reclaim, but do not retry */ |
4357 | while (count--) { | 4357 | while (count--) { |
4358 | ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_NORETRY, 1); | 4358 | ret = try_charge(mc.to, GFP_KERNEL | __GFP_NORETRY, 1); |
4359 | if (ret) | 4359 | if (ret) |
4360 | return ret; | 4360 | return ret; |
4361 | mc.precharge++; | 4361 | mc.precharge++; |
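The one-line memcontrol fix hinges on bit arithmetic: GFP_KERNEL never contains __GFP_NORETRY, so masking it out (& ~) was a no-op, whereas OR-ing it in is what actually asks the allocator not to retry. The difference, spelled out with illustrative flag values (not the kernel's exact constants):

#include <stdio.h>

#define GFP_KERNEL      0x014000c0u     /* illustrative values only */
#define __GFP_NORETRY   0x00001000u

int main(void)
{
    unsigned int cleared = GFP_KERNEL & ~__GFP_NORETRY;
    unsigned int set     = GFP_KERNEL |  __GFP_NORETRY;

    /* GFP_KERNEL does not contain __GFP_NORETRY, so clearing it changes nothing. */
    printf("&~ : %#x (unchanged: %d)\n", cleared, cleared == GFP_KERNEL);
    /* OR-ing it in is what actually requests "do not retry". */
    printf("|  : %#x (NORETRY set: %d)\n", set, (set & __GFP_NORETRY) != 0);
    return 0;
}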
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index e43142c15631..ca2723d47338 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1033,36 +1033,39 @@ static void node_states_set_node(int node, struct memory_notify *arg) | |||
1033 | node_set_state(node, N_MEMORY); | 1033 | node_set_state(node, N_MEMORY); |
1034 | } | 1034 | } |
1035 | 1035 | ||
1036 | int zone_can_shift(unsigned long pfn, unsigned long nr_pages, | 1036 | bool zone_can_shift(unsigned long pfn, unsigned long nr_pages, |
1037 | enum zone_type target) | 1037 | enum zone_type target, int *zone_shift) |
1038 | { | 1038 | { |
1039 | struct zone *zone = page_zone(pfn_to_page(pfn)); | 1039 | struct zone *zone = page_zone(pfn_to_page(pfn)); |
1040 | enum zone_type idx = zone_idx(zone); | 1040 | enum zone_type idx = zone_idx(zone); |
1041 | int i; | 1041 | int i; |
1042 | 1042 | ||
1043 | *zone_shift = 0; | ||
1044 | |||
1043 | if (idx < target) { | 1045 | if (idx < target) { |
1044 | /* pages must be at end of current zone */ | 1046 | /* pages must be at end of current zone */ |
1045 | if (pfn + nr_pages != zone_end_pfn(zone)) | 1047 | if (pfn + nr_pages != zone_end_pfn(zone)) |
1046 | return 0; | 1048 | return false; |
1047 | 1049 | ||
1048 | /* no zones in use between current zone and target */ | 1050 | /* no zones in use between current zone and target */ |
1049 | for (i = idx + 1; i < target; i++) | 1051 | for (i = idx + 1; i < target; i++) |
1050 | if (zone_is_initialized(zone - idx + i)) | 1052 | if (zone_is_initialized(zone - idx + i)) |
1051 | return 0; | 1053 | return false; |
1052 | } | 1054 | } |
1053 | 1055 | ||
1054 | if (target < idx) { | 1056 | if (target < idx) { |
1055 | /* pages must be at beginning of current zone */ | 1057 | /* pages must be at beginning of current zone */ |
1056 | if (pfn != zone->zone_start_pfn) | 1058 | if (pfn != zone->zone_start_pfn) |
1057 | return 0; | 1059 | return false; |
1058 | 1060 | ||
1059 | /* no zones in use between current zone and target */ | 1061 | /* no zones in use between current zone and target */ |
1060 | for (i = target + 1; i < idx; i++) | 1062 | for (i = target + 1; i < idx; i++) |
1061 | if (zone_is_initialized(zone - idx + i)) | 1063 | if (zone_is_initialized(zone - idx + i)) |
1062 | return 0; | 1064 | return false; |
1063 | } | 1065 | } |
1064 | 1066 | ||
1065 | return target - idx; | 1067 | *zone_shift = target - idx; |
1068 | return true; | ||
1066 | } | 1069 | } |
1067 | 1070 | ||
1068 | /* Must be protected by mem_hotplug_begin() */ | 1071 | /* Must be protected by mem_hotplug_begin() */ |
@@ -1089,10 +1092,13 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ | |||
1089 | !can_online_high_movable(zone)) | 1092 | !can_online_high_movable(zone)) |
1090 | return -EINVAL; | 1093 | return -EINVAL; |
1091 | 1094 | ||
1092 | if (online_type == MMOP_ONLINE_KERNEL) | 1095 | if (online_type == MMOP_ONLINE_KERNEL) { |
1093 | zone_shift = zone_can_shift(pfn, nr_pages, ZONE_NORMAL); | 1096 | if (!zone_can_shift(pfn, nr_pages, ZONE_NORMAL, &zone_shift)) |
1094 | else if (online_type == MMOP_ONLINE_MOVABLE) | 1097 | return -EINVAL; |
1095 | zone_shift = zone_can_shift(pfn, nr_pages, ZONE_MOVABLE); | 1098 | } else if (online_type == MMOP_ONLINE_MOVABLE) { |
1099 | if (!zone_can_shift(pfn, nr_pages, ZONE_MOVABLE, &zone_shift)) | ||
1100 | return -EINVAL; | ||
1101 | } | ||
1096 | 1102 | ||
1097 | zone = move_pfn_range(zone_shift, pfn, pfn + nr_pages); | 1103 | zone = move_pfn_range(zone_shift, pfn, pfn + nr_pages); |
1098 | if (!zone) | 1104 | if (!zone) |
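zone_can_shift() previously returned the shift itself, so a return of 0 meant both "cannot shift" and "already in the target zone", and online_pages() could not tell the two apart. Returning bool with the shift in an out-parameter removes the ambiguity; a minimal illustration of the two signatures (shift_old()/shift_new() are invented for the example):

#include <stdbool.h>
#include <stdio.h>

/* Old style: 0 means both "failure" and "valid shift of zero". */
static int shift_old(int from, int to)
{
    return to - from;   /* caller cannot tell 0 apart from an error */
}

/* New style: success/failure is explicit, 0 is a perfectly good shift. */
static bool shift_new(int from, int to, int *shift)
{
    *shift = 0;
    if (from < 0 || to < 0)     /* stand-in validity check */
        return false;
    *shift = to - from;
    return true;
}

int main(void)
{
    int shift;

    printf("old: %d\n", shift_old(2, 2));          /* 0 - but is it an error? */
    if (shift_new(2, 2, &shift))
        printf("new: ok, shift=%d\n", shift);      /* unambiguous */
    return 0;
}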
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 2e346645eb80..1e7873e40c9a 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2017,8 +2017,8 @@ retry_cpuset: | |||
2017 | 2017 | ||
2018 | nmask = policy_nodemask(gfp, pol); | 2018 | nmask = policy_nodemask(gfp, pol); |
2019 | zl = policy_zonelist(gfp, pol, node); | 2019 | zl = policy_zonelist(gfp, pol, node); |
2020 | mpol_cond_put(pol); | ||
2021 | page = __alloc_pages_nodemask(gfp, order, zl, nmask); | 2020 | page = __alloc_pages_nodemask(gfp, order, zl, nmask); |
2021 | mpol_cond_put(pol); | ||
2022 | out: | 2022 | out: |
2023 | if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie))) | 2023 | if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie))) |
2024 | goto retry_cpuset; | 2024 | goto retry_cpuset; |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d604d2596b7b..f3e0c69a97b7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3523,12 +3523,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, | |||
3523 | struct page *page = NULL; | 3523 | struct page *page = NULL; |
3524 | unsigned int alloc_flags; | 3524 | unsigned int alloc_flags; |
3525 | unsigned long did_some_progress; | 3525 | unsigned long did_some_progress; |
3526 | enum compact_priority compact_priority = DEF_COMPACT_PRIORITY; | 3526 | enum compact_priority compact_priority; |
3527 | enum compact_result compact_result; | 3527 | enum compact_result compact_result; |
3528 | int compaction_retries = 0; | 3528 | int compaction_retries; |
3529 | int no_progress_loops = 0; | 3529 | int no_progress_loops; |
3530 | unsigned long alloc_start = jiffies; | 3530 | unsigned long alloc_start = jiffies; |
3531 | unsigned int stall_timeout = 10 * HZ; | 3531 | unsigned int stall_timeout = 10 * HZ; |
3532 | unsigned int cpuset_mems_cookie; | ||
3532 | 3533 | ||
3533 | /* | 3534 | /* |
3534 | * In the slowpath, we sanity check order to avoid ever trying to | 3535 | * In the slowpath, we sanity check order to avoid ever trying to |
@@ -3549,6 +3550,23 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, | |||
3549 | (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM))) | 3550 | (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM))) |
3550 | gfp_mask &= ~__GFP_ATOMIC; | 3551 | gfp_mask &= ~__GFP_ATOMIC; |
3551 | 3552 | ||
3553 | retry_cpuset: | ||
3554 | compaction_retries = 0; | ||
3555 | no_progress_loops = 0; | ||
3556 | compact_priority = DEF_COMPACT_PRIORITY; | ||
3557 | cpuset_mems_cookie = read_mems_allowed_begin(); | ||
3558 | /* | ||
3559 | * We need to recalculate the starting point for the zonelist iterator | ||
3560 | * because we might have used different nodemask in the fast path, or | ||
3561 | * there was a cpuset modification and we are retrying - otherwise we | ||
3562 | * could end up iterating over non-eligible zones endlessly. | ||
3563 | */ | ||
3564 | ac->preferred_zoneref = first_zones_zonelist(ac->zonelist, | ||
3565 | ac->high_zoneidx, ac->nodemask); | ||
3566 | if (!ac->preferred_zoneref->zone) | ||
3567 | goto nopage; | ||
3568 | |||
3569 | |||
3552 | /* | 3570 | /* |
3553 | * The fast path uses conservative alloc_flags to succeed only until | 3571 | * The fast path uses conservative alloc_flags to succeed only until |
3554 | * kswapd needs to be woken up, and to avoid the cost of setting up | 3572 | * kswapd needs to be woken up, and to avoid the cost of setting up |
@@ -3708,6 +3726,13 @@ retry: | |||
3708 | &compaction_retries)) | 3726 | &compaction_retries)) |
3709 | goto retry; | 3727 | goto retry; |
3710 | 3728 | ||
3729 | /* | ||
3730 | * It's possible we raced with cpuset update so the OOM would be | ||
3731 | * premature (see below the nopage: label for full explanation). | ||
3732 | */ | ||
3733 | if (read_mems_allowed_retry(cpuset_mems_cookie)) | ||
3734 | goto retry_cpuset; | ||
3735 | |||
3711 | /* Reclaim has failed us, start killing things */ | 3736 | /* Reclaim has failed us, start killing things */ |
3712 | page = __alloc_pages_may_oom(gfp_mask, order, ac, &did_some_progress); | 3737 | page = __alloc_pages_may_oom(gfp_mask, order, ac, &did_some_progress); |
3713 | if (page) | 3738 | if (page) |
@@ -3720,6 +3745,16 @@ retry: | |||
3720 | } | 3745 | } |
3721 | 3746 | ||
3722 | nopage: | 3747 | nopage: |
3748 | /* | ||
3749 | * When updating a task's mems_allowed or mempolicy nodemask, it is | ||
3750 | * possible to race with parallel threads in such a way that our | ||
3751 | * allocation can fail while the mask is being updated. If we are about | ||
3752 | * to fail, check if the cpuset changed during allocation and if so, | ||
3753 | * retry. | ||
3754 | */ | ||
3755 | if (read_mems_allowed_retry(cpuset_mems_cookie)) | ||
3756 | goto retry_cpuset; | ||
3757 | |||
3723 | warn_alloc(gfp_mask, | 3758 | warn_alloc(gfp_mask, |
3724 | "page allocation failure: order:%u", order); | 3759 | "page allocation failure: order:%u", order); |
3725 | got_pg: | 3760 | got_pg: |
@@ -3734,7 +3769,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, | |||
3734 | struct zonelist *zonelist, nodemask_t *nodemask) | 3769 | struct zonelist *zonelist, nodemask_t *nodemask) |
3735 | { | 3770 | { |
3736 | struct page *page; | 3771 | struct page *page; |
3737 | unsigned int cpuset_mems_cookie; | ||
3738 | unsigned int alloc_flags = ALLOC_WMARK_LOW; | 3772 | unsigned int alloc_flags = ALLOC_WMARK_LOW; |
3739 | gfp_t alloc_mask = gfp_mask; /* The gfp_t that was actually used for allocation */ | 3773 | gfp_t alloc_mask = gfp_mask; /* The gfp_t that was actually used for allocation */ |
3740 | struct alloc_context ac = { | 3774 | struct alloc_context ac = { |
@@ -3771,9 +3805,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, | |||
3771 | if (IS_ENABLED(CONFIG_CMA) && ac.migratetype == MIGRATE_MOVABLE) | 3805 | if (IS_ENABLED(CONFIG_CMA) && ac.migratetype == MIGRATE_MOVABLE) |
3772 | alloc_flags |= ALLOC_CMA; | 3806 | alloc_flags |= ALLOC_CMA; |
3773 | 3807 | ||
3774 | retry_cpuset: | ||
3775 | cpuset_mems_cookie = read_mems_allowed_begin(); | ||
3776 | |||
3777 | /* Dirty zone balancing only done in the fast path */ | 3808 | /* Dirty zone balancing only done in the fast path */ |
3778 | ac.spread_dirty_pages = (gfp_mask & __GFP_WRITE); | 3809 | ac.spread_dirty_pages = (gfp_mask & __GFP_WRITE); |
3779 | 3810 | ||
@@ -3784,8 +3815,13 @@ retry_cpuset: | |||
3784 | */ | 3815 | */ |
3785 | ac.preferred_zoneref = first_zones_zonelist(ac.zonelist, | 3816 | ac.preferred_zoneref = first_zones_zonelist(ac.zonelist, |
3786 | ac.high_zoneidx, ac.nodemask); | 3817 | ac.high_zoneidx, ac.nodemask); |
3787 | if (!ac.preferred_zoneref) { | 3818 | if (!ac.preferred_zoneref->zone) { |
3788 | page = NULL; | 3819 | page = NULL; |
3820 | /* | ||
3821 | * This might be due to race with cpuset_current_mems_allowed | ||
3822 | * update, so make sure we retry with original nodemask in the | ||
3823 | * slow path. | ||
3824 | */ | ||
3789 | goto no_zone; | 3825 | goto no_zone; |
3790 | } | 3826 | } |
3791 | 3827 | ||
@@ -3794,6 +3830,7 @@ retry_cpuset: | |||
3794 | if (likely(page)) | 3830 | if (likely(page)) |
3795 | goto out; | 3831 | goto out; |
3796 | 3832 | ||
3833 | no_zone: | ||
3797 | /* | 3834 | /* |
3798 | * Runtime PM, block IO and its error handling path can deadlock | 3835 | * Runtime PM, block IO and its error handling path can deadlock |
3799 | * because I/O on the device might not complete. | 3836 | * because I/O on the device might not complete. |
@@ -3805,21 +3842,10 @@ retry_cpuset: | |||
3805 | * Restore the original nodemask if it was potentially replaced with | 3842 | * Restore the original nodemask if it was potentially replaced with |
3806 | * &cpuset_current_mems_allowed to optimize the fast-path attempt. | 3843 | * &cpuset_current_mems_allowed to optimize the fast-path attempt. |
3807 | */ | 3844 | */ |
3808 | if (cpusets_enabled()) | 3845 | if (unlikely(ac.nodemask != nodemask)) |
3809 | ac.nodemask = nodemask; | 3846 | ac.nodemask = nodemask; |
3810 | page = __alloc_pages_slowpath(alloc_mask, order, &ac); | ||
3811 | 3847 | ||
3812 | no_zone: | 3848 | page = __alloc_pages_slowpath(alloc_mask, order, &ac); |
3813 | /* | ||
3814 | * When updating a task's mems_allowed, it is possible to race with | ||
3815 | * parallel threads in such a way that an allocation can fail while | ||
3816 | * the mask is being updated. If a page allocation is about to fail, | ||
3817 | * check if the cpuset changed during allocation and if so, retry. | ||
3818 | */ | ||
3819 | if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie))) { | ||
3820 | alloc_mask = gfp_mask; | ||
3821 | goto retry_cpuset; | ||
3822 | } | ||
3823 | 3849 | ||
3824 | out: | 3850 | out: |
3825 | if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page && | 3851 | if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page && |
@@ -7248,6 +7274,7 @@ int alloc_contig_range(unsigned long start, unsigned long end, | |||
7248 | .zone = page_zone(pfn_to_page(start)), | 7274 | .zone = page_zone(pfn_to_page(start)), |
7249 | .mode = MIGRATE_SYNC, | 7275 | .mode = MIGRATE_SYNC, |
7250 | .ignore_skip_hint = true, | 7276 | .ignore_skip_hint = true, |
7277 | .gfp_mask = GFP_KERNEL, | ||
7251 | }; | 7278 | }; |
7252 | INIT_LIST_HEAD(&cc.migratepages); | 7279 | INIT_LIST_HEAD(&cc.migratepages); |
7253 | 7280 | ||
diff --git a/mm/slub.c b/mm/slub.c
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -496,10 +496,11 @@ static inline int check_valid_pointer(struct kmem_cache *s,
496 | return 1; | 496 | return 1; |
497 | } | 497 | } |
498 | 498 | ||
499 | static void print_section(char *text, u8 *addr, unsigned int length) | 499 | static void print_section(char *level, char *text, u8 *addr, |
500 | unsigned int length) | ||
500 | { | 501 | { |
501 | metadata_access_enable(); | 502 | metadata_access_enable(); |
502 | print_hex_dump(KERN_ERR, text, DUMP_PREFIX_ADDRESS, 16, 1, addr, | 503 | print_hex_dump(level, text, DUMP_PREFIX_ADDRESS, 16, 1, addr, |
503 | length, 1); | 504 | length, 1); |
504 | metadata_access_disable(); | 505 | metadata_access_disable(); |
505 | } | 506 | } |
@@ -636,14 +637,15 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) | |||
636 | p, p - addr, get_freepointer(s, p)); | 637 | p, p - addr, get_freepointer(s, p)); |
637 | 638 | ||
638 | if (s->flags & SLAB_RED_ZONE) | 639 | if (s->flags & SLAB_RED_ZONE) |
639 | print_section("Redzone ", p - s->red_left_pad, s->red_left_pad); | 640 | print_section(KERN_ERR, "Redzone ", p - s->red_left_pad, |
641 | s->red_left_pad); | ||
640 | else if (p > addr + 16) | 642 | else if (p > addr + 16) |
641 | print_section("Bytes b4 ", p - 16, 16); | 643 | print_section(KERN_ERR, "Bytes b4 ", p - 16, 16); |
642 | 644 | ||
643 | print_section("Object ", p, min_t(unsigned long, s->object_size, | 645 | print_section(KERN_ERR, "Object ", p, |
644 | PAGE_SIZE)); | 646 | min_t(unsigned long, s->object_size, PAGE_SIZE)); |
645 | if (s->flags & SLAB_RED_ZONE) | 647 | if (s->flags & SLAB_RED_ZONE) |
646 | print_section("Redzone ", p + s->object_size, | 648 | print_section(KERN_ERR, "Redzone ", p + s->object_size, |
647 | s->inuse - s->object_size); | 649 | s->inuse - s->object_size); |
648 | 650 | ||
649 | if (s->offset) | 651 | if (s->offset) |
@@ -658,7 +660,8 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) | |||
658 | 660 | ||
659 | if (off != size_from_object(s)) | 661 | if (off != size_from_object(s)) |
660 | /* Beginning of the filler is the free pointer */ | 662 | /* Beginning of the filler is the free pointer */ |
661 | print_section("Padding ", p + off, size_from_object(s) - off); | 663 | print_section(KERN_ERR, "Padding ", p + off, |
664 | size_from_object(s) - off); | ||
662 | 665 | ||
663 | dump_stack(); | 666 | dump_stack(); |
664 | } | 667 | } |
@@ -820,7 +823,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page) | |||
820 | end--; | 823 | end--; |
821 | 824 | ||
822 | slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1); | 825 | slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1); |
823 | print_section("Padding ", end - remainder, remainder); | 826 | print_section(KERN_ERR, "Padding ", end - remainder, remainder); |
824 | 827 | ||
825 | restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end); | 828 | restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end); |
826 | return 0; | 829 | return 0; |
@@ -973,7 +976,7 @@ static void trace(struct kmem_cache *s, struct page *page, void *object, | |||
973 | page->freelist); | 976 | page->freelist); |
974 | 977 | ||
975 | if (!alloc) | 978 | if (!alloc) |
976 | print_section("Object ", (void *)object, | 979 | print_section(KERN_INFO, "Object ", (void *)object, |
977 | s->object_size); | 980 | s->object_size); |
978 | 981 | ||
979 | dump_stack(); | 982 | dump_stack(); |
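Passing the printk level through print_section() lets the object dump emitted by trace() go out at KERN_INFO while the corruption reports keep using KERN_ERR. A user-space analogue of a hex dump that takes its severity prefix as a parameter (print_hex_dump() itself is kernel-only, so this is just a sketch):

#include <stdio.h>

static void print_section(const char *level, const char *text,
                          const unsigned char *addr, unsigned int length)
{
    unsigned int i;

    for (i = 0; i < length; i++) {
        if (i % 16 == 0)
            printf("%s%s%s%04x:", i ? "\n" : "", level, text, i);
        printf(" %02x", addr[i]);
    }
    putchar('\n');
}

int main(void)
{
    unsigned char obj[32] = { 0x6b, 0x6b, 0x6b, 0x6b };     /* poison-like bytes */

    print_section("<6> ", "Object ", obj, sizeof(obj));     /* info-level dump */
    print_section("<3> ", "Redzone ", obj, 8);              /* error-level dump */
    return 0;
}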