author	Linus Torvalds <torvalds@linux-foundation.org>	2013-10-17 00:36:03 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-10-17 00:36:03 -0400
commit	056cdce0d3a214158f3a4ea40887b22639f855a8 (patch)
tree	8ced4ccf6c7bac7eef49710c3cdfb0745cc85102
parent	0056019da4b7ee5ab51fb174fe0655278578516f (diff)
parent	57a8f0cdb87da776bf0e4ce7554a9133854fa779 (diff)
Merge branch 'akpm' (fixes from Andrew Morton)
Merge misc fixes from Andrew Morton.

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (21 commits)
  mm: revert mremap pud_free anti-fix
  mm: fix BUG in __split_huge_page_pmd
  swap: fix set_blocksize race during swapon/swapoff
  procfs: call default get_unmapped_area on MMU-present architectures
  procfs: fix unintended truncation of returned mapped address
  writeback: fix negative bdi max pause
  percpu_refcount: export symbols
  fs: buffer: move allocation failure loop into the allocator
  mm: memcg: handle non-error OOM situations more gracefully
  tools/testing/selftests: fix uninitialized variable
  block/partitions/efi.c: treat size mismatch as a warning, not an error
  mm: hugetlb: initialize PG_reserved for tail pages of gigantic compound pages
  mm/zswap: bugfix: memory leak when re-swapon
  mm: /proc/pid/pagemap: inspect _PAGE_SOFT_DIRTY only on present pages
  mm: migration: do not lose soft dirty bit if page is in migration state
  gcov: MAINTAINERS: Add an entry for gcov
  mm/hugetlb.c: correct missing private flag clearing
  mm/vmscan.c: don't forget to free shrinker->nr_deferred
  ipc/sem.c: synchronize semop and semctl with IPC_RMID
  ipc: update locking scheme comments
  ...
-rw-r--r--	MAINTAINERS	6
-rw-r--r--	block/partitions/efi.c	7
-rw-r--r--	fs/buffer.c	14
-rw-r--r--	fs/proc/inode.c	10
-rw-r--r--	fs/proc/task_mmu.c	4
-rw-r--r--	include/linux/memcontrol.h	50
-rw-r--r--	include/linux/sched.h	7
-rw-r--r--	ipc/sem.c	42
-rw-r--r--	ipc/util.c	27
-rw-r--r--	lib/percpu-refcount.c	3
-rw-r--r--	mm/filemap.c	11
-rw-r--r--	mm/huge_memory.c	10
-rw-r--r--	mm/hugetlb.c	17
-rw-r--r--	mm/memcontrol.c	143
-rw-r--r--	mm/memory.c	20
-rw-r--r--	mm/migrate.c	2
-rw-r--r--	mm/mprotect.c	7
-rw-r--r--	mm/mremap.c	5
-rw-r--r--	mm/oom_kill.c	2
-rw-r--r--	mm/page-writeback.c	10
-rw-r--r--	mm/swapfile.c	4
-rw-r--r--	mm/vmscan.c	1
-rw-r--r--	mm/zswap.c	4
-rw-r--r--	tools/testing/selftests/timers/posix_timers.c	2
24 files changed, 219 insertions, 189 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 72b1e5c2378a..a7c34ef3509d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3624,6 +3624,12 @@ L: linux-scsi@vger.kernel.org
 S:	Odd Fixes (e.g., new signatures)
 F:	drivers/scsi/fdomain.*
 
+GCOV BASED KERNEL PROFILING
+M:	Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
+S:	Maintained
+F:	kernel/gcov/
+F:	Documentation/gcov.txt
+
 GDT SCSI DISK ARRAY CONTROLLER DRIVER
 M:	Achim Leubner <achim_leubner@adaptec.com>
 L:	linux-scsi@vger.kernel.org
diff --git a/block/partitions/efi.c b/block/partitions/efi.c
index 1eb09ee5311b..a8287b49d062 100644
--- a/block/partitions/efi.c
+++ b/block/partitions/efi.c
@@ -222,11 +222,16 @@ check_hybrid:
 	 * the disk size.
 	 *
 	 * Hybrid MBRs do not necessarily comply with this.
+	 *
+	 * Consider a bad value here to be a warning to support dd'ing
+	 * an image from a smaller disk to a larger disk.
 	 */
 	if (ret == GPT_MBR_PROTECTIVE) {
 		sz = le32_to_cpu(mbr->partition_record[part].size_in_lba);
 		if (sz != (uint32_t) total_sectors - 1 && sz != 0xFFFFFFFF)
-			ret = 0;
+			pr_debug("GPT: mbr size in lba (%u) different than whole disk (%u).\n",
+				 sz, min_t(uint32_t,
+					   total_sectors - 1, 0xFFFFFFFF));
 	}
 done:
 	return ret;
diff --git a/fs/buffer.c b/fs/buffer.c
index 4d7433534f5c..6024877335ca 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1005,9 +1005,19 @@ grow_dev_page(struct block_device *bdev, sector_t block,
 	struct buffer_head *bh;
 	sector_t end_block;
 	int ret = 0;		/* Will call free_more_memory() */
+	gfp_t gfp_mask;
 
-	page = find_or_create_page(inode->i_mapping, index,
-		(mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE);
+	gfp_mask = mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS;
+	gfp_mask |= __GFP_MOVABLE;
+	/*
+	 * XXX: __getblk_slow() can not really deal with failure and
+	 * will endlessly loop on improvised global reclaim. Prefer
+	 * looping in the allocator rather than here, at least that
+	 * code knows what it's doing.
+	 */
+	gfp_mask |= __GFP_NOFAIL;
+
+	page = find_or_create_page(inode->i_mapping, index, gfp_mask);
 	if (!page)
 		return ret;
 
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 9f8ef9b7674d..8eaa1ba793fc 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -288,10 +288,14 @@ static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma)
 static unsigned long proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr, unsigned long len, unsigned long pgoff, unsigned long flags)
 {
 	struct proc_dir_entry *pde = PDE(file_inode(file));
-	int rv = -EIO;
-	unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
+	unsigned long rv = -EIO;
+	unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long) = NULL;
 	if (use_pde(pde)) {
-		get_unmapped_area = pde->proc_fops->get_unmapped_area;
+#ifdef CONFIG_MMU
+		get_unmapped_area = current->mm->get_unmapped_area;
+#endif
+		if (pde->proc_fops->get_unmapped_area)
+			get_unmapped_area = pde->proc_fops->get_unmapped_area;
 		if (get_unmapped_area)
 			rv = get_unmapped_area(file, orig_addr, len, pgoff, flags);
 		unuse_pde(pde);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 7366e9d63cee..390bdab01c3c 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -941,6 +941,8 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
 		frame = pte_pfn(pte);
 		flags = PM_PRESENT;
 		page = vm_normal_page(vma, addr, pte);
+		if (pte_soft_dirty(pte))
+			flags2 |= __PM_SOFT_DIRTY;
 	} else if (is_swap_pte(pte)) {
 		swp_entry_t entry;
 		if (pte_swp_soft_dirty(pte))
@@ -960,7 +962,7 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
 
 	if (page && !PageAnon(page))
 		flags |= PM_FILE;
-	if ((vma->vm_flags & VM_SOFTDIRTY) || pte_soft_dirty(pte))
+	if ((vma->vm_flags & VM_SOFTDIRTY))
 		flags2 |= __PM_SOFT_DIRTY;
 
 	*pme = make_pme(PM_PFRAME(frame) | PM_STATUS2(pm->v2, flags2) | flags);
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index ecc82b37c4cc..b3e7a667e03c 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -137,47 +137,24 @@ extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
 extern void mem_cgroup_replace_page_cache(struct page *oldpage,
 					struct page *newpage);
 
-/**
- * mem_cgroup_toggle_oom - toggle the memcg OOM killer for the current task
- * @new: true to enable, false to disable
- *
- * Toggle whether a failed memcg charge should invoke the OOM killer
- * or just return -ENOMEM. Returns the previous toggle state.
- *
- * NOTE: Any path that enables the OOM killer before charging must
- *       call mem_cgroup_oom_synchronize() afterward to finalize the
- *       OOM handling and clean up.
- */
-static inline bool mem_cgroup_toggle_oom(bool new)
+static inline void mem_cgroup_oom_enable(void)
 {
-	bool old;
-
-	old = current->memcg_oom.may_oom;
-	current->memcg_oom.may_oom = new;
-
-	return old;
+	WARN_ON(current->memcg_oom.may_oom);
+	current->memcg_oom.may_oom = 1;
 }
 
-static inline void mem_cgroup_enable_oom(void)
+static inline void mem_cgroup_oom_disable(void)
 {
-	bool old = mem_cgroup_toggle_oom(true);
-
-	WARN_ON(old == true);
-}
-
-static inline void mem_cgroup_disable_oom(void)
-{
-	bool old = mem_cgroup_toggle_oom(false);
-
-	WARN_ON(old == false);
+	WARN_ON(!current->memcg_oom.may_oom);
+	current->memcg_oom.may_oom = 0;
 }
 
 static inline bool task_in_memcg_oom(struct task_struct *p)
 {
-	return p->memcg_oom.in_memcg_oom;
+	return p->memcg_oom.memcg;
 }
 
-bool mem_cgroup_oom_synchronize(void);
+bool mem_cgroup_oom_synchronize(bool wait);
 
 #ifdef CONFIG_MEMCG_SWAP
 extern int do_swap_account;
@@ -402,16 +379,11 @@ static inline void mem_cgroup_end_update_page_stat(struct page *page,
 {
 }
 
-static inline bool mem_cgroup_toggle_oom(bool new)
-{
-	return false;
-}
-
-static inline void mem_cgroup_enable_oom(void)
+static inline void mem_cgroup_oom_enable(void)
 {
 }
 
-static inline void mem_cgroup_disable_oom(void)
+static inline void mem_cgroup_oom_disable(void)
 {
 }
 
@@ -420,7 +392,7 @@ static inline bool task_in_memcg_oom(struct task_struct *p)
 	return false;
 }
 
-static inline bool mem_cgroup_oom_synchronize(void)
+static inline bool mem_cgroup_oom_synchronize(bool wait)
 {
 	return false;
 }
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6682da36b293..e27baeeda3f4 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1394,11 +1394,10 @@ struct task_struct {
 	} memcg_batch;
 	unsigned int memcg_kmem_skip_account;
 	struct memcg_oom_info {
+		struct mem_cgroup *memcg;
+		gfp_t gfp_mask;
+		int order;
 		unsigned int may_oom:1;
-		unsigned int in_memcg_oom:1;
-		unsigned int oom_locked:1;
-		int wakeups;
-		struct mem_cgroup *wait_on_memcg;
 	} memcg_oom;
 #endif
 #ifdef CONFIG_UPROBES
diff --git a/ipc/sem.c b/ipc/sem.c
index 8c4f59b0204a..db9d241af133 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1282,6 +1282,12 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
 
 	sem_lock(sma, NULL, -1);
 
+	if (sma->sem_perm.deleted) {
+		sem_unlock(sma, -1);
+		rcu_read_unlock();
+		return -EIDRM;
+	}
+
 	curr = &sma->sem_base[semnum];
 
 	ipc_assert_locked_object(&sma->sem_perm);
@@ -1336,12 +1342,14 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		int i;
 
 		sem_lock(sma, NULL, -1);
+		if (sma->sem_perm.deleted) {
+			err = -EIDRM;
+			goto out_unlock;
+		}
 		if(nsems > SEMMSL_FAST) {
 			if (!ipc_rcu_getref(sma)) {
-				sem_unlock(sma, -1);
-				rcu_read_unlock();
 				err = -EIDRM;
-				goto out_free;
+				goto out_unlock;
 			}
 			sem_unlock(sma, -1);
 			rcu_read_unlock();
@@ -1354,10 +1362,8 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 			rcu_read_lock();
 			sem_lock_and_putref(sma);
 			if (sma->sem_perm.deleted) {
-				sem_unlock(sma, -1);
-				rcu_read_unlock();
 				err = -EIDRM;
-				goto out_free;
+				goto out_unlock;
 			}
 		}
 		for (i = 0; i < sma->sem_nsems; i++)
@@ -1375,8 +1381,8 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		struct sem_undo *un;
 
 		if (!ipc_rcu_getref(sma)) {
-			rcu_read_unlock();
-			return -EIDRM;
+			err = -EIDRM;
+			goto out_rcu_wakeup;
 		}
 		rcu_read_unlock();
 
@@ -1404,10 +1410,8 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		rcu_read_lock();
 		sem_lock_and_putref(sma);
 		if (sma->sem_perm.deleted) {
-			sem_unlock(sma, -1);
-			rcu_read_unlock();
 			err = -EIDRM;
-			goto out_free;
+			goto out_unlock;
 		}
 
 		for (i = 0; i < nsems; i++)
@@ -1431,6 +1435,10 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		goto out_rcu_wakeup;
 
 	sem_lock(sma, NULL, -1);
+	if (sma->sem_perm.deleted) {
+		err = -EIDRM;
+		goto out_unlock;
+	}
 	curr = &sma->sem_base[semnum];
 
 	switch (cmd) {
@@ -1836,6 +1844,10 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 	if (error)
 		goto out_rcu_wakeup;
 
+	error = -EIDRM;
+	locknum = sem_lock(sma, sops, nsops);
+	if (sma->sem_perm.deleted)
+		goto out_unlock_free;
 	/*
 	 * semid identifiers are not unique - find_alloc_undo may have
 	 * allocated an undo structure, it was invalidated by an RMID
@@ -1843,8 +1855,6 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 	 * This case can be detected checking un->semid. The existence of
 	 * "un" itself is guaranteed by rcu.
 	 */
-	error = -EIDRM;
-	locknum = sem_lock(sma, sops, nsops);
 	if (un && un->semid == -1)
 		goto out_unlock_free;
 
@@ -2057,6 +2067,12 @@ void exit_sem(struct task_struct *tsk)
 		}
 
 		sem_lock(sma, NULL, -1);
+		/* exit_sem raced with IPC_RMID, nothing to do */
+		if (sma->sem_perm.deleted) {
+			sem_unlock(sma, -1);
+			rcu_read_unlock();
+			continue;
+		}
 		un = __lookup_undo(ulp, semid);
 		if (un == NULL) {
 			/* exit_sem raced with IPC_RMID+semget() that created
diff --git a/ipc/util.c b/ipc/util.c
index fdb8ae740775..7684f41bce76 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -17,12 +17,27 @@
  *	      Pavel Emelianov <xemul@openvz.org>
  *
  * General sysv ipc locking scheme:
- *	when doing ipc id lookups, take the ids->rwsem
- *	rcu_read_lock()
- *	obtain the ipc object (kern_ipc_perm)
- *	perform security, capabilities, auditing and permission checks, etc.
- *	acquire the ipc lock (kern_ipc_perm.lock) throught ipc_lock_object()
- *	perform data updates (ie: SET, RMID, LOCK/UNLOCK commands)
+ *	rcu_read_lock()
+ *	    obtain the ipc object (kern_ipc_perm) by looking up the id in an idr
+ *	    tree.
+ *	    - perform initial checks (capabilities, auditing and permission,
+ *	      etc).
+ *	    - perform read-only operations, such as STAT, INFO commands.
+ *	      acquire the ipc lock (kern_ipc_perm.lock) through
+ *	      ipc_lock_object()
+ *	    - perform data updates, such as SET, RMID commands and
+ *	      mechanism-specific operations (semop/semtimedop,
+ *	      msgsnd/msgrcv, shmat/shmdt).
+ *	    drop the ipc lock, through ipc_unlock_object().
+ *	rcu_read_unlock()
+ *
+ *  The ids->rwsem must be taken when:
+ *	- creating, removing and iterating the existing entries in ipc
+ *	  identifier sets.
+ *	- iterating through files under /proc/sysvipc/
+ *
+ *  Note that sems have a special fast path that avoids kern_ipc_perm.lock -
+ *  see sem_lock().
  */
 
 #include <linux/mm.h>
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index 7deeb6297a48..1a53d497a8c5 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -53,6 +53,7 @@ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release)
 	ref->release = release;
 	return 0;
 }
+EXPORT_SYMBOL_GPL(percpu_ref_init);
 
 /**
  * percpu_ref_cancel_init - cancel percpu_ref_init()
@@ -84,6 +85,7 @@ void percpu_ref_cancel_init(struct percpu_ref *ref)
 		free_percpu(ref->pcpu_count);
 	}
 }
+EXPORT_SYMBOL_GPL(percpu_ref_cancel_init);
 
 static void percpu_ref_kill_rcu(struct rcu_head *rcu)
 {
@@ -156,3 +158,4 @@ void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
 
 	call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu);
 }
+EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
diff --git a/mm/filemap.c b/mm/filemap.c
index 1e6aec4a2d2e..ae4846ff4849 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1616,7 +1616,6 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	struct inode *inode = mapping->host;
 	pgoff_t offset = vmf->pgoff;
 	struct page *page;
-	bool memcg_oom;
 	pgoff_t size;
 	int ret = 0;
 
@@ -1625,11 +1624,7 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		return VM_FAULT_SIGBUS;
 
 	/*
-	 * Do we have something in the page cache already? Either
-	 * way, try readahead, but disable the memcg OOM killer for it
-	 * as readahead is optional and no errors are propagated up
-	 * the fault stack. The OOM killer is enabled while trying to
-	 * instantiate the faulting page individually below.
+	 * Do we have something in the page cache already?
 	 */
 	page = find_get_page(mapping, offset);
 	if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) {
@@ -1637,14 +1632,10 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		 * We found the page, so try async readahead before
 		 * waiting for the lock.
 		 */
-		memcg_oom = mem_cgroup_toggle_oom(false);
 		do_async_mmap_readahead(vma, ra, file, page, offset);
-		mem_cgroup_toggle_oom(memcg_oom);
 	} else if (!page) {
 		/* No page in the page cache at all */
-		memcg_oom = mem_cgroup_toggle_oom(false);
 		do_sync_mmap_readahead(vma, ra, file, offset);
-		mem_cgroup_toggle_oom(memcg_oom);
 		count_vm_event(PGMAJFAULT);
 		mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
 		ret = VM_FAULT_MAJOR;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 7489884682d8..610e3df2768a 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2697,6 +2697,7 @@ void __split_huge_page_pmd(struct vm_area_struct *vma, unsigned long address,
 
 	mmun_start = haddr;
 	mmun_end = haddr + HPAGE_PMD_SIZE;
+again:
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_trans_huge(*pmd))) {
@@ -2719,7 +2720,14 @@ void __split_huge_page_pmd(struct vm_area_struct *vma, unsigned long address,
 	split_huge_page(page);
 
 	put_page(page);
-	BUG_ON(pmd_trans_huge(*pmd));
+
+	/*
+	 * We don't always have down_write of mmap_sem here: a racing
+	 * do_huge_pmd_wp_page() might have copied-on-write to another
+	 * huge page before our split_huge_page() got the anon_vma lock.
+	 */
+	if (unlikely(pmd_trans_huge(*pmd)))
+		goto again;
 }
 
 void split_huge_page_pmd_mm(struct mm_struct *mm, unsigned long address,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index b49579c7f2a5..0b7656e804d1 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -653,6 +653,7 @@ static void free_huge_page(struct page *page)
 	BUG_ON(page_count(page));
 	BUG_ON(page_mapcount(page));
 	restore_reserve = PagePrivate(page);
+	ClearPagePrivate(page);
 
 	spin_lock(&hugetlb_lock);
 	hugetlb_cgroup_uncharge_page(hstate_index(h),
@@ -695,8 +696,22 @@ static void prep_compound_gigantic_page(struct page *page, unsigned long order)
 	/* we rely on prep_new_huge_page to set the destructor */
 	set_compound_order(page, order);
 	__SetPageHead(page);
+	__ClearPageReserved(page);
 	for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
 		__SetPageTail(p);
+		/*
+		 * For gigantic hugepages allocated through bootmem at
+		 * boot, it's safer to be consistent with the not-gigantic
+		 * hugepages and clear the PG_reserved bit from all tail pages
+		 * too. Otherwse drivers using get_user_pages() to access tail
+		 * pages may get the reference counting wrong if they see
+		 * PG_reserved set on a tail page (despite the head page not
+		 * having PG_reserved set). Enforcing this consistency between
+		 * head and tail pages allows drivers to optimize away a check
+		 * on the head page when they need know if put_page() is needed
+		 * after get_user_pages().
+		 */
+		__ClearPageReserved(p);
 		set_page_count(p, 0);
 		p->first_page = page;
 	}
@@ -1329,9 +1344,9 @@ static void __init gather_bootmem_prealloc(void)
 #else
 		page = virt_to_page(m);
 #endif
-		__ClearPageReserved(page);
 		WARN_ON(page_count(page) != 1);
 		prep_compound_huge_page(page, h->order);
+		WARN_ON(PageReserved(page));
 		prep_new_huge_page(h, page, page_to_nid(page));
 		/*
 		 * If we had gigantic hugepages allocated at boot time, we need
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1c52ddbc839b..34d3ca9572d6 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -866,6 +866,7 @@ static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
 	unsigned long val = 0;
 	int cpu;
 
+	get_online_cpus();
 	for_each_online_cpu(cpu)
 		val += per_cpu(memcg->stat->events[idx], cpu);
 #ifdef CONFIG_HOTPLUG_CPU
@@ -873,6 +874,7 @@ static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
 	val += memcg->nocpu_base.events[idx];
 	spin_unlock(&memcg->pcp_counter_lock);
 #endif
+	put_online_cpus();
 	return val;
 }
878 880
@@ -2159,110 +2161,59 @@ static void memcg_oom_recover(struct mem_cgroup *memcg)
 	memcg_wakeup_oom(memcg);
 }
 
-/*
- * try to call OOM killer
- */
 static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
 {
-	bool locked;
-	int wakeups;
-
 	if (!current->memcg_oom.may_oom)
 		return;
-
-	current->memcg_oom.in_memcg_oom = 1;
-
 	/*
-	 * As with any blocking lock, a contender needs to start
-	 * listening for wakeups before attempting the trylock,
-	 * otherwise it can miss the wakeup from the unlock and sleep
-	 * indefinitely. This is just open-coded because our locking
-	 * is so particular to memcg hierarchies.
+	 * We are in the middle of the charge context here, so we
+	 * don't want to block when potentially sitting on a callstack
+	 * that holds all kinds of filesystem and mm locks.
+	 *
+	 * Also, the caller may handle a failed allocation gracefully
+	 * (like optional page cache readahead) and so an OOM killer
+	 * invocation might not even be necessary.
+	 *
+	 * That's why we don't do anything here except remember the
+	 * OOM context and then deal with it at the end of the page
+	 * fault when the stack is unwound, the locks are released,
+	 * and when we know whether the fault was overall successful.
 	 */
-	wakeups = atomic_read(&memcg->oom_wakeups);
-	mem_cgroup_mark_under_oom(memcg);
-
-	locked = mem_cgroup_oom_trylock(memcg);
-
-	if (locked)
-		mem_cgroup_oom_notify(memcg);
-
-	if (locked && !memcg->oom_kill_disable) {
-		mem_cgroup_unmark_under_oom(memcg);
-		mem_cgroup_out_of_memory(memcg, mask, order);
-		mem_cgroup_oom_unlock(memcg);
-		/*
-		 * There is no guarantee that an OOM-lock contender
-		 * sees the wakeups triggered by the OOM kill
-		 * uncharges. Wake any sleepers explicitely.
-		 */
-		memcg_oom_recover(memcg);
-	} else {
-		/*
-		 * A system call can just return -ENOMEM, but if this
-		 * is a page fault and somebody else is handling the
-		 * OOM already, we need to sleep on the OOM waitqueue
-		 * for this memcg until the situation is resolved.
-		 * Which can take some time because it might be
-		 * handled by a userspace task.
-		 *
-		 * However, this is the charge context, which means
-		 * that we may sit on a large call stack and hold
-		 * various filesystem locks, the mmap_sem etc. and we
-		 * don't want the OOM handler to deadlock on them
-		 * while we sit here and wait. Store the current OOM
-		 * context in the task_struct, then return -ENOMEM.
-		 * At the end of the page fault handler, with the
-		 * stack unwound, pagefault_out_of_memory() will check
-		 * back with us by calling
-		 * mem_cgroup_oom_synchronize(), possibly putting the
-		 * task to sleep.
-		 */
-		current->memcg_oom.oom_locked = locked;
-		current->memcg_oom.wakeups = wakeups;
-		css_get(&memcg->css);
-		current->memcg_oom.wait_on_memcg = memcg;
-	}
+	css_get(&memcg->css);
+	current->memcg_oom.memcg = memcg;
+	current->memcg_oom.gfp_mask = mask;
+	current->memcg_oom.order = order;
 }
 
 /**
  * mem_cgroup_oom_synchronize - complete memcg OOM handling
+ * @handle: actually kill/wait or just clean up the OOM state
  *
- * This has to be called at the end of a page fault if the the memcg
- * OOM handler was enabled and the fault is returning %VM_FAULT_OOM.
+ * This has to be called at the end of a page fault if the memcg OOM
+ * handler was enabled.
  *
- * Memcg supports userspace OOM handling, so failed allocations must
+ * Memcg supports userspace OOM handling where failed allocations must
  * sleep on a waitqueue until the userspace task resolves the
  * situation. Sleeping directly in the charge context with all kinds
  * of locks held is not a good idea, instead we remember an OOM state
  * in the task and mem_cgroup_oom_synchronize() has to be called at
- * the end of the page fault to put the task to sleep and clean up the
- * OOM state.
+ * the end of the page fault to complete the OOM handling.
  *
  * Returns %true if an ongoing memcg OOM situation was detected and
- * finalized, %false otherwise.
+ * completed, %false otherwise.
  */
-bool mem_cgroup_oom_synchronize(void)
+bool mem_cgroup_oom_synchronize(bool handle)
 {
+	struct mem_cgroup *memcg = current->memcg_oom.memcg;
 	struct oom_wait_info owait;
-	struct mem_cgroup *memcg;
+	bool locked;
 
 	/* OOM is global, do not handle */
-	if (!current->memcg_oom.in_memcg_oom)
-		return false;
-
-	/*
-	 * We invoked the OOM killer but there is a chance that a kill
-	 * did not free up any charges. Everybody else might already
-	 * be sleeping, so restart the fault and keep the rampage
-	 * going until some charges are released.
-	 */
-	memcg = current->memcg_oom.wait_on_memcg;
 	if (!memcg)
-		goto out;
+		return false;
 
-	if (test_thread_flag(TIF_MEMDIE) || fatal_signal_pending(current))
-		goto out_memcg;
+	if (!handle)
+		goto cleanup;
 
 	owait.memcg = memcg;
 	owait.wait.flags = 0;
@@ -2271,13 +2222,25 @@ bool mem_cgroup_oom_synchronize(void)
 	INIT_LIST_HEAD(&owait.wait.task_list);
 
 	prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE);
-	/* Only sleep if we didn't miss any wakeups since OOM */
-	if (atomic_read(&memcg->oom_wakeups) == current->memcg_oom.wakeups)
+	mem_cgroup_mark_under_oom(memcg);
+
+	locked = mem_cgroup_oom_trylock(memcg);
+
+	if (locked)
+		mem_cgroup_oom_notify(memcg);
+
+	if (locked && !memcg->oom_kill_disable) {
+		mem_cgroup_unmark_under_oom(memcg);
+		finish_wait(&memcg_oom_waitq, &owait.wait);
+		mem_cgroup_out_of_memory(memcg, current->memcg_oom.gfp_mask,
+					 current->memcg_oom.order);
+	} else {
 		schedule();
-	finish_wait(&memcg_oom_waitq, &owait.wait);
-out_memcg:
-	mem_cgroup_unmark_under_oom(memcg);
-	if (current->memcg_oom.oom_locked) {
+		mem_cgroup_unmark_under_oom(memcg);
+		finish_wait(&memcg_oom_waitq, &owait.wait);
+	}
+
+	if (locked) {
 		mem_cgroup_oom_unlock(memcg);
 		/*
 		 * There is no guarantee that an OOM-lock contender
@@ -2286,10 +2249,9 @@ out_memcg:
 		 */
 		memcg_oom_recover(memcg);
 	}
+cleanup:
+	current->memcg_oom.memcg = NULL;
 	css_put(&memcg->css);
-	current->memcg_oom.wait_on_memcg = NULL;
-out:
-	current->memcg_oom.in_memcg_oom = 0;
 	return true;
 }
 
@@ -2703,6 +2665,9 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 		     || fatal_signal_pending(current)))
 		goto bypass;
 
+	if (unlikely(task_in_memcg_oom(current)))
+		goto bypass;
+
 	/*
 	 * We always charge the cgroup the mm_struct belongs to.
 	 * The mm_struct's mem_cgroup changes on task migration if the
@@ -2801,6 +2766,8 @@ done:
 	return 0;
 nomem:
 	*ptr = NULL;
+	if (gfp_mask & __GFP_NOFAIL)
+		return 0;
 	return -ENOMEM;
 bypass:
 	*ptr = root_mem_cgroup;
diff --git a/mm/memory.c b/mm/memory.c
index ca0003947115..1311f26497e6 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -837,6 +837,8 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 				 */
 				make_migration_entry_read(&entry);
 				pte = swp_entry_to_pte(entry);
+				if (pte_swp_soft_dirty(*src_pte))
+					pte = pte_swp_mksoft_dirty(pte);
 				set_pte_at(src_mm, addr, src_pte, pte);
 			}
 		}
@@ -3863,15 +3865,21 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * space. Kernel faults are handled more gracefully.
 	 */
 	if (flags & FAULT_FLAG_USER)
-		mem_cgroup_enable_oom();
+		mem_cgroup_oom_enable();
 
 	ret = __handle_mm_fault(mm, vma, address, flags);
 
-	if (flags & FAULT_FLAG_USER)
-		mem_cgroup_disable_oom();
-
-	if (WARN_ON(task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM)))
-		mem_cgroup_oom_synchronize();
+	if (flags & FAULT_FLAG_USER) {
+		mem_cgroup_oom_disable();
+		/*
+		 * The task may have entered a memcg OOM situation but
+		 * if the allocation error was handled gracefully (no
+		 * VM_FAULT_OOM), there is no need to kill anything.
+		 * Just clean up the OOM state peacefully.
+		 */
+		if (task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM))
+			mem_cgroup_oom_synchronize(false);
+	}
 
 	return ret;
 }
diff --git a/mm/migrate.c b/mm/migrate.c
index a26bccd44ccb..7a7325ee1d08 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -161,6 +161,8 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
 
 	get_page(new);
 	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
+	if (pte_swp_soft_dirty(*ptep))
+		pte = pte_mksoft_dirty(pte);
 	if (is_write_migration_entry(entry))
 		pte = pte_mkwrite(pte);
 #ifdef CONFIG_HUGETLB_PAGE
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 94722a4d6b43..a3af058f68e4 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -94,13 +94,16 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 			swp_entry_t entry = pte_to_swp_entry(oldpte);
 
 			if (is_write_migration_entry(entry)) {
+				pte_t newpte;
 				/*
 				 * A protection check is difficult so
 				 * just be safe and disable write
 				 */
 				make_migration_entry_read(&entry);
-				set_pte_at(mm, addr, pte,
-					swp_entry_to_pte(entry));
+				newpte = swp_entry_to_pte(entry);
+				if (pte_swp_soft_dirty(oldpte))
+					newpte = pte_swp_mksoft_dirty(newpte);
+				set_pte_at(mm, addr, pte, newpte);
 			}
 			pages++;
 		}
diff --git a/mm/mremap.c b/mm/mremap.c
index 91b13d6a16d4..0843feb66f3d 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -25,7 +25,6 @@
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
-#include <asm/pgalloc.h>
 
 #include "internal.h"
 
@@ -63,10 +62,8 @@ static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
 		return NULL;
 
 	pmd = pmd_alloc(mm, pud, addr);
-	if (!pmd) {
-		pud_free(mm, pud);
+	if (!pmd)
 		return NULL;
-	}
 
 	VM_BUG_ON(pmd_trans_huge(*pmd));
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 314e9d274381..6738c47f1f72 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -680,7 +680,7 @@ void pagefault_out_of_memory(void)
 {
 	struct zonelist *zonelist;
 
-	if (mem_cgroup_oom_synchronize())
+	if (mem_cgroup_oom_synchronize(true))
 		return;
 
 	zonelist = node_zonelist(first_online_node, GFP_KERNEL);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index f5236f804aa6..63807583d8e8 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1210,11 +1210,11 @@ static unsigned long dirty_poll_interval(unsigned long dirty,
 	return 1;
 }
 
-static long bdi_max_pause(struct backing_dev_info *bdi,
-			  unsigned long bdi_dirty)
+static unsigned long bdi_max_pause(struct backing_dev_info *bdi,
+				   unsigned long bdi_dirty)
 {
-	long bw = bdi->avg_write_bandwidth;
-	long t;
+	unsigned long bw = bdi->avg_write_bandwidth;
+	unsigned long t;
 
 	/*
 	 * Limit pause time for small memory systems. If sleeping for too long
@@ -1226,7 +1226,7 @@ static long bdi_max_pause(struct backing_dev_info *bdi,
 	t = bdi_dirty / (1 + bw / roundup_pow_of_two(1 + HZ / 8));
 	t++;
 
-	return min_t(long, t, MAX_PAUSE);
+	return min_t(unsigned long, t, MAX_PAUSE);
 }
 
 static long bdi_min_pause(struct backing_dev_info *bdi,
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 3963fc24fcc1..de7c904e52e5 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1824,6 +1824,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	struct filename *pathname;
 	int i, type, prev;
 	int err;
+	unsigned int old_block_size;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -1914,6 +1915,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	}
 
 	swap_file = p->swap_file;
+	old_block_size = p->old_block_size;
 	p->swap_file = NULL;
 	p->max = 0;
 	swap_map = p->swap_map;
@@ -1938,7 +1940,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	inode = mapping->host;
 	if (S_ISBLK(inode->i_mode)) {
 		struct block_device *bdev = I_BDEV(inode);
-		set_blocksize(bdev, p->old_block_size);
+		set_blocksize(bdev, old_block_size);
 		blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
 	} else {
 		mutex_lock(&inode->i_mutex);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 53f2f82f83ae..eea668d9cff6 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -211,6 +211,7 @@ void unregister_shrinker(struct shrinker *shrinker)
 	down_write(&shrinker_rwsem);
 	list_del(&shrinker->list);
 	up_write(&shrinker_rwsem);
+	kfree(shrinker->nr_deferred);
 }
 EXPORT_SYMBOL(unregister_shrinker);
 
diff --git a/mm/zswap.c b/mm/zswap.c
index 841e35f1db22..d93510c6aa2d 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -804,6 +804,10 @@ static void zswap_frontswap_invalidate_area(unsigned type)
 	}
 	tree->rbroot = RB_ROOT;
 	spin_unlock(&tree->lock);
+
+	zbud_destroy_pool(tree->pool);
+	kfree(tree);
+	zswap_trees[type] = NULL;
 }
 
 static struct zbud_ops zswap_zbud_ops = {
diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c
index 4fa655d68a81..41bd85559d4b 100644
--- a/tools/testing/selftests/timers/posix_timers.c
+++ b/tools/testing/selftests/timers/posix_timers.c
@@ -151,7 +151,7 @@ static int check_timer_create(int which)
 	fflush(stdout);
 
 	done = 0;
-	timer_create(which, NULL, &id);
+	err = timer_create(which, NULL, &id);
 	if (err < 0) {
 		perror("Can't create timer\n");
 		return -1;