author		Linus Torvalds <torvalds@linux-foundation.org>	2012-06-07 18:05:43 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-06-07 18:05:43 -0400
commit		46edaedaf3842164281d0f86c41bc152f4b4d32e (patch)
tree		f1f3d2c9aa8ef73a9c46bf8519be635a972f4439
parent		513335f964a17bd99a699b939391eb111aa5f65b (diff)
parent		40af1bbdca47e5c8a2044039bb78ca8fd8b20f94 (diff)
Merge branch 'akpm' (Andrew's fixups)
Merge random fixes from Andrew Morton.

* emailed from Andrew Morton <akpm@linux-foundation.org>: (11 patches)
  mm: correctly synchronize rss-counters at exit/exec
  btree: catch NULL value before it does harm
  btree: fix tree corruption in btree_get_prev()
  ipc: shm: restore MADV_REMOVE functionality on shared memory segments
  drivers/platform/x86/acerhdf.c: correct Boris' mail address
  c/r: prctl: drop VMA flags test on PR_SET_MM_ stack data assignment
  c/r: prctl: add ability to get clear_tid_address
  c/r: prctl: add minimal address test to PR_SET_MM
  c/r: prctl: update prctl_set_mm_exe_file() after mm->num_exe_file_vmas removal
  MAINTAINERS: whitespace fixes
  shmem: replace_page must flush_dcache and others
-rw-r--r--	MAINTAINERS                    | 18
-rw-r--r--	drivers/platform/x86/acerhdf.c |  2
-rw-r--r--	fs/exec.c                      |  1
-rw-r--r--	include/linux/prctl.h          | 10
-rw-r--r--	include/linux/sched.h          |  1
-rw-r--r--	ipc/shm.c                      | 12
-rw-r--r--	kernel/exit.c                  | 13
-rw-r--r--	kernel/fork.c                  |  8
-rw-r--r--	kernel/sys.c                   | 60
-rw-r--r--	lib/btree.c                    |  5
-rw-r--r--	mm/shmem.c                     | 57
11 files changed, 118 insertions(+), 69 deletions(-)
diff --git a/MAINTAINERS b/MAINTAINERS
index dafcba7e2312..14bc7071f9df 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1077,7 +1077,7 @@ F: drivers/media/video/s5p-fimc/
 ARM/SAMSUNG S5P SERIES Multi Format Codec (MFC) SUPPORT
 M:	Kyungmin Park <kyungmin.park@samsung.com>
 M:	Kamil Debski <k.debski@samsung.com>
 M:	Jeongtae Park <jtp.park@samsung.com>
 L:	linux-arm-kernel@lists.infradead.org
 L:	linux-media@vger.kernel.org
 S:	Maintained
@@ -1743,10 +1743,10 @@ F: include/linux/can/platform/
 CAPABILITIES
 M:	Serge Hallyn <serge.hallyn@canonical.com>
 L:	linux-security-module@vger.kernel.org
 S:	Supported
 F:	include/linux/capability.h
 F:	security/capability.c
 F:	security/commoncap.c
 F:	kernel/capability.c
 
 CELL BROADBAND ENGINE ARCHITECTURE
@@ -2146,11 +2146,11 @@ S: Orphan
 F:	drivers/net/wan/pc300*
 
 CYTTSP TOUCHSCREEN DRIVER
 M:	Javier Martinez Canillas <javier@dowhile0.org>
 L:	linux-input@vger.kernel.org
 S:	Maintained
 F:	drivers/input/touchscreen/cyttsp*
 F:	include/linux/input/cyttsp.h
 
 DAMA SLAVE for AX.25
 M:	Joerg Reuter <jreuter@yaina.de>
@@ -5185,7 +5185,7 @@ S: Maintained
 F:	drivers/firmware/pcdp.*
 
 PCI ERROR RECOVERY
 M:	Linas Vepstas <linasvepstas@gmail.com>
 L:	linux-pci@vger.kernel.org
 S:	Supported
 F:	Documentation/PCI/pci-error-recovery.txt
diff --git a/drivers/platform/x86/acerhdf.c b/drivers/platform/x86/acerhdf.c
index 639db4d0aa76..2fd9d36acd15 100644
--- a/drivers/platform/x86/acerhdf.c
+++ b/drivers/platform/x86/acerhdf.c
@@ -5,7 +5,7 @@
  *
  * (C) 2009 - Peter Feuerer     peter (a) piie.net
  *                              http://piie.net
- *          2009 Borislav Petkov <petkovbb@gmail.com>
+ *          2009 Borislav Petkov bp (a) alien8.de
  *
  * Inspired by and many thanks to:
  *  o acerfand   - Rachel Greenham
diff --git a/fs/exec.c b/fs/exec.c
index a79786a8d2c8..b926ed19301e 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -819,7 +819,6 @@ static int exec_mmap(struct mm_struct *mm)
 	/* Notify parent that we're no longer interested in the old VM */
 	tsk = current;
 	old_mm = current->mm;
-	sync_mm_rss(old_mm);
 	mm_release(tsk, old_mm);
 
 	if (old_mm) {
diff --git a/include/linux/prctl.h b/include/linux/prctl.h
index 711e0a30aacc..3988012255dc 100644
--- a/include/linux/prctl.h
+++ b/include/linux/prctl.h
@@ -127,8 +127,8 @@
 #define PR_SET_PTRACER 0x59616d61
 # define PR_SET_PTRACER_ANY ((unsigned long)-1)
 
-#define PR_SET_CHILD_SUBREAPER 36
-#define PR_GET_CHILD_SUBREAPER 37
+#define PR_SET_CHILD_SUBREAPER	36
+#define PR_GET_CHILD_SUBREAPER	37
 
 /*
  * If no_new_privs is set, then operations that grant new privileges (i.e.
@@ -142,7 +142,9 @@
  * asking selinux for a specific new context (e.g. with runcon) will result
  * in execve returning -EPERM.
  */
 #define PR_SET_NO_NEW_PRIVS	38
 #define PR_GET_NO_NEW_PRIVS	39
+
+#define PR_GET_TID_ADDRESS	40
 
 #endif /* _LINUX_PRCTL_H */
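
A rough userspace sketch (not part of this patch) of how the new option can be exercised; the fallback define is only needed against pre-3.5 headers, and the call fails with EINVAL on kernels built without CONFIG_CHECKPOINT_RESTORE:

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_GET_TID_ADDRESS
#define PR_GET_TID_ADDRESS 40
#endif

int main(void)
{
	int *tid_addr = NULL;

	/* arg2 points at an int *; the kernel put_user()s the address there */
	if (prctl(PR_GET_TID_ADDRESS, (unsigned long)&tid_addr, 0, 0, 0))
		perror("prctl(PR_GET_TID_ADDRESS)");
	else
		printf("clear_child_tid address: %p\n", (void *)tid_addr);
	return 0;
}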
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6029d8c54476..c688d4cc2e40 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -439,6 +439,7 @@ extern int get_dumpable(struct mm_struct *mm);
 /* leave room for more dump flags */
 #define MMF_VM_MERGEABLE	16	/* KSM may merge identical pages */
 #define MMF_VM_HUGEPAGE		17	/* set when VM_HUGEPAGE is set on vma */
+#define MMF_EXE_FILE_CHANGED	18	/* see prctl_set_mm_exe_file() */
 
 #define MMF_INIT_MASK		(MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
 
diff --git a/ipc/shm.c b/ipc/shm.c
index 5e2cbfdab6fc..41c1285d697a 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -393,6 +393,16 @@ static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 	return sfd->file->f_op->fsync(sfd->file, start, end, datasync);
 }
 
+static long shm_fallocate(struct file *file, int mode, loff_t offset,
+			  loff_t len)
+{
+	struct shm_file_data *sfd = shm_file_data(file);
+
+	if (!sfd->file->f_op->fallocate)
+		return -EOPNOTSUPP;
+	return sfd->file->f_op->fallocate(file, mode, offset, len);
+}
+
 static unsigned long shm_get_unmapped_area(struct file *file,
 	unsigned long addr, unsigned long len, unsigned long pgoff,
 	unsigned long flags)
@@ -410,6 +420,7 @@ static const struct file_operations shm_file_operations = {
 	.get_unmapped_area	= shm_get_unmapped_area,
 #endif
 	.llseek		= noop_llseek,
+	.fallocate	= shm_fallocate,
 };
 
 static const struct file_operations shm_file_operations_huge = {
@@ -418,6 +429,7 @@ static const struct file_operations shm_file_operations_huge = {
 	.release	= shm_release,
 	.get_unmapped_area	= shm_get_unmapped_area,
 	.llseek		= noop_llseek,
+	.fallocate	= shm_fallocate,
 };
 
 int is_file_shm_hugepages(struct file *file)
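
A minimal userspace sketch (illustrative, not from the series) of the path this restores: madvise(MADV_REMOVE) on a SysV shm mapping now reaches the backing shmem file's fallocate through the new shm_fallocate() hook instead of failing:

#include <stdio.h>
#include <string.h>
#include <sys/ipc.h>
#include <sys/mman.h>
#include <sys/shm.h>

int main(void)
{
	size_t size = 1 << 20;
	int id = shmget(IPC_PRIVATE, size, IPC_CREAT | 0600);
	char *p = shmat(id, NULL, 0);

	if (id < 0 || p == (char *)-1)
		return 1;
	memset(p, 0xaa, size);			/* fault the pages in */
	if (madvise(p, size / 2, MADV_REMOVE))	/* punch out the first half */
		perror("madvise(MADV_REMOVE)");
	shmdt(p);
	shmctl(id, IPC_RMID, NULL);
	return 0;
}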
diff --git a/kernel/exit.c b/kernel/exit.c
index 34867cc5b42a..804fb6bb8161 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -423,6 +423,7 @@ void daemonize(const char *name, ...)
 	 * user space pages. We don't need them, and if we didn't close them
 	 * they would be locked into memory.
 	 */
+	mm_release(current, current->mm);
 	exit_mm(current);
 	/*
 	 * We don't want to get frozen, in case system-wide hibernation
@@ -640,7 +641,6 @@ static void exit_mm(struct task_struct * tsk)
 	struct mm_struct *mm = tsk->mm;
 	struct core_state *core_state;
 
-	mm_release(tsk, mm);
 	if (!mm)
 		return;
 	/*
@@ -960,9 +960,13 @@ void do_exit(long code)
 				preempt_count());
 
 	acct_update_integrals(tsk);
-	/* sync mm's RSS info before statistics gathering */
-	if (tsk->mm)
-		sync_mm_rss(tsk->mm);
+
+	/* Set exit_code before complete_vfork_done() in mm_release() */
+	tsk->exit_code = code;
+
+	/* Release mm and sync mm's RSS info before statistics gathering */
+	mm_release(tsk, tsk->mm);
+
 	group_dead = atomic_dec_and_test(&tsk->signal->live);
 	if (group_dead) {
 		hrtimer_cancel(&tsk->signal->real_timer);
@@ -975,7 +979,6 @@ void do_exit(long code)
 	tty_audit_exit();
 	audit_free(tsk);
 
-	tsk->exit_code = code;
 	taskstats_exit(tsk, group_dead);
 
 	exit_mm(tsk);
diff --git a/kernel/fork.c b/kernel/fork.c
index ab5211b9e622..0560781c6904 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -619,6 +619,14 @@ void mmput(struct mm_struct *mm)
 			module_put(mm->binfmt->module);
 		mmdrop(mm);
 	}
+
+	/*
+	 * Final rss-counter synchronization. After this point there must be
+	 * no pagefaults into this mm from the current context. Otherwise
+	 * mm->rss_stat will be inconsistent.
+	 */
+	if (mm)
+		sync_mm_rss(mm);
 }
 EXPORT_SYMBOL_GPL(mmput);
 
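
For readers of the hunk above, a toy model (an assumption-laden sketch, not kernel code; all names here are illustrative) of what sync_mm_rss() does under SPLIT_RSS_COUNTING, and why the fold must happen after the last possible page fault on this mm:

/* Toy model of split rss accounting. */
enum { FILE_RSS, ANON_RSS, NR_RSS };

struct mm_model   { long rss_stat[NR_RSS];  };	/* shared, may lag   */
struct task_model { long rss_delta[NR_RSS]; };	/* per-task batch    */

/* a page fault bumps only the cheap per-task delta */
static void model_fault(struct task_model *tsk, int member)
{
	tsk->rss_delta[member]++;
}

/* model of sync_mm_rss(): fold the batch into the shared counters */
static void model_sync_mm_rss(struct task_model *tsk, struct mm_model *mm)
{
	for (int i = 0; i < NR_RSS; i++) {
		mm->rss_stat[i] += tsk->rss_delta[i];
		tsk->rss_delta[i] = 0;
	}
}

/*
 * If a fault can still occur after the final fold, its delta is never
 * flushed and the shared counters stay inconsistent - which is why the
 * series moves the sync to the very end of the mm's lifetime.
 */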
diff --git a/kernel/sys.c b/kernel/sys.c
index 9ff89cb9657a..f0ec44dcd415 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1786,27 +1786,13 @@ SYSCALL_DEFINE1(umask, int, mask)
 }
 
 #ifdef CONFIG_CHECKPOINT_RESTORE
-static bool vma_flags_mismatch(struct vm_area_struct *vma,
-			       unsigned long required,
-			       unsigned long banned)
-{
-	return (vma->vm_flags & required) != required ||
-		(vma->vm_flags & banned);
-}
-
 static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
 {
+	struct vm_area_struct *vma;
 	struct file *exe_file;
 	struct dentry *dentry;
 	int err;
 
-	/*
-	 * Setting new mm::exe_file is only allowed when no VM_EXECUTABLE vma's
-	 * remain. So perform a quick test first.
-	 */
-	if (mm->num_exe_file_vmas)
-		return -EBUSY;
-
 	exe_file = fget(fd);
 	if (!exe_file)
 		return -EBADF;
@@ -1827,17 +1813,30 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
 	if (err)
 		goto exit;
 
+	down_write(&mm->mmap_sem);
+
+	/*
+	 * Forbid mm->exe_file change if there are mapped other files.
+	 */
+	err = -EBUSY;
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (vma->vm_file && !path_equal(&vma->vm_file->f_path,
+						&exe_file->f_path))
+			goto exit_unlock;
+	}
+
 	/*
 	 * The symlink can be changed only once, just to disallow arbitrary
 	 * transitions malicious software might bring in. This means one
 	 * could make a snapshot over all processes running and monitor
 	 * /proc/pid/exe changes to notice unusual activity if needed.
 	 */
-	down_write(&mm->mmap_sem);
-	if (likely(!mm->exe_file))
-		set_mm_exe_file(mm, exe_file);
-	else
-		err = -EBUSY;
+	err = -EPERM;
+	if (test_and_set_bit(MMF_EXE_FILE_CHANGED, &mm->flags))
+		goto exit_unlock;
+
+	set_mm_exe_file(mm, exe_file);
+exit_unlock:
 	up_write(&mm->mmap_sem);
 
 exit:
@@ -1862,7 +1861,7 @@ static int prctl_set_mm(int opt, unsigned long addr,
 	if (opt == PR_SET_MM_EXE_FILE)
 		return prctl_set_mm_exe_file(mm, (unsigned int)addr);
 
-	if (addr >= TASK_SIZE)
+	if (addr >= TASK_SIZE || addr < mmap_min_addr)
 		return -EINVAL;
 
 	error = -EINVAL;
@@ -1924,12 +1923,6 @@ static int prctl_set_mm(int opt, unsigned long addr,
 			error = -EFAULT;
 			goto out;
 		}
-#ifdef CONFIG_STACK_GROWSUP
-		if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSUP, 0))
-#else
-		if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSDOWN, 0))
-#endif
-			goto out;
 		if (opt == PR_SET_MM_START_STACK)
 			mm->start_stack = addr;
 		else if (opt == PR_SET_MM_ARG_START)
@@ -1981,12 +1974,22 @@ out:
 	up_read(&mm->mmap_sem);
 	return error;
 }
+
+static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
+{
+	return put_user(me->clear_child_tid, tid_addr);
+}
+
 #else /* CONFIG_CHECKPOINT_RESTORE */
 static int prctl_set_mm(int opt, unsigned long addr,
 			unsigned long arg4, unsigned long arg5)
 {
 	return -EINVAL;
 }
+static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
+{
+	return -EINVAL;
+}
 #endif
 
 SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
@@ -2124,6 +2127,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 		else
 			return -EINVAL;
 		break;
+	case PR_GET_TID_ADDRESS:
+		error = prctl_get_tid_address(me, (int __user **)arg2);
+		break;
 	default:
 		return -EINVAL;
 	}
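
A restore-side sketch of how a checkpoint/restore tool might drive this interface (hypothetical helper and values; PR_SET_MM requires CAP_SYS_RESOURCE and CONFIG_CHECKPOINT_RESTORE). After the two prctl changes above, the address must satisfy mmap_min_addr <= addr < TASK_SIZE and map to an existing VMA, but that VMA no longer needs VM_GROWSDOWN/VM_GROWSUP stack flags:

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_SET_MM
#define PR_SET_MM		35
#define PR_SET_MM_START_STACK	5
#endif

/* saved_start_stack is assumed to come from a dumped process image */
static int restore_start_stack(unsigned long saved_start_stack)
{
	if (prctl(PR_SET_MM, PR_SET_MM_START_STACK, saved_start_stack, 0, 0)) {
		perror("prctl(PR_SET_MM_START_STACK)");
		return -1;
	}
	return 0;
}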
diff --git a/lib/btree.c b/lib/btree.c
index e5ec1e9c1aa5..f9a484676cb6 100644
--- a/lib/btree.c
+++ b/lib/btree.c
@@ -319,8 +319,8 @@ void *btree_get_prev(struct btree_head *head, struct btree_geo *geo,
 
 	if (head->height == 0)
 		return NULL;
-retry:
 	longcpy(key, __key, geo->keylen);
+retry:
 	dec_key(geo, key);
 
 	node = head->node;
@@ -351,7 +351,7 @@ retry:
 	}
 miss:
 	if (retry_key) {
-		__key = retry_key;
+		longcpy(key, retry_key, geo->keylen);
 		retry_key = NULL;
 		goto retry;
 	}
@@ -509,6 +509,7 @@ retry:
 int btree_insert(struct btree_head *head, struct btree_geo *geo,
 		unsigned long *key, void *val, gfp_t gfp)
 {
+	BUG_ON(!val);
 	return btree_insert_level(head, geo, key, val, 1, gfp);
 }
 EXPORT_SYMBOL_GPL(btree_insert);
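
To see why the new BUG_ON matters, a hedged in-kernel sketch of the lib/btree API (module boilerplate assumed, not from the patch): btree_lookup() returns NULL to mean "no entry", so a NULL value stored via btree_insert() would make a hit indistinguishable from a miss and, per the changelog, could do worse harm inside the tree:

#include <linux/btree.h>
#include <linux/module.h>

static struct btree_head head;

static int __init btree_demo_init(void)
{
	unsigned long key = 42;
	static int payload = 1337;
	int err = btree_init(&head);

	if (err)
		return err;
	/* a real pointer is fine; btree_insert(..., NULL, ...) now BUGs */
	err = btree_insert(&head, &btree_geo32, &key, &payload, GFP_KERNEL);
	if (!err)
		pr_info("lookup(42) -> %p\n",
			btree_lookup(&head, &btree_geo32, &key));
	return err;
}

static void __exit btree_demo_exit(void)
{
	btree_destroy(&head);
}

module_init(btree_demo_init);
module_exit(btree_demo_exit);
MODULE_LICENSE("GPL");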
diff --git a/mm/shmem.c b/mm/shmem.c
index 585bd220a21e..a15a466d0d1d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -683,10 +683,21 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
 	mutex_lock(&shmem_swaplist_mutex);
 	/*
 	 * We needed to drop mutex to make that restrictive page
-	 * allocation; but the inode might already be freed by now,
-	 * and we cannot refer to inode or mapping or info to check.
-	 * However, we do hold page lock on the PageSwapCache page,
-	 * so can check if that still has our reference remaining.
+	 * allocation, but the inode might have been freed while we
+	 * dropped it: although a racing shmem_evict_inode() cannot
+	 * complete without emptying the radix_tree, our page lock
+	 * on this swapcache page is not enough to prevent that -
+	 * free_swap_and_cache() of our swap entry will only
+	 * trylock_page(), removing swap from radix_tree whatever.
+	 *
+	 * We must not proceed to shmem_add_to_page_cache() if the
+	 * inode has been freed, but of course we cannot rely on
+	 * inode or mapping or info to check that. However, we can
+	 * safely check if our swap entry is still in use (and here
+	 * it can't have got reused for another page): if it's still
+	 * in use, then the inode cannot have been freed yet, and we
+	 * can safely proceed (if it's no longer in use, that tells
+	 * nothing about the inode, but we don't need to unuse swap).
 	 */
 	if (!page_swapcount(*pagep))
 		error = -ENOENT;
@@ -730,9 +741,9 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
 
 	/*
 	 * There's a faint possibility that swap page was replaced before
-	 * caller locked it: it will come back later with the right page.
+	 * caller locked it: caller will come back later with the right page.
 	 */
-	if (unlikely(!PageSwapCache(page)))
+	if (unlikely(!PageSwapCache(page) || page_private(page) != swap.val))
 		goto out;
 
 	/*
@@ -995,21 +1006,15 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
 	newpage = shmem_alloc_page(gfp, info, index);
 	if (!newpage)
 		return -ENOMEM;
-	VM_BUG_ON(shmem_should_replace_page(newpage, gfp));
 
-	*pagep = newpage;
 	page_cache_get(newpage);
 	copy_highpage(newpage, oldpage);
+	flush_dcache_page(newpage);
 
-	VM_BUG_ON(!PageLocked(oldpage));
 	__set_page_locked(newpage);
-	VM_BUG_ON(!PageUptodate(oldpage));
 	SetPageUptodate(newpage);
-	VM_BUG_ON(!PageSwapBacked(oldpage));
 	SetPageSwapBacked(newpage);
-	VM_BUG_ON(!swap_index);
 	set_page_private(newpage, swap_index);
-	VM_BUG_ON(!PageSwapCache(oldpage));
 	SetPageSwapCache(newpage);
 
 	/*
@@ -1019,13 +1024,24 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
 	spin_lock_irq(&swap_mapping->tree_lock);
 	error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage,
 								   newpage);
-	__inc_zone_page_state(newpage, NR_FILE_PAGES);
-	__dec_zone_page_state(oldpage, NR_FILE_PAGES);
+	if (!error) {
+		__inc_zone_page_state(newpage, NR_FILE_PAGES);
+		__dec_zone_page_state(oldpage, NR_FILE_PAGES);
+	}
 	spin_unlock_irq(&swap_mapping->tree_lock);
-	BUG_ON(error);
 
-	mem_cgroup_replace_page_cache(oldpage, newpage);
-	lru_cache_add_anon(newpage);
+	if (unlikely(error)) {
+		/*
+		 * Is this possible? I think not, now that our callers check
+		 * both PageSwapCache and page_private after getting page lock;
+		 * but be defensive. Reverse old to newpage for clear and free.
+		 */
+		oldpage = newpage;
+	} else {
+		mem_cgroup_replace_page_cache(oldpage, newpage);
+		lru_cache_add_anon(newpage);
+		*pagep = newpage;
+	}
 
 	ClearPageSwapCache(oldpage);
@@ -1033,7 +1049,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
 	unlock_page(oldpage);
 	page_cache_release(oldpage);
 	page_cache_release(oldpage);
-	return 0;
+	return error;
 }
 
 /*
@@ -1107,7 +1123,8 @@ repeat:
 
 	/* We have to do this with page locked to prevent races */
 	lock_page(page);
-	if (!PageSwapCache(page) || page->mapping) {
+	if (!PageSwapCache(page) || page_private(page) != swap.val ||
+	    page->mapping) {
 		error = -EEXIST;	/* try again */
 		goto failed;
 	}
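
The core pattern behind the flush_dcache_page() addition in shmem_replace_page(), as a standalone sketch (hypothetical helper, not from the patch): a kernel-side write to a page that userspace may later map must be followed by a D-cache flush on architectures with aliasing caches; on x86 the flush compiles away:

#include <linux/highmem.h>

/* hypothetical helper illustrating the copy-then-flush rule */
static void copy_page_for_user(struct page *dst, struct page *src)
{
	copy_highpage(dst, src);	/* write through the kernel mapping */
	flush_dcache_page(dst);		/* make the data visible to any
					 * (future) user-space mapping on
					 * VIVT/aliasing-VIPT caches */
}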