author		Linus Torvalds <torvalds@linux-foundation.org>	2012-06-07 18:05:43 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-06-07 18:05:43 -0400
commit		46edaedaf3842164281d0f86c41bc152f4b4d32e (patch)
tree		f1f3d2c9aa8ef73a9c46bf8519be635a972f4439
parent		513335f964a17bd99a699b939391eb111aa5f65b (diff)
parent		40af1bbdca47e5c8a2044039bb78ca8fd8b20f94 (diff)
Merge branch 'akpm' (Andrew's fixups)
Merge random fixes from Andrew Morton.
* emailed from Andrew Morton <akpm@linux-foundation.org>: (11 patches)
mm: correctly synchronize rss-counters at exit/exec
btree: catch NULL value before it does harm
btree: fix tree corruption in btree_get_prev()
ipc: shm: restore MADV_REMOVE functionality on shared memory segments
drivers/platform/x86/acerhdf.c: correct Boris' mail address
c/r: prctl: drop VMA flags test on PR_SET_MM_ stack data assignment
c/r: prctl: add ability to get clear_tid_address
c/r: prctl: add minimal address test to PR_SET_MM
c/r: prctl: update prctl_set_mm_exe_file() after mm->num_exe_file_vmas removal
MAINTAINERS: whitespace fixes
shmem: replace_page must flush_dcache and others
 MAINTAINERS                    | 18
 drivers/platform/x86/acerhdf.c |  2
 fs/exec.c                      |  1
 include/linux/prctl.h          | 10
 include/linux/sched.h          |  1
 ipc/shm.c                      | 12
 kernel/exit.c                  | 13
 kernel/fork.c                  |  8
 kernel/sys.c                   | 60
 lib/btree.c                    |  5
 mm/shmem.c                     | 57
 11 files changed, 118 insertions(+), 69 deletions(-)
diff --git a/MAINTAINERS b/MAINTAINERS
index dafcba7e2312..14bc7071f9df 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1077,7 +1077,7 @@ F:	drivers/media/video/s5p-fimc/
 ARM/SAMSUNG S5P SERIES Multi Format Codec (MFC) SUPPORT
 M:	Kyungmin Park <kyungmin.park@samsung.com>
 M:	Kamil Debski <k.debski@samsung.com>
-M:      Jeongtae Park <jtp.park@samsung.com>
+M:	Jeongtae Park <jtp.park@samsung.com>
 L:	linux-arm-kernel@lists.infradead.org
 L:	linux-media@vger.kernel.org
 S:	Maintained
@@ -1743,10 +1743,10 @@ F:	include/linux/can/platform/
 CAPABILITIES
 M:	Serge Hallyn <serge.hallyn@canonical.com>
 L:	linux-security-module@vger.kernel.org
-S:      Supported
-F:      include/linux/capability.h
-F:      security/capability.c
-F:      security/commoncap.c
+S:	Supported
+F:	include/linux/capability.h
+F:	security/capability.c
+F:	security/commoncap.c
 F:	kernel/capability.c
 
 CELL BROADBAND ENGINE ARCHITECTURE
@@ -2146,11 +2146,11 @@ S:	Orphan
 F:	drivers/net/wan/pc300*
 
 CYTTSP TOUCHSCREEN DRIVER
-M:      Javier Martinez Canillas <javier@dowhile0.org>
-L:      linux-input@vger.kernel.org
-S:      Maintained
-F:      drivers/input/touchscreen/cyttsp*
-F:      include/linux/input/cyttsp.h
+M:	Javier Martinez Canillas <javier@dowhile0.org>
+L:	linux-input@vger.kernel.org
+S:	Maintained
+F:	drivers/input/touchscreen/cyttsp*
+F:	include/linux/input/cyttsp.h
 
 DAMA SLAVE for AX.25
 M:	Joerg Reuter <jreuter@yaina.de>
@@ -5185,7 +5185,7 @@ S:	Maintained
 F:	drivers/firmware/pcdp.*
 
 PCI ERROR RECOVERY
-M:      Linas Vepstas <linasvepstas@gmail.com>
+M:	Linas Vepstas <linasvepstas@gmail.com>
 L:	linux-pci@vger.kernel.org
 S:	Supported
 F:	Documentation/PCI/pci-error-recovery.txt
diff --git a/drivers/platform/x86/acerhdf.c b/drivers/platform/x86/acerhdf.c
index 639db4d0aa76..2fd9d36acd15 100644
--- a/drivers/platform/x86/acerhdf.c
+++ b/drivers/platform/x86/acerhdf.c
@@ -5,7 +5,7 @@
  *
  * (C) 2009 - Peter Feuerer     peter (a) piie.net
  *                              http://piie.net
- *     2009 Borislav Petkov <petkovbb@gmail.com>
+ *     2009 Borislav Petkov     bp (a) alien8.de
  *
  * Inspired by and many thanks to:
  *  o acerfand   - Rachel Greenham
diff --git a/fs/exec.c b/fs/exec.c
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -819,7 +819,6 @@ static int exec_mmap(struct mm_struct *mm)
 	/* Notify parent that we're no longer interested in the old VM */
 	tsk = current;
 	old_mm = current->mm;
-	sync_mm_rss(old_mm);
 	mm_release(tsk, old_mm);
 
 	if (old_mm) {
diff --git a/include/linux/prctl.h b/include/linux/prctl.h
index 711e0a30aacc..3988012255dc 100644
--- a/include/linux/prctl.h
+++ b/include/linux/prctl.h
@@ -127,8 +127,8 @@
 #define PR_SET_PTRACER 0x59616d61
 # define PR_SET_PTRACER_ANY ((unsigned long)-1)
 
-#define PR_SET_CHILD_SUBREAPER 36
-#define PR_GET_CHILD_SUBREAPER 37
+#define PR_SET_CHILD_SUBREAPER	36
+#define PR_GET_CHILD_SUBREAPER	37
 
 /*
  * If no_new_privs is set, then operations that grant new privileges (i.e.
@@ -142,7 +142,9 @@
  * asking selinux for a specific new context (e.g. with runcon) will result
  * in execve returning -EPERM.
  */
-#define PR_SET_NO_NEW_PRIVS 38
-#define PR_GET_NO_NEW_PRIVS 39
+#define PR_SET_NO_NEW_PRIVS	38
+#define PR_GET_NO_NEW_PRIVS	39
+
+#define PR_GET_TID_ADDRESS	40
 
 #endif /* _LINUX_PRCTL_H */
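
The new PR_GET_TID_ADDRESS pairs with set_tid_address(2): it reads back the clear_child_tid pointer that the kernel will zero and futex-wake when the thread exits, which a checkpoint/restore dumper must record. A minimal userspace sketch, not part of the patch, assuming a kernel built with CONFIG_CHECKPOINT_RESTORE (the fallback define is only for pre-3.5 headers):

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_GET_TID_ADDRESS
#define PR_GET_TID_ADDRESS 40		/* not yet in older <linux/prctl.h> */
#endif

int main(void)
{
	int *tid_addr = NULL;

	/* the kernel put_user()s current->clear_child_tid through arg2 */
	if (prctl(PR_GET_TID_ADDRESS, (unsigned long)&tid_addr, 0, 0, 0))
		perror("PR_GET_TID_ADDRESS");	/* EINVAL without CONFIG_CHECKPOINT_RESTORE */
	else
		printf("clear_child_tid address: %p\n", (void *)tid_addr);
	return 0;
}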
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6029d8c54476..c688d4cc2e40 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -439,6 +439,7 @@ extern int get_dumpable(struct mm_struct *mm);
 /* leave room for more dump flags */
 #define MMF_VM_MERGEABLE	16	/* KSM may merge identical pages */
 #define MMF_VM_HUGEPAGE		17	/* set when VM_HUGEPAGE is set on vma */
+#define MMF_EXE_FILE_CHANGED	18	/* see prctl_set_mm_exe_file() */
 
 #define MMF_INIT_MASK		(MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
diff --git a/ipc/shm.c b/ipc/shm.c
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -393,6 +393,16 @@ static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 	return sfd->file->f_op->fsync(sfd->file, start, end, datasync);
 }
 
+static long shm_fallocate(struct file *file, int mode, loff_t offset,
+			  loff_t len)
+{
+	struct shm_file_data *sfd = shm_file_data(file);
+
+	if (!sfd->file->f_op->fallocate)
+		return -EOPNOTSUPP;
+	return sfd->file->f_op->fallocate(file, mode, offset, len);
+}
+
 static unsigned long shm_get_unmapped_area(struct file *file,
 	unsigned long addr, unsigned long len, unsigned long pgoff,
 	unsigned long flags)
@@ -410,6 +420,7 @@ static const struct file_operations shm_file_operations = {
 	.get_unmapped_area	= shm_get_unmapped_area,
 #endif
 	.llseek		= noop_llseek,
+	.fallocate	= shm_fallocate,
 };
 
 static const struct file_operations shm_file_operations_huge = {
@@ -418,6 +429,7 @@ static const struct file_operations shm_file_operations_huge = {
 	.release	= shm_release,
 	.get_unmapped_area	= shm_get_unmapped_area,
 	.llseek		= noop_llseek,
+	.fallocate	= shm_fallocate,
 };
 
 int is_file_shm_hugepages(struct file *file)
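
These .fallocate hooks are what the series title means by restoring MADV_REMOVE: madvise_remove() now punches holes through the vma's file via its fallocate method, and the sysv shm wrapper file was hiding the underlying shmem file's implementation. A sketch of the userspace operation being re-enabled (illustrative only, with minimal error handling):

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/mman.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	int id = shmget(IPC_PRIVATE, 4 * page, IPC_CREAT | 0600);
	char *p = shmat(id, NULL, 0);

	if (id < 0 || p == (void *)-1)
		return 1;
	memset(p, 0xaa, 4 * page);		/* fault all four pages in */

	/* punch the first page out of the segment; without the forwarding
	 * above, the shm wrapper file has no ->fallocate and this fails */
	if (madvise(p, page, MADV_REMOVE))
		perror("MADV_REMOVE");

	shmdt(p);
	shmctl(id, IPC_RMID, NULL);
	return 0;
}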
diff --git a/kernel/exit.c b/kernel/exit.c
index 34867cc5b42a..804fb6bb8161 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -423,6 +423,7 @@ void daemonize(const char *name, ...)
 	 * user space pages. We don't need them, and if we didn't close them
 	 * they would be locked into memory.
 	 */
+	mm_release(current, current->mm);
 	exit_mm(current);
 	/*
 	 * We don't want to get frozen, in case system-wide hibernation
@@ -640,7 +641,6 @@ static void exit_mm(struct task_struct * tsk)
 	struct mm_struct *mm = tsk->mm;
 	struct core_state *core_state;
 
-	mm_release(tsk, mm);
 	if (!mm)
 		return;
 	/*
@@ -960,9 +960,13 @@ void do_exit(long code)
 				preempt_count());
 
 	acct_update_integrals(tsk);
-	/* sync mm's RSS info before statistics gathering */
-	if (tsk->mm)
-		sync_mm_rss(tsk->mm);
+
+	/* Set exit_code before complete_vfork_done() in mm_release() */
+	tsk->exit_code = code;
+
+	/* Release mm and sync mm's RSS info before statistics gathering */
+	mm_release(tsk, tsk->mm);
+
 	group_dead = atomic_dec_and_test(&tsk->signal->live);
 	if (group_dead) {
 		hrtimer_cancel(&tsk->signal->real_timer);
@@ -975,7 +979,6 @@ void do_exit(long code)
 	tty_audit_exit();
 	audit_free(tsk);
 
-	tsk->exit_code = code;
 	taskstats_exit(tsk, group_dead);
 
 	exit_mm(tsk);
diff --git a/kernel/fork.c b/kernel/fork.c
index ab5211b9e622..0560781c6904 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -619,6 +619,14 @@ void mmput(struct mm_struct *mm)
 			module_put(mm->binfmt->module);
 		mmdrop(mm);
 	}
+
+	/*
+	 * Final rss-counter synchronization. After this point there must be
+	 * no pagefaults into this mm from the current context. Otherwise
+	 * mm->rss_stat will be inconsistent.
+	 */
+	if (mm)
+		sync_mm_rss(mm);
 }
 EXPORT_SYMBOL_GPL(mmput);
 
diff --git a/kernel/sys.c b/kernel/sys.c
index 9ff89cb9657a..f0ec44dcd415 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1786,27 +1786,13 @@ SYSCALL_DEFINE1(umask, int, mask)
 }
 
 #ifdef CONFIG_CHECKPOINT_RESTORE
-static bool vma_flags_mismatch(struct vm_area_struct *vma,
-			       unsigned long required,
-			       unsigned long banned)
-{
-	return (vma->vm_flags & required) != required ||
-	       (vma->vm_flags & banned);
-}
-
 static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
 {
+	struct vm_area_struct *vma;
 	struct file *exe_file;
 	struct dentry *dentry;
 	int err;
 
-	/*
-	 * Setting new mm::exe_file is only allowed when no VM_EXECUTABLE vma's
-	 * remain. So perform a quick test first.
-	 */
-	if (mm->num_exe_file_vmas)
-		return -EBUSY;
-
 	exe_file = fget(fd);
 	if (!exe_file)
 		return -EBADF;
@@ -1827,17 +1813,30 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
 	if (err)
 		goto exit;
 
+	down_write(&mm->mmap_sem);
+
+	/*
+	 * Forbid mm->exe_file change if there are mapped other files.
+	 */
+	err = -EBUSY;
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (vma->vm_file && !path_equal(&vma->vm_file->f_path,
+						&exe_file->f_path))
+			goto exit_unlock;
+	}
+
 	/*
 	 * The symlink can be changed only once, just to disallow arbitrary
 	 * transitions malicious software might bring in. This means one
 	 * could make a snapshot over all processes running and monitor
 	 * /proc/pid/exe changes to notice unusual activity if needed.
 	 */
-	down_write(&mm->mmap_sem);
-	if (likely(!mm->exe_file))
-		set_mm_exe_file(mm, exe_file);
-	else
-		err = -EBUSY;
+	err = -EPERM;
+	if (test_and_set_bit(MMF_EXE_FILE_CHANGED, &mm->flags))
+		goto exit_unlock;
+
+	set_mm_exe_file(mm, exe_file);
+exit_unlock:
 	up_write(&mm->mmap_sem);
 
 exit:
@@ -1862,7 +1861,7 @@ static int prctl_set_mm(int opt, unsigned long addr,
 	if (opt == PR_SET_MM_EXE_FILE)
 		return prctl_set_mm_exe_file(mm, (unsigned int)addr);
 
-	if (addr >= TASK_SIZE)
+	if (addr >= TASK_SIZE || addr < mmap_min_addr)
 		return -EINVAL;
 
 	error = -EINVAL;
@@ -1924,12 +1923,6 @@ static int prctl_set_mm(int opt, unsigned long addr,
 			error = -EFAULT;
 			goto out;
 		}
-#ifdef CONFIG_STACK_GROWSUP
-		if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSUP, 0))
-#else
-		if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSDOWN, 0))
-#endif
-			goto out;
 		if (opt == PR_SET_MM_START_STACK)
 			mm->start_stack = addr;
 		else if (opt == PR_SET_MM_ARG_START)
@@ -1981,12 +1974,22 @@ out:
 	up_read(&mm->mmap_sem);
 	return error;
 }
+
+static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
+{
+	return put_user(me->clear_child_tid, tid_addr);
+}
+
 #else /* CONFIG_CHECKPOINT_RESTORE */
 static int prctl_set_mm(int opt, unsigned long addr,
 			unsigned long arg4, unsigned long arg5)
 {
 	return -EINVAL;
 }
+static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
+{
+	return -EINVAL;
+}
 #endif
 
 SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
@@ -2124,6 +2127,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 		else
 			return -EINVAL;
 		break;
+	case PR_GET_TID_ADDRESS:
+		error = prctl_get_tid_address(me, (int __user **)arg2);
+		break;
 	default:
 		return -EINVAL;
 	}
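
Two of these prctl changes are directly visible from userspace: PR_SET_MM now rejects addresses below mmap_min_addr, and PR_SET_MM_EXE_FILE can succeed at most once per mm (the MMF_EXE_FILE_CHANGED bit) and only while no other file is mapped. A hedged sketch of the new minimal-address test, as a CRIU-style restorer would trip it (illustrative; PR_SET_MM requires CAP_SYS_RESOURCE and CONFIG_CHECKPOINT_RESTORE, and the defines are fallbacks for older headers):

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_SET_MM
#define PR_SET_MM		35
#define PR_SET_MM_START_STACK	5
#endif

int main(void)
{
	/*
	 * Address 0 sits below mmap_min_addr, so the added minimal-address
	 * test now returns EINVAL instead of accepting a bogus stack top.
	 * (Run as root: the option itself requires CAP_SYS_RESOURCE.)
	 */
	if (prctl(PR_SET_MM, PR_SET_MM_START_STACK, 0, 0, 0))
		perror("PR_SET_MM_START_STACK");
	return 0;
}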
diff --git a/lib/btree.c b/lib/btree.c
index e5ec1e9c1aa5..f9a484676cb6 100644
--- a/lib/btree.c
+++ b/lib/btree.c
@@ -319,8 +319,8 @@ void *btree_get_prev(struct btree_head *head, struct btree_geo *geo,
 
 	if (head->height == 0)
 		return NULL;
-retry:
 	longcpy(key, __key, geo->keylen);
+retry:
 	dec_key(geo, key);
 
 	node = head->node;
@@ -351,7 +351,7 @@ retry:
 	}
 miss:
 	if (retry_key) {
-		__key = retry_key;
+		longcpy(key, retry_key, geo->keylen);
 		retry_key = NULL;
 		goto retry;
 	}
@@ -509,6 +509,7 @@ retry:
 int btree_insert(struct btree_head *head, struct btree_geo *geo,
 		unsigned long *key, void *val, gfp_t gfp)
 {
+	BUG_ON(!val);
 	return btree_insert_level(head, geo, key, val, 1, gfp);
 }
 EXPORT_SYMBOL_GPL(btree_insert);
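
The new BUG_ON(!val) works because lib/btree reserves the NULL pointer as its only "not found" indicator: btree_lookup() returns NULL for a missing key, so a stored NULL value would be indistinguishable from absence and, as the patch title says, could do harm later. A kernel-style usage sketch, not taken from the patch:

#include <linux/btree.h>
#include <linux/types.h>
#include <linux/slab.h>

static int demo_store(struct btree_head *head, unsigned long key,
		      void *payload)
{
	/* btree_insert() now BUG()s immediately if payload is NULL */
	return btree_insert(head, &btree_geo64, &key, payload, GFP_KERNEL);
}

static bool demo_present(struct btree_head *head, unsigned long key)
{
	/* NULL from btree_lookup() can only mean "no such key", precisely
	 * because NULL values are refused at insert time */
	return btree_lookup(head, &btree_geo64, &key) != NULL;
}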
diff --git a/mm/shmem.c b/mm/shmem.c
index 585bd220a21e..a15a466d0d1d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -683,10 +683,21 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
 	mutex_lock(&shmem_swaplist_mutex);
 	/*
 	 * We needed to drop mutex to make that restrictive page
-	 * allocation; but the inode might already be freed by now,
-	 * and we cannot refer to inode or mapping or info to check.
-	 * However, we do hold page lock on the PageSwapCache page,
-	 * so can check if that still has our reference remaining.
+	 * allocation, but the inode might have been freed while we
+	 * dropped it: although a racing shmem_evict_inode() cannot
+	 * complete without emptying the radix_tree, our page lock
+	 * on this swapcache page is not enough to prevent that -
+	 * free_swap_and_cache() of our swap entry will only
+	 * trylock_page(), removing swap from radix_tree whatever.
+	 *
+	 * We must not proceed to shmem_add_to_page_cache() if the
+	 * inode has been freed, but of course we cannot rely on
+	 * inode or mapping or info to check that. However, we can
+	 * safely check if our swap entry is still in use (and here
+	 * it can't have got reused for another page): if it's still
+	 * in use, then the inode cannot have been freed yet, and we
+	 * can safely proceed (if it's no longer in use, that tells
+	 * nothing about the inode, but we don't need to unuse swap).
 	 */
 	if (!page_swapcount(*pagep))
 		error = -ENOENT;
@@ -730,9 +741,9 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
 
 	/*
 	 * There's a faint possibility that swap page was replaced before
-	 * caller locked it: it will come back later with the right page.
+	 * caller locked it: caller will come back later with the right page.
 	 */
-	if (unlikely(!PageSwapCache(page)))
+	if (unlikely(!PageSwapCache(page) || page_private(page) != swap.val))
 		goto out;
 
 	/*
@@ -995,21 +1006,15 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
 	newpage = shmem_alloc_page(gfp, info, index);
 	if (!newpage)
 		return -ENOMEM;
-	VM_BUG_ON(shmem_should_replace_page(newpage, gfp));
 
-	*pagep = newpage;
 	page_cache_get(newpage);
 	copy_highpage(newpage, oldpage);
+	flush_dcache_page(newpage);
 
-	VM_BUG_ON(!PageLocked(oldpage));
 	__set_page_locked(newpage);
-	VM_BUG_ON(!PageUptodate(oldpage));
 	SetPageUptodate(newpage);
-	VM_BUG_ON(!PageSwapBacked(oldpage));
 	SetPageSwapBacked(newpage);
-	VM_BUG_ON(!swap_index);
 	set_page_private(newpage, swap_index);
-	VM_BUG_ON(!PageSwapCache(oldpage));
 	SetPageSwapCache(newpage);
 
 	/*
@@ -1019,13 +1024,24 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
 	spin_lock_irq(&swap_mapping->tree_lock);
 	error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage,
 								   newpage);
-	__inc_zone_page_state(newpage, NR_FILE_PAGES);
-	__dec_zone_page_state(oldpage, NR_FILE_PAGES);
+	if (!error) {
+		__inc_zone_page_state(newpage, NR_FILE_PAGES);
+		__dec_zone_page_state(oldpage, NR_FILE_PAGES);
+	}
 	spin_unlock_irq(&swap_mapping->tree_lock);
-	BUG_ON(error);
 
-	mem_cgroup_replace_page_cache(oldpage, newpage);
-	lru_cache_add_anon(newpage);
+	if (unlikely(error)) {
+		/*
+		 * Is this possible? I think not, now that our callers check
+		 * both PageSwapCache and page_private after getting page lock;
+		 * but be defensive. Reverse old to newpage for clear and free.
+		 */
+		oldpage = newpage;
+	} else {
+		mem_cgroup_replace_page_cache(oldpage, newpage);
+		lru_cache_add_anon(newpage);
+		*pagep = newpage;
+	}
 
 	ClearPageSwapCache(oldpage);
 	set_page_private(oldpage, 0);
@@ -1033,7 +1049,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
 	unlock_page(oldpage);
 	page_cache_release(oldpage);
 	page_cache_release(oldpage);
-	return 0;
+	return error;
 }
 
 /*
@@ -1107,7 +1123,8 @@ repeat:
 
 		/* We have to do this with page locked to prevent races */
 		lock_page(page);
-		if (!PageSwapCache(page) || page->mapping) {
+		if (!PageSwapCache(page) || page_private(page) != swap.val ||
+		    page->mapping) {
 			error = -EEXIST;	/* try again */
 			goto failed;
 		}