aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/Kconfig6
-rw-r--r--arch/hexagon/Kconfig1
-rw-r--r--arch/microblaze/Kconfig2
-rw-r--r--arch/openrisc/Kconfig1
-rw-r--r--arch/score/Kconfig2
-rw-r--r--arch/x86/include/asm/pgtable-2level.h48
-rw-r--r--arch/x86/include/asm/pgtable-3level.h3
-rw-r--r--arch/x86/include/asm/pgtable.h30
-rw-r--r--arch/x86/include/asm/pgtable_types.h17
-rw-r--r--arch/x86/kernel/sys_x86_64.c2
-rw-r--r--arch/x86/mm/mmap.c2
-rw-r--r--drivers/block/aoe/aoecmd.c17
-rw-r--r--drivers/rtc/rtc-stmp3xxx.c35
-rw-r--r--fs/hugetlbfs/inode.c10
-rw-r--r--fs/ocfs2/aops.c2
-rw-r--r--fs/ocfs2/dir.c4
-rw-r--r--fs/ocfs2/file.c6
-rw-r--r--fs/ocfs2/journal.h2
-rw-r--r--fs/ocfs2/move_extents.c2
-rw-r--r--fs/ocfs2/refcounttree.c53
-rw-r--r--fs/ocfs2/refcounttree.h6
-rw-r--r--fs/proc/task_mmu.c31
-rw-r--r--include/asm-generic/pgtable.h30
-rw-r--r--include/linux/sched.h1
-rw-r--r--include/linux/swapops.h2
-rw-r--r--include/linux/syscalls.h5
-rw-r--r--kernel/fork.c6
-rw-r--r--mm/fremap.c11
-rw-r--r--mm/memcontrol.c4
-rw-r--r--mm/memory.c13
-rw-r--r--mm/rmap.c14
-rw-r--r--mm/swapfile.c19
32 files changed, 282 insertions, 105 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 8d2ae24b9f4a..1feb169274fe 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -407,6 +407,12 @@ config CLONE_BACKWARDS2
407 help 407 help
408 Architecture has the first two arguments of clone(2) swapped. 408 Architecture has the first two arguments of clone(2) swapped.
409 409
410config CLONE_BACKWARDS3
411 bool
412 help
413 Architecture has tls passed as the 3rd argument of clone(2),
414 not the 5th one.
415
410config ODD_RT_SIGACTION 416config ODD_RT_SIGACTION
411 bool 417 bool
412 help 418 help
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index 33a97929d055..77d442ab28c8 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -158,6 +158,7 @@ source "kernel/Kconfig.hz"
158endmenu 158endmenu
159 159
160source "init/Kconfig" 160source "init/Kconfig"
161source "kernel/Kconfig.freezer"
161source "drivers/Kconfig" 162source "drivers/Kconfig"
162source "fs/Kconfig" 163source "fs/Kconfig"
163 164
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index d22a4ecffff4..4fab52294d98 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -28,7 +28,7 @@ config MICROBLAZE
28 select GENERIC_CLOCKEVENTS 28 select GENERIC_CLOCKEVENTS
29 select GENERIC_IDLE_POLL_SETUP 29 select GENERIC_IDLE_POLL_SETUP
30 select MODULES_USE_ELF_RELA 30 select MODULES_USE_ELF_RELA
31 select CLONE_BACKWARDS 31 select CLONE_BACKWARDS3
32 32
33config SWAP 33config SWAP
34 def_bool n 34 def_bool n
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index 99dbab1c59ac..d60bf98fa5cf 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -55,6 +55,7 @@ config GENERIC_CSUM
55 55
56source "init/Kconfig" 56source "init/Kconfig"
57 57
58source "kernel/Kconfig.freezer"
58 59
59menu "Processor type and features" 60menu "Processor type and features"
60 61
diff --git a/arch/score/Kconfig b/arch/score/Kconfig
index c8def8bc9020..5fc237581caf 100644
--- a/arch/score/Kconfig
+++ b/arch/score/Kconfig
@@ -87,6 +87,8 @@ config STACKTRACE_SUPPORT
87 87
88source "init/Kconfig" 88source "init/Kconfig"
89 89
90source "kernel/Kconfig.freezer"
91
90config MMU 92config MMU
91 def_bool y 93 def_bool y
92 94
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h
index f2b489cf1602..3bf2dd0cf61f 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -55,9 +55,53 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
55#define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp) 55#define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp)
56#endif 56#endif
57 57
58#ifdef CONFIG_MEM_SOFT_DIRTY
59
60/*
61 * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE, _PAGE_BIT_SOFT_DIRTY and
62 * _PAGE_BIT_PROTNONE are taken, split up the 28 bits of offset
63 * into this range.
64 */
65#define PTE_FILE_MAX_BITS 28
66#define PTE_FILE_SHIFT1 (_PAGE_BIT_PRESENT + 1)
67#define PTE_FILE_SHIFT2 (_PAGE_BIT_FILE + 1)
68#define PTE_FILE_SHIFT3 (_PAGE_BIT_PROTNONE + 1)
69#define PTE_FILE_SHIFT4 (_PAGE_BIT_SOFT_DIRTY + 1)
70#define PTE_FILE_BITS1 (PTE_FILE_SHIFT2 - PTE_FILE_SHIFT1 - 1)
71#define PTE_FILE_BITS2 (PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1)
72#define PTE_FILE_BITS3 (PTE_FILE_SHIFT4 - PTE_FILE_SHIFT3 - 1)
73
74#define pte_to_pgoff(pte) \
75 ((((pte).pte_low >> (PTE_FILE_SHIFT1)) \
76 & ((1U << PTE_FILE_BITS1) - 1))) \
77 + ((((pte).pte_low >> (PTE_FILE_SHIFT2)) \
78 & ((1U << PTE_FILE_BITS2) - 1)) \
79 << (PTE_FILE_BITS1)) \
80 + ((((pte).pte_low >> (PTE_FILE_SHIFT3)) \
81 & ((1U << PTE_FILE_BITS3) - 1)) \
82 << (PTE_FILE_BITS1 + PTE_FILE_BITS2)) \
83 + ((((pte).pte_low >> (PTE_FILE_SHIFT4))) \
84 << (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3))
85
86#define pgoff_to_pte(off) \
87 ((pte_t) { .pte_low = \
88 ((((off)) & ((1U << PTE_FILE_BITS1) - 1)) << PTE_FILE_SHIFT1) \
89 + ((((off) >> PTE_FILE_BITS1) \
90 & ((1U << PTE_FILE_BITS2) - 1)) \
91 << PTE_FILE_SHIFT2) \
92 + ((((off) >> (PTE_FILE_BITS1 + PTE_FILE_BITS2)) \
93 & ((1U << PTE_FILE_BITS3) - 1)) \
94 << PTE_FILE_SHIFT3) \
95 + ((((off) >> \
96 (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3))) \
97 << PTE_FILE_SHIFT4) \
98 + _PAGE_FILE })
99
100#else /* CONFIG_MEM_SOFT_DIRTY */
101
58/* 102/*
59 * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE and _PAGE_BIT_PROTNONE are taken, 103 * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE and _PAGE_BIT_PROTNONE are taken,
60 * split up the 29 bits of offset into this range: 104 * split up the 29 bits of offset into this range.
61 */ 105 */
62#define PTE_FILE_MAX_BITS 29 106#define PTE_FILE_MAX_BITS 29
63#define PTE_FILE_SHIFT1 (_PAGE_BIT_PRESENT + 1) 107#define PTE_FILE_SHIFT1 (_PAGE_BIT_PRESENT + 1)
@@ -88,6 +132,8 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
88 << PTE_FILE_SHIFT3) \ 132 << PTE_FILE_SHIFT3) \
89 + _PAGE_FILE }) 133 + _PAGE_FILE })
90 134
135#endif /* CONFIG_MEM_SOFT_DIRTY */
136
91/* Encode and de-code a swap entry */ 137/* Encode and de-code a swap entry */
92#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE 138#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
93#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1) 139#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 4cc9f2b7cdc3..81bb91b49a88 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -179,6 +179,9 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp)
179/* 179/*
180 * Bits 0, 6 and 7 are taken in the low part of the pte, 180 * Bits 0, 6 and 7 are taken in the low part of the pte,
181 * put the 32 bits of offset into the high part. 181 * put the 32 bits of offset into the high part.
182 *
183 * For soft-dirty tracking 11 bit is taken from
184 * the low part of pte as well.
182 */ 185 */
183#define pte_to_pgoff(pte) ((pte).pte_high) 186#define pte_to_pgoff(pte) ((pte).pte_high)
184#define pgoff_to_pte(off) \ 187#define pgoff_to_pte(off) \
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 7dc305a46058..1c00631164c2 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -314,6 +314,36 @@ static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
314 return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY); 314 return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY);
315} 315}
316 316
317static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
318{
319 return pte_set_flags(pte, _PAGE_SWP_SOFT_DIRTY);
320}
321
322static inline int pte_swp_soft_dirty(pte_t pte)
323{
324 return pte_flags(pte) & _PAGE_SWP_SOFT_DIRTY;
325}
326
327static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
328{
329 return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY);
330}
331
332static inline pte_t pte_file_clear_soft_dirty(pte_t pte)
333{
334 return pte_clear_flags(pte, _PAGE_SOFT_DIRTY);
335}
336
337static inline pte_t pte_file_mksoft_dirty(pte_t pte)
338{
339 return pte_set_flags(pte, _PAGE_SOFT_DIRTY);
340}
341
342static inline int pte_file_soft_dirty(pte_t pte)
343{
344 return pte_flags(pte) & _PAGE_SOFT_DIRTY;
345}
346
317/* 347/*
318 * Mask out unsupported bits in a present pgprot. Non-present pgprots 348 * Mask out unsupported bits in a present pgprot. Non-present pgprots
319 * can use those bits for other purposes, so leave them be. 349 * can use those bits for other purposes, so leave them be.
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index c98ac63aae48..f4843e031131 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -61,12 +61,27 @@
61 * they do not conflict with each other. 61 * they do not conflict with each other.
62 */ 62 */
63 63
64#define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_HIDDEN
65
64#ifdef CONFIG_MEM_SOFT_DIRTY 66#ifdef CONFIG_MEM_SOFT_DIRTY
65#define _PAGE_SOFT_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN) 67#define _PAGE_SOFT_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_SOFT_DIRTY)
66#else 68#else
67#define _PAGE_SOFT_DIRTY (_AT(pteval_t, 0)) 69#define _PAGE_SOFT_DIRTY (_AT(pteval_t, 0))
68#endif 70#endif
69 71
72/*
73 * Tracking soft dirty bit when a page goes to a swap is tricky.
74 * We need a bit which can be stored in pte _and_ not conflict
75 * with swap entry format. On x86 bits 6 and 7 are *not* involved
76 * into swap entry computation, but bit 6 is used for nonlinear
77 * file mapping, so we borrow bit 7 for soft dirty tracking.
78 */
79#ifdef CONFIG_MEM_SOFT_DIRTY
80#define _PAGE_SWP_SOFT_DIRTY _PAGE_PSE
81#else
82#define _PAGE_SWP_SOFT_DIRTY (_AT(pteval_t, 0))
83#endif
84
70#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) 85#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
71#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) 86#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX)
72#else 87#else
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index dbded5aedb81..48f8375e4c6b 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -101,7 +101,7 @@ static void find_start_end(unsigned long flags, unsigned long *begin,
101 *begin = new_begin; 101 *begin = new_begin;
102 } 102 }
103 } else { 103 } else {
104 *begin = TASK_UNMAPPED_BASE; 104 *begin = mmap_legacy_base();
105 *end = TASK_SIZE; 105 *end = TASK_SIZE;
106 } 106 }
107} 107}
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 62c29a5bfe26..f63778cb2363 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -98,7 +98,7 @@ static unsigned long mmap_base(void)
98 * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64 98 * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64
99 * does, but not when emulating X86_32 99 * does, but not when emulating X86_32
100 */ 100 */
101static unsigned long mmap_legacy_base(void) 101unsigned long mmap_legacy_base(void)
102{ 102{
103 if (mmap_is_ia32()) 103 if (mmap_is_ia32())
104 return TASK_UNMAPPED_BASE; 104 return TASK_UNMAPPED_BASE;
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 99cb944a002d..4d45dba7fb8f 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -906,16 +906,10 @@ bio_pageinc(struct bio *bio)
906 int i; 906 int i;
907 907
908 bio_for_each_segment(bv, bio, i) { 908 bio_for_each_segment(bv, bio, i) {
909 page = bv->bv_page;
910 /* Non-zero page count for non-head members of 909 /* Non-zero page count for non-head members of
911 * compound pages is no longer allowed by the kernel, 910 * compound pages is no longer allowed by the kernel.
912 * but this has never been seen here.
913 */ 911 */
914 if (unlikely(PageCompound(page))) 912 page = compound_trans_head(bv->bv_page);
915 if (compound_trans_head(page) != page) {
916 pr_crit("page tail used for block I/O\n");
917 BUG();
918 }
919 atomic_inc(&page->_count); 913 atomic_inc(&page->_count);
920 } 914 }
921} 915}
@@ -924,10 +918,13 @@ static void
924bio_pagedec(struct bio *bio) 918bio_pagedec(struct bio *bio)
925{ 919{
926 struct bio_vec *bv; 920 struct bio_vec *bv;
921 struct page *page;
927 int i; 922 int i;
928 923
929 bio_for_each_segment(bv, bio, i) 924 bio_for_each_segment(bv, bio, i) {
930 atomic_dec(&bv->bv_page->_count); 925 page = compound_trans_head(bv->bv_page);
926 atomic_dec(&page->_count);
927 }
931} 928}
932 929
933static void 930static void
diff --git a/drivers/rtc/rtc-stmp3xxx.c b/drivers/rtc/rtc-stmp3xxx.c
index 767fee2ab340..26019531db15 100644
--- a/drivers/rtc/rtc-stmp3xxx.c
+++ b/drivers/rtc/rtc-stmp3xxx.c
@@ -23,6 +23,7 @@
23#include <linux/init.h> 23#include <linux/init.h>
24#include <linux/platform_device.h> 24#include <linux/platform_device.h>
25#include <linux/interrupt.h> 25#include <linux/interrupt.h>
26#include <linux/delay.h>
26#include <linux/rtc.h> 27#include <linux/rtc.h>
27#include <linux/slab.h> 28#include <linux/slab.h>
28#include <linux/of_device.h> 29#include <linux/of_device.h>
@@ -119,24 +120,39 @@ static void stmp3xxx_wdt_register(struct platform_device *rtc_pdev)
119} 120}
120#endif /* CONFIG_STMP3XXX_RTC_WATCHDOG */ 121#endif /* CONFIG_STMP3XXX_RTC_WATCHDOG */
121 122
122static void stmp3xxx_wait_time(struct stmp3xxx_rtc_data *rtc_data) 123static int stmp3xxx_wait_time(struct stmp3xxx_rtc_data *rtc_data)
123{ 124{
125 int timeout = 5000; /* 3ms according to i.MX28 Ref Manual */
124 /* 126 /*
125 * The datasheet doesn't say which way round the 127 * The i.MX28 Applications Processor Reference Manual, Rev. 1, 2010
126 * NEW_REGS/STALE_REGS bitfields go. In fact it's 0x1=P0, 128 * states:
127 * 0x2=P1, .., 0x20=P5, 0x40=ALARM, 0x80=SECONDS 129 * | The order in which registers are updated is
130 * | Persistent 0, 1, 2, 3, 4, 5, Alarm, Seconds.
131 * | (This list is in bitfield order, from LSB to MSB, as they would
132 * | appear in the STALE_REGS and NEW_REGS bitfields of the HW_RTC_STAT
133 * | register. For example, the Seconds register corresponds to
134 * | STALE_REGS or NEW_REGS containing 0x80.)
128 */ 135 */
129 while (readl(rtc_data->io + STMP3XXX_RTC_STAT) & 136 do {
130 (0x80 << STMP3XXX_RTC_STAT_STALE_SHIFT)) 137 if (!(readl(rtc_data->io + STMP3XXX_RTC_STAT) &
131 cpu_relax(); 138 (0x80 << STMP3XXX_RTC_STAT_STALE_SHIFT)))
139 return 0;
140 udelay(1);
141 } while (--timeout > 0);
142 return (readl(rtc_data->io + STMP3XXX_RTC_STAT) &
143 (0x80 << STMP3XXX_RTC_STAT_STALE_SHIFT)) ? -ETIME : 0;
132} 144}
133 145
134/* Time read/write */ 146/* Time read/write */
135static int stmp3xxx_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm) 147static int stmp3xxx_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
136{ 148{
149 int ret;
137 struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev); 150 struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev);
138 151
139 stmp3xxx_wait_time(rtc_data); 152 ret = stmp3xxx_wait_time(rtc_data);
153 if (ret)
154 return ret;
155
140 rtc_time_to_tm(readl(rtc_data->io + STMP3XXX_RTC_SECONDS), rtc_tm); 156 rtc_time_to_tm(readl(rtc_data->io + STMP3XXX_RTC_SECONDS), rtc_tm);
141 return 0; 157 return 0;
142} 158}
@@ -146,8 +162,7 @@ static int stmp3xxx_rtc_set_mmss(struct device *dev, unsigned long t)
146 struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev); 162 struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev);
147 163
148 writel(t, rtc_data->io + STMP3XXX_RTC_SECONDS); 164 writel(t, rtc_data->io + STMP3XXX_RTC_SECONDS);
149 stmp3xxx_wait_time(rtc_data); 165 return stmp3xxx_wait_time(rtc_data);
150 return 0;
151} 166}
152 167
153/* interrupt(s) handler */ 168/* interrupt(s) handler */
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a3f868ae3fd4..34423978b170 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -463,6 +463,14 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb,
463 return inode; 463 return inode;
464} 464}
465 465
466/*
467 * Hugetlbfs is not reclaimable; therefore its i_mmap_mutex will never
468 * be taken from reclaim -- unlike regular filesystems. This needs an
469 * annotation because huge_pmd_share() does an allocation under
470 * i_mmap_mutex.
471 */
472struct lock_class_key hugetlbfs_i_mmap_mutex_key;
473
466static struct inode *hugetlbfs_get_inode(struct super_block *sb, 474static struct inode *hugetlbfs_get_inode(struct super_block *sb,
467 struct inode *dir, 475 struct inode *dir,
468 umode_t mode, dev_t dev) 476 umode_t mode, dev_t dev)
@@ -474,6 +482,8 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
474 struct hugetlbfs_inode_info *info; 482 struct hugetlbfs_inode_info *info;
475 inode->i_ino = get_next_ino(); 483 inode->i_ino = get_next_ino();
476 inode_init_owner(inode, dir, mode); 484 inode_init_owner(inode, dir, mode);
485 lockdep_set_class(&inode->i_mapping->i_mmap_mutex,
486 &hugetlbfs_i_mmap_mutex_key);
477 inode->i_mapping->a_ops = &hugetlbfs_aops; 487 inode->i_mapping->a_ops = &hugetlbfs_aops;
478 inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info; 488 inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info;
479 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 489 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 79736a28d84f..2abf97b2a592 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1757,7 +1757,7 @@ try_again:
1757 goto out; 1757 goto out;
1758 } else if (ret == 1) { 1758 } else if (ret == 1) {
1759 clusters_need = wc->w_clen; 1759 clusters_need = wc->w_clen;
1760 ret = ocfs2_refcount_cow(inode, filp, di_bh, 1760 ret = ocfs2_refcount_cow(inode, di_bh,
1761 wc->w_cpos, wc->w_clen, UINT_MAX); 1761 wc->w_cpos, wc->w_clen, UINT_MAX);
1762 if (ret) { 1762 if (ret) {
1763 mlog_errno(ret); 1763 mlog_errno(ret);
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index eb760d8acd50..30544ce8e9f7 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -2153,11 +2153,9 @@ int ocfs2_empty_dir(struct inode *inode)
2153{ 2153{
2154 int ret; 2154 int ret;
2155 struct ocfs2_empty_dir_priv priv = { 2155 struct ocfs2_empty_dir_priv priv = {
2156 .ctx.actor = ocfs2_empty_dir_filldir 2156 .ctx.actor = ocfs2_empty_dir_filldir,
2157 }; 2157 };
2158 2158
2159 memset(&priv, 0, sizeof(priv));
2160
2161 if (ocfs2_dir_indexed(inode)) { 2159 if (ocfs2_dir_indexed(inode)) {
2162 ret = ocfs2_empty_dir_dx(inode, &priv); 2160 ret = ocfs2_empty_dir_dx(inode, &priv);
2163 if (ret) 2161 if (ret)
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 41000f223ca4..3261d71319ee 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -370,7 +370,7 @@ static int ocfs2_cow_file_pos(struct inode *inode,
370 if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) 370 if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
371 goto out; 371 goto out;
372 372
373 return ocfs2_refcount_cow(inode, NULL, fe_bh, cpos, 1, cpos+1); 373 return ocfs2_refcount_cow(inode, fe_bh, cpos, 1, cpos+1);
374 374
375out: 375out:
376 return status; 376 return status;
@@ -899,7 +899,7 @@ static int ocfs2_zero_extend_get_range(struct inode *inode,
899 zero_clusters = last_cpos - zero_cpos; 899 zero_clusters = last_cpos - zero_cpos;
900 900
901 if (needs_cow) { 901 if (needs_cow) {
902 rc = ocfs2_refcount_cow(inode, NULL, di_bh, zero_cpos, 902 rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos,
903 zero_clusters, UINT_MAX); 903 zero_clusters, UINT_MAX);
904 if (rc) { 904 if (rc) {
905 mlog_errno(rc); 905 mlog_errno(rc);
@@ -2078,7 +2078,7 @@ static int ocfs2_prepare_inode_for_refcount(struct inode *inode,
2078 2078
2079 *meta_level = 1; 2079 *meta_level = 1;
2080 2080
2081 ret = ocfs2_refcount_cow(inode, file, di_bh, cpos, clusters, UINT_MAX); 2081 ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX);
2082 if (ret) 2082 if (ret)
2083 mlog_errno(ret); 2083 mlog_errno(ret);
2084out: 2084out:
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 96f9ac237e86..0a992737dcaf 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -537,7 +537,7 @@ static inline int ocfs2_calc_extend_credits(struct super_block *sb,
537 extent_blocks = 1 + 1 + le16_to_cpu(root_el->l_tree_depth); 537 extent_blocks = 1 + 1 + le16_to_cpu(root_el->l_tree_depth);
538 538
539 return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks + 539 return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks +
540 ocfs2_quota_trans_credits(sb) + bits_wanted; 540 ocfs2_quota_trans_credits(sb);
541} 541}
542 542
543static inline int ocfs2_calc_symlink_credits(struct super_block *sb) 543static inline int ocfs2_calc_symlink_credits(struct super_block *sb)
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index f1fc172175b6..452068b45749 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -69,7 +69,7 @@ static int __ocfs2_move_extent(handle_t *handle,
69 u64 ino = ocfs2_metadata_cache_owner(context->et.et_ci); 69 u64 ino = ocfs2_metadata_cache_owner(context->et.et_ci);
70 u64 old_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cpos); 70 u64 old_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cpos);
71 71
72 ret = ocfs2_duplicate_clusters_by_page(handle, context->file, cpos, 72 ret = ocfs2_duplicate_clusters_by_page(handle, inode, cpos,
73 p_cpos, new_p_cpos, len); 73 p_cpos, new_p_cpos, len);
74 if (ret) { 74 if (ret) {
75 mlog_errno(ret); 75 mlog_errno(ret);
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 9f6b96a09615..a70d604593b6 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -49,7 +49,6 @@
49 49
50struct ocfs2_cow_context { 50struct ocfs2_cow_context {
51 struct inode *inode; 51 struct inode *inode;
52 struct file *file;
53 u32 cow_start; 52 u32 cow_start;
54 u32 cow_len; 53 u32 cow_len;
55 struct ocfs2_extent_tree data_et; 54 struct ocfs2_extent_tree data_et;
@@ -66,7 +65,7 @@ struct ocfs2_cow_context {
66 u32 *num_clusters, 65 u32 *num_clusters,
67 unsigned int *extent_flags); 66 unsigned int *extent_flags);
68 int (*cow_duplicate_clusters)(handle_t *handle, 67 int (*cow_duplicate_clusters)(handle_t *handle,
69 struct file *file, 68 struct inode *inode,
70 u32 cpos, u32 old_cluster, 69 u32 cpos, u32 old_cluster,
71 u32 new_cluster, u32 new_len); 70 u32 new_cluster, u32 new_len);
72}; 71};
@@ -2922,14 +2921,12 @@ static int ocfs2_clear_cow_buffer(handle_t *handle, struct buffer_head *bh)
2922} 2921}
2923 2922
2924int ocfs2_duplicate_clusters_by_page(handle_t *handle, 2923int ocfs2_duplicate_clusters_by_page(handle_t *handle,
2925 struct file *file, 2924 struct inode *inode,
2926 u32 cpos, u32 old_cluster, 2925 u32 cpos, u32 old_cluster,
2927 u32 new_cluster, u32 new_len) 2926 u32 new_cluster, u32 new_len)
2928{ 2927{
2929 int ret = 0, partial; 2928 int ret = 0, partial;
2930 struct inode *inode = file_inode(file); 2929 struct super_block *sb = inode->i_sb;
2931 struct ocfs2_caching_info *ci = INODE_CACHE(inode);
2932 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
2933 u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); 2930 u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster);
2934 struct page *page; 2931 struct page *page;
2935 pgoff_t page_index; 2932 pgoff_t page_index;
@@ -2978,13 +2975,6 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle,
2978 if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize) 2975 if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize)
2979 BUG_ON(PageDirty(page)); 2976 BUG_ON(PageDirty(page));
2980 2977
2981 if (PageReadahead(page)) {
2982 page_cache_async_readahead(mapping,
2983 &file->f_ra, file,
2984 page, page_index,
2985 readahead_pages);
2986 }
2987
2988 if (!PageUptodate(page)) { 2978 if (!PageUptodate(page)) {
2989 ret = block_read_full_page(page, ocfs2_get_block); 2979 ret = block_read_full_page(page, ocfs2_get_block);
2990 if (ret) { 2980 if (ret) {
@@ -3004,7 +2994,8 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle,
3004 } 2994 }
3005 } 2995 }
3006 2996
3007 ocfs2_map_and_dirty_page(inode, handle, from, to, 2997 ocfs2_map_and_dirty_page(inode,
2998 handle, from, to,
3008 page, 0, &new_block); 2999 page, 0, &new_block);
3009 mark_page_accessed(page); 3000 mark_page_accessed(page);
3010unlock: 3001unlock:
@@ -3020,12 +3011,11 @@ unlock:
3020} 3011}
3021 3012
3022int ocfs2_duplicate_clusters_by_jbd(handle_t *handle, 3013int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
3023 struct file *file, 3014 struct inode *inode,
3024 u32 cpos, u32 old_cluster, 3015 u32 cpos, u32 old_cluster,
3025 u32 new_cluster, u32 new_len) 3016 u32 new_cluster, u32 new_len)
3026{ 3017{
3027 int ret = 0; 3018 int ret = 0;
3028 struct inode *inode = file_inode(file);
3029 struct super_block *sb = inode->i_sb; 3019 struct super_block *sb = inode->i_sb;
3030 struct ocfs2_caching_info *ci = INODE_CACHE(inode); 3020 struct ocfs2_caching_info *ci = INODE_CACHE(inode);
3031 int i, blocks = ocfs2_clusters_to_blocks(sb, new_len); 3021 int i, blocks = ocfs2_clusters_to_blocks(sb, new_len);
@@ -3150,7 +3140,7 @@ static int ocfs2_replace_clusters(handle_t *handle,
3150 3140
3151 /*If the old clusters is unwritten, no need to duplicate. */ 3141 /*If the old clusters is unwritten, no need to duplicate. */
3152 if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) { 3142 if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) {
3153 ret = context->cow_duplicate_clusters(handle, context->file, 3143 ret = context->cow_duplicate_clusters(handle, context->inode,
3154 cpos, old, new, len); 3144 cpos, old, new, len);
3155 if (ret) { 3145 if (ret) {
3156 mlog_errno(ret); 3146 mlog_errno(ret);
@@ -3428,35 +3418,12 @@ static int ocfs2_replace_cow(struct ocfs2_cow_context *context)
3428 return ret; 3418 return ret;
3429} 3419}
3430 3420
3431static void ocfs2_readahead_for_cow(struct inode *inode,
3432 struct file *file,
3433 u32 start, u32 len)
3434{
3435 struct address_space *mapping;
3436 pgoff_t index;
3437 unsigned long num_pages;
3438 int cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits;
3439
3440 if (!file)
3441 return;
3442
3443 mapping = file->f_mapping;
3444 num_pages = (len << cs_bits) >> PAGE_CACHE_SHIFT;
3445 if (!num_pages)
3446 num_pages = 1;
3447
3448 index = ((loff_t)start << cs_bits) >> PAGE_CACHE_SHIFT;
3449 page_cache_sync_readahead(mapping, &file->f_ra, file,
3450 index, num_pages);
3451}
3452
3453/* 3421/*
3454 * Starting at cpos, try to CoW write_len clusters. Don't CoW 3422 * Starting at cpos, try to CoW write_len clusters. Don't CoW
3455 * past max_cpos. This will stop when it runs into a hole or an 3423 * past max_cpos. This will stop when it runs into a hole or an
3456 * unrefcounted extent. 3424 * unrefcounted extent.
3457 */ 3425 */
3458static int ocfs2_refcount_cow_hunk(struct inode *inode, 3426static int ocfs2_refcount_cow_hunk(struct inode *inode,
3459 struct file *file,
3460 struct buffer_head *di_bh, 3427 struct buffer_head *di_bh,
3461 u32 cpos, u32 write_len, u32 max_cpos) 3428 u32 cpos, u32 write_len, u32 max_cpos)
3462{ 3429{
@@ -3485,8 +3452,6 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode,
3485 3452
3486 BUG_ON(cow_len == 0); 3453 BUG_ON(cow_len == 0);
3487 3454
3488 ocfs2_readahead_for_cow(inode, file, cow_start, cow_len);
3489
3490 context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS); 3455 context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS);
3491 if (!context) { 3456 if (!context) {
3492 ret = -ENOMEM; 3457 ret = -ENOMEM;
@@ -3508,7 +3473,6 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode,
3508 context->ref_root_bh = ref_root_bh; 3473 context->ref_root_bh = ref_root_bh;
3509 context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page; 3474 context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page;
3510 context->get_clusters = ocfs2_di_get_clusters; 3475 context->get_clusters = ocfs2_di_get_clusters;
3511 context->file = file;
3512 3476
3513 ocfs2_init_dinode_extent_tree(&context->data_et, 3477 ocfs2_init_dinode_extent_tree(&context->data_et,
3514 INODE_CACHE(inode), di_bh); 3478 INODE_CACHE(inode), di_bh);
@@ -3537,7 +3501,6 @@ out:
3537 * clusters between cpos and cpos+write_len are safe to modify. 3501 * clusters between cpos and cpos+write_len are safe to modify.
3538 */ 3502 */
3539int ocfs2_refcount_cow(struct inode *inode, 3503int ocfs2_refcount_cow(struct inode *inode,
3540 struct file *file,
3541 struct buffer_head *di_bh, 3504 struct buffer_head *di_bh,
3542 u32 cpos, u32 write_len, u32 max_cpos) 3505 u32 cpos, u32 write_len, u32 max_cpos)
3543{ 3506{
@@ -3557,7 +3520,7 @@ int ocfs2_refcount_cow(struct inode *inode,
3557 num_clusters = write_len; 3520 num_clusters = write_len;
3558 3521
3559 if (ext_flags & OCFS2_EXT_REFCOUNTED) { 3522 if (ext_flags & OCFS2_EXT_REFCOUNTED) {
3560 ret = ocfs2_refcount_cow_hunk(inode, file, di_bh, cpos, 3523 ret = ocfs2_refcount_cow_hunk(inode, di_bh, cpos,
3561 num_clusters, max_cpos); 3524 num_clusters, max_cpos);
3562 if (ret) { 3525 if (ret) {
3563 mlog_errno(ret); 3526 mlog_errno(ret);
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index 7754608c83a4..6422bbcdb525 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -53,7 +53,7 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
53 int *credits, 53 int *credits,
54 int *ref_blocks); 54 int *ref_blocks);
55int ocfs2_refcount_cow(struct inode *inode, 55int ocfs2_refcount_cow(struct inode *inode,
56 struct file *filep, struct buffer_head *di_bh, 56 struct buffer_head *di_bh,
57 u32 cpos, u32 write_len, u32 max_cpos); 57 u32 cpos, u32 write_len, u32 max_cpos);
58 58
59typedef int (ocfs2_post_refcount_func)(struct inode *inode, 59typedef int (ocfs2_post_refcount_func)(struct inode *inode,
@@ -85,11 +85,11 @@ int ocfs2_refcount_cow_xattr(struct inode *inode,
85 u32 cpos, u32 write_len, 85 u32 cpos, u32 write_len,
86 struct ocfs2_post_refcount *post); 86 struct ocfs2_post_refcount *post);
87int ocfs2_duplicate_clusters_by_page(handle_t *handle, 87int ocfs2_duplicate_clusters_by_page(handle_t *handle,
88 struct file *file, 88 struct inode *inode,
89 u32 cpos, u32 old_cluster, 89 u32 cpos, u32 old_cluster,
90 u32 new_cluster, u32 new_len); 90 u32 new_cluster, u32 new_len);
91int ocfs2_duplicate_clusters_by_jbd(handle_t *handle, 91int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
92 struct file *file, 92 struct inode *inode,
93 u32 cpos, u32 old_cluster, 93 u32 cpos, u32 old_cluster,
94 u32 new_cluster, u32 new_len); 94 u32 new_cluster, u32 new_len);
95int ocfs2_cow_sync_writeback(struct super_block *sb, 95int ocfs2_cow_sync_writeback(struct super_block *sb,
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index dbf61f6174f0..107d026f5d6e 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -730,8 +730,16 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
730 * of how soft-dirty works. 730 * of how soft-dirty works.
731 */ 731 */
732 pte_t ptent = *pte; 732 pte_t ptent = *pte;
733 ptent = pte_wrprotect(ptent); 733
734 ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY); 734 if (pte_present(ptent)) {
735 ptent = pte_wrprotect(ptent);
736 ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY);
737 } else if (is_swap_pte(ptent)) {
738 ptent = pte_swp_clear_soft_dirty(ptent);
739 } else if (pte_file(ptent)) {
740 ptent = pte_file_clear_soft_dirty(ptent);
741 }
742
735 set_pte_at(vma->vm_mm, addr, pte, ptent); 743 set_pte_at(vma->vm_mm, addr, pte, ptent);
736#endif 744#endif
737} 745}
@@ -752,14 +760,15 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
752 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); 760 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
753 for (; addr != end; pte++, addr += PAGE_SIZE) { 761 for (; addr != end; pte++, addr += PAGE_SIZE) {
754 ptent = *pte; 762 ptent = *pte;
755 if (!pte_present(ptent))
756 continue;
757 763
758 if (cp->type == CLEAR_REFS_SOFT_DIRTY) { 764 if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
759 clear_soft_dirty(vma, addr, pte); 765 clear_soft_dirty(vma, addr, pte);
760 continue; 766 continue;
761 } 767 }
762 768
769 if (!pte_present(ptent))
770 continue;
771
763 page = vm_normal_page(vma, addr, ptent); 772 page = vm_normal_page(vma, addr, ptent);
764 if (!page) 773 if (!page)
765 continue; 774 continue;
@@ -859,7 +868,7 @@ typedef struct {
859} pagemap_entry_t; 868} pagemap_entry_t;
860 869
861struct pagemapread { 870struct pagemapread {
862 int pos, len; 871 int pos, len; /* units: PM_ENTRY_BYTES, not bytes */
863 pagemap_entry_t *buffer; 872 pagemap_entry_t *buffer;
864 bool v2; 873 bool v2;
865}; 874};
@@ -867,7 +876,7 @@ struct pagemapread {
867#define PAGEMAP_WALK_SIZE (PMD_SIZE) 876#define PAGEMAP_WALK_SIZE (PMD_SIZE)
868#define PAGEMAP_WALK_MASK (PMD_MASK) 877#define PAGEMAP_WALK_MASK (PMD_MASK)
869 878
870#define PM_ENTRY_BYTES sizeof(u64) 879#define PM_ENTRY_BYTES sizeof(pagemap_entry_t)
871#define PM_STATUS_BITS 3 880#define PM_STATUS_BITS 3
872#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS) 881#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS)
873#define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET) 882#define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
@@ -930,8 +939,10 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
930 flags = PM_PRESENT; 939 flags = PM_PRESENT;
931 page = vm_normal_page(vma, addr, pte); 940 page = vm_normal_page(vma, addr, pte);
932 } else if (is_swap_pte(pte)) { 941 } else if (is_swap_pte(pte)) {
933 swp_entry_t entry = pte_to_swp_entry(pte); 942 swp_entry_t entry;
934 943 if (pte_swp_soft_dirty(pte))
944 flags2 |= __PM_SOFT_DIRTY;
945 entry = pte_to_swp_entry(pte);
935 frame = swp_type(entry) | 946 frame = swp_type(entry) |
936 (swp_offset(entry) << MAX_SWAPFILES_SHIFT); 947 (swp_offset(entry) << MAX_SWAPFILES_SHIFT);
937 flags = PM_SWAP; 948 flags = PM_SWAP;
@@ -1116,8 +1127,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
1116 goto out_task; 1127 goto out_task;
1117 1128
1118 pm.v2 = soft_dirty_cleared; 1129 pm.v2 = soft_dirty_cleared;
1119 pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); 1130 pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
1120 pm.buffer = kmalloc(pm.len, GFP_TEMPORARY); 1131 pm.buffer = kmalloc(pm.len * PM_ENTRY_BYTES, GFP_TEMPORARY);
1121 ret = -ENOMEM; 1132 ret = -ENOMEM;
1122 if (!pm.buffer) 1133 if (!pm.buffer)
1123 goto out_task; 1134 goto out_task;
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 2f47ade1b567..0807ddf97b05 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -417,6 +417,36 @@ static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
417{ 417{
418 return pmd; 418 return pmd;
419} 419}
420
421static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
422{
423 return pte;
424}
425
426static inline int pte_swp_soft_dirty(pte_t pte)
427{
428 return 0;
429}
430
431static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
432{
433 return pte;
434}
435
436static inline pte_t pte_file_clear_soft_dirty(pte_t pte)
437{
438 return pte;
439}
440
441static inline pte_t pte_file_mksoft_dirty(pte_t pte)
442{
443 return pte;
444}
445
446static inline int pte_file_soft_dirty(pte_t pte)
447{
448 return 0;
449}
420#endif 450#endif
421 451
422#ifndef __HAVE_PFNMAP_TRACKING 452#ifndef __HAVE_PFNMAP_TRACKING
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 078066daffd4..e9995eb5985c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -314,6 +314,7 @@ struct nsproxy;
314struct user_namespace; 314struct user_namespace;
315 315
316#ifdef CONFIG_MMU 316#ifdef CONFIG_MMU
317extern unsigned long mmap_legacy_base(void);
317extern void arch_pick_mmap_layout(struct mm_struct *mm); 318extern void arch_pick_mmap_layout(struct mm_struct *mm);
318extern unsigned long 319extern unsigned long
319arch_get_unmapped_area(struct file *, unsigned long, unsigned long, 320arch_get_unmapped_area(struct file *, unsigned long, unsigned long,
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index c5fd30d2a415..8d4fa82bfb91 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -67,6 +67,8 @@ static inline swp_entry_t pte_to_swp_entry(pte_t pte)
67 swp_entry_t arch_entry; 67 swp_entry_t arch_entry;
68 68
69 BUG_ON(pte_file(pte)); 69 BUG_ON(pte_file(pte));
70 if (pte_swp_soft_dirty(pte))
71 pte = pte_swp_clear_soft_dirty(pte);
70 arch_entry = __pte_to_swp_entry(pte); 72 arch_entry = __pte_to_swp_entry(pte);
71 return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry)); 73 return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry));
72} 74}
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 4147d700a293..84662ecc7b51 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -802,9 +802,14 @@ asmlinkage long sys_vfork(void);
802asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, int, 802asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, int,
803 int __user *); 803 int __user *);
804#else 804#else
805#ifdef CONFIG_CLONE_BACKWARDS3
806asmlinkage long sys_clone(unsigned long, unsigned long, int, int __user *,
807 int __user *, int);
808#else
805asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, 809asmlinkage long sys_clone(unsigned long, unsigned long, int __user *,
806 int __user *, int); 810 int __user *, int);
807#endif 811#endif
812#endif
808 813
809asmlinkage long sys_execve(const char __user *filename, 814asmlinkage long sys_execve(const char __user *filename,
810 const char __user *const __user *argv, 815 const char __user *const __user *argv,
diff --git a/kernel/fork.c b/kernel/fork.c
index 403d2bb8a968..e23bb19e2a3e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1679,6 +1679,12 @@ SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags,
1679 int __user *, parent_tidptr, 1679 int __user *, parent_tidptr,
1680 int __user *, child_tidptr, 1680 int __user *, child_tidptr,
1681 int, tls_val) 1681 int, tls_val)
1682#elif defined(CONFIG_CLONE_BACKWARDS3)
1683SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp,
1684 int, stack_size,
1685 int __user *, parent_tidptr,
1686 int __user *, child_tidptr,
1687 int, tls_val)
1682#else 1688#else
1683SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, 1689SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
1684 int __user *, parent_tidptr, 1690 int __user *, parent_tidptr,
diff --git a/mm/fremap.c b/mm/fremap.c
index 87da3590c61e..5bff08147768 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -57,17 +57,22 @@ static int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
57 unsigned long addr, unsigned long pgoff, pgprot_t prot) 57 unsigned long addr, unsigned long pgoff, pgprot_t prot)
58{ 58{
59 int err = -ENOMEM; 59 int err = -ENOMEM;
60 pte_t *pte; 60 pte_t *pte, ptfile;
61 spinlock_t *ptl; 61 spinlock_t *ptl;
62 62
63 pte = get_locked_pte(mm, addr, &ptl); 63 pte = get_locked_pte(mm, addr, &ptl);
64 if (!pte) 64 if (!pte)
65 goto out; 65 goto out;
66 66
67 if (!pte_none(*pte)) 67 ptfile = pgoff_to_pte(pgoff);
68
69 if (!pte_none(*pte)) {
70 if (pte_present(*pte) && pte_soft_dirty(*pte))
71 pte_file_mksoft_dirty(ptfile);
68 zap_pte(mm, vma, addr, pte); 72 zap_pte(mm, vma, addr, pte);
73 }
69 74
70 set_pte_at(mm, addr, pte, pgoff_to_pte(pgoff)); 75 set_pte_at(mm, addr, pte, ptfile);
71 /* 76 /*
72 * We don't need to run update_mmu_cache() here because the "file pte" 77 * We don't need to run update_mmu_cache() here because the "file pte"
73 * being installed by install_file_pte() is not a real pte - it's a 78 * being installed by install_file_pte() is not a real pte - it's a
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c290a1cf3862..c5792a5d87ce 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3195,11 +3195,11 @@ int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s,
3195 if (!s->memcg_params) 3195 if (!s->memcg_params)
3196 return -ENOMEM; 3196 return -ENOMEM;
3197 3197
3198 INIT_WORK(&s->memcg_params->destroy,
3199 kmem_cache_destroy_work_func);
3200 if (memcg) { 3198 if (memcg) {
3201 s->memcg_params->memcg = memcg; 3199 s->memcg_params->memcg = memcg;
3202 s->memcg_params->root_cache = root_cache; 3200 s->memcg_params->root_cache = root_cache;
3201 INIT_WORK(&s->memcg_params->destroy,
3202 kmem_cache_destroy_work_func);
3203 } else 3203 } else
3204 s->memcg_params->is_root_cache = true; 3204 s->memcg_params->is_root_cache = true;
3205 3205
diff --git a/mm/memory.c b/mm/memory.c
index 1ce2e2a734fc..40268410732a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1141,9 +1141,12 @@ again:
1141 continue; 1141 continue;
1142 if (unlikely(details) && details->nonlinear_vma 1142 if (unlikely(details) && details->nonlinear_vma
1143 && linear_page_index(details->nonlinear_vma, 1143 && linear_page_index(details->nonlinear_vma,
1144 addr) != page->index) 1144 addr) != page->index) {
1145 set_pte_at(mm, addr, pte, 1145 pte_t ptfile = pgoff_to_pte(page->index);
1146 pgoff_to_pte(page->index)); 1146 if (pte_soft_dirty(ptent))
1147 pte_file_mksoft_dirty(ptfile);
1148 set_pte_at(mm, addr, pte, ptfile);
1149 }
1147 if (PageAnon(page)) 1150 if (PageAnon(page))
1148 rss[MM_ANONPAGES]--; 1151 rss[MM_ANONPAGES]--;
1149 else { 1152 else {
@@ -3115,6 +3118,8 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
3115 exclusive = 1; 3118 exclusive = 1;
3116 } 3119 }
3117 flush_icache_page(vma, page); 3120 flush_icache_page(vma, page);
3121 if (pte_swp_soft_dirty(orig_pte))
3122 pte = pte_mksoft_dirty(pte);
3118 set_pte_at(mm, address, page_table, pte); 3123 set_pte_at(mm, address, page_table, pte);
3119 if (page == swapcache) 3124 if (page == swapcache)
3120 do_page_add_anon_rmap(page, vma, address, exclusive); 3125 do_page_add_anon_rmap(page, vma, address, exclusive);
@@ -3408,6 +3413,8 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3408 entry = mk_pte(page, vma->vm_page_prot); 3413 entry = mk_pte(page, vma->vm_page_prot);
3409 if (flags & FAULT_FLAG_WRITE) 3414 if (flags & FAULT_FLAG_WRITE)
3410 entry = maybe_mkwrite(pte_mkdirty(entry), vma); 3415 entry = maybe_mkwrite(pte_mkdirty(entry), vma);
3416 else if (pte_file(orig_pte) && pte_file_soft_dirty(orig_pte))
3417 pte_mksoft_dirty(entry);
3411 if (anon) { 3418 if (anon) {
3412 inc_mm_counter_fast(mm, MM_ANONPAGES); 3419 inc_mm_counter_fast(mm, MM_ANONPAGES);
3413 page_add_new_anon_rmap(page, vma, address); 3420 page_add_new_anon_rmap(page, vma, address);
diff --git a/mm/rmap.c b/mm/rmap.c
index cd356df4f71a..b2e29acd7e3d 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1236,6 +1236,7 @@ int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
1236 swp_entry_to_pte(make_hwpoison_entry(page))); 1236 swp_entry_to_pte(make_hwpoison_entry(page)));
1237 } else if (PageAnon(page)) { 1237 } else if (PageAnon(page)) {
1238 swp_entry_t entry = { .val = page_private(page) }; 1238 swp_entry_t entry = { .val = page_private(page) };
1239 pte_t swp_pte;
1239 1240
1240 if (PageSwapCache(page)) { 1241 if (PageSwapCache(page)) {
1241 /* 1242 /*
@@ -1264,7 +1265,10 @@ int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
1264 BUG_ON(TTU_ACTION(flags) != TTU_MIGRATION); 1265 BUG_ON(TTU_ACTION(flags) != TTU_MIGRATION);
1265 entry = make_migration_entry(page, pte_write(pteval)); 1266 entry = make_migration_entry(page, pte_write(pteval));
1266 } 1267 }
1267 set_pte_at(mm, address, pte, swp_entry_to_pte(entry)); 1268 swp_pte = swp_entry_to_pte(entry);
1269 if (pte_soft_dirty(pteval))
1270 swp_pte = pte_swp_mksoft_dirty(swp_pte);
1271 set_pte_at(mm, address, pte, swp_pte);
1268 BUG_ON(pte_file(*pte)); 1272 BUG_ON(pte_file(*pte));
1269 } else if (IS_ENABLED(CONFIG_MIGRATION) && 1273 } else if (IS_ENABLED(CONFIG_MIGRATION) &&
1270 (TTU_ACTION(flags) == TTU_MIGRATION)) { 1274 (TTU_ACTION(flags) == TTU_MIGRATION)) {
@@ -1401,8 +1405,12 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
1401 pteval = ptep_clear_flush(vma, address, pte); 1405 pteval = ptep_clear_flush(vma, address, pte);
1402 1406
1403 /* If nonlinear, store the file page offset in the pte. */ 1407 /* If nonlinear, store the file page offset in the pte. */
1404 if (page->index != linear_page_index(vma, address)) 1408 if (page->index != linear_page_index(vma, address)) {
1405 set_pte_at(mm, address, pte, pgoff_to_pte(page->index)); 1409 pte_t ptfile = pgoff_to_pte(page->index);
1410 if (pte_soft_dirty(pteval))
1411 pte_file_mksoft_dirty(ptfile);
1412 set_pte_at(mm, address, pte, ptfile);
1413 }
1406 1414
1407 /* Move the dirty bit to the physical page now the pte is gone. */ 1415 /* Move the dirty bit to the physical page now the pte is gone. */
1408 if (pte_dirty(pteval)) 1416 if (pte_dirty(pteval))
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 36af6eeaa67e..6cf2e60983b7 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -866,6 +866,21 @@ unsigned int count_swap_pages(int type, int free)
866} 866}
867#endif /* CONFIG_HIBERNATION */ 867#endif /* CONFIG_HIBERNATION */
868 868
869static inline int maybe_same_pte(pte_t pte, pte_t swp_pte)
870{
871#ifdef CONFIG_MEM_SOFT_DIRTY
872 /*
873 * When pte keeps soft dirty bit the pte generated
874 * from swap entry does not has it, still it's same
875 * pte from logical point of view.
876 */
877 pte_t swp_pte_dirty = pte_swp_mksoft_dirty(swp_pte);
878 return pte_same(pte, swp_pte) || pte_same(pte, swp_pte_dirty);
879#else
880 return pte_same(pte, swp_pte);
881#endif
882}
883
869/* 884/*
870 * No need to decide whether this PTE shares the swap entry with others, 885 * No need to decide whether this PTE shares the swap entry with others,
871 * just let do_wp_page work it out if a write is requested later - to 886 * just let do_wp_page work it out if a write is requested later - to
@@ -892,7 +907,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
892 } 907 }
893 908
894 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); 909 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
895 if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) { 910 if (unlikely(!maybe_same_pte(*pte, swp_entry_to_pte(entry)))) {
896 mem_cgroup_cancel_charge_swapin(memcg); 911 mem_cgroup_cancel_charge_swapin(memcg);
897 ret = 0; 912 ret = 0;
898 goto out; 913 goto out;
@@ -947,7 +962,7 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
947 * swapoff spends a _lot_ of time in this loop! 962 * swapoff spends a _lot_ of time in this loop!
948 * Test inline before going to call unuse_pte. 963 * Test inline before going to call unuse_pte.
949 */ 964 */
950 if (unlikely(pte_same(*pte, swp_pte))) { 965 if (unlikely(maybe_same_pte(*pte, swp_pte))) {
951 pte_unmap(pte); 966 pte_unmap(pte);
952 ret = unuse_pte(vma, pmd, addr, entry, page); 967 ret = unuse_pte(vma, pmd, addr, entry, page);
953 if (ret) 968 if (ret)