diff options
32 files changed, 282 insertions, 105 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 8d2ae24b9f4a..1feb169274fe 100644 --- a/arch/Kconfig +++ b/arch/Kconfig | |||
@@ -407,6 +407,12 @@ config CLONE_BACKWARDS2 | |||
407 | help | 407 | help |
408 | Architecture has the first two arguments of clone(2) swapped. | 408 | Architecture has the first two arguments of clone(2) swapped. |
409 | 409 | ||
410 | config CLONE_BACKWARDS3 | ||
411 | bool | ||
412 | help | ||
413 | Architecture has tls passed as the 3rd argument of clone(2), | ||
414 | not the 5th one. | ||
415 | |||
410 | config ODD_RT_SIGACTION | 416 | config ODD_RT_SIGACTION |
411 | bool | 417 | bool |
412 | help | 418 | help |
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index 33a97929d055..77d442ab28c8 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig | |||
@@ -158,6 +158,7 @@ source "kernel/Kconfig.hz" | |||
158 | endmenu | 158 | endmenu |
159 | 159 | ||
160 | source "init/Kconfig" | 160 | source "init/Kconfig" |
161 | source "kernel/Kconfig.freezer" | ||
161 | source "drivers/Kconfig" | 162 | source "drivers/Kconfig" |
162 | source "fs/Kconfig" | 163 | source "fs/Kconfig" |
163 | 164 | ||
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index d22a4ecffff4..4fab52294d98 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig | |||
@@ -28,7 +28,7 @@ config MICROBLAZE | |||
28 | select GENERIC_CLOCKEVENTS | 28 | select GENERIC_CLOCKEVENTS |
29 | select GENERIC_IDLE_POLL_SETUP | 29 | select GENERIC_IDLE_POLL_SETUP |
30 | select MODULES_USE_ELF_RELA | 30 | select MODULES_USE_ELF_RELA |
31 | select CLONE_BACKWARDS | 31 | select CLONE_BACKWARDS3 |
32 | 32 | ||
33 | config SWAP | 33 | config SWAP |
34 | def_bool n | 34 | def_bool n |
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index 99dbab1c59ac..d60bf98fa5cf 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig | |||
@@ -55,6 +55,7 @@ config GENERIC_CSUM | |||
55 | 55 | ||
56 | source "init/Kconfig" | 56 | source "init/Kconfig" |
57 | 57 | ||
58 | source "kernel/Kconfig.freezer" | ||
58 | 59 | ||
59 | menu "Processor type and features" | 60 | menu "Processor type and features" |
60 | 61 | ||
diff --git a/arch/score/Kconfig b/arch/score/Kconfig index c8def8bc9020..5fc237581caf 100644 --- a/arch/score/Kconfig +++ b/arch/score/Kconfig | |||
@@ -87,6 +87,8 @@ config STACKTRACE_SUPPORT | |||
87 | 87 | ||
88 | source "init/Kconfig" | 88 | source "init/Kconfig" |
89 | 89 | ||
90 | source "kernel/Kconfig.freezer" | ||
91 | |||
90 | config MMU | 92 | config MMU |
91 | def_bool y | 93 | def_bool y |
92 | 94 | ||
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h index f2b489cf1602..3bf2dd0cf61f 100644 --- a/arch/x86/include/asm/pgtable-2level.h +++ b/arch/x86/include/asm/pgtable-2level.h | |||
@@ -55,9 +55,53 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) | |||
55 | #define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp) | 55 | #define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp) |
56 | #endif | 56 | #endif |
57 | 57 | ||
58 | #ifdef CONFIG_MEM_SOFT_DIRTY | ||
59 | |||
60 | /* | ||
61 | * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE, _PAGE_BIT_SOFT_DIRTY and | ||
62 | * _PAGE_BIT_PROTNONE are taken, split up the 28 bits of offset | ||
63 | * into this range. | ||
64 | */ | ||
65 | #define PTE_FILE_MAX_BITS 28 | ||
66 | #define PTE_FILE_SHIFT1 (_PAGE_BIT_PRESENT + 1) | ||
67 | #define PTE_FILE_SHIFT2 (_PAGE_BIT_FILE + 1) | ||
68 | #define PTE_FILE_SHIFT3 (_PAGE_BIT_PROTNONE + 1) | ||
69 | #define PTE_FILE_SHIFT4 (_PAGE_BIT_SOFT_DIRTY + 1) | ||
70 | #define PTE_FILE_BITS1 (PTE_FILE_SHIFT2 - PTE_FILE_SHIFT1 - 1) | ||
71 | #define PTE_FILE_BITS2 (PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1) | ||
72 | #define PTE_FILE_BITS3 (PTE_FILE_SHIFT4 - PTE_FILE_SHIFT3 - 1) | ||
73 | |||
74 | #define pte_to_pgoff(pte) \ | ||
75 | ((((pte).pte_low >> (PTE_FILE_SHIFT1)) \ | ||
76 | & ((1U << PTE_FILE_BITS1) - 1))) \ | ||
77 | + ((((pte).pte_low >> (PTE_FILE_SHIFT2)) \ | ||
78 | & ((1U << PTE_FILE_BITS2) - 1)) \ | ||
79 | << (PTE_FILE_BITS1)) \ | ||
80 | + ((((pte).pte_low >> (PTE_FILE_SHIFT3)) \ | ||
81 | & ((1U << PTE_FILE_BITS3) - 1)) \ | ||
82 | << (PTE_FILE_BITS1 + PTE_FILE_BITS2)) \ | ||
83 | + ((((pte).pte_low >> (PTE_FILE_SHIFT4))) \ | ||
84 | << (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3)) | ||
85 | |||
86 | #define pgoff_to_pte(off) \ | ||
87 | ((pte_t) { .pte_low = \ | ||
88 | ((((off)) & ((1U << PTE_FILE_BITS1) - 1)) << PTE_FILE_SHIFT1) \ | ||
89 | + ((((off) >> PTE_FILE_BITS1) \ | ||
90 | & ((1U << PTE_FILE_BITS2) - 1)) \ | ||
91 | << PTE_FILE_SHIFT2) \ | ||
92 | + ((((off) >> (PTE_FILE_BITS1 + PTE_FILE_BITS2)) \ | ||
93 | & ((1U << PTE_FILE_BITS3) - 1)) \ | ||
94 | << PTE_FILE_SHIFT3) \ | ||
95 | + ((((off) >> \ | ||
96 | (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3))) \ | ||
97 | << PTE_FILE_SHIFT4) \ | ||
98 | + _PAGE_FILE }) | ||
99 | |||
100 | #else /* CONFIG_MEM_SOFT_DIRTY */ | ||
101 | |||
58 | /* | 102 | /* |
59 | * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE and _PAGE_BIT_PROTNONE are taken, | 103 | * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE and _PAGE_BIT_PROTNONE are taken, |
60 | * split up the 29 bits of offset into this range: | 104 | * split up the 29 bits of offset into this range. |
61 | */ | 105 | */ |
62 | #define PTE_FILE_MAX_BITS 29 | 106 | #define PTE_FILE_MAX_BITS 29 |
63 | #define PTE_FILE_SHIFT1 (_PAGE_BIT_PRESENT + 1) | 107 | #define PTE_FILE_SHIFT1 (_PAGE_BIT_PRESENT + 1) |
@@ -88,6 +132,8 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) | |||
88 | << PTE_FILE_SHIFT3) \ | 132 | << PTE_FILE_SHIFT3) \ |
89 | + _PAGE_FILE }) | 133 | + _PAGE_FILE }) |
90 | 134 | ||
135 | #endif /* CONFIG_MEM_SOFT_DIRTY */ | ||
136 | |||
91 | /* Encode and de-code a swap entry */ | 137 | /* Encode and de-code a swap entry */ |
92 | #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE | 138 | #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE |
93 | #define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1) | 139 | #define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1) |
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 4cc9f2b7cdc3..81bb91b49a88 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h | |||
@@ -179,6 +179,9 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp) | |||
179 | /* | 179 | /* |
180 | * Bits 0, 6 and 7 are taken in the low part of the pte, | 180 | * Bits 0, 6 and 7 are taken in the low part of the pte, |
181 | * put the 32 bits of offset into the high part. | 181 | * put the 32 bits of offset into the high part. |
182 | * | ||
183 | * For soft-dirty tracking 11 bit is taken from | ||
184 | * the low part of pte as well. | ||
182 | */ | 185 | */ |
183 | #define pte_to_pgoff(pte) ((pte).pte_high) | 186 | #define pte_to_pgoff(pte) ((pte).pte_high) |
184 | #define pgoff_to_pte(off) \ | 187 | #define pgoff_to_pte(off) \ |
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 7dc305a46058..1c00631164c2 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h | |||
@@ -314,6 +314,36 @@ static inline pmd_t pmd_mksoft_dirty(pmd_t pmd) | |||
314 | return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY); | 314 | return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY); |
315 | } | 315 | } |
316 | 316 | ||
317 | static inline pte_t pte_swp_mksoft_dirty(pte_t pte) | ||
318 | { | ||
319 | return pte_set_flags(pte, _PAGE_SWP_SOFT_DIRTY); | ||
320 | } | ||
321 | |||
322 | static inline int pte_swp_soft_dirty(pte_t pte) | ||
323 | { | ||
324 | return pte_flags(pte) & _PAGE_SWP_SOFT_DIRTY; | ||
325 | } | ||
326 | |||
327 | static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) | ||
328 | { | ||
329 | return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY); | ||
330 | } | ||
331 | |||
332 | static inline pte_t pte_file_clear_soft_dirty(pte_t pte) | ||
333 | { | ||
334 | return pte_clear_flags(pte, _PAGE_SOFT_DIRTY); | ||
335 | } | ||
336 | |||
337 | static inline pte_t pte_file_mksoft_dirty(pte_t pte) | ||
338 | { | ||
339 | return pte_set_flags(pte, _PAGE_SOFT_DIRTY); | ||
340 | } | ||
341 | |||
342 | static inline int pte_file_soft_dirty(pte_t pte) | ||
343 | { | ||
344 | return pte_flags(pte) & _PAGE_SOFT_DIRTY; | ||
345 | } | ||
346 | |||
317 | /* | 347 | /* |
318 | * Mask out unsupported bits in a present pgprot. Non-present pgprots | 348 | * Mask out unsupported bits in a present pgprot. Non-present pgprots |
319 | * can use those bits for other purposes, so leave them be. | 349 | * can use those bits for other purposes, so leave them be. |
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index c98ac63aae48..f4843e031131 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h | |||
@@ -61,12 +61,27 @@ | |||
61 | * they do not conflict with each other. | 61 | * they do not conflict with each other. |
62 | */ | 62 | */ |
63 | 63 | ||
64 | #define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_HIDDEN | ||
65 | |||
64 | #ifdef CONFIG_MEM_SOFT_DIRTY | 66 | #ifdef CONFIG_MEM_SOFT_DIRTY |
65 | #define _PAGE_SOFT_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN) | 67 | #define _PAGE_SOFT_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_SOFT_DIRTY) |
66 | #else | 68 | #else |
67 | #define _PAGE_SOFT_DIRTY (_AT(pteval_t, 0)) | 69 | #define _PAGE_SOFT_DIRTY (_AT(pteval_t, 0)) |
68 | #endif | 70 | #endif |
69 | 71 | ||
72 | /* | ||
73 | * Tracking soft dirty bit when a page goes to a swap is tricky. | ||
74 | * We need a bit which can be stored in pte _and_ not conflict | ||
75 | * with swap entry format. On x86 bits 6 and 7 are *not* involved | ||
76 | * into swap entry computation, but bit 6 is used for nonlinear | ||
77 | * file mapping, so we borrow bit 7 for soft dirty tracking. | ||
78 | */ | ||
79 | #ifdef CONFIG_MEM_SOFT_DIRTY | ||
80 | #define _PAGE_SWP_SOFT_DIRTY _PAGE_PSE | ||
81 | #else | ||
82 | #define _PAGE_SWP_SOFT_DIRTY (_AT(pteval_t, 0)) | ||
83 | #endif | ||
84 | |||
70 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) | 85 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) |
71 | #define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) | 86 | #define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) |
72 | #else | 87 | #else |
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index dbded5aedb81..48f8375e4c6b 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c | |||
@@ -101,7 +101,7 @@ static void find_start_end(unsigned long flags, unsigned long *begin, | |||
101 | *begin = new_begin; | 101 | *begin = new_begin; |
102 | } | 102 | } |
103 | } else { | 103 | } else { |
104 | *begin = TASK_UNMAPPED_BASE; | 104 | *begin = mmap_legacy_base(); |
105 | *end = TASK_SIZE; | 105 | *end = TASK_SIZE; |
106 | } | 106 | } |
107 | } | 107 | } |
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 62c29a5bfe26..f63778cb2363 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c | |||
@@ -98,7 +98,7 @@ static unsigned long mmap_base(void) | |||
98 | * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64 | 98 | * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64 |
99 | * does, but not when emulating X86_32 | 99 | * does, but not when emulating X86_32 |
100 | */ | 100 | */ |
101 | static unsigned long mmap_legacy_base(void) | 101 | unsigned long mmap_legacy_base(void) |
102 | { | 102 | { |
103 | if (mmap_is_ia32()) | 103 | if (mmap_is_ia32()) |
104 | return TASK_UNMAPPED_BASE; | 104 | return TASK_UNMAPPED_BASE; |
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 99cb944a002d..4d45dba7fb8f 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c | |||
@@ -906,16 +906,10 @@ bio_pageinc(struct bio *bio) | |||
906 | int i; | 906 | int i; |
907 | 907 | ||
908 | bio_for_each_segment(bv, bio, i) { | 908 | bio_for_each_segment(bv, bio, i) { |
909 | page = bv->bv_page; | ||
910 | /* Non-zero page count for non-head members of | 909 | /* Non-zero page count for non-head members of |
911 | * compound pages is no longer allowed by the kernel, | 910 | * compound pages is no longer allowed by the kernel. |
912 | * but this has never been seen here. | ||
913 | */ | 911 | */ |
914 | if (unlikely(PageCompound(page))) | 912 | page = compound_trans_head(bv->bv_page); |
915 | if (compound_trans_head(page) != page) { | ||
916 | pr_crit("page tail used for block I/O\n"); | ||
917 | BUG(); | ||
918 | } | ||
919 | atomic_inc(&page->_count); | 913 | atomic_inc(&page->_count); |
920 | } | 914 | } |
921 | } | 915 | } |
@@ -924,10 +918,13 @@ static void | |||
924 | bio_pagedec(struct bio *bio) | 918 | bio_pagedec(struct bio *bio) |
925 | { | 919 | { |
926 | struct bio_vec *bv; | 920 | struct bio_vec *bv; |
921 | struct page *page; | ||
927 | int i; | 922 | int i; |
928 | 923 | ||
929 | bio_for_each_segment(bv, bio, i) | 924 | bio_for_each_segment(bv, bio, i) { |
930 | atomic_dec(&bv->bv_page->_count); | 925 | page = compound_trans_head(bv->bv_page); |
926 | atomic_dec(&page->_count); | ||
927 | } | ||
931 | } | 928 | } |
932 | 929 | ||
933 | static void | 930 | static void |
diff --git a/drivers/rtc/rtc-stmp3xxx.c b/drivers/rtc/rtc-stmp3xxx.c index 767fee2ab340..26019531db15 100644 --- a/drivers/rtc/rtc-stmp3xxx.c +++ b/drivers/rtc/rtc-stmp3xxx.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <linux/init.h> | 23 | #include <linux/init.h> |
24 | #include <linux/platform_device.h> | 24 | #include <linux/platform_device.h> |
25 | #include <linux/interrupt.h> | 25 | #include <linux/interrupt.h> |
26 | #include <linux/delay.h> | ||
26 | #include <linux/rtc.h> | 27 | #include <linux/rtc.h> |
27 | #include <linux/slab.h> | 28 | #include <linux/slab.h> |
28 | #include <linux/of_device.h> | 29 | #include <linux/of_device.h> |
@@ -119,24 +120,39 @@ static void stmp3xxx_wdt_register(struct platform_device *rtc_pdev) | |||
119 | } | 120 | } |
120 | #endif /* CONFIG_STMP3XXX_RTC_WATCHDOG */ | 121 | #endif /* CONFIG_STMP3XXX_RTC_WATCHDOG */ |
121 | 122 | ||
122 | static void stmp3xxx_wait_time(struct stmp3xxx_rtc_data *rtc_data) | 123 | static int stmp3xxx_wait_time(struct stmp3xxx_rtc_data *rtc_data) |
123 | { | 124 | { |
125 | int timeout = 5000; /* 3ms according to i.MX28 Ref Manual */ | ||
124 | /* | 126 | /* |
125 | * The datasheet doesn't say which way round the | 127 | * The i.MX28 Applications Processor Reference Manual, Rev. 1, 2010 |
126 | * NEW_REGS/STALE_REGS bitfields go. In fact it's 0x1=P0, | 128 | * states: |
127 | * 0x2=P1, .., 0x20=P5, 0x40=ALARM, 0x80=SECONDS | 129 | * | The order in which registers are updated is |
130 | * | Persistent 0, 1, 2, 3, 4, 5, Alarm, Seconds. | ||
131 | * | (This list is in bitfield order, from LSB to MSB, as they would | ||
132 | * | appear in the STALE_REGS and NEW_REGS bitfields of the HW_RTC_STAT | ||
133 | * | register. For example, the Seconds register corresponds to | ||
134 | * | STALE_REGS or NEW_REGS containing 0x80.) | ||
128 | */ | 135 | */ |
129 | while (readl(rtc_data->io + STMP3XXX_RTC_STAT) & | 136 | do { |
130 | (0x80 << STMP3XXX_RTC_STAT_STALE_SHIFT)) | 137 | if (!(readl(rtc_data->io + STMP3XXX_RTC_STAT) & |
131 | cpu_relax(); | 138 | (0x80 << STMP3XXX_RTC_STAT_STALE_SHIFT))) |
139 | return 0; | ||
140 | udelay(1); | ||
141 | } while (--timeout > 0); | ||
142 | return (readl(rtc_data->io + STMP3XXX_RTC_STAT) & | ||
143 | (0x80 << STMP3XXX_RTC_STAT_STALE_SHIFT)) ? -ETIME : 0; | ||
132 | } | 144 | } |
133 | 145 | ||
134 | /* Time read/write */ | 146 | /* Time read/write */ |
135 | static int stmp3xxx_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm) | 147 | static int stmp3xxx_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm) |
136 | { | 148 | { |
149 | int ret; | ||
137 | struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev); | 150 | struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev); |
138 | 151 | ||
139 | stmp3xxx_wait_time(rtc_data); | 152 | ret = stmp3xxx_wait_time(rtc_data); |
153 | if (ret) | ||
154 | return ret; | ||
155 | |||
140 | rtc_time_to_tm(readl(rtc_data->io + STMP3XXX_RTC_SECONDS), rtc_tm); | 156 | rtc_time_to_tm(readl(rtc_data->io + STMP3XXX_RTC_SECONDS), rtc_tm); |
141 | return 0; | 157 | return 0; |
142 | } | 158 | } |
@@ -146,8 +162,7 @@ static int stmp3xxx_rtc_set_mmss(struct device *dev, unsigned long t) | |||
146 | struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev); | 162 | struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev); |
147 | 163 | ||
148 | writel(t, rtc_data->io + STMP3XXX_RTC_SECONDS); | 164 | writel(t, rtc_data->io + STMP3XXX_RTC_SECONDS); |
149 | stmp3xxx_wait_time(rtc_data); | 165 | return stmp3xxx_wait_time(rtc_data); |
150 | return 0; | ||
151 | } | 166 | } |
152 | 167 | ||
153 | /* interrupt(s) handler */ | 168 | /* interrupt(s) handler */ |
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index a3f868ae3fd4..34423978b170 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -463,6 +463,14 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb, | |||
463 | return inode; | 463 | return inode; |
464 | } | 464 | } |
465 | 465 | ||
466 | /* | ||
467 | * Hugetlbfs is not reclaimable; therefore its i_mmap_mutex will never | ||
468 | * be taken from reclaim -- unlike regular filesystems. This needs an | ||
469 | * annotation because huge_pmd_share() does an allocation under | ||
470 | * i_mmap_mutex. | ||
471 | */ | ||
472 | struct lock_class_key hugetlbfs_i_mmap_mutex_key; | ||
473 | |||
466 | static struct inode *hugetlbfs_get_inode(struct super_block *sb, | 474 | static struct inode *hugetlbfs_get_inode(struct super_block *sb, |
467 | struct inode *dir, | 475 | struct inode *dir, |
468 | umode_t mode, dev_t dev) | 476 | umode_t mode, dev_t dev) |
@@ -474,6 +482,8 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, | |||
474 | struct hugetlbfs_inode_info *info; | 482 | struct hugetlbfs_inode_info *info; |
475 | inode->i_ino = get_next_ino(); | 483 | inode->i_ino = get_next_ino(); |
476 | inode_init_owner(inode, dir, mode); | 484 | inode_init_owner(inode, dir, mode); |
485 | lockdep_set_class(&inode->i_mapping->i_mmap_mutex, | ||
486 | &hugetlbfs_i_mmap_mutex_key); | ||
477 | inode->i_mapping->a_ops = &hugetlbfs_aops; | 487 | inode->i_mapping->a_ops = &hugetlbfs_aops; |
478 | inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info; | 488 | inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info; |
479 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 489 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 79736a28d84f..2abf97b2a592 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -1757,7 +1757,7 @@ try_again: | |||
1757 | goto out; | 1757 | goto out; |
1758 | } else if (ret == 1) { | 1758 | } else if (ret == 1) { |
1759 | clusters_need = wc->w_clen; | 1759 | clusters_need = wc->w_clen; |
1760 | ret = ocfs2_refcount_cow(inode, filp, di_bh, | 1760 | ret = ocfs2_refcount_cow(inode, di_bh, |
1761 | wc->w_cpos, wc->w_clen, UINT_MAX); | 1761 | wc->w_cpos, wc->w_clen, UINT_MAX); |
1762 | if (ret) { | 1762 | if (ret) { |
1763 | mlog_errno(ret); | 1763 | mlog_errno(ret); |
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index eb760d8acd50..30544ce8e9f7 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c | |||
@@ -2153,11 +2153,9 @@ int ocfs2_empty_dir(struct inode *inode) | |||
2153 | { | 2153 | { |
2154 | int ret; | 2154 | int ret; |
2155 | struct ocfs2_empty_dir_priv priv = { | 2155 | struct ocfs2_empty_dir_priv priv = { |
2156 | .ctx.actor = ocfs2_empty_dir_filldir | 2156 | .ctx.actor = ocfs2_empty_dir_filldir, |
2157 | }; | 2157 | }; |
2158 | 2158 | ||
2159 | memset(&priv, 0, sizeof(priv)); | ||
2160 | |||
2161 | if (ocfs2_dir_indexed(inode)) { | 2159 | if (ocfs2_dir_indexed(inode)) { |
2162 | ret = ocfs2_empty_dir_dx(inode, &priv); | 2160 | ret = ocfs2_empty_dir_dx(inode, &priv); |
2163 | if (ret) | 2161 | if (ret) |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 41000f223ca4..3261d71319ee 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -370,7 +370,7 @@ static int ocfs2_cow_file_pos(struct inode *inode, | |||
370 | if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) | 370 | if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) |
371 | goto out; | 371 | goto out; |
372 | 372 | ||
373 | return ocfs2_refcount_cow(inode, NULL, fe_bh, cpos, 1, cpos+1); | 373 | return ocfs2_refcount_cow(inode, fe_bh, cpos, 1, cpos+1); |
374 | 374 | ||
375 | out: | 375 | out: |
376 | return status; | 376 | return status; |
@@ -899,7 +899,7 @@ static int ocfs2_zero_extend_get_range(struct inode *inode, | |||
899 | zero_clusters = last_cpos - zero_cpos; | 899 | zero_clusters = last_cpos - zero_cpos; |
900 | 900 | ||
901 | if (needs_cow) { | 901 | if (needs_cow) { |
902 | rc = ocfs2_refcount_cow(inode, NULL, di_bh, zero_cpos, | 902 | rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos, |
903 | zero_clusters, UINT_MAX); | 903 | zero_clusters, UINT_MAX); |
904 | if (rc) { | 904 | if (rc) { |
905 | mlog_errno(rc); | 905 | mlog_errno(rc); |
@@ -2078,7 +2078,7 @@ static int ocfs2_prepare_inode_for_refcount(struct inode *inode, | |||
2078 | 2078 | ||
2079 | *meta_level = 1; | 2079 | *meta_level = 1; |
2080 | 2080 | ||
2081 | ret = ocfs2_refcount_cow(inode, file, di_bh, cpos, clusters, UINT_MAX); | 2081 | ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX); |
2082 | if (ret) | 2082 | if (ret) |
2083 | mlog_errno(ret); | 2083 | mlog_errno(ret); |
2084 | out: | 2084 | out: |
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 96f9ac237e86..0a992737dcaf 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h | |||
@@ -537,7 +537,7 @@ static inline int ocfs2_calc_extend_credits(struct super_block *sb, | |||
537 | extent_blocks = 1 + 1 + le16_to_cpu(root_el->l_tree_depth); | 537 | extent_blocks = 1 + 1 + le16_to_cpu(root_el->l_tree_depth); |
538 | 538 | ||
539 | return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks + | 539 | return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks + |
540 | ocfs2_quota_trans_credits(sb) + bits_wanted; | 540 | ocfs2_quota_trans_credits(sb); |
541 | } | 541 | } |
542 | 542 | ||
543 | static inline int ocfs2_calc_symlink_credits(struct super_block *sb) | 543 | static inline int ocfs2_calc_symlink_credits(struct super_block *sb) |
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index f1fc172175b6..452068b45749 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c | |||
@@ -69,7 +69,7 @@ static int __ocfs2_move_extent(handle_t *handle, | |||
69 | u64 ino = ocfs2_metadata_cache_owner(context->et.et_ci); | 69 | u64 ino = ocfs2_metadata_cache_owner(context->et.et_ci); |
70 | u64 old_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cpos); | 70 | u64 old_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cpos); |
71 | 71 | ||
72 | ret = ocfs2_duplicate_clusters_by_page(handle, context->file, cpos, | 72 | ret = ocfs2_duplicate_clusters_by_page(handle, inode, cpos, |
73 | p_cpos, new_p_cpos, len); | 73 | p_cpos, new_p_cpos, len); |
74 | if (ret) { | 74 | if (ret) { |
75 | mlog_errno(ret); | 75 | mlog_errno(ret); |
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 9f6b96a09615..a70d604593b6 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c | |||
@@ -49,7 +49,6 @@ | |||
49 | 49 | ||
50 | struct ocfs2_cow_context { | 50 | struct ocfs2_cow_context { |
51 | struct inode *inode; | 51 | struct inode *inode; |
52 | struct file *file; | ||
53 | u32 cow_start; | 52 | u32 cow_start; |
54 | u32 cow_len; | 53 | u32 cow_len; |
55 | struct ocfs2_extent_tree data_et; | 54 | struct ocfs2_extent_tree data_et; |
@@ -66,7 +65,7 @@ struct ocfs2_cow_context { | |||
66 | u32 *num_clusters, | 65 | u32 *num_clusters, |
67 | unsigned int *extent_flags); | 66 | unsigned int *extent_flags); |
68 | int (*cow_duplicate_clusters)(handle_t *handle, | 67 | int (*cow_duplicate_clusters)(handle_t *handle, |
69 | struct file *file, | 68 | struct inode *inode, |
70 | u32 cpos, u32 old_cluster, | 69 | u32 cpos, u32 old_cluster, |
71 | u32 new_cluster, u32 new_len); | 70 | u32 new_cluster, u32 new_len); |
72 | }; | 71 | }; |
@@ -2922,14 +2921,12 @@ static int ocfs2_clear_cow_buffer(handle_t *handle, struct buffer_head *bh) | |||
2922 | } | 2921 | } |
2923 | 2922 | ||
2924 | int ocfs2_duplicate_clusters_by_page(handle_t *handle, | 2923 | int ocfs2_duplicate_clusters_by_page(handle_t *handle, |
2925 | struct file *file, | 2924 | struct inode *inode, |
2926 | u32 cpos, u32 old_cluster, | 2925 | u32 cpos, u32 old_cluster, |
2927 | u32 new_cluster, u32 new_len) | 2926 | u32 new_cluster, u32 new_len) |
2928 | { | 2927 | { |
2929 | int ret = 0, partial; | 2928 | int ret = 0, partial; |
2930 | struct inode *inode = file_inode(file); | 2929 | struct super_block *sb = inode->i_sb; |
2931 | struct ocfs2_caching_info *ci = INODE_CACHE(inode); | ||
2932 | struct super_block *sb = ocfs2_metadata_cache_get_super(ci); | ||
2933 | u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); | 2930 | u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); |
2934 | struct page *page; | 2931 | struct page *page; |
2935 | pgoff_t page_index; | 2932 | pgoff_t page_index; |
@@ -2978,13 +2975,6 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle, | |||
2978 | if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize) | 2975 | if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize) |
2979 | BUG_ON(PageDirty(page)); | 2976 | BUG_ON(PageDirty(page)); |
2980 | 2977 | ||
2981 | if (PageReadahead(page)) { | ||
2982 | page_cache_async_readahead(mapping, | ||
2983 | &file->f_ra, file, | ||
2984 | page, page_index, | ||
2985 | readahead_pages); | ||
2986 | } | ||
2987 | |||
2988 | if (!PageUptodate(page)) { | 2978 | if (!PageUptodate(page)) { |
2989 | ret = block_read_full_page(page, ocfs2_get_block); | 2979 | ret = block_read_full_page(page, ocfs2_get_block); |
2990 | if (ret) { | 2980 | if (ret) { |
@@ -3004,7 +2994,8 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle, | |||
3004 | } | 2994 | } |
3005 | } | 2995 | } |
3006 | 2996 | ||
3007 | ocfs2_map_and_dirty_page(inode, handle, from, to, | 2997 | ocfs2_map_and_dirty_page(inode, |
2998 | handle, from, to, | ||
3008 | page, 0, &new_block); | 2999 | page, 0, &new_block); |
3009 | mark_page_accessed(page); | 3000 | mark_page_accessed(page); |
3010 | unlock: | 3001 | unlock: |
@@ -3020,12 +3011,11 @@ unlock: | |||
3020 | } | 3011 | } |
3021 | 3012 | ||
3022 | int ocfs2_duplicate_clusters_by_jbd(handle_t *handle, | 3013 | int ocfs2_duplicate_clusters_by_jbd(handle_t *handle, |
3023 | struct file *file, | 3014 | struct inode *inode, |
3024 | u32 cpos, u32 old_cluster, | 3015 | u32 cpos, u32 old_cluster, |
3025 | u32 new_cluster, u32 new_len) | 3016 | u32 new_cluster, u32 new_len) |
3026 | { | 3017 | { |
3027 | int ret = 0; | 3018 | int ret = 0; |
3028 | struct inode *inode = file_inode(file); | ||
3029 | struct super_block *sb = inode->i_sb; | 3019 | struct super_block *sb = inode->i_sb; |
3030 | struct ocfs2_caching_info *ci = INODE_CACHE(inode); | 3020 | struct ocfs2_caching_info *ci = INODE_CACHE(inode); |
3031 | int i, blocks = ocfs2_clusters_to_blocks(sb, new_len); | 3021 | int i, blocks = ocfs2_clusters_to_blocks(sb, new_len); |
@@ -3150,7 +3140,7 @@ static int ocfs2_replace_clusters(handle_t *handle, | |||
3150 | 3140 | ||
3151 | /*If the old clusters is unwritten, no need to duplicate. */ | 3141 | /*If the old clusters is unwritten, no need to duplicate. */ |
3152 | if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) { | 3142 | if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) { |
3153 | ret = context->cow_duplicate_clusters(handle, context->file, | 3143 | ret = context->cow_duplicate_clusters(handle, context->inode, |
3154 | cpos, old, new, len); | 3144 | cpos, old, new, len); |
3155 | if (ret) { | 3145 | if (ret) { |
3156 | mlog_errno(ret); | 3146 | mlog_errno(ret); |
@@ -3428,35 +3418,12 @@ static int ocfs2_replace_cow(struct ocfs2_cow_context *context) | |||
3428 | return ret; | 3418 | return ret; |
3429 | } | 3419 | } |
3430 | 3420 | ||
3431 | static void ocfs2_readahead_for_cow(struct inode *inode, | ||
3432 | struct file *file, | ||
3433 | u32 start, u32 len) | ||
3434 | { | ||
3435 | struct address_space *mapping; | ||
3436 | pgoff_t index; | ||
3437 | unsigned long num_pages; | ||
3438 | int cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits; | ||
3439 | |||
3440 | if (!file) | ||
3441 | return; | ||
3442 | |||
3443 | mapping = file->f_mapping; | ||
3444 | num_pages = (len << cs_bits) >> PAGE_CACHE_SHIFT; | ||
3445 | if (!num_pages) | ||
3446 | num_pages = 1; | ||
3447 | |||
3448 | index = ((loff_t)start << cs_bits) >> PAGE_CACHE_SHIFT; | ||
3449 | page_cache_sync_readahead(mapping, &file->f_ra, file, | ||
3450 | index, num_pages); | ||
3451 | } | ||
3452 | |||
3453 | /* | 3421 | /* |
3454 | * Starting at cpos, try to CoW write_len clusters. Don't CoW | 3422 | * Starting at cpos, try to CoW write_len clusters. Don't CoW |
3455 | * past max_cpos. This will stop when it runs into a hole or an | 3423 | * past max_cpos. This will stop when it runs into a hole or an |
3456 | * unrefcounted extent. | 3424 | * unrefcounted extent. |
3457 | */ | 3425 | */ |
3458 | static int ocfs2_refcount_cow_hunk(struct inode *inode, | 3426 | static int ocfs2_refcount_cow_hunk(struct inode *inode, |
3459 | struct file *file, | ||
3460 | struct buffer_head *di_bh, | 3427 | struct buffer_head *di_bh, |
3461 | u32 cpos, u32 write_len, u32 max_cpos) | 3428 | u32 cpos, u32 write_len, u32 max_cpos) |
3462 | { | 3429 | { |
@@ -3485,8 +3452,6 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode, | |||
3485 | 3452 | ||
3486 | BUG_ON(cow_len == 0); | 3453 | BUG_ON(cow_len == 0); |
3487 | 3454 | ||
3488 | ocfs2_readahead_for_cow(inode, file, cow_start, cow_len); | ||
3489 | |||
3490 | context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS); | 3455 | context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS); |
3491 | if (!context) { | 3456 | if (!context) { |
3492 | ret = -ENOMEM; | 3457 | ret = -ENOMEM; |
@@ -3508,7 +3473,6 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode, | |||
3508 | context->ref_root_bh = ref_root_bh; | 3473 | context->ref_root_bh = ref_root_bh; |
3509 | context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page; | 3474 | context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page; |
3510 | context->get_clusters = ocfs2_di_get_clusters; | 3475 | context->get_clusters = ocfs2_di_get_clusters; |
3511 | context->file = file; | ||
3512 | 3476 | ||
3513 | ocfs2_init_dinode_extent_tree(&context->data_et, | 3477 | ocfs2_init_dinode_extent_tree(&context->data_et, |
3514 | INODE_CACHE(inode), di_bh); | 3478 | INODE_CACHE(inode), di_bh); |
@@ -3537,7 +3501,6 @@ out: | |||
3537 | * clusters between cpos and cpos+write_len are safe to modify. | 3501 | * clusters between cpos and cpos+write_len are safe to modify. |
3538 | */ | 3502 | */ |
3539 | int ocfs2_refcount_cow(struct inode *inode, | 3503 | int ocfs2_refcount_cow(struct inode *inode, |
3540 | struct file *file, | ||
3541 | struct buffer_head *di_bh, | 3504 | struct buffer_head *di_bh, |
3542 | u32 cpos, u32 write_len, u32 max_cpos) | 3505 | u32 cpos, u32 write_len, u32 max_cpos) |
3543 | { | 3506 | { |
@@ -3557,7 +3520,7 @@ int ocfs2_refcount_cow(struct inode *inode, | |||
3557 | num_clusters = write_len; | 3520 | num_clusters = write_len; |
3558 | 3521 | ||
3559 | if (ext_flags & OCFS2_EXT_REFCOUNTED) { | 3522 | if (ext_flags & OCFS2_EXT_REFCOUNTED) { |
3560 | ret = ocfs2_refcount_cow_hunk(inode, file, di_bh, cpos, | 3523 | ret = ocfs2_refcount_cow_hunk(inode, di_bh, cpos, |
3561 | num_clusters, max_cpos); | 3524 | num_clusters, max_cpos); |
3562 | if (ret) { | 3525 | if (ret) { |
3563 | mlog_errno(ret); | 3526 | mlog_errno(ret); |
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h index 7754608c83a4..6422bbcdb525 100644 --- a/fs/ocfs2/refcounttree.h +++ b/fs/ocfs2/refcounttree.h | |||
@@ -53,7 +53,7 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode, | |||
53 | int *credits, | 53 | int *credits, |
54 | int *ref_blocks); | 54 | int *ref_blocks); |
55 | int ocfs2_refcount_cow(struct inode *inode, | 55 | int ocfs2_refcount_cow(struct inode *inode, |
56 | struct file *filep, struct buffer_head *di_bh, | 56 | struct buffer_head *di_bh, |
57 | u32 cpos, u32 write_len, u32 max_cpos); | 57 | u32 cpos, u32 write_len, u32 max_cpos); |
58 | 58 | ||
59 | typedef int (ocfs2_post_refcount_func)(struct inode *inode, | 59 | typedef int (ocfs2_post_refcount_func)(struct inode *inode, |
@@ -85,11 +85,11 @@ int ocfs2_refcount_cow_xattr(struct inode *inode, | |||
85 | u32 cpos, u32 write_len, | 85 | u32 cpos, u32 write_len, |
86 | struct ocfs2_post_refcount *post); | 86 | struct ocfs2_post_refcount *post); |
87 | int ocfs2_duplicate_clusters_by_page(handle_t *handle, | 87 | int ocfs2_duplicate_clusters_by_page(handle_t *handle, |
88 | struct file *file, | 88 | struct inode *inode, |
89 | u32 cpos, u32 old_cluster, | 89 | u32 cpos, u32 old_cluster, |
90 | u32 new_cluster, u32 new_len); | 90 | u32 new_cluster, u32 new_len); |
91 | int ocfs2_duplicate_clusters_by_jbd(handle_t *handle, | 91 | int ocfs2_duplicate_clusters_by_jbd(handle_t *handle, |
92 | struct file *file, | 92 | struct inode *inode, |
93 | u32 cpos, u32 old_cluster, | 93 | u32 cpos, u32 old_cluster, |
94 | u32 new_cluster, u32 new_len); | 94 | u32 new_cluster, u32 new_len); |
95 | int ocfs2_cow_sync_writeback(struct super_block *sb, | 95 | int ocfs2_cow_sync_writeback(struct super_block *sb, |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index dbf61f6174f0..107d026f5d6e 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -730,8 +730,16 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma, | |||
730 | * of how soft-dirty works. | 730 | * of how soft-dirty works. |
731 | */ | 731 | */ |
732 | pte_t ptent = *pte; | 732 | pte_t ptent = *pte; |
733 | ptent = pte_wrprotect(ptent); | 733 | |
734 | ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY); | 734 | if (pte_present(ptent)) { |
735 | ptent = pte_wrprotect(ptent); | ||
736 | ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY); | ||
737 | } else if (is_swap_pte(ptent)) { | ||
738 | ptent = pte_swp_clear_soft_dirty(ptent); | ||
739 | } else if (pte_file(ptent)) { | ||
740 | ptent = pte_file_clear_soft_dirty(ptent); | ||
741 | } | ||
742 | |||
735 | set_pte_at(vma->vm_mm, addr, pte, ptent); | 743 | set_pte_at(vma->vm_mm, addr, pte, ptent); |
736 | #endif | 744 | #endif |
737 | } | 745 | } |
@@ -752,14 +760,15 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, | |||
752 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); | 760 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); |
753 | for (; addr != end; pte++, addr += PAGE_SIZE) { | 761 | for (; addr != end; pte++, addr += PAGE_SIZE) { |
754 | ptent = *pte; | 762 | ptent = *pte; |
755 | if (!pte_present(ptent)) | ||
756 | continue; | ||
757 | 763 | ||
758 | if (cp->type == CLEAR_REFS_SOFT_DIRTY) { | 764 | if (cp->type == CLEAR_REFS_SOFT_DIRTY) { |
759 | clear_soft_dirty(vma, addr, pte); | 765 | clear_soft_dirty(vma, addr, pte); |
760 | continue; | 766 | continue; |
761 | } | 767 | } |
762 | 768 | ||
769 | if (!pte_present(ptent)) | ||
770 | continue; | ||
771 | |||
763 | page = vm_normal_page(vma, addr, ptent); | 772 | page = vm_normal_page(vma, addr, ptent); |
764 | if (!page) | 773 | if (!page) |
765 | continue; | 774 | continue; |
@@ -859,7 +868,7 @@ typedef struct { | |||
859 | } pagemap_entry_t; | 868 | } pagemap_entry_t; |
860 | 869 | ||
861 | struct pagemapread { | 870 | struct pagemapread { |
862 | int pos, len; | 871 | int pos, len; /* units: PM_ENTRY_BYTES, not bytes */ |
863 | pagemap_entry_t *buffer; | 872 | pagemap_entry_t *buffer; |
864 | bool v2; | 873 | bool v2; |
865 | }; | 874 | }; |
@@ -867,7 +876,7 @@ struct pagemapread { | |||
867 | #define PAGEMAP_WALK_SIZE (PMD_SIZE) | 876 | #define PAGEMAP_WALK_SIZE (PMD_SIZE) |
868 | #define PAGEMAP_WALK_MASK (PMD_MASK) | 877 | #define PAGEMAP_WALK_MASK (PMD_MASK) |
869 | 878 | ||
870 | #define PM_ENTRY_BYTES sizeof(u64) | 879 | #define PM_ENTRY_BYTES sizeof(pagemap_entry_t) |
871 | #define PM_STATUS_BITS 3 | 880 | #define PM_STATUS_BITS 3 |
872 | #define PM_STATUS_OFFSET (64 - PM_STATUS_BITS) | 881 | #define PM_STATUS_OFFSET (64 - PM_STATUS_BITS) |
873 | #define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET) | 882 | #define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET) |
@@ -930,8 +939,10 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, | |||
930 | flags = PM_PRESENT; | 939 | flags = PM_PRESENT; |
931 | page = vm_normal_page(vma, addr, pte); | 940 | page = vm_normal_page(vma, addr, pte); |
932 | } else if (is_swap_pte(pte)) { | 941 | } else if (is_swap_pte(pte)) { |
933 | swp_entry_t entry = pte_to_swp_entry(pte); | 942 | swp_entry_t entry; |
934 | 943 | if (pte_swp_soft_dirty(pte)) | |
944 | flags2 |= __PM_SOFT_DIRTY; | ||
945 | entry = pte_to_swp_entry(pte); | ||
935 | frame = swp_type(entry) | | 946 | frame = swp_type(entry) | |
936 | (swp_offset(entry) << MAX_SWAPFILES_SHIFT); | 947 | (swp_offset(entry) << MAX_SWAPFILES_SHIFT); |
937 | flags = PM_SWAP; | 948 | flags = PM_SWAP; |
@@ -1116,8 +1127,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, | |||
1116 | goto out_task; | 1127 | goto out_task; |
1117 | 1128 | ||
1118 | pm.v2 = soft_dirty_cleared; | 1129 | pm.v2 = soft_dirty_cleared; |
1119 | pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); | 1130 | pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); |
1120 | pm.buffer = kmalloc(pm.len, GFP_TEMPORARY); | 1131 | pm.buffer = kmalloc(pm.len * PM_ENTRY_BYTES, GFP_TEMPORARY); |
1121 | ret = -ENOMEM; | 1132 | ret = -ENOMEM; |
1122 | if (!pm.buffer) | 1133 | if (!pm.buffer) |
1123 | goto out_task; | 1134 | goto out_task; |
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 2f47ade1b567..0807ddf97b05 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h | |||
@@ -417,6 +417,36 @@ static inline pmd_t pmd_mksoft_dirty(pmd_t pmd) | |||
417 | { | 417 | { |
418 | return pmd; | 418 | return pmd; |
419 | } | 419 | } |
420 | |||
421 | static inline pte_t pte_swp_mksoft_dirty(pte_t pte) | ||
422 | { | ||
423 | return pte; /* generic fallback: nothing to set, pte returned unchanged */ | ||
424 | } | ||
425 | |
426 | static inline int pte_swp_soft_dirty(pte_t pte) | ||
427 | { | ||
428 | return 0; /* generic fallback: a swap pte is never reported soft-dirty */ | ||
429 | } | ||
430 | |
431 | static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) | ||
432 | { | ||
433 | return pte; /* generic fallback: nothing to clear, pte returned unchanged */ | ||
434 | } | ||
435 | |
436 | static inline pte_t pte_file_clear_soft_dirty(pte_t pte) | ||
437 | { | ||
438 | return pte; /* generic fallback: nothing to clear, pte returned unchanged */ | ||
439 | } | ||
440 | |
441 | static inline pte_t pte_file_mksoft_dirty(pte_t pte) | ||
442 | { | ||
443 | return pte; /* generic fallback: nothing to set; callers must use the returned value */ | ||
444 | } | ||
445 | |
446 | static inline int pte_file_soft_dirty(pte_t pte) | ||
447 | { | ||
448 | return 0; /* generic fallback: a file pte is never reported soft-dirty */ | ||
449 | } | ||
420 | #endif | 450 | #endif |
421 | 451 | ||
422 | #ifndef __HAVE_PFNMAP_TRACKING | 452 | #ifndef __HAVE_PFNMAP_TRACKING |
diff --git a/include/linux/sched.h b/include/linux/sched.h index 078066daffd4..e9995eb5985c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -314,6 +314,7 @@ struct nsproxy; | |||
314 | struct user_namespace; | 314 | struct user_namespace; |
315 | 315 | ||
316 | #ifdef CONFIG_MMU | 316 | #ifdef CONFIG_MMU |
317 | extern unsigned long mmap_legacy_base(void); | ||
317 | extern void arch_pick_mmap_layout(struct mm_struct *mm); | 318 | extern void arch_pick_mmap_layout(struct mm_struct *mm); |
318 | extern unsigned long | 319 | extern unsigned long |
319 | arch_get_unmapped_area(struct file *, unsigned long, unsigned long, | 320 | arch_get_unmapped_area(struct file *, unsigned long, unsigned long, |
diff --git a/include/linux/swapops.h b/include/linux/swapops.h index c5fd30d2a415..8d4fa82bfb91 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h | |||
@@ -67,6 +67,8 @@ static inline swp_entry_t pte_to_swp_entry(pte_t pte) | |||
67 | swp_entry_t arch_entry; | 67 | swp_entry_t arch_entry; |
68 | 68 | ||
69 | BUG_ON(pte_file(pte)); | 69 | BUG_ON(pte_file(pte)); |
70 | if (pte_swp_soft_dirty(pte)) | ||
71 | pte = pte_swp_clear_soft_dirty(pte); | ||
70 | arch_entry = __pte_to_swp_entry(pte); | 72 | arch_entry = __pte_to_swp_entry(pte); |
71 | return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry)); | 73 | return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry)); |
72 | } | 74 | } |
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 4147d700a293..84662ecc7b51 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h | |||
@@ -802,9 +802,14 @@ asmlinkage long sys_vfork(void); | |||
802 | asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, int, | 802 | asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, int, |
803 | int __user *); | 803 | int __user *); |
804 | #else | 804 | #else |
805 | #ifdef CONFIG_CLONE_BACKWARDS3 | ||
806 | asmlinkage long sys_clone(unsigned long, unsigned long, int, int __user *, | ||
807 | int __user *, int); | ||
808 | #else | ||
805 | asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, | 809 | asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, |
806 | int __user *, int); | 810 | int __user *, int); |
807 | #endif | 811 | #endif |
812 | #endif | ||
808 | 813 | ||
809 | asmlinkage long sys_execve(const char __user *filename, | 814 | asmlinkage long sys_execve(const char __user *filename, |
810 | const char __user *const __user *argv, | 815 | const char __user *const __user *argv, |
diff --git a/kernel/fork.c b/kernel/fork.c index 403d2bb8a968..e23bb19e2a3e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -1679,6 +1679,12 @@ SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags, | |||
1679 | int __user *, parent_tidptr, | 1679 | int __user *, parent_tidptr, |
1680 | int __user *, child_tidptr, | 1680 | int __user *, child_tidptr, |
1681 | int, tls_val) | 1681 | int, tls_val) |
1682 | #elif defined(CONFIG_CLONE_BACKWARDS3) | ||
1683 | SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp, | ||
1684 | int, stack_size, | ||
1685 | int __user *, parent_tidptr, | ||
1686 | int __user *, child_tidptr, | ||
1687 | int, tls_val) | ||
1682 | #else | 1688 | #else |
1683 | SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, | 1689 | SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, |
1684 | int __user *, parent_tidptr, | 1690 | int __user *, parent_tidptr, |
diff --git a/mm/fremap.c b/mm/fremap.c index 87da3590c61e..5bff08147768 100644 --- a/mm/fremap.c +++ b/mm/fremap.c | |||
@@ -57,17 +57,22 @@ static int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, | |||
57 | unsigned long addr, unsigned long pgoff, pgprot_t prot) | 57 | unsigned long addr, unsigned long pgoff, pgprot_t prot) |
58 | { | 58 | { |
59 | int err = -ENOMEM; | 59 | int err = -ENOMEM; |
60 | pte_t *pte; | 60 | pte_t *pte, ptfile; |
61 | spinlock_t *ptl; | 61 | spinlock_t *ptl; |
62 | 62 | ||
63 | pte = get_locked_pte(mm, addr, &ptl); | 63 | pte = get_locked_pte(mm, addr, &ptl); |
64 | if (!pte) | 64 | if (!pte) |
65 | goto out; | 65 | goto out; |
66 | 66 | ||
67 | if (!pte_none(*pte)) | 67 | ptfile = pgoff_to_pte(pgoff); |
68 | |||
69 | if (!pte_none(*pte)) { | ||
70 | if (pte_present(*pte) && pte_soft_dirty(*pte)) | ||
71 | ptfile = pte_file_mksoft_dirty(ptfile); /* helper returns the marked pte by value; keep it so the bit survives the zap */ | ||
68 | zap_pte(mm, vma, addr, pte); | 72 | zap_pte(mm, vma, addr, pte); |
73 | } | ||
69 | 74 | ||
70 | set_pte_at(mm, addr, pte, pgoff_to_pte(pgoff)); | 75 | set_pte_at(mm, addr, pte, ptfile); |
71 | /* | 76 | /* |
72 | * We don't need to run update_mmu_cache() here because the "file pte" | 77 | * We don't need to run update_mmu_cache() here because the "file pte" |
73 | * being installed by install_file_pte() is not a real pte - it's a | 78 | * being installed by install_file_pte() is not a real pte - it's a |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c290a1cf3862..c5792a5d87ce 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -3195,11 +3195,11 @@ int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s, | |||
3195 | if (!s->memcg_params) | 3195 | if (!s->memcg_params) |
3196 | return -ENOMEM; | 3196 | return -ENOMEM; |
3197 | 3197 | ||
3198 | INIT_WORK(&s->memcg_params->destroy, | ||
3199 | kmem_cache_destroy_work_func); | ||
3200 | if (memcg) { | 3198 | if (memcg) { |
3201 | s->memcg_params->memcg = memcg; | 3199 | s->memcg_params->memcg = memcg; |
3202 | s->memcg_params->root_cache = root_cache; | 3200 | s->memcg_params->root_cache = root_cache; |
3201 | INIT_WORK(&s->memcg_params->destroy, | ||
3202 | kmem_cache_destroy_work_func); | ||
3203 | } else | 3203 | } else |
3204 | s->memcg_params->is_root_cache = true; | 3204 | s->memcg_params->is_root_cache = true; |
3205 | 3205 | ||
diff --git a/mm/memory.c b/mm/memory.c index 1ce2e2a734fc..40268410732a 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -1141,9 +1141,12 @@ again: | |||
1141 | continue; | 1141 | continue; |
1142 | if (unlikely(details) && details->nonlinear_vma | 1142 | if (unlikely(details) && details->nonlinear_vma |
1143 | && linear_page_index(details->nonlinear_vma, | 1143 | && linear_page_index(details->nonlinear_vma, |
1144 | addr) != page->index) | 1144 | addr) != page->index) { |
1145 | set_pte_at(mm, addr, pte, | 1145 | pte_t ptfile = pgoff_to_pte(page->index); |
1146 | pgoff_to_pte(page->index)); | 1146 | if (pte_soft_dirty(ptent)) |
1147 | ptfile = pte_file_mksoft_dirty(ptfile); /* helper returns the marked pte by value; keep it */ | ||
1148 | set_pte_at(mm, addr, pte, ptfile); | ||
1149 | } | ||
1147 | if (PageAnon(page)) | 1150 | if (PageAnon(page)) |
1148 | rss[MM_ANONPAGES]--; | 1151 | rss[MM_ANONPAGES]--; |
1149 | else { | 1152 | else { |
@@ -3115,6 +3118,8 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
3115 | exclusive = 1; | 3118 | exclusive = 1; |
3116 | } | 3119 | } |
3117 | flush_icache_page(vma, page); | 3120 | flush_icache_page(vma, page); |
3121 | if (pte_swp_soft_dirty(orig_pte)) | ||
3122 | pte = pte_mksoft_dirty(pte); | ||
3118 | set_pte_at(mm, address, page_table, pte); | 3123 | set_pte_at(mm, address, page_table, pte); |
3119 | if (page == swapcache) | 3124 | if (page == swapcache) |
3120 | do_page_add_anon_rmap(page, vma, address, exclusive); | 3125 | do_page_add_anon_rmap(page, vma, address, exclusive); |
@@ -3408,6 +3413,8 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
3408 | entry = mk_pte(page, vma->vm_page_prot); | 3413 | entry = mk_pte(page, vma->vm_page_prot); |
3409 | if (flags & FAULT_FLAG_WRITE) | 3414 | if (flags & FAULT_FLAG_WRITE) |
3410 | entry = maybe_mkwrite(pte_mkdirty(entry), vma); | 3415 | entry = maybe_mkwrite(pte_mkdirty(entry), vma); |
3416 | else if (pte_file(orig_pte) && pte_file_soft_dirty(orig_pte)) | ||
3417 | entry = pte_mksoft_dirty(entry); /* carry the file pte's soft-dirty state over; helper returns the new pte by value */ | ||
3411 | if (anon) { | 3418 | if (anon) { |
3412 | inc_mm_counter_fast(mm, MM_ANONPAGES); | 3419 | inc_mm_counter_fast(mm, MM_ANONPAGES); |
3413 | page_add_new_anon_rmap(page, vma, address); | 3420 | page_add_new_anon_rmap(page, vma, address); |
@@ -1236,6 +1236,7 @@ int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, | |||
1236 | swp_entry_to_pte(make_hwpoison_entry(page))); | 1236 | swp_entry_to_pte(make_hwpoison_entry(page))); |
1237 | } else if (PageAnon(page)) { | 1237 | } else if (PageAnon(page)) { |
1238 | swp_entry_t entry = { .val = page_private(page) }; | 1238 | swp_entry_t entry = { .val = page_private(page) }; |
1239 | pte_t swp_pte; | ||
1239 | 1240 | ||
1240 | if (PageSwapCache(page)) { | 1241 | if (PageSwapCache(page)) { |
1241 | /* | 1242 | /* |
@@ -1264,7 +1265,10 @@ int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, | |||
1264 | BUG_ON(TTU_ACTION(flags) != TTU_MIGRATION); | 1265 | BUG_ON(TTU_ACTION(flags) != TTU_MIGRATION); |
1265 | entry = make_migration_entry(page, pte_write(pteval)); | 1266 | entry = make_migration_entry(page, pte_write(pteval)); |
1266 | } | 1267 | } |
1267 | set_pte_at(mm, address, pte, swp_entry_to_pte(entry)); | 1268 | swp_pte = swp_entry_to_pte(entry); |
1269 | if (pte_soft_dirty(pteval)) | ||
1270 | swp_pte = pte_swp_mksoft_dirty(swp_pte); | ||
1271 | set_pte_at(mm, address, pte, swp_pte); | ||
1268 | BUG_ON(pte_file(*pte)); | 1272 | BUG_ON(pte_file(*pte)); |
1269 | } else if (IS_ENABLED(CONFIG_MIGRATION) && | 1273 | } else if (IS_ENABLED(CONFIG_MIGRATION) && |
1270 | (TTU_ACTION(flags) == TTU_MIGRATION)) { | 1274 | (TTU_ACTION(flags) == TTU_MIGRATION)) { |
@@ -1401,8 +1405,12 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount, | |||
1401 | pteval = ptep_clear_flush(vma, address, pte); | 1405 | pteval = ptep_clear_flush(vma, address, pte); |
1402 | 1406 | ||
1403 | /* If nonlinear, store the file page offset in the pte. */ | 1407 | /* If nonlinear, store the file page offset in the pte. */ |
1404 | if (page->index != linear_page_index(vma, address)) | 1408 | if (page->index != linear_page_index(vma, address)) { |
1405 | set_pte_at(mm, address, pte, pgoff_to_pte(page->index)); | 1409 | pte_t ptfile = pgoff_to_pte(page->index); |
1410 | if (pte_soft_dirty(pteval)) | ||
1411 | ptfile = pte_file_mksoft_dirty(ptfile); /* helper returns the marked pte by value; keep it */ | ||
1412 | set_pte_at(mm, address, pte, ptfile); | ||
1413 | } | ||
1406 | 1414 | ||
1407 | /* Move the dirty bit to the physical page now the pte is gone. */ | 1415 | /* Move the dirty bit to the physical page now the pte is gone. */ |
1408 | if (pte_dirty(pteval)) | 1416 | if (pte_dirty(pteval)) |
diff --git a/mm/swapfile.c b/mm/swapfile.c index 36af6eeaa67e..6cf2e60983b7 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
@@ -866,6 +866,21 @@ unsigned int count_swap_pages(int type, int free) | |||
866 | } | 866 | } |
867 | #endif /* CONFIG_HIBERNATION */ | 867 | #endif /* CONFIG_HIBERNATION */ |
868 | 868 | ||
869 | static inline int maybe_same_pte(pte_t pte, pte_t swp_pte) | ||
870 | { | ||
871 | #ifdef CONFIG_MEM_SOFT_DIRTY | ||
872 | /* | ||
873 | * The pte in the page table may carry the swap soft-dirty bit, | ||
874 | * while the pte generated from the swap entry does not have it; | ||
875 | * logically they are still the same pte, so accept both forms. | ||
876 | */ | ||
877 | pte_t swp_pte_dirty = pte_swp_mksoft_dirty(swp_pte); | ||
878 | return pte_same(pte, swp_pte) || pte_same(pte, swp_pte_dirty); | ||
879 | #else | ||
880 | return pte_same(pte, swp_pte); | ||
881 | #endif | ||
882 | } | ||
883 | |||
869 | /* | 884 | /* |
870 | * No need to decide whether this PTE shares the swap entry with others, | 885 | * No need to decide whether this PTE shares the swap entry with others, |
871 | * just let do_wp_page work it out if a write is requested later - to | 886 | * just let do_wp_page work it out if a write is requested later - to |
@@ -892,7 +907,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, | |||
892 | } | 907 | } |
893 | 908 | ||
894 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); | 909 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); |
895 | if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) { | 910 | if (unlikely(!maybe_same_pte(*pte, swp_entry_to_pte(entry)))) { |
896 | mem_cgroup_cancel_charge_swapin(memcg); | 911 | mem_cgroup_cancel_charge_swapin(memcg); |
897 | ret = 0; | 912 | ret = 0; |
898 | goto out; | 913 | goto out; |
@@ -947,7 +962,7 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | |||
947 | * swapoff spends a _lot_ of time in this loop! | 962 | * swapoff spends a _lot_ of time in this loop! |
948 | * Test inline before going to call unuse_pte. | 963 | * Test inline before going to call unuse_pte. |
949 | */ | 964 | */ |
950 | if (unlikely(pte_same(*pte, swp_pte))) { | 965 | if (unlikely(maybe_same_pte(*pte, swp_pte))) { |
951 | pte_unmap(pte); | 966 | pte_unmap(pte); |
952 | ret = unuse_pte(vma, pmd, addr, entry, page); | 967 | ret = unuse_pte(vma, pmd, addr, entry, page); |
953 | if (ret) | 968 | if (ret) |