author     Linus Torvalds <torvalds@linux-foundation.org>   2016-12-12 23:50:02 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>   2016-12-12 23:50:02 -0500
commit     e34bac726d27056081d0250c0e173e4b155aa340 (patch)
tree       85607d0b3b185380fb3267866020c6a4372b9298
parent     fe6bce8d30a86c693bf7cfbf4759cbafd121289f (diff)
parent     39a0e975c37dee93fa1b8ea5f7eacd1c4c8a586e (diff)
Merge branch 'akpm' (patches from Andrew)
Merge updates from Andrew Morton:
- various misc bits
- most of MM (quite a lot of MM material is awaiting the merge of
linux-next dependencies)
- kasan
- printk updates
- procfs updates
- MAINTAINERS
- /lib updates
- checkpatch updates
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (123 commits)
init: reduce rootwait polling interval time to 5ms
binfmt_elf: use vmalloc() for allocation of vma_filesz
checkpatch: don't emit unified-diff error for rename-only patches
checkpatch: don't check c99 types like uint8_t under tools
checkpatch: avoid multiple line dereferences
checkpatch: don't check .pl files, improve absolute path commit log test
scripts/checkpatch.pl: fix spelling
checkpatch: don't try to get maintained status when --no-tree is given
lib/ida: document locking requirements a bit better
lib/rbtree.c: fix typo in comment of ____rb_erase_color
lib/Kconfig.debug: make CONFIG_STRICT_DEVMEM depend on CONFIG_DEVMEM
MAINTAINERS: add drm and drm/i915 irc channels
MAINTAINERS: add "C:" for URI for chat where developers hang out
MAINTAINERS: add drm and drm/i915 bug filing info
MAINTAINERS: add "B:" for URI where to file bugs
get_maintainer: look for arbitrary letter prefixes in sections
printk: add Kconfig option to set default console loglevel
printk/sound: handle more message headers
printk/btrfs: handle more message headers
printk/kdb: handle more message headers
...
113 files changed, 1550 insertions, 1041 deletions
diff --git a/Documentation/devicetree/booting-without-of.txt b/Documentation/devicetree/booting-without-of.txt
index 3f1437fbca6b..280d283304bb 100644
--- a/Documentation/devicetree/booting-without-of.txt
+++ b/Documentation/devicetree/booting-without-of.txt
@@ -974,6 +974,13 @@ compatibility.
   4Gb. Some vendors prefer splitting those ranges into smaller
   segments, but the kernel doesn't care.

+  Additional properties:
+
+  - hotpluggable : The presence of this property provides an explicit
+    hint to the operating system that this memory may potentially be
+    removed later. The kernel can take this into consideration when
+    doing nonmovable allocations and when laying out memory zones.
+
   e) The /chosen node

   This node is a bit "special". Normally, that's where Open Firmware
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 74329fd0add2..c03f2f91c6ab 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -191,6 +191,7 @@ read the file /proc/PID/status:
   CapPrm: 0000000000000000
   CapEff: 0000000000000000
   CapBnd: ffffffffffffffff
+  NoNewPrivs: 0
   Seccomp: 0
   voluntary_ctxt_switches: 0
   nonvoluntary_ctxt_switches: 1
@@ -262,6 +263,7 @@ Table 1-2: Contents of the status files (as of 4.1)
  CapPrm                      bitmap of permitted capabilities
  CapEff                      bitmap of effective capabilities
  CapBnd                      bitmap of capabilities bounding set
+ NoNewPrivs                  no_new_privs, like prctl(PR_GET_NO_NEW_PRIV, ...)
  Seccomp                     seccomp mode, like prctl(PR_GET_SECCOMP, ...)
  Cpus_allowed                mask of CPUs on which this process may run
  Cpus_allowed_list           Same as previous, but in "list format"
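For reference (not part of this patch series), the NoNewPrivs field added above mirrors what a process can already query about itself via prctl(2). A minimal userspace sketch, assuming only a Linux host that provides PR_GET_NO_NEW_PRIVS in <sys/prctl.h>:

#include <stdio.h>
#include <sys/prctl.h>

int main(void)
{
        /* Returns 0 or 1, the same value /proc/PID/status now reports as NoNewPrivs. */
        int nnp = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);

        if (nnp < 0) {
                perror("prctl(PR_GET_NO_NEW_PRIVS)");
                return 1;
        }
        printf("NoNewPrivs: %d\n", nnp);
        return 0;
}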
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index c5f1546a440f..6c6141c76eaa 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2397,7 +2397,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        that the amount of memory usable for all allocations
                        is not too small.

-       movable_node    [KNL,X86] Boot-time switch to enable the effects
+       movable_node    [KNL] Boot-time switch to enable the effects
                        of CONFIG_MOVABLE_NODE=y. See mm/Kconfig for details.

        MTD_Partition=  [MTD]
diff --git a/Documentation/vm/transhuge.txt b/Documentation/vm/transhuge.txt
index 2ec6adb5a4ce..c4171e4519c2 100644
--- a/Documentation/vm/transhuge.txt
+++ b/Documentation/vm/transhuge.txt
@@ -136,6 +136,11 @@ or enable it back by writing 1:
 echo 0 >/sys/kernel/mm/transparent_hugepage/use_zero_page
 echo 1 >/sys/kernel/mm/transparent_hugepage/use_zero_page

+Some userspace (such as a test program, or an optimized memory allocation
+library) may want to know the size (in bytes) of a transparent hugepage:
+
+cat /sys/kernel/mm/transparent_hugepage/hpage_pmd_size
+
 khugepaged will be automatically started when
 transparent_hugepage/enabled is set to "always" or "madvise, and it'll
 be automatically shutdown if it's set to "never".
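For reference (not part of the patch), reading the new hpage_pmd_size attribute from a program rather than the shell is straightforward; this sketch only assumes the sysfs path documented above exists on the running kernel:

#include <stdio.h>

int main(void)
{
        unsigned long hpage_size = 0;
        FILE *f = fopen("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", "r");

        if (!f) {
                perror("hpage_pmd_size");
                return 1;
        }
        if (fscanf(f, "%lu", &hpage_size) != 1) {
                fclose(f);
                return 1;
        }
        fclose(f);
        /* Typically 2097152 (2 MiB) on x86-64 with PMD-sized huge pages. */
        printf("THP size: %lu bytes\n", hpage_size);
        return 0;
}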
diff --git a/MAINTAINERS b/MAINTAINERS
index 4e62a0e67df9..88315cfcfb39 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -74,6 +74,10 @@ Descriptions of section entries:
            These reviewers should be CCed on patches.
         L: Mailing list that is relevant to this area
         W: Web-page with status/info
+        B: URI for where to file bugs. A web-page with detailed bug
+           filing info, a direct bug tracker link, or a mailto: URI.
+        C: URI for chat protocol, server and channel where developers
+           usually hang out, for example irc://server/channel.
         Q: Patchwork web based patch tracking system site
         T: SCM tree type and location.
            Type is one of: git, hg, quilt, stgit, topgit
@@ -4024,6 +4028,8 @@ DRM DRIVERS
 M:      David Airlie <airlied@linux.ie>
 L:      dri-devel@lists.freedesktop.org
 T:      git git://people.freedesktop.org/~airlied/linux
+B:      https://bugs.freedesktop.org/
+C:      irc://chat.freenode.net/dri-devel
 S:      Maintained
 F:      drivers/gpu/drm/
 F:      drivers/gpu/vga/
@@ -4076,6 +4082,8 @@ M: Jani Nikula <jani.nikula@linux.intel.com>
 L:      intel-gfx@lists.freedesktop.org
 L:      dri-devel@lists.freedesktop.org
 W:      https://01.org/linuxgraphics/
+B:      https://01.org/linuxgraphics/documentation/how-report-bugs
+C:      irc://chat.freenode.net/intel-gfx
 Q:      http://patchwork.freedesktop.org/project/intel-gfx/
 T:      git git://anongit.freedesktop.org/drm-intel
 S:      Supported
diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index 1e25cd80589e..3f2eb76243e3 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -186,6 +186,8 @@ tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long addr)
        tlb_add_flush(tlb, addr);
 }

+#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \
+       tlb_remove_tlb_entry(tlb, ptep, address)
 /*
  * In the case of tlb vma handling, we can optimise these away in the
  * case where we're doing a full MM flush. When we're doing a munmap,
@@ -211,18 +213,17 @@ tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)

 static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
+       tlb->pages[tlb->nr++] = page;
+       VM_WARN_ON(tlb->nr > tlb->max);
        if (tlb->nr == tlb->max)
                return true;
-       tlb->pages[tlb->nr++] = page;
        return false;
 }

 static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
-       if (__tlb_remove_page(tlb, page)) {
+       if (__tlb_remove_page(tlb, page))
                tlb_flush_mmu(tlb);
-               __tlb_remove_page(tlb, page);
-       }
 }

 static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
@@ -231,12 +232,6 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
        return __tlb_remove_page(tlb, page);
 }

-static inline bool __tlb_remove_pte_page(struct mmu_gather *tlb,
-                                        struct page *page)
-{
-       return __tlb_remove_page(tlb, page);
-}
-
 static inline void tlb_remove_page_size(struct mmu_gather *tlb,
                                        struct page *page, int page_size)
 {
@@ -284,5 +279,11 @@ tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr

 #define tlb_migrate_finish(mm) do { } while (0)

+#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
+static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
+                                                    unsigned int page_size)
+{
+}
+
 #endif /* CONFIG_MMU */
 #endif
diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h
index 77e541cf0e5d..fced197b9626 100644
--- a/arch/ia64/include/asm/tlb.h
+++ b/arch/ia64/include/asm/tlb.h
@@ -207,15 +207,15 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
  */
 static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
-       if (tlb->nr == tlb->max)
-               return true;
-
        tlb->need_flush = 1;

        if (!tlb->nr && tlb->pages == tlb->local)
                __tlb_alloc_page(tlb);

        tlb->pages[tlb->nr++] = page;
+       VM_WARN_ON(tlb->nr > tlb->max);
+       if (tlb->nr == tlb->max)
+               return true;
        return false;
 }

@@ -236,10 +236,8 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb)

 static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
-       if (__tlb_remove_page(tlb, page)) {
+       if (__tlb_remove_page(tlb, page))
                tlb_flush_mmu(tlb);
-               __tlb_remove_page(tlb, page);
-       }
 }

 static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
@@ -248,12 +246,6 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
        return __tlb_remove_page(tlb, page);
 }

-static inline bool __tlb_remove_pte_page(struct mmu_gather *tlb,
-                                        struct page *page)
-{
-       return __tlb_remove_page(tlb, page);
-}
-
 static inline void tlb_remove_page_size(struct mmu_gather *tlb,
                                        struct page *page, int page_size)
 {
@@ -283,6 +275,15 @@ do { \
        __tlb_remove_tlb_entry(tlb, ptep, addr);        \
 } while (0)

+#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address)       \
+       tlb_remove_tlb_entry(tlb, ptep, address)
+
+#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
+static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
+                                                    unsigned int page_size)
+{
+}
+
 #define pte_free_tlb(tlb, ptep, address)               \
 do {                                                   \
        tlb->need_flush = 1;                            \
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index 3cc8498fe0fe..d227a6988d6b 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -34,7 +34,7 @@ config NO_IOPORT_MAP
        def_bool y

 config NO_DMA
-       def_bool y
+       def_bool n

 config HZ
        int
diff --git a/arch/m32r/include/asm/device.h b/arch/m32r/include/asm/device.h
index d8f9872b0e2d..4a9f35e0973f 100644
--- a/arch/m32r/include/asm/device.h
+++ b/arch/m32r/include/asm/device.h
@@ -3,5 +3,9 @@
  *
  * This file is released under the GPLv2
  */
-#include <asm-generic/device.h>
+struct dev_archdata {
+       struct dma_map_ops *dma_ops;
+};

+struct pdev_archdata {
+};
diff --git a/arch/m32r/include/asm/dma-mapping.h b/arch/m32r/include/asm/dma-mapping.h
new file mode 100644
index 000000000000..2c43a77fe942
--- /dev/null
+++ b/arch/m32r/include/asm/dma-mapping.h
@@ -0,0 +1,32 @@
+#ifndef _ASM_M32R_DMA_MAPPING_H
+#define _ASM_M32R_DMA_MAPPING_H
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/scatterlist.h>
+#include <linux/dma-debug.h>
+#include <linux/io.h>
+
+#define DMA_ERROR_CODE (~(dma_addr_t)0x0)
+
+static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+{
+       if (dev && dev->archdata.dma_ops)
+               return dev->archdata.dma_ops;
+       return &dma_noop_ops;
+}
+
+static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
+                                 enum dma_data_direction direction)
+{
+}
+
+static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
+{
+       if (!dev->dma_mask)
+               return false;
+       return addr + size - 1 <= *dev->dma_mask;
+}
+
+#endif /* _ASM_M32R_DMA_MAPPING_H */
diff --git a/arch/m32r/platforms/m32700ut/setup.c b/arch/m32r/platforms/m32700ut/setup.c
index 9a4ba8a8589d..349eb341752c 100644
--- a/arch/m32r/platforms/m32700ut/setup.c
+++ b/arch/m32r/platforms/m32700ut/setup.c
@@ -201,6 +201,7 @@ static struct irq_chip m32700ut_lanpld_irq_type =
 #define lcdpldirq2port(x)      (unsigned long)((int)M32700UT_LCD_ICUCR1 + \
                                (((x) - 1) * sizeof(unsigned short)))

+#ifdef CONFIG_USB
 static pld_icu_data_t lcdpld_icu_data[M32700UT_NUM_LCD_PLD_IRQ];

 static void disable_m32700ut_lcdpld_irq(unsigned int irq)
@@ -253,6 +254,7 @@ static struct irq_chip m32700ut_lcdpld_irq_type =
        .irq_mask = mask_m32700ut_lcdpld,
        .irq_unmask = unmask_m32700ut_lcdpld,
 };
+#endif

 void __init init_IRQ(void)
 {
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 9fd77f8794a0..0ebfbc8f0449 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1009,7 +1009,8 @@ static inline void pmdp_huge_split_prepare(struct vm_area_struct *vma,
 #define pmd_move_must_withdraw pmd_move_must_withdraw
 struct spinlock;
 static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
-                                        struct spinlock *old_pmd_ptl)
+                                        struct spinlock *old_pmd_ptl,
+                                        struct vm_area_struct *vma)
 {
        if (radix_enabled())
                return false;
@@ -1020,6 +1021,16 @@ static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
         */
        return true;
 }
+
+
+#define arch_needs_pgtable_deposit arch_needs_pgtable_deposit
+static inline bool arch_needs_pgtable_deposit(void)
+{
+       if (radix_enabled())
+               return false;
+       return true;
+}
+
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */
diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h
index 99e1397b71da..609557569f65 100644
--- a/arch/powerpc/include/asm/tlb.h
+++ b/arch/powerpc/include/asm/tlb.h
@@ -28,6 +28,7 @@
 #define tlb_start_vma(tlb, vma)        do { } while (0)
 #define tlb_end_vma(tlb, vma)  do { } while (0)
 #define __tlb_remove_tlb_entry __tlb_remove_tlb_entry
+#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change

 extern void tlb_flush(struct mmu_gather *tlb);

@@ -46,6 +47,21 @@ static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep,
 #endif
 }

+static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
+                                                    unsigned int page_size)
+{
+       if (!tlb->page_size)
+               tlb->page_size = page_size;
+       else if (tlb->page_size != page_size) {
+               tlb_flush_mmu(tlb);
+               /*
+                * update the page size after flush for the new
+                * mmu_gather.
+                */
+               tlb->page_size = page_size;
+       }
+}
+
 #ifdef CONFIG_SMP
 static inline int mm_is_core_local(struct mm_struct *mm)
 {
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index a51c188b81f3..0cb6bd8bfccf 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1085,7 +1085,7 @@ static int hot_add_node_scn_to_nid(unsigned long scn_addr)
 int hot_add_scn_to_nid(unsigned long scn_addr)
 {
        struct device_node *memory = NULL;
-       int nid, found = 0;
+       int nid;

        if (!numa_enabled || (min_common_depth < 0))
                return first_online_node;
@@ -1101,17 +1101,6 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
        if (nid < 0 || !node_online(nid))
                nid = first_online_node;

-       if (NODE_DATA(nid)->node_spanned_pages)
-               return nid;
-
-       for_each_online_node(nid) {
-               if (NODE_DATA(nid)->node_spanned_pages) {
-                       found = 1;
-                       break;
-               }
-       }
-
-       BUG_ON(!found);
        return nid;
 }

diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 15711de10403..853b2a3d8dee 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -104,12 +104,6 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
        return __tlb_remove_page(tlb, page);
 }

-static inline bool __tlb_remove_pte_page(struct mmu_gather *tlb,
-                                        struct page *page)
-{
-       return __tlb_remove_page(tlb, page);
-}
-
 static inline void tlb_remove_page_size(struct mmu_gather *tlb,
                                        struct page *page, int page_size)
 {
@@ -162,5 +156,13 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
 #define tlb_remove_tlb_entry(tlb, ptep, addr)  do { } while (0)
 #define tlb_remove_pmd_tlb_entry(tlb, pmdp, addr)      do { } while (0)
 #define tlb_migrate_finish(mm) do { } while (0)
+#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address)       \
+       tlb_remove_tlb_entry(tlb, ptep, address)
+
+#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
+static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
+                                                    unsigned int page_size)
+{
+}

 #endif /* _S390_TLB_H */
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 3ba622702ce4..ec1f0dedb948 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -1015,7 +1015,7 @@ static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr,
        if (slot) {
                rmap->next = radix_tree_deref_slot_protected(slot,
                                                &sg->guest_table_lock);
-               radix_tree_replace_slot(slot, rmap);
+               radix_tree_replace_slot(&sg->host_to_rmap, slot, rmap);
        } else {
                rmap->next = NULL;
                radix_tree_insert(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT,
diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h
index 025cdb1032f6..46e0d635e36f 100644
--- a/arch/sh/include/asm/tlb.h
+++ b/arch/sh/include/asm/tlb.h
@@ -65,6 +65,9 @@ tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long address)
                tlb->end = address + PAGE_SIZE;
 }

+#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address)       \
+       tlb_remove_tlb_entry(tlb, ptep, address)
+
 /*
  * In the case of tlb vma handling, we can optimise these away in the
  * case where we're doing a full MM flush. When we're doing a munmap,
@@ -115,18 +118,18 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
        return __tlb_remove_page(tlb, page);
 }

-static inline bool __tlb_remove_pte_page(struct mmu_gather *tlb,
-                                        struct page *page)
-{
-       return __tlb_remove_page(tlb, page);
-}
-
 static inline void tlb_remove_page_size(struct mmu_gather *tlb,
                                        struct page *page, int page_size)
 {
        return tlb_remove_page(tlb, page);
 }

+#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
+static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
+                                                    unsigned int page_size)
+{
+}
+
 #define pte_free_tlb(tlb, ptep, addr)  pte_free((tlb)->mm, ptep)
 #define pmd_free_tlb(tlb, pmdp, addr)  pmd_free((tlb)->mm, pmdp)
 #define pud_free_tlb(tlb, pudp, addr)  pud_free((tlb)->mm, pudp)
diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h
index 821ff0acfe17..600a2e9bfee2 100644
--- a/arch/um/include/asm/tlb.h
+++ b/arch/um/include/asm/tlb.h
@@ -116,12 +116,6 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
        return __tlb_remove_page(tlb, page);
 }

-static inline bool __tlb_remove_pte_page(struct mmu_gather *tlb,
-                                        struct page *page)
-{
-       return __tlb_remove_page(tlb, page);
-}
-
 static inline void tlb_remove_page_size(struct mmu_gather *tlb,
                                        struct page *page, int page_size)
 {
@@ -141,6 +135,15 @@ static inline void tlb_remove_page_size(struct mmu_gather *tlb,
        __tlb_remove_tlb_entry(tlb, ptep, address);     \
 } while (0)

+#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address)       \
+       tlb_remove_tlb_entry(tlb, ptep, address)
+
+#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
+static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
+                                                    unsigned int page_size)
+{
+}
+
 #define pte_free_tlb(tlb, ptep, addr) __pte_free_tlb(tlb, ptep, addr)

 #define pud_free_tlb(tlb, pudp, addr) __pud_free_tlb(tlb, pudp, addr)
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index f09df2ff1bcc..d4a15831ac58 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -93,7 +93,7 @@ static void free_ldt_struct(struct ldt_struct *ldt)

        paravirt_free_ldt(ldt->entries, ldt->size);
        if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE)
-               vfree(ldt->entries);
+               vfree_atomic(ldt->entries);
        else
                free_page((unsigned long)ldt->entries);
        kfree(ldt);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 9c337b0e8ba7..4cfba947d774 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -985,6 +985,30 @@ void __init setup_arch(char **cmdline_p)

        parse_early_param();

+#ifdef CONFIG_MEMORY_HOTPLUG
+       /*
+        * Memory used by the kernel cannot be hot-removed because Linux
+        * cannot migrate the kernel pages. When memory hotplug is
+        * enabled, we should prevent memblock from allocating memory
+        * for the kernel.
+        *
+        * ACPI SRAT records all hotpluggable memory ranges. But before
+        * SRAT is parsed, we don't know about it.
+        *
+        * The kernel image is loaded into memory at very early time. We
+        * cannot prevent this anyway. So on NUMA system, we set any
+        * node the kernel resides in as un-hotpluggable.
+        *
+        * Since on modern servers, one node could have double-digit
+        * gigabytes memory, we can assume the memory around the kernel
+        * image is also un-hotpluggable. So before SRAT is parsed, just
+        * allocate memory near the kernel image to try the best to keep
+        * the kernel away from hotpluggable memory.
+        */
+       if (movable_node_is_enabled())
+               memblock_set_bottom_up(true);
+#endif
+
        x86_report_nx();

        /* after early param, so could get panic from serial */
diff --git a/block/blk-settings.c b/block/blk-settings.c
index f679ae122843..65f16cf4f850 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -249,6 +249,7 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto
        max_sectors = min_not_zero(max_hw_sectors, limits->max_dev_sectors);
        max_sectors = min_t(unsigned int, max_sectors, BLK_DEF_MAX_SECTORS);
        limits->max_sectors = max_sectors;
+       q->backing_dev_info.io_pages = max_sectors >> (PAGE_SHIFT - 9);
 }
 EXPORT_SYMBOL(blk_queue_max_hw_sectors);

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 9cc8d7c5439a..ea374e820775 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -212,6 +212,7 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)

        spin_lock_irq(q->queue_lock);
        q->limits.max_sectors = max_sectors_kb << 1;
+       q->backing_dev_info.io_pages = max_sectors_kb >> (PAGE_SHIFT - 10);
        spin_unlock_irq(q->queue_lock);

        return ret;
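Both io_pages assignments above convert the same queue limit into a page count; blk-settings.c starts from 512-byte sectors, the sysfs store from KiB, hence the different shift amounts. A standalone sketch of the arithmetic (not kernel code; it assumes 4 KiB pages, i.e. PAGE_SHIFT == 12):

#include <stdio.h>

int main(void)
{
        const unsigned int page_shift = 12;             /* assumed: 4 KiB pages */
        unsigned int max_sectors = 2560;                /* 512-byte sectors, i.e. 1280 KiB */
        unsigned int max_sectors_kb = max_sectors >> 1; /* the same limit expressed in KiB */

        /* blk-settings.c path: sectors -> pages, shift by (12 - 9), i.e. divide by 8 */
        printf("io_pages from sectors: %u\n", max_sectors >> (page_shift - 9));
        /* blk-sysfs.c path: KiB -> pages, shift by (12 - 10), i.e. divide by 4 */
        printf("io_pages from KiB:     %u\n", max_sectors_kb >> (page_shift - 10));
        return 0;  /* both print 320 */
}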
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index c89d5d231a0e..c9b5cac03b36 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -1015,6 +1015,7 @@ int __init early_init_dt_scan_memory(unsigned long node, const char *uname,
        const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
        const __be32 *reg, *endp;
        int l;
+       bool hotpluggable;

        /* We are scanning "memory" nodes only */
        if (type == NULL) {
@@ -1034,6 +1035,7 @@ int __init early_init_dt_scan_memory(unsigned long node, const char *uname,
                return 0;

        endp = reg + (l / sizeof(__be32));
+       hotpluggable = of_get_flat_dt_prop(node, "hotpluggable", NULL);

        pr_debug("memory scan node %s, reg size %d,\n", uname, l);

@@ -1049,6 +1051,13 @@ int __init early_init_dt_scan_memory(unsigned long node, const char *uname,
                    (unsigned long long)size);

                early_init_dt_add_memory_arch(base, size);
+
+               if (!hotpluggable)
+                       continue;
+
+               if (early_init_dt_mark_hotplug_memory_arch(base, size))
+                       pr_warn("failed to mark hotplug range 0x%llx - 0x%llx\n",
+                               base, base + size);
        }

        return 0;
@@ -1146,6 +1155,11 @@ void __init __weak early_init_dt_add_memory_arch(u64 base, u64 size)
        memblock_add(base, size);
 }

+int __init __weak early_init_dt_mark_hotplug_memory_arch(u64 base, u64 size)
+{
+       return memblock_mark_hotplug(base, size);
+}
+
 int __init __weak early_init_dt_reserve_memory_arch(phys_addr_t base,
                                        phys_addr_t size, bool nomap)
 {
@@ -1168,6 +1182,11 @@ void __init __weak early_init_dt_add_memory_arch(u64 base, u64 size)
        WARN_ON(1);
 }

+int __init __weak early_init_dt_mark_hotplug_memory_arch(u64 base, u64 size)
+{
+       return -ENOSYS;
+}
+
 int __init __weak early_init_dt_reserve_memory_arch(phys_addr_t base,
                                        phys_addr_t size, bool nomap)
 {
diff --git a/drivers/pcmcia/m32r_pcc.c b/drivers/pcmcia/m32r_pcc.c
index eb126b98ed8a..e50bbf826188 100644
--- a/drivers/pcmcia/m32r_pcc.c
+++ b/drivers/pcmcia/m32r_pcc.c
@@ -296,10 +296,11 @@ static int __init is_alive(u_short sock)
        return 0;
 }

-static void add_pcc_socket(ulong base, int irq, ulong mapaddr,
+static int add_pcc_socket(ulong base, int irq, ulong mapaddr,
                           unsigned int ioaddr)
 {
        pcc_socket_t *t = &socket[pcc_sockets];
+       int err;

        /* add sockets */
        t->ioaddr = ioaddr;
@@ -328,11 +329,16 @@ static void add_pcc_socket(ulong base, int irq, ulong mapaddr,
        t->socket.irq_mask = 0;
        t->socket.pci_irq = 2 + pcc_sockets; /* XXX */

-       request_irq(irq, pcc_interrupt, 0, "m32r-pcc", pcc_interrupt);
+       err = request_irq(irq, pcc_interrupt, 0, "m32r-pcc", pcc_interrupt);
+       if (err) {
+               if (t->base > 0)
+                       release_region(t->base, 0x20);
+               return err;
+       }

        pcc_sockets++;

-       return;
+       return 0;
 }


@@ -683,26 +689,29 @@ static int __init init_m32r_pcc(void)
                return ret;

        ret = platform_device_register(&pcc_device);
-       if (ret){
-               platform_driver_unregister(&pcc_driver);
-               return ret;
-       }
+       if (ret)
+               goto unreg_driv;

        printk(KERN_INFO "m32r PCC probe:\n");

        pcc_sockets = 0;

-       add_pcc_socket(M32R_PCC0_BASE, PCC0_IRQ, M32R_PCC0_MAPBASE, 0x1000);
+       ret = add_pcc_socket(M32R_PCC0_BASE, PCC0_IRQ, M32R_PCC0_MAPBASE,
+                            0x1000);
+       if (ret)
+               goto unreg_dev;

 #ifdef CONFIG_M32RPCC_SLOT2
-       add_pcc_socket(M32R_PCC1_BASE, PCC1_IRQ, M32R_PCC1_MAPBASE, 0x2000);
+       ret = add_pcc_socket(M32R_PCC1_BASE, PCC1_IRQ, M32R_PCC1_MAPBASE,
+                            0x2000);
+       if (ret)
+               goto unreg_dev;
 #endif

        if (pcc_sockets == 0) {
                printk("socket is not found.\n");
-               platform_device_unregister(&pcc_device);
-               platform_driver_unregister(&pcc_driver);
-               return -ENODEV;
+               ret = -ENODEV;
+               goto unreg_dev;
        }

        /* Set up interrupt handler(s) */
@@ -728,6 +737,12 @@ static int __init init_m32r_pcc(void)
        }

        return 0;
+
+unreg_dev:
+       platform_device_unregister(&pcc_device);
+unreg_driv:
+       platform_driver_unregister(&pcc_driver);
+       return ret;
 } /* init_m32r_pcc */

 static void __exit exit_m32r_pcc(void)
diff --git a/drivers/sh/intc/virq.c b/drivers/sh/intc/virq.c
index e7899624aa0b..35bbe288ddb4 100644
--- a/drivers/sh/intc/virq.c
+++ b/drivers/sh/intc/virq.c
@@ -254,7 +254,7 @@ restart:

                radix_tree_tag_clear(&d->tree, entry->enum_id,
                                     INTC_TAG_VIRQ_NEEDS_ALLOC);
-               radix_tree_replace_slot((void **)entries[i],
+               radix_tree_replace_slot(&d->tree, (void **)entries[i],
                                        &intc_irq_xlate[irq]);
        }

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 2472af2798c7..e6c1bd443806 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -2204,7 +2204,9 @@ static int elf_core_dump(struct coredump_params *cprm)

        dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);

-       vma_filesz = kmalloc_array(segs - 1, sizeof(*vma_filesz), GFP_KERNEL);
+       if (segs - 1 > ULONG_MAX / sizeof(*vma_filesz))
+               goto end_coredump;
+       vma_filesz = vmalloc((segs - 1) * sizeof(*vma_filesz));
        if (!vma_filesz)
                goto end_coredump;

@@ -2311,7 +2313,7 @@ end_coredump:
 cleanup:
        free_note_info(&info);
        kfree(shdr4extnum);
-       kfree(vma_filesz);
+       vfree(vma_filesz);
        kfree(phdr4note);
        kfree(elf);
 out:
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 74ed5aae6cea..180f910339f4 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -202,27 +202,31 @@ static struct ratelimit_state printk_limits[] = {
 void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
 {
        struct super_block *sb = fs_info->sb;
-       char lvl[4];
+       char lvl[PRINTK_MAX_SINGLE_HEADER_LEN + 1];
        struct va_format vaf;
        va_list args;
-       const char *type = logtypes[4];
+       const char *type = NULL;
        int kern_level;
        struct ratelimit_state *ratelimit;

        va_start(args, fmt);

-       kern_level = printk_get_level(fmt);
-       if (kern_level) {
+       while ((kern_level = printk_get_level(fmt)) != 0) {
                size_t size = printk_skip_level(fmt) - fmt;
-               memcpy(lvl, fmt, size);
-               lvl[size] = '\0';
+
+               if (kern_level >= '0' && kern_level <= '7') {
+                       memcpy(lvl, fmt, size);
+                       lvl[size] = '\0';
+                       type = logtypes[kern_level - '0'];
+                       ratelimit = &printk_limits[kern_level - '0'];
+               }
                fmt += size;
-               type = logtypes[kern_level - '0'];
-               ratelimit = &printk_limits[kern_level - '0'];
-       } else {
+       }
+
+       if (!type) {
                *lvl = '\0';
-               /* Default to debug output */
-               ratelimit = &printk_limits[7];
+               type = logtypes[4];
+               ratelimit = &printk_limits[4];
        }

        vaf.fmt = fmt;
diff --git a/fs/dax.c b/fs/dax.c
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -342,7 +342,7 @@ static inline void *lock_slot(struct address_space *mapping, void **slot)
                radix_tree_deref_slot_protected(slot, &mapping->tree_lock);

        entry |= RADIX_DAX_ENTRY_LOCK;
-       radix_tree_replace_slot(slot, (void *)entry);
+       radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry);
        return (void *)entry;
 }

@@ -356,7 +356,7 @@ static inline void *unlock_slot(struct address_space *mapping, void **slot)
                radix_tree_deref_slot_protected(slot, &mapping->tree_lock);

        entry &= ~(unsigned long)RADIX_DAX_ENTRY_LOCK;
-       radix_tree_replace_slot(slot, (void *)entry);
+       radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry);
        return (void *)entry;
 }

@@ -643,12 +643,14 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
                }
                mapping->nrexceptional++;
        } else {
+               struct radix_tree_node *node;
                void **slot;
                void *ret;

-               ret = __radix_tree_lookup(page_tree, index, NULL, &slot);
+               ret = __radix_tree_lookup(page_tree, index, &node, &slot);
                WARN_ON_ONCE(ret != entry);
-               radix_tree_replace_slot(slot, new_entry);
+               __radix_tree_replace(page_tree, node, slot,
+                                    new_entry, NULL, NULL);
        }
        if (vmf->flags & FAULT_FLAG_WRITE)
                radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 05713a5da083..ef600591d96f 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1769,15 +1769,13 @@ static long wb_writeback(struct bdi_writeback *wb,
                 * become available for writeback. Otherwise
                 * we'll just busyloop.
                 */
-               if (!list_empty(&wb->b_more_io)) {
-                       trace_writeback_wait(wb, work);
-                       inode = wb_inode(wb->b_more_io.prev);
-                       spin_lock(&inode->i_lock);
-                       spin_unlock(&wb->list_lock);
-                       /* This function drops i_lock... */
-                       inode_sleep_on_writeback(inode);
-                       spin_lock(&wb->list_lock);
-               }
+               trace_writeback_wait(wb, work);
+               inode = wb_inode(wb->b_more_io.prev);
+               spin_lock(&inode->i_lock);
+               spin_unlock(&wb->list_lock);
+               /* This function drops i_lock... */
+               inode_sleep_on_writeback(inode);
+               spin_lock(&wb->list_lock);
        }
        spin_unlock(&wb->list_lock);
        blk_finish_plug(&plug);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index c5c5b9748ea3..9a88984f9f6f 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1950,8 +1950,7 @@ static void ocfs2_write_end_inline(struct inode *inode, loff_t pos,
 }

 int ocfs2_write_end_nolock(struct address_space *mapping,
-                          loff_t pos, unsigned len, unsigned copied,
-                          struct page *page, void *fsdata)
+                          loff_t pos, unsigned len, unsigned copied, void *fsdata)
 {
        int i, ret;
        unsigned from, to, start = pos & (PAGE_SIZE - 1);
@@ -2064,7 +2063,7 @@ static int ocfs2_write_end(struct file *file, struct address_space *mapping,
        int ret;
        struct inode *inode = mapping->host;

-       ret = ocfs2_write_end_nolock(mapping, pos, len, copied, page, fsdata);
+       ret = ocfs2_write_end_nolock(mapping, pos, len, copied, fsdata);

        up_write(&OCFS2_I(inode)->ip_alloc_sem);
        ocfs2_inode_unlock(inode, 1);
@@ -2241,7 +2240,7 @@ static int ocfs2_dio_get_block(struct inode *inode, sector_t iblock,
                dwc->dw_zero_count++;
        }

-       ret = ocfs2_write_end_nolock(inode->i_mapping, pos, len, len, NULL, wc);
+       ret = ocfs2_write_end_nolock(inode->i_mapping, pos, len, len, wc);
        BUG_ON(ret != len);
        ret = 0;
 unlock:
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index b1c9f28a57b1..8614ff069d99 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -44,8 +44,7 @@ int walk_page_buffers( handle_t *handle,
                                        struct buffer_head *bh));

 int ocfs2_write_end_nolock(struct address_space *mapping,
-                          loff_t pos, unsigned len, unsigned copied,
-                          struct page *page, void *fsdata);
+                          loff_t pos, unsigned len, unsigned copied, void *fsdata);

 typedef enum {
        OCFS2_WRITE_BUFFER = 0,
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 636abcbd4650..9158c9825094 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -741,7 +741,7 @@ static inline void o2hb_prepare_block(struct o2hb_region *reg,
        hb_block = (struct o2hb_disk_heartbeat_block *)slot->ds_raw_block;
        memset(hb_block, 0, reg->hr_block_bytes);
        /* TODO: time stuff */
-       cputime = CURRENT_TIME.tv_sec;
+       cputime = ktime_get_real_seconds();
        if (!cputime)
                cputime = 1;

diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 3f828a187049..a464c8088170 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -1609,8 +1609,6 @@ way_up_top:
                __dlm_insert_mle(dlm, mle);
                response = DLM_MASTER_RESP_NO;
        } else {
-               // mlog(0, "mle was found\n");
-               set_maybe = 1;
                spin_lock(&tmpmle->spinlock);
                if (tmpmle->master == dlm->node_num) {
                        mlog(ML_ERROR, "no lockres, but an mle with this node as master!\n");
@@ -1625,8 +1623,7 @@ way_up_top:
                        response = DLM_MASTER_RESP_NO;
                } else
                        response = DLM_MASTER_RESP_MAYBE;
-               if (set_maybe)
-                       set_bit(request->node_idx, tmpmle->maybe_map);
+               set_bit(request->node_idx, tmpmle->maybe_map);
                spin_unlock(&tmpmle->spinlock);
        }
        spin_unlock(&dlm->master_lock);
@@ -1644,12 +1641,6 @@ send_response:
         * dlm_assert_master_worker() isn't called, we drop it here.
         */
        if (dispatch_assert) {
-               if (response != DLM_MASTER_RESP_YES)
-                       mlog(ML_ERROR, "invalid response %d\n", response);
-               if (!res) {
-                       mlog(ML_ERROR, "bad lockres while trying to assert!\n");
-                       BUG();
-               }
                mlog(0, "%u is the owner of %.*s, cleaning everyone else\n",
                     dlm->node_num, res->lockname.len, res->lockname.name);
                spin_lock(&res->spinlock);
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index dd5cb8bcefd1..74407c6dd592 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -2966,8 +2966,6 @@ int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data,
                spin_unlock(&dlm->spinlock);
                dlm_kick_recovery_thread(dlm);
                break;
-       default:
-               BUG();
        }

        mlog(0, "%s: recovery done, reco master was %u, dead now %u, master now %u\n",
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index c56a7679df93..382401d3e88f 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -703,7 +703,7 @@ static int ocfs2_remove_inode(struct inode *inode,
                goto bail_commit;
        }

-       di->i_dtime = cpu_to_le64(CURRENT_TIME.tv_sec);
+       di->i_dtime = cpu_to_le64(ktime_get_real_seconds());
        di->i_flags &= cpu_to_le32(~(OCFS2_VALID_FL | OCFS2_ORPHANED_FL));
        ocfs2_journal_dirty(handle, di_bh);

diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index a244f14c6b87..d5e5fa7f0743 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1947,7 +1947,7 @@ static void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
         */
        seqno++;
        os->os_count++;
-       os->os_scantime = CURRENT_TIME;
+       os->os_scantime = ktime_get_seconds();
 unlock:
        ocfs2_orphan_scan_unlock(osb, seqno);
 out:
@@ -2004,7 +2004,7 @@ void ocfs2_orphan_scan_start(struct ocfs2_super *osb)
        struct ocfs2_orphan_scan *os;

        os = &osb->osb_orphan_scan;
-       os->os_scantime = CURRENT_TIME;
+       os->os_scantime = ktime_get_seconds();
        if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
                atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
        else {
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 71545ad4628c..429088786e93 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -120,8 +120,7 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh,
                ret = VM_FAULT_NOPAGE;
                goto out;
        }
-       ret = ocfs2_write_end_nolock(mapping, pos, len, len, locked_page,
-                                    fsdata);
+       ret = ocfs2_write_end_nolock(mapping, pos, len, len, fsdata);
        BUG_ON(ret != len);
        ret = VM_FAULT_LOCKED;
 out:
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 8d887c75765c..3b0a10d9b36f 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -516,6 +516,7 @@ static int __ocfs2_mknod_locked(struct inode *dir,
        struct ocfs2_extent_list *fel;
        u16 feat;
        struct ocfs2_inode_info *oi = OCFS2_I(inode);
+       struct timespec64 ts;

        *new_fe_bh = NULL;

@@ -564,10 +565,11 @@ static int __ocfs2_mknod_locked(struct inode *dir,
        fe->i_last_eb_blk = 0;
        strcpy(fe->i_signature, OCFS2_INODE_SIGNATURE);
        fe->i_flags |= cpu_to_le32(OCFS2_VALID_FL);
+       ktime_get_real_ts64(&ts);
        fe->i_atime = fe->i_ctime = fe->i_mtime =
-               cpu_to_le64(CURRENT_TIME.tv_sec);
+               cpu_to_le64(ts.tv_sec);
        fe->i_mtime_nsec = fe->i_ctime_nsec = fe->i_atime_nsec =
-               cpu_to_le32(CURRENT_TIME.tv_nsec);
+               cpu_to_le32(ts.tv_nsec);
        fe->i_dtime = 0;

        /*
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index e63af7ddfe68..7e5958b0be6b 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
@@ -224,7 +224,7 @@ struct ocfs2_orphan_scan { | |||
224 | struct ocfs2_super *os_osb; | 224 | struct ocfs2_super *os_osb; |
225 | struct ocfs2_lock_res os_lockres; /* lock to synchronize scans */ | 225 | struct ocfs2_lock_res os_lockres; /* lock to synchronize scans */ |
226 | struct delayed_work os_orphan_scan_work; | 226 | struct delayed_work os_orphan_scan_work; |
227 | struct timespec os_scantime; /* time this node ran the scan */ | 227 | time64_t os_scantime; /* time this node ran the scan */ |
228 | u32 os_count; /* tracks node specific scans */ | 228 | u32 os_count; /* tracks node specific scans */ |
229 | u32 os_seqno; /* tracks cluster wide scans */ | 229 | u32 os_seqno; /* tracks cluster wide scans */ |
230 | atomic_t os_state; /* ACTIVE or INACTIVE */ | 230 | atomic_t os_state; /* ACTIVE or INACTIVE */ |
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 19238512a324..738b4ea8e990 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c | |||
@@ -478,7 +478,6 @@ again: | |||
478 | if (ret) { | 478 | if (ret) { |
479 | mlog_errno(ret); | 479 | mlog_errno(ret); |
480 | ocfs2_unlock_refcount_tree(osb, tree, rw); | 480 | ocfs2_unlock_refcount_tree(osb, tree, rw); |
481 | ocfs2_refcount_tree_put(tree); | ||
482 | goto out; | 481 | goto out; |
483 | } | 482 | } |
484 | 483 | ||
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index f56fe39fab04..c894d945b084 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -337,7 +337,7 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) | |||
337 | out += snprintf(buf + out, len - out, "Disabled\n"); | 337 | out += snprintf(buf + out, len - out, "Disabled\n"); |
338 | else | 338 | else |
339 | out += snprintf(buf + out, len - out, "%lu seconds ago\n", | 339 | out += snprintf(buf + out, len - out, "%lu seconds ago\n", |
340 | (get_seconds() - os->os_scantime.tv_sec)); | 340 | (unsigned long)(ktime_get_seconds() - os->os_scantime)); |
341 | 341 | ||
342 | out += snprintf(buf + out, len - out, "%10s => %3s %10s\n", | 342 | out += snprintf(buf + out, len - out, "%10s => %3s %10s\n", |
343 | "Slots", "Num", "RecoGen"); | 343 | "Slots", "Num", "RecoGen"); |
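The journal.c, ocfs2.h and super.c hunks together switch os_scantime from a struct timespec to a monotonic time64_t, so the "seconds ago" report cannot jump when the wall clock is stepped. A sketch of the record-then-report pattern with illustrative names; ktime_get_seconds() is the real helper:

	#include <linux/timekeeping.h>

	static time64_t example_scantime;	/* stand-in for os->os_scantime */

	static void example_mark_scan(void)
	{
		example_scantime = ktime_get_seconds();	/* monotonic, 64-bit */
	}

	static unsigned long example_seconds_since_scan(void)
	{
		return (unsigned long)(ktime_get_seconds() - example_scantime);
	}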
diff --git a/fs/proc/array.c b/fs/proc/array.c index 81818adb8e9e..51a4213afa2e 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c | |||
@@ -245,7 +245,7 @@ void render_sigset_t(struct seq_file *m, const char *header, | |||
245 | if (sigismember(set, i+2)) x |= 2; | 245 | if (sigismember(set, i+2)) x |= 2; |
246 | if (sigismember(set, i+3)) x |= 4; | 246 | if (sigismember(set, i+3)) x |= 4; |
247 | if (sigismember(set, i+4)) x |= 8; | 247 | if (sigismember(set, i+4)) x |= 8; |
248 | seq_printf(m, "%x", x); | 248 | seq_putc(m, hex_asc[x]); |
249 | } while (i >= 4); | 249 | } while (i >= 4); |
250 | 250 | ||
251 | seq_putc(m, '\n'); | 251 | seq_putc(m, '\n'); |
@@ -342,10 +342,11 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p) | |||
342 | 342 | ||
343 | static inline void task_seccomp(struct seq_file *m, struct task_struct *p) | 343 | static inline void task_seccomp(struct seq_file *m, struct task_struct *p) |
344 | { | 344 | { |
345 | seq_put_decimal_ull(m, "NoNewPrivs:\t", task_no_new_privs(p)); | ||
345 | #ifdef CONFIG_SECCOMP | 346 | #ifdef CONFIG_SECCOMP |
346 | seq_put_decimal_ull(m, "Seccomp:\t", p->seccomp.mode); | 347 | seq_put_decimal_ull(m, "\nSeccomp:\t", p->seccomp.mode); |
347 | seq_putc(m, '\n'); | ||
348 | #endif | 348 | #endif |
349 | seq_putc(m, '\n'); | ||
349 | } | 350 | } |
350 | 351 | ||
351 | static inline void task_context_switch_counts(struct seq_file *m, | 352 | static inline void task_context_switch_counts(struct seq_file *m, |
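Two independent tweaks in fs/proc/array.c: the sigset renderer emits each 4-bit group as a single character through the hex_asc[] lookup table instead of a seq_printf("%x") call, and NoNewPrivs is printed unconditionally while the trailing newline moves outside the CONFIG_SECCOMP block so it is emitted exactly once. A small sketch of the nibble-to-hex-digit trick (the function name is illustrative):

	#include <linux/kernel.h>	/* hex_asc[] = "0123456789abcdef" */
	#include <linux/seq_file.h>

	static void example_put_nibble(struct seq_file *m, unsigned int x)
	{
		/* One table lookup and one character append, no format parsing. */
		seq_putc(m, hex_asc[x & 0xf]);
	}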
diff --git a/fs/proc/base.c b/fs/proc/base.c index ca651ac00660..9b99df4893a4 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -104,9 +104,12 @@ | |||
104 | * in /proc for a task before it execs a suid executable. | 104 | * in /proc for a task before it execs a suid executable. |
105 | */ | 105 | */ |
106 | 106 | ||
107 | static u8 nlink_tid; | ||
108 | static u8 nlink_tgid; | ||
109 | |||
107 | struct pid_entry { | 110 | struct pid_entry { |
108 | const char *name; | 111 | const char *name; |
109 | int len; | 112 | unsigned int len; |
110 | umode_t mode; | 113 | umode_t mode; |
111 | const struct inode_operations *iop; | 114 | const struct inode_operations *iop; |
112 | const struct file_operations *fop; | 115 | const struct file_operations *fop; |
@@ -139,13 +142,13 @@ struct pid_entry { | |||
139 | * Count the number of hardlinks for the pid_entry table, excluding the . | 142 | * Count the number of hardlinks for the pid_entry table, excluding the . |
140 | * and .. links. | 143 | * and .. links. |
141 | */ | 144 | */ |
142 | static unsigned int pid_entry_count_dirs(const struct pid_entry *entries, | 145 | static unsigned int __init pid_entry_nlink(const struct pid_entry *entries, |
143 | unsigned int n) | 146 | unsigned int n) |
144 | { | 147 | { |
145 | unsigned int i; | 148 | unsigned int i; |
146 | unsigned int count; | 149 | unsigned int count; |
147 | 150 | ||
148 | count = 0; | 151 | count = 2; |
149 | for (i = 0; i < n; ++i) { | 152 | for (i = 0; i < n; ++i) { |
150 | if (S_ISDIR(entries[i].mode)) | 153 | if (S_ISDIR(entries[i].mode)) |
151 | ++count; | 154 | ++count; |
@@ -1967,7 +1970,7 @@ out: | |||
1967 | 1970 | ||
1968 | struct map_files_info { | 1971 | struct map_files_info { |
1969 | fmode_t mode; | 1972 | fmode_t mode; |
1970 | unsigned long len; | 1973 | unsigned int len; |
1971 | unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */ | 1974 | unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */ |
1972 | }; | 1975 | }; |
1973 | 1976 | ||
@@ -2412,14 +2415,14 @@ static struct dentry *proc_pident_lookup(struct inode *dir, | |||
2412 | * Yes, it does not scale. And it should not. Don't add | 2415 | * Yes, it does not scale. And it should not. Don't add |
2413 | * new entries into /proc/<tgid>/ without very good reasons. | 2416 | * new entries into /proc/<tgid>/ without very good reasons. |
2414 | */ | 2417 | */ |
2415 | last = &ents[nents - 1]; | 2418 | last = &ents[nents]; |
2416 | for (p = ents; p <= last; p++) { | 2419 | for (p = ents; p < last; p++) { |
2417 | if (p->len != dentry->d_name.len) | 2420 | if (p->len != dentry->d_name.len) |
2418 | continue; | 2421 | continue; |
2419 | if (!memcmp(dentry->d_name.name, p->name, p->len)) | 2422 | if (!memcmp(dentry->d_name.name, p->name, p->len)) |
2420 | break; | 2423 | break; |
2421 | } | 2424 | } |
2422 | if (p > last) | 2425 | if (p >= last) |
2423 | goto out; | 2426 | goto out; |
2424 | 2427 | ||
2425 | error = proc_pident_instantiate(dir, dentry, task, p); | 2428 | error = proc_pident_instantiate(dir, dentry, task, p); |
@@ -2444,7 +2447,7 @@ static int proc_pident_readdir(struct file *file, struct dir_context *ctx, | |||
2444 | if (ctx->pos >= nents + 2) | 2447 | if (ctx->pos >= nents + 2) |
2445 | goto out; | 2448 | goto out; |
2446 | 2449 | ||
2447 | for (p = ents + (ctx->pos - 2); p <= ents + nents - 1; p++) { | 2450 | for (p = ents + (ctx->pos - 2); p < ents + nents; p++) { |
2448 | if (!proc_fill_cache(file, ctx, p->name, p->len, | 2451 | if (!proc_fill_cache(file, ctx, p->name, p->len, |
2449 | proc_pident_instantiate, task, p)) | 2452 | proc_pident_instantiate, task, p)) |
2450 | break; | 2453 | break; |
@@ -3068,8 +3071,7 @@ static int proc_pid_instantiate(struct inode *dir, | |||
3068 | inode->i_fop = &proc_tgid_base_operations; | 3071 | inode->i_fop = &proc_tgid_base_operations; |
3069 | inode->i_flags|=S_IMMUTABLE; | 3072 | inode->i_flags|=S_IMMUTABLE; |
3070 | 3073 | ||
3071 | set_nlink(inode, 2 + pid_entry_count_dirs(tgid_base_stuff, | 3074 | set_nlink(inode, nlink_tgid); |
3072 | ARRAY_SIZE(tgid_base_stuff))); | ||
3073 | 3075 | ||
3074 | d_set_d_op(dentry, &pid_dentry_operations); | 3076 | d_set_d_op(dentry, &pid_dentry_operations); |
3075 | 3077 | ||
@@ -3361,8 +3363,7 @@ static int proc_task_instantiate(struct inode *dir, | |||
3361 | inode->i_fop = &proc_tid_base_operations; | 3363 | inode->i_fop = &proc_tid_base_operations; |
3362 | inode->i_flags|=S_IMMUTABLE; | 3364 | inode->i_flags|=S_IMMUTABLE; |
3363 | 3365 | ||
3364 | set_nlink(inode, 2 + pid_entry_count_dirs(tid_base_stuff, | 3366 | set_nlink(inode, nlink_tid); |
3365 | ARRAY_SIZE(tid_base_stuff))); | ||
3366 | 3367 | ||
3367 | d_set_d_op(dentry, &pid_dentry_operations); | 3368 | d_set_d_op(dentry, &pid_dentry_operations); |
3368 | 3369 | ||
@@ -3552,3 +3553,9 @@ static const struct file_operations proc_task_operations = { | |||
3552 | .iterate_shared = proc_task_readdir, | 3553 | .iterate_shared = proc_task_readdir, |
3553 | .llseek = generic_file_llseek, | 3554 | .llseek = generic_file_llseek, |
3554 | }; | 3555 | }; |
3556 | |||
3557 | void __init set_proc_pid_nlink(void) | ||
3558 | { | ||
3559 | nlink_tid = pid_entry_nlink(tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); | ||
3560 | nlink_tgid = pid_entry_nlink(tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); | ||
3561 | } | ||
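The fs/proc/base.c changes stop recounting the entries in tgid_base_stuff/tid_base_stuff on every /proc/<pid> instantiation: pid_entry_nlink() now starts from 2 (for "." and ".."), runs once from set_proc_pid_nlink() at init, and the cached nlink_tgid/nlink_tid values are handed straight to set_nlink(); the lookup loops also switch to the conventional p < end bound. A sketch of the count-once-at-init pattern, using made-up stand-ins for struct pid_entry and its table:

	#include <linux/init.h>
	#include <linux/stat.h>
	#include <linux/types.h>

	struct example_entry {			/* illustrative, not struct pid_entry */
		const char *name;
		umode_t mode;
	};

	static u8 example_nlink;

	static unsigned int __init example_count_nlink(const struct example_entry *e,
						       unsigned int n)
	{
		unsigned int i, count = 2;	/* "." and ".." */

		for (i = 0; i < n; i++)
			if (S_ISDIR(e[i].mode))
				count++;
		return count;
	}

	static void __init example_nlink_init(const struct example_entry *tbl,
					      unsigned int n)
	{
		/* Run once at boot; later instantiations just read example_nlink. */
		example_nlink = example_count_nlink(tbl, n);
	}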
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index e69ebe648a34..783bc19644d1 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c | |||
@@ -138,6 +138,16 @@ static void unuse_pde(struct proc_dir_entry *pde) | |||
138 | /* pde is locked */ | 138 | /* pde is locked */ |
139 | static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo) | 139 | static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo) |
140 | { | 140 | { |
141 | /* | ||
142 | * close() (proc_reg_release()) can't delete an entry and proceed: | ||
143 | * ->release hook needs to be available at the right moment. | ||
144 | * | ||
145 | * rmmod (remove_proc_entry() et al) can't delete an entry and proceed: | ||
146 | * "struct file" needs to be available at the right moment. | ||
147 | * | ||
148 | * Therefore, first process to enter this function does ->release() and | ||
149 | * signals its completion to the other process which does nothing. | ||
150 | */ | ||
141 | if (pdeo->closing) { | 151 | if (pdeo->closing) { |
142 | /* somebody else is doing that, just wait */ | 152 | /* somebody else is doing that, just wait */ |
143 | DECLARE_COMPLETION_ONSTACK(c); | 153 | DECLARE_COMPLETION_ONSTACK(c); |
@@ -147,12 +157,13 @@ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo) | |||
147 | spin_lock(&pde->pde_unload_lock); | 157 | spin_lock(&pde->pde_unload_lock); |
148 | } else { | 158 | } else { |
149 | struct file *file; | 159 | struct file *file; |
150 | pdeo->closing = 1; | 160 | pdeo->closing = true; |
151 | spin_unlock(&pde->pde_unload_lock); | 161 | spin_unlock(&pde->pde_unload_lock); |
152 | file = pdeo->file; | 162 | file = pdeo->file; |
153 | pde->proc_fops->release(file_inode(file), file); | 163 | pde->proc_fops->release(file_inode(file), file); |
154 | spin_lock(&pde->pde_unload_lock); | 164 | spin_lock(&pde->pde_unload_lock); |
155 | list_del_init(&pdeo->lh); | 165 | /* After ->release. */ |
166 | list_del(&pdeo->lh); | ||
156 | if (pdeo->c) | 167 | if (pdeo->c) |
157 | complete(pdeo->c); | 168 | complete(pdeo->c); |
158 | kfree(pdeo); | 169 | kfree(pdeo); |
@@ -167,6 +178,8 @@ void proc_entry_rundown(struct proc_dir_entry *de) | |||
167 | if (atomic_add_return(BIAS, &de->in_use) != BIAS) | 178 | if (atomic_add_return(BIAS, &de->in_use) != BIAS) |
168 | wait_for_completion(&c); | 179 | wait_for_completion(&c); |
169 | 180 | ||
181 | /* ->pde_openers list can't grow from now on. */ | ||
182 | |||
170 | spin_lock(&de->pde_unload_lock); | 183 | spin_lock(&de->pde_unload_lock); |
171 | while (!list_empty(&de->pde_openers)) { | 184 | while (!list_empty(&de->pde_openers)) { |
172 | struct pde_opener *pdeo; | 185 | struct pde_opener *pdeo; |
@@ -312,16 +325,17 @@ static int proc_reg_open(struct inode *inode, struct file *file) | |||
312 | struct pde_opener *pdeo; | 325 | struct pde_opener *pdeo; |
313 | 326 | ||
314 | /* | 327 | /* |
315 | * What for, you ask? Well, we can have open, rmmod, remove_proc_entry | 328 | * Ensure that |
316 | * sequence. ->release won't be called because ->proc_fops will be | 329 | * 1) PDE's ->release hook will be called no matter what |
317 | * cleared. Depending on complexity of ->release, consequences vary. | 330 | * either normally by close()/->release, or forcefully by |
331 | * rmmod/remove_proc_entry. | ||
332 | * | ||
333 | * 2) rmmod isn't blocked by opening file in /proc and sitting on | ||
334 | * the descriptor (including "rmmod foo </proc/foo" scenario). | ||
318 | * | 335 | * |
319 | * We can't wait for mercy when close will be done for real, it's | 336 | * Save every "struct file" with custom ->release hook. |
320 | * deadlockable: rmmod foo </proc/foo . So, we're going to do ->release | ||
321 | * by hand in remove_proc_entry(). For this, save opener's credentials | ||
322 | * for later. | ||
323 | */ | 337 | */ |
324 | pdeo = kzalloc(sizeof(struct pde_opener), GFP_KERNEL); | 338 | pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL); |
325 | if (!pdeo) | 339 | if (!pdeo) |
326 | return -ENOMEM; | 340 | return -ENOMEM; |
327 | 341 | ||
@@ -338,7 +352,8 @@ static int proc_reg_open(struct inode *inode, struct file *file) | |||
338 | if (rv == 0 && release) { | 352 | if (rv == 0 && release) { |
339 | /* To know what to release. */ | 353 | /* To know what to release. */ |
340 | pdeo->file = file; | 354 | pdeo->file = file; |
341 | /* Strictly for "too late" ->release in proc_reg_release(). */ | 355 | pdeo->closing = false; |
356 | pdeo->c = NULL; | ||
342 | spin_lock(&pde->pde_unload_lock); | 357 | spin_lock(&pde->pde_unload_lock); |
343 | list_add(&pdeo->lh, &pde->pde_openers); | 358 | list_add(&pdeo->lh, &pde->pde_openers); |
344 | spin_unlock(&pde->pde_unload_lock); | 359 | spin_unlock(&pde->pde_unload_lock); |
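In fs/proc/inode.c the closing flag becomes a bool, the ->release handshake gains an explanatory comment, and proc_reg_open() drops kzalloc() for kmalloc() because every field of the opener is now initialized explicitly. A sketch of that allocation pattern with an illustrative structure, not the real struct pde_opener:

	#include <linux/completion.h>
	#include <linux/fs.h>
	#include <linux/list.h>
	#include <linux/slab.h>

	struct example_opener {
		struct file *file;
		struct list_head lh;
		bool closing;
		struct completion *c;
	};

	static struct example_opener *example_alloc_opener(struct file *file)
	{
		/* Plain kmalloc(): every member is assigned by hand below, so
		 * the memset done by kzalloc() would be redundant. */
		struct example_opener *o = kmalloc(sizeof(*o), GFP_KERNEL);

		if (!o)
			return NULL;
		o->file = file;
		o->closing = false;
		o->c = NULL;
		INIT_LIST_HEAD(&o->lh);
		return o;
	}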
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 5378441ec1b7..bbba5d22aada 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
@@ -203,7 +203,7 @@ struct proc_dir_entry *proc_create_mount_point(const char *name); | |||
203 | struct pde_opener { | 203 | struct pde_opener { |
204 | struct file *file; | 204 | struct file *file; |
205 | struct list_head lh; | 205 | struct list_head lh; |
206 | int closing; | 206 | bool closing; |
207 | struct completion *c; | 207 | struct completion *c; |
208 | }; | 208 | }; |
209 | extern const struct inode_operations proc_link_inode_operations; | 209 | extern const struct inode_operations proc_link_inode_operations; |
@@ -211,6 +211,7 @@ extern const struct inode_operations proc_link_inode_operations; | |||
211 | extern const struct inode_operations proc_pid_link_inode_operations; | 211 | extern const struct inode_operations proc_pid_link_inode_operations; |
212 | 212 | ||
213 | extern void proc_init_inodecache(void); | 213 | extern void proc_init_inodecache(void); |
214 | void set_proc_pid_nlink(void); | ||
214 | extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); | 215 | extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); |
215 | extern int proc_fill_super(struct super_block *, void *data, int flags); | 216 | extern int proc_fill_super(struct super_block *, void *data, int flags); |
216 | extern void proc_entry_rundown(struct proc_dir_entry *); | 217 | extern void proc_entry_rundown(struct proc_dir_entry *); |
diff --git a/fs/proc/root.c b/fs/proc/root.c index 8d3e484055a6..4bd0373576b5 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c | |||
@@ -122,6 +122,7 @@ void __init proc_root_init(void) | |||
122 | int err; | 122 | int err; |
123 | 123 | ||
124 | proc_init_inodecache(); | 124 | proc_init_inodecache(); |
125 | set_proc_pid_nlink(); | ||
125 | err = register_filesystem(&proc_fs_type); | 126 | err = register_filesystem(&proc_fs_type); |
126 | if (err) | 127 | if (err) |
127 | return; | 128 | return; |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 35b92d81692f..958f32545064 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -1588,6 +1588,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, | |||
1588 | 1588 | ||
1589 | } while (pte++, addr += PAGE_SIZE, addr != end); | 1589 | } while (pte++, addr += PAGE_SIZE, addr != end); |
1590 | pte_unmap_unlock(orig_pte, ptl); | 1590 | pte_unmap_unlock(orig_pte, ptl); |
1591 | cond_resched(); | ||
1591 | return 0; | 1592 | return 0; |
1592 | } | 1593 | } |
1593 | #ifdef CONFIG_HUGETLB_PAGE | 1594 | #ifdef CONFIG_HUGETLB_PAGE |
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 41b95d82a185..18af2bcefe6a 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h | |||
@@ -652,18 +652,9 @@ static inline pmd_t pmd_read_atomic(pmd_t *pmdp) | |||
652 | } | 652 | } |
653 | #endif | 653 | #endif |
654 | 654 | ||
655 | #ifndef pmd_move_must_withdraw | 655 | #ifndef arch_needs_pgtable_deposit |
656 | static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl, | 656 | #define arch_needs_pgtable_deposit() (false) |
657 | spinlock_t *old_pmd_ptl) | ||
658 | { | ||
659 | /* | ||
660 | * With split pmd lock we also need to move preallocated | ||
661 | * PTE page table if new_pmd is on different PMD page table. | ||
662 | */ | ||
663 | return new_pmd_ptl != old_pmd_ptl; | ||
664 | } | ||
665 | #endif | 657 | #endif |
666 | |||
667 | /* | 658 | /* |
668 | * This function is meant to be used by sites walking pagetables with | 659 | * This function is meant to be used by sites walking pagetables with |
669 | * the mmap_sem hold in read mode to protect against MADV_DONTNEED and | 660 | * the mmap_sem hold in read mode to protect against MADV_DONTNEED and |
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index c6d667187608..7eed8cf3130a 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h | |||
@@ -107,11 +107,6 @@ struct mmu_gather { | |||
107 | struct mmu_gather_batch local; | 107 | struct mmu_gather_batch local; |
108 | struct page *__pages[MMU_GATHER_BUNDLE]; | 108 | struct page *__pages[MMU_GATHER_BUNDLE]; |
109 | unsigned int batch_count; | 109 | unsigned int batch_count; |
110 | /* | ||
111 | * __tlb_adjust_range will track the new addr here, | ||
112 | * that that we can adjust the range after the flush | ||
113 | */ | ||
114 | unsigned long addr; | ||
115 | int page_size; | 110 | int page_size; |
116 | }; | 111 | }; |
117 | 112 | ||
@@ -125,16 +120,11 @@ extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, | |||
125 | int page_size); | 120 | int page_size); |
126 | 121 | ||
127 | static inline void __tlb_adjust_range(struct mmu_gather *tlb, | 122 | static inline void __tlb_adjust_range(struct mmu_gather *tlb, |
128 | unsigned long address) | 123 | unsigned long address, |
124 | unsigned int range_size) | ||
129 | { | 125 | { |
130 | tlb->start = min(tlb->start, address); | 126 | tlb->start = min(tlb->start, address); |
131 | tlb->end = max(tlb->end, address + PAGE_SIZE); | 127 | tlb->end = max(tlb->end, address + range_size); |
132 | /* | ||
133 | * Track the last address with which we adjusted the range. This | ||
134 | * will be used later to adjust again after a mmu_flush due to | ||
135 | * failed __tlb_remove_page | ||
136 | */ | ||
137 | tlb->addr = address; | ||
138 | } | 128 | } |
139 | 129 | ||
140 | static inline void __tlb_reset_range(struct mmu_gather *tlb) | 130 | static inline void __tlb_reset_range(struct mmu_gather *tlb) |
@@ -150,15 +140,11 @@ static inline void __tlb_reset_range(struct mmu_gather *tlb) | |||
150 | static inline void tlb_remove_page_size(struct mmu_gather *tlb, | 140 | static inline void tlb_remove_page_size(struct mmu_gather *tlb, |
151 | struct page *page, int page_size) | 141 | struct page *page, int page_size) |
152 | { | 142 | { |
153 | if (__tlb_remove_page_size(tlb, page, page_size)) { | 143 | if (__tlb_remove_page_size(tlb, page, page_size)) |
154 | tlb_flush_mmu(tlb); | 144 | tlb_flush_mmu(tlb); |
155 | tlb->page_size = page_size; | ||
156 | __tlb_adjust_range(tlb, tlb->addr); | ||
157 | __tlb_remove_page_size(tlb, page, page_size); | ||
158 | } | ||
159 | } | 145 | } |
160 | 146 | ||
161 | static bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page) | 147 | static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page) |
162 | { | 148 | { |
163 | return __tlb_remove_page_size(tlb, page, PAGE_SIZE); | 149 | return __tlb_remove_page_size(tlb, page, PAGE_SIZE); |
164 | } | 150 | } |
@@ -172,14 +158,21 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) | |||
172 | return tlb_remove_page_size(tlb, page, PAGE_SIZE); | 158 | return tlb_remove_page_size(tlb, page, PAGE_SIZE); |
173 | } | 159 | } |
174 | 160 | ||
175 | static inline bool __tlb_remove_pte_page(struct mmu_gather *tlb, struct page *page) | 161 | #ifndef tlb_remove_check_page_size_change |
162 | #define tlb_remove_check_page_size_change tlb_remove_check_page_size_change | ||
163 | static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, | ||
164 | unsigned int page_size) | ||
176 | { | 165 | { |
177 | /* active->nr should be zero when we call this */ | 166 | /* |
178 | VM_BUG_ON_PAGE(tlb->active->nr, page); | 167 | * We don't care about page size change, just update |
179 | tlb->page_size = PAGE_SIZE; | 168 | * mmu_gather page size here so that debug checks |
180 | __tlb_adjust_range(tlb, tlb->addr); | 169 | * doesn't throw false warning. |
181 | return __tlb_remove_page(tlb, page); | 170 | */ |
171 | #ifdef CONFIG_DEBUG_VM | ||
172 | tlb->page_size = page_size; | ||
173 | #endif | ||
182 | } | 174 | } |
175 | #endif | ||
183 | 176 | ||
184 | /* | 177 | /* |
185 | * In the case of tlb vma handling, we can optimise these away in the | 178 | * In the case of tlb vma handling, we can optimise these away in the |
@@ -215,10 +208,16 @@ static inline bool __tlb_remove_pte_page(struct mmu_gather *tlb, struct page *pa | |||
215 | */ | 208 | */ |
216 | #define tlb_remove_tlb_entry(tlb, ptep, address) \ | 209 | #define tlb_remove_tlb_entry(tlb, ptep, address) \ |
217 | do { \ | 210 | do { \ |
218 | __tlb_adjust_range(tlb, address); \ | 211 | __tlb_adjust_range(tlb, address, PAGE_SIZE); \ |
219 | __tlb_remove_tlb_entry(tlb, ptep, address); \ | 212 | __tlb_remove_tlb_entry(tlb, ptep, address); \ |
220 | } while (0) | 213 | } while (0) |
221 | 214 | ||
215 | #define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \ | ||
216 | do { \ | ||
217 | __tlb_adjust_range(tlb, address, huge_page_size(h)); \ | ||
218 | __tlb_remove_tlb_entry(tlb, ptep, address); \ | ||
219 | } while (0) | ||
220 | |||
222 | /** | 221 | /** |
223 | * tlb_remove_pmd_tlb_entry - remember a pmd mapping for later tlb invalidation | 222 | * tlb_remove_pmd_tlb_entry - remember a pmd mapping for later tlb invalidation |
224 | * This is a nop so far, because only x86 needs it. | 223 | * This is a nop so far, because only x86 needs it. |
@@ -227,29 +226,47 @@ static inline bool __tlb_remove_pte_page(struct mmu_gather *tlb, struct page *pa | |||
227 | #define __tlb_remove_pmd_tlb_entry(tlb, pmdp, address) do {} while (0) | 226 | #define __tlb_remove_pmd_tlb_entry(tlb, pmdp, address) do {} while (0) |
228 | #endif | 227 | #endif |
229 | 228 | ||
230 | #define tlb_remove_pmd_tlb_entry(tlb, pmdp, address) \ | 229 | #define tlb_remove_pmd_tlb_entry(tlb, pmdp, address) \ |
231 | do { \ | 230 | do { \ |
232 | __tlb_adjust_range(tlb, address); \ | 231 | __tlb_adjust_range(tlb, address, HPAGE_PMD_SIZE); \ |
233 | __tlb_remove_pmd_tlb_entry(tlb, pmdp, address); \ | 232 | __tlb_remove_pmd_tlb_entry(tlb, pmdp, address); \ |
234 | } while (0) | 233 | } while (0) |
235 | 234 | ||
235 | /* | ||
236 | * For things like page tables caches (ie caching addresses "inside" the | ||
237 | * page tables, like x86 does), for legacy reasons, flushing an | ||
238 | * individual page had better flush the page table caches behind it. This | ||
239 | * is definitely how x86 works, for example. And if you have an | ||
240 | * architected non-legacy page table cache (which I'm not aware of | ||
241 | * anybody actually doing), you're going to have some architecturally | ||
242 | * explicit flushing for that, likely *separate* from a regular TLB entry | ||
243 | * flush, and thus you'd need more than just some range expansion.. | ||
244 | * | ||
245 | * So if we ever find an architecture | ||
246 | * that would want something that odd, I think it is up to that | ||
247 | * architecture to do its own odd thing, not cause pain for others | ||
248 | * http://lkml.kernel.org/r/CA+55aFzBggoXtNXQeng5d_mRoDnaMBE5Y+URs+PHR67nUpMtaw@mail.gmail.com | ||
249 | * | ||
250 | * For now w.r.t page table cache, mark the range_size as PAGE_SIZE | ||
251 | */ | ||
252 | |||
236 | #define pte_free_tlb(tlb, ptep, address) \ | 253 | #define pte_free_tlb(tlb, ptep, address) \ |
237 | do { \ | 254 | do { \ |
238 | __tlb_adjust_range(tlb, address); \ | 255 | __tlb_adjust_range(tlb, address, PAGE_SIZE); \ |
239 | __pte_free_tlb(tlb, ptep, address); \ | 256 | __pte_free_tlb(tlb, ptep, address); \ |
240 | } while (0) | 257 | } while (0) |
241 | 258 | ||
242 | #ifndef __ARCH_HAS_4LEVEL_HACK | 259 | #ifndef __ARCH_HAS_4LEVEL_HACK |
243 | #define pud_free_tlb(tlb, pudp, address) \ | 260 | #define pud_free_tlb(tlb, pudp, address) \ |
244 | do { \ | 261 | do { \ |
245 | __tlb_adjust_range(tlb, address); \ | 262 | __tlb_adjust_range(tlb, address, PAGE_SIZE); \ |
246 | __pud_free_tlb(tlb, pudp, address); \ | 263 | __pud_free_tlb(tlb, pudp, address); \ |
247 | } while (0) | 264 | } while (0) |
248 | #endif | 265 | #endif |
249 | 266 | ||
250 | #define pmd_free_tlb(tlb, pmdp, address) \ | 267 | #define pmd_free_tlb(tlb, pmdp, address) \ |
251 | do { \ | 268 | do { \ |
252 | __tlb_adjust_range(tlb, address); \ | 269 | __tlb_adjust_range(tlb, address, PAGE_SIZE); \ |
253 | __pmd_free_tlb(tlb, pmdp, address); \ | 270 | __pmd_free_tlb(tlb, pmdp, address); \ |
254 | } while (0) | 271 | } while (0) |
255 | 272 | ||
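With __tlb_adjust_range() taking the size of the mapping being torn down, the new tlb_remove_huge_tlb_entry() can widen the flush window by huge_page_size(h) instead of a single PAGE_SIZE, and the per-gather addr tracking goes away. A minimal sketch of the start/end bookkeeping, with an illustrative stand-in for struct mmu_gather:

	#include <linux/kernel.h>	/* min(), max() */

	struct example_gather {			/* illustrative, not struct mmu_gather */
		unsigned long start;
		unsigned long end;
	};

	static inline void example_adjust_range(struct example_gather *tlb,
						unsigned long address,
						unsigned long range_size)
	{
		/* Grow the to-be-flushed window to cover [address, address + size). */
		tlb->start = min(tlb->start, address);
		tlb->end = max(tlb->end, address + range_size);
	}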
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index c357f27d5483..0b5b1af35e5e 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h | |||
@@ -136,12 +136,13 @@ struct bdi_writeback { | |||
136 | struct backing_dev_info { | 136 | struct backing_dev_info { |
137 | struct list_head bdi_list; | 137 | struct list_head bdi_list; |
138 | unsigned long ra_pages; /* max readahead in PAGE_SIZE units */ | 138 | unsigned long ra_pages; /* max readahead in PAGE_SIZE units */ |
139 | unsigned int capabilities; /* Device capabilities */ | 139 | unsigned long io_pages; /* max allowed IO size */ |
140 | congested_fn *congested_fn; /* Function pointer if device is md/dm */ | 140 | congested_fn *congested_fn; /* Function pointer if device is md/dm */ |
141 | void *congested_data; /* Pointer to aux data for congested func */ | 141 | void *congested_data; /* Pointer to aux data for congested func */ |
142 | 142 | ||
143 | char *name; | 143 | char *name; |
144 | 144 | ||
145 | unsigned int capabilities; /* Device capabilities */ | ||
145 | unsigned int min_ratio; | 146 | unsigned int min_ratio; |
146 | unsigned int max_ratio, max_prop_frac; | 147 | unsigned int max_ratio, max_prop_frac; |
147 | 148 | ||
diff --git a/include/linux/cma.h b/include/linux/cma.h index 29f9e774ab76..6f0a91b37f68 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h | |||
@@ -1,6 +1,9 @@ | |||
1 | #ifndef __CMA_H__ | 1 | #ifndef __CMA_H__ |
2 | #define __CMA_H__ | 2 | #define __CMA_H__ |
3 | 3 | ||
4 | #include <linux/init.h> | ||
5 | #include <linux/types.h> | ||
6 | |||
4 | /* | 7 | /* |
5 | * There is always at least global CMA area and a few optional | 8 | * There is always at least global CMA area and a few optional |
6 | * areas configured in kernel .config. | 9 | * areas configured in kernel .config. |
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 928e5ca0caee..0444b1336268 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h | |||
@@ -21,7 +21,7 @@ | |||
21 | * clobbered. The issue is as follows: while the inline asm might | 21 | * clobbered. The issue is as follows: while the inline asm might |
22 | * access any memory it wants, the compiler could have fit all of | 22 | * access any memory it wants, the compiler could have fit all of |
23 | * @ptr into memory registers instead, and since @ptr never escaped | 23 | * @ptr into memory registers instead, and since @ptr never escaped |
24 | * from that, it proofed that the inline asm wasn't touching any of | 24 | * from that, it proved that the inline asm wasn't touching any of |
25 | * it. This version works well with both compilers, i.e. we're telling | 25 | * it. This version works well with both compilers, i.e. we're telling |
26 | * the compiler that the inline asm absolutely may see the contents | 26 | * the compiler that the inline asm absolutely may see the contents |
27 | * of @ptr. See also: https://llvm.org/bugs/show_bug.cgi?id=15495 | 27 | * of @ptr. See also: https://llvm.org/bugs/show_bug.cgi?id=15495 |
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index e35e6de633b9..1f782aa1d8e6 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h | |||
@@ -189,6 +189,8 @@ static inline void deferred_split_huge_page(struct page *page) {} | |||
189 | #define split_huge_pmd(__vma, __pmd, __address) \ | 189 | #define split_huge_pmd(__vma, __pmd, __address) \ |
190 | do { } while (0) | 190 | do { } while (0) |
191 | 191 | ||
192 | static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, | ||
193 | unsigned long address, bool freeze, struct page *page) {} | ||
192 | static inline void split_huge_pmd_address(struct vm_area_struct *vma, | 194 | static inline void split_huge_pmd_address(struct vm_area_struct *vma, |
193 | unsigned long address, bool freeze, struct page *page) {} | 195 | unsigned long address, bool freeze, struct page *page) {} |
194 | 196 | ||
diff --git a/include/linux/kthread.h b/include/linux/kthread.h index c1c3e63d52c1..4fec8b775895 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h | |||
@@ -175,7 +175,7 @@ __printf(2, 3) | |||
175 | struct kthread_worker * | 175 | struct kthread_worker * |
176 | kthread_create_worker(unsigned int flags, const char namefmt[], ...); | 176 | kthread_create_worker(unsigned int flags, const char namefmt[], ...); |
177 | 177 | ||
178 | struct kthread_worker * | 178 | __printf(3, 4) struct kthread_worker * |
179 | kthread_create_worker_on_cpu(int cpu, unsigned int flags, | 179 | kthread_create_worker_on_cpu(int cpu, unsigned int flags, |
180 | const char namefmt[], ...); | 180 | const char namefmt[], ...); |
181 | 181 | ||
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 5e5b2969d931..5f4d8281832b 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h | |||
@@ -7,6 +7,7 @@ | |||
7 | 7 | ||
8 | 8 | ||
9 | #include <linux/mmzone.h> | 9 | #include <linux/mmzone.h> |
10 | #include <linux/dax.h> | ||
10 | #include <linux/slab.h> | 11 | #include <linux/slab.h> |
11 | #include <linux/rbtree.h> | 12 | #include <linux/rbtree.h> |
12 | #include <linux/spinlock.h> | 13 | #include <linux/spinlock.h> |
@@ -177,6 +178,13 @@ static inline bool vma_migratable(struct vm_area_struct *vma) | |||
177 | if (vma->vm_flags & (VM_IO | VM_PFNMAP)) | 178 | if (vma->vm_flags & (VM_IO | VM_PFNMAP)) |
178 | return false; | 179 | return false; |
179 | 180 | ||
181 | /* | ||
182 | * DAX device mappings require predictable access latency, so avoid | ||
183 | * incurring periodic faults. | ||
184 | */ | ||
185 | if (vma_is_dax(vma)) | ||
186 | return false; | ||
187 | |||
180 | #ifndef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION | 188 | #ifndef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION |
181 | if (vma->vm_flags & VM_HUGETLB) | 189 | if (vma->vm_flags & VM_HUGETLB) |
182 | return false; | 190 | return false; |
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index 4341f32516d8..271b3fdf0070 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h | |||
@@ -71,6 +71,7 @@ extern int early_init_dt_scan_chosen_stdout(void); | |||
71 | extern void early_init_fdt_scan_reserved_mem(void); | 71 | extern void early_init_fdt_scan_reserved_mem(void); |
72 | extern void early_init_fdt_reserve_self(void); | 72 | extern void early_init_fdt_reserve_self(void); |
73 | extern void early_init_dt_add_memory_arch(u64 base, u64 size); | 73 | extern void early_init_dt_add_memory_arch(u64 base, u64 size); |
74 | extern int early_init_dt_mark_hotplug_memory_arch(u64 base, u64 size); | ||
74 | extern int early_init_dt_reserve_memory_arch(phys_addr_t base, phys_addr_t size, | 75 | extern int early_init_dt_reserve_memory_arch(phys_addr_t base, phys_addr_t size, |
75 | bool no_map); | 76 | bool no_map); |
76 | extern void * early_init_dt_alloc_memory_arch(u64 size, u64 align); | 77 | extern void * early_init_dt_alloc_memory_arch(u64 size, u64 align); |
diff --git a/include/linux/printk.h b/include/linux/printk.h index eac1af8502bb..3472cc6b7a60 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h | |||
@@ -10,6 +10,8 @@ | |||
10 | extern const char linux_banner[]; | 10 | extern const char linux_banner[]; |
11 | extern const char linux_proc_banner[]; | 11 | extern const char linux_proc_banner[]; |
12 | 12 | ||
13 | #define PRINTK_MAX_SINGLE_HEADER_LEN 2 | ||
14 | |||
13 | static inline int printk_get_level(const char *buffer) | 15 | static inline int printk_get_level(const char *buffer) |
14 | { | 16 | { |
15 | if (buffer[0] == KERN_SOH_ASCII && buffer[1]) { | 17 | if (buffer[0] == KERN_SOH_ASCII && buffer[1]) { |
@@ -31,6 +33,14 @@ static inline const char *printk_skip_level(const char *buffer) | |||
31 | return buffer; | 33 | return buffer; |
32 | } | 34 | } |
33 | 35 | ||
36 | static inline const char *printk_skip_headers(const char *buffer) | ||
37 | { | ||
38 | while (printk_get_level(buffer)) | ||
39 | buffer = printk_skip_level(buffer); | ||
40 | |||
41 | return buffer; | ||
42 | } | ||
43 | |||
34 | #define CONSOLE_EXT_LOG_MAX 8192 | 44 | #define CONSOLE_EXT_LOG_MAX 8192 |
35 | 45 | ||
36 | /* printk's without a loglevel use this.. */ | 46 | /* printk's without a loglevel use this.. */ |
@@ -40,10 +50,15 @@ static inline const char *printk_skip_level(const char *buffer) | |||
40 | #define CONSOLE_LOGLEVEL_SILENT 0 /* Mum's the word */ | 50 | #define CONSOLE_LOGLEVEL_SILENT 0 /* Mum's the word */ |
41 | #define CONSOLE_LOGLEVEL_MIN 1 /* Minimum loglevel we let people use */ | 51 | #define CONSOLE_LOGLEVEL_MIN 1 /* Minimum loglevel we let people use */ |
42 | #define CONSOLE_LOGLEVEL_QUIET 4 /* Shhh ..., when booted with "quiet" */ | 52 | #define CONSOLE_LOGLEVEL_QUIET 4 /* Shhh ..., when booted with "quiet" */ |
43 | #define CONSOLE_LOGLEVEL_DEFAULT 7 /* anything MORE serious than KERN_DEBUG */ | ||
44 | #define CONSOLE_LOGLEVEL_DEBUG 10 /* issue debug messages */ | 53 | #define CONSOLE_LOGLEVEL_DEBUG 10 /* issue debug messages */ |
45 | #define CONSOLE_LOGLEVEL_MOTORMOUTH 15 /* You can't shut this one up */ | 54 | #define CONSOLE_LOGLEVEL_MOTORMOUTH 15 /* You can't shut this one up */ |
46 | 55 | ||
56 | /* | ||
57 | * Default used to be hard-coded at 7, we're now allowing it to be set from | ||
58 | * kernel config. | ||
59 | */ | ||
60 | #define CONSOLE_LOGLEVEL_DEFAULT CONFIG_CONSOLE_LOGLEVEL_DEFAULT | ||
61 | |||
47 | extern int console_printk[]; | 62 | extern int console_printk[]; |
48 | 63 | ||
49 | #define console_loglevel (console_printk[0]) | 64 | #define console_loglevel (console_printk[0]) |
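Because a message can now start with more than one header (a loglevel marker and/or a KERN_CONT marker), printk_skip_headers() loops printk_skip_level() until printk_get_level() stops matching; the default console loglevel also moves from a hard-coded 7 into CONFIG_CONSOLE_LOGLEVEL_DEFAULT. A small usage sketch of the new helper (the function name here is illustrative); the kdb hunk further below makes the same switch:

	#include <linux/printk.h>
	#include <linux/string.h>

	static size_t example_text_len(const char *msg)
	{
		/* Skip every leading header, not just the first one as a
		 * single printk_skip_level() call would. */
		const char *text = printk_skip_headers(msg);

		return strlen(text);
	}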
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index af3581b8a451..744486057e9e 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h | |||
@@ -80,14 +80,11 @@ static inline bool radix_tree_is_internal_node(void *ptr) | |||
80 | #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \ | 80 | #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \ |
81 | RADIX_TREE_MAP_SHIFT)) | 81 | RADIX_TREE_MAP_SHIFT)) |
82 | 82 | ||
83 | /* Internally used bits of node->count */ | ||
84 | #define RADIX_TREE_COUNT_SHIFT (RADIX_TREE_MAP_SHIFT + 1) | ||
85 | #define RADIX_TREE_COUNT_MASK ((1UL << RADIX_TREE_COUNT_SHIFT) - 1) | ||
86 | |||
87 | struct radix_tree_node { | 83 | struct radix_tree_node { |
88 | unsigned char shift; /* Bits remaining in each slot */ | 84 | unsigned char shift; /* Bits remaining in each slot */ |
89 | unsigned char offset; /* Slot offset in parent */ | 85 | unsigned char offset; /* Slot offset in parent */ |
90 | unsigned int count; | 86 | unsigned char count; /* Total entry count */ |
87 | unsigned char exceptional; /* Exceptional entry count */ | ||
91 | union { | 88 | union { |
92 | struct { | 89 | struct { |
93 | /* Used when ascending tree */ | 90 | /* Used when ascending tree */ |
@@ -248,20 +245,6 @@ static inline int radix_tree_exception(void *arg) | |||
248 | return unlikely((unsigned long)arg & RADIX_TREE_ENTRY_MASK); | 245 | return unlikely((unsigned long)arg & RADIX_TREE_ENTRY_MASK); |
249 | } | 246 | } |
250 | 247 | ||
251 | /** | ||
252 | * radix_tree_replace_slot - replace item in a slot | ||
253 | * @pslot: pointer to slot, returned by radix_tree_lookup_slot | ||
254 | * @item: new item to store in the slot. | ||
255 | * | ||
256 | * For use with radix_tree_lookup_slot(). Caller must hold tree write locked | ||
257 | * across slot lookup and replacement. | ||
258 | */ | ||
259 | static inline void radix_tree_replace_slot(void **pslot, void *item) | ||
260 | { | ||
261 | BUG_ON(radix_tree_is_internal_node(item)); | ||
262 | rcu_assign_pointer(*pslot, item); | ||
263 | } | ||
264 | |||
265 | int __radix_tree_create(struct radix_tree_root *root, unsigned long index, | 248 | int __radix_tree_create(struct radix_tree_root *root, unsigned long index, |
266 | unsigned order, struct radix_tree_node **nodep, | 249 | unsigned order, struct radix_tree_node **nodep, |
267 | void ***slotp); | 250 | void ***slotp); |
@@ -276,7 +259,14 @@ void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index, | |||
276 | struct radix_tree_node **nodep, void ***slotp); | 259 | struct radix_tree_node **nodep, void ***slotp); |
277 | void *radix_tree_lookup(struct radix_tree_root *, unsigned long); | 260 | void *radix_tree_lookup(struct radix_tree_root *, unsigned long); |
278 | void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long); | 261 | void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long); |
279 | bool __radix_tree_delete_node(struct radix_tree_root *root, | 262 | typedef void (*radix_tree_update_node_t)(struct radix_tree_node *, void *); |
263 | void __radix_tree_replace(struct radix_tree_root *root, | ||
264 | struct radix_tree_node *node, | ||
265 | void **slot, void *item, | ||
266 | radix_tree_update_node_t update_node, void *private); | ||
267 | void radix_tree_replace_slot(struct radix_tree_root *root, | ||
268 | void **slot, void *item); | ||
269 | void __radix_tree_delete_node(struct radix_tree_root *root, | ||
280 | struct radix_tree_node *node); | 270 | struct radix_tree_node *node); |
281 | void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *); | 271 | void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *); |
282 | void *radix_tree_delete(struct radix_tree_root *, unsigned long); | 272 | void *radix_tree_delete(struct radix_tree_root *, unsigned long); |
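radix_tree_replace_slot() stops being a header-only inline and now takes the tree root, so that __radix_tree_replace() can keep the new per-node count/exceptional fields consistent. A sketch of an updated caller under its own lock; the lock, tree and item here are illustrative, and the locking rule itself is unchanged:

	#include <linux/gfp.h>
	#include <linux/radix-tree.h>
	#include <linux/spinlock.h>

	static DEFINE_SPINLOCK(example_lock);
	static RADIX_TREE(example_tree, GFP_KERNEL);

	static void example_replace(unsigned long index, void *new_item)
	{
		void **slot;

		/* The tree must stay write-locked across lookup and replacement. */
		spin_lock(&example_lock);
		slot = radix_tree_lookup_slot(&example_tree, index);
		if (slot)
			radix_tree_replace_slot(&example_tree, slot, new_item);
		spin_unlock(&example_lock);
	}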
diff --git a/include/linux/rmap.h b/include/linux/rmap.h index b46bb5620a76..15321fb1df6b 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h | |||
@@ -137,11 +137,19 @@ static inline void anon_vma_unlock_read(struct anon_vma *anon_vma) | |||
137 | * anon_vma helper functions. | 137 | * anon_vma helper functions. |
138 | */ | 138 | */ |
139 | void anon_vma_init(void); /* create anon_vma_cachep */ | 139 | void anon_vma_init(void); /* create anon_vma_cachep */ |
140 | int anon_vma_prepare(struct vm_area_struct *); | 140 | int __anon_vma_prepare(struct vm_area_struct *); |
141 | void unlink_anon_vmas(struct vm_area_struct *); | 141 | void unlink_anon_vmas(struct vm_area_struct *); |
142 | int anon_vma_clone(struct vm_area_struct *, struct vm_area_struct *); | 142 | int anon_vma_clone(struct vm_area_struct *, struct vm_area_struct *); |
143 | int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *); | 143 | int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *); |
144 | 144 | ||
145 | static inline int anon_vma_prepare(struct vm_area_struct *vma) | ||
146 | { | ||
147 | if (likely(vma->anon_vma)) | ||
148 | return 0; | ||
149 | |||
150 | return __anon_vma_prepare(vma); | ||
151 | } | ||
152 | |||
145 | static inline void anon_vma_merge(struct vm_area_struct *vma, | 153 | static inline void anon_vma_merge(struct vm_area_struct *vma, |
146 | struct vm_area_struct *next) | 154 | struct vm_area_struct *next) |
147 | { | 155 | { |
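anon_vma_prepare() turns into an inline fast path: the common page-fault case is a single likely(vma->anon_vma) test, and only the rare first fault calls the out-of-line __anon_vma_prepare(). The same wrapper pattern in generic form, with made-up names:

	#include <linux/compiler.h>

	struct example_ctx {
		void *state;
	};

	int __example_prepare(struct example_ctx *ctx);	/* out of line, rare path */

	static inline int example_prepare(struct example_ctx *ctx)
	{
		if (likely(ctx->state))		/* hot path: already set up */
			return 0;

		return __example_prepare(ctx);	/* cold path: do the real work */
	}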
diff --git a/include/linux/sched.h b/include/linux/sched.h index 7551d3e2ab70..0e90f2973719 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -540,7 +540,11 @@ static inline int get_dumpable(struct mm_struct *mm) | |||
540 | /* leave room for more dump flags */ | 540 | /* leave room for more dump flags */ |
541 | #define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ | 541 | #define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ |
542 | #define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */ | 542 | #define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */ |
543 | #define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ | 543 | /* |
544 | * This one-shot flag is dropped due to necessity of changing exe once again | ||
545 | * on NFS restore | ||
546 | */ | ||
547 | //#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ | ||
544 | 548 | ||
545 | #define MMF_HAS_UPROBES 19 /* has uprobes */ | 549 | #define MMF_HAS_UPROBES 19 /* has uprobes */ |
546 | #define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ | 550 | #define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ |
diff --git a/include/linux/swap.h b/include/linux/swap.h index a56523cefb9b..09b212d37f1d 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h | |||
@@ -246,39 +246,7 @@ struct swap_info_struct { | |||
246 | void *workingset_eviction(struct address_space *mapping, struct page *page); | 246 | void *workingset_eviction(struct address_space *mapping, struct page *page); |
247 | bool workingset_refault(void *shadow); | 247 | bool workingset_refault(void *shadow); |
248 | void workingset_activation(struct page *page); | 248 | void workingset_activation(struct page *page); |
249 | extern struct list_lru workingset_shadow_nodes; | 249 | void workingset_update_node(struct radix_tree_node *node, void *private); |
250 | |||
251 | static inline unsigned int workingset_node_pages(struct radix_tree_node *node) | ||
252 | { | ||
253 | return node->count & RADIX_TREE_COUNT_MASK; | ||
254 | } | ||
255 | |||
256 | static inline void workingset_node_pages_inc(struct radix_tree_node *node) | ||
257 | { | ||
258 | node->count++; | ||
259 | } | ||
260 | |||
261 | static inline void workingset_node_pages_dec(struct radix_tree_node *node) | ||
262 | { | ||
263 | VM_WARN_ON_ONCE(!workingset_node_pages(node)); | ||
264 | node->count--; | ||
265 | } | ||
266 | |||
267 | static inline unsigned int workingset_node_shadows(struct radix_tree_node *node) | ||
268 | { | ||
269 | return node->count >> RADIX_TREE_COUNT_SHIFT; | ||
270 | } | ||
271 | |||
272 | static inline void workingset_node_shadows_inc(struct radix_tree_node *node) | ||
273 | { | ||
274 | node->count += 1U << RADIX_TREE_COUNT_SHIFT; | ||
275 | } | ||
276 | |||
277 | static inline void workingset_node_shadows_dec(struct radix_tree_node *node) | ||
278 | { | ||
279 | VM_WARN_ON_ONCE(!workingset_node_shadows(node)); | ||
280 | node->count -= 1U << RADIX_TREE_COUNT_SHIFT; | ||
281 | } | ||
282 | 250 | ||
283 | /* linux/mm/page_alloc.c */ | 251 | /* linux/mm/page_alloc.c */ |
284 | extern unsigned long totalram_pages; | 252 | extern unsigned long totalram_pages; |
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 3d9d786a943c..d68edffbf142 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h | |||
@@ -82,6 +82,7 @@ extern void *__vmalloc_node_range(unsigned long size, unsigned long align, | |||
82 | const void *caller); | 82 | const void *caller); |
83 | 83 | ||
84 | extern void vfree(const void *addr); | 84 | extern void vfree(const void *addr); |
85 | extern void vfree_atomic(const void *addr); | ||
85 | 86 | ||
86 | extern void *vmap(struct page **pages, unsigned int count, | 87 | extern void *vmap(struct page **pages, unsigned int count, |
87 | unsigned long flags, pgprot_t prot); | 88 | unsigned long flags, pgprot_t prot); |
diff --git a/init/do_mounts.c b/init/do_mounts.c index dea5de95c2dd..c2de5104aad2 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c | |||
@@ -588,7 +588,7 @@ void __init prepare_namespace(void) | |||
588 | saved_root_name); | 588 | saved_root_name); |
589 | while (driver_probe_done() != 0 || | 589 | while (driver_probe_done() != 0 || |
590 | (ROOT_DEV = name_to_dev_t(saved_root_name)) == 0) | 590 | (ROOT_DEV = name_to_dev_t(saved_root_name)) == 0) |
591 | msleep(100); | 591 | msleep(5); |
592 | async_synchronize_full(); | 592 | async_synchronize_full(); |
593 | } | 593 | } |
594 | 594 | ||
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index fc1ef736253c..98c9011eac78 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c | |||
@@ -697,7 +697,7 @@ kdb_printit: | |||
697 | * Write to all consoles. | 697 | * Write to all consoles. |
698 | */ | 698 | */ |
699 | retlen = strlen(kdb_buffer); | 699 | retlen = strlen(kdb_buffer); |
700 | cp = (char *) printk_skip_level(kdb_buffer); | 700 | cp = (char *) printk_skip_headers(kdb_buffer); |
701 | if (!dbg_kdb_mode && kgdb_connected) { | 701 | if (!dbg_kdb_mode && kgdb_connected) { |
702 | gdbstub_msg_write(cp, retlen - (cp - kdb_buffer)); | 702 | gdbstub_msg_write(cp, retlen - (cp - kdb_buffer)); |
703 | } else { | 703 | } else { |
diff --git a/kernel/fork.c b/kernel/fork.c index 5957cf8b4c4b..7377f414f3ce 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -229,7 +229,7 @@ static inline void free_thread_stack(struct task_struct *tsk) | |||
229 | } | 229 | } |
230 | local_irq_restore(flags); | 230 | local_irq_restore(flags); |
231 | 231 | ||
232 | vfree(tsk->stack); | 232 | vfree_atomic(tsk->stack); |
233 | return; | 233 | return; |
234 | } | 234 | } |
235 | #endif | 235 | #endif |
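free_thread_stack() can be reached with interrupts disabled, so the vmalloc'ed stack is handed to the new vfree_atomic(), which queues the actual unmapping instead of potentially sleeping like vfree(). A minimal sketch of the intended use (the function name is illustrative):

	#include <linux/vmalloc.h>

	static void example_free_from_atomic_context(void *buf)
	{
		/* vfree() may sleep; vfree_atomic() defers the real work so it
		 * is safe where sleeping is not allowed. */
		vfree_atomic(buf);
	}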
diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 2b59c82cc3e1..40c07e4fa116 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c | |||
@@ -106,7 +106,8 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) | |||
106 | * complain: | 106 | * complain: |
107 | */ | 107 | */ |
108 | if (sysctl_hung_task_warnings) { | 108 | if (sysctl_hung_task_warnings) { |
109 | sysctl_hung_task_warnings--; | 109 | if (sysctl_hung_task_warnings > 0) |
110 | sysctl_hung_task_warnings--; | ||
110 | pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n", | 111 | pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n", |
111 | t->comm, t->pid, timeout); | 112 | t->comm, t->pid, timeout); |
112 | pr_err(" %s %s %.*s\n", | 113 | pr_err(" %s %s %.*s\n", |
diff --git a/kernel/kthread.c b/kernel/kthread.c index 956495f0efaf..2318fba86277 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c | |||
@@ -261,7 +261,8 @@ static void create_kthread(struct kthread_create_info *create) | |||
261 | } | 261 | } |
262 | } | 262 | } |
263 | 263 | ||
264 | static struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data), | 264 | static __printf(4, 0) |
265 | struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data), | ||
265 | void *data, int node, | 266 | void *data, int node, |
266 | const char namefmt[], | 267 | const char namefmt[], |
267 | va_list args) | 268 | va_list args) |
@@ -635,7 +636,7 @@ repeat: | |||
635 | } | 636 | } |
636 | EXPORT_SYMBOL_GPL(kthread_worker_fn); | 637 | EXPORT_SYMBOL_GPL(kthread_worker_fn); |
637 | 638 | ||
638 | static struct kthread_worker * | 639 | static __printf(3, 0) struct kthread_worker * |
639 | __kthread_create_worker(int cpu, unsigned int flags, | 640 | __kthread_create_worker(int cpu, unsigned int flags, |
640 | const char namefmt[], va_list args) | 641 | const char namefmt[], va_list args) |
641 | { | 642 | { |
diff --git a/kernel/printk/nmi.c b/kernel/printk/nmi.c index 16bab471c7e2..f011aaef583c 100644 --- a/kernel/printk/nmi.c +++ b/kernel/printk/nmi.c | |||
@@ -67,7 +67,8 @@ static int vprintk_nmi(const char *fmt, va_list args) | |||
67 | again: | 67 | again: |
68 | len = atomic_read(&s->len); | 68 | len = atomic_read(&s->len); |
69 | 69 | ||
70 | if (len >= sizeof(s->buffer)) { | 70 | /* The trailing '\0' is not counted into len. */ |
71 | if (len >= sizeof(s->buffer) - 1) { | ||
71 | atomic_inc(&nmi_message_lost); | 72 | atomic_inc(&nmi_message_lost); |
72 | return 0; | 73 | return 0; |
73 | } | 74 | } |
@@ -79,7 +80,7 @@ again: | |||
79 | if (!len) | 80 | if (!len) |
80 | smp_rmb(); | 81 | smp_rmb(); |
81 | 82 | ||
82 | add = vsnprintf(s->buffer + len, sizeof(s->buffer) - len, fmt, args); | 83 | add = vscnprintf(s->buffer + len, sizeof(s->buffer) - len, fmt, args); |
83 | 84 | ||
84 | /* | 85 | /* |
85 | * Do it once again if the buffer has been flushed in the meantime. | 86 | * Do it once again if the buffer has been flushed in the meantime. |
@@ -113,16 +114,51 @@ static void printk_nmi_flush_line(const char *text, int len) | |||
113 | 114 | ||
114 | } | 115 | } |
115 | 116 | ||
116 | /* | 117 | /* printk part of the temporary buffer line by line */ |
117 | * printk one line from the temporary buffer from @start index until | 118 | static int printk_nmi_flush_buffer(const char *start, size_t len) |
118 | * and including the @end index. | ||
119 | */ | ||
120 | static void printk_nmi_flush_seq_line(struct nmi_seq_buf *s, | ||
121 | int start, int end) | ||
122 | { | 119 | { |
123 | const char *buf = s->buffer + start; | 120 | const char *c, *end; |
121 | bool header; | ||
122 | |||
123 | c = start; | ||
124 | end = start + len; | ||
125 | header = true; | ||
126 | |||
127 | /* Print line by line. */ | ||
128 | while (c < end) { | ||
129 | if (*c == '\n') { | ||
130 | printk_nmi_flush_line(start, c - start + 1); | ||
131 | start = ++c; | ||
132 | header = true; | ||
133 | continue; | ||
134 | } | ||
135 | |||
136 | /* Handle continuous lines or missing new line. */ | ||
137 | if ((c + 1 < end) && printk_get_level(c)) { | ||
138 | if (header) { | ||
139 | c = printk_skip_level(c); | ||
140 | continue; | ||
141 | } | ||
142 | |||
143 | printk_nmi_flush_line(start, c - start); | ||
144 | start = c++; | ||
145 | header = true; | ||
146 | continue; | ||
147 | } | ||
148 | |||
149 | header = false; | ||
150 | c++; | ||
151 | } | ||
124 | 152 | ||
125 | printk_nmi_flush_line(buf, (end - start) + 1); | 153 | /* Check if there was a partial line. Ignore pure header. */ |
154 | if (start < end && !header) { | ||
155 | static const char newline[] = KERN_CONT "\n"; | ||
156 | |||
157 | printk_nmi_flush_line(start, end - start); | ||
158 | printk_nmi_flush_line(newline, strlen(newline)); | ||
159 | } | ||
160 | |||
161 | return len; | ||
126 | } | 162 | } |
127 | 163 | ||
128 | /* | 164 | /* |
@@ -135,8 +171,8 @@ static void __printk_nmi_flush(struct irq_work *work) | |||
135 | __RAW_SPIN_LOCK_INITIALIZER(read_lock); | 171 | __RAW_SPIN_LOCK_INITIALIZER(read_lock); |
136 | struct nmi_seq_buf *s = container_of(work, struct nmi_seq_buf, work); | 172 | struct nmi_seq_buf *s = container_of(work, struct nmi_seq_buf, work); |
137 | unsigned long flags; | 173 | unsigned long flags; |
138 | size_t len, size; | 174 | size_t len; |
139 | int i, last_i; | 175 | int i; |
140 | 176 | ||
141 | /* | 177 | /* |
142 | * The lock has two functions. First, one reader has to flush all | 178 | * The lock has two functions. First, one reader has to flush all |
@@ -154,12 +190,14 @@ more: | |||
154 | /* | 190 | /* |
155 | * This is just a paranoid check that nobody has manipulated | 191 | * This is just a paranoid check that nobody has manipulated |
156 | * the buffer an unexpected way. If we printed something then | 192 | * the buffer an unexpected way. If we printed something then |
157 | * @len must only increase. | 193 | * @len must only increase. Also it should never overflow the |
194 | * buffer size. | ||
158 | */ | 195 | */ |
159 | if (i && i >= len) { | 196 | if ((i && i >= len) || len > sizeof(s->buffer)) { |
160 | const char *msg = "printk_nmi_flush: internal error\n"; | 197 | const char *msg = "printk_nmi_flush: internal error\n"; |
161 | 198 | ||
162 | printk_nmi_flush_line(msg, strlen(msg)); | 199 | printk_nmi_flush_line(msg, strlen(msg)); |
200 | len = 0; | ||
163 | } | 201 | } |
164 | 202 | ||
165 | if (!len) | 203 | if (!len) |
@@ -167,22 +205,7 @@ more: | |||
167 | 205 | ||
168 | /* Make sure that data has been written up to the @len */ | 206 | /* Make sure that data has been written up to the @len */ |
169 | smp_rmb(); | 207 | smp_rmb(); |
170 | 208 | i += printk_nmi_flush_buffer(s->buffer + i, len - i); | |
171 | size = min(len, sizeof(s->buffer)); | ||
172 | last_i = i; | ||
173 | |||
174 | /* Print line by line. */ | ||
175 | for (; i < size; i++) { | ||
176 | if (s->buffer[i] == '\n') { | ||
177 | printk_nmi_flush_seq_line(s, last_i, i); | ||
178 | last_i = i + 1; | ||
179 | } | ||
180 | } | ||
181 | /* Check if there was a partial line. */ | ||
182 | if (last_i < size) { | ||
183 | printk_nmi_flush_seq_line(s, last_i, size - 1); | ||
184 | printk_nmi_flush_line("\n", strlen("\n")); | ||
185 | } | ||
186 | 209 | ||
187 | /* | 210 | /* |
188 | * Check that nothing has got added in the meantime and truncate | 211 | * Check that nothing has got added in the meantime and truncate |
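Two fixes in the NMI printk buffer: the length check now reserves room for the trailing '\0', and vsnprintf() is replaced by vscnprintf() so the fill level only grows by what was actually stored; the flush path is also rewritten to cope with headers inside continuation fragments. A sketch of why vscnprintf() is the right return value to accumulate (names here are illustrative):

	#include <linux/kernel.h>

	static int example_append(char *buf, size_t size, size_t len,
				  const char *fmt, ...)
	{
		va_list args;
		int add;

		va_start(args, fmt);
		/* vsnprintf() would return the length the output *would* have
		 * had, which can exceed the remaining space on truncation;
		 * vscnprintf() returns the number of characters actually
		 * written. */
		add = vscnprintf(buf + len, size - len, fmt, args);
		va_end(args);

		return add;
	}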
diff --git a/kernel/sys.c b/kernel/sys.c index 78c9fb7dd680..9758892a2d09 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -1697,16 +1697,6 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) | |||
1697 | fput(exe_file); | 1697 | fput(exe_file); |
1698 | } | 1698 | } |
1699 | 1699 | ||
1700 | /* | ||
1701 | * The symlink can be changed only once, just to disallow arbitrary | ||
1702 | * transitions malicious software might bring in. This means one | ||
1703 | * could make a snapshot over all processes running and monitor | ||
1704 | * /proc/pid/exe changes to notice unusual activity if needed. | ||
1705 | */ | ||
1706 | err = -EPERM; | ||
1707 | if (test_and_set_bit(MMF_EXE_FILE_CHANGED, &mm->flags)) | ||
1708 | goto exit; | ||
1709 | |||
1710 | err = 0; | 1700 | err = 0; |
1711 | /* set the new file, lockless */ | 1701 | /* set the new file, lockless */ |
1712 | get_file(exe.file); | 1702 | get_file(exe.file); |
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 9bb7d825ba14..e40a0715f422 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug | |||
@@ -15,6 +15,21 @@ config PRINTK_TIME | |||
15 | The behavior is also controlled by the kernel command line | 15 | The behavior is also controlled by the kernel command line |
16 | parameter printk.time=1. See Documentation/kernel-parameters.txt | 16 | parameter printk.time=1. See Documentation/kernel-parameters.txt |
17 | 17 | ||
18 | config CONSOLE_LOGLEVEL_DEFAULT | ||
19 | int "Default console loglevel (1-15)" | ||
20 | range 1 15 | ||
21 | default "7" | ||
22 | help | ||
23 | Default loglevel to determine what will be printed on the console. | ||
24 | |||
25 | Setting a default here is equivalent to passing in loglevel=<x> in | ||
26 | the kernel bootargs. loglevel=<x> continues to override whatever | ||
27 | value is specified here as well. | ||
28 | |||
29 | Note: This does not affect the log level of un-prefixed printk() | ||
30 | usage in the kernel. That is controlled by the MESSAGE_LOGLEVEL_DEFAULT | ||
31 | option. | ||
32 | |||
18 | config MESSAGE_LOGLEVEL_DEFAULT | 33 | config MESSAGE_LOGLEVEL_DEFAULT |
19 | int "Default message log level (1-7)" | 34 | int "Default message log level (1-7)" |
20 | range 1 7 | 35 | range 1 7 |
@@ -26,6 +41,10 @@ config MESSAGE_LOGLEVEL_DEFAULT | |||
26 | that are auditing their logs closely may want to set it to a lower | 41 | that are auditing their logs closely may want to set it to a lower |
27 | priority. | 42 | priority. |
28 | 43 | ||
44 | Note: This does not affect what message level gets printed on the console | ||
45 | by default. To change that, use loglevel=<x> in the kernel bootargs, | ||
46 | or pick a different CONSOLE_LOGLEVEL_DEFAULT configuration value. | ||
47 | |||
29 | config BOOT_PRINTK_DELAY | 48 | config BOOT_PRINTK_DELAY |
30 | bool "Delay each boot printk message by N milliseconds" | 49 | bool "Delay each boot printk message by N milliseconds" |
31 | depends on DEBUG_KERNEL && PRINTK && GENERIC_CALIBRATE_DELAY | 50 | depends on DEBUG_KERNEL && PRINTK && GENERIC_CALIBRATE_DELAY |
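To illustrate the distinction drawn in the two help texts above, a minimal hedged module sketch (module name and messages are made up): the un-prefixed printk() is logged at MESSAGE_LOGLEVEL_DEFAULT, while whether either line actually reaches the console is decided by the console loglevel, i.e. CONSOLE_LOGLEVEL_DEFAULT unless loglevel= overrides it.

#include <linux/module.h>
#include <linux/printk.h>

static int __init loglevel_demo_init(void)
{
	/* No level prefix: logged at MESSAGE_LOGLEVEL_DEFAULT. */
	printk("loglevel demo: default-level message\n");
	/* Explicit KERN_DEBUG (7): reaches the console only when the
	 * console loglevel is raised above 7. */
	printk(KERN_DEBUG "loglevel demo: debug message\n");
	return 0;
}
module_init(loglevel_demo_init);

MODULE_LICENSE("GPL");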
@@ -1986,7 +2005,7 @@ config ARCH_HAS_DEVMEM_IS_ALLOWED | |||
1986 | 2005 | ||
1987 | config STRICT_DEVMEM | 2006 | config STRICT_DEVMEM |
1988 | bool "Filter access to /dev/mem" | 2007 | bool "Filter access to /dev/mem" |
1989 | depends on MMU | 2008 | depends on MMU && DEVMEM |
1990 | depends on ARCH_HAS_DEVMEM_IS_ALLOWED | 2009 | depends on ARCH_HAS_DEVMEM_IS_ALLOWED |
1991 | default y if TILE || PPC | 2010 | default y if TILE || PPC |
1992 | ---help--- | 2011 | ---help--- |
@@ -927,6 +927,9 @@ EXPORT_SYMBOL(ida_pre_get); | |||
927 | * and go back to the ida_pre_get() call. If the ida is full, it will | 927 | * and go back to the ida_pre_get() call. If the ida is full, it will |
928 | * return %-ENOSPC. | 928 | * return %-ENOSPC. |
929 | * | 929 | * |
930 | * Note that callers must ensure that concurrent access to @ida is not possible. | ||
931 | * See ida_simple_get() for a variant which takes care of locking. | ||
932 | * | ||
930 | * @p_id returns a value in the range @starting_id ... %0x7fffffff. | 933 | * @p_id returns a value in the range @starting_id ... %0x7fffffff. |
931 | */ | 934 | */ |
932 | int ida_get_new_above(struct ida *ida, int starting_id, int *p_id) | 935 | int ida_get_new_above(struct ida *ida, int starting_id, int *p_id) |
@@ -1073,6 +1076,9 @@ EXPORT_SYMBOL(ida_destroy); | |||
1073 | * Allocates an id in the range start <= id < end, or returns -ENOSPC. | 1076 | * Allocates an id in the range start <= id < end, or returns -ENOSPC. |
1074 | * On memory allocation failure, returns -ENOMEM. | 1077 | * On memory allocation failure, returns -ENOMEM. |
1075 | * | 1078 | * |
1079 | * Compared to ida_get_new_above() this function does its own locking, and | ||
1080 | * should be used unless there are special requirements. | ||
1081 | * | ||
1076 | * Use ida_simple_remove() to get rid of an id. | 1082 | * Use ida_simple_remove() to get rid of an id. |
1077 | */ | 1083 | */ |
1078 | int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end, | 1084 | int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end, |
@@ -1119,6 +1125,11 @@ EXPORT_SYMBOL(ida_simple_get); | |||
1119 | * ida_simple_remove - remove an allocated id. | 1125 | * ida_simple_remove - remove an allocated id. |
1120 | * @ida: the (initialized) ida. | 1126 | * @ida: the (initialized) ida. |
1121 | * @id: the id returned by ida_simple_get. | 1127 | * @id: the id returned by ida_simple_get. |
1128 | * | ||
1129 | * Use to release an id allocated with ida_simple_get(). | ||
1130 | * | ||
1131 | * Compared to ida_remove() this function does its own locking, and should be | ||
1132 | * used unless there are special requirements. | ||
1122 | */ | 1133 | */ |
1123 | void ida_simple_remove(struct ida *ida, unsigned int id) | 1134 | void ida_simple_remove(struct ida *ida, unsigned int id) |
1124 | { | 1135 | { |
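The locking notes added above point most users at the ida_simple_*() API. A hedged usage sketch (the ida name and messages are made up):

#include <linux/gfp.h>
#include <linux/idr.h>
#include <linux/module.h>

static DEFINE_IDA(demo_ida);

static int __init ida_demo_init(void)
{
	int id;

	/* ida_simple_get() does its own locking, per the comment above;
	 * end == 0 means "no upper limit". */
	id = ida_simple_get(&demo_ida, 0, 0, GFP_KERNEL);
	if (id < 0)
		return id;
	pr_info("ida demo: allocated id %d\n", id);
	/* ida_simple_remove() likewise needs no external locking. */
	ida_simple_remove(&demo_ida, id);
	return 0;
}
module_init(ida_demo_init);

MODULE_LICENSE("GPL");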
diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 4b8bb3618b83..2e8c6f7aa56e 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c | |||
@@ -220,10 +220,10 @@ static void dump_node(struct radix_tree_node *node, unsigned long index) | |||
220 | { | 220 | { |
221 | unsigned long i; | 221 | unsigned long i; |
222 | 222 | ||
223 | pr_debug("radix node: %p offset %d tags %lx %lx %lx shift %d count %d parent %p\n", | 223 | pr_debug("radix node: %p offset %d tags %lx %lx %lx shift %d count %d exceptional %d parent %p\n", |
224 | node, node->offset, | 224 | node, node->offset, |
225 | node->tags[0][0], node->tags[1][0], node->tags[2][0], | 225 | node->tags[0][0], node->tags[1][0], node->tags[2][0], |
226 | node->shift, node->count, node->parent); | 226 | node->shift, node->count, node->exceptional, node->parent); |
227 | 227 | ||
228 | for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { | 228 | for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { |
229 | unsigned long first = index | (i << node->shift); | 229 | unsigned long first = index | (i << node->shift); |
@@ -325,7 +325,6 @@ static void radix_tree_node_rcu_free(struct rcu_head *head) | |||
325 | tag_clear(node, i, 0); | 325 | tag_clear(node, i, 0); |
326 | 326 | ||
327 | node->slots[0] = NULL; | 327 | node->slots[0] = NULL; |
328 | node->count = 0; | ||
329 | 328 | ||
330 | kmem_cache_free(radix_tree_node_cachep, node); | 329 | kmem_cache_free(radix_tree_node_cachep, node); |
331 | } | 330 | } |
@@ -522,8 +521,13 @@ static int radix_tree_extend(struct radix_tree_root *root, | |||
522 | node->offset = 0; | 521 | node->offset = 0; |
523 | node->count = 1; | 522 | node->count = 1; |
524 | node->parent = NULL; | 523 | node->parent = NULL; |
525 | if (radix_tree_is_internal_node(slot)) | 524 | if (radix_tree_is_internal_node(slot)) { |
526 | entry_to_node(slot)->parent = node; | 525 | entry_to_node(slot)->parent = node; |
526 | } else { | ||
527 | /* Moving an exceptional root->rnode to a node */ | ||
528 | if (radix_tree_exceptional_entry(slot)) | ||
529 | node->exceptional = 1; | ||
530 | } | ||
527 | node->slots[0] = slot; | 531 | node->slots[0] = slot; |
528 | slot = node_to_entry(node); | 532 | slot = node_to_entry(node); |
529 | rcu_assign_pointer(root->rnode, slot); | 533 | rcu_assign_pointer(root->rnode, slot); |
@@ -534,6 +538,104 @@ out: | |||
534 | } | 538 | } |
535 | 539 | ||
536 | /** | 540 | /** |
541 | * radix_tree_shrink - shrink radix tree to minimum height | ||
542 | * @root radix tree root | ||
543 | */ | ||
544 | static inline void radix_tree_shrink(struct radix_tree_root *root, | ||
545 | radix_tree_update_node_t update_node, | ||
546 | void *private) | ||
547 | { | ||
548 | for (;;) { | ||
549 | struct radix_tree_node *node = root->rnode; | ||
550 | struct radix_tree_node *child; | ||
551 | |||
552 | if (!radix_tree_is_internal_node(node)) | ||
553 | break; | ||
554 | node = entry_to_node(node); | ||
555 | |||
556 | /* | ||
557 | * If the candidate node has more than one child, or its child | ||
558 | * is not at the leftmost slot, or the child is a multiorder | ||
559 | * entry, we cannot shrink. | ||
560 | */ | ||
561 | if (node->count != 1) | ||
562 | break; | ||
563 | child = node->slots[0]; | ||
564 | if (!child) | ||
565 | break; | ||
566 | if (!radix_tree_is_internal_node(child) && node->shift) | ||
567 | break; | ||
568 | |||
569 | if (radix_tree_is_internal_node(child)) | ||
570 | entry_to_node(child)->parent = NULL; | ||
571 | |||
572 | /* | ||
573 | * We don't need rcu_assign_pointer(), since we are simply | ||
574 | * moving the node from one part of the tree to another: if it | ||
575 | * was safe to dereference the old pointer to it | ||
576 | * (node->slots[0]), it will be safe to dereference the new | ||
577 | * one (root->rnode) as far as dependent read barriers go. | ||
578 | */ | ||
579 | root->rnode = child; | ||
580 | |||
581 | /* | ||
582 | * We have a dilemma here. The node's slot[0] must not be | ||
583 | * NULLed in case there are concurrent lookups expecting to | ||
584 | * find the item. However if this was a bottom-level node, | ||
585 | * then it may be subject to the slot pointer being visible | ||
586 | * to callers dereferencing it. If item corresponding to | ||
587 | * slot[0] is subsequently deleted, these callers would expect | ||
588 | * their slot to become empty sooner or later. | ||
589 | * | ||
590 | * For example, lockless pagecache will look up a slot, deref | ||
591 | * the page pointer, and if the page has 0 refcount it means it | ||
592 | * was concurrently deleted from pagecache so try the deref | ||
593 | * again. Fortunately there is already a requirement for logic | ||
594 | * to retry the entire slot lookup -- the indirect pointer | ||
595 | * problem (replacing direct root node with an indirect pointer | ||
596 | * also results in a stale slot). So tag the slot as indirect | ||
597 | * to force callers to retry. | ||
598 | */ | ||
599 | node->count = 0; | ||
600 | if (!radix_tree_is_internal_node(child)) { | ||
601 | node->slots[0] = RADIX_TREE_RETRY; | ||
602 | if (update_node) | ||
603 | update_node(node, private); | ||
604 | } | ||
605 | |||
606 | radix_tree_node_free(node); | ||
607 | } | ||
608 | } | ||
609 | |||
610 | static void delete_node(struct radix_tree_root *root, | ||
611 | struct radix_tree_node *node, | ||
612 | radix_tree_update_node_t update_node, void *private) | ||
613 | { | ||
614 | do { | ||
615 | struct radix_tree_node *parent; | ||
616 | |||
617 | if (node->count) { | ||
618 | if (node == entry_to_node(root->rnode)) | ||
619 | radix_tree_shrink(root, update_node, private); | ||
620 | return; | ||
621 | } | ||
622 | |||
623 | parent = node->parent; | ||
624 | if (parent) { | ||
625 | parent->slots[node->offset] = NULL; | ||
626 | parent->count--; | ||
627 | } else { | ||
628 | root_tag_clear_all(root); | ||
629 | root->rnode = NULL; | ||
630 | } | ||
631 | |||
632 | radix_tree_node_free(node); | ||
633 | |||
634 | node = parent; | ||
635 | } while (node); | ||
636 | } | ||
637 | |||
638 | /** | ||
537 | * __radix_tree_create - create a slot in a radix tree | 639 | * __radix_tree_create - create a slot in a radix tree |
538 | * @root: radix tree root | 640 | * @root: radix tree root |
539 | * @index: index key | 641 | * @index: index key |
@@ -649,6 +751,8 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index, | |||
649 | if (node) { | 751 | if (node) { |
650 | unsigned offset = get_slot_offset(node, slot); | 752 | unsigned offset = get_slot_offset(node, slot); |
651 | node->count++; | 753 | node->count++; |
754 | if (radix_tree_exceptional_entry(item)) | ||
755 | node->exceptional++; | ||
652 | BUG_ON(tag_get(node, 0, offset)); | 756 | BUG_ON(tag_get(node, 0, offset)); |
653 | BUG_ON(tag_get(node, 1, offset)); | 757 | BUG_ON(tag_get(node, 1, offset)); |
654 | BUG_ON(tag_get(node, 2, offset)); | 758 | BUG_ON(tag_get(node, 2, offset)); |
@@ -746,6 +850,85 @@ void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index) | |||
746 | } | 850 | } |
747 | EXPORT_SYMBOL(radix_tree_lookup); | 851 | EXPORT_SYMBOL(radix_tree_lookup); |
748 | 852 | ||
853 | static void replace_slot(struct radix_tree_root *root, | ||
854 | struct radix_tree_node *node, | ||
855 | void **slot, void *item, | ||
856 | bool warn_typeswitch) | ||
857 | { | ||
858 | void *old = rcu_dereference_raw(*slot); | ||
859 | int count, exceptional; | ||
860 | |||
861 | WARN_ON_ONCE(radix_tree_is_internal_node(item)); | ||
862 | |||
863 | count = !!item - !!old; | ||
864 | exceptional = !!radix_tree_exceptional_entry(item) - | ||
865 | !!radix_tree_exceptional_entry(old); | ||
866 | |||
867 | WARN_ON_ONCE(warn_typeswitch && (count || exceptional)); | ||
868 | |||
869 | if (node) { | ||
870 | node->count += count; | ||
871 | node->exceptional += exceptional; | ||
872 | } | ||
873 | |||
874 | rcu_assign_pointer(*slot, item); | ||
875 | } | ||
876 | |||
877 | /** | ||
878 | * __radix_tree_replace - replace item in a slot | ||
879 | * @root: radix tree root | ||
880 | * @node: pointer to tree node | ||
881 | * @slot: pointer to slot in @node | ||
882 | * @item: new item to store in the slot. | ||
883 | * @update_node: callback for changing leaf nodes | ||
884 | * @private: private data to pass to @update_node | ||
885 | * | ||
886 | * For use with __radix_tree_lookup(). Caller must hold tree write locked | ||
887 | * across slot lookup and replacement. | ||
888 | */ | ||
889 | void __radix_tree_replace(struct radix_tree_root *root, | ||
890 | struct radix_tree_node *node, | ||
891 | void **slot, void *item, | ||
892 | radix_tree_update_node_t update_node, void *private) | ||
893 | { | ||
894 | /* | ||
895 | * This function supports replacing exceptional entries and | ||
896 | * deleting entries, but that needs accounting against the | ||
897 | * node unless the slot is root->rnode. | ||
898 | */ | ||
899 | replace_slot(root, node, slot, item, | ||
900 | !node && slot != (void **)&root->rnode); | ||
901 | |||
902 | if (!node) | ||
903 | return; | ||
904 | |||
905 | if (update_node) | ||
906 | update_node(node, private); | ||
907 | |||
908 | delete_node(root, node, update_node, private); | ||
909 | } | ||
910 | |||
911 | /** | ||
912 | * radix_tree_replace_slot - replace item in a slot | ||
913 | * @root: radix tree root | ||
914 | * @slot: pointer to slot | ||
915 | * @item: new item to store in the slot. | ||
916 | * | ||
917 | * For use with radix_tree_lookup_slot(), radix_tree_gang_lookup_slot(), | ||
918 | * radix_tree_gang_lookup_tag_slot(). Caller must hold tree write locked | ||
919 | * across slot lookup and replacement. | ||
920 | * | ||
921 | * NOTE: This cannot be used to switch between non-entries (empty slots), | ||
922 | * regular entries, and exceptional entries, as that requires accounting | ||
923 | * inside the radix tree node. When switching from one type of entry to | ||
924 | * another or deleting, use __radix_tree_lookup() and __radix_tree_replace(). | ||
925 | */ | ||
926 | void radix_tree_replace_slot(struct radix_tree_root *root, | ||
927 | void **slot, void *item) | ||
928 | { | ||
929 | replace_slot(root, NULL, slot, item, true); | ||
930 | } | ||
931 | |||
749 | /** | 932 | /** |
750 | * radix_tree_tag_set - set a tag on a radix tree node | 933 | * radix_tree_tag_set - set a tag on a radix tree node |
751 | * @root: radix tree root | 934 | * @root: radix tree root |
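The kernel-doc for __radix_tree_replace() and radix_tree_replace_slot() above describes pairing the former with __radix_tree_lookup(). A hedged sketch of that pairing (the tree, index and item names are made up, and the caller is assumed to hold the tree's write-side lock):

#include <linux/radix-tree.h>

static void demo_replace(struct radix_tree_root *tree, unsigned long index,
			 void *new_item)
{
	struct radix_tree_node *node;
	void **slot;
	void *old;

	old = __radix_tree_lookup(tree, index, &node, &slot);
	if (!old)
		return;
	/* No update_node callback is needed for a plain replacement that
	 * keeps the entry type (and thus the node accounting) unchanged. */
	__radix_tree_replace(tree, node, slot, new_item, NULL, NULL);
}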
@@ -1394,75 +1577,6 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) | |||
1394 | #endif /* CONFIG_SHMEM && CONFIG_SWAP */ | 1577 | #endif /* CONFIG_SHMEM && CONFIG_SWAP */ |
1395 | 1578 | ||
1396 | /** | 1579 | /** |
1397 | * radix_tree_shrink - shrink radix tree to minimum height | ||
1398 | * @root radix tree root | ||
1399 | */ | ||
1400 | static inline bool radix_tree_shrink(struct radix_tree_root *root) | ||
1401 | { | ||
1402 | bool shrunk = false; | ||
1403 | |||
1404 | for (;;) { | ||
1405 | struct radix_tree_node *node = root->rnode; | ||
1406 | struct radix_tree_node *child; | ||
1407 | |||
1408 | if (!radix_tree_is_internal_node(node)) | ||
1409 | break; | ||
1410 | node = entry_to_node(node); | ||
1411 | |||
1412 | /* | ||
1413 | * The candidate node has more than one child, or its child | ||
1414 | * is not at the leftmost slot, or the child is a multiorder | ||
1415 | * entry, we cannot shrink. | ||
1416 | */ | ||
1417 | if (node->count != 1) | ||
1418 | break; | ||
1419 | child = node->slots[0]; | ||
1420 | if (!child) | ||
1421 | break; | ||
1422 | if (!radix_tree_is_internal_node(child) && node->shift) | ||
1423 | break; | ||
1424 | |||
1425 | if (radix_tree_is_internal_node(child)) | ||
1426 | entry_to_node(child)->parent = NULL; | ||
1427 | |||
1428 | /* | ||
1429 | * We don't need rcu_assign_pointer(), since we are simply | ||
1430 | * moving the node from one part of the tree to another: if it | ||
1431 | * was safe to dereference the old pointer to it | ||
1432 | * (node->slots[0]), it will be safe to dereference the new | ||
1433 | * one (root->rnode) as far as dependent read barriers go. | ||
1434 | */ | ||
1435 | root->rnode = child; | ||
1436 | |||
1437 | /* | ||
1438 | * We have a dilemma here. The node's slot[0] must not be | ||
1439 | * NULLed in case there are concurrent lookups expecting to | ||
1440 | * find the item. However if this was a bottom-level node, | ||
1441 | * then it may be subject to the slot pointer being visible | ||
1442 | * to callers dereferencing it. If item corresponding to | ||
1443 | * slot[0] is subsequently deleted, these callers would expect | ||
1444 | * their slot to become empty sooner or later. | ||
1445 | * | ||
1446 | * For example, lockless pagecache will look up a slot, deref | ||
1447 | * the page pointer, and if the page has 0 refcount it means it | ||
1448 | * was concurrently deleted from pagecache so try the deref | ||
1449 | * again. Fortunately there is already a requirement for logic | ||
1450 | * to retry the entire slot lookup -- the indirect pointer | ||
1451 | * problem (replacing direct root node with an indirect pointer | ||
1452 | * also results in a stale slot). So tag the slot as indirect | ||
1453 | * to force callers to retry. | ||
1454 | */ | ||
1455 | if (!radix_tree_is_internal_node(child)) | ||
1456 | node->slots[0] = RADIX_TREE_RETRY; | ||
1457 | |||
1458 | radix_tree_node_free(node); | ||
1459 | shrunk = true; | ||
1460 | } | ||
1461 | |||
1462 | return shrunk; | ||
1463 | } | ||
1464 | |||
1465 | /** | ||
1466 | * __radix_tree_delete_node - try to free node after clearing a slot | 1580 | * __radix_tree_delete_node - try to free node after clearing a slot |
1467 | * @root: radix tree root | 1581 | * @root: radix tree root |
1468 | * @node: node containing @index | 1582 | * @node: node containing @index |
@@ -1470,39 +1584,11 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root) | |||
1470 | * After clearing the slot at @index in @node from radix tree | 1584 | * After clearing the slot at @index in @node from radix tree |
1471 | * rooted at @root, call this function to attempt freeing the | 1585 | * rooted at @root, call this function to attempt freeing the |
1472 | * node and shrinking the tree. | 1586 | * node and shrinking the tree. |
1473 | * | ||
1474 | * Returns %true if @node was freed, %false otherwise. | ||
1475 | */ | 1587 | */ |
1476 | bool __radix_tree_delete_node(struct radix_tree_root *root, | 1588 | void __radix_tree_delete_node(struct radix_tree_root *root, |
1477 | struct radix_tree_node *node) | 1589 | struct radix_tree_node *node) |
1478 | { | 1590 | { |
1479 | bool deleted = false; | 1591 | delete_node(root, node, NULL, NULL); |
1480 | |||
1481 | do { | ||
1482 | struct radix_tree_node *parent; | ||
1483 | |||
1484 | if (node->count) { | ||
1485 | if (node == entry_to_node(root->rnode)) | ||
1486 | deleted |= radix_tree_shrink(root); | ||
1487 | return deleted; | ||
1488 | } | ||
1489 | |||
1490 | parent = node->parent; | ||
1491 | if (parent) { | ||
1492 | parent->slots[node->offset] = NULL; | ||
1493 | parent->count--; | ||
1494 | } else { | ||
1495 | root_tag_clear_all(root); | ||
1496 | root->rnode = NULL; | ||
1497 | } | ||
1498 | |||
1499 | radix_tree_node_free(node); | ||
1500 | deleted = true; | ||
1501 | |||
1502 | node = parent; | ||
1503 | } while (node); | ||
1504 | |||
1505 | return deleted; | ||
1506 | } | 1592 | } |
1507 | 1593 | ||
1508 | static inline void delete_sibling_entries(struct radix_tree_node *node, | 1594 | static inline void delete_sibling_entries(struct radix_tree_node *node, |
@@ -1559,10 +1645,7 @@ void *radix_tree_delete_item(struct radix_tree_root *root, | |||
1559 | node_tag_clear(root, node, tag, offset); | 1645 | node_tag_clear(root, node, tag, offset); |
1560 | 1646 | ||
1561 | delete_sibling_entries(node, node_to_entry(slot), offset); | 1647 | delete_sibling_entries(node, node_to_entry(slot), offset); |
1562 | node->slots[offset] = NULL; | 1648 | __radix_tree_replace(root, node, slot, NULL, NULL, NULL); |
1563 | node->count--; | ||
1564 | |||
1565 | __radix_tree_delete_node(root, node); | ||
1566 | 1649 | ||
1567 | return entry; | 1650 | return entry; |
1568 | } | 1651 | } |
diff --git a/lib/rbtree.c b/lib/rbtree.c index eb8a19fee110..1f8b112a7c35 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c | |||
@@ -296,11 +296,26 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, | |||
296 | * | 296 | * |
297 | * (p) (p) | 297 | * (p) (p) |
298 | * / \ / \ | 298 | * / \ / \ |
299 | * N S --> N Sl | 299 | * N S --> N sl |
300 | * / \ \ | 300 | * / \ \ |
301 | * sl Sr s | 301 | * sl Sr S |
302 | * \ | 302 | * \ |
303 | * Sr | 303 | * Sr |
304 | * | ||
305 | * Note: p might be red, and then both | ||
306 | * p and sl are red after rotation (which | ||
307 | * breaks property 4). This is fixed in | ||
308 | * Case 4 (in __rb_rotate_set_parents() | ||
309 | * which sets sl to the color of p | ||
310 | * and sets p RB_BLACK) | ||
311 | * | ||
312 | * (p) (sl) | ||
313 | * / \ / \ | ||
314 | * N sl --> P S | ||
315 | * \ / \ | ||
316 | * S N Sr | ||
317 | * \ | ||
318 | * Sr | ||
304 | */ | 319 | */ |
305 | tmp1 = tmp2->rb_right; | 320 | tmp1 = tmp2->rb_right; |
306 | WRITE_ONCE(sibling->rb_left, tmp1); | 321 | WRITE_ONCE(sibling->rb_left, tmp1); |
@@ -365,7 +380,7 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, | |||
365 | } | 380 | } |
366 | break; | 381 | break; |
367 | } | 382 | } |
368 | /* Case 3 - right rotate at sibling */ | 383 | /* Case 3 - left rotate at sibling */ |
369 | tmp1 = tmp2->rb_left; | 384 | tmp1 = tmp2->rb_left; |
370 | WRITE_ONCE(sibling->rb_right, tmp1); | 385 | WRITE_ONCE(sibling->rb_right, tmp1); |
371 | WRITE_ONCE(tmp2->rb_left, sibling); | 386 | WRITE_ONCE(tmp2->rb_left, sibling); |
@@ -377,7 +392,7 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, | |||
377 | tmp1 = sibling; | 392 | tmp1 = sibling; |
378 | sibling = tmp2; | 393 | sibling = tmp2; |
379 | } | 394 | } |
380 | /* Case 4 - left rotate at parent + color flips */ | 395 | /* Case 4 - right rotate at parent + color flips */ |
381 | tmp2 = sibling->rb_right; | 396 | tmp2 = sibling->rb_right; |
382 | WRITE_ONCE(parent->rb_left, tmp2); | 397 | WRITE_ONCE(parent->rb_left, tmp2); |
383 | WRITE_ONCE(sibling->rb_right, parent); | 398 | WRITE_ONCE(sibling->rb_right, parent); |
diff --git a/mm/Kconfig b/mm/Kconfig index 86e3e0e74d20..9b8fccb969dc 100644 --- a/mm/Kconfig +++ b/mm/Kconfig | |||
@@ -153,7 +153,7 @@ config MOVABLE_NODE | |||
153 | bool "Enable to assign a node which has only movable memory" | 153 | bool "Enable to assign a node which has only movable memory" |
154 | depends on HAVE_MEMBLOCK | 154 | depends on HAVE_MEMBLOCK |
155 | depends on NO_BOOTMEM | 155 | depends on NO_BOOTMEM |
156 | depends on X86_64 | 156 | depends on X86_64 || OF_EARLY_FLATTREE || MEMORY_HOTPLUG |
157 | depends on NUMA | 157 | depends on NUMA |
158 | default n | 158 | default n |
159 | help | 159 | help |
@@ -447,13 +447,9 @@ choice | |||
447 | benefit. | 447 | benefit. |
448 | endchoice | 448 | endchoice |
449 | 449 | ||
450 | # | ||
451 | # We don't deposit page tables on file THP mapping, | ||
452 | # but Power makes use of them to address MMU quirk. | ||
453 | # | ||
454 | config TRANSPARENT_HUGE_PAGECACHE | 450 | config TRANSPARENT_HUGE_PAGECACHE |
455 | def_bool y | 451 | def_bool y |
456 | depends on TRANSPARENT_HUGEPAGE && !PPC | 452 | depends on TRANSPARENT_HUGEPAGE |
457 | 453 | ||
458 | # | 454 | # |
459 | # UP and nommu archs use km based percpu allocator | 455 | # UP and nommu archs use km based percpu allocator |
diff --git a/mm/compaction.c b/mm/compaction.c index 0d37192d9423..223464227299 100644 --- a/mm/compaction.c +++ b/mm/compaction.c | |||
@@ -634,22 +634,6 @@ isolate_freepages_range(struct compact_control *cc, | |||
634 | return pfn; | 634 | return pfn; |
635 | } | 635 | } |
636 | 636 | ||
637 | /* Update the number of anon and file isolated pages in the zone */ | ||
638 | static void acct_isolated(struct zone *zone, struct compact_control *cc) | ||
639 | { | ||
640 | struct page *page; | ||
641 | unsigned int count[2] = { 0, }; | ||
642 | |||
643 | if (list_empty(&cc->migratepages)) | ||
644 | return; | ||
645 | |||
646 | list_for_each_entry(page, &cc->migratepages, lru) | ||
647 | count[!!page_is_file_cache(page)]++; | ||
648 | |||
649 | mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_ANON, count[0]); | ||
650 | mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, count[1]); | ||
651 | } | ||
652 | |||
653 | /* Similar to reclaim, but different enough that they don't share logic */ | 637 | /* Similar to reclaim, but different enough that they don't share logic */ |
654 | static bool too_many_isolated(struct zone *zone) | 638 | static bool too_many_isolated(struct zone *zone) |
655 | { | 639 | { |
@@ -866,6 +850,8 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, | |||
866 | 850 | ||
867 | /* Successfully isolated */ | 851 | /* Successfully isolated */ |
868 | del_page_from_lru_list(page, lruvec, page_lru(page)); | 852 | del_page_from_lru_list(page, lruvec, page_lru(page)); |
853 | inc_node_page_state(page, | ||
854 | NR_ISOLATED_ANON + page_is_file_cache(page)); | ||
869 | 855 | ||
870 | isolate_success: | 856 | isolate_success: |
871 | list_add(&page->lru, &cc->migratepages); | 857 | list_add(&page->lru, &cc->migratepages); |
@@ -902,7 +888,6 @@ isolate_fail: | |||
902 | spin_unlock_irqrestore(zone_lru_lock(zone), flags); | 888 | spin_unlock_irqrestore(zone_lru_lock(zone), flags); |
903 | locked = false; | 889 | locked = false; |
904 | } | 890 | } |
905 | acct_isolated(zone, cc); | ||
906 | putback_movable_pages(&cc->migratepages); | 891 | putback_movable_pages(&cc->migratepages); |
907 | cc->nr_migratepages = 0; | 892 | cc->nr_migratepages = 0; |
908 | cc->last_migrated_pfn = 0; | 893 | cc->last_migrated_pfn = 0; |
@@ -988,7 +973,6 @@ isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn, | |||
988 | if (cc->nr_migratepages == COMPACT_CLUSTER_MAX) | 973 | if (cc->nr_migratepages == COMPACT_CLUSTER_MAX) |
989 | break; | 974 | break; |
990 | } | 975 | } |
991 | acct_isolated(cc->zone, cc); | ||
992 | 976 | ||
993 | return pfn; | 977 | return pfn; |
994 | } | 978 | } |
@@ -1258,10 +1242,8 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, | |||
1258 | low_pfn = isolate_migratepages_block(cc, low_pfn, | 1242 | low_pfn = isolate_migratepages_block(cc, low_pfn, |
1259 | block_end_pfn, isolate_mode); | 1243 | block_end_pfn, isolate_mode); |
1260 | 1244 | ||
1261 | if (!low_pfn || cc->contended) { | 1245 | if (!low_pfn || cc->contended) |
1262 | acct_isolated(zone, cc); | ||
1263 | return ISOLATE_ABORT; | 1246 | return ISOLATE_ABORT; |
1264 | } | ||
1265 | 1247 | ||
1266 | /* | 1248 | /* |
1267 | * Either we isolated something and proceed with migration. Or | 1249 | * Either we isolated something and proceed with migration. Or |
@@ -1271,7 +1253,6 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, | |||
1271 | break; | 1253 | break; |
1272 | } | 1254 | } |
1273 | 1255 | ||
1274 | acct_isolated(zone, cc); | ||
1275 | /* Record where migration scanner will be restarted. */ | 1256 | /* Record where migration scanner will be restarted. */ |
1276 | cc->migrate_pfn = low_pfn; | 1257 | cc->migrate_pfn = low_pfn; |
1277 | 1258 | ||
diff --git a/mm/debug.c b/mm/debug.c index 9feb699c5d25..db1cd26d8752 100644 --- a/mm/debug.c +++ b/mm/debug.c | |||
@@ -59,6 +59,10 @@ void __dump_page(struct page *page, const char *reason) | |||
59 | 59 | ||
60 | pr_emerg("flags: %#lx(%pGp)\n", page->flags, &page->flags); | 60 | pr_emerg("flags: %#lx(%pGp)\n", page->flags, &page->flags); |
61 | 61 | ||
62 | print_hex_dump(KERN_ALERT, "raw: ", DUMP_PREFIX_NONE, 32, | ||
63 | sizeof(unsigned long), page, | ||
64 | sizeof(struct page), false); | ||
65 | |||
62 | if (reason) | 66 | if (reason) |
63 | pr_alert("page dumped because: %s\n", reason); | 67 | pr_alert("page dumped because: %s\n", reason); |
64 | 68 | ||
diff --git a/mm/filemap.c b/mm/filemap.c index 50b52fe51937..5b4dd03130da 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -132,44 +132,29 @@ static int page_cache_tree_insert(struct address_space *mapping, | |||
132 | if (!dax_mapping(mapping)) { | 132 | if (!dax_mapping(mapping)) { |
133 | if (shadowp) | 133 | if (shadowp) |
134 | *shadowp = p; | 134 | *shadowp = p; |
135 | if (node) | ||
136 | workingset_node_shadows_dec(node); | ||
137 | } else { | 135 | } else { |
138 | /* DAX can replace empty locked entry with a hole */ | 136 | /* DAX can replace empty locked entry with a hole */ |
139 | WARN_ON_ONCE(p != | 137 | WARN_ON_ONCE(p != |
140 | (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | | 138 | (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | |
141 | RADIX_DAX_ENTRY_LOCK)); | 139 | RADIX_DAX_ENTRY_LOCK)); |
142 | /* DAX accounts exceptional entries as normal pages */ | ||
143 | if (node) | ||
144 | workingset_node_pages_dec(node); | ||
145 | /* Wakeup waiters for exceptional entry lock */ | 140 | /* Wakeup waiters for exceptional entry lock */ |
146 | dax_wake_mapping_entry_waiter(mapping, page->index, | 141 | dax_wake_mapping_entry_waiter(mapping, page->index, |
147 | false); | 142 | false); |
148 | } | 143 | } |
149 | } | 144 | } |
150 | radix_tree_replace_slot(slot, page); | 145 | __radix_tree_replace(&mapping->page_tree, node, slot, page, |
146 | workingset_update_node, mapping); | ||
151 | mapping->nrpages++; | 147 | mapping->nrpages++; |
152 | if (node) { | ||
153 | workingset_node_pages_inc(node); | ||
154 | /* | ||
155 | * Don't track node that contains actual pages. | ||
156 | * | ||
157 | * Avoid acquiring the list_lru lock if already | ||
158 | * untracked. The list_empty() test is safe as | ||
159 | * node->private_list is protected by | ||
160 | * mapping->tree_lock. | ||
161 | */ | ||
162 | if (!list_empty(&node->private_list)) | ||
163 | list_lru_del(&workingset_shadow_nodes, | ||
164 | &node->private_list); | ||
165 | } | ||
166 | return 0; | 148 | return 0; |
167 | } | 149 | } |
168 | 150 | ||
169 | static void page_cache_tree_delete(struct address_space *mapping, | 151 | static void page_cache_tree_delete(struct address_space *mapping, |
170 | struct page *page, void *shadow) | 152 | struct page *page, void *shadow) |
171 | { | 153 | { |
172 | int i, nr = PageHuge(page) ? 1 : hpage_nr_pages(page); | 154 | int i, nr; |
155 | |||
156 | /* hugetlb pages are represented by one entry in the radix tree */ | ||
157 | nr = PageHuge(page) ? 1 : hpage_nr_pages(page); | ||
173 | 158 | ||
174 | VM_BUG_ON_PAGE(!PageLocked(page), page); | 159 | VM_BUG_ON_PAGE(!PageLocked(page), page); |
175 | VM_BUG_ON_PAGE(PageTail(page), page); | 160 | VM_BUG_ON_PAGE(PageTail(page), page); |
@@ -182,44 +167,11 @@ static void page_cache_tree_delete(struct address_space *mapping, | |||
182 | __radix_tree_lookup(&mapping->page_tree, page->index + i, | 167 | __radix_tree_lookup(&mapping->page_tree, page->index + i, |
183 | &node, &slot); | 168 | &node, &slot); |
184 | 169 | ||
185 | radix_tree_clear_tags(&mapping->page_tree, node, slot); | 170 | VM_BUG_ON_PAGE(!node && nr != 1, page); |
186 | |||
187 | if (!node) { | ||
188 | VM_BUG_ON_PAGE(nr != 1, page); | ||
189 | /* | ||
190 | * We need a node to properly account shadow | ||
191 | * entries. Don't plant any without. XXX | ||
192 | */ | ||
193 | shadow = NULL; | ||
194 | } | ||
195 | |||
196 | radix_tree_replace_slot(slot, shadow); | ||
197 | 171 | ||
198 | if (!node) | 172 | radix_tree_clear_tags(&mapping->page_tree, node, slot); |
199 | break; | 173 | __radix_tree_replace(&mapping->page_tree, node, slot, shadow, |
200 | 174 | workingset_update_node, mapping); | |
201 | workingset_node_pages_dec(node); | ||
202 | if (shadow) | ||
203 | workingset_node_shadows_inc(node); | ||
204 | else | ||
205 | if (__radix_tree_delete_node(&mapping->page_tree, node)) | ||
206 | continue; | ||
207 | |||
208 | /* | ||
209 | * Track node that only contains shadow entries. DAX mappings | ||
210 | * contain no shadow entries and may contain other exceptional | ||
211 | * entries so skip those. | ||
212 | * | ||
213 | * Avoid acquiring the list_lru lock if already tracked. | ||
214 | * The list_empty() test is safe as node->private_list is | ||
215 | * protected by mapping->tree_lock. | ||
216 | */ | ||
217 | if (!dax_mapping(mapping) && !workingset_node_pages(node) && | ||
218 | list_empty(&node->private_list)) { | ||
219 | node->private_data = mapping; | ||
220 | list_lru_add(&workingset_shadow_nodes, | ||
221 | &node->private_list); | ||
222 | } | ||
223 | } | 175 | } |
224 | 176 | ||
225 | if (shadow) { | 177 | if (shadow) { |
@@ -632,7 +632,8 @@ next_page: | |||
632 | return i; | 632 | return i; |
633 | } | 633 | } |
634 | 634 | ||
635 | bool vma_permits_fault(struct vm_area_struct *vma, unsigned int fault_flags) | 635 | static bool vma_permits_fault(struct vm_area_struct *vma, |
636 | unsigned int fault_flags) | ||
636 | { | 637 | { |
637 | bool write = !!(fault_flags & FAULT_FLAG_WRITE); | 638 | bool write = !!(fault_flags & FAULT_FLAG_WRITE); |
638 | bool foreign = !!(fault_flags & FAULT_FLAG_REMOTE); | 639 | bool foreign = !!(fault_flags & FAULT_FLAG_REMOTE); |
@@ -857,14 +858,12 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages, | |||
857 | EXPORT_SYMBOL(get_user_pages_locked); | 858 | EXPORT_SYMBOL(get_user_pages_locked); |
858 | 859 | ||
859 | /* | 860 | /* |
860 | * Same as get_user_pages_unlocked(...., FOLL_TOUCH) but it allows to | 861 | * Same as get_user_pages_unlocked(...., FOLL_TOUCH) but it allows for |
861 | * pass additional gup_flags as last parameter (like FOLL_HWPOISON). | 862 | * tsk, mm to be specified. |
862 | * | 863 | * |
863 | * NOTE: here FOLL_TOUCH is not set implicitly and must be set by the | 864 | * NOTE: here FOLL_TOUCH is not set implicitly and must be set by the |
864 | * caller if required (just like with __get_user_pages). "FOLL_GET", | 865 | * caller if required (just like with __get_user_pages). "FOLL_GET" |
865 | * "FOLL_WRITE" and "FOLL_FORCE" are set implicitly as needed | 866 | * is set implicitly if "pages" is non-NULL. |
866 | * according to the parameters "pages", "write", "force" | ||
867 | * respectively. | ||
868 | */ | 867 | */ |
869 | __always_inline long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, | 868 | __always_inline long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, |
870 | unsigned long start, unsigned long nr_pages, | 869 | unsigned long start, unsigned long nr_pages, |
@@ -894,10 +893,8 @@ EXPORT_SYMBOL(__get_user_pages_unlocked); | |||
894 | * get_user_pages_unlocked(tsk, mm, ..., pages); | 893 | * get_user_pages_unlocked(tsk, mm, ..., pages); |
895 | * | 894 | * |
896 | * It is functionally equivalent to get_user_pages_fast so | 895 | * It is functionally equivalent to get_user_pages_fast so |
897 | * get_user_pages_fast should be used instead, if the two parameters | 896 | * get_user_pages_fast should be used instead if specific gup_flags |
898 | * "tsk" and "mm" are respectively equal to current and current->mm, | 897 | * (e.g. FOLL_FORCE) are not required. |
899 | * or if "force" shall be set to 1 (get_user_pages_fast misses the | ||
900 | * "force" parameter). | ||
901 | */ | 898 | */ |
902 | long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, | 899 | long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, |
903 | struct page **pages, unsigned int gup_flags) | 900 | struct page **pages, unsigned int gup_flags) |
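A hedged sketch of the resulting calling convention (the address is a made-up example; process context with a live current->mm is assumed):

#include <linux/errno.h>
#include <linux/mm.h>

static int demo_pin_one_page(unsigned long addr)
{
	struct page *page;
	long ret;

	/* gup_flags replaces the old write/force parameters. */
	ret = get_user_pages_unlocked(addr, 1, &page, FOLL_WRITE);
	if (ret != 1)
		return ret < 0 ? ret : -EFAULT;
	/* ... use the pinned page ... */
	put_page(page);
	return 0;
}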
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index f8e35cc66d32..cee42cf05477 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
@@ -285,6 +285,15 @@ static ssize_t use_zero_page_store(struct kobject *kobj, | |||
285 | } | 285 | } |
286 | static struct kobj_attribute use_zero_page_attr = | 286 | static struct kobj_attribute use_zero_page_attr = |
287 | __ATTR(use_zero_page, 0644, use_zero_page_show, use_zero_page_store); | 287 | __ATTR(use_zero_page, 0644, use_zero_page_show, use_zero_page_store); |
288 | |||
289 | static ssize_t hpage_pmd_size_show(struct kobject *kobj, | ||
290 | struct kobj_attribute *attr, char *buf) | ||
291 | { | ||
292 | return sprintf(buf, "%lu\n", HPAGE_PMD_SIZE); | ||
293 | } | ||
294 | static struct kobj_attribute hpage_pmd_size_attr = | ||
295 | __ATTR_RO(hpage_pmd_size); | ||
296 | |||
288 | #ifdef CONFIG_DEBUG_VM | 297 | #ifdef CONFIG_DEBUG_VM |
289 | static ssize_t debug_cow_show(struct kobject *kobj, | 298 | static ssize_t debug_cow_show(struct kobject *kobj, |
290 | struct kobj_attribute *attr, char *buf) | 299 | struct kobj_attribute *attr, char *buf) |
@@ -307,6 +316,7 @@ static struct attribute *hugepage_attr[] = { | |||
307 | &enabled_attr.attr, | 316 | &enabled_attr.attr, |
308 | &defrag_attr.attr, | 317 | &defrag_attr.attr, |
309 | &use_zero_page_attr.attr, | 318 | &use_zero_page_attr.attr, |
319 | &hpage_pmd_size_attr.attr, | ||
310 | #if defined(CONFIG_SHMEM) && defined(CONFIG_TRANSPARENT_HUGE_PAGECACHE) | 320 | #if defined(CONFIG_SHMEM) && defined(CONFIG_TRANSPARENT_HUGE_PAGECACHE) |
311 | &shmem_enabled_attr.attr, | 321 | &shmem_enabled_attr.attr, |
312 | #endif | 322 | #endif |
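The new read-only attribute added above exposes the PMD-sized huge page size to userspace. A hedged userspace sketch that reads it (path as created by this patch under /sys/kernel/mm/transparent_hugepage/):

#include <stdio.h>

int main(void)
{
	unsigned long sz = 0;
	FILE *f = fopen("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", "r");

	if (!f)
		return 1;
	if (fscanf(f, "%lu", &sz) == 1)
		printf("PMD-sized huge page: %lu bytes\n", sz);
	fclose(f);
	return 0;
}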
@@ -1323,6 +1333,8 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, | |||
1323 | struct mm_struct *mm = tlb->mm; | 1333 | struct mm_struct *mm = tlb->mm; |
1324 | bool ret = false; | 1334 | bool ret = false; |
1325 | 1335 | ||
1336 | tlb_remove_check_page_size_change(tlb, HPAGE_PMD_SIZE); | ||
1337 | |||
1326 | ptl = pmd_trans_huge_lock(pmd, vma); | 1338 | ptl = pmd_trans_huge_lock(pmd, vma); |
1327 | if (!ptl) | 1339 | if (!ptl) |
1328 | goto out_unlocked; | 1340 | goto out_unlocked; |
@@ -1378,12 +1390,23 @@ out_unlocked: | |||
1378 | return ret; | 1390 | return ret; |
1379 | } | 1391 | } |
1380 | 1392 | ||
1393 | static inline void zap_deposited_table(struct mm_struct *mm, pmd_t *pmd) | ||
1394 | { | ||
1395 | pgtable_t pgtable; | ||
1396 | |||
1397 | pgtable = pgtable_trans_huge_withdraw(mm, pmd); | ||
1398 | pte_free(mm, pgtable); | ||
1399 | atomic_long_dec(&mm->nr_ptes); | ||
1400 | } | ||
1401 | |||
1381 | int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, | 1402 | int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, |
1382 | pmd_t *pmd, unsigned long addr) | 1403 | pmd_t *pmd, unsigned long addr) |
1383 | { | 1404 | { |
1384 | pmd_t orig_pmd; | 1405 | pmd_t orig_pmd; |
1385 | spinlock_t *ptl; | 1406 | spinlock_t *ptl; |
1386 | 1407 | ||
1408 | tlb_remove_check_page_size_change(tlb, HPAGE_PMD_SIZE); | ||
1409 | |||
1387 | ptl = __pmd_trans_huge_lock(pmd, vma); | 1410 | ptl = __pmd_trans_huge_lock(pmd, vma); |
1388 | if (!ptl) | 1411 | if (!ptl) |
1389 | return 0; | 1412 | return 0; |
@@ -1399,12 +1422,12 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, | |||
1399 | if (vma_is_dax(vma)) { | 1422 | if (vma_is_dax(vma)) { |
1400 | spin_unlock(ptl); | 1423 | spin_unlock(ptl); |
1401 | if (is_huge_zero_pmd(orig_pmd)) | 1424 | if (is_huge_zero_pmd(orig_pmd)) |
1402 | tlb_remove_page(tlb, pmd_page(orig_pmd)); | 1425 | tlb_remove_page_size(tlb, pmd_page(orig_pmd), HPAGE_PMD_SIZE); |
1403 | } else if (is_huge_zero_pmd(orig_pmd)) { | 1426 | } else if (is_huge_zero_pmd(orig_pmd)) { |
1404 | pte_free(tlb->mm, pgtable_trans_huge_withdraw(tlb->mm, pmd)); | 1427 | pte_free(tlb->mm, pgtable_trans_huge_withdraw(tlb->mm, pmd)); |
1405 | atomic_long_dec(&tlb->mm->nr_ptes); | 1428 | atomic_long_dec(&tlb->mm->nr_ptes); |
1406 | spin_unlock(ptl); | 1429 | spin_unlock(ptl); |
1407 | tlb_remove_page(tlb, pmd_page(orig_pmd)); | 1430 | tlb_remove_page_size(tlb, pmd_page(orig_pmd), HPAGE_PMD_SIZE); |
1408 | } else { | 1431 | } else { |
1409 | struct page *page = pmd_page(orig_pmd); | 1432 | struct page *page = pmd_page(orig_pmd); |
1410 | page_remove_rmap(page, true); | 1433 | page_remove_rmap(page, true); |
@@ -1417,6 +1440,8 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, | |||
1417 | atomic_long_dec(&tlb->mm->nr_ptes); | 1440 | atomic_long_dec(&tlb->mm->nr_ptes); |
1418 | add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR); | 1441 | add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR); |
1419 | } else { | 1442 | } else { |
1443 | if (arch_needs_pgtable_deposit()) | ||
1444 | zap_deposited_table(tlb->mm, pmd); | ||
1420 | add_mm_counter(tlb->mm, MM_FILEPAGES, -HPAGE_PMD_NR); | 1445 | add_mm_counter(tlb->mm, MM_FILEPAGES, -HPAGE_PMD_NR); |
1421 | } | 1446 | } |
1422 | spin_unlock(ptl); | 1447 | spin_unlock(ptl); |
@@ -1425,6 +1450,21 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, | |||
1425 | return 1; | 1450 | return 1; |
1426 | } | 1451 | } |
1427 | 1452 | ||
1453 | #ifndef pmd_move_must_withdraw | ||
1454 | static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl, | ||
1455 | spinlock_t *old_pmd_ptl, | ||
1456 | struct vm_area_struct *vma) | ||
1457 | { | ||
1458 | /* | ||
1459 | * With split pmd lock we also need to move preallocated | ||
1460 | * PTE page table if new_pmd is on different PMD page table. | ||
1461 | * | ||
1462 | * We also don't deposit and withdraw tables for file pages. | ||
1463 | */ | ||
1464 | return (new_pmd_ptl != old_pmd_ptl) && vma_is_anonymous(vma); | ||
1465 | } | ||
1466 | #endif | ||
1467 | |||
1428 | bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, | 1468 | bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, |
1429 | unsigned long new_addr, unsigned long old_end, | 1469 | unsigned long new_addr, unsigned long old_end, |
1430 | pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush) | 1470 | pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush) |
@@ -1462,8 +1502,7 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, | |||
1462 | force_flush = true; | 1502 | force_flush = true; |
1463 | VM_BUG_ON(!pmd_none(*new_pmd)); | 1503 | VM_BUG_ON(!pmd_none(*new_pmd)); |
1464 | 1504 | ||
1465 | if (pmd_move_must_withdraw(new_ptl, old_ptl) && | 1505 | if (pmd_move_must_withdraw(new_ptl, old_ptl, vma)) { |
1466 | vma_is_anonymous(vma)) { | ||
1467 | pgtable_t pgtable; | 1506 | pgtable_t pgtable; |
1468 | pgtable = pgtable_trans_huge_withdraw(mm, old_pmd); | 1507 | pgtable = pgtable_trans_huge_withdraw(mm, old_pmd); |
1469 | pgtable_trans_huge_deposit(mm, new_pmd, pgtable); | 1508 | pgtable_trans_huge_deposit(mm, new_pmd, pgtable); |
@@ -1589,6 +1628,12 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, | |||
1589 | 1628 | ||
1590 | if (!vma_is_anonymous(vma)) { | 1629 | if (!vma_is_anonymous(vma)) { |
1591 | _pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd); | 1630 | _pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd); |
1631 | /* | ||
1632 | * We are going to unmap this huge page. So | ||
1633 | * just go ahead and zap it | ||
1634 | */ | ||
1635 | if (arch_needs_pgtable_deposit()) | ||
1636 | zap_deposited_table(mm, pmd); | ||
1592 | if (vma_is_dax(vma)) | 1637 | if (vma_is_dax(vma)) |
1593 | return; | 1638 | return; |
1594 | page = pmd_page(_pmd); | 1639 | page = pmd_page(_pmd); |
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 418bf01a50ed..3edb759c5c7d 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -3286,6 +3286,11 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, | |||
3286 | BUG_ON(start & ~huge_page_mask(h)); | 3286 | BUG_ON(start & ~huge_page_mask(h)); |
3287 | BUG_ON(end & ~huge_page_mask(h)); | 3287 | BUG_ON(end & ~huge_page_mask(h)); |
3288 | 3288 | ||
3289 | /* | ||
3290 | * This is a hugetlb vma, all the pte entries should point | ||
3291 | * to huge page. | ||
3292 | */ | ||
3293 | tlb_remove_check_page_size_change(tlb, sz); | ||
3289 | tlb_start_vma(tlb, vma); | 3294 | tlb_start_vma(tlb, vma); |
3290 | mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); | 3295 | mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); |
3291 | address = start; | 3296 | address = start; |
@@ -3336,7 +3341,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, | |||
3336 | } | 3341 | } |
3337 | 3342 | ||
3338 | pte = huge_ptep_get_and_clear(mm, address, ptep); | 3343 | pte = huge_ptep_get_and_clear(mm, address, ptep); |
3339 | tlb_remove_tlb_entry(tlb, ptep, address); | 3344 | tlb_remove_huge_tlb_entry(h, tlb, ptep, address); |
3340 | if (huge_pte_dirty(pte)) | 3345 | if (huge_pte_dirty(pte)) |
3341 | set_page_dirty(page); | 3346 | set_page_dirty(page); |
3342 | 3347 | ||
@@ -3450,15 +3455,17 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, | |||
3450 | * Keep the pte_same checks anyway to make transition from the mutex easier. | 3455 | * Keep the pte_same checks anyway to make transition from the mutex easier. |
3451 | */ | 3456 | */ |
3452 | static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, | 3457 | static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, |
3453 | unsigned long address, pte_t *ptep, pte_t pte, | 3458 | unsigned long address, pte_t *ptep, |
3454 | struct page *pagecache_page, spinlock_t *ptl) | 3459 | struct page *pagecache_page, spinlock_t *ptl) |
3455 | { | 3460 | { |
3461 | pte_t pte; | ||
3456 | struct hstate *h = hstate_vma(vma); | 3462 | struct hstate *h = hstate_vma(vma); |
3457 | struct page *old_page, *new_page; | 3463 | struct page *old_page, *new_page; |
3458 | int ret = 0, outside_reserve = 0; | 3464 | int ret = 0, outside_reserve = 0; |
3459 | unsigned long mmun_start; /* For mmu_notifiers */ | 3465 | unsigned long mmun_start; /* For mmu_notifiers */ |
3460 | unsigned long mmun_end; /* For mmu_notifiers */ | 3466 | unsigned long mmun_end; /* For mmu_notifiers */ |
3461 | 3467 | ||
3468 | pte = huge_ptep_get(ptep); | ||
3462 | old_page = pte_page(pte); | 3469 | old_page = pte_page(pte); |
3463 | 3470 | ||
3464 | retry_avoidcopy: | 3471 | retry_avoidcopy: |
@@ -3711,8 +3718,7 @@ retry: | |||
3711 | vma_end_reservation(h, vma, address); | 3718 | vma_end_reservation(h, vma, address); |
3712 | } | 3719 | } |
3713 | 3720 | ||
3714 | ptl = huge_pte_lockptr(h, mm, ptep); | 3721 | ptl = huge_pte_lock(h, mm, ptep); |
3715 | spin_lock(ptl); | ||
3716 | size = i_size_read(mapping->host) >> huge_page_shift(h); | 3722 | size = i_size_read(mapping->host) >> huge_page_shift(h); |
3717 | if (idx >= size) | 3723 | if (idx >= size) |
3718 | goto backout; | 3724 | goto backout; |
@@ -3733,7 +3739,7 @@ retry: | |||
3733 | hugetlb_count_add(pages_per_huge_page(h), mm); | 3739 | hugetlb_count_add(pages_per_huge_page(h), mm); |
3734 | if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) { | 3740 | if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) { |
3735 | /* Optimization, do the COW without a second fault */ | 3741 | /* Optimization, do the COW without a second fault */ |
3736 | ret = hugetlb_cow(mm, vma, address, ptep, new_pte, page, ptl); | 3742 | ret = hugetlb_cow(mm, vma, address, ptep, page, ptl); |
3737 | } | 3743 | } |
3738 | 3744 | ||
3739 | spin_unlock(ptl); | 3745 | spin_unlock(ptl); |
@@ -3888,8 +3894,8 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
3888 | 3894 | ||
3889 | if (flags & FAULT_FLAG_WRITE) { | 3895 | if (flags & FAULT_FLAG_WRITE) { |
3890 | if (!huge_pte_write(entry)) { | 3896 | if (!huge_pte_write(entry)) { |
3891 | ret = hugetlb_cow(mm, vma, address, ptep, entry, | 3897 | ret = hugetlb_cow(mm, vma, address, ptep, |
3892 | pagecache_page, ptl); | 3898 | pagecache_page, ptl); |
3893 | goto out_put_page; | 3899 | goto out_put_page; |
3894 | } | 3900 | } |
3895 | entry = huge_pte_mkdirty(entry); | 3901 | entry = huge_pte_mkdirty(entry); |
@@ -4330,8 +4336,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) | |||
4330 | if (!spte) | 4336 | if (!spte) |
4331 | goto out; | 4337 | goto out; |
4332 | 4338 | ||
4333 | ptl = huge_pte_lockptr(hstate_vma(vma), mm, spte); | 4339 | ptl = huge_pte_lock(hstate_vma(vma), mm, spte); |
4334 | spin_lock(ptl); | ||
4335 | if (pud_none(*pud)) { | 4340 | if (pud_none(*pud)) { |
4336 | pud_populate(mm, pud, | 4341 | pud_populate(mm, pud, |
4337 | (pmd_t *)((unsigned long)spte & PAGE_MASK)); | 4342 | (pmd_t *)((unsigned long)spte & PAGE_MASK)); |
diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c index baabaad4a4aa..dae929c02bbb 100644 --- a/mm/kasan/quarantine.c +++ b/mm/kasan/quarantine.c | |||
@@ -86,24 +86,9 @@ static void qlist_move_all(struct qlist_head *from, struct qlist_head *to) | |||
86 | qlist_init(from); | 86 | qlist_init(from); |
87 | } | 87 | } |
88 | 88 | ||
89 | static void qlist_move(struct qlist_head *from, struct qlist_node *last, | 89 | #define QUARANTINE_PERCPU_SIZE (1 << 20) |
90 | struct qlist_head *to, size_t size) | 90 | #define QUARANTINE_BATCHES \ |
91 | { | 91 | (1024 > 4 * CONFIG_NR_CPUS ? 1024 : 4 * CONFIG_NR_CPUS) |
92 | if (unlikely(last == from->tail)) { | ||
93 | qlist_move_all(from, to); | ||
94 | return; | ||
95 | } | ||
96 | if (qlist_empty(to)) | ||
97 | to->head = from->head; | ||
98 | else | ||
99 | to->tail->next = from->head; | ||
100 | to->tail = last; | ||
101 | from->head = last->next; | ||
102 | last->next = NULL; | ||
103 | from->bytes -= size; | ||
104 | to->bytes += size; | ||
105 | } | ||
106 | |||
107 | 92 | ||
108 | /* | 93 | /* |
109 | * The object quarantine consists of per-cpu queues and a global queue, | 94 | * The object quarantine consists of per-cpu queues and a global queue, |
@@ -111,11 +96,22 @@ static void qlist_move(struct qlist_head *from, struct qlist_node *last, | |||
111 | */ | 96 | */ |
112 | static DEFINE_PER_CPU(struct qlist_head, cpu_quarantine); | 97 | static DEFINE_PER_CPU(struct qlist_head, cpu_quarantine); |
113 | 98 | ||
114 | static struct qlist_head global_quarantine; | 99 | /* Round-robin FIFO array of batches. */ |
100 | static struct qlist_head global_quarantine[QUARANTINE_BATCHES]; | ||
101 | static int quarantine_head; | ||
102 | static int quarantine_tail; | ||
103 | /* Total size of all objects in global_quarantine across all batches. */ | ||
104 | static unsigned long quarantine_size; | ||
115 | static DEFINE_SPINLOCK(quarantine_lock); | 105 | static DEFINE_SPINLOCK(quarantine_lock); |
116 | 106 | ||
117 | /* Maximum size of the global queue. */ | 107 | /* Maximum size of the global queue. */ |
118 | static unsigned long quarantine_size; | 108 | static unsigned long quarantine_max_size; |
109 | |||
110 | /* | ||
111 | * Target size of a batch in global_quarantine. | ||
112 | * Usually equal to QUARANTINE_PERCPU_SIZE unless we have too much RAM. | ||
113 | */ | ||
114 | static unsigned long quarantine_batch_size; | ||
119 | 115 | ||
120 | /* | 116 | /* |
121 | * The fraction of physical memory the quarantine is allowed to occupy. | 117 | * The fraction of physical memory the quarantine is allowed to occupy. |
@@ -124,9 +120,6 @@ static unsigned long quarantine_size; | |||
124 | */ | 120 | */ |
125 | #define QUARANTINE_FRACTION 32 | 121 | #define QUARANTINE_FRACTION 32 |
126 | 122 | ||
127 | #define QUARANTINE_LOW_SIZE (READ_ONCE(quarantine_size) * 3 / 4) | ||
128 | #define QUARANTINE_PERCPU_SIZE (1 << 20) | ||
129 | |||
130 | static struct kmem_cache *qlink_to_cache(struct qlist_node *qlink) | 123 | static struct kmem_cache *qlink_to_cache(struct qlist_node *qlink) |
131 | { | 124 | { |
132 | return virt_to_head_page(qlink)->slab_cache; | 125 | return virt_to_head_page(qlink)->slab_cache; |
@@ -191,21 +184,30 @@ void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache) | |||
191 | 184 | ||
192 | if (unlikely(!qlist_empty(&temp))) { | 185 | if (unlikely(!qlist_empty(&temp))) { |
193 | spin_lock_irqsave(&quarantine_lock, flags); | 186 | spin_lock_irqsave(&quarantine_lock, flags); |
194 | qlist_move_all(&temp, &global_quarantine); | 187 | WRITE_ONCE(quarantine_size, quarantine_size + temp.bytes); |
188 | qlist_move_all(&temp, &global_quarantine[quarantine_tail]); | ||
189 | if (global_quarantine[quarantine_tail].bytes >= | ||
190 | READ_ONCE(quarantine_batch_size)) { | ||
191 | int new_tail; | ||
192 | |||
193 | new_tail = quarantine_tail + 1; | ||
194 | if (new_tail == QUARANTINE_BATCHES) | ||
195 | new_tail = 0; | ||
196 | if (new_tail != quarantine_head) | ||
197 | quarantine_tail = new_tail; | ||
198 | } | ||
195 | spin_unlock_irqrestore(&quarantine_lock, flags); | 199 | spin_unlock_irqrestore(&quarantine_lock, flags); |
196 | } | 200 | } |
197 | } | 201 | } |
198 | 202 | ||
199 | void quarantine_reduce(void) | 203 | void quarantine_reduce(void) |
200 | { | 204 | { |
201 | size_t new_quarantine_size, percpu_quarantines; | 205 | size_t total_size, new_quarantine_size, percpu_quarantines; |
202 | unsigned long flags; | 206 | unsigned long flags; |
203 | struct qlist_head to_free = QLIST_INIT; | 207 | struct qlist_head to_free = QLIST_INIT; |
204 | size_t size_to_free = 0; | ||
205 | struct qlist_node *last; | ||
206 | 208 | ||
207 | if (likely(READ_ONCE(global_quarantine.bytes) <= | 209 | if (likely(READ_ONCE(quarantine_size) <= |
208 | READ_ONCE(quarantine_size))) | 210 | READ_ONCE(quarantine_max_size))) |
209 | return; | 211 | return; |
210 | 212 | ||
211 | spin_lock_irqsave(&quarantine_lock, flags); | 213 | spin_lock_irqsave(&quarantine_lock, flags); |
@@ -214,24 +216,23 @@ void quarantine_reduce(void) | |||
214 | * Update quarantine size in case of hotplug. Allocate a fraction of | 216 | * Update quarantine size in case of hotplug. Allocate a fraction of |
215 | * the installed memory to quarantine minus per-cpu queue limits. | 217 | * the installed memory to quarantine minus per-cpu queue limits. |
216 | */ | 218 | */ |
217 | new_quarantine_size = (READ_ONCE(totalram_pages) << PAGE_SHIFT) / | 219 | total_size = (READ_ONCE(totalram_pages) << PAGE_SHIFT) / |
218 | QUARANTINE_FRACTION; | 220 | QUARANTINE_FRACTION; |
219 | percpu_quarantines = QUARANTINE_PERCPU_SIZE * num_online_cpus(); | 221 | percpu_quarantines = QUARANTINE_PERCPU_SIZE * num_online_cpus(); |
220 | new_quarantine_size = (new_quarantine_size < percpu_quarantines) ? | 222 | new_quarantine_size = (total_size < percpu_quarantines) ? |
221 | 0 : new_quarantine_size - percpu_quarantines; | 223 | 0 : total_size - percpu_quarantines; |
222 | WRITE_ONCE(quarantine_size, new_quarantine_size); | 224 | WRITE_ONCE(quarantine_max_size, new_quarantine_size); |
223 | 225 | /* Aim at consuming at most 1/2 of slots in quarantine. */ | |
224 | last = global_quarantine.head; | 226 | WRITE_ONCE(quarantine_batch_size, max((size_t)QUARANTINE_PERCPU_SIZE, |
225 | while (last) { | 227 | 2 * total_size / QUARANTINE_BATCHES)); |
226 | struct kmem_cache *cache = qlink_to_cache(last); | 228 | |
227 | 229 | if (likely(quarantine_size > quarantine_max_size)) { | |
228 | size_to_free += cache->size; | 230 | qlist_move_all(&global_quarantine[quarantine_head], &to_free); |
229 | if (!last->next || size_to_free > | 231 | WRITE_ONCE(quarantine_size, quarantine_size - to_free.bytes); |
230 | global_quarantine.bytes - QUARANTINE_LOW_SIZE) | 232 | quarantine_head++; |
231 | break; | 233 | if (quarantine_head == QUARANTINE_BATCHES) |
232 | last = last->next; | 234 | quarantine_head = 0; |
233 | } | 235 | } |
234 | qlist_move(&global_quarantine, last, &to_free, size_to_free); | ||
235 | 236 | ||
236 | spin_unlock_irqrestore(&quarantine_lock, flags); | 237 | spin_unlock_irqrestore(&quarantine_lock, flags); |
237 | 238 | ||
@@ -275,13 +276,14 @@ static void per_cpu_remove_cache(void *arg) | |||
275 | 276 | ||
276 | void quarantine_remove_cache(struct kmem_cache *cache) | 277 | void quarantine_remove_cache(struct kmem_cache *cache) |
277 | { | 278 | { |
278 | unsigned long flags; | 279 | unsigned long flags, i; |
279 | struct qlist_head to_free = QLIST_INIT; | 280 | struct qlist_head to_free = QLIST_INIT; |
280 | 281 | ||
281 | on_each_cpu(per_cpu_remove_cache, cache, 1); | 282 | on_each_cpu(per_cpu_remove_cache, cache, 1); |
282 | 283 | ||
283 | spin_lock_irqsave(&quarantine_lock, flags); | 284 | spin_lock_irqsave(&quarantine_lock, flags); |
284 | qlist_move_cache(&global_quarantine, &to_free, cache); | 285 | for (i = 0; i < QUARANTINE_BATCHES; i++) |
286 | qlist_move_cache(&global_quarantine[i], &to_free, cache); | ||
285 | spin_unlock_irqrestore(&quarantine_lock, flags); | 287 | spin_unlock_irqrestore(&quarantine_lock, flags); |
286 | 288 | ||
287 | qlist_free_all(&to_free, cache); | 289 | qlist_free_all(&to_free, cache); |
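The quarantine above now keeps a fixed ring of batches indexed by quarantine_head and quarantine_tail. A hedged, userspace-only toy model of the tail advance in quarantine_put() (not kernel code; the batch count and loop are made up):

#include <stdio.h>

#define BATCHES 4

int main(void)
{
	int head = 0, tail = 0, fill;

	for (fill = 0; fill < 6; fill++) {
		/* Each time the tail batch fills up, move the tail forward,
		 * wrapping at BATCHES and never colliding with the head
		 * (which quarantine_reduce() advances when freeing). */
		int new_tail = tail + 1;

		if (new_tail == BATCHES)
			new_tail = 0;
		if (new_tail != head)
			tail = new_tail;
		printf("fill %d: tail=%d (head=%d)\n", fill, tail, head);
	}
	return 0;
}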
diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 073325aedc68..b82b3e215157 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c | |||
@@ -136,6 +136,8 @@ static void kasan_end_report(unsigned long *flags) | |||
136 | pr_err("==================================================================\n"); | 136 | pr_err("==================================================================\n"); |
137 | add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); | 137 | add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); |
138 | spin_unlock_irqrestore(&report_lock, *flags); | 138 | spin_unlock_irqrestore(&report_lock, *flags); |
139 | if (panic_on_warn) | ||
140 | panic("panic_on_warn set ...\n"); | ||
139 | kasan_enable_current(); | 141 | kasan_enable_current(); |
140 | } | 142 | } |
141 | 143 | ||
diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 87e1a7ca3846..09460955e818 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c | |||
@@ -1242,6 +1242,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) | |||
1242 | struct vm_area_struct *vma; | 1242 | struct vm_area_struct *vma; |
1243 | unsigned long addr; | 1243 | unsigned long addr; |
1244 | pmd_t *pmd, _pmd; | 1244 | pmd_t *pmd, _pmd; |
1245 | bool deposited = false; | ||
1245 | 1246 | ||
1246 | i_mmap_lock_write(mapping); | 1247 | i_mmap_lock_write(mapping); |
1247 | vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { | 1248 | vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { |
@@ -1266,10 +1267,26 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) | |||
1266 | spinlock_t *ptl = pmd_lock(vma->vm_mm, pmd); | 1267 | spinlock_t *ptl = pmd_lock(vma->vm_mm, pmd); |
1267 | /* assume page table is clear */ | 1268 | /* assume page table is clear */ |
1268 | _pmd = pmdp_collapse_flush(vma, addr, pmd); | 1269 | _pmd = pmdp_collapse_flush(vma, addr, pmd); |
1270 | /* | ||
1271 | * now deposit the pgtable for archs that need it ||
1272 | * otherwise free it. | ||
1273 | */ | ||
1274 | if (arch_needs_pgtable_deposit()) { | ||
1275 | /* | ||
1276 | * The deposit should be visible only after ||
1277 | * collapse is seen by others. | ||
1278 | */ | ||
1279 | smp_wmb(); | ||
1280 | pgtable_trans_huge_deposit(vma->vm_mm, pmd, | ||
1281 | pmd_pgtable(_pmd)); | ||
1282 | deposited = true; | ||
1283 | } | ||
1269 | spin_unlock(ptl); | 1284 | spin_unlock(ptl); |
1270 | up_write(&vma->vm_mm->mmap_sem); | 1285 | up_write(&vma->vm_mm->mmap_sem); |
1271 | atomic_long_dec(&vma->vm_mm->nr_ptes); | 1286 | if (!deposited) { |
1272 | pte_free(vma->vm_mm, pmd_pgtable(_pmd)); | 1287 | atomic_long_dec(&vma->vm_mm->nr_ptes); |
1288 | pte_free(vma->vm_mm, pmd_pgtable(_pmd)); | ||
1289 | } | ||
1273 | } | 1290 | } |
1274 | } | 1291 | } |
1275 | i_mmap_unlock_write(mapping); | 1292 | i_mmap_unlock_write(mapping); |
@@ -1403,6 +1420,9 @@ static void collapse_shmem(struct mm_struct *mm, | |||
1403 | 1420 | ||
1404 | spin_lock_irq(&mapping->tree_lock); | 1421 | spin_lock_irq(&mapping->tree_lock); |
1405 | 1422 | ||
1423 | slot = radix_tree_lookup_slot(&mapping->page_tree, index); | ||
1424 | VM_BUG_ON_PAGE(page != radix_tree_deref_slot_protected(slot, | ||
1425 | &mapping->tree_lock), page); | ||
1406 | VM_BUG_ON_PAGE(page_mapped(page), page); | 1426 | VM_BUG_ON_PAGE(page_mapped(page), page); |
1407 | 1427 | ||
1408 | /* | 1428 | /* |
@@ -1423,9 +1443,10 @@ static void collapse_shmem(struct mm_struct *mm, | |||
1423 | list_add_tail(&page->lru, &pagelist); | 1443 | list_add_tail(&page->lru, &pagelist); |
1424 | 1444 | ||
1425 | /* Finally, replace with the new page. */ | 1445 | /* Finally, replace with the new page. */ |
1426 | radix_tree_replace_slot(slot, | 1446 | radix_tree_replace_slot(&mapping->page_tree, slot, |
1427 | new_page + (index % HPAGE_PMD_NR)); | 1447 | new_page + (index % HPAGE_PMD_NR)); |
1428 | 1448 | ||
1449 | slot = radix_tree_iter_next(&iter); | ||
1429 | index++; | 1450 | index++; |
1430 | continue; | 1451 | continue; |
1431 | out_lru: | 1452 | out_lru: |
@@ -1521,9 +1542,11 @@ tree_unlocked: | |||
1521 | if (!page || iter.index < page->index) { | 1542 | if (!page || iter.index < page->index) { |
1522 | if (!nr_none) | 1543 | if (!nr_none) |
1523 | break; | 1544 | break; |
1524 | /* Put holes back where they were */ | ||
1525 | radix_tree_replace_slot(slot, NULL); | ||
1526 | nr_none--; | 1545 | nr_none--; |
1546 | /* Put holes back where they were */ | ||
1547 | radix_tree_delete(&mapping->page_tree, | ||
1548 | iter.index); | ||
1549 | slot = radix_tree_iter_next(&iter); | ||
1527 | continue; | 1550 | continue; |
1528 | } | 1551 | } |
1529 | 1552 | ||
@@ -1532,11 +1555,13 @@ tree_unlocked: | |||
1532 | /* Unfreeze the page. */ | 1555 | /* Unfreeze the page. */ |
1533 | list_del(&page->lru); | 1556 | list_del(&page->lru); |
1534 | page_ref_unfreeze(page, 2); | 1557 | page_ref_unfreeze(page, 2); |
1535 | radix_tree_replace_slot(slot, page); | 1558 | radix_tree_replace_slot(&mapping->page_tree, |
1559 | slot, page); | ||
1536 | spin_unlock_irq(&mapping->tree_lock); | 1560 | spin_unlock_irq(&mapping->tree_lock); |
1537 | putback_lru_page(page); | 1561 | putback_lru_page(page); |
1538 | unlock_page(page); | 1562 | unlock_page(page); |
1539 | spin_lock_irq(&mapping->tree_lock); | 1563 | spin_lock_irq(&mapping->tree_lock); |
1564 | slot = radix_tree_iter_next(&iter); | ||
1540 | } | 1565 | } |
1541 | VM_BUG_ON(nr_none); | 1566 | VM_BUG_ON(nr_none); |
1542 | spin_unlock_irq(&mapping->tree_lock); | 1567 | spin_unlock_irq(&mapping->tree_lock); |
diff --git a/mm/kmemleak.c b/mm/kmemleak.c index d1380ed93fdf..da3436953022 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c | |||
@@ -19,7 +19,7 @@ | |||
19 | * | 19 | * |
20 | * | 20 | * |
21 | * For more information on the algorithm and kmemleak usage, please see | 21 | * For more information on the algorithm and kmemleak usage, please see |
22 | * Documentation/kmemleak.txt. | 22 | * Documentation/dev-tools/kmemleak.rst. |
23 | * | 23 | * |
24 | * Notes on locking | 24 | * Notes on locking |
25 | * ---------------- | 25 | * ---------------- |
diff --git a/mm/madvise.c b/mm/madvise.c index 93fb63e88b5e..0e3828eae9f8 100644 --- a/mm/madvise.c +++ b/mm/madvise.c | |||
@@ -281,6 +281,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, | |||
281 | if (pmd_trans_unstable(pmd)) | 281 | if (pmd_trans_unstable(pmd)) |
282 | return 0; | 282 | return 0; |
283 | 283 | ||
284 | tlb_remove_check_page_size_change(tlb, PAGE_SIZE); | ||
284 | orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl); | 285 | orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl); |
285 | arch_enter_lazy_mmu_mode(); | 286 | arch_enter_lazy_mmu_mode(); |
286 | for (; addr != end; pte++, addr += PAGE_SIZE) { | 287 | for (; addr != end; pte++, addr += PAGE_SIZE) { |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 6c2043509fb5..175ec51c346d 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -2145,6 +2145,8 @@ struct memcg_kmem_cache_create_work { | |||
2145 | struct work_struct work; | 2145 | struct work_struct work; |
2146 | }; | 2146 | }; |
2147 | 2147 | ||
2148 | static struct workqueue_struct *memcg_kmem_cache_create_wq; | ||
2149 | |||
2148 | static void memcg_kmem_cache_create_func(struct work_struct *w) | 2150 | static void memcg_kmem_cache_create_func(struct work_struct *w) |
2149 | { | 2151 | { |
2150 | struct memcg_kmem_cache_create_work *cw = | 2152 | struct memcg_kmem_cache_create_work *cw = |
@@ -2176,7 +2178,7 @@ static void __memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg, | |||
2176 | cw->cachep = cachep; | 2178 | cw->cachep = cachep; |
2177 | INIT_WORK(&cw->work, memcg_kmem_cache_create_func); | 2179 | INIT_WORK(&cw->work, memcg_kmem_cache_create_func); |
2178 | 2180 | ||
2179 | schedule_work(&cw->work); | 2181 | queue_work(memcg_kmem_cache_create_wq, &cw->work); |
2180 | } | 2182 | } |
2181 | 2183 | ||
2182 | static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg, | 2184 | static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg, |
@@ -5774,6 +5776,17 @@ static int __init mem_cgroup_init(void) | |||
5774 | { | 5776 | { |
5775 | int cpu, node; | 5777 | int cpu, node; |
5776 | 5778 | ||
5779 | #ifndef CONFIG_SLOB | ||
5780 | /* | ||
5781 | * Kmem cache creation is mostly done with the slab_mutex held, | ||
5782 | * so use a special workqueue to avoid stalling all worker | ||
5783 | * threads in case lots of cgroups are created simultaneously. | ||
5784 | */ | ||
5785 | memcg_kmem_cache_create_wq = | ||
5786 | alloc_ordered_workqueue("memcg_kmem_cache_create", 0); | ||
5787 | BUG_ON(!memcg_kmem_cache_create_wq); | ||
5788 | #endif | ||
5789 | |||
5777 | cpuhp_setup_state_nocalls(CPUHP_MM_MEMCQ_DEAD, "mm/memctrl:dead", NULL, | 5790 | cpuhp_setup_state_nocalls(CPUHP_MM_MEMCQ_DEAD, "mm/memctrl:dead", NULL, |
5778 | memcg_hotplug_cpu_dead); | 5791 | memcg_hotplug_cpu_dead); |
5779 | 5792 | ||
diff --git a/mm/memory.c b/mm/memory.c index 33f45edf8272..32e9b7aec366 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -300,15 +300,14 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_ | |||
300 | struct mmu_gather_batch *batch; | 300 | struct mmu_gather_batch *batch; |
301 | 301 | ||
302 | VM_BUG_ON(!tlb->end); | 302 | VM_BUG_ON(!tlb->end); |
303 | 303 | VM_WARN_ON(tlb->page_size != page_size); | |
304 | if (!tlb->page_size) | ||
305 | tlb->page_size = page_size; | ||
306 | else { | ||
307 | if (page_size != tlb->page_size) | ||
308 | return true; | ||
309 | } | ||
310 | 304 | ||
311 | batch = tlb->active; | 305 | batch = tlb->active; |
306 | /* | ||
307 | * Add the page and check if we are full. If so | ||
308 | * force a flush. | ||
309 | */ | ||
310 | batch->pages[batch->nr++] = page; | ||
312 | if (batch->nr == batch->max) { | 311 | if (batch->nr == batch->max) { |
313 | if (!tlb_next_batch(tlb)) | 312 | if (!tlb_next_batch(tlb)) |
314 | return true; | 313 | return true; |
@@ -316,7 +315,6 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_ | |||
316 | } | 315 | } |
317 | VM_BUG_ON_PAGE(batch->nr > batch->max, page); | 316 | VM_BUG_ON_PAGE(batch->nr > batch->max, page); |
318 | 317 | ||
319 | batch->pages[batch->nr++] = page; | ||
320 | return false; | 318 | return false; |
321 | } | 319 | } |
322 | 320 | ||
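The reworked __tlb_remove_page_size() above stores the page into the active batch first and only then checks whether the batch filled up, which is why the pending_page retry logic disappears from zap_pte_range() further down. Below is a small standalone sketch of that append-then-check batching pattern; the types, names and the cap on extra batches are illustrative only.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define BATCH_MAX	4
#define MAX_EXTRA_BATCHES 1	/* stand-in for the kernel's batch-count cap */

struct batch {
	struct batch *next;
	int nr;
	void *items[BATCH_MAX];
};

struct gather {
	struct batch *active;
	int extra_batches;
	struct batch first;
};

static bool next_batch(struct gather *g)
{
	struct batch *b;

	if (g->extra_batches == MAX_EXTRA_BATCHES)
		return false;			/* no room: caller must flush */
	b = calloc(1, sizeof(*b));
	if (!b)
		return false;
	g->extra_batches++;
	g->active->next = b;
	g->active = b;
	return true;
}

/* Returns true when the item was stored but the caller now has to flush. */
static bool gather_add(struct gather *g, void *item)
{
	struct batch *b = g->active;

	/* Store first, exactly like the reworked __tlb_remove_page_size(). */
	b->items[b->nr++] = item;
	if (b->nr == BATCH_MAX && !next_batch(g))
		return true;
	return false;
}

int main(void)
{
	struct gather g = { 0 };
	int dummy[10];

	g.active = &g.first;
	for (int i = 0; i < 10; i++) {
		if (gather_add(&g, &dummy[i])) {
			printf("batches full after item %d: flush needed\n", i);
			break;
		}
	}
	free(g.first.next);
	return 0;
}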
@@ -528,7 +526,11 @@ void free_pgd_range(struct mmu_gather *tlb, | |||
528 | end -= PMD_SIZE; | 526 | end -= PMD_SIZE; |
529 | if (addr > end - 1) | 527 | if (addr > end - 1) |
530 | return; | 528 | return; |
531 | 529 | /* | |
530 | * We add page table cache pages with PAGE_SIZE, | ||
531 | * (see pte_free_tlb()), so flush the tlb if needed ||
532 | */ | ||
533 | tlb_remove_check_page_size_change(tlb, PAGE_SIZE); | ||
532 | pgd = pgd_offset(tlb->mm, addr); | 534 | pgd = pgd_offset(tlb->mm, addr); |
533 | do { | 535 | do { |
534 | next = pgd_addr_end(addr, end); | 536 | next = pgd_addr_end(addr, end); |
@@ -1118,8 +1120,8 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, | |||
1118 | pte_t *start_pte; | 1120 | pte_t *start_pte; |
1119 | pte_t *pte; | 1121 | pte_t *pte; |
1120 | swp_entry_t entry; | 1122 | swp_entry_t entry; |
1121 | struct page *pending_page = NULL; | ||
1122 | 1123 | ||
1124 | tlb_remove_check_page_size_change(tlb, PAGE_SIZE); | ||
1123 | again: | 1125 | again: |
1124 | init_rss_vec(rss); | 1126 | init_rss_vec(rss); |
1125 | start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl); | 1127 | start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl); |
@@ -1172,7 +1174,6 @@ again: | |||
1172 | print_bad_pte(vma, addr, ptent, page); | 1174 | print_bad_pte(vma, addr, ptent, page); |
1173 | if (unlikely(__tlb_remove_page(tlb, page))) { | 1175 | if (unlikely(__tlb_remove_page(tlb, page))) { |
1174 | force_flush = 1; | 1176 | force_flush = 1; |
1175 | pending_page = page; | ||
1176 | addr += PAGE_SIZE; | 1177 | addr += PAGE_SIZE; |
1177 | break; | 1178 | break; |
1178 | } | 1179 | } |
@@ -1213,11 +1214,6 @@ again: | |||
1213 | if (force_flush) { | 1214 | if (force_flush) { |
1214 | force_flush = 0; | 1215 | force_flush = 0; |
1215 | tlb_flush_mmu_free(tlb); | 1216 | tlb_flush_mmu_free(tlb); |
1216 | if (pending_page) { | ||
1217 | /* remove the page with new size */ | ||
1218 | __tlb_remove_pte_page(tlb, pending_page); | ||
1219 | pending_page = NULL; | ||
1220 | } | ||
1221 | if (addr != end) | 1217 | if (addr != end) |
1222 | goto again; | 1218 | goto again; |
1223 | } | 1219 | } |
@@ -1240,7 +1236,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, | |||
1240 | if (next - addr != HPAGE_PMD_SIZE) { | 1236 | if (next - addr != HPAGE_PMD_SIZE) { |
1241 | VM_BUG_ON_VMA(vma_is_anonymous(vma) && | 1237 | VM_BUG_ON_VMA(vma_is_anonymous(vma) && |
1242 | !rwsem_is_locked(&tlb->mm->mmap_sem), vma); | 1238 | !rwsem_is_locked(&tlb->mm->mmap_sem), vma); |
1243 | split_huge_pmd(vma, pmd, addr); | 1239 | __split_huge_pmd(vma, pmd, addr, false, NULL); |
1244 | } else if (zap_huge_pmd(tlb, vma, pmd, addr)) | 1240 | } else if (zap_huge_pmd(tlb, vma, pmd, addr)) |
1245 | goto next; | 1241 | goto next; |
1246 | /* fall through */ | 1242 | /* fall through */ |
@@ -2939,6 +2935,19 @@ static inline bool transhuge_vma_suitable(struct vm_area_struct *vma, | |||
2939 | return true; | 2935 | return true; |
2940 | } | 2936 | } |
2941 | 2937 | ||
2938 | static void deposit_prealloc_pte(struct fault_env *fe) | ||
2939 | { | ||
2940 | struct vm_area_struct *vma = fe->vma; | ||
2941 | |||
2942 | pgtable_trans_huge_deposit(vma->vm_mm, fe->pmd, fe->prealloc_pte); | ||
2943 | /* | ||
2944 | * We are going to consume the prealloc table, | ||
2945 | * count that as nr_ptes. | ||
2946 | */ | ||
2947 | atomic_long_inc(&vma->vm_mm->nr_ptes); | ||
2948 | fe->prealloc_pte = 0; | ||
2949 | } | ||
2950 | |||
2942 | static int do_set_pmd(struct fault_env *fe, struct page *page) | 2951 | static int do_set_pmd(struct fault_env *fe, struct page *page) |
2943 | { | 2952 | { |
2944 | struct vm_area_struct *vma = fe->vma; | 2953 | struct vm_area_struct *vma = fe->vma; |
@@ -2953,6 +2962,17 @@ static int do_set_pmd(struct fault_env *fe, struct page *page) | |||
2953 | ret = VM_FAULT_FALLBACK; | 2962 | ret = VM_FAULT_FALLBACK; |
2954 | page = compound_head(page); | 2963 | page = compound_head(page); |
2955 | 2964 | ||
2965 | /* | ||
2966 | * Archs like ppc64 need additional space to store information | ||
2967 | * related to pte entry. Use the preallocated table for that. | ||
2968 | */ | ||
2969 | if (arch_needs_pgtable_deposit() && !fe->prealloc_pte) { | ||
2970 | fe->prealloc_pte = pte_alloc_one(vma->vm_mm, fe->address); | ||
2971 | if (!fe->prealloc_pte) | ||
2972 | return VM_FAULT_OOM; | ||
2973 | smp_wmb(); /* See comment in __pte_alloc() */ | ||
2974 | } | ||
2975 | |||
2956 | fe->ptl = pmd_lock(vma->vm_mm, fe->pmd); | 2976 | fe->ptl = pmd_lock(vma->vm_mm, fe->pmd); |
2957 | if (unlikely(!pmd_none(*fe->pmd))) | 2977 | if (unlikely(!pmd_none(*fe->pmd))) |
2958 | goto out; | 2978 | goto out; |
@@ -2966,6 +2986,11 @@ static int do_set_pmd(struct fault_env *fe, struct page *page) | |||
2966 | 2986 | ||
2967 | add_mm_counter(vma->vm_mm, MM_FILEPAGES, HPAGE_PMD_NR); | 2987 | add_mm_counter(vma->vm_mm, MM_FILEPAGES, HPAGE_PMD_NR); |
2968 | page_add_file_rmap(page, true); | 2988 | page_add_file_rmap(page, true); |
2989 | /* | ||
2990 | * deposit and withdraw with pmd lock held | ||
2991 | */ | ||
2992 | if (arch_needs_pgtable_deposit()) | ||
2993 | deposit_prealloc_pte(fe); | ||
2969 | 2994 | ||
2970 | set_pmd_at(vma->vm_mm, haddr, fe->pmd, entry); | 2995 | set_pmd_at(vma->vm_mm, haddr, fe->pmd, entry); |
2971 | 2996 | ||
@@ -2975,6 +3000,13 @@ static int do_set_pmd(struct fault_env *fe, struct page *page) | |||
2975 | ret = 0; | 3000 | ret = 0; |
2976 | count_vm_event(THP_FILE_MAPPED); | 3001 | count_vm_event(THP_FILE_MAPPED); |
2977 | out: | 3002 | out: |
3003 | /* | ||
3004 | * If we are going to fall back to pte mapping, do a ||
3005 | * withdraw with pmd lock held. | ||
3006 | */ | ||
3007 | if (arch_needs_pgtable_deposit() && ret == VM_FAULT_FALLBACK) | ||
3008 | fe->prealloc_pte = pgtable_trans_huge_withdraw(vma->vm_mm, | ||
3009 | fe->pmd); | ||
2978 | spin_unlock(fe->ptl); | 3010 | spin_unlock(fe->ptl); |
2979 | return ret; | 3011 | return ret; |
2980 | } | 3012 | } |
@@ -3014,18 +3046,20 @@ int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg, | |||
3014 | 3046 | ||
3015 | ret = do_set_pmd(fe, page); | 3047 | ret = do_set_pmd(fe, page); |
3016 | if (ret != VM_FAULT_FALLBACK) | 3048 | if (ret != VM_FAULT_FALLBACK) |
3017 | return ret; | 3049 | goto fault_handled; |
3018 | } | 3050 | } |
3019 | 3051 | ||
3020 | if (!fe->pte) { | 3052 | if (!fe->pte) { |
3021 | ret = pte_alloc_one_map(fe); | 3053 | ret = pte_alloc_one_map(fe); |
3022 | if (ret) | 3054 | if (ret) |
3023 | return ret; | 3055 | goto fault_handled; |
3024 | } | 3056 | } |
3025 | 3057 | ||
3026 | /* Re-check under ptl */ | 3058 | /* Re-check under ptl */ |
3027 | if (unlikely(!pte_none(*fe->pte))) | 3059 | if (unlikely(!pte_none(*fe->pte))) { |
3028 | return VM_FAULT_NOPAGE; | 3060 | ret = VM_FAULT_NOPAGE; |
3061 | goto fault_handled; | ||
3062 | } | ||
3029 | 3063 | ||
3030 | flush_icache_page(vma, page); | 3064 | flush_icache_page(vma, page); |
3031 | entry = mk_pte(page, vma->vm_page_prot); | 3065 | entry = mk_pte(page, vma->vm_page_prot); |
@@ -3045,8 +3079,15 @@ int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg, | |||
3045 | 3079 | ||
3046 | /* no need to invalidate: a not-present page won't be cached */ | 3080 | /* no need to invalidate: a not-present page won't be cached */ |
3047 | update_mmu_cache(vma, fe->address, fe->pte); | 3081 | update_mmu_cache(vma, fe->address, fe->pte); |
3082 | ret = 0; | ||
3048 | 3083 | ||
3049 | return 0; | 3084 | fault_handled: |
3085 | /* preallocated pagetable is unused: free it */ | ||
3086 | if (fe->prealloc_pte) { | ||
3087 | pte_free(fe->vma->vm_mm, fe->prealloc_pte); | ||
3088 | fe->prealloc_pte = 0; | ||
3089 | } | ||
3090 | return ret; | ||
3050 | } | 3091 | } |
3051 | 3092 | ||
3052 | static unsigned long fault_around_bytes __read_mostly = | 3093 | static unsigned long fault_around_bytes __read_mostly = |
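The alloc_set_pte() change funnels every exit through a single fault_handled label so the preallocated page table is always released, which is also why the equivalent cleanup is removed from do_fault_around() in the next hunk. A tiny standalone sketch of that single-exit cleanup pattern; the names are illustrative and the buffer merely stands in for fe->prealloc_pte.

#include <stdio.h>
#include <stdlib.h>

static int do_work(int fail_early)
{
	int ret;
	void *prealloc = malloc(64);	/* stands in for fe->prealloc_pte */

	if (!prealloc)
		return -1;

	if (fail_early) {
		ret = -2;		/* was: return -2 (leaking prealloc) */
		goto out;
	}

	/* ... main work that may or may not consume prealloc ... */
	ret = 0;

out:
	/* preallocated buffer unused: free it, on every path, exactly once */
	free(prealloc);
	prealloc = NULL;
	return ret;
}

int main(void)
{
	printf("%d %d\n", do_work(0), do_work(1));
	return 0;
}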
@@ -3145,11 +3186,6 @@ static int do_fault_around(struct fault_env *fe, pgoff_t start_pgoff) | |||
3145 | 3186 | ||
3146 | fe->vma->vm_ops->map_pages(fe, start_pgoff, end_pgoff); | 3187 | fe->vma->vm_ops->map_pages(fe, start_pgoff, end_pgoff); |
3147 | 3188 | ||
3148 | /* preallocated pagetable is unused: free it */ | ||
3149 | if (fe->prealloc_pte) { | ||
3150 | pte_free(fe->vma->vm_mm, fe->prealloc_pte); | ||
3151 | fe->prealloc_pte = 0; | ||
3152 | } | ||
3153 | /* Huge page is mapped? Page fault is solved */ | 3189 | /* Huge page is mapped? Page fault is solved */ |
3154 | if (pmd_trans_huge(*fe->pmd)) { | 3190 | if (pmd_trans_huge(*fe->pmd)) { |
3155 | ret = VM_FAULT_NOPAGE; | 3191 | ret = VM_FAULT_NOPAGE; |
@@ -3454,7 +3490,7 @@ static int wp_huge_pmd(struct fault_env *fe, pmd_t orig_pmd) | |||
3454 | 3490 | ||
3455 | /* COW handled on pte level: split pmd */ | 3491 | /* COW handled on pte level: split pmd */ |
3456 | VM_BUG_ON_VMA(fe->vma->vm_flags & VM_SHARED, fe->vma); | 3492 | VM_BUG_ON_VMA(fe->vma->vm_flags & VM_SHARED, fe->vma); |
3457 | split_huge_pmd(fe->vma, fe->pmd, fe->address); | 3493 | __split_huge_pmd(fe->vma, fe->pmd, fe->address, false, NULL); |
3458 | 3494 | ||
3459 | return VM_FAULT_FALLBACK; | 3495 | return VM_FAULT_FALLBACK; |
3460 | } | 3496 | } |
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index cad4b9125695..e43142c15631 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c | |||
@@ -1727,26 +1727,6 @@ static bool can_offline_normal(struct zone *zone, unsigned long nr_pages) | |||
1727 | static int __init cmdline_parse_movable_node(char *p) | 1727 | static int __init cmdline_parse_movable_node(char *p) |
1728 | { | 1728 | { |
1729 | #ifdef CONFIG_MOVABLE_NODE | 1729 | #ifdef CONFIG_MOVABLE_NODE |
1730 | /* | ||
1731 | * Memory used by the kernel cannot be hot-removed because Linux | ||
1732 | * cannot migrate the kernel pages. When memory hotplug is | ||
1733 | * enabled, we should prevent memblock from allocating memory | ||
1734 | * for the kernel. | ||
1735 | * | ||
1736 | * ACPI SRAT records all hotpluggable memory ranges. But before | ||
1737 | * SRAT is parsed, we don't know about it. | ||
1738 | * | ||
1739 | * The kernel image is loaded into memory at very early time. We | ||
1740 | * cannot prevent this anyway. So on NUMA system, we set any | ||
1741 | * node the kernel resides in as un-hotpluggable. | ||
1742 | * | ||
1743 | * Since on modern servers, one node could have double-digit | ||
1744 | * gigabytes memory, we can assume the memory around the kernel | ||
1745 | * image is also un-hotpluggable. So before SRAT is parsed, just | ||
1746 | * allocate memory near the kernel image to try the best to keep | ||
1747 | * the kernel away from hotpluggable memory. | ||
1748 | */ | ||
1749 | memblock_set_bottom_up(true); | ||
1750 | movable_node_enabled = true; | 1730 | movable_node_enabled = true; |
1751 | #else | 1731 | #else |
1752 | pr_warn("movable_node option not supported\n"); | 1732 | pr_warn("movable_node option not supported\n"); |
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 0b859af06b87..6d3639e1f254 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -276,7 +276,9 @@ static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags, | |||
276 | return ERR_PTR(-EINVAL); | 276 | return ERR_PTR(-EINVAL); |
277 | } | 277 | } |
278 | } else if (mode == MPOL_LOCAL) { | 278 | } else if (mode == MPOL_LOCAL) { |
279 | if (!nodes_empty(*nodes)) | 279 | if (!nodes_empty(*nodes) || |
280 | (flags & MPOL_F_STATIC_NODES) || | ||
281 | (flags & MPOL_F_RELATIVE_NODES)) | ||
280 | return ERR_PTR(-EINVAL); | 282 | return ERR_PTR(-EINVAL); |
281 | mode = MPOL_PREFERRED; | 283 | mode = MPOL_PREFERRED; |
282 | } else if (nodes_empty(*nodes)) | 284 | } else if (nodes_empty(*nodes)) |
@@ -496,7 +498,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr, | |||
496 | page = pmd_page(*pmd); | 498 | page = pmd_page(*pmd); |
497 | if (is_huge_zero_page(page)) { | 499 | if (is_huge_zero_page(page)) { |
498 | spin_unlock(ptl); | 500 | spin_unlock(ptl); |
499 | split_huge_pmd(vma, pmd, addr); | 501 | __split_huge_pmd(vma, pmd, addr, false, NULL); |
500 | } else { | 502 | } else { |
501 | get_page(page); | 503 | get_page(page); |
502 | spin_unlock(ptl); | 504 | spin_unlock(ptl); |
@@ -1679,25 +1681,17 @@ static nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy) | |||
1679 | static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy, | 1681 | static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy, |
1680 | int nd) | 1682 | int nd) |
1681 | { | 1683 | { |
1682 | switch (policy->mode) { | 1684 | if (policy->mode == MPOL_PREFERRED && !(policy->flags & MPOL_F_LOCAL)) |
1683 | case MPOL_PREFERRED: | 1685 | nd = policy->v.preferred_node; |
1684 | if (!(policy->flags & MPOL_F_LOCAL)) | 1686 | else { |
1685 | nd = policy->v.preferred_node; | ||
1686 | break; | ||
1687 | case MPOL_BIND: | ||
1688 | /* | 1687 | /* |
1689 | * Normally, MPOL_BIND allocations are node-local within the | 1688 | * __GFP_THISNODE shouldn't even be used with the bind policy |
1690 | * allowed nodemask. However, if __GFP_THISNODE is set and the | 1689 | * because we might easily break the expectation to stay on the |
1691 | * current node isn't part of the mask, we use the zonelist for | 1690 | * requested node and not break the policy. |
1692 | * the first node in the mask instead. | ||
1693 | */ | 1691 | */ |
1694 | if (unlikely(gfp & __GFP_THISNODE) && | 1692 | WARN_ON_ONCE(policy->mode == MPOL_BIND && (gfp & __GFP_THISNODE)); |
1695 | unlikely(!node_isset(nd, policy->v.nodes))) | ||
1696 | nd = first_node(policy->v.nodes); | ||
1697 | break; | ||
1698 | default: | ||
1699 | BUG(); | ||
1700 | } | 1693 | } |
1694 | |||
1701 | return node_zonelist(nd, gfp); | 1695 | return node_zonelist(nd, gfp); |
1702 | } | 1696 | } |
1703 | 1697 | ||
diff --git a/mm/migrate.c b/mm/migrate.c index 99250aee1ac1..0ed24b1fa77b 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
@@ -168,8 +168,6 @@ void putback_movable_pages(struct list_head *l) | |||
168 | continue; | 168 | continue; |
169 | } | 169 | } |
170 | list_del(&page->lru); | 170 | list_del(&page->lru); |
171 | dec_node_page_state(page, NR_ISOLATED_ANON + | ||
172 | page_is_file_cache(page)); | ||
173 | /* | 171 | /* |
174 | * We isolated non-lru movable page so here we can use | 172 | * We isolated non-lru movable page so here we can use |
175 | * __PageMovable because LRU page's mapping cannot have | 173 | * __PageMovable because LRU page's mapping cannot have |
@@ -186,6 +184,8 @@ void putback_movable_pages(struct list_head *l) | |||
186 | put_page(page); | 184 | put_page(page); |
187 | } else { | 185 | } else { |
188 | putback_lru_page(page); | 186 | putback_lru_page(page); |
187 | dec_node_page_state(page, NR_ISOLATED_ANON + | ||
188 | page_is_file_cache(page)); | ||
189 | } | 189 | } |
190 | } | 190 | } |
191 | } | 191 | } |
@@ -482,7 +482,7 @@ int migrate_page_move_mapping(struct address_space *mapping, | |||
482 | SetPageDirty(newpage); | 482 | SetPageDirty(newpage); |
483 | } | 483 | } |
484 | 484 | ||
485 | radix_tree_replace_slot(pslot, newpage); | 485 | radix_tree_replace_slot(&mapping->page_tree, pslot, newpage); |
486 | 486 | ||
487 | /* | 487 | /* |
488 | * Drop cache reference from old page by unfreezing | 488 | * Drop cache reference from old page by unfreezing |
@@ -556,7 +556,7 @@ int migrate_huge_page_move_mapping(struct address_space *mapping, | |||
556 | 556 | ||
557 | get_page(newpage); | 557 | get_page(newpage); |
558 | 558 | ||
559 | radix_tree_replace_slot(pslot, newpage); | 559 | radix_tree_replace_slot(&mapping->page_tree, pslot, newpage); |
560 | 560 | ||
561 | page_ref_unfreeze(page, expected_count - 1); | 561 | page_ref_unfreeze(page, expected_count - 1); |
562 | 562 | ||
@@ -1121,8 +1121,15 @@ out: | |||
1121 | * restored. | 1121 | * restored. |
1122 | */ | 1122 | */ |
1123 | list_del(&page->lru); | 1123 | list_del(&page->lru); |
1124 | dec_node_page_state(page, NR_ISOLATED_ANON + | 1124 | |
1125 | page_is_file_cache(page)); | 1125 | /* |
1126 | * Compaction can also migrate non-LRU pages, which are ||
1127 | * not accounted to NR_ISOLATED_*. They can be recognized | ||
1128 | * as __PageMovable | ||
1129 | */ | ||
1130 | if (likely(!__PageMovable(page))) | ||
1131 | dec_node_page_state(page, NR_ISOLATED_ANON + | ||
1132 | page_is_file_cache(page)); | ||
1126 | } | 1133 | } |
1127 | 1134 | ||
1128 | /* | 1135 | /* |
diff --git a/mm/mprotect.c b/mm/mprotect.c index 11936526b08b..cc2459c57f60 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c | |||
@@ -69,11 +69,17 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | |||
69 | pte_t *pte, oldpte; | 69 | pte_t *pte, oldpte; |
70 | spinlock_t *ptl; | 70 | spinlock_t *ptl; |
71 | unsigned long pages = 0; | 71 | unsigned long pages = 0; |
72 | int target_node = NUMA_NO_NODE; | ||
72 | 73 | ||
73 | pte = lock_pte_protection(vma, pmd, addr, prot_numa, &ptl); | 74 | pte = lock_pte_protection(vma, pmd, addr, prot_numa, &ptl); |
74 | if (!pte) | 75 | if (!pte) |
75 | return 0; | 76 | return 0; |
76 | 77 | ||
78 | /* Get target node for single threaded private VMAs */ | ||
79 | if (prot_numa && !(vma->vm_flags & VM_SHARED) && | ||
80 | atomic_read(&vma->vm_mm->mm_users) == 1) | ||
81 | target_node = numa_node_id(); | ||
82 | |||
77 | arch_enter_lazy_mmu_mode(); | 83 | arch_enter_lazy_mmu_mode(); |
78 | do { | 84 | do { |
79 | oldpte = *pte; | 85 | oldpte = *pte; |
@@ -95,6 +101,13 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | |||
95 | /* Avoid TLB flush if possible */ | 101 | /* Avoid TLB flush if possible */ |
96 | if (pte_protnone(oldpte)) | 102 | if (pte_protnone(oldpte)) |
97 | continue; | 103 | continue; |
104 | |||
105 | /* | ||
106 | * Don't mess with PTEs if page is already on the node | ||
107 | * a single-threaded process is running on. | ||
108 | */ | ||
109 | if (target_node == page_to_nid(page)) | ||
110 | continue; | ||
98 | } | 111 | } |
99 | 112 | ||
100 | ptent = ptep_modify_prot_start(mm, addr, pte); | 113 | ptent = ptep_modify_prot_start(mm, addr, pte); |
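The change_pte_range() hunk above records the local node once for single-threaded private mappings and then leaves PTEs alone when the page already sits on that node. The standalone toy below shows just that decision; numa_node_id() and page_to_nid() are stand-ins returning invented values, not the kernel helpers.

#include <stdio.h>

#define NUMA_NO_NODE  (-1)

/* Toy stand-ins for the helpers used by the prot_numa path. */
static int numa_node_id(void)    { return 1; }
static int page_to_nid(int page) { return page % 2; }	/* fake placement */

static int pages_touched(int shared, int mm_users, int npages)
{
	int target_node = NUMA_NO_NODE;
	int touched = 0;

	/* Only single-threaded private mappings get a target node. */
	if (!shared && mm_users == 1)
		target_node = numa_node_id();

	for (int page = 0; page < npages; page++) {
		/* Already local to the only thread: leave the PTE alone. */
		if (target_node == page_to_nid(page))
			continue;
		touched++;	/* would be made PROT_NONE for NUMA hinting */
	}
	return touched;
}

int main(void)
{
	printf("private, single-threaded: %d of 8 PTEs changed\n",
	       pages_touched(0, 1, 8));
	printf("shared or multi-threaded: %d of 8 PTEs changed\n",
	       pages_touched(1, 4, 8));
	return 0;
}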
@@ -163,7 +176,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, | |||
163 | 176 | ||
164 | if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) { | 177 | if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) { |
165 | if (next - addr != HPAGE_PMD_SIZE) { | 178 | if (next - addr != HPAGE_PMD_SIZE) { |
166 | split_huge_pmd(vma, pmd, addr); | 179 | __split_huge_pmd(vma, pmd, addr, false, NULL); |
167 | if (pmd_trans_unstable(pmd)) | 180 | if (pmd_trans_unstable(pmd)) |
168 | continue; | 181 | continue; |
169 | } else { | 182 | } else { |
@@ -484,6 +497,8 @@ SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len, | |||
484 | return do_mprotect_pkey(start, len, prot, -1); | 497 | return do_mprotect_pkey(start, len, prot, -1); |
485 | } | 498 | } |
486 | 499 | ||
500 | #ifdef CONFIG_ARCH_HAS_PKEYS | ||
501 | |||
487 | SYSCALL_DEFINE4(pkey_mprotect, unsigned long, start, size_t, len, | 502 | SYSCALL_DEFINE4(pkey_mprotect, unsigned long, start, size_t, len, |
488 | unsigned long, prot, int, pkey) | 503 | unsigned long, prot, int, pkey) |
489 | { | 504 | { |
@@ -534,3 +549,5 @@ SYSCALL_DEFINE1(pkey_free, int, pkey) | |||
534 | */ | 549 | */ |
535 | return ret; | 550 | return ret; |
536 | } | 551 | } |
552 | |||
553 | #endif /* CONFIG_ARCH_HAS_PKEYS */ | ||
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3dcc54da5637..f64e7bcb43b7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -2058,8 +2058,12 @@ out_unlock: | |||
2058 | * potentially hurts the reliability of high-order allocations when under | 2058 | * potentially hurts the reliability of high-order allocations when under |
2059 | * intense memory pressure but failed atomic allocations should be easier | 2059 | * intense memory pressure but failed atomic allocations should be easier |
2060 | * to recover from than an OOM. | 2060 | * to recover from than an OOM. |
2061 | * | ||
2062 | * If @force is true, try to unreserve a pageblock even though highatomic | ||
2063 | * pageblock is exhausted. | ||
2061 | */ | 2064 | */ |
2062 | static void unreserve_highatomic_pageblock(const struct alloc_context *ac) | 2065 | static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, |
2066 | bool force) | ||
2063 | { | 2067 | { |
2064 | struct zonelist *zonelist = ac->zonelist; | 2068 | struct zonelist *zonelist = ac->zonelist; |
2065 | unsigned long flags; | 2069 | unsigned long flags; |
@@ -2067,11 +2071,16 @@ static void unreserve_highatomic_pageblock(const struct alloc_context *ac) | |||
2067 | struct zone *zone; | 2071 | struct zone *zone; |
2068 | struct page *page; | 2072 | struct page *page; |
2069 | int order; | 2073 | int order; |
2074 | bool ret; | ||
2070 | 2075 | ||
2071 | for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->high_zoneidx, | 2076 | for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->high_zoneidx, |
2072 | ac->nodemask) { | 2077 | ac->nodemask) { |
2073 | /* Preserve at least one pageblock */ | 2078 | /* |
2074 | if (zone->nr_reserved_highatomic <= pageblock_nr_pages) | 2079 | * Preserve at least one pageblock unless memory pressure |
2080 | * is really high. | ||
2081 | */ | ||
2082 | if (!force && zone->nr_reserved_highatomic <= | ||
2083 | pageblock_nr_pages) | ||
2075 | continue; | 2084 | continue; |
2076 | 2085 | ||
2077 | spin_lock_irqsave(&zone->lock, flags); | 2086 | spin_lock_irqsave(&zone->lock, flags); |
@@ -2085,13 +2094,25 @@ static void unreserve_highatomic_pageblock(const struct alloc_context *ac) | |||
2085 | continue; | 2094 | continue; |
2086 | 2095 | ||
2087 | /* | 2096 | /* |
2088 | * It should never happen but changes to locking could | 2097 | * In page freeing path, migratetype change is racy so |
2089 | * inadvertently allow a per-cpu drain to add pages | 2098 | * we can encounter several free pages in a pageblock |
2090 | * to MIGRATE_HIGHATOMIC while unreserving so be safe | 2099 | * in this loop although we changed the pageblock type |
2091 | * and watch for underflows. | 2100 | * from highatomic to ac->migratetype. So we should |
2101 | * adjust the count once. | ||
2092 | */ | 2102 | */ |
2093 | zone->nr_reserved_highatomic -= min(pageblock_nr_pages, | 2103 | if (get_pageblock_migratetype(page) == |
2094 | zone->nr_reserved_highatomic); | 2104 | MIGRATE_HIGHATOMIC) { |
2105 | /* | ||
2106 | * It should never happen but changes to | ||
2107 | * locking could inadvertently allow a per-cpu | ||
2108 | * drain to add pages to MIGRATE_HIGHATOMIC | ||
2109 | * while unreserving so be safe and watch for | ||
2110 | * underflows. | ||
2111 | */ | ||
2112 | zone->nr_reserved_highatomic -= min( | ||
2113 | pageblock_nr_pages, | ||
2114 | zone->nr_reserved_highatomic); | ||
2115 | } | ||
2095 | 2116 | ||
2096 | /* | 2117 | /* |
2097 | * Convert to ac->migratetype and avoid the normal | 2118 | * Convert to ac->migratetype and avoid the normal |
@@ -2103,12 +2124,16 @@ static void unreserve_highatomic_pageblock(const struct alloc_context *ac) | |||
2103 | * may increase. | 2124 | * may increase. |
2104 | */ | 2125 | */ |
2105 | set_pageblock_migratetype(page, ac->migratetype); | 2126 | set_pageblock_migratetype(page, ac->migratetype); |
2106 | move_freepages_block(zone, page, ac->migratetype); | 2127 | ret = move_freepages_block(zone, page, ac->migratetype); |
2107 | spin_unlock_irqrestore(&zone->lock, flags); | 2128 | if (ret) { |
2108 | return; | 2129 | spin_unlock_irqrestore(&zone->lock, flags); |
2130 | return ret; | ||
2131 | } | ||
2109 | } | 2132 | } |
2110 | spin_unlock_irqrestore(&zone->lock, flags); | 2133 | spin_unlock_irqrestore(&zone->lock, flags); |
2111 | } | 2134 | } |
2135 | |||
2136 | return false; | ||
2112 | } | 2137 | } |
2113 | 2138 | ||
2114 | /* Remove an element from the buddy allocator from the fallback list */ | 2139 | /* Remove an element from the buddy allocator from the fallback list */ |
@@ -2133,7 +2158,8 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype) | |||
2133 | 2158 | ||
2134 | page = list_first_entry(&area->free_list[fallback_mt], | 2159 | page = list_first_entry(&area->free_list[fallback_mt], |
2135 | struct page, lru); | 2160 | struct page, lru); |
2136 | if (can_steal) | 2161 | if (can_steal && |
2162 | get_pageblock_migratetype(page) != MIGRATE_HIGHATOMIC) | ||
2137 | steal_suitable_fallback(zone, page, start_migratetype); | 2163 | steal_suitable_fallback(zone, page, start_migratetype); |
2138 | 2164 | ||
2139 | /* Remove the page from the freelists */ | 2165 | /* Remove the page from the freelists */ |
@@ -2192,7 +2218,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, | |||
2192 | unsigned long count, struct list_head *list, | 2218 | unsigned long count, struct list_head *list, |
2193 | int migratetype, bool cold) | 2219 | int migratetype, bool cold) |
2194 | { | 2220 | { |
2195 | int i; | 2221 | int i, alloced = 0; |
2196 | 2222 | ||
2197 | spin_lock(&zone->lock); | 2223 | spin_lock(&zone->lock); |
2198 | for (i = 0; i < count; ++i) { | 2224 | for (i = 0; i < count; ++i) { |
@@ -2217,13 +2243,21 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, | |||
2217 | else | 2243 | else |
2218 | list_add_tail(&page->lru, list); | 2244 | list_add_tail(&page->lru, list); |
2219 | list = &page->lru; | 2245 | list = &page->lru; |
2246 | alloced++; | ||
2220 | if (is_migrate_cma(get_pcppage_migratetype(page))) | 2247 | if (is_migrate_cma(get_pcppage_migratetype(page))) |
2221 | __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, | 2248 | __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, |
2222 | -(1 << order)); | 2249 | -(1 << order)); |
2223 | } | 2250 | } |
2251 | |||
2252 | /* | ||
2253 | * i pages were removed from the buddy list even if some leak due | ||
2254 | * to check_pcp_refill failing so adjust NR_FREE_PAGES based | ||
2255 | * on i. Do not confuse with 'alloced' which is the number of | ||
2256 | * pages added to the pcp list. | ||
2257 | */ | ||
2224 | __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order)); | 2258 | __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order)); |
2225 | spin_unlock(&zone->lock); | 2259 | spin_unlock(&zone->lock); |
2226 | return i; | 2260 | return alloced; |
2227 | } | 2261 | } |
2228 | 2262 | ||
2229 | #ifdef CONFIG_NUMA | 2263 | #ifdef CONFIG_NUMA |
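The new comment in rmqueue_bulk() distinguishes 'i', the number of pages taken off the buddy list (which drives the NR_FREE_PAGES adjustment), from 'alloced', the number of pages that actually made it onto the per-cpu list. A small standalone illustration of why the two counts can differ when a page fails its check; check_page() is a made-up stand-in for that check.

#include <stdio.h>

static int check_page(int page) { return page != 3; }	/* one bad page */

int main(void)
{
	int i, alloced = 0, count = 6;

	for (i = 0; i < count; i++) {
		int page = i;		/* pretend removal from the buddy list */
		if (!check_page(page))
			continue;	/* removed, but never handed out */
		alloced++;		/* added to the per-cpu list */
	}
	printf("free-page counter adjusted by %d, caller gets %d pages\n",
	       i, alloced);
	return 0;
}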
@@ -2534,7 +2568,8 @@ int __isolate_free_page(struct page *page, unsigned int order) | |||
2534 | struct page *endpage = page + (1 << order) - 1; | 2568 | struct page *endpage = page + (1 << order) - 1; |
2535 | for (; page < endpage; page += pageblock_nr_pages) { | 2569 | for (; page < endpage; page += pageblock_nr_pages) { |
2536 | int mt = get_pageblock_migratetype(page); | 2570 | int mt = get_pageblock_migratetype(page); |
2537 | if (!is_migrate_isolate(mt) && !is_migrate_cma(mt)) | 2571 | if (!is_migrate_isolate(mt) && !is_migrate_cma(mt) |
2572 | && mt != MIGRATE_HIGHATOMIC) | ||
2538 | set_pageblock_migratetype(page, | 2573 | set_pageblock_migratetype(page, |
2539 | MIGRATE_MOVABLE); | 2574 | MIGRATE_MOVABLE); |
2540 | } | 2575 | } |
@@ -3305,7 +3340,7 @@ retry: | |||
3305 | * Shrink them and try again | 3340 | * Shrink them and try again |
3306 | */ | 3341 | */ |
3307 | if (!page && !drained) { | 3342 | if (!page && !drained) { |
3308 | unreserve_highatomic_pageblock(ac); | 3343 | unreserve_highatomic_pageblock(ac, false); |
3309 | drain_all_pages(NULL); | 3344 | drain_all_pages(NULL); |
3310 | drained = true; | 3345 | drained = true; |
3311 | goto retry; | 3346 | goto retry; |
@@ -3422,8 +3457,10 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order, | |||
3422 | * Make sure we converge to OOM if we cannot make any progress | 3457 | * Make sure we converge to OOM if we cannot make any progress |
3423 | * several times in the row. | 3458 | * several times in the row. |
3424 | */ | 3459 | */ |
3425 | if (*no_progress_loops > MAX_RECLAIM_RETRIES) | 3460 | if (*no_progress_loops > MAX_RECLAIM_RETRIES) { |
3426 | return false; | 3461 | /* Before OOM, exhaust highatomic_reserve */ |
3462 | return unreserve_highatomic_pageblock(ac, true); | ||
3463 | } | ||
3427 | 3464 | ||
3428 | /* | 3465 | /* |
3429 | * Keep reclaiming pages while there is a chance this will lead | 3466 | * Keep reclaiming pages while there is a chance this will lead |
diff --git a/mm/percpu.c b/mm/percpu.c index 255714302394..f696385bcc44 100644 --- a/mm/percpu.c +++ b/mm/percpu.c | |||
@@ -2093,6 +2093,8 @@ int __init pcpu_page_first_chunk(size_t reserved_size, | |||
2093 | size_t pages_size; | 2093 | size_t pages_size; |
2094 | struct page **pages; | 2094 | struct page **pages; |
2095 | int unit, i, j, rc; | 2095 | int unit, i, j, rc; |
2096 | int upa; | ||
2097 | int nr_g0_units; | ||
2096 | 2098 | ||
2097 | snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10); | 2099 | snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10); |
2098 | 2100 | ||
@@ -2100,7 +2102,12 @@ int __init pcpu_page_first_chunk(size_t reserved_size, | |||
2100 | if (IS_ERR(ai)) | 2102 | if (IS_ERR(ai)) |
2101 | return PTR_ERR(ai); | 2103 | return PTR_ERR(ai); |
2102 | BUG_ON(ai->nr_groups != 1); | 2104 | BUG_ON(ai->nr_groups != 1); |
2103 | BUG_ON(ai->groups[0].nr_units != num_possible_cpus()); | 2105 | upa = ai->alloc_size/ai->unit_size; |
2106 | nr_g0_units = roundup(num_possible_cpus(), upa); | ||
2107 | if (unlikely(WARN_ON(ai->groups[0].nr_units != nr_g0_units))) { | ||
2108 | pcpu_free_alloc_info(ai); | ||
2109 | return -EINVAL; | ||
2110 | } | ||
2104 | 2111 | ||
2105 | unit_pages = ai->unit_size >> PAGE_SHIFT; | 2112 | unit_pages = ai->unit_size >> PAGE_SHIFT; |
2106 | 2113 | ||
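The relaxed group-0 check computes units-per-allocation (upa) from alloc_size / unit_size and rounds the possible CPU count up to a multiple of it, instead of demanding exact equality as the old BUG_ON did. A worked example of that arithmetic with invented numbers:

#include <stdio.h>

#define roundup(x, y)  ((((x) + (y) - 1) / (y)) * (y))

int main(void)
{
	unsigned long alloc_size = 4 * 64 * 1024;	/* hypothetical */
	unsigned long unit_size  = 64 * 1024;
	int possible_cpus        = 6;

	int upa = alloc_size / unit_size;		/* 4 units per alloc */
	int nr_g0_units = roundup(possible_cpus, upa);	/* 6 rounds up to 8 */

	printf("upa=%d: group 0 is expected to hold %d units for %d CPUs\n",
	       upa, nr_g0_units, possible_cpus);
	return 0;
}

With an odd CPU count like 6, the first group legitimately carries two unused units, which the old exact-equality check treated as a fatal mismatch.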
@@ -2111,21 +2118,22 @@ int __init pcpu_page_first_chunk(size_t reserved_size, | |||
2111 | 2118 | ||
2112 | /* allocate pages */ | 2119 | /* allocate pages */ |
2113 | j = 0; | 2120 | j = 0; |
2114 | for (unit = 0; unit < num_possible_cpus(); unit++) | 2121 | for (unit = 0; unit < num_possible_cpus(); unit++) { |
2122 | unsigned int cpu = ai->groups[0].cpu_map[unit]; | ||
2115 | for (i = 0; i < unit_pages; i++) { | 2123 | for (i = 0; i < unit_pages; i++) { |
2116 | unsigned int cpu = ai->groups[0].cpu_map[unit]; | ||
2117 | void *ptr; | 2124 | void *ptr; |
2118 | 2125 | ||
2119 | ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE); | 2126 | ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE); |
2120 | if (!ptr) { | 2127 | if (!ptr) { |
2121 | pr_warn("failed to allocate %s page for cpu%u\n", | 2128 | pr_warn("failed to allocate %s page for cpu%u\n", |
2122 | psize_str, cpu); | 2129 | psize_str, cpu); |
2123 | goto enomem; | 2130 | goto enomem; |
2124 | } | 2131 | } |
2125 | /* kmemleak tracks the percpu allocations separately */ | 2132 | /* kmemleak tracks the percpu allocations separately */ |
2126 | kmemleak_free(ptr); | 2133 | kmemleak_free(ptr); |
2127 | pages[j++] = virt_to_page(ptr); | 2134 | pages[j++] = virt_to_page(ptr); |
2128 | } | 2135 | } |
2136 | } | ||
2129 | 2137 | ||
2130 | /* allocate vm area, map the pages and copy static data */ | 2138 | /* allocate vm area, map the pages and copy static data */ |
2131 | vm.flags = VM_ALLOC; | 2139 | vm.flags = VM_ALLOC; |
diff --git a/mm/readahead.c b/mm/readahead.c index c8a955b1297e..c4ca70239233 100644 --- a/mm/readahead.c +++ b/mm/readahead.c | |||
@@ -207,12 +207,21 @@ out: | |||
207 | * memory at once. | 207 | * memory at once. |
208 | */ | 208 | */ |
209 | int force_page_cache_readahead(struct address_space *mapping, struct file *filp, | 209 | int force_page_cache_readahead(struct address_space *mapping, struct file *filp, |
210 | pgoff_t offset, unsigned long nr_to_read) | 210 | pgoff_t offset, unsigned long nr_to_read) |
211 | { | 211 | { |
212 | struct backing_dev_info *bdi = inode_to_bdi(mapping->host); | ||
213 | struct file_ra_state *ra = &filp->f_ra; | ||
214 | unsigned long max_pages; | ||
215 | |||
212 | if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages)) | 216 | if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages)) |
213 | return -EINVAL; | 217 | return -EINVAL; |
214 | 218 | ||
215 | nr_to_read = min(nr_to_read, inode_to_bdi(mapping->host)->ra_pages); | 219 | /* |
220 | * If the request exceeds the readahead window, allow the read to | ||
221 | * be up to the optimal hardware IO size | ||
222 | */ | ||
223 | max_pages = max_t(unsigned long, bdi->io_pages, ra->ra_pages); | ||
224 | nr_to_read = min(nr_to_read, max_pages); | ||
216 | while (nr_to_read) { | 225 | while (nr_to_read) { |
217 | int err; | 226 | int err; |
218 | 227 | ||
@@ -369,10 +378,18 @@ ondemand_readahead(struct address_space *mapping, | |||
369 | bool hit_readahead_marker, pgoff_t offset, | 378 | bool hit_readahead_marker, pgoff_t offset, |
370 | unsigned long req_size) | 379 | unsigned long req_size) |
371 | { | 380 | { |
372 | unsigned long max = ra->ra_pages; | 381 | struct backing_dev_info *bdi = inode_to_bdi(mapping->host); |
382 | unsigned long max_pages = ra->ra_pages; | ||
373 | pgoff_t prev_offset; | 383 | pgoff_t prev_offset; |
374 | 384 | ||
375 | /* | 385 | /* |
386 | * If the request exceeds the readahead window, allow the read to | ||
387 | * be up to the optimal hardware IO size | ||
388 | */ | ||
389 | if (req_size > max_pages && bdi->io_pages > max_pages) | ||
390 | max_pages = min(req_size, bdi->io_pages); | ||
391 | |||
392 | /* | ||
376 | * start of file | 393 | * start of file |
377 | */ | 394 | */ |
378 | if (!offset) | 395 | if (!offset) |
@@ -385,7 +402,7 @@ ondemand_readahead(struct address_space *mapping, | |||
385 | if ((offset == (ra->start + ra->size - ra->async_size) || | 402 | if ((offset == (ra->start + ra->size - ra->async_size) || |
386 | offset == (ra->start + ra->size))) { | 403 | offset == (ra->start + ra->size))) { |
387 | ra->start += ra->size; | 404 | ra->start += ra->size; |
388 | ra->size = get_next_ra_size(ra, max); | 405 | ra->size = get_next_ra_size(ra, max_pages); |
389 | ra->async_size = ra->size; | 406 | ra->async_size = ra->size; |
390 | goto readit; | 407 | goto readit; |
391 | } | 408 | } |
@@ -400,16 +417,16 @@ ondemand_readahead(struct address_space *mapping, | |||
400 | pgoff_t start; | 417 | pgoff_t start; |
401 | 418 | ||
402 | rcu_read_lock(); | 419 | rcu_read_lock(); |
403 | start = page_cache_next_hole(mapping, offset + 1, max); | 420 | start = page_cache_next_hole(mapping, offset + 1, max_pages); |
404 | rcu_read_unlock(); | 421 | rcu_read_unlock(); |
405 | 422 | ||
406 | if (!start || start - offset > max) | 423 | if (!start || start - offset > max_pages) |
407 | return 0; | 424 | return 0; |
408 | 425 | ||
409 | ra->start = start; | 426 | ra->start = start; |
410 | ra->size = start - offset; /* old async_size */ | 427 | ra->size = start - offset; /* old async_size */ |
411 | ra->size += req_size; | 428 | ra->size += req_size; |
412 | ra->size = get_next_ra_size(ra, max); | 429 | ra->size = get_next_ra_size(ra, max_pages); |
413 | ra->async_size = ra->size; | 430 | ra->async_size = ra->size; |
414 | goto readit; | 431 | goto readit; |
415 | } | 432 | } |
@@ -417,7 +434,7 @@ ondemand_readahead(struct address_space *mapping, | |||
417 | /* | 434 | /* |
418 | * oversize read | 435 | * oversize read |
419 | */ | 436 | */ |
420 | if (req_size > max) | 437 | if (req_size > max_pages) |
421 | goto initial_readahead; | 438 | goto initial_readahead; |
422 | 439 | ||
423 | /* | 440 | /* |
@@ -433,7 +450,7 @@ ondemand_readahead(struct address_space *mapping, | |||
433 | * Query the page cache and look for the traces(cached history pages) | 450 | * Query the page cache and look for the traces(cached history pages) |
434 | * that a sequential stream would leave behind. | 451 | * that a sequential stream would leave behind. |
435 | */ | 452 | */ |
436 | if (try_context_readahead(mapping, ra, offset, req_size, max)) | 453 | if (try_context_readahead(mapping, ra, offset, req_size, max_pages)) |
437 | goto readit; | 454 | goto readit; |
438 | 455 | ||
439 | /* | 456 | /* |
@@ -444,7 +461,7 @@ ondemand_readahead(struct address_space *mapping, | |||
444 | 461 | ||
445 | initial_readahead: | 462 | initial_readahead: |
446 | ra->start = offset; | 463 | ra->start = offset; |
447 | ra->size = get_init_ra_size(req_size, max); | 464 | ra->size = get_init_ra_size(req_size, max_pages); |
448 | ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size; | 465 | ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size; |
449 | 466 | ||
450 | readit: | 467 | readit: |
@@ -454,7 +471,7 @@ readit: | |||
454 | * the resulted next readahead window into the current one. | 471 | * the resulted next readahead window into the current one. |
455 | */ | 472 | */ |
456 | if (offset == ra->start && ra->size == ra->async_size) { | 473 | if (offset == ra->start && ra->size == ra->async_size) { |
457 | ra->async_size = get_next_ra_size(ra, max); | 474 | ra->async_size = get_next_ra_size(ra, max_pages); |
458 | ra->size += ra->async_size; | 475 | ra->size += ra->async_size; |
459 | } | 476 | } |
460 | 477 | ||
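Both readahead paths above now let a large request grow past the per-file window up to the device's optimal I/O size (bdi->io_pages) instead of always clamping to ra_pages. A standalone sketch of the two clamping rules; the page counts below are made up for illustration.

#include <stdio.h>

static unsigned long min_ul(unsigned long a, unsigned long b) { return a < b ? a : b; }
static unsigned long max_ul(unsigned long a, unsigned long b) { return a > b ? a : b; }

int main(void)
{
	unsigned long ra_pages = 32;	/* 128K window with 4K pages */
	unsigned long io_pages = 256;	/* device prefers 1M I/O */
	unsigned long req_size = 200;	/* pages asked for */

	/* force_page_cache_readahead(): clamp to max(io_pages, ra_pages) */
	unsigned long forced = min_ul(req_size, max_ul(io_pages, ra_pages));

	/* ondemand_readahead(): grow the window only for oversized requests */
	unsigned long max_pages = ra_pages;
	if (req_size > max_pages && io_pages > max_pages)
		max_pages = min_ul(req_size, io_pages);

	printf("forced path reads %lu pages, ondemand window becomes %lu pages\n",
	       forced, max_pages);
	return 0;
}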
@@ -141,14 +141,15 @@ static void anon_vma_chain_link(struct vm_area_struct *vma, | |||
141 | } | 141 | } |
142 | 142 | ||
143 | /** | 143 | /** |
144 | * anon_vma_prepare - attach an anon_vma to a memory region | 144 | * __anon_vma_prepare - attach an anon_vma to a memory region |
145 | * @vma: the memory region in question | 145 | * @vma: the memory region in question |
146 | * | 146 | * |
147 | * This makes sure the memory mapping described by 'vma' has | 147 | * This makes sure the memory mapping described by 'vma' has |
148 | * an 'anon_vma' attached to it, so that we can associate the | 148 | * an 'anon_vma' attached to it, so that we can associate the |
149 | * anonymous pages mapped into it with that anon_vma. | 149 | * anonymous pages mapped into it with that anon_vma. |
150 | * | 150 | * |
151 | * The common case will be that we already have one, but if | 151 | * The common case will be that we already have one, which |
152 | * is handled inline by anon_vma_prepare(). But if | ||
152 | * not we either need to find an adjacent mapping that we | 153 | * not we either need to find an adjacent mapping that we |
153 | * can re-use the anon_vma from (very common when the only | 154 | * can re-use the anon_vma from (very common when the only |
154 | * reason for splitting a vma has been mprotect()), or we | 155 | * reason for splitting a vma has been mprotect()), or we |
@@ -167,48 +168,46 @@ static void anon_vma_chain_link(struct vm_area_struct *vma, | |||
167 | * | 168 | * |
168 | * This must be called with the mmap_sem held for reading. | 169 | * This must be called with the mmap_sem held for reading. |
169 | */ | 170 | */ |
170 | int anon_vma_prepare(struct vm_area_struct *vma) | 171 | int __anon_vma_prepare(struct vm_area_struct *vma) |
171 | { | 172 | { |
172 | struct anon_vma *anon_vma = vma->anon_vma; | 173 | struct mm_struct *mm = vma->vm_mm; |
174 | struct anon_vma *anon_vma, *allocated; | ||
173 | struct anon_vma_chain *avc; | 175 | struct anon_vma_chain *avc; |
174 | 176 | ||
175 | might_sleep(); | 177 | might_sleep(); |
176 | if (unlikely(!anon_vma)) { | ||
177 | struct mm_struct *mm = vma->vm_mm; | ||
178 | struct anon_vma *allocated; | ||
179 | 178 | ||
180 | avc = anon_vma_chain_alloc(GFP_KERNEL); | 179 | avc = anon_vma_chain_alloc(GFP_KERNEL); |
181 | if (!avc) | 180 | if (!avc) |
182 | goto out_enomem; | 181 | goto out_enomem; |
182 | |||
183 | anon_vma = find_mergeable_anon_vma(vma); | ||
184 | allocated = NULL; | ||
185 | if (!anon_vma) { | ||
186 | anon_vma = anon_vma_alloc(); | ||
187 | if (unlikely(!anon_vma)) | ||
188 | goto out_enomem_free_avc; | ||
189 | allocated = anon_vma; | ||
190 | } | ||
183 | 191 | ||
184 | anon_vma = find_mergeable_anon_vma(vma); | 192 | anon_vma_lock_write(anon_vma); |
193 | /* page_table_lock to protect against threads */ | ||
194 | spin_lock(&mm->page_table_lock); | ||
195 | if (likely(!vma->anon_vma)) { | ||
196 | vma->anon_vma = anon_vma; | ||
197 | anon_vma_chain_link(vma, avc, anon_vma); | ||
198 | /* vma reference or self-parent link for new root */ | ||
199 | anon_vma->degree++; | ||
185 | allocated = NULL; | 200 | allocated = NULL; |
186 | if (!anon_vma) { | 201 | avc = NULL; |
187 | anon_vma = anon_vma_alloc(); | 202 | } |
188 | if (unlikely(!anon_vma)) | 203 | spin_unlock(&mm->page_table_lock); |
189 | goto out_enomem_free_avc; | 204 | anon_vma_unlock_write(anon_vma); |
190 | allocated = anon_vma; | ||
191 | } | ||
192 | 205 | ||
193 | anon_vma_lock_write(anon_vma); | 206 | if (unlikely(allocated)) |
194 | /* page_table_lock to protect against threads */ | 207 | put_anon_vma(allocated); |
195 | spin_lock(&mm->page_table_lock); | 208 | if (unlikely(avc)) |
196 | if (likely(!vma->anon_vma)) { | 209 | anon_vma_chain_free(avc); |
197 | vma->anon_vma = anon_vma; | ||
198 | anon_vma_chain_link(vma, avc, anon_vma); | ||
199 | /* vma reference or self-parent link for new root */ | ||
200 | anon_vma->degree++; | ||
201 | allocated = NULL; | ||
202 | avc = NULL; | ||
203 | } | ||
204 | spin_unlock(&mm->page_table_lock); | ||
205 | anon_vma_unlock_write(anon_vma); | ||
206 | 210 | ||
207 | if (unlikely(allocated)) | ||
208 | put_anon_vma(allocated); | ||
209 | if (unlikely(avc)) | ||
210 | anon_vma_chain_free(avc); | ||
211 | } | ||
212 | return 0; | 211 | return 0; |
213 | 212 | ||
214 | out_enomem_free_avc: | 213 | out_enomem_free_avc: |
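The updated kernel-doc above says the common "anon_vma already attached" case is now handled inline by anon_vma_prepare(), with __anon_vma_prepare() as the out-of-line slow path. That wrapper is not part of this hunk; the toy below only sketches the likely fast-path/slow-path split, using invented types rather than the real vm_area_struct.

#include <stdio.h>
#include <stdlib.h>

struct toy_vma {
	void *anon_vma;
};

static int slow_path_prepare(struct toy_vma *vma)
{
	vma->anon_vma = malloc(1);	/* allocate-and-link, simplified */
	return vma->anon_vma ? 0 : -1;
}

static inline int toy_anon_vma_prepare(struct toy_vma *vma)
{
	if (vma->anon_vma)		/* common case: nothing to do */
		return 0;
	return slow_path_prepare(vma);	/* rare case: take the slow path */
}

int main(void)
{
	struct toy_vma vma = { 0 };
	int first = toy_anon_vma_prepare(&vma);
	int second = toy_anon_vma_prepare(&vma);

	printf("first call: %d, second call: %d\n", first, second);
	free(vma.anon_vma);
	return 0;
}

The design point mirrored here is that the repeated fast-path check costs only a pointer test at each call site, while the allocation and locking live out of line.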
diff --git a/mm/shmem.c b/mm/shmem.c index 9d32e1cb9f38..abd7403aba41 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
@@ -300,18 +300,19 @@ void shmem_uncharge(struct inode *inode, long pages) | |||
300 | static int shmem_radix_tree_replace(struct address_space *mapping, | 300 | static int shmem_radix_tree_replace(struct address_space *mapping, |
301 | pgoff_t index, void *expected, void *replacement) | 301 | pgoff_t index, void *expected, void *replacement) |
302 | { | 302 | { |
303 | struct radix_tree_node *node; | ||
303 | void **pslot; | 304 | void **pslot; |
304 | void *item; | 305 | void *item; |
305 | 306 | ||
306 | VM_BUG_ON(!expected); | 307 | VM_BUG_ON(!expected); |
307 | VM_BUG_ON(!replacement); | 308 | VM_BUG_ON(!replacement); |
308 | pslot = radix_tree_lookup_slot(&mapping->page_tree, index); | 309 | item = __radix_tree_lookup(&mapping->page_tree, index, &node, &pslot); |
309 | if (!pslot) | 310 | if (!item) |
310 | return -ENOENT; | 311 | return -ENOENT; |
311 | item = radix_tree_deref_slot_protected(pslot, &mapping->tree_lock); | ||
312 | if (item != expected) | 312 | if (item != expected) |
313 | return -ENOENT; | 313 | return -ENOENT; |
314 | radix_tree_replace_slot(pslot, replacement); | 314 | __radix_tree_replace(&mapping->page_tree, node, pslot, |
315 | replacement, NULL, NULL); | ||
315 | return 0; | 316 | return 0; |
316 | } | 317 | } |
317 | 318 | ||
@@ -370,6 +371,7 @@ static bool shmem_confirm_swap(struct address_space *mapping, | |||
370 | 371 | ||
371 | int shmem_huge __read_mostly; | 372 | int shmem_huge __read_mostly; |
372 | 373 | ||
374 | #if defined(CONFIG_SYSFS) || defined(CONFIG_TMPFS) | ||
373 | static int shmem_parse_huge(const char *str) | 375 | static int shmem_parse_huge(const char *str) |
374 | { | 376 | { |
375 | if (!strcmp(str, "never")) | 377 | if (!strcmp(str, "never")) |
@@ -407,6 +409,7 @@ static const char *shmem_format_huge(int huge) | |||
407 | return "bad_val"; | 409 | return "bad_val"; |
408 | } | 410 | } |
409 | } | 411 | } |
412 | #endif | ||
410 | 413 | ||
411 | static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo, | 414 | static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo, |
412 | struct shrink_control *sc, unsigned long nr_to_split) | 415 | struct shrink_control *sc, unsigned long nr_to_split) |
@@ -1539,7 +1542,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, | |||
1539 | struct mm_struct *fault_mm, int *fault_type) | 1542 | struct mm_struct *fault_mm, int *fault_type) |
1540 | { | 1543 | { |
1541 | struct address_space *mapping = inode->i_mapping; | 1544 | struct address_space *mapping = inode->i_mapping; |
1542 | struct shmem_inode_info *info; | 1545 | struct shmem_inode_info *info = SHMEM_I(inode); |
1543 | struct shmem_sb_info *sbinfo; | 1546 | struct shmem_sb_info *sbinfo; |
1544 | struct mm_struct *charge_mm; | 1547 | struct mm_struct *charge_mm; |
1545 | struct mem_cgroup *memcg; | 1548 | struct mem_cgroup *memcg; |
@@ -1589,7 +1592,6 @@ repeat: | |||
1589 | * Fast cache lookup did not find it: | 1592 | * Fast cache lookup did not find it: |
1590 | * bring it back from swap or allocate. | 1593 | * bring it back from swap or allocate. |
1591 | */ | 1594 | */ |
1592 | info = SHMEM_I(inode); | ||
1593 | sbinfo = SHMEM_SB(inode->i_sb); | 1595 | sbinfo = SHMEM_SB(inode->i_sb); |
1594 | charge_mm = fault_mm ? : current->mm; | 1596 | charge_mm = fault_mm ? : current->mm; |
1595 | 1597 | ||
@@ -1837,7 +1839,6 @@ unlock: | |||
1837 | put_page(page); | 1839 | put_page(page); |
1838 | } | 1840 | } |
1839 | if (error == -ENOSPC && !once++) { | 1841 | if (error == -ENOSPC && !once++) { |
1840 | info = SHMEM_I(inode); | ||
1841 | spin_lock_irq(&info->lock); | 1842 | spin_lock_irq(&info->lock); |
1842 | shmem_recalc_inode(inode); | 1843 | shmem_recalc_inode(inode); |
1843 | spin_unlock_irq(&info->lock); | 1844 | spin_unlock_irq(&info->lock); |
@@ -227,13 +227,14 @@ static void kmem_cache_node_init(struct kmem_cache_node *parent) | |||
227 | INIT_LIST_HEAD(&parent->slabs_full); | 227 | INIT_LIST_HEAD(&parent->slabs_full); |
228 | INIT_LIST_HEAD(&parent->slabs_partial); | 228 | INIT_LIST_HEAD(&parent->slabs_partial); |
229 | INIT_LIST_HEAD(&parent->slabs_free); | 229 | INIT_LIST_HEAD(&parent->slabs_free); |
230 | parent->total_slabs = 0; | ||
231 | parent->free_slabs = 0; | ||
230 | parent->shared = NULL; | 232 | parent->shared = NULL; |
231 | parent->alien = NULL; | 233 | parent->alien = NULL; |
232 | parent->colour_next = 0; | 234 | parent->colour_next = 0; |
233 | spin_lock_init(&parent->list_lock); | 235 | spin_lock_init(&parent->list_lock); |
234 | parent->free_objects = 0; | 236 | parent->free_objects = 0; |
235 | parent->free_touched = 0; | 237 | parent->free_touched = 0; |
236 | parent->num_slabs = 0; | ||
237 | } | 238 | } |
238 | 239 | ||
239 | #define MAKE_LIST(cachep, listp, slab, nodeid) \ | 240 | #define MAKE_LIST(cachep, listp, slab, nodeid) \ |
@@ -1366,7 +1367,6 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) | |||
1366 | { | 1367 | { |
1367 | #if DEBUG | 1368 | #if DEBUG |
1368 | struct kmem_cache_node *n; | 1369 | struct kmem_cache_node *n; |
1369 | struct page *page; | ||
1370 | unsigned long flags; | 1370 | unsigned long flags; |
1371 | int node; | 1371 | int node; |
1372 | static DEFINE_RATELIMIT_STATE(slab_oom_rs, DEFAULT_RATELIMIT_INTERVAL, | 1372 | static DEFINE_RATELIMIT_STATE(slab_oom_rs, DEFAULT_RATELIMIT_INTERVAL, |
@@ -1381,32 +1381,18 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) | |||
1381 | cachep->name, cachep->size, cachep->gfporder); | 1381 | cachep->name, cachep->size, cachep->gfporder); |
1382 | 1382 | ||
1383 | for_each_kmem_cache_node(cachep, node, n) { | 1383 | for_each_kmem_cache_node(cachep, node, n) { |
1384 | unsigned long active_objs = 0, num_objs = 0, free_objects = 0; | 1384 | unsigned long total_slabs, free_slabs, free_objs; |
1385 | unsigned long active_slabs = 0, num_slabs = 0; | ||
1386 | unsigned long num_slabs_partial = 0, num_slabs_free = 0; | ||
1387 | unsigned long num_slabs_full; | ||
1388 | 1385 | ||
1389 | spin_lock_irqsave(&n->list_lock, flags); | 1386 | spin_lock_irqsave(&n->list_lock, flags); |
1390 | num_slabs = n->num_slabs; | 1387 | total_slabs = n->total_slabs; |
1391 | list_for_each_entry(page, &n->slabs_partial, lru) { | 1388 | free_slabs = n->free_slabs; |
1392 | active_objs += page->active; | 1389 | free_objs = n->free_objects; |
1393 | num_slabs_partial++; | ||
1394 | } | ||
1395 | list_for_each_entry(page, &n->slabs_free, lru) | ||
1396 | num_slabs_free++; | ||
1397 | |||
1398 | free_objects += n->free_objects; | ||
1399 | spin_unlock_irqrestore(&n->list_lock, flags); | 1390 | spin_unlock_irqrestore(&n->list_lock, flags); |
1400 | 1391 | ||
1401 | num_objs = num_slabs * cachep->num; | 1392 | pr_warn(" node %d: slabs: %ld/%ld, objs: %ld/%ld\n", |
1402 | active_slabs = num_slabs - num_slabs_free; | 1393 | node, total_slabs - free_slabs, total_slabs, |
1403 | num_slabs_full = num_slabs - | 1394 | (total_slabs * cachep->num) - free_objs, |
1404 | (num_slabs_partial + num_slabs_free); | 1395 | total_slabs * cachep->num); |
1405 | active_objs += (num_slabs_full * cachep->num); | ||
1406 | |||
1407 | pr_warn(" node %d: slabs: %ld/%ld, objs: %ld/%ld, free: %ld\n", | ||
1408 | node, active_slabs, num_slabs, active_objs, num_objs, | ||
1409 | free_objects); | ||
1410 | } | 1396 | } |
1411 | #endif | 1397 | #endif |
1412 | } | 1398 | } |
@@ -2318,7 +2304,8 @@ static int drain_freelist(struct kmem_cache *cache, | |||
2318 | 2304 | ||
2319 | page = list_entry(p, struct page, lru); | 2305 | page = list_entry(p, struct page, lru); |
2320 | list_del(&page->lru); | 2306 | list_del(&page->lru); |
2321 | n->num_slabs--; | 2307 | n->free_slabs--; |
2308 | n->total_slabs--; | ||
2322 | /* | 2309 | /* |
2323 | * Safe to drop the lock. The slab is no longer linked | 2310 | * Safe to drop the lock. The slab is no longer linked |
2324 | * to the cache. | 2311 | * to the cache. |
@@ -2332,7 +2319,7 @@ out: | |||
2332 | return nr_freed; | 2319 | return nr_freed; |
2333 | } | 2320 | } |
2334 | 2321 | ||
2335 | int __kmem_cache_shrink(struct kmem_cache *cachep, bool deactivate) | 2322 | int __kmem_cache_shrink(struct kmem_cache *cachep) |
2336 | { | 2323 | { |
2337 | int ret = 0; | 2324 | int ret = 0; |
2338 | int node; | 2325 | int node; |
@@ -2352,7 +2339,7 @@ int __kmem_cache_shrink(struct kmem_cache *cachep, bool deactivate) | |||
2352 | 2339 | ||
2353 | int __kmem_cache_shutdown(struct kmem_cache *cachep) | 2340 | int __kmem_cache_shutdown(struct kmem_cache *cachep) |
2354 | { | 2341 | { |
2355 | return __kmem_cache_shrink(cachep, false); | 2342 | return __kmem_cache_shrink(cachep); |
2356 | } | 2343 | } |
2357 | 2344 | ||
2358 | void __kmem_cache_release(struct kmem_cache *cachep) | 2345 | void __kmem_cache_release(struct kmem_cache *cachep) |
@@ -2753,12 +2740,13 @@ static void cache_grow_end(struct kmem_cache *cachep, struct page *page) | |||
2753 | n = get_node(cachep, page_to_nid(page)); | 2740 | n = get_node(cachep, page_to_nid(page)); |
2754 | 2741 | ||
2755 | spin_lock(&n->list_lock); | 2742 | spin_lock(&n->list_lock); |
2756 | if (!page->active) | 2743 | n->total_slabs++; |
2744 | if (!page->active) { | ||
2757 | list_add_tail(&page->lru, &(n->slabs_free)); | 2745 | list_add_tail(&page->lru, &(n->slabs_free)); |
2758 | else | 2746 | n->free_slabs++; |
2747 | } else | ||
2759 | fixup_slab_list(cachep, n, page, &list); | 2748 | fixup_slab_list(cachep, n, page, &list); |
2760 | 2749 | ||
2761 | n->num_slabs++; | ||
2762 | STATS_INC_GROWN(cachep); | 2750 | STATS_INC_GROWN(cachep); |
2763 | n->free_objects += cachep->num - page->active; | 2751 | n->free_objects += cachep->num - page->active; |
2764 | spin_unlock(&n->list_lock); | 2752 | spin_unlock(&n->list_lock); |
@@ -2903,9 +2891,10 @@ static noinline struct page *get_valid_first_slab(struct kmem_cache_node *n, | |||
2903 | 2891 | ||
2904 | /* Move pfmemalloc slab to the end of list to speed up next search */ | 2892 | /* Move pfmemalloc slab to the end of list to speed up next search */ |
2905 | list_del(&page->lru); | 2893 | list_del(&page->lru); |
2906 | if (!page->active) | 2894 | if (!page->active) { |
2907 | list_add_tail(&page->lru, &n->slabs_free); | 2895 | list_add_tail(&page->lru, &n->slabs_free); |
2908 | else | 2896 | n->free_slabs++; |
2897 | } else | ||
2909 | list_add_tail(&page->lru, &n->slabs_partial); | 2898 | list_add_tail(&page->lru, &n->slabs_partial); |
2910 | 2899 | ||
2911 | list_for_each_entry(page, &n->slabs_partial, lru) { | 2900 | list_for_each_entry(page, &n->slabs_partial, lru) { |
@@ -2913,9 +2902,12 @@ static noinline struct page *get_valid_first_slab(struct kmem_cache_node *n, | |||
2913 | return page; | 2902 | return page; |
2914 | } | 2903 | } |
2915 | 2904 | ||
2905 | n->free_touched = 1; | ||
2916 | list_for_each_entry(page, &n->slabs_free, lru) { | 2906 | list_for_each_entry(page, &n->slabs_free, lru) { |
2917 | if (!PageSlabPfmemalloc(page)) | 2907 | if (!PageSlabPfmemalloc(page)) { |
2908 | n->free_slabs--; | ||
2918 | return page; | 2909 | return page; |
2910 | } | ||
2919 | } | 2911 | } |
2920 | 2912 | ||
2921 | return NULL; | 2913 | return NULL; |
@@ -2925,16 +2917,18 @@ static struct page *get_first_slab(struct kmem_cache_node *n, bool pfmemalloc) | |||
2925 | { | 2917 | { |
2926 | struct page *page; | 2918 | struct page *page; |
2927 | 2919 | ||
2928 | page = list_first_entry_or_null(&n->slabs_partial, | 2920 | assert_spin_locked(&n->list_lock); |
2929 | struct page, lru); | 2921 | page = list_first_entry_or_null(&n->slabs_partial, struct page, lru); |
2930 | if (!page) { | 2922 | if (!page) { |
2931 | n->free_touched = 1; | 2923 | n->free_touched = 1; |
2932 | page = list_first_entry_or_null(&n->slabs_free, | 2924 | page = list_first_entry_or_null(&n->slabs_free, struct page, |
2933 | struct page, lru); | 2925 | lru); |
2926 | if (page) | ||
2927 | n->free_slabs--; | ||
2934 | } | 2928 | } |
2935 | 2929 | ||
2936 | if (sk_memalloc_socks()) | 2930 | if (sk_memalloc_socks()) |
2937 | return get_valid_first_slab(n, page, pfmemalloc); | 2931 | page = get_valid_first_slab(n, page, pfmemalloc); |
2938 | 2932 | ||
2939 | return page; | 2933 | return page; |
2940 | } | 2934 | } |
@@ -3434,9 +3428,10 @@ static void free_block(struct kmem_cache *cachep, void **objpp, | |||
3434 | STATS_DEC_ACTIVE(cachep); | 3428 | STATS_DEC_ACTIVE(cachep); |
3435 | 3429 | ||
3436 | /* fixup slab chains */ | 3430 | /* fixup slab chains */ |
3437 | if (page->active == 0) | 3431 | if (page->active == 0) { |
3438 | list_add(&page->lru, &n->slabs_free); | 3432 | list_add(&page->lru, &n->slabs_free); |
3439 | else { | 3433 | n->free_slabs++; |
3434 | } else { | ||
3440 | /* Unconditionally move a slab to the end of the | 3435 | /* Unconditionally move a slab to the end of the |
3441 | * partial list on free - maximum time for the | 3436 | * partial list on free - maximum time for the |
3442 | * other objects to be freed, too. | 3437 | * other objects to be freed, too. |
@@ -3450,7 +3445,8 @@ static void free_block(struct kmem_cache *cachep, void **objpp, | |||
3450 | 3445 | ||
3451 | page = list_last_entry(&n->slabs_free, struct page, lru); | 3446 | page = list_last_entry(&n->slabs_free, struct page, lru); |
3452 | list_move(&page->lru, list); | 3447 | list_move(&page->lru, list); |
3453 | n->num_slabs--; | 3448 | n->free_slabs--; |
3449 | n->total_slabs--; | ||
3454 | } | 3450 | } |
3455 | } | 3451 | } |
3456 | 3452 | ||
@@ -4102,64 +4098,33 @@ out: | |||
4102 | #ifdef CONFIG_SLABINFO | 4098 | #ifdef CONFIG_SLABINFO |
4103 | void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo) | 4099 | void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo) |
4104 | { | 4100 | { |
4105 | struct page *page; | 4101 | unsigned long active_objs, num_objs, active_slabs; |
4106 | unsigned long active_objs; | 4102 | unsigned long total_slabs = 0, free_objs = 0, shared_avail = 0; |
4107 | unsigned long num_objs; | 4103 | unsigned long free_slabs = 0; |
4108 | unsigned long active_slabs = 0; | ||
4109 | unsigned long num_slabs, free_objects = 0, shared_avail = 0; | ||
4110 | unsigned long num_slabs_partial = 0, num_slabs_free = 0; | ||
4111 | unsigned long num_slabs_full = 0; | ||
4112 | const char *name; | ||
4113 | char *error = NULL; | ||
4114 | int node; | 4104 | int node; |
4115 | struct kmem_cache_node *n; | 4105 | struct kmem_cache_node *n; |
4116 | 4106 | ||
4117 | active_objs = 0; | ||
4118 | num_slabs = 0; | ||
4119 | for_each_kmem_cache_node(cachep, node, n) { | 4107 | for_each_kmem_cache_node(cachep, node, n) { |
4120 | |||
4121 | check_irq_on(); | 4108 | check_irq_on(); |
4122 | spin_lock_irq(&n->list_lock); | 4109 | spin_lock_irq(&n->list_lock); |
4123 | 4110 | ||
4124 | num_slabs += n->num_slabs; | 4111 | total_slabs += n->total_slabs; |
4112 | free_slabs += n->free_slabs; | ||
4113 | free_objs += n->free_objects; | ||
4125 | 4114 | ||
4126 | list_for_each_entry(page, &n->slabs_partial, lru) { | ||
4127 | if (page->active == cachep->num && !error) | ||
4128 | error = "slabs_partial accounting error"; | ||
4129 | if (!page->active && !error) | ||
4130 | error = "slabs_partial accounting error"; | ||
4131 | active_objs += page->active; | ||
4132 | num_slabs_partial++; | ||
4133 | } | ||
4134 | |||
4135 | list_for_each_entry(page, &n->slabs_free, lru) { | ||
4136 | if (page->active && !error) | ||
4137 | error = "slabs_free accounting error"; | ||
4138 | num_slabs_free++; | ||
4139 | } | ||
4140 | |||
4141 | free_objects += n->free_objects; | ||
4142 | if (n->shared) | 4115 | if (n->shared) |
4143 | shared_avail += n->shared->avail; | 4116 | shared_avail += n->shared->avail; |
4144 | 4117 | ||
4145 | spin_unlock_irq(&n->list_lock); | 4118 | spin_unlock_irq(&n->list_lock); |
4146 | } | 4119 | } |
4147 | num_objs = num_slabs * cachep->num; | 4120 | num_objs = total_slabs * cachep->num; |
4148 | active_slabs = num_slabs - num_slabs_free; | 4121 | active_slabs = total_slabs - free_slabs; |
4149 | num_slabs_full = num_slabs - (num_slabs_partial + num_slabs_free); | 4122 | active_objs = num_objs - free_objs; |
4150 | active_objs += (num_slabs_full * cachep->num); | ||
4151 | |||
4152 | if (num_objs - active_objs != free_objects && !error) | ||
4153 | error = "free_objects accounting error"; | ||
4154 | |||
4155 | name = cachep->name; | ||
4156 | if (error) | ||
4157 | pr_err("slab: cache %s error: %s\n", name, error); | ||
4158 | 4123 | ||
4159 | sinfo->active_objs = active_objs; | 4124 | sinfo->active_objs = active_objs; |
4160 | sinfo->num_objs = num_objs; | 4125 | sinfo->num_objs = num_objs; |
4161 | sinfo->active_slabs = active_slabs; | 4126 | sinfo->active_slabs = active_slabs; |
4162 | sinfo->num_slabs = num_slabs; | 4127 | sinfo->num_slabs = total_slabs; |
4163 | sinfo->shared_avail = shared_avail; | 4128 | sinfo->shared_avail = shared_avail; |
4164 | sinfo->limit = cachep->limit; | 4129 | sinfo->limit = cachep->limit; |
4165 | sinfo->batchcount = cachep->batchcount; | 4130 | sinfo->batchcount = cachep->batchcount; |
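The reworked get_slabinfo() above no longer walks the partial and free lists; everything /proc/slabinfo reports is derived from the two per-node counters plus the cache's objects-per-slab count. A minimal userspace sketch of that arithmetic, with invented numbers (it models the calculation only, not the kernel structures):

    #include <stdio.h>

    /* Models the derivation in the new get_slabinfo(): inputs are the summed
     * per-node counters and the cache's objects-per-slab count. */
    static void slabinfo_from_counters(unsigned long total_slabs,
                                       unsigned long free_slabs,
                                       unsigned long free_objs,
                                       unsigned int objs_per_slab)
    {
            unsigned long num_objs = total_slabs * objs_per_slab;
            unsigned long active_slabs = total_slabs - free_slabs;
            unsigned long active_objs = num_objs - free_objs;

            printf("slabs: %lu/%lu, objs: %lu/%lu\n",
                   active_slabs, total_slabs, active_objs, num_objs);
    }

    int main(void)
    {
            /* hypothetical: 100 slabs, 10 fully free, 85 free objects,
             * 8 objects per slab -> "slabs: 90/100, objs: 715/800" */
            slabinfo_from_counters(100, 10, 85, 8);
            return 0;
    }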
@@ -142,11 +142,26 @@ static inline unsigned long kmem_cache_flags(unsigned long object_size, | |||
142 | #define SLAB_CACHE_FLAGS (0) | 142 | #define SLAB_CACHE_FLAGS (0) |
143 | #endif | 143 | #endif |
144 | 144 | ||
145 | /* Common flags available with current configuration */ | ||
145 | #define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS) | 146 | #define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS) |
146 | 147 | ||
148 | /* Common flags permitted for kmem_cache_create */ | ||
149 | #define SLAB_FLAGS_PERMITTED (SLAB_CORE_FLAGS | \ | ||
150 | SLAB_RED_ZONE | \ | ||
151 | SLAB_POISON | \ | ||
152 | SLAB_STORE_USER | \ | ||
153 | SLAB_TRACE | \ | ||
154 | SLAB_CONSISTENCY_CHECKS | \ | ||
155 | SLAB_MEM_SPREAD | \ | ||
156 | SLAB_NOLEAKTRACE | \ | ||
157 | SLAB_RECLAIM_ACCOUNT | \ | ||
158 | SLAB_TEMPORARY | \ | ||
159 | SLAB_NOTRACK | \ | ||
160 | SLAB_ACCOUNT) | ||
161 | |||
147 | int __kmem_cache_shutdown(struct kmem_cache *); | 162 | int __kmem_cache_shutdown(struct kmem_cache *); |
148 | void __kmem_cache_release(struct kmem_cache *); | 163 | void __kmem_cache_release(struct kmem_cache *); |
149 | int __kmem_cache_shrink(struct kmem_cache *, bool); | 164 | int __kmem_cache_shrink(struct kmem_cache *); |
150 | void slab_kmem_cache_release(struct kmem_cache *); | 165 | void slab_kmem_cache_release(struct kmem_cache *); |
151 | 166 | ||
152 | struct seq_file; | 167 | struct seq_file; |
@@ -432,7 +447,8 @@ struct kmem_cache_node { | |||
432 | struct list_head slabs_partial; /* partial list first, better asm code */ | 447 | struct list_head slabs_partial; /* partial list first, better asm code */ |
433 | struct list_head slabs_full; | 448 | struct list_head slabs_full; |
434 | struct list_head slabs_free; | 449 | struct list_head slabs_free; |
435 | unsigned long num_slabs; | 450 | unsigned long total_slabs; /* length of all slab lists */ |
451 | unsigned long free_slabs; /* length of free slab list only */ | ||
436 | unsigned long free_objects; | 452 | unsigned long free_objects; |
437 | unsigned int free_limit; | 453 | unsigned int free_limit; |
438 | unsigned int colour_next; /* Per-node cache coloring */ | 454 | unsigned int colour_next; /* Per-node cache coloring */ |
diff --git a/mm/slab_common.c b/mm/slab_common.c index 329b03843863..ae323841adb1 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c | |||
@@ -404,6 +404,12 @@ kmem_cache_create(const char *name, size_t size, size_t align, | |||
404 | goto out_unlock; | 404 | goto out_unlock; |
405 | } | 405 | } |
406 | 406 | ||
407 | /* Refuse requests with allocator specific flags */ | ||
408 | if (flags & ~SLAB_FLAGS_PERMITTED) { | ||
409 | err = -EINVAL; | ||
410 | goto out_unlock; | ||
411 | } | ||
412 | |||
407 | /* | 413 | /* |
408 | * Some allocators will constraint the set of valid flags to a subset | 414 | * Some allocators will constraint the set of valid flags to a subset |
409 | * of all flags. We expect them to define CACHE_CREATE_MASK in this | 415 | * of all flags. We expect them to define CACHE_CREATE_MASK in this |
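With SLAB_FLAGS_PERMITTED (defined in the mm/slab.h hunk above) consulted up front, kmem_cache_create() now refuses any request carrying allocator-internal or otherwise unknown flag bits instead of passing them down. A hedged kernel-context sketch; the flag value is a deliberately bogus bit picked for illustration, not a real SLAB_* constant:

    #include <linux/slab.h>

    /* Illustrative only: a create request with a bit outside SLAB_FLAGS_PERMITTED
     * now fails early (err = -EINVAL in the create path), so the caller sees NULL. */
    static int demo_reject_bogus_flag(void)
    {
            struct kmem_cache *c;

            c = kmem_cache_create("demo_cache", 64, 0,
                                  0x80000000UL /* hypothetical non-permitted bit */,
                                  NULL);
            if (!c)
                    return -EINVAL;         /* expected after this change */

            kmem_cache_destroy(c);
            return 0;
    }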
@@ -573,6 +579,29 @@ void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg) | |||
573 | get_online_cpus(); | 579 | get_online_cpus(); |
574 | get_online_mems(); | 580 | get_online_mems(); |
575 | 581 | ||
582 | #ifdef CONFIG_SLUB | ||
583 | /* | ||
584 | * In case of SLUB, we need to disable empty slab caching to | ||
585 | * avoid pinning the offline memory cgroup by freeable kmem | ||
586 | * pages charged to it. SLAB doesn't need this, as it | ||
587 | * periodically purges unused slabs. | ||
588 | */ | ||
589 | mutex_lock(&slab_mutex); | ||
590 | list_for_each_entry(s, &slab_caches, list) { | ||
591 | c = is_root_cache(s) ? cache_from_memcg_idx(s, idx) : NULL; | ||
592 | if (c) { | ||
593 | c->cpu_partial = 0; | ||
594 | c->min_partial = 0; | ||
595 | } | ||
596 | } | ||
597 | mutex_unlock(&slab_mutex); | ||
598 | /* | ||
599 | * kmem_cache->cpu_partial is checked locklessly (see | ||
600 | * put_cpu_partial()). Make sure the change is visible. | ||
601 | */ | ||
602 | synchronize_sched(); | ||
603 | #endif | ||
604 | |||
576 | mutex_lock(&slab_mutex); | 605 | mutex_lock(&slab_mutex); |
577 | list_for_each_entry(s, &slab_caches, list) { | 606 | list_for_each_entry(s, &slab_caches, list) { |
578 | if (!is_root_cache(s)) | 607 | if (!is_root_cache(s)) |
@@ -584,7 +613,7 @@ void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg) | |||
584 | if (!c) | 613 | if (!c) |
585 | continue; | 614 | continue; |
586 | 615 | ||
587 | __kmem_cache_shrink(c, true); | 616 | __kmem_cache_shrink(c); |
588 | arr->entries[idx] = NULL; | 617 | arr->entries[idx] = NULL; |
589 | } | 618 | } |
590 | mutex_unlock(&slab_mutex); | 619 | mutex_unlock(&slab_mutex); |
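The CONFIG_SLUB block above is a publish-then-wait pattern: the writer clears cpu_partial and min_partial under slab_mutex, then synchronize_sched() guarantees that every reader running in a preempt-disabled region (put_cpu_partial() checks cpu_partial locklessly) has either seen the new values or finished. A generic sketch of that pattern with invented names, not the SLUB internals themselves:

    #include <linux/types.h>
    #include <linux/compiler.h>
    #include <linux/preempt.h>
    #include <linux/rcupdate.h>

    /* Illustrative names only: a fast path gated by a flag that readers sample
     * locklessly from preempt-disabled (sched-RCU) context. */
    static int demo_fast_path_enabled = 1;

    static bool demo_reader(void)
    {
            bool fast;

            preempt_disable();                      /* sched-RCU read side */
            fast = READ_ONCE(demo_fast_path_enabled);
            preempt_enable();
            return fast;
    }

    static void demo_disable_fast_path(void)
    {
            WRITE_ONCE(demo_fast_path_enabled, 0);
            /*
             * Wait for readers that may have sampled the old value inside their
             * preempt-disabled section; afterwards no new fast-path users appear.
             */
            synchronize_sched();
    }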
@@ -755,7 +784,7 @@ int kmem_cache_shrink(struct kmem_cache *cachep) | |||
755 | get_online_cpus(); | 784 | get_online_cpus(); |
756 | get_online_mems(); | 785 | get_online_mems(); |
757 | kasan_cache_shrink(cachep); | 786 | kasan_cache_shrink(cachep); |
758 | ret = __kmem_cache_shrink(cachep, false); | 787 | ret = __kmem_cache_shrink(cachep); |
759 | put_online_mems(); | 788 | put_online_mems(); |
760 | put_online_cpus(); | 789 | put_online_cpus(); |
761 | return ret; | 790 | return ret; |
@@ -634,7 +634,7 @@ void __kmem_cache_release(struct kmem_cache *c) | |||
634 | { | 634 | { |
635 | } | 635 | } |
636 | 636 | ||
637 | int __kmem_cache_shrink(struct kmem_cache *d, bool deactivate) | 637 | int __kmem_cache_shrink(struct kmem_cache *d) |
638 | { | 638 | { |
639 | return 0; | 639 | return 0; |
640 | } | 640 | } |
@@ -3076,7 +3076,7 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) | |||
3076 | struct detached_freelist df; | 3076 | struct detached_freelist df; |
3077 | 3077 | ||
3078 | size = build_detached_freelist(s, size, p, &df); | 3078 | size = build_detached_freelist(s, size, p, &df); |
3079 | if (unlikely(!df.page)) | 3079 | if (!df.page) |
3080 | continue; | 3080 | continue; |
3081 | 3081 | ||
3082 | slab_free(df.s, df.page, df.freelist, df.tail, df.cnt,_RET_IP_); | 3082 | slab_free(df.s, df.page, df.freelist, df.tail, df.cnt,_RET_IP_); |
@@ -3883,7 +3883,7 @@ EXPORT_SYMBOL(kfree); | |||
3883 | * being allocated from last increasing the chance that the last objects | 3883 | * being allocated from last increasing the chance that the last objects |
3884 | * are freed in them. | 3884 | * are freed in them. |
3885 | */ | 3885 | */ |
3886 | int __kmem_cache_shrink(struct kmem_cache *s, bool deactivate) | 3886 | int __kmem_cache_shrink(struct kmem_cache *s) |
3887 | { | 3887 | { |
3888 | int node; | 3888 | int node; |
3889 | int i; | 3889 | int i; |
@@ -3895,21 +3895,6 @@ int __kmem_cache_shrink(struct kmem_cache *s, bool deactivate) | |||
3895 | unsigned long flags; | 3895 | unsigned long flags; |
3896 | int ret = 0; | 3896 | int ret = 0; |
3897 | 3897 | ||
3898 | if (deactivate) { | ||
3899 | /* | ||
3900 | * Disable empty slabs caching. Used to avoid pinning offline | ||
3901 | * memory cgroups by kmem pages that can be freed. | ||
3902 | */ | ||
3903 | s->cpu_partial = 0; | ||
3904 | s->min_partial = 0; | ||
3905 | |||
3906 | /* | ||
3907 | * s->cpu_partial is checked locklessly (see put_cpu_partial), | ||
3908 | * so we have to make sure the change is visible. | ||
3909 | */ | ||
3910 | synchronize_sched(); | ||
3911 | } | ||
3912 | |||
3913 | flush_all(s); | 3898 | flush_all(s); |
3914 | for_each_kmem_cache_node(s, node, n) { | 3899 | for_each_kmem_cache_node(s, node, n) { |
3915 | INIT_LIST_HEAD(&discard); | 3900 | INIT_LIST_HEAD(&discard); |
@@ -3966,7 +3951,7 @@ static int slab_mem_going_offline_callback(void *arg) | |||
3966 | 3951 | ||
3967 | mutex_lock(&slab_mutex); | 3952 | mutex_lock(&slab_mutex); |
3968 | list_for_each_entry(s, &slab_caches, list) | 3953 | list_for_each_entry(s, &slab_caches, list) |
3969 | __kmem_cache_shrink(s, false); | 3954 | __kmem_cache_shrink(s); |
3970 | mutex_unlock(&slab_mutex); | 3955 | mutex_unlock(&slab_mutex); |
3971 | 3956 | ||
3972 | return 0; | 3957 | return 0; |
diff --git a/mm/swapfile.c b/mm/swapfile.c index f30438970cd1..1c6e0321205d 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
@@ -1234,6 +1234,7 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud, | |||
1234 | 1234 | ||
1235 | pmd = pmd_offset(pud, addr); | 1235 | pmd = pmd_offset(pud, addr); |
1236 | do { | 1236 | do { |
1237 | cond_resched(); | ||
1237 | next = pmd_addr_end(addr, end); | 1238 | next = pmd_addr_end(addr, end); |
1238 | if (pmd_none_or_trans_huge_or_clear_bad(pmd)) | 1239 | if (pmd_none_or_trans_huge_or_clear_bad(pmd)) |
1239 | continue; | 1240 | continue; |
@@ -1313,6 +1314,7 @@ static int unuse_mm(struct mm_struct *mm, | |||
1313 | for (vma = mm->mmap; vma; vma = vma->vm_next) { | 1314 | for (vma = mm->mmap; vma; vma = vma->vm_next) { |
1314 | if (vma->anon_vma && (ret = unuse_vma(vma, entry, page))) | 1315 | if (vma->anon_vma && (ret = unuse_vma(vma, entry, page))) |
1315 | break; | 1316 | break; |
1317 | cond_resched(); | ||
1316 | } | 1318 | } |
1317 | up_read(&mm->mmap_sem); | 1319 | up_read(&mm->mmap_sem); |
1318 | return (ret < 0)? ret: 0; | 1320 | return (ret < 0)? ret: 0; |
@@ -1350,15 +1352,12 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si, | |||
1350 | prev = 0; | 1352 | prev = 0; |
1351 | i = 1; | 1353 | i = 1; |
1352 | } | 1354 | } |
1353 | if (frontswap) { | ||
1354 | if (frontswap_test(si, i)) | ||
1355 | break; | ||
1356 | else | ||
1357 | continue; | ||
1358 | } | ||
1359 | count = READ_ONCE(si->swap_map[i]); | 1355 | count = READ_ONCE(si->swap_map[i]); |
1360 | if (count && swap_count(count) != SWAP_MAP_BAD) | 1356 | if (count && swap_count(count) != SWAP_MAP_BAD) |
1361 | break; | 1357 | if (!frontswap || frontswap_test(si, i)) |
1358 | break; | ||
1359 | if ((i % LATENCY_LIMIT) == 0) | ||
1360 | cond_resched(); | ||
1362 | } | 1361 | } |
1363 | return i; | 1362 | return i; |
1364 | } | 1363 | } |
diff --git a/mm/truncate.c b/mm/truncate.c index 8d8c62d89e6d..fd97f1dbce29 100644 --- a/mm/truncate.c +++ b/mm/truncate.c | |||
@@ -44,28 +44,13 @@ static void clear_exceptional_entry(struct address_space *mapping, | |||
44 | * without the tree itself locked. These unlocked entries | 44 | * without the tree itself locked. These unlocked entries |
45 | * need verification under the tree lock. | 45 | * need verification under the tree lock. |
46 | */ | 46 | */ |
47 | if (!__radix_tree_lookup(&mapping->page_tree, index, &node, | 47 | if (!__radix_tree_lookup(&mapping->page_tree, index, &node, &slot)) |
48 | &slot)) | ||
49 | goto unlock; | 48 | goto unlock; |
50 | if (*slot != entry) | 49 | if (*slot != entry) |
51 | goto unlock; | 50 | goto unlock; |
52 | radix_tree_replace_slot(slot, NULL); | 51 | __radix_tree_replace(&mapping->page_tree, node, slot, NULL, |
52 | workingset_update_node, mapping); | ||
53 | mapping->nrexceptional--; | 53 | mapping->nrexceptional--; |
54 | if (!node) | ||
55 | goto unlock; | ||
56 | workingset_node_shadows_dec(node); | ||
57 | /* | ||
58 | * Don't track node without shadow entries. | ||
59 | * | ||
60 | * Avoid acquiring the list_lru lock if already untracked. | ||
61 | * The list_empty() test is safe as node->private_list is | ||
62 | * protected by mapping->tree_lock. | ||
63 | */ | ||
64 | if (!workingset_node_shadows(node) && | ||
65 | !list_empty(&node->private_list)) | ||
66 | list_lru_del(&workingset_shadow_nodes, | ||
67 | &node->private_list); | ||
68 | __radix_tree_delete_node(&mapping->page_tree, node); | ||
69 | unlock: | 54 | unlock: |
70 | spin_unlock_irq(&mapping->tree_lock); | 55 | spin_unlock_irq(&mapping->tree_lock); |
71 | } | 56 | } |
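The shorter clear_exceptional_entry() above relies on __radix_tree_replace() taking an update callback and a private pointer: the radix tree invokes the callback on the node it touched, and the shadow-node bookkeeping that used to be open-coded here moves into that callback (workingset_update_node() in the mm/workingset.c hunks further down). A rough sketch of the callback shape; track_node()/untrack_node() are placeholder stubs, not kernel functions:

    #include <linux/fs.h>
    #include <linux/radix-tree.h>

    /* Placeholder hooks standing in for whatever list/LRU bookkeeping the
     * callback owner wants to do. */
    static void track_node(struct radix_tree_node *node, struct address_space *m) { }
    static void untrack_node(struct radix_tree_node *node) { }

    /* Shape of a node update callback as used by __radix_tree_replace() in this
     * series (the real implementation is workingset_update_node()). */
    static void demo_update_node(struct radix_tree_node *node, void *private)
    {
            struct address_space *mapping = private;

            if (node->count && node->count == node->exceptional)
                    track_node(node, mapping);      /* only shadow entries left */
            else
                    untrack_node(node);             /* has pages, or is emptying */
    }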
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index f2481cb4e6b2..a5584384eabc 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c | |||
@@ -365,7 +365,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size, | |||
365 | BUG_ON(offset_in_page(size)); | 365 | BUG_ON(offset_in_page(size)); |
366 | BUG_ON(!is_power_of_2(align)); | 366 | BUG_ON(!is_power_of_2(align)); |
367 | 367 | ||
368 | might_sleep_if(gfpflags_allow_blocking(gfp_mask)); | 368 | might_sleep(); |
369 | 369 | ||
370 | va = kmalloc_node(sizeof(struct vmap_area), | 370 | va = kmalloc_node(sizeof(struct vmap_area), |
371 | gfp_mask & GFP_RECLAIM_MASK, node); | 371 | gfp_mask & GFP_RECLAIM_MASK, node); |
@@ -601,6 +601,13 @@ static unsigned long lazy_max_pages(void) | |||
601 | 601 | ||
602 | static atomic_t vmap_lazy_nr = ATOMIC_INIT(0); | 602 | static atomic_t vmap_lazy_nr = ATOMIC_INIT(0); |
603 | 603 | ||
604 | /* | ||
605 | * Serialize vmap purging. There is no actual critical section protected | ||
606 | * by this lock, but we want to avoid concurrent calls for performance | ||
607 | * reasons and to make pcpu_get_vm_areas() more deterministic. | ||
608 | */ | ||
609 | static DEFINE_MUTEX(vmap_purge_lock); | ||
610 | |||
604 | /* for per-CPU blocks */ | 611 | /* for per-CPU blocks */ |
605 | static void purge_fragmented_blocks_allcpus(void); | 612 | static void purge_fragmented_blocks_allcpus(void); |
606 | 613 | ||
@@ -615,59 +622,40 @@ void set_iounmap_nonlazy(void) | |||
615 | 622 | ||
616 | /* | 623 | /* |
617 | * Purges all lazily-freed vmap areas. | 624 | * Purges all lazily-freed vmap areas. |
618 | * | ||
619 | * If sync is 0 then don't purge if there is already a purge in progress. | ||
620 | * If force_flush is 1, then flush kernel TLBs between *start and *end even | ||
621 | * if we found no lazy vmap areas to unmap (callers can use this to optimise | ||
622 | * their own TLB flushing). | ||
623 | * Returns with *start = min(*start, lowest purged address) | ||
624 | * *end = max(*end, highest purged address) | ||
625 | */ | 625 | */ |
626 | static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, | 626 | static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end) |
627 | int sync, int force_flush) | ||
628 | { | 627 | { |
629 | static DEFINE_SPINLOCK(purge_lock); | ||
630 | struct llist_node *valist; | 628 | struct llist_node *valist; |
631 | struct vmap_area *va; | 629 | struct vmap_area *va; |
632 | struct vmap_area *n_va; | 630 | struct vmap_area *n_va; |
633 | int nr = 0; | 631 | bool do_free = false; |
634 | 632 | ||
635 | /* | 633 | lockdep_assert_held(&vmap_purge_lock); |
636 | * If sync is 0 but force_flush is 1, we'll go sync anyway but callers | ||
637 | * should not expect such behaviour. This just simplifies locking for | ||
638 | * the case that isn't actually used at the moment anyway. | ||
639 | */ | ||
640 | if (!sync && !force_flush) { | ||
641 | if (!spin_trylock(&purge_lock)) | ||
642 | return; | ||
643 | } else | ||
644 | spin_lock(&purge_lock); | ||
645 | |||
646 | if (sync) | ||
647 | purge_fragmented_blocks_allcpus(); | ||
648 | 634 | ||
649 | valist = llist_del_all(&vmap_purge_list); | 635 | valist = llist_del_all(&vmap_purge_list); |
650 | llist_for_each_entry(va, valist, purge_list) { | 636 | llist_for_each_entry(va, valist, purge_list) { |
651 | if (va->va_start < *start) | 637 | if (va->va_start < start) |
652 | *start = va->va_start; | 638 | start = va->va_start; |
653 | if (va->va_end > *end) | 639 | if (va->va_end > end) |
654 | *end = va->va_end; | 640 | end = va->va_end; |
655 | nr += (va->va_end - va->va_start) >> PAGE_SHIFT; | 641 | do_free = true; |
656 | } | 642 | } |
657 | 643 | ||
658 | if (nr) | 644 | if (!do_free) |
659 | atomic_sub(nr, &vmap_lazy_nr); | 645 | return false; |
660 | 646 | ||
661 | if (nr || force_flush) | 647 | flush_tlb_kernel_range(start, end); |
662 | flush_tlb_kernel_range(*start, *end); | ||
663 | 648 | ||
664 | if (nr) { | 649 | spin_lock(&vmap_area_lock); |
665 | spin_lock(&vmap_area_lock); | 650 | llist_for_each_entry_safe(va, n_va, valist, purge_list) { |
666 | llist_for_each_entry_safe(va, n_va, valist, purge_list) | 651 | int nr = (va->va_end - va->va_start) >> PAGE_SHIFT; |
667 | __free_vmap_area(va); | 652 | |
668 | spin_unlock(&vmap_area_lock); | 653 | __free_vmap_area(va); |
654 | atomic_sub(nr, &vmap_lazy_nr); | ||
655 | cond_resched_lock(&vmap_area_lock); | ||
669 | } | 656 | } |
670 | spin_unlock(&purge_lock); | 657 | spin_unlock(&vmap_area_lock); |
658 | return true; | ||
671 | } | 659 | } |
672 | 660 | ||
673 | /* | 661 | /* |
@@ -676,9 +664,10 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, | |||
676 | */ | 664 | */ |
677 | static void try_purge_vmap_area_lazy(void) | 665 | static void try_purge_vmap_area_lazy(void) |
678 | { | 666 | { |
679 | unsigned long start = ULONG_MAX, end = 0; | 667 | if (mutex_trylock(&vmap_purge_lock)) { |
680 | 668 | __purge_vmap_area_lazy(ULONG_MAX, 0); | |
681 | __purge_vmap_area_lazy(&start, &end, 0, 0); | 669 | mutex_unlock(&vmap_purge_lock); |
670 | } | ||
682 | } | 671 | } |
683 | 672 | ||
684 | /* | 673 | /* |
@@ -686,9 +675,10 @@ static void try_purge_vmap_area_lazy(void) | |||
686 | */ | 675 | */ |
687 | static void purge_vmap_area_lazy(void) | 676 | static void purge_vmap_area_lazy(void) |
688 | { | 677 | { |
689 | unsigned long start = ULONG_MAX, end = 0; | 678 | mutex_lock(&vmap_purge_lock); |
690 | 679 | purge_fragmented_blocks_allcpus(); | |
691 | __purge_vmap_area_lazy(&start, &end, 1, 0); | 680 | __purge_vmap_area_lazy(ULONG_MAX, 0); |
681 | mutex_unlock(&vmap_purge_lock); | ||
692 | } | 682 | } |
693 | 683 | ||
694 | /* | 684 | /* |
@@ -711,22 +701,13 @@ static void free_vmap_area_noflush(struct vmap_area *va) | |||
711 | } | 701 | } |
712 | 702 | ||
713 | /* | 703 | /* |
714 | * Free and unmap a vmap area, caller ensuring flush_cache_vunmap had been | ||
715 | * called for the correct range previously. | ||
716 | */ | ||
717 | static void free_unmap_vmap_area_noflush(struct vmap_area *va) | ||
718 | { | ||
719 | unmap_vmap_area(va); | ||
720 | free_vmap_area_noflush(va); | ||
721 | } | ||
722 | |||
723 | /* | ||
724 | * Free and unmap a vmap area | 704 | * Free and unmap a vmap area |
725 | */ | 705 | */ |
726 | static void free_unmap_vmap_area(struct vmap_area *va) | 706 | static void free_unmap_vmap_area(struct vmap_area *va) |
727 | { | 707 | { |
728 | flush_cache_vunmap(va->va_start, va->va_end); | 708 | flush_cache_vunmap(va->va_start, va->va_end); |
729 | free_unmap_vmap_area_noflush(va); | 709 | unmap_vmap_area(va); |
710 | free_vmap_area_noflush(va); | ||
730 | } | 711 | } |
731 | 712 | ||
732 | static struct vmap_area *find_vmap_area(unsigned long addr) | 713 | static struct vmap_area *find_vmap_area(unsigned long addr) |
@@ -740,16 +721,6 @@ static struct vmap_area *find_vmap_area(unsigned long addr) | |||
740 | return va; | 721 | return va; |
741 | } | 722 | } |
742 | 723 | ||
743 | static void free_unmap_vmap_area_addr(unsigned long addr) | ||
744 | { | ||
745 | struct vmap_area *va; | ||
746 | |||
747 | va = find_vmap_area(addr); | ||
748 | BUG_ON(!va); | ||
749 | free_unmap_vmap_area(va); | ||
750 | } | ||
751 | |||
752 | |||
753 | /*** Per cpu kva allocator ***/ | 724 | /*** Per cpu kva allocator ***/ |
754 | 725 | ||
755 | /* | 726 | /* |
@@ -1070,6 +1041,8 @@ void vm_unmap_aliases(void) | |||
1070 | if (unlikely(!vmap_initialized)) | 1041 | if (unlikely(!vmap_initialized)) |
1071 | return; | 1042 | return; |
1072 | 1043 | ||
1044 | might_sleep(); | ||
1045 | |||
1073 | for_each_possible_cpu(cpu) { | 1046 | for_each_possible_cpu(cpu) { |
1074 | struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu); | 1047 | struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu); |
1075 | struct vmap_block *vb; | 1048 | struct vmap_block *vb; |
@@ -1094,7 +1067,11 @@ void vm_unmap_aliases(void) | |||
1094 | rcu_read_unlock(); | 1067 | rcu_read_unlock(); |
1095 | } | 1068 | } |
1096 | 1069 | ||
1097 | __purge_vmap_area_lazy(&start, &end, 1, flush); | 1070 | mutex_lock(&vmap_purge_lock); |
1071 | purge_fragmented_blocks_allcpus(); | ||
1072 | if (!__purge_vmap_area_lazy(start, end) && flush) | ||
1073 | flush_tlb_kernel_range(start, end); | ||
1074 | mutex_unlock(&vmap_purge_lock); | ||
1098 | } | 1075 | } |
1099 | EXPORT_SYMBOL_GPL(vm_unmap_aliases); | 1076 | EXPORT_SYMBOL_GPL(vm_unmap_aliases); |
1100 | 1077 | ||
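After dropping purge_lock, the purge paths above share one shape: take vmap_purge_lock (a mutex, so callers may sleep), optionally drain the fragmented per-cpu blocks, then let __purge_vmap_area_lazy() free the areas under vmap_area_lock with cond_resched_lock() between iterations. Condensed for reference; this is a sketch of the call pattern inside mm/vmalloc.c, using the statics introduced above, not a standalone function:

    /* Sketch of the common calling pattern after this rework. */
    static void demo_purge_and_flush(unsigned long start, unsigned long end,
                                     bool force_flush)
    {
            mutex_lock(&vmap_purge_lock);           /* sleeping is allowed here */
            purge_fragmented_blocks_allcpus();
            if (!__purge_vmap_area_lazy(start, end) && force_flush)
                    flush_tlb_kernel_range(start, end);
            mutex_unlock(&vmap_purge_lock);
    }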
@@ -1107,7 +1084,9 @@ void vm_unmap_ram(const void *mem, unsigned int count) | |||
1107 | { | 1084 | { |
1108 | unsigned long size = (unsigned long)count << PAGE_SHIFT; | 1085 | unsigned long size = (unsigned long)count << PAGE_SHIFT; |
1109 | unsigned long addr = (unsigned long)mem; | 1086 | unsigned long addr = (unsigned long)mem; |
1087 | struct vmap_area *va; | ||
1110 | 1088 | ||
1089 | might_sleep(); | ||
1111 | BUG_ON(!addr); | 1090 | BUG_ON(!addr); |
1112 | BUG_ON(addr < VMALLOC_START); | 1091 | BUG_ON(addr < VMALLOC_START); |
1113 | BUG_ON(addr > VMALLOC_END); | 1092 | BUG_ON(addr > VMALLOC_END); |
@@ -1116,10 +1095,14 @@ void vm_unmap_ram(const void *mem, unsigned int count) | |||
1116 | debug_check_no_locks_freed(mem, size); | 1095 | debug_check_no_locks_freed(mem, size); |
1117 | vmap_debug_free_range(addr, addr+size); | 1096 | vmap_debug_free_range(addr, addr+size); |
1118 | 1097 | ||
1119 | if (likely(count <= VMAP_MAX_ALLOC)) | 1098 | if (likely(count <= VMAP_MAX_ALLOC)) { |
1120 | vb_free(mem, size); | 1099 | vb_free(mem, size); |
1121 | else | 1100 | return; |
1122 | free_unmap_vmap_area_addr(addr); | 1101 | } |
1102 | |||
1103 | va = find_vmap_area(addr); | ||
1104 | BUG_ON(!va); | ||
1105 | free_unmap_vmap_area(va); | ||
1123 | } | 1106 | } |
1124 | EXPORT_SYMBOL(vm_unmap_ram); | 1107 | EXPORT_SYMBOL(vm_unmap_ram); |
1125 | 1108 | ||
@@ -1455,6 +1438,8 @@ struct vm_struct *remove_vm_area(const void *addr) | |||
1455 | { | 1438 | { |
1456 | struct vmap_area *va; | 1439 | struct vmap_area *va; |
1457 | 1440 | ||
1441 | might_sleep(); | ||
1442 | |||
1458 | va = find_vmap_area((unsigned long)addr); | 1443 | va = find_vmap_area((unsigned long)addr); |
1459 | if (va && va->flags & VM_VM_AREA) { | 1444 | if (va && va->flags & VM_VM_AREA) { |
1460 | struct vm_struct *vm = va->vm; | 1445 | struct vm_struct *vm = va->vm; |
@@ -1510,7 +1495,39 @@ static void __vunmap(const void *addr, int deallocate_pages) | |||
1510 | kfree(area); | 1495 | kfree(area); |
1511 | return; | 1496 | return; |
1512 | } | 1497 | } |
1513 | 1498 | ||
1499 | static inline void __vfree_deferred(const void *addr) | ||
1500 | { | ||
1501 | /* | ||
1502 | * Use raw_cpu_ptr() because this can be called from preemptible | ||
1503 | * context. Preemption is absolutely fine here, because the llist_add() | ||
1504 | * implementation is lockless, so it works even if we are adding to | ||
1505 | * another cpu's list. schedule_work() should be fine with this too. | ||
1506 | */ | ||
1507 | struct vfree_deferred *p = raw_cpu_ptr(&vfree_deferred); | ||
1508 | |||
1509 | if (llist_add((struct llist_node *)addr, &p->list)) | ||
1510 | schedule_work(&p->wq); | ||
1511 | } | ||
1512 | |||
1513 | /** | ||
1514 | * vfree_atomic - release memory allocated by vmalloc() | ||
1515 | * @addr: memory base address | ||
1516 | * | ||
1517 | * This one is just like vfree() but can be called in any atomic context | ||
1518 | * except NMIs. | ||
1519 | */ | ||
1520 | void vfree_atomic(const void *addr) | ||
1521 | { | ||
1522 | BUG_ON(in_nmi()); | ||
1523 | |||
1524 | kmemleak_free(addr); | ||
1525 | |||
1526 | if (!addr) | ||
1527 | return; | ||
1528 | __vfree_deferred(addr); | ||
1529 | } | ||
1530 | |||
1514 | /** | 1531 | /** |
1515 | * vfree - release memory allocated by vmalloc() | 1532 | * vfree - release memory allocated by vmalloc() |
1516 | * @addr: memory base address | 1533 | * @addr: memory base address |
@@ -1533,11 +1550,9 @@ void vfree(const void *addr) | |||
1533 | 1550 | ||
1534 | if (!addr) | 1551 | if (!addr) |
1535 | return; | 1552 | return; |
1536 | if (unlikely(in_interrupt())) { | 1553 | if (unlikely(in_interrupt())) |
1537 | struct vfree_deferred *p = this_cpu_ptr(&vfree_deferred); | 1554 | __vfree_deferred(addr); |
1538 | if (llist_add((struct llist_node *)addr, &p->list)) | 1555 | else |
1539 | schedule_work(&p->wq); | ||
1540 | } else | ||
1541 | __vunmap(addr, 1); | 1556 | __vunmap(addr, 1); |
1542 | } | 1557 | } |
1543 | EXPORT_SYMBOL(vfree); | 1558 | EXPORT_SYMBOL(vfree); |
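vfree_atomic(), added above, gives callers in atomic context (anything except NMI) a way to release vmalloc memory: the pointer is queued on the per-cpu vfree_deferred list and freed from a workqueue, the same deferral vfree() itself now uses for the in_interrupt() case. A hedged usage sketch; the lock and buffer are illustrative:

    #include <linux/spinlock.h>
    #include <linux/vmalloc.h>

    static DEFINE_SPINLOCK(demo_lock);
    static void *demo_buf;

    /* Drop a buffer while holding a spinlock: vfree() may sleep (remove_vm_area()
     * now asserts might_sleep()), so the deferred variant is used instead. */
    static void demo_drop_buffer(void)
    {
            void *old;

            spin_lock(&demo_lock);
            old = demo_buf;
            demo_buf = NULL;
            vfree_atomic(old);              /* NULL is tolerated, like vfree() */
            spin_unlock(&demo_lock);
    }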
@@ -2574,32 +2589,13 @@ void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) | |||
2574 | static void *s_start(struct seq_file *m, loff_t *pos) | 2589 | static void *s_start(struct seq_file *m, loff_t *pos) |
2575 | __acquires(&vmap_area_lock) | 2590 | __acquires(&vmap_area_lock) |
2576 | { | 2591 | { |
2577 | loff_t n = *pos; | ||
2578 | struct vmap_area *va; | ||
2579 | |||
2580 | spin_lock(&vmap_area_lock); | 2592 | spin_lock(&vmap_area_lock); |
2581 | va = list_first_entry(&vmap_area_list, typeof(*va), list); | 2593 | return seq_list_start(&vmap_area_list, *pos); |
2582 | while (n > 0 && &va->list != &vmap_area_list) { | ||
2583 | n--; | ||
2584 | va = list_next_entry(va, list); | ||
2585 | } | ||
2586 | if (!n && &va->list != &vmap_area_list) | ||
2587 | return va; | ||
2588 | |||
2589 | return NULL; | ||
2590 | |||
2591 | } | 2594 | } |
2592 | 2595 | ||
2593 | static void *s_next(struct seq_file *m, void *p, loff_t *pos) | 2596 | static void *s_next(struct seq_file *m, void *p, loff_t *pos) |
2594 | { | 2597 | { |
2595 | struct vmap_area *va = p, *next; | 2598 | return seq_list_next(p, &vmap_area_list, pos); |
2596 | |||
2597 | ++*pos; | ||
2598 | next = list_next_entry(va, list); | ||
2599 | if (&next->list != &vmap_area_list) | ||
2600 | return next; | ||
2601 | |||
2602 | return NULL; | ||
2603 | } | 2599 | } |
2604 | 2600 | ||
2605 | static void s_stop(struct seq_file *m, void *p) | 2601 | static void s_stop(struct seq_file *m, void *p) |
@@ -2634,9 +2630,11 @@ static void show_numa_info(struct seq_file *m, struct vm_struct *v) | |||
2634 | 2630 | ||
2635 | static int s_show(struct seq_file *m, void *p) | 2631 | static int s_show(struct seq_file *m, void *p) |
2636 | { | 2632 | { |
2637 | struct vmap_area *va = p; | 2633 | struct vmap_area *va; |
2638 | struct vm_struct *v; | 2634 | struct vm_struct *v; |
2639 | 2635 | ||
2636 | va = list_entry(p, struct vmap_area, list); | ||
2637 | |||
2640 | /* | 2638 | /* |
2641 | * s_show can encounter race with remove_vm_area, !VM_VM_AREA on | 2639 | * s_show can encounter race with remove_vm_area, !VM_VM_AREA on |
2642 | * behalf of vmap area is being tear down or vm_map_ram allocation. | 2640 | * behalf of vmap area is being tear down or vm_map_ram allocation. |
diff --git a/mm/vmscan.c b/mm/vmscan.c index 0c8f28a6d89f..6aa5b01d3e75 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -291,6 +291,7 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl, | |||
291 | int nid = shrinkctl->nid; | 291 | int nid = shrinkctl->nid; |
292 | long batch_size = shrinker->batch ? shrinker->batch | 292 | long batch_size = shrinker->batch ? shrinker->batch |
293 | : SHRINK_BATCH; | 293 | : SHRINK_BATCH; |
294 | long scanned = 0, next_deferred; | ||
294 | 295 | ||
295 | freeable = shrinker->count_objects(shrinker, shrinkctl); | 296 | freeable = shrinker->count_objects(shrinker, shrinkctl); |
296 | if (freeable == 0) | 297 | if (freeable == 0) |
@@ -312,7 +313,9 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl, | |||
312 | pr_err("shrink_slab: %pF negative objects to delete nr=%ld\n", | 313 | pr_err("shrink_slab: %pF negative objects to delete nr=%ld\n", |
313 | shrinker->scan_objects, total_scan); | 314 | shrinker->scan_objects, total_scan); |
314 | total_scan = freeable; | 315 | total_scan = freeable; |
315 | } | 316 | next_deferred = nr; |
317 | } else | ||
318 | next_deferred = total_scan; | ||
316 | 319 | ||
317 | /* | 320 | /* |
318 | * We need to avoid excessive windup on filesystem shrinkers | 321 | * We need to avoid excessive windup on filesystem shrinkers |
@@ -369,17 +372,22 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl, | |||
369 | 372 | ||
370 | count_vm_events(SLABS_SCANNED, nr_to_scan); | 373 | count_vm_events(SLABS_SCANNED, nr_to_scan); |
371 | total_scan -= nr_to_scan; | 374 | total_scan -= nr_to_scan; |
375 | scanned += nr_to_scan; | ||
372 | 376 | ||
373 | cond_resched(); | 377 | cond_resched(); |
374 | } | 378 | } |
375 | 379 | ||
380 | if (next_deferred >= scanned) | ||
381 | next_deferred -= scanned; | ||
382 | else | ||
383 | next_deferred = 0; | ||
376 | /* | 384 | /* |
377 | * move the unused scan count back into the shrinker in a | 385 | * move the unused scan count back into the shrinker in a |
378 | * manner that handles concurrent updates. If we exhausted the | 386 | * manner that handles concurrent updates. If we exhausted the |
379 | * scan, there is no need to do an update. | 387 | * scan, there is no need to do an update. |
380 | */ | 388 | */ |
381 | if (total_scan > 0) | 389 | if (next_deferred > 0) |
382 | new_nr = atomic_long_add_return(total_scan, | 390 | new_nr = atomic_long_add_return(next_deferred, |
383 | &shrinker->nr_deferred[nid]); | 391 | &shrinker->nr_deferred[nid]); |
384 | else | 392 | else |
385 | new_nr = atomic_long_read(&shrinker->nr_deferred[nid]); | 393 | new_nr = atomic_long_read(&shrinker->nr_deferred[nid]); |
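The tail of do_shrink_slab() now writes back only the deferred work that was not actually scanned, clamped at zero, instead of the leftover total_scan. The clamp in isolation, with invented numbers:

    #include <stdio.h>

    /* Illustrative numbers only: what gets added back into
     * shrinker->nr_deferred[nid] after this change. */
    static unsigned long carry_over(unsigned long next_deferred, unsigned long scanned)
    {
            if (next_deferred >= scanned)
                    next_deferred -= scanned;
            else
                    next_deferred = 0;
            return next_deferred;
    }

    int main(void)
    {
            printf("%lu\n", carry_over(500, 384));  /* 116 re-deferred */
            printf("%lu\n", carry_over(200, 384));  /* clamped to 0 */
            return 0;
    }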
diff --git a/mm/workingset.c b/mm/workingset.c index fb1f9183d89a..241fa5d6b3b2 100644 --- a/mm/workingset.c +++ b/mm/workingset.c | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/atomic.h> | 10 | #include <linux/atomic.h> |
11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
12 | #include <linux/swap.h> | 12 | #include <linux/swap.h> |
13 | #include <linux/dax.h> | ||
13 | #include <linux/fs.h> | 14 | #include <linux/fs.h> |
14 | #include <linux/mm.h> | 15 | #include <linux/mm.h> |
15 | 16 | ||
@@ -334,48 +335,81 @@ out: | |||
334 | * point where they would still be useful. | 335 | * point where they would still be useful. |
335 | */ | 336 | */ |
336 | 337 | ||
337 | struct list_lru workingset_shadow_nodes; | 338 | static struct list_lru shadow_nodes; |
339 | |||
340 | void workingset_update_node(struct radix_tree_node *node, void *private) | ||
341 | { | ||
342 | struct address_space *mapping = private; | ||
343 | |||
344 | /* Only regular page cache has shadow entries */ | ||
345 | if (dax_mapping(mapping) || shmem_mapping(mapping)) | ||
346 | return; | ||
347 | |||
348 | /* | ||
349 | * Track non-empty nodes that contain only shadow entries; | ||
350 | * unlink those that contain pages or are being freed. | ||
351 | * | ||
352 | * Avoid acquiring the list_lru lock when the nodes are | ||
353 | * already where they should be. The list_empty() test is safe | ||
354 | * as node->private_list is protected by &mapping->tree_lock. | ||
355 | */ | ||
356 | if (node->count && node->count == node->exceptional) { | ||
357 | if (list_empty(&node->private_list)) { | ||
358 | node->private_data = mapping; | ||
359 | list_lru_add(&shadow_nodes, &node->private_list); | ||
360 | } | ||
361 | } else { | ||
362 | if (!list_empty(&node->private_list)) | ||
363 | list_lru_del(&shadow_nodes, &node->private_list); | ||
364 | } | ||
365 | } | ||
338 | 366 | ||
339 | static unsigned long count_shadow_nodes(struct shrinker *shrinker, | 367 | static unsigned long count_shadow_nodes(struct shrinker *shrinker, |
340 | struct shrink_control *sc) | 368 | struct shrink_control *sc) |
341 | { | 369 | { |
342 | unsigned long shadow_nodes; | ||
343 | unsigned long max_nodes; | 370 | unsigned long max_nodes; |
344 | unsigned long pages; | 371 | unsigned long nodes; |
372 | unsigned long cache; | ||
345 | 373 | ||
346 | /* list_lru lock nests inside IRQ-safe mapping->tree_lock */ | 374 | /* list_lru lock nests inside IRQ-safe mapping->tree_lock */ |
347 | local_irq_disable(); | 375 | local_irq_disable(); |
348 | shadow_nodes = list_lru_shrink_count(&workingset_shadow_nodes, sc); | 376 | nodes = list_lru_shrink_count(&shadow_nodes, sc); |
349 | local_irq_enable(); | 377 | local_irq_enable(); |
350 | 378 | ||
351 | if (sc->memcg) { | ||
352 | pages = mem_cgroup_node_nr_lru_pages(sc->memcg, sc->nid, | ||
353 | LRU_ALL_FILE); | ||
354 | } else { | ||
355 | pages = node_page_state(NODE_DATA(sc->nid), NR_ACTIVE_FILE) + | ||
356 | node_page_state(NODE_DATA(sc->nid), NR_INACTIVE_FILE); | ||
357 | } | ||
358 | |||
359 | /* | 379 | /* |
360 | * Active cache pages are limited to 50% of memory, and shadow | 380 | * Approximate a reasonable limit for the radix tree nodes |
361 | * entries that represent a refault distance bigger than that | 381 | * containing shadow entries. We don't need to keep more |
362 | * do not have any effect. Limit the number of shadow nodes | 382 | * shadow entries than possible pages on the active list, |
363 | * such that shadow entries do not exceed the number of active | 383 | * since refault distances bigger than that are dismissed. |
364 | * cache pages, assuming a worst-case node population density | 384 | * |
365 | * of 1/8th on average. | 385 | * The size of the active list converges toward 100% of |
386 | * overall page cache as memory grows, with only a tiny | ||
387 | * inactive list. Assume the total cache size for that. | ||
388 | * | ||
389 | * Nodes might be sparsely populated, with only one shadow | ||
390 | * entry in the extreme case. Obviously, we cannot keep one | ||
391 | * node for every eligible shadow entry, so compromise on a | ||
392 | * worst-case density of 1/8th. Below that, not all eligible | ||
393 | * refaults can be detected anymore. | ||
366 | * | 394 | * |
367 | * On 64-bit with 7 radix_tree_nodes per page and 64 slots | 395 | * On 64-bit with 7 radix_tree_nodes per page and 64 slots |
368 | * each, this will reclaim shadow entries when they consume | 396 | * each, this will reclaim shadow entries when they consume |
369 | * ~2% of available memory: | 397 | * ~1.8% of available memory: |
370 | * | 398 | * |
371 | * PAGE_SIZE / radix_tree_nodes / node_entries / PAGE_SIZE | 399 | * PAGE_SIZE / radix_tree_nodes / node_entries * 8 / PAGE_SIZE |
372 | */ | 400 | */ |
373 | max_nodes = pages >> (1 + RADIX_TREE_MAP_SHIFT - 3); | 401 | if (sc->memcg) { |
402 | cache = mem_cgroup_node_nr_lru_pages(sc->memcg, sc->nid, | ||
403 | LRU_ALL_FILE); | ||
404 | } else { | ||
405 | cache = node_page_state(NODE_DATA(sc->nid), NR_ACTIVE_FILE) + | ||
406 | node_page_state(NODE_DATA(sc->nid), NR_INACTIVE_FILE); | ||
407 | } | ||
408 | max_nodes = cache >> (RADIX_TREE_MAP_SHIFT - 3); | ||
374 | 409 | ||
375 | if (shadow_nodes <= max_nodes) | 410 | if (nodes <= max_nodes) |
376 | return 0; | 411 | return 0; |
377 | 412 | return nodes - max_nodes; | |
378 | return shadow_nodes - max_nodes; | ||
379 | } | 413 | } |
380 | 414 | ||
381 | static enum lru_status shadow_lru_isolate(struct list_head *item, | 415 | static enum lru_status shadow_lru_isolate(struct list_head *item, |
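The replacement comment above packs in some arithmetic worth unpacking: with 64-slot nodes and a worst-case density of one shadow entry per 8 slots, the node budget is cache_pages / 8; at roughly 7 radix_tree_nodes per 4K page those nodes occupy about cache_pages / 56 pages, i.e. ~1.8% of the cache they describe. A small userspace check of those numbers, assuming the common 64-bit defaults (RADIX_TREE_MAP_SHIFT = 6, about 7 nodes per page):

    #include <stdio.h>

    int main(void)
    {
            const unsigned long radix_tree_map_shift = 6;   /* 64 slots per node */
            const double nodes_per_page = 7.0;              /* from the comment above */
            unsigned long cache_pages = 1UL << 20;          /* e.g. 4GB of page cache */

            unsigned long max_nodes = cache_pages >> (radix_tree_map_shift - 3);
            double node_pages = max_nodes / nodes_per_page;

            printf("max_nodes = %lu (= cache_pages / 8)\n", max_nodes);
            printf("node memory vs cache memory = %.2f%%\n",
                   100.0 * node_pages / cache_pages);       /* prints ~1.79% */
            return 0;
    }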
@@ -418,23 +452,30 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, | |||
418 | * no pages, so we expect to be able to remove them all and | 452 | * no pages, so we expect to be able to remove them all and |
419 | * delete and free the empty node afterwards. | 453 | * delete and free the empty node afterwards. |
420 | */ | 454 | */ |
421 | BUG_ON(!workingset_node_shadows(node)); | 455 | if (WARN_ON_ONCE(!node->exceptional)) |
422 | BUG_ON(workingset_node_pages(node)); | 456 | goto out_invalid; |
423 | 457 | if (WARN_ON_ONCE(node->count != node->exceptional)) | |
458 | goto out_invalid; | ||
424 | for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { | 459 | for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { |
425 | if (node->slots[i]) { | 460 | if (node->slots[i]) { |
426 | BUG_ON(!radix_tree_exceptional_entry(node->slots[i])); | 461 | if (WARN_ON_ONCE(!radix_tree_exceptional_entry(node->slots[i]))) |
462 | goto out_invalid; | ||
463 | if (WARN_ON_ONCE(!node->exceptional)) | ||
464 | goto out_invalid; | ||
465 | if (WARN_ON_ONCE(!mapping->nrexceptional)) | ||
466 | goto out_invalid; | ||
427 | node->slots[i] = NULL; | 467 | node->slots[i] = NULL; |
428 | workingset_node_shadows_dec(node); | 468 | node->exceptional--; |
429 | BUG_ON(!mapping->nrexceptional); | 469 | node->count--; |
430 | mapping->nrexceptional--; | 470 | mapping->nrexceptional--; |
431 | } | 471 | } |
432 | } | 472 | } |
433 | BUG_ON(workingset_node_shadows(node)); | 473 | if (WARN_ON_ONCE(node->exceptional)) |
474 | goto out_invalid; | ||
434 | inc_node_state(page_pgdat(virt_to_page(node)), WORKINGSET_NODERECLAIM); | 475 | inc_node_state(page_pgdat(virt_to_page(node)), WORKINGSET_NODERECLAIM); |
435 | if (!__radix_tree_delete_node(&mapping->page_tree, node)) | 476 | __radix_tree_delete_node(&mapping->page_tree, node); |
436 | BUG(); | ||
437 | 477 | ||
478 | out_invalid: | ||
438 | spin_unlock(&mapping->tree_lock); | 479 | spin_unlock(&mapping->tree_lock); |
439 | ret = LRU_REMOVED_RETRY; | 480 | ret = LRU_REMOVED_RETRY; |
440 | out: | 481 | out: |
@@ -452,8 +493,7 @@ static unsigned long scan_shadow_nodes(struct shrinker *shrinker, | |||
452 | 493 | ||
453 | /* list_lru lock nests inside IRQ-safe mapping->tree_lock */ | 494 | /* list_lru lock nests inside IRQ-safe mapping->tree_lock */ |
454 | local_irq_disable(); | 495 | local_irq_disable(); |
455 | ret = list_lru_shrink_walk(&workingset_shadow_nodes, sc, | 496 | ret = list_lru_shrink_walk(&shadow_nodes, sc, shadow_lru_isolate, NULL); |
456 | shadow_lru_isolate, NULL); | ||
457 | local_irq_enable(); | 497 | local_irq_enable(); |
458 | return ret; | 498 | return ret; |
459 | } | 499 | } |
@@ -492,7 +532,7 @@ static int __init workingset_init(void) | |||
492 | pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n", | 532 | pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n", |
493 | timestamp_bits, max_order, bucket_order); | 533 | timestamp_bits, max_order, bucket_order); |
494 | 534 | ||
495 | ret = list_lru_init_key(&workingset_shadow_nodes, &shadow_nodes_key); | 535 | ret = list_lru_init_key(&shadow_nodes, &shadow_nodes_key); |
496 | if (ret) | 536 | if (ret) |
497 | goto err; | 537 | goto err; |
498 | ret = register_shrinker(&workingset_shadow_shrinker); | 538 | ret = register_shrinker(&workingset_shadow_shrinker); |
@@ -500,7 +540,7 @@ static int __init workingset_init(void) | |||
500 | goto err_list_lru; | 540 | goto err_list_lru; |
501 | return 0; | 541 | return 0; |
502 | err_list_lru: | 542 | err_list_lru: |
503 | list_lru_destroy(&workingset_shadow_nodes); | 543 | list_lru_destroy(&shadow_nodes); |
504 | err: | 544 | err: |
505 | return ret; | 545 | return ret; |
506 | } | 546 | } |
diff --git a/scripts/Makefile.kasan b/scripts/Makefile.kasan index 37323b0df374..9576775a86f6 100644 --- a/scripts/Makefile.kasan +++ b/scripts/Makefile.kasan | |||
@@ -28,4 +28,6 @@ else | |||
28 | CFLAGS_KASAN := $(CFLAGS_KASAN_MINIMAL) | 28 | CFLAGS_KASAN := $(CFLAGS_KASAN_MINIMAL) |
29 | endif | 29 | endif |
30 | endif | 30 | endif |
31 | |||
32 | CFLAGS_KASAN += $(call cc-option, -fsanitize-address-use-after-scope) | ||
31 | endif | 33 | endif |
diff --git a/scripts/bloat-o-meter b/scripts/bloat-o-meter index d9ff038c1b28..a27677146410 100755 --- a/scripts/bloat-o-meter +++ b/scripts/bloat-o-meter | |||
@@ -16,19 +16,22 @@ if len(sys.argv) != 3: | |||
16 | sys.stderr.write("usage: %s file1 file2\n" % sys.argv[0]) | 16 | sys.stderr.write("usage: %s file1 file2\n" % sys.argv[0]) |
17 | sys.exit(-1) | 17 | sys.exit(-1) |
18 | 18 | ||
19 | re_NUMBER = re.compile(r'\.[0-9]+') | ||
20 | |||
19 | def getsizes(file): | 21 | def getsizes(file): |
20 | sym = {} | 22 | sym = {} |
21 | for l in os.popen("nm --size-sort " + file).readlines(): | 23 | with os.popen("nm --size-sort " + file) as f: |
22 | size, type, name = l[:-1].split() | 24 | for line in f: |
23 | if type in "tTdDbBrR": | 25 | size, type, name = line.split() |
24 | # strip generated symbols | 26 | if type in "tTdDbBrR": |
25 | if name.startswith("__mod_"): continue | 27 | # strip generated symbols |
26 | if name.startswith("SyS_"): continue | 28 | if name.startswith("__mod_"): continue |
27 | if name.startswith("compat_SyS_"): continue | 29 | if name.startswith("SyS_"): continue |
28 | if name == "linux_banner": continue | 30 | if name.startswith("compat_SyS_"): continue |
29 | # statics and some other optimizations adds random .NUMBER | 31 | if name == "linux_banner": continue |
30 | name = re.sub(r'\.[0-9]+', '', name) | 32 | # statics and some other optimizations adds random .NUMBER |
31 | sym[name] = sym.get(name, 0) + int(size, 16) | 33 | name = re_NUMBER.sub('', name) |
34 | sym[name] = sym.get(name, 0) + int(size, 16) | ||
32 | return sym | 35 | return sym |
33 | 36 | ||
34 | old = getsizes(sys.argv[1]) | 37 | old = getsizes(sys.argv[1]) |
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 23f462f64a3f..ac5656ef2aec 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl | |||
@@ -761,7 +761,7 @@ sub seed_camelcase_file { | |||
761 | sub is_maintained_obsolete { | 761 | sub is_maintained_obsolete { |
762 | my ($filename) = @_; | 762 | my ($filename) = @_; |
763 | 763 | ||
764 | return 0 if (!(-e "$root/scripts/get_maintainer.pl")); | 764 | return 0 if (!$tree || !(-e "$root/scripts/get_maintainer.pl")); |
765 | 765 | ||
766 | my $status = `perl $root/scripts/get_maintainer.pl --status --nom --nol --nogit --nogit-fallback -f $filename 2>&1`; | 766 | my $status = `perl $root/scripts/get_maintainer.pl --status --nom --nol --nogit --nogit-fallback -f $filename 2>&1`; |
767 | 767 | ||
@@ -2589,6 +2589,7 @@ sub process { | |||
2589 | $line =~ /^rename (?:from|to) [\w\/\.\-]+\s*$/ || | 2589 | $line =~ /^rename (?:from|to) [\w\/\.\-]+\s*$/ || |
2590 | ($line =~ /\{\s*([\w\/\.\-]*)\s*\=\>\s*([\w\/\.\-]*)\s*\}/ && | 2590 | ($line =~ /\{\s*([\w\/\.\-]*)\s*\=\>\s*([\w\/\.\-]*)\s*\}/ && |
2591 | (defined($1) || defined($2))))) { | 2591 | (defined($1) || defined($2))))) { |
2592 | $is_patch = 1; | ||
2592 | $reported_maintainer_file = 1; | 2593 | $reported_maintainer_file = 1; |
2593 | WARN("FILE_PATH_CHANGES", | 2594 | WARN("FILE_PATH_CHANGES", |
2594 | "added, moved or deleted file(s), does MAINTAINERS need updating?\n" . $herecurr); | 2595 | "added, moved or deleted file(s), does MAINTAINERS need updating?\n" . $herecurr); |
@@ -2601,20 +2602,6 @@ sub process { | |||
2601 | $herecurr) if (!$emitted_corrupt++); | 2602 | $herecurr) if (!$emitted_corrupt++); |
2602 | } | 2603 | } |
2603 | 2604 | ||
2604 | # Check for absolute kernel paths. | ||
2605 | if ($tree) { | ||
2606 | while ($line =~ m{(?:^|\s)(/\S*)}g) { | ||
2607 | my $file = $1; | ||
2608 | |||
2609 | if ($file =~ m{^(.*?)(?::\d+)+:?$} && | ||
2610 | check_absolute_file($1, $herecurr)) { | ||
2611 | # | ||
2612 | } else { | ||
2613 | check_absolute_file($file, $herecurr); | ||
2614 | } | ||
2615 | } | ||
2616 | } | ||
2617 | |||
2618 | # UTF-8 regex found at http://www.w3.org/International/questions/qa-forms-utf-8.en.php | 2605 | # UTF-8 regex found at http://www.w3.org/International/questions/qa-forms-utf-8.en.php |
2619 | if (($realfile =~ /^$/ || $line =~ /^\+/) && | 2606 | if (($realfile =~ /^$/ || $line =~ /^\+/) && |
2620 | $rawline !~ m/^$UTF8*$/) { | 2607 | $rawline !~ m/^$UTF8*$/) { |
@@ -2652,6 +2639,20 @@ sub process { | |||
2652 | "8-bit UTF-8 used in possible commit log\n" . $herecurr); | 2639 | "8-bit UTF-8 used in possible commit log\n" . $herecurr); |
2653 | } | 2640 | } |
2654 | 2641 | ||
2642 | # Check for absolute kernel paths in commit message | ||
2643 | if ($tree && $in_commit_log) { | ||
2644 | while ($line =~ m{(?:^|\s)(/\S*)}g) { | ||
2645 | my $file = $1; | ||
2646 | |||
2647 | if ($file =~ m{^(.*?)(?::\d+)+:?$} && | ||
2648 | check_absolute_file($1, $herecurr)) { | ||
2649 | # | ||
2650 | } else { | ||
2651 | check_absolute_file($file, $herecurr); | ||
2652 | } | ||
2653 | } | ||
2654 | } | ||
2655 | |||
2655 | # Check for various typo / spelling mistakes | 2656 | # Check for various typo / spelling mistakes |
2656 | if (defined($misspellings) && | 2657 | if (defined($misspellings) && |
2657 | ($in_commit_log || $line =~ /^(?:\+|Subject:)/i)) { | 2658 | ($in_commit_log || $line =~ /^(?:\+|Subject:)/i)) { |
@@ -2805,7 +2806,7 @@ sub process { | |||
2805 | } | 2806 | } |
2806 | 2807 | ||
2807 | # check we are in a valid source file if not then ignore this hunk | 2808 | # check we are in a valid source file if not then ignore this hunk |
2808 | next if ($realfile !~ /\.(h|c|s|S|pl|sh|dtsi|dts)$/); | 2809 | next if ($realfile !~ /\.(h|c|s|S|sh|dtsi|dts)$/); |
2809 | 2810 | ||
2810 | # line length limit (with some exclusions) | 2811 | # line length limit (with some exclusions) |
2811 | # | 2812 | # |
@@ -3440,6 +3441,18 @@ sub process { | |||
3440 | #ignore lines not being added | 3441 | #ignore lines not being added |
3441 | next if ($line =~ /^[^\+]/); | 3442 | next if ($line =~ /^[^\+]/); |
3442 | 3443 | ||
3444 | # check for dereferences that span multiple lines | ||
3445 | if ($prevline =~ /^\+.*$Lval\s*(?:\.|->)\s*$/ && | ||
3446 | $line =~ /^\+\s*(?!\#\s*(?!define\s+|if))\s*$Lval/) { | ||
3447 | $prevline =~ /($Lval\s*(?:\.|->))\s*$/; | ||
3448 | my $ref = $1; | ||
3449 | $line =~ /^.\s*($Lval)/; | ||
3450 | $ref .= $1; | ||
3451 | $ref =~ s/\s//g; | ||
3452 | WARN("MULTILINE_DEREFERENCE", | ||
3453 | "Avoid multiple line dereference - prefer '$ref'\n" . $hereprev); | ||
3454 | } | ||
3455 | |||
3443 | # check for declarations of signed or unsigned without int | 3456 | # check for declarations of signed or unsigned without int |
3444 | while ($line =~ m{\b($Declare)\s*(?!char\b|short\b|int\b|long\b)\s*($Ident)?\s*[=,;\[\)\(]}g) { | 3457 | while ($line =~ m{\b($Declare)\s*(?!char\b|short\b|int\b|long\b)\s*($Ident)?\s*[=,;\[\)\(]}g) { |
3445 | my $type = $1; | 3458 | my $type = $1; |
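The MULTILINE_DEREFERENCE hunk above warns when a patch adds a struct or pointer dereference that is split across two lines. As a minimal C illustration (the struct and variable names here are mine, not taken from the patch), the first assignment below is the pattern the new check flags, and the second is the single-line form the warning text suggests:

	struct net_stats { unsigned long rx_packets; };
	struct net_dev   { struct net_stats stats; };

	static unsigned long example(struct net_dev *dev)
	{
		/* Split dereference: checkpatch would now print
		 * "Avoid multiple line dereference - prefer 'dev->stats.rx_packets'". */
		unsigned long split = dev->
			stats.rx_packets;

		/* Preferred: keep the whole dereference on one line. */
		unsigned long joined = dev->stats.rx_packets;

		return split + joined;
	}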
@@ -5548,8 +5561,9 @@ sub process { | |||
5548 | "Using weak declarations can have unintended link defects\n" . $herecurr); | 5561 | "Using weak declarations can have unintended link defects\n" . $herecurr); |
5549 | } | 5562 | } |
5550 | 5563 | ||
5551 | # check for c99 types like uint8_t used outside of uapi/ | 5564 | # check for c99 types like uint8_t used outside of uapi/ and tools/ |
5552 | if ($realfile !~ m@\binclude/uapi/@ && | 5565 | if ($realfile !~ m@\binclude/uapi/@ && |
5566 | $realfile !~ m@\btools/@ && | ||
5553 | $line =~ /\b($Declare)\s*$Ident\s*[=;,\[]/) { | 5567 | $line =~ /\b($Declare)\s*$Ident\s*[=;,\[]/) { |
5554 | my $type = $1; | 5568 | my $type = $1; |
5555 | if ($type =~ /\b($typeC99Typedefs)\b/) { | 5569 | if ($type =~ /\b($typeC99Typedefs)\b/) { |
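The hunk above adds tools/ to the paths where C99 fixed-width typedefs are tolerated, alongside include/uapi/. In ordinary kernel sources checkpatch keeps preferring the kernel's own typedefs; a rough sketch (illustrative structs of my own, kernel-code context assumed):

	#include <linux/types.h>

	/* What checkpatch prefers in normal kernel code. */
	struct frame_hdr {
		u8  version;
		u16 flags;
		u32 length;
	};

	/* The C99 spellings below would still draw a warning, except in files
	 * under include/uapi/ or, after this change, under tools/. */
	struct frame_hdr_c99 {
		uint8_t  version;
		uint16_t flags;
		uint32_t length;
	};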
@@ -5925,7 +5939,7 @@ sub process { | |||
5925 | } | 5939 | } |
5926 | if (!$has_break && $has_statement) { | 5940 | if (!$has_break && $has_statement) { |
5927 | WARN("MISSING_BREAK", | 5941 | WARN("MISSING_BREAK", |
5928 | "Possible switch case/default not preceeded by break or fallthrough comment\n" . $herecurr); | 5942 | "Possible switch case/default not preceded by break or fallthrough comment\n" . $herecurr); |
5929 | } | 5943 | } |
5930 | } | 5944 | } |
5931 | 5945 | ||
diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index aed4511f0304..633f2dd3de27 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl | |||
@@ -49,6 +49,7 @@ my $scm = 0; | |||
49 | my $web = 0; | 49 | my $web = 0; |
50 | my $subsystem = 0; | 50 | my $subsystem = 0; |
51 | my $status = 0; | 51 | my $status = 0; |
52 | my $letters = ""; | ||
52 | my $keywords = 1; | 53 | my $keywords = 1; |
53 | my $sections = 0; | 54 | my $sections = 0; |
54 | my $file_emails = 0; | 55 | my $file_emails = 0; |
@@ -241,6 +242,7 @@ if (!GetOptions( | |||
241 | 'status!' => \$status, | 242 | 'status!' => \$status, |
242 | 'scm!' => \$scm, | 243 | 'scm!' => \$scm, |
243 | 'web!' => \$web, | 244 | 'web!' => \$web, |
245 | 'letters=s' => \$letters, | ||
244 | 'pattern-depth=i' => \$pattern_depth, | 246 | 'pattern-depth=i' => \$pattern_depth, |
245 | 'k|keywords!' => \$keywords, | 247 | 'k|keywords!' => \$keywords, |
246 | 'sections!' => \$sections, | 248 | 'sections!' => \$sections, |
@@ -271,7 +273,8 @@ $output_multiline = 0 if ($output_separator ne ", "); | |||
271 | $output_rolestats = 1 if ($interactive); | 273 | $output_rolestats = 1 if ($interactive); |
272 | $output_roles = 1 if ($output_rolestats); | 274 | $output_roles = 1 if ($output_rolestats); |
273 | 275 | ||
274 | if ($sections) { | 276 | if ($sections || $letters ne "") { |
277 | $sections = 1; | ||
275 | $email = 0; | 278 | $email = 0; |
276 | $email_list = 0; | 279 | $email_list = 0; |
277 | $scm = 0; | 280 | $scm = 0; |
@@ -682,8 +685,10 @@ sub get_maintainers { | |||
682 | $line =~ s/\\\./\./g; ##Convert \. to . | 685 | $line =~ s/\\\./\./g; ##Convert \. to . |
683 | $line =~ s/\.\*/\*/g; ##Convert .* to * | 686 | $line =~ s/\.\*/\*/g; ##Convert .* to * |
684 | } | 687 | } |
685 | $line =~ s/^([A-Z]):/$1:\t/g; | 688 | my $count = $line =~ s/^([A-Z]):/$1:\t/g; |
686 | print("$line\n"); | 689 | if ($letters eq "" || (!$count || $letters =~ /$1/i)) { |
690 | print("$line\n"); | ||
691 | } | ||
687 | } | 692 | } |
688 | print("\n"); | 693 | print("\n"); |
689 | } | 694 | } |
@@ -814,6 +819,7 @@ Other options: | |||
814 | --pattern-depth => Number of pattern directory traversals (default: 0 (all)) | 819 | --pattern-depth => Number of pattern directory traversals (default: 0 (all)) |
815 | --keywords => scan patch for keywords (default: $keywords) | 820 | --keywords => scan patch for keywords (default: $keywords) |
816 | --sections => print all of the subsystem sections with pattern matches | 821 | --sections => print all of the subsystem sections with pattern matches |
822 | --letters => print all matching 'letter' types from all matching sections | ||
817 | --mailmap => use .mailmap file (default: $email_use_mailmap) | 823 | --mailmap => use .mailmap file (default: $email_use_mailmap) |
818 | --version => show version | 824 | --version => show version |
819 | --help => show this help information | 825 | --help => show this help information |
diff --git a/scripts/tags.sh b/scripts/tags.sh index a2ff3388e5ea..df5fa777d300 100755 --- a/scripts/tags.sh +++ b/scripts/tags.sh | |||
@@ -304,11 +304,26 @@ if [ "${ARCH}" = "um" ]; then | |||
304 | elif [ "${SRCARCH}" = "arm" -a "${SUBARCH}" != "" ]; then | 304 | elif [ "${SRCARCH}" = "arm" -a "${SUBARCH}" != "" ]; then |
305 | subarchdir=$(find ${tree}arch/$SRCARCH/ -name "mach-*" -type d -o \ | 305 | subarchdir=$(find ${tree}arch/$SRCARCH/ -name "mach-*" -type d -o \ |
306 | -name "plat-*" -type d); | 306 | -name "plat-*" -type d); |
307 | mach_suffix=$SUBARCH | ||
308 | plat_suffix=$SUBARCH | ||
309 | |||
310 | # Special cases when $plat_suffix != $mach_suffix | ||
311 | case $mach_suffix in | ||
312 | "omap1" | "omap2") | ||
313 | plat_suffix="omap" | ||
314 | ;; | ||
315 | esac | ||
316 | |||
317 | if [ ! -d ${tree}arch/$SRCARCH/mach-$mach_suffix ]; then | ||
318 | echo "Warning: arch/arm/mach-$mach_suffix/ not found." >&2 | ||
319 | echo " Fix your \$SUBARCH appropriately" >&2 | ||
320 | fi | ||
321 | |||
307 | for i in $subarchdir; do | 322 | for i in $subarchdir; do |
308 | case "$i" in | 323 | case "$i" in |
309 | *"mach-"${SUBARCH}) | 324 | *"mach-"${mach_suffix}) |
310 | ;; | 325 | ;; |
311 | *"plat-"${SUBARCH}) | 326 | *"plat-"${plat_suffix}) |
312 | ;; | 327 | ;; |
313 | *) | 328 | *) |
314 | subarchprune="$subarchprune \ | 329 | subarchprune="$subarchprune \ |
diff --git a/sound/core/misc.c b/sound/core/misc.c index f2e8226c88fb..21b228046e88 100644 --- a/sound/core/misc.c +++ b/sound/core/misc.c | |||
@@ -71,6 +71,7 @@ void __snd_printk(unsigned int level, const char *path, int line, | |||
71 | int kern_level; | 71 | int kern_level; |
72 | struct va_format vaf; | 72 | struct va_format vaf; |
73 | char verbose_fmt[] = KERN_DEFAULT "ALSA %s:%d %pV"; | 73 | char verbose_fmt[] = KERN_DEFAULT "ALSA %s:%d %pV"; |
74 | bool level_found = false; | ||
74 | #endif | 75 | #endif |
75 | 76 | ||
76 | #ifdef CONFIG_SND_DEBUG | 77 | #ifdef CONFIG_SND_DEBUG |
@@ -83,15 +84,22 @@ void __snd_printk(unsigned int level, const char *path, int line, | |||
83 | vaf.fmt = format; | 84 | vaf.fmt = format; |
84 | vaf.va = &args; | 85 | vaf.va = &args; |
85 | 86 | ||
86 | kern_level = printk_get_level(format); | 87 | while ((kern_level = printk_get_level(vaf.fmt)) != 0) { |
87 | if (kern_level) { | 88 | const char *end_of_header = printk_skip_level(vaf.fmt); |
88 | const char *end_of_header = printk_skip_level(format); | 89 | |
89 | memcpy(verbose_fmt, format, end_of_header - format); | 90 | /* Ignore KERN_CONT. We print filename:line for each piece. */ |
91 | if (kern_level >= '0' && kern_level <= '7') { | ||
92 | memcpy(verbose_fmt, vaf.fmt, end_of_header - vaf.fmt); | ||
93 | level_found = true; | ||
94 | } | ||
95 | |||
90 | vaf.fmt = end_of_header; | 96 | vaf.fmt = end_of_header; |
91 | } else if (level) | 97 | } |
98 | |||
99 | if (!level_found && level) | ||
92 | memcpy(verbose_fmt, KERN_DEBUG, sizeof(KERN_DEBUG) - 1); | 100 | memcpy(verbose_fmt, KERN_DEBUG, sizeof(KERN_DEBUG) - 1); |
93 | printk(verbose_fmt, sanity_file_name(path), line, &vaf); | ||
94 | 101 | ||
102 | printk(verbose_fmt, sanity_file_name(path), line, &vaf); | ||
95 | #else | 103 | #else |
96 | vprintk(format, args); | 104 | vprintk(format, args); |
97 | #endif | 105 | #endif |
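The __snd_printk() rework above copes with formats that carry more than one printk level header, for example KERN_CONT followed by a real level, now that continuation pieces may set their own level. It loops with printk_get_level()/printk_skip_level() until every "\001<x>" header is consumed, ignores KERN_CONT, and falls back to KERN_DEBUG only if no real level '0'..'7' was found. A user-space sketch of that parsing loop, with stand-in helpers (sketch_get_level()/sketch_skip_level() are my own names, not the kernel API) so it compiles and runs on its own:

	#include <stdbool.h>
	#include <stdio.h>

	#define SOH "\001"	/* same escape the kernel uses for level headers */

	/* Stand-in for printk_get_level(): return the header character, or 0. */
	static char sketch_get_level(const char *fmt)
	{
		return (fmt[0] == '\001' && fmt[1] != '\0') ? fmt[1] : 0;
	}

	/* Stand-in for printk_skip_level(): step past one "\001<x>" header. */
	static const char *sketch_skip_level(const char *fmt)
	{
		return sketch_get_level(fmt) ? fmt + 2 : fmt;
	}

	int main(void)
	{
		/* KERN_CONT ("\001c") followed by KERN_INFO ("\0016"). */
		const char *fmt = SOH "c" SOH "6" "codec probe done\n";
		bool level_found = false;
		char kern_level;

		while ((kern_level = sketch_get_level(fmt)) != 0) {
			if (kern_level >= '0' && kern_level <= '7')
				level_found = true;	/* a real level, not KERN_CONT */
			fmt = sketch_skip_level(fmt);
		}

		printf("level found: %s, remaining format: \"%s\"",
		       level_found ? "yes" : "no", fmt);
		return 0;
	}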
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index 05d7bc488971..d1be94667a30 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c | |||
@@ -146,7 +146,7 @@ static void multiorder_check(unsigned long index, int order) | |||
146 | 146 | ||
147 | slot = radix_tree_lookup_slot(&tree, index); | 147 | slot = radix_tree_lookup_slot(&tree, index); |
148 | free(*slot); | 148 | free(*slot); |
149 | radix_tree_replace_slot(slot, item2); | 149 | radix_tree_replace_slot(&tree, slot, item2); |
150 | for (i = min; i < max; i++) { | 150 | for (i = min; i < max; i++) { |
151 | struct item *item = item_lookup(&tree, i); | 151 | struct item *item = item_lookup(&tree, i); |
152 | assert(item != 0); | 152 | assert(item != 0); |
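The final hunk only tracks an API change in the radix tree code: radix_tree_replace_slot() now takes the tree root as an extra first argument, presumably so replacement can update per-tree bookkeeping, and the test call is adjusted to match. A hedged sketch of the updated call pattern (replace_example() is my own wrapper, and the snippet assumes the tools/testing/radix-tree harness headers, so it is not buildable outside that environment):

	#include <linux/radix-tree.h>
	#include <stdlib.h>

	static void replace_example(struct radix_tree_root *tree,
				    unsigned long index, void *new_item)
	{
		void **slot = radix_tree_lookup_slot(tree, index);

		if (slot) {
			free(*slot);	/* drop whatever the slot held before */
			/* three-argument form: the root is now passed explicitly */
			radix_tree_replace_slot(tree, slot, new_item);
		}
	}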