author     Linus Torvalds <torvalds@linux-foundation.org>  2016-01-15 14:41:44 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2016-01-15 14:41:44 -0500
commit     875fc4f5ddf35605581f9a5900c14afef48611f2
tree       e237a28a71a5d1e72eaf0ecda737eb5c8614c72c
parent     7d1fc01afc5af35e5197e0e75abe900f6bd279b8
parent     7dfa4612204b511c934ca2a0e4f306f9981bd9aa
Merge branch 'akpm' (patches from Andrew)
Merge first patch-bomb from Andrew Morton:
 - A few hotfixes which missed 4.4 because I was asleep. cc'ed to
   -stable
- A few misc fixes
- OCFS2 updates
- Part of MM. Including pretty large changes to page-flags handling
and to thp management which have been buffered up for 2-3 cycles now.
I have a lot of MM material this time.
[ It turns out the THP part wasn't quite ready, so that got dropped from
this series - Linus ]
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (117 commits)
zsmalloc: reorganize struct size_class to pack 4 bytes hole
mm/zbud.c: use list_last_entry() instead of list_tail_entry()
zram/zcomp: do not zero out zcomp private pages
zram: pass gfp from zcomp frontend to backend
zram: try vmalloc() after kmalloc()
zram/zcomp: use GFP_NOIO to allocate streams
mm: add tracepoint for scanning pages
drivers/base/memory.c: fix kernel warning during memory hotplug on ppc64
mm/page_isolation: use macro to judge the alignment
mm: fix noisy sparse warning in LIBCFS_ALLOC_PRE()
mm: rework virtual memory accounting
include/linux/memblock.h: fix ordering of 'flags' argument in comments
mm: move lru_to_page to mm_inline.h
Documentation/filesystems: describe the shared memory usage/accounting
memory-hotplug: don't BUG() in register_memory_resource()
hugetlb: make mm and fs code explicitly non-modular
mm/swapfile.c: use list_for_each_entry_safe in free_swap_count_continuations
mm: /proc/pid/clear_refs: no need to clear VM_SOFTDIRTY in clear_soft_dirty_pmd()
mm: make sure isolate_lru_page() is never called for tail page
vmstat: make vmstat_updater deferrable again and shut down on idle
...
176 files changed, 1852 insertions, 1284 deletions
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 402ab99e409f..e95aa1c6eadf 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -169,6 +169,9 @@ read the file /proc/PID/status:
   VmLck:         0 kB
   VmHWM:       476 kB
   VmRSS:       476 kB
+  RssAnon:     352 kB
+  RssFile:     120 kB
+  RssShmem:      4 kB
   VmData:      156 kB
   VmStk:        88 kB
   VmExe:        68 kB
@@ -231,14 +234,20 @@ Table 1-2: Contents of the status files (as of 4.1)
  VmSize                      total program size
  VmLck                       locked memory size
  VmHWM                       peak resident set size ("high water mark")
- VmRSS                       size of memory portions
+ VmRSS                       size of memory portions. It contains the three
+                             following parts (VmRSS = RssAnon + RssFile + RssShmem)
+ RssAnon                     size of resident anonymous memory
+ RssFile                     size of resident file mappings
+ RssShmem                    size of resident shmem memory (includes SysV shm,
+                             mapping of tmpfs and shared anonymous mappings)
  VmData                      size of data, stack, and text segments
  VmStk                       size of data, stack, and text segments
  VmExe                       size of text segment
  VmLib                       size of shared library code
  VmPTE                       size of page table entries
  VmPMD                       size of second level page tables
- VmSwap                      size of swap usage (the number of referred swapents)
+ VmSwap                      amount of swap used by anonymous private data
+                             (shmem swap usage is not included)
  HugetlbPages                size of hugetlb memory portions
  Threads                     number of threads
  SigQ                        number of signals queued/max. number for queue
@@ -265,7 +274,8 @@ Table 1-3: Contents of the statm files (as of 2.6.8-rc3)
  Field    Content
  size     total program size (pages)            (same as VmSize in status)
  resident size of memory portions (pages)       (same as VmRSS in status)
- shared   number of pages that are shared       (i.e. backed by a file)
+ shared   number of pages that are shared       (i.e. backed by a file, same
+          as RssFile+RssShmem in status)
  trs      number of pages that are 'code'       (not including libs; broken,
           includes data segment)
  lrs      number of pages of library            (always 0 on 2.6)
@@ -459,7 +469,10 @@ and a page is modified, the file page is replaced by a private anonymous copy.
 hugetlbfs page which is *not* counted in "RSS" or "PSS" field for historical
 reasons. And these are not included in {Shared,Private}_{Clean,Dirty} field.
 "Swap" shows how much would-be-anonymous memory is also used, but out on swap.
-"SwapPss" shows proportional swap share of this mapping.
+For shmem mappings, "Swap" includes also the size of the mapped (and not
+replaced by copy-on-write) part of the underlying shmem object out on swap.
+"SwapPss" shows proportional swap share of this mapping. Unlike "Swap", this
+does not take into account swapped out page of underlying shmem objects.
 "Locked" indicates whether the mapping is locked in memory or not.
 
 "VmFlags" field deserves a separate description. This member represents the kernel
@@ -842,6 +855,7 @@ Dirty: 968 kB
 Writeback:         0 kB
 AnonPages:    861800 kB
 Mapped:       280372 kB
+Shmem:           644 kB
 Slab:         284364 kB
 SReclaimable: 159856 kB
 SUnreclaim:   124508 kB
@@ -898,6 +912,7 @@ MemAvailable: An estimate of how much memory is available for starting new
 AnonPages: Non-file backed pages mapped into userspace page tables
 AnonHugePages: Non-file backed huge pages mapped into userspace page tables
 Mapped: files which have been mmaped, such as libraries
+Shmem: Total memory used by shared memory (shmem) and tmpfs
 Slab: in-kernel data structures cache
 SReclaimable: Part of Slab, that might be reclaimed, such as caches
 SUnreclaim: Part of Slab, that cannot be reclaimed on memory pressure
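The new Rss* fields keep the existing "Name:   value kB" layout, so existing
status parsers keep working and the breakdown can be read with plain line
matching. A minimal userspace sketch (illustrative only, not part of this
series):

#include <stdio.h>
#include <string.h>

/*
 * Minimal sketch (not part of this series): print the RSS breakdown
 * the new fields expose. On kernels without this series the three
 * Rss* lines are simply absent.
 */
int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/self/status", "r");

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f)) {
		if (!strncmp(line, "VmRSS:", 6) ||
		    !strncmp(line, "RssAnon:", 8) ||
		    !strncmp(line, "RssFile:", 8) ||
		    !strncmp(line, "RssShmem:", 9))
			fputs(line, stdout);
	}
	fclose(f);
	return 0;
}

The four printed values should satisfy the VmRSS = RssAnon + RssFile +
RssShmem identity documented in Table 1-2 above.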
diff --git a/Documentation/filesystems/tmpfs.txt b/Documentation/filesystems/tmpfs.txt
index 98ef55124158..d392e1505f17 100644
--- a/Documentation/filesystems/tmpfs.txt
+++ b/Documentation/filesystems/tmpfs.txt
@@ -17,10 +17,10 @@ RAM, where you have to create an ordinary filesystem on top. Ramdisks
 cannot swap and you do not have the possibility to resize them.
 
 Since tmpfs lives completely in the page cache and on swap, all tmpfs
-pages currently in memory will show up as cached. It will not show up
-as shared or something like that. Further on you can check the actual
-RAM+swap use of a tmpfs instance with df(1) and du(1).
-
+pages will be shown as "Shmem" in /proc/meminfo and "Shared" in
+free(1). Notice that these counters also include shared memory
+(shmem, see ipcs(1)). The most reliable way to get the count is
+using df(1) and du(1).
 
 tmpfs has the following uses:
 
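The per-instance accounting the new text recommends is ordinary statfs data,
so the df(1)-style view can also be obtained programmatically. A sketch
(illustrative only, not part of this series; /dev/shm is just a common tmpfs
mount point, substitute any tmpfs mount):

#include <stdio.h>
#include <sys/statvfs.h>

/*
 * Sketch (not part of this series) of the df(1)-style per-instance
 * view via statvfs(3).
 */
int main(void)
{
	struct statvfs st;

	if (statvfs("/dev/shm", &st) != 0)
		return 1;
	printf("tmpfs used: %llu kB of %llu kB\n",
	       (unsigned long long)(st.f_blocks - st.f_bfree) * st.f_frsize / 1024,
	       (unsigned long long)st.f_blocks * st.f_frsize / 1024);
	return 0;
}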
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index b7d44871effc..168fd79dc697 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -608,6 +608,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			cut the overhead, others just disable the usage. So
 			only cgroup_disable=memory is actually worthy}
 
+	cgroup.memory=	[KNL] Pass options to the cgroup memory controller.
+			Format: <string>
+			nosocket -- Disable socket memory accounting.
+
 	checkreqprot	[SELINUX] Set initial checkreqprot flag value.
 			Format: { "0" | "1" }
 			See security/selinux/Kconfig help text.
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 8ee925c046aa..89a887c76629 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -42,6 +42,8 @@ Currently, these files are in /proc/sys/vm:
 - min_slab_ratio
 - min_unmapped_ratio
 - mmap_min_addr
+- mmap_rnd_bits
+- mmap_rnd_compat_bits
 - nr_hugepages
 - nr_overcommit_hugepages
 - nr_trim_pages	(only if CONFIG_MMU=n)
@@ -485,6 +487,33 @@ against future potential kernel bugs.
 
 ==============================================================
 
+mmap_rnd_bits:
+
+This value can be used to select the number of bits to use to
+determine the random offset to the base address of vma regions
+resulting from mmap allocations on architectures which support
+tuning address space randomization. This value will be bounded
+by the architecture's minimum and maximum supported values.
+
+This value can be changed after boot using the
+/proc/sys/vm/mmap_rnd_bits tunable
+
+==============================================================
+
+mmap_rnd_compat_bits:
+
+This value can be used to select the number of bits to use to
+determine the random offset to the base address of vma regions
+resulting from mmap allocations for applications run in
+compatibility mode on architectures which support tuning address
+space randomization. This value will be bounded by the
+architecture's minimum and maximum supported values.
+
+This value can be changed after boot using the
+/proc/sys/vm/mmap_rnd_compat_bits tunable
+
+==============================================================
+
 nr_hugepages
 
 Change the minimum size of the hugepage pool.
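Both tunables are ordinary procfs integers: reading works from any process,
while writing requires root and is clamped to the per-architecture
ARCH_MMAP_RND_BITS_MIN/MAX range defined in the Kconfig entries below. A
sketch of reading the current value (illustrative only, not part of this
series):

#include <stdio.h>

/*
 * Sketch (not part of this series): read the current mmap ASLR width.
 * Writing a new value works the same way but requires root and must
 * stay within the architecture's ARCH_MMAP_RND_BITS_MIN/MAX range.
 */
int main(void)
{
	unsigned int bits;
	FILE *f = fopen("/proc/sys/vm/mmap_rnd_bits", "r");

	if (!f)
		return 1;
	if (fscanf(f, "%u", &bits) == 1)
		printf("mmap bases are randomized over %u bits\n", bits);
	fclose(f);
	return 0;
}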
diff --git a/arch/Kconfig b/arch/Kconfig
index 4e949e58b192..ba1b626bca00 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -511,6 +511,74 @@ config ARCH_HAS_ELF_RANDOMIZE
 	    - arch_mmap_rnd()
 	    - arch_randomize_brk()
 
+config HAVE_ARCH_MMAP_RND_BITS
+	bool
+	help
+	  An arch should select this symbol if it supports setting a variable
+	  number of bits for use in establishing the base address for mmap
+	  allocations, has MMU enabled and provides values for both:
+	  - ARCH_MMAP_RND_BITS_MIN
+	  - ARCH_MMAP_RND_BITS_MAX
+
+config ARCH_MMAP_RND_BITS_MIN
+	int
+
+config ARCH_MMAP_RND_BITS_MAX
+	int
+
+config ARCH_MMAP_RND_BITS_DEFAULT
+	int
+
+config ARCH_MMAP_RND_BITS
+	int "Number of bits to use for ASLR of mmap base address" if EXPERT
+	range ARCH_MMAP_RND_BITS_MIN ARCH_MMAP_RND_BITS_MAX
+	default ARCH_MMAP_RND_BITS_DEFAULT if ARCH_MMAP_RND_BITS_DEFAULT
+	default ARCH_MMAP_RND_BITS_MIN
+	depends on HAVE_ARCH_MMAP_RND_BITS
+	help
+	  This value can be used to select the number of bits to use to
+	  determine the random offset to the base address of vma regions
+	  resulting from mmap allocations. This value will be bounded
+	  by the architecture's minimum and maximum supported values.
+
+	  This value can be changed after boot using the
+	  /proc/sys/vm/mmap_rnd_bits tunable
+
+config HAVE_ARCH_MMAP_RND_COMPAT_BITS
+	bool
+	help
+	  An arch should select this symbol if it supports running applications
+	  in compatibility mode, supports setting a variable number of bits for
+	  use in establishing the base address for mmap allocations, has MMU
+	  enabled and provides values for both:
+	  - ARCH_MMAP_RND_COMPAT_BITS_MIN
+	  - ARCH_MMAP_RND_COMPAT_BITS_MAX
+
+config ARCH_MMAP_RND_COMPAT_BITS_MIN
+	int
+
+config ARCH_MMAP_RND_COMPAT_BITS_MAX
+	int
+
+config ARCH_MMAP_RND_COMPAT_BITS_DEFAULT
+	int
+
+config ARCH_MMAP_RND_COMPAT_BITS
+	int "Number of bits to use for ASLR of mmap base address for compatible applications" if EXPERT
+	range ARCH_MMAP_RND_COMPAT_BITS_MIN ARCH_MMAP_RND_COMPAT_BITS_MAX
+	default ARCH_MMAP_RND_COMPAT_BITS_DEFAULT if ARCH_MMAP_RND_COMPAT_BITS_DEFAULT
+	default ARCH_MMAP_RND_COMPAT_BITS_MIN
+	depends on HAVE_ARCH_MMAP_RND_COMPAT_BITS
+	help
+	  This value can be used to select the number of bits to use to
+	  determine the random offset to the base address of vma regions
+	  resulting from mmap allocations for compatible applications. This
+	  value will be bounded by the architecture's minimum and maximum
+	  supported values.
+
+	  This value can be changed after boot using the
+	  /proc/sys/vm/mmap_rnd_compat_bits tunable
+
 config HAVE_COPY_THREAD_TLS
 	bool
 	help
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 84b1b21b08ae..4e489cc5c45e 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -37,6 +37,7 @@ config ARM
 	select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
 	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU
 	select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
+	select HAVE_ARCH_MMAP_RND_BITS if MMU
 	select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT)
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARM_SMCCC if CPU_V7
@@ -311,6 +312,14 @@ config MMU
 	  Select if you want MMU-based virtualised addressing space
 	  support by paged memory management. If unsure, say 'Y'.
 
+config ARCH_MMAP_RND_BITS_MIN
+	default 8
+
+config ARCH_MMAP_RND_BITS_MAX
+	default 14 if PAGE_OFFSET=0x40000000
+	default 15 if PAGE_OFFSET=0x80000000
+	default 16
+
 #
 # The "ARM system type" choice list is ordered alphabetically by option
 # text. Please add new entries in the option alphabetic order.
diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
index 407dc786583a..4b4058db0781 100644
--- a/arch/arm/mm/mmap.c
+++ b/arch/arm/mm/mmap.c
@@ -173,8 +173,7 @@ unsigned long arch_mmap_rnd(void)
 {
 	unsigned long rnd;
 
-	/* 8 bits of randomness in 20 address space bits */
-	rnd = (unsigned long)get_random_int() % (1 << 8);
+	rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);
 
 	return rnd << PAGE_SHIFT;
 }
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 4d5b416e2e4b..6be3fa2310ee 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -52,6 +52,8 @@ config ARM64
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP && !(ARM64_16K_PAGES && ARM64_VA_BITS_48)
 	select HAVE_ARCH_KGDB
+	select HAVE_ARCH_MMAP_RND_BITS
+	select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_BPF_JIT
@@ -107,6 +109,33 @@ config ARCH_PHYS_ADDR_T_64BIT
 config MMU
 	def_bool y
 
+config ARCH_MMAP_RND_BITS_MIN
+	default 14 if ARM64_64K_PAGES
+	default 16 if ARM64_16K_PAGES
+	default 18
+
+# max bits determined by the following formula:
+#  VA_BITS - PAGE_SHIFT - 3
+config ARCH_MMAP_RND_BITS_MAX
+	default 19 if ARM64_VA_BITS=36
+	default 24 if ARM64_VA_BITS=39
+	default 27 if ARM64_VA_BITS=42
+	default 30 if ARM64_VA_BITS=47
+	default 29 if ARM64_VA_BITS=48 && ARM64_64K_PAGES
+	default 31 if ARM64_VA_BITS=48 && ARM64_16K_PAGES
+	default 33 if ARM64_VA_BITS=48
+	default 14 if ARM64_64K_PAGES
+	default 16 if ARM64_16K_PAGES
+	default 18
+
+config ARCH_MMAP_RND_COMPAT_BITS_MIN
+	default 7 if ARM64_64K_PAGES
+	default 9 if ARM64_16K_PAGES
+	default 11
+
+config ARCH_MMAP_RND_COMPAT_BITS_MAX
+	default 16
+
 config NO_IOPORT_MAP
 	def_bool y if !PCI
 
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index ed177475dd8c..4c893b5189dd 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -51,8 +51,12 @@ unsigned long arch_mmap_rnd(void)
 {
 	unsigned long rnd;
 
-	rnd = (unsigned long)get_random_int() & STACK_RND_MASK;
-
+#ifdef CONFIG_COMPAT
+	if (test_thread_flag(TIF_32BIT))
+		rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_compat_bits) - 1);
+	else
+#endif
+	rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);
 	return rnd << PAGE_SHIFT;
 }
 
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 60e02f7747ff..9cd607b06964 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2332,8 +2332,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t
 	 */
 	insert_vm_struct(mm, vma);
 
-	vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file,
-							vma_pages(vma));
+	vm_stat_account(vma->vm_mm, vma->vm_flags, vma_pages(vma));
 	up_write(&task->mm->mmap_sem);
 
 	/*
diff --git a/arch/m32r/kernel/setup.c b/arch/m32r/kernel/setup.c
index 0392112a5d70..a5ecef7188ba 100644
--- a/arch/m32r/kernel/setup.c
+++ b/arch/m32r/kernel/setup.c
@@ -81,7 +81,10 @@ static struct resource code_resource = {
 };
 
 unsigned long memory_start;
+EXPORT_SYMBOL(memory_start);
+
 unsigned long memory_end;
+EXPORT_SYMBOL(memory_end);
 
 void __init setup_arch(char **);
 int get_cpuinfo(char *);
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 11634fa7ab3c..ad4840f86be1 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -767,7 +767,7 @@ static int __init spufs_init(void)
 	ret = -ENOMEM;
 	spufs_inode_cache = kmem_cache_create("spufs_inode_cache",
 			sizeof(struct spufs_inode_info), 0,
-			SLAB_HWCACHE_ALIGN, spufs_init_once);
+			SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT, spufs_init_once);
 
 	if (!spufs_inode_cache)
 		goto out;
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 63b039899a5e..aa34af0a0b26 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -603,10 +603,7 @@ static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm)
 	else if (is_migration_entry(entry)) {
 		struct page *page = migration_entry_to_page(entry);
 
-		if (PageAnon(page))
-			dec_mm_counter(mm, MM_ANONPAGES);
-		else
-			dec_mm_counter(mm, MM_FILEPAGES);
+		dec_mm_counter(mm, mm_counter(page));
 	}
 	free_swap_and_cache(entry);
 }
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5d2293417946..24f362bf3ec6 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -83,6 +83,8 @@ config X86
 	select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_KMEMCHECK
+	select HAVE_ARCH_MMAP_RND_BITS if MMU
+	select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_SOFT_DIRTY if X86_64
 	select HAVE_ARCH_TRACEHOOK
@@ -184,6 +186,20 @@ config HAVE_LATENCYTOP_SUPPORT
 config MMU
 	def_bool y
 
+config ARCH_MMAP_RND_BITS_MIN
+	default 28 if 64BIT
+	default 8
+
+config ARCH_MMAP_RND_BITS_MAX
+	default 32 if 64BIT
+	default 16
+
+config ARCH_MMAP_RND_COMPAT_BITS_MIN
+	default 8
+
+config ARCH_MMAP_RND_COMPAT_BITS_MAX
+	default 16
+
 config SBUS
 	bool
 
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 844b06d67df4..96bd1e2bffaf 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -69,14 +69,14 @@ unsigned long arch_mmap_rnd(void)
 {
 	unsigned long rnd;
 
-	/*
-	 * 8 bits of randomness in 32bit mmaps, 20 address space bits
-	 * 28 bits of randomness in 64bit mmaps, 40 address space bits
-	 */
 	if (mmap_is_ia32())
-		rnd = (unsigned long)get_random_int() % (1<<8);
+#ifdef CONFIG_COMPAT
+		rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_compat_bits) - 1);
+#else
+		rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);
+#endif
 	else
-		rnd = (unsigned long)get_random_int() % (1<<28);
+		rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);
 
 	return rnd << PAGE_SHIFT;
 }
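All three converted arch_mmap_rnd() implementations now share one pattern:
keep the low mmap_rnd_bits (or mmap_rnd_compat_bits) bits of
get_random_int(), then scale by the page size. Because the range is a power
of two, the mask is equivalent to the old modulo for the previous fixed 8-
and 28-bit widths, only cheaper. A standalone sketch of the arithmetic
(hypothetical names, not kernel code; 4K pages assumed):

#include <stdio.h>

/*
 * Standalone sketch of the shared pattern: keep the low 'bits' bits
 * of a 32-bit random word, then scale by the page size. PAGE_SHIFT
 * is assumed to be 12 (4K pages).
 */
static unsigned long long mmap_rnd(unsigned int rand32, unsigned int bits)
{
	unsigned long long rnd = rand32 & ((1ULL << bits) - 1);

	return rnd << 12; /* PAGE_SHIFT */
}

int main(void)
{
	/* 28 bits is the x86-64 minimum: offsets span almost 1 TB */
	printf("max offset: 0x%llx\n", mmap_rnd(0xffffffffu, 28));
	return 0;
}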
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 25425d3f2575..619fe584a44c 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -450,8 +450,7 @@ memory_probe_store(struct device *dev, struct device_attribute *attr,
 			   const char *buf, size_t count)
 {
 	u64 phys_addr;
-	int nid;
-	int i, ret;
+	int nid, ret;
 	unsigned long pages_per_block = PAGES_PER_SECTION * sections_per_block;
 
 	ret = kstrtoull(buf, 0, &phys_addr);
@@ -461,15 +460,12 @@ memory_probe_store(struct device *dev, struct device_attribute *attr,
 	if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1))
 		return -EINVAL;
 
-	for (i = 0; i < sections_per_block; i++) {
-		nid = memory_add_physaddr_to_nid(phys_addr);
-		ret = add_memory(nid, phys_addr,
-				 PAGES_PER_SECTION << PAGE_SHIFT);
-		if (ret)
-			goto out;
+	nid = memory_add_physaddr_to_nid(phys_addr);
+	ret = add_memory(nid, phys_addr,
+			 MIN_MEMORY_BLOCK_SIZE * sections_per_block);
 
-		phys_addr += MIN_MEMORY_BLOCK_SIZE;
-	}
+	if (ret)
+		goto out;
 
 	ret = count;
 out:
@@ -618,7 +614,6 @@ static int init_memory_block(struct memory_block **memory,
 			base_memory_block_id(scn_nr) * sections_per_block;
 	mem->end_section_nr = mem->start_section_nr + sections_per_block - 1;
 	mem->state = state;
-	mem->section_count++;
 	start_pfn = section_nr_to_pfn(mem->start_section_nr);
 	mem->phys_device = arch_get_memory_phys_device(start_pfn);
 
@@ -672,6 +667,7 @@ int register_new_memory(int nid, struct mem_section *section)
 		ret = init_memory_block(&mem, section, MEM_OFFLINE);
 		if (ret)
 			goto out;
+		mem->section_count++;
 	}
 
 	if (mem->section_count == sections_per_block)
@@ -692,7 +688,7 @@ unregister_memory(struct memory_block *memory)
 	device_unregister(&memory->dev);
 }
 
-static int remove_memory_block(unsigned long node_id,
+static int remove_memory_section(unsigned long node_id,
 			       struct mem_section *section, int phys_device)
 {
 	struct memory_block *mem;
@@ -716,7 +712,7 @@ int unregister_memory_section(struct mem_section *section)
 	if (!present_section(section))
 		return -EINVAL;
 
-	return remove_memory_block(0, section, 0);
+	return remove_memory_section(0, section, 0);
 }
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 
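With the per-section loop gone, one write to the probe file hot-adds a full
memory block through a single add_memory() call, and a misaligned address
now fails up front with -EINVAL instead of part-way through. A userspace
sketch of the probe interface (illustrative only; needs root, a kernel
built with memory-hotplug probing, and the address shown is a placeholder):

#include <stdio.h>

/*
 * Sketch (not part of this series): hot-add the memory block at a
 * given physical address through the sysfs probe interface. The
 * address must be memory-block aligned, or the handler above
 * returns -EINVAL.
 */
int main(void)
{
	const unsigned long long phys_addr = 0x100000000ULL; /* placeholder */
	FILE *f = fopen("/sys/devices/system/memory/probe", "w");

	if (!f)
		return 1;
	fprintf(f, "0x%llx\n", phys_addr);
	return fclose(f) ? 1 : 0;
}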
diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c
index 5cb13ca3a3ac..3ef42e563bb5 100644
--- a/drivers/block/zram/zcomp.c
+++ b/drivers/block/zram/zcomp.c
@@ -74,18 +74,18 @@ static void zcomp_strm_free(struct zcomp *comp, struct zcomp_strm *zstrm)
  * allocate new zcomp_strm structure with ->private initialized by
  * backend, return NULL on error
  */
-static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp)
+static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp, gfp_t flags)
 {
-	struct zcomp_strm *zstrm = kmalloc(sizeof(*zstrm), GFP_KERNEL);
+	struct zcomp_strm *zstrm = kmalloc(sizeof(*zstrm), flags);
 	if (!zstrm)
 		return NULL;
 
-	zstrm->private = comp->backend->create();
+	zstrm->private = comp->backend->create(flags);
 	/*
 	 * allocate 2 pages. 1 for compressed data, plus 1 extra for the
 	 * case when compressed size is larger than the original one
 	 */
-	zstrm->buffer = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1);
+	zstrm->buffer = (void *)__get_free_pages(flags | __GFP_ZERO, 1);
 	if (!zstrm->private || !zstrm->buffer) {
 		zcomp_strm_free(comp, zstrm);
 		zstrm = NULL;
@@ -120,8 +120,16 @@ static struct zcomp_strm *zcomp_strm_multi_find(struct zcomp *comp)
 	/* allocate new zstrm stream */
 	zs->avail_strm++;
 	spin_unlock(&zs->strm_lock);
-
-	zstrm = zcomp_strm_alloc(comp);
+	/*
+	 * This function can be called in swapout/fs write path
+	 * so we can't use GFP_FS|IO. And it assumes we already
+	 * have at least one stream in zram initialization so we
+	 * don't do best effort to allocate more stream in here.
+	 * A default stream will work well without further multiple
+	 * streams. That's why we use NORETRY | NOWARN.
+	 */
+	zstrm = zcomp_strm_alloc(comp, GFP_NOIO | __GFP_NORETRY |
+					__GFP_NOWARN);
 	if (!zstrm) {
 		spin_lock(&zs->strm_lock);
 		zs->avail_strm--;
@@ -209,7 +217,7 @@ static int zcomp_strm_multi_create(struct zcomp *comp, int max_strm)
 	zs->max_strm = max_strm;
 	zs->avail_strm = 1;
 
-	zstrm = zcomp_strm_alloc(comp);
+	zstrm = zcomp_strm_alloc(comp, GFP_KERNEL);
 	if (!zstrm) {
 		kfree(zs);
 		return -ENOMEM;
@@ -259,7 +267,7 @@ static int zcomp_strm_single_create(struct zcomp *comp)
 
 	comp->stream = zs;
 	mutex_init(&zs->strm_lock);
-	zs->zstrm = zcomp_strm_alloc(comp);
+	zs->zstrm = zcomp_strm_alloc(comp, GFP_KERNEL);
 	if (!zs->zstrm) {
 		kfree(zs);
 		return -ENOMEM;
diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h
index 46e2b9f8f1f0..b7d2a4bcae54 100644
--- a/drivers/block/zram/zcomp.h
+++ b/drivers/block/zram/zcomp.h
@@ -33,7 +33,7 @@ struct zcomp_backend {
 	int (*decompress)(const unsigned char *src, size_t src_len,
 			  unsigned char *dst);
 
-	void *(*create)(void);
+	void *(*create)(gfp_t flags);
 	void (*destroy)(void *private);
 
 	const char *name;
diff --git a/drivers/block/zram/zcomp_lz4.c b/drivers/block/zram/zcomp_lz4.c
index f2afb7e988c3..0110086accba 100644
--- a/drivers/block/zram/zcomp_lz4.c
+++ b/drivers/block/zram/zcomp_lz4.c
@@ -10,17 +10,26 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/lz4.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
 
 #include "zcomp_lz4.h"
 
-static void *zcomp_lz4_create(void)
+static void *zcomp_lz4_create(gfp_t flags)
 {
-	return kzalloc(LZ4_MEM_COMPRESS, GFP_KERNEL);
+	void *ret;
+
+	ret = kmalloc(LZ4_MEM_COMPRESS, flags);
+	if (!ret)
+		ret = __vmalloc(LZ4_MEM_COMPRESS,
+				flags | __GFP_HIGHMEM,
+				PAGE_KERNEL);
+	return ret;
 }
 
 static void zcomp_lz4_destroy(void *private)
 {
-	kfree(private);
+	kvfree(private);
 }
 
 static int zcomp_lz4_compress(const unsigned char *src, unsigned char *dst,
diff --git a/drivers/block/zram/zcomp_lzo.c b/drivers/block/zram/zcomp_lzo.c
index da1bc47d588e..ed7a1f0549ec 100644
--- a/drivers/block/zram/zcomp_lzo.c
+++ b/drivers/block/zram/zcomp_lzo.c
@@ -10,17 +10,26 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/lzo.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
 
 #include "zcomp_lzo.h"
 
-static void *lzo_create(void)
+static void *lzo_create(gfp_t flags)
 {
-	return kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
+	void *ret;
+
+	ret = kmalloc(LZO1X_MEM_COMPRESS, flags);
+	if (!ret)
+		ret = __vmalloc(LZO1X_MEM_COMPRESS,
+				flags | __GFP_HIGHMEM,
+				PAGE_KERNEL);
+	return ret;
 }
 
 static void lzo_destroy(void *private)
 {
-	kfree(private);
+	kvfree(private);
 }
 
 static int lzo_compress(const unsigned char *src, unsigned char *dst,
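Both zram backends now use the same allocation pattern: try a physically
contiguous kmalloc() first and, only if that fails, fall back to
__vmalloc(), which can assemble the workspace from scattered pages;
kvfree() releases either kind. A kernel-side sketch of the pattern under
the same assumptions (the workmem_* names are hypothetical):

#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>

/*
 * Kernel-side sketch (hypothetical names): contiguous allocation
 * first, scattered-page fallback second, one unified free path.
 */
static void *workmem_alloc(size_t size, gfp_t flags)
{
	void *mem = kmalloc(size, flags);

	if (!mem)
		mem = __vmalloc(size, flags | __GFP_HIGHMEM, PAGE_KERNEL);
	return mem;
}

static void workmem_free(void *mem)
{
	kvfree(mem); /* picks kfree() or vfree() as appropriate */
}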
diff --git a/drivers/staging/lustre/lustre/llite/super25.c b/drivers/staging/lustre/lustre/llite/super25.c
index 7a9fafc67693..86c371ef71ea 100644
--- a/drivers/staging/lustre/lustre/llite/super25.c
+++ b/drivers/staging/lustre/lustre/llite/super25.c
@@ -106,7 +106,8 @@ static int __init init_lustre_lite(void)
 	rc = -ENOMEM;
 	ll_inode_cachep = kmem_cache_create("lustre_inode_cache",
 					    sizeof(struct ll_inode_info),
-					    0, SLAB_HWCACHE_ALIGN, NULL);
+					    0, SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT,
+					    NULL);
 	if (ll_inode_cachep == NULL)
 		goto out_cache;
 
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 6caca025019d..072e7599583a 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -575,7 +575,7 @@ static int v9fs_init_inode_cache(void)
 	v9fs_inode_cache = kmem_cache_create("v9fs_inode_cache",
 					  sizeof(struct v9fs_inode),
 					  0, (SLAB_RECLAIM_ACCOUNT|
-					      SLAB_MEM_SPREAD),
+					      SLAB_MEM_SPREAD|SLAB_ACCOUNT),
 					  v9fs_inode_init_once);
 	if (!v9fs_inode_cache)
 		return -ENOMEM;
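The long run of filesystem hunks that follows makes the same one-line change
everywhere: adding SLAB_ACCOUNT to the inode cache, so objects allocated
from it are charged to the allocating task's memory cgroup. A kernel-side
sketch of the pattern (the foo names are hypothetical stand-ins):

#include <linux/init.h>
#include <linux/slab.h>

/*
 * Sketch of the repeated change: SLAB_ACCOUNT makes every object
 * allocated from the cache be charged to the allocating task's
 * memory cgroup. "foo" stands in for a filesystem name.
 */
struct foo_inode_info {
	unsigned long state; /* placeholder per-inode data */
};

static struct kmem_cache *foo_inode_cachep;

static int __init foo_init_inodecache(void)
{
	foo_inode_cachep = kmem_cache_create("foo_inode_cache",
				sizeof(struct foo_inode_info), 0,
				SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
				SLAB_ACCOUNT, NULL);
	return foo_inode_cachep ? 0 : -ENOMEM;
}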
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 4d4a0df8344f..c9fdfb112933 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -271,7 +271,7 @@ static int __init init_inodecache(void)
 	adfs_inode_cachep = kmem_cache_create("adfs_inode_cache",
 					     sizeof(struct adfs_inode_info),
 					     0, (SLAB_RECLAIM_ACCOUNT|
-						SLAB_MEM_SPREAD),
+						SLAB_MEM_SPREAD|SLAB_ACCOUNT),
 					     init_once);
 	if (adfs_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 8836df5f1e11..2a6713b6b9f4 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -132,7 +132,7 @@ static int __init init_inodecache(void)
 	affs_inode_cachep = kmem_cache_create("affs_inode_cache",
 					     sizeof(struct affs_inode_info),
 					     0, (SLAB_RECLAIM_ACCOUNT|
-						SLAB_MEM_SPREAD),
+						SLAB_MEM_SPREAD|SLAB_ACCOUNT),
 					     init_once);
 	if (affs_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 1fb4a5129f7d..81afefe7d8a6 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -91,7 +91,7 @@ int __init afs_fs_init(void)
 	afs_inode_cachep = kmem_cache_create("afs_inode_cache",
 					     sizeof(struct afs_vnode),
 					     0,
-					     SLAB_HWCACHE_ALIGN,
+					     SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT,
 					     afs_i_init_once);
 	if (!afs_inode_cachep) {
 		printk(KERN_NOTICE "kAFS: Failed to allocate inode cache\n");
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 25250fa87086..cc0e08252913 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -434,7 +434,7 @@ befs_init_inodecache(void)
 	befs_inode_cachep = kmem_cache_create("befs_inode_cache",
 					      sizeof(struct befs_inode_info),
 					      0, (SLAB_RECLAIM_ACCOUNT|
-						  SLAB_MEM_SPREAD),
+						  SLAB_MEM_SPREAD|SLAB_ACCOUNT),
 					      init_once);
 	if (befs_inode_cachep == NULL) {
 		pr_err("%s: Couldn't initialize inode slabcache\n", __func__);
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index fdcb4d69f430..1e5c896f6b79 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -270,7 +270,7 @@ static int __init init_inodecache(void)
 	bfs_inode_cachep = kmem_cache_create("bfs_inode_cache",
 					     sizeof(struct bfs_inode_info),
 					     0, (SLAB_RECLAIM_ACCOUNT|
-						SLAB_MEM_SPREAD),
+						SLAB_MEM_SPREAD|SLAB_ACCOUNT),
 					     init_once);
 	if (bfs_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index d878e4860fb7..81c0705558be 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -437,7 +437,7 @@ int bdev_write_page(struct block_device *bdev, sector_t sector,
 
 	if (!ops->rw_page || bdev_get_integrity(bdev))
 		return -EOPNOTSUPP;
-	result = blk_queue_enter(bdev->bd_queue, GFP_KERNEL);
+	result = blk_queue_enter(bdev->bd_queue, GFP_NOIO);
 	if (result)
 		return result;
 
@@ -595,7 +595,7 @@ void __init bdev_cache_init(void)
 
 	bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
 			0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
-			SLAB_MEM_SPREAD|SLAB_PANIC),
+			SLAB_MEM_SPREAD|SLAB_ACCOUNT|SLAB_PANIC),
 			init_once);
 	err = register_filesystem(&bd_type);
 	if (err)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3b8856e182ae..394017831692 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -9161,7 +9161,8 @@ int btrfs_init_cachep(void)
 {
 	btrfs_inode_cachep = kmem_cache_create("btrfs_inode",
 			sizeof(struct btrfs_inode), 0,
-			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, init_once);
+			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT,
+			init_once);
 	if (!btrfs_inode_cachep)
 		goto fail;
 
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index f446afada328..ca4d5e8457f1 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -639,8 +639,8 @@ static int __init init_caches(void)
 	ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
 				      sizeof(struct ceph_inode_info),
 				      __alignof__(struct ceph_inode_info),
-				      (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
-				      ceph_inode_init_once);
+				      SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
+				      SLAB_ACCOUNT, ceph_inode_init_once);
 	if (ceph_inode_cachep == NULL)
 		return -ENOMEM;
 
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index b7fcb3151103..c4c1169814b2 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -1092,7 +1092,7 @@ cifs_init_inodecache(void)
 	cifs_inode_cachep = kmem_cache_create("cifs_inode_cache",
 					      sizeof(struct cifsInodeInfo),
 					      0, (SLAB_RECLAIM_ACCOUNT|
-						  SLAB_MEM_SPREAD),
+						  SLAB_MEM_SPREAD|SLAB_ACCOUNT),
 					      cifs_init_once);
 	if (cifs_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index cac1390b87a3..57e81cbba0fa 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -74,9 +74,9 @@ static void init_once(void *foo)
 int __init coda_init_inodecache(void)
 {
 	coda_inode_cachep = kmem_cache_create("coda_inode_cache",
-				sizeof(struct coda_inode_info),
-				0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
-				init_once);
+				sizeof(struct coda_inode_info), 0,
+				SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
+				SLAB_ACCOUNT, init_once);
 	if (coda_inode_cachep == NULL)
 		return -ENOMEM;
 	return 0;
diff --git a/fs/dcache.c b/fs/dcache.c
index 8d38cd07b207..b4539e84e577 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1571,7 +1571,8 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
 	dentry->d_iname[DNAME_INLINE_LEN-1] = 0;
 	if (name->len > DNAME_INLINE_LEN-1) {
 		size_t size = offsetof(struct external_name, name[1]);
-		struct external_name *p = kmalloc(size + name->len, GFP_KERNEL);
+		struct external_name *p = kmalloc(size + name->len,
+						  GFP_KERNEL_ACCOUNT);
 		if (!p) {
 			kmem_cache_free(dentry_cache, dentry);
 			return NULL;
@@ -3415,7 +3416,7 @@ static void __init dcache_init(void)
 	 * of the dcache.
 	 */
 	dentry_cache = KMEM_CACHE(dentry,
-		SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
+		SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD|SLAB_ACCOUNT);
 
 	/* Hash may have been set up in dcache_init_early */
 	if (!hashdist)
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 4f4d0474bee9..e25b6b06bacf 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -663,6 +663,7 @@ static struct ecryptfs_cache_info {
 	struct kmem_cache **cache;
 	const char *name;
 	size_t size;
+	unsigned long flags;
 	void (*ctor)(void *obj);
 } ecryptfs_cache_infos[] = {
 	{
@@ -684,6 +685,7 @@ static struct ecryptfs_cache_info {
 		.cache = &ecryptfs_inode_info_cache,
 		.name = "ecryptfs_inode_cache",
 		.size = sizeof(struct ecryptfs_inode_info),
+		.flags = SLAB_ACCOUNT,
 		.ctor = inode_info_init_once,
 	},
 	{
@@ -755,8 +757,8 @@ static int ecryptfs_init_kmem_caches(void)
 		struct ecryptfs_cache_info *info;
 
 		info = &ecryptfs_cache_infos[i];
-		*(info->cache) = kmem_cache_create(info->name, info->size,
-				0, SLAB_HWCACHE_ALIGN, info->ctor);
+		*(info->cache) = kmem_cache_create(info->name, info->size, 0,
+				SLAB_HWCACHE_ALIGN | info->flags, info->ctor);
 		if (!*(info->cache)) {
 			ecryptfs_free_kmem_caches();
 			ecryptfs_printk(KERN_WARNING, "%s: "
diff --git a/fs/efs/super.c b/fs/efs/super.c
index c8411a30f7da..cb68dac4f9d3 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -94,9 +94,9 @@ static void init_once(void *foo)
 static int __init init_inodecache(void)
 {
 	efs_inode_cachep = kmem_cache_create("efs_inode_cache",
-				sizeof(struct efs_inode_info),
-				0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
-				init_once);
+				sizeof(struct efs_inode_info), 0,
+				SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
+				SLAB_ACCOUNT, init_once);
 	if (efs_inode_cachep == NULL)
 		return -ENOMEM;
 	return 0;
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index b795c567b5e1..6658a50530a0 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -194,8 +194,8 @@ static int init_inodecache(void)
 {
 	exofs_inode_cachep = kmem_cache_create("exofs_inode_cache",
 				sizeof(struct exofs_i_info), 0,
-				SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
-				exofs_init_once);
+				SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD |
+				SLAB_ACCOUNT, exofs_init_once);
 	if (exofs_inode_cachep == NULL)
 		return -ENOMEM;
 	return 0;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 748d35afc902..2a188413a2b0 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -203,7 +203,7 @@ static int __init init_inodecache(void)
 	ext2_inode_cachep = kmem_cache_create("ext2_inode_cache",
 					     sizeof(struct ext2_inode_info),
 					     0, (SLAB_RECLAIM_ACCOUNT|
-						SLAB_MEM_SPREAD),
+						SLAB_MEM_SPREAD|SLAB_ACCOUNT),
 					     init_once);
 	if (ext2_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c9ab67da6e5a..f1b56ff01208 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -966,7 +966,7 @@ static int __init init_inodecache(void)
 	ext4_inode_cachep = kmem_cache_create("ext4_inode_cache",
 					     sizeof(struct ext4_inode_info),
 					     0, (SLAB_RECLAIM_ACCOUNT|
-						SLAB_MEM_SPREAD),
+						SLAB_MEM_SPREAD|SLAB_ACCOUNT),
 					     init_once);
 	if (ext4_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 3bf990b80026..6134832baaaf 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1541,8 +1541,9 @@ MODULE_ALIAS_FS("f2fs");
 
 static int __init init_inodecache(void)
 {
-	f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache",
-			sizeof(struct f2fs_inode_info));
+	f2fs_inode_cachep = kmem_cache_create("f2fs_inode_cache",
+			sizeof(struct f2fs_inode_info), 0,
+			SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT, NULL);
 	if (!f2fs_inode_cachep)
 		return -ENOMEM;
 	return 0;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 509411dd3698..6aece96df19f 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -677,7 +677,7 @@ static int __init fat_init_inodecache(void)
 	fat_inode_cachep = kmem_cache_create("fat_inode_cache",
 					     sizeof(struct msdos_inode_info),
 					     0, (SLAB_RECLAIM_ACCOUNT|
-						SLAB_MEM_SPREAD),
+						SLAB_MEM_SPREAD|SLAB_ACCOUNT),
 					     init_once);
 	if (fat_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/file.c b/fs/file.c
--- a/fs/file.c
+++ b/fs/file.c
@@ -37,11 +37,12 @@ static void *alloc_fdmem(size_t size)
 	 * vmalloc() if the allocation size will be considered "large" by the VM.
 	 */
 	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
-		void *data = kmalloc(size, GFP_KERNEL|__GFP_NOWARN|__GFP_NORETRY);
+		void *data = kmalloc(size, GFP_KERNEL_ACCOUNT |
+				     __GFP_NOWARN | __GFP_NORETRY);
 		if (data != NULL)
 			return data;
 	}
-	return vmalloc(size);
+	return __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_HIGHMEM, PAGE_KERNEL);
 }
 
 static void __free_fdtable(struct fdtable *fdt)
@@ -126,7 +127,7 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
 	if (unlikely(nr > sysctl_nr_open))
 		nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1;
 
-	fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL);
+	fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL_ACCOUNT);
 	if (!fdt)
 		goto out;
 	fdt->max_fds = nr;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 2913db2a5b99..4d69d5c0bedc 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1255,8 +1255,8 @@ static int __init fuse_fs_init(void)
 	int err;
 
 	fuse_inode_cachep = kmem_cache_create("fuse_inode",
-					      sizeof(struct fuse_inode),
-					      0, SLAB_HWCACHE_ALIGN,
+					      sizeof(struct fuse_inode), 0,
+					      SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT,
 					      fuse_inode_init_once);
 	err = -ENOMEM;
 	if (!fuse_inode_cachep)
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 1d709d496364..f99f8e94de3f 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -114,7 +114,8 @@ static int __init init_gfs2_fs(void)
 	gfs2_inode_cachep = kmem_cache_create("gfs2_inode",
 					      sizeof(struct gfs2_inode),
 					      0, SLAB_RECLAIM_ACCOUNT|
-						 SLAB_MEM_SPREAD,
+						 SLAB_MEM_SPREAD|
+						 SLAB_ACCOUNT,
 					      gfs2_init_inode_once);
 	if (!gfs2_inode_cachep)
 		goto fail;
diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 4574fdd3d421..1ca95c232bb5 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c | |||
@@ -483,8 +483,8 @@ static int __init init_hfs_fs(void) | |||
483 | int err; | 483 | int err; |
484 | 484 | ||
485 | hfs_inode_cachep = kmem_cache_create("hfs_inode_cache", | 485 | hfs_inode_cachep = kmem_cache_create("hfs_inode_cache", |
486 | sizeof(struct hfs_inode_info), 0, SLAB_HWCACHE_ALIGN, | 486 | sizeof(struct hfs_inode_info), 0, |
487 | hfs_init_once); | 487 | SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT, hfs_init_once); |
488 | if (!hfs_inode_cachep) | 488 | if (!hfs_inode_cachep) |
489 | return -ENOMEM; | 489 | return -ENOMEM; |
490 | err = register_filesystem(&hfs_fs_type); | 490 | err = register_filesystem(&hfs_fs_type); |
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 7302d96ae8bf..5d54490a136d 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c | |||
@@ -663,7 +663,7 @@ static int __init init_hfsplus_fs(void) | |||
663 | int err; | 663 | int err; |
664 | 664 | ||
665 | hfsplus_inode_cachep = kmem_cache_create("hfsplus_icache", | 665 | hfsplus_inode_cachep = kmem_cache_create("hfsplus_icache", |
666 | HFSPLUS_INODE_SIZE, 0, SLAB_HWCACHE_ALIGN, | 666 | HFSPLUS_INODE_SIZE, 0, SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT, |
667 | hfsplus_init_once); | 667 | hfsplus_init_once); |
668 | if (!hfsplus_inode_cachep) | 668 | if (!hfsplus_inode_cachep) |
669 | return -ENOMEM; | 669 | return -ENOMEM; |
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index f49be23e78aa..cfaa18c7a337 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c | |||
@@ -223,7 +223,7 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb) | |||
223 | { | 223 | { |
224 | struct hostfs_inode_info *hi; | 224 | struct hostfs_inode_info *hi; |
225 | 225 | ||
226 | hi = kmalloc(sizeof(*hi), GFP_KERNEL); | 226 | hi = kmalloc(sizeof(*hi), GFP_KERNEL_ACCOUNT); |
227 | if (hi == NULL) | 227 | if (hi == NULL) |
228 | return NULL; | 228 | return NULL; |
229 | hi->fd = -1; | 229 | hi->fd = -1; |
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index a561591896bd..458cf463047b 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c | |||
@@ -261,7 +261,7 @@ static int init_inodecache(void) | |||
261 | hpfs_inode_cachep = kmem_cache_create("hpfs_inode_cache", | 261 | hpfs_inode_cachep = kmem_cache_create("hpfs_inode_cache", |
262 | sizeof(struct hpfs_inode_info), | 262 | sizeof(struct hpfs_inode_info), |
263 | 0, (SLAB_RECLAIM_ACCOUNT| | 263 | 0, (SLAB_RECLAIM_ACCOUNT| |
264 | SLAB_MEM_SPREAD), | 264 | SLAB_MEM_SPREAD|SLAB_ACCOUNT), |
265 | init_once); | 265 | init_once); |
266 | if (hpfs_inode_cachep == NULL) | 266 | if (hpfs_inode_cachep == NULL) |
267 | return -ENOMEM; | 267 | return -ENOMEM; |
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index d8f51ee8126b..47789292a582 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -4,11 +4,11 @@ | |||
4 | * Nadia Yvette Chambers, 2002 | 4 | * Nadia Yvette Chambers, 2002 |
5 | * | 5 | * |
6 | * Copyright (C) 2002 Linus Torvalds. | 6 | * Copyright (C) 2002 Linus Torvalds. |
7 | * License: GPL | ||
7 | */ | 8 | */ |
8 | 9 | ||
9 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | 10 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
10 | 11 | ||
11 | #include <linux/module.h> | ||
12 | #include <linux/thread_info.h> | 12 | #include <linux/thread_info.h> |
13 | #include <asm/current.h> | 13 | #include <asm/current.h> |
14 | #include <linux/sched.h> /* remove ASAP */ | 14 | #include <linux/sched.h> /* remove ASAP */ |
@@ -738,7 +738,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, | |||
738 | /* | 738 | /* |
739 | * The policy is initialized here even if we are creating a | 739 | * The policy is initialized here even if we are creating a |
740 | * private inode because initialization simply creates an | 740 | * private inode because initialization simply creates an |
741 | * an empty rb tree and calls spin_lock_init(), later when we | 741 | * an empty rb tree and calls rwlock_init(), later when we |
742 | * call mpol_free_shared_policy() it will just return because | 742 | * call mpol_free_shared_policy() it will just return because |
743 | * the rb tree will still be empty. | 743 | * the rb tree will still be empty. |
744 | */ | 744 | */ |
@@ -1202,7 +1202,6 @@ static struct file_system_type hugetlbfs_fs_type = { | |||
1202 | .mount = hugetlbfs_mount, | 1202 | .mount = hugetlbfs_mount, |
1203 | .kill_sb = kill_litter_super, | 1203 | .kill_sb = kill_litter_super, |
1204 | }; | 1204 | }; |
1205 | MODULE_ALIAS_FS("hugetlbfs"); | ||
1206 | 1205 | ||
1207 | static struct vfsmount *hugetlbfs_vfsmount[HUGE_MAX_HSTATE]; | 1206 | static struct vfsmount *hugetlbfs_vfsmount[HUGE_MAX_HSTATE]; |
1208 | 1207 | ||
@@ -1322,7 +1321,7 @@ static int __init init_hugetlbfs_fs(void) | |||
1322 | error = -ENOMEM; | 1321 | error = -ENOMEM; |
1323 | hugetlbfs_inode_cachep = kmem_cache_create("hugetlbfs_inode_cache", | 1322 | hugetlbfs_inode_cachep = kmem_cache_create("hugetlbfs_inode_cache", |
1324 | sizeof(struct hugetlbfs_inode_info), | 1323 | sizeof(struct hugetlbfs_inode_info), |
1325 | 0, 0, init_once); | 1324 | 0, SLAB_ACCOUNT, init_once); |
1326 | if (hugetlbfs_inode_cachep == NULL) | 1325 | if (hugetlbfs_inode_cachep == NULL) |
1327 | goto out2; | 1326 | goto out2; |
1328 | 1327 | ||
@@ -1356,26 +1355,4 @@ static int __init init_hugetlbfs_fs(void) | |||
1356 | out2: | 1355 | out2: |
1357 | return error; | 1356 | return error; |
1358 | } | 1357 | } |
1359 | 1358 | fs_initcall(init_hugetlbfs_fs) | |
1360 | static void __exit exit_hugetlbfs_fs(void) | ||
1361 | { | ||
1362 | struct hstate *h; | ||
1363 | int i; | ||
1364 | |||
1365 | |||
1366 | /* | ||
1367 | * Make sure all delayed rcu free inodes are flushed before we | ||
1368 | * destroy cache. | ||
1369 | */ | ||
1370 | rcu_barrier(); | ||
1371 | kmem_cache_destroy(hugetlbfs_inode_cachep); | ||
1372 | i = 0; | ||
1373 | for_each_hstate(h) | ||
1374 | kern_unmount(hugetlbfs_vfsmount[i++]); | ||
1375 | unregister_filesystem(&hugetlbfs_fs_type); | ||
1376 | } | ||
1377 | |||
1378 | module_init(init_hugetlbfs_fs) | ||
1379 | module_exit(exit_hugetlbfs_fs) | ||
1380 | |||
1381 | MODULE_LICENSE("GPL"); | ||
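hugetlbfs can only be built in, so the module boilerplate is dropped wholesale: module_init() becomes fs_initcall(), the unreachable module_exit() teardown (rcu_barrier(), cache destruction, unmounts, unregistration) is deleted, and MODULE_ALIAS_FS/MODULE_LICENSE go with it. The shape of the conversion, sketched on a hypothetical filesystem:

    #include <linux/fs.h>
    #include <linux/init.h>

    static struct file_system_type examplefs_fs_type = {	/* hypothetical */
    	.name = "examplefs",
    };

    /* before: module_init()/module_exit(), MODULE_ALIAS_FS, MODULE_LICENSE */

    /* after: built-in only; registered at the filesystem initcall level */
    static int __init init_examplefs(void)
    {
    	return register_filesystem(&examplefs_fs_type);
    }
    fs_initcall(init_examplefs);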
diff --git a/fs/inode.c b/fs/inode.c index 4230f66b7410..e491e54d2430 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -1883,7 +1883,7 @@ void __init inode_init(void) | |||
1883 | sizeof(struct inode), | 1883 | sizeof(struct inode), |
1884 | 0, | 1884 | 0, |
1885 | (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC| | 1885 | (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC| |
1886 | SLAB_MEM_SPREAD), | 1886 | SLAB_MEM_SPREAD|SLAB_ACCOUNT), |
1887 | init_once); | 1887 | init_once); |
1888 | 1888 | ||
1889 | /* Hash may have been set up in inode_init_early */ | 1889 | /* Hash may have been set up in inode_init_early */ |
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 61abdc4920da..bcd2d41b318a 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c | |||
@@ -94,7 +94,7 @@ static int __init init_inodecache(void) | |||
94 | isofs_inode_cachep = kmem_cache_create("isofs_inode_cache", | 94 | isofs_inode_cachep = kmem_cache_create("isofs_inode_cache", |
95 | sizeof(struct iso_inode_info), | 95 | sizeof(struct iso_inode_info), |
96 | 0, (SLAB_RECLAIM_ACCOUNT| | 96 | 0, (SLAB_RECLAIM_ACCOUNT| |
97 | SLAB_MEM_SPREAD), | 97 | SLAB_MEM_SPREAD|SLAB_ACCOUNT), |
98 | init_once); | 98 | init_once); |
99 | if (isofs_inode_cachep == NULL) | 99 | if (isofs_inode_cachep == NULL) |
100 | return -ENOMEM; | 100 | return -ENOMEM; |
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index d86c5e3176a1..bb080c272149 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c | |||
@@ -387,7 +387,7 @@ static int __init init_jffs2_fs(void) | |||
387 | jffs2_inode_cachep = kmem_cache_create("jffs2_i", | 387 | jffs2_inode_cachep = kmem_cache_create("jffs2_i", |
388 | sizeof(struct jffs2_inode_info), | 388 | sizeof(struct jffs2_inode_info), |
389 | 0, (SLAB_RECLAIM_ACCOUNT| | 389 | 0, (SLAB_RECLAIM_ACCOUNT| |
390 | SLAB_MEM_SPREAD), | 390 | SLAB_MEM_SPREAD|SLAB_ACCOUNT), |
391 | jffs2_i_init_once); | 391 | jffs2_i_init_once); |
392 | if (!jffs2_inode_cachep) { | 392 | if (!jffs2_inode_cachep) { |
393 | pr_err("error: Failed to initialise inode cache\n"); | 393 | pr_err("error: Failed to initialise inode cache\n"); |
diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 8f9176caf098..900925b5eb8c 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c | |||
@@ -898,7 +898,7 @@ static int __init init_jfs_fs(void) | |||
898 | 898 | ||
899 | jfs_inode_cachep = | 899 | jfs_inode_cachep = |
900 | kmem_cache_create("jfs_ip", sizeof(struct jfs_inode_info), 0, | 900 | kmem_cache_create("jfs_ip", sizeof(struct jfs_inode_info), 0, |
901 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, | 901 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT, |
902 | init_once); | 902 | init_once); |
903 | if (jfs_inode_cachep == NULL) | 903 | if (jfs_inode_cachep == NULL) |
904 | return -ENOMEM; | 904 | return -ENOMEM; |
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 742bf4a230e8..821973853340 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c | |||
@@ -541,14 +541,7 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, | |||
541 | if (!kn) | 541 | if (!kn) |
542 | goto err_out1; | 542 | goto err_out1; |
543 | 543 | ||
544 | /* | 544 | ret = ida_simple_get(&root->ino_ida, 1, 0, GFP_KERNEL); |
545 | * If the ino of the sysfs entry created for a kmem cache gets | ||
546 | * allocated from an ida layer, which is accounted to the memcg that | ||
547 | * owns the cache, the memcg will get pinned forever. So do not account | ||
548 | * ino ida allocations. | ||
549 | */ | ||
550 | ret = ida_simple_get(&root->ino_ida, 1, 0, | ||
551 | GFP_KERNEL | __GFP_NOACCOUNT); | ||
552 | if (ret < 0) | 545 | if (ret < 0) |
553 | goto err_out2; | 546 | goto err_out2; |
554 | kn->ino = ret; | 547 | kn->ino = ret; |
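With slab/kmem accounting now opted into explicitly via SLAB_ACCOUNT and __GFP_ACCOUNT rather than opted out of via __GFP_NOACCOUNT, the kernfs workaround and the comment justifying it can simply go: a plain GFP_KERNEL ida allocation no longer pins a memcg. The call in isolation, assuming a hypothetical ida:

    #include <linux/gfp.h>
    #include <linux/idr.h>

    static DEFINE_IDA(example_ino_ida);	/* hypothetical */

    static int example_alloc_ino(void)
    {
    	/* ids start at 1; an "end" of 0 means no upper bound */
    	return ida_simple_get(&example_ino_ida, 1, 0, GFP_KERNEL);
    }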
diff --git a/fs/logfs/Kconfig b/fs/logfs/Kconfig index 09ed066c0221..2b4503163930 100644 --- a/fs/logfs/Kconfig +++ b/fs/logfs/Kconfig | |||
@@ -1,6 +1,6 @@ | |||
1 | config LOGFS | 1 | config LOGFS |
2 | tristate "LogFS file system" | 2 | tristate "LogFS file system" |
3 | depends on (MTD || BLOCK) | 3 | depends on MTD || (!MTD && BLOCK) |
4 | select ZLIB_INFLATE | 4 | select ZLIB_INFLATE |
5 | select ZLIB_DEFLATE | 5 | select ZLIB_DEFLATE |
6 | select CRC32 | 6 | select CRC32 |
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c index 0fce46d62b9c..db9cfc598883 100644 --- a/fs/logfs/inode.c +++ b/fs/logfs/inode.c | |||
@@ -409,7 +409,8 @@ const struct super_operations logfs_super_operations = { | |||
409 | int logfs_init_inode_cache(void) | 409 | int logfs_init_inode_cache(void) |
410 | { | 410 | { |
411 | logfs_inode_cache = kmem_cache_create("logfs_inode_cache", | 411 | logfs_inode_cache = kmem_cache_create("logfs_inode_cache", |
412 | sizeof(struct logfs_inode), 0, SLAB_RECLAIM_ACCOUNT, | 412 | sizeof(struct logfs_inode), 0, |
413 | SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT, | ||
413 | logfs_init_once); | 414 | logfs_init_once); |
414 | if (!logfs_inode_cache) | 415 | if (!logfs_inode_cache) |
415 | return -ENOMEM; | 416 | return -ENOMEM; |
diff --git a/fs/minix/inode.c b/fs/minix/inode.c index cb1789ca1ee6..f975d667c539 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c | |||
@@ -91,7 +91,7 @@ static int __init init_inodecache(void) | |||
91 | minix_inode_cachep = kmem_cache_create("minix_inode_cache", | 91 | minix_inode_cachep = kmem_cache_create("minix_inode_cache", |
92 | sizeof(struct minix_inode_info), | 92 | sizeof(struct minix_inode_info), |
93 | 0, (SLAB_RECLAIM_ACCOUNT| | 93 | 0, (SLAB_RECLAIM_ACCOUNT| |
94 | SLAB_MEM_SPREAD), | 94 | SLAB_MEM_SPREAD|SLAB_ACCOUNT), |
95 | init_once); | 95 | init_once); |
96 | if (minix_inode_cachep == NULL) | 96 | if (minix_inode_cachep == NULL) |
97 | return -ENOMEM; | 97 | return -ENOMEM; |
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index ce1eb3f9dfe8..1af15fcbe57b 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c | |||
@@ -82,7 +82,7 @@ static int init_inodecache(void) | |||
82 | ncp_inode_cachep = kmem_cache_create("ncp_inode_cache", | 82 | ncp_inode_cachep = kmem_cache_create("ncp_inode_cache", |
83 | sizeof(struct ncp_inode_info), | 83 | sizeof(struct ncp_inode_info), |
84 | 0, (SLAB_RECLAIM_ACCOUNT| | 84 | 0, (SLAB_RECLAIM_ACCOUNT| |
85 | SLAB_MEM_SPREAD), | 85 | SLAB_MEM_SPREAD|SLAB_ACCOUNT), |
86 | init_once); | 86 | init_once); |
87 | if (ncp_inode_cachep == NULL) | 87 | if (ncp_inode_cachep == NULL) |
88 | return -ENOMEM; | 88 | return -ENOMEM; |
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index c11e855e0e18..8e24d886d2c5 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -1969,7 +1969,7 @@ static int __init nfs_init_inodecache(void) | |||
1969 | nfs_inode_cachep = kmem_cache_create("nfs_inode_cache", | 1969 | nfs_inode_cachep = kmem_cache_create("nfs_inode_cache", |
1970 | sizeof(struct nfs_inode), | 1970 | sizeof(struct nfs_inode), |
1971 | 0, (SLAB_RECLAIM_ACCOUNT| | 1971 | 0, (SLAB_RECLAIM_ACCOUNT| |
1972 | SLAB_MEM_SPREAD), | 1972 | SLAB_MEM_SPREAD|SLAB_ACCOUNT), |
1973 | init_once); | 1973 | init_once); |
1974 | if (nfs_inode_cachep == NULL) | 1974 | if (nfs_inode_cachep == NULL) |
1975 | return -ENOMEM; | 1975 | return -ENOMEM; |
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index c7343844e6b6..7f5d3d9f1c37 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c | |||
@@ -1416,7 +1416,8 @@ static int __init nilfs_init_cachep(void) | |||
1416 | { | 1416 | { |
1417 | nilfs_inode_cachep = kmem_cache_create("nilfs2_inode_cache", | 1417 | nilfs_inode_cachep = kmem_cache_create("nilfs2_inode_cache", |
1418 | sizeof(struct nilfs_inode_info), 0, | 1418 | sizeof(struct nilfs_inode_info), 0, |
1419 | SLAB_RECLAIM_ACCOUNT, nilfs_inode_init_once); | 1419 | SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT, |
1420 | nilfs_inode_init_once); | ||
1420 | if (!nilfs_inode_cachep) | 1421 | if (!nilfs_inode_cachep) |
1421 | goto fail; | 1422 | goto fail; |
1422 | 1423 | ||
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index e785fd954c30..741077deef3b 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c | |||
@@ -199,8 +199,7 @@ void fsnotify_unmount_inodes(struct super_block *sb) | |||
199 | break; | 199 | break; |
200 | } | 200 | } |
201 | spin_unlock(&next_i->i_lock); | 201 | spin_unlock(&next_i->i_lock); |
202 | next_i = list_entry(next_i->i_sb_list.next, | 202 | next_i = list_next_entry(next_i, i_sb_list); |
203 | struct inode, i_sb_list); | ||
204 | } | 203 | } |
205 | 204 | ||
206 | /* | 205 | /* |
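list_next_entry() is purely a readability helper: it wraps the open-coded list_entry(pos->member.next, ...) step that the removed line spelled out. The two equivalent spellings, on an illustrative type:

    #include <linux/list.h>

    struct item {
    	int val;
    	struct list_head link;
    };

    static struct item *next_item(struct item *cur)
    {
    	/* open-coded form, as removed by the hunk above:
    	 *   return list_entry(cur->link.next, struct item, link);
    	 */
    	return list_next_entry(cur, link);
    }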
diff --git a/fs/notify/mark.c b/fs/notify/mark.c index fc0df4442f7b..cfcbf114676e 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c | |||
@@ -92,9 +92,6 @@ | |||
92 | #include "fsnotify.h" | 92 | #include "fsnotify.h" |
93 | 93 | ||
94 | struct srcu_struct fsnotify_mark_srcu; | 94 | struct srcu_struct fsnotify_mark_srcu; |
95 | static DEFINE_SPINLOCK(destroy_lock); | ||
96 | static LIST_HEAD(destroy_list); | ||
97 | static DECLARE_WAIT_QUEUE_HEAD(destroy_waitq); | ||
98 | 95 | ||
99 | void fsnotify_get_mark(struct fsnotify_mark *mark) | 96 | void fsnotify_get_mark(struct fsnotify_mark *mark) |
100 | { | 97 | { |
@@ -168,10 +165,19 @@ void fsnotify_detach_mark(struct fsnotify_mark *mark) | |||
168 | atomic_dec(&group->num_marks); | 165 | atomic_dec(&group->num_marks); |
169 | } | 166 | } |
170 | 167 | ||
168 | static void | ||
169 | fsnotify_mark_free_rcu(struct rcu_head *rcu) | ||
170 | { | ||
171 | struct fsnotify_mark *mark; | ||
172 | |||
173 | mark = container_of(rcu, struct fsnotify_mark, g_rcu); | ||
174 | fsnotify_put_mark(mark); | ||
175 | } | ||
176 | |||
171 | /* | 177 | /* |
172 | * Free fsnotify mark. The freeing is actually happening from a kthread which | 178 | * Free fsnotify mark. The freeing is actually happening from a call_srcu |
173 | * first waits for srcu period end. Caller must have a reference to the mark | 179 | * callback. Caller must have a reference to the mark or be protected by |
174 | * or be protected by fsnotify_mark_srcu. | 180 | * fsnotify_mark_srcu. |
175 | */ | 181 | */ |
176 | void fsnotify_free_mark(struct fsnotify_mark *mark) | 182 | void fsnotify_free_mark(struct fsnotify_mark *mark) |
177 | { | 183 | { |
@@ -186,10 +192,7 @@ void fsnotify_free_mark(struct fsnotify_mark *mark) | |||
186 | mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE; | 192 | mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE; |
187 | spin_unlock(&mark->lock); | 193 | spin_unlock(&mark->lock); |
188 | 194 | ||
189 | spin_lock(&destroy_lock); | 195 | call_srcu(&fsnotify_mark_srcu, &mark->g_rcu, fsnotify_mark_free_rcu); |
190 | list_add(&mark->g_list, &destroy_list); | ||
191 | spin_unlock(&destroy_lock); | ||
192 | wake_up(&destroy_waitq); | ||
193 | 196 | ||
194 | /* | 197 | /* |
195 | * Some groups like to know that marks are being freed. This is a | 198 | * Some groups like to know that marks are being freed. This is a |
@@ -385,11 +388,7 @@ err: | |||
385 | 388 | ||
386 | spin_unlock(&mark->lock); | 389 | spin_unlock(&mark->lock); |
387 | 390 | ||
388 | spin_lock(&destroy_lock); | 391 | call_srcu(&fsnotify_mark_srcu, &mark->g_rcu, fsnotify_mark_free_rcu); |
389 | list_add(&mark->g_list, &destroy_list); | ||
390 | spin_unlock(&destroy_lock); | ||
391 | wake_up(&destroy_waitq); | ||
392 | |||
393 | return ret; | 392 | return ret; |
394 | } | 393 | } |
395 | 394 | ||
@@ -492,40 +491,3 @@ void fsnotify_init_mark(struct fsnotify_mark *mark, | |||
492 | atomic_set(&mark->refcnt, 1); | 491 | atomic_set(&mark->refcnt, 1); |
493 | mark->free_mark = free_mark; | 492 | mark->free_mark = free_mark; |
494 | } | 493 | } |
495 | |||
496 | static int fsnotify_mark_destroy(void *ignored) | ||
497 | { | ||
498 | struct fsnotify_mark *mark, *next; | ||
499 | struct list_head private_destroy_list; | ||
500 | |||
501 | for (;;) { | ||
502 | spin_lock(&destroy_lock); | ||
503 | /* exchange the list head */ | ||
504 | list_replace_init(&destroy_list, &private_destroy_list); | ||
505 | spin_unlock(&destroy_lock); | ||
506 | |||
507 | synchronize_srcu(&fsnotify_mark_srcu); | ||
508 | |||
509 | list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) { | ||
510 | list_del_init(&mark->g_list); | ||
511 | fsnotify_put_mark(mark); | ||
512 | } | ||
513 | |||
514 | wait_event_interruptible(destroy_waitq, !list_empty(&destroy_list)); | ||
515 | } | ||
516 | |||
517 | return 0; | ||
518 | } | ||
519 | |||
520 | static int __init fsnotify_mark_init(void) | ||
521 | { | ||
522 | struct task_struct *thread; | ||
523 | |||
524 | thread = kthread_run(fsnotify_mark_destroy, NULL, | ||
525 | "fsnotify_mark"); | ||
526 | if (IS_ERR(thread)) | ||
527 | panic("unable to start fsnotify mark destruction thread."); | ||
528 | |||
529 | return 0; | ||
530 | } | ||
531 | device_initcall(fsnotify_mark_init); | ||
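The dedicated "fsnotify_mark" kthread, its destroy list, spinlock, and waitqueue all existed only to delay freeing a mark until an SRCU grace period had elapsed. call_srcu() does exactly that without a thread: queue an rcu_head and let the callback drop the final reference after the grace period. A condensed sketch of the pattern, with illustrative names (the SRCU domain is assumed to be initialised elsewhere):

    #include <linux/rcupdate.h>
    #include <linux/slab.h>
    #include <linux/srcu.h>

    struct srcu_struct example_srcu;	/* hypothetical SRCU domain */

    struct mark {
    	struct rcu_head rcu;
    	/* ... payload ... */
    };

    static void mark_free_rcu(struct rcu_head *rcu)
    {
    	struct mark *m = container_of(rcu, struct mark, rcu);

    	kfree(m);	/* runs only after the SRCU grace period ends */
    }

    static void mark_free(struct mark *m)
    {
    	/* replaces: add to destroy_list, wake kthread, synchronize_srcu() */
    	call_srcu(&example_srcu, &m->rcu, mark_free_rcu);
    }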
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index d1a853585b53..2f77f8dfb861 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c | |||
@@ -3139,8 +3139,8 @@ static int __init init_ntfs_fs(void) | |||
3139 | 3139 | ||
3140 | ntfs_big_inode_cache = kmem_cache_create(ntfs_big_inode_cache_name, | 3140 | ntfs_big_inode_cache = kmem_cache_create(ntfs_big_inode_cache_name, |
3141 | sizeof(big_ntfs_inode), 0, | 3141 | sizeof(big_ntfs_inode), 0, |
3142 | SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, | 3142 | SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD| |
3143 | ntfs_big_inode_init_once); | 3143 | SLAB_ACCOUNT, ntfs_big_inode_init_once); |
3144 | if (!ntfs_big_inode_cache) { | 3144 | if (!ntfs_big_inode_cache) { |
3145 | pr_crit("Failed to create %s!\n", ntfs_big_inode_cache_name); | 3145 | pr_crit("Failed to create %s!\n", ntfs_big_inode_cache_name); |
3146 | goto big_inode_err_out; | 3146 | goto big_inode_err_out; |
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 86181d6526dc..a3ded88718c9 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
@@ -164,7 +164,7 @@ static int ocfs2_dinode_insert_check(struct ocfs2_extent_tree *et, | |||
164 | struct ocfs2_extent_rec *rec); | 164 | struct ocfs2_extent_rec *rec); |
165 | static int ocfs2_dinode_sanity_check(struct ocfs2_extent_tree *et); | 165 | static int ocfs2_dinode_sanity_check(struct ocfs2_extent_tree *et); |
166 | static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et); | 166 | static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et); |
167 | static struct ocfs2_extent_tree_operations ocfs2_dinode_et_ops = { | 167 | static const struct ocfs2_extent_tree_operations ocfs2_dinode_et_ops = { |
168 | .eo_set_last_eb_blk = ocfs2_dinode_set_last_eb_blk, | 168 | .eo_set_last_eb_blk = ocfs2_dinode_set_last_eb_blk, |
169 | .eo_get_last_eb_blk = ocfs2_dinode_get_last_eb_blk, | 169 | .eo_get_last_eb_blk = ocfs2_dinode_get_last_eb_blk, |
170 | .eo_update_clusters = ocfs2_dinode_update_clusters, | 170 | .eo_update_clusters = ocfs2_dinode_update_clusters, |
@@ -286,7 +286,7 @@ static void ocfs2_xattr_value_update_clusters(struct ocfs2_extent_tree *et, | |||
286 | le32_add_cpu(&vb->vb_xv->xr_clusters, clusters); | 286 | le32_add_cpu(&vb->vb_xv->xr_clusters, clusters); |
287 | } | 287 | } |
288 | 288 | ||
289 | static struct ocfs2_extent_tree_operations ocfs2_xattr_value_et_ops = { | 289 | static const struct ocfs2_extent_tree_operations ocfs2_xattr_value_et_ops = { |
290 | .eo_set_last_eb_blk = ocfs2_xattr_value_set_last_eb_blk, | 290 | .eo_set_last_eb_blk = ocfs2_xattr_value_set_last_eb_blk, |
291 | .eo_get_last_eb_blk = ocfs2_xattr_value_get_last_eb_blk, | 291 | .eo_get_last_eb_blk = ocfs2_xattr_value_get_last_eb_blk, |
292 | .eo_update_clusters = ocfs2_xattr_value_update_clusters, | 292 | .eo_update_clusters = ocfs2_xattr_value_update_clusters, |
@@ -332,7 +332,7 @@ static void ocfs2_xattr_tree_update_clusters(struct ocfs2_extent_tree *et, | |||
332 | le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, clusters); | 332 | le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, clusters); |
333 | } | 333 | } |
334 | 334 | ||
335 | static struct ocfs2_extent_tree_operations ocfs2_xattr_tree_et_ops = { | 335 | static const struct ocfs2_extent_tree_operations ocfs2_xattr_tree_et_ops = { |
336 | .eo_set_last_eb_blk = ocfs2_xattr_tree_set_last_eb_blk, | 336 | .eo_set_last_eb_blk = ocfs2_xattr_tree_set_last_eb_blk, |
337 | .eo_get_last_eb_blk = ocfs2_xattr_tree_get_last_eb_blk, | 337 | .eo_get_last_eb_blk = ocfs2_xattr_tree_get_last_eb_blk, |
338 | .eo_update_clusters = ocfs2_xattr_tree_update_clusters, | 338 | .eo_update_clusters = ocfs2_xattr_tree_update_clusters, |
@@ -379,7 +379,7 @@ static void ocfs2_dx_root_fill_root_el(struct ocfs2_extent_tree *et) | |||
379 | et->et_root_el = &dx_root->dr_list; | 379 | et->et_root_el = &dx_root->dr_list; |
380 | } | 380 | } |
381 | 381 | ||
382 | static struct ocfs2_extent_tree_operations ocfs2_dx_root_et_ops = { | 382 | static const struct ocfs2_extent_tree_operations ocfs2_dx_root_et_ops = { |
383 | .eo_set_last_eb_blk = ocfs2_dx_root_set_last_eb_blk, | 383 | .eo_set_last_eb_blk = ocfs2_dx_root_set_last_eb_blk, |
384 | .eo_get_last_eb_blk = ocfs2_dx_root_get_last_eb_blk, | 384 | .eo_get_last_eb_blk = ocfs2_dx_root_get_last_eb_blk, |
385 | .eo_update_clusters = ocfs2_dx_root_update_clusters, | 385 | .eo_update_clusters = ocfs2_dx_root_update_clusters, |
@@ -425,7 +425,7 @@ ocfs2_refcount_tree_extent_contig(struct ocfs2_extent_tree *et, | |||
425 | return CONTIG_NONE; | 425 | return CONTIG_NONE; |
426 | } | 426 | } |
427 | 427 | ||
428 | static struct ocfs2_extent_tree_operations ocfs2_refcount_tree_et_ops = { | 428 | static const struct ocfs2_extent_tree_operations ocfs2_refcount_tree_et_ops = { |
429 | .eo_set_last_eb_blk = ocfs2_refcount_tree_set_last_eb_blk, | 429 | .eo_set_last_eb_blk = ocfs2_refcount_tree_set_last_eb_blk, |
430 | .eo_get_last_eb_blk = ocfs2_refcount_tree_get_last_eb_blk, | 430 | .eo_get_last_eb_blk = ocfs2_refcount_tree_get_last_eb_blk, |
431 | .eo_update_clusters = ocfs2_refcount_tree_update_clusters, | 431 | .eo_update_clusters = ocfs2_refcount_tree_update_clusters, |
@@ -438,7 +438,7 @@ static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et, | |||
438 | struct buffer_head *bh, | 438 | struct buffer_head *bh, |
439 | ocfs2_journal_access_func access, | 439 | ocfs2_journal_access_func access, |
440 | void *obj, | 440 | void *obj, |
441 | struct ocfs2_extent_tree_operations *ops) | 441 | const struct ocfs2_extent_tree_operations *ops) |
442 | { | 442 | { |
443 | et->et_ops = ops; | 443 | et->et_ops = ops; |
444 | et->et_root_bh = bh; | 444 | et->et_root_bh = bh; |
@@ -6174,8 +6174,7 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb, | |||
6174 | } | 6174 | } |
6175 | 6175 | ||
6176 | bail: | 6176 | bail: |
6177 | if (tl_inode) | 6177 | iput(tl_inode); |
6178 | iput(tl_inode); | ||
6179 | brelse(tl_bh); | 6178 | brelse(tl_bh); |
6180 | 6179 | ||
6181 | if (status < 0) { | 6180 | if (status < 0) { |
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h index fb09b97db162..f3dc1b0dfffc 100644 --- a/fs/ocfs2/alloc.h +++ b/fs/ocfs2/alloc.h | |||
@@ -54,7 +54,7 @@ | |||
54 | */ | 54 | */ |
55 | struct ocfs2_extent_tree_operations; | 55 | struct ocfs2_extent_tree_operations; |
56 | struct ocfs2_extent_tree { | 56 | struct ocfs2_extent_tree { |
57 | struct ocfs2_extent_tree_operations *et_ops; | 57 | const struct ocfs2_extent_tree_operations *et_ops; |
58 | struct buffer_head *et_root_bh; | 58 | struct buffer_head *et_root_bh; |
59 | struct ocfs2_extent_list *et_root_el; | 59 | struct ocfs2_extent_list *et_root_el; |
60 | struct ocfs2_caching_info *et_ci; | 60 | struct ocfs2_caching_info *et_ci; |
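Constifying the ocfs2_extent_tree_operations tables lets the compiler place them in read-only data; the catch is that every holder of a pointer to them — the et_ops field and the __ocfs2_init_extent_tree() parameter in the two hunks above — must gain the const qualifier at the same time, or the assignments stop compiling. The pattern in miniature:

    struct ops {
    	int (*do_thing)(void);
    };

    static int do_thing_impl(void) { return 0; }

    /* const: the table lands in .rodata and cannot be patched at runtime */
    static const struct ops example_ops = {
    	.do_thing = do_thing_impl,
    };

    struct user {
    	const struct ops *ops;	/* const must propagate to every holder */
    };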
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 709fbbd44c65..a3cc6d2fc896 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
@@ -1780,8 +1780,8 @@ static ssize_t o2hb_region_dev_store(struct config_item *item, | |||
1780 | } | 1780 | } |
1781 | ++live_threshold; | 1781 | ++live_threshold; |
1782 | atomic_set(®->hr_steady_iterations, live_threshold); | 1782 | atomic_set(®->hr_steady_iterations, live_threshold); |
1783 | /* unsteady_iterations is double the steady_iterations */ | 1783 | /* unsteady_iterations is triple the steady_iterations */ |
1784 | atomic_set(®->hr_unsteady_iterations, (live_threshold << 1)); | 1784 | atomic_set(®->hr_unsteady_iterations, (live_threshold * 3)); |
1785 | 1785 | ||
1786 | hb_task = kthread_run(o2hb_thread, reg, "o2hb-%s", | 1786 | hb_task = kthread_run(o2hb_thread, reg, "o2hb-%s", |
1787 | reg->hr_item.ci_name); | 1787 | reg->hr_item.ci_name); |
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index e88ccf8c83ff..68c607e63ff6 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h | |||
@@ -376,17 +376,6 @@ struct dlm_lock | |||
376 | lksb_kernel_allocated:1; | 376 | lksb_kernel_allocated:1; |
377 | }; | 377 | }; |
378 | 378 | ||
379 | |||
380 | #define DLM_LKSB_UNUSED1 0x01 | ||
381 | #define DLM_LKSB_PUT_LVB 0x02 | ||
382 | #define DLM_LKSB_GET_LVB 0x04 | ||
383 | #define DLM_LKSB_UNUSED2 0x08 | ||
384 | #define DLM_LKSB_UNUSED3 0x10 | ||
385 | #define DLM_LKSB_UNUSED4 0x20 | ||
386 | #define DLM_LKSB_UNUSED5 0x40 | ||
387 | #define DLM_LKSB_UNUSED6 0x80 | ||
388 | |||
389 | |||
390 | enum dlm_lockres_list { | 379 | enum dlm_lockres_list { |
391 | DLM_GRANTED_LIST = 0, | 380 | DLM_GRANTED_LIST = 0, |
392 | DLM_CONVERTING_LIST = 1, | 381 | DLM_CONVERTING_LIST = 1, |
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 84f2f8079466..9477d6e1de37 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
@@ -2388,8 +2388,8 @@ static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data) | |||
2388 | 2388 | ||
2389 | spin_lock(&res->spinlock); | 2389 | spin_lock(&res->spinlock); |
2390 | BUG_ON(res->state & DLM_LOCK_RES_DROPPING_REF); | 2390 | BUG_ON(res->state & DLM_LOCK_RES_DROPPING_REF); |
2391 | __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG); | ||
2391 | if (test_bit(node, res->refmap)) { | 2392 | if (test_bit(node, res->refmap)) { |
2392 | __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG); | ||
2393 | dlm_lockres_clear_refmap_bit(dlm, res, node); | 2393 | dlm_lockres_clear_refmap_bit(dlm, res, node); |
2394 | cleared = 1; | 2394 | cleared = 1; |
2395 | } | 2395 | } |
@@ -2519,6 +2519,11 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, | |||
2519 | spin_lock(&dlm->master_lock); | 2519 | spin_lock(&dlm->master_lock); |
2520 | ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name, | 2520 | ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name, |
2521 | namelen, target, dlm->node_num); | 2521 | namelen, target, dlm->node_num); |
2522 | /* get an extra reference on the mle. | ||
2523 | * otherwise the assert_master from the new | ||
2524 | * master will destroy this. | ||
2525 | */ | ||
2526 | dlm_get_mle_inuse(mle); | ||
2522 | spin_unlock(&dlm->master_lock); | 2527 | spin_unlock(&dlm->master_lock); |
2523 | spin_unlock(&dlm->spinlock); | 2528 | spin_unlock(&dlm->spinlock); |
2524 | 2529 | ||
@@ -2544,7 +2549,7 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, | |||
2544 | } | 2549 | } |
2545 | 2550 | ||
2546 | fail: | 2551 | fail: |
2547 | if (oldmle) { | 2552 | if (ret != -EEXIST && oldmle) { |
2548 | /* master is known, detach if not already detached */ | 2553 | /* master is known, detach if not already detached */ |
2549 | dlm_mle_detach_hb_events(dlm, oldmle); | 2554 | dlm_mle_detach_hb_events(dlm, oldmle); |
2550 | dlm_put_mle(oldmle); | 2555 | dlm_put_mle(oldmle); |
@@ -2554,6 +2559,7 @@ fail: | |||
2554 | if (mle_added) { | 2559 | if (mle_added) { |
2555 | dlm_mle_detach_hb_events(dlm, mle); | 2560 | dlm_mle_detach_hb_events(dlm, mle); |
2556 | dlm_put_mle(mle); | 2561 | dlm_put_mle(mle); |
2562 | dlm_put_mle_inuse(mle); | ||
2557 | } else if (mle) { | 2563 | } else if (mle) { |
2558 | kmem_cache_free(dlm_mle_cache, mle); | 2564 | kmem_cache_free(dlm_mle_cache, mle); |
2559 | mle = NULL; | 2565 | mle = NULL; |
@@ -2571,17 +2577,6 @@ fail: | |||
2571 | * ensure that all assert_master work is flushed. */ | 2577 | * ensure that all assert_master work is flushed. */ |
2572 | flush_workqueue(dlm->dlm_worker); | 2578 | flush_workqueue(dlm->dlm_worker); |
2573 | 2579 | ||
2574 | /* get an extra reference on the mle. | ||
2575 | * otherwise the assert_master from the new | ||
2576 | * master will destroy this. | ||
2577 | * also, make sure that all callers of dlm_get_mle | ||
2578 | * take both dlm->spinlock and dlm->master_lock */ | ||
2579 | spin_lock(&dlm->spinlock); | ||
2580 | spin_lock(&dlm->master_lock); | ||
2581 | dlm_get_mle_inuse(mle); | ||
2582 | spin_unlock(&dlm->master_lock); | ||
2583 | spin_unlock(&dlm->spinlock); | ||
2584 | |||
2585 | /* notify new node and send all lock state */ | 2580 | /* notify new node and send all lock state */ |
2586 | /* call send_one_lockres with migration flag. | 2581 | /* call send_one_lockres with migration flag. |
2587 | * this serves as notice to the target node that a | 2582 | * this serves as notice to the target node that a |
@@ -3050,7 +3045,7 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data, | |||
3050 | int ret = 0; | 3045 | int ret = 0; |
3051 | 3046 | ||
3052 | if (!dlm_grab(dlm)) | 3047 | if (!dlm_grab(dlm)) |
3053 | return -EINVAL; | 3048 | return 0; |
3054 | 3049 | ||
3055 | name = migrate->name; | 3050 | name = migrate->name; |
3056 | namelen = migrate->namelen; | 3051 | namelen = migrate->namelen; |
@@ -3141,7 +3136,8 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm, | |||
3141 | mlog(0, "tried to migrate %.*s, but some " | 3136 | mlog(0, "tried to migrate %.*s, but some " |
3142 | "process beat me to it\n", | 3137 | "process beat me to it\n", |
3143 | namelen, name); | 3138 | namelen, name); |
3144 | ret = -EEXIST; | 3139 | spin_unlock(&tmp->spinlock); |
3140 | return -EEXIST; | ||
3145 | } else { | 3141 | } else { |
3146 | /* bad. 2 NODES are trying to migrate! */ | 3142 | /* bad. 2 NODES are trying to migrate! */ |
3147 | mlog(ML_ERROR, "migration error mle: " | 3143 | mlog(ML_ERROR, "migration error mle: " |
@@ -3312,6 +3308,15 @@ top: | |||
3312 | mle->new_master != dead_node) | 3308 | mle->new_master != dead_node) |
3313 | continue; | 3309 | continue; |
3314 | 3310 | ||
3311 | if (mle->new_master == dead_node && mle->inuse) { | ||
3312 | mlog(ML_NOTICE, "%s: target %u died during " | ||
3313 | "migration from %u, the MLE is " | ||
3314 | "still keep used, ignore it!\n", | ||
3315 | dlm->name, dead_node, | ||
3316 | mle->master); | ||
3317 | continue; | ||
3318 | } | ||
3319 | |||
3315 | /* If we have reached this point, this mle needs to be | 3320 | /* If we have reached this point, this mle needs to be |
3316 | * removed from the list and freed. */ | 3321 | * removed from the list and freed. */ |
3317 | dlm_clean_migration_mle(dlm, mle); | 3322 | dlm_clean_migration_mle(dlm, mle); |
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 9e4f862d20fe..c5bdf02c213b 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
@@ -1373,6 +1373,7 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data, | |||
1373 | char *buf = NULL; | 1373 | char *buf = NULL; |
1374 | struct dlm_work_item *item = NULL; | 1374 | struct dlm_work_item *item = NULL; |
1375 | struct dlm_lock_resource *res = NULL; | 1375 | struct dlm_lock_resource *res = NULL; |
1376 | unsigned int hash; | ||
1376 | 1377 | ||
1377 | if (!dlm_grab(dlm)) | 1378 | if (!dlm_grab(dlm)) |
1378 | return -EINVAL; | 1379 | return -EINVAL; |
@@ -1400,7 +1401,10 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data, | |||
1400 | /* lookup the lock to see if we have a secondary queue for this | 1401 | /* lookup the lock to see if we have a secondary queue for this |
1401 | * already... just add the locks in and this will have its owner | 1402 | * already... just add the locks in and this will have its owner |
1402 | * and RECOVERY flag changed when it completes. */ | 1403 | * and RECOVERY flag changed when it completes. */ |
1403 | res = dlm_lookup_lockres(dlm, mres->lockname, mres->lockname_len); | 1404 | hash = dlm_lockid_hash(mres->lockname, mres->lockname_len); |
1405 | spin_lock(&dlm->spinlock); | ||
1406 | res = __dlm_lookup_lockres(dlm, mres->lockname, mres->lockname_len, | ||
1407 | hash); | ||
1404 | if (res) { | 1408 | if (res) { |
1405 | /* this will get a ref on res */ | 1409 | /* this will get a ref on res */ |
1406 | /* mark it as recovering/migrating and hash it */ | 1410 | /* mark it as recovering/migrating and hash it */ |
@@ -1421,13 +1425,16 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data, | |||
1421 | mres->lockname_len, mres->lockname); | 1425 | mres->lockname_len, mres->lockname); |
1422 | ret = -EFAULT; | 1426 | ret = -EFAULT; |
1423 | spin_unlock(&res->spinlock); | 1427 | spin_unlock(&res->spinlock); |
1428 | spin_unlock(&dlm->spinlock); | ||
1424 | dlm_lockres_put(res); | 1429 | dlm_lockres_put(res); |
1425 | goto leave; | 1430 | goto leave; |
1426 | } | 1431 | } |
1427 | res->state |= DLM_LOCK_RES_MIGRATING; | 1432 | res->state |= DLM_LOCK_RES_MIGRATING; |
1428 | } | 1433 | } |
1429 | spin_unlock(&res->spinlock); | 1434 | spin_unlock(&res->spinlock); |
1435 | spin_unlock(&dlm->spinlock); | ||
1430 | } else { | 1436 | } else { |
1437 | spin_unlock(&dlm->spinlock); | ||
1431 | /* need to allocate, just like if it was | 1438 | /* need to allocate, just like if it was |
1432 | * mastered here normally */ | 1439 | * mastered here normally */ |
1433 | res = dlm_new_lockres(dlm, mres->lockname, mres->lockname_len); | 1440 | res = dlm_new_lockres(dlm, mres->lockname, mres->lockname_len); |
@@ -2450,11 +2457,7 @@ static void __dlm_hb_node_down(struct dlm_ctxt *dlm, int idx) | |||
2450 | * perhaps later we can genericize this for other waiters. */ | 2457 | * perhaps later we can genericize this for other waiters. */ |
2451 | wake_up(&dlm->migration_wq); | 2458 | wake_up(&dlm->migration_wq); |
2452 | 2459 | ||
2453 | if (test_bit(idx, dlm->recovery_map)) | 2460 | set_bit(idx, dlm->recovery_map); |
2454 | mlog(0, "domain %s, node %u already added " | ||
2455 | "to recovery map!\n", dlm->name, idx); | ||
2456 | else | ||
2457 | set_bit(idx, dlm->recovery_map); | ||
2458 | } | 2461 | } |
2459 | 2462 | ||
2460 | void dlm_hb_node_down_cb(struct o2nm_node *node, int idx, void *data) | 2463 | void dlm_hb_node_down_cb(struct o2nm_node *node, int idx, void *data) |
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c index 2e3c9dbab68c..1082b2c3014b 100644 --- a/fs/ocfs2/dlm/dlmunlock.c +++ b/fs/ocfs2/dlm/dlmunlock.c | |||
@@ -421,7 +421,7 @@ int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data, | |||
421 | } | 421 | } |
422 | 422 | ||
423 | if (!dlm_grab(dlm)) | 423 | if (!dlm_grab(dlm)) |
424 | return DLM_REJECTED; | 424 | return DLM_FORWARD; |
425 | 425 | ||
426 | mlog_bug_on_msg(!dlm_domain_fully_joined(dlm), | 426 | mlog_bug_on_msg(!dlm_domain_fully_joined(dlm), |
427 | "Domain %s not fully joined!\n", dlm->name); | 427 | "Domain %s not fully joined!\n", dlm->name); |
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index b5cf27dcb18a..03768bb3aab1 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c | |||
@@ -638,7 +638,7 @@ static int __init init_dlmfs_fs(void) | |||
638 | dlmfs_inode_cache = kmem_cache_create("dlmfs_inode_cache", | 638 | dlmfs_inode_cache = kmem_cache_create("dlmfs_inode_cache", |
639 | sizeof(struct dlmfs_inode_private), | 639 | sizeof(struct dlmfs_inode_private), |
640 | 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| | 640 | 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| |
641 | SLAB_MEM_SPREAD), | 641 | SLAB_MEM_SPREAD|SLAB_ACCOUNT), |
642 | dlmfs_init_once); | 642 | dlmfs_init_once); |
643 | if (!dlmfs_inode_cache) { | 643 | if (!dlmfs_inode_cache) { |
644 | status = -ENOMEM; | 644 | status = -ENOMEM; |
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 20276e340339..f92612e4b9d6 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
@@ -2432,12 +2432,6 @@ bail: | |||
2432 | * done this we have to return AOP_TRUNCATED_PAGE so the aop method | 2432 | * done this we have to return AOP_TRUNCATED_PAGE so the aop method |
2433 | * that called us can bubble that back up into the VFS who will then | 2433 | * that called us can bubble that back up into the VFS who will then |
2434 | * immediately retry the aop call. | 2434 | * immediately retry the aop call. |
2435 | * | ||
2436 | * We do a blocking lock and immediate unlock before returning, though, so that | ||
2437 | * the lock has a great chance of being cached on this node by the time the VFS | ||
2438 | * calls back to retry the aop. This has a potential to livelock as nodes | ||
2439 | * ping locks back and forth, but that's a risk we're willing to take to avoid | ||
2440 | * the lock inversion simply. | ||
2441 | */ | 2435 | */ |
2442 | int ocfs2_inode_lock_with_page(struct inode *inode, | 2436 | int ocfs2_inode_lock_with_page(struct inode *inode, |
2443 | struct buffer_head **ret_bh, | 2437 | struct buffer_head **ret_bh, |
@@ -2449,8 +2443,6 @@ int ocfs2_inode_lock_with_page(struct inode *inode, | |||
2449 | ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK); | 2443 | ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK); |
2450 | if (ret == -EAGAIN) { | 2444 | if (ret == -EAGAIN) { |
2451 | unlock_page(page); | 2445 | unlock_page(page); |
2452 | if (ocfs2_inode_lock(inode, ret_bh, ex) == 0) | ||
2453 | ocfs2_inode_unlock(inode, ex); | ||
2454 | ret = AOP_TRUNCATED_PAGE; | 2446 | ret = AOP_TRUNCATED_PAGE; |
2455 | } | 2447 | } |
2456 | 2448 | ||
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 0e5b4515f92e..d63127932509 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -1302,6 +1302,14 @@ int ocfs2_getattr(struct vfsmount *mnt, | |||
1302 | } | 1302 | } |
1303 | 1303 | ||
1304 | generic_fillattr(inode, stat); | 1304 | generic_fillattr(inode, stat); |
1305 | /* | ||
1306 | * If there is inline data in the inode, the inode will normally not | ||
1307 | * have data blocks allocated (it may have an external xattr block). | ||
1308 | * Report at least one sector for such files, so tools like tar, rsync, | ||
1309 | * others don't incorrectly think the file is completely sparse. | ||
1310 | */ | ||
1311 | if (unlikely(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)) | ||
1312 | stat->blocks += (stat->size + 511)>>9; | ||
1305 | 1313 | ||
1306 | /* We set the blksize from the cluster size for performance */ | 1314 | /* We set the blksize from the cluster size for performance */ |
1307 | stat->blksize = osb->s_clustersize; | 1315 | stat->blksize = osb->s_clustersize; |
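Inline-data inodes keep their contents inside the dinode, so their block count would otherwise read zero and tools that use st_blocks to detect sparseness (tar, rsync) would treat the file as a hole. The fix reports the size rounded up to 512-byte sectors; for example, a 100-byte inline file reports (100 + 511) >> 9 = 1 sector. The rounding step in isolation:

    /* round a byte count up to 512-byte sectors, as in the hunk above */
    static inline unsigned long long bytes_to_sectors(unsigned long long size)
    {
    	return (size + 511) >> 9;
    }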
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 3cb097ccce60..16b0bb482ea7 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c | |||
@@ -606,9 +606,7 @@ bail: | |||
606 | if (gb_inode) | 606 | if (gb_inode) |
607 | mutex_unlock(&gb_inode->i_mutex); | 607 | mutex_unlock(&gb_inode->i_mutex); |
608 | 608 | ||
609 | if (gb_inode) | 609 | iput(gb_inode); |
610 | iput(gb_inode); | ||
611 | |||
612 | brelse(bh); | 610 | brelse(bh); |
613 | 611 | ||
614 | return status; | 612 | return status; |
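iput() returns immediately when passed a NULL inode, so the `if (inode) iput(inode);` idiom is redundant; this series drops the guard here and in the journal.c, localalloc.c, namei.c, slot_map.c, and super.c hunks below. brelse() is NULL-safe in the same way, which is why it already appears unguarded. Sketch:

    #include <linux/buffer_head.h>
    #include <linux/fs.h>

    static void put_refs(struct inode *inode, struct buffer_head *bh)
    {
    	/* both are NULL-safe; no "if (inode)" / "if (bh)" guards needed */
    	iput(inode);
    	brelse(bh);
    }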
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 13534f4fe5b5..3772a2dbb980 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
@@ -1042,8 +1042,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb) | |||
1042 | 1042 | ||
1043 | // up_write(&journal->j_trans_barrier); | 1043 | // up_write(&journal->j_trans_barrier); |
1044 | done: | 1044 | done: |
1045 | if (inode) | 1045 | iput(inode); |
1046 | iput(inode); | ||
1047 | } | 1046 | } |
1048 | 1047 | ||
1049 | static void ocfs2_clear_journal_error(struct super_block *sb, | 1048 | static void ocfs2_clear_journal_error(struct super_block *sb, |
@@ -1687,9 +1686,7 @@ done: | |||
1687 | if (got_lock) | 1686 | if (got_lock) |
1688 | ocfs2_inode_unlock(inode, 1); | 1687 | ocfs2_inode_unlock(inode, 1); |
1689 | 1688 | ||
1690 | if (inode) | 1689 | iput(inode); |
1691 | iput(inode); | ||
1692 | |||
1693 | brelse(bh); | 1690 | brelse(bh); |
1694 | 1691 | ||
1695 | return status; | 1692 | return status; |
@@ -1796,8 +1793,7 @@ static int ocfs2_trylock_journal(struct ocfs2_super *osb, | |||
1796 | 1793 | ||
1797 | ocfs2_inode_unlock(inode, 1); | 1794 | ocfs2_inode_unlock(inode, 1); |
1798 | bail: | 1795 | bail: |
1799 | if (inode) | 1796 | iput(inode); |
1800 | iput(inode); | ||
1801 | 1797 | ||
1802 | return status; | 1798 | return status; |
1803 | } | 1799 | } |
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 0a4457fb0711..e9c99e35f5ea 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c | |||
@@ -358,8 +358,7 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb) | |||
358 | bail: | 358 | bail: |
359 | if (status < 0) | 359 | if (status < 0) |
360 | brelse(alloc_bh); | 360 | brelse(alloc_bh); |
361 | if (inode) | 361 | iput(inode); |
362 | iput(inode); | ||
363 | 362 | ||
364 | trace_ocfs2_load_local_alloc(osb->local_alloc_bits); | 363 | trace_ocfs2_load_local_alloc(osb->local_alloc_bits); |
365 | 364 | ||
@@ -473,8 +472,7 @@ out_mutex: | |||
473 | iput(main_bm_inode); | 472 | iput(main_bm_inode); |
474 | 473 | ||
475 | out: | 474 | out: |
476 | if (local_alloc_inode) | 475 | iput(local_alloc_inode); |
477 | iput(local_alloc_inode); | ||
478 | 476 | ||
479 | kfree(alloc_copy); | 477 | kfree(alloc_copy); |
480 | } | 478 | } |
@@ -1327,9 +1325,7 @@ bail: | |||
1327 | 1325 | ||
1328 | brelse(main_bm_bh); | 1326 | brelse(main_bm_bh); |
1329 | 1327 | ||
1330 | if (main_bm_inode) | 1328 | iput(main_bm_inode); |
1331 | iput(main_bm_inode); | ||
1332 | |||
1333 | kfree(alloc_copy); | 1329 | kfree(alloc_copy); |
1334 | 1330 | ||
1335 | if (ac) | 1331 | if (ac) |
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index afb81eae2c18..ab42c38031b1 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
@@ -1683,8 +1683,7 @@ bail: | |||
1683 | if (new_inode) | 1683 | if (new_inode) |
1684 | sync_mapping_buffers(old_inode->i_mapping); | 1684 | sync_mapping_buffers(old_inode->i_mapping); |
1685 | 1685 | ||
1686 | if (new_inode) | 1686 | iput(new_inode); |
1687 | iput(new_inode); | ||
1688 | 1687 | ||
1689 | ocfs2_free_dir_lookup_result(&target_lookup_res); | 1688 | ocfs2_free_dir_lookup_result(&target_lookup_res); |
1690 | ocfs2_free_dir_lookup_result(&old_entry_lookup); | 1689 | ocfs2_free_dir_lookup_result(&old_entry_lookup); |
@@ -2373,6 +2372,15 @@ int ocfs2_orphan_del(struct ocfs2_super *osb, | |||
2373 | (unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno, | 2372 | (unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno, |
2374 | name, strlen(name)); | 2373 | name, strlen(name)); |
2375 | 2374 | ||
2375 | status = ocfs2_journal_access_di(handle, | ||
2376 | INODE_CACHE(orphan_dir_inode), | ||
2377 | orphan_dir_bh, | ||
2378 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
2379 | if (status < 0) { | ||
2380 | mlog_errno(status); | ||
2381 | goto leave; | ||
2382 | } | ||
2383 | |||
2376 | /* find it's spot in the orphan directory */ | 2384 | /* find it's spot in the orphan directory */ |
2377 | status = ocfs2_find_entry(name, strlen(name), orphan_dir_inode, | 2385 | status = ocfs2_find_entry(name, strlen(name), orphan_dir_inode, |
2378 | &lookup); | 2386 | &lookup); |
@@ -2388,15 +2396,6 @@ int ocfs2_orphan_del(struct ocfs2_super *osb, | |||
2388 | goto leave; | 2396 | goto leave; |
2389 | } | 2397 | } |
2390 | 2398 | ||
2391 | status = ocfs2_journal_access_di(handle, | ||
2392 | INODE_CACHE(orphan_dir_inode), | ||
2393 | orphan_dir_bh, | ||
2394 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
2395 | if (status < 0) { | ||
2396 | mlog_errno(status); | ||
2397 | goto leave; | ||
2398 | } | ||
2399 | |||
2400 | /* do the i_nlink dance! :) */ | 2399 | /* do the i_nlink dance! :) */ |
2401 | orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data; | 2400 | orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data; |
2402 | if (S_ISDIR(inode->i_mode)) | 2401 | if (S_ISDIR(inode->i_mode)) |
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index e78a203d44c8..1e09592148ad 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c | |||
@@ -322,8 +322,7 @@ static void __ocfs2_free_slot_info(struct ocfs2_slot_info *si) | |||
322 | if (si == NULL) | 322 | if (si == NULL) |
323 | return; | 323 | return; |
324 | 324 | ||
325 | if (si->si_inode) | 325 | iput(si->si_inode); |
326 | iput(si->si_inode); | ||
327 | if (si->si_bh) { | 326 | if (si->si_bh) { |
328 | for (i = 0; i < si->si_blocks; i++) { | 327 | for (i = 0; i < si->si_blocks; i++) { |
329 | if (si->si_bh[i]) { | 328 | if (si->si_bh[i]) { |
@@ -503,8 +502,17 @@ int ocfs2_find_slot(struct ocfs2_super *osb) | |||
503 | trace_ocfs2_find_slot(osb->slot_num); | 502 | trace_ocfs2_find_slot(osb->slot_num); |
504 | 503 | ||
505 | status = ocfs2_update_disk_slot(osb, si, osb->slot_num); | 504 | status = ocfs2_update_disk_slot(osb, si, osb->slot_num); |
506 | if (status < 0) | 505 | if (status < 0) { |
507 | mlog_errno(status); | 506 | mlog_errno(status); |
507 | /* | ||
508 | * if write block failed, invalidate slot to avoid overwrite | ||
509 | * slot during dismount in case another node rightly has mounted | ||
510 | */ | ||
511 | spin_lock(&osb->osb_lock); | ||
512 | ocfs2_invalidate_slot(si, osb->slot_num); | ||
513 | osb->slot_num = OCFS2_INVALID_SLOT; | ||
514 | spin_unlock(&osb->osb_lock); | ||
515 | } | ||
508 | 516 | ||
509 | bail: | 517 | bail: |
510 | return status; | 518 | return status; |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 2de4c8a9340c..faa1365097bc 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -1280,6 +1280,8 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
1280 | int status, user_stack = 0; | 1280 | int status, user_stack = 0; |
1281 | char *p; | 1281 | char *p; |
1282 | u32 tmp; | 1282 | u32 tmp; |
1283 | int token, option; | ||
1284 | substring_t args[MAX_OPT_ARGS]; | ||
1283 | 1285 | ||
1284 | trace_ocfs2_parse_options(is_remount, options ? options : "(none)"); | 1286 | trace_ocfs2_parse_options(is_remount, options ? options : "(none)"); |
1285 | 1287 | ||
@@ -1298,9 +1300,6 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
1298 | } | 1300 | } |
1299 | 1301 | ||
1300 | while ((p = strsep(&options, ",")) != NULL) { | 1302 | while ((p = strsep(&options, ",")) != NULL) { |
1301 | int token, option; | ||
1302 | substring_t args[MAX_OPT_ARGS]; | ||
1303 | |||
1304 | if (!*p) | 1303 | if (!*p) |
1305 | continue; | 1304 | continue; |
1306 | 1305 | ||
@@ -1367,7 +1366,6 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
1367 | mopt->atime_quantum = option; | 1366 | mopt->atime_quantum = option; |
1368 | break; | 1367 | break; |
1369 | case Opt_slot: | 1368 | case Opt_slot: |
1370 | option = 0; | ||
1371 | if (match_int(&args[0], &option)) { | 1369 | if (match_int(&args[0], &option)) { |
1372 | status = 0; | 1370 | status = 0; |
1373 | goto bail; | 1371 | goto bail; |
@@ -1376,7 +1374,6 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
1376 | mopt->slot = (s16)option; | 1374 | mopt->slot = (s16)option; |
1377 | break; | 1375 | break; |
1378 | case Opt_commit: | 1376 | case Opt_commit: |
1379 | option = 0; | ||
1380 | if (match_int(&args[0], &option)) { | 1377 | if (match_int(&args[0], &option)) { |
1381 | status = 0; | 1378 | status = 0; |
1382 | goto bail; | 1379 | goto bail; |
@@ -1388,7 +1385,6 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
1388 | mopt->commit_interval = HZ * option; | 1385 | mopt->commit_interval = HZ * option; |
1389 | break; | 1386 | break; |
1390 | case Opt_localalloc: | 1387 | case Opt_localalloc: |
1391 | option = 0; | ||
1392 | if (match_int(&args[0], &option)) { | 1388 | if (match_int(&args[0], &option)) { |
1393 | status = 0; | 1389 | status = 0; |
1394 | goto bail; | 1390 | goto bail; |
@@ -1726,8 +1722,7 @@ static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
1726 | ocfs2_inode_unlock(inode, 0); | 1722 | ocfs2_inode_unlock(inode, 0); |
1727 | status = 0; | 1723 | status = 0; |
1728 | bail: | 1724 | bail: |
1729 | if (inode) | 1725 | iput(inode); |
1730 | iput(inode); | ||
1731 | 1726 | ||
1732 | if (status) | 1727 | if (status) |
1733 | mlog_errno(status); | 1728 | mlog_errno(status); |
@@ -1771,7 +1766,7 @@ static int ocfs2_initialize_mem_caches(void) | |||
1771 | sizeof(struct ocfs2_inode_info), | 1766 | sizeof(struct ocfs2_inode_info), |
1772 | 0, | 1767 | 0, |
1773 | (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| | 1768 | (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| |
1774 | SLAB_MEM_SPREAD), | 1769 | SLAB_MEM_SPREAD|SLAB_ACCOUNT), |
1775 | ocfs2_inode_init_once); | 1770 | ocfs2_inode_init_once); |
1776 | ocfs2_dquot_cachep = kmem_cache_create("ocfs2_dquot_cache", | 1771 | ocfs2_dquot_cachep = kmem_cache_create("ocfs2_dquot_cache", |
1777 | sizeof(struct ocfs2_dquot), | 1772 | sizeof(struct ocfs2_dquot), |
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 15e4500cda3e..b61b883c8ff8 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c | |||
@@ -443,7 +443,7 @@ static int __init init_openprom_fs(void) | |||
443 | sizeof(struct op_inode_info), | 443 | sizeof(struct op_inode_info), |
444 | 0, | 444 | 0, |
445 | (SLAB_RECLAIM_ACCOUNT | | 445 | (SLAB_RECLAIM_ACCOUNT | |
446 | SLAB_MEM_SPREAD), | 446 | SLAB_MEM_SPREAD | SLAB_ACCOUNT), |
447 | op_inode_init_once); | 447 | op_inode_init_once); |
448 | if (!op_inode_cachep) | 448 | if (!op_inode_cachep) |
449 | return -ENOMEM; | 449 | return -ENOMEM; |
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index d0e9b9b6223e..42305ddcbaa0 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c | |||
@@ -95,7 +95,8 @@ void __init proc_init_inodecache(void) | |||
95 | proc_inode_cachep = kmem_cache_create("proc_inode_cache", | 95 | proc_inode_cachep = kmem_cache_create("proc_inode_cache", |
96 | sizeof(struct proc_inode), | 96 | sizeof(struct proc_inode), |
97 | 0, (SLAB_RECLAIM_ACCOUNT| | 97 | 0, (SLAB_RECLAIM_ACCOUNT| |
98 | SLAB_MEM_SPREAD|SLAB_PANIC), | 98 | SLAB_MEM_SPREAD|SLAB_ACCOUNT| |
99 | SLAB_PANIC), | ||
99 | init_once); | 100 | init_once); |
100 | } | 101 | } |
101 | 102 | ||
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 9155a5a0d3b9..df4661abadc4 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c | |||
@@ -57,11 +57,8 @@ static int meminfo_proc_show(struct seq_file *m, void *v) | |||
57 | /* | 57 | /* |
58 | * Estimate the amount of memory available for userspace allocations, | 58 | * Estimate the amount of memory available for userspace allocations, |
59 | * without causing swapping. | 59 | * without causing swapping. |
60 | * | ||
61 | * Free memory cannot be taken below the low watermark, before the | ||
62 | * system starts swapping. | ||
63 | */ | 60 | */ |
64 | available = i.freeram - wmark_low; | 61 | available = i.freeram - totalreserve_pages; |
65 | 62 | ||
66 | /* | 63 | /* |
67 | * Not all the page cache can be freed, otherwise the system will | 64 | * Not all the page cache can be freed, otherwise the system will |
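MemAvailable previously subtracted only the low watermark from free memory; the rework subtracts totalreserve_pages, which also covers the highest watermarks and other allocation reserves, giving a more conservative base for the estimate. A rough, illustrative model of the whole calculation, loosely following the surrounding meminfo logic rather than reproducing the exact kernel code:

    #include <linux/kernel.h>

    /* hypothetical standalone model of the MemAvailable estimate */
    static long estimate_available(long freeram, long totalreserve_pages,
    			       long pagecache, long reclaimable_slab,
    			       long wmark_low)
    {
    	long available = freeram - totalreserve_pages;

    	/* only part of cache/slab is freeable without causing thrashing */
    	available += pagecache - min(pagecache / 2, wmark_low);
    	available += reclaimable_slab - min(reclaimable_slab / 2, wmark_low);

    	return available < 0 ? 0 : available;
    }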
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 187b3b5f242e..a353b4c6e86e 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/swapops.h> | 14 | #include <linux/swapops.h> |
15 | #include <linux/mmu_notifier.h> | 15 | #include <linux/mmu_notifier.h> |
16 | #include <linux/page_idle.h> | 16 | #include <linux/page_idle.h> |
17 | #include <linux/shmem_fs.h> | ||
17 | 18 | ||
18 | #include <asm/elf.h> | 19 | #include <asm/elf.h> |
19 | #include <asm/uaccess.h> | 20 | #include <asm/uaccess.h> |
@@ -22,9 +23,13 @@ | |||
22 | 23 | ||
23 | void task_mem(struct seq_file *m, struct mm_struct *mm) | 24 | void task_mem(struct seq_file *m, struct mm_struct *mm) |
24 | { | 25 | { |
25 | unsigned long data, text, lib, swap, ptes, pmds; | 26 | unsigned long text, lib, swap, ptes, pmds, anon, file, shmem; |
26 | unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss; | 27 | unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss; |
27 | 28 | ||
29 | anon = get_mm_counter(mm, MM_ANONPAGES); | ||
30 | file = get_mm_counter(mm, MM_FILEPAGES); | ||
31 | shmem = get_mm_counter(mm, MM_SHMEMPAGES); | ||
32 | |||
28 | /* | 33 | /* |
29 | * Note: to minimize their overhead, mm maintains hiwater_vm and | 34 | * Note: to minimize their overhead, mm maintains hiwater_vm and |
30 | * hiwater_rss only when about to *lower* total_vm or rss. Any | 35 | * hiwater_rss only when about to *lower* total_vm or rss. Any |
@@ -35,11 +40,10 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) | |||
35 | hiwater_vm = total_vm = mm->total_vm; | 40 | hiwater_vm = total_vm = mm->total_vm; |
36 | if (hiwater_vm < mm->hiwater_vm) | 41 | if (hiwater_vm < mm->hiwater_vm) |
37 | hiwater_vm = mm->hiwater_vm; | 42 | hiwater_vm = mm->hiwater_vm; |
38 | hiwater_rss = total_rss = get_mm_rss(mm); | 43 | hiwater_rss = total_rss = anon + file + shmem; |
39 | if (hiwater_rss < mm->hiwater_rss) | 44 | if (hiwater_rss < mm->hiwater_rss) |
40 | hiwater_rss = mm->hiwater_rss; | 45 | hiwater_rss = mm->hiwater_rss; |
41 | 46 | ||
42 | data = mm->total_vm - mm->shared_vm - mm->stack_vm; | ||
43 | text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10; | 47 | text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10; |
44 | lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text; | 48 | lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text; |
45 | swap = get_mm_counter(mm, MM_SWAPENTS); | 49 | swap = get_mm_counter(mm, MM_SWAPENTS); |
@@ -52,6 +56,9 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) | |||
52 | "VmPin:\t%8lu kB\n" | 56 | "VmPin:\t%8lu kB\n" |
53 | "VmHWM:\t%8lu kB\n" | 57 | "VmHWM:\t%8lu kB\n" |
54 | "VmRSS:\t%8lu kB\n" | 58 | "VmRSS:\t%8lu kB\n" |
59 | "RssAnon:\t%8lu kB\n" | ||
60 | "RssFile:\t%8lu kB\n" | ||
61 | "RssShmem:\t%8lu kB\n" | ||
55 | "VmData:\t%8lu kB\n" | 62 | "VmData:\t%8lu kB\n" |
56 | "VmStk:\t%8lu kB\n" | 63 | "VmStk:\t%8lu kB\n" |
57 | "VmExe:\t%8lu kB\n" | 64 | "VmExe:\t%8lu kB\n" |
@@ -65,7 +72,10 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) | |||
65 | mm->pinned_vm << (PAGE_SHIFT-10), | 72 | mm->pinned_vm << (PAGE_SHIFT-10), |
66 | hiwater_rss << (PAGE_SHIFT-10), | 73 | hiwater_rss << (PAGE_SHIFT-10), |
67 | total_rss << (PAGE_SHIFT-10), | 74 | total_rss << (PAGE_SHIFT-10), |
68 | data << (PAGE_SHIFT-10), | 75 | anon << (PAGE_SHIFT-10), |
76 | file << (PAGE_SHIFT-10), | ||
77 | shmem << (PAGE_SHIFT-10), | ||
78 | mm->data_vm << (PAGE_SHIFT-10), | ||
69 | mm->stack_vm << (PAGE_SHIFT-10), text, lib, | 79 | mm->stack_vm << (PAGE_SHIFT-10), text, lib, |
70 | ptes >> 10, | 80 | ptes >> 10, |
71 | pmds >> 10, | 81 | pmds >> 10, |
@@ -82,10 +92,11 @@ unsigned long task_statm(struct mm_struct *mm, | |||
82 | unsigned long *shared, unsigned long *text, | 92 | unsigned long *shared, unsigned long *text, |
83 | unsigned long *data, unsigned long *resident) | 93 | unsigned long *data, unsigned long *resident) |
84 | { | 94 | { |
85 | *shared = get_mm_counter(mm, MM_FILEPAGES); | 95 | *shared = get_mm_counter(mm, MM_FILEPAGES) + |
96 | get_mm_counter(mm, MM_SHMEMPAGES); | ||
86 | *text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) | 97 | *text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) |
87 | >> PAGE_SHIFT; | 98 | >> PAGE_SHIFT; |
88 | *data = mm->total_vm - mm->shared_vm; | 99 | *data = mm->data_vm + mm->stack_vm; |
89 | *resident = *shared + get_mm_counter(mm, MM_ANONPAGES); | 100 | *resident = *shared + get_mm_counter(mm, MM_ANONPAGES); |
90 | return mm->total_vm; | 101 | return mm->total_vm; |
91 | } | 102 | } |
@@ -451,6 +462,7 @@ struct mem_size_stats { | |||
451 | unsigned long private_hugetlb; | 462 | unsigned long private_hugetlb; |
452 | u64 pss; | 463 | u64 pss; |
453 | u64 swap_pss; | 464 | u64 swap_pss; |
465 | bool check_shmem_swap; | ||
454 | }; | 466 | }; |
455 | 467 | ||
456 | static void smaps_account(struct mem_size_stats *mss, struct page *page, | 468 | static void smaps_account(struct mem_size_stats *mss, struct page *page, |
@@ -485,6 +497,19 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page, | |||
485 | } | 497 | } |
486 | } | 498 | } |
487 | 499 | ||
500 | #ifdef CONFIG_SHMEM | ||
501 | static int smaps_pte_hole(unsigned long addr, unsigned long end, | ||
502 | struct mm_walk *walk) | ||
503 | { | ||
504 | struct mem_size_stats *mss = walk->private; | ||
505 | |||
506 | mss->swap += shmem_partial_swap_usage( | ||
507 | walk->vma->vm_file->f_mapping, addr, end); | ||
508 | |||
509 | return 0; | ||
510 | } | ||
511 | #endif | ||
512 | |||
488 | static void smaps_pte_entry(pte_t *pte, unsigned long addr, | 513 | static void smaps_pte_entry(pte_t *pte, unsigned long addr, |
489 | struct mm_walk *walk) | 514 | struct mm_walk *walk) |
490 | { | 515 | { |
@@ -512,6 +537,19 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, | |||
512 | } | 537 | } |
513 | } else if (is_migration_entry(swpent)) | 538 | } else if (is_migration_entry(swpent)) |
514 | page = migration_entry_to_page(swpent); | 539 | page = migration_entry_to_page(swpent); |
540 | } else if (unlikely(IS_ENABLED(CONFIG_SHMEM) && mss->check_shmem_swap | ||
541 | && pte_none(*pte))) { | ||
542 | page = find_get_entry(vma->vm_file->f_mapping, | ||
543 | linear_page_index(vma, addr)); | ||
544 | if (!page) | ||
545 | return; | ||
546 | |||
547 | if (radix_tree_exceptional_entry(page)) | ||
548 | mss->swap += PAGE_SIZE; | ||
549 | else | ||
550 | page_cache_release(page); | ||
551 | |||
552 | return; | ||
515 | } | 553 | } |
516 | 554 | ||
517 | if (!page) | 555 | if (!page) |
@@ -671,6 +709,31 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) | |||
671 | }; | 709 | }; |
672 | 710 | ||
673 | memset(&mss, 0, sizeof mss); | 711 | memset(&mss, 0, sizeof mss); |
712 | |||
713 | #ifdef CONFIG_SHMEM | ||
714 | if (vma->vm_file && shmem_mapping(vma->vm_file->f_mapping)) { | ||
715 | /* | ||
716 | * For shared or readonly shmem mappings we know that all | ||
717 | * swapped out pages belong to the shmem object, and we can | ||
718 | * obtain the swap value much more efficiently. For private | ||
719 | * writable mappings, we might have COW pages that are | ||
720 | * not affected by the parent swapped out pages of the shmem | ||
721 | * object, so we have to distinguish them during the page walk. | ||
722 | * Unless we know that the shmem object (or the part mapped by | ||
723 | * our VMA) has no swapped out pages at all. | ||
724 | */ | ||
725 | unsigned long shmem_swapped = shmem_swap_usage(vma); | ||
726 | |||
727 | if (!shmem_swapped || (vma->vm_flags & VM_SHARED) || | ||
728 | !(vma->vm_flags & VM_WRITE)) { | ||
729 | mss.swap = shmem_swapped; | ||
730 | } else { | ||
731 | mss.check_shmem_swap = true; | ||
732 | smaps_walk.pte_hole = smaps_pte_hole; | ||
733 | } | ||
734 | } | ||
735 | #endif | ||
736 | |||
674 | /* mmap_sem is held in m_start */ | 737 | /* mmap_sem is held in m_start */ |
675 | walk_page_vma(vma, &smaps_walk); | 738 | walk_page_vma(vma, &smaps_walk); |
676 | 739 | ||
@@ -817,9 +880,6 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, | |||
817 | pmd = pmd_wrprotect(pmd); | 880 | pmd = pmd_wrprotect(pmd); |
818 | pmd = pmd_clear_soft_dirty(pmd); | 881 | pmd = pmd_clear_soft_dirty(pmd); |
819 | 882 | ||
820 | if (vma->vm_flags & VM_SOFTDIRTY) | ||
821 | vma->vm_flags &= ~VM_SOFTDIRTY; | ||
822 | |||
823 | set_pmd_at(vma->vm_mm, addr, pmdp, pmd); | 883 | set_pmd_at(vma->vm_mm, addr, pmdp, pmd); |
824 | } | 884 | } |
825 | #else | 885 | #else |
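With the task_mem() changes above, /proc/PID/status splits VmRSS into its three components (VmRSS = RssAnon + RssFile + RssShmem). A small, runnable sketch that prints the new breakdown for the current process:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            char line[256];
            FILE *f = fopen("/proc/self/status", "r");

            if (!f)
                    return 1;
            while (fgets(line, sizeof(line), f)) {
                    if (!strncmp(line, "VmRSS:", 6) ||
                        !strncmp(line, "RssAnon:", 8) ||
                        !strncmp(line, "RssFile:", 8) ||
                        !strncmp(line, "RssShmem:", 9))
                            fputs(line, stdout);
            }
            fclose(f);
            return 0;
    }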
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index f37b3deb01b4..3a67cfb142d8 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c | |||
@@ -365,7 +365,7 @@ static int init_inodecache(void) | |||
365 | qnx4_inode_cachep = kmem_cache_create("qnx4_inode_cache", | 365 | qnx4_inode_cachep = kmem_cache_create("qnx4_inode_cache", |
366 | sizeof(struct qnx4_inode_info), | 366 | sizeof(struct qnx4_inode_info), |
367 | 0, (SLAB_RECLAIM_ACCOUNT| | 367 | 0, (SLAB_RECLAIM_ACCOUNT| |
368 | SLAB_MEM_SPREAD), | 368 | SLAB_MEM_SPREAD|SLAB_ACCOUNT), |
369 | init_once); | 369 | init_once); |
370 | if (qnx4_inode_cachep == NULL) | 370 | if (qnx4_inode_cachep == NULL) |
371 | return -ENOMEM; | 371 | return -ENOMEM; |
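This and the following per-filesystem hunks all apply the same one-line pattern: adding SLAB_ACCOUNT so inode objects are charged to the allocating task's kmem cgroup. A hedged sketch of the pattern; the "foo" cache, info struct, and constructor are hypothetical, not from this series:

    #include <linux/fs.h>
    #include <linux/init.h>
    #include <linux/slab.h>

    /* Sketch only: all "foo" names are illustrative. */
    struct foo_inode_info {
            struct inode vfs_inode;
    };

    static struct kmem_cache *foo_inode_cachep;

    static void foo_init_once(void *p)
    {
            inode_init_once(&((struct foo_inode_info *)p)->vfs_inode);
    }

    static int __init foo_init_inodecache(void)
    {
            foo_inode_cachep = kmem_cache_create("foo_inode_cache",
                            sizeof(struct foo_inode_info), 0,
                            SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD |
                            SLAB_ACCOUNT,   /* charge objects to kmemcg */
                            foo_init_once);
            return foo_inode_cachep ? 0 : -ENOMEM;
    }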
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c index 9728b5499e1d..47bb1de07155 100644 --- a/fs/qnx6/inode.c +++ b/fs/qnx6/inode.c | |||
@@ -625,7 +625,7 @@ static int init_inodecache(void) | |||
625 | qnx6_inode_cachep = kmem_cache_create("qnx6_inode_cache", | 625 | qnx6_inode_cachep = kmem_cache_create("qnx6_inode_cache", |
626 | sizeof(struct qnx6_inode_info), | 626 | sizeof(struct qnx6_inode_info), |
627 | 0, (SLAB_RECLAIM_ACCOUNT| | 627 | 0, (SLAB_RECLAIM_ACCOUNT| |
628 | SLAB_MEM_SPREAD), | 628 | SLAB_MEM_SPREAD|SLAB_ACCOUNT), |
629 | init_once); | 629 | init_once); |
630 | if (!qnx6_inode_cachep) | 630 | if (!qnx6_inode_cachep) |
631 | return -ENOMEM; | 631 | return -ENOMEM; |
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 4a62fe8cc3bf..05db7473bcb5 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c | |||
@@ -626,7 +626,8 @@ static int __init init_inodecache(void) | |||
626 | sizeof(struct | 626 | sizeof(struct |
627 | reiserfs_inode_info), | 627 | reiserfs_inode_info), |
628 | 0, (SLAB_RECLAIM_ACCOUNT| | 628 | 0, (SLAB_RECLAIM_ACCOUNT| |
629 | SLAB_MEM_SPREAD), | 629 | SLAB_MEM_SPREAD| |
630 | SLAB_ACCOUNT), | ||
630 | init_once); | 631 | init_once); |
631 | if (reiserfs_inode_cachep == NULL) | 632 | if (reiserfs_inode_cachep == NULL) |
632 | return -ENOMEM; | 633 | return -ENOMEM; |
diff --git a/fs/romfs/super.c b/fs/romfs/super.c index bb894e78a821..6b00ca357c58 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c | |||
@@ -619,8 +619,8 @@ static int __init init_romfs_fs(void) | |||
619 | romfs_inode_cachep = | 619 | romfs_inode_cachep = |
620 | kmem_cache_create("romfs_i", | 620 | kmem_cache_create("romfs_i", |
621 | sizeof(struct romfs_inode_info), 0, | 621 | sizeof(struct romfs_inode_info), 0, |
622 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, | 622 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | |
623 | romfs_i_init_once); | 623 | SLAB_ACCOUNT, romfs_i_init_once); |
624 | 624 | ||
625 | if (!romfs_inode_cachep) { | 625 | if (!romfs_inode_cachep) { |
626 | pr_err("Failed to initialise inode cache\n"); | 626 | pr_err("Failed to initialise inode cache\n"); |
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index dded920cbc8f..5e79bfa4f260 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c | |||
@@ -419,7 +419,8 @@ static int __init init_inodecache(void) | |||
419 | { | 419 | { |
420 | squashfs_inode_cachep = kmem_cache_create("squashfs_inode_cache", | 420 | squashfs_inode_cachep = kmem_cache_create("squashfs_inode_cache", |
421 | sizeof(struct squashfs_inode_info), 0, | 421 | sizeof(struct squashfs_inode_info), 0, |
422 | SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT, init_once); | 422 | SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT, |
423 | init_once); | ||
423 | 424 | ||
424 | return squashfs_inode_cachep ? 0 : -ENOMEM; | 425 | return squashfs_inode_cachep ? 0 : -ENOMEM; |
425 | } | 426 | } |
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 07ac18c355e7..d62c423a5a2d 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c | |||
@@ -346,7 +346,7 @@ int __init sysv_init_icache(void) | |||
346 | { | 346 | { |
347 | sysv_inode_cachep = kmem_cache_create("sysv_inode_cache", | 347 | sysv_inode_cachep = kmem_cache_create("sysv_inode_cache", |
348 | sizeof(struct sysv_inode_info), 0, | 348 | sizeof(struct sysv_inode_info), 0, |
349 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, | 349 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT, |
350 | init_once); | 350 | init_once); |
351 | if (!sysv_inode_cachep) | 351 | if (!sysv_inode_cachep) |
352 | return -ENOMEM; | 352 | return -ENOMEM; |
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 1fd90c079537..a233ba913be4 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c | |||
@@ -2248,8 +2248,8 @@ static int __init ubifs_init(void) | |||
2248 | 2248 | ||
2249 | ubifs_inode_slab = kmem_cache_create("ubifs_inode_slab", | 2249 | ubifs_inode_slab = kmem_cache_create("ubifs_inode_slab", |
2250 | sizeof(struct ubifs_inode), 0, | 2250 | sizeof(struct ubifs_inode), 0, |
2251 | SLAB_MEM_SPREAD | SLAB_RECLAIM_ACCOUNT, | 2251 | SLAB_MEM_SPREAD | SLAB_RECLAIM_ACCOUNT | |
2252 | &inode_slab_ctor); | 2252 | SLAB_ACCOUNT, &inode_slab_ctor); |
2253 | if (!ubifs_inode_slab) | 2253 | if (!ubifs_inode_slab) |
2254 | return -ENOMEM; | 2254 | return -ENOMEM; |
2255 | 2255 | ||
diff --git a/fs/udf/super.c b/fs/udf/super.c index 81155b9b445b..9c64a3ca9837 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c | |||
@@ -179,7 +179,8 @@ static int __init init_inodecache(void) | |||
179 | udf_inode_cachep = kmem_cache_create("udf_inode_cache", | 179 | udf_inode_cachep = kmem_cache_create("udf_inode_cache", |
180 | sizeof(struct udf_inode_info), | 180 | sizeof(struct udf_inode_info), |
181 | 0, (SLAB_RECLAIM_ACCOUNT | | 181 | 0, (SLAB_RECLAIM_ACCOUNT | |
182 | SLAB_MEM_SPREAD), | 182 | SLAB_MEM_SPREAD | |
183 | SLAB_ACCOUNT), | ||
183 | init_once); | 184 | init_once); |
184 | if (!udf_inode_cachep) | 185 | if (!udf_inode_cachep) |
185 | return -ENOMEM; | 186 | return -ENOMEM; |
diff --git a/fs/ufs/super.c b/fs/ufs/super.c index f6390eec02ca..442fd52ebffe 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c | |||
@@ -1427,7 +1427,7 @@ static int __init init_inodecache(void) | |||
1427 | ufs_inode_cachep = kmem_cache_create("ufs_inode_cache", | 1427 | ufs_inode_cachep = kmem_cache_create("ufs_inode_cache", |
1428 | sizeof(struct ufs_inode_info), | 1428 | sizeof(struct ufs_inode_info), |
1429 | 0, (SLAB_RECLAIM_ACCOUNT| | 1429 | 0, (SLAB_RECLAIM_ACCOUNT| |
1430 | SLAB_MEM_SPREAD), | 1430 | SLAB_MEM_SPREAD|SLAB_ACCOUNT), |
1431 | init_once); | 1431 | init_once); |
1432 | if (ufs_inode_cachep == NULL) | 1432 | if (ufs_inode_cachep == NULL) |
1433 | return -ENOMEM; | 1433 | return -ENOMEM; |
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index cc6b768fc068..d1c66e465ca5 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h | |||
@@ -84,6 +84,7 @@ kmem_zalloc(size_t size, xfs_km_flags_t flags) | |||
84 | #define KM_ZONE_HWALIGN SLAB_HWCACHE_ALIGN | 84 | #define KM_ZONE_HWALIGN SLAB_HWCACHE_ALIGN |
85 | #define KM_ZONE_RECLAIM SLAB_RECLAIM_ACCOUNT | 85 | #define KM_ZONE_RECLAIM SLAB_RECLAIM_ACCOUNT |
86 | #define KM_ZONE_SPREAD SLAB_MEM_SPREAD | 86 | #define KM_ZONE_SPREAD SLAB_MEM_SPREAD |
87 | #define KM_ZONE_ACCOUNT SLAB_ACCOUNT | ||
87 | 88 | ||
88 | #define kmem_zone kmem_cache | 89 | #define kmem_zone kmem_cache |
89 | #define kmem_zone_t struct kmem_cache | 90 | #define kmem_zone_t struct kmem_cache |
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index b35775752b74..59c9b7bd958d 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c | |||
@@ -1714,8 +1714,8 @@ xfs_init_zones(void) | |||
1714 | 1714 | ||
1715 | xfs_inode_zone = | 1715 | xfs_inode_zone = |
1716 | kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode", | 1716 | kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode", |
1717 | KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD, | 1717 | KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD | |
1718 | xfs_fs_inode_init_once); | 1718 | KM_ZONE_ACCOUNT, xfs_fs_inode_init_once); |
1719 | if (!xfs_inode_zone) | 1719 | if (!xfs_inode_zone) |
1720 | goto out_destroy_efi_zone; | 1720 | goto out_destroy_efi_zone; |
1721 | 1721 | ||
diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h index 4b4b056a6eb0..5148150cc80b 100644 --- a/include/asm-generic/memory_model.h +++ b/include/asm-generic/memory_model.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef __ASM_MEMORY_MODEL_H | 1 | #ifndef __ASM_MEMORY_MODEL_H |
2 | #define __ASM_MEMORY_MODEL_H | 2 | #define __ASM_MEMORY_MODEL_H |
3 | 3 | ||
4 | #include <linux/pfn.h> | ||
5 | |||
4 | #ifndef __ASSEMBLY__ | 6 | #ifndef __ASSEMBLY__ |
5 | 7 | ||
6 | #if defined(CONFIG_FLATMEM) | 8 | #if defined(CONFIG_FLATMEM) |
@@ -72,7 +74,7 @@ | |||
72 | /* | 74 | /* |
73 | * Convert a physical address to a Page Frame Number and back | 75 | * Convert a physical address to a Page Frame Number and back |
74 | */ | 76 | */ |
75 | #define __phys_to_pfn(paddr) ((unsigned long)((paddr) >> PAGE_SHIFT)) | 77 | #define __phys_to_pfn(paddr) PHYS_PFN(paddr) |
76 | #define __pfn_to_phys(pfn) PFN_PHYS(pfn) | 78 | #define __pfn_to_phys(pfn) PFN_PHYS(pfn) |
77 | 79 | ||
78 | #define page_to_pfn __page_to_pfn | 80 | #define page_to_pfn __page_to_pfn |
diff --git a/include/linux/dcache.h b/include/linux/dcache.h index d67ae119cf4e..7781ce110503 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h | |||
@@ -27,10 +27,10 @@ struct vfsmount; | |||
27 | 27 | ||
28 | /* The hash is always the low bits of hash_len */ | 28 | /* The hash is always the low bits of hash_len */ |
29 | #ifdef __LITTLE_ENDIAN | 29 | #ifdef __LITTLE_ENDIAN |
30 | #define HASH_LEN_DECLARE u32 hash; u32 len; | 30 | #define HASH_LEN_DECLARE u32 hash; u32 len |
31 | #define bytemask_from_count(cnt) (~(~0ul << (cnt)*8)) | 31 | #define bytemask_from_count(cnt) (~(~0ul << (cnt)*8)) |
32 | #else | 32 | #else |
33 | #define HASH_LEN_DECLARE u32 len; u32 hash; | 33 | #define HASH_LEN_DECLARE u32 len; u32 hash |
34 | #define bytemask_from_count(cnt) (~(~0ul >> (cnt)*8)) | 34 | #define bytemask_from_count(cnt) (~(~0ul >> (cnt)*8)) |
35 | #endif | 35 | #endif |
36 | 36 | ||
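Dropping the trailing semicolon from HASH_LEN_DECLARE lets each use site supply its own, which is what the anonymous-struct embedding in struct qstr expects:

    /* struct qstr in include/linux/dcache.h embeds the macro inside an
     * anonymous struct, so the semicolon now comes from the use site: */
    struct qstr {
            union {
                    struct {
                            HASH_LEN_DECLARE;
                    };
                    u64 hash_len;
            };
            const unsigned char *name;
    };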
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 533c4408529a..6b7e89f45aa4 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h | |||
@@ -220,7 +220,10 @@ struct fsnotify_mark { | |||
220 | /* List of marks by group->i_fsnotify_marks. Also reused for queueing | 220 | /* List of marks by group->i_fsnotify_marks. Also reused for queueing |
221 | * mark into destroy_list when it's waiting for the end of SRCU period | 221 | * mark into destroy_list when it's waiting for the end of SRCU period |
222 | * before it can be freed. [group->mark_mutex] */ | 222 | * before it can be freed. [group->mark_mutex] */ |
223 | struct list_head g_list; | 223 | union { |
224 | struct list_head g_list; | ||
225 | struct rcu_head g_rcu; | ||
226 | }; | ||
224 | /* Protects inode / mnt pointers, flags, masks */ | 227 | /* Protects inode / mnt pointers, flags, masks */ |
225 | spinlock_t lock; | 228 | spinlock_t lock; |
226 | /* List of marks for inode / vfsmount [obj_lock] */ | 229 | /* List of marks for inode / vfsmount [obj_lock] */ |
diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 8942af0813e3..28ad5f6494b0 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h | |||
@@ -30,7 +30,7 @@ struct vm_area_struct; | |||
30 | #define ___GFP_HARDWALL 0x20000u | 30 | #define ___GFP_HARDWALL 0x20000u |
31 | #define ___GFP_THISNODE 0x40000u | 31 | #define ___GFP_THISNODE 0x40000u |
32 | #define ___GFP_ATOMIC 0x80000u | 32 | #define ___GFP_ATOMIC 0x80000u |
33 | #define ___GFP_NOACCOUNT 0x100000u | 33 | #define ___GFP_ACCOUNT 0x100000u |
34 | #define ___GFP_NOTRACK 0x200000u | 34 | #define ___GFP_NOTRACK 0x200000u |
35 | #define ___GFP_DIRECT_RECLAIM 0x400000u | 35 | #define ___GFP_DIRECT_RECLAIM 0x400000u |
36 | #define ___GFP_OTHER_NODE 0x800000u | 36 | #define ___GFP_OTHER_NODE 0x800000u |
@@ -73,11 +73,15 @@ struct vm_area_struct; | |||
73 | * | 73 | * |
74 | * __GFP_THISNODE forces the allocation to be satisfied from the requested | 74 | * __GFP_THISNODE forces the allocation to be satisfied from the requested |
75 | * node with no fallbacks or placement policy enforcements. | 75 | * node with no fallbacks or placement policy enforcements. |
76 | * | ||
77 | * __GFP_ACCOUNT causes the allocation to be accounted to kmemcg (only relevant | ||
78 | * to kmem allocations). | ||
76 | */ | 79 | */ |
77 | #define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) | 80 | #define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) |
78 | #define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) | 81 | #define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) |
79 | #define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL) | 82 | #define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL) |
80 | #define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE) | 83 | #define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE) |
84 | #define __GFP_ACCOUNT ((__force gfp_t)___GFP_ACCOUNT) | ||
81 | 85 | ||
82 | /* | 86 | /* |
83 | * Watermark modifiers -- controls access to emergency reserves | 87 | * Watermark modifiers -- controls access to emergency reserves |
@@ -104,7 +108,6 @@ struct vm_area_struct; | |||
104 | #define __GFP_HIGH ((__force gfp_t)___GFP_HIGH) | 108 | #define __GFP_HIGH ((__force gfp_t)___GFP_HIGH) |
105 | #define __GFP_MEMALLOC ((__force gfp_t)___GFP_MEMALLOC) | 109 | #define __GFP_MEMALLOC ((__force gfp_t)___GFP_MEMALLOC) |
106 | #define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC) | 110 | #define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC) |
107 | #define __GFP_NOACCOUNT ((__force gfp_t)___GFP_NOACCOUNT) | ||
108 | 111 | ||
109 | /* | 112 | /* |
110 | * Reclaim modifiers | 113 | * Reclaim modifiers |
@@ -197,6 +200,9 @@ struct vm_area_struct; | |||
197 | * GFP_KERNEL is typical for kernel-internal allocations. The caller requires | 200 | * GFP_KERNEL is typical for kernel-internal allocations. The caller requires |
198 | * ZONE_NORMAL or a lower zone for direct access but can direct reclaim. | 201 | * ZONE_NORMAL or a lower zone for direct access but can direct reclaim. |
199 | * | 202 | * |
203 | * GFP_KERNEL_ACCOUNT is the same as GFP_KERNEL, except the allocation is | ||
204 | * accounted to kmemcg. | ||
205 | * | ||
200 | * GFP_NOWAIT is for kernel allocations that should not stall for direct | 206 | * GFP_NOWAIT is for kernel allocations that should not stall for direct |
201 | * reclaim, start physical IO or use any filesystem callback. | 207 | * reclaim, start physical IO or use any filesystem callback. |
202 | * | 208 | * |
@@ -236,6 +242,7 @@ struct vm_area_struct; | |||
236 | */ | 242 | */ |
237 | #define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM) | 243 | #define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM) |
238 | #define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) | 244 | #define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) |
245 | #define GFP_KERNEL_ACCOUNT (GFP_KERNEL | __GFP_ACCOUNT) | ||
239 | #define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM) | 246 | #define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM) |
240 | #define GFP_NOIO (__GFP_RECLAIM) | 247 | #define GFP_NOIO (__GFP_RECLAIM) |
241 | #define GFP_NOFS (__GFP_RECLAIM | __GFP_IO) | 248 | #define GFP_NOFS (__GFP_RECLAIM | __GFP_IO) |
@@ -271,7 +278,7 @@ static inline int gfpflags_to_migratetype(const gfp_t gfp_flags) | |||
271 | 278 | ||
272 | static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) | 279 | static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) |
273 | { | 280 | { |
274 | return (bool __force)(gfp_flags & __GFP_DIRECT_RECLAIM); | 281 | return !!(gfp_flags & __GFP_DIRECT_RECLAIM); |
275 | } | 282 | } |
276 | 283 | ||
277 | #ifdef CONFIG_HIGHMEM | 284 | #ifdef CONFIG_HIGHMEM |
@@ -377,10 +384,11 @@ static inline enum zone_type gfp_zone(gfp_t flags) | |||
377 | 384 | ||
378 | static inline int gfp_zonelist(gfp_t flags) | 385 | static inline int gfp_zonelist(gfp_t flags) |
379 | { | 386 | { |
380 | if (IS_ENABLED(CONFIG_NUMA) && unlikely(flags & __GFP_THISNODE)) | 387 | #ifdef CONFIG_NUMA |
381 | return 1; | 388 | if (unlikely(flags & __GFP_THISNODE)) |
382 | 389 | return ZONELIST_NOFALLBACK; | |
383 | return 0; | 390 | #endif |
391 | return ZONELIST_FALLBACK; | ||
384 | } | 392 | } |
385 | 393 | ||
386 | /* | 394 | /* |
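The gfp.h hunks invert the accounting default: the opt-out __GFP_NOACCOUNT becomes the opt-in __GFP_ACCOUNT, with GFP_KERNEL_ACCOUNT as the common spelling. A hedged kernel-style sketch of an opted-in allocation; foo_alloc is illustrative:

    #include <linux/errno.h>
    #include <linux/slab.h>

    static int foo_alloc(void)
    {
            /* GFP_KERNEL_ACCOUNT == GFP_KERNEL | __GFP_ACCOUNT:
             * this allocation is charged to the caller's kmem cgroup. */
            void *buf = kmalloc(4096, GFP_KERNEL_ACCOUNT);

            if (!buf)
                    return -ENOMEM;
            kfree(buf);
            return 0;
    }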
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index b0eb06423d5e..e76574d8f9b5 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h | |||
@@ -263,20 +263,18 @@ struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct, | |||
263 | struct user_struct **user, int creat_flags, | 263 | struct user_struct **user, int creat_flags, |
264 | int page_size_log); | 264 | int page_size_log); |
265 | 265 | ||
266 | static inline int is_file_hugepages(struct file *file) | 266 | static inline bool is_file_hugepages(struct file *file) |
267 | { | 267 | { |
268 | if (file->f_op == &hugetlbfs_file_operations) | 268 | if (file->f_op == &hugetlbfs_file_operations) |
269 | return 1; | 269 | return true; |
270 | if (is_file_shm_hugepages(file)) | ||
271 | return 1; | ||
272 | 270 | ||
273 | return 0; | 271 | return is_file_shm_hugepages(file); |
274 | } | 272 | } |
275 | 273 | ||
276 | 274 | ||
277 | #else /* !CONFIG_HUGETLBFS */ | 275 | #else /* !CONFIG_HUGETLBFS */ |
278 | 276 | ||
279 | #define is_file_hugepages(file) 0 | 277 | #define is_file_hugepages(file) false |
280 | static inline struct file * | 278 | static inline struct file * |
281 | hugetlb_file_setup(const char *name, size_t size, vm_flags_t acctflag, | 279 | hugetlb_file_setup(const char *name, size_t size, vm_flags_t acctflag, |
282 | struct user_struct **user, int creat_flags, | 280 | struct user_struct **user, int creat_flags, |
diff --git a/include/linux/memblock.h b/include/linux/memblock.h index fec66f86eeff..173fb44e22f1 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h | |||
@@ -216,10 +216,10 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, | |||
216 | * for_each_free_mem_range - iterate through free memblock areas | 216 | * for_each_free_mem_range - iterate through free memblock areas |
217 | * @i: u64 used as loop variable | 217 | * @i: u64 used as loop variable |
218 | * @nid: node selector, %NUMA_NO_NODE for all nodes | 218 | * @nid: node selector, %NUMA_NO_NODE for all nodes |
219 | * @flags: pick from blocks based on memory attributes | ||
219 | * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL | 220 | * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL |
220 | * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL | 221 | * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL |
221 | * @p_nid: ptr to int for nid of the range, can be %NULL | 222 | * @p_nid: ptr to int for nid of the range, can be %NULL |
222 | * @flags: pick from blocks based on memory attributes | ||
223 | * | 223 | * |
224 | * Walks over free (memory && !reserved) areas of memblock. Available as | 224 | * Walks over free (memory && !reserved) areas of memblock. Available as |
225 | * soon as memblock is initialized. | 225 | * soon as memblock is initialized. |
@@ -232,10 +232,10 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, | |||
232 | * for_each_free_mem_range_reverse - rev-iterate through free memblock areas | 232 | * for_each_free_mem_range_reverse - rev-iterate through free memblock areas |
233 | * @i: u64 used as loop variable | 233 | * @i: u64 used as loop variable |
234 | * @nid: node selector, %NUMA_NO_NODE for all nodes | 234 | * @nid: node selector, %NUMA_NO_NODE for all nodes |
235 | * @flags: pick from blocks based on memory attributes | ||
235 | * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL | 236 | * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL |
236 | * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL | 237 | * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL |
237 | * @p_nid: ptr to int for nid of the range, can be %NULL | 238 | * @p_nid: ptr to int for nid of the range, can be %NULL |
238 | * @flags: pick from blocks based on memory attributes | ||
239 | * | 239 | * |
240 | * Walks over free (memory && !reserved) areas of memblock in reverse | 240 | * Walks over free (memory && !reserved) areas of memblock in reverse |
241 | * order. Available as soon as memblock is initialized. | 241 | * order. Available as soon as memblock is initialized. |
@@ -325,10 +325,10 @@ phys_addr_t memblock_mem_size(unsigned long limit_pfn); | |||
325 | phys_addr_t memblock_start_of_DRAM(void); | 325 | phys_addr_t memblock_start_of_DRAM(void); |
326 | phys_addr_t memblock_end_of_DRAM(void); | 326 | phys_addr_t memblock_end_of_DRAM(void); |
327 | void memblock_enforce_memory_limit(phys_addr_t memory_limit); | 327 | void memblock_enforce_memory_limit(phys_addr_t memory_limit); |
328 | int memblock_is_memory(phys_addr_t addr); | 328 | bool memblock_is_memory(phys_addr_t addr); |
329 | int memblock_is_map_memory(phys_addr_t addr); | 329 | int memblock_is_map_memory(phys_addr_t addr); |
330 | int memblock_is_region_memory(phys_addr_t base, phys_addr_t size); | 330 | int memblock_is_region_memory(phys_addr_t base, phys_addr_t size); |
331 | int memblock_is_reserved(phys_addr_t addr); | 331 | bool memblock_is_reserved(phys_addr_t addr); |
332 | bool memblock_is_region_reserved(phys_addr_t base, phys_addr_t size); | 332 | bool memblock_is_region_reserved(phys_addr_t base, phys_addr_t size); |
333 | 333 | ||
334 | extern void __memblock_dump_all(void); | 334 | extern void __memblock_dump_all(void); |
@@ -399,6 +399,11 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo | |||
399 | region < (memblock.memblock_type.regions + memblock.memblock_type.cnt); \ | 399 | region < (memblock.memblock_type.regions + memblock.memblock_type.cnt); \ |
400 | region++) | 400 | region++) |
401 | 401 | ||
402 | #define for_each_memblock_type(memblock_type, rgn) \ | ||
403 | idx = 0; \ | ||
404 | rgn = &memblock_type->regions[idx]; \ | ||
405 | for (idx = 0; idx < memblock_type->cnt; \ | ||
406 | idx++,rgn = &memblock_type->regions[idx]) | ||
402 | 407 | ||
403 | #ifdef CONFIG_ARCH_DISCARD_MEMBLOCK | 408 | #ifdef CONFIG_ARCH_DISCARD_MEMBLOCK |
404 | #define __init_memblock __meminit | 409 | #define __init_memblock __meminit |
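for_each_memblock_type() is a new convenience iterator over one memblock type's regions; note that it relies on an idx variable declared by the caller. A hedged usage sketch (foo_dump_memory_regions is illustrative):

    #include <linux/memblock.h>
    #include <linux/printk.h>

    static void foo_dump_memory_regions(void)
    {
            struct memblock_type *type = &memblock.memory;
            struct memblock_region *rgn;
            int idx;    /* the macro uses idx as its loop cursor */

            for_each_memblock_type(type, rgn)
                    pr_info("region %d: base=%pa size=%pa\n",
                            idx, &rgn->base, &rgn->size);
    }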
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index cd0e2413c358..2292468f2a30 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h | |||
@@ -85,32 +85,10 @@ enum mem_cgroup_events_target { | |||
85 | MEM_CGROUP_NTARGETS, | 85 | MEM_CGROUP_NTARGETS, |
86 | }; | 86 | }; |
87 | 87 | ||
88 | /* | ||
89 | * Bits in struct cg_proto.flags | ||
90 | */ | ||
91 | enum cg_proto_flags { | ||
92 | /* Currently active and new sockets should be assigned to cgroups */ | ||
93 | MEMCG_SOCK_ACTIVE, | ||
94 | /* It was ever activated; we must disarm static keys on destruction */ | ||
95 | MEMCG_SOCK_ACTIVATED, | ||
96 | }; | ||
97 | |||
98 | struct cg_proto { | 88 | struct cg_proto { |
99 | struct page_counter memory_allocated; /* Current allocated memory. */ | 89 | struct page_counter memory_allocated; /* Current allocated memory. */ |
100 | struct percpu_counter sockets_allocated; /* Current number of sockets. */ | ||
101 | int memory_pressure; | 90 | int memory_pressure; |
102 | long sysctl_mem[3]; | 91 | bool active; |
103 | unsigned long flags; | ||
104 | /* | ||
105 | * memcg field is used to find which memcg we belong directly | ||
106 | * Each memcg struct can hold more than one cg_proto, so container_of | ||
107 | * won't really cut. | ||
108 | * | ||
109 | * The elegant solution would be having an inverse function to | ||
110 | * proto_cgroup in struct proto, but that means polluting the structure | ||
111 | * for everybody, instead of just for memcg users. | ||
112 | */ | ||
113 | struct mem_cgroup *memcg; | ||
114 | }; | 92 | }; |
115 | 93 | ||
116 | #ifdef CONFIG_MEMCG | 94 | #ifdef CONFIG_MEMCG |
@@ -192,6 +170,9 @@ struct mem_cgroup { | |||
192 | unsigned long low; | 170 | unsigned long low; |
193 | unsigned long high; | 171 | unsigned long high; |
194 | 172 | ||
173 | /* Range enforcement for interrupt charges */ | ||
174 | struct work_struct high_work; | ||
175 | |||
195 | unsigned long soft_limit; | 176 | unsigned long soft_limit; |
196 | 177 | ||
197 | /* vmpressure notifications */ | 178 | /* vmpressure notifications */ |
@@ -268,6 +249,10 @@ struct mem_cgroup { | |||
268 | struct wb_domain cgwb_domain; | 249 | struct wb_domain cgwb_domain; |
269 | #endif | 250 | #endif |
270 | 251 | ||
252 | #ifdef CONFIG_INET | ||
253 | unsigned long socket_pressure; | ||
254 | #endif | ||
255 | |||
271 | /* List of events which userspace want to receive */ | 256 | /* List of events which userspace want to receive */ |
272 | struct list_head event_list; | 257 | struct list_head event_list; |
273 | spinlock_t event_list_lock; | 258 | spinlock_t event_list_lock; |
@@ -275,7 +260,8 @@ struct mem_cgroup { | |||
275 | struct mem_cgroup_per_node *nodeinfo[0]; | 260 | struct mem_cgroup_per_node *nodeinfo[0]; |
276 | /* WARNING: nodeinfo must be the last member here */ | 261 | /* WARNING: nodeinfo must be the last member here */ |
277 | }; | 262 | }; |
278 | extern struct cgroup_subsys_state *mem_cgroup_root_css; | 263 | |
264 | extern struct mem_cgroup *root_mem_cgroup; | ||
279 | 265 | ||
280 | /** | 266 | /** |
281 | * mem_cgroup_events - count memory events against a cgroup | 267 | * mem_cgroup_events - count memory events against a cgroup |
@@ -308,18 +294,34 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *); | |||
308 | 294 | ||
309 | bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg); | 295 | bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg); |
310 | struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); | 296 | struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); |
311 | struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg); | ||
312 | 297 | ||
313 | static inline | 298 | static inline |
314 | struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){ | 299 | struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){ |
315 | return css ? container_of(css, struct mem_cgroup, css) : NULL; | 300 | return css ? container_of(css, struct mem_cgroup, css) : NULL; |
316 | } | 301 | } |
317 | 302 | ||
303 | #define mem_cgroup_from_counter(counter, member) \ | ||
304 | container_of(counter, struct mem_cgroup, member) | ||
305 | |||
318 | struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *, | 306 | struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *, |
319 | struct mem_cgroup *, | 307 | struct mem_cgroup *, |
320 | struct mem_cgroup_reclaim_cookie *); | 308 | struct mem_cgroup_reclaim_cookie *); |
321 | void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *); | 309 | void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *); |
322 | 310 | ||
311 | /** | ||
312 | * parent_mem_cgroup - find the accounting parent of a memcg | ||
313 | * @memcg: memcg whose parent to find | ||
314 | * | ||
315 | * Returns the parent memcg, or NULL if this is the root or the memory | ||
316 | * controller is in legacy no-hierarchy mode. | ||
317 | */ | ||
318 | static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg) | ||
319 | { | ||
320 | if (!memcg->memory.parent) | ||
321 | return NULL; | ||
322 | return mem_cgroup_from_counter(memcg->memory.parent, memory); | ||
323 | } | ||
324 | |||
323 | static inline bool mem_cgroup_is_descendant(struct mem_cgroup *memcg, | 325 | static inline bool mem_cgroup_is_descendant(struct mem_cgroup *memcg, |
324 | struct mem_cgroup *root) | 326 | struct mem_cgroup *root) |
325 | { | 327 | { |
@@ -671,12 +673,6 @@ void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx) | |||
671 | } | 673 | } |
672 | #endif /* CONFIG_MEMCG */ | 674 | #endif /* CONFIG_MEMCG */ |
673 | 675 | ||
674 | enum { | ||
675 | UNDER_LIMIT, | ||
676 | SOFT_LIMIT, | ||
677 | OVER_LIMIT, | ||
678 | }; | ||
679 | |||
680 | #ifdef CONFIG_CGROUP_WRITEBACK | 676 | #ifdef CONFIG_CGROUP_WRITEBACK |
681 | 677 | ||
682 | struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg); | 678 | struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg); |
@@ -703,20 +699,35 @@ static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb, | |||
703 | #endif /* CONFIG_CGROUP_WRITEBACK */ | 699 | #endif /* CONFIG_CGROUP_WRITEBACK */ |
704 | 700 | ||
705 | struct sock; | 701 | struct sock; |
706 | #if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM) | ||
707 | void sock_update_memcg(struct sock *sk); | 702 | void sock_update_memcg(struct sock *sk); |
708 | void sock_release_memcg(struct sock *sk); | 703 | void sock_release_memcg(struct sock *sk); |
709 | #else | 704 | bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); |
710 | static inline void sock_update_memcg(struct sock *sk) | 705 | void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); |
706 | #if defined(CONFIG_MEMCG) && defined(CONFIG_INET) | ||
707 | extern struct static_key_false memcg_sockets_enabled_key; | ||
708 | #define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key) | ||
709 | static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) | ||
711 | { | 710 | { |
711 | #ifdef CONFIG_MEMCG_KMEM | ||
712 | if (memcg->tcp_mem.memory_pressure) | ||
713 | return true; | ||
714 | #endif | ||
715 | do { | ||
716 | if (time_before(jiffies, memcg->socket_pressure)) | ||
717 | return true; | ||
718 | } while ((memcg = parent_mem_cgroup(memcg))); | ||
719 | return false; | ||
712 | } | 720 | } |
713 | static inline void sock_release_memcg(struct sock *sk) | 721 | #else |
722 | #define mem_cgroup_sockets_enabled 0 | ||
723 | static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) | ||
714 | { | 724 | { |
725 | return false; | ||
715 | } | 726 | } |
716 | #endif /* CONFIG_INET && CONFIG_MEMCG_KMEM */ | 727 | #endif |
717 | 728 | ||
718 | #ifdef CONFIG_MEMCG_KMEM | 729 | #ifdef CONFIG_MEMCG_KMEM |
719 | extern struct static_key memcg_kmem_enabled_key; | 730 | extern struct static_key_false memcg_kmem_enabled_key; |
720 | 731 | ||
721 | extern int memcg_nr_cache_ids; | 732 | extern int memcg_nr_cache_ids; |
722 | void memcg_get_cache_ids(void); | 733 | void memcg_get_cache_ids(void); |
@@ -732,7 +743,7 @@ void memcg_put_cache_ids(void); | |||
732 | 743 | ||
733 | static inline bool memcg_kmem_enabled(void) | 744 | static inline bool memcg_kmem_enabled(void) |
734 | { | 745 | { |
735 | return static_key_false(&memcg_kmem_enabled_key); | 746 | return static_branch_unlikely(&memcg_kmem_enabled_key); |
736 | } | 747 | } |
737 | 748 | ||
738 | static inline bool memcg_kmem_is_active(struct mem_cgroup *memcg) | 749 | static inline bool memcg_kmem_is_active(struct mem_cgroup *memcg) |
@@ -766,15 +777,13 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg) | |||
766 | return memcg ? memcg->kmemcg_id : -1; | 777 | return memcg ? memcg->kmemcg_id : -1; |
767 | } | 778 | } |
768 | 779 | ||
769 | struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep); | 780 | struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp); |
770 | void __memcg_kmem_put_cache(struct kmem_cache *cachep); | 781 | void __memcg_kmem_put_cache(struct kmem_cache *cachep); |
771 | 782 | ||
772 | static inline bool __memcg_kmem_bypass(gfp_t gfp) | 783 | static inline bool __memcg_kmem_bypass(void) |
773 | { | 784 | { |
774 | if (!memcg_kmem_enabled()) | 785 | if (!memcg_kmem_enabled()) |
775 | return true; | 786 | return true; |
776 | if (gfp & __GFP_NOACCOUNT) | ||
777 | return true; | ||
778 | if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD)) | 787 | if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD)) |
779 | return true; | 788 | return true; |
780 | return false; | 789 | return false; |
@@ -791,7 +800,9 @@ static inline bool __memcg_kmem_bypass(gfp_t gfp) | |||
791 | static __always_inline int memcg_kmem_charge(struct page *page, | 800 | static __always_inline int memcg_kmem_charge(struct page *page, |
792 | gfp_t gfp, int order) | 801 | gfp_t gfp, int order) |
793 | { | 802 | { |
794 | if (__memcg_kmem_bypass(gfp)) | 803 | if (__memcg_kmem_bypass()) |
804 | return 0; | ||
805 | if (!(gfp & __GFP_ACCOUNT)) | ||
795 | return 0; | 806 | return 0; |
796 | return __memcg_kmem_charge(page, gfp, order); | 807 | return __memcg_kmem_charge(page, gfp, order); |
797 | } | 808 | } |
@@ -810,16 +821,15 @@ static __always_inline void memcg_kmem_uncharge(struct page *page, int order) | |||
810 | /** | 821 | /** |
811 | * memcg_kmem_get_cache: selects the correct per-memcg cache for allocation | 822 | * memcg_kmem_get_cache: selects the correct per-memcg cache for allocation |
812 | * @cachep: the original global kmem cache | 823 | * @cachep: the original global kmem cache |
813 | * @gfp: allocation flags. | ||
814 | * | 824 | * |
815 | * All memory allocated from a per-memcg cache is charged to the owner memcg. | 825 | * All memory allocated from a per-memcg cache is charged to the owner memcg. |
816 | */ | 826 | */ |
817 | static __always_inline struct kmem_cache * | 827 | static __always_inline struct kmem_cache * |
818 | memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp) | 828 | memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp) |
819 | { | 829 | { |
820 | if (__memcg_kmem_bypass(gfp)) | 830 | if (__memcg_kmem_bypass()) |
821 | return cachep; | 831 | return cachep; |
822 | return __memcg_kmem_get_cache(cachep); | 832 | return __memcg_kmem_get_cache(cachep, gfp); |
823 | } | 833 | } |
824 | 834 | ||
825 | static __always_inline void memcg_kmem_put_cache(struct kmem_cache *cachep) | 835 | static __always_inline void memcg_kmem_put_cache(struct kmem_cache *cachep) |
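mem_cgroup_under_socket_pressure() reports pressure if any ancestor memcg signalled it within its socket_pressure window (or, with kmem enabled, if tcp_mem is under pressure). A hedged sketch of the caller-side test the networking changes in this series adopt; the sk_memcg field is assumed from the net/sock.h side and foo_under_memory_pressure is illustrative:

    #include <net/sock.h>

    static inline bool foo_under_memory_pressure(const struct sock *sk)
    {
            /* Only consult the memcg if socket accounting is enabled
             * and this socket is actually attached to a memcg. */
            return mem_cgroup_sockets_enabled && sk->sk_memcg &&
                   mem_cgroup_under_socket_pressure(sk->sk_memcg);
    }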
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 3d385c81c153..2696c1f05ed1 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h | |||
@@ -122,7 +122,7 @@ struct sp_node { | |||
122 | 122 | ||
123 | struct shared_policy { | 123 | struct shared_policy { |
124 | struct rb_root root; | 124 | struct rb_root root; |
125 | spinlock_t lock; | 125 | rwlock_t lock; |
126 | }; | 126 | }; |
127 | 127 | ||
128 | int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst); | 128 | int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst); |
diff --git a/include/linux/mm.h b/include/linux/mm.h index 00bad7793788..839d9e9a1c38 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -51,6 +51,17 @@ extern int sysctl_legacy_va_layout; | |||
51 | #define sysctl_legacy_va_layout 0 | 51 | #define sysctl_legacy_va_layout 0 |
52 | #endif | 52 | #endif |
53 | 53 | ||
54 | #ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS | ||
55 | extern const int mmap_rnd_bits_min; | ||
56 | extern const int mmap_rnd_bits_max; | ||
57 | extern int mmap_rnd_bits __read_mostly; | ||
58 | #endif | ||
59 | #ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS | ||
60 | extern const int mmap_rnd_compat_bits_min; | ||
61 | extern const int mmap_rnd_compat_bits_max; | ||
62 | extern int mmap_rnd_compat_bits __read_mostly; | ||
63 | #endif | ||
64 | |||
54 | #include <asm/page.h> | 65 | #include <asm/page.h> |
55 | #include <asm/pgtable.h> | 66 | #include <asm/pgtable.h> |
56 | #include <asm/processor.h> | 67 | #include <asm/processor.h> |
@@ -225,10 +236,14 @@ extern pgprot_t protection_map[16]; | |||
225 | * ->fault function. The vma's ->fault is responsible for returning a bitmask | 236 | * ->fault function. The vma's ->fault is responsible for returning a bitmask |
226 | * of VM_FAULT_xxx flags that give details about how the fault was handled. | 237 | * of VM_FAULT_xxx flags that give details about how the fault was handled. |
227 | * | 238 | * |
239 | * MM layer fills up gfp_mask for page allocations but fault handler might | ||
240 | * alter it if its implementation requires a different allocation context. | ||
241 | * | ||
228 | * pgoff should be used in favour of virtual_address, if possible. | 242 | * pgoff should be used in favour of virtual_address, if possible. |
229 | */ | 243 | */ |
230 | struct vm_fault { | 244 | struct vm_fault { |
231 | unsigned int flags; /* FAULT_FLAG_xxx flags */ | 245 | unsigned int flags; /* FAULT_FLAG_xxx flags */ |
246 | gfp_t gfp_mask; /* gfp mask to be used for allocations */ | ||
232 | pgoff_t pgoff; /* Logical page offset based on vma */ | 247 | pgoff_t pgoff; /* Logical page offset based on vma */ |
233 | void __user *virtual_address; /* Faulting virtual address */ | 248 | void __user *virtual_address; /* Faulting virtual address */ |
234 | 249 | ||
@@ -1361,10 +1376,26 @@ static inline void dec_mm_counter(struct mm_struct *mm, int member) | |||
1361 | atomic_long_dec(&mm->rss_stat.count[member]); | 1376 | atomic_long_dec(&mm->rss_stat.count[member]); |
1362 | } | 1377 | } |
1363 | 1378 | ||
1379 | /* Optimized variant when page is already known not to be PageAnon */ | ||
1380 | static inline int mm_counter_file(struct page *page) | ||
1381 | { | ||
1382 | if (PageSwapBacked(page)) | ||
1383 | return MM_SHMEMPAGES; | ||
1384 | return MM_FILEPAGES; | ||
1385 | } | ||
1386 | |||
1387 | static inline int mm_counter(struct page *page) | ||
1388 | { | ||
1389 | if (PageAnon(page)) | ||
1390 | return MM_ANONPAGES; | ||
1391 | return mm_counter_file(page); | ||
1392 | } | ||
1393 | |||
1364 | static inline unsigned long get_mm_rss(struct mm_struct *mm) | 1394 | static inline unsigned long get_mm_rss(struct mm_struct *mm) |
1365 | { | 1395 | { |
1366 | return get_mm_counter(mm, MM_FILEPAGES) + | 1396 | return get_mm_counter(mm, MM_FILEPAGES) + |
1367 | get_mm_counter(mm, MM_ANONPAGES); | 1397 | get_mm_counter(mm, MM_ANONPAGES) + |
1398 | get_mm_counter(mm, MM_SHMEMPAGES); | ||
1368 | } | 1399 | } |
1369 | 1400 | ||
1370 | static inline unsigned long get_mm_hiwater_rss(struct mm_struct *mm) | 1401 | static inline unsigned long get_mm_hiwater_rss(struct mm_struct *mm) |
@@ -1898,7 +1929,9 @@ extern void mm_drop_all_locks(struct mm_struct *mm); | |||
1898 | extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file); | 1929 | extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file); |
1899 | extern struct file *get_mm_exe_file(struct mm_struct *mm); | 1930 | extern struct file *get_mm_exe_file(struct mm_struct *mm); |
1900 | 1931 | ||
1901 | extern int may_expand_vm(struct mm_struct *mm, unsigned long npages); | 1932 | extern bool may_expand_vm(struct mm_struct *, vm_flags_t, unsigned long npages); |
1933 | extern void vm_stat_account(struct mm_struct *, vm_flags_t, long npages); | ||
1934 | |||
1902 | extern struct vm_area_struct *_install_special_mapping(struct mm_struct *mm, | 1935 | extern struct vm_area_struct *_install_special_mapping(struct mm_struct *mm, |
1903 | unsigned long addr, unsigned long len, | 1936 | unsigned long addr, unsigned long len, |
1904 | unsigned long flags, | 1937 | unsigned long flags, |
@@ -2116,15 +2149,6 @@ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, | |||
2116 | extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, | 2149 | extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, |
2117 | unsigned long size, pte_fn_t fn, void *data); | 2150 | unsigned long size, pte_fn_t fn, void *data); |
2118 | 2151 | ||
2119 | #ifdef CONFIG_PROC_FS | ||
2120 | void vm_stat_account(struct mm_struct *, unsigned long, struct file *, long); | ||
2121 | #else | ||
2122 | static inline void vm_stat_account(struct mm_struct *mm, | ||
2123 | unsigned long flags, struct file *file, long pages) | ||
2124 | { | ||
2125 | mm->total_vm += pages; | ||
2126 | } | ||
2127 | #endif /* CONFIG_PROC_FS */ | ||
2128 | 2152 | ||
2129 | #ifdef CONFIG_DEBUG_PAGEALLOC | 2153 | #ifdef CONFIG_DEBUG_PAGEALLOC |
2130 | extern bool _debug_pagealloc_enabled; | 2154 | extern bool _debug_pagealloc_enabled; |
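The new mm_counter()/mm_counter_file() helpers centralize the choice among MM_ANONPAGES, MM_SHMEMPAGES, and MM_FILEPAGES, so rss accounting sites no longer open-code the PageAnon/PageSwapBacked checks. A hedged sketch of the pattern; foo_account_page is illustrative:

    #include <linux/mm.h>

    static void foo_account_page(struct mm_struct *mm, struct page *page)
    {
            /* mm_counter() picks MM_ANONPAGES, MM_SHMEMPAGES or
             * MM_FILEPAGES based on the page's type. */
            inc_mm_counter(mm, mm_counter(page));
    }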
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index cf55945c83fb..712e8c37a200 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h | |||
@@ -100,4 +100,6 @@ static __always_inline enum lru_list page_lru(struct page *page) | |||
100 | return lru; | 100 | return lru; |
101 | } | 101 | } |
102 | 102 | ||
103 | #define lru_to_page(head) (list_entry((head)->prev, struct page, lru)) | ||
104 | |||
103 | #endif | 105 | #endif |
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index f8d1492a114f..6bc9a0ce2253 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h | |||
@@ -369,9 +369,10 @@ struct core_state { | |||
369 | }; | 369 | }; |
370 | 370 | ||
371 | enum { | 371 | enum { |
372 | MM_FILEPAGES, | 372 | MM_FILEPAGES, /* Resident file mapping pages */ |
373 | MM_ANONPAGES, | 373 | MM_ANONPAGES, /* Resident anonymous pages */ |
374 | MM_SWAPENTS, | 374 | MM_SWAPENTS, /* Anonymous swap entries */ |
375 | MM_SHMEMPAGES, /* Resident shared memory pages */ | ||
375 | NR_MM_COUNTERS | 376 | NR_MM_COUNTERS |
376 | }; | 377 | }; |
377 | 378 | ||
@@ -426,7 +427,7 @@ struct mm_struct { | |||
426 | unsigned long total_vm; /* Total pages mapped */ | 427 | unsigned long total_vm; /* Total pages mapped */ |
427 | unsigned long locked_vm; /* Pages that have PG_mlocked set */ | 428 | unsigned long locked_vm; /* Pages that have PG_mlocked set */ |
428 | unsigned long pinned_vm; /* Refcount permanently increased */ | 429 | unsigned long pinned_vm; /* Refcount permanently increased */ |
429 | unsigned long shared_vm; /* Shared pages (files) */ | 430 | unsigned long data_vm; /* VM_WRITE & ~VM_SHARED/GROWSDOWN */ |
430 | unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE */ | 431 | unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE */ |
431 | unsigned long stack_vm; /* VM_GROWSUP/DOWN */ | 432 | unsigned long stack_vm; /* VM_GROWSUP/DOWN */ |
432 | unsigned long def_flags; | 433 | unsigned long def_flags; |
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index e23a9e704536..33bb1b19273e 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h | |||
@@ -195,11 +195,6 @@ static inline int is_active_lru(enum lru_list lru) | |||
195 | return (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE); | 195 | return (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE); |
196 | } | 196 | } |
197 | 197 | ||
198 | static inline int is_unevictable_lru(enum lru_list lru) | ||
199 | { | ||
200 | return (lru == LRU_UNEVICTABLE); | ||
201 | } | ||
202 | |||
203 | struct zone_reclaim_stat { | 198 | struct zone_reclaim_stat { |
204 | /* | 199 | /* |
205 | * The pageout code in vmscan.c keeps track of how many of the | 200 | * The pageout code in vmscan.c keeps track of how many of the |
@@ -361,10 +356,10 @@ struct zone { | |||
361 | struct per_cpu_pageset __percpu *pageset; | 356 | struct per_cpu_pageset __percpu *pageset; |
362 | 357 | ||
363 | /* | 358 | /* |
364 | * This is a per-zone reserve of pages that should not be | 359 | * This is a per-zone reserve of pages that are not available |
365 | * considered dirtyable memory. | 360 | * to userspace allocations. |
366 | */ | 361 | */ |
367 | unsigned long dirty_balance_reserve; | 362 | unsigned long totalreserve_pages; |
368 | 363 | ||
369 | #ifndef CONFIG_SPARSEMEM | 364 | #ifndef CONFIG_SPARSEMEM |
370 | /* | 365 | /* |
@@ -576,19 +571,17 @@ static inline bool zone_is_empty(struct zone *zone) | |||
576 | /* Maximum number of zones on a zonelist */ | 571 | /* Maximum number of zones on a zonelist */ |
577 | #define MAX_ZONES_PER_ZONELIST (MAX_NUMNODES * MAX_NR_ZONES) | 572 | #define MAX_ZONES_PER_ZONELIST (MAX_NUMNODES * MAX_NR_ZONES) |
578 | 573 | ||
574 | enum { | ||
575 | ZONELIST_FALLBACK, /* zonelist with fallback */ | ||
579 | #ifdef CONFIG_NUMA | 576 | #ifdef CONFIG_NUMA |
580 | 577 | /* | |
581 | /* | 578 | * The NUMA zonelists are doubled because we need zonelists that |
582 | * The NUMA zonelists are doubled because we need zonelists that restrict the | 579 | * restrict the allocations to a single node for __GFP_THISNODE. |
583 | * allocations to a single node for __GFP_THISNODE. | 580 | */ |
584 | * | 581 | ZONELIST_NOFALLBACK, /* zonelist without fallback (__GFP_THISNODE) */ |
585 | * [0] : Zonelist with fallback | ||
586 | * [1] : No fallback (__GFP_THISNODE) | ||
587 | */ | ||
588 | #define MAX_ZONELISTS 2 | ||
589 | #else | ||
590 | #define MAX_ZONELISTS 1 | ||
591 | #endif | 582 | #endif |
583 | MAX_ZONELISTS | ||
584 | }; | ||
592 | 585 | ||
593 | /* | 586 | /* |
594 | * This struct contains information about a zone in a zonelist. It is stored | 587 | * This struct contains information about a zone in a zonelist. It is stored |
@@ -1207,13 +1200,13 @@ unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long); | |||
1207 | * the zone and PFN linkages are still valid. This is expensive, but walkers | 1200 | * the zone and PFN linkages are still valid. This is expensive, but walkers |
1208 | * of the full memmap are extremely rare. | 1201 | * of the full memmap are extremely rare. |
1209 | */ | 1202 | */ |
1210 | int memmap_valid_within(unsigned long pfn, | 1203 | bool memmap_valid_within(unsigned long pfn, |
1211 | struct page *page, struct zone *zone); | 1204 | struct page *page, struct zone *zone); |
1212 | #else | 1205 | #else |
1213 | static inline int memmap_valid_within(unsigned long pfn, | 1206 | static inline bool memmap_valid_within(unsigned long pfn, |
1214 | struct page *page, struct zone *zone) | 1207 | struct page *page, struct zone *zone) |
1215 | { | 1208 | { |
1216 | return 1; | 1209 | return true; |
1217 | } | 1210 | } |
1218 | #endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */ | 1211 | #endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */ |
1219 | 1212 | ||
diff --git a/include/linux/pfn.h b/include/linux/pfn.h index 7646637221f3..97f3e88aead4 100644 --- a/include/linux/pfn.h +++ b/include/linux/pfn.h | |||
@@ -9,5 +9,6 @@ | |||
9 | #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) | 9 | #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) |
10 | #define PFN_DOWN(x) ((x) >> PAGE_SHIFT) | 10 | #define PFN_DOWN(x) ((x) >> PAGE_SHIFT) |
11 | #define PFN_PHYS(x) ((phys_addr_t)(x) << PAGE_SHIFT) | 11 | #define PFN_PHYS(x) ((phys_addr_t)(x) << PAGE_SHIFT) |
12 | #define PHYS_PFN(x) ((unsigned long)((x) >> PAGE_SHIFT)) | ||
12 | 13 | ||
13 | #endif | 14 | #endif |
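PHYS_PFN is the new inverse of PFN_PHYS, and __phys_to_pfn() in memory_model.h above is now defined in terms of it. A self-contained userspace sketch of the round trip, with PAGE_SHIFT fixed at 12 for illustration:

    #include <stdio.h>

    typedef unsigned long long phys_addr_t;

    #define PAGE_SHIFT 12
    #define PFN_PHYS(x) ((phys_addr_t)(x) << PAGE_SHIFT)
    #define PHYS_PFN(x) ((unsigned long)((x) >> PAGE_SHIFT))

    int main(void)
    {
            phys_addr_t paddr = 0x12345678ULL;

            /* Round-trips to the page-aligned address 0x12345000. */
            printf("pfn=%lu phys=%#llx\n",
                   PHYS_PFN(paddr), PFN_PHYS(PHYS_PFN(paddr)));
            return 0;
    }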
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 50777b5b1e4c..a43f41cb3c43 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h | |||
@@ -60,6 +60,10 @@ extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, | |||
60 | extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); | 60 | extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); |
61 | extern int shmem_unuse(swp_entry_t entry, struct page *page); | 61 | extern int shmem_unuse(swp_entry_t entry, struct page *page); |
62 | 62 | ||
63 | extern unsigned long shmem_swap_usage(struct vm_area_struct *vma); | ||
64 | extern unsigned long shmem_partial_swap_usage(struct address_space *mapping, | ||
65 | pgoff_t start, pgoff_t end); | ||
66 | |||
63 | static inline struct page *shmem_read_mapping_page( | 67 | static inline struct page *shmem_read_mapping_page( |
64 | struct address_space *mapping, pgoff_t index) | 68 | struct address_space *mapping, pgoff_t index) |
65 | { | 69 | { |
diff --git a/include/linux/slab.h b/include/linux/slab.h index 2037a861e367..3ffee7422012 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h | |||
@@ -86,6 +86,11 @@ | |||
86 | #else | 86 | #else |
87 | # define SLAB_FAILSLAB 0x00000000UL | 87 | # define SLAB_FAILSLAB 0x00000000UL |
88 | #endif | 88 | #endif |
89 | #ifdef CONFIG_MEMCG_KMEM | ||
90 | # define SLAB_ACCOUNT 0x04000000UL /* Account to memcg */ | ||
91 | #else | ||
92 | # define SLAB_ACCOUNT 0x00000000UL | ||
93 | #endif | ||
89 | 94 | ||
90 | /* The following flags affect the page allocator grouping pages by mobility */ | 95 | /* The following flags affect the page allocator grouping pages by mobility */ |
91 | #define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ | 96 | #define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ |
diff --git a/include/linux/swap.h b/include/linux/swap.h index 7ba7dccaf0e7..066bd21765ad 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h | |||
@@ -287,7 +287,6 @@ static inline void workingset_node_shadows_dec(struct radix_tree_node *node) | |||
287 | /* linux/mm/page_alloc.c */ | 287 | /* linux/mm/page_alloc.c */ |
288 | extern unsigned long totalram_pages; | 288 | extern unsigned long totalram_pages; |
289 | extern unsigned long totalreserve_pages; | 289 | extern unsigned long totalreserve_pages; |
290 | extern unsigned long dirty_balance_reserve; | ||
291 | extern unsigned long nr_free_buffer_pages(void); | 290 | extern unsigned long nr_free_buffer_pages(void); |
292 | extern unsigned long nr_free_pagecache_pages(void); | 291 | extern unsigned long nr_free_pagecache_pages(void); |
293 | 292 | ||
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index ff307b548ed3..b4c2a485b28a 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h | |||
@@ -56,9 +56,10 @@ extern long do_no_restart_syscall(struct restart_block *parm); | |||
56 | #ifdef __KERNEL__ | 56 | #ifdef __KERNEL__ |
57 | 57 | ||
58 | #ifdef CONFIG_DEBUG_STACK_USAGE | 58 | #ifdef CONFIG_DEBUG_STACK_USAGE |
59 | # define THREADINFO_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) | 59 | # define THREADINFO_GFP (GFP_KERNEL_ACCOUNT | __GFP_NOTRACK | \ |
60 | __GFP_ZERO) | ||
60 | #else | 61 | #else |
61 | # define THREADINFO_GFP (GFP_KERNEL | __GFP_NOTRACK) | 62 | # define THREADINFO_GFP (GFP_KERNEL_ACCOUNT | __GFP_NOTRACK) |
62 | #endif | 63 | #endif |
63 | 64 | ||
64 | /* | 65 | /* |
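Switching THREADINFO_GFP to GFP_KERNEL_ACCOUNT means thread_info/kernel-stack pages are charged to the task's kmem cgroup. A hedged sketch of the consumer, roughly following kernel/fork.c of this era; the foo_ prefix marks it as illustrative:

    #include <linux/gfp.h>
    #include <linux/mm.h>
    #include <linux/thread_info.h>

    static struct thread_info *foo_alloc_thread_info_node(int node)
    {
            /* THREADINFO_GFP now carries __GFP_ACCOUNT, so these
             * pages count against the allocating task's memcg. */
            struct page *page = alloc_kmem_pages_node(node, THREADINFO_GFP,
                                                      THREAD_SIZE_ORDER);

            return page ? page_address(page) : NULL;
    }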
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 3bff87a25a42..d1f1d338af20 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h | |||
@@ -14,7 +14,6 @@ struct vm_area_struct; /* vma defining user mapping in mm_types.h */ | |||
14 | #define VM_ALLOC 0x00000002 /* vmalloc() */ | 14 | #define VM_ALLOC 0x00000002 /* vmalloc() */ |
15 | #define VM_MAP 0x00000004 /* vmap()ed pages */ | 15 | #define VM_MAP 0x00000004 /* vmap()ed pages */ |
16 | #define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */ | 16 | #define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */ |
17 | #define VM_VPAGES 0x00000010 /* buffer for pages was vmalloc'ed */ | ||
18 | #define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */ | 17 | #define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */ |
19 | #define VM_NO_GUARD 0x00000040 /* don't add guard page */ | 18 | #define VM_NO_GUARD 0x00000040 /* don't add guard page */ |
20 | #define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */ | 19 | #define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */ |
diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h index 3e4535876d37..3347cc3ec0ab 100644 --- a/include/linux/vmpressure.h +++ b/include/linux/vmpressure.h | |||
@@ -12,6 +12,9 @@ | |||
12 | struct vmpressure { | 12 | struct vmpressure { |
13 | unsigned long scanned; | 13 | unsigned long scanned; |
14 | unsigned long reclaimed; | 14 | unsigned long reclaimed; |
15 | |||
16 | unsigned long tree_scanned; | ||
17 | unsigned long tree_reclaimed; | ||
15 | /* The lock is used to keep the scanned/reclaimed above in sync. */ | 18 | /* The lock is used to keep the scanned/reclaimed above in sync. */ |
16 | struct spinlock sr_lock; | 19 | struct spinlock sr_lock; |
17 | 20 | ||
@@ -26,7 +29,7 @@ struct vmpressure { | |||
26 | struct mem_cgroup; | 29 | struct mem_cgroup; |
27 | 30 | ||
28 | #ifdef CONFIG_MEMCG | 31 | #ifdef CONFIG_MEMCG |
29 | extern void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, | 32 | extern void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree, |
30 | unsigned long scanned, unsigned long reclaimed); | 33 | unsigned long scanned, unsigned long reclaimed); |
31 | extern void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio); | 34 | extern void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio); |
32 | 35 | ||
@@ -40,7 +43,7 @@ extern int vmpressure_register_event(struct mem_cgroup *memcg, | |||
40 | extern void vmpressure_unregister_event(struct mem_cgroup *memcg, | 43 | extern void vmpressure_unregister_event(struct mem_cgroup *memcg, |
41 | struct eventfd_ctx *eventfd); | 44 | struct eventfd_ctx *eventfd); |
42 | #else | 45 | #else |
43 | static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, | 46 | static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree, |
44 | unsigned long scanned, unsigned long reclaimed) {} | 47 | unsigned long scanned, unsigned long reclaimed) {} |
45 | static inline void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, | 48 | static inline void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, |
46 | int prio) {} | 49 | int prio) {} |
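The new bool tree argument selects which counter pair above gets updated: tree-wide reclaim feeds tree_scanned/tree_reclaimed and may fire eventfd notifications up the hierarchy, while non-tree calls record purely local pressure for that memcg level. A hedged call-site sketch (variable names assumed, not a literal excerpt):

	/* Hierarchical accounting: ancestors listening on eventfds
	 * may be notified once enough of the subtree was scanned. */
	vmpressure(sc->gfp_mask, memcg, true, nr_scanned, nr_reclaimed);

	/* Local-only accounting for this memcg's own reclaim level;
	 * no tree-wide event is generated. */
	vmpressure(sc->gfp_mask, memcg, false, nr_scanned, nr_reclaimed);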
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 3e5d9075960f..73fae8c4a5fb 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h | |||
@@ -189,6 +189,7 @@ extern void __inc_zone_state(struct zone *, enum zone_stat_item); | |||
189 | extern void dec_zone_state(struct zone *, enum zone_stat_item); | 189 | extern void dec_zone_state(struct zone *, enum zone_stat_item); |
190 | extern void __dec_zone_state(struct zone *, enum zone_stat_item); | 190 | extern void __dec_zone_state(struct zone *, enum zone_stat_item); |
191 | 191 | ||
192 | void quiet_vmstat(void); | ||
192 | void cpu_vm_stats_fold(int cpu); | 193 | void cpu_vm_stats_fold(int cpu); |
193 | void refresh_zone_stat_thresholds(void); | 194 | void refresh_zone_stat_thresholds(void); |
194 | 195 | ||
@@ -249,6 +250,7 @@ static inline void __dec_zone_page_state(struct page *page, | |||
249 | 250 | ||
250 | static inline void refresh_zone_stat_thresholds(void) { } | 251 | static inline void refresh_zone_stat_thresholds(void) { } |
251 | static inline void cpu_vm_stats_fold(int cpu) { } | 252 | static inline void cpu_vm_stats_fold(int cpu) { } |
253 | static inline void quiet_vmstat(void) { } | ||
252 | 254 | ||
253 | static inline void drain_zonestat(struct zone *zone, | 255 | static inline void drain_zonestat(struct zone *zone, |
254 | struct per_cpu_pageset *pset) { } | 256 | struct per_cpu_pageset *pset) { } |
diff --git a/include/net/sock.h b/include/net/sock.h index e830c1006935..b9e7b3d863a0 100644 --- a/include/net/sock.h +++ b/include/net/sock.h | |||
@@ -71,22 +71,6 @@ | |||
71 | #include <net/tcp_states.h> | 71 | #include <net/tcp_states.h> |
72 | #include <linux/net_tstamp.h> | 72 | #include <linux/net_tstamp.h> |
73 | 73 | ||
74 | struct cgroup; | ||
75 | struct cgroup_subsys; | ||
76 | #ifdef CONFIG_NET | ||
77 | int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss); | ||
78 | void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg); | ||
79 | #else | ||
80 | static inline | ||
81 | int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss) | ||
82 | { | ||
83 | return 0; | ||
84 | } | ||
85 | static inline | ||
86 | void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg) | ||
87 | { | ||
88 | } | ||
89 | #endif | ||
90 | /* | 74 | /* |
91 | * This structure really needs to be cleaned up. | 75 | * This structure really needs to be cleaned up. |
92 | * Most of it is for TCP, and not used by any of | 76 | * Most of it is for TCP, and not used by any of |
@@ -245,7 +229,6 @@ struct sock_common { | |||
245 | /* public: */ | 229 | /* public: */ |
246 | }; | 230 | }; |
247 | 231 | ||
248 | struct cg_proto; | ||
249 | /** | 232 | /** |
250 | * struct sock - network layer representation of sockets | 233 | * struct sock - network layer representation of sockets |
251 | * @__sk_common: shared layout with inet_timewait_sock | 234 | * @__sk_common: shared layout with inet_timewait_sock |
@@ -310,7 +293,7 @@ struct cg_proto; | |||
310 | * @sk_security: used by security modules | 293 | * @sk_security: used by security modules |
311 | * @sk_mark: generic packet mark | 294 | * @sk_mark: generic packet mark |
312 | * @sk_cgrp_data: cgroup data for this socket | 295 | * @sk_cgrp_data: cgroup data for this socket |
313 | * @sk_cgrp: this socket's cgroup-specific proto data | 296 | * @sk_memcg: this socket's memory cgroup association |
314 | * @sk_write_pending: a write to stream socket waits to start | 297 | * @sk_write_pending: a write to stream socket waits to start |
315 | * @sk_state_change: callback to indicate change in the state of the sock | 298 | * @sk_state_change: callback to indicate change in the state of the sock |
316 | * @sk_data_ready: callback to indicate there is data to be processed | 299 | * @sk_data_ready: callback to indicate there is data to be processed |
@@ -446,7 +429,7 @@ struct sock { | |||
446 | void *sk_security; | 429 | void *sk_security; |
447 | #endif | 430 | #endif |
448 | struct sock_cgroup_data sk_cgrp_data; | 431 | struct sock_cgroup_data sk_cgrp_data; |
449 | struct cg_proto *sk_cgrp; | 432 | struct mem_cgroup *sk_memcg; |
450 | void (*sk_state_change)(struct sock *sk); | 433 | void (*sk_state_change)(struct sock *sk); |
451 | void (*sk_data_ready)(struct sock *sk); | 434 | void (*sk_data_ready)(struct sock *sk); |
452 | void (*sk_write_space)(struct sock *sk); | 435 | void (*sk_write_space)(struct sock *sk); |
@@ -1096,23 +1079,6 @@ static inline void sk_refcnt_debug_release(const struct sock *sk) | |||
1096 | #define sk_refcnt_debug_release(sk) do { } while (0) | 1079 | #define sk_refcnt_debug_release(sk) do { } while (0) |
1097 | #endif /* SOCK_REFCNT_DEBUG */ | 1080 | #endif /* SOCK_REFCNT_DEBUG */ |
1098 | 1081 | ||
1099 | #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_NET) | ||
1100 | extern struct static_key memcg_socket_limit_enabled; | ||
1101 | static inline struct cg_proto *parent_cg_proto(struct proto *proto, | ||
1102 | struct cg_proto *cg_proto) | ||
1103 | { | ||
1104 | return proto->proto_cgroup(parent_mem_cgroup(cg_proto->memcg)); | ||
1105 | } | ||
1106 | #define mem_cgroup_sockets_enabled static_key_false(&memcg_socket_limit_enabled) | ||
1107 | #else | ||
1108 | #define mem_cgroup_sockets_enabled 0 | ||
1109 | static inline struct cg_proto *parent_cg_proto(struct proto *proto, | ||
1110 | struct cg_proto *cg_proto) | ||
1111 | { | ||
1112 | return NULL; | ||
1113 | } | ||
1114 | #endif | ||
1115 | |||
1116 | static inline bool sk_stream_memory_free(const struct sock *sk) | 1082 | static inline bool sk_stream_memory_free(const struct sock *sk) |
1117 | { | 1083 | { |
1118 | if (sk->sk_wmem_queued >= sk->sk_sndbuf) | 1084 | if (sk->sk_wmem_queued >= sk->sk_sndbuf) |
@@ -1139,8 +1105,9 @@ static inline bool sk_under_memory_pressure(const struct sock *sk) | |||
1139 | if (!sk->sk_prot->memory_pressure) | 1105 | if (!sk->sk_prot->memory_pressure) |
1140 | return false; | 1106 | return false; |
1141 | 1107 | ||
1142 | if (mem_cgroup_sockets_enabled && sk->sk_cgrp) | 1108 | if (mem_cgroup_sockets_enabled && sk->sk_memcg && |
1143 | return !!sk->sk_cgrp->memory_pressure; | 1109 | mem_cgroup_under_socket_pressure(sk->sk_memcg)) |
1110 | return true; | ||
1144 | 1111 | ||
1145 | return !!*sk->sk_prot->memory_pressure; | 1112 | return !!*sk->sk_prot->memory_pressure; |
1146 | } | 1113 | } |
@@ -1154,15 +1121,6 @@ static inline void sk_leave_memory_pressure(struct sock *sk) | |||
1154 | 1121 | ||
1155 | if (*memory_pressure) | 1122 | if (*memory_pressure) |
1156 | *memory_pressure = 0; | 1123 | *memory_pressure = 0; |
1157 | |||
1158 | if (mem_cgroup_sockets_enabled && sk->sk_cgrp) { | ||
1159 | struct cg_proto *cg_proto = sk->sk_cgrp; | ||
1160 | struct proto *prot = sk->sk_prot; | ||
1161 | |||
1162 | for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto)) | ||
1163 | cg_proto->memory_pressure = 0; | ||
1164 | } | ||
1165 | |||
1166 | } | 1124 | } |
1167 | 1125 | ||
1168 | static inline void sk_enter_memory_pressure(struct sock *sk) | 1126 | static inline void sk_enter_memory_pressure(struct sock *sk) |
@@ -1170,116 +1128,46 @@ static inline void sk_enter_memory_pressure(struct sock *sk) | |||
1170 | if (!sk->sk_prot->enter_memory_pressure) | 1128 | if (!sk->sk_prot->enter_memory_pressure) |
1171 | return; | 1129 | return; |
1172 | 1130 | ||
1173 | if (mem_cgroup_sockets_enabled && sk->sk_cgrp) { | ||
1174 | struct cg_proto *cg_proto = sk->sk_cgrp; | ||
1175 | struct proto *prot = sk->sk_prot; | ||
1176 | |||
1177 | for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto)) | ||
1178 | cg_proto->memory_pressure = 1; | ||
1179 | } | ||
1180 | |||
1181 | sk->sk_prot->enter_memory_pressure(sk); | 1131 | sk->sk_prot->enter_memory_pressure(sk); |
1182 | } | 1132 | } |
1183 | 1133 | ||
1184 | static inline long sk_prot_mem_limits(const struct sock *sk, int index) | 1134 | static inline long sk_prot_mem_limits(const struct sock *sk, int index) |
1185 | { | 1135 | { |
1186 | long *prot = sk->sk_prot->sysctl_mem; | 1136 | return sk->sk_prot->sysctl_mem[index]; |
1187 | if (mem_cgroup_sockets_enabled && sk->sk_cgrp) | ||
1188 | prot = sk->sk_cgrp->sysctl_mem; | ||
1189 | return prot[index]; | ||
1190 | } | ||
1191 | |||
1192 | static inline void memcg_memory_allocated_add(struct cg_proto *prot, | ||
1193 | unsigned long amt, | ||
1194 | int *parent_status) | ||
1195 | { | ||
1196 | page_counter_charge(&prot->memory_allocated, amt); | ||
1197 | |||
1198 | if (page_counter_read(&prot->memory_allocated) > | ||
1199 | prot->memory_allocated.limit) | ||
1200 | *parent_status = OVER_LIMIT; | ||
1201 | } | ||
1202 | |||
1203 | static inline void memcg_memory_allocated_sub(struct cg_proto *prot, | ||
1204 | unsigned long amt) | ||
1205 | { | ||
1206 | page_counter_uncharge(&prot->memory_allocated, amt); | ||
1207 | } | 1137 | } |
1208 | 1138 | ||
1209 | static inline long | 1139 | static inline long |
1210 | sk_memory_allocated(const struct sock *sk) | 1140 | sk_memory_allocated(const struct sock *sk) |
1211 | { | 1141 | { |
1212 | struct proto *prot = sk->sk_prot; | 1142 | return atomic_long_read(sk->sk_prot->memory_allocated); |
1213 | |||
1214 | if (mem_cgroup_sockets_enabled && sk->sk_cgrp) | ||
1215 | return page_counter_read(&sk->sk_cgrp->memory_allocated); | ||
1216 | |||
1217 | return atomic_long_read(prot->memory_allocated); | ||
1218 | } | 1143 | } |
1219 | 1144 | ||
1220 | static inline long | 1145 | static inline long |
1221 | sk_memory_allocated_add(struct sock *sk, int amt, int *parent_status) | 1146 | sk_memory_allocated_add(struct sock *sk, int amt) |
1222 | { | 1147 | { |
1223 | struct proto *prot = sk->sk_prot; | 1148 | return atomic_long_add_return(amt, sk->sk_prot->memory_allocated); |
1224 | |||
1225 | if (mem_cgroup_sockets_enabled && sk->sk_cgrp) { | ||
1226 | memcg_memory_allocated_add(sk->sk_cgrp, amt, parent_status); | ||
1227 | /* update the root cgroup regardless */ | ||
1228 | atomic_long_add_return(amt, prot->memory_allocated); | ||
1229 | return page_counter_read(&sk->sk_cgrp->memory_allocated); | ||
1230 | } | ||
1231 | |||
1232 | return atomic_long_add_return(amt, prot->memory_allocated); | ||
1233 | } | 1149 | } |
1234 | 1150 | ||
1235 | static inline void | 1151 | static inline void |
1236 | sk_memory_allocated_sub(struct sock *sk, int amt) | 1152 | sk_memory_allocated_sub(struct sock *sk, int amt) |
1237 | { | 1153 | { |
1238 | struct proto *prot = sk->sk_prot; | 1154 | atomic_long_sub(amt, sk->sk_prot->memory_allocated); |
1239 | |||
1240 | if (mem_cgroup_sockets_enabled && sk->sk_cgrp) | ||
1241 | memcg_memory_allocated_sub(sk->sk_cgrp, amt); | ||
1242 | |||
1243 | atomic_long_sub(amt, prot->memory_allocated); | ||
1244 | } | 1155 | } |
1245 | 1156 | ||
1246 | static inline void sk_sockets_allocated_dec(struct sock *sk) | 1157 | static inline void sk_sockets_allocated_dec(struct sock *sk) |
1247 | { | 1158 | { |
1248 | struct proto *prot = sk->sk_prot; | 1159 | percpu_counter_dec(sk->sk_prot->sockets_allocated); |
1249 | |||
1250 | if (mem_cgroup_sockets_enabled && sk->sk_cgrp) { | ||
1251 | struct cg_proto *cg_proto = sk->sk_cgrp; | ||
1252 | |||
1253 | for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto)) | ||
1254 | percpu_counter_dec(&cg_proto->sockets_allocated); | ||
1255 | } | ||
1256 | |||
1257 | percpu_counter_dec(prot->sockets_allocated); | ||
1258 | } | 1160 | } |
1259 | 1161 | ||
1260 | static inline void sk_sockets_allocated_inc(struct sock *sk) | 1162 | static inline void sk_sockets_allocated_inc(struct sock *sk) |
1261 | { | 1163 | { |
1262 | struct proto *prot = sk->sk_prot; | 1164 | percpu_counter_inc(sk->sk_prot->sockets_allocated); |
1263 | |||
1264 | if (mem_cgroup_sockets_enabled && sk->sk_cgrp) { | ||
1265 | struct cg_proto *cg_proto = sk->sk_cgrp; | ||
1266 | |||
1267 | for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto)) | ||
1268 | percpu_counter_inc(&cg_proto->sockets_allocated); | ||
1269 | } | ||
1270 | |||
1271 | percpu_counter_inc(prot->sockets_allocated); | ||
1272 | } | 1165 | } |
1273 | 1166 | ||
1274 | static inline int | 1167 | static inline int |
1275 | sk_sockets_allocated_read_positive(struct sock *sk) | 1168 | sk_sockets_allocated_read_positive(struct sock *sk) |
1276 | { | 1169 | { |
1277 | struct proto *prot = sk->sk_prot; | 1170 | return percpu_counter_read_positive(sk->sk_prot->sockets_allocated); |
1278 | |||
1279 | if (mem_cgroup_sockets_enabled && sk->sk_cgrp) | ||
1280 | return percpu_counter_read_positive(&sk->sk_cgrp->sockets_allocated); | ||
1281 | |||
1282 | return percpu_counter_read_positive(prot->sockets_allocated); | ||
1283 | } | 1171 | } |
1284 | 1172 | ||
1285 | static inline int | 1173 | static inline int |
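With cg_proto gone, the helpers above shrink back to plain per-protocol counters, and memcg charging becomes a separate, explicit step. A hedged sketch of what a protocol charge path looks like after this change (mem_cgroup_charge_skmem is added elsewhere in this series; the surrounding function is illustrative):

	static bool my_proto_charge(struct sock *sk, int pages)
	{
		long allocated = sk_memory_allocated_add(sk, pages);

		if (allocated > sk_prot_mem_limits(sk, 2))
			goto suppress;

		/* memcg accounting is no longer hidden inside the
		 * sk_memory_allocated_* helpers above. */
		if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
		    !mem_cgroup_charge_skmem(sk->sk_memcg, pages))
			goto suppress;

		return true;

	suppress:
		sk_memory_allocated_sub(sk, pages);
		return false;
	}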
diff --git a/include/net/tcp.h b/include/net/tcp.h index a80255f4ca33..8ea19977ea53 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h | |||
@@ -289,8 +289,9 @@ extern int tcp_memory_pressure; | |||
289 | /* optimized version of sk_under_memory_pressure() for TCP sockets */ | 289 | /* optimized version of sk_under_memory_pressure() for TCP sockets */ |
290 | static inline bool tcp_under_memory_pressure(const struct sock *sk) | 290 | static inline bool tcp_under_memory_pressure(const struct sock *sk) |
291 | { | 291 | { |
292 | if (mem_cgroup_sockets_enabled && sk->sk_cgrp) | 292 | if (mem_cgroup_sockets_enabled && sk->sk_memcg && |
293 | return !!sk->sk_cgrp->memory_pressure; | 293 | mem_cgroup_under_socket_pressure(sk->sk_memcg)) |
294 | return true; | ||
294 | 295 | ||
295 | return tcp_memory_pressure; | 296 | return tcp_memory_pressure; |
296 | } | 297 | } |
diff --git a/include/net/tcp_memcontrol.h b/include/net/tcp_memcontrol.h index 05b94d9453de..3a17b16ae8aa 100644 --- a/include/net/tcp_memcontrol.h +++ b/include/net/tcp_memcontrol.h | |||
@@ -1,7 +1,6 @@ | |||
1 | #ifndef _TCP_MEMCG_H | 1 | #ifndef _TCP_MEMCG_H |
2 | #define _TCP_MEMCG_H | 2 | #define _TCP_MEMCG_H |
3 | 3 | ||
4 | struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg); | ||
5 | int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss); | 4 | int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss); |
6 | void tcp_destroy_cgroup(struct mem_cgroup *memcg); | 5 | void tcp_destroy_cgroup(struct mem_cgroup *memcg); |
7 | #endif /* _TCP_MEMCG_H */ | 6 | #endif /* _TCP_MEMCG_H */ |
diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h new file mode 100644 index 000000000000..97d635cabac8 --- /dev/null +++ b/include/trace/events/huge_memory.h | |||
@@ -0,0 +1,136 @@ | |||
1 | #undef TRACE_SYSTEM | ||
2 | #define TRACE_SYSTEM huge_memory | ||
3 | |||
4 | #if !defined(__HUGE_MEMORY_H) || defined(TRACE_HEADER_MULTI_READ) | ||
5 | #define __HUGE_MEMORY_H | ||
6 | |||
7 | #include <linux/tracepoint.h> | ||
8 | |||
9 | #include <trace/events/gfpflags.h> | ||
10 | |||
11 | #define SCAN_STATUS \ | ||
12 | EM( SCAN_FAIL, "failed") \ | ||
13 | EM( SCAN_SUCCEED, "succeeded") \ | ||
14 | EM( SCAN_PMD_NULL, "pmd_null") \ | ||
15 | EM( SCAN_EXCEED_NONE_PTE, "exceed_none_pte") \ | ||
16 | EM( SCAN_PTE_NON_PRESENT, "pte_non_present") \ | ||
17 | EM( SCAN_PAGE_RO, "no_writable_page") \ | ||
18 | EM( SCAN_NO_REFERENCED_PAGE, "no_referenced_page") \ | ||
19 | EM( SCAN_PAGE_NULL, "page_null") \ | ||
20 | EM( SCAN_SCAN_ABORT, "scan_aborted") \ | ||
21 | EM( SCAN_PAGE_COUNT, "not_suitable_page_count") \ | ||
22 | EM( SCAN_PAGE_LRU, "page_not_in_lru") \ | ||
23 | EM( SCAN_PAGE_LOCK, "page_locked") \ | ||
24 | EM( SCAN_PAGE_ANON, "page_not_anon") \ | ||
25 | EM( SCAN_ANY_PROCESS, "no_process_for_page") \ | ||
26 | EM( SCAN_VMA_NULL, "vma_null") \ | ||
27 | EM( SCAN_VMA_CHECK, "vma_check_failed") \ | ||
28 | EM( SCAN_ADDRESS_RANGE, "not_suitable_address_range") \ | ||
29 | EM( SCAN_SWAP_CACHE_PAGE, "page_swap_cache") \ | ||
30 | EM( SCAN_DEL_PAGE_LRU, "could_not_delete_page_from_lru")\ | ||
31 | EM( SCAN_ALLOC_HUGE_PAGE_FAIL, "alloc_huge_page_failed") \ | ||
32 | EMe( SCAN_CGROUP_CHARGE_FAIL, "cgroup_charge_failed") | ||
33 | |||
34 | #undef EM | ||
35 | #undef EMe | ||
36 | #define EM(a, b) TRACE_DEFINE_ENUM(a); | ||
37 | #define EMe(a, b) TRACE_DEFINE_ENUM(a); | ||
38 | |||
39 | SCAN_STATUS | ||
40 | |||
41 | #undef EM | ||
42 | #undef EMe | ||
43 | #define EM(a, b) {a, b}, | ||
44 | #define EMe(a, b) {a, b} | ||
45 | |||
46 | TRACE_EVENT(mm_khugepaged_scan_pmd, | ||
47 | |||
48 | TP_PROTO(struct mm_struct *mm, unsigned long pfn, bool writable, | ||
49 | bool referenced, int none_or_zero, int status), | ||
50 | |||
51 | TP_ARGS(mm, pfn, writable, referenced, none_or_zero, status), | ||
52 | |||
53 | TP_STRUCT__entry( | ||
54 | __field(struct mm_struct *, mm) | ||
55 | __field(unsigned long, pfn) | ||
56 | __field(bool, writable) | ||
57 | __field(bool, referenced) | ||
58 | __field(int, none_or_zero) | ||
59 | __field(int, status) | ||
60 | ), | ||
61 | |||
62 | TP_fast_assign( | ||
63 | __entry->mm = mm; | ||
64 | __entry->pfn = pfn; | ||
65 | __entry->writable = writable; | ||
66 | __entry->referenced = referenced; | ||
67 | __entry->none_or_zero = none_or_zero; | ||
68 | __entry->status = status; | ||
69 | ), | ||
70 | |||
71 | TP_printk("mm=%p, scan_pfn=0x%lx, writable=%d, referenced=%d, none_or_zero=%d, status=%s", | ||
72 | __entry->mm, | ||
73 | __entry->pfn, | ||
74 | __entry->writable, | ||
75 | __entry->referenced, | ||
76 | __entry->none_or_zero, | ||
77 | __print_symbolic(__entry->status, SCAN_STATUS)) | ||
78 | ); | ||
79 | |||
80 | TRACE_EVENT(mm_collapse_huge_page, | ||
81 | |||
82 | TP_PROTO(struct mm_struct *mm, int isolated, int status), | ||
83 | |||
84 | TP_ARGS(mm, isolated, status), | ||
85 | |||
86 | TP_STRUCT__entry( | ||
87 | __field(struct mm_struct *, mm) | ||
88 | __field(int, isolated) | ||
89 | __field(int, status) | ||
90 | ), | ||
91 | |||
92 | TP_fast_assign( | ||
93 | __entry->mm = mm; | ||
94 | __entry->isolated = isolated; | ||
95 | __entry->status = status; | ||
96 | ), | ||
97 | |||
98 | TP_printk("mm=%p, isolated=%d, status=%s", | ||
99 | __entry->mm, | ||
100 | __entry->isolated, | ||
101 | __print_symbolic(__entry->status, SCAN_STATUS)) | ||
102 | ); | ||
103 | |||
104 | TRACE_EVENT(mm_collapse_huge_page_isolate, | ||
105 | |||
106 | TP_PROTO(unsigned long pfn, int none_or_zero, | ||
107 | bool referenced, bool writable, int status), | ||
108 | |||
109 | TP_ARGS(pfn, none_or_zero, referenced, writable, status), | ||
110 | |||
111 | TP_STRUCT__entry( | ||
112 | __field(unsigned long, pfn) | ||
113 | __field(int, none_or_zero) | ||
114 | __field(bool, referenced) | ||
115 | __field(bool, writable) | ||
116 | __field(int, status) | ||
117 | ), | ||
118 | |||
119 | TP_fast_assign( | ||
120 | __entry->pfn = pfn; | ||
121 | __entry->none_or_zero = none_or_zero; | ||
122 | __entry->referenced = referenced; | ||
123 | __entry->writable = writable; | ||
124 | __entry->status = status; | ||
125 | ), | ||
126 | |||
127 | TP_printk("scan_pfn=0x%lx, none_or_zero=%d, referenced=%d, writable=%d, status=%s", | ||
128 | __entry->pfn, | ||
129 | __entry->none_or_zero, | ||
130 | __entry->referenced, | ||
131 | __entry->writable, | ||
132 | __print_symbolic(__entry->status, SCAN_STATUS)) | ||
133 | ); | ||
134 | |||
135 | #endif /* __HUGE_MEMORY_H */ | ||
136 | #include <trace/define_trace.h> | ||
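The EM()/EMe() pair above is the usual trace-header idiom for keeping one list of values in sync with two consumers: the list is expanded once as TRACE_DEFINE_ENUM() so the enum names resolve in trace output, then re-expanded as {value, "string"} pairs for __print_symbolic(). A standalone sketch of the same two-pass X-macro technique outside the tracing machinery:

	#include <stdio.h>

	#define COLORS \
		EM(RED,   "red")   \
		EM(GREEN, "green") \
		EMe(BLUE, "blue")

	/* First expansion: declare the enumerators. */
	#define EM(a, b)  a,
	#define EMe(a, b) a
	enum color { COLORS };
	#undef EM
	#undef EMe

	/* Second expansion: build the value -> string table. */
	#define EM(a, b)  { a, b },
	#define EMe(a, b) { a, b }
	static const struct { int val; const char *name; } color_names[] = { COLORS };
	#undef EM
	#undef EMe

	int main(void)
	{
		printf("%s\n", color_names[GREEN].name); /* prints "green" */
		return 0;
	}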
diff --git a/include/trace/events/page_isolation.h b/include/trace/events/page_isolation.h new file mode 100644 index 000000000000..6fb644029c80 --- /dev/null +++ b/include/trace/events/page_isolation.h | |||
@@ -0,0 +1,38 @@ | |||
1 | #undef TRACE_SYSTEM | ||
2 | #define TRACE_SYSTEM page_isolation | ||
3 | |||
4 | #if !defined(_TRACE_PAGE_ISOLATION_H) || defined(TRACE_HEADER_MULTI_READ) | ||
5 | #define _TRACE_PAGE_ISOLATION_H | ||
6 | |||
7 | #include <linux/tracepoint.h> | ||
8 | |||
9 | TRACE_EVENT(test_pages_isolated, | ||
10 | |||
11 | TP_PROTO( | ||
12 | unsigned long start_pfn, | ||
13 | unsigned long end_pfn, | ||
14 | unsigned long fin_pfn), | ||
15 | |||
16 | TP_ARGS(start_pfn, end_pfn, fin_pfn), | ||
17 | |||
18 | TP_STRUCT__entry( | ||
19 | __field(unsigned long, start_pfn) | ||
20 | __field(unsigned long, end_pfn) | ||
21 | __field(unsigned long, fin_pfn) | ||
22 | ), | ||
23 | |||
24 | TP_fast_assign( | ||
25 | __entry->start_pfn = start_pfn; | ||
26 | __entry->end_pfn = end_pfn; | ||
27 | __entry->fin_pfn = fin_pfn; | ||
28 | ), | ||
29 | |||
30 | TP_printk("start_pfn=0x%lx end_pfn=0x%lx fin_pfn=0x%lx ret=%s", | ||
31 | __entry->start_pfn, __entry->end_pfn, __entry->fin_pfn, | ||
32 | __entry->end_pfn == __entry->fin_pfn ? "success" : "fail") | ||
33 | ); | ||
34 | |||
35 | #endif /* _TRACE_PAGE_ISOLATION_H */ | ||
36 | |||
37 | /* This part must be outside protection */ | ||
38 | #include <trace/define_trace.h> | ||
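A hedged sketch of the intended caller in mm/page_isolation.c (the local variable names are assumptions): test_pages_isolated() walks [start_pfn, end_pfn) and stops at the first pfn that is not isolated, so emitting the final pfn lets the trace both infer success and show where a failure hit:

	/* fin_pfn == end_pfn means the whole range was isolated. */
	trace_test_pages_isolated(start_pfn, end_pfn, pfn);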
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index f66476b96264..31763dd8db1c 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h | |||
@@ -330,10 +330,9 @@ DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_memcg_isolate, | |||
330 | 330 | ||
331 | TRACE_EVENT(mm_vmscan_writepage, | 331 | TRACE_EVENT(mm_vmscan_writepage, |
332 | 332 | ||
333 | TP_PROTO(struct page *page, | 333 | TP_PROTO(struct page *page), |
334 | int reclaim_flags), | ||
335 | 334 | ||
336 | TP_ARGS(page, reclaim_flags), | 335 | TP_ARGS(page), |
337 | 336 | ||
338 | TP_STRUCT__entry( | 337 | TP_STRUCT__entry( |
339 | __field(unsigned long, pfn) | 338 | __field(unsigned long, pfn) |
@@ -342,7 +341,7 @@ TRACE_EVENT(mm_vmscan_writepage, | |||
342 | 341 | ||
343 | TP_fast_assign( | 342 | TP_fast_assign( |
344 | __entry->pfn = page_to_pfn(page); | 343 | __entry->pfn = page_to_pfn(page); |
345 | __entry->reclaim_flags = reclaim_flags; | 344 | __entry->reclaim_flags = trace_reclaim_flags(page); |
346 | ), | 345 | ), |
347 | 346 | ||
348 | TP_printk("page=%p pfn=%lu flags=%s", | 347 | TP_printk("page=%p pfn=%lu flags=%s", |
@@ -353,11 +352,11 @@ TRACE_EVENT(mm_vmscan_writepage, | |||
353 | 352 | ||
354 | TRACE_EVENT(mm_vmscan_lru_shrink_inactive, | 353 | TRACE_EVENT(mm_vmscan_lru_shrink_inactive, |
355 | 354 | ||
356 | TP_PROTO(int nid, int zid, | 355 | TP_PROTO(struct zone *zone, |
357 | unsigned long nr_scanned, unsigned long nr_reclaimed, | 356 | unsigned long nr_scanned, unsigned long nr_reclaimed, |
358 | int priority, int reclaim_flags), | 357 | int priority, int file), |
359 | 358 | ||
360 | TP_ARGS(nid, zid, nr_scanned, nr_reclaimed, priority, reclaim_flags), | 359 | TP_ARGS(zone, nr_scanned, nr_reclaimed, priority, file), |
361 | 360 | ||
362 | TP_STRUCT__entry( | 361 | TP_STRUCT__entry( |
363 | __field(int, nid) | 362 | __field(int, nid) |
@@ -369,12 +368,12 @@ TRACE_EVENT(mm_vmscan_lru_shrink_inactive, | |||
369 | ), | 368 | ), |
370 | 369 | ||
371 | TP_fast_assign( | 370 | TP_fast_assign( |
372 | __entry->nid = nid; | 371 | __entry->nid = zone_to_nid(zone); |
373 | __entry->zid = zid; | 372 | __entry->zid = zone_idx(zone); |
374 | __entry->nr_scanned = nr_scanned; | 373 | __entry->nr_scanned = nr_scanned; |
375 | __entry->nr_reclaimed = nr_reclaimed; | 374 | __entry->nr_reclaimed = nr_reclaimed; |
376 | __entry->priority = priority; | 375 | __entry->priority = priority; |
377 | __entry->reclaim_flags = reclaim_flags; | 376 | __entry->reclaim_flags = trace_shrink_flags(file); |
378 | ), | 377 | ), |
379 | 378 | ||
380 | TP_printk("nid=%d zid=%d nr_scanned=%ld nr_reclaimed=%ld priority=%d flags=%s", | 379 | TP_printk("nid=%d zid=%d nr_scanned=%ld nr_reclaimed=%ld priority=%d flags=%s", |
diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 161a1807e6ef..f4617cf07069 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c | |||
@@ -1438,7 +1438,7 @@ static int __init init_mqueue_fs(void) | |||
1438 | 1438 | ||
1439 | mqueue_inode_cachep = kmem_cache_create("mqueue_inode_cache", | 1439 | mqueue_inode_cachep = kmem_cache_create("mqueue_inode_cache", |
1440 | sizeof(struct mqueue_inode_info), 0, | 1440 | sizeof(struct mqueue_inode_info), 0, |
1441 | SLAB_HWCACHE_ALIGN, init_once); | 1441 | SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT, init_once); |
1442 | if (mqueue_inode_cachep == NULL) | 1442 | if (mqueue_inode_cachep == NULL) |
1443 | return -ENOMEM; | 1443 | return -ENOMEM; |
1444 | 1444 | ||
diff --git a/kernel/cred.c b/kernel/cred.c index 71179a09c1d6..0c0cd8a62285 100644 --- a/kernel/cred.c +++ b/kernel/cred.c | |||
@@ -569,8 +569,8 @@ EXPORT_SYMBOL(revert_creds); | |||
569 | void __init cred_init(void) | 569 | void __init cred_init(void) |
570 | { | 570 | { |
571 | /* allocate a slab in which we can store credentials */ | 571 | /* allocate a slab in which we can store credentials */ |
572 | cred_jar = kmem_cache_create("cred_jar", sizeof(struct cred), | 572 | cred_jar = kmem_cache_create("cred_jar", sizeof(struct cred), 0, |
573 | 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); | 573 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL); |
574 | } | 574 | } |
575 | 575 | ||
576 | /** | 576 | /** |
diff --git a/kernel/delayacct.c b/kernel/delayacct.c index ef90b04d783f..435c14a45118 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c | |||
@@ -34,7 +34,7 @@ __setup("nodelayacct", delayacct_setup_disable); | |||
34 | 34 | ||
35 | void delayacct_init(void) | 35 | void delayacct_init(void) |
36 | { | 36 | { |
37 | delayacct_cache = KMEM_CACHE(task_delay_info, SLAB_PANIC); | 37 | delayacct_cache = KMEM_CACHE(task_delay_info, SLAB_PANIC|SLAB_ACCOUNT); |
38 | delayacct_tsk_init(&init_task); | 38 | delayacct_tsk_init(&init_task); |
39 | } | 39 | } |
40 | 40 | ||
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 7dad84913abf..bb0669169716 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c | |||
@@ -180,7 +180,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, | |||
180 | lru_cache_add_active_or_unevictable(kpage, vma); | 180 | lru_cache_add_active_or_unevictable(kpage, vma); |
181 | 181 | ||
182 | if (!PageAnon(page)) { | 182 | if (!PageAnon(page)) { |
183 | dec_mm_counter(mm, MM_FILEPAGES); | 183 | dec_mm_counter(mm, mm_counter_file(page)); |
184 | inc_mm_counter(mm, MM_ANONPAGES); | 184 | inc_mm_counter(mm, MM_ANONPAGES); |
185 | } | 185 | } |
186 | 186 | ||
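mm_counter_file() picks the right RSS bucket for a file-backed page now that shmem is accounted separately; this split is what feeds the new RssFile/RssShmem breakdown. The helper as added to include/linux/mm.h in this series (reproduced here for context):

	static inline int mm_counter_file(struct page *page)
	{
		/* shmem/tmpfs pages are swap-backed: count them as
		 * MM_SHMEMPAGES rather than MM_FILEPAGES. */
		if (PageSwapBacked(page))
			return MM_SHMEMPAGES;
		return MM_FILEPAGES;
	}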
diff --git a/kernel/fork.c b/kernel/fork.c index 6774e6b2e96d..2e391c754ae7 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -300,9 +300,9 @@ void __init fork_init(void) | |||
300 | #define ARCH_MIN_TASKALIGN L1_CACHE_BYTES | 300 | #define ARCH_MIN_TASKALIGN L1_CACHE_BYTES |
301 | #endif | 301 | #endif |
302 | /* create a slab on which task_structs can be allocated */ | 302 | /* create a slab on which task_structs can be allocated */ |
303 | task_struct_cachep = | 303 | task_struct_cachep = kmem_cache_create("task_struct", |
304 | kmem_cache_create("task_struct", arch_task_struct_size, | 304 | arch_task_struct_size, ARCH_MIN_TASKALIGN, |
305 | ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL); | 305 | SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT, NULL); |
306 | #endif | 306 | #endif |
307 | 307 | ||
308 | /* do the arch specific task caches init */ | 308 | /* do the arch specific task caches init */ |
@@ -414,7 +414,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) | |||
414 | RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm)); | 414 | RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm)); |
415 | 415 | ||
416 | mm->total_vm = oldmm->total_vm; | 416 | mm->total_vm = oldmm->total_vm; |
417 | mm->shared_vm = oldmm->shared_vm; | 417 | mm->data_vm = oldmm->data_vm; |
418 | mm->exec_vm = oldmm->exec_vm; | 418 | mm->exec_vm = oldmm->exec_vm; |
419 | mm->stack_vm = oldmm->stack_vm; | 419 | mm->stack_vm = oldmm->stack_vm; |
420 | 420 | ||
@@ -433,8 +433,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) | |||
433 | struct file *file; | 433 | struct file *file; |
434 | 434 | ||
435 | if (mpnt->vm_flags & VM_DONTCOPY) { | 435 | if (mpnt->vm_flags & VM_DONTCOPY) { |
436 | vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file, | 436 | vm_stat_account(mm, mpnt->vm_flags, -vma_pages(mpnt)); |
437 | -vma_pages(mpnt)); | ||
438 | continue; | 437 | continue; |
439 | } | 438 | } |
440 | charge = 0; | 439 | charge = 0; |
@@ -1848,16 +1847,19 @@ void __init proc_caches_init(void) | |||
1848 | sighand_cachep = kmem_cache_create("sighand_cache", | 1847 | sighand_cachep = kmem_cache_create("sighand_cache", |
1849 | sizeof(struct sighand_struct), 0, | 1848 | sizeof(struct sighand_struct), 0, |
1850 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU| | 1849 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU| |
1851 | SLAB_NOTRACK, sighand_ctor); | 1850 | SLAB_NOTRACK|SLAB_ACCOUNT, sighand_ctor); |
1852 | signal_cachep = kmem_cache_create("signal_cache", | 1851 | signal_cachep = kmem_cache_create("signal_cache", |
1853 | sizeof(struct signal_struct), 0, | 1852 | sizeof(struct signal_struct), 0, |
1854 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); | 1853 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT, |
1854 | NULL); | ||
1855 | files_cachep = kmem_cache_create("files_cache", | 1855 | files_cachep = kmem_cache_create("files_cache", |
1856 | sizeof(struct files_struct), 0, | 1856 | sizeof(struct files_struct), 0, |
1857 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); | 1857 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT, |
1858 | NULL); | ||
1858 | fs_cachep = kmem_cache_create("fs_cache", | 1859 | fs_cachep = kmem_cache_create("fs_cache", |
1859 | sizeof(struct fs_struct), 0, | 1860 | sizeof(struct fs_struct), 0, |
1860 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); | 1861 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT, |
1862 | NULL); | ||
1861 | /* | 1863 | /* |
1862 | * FIXME! The "sizeof(struct mm_struct)" currently includes the | 1864 | * FIXME! The "sizeof(struct mm_struct)" currently includes the |
1863 | * whole struct cpumask for the OFFSTACK case. We could change | 1865 | * whole struct cpumask for the OFFSTACK case. We could change |
@@ -1867,8 +1869,9 @@ void __init proc_caches_init(void) | |||
1867 | */ | 1869 | */ |
1868 | mm_cachep = kmem_cache_create("mm_struct", | 1870 | mm_cachep = kmem_cache_create("mm_struct", |
1869 | sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, | 1871 | sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, |
1870 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); | 1872 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT, |
1871 | vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC); | 1873 | NULL); |
1874 | vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT); | ||
1872 | mmap_init(); | 1875 | mmap_init(); |
1873 | nsproxy_cache_init(); | 1876 | nsproxy_cache_init(); |
1874 | } | 1877 | } |
diff --git a/kernel/pid.c b/kernel/pid.c index 78b3d9f80d44..f4ad91b746f1 100644 --- a/kernel/pid.c +++ b/kernel/pid.c | |||
@@ -604,5 +604,5 @@ void __init pidmap_init(void) | |||
604 | atomic_dec(&init_pid_ns.pidmap[0].nr_free); | 604 | atomic_dec(&init_pid_ns.pidmap[0].nr_free); |
605 | 605 | ||
606 | init_pid_ns.pid_cachep = KMEM_CACHE(pid, | 606 | init_pid_ns.pid_cachep = KMEM_CACHE(pid, |
607 | SLAB_HWCACHE_ALIGN | SLAB_PANIC); | 607 | SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT); |
608 | } | 608 | } |
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 4a2ef5a02fd3..2489140a7c51 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c | |||
@@ -219,6 +219,7 @@ static void cpu_idle_loop(void) | |||
219 | */ | 219 | */ |
220 | 220 | ||
221 | __current_set_polling(); | 221 | __current_set_polling(); |
222 | quiet_vmstat(); | ||
222 | tick_nohz_idle_enter(); | 223 | tick_nohz_idle_enter(); |
223 | 224 | ||
224 | while (!need_resched()) { | 225 | while (!need_resched()) { |
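Calling quiet_vmstat() on idle entry folds this CPU's vmstat differentials immediately and cancels the deferred update work, so a nohz-idle CPU is not woken later merely to sync counters. A rough sketch of the mm/vmstat.c side (an approximation, not a verbatim excerpt):

	void quiet_vmstat(void)
	{
		if (system_state != SYSTEM_RUNNING)
			return;

		/* No update pending: counters are already folded. */
		if (!delayed_work_pending(this_cpu_ptr(&vmstat_work)))
			return;

		/* Sync now and silence the periodic updater. */
		cancel_delayed_work(this_cpu_ptr(&vmstat_work));
		refresh_cpu_vm_stats(false);
	}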
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 5faf89ac9ec0..c810f8afdb7f 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -1568,6 +1568,28 @@ static struct ctl_table vm_table[] = { | |||
1568 | .mode = 0644, | 1568 | .mode = 0644, |
1569 | .proc_handler = proc_doulongvec_minmax, | 1569 | .proc_handler = proc_doulongvec_minmax, |
1570 | }, | 1570 | }, |
1571 | #ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS | ||
1572 | { | ||
1573 | .procname = "mmap_rnd_bits", | ||
1574 | .data = &mmap_rnd_bits, | ||
1575 | .maxlen = sizeof(mmap_rnd_bits), | ||
1576 | .mode = 0600, | ||
1577 | .proc_handler = proc_dointvec_minmax, | ||
1578 | .extra1 = (void *)&mmap_rnd_bits_min, | ||
1579 | .extra2 = (void *)&mmap_rnd_bits_max, | ||
1580 | }, | ||
1581 | #endif | ||
1582 | #ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS | ||
1583 | { | ||
1584 | .procname = "mmap_rnd_compat_bits", | ||
1585 | .data = &mmap_rnd_compat_bits, | ||
1586 | .maxlen = sizeof(mmap_rnd_compat_bits), | ||
1587 | .mode = 0600, | ||
1588 | .proc_handler = proc_dointvec_minmax, | ||
1589 | .extra1 = (void *)&mmap_rnd_compat_bits_min, | ||
1590 | .extra2 = (void *)&mmap_rnd_compat_bits_max, | ||
1591 | }, | ||
1592 | #endif | ||
1571 | { } | 1593 | { } |
1572 | }; | 1594 | }; |
1573 | 1595 | ||
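These sysctls expose how many bits of mmap base randomization are applied. Each architecture selecting CONFIG_HAVE_ARCH_MMAP_RND_BITS consumes the value in its arch_mmap_rnd(); a simplified sketch modeled on the x86 version in this series (the compat branch is omitted):

	unsigned long arch_mmap_rnd(void)
	{
		unsigned long rnd;

		/* Keep mmap_rnd_bits low-order random bits, shifted
		 * into page units when the mmap base is composed. */
		rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);

		return rnd << PAGE_SHIFT;
	}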
diff --git a/lib/dma-debug.c b/lib/dma-debug.c index d34bd24c2c84..4a1515f4b452 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c | |||
@@ -1181,7 +1181,7 @@ static inline bool overlap(void *addr, unsigned long len, void *start, void *end | |||
1181 | 1181 | ||
1182 | static void check_for_illegal_area(struct device *dev, void *addr, unsigned long len) | 1182 | static void check_for_illegal_area(struct device *dev, void *addr, unsigned long len) |
1183 | { | 1183 | { |
1184 | if (overlap(addr, len, _text, _etext) || | 1184 | if (overlap(addr, len, _stext, _etext) || |
1185 | overlap(addr, len, __start_rodata, __end_rodata)) | 1185 | overlap(addr, len, __start_rodata, __end_rodata)) |
1186 | err_printk(dev, NULL, "DMA-API: device driver maps memory from kernel text or rodata [addr=%p] [len=%lu]\n", addr, len); | 1186 | err_printk(dev, NULL, "DMA-API: device driver maps memory from kernel text or rodata [addr=%p] [len=%lu]\n", addr, len); |
1187 | } | 1187 | } |
diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 7340353f8aea..cc5d29d2da9b 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c | |||
@@ -672,7 +672,7 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi) | |||
672 | 672 | ||
673 | ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL); | 673 | ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL); |
674 | if (!ret) { | 674 | if (!ret) { |
675 | bdi->wb.memcg_css = mem_cgroup_root_css; | 675 | bdi->wb.memcg_css = &root_mem_cgroup->css; |
676 | bdi->wb.blkcg_css = blkcg_root_css; | 676 | bdi->wb.blkcg_css = blkcg_root_css; |
677 | } | 677 | } |
678 | return ret; | 678 | return ret; |
diff --git a/mm/compaction.c b/mm/compaction.c index de3e1e71cd9f..585de54dbe8c 100644 --- a/mm/compaction.c +++ b/mm/compaction.c | |||
@@ -1658,14 +1658,15 @@ static void __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc) | |||
1658 | !compaction_deferred(zone, cc->order)) | 1658 | !compaction_deferred(zone, cc->order)) |
1659 | compact_zone(zone, cc); | 1659 | compact_zone(zone, cc); |
1660 | 1660 | ||
1661 | if (cc->order > 0) { | ||
1662 | if (zone_watermark_ok(zone, cc->order, | ||
1663 | low_wmark_pages(zone), 0, 0)) | ||
1664 | compaction_defer_reset(zone, cc->order, false); | ||
1665 | } | ||
1666 | |||
1667 | VM_BUG_ON(!list_empty(&cc->freepages)); | 1661 | VM_BUG_ON(!list_empty(&cc->freepages)); |
1668 | VM_BUG_ON(!list_empty(&cc->migratepages)); | 1662 | VM_BUG_ON(!list_empty(&cc->migratepages)); |
1663 | |||
1664 | if (is_via_compact_memory(cc->order)) | ||
1665 | continue; | ||
1666 | |||
1667 | if (zone_watermark_ok(zone, cc->order, | ||
1668 | low_wmark_pages(zone), 0, 0)) | ||
1669 | compaction_defer_reset(zone, cc->order, false); | ||
1669 | } | 1670 | } |
1670 | } | 1671 | } |
1671 | 1672 | ||
@@ -1708,7 +1709,10 @@ static void compact_nodes(void) | |||
1708 | /* The written value is actually unused, all memory is compacted */ | 1709 | /* The written value is actually unused, all memory is compacted */ |
1709 | int sysctl_compact_memory; | 1710 | int sysctl_compact_memory; |
1710 | 1711 | ||
1711 | /* This is the entry point for compacting all nodes via /proc/sys/vm */ | 1712 | /* |
1713 | * This is the entry point for compacting all nodes via | ||
1714 | * /proc/sys/vm/compact_memory | ||
1715 | */ | ||
1712 | int sysctl_compaction_handler(struct ctl_table *table, int write, | 1716 | int sysctl_compaction_handler(struct ctl_table *table, int write, |
1713 | void __user *buffer, size_t *length, loff_t *ppos) | 1717 | void __user *buffer, size_t *length, loff_t *ppos) |
1714 | { | 1718 | { |
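The reordering above leans on is_via_compact_memory(), which names the long-standing convention that order == -1 means compaction was requested through /proc/sys/vm/compact_memory; watermark checks and deferral bookkeeping are skipped for such full-node runs. The helper, defined earlier in mm/compaction.c:

	/*
	 * order == -1 is expected when compacting via
	 * /proc/sys/vm/compact_memory
	 */
	static inline bool is_via_compact_memory(int order)
	{
		return order == -1;
	}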
diff --git a/mm/debug.c b/mm/debug.c index 668aa35191ca..5d2072ed8d5e 100644 --- a/mm/debug.c +++ b/mm/debug.c | |||
@@ -175,7 +175,7 @@ void dump_mm(const struct mm_struct *mm) | |||
175 | "mmap_base %lu mmap_legacy_base %lu highest_vm_end %lu\n" | 175 | "mmap_base %lu mmap_legacy_base %lu highest_vm_end %lu\n" |
176 | "pgd %p mm_users %d mm_count %d nr_ptes %lu nr_pmds %lu map_count %d\n" | 176 | "pgd %p mm_users %d mm_count %d nr_ptes %lu nr_pmds %lu map_count %d\n" |
177 | "hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n" | 177 | "hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n" |
178 | "pinned_vm %lx shared_vm %lx exec_vm %lx stack_vm %lx\n" | 178 | "pinned_vm %lx data_vm %lx exec_vm %lx stack_vm %lx\n" |
179 | "start_code %lx end_code %lx start_data %lx end_data %lx\n" | 179 | "start_code %lx end_code %lx start_data %lx end_data %lx\n" |
180 | "start_brk %lx brk %lx start_stack %lx\n" | 180 | "start_brk %lx brk %lx start_stack %lx\n" |
181 | "arg_start %lx arg_end %lx env_start %lx env_end %lx\n" | 181 | "arg_start %lx arg_end %lx env_start %lx env_end %lx\n" |
@@ -209,7 +209,7 @@ void dump_mm(const struct mm_struct *mm) | |||
209 | mm_nr_pmds((struct mm_struct *)mm), | 209 | mm_nr_pmds((struct mm_struct *)mm), |
210 | mm->map_count, | 210 | mm->map_count, |
211 | mm->hiwater_rss, mm->hiwater_vm, mm->total_vm, mm->locked_vm, | 211 | mm->hiwater_rss, mm->hiwater_vm, mm->total_vm, mm->locked_vm, |
212 | mm->pinned_vm, mm->shared_vm, mm->exec_vm, mm->stack_vm, | 212 | mm->pinned_vm, mm->data_vm, mm->exec_vm, mm->stack_vm, |
213 | mm->start_code, mm->end_code, mm->start_data, mm->end_data, | 213 | mm->start_code, mm->end_code, mm->start_data, mm->end_data, |
214 | mm->start_brk, mm->brk, mm->start_stack, | 214 | mm->start_brk, mm->brk, mm->start_stack, |
215 | mm->arg_start, mm->arg_end, mm->env_start, mm->env_end, | 215 | mm->arg_start, mm->arg_end, mm->env_start, mm->env_end, |
diff --git a/mm/filemap.c b/mm/filemap.c index 1bb007624b53..ff42d31c891a 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -1812,19 +1812,18 @@ EXPORT_SYMBOL(generic_file_read_iter); | |||
1812 | * This adds the requested page to the page cache if it isn't already there, | 1812 | * This adds the requested page to the page cache if it isn't already there, |
1813 | * and schedules an I/O to read in its contents from disk. | 1813 | * and schedules an I/O to read in its contents from disk. |
1814 | */ | 1814 | */ |
1815 | static int page_cache_read(struct file *file, pgoff_t offset) | 1815 | static int page_cache_read(struct file *file, pgoff_t offset, gfp_t gfp_mask) |
1816 | { | 1816 | { |
1817 | struct address_space *mapping = file->f_mapping; | 1817 | struct address_space *mapping = file->f_mapping; |
1818 | struct page *page; | 1818 | struct page *page; |
1819 | int ret; | 1819 | int ret; |
1820 | 1820 | ||
1821 | do { | 1821 | do { |
1822 | page = page_cache_alloc_cold(mapping); | 1822 | page = __page_cache_alloc(gfp_mask|__GFP_COLD); |
1823 | if (!page) | 1823 | if (!page) |
1824 | return -ENOMEM; | 1824 | return -ENOMEM; |
1825 | 1825 | ||
1826 | ret = add_to_page_cache_lru(page, mapping, offset, | 1826 | ret = add_to_page_cache_lru(page, mapping, offset, gfp_mask & GFP_KERNEL); |
1827 | mapping_gfp_constraint(mapping, GFP_KERNEL)); | ||
1828 | if (ret == 0) | 1827 | if (ret == 0) |
1829 | ret = mapping->a_ops->readpage(file, page); | 1828 | ret = mapping->a_ops->readpage(file, page); |
1830 | else if (ret == -EEXIST) | 1829 | else if (ret == -EEXIST) |
@@ -2005,7 +2004,7 @@ no_cached_page: | |||
2005 | * We're only likely to ever get here if MADV_RANDOM is in | 2004 | * We're only likely to ever get here if MADV_RANDOM is in |
2006 | * effect. | 2005 | * effect. |
2007 | */ | 2006 | */ |
2008 | error = page_cache_read(file, offset); | 2007 | error = page_cache_read(file, offset, vmf->gfp_mask); |
2009 | 2008 | ||
2010 | /* | 2009 | /* |
2011 | * The page we want has now been added to the page cache. | 2010 | * The page we want has now been added to the page cache. |
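Threading vmf->gfp_mask through page_cache_read() makes fault-time page-cache allocations honor the fault's allocation context (for example a mapping whose gfp mask forbids filesystem recursion). The mask is assembled when the fault is set up; a hedged sketch of the shape of that helper as this series adds it to mm/memory.c:

	static inline gfp_t __get_fault_gfp_mask(struct vm_area_struct *vma)
	{
		struct file *vm_file = vma->vm_file;

		if (vm_file)
			return mapping_gfp_mask(vm_file->f_mapping)
				| __GFP_FS | __GFP_IO;

		/* Special mappings (e.g. the VDSO) have no file. */
		return GFP_KERNEL;
	}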
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 62fe06bb7d04..f952f055fdcf 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
@@ -31,6 +31,33 @@ | |||
31 | #include <asm/pgalloc.h> | 31 | #include <asm/pgalloc.h> |
32 | #include "internal.h" | 32 | #include "internal.h" |
33 | 33 | ||
34 | enum scan_result { | ||
35 | SCAN_FAIL, | ||
36 | SCAN_SUCCEED, | ||
37 | SCAN_PMD_NULL, | ||
38 | SCAN_EXCEED_NONE_PTE, | ||
39 | SCAN_PTE_NON_PRESENT, | ||
40 | SCAN_PAGE_RO, | ||
41 | SCAN_NO_REFERENCED_PAGE, | ||
42 | SCAN_PAGE_NULL, | ||
43 | SCAN_SCAN_ABORT, | ||
44 | SCAN_PAGE_COUNT, | ||
45 | SCAN_PAGE_LRU, | ||
46 | SCAN_PAGE_LOCK, | ||
47 | SCAN_PAGE_ANON, | ||
48 | SCAN_ANY_PROCESS, | ||
49 | SCAN_VMA_NULL, | ||
50 | SCAN_VMA_CHECK, | ||
51 | SCAN_ADDRESS_RANGE, | ||
52 | SCAN_SWAP_CACHE_PAGE, | ||
53 | SCAN_DEL_PAGE_LRU, | ||
54 | SCAN_ALLOC_HUGE_PAGE_FAIL, | ||
55 | SCAN_CGROUP_CHARGE_FAIL | ||
56 | }; | ||
57 | |||
58 | #define CREATE_TRACE_POINTS | ||
59 | #include <trace/events/huge_memory.h> | ||
60 | |||
34 | /* | 61 | /* |
35 | * By default transparent hugepage support is disabled in order to avoid | 62 | * By default transparent hugepage support is disabled in order to avoid |
36 | * risking an increased memory footprint of applications without a guaranteed | 63 | * risking an increased memory footprint of applications without a guaranteed |
@@ -2198,26 +2225,33 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, | |||
2198 | unsigned long address, | 2225 | unsigned long address, |
2199 | pte_t *pte) | 2226 | pte_t *pte) |
2200 | { | 2227 | { |
2201 | struct page *page; | 2228 | struct page *page = NULL; |
2202 | pte_t *_pte; | 2229 | pte_t *_pte; |
2203 | int none_or_zero = 0; | 2230 | int none_or_zero = 0, result = 0; |
2204 | bool referenced = false, writable = false; | 2231 | bool referenced = false, writable = false; |
2232 | |||
2205 | for (_pte = pte; _pte < pte+HPAGE_PMD_NR; | 2233 | for (_pte = pte; _pte < pte+HPAGE_PMD_NR; |
2206 | _pte++, address += PAGE_SIZE) { | 2234 | _pte++, address += PAGE_SIZE) { |
2207 | pte_t pteval = *_pte; | 2235 | pte_t pteval = *_pte; |
2208 | if (pte_none(pteval) || (pte_present(pteval) && | 2236 | if (pte_none(pteval) || (pte_present(pteval) && |
2209 | is_zero_pfn(pte_pfn(pteval)))) { | 2237 | is_zero_pfn(pte_pfn(pteval)))) { |
2210 | if (!userfaultfd_armed(vma) && | 2238 | if (!userfaultfd_armed(vma) && |
2211 | ++none_or_zero <= khugepaged_max_ptes_none) | 2239 | ++none_or_zero <= khugepaged_max_ptes_none) { |
2212 | continue; | 2240 | continue; |
2213 | else | 2241 | } else { |
2242 | result = SCAN_EXCEED_NONE_PTE; | ||
2214 | goto out; | 2243 | goto out; |
2244 | } | ||
2215 | } | 2245 | } |
2216 | if (!pte_present(pteval)) | 2246 | if (!pte_present(pteval)) { |
2247 | result = SCAN_PTE_NON_PRESENT; | ||
2217 | goto out; | 2248 | goto out; |
2249 | } | ||
2218 | page = vm_normal_page(vma, address, pteval); | 2250 | page = vm_normal_page(vma, address, pteval); |
2219 | if (unlikely(!page)) | 2251 | if (unlikely(!page)) { |
2252 | result = SCAN_PAGE_NULL; | ||
2220 | goto out; | 2253 | goto out; |
2254 | } | ||
2221 | 2255 | ||
2222 | VM_BUG_ON_PAGE(PageCompound(page), page); | 2256 | VM_BUG_ON_PAGE(PageCompound(page), page); |
2223 | VM_BUG_ON_PAGE(!PageAnon(page), page); | 2257 | VM_BUG_ON_PAGE(!PageAnon(page), page); |
@@ -2229,8 +2263,10 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, | |||
2229 | * is needed to serialize against split_huge_page | 2263 | * is needed to serialize against split_huge_page |
2230 | * when invoked from the VM. | 2264 | * when invoked from the VM. |
2231 | */ | 2265 | */ |
2232 | if (!trylock_page(page)) | 2266 | if (!trylock_page(page)) { |
2267 | result = SCAN_PAGE_LOCK; | ||
2233 | goto out; | 2268 | goto out; |
2269 | } | ||
2234 | 2270 | ||
2235 | /* | 2271 | /* |
2236 | * cannot use mapcount: can't collapse if there's a gup pin. | 2272 | * cannot use mapcount: can't collapse if there's a gup pin. |
@@ -2239,6 +2275,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, | |||
2239 | */ | 2275 | */ |
2240 | if (page_count(page) != 1 + !!PageSwapCache(page)) { | 2276 | if (page_count(page) != 1 + !!PageSwapCache(page)) { |
2241 | unlock_page(page); | 2277 | unlock_page(page); |
2278 | result = SCAN_PAGE_COUNT; | ||
2242 | goto out; | 2279 | goto out; |
2243 | } | 2280 | } |
2244 | if (pte_write(pteval)) { | 2281 | if (pte_write(pteval)) { |
@@ -2246,6 +2283,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, | |||
2246 | } else { | 2283 | } else { |
2247 | if (PageSwapCache(page) && !reuse_swap_page(page)) { | 2284 | if (PageSwapCache(page) && !reuse_swap_page(page)) { |
2248 | unlock_page(page); | 2285 | unlock_page(page); |
2286 | result = SCAN_SWAP_CACHE_PAGE; | ||
2249 | goto out; | 2287 | goto out; |
2250 | } | 2288 | } |
2251 | /* | 2289 | /* |
@@ -2260,6 +2298,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, | |||
2260 | */ | 2298 | */ |
2261 | if (isolate_lru_page(page)) { | 2299 | if (isolate_lru_page(page)) { |
2262 | unlock_page(page); | 2300 | unlock_page(page); |
2301 | result = SCAN_DEL_PAGE_LRU; | ||
2263 | goto out; | 2302 | goto out; |
2264 | } | 2303 | } |
2265 | /* 0 stands for page_is_file_cache(page) == false */ | 2304 | /* 0 stands for page_is_file_cache(page) == false */ |
@@ -2273,10 +2312,21 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, | |||
2273 | mmu_notifier_test_young(vma->vm_mm, address)) | 2312 | mmu_notifier_test_young(vma->vm_mm, address)) |
2274 | referenced = true; | 2313 | referenced = true; |
2275 | } | 2314 | } |
2276 | if (likely(referenced && writable)) | 2315 | if (likely(writable)) { |
2277 | return 1; | 2316 | if (likely(referenced)) { |
2317 | result = SCAN_SUCCEED; | ||
2318 | trace_mm_collapse_huge_page_isolate(page_to_pfn(page), none_or_zero, | ||
2319 | referenced, writable, result); | ||
2320 | return 1; | ||
2321 | } | ||
2322 | } else { | ||
2323 | result = SCAN_PAGE_RO; | ||
2324 | } | ||
2325 | |||
2278 | out: | 2326 | out: |
2279 | release_pte_pages(pte, _pte); | 2327 | release_pte_pages(pte, _pte); |
2328 | trace_mm_collapse_huge_page_isolate(page_to_pfn(page), none_or_zero, | ||
2329 | referenced, writable, result); | ||
2280 | return 0; | 2330 | return 0; |
2281 | } | 2331 | } |
2282 | 2332 | ||
@@ -2513,7 +2563,7 @@ static void collapse_huge_page(struct mm_struct *mm, | |||
2513 | pgtable_t pgtable; | 2563 | pgtable_t pgtable; |
2514 | struct page *new_page; | 2564 | struct page *new_page; |
2515 | spinlock_t *pmd_ptl, *pte_ptl; | 2565 | spinlock_t *pmd_ptl, *pte_ptl; |
2516 | int isolated; | 2566 | int isolated, result = 0; |
2517 | unsigned long hstart, hend; | 2567 | unsigned long hstart, hend; |
2518 | struct mem_cgroup *memcg; | 2568 | struct mem_cgroup *memcg; |
2519 | unsigned long mmun_start; /* For mmu_notifiers */ | 2569 | unsigned long mmun_start; /* For mmu_notifiers */ |
@@ -2528,12 +2578,15 @@ static void collapse_huge_page(struct mm_struct *mm, | |||
2528 | 2578 | ||
2529 | /* release the mmap_sem read lock. */ | 2579 | /* release the mmap_sem read lock. */ |
2530 | new_page = khugepaged_alloc_page(hpage, gfp, mm, address, node); | 2580 | new_page = khugepaged_alloc_page(hpage, gfp, mm, address, node); |
2531 | if (!new_page) | 2581 | if (!new_page) { |
2532 | return; | 2582 | result = SCAN_ALLOC_HUGE_PAGE_FAIL; |
2583 | goto out_nolock; | ||
2584 | } | ||
2533 | 2585 | ||
2534 | if (unlikely(mem_cgroup_try_charge(new_page, mm, | 2586 | if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg))) { |
2535 | gfp, &memcg))) | 2587 | result = SCAN_CGROUP_CHARGE_FAIL; |
2536 | return; | 2588 | goto out_nolock; |
2589 | } | ||
2537 | 2590 | ||
2538 | /* | 2591 | /* |
2539 | * Prevent all access to pagetables with the exception of | 2592 | * Prevent all access to pagetables with the exception of |
@@ -2541,21 +2594,31 @@ static void collapse_huge_page(struct mm_struct *mm, | |||
2541 | * handled by the anon_vma lock + PG_lock. | 2594 | * handled by the anon_vma lock + PG_lock. |
2542 | */ | 2595 | */ |
2543 | down_write(&mm->mmap_sem); | 2596 | down_write(&mm->mmap_sem); |
2544 | if (unlikely(khugepaged_test_exit(mm))) | 2597 | if (unlikely(khugepaged_test_exit(mm))) { |
2598 | result = SCAN_ANY_PROCESS; | ||
2545 | goto out; | 2599 | goto out; |
2600 | } | ||
2546 | 2601 | ||
2547 | vma = find_vma(mm, address); | 2602 | vma = find_vma(mm, address); |
2548 | if (!vma) | 2603 | if (!vma) { |
2604 | result = SCAN_VMA_NULL; | ||
2549 | goto out; | 2605 | goto out; |
2606 | } | ||
2550 | hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; | 2607 | hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; |
2551 | hend = vma->vm_end & HPAGE_PMD_MASK; | 2608 | hend = vma->vm_end & HPAGE_PMD_MASK; |
2552 | if (address < hstart || address + HPAGE_PMD_SIZE > hend) | 2609 | if (address < hstart || address + HPAGE_PMD_SIZE > hend) { |
2610 | result = SCAN_ADDRESS_RANGE; | ||
2553 | goto out; | 2611 | goto out; |
2554 | if (!hugepage_vma_check(vma)) | 2612 | } |
2613 | if (!hugepage_vma_check(vma)) { | ||
2614 | result = SCAN_VMA_CHECK; | ||
2555 | goto out; | 2615 | goto out; |
2616 | } | ||
2556 | pmd = mm_find_pmd(mm, address); | 2617 | pmd = mm_find_pmd(mm, address); |
2557 | if (!pmd) | 2618 | if (!pmd) { |
2619 | result = SCAN_PMD_NULL; | ||
2558 | goto out; | 2620 | goto out; |
2621 | } | ||
2559 | 2622 | ||
2560 | anon_vma_lock_write(vma->anon_vma); | 2623 | anon_vma_lock_write(vma->anon_vma); |
2561 | 2624 | ||
@@ -2592,6 +2655,7 @@ static void collapse_huge_page(struct mm_struct *mm, | |||
2592 | pmd_populate(mm, pmd, pmd_pgtable(_pmd)); | 2655 | pmd_populate(mm, pmd, pmd_pgtable(_pmd)); |
2593 | spin_unlock(pmd_ptl); | 2656 | spin_unlock(pmd_ptl); |
2594 | anon_vma_unlock_write(vma->anon_vma); | 2657 | anon_vma_unlock_write(vma->anon_vma); |
2658 | result = SCAN_FAIL; | ||
2595 | goto out; | 2659 | goto out; |
2596 | } | 2660 | } |
2597 | 2661 | ||
@@ -2629,10 +2693,15 @@ static void collapse_huge_page(struct mm_struct *mm, | |||
2629 | *hpage = NULL; | 2693 | *hpage = NULL; |
2630 | 2694 | ||
2631 | khugepaged_pages_collapsed++; | 2695 | khugepaged_pages_collapsed++; |
2696 | result = SCAN_SUCCEED; | ||
2632 | out_up_write: | 2697 | out_up_write: |
2633 | up_write(&mm->mmap_sem); | 2698 | up_write(&mm->mmap_sem); |
2699 | trace_mm_collapse_huge_page(mm, isolated, result); | ||
2634 | return; | 2700 | return; |
2635 | 2701 | ||
2702 | out_nolock: | ||
2703 | trace_mm_collapse_huge_page(mm, isolated, result); | ||
2704 | return; | ||
2636 | out: | 2705 | out: |
2637 | mem_cgroup_cancel_charge(new_page, memcg); | 2706 | mem_cgroup_cancel_charge(new_page, memcg); |
2638 | goto out_up_write; | 2707 | goto out_up_write; |
@@ -2645,8 +2714,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, | |||
2645 | { | 2714 | { |
2646 | pmd_t *pmd; | 2715 | pmd_t *pmd; |
2647 | pte_t *pte, *_pte; | 2716 | pte_t *pte, *_pte; |
2648 | int ret = 0, none_or_zero = 0; | 2717 | int ret = 0, none_or_zero = 0, result = 0; |
2649 | struct page *page; | 2718 | struct page *page = NULL; |
2650 | unsigned long _address; | 2719 | unsigned long _address; |
2651 | spinlock_t *ptl; | 2720 | spinlock_t *ptl; |
2652 | int node = NUMA_NO_NODE; | 2721 | int node = NUMA_NO_NODE; |
@@ -2655,8 +2724,10 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, | |||
2655 | VM_BUG_ON(address & ~HPAGE_PMD_MASK); | 2724 | VM_BUG_ON(address & ~HPAGE_PMD_MASK); |
2656 | 2725 | ||
2657 | pmd = mm_find_pmd(mm, address); | 2726 | pmd = mm_find_pmd(mm, address); |
2658 | if (!pmd) | 2727 | if (!pmd) { |
2728 | result = SCAN_PMD_NULL; | ||
2659 | goto out; | 2729 | goto out; |
2730 | } | ||
2660 | 2731 | ||
2661 | memset(khugepaged_node_load, 0, sizeof(khugepaged_node_load)); | 2732 | memset(khugepaged_node_load, 0, sizeof(khugepaged_node_load)); |
2662 | pte = pte_offset_map_lock(mm, pmd, address, &ptl); | 2733 | pte = pte_offset_map_lock(mm, pmd, address, &ptl); |
@@ -2665,19 +2736,25 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, | |||
2665 | pte_t pteval = *_pte; | 2736 | pte_t pteval = *_pte; |
2666 | if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) { | 2737 | if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) { |
2667 | if (!userfaultfd_armed(vma) && | 2738 | if (!userfaultfd_armed(vma) && |
2668 | ++none_or_zero <= khugepaged_max_ptes_none) | 2739 | ++none_or_zero <= khugepaged_max_ptes_none) { |
2669 | continue; | 2740 | continue; |
2670 | else | 2741 | } else { |
2742 | result = SCAN_EXCEED_NONE_PTE; | ||
2671 | goto out_unmap; | 2743 | goto out_unmap; |
2744 | } | ||
2672 | } | 2745 | } |
2673 | if (!pte_present(pteval)) | 2746 | if (!pte_present(pteval)) { |
2747 | result = SCAN_PTE_NON_PRESENT; | ||
2674 | goto out_unmap; | 2748 | goto out_unmap; |
2749 | } | ||
2675 | if (pte_write(pteval)) | 2750 | if (pte_write(pteval)) |
2676 | writable = true; | 2751 | writable = true; |
2677 | 2752 | ||
2678 | page = vm_normal_page(vma, _address, pteval); | 2753 | page = vm_normal_page(vma, _address, pteval); |
2679 | if (unlikely(!page)) | 2754 | if (unlikely(!page)) { |
2755 | result = SCAN_PAGE_NULL; | ||
2680 | goto out_unmap; | 2756 | goto out_unmap; |
2757 | } | ||
2681 | /* | 2758 | /* |
2682 | * Record which node the original page is from and save this | 2759 | * Record which node the original page is from and save this |
2683 | * information to khugepaged_node_load[]. | 2760 | * information to khugepaged_node_load[]. |
@@ -2685,26 +2762,49 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, | |||
2685 | * hit record. | 2762 | * hit record. |
2686 | */ | 2763 | */ |
2687 | node = page_to_nid(page); | 2764 | node = page_to_nid(page); |
2688 | if (khugepaged_scan_abort(node)) | 2765 | if (khugepaged_scan_abort(node)) { |
2766 | result = SCAN_SCAN_ABORT; | ||
2689 | goto out_unmap; | 2767 | goto out_unmap; |
2768 | } | ||
2690 | khugepaged_node_load[node]++; | 2769 | khugepaged_node_load[node]++; |
2691 | VM_BUG_ON_PAGE(PageCompound(page), page); | 2770 | VM_BUG_ON_PAGE(PageCompound(page), page); |
2692 | if (!PageLRU(page) || PageLocked(page) || !PageAnon(page)) | 2771 | if (!PageLRU(page)) { |
2772 | result = SCAN_PAGE_LRU; | ||
2773 | goto out_unmap; | ||
2774 | } | ||
2775 | if (PageLocked(page)) { | ||
2776 | result = SCAN_PAGE_LOCK; | ||
2777 | goto out_unmap; | ||
2778 | } | ||
2779 | if (!PageAnon(page)) { | ||
2780 | result = SCAN_PAGE_ANON; | ||
2693 | goto out_unmap; | 2781 | goto out_unmap; |
2782 | } | ||
2783 | |||
2694 | /* | 2784 | /* |
2695 | * cannot use mapcount: can't collapse if there's a gup pin. | 2785 | * cannot use mapcount: can't collapse if there's a gup pin. |
2696 | * The page must only be referenced by the scanned process | 2786 | * The page must only be referenced by the scanned process |
2697 | * and page swap cache. | 2787 | * and page swap cache. |
2698 | */ | 2788 | */ |
2699 | if (page_count(page) != 1 + !!PageSwapCache(page)) | 2789 | if (page_count(page) != 1 + !!PageSwapCache(page)) { |
2790 | result = SCAN_PAGE_COUNT; | ||
2700 | goto out_unmap; | 2791 | goto out_unmap; |
2792 | } | ||
2701 | if (pte_young(pteval) || | 2793 | if (pte_young(pteval) || |
2702 | page_is_young(page) || PageReferenced(page) || | 2794 | page_is_young(page) || PageReferenced(page) || |
2703 | mmu_notifier_test_young(vma->vm_mm, address)) | 2795 | mmu_notifier_test_young(vma->vm_mm, address)) |
2704 | referenced = true; | 2796 | referenced = true; |
2705 | } | 2797 | } |
2706 | if (referenced && writable) | 2798 | if (writable) { |
2707 | ret = 1; | 2799 | if (referenced) { |
2800 | result = SCAN_SUCCEED; | ||
2801 | ret = 1; | ||
2802 | } else { | ||
2803 | result = SCAN_NO_REFERENCED_PAGE; | ||
2804 | } | ||
2805 | } else { | ||
2806 | result = SCAN_PAGE_RO; | ||
2807 | } | ||
2708 | out_unmap: | 2808 | out_unmap: |
2709 | pte_unmap_unlock(pte, ptl); | 2809 | pte_unmap_unlock(pte, ptl); |
2710 | if (ret) { | 2810 | if (ret) { |
@@ -2713,6 +2813,8 @@ out_unmap: | |||
2713 | collapse_huge_page(mm, address, hpage, vma, node); | 2813 | collapse_huge_page(mm, address, hpage, vma, node); |
2714 | } | 2814 | } |
2715 | out: | 2815 | out: |
2816 | trace_mm_khugepaged_scan_pmd(mm, page_to_pfn(page), writable, referenced, | ||
2817 | none_or_zero, result); | ||
2716 | return ret; | 2818 | return ret; |
2717 | } | 2819 | } |
2718 | 2820 | ||
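
The khugepaged rework above replaces a bare success/failure return with a per-cause result code so that the new trace_mm_khugepaged_scan_pmd() call can report why a scan bailed out. Below is a minimal userspace sketch of that pattern; it reuses a few SCAN_* names from the diff, but the scan function itself and its parameters are made up for illustration:

#include <stdio.h>

/* Simplified analogue of the result codes added above; the SCAN_*
 * names mirror the diff, everything else is a stub. */
enum scan_result {
    SCAN_SUCCEED,
    SCAN_PMD_NULL,
    SCAN_EXCEED_NONE_PTE,
    SCAN_PAGE_RO,
};

static const char *scan_result_name(enum scan_result r)
{
    switch (r) {
    case SCAN_SUCCEED:         return "succeed";
    case SCAN_PMD_NULL:        return "pmd_null";
    case SCAN_EXCEED_NONE_PTE: return "exceed_none_pte";
    case SCAN_PAGE_RO:         return "page_ro";
    }
    return "unknown";
}

/* Every early exit records *why* it bailed, so a single exit point
 * can emit one trace event with the reason attached. */
static int scan_region(int none_ptes, int max_none, int writable,
                       enum scan_result *result)
{
    if (none_ptes > max_none) {
        *result = SCAN_EXCEED_NONE_PTE;
        return 0;
    }
    if (!writable) {
        *result = SCAN_PAGE_RO;
        return 0;
    }
    *result = SCAN_SUCCEED;
    return 1;
}

int main(void)
{
    enum scan_result result;
    int ret = scan_region(3, 2, 1, &result);

    printf("ret=%d result=%s\n", ret, scan_result_name(result));
    return 0;
}
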
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index ef6963b577fd..be934df69b85 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -4,7 +4,6 @@ | |||
4 | */ | 4 | */ |
5 | #include <linux/list.h> | 5 | #include <linux/list.h> |
6 | #include <linux/init.h> | 6 | #include <linux/init.h> |
7 | #include <linux/module.h> | ||
8 | #include <linux/mm.h> | 7 | #include <linux/mm.h> |
9 | #include <linux/seq_file.h> | 8 | #include <linux/seq_file.h> |
10 | #include <linux/sysctl.h> | 9 | #include <linux/sysctl.h> |
@@ -2549,25 +2548,6 @@ static void hugetlb_unregister_node(struct node *node) | |||
2549 | nhs->hugepages_kobj = NULL; | 2548 | nhs->hugepages_kobj = NULL; |
2550 | } | 2549 | } |
2551 | 2550 | ||
2552 | /* | ||
2553 | * hugetlb module exit: unregister hstate attributes from node devices | ||
2554 | * that have them. | ||
2555 | */ | ||
2556 | static void hugetlb_unregister_all_nodes(void) | ||
2557 | { | ||
2558 | int nid; | ||
2559 | |||
2560 | /* | ||
2561 | * disable node device registrations. | ||
2562 | */ | ||
2563 | register_hugetlbfs_with_node(NULL, NULL); | ||
2564 | |||
2565 | /* | ||
2566 | * remove hstate attributes from any nodes that have them. | ||
2567 | */ | ||
2568 | for (nid = 0; nid < nr_node_ids; nid++) | ||
2569 | hugetlb_unregister_node(node_devices[nid]); | ||
2570 | } | ||
2571 | 2551 | ||
2572 | /* | 2552 | /* |
2573 | * Register hstate attributes for a single node device. | 2553 | * Register hstate attributes for a single node device. |
@@ -2632,27 +2612,10 @@ static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp) | |||
2632 | return NULL; | 2612 | return NULL; |
2633 | } | 2613 | } |
2634 | 2614 | ||
2635 | static void hugetlb_unregister_all_nodes(void) { } | ||
2636 | |||
2637 | static void hugetlb_register_all_nodes(void) { } | 2615 | static void hugetlb_register_all_nodes(void) { } |
2638 | 2616 | ||
2639 | #endif | 2617 | #endif |
2640 | 2618 | ||
2641 | static void __exit hugetlb_exit(void) | ||
2642 | { | ||
2643 | struct hstate *h; | ||
2644 | |||
2645 | hugetlb_unregister_all_nodes(); | ||
2646 | |||
2647 | for_each_hstate(h) { | ||
2648 | kobject_put(hstate_kobjs[hstate_index(h)]); | ||
2649 | } | ||
2650 | |||
2651 | kobject_put(hugepages_kobj); | ||
2652 | kfree(hugetlb_fault_mutex_table); | ||
2653 | } | ||
2654 | module_exit(hugetlb_exit); | ||
2655 | |||
2656 | static int __init hugetlb_init(void) | 2619 | static int __init hugetlb_init(void) |
2657 | { | 2620 | { |
2658 | int i; | 2621 | int i; |
@@ -2690,7 +2653,7 @@ static int __init hugetlb_init(void) | |||
2690 | mutex_init(&hugetlb_fault_mutex_table[i]); | 2653 | mutex_init(&hugetlb_fault_mutex_table[i]); |
2691 | return 0; | 2654 | return 0; |
2692 | } | 2655 | } |
2693 | module_init(hugetlb_init); | 2656 | subsys_initcall(hugetlb_init); |
2694 | 2657 | ||
2695 | /* Should be called on processing a hugepagesz=... option */ | 2658 | /* Should be called on processing a hugepagesz=... option */ |
2696 | void __init hugetlb_add_hstate(unsigned int order) | 2659 | void __init hugetlb_add_hstate(unsigned int order) |
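
The hugetlb hunks above delete the module-exit path outright and demote module_init() to subsys_initcall(): this code is now explicitly built-in only, so it never unloads, and it should initialize before device-level code that may depend on it. GCC constructor priorities are a rough userspace analogue of those ordered initcall levels; the sketch below is purely illustrative (priorities up to 100 are reserved by the toolchain, and lower numbers run first):

#include <stdio.h>

__attribute__((constructor(101)))
static void subsys_level_init(void)
{
    puts("subsys level: core state ready before drivers");
}

__attribute__((constructor(102)))
static void device_level_init(void)
{
    puts("device level: can rely on subsys state");
}

int main(void)
{
    puts("main: all init levels already ran");
    return 0;
}
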
diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 19423a45d7d7..25c0ad36fe38 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c | |||
@@ -122,8 +122,7 @@ | |||
122 | #define BYTES_PER_POINTER sizeof(void *) | 122 | #define BYTES_PER_POINTER sizeof(void *) |
123 | 123 | ||
124 | /* GFP bitmask for kmemleak internal allocations */ | 124 | /* GFP bitmask for kmemleak internal allocations */ |
125 | #define gfp_kmemleak_mask(gfp) (((gfp) & (GFP_KERNEL | GFP_ATOMIC | \ | 125 | #define gfp_kmemleak_mask(gfp) (((gfp) & (GFP_KERNEL | GFP_ATOMIC)) | \ |
126 | __GFP_NOACCOUNT)) | \ | ||
127 | __GFP_NORETRY | __GFP_NOMEMALLOC | \ | 126 | __GFP_NORETRY | __GFP_NOMEMALLOC | \ |
128 | __GFP_NOWARN) | 127 | __GFP_NOWARN) |
129 | 128 | ||
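
With __GFP_NOACCOUNT removed from the kernel in this series, the kmemleak mask above shrinks to preserving only the caller's GFP_KERNEL/GFP_ATOMIC bits and then forcing the no-retry/no-mem-alloc/no-warn flags on. A tiny sketch of that filter-then-force pattern, using made-up flag values (the real GFP bits are kernel-internal):

#include <stdio.h>

#define F_KERNEL  0x01u
#define F_ATOMIC  0x02u
#define F_NORETRY 0x10u

/* Keep only the caller's KERNEL/ATOMIC bits, then force NORETRY on,
 * matching the grouping of the reworked macro above. */
#define mask_gfp(g) (((g) & (F_KERNEL | F_ATOMIC)) | F_NORETRY)

int main(void)
{
    unsigned int gfp = F_KERNEL | 0x80u; /* stray bit must be dropped */

    printf("%#x -> %#x\n", gfp, mask_gfp(gfp)); /* 0x81 -> 0x11 */
    return 0;
}
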
diff --git a/mm/ksm.c b/mm/ksm.c --- a/mm/ksm.c +++ b/mm/ksm.c | |||
@@ -740,8 +740,7 @@ static int remove_stable_node(struct stable_node *stable_node) | |||
740 | 740 | ||
741 | static int remove_all_stable_nodes(void) | 741 | static int remove_all_stable_nodes(void) |
742 | { | 742 | { |
743 | struct stable_node *stable_node; | 743 | struct stable_node *stable_node, *next; |
744 | struct list_head *this, *next; | ||
745 | int nid; | 744 | int nid; |
746 | int err = 0; | 745 | int err = 0; |
747 | 746 | ||
@@ -756,8 +755,7 @@ static int remove_all_stable_nodes(void) | |||
756 | cond_resched(); | 755 | cond_resched(); |
757 | } | 756 | } |
758 | } | 757 | } |
759 | list_for_each_safe(this, next, &migrate_nodes) { | 758 | list_for_each_entry_safe(stable_node, next, &migrate_nodes, list) { |
760 | stable_node = list_entry(this, struct stable_node, list); | ||
761 | if (remove_stable_node(stable_node)) | 759 | if (remove_stable_node(stable_node)) |
762 | err = -EBUSY; | 760 | err = -EBUSY; |
763 | cond_resched(); | 761 | cond_resched(); |
@@ -1583,13 +1581,11 @@ static struct rmap_item *scan_get_next_rmap_item(struct page **page) | |||
1583 | * so prune them once before each full scan. | 1581 | * so prune them once before each full scan. |
1584 | */ | 1582 | */ |
1585 | if (!ksm_merge_across_nodes) { | 1583 | if (!ksm_merge_across_nodes) { |
1586 | struct stable_node *stable_node; | 1584 | struct stable_node *stable_node, *next; |
1587 | struct list_head *this, *next; | ||
1588 | struct page *page; | 1585 | struct page *page; |
1589 | 1586 | ||
1590 | list_for_each_safe(this, next, &migrate_nodes) { | 1587 | list_for_each_entry_safe(stable_node, next, |
1591 | stable_node = list_entry(this, | 1588 | &migrate_nodes, list) { |
1592 | struct stable_node, list); | ||
1593 | page = get_ksm_page(stable_node, false); | 1589 | page = get_ksm_page(stable_node, false); |
1594 | if (page) | 1590 | if (page) |
1595 | put_page(page); | 1591 | put_page(page); |
@@ -2012,8 +2008,7 @@ static void wait_while_offlining(void) | |||
2012 | static void ksm_check_stable_tree(unsigned long start_pfn, | 2008 | static void ksm_check_stable_tree(unsigned long start_pfn, |
2013 | unsigned long end_pfn) | 2009 | unsigned long end_pfn) |
2014 | { | 2010 | { |
2015 | struct stable_node *stable_node; | 2011 | struct stable_node *stable_node, *next; |
2016 | struct list_head *this, *next; | ||
2017 | struct rb_node *node; | 2012 | struct rb_node *node; |
2018 | int nid; | 2013 | int nid; |
2019 | 2014 | ||
@@ -2034,8 +2029,7 @@ static void ksm_check_stable_tree(unsigned long start_pfn, | |||
2034 | cond_resched(); | 2029 | cond_resched(); |
2035 | } | 2030 | } |
2036 | } | 2031 | } |
2037 | list_for_each_safe(this, next, &migrate_nodes) { | 2032 | list_for_each_entry_safe(stable_node, next, &migrate_nodes, list) { |
2038 | stable_node = list_entry(this, struct stable_node, list); | ||
2039 | if (stable_node->kpfn >= start_pfn && | 2033 | if (stable_node->kpfn >= start_pfn && |
2040 | stable_node->kpfn < end_pfn) | 2034 | stable_node->kpfn < end_pfn) |
2041 | remove_node_from_stable_tree(stable_node); | 2035 | remove_node_from_stable_tree(stable_node); |
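
The ksm.c hunks above are a mechanical conversion from list_for_each_safe() plus a hand-rolled list_entry() to list_for_each_entry_safe(), which fetches the next entry before the loop body runs and therefore tolerates the current entry being removed or freed mid-walk. Here is a compact userspace rendition of the intrusive list and the _safe iterator, simplified from the kernel macros (typeof is the usual GNU extension):

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

struct list_head { struct list_head *next, *prev; };

#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))
#define list_entry(ptr, type, member) container_of(ptr, type, member)

#define list_for_each_entry_safe(pos, n, head, member)               \
    for (pos = list_entry((head)->next, typeof(*pos), member),       \
         n = list_entry(pos->member.next, typeof(*pos), member);     \
         &pos->member != (head);                                     \
         pos = n, n = list_entry(n->member.next, typeof(*n), member))

struct node { int val; struct list_head list; };

static void list_add_tail(struct list_head *new, struct list_head *head)
{
    new->prev = head->prev;
    new->next = head;
    head->prev->next = new;
    head->prev = new;
}

int main(void)
{
    struct list_head head = { &head, &head };
    struct node *pos, *n;

    for (int i = 0; i < 3; i++) {
        struct node *nd = malloc(sizeof(*nd));
        nd->val = i;
        list_add_tail(&nd->list, &head);
    }
    /* Freeing pos in the body is safe: n was captured beforehand. */
    list_for_each_entry_safe(pos, n, &head, list) {
        printf("%d\n", pos->val);
        free(pos);
    }
    return 0;
}
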
diff --git a/mm/memblock.c b/mm/memblock.c index 07ff069fef25..d2ed81e59a94 100644 --- a/mm/memblock.c +++ b/mm/memblock.c | |||
@@ -96,13 +96,10 @@ bool __init_memblock memblock_overlaps_region(struct memblock_type *type, | |||
96 | { | 96 | { |
97 | unsigned long i; | 97 | unsigned long i; |
98 | 98 | ||
99 | for (i = 0; i < type->cnt; i++) { | 99 | for (i = 0; i < type->cnt; i++) |
100 | phys_addr_t rgnbase = type->regions[i].base; | 100 | if (memblock_addrs_overlap(base, size, type->regions[i].base, |
101 | phys_addr_t rgnsize = type->regions[i].size; | 101 | type->regions[i].size)) |
102 | if (memblock_addrs_overlap(base, size, rgnbase, rgnsize)) | ||
103 | break; | 102 | break; |
104 | } | ||
105 | |||
106 | return i < type->cnt; | 103 | return i < type->cnt; |
107 | } | 104 | } |
108 | 105 | ||
@@ -528,7 +525,8 @@ int __init_memblock memblock_add_range(struct memblock_type *type, | |||
528 | bool insert = false; | 525 | bool insert = false; |
529 | phys_addr_t obase = base; | 526 | phys_addr_t obase = base; |
530 | phys_addr_t end = base + memblock_cap_size(base, &size); | 527 | phys_addr_t end = base + memblock_cap_size(base, &size); |
531 | int i, nr_new; | 528 | int idx, nr_new; |
529 | struct memblock_region *rgn; | ||
532 | 530 | ||
533 | if (!size) | 531 | if (!size) |
534 | return 0; | 532 | return 0; |
@@ -552,8 +550,7 @@ repeat: | |||
552 | base = obase; | 550 | base = obase; |
553 | nr_new = 0; | 551 | nr_new = 0; |
554 | 552 | ||
555 | for (i = 0; i < type->cnt; i++) { | 553 | for_each_memblock_type(type, rgn) { |
556 | struct memblock_region *rgn = &type->regions[i]; | ||
557 | phys_addr_t rbase = rgn->base; | 554 | phys_addr_t rbase = rgn->base; |
558 | phys_addr_t rend = rbase + rgn->size; | 555 | phys_addr_t rend = rbase + rgn->size; |
559 | 556 | ||
@@ -572,7 +569,7 @@ repeat: | |||
572 | WARN_ON(flags != rgn->flags); | 569 | WARN_ON(flags != rgn->flags); |
573 | nr_new++; | 570 | nr_new++; |
574 | if (insert) | 571 | if (insert) |
575 | memblock_insert_region(type, i++, base, | 572 | memblock_insert_region(type, idx++, base, |
576 | rbase - base, nid, | 573 | rbase - base, nid, |
577 | flags); | 574 | flags); |
578 | } | 575 | } |
@@ -584,7 +581,7 @@ repeat: | |||
584 | if (base < end) { | 581 | if (base < end) { |
585 | nr_new++; | 582 | nr_new++; |
586 | if (insert) | 583 | if (insert) |
587 | memblock_insert_region(type, i, base, end - base, | 584 | memblock_insert_region(type, idx, base, end - base, |
588 | nid, flags); | 585 | nid, flags); |
589 | } | 586 | } |
590 | 587 | ||
@@ -651,7 +648,8 @@ static int __init_memblock memblock_isolate_range(struct memblock_type *type, | |||
651 | int *start_rgn, int *end_rgn) | 648 | int *start_rgn, int *end_rgn) |
652 | { | 649 | { |
653 | phys_addr_t end = base + memblock_cap_size(base, &size); | 650 | phys_addr_t end = base + memblock_cap_size(base, &size); |
654 | int i; | 651 | int idx; |
652 | struct memblock_region *rgn; | ||
655 | 653 | ||
656 | *start_rgn = *end_rgn = 0; | 654 | *start_rgn = *end_rgn = 0; |
657 | 655 | ||
@@ -663,8 +661,7 @@ static int __init_memblock memblock_isolate_range(struct memblock_type *type, | |||
663 | if (memblock_double_array(type, base, size) < 0) | 661 | if (memblock_double_array(type, base, size) < 0) |
664 | return -ENOMEM; | 662 | return -ENOMEM; |
665 | 663 | ||
666 | for (i = 0; i < type->cnt; i++) { | 664 | for_each_memblock_type(type, rgn) { |
667 | struct memblock_region *rgn = &type->regions[i]; | ||
668 | phys_addr_t rbase = rgn->base; | 665 | phys_addr_t rbase = rgn->base; |
669 | phys_addr_t rend = rbase + rgn->size; | 666 | phys_addr_t rend = rbase + rgn->size; |
670 | 667 | ||
@@ -681,7 +678,7 @@ static int __init_memblock memblock_isolate_range(struct memblock_type *type, | |||
681 | rgn->base = base; | 678 | rgn->base = base; |
682 | rgn->size -= base - rbase; | 679 | rgn->size -= base - rbase; |
683 | type->total_size -= base - rbase; | 680 | type->total_size -= base - rbase; |
684 | memblock_insert_region(type, i, rbase, base - rbase, | 681 | memblock_insert_region(type, idx, rbase, base - rbase, |
685 | memblock_get_region_node(rgn), | 682 | memblock_get_region_node(rgn), |
686 | rgn->flags); | 683 | rgn->flags); |
687 | } else if (rend > end) { | 684 | } else if (rend > end) { |
@@ -692,14 +689,14 @@ static int __init_memblock memblock_isolate_range(struct memblock_type *type, | |||
692 | rgn->base = end; | 689 | rgn->base = end; |
693 | rgn->size -= end - rbase; | 690 | rgn->size -= end - rbase; |
694 | type->total_size -= end - rbase; | 691 | type->total_size -= end - rbase; |
695 | memblock_insert_region(type, i--, rbase, end - rbase, | 692 | memblock_insert_region(type, idx--, rbase, end - rbase, |
696 | memblock_get_region_node(rgn), | 693 | memblock_get_region_node(rgn), |
697 | rgn->flags); | 694 | rgn->flags); |
698 | } else { | 695 | } else { |
699 | /* @rgn is fully contained, record it */ | 696 | /* @rgn is fully contained, record it */ |
700 | if (!*end_rgn) | 697 | if (!*end_rgn) |
701 | *start_rgn = i; | 698 | *start_rgn = idx; |
702 | *end_rgn = i + 1; | 699 | *end_rgn = idx + 1; |
703 | } | 700 | } |
704 | } | 701 | } |
705 | 702 | ||
@@ -1528,12 +1525,12 @@ static int __init_memblock memblock_search(struct memblock_type *type, phys_addr | |||
1528 | return -1; | 1525 | return -1; |
1529 | } | 1526 | } |
1530 | 1527 | ||
1531 | int __init memblock_is_reserved(phys_addr_t addr) | 1528 | bool __init memblock_is_reserved(phys_addr_t addr) |
1532 | { | 1529 | { |
1533 | return memblock_search(&memblock.reserved, addr) != -1; | 1530 | return memblock_search(&memblock.reserved, addr) != -1; |
1534 | } | 1531 | } |
1535 | 1532 | ||
1536 | int __init_memblock memblock_is_memory(phys_addr_t addr) | 1533 | bool __init_memblock memblock_is_memory(phys_addr_t addr) |
1537 | { | 1534 | { |
1538 | return memblock_search(&memblock.memory, addr) != -1; | 1535 | return memblock_search(&memblock.memory, addr) != -1; |
1539 | } | 1536 | } |
@@ -1641,12 +1638,12 @@ static void __init_memblock memblock_dump(struct memblock_type *type, char *name | |||
1641 | { | 1638 | { |
1642 | unsigned long long base, size; | 1639 | unsigned long long base, size; |
1643 | unsigned long flags; | 1640 | unsigned long flags; |
1644 | int i; | 1641 | int idx; |
1642 | struct memblock_region *rgn; | ||
1645 | 1643 | ||
1646 | pr_info(" %s.cnt = 0x%lx\n", name, type->cnt); | 1644 | pr_info(" %s.cnt = 0x%lx\n", name, type->cnt); |
1647 | 1645 | ||
1648 | for (i = 0; i < type->cnt; i++) { | 1646 | for_each_memblock_type(type, rgn) { |
1649 | struct memblock_region *rgn = &type->regions[i]; | ||
1650 | char nid_buf[32] = ""; | 1647 | char nid_buf[32] = ""; |
1651 | 1648 | ||
1652 | base = rgn->base; | 1649 | base = rgn->base; |
@@ -1658,7 +1655,7 @@ static void __init_memblock memblock_dump(struct memblock_type *type, char *name | |||
1658 | memblock_get_region_node(rgn)); | 1655 | memblock_get_region_node(rgn)); |
1659 | #endif | 1656 | #endif |
1660 | pr_info(" %s[%#x]\t[%#016llx-%#016llx], %#llx bytes%s flags: %#lx\n", | 1657 | pr_info(" %s[%#x]\t[%#016llx-%#016llx], %#llx bytes%s flags: %#lx\n", |
1661 | name, i, base, base + size - 1, size, nid_buf, flags); | 1658 | name, idx, base, base + size - 1, size, nid_buf, flags); |
1662 | } | 1659 | } |
1663 | } | 1660 | } |
1664 | 1661 | ||
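
The memblock.c conversions above switch open-coded region loops to a for_each_memblock_type() iterator. Its definition is not part of these hunks; the sketch below shows a plausible shape consistent with the converted loops. Notably it leans on a caller-declared idx, which is why the hunks add 'int idx;', so memblock_insert_region() callers can still bump or rewind the index mid-walk (the idx++ and idx-- in memblock_add_range() and memblock_isolate_range()):

#include <stdio.h>

struct region { unsigned long base, size; };
struct region_type { int cnt; struct region *regions; };

/* Assumed shape of the iterator, not the kernel's verbatim macro:
 * it reads (and lets the loop body adjust) the local 'idx'. */
#define for_each_region_type(type, rgn)          \
    for (idx = 0, rgn = &(type)->regions[0];     \
         idx < (type)->cnt;                      \
         idx++, rgn = &(type)->regions[idx])

int main(void)
{
    struct region rs[] = { { 0x1000, 0x100 }, { 0x3000, 0x200 } };
    struct region_type mem = { 2, rs };
    struct region *rgn;
    int idx;

    for_each_region_type(&mem, rgn)
        printf("[%d] base=%#lx size=%#lx\n", idx, rgn->base, rgn->size);
    return 0;
}
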
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 14cb1db4c52b..54eae4f19d80 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -76,9 +76,12 @@ | |||
76 | struct cgroup_subsys memory_cgrp_subsys __read_mostly; | 76 | struct cgroup_subsys memory_cgrp_subsys __read_mostly; |
77 | EXPORT_SYMBOL(memory_cgrp_subsys); | 77 | EXPORT_SYMBOL(memory_cgrp_subsys); |
78 | 78 | ||
79 | struct mem_cgroup *root_mem_cgroup __read_mostly; | ||
80 | |||
79 | #define MEM_CGROUP_RECLAIM_RETRIES 5 | 81 | #define MEM_CGROUP_RECLAIM_RETRIES 5 |
80 | static struct mem_cgroup *root_mem_cgroup __read_mostly; | 82 | |
81 | struct cgroup_subsys_state *mem_cgroup_root_css __read_mostly; | 83 | /* Socket memory accounting disabled? */ |
84 | static bool cgroup_memory_nosocket; | ||
82 | 85 | ||
83 | /* Whether the swap controller is active */ | 86 | /* Whether the swap controller is active */ |
84 | #ifdef CONFIG_MEMCG_SWAP | 87 | #ifdef CONFIG_MEMCG_SWAP |
@@ -87,6 +90,12 @@ int do_swap_account __read_mostly; | |||
87 | #define do_swap_account 0 | 90 | #define do_swap_account 0 |
88 | #endif | 91 | #endif |
89 | 92 | ||
93 | /* Whether legacy memory+swap accounting is active */ | ||
94 | static bool do_memsw_account(void) | ||
95 | { | ||
96 | return !cgroup_subsys_on_dfl(memory_cgrp_subsys) && do_swap_account; | ||
97 | } | ||
98 | |||
90 | static const char * const mem_cgroup_stat_names[] = { | 99 | static const char * const mem_cgroup_stat_names[] = { |
91 | "cache", | 100 | "cache", |
92 | "rss", | 101 | "rss", |
@@ -288,64 +297,6 @@ static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id) | |||
288 | return mem_cgroup_from_css(css); | 297 | return mem_cgroup_from_css(css); |
289 | } | 298 | } |
290 | 299 | ||
291 | /* Writing them here to avoid exposing memcg's inner layout */ | ||
292 | #if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM) | ||
293 | |||
294 | void sock_update_memcg(struct sock *sk) | ||
295 | { | ||
296 | if (mem_cgroup_sockets_enabled) { | ||
297 | struct mem_cgroup *memcg; | ||
298 | struct cg_proto *cg_proto; | ||
299 | |||
300 | BUG_ON(!sk->sk_prot->proto_cgroup); | ||
301 | |||
302 | /* Socket cloning can throw us here with sk_cgrp already | ||
303 | * filled. It won't however, necessarily happen from | ||
304 | * process context. So the test for root memcg given | ||
305 | * the current task's memcg won't help us in this case. | ||
306 | * | ||
307 | * Respecting the original socket's memcg is a better | ||
308 | * decision in this case. | ||
309 | */ | ||
310 | if (sk->sk_cgrp) { | ||
311 | BUG_ON(mem_cgroup_is_root(sk->sk_cgrp->memcg)); | ||
312 | css_get(&sk->sk_cgrp->memcg->css); | ||
313 | return; | ||
314 | } | ||
315 | |||
316 | rcu_read_lock(); | ||
317 | memcg = mem_cgroup_from_task(current); | ||
318 | cg_proto = sk->sk_prot->proto_cgroup(memcg); | ||
319 | if (cg_proto && test_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags) && | ||
320 | css_tryget_online(&memcg->css)) { | ||
321 | sk->sk_cgrp = cg_proto; | ||
322 | } | ||
323 | rcu_read_unlock(); | ||
324 | } | ||
325 | } | ||
326 | EXPORT_SYMBOL(sock_update_memcg); | ||
327 | |||
328 | void sock_release_memcg(struct sock *sk) | ||
329 | { | ||
330 | if (mem_cgroup_sockets_enabled && sk->sk_cgrp) { | ||
331 | struct mem_cgroup *memcg; | ||
332 | WARN_ON(!sk->sk_cgrp->memcg); | ||
333 | memcg = sk->sk_cgrp->memcg; | ||
334 | css_put(&sk->sk_cgrp->memcg->css); | ||
335 | } | ||
336 | } | ||
337 | |||
338 | struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg) | ||
339 | { | ||
340 | if (!memcg || mem_cgroup_is_root(memcg)) | ||
341 | return NULL; | ||
342 | |||
343 | return &memcg->tcp_mem; | ||
344 | } | ||
345 | EXPORT_SYMBOL(tcp_proto_cgroup); | ||
346 | |||
347 | #endif | ||
348 | |||
349 | #ifdef CONFIG_MEMCG_KMEM | 300 | #ifdef CONFIG_MEMCG_KMEM |
350 | /* | 301 | /* |
351 | * This will be the memcg's index in each cache's ->memcg_params.memcg_caches. | 302 | * This will be the memcg's index in each cache's ->memcg_params.memcg_caches. |
@@ -395,7 +346,7 @@ void memcg_put_cache_ids(void) | |||
395 | * conditional to this static branch, we'll have to allow modules that does | 346 | * conditional to this static branch, we'll have to allow modules that does |
396 | * kmem_cache_alloc and the such to see this symbol as well | 347 | * kmem_cache_alloc and the such to see this symbol as well |
397 | */ | 348 | */ |
398 | struct static_key memcg_kmem_enabled_key; | 349 | DEFINE_STATIC_KEY_FALSE(memcg_kmem_enabled_key); |
399 | EXPORT_SYMBOL(memcg_kmem_enabled_key); | 350 | EXPORT_SYMBOL(memcg_kmem_enabled_key); |
400 | 351 | ||
401 | #endif /* CONFIG_MEMCG_KMEM */ | 352 | #endif /* CONFIG_MEMCG_KMEM */ |
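
Above, memcg_kmem_enabled_key moves from the old struct static_key interface to DEFINE_STATIC_KEY_FALSE(), with static_key_slow_inc()/static_key_slow_dec() later in this diff becoming static_branch_inc()/static_branch_dec(). In the kernel these calls patch branch instructions in place, so a disabled key costs nothing on the hot path; the refcounted boolean below is only a userspace stand-in for the API's semantics:

#include <stdbool.h>
#include <stdio.h>

struct static_key_false { int enables; };

#define DEFINE_STATIC_KEY_FALSE(name) struct static_key_false name = { 0 }

static void static_branch_inc(struct static_key_false *k) { k->enables++; }
static void static_branch_dec(struct static_key_false *k) { k->enables--; }
static bool static_branch_unlikely(struct static_key_false *k)
{
    return k->enables > 0;
}

DEFINE_STATIC_KEY_FALSE(memcg_kmem_enabled_key);

int main(void)
{
    printf("enabled? %d\n", static_branch_unlikely(&memcg_kmem_enabled_key));
    static_branch_inc(&memcg_kmem_enabled_key);  /* first kmem-active memcg */
    printf("enabled? %d\n", static_branch_unlikely(&memcg_kmem_enabled_key));
    static_branch_dec(&memcg_kmem_enabled_key);  /* last one went away */
    printf("enabled? %d\n", static_branch_unlikely(&memcg_kmem_enabled_key));
    return 0;
}
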
@@ -1162,9 +1113,6 @@ bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg) | |||
1162 | return ret; | 1113 | return ret; |
1163 | } | 1114 | } |
1164 | 1115 | ||
1165 | #define mem_cgroup_from_counter(counter, member) \ | ||
1166 | container_of(counter, struct mem_cgroup, member) | ||
1167 | |||
1168 | /** | 1116 | /** |
1169 | * mem_cgroup_margin - calculate chargeable space of a memory cgroup | 1117 | * mem_cgroup_margin - calculate chargeable space of a memory cgroup |
1170 | * @memcg: the memory cgroup | 1118 | * @memcg: the memory cgroup |
@@ -1183,7 +1131,7 @@ static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg) | |||
1183 | if (count < limit) | 1131 | if (count < limit) |
1184 | margin = limit - count; | 1132 | margin = limit - count; |
1185 | 1133 | ||
1186 | if (do_swap_account) { | 1134 | if (do_memsw_account()) { |
1187 | count = page_counter_read(&memcg->memsw); | 1135 | count = page_counter_read(&memcg->memsw); |
1188 | limit = READ_ONCE(memcg->memsw.limit); | 1136 | limit = READ_ONCE(memcg->memsw.limit); |
1189 | if (count <= limit) | 1137 | if (count <= limit) |
@@ -1286,7 +1234,7 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) | |||
1286 | pr_cont(":"); | 1234 | pr_cont(":"); |
1287 | 1235 | ||
1288 | for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) { | 1236 | for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) { |
1289 | if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account) | 1237 | if (i == MEM_CGROUP_STAT_SWAP && !do_memsw_account()) |
1290 | continue; | 1238 | continue; |
1291 | pr_cont(" %s:%luKB", mem_cgroup_stat_names[i], | 1239 | pr_cont(" %s:%luKB", mem_cgroup_stat_names[i], |
1292 | K(mem_cgroup_read_stat(iter, i))); | 1240 | K(mem_cgroup_read_stat(iter, i))); |
@@ -1909,7 +1857,7 @@ static void drain_stock(struct memcg_stock_pcp *stock) | |||
1909 | 1857 | ||
1910 | if (stock->nr_pages) { | 1858 | if (stock->nr_pages) { |
1911 | page_counter_uncharge(&old->memory, stock->nr_pages); | 1859 | page_counter_uncharge(&old->memory, stock->nr_pages); |
1912 | if (do_swap_account) | 1860 | if (do_memsw_account()) |
1913 | page_counter_uncharge(&old->memsw, stock->nr_pages); | 1861 | page_counter_uncharge(&old->memsw, stock->nr_pages); |
1914 | css_put_many(&old->css, stock->nr_pages); | 1862 | css_put_many(&old->css, stock->nr_pages); |
1915 | stock->nr_pages = 0; | 1863 | stock->nr_pages = 0; |
@@ -1997,6 +1945,26 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb, | |||
1997 | return NOTIFY_OK; | 1945 | return NOTIFY_OK; |
1998 | } | 1946 | } |
1999 | 1947 | ||
1948 | static void reclaim_high(struct mem_cgroup *memcg, | ||
1949 | unsigned int nr_pages, | ||
1950 | gfp_t gfp_mask) | ||
1951 | { | ||
1952 | do { | ||
1953 | if (page_counter_read(&memcg->memory) <= memcg->high) | ||
1954 | continue; | ||
1955 | mem_cgroup_events(memcg, MEMCG_HIGH, 1); | ||
1956 | try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true); | ||
1957 | } while ((memcg = parent_mem_cgroup(memcg))); | ||
1958 | } | ||
1959 | |||
1960 | static void high_work_func(struct work_struct *work) | ||
1961 | { | ||
1962 | struct mem_cgroup *memcg; | ||
1963 | |||
1964 | memcg = container_of(work, struct mem_cgroup, high_work); | ||
1965 | reclaim_high(memcg, CHARGE_BATCH, GFP_KERNEL); | ||
1966 | } | ||
1967 | |||
2000 | /* | 1968 | /* |
2001 | * Scheduled by try_charge() to be executed from the userland return path | 1969 | * Scheduled by try_charge() to be executed from the userland return path |
2002 | * and reclaims memory over the high limit. | 1970 | * and reclaims memory over the high limit. |
@@ -2004,20 +1972,13 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb, | |||
2004 | void mem_cgroup_handle_over_high(void) | 1972 | void mem_cgroup_handle_over_high(void) |
2005 | { | 1973 | { |
2006 | unsigned int nr_pages = current->memcg_nr_pages_over_high; | 1974 | unsigned int nr_pages = current->memcg_nr_pages_over_high; |
2007 | struct mem_cgroup *memcg, *pos; | 1975 | struct mem_cgroup *memcg; |
2008 | 1976 | ||
2009 | if (likely(!nr_pages)) | 1977 | if (likely(!nr_pages)) |
2010 | return; | 1978 | return; |
2011 | 1979 | ||
2012 | pos = memcg = get_mem_cgroup_from_mm(current->mm); | 1980 | memcg = get_mem_cgroup_from_mm(current->mm); |
2013 | 1981 | reclaim_high(memcg, nr_pages, GFP_KERNEL); | |
2014 | do { | ||
2015 | if (page_counter_read(&pos->memory) <= pos->high) | ||
2016 | continue; | ||
2017 | mem_cgroup_events(pos, MEMCG_HIGH, 1); | ||
2018 | try_to_free_mem_cgroup_pages(pos, nr_pages, GFP_KERNEL, true); | ||
2019 | } while ((pos = parent_mem_cgroup(pos))); | ||
2020 | |||
2021 | css_put(&memcg->css); | 1982 | css_put(&memcg->css); |
2022 | current->memcg_nr_pages_over_high = 0; | 1983 | current->memcg_nr_pages_over_high = 0; |
2023 | } | 1984 | } |
@@ -2039,11 +2000,11 @@ retry: | |||
2039 | if (consume_stock(memcg, nr_pages)) | 2000 | if (consume_stock(memcg, nr_pages)) |
2040 | return 0; | 2001 | return 0; |
2041 | 2002 | ||
2042 | if (!do_swap_account || | 2003 | if (!do_memsw_account() || |
2043 | page_counter_try_charge(&memcg->memsw, batch, &counter)) { | 2004 | page_counter_try_charge(&memcg->memsw, batch, &counter)) { |
2044 | if (page_counter_try_charge(&memcg->memory, batch, &counter)) | 2005 | if (page_counter_try_charge(&memcg->memory, batch, &counter)) |
2045 | goto done_restock; | 2006 | goto done_restock; |
2046 | if (do_swap_account) | 2007 | if (do_memsw_account()) |
2047 | page_counter_uncharge(&memcg->memsw, batch); | 2008 | page_counter_uncharge(&memcg->memsw, batch); |
2048 | mem_over_limit = mem_cgroup_from_counter(counter, memory); | 2009 | mem_over_limit = mem_cgroup_from_counter(counter, memory); |
2049 | } else { | 2010 | } else { |
@@ -2130,7 +2091,7 @@ force: | |||
2130 | * temporarily by force charging it. | 2091 | * temporarily by force charging it. |
2131 | */ | 2092 | */ |
2132 | page_counter_charge(&memcg->memory, nr_pages); | 2093 | page_counter_charge(&memcg->memory, nr_pages); |
2133 | if (do_swap_account) | 2094 | if (do_memsw_account()) |
2134 | page_counter_charge(&memcg->memsw, nr_pages); | 2095 | page_counter_charge(&memcg->memsw, nr_pages); |
2135 | css_get_many(&memcg->css, nr_pages); | 2096 | css_get_many(&memcg->css, nr_pages); |
2136 | 2097 | ||
@@ -2152,6 +2113,11 @@ done_restock: | |||
2152 | */ | 2113 | */ |
2153 | do { | 2114 | do { |
2154 | if (page_counter_read(&memcg->memory) > memcg->high) { | 2115 | if (page_counter_read(&memcg->memory) > memcg->high) { |
2116 | /* Don't bother a random interrupted task */ | ||
2117 | if (in_interrupt()) { | ||
2118 | schedule_work(&memcg->high_work); | ||
2119 | break; | ||
2120 | } | ||
2155 | current->memcg_nr_pages_over_high += batch; | 2121 | current->memcg_nr_pages_over_high += batch; |
2156 | set_notify_resume(current); | 2122 | set_notify_resume(current); |
2157 | break; | 2123 | break; |
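
The done_restock change above handles over-high charges raised from interrupt context: reclaim can sleep, and the interrupted task is an innocent bystander, so the work is queued to memcg->high_work (run by high_work_func(), added earlier in this diff) instead of being tagged onto current for the return-to-userland path. A stub sketch of that decision; every name in it is illustrative:

#include <stdbool.h>
#include <stdio.h>

static bool in_interrupt_ctx;

static void queue_high_work(void)  { puts("queued to workqueue"); }
static void mark_task_resume(void) { puts("handled on userland return"); }

static void note_over_high(void)
{
    if (in_interrupt_ctx) {
        /* Can't sleep, and current is unrelated to the charge: defer. */
        queue_high_work();
        return;
    }
    mark_task_resume();
}

int main(void)
{
    in_interrupt_ctx = true;
    note_over_high();
    in_interrupt_ctx = false;
    note_over_high();
    return 0;
}
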
@@ -2167,7 +2133,7 @@ static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages) | |||
2167 | return; | 2133 | return; |
2168 | 2134 | ||
2169 | page_counter_uncharge(&memcg->memory, nr_pages); | 2135 | page_counter_uncharge(&memcg->memory, nr_pages); |
2170 | if (do_swap_account) | 2136 | if (do_memsw_account()) |
2171 | page_counter_uncharge(&memcg->memsw, nr_pages); | 2137 | page_counter_uncharge(&memcg->memsw, nr_pages); |
2172 | 2138 | ||
2173 | css_put_many(&memcg->css, nr_pages); | 2139 | css_put_many(&memcg->css, nr_pages); |
@@ -2356,7 +2322,7 @@ static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg, | |||
2356 | * Can't be called in interrupt context or from kernel threads. | 2322 | * Can't be called in interrupt context or from kernel threads. |
2357 | * This function needs to be called with rcu_read_lock() held. | 2323 | * This function needs to be called with rcu_read_lock() held. |
2358 | */ | 2324 | */ |
2359 | struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep) | 2325 | struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp) |
2360 | { | 2326 | { |
2361 | struct mem_cgroup *memcg; | 2327 | struct mem_cgroup *memcg; |
2362 | struct kmem_cache *memcg_cachep; | 2328 | struct kmem_cache *memcg_cachep; |
@@ -2364,6 +2330,12 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep) | |||
2364 | 2330 | ||
2365 | VM_BUG_ON(!is_root_cache(cachep)); | 2331 | VM_BUG_ON(!is_root_cache(cachep)); |
2366 | 2332 | ||
2333 | if (cachep->flags & SLAB_ACCOUNT) | ||
2334 | gfp |= __GFP_ACCOUNT; | ||
2335 | |||
2336 | if (!(gfp & __GFP_ACCOUNT)) | ||
2337 | return cachep; | ||
2338 | |||
2367 | if (current->memcg_kmem_skip_account) | 2339 | if (current->memcg_kmem_skip_account) |
2368 | return cachep; | 2340 | return cachep; |
2369 | 2341 | ||
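
The __memcg_kmem_get_cache() hunk above gives the function a gfp argument and makes kmem accounting opt-in: a cache created with SLAB_ACCOUNT forces __GFP_ACCOUNT on, and an allocation that neither asked for nor inherited __GFP_ACCOUNT bypasses the per-memcg cache lookup entirely. A userspace sketch of just that flag-folding step, with illustrative flag values:

#include <stdio.h>

#define GFP_ACCOUNT  0x1u
#define SLAB_ACCOUNT 0x2u

struct kmem_cache_sketch { unsigned int flags; };

/* Mirrors the early-out logic added above: fold the cache's policy
 * into the allocation flags, then decide whether memcg is involved. */
static int should_account(struct kmem_cache_sketch *cachep, unsigned int gfp)
{
    if (cachep->flags & SLAB_ACCOUNT)
        gfp |= GFP_ACCOUNT;
    return (gfp & GFP_ACCOUNT) != 0;
}

int main(void)
{
    struct kmem_cache_sketch plain = { 0 }, accounted = { SLAB_ACCOUNT };

    printf("plain + no flag    -> %d\n", should_account(&plain, 0));
    printf("plain + account    -> %d\n", should_account(&plain, GFP_ACCOUNT));
    printf("SLAB_ACCOUNT cache -> %d\n", should_account(&accounted, 0));
    return 0;
}
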
@@ -2447,7 +2419,7 @@ void __memcg_kmem_uncharge(struct page *page, int order) | |||
2447 | 2419 | ||
2448 | page_counter_uncharge(&memcg->kmem, nr_pages); | 2420 | page_counter_uncharge(&memcg->kmem, nr_pages); |
2449 | page_counter_uncharge(&memcg->memory, nr_pages); | 2421 | page_counter_uncharge(&memcg->memory, nr_pages); |
2450 | if (do_swap_account) | 2422 | if (do_memsw_account()) |
2451 | page_counter_uncharge(&memcg->memsw, nr_pages); | 2423 | page_counter_uncharge(&memcg->memsw, nr_pages); |
2452 | 2424 | ||
2453 | page->mem_cgroup = NULL; | 2425 | page->mem_cgroup = NULL; |
@@ -2935,7 +2907,7 @@ static int memcg_activate_kmem(struct mem_cgroup *memcg, | |||
2935 | err = page_counter_limit(&memcg->kmem, nr_pages); | 2907 | err = page_counter_limit(&memcg->kmem, nr_pages); |
2936 | VM_BUG_ON(err); | 2908 | VM_BUG_ON(err); |
2937 | 2909 | ||
2938 | static_key_slow_inc(&memcg_kmem_enabled_key); | 2910 | static_branch_inc(&memcg_kmem_enabled_key); |
2939 | /* | 2911 | /* |
2940 | * A memory cgroup is considered kmem-active as soon as it gets | 2912 | * A memory cgroup is considered kmem-active as soon as it gets |
2941 | * kmemcg_id. Setting the id after enabling static branching will | 2913 | * kmemcg_id. Setting the id after enabling static branching will |
@@ -3162,7 +3134,7 @@ static int memcg_stat_show(struct seq_file *m, void *v) | |||
3162 | BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS); | 3134 | BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS); |
3163 | 3135 | ||
3164 | for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) { | 3136 | for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) { |
3165 | if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account) | 3137 | if (i == MEM_CGROUP_STAT_SWAP && !do_memsw_account()) |
3166 | continue; | 3138 | continue; |
3167 | seq_printf(m, "%s %lu\n", mem_cgroup_stat_names[i], | 3139 | seq_printf(m, "%s %lu\n", mem_cgroup_stat_names[i], |
3168 | mem_cgroup_read_stat(memcg, i) * PAGE_SIZE); | 3140 | mem_cgroup_read_stat(memcg, i) * PAGE_SIZE); |
@@ -3184,14 +3156,14 @@ static int memcg_stat_show(struct seq_file *m, void *v) | |||
3184 | } | 3156 | } |
3185 | seq_printf(m, "hierarchical_memory_limit %llu\n", | 3157 | seq_printf(m, "hierarchical_memory_limit %llu\n", |
3186 | (u64)memory * PAGE_SIZE); | 3158 | (u64)memory * PAGE_SIZE); |
3187 | if (do_swap_account) | 3159 | if (do_memsw_account()) |
3188 | seq_printf(m, "hierarchical_memsw_limit %llu\n", | 3160 | seq_printf(m, "hierarchical_memsw_limit %llu\n", |
3189 | (u64)memsw * PAGE_SIZE); | 3161 | (u64)memsw * PAGE_SIZE); |
3190 | 3162 | ||
3191 | for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) { | 3163 | for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) { |
3192 | unsigned long long val = 0; | 3164 | unsigned long long val = 0; |
3193 | 3165 | ||
3194 | if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account) | 3166 | if (i == MEM_CGROUP_STAT_SWAP && !do_memsw_account()) |
3195 | continue; | 3167 | continue; |
3196 | for_each_mem_cgroup_tree(mi, memcg) | 3168 | for_each_mem_cgroup_tree(mi, memcg) |
3197 | val += mem_cgroup_read_stat(mi, i) * PAGE_SIZE; | 3169 | val += mem_cgroup_read_stat(mi, i) * PAGE_SIZE; |
@@ -3322,7 +3294,7 @@ static void mem_cgroup_threshold(struct mem_cgroup *memcg) | |||
3322 | { | 3294 | { |
3323 | while (memcg) { | 3295 | while (memcg) { |
3324 | __mem_cgroup_threshold(memcg, false); | 3296 | __mem_cgroup_threshold(memcg, false); |
3325 | if (do_swap_account) | 3297 | if (do_memsw_account()) |
3326 | __mem_cgroup_threshold(memcg, true); | 3298 | __mem_cgroup_threshold(memcg, true); |
3327 | 3299 | ||
3328 | memcg = parent_mem_cgroup(memcg); | 3300 | memcg = parent_mem_cgroup(memcg); |
@@ -3621,7 +3593,7 @@ static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss) | |||
3621 | if (ret) | 3593 | if (ret) |
3622 | return ret; | 3594 | return ret; |
3623 | 3595 | ||
3624 | return mem_cgroup_sockets_init(memcg, ss); | 3596 | return tcp_init_cgroup(memcg, ss); |
3625 | } | 3597 | } |
3626 | 3598 | ||
3627 | static void memcg_deactivate_kmem(struct mem_cgroup *memcg) | 3599 | static void memcg_deactivate_kmem(struct mem_cgroup *memcg) |
@@ -3674,10 +3646,10 @@ static void memcg_destroy_kmem(struct mem_cgroup *memcg) | |||
3674 | { | 3646 | { |
3675 | if (memcg->kmem_acct_activated) { | 3647 | if (memcg->kmem_acct_activated) { |
3676 | memcg_destroy_kmem_caches(memcg); | 3648 | memcg_destroy_kmem_caches(memcg); |
3677 | static_key_slow_dec(&memcg_kmem_enabled_key); | 3649 | static_branch_dec(&memcg_kmem_enabled_key); |
3678 | WARN_ON(page_counter_read(&memcg->kmem)); | 3650 | WARN_ON(page_counter_read(&memcg->kmem)); |
3679 | } | 3651 | } |
3680 | mem_cgroup_sockets_destroy(memcg); | 3652 | tcp_destroy_cgroup(memcg); |
3681 | } | 3653 | } |
3682 | #else | 3654 | #else |
3683 | static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss) | 3655 | static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss) |
@@ -4196,6 +4168,8 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg) | |||
4196 | { | 4168 | { |
4197 | int node; | 4169 | int node; |
4198 | 4170 | ||
4171 | cancel_work_sync(&memcg->high_work); | ||
4172 | |||
4199 | mem_cgroup_remove_from_trees(memcg); | 4173 | mem_cgroup_remove_from_trees(memcg); |
4200 | 4174 | ||
4201 | for_each_node(node) | 4175 | for_each_node(node) |
@@ -4206,17 +4180,6 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg) | |||
4206 | kfree(memcg); | 4180 | kfree(memcg); |
4207 | } | 4181 | } |
4208 | 4182 | ||
4209 | /* | ||
4210 | * Returns the parent mem_cgroup in memcgroup hierarchy with hierarchy enabled. | ||
4211 | */ | ||
4212 | struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg) | ||
4213 | { | ||
4214 | if (!memcg->memory.parent) | ||
4215 | return NULL; | ||
4216 | return mem_cgroup_from_counter(memcg->memory.parent, memory); | ||
4217 | } | ||
4218 | EXPORT_SYMBOL(parent_mem_cgroup); | ||
4219 | |||
4220 | static struct cgroup_subsys_state * __ref | 4183 | static struct cgroup_subsys_state * __ref |
4221 | mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) | 4184 | mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) |
4222 | { | 4185 | { |
@@ -4235,7 +4198,6 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) | |||
4235 | /* root ? */ | 4198 | /* root ? */ |
4236 | if (parent_css == NULL) { | 4199 | if (parent_css == NULL) { |
4237 | root_mem_cgroup = memcg; | 4200 | root_mem_cgroup = memcg; |
4238 | mem_cgroup_root_css = &memcg->css; | ||
4239 | page_counter_init(&memcg->memory, NULL); | 4201 | page_counter_init(&memcg->memory, NULL); |
4240 | memcg->high = PAGE_COUNTER_MAX; | 4202 | memcg->high = PAGE_COUNTER_MAX; |
4241 | memcg->soft_limit = PAGE_COUNTER_MAX; | 4203 | memcg->soft_limit = PAGE_COUNTER_MAX; |
@@ -4243,6 +4205,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) | |||
4243 | page_counter_init(&memcg->kmem, NULL); | 4205 | page_counter_init(&memcg->kmem, NULL); |
4244 | } | 4206 | } |
4245 | 4207 | ||
4208 | INIT_WORK(&memcg->high_work, high_work_func); | ||
4246 | memcg->last_scanned_node = MAX_NUMNODES; | 4209 | memcg->last_scanned_node = MAX_NUMNODES; |
4247 | INIT_LIST_HEAD(&memcg->oom_notify); | 4210 | INIT_LIST_HEAD(&memcg->oom_notify); |
4248 | memcg->move_charge_at_immigrate = 0; | 4211 | memcg->move_charge_at_immigrate = 0; |
@@ -4257,6 +4220,9 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) | |||
4257 | #ifdef CONFIG_CGROUP_WRITEBACK | 4220 | #ifdef CONFIG_CGROUP_WRITEBACK |
4258 | INIT_LIST_HEAD(&memcg->cgwb_list); | 4221 | INIT_LIST_HEAD(&memcg->cgwb_list); |
4259 | #endif | 4222 | #endif |
4223 | #ifdef CONFIG_INET | ||
4224 | memcg->socket_pressure = jiffies; | ||
4225 | #endif | ||
4260 | return &memcg->css; | 4226 | return &memcg->css; |
4261 | 4227 | ||
4262 | free_out: | 4228 | free_out: |
@@ -4314,6 +4280,11 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css) | |||
4314 | if (ret) | 4280 | if (ret) |
4315 | return ret; | 4281 | return ret; |
4316 | 4282 | ||
4283 | #ifdef CONFIG_INET | ||
4284 | if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket) | ||
4285 | static_branch_inc(&memcg_sockets_enabled_key); | ||
4286 | #endif | ||
4287 | |||
4317 | /* | 4288 | /* |
4318 | * Make sure the memcg is initialized: mem_cgroup_iter() | 4289 | * Make sure the memcg is initialized: mem_cgroup_iter() |
4319 | * orders reading memcg->initialized against its callers | 4290 | * orders reading memcg->initialized against its callers |
@@ -4360,6 +4331,10 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css) | |||
4360 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | 4331 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
4361 | 4332 | ||
4362 | memcg_destroy_kmem(memcg); | 4333 | memcg_destroy_kmem(memcg); |
4334 | #ifdef CONFIG_INET | ||
4335 | if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket) | ||
4336 | static_branch_dec(&memcg_sockets_enabled_key); | ||
4337 | #endif | ||
4363 | __mem_cgroup_free(memcg); | 4338 | __mem_cgroup_free(memcg); |
4364 | } | 4339 | } |
4365 | 4340 | ||
@@ -4476,7 +4451,7 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma, | |||
4476 | * we call find_get_page() with swapper_space directly. | 4451 | * we call find_get_page() with swapper_space directly. |
4477 | */ | 4452 | */ |
4478 | page = find_get_page(swap_address_space(ent), ent.val); | 4453 | page = find_get_page(swap_address_space(ent), ent.val); |
4479 | if (do_swap_account) | 4454 | if (do_memsw_account()) |
4480 | entry->val = ent.val; | 4455 | entry->val = ent.val; |
4481 | 4456 | ||
4482 | return page; | 4457 | return page; |
@@ -4511,7 +4486,7 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma, | |||
4511 | page = find_get_entry(mapping, pgoff); | 4486 | page = find_get_entry(mapping, pgoff); |
4512 | if (radix_tree_exceptional_entry(page)) { | 4487 | if (radix_tree_exceptional_entry(page)) { |
4513 | swp_entry_t swp = radix_to_swp_entry(page); | 4488 | swp_entry_t swp = radix_to_swp_entry(page); |
4514 | if (do_swap_account) | 4489 | if (do_memsw_account()) |
4515 | *entry = swp; | 4490 | *entry = swp; |
4516 | page = find_get_page(swap_address_space(swp), swp.val); | 4491 | page = find_get_page(swap_address_space(swp), swp.val); |
4517 | } | 4492 | } |
@@ -5304,7 +5279,7 @@ int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm, | |||
5304 | if (page->mem_cgroup) | 5279 | if (page->mem_cgroup) |
5305 | goto out; | 5280 | goto out; |
5306 | 5281 | ||
5307 | if (do_swap_account) { | 5282 | if (do_memsw_account()) { |
5308 | swp_entry_t ent = { .val = page_private(page), }; | 5283 | swp_entry_t ent = { .val = page_private(page), }; |
5309 | unsigned short id = lookup_swap_cgroup_id(ent); | 5284 | unsigned short id = lookup_swap_cgroup_id(ent); |
5310 | 5285 | ||
@@ -5378,7 +5353,7 @@ void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg, | |||
5378 | memcg_check_events(memcg, page); | 5353 | memcg_check_events(memcg, page); |
5379 | local_irq_enable(); | 5354 | local_irq_enable(); |
5380 | 5355 | ||
5381 | if (do_swap_account && PageSwapCache(page)) { | 5356 | if (do_memsw_account() && PageSwapCache(page)) { |
5382 | swp_entry_t entry = { .val = page_private(page) }; | 5357 | swp_entry_t entry = { .val = page_private(page) }; |
5383 | /* | 5358 | /* |
5384 | * The swap entry might not get freed for a long time, | 5359 | * The swap entry might not get freed for a long time, |
@@ -5427,7 +5402,7 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout, | |||
5427 | 5402 | ||
5428 | if (!mem_cgroup_is_root(memcg)) { | 5403 | if (!mem_cgroup_is_root(memcg)) { |
5429 | page_counter_uncharge(&memcg->memory, nr_pages); | 5404 | page_counter_uncharge(&memcg->memory, nr_pages); |
5430 | if (do_swap_account) | 5405 | if (do_memsw_account()) |
5431 | page_counter_uncharge(&memcg->memsw, nr_pages); | 5406 | page_counter_uncharge(&memcg->memsw, nr_pages); |
5432 | memcg_oom_recover(memcg); | 5407 | memcg_oom_recover(memcg); |
5433 | } | 5408 | } |
@@ -5580,6 +5555,121 @@ void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage) | |||
5580 | commit_charge(newpage, memcg, true); | 5555 | commit_charge(newpage, memcg, true); |
5581 | } | 5556 | } |
5582 | 5557 | ||
5558 | #ifdef CONFIG_INET | ||
5559 | |||
5560 | DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key); | ||
5561 | EXPORT_SYMBOL(memcg_sockets_enabled_key); | ||
5562 | |||
5563 | void sock_update_memcg(struct sock *sk) | ||
5564 | { | ||
5565 | struct mem_cgroup *memcg; | ||
5566 | |||
5567 | /* Socket cloning can throw us here with sk_cgrp already | ||
5568 | * filled. It won't however, necessarily happen from | ||
5569 | * process context. So the test for root memcg given | ||
5570 | * the current task's memcg won't help us in this case. | ||
5571 | * | ||
5572 | * Respecting the original socket's memcg is a better | ||
5573 | * decision in this case. | ||
5574 | */ | ||
5575 | if (sk->sk_memcg) { | ||
5576 | BUG_ON(mem_cgroup_is_root(sk->sk_memcg)); | ||
5577 | css_get(&sk->sk_memcg->css); | ||
5578 | return; | ||
5579 | } | ||
5580 | |||
5581 | rcu_read_lock(); | ||
5582 | memcg = mem_cgroup_from_task(current); | ||
5583 | if (memcg == root_mem_cgroup) | ||
5584 | goto out; | ||
5585 | #ifdef CONFIG_MEMCG_KMEM | ||
5586 | if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && !memcg->tcp_mem.active) | ||
5587 | goto out; | ||
5588 | #endif | ||
5589 | if (css_tryget_online(&memcg->css)) | ||
5590 | sk->sk_memcg = memcg; | ||
5591 | out: | ||
5592 | rcu_read_unlock(); | ||
5593 | } | ||
5594 | EXPORT_SYMBOL(sock_update_memcg); | ||
5595 | |||
5596 | void sock_release_memcg(struct sock *sk) | ||
5597 | { | ||
5598 | WARN_ON(!sk->sk_memcg); | ||
5599 | css_put(&sk->sk_memcg->css); | ||
5600 | } | ||
5601 | |||
5602 | /** | ||
5603 | * mem_cgroup_charge_skmem - charge socket memory | ||
5604 | * @memcg: memcg to charge | ||
5605 | * @nr_pages: number of pages to charge | ||
5606 | * | ||
5607 | * Charges @nr_pages to @memcg. Returns %true if the charge fit within | ||
5608 | * @memcg's configured limit, %false if the charge had to be forced. | ||
5609 | */ | ||
5610 | bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages) | ||
5611 | { | ||
5612 | gfp_t gfp_mask = GFP_KERNEL; | ||
5613 | |||
5614 | #ifdef CONFIG_MEMCG_KMEM | ||
5615 | if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) { | ||
5616 | struct page_counter *counter; | ||
5617 | |||
5618 | if (page_counter_try_charge(&memcg->tcp_mem.memory_allocated, | ||
5619 | nr_pages, &counter)) { | ||
5620 | memcg->tcp_mem.memory_pressure = 0; | ||
5621 | return true; | ||
5622 | } | ||
5623 | page_counter_charge(&memcg->tcp_mem.memory_allocated, nr_pages); | ||
5624 | memcg->tcp_mem.memory_pressure = 1; | ||
5625 | return false; | ||
5626 | } | ||
5627 | #endif | ||
5628 | /* Don't block in the packet receive path */ | ||
5629 | if (in_softirq()) | ||
5630 | gfp_mask = GFP_NOWAIT; | ||
5631 | |||
5632 | if (try_charge(memcg, gfp_mask, nr_pages) == 0) | ||
5633 | return true; | ||
5634 | |||
5635 | try_charge(memcg, gfp_mask|__GFP_NOFAIL, nr_pages); | ||
5636 | return false; | ||
5637 | } | ||
5638 | |||
5639 | /** | ||
5640 | * mem_cgroup_uncharge_skmem - uncharge socket memory | ||
5641 | * @memcg: memcg to uncharge | ||
5642 | * @nr_pages: number of pages to uncharge | ||
5643 | */ | ||
5644 | void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages) | ||
5645 | { | ||
5646 | #ifdef CONFIG_MEMCG_KMEM | ||
5647 | if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) { | ||
5648 | page_counter_uncharge(&memcg->tcp_mem.memory_allocated, | ||
5649 | nr_pages); | ||
5650 | return; | ||
5651 | } | ||
5652 | #endif | ||
5653 | page_counter_uncharge(&memcg->memory, nr_pages); | ||
5654 | css_put_many(&memcg->css, nr_pages); | ||
5655 | } | ||
5656 | |||
5657 | #endif /* CONFIG_INET */ | ||
5658 | |||
5659 | static int __init cgroup_memory(char *s) | ||
5660 | { | ||
5661 | char *token; | ||
5662 | |||
5663 | while ((token = strsep(&s, ",")) != NULL) { | ||
5664 | if (!*token) | ||
5665 | continue; | ||
5666 | if (!strcmp(token, "nosocket")) | ||
5667 | cgroup_memory_nosocket = true; | ||
5668 | } | ||
5669 | return 0; | ||
5670 | } | ||
5671 | __setup("cgroup.memory=", cgroup_memory); | ||
5672 | |||
5583 | /* | 5673 | /* |
5584 | * subsys_initcall() for memory controller. | 5674 | * subsys_initcall() for memory controller. |
5585 | * | 5675 | * |
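
cgroup_memory(), added above, is a standard __setup() boot-option parser: strsep() walks the comma-separated value destructively, empty tokens are skipped, and "cgroup.memory=nosocket" sets cgroup_memory_nosocket to disable socket-memory accounting. The same parsing loop, runnable in userspace:

#define _DEFAULT_SOURCE /* for strsep() on glibc */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool nosocket;

static int parse_cgroup_memory(char *s)
{
    char *token;

    while ((token = strsep(&s, ",")) != NULL) {
        if (!*token)
            continue; /* tolerate "a,,b" */
        if (!strcmp(token, "nosocket"))
            nosocket = true;
    }
    return 0;
}

int main(void)
{
    char arg[] = "nosocket,,future-option";

    parse_cgroup_memory(arg);
    printf("nosocket=%d\n", nosocket);
    return 0;
}
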
@@ -5635,7 +5725,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) | |||
5635 | VM_BUG_ON_PAGE(PageLRU(page), page); | 5725 | VM_BUG_ON_PAGE(PageLRU(page), page); |
5636 | VM_BUG_ON_PAGE(page_count(page), page); | 5726 | VM_BUG_ON_PAGE(page_count(page), page); |
5637 | 5727 | ||
5638 | if (!do_swap_account) | 5728 | if (!do_memsw_account()) |
5639 | return; | 5729 | return; |
5640 | 5730 | ||
5641 | memcg = page->mem_cgroup; | 5731 | memcg = page->mem_cgroup; |
@@ -5675,7 +5765,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t entry) | |||
5675 | struct mem_cgroup *memcg; | 5765 | struct mem_cgroup *memcg; |
5676 | unsigned short id; | 5766 | unsigned short id; |
5677 | 5767 | ||
5678 | if (!do_swap_account) | 5768 | if (!do_memsw_account()) |
5679 | return; | 5769 | return; |
5680 | 5770 | ||
5681 | id = swap_cgroup_record(entry, 0); | 5771 | id = swap_cgroup_record(entry, 0); |
diff --git a/mm/memory.c b/mm/memory.c index c387430f06c3..d4e4d37c1989 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -832,10 +832,7 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, | |||
832 | } else if (is_migration_entry(entry)) { | 832 | } else if (is_migration_entry(entry)) { |
833 | page = migration_entry_to_page(entry); | 833 | page = migration_entry_to_page(entry); |
834 | 834 | ||
835 | if (PageAnon(page)) | 835 | rss[mm_counter(page)]++; |
836 | rss[MM_ANONPAGES]++; | ||
837 | else | ||
838 | rss[MM_FILEPAGES]++; | ||
839 | 836 | ||
840 | if (is_write_migration_entry(entry) && | 837 | if (is_write_migration_entry(entry) && |
841 | is_cow_mapping(vm_flags)) { | 838 | is_cow_mapping(vm_flags)) { |
@@ -874,10 +871,7 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, | |||
874 | if (page) { | 871 | if (page) { |
875 | get_page(page); | 872 | get_page(page); |
876 | page_dup_rmap(page); | 873 | page_dup_rmap(page); |
877 | if (PageAnon(page)) | 874 | rss[mm_counter(page)]++; |
878 | rss[MM_ANONPAGES]++; | ||
879 | else | ||
880 | rss[MM_FILEPAGES]++; | ||
881 | } | 875 | } |
882 | 876 | ||
883 | out_set_pte: | 877 | out_set_pte: |
@@ -1113,9 +1107,8 @@ again: | |||
1113 | tlb_remove_tlb_entry(tlb, pte, addr); | 1107 | tlb_remove_tlb_entry(tlb, pte, addr); |
1114 | if (unlikely(!page)) | 1108 | if (unlikely(!page)) |
1115 | continue; | 1109 | continue; |
1116 | if (PageAnon(page)) | 1110 | |
1117 | rss[MM_ANONPAGES]--; | 1111 | if (!PageAnon(page)) { |
1118 | else { | ||
1119 | if (pte_dirty(ptent)) { | 1112 | if (pte_dirty(ptent)) { |
1120 | force_flush = 1; | 1113 | force_flush = 1; |
1121 | set_page_dirty(page); | 1114 | set_page_dirty(page); |
@@ -1123,8 +1116,8 @@ again: | |||
1123 | if (pte_young(ptent) && | 1116 | if (pte_young(ptent) && |
1124 | likely(!(vma->vm_flags & VM_SEQ_READ))) | 1117 | likely(!(vma->vm_flags & VM_SEQ_READ))) |
1125 | mark_page_accessed(page); | 1118 | mark_page_accessed(page); |
1126 | rss[MM_FILEPAGES]--; | ||
1127 | } | 1119 | } |
1120 | rss[mm_counter(page)]--; | ||
1128 | page_remove_rmap(page); | 1121 | page_remove_rmap(page); |
1129 | if (unlikely(page_mapcount(page) < 0)) | 1122 | if (unlikely(page_mapcount(page) < 0)) |
1130 | print_bad_pte(vma, addr, ptent, page); | 1123 | print_bad_pte(vma, addr, ptent, page); |
@@ -1146,11 +1139,7 @@ again: | |||
1146 | struct page *page; | 1139 | struct page *page; |
1147 | 1140 | ||
1148 | page = migration_entry_to_page(entry); | 1141 | page = migration_entry_to_page(entry); |
1149 | 1142 | rss[mm_counter(page)]--; | |
1150 | if (PageAnon(page)) | ||
1151 | rss[MM_ANONPAGES]--; | ||
1152 | else | ||
1153 | rss[MM_FILEPAGES]--; | ||
1154 | } | 1143 | } |
1155 | if (unlikely(!free_swap_and_cache(entry))) | 1144 | if (unlikely(!free_swap_and_cache(entry))) |
1156 | print_bad_pte(vma, addr, ptent, NULL); | 1145 | print_bad_pte(vma, addr, ptent, NULL); |
@@ -1460,7 +1449,7 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr, | |||
1460 | 1449 | ||
1461 | /* Ok, finally just insert the thing.. */ | 1450 | /* Ok, finally just insert the thing.. */ |
1462 | get_page(page); | 1451 | get_page(page); |
1463 | inc_mm_counter_fast(mm, MM_FILEPAGES); | 1452 | inc_mm_counter_fast(mm, mm_counter_file(page)); |
1464 | page_add_file_rmap(page); | 1453 | page_add_file_rmap(page); |
1465 | set_pte_at(mm, addr, pte, mk_pte(page, prot)); | 1454 | set_pte_at(mm, addr, pte, mk_pte(page, prot)); |
1466 | 1455 | ||
@@ -1949,6 +1938,20 @@ static inline void cow_user_page(struct page *dst, struct page *src, unsigned lo | |||
1949 | copy_user_highpage(dst, src, va, vma); | 1938 | copy_user_highpage(dst, src, va, vma); |
1950 | } | 1939 | } |
1951 | 1940 | ||
1941 | static gfp_t __get_fault_gfp_mask(struct vm_area_struct *vma) | ||
1942 | { | ||
1943 | struct file *vm_file = vma->vm_file; | ||
1944 | |||
1945 | if (vm_file) | ||
1946 | return mapping_gfp_mask(vm_file->f_mapping) | __GFP_FS | __GFP_IO; | ||
1947 | |||
1948 | /* | ||
1949 | * Special mappings (e.g. VDSO) do not have any file so fake | ||
1950 | * a default GFP_KERNEL for them. | ||
1951 | */ | ||
1952 | return GFP_KERNEL; | ||
1953 | } | ||
1954 | |||
1952 | /* | 1955 | /* |
1953 | * Notify the address space that the page is about to become writable so that | 1956 | * Notify the address space that the page is about to become writable so that |
1954 | * it can prohibit this or wait for the page to get into an appropriate state. | 1957 | * it can prohibit this or wait for the page to get into an appropriate state. |
@@ -1964,6 +1967,7 @@ static int do_page_mkwrite(struct vm_area_struct *vma, struct page *page, | |||
1964 | vmf.virtual_address = (void __user *)(address & PAGE_MASK); | 1967 | vmf.virtual_address = (void __user *)(address & PAGE_MASK); |
1965 | vmf.pgoff = page->index; | 1968 | vmf.pgoff = page->index; |
1966 | vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE; | 1969 | vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE; |
1970 | vmf.gfp_mask = __get_fault_gfp_mask(vma); | ||
1967 | vmf.page = page; | 1971 | vmf.page = page; |
1968 | vmf.cow_page = NULL; | 1972 | vmf.cow_page = NULL; |
1969 | 1973 | ||
@@ -2097,7 +2101,8 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2097 | if (likely(pte_same(*page_table, orig_pte))) { | 2101 | if (likely(pte_same(*page_table, orig_pte))) { |
2098 | if (old_page) { | 2102 | if (old_page) { |
2099 | if (!PageAnon(old_page)) { | 2103 | if (!PageAnon(old_page)) { |
2100 | dec_mm_counter_fast(mm, MM_FILEPAGES); | 2104 | dec_mm_counter_fast(mm, |
2105 | mm_counter_file(old_page)); | ||
2101 | inc_mm_counter_fast(mm, MM_ANONPAGES); | 2106 | inc_mm_counter_fast(mm, MM_ANONPAGES); |
2102 | } | 2107 | } |
2103 | } else { | 2108 | } else { |
@@ -2767,6 +2772,7 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address, | |||
2767 | vmf.pgoff = pgoff; | 2772 | vmf.pgoff = pgoff; |
2768 | vmf.flags = flags; | 2773 | vmf.flags = flags; |
2769 | vmf.page = NULL; | 2774 | vmf.page = NULL; |
2775 | vmf.gfp_mask = __get_fault_gfp_mask(vma); | ||
2770 | vmf.cow_page = cow_page; | 2776 | vmf.cow_page = cow_page; |
2771 | 2777 | ||
2772 | ret = vma->vm_ops->fault(vma, &vmf); | 2778 | ret = vma->vm_ops->fault(vma, &vmf); |
@@ -2820,7 +2826,7 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, | |||
2820 | inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); | 2826 | inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); |
2821 | page_add_new_anon_rmap(page, vma, address); | 2827 | page_add_new_anon_rmap(page, vma, address); |
2822 | } else { | 2828 | } else { |
2823 | inc_mm_counter_fast(vma->vm_mm, MM_FILEPAGES); | 2829 | inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page)); |
2824 | page_add_file_rmap(page); | 2830 | page_add_file_rmap(page); |
2825 | } | 2831 | } |
2826 | set_pte_at(vma->vm_mm, address, pte, entry); | 2832 | set_pte_at(vma->vm_mm, address, pte, entry); |
@@ -2933,6 +2939,7 @@ static void do_fault_around(struct vm_area_struct *vma, unsigned long address, | |||
2933 | vmf.pgoff = pgoff; | 2939 | vmf.pgoff = pgoff; |
2934 | vmf.max_pgoff = max_pgoff; | 2940 | vmf.max_pgoff = max_pgoff; |
2935 | vmf.flags = flags; | 2941 | vmf.flags = flags; |
2942 | vmf.gfp_mask = __get_fault_gfp_mask(vma); | ||
2936 | vma->vm_ops->map_pages(vma, &vmf); | 2943 | vma->vm_ops->map_pages(vma, &vmf); |
2937 | } | 2944 | } |
2938 | 2945 | ||
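
The memory.c hunks above collapse the repeated "PageAnon ? MM_ANONPAGES : MM_FILEPAGES" branches into rss[mm_counter(page)], part of the reworked virtual-memory accounting that gives shmem pages their own MM_SHMEMPAGES bucket. The helpers live in include/linux/mm.h rather than in these hunks; the sketch below shows a plausible shape of them on toy types:

#include <stdbool.h>
#include <stdio.h>

enum { MM_FILEPAGES, MM_ANONPAGES, MM_SWAPENTS, MM_SHMEMPAGES,
       NR_MM_COUNTERS };

struct page_sketch { bool anon; bool swap_backed; };

/* Assumed shape of mm_counter()/mm_counter_file(): one counter index
 * per page class, with swap-backed file pages counted as shmem. */
static int mm_counter_file(struct page_sketch *page)
{
    return page->swap_backed ? MM_SHMEMPAGES : MM_FILEPAGES;
}

static int mm_counter(struct page_sketch *page)
{
    return page->anon ? MM_ANONPAGES : mm_counter_file(page);
}

int main(void)
{
    long rss[NR_MM_COUNTERS] = { 0 };
    struct page_sketch file = { false, false }, anon = { true, false },
                       shmem = { false, true };

    rss[mm_counter(&file)]++;
    rss[mm_counter(&anon)]++;
    rss[mm_counter(&shmem)]++;
    printf("file=%ld anon=%ld shmem=%ld\n",
           rss[MM_FILEPAGES], rss[MM_ANONPAGES], rss[MM_SHMEMPAGES]);
    return 0;
}
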
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index a042a9d537bb..92f95952692b 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c | |||
@@ -131,7 +131,8 @@ static struct resource *register_memory_resource(u64 start, u64 size) | |||
131 | { | 131 | { |
132 | struct resource *res; | 132 | struct resource *res; |
133 | res = kzalloc(sizeof(struct resource), GFP_KERNEL); | 133 | res = kzalloc(sizeof(struct resource), GFP_KERNEL); |
134 | BUG_ON(!res); | 134 | if (!res) |
135 | return ERR_PTR(-ENOMEM); | ||
135 | 136 | ||
136 | res->name = "System RAM"; | 137 | res->name = "System RAM"; |
137 | res->start = start; | 138 | res->start = start; |
@@ -140,7 +141,7 @@ static struct resource *register_memory_resource(u64 start, u64 size) | |||
140 | if (request_resource(&iomem_resource, res) < 0) { | 141 | if (request_resource(&iomem_resource, res) < 0) { |
141 | pr_debug("System RAM resource %pR cannot be added\n", res); | 142 | pr_debug("System RAM resource %pR cannot be added\n", res); |
142 | kfree(res); | 143 | kfree(res); |
143 | res = NULL; | 144 | return ERR_PTR(-EEXIST); |
144 | } | 145 | } |
145 | return res; | 146 | return res; |
146 | } | 147 | } |
@@ -1312,8 +1313,8 @@ int __ref add_memory(int nid, u64 start, u64 size) | |||
1312 | int ret; | 1313 | int ret; |
1313 | 1314 | ||
1314 | res = register_memory_resource(start, size); | 1315 | res = register_memory_resource(start, size); |
1315 | if (!res) | 1316 | if (IS_ERR(res)) |
1316 | return -EEXIST; | 1317 | return PTR_ERR(res); |
1317 | 1318 | ||
1318 | ret = add_memory_resource(nid, res); | 1319 | ret = add_memory_resource(nid, res); |
1319 | if (ret < 0) | 1320 | if (ret < 0) |
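
register_memory_resource() above stops BUG()ing on allocation failure and starts distinguishing its two failure modes through ERR_PTR(), with add_memory() decoding them via IS_ERR()/PTR_ERR() instead of assuming every NULL meant -EEXIST. Minimal userspace copies of those helpers, enough to show the convention (register_resource() here is a stand-in, not the kernel function):

#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
    /* Errno values occupy the top page of the address space. */
    return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

static int dummy_resource;

static void *register_resource(int conflict)
{
    if (conflict)
        return ERR_PTR(-EEXIST); /* range already claimed */
    return &dummy_resource;
}

int main(void)
{
    void *res = register_resource(1);

    if (IS_ERR(res))
        printf("failed: errno=%ld\n", -PTR_ERR(res));

    res = register_resource(0);
    if (!IS_ERR(res))
        printf("got resource %p\n", res);
    return 0;
}
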
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 87a177917cb2..d8caff071a30 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -2142,12 +2142,14 @@ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b) | |||
2142 | * | 2142 | * |
2143 | * Remember policies even when nobody has shared memory mapped. | 2143 | * Remember policies even when nobody has shared memory mapped. |
2144 | * The policies are kept in Red-Black tree linked from the inode. | 2144 | * The policies are kept in Red-Black tree linked from the inode. |
2145 | * They are protected by the sp->lock spinlock, which should be held | 2145 | * They are protected by the sp->lock rwlock, which should be held |
2146 | * for any accesses to the tree. | 2146 | * for any accesses to the tree. |
2147 | */ | 2147 | */ |
2148 | 2148 | ||
2149 | /* lookup first element intersecting start-end */ | 2149 | /* |
2150 | /* Caller holds sp->lock */ | 2150 | * lookup first element intersecting start-end. Caller holds sp->lock for |
2151 | * reading or for writing | ||
2152 | */ | ||
2151 | static struct sp_node * | 2153 | static struct sp_node * |
2152 | sp_lookup(struct shared_policy *sp, unsigned long start, unsigned long end) | 2154 | sp_lookup(struct shared_policy *sp, unsigned long start, unsigned long end) |
2153 | { | 2155 | { |
@@ -2178,8 +2180,10 @@ sp_lookup(struct shared_policy *sp, unsigned long start, unsigned long end) | |||
2178 | return rb_entry(n, struct sp_node, nd); | 2180 | return rb_entry(n, struct sp_node, nd); |
2179 | } | 2181 | } |
2180 | 2182 | ||
2181 | /* Insert a new shared policy into the list. */ | 2183 | /* |
2182 | /* Caller holds sp->lock */ | 2184 | * Insert a new shared policy into the list. Caller holds sp->lock for |
2185 | * writing. | ||
2186 | */ | ||
2183 | static void sp_insert(struct shared_policy *sp, struct sp_node *new) | 2187 | static void sp_insert(struct shared_policy *sp, struct sp_node *new) |
2184 | { | 2188 | { |
2185 | struct rb_node **p = &sp->root.rb_node; | 2189 | struct rb_node **p = &sp->root.rb_node; |
@@ -2211,13 +2215,13 @@ mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx) | |||
2211 | 2215 | ||
2212 | if (!sp->root.rb_node) | 2216 | if (!sp->root.rb_node) |
2213 | return NULL; | 2217 | return NULL; |
2214 | spin_lock(&sp->lock); | 2218 | read_lock(&sp->lock); |
2215 | sn = sp_lookup(sp, idx, idx+1); | 2219 | sn = sp_lookup(sp, idx, idx+1); |
2216 | if (sn) { | 2220 | if (sn) { |
2217 | mpol_get(sn->policy); | 2221 | mpol_get(sn->policy); |
2218 | pol = sn->policy; | 2222 | pol = sn->policy; |
2219 | } | 2223 | } |
2220 | spin_unlock(&sp->lock); | 2224 | read_unlock(&sp->lock); |
2221 | return pol; | 2225 | return pol; |
2222 | } | 2226 | } |
2223 | 2227 | ||
@@ -2360,7 +2364,7 @@ static int shared_policy_replace(struct shared_policy *sp, unsigned long start, | |||
2360 | int ret = 0; | 2364 | int ret = 0; |
2361 | 2365 | ||
2362 | restart: | 2366 | restart: |
2363 | spin_lock(&sp->lock); | 2367 | write_lock(&sp->lock); |
2364 | n = sp_lookup(sp, start, end); | 2368 | n = sp_lookup(sp, start, end); |
2365 | /* Take care of old policies in the same range. */ | 2369 | /* Take care of old policies in the same range. */ |
2366 | while (n && n->start < end) { | 2370 | while (n && n->start < end) { |
@@ -2393,7 +2397,7 @@ restart: | |||
2393 | } | 2397 | } |
2394 | if (new) | 2398 | if (new) |
2395 | sp_insert(sp, new); | 2399 | sp_insert(sp, new); |
2396 | spin_unlock(&sp->lock); | 2400 | write_unlock(&sp->lock); |
2397 | ret = 0; | 2401 | ret = 0; |
2398 | 2402 | ||
2399 | err_out: | 2403 | err_out: |
@@ -2405,7 +2409,7 @@ err_out: | |||
2405 | return ret; | 2409 | return ret; |
2406 | 2410 | ||
2407 | alloc_new: | 2411 | alloc_new: |
2408 | spin_unlock(&sp->lock); | 2412 | write_unlock(&sp->lock); |
2409 | ret = -ENOMEM; | 2413 | ret = -ENOMEM; |
2410 | n_new = kmem_cache_alloc(sn_cache, GFP_KERNEL); | 2414 | n_new = kmem_cache_alloc(sn_cache, GFP_KERNEL); |
2411 | if (!n_new) | 2415 | if (!n_new) |
@@ -2431,7 +2435,7 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol) | |||
2431 | int ret; | 2435 | int ret; |
2432 | 2436 | ||
2433 | sp->root = RB_ROOT; /* empty tree == default mempolicy */ | 2437 | sp->root = RB_ROOT; /* empty tree == default mempolicy */ |
2434 | spin_lock_init(&sp->lock); | 2438 | rwlock_init(&sp->lock); |
2435 | 2439 | ||
2436 | if (mpol) { | 2440 | if (mpol) { |
2437 | struct vm_area_struct pvma; | 2441 | struct vm_area_struct pvma; |
@@ -2497,14 +2501,14 @@ void mpol_free_shared_policy(struct shared_policy *p) | |||
2497 | 2501 | ||
2498 | if (!p->root.rb_node) | 2502 | if (!p->root.rb_node) |
2499 | return; | 2503 | return; |
2500 | spin_lock(&p->lock); | 2504 | write_lock(&p->lock); |
2501 | next = rb_first(&p->root); | 2505 | next = rb_first(&p->root); |
2502 | while (next) { | 2506 | while (next) { |
2503 | n = rb_entry(next, struct sp_node, nd); | 2507 | n = rb_entry(next, struct sp_node, nd); |
2504 | next = rb_next(&n->nd); | 2508 | next = rb_next(&n->nd); |
2505 | sp_delete(p, n); | 2509 | sp_delete(p, n); |
2506 | } | 2510 | } |
2507 | spin_unlock(&p->lock); | 2511 | write_unlock(&p->lock); |
2508 | } | 2512 | } |
2509 | 2513 | ||
2510 | #ifdef CONFIG_NUMA_BALANCING | 2514 | #ifdef CONFIG_NUMA_BALANCING |
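The shared-policy tree is consulted on every allocation against a mapped shmem object but changed rarely, so trading the spinlock for an rwlock lets concurrent lookups proceed in parallel while writers keep full exclusion. A sketch of the resulting pattern, assuming struct shared_policy's lock member is now declared rwlock_t:

    rwlock_init(&sp->lock);

    read_lock(&sp->lock);           /* many readers may hold this at once */
    sn = sp_lookup(sp, idx, idx + 1);
    read_unlock(&sp->lock);

    write_lock(&sp->lock);          /* writers still serialize fully */
    sp_insert(sp, new);
    write_unlock(&sp->lock);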
diff --git a/mm/mlock.c b/mm/mlock.c index 339d9e0949b6..9cb87cbc4071 100644 --- a/mm/mlock.c +++ b/mm/mlock.c | |||
@@ -425,7 +425,7 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, | |||
425 | vma->vm_flags &= VM_LOCKED_CLEAR_MASK; | 425 | vma->vm_flags &= VM_LOCKED_CLEAR_MASK; |
426 | 426 | ||
427 | while (start < end) { | 427 | while (start < end) { |
428 | struct page *page = NULL; | 428 | struct page *page; |
429 | unsigned int page_mask; | 429 | unsigned int page_mask; |
430 | unsigned long page_increm; | 430 | unsigned long page_increm; |
431 | struct pagevec pvec; | 431 | struct pagevec pvec; |
diff --git a/mm/mmap.c b/mm/mmap.c --- a/mm/mmap.c +++ b/mm/mmap.c | |||
@@ -58,6 +58,18 @@ | |||
58 | #define arch_rebalance_pgtables(addr, len) (addr) | 58 | #define arch_rebalance_pgtables(addr, len) (addr) |
59 | #endif | 59 | #endif |
60 | 60 | ||
61 | #ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS | ||
62 | const int mmap_rnd_bits_min = CONFIG_ARCH_MMAP_RND_BITS_MIN; | ||
63 | const int mmap_rnd_bits_max = CONFIG_ARCH_MMAP_RND_BITS_MAX; | ||
64 | int mmap_rnd_bits __read_mostly = CONFIG_ARCH_MMAP_RND_BITS; | ||
65 | #endif | ||
66 | #ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS | ||
67 | const int mmap_rnd_compat_bits_min = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN; | ||
68 | const int mmap_rnd_compat_bits_max = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX; | ||
69 | int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS; | ||
70 | #endif | ||
71 | |||
72 | |||
61 | static void unmap_region(struct mm_struct *mm, | 73 | static void unmap_region(struct mm_struct *mm, |
62 | struct vm_area_struct *vma, struct vm_area_struct *prev, | 74 | struct vm_area_struct *vma, struct vm_area_struct *prev, |
63 | unsigned long start, unsigned long end); | 75 | unsigned long start, unsigned long end); |
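These new constants let an architecture advertise a range of usable mmap-base randomization bits instead of baking in a single value, with the current setting clamped to that range at runtime. The series exposes the live value as a sysctl; assuming the vm.mmap_rnd_bits naming, it can be inspected from userspace like so (a sketch; the file exists only on architectures that select CONFIG_HAVE_ARCH_MMAP_RND_BITS):

    #include <stdio.h>

    int main(void)
    {
            FILE *f = fopen("/proc/sys/vm/mmap_rnd_bits", "r");
            int bits;

            if (f && fscanf(f, "%d", &bits) == 1)
                    printf("mmap base entropy: %d bits\n", bits);
            if (f)
                    fclose(f);
            return 0;
    }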
@@ -1208,24 +1220,6 @@ none: | |||
1208 | return NULL; | 1220 | return NULL; |
1209 | } | 1221 | } |
1210 | 1222 | ||
1211 | #ifdef CONFIG_PROC_FS | ||
1212 | void vm_stat_account(struct mm_struct *mm, unsigned long flags, | ||
1213 | struct file *file, long pages) | ||
1214 | { | ||
1215 | const unsigned long stack_flags | ||
1216 | = VM_STACK_FLAGS & (VM_GROWSUP|VM_GROWSDOWN); | ||
1217 | |||
1218 | mm->total_vm += pages; | ||
1219 | |||
1220 | if (file) { | ||
1221 | mm->shared_vm += pages; | ||
1222 | if ((flags & (VM_EXEC|VM_WRITE)) == VM_EXEC) | ||
1223 | mm->exec_vm += pages; | ||
1224 | } else if (flags & stack_flags) | ||
1225 | mm->stack_vm += pages; | ||
1226 | } | ||
1227 | #endif /* CONFIG_PROC_FS */ | ||
1228 | |||
1229 | /* | 1223 | /* |
1230 | * If a hint addr is less than mmap_min_addr change hint to be as | 1224 | * If a hint addr is less than mmap_min_addr change hint to be as |
1231 | * low as possible but still greater than mmap_min_addr | 1225 | * low as possible but still greater than mmap_min_addr |
@@ -1544,19 +1538,17 @@ unsigned long mmap_region(struct file *file, unsigned long addr, | |||
1544 | unsigned long charged = 0; | 1538 | unsigned long charged = 0; |
1545 | 1539 | ||
1546 | /* Check against address space limit. */ | 1540 | /* Check against address space limit. */ |
1547 | if (!may_expand_vm(mm, len >> PAGE_SHIFT)) { | 1541 | if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) { |
1548 | unsigned long nr_pages; | 1542 | unsigned long nr_pages; |
1549 | 1543 | ||
1550 | /* | 1544 | /* |
1551 | * MAP_FIXED may remove pages of mappings that intersects with | 1545 | * MAP_FIXED may remove pages of mappings that intersects with |
1552 | * requested mapping. Account for the pages it would unmap. | 1546 | * requested mapping. Account for the pages it would unmap. |
1553 | */ | 1547 | */ |
1554 | if (!(vm_flags & MAP_FIXED)) | ||
1555 | return -ENOMEM; | ||
1556 | |||
1557 | nr_pages = count_vma_pages_range(mm, addr, addr + len); | 1548 | nr_pages = count_vma_pages_range(mm, addr, addr + len); |
1558 | 1549 | ||
1559 | if (!may_expand_vm(mm, (len >> PAGE_SHIFT) - nr_pages)) | 1550 | if (!may_expand_vm(mm, vm_flags, |
1551 | (len >> PAGE_SHIFT) - nr_pages)) | ||
1560 | return -ENOMEM; | 1552 | return -ENOMEM; |
1561 | } | 1553 | } |
1562 | 1554 | ||
@@ -1655,7 +1647,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, | |||
1655 | out: | 1647 | out: |
1656 | perf_event_mmap(vma); | 1648 | perf_event_mmap(vma); |
1657 | 1649 | ||
1658 | vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT); | 1650 | vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT); |
1659 | if (vm_flags & VM_LOCKED) { | 1651 | if (vm_flags & VM_LOCKED) { |
1660 | if (!((vm_flags & VM_SPECIAL) || is_vm_hugetlb_page(vma) || | 1652 | if (!((vm_flags & VM_SPECIAL) || is_vm_hugetlb_page(vma) || |
1661 | vma == get_gate_vma(current->mm))) | 1653 | vma == get_gate_vma(current->mm))) |
@@ -2102,7 +2094,7 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns | |||
2102 | unsigned long new_start, actual_size; | 2094 | unsigned long new_start, actual_size; |
2103 | 2095 | ||
2104 | /* address space limit tests */ | 2096 | /* address space limit tests */ |
2105 | if (!may_expand_vm(mm, grow)) | 2097 | if (!may_expand_vm(mm, vma->vm_flags, grow)) |
2106 | return -ENOMEM; | 2098 | return -ENOMEM; |
2107 | 2099 | ||
2108 | /* Stack limit test */ | 2100 | /* Stack limit test */ |
@@ -2199,8 +2191,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) | |||
2199 | spin_lock(&mm->page_table_lock); | 2191 | spin_lock(&mm->page_table_lock); |
2200 | if (vma->vm_flags & VM_LOCKED) | 2192 | if (vma->vm_flags & VM_LOCKED) |
2201 | mm->locked_vm += grow; | 2193 | mm->locked_vm += grow; |
2202 | vm_stat_account(mm, vma->vm_flags, | 2194 | vm_stat_account(mm, vma->vm_flags, grow); |
2203 | vma->vm_file, grow); | ||
2204 | anon_vma_interval_tree_pre_update_vma(vma); | 2195 | anon_vma_interval_tree_pre_update_vma(vma); |
2205 | vma->vm_end = address; | 2196 | vma->vm_end = address; |
2206 | anon_vma_interval_tree_post_update_vma(vma); | 2197 | anon_vma_interval_tree_post_update_vma(vma); |
@@ -2275,8 +2266,7 @@ int expand_downwards(struct vm_area_struct *vma, | |||
2275 | spin_lock(&mm->page_table_lock); | 2266 | spin_lock(&mm->page_table_lock); |
2276 | if (vma->vm_flags & VM_LOCKED) | 2267 | if (vma->vm_flags & VM_LOCKED) |
2277 | mm->locked_vm += grow; | 2268 | mm->locked_vm += grow; |
2278 | vm_stat_account(mm, vma->vm_flags, | 2269 | vm_stat_account(mm, vma->vm_flags, grow); |
2279 | vma->vm_file, grow); | ||
2280 | anon_vma_interval_tree_pre_update_vma(vma); | 2270 | anon_vma_interval_tree_pre_update_vma(vma); |
2281 | vma->vm_start = address; | 2271 | vma->vm_start = address; |
2282 | vma->vm_pgoff -= grow; | 2272 | vma->vm_pgoff -= grow; |
@@ -2390,7 +2380,7 @@ static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma) | |||
2390 | 2380 | ||
2391 | if (vma->vm_flags & VM_ACCOUNT) | 2381 | if (vma->vm_flags & VM_ACCOUNT) |
2392 | nr_accounted += nrpages; | 2382 | nr_accounted += nrpages; |
2393 | vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages); | 2383 | vm_stat_account(mm, vma->vm_flags, -nrpages); |
2394 | vma = remove_vma(vma); | 2384 | vma = remove_vma(vma); |
2395 | } while (vma); | 2385 | } while (vma); |
2396 | vm_unacct_memory(nr_accounted); | 2386 | vm_unacct_memory(nr_accounted); |
@@ -2760,7 +2750,7 @@ static unsigned long do_brk(unsigned long addr, unsigned long len) | |||
2760 | } | 2750 | } |
2761 | 2751 | ||
2762 | /* Check against address space limits *after* clearing old maps... */ | 2752 | /* Check against address space limits *after* clearing old maps... */ |
2763 | if (!may_expand_vm(mm, len >> PAGE_SHIFT)) | 2753 | if (!may_expand_vm(mm, flags, len >> PAGE_SHIFT)) |
2764 | return -ENOMEM; | 2754 | return -ENOMEM; |
2765 | 2755 | ||
2766 | if (mm->map_count > sysctl_max_map_count) | 2756 | if (mm->map_count > sysctl_max_map_count) |
@@ -2795,6 +2785,7 @@ static unsigned long do_brk(unsigned long addr, unsigned long len) | |||
2795 | out: | 2785 | out: |
2796 | perf_event_mmap(vma); | 2786 | perf_event_mmap(vma); |
2797 | mm->total_vm += len >> PAGE_SHIFT; | 2787 | mm->total_vm += len >> PAGE_SHIFT; |
2788 | mm->data_vm += len >> PAGE_SHIFT; | ||
2798 | if (flags & VM_LOCKED) | 2789 | if (flags & VM_LOCKED) |
2799 | mm->locked_vm += (len >> PAGE_SHIFT); | 2790 | mm->locked_vm += (len >> PAGE_SHIFT); |
2800 | vma->vm_flags |= VM_SOFTDIRTY; | 2791 | vma->vm_flags |= VM_SOFTDIRTY; |
@@ -2986,16 +2977,28 @@ out: | |||
2986 | * Return true if the calling process may expand its vm space by the passed | 2977 | * Return true if the calling process may expand its vm space by the passed |
2987 | * number of pages | 2978 | * number of pages |
2988 | */ | 2979 | */ |
2989 | int may_expand_vm(struct mm_struct *mm, unsigned long npages) | 2980 | bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages) |
2990 | { | 2981 | { |
2991 | unsigned long cur = mm->total_vm; /* pages */ | 2982 | if (mm->total_vm + npages > rlimit(RLIMIT_AS) >> PAGE_SHIFT) |
2992 | unsigned long lim; | 2983 | return false; |
2993 | 2984 | ||
2994 | lim = rlimit(RLIMIT_AS) >> PAGE_SHIFT; | 2985 | if ((flags & (VM_WRITE | VM_SHARED | (VM_STACK_FLAGS & |
2986 | (VM_GROWSUP | VM_GROWSDOWN)))) == VM_WRITE) | ||
2987 | return mm->data_vm + npages <= rlimit(RLIMIT_DATA); | ||
2995 | 2988 | ||
2996 | if (cur + npages > lim) | 2989 | return true; |
2997 | return 0; | 2990 | } |
2998 | return 1; | 2991 | |
2992 | void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages) | ||
2993 | { | ||
2994 | mm->total_vm += npages; | ||
2995 | |||
2996 | if ((flags & (VM_EXEC | VM_WRITE)) == VM_EXEC) | ||
2997 | mm->exec_vm += npages; | ||
2998 | else if (flags & (VM_STACK_FLAGS & (VM_GROWSUP | VM_GROWSDOWN))) | ||
2999 | mm->stack_vm += npages; | ||
3000 | else if ((flags & (VM_WRITE | VM_SHARED)) == VM_WRITE) | ||
3001 | mm->data_vm += npages; | ||
2999 | } | 3002 | } |
3000 | 3003 | ||
3001 | static int special_mapping_fault(struct vm_area_struct *vma, | 3004 | static int special_mapping_fault(struct vm_area_struct *vma, |
@@ -3077,7 +3080,7 @@ static struct vm_area_struct *__install_special_mapping( | |||
3077 | if (ret) | 3080 | if (ret) |
3078 | goto out; | 3081 | goto out; |
3079 | 3082 | ||
3080 | mm->total_vm += len >> PAGE_SHIFT; | 3083 | vm_stat_account(mm, vma->vm_flags, len >> PAGE_SHIFT); |
3081 | 3084 | ||
3082 | perf_event_mmap(vma); | 3085 | perf_event_mmap(vma); |
3083 | 3086 | ||
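Taken together, the mmap.c changes replace the old file-based bookkeeping with classification by protection bits (exec_vm for r-x mappings, stack_vm for growable ones, data_vm for private writable ones), and may_expand_vm() starts checking RLIMIT_DATA against that data_vm total rather than leaving the limit to brk() alone; the mprotect.c hunk further down applies the same check when an existing region turns writable. A small userspace demonstration of the intended visible effect (hedged: whether the mmap actually fails depends on the configured limit and on follow-up refinements to this accounting):

    #include <stdio.h>
    #include <sys/mman.h>
    #include <sys/resource.h>

    int main(void)
    {
            struct rlimit rl = { 4 << 20, 4 << 20 };   /* 4 MiB data limit */
            void *p;

            setrlimit(RLIMIT_DATA, &rl);
            /* private writable anonymous mapping: now charged to data_vm */
            p = mmap(NULL, 64 << 20, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            if (p == MAP_FAILED)
                    perror("mmap");    /* expect ENOMEM once the limit bites */
            return 0;
    }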
diff --git a/mm/mmzone.c b/mm/mmzone.c index 7d87ebb0d632..52687fb4de6f 100644 --- a/mm/mmzone.c +++ b/mm/mmzone.c | |||
@@ -72,16 +72,16 @@ struct zoneref *next_zones_zonelist(struct zoneref *z, | |||
72 | } | 72 | } |
73 | 73 | ||
74 | #ifdef CONFIG_ARCH_HAS_HOLES_MEMORYMODEL | 74 | #ifdef CONFIG_ARCH_HAS_HOLES_MEMORYMODEL |
75 | int memmap_valid_within(unsigned long pfn, | 75 | bool memmap_valid_within(unsigned long pfn, |
76 | struct page *page, struct zone *zone) | 76 | struct page *page, struct zone *zone) |
77 | { | 77 | { |
78 | if (page_to_pfn(page) != pfn) | 78 | if (page_to_pfn(page) != pfn) |
79 | return 0; | 79 | return false; |
80 | 80 | ||
81 | if (page_zone(page) != zone) | 81 | if (page_zone(page) != zone) |
82 | return 0; | 82 | return false; |
83 | 83 | ||
84 | return 1; | 84 | return true; |
85 | } | 85 | } |
86 | #endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */ | 86 | #endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */ |
87 | 87 | ||
diff --git a/mm/mprotect.c b/mm/mprotect.c index ef5be8eaab00..c764402c464f 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c | |||
@@ -278,6 +278,10 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, | |||
278 | * even if read-only so there is no need to account for them here | 278 | * even if read-only so there is no need to account for them here |
279 | */ | 279 | */ |
280 | if (newflags & VM_WRITE) { | 280 | if (newflags & VM_WRITE) { |
281 | /* Check space limits when area turns into data. */ | ||
282 | if (!may_expand_vm(mm, newflags, nrpages) && | ||
283 | may_expand_vm(mm, oldflags, nrpages)) | ||
284 | return -ENOMEM; | ||
281 | if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_HUGETLB| | 285 | if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_HUGETLB| |
282 | VM_SHARED|VM_NORESERVE))) { | 286 | VM_SHARED|VM_NORESERVE))) { |
283 | charged = nrpages; | 287 | charged = nrpages; |
@@ -334,8 +338,8 @@ success: | |||
334 | populate_vma_page_range(vma, start, end, NULL); | 338 | populate_vma_page_range(vma, start, end, NULL); |
335 | } | 339 | } |
336 | 340 | ||
337 | vm_stat_account(mm, oldflags, vma->vm_file, -nrpages); | 341 | vm_stat_account(mm, oldflags, -nrpages); |
338 | vm_stat_account(mm, newflags, vma->vm_file, nrpages); | 342 | vm_stat_account(mm, newflags, nrpages); |
339 | perf_event_mmap(vma); | 343 | perf_event_mmap(vma); |
340 | return 0; | 344 | return 0; |
341 | 345 | ||
diff --git a/mm/mremap.c b/mm/mremap.c index de824e72c3e8..e55b157865d5 100644 --- a/mm/mremap.c +++ b/mm/mremap.c | |||
@@ -317,7 +317,7 @@ static unsigned long move_vma(struct vm_area_struct *vma, | |||
317 | * If this were a serious issue, we'd add a flag to do_munmap(). | 317 | * If this were a serious issue, we'd add a flag to do_munmap(). |
318 | */ | 318 | */ |
319 | hiwater_vm = mm->hiwater_vm; | 319 | hiwater_vm = mm->hiwater_vm; |
320 | vm_stat_account(mm, vma->vm_flags, vma->vm_file, new_len>>PAGE_SHIFT); | 320 | vm_stat_account(mm, vma->vm_flags, new_len >> PAGE_SHIFT); |
321 | 321 | ||
322 | /* Tell pfnmap has moved from this vma */ | 322 | /* Tell pfnmap has moved from this vma */ |
323 | if (unlikely(vma->vm_flags & VM_PFNMAP)) | 323 | if (unlikely(vma->vm_flags & VM_PFNMAP)) |
@@ -383,7 +383,8 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr, | |||
383 | return ERR_PTR(-EAGAIN); | 383 | return ERR_PTR(-EAGAIN); |
384 | } | 384 | } |
385 | 385 | ||
386 | if (!may_expand_vm(mm, (new_len - old_len) >> PAGE_SHIFT)) | 386 | if (!may_expand_vm(mm, vma->vm_flags, |
387 | (new_len - old_len) >> PAGE_SHIFT)) | ||
387 | return ERR_PTR(-ENOMEM); | 388 | return ERR_PTR(-ENOMEM); |
388 | 389 | ||
389 | if (vma->vm_flags & VM_ACCOUNT) { | 390 | if (vma->vm_flags & VM_ACCOUNT) { |
@@ -545,7 +546,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, | |||
545 | goto out; | 546 | goto out; |
546 | } | 547 | } |
547 | 548 | ||
548 | vm_stat_account(mm, vma->vm_flags, vma->vm_file, pages); | 549 | vm_stat_account(mm, vma->vm_flags, pages); |
549 | if (vma->vm_flags & VM_LOCKED) { | 550 | if (vma->vm_flags & VM_LOCKED) { |
550 | mm->locked_vm += pages; | 551 | mm->locked_vm += pages; |
551 | locked = true; | 552 | locked = true; |
diff --git a/mm/nommu.c b/mm/nommu.c index 92be862c859b..fbf6f0f1d6c9 100644 --- a/mm/nommu.c +++ b/mm/nommu.c | |||
@@ -560,7 +560,7 @@ void __init mmap_init(void) | |||
560 | 560 | ||
561 | ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL); | 561 | ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL); |
562 | VM_BUG_ON(ret); | 562 | VM_BUG_ON(ret); |
563 | vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC); | 563 | vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC|SLAB_ACCOUNT); |
564 | } | 564 | } |
565 | 565 | ||
566 | /* | 566 | /* |
diff --git a/mm/oom_kill.c b/mm/oom_kill.c index c12680993ff3..dc490c06941b 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c | |||
@@ -585,10 +585,11 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p, | |||
585 | */ | 585 | */ |
586 | do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, true); | 586 | do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, true); |
587 | mark_oom_victim(victim); | 587 | mark_oom_victim(victim); |
588 | pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB\n", | 588 | pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n", |
589 | task_pid_nr(victim), victim->comm, K(victim->mm->total_vm), | 589 | task_pid_nr(victim), victim->comm, K(victim->mm->total_vm), |
590 | K(get_mm_counter(victim->mm, MM_ANONPAGES)), | 590 | K(get_mm_counter(victim->mm, MM_ANONPAGES)), |
591 | K(get_mm_counter(victim->mm, MM_FILEPAGES))); | 591 | K(get_mm_counter(victim->mm, MM_FILEPAGES)), |
592 | K(get_mm_counter(victim->mm, MM_SHMEMPAGES))); | ||
592 | task_unlock(victim); | 593 | task_unlock(victim); |
593 | 594 | ||
594 | /* | 595 | /* |
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index d15d88c8efa1..6fe7d15bd1f7 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
@@ -278,7 +278,12 @@ static unsigned long zone_dirtyable_memory(struct zone *zone) | |||
278 | unsigned long nr_pages; | 278 | unsigned long nr_pages; |
279 | 279 | ||
280 | nr_pages = zone_page_state(zone, NR_FREE_PAGES); | 280 | nr_pages = zone_page_state(zone, NR_FREE_PAGES); |
281 | nr_pages -= min(nr_pages, zone->dirty_balance_reserve); | 281 | /* |
282 | * Pages reserved for the kernel should not be considered | ||
283 | * dirtyable, to prevent a situation where reclaim has to | ||
284 | * clean pages in order to balance the zones. | ||
285 | */ | ||
286 | nr_pages -= min(nr_pages, zone->totalreserve_pages); | ||
282 | 287 | ||
283 | nr_pages += zone_page_state(zone, NR_INACTIVE_FILE); | 288 | nr_pages += zone_page_state(zone, NR_INACTIVE_FILE); |
284 | nr_pages += zone_page_state(zone, NR_ACTIVE_FILE); | 289 | nr_pages += zone_page_state(zone, NR_ACTIVE_FILE); |
@@ -332,7 +337,12 @@ static unsigned long global_dirtyable_memory(void) | |||
332 | unsigned long x; | 337 | unsigned long x; |
333 | 338 | ||
334 | x = global_page_state(NR_FREE_PAGES); | 339 | x = global_page_state(NR_FREE_PAGES); |
335 | x -= min(x, dirty_balance_reserve); | 340 | /* |
341 | * Pages reserved for the kernel should not be considered | ||
342 | * dirtyable, to prevent a situation where reclaim has to | ||
343 | * clean pages in order to balance the zones. | ||
344 | */ | ||
345 | x -= min(x, totalreserve_pages); | ||
336 | 346 | ||
337 | x += global_page_state(NR_INACTIVE_FILE); | 347 | x += global_page_state(NR_INACTIVE_FILE); |
338 | x += global_page_state(NR_ACTIVE_FILE); | 348 | x += global_page_state(NR_ACTIVE_FILE); |
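Both writeback hunks swap the dedicated dirty_balance_reserve for totalreserve_pages, which page_alloc.c (below) now also maintains per zone; the reserve totals already capture the high watermark plus the largest lowmem reserve, so the separate counter was redundant. The per-zone and global calculations share one shape, condensed here as a sketch:

    /* dirtyable = free pages above the kernel reserve, plus file LRU pages */
    nr_pages  = zone_page_state(zone, NR_FREE_PAGES);
    nr_pages -= min(nr_pages, zone->totalreserve_pages);
    nr_pages += zone_page_state(zone, NR_INACTIVE_FILE);
    nr_pages += zone_page_state(zone, NR_ACTIVE_FILE);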
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9d666df5ef95..ce63d603820f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -114,13 +114,6 @@ static DEFINE_SPINLOCK(managed_page_count_lock); | |||
114 | unsigned long totalram_pages __read_mostly; | 114 | unsigned long totalram_pages __read_mostly; |
115 | unsigned long totalreserve_pages __read_mostly; | 115 | unsigned long totalreserve_pages __read_mostly; |
116 | unsigned long totalcma_pages __read_mostly; | 116 | unsigned long totalcma_pages __read_mostly; |
117 | /* | ||
118 | * When calculating the number of globally allowed dirty pages, there | ||
119 | * is a certain number of per-zone reserves that should not be | ||
120 | * considered dirtyable memory. This is the sum of those reserves | ||
121 | * over all existing zones that contribute dirtyable memory. | ||
122 | */ | ||
123 | unsigned long dirty_balance_reserve __read_mostly; | ||
124 | 117 | ||
125 | int percpu_pagelist_fraction; | 118 | int percpu_pagelist_fraction; |
126 | gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK; | 119 | gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK; |
@@ -812,7 +805,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, | |||
812 | do { | 805 | do { |
813 | int mt; /* migratetype of the to-be-freed page */ | 806 | int mt; /* migratetype of the to-be-freed page */ |
814 | 807 | ||
815 | page = list_entry(list->prev, struct page, lru); | 808 | page = list_last_entry(list, struct page, lru); |
816 | /* must delete as __free_one_page list manipulates */ | 809 | /* must delete as __free_one_page list manipulates */ |
817 | list_del(&page->lru); | 810 | list_del(&page->lru); |
818 | 811 | ||
@@ -1417,11 +1410,10 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, | |||
1417 | /* Find a page of the appropriate size in the preferred list */ | 1410 | /* Find a page of the appropriate size in the preferred list */ |
1418 | for (current_order = order; current_order < MAX_ORDER; ++current_order) { | 1411 | for (current_order = order; current_order < MAX_ORDER; ++current_order) { |
1419 | area = &(zone->free_area[current_order]); | 1412 | area = &(zone->free_area[current_order]); |
1420 | if (list_empty(&area->free_list[migratetype])) | 1413 | page = list_first_entry_or_null(&area->free_list[migratetype], |
1421 | continue; | ||
1422 | |||
1423 | page = list_entry(area->free_list[migratetype].next, | ||
1424 | struct page, lru); | 1414 | struct page, lru); |
1415 | if (!page) | ||
1416 | continue; | ||
1425 | list_del(&page->lru); | 1417 | list_del(&page->lru); |
1426 | rmv_page_order(page); | 1418 | rmv_page_order(page); |
1427 | area->nr_free--; | 1419 | area->nr_free--; |
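This hunk, like several below in this file and in pgtable-generic.c and slab.c, folds the list_empty()-then-list_entry() two-step into a single list_first_entry_or_null() call. The helper is defined in include/linux/list.h essentially as:

    #define list_first_entry_or_null(ptr, type, member) \
            (!list_empty(ptr) ? list_first_entry(ptr, type, member) : NULL)

Its relatives list_first_entry() and list_last_entry() likewise replace open-coded list_entry(head->next, ...) and list_entry(head->prev, ...).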
@@ -1700,12 +1692,12 @@ static void unreserve_highatomic_pageblock(const struct alloc_context *ac) | |||
1700 | for (order = 0; order < MAX_ORDER; order++) { | 1692 | for (order = 0; order < MAX_ORDER; order++) { |
1701 | struct free_area *area = &(zone->free_area[order]); | 1693 | struct free_area *area = &(zone->free_area[order]); |
1702 | 1694 | ||
1703 | if (list_empty(&area->free_list[MIGRATE_HIGHATOMIC])) | 1695 | page = list_first_entry_or_null( |
1696 | &area->free_list[MIGRATE_HIGHATOMIC], | ||
1697 | struct page, lru); | ||
1698 | if (!page) | ||
1704 | continue; | 1699 | continue; |
1705 | 1700 | ||
1706 | page = list_entry(area->free_list[MIGRATE_HIGHATOMIC].next, | ||
1707 | struct page, lru); | ||
1708 | |||
1709 | /* | 1701 | /* |
1710 | * It should never happen but changes to locking could | 1702 | * It should never happen but changes to locking could |
1711 | * inadvertently allow a per-cpu drain to add pages | 1703 | * inadvertently allow a per-cpu drain to add pages |
@@ -1753,7 +1745,7 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype) | |||
1753 | if (fallback_mt == -1) | 1745 | if (fallback_mt == -1) |
1754 | continue; | 1746 | continue; |
1755 | 1747 | ||
1756 | page = list_entry(area->free_list[fallback_mt].next, | 1748 | page = list_first_entry(&area->free_list[fallback_mt], |
1757 | struct page, lru); | 1749 | struct page, lru); |
1758 | if (can_steal) | 1750 | if (can_steal) |
1759 | steal_suitable_fallback(zone, page, start_migratetype); | 1751 | steal_suitable_fallback(zone, page, start_migratetype); |
@@ -1788,7 +1780,7 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype) | |||
1788 | * Call me with the zone->lock already held. | 1780 | * Call me with the zone->lock already held. |
1789 | */ | 1781 | */ |
1790 | static struct page *__rmqueue(struct zone *zone, unsigned int order, | 1782 | static struct page *__rmqueue(struct zone *zone, unsigned int order, |
1791 | int migratetype, gfp_t gfp_flags) | 1783 | int migratetype) |
1792 | { | 1784 | { |
1793 | struct page *page; | 1785 | struct page *page; |
1794 | 1786 | ||
@@ -1818,7 +1810,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, | |||
1818 | 1810 | ||
1819 | spin_lock(&zone->lock); | 1811 | spin_lock(&zone->lock); |
1820 | for (i = 0; i < count; ++i) { | 1812 | for (i = 0; i < count; ++i) { |
1821 | struct page *page = __rmqueue(zone, order, migratetype, 0); | 1813 | struct page *page = __rmqueue(zone, order, migratetype); |
1822 | if (unlikely(page == NULL)) | 1814 | if (unlikely(page == NULL)) |
1823 | break; | 1815 | break; |
1824 | 1816 | ||
@@ -1988,7 +1980,7 @@ void mark_free_pages(struct zone *zone) | |||
1988 | unsigned long pfn, max_zone_pfn; | 1980 | unsigned long pfn, max_zone_pfn; |
1989 | unsigned long flags; | 1981 | unsigned long flags; |
1990 | unsigned int order, t; | 1982 | unsigned int order, t; |
1991 | struct list_head *curr; | 1983 | struct page *page; |
1992 | 1984 | ||
1993 | if (zone_is_empty(zone)) | 1985 | if (zone_is_empty(zone)) |
1994 | return; | 1986 | return; |
@@ -1998,17 +1990,17 @@ void mark_free_pages(struct zone *zone) | |||
1998 | max_zone_pfn = zone_end_pfn(zone); | 1990 | max_zone_pfn = zone_end_pfn(zone); |
1999 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) | 1991 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) |
2000 | if (pfn_valid(pfn)) { | 1992 | if (pfn_valid(pfn)) { |
2001 | struct page *page = pfn_to_page(pfn); | 1993 | page = pfn_to_page(pfn); |
2002 | |||
2003 | if (!swsusp_page_is_forbidden(page)) | 1994 | if (!swsusp_page_is_forbidden(page)) |
2004 | swsusp_unset_page_free(page); | 1995 | swsusp_unset_page_free(page); |
2005 | } | 1996 | } |
2006 | 1997 | ||
2007 | for_each_migratetype_order(order, t) { | 1998 | for_each_migratetype_order(order, t) { |
2008 | list_for_each(curr, &zone->free_area[order].free_list[t]) { | 1999 | list_for_each_entry(page, |
2000 | &zone->free_area[order].free_list[t], lru) { | ||
2009 | unsigned long i; | 2001 | unsigned long i; |
2010 | 2002 | ||
2011 | pfn = page_to_pfn(list_entry(curr, struct page, lru)); | 2003 | pfn = page_to_pfn(page); |
2012 | for (i = 0; i < (1UL << order); i++) | 2004 | for (i = 0; i < (1UL << order); i++) |
2013 | swsusp_set_page_free(pfn_to_page(pfn + i)); | 2005 | swsusp_set_page_free(pfn_to_page(pfn + i)); |
2014 | } | 2006 | } |
@@ -2212,9 +2204,9 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, | |||
2212 | } | 2204 | } |
2213 | 2205 | ||
2214 | if (cold) | 2206 | if (cold) |
2215 | page = list_entry(list->prev, struct page, lru); | 2207 | page = list_last_entry(list, struct page, lru); |
2216 | else | 2208 | else |
2217 | page = list_entry(list->next, struct page, lru); | 2209 | page = list_first_entry(list, struct page, lru); |
2218 | 2210 | ||
2219 | list_del(&page->lru); | 2211 | list_del(&page->lru); |
2220 | pcp->count--; | 2212 | pcp->count--; |
@@ -2241,7 +2233,7 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, | |||
2241 | trace_mm_page_alloc_zone_locked(page, order, migratetype); | 2233 | trace_mm_page_alloc_zone_locked(page, order, migratetype); |
2242 | } | 2234 | } |
2243 | if (!page) | 2235 | if (!page) |
2244 | page = __rmqueue(zone, order, migratetype, gfp_flags); | 2236 | page = __rmqueue(zone, order, migratetype); |
2245 | spin_unlock(&zone->lock); | 2237 | spin_unlock(&zone->lock); |
2246 | if (!page) | 2238 | if (!page) |
2247 | goto failed; | 2239 | goto failed; |
@@ -2740,8 +2732,21 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, | |||
2740 | goto out; | 2732 | goto out; |
2741 | } | 2733 | } |
2742 | /* Exhausted what can be done so it's blamo time */ | 2734 | /* Exhausted what can be done so it's blamo time */ |
2743 | if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) | 2735 | if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) { |
2744 | *did_some_progress = 1; | 2736 | *did_some_progress = 1; |
2737 | |||
2738 | if (gfp_mask & __GFP_NOFAIL) { | ||
2739 | page = get_page_from_freelist(gfp_mask, order, | ||
2740 | ALLOC_NO_WATERMARKS|ALLOC_CPUSET, ac); | ||
2741 | /* | ||
2742 | * fallback to ignore cpuset restriction if our nodes | ||
2743 | * are depleted | ||
2744 | */ | ||
2745 | if (!page) | ||
2746 | page = get_page_from_freelist(gfp_mask, order, | ||
2747 | ALLOC_NO_WATERMARKS, ac); | ||
2748 | } | ||
2749 | } | ||
2745 | out: | 2750 | out: |
2746 | mutex_unlock(&oom_lock); | 2751 | mutex_unlock(&oom_lock); |
2747 | return page; | 2752 | return page; |
@@ -2876,28 +2881,6 @@ retry: | |||
2876 | return page; | 2881 | return page; |
2877 | } | 2882 | } |
2878 | 2883 | ||
2879 | /* | ||
2880 | * This is called in the allocator slow-path if the allocation request is of | ||
2881 | * sufficient urgency to ignore watermarks and take other desperate measures | ||
2882 | */ | ||
2883 | static inline struct page * | ||
2884 | __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order, | ||
2885 | const struct alloc_context *ac) | ||
2886 | { | ||
2887 | struct page *page; | ||
2888 | |||
2889 | do { | ||
2890 | page = get_page_from_freelist(gfp_mask, order, | ||
2891 | ALLOC_NO_WATERMARKS, ac); | ||
2892 | |||
2893 | if (!page && gfp_mask & __GFP_NOFAIL) | ||
2894 | wait_iff_congested(ac->preferred_zone, BLK_RW_ASYNC, | ||
2895 | HZ/50); | ||
2896 | } while (!page && (gfp_mask & __GFP_NOFAIL)); | ||
2897 | |||
2898 | return page; | ||
2899 | } | ||
2900 | |||
2901 | static void wake_all_kswapds(unsigned int order, const struct alloc_context *ac) | 2884 | static void wake_all_kswapds(unsigned int order, const struct alloc_context *ac) |
2902 | { | 2885 | { |
2903 | struct zoneref *z; | 2886 | struct zoneref *z; |
@@ -3042,28 +3025,36 @@ retry: | |||
3042 | * allocations are system rather than user orientated | 3025 | * allocations are system rather than user orientated |
3043 | */ | 3026 | */ |
3044 | ac->zonelist = node_zonelist(numa_node_id(), gfp_mask); | 3027 | ac->zonelist = node_zonelist(numa_node_id(), gfp_mask); |
3045 | 3028 | page = get_page_from_freelist(gfp_mask, order, | |
3046 | page = __alloc_pages_high_priority(gfp_mask, order, ac); | 3029 | ALLOC_NO_WATERMARKS, ac); |
3047 | 3030 | if (page) | |
3048 | if (page) { | ||
3049 | goto got_pg; | 3031 | goto got_pg; |
3050 | } | ||
3051 | } | 3032 | } |
3052 | 3033 | ||
3053 | /* Caller is not willing to reclaim, we can't balance anything */ | 3034 | /* Caller is not willing to reclaim, we can't balance anything */ |
3054 | if (!can_direct_reclaim) { | 3035 | if (!can_direct_reclaim) { |
3055 | /* | 3036 | /* |
3056 | * All existing users of the deprecated __GFP_NOFAIL are | 3037 | * All existing users of the __GFP_NOFAIL are blockable, so warn |
3057 | * blockable, so warn of any new users that actually allow this | 3038 | * of any new users that actually allow this type of allocation |
3058 | * type of allocation to fail. | 3039 | * to fail. |
3059 | */ | 3040 | */ |
3060 | WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL); | 3041 | WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL); |
3061 | goto nopage; | 3042 | goto nopage; |
3062 | } | 3043 | } |
3063 | 3044 | ||
3064 | /* Avoid recursion of direct reclaim */ | 3045 | /* Avoid recursion of direct reclaim */ |
3065 | if (current->flags & PF_MEMALLOC) | 3046 | if (current->flags & PF_MEMALLOC) { |
3047 | /* | ||
3048 | * __GFP_NOFAIL request from this context is rather bizarre | ||
3049 | * because we cannot reclaim anything and only can loop waiting | ||
3050 | * for somebody to do a work for us. | ||
3051 | */ | ||
3052 | if (WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) { | ||
3053 | cond_resched(); | ||
3054 | goto retry; | ||
3055 | } | ||
3066 | goto nopage; | 3056 | goto nopage; |
3057 | } | ||
3067 | 3058 | ||
3068 | /* Avoid allocations with no watermarks from looping endlessly */ | 3059 | /* Avoid allocations with no watermarks from looping endlessly */ |
3069 | if (test_thread_flag(TIF_MEMDIE) && !(gfp_mask & __GFP_NOFAIL)) | 3060 | if (test_thread_flag(TIF_MEMDIE) && !(gfp_mask & __GFP_NOFAIL)) |
@@ -3402,7 +3393,8 @@ EXPORT_SYMBOL(__free_page_frag); | |||
3402 | 3393 | ||
3403 | /* | 3394 | /* |
3404 | * alloc_kmem_pages charges newly allocated pages to the kmem resource counter | 3395 | * alloc_kmem_pages charges newly allocated pages to the kmem resource counter |
3405 | * of the current memory cgroup. | 3396 | * of the current memory cgroup if __GFP_ACCOUNT is set, other than that it is |
3397 | * equivalent to alloc_pages. | ||
3406 | * | 3398 | * |
3407 | * It should be used when the caller would like to use kmalloc, but since the | 3399 | * It should be used when the caller would like to use kmalloc, but since the |
3408 | * allocation is large, it has to fall back to the page allocator. | 3400 | * allocation is large, it has to fall back to the page allocator. |
@@ -4147,8 +4139,7 @@ static void set_zonelist_order(void) | |||
4147 | 4139 | ||
4148 | static void build_zonelists(pg_data_t *pgdat) | 4140 | static void build_zonelists(pg_data_t *pgdat) |
4149 | { | 4141 | { |
4150 | int j, node, load; | 4142 | int i, node, load; |
4151 | enum zone_type i; | ||
4152 | nodemask_t used_mask; | 4143 | nodemask_t used_mask; |
4153 | int local_node, prev_node; | 4144 | int local_node, prev_node; |
4154 | struct zonelist *zonelist; | 4145 | struct zonelist *zonelist; |
@@ -4168,7 +4159,7 @@ static void build_zonelists(pg_data_t *pgdat) | |||
4168 | nodes_clear(used_mask); | 4159 | nodes_clear(used_mask); |
4169 | 4160 | ||
4170 | memset(node_order, 0, sizeof(node_order)); | 4161 | memset(node_order, 0, sizeof(node_order)); |
4171 | j = 0; | 4162 | i = 0; |
4172 | 4163 | ||
4173 | while ((node = find_next_best_node(local_node, &used_mask)) >= 0) { | 4164 | while ((node = find_next_best_node(local_node, &used_mask)) >= 0) { |
4174 | /* | 4165 | /* |
@@ -4185,12 +4176,12 @@ static void build_zonelists(pg_data_t *pgdat) | |||
4185 | if (order == ZONELIST_ORDER_NODE) | 4176 | if (order == ZONELIST_ORDER_NODE) |
4186 | build_zonelists_in_node_order(pgdat, node); | 4177 | build_zonelists_in_node_order(pgdat, node); |
4187 | else | 4178 | else |
4188 | node_order[j++] = node; /* remember order */ | 4179 | node_order[i++] = node; /* remember order */ |
4189 | } | 4180 | } |
4190 | 4181 | ||
4191 | if (order == ZONELIST_ORDER_ZONE) { | 4182 | if (order == ZONELIST_ORDER_ZONE) { |
4192 | /* calculate node order -- i.e., DMA last! */ | 4183 | /* calculate node order -- i.e., DMA last! */ |
4193 | build_zonelists_in_zone_order(pgdat, j); | 4184 | build_zonelists_in_zone_order(pgdat, i); |
4194 | } | 4185 | } |
4195 | 4186 | ||
4196 | build_thisnode_zonelists(pgdat); | 4187 | build_thisnode_zonelists(pgdat); |
@@ -5956,20 +5947,12 @@ static void calculate_totalreserve_pages(void) | |||
5956 | 5947 | ||
5957 | if (max > zone->managed_pages) | 5948 | if (max > zone->managed_pages) |
5958 | max = zone->managed_pages; | 5949 | max = zone->managed_pages; |
5950 | |||
5951 | zone->totalreserve_pages = max; | ||
5952 | |||
5959 | reserve_pages += max; | 5953 | reserve_pages += max; |
5960 | /* | ||
5961 | * Lowmem reserves are not available to | ||
5962 | * GFP_HIGHUSER page cache allocations and | ||
5963 | * kswapd tries to balance zones to their high | ||
5964 | * watermark. As a result, neither should be | ||
5965 | * regarded as dirtyable memory, to prevent a | ||
5966 | * situation where reclaim has to clean pages | ||
5967 | * in order to balance the zones. | ||
5968 | */ | ||
5969 | zone->dirty_balance_reserve = max; | ||
5970 | } | 5954 | } |
5971 | } | 5955 | } |
5972 | dirty_balance_reserve = reserve_pages; | ||
5973 | totalreserve_pages = reserve_pages; | 5956 | totalreserve_pages = reserve_pages; |
5974 | } | 5957 | } |
5975 | 5958 | ||
@@ -6724,8 +6707,12 @@ int alloc_contig_range(unsigned long start, unsigned long end, | |||
6724 | if (ret) | 6707 | if (ret) |
6725 | return ret; | 6708 | return ret; |
6726 | 6709 | ||
6710 | /* | ||
6711 | * In case of -EBUSY, we'd like to know which page causes problem. | ||
6712 | * So, just fall through. We will check it in test_pages_isolated(). | ||
6713 | */ | ||
6727 | ret = __alloc_contig_migrate_range(&cc, start, end); | 6714 | ret = __alloc_contig_migrate_range(&cc, start, end); |
6728 | if (ret) | 6715 | if (ret && ret != -EBUSY) |
6729 | goto done; | 6716 | goto done; |
6730 | 6717 | ||
6731 | /* | 6718 | /* |
@@ -6752,12 +6739,25 @@ int alloc_contig_range(unsigned long start, unsigned long end, | |||
6752 | outer_start = start; | 6739 | outer_start = start; |
6753 | while (!PageBuddy(pfn_to_page(outer_start))) { | 6740 | while (!PageBuddy(pfn_to_page(outer_start))) { |
6754 | if (++order >= MAX_ORDER) { | 6741 | if (++order >= MAX_ORDER) { |
6755 | ret = -EBUSY; | 6742 | outer_start = start; |
6756 | goto done; | 6743 | break; |
6757 | } | 6744 | } |
6758 | outer_start &= ~0UL << order; | 6745 | outer_start &= ~0UL << order; |
6759 | } | 6746 | } |
6760 | 6747 | ||
6748 | if (outer_start != start) { | ||
6749 | order = page_order(pfn_to_page(outer_start)); | ||
6750 | |||
6751 | /* | ||
6752 | * outer_start page could be small order buddy page and | ||
6753 | * it doesn't include start page. Adjust outer_start | ||
6754 | * in this case to report failed page properly | ||
6755 | * on tracepoint in test_pages_isolated() | ||
6756 | */ | ||
6757 | if (outer_start + (1UL << order) <= start) | ||
6758 | outer_start = start; | ||
6759 | } | ||
6760 | |||
6761 | /* Make sure the range is really isolated. */ | 6761 | /* Make sure the range is really isolated. */ |
6762 | if (test_pages_isolated(outer_start, end, false)) { | 6762 | if (test_pages_isolated(outer_start, end, false)) { |
6763 | pr_info("%s: [%lx, %lx) PFNs busy\n", | 6763 | pr_info("%s: [%lx, %lx) PFNs busy\n", |
diff --git a/mm/page_isolation.c b/mm/page_isolation.c index 4568fd58f70a..5e139fec6c6c 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c | |||
@@ -9,6 +9,9 @@ | |||
9 | #include <linux/hugetlb.h> | 9 | #include <linux/hugetlb.h> |
10 | #include "internal.h" | 10 | #include "internal.h" |
11 | 11 | ||
12 | #define CREATE_TRACE_POINTS | ||
13 | #include <trace/events/page_isolation.h> | ||
14 | |||
12 | static int set_migratetype_isolate(struct page *page, | 15 | static int set_migratetype_isolate(struct page *page, |
13 | bool skip_hwpoisoned_pages) | 16 | bool skip_hwpoisoned_pages) |
14 | { | 17 | { |
@@ -162,8 +165,8 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, | |||
162 | unsigned long undo_pfn; | 165 | unsigned long undo_pfn; |
163 | struct page *page; | 166 | struct page *page; |
164 | 167 | ||
165 | BUG_ON((start_pfn) & (pageblock_nr_pages - 1)); | 168 | BUG_ON(!IS_ALIGNED(start_pfn, pageblock_nr_pages)); |
166 | BUG_ON((end_pfn) & (pageblock_nr_pages - 1)); | 169 | BUG_ON(!IS_ALIGNED(end_pfn, pageblock_nr_pages)); |
167 | 170 | ||
168 | for (pfn = start_pfn; | 171 | for (pfn = start_pfn; |
169 | pfn < end_pfn; | 172 | pfn < end_pfn; |
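The alignment BUG_ONs now say what they mean: IS_ALIGNED() from include/linux/kernel.h is essentially the same mask test, readable at a glance and correct so long as the alignment is a power of two, which pageblock_nr_pages always is:

    #define IS_ALIGNED(x, a)        (((x) & ((typeof(x))(a) - 1)) == 0)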
@@ -212,7 +215,7 @@ int undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, | |||
212 | * | 215 | * |
213 | * Returns 1 if all pages in the range are isolated. | 216 | * Returns 1 if all pages in the range are isolated. |
214 | */ | 217 | */ |
215 | static int | 218 | static unsigned long |
216 | __test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn, | 219 | __test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn, |
217 | bool skip_hwpoisoned_pages) | 220 | bool skip_hwpoisoned_pages) |
218 | { | 221 | { |
@@ -237,9 +240,8 @@ __test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn, | |||
237 | else | 240 | else |
238 | break; | 241 | break; |
239 | } | 242 | } |
240 | if (pfn < end_pfn) | 243 | |
241 | return 0; | 244 | return pfn; |
242 | return 1; | ||
243 | } | 245 | } |
244 | 246 | ||
245 | int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn, | 247 | int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn, |
@@ -248,7 +250,6 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn, | |||
248 | unsigned long pfn, flags; | 250 | unsigned long pfn, flags; |
249 | struct page *page; | 251 | struct page *page; |
250 | struct zone *zone; | 252 | struct zone *zone; |
251 | int ret; | ||
252 | 253 | ||
253 | /* | 254 | /* |
254 | * Note: pageblock_nr_pages != MAX_ORDER. Then, chunks of free pages | 255 | * Note: pageblock_nr_pages != MAX_ORDER. Then, chunks of free pages |
@@ -266,10 +267,13 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn, | |||
266 | /* Check all pages are free or marked as ISOLATED */ | 267 | /* Check all pages are free or marked as ISOLATED */ |
267 | zone = page_zone(page); | 268 | zone = page_zone(page); |
268 | spin_lock_irqsave(&zone->lock, flags); | 269 | spin_lock_irqsave(&zone->lock, flags); |
269 | ret = __test_page_isolated_in_pageblock(start_pfn, end_pfn, | 270 | pfn = __test_page_isolated_in_pageblock(start_pfn, end_pfn, |
270 | skip_hwpoisoned_pages); | 271 | skip_hwpoisoned_pages); |
271 | spin_unlock_irqrestore(&zone->lock, flags); | 272 | spin_unlock_irqrestore(&zone->lock, flags); |
272 | return ret ? 0 : -EBUSY; | 273 | |
274 | trace_test_pages_isolated(start_pfn, end_pfn, pfn); | ||
275 | |||
276 | return pfn < end_pfn ? -EBUSY : 0; | ||
273 | } | 277 | } |
274 | 278 | ||
275 | struct page *alloc_migrate_target(struct page *page, unsigned long private, | 279 | struct page *alloc_migrate_target(struct page *page, unsigned long private, |
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index 7d3db0247983..4c681baff363 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c | |||
@@ -176,13 +176,10 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) | |||
176 | 176 | ||
177 | /* FIFO */ | 177 | /* FIFO */ |
178 | pgtable = pmd_huge_pte(mm, pmdp); | 178 | pgtable = pmd_huge_pte(mm, pmdp); |
179 | if (list_empty(&pgtable->lru)) | 179 | pmd_huge_pte(mm, pmdp) = list_first_entry_or_null(&pgtable->lru, |
180 | pmd_huge_pte(mm, pmdp) = NULL; | 180 | struct page, lru); |
181 | else { | 181 | if (pmd_huge_pte(mm, pmdp)) |
182 | pmd_huge_pte(mm, pmdp) = list_entry(pgtable->lru.next, | ||
183 | struct page, lru); | ||
184 | list_del(&pgtable->lru); | 182 | list_del(&pgtable->lru); |
185 | } | ||
186 | return pgtable; | 183 | return pgtable; |
187 | } | 184 | } |
188 | #endif | 185 | #endif |
diff --git a/mm/readahead.c b/mm/readahead.c index ba22d7fe0afb..20e58e820e44 100644 --- a/mm/readahead.c +++ b/mm/readahead.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <linux/pagemap.h> | 17 | #include <linux/pagemap.h> |
18 | #include <linux/syscalls.h> | 18 | #include <linux/syscalls.h> |
19 | #include <linux/file.h> | 19 | #include <linux/file.h> |
20 | #include <linux/mm_inline.h> | ||
20 | 21 | ||
21 | #include "internal.h" | 22 | #include "internal.h" |
22 | 23 | ||
@@ -32,8 +33,6 @@ file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping) | |||
32 | } | 33 | } |
33 | EXPORT_SYMBOL_GPL(file_ra_state_init); | 34 | EXPORT_SYMBOL_GPL(file_ra_state_init); |
34 | 35 | ||
35 | #define list_to_page(head) (list_entry((head)->prev, struct page, lru)) | ||
36 | |||
37 | /* | 36 | /* |
38 | * see if a page needs releasing upon read_cache_pages() failure | 37 | * see if a page needs releasing upon read_cache_pages() failure |
39 | * - the caller of read_cache_pages() may have set PG_private or PG_fscache | 38 | * - the caller of read_cache_pages() may have set PG_private or PG_fscache |
@@ -64,7 +63,7 @@ static void read_cache_pages_invalidate_pages(struct address_space *mapping, | |||
64 | struct page *victim; | 63 | struct page *victim; |
65 | 64 | ||
66 | while (!list_empty(pages)) { | 65 | while (!list_empty(pages)) { |
67 | victim = list_to_page(pages); | 66 | victim = lru_to_page(pages); |
68 | list_del(&victim->lru); | 67 | list_del(&victim->lru); |
69 | read_cache_pages_invalidate_page(mapping, victim); | 68 | read_cache_pages_invalidate_page(mapping, victim); |
70 | } | 69 | } |
@@ -87,7 +86,7 @@ int read_cache_pages(struct address_space *mapping, struct list_head *pages, | |||
87 | int ret = 0; | 86 | int ret = 0; |
88 | 87 | ||
89 | while (!list_empty(pages)) { | 88 | while (!list_empty(pages)) { |
90 | page = list_to_page(pages); | 89 | page = lru_to_page(pages); |
91 | list_del(&page->lru); | 90 | list_del(&page->lru); |
92 | if (add_to_page_cache_lru(page, mapping, page->index, | 91 | if (add_to_page_cache_lru(page, mapping, page->index, |
93 | mapping_gfp_constraint(mapping, GFP_KERNEL))) { | 92 | mapping_gfp_constraint(mapping, GFP_KERNEL))) { |
@@ -125,7 +124,7 @@ static int read_pages(struct address_space *mapping, struct file *filp, | |||
125 | } | 124 | } |
126 | 125 | ||
127 | for (page_idx = 0; page_idx < nr_pages; page_idx++) { | 126 | for (page_idx = 0; page_idx < nr_pages; page_idx++) { |
128 | struct page *page = list_to_page(pages); | 127 | struct page *page = lru_to_page(pages); |
129 | list_del(&page->lru); | 128 | list_del(&page->lru); |
130 | if (!add_to_page_cache_lru(page, mapping, page->index, | 129 | if (!add_to_page_cache_lru(page, mapping, page->index, |
131 | mapping_gfp_constraint(mapping, GFP_KERNEL))) { | 130 | mapping_gfp_constraint(mapping, GFP_KERNEL))) { |
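readahead.c drops its private list_to_page() macro in favor of the shared lru_to_page(), which this series relocates to <linux/mm_inline.h>. Both take the entry at the tail of the list, i.e. the oldest queued page:

    #define lru_to_page(head)       (list_entry((head)->prev, struct page, lru))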
diff --git a/mm/rmap.c b/mm/rmap.c --- a/mm/rmap.c +++ b/mm/rmap.c | |||
@@ -428,8 +428,10 @@ static void anon_vma_ctor(void *data) | |||
428 | void __init anon_vma_init(void) | 428 | void __init anon_vma_init(void) |
429 | { | 429 | { |
430 | anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma), | 430 | anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma), |
431 | 0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor); | 431 | 0, SLAB_DESTROY_BY_RCU|SLAB_PANIC|SLAB_ACCOUNT, |
432 | anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain, SLAB_PANIC); | 432 | anon_vma_ctor); |
433 | anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain, | ||
434 | SLAB_PANIC|SLAB_ACCOUNT); | ||
433 | } | 435 | } |
434 | 436 | ||
435 | /* | 437 | /* |
@@ -1362,10 +1364,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, | |||
1362 | if (PageHuge(page)) { | 1364 | if (PageHuge(page)) { |
1363 | hugetlb_count_sub(1 << compound_order(page), mm); | 1365 | hugetlb_count_sub(1 << compound_order(page), mm); |
1364 | } else { | 1366 | } else { |
1365 | if (PageAnon(page)) | 1367 | dec_mm_counter(mm, mm_counter(page)); |
1366 | dec_mm_counter(mm, MM_ANONPAGES); | ||
1367 | else | ||
1368 | dec_mm_counter(mm, MM_FILEPAGES); | ||
1369 | } | 1368 | } |
1370 | set_pte_at(mm, address, pte, | 1369 | set_pte_at(mm, address, pte, |
1371 | swp_entry_to_pte(make_hwpoison_entry(page))); | 1370 | swp_entry_to_pte(make_hwpoison_entry(page))); |
@@ -1375,10 +1374,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, | |||
1375 | * interest anymore. Simply discard the pte, vmscan | 1374 | * interest anymore. Simply discard the pte, vmscan |
1376 | * will take care of the rest. | 1375 | * will take care of the rest. |
1377 | */ | 1376 | */ |
1378 | if (PageAnon(page)) | 1377 | dec_mm_counter(mm, mm_counter(page)); |
1379 | dec_mm_counter(mm, MM_ANONPAGES); | ||
1380 | else | ||
1381 | dec_mm_counter(mm, MM_FILEPAGES); | ||
1382 | } else if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION)) { | 1378 | } else if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION)) { |
1383 | swp_entry_t entry; | 1379 | swp_entry_t entry; |
1384 | pte_t swp_pte; | 1380 | pte_t swp_pte; |
@@ -1418,7 +1414,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, | |||
1418 | swp_pte = pte_swp_mksoft_dirty(swp_pte); | 1414 | swp_pte = pte_swp_mksoft_dirty(swp_pte); |
1419 | set_pte_at(mm, address, pte, swp_pte); | 1415 | set_pte_at(mm, address, pte, swp_pte); |
1420 | } else | 1416 | } else |
1421 | dec_mm_counter(mm, MM_FILEPAGES); | 1417 | dec_mm_counter(mm, mm_counter_file(page)); |
1422 | 1418 | ||
1423 | page_remove_rmap(page); | 1419 | page_remove_rmap(page); |
1424 | page_cache_release(page); | 1420 | page_cache_release(page); |
diff --git a/mm/shmem.c b/mm/shmem.c index 642471b0ddea..970ff5b80853 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
@@ -360,6 +360,87 @@ static int shmem_free_swap(struct address_space *mapping, | |||
360 | } | 360 | } |
361 | 361 | ||
362 | /* | 362 | /* |
363 | * Determine (in bytes) how many of the shmem object's pages mapped by the | ||
364 | * given offsets are swapped out. | ||
365 | * | ||
366 | * This is safe to call without i_mutex or mapping->tree_lock thanks to RCU, | ||
367 | * as long as the inode doesn't go away and racy results are not a problem. | ||
368 | */ | ||
369 | unsigned long shmem_partial_swap_usage(struct address_space *mapping, | ||
370 | pgoff_t start, pgoff_t end) | ||
371 | { | ||
372 | struct radix_tree_iter iter; | ||
373 | void **slot; | ||
374 | struct page *page; | ||
375 | unsigned long swapped = 0; | ||
376 | |||
377 | rcu_read_lock(); | ||
378 | |||
379 | restart: | ||
380 | radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) { | ||
381 | if (iter.index >= end) | ||
382 | break; | ||
383 | |||
384 | page = radix_tree_deref_slot(slot); | ||
385 | |||
386 | /* | ||
387 | * This should only be possible to happen at index 0, so we | ||
388 | * don't need to reset the counter, nor do we risk infinite | ||
389 | * restarts. | ||
390 | */ | ||
391 | if (radix_tree_deref_retry(page)) | ||
392 | goto restart; | ||
393 | |||
394 | if (radix_tree_exceptional_entry(page)) | ||
395 | swapped++; | ||
396 | |||
397 | if (need_resched()) { | ||
398 | cond_resched_rcu(); | ||
399 | start = iter.index + 1; | ||
400 | goto restart; | ||
401 | } | ||
402 | } | ||
403 | |||
404 | rcu_read_unlock(); | ||
405 | |||
406 | return swapped << PAGE_SHIFT; | ||
407 | } | ||
408 | |||
409 | /* | ||
410 | * Determine (in bytes) how many of the shmem object's pages mapped by the | ||
411 | * given vma is swapped out. | ||
412 | * | ||
413 | * This is safe to call without i_mutex or mapping->tree_lock thanks to RCU, | ||
414 | * as long as the inode doesn't go away and racy results are not a problem. | ||
415 | */ | ||
416 | unsigned long shmem_swap_usage(struct vm_area_struct *vma) | ||
417 | { | ||
418 | struct inode *inode = file_inode(vma->vm_file); | ||
419 | struct shmem_inode_info *info = SHMEM_I(inode); | ||
420 | struct address_space *mapping = inode->i_mapping; | ||
421 | unsigned long swapped; | ||
422 | |||
423 | /* Be careful as we don't hold info->lock */ | ||
424 | swapped = READ_ONCE(info->swapped); | ||
425 | |||
426 | /* | ||
427 | * The easier cases are when the shmem object has nothing in swap, or | ||
428 | * the vma maps it whole. Then we can simply use the stats that we | ||
429 | * already track. | ||
430 | */ | ||
431 | if (!swapped) | ||
432 | return 0; | ||
433 | |||
434 | if (!vma->vm_pgoff && vma->vm_end - vma->vm_start >= inode->i_size) | ||
435 | return swapped << PAGE_SHIFT; | ||
436 | |||
437 | /* Here comes the more involved part */ | ||
438 | return shmem_partial_swap_usage(mapping, | ||
439 | linear_page_index(vma, vma->vm_start), | ||
440 | linear_page_index(vma, vma->vm_end)); | ||
441 | } | ||
442 | |||
443 | /* | ||
363 | * SysV IPC SHM_UNLOCK restore Unevictable pages to their evictable lists. | 444 | * SysV IPC SHM_UNLOCK restore Unevictable pages to their evictable lists. |
364 | */ | 445 | */ |
365 | void shmem_unlock_mapping(struct address_space *mapping) | 446 | void shmem_unlock_mapping(struct address_space *mapping) |
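shmem_swap_usage() underpins the new per-mapping shmem swap reporting: swapped-out shmem pages leave radix-tree exceptional entries in the page tree, so counting them needs only an RCU walk, with cond_resched_rcu() supplying preemption points and a restart from the last index. A hedged sketch of a consumer (the actual smaps wiring for this series lives in fs/proc/task_mmu.c):

    /* in a /proc/<pid>/smaps style walker, for a file-backed vma */
    unsigned long shmem_swap_bytes = 0;

    if (vma->vm_file && shmem_mapping(vma->vm_file->f_mapping))
            shmem_swap_bytes = shmem_swap_usage(vma);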
@@ -3064,7 +3145,7 @@ static int shmem_init_inodecache(void) | |||
3064 | { | 3145 | { |
3065 | shmem_inode_cachep = kmem_cache_create("shmem_inode_cache", | 3146 | shmem_inode_cachep = kmem_cache_create("shmem_inode_cache", |
3066 | sizeof(struct shmem_inode_info), | 3147 | sizeof(struct shmem_inode_info), |
3067 | 0, SLAB_PANIC, shmem_init_inode); | 3148 | 0, SLAB_PANIC|SLAB_ACCOUNT, shmem_init_inode); |
3068 | return 0; | 3149 | return 0; |
3069 | } | 3150 | } |
3070 | 3151 | ||
diff --git a/mm/slab.c b/mm/slab.c --- a/mm/slab.c +++ b/mm/slab.c | |||
@@ -2756,6 +2756,21 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, | |||
2756 | #define cache_free_debugcheck(x,objp,z) (objp) | 2756 | #define cache_free_debugcheck(x,objp,z) (objp) |
2757 | #endif | 2757 | #endif |
2758 | 2758 | ||
2759 | static struct page *get_first_slab(struct kmem_cache_node *n) | ||
2760 | { | ||
2761 | struct page *page; | ||
2762 | |||
2763 | page = list_first_entry_or_null(&n->slabs_partial, | ||
2764 | struct page, lru); | ||
2765 | if (!page) { | ||
2766 | n->free_touched = 1; | ||
2767 | page = list_first_entry_or_null(&n->slabs_free, | ||
2768 | struct page, lru); | ||
2769 | } | ||
2770 | |||
2771 | return page; | ||
2772 | } | ||
2773 | |||
2759 | static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags, | 2774 | static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags, |
2760 | bool force_refill) | 2775 | bool force_refill) |
2761 | { | 2776 | { |
@@ -2791,18 +2806,12 @@ retry: | |||
2791 | } | 2806 | } |
2792 | 2807 | ||
2793 | while (batchcount > 0) { | 2808 | while (batchcount > 0) { |
2794 | struct list_head *entry; | ||
2795 | struct page *page; | 2809 | struct page *page; |
2796 | /* Get slab alloc is to come from. */ | 2810 | /* Get slab alloc is to come from. */ |
2797 | entry = n->slabs_partial.next; | 2811 | page = get_first_slab(n); |
2798 | if (entry == &n->slabs_partial) { | 2812 | if (!page) |
2799 | n->free_touched = 1; | 2813 | goto must_grow; |
2800 | entry = n->slabs_free.next; | ||
2801 | if (entry == &n->slabs_free) | ||
2802 | goto must_grow; | ||
2803 | } | ||
2804 | 2814 | ||
2805 | page = list_entry(entry, struct page, lru); | ||
2806 | check_spinlock_acquired(cachep); | 2815 | check_spinlock_acquired(cachep); |
2807 | 2816 | ||
2808 | /* | 2817 | /* |
@@ -3085,7 +3094,6 @@ retry: | |||
3085 | static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, | 3094 | static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, |
3086 | int nodeid) | 3095 | int nodeid) |
3087 | { | 3096 | { |
3088 | struct list_head *entry; | ||
3089 | struct page *page; | 3097 | struct page *page; |
3090 | struct kmem_cache_node *n; | 3098 | struct kmem_cache_node *n; |
3091 | void *obj; | 3099 | void *obj; |
@@ -3098,15 +3106,10 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, | |||
3098 | retry: | 3106 | retry: |
3099 | check_irq_off(); | 3107 | check_irq_off(); |
3100 | spin_lock(&n->list_lock); | 3108 | spin_lock(&n->list_lock); |
3101 | entry = n->slabs_partial.next; | 3109 | page = get_first_slab(n); |
3102 | if (entry == &n->slabs_partial) { | 3110 | if (!page) |
3103 | n->free_touched = 1; | 3111 | goto must_grow; |
3104 | entry = n->slabs_free.next; | ||
3105 | if (entry == &n->slabs_free) | ||
3106 | goto must_grow; | ||
3107 | } | ||
3108 | 3112 | ||
3109 | page = list_entry(entry, struct page, lru); | ||
3110 | check_spinlock_acquired_node(cachep, nodeid); | 3113 | check_spinlock_acquired_node(cachep, nodeid); |
3111 | 3114 | ||
3112 | STATS_INC_NODEALLOCS(cachep); | 3115 | STATS_INC_NODEALLOCS(cachep); |
@@ -3338,17 +3341,12 @@ free_done: | |||
3338 | #if STATS | 3341 | #if STATS |
3339 | { | 3342 | { |
3340 | int i = 0; | 3343 | int i = 0; |
3341 | struct list_head *p; | 3344 | struct page *page; |
3342 | |||
3343 | p = n->slabs_free.next; | ||
3344 | while (p != &(n->slabs_free)) { | ||
3345 | struct page *page; | ||
3346 | 3345 | ||
3347 | page = list_entry(p, struct page, lru); | 3346 | list_for_each_entry(page, &n->slabs_free, lru) { |
3348 | BUG_ON(page->active); | 3347 | BUG_ON(page->active); |
3349 | 3348 | ||
3350 | i++; | 3349 | i++; |
3351 | p = p->next; | ||
3352 | } | 3350 | } |
3353 | STATS_SET_FREEABLE(cachep, i); | 3351 | STATS_SET_FREEABLE(cachep, i); |
3354 | } | 3352 | } |
@@ -128,10 +128,11 @@ static inline unsigned long kmem_cache_flags(unsigned long object_size, | |||
128 | 128 | ||
129 | #if defined(CONFIG_SLAB) | 129 | #if defined(CONFIG_SLAB) |
130 | #define SLAB_CACHE_FLAGS (SLAB_MEM_SPREAD | SLAB_NOLEAKTRACE | \ | 130 | #define SLAB_CACHE_FLAGS (SLAB_MEM_SPREAD | SLAB_NOLEAKTRACE | \ |
131 | SLAB_RECLAIM_ACCOUNT | SLAB_TEMPORARY | SLAB_NOTRACK) | 131 | SLAB_RECLAIM_ACCOUNT | SLAB_TEMPORARY | \ |
132 | SLAB_NOTRACK | SLAB_ACCOUNT) | ||
132 | #elif defined(CONFIG_SLUB) | 133 | #elif defined(CONFIG_SLUB) |
133 | #define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \ | 134 | #define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \ |
134 | SLAB_TEMPORARY | SLAB_NOTRACK) | 135 | SLAB_TEMPORARY | SLAB_NOTRACK | SLAB_ACCOUNT) |
135 | #else | 136 | #else |
136 | #define SLAB_CACHE_FLAGS (0) | 137 | #define SLAB_CACHE_FLAGS (0) |
137 | #endif | 138 | #endif |
diff --git a/mm/slab_common.c b/mm/slab_common.c index 3c6a86b4ec25..e016178063e1 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c | |||
@@ -37,7 +37,8 @@ struct kmem_cache *kmem_cache; | |||
37 | SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \ | 37 | SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \ |
38 | SLAB_FAILSLAB) | 38 | SLAB_FAILSLAB) |
39 | 39 | ||
40 | #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | SLAB_NOTRACK) | 40 | #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \ |
41 | SLAB_NOTRACK | SLAB_ACCOUNT) | ||
41 | 42 | ||
42 | /* | 43 | /* |
43 | * Merge control. If this is set then no merging of slab caches will occur. | 44 | * Merge control. If this is set then no merging of slab caches will occur. |
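SLAB_ACCOUNT joining SLAB_MERGE_SAME is the load-bearing part of this hunk: objects from an accounted cache are charged to the allocating cgroup, so an accounted cache must never be aliased to an unaccounted one. Merging requires exact agreement on every bit in the mask, as this toy check shows (flag values invented for the demo; the real ones live in include/linux/slab.h):

    #include <stdio.h>
    #include <stdbool.h>

    /* Invented bit values; only the parity check matters here. */
    #define SLAB_RECLAIM_ACCOUNT 0x0001UL
    #define SLAB_CACHE_DMA       0x0002UL
    #define SLAB_NOTRACK         0x0004UL
    #define SLAB_ACCOUNT         0x0008UL

    #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
                             SLAB_NOTRACK | SLAB_ACCOUNT)

    static bool mergeable(unsigned long a, unsigned long b)
    {
        return (a & SLAB_MERGE_SAME) == (b & SLAB_MERGE_SAME);
    }

    int main(void)
    {
        printf("accounted vs plain:     %d\n", mergeable(SLAB_ACCOUNT, 0));
        printf("accounted vs accounted: %d\n", mergeable(SLAB_ACCOUNT, SLAB_ACCOUNT));
        return 0;
    }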
@@ -5362,6 +5362,8 @@ static char *create_unique_id(struct kmem_cache *s) | |||
5362 | *p++ = 'F'; | 5362 | *p++ = 'F'; |
5363 | if (!(s->flags & SLAB_NOTRACK)) | 5363 | if (!(s->flags & SLAB_NOTRACK)) |
5364 | *p++ = 't'; | 5364 | *p++ = 't'; |
5365 | if (s->flags & SLAB_ACCOUNT) | ||
5366 | *p++ = 'A'; | ||
5365 | if (p != name + 1) | 5367 | if (p != name + 1) |
5366 | *p++ = '-'; | 5368 | *p++ = '-'; |
5367 | p += sprintf(p, "%07d", s->size); | 5369 | p += sprintf(p, "%07d", s->size); |
diff --git a/mm/swapfile.c b/mm/swapfile.c index 58877312cf6b..e6b8591a3ed2 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
@@ -165,8 +165,6 @@ static void discard_swap_cluster(struct swap_info_struct *si, | |||
165 | int found_extent = 0; | 165 | int found_extent = 0; |
166 | 166 | ||
167 | while (nr_pages) { | 167 | while (nr_pages) { |
168 | struct list_head *lh; | ||
169 | |||
170 | if (se->start_page <= start_page && | 168 | if (se->start_page <= start_page && |
171 | start_page < se->start_page + se->nr_pages) { | 169 | start_page < se->start_page + se->nr_pages) { |
172 | pgoff_t offset = start_page - se->start_page; | 170 | pgoff_t offset = start_page - se->start_page; |
@@ -188,8 +186,7 @@ static void discard_swap_cluster(struct swap_info_struct *si, | |||
188 | break; | 186 | break; |
189 | } | 187 | } |
190 | 188 | ||
191 | lh = se->list.next; | 189 | se = list_next_entry(se, list); |
192 | se = list_entry(lh, struct swap_extent, list); | ||
193 | } | 190 | } |
194 | } | 191 | } |
195 | 192 | ||
@@ -903,7 +900,7 @@ int swp_swapcount(swp_entry_t entry) | |||
903 | VM_BUG_ON(page_private(page) != SWP_CONTINUED); | 900 | VM_BUG_ON(page_private(page) != SWP_CONTINUED); |
904 | 901 | ||
905 | do { | 902 | do { |
906 | page = list_entry(page->lru.next, struct page, lru); | 903 | page = list_next_entry(page, lru); |
907 | map = kmap_atomic(page); | 904 | map = kmap_atomic(page); |
908 | tmp_count = map[offset]; | 905 | tmp_count = map[offset]; |
909 | kunmap_atomic(map); | 906 | kunmap_atomic(map); |
@@ -1633,14 +1630,11 @@ static sector_t map_swap_entry(swp_entry_t entry, struct block_device **bdev) | |||
1633 | se = start_se; | 1630 | se = start_se; |
1634 | 1631 | ||
1635 | for ( ; ; ) { | 1632 | for ( ; ; ) { |
1636 | struct list_head *lh; | ||
1637 | |||
1638 | if (se->start_page <= offset && | 1633 | if (se->start_page <= offset && |
1639 | offset < (se->start_page + se->nr_pages)) { | 1634 | offset < (se->start_page + se->nr_pages)) { |
1640 | return se->start_block + (offset - se->start_page); | 1635 | return se->start_block + (offset - se->start_page); |
1641 | } | 1636 | } |
1642 | lh = se->list.next; | 1637 | se = list_next_entry(se, list); |
1643 | se = list_entry(lh, struct swap_extent, list); | ||
1644 | sis->curr_swap_extent = se; | 1638 | sis->curr_swap_extent = se; |
1645 | BUG_ON(se == start_se); /* It *must* be present */ | 1639 | BUG_ON(se == start_se); /* It *must* be present */ |
1646 | } | 1640 | } |
@@ -1664,7 +1658,7 @@ static void destroy_swap_extents(struct swap_info_struct *sis) | |||
1664 | while (!list_empty(&sis->first_swap_extent.list)) { | 1658 | while (!list_empty(&sis->first_swap_extent.list)) { |
1665 | struct swap_extent *se; | 1659 | struct swap_extent *se; |
1666 | 1660 | ||
1667 | se = list_entry(sis->first_swap_extent.list.next, | 1661 | se = list_first_entry(&sis->first_swap_extent.list, |
1668 | struct swap_extent, list); | 1662 | struct swap_extent, list); |
1669 | list_del(&se->list); | 1663 | list_del(&se->list); |
1670 | kfree(se); | 1664 | kfree(se); |
@@ -2959,11 +2953,10 @@ static void free_swap_count_continuations(struct swap_info_struct *si) | |||
2959 | struct page *head; | 2953 | struct page *head; |
2960 | head = vmalloc_to_page(si->swap_map + offset); | 2954 | head = vmalloc_to_page(si->swap_map + offset); |
2961 | if (page_private(head)) { | 2955 | if (page_private(head)) { |
2962 | struct list_head *this, *next; | 2956 | struct page *page, *next; |
2963 | list_for_each_safe(this, next, &head->lru) { | 2957 | |
2964 | struct page *page; | 2958 | list_for_each_entry_safe(page, next, &head->lru, lru) { |
2965 | page = list_entry(this, struct page, lru); | 2959 | list_del(&page->lru); |
2966 | list_del(this); | ||
2967 | __free_page(page); | 2960 | __free_page(page); |
2968 | } | 2961 | } |
2969 | } | 2962 | } |
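The conversion to list_for_each_entry_safe() is more than tidying: the _safe variant caches the next element before the body runs, which is what makes the list_del() plus __free_page() pair legal mid-walk. A compact standalone demonstration, again with minimal reimplementations of the macros rather than the kernel's:

    #include <stdio.h>
    #include <stdlib.h>
    #include <stddef.h>

    struct list_head { struct list_head *next, *prev; };

    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))
    #define list_entry(ptr, type, member) container_of(ptr, type, member)
    #define list_for_each_entry_safe(pos, n, head, member)                 \
        for (pos = list_entry((head)->next, typeof(*pos), member),         \
             n = list_entry(pos->member.next, typeof(*pos), member);       \
             &pos->member != (head);                                       \
             pos = n, n = list_entry(n->member.next, typeof(*n), member))

    struct page { int nr; struct list_head lru; };

    static void list_add_tail(struct list_head *new, struct list_head *head)
    {
        new->prev = head->prev; new->next = head;
        head->prev->next = new; head->prev = new;
    }

    static void list_del(struct list_head *e)
    {
        e->prev->next = e->next; e->next->prev = e->prev;
    }

    int main(void)
    {
        struct list_head lru = { &lru, &lru };
        struct page *p, *n;

        for (int i = 0; i < 3; i++) {
            p = malloc(sizeof(*p));
            p->nr = i;
            list_add_tail(&p->lru, &lru);
        }

        /* Freeing pos inside a plain list_for_each_entry would leave the
         * iterator pointing into freed memory; the cached 'n' avoids that. */
        list_for_each_entry_safe(p, n, &lru, lru) {
            list_del(&p->lru);
            printf("freeing page %d\n", p->nr);
            free(p);
        }
        return 0;
    }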
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 8e3c9c5a3042..58ceeb107960 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c | |||
@@ -441,8 +441,7 @@ nocache: | |||
441 | if (list_is_last(&first->list, &vmap_area_list)) | 441 | if (list_is_last(&first->list, &vmap_area_list)) |
442 | goto found; | 442 | goto found; |
443 | 443 | ||
444 | first = list_entry(first->list.next, | 444 | first = list_next_entry(first, list); |
445 | struct vmap_area, list); | ||
446 | } | 445 | } |
447 | 446 | ||
448 | found: | 447 | found: |
@@ -1477,13 +1476,10 @@ static void __vunmap(const void *addr, int deallocate_pages) | |||
1477 | struct page *page = area->pages[i]; | 1476 | struct page *page = area->pages[i]; |
1478 | 1477 | ||
1479 | BUG_ON(!page); | 1478 | BUG_ON(!page); |
1480 | __free_page(page); | 1479 | __free_kmem_pages(page, 0); |
1481 | } | 1480 | } |
1482 | 1481 | ||
1483 | if (area->flags & VM_VPAGES) | 1482 | kvfree(area->pages); |
1484 | vfree(area->pages); | ||
1485 | else | ||
1486 | kfree(area->pages); | ||
1487 | } | 1483 | } |
1488 | 1484 | ||
1489 | kfree(area); | 1485 | kfree(area); |
@@ -1593,7 +1589,6 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, | |||
1593 | if (array_size > PAGE_SIZE) { | 1589 | if (array_size > PAGE_SIZE) { |
1594 | pages = __vmalloc_node(array_size, 1, nested_gfp|__GFP_HIGHMEM, | 1590 | pages = __vmalloc_node(array_size, 1, nested_gfp|__GFP_HIGHMEM, |
1595 | PAGE_KERNEL, node, area->caller); | 1591 | PAGE_KERNEL, node, area->caller); |
1596 | area->flags |= VM_VPAGES; | ||
1597 | } else { | 1592 | } else { |
1598 | pages = kmalloc_node(array_size, nested_gfp, node); | 1593 | pages = kmalloc_node(array_size, nested_gfp, node); |
1599 | } | 1594 | } |
@@ -1608,9 +1603,9 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, | |||
1608 | struct page *page; | 1603 | struct page *page; |
1609 | 1604 | ||
1610 | if (node == NUMA_NO_NODE) | 1605 | if (node == NUMA_NO_NODE) |
1611 | page = alloc_page(alloc_mask); | 1606 | page = alloc_kmem_pages(alloc_mask, order); |
1612 | else | 1607 | else |
1613 | page = alloc_pages_node(node, alloc_mask, order); | 1608 | page = alloc_kmem_pages_node(node, alloc_mask, order); |
1614 | 1609 | ||
1615 | if (unlikely(!page)) { | 1610 | if (unlikely(!page)) { |
1616 | /* Successfully allocated i pages, free them in __vunmap() */ | 1611 | /* Successfully allocated i pages, free them in __vunmap() */ |
@@ -2559,10 +2554,10 @@ static void *s_start(struct seq_file *m, loff_t *pos) | |||
2559 | struct vmap_area *va; | 2554 | struct vmap_area *va; |
2560 | 2555 | ||
2561 | spin_lock(&vmap_area_lock); | 2556 | spin_lock(&vmap_area_lock); |
2562 | va = list_entry((&vmap_area_list)->next, typeof(*va), list); | 2557 | va = list_first_entry(&vmap_area_list, typeof(*va), list); |
2563 | while (n > 0 && &va->list != &vmap_area_list) { | 2558 | while (n > 0 && &va->list != &vmap_area_list) { |
2564 | n--; | 2559 | n--; |
2565 | va = list_entry(va->list.next, typeof(*va), list); | 2560 | va = list_next_entry(va, list); |
2566 | } | 2561 | } |
2567 | if (!n && &va->list != &vmap_area_list) | 2562 | if (!n && &va->list != &vmap_area_list) |
2568 | return va; | 2563 | return va; |
@@ -2576,7 +2571,7 @@ static void *s_next(struct seq_file *m, void *p, loff_t *pos) | |||
2576 | struct vmap_area *va = p, *next; | 2571 | struct vmap_area *va = p, *next; |
2577 | 2572 | ||
2578 | ++*pos; | 2573 | ++*pos; |
2579 | next = list_entry(va->list.next, typeof(*va), list); | 2574 | next = list_next_entry(va, list); |
2580 | if (&next->list != &vmap_area_list) | 2575 | if (&next->list != &vmap_area_list) |
2581 | return next; | 2576 | return next; |
2582 | 2577 | ||
@@ -2651,7 +2646,7 @@ static int s_show(struct seq_file *m, void *p) | |||
2651 | if (v->flags & VM_USERMAP) | 2646 | if (v->flags & VM_USERMAP) |
2652 | seq_puts(m, " user"); | 2647 | seq_puts(m, " user"); |
2653 | 2648 | ||
2654 | if (v->flags & VM_VPAGES) | 2649 | if (is_vmalloc_addr(v->pages)) |
2655 | seq_puts(m, " vpages"); | 2650 | seq_puts(m, " vpages"); |
2656 | 2651 | ||
2657 | show_numa_info(m, v); | 2652 | show_numa_info(m, v); |
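Both vmalloc.c cleanups retire the VM_VPAGES flag the same way: instead of remembering how area->pages was allocated, the code asks the pointer itself, via kvfree() on the free side and is_vmalloc_addr() in s_show(). A rough userspace analogue of that "ask the pointer, not a flag" dispatch (the address-range predicate is stubbed; in the kernel it tests against the vmalloc area):

    #include <stdio.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* Stub: the kernel checks VMALLOC_START <= addr < VMALLOC_END. */
    static bool is_vmalloc_addr(const void *p)
    {
        return ((uintptr_t)p & 1u) != 0;  /* placeholder predicate for the demo */
    }

    static void kvfree_model(void *p)
    {
        if (is_vmalloc_addr(p))
            printf("%p -> vfree()\n", p);
        else
            printf("%p -> kfree()\n", p);
    }

    int main(void)
    {
        kvfree_model((void *)(uintptr_t)0x1000);  /* "kmalloc'd" in the demo */
        kvfree_model((void *)(uintptr_t)0x1001);  /* "vmalloc'd" in the demo */
        return 0;
    }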
diff --git a/mm/vmpressure.c b/mm/vmpressure.c index c5afd573d7da..9a6c0704211c 100644 --- a/mm/vmpressure.c +++ b/mm/vmpressure.c | |||
@@ -137,14 +137,11 @@ struct vmpressure_event { | |||
137 | }; | 137 | }; |
138 | 138 | ||
139 | static bool vmpressure_event(struct vmpressure *vmpr, | 139 | static bool vmpressure_event(struct vmpressure *vmpr, |
140 | unsigned long scanned, unsigned long reclaimed) | 140 | enum vmpressure_levels level) |
141 | { | 141 | { |
142 | struct vmpressure_event *ev; | 142 | struct vmpressure_event *ev; |
143 | enum vmpressure_levels level; | ||
144 | bool signalled = false; | 143 | bool signalled = false; |
145 | 144 | ||
146 | level = vmpressure_calc_level(scanned, reclaimed); | ||
147 | |||
148 | mutex_lock(&vmpr->events_lock); | 145 | mutex_lock(&vmpr->events_lock); |
149 | 146 | ||
150 | list_for_each_entry(ev, &vmpr->events, node) { | 147 | list_for_each_entry(ev, &vmpr->events, node) { |
@@ -164,6 +161,7 @@ static void vmpressure_work_fn(struct work_struct *work) | |||
164 | struct vmpressure *vmpr = work_to_vmpressure(work); | 161 | struct vmpressure *vmpr = work_to_vmpressure(work); |
165 | unsigned long scanned; | 162 | unsigned long scanned; |
166 | unsigned long reclaimed; | 163 | unsigned long reclaimed; |
164 | enum vmpressure_levels level; | ||
167 | 165 | ||
168 | spin_lock(&vmpr->sr_lock); | 166 | spin_lock(&vmpr->sr_lock); |
169 | /* | 167 | /* |
@@ -174,19 +172,21 @@ static void vmpressure_work_fn(struct work_struct *work) | |||
174 | * here. No need for any locks here since we don't care if | 172 | * here. No need for any locks here since we don't care if |
175 | * vmpr->reclaimed is in sync. | 173 | * vmpr->reclaimed is in sync. |
176 | */ | 174 | */ |
177 | scanned = vmpr->scanned; | 175 | scanned = vmpr->tree_scanned; |
178 | if (!scanned) { | 176 | if (!scanned) { |
179 | spin_unlock(&vmpr->sr_lock); | 177 | spin_unlock(&vmpr->sr_lock); |
180 | return; | 178 | return; |
181 | } | 179 | } |
182 | 180 | ||
183 | reclaimed = vmpr->reclaimed; | 181 | reclaimed = vmpr->tree_reclaimed; |
184 | vmpr->scanned = 0; | 182 | vmpr->tree_scanned = 0; |
185 | vmpr->reclaimed = 0; | 183 | vmpr->tree_reclaimed = 0; |
186 | spin_unlock(&vmpr->sr_lock); | 184 | spin_unlock(&vmpr->sr_lock); |
187 | 185 | ||
186 | level = vmpressure_calc_level(scanned, reclaimed); | ||
187 | |||
188 | do { | 188 | do { |
189 | if (vmpressure_event(vmpr, scanned, reclaimed)) | 189 | if (vmpressure_event(vmpr, level)) |
190 | break; | 190 | break; |
191 | /* | 191 | /* |
192 | * If not handled, propagate the event upward into the | 192 | * If not handled, propagate the event upward into the |
@@ -199,6 +199,7 @@ static void vmpressure_work_fn(struct work_struct *work) | |||
199 | * vmpressure() - Account memory pressure through scanned/reclaimed ratio | 199 | * vmpressure() - Account memory pressure through scanned/reclaimed ratio |
200 | * @gfp: reclaimer's gfp mask | 200 | * @gfp: reclaimer's gfp mask |
201 | * @memcg: cgroup memory controller handle | 201 | * @memcg: cgroup memory controller handle |
202 | * @tree: legacy subtree mode | ||
202 | * @scanned: number of pages scanned | 203 | * @scanned: number of pages scanned |
203 | * @reclaimed: number of pages reclaimed | 204 | * @reclaimed: number of pages reclaimed |
204 | * | 205 | * |
@@ -206,9 +207,16 @@ static void vmpressure_work_fn(struct work_struct *work) | |||
206 | * "instantaneous" memory pressure (scanned/reclaimed ratio). The raw | 207 | * "instantaneous" memory pressure (scanned/reclaimed ratio). The raw |
207 | * pressure index is then further refined and averaged over time. | 208 | * pressure index is then further refined and averaged over time. |
208 | * | 209 | * |
210 | * If @tree is set, vmpressure is in traditional userspace reporting | ||
211 | * mode: @memcg is considered the pressure root and userspace is | ||
212 | * notified of the entire subtree's reclaim efficiency. | ||
213 | * | ||
214 | * If @tree is not set, reclaim efficiency is recorded for @memcg, and | ||
215 | * only in-kernel users are notified. | ||
216 | * | ||
209 | * This function does not return any value. | 217 | * This function does not return any value. |
210 | */ | 218 | */ |
211 | void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, | 219 | void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree, |
212 | unsigned long scanned, unsigned long reclaimed) | 220 | unsigned long scanned, unsigned long reclaimed) |
213 | { | 221 | { |
214 | struct vmpressure *vmpr = memcg_to_vmpressure(memcg); | 222 | struct vmpressure *vmpr = memcg_to_vmpressure(memcg); |
@@ -238,15 +246,47 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, | |||
238 | if (!scanned) | 246 | if (!scanned) |
239 | return; | 247 | return; |
240 | 248 | ||
241 | spin_lock(&vmpr->sr_lock); | 249 | if (tree) { |
242 | vmpr->scanned += scanned; | 250 | spin_lock(&vmpr->sr_lock); |
243 | vmpr->reclaimed += reclaimed; | 251 | vmpr->tree_scanned += scanned; |
244 | scanned = vmpr->scanned; | 252 | vmpr->tree_reclaimed += reclaimed; |
245 | spin_unlock(&vmpr->sr_lock); | 253 | scanned = vmpr->tree_scanned;
254 | spin_unlock(&vmpr->sr_lock); | ||
246 | 255 | ||
247 | if (scanned < vmpressure_win) | 256 | if (scanned < vmpressure_win) |
248 | return; | 257 | return; |
249 | schedule_work(&vmpr->work); | 258 | schedule_work(&vmpr->work); |
259 | } else { | ||
260 | enum vmpressure_levels level; | ||
261 | |||
262 | /* For now, no users for root-level efficiency */ | ||
263 | if (!memcg || memcg == root_mem_cgroup) | ||
264 | return; | ||
265 | |||
266 | spin_lock(&vmpr->sr_lock); | ||
267 | scanned = vmpr->scanned += scanned; | ||
268 | reclaimed = vmpr->reclaimed += reclaimed; | ||
269 | if (scanned < vmpressure_win) { | ||
270 | spin_unlock(&vmpr->sr_lock); | ||
271 | return; | ||
272 | } | ||
273 | vmpr->scanned = vmpr->reclaimed = 0; | ||
274 | spin_unlock(&vmpr->sr_lock); | ||
275 | |||
276 | level = vmpressure_calc_level(scanned, reclaimed); | ||
277 | |||
278 | if (level > VMPRESSURE_LOW) { | ||
279 | /* | ||
280 | * Let the socket buffer allocator know that | ||
281 | * we are having trouble reclaiming LRU pages. | ||
282 | * | ||
283 | * For hysteresis keep the pressure state | ||
284 | * asserted for a second in which subsequent | ||
285 | * pressure events can occur. | ||
286 | */ | ||
287 | memcg->socket_pressure = jiffies + HZ; | ||
288 | } | ||
289 | } | ||
250 | } | 290 | } |
251 | 291 | ||
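Both branches ultimately feed vmpressure_calc_level(), which turns one sampling window of scanned/reclaimed counts into a discrete level. A self-contained sketch of that calculation; the 60% and 95% thresholds match the long-standing defaults in mm/vmpressure.c but should be treated as assumptions here, not ABI:

    #include <stdio.h>

    enum vmpressure_levels { VMPRESSURE_LOW, VMPRESSURE_MEDIUM, VMPRESSURE_CRITICAL };

    /* Assumed defaults from mm/vmpressure.c: medium at 60%, critical at 95%. */
    static const unsigned int level_med = 60, level_critical = 95;

    static enum vmpressure_levels calc_level(unsigned long scanned,
                                             unsigned long reclaimed)
    {
        if (reclaimed >= scanned)
            return VMPRESSURE_LOW;  /* fully efficient reclaim: no pressure */

        /* pressure = 100 - 100 * reclaimed / scanned: the worse the reclaim
         * efficiency over the sampling window, the higher the pressure. */
        unsigned long pressure = 100 - (100 * reclaimed / scanned);

        if (pressure >= level_critical)
            return VMPRESSURE_CRITICAL;
        if (pressure >= level_med)
            return VMPRESSURE_MEDIUM;
        return VMPRESSURE_LOW;
    }

    int main(void)
    {
        printf("512 scanned / 400 reclaimed -> level %d\n", calc_level(512, 400));
        printf("512 scanned /  10 reclaimed -> level %d\n", calc_level(512, 10));
        return 0;
    }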
252 | /** | 292 | /** |
@@ -276,7 +316,7 @@ void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio) | |||
276 | * to the vmpressure() basically means that we signal 'critical' | 316 | * to the vmpressure() basically means that we signal 'critical' |
277 | * level. | 317 | * level. |
278 | */ | 318 | */ |
279 | vmpressure(gfp, memcg, vmpressure_win, 0); | 319 | vmpressure(gfp, memcg, true, vmpressure_win, 0); |
280 | } | 320 | } |
281 | 321 | ||
282 | /** | 322 | /** |
diff --git a/mm/vmscan.c b/mm/vmscan.c index 2aec4241b42a..108bd119f2f6 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -106,8 +106,6 @@ struct scan_control { | |||
106 | unsigned long nr_reclaimed; | 106 | unsigned long nr_reclaimed; |
107 | }; | 107 | }; |
108 | 108 | ||
109 | #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) | ||
110 | |||
111 | #ifdef ARCH_HAS_PREFETCH | 109 | #ifdef ARCH_HAS_PREFETCH |
112 | #define prefetch_prev_lru_page(_page, _base, _field) \ | 110 | #define prefetch_prev_lru_page(_page, _base, _field) \ |
113 | do { \ | 111 | do { \ |
@@ -197,11 +195,13 @@ static unsigned long zone_reclaimable_pages(struct zone *zone) | |||
197 | unsigned long nr; | 195 | unsigned long nr; |
198 | 196 | ||
199 | nr = zone_page_state(zone, NR_ACTIVE_FILE) + | 197 | nr = zone_page_state(zone, NR_ACTIVE_FILE) + |
200 | zone_page_state(zone, NR_INACTIVE_FILE); | 198 | zone_page_state(zone, NR_INACTIVE_FILE) + |
199 | zone_page_state(zone, NR_ISOLATED_FILE); | ||
201 | 200 | ||
202 | if (get_nr_swap_pages() > 0) | 201 | if (get_nr_swap_pages() > 0) |
203 | nr += zone_page_state(zone, NR_ACTIVE_ANON) + | 202 | nr += zone_page_state(zone, NR_ACTIVE_ANON) + |
204 | zone_page_state(zone, NR_INACTIVE_ANON); | 203 | zone_page_state(zone, NR_INACTIVE_ANON) + |
204 | zone_page_state(zone, NR_ISOLATED_ANON); | ||
205 | 205 | ||
206 | return nr; | 206 | return nr; |
207 | } | 207 | } |
@@ -594,7 +594,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping, | |||
594 | /* synchronous write or broken a_ops? */ | 594 | /* synchronous write or broken a_ops? */ |
595 | ClearPageReclaim(page); | 595 | ClearPageReclaim(page); |
596 | } | 596 | } |
597 | trace_mm_vmscan_writepage(page, trace_reclaim_flags(page)); | 597 | trace_mm_vmscan_writepage(page); |
598 | inc_zone_page_state(page, NR_VMSCAN_WRITE); | 598 | inc_zone_page_state(page, NR_VMSCAN_WRITE); |
599 | return PAGE_SUCCESS; | 599 | return PAGE_SUCCESS; |
600 | } | 600 | } |
@@ -1426,6 +1426,7 @@ int isolate_lru_page(struct page *page) | |||
1426 | int ret = -EBUSY; | 1426 | int ret = -EBUSY; |
1427 | 1427 | ||
1428 | VM_BUG_ON_PAGE(!page_count(page), page); | 1428 | VM_BUG_ON_PAGE(!page_count(page), page); |
1429 | VM_BUG_ON_PAGE(PageTail(page), page); | ||
1429 | 1430 | ||
1430 | if (PageLRU(page)) { | 1431 | if (PageLRU(page)) { |
1431 | struct zone *zone = page_zone(page); | 1432 | struct zone *zone = page_zone(page); |
@@ -1691,11 +1692,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, | |||
1691 | current_may_throttle()) | 1692 | current_may_throttle()) |
1692 | wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10); | 1693 | wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10); |
1693 | 1694 | ||
1694 | trace_mm_vmscan_lru_shrink_inactive(zone->zone_pgdat->node_id, | 1695 | trace_mm_vmscan_lru_shrink_inactive(zone, nr_scanned, nr_reclaimed, |
1695 | zone_idx(zone), | 1696 | sc->priority, file); |
1696 | nr_scanned, nr_reclaimed, | ||
1697 | sc->priority, | ||
1698 | trace_shrink_flags(file)); | ||
1699 | return nr_reclaimed; | 1697 | return nr_reclaimed; |
1700 | } | 1698 | } |
1701 | 1699 | ||
@@ -2046,10 +2044,16 @@ static void get_scan_count(struct lruvec *lruvec, int swappiness, | |||
2046 | } | 2044 | } |
2047 | 2045 | ||
2048 | /* | 2046 | /* |
2049 | * There is enough inactive page cache, do not reclaim | 2047 | * If there is enough inactive page cache, i.e. if the size of the |
2050 | * anything from the anonymous working set right now. | 2048 | * inactive list is greater than that of the active list *and* the |
2049 | * inactive list actually has some pages to scan on this priority, we | ||
2050 | * do not reclaim anything from the anonymous working set right now. | ||
2051 | * Without the second condition we could end up never scanning an | ||
2052 | * lruvec even if it has plenty of old anonymous pages unless the | ||
2053 | * system is under heavy pressure. | ||
2051 | */ | 2054 | */ |
2052 | if (!inactive_file_is_low(lruvec)) { | 2055 | if (!inactive_file_is_low(lruvec) && |
2056 | get_lru_size(lruvec, LRU_INACTIVE_FILE) >> sc->priority) { | ||
2053 | scan_balance = SCAN_FILE; | 2057 | scan_balance = SCAN_FILE; |
2054 | goto out; | 2058 | goto out; |
2055 | } | 2059 | } |
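The added shift by sc->priority is a sliding floor, not a fixed one: sc->priority starts at DEF_PRIORITY (12 upstream) and counts down as reclaim gets more aggressive, so a smallish inactive file list only blocks the file-only shortcut at low pressure. A quick illustration of how the threshold scales:

    #include <stdio.h>

    int main(void)
    {
        unsigned long inactive_file = 3000;  /* pages on the inactive file list */

        /* sc->priority counts down from DEF_PRIORITY (12) toward 0 as
         * reclaim gets more desperate; the shifted value must be nonzero
         * for the "reclaim file cache only" shortcut to be taken. */
        for (int priority = 12; priority >= 0; priority -= 3)
            printf("priority %2d: inactive >> priority = %4lu -> %s\n",
                   priority, inactive_file >> priority,
                   (inactive_file >> priority) ? "file only" : "scan anon too");
        return 0;
    }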
@@ -2393,6 +2397,7 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc, | |||
2393 | memcg = mem_cgroup_iter(root, NULL, &reclaim); | 2397 | memcg = mem_cgroup_iter(root, NULL, &reclaim); |
2394 | do { | 2398 | do { |
2395 | unsigned long lru_pages; | 2399 | unsigned long lru_pages; |
2400 | unsigned long reclaimed; | ||
2396 | unsigned long scanned; | 2401 | unsigned long scanned; |
2397 | struct lruvec *lruvec; | 2402 | struct lruvec *lruvec; |
2398 | int swappiness; | 2403 | int swappiness; |
@@ -2405,6 +2410,7 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc, | |||
2405 | 2410 | ||
2406 | lruvec = mem_cgroup_zone_lruvec(zone, memcg); | 2411 | lruvec = mem_cgroup_zone_lruvec(zone, memcg); |
2407 | swappiness = mem_cgroup_swappiness(memcg); | 2412 | swappiness = mem_cgroup_swappiness(memcg); |
2413 | reclaimed = sc->nr_reclaimed; | ||
2408 | scanned = sc->nr_scanned; | 2414 | scanned = sc->nr_scanned; |
2409 | 2415 | ||
2410 | shrink_lruvec(lruvec, swappiness, sc, &lru_pages); | 2416 | shrink_lruvec(lruvec, swappiness, sc, &lru_pages); |
@@ -2415,6 +2421,11 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc, | |||
2415 | memcg, sc->nr_scanned - scanned, | 2421 | memcg, sc->nr_scanned - scanned, |
2416 | lru_pages); | 2422 | lru_pages); |
2417 | 2423 | ||
2424 | /* Record the group's reclaim efficiency */ | ||
2425 | vmpressure(sc->gfp_mask, memcg, false, | ||
2426 | sc->nr_scanned - scanned, | ||
2427 | sc->nr_reclaimed - reclaimed); | ||
2428 | |||
2418 | /* | 2429 | /* |
2419 | * Direct reclaim and kswapd have to scan all memory | 2430 | * Direct reclaim and kswapd have to scan all memory |
2420 | * cgroups to fulfill the overall scan target for the | 2431 | * cgroups to fulfill the overall scan target for the |
@@ -2446,7 +2457,8 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc, | |||
2446 | reclaim_state->reclaimed_slab = 0; | 2457 | reclaim_state->reclaimed_slab = 0; |
2447 | } | 2458 | } |
2448 | 2459 | ||
2449 | vmpressure(sc->gfp_mask, sc->target_mem_cgroup, | 2460 | /* Record the subtree's reclaim efficiency */ |
2461 | vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true, | ||
2450 | sc->nr_scanned - nr_scanned, | 2462 | sc->nr_scanned - nr_scanned, |
2451 | sc->nr_reclaimed - nr_reclaimed); | 2463 | sc->nr_reclaimed - nr_reclaimed); |
2452 | 2464 | ||
diff --git a/mm/vmstat.c b/mm/vmstat.c index c54fd2924f25..83a003bc3cae 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c | |||
@@ -460,7 +460,7 @@ static int fold_diff(int *diff) | |||
460 | * | 460 | * |
461 | * The function returns the number of global counters updated. | 461 | * The function returns the number of global counters updated. |
462 | */ | 462 | */ |
463 | static int refresh_cpu_vm_stats(void) | 463 | static int refresh_cpu_vm_stats(bool do_pagesets) |
464 | { | 464 | { |
465 | struct zone *zone; | 465 | struct zone *zone; |
466 | int i; | 466 | int i; |
@@ -484,33 +484,35 @@ static int refresh_cpu_vm_stats(void) | |||
484 | #endif | 484 | #endif |
485 | } | 485 | } |
486 | } | 486 | } |
487 | cond_resched(); | ||
488 | #ifdef CONFIG_NUMA | 487 | #ifdef CONFIG_NUMA |
489 | /* | 488 | if (do_pagesets) { |
490 | * Deal with draining the remote pageset of this | 489 | cond_resched(); |
491 | * processor | 490 | /* |
492 | * | 491 | * Deal with draining the remote pageset of this |
493 | * Check if there are pages remaining in this pageset | 492 | * processor |
494 | * if not then there is nothing to expire. | 493 | * |
495 | */ | 494 | * Check if there are pages remaining in this pageset |
496 | if (!__this_cpu_read(p->expire) || | 495 | * if not then there is nothing to expire. |
496 | */ | ||
497 | if (!__this_cpu_read(p->expire) || | ||
497 | !__this_cpu_read(p->pcp.count)) | 498 | !__this_cpu_read(p->pcp.count)) |
498 | continue; | 499 | continue; |
499 | 500 | ||
500 | /* | 501 | /* |
501 | * We never drain zones local to this processor. | 502 | * We never drain zones local to this processor. |
502 | */ | 503 | */ |
503 | if (zone_to_nid(zone) == numa_node_id()) { | 504 | if (zone_to_nid(zone) == numa_node_id()) { |
504 | __this_cpu_write(p->expire, 0); | 505 | __this_cpu_write(p->expire, 0); |
505 | continue; | 506 | continue; |
506 | } | 507 | } |
507 | 508 | ||
508 | if (__this_cpu_dec_return(p->expire)) | 509 | if (__this_cpu_dec_return(p->expire)) |
509 | continue; | 510 | continue; |
510 | 511 | ||
511 | if (__this_cpu_read(p->pcp.count)) { | 512 | if (__this_cpu_read(p->pcp.count)) { |
512 | drain_zone_pages(zone, this_cpu_ptr(&p->pcp)); | 513 | drain_zone_pages(zone, this_cpu_ptr(&p->pcp)); |
513 | changes++; | 514 | changes++; |
515 | } | ||
514 | } | 516 | } |
515 | #endif | 517 | #endif |
516 | } | 518 | } |
@@ -1386,7 +1388,7 @@ static cpumask_var_t cpu_stat_off; | |||
1386 | 1388 | ||
1387 | static void vmstat_update(struct work_struct *w) | 1389 | static void vmstat_update(struct work_struct *w) |
1388 | { | 1390 | { |
1389 | if (refresh_cpu_vm_stats()) { | 1391 | if (refresh_cpu_vm_stats(true)) { |
1390 | /* | 1392 | /* |
1391 | * Counters were updated so we expect more updates | 1393 | * Counters were updated so we expect more updates |
1392 | * to occur in the future. Keep on running the | 1394 | * to occur in the future. Keep on running the |
@@ -1418,6 +1420,23 @@ static void vmstat_update(struct work_struct *w) | |||
1418 | } | 1420 | } |
1419 | 1421 | ||
1420 | /* | 1422 | /* |
1423 | * Switch off vmstat processing and then fold all the remaining differentials | ||
1424 | * until the diffs stay at zero. The function is used by NOHZ and can only be | ||
1425 | * invoked when tick processing is not active. | ||
1426 | */ | ||
1427 | void quiet_vmstat(void) | ||
1428 | { | ||
1429 | if (system_state != SYSTEM_RUNNING) | ||
1430 | return; | ||
1431 | |||
1432 | do { | ||
1433 | if (!cpumask_test_and_set_cpu(smp_processor_id(), cpu_stat_off)) | ||
1434 | cancel_delayed_work(this_cpu_ptr(&vmstat_work)); | ||
1435 | |||
1436 | } while (refresh_cpu_vm_stats(false)); | ||
1437 | } | ||
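The do/while shape of quiet_vmstat() is deliberate: cancelling the delayed work only stops future updates, so the loop keeps folding per-cpu differentials into the global counters until a pass finds nothing left to fold. A toy model of that fold-until-clean loop, with made-up arrays standing in for the per-cpu vm_stat_diff state:

    #include <stdio.h>
    #include <stdbool.h>

    #define NR_ITEMS 4

    static long global_stats[NR_ITEMS];
    static long cpu_diff[NR_ITEMS] = { 3, 0, -2, 7 };  /* pending per-cpu deltas */

    /* Returns true if anything was folded, mirroring the "counters were
     * updated" return of refresh_cpu_vm_stats(). In the kernel, new deltas
     * can appear between passes, which is why one pass is not enough. */
    static bool refresh_stats(void)
    {
        bool changed = false;

        for (int i = 0; i < NR_ITEMS; i++) {
            if (cpu_diff[i]) {
                global_stats[i] += cpu_diff[i];
                cpu_diff[i] = 0;
                changed = true;
            }
        }
        return changed;
    }

    int main(void)
    {
        int passes = 0;

        do {
            passes++;  /* the kernel also cancels the vmstat work here */
        } while (refresh_stats());

        printf("quiesced after %d passes; stats[3] = %ld\n",
               passes, global_stats[3]);
        return 0;
    }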
1438 | |||
1439 | /* | ||
1421 | * Check if the diffs for a certain cpu indicate that | 1440 | * Check if the diffs for a certain cpu indicate that |
1422 | * an update is needed. | 1441 | * an update is needed. |
1423 | */ | 1442 | */ |
@@ -1449,7 +1468,7 @@ static bool need_update(int cpu) | |||
1449 | */ | 1468 | */ |
1450 | static void vmstat_shepherd(struct work_struct *w); | 1469 | static void vmstat_shepherd(struct work_struct *w); |
1451 | 1470 | ||
1452 | static DECLARE_DELAYED_WORK(shepherd, vmstat_shepherd); | 1471 | static DECLARE_DEFERRABLE_WORK(shepherd, vmstat_shepherd); |
1453 | 1472 | ||
1454 | static void vmstat_shepherd(struct work_struct *w) | 1473 | static void vmstat_shepherd(struct work_struct *w) |
1455 | { | 1474 | { |
@@ -463,9 +463,6 @@ void zbud_free(struct zbud_pool *pool, unsigned long handle) | |||
463 | spin_unlock(&pool->lock); | 463 | spin_unlock(&pool->lock); |
464 | } | 464 | } |
465 | 465 | ||
466 | #define list_tail_entry(ptr, type, member) \ | ||
467 | list_entry((ptr)->prev, type, member) | ||
468 | |||
469 | /** | 466 | /** |
470 | * zbud_reclaim_page() - evicts allocations from a pool page and frees it | 467 | * zbud_reclaim_page() - evicts allocations from a pool page and frees it |
471 | * @pool: pool from which a page will attempt to be evicted | 468 | * @pool: pool from which a page will attempt to be evicted |
@@ -514,7 +511,7 @@ int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries) | |||
514 | return -EINVAL; | 511 | return -EINVAL; |
515 | } | 512 | } |
516 | for (i = 0; i < retries; i++) { | 513 | for (i = 0; i < retries; i++) { |
517 | zhdr = list_tail_entry(&pool->lru, struct zbud_header, lru); | 514 | zhdr = list_last_entry(&pool->lru, struct zbud_header, lru); |
518 | list_del(&zhdr->lru); | 515 | list_del(&zhdr->lru); |
519 | list_del(&zhdr->buddy); | 516 | list_del(&zhdr->buddy); |
520 | /* Protect zbud page against free */ | 517 | /* Protect zbud page against free */ |
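The deleted local macro was a byte-for-byte duplicate of what include/linux/list.h provides as list_last_entry(); the generic pair simply walks next or prev off the list head:

    /* From include/linux/list.h (4.5-era): */
    #define list_first_entry(ptr, type, member) \
        list_entry((ptr)->next, type, member)
    #define list_last_entry(ptr, type, member) \
        list_entry((ptr)->prev, type, member)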
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 9f15bdd9163c..e7414cec220b 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c | |||
@@ -213,10 +213,10 @@ struct size_class { | |||
213 | int size; | 213 | int size; |
214 | unsigned int index; | 214 | unsigned int index; |
215 | 215 | ||
216 | /* Number of PAGE_SIZE sized pages to combine to form a 'zspage' */ | ||
217 | int pages_per_zspage; | ||
218 | struct zs_size_stat stats; | 216 | struct zs_size_stat stats; |
219 | 217 | ||
218 | /* Number of PAGE_SIZE sized pages to combine to form a 'zspage' */ | ||
219 | int pages_per_zspage; | ||
220 | /* huge object: pages_per_zspage == 1 && maxobj_per_zspage == 1 */ | 220 | /* huge object: pages_per_zspage == 1 && maxobj_per_zspage == 1 */ |
221 | bool huge; | 221 | bool huge; |
222 | }; | 222 | }; |
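The reorder is a plain struct-packing move: placing the int and the bool after the stats block lets them share what used to be alignment padding around it. A generic LP64 layout demo (stand-in types, not the real size_class or zs_size_stat):

    #include <stdio.h>

    /* Mirrors the old ordering: the int before the long-aligned stats
     * block forces a 4-byte hole, and the trailing bool pads out to 8. */
    struct old_order { int size; unsigned int index; int pages_per_zspage;
                       long stats[2]; _Bool huge; };

    /* Mirrors the new ordering: the int and the bool pack into the tail. */
    struct new_order { int size; unsigned int index; long stats[2];
                       int pages_per_zspage; _Bool huge; };

    int main(void)
    {
        printf("old layout: %zu bytes\n", sizeof(struct old_order));  /* 40 on LP64 */
        printf("new layout: %zu bytes\n", sizeof(struct new_order));  /* 32 on LP64 */
        return 0;
    }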
diff --git a/net/core/sock.c b/net/core/sock.c index 51270238e269..6c1c8bc93412 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
@@ -195,44 +195,6 @@ bool sk_net_capable(const struct sock *sk, int cap) | |||
195 | } | 195 | } |
196 | EXPORT_SYMBOL(sk_net_capable); | 196 | EXPORT_SYMBOL(sk_net_capable); |
197 | 197 | ||
198 | |||
199 | #ifdef CONFIG_MEMCG_KMEM | ||
200 | int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss) | ||
201 | { | ||
202 | struct proto *proto; | ||
203 | int ret = 0; | ||
204 | |||
205 | mutex_lock(&proto_list_mutex); | ||
206 | list_for_each_entry(proto, &proto_list, node) { | ||
207 | if (proto->init_cgroup) { | ||
208 | ret = proto->init_cgroup(memcg, ss); | ||
209 | if (ret) | ||
210 | goto out; | ||
211 | } | ||
212 | } | ||
213 | |||
214 | mutex_unlock(&proto_list_mutex); | ||
215 | return ret; | ||
216 | out: | ||
217 | list_for_each_entry_continue_reverse(proto, &proto_list, node) | ||
218 | if (proto->destroy_cgroup) | ||
219 | proto->destroy_cgroup(memcg); | ||
220 | mutex_unlock(&proto_list_mutex); | ||
221 | return ret; | ||
222 | } | ||
223 | |||
224 | void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg) | ||
225 | { | ||
226 | struct proto *proto; | ||
227 | |||
228 | mutex_lock(&proto_list_mutex); | ||
229 | list_for_each_entry_reverse(proto, &proto_list, node) | ||
230 | if (proto->destroy_cgroup) | ||
231 | proto->destroy_cgroup(memcg); | ||
232 | mutex_unlock(&proto_list_mutex); | ||
233 | } | ||
234 | #endif | ||
235 | |||
236 | /* | 198 | /* |
237 | * Each address family might have different locking rules, so we have | 199 | * Each address family might have different locking rules, so we have |
238 | * one slock key per address family: | 200 | * one slock key per address family: |
@@ -240,11 +202,6 @@ void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg) | |||
240 | static struct lock_class_key af_family_keys[AF_MAX]; | 202 | static struct lock_class_key af_family_keys[AF_MAX]; |
241 | static struct lock_class_key af_family_slock_keys[AF_MAX]; | 203 | static struct lock_class_key af_family_slock_keys[AF_MAX]; |
242 | 204 | ||
243 | #if defined(CONFIG_MEMCG_KMEM) | ||
244 | struct static_key memcg_socket_limit_enabled; | ||
245 | EXPORT_SYMBOL(memcg_socket_limit_enabled); | ||
246 | #endif | ||
247 | |||
248 | /* | 205 | /* |
249 | * Make lock validator output more readable. (we pre-construct these | 206 | * Make lock validator output more readable. (we pre-construct these |
250 | * strings build-time, so that runtime initialization of socket | 207 | * strings build-time, so that runtime initialization of socket |
@@ -1507,12 +1464,6 @@ void sk_free(struct sock *sk) | |||
1507 | } | 1464 | } |
1508 | EXPORT_SYMBOL(sk_free); | 1465 | EXPORT_SYMBOL(sk_free); |
1509 | 1466 | ||
1510 | static void sk_update_clone(const struct sock *sk, struct sock *newsk) | ||
1511 | { | ||
1512 | if (mem_cgroup_sockets_enabled && sk->sk_cgrp) | ||
1513 | sock_update_memcg(newsk); | ||
1514 | } | ||
1515 | |||
1516 | /** | 1467 | /** |
1517 | * sk_clone_lock - clone a socket, and lock its clone | 1468 | * sk_clone_lock - clone a socket, and lock its clone |
1518 | * @sk: the socket to clone | 1469 | * @sk: the socket to clone |
@@ -1607,7 +1558,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) | |||
1607 | sk_set_socket(newsk, NULL); | 1558 | sk_set_socket(newsk, NULL); |
1608 | newsk->sk_wq = NULL; | 1559 | newsk->sk_wq = NULL; |
1609 | 1560 | ||
1610 | sk_update_clone(sk, newsk); | 1561 | if (mem_cgroup_sockets_enabled && sk->sk_memcg) |
1562 | sock_update_memcg(newsk); | ||
1611 | 1563 | ||
1612 | if (newsk->sk_prot->sockets_allocated) | 1564 | if (newsk->sk_prot->sockets_allocated) |
1613 | sk_sockets_allocated_inc(newsk); | 1565 | sk_sockets_allocated_inc(newsk); |
@@ -2089,27 +2041,27 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind) | |||
2089 | struct proto *prot = sk->sk_prot; | 2041 | struct proto *prot = sk->sk_prot; |
2090 | int amt = sk_mem_pages(size); | 2042 | int amt = sk_mem_pages(size); |
2091 | long allocated; | 2043 | long allocated; |
2092 | int parent_status = UNDER_LIMIT; | ||
2093 | 2044 | ||
2094 | sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; | 2045 | sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; |
2095 | 2046 | ||
2096 | allocated = sk_memory_allocated_add(sk, amt, &parent_status); | 2047 | allocated = sk_memory_allocated_add(sk, amt); |
2048 | |||
2049 | if (mem_cgroup_sockets_enabled && sk->sk_memcg && | ||
2050 | !mem_cgroup_charge_skmem(sk->sk_memcg, amt)) | ||
2051 | goto suppress_allocation; | ||
2097 | 2052 | ||
2098 | /* Under limit. */ | 2053 | /* Under limit. */ |
2099 | if (parent_status == UNDER_LIMIT && | 2054 | if (allocated <= sk_prot_mem_limits(sk, 0)) { |
2100 | allocated <= sk_prot_mem_limits(sk, 0)) { | ||
2101 | sk_leave_memory_pressure(sk); | 2055 | sk_leave_memory_pressure(sk); |
2102 | return 1; | 2056 | return 1; |
2103 | } | 2057 | } |
2104 | 2058 | ||
2105 | /* Under pressure. (we or our parents) */ | 2059 | /* Under pressure. */ |
2106 | if ((parent_status > SOFT_LIMIT) || | 2060 | if (allocated > sk_prot_mem_limits(sk, 1)) |
2107 | allocated > sk_prot_mem_limits(sk, 1)) | ||
2108 | sk_enter_memory_pressure(sk); | 2061 | sk_enter_memory_pressure(sk); |
2109 | 2062 | ||
2110 | /* Over hard limit (we or our parents) */ | 2063 | /* Over hard limit. */ |
2111 | if ((parent_status == OVER_LIMIT) || | 2064 | if (allocated > sk_prot_mem_limits(sk, 2)) |
2112 | (allocated > sk_prot_mem_limits(sk, 2))) | ||
2113 | goto suppress_allocation; | 2065 | goto suppress_allocation; |
2114 | 2066 | ||
2115 | /* guarantee minimum buffer size under pressure */ | 2067 | /* guarantee minimum buffer size under pressure */ |
@@ -2158,6 +2110,9 @@ suppress_allocation: | |||
2158 | 2110 | ||
2159 | sk_memory_allocated_sub(sk, amt); | 2111 | sk_memory_allocated_sub(sk, amt); |
2160 | 2112 | ||
2113 | if (mem_cgroup_sockets_enabled && sk->sk_memcg) | ||
2114 | mem_cgroup_uncharge_skmem(sk->sk_memcg, amt); | ||
2115 | |||
2161 | return 0; | 2116 | return 0; |
2162 | } | 2117 | } |
2163 | EXPORT_SYMBOL(__sk_mem_schedule); | 2118 | EXPORT_SYMBOL(__sk_mem_schedule); |
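With the per-memcg "parent status" gone, __sk_mem_schedule() now reads as: charge the memcg first, then compare the protocol-wide counter against three plain thresholds, and unwind both charges on suppression. A toy model of that flow with invented limits and stubbed memcg hooks; note that upstream mem_cgroup_charge_skmem() may force-charge even when it returns false, which appears to be why the suppress path uncharges unconditionally:

    #include <stdio.h>
    #include <stdbool.h>

    static long memory_allocated;                     /* protocol-wide counter */
    static const long limits[3] = { 100, 150, 200 };  /* min / pressure / max  */

    static bool memcg_charge(int amt)   { (void)amt; return true; }  /* stub */
    static void memcg_uncharge(int amt) { (void)amt; }               /* stub */

    static bool mem_schedule(int amt)
    {
        long allocated = memory_allocated += amt;

        if (!memcg_charge(amt))
            goto suppress;

        if (allocated <= limits[0])     /* under limit */
            return true;
        if (allocated > limits[1])      /* under pressure */
            printf("  entering memory pressure\n");
        if (allocated > limits[2])      /* over hard limit */
            goto suppress;
        return true;

    suppress:
        memory_allocated -= amt;
        memcg_uncharge(amt);
        return false;
    }

    int main(void)
    {
        for (int i = 0; i < 4; i++)
            printf("schedule(60) -> %d (allocated=%ld)\n",
                   mem_schedule(60), memory_allocated);
        return 0;
    }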
@@ -2173,6 +2128,9 @@ void __sk_mem_reclaim(struct sock *sk, int amount) | |||
2173 | sk_memory_allocated_sub(sk, amount); | 2128 | sk_memory_allocated_sub(sk, amount); |
2174 | sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT; | 2129 | sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT; |
2175 | 2130 | ||
2131 | if (mem_cgroup_sockets_enabled && sk->sk_memcg) | ||
2132 | mem_cgroup_uncharge_skmem(sk->sk_memcg, amount); | ||
2133 | |||
2176 | if (sk_under_memory_pressure(sk) && | 2134 | if (sk_under_memory_pressure(sk) && |
2177 | (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0))) | 2135 | (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0))) |
2178 | sk_leave_memory_pressure(sk); | 2136 | sk_leave_memory_pressure(sk); |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7bb1b091efd1..fd17eec93525 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -422,7 +422,8 @@ void tcp_init_sock(struct sock *sk) | |||
422 | sk->sk_rcvbuf = sysctl_tcp_rmem[1]; | 422 | sk->sk_rcvbuf = sysctl_tcp_rmem[1]; |
423 | 423 | ||
424 | local_bh_disable(); | 424 | local_bh_disable(); |
425 | sock_update_memcg(sk); | 425 | if (mem_cgroup_sockets_enabled) |
426 | sock_update_memcg(sk); | ||
426 | sk_sockets_allocated_inc(sk); | 427 | sk_sockets_allocated_inc(sk); |
427 | local_bh_enable(); | 428 | local_bh_enable(); |
428 | } | 429 | } |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 65947c1f4733..c7d1fb50f381 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -1818,7 +1818,9 @@ void tcp_v4_destroy_sock(struct sock *sk) | |||
1818 | tcp_saved_syn_free(tp); | 1818 | tcp_saved_syn_free(tp); |
1819 | 1819 | ||
1820 | sk_sockets_allocated_dec(sk); | 1820 | sk_sockets_allocated_dec(sk); |
1821 | sock_release_memcg(sk); | 1821 | |
1822 | if (mem_cgroup_sockets_enabled && sk->sk_memcg) | ||
1823 | sock_release_memcg(sk); | ||
1822 | } | 1824 | } |
1823 | EXPORT_SYMBOL(tcp_v4_destroy_sock); | 1825 | EXPORT_SYMBOL(tcp_v4_destroy_sock); |
1824 | 1826 | ||
@@ -2342,11 +2344,6 @@ struct proto tcp_prot = { | |||
2342 | .compat_setsockopt = compat_tcp_setsockopt, | 2344 | .compat_setsockopt = compat_tcp_setsockopt, |
2343 | .compat_getsockopt = compat_tcp_getsockopt, | 2345 | .compat_getsockopt = compat_tcp_getsockopt, |
2344 | #endif | 2346 | #endif |
2345 | #ifdef CONFIG_MEMCG_KMEM | ||
2346 | .init_cgroup = tcp_init_cgroup, | ||
2347 | .destroy_cgroup = tcp_destroy_cgroup, | ||
2348 | .proto_cgroup = tcp_proto_cgroup, | ||
2349 | #endif | ||
2350 | .diag_destroy = tcp_abort, | 2347 | .diag_destroy = tcp_abort, |
2351 | }; | 2348 | }; |
2352 | EXPORT_SYMBOL(tcp_prot); | 2349 | EXPORT_SYMBOL(tcp_prot); |
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 2379c1b4efb2..18bc7f745e9c 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c | |||
@@ -8,75 +8,49 @@ | |||
8 | 8 | ||
9 | int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss) | 9 | int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss) |
10 | { | 10 | { |
11 | struct mem_cgroup *parent = parent_mem_cgroup(memcg); | ||
12 | struct page_counter *counter_parent = NULL; | ||
11 | /* | 13 | /* |
12 | * The root cgroup does not use page_counters, but rather, | 14 | * The root cgroup does not use page_counters, but rather, |
13 | * rely on the data already collected by the network | 15 | * rely on the data already collected by the network |
14 | * subsystem | 16 | * subsystem |
15 | */ | 17 | */ |
16 | struct mem_cgroup *parent = parent_mem_cgroup(memcg); | 18 | if (memcg == root_mem_cgroup) |
17 | struct page_counter *counter_parent = NULL; | ||
18 | struct cg_proto *cg_proto, *parent_cg; | ||
19 | |||
20 | cg_proto = tcp_prot.proto_cgroup(memcg); | ||
21 | if (!cg_proto) | ||
22 | return 0; | 19 | return 0; |
23 | 20 | ||
24 | cg_proto->sysctl_mem[0] = sysctl_tcp_mem[0]; | 21 | memcg->tcp_mem.memory_pressure = 0; |
25 | cg_proto->sysctl_mem[1] = sysctl_tcp_mem[1]; | ||
26 | cg_proto->sysctl_mem[2] = sysctl_tcp_mem[2]; | ||
27 | cg_proto->memory_pressure = 0; | ||
28 | cg_proto->memcg = memcg; | ||
29 | 22 | ||
30 | parent_cg = tcp_prot.proto_cgroup(parent); | 23 | if (parent) |
31 | if (parent_cg) | 24 | counter_parent = &parent->tcp_mem.memory_allocated; |
32 | counter_parent = &parent_cg->memory_allocated; | ||
33 | 25 | ||
34 | page_counter_init(&cg_proto->memory_allocated, counter_parent); | 26 | page_counter_init(&memcg->tcp_mem.memory_allocated, counter_parent); |
35 | percpu_counter_init(&cg_proto->sockets_allocated, 0, GFP_KERNEL); | ||
36 | 27 | ||
37 | return 0; | 28 | return 0; |
38 | } | 29 | } |
39 | EXPORT_SYMBOL(tcp_init_cgroup); | ||
40 | 30 | ||
41 | void tcp_destroy_cgroup(struct mem_cgroup *memcg) | 31 | void tcp_destroy_cgroup(struct mem_cgroup *memcg) |
42 | { | 32 | { |
43 | struct cg_proto *cg_proto; | 33 | if (memcg == root_mem_cgroup) |
44 | |||
45 | cg_proto = tcp_prot.proto_cgroup(memcg); | ||
46 | if (!cg_proto) | ||
47 | return; | 34 | return; |
48 | 35 | ||
49 | percpu_counter_destroy(&cg_proto->sockets_allocated); | 36 | if (memcg->tcp_mem.active) |
50 | 37 | static_branch_dec(&memcg_sockets_enabled_key); | |
51 | if (test_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags)) | ||
52 | static_key_slow_dec(&memcg_socket_limit_enabled); | ||
53 | |||
54 | } | 38 | } |
55 | EXPORT_SYMBOL(tcp_destroy_cgroup); | ||
56 | 39 | ||
57 | static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages) | 40 | static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages) |
58 | { | 41 | { |
59 | struct cg_proto *cg_proto; | ||
60 | int i; | ||
61 | int ret; | 42 | int ret; |
62 | 43 | ||
63 | cg_proto = tcp_prot.proto_cgroup(memcg); | 44 | if (memcg == root_mem_cgroup) |
64 | if (!cg_proto) | ||
65 | return -EINVAL; | 45 | return -EINVAL; |
66 | 46 | ||
67 | ret = page_counter_limit(&cg_proto->memory_allocated, nr_pages); | 47 | ret = page_counter_limit(&memcg->tcp_mem.memory_allocated, nr_pages); |
68 | if (ret) | 48 | if (ret) |
69 | return ret; | 49 | return ret; |
70 | 50 | ||
71 | for (i = 0; i < 3; i++) | 51 | if (!memcg->tcp_mem.active) { |
72 | cg_proto->sysctl_mem[i] = min_t(long, nr_pages, | ||
73 | sysctl_tcp_mem[i]); | ||
74 | |||
75 | if (nr_pages == PAGE_COUNTER_MAX) | ||
76 | clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags); | ||
77 | else { | ||
78 | /* | 52 | /* |
79 | * The active bit needs to be written after the static_key | 53 | * The active flag needs to be written after the static_key |
80 | * update. This is what guarantees that the socket activation | 54 | * update. This is what guarantees that the socket activation |
81 | * function is the last one to run. See sock_update_memcg() for | 55 | * function is the last one to run. See sock_update_memcg() for |
82 | * details, and note that we don't mark any socket as belonging | 56 | * details, and note that we don't mark any socket as belonging |
@@ -90,14 +64,9 @@ static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages) | |||
90 | * We never race with the readers in sock_update_memcg(), | 64 | * We never race with the readers in sock_update_memcg(), |
91 | * because when this value change, the code to process it is not | 65 | * because when this value change, the code to process it is not |
92 | * patched in yet. | 66 | * patched in yet. |
93 | * | ||
94 | * The activated bit is used to guarantee that no two writers | ||
95 | * will do the update in the same memcg. Without that, we can't | ||
96 | * properly shutdown the static key. | ||
97 | */ | 67 | */ |
98 | if (!test_and_set_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags)) | 68 | static_branch_inc(&memcg_sockets_enabled_key); |
99 | static_key_slow_inc(&memcg_socket_limit_enabled); | 69 | memcg->tcp_mem.active = true; |
100 | set_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags); | ||
101 | } | 70 | } |
102 | 71 | ||
103 | return 0; | 72 | return 0; |
@@ -141,32 +110,32 @@ static ssize_t tcp_cgroup_write(struct kernfs_open_file *of, | |||
141 | static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft) | 110 | static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft) |
142 | { | 111 | { |
143 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | 112 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
144 | struct cg_proto *cg_proto = tcp_prot.proto_cgroup(memcg); | ||
145 | u64 val; | 113 | u64 val; |
146 | 114 | ||
147 | switch (cft->private) { | 115 | switch (cft->private) { |
148 | case RES_LIMIT: | 116 | case RES_LIMIT: |
149 | if (!cg_proto) | 117 | if (memcg == root_mem_cgroup) |
150 | return PAGE_COUNTER_MAX; | 118 | val = PAGE_COUNTER_MAX; |
151 | val = cg_proto->memory_allocated.limit; | 119 | else |
120 | val = memcg->tcp_mem.memory_allocated.limit; | ||
152 | val *= PAGE_SIZE; | 121 | val *= PAGE_SIZE; |
153 | break; | 122 | break; |
154 | case RES_USAGE: | 123 | case RES_USAGE: |
155 | if (!cg_proto) | 124 | if (memcg == root_mem_cgroup) |
156 | val = atomic_long_read(&tcp_memory_allocated); | 125 | val = atomic_long_read(&tcp_memory_allocated); |
157 | else | 126 | else |
158 | val = page_counter_read(&cg_proto->memory_allocated); | 127 | val = page_counter_read(&memcg->tcp_mem.memory_allocated); |
159 | val *= PAGE_SIZE; | 128 | val *= PAGE_SIZE; |
160 | break; | 129 | break; |
161 | case RES_FAILCNT: | 130 | case RES_FAILCNT: |
162 | if (!cg_proto) | 131 | if (memcg == root_mem_cgroup) |
163 | return 0; | 132 | return 0; |
164 | val = cg_proto->memory_allocated.failcnt; | 133 | val = memcg->tcp_mem.memory_allocated.failcnt; |
165 | break; | 134 | break; |
166 | case RES_MAX_USAGE: | 135 | case RES_MAX_USAGE: |
167 | if (!cg_proto) | 136 | if (memcg == root_mem_cgroup) |
168 | return 0; | 137 | return 0; |
169 | val = cg_proto->memory_allocated.watermark; | 138 | val = memcg->tcp_mem.memory_allocated.watermark; |
170 | val *= PAGE_SIZE; | 139 | val *= PAGE_SIZE; |
171 | break; | 140 | break; |
172 | default: | 141 | default: |
@@ -179,19 +148,17 @@ static ssize_t tcp_cgroup_reset(struct kernfs_open_file *of, | |||
179 | char *buf, size_t nbytes, loff_t off) | 148 | char *buf, size_t nbytes, loff_t off) |
180 | { | 149 | { |
181 | struct mem_cgroup *memcg; | 150 | struct mem_cgroup *memcg; |
182 | struct cg_proto *cg_proto; | ||
183 | 151 | ||
184 | memcg = mem_cgroup_from_css(of_css(of)); | 152 | memcg = mem_cgroup_from_css(of_css(of)); |
185 | cg_proto = tcp_prot.proto_cgroup(memcg); | 153 | if (memcg == root_mem_cgroup) |
186 | if (!cg_proto) | ||
187 | return nbytes; | 154 | return nbytes; |
188 | 155 | ||
189 | switch (of_cft(of)->private) { | 156 | switch (of_cft(of)->private) { |
190 | case RES_MAX_USAGE: | 157 | case RES_MAX_USAGE: |
191 | page_counter_reset_watermark(&cg_proto->memory_allocated); | 158 | page_counter_reset_watermark(&memcg->tcp_mem.memory_allocated); |
192 | break; | 159 | break; |
193 | case RES_FAILCNT: | 160 | case RES_FAILCNT: |
194 | cg_proto->memory_allocated.failcnt = 0; | 161 | memcg->tcp_mem.memory_allocated.failcnt = 0; |
195 | break; | 162 | break; |
196 | } | 163 | } |
197 | 164 | ||
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 412a920fe0ec..fda379cd600d 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -2813,13 +2813,16 @@ begin_fwd: | |||
2813 | */ | 2813 | */ |
2814 | void sk_forced_mem_schedule(struct sock *sk, int size) | 2814 | void sk_forced_mem_schedule(struct sock *sk, int size) |
2815 | { | 2815 | { |
2816 | int amt, status; | 2816 | int amt; |
2817 | 2817 | ||
2818 | if (size <= sk->sk_forward_alloc) | 2818 | if (size <= sk->sk_forward_alloc) |
2819 | return; | 2819 | return; |
2820 | amt = sk_mem_pages(size); | 2820 | amt = sk_mem_pages(size); |
2821 | sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; | 2821 | sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; |
2822 | sk_memory_allocated_add(sk, amt, &status); | 2822 | sk_memory_allocated_add(sk, amt); |
2823 | |||
2824 | if (mem_cgroup_sockets_enabled && sk->sk_memcg) | ||
2825 | mem_cgroup_charge_skmem(sk->sk_memcg, amt); | ||
2823 | } | 2826 | } |
2824 | 2827 | ||
2825 | /* Send a FIN. The caller locks the socket for us. | 2828 | /* Send a FIN. The caller locks the socket for us. |
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index db9f1c318afc..4ad8edb46f7c 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c | |||
@@ -1889,9 +1889,6 @@ struct proto tcpv6_prot = { | |||
1889 | .compat_setsockopt = compat_tcp_setsockopt, | 1889 | .compat_setsockopt = compat_tcp_setsockopt, |
1890 | .compat_getsockopt = compat_tcp_getsockopt, | 1890 | .compat_getsockopt = compat_tcp_getsockopt, |
1891 | #endif | 1891 | #endif |
1892 | #ifdef CONFIG_MEMCG_KMEM | ||
1893 | .proto_cgroup = tcp_proto_cgroup, | ||
1894 | #endif | ||
1895 | .clear_sk = tcp_v6_clear_sk, | 1892 | .clear_sk = tcp_v6_clear_sk, |
1896 | .diag_destroy = tcp_abort, | 1893 | .diag_destroy = tcp_abort, |
1897 | }; | 1894 | }; |
diff --git a/net/socket.c b/net/socket.c index 91c2de6f5020..c044d1e8508c 100644 --- a/net/socket.c +++ b/net/socket.c | |||
@@ -294,7 +294,7 @@ static int init_inodecache(void) | |||
294 | 0, | 294 | 0, |
295 | (SLAB_HWCACHE_ALIGN | | 295 | (SLAB_HWCACHE_ALIGN | |
296 | SLAB_RECLAIM_ACCOUNT | | 296 | SLAB_RECLAIM_ACCOUNT | |
297 | SLAB_MEM_SPREAD), | 297 | SLAB_MEM_SPREAD | SLAB_ACCOUNT), |
298 | init_once); | 298 | init_once); |
299 | if (sock_inode_cachep == NULL) | 299 | if (sock_inode_cachep == NULL) |
300 | return -ENOMEM; | 300 | return -ENOMEM; |
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index d81186d34558..14f45bf0410c 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c | |||
@@ -1500,7 +1500,7 @@ int register_rpc_pipefs(void) | |||
1500 | rpc_inode_cachep = kmem_cache_create("rpc_inode_cache", | 1500 | rpc_inode_cachep = kmem_cache_create("rpc_inode_cache", |
1501 | sizeof(struct rpc_inode), | 1501 | sizeof(struct rpc_inode), |
1502 | 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| | 1502 | 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| |
1503 | SLAB_MEM_SPREAD), | 1503 | SLAB_MEM_SPREAD|SLAB_ACCOUNT), |
1504 | init_once); | 1504 | init_once); |
1505 | if (!rpc_inode_cachep) | 1505 | if (!rpc_inode_cachep) |
1506 | return -ENOMEM; | 1506 | return -ENOMEM; |
diff --git a/scripts/bloat-o-meter b/scripts/bloat-o-meter index 23e78dcd12bf..38b64f487315 100755 --- a/scripts/bloat-o-meter +++ b/scripts/bloat-o-meter | |||
@@ -58,8 +58,8 @@ for name in common: | |||
58 | delta.sort() | 58 | delta.sort() |
59 | delta.reverse() | 59 | delta.reverse() |
60 | 60 | ||
61 | print "add/remove: %s/%s grow/shrink: %s/%s up/down: %s/%s (%s)" % \ | 61 | print("add/remove: %s/%s grow/shrink: %s/%s up/down: %s/%s (%s)" % \ |
62 | (add, remove, grow, shrink, up, -down, up-down) | 62 | (add, remove, grow, shrink, up, -down, up-down)) |
63 | print "%-40s %7s %7s %+7s" % ("function", "old", "new", "delta") | 63 | print("%-40s %7s %7s %+7s" % ("function", "old", "new", "delta")) |
64 | for d, n in delta: | 64 | for d, n in delta: |
65 | if d: print "%-40s %7s %7s %+7d" % (n, old.get(n,"-"), new.get(n,"-"), d) | 65 | if d: print("%-40s %7s %7s %+7d" % (n, old.get(n,"-"), new.get(n,"-"), d)) |
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 8adca4406198..161dd0d67da8 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c | |||
@@ -125,7 +125,7 @@ do { \ | |||
125 | sprintf(str + strlen(str), "*"); \ | 125 | sprintf(str + strlen(str), "*"); \ |
126 | } while(0) | 126 | } while(0) |
127 | 127 | ||
128 | /* Always end in a wildcard, for future extension */ | 128 | /* End in a wildcard, for future extension */ |
129 | static inline void add_wildcard(char *str) | 129 | static inline void add_wildcard(char *str) |
130 | { | 130 | { |
131 | int len = strlen(str); | 131 | int len = strlen(str); |
@@ -704,7 +704,6 @@ static int do_of_entry (const char *filename, void *symval, char *alias) | |||
704 | if (isspace (*tmp)) | 704 | if (isspace (*tmp)) |
705 | *tmp = '_'; | 705 | *tmp = '_'; |
706 | 706 | ||
707 | add_wildcard(alias); | ||
708 | return 1; | 707 | return 1; |
709 | } | 708 | } |
710 | ADD_TO_DEVTABLE("of", of_device_id, do_of_entry); | 709 | ADD_TO_DEVTABLE("of", of_device_id, do_of_entry); |