author		Linus Torvalds <torvalds@linux-foundation.org>	2015-04-14 19:49:17 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-04-14 19:49:17 -0400
commit		1dcf58d6e6e6eb7ec10e9abc56887b040205b06f (patch)
tree		c03e7a25ef13eea62f1547914a76e5c68f3f4c28
parent		80dcc31fbe55932ac9204daee5f2ebc0c49b6da3 (diff)
parent		e4b0db72be2487bae0e3251c22f82c104f7c1cfd (diff)
Merge branch 'akpm' (patches from Andrew)
Merge first patchbomb from Andrew Morton:
- arch/sh updates
- ocfs2 updates
- kernel/watchdog feature
- about half of mm/
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (122 commits)
Documentation: update arch list in the 'memtest' entry
Kconfig: memtest: update number of test patterns up to 17
arm: add support for memtest
arm64: add support for memtest
memtest: use phys_addr_t for physical addresses
mm: move memtest under mm
mm, hugetlb: abort __get_user_pages if current has been oom killed
mm, mempool: do not allow atomic resizing
memcg: print cgroup information when system panics due to panic_on_oom
mm: numa: remove migrate_ratelimited
mm: fold arch_randomize_brk into ARCH_HAS_ELF_RANDOMIZE
mm: split ET_DYN ASLR from mmap ASLR
s390: redefine randomize_et_dyn for ELF_ET_DYN_BASE
mm: expose arch_mmap_rnd when available
s390: standardize mmap_rnd() usage
powerpc: standardize mmap_rnd() usage
mips: extract logic for mmap_rnd()
arm64: standardize mmap_rnd() usage
x86: standardize mmap_rnd() usage
arm: factor out mmap ASLR into mmap_rnd
...
153 files changed, 2312 insertions, 1419 deletions
diff --git a/Documentation/cma/debugfs.txt b/Documentation/cma/debugfs.txt
new file mode 100644
index 000000000000..6cef20a8cedc
--- /dev/null
+++ b/Documentation/cma/debugfs.txt
@@ -0,0 +1,21 @@
+The CMA debugfs interface is useful to retrieve basic information out of the
+different CMA areas and to test allocation/release in each of the areas.
+
+Each CMA zone represents a directory under <debugfs>/cma/, indexed by the
+kernel's CMA index. So the first CMA zone would be:
+
+	<debugfs>/cma/cma-0
+
+The structure of the files created under that directory is as follows:
+
+ - [RO] base_pfn: The base PFN (Page Frame Number) of the zone.
+ - [RO] count: Amount of memory in the CMA area.
+ - [RO] order_per_bit: Order of pages represented by one bit.
+ - [RO] bitmap: The bitmap of page states in the zone.
+ - [WO] alloc: Allocate N pages from that CMA area. For example:
+
+	echo 5 > <debugfs>/cma/cma-2/alloc
+
+would try to allocate 5 pages from the cma-2 area.
+
+ - [WO] free: Free N pages from that CMA area, similar to the above.
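For readers who want to poke at the new interface, here is a minimal userspace sketch of the workflow documented above. It assumes debugfs is mounted at /sys/kernel/debug and that a cma-0 area exists on the running kernel; the paths and the 5-page request are only illustrative.

```c
#include <stdio.h>

/* Read a single decimal value from a debugfs file, or -1 on failure. */
static long read_long(const char *path)
{
	long val = -1;
	FILE *f = fopen(path, "r");

	if (f) {
		if (fscanf(f, "%ld", &val) != 1)
			val = -1;
		fclose(f);
	}
	return val;
}

/* Write a small decimal request (page count) to a write-only debugfs file. */
static void write_long(const char *path, long val)
{
	FILE *f = fopen(path, "w");

	if (f) {
		fprintf(f, "%ld\n", val);
		fclose(f);
	}
}

int main(void)
{
	const char *base = "/sys/kernel/debug/cma/cma-0";	/* assumed area */
	char path[256];

	snprintf(path, sizeof(path), "%s/count", base);
	printf("count: %ld\n", read_long(path));

	snprintf(path, sizeof(path), "%s/alloc", base);
	write_long(path, 5);			/* [WO] alloc: request 5 pages */

	snprintf(path, sizeof(path), "%s/free", base);
	write_long(path, 5);			/* [WO] free: release them again */
	return 0;
}
```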
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 05c36118f8d7..327556349757 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1989,7 +1989,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			seconds. Use this parameter to check at some
 			other rate. 0 disables periodic checking.
 
-	memtest=	[KNL,X86] Enable memtest
+	memtest=	[KNL,X86,ARM] Enable memtest
 			Format: <integer>
 			default : 0 <disable>
 			Specifies the number of memtest passes to be
@@ -2236,8 +2236,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
 	nmi_watchdog=	[KNL,BUGS=X86] Debugging features for SMP kernels
 			Format: [panic,][nopanic,][num]
-			Valid num: 0
+			Valid num: 0 or 1
 			0 - turn nmi_watchdog off
+			1 - turn nmi_watchdog on
 			When panic is specified, panic when an NMI watchdog
 			timeout occurs (or 'nopanic' to override the opposite
 			default).
@@ -2322,6 +2323,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			register save and restore. The kernel will only save
 			legacy floating-point registers on task switch.
 
+	nohugeiomap	[KNL,x86] Disable kernel huge I/O mappings.
+
 	noxsave		[BUGS=X86] Disables x86 extended register state save
 			and restore using xsave. The kernel will fallback to
 			enabling legacy floating-point and sse state.
@@ -2464,7 +2467,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
 	nousb		[USB] Disable the USB subsystem
 
-	nowatchdog	[KNL] Disable the lockup detector (NMI watchdog).
+	nowatchdog	[KNL] Disable both lockup detectors, i.e.
+			soft-lockup and NMI watchdog (hard-lockup).
 
 	nowb		[ARM]
 
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 83ab25660fc9..99d7eb3a1416 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -77,12 +77,14 @@ show up in /proc/sys/kernel:
 - shmmax [ sysv ipc ]
 - shmmni
 - softlockup_all_cpu_backtrace
+- soft_watchdog
 - stop-a [ SPARC only ]
 - sysrq ==> Documentation/sysrq.txt
 - sysctl_writes_strict
 - tainted
 - threads-max
 - unknown_nmi_panic
+- watchdog
 - watchdog_thresh
 - version
 
@@ -417,16 +419,23 @@ successful IPC object allocation.
 
 nmi_watchdog:
 
-Enables/Disables the NMI watchdog on x86 systems. When the value is
-non-zero the NMI watchdog is enabled and will continuously test all
-online cpus to determine whether or not they are still functioning
-properly. Currently, passing "nmi_watchdog=" parameter at boot time is
-required for this function to work.
+This parameter can be used to control the NMI watchdog
+(i.e. the hard lockup detector) on x86 systems.
 
-If LAPIC NMI watchdog method is in use (nmi_watchdog=2 kernel
-parameter), the NMI watchdog shares registers with oprofile. By
-disabling the NMI watchdog, oprofile may have more registers to
-utilize.
+   0 - disable the hard lockup detector
+   1 - enable the hard lockup detector
+
+The hard lockup detector monitors each CPU for its ability to respond to
+timer interrupts. The mechanism utilizes CPU performance counter registers
+that are programmed to generate Non-Maskable Interrupts (NMIs) periodically
+while a CPU is busy. Hence, the alternative name 'NMI watchdog'.
+
+The NMI watchdog is disabled by default if the kernel is running as a guest
+in a KVM virtual machine. This default can be overridden by adding
+
+   nmi_watchdog=1
+
+to the guest kernel command line (see Documentation/kernel-parameters.txt).
 
 ==============================================================
 
@@ -816,6 +825,22 @@ NMI.
 
 ==============================================================
 
+soft_watchdog
+
+This parameter can be used to control the soft lockup detector.
+
+   0 - disable the soft lockup detector
+   1 - enable the soft lockup detector
+
+The soft lockup detector monitors CPUs for threads that are hogging the CPUs
+without rescheduling voluntarily, and thus prevent the 'watchdog/N' threads
+from running. The mechanism depends on the CPUs ability to respond to timer
+interrupts which are needed for the 'watchdog/N' threads to be woken up by
+the watchdog timer function, otherwise the NMI watchdog - if enabled - can
+detect a hard lockup condition.
+
+==============================================================
+
 tainted:
 
 Non-zero if the kernel has been tainted. Numeric values, which
@@ -858,6 +883,25 @@ example. If a system hangs up, try pressing the NMI switch.
 
 ==============================================================
 
+watchdog:
+
+This parameter can be used to disable or enable the soft lockup detector
+_and_ the NMI watchdog (i.e. the hard lockup detector) at the same time.
+
+   0 - disable both lockup detectors
+   1 - enable both lockup detectors
+
+The soft lockup detector and the NMI watchdog can also be disabled or
+enabled individually, using the soft_watchdog and nmi_watchdog parameters.
+If the watchdog parameter is read, for example by executing
+
+   cat /proc/sys/kernel/watchdog
+
+the output of this command (0 or 1) shows the logical OR of soft_watchdog
+and nmi_watchdog.
+
+==============================================================
+
 watchdog_thresh:
 
 This value can be used to control the frequency of hrtimer and NMI
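The three knobs documented above can be observed from userspace. The short sketch below reads soft_watchdog, nmi_watchdog and watchdog and checks the documented logical-OR relationship; it assumes a kernel new enough to expose all three files under /proc/sys/kernel/.

```c
#include <stdio.h>

/* Read one watchdog sysctl knob from /proc/sys/kernel/, or -1 on failure. */
static int read_knob(const char *name)
{
	char path[128];
	int val = -1;
	FILE *f;

	snprintf(path, sizeof(path), "/proc/sys/kernel/%s", name);
	f = fopen(path, "r");
	if (f) {
		if (fscanf(f, "%d", &val) != 1)
			val = -1;
		fclose(f);
	}
	return val;
}

int main(void)
{
	int soft = read_knob("soft_watchdog");
	int hard = read_knob("nmi_watchdog");
	int both = read_knob("watchdog");

	printf("soft_watchdog=%d nmi_watchdog=%d watchdog=%d\n",
	       soft, hard, both);
	/* per the documentation, watchdog reads back soft_watchdog OR nmi_watchdog */
	printf("expected watchdog value: %d\n", (soft == 1 || hard == 1) ? 1 : 0);
	return 0;
}
```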
diff --git a/Documentation/vm/cleancache.txt b/Documentation/vm/cleancache.txt
index 01d76282444e..e4b49df7a048 100644
--- a/Documentation/vm/cleancache.txt
+++ b/Documentation/vm/cleancache.txt
@@ -28,9 +28,7 @@ IMPLEMENTATION OVERVIEW
 A cleancache "backend" that provides transcendent memory registers itself
 to the kernel's cleancache "frontend" by calling cleancache_register_ops,
 passing a pointer to a cleancache_ops structure with funcs set appropriately.
-Note that cleancache_register_ops returns the previous settings so that
-chaining can be performed if desired. The functions provided must conform to
-certain semantics as follows:
+The functions provided must conform to certain semantics as follows:
 
 Most important, cleancache is "ephemeral". Pages which are copied into
 cleancache have an indefinite lifetime which is completely unknowable
diff --git a/Documentation/vm/unevictable-lru.txt b/Documentation/vm/unevictable-lru.txt
index 744f82f86c58..86cb4624fc5a 100644
--- a/Documentation/vm/unevictable-lru.txt
+++ b/Documentation/vm/unevictable-lru.txt
@@ -317,7 +317,7 @@ If the VMA passes some filtering as described in "Filtering Special Vmas"
 below, mlock_fixup() will attempt to merge the VMA with its neighbors or split
 off a subset of the VMA if the range does not cover the entire VMA. Once the
 VMA has been merged or split or neither, mlock_fixup() will call
-__mlock_vma_pages_range() to fault in the pages via get_user_pages() and to
+populate_vma_page_range() to fault in the pages via get_user_pages() and to
 mark the pages as mlocked via mlock_vma_page().
 
 Note that the VMA being mlocked might be mapped with PROT_NONE. In this case,
@@ -327,7 +327,7 @@ fault path or in vmscan.
 
 Also note that a page returned by get_user_pages() could be truncated or
 migrated out from under us, while we're trying to mlock it. To detect this,
-__mlock_vma_pages_range() checks page_mapping() after acquiring the page lock.
+populate_vma_page_range() checks page_mapping() after acquiring the page lock.
 If the page is still associated with its mapping, we'll go ahead and call
 mlock_vma_page(). If the mapping is gone, we just unlock the page and move on.
 In the worst case, this will result in a page mapped in a VM_LOCKED VMA
@@ -392,7 +392,7 @@ ignored for munlock.
 
 If the VMA is VM_LOCKED, mlock_fixup() again attempts to merge or split off the
 specified range. The range is then munlocked via the function
-__mlock_vma_pages_range() - the same function used to mlock a VMA range -
+populate_vma_page_range() - the same function used to mlock a VMA range -
 passing a flag to indicate that munlock() is being performed.
 
 Because the VMA access protections could have been changed to PROT_NONE after
@@ -402,7 +402,7 @@ get_user_pages() was enhanced to accept a flag to ignore the permissions when
 fetching the pages - all of which should be resident as a result of previous
 mlocking.
 
-For munlock(), __mlock_vma_pages_range() unlocks individual pages by calling
+For munlock(), populate_vma_page_range() unlocks individual pages by calling
 munlock_vma_page(). munlock_vma_page() unconditionally clears the PG_mlocked
 flag using TestClearPageMlocked(). As with mlock_vma_page(),
 munlock_vma_page() use the Test*PageMlocked() function to handle the case where
@@ -463,21 +463,11 @@ populate the page table.
 
 To mlock a range of memory under the unevictable/mlock infrastructure, the
 mmap() handler and task address space expansion functions call
-mlock_vma_pages_range() specifying the vma and the address range to mlock.
-mlock_vma_pages_range() filters VMAs like mlock_fixup(), as described above in
-"Filtering Special VMAs". It will clear the VM_LOCKED flag, which will have
-already been set by the caller, in filtered VMAs. Thus these VMA's need not be
-visited for munlock when the region is unmapped.
-
-For "normal" VMAs, mlock_vma_pages_range() calls __mlock_vma_pages_range() to
-fault/allocate the pages and mlock them. Again, like mlock_fixup(),
-mlock_vma_pages_range() downgrades the mmap semaphore to read mode before
-attempting to fault/allocate and mlock the pages and "upgrades" the semaphore
-back to write mode before returning.
-
-The callers of mlock_vma_pages_range() will have already added the memory range
+populate_vma_page_range() specifying the vma and the address range to mlock.
+
+The callers of populate_vma_page_range() will have already added the memory range
 to be mlocked to the task's "locked_vm". To account for filtered VMAs,
-mlock_vma_pages_range() returns the number of pages NOT mlocked. All of the
+populate_vma_page_range() returns the number of pages NOT mlocked. All of the
 callers then subtract a non-negative return value from the task's locked_vm. A
 negative return value represent an error - for example, from get_user_pages()
 attempting to fault in a VMA with PROT_NONE access. In this case, we leave the
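The accounting convention described in the last hunk (callers pre-charge locked_vm, then subtract the non-negative "pages NOT mlocked" return value) can be illustrated with a small stand-alone sketch. This is not kernel code: fake_populate() and mlock_range() are hypothetical stand-ins for populate_vma_page_range() and its callers.

```c
#include <stdio.h>

static long locked_vm;	/* stands in for mm->locked_vm, counted in pages */

/* Stand-in for populate_vma_page_range(): returns the number of pages
 * that were NOT mlocked, or a negative error code. */
static long fake_populate(long nr_pages)
{
	(void)nr_pages;
	return 3;	/* pretend 3 pages were filtered out */
}

static int mlock_range(long nr_pages)
{
	long ret;

	locked_vm += nr_pages;		/* caller pre-charges the whole range */
	ret = fake_populate(nr_pages);
	if (ret < 0)
		return (int)ret;	/* error, e.g. a PROT_NONE mapping */
	locked_vm -= ret;		/* drop the pages that were not mlocked */
	return 0;
}

int main(void)
{
	mlock_range(16);
	printf("locked_vm = %ld pages\n", locked_vm);	/* prints 13 */
	return 0;
}
```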
diff --git a/arch/Kconfig b/arch/Kconfig
index 05d7a8a458d5..e1068987bad1 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -446,6 +446,9 @@ config HAVE_IRQ_TIME_ACCOUNTING
 config HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	bool
 
+config HAVE_ARCH_HUGE_VMAP
+	bool
+
 config HAVE_ARCH_SOFT_DIRTY
 	bool
 
@@ -484,6 +487,18 @@ config HAVE_IRQ_EXIT_ON_IRQ_STACK
 	  This spares a stack switch and improves cache usage on softirq
 	  processing.
 
+config PGTABLE_LEVELS
+	int
+	default 2
+
+config ARCH_HAS_ELF_RANDOMIZE
+	bool
+	help
+	  An architecture supports choosing randomized locations for
+	  stack, mmap, brk, and ET_DYN. Defined functions:
+	  - arch_mmap_rnd()
+	  - arch_randomize_brk()
+
 #
 # ABI hall of shame
 #
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index b7ff9a318c31..bf9e9d3b3792 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -76,6 +76,10 @@ config GENERIC_ISA_DMA
 	bool
 	default y
 
+config PGTABLE_LEVELS
+	int
+	default 3
+
 source "init/Kconfig"
 source "kernel/Kconfig.freezer"
 
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index cf4c0c99aa25..4b62f4caf0ce 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1,8 +1,8 @@
 config ARM
 	bool
 	default y
-	select ARCH_BINFMT_ELF_RANDOMIZE_PIE
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
+	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
 	select ARCH_HAVE_CUSTOM_GPIO_H
 	select ARCH_HAS_GCOV_PROFILE_ALL
@@ -286,6 +286,11 @@ config GENERIC_BUG
 	def_bool y
 	depends on BUG
 
+config PGTABLE_LEVELS
+	int
+	default 3 if ARM_LPAE
+	default 2
+
 source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h
index afb9cafd3786..c1ff8ab12914 100644
--- a/arch/arm/include/asm/elf.h
+++ b/arch/arm/include/asm/elf.h
@@ -125,10 +125,6 @@ int dump_task_regs(struct task_struct *t, elf_gregset_t *elfregs);
 extern void elf_set_personality(const struct elf32_hdr *);
 #define SET_PERSONALITY(ex)	elf_set_personality(&(ex))
 
-struct mm_struct;
-extern unsigned long arch_randomize_brk(struct mm_struct *mm);
-#define arch_randomize_brk arch_randomize_brk
-
 #ifdef CONFIG_MMU
 #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
 struct linux_binprm;
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 1609b022a72f..3d0e9aed4b40 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -335,6 +335,9 @@ void __init bootmem_init(void)
 
 	find_limits(&min, &max_low, &max_high);
 
+	early_memtest((phys_addr_t)min << PAGE_SHIFT,
+		      (phys_addr_t)max_low << PAGE_SHIFT);
+
 	/*
 	 * Sparsemem tries to allocate bootmem in memory_present(),
 	 * so must be done after the fixed reservations
diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
index 5e85ed371364..407dc786583a 100644
--- a/arch/arm/mm/mmap.c
+++ b/arch/arm/mm/mmap.c
@@ -169,14 +169,22 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 	return addr;
 }
 
+unsigned long arch_mmap_rnd(void)
+{
+	unsigned long rnd;
+
+	/* 8 bits of randomness in 20 address space bits */
+	rnd = (unsigned long)get_random_int() % (1 << 8);
+
+	return rnd << PAGE_SHIFT;
+}
+
 void arch_pick_mmap_layout(struct mm_struct *mm)
 {
 	unsigned long random_factor = 0UL;
 
-	/* 8 bits of randomness in 20 address space bits */
-	if ((current->flags & PF_RANDOMIZE) &&
-	    !(current->personality & ADDR_NO_RANDOMIZE))
-		random_factor = (get_random_int() % (1 << 8)) << PAGE_SHIFT;
+	if (current->flags & PF_RANDOMIZE)
+		random_factor = arch_mmap_rnd();
 
 	if (mmap_is_legacy()) {
 		mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
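A quick way to see the effect of the mmap randomization wired up above is to run a tiny program several times and compare the addresses returned by mmap(); with ASLR enabled (/proc/sys/kernel/randomize_va_space greater than 0) the mmap base should move between runs, with page-granularity entropy on the order of the 8 bits noted in the comment on 32-bit ARM. This is only a userspace observation aid, written as a sketch.

```c
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	/* Let the kernel pick the address so it lands in the randomized mmap area. */
	void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	printf("anonymous mapping at %p\n", p);
	munmap(p, 4096);
	return 0;
}
```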
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 1b8e97331ffb..34f487d5d84e 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1,7 +1,7 @@
 config ARM64
 	def_bool y
-	select ARCH_BINFMT_ELF_RANDOMIZE_PIE
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
+	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_SG_CHAIN
 	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
@@ -143,6 +143,13 @@ config KERNEL_MODE_NEON
 config FIX_EARLYCON_MEM
 	def_bool y
 
+config PGTABLE_LEVELS
+	int
+	default 2 if ARM64_64K_PAGES && ARM64_VA_BITS_42
+	default 3 if ARM64_64K_PAGES && ARM64_VA_BITS_48
+	default 3 if ARM64_4K_PAGES && ARM64_VA_BITS_39
+	default 4 if ARM64_4K_PAGES && ARM64_VA_BITS_48
+
 source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
@@ -413,13 +420,6 @@ config ARM64_VA_BITS
 	default 42 if ARM64_VA_BITS_42
 	default 48 if ARM64_VA_BITS_48
 
-config ARM64_PGTABLE_LEVELS
-	int
-	default 2 if ARM64_64K_PAGES && ARM64_VA_BITS_42
-	default 3 if ARM64_64K_PAGES && ARM64_VA_BITS_48
-	default 3 if ARM64_4K_PAGES && ARM64_VA_BITS_39
-	default 4 if ARM64_4K_PAGES && ARM64_VA_BITS_48
-
 config CPU_BIG_ENDIAN
 	bool "Build big-endian kernel"
 	help
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index 1f65be393139..faad6df49e5b 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -125,7 +125,6 @@ typedef struct user_fpsimd_state elf_fpregset_t;
  * the loader. We need to make sure that it is out of the way of the program
  * that it will "exec", and that there is sufficient room for the brk.
  */
-extern unsigned long randomize_et_dyn(unsigned long base);
 #define ELF_ET_DYN_BASE	(2 * TASK_SIZE_64 / 3)
 
 /*
@@ -157,10 +156,6 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm,
 #define STACK_RND_MASK	(0x3ffff >> (PAGE_SHIFT - 12))
 #endif
 
-struct mm_struct;
-extern unsigned long arch_randomize_brk(struct mm_struct *mm);
-#define arch_randomize_brk arch_randomize_brk
-
 #ifdef CONFIG_COMPAT
 
 #ifdef __AARCH64EB__
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index bbfb600fa822..36250705dc4c 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -163,12 +163,12 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
 /*
  * If we are concatenating first level stage-2 page tables, we would have less
  * than or equal to 16 pointers in the fake PGD, because that's what the
- * architecture allows. In this case, (4 - CONFIG_ARM64_PGTABLE_LEVELS)
+ * architecture allows. In this case, (4 - CONFIG_PGTABLE_LEVELS)
  * represents the first level for the host, and we add 1 to go to the next
  * level (which uses contatenation) for the stage-2 tables.
  */
 #if PTRS_PER_S2_PGD <= 16
-#define KVM_PREALLOC_LEVEL	(4 - CONFIG_ARM64_PGTABLE_LEVELS + 1)
+#define KVM_PREALLOC_LEVEL	(4 - CONFIG_PGTABLE_LEVELS + 1)
 #else
 #define KVM_PREALLOC_LEVEL	(0)
 #endif
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 22b16232bd60..8fc8fa280e92 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -36,9 +36,9 @@
  * for more information).
  */
 #ifdef CONFIG_ARM64_64K_PAGES
-#define SWAPPER_PGTABLE_LEVELS	(CONFIG_ARM64_PGTABLE_LEVELS)
+#define SWAPPER_PGTABLE_LEVELS	(CONFIG_PGTABLE_LEVELS)
 #else
-#define SWAPPER_PGTABLE_LEVELS	(CONFIG_ARM64_PGTABLE_LEVELS - 1)
+#define SWAPPER_PGTABLE_LEVELS	(CONFIG_PGTABLE_LEVELS - 1)
 #endif
 
 #define SWAPPER_DIR_SIZE	(SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index e20df38a8ff3..76420568d66a 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -28,7 +28,7 @@
 
 #define PGALLOC_GFP	(GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO)
 
-#if CONFIG_ARM64_PGTABLE_LEVELS > 2
+#if CONFIG_PGTABLE_LEVELS > 2
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
@@ -46,9 +46,9 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 	set_pud(pud, __pud(__pa(pmd) | PMD_TYPE_TABLE));
 }
 
-#endif	/* CONFIG_ARM64_PGTABLE_LEVELS > 2 */
+#endif	/* CONFIG_PGTABLE_LEVELS > 2 */
 
-#if CONFIG_ARM64_PGTABLE_LEVELS > 3
+#if CONFIG_PGTABLE_LEVELS > 3
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
@@ -66,7 +66,7 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
 	set_pgd(pgd, __pgd(__pa(pud) | PUD_TYPE_TABLE));
 }
 
-#endif	/* CONFIG_ARM64_PGTABLE_LEVELS > 3 */
+#endif	/* CONFIG_PGTABLE_LEVELS > 3 */
 
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 5f930cc9ea83..80f3d241cff8 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -21,7 +21,7 @@
 /*
  * PMD_SHIFT determines the size a level 2 page table entry can map.
  */
-#if CONFIG_ARM64_PGTABLE_LEVELS > 2
+#if CONFIG_PGTABLE_LEVELS > 2
 #define PMD_SHIFT	((PAGE_SHIFT - 3) * 2 + 3)
 #define PMD_SIZE	(_AC(1, UL) << PMD_SHIFT)
 #define PMD_MASK	(~(PMD_SIZE-1))
@@ -31,7 +31,7 @@
 /*
  * PUD_SHIFT determines the size a level 1 page table entry can map.
  */
-#if CONFIG_ARM64_PGTABLE_LEVELS > 3
+#if CONFIG_PGTABLE_LEVELS > 3
 #define PUD_SHIFT	((PAGE_SHIFT - 3) * 3 + 3)
 #define PUD_SIZE	(_AC(1, UL) << PUD_SHIFT)
 #define PUD_MASK	(~(PUD_SIZE-1))
@@ -42,7 +42,7 @@
  * PGDIR_SHIFT determines the size a top-level page table entry can map
  * (depending on the configuration, this level can be 0, 1 or 2).
  */
-#define PGDIR_SHIFT	((PAGE_SHIFT - 3) * CONFIG_ARM64_PGTABLE_LEVELS + 3)
+#define PGDIR_SHIFT	((PAGE_SHIFT - 3) * CONFIG_PGTABLE_LEVELS + 3)
 #define PGDIR_SIZE	(_AC(1, UL) << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))
 #define PTRS_PER_PGD	(1 << (VA_BITS - PGDIR_SHIFT))
diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h
index ca9df80af896..2b1bd7e52c3b 100644
--- a/arch/arm64/include/asm/pgtable-types.h
+++ b/arch/arm64/include/asm/pgtable-types.h
@@ -38,13 +38,13 @@ typedef struct { pteval_t pte; } pte_t;
 #define pte_val(x)	((x).pte)
 #define __pte(x)	((pte_t) { (x) } )
 
-#if CONFIG_ARM64_PGTABLE_LEVELS > 2
+#if CONFIG_PGTABLE_LEVELS > 2
 typedef struct { pmdval_t pmd; } pmd_t;
 #define pmd_val(x)	((x).pmd)
 #define __pmd(x)	((pmd_t) { (x) } )
 #endif
 
-#if CONFIG_ARM64_PGTABLE_LEVELS > 3
+#if CONFIG_PGTABLE_LEVELS > 3
 typedef struct { pudval_t pud; } pud_t;
 #define pud_val(x)	((x).pud)
 #define __pud(x)	((pud_t) { (x) } )
@@ -64,13 +64,13 @@ typedef pteval_t pte_t;
 #define pte_val(x)	(x)
 #define __pte(x)	(x)
 
-#if CONFIG_ARM64_PGTABLE_LEVELS > 2
+#if CONFIG_PGTABLE_LEVELS > 2
 typedef pmdval_t pmd_t;
 #define pmd_val(x)	(x)
 #define __pmd(x)	(x)
 #endif
 
-#if CONFIG_ARM64_PGTABLE_LEVELS > 3
+#if CONFIG_PGTABLE_LEVELS > 3
 typedef pudval_t pud_t;
 #define pud_val(x)	(x)
 #define __pud(x)	(x)
@@ -86,9 +86,9 @@ typedef pteval_t pgprot_t;
 
 #endif /* STRICT_MM_TYPECHECKS */
 
-#if CONFIG_ARM64_PGTABLE_LEVELS == 2
+#if CONFIG_PGTABLE_LEVELS == 2
 #include <asm-generic/pgtable-nopmd.h>
-#elif CONFIG_ARM64_PGTABLE_LEVELS == 3
+#elif CONFIG_PGTABLE_LEVELS == 3
 #include <asm-generic/pgtable-nopud.h>
 #endif
 
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 800ec0e87ed9..56283f8a675c 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -374,7 +374,7 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd)
  */
 #define mk_pte(page,prot)	pfn_pte(page_to_pfn(page),prot)
 
-#if CONFIG_ARM64_PGTABLE_LEVELS > 2
+#if CONFIG_PGTABLE_LEVELS > 2
 
 #define pmd_ERROR(pmd)	__pmd_error(__FILE__, __LINE__, pmd_val(pmd))
 
@@ -409,9 +409,9 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
 
 #define pud_page(pud)	pfn_to_page(__phys_to_pfn(pud_val(pud) & PHYS_MASK))
 
-#endif	/* CONFIG_ARM64_PGTABLE_LEVELS > 2 */
+#endif	/* CONFIG_PGTABLE_LEVELS > 2 */
 
-#if CONFIG_ARM64_PGTABLE_LEVELS > 3
+#if CONFIG_PGTABLE_LEVELS > 3
 
 #define pud_ERROR(pud)	__pud_error(__FILE__, __LINE__, pud_val(pud))
 
@@ -445,7 +445,7 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr)
 
 #define pgd_page(pgd)	pfn_to_page(__phys_to_pfn(pgd_val(pgd) & PHYS_MASK))
 
-#endif	/* CONFIG_ARM64_PGTABLE_LEVELS > 3 */
+#endif	/* CONFIG_PGTABLE_LEVELS > 3 */
 
 #define pgd_ERROR(pgd)	__pgd_error(__FILE__, __LINE__, pgd_val(pgd))
 
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index 53d9c354219f..3a0242c7eb8d 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -53,7 +53,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
 	tlb_remove_entry(tlb, pte);
 }
 
-#if CONFIG_ARM64_PGTABLE_LEVELS > 2
+#if CONFIG_PGTABLE_LEVELS > 2
 static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
 				  unsigned long addr)
 {
@@ -62,7 +62,7 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
 }
 #endif
 
-#if CONFIG_ARM64_PGTABLE_LEVELS > 3
+#if CONFIG_PGTABLE_LEVELS > 3
 static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp,
 				  unsigned long addr)
 {
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index ae85da6307bb..597831bdddf3 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -190,6 +190,8 @@ void __init bootmem_init(void)
 	min = PFN_UP(memblock_start_of_DRAM());
 	max = PFN_DOWN(memblock_end_of_DRAM());
 
+	early_memtest(min << PAGE_SHIFT, max << PAGE_SHIFT);
+
 	/*
 	 * Sparsemem tries to allocate bootmem in memory_present(), so must be
 	 * done after the fixed reservations.
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index 54922d1275b8..ed177475dd8c 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -47,17 +47,16 @@ static int mmap_is_legacy(void)
 	return sysctl_legacy_va_layout;
 }
 
-static unsigned long mmap_rnd(void)
+unsigned long arch_mmap_rnd(void)
 {
-	unsigned long rnd = 0;
+	unsigned long rnd;
 
-	if (current->flags & PF_RANDOMIZE)
-		rnd = (long)get_random_int() & STACK_RND_MASK;
+	rnd = (unsigned long)get_random_int() & STACK_RND_MASK;
 
 	return rnd << PAGE_SHIFT;
 }
 
-static unsigned long mmap_base(void)
+static unsigned long mmap_base(unsigned long rnd)
 {
 	unsigned long gap = rlimit(RLIMIT_STACK);
 
@@ -66,7 +65,7 @@ static unsigned long mmap_base(void)
 	else if (gap > MAX_GAP)
 		gap = MAX_GAP;
 
-	return PAGE_ALIGN(STACK_TOP - gap - mmap_rnd());
+	return PAGE_ALIGN(STACK_TOP - gap - rnd);
 }
 
 /*
@@ -75,15 +74,20 @@ static unsigned long mmap_base(void)
  */
 void arch_pick_mmap_layout(struct mm_struct *mm)
 {
+	unsigned long random_factor = 0UL;
+
+	if (current->flags & PF_RANDOMIZE)
+		random_factor = arch_mmap_rnd();
+
 	/*
 	 * Fall back to the standard layout if the personality bit is set, or
 	 * if the expected stack growth is unlimited:
 	 */
 	if (mmap_is_legacy()) {
-		mm->mmap_base = TASK_UNMAPPED_BASE;
+		mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
 		mm->get_unmapped_area = arch_get_unmapped_area;
 	} else {
-		mm->mmap_base = mmap_base();
+		mm->mmap_base = mmap_base(random_factor);
 		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
 	}
 }
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index c6daaf6c6f97..79e01163a981 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -550,10 +550,10 @@ void vmemmap_free(unsigned long start, unsigned long end)
 #endif	/* CONFIG_SPARSEMEM_VMEMMAP */
 
 static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
-#if CONFIG_ARM64_PGTABLE_LEVELS > 2
+#if CONFIG_PGTABLE_LEVELS > 2
 static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;
 #endif
-#if CONFIG_ARM64_PGTABLE_LEVELS > 3
+#if CONFIG_PGTABLE_LEVELS > 3
 static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;
 #endif
 
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 074e52bf815c..4f9a6661491b 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -1,3 +1,8 @@
+config PGTABLE_LEVELS
+	int "Page Table Levels" if !IA64_PAGE_SIZE_64KB
+	range 3 4 if !IA64_PAGE_SIZE_64KB
+	default 3
+
 source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
@@ -286,19 +291,6 @@ config IA64_PAGE_SIZE_64KB
 
 endchoice
 
-choice
-	prompt "Page Table Levels"
-	default PGTABLE_3
-
-config PGTABLE_3
-	bool "3 Levels"
-
-config PGTABLE_4
-	depends on !IA64_PAGE_SIZE_64KB
-	bool "4 Levels"
-
-endchoice
-
 if IA64_HP_SIM
 config HZ
 	default 32
diff --git a/arch/ia64/include/asm/page.h b/arch/ia64/include/asm/page.h
index 1f1bf144fe62..ec48bb9f95e1 100644
--- a/arch/ia64/include/asm/page.h
+++ b/arch/ia64/include/asm/page.h
@@ -173,7 +173,7 @@ get_order (unsigned long size)
  */
 typedef struct { unsigned long pte; } pte_t;
 typedef struct { unsigned long pmd; } pmd_t;
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 typedef struct { unsigned long pud; } pud_t;
 #endif
 typedef struct { unsigned long pgd; } pgd_t;
@@ -182,7 +182,7 @@ get_order (unsigned long size)
 
 # define pte_val(x)	((x).pte)
 # define pmd_val(x)	((x).pmd)
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 # define pud_val(x)	((x).pud)
 #endif
 # define pgd_val(x)	((x).pgd)
diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h
index 5767cdfc08db..f5e70e961948 100644
--- a/arch/ia64/include/asm/pgalloc.h
+++ b/arch/ia64/include/asm/pgalloc.h
@@ -32,7 +32,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	quicklist_free(0, NULL, pgd);
 }
 
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 static inline void
 pgd_populate(struct mm_struct *mm, pgd_t * pgd_entry, pud_t * pud)
 {
@@ -49,7 +49,7 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 	quicklist_free(0, NULL, pud);
 }
 #define __pud_free_tlb(tlb, pud, address)	pud_free((tlb)->mm, pud)
-#endif /* CONFIG_PGTABLE_4 */
+#endif /* CONFIG_PGTABLE_LEVELS == 4 */
 
 static inline void
 pud_populate(struct mm_struct *mm, pud_t * pud_entry, pmd_t * pmd)
diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h
index 7b6f8801df57..9f3ed9ee8f13 100644
--- a/arch/ia64/include/asm/pgtable.h
+++ b/arch/ia64/include/asm/pgtable.h
@@ -99,7 +99,7 @@
 #define PMD_MASK	(~(PMD_SIZE-1))
 #define PTRS_PER_PMD	(1UL << (PTRS_PER_PTD_SHIFT))
 
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 /*
  * Definitions for second level:
  *
@@ -117,7 +117,7 @@
  *
  * PGDIR_SHIFT determines what a first-level page table entry can map.
  */
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 #define PGDIR_SHIFT	(PUD_SHIFT + (PTRS_PER_PTD_SHIFT))
 #else
 #define PGDIR_SHIFT	(PMD_SHIFT + (PTRS_PER_PTD_SHIFT))
@@ -180,7 +180,7 @@
 #define __S111	__pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RWX)
 
 #define pgd_ERROR(e)	printk("%s:%d: bad pgd %016lx.\n", __FILE__, __LINE__, pgd_val(e))
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 #define pud_ERROR(e)	printk("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e))
 #endif
 #define pmd_ERROR(e)	printk("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e))
@@ -281,7 +281,7 @@ extern unsigned long VMALLOC_END;
 #define pud_page_vaddr(pud)	((unsigned long) __va(pud_val(pud) & _PFN_MASK))
 #define pud_page(pud)		virt_to_page((pud_val(pud) + PAGE_OFFSET))
 
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 #define pgd_none(pgd)		(!pgd_val(pgd))
 #define pgd_bad(pgd)		(!ia64_phys_addr_valid(pgd_val(pgd)))
 #define pgd_present(pgd)	(pgd_val(pgd) != 0UL)
@@ -384,7 +384,7 @@ pgd_offset (const struct mm_struct *mm, unsigned long address)
    here.  */
 #define pgd_offset_gate(mm, addr)	pgd_offset_k(addr)
 
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 /* Find an entry in the second-level page table.. */
 #define pud_offset(dir,addr) \
 	((pud_t *) pgd_page_vaddr(*(dir)) + (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)))
@@ -586,7 +586,7 @@ extern struct page *zero_page_memmap_ptr;
 #define __HAVE_ARCH_PGD_OFFSET_GATE
 
 
-#ifndef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 3
 #include <asm-generic/pgtable-nopud.h>
 #endif
 #include <asm-generic/pgtable.h>
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index 18e794a57248..e42bf7a913f3 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -146,7 +146,7 @@ ENTRY(vhpt_miss)
 (p6)	dep r17=r18,r19,3,(PAGE_SHIFT-3)	// r17=pgd_offset for region 5
 (p7)	dep r17=r18,r17,3,(PAGE_SHIFT-6)	// r17=pgd_offset for region[0-4]
 	cmp.eq p7,p6=0,r21			// unused address bits all zeroes?
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 	shr.u r28=r22,PUD_SHIFT			// shift pud index into position
 #else
 	shr.u r18=r22,PMD_SHIFT			// shift pmd index into position
@@ -155,7 +155,7 @@ ENTRY(vhpt_miss)
 	ld8 r17=[r17]				// get *pgd (may be 0)
 	;;
 (p7)	cmp.eq p6,p7=r17,r0			// was pgd_present(*pgd) == NULL?
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 	dep r28=r28,r17,3,(PAGE_SHIFT-3)	// r28=pud_offset(pgd,addr)
 	;;
 	shr.u r18=r22,PMD_SHIFT			// shift pmd index into position
@@ -222,13 +222,13 @@ ENTRY(vhpt_miss)
 	 */
 	ld8 r25=[r21]				// read *pte again
 	ld8 r26=[r17]				// read *pmd again
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 	ld8 r19=[r28]				// read *pud again
 #endif
 	cmp.ne p6,p7=r0,r0
 	;;
 	cmp.ne.or.andcm p6,p7=r26,r20		// did *pmd change
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 	cmp.ne.or.andcm p6,p7=r19,r29		// did *pud change
 #endif
 	mov r27=PAGE_SHIFT<<2
@@ -476,7 +476,7 @@ ENTRY(nested_dtlb_miss)
 (p6)	dep r17=r18,r19,3,(PAGE_SHIFT-3)	// r17=pgd_offset for region 5
 (p7)	dep r17=r18,r17,3,(PAGE_SHIFT-6)	// r17=pgd_offset for region[0-4]
 	cmp.eq p7,p6=0,r21			// unused address bits all zeroes?
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 	shr.u r18=r22,PUD_SHIFT			// shift pud index into position
 #else
 	shr.u r18=r22,PMD_SHIFT			// shift pmd index into position
@@ -487,7 +487,7 @@ ENTRY(nested_dtlb_miss)
 (p7)	cmp.eq p6,p7=r17,r0			// was pgd_present(*pgd) == NULL?
 	dep r17=r18,r17,3,(PAGE_SHIFT-3)	// r17=p[u|m]d_offset(pgd,addr)
 	;;
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 (p7)	ld8 r17=[r17]				// get *pud (may be 0)
 	shr.u r18=r22,PMD_SHIFT			// shift pmd index into position
 	;;
diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c
index 5151a649c96b..b72cd7a07222 100644
--- a/arch/ia64/kernel/machine_kexec.c
+++ b/arch/ia64/kernel/machine_kexec.c
@@ -156,9 +156,9 @@ void arch_crash_save_vmcoreinfo(void)
 	VMCOREINFO_OFFSET(node_memblk_s, start_paddr);
 	VMCOREINFO_OFFSET(node_memblk_s, size);
 #endif
-#ifdef CONFIG_PGTABLE_3
+#if CONFIG_PGTABLE_LEVELS == 3
 	VMCOREINFO_CONFIG(PGTABLE_3);
-#elif defined(CONFIG_PGTABLE_4)
+#elif CONFIG_PGTABLE_LEVELS == 4
 	VMCOREINFO_CONFIG(PGTABLE_4);
 #endif
 }
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 87b7c7581b1d..2dd8f63bfbbb 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -67,6 +67,10 @@ config HZ
 	default 1000 if CLEOPATRA
 	default 100
 
+config PGTABLE_LEVELS
+	default 2 if SUN3 || COLDFIRE
+	default 3
+
 source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index c7a16904cd03..a326c4cb8cf0 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig | |||
@@ -23,7 +23,7 @@ config MIPS | |||
23 | select HAVE_KRETPROBES | 23 | select HAVE_KRETPROBES |
24 | select HAVE_DEBUG_KMEMLEAK | 24 | select HAVE_DEBUG_KMEMLEAK |
25 | select HAVE_SYSCALL_TRACEPOINTS | 25 | select HAVE_SYSCALL_TRACEPOINTS |
26 | select ARCH_BINFMT_ELF_RANDOMIZE_PIE | 26 | select ARCH_HAS_ELF_RANDOMIZE |
27 | select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES && 64BIT | 27 | select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES && 64BIT |
28 | select RTC_LIB if !MACH_LOONGSON | 28 | select RTC_LIB if !MACH_LOONGSON |
29 | select GENERIC_ATOMIC64 if !64BIT | 29 | select GENERIC_ATOMIC64 if !64BIT |
@@ -2600,6 +2600,11 @@ config STACKTRACE_SUPPORT | |||
2600 | bool | 2600 | bool |
2601 | default y | 2601 | default y |
2602 | 2602 | ||
2603 | config PGTABLE_LEVELS | ||
2604 | int | ||
2605 | default 3 if 64BIT && !PAGE_SIZE_64KB | ||
2606 | default 2 | ||
2607 | |||
2603 | source "init/Kconfig" | 2608 | source "init/Kconfig" |
2604 | 2609 | ||
2605 | source "kernel/Kconfig.freezer" | 2610 | source "kernel/Kconfig.freezer" |
diff --git a/arch/mips/include/asm/elf.h b/arch/mips/include/asm/elf.h index 535f196ffe02..31d747d46a23 100644 --- a/arch/mips/include/asm/elf.h +++ b/arch/mips/include/asm/elf.h | |||
@@ -410,10 +410,6 @@ struct linux_binprm; | |||
410 | extern int arch_setup_additional_pages(struct linux_binprm *bprm, | 410 | extern int arch_setup_additional_pages(struct linux_binprm *bprm, |
411 | int uses_interp); | 411 | int uses_interp); |
412 | 412 | ||
413 | struct mm_struct; | ||
414 | extern unsigned long arch_randomize_brk(struct mm_struct *mm); | ||
415 | #define arch_randomize_brk arch_randomize_brk | ||
416 | |||
417 | struct arch_elf_state { | 413 | struct arch_elf_state { |
418 | int fp_abi; | 414 | int fp_abi; |
419 | int interp_fp_abi; | 415 | int interp_fp_abi; |
diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c index f1baadd56e82..5c81fdd032c3 100644 --- a/arch/mips/mm/mmap.c +++ b/arch/mips/mm/mmap.c | |||
@@ -142,18 +142,26 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp, | |||
142 | addr0, len, pgoff, flags, DOWN); | 142 | addr0, len, pgoff, flags, DOWN); |
143 | } | 143 | } |
144 | 144 | ||
145 | unsigned long arch_mmap_rnd(void) | ||
146 | { | ||
147 | unsigned long rnd; | ||
148 | |||
149 | rnd = (unsigned long)get_random_int(); | ||
150 | rnd <<= PAGE_SHIFT; | ||
151 | if (TASK_IS_32BIT_ADDR) | ||
152 | rnd &= 0xfffffful; | ||
153 | else | ||
154 | rnd &= 0xffffffful; | ||
155 | |||
156 | return rnd; | ||
157 | } | ||
158 | |||
145 | void arch_pick_mmap_layout(struct mm_struct *mm) | 159 | void arch_pick_mmap_layout(struct mm_struct *mm) |
146 | { | 160 | { |
147 | unsigned long random_factor = 0UL; | 161 | unsigned long random_factor = 0UL; |
148 | 162 | ||
149 | if (current->flags & PF_RANDOMIZE) { | 163 | if (current->flags & PF_RANDOMIZE) |
150 | random_factor = get_random_int(); | 164 | random_factor = arch_mmap_rnd(); |
151 | random_factor = random_factor << PAGE_SHIFT; | ||
152 | if (TASK_IS_32BIT_ADDR) | ||
153 | random_factor &= 0xfffffful; | ||
154 | else | ||
155 | random_factor &= 0xffffffful; | ||
156 | } | ||
157 | 165 | ||
158 | if (mmap_is_legacy()) { | 166 | if (mmap_is_legacy()) { |
159 | mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; | 167 | mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; |
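Editor's aside: the new MIPS arch_mmap_rnd() shifts the raw random value by PAGE_SHIFT first and only then masks it, so the masks bound the offset in bytes: below 16 MB (0xffffff) for 32-bit tasks and below 256 MB (0xfffffff) for 64-bit ones. A standalone sketch of that arithmetic; rand() stands in for get_random_int() and is only a stand-in.

/* Standalone model of the MIPS-style mmap offset masking. */
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SHIFT 12

static unsigned long mmap_rnd_model(int is_32bit)
{
	unsigned long rnd = (unsigned long)rand();	/* stand-in RNG */

	rnd <<= PAGE_SHIFT;		/* pages -> bytes, stays page aligned */
	if (is_32bit)
		rnd &= 0xfffffful;	/* offset stays below 16 MB */
	else
		rnd &= 0xffffffful;	/* offset stays below 256 MB */
	return rnd;
}

int main(void)
{
	srand(1);
	printf("32-bit offset: %#lx\n", mmap_rnd_model(1));
	printf("64-bit offset: %#lx\n", mmap_rnd_model(0));
	return 0;
}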
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 8014727a2743..c36546959e86 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig | |||
@@ -103,6 +103,11 @@ config ARCH_MAY_HAVE_PC_FDC | |||
103 | depends on BROKEN | 103 | depends on BROKEN |
104 | default y | 104 | default y |
105 | 105 | ||
106 | config PGTABLE_LEVELS | ||
107 | int | ||
108 | default 3 if 64BIT && PARISC_PAGE_SIZE_4KB | ||
109 | default 2 | ||
110 | |||
106 | source "init/Kconfig" | 111 | source "init/Kconfig" |
107 | 112 | ||
108 | source "kernel/Kconfig.freezer" | 113 | source "kernel/Kconfig.freezer" |
diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h index d17437238a2c..1ba29369257c 100644 --- a/arch/parisc/include/asm/pgalloc.h +++ b/arch/parisc/include/asm/pgalloc.h | |||
@@ -51,7 +51,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) | |||
51 | free_pages((unsigned long)pgd, PGD_ALLOC_ORDER); | 51 | free_pages((unsigned long)pgd, PGD_ALLOC_ORDER); |
52 | } | 52 | } |
53 | 53 | ||
54 | #if PT_NLEVELS == 3 | 54 | #if CONFIG_PGTABLE_LEVELS == 3 |
55 | 55 | ||
56 | /* Three Level Page Table Support for pmd's */ | 56 | /* Three Level Page Table Support for pmd's */ |
57 | 57 | ||
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 15207b9362bf..0a183756d6ec 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h | |||
@@ -68,13 +68,11 @@ extern void purge_tlb_entries(struct mm_struct *, unsigned long); | |||
68 | #define KERNEL_INITIAL_ORDER 24 /* 0 to 1<<24 = 16MB */ | 68 | #define KERNEL_INITIAL_ORDER 24 /* 0 to 1<<24 = 16MB */ |
69 | #define KERNEL_INITIAL_SIZE (1 << KERNEL_INITIAL_ORDER) | 69 | #define KERNEL_INITIAL_SIZE (1 << KERNEL_INITIAL_ORDER) |
70 | 70 | ||
71 | #if defined(CONFIG_64BIT) && defined(CONFIG_PARISC_PAGE_SIZE_4KB) | 71 | #if CONFIG_PGTABLE_LEVELS == 3 |
72 | #define PT_NLEVELS 3 | ||
73 | #define PGD_ORDER 1 /* Number of pages per pgd */ | 72 | #define PGD_ORDER 1 /* Number of pages per pgd */ |
74 | #define PMD_ORDER 1 /* Number of pages per pmd */ | 73 | #define PMD_ORDER 1 /* Number of pages per pmd */ |
75 | #define PGD_ALLOC_ORDER 2 /* first pgd contains pmd */ | 74 | #define PGD_ALLOC_ORDER 2 /* first pgd contains pmd */ |
76 | #else | 75 | #else |
77 | #define PT_NLEVELS 2 | ||
78 | #define PGD_ORDER 1 /* Number of pages per pgd */ | 76 | #define PGD_ORDER 1 /* Number of pages per pgd */ |
79 | #define PGD_ALLOC_ORDER PGD_ORDER | 77 | #define PGD_ALLOC_ORDER PGD_ORDER |
80 | #endif | 78 | #endif |
@@ -93,7 +91,7 @@ extern void purge_tlb_entries(struct mm_struct *, unsigned long); | |||
93 | #define PMD_SHIFT (PLD_SHIFT + BITS_PER_PTE) | 91 | #define PMD_SHIFT (PLD_SHIFT + BITS_PER_PTE) |
94 | #define PMD_SIZE (1UL << PMD_SHIFT) | 92 | #define PMD_SIZE (1UL << PMD_SHIFT) |
95 | #define PMD_MASK (~(PMD_SIZE-1)) | 93 | #define PMD_MASK (~(PMD_SIZE-1)) |
96 | #if PT_NLEVELS == 3 | 94 | #if CONFIG_PGTABLE_LEVELS == 3 |
97 | #define BITS_PER_PMD (PAGE_SHIFT + PMD_ORDER - BITS_PER_PMD_ENTRY) | 95 | #define BITS_PER_PMD (PAGE_SHIFT + PMD_ORDER - BITS_PER_PMD_ENTRY) |
98 | #else | 96 | #else |
99 | #define __PAGETABLE_PMD_FOLDED | 97 | #define __PAGETABLE_PMD_FOLDED |
@@ -277,7 +275,7 @@ extern unsigned long *empty_zero_page; | |||
277 | #define pgd_flag(x) (pgd_val(x) & PxD_FLAG_MASK) | 275 | #define pgd_flag(x) (pgd_val(x) & PxD_FLAG_MASK) |
278 | #define pgd_address(x) ((unsigned long)(pgd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT) | 276 | #define pgd_address(x) ((unsigned long)(pgd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT) |
279 | 277 | ||
280 | #if PT_NLEVELS == 3 | 278 | #if CONFIG_PGTABLE_LEVELS == 3 |
281 | /* The first entry of the permanent pmd is not there if it contains | 279 | /* The first entry of the permanent pmd is not there if it contains |
282 | * the gateway marker */ | 280 | * the gateway marker */ |
283 | #define pmd_none(x) (!pmd_val(x) || pmd_flag(x) == PxD_FLAG_ATTACHED) | 281 | #define pmd_none(x) (!pmd_val(x) || pmd_flag(x) == PxD_FLAG_ATTACHED) |
@@ -287,7 +285,7 @@ extern unsigned long *empty_zero_page; | |||
287 | #define pmd_bad(x) (!(pmd_flag(x) & PxD_FLAG_VALID)) | 285 | #define pmd_bad(x) (!(pmd_flag(x) & PxD_FLAG_VALID)) |
288 | #define pmd_present(x) (pmd_flag(x) & PxD_FLAG_PRESENT) | 286 | #define pmd_present(x) (pmd_flag(x) & PxD_FLAG_PRESENT) |
289 | static inline void pmd_clear(pmd_t *pmd) { | 287 | static inline void pmd_clear(pmd_t *pmd) { |
290 | #if PT_NLEVELS == 3 | 288 | #if CONFIG_PGTABLE_LEVELS == 3 |
291 | if (pmd_flag(*pmd) & PxD_FLAG_ATTACHED) | 289 | if (pmd_flag(*pmd) & PxD_FLAG_ATTACHED) |
292 | /* This is the entry pointing to the permanent pmd | 290 | /* This is the entry pointing to the permanent pmd |
293 | * attached to the pgd; cannot clear it */ | 291 | * attached to the pgd; cannot clear it */ |
@@ -299,7 +297,7 @@ static inline void pmd_clear(pmd_t *pmd) { | |||
299 | 297 | ||
300 | 298 | ||
301 | 299 | ||
302 | #if PT_NLEVELS == 3 | 300 | #if CONFIG_PGTABLE_LEVELS == 3 |
303 | #define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_address(pgd))) | 301 | #define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_address(pgd))) |
304 | #define pgd_page(pgd) virt_to_page((void *)pgd_page_vaddr(pgd)) | 302 | #define pgd_page(pgd) virt_to_page((void *)pgd_page_vaddr(pgd)) |
305 | 303 | ||
@@ -309,7 +307,7 @@ static inline void pmd_clear(pmd_t *pmd) { | |||
309 | #define pgd_bad(x) (!(pgd_flag(x) & PxD_FLAG_VALID)) | 307 | #define pgd_bad(x) (!(pgd_flag(x) & PxD_FLAG_VALID)) |
310 | #define pgd_present(x) (pgd_flag(x) & PxD_FLAG_PRESENT) | 308 | #define pgd_present(x) (pgd_flag(x) & PxD_FLAG_PRESENT) |
311 | static inline void pgd_clear(pgd_t *pgd) { | 309 | static inline void pgd_clear(pgd_t *pgd) { |
312 | #if PT_NLEVELS == 3 | 310 | #if CONFIG_PGTABLE_LEVELS == 3 |
313 | if(pgd_flag(*pgd) & PxD_FLAG_ATTACHED) | 311 | if(pgd_flag(*pgd) & PxD_FLAG_ATTACHED) |
314 | /* This is the permanent pmd attached to the pgd; cannot | 312 | /* This is the permanent pmd attached to the pgd; cannot |
315 | * free it */ | 313 | * free it */ |
@@ -393,7 +391,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) | |||
393 | 391 | ||
394 | /* Find an entry in the second-level page table.. */ | 392 | /* Find an entry in the second-level page table.. */ |
395 | 393 | ||
396 | #if PT_NLEVELS == 3 | 394 | #if CONFIG_PGTABLE_LEVELS == 3 |
397 | #define pmd_offset(dir,address) \ | 395 | #define pmd_offset(dir,address) \ |
398 | ((pmd_t *) pgd_page_vaddr(*(dir)) + (((address)>>PMD_SHIFT) & (PTRS_PER_PMD-1))) | 396 | ((pmd_t *) pgd_page_vaddr(*(dir)) + (((address)>>PMD_SHIFT) & (PTRS_PER_PMD-1))) |
399 | #else | 397 | #else |
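Editor's aside: the pmd_offset() macro kept above extracts the mid-level table index by shifting the address down by PMD_SHIFT and masking with PTRS_PER_PMD-1, which only works because the table size is a power of two. A minimal illustration with made-up values (parisc's real shift depends on the page size):

/* Shift-and-mask index extraction as used by pmd_offset(); example values. */
#include <stdio.h>

#define EX_PMD_SHIFT	21
#define EX_PTRS_PER_PMD	512	/* must be a power of two for the mask trick */

int main(void)
{
	unsigned long address = 0x40123456UL;
	unsigned long index = (address >> EX_PMD_SHIFT) & (EX_PTRS_PER_PMD - 1);

	/* equivalent to (address >> EX_PMD_SHIFT) % EX_PTRS_PER_PMD */
	printf("pmd index for %#lx is %lu\n", address, index);
	return 0;
}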
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 2ab16bb160a8..75819617f93b 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S | |||
@@ -398,7 +398,7 @@ | |||
398 | * can address up to 1TB | 398 | * can address up to 1TB |
399 | */ | 399 | */ |
400 | .macro L2_ptep pmd,pte,index,va,fault | 400 | .macro L2_ptep pmd,pte,index,va,fault |
401 | #if PT_NLEVELS == 3 | 401 | #if CONFIG_PGTABLE_LEVELS == 3 |
402 | extru \va,31-ASM_PMD_SHIFT,ASM_BITS_PER_PMD,\index | 402 | extru \va,31-ASM_PMD_SHIFT,ASM_BITS_PER_PMD,\index |
403 | #else | 403 | #else |
404 | # if defined(CONFIG_64BIT) | 404 | # if defined(CONFIG_64BIT) |
@@ -436,7 +436,7 @@ | |||
436 | * all ILP32 processes and all the kernel for machines with | 436 | * all ILP32 processes and all the kernel for machines with |
437 | * under 4GB of memory) */ | 437 | * under 4GB of memory) */ |
438 | .macro L3_ptep pgd,pte,index,va,fault | 438 | .macro L3_ptep pgd,pte,index,va,fault |
439 | #if PT_NLEVELS == 3 /* we might have a 2-Level scheme, e.g. with 16kb page size */ | 439 | #if CONFIG_PGTABLE_LEVELS == 3 /* we might have a 2-Level scheme, e.g. with 16kb page size */ |
440 | extrd,u \va,63-ASM_PGDIR_SHIFT,ASM_BITS_PER_PGD,\index | 440 | extrd,u \va,63-ASM_PGDIR_SHIFT,ASM_BITS_PER_PGD,\index |
441 | copy %r0,\pte | 441 | copy %r0,\pte |
442 | extrd,u,*= \va,63-ASM_PGDIR_SHIFT,64-ASM_PGDIR_SHIFT,%r0 | 442 | extrd,u,*= \va,63-ASM_PGDIR_SHIFT,64-ASM_PGDIR_SHIFT,%r0 |
diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S index d4dc588c0dc1..e7d64527aff9 100644 --- a/arch/parisc/kernel/head.S +++ b/arch/parisc/kernel/head.S | |||
@@ -74,7 +74,7 @@ $bss_loop: | |||
74 | mtctl %r4,%cr24 /* Initialize kernel root pointer */ | 74 | mtctl %r4,%cr24 /* Initialize kernel root pointer */ |
75 | mtctl %r4,%cr25 /* Initialize user root pointer */ | 75 | mtctl %r4,%cr25 /* Initialize user root pointer */ |
76 | 76 | ||
77 | #if PT_NLEVELS == 3 | 77 | #if CONFIG_PGTABLE_LEVELS == 3 |
78 | /* Set pmd in pgd */ | 78 | /* Set pmd in pgd */ |
79 | load32 PA(pmd0),%r5 | 79 | load32 PA(pmd0),%r5 |
80 | shrd %r5,PxD_VALUE_SHIFT,%r3 | 80 | shrd %r5,PxD_VALUE_SHIFT,%r3 |
@@ -97,7 +97,7 @@ $bss_loop: | |||
97 | stw %r3,0(%r4) | 97 | stw %r3,0(%r4) |
98 | ldo (PAGE_SIZE >> PxD_VALUE_SHIFT)(%r3),%r3 | 98 | ldo (PAGE_SIZE >> PxD_VALUE_SHIFT)(%r3),%r3 |
99 | addib,> -1,%r1,1b | 99 | addib,> -1,%r1,1b |
100 | #if PT_NLEVELS == 3 | 100 | #if CONFIG_PGTABLE_LEVELS == 3 |
101 | ldo ASM_PMD_ENTRY_SIZE(%r4),%r4 | 101 | ldo ASM_PMD_ENTRY_SIZE(%r4),%r4 |
102 | #else | 102 | #else |
103 | ldo ASM_PGD_ENTRY_SIZE(%r4),%r4 | 103 | ldo ASM_PGD_ENTRY_SIZE(%r4),%r4 |
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 15dbe81cf5f3..c229427fa546 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c | |||
@@ -34,7 +34,7 @@ | |||
34 | extern int data_start; | 34 | extern int data_start; |
35 | extern void parisc_kernel_start(void); /* Kernel entry point in head.S */ | 35 | extern void parisc_kernel_start(void); /* Kernel entry point in head.S */ |
36 | 36 | ||
37 | #if PT_NLEVELS == 3 | 37 | #if CONFIG_PGTABLE_LEVELS == 3 |
38 | /* NOTE: This layout exactly conforms to the hybrid L2/L3 page table layout | 38 | /* NOTE: This layout exactly conforms to the hybrid L2/L3 page table layout |
39 | * with the first pmd adjacent to the pgd and below it. gcc doesn't actually | 39 | * with the first pmd adjacent to the pgd and below it. gcc doesn't actually |
40 | * guarantee that global objects will be laid out in memory in the same order | 40 | * guarantee that global objects will be laid out in memory in the same order |
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 22b0940494bb..e99014adf017 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig | |||
@@ -88,7 +88,7 @@ config PPC | |||
88 | select ARCH_MIGHT_HAVE_PC_PARPORT | 88 | select ARCH_MIGHT_HAVE_PC_PARPORT |
89 | select ARCH_MIGHT_HAVE_PC_SERIO | 89 | select ARCH_MIGHT_HAVE_PC_SERIO |
90 | select BINFMT_ELF | 90 | select BINFMT_ELF |
91 | select ARCH_BINFMT_ELF_RANDOMIZE_PIE | 91 | select ARCH_HAS_ELF_RANDOMIZE |
92 | select OF | 92 | select OF |
93 | select OF_EARLY_FLATTREE | 93 | select OF_EARLY_FLATTREE |
94 | select OF_RESERVED_MEM | 94 | select OF_RESERVED_MEM |
@@ -297,6 +297,12 @@ config ZONE_DMA32 | |||
297 | bool | 297 | bool |
298 | default y if PPC64 | 298 | default y if PPC64 |
299 | 299 | ||
300 | config PGTABLE_LEVELS | ||
301 | int | ||
302 | default 2 if !PPC64 | ||
303 | default 3 if PPC_64K_PAGES | ||
304 | default 4 | ||
305 | |||
300 | source "init/Kconfig" | 306 | source "init/Kconfig" |
301 | 307 | ||
302 | source "kernel/Kconfig.freezer" | 308 | source "kernel/Kconfig.freezer" |
diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index 57d289acb803..ee46ffef608e 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h | |||
@@ -128,10 +128,6 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, | |||
128 | (0x7ff >> (PAGE_SHIFT - 12)) : \ | 128 | (0x7ff >> (PAGE_SHIFT - 12)) : \ |
129 | (0x3ffff >> (PAGE_SHIFT - 12))) | 129 | (0x3ffff >> (PAGE_SHIFT - 12))) |
130 | 130 | ||
131 | extern unsigned long arch_randomize_brk(struct mm_struct *mm); | ||
132 | #define arch_randomize_brk arch_randomize_brk | ||
133 | |||
134 | |||
135 | #ifdef CONFIG_SPU_BASE | 131 | #ifdef CONFIG_SPU_BASE |
136 | /* Notes used in ET_CORE. Note name is "SPU/<fd>/<filename>". */ | 132 | /* Notes used in ET_CORE. Note name is "SPU/<fd>/<filename>". */ |
137 | #define NT_SPU 1 | 133 | #define NT_SPU 1 |
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c index cb8bdbe4972f..0f0502e12f6c 100644 --- a/arch/powerpc/mm/mmap.c +++ b/arch/powerpc/mm/mmap.c | |||
@@ -53,21 +53,20 @@ static inline int mmap_is_legacy(void) | |||
53 | return sysctl_legacy_va_layout; | 53 | return sysctl_legacy_va_layout; |
54 | } | 54 | } |
55 | 55 | ||
56 | static unsigned long mmap_rnd(void) | 56 | unsigned long arch_mmap_rnd(void) |
57 | { | 57 | { |
58 | unsigned long rnd = 0; | 58 | unsigned long rnd; |
59 | |||
60 | /* 8MB for 32bit, 1GB for 64bit */ | ||
61 | if (is_32bit_task()) | ||
62 | rnd = (unsigned long)get_random_int() % (1<<(23-PAGE_SHIFT)); | ||
63 | else | ||
64 | rnd = (unsigned long)get_random_int() % (1<<(30-PAGE_SHIFT)); | ||
59 | 65 | ||
60 | if (current->flags & PF_RANDOMIZE) { | ||
61 | /* 8MB for 32bit, 1GB for 64bit */ | ||
62 | if (is_32bit_task()) | ||
63 | rnd = (long)(get_random_int() % (1<<(23-PAGE_SHIFT))); | ||
64 | else | ||
65 | rnd = (long)(get_random_int() % (1<<(30-PAGE_SHIFT))); | ||
66 | } | ||
67 | return rnd << PAGE_SHIFT; | 66 | return rnd << PAGE_SHIFT; |
68 | } | 67 | } |
69 | 68 | ||
70 | static inline unsigned long mmap_base(void) | 69 | static inline unsigned long mmap_base(unsigned long rnd) |
71 | { | 70 | { |
72 | unsigned long gap = rlimit(RLIMIT_STACK); | 71 | unsigned long gap = rlimit(RLIMIT_STACK); |
73 | 72 | ||
@@ -76,7 +75,7 @@ static inline unsigned long mmap_base(void) | |||
76 | else if (gap > MAX_GAP) | 75 | else if (gap > MAX_GAP) |
77 | gap = MAX_GAP; | 76 | gap = MAX_GAP; |
78 | 77 | ||
79 | return PAGE_ALIGN(TASK_SIZE - gap - mmap_rnd()); | 78 | return PAGE_ALIGN(TASK_SIZE - gap - rnd); |
80 | } | 79 | } |
81 | 80 | ||
82 | /* | 81 | /* |
@@ -85,6 +84,11 @@ static inline unsigned long mmap_base(void) | |||
85 | */ | 84 | */ |
86 | void arch_pick_mmap_layout(struct mm_struct *mm) | 85 | void arch_pick_mmap_layout(struct mm_struct *mm) |
87 | { | 86 | { |
87 | unsigned long random_factor = 0UL; | ||
88 | |||
89 | if (current->flags & PF_RANDOMIZE) | ||
90 | random_factor = arch_mmap_rnd(); | ||
91 | |||
88 | /* | 92 | /* |
89 | * Fall back to the standard layout if the personality | 93 | * Fall back to the standard layout if the personality |
90 | * bit is set, or if the expected stack growth is unlimited: | 94 | * bit is set, or if the expected stack growth is unlimited: |
@@ -93,7 +97,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm) | |||
93 | mm->mmap_base = TASK_UNMAPPED_BASE; | 97 | mm->mmap_base = TASK_UNMAPPED_BASE; |
94 | mm->get_unmapped_area = arch_get_unmapped_area; | 98 | mm->get_unmapped_area = arch_get_unmapped_area; |
95 | } else { | 99 | } else { |
96 | mm->mmap_base = mmap_base(); | 100 | mm->mmap_base = mmap_base(random_factor); |
97 | mm->get_unmapped_area = arch_get_unmapped_area_topdown; | 101 | mm->get_unmapped_area = arch_get_unmapped_area_topdown; |
98 | } | 102 | } |
99 | } | 103 | } |
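Editor's aside: mmap_base() clamps the stack rlimit between a minimum and maximum gap, then subtracts the gap and the ASLR offset from the top of the address space. A userspace model of that clamping; MIN_GAP, MAX_GAP and TASK_SIZE here are placeholder values, not powerpc's, and the sketch assumes a 64-bit unsigned long.

/* Model of a top-down mmap base: TASK_SIZE minus a clamped stack gap
 * minus the random offset, rounded up to a page boundary. */
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_MASK	(~(PAGE_SIZE - 1))
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & PAGE_MASK)
#define TASK_SIZE	(1UL << 46)		/* placeholder address-space size */
#define MIN_GAP		(128UL << 20)		/* 128 MB */
#define MAX_GAP		(TASK_SIZE / 6 * 5)

static unsigned long mmap_base_model(unsigned long stack_rlimit,
				     unsigned long rnd)
{
	unsigned long gap = stack_rlimit;

	if (gap < MIN_GAP)
		gap = MIN_GAP;
	else if (gap > MAX_GAP)
		gap = MAX_GAP;

	return PAGE_ALIGN(TASK_SIZE - gap - rnd);
}

int main(void)
{
	printf("base: %#lx\n", mmap_base_model(8UL << 20, 0x1000000UL));
	return 0;
}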
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index b2d7ec1669b4..6321fd8bf813 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig | |||
@@ -65,6 +65,7 @@ config S390 | |||
65 | def_bool y | 65 | def_bool y |
66 | select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE | 66 | select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE |
67 | select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS | 67 | select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS |
68 | select ARCH_HAS_ELF_RANDOMIZE | ||
68 | select ARCH_HAS_GCOV_PROFILE_ALL | 69 | select ARCH_HAS_GCOV_PROFILE_ALL |
69 | select ARCH_HAS_SG_CHAIN | 70 | select ARCH_HAS_SG_CHAIN |
70 | select ARCH_HAVE_NMI_SAFE_CMPXCHG | 71 | select ARCH_HAVE_NMI_SAFE_CMPXCHG |
@@ -156,6 +157,11 @@ config S390 | |||
156 | config SCHED_OMIT_FRAME_POINTER | 157 | config SCHED_OMIT_FRAME_POINTER |
157 | def_bool y | 158 | def_bool y |
158 | 159 | ||
160 | config PGTABLE_LEVELS | ||
161 | int | ||
162 | default 4 if 64BIT | ||
163 | default 2 | ||
164 | |||
159 | source "init/Kconfig" | 165 | source "init/Kconfig" |
160 | 166 | ||
161 | source "kernel/Kconfig.freezer" | 167 | source "kernel/Kconfig.freezer" |
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index c9c875d9ed31..a5c4978462c1 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h | |||
@@ -161,10 +161,11 @@ extern unsigned int vdso_enabled; | |||
161 | /* This is the location that an ET_DYN program is loaded if exec'ed. Typical | 161 | /* This is the location that an ET_DYN program is loaded if exec'ed. Typical |
162 | use of this is to invoke "./ld.so someprog" to test out a new version of | 162 | use of this is to invoke "./ld.so someprog" to test out a new version of |
163 | the loader. We need to make sure that it is out of the way of the program | 163 | the loader. We need to make sure that it is out of the way of the program |
164 | that it will "exec", and that there is sufficient room for the brk. */ | 164 | that it will "exec", and that there is sufficient room for the brk. 64-bit |
165 | 165 | tasks are aligned to 4GB. */ | |
166 | extern unsigned long randomize_et_dyn(void); | 166 | #define ELF_ET_DYN_BASE (is_32bit_task() ? \ |
167 | #define ELF_ET_DYN_BASE randomize_et_dyn() | 167 | (STACK_TOP / 3 * 2) : \ |
168 | (STACK_TOP / 3 * 2) & ~((1UL << 32) - 1)) | ||
168 | 169 | ||
169 | /* This yields a mask that user programs can use to figure out what | 170 | /* This yields a mask that user programs can use to figure out what |
170 | instruction set this CPU supports. */ | 171 | instruction set this CPU supports. */ |
@@ -225,9 +226,6 @@ struct linux_binprm; | |||
225 | #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 | 226 | #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 |
226 | int arch_setup_additional_pages(struct linux_binprm *, int); | 227 | int arch_setup_additional_pages(struct linux_binprm *, int); |
227 | 228 | ||
228 | extern unsigned long arch_randomize_brk(struct mm_struct *mm); | ||
229 | #define arch_randomize_brk arch_randomize_brk | ||
230 | |||
231 | void *fill_cpu_elf_notes(void *ptr, struct save_area *sa, __vector128 *vxrs); | 229 | void *fill_cpu_elf_notes(void *ptr, struct save_area *sa, __vector128 *vxrs); |
232 | 230 | ||
233 | #endif | 231 | #endif |
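Editor's aside: the new ELF_ET_DYN_BASE places ET_DYN binaries at two thirds of STACK_TOP and, for 64-bit tasks, clears the low 32 bits so the base lands on a 4 GB boundary. A quick standalone check of that expression; the STACK_TOP constant is just an example and assumes a 64-bit unsigned long.

/* Evaluates (STACK_TOP / 3 * 2) and its 4 GB alignment for an example STACK_TOP. */
#include <stdio.h>

#define EXAMPLE_STACK_TOP (1UL << 53)	/* illustrative, not s390's value */

int main(void)
{
	unsigned long base32 = EXAMPLE_STACK_TOP / 3 * 2;
	unsigned long base64 = base32 & ~((1UL << 32) - 1);	/* align to 4 GB */

	printf("32-bit base: %#lx\n", base32);
	printf("64-bit base: %#lx (multiple of 4 GB: %s)\n",
	       base64, (base64 % (1UL << 32)) ? "no" : "yes");
	return 0;
}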
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 179a2c20b01f..bb3367c5cb0b 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c | |||
@@ -60,22 +60,20 @@ static inline int mmap_is_legacy(void) | |||
60 | return sysctl_legacy_va_layout; | 60 | return sysctl_legacy_va_layout; |
61 | } | 61 | } |
62 | 62 | ||
63 | static unsigned long mmap_rnd(void) | 63 | unsigned long arch_mmap_rnd(void) |
64 | { | 64 | { |
65 | if (!(current->flags & PF_RANDOMIZE)) | ||
66 | return 0; | ||
67 | if (is_32bit_task()) | 65 | if (is_32bit_task()) |
68 | return (get_random_int() & 0x7ff) << PAGE_SHIFT; | 66 | return (get_random_int() & 0x7ff) << PAGE_SHIFT; |
69 | else | 67 | else |
70 | return (get_random_int() & mmap_rnd_mask) << PAGE_SHIFT; | 68 | return (get_random_int() & mmap_rnd_mask) << PAGE_SHIFT; |
71 | } | 69 | } |
72 | 70 | ||
73 | static unsigned long mmap_base_legacy(void) | 71 | static unsigned long mmap_base_legacy(unsigned long rnd) |
74 | { | 72 | { |
75 | return TASK_UNMAPPED_BASE + mmap_rnd(); | 73 | return TASK_UNMAPPED_BASE + rnd; |
76 | } | 74 | } |
77 | 75 | ||
78 | static inline unsigned long mmap_base(void) | 76 | static inline unsigned long mmap_base(unsigned long rnd) |
79 | { | 77 | { |
80 | unsigned long gap = rlimit(RLIMIT_STACK); | 78 | unsigned long gap = rlimit(RLIMIT_STACK); |
81 | 79 | ||
@@ -84,7 +82,7 @@ static inline unsigned long mmap_base(void) | |||
84 | else if (gap > MAX_GAP) | 82 | else if (gap > MAX_GAP) |
85 | gap = MAX_GAP; | 83 | gap = MAX_GAP; |
86 | gap &= PAGE_MASK; | 84 | gap &= PAGE_MASK; |
87 | return STACK_TOP - stack_maxrandom_size() - mmap_rnd() - gap; | 85 | return STACK_TOP - stack_maxrandom_size() - rnd - gap; |
88 | } | 86 | } |
89 | 87 | ||
90 | unsigned long | 88 | unsigned long |
@@ -179,17 +177,6 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, | |||
179 | return addr; | 177 | return addr; |
180 | } | 178 | } |
181 | 179 | ||
182 | unsigned long randomize_et_dyn(void) | ||
183 | { | ||
184 | unsigned long base; | ||
185 | |||
186 | base = STACK_TOP / 3 * 2; | ||
187 | if (!is_32bit_task()) | ||
188 | /* Align to 4GB */ | ||
189 | base &= ~((1UL << 32) - 1); | ||
190 | return base + mmap_rnd(); | ||
191 | } | ||
192 | |||
193 | #ifndef CONFIG_64BIT | 180 | #ifndef CONFIG_64BIT |
194 | 181 | ||
195 | /* | 182 | /* |
@@ -198,15 +185,20 @@ unsigned long randomize_et_dyn(void) | |||
198 | */ | 185 | */ |
199 | void arch_pick_mmap_layout(struct mm_struct *mm) | 186 | void arch_pick_mmap_layout(struct mm_struct *mm) |
200 | { | 187 | { |
188 | unsigned long random_factor = 0UL; | ||
189 | |||
190 | if (current->flags & PF_RANDOMIZE) | ||
191 | random_factor = arch_mmap_rnd(); | ||
192 | |||
201 | /* | 193 | /* |
202 | * Fall back to the standard layout if the personality | 194 | * Fall back to the standard layout if the personality |
203 | * bit is set, or if the expected stack growth is unlimited: | 195 | * bit is set, or if the expected stack growth is unlimited: |
204 | */ | 196 | */ |
205 | if (mmap_is_legacy()) { | 197 | if (mmap_is_legacy()) { |
206 | mm->mmap_base = mmap_base_legacy(); | 198 | mm->mmap_base = mmap_base_legacy(random_factor); |
207 | mm->get_unmapped_area = arch_get_unmapped_area; | 199 | mm->get_unmapped_area = arch_get_unmapped_area; |
208 | } else { | 200 | } else { |
209 | mm->mmap_base = mmap_base(); | 201 | mm->mmap_base = mmap_base(random_factor); |
210 | mm->get_unmapped_area = arch_get_unmapped_area_topdown; | 202 | mm->get_unmapped_area = arch_get_unmapped_area_topdown; |
211 | } | 203 | } |
212 | } | 204 | } |
@@ -273,15 +265,20 @@ s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr, | |||
273 | */ | 265 | */ |
274 | void arch_pick_mmap_layout(struct mm_struct *mm) | 266 | void arch_pick_mmap_layout(struct mm_struct *mm) |
275 | { | 267 | { |
268 | unsigned long random_factor = 0UL; | ||
269 | |||
270 | if (current->flags & PF_RANDOMIZE) | ||
271 | random_factor = arch_mmap_rnd(); | ||
272 | |||
276 | /* | 273 | /* |
277 | * Fall back to the standard layout if the personality | 274 | * Fall back to the standard layout if the personality |
278 | * bit is set, or if the expected stack growth is unlimited: | 275 | * bit is set, or if the expected stack growth is unlimited: |
279 | */ | 276 | */ |
280 | if (mmap_is_legacy()) { | 277 | if (mmap_is_legacy()) { |
281 | mm->mmap_base = mmap_base_legacy(); | 278 | mm->mmap_base = mmap_base_legacy(random_factor); |
282 | mm->get_unmapped_area = s390_get_unmapped_area; | 279 | mm->get_unmapped_area = s390_get_unmapped_area; |
283 | } else { | 280 | } else { |
284 | mm->mmap_base = mmap_base(); | 281 | mm->mmap_base = mmap_base(random_factor); |
285 | mm->get_unmapped_area = s390_get_unmapped_area_topdown; | 282 | mm->get_unmapped_area = s390_get_unmapped_area_topdown; |
286 | } | 283 | } |
287 | } | 284 | } |
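Editor's aside: both s390 variants of arch_pick_mmap_layout() now decide on randomization once, up front, and hand the same factor to whichever base calculation the chosen layout needs. A compact model of that control flow; every helper and constant below is a stand-in, not the s390 code.

/* Sketch: one randomization decision feeding either a bottom-up ("legacy")
 * or a top-down mmap base. All values are placeholders. */
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SHIFT		12
#define TASK_UNMAPPED_BASE	(1UL << 40)	/* placeholder */
#define STACK_TOP		(1UL << 42)	/* placeholder */

static unsigned long model_mmap_rnd(void)
{
	return ((unsigned long)rand() & 0x7ff) << PAGE_SHIFT;	/* up to ~8 MB */
}

int main(int argc, char **argv)
{
	int randomize = 1;		/* stands in for PF_RANDOMIZE */
	int legacy = (argc > 1);	/* stands in for mmap_is_legacy() */
	unsigned long random_factor = 0;
	unsigned long mmap_base;

	if (randomize)
		random_factor = model_mmap_rnd();

	if (legacy)
		mmap_base = TASK_UNMAPPED_BASE + random_factor;	  /* grows up */
	else
		mmap_base = STACK_TOP - (64UL << 20) - random_factor; /* grows down */

	printf("%s layout, base %#lx\n",
	       legacy ? "legacy" : "top-down", mmap_base);
	return 0;
}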
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index eb4ef274ae9b..50057fed819d 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig | |||
@@ -162,6 +162,10 @@ config NEED_DMA_MAP_STATE | |||
162 | config NEED_SG_DMA_LENGTH | 162 | config NEED_SG_DMA_LENGTH |
163 | def_bool y | 163 | def_bool y |
164 | 164 | ||
165 | config PGTABLE_LEVELS | ||
166 | default 3 if X2TLB | ||
167 | default 2 | ||
168 | |||
165 | source "init/Kconfig" | 169 | source "init/Kconfig" |
166 | 170 | ||
167 | source "kernel/Kconfig.freezer" | 171 | source "kernel/Kconfig.freezer" |
diff --git a/arch/sh/kernel/dwarf.c b/arch/sh/kernel/dwarf.c index 67a049e75ec1..9d209a07235e 100644 --- a/arch/sh/kernel/dwarf.c +++ b/arch/sh/kernel/dwarf.c | |||
@@ -993,7 +993,7 @@ static struct unwinder dwarf_unwinder = { | |||
993 | .rating = 150, | 993 | .rating = 150, |
994 | }; | 994 | }; |
995 | 995 | ||
996 | static void dwarf_unwinder_cleanup(void) | 996 | static void __init dwarf_unwinder_cleanup(void) |
997 | { | 997 | { |
998 | struct dwarf_fde *fde, *next_fde; | 998 | struct dwarf_fde *fde, *next_fde; |
999 | struct dwarf_cie *cie, *next_cie; | 999 | struct dwarf_cie *cie, *next_cie; |
@@ -1009,6 +1009,10 @@ static void dwarf_unwinder_cleanup(void) | |||
1009 | rbtree_postorder_for_each_entry_safe(cie, next_cie, &cie_root, node) | 1009 | rbtree_postorder_for_each_entry_safe(cie, next_cie, &cie_root, node) |
1010 | kfree(cie); | 1010 | kfree(cie); |
1011 | 1011 | ||
1012 | if (dwarf_reg_pool) | ||
1013 | mempool_destroy(dwarf_reg_pool); | ||
1014 | if (dwarf_frame_pool) | ||
1015 | mempool_destroy(dwarf_frame_pool); | ||
1012 | kmem_cache_destroy(dwarf_reg_cachep); | 1016 | kmem_cache_destroy(dwarf_reg_cachep); |
1013 | kmem_cache_destroy(dwarf_frame_cachep); | 1017 | kmem_cache_destroy(dwarf_frame_cachep); |
1014 | } | 1018 | } |
@@ -1176,17 +1180,13 @@ static int __init dwarf_unwinder_init(void) | |||
1176 | sizeof(struct dwarf_reg), 0, | 1180 | sizeof(struct dwarf_reg), 0, |
1177 | SLAB_PANIC | SLAB_HWCACHE_ALIGN | SLAB_NOTRACK, NULL); | 1181 | SLAB_PANIC | SLAB_HWCACHE_ALIGN | SLAB_NOTRACK, NULL); |
1178 | 1182 | ||
1179 | dwarf_frame_pool = mempool_create(DWARF_FRAME_MIN_REQ, | 1183 | dwarf_frame_pool = mempool_create_slab_pool(DWARF_FRAME_MIN_REQ, |
1180 | mempool_alloc_slab, | 1184 | dwarf_frame_cachep); |
1181 | mempool_free_slab, | ||
1182 | dwarf_frame_cachep); | ||
1183 | if (!dwarf_frame_pool) | 1185 | if (!dwarf_frame_pool) |
1184 | goto out; | 1186 | goto out; |
1185 | 1187 | ||
1186 | dwarf_reg_pool = mempool_create(DWARF_REG_MIN_REQ, | 1188 | dwarf_reg_pool = mempool_create_slab_pool(DWARF_REG_MIN_REQ, |
1187 | mempool_alloc_slab, | 1189 | dwarf_reg_cachep); |
1188 | mempool_free_slab, | ||
1189 | dwarf_reg_cachep); | ||
1190 | if (!dwarf_reg_pool) | 1190 | if (!dwarf_reg_pool) |
1191 | goto out; | 1191 | goto out; |
1192 | 1192 | ||
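Editor's aside: the dwarf.c hunks replace the open-coded mempool_create(..., mempool_alloc_slab, mempool_free_slab, cache) calls with the mempool_create_slab_pool() shorthand, which pre-binds those slab callbacks to the cache. As a loose userspace analogy only (none of this is kernel API), the pattern is a convenience constructor that fixes the allocator callbacks:

/* Userspace analogy of a convenience constructor that pre-binds allocator
 * callbacks; purely illustrative, not the kernel mempool interface. */
#include <stdio.h>
#include <stdlib.h>

struct pool {
	size_t obj_size;
	int min_nr;
	void *(*alloc)(size_t);
	void (*release)(void *);
};

static struct pool *pool_create(int min_nr, size_t obj_size,
				void *(*alloc)(size_t), void (*release)(void *))
{
	struct pool *p = malloc(sizeof(*p));

	if (!p)
		return NULL;
	p->obj_size = obj_size;
	p->min_nr = min_nr;
	p->alloc = alloc;
	p->release = release;
	return p;
}

/* The shorthand: callers name only the sizing, not the callbacks. */
static struct pool *pool_create_default(int min_nr, size_t obj_size)
{
	return pool_create(min_nr, obj_size, malloc, free);
}

int main(void)
{
	struct pool *p = pool_create_default(64, 128);

	if (p)
		printf("pool of %d x %zu-byte objects\n", p->min_nr, p->obj_size);
	free(p);
	return 0;
}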
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index efb00ec75805..e49502acbab4 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig | |||
@@ -146,6 +146,10 @@ config GENERIC_ISA_DMA | |||
146 | config ARCH_SUPPORTS_DEBUG_PAGEALLOC | 146 | config ARCH_SUPPORTS_DEBUG_PAGEALLOC |
147 | def_bool y if SPARC64 | 147 | def_bool y if SPARC64 |
148 | 148 | ||
149 | config PGTABLE_LEVELS | ||
150 | default 4 if 64BIT | ||
151 | default 3 | ||
152 | |||
149 | source "init/Kconfig" | 153 | source "init/Kconfig" |
150 | 154 | ||
151 | source "kernel/Kconfig.freezer" | 155 | source "kernel/Kconfig.freezer" |
diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index 99632a87e697..26c80e18d7b1 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c | |||
@@ -130,26 +130,26 @@ static struct mdesc_mem_ops memblock_mdesc_ops = { | |||
130 | static struct mdesc_handle *mdesc_kmalloc(unsigned int mdesc_size) | 130 | static struct mdesc_handle *mdesc_kmalloc(unsigned int mdesc_size) |
131 | { | 131 | { |
132 | unsigned int handle_size; | 132 | unsigned int handle_size; |
133 | struct mdesc_handle *hp; | ||
134 | unsigned long addr; | ||
133 | void *base; | 135 | void *base; |
134 | 136 | ||
135 | handle_size = (sizeof(struct mdesc_handle) - | 137 | handle_size = (sizeof(struct mdesc_handle) - |
136 | sizeof(struct mdesc_hdr) + | 138 | sizeof(struct mdesc_hdr) + |
137 | mdesc_size); | 139 | mdesc_size); |
138 | 140 | ||
141 | /* | ||
142 | * Allocation has to succeed because mdesc update would be missed | ||
143 | * and such events are not retransmitted. | ||
144 | */ | ||
139 | base = kmalloc(handle_size + 15, GFP_KERNEL | __GFP_NOFAIL); | 145 | base = kmalloc(handle_size + 15, GFP_KERNEL | __GFP_NOFAIL); |
140 | if (base) { | 146 | addr = (unsigned long)base; |
141 | struct mdesc_handle *hp; | 147 | addr = (addr + 15UL) & ~15UL; |
142 | unsigned long addr; | 148 | hp = (struct mdesc_handle *) addr; |
143 | |||
144 | addr = (unsigned long)base; | ||
145 | addr = (addr + 15UL) & ~15UL; | ||
146 | hp = (struct mdesc_handle *) addr; | ||
147 | 149 | ||
148 | mdesc_handle_init(hp, handle_size, base); | 150 | mdesc_handle_init(hp, handle_size, base); |
149 | return hp; | ||
150 | } | ||
151 | 151 | ||
152 | return NULL; | 152 | return hp; |
153 | } | 153 | } |
154 | 154 | ||
155 | static void mdesc_kfree(struct mdesc_handle *hp) | 155 | static void mdesc_kfree(struct mdesc_handle *hp) |
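Editor's aside: mdesc_kmalloc() over-allocates by 15 bytes and rounds the returned address up with (addr + 15) & ~15 so the handle starts on a 16-byte boundary. The rounding trick in isolation:

/* Rounding a pointer up to a 16-byte boundary after over-allocating by 15. */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

int main(void)
{
	size_t want = 100;
	void *base = malloc(want + 15);
	uintptr_t addr = (uintptr_t)base;

	addr = (addr + 15UL) & ~15UL;	/* next multiple of 16 at or above base */
	printf("base %p, aligned %#lx (mod 16 = %lu)\n",
	       base, (unsigned long)addr, (unsigned long)(addr % 16));

	free(base);	/* free the original pointer, never the aligned one */
	return 0;
}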
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 7cca41842a9e..0142d578b5a8 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig | |||
@@ -147,6 +147,11 @@ config ARCH_DEFCONFIG | |||
147 | default "arch/tile/configs/tilepro_defconfig" if !TILEGX | 147 | default "arch/tile/configs/tilepro_defconfig" if !TILEGX |
148 | default "arch/tile/configs/tilegx_defconfig" if TILEGX | 148 | default "arch/tile/configs/tilegx_defconfig" if TILEGX |
149 | 149 | ||
150 | config PGTABLE_LEVELS | ||
151 | int | ||
152 | default 3 if 64BIT | ||
153 | default 2 | ||
154 | |||
150 | source "init/Kconfig" | 155 | source "init/Kconfig" |
151 | 156 | ||
152 | source "kernel/Kconfig.freezer" | 157 | source "kernel/Kconfig.freezer" |
diff --git a/arch/um/Kconfig.um b/arch/um/Kconfig.um index a7520c90f62d..5dbfe3d9107c 100644 --- a/arch/um/Kconfig.um +++ b/arch/um/Kconfig.um | |||
@@ -155,3 +155,8 @@ config MMAPPER | |||
155 | 155 | ||
156 | config NO_DMA | 156 | config NO_DMA |
157 | def_bool y | 157 | def_bool y |
158 | |||
159 | config PGTABLE_LEVELS | ||
160 | int | ||
161 | default 3 if 3_LEVEL_PGTABLES | ||
162 | default 2 | ||
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index faff6934c05a..d43e7e1c784b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -87,7 +87,7 @@ config X86 | |||
87 | select HAVE_ARCH_KMEMCHECK | 87 | select HAVE_ARCH_KMEMCHECK |
88 | select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP | 88 | select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP |
89 | select HAVE_USER_RETURN_NOTIFIER | 89 | select HAVE_USER_RETURN_NOTIFIER |
90 | select ARCH_BINFMT_ELF_RANDOMIZE_PIE | 90 | select ARCH_HAS_ELF_RANDOMIZE |
91 | select HAVE_ARCH_JUMP_LABEL | 91 | select HAVE_ARCH_JUMP_LABEL |
92 | select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE | 92 | select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE |
93 | select SPARSE_IRQ | 93 | select SPARSE_IRQ |
@@ -99,6 +99,7 @@ config X86 | |||
99 | select IRQ_FORCED_THREADING | 99 | select IRQ_FORCED_THREADING |
100 | select HAVE_BPF_JIT if X86_64 | 100 | select HAVE_BPF_JIT if X86_64 |
101 | select HAVE_ARCH_TRANSPARENT_HUGEPAGE | 101 | select HAVE_ARCH_TRANSPARENT_HUGEPAGE |
102 | select HAVE_ARCH_HUGE_VMAP if X86_64 || (X86_32 && X86_PAE) | ||
102 | select ARCH_HAS_SG_CHAIN | 103 | select ARCH_HAS_SG_CHAIN |
103 | select CLKEVT_I8253 | 104 | select CLKEVT_I8253 |
104 | select ARCH_HAVE_NMI_SAFE_CMPXCHG | 105 | select ARCH_HAVE_NMI_SAFE_CMPXCHG |
@@ -277,6 +278,12 @@ config ARCH_SUPPORTS_UPROBES | |||
277 | config FIX_EARLYCON_MEM | 278 | config FIX_EARLYCON_MEM |
278 | def_bool y | 279 | def_bool y |
279 | 280 | ||
281 | config PGTABLE_LEVELS | ||
282 | int | ||
283 | default 4 if X86_64 | ||
284 | default 3 if X86_PAE | ||
285 | default 2 | ||
286 | |||
280 | source "init/Kconfig" | 287 | source "init/Kconfig" |
281 | source "kernel/Kconfig.freezer" | 288 | source "kernel/Kconfig.freezer" |
282 | 289 | ||
@@ -714,17 +721,6 @@ endif #HYPERVISOR_GUEST | |||
714 | config NO_BOOTMEM | 721 | config NO_BOOTMEM |
715 | def_bool y | 722 | def_bool y |
716 | 723 | ||
717 | config MEMTEST | ||
718 | bool "Memtest" | ||
719 | ---help--- | ||
720 | This option adds a kernel parameter 'memtest', which allows memtest | ||
721 | to be set. | ||
722 | memtest=0, mean disabled; -- default | ||
723 | memtest=1, mean do 1 test pattern; | ||
724 | ... | ||
725 | memtest=4, mean do 4 test patterns. | ||
726 | If you are unsure how to answer this question, answer N. | ||
727 | |||
728 | source "arch/x86/Kconfig.cpu" | 724 | source "arch/x86/Kconfig.cpu" |
729 | 725 | ||
730 | config HPET_TIMER | 726 | config HPET_TIMER |
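Editor's aside: in the new PGTABLE_LEVELS entries the stacked "default" lines are tried top to bottom and the first one whose condition holds wins, so x86 resolves to 4 on X86_64, 3 with PAE, and 2 otherwise. The same selection written out as plain C, purely as a reading aid for the Kconfig logic:

/* Reading aid for the stacked Kconfig defaults: first match wins. */
#include <stdio.h>
#include <stdbool.h>

static int pgtable_levels(bool x86_64, bool x86_pae)
{
	if (x86_64)
		return 4;	/* default 4 if X86_64 */
	if (x86_pae)
		return 3;	/* default 3 if X86_PAE */
	return 2;		/* default 2 */
}

int main(void)
{
	printf("X86_64:       %d levels\n", pgtable_levels(true, false));
	printf("PAE i386:     %d levels\n", pgtable_levels(false, true));
	printf("classic i386: %d levels\n", pgtable_levels(false, false));
	return 0;
}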
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index 779c2efe2e97..3ab0537872fb 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h | |||
@@ -40,14 +40,6 @@ static inline void e820_mark_nosave_regions(unsigned long limit_pfn) | |||
40 | } | 40 | } |
41 | #endif | 41 | #endif |
42 | 42 | ||
43 | #ifdef CONFIG_MEMTEST | ||
44 | extern void early_memtest(unsigned long start, unsigned long end); | ||
45 | #else | ||
46 | static inline void early_memtest(unsigned long start, unsigned long end) | ||
47 | { | ||
48 | } | ||
49 | #endif | ||
50 | |||
51 | extern unsigned long e820_end_of_ram_pfn(void); | 43 | extern unsigned long e820_end_of_ram_pfn(void); |
52 | extern unsigned long e820_end_of_low_ram_pfn(void); | 44 | extern unsigned long e820_end_of_low_ram_pfn(void); |
53 | extern u64 early_reserve_e820(u64 sizet, u64 align); | 45 | extern u64 early_reserve_e820(u64 sizet, u64 align); |
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 935588d95c82..f161c189c27b 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h | |||
@@ -339,9 +339,6 @@ extern int compat_arch_setup_additional_pages(struct linux_binprm *bprm, | |||
339 | int uses_interp); | 339 | int uses_interp); |
340 | #define compat_arch_setup_additional_pages compat_arch_setup_additional_pages | 340 | #define compat_arch_setup_additional_pages compat_arch_setup_additional_pages |
341 | 341 | ||
342 | extern unsigned long arch_randomize_brk(struct mm_struct *mm); | ||
343 | #define arch_randomize_brk arch_randomize_brk | ||
344 | |||
345 | /* | 342 | /* |
346 | * True on X86_32 or when emulating IA32 on X86_64 | 343 | * True on X86_32 or when emulating IA32 on X86_64 |
347 | */ | 344 | */ |
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index f97fbe3abb67..c7c712f2648b 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h | |||
@@ -40,8 +40,10 @@ | |||
40 | 40 | ||
41 | #ifdef CONFIG_X86_64 | 41 | #ifdef CONFIG_X86_64 |
42 | #include <asm/page_64_types.h> | 42 | #include <asm/page_64_types.h> |
43 | #define IOREMAP_MAX_ORDER (PUD_SHIFT) | ||
43 | #else | 44 | #else |
44 | #include <asm/page_32_types.h> | 45 | #include <asm/page_32_types.h> |
46 | #define IOREMAP_MAX_ORDER (PMD_SHIFT) | ||
45 | #endif /* CONFIG_X86_64 */ | 47 | #endif /* CONFIG_X86_64 */ |
46 | 48 | ||
47 | #ifndef __ASSEMBLY__ | 49 | #ifndef __ASSEMBLY__ |
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 5f6051d5d139..8957810ad7d1 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h | |||
@@ -545,7 +545,7 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) | |||
545 | PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, val); | 545 | PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, val); |
546 | } | 546 | } |
547 | 547 | ||
548 | #if PAGETABLE_LEVELS >= 3 | 548 | #if CONFIG_PGTABLE_LEVELS >= 3 |
549 | static inline pmd_t __pmd(pmdval_t val) | 549 | static inline pmd_t __pmd(pmdval_t val) |
550 | { | 550 | { |
551 | pmdval_t ret; | 551 | pmdval_t ret; |
@@ -585,7 +585,7 @@ static inline void set_pud(pud_t *pudp, pud_t pud) | |||
585 | PVOP_VCALL2(pv_mmu_ops.set_pud, pudp, | 585 | PVOP_VCALL2(pv_mmu_ops.set_pud, pudp, |
586 | val); | 586 | val); |
587 | } | 587 | } |
588 | #if PAGETABLE_LEVELS == 4 | 588 | #if CONFIG_PGTABLE_LEVELS == 4 |
589 | static inline pud_t __pud(pudval_t val) | 589 | static inline pud_t __pud(pudval_t val) |
590 | { | 590 | { |
591 | pudval_t ret; | 591 | pudval_t ret; |
@@ -636,9 +636,9 @@ static inline void pud_clear(pud_t *pudp) | |||
636 | set_pud(pudp, __pud(0)); | 636 | set_pud(pudp, __pud(0)); |
637 | } | 637 | } |
638 | 638 | ||
639 | #endif /* PAGETABLE_LEVELS == 4 */ | 639 | #endif /* CONFIG_PGTABLE_LEVELS == 4 */ |
640 | 640 | ||
641 | #endif /* PAGETABLE_LEVELS >= 3 */ | 641 | #endif /* CONFIG_PGTABLE_LEVELS >= 3 */ |
642 | 642 | ||
643 | #ifdef CONFIG_X86_PAE | 643 | #ifdef CONFIG_X86_PAE |
644 | /* Special-case pte-setting operations for PAE, which can't update a | 644 | /* Special-case pte-setting operations for PAE, which can't update a |
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 7549b8b369e4..f7b0b5c112f2 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h | |||
@@ -294,7 +294,7 @@ struct pv_mmu_ops { | |||
294 | struct paravirt_callee_save pgd_val; | 294 | struct paravirt_callee_save pgd_val; |
295 | struct paravirt_callee_save make_pgd; | 295 | struct paravirt_callee_save make_pgd; |
296 | 296 | ||
297 | #if PAGETABLE_LEVELS >= 3 | 297 | #if CONFIG_PGTABLE_LEVELS >= 3 |
298 | #ifdef CONFIG_X86_PAE | 298 | #ifdef CONFIG_X86_PAE |
299 | void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); | 299 | void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); |
300 | void (*pte_clear)(struct mm_struct *mm, unsigned long addr, | 300 | void (*pte_clear)(struct mm_struct *mm, unsigned long addr, |
@@ -308,13 +308,13 @@ struct pv_mmu_ops { | |||
308 | struct paravirt_callee_save pmd_val; | 308 | struct paravirt_callee_save pmd_val; |
309 | struct paravirt_callee_save make_pmd; | 309 | struct paravirt_callee_save make_pmd; |
310 | 310 | ||
311 | #if PAGETABLE_LEVELS == 4 | 311 | #if CONFIG_PGTABLE_LEVELS == 4 |
312 | struct paravirt_callee_save pud_val; | 312 | struct paravirt_callee_save pud_val; |
313 | struct paravirt_callee_save make_pud; | 313 | struct paravirt_callee_save make_pud; |
314 | 314 | ||
315 | void (*set_pgd)(pgd_t *pudp, pgd_t pgdval); | 315 | void (*set_pgd)(pgd_t *pudp, pgd_t pgdval); |
316 | #endif /* PAGETABLE_LEVELS == 4 */ | 316 | #endif /* CONFIG_PGTABLE_LEVELS == 4 */ |
317 | #endif /* PAGETABLE_LEVELS >= 3 */ | 317 | #endif /* CONFIG_PGTABLE_LEVELS >= 3 */ |
318 | 318 | ||
319 | struct pv_lazy_ops lazy_mode; | 319 | struct pv_lazy_ops lazy_mode; |
320 | 320 | ||
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index c4412e972bbd..bf7f8b55b0f9 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h | |||
@@ -77,7 +77,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, | |||
77 | 77 | ||
78 | #define pmd_pgtable(pmd) pmd_page(pmd) | 78 | #define pmd_pgtable(pmd) pmd_page(pmd) |
79 | 79 | ||
80 | #if PAGETABLE_LEVELS > 2 | 80 | #if CONFIG_PGTABLE_LEVELS > 2 |
81 | static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) | 81 | static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) |
82 | { | 82 | { |
83 | struct page *page; | 83 | struct page *page; |
@@ -116,7 +116,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) | |||
116 | } | 116 | } |
117 | #endif /* CONFIG_X86_PAE */ | 117 | #endif /* CONFIG_X86_PAE */ |
118 | 118 | ||
119 | #if PAGETABLE_LEVELS > 3 | 119 | #if CONFIG_PGTABLE_LEVELS > 3 |
120 | static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) | 120 | static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) |
121 | { | 121 | { |
122 | paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT); | 122 | paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT); |
@@ -142,7 +142,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, | |||
142 | ___pud_free_tlb(tlb, pud); | 142 | ___pud_free_tlb(tlb, pud); |
143 | } | 143 | } |
144 | 144 | ||
145 | #endif /* PAGETABLE_LEVELS > 3 */ | 145 | #endif /* CONFIG_PGTABLE_LEVELS > 3 */ |
146 | #endif /* PAGETABLE_LEVELS > 2 */ | 146 | #endif /* CONFIG_PGTABLE_LEVELS > 2 */ |
147 | 147 | ||
148 | #endif /* _ASM_X86_PGALLOC_H */ | 148 | #endif /* _ASM_X86_PGALLOC_H */ |
diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h index daacc23e3fb9..392576433e77 100644 --- a/arch/x86/include/asm/pgtable-2level_types.h +++ b/arch/x86/include/asm/pgtable-2level_types.h | |||
@@ -17,7 +17,6 @@ typedef union { | |||
17 | #endif /* !__ASSEMBLY__ */ | 17 | #endif /* !__ASSEMBLY__ */ |
18 | 18 | ||
19 | #define SHARED_KERNEL_PMD 0 | 19 | #define SHARED_KERNEL_PMD 0 |
20 | #define PAGETABLE_LEVELS 2 | ||
21 | 20 | ||
22 | /* | 21 | /* |
23 | * traditional i386 two-level paging structure: | 22 | * traditional i386 two-level paging structure: |
diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h index 1bd5876c8649..bcc89625ebe5 100644 --- a/arch/x86/include/asm/pgtable-3level_types.h +++ b/arch/x86/include/asm/pgtable-3level_types.h | |||
@@ -24,8 +24,6 @@ typedef union { | |||
24 | #define SHARED_KERNEL_PMD 1 | 24 | #define SHARED_KERNEL_PMD 1 |
25 | #endif | 25 | #endif |
26 | 26 | ||
27 | #define PAGETABLE_LEVELS 3 | ||
28 | |||
29 | /* | 27 | /* |
30 | * PGDIR_SHIFT determines what a top-level page table entry can map | 28 | * PGDIR_SHIFT determines what a top-level page table entry can map |
31 | */ | 29 | */ |
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index a0c35bf6cb92..fe57e7a98839 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h | |||
@@ -551,7 +551,7 @@ static inline unsigned long pages_to_mb(unsigned long npg) | |||
551 | return npg >> (20 - PAGE_SHIFT); | 551 | return npg >> (20 - PAGE_SHIFT); |
552 | } | 552 | } |
553 | 553 | ||
554 | #if PAGETABLE_LEVELS > 2 | 554 | #if CONFIG_PGTABLE_LEVELS > 2 |
555 | static inline int pud_none(pud_t pud) | 555 | static inline int pud_none(pud_t pud) |
556 | { | 556 | { |
557 | return native_pud_val(pud) == 0; | 557 | return native_pud_val(pud) == 0; |
@@ -594,9 +594,9 @@ static inline int pud_large(pud_t pud) | |||
594 | { | 594 | { |
595 | return 0; | 595 | return 0; |
596 | } | 596 | } |
597 | #endif /* PAGETABLE_LEVELS > 2 */ | 597 | #endif /* CONFIG_PGTABLE_LEVELS > 2 */ |
598 | 598 | ||
599 | #if PAGETABLE_LEVELS > 3 | 599 | #if CONFIG_PGTABLE_LEVELS > 3 |
600 | static inline int pgd_present(pgd_t pgd) | 600 | static inline int pgd_present(pgd_t pgd) |
601 | { | 601 | { |
602 | return pgd_flags(pgd) & _PAGE_PRESENT; | 602 | return pgd_flags(pgd) & _PAGE_PRESENT; |
@@ -633,7 +633,7 @@ static inline int pgd_none(pgd_t pgd) | |||
633 | { | 633 | { |
634 | return !native_pgd_val(pgd); | 634 | return !native_pgd_val(pgd); |
635 | } | 635 | } |
636 | #endif /* PAGETABLE_LEVELS > 3 */ | 636 | #endif /* CONFIG_PGTABLE_LEVELS > 3 */ |
637 | 637 | ||
638 | #endif /* __ASSEMBLY__ */ | 638 | #endif /* __ASSEMBLY__ */ |
639 | 639 | ||
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 602b6028c5b6..e6844dfb4471 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h | |||
@@ -20,7 +20,6 @@ typedef struct { pteval_t pte; } pte_t; | |||
20 | #endif /* !__ASSEMBLY__ */ | 20 | #endif /* !__ASSEMBLY__ */ |
21 | 21 | ||
22 | #define SHARED_KERNEL_PMD 0 | 22 | #define SHARED_KERNEL_PMD 0 |
23 | #define PAGETABLE_LEVELS 4 | ||
24 | 23 | ||
25 | /* | 24 | /* |
26 | * PGDIR_SHIFT determines what a top-level page table entry can map | 25 | * PGDIR_SHIFT determines what a top-level page table entry can map |
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 8c7c10802e9c..78f0c8cbe316 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h | |||
@@ -234,7 +234,7 @@ static inline pgdval_t pgd_flags(pgd_t pgd) | |||
234 | return native_pgd_val(pgd) & PTE_FLAGS_MASK; | 234 | return native_pgd_val(pgd) & PTE_FLAGS_MASK; |
235 | } | 235 | } |
236 | 236 | ||
237 | #if PAGETABLE_LEVELS > 3 | 237 | #if CONFIG_PGTABLE_LEVELS > 3 |
238 | typedef struct { pudval_t pud; } pud_t; | 238 | typedef struct { pudval_t pud; } pud_t; |
239 | 239 | ||
240 | static inline pud_t native_make_pud(pmdval_t val) | 240 | static inline pud_t native_make_pud(pmdval_t val) |
@@ -255,7 +255,7 @@ static inline pudval_t native_pud_val(pud_t pud) | |||
255 | } | 255 | } |
256 | #endif | 256 | #endif |
257 | 257 | ||
258 | #if PAGETABLE_LEVELS > 2 | 258 | #if CONFIG_PGTABLE_LEVELS > 2 |
259 | typedef struct { pmdval_t pmd; } pmd_t; | 259 | typedef struct { pmdval_t pmd; } pmd_t; |
260 | 260 | ||
261 | static inline pmd_t native_make_pmd(pmdval_t val) | 261 | static inline pmd_t native_make_pmd(pmdval_t val) |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index e354cc6446ab..9435620062df 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -513,7 +513,7 @@ void __init kvm_guest_init(void) | |||
513 | * can get false positives too easily, for example if the host is | 513 | * can get false positives too easily, for example if the host is |
514 | * overcommitted. | 514 | * overcommitted. |
515 | */ | 515 | */ |
516 | watchdog_enable_hardlockup_detector(false); | 516 | hardlockup_detector_disable(); |
517 | } | 517 | } |
518 | 518 | ||
519 | static noinline uint32_t __kvm_cpuid_base(void) | 519 | static noinline uint32_t __kvm_cpuid_base(void) |
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 548d25f00c90..c614dd492f5f 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -443,7 +443,7 @@ struct pv_mmu_ops pv_mmu_ops = { | |||
443 | .ptep_modify_prot_start = __ptep_modify_prot_start, | 443 | .ptep_modify_prot_start = __ptep_modify_prot_start, |
444 | .ptep_modify_prot_commit = __ptep_modify_prot_commit, | 444 | .ptep_modify_prot_commit = __ptep_modify_prot_commit, |
445 | 445 | ||
446 | #if PAGETABLE_LEVELS >= 3 | 446 | #if CONFIG_PGTABLE_LEVELS >= 3 |
447 | #ifdef CONFIG_X86_PAE | 447 | #ifdef CONFIG_X86_PAE |
448 | .set_pte_atomic = native_set_pte_atomic, | 448 | .set_pte_atomic = native_set_pte_atomic, |
449 | .pte_clear = native_pte_clear, | 449 | .pte_clear = native_pte_clear, |
@@ -454,13 +454,13 @@ struct pv_mmu_ops pv_mmu_ops = { | |||
454 | .pmd_val = PTE_IDENT, | 454 | .pmd_val = PTE_IDENT, |
455 | .make_pmd = PTE_IDENT, | 455 | .make_pmd = PTE_IDENT, |
456 | 456 | ||
457 | #if PAGETABLE_LEVELS == 4 | 457 | #if CONFIG_PGTABLE_LEVELS == 4 |
458 | .pud_val = PTE_IDENT, | 458 | .pud_val = PTE_IDENT, |
459 | .make_pud = PTE_IDENT, | 459 | .make_pud = PTE_IDENT, |
460 | 460 | ||
461 | .set_pgd = native_set_pgd, | 461 | .set_pgd = native_set_pgd, |
462 | #endif | 462 | #endif |
463 | #endif /* PAGETABLE_LEVELS >= 3 */ | 463 | #endif /* CONFIG_PGTABLE_LEVELS >= 3 */ |
464 | 464 | ||
465 | .pte_val = PTE_IDENT, | 465 | .pte_val = PTE_IDENT, |
466 | .pgd_val = PTE_IDENT, | 466 | .pgd_val = PTE_IDENT, |
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index c4cc74006c61..a482d105172b 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile | |||
@@ -32,6 +32,4 @@ obj-$(CONFIG_AMD_NUMA) += amdtopology.o | |||
32 | obj-$(CONFIG_ACPI_NUMA) += srat.o | 32 | obj-$(CONFIG_ACPI_NUMA) += srat.o |
33 | obj-$(CONFIG_NUMA_EMU) += numa_emulation.o | 33 | obj-$(CONFIG_NUMA_EMU) += numa_emulation.o |
34 | 34 | ||
35 | obj-$(CONFIG_MEMTEST) += memtest.o | ||
36 | |||
37 | obj-$(CONFIG_X86_INTEL_MPX) += mpx.o | 35 | obj-$(CONFIG_X86_INTEL_MPX) += mpx.o |
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index fdf617c00e2f..5ead4d6cf3a7 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c | |||
@@ -67,8 +67,13 @@ static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages, | |||
67 | 67 | ||
68 | /* | 68 | /* |
69 | * Remap an arbitrary physical address space into the kernel virtual | 69 | * Remap an arbitrary physical address space into the kernel virtual |
70 | * address space. Needed when the kernel wants to access high addresses | 70 | * address space. It transparently creates kernel huge I/O mapping when |
71 | * directly. | 71 | * the physical address is aligned by a huge page size (1GB or 2MB) and |
72 | * the requested size is at least the huge page size. | ||
73 | * | ||
74 | * NOTE: MTRRs can override PAT memory types with a 4KB granularity. | ||
75 | * Therefore, the mapping code falls back to use a smaller page toward 4KB | ||
76 | * when a mapping range is covered by non-WB type of MTRRs. | ||
72 | * | 77 | * |
73 | * NOTE! We need to allow non-page-aligned mappings too: we will obviously | 78 | * NOTE! We need to allow non-page-aligned mappings too: we will obviously |
74 | * have to convert them into an offset in a page-aligned mapping, but the | 79 | * have to convert them into an offset in a page-aligned mapping, but the |
@@ -326,6 +331,20 @@ void iounmap(volatile void __iomem *addr) | |||
326 | } | 331 | } |
327 | EXPORT_SYMBOL(iounmap); | 332 | EXPORT_SYMBOL(iounmap); |
328 | 333 | ||
334 | int arch_ioremap_pud_supported(void) | ||
335 | { | ||
336 | #ifdef CONFIG_X86_64 | ||
337 | return cpu_has_gbpages; | ||
338 | #else | ||
339 | return 0; | ||
340 | #endif | ||
341 | } | ||
342 | |||
343 | int arch_ioremap_pmd_supported(void) | ||
344 | { | ||
345 | return cpu_has_pse; | ||
346 | } | ||
347 | |||
329 | /* | 348 | /* |
330 | * Convert a physical pointer to a virtual kernel pointer for /dev/mem | 349 | * Convert a physical pointer to a virtual kernel pointer for /dev/mem |
331 | * access | 350 | * access |
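Editor's aside: the updated ioremap comment says a huge kernel I/O mapping is only attempted when the physical address is aligned to the huge page size and the requested size covers at least one huge page, with a fallback when non-WB MTRRs overlap the range. A small model of just the alignment/size test; the MTRR check is omitted and the 2 MB/1 GB sizes are the usual x86 huge-page sizes assumed for the example.

/* Could this physical range use a 1 GB or 2 MB mapping? Start must be
 * aligned to the huge size and the length must cover at least one huge
 * page. MTRR interaction is ignored in this sketch. */
#include <stdio.h>

#define PMD_PAGE (2UL << 20)	/* 2 MB */
#define PUD_PAGE (1UL << 30)	/* 1 GB */

static int can_use(unsigned long phys, unsigned long size, unsigned long huge)
{
	return (phys % huge) == 0 && size >= huge;
}

int main(void)
{
	unsigned long phys = 0x40000000UL;	/* 1 GB aligned */
	unsigned long size = 16UL << 20;	/* 16 MB */

	printf("1 GB mapping ok: %d\n", can_use(phys, size, PUD_PAGE)); /* 0: range too small */
	printf("2 MB mapping ok: %d\n", can_use(phys, size, PMD_PAGE)); /* 1 */
	return 0;
}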
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index df4552bd239e..9d518d693b4b 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c | |||
@@ -65,24 +65,23 @@ static int mmap_is_legacy(void) | |||
65 | return sysctl_legacy_va_layout; | 65 | return sysctl_legacy_va_layout; |
66 | } | 66 | } |
67 | 67 | ||
68 | static unsigned long mmap_rnd(void) | 68 | unsigned long arch_mmap_rnd(void) |
69 | { | 69 | { |
70 | unsigned long rnd = 0; | 70 | unsigned long rnd; |
71 | 71 | ||
72 | /* | 72 | /* |
73 | * 8 bits of randomness in 32bit mmaps, 20 address space bits | 73 | * 8 bits of randomness in 32bit mmaps, 20 address space bits |
74 | * 28 bits of randomness in 64bit mmaps, 40 address space bits | 74 | * 28 bits of randomness in 64bit mmaps, 40 address space bits |
75 | */ | 75 | */ |
76 | if (current->flags & PF_RANDOMIZE) { | 76 | if (mmap_is_ia32()) |
77 | if (mmap_is_ia32()) | 77 | rnd = (unsigned long)get_random_int() % (1<<8); |
78 | rnd = get_random_int() % (1<<8); | 78 | else |
79 | else | 79 | rnd = (unsigned long)get_random_int() % (1<<28); |
80 | rnd = get_random_int() % (1<<28); | 80 | |
81 | } | ||
82 | return rnd << PAGE_SHIFT; | 81 | return rnd << PAGE_SHIFT; |
83 | } | 82 | } |
84 | 83 | ||
85 | static unsigned long mmap_base(void) | 84 | static unsigned long mmap_base(unsigned long rnd) |
86 | { | 85 | { |
87 | unsigned long gap = rlimit(RLIMIT_STACK); | 86 | unsigned long gap = rlimit(RLIMIT_STACK); |
88 | 87 | ||
@@ -91,19 +90,19 @@ static unsigned long mmap_base(void) | |||
91 | else if (gap > MAX_GAP) | 90 | else if (gap > MAX_GAP) |
92 | gap = MAX_GAP; | 91 | gap = MAX_GAP; |
93 | 92 | ||
94 | return PAGE_ALIGN(TASK_SIZE - gap - mmap_rnd()); | 93 | return PAGE_ALIGN(TASK_SIZE - gap - rnd); |
95 | } | 94 | } |
96 | 95 | ||
97 | /* | 96 | /* |
98 | * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64 | 97 | * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64 |
99 | * does, but not when emulating X86_32 | 98 | * does, but not when emulating X86_32 |
100 | */ | 99 | */ |
101 | static unsigned long mmap_legacy_base(void) | 100 | static unsigned long mmap_legacy_base(unsigned long rnd) |
102 | { | 101 | { |
103 | if (mmap_is_ia32()) | 102 | if (mmap_is_ia32()) |
104 | return TASK_UNMAPPED_BASE; | 103 | return TASK_UNMAPPED_BASE; |
105 | else | 104 | else |
106 | return TASK_UNMAPPED_BASE + mmap_rnd(); | 105 | return TASK_UNMAPPED_BASE + rnd; |
107 | } | 106 | } |
108 | 107 | ||
109 | /* | 108 | /* |
@@ -112,13 +111,18 @@ static unsigned long mmap_legacy_base(void) | |||
112 | */ | 111 | */ |
113 | void arch_pick_mmap_layout(struct mm_struct *mm) | 112 | void arch_pick_mmap_layout(struct mm_struct *mm) |
114 | { | 113 | { |
115 | mm->mmap_legacy_base = mmap_legacy_base(); | 114 | unsigned long random_factor = 0UL; |
116 | mm->mmap_base = mmap_base(); | 115 | |
116 | if (current->flags & PF_RANDOMIZE) | ||
117 | random_factor = arch_mmap_rnd(); | ||
118 | |||
119 | mm->mmap_legacy_base = mmap_legacy_base(random_factor); | ||
117 | 120 | ||
118 | if (mmap_is_legacy()) { | 121 | if (mmap_is_legacy()) { |
119 | mm->mmap_base = mm->mmap_legacy_base; | 122 | mm->mmap_base = mm->mmap_legacy_base; |
120 | mm->get_unmapped_area = arch_get_unmapped_area; | 123 | mm->get_unmapped_area = arch_get_unmapped_area; |
121 | } else { | 124 | } else { |
125 | mm->mmap_base = mmap_base(random_factor); | ||
122 | mm->get_unmapped_area = arch_get_unmapped_area_topdown; | 126 | mm->get_unmapped_area = arch_get_unmapped_area_topdown; |
123 | } | 127 | } |
124 | } | 128 | } |
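
[Note on the x86 mmap hunk above] This hunk hoists the PF_RANDOMIZE decision out of the per-arch helper: arch_mmap_rnd() now always computes a random offset, and arch_pick_mmap_layout() only asks for it when the task has randomization enabled, feeding the same factor into both mmap_legacy_base() and mmap_base(). A minimal userspace sketch of the offset arithmetic follows; it is an illustration only, with rand() standing in for get_random_int() and a 4 KiB page size assumed.

    #include <stdio.h>
    #include <stdlib.h>
    #include <time.h>

    #define PAGE_SHIFT 12                     /* assume 4 KiB pages */

    /* Mirrors the x86 policy: 8 random bits for 32-bit tasks, 28 for 64-bit. */
    static unsigned long mmap_random_offset(int is_32bit)
    {
            unsigned long rnd;

            if (is_32bit)
                    rnd = (unsigned long)rand() % (1UL << 8);
            else
                    rnd = (unsigned long)rand() % (1UL << 28);

            return rnd << PAGE_SHIFT;         /* page-align the offset */
    }

    int main(void)
    {
            srand((unsigned)time(NULL));
            printf("32-bit offset: %#lx\n", mmap_random_offset(1));
            printf("64-bit offset: %#lx\n", mmap_random_offset(0));
            return 0;
    }

With 8 bits the base can move by up to about 1 MiB of page-aligned slack; with 28 bits by up to about 1 TiB, which matches the comment kept in the hunk.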
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 5a7e5252c878..0b97d2c75df3 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c | |||
@@ -4,6 +4,7 @@ | |||
4 | #include <asm/pgtable.h> | 4 | #include <asm/pgtable.h> |
5 | #include <asm/tlb.h> | 5 | #include <asm/tlb.h> |
6 | #include <asm/fixmap.h> | 6 | #include <asm/fixmap.h> |
7 | #include <asm/mtrr.h> | ||
7 | 8 | ||
8 | #define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO | 9 | #define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO |
9 | 10 | ||
@@ -58,7 +59,7 @@ void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte) | |||
58 | tlb_remove_page(tlb, pte); | 59 | tlb_remove_page(tlb, pte); |
59 | } | 60 | } |
60 | 61 | ||
61 | #if PAGETABLE_LEVELS > 2 | 62 | #if CONFIG_PGTABLE_LEVELS > 2 |
62 | void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) | 63 | void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) |
63 | { | 64 | { |
64 | struct page *page = virt_to_page(pmd); | 65 | struct page *page = virt_to_page(pmd); |
@@ -74,14 +75,14 @@ void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) | |||
74 | tlb_remove_page(tlb, page); | 75 | tlb_remove_page(tlb, page); |
75 | } | 76 | } |
76 | 77 | ||
77 | #if PAGETABLE_LEVELS > 3 | 78 | #if CONFIG_PGTABLE_LEVELS > 3 |
78 | void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) | 79 | void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) |
79 | { | 80 | { |
80 | paravirt_release_pud(__pa(pud) >> PAGE_SHIFT); | 81 | paravirt_release_pud(__pa(pud) >> PAGE_SHIFT); |
81 | tlb_remove_page(tlb, virt_to_page(pud)); | 82 | tlb_remove_page(tlb, virt_to_page(pud)); |
82 | } | 83 | } |
83 | #endif /* PAGETABLE_LEVELS > 3 */ | 84 | #endif /* CONFIG_PGTABLE_LEVELS > 3 */ |
84 | #endif /* PAGETABLE_LEVELS > 2 */ | 85 | #endif /* CONFIG_PGTABLE_LEVELS > 2 */ |
85 | 86 | ||
86 | static inline void pgd_list_add(pgd_t *pgd) | 87 | static inline void pgd_list_add(pgd_t *pgd) |
87 | { | 88 | { |
@@ -117,9 +118,9 @@ static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd) | |||
117 | /* If the pgd points to a shared pagetable level (either the | 118 | /* If the pgd points to a shared pagetable level (either the |
118 | ptes in non-PAE, or shared PMD in PAE), then just copy the | 119 | ptes in non-PAE, or shared PMD in PAE), then just copy the |
119 | references from swapper_pg_dir. */ | 120 | references from swapper_pg_dir. */ |
120 | if (PAGETABLE_LEVELS == 2 || | 121 | if (CONFIG_PGTABLE_LEVELS == 2 || |
121 | (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD) || | 122 | (CONFIG_PGTABLE_LEVELS == 3 && SHARED_KERNEL_PMD) || |
122 | PAGETABLE_LEVELS == 4) { | 123 | CONFIG_PGTABLE_LEVELS == 4) { |
123 | clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY, | 124 | clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY, |
124 | swapper_pg_dir + KERNEL_PGD_BOUNDARY, | 125 | swapper_pg_dir + KERNEL_PGD_BOUNDARY, |
125 | KERNEL_PGD_PTRS); | 126 | KERNEL_PGD_PTRS); |
@@ -560,3 +561,67 @@ void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys, | |||
560 | { | 561 | { |
561 | __native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags)); | 562 | __native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags)); |
562 | } | 563 | } |
564 | |||
565 | #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP | ||
566 | int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) | ||
567 | { | ||
568 | u8 mtrr; | ||
569 | |||
570 | /* | ||
571 | * Do not use a huge page when the range is covered by non-WB type | ||
572 | * of MTRRs. | ||
573 | */ | ||
574 | mtrr = mtrr_type_lookup(addr, addr + PUD_SIZE); | ||
575 | if ((mtrr != MTRR_TYPE_WRBACK) && (mtrr != 0xFF)) | ||
576 | return 0; | ||
577 | |||
578 | prot = pgprot_4k_2_large(prot); | ||
579 | |||
580 | set_pte((pte_t *)pud, pfn_pte( | ||
581 | (u64)addr >> PAGE_SHIFT, | ||
582 | __pgprot(pgprot_val(prot) | _PAGE_PSE))); | ||
583 | |||
584 | return 1; | ||
585 | } | ||
586 | |||
587 | int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) | ||
588 | { | ||
589 | u8 mtrr; | ||
590 | |||
591 | /* | ||
592 | * Do not use a huge page when the range is covered by non-WB type | ||
593 | * of MTRRs. | ||
594 | */ | ||
595 | mtrr = mtrr_type_lookup(addr, addr + PMD_SIZE); | ||
596 | if ((mtrr != MTRR_TYPE_WRBACK) && (mtrr != 0xFF)) | ||
597 | return 0; | ||
598 | |||
599 | prot = pgprot_4k_2_large(prot); | ||
600 | |||
601 | set_pte((pte_t *)pmd, pfn_pte( | ||
602 | (u64)addr >> PAGE_SHIFT, | ||
603 | __pgprot(pgprot_val(prot) | _PAGE_PSE))); | ||
604 | |||
605 | return 1; | ||
606 | } | ||
607 | |||
608 | int pud_clear_huge(pud_t *pud) | ||
609 | { | ||
610 | if (pud_large(*pud)) { | ||
611 | pud_clear(pud); | ||
612 | return 1; | ||
613 | } | ||
614 | |||
615 | return 0; | ||
616 | } | ||
617 | |||
618 | int pmd_clear_huge(pmd_t *pmd) | ||
619 | { | ||
620 | if (pmd_large(*pmd)) { | ||
621 | pmd_clear(pmd); | ||
622 | return 1; | ||
623 | } | ||
624 | |||
625 | return 0; | ||
626 | } | ||
627 | #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ | ||
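
[Note on the pgtable.c hunk above] The new pud_set_huge()/pmd_set_huge() helpers return 1 when a huge mapping was installed and 0 to tell the generic vmap code to fall back to smaller pages, and the MTRR lookup gates them so ranges that are not uniformly write-back never get a huge mapping. The sketch below only illustrates that calling convention; range_is_write_back() and try_set_huge() are made-up stand-ins, not kernel functions.

    #include <stdio.h>
    #include <stdbool.h>
    #include <stdint.h>

    typedef uint64_t phys_addr_t;             /* stand-in for the kernel type */

    static bool range_is_write_back(phys_addr_t start, phys_addr_t end)
    {
            (void)start; (void)end;
            return true;                      /* pretend the whole range is WB */
    }

    /* Returns 1 when a huge mapping may be used, 0 to fall back to 4 KiB pages. */
    static int try_set_huge(phys_addr_t addr, phys_addr_t size)
    {
            if (!range_is_write_back(addr, addr + size))
                    return 0;                 /* mixed cache types: no huge page */

            /* ... the real code installs the PSE mapping here ... */
            return 1;
    }

    int main(void)
    {
            phys_addr_t addr = 0x200000, pmd_size = 2UL << 20;

            printf("huge mapping used: %d\n", try_set_huge(addr, pmd_size));
            return 0;
    }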
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index adca9e2b6553..65083ad63b6f 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -502,7 +502,7 @@ __visible pmd_t xen_make_pmd(pmdval_t pmd) | |||
502 | } | 502 | } |
503 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd); | 503 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd); |
504 | 504 | ||
505 | #if PAGETABLE_LEVELS == 4 | 505 | #if CONFIG_PGTABLE_LEVELS == 4 |
506 | __visible pudval_t xen_pud_val(pud_t pud) | 506 | __visible pudval_t xen_pud_val(pud_t pud) |
507 | { | 507 | { |
508 | return pte_mfn_to_pfn(pud.pud); | 508 | return pte_mfn_to_pfn(pud.pud); |
@@ -589,7 +589,7 @@ static void xen_set_pgd(pgd_t *ptr, pgd_t val) | |||
589 | 589 | ||
590 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 590 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
591 | } | 591 | } |
592 | #endif /* PAGETABLE_LEVELS == 4 */ | 592 | #endif /* CONFIG_PGTABLE_LEVELS == 4 */ |
593 | 593 | ||
594 | /* | 594 | /* |
595 | * (Yet another) pagetable walker. This one is intended for pinning a | 595 | * (Yet another) pagetable walker. This one is intended for pinning a |
@@ -1628,7 +1628,7 @@ static void xen_release_pmd(unsigned long pfn) | |||
1628 | xen_release_ptpage(pfn, PT_PMD); | 1628 | xen_release_ptpage(pfn, PT_PMD); |
1629 | } | 1629 | } |
1630 | 1630 | ||
1631 | #if PAGETABLE_LEVELS == 4 | 1631 | #if CONFIG_PGTABLE_LEVELS == 4 |
1632 | static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn) | 1632 | static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn) |
1633 | { | 1633 | { |
1634 | xen_alloc_ptpage(mm, pfn, PT_PUD); | 1634 | xen_alloc_ptpage(mm, pfn, PT_PUD); |
@@ -2046,7 +2046,7 @@ static void __init xen_post_allocator_init(void) | |||
2046 | pv_mmu_ops.set_pte = xen_set_pte; | 2046 | pv_mmu_ops.set_pte = xen_set_pte; |
2047 | pv_mmu_ops.set_pmd = xen_set_pmd; | 2047 | pv_mmu_ops.set_pmd = xen_set_pmd; |
2048 | pv_mmu_ops.set_pud = xen_set_pud; | 2048 | pv_mmu_ops.set_pud = xen_set_pud; |
2049 | #if PAGETABLE_LEVELS == 4 | 2049 | #if CONFIG_PGTABLE_LEVELS == 4 |
2050 | pv_mmu_ops.set_pgd = xen_set_pgd; | 2050 | pv_mmu_ops.set_pgd = xen_set_pgd; |
2051 | #endif | 2051 | #endif |
2052 | 2052 | ||
@@ -2056,7 +2056,7 @@ static void __init xen_post_allocator_init(void) | |||
2056 | pv_mmu_ops.alloc_pmd = xen_alloc_pmd; | 2056 | pv_mmu_ops.alloc_pmd = xen_alloc_pmd; |
2057 | pv_mmu_ops.release_pte = xen_release_pte; | 2057 | pv_mmu_ops.release_pte = xen_release_pte; |
2058 | pv_mmu_ops.release_pmd = xen_release_pmd; | 2058 | pv_mmu_ops.release_pmd = xen_release_pmd; |
2059 | #if PAGETABLE_LEVELS == 4 | 2059 | #if CONFIG_PGTABLE_LEVELS == 4 |
2060 | pv_mmu_ops.alloc_pud = xen_alloc_pud; | 2060 | pv_mmu_ops.alloc_pud = xen_alloc_pud; |
2061 | pv_mmu_ops.release_pud = xen_release_pud; | 2061 | pv_mmu_ops.release_pud = xen_release_pud; |
2062 | #endif | 2062 | #endif |
@@ -2122,14 +2122,14 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { | |||
2122 | .make_pmd = PV_CALLEE_SAVE(xen_make_pmd), | 2122 | .make_pmd = PV_CALLEE_SAVE(xen_make_pmd), |
2123 | .pmd_val = PV_CALLEE_SAVE(xen_pmd_val), | 2123 | .pmd_val = PV_CALLEE_SAVE(xen_pmd_val), |
2124 | 2124 | ||
2125 | #if PAGETABLE_LEVELS == 4 | 2125 | #if CONFIG_PGTABLE_LEVELS == 4 |
2126 | .pud_val = PV_CALLEE_SAVE(xen_pud_val), | 2126 | .pud_val = PV_CALLEE_SAVE(xen_pud_val), |
2127 | .make_pud = PV_CALLEE_SAVE(xen_make_pud), | 2127 | .make_pud = PV_CALLEE_SAVE(xen_make_pud), |
2128 | .set_pgd = xen_set_pgd_hyper, | 2128 | .set_pgd = xen_set_pgd_hyper, |
2129 | 2129 | ||
2130 | .alloc_pud = xen_alloc_pmd_init, | 2130 | .alloc_pud = xen_alloc_pmd_init, |
2131 | .release_pud = xen_release_pmd_init, | 2131 | .release_pud = xen_release_pmd_init, |
2132 | #endif /* PAGETABLE_LEVELS == 4 */ | 2132 | #endif /* CONFIG_PGTABLE_LEVELS == 4 */ |
2133 | 2133 | ||
2134 | .activate_mm = xen_activate_mm, | 2134 | .activate_mm = xen_activate_mm, |
2135 | .dup_mmap = xen_dup_mmap, | 2135 | .dup_mmap = xen_dup_mmap, |
diff --git a/drivers/base/memory.c b/drivers/base/memory.c index af9c911cd6b5..2804aed3f416 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c | |||
@@ -219,6 +219,7 @@ static bool pages_correctly_reserved(unsigned long start_pfn) | |||
219 | /* | 219 | /* |
220 | * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is | 220 | * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is |
221 | * OK to have direct references to sparsemem variables in here. | 221 | * OK to have direct references to sparsemem variables in here. |
222 | * Must already be protected by mem_hotplug_begin(). | ||
222 | */ | 223 | */ |
223 | static int | 224 | static int |
224 | memory_block_action(unsigned long phys_index, unsigned long action, int online_type) | 225 | memory_block_action(unsigned long phys_index, unsigned long action, int online_type) |
@@ -228,7 +229,7 @@ memory_block_action(unsigned long phys_index, unsigned long action, int online_t | |||
228 | struct page *first_page; | 229 | struct page *first_page; |
229 | int ret; | 230 | int ret; |
230 | 231 | ||
231 | start_pfn = phys_index << PFN_SECTION_SHIFT; | 232 | start_pfn = section_nr_to_pfn(phys_index); |
232 | first_page = pfn_to_page(start_pfn); | 233 | first_page = pfn_to_page(start_pfn); |
233 | 234 | ||
234 | switch (action) { | 235 | switch (action) { |
@@ -286,6 +287,7 @@ static int memory_subsys_online(struct device *dev) | |||
286 | if (mem->online_type < 0) | 287 | if (mem->online_type < 0) |
287 | mem->online_type = MMOP_ONLINE_KEEP; | 288 | mem->online_type = MMOP_ONLINE_KEEP; |
288 | 289 | ||
290 | /* Already under protection of mem_hotplug_begin() */ | ||
289 | ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE); | 291 | ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE); |
290 | 292 | ||
291 | /* clear online_type */ | 293 | /* clear online_type */ |
@@ -328,17 +330,19 @@ store_mem_state(struct device *dev, | |||
328 | goto err; | 330 | goto err; |
329 | } | 331 | } |
330 | 332 | ||
333 | /* | ||
334 | * Memory hotplug needs to hold mem_hotplug_begin() for probe to find | ||
335 | * the correct memory block to online before doing device_online(dev), | ||
336 | * which will take dev->mutex. Take the lock early to prevent an | ||
337 | * inversion, memory_subsys_online() callbacks will be implemented by | ||
338 | * assuming it's already protected. | ||
339 | */ | ||
340 | mem_hotplug_begin(); | ||
341 | |||
331 | switch (online_type) { | 342 | switch (online_type) { |
332 | case MMOP_ONLINE_KERNEL: | 343 | case MMOP_ONLINE_KERNEL: |
333 | case MMOP_ONLINE_MOVABLE: | 344 | case MMOP_ONLINE_MOVABLE: |
334 | case MMOP_ONLINE_KEEP: | 345 | case MMOP_ONLINE_KEEP: |
335 | /* | ||
336 | * mem->online_type is not protected so there can be a | ||
337 | * race here. However, when racing online, the first | ||
338 | * will succeed and the second will just return as the | ||
339 | * block will already be online. The online type | ||
340 | * could be either one, but that is expected. | ||
341 | */ | ||
342 | mem->online_type = online_type; | 346 | mem->online_type = online_type; |
343 | ret = device_online(&mem->dev); | 347 | ret = device_online(&mem->dev); |
344 | break; | 348 | break; |
@@ -349,6 +353,7 @@ store_mem_state(struct device *dev, | |||
349 | ret = -EINVAL; /* should never happen */ | 353 | ret = -EINVAL; /* should never happen */ |
350 | } | 354 | } |
351 | 355 | ||
356 | mem_hotplug_done(); | ||
352 | err: | 357 | err: |
353 | unlock_device_hotplug(); | 358 | unlock_device_hotplug(); |
354 | 359 | ||
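
[Note on the drivers/base/memory.c hunk above] store_mem_state() now takes mem_hotplug_begin() before calling device_online(), so the hotplug lock is always acquired ahead of dev->mutex, and memory_block_action()/memory_subsys_online() are documented as already running under that protection. The point is the usual inversion-avoidance rule: take the two locks in one fixed order on every path. A small pthread sketch of that rule, purely illustrative and unrelated to the kernel's actual locking primitives:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t hotplug_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t device_lock  = PTHREAD_MUTEX_INITIALIZER;

    /* Every path takes hotplug_lock first, device_lock second. */
    static void online_block(int id)
    {
            pthread_mutex_lock(&hotplug_lock);   /* outer lock, taken early */
            pthread_mutex_lock(&device_lock);    /* inner lock (dev->mutex here) */

            printf("block %d onlined\n", id);

            pthread_mutex_unlock(&device_lock);
            pthread_mutex_unlock(&hotplug_lock);
    }

    int main(void)
    {
            online_block(0);
            return 0;
    }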
diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index 2c5d4567d1da..acde3f5d6e9e 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c | |||
@@ -738,11 +738,11 @@ static int zfcp_erp_adapter_strategy_open_fsf(struct zfcp_erp_action *act) | |||
738 | return ZFCP_ERP_FAILED; | 738 | return ZFCP_ERP_FAILED; |
739 | 739 | ||
740 | if (mempool_resize(act->adapter->pool.sr_data, | 740 | if (mempool_resize(act->adapter->pool.sr_data, |
741 | act->adapter->stat_read_buf_num, GFP_KERNEL)) | 741 | act->adapter->stat_read_buf_num)) |
742 | return ZFCP_ERP_FAILED; | 742 | return ZFCP_ERP_FAILED; |
743 | 743 | ||
744 | if (mempool_resize(act->adapter->pool.status_read_req, | 744 | if (mempool_resize(act->adapter->pool.status_read_req, |
745 | act->adapter->stat_read_buf_num, GFP_KERNEL)) | 745 | act->adapter->stat_read_buf_num)) |
746 | return ZFCP_ERP_FAILED; | 746 | return ZFCP_ERP_FAILED; |
747 | 747 | ||
748 | atomic_set(&act->adapter->stat_miss, act->adapter->stat_read_buf_num); | 748 | atomic_set(&act->adapter->stat_miss, act->adapter->stat_read_buf_num); |
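
[Note on the zfcp hunk above] This hunk, like the cifs one further down, adapts callers to a mempool_resize() that no longer takes a gfp_mask; the allocation flags are chosen inside the mempool code, so callers pass only the pool and the new element count. A mocked-up before/after of the call shape, with mempool_resize_mock() as a made-up stand-in:

    #include <stdio.h>
    #include <stddef.h>

    struct mempool { size_t nr_elements; };

    /* Mock of the new interface: no gfp_mask argument (illustration only). */
    static int mempool_resize_mock(struct mempool *pool, size_t new_nr)
    {
            pool->nr_elements = new_nr;       /* real code reallocates elements */
            return 0;
    }

    int main(void)
    {
            struct mempool pool = { .nr_elements = 4 };

            /* old call shape: mempool_resize(pool, new_nr, GFP_KERNEL); */
            /* new call shape: mempool_resize(pool, new_nr);             */
            if (mempool_resize_mock(&pool, 16) == 0)
                    printf("pool resized to %zu elements\n", pool.nr_elements);
            return 0;
    }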
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h b/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h index a260e99a4447..d72605864b0a 100644 --- a/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h +++ b/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h | |||
@@ -55,7 +55,9 @@ truncate_complete_page(struct address_space *mapping, struct page *page) | |||
55 | if (PagePrivate(page)) | 55 | if (PagePrivate(page)) |
56 | page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE); | 56 | page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE); |
57 | 57 | ||
58 | cancel_dirty_page(page, PAGE_SIZE); | 58 | if (TestClearPageDirty(page)) |
59 | account_page_cleaned(page, mapping); | ||
60 | |||
59 | ClearPageMappedToDisk(page); | 61 | ClearPageMappedToDisk(page); |
60 | ll_delete_from_page_cache(page); | 62 | ll_delete_from_page_cache(page); |
61 | } | 63 | } |
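
[Note on the lustre hunk above] cancel_dirty_page() is being retired in this series, so callers such as lustre (and fs/buffer.c below) open-code the same step: atomically test-and-clear the dirty flag and, only if it was actually set, undo the dirty accounting. The atomic test-and-clear is what stops two racing cleaners from double-accounting. A userspace sketch of that shape using C11 atomics, not the kernel page-flag code:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct page_mock {
            atomic_bool dirty;
    };

    static long dirty_pages;                  /* stands in for the dirty counters */

    /* Fix the accounting only if we were the ones to clear the flag. */
    static void cancel_dirty(struct page_mock *page)
    {
            bool expected = true;

            if (atomic_compare_exchange_strong(&page->dirty, &expected, false))
                    dirty_pages--;            /* account_page_cleaned() analogue */
    }

    int main(void)
    {
            struct page_mock page = { .dirty = true };

            dirty_pages = 1;
            cancel_dirty(&page);
            cancel_dirty(&page);              /* second call is a no-op */
            printf("dirty pages: %ld\n", dirty_pages);
            return 0;
    }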
diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c index 8a65423bc696..c4211a31612d 100644 --- a/drivers/xen/tmem.c +++ b/drivers/xen/tmem.c | |||
@@ -397,13 +397,15 @@ static int __init xen_tmem_init(void) | |||
397 | #ifdef CONFIG_CLEANCACHE | 397 | #ifdef CONFIG_CLEANCACHE |
398 | BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid)); | 398 | BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid)); |
399 | if (tmem_enabled && cleancache) { | 399 | if (tmem_enabled && cleancache) { |
400 | char *s = ""; | 400 | int err; |
401 | struct cleancache_ops *old_ops = | 401 | |
402 | cleancache_register_ops(&tmem_cleancache_ops); | 402 | err = cleancache_register_ops(&tmem_cleancache_ops); |
403 | if (old_ops) | 403 | if (err) |
404 | s = " (WARNING: cleancache_ops overridden)"; | 404 | pr_warn("xen-tmem: failed to enable cleancache: %d\n", |
405 | pr_info("cleancache enabled, RAM provided by Xen Transcendent Memory%s\n", | 405 | err); |
406 | s); | 406 | else |
407 | pr_info("cleancache enabled, RAM provided by " | ||
408 | "Xen Transcendent Memory\n"); | ||
407 | } | 409 | } |
408 | #endif | 410 | #endif |
409 | #ifdef CONFIG_XEN_SELFBALLOONING | 411 | #ifdef CONFIG_XEN_SELFBALLOONING |
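
[Note on the xen/tmem.c hunk above] The tmem change follows a cleancache API rework in this series: cleancache_register_ops() now returns 0 or a negative errno instead of the previously registered ops pointer, so only one backend can win and the caller simply checks the error. A sketch of that registration style with made-up names, not the real API:

    #include <errno.h>
    #include <stddef.h>
    #include <stdio.h>

    struct backend_ops { const char *name; };

    static const struct backend_ops *registered;

    /* New style: 0 on success, -EBUSY if a backend is already in place. */
    static int register_backend(const struct backend_ops *ops)
    {
            if (registered)
                    return -EBUSY;
            registered = ops;
            return 0;
    }

    int main(void)
    {
            static const struct backend_ops tmem  = { "xen-tmem" };
            static const struct backend_ops other = { "other" };

            printf("tmem:  %d\n", register_backend(&tmem));   /* 0: enabled */
            printf("other: %d\n", register_backend(&other));  /* -EBUSY */
            return 0;
    }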
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 270c48148f79..2d0cbbd14cfc 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt | |||
@@ -27,9 +27,6 @@ config COMPAT_BINFMT_ELF | |||
27 | bool | 27 | bool |
28 | depends on COMPAT && BINFMT_ELF | 28 | depends on COMPAT && BINFMT_ELF |
29 | 29 | ||
30 | config ARCH_BINFMT_ELF_RANDOMIZE_PIE | ||
31 | bool | ||
32 | |||
33 | config ARCH_BINFMT_ELF_STATE | 30 | config ARCH_BINFMT_ELF_STATE |
34 | bool | 31 | bool |
35 | 32 | ||
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 995986b8e36b..241ef68d2893 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/security.h> | 31 | #include <linux/security.h> |
32 | #include <linux/random.h> | 32 | #include <linux/random.h> |
33 | #include <linux/elf.h> | 33 | #include <linux/elf.h> |
34 | #include <linux/elf-randomize.h> | ||
34 | #include <linux/utsname.h> | 35 | #include <linux/utsname.h> |
35 | #include <linux/coredump.h> | 36 | #include <linux/coredump.h> |
36 | #include <linux/sched.h> | 37 | #include <linux/sched.h> |
@@ -862,6 +863,7 @@ static int load_elf_binary(struct linux_binprm *bprm) | |||
862 | i < loc->elf_ex.e_phnum; i++, elf_ppnt++) { | 863 | i < loc->elf_ex.e_phnum; i++, elf_ppnt++) { |
863 | int elf_prot = 0, elf_flags; | 864 | int elf_prot = 0, elf_flags; |
864 | unsigned long k, vaddr; | 865 | unsigned long k, vaddr; |
866 | unsigned long total_size = 0; | ||
865 | 867 | ||
866 | if (elf_ppnt->p_type != PT_LOAD) | 868 | if (elf_ppnt->p_type != PT_LOAD) |
867 | continue; | 869 | continue; |
@@ -909,25 +911,20 @@ static int load_elf_binary(struct linux_binprm *bprm) | |||
909 | * default mmap base, as well as whatever program they | 911 | * default mmap base, as well as whatever program they |
910 | * might try to exec. This is because the brk will | 912 | * might try to exec. This is because the brk will |
911 | * follow the loader, and is not movable. */ | 913 | * follow the loader, and is not movable. */ |
912 | #ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE | 914 | load_bias = ELF_ET_DYN_BASE - vaddr; |
913 | /* Memory randomization might have been switched off | ||
914 | * in runtime via sysctl or explicit setting of | ||
915 | * personality flags. | ||
916 | * If that is the case, retain the original non-zero | ||
917 | * load_bias value in order to establish proper | ||
918 | * non-randomized mappings. | ||
919 | */ | ||
920 | if (current->flags & PF_RANDOMIZE) | 915 | if (current->flags & PF_RANDOMIZE) |
921 | load_bias = 0; | 916 | load_bias += arch_mmap_rnd(); |
922 | else | 917 | load_bias = ELF_PAGESTART(load_bias); |
923 | load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); | 918 | total_size = total_mapping_size(elf_phdata, |
924 | #else | 919 | loc->elf_ex.e_phnum); |
925 | load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); | 920 | if (!total_size) { |
926 | #endif | 921 | error = -EINVAL; |
922 | goto out_free_dentry; | ||
923 | } | ||
927 | } | 924 | } |
928 | 925 | ||
929 | error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, | 926 | error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, |
930 | elf_prot, elf_flags, 0); | 927 | elf_prot, elf_flags, total_size); |
931 | if (BAD_ADDR(error)) { | 928 | if (BAD_ADDR(error)) { |
932 | retval = IS_ERR((void *)error) ? | 929 | retval = IS_ERR((void *)error) ? |
933 | PTR_ERR((void*)error) : -EINVAL; | 930 | PTR_ERR((void*)error) : -EINVAL; |
@@ -1053,15 +1050,13 @@ static int load_elf_binary(struct linux_binprm *bprm) | |||
1053 | current->mm->end_data = end_data; | 1050 | current->mm->end_data = end_data; |
1054 | current->mm->start_stack = bprm->p; | 1051 | current->mm->start_stack = bprm->p; |
1055 | 1052 | ||
1056 | #ifdef arch_randomize_brk | ||
1057 | if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) { | 1053 | if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) { |
1058 | current->mm->brk = current->mm->start_brk = | 1054 | current->mm->brk = current->mm->start_brk = |
1059 | arch_randomize_brk(current->mm); | 1055 | arch_randomize_brk(current->mm); |
1060 | #ifdef CONFIG_COMPAT_BRK | 1056 | #ifdef compat_brk_randomized |
1061 | current->brk_randomized = 1; | 1057 | current->brk_randomized = 1; |
1062 | #endif | 1058 | #endif |
1063 | } | 1059 | } |
1064 | #endif | ||
1065 | 1060 | ||
1066 | if (current->personality & MMAP_PAGE_ZERO) { | 1061 | if (current->personality & MMAP_PAGE_ZERO) { |
1067 | /* Why this, you ask??? Well SVr4 maps page 0 as read-only, | 1062 | /* Why this, you ask??? Well SVr4 maps page 0 as read-only, |
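
[Note on the binfmt_elf.c hunk above] With ARCH_BINFMT_ELF_RANDOMIZE_PIE removed, an ET_DYN binary is always placed at ELF_ET_DYN_BASE - vaddr and, when PF_RANDOMIZE is set, the arch_mmap_rnd() offset is added before page-aligning, so PIE randomization no longer piggybacks on the mmap base. A standalone sketch of the load_bias computation; the base address and the 28-bit/4 KiB figures are illustrative assumptions for a 64-bit layout:

    #include <stdio.h>
    #include <stdint.h>
    #include <stdlib.h>
    #include <time.h>

    #define PAGE_SIZE        UINT64_C(4096)
    #define ELF_PAGESTART(a) ((a) & ~(PAGE_SIZE - 1))
    #define ET_DYN_BASE      UINT64_C(0x555555554000)  /* placeholder base */

    int main(void)
    {
            uint64_t vaddr = 0;               /* first PT_LOAD virtual address */
            uint64_t load_bias = ET_DYN_BASE - vaddr;
            int randomize = 1;                /* PF_RANDOMIZE analogue */

            srand((unsigned)time(NULL));
            if (randomize)                    /* same shape as arch_mmap_rnd() */
                    load_bias += ((uint64_t)rand() % (UINT64_C(1) << 28)) << 12;
            load_bias = ELF_PAGESTART(load_bias);

            printf("load_bias = 0x%llx\n", (unsigned long long)load_bias);
            return 0;
    }

The hunk also computes total_mapping_size() up front so the whole PT_LOAD span is reserved in one elf_map() call at the chosen bias.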
diff --git a/fs/buffer.c b/fs/buffer.c index 20805db2c987..c7a5602d01ee 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -3243,8 +3243,8 @@ int try_to_free_buffers(struct page *page) | |||
3243 | * to synchronise against __set_page_dirty_buffers and prevent the | 3243 | * to synchronise against __set_page_dirty_buffers and prevent the |
3244 | * dirty bit from being lost. | 3244 | * dirty bit from being lost. |
3245 | */ | 3245 | */ |
3246 | if (ret) | 3246 | if (ret && TestClearPageDirty(page)) |
3247 | cancel_dirty_page(page, PAGE_CACHE_SIZE); | 3247 | account_page_cleaned(page, mapping); |
3248 | spin_unlock(&mapping->private_lock); | 3248 | spin_unlock(&mapping->private_lock); |
3249 | out: | 3249 | out: |
3250 | if (buffers_to_free) { | 3250 | if (buffers_to_free) { |
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 480cf9c81d50..f3bfe08e177b 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
@@ -773,8 +773,7 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server) | |||
773 | 773 | ||
774 | length = atomic_dec_return(&tcpSesAllocCount); | 774 | length = atomic_dec_return(&tcpSesAllocCount); |
775 | if (length > 0) | 775 | if (length > 0) |
776 | mempool_resize(cifs_req_poolp, length + cifs_min_rcv, | 776 | mempool_resize(cifs_req_poolp, length + cifs_min_rcv); |
777 | GFP_KERNEL); | ||
778 | } | 777 | } |
779 | 778 | ||
780 | static int | 779 | static int |
@@ -848,8 +847,7 @@ cifs_demultiplex_thread(void *p) | |||
848 | 847 | ||
849 | length = atomic_inc_return(&tcpSesAllocCount); | 848 | length = atomic_inc_return(&tcpSesAllocCount); |
850 | if (length > 1) | 849 | if (length > 1) |
851 | mempool_resize(cifs_req_poolp, length + cifs_min_rcv, | 850 | mempool_resize(cifs_req_poolp, length + cifs_min_rcv); |
852 | GFP_KERNEL); | ||
853 | 851 | ||
854 | set_freezable(); | 852 | set_freezable(); |
855 | while (server->tcpStatus != CifsExiting) { | 853 | while (server->tcpStatus != CifsExiting) { |
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index c274aca8e8dc..db76cec3ce21 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -319,7 +319,7 @@ static int hugetlbfs_write_end(struct file *file, struct address_space *mapping, | |||
319 | 319 | ||
320 | static void truncate_huge_page(struct page *page) | 320 | static void truncate_huge_page(struct page *page) |
321 | { | 321 | { |
322 | cancel_dirty_page(page, /* No IO accounting for huge pages? */0); | 322 | ClearPageDirty(page); |
323 | ClearPageUptodate(page); | 323 | ClearPageUptodate(page); |
324 | delete_from_page_cache(page); | 324 | delete_from_page_cache(page); |
325 | } | 325 | } |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 849ed784d6ac..759931088094 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -1876,11 +1876,6 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) | |||
1876 | * request from the inode / page_private pointer and | 1876 | * request from the inode / page_private pointer and |
1877 | * release it */ | 1877 | * release it */ |
1878 | nfs_inode_remove_request(req); | 1878 | nfs_inode_remove_request(req); |
1879 | /* | ||
1880 | * In case nfs_inode_remove_request has marked the | ||
1881 | * page as being dirty | ||
1882 | */ | ||
1883 | cancel_dirty_page(page, PAGE_CACHE_SIZE); | ||
1884 | nfs_unlock_and_release_request(req); | 1879 | nfs_unlock_and_release_request(req); |
1885 | } | 1880 | } |
1886 | 1881 | ||
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 044158bd22be..2d7f76e52c37 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
@@ -3370,7 +3370,7 @@ static int ocfs2_merge_rec_right(struct ocfs2_path *left_path, | |||
3370 | ret = ocfs2_get_right_path(et, left_path, &right_path); | 3370 | ret = ocfs2_get_right_path(et, left_path, &right_path); |
3371 | if (ret) { | 3371 | if (ret) { |
3372 | mlog_errno(ret); | 3372 | mlog_errno(ret); |
3373 | goto out; | 3373 | return ret; |
3374 | } | 3374 | } |
3375 | 3375 | ||
3376 | right_el = path_leaf_el(right_path); | 3376 | right_el = path_leaf_el(right_path); |
@@ -3453,8 +3453,7 @@ static int ocfs2_merge_rec_right(struct ocfs2_path *left_path, | |||
3453 | subtree_index); | 3453 | subtree_index); |
3454 | } | 3454 | } |
3455 | out: | 3455 | out: |
3456 | if (right_path) | 3456 | ocfs2_free_path(right_path); |
3457 | ocfs2_free_path(right_path); | ||
3458 | return ret; | 3457 | return ret; |
3459 | } | 3458 | } |
3460 | 3459 | ||
@@ -3536,7 +3535,7 @@ static int ocfs2_merge_rec_left(struct ocfs2_path *right_path, | |||
3536 | ret = ocfs2_get_left_path(et, right_path, &left_path); | 3535 | ret = ocfs2_get_left_path(et, right_path, &left_path); |
3537 | if (ret) { | 3536 | if (ret) { |
3538 | mlog_errno(ret); | 3537 | mlog_errno(ret); |
3539 | goto out; | 3538 | return ret; |
3540 | } | 3539 | } |
3541 | 3540 | ||
3542 | left_el = path_leaf_el(left_path); | 3541 | left_el = path_leaf_el(left_path); |
@@ -3647,8 +3646,7 @@ static int ocfs2_merge_rec_left(struct ocfs2_path *right_path, | |||
3647 | right_path, subtree_index); | 3646 | right_path, subtree_index); |
3648 | } | 3647 | } |
3649 | out: | 3648 | out: |
3650 | if (left_path) | 3649 | ocfs2_free_path(left_path); |
3651 | ocfs2_free_path(left_path); | ||
3652 | return ret; | 3650 | return ret; |
3653 | } | 3651 | } |
3654 | 3652 | ||
@@ -4334,17 +4332,17 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et, | |||
4334 | } else if (path->p_tree_depth > 0) { | 4332 | } else if (path->p_tree_depth > 0) { |
4335 | status = ocfs2_find_cpos_for_left_leaf(sb, path, &left_cpos); | 4333 | status = ocfs2_find_cpos_for_left_leaf(sb, path, &left_cpos); |
4336 | if (status) | 4334 | if (status) |
4337 | goto out; | 4335 | goto exit; |
4338 | 4336 | ||
4339 | if (left_cpos != 0) { | 4337 | if (left_cpos != 0) { |
4340 | left_path = ocfs2_new_path_from_path(path); | 4338 | left_path = ocfs2_new_path_from_path(path); |
4341 | if (!left_path) | 4339 | if (!left_path) |
4342 | goto out; | 4340 | goto exit; |
4343 | 4341 | ||
4344 | status = ocfs2_find_path(et->et_ci, left_path, | 4342 | status = ocfs2_find_path(et->et_ci, left_path, |
4345 | left_cpos); | 4343 | left_cpos); |
4346 | if (status) | 4344 | if (status) |
4347 | goto out; | 4345 | goto free_left_path; |
4348 | 4346 | ||
4349 | new_el = path_leaf_el(left_path); | 4347 | new_el = path_leaf_el(left_path); |
4350 | 4348 | ||
@@ -4361,7 +4359,7 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et, | |||
4361 | le16_to_cpu(new_el->l_next_free_rec), | 4359 | le16_to_cpu(new_el->l_next_free_rec), |
4362 | le16_to_cpu(new_el->l_count)); | 4360 | le16_to_cpu(new_el->l_count)); |
4363 | status = -EINVAL; | 4361 | status = -EINVAL; |
4364 | goto out; | 4362 | goto free_left_path; |
4365 | } | 4363 | } |
4366 | rec = &new_el->l_recs[ | 4364 | rec = &new_el->l_recs[ |
4367 | le16_to_cpu(new_el->l_next_free_rec) - 1]; | 4365 | le16_to_cpu(new_el->l_next_free_rec) - 1]; |
@@ -4388,18 +4386,18 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et, | |||
4388 | path->p_tree_depth > 0) { | 4386 | path->p_tree_depth > 0) { |
4389 | status = ocfs2_find_cpos_for_right_leaf(sb, path, &right_cpos); | 4387 | status = ocfs2_find_cpos_for_right_leaf(sb, path, &right_cpos); |
4390 | if (status) | 4388 | if (status) |
4391 | goto out; | 4389 | goto free_left_path; |
4392 | 4390 | ||
4393 | if (right_cpos == 0) | 4391 | if (right_cpos == 0) |
4394 | goto out; | 4392 | goto free_left_path; |
4395 | 4393 | ||
4396 | right_path = ocfs2_new_path_from_path(path); | 4394 | right_path = ocfs2_new_path_from_path(path); |
4397 | if (!right_path) | 4395 | if (!right_path) |
4398 | goto out; | 4396 | goto free_left_path; |
4399 | 4397 | ||
4400 | status = ocfs2_find_path(et->et_ci, right_path, right_cpos); | 4398 | status = ocfs2_find_path(et->et_ci, right_path, right_cpos); |
4401 | if (status) | 4399 | if (status) |
4402 | goto out; | 4400 | goto free_right_path; |
4403 | 4401 | ||
4404 | new_el = path_leaf_el(right_path); | 4402 | new_el = path_leaf_el(right_path); |
4405 | rec = &new_el->l_recs[0]; | 4403 | rec = &new_el->l_recs[0]; |
@@ -4413,7 +4411,7 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et, | |||
4413 | (unsigned long long)le64_to_cpu(eb->h_blkno), | 4411 | (unsigned long long)le64_to_cpu(eb->h_blkno), |
4414 | le16_to_cpu(new_el->l_next_free_rec)); | 4412 | le16_to_cpu(new_el->l_next_free_rec)); |
4415 | status = -EINVAL; | 4413 | status = -EINVAL; |
4416 | goto out; | 4414 | goto free_right_path; |
4417 | } | 4415 | } |
4418 | rec = &new_el->l_recs[1]; | 4416 | rec = &new_el->l_recs[1]; |
4419 | } | 4417 | } |
@@ -4430,12 +4428,11 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et, | |||
4430 | ret = contig_type; | 4428 | ret = contig_type; |
4431 | } | 4429 | } |
4432 | 4430 | ||
4433 | out: | 4431 | free_right_path: |
4434 | if (left_path) | 4432 | ocfs2_free_path(right_path); |
4435 | ocfs2_free_path(left_path); | 4433 | free_left_path: |
4436 | if (right_path) | 4434 | ocfs2_free_path(left_path); |
4437 | ocfs2_free_path(right_path); | 4435 | exit: |
4438 | |||
4439 | return ret; | 4436 | return ret; |
4440 | } | 4437 | } |
4441 | 4438 | ||
@@ -6858,13 +6855,13 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, | |||
6858 | if (pages == NULL) { | 6855 | if (pages == NULL) { |
6859 | ret = -ENOMEM; | 6856 | ret = -ENOMEM; |
6860 | mlog_errno(ret); | 6857 | mlog_errno(ret); |
6861 | goto out; | 6858 | return ret; |
6862 | } | 6859 | } |
6863 | 6860 | ||
6864 | ret = ocfs2_reserve_clusters(osb, 1, &data_ac); | 6861 | ret = ocfs2_reserve_clusters(osb, 1, &data_ac); |
6865 | if (ret) { | 6862 | if (ret) { |
6866 | mlog_errno(ret); | 6863 | mlog_errno(ret); |
6867 | goto out; | 6864 | goto free_pages; |
6868 | } | 6865 | } |
6869 | } | 6866 | } |
6870 | 6867 | ||
@@ -6996,9 +6993,8 @@ out_commit: | |||
6996 | out: | 6993 | out: |
6997 | if (data_ac) | 6994 | if (data_ac) |
6998 | ocfs2_free_alloc_context(data_ac); | 6995 | ocfs2_free_alloc_context(data_ac); |
6999 | if (pages) | 6996 | free_pages: |
7000 | kfree(pages); | 6997 | kfree(pages); |
7001 | |||
7002 | return ret; | 6998 | return ret; |
7003 | } | 6999 | } |
7004 | 7000 | ||
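
[Note on the ocfs2/alloc.c hunk above] Several ocfs2 hunks in this series replace a single catch-all out: label guarded by if (ptr) checks with one label per resource, unwound in reverse order of acquisition, plus early returns when nothing has been allocated yet. A generic sketch of that error-handling idiom (plain malloc/free, not ocfs2 paths):

    #include <stdio.h>
    #include <stdlib.h>

    static int do_work(void)
    {
            int ret = 0;
            char *left, *right;

            left = malloc(64);
            if (!left)
                    return -1;                /* nothing to unwind yet */

            right = malloc(64);
            if (!right) {
                    ret = -1;
                    goto free_left;           /* unwind only what exists */
            }

            /* ... use both buffers ... */

            free(right);
    free_left:
            free(left);
            return ret;
    }

    int main(void)
    {
            printf("do_work() = %d\n", do_work());
            return 0;
    }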
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index e1bf18c5d25e..8d2bc840c288 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -664,6 +664,117 @@ static int ocfs2_is_overwrite(struct ocfs2_super *osb, | |||
664 | return 0; | 664 | return 0; |
665 | } | 665 | } |
666 | 666 | ||
667 | static int ocfs2_direct_IO_zero_extend(struct ocfs2_super *osb, | ||
668 | struct inode *inode, loff_t offset, | ||
669 | u64 zero_len, int cluster_align) | ||
670 | { | ||
671 | u32 p_cpos = 0; | ||
672 | u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, i_size_read(inode)); | ||
673 | unsigned int num_clusters = 0; | ||
674 | unsigned int ext_flags = 0; | ||
675 | int ret = 0; | ||
676 | |||
677 | if (offset <= i_size_read(inode) || cluster_align) | ||
678 | return 0; | ||
679 | |||
680 | ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, &num_clusters, | ||
681 | &ext_flags); | ||
682 | if (ret < 0) { | ||
683 | mlog_errno(ret); | ||
684 | return ret; | ||
685 | } | ||
686 | |||
687 | if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) { | ||
688 | u64 s = i_size_read(inode); | ||
689 | sector_t sector = (p_cpos << (osb->s_clustersize_bits - 9)) + | ||
690 | (do_div(s, osb->s_clustersize) >> 9); | ||
691 | |||
692 | ret = blkdev_issue_zeroout(osb->sb->s_bdev, sector, | ||
693 | zero_len >> 9, GFP_NOFS, false); | ||
694 | if (ret < 0) | ||
695 | mlog_errno(ret); | ||
696 | } | ||
697 | |||
698 | return ret; | ||
699 | } | ||
700 | |||
701 | static int ocfs2_direct_IO_extend_no_holes(struct ocfs2_super *osb, | ||
702 | struct inode *inode, loff_t offset) | ||
703 | { | ||
704 | u64 zero_start, zero_len, total_zero_len; | ||
705 | u32 p_cpos = 0, clusters_to_add; | ||
706 | u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, i_size_read(inode)); | ||
707 | unsigned int num_clusters = 0; | ||
708 | unsigned int ext_flags = 0; | ||
709 | u32 size_div, offset_div; | ||
710 | int ret = 0; | ||
711 | |||
712 | { | ||
713 | u64 o = offset; | ||
714 | u64 s = i_size_read(inode); | ||
715 | |||
716 | offset_div = do_div(o, osb->s_clustersize); | ||
717 | size_div = do_div(s, osb->s_clustersize); | ||
718 | } | ||
719 | |||
720 | if (offset <= i_size_read(inode)) | ||
721 | return 0; | ||
722 | |||
723 | clusters_to_add = ocfs2_bytes_to_clusters(inode->i_sb, offset) - | ||
724 | ocfs2_bytes_to_clusters(inode->i_sb, i_size_read(inode)); | ||
725 | total_zero_len = offset - i_size_read(inode); | ||
726 | if (clusters_to_add) | ||
727 | total_zero_len -= offset_div; | ||
728 | |||
729 | /* Allocate clusters to fill out holes, and this is only needed | ||
730 | * when we add more than one clusters. Otherwise the cluster will | ||
731 | * be allocated during direct IO */ | ||
732 | if (clusters_to_add > 1) { | ||
733 | ret = ocfs2_extend_allocation(inode, | ||
734 | OCFS2_I(inode)->ip_clusters, | ||
735 | clusters_to_add - 1, 0); | ||
736 | if (ret) { | ||
737 | mlog_errno(ret); | ||
738 | goto out; | ||
739 | } | ||
740 | } | ||
741 | |||
742 | while (total_zero_len) { | ||
743 | ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, &num_clusters, | ||
744 | &ext_flags); | ||
745 | if (ret < 0) { | ||
746 | mlog_errno(ret); | ||
747 | goto out; | ||
748 | } | ||
749 | |||
750 | zero_start = ocfs2_clusters_to_bytes(osb->sb, p_cpos) + | ||
751 | size_div; | ||
752 | zero_len = ocfs2_clusters_to_bytes(osb->sb, num_clusters) - | ||
753 | size_div; | ||
754 | zero_len = min(total_zero_len, zero_len); | ||
755 | |||
756 | if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) { | ||
757 | ret = blkdev_issue_zeroout(osb->sb->s_bdev, | ||
758 | zero_start >> 9, zero_len >> 9, | ||
759 | GFP_NOFS, false); | ||
760 | if (ret < 0) { | ||
761 | mlog_errno(ret); | ||
762 | goto out; | ||
763 | } | ||
764 | } | ||
765 | |||
766 | total_zero_len -= zero_len; | ||
767 | v_cpos += ocfs2_bytes_to_clusters(osb->sb, zero_len + size_div); | ||
768 | |||
769 | /* Only at first iteration can be cluster not aligned. | ||
770 | * So set size_div to 0 for the rest */ | ||
771 | size_div = 0; | ||
772 | } | ||
773 | |||
774 | out: | ||
775 | return ret; | ||
776 | } | ||
777 | |||
667 | static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, | 778 | static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, |
668 | struct iov_iter *iter, | 779 | struct iov_iter *iter, |
669 | loff_t offset) | 780 | loff_t offset) |
@@ -678,8 +789,8 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, | |||
678 | struct buffer_head *di_bh = NULL; | 789 | struct buffer_head *di_bh = NULL; |
679 | size_t count = iter->count; | 790 | size_t count = iter->count; |
680 | journal_t *journal = osb->journal->j_journal; | 791 | journal_t *journal = osb->journal->j_journal; |
681 | u32 zero_len; | 792 | u64 zero_len_head, zero_len_tail; |
682 | int cluster_align; | 793 | int cluster_align_head, cluster_align_tail; |
683 | loff_t final_size = offset + count; | 794 | loff_t final_size = offset + count; |
684 | int append_write = offset >= i_size_read(inode) ? 1 : 0; | 795 | int append_write = offset >= i_size_read(inode) ? 1 : 0; |
685 | unsigned int num_clusters = 0; | 796 | unsigned int num_clusters = 0; |
@@ -687,9 +798,16 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, | |||
687 | 798 | ||
688 | { | 799 | { |
689 | u64 o = offset; | 800 | u64 o = offset; |
801 | u64 s = i_size_read(inode); | ||
802 | |||
803 | zero_len_head = do_div(o, 1 << osb->s_clustersize_bits); | ||
804 | cluster_align_head = !zero_len_head; | ||
690 | 805 | ||
691 | zero_len = do_div(o, 1 << osb->s_clustersize_bits); | 806 | zero_len_tail = osb->s_clustersize - |
692 | cluster_align = !zero_len; | 807 | do_div(s, osb->s_clustersize); |
808 | if ((offset - i_size_read(inode)) < zero_len_tail) | ||
809 | zero_len_tail = offset - i_size_read(inode); | ||
810 | cluster_align_tail = !zero_len_tail; | ||
693 | } | 811 | } |
694 | 812 | ||
695 | /* | 813 | /* |
@@ -707,21 +825,23 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, | |||
707 | } | 825 | } |
708 | 826 | ||
709 | if (append_write) { | 827 | if (append_write) { |
710 | ret = ocfs2_inode_lock(inode, &di_bh, 1); | 828 | ret = ocfs2_inode_lock(inode, NULL, 1); |
711 | if (ret < 0) { | 829 | if (ret < 0) { |
712 | mlog_errno(ret); | 830 | mlog_errno(ret); |
713 | goto clean_orphan; | 831 | goto clean_orphan; |
714 | } | 832 | } |
715 | 833 | ||
834 | /* zeroing out the previously allocated cluster tail | ||
835 | * that but not zeroed */ | ||
716 | if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) | 836 | if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) |
717 | ret = ocfs2_zero_extend(inode, di_bh, offset); | 837 | ret = ocfs2_direct_IO_zero_extend(osb, inode, offset, |
838 | zero_len_tail, cluster_align_tail); | ||
718 | else | 839 | else |
719 | ret = ocfs2_extend_no_holes(inode, di_bh, offset, | 840 | ret = ocfs2_direct_IO_extend_no_holes(osb, inode, |
720 | offset); | 841 | offset); |
721 | if (ret < 0) { | 842 | if (ret < 0) { |
722 | mlog_errno(ret); | 843 | mlog_errno(ret); |
723 | ocfs2_inode_unlock(inode, 1); | 844 | ocfs2_inode_unlock(inode, 1); |
724 | brelse(di_bh); | ||
725 | goto clean_orphan; | 845 | goto clean_orphan; |
726 | } | 846 | } |
727 | 847 | ||
@@ -729,13 +849,10 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, | |||
729 | if (is_overwrite < 0) { | 849 | if (is_overwrite < 0) { |
730 | mlog_errno(is_overwrite); | 850 | mlog_errno(is_overwrite); |
731 | ocfs2_inode_unlock(inode, 1); | 851 | ocfs2_inode_unlock(inode, 1); |
732 | brelse(di_bh); | ||
733 | goto clean_orphan; | 852 | goto clean_orphan; |
734 | } | 853 | } |
735 | 854 | ||
736 | ocfs2_inode_unlock(inode, 1); | 855 | ocfs2_inode_unlock(inode, 1); |
737 | brelse(di_bh); | ||
738 | di_bh = NULL; | ||
739 | } | 856 | } |
740 | 857 | ||
741 | written = __blockdev_direct_IO(WRITE, iocb, inode, inode->i_sb->s_bdev, | 858 | written = __blockdev_direct_IO(WRITE, iocb, inode, inode->i_sb->s_bdev, |
@@ -772,15 +889,23 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, | |||
772 | if (ret < 0) | 889 | if (ret < 0) |
773 | mlog_errno(ret); | 890 | mlog_errno(ret); |
774 | } | 891 | } |
775 | } else if (written < 0 && append_write && !is_overwrite && | 892 | } else if (written > 0 && append_write && !is_overwrite && |
776 | !cluster_align) { | 893 | !cluster_align_head) { |
894 | /* zeroing out the allocated cluster head */ | ||
777 | u32 p_cpos = 0; | 895 | u32 p_cpos = 0; |
778 | u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset); | 896 | u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset); |
779 | 897 | ||
898 | ret = ocfs2_inode_lock(inode, NULL, 0); | ||
899 | if (ret < 0) { | ||
900 | mlog_errno(ret); | ||
901 | goto clean_orphan; | ||
902 | } | ||
903 | |||
780 | ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, | 904 | ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, |
781 | &num_clusters, &ext_flags); | 905 | &num_clusters, &ext_flags); |
782 | if (ret < 0) { | 906 | if (ret < 0) { |
783 | mlog_errno(ret); | 907 | mlog_errno(ret); |
908 | ocfs2_inode_unlock(inode, 0); | ||
784 | goto clean_orphan; | 909 | goto clean_orphan; |
785 | } | 910 | } |
786 | 911 | ||
@@ -788,9 +913,11 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, | |||
788 | 913 | ||
789 | ret = blkdev_issue_zeroout(osb->sb->s_bdev, | 914 | ret = blkdev_issue_zeroout(osb->sb->s_bdev, |
790 | p_cpos << (osb->s_clustersize_bits - 9), | 915 | p_cpos << (osb->s_clustersize_bits - 9), |
791 | zero_len >> 9, GFP_KERNEL, false); | 916 | zero_len_head >> 9, GFP_NOFS, false); |
792 | if (ret < 0) | 917 | if (ret < 0) |
793 | mlog_errno(ret); | 918 | mlog_errno(ret); |
919 | |||
920 | ocfs2_inode_unlock(inode, 0); | ||
794 | } | 921 | } |
795 | 922 | ||
796 | clean_orphan: | 923 | clean_orphan: |
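
[Note on the ocfs2/aops.c hunk above] The direct IO write path now distinguishes the unaligned head of the new write from the unaligned tail beyond the old i_size: zero_len_head is the offset within its cluster, and zero_len_tail is how much of the last allocated cluster past i_size needs zeroing, capped at the gap offset - i_size. A standalone sketch of that arithmetic with an assumed 1 MiB cluster size and made-up sizes:

    #include <stdio.h>
    #include <stdint.h>

    #define CLUSTER_SIZE UINT64_C(1048576)    /* assume 1 MiB clusters */

    int main(void)
    {
            uint64_t i_size = 2500000;        /* current inode size */
            uint64_t offset = 3700000;        /* start of the append write */

            /* Unaligned part at the head of the write, within its cluster. */
            uint64_t zero_len_head = offset % CLUSTER_SIZE;

            /* Tail of the last allocated cluster beyond i_size, capped at the gap. */
            uint64_t zero_len_tail = CLUSTER_SIZE - (i_size % CLUSTER_SIZE);
            if (offset - i_size < zero_len_tail)
                    zero_len_tail = offset - i_size;

            printf("head: %llu bytes, tail: %llu bytes\n",
                   (unsigned long long)zero_len_head,
                   (unsigned long long)zero_len_tail);
            return 0;
    }

In the hunk itself the tail is zeroed before the write (ocfs2_direct_IO_zero_extend / ocfs2_direct_IO_extend_no_holes) and the head only after a successful append write into a freshly allocated cluster.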
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 16eff45727ee..8e19b9d7aba8 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
@@ -1312,7 +1312,9 @@ static int o2hb_debug_init(void) | |||
1312 | int ret = -ENOMEM; | 1312 | int ret = -ENOMEM; |
1313 | 1313 | ||
1314 | o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL); | 1314 | o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL); |
1315 | if (!o2hb_debug_dir) { | 1315 | if (IS_ERR_OR_NULL(o2hb_debug_dir)) { |
1316 | ret = o2hb_debug_dir ? | ||
1317 | PTR_ERR(o2hb_debug_dir) : -ENOMEM; | ||
1316 | mlog_errno(ret); | 1318 | mlog_errno(ret); |
1317 | goto bail; | 1319 | goto bail; |
1318 | } | 1320 | } |
@@ -1325,7 +1327,9 @@ static int o2hb_debug_init(void) | |||
1325 | sizeof(o2hb_live_node_bitmap), | 1327 | sizeof(o2hb_live_node_bitmap), |
1326 | O2NM_MAX_NODES, | 1328 | O2NM_MAX_NODES, |
1327 | o2hb_live_node_bitmap); | 1329 | o2hb_live_node_bitmap); |
1328 | if (!o2hb_debug_livenodes) { | 1330 | if (IS_ERR_OR_NULL(o2hb_debug_livenodes)) { |
1331 | ret = o2hb_debug_livenodes ? | ||
1332 | PTR_ERR(o2hb_debug_livenodes) : -ENOMEM; | ||
1329 | mlog_errno(ret); | 1333 | mlog_errno(ret); |
1330 | goto bail; | 1334 | goto bail; |
1331 | } | 1335 | } |
@@ -1338,7 +1342,9 @@ static int o2hb_debug_init(void) | |||
1338 | sizeof(o2hb_live_region_bitmap), | 1342 | sizeof(o2hb_live_region_bitmap), |
1339 | O2NM_MAX_REGIONS, | 1343 | O2NM_MAX_REGIONS, |
1340 | o2hb_live_region_bitmap); | 1344 | o2hb_live_region_bitmap); |
1341 | if (!o2hb_debug_liveregions) { | 1345 | if (IS_ERR_OR_NULL(o2hb_debug_liveregions)) { |
1346 | ret = o2hb_debug_liveregions ? | ||
1347 | PTR_ERR(o2hb_debug_liveregions) : -ENOMEM; | ||
1342 | mlog_errno(ret); | 1348 | mlog_errno(ret); |
1343 | goto bail; | 1349 | goto bail; |
1344 | } | 1350 | } |
@@ -1352,7 +1358,9 @@ static int o2hb_debug_init(void) | |||
1352 | sizeof(o2hb_quorum_region_bitmap), | 1358 | sizeof(o2hb_quorum_region_bitmap), |
1353 | O2NM_MAX_REGIONS, | 1359 | O2NM_MAX_REGIONS, |
1354 | o2hb_quorum_region_bitmap); | 1360 | o2hb_quorum_region_bitmap); |
1355 | if (!o2hb_debug_quorumregions) { | 1361 | if (IS_ERR_OR_NULL(o2hb_debug_quorumregions)) { |
1362 | ret = o2hb_debug_quorumregions ? | ||
1363 | PTR_ERR(o2hb_debug_quorumregions) : -ENOMEM; | ||
1356 | mlog_errno(ret); | 1364 | mlog_errno(ret); |
1357 | goto bail; | 1365 | goto bail; |
1358 | } | 1366 | } |
@@ -1366,7 +1374,9 @@ static int o2hb_debug_init(void) | |||
1366 | sizeof(o2hb_failed_region_bitmap), | 1374 | sizeof(o2hb_failed_region_bitmap), |
1367 | O2NM_MAX_REGIONS, | 1375 | O2NM_MAX_REGIONS, |
1368 | o2hb_failed_region_bitmap); | 1376 | o2hb_failed_region_bitmap); |
1369 | if (!o2hb_debug_failedregions) { | 1377 | if (IS_ERR_OR_NULL(o2hb_debug_failedregions)) { |
1378 | ret = o2hb_debug_failedregions ? | ||
1379 | PTR_ERR(o2hb_debug_failedregions) : -ENOMEM; | ||
1370 | mlog_errno(ret); | 1380 | mlog_errno(ret); |
1371 | goto bail; | 1381 | goto bail; |
1372 | } | 1382 | } |
@@ -2000,7 +2010,8 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) | |||
2000 | 2010 | ||
2001 | reg->hr_debug_dir = | 2011 | reg->hr_debug_dir = |
2002 | debugfs_create_dir(config_item_name(®->hr_item), dir); | 2012 | debugfs_create_dir(config_item_name(®->hr_item), dir); |
2003 | if (!reg->hr_debug_dir) { | 2013 | if (IS_ERR_OR_NULL(reg->hr_debug_dir)) { |
2014 | ret = reg->hr_debug_dir ? PTR_ERR(reg->hr_debug_dir) : -ENOMEM; | ||
2004 | mlog_errno(ret); | 2015 | mlog_errno(ret); |
2005 | goto bail; | 2016 | goto bail; |
2006 | } | 2017 | } |
@@ -2013,7 +2024,9 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) | |||
2013 | O2HB_DB_TYPE_REGION_LIVENODES, | 2024 | O2HB_DB_TYPE_REGION_LIVENODES, |
2014 | sizeof(reg->hr_live_node_bitmap), | 2025 | sizeof(reg->hr_live_node_bitmap), |
2015 | O2NM_MAX_NODES, reg); | 2026 | O2NM_MAX_NODES, reg); |
2016 | if (!reg->hr_debug_livenodes) { | 2027 | if (IS_ERR_OR_NULL(reg->hr_debug_livenodes)) { |
2028 | ret = reg->hr_debug_livenodes ? | ||
2029 | PTR_ERR(reg->hr_debug_livenodes) : -ENOMEM; | ||
2017 | mlog_errno(ret); | 2030 | mlog_errno(ret); |
2018 | goto bail; | 2031 | goto bail; |
2019 | } | 2032 | } |
@@ -2025,7 +2038,9 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) | |||
2025 | sizeof(*(reg->hr_db_regnum)), | 2038 | sizeof(*(reg->hr_db_regnum)), |
2026 | O2HB_DB_TYPE_REGION_NUMBER, | 2039 | O2HB_DB_TYPE_REGION_NUMBER, |
2027 | 0, O2NM_MAX_NODES, reg); | 2040 | 0, O2NM_MAX_NODES, reg); |
2028 | if (!reg->hr_debug_regnum) { | 2041 | if (IS_ERR_OR_NULL(reg->hr_debug_regnum)) { |
2042 | ret = reg->hr_debug_regnum ? | ||
2043 | PTR_ERR(reg->hr_debug_regnum) : -ENOMEM; | ||
2029 | mlog_errno(ret); | 2044 | mlog_errno(ret); |
2030 | goto bail; | 2045 | goto bail; |
2031 | } | 2046 | } |
@@ -2037,7 +2052,9 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) | |||
2037 | sizeof(*(reg->hr_db_elapsed_time)), | 2052 | sizeof(*(reg->hr_db_elapsed_time)), |
2038 | O2HB_DB_TYPE_REGION_ELAPSED_TIME, | 2053 | O2HB_DB_TYPE_REGION_ELAPSED_TIME, |
2039 | 0, 0, reg); | 2054 | 0, 0, reg); |
2040 | if (!reg->hr_debug_elapsed_time) { | 2055 | if (IS_ERR_OR_NULL(reg->hr_debug_elapsed_time)) { |
2056 | ret = reg->hr_debug_elapsed_time ? | ||
2057 | PTR_ERR(reg->hr_debug_elapsed_time) : -ENOMEM; | ||
2041 | mlog_errno(ret); | 2058 | mlog_errno(ret); |
2042 | goto bail; | 2059 | goto bail; |
2043 | } | 2060 | } |
@@ -2049,13 +2066,16 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) | |||
2049 | sizeof(*(reg->hr_db_pinned)), | 2066 | sizeof(*(reg->hr_db_pinned)), |
2050 | O2HB_DB_TYPE_REGION_PINNED, | 2067 | O2HB_DB_TYPE_REGION_PINNED, |
2051 | 0, 0, reg); | 2068 | 0, 0, reg); |
2052 | if (!reg->hr_debug_pinned) { | 2069 | if (IS_ERR_OR_NULL(reg->hr_debug_pinned)) { |
2070 | ret = reg->hr_debug_pinned ? | ||
2071 | PTR_ERR(reg->hr_debug_pinned) : -ENOMEM; | ||
2053 | mlog_errno(ret); | 2072 | mlog_errno(ret); |
2054 | goto bail; | 2073 | goto bail; |
2055 | } | 2074 | } |
2056 | 2075 | ||
2057 | ret = 0; | 2076 | return 0; |
2058 | bail: | 2077 | bail: |
2078 | debugfs_remove_recursive(reg->hr_debug_dir); | ||
2059 | return ret; | 2079 | return ret; |
2060 | } | 2080 | } |
2061 | 2081 | ||
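
[Note on the o2hb heartbeat hunk above] debugfs creation helpers can hand back ERR_PTR() values as well as NULL, so the heartbeat code now tests IS_ERR_OR_NULL() and converts the result into an errno: PTR_ERR() for an error pointer, -ENOMEM for NULL, with debugfs_remove_recursive() tearing down the partly built directory on the bail path. A userspace sketch of the error-pointer convention; the helpers below are simplified stand-ins, not the kernel macros:

    #include <errno.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static inline void *ERR_PTR(long err)      { return (void *)(intptr_t)err; }
    static inline long  PTR_ERR(const void *p) { return (long)(intptr_t)p; }
    static inline bool  IS_ERR_OR_NULL(const void *p)
    {
            return !p || (uintptr_t)p >= (uintptr_t)-4095;
    }

    static void *create_entry(bool fail)
    {
            return fail ? ERR_PTR(-ENODEV) : (void *)0x1000;
    }

    int main(void)
    {
            void *dentry = create_entry(true);

            if (IS_ERR_OR_NULL(dentry)) {
                    long ret = dentry ? PTR_ERR(dentry) : -ENOMEM;
                    printf("debugfs-style create failed: %ld\n", ret);
            }
            return 0;
    }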
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index 2260fb9e6508..7fdc25a4d8c0 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h | |||
@@ -196,13 +196,14 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits; | |||
196 | } \ | 196 | } \ |
197 | } while (0) | 197 | } while (0) |
198 | 198 | ||
199 | #define mlog_errno(st) do { \ | 199 | #define mlog_errno(st) ({ \ |
200 | int _st = (st); \ | 200 | int _st = (st); \ |
201 | if (_st != -ERESTARTSYS && _st != -EINTR && \ | 201 | if (_st != -ERESTARTSYS && _st != -EINTR && \ |
202 | _st != AOP_TRUNCATED_PAGE && _st != -ENOSPC && \ | 202 | _st != AOP_TRUNCATED_PAGE && _st != -ENOSPC && \ |
203 | _st != -EDQUOT) \ | 203 | _st != -EDQUOT) \ |
204 | mlog(ML_ERROR, "status = %lld\n", (long long)_st); \ | 204 | mlog(ML_ERROR, "status = %lld\n", (long long)_st); \ |
205 | } while (0) | 205 | _st; \ |
206 | }) | ||
206 | 207 | ||
207 | #define mlog_bug_on_msg(cond, fmt, args...) do { \ | 208 | #define mlog_bug_on_msg(cond, fmt, args...) do { \ |
208 | if (cond) { \ | 209 | if (cond) { \ |
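
[Note on the masklog.h hunk above] mlog_errno() changes from a do/while(0) macro into a GCC statement expression, so it both logs and evaluates to the status it was given, letting callers use it inside an expression such as return mlog_errno(ret);. A minimal sketch of the statement-expression idiom (a GNU C extension, so gcc or clang is assumed):

    #include <stdio.h>

    /* Logs non-zero statuses and yields the status value itself. */
    #define log_errno(st) ({                                   \
            int _st = (st);                                    \
            if (_st)                                           \
                    fprintf(stderr, "status = %d\n", _st);     \
            _st;                                               \
    })

    static int open_thing(int fail)
    {
            if (fail)
                    return log_errno(-5);     /* logs and returns -5 */
            return 0;
    }

    int main(void)
    {
            printf("result = %d\n", open_thing(1));
            return 0;
    }

Evaluating st once into _st also keeps side-effecting arguments safe, the same reason the original macro used a temporary.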
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index b08050bd3f2e..ccd4dcfc3645 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c | |||
@@ -18,7 +18,7 @@ | |||
18 | * | 18 | * |
19 | * linux/fs/minix/dir.c | 19 | * linux/fs/minix/dir.c |
20 | * | 20 | * |
21 | * Copyright (C) 1991, 1992 Linux Torvalds | 21 | * Copyright (C) 1991, 1992 Linus Torvalds |
22 | * | 22 | * |
23 | * This program is free software; you can redistribute it and/or | 23 | * This program is free software; you can redistribute it and/or |
24 | * modify it under the terms of the GNU General Public | 24 | * modify it under the terms of the GNU General Public |
@@ -2047,22 +2047,19 @@ int ocfs2_check_dir_for_entry(struct inode *dir, | |||
2047 | const char *name, | 2047 | const char *name, |
2048 | int namelen) | 2048 | int namelen) |
2049 | { | 2049 | { |
2050 | int ret; | 2050 | int ret = 0; |
2051 | struct ocfs2_dir_lookup_result lookup = { NULL, }; | 2051 | struct ocfs2_dir_lookup_result lookup = { NULL, }; |
2052 | 2052 | ||
2053 | trace_ocfs2_check_dir_for_entry( | 2053 | trace_ocfs2_check_dir_for_entry( |
2054 | (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name); | 2054 | (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name); |
2055 | 2055 | ||
2056 | ret = -EEXIST; | 2056 | if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0) { |
2057 | if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0) | 2057 | ret = -EEXIST; |
2058 | goto bail; | 2058 | mlog_errno(ret); |
2059 | } | ||
2059 | 2060 | ||
2060 | ret = 0; | ||
2061 | bail: | ||
2062 | ocfs2_free_dir_lookup_result(&lookup); | 2061 | ocfs2_free_dir_lookup_result(&lookup); |
2063 | 2062 | ||
2064 | if (ret) | ||
2065 | mlog_errno(ret); | ||
2066 | return ret; | 2063 | return ret; |
2067 | } | 2064 | } |
2068 | 2065 | ||
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 11849a44dc5a..956edf67be20 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
@@ -1391,6 +1391,11 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb, | |||
1391 | int noqueue_attempted = 0; | 1391 | int noqueue_attempted = 0; |
1392 | int dlm_locked = 0; | 1392 | int dlm_locked = 0; |
1393 | 1393 | ||
1394 | if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) { | ||
1395 | mlog_errno(-EINVAL); | ||
1396 | return -EINVAL; | ||
1397 | } | ||
1398 | |||
1394 | ocfs2_init_mask_waiter(&mw); | 1399 | ocfs2_init_mask_waiter(&mw); |
1395 | 1400 | ||
1396 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) | 1401 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) |
@@ -2954,7 +2959,7 @@ static int ocfs2_dlm_init_debug(struct ocfs2_super *osb) | |||
2954 | osb->osb_debug_root, | 2959 | osb->osb_debug_root, |
2955 | osb, | 2960 | osb, |
2956 | &ocfs2_dlm_debug_fops); | 2961 | &ocfs2_dlm_debug_fops); |
2957 | if (!dlm_debug->d_locking_state) { | 2962 | if (IS_ERR_OR_NULL(dlm_debug->d_locking_state)) { |
2958 | ret = -EINVAL; | 2963 | ret = -EINVAL; |
2959 | mlog(ML_ERROR, | 2964 | mlog(ML_ERROR, |
2960 | "Unable to create locking state debugfs file.\n"); | 2965 | "Unable to create locking state debugfs file.\n"); |
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 29651167190d..540dc4bdd042 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c | |||
@@ -82,7 +82,6 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, | |||
82 | } | 82 | } |
83 | 83 | ||
84 | status = ocfs2_test_inode_bit(osb, blkno, &set); | 84 | status = ocfs2_test_inode_bit(osb, blkno, &set); |
85 | trace_ocfs2_get_dentry_test_bit(status, set); | ||
86 | if (status < 0) { | 85 | if (status < 0) { |
87 | if (status == -EINVAL) { | 86 | if (status == -EINVAL) { |
88 | /* | 87 | /* |
@@ -96,6 +95,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, | |||
96 | goto unlock_nfs_sync; | 95 | goto unlock_nfs_sync; |
97 | } | 96 | } |
98 | 97 | ||
98 | trace_ocfs2_get_dentry_test_bit(status, set); | ||
99 | /* If the inode allocator bit is clear, this inode must be stale */ | 99 | /* If the inode allocator bit is clear, this inode must be stale */ |
100 | if (!set) { | 100 | if (!set) { |
101 | status = -ESTALE; | 101 | status = -ESTALE; |
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 3025c0da6b8a..be71ca0937f7 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c | |||
@@ -624,7 +624,7 @@ static int ocfs2_remove_inode(struct inode *inode, | |||
624 | ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE, | 624 | ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE, |
625 | le16_to_cpu(di->i_suballoc_slot)); | 625 | le16_to_cpu(di->i_suballoc_slot)); |
626 | if (!inode_alloc_inode) { | 626 | if (!inode_alloc_inode) { |
627 | status = -EEXIST; | 627 | status = -ENOENT; |
628 | mlog_errno(status); | 628 | mlog_errno(status); |
629 | goto bail; | 629 | goto bail; |
630 | } | 630 | } |
@@ -742,7 +742,7 @@ static int ocfs2_wipe_inode(struct inode *inode, | |||
742 | ORPHAN_DIR_SYSTEM_INODE, | 742 | ORPHAN_DIR_SYSTEM_INODE, |
743 | orphaned_slot); | 743 | orphaned_slot); |
744 | if (!orphan_dir_inode) { | 744 | if (!orphan_dir_inode) { |
745 | status = -EEXIST; | 745 | status = -ENOENT; |
746 | mlog_errno(status); | 746 | mlog_errno(status); |
747 | goto bail; | 747 | goto bail; |
748 | } | 748 | } |
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 044013455621..857bbbcd39f3 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c | |||
@@ -666,7 +666,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb, | |||
666 | if (le32_to_cpu(alloc->id1.bitmap1.i_used) != | 666 | if (le32_to_cpu(alloc->id1.bitmap1.i_used) != |
667 | ocfs2_local_alloc_count_bits(alloc)) { | 667 | ocfs2_local_alloc_count_bits(alloc)) { |
668 | ocfs2_error(osb->sb, "local alloc inode %llu says it has " | 668 | ocfs2_error(osb->sb, "local alloc inode %llu says it has " |
669 | "%u free bits, but a count shows %u", | 669 | "%u used bits, but a count shows %u", |
670 | (unsigned long long)le64_to_cpu(alloc->i_blkno), | 670 | (unsigned long long)le64_to_cpu(alloc->i_blkno), |
671 | le32_to_cpu(alloc->id1.bitmap1.i_used), | 671 | le32_to_cpu(alloc->id1.bitmap1.i_used), |
672 | ocfs2_local_alloc_count_bits(alloc)); | 672 | ocfs2_local_alloc_count_bits(alloc)); |
@@ -839,7 +839,7 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, | |||
839 | u32 *numbits, | 839 | u32 *numbits, |
840 | struct ocfs2_alloc_reservation *resv) | 840 | struct ocfs2_alloc_reservation *resv) |
841 | { | 841 | { |
842 | int numfound, bitoff, left, startoff, lastzero; | 842 | int numfound = 0, bitoff, left, startoff, lastzero; |
843 | int local_resv = 0; | 843 | int local_resv = 0; |
844 | struct ocfs2_alloc_reservation r; | 844 | struct ocfs2_alloc_reservation r; |
845 | void *bitmap = NULL; | 845 | void *bitmap = NULL; |
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index b5c3a5ea3ee6..09f90cbf0e24 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
@@ -2322,10 +2322,10 @@ int ocfs2_orphan_del(struct ocfs2_super *osb, | |||
2322 | 2322 | ||
2323 | trace_ocfs2_orphan_del( | 2323 | trace_ocfs2_orphan_del( |
2324 | (unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno, | 2324 | (unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno, |
2325 | name, namelen); | 2325 | name, strlen(name)); |
2326 | 2326 | ||
2327 | /* find it's spot in the orphan directory */ | 2327 | /* find it's spot in the orphan directory */ |
2328 | status = ocfs2_find_entry(name, namelen, orphan_dir_inode, | 2328 | status = ocfs2_find_entry(name, strlen(name), orphan_dir_inode, |
2329 | &lookup); | 2329 | &lookup); |
2330 | if (status) { | 2330 | if (status) { |
2331 | mlog_errno(status); | 2331 | mlog_errno(status); |
@@ -2808,7 +2808,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, | |||
2808 | ORPHAN_DIR_SYSTEM_INODE, | 2808 | ORPHAN_DIR_SYSTEM_INODE, |
2809 | osb->slot_num); | 2809 | osb->slot_num); |
2810 | if (!orphan_dir_inode) { | 2810 | if (!orphan_dir_inode) { |
2811 | status = -EEXIST; | 2811 | status = -ENOENT; |
2812 | mlog_errno(status); | 2812 | mlog_errno(status); |
2813 | goto leave; | 2813 | goto leave; |
2814 | } | 2814 | } |
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index ee541f92dab4..df3a500789c7 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c | |||
@@ -4276,7 +4276,7 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir, | |||
4276 | error = posix_acl_create(dir, &mode, &default_acl, &acl); | 4276 | error = posix_acl_create(dir, &mode, &default_acl, &acl); |
4277 | if (error) { | 4277 | if (error) { |
4278 | mlog_errno(error); | 4278 | mlog_errno(error); |
4279 | goto out; | 4279 | return error; |
4280 | } | 4280 | } |
4281 | 4281 | ||
4282 | error = ocfs2_create_inode_in_orphan(dir, mode, | 4282 | error = ocfs2_create_inode_in_orphan(dir, mode, |
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index d5493e361a38..e78a203d44c8 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c | |||
@@ -427,7 +427,7 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb) | |||
427 | if (!si) { | 427 | if (!si) { |
428 | status = -ENOMEM; | 428 | status = -ENOMEM; |
429 | mlog_errno(status); | 429 | mlog_errno(status); |
430 | goto bail; | 430 | return status; |
431 | } | 431 | } |
432 | 432 | ||
433 | si->si_extended = ocfs2_uses_extended_slot_map(osb); | 433 | si->si_extended = ocfs2_uses_extended_slot_map(osb); |
@@ -452,7 +452,7 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb) | |||
452 | 452 | ||
453 | osb->slot_info = (struct ocfs2_slot_info *)si; | 453 | osb->slot_info = (struct ocfs2_slot_info *)si; |
454 | bail: | 454 | bail: |
455 | if (status < 0 && si) | 455 | if (status < 0) |
456 | __ocfs2_free_slot_info(si); | 456 | __ocfs2_free_slot_info(si); |
457 | 457 | ||
458 | return status; | 458 | return status; |
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index 1724d43d3da1..220cae7bbdbc 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c | |||
@@ -295,7 +295,7 @@ static int o2cb_cluster_check(void) | |||
295 | set_bit(node_num, netmap); | 295 | set_bit(node_num, netmap); |
296 | if (!memcmp(hbmap, netmap, sizeof(hbmap))) | 296 | if (!memcmp(hbmap, netmap, sizeof(hbmap))) |
297 | return 0; | 297 | return 0; |
298 | if (i < O2CB_MAP_STABILIZE_COUNT) | 298 | if (i < O2CB_MAP_STABILIZE_COUNT - 1) |
299 | msleep(1000); | 299 | msleep(1000); |
300 | } | 300 | } |
301 | 301 | ||
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index 720aa389e0ea..2768eb1da2b8 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c | |||
@@ -1004,10 +1004,8 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn) | |||
1004 | BUG_ON(conn == NULL); | 1004 | BUG_ON(conn == NULL); |
1005 | 1005 | ||
1006 | lc = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL); | 1006 | lc = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL); |
1007 | if (!lc) { | 1007 | if (!lc) |
1008 | rc = -ENOMEM; | 1008 | return -ENOMEM; |
1009 | goto out; | ||
1010 | } | ||
1011 | 1009 | ||
1012 | init_waitqueue_head(&lc->oc_wait); | 1010 | init_waitqueue_head(&lc->oc_wait); |
1013 | init_completion(&lc->oc_sync_wait); | 1011 | init_completion(&lc->oc_sync_wait); |
@@ -1063,7 +1061,7 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn) | |||
1063 | } | 1061 | } |
1064 | 1062 | ||
1065 | out: | 1063 | out: |
1066 | if (rc && lc) | 1064 | if (rc) |
1067 | kfree(lc); | 1065 | kfree(lc); |
1068 | return rc; | 1066 | return rc; |
1069 | } | 1067 | } |
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 0cb889a17ae1..4479029630bb 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
@@ -2499,6 +2499,8 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle, | |||
2499 | alloc_bh, OCFS2_JOURNAL_ACCESS_WRITE); | 2499 | alloc_bh, OCFS2_JOURNAL_ACCESS_WRITE); |
2500 | if (status < 0) { | 2500 | if (status < 0) { |
2501 | mlog_errno(status); | 2501 | mlog_errno(status); |
2502 | ocfs2_block_group_set_bits(handle, alloc_inode, group, group_bh, | ||
2503 | start_bit, count); | ||
2502 | goto bail; | 2504 | goto bail; |
2503 | } | 2505 | } |
2504 | 2506 | ||
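The line added to _ocfs2_free_suballoc_bits() above appears to roll back the earlier clearing of the group bits when the later journal-access step fails, so the in-memory bitmap is not left out of sync with what will reach disk. A minimal sketch of that undo-on-error shape, with hypothetical helpers rather than the ocfs2 functions:

    /* Sketch only: the three helpers below are hypothetical, not ocfs2 APIs.
     * The point is the shape: if a later step fails, undo the earlier one. */
    static int do_first_step(void)    { return 0; }  /* e.g. clear bits in a bitmap */
    static void undo_first_step(void) { }            /* e.g. re-set those bits */
    static int do_second_step(void)   { return 0; }  /* e.g. get journal write access */

    static int two_step_update(void)
    {
            int status;

            status = do_first_step();
            if (status < 0)
                    return status;

            status = do_second_step();
            if (status < 0) {
                    undo_first_step();      /* roll back before bailing out */
                    return status;
            }

            return 0;
    }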
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 26675185b886..837ddce4b659 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -1112,7 +1112,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
1112 | 1112 | ||
1113 | osb->osb_debug_root = debugfs_create_dir(osb->uuid_str, | 1113 | osb->osb_debug_root = debugfs_create_dir(osb->uuid_str, |
1114 | ocfs2_debugfs_root); | 1114 | ocfs2_debugfs_root); |
1115 | if (!osb->osb_debug_root) { | 1115 | if (IS_ERR_OR_NULL(osb->osb_debug_root)) { |
1116 | status = -EINVAL; | 1116 | status = -EINVAL; |
1117 | mlog(ML_ERROR, "Unable to create per-mount debugfs root.\n"); | 1117 | mlog(ML_ERROR, "Unable to create per-mount debugfs root.\n"); |
1118 | goto read_super_error; | 1118 | goto read_super_error; |
@@ -1122,7 +1122,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
1122 | osb->osb_debug_root, | 1122 | osb->osb_debug_root, |
1123 | osb, | 1123 | osb, |
1124 | &ocfs2_osb_debug_fops); | 1124 | &ocfs2_osb_debug_fops); |
1125 | if (!osb->osb_ctxt) { | 1125 | if (IS_ERR_OR_NULL(osb->osb_ctxt)) { |
1126 | status = -EINVAL; | 1126 | status = -EINVAL; |
1127 | mlog_errno(status); | 1127 | mlog_errno(status); |
1128 | goto read_super_error; | 1128 | goto read_super_error; |
@@ -1606,8 +1606,9 @@ static int __init ocfs2_init(void) | |||
1606 | } | 1606 | } |
1607 | 1607 | ||
1608 | ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL); | 1608 | ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL); |
1609 | if (!ocfs2_debugfs_root) { | 1609 | if (IS_ERR_OR_NULL(ocfs2_debugfs_root)) { |
1610 | status = -ENOMEM; | 1610 | status = ocfs2_debugfs_root ? |
1611 | PTR_ERR(ocfs2_debugfs_root) : -ENOMEM; | ||
1611 | mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n"); | 1612 | mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n"); |
1612 | goto out4; | 1613 | goto out4; |
1613 | } | 1614 | } |
@@ -2069,6 +2070,8 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
2069 | cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits); | 2070 | cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits); |
2070 | bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits); | 2071 | bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits); |
2071 | sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits); | 2072 | sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits); |
2073 | memcpy(sb->s_uuid, di->id2.i_super.s_uuid, | ||
2074 | sizeof(di->id2.i_super.s_uuid)); | ||
2072 | 2075 | ||
2073 | osb->osb_dx_mask = (1 << (cbits - bbits)) - 1; | 2076 | osb->osb_dx_mask = (1 << (cbits - bbits)) - 1; |
2074 | 2077 | ||
@@ -2333,7 +2336,7 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
2333 | mlog_errno(status); | 2336 | mlog_errno(status); |
2334 | goto bail; | 2337 | goto bail; |
2335 | } | 2338 | } |
2336 | cleancache_init_shared_fs((char *)&di->id2.i_super.s_uuid, sb); | 2339 | cleancache_init_shared_fs(sb); |
2337 | 2340 | ||
2338 | bail: | 2341 | bail: |
2339 | return status; | 2342 | return status; |
@@ -2563,22 +2566,22 @@ static void ocfs2_handle_error(struct super_block *sb) | |||
2563 | ocfs2_set_ro_flag(osb, 0); | 2566 | ocfs2_set_ro_flag(osb, 0); |
2564 | } | 2567 | } |
2565 | 2568 | ||
2566 | static char error_buf[1024]; | 2569 | void __ocfs2_error(struct super_block *sb, const char *function, |
2567 | 2570 | const char *fmt, ...) | |
2568 | void __ocfs2_error(struct super_block *sb, | ||
2569 | const char *function, | ||
2570 | const char *fmt, ...) | ||
2571 | { | 2571 | { |
2572 | struct va_format vaf; | ||
2572 | va_list args; | 2573 | va_list args; |
2573 | 2574 | ||
2574 | va_start(args, fmt); | 2575 | va_start(args, fmt); |
2575 | vsnprintf(error_buf, sizeof(error_buf), fmt, args); | 2576 | vaf.fmt = fmt; |
2576 | va_end(args); | 2577 | vaf.va = &args; |
2577 | 2578 | ||
2578 | /* Not using mlog here because we want to show the actual | 2579 | /* Not using mlog here because we want to show the actual |
2579 | * function the error came from. */ | 2580 | * function the error came from. */ |
2580 | printk(KERN_CRIT "OCFS2: ERROR (device %s): %s: %s\n", | 2581 | printk(KERN_CRIT "OCFS2: ERROR (device %s): %s: %pV\n", |
2581 | sb->s_id, function, error_buf); | 2582 | sb->s_id, function, &vaf); |
2583 | |||
2584 | va_end(args); | ||
2582 | 2585 | ||
2583 | ocfs2_handle_error(sb); | 2586 | ocfs2_handle_error(sb); |
2584 | } | 2587 | } |
@@ -2586,18 +2589,21 @@ void __ocfs2_error(struct super_block *sb, | |||
2586 | /* Handle critical errors. This is intentionally more drastic than | 2589 | /* Handle critical errors. This is intentionally more drastic than |
2587 | * ocfs2_handle_error, so we only use for things like journal errors, | 2590 | * ocfs2_handle_error, so we only use for things like journal errors, |
2588 | * etc. */ | 2591 | * etc. */ |
2589 | void __ocfs2_abort(struct super_block* sb, | 2592 | void __ocfs2_abort(struct super_block *sb, const char *function, |
2590 | const char *function, | ||
2591 | const char *fmt, ...) | 2593 | const char *fmt, ...) |
2592 | { | 2594 | { |
2595 | struct va_format vaf; | ||
2593 | va_list args; | 2596 | va_list args; |
2594 | 2597 | ||
2595 | va_start(args, fmt); | 2598 | va_start(args, fmt); |
2596 | vsnprintf(error_buf, sizeof(error_buf), fmt, args); | ||
2597 | va_end(args); | ||
2598 | 2599 | ||
2599 | printk(KERN_CRIT "OCFS2: abort (device %s): %s: %s\n", | 2600 | vaf.fmt = fmt; |
2600 | sb->s_id, function, error_buf); | 2601 | vaf.va = &args; |
2602 | |||
2603 | printk(KERN_CRIT "OCFS2: abort (device %s): %s: %pV\n", | ||
2604 | sb->s_id, function, &vaf); | ||
2605 | |||
2606 | va_end(args); | ||
2601 | 2607 | ||
2602 | /* We don't have the cluster support yet to go straight to | 2608 | /* We don't have the cluster support yet to go straight to |
2603 | * hard readonly in here. Until then, we want to keep | 2609 | * hard readonly in here. Until then, we want to keep |
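The rewrite of __ocfs2_error()/__ocfs2_abort() above drops the file-wide static error_buf, which was shared by every caller, in favour of the kernel's struct va_format and the %pV printk specifier, so the caller's varargs are formatted directly inside the final printk. A minimal sketch of the same technique, with an illustrative helper name rather than an ocfs2 function:

    #include <linux/kernel.h>
    #include <linux/printk.h>

    /* Illustrative helper (not an ocfs2 API): forward a printf-style
     * format to printk via %pV instead of a shared static buffer. */
    static void my_report(const char *function, const char *fmt, ...)
    {
            struct va_format vaf;
            va_list args;

            va_start(args, fmt);
            vaf.fmt = fmt;
            vaf.va = &args;

            /* args must remain live until the printk that consumes &vaf */
            printk(KERN_WARNING "demo (%s): %pV\n", function, &vaf);

            va_end(args);
    }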
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 85b190dc132f..4ca7533be479 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c | |||
@@ -1238,6 +1238,10 @@ static int ocfs2_xattr_block_get(struct inode *inode, | |||
1238 | i, | 1238 | i, |
1239 | &block_off, | 1239 | &block_off, |
1240 | &name_offset); | 1240 | &name_offset); |
1241 | if (ret) { | ||
1242 | mlog_errno(ret); | ||
1243 | goto cleanup; | ||
1244 | } | ||
1241 | xs->base = bucket_block(xs->bucket, block_off); | 1245 | xs->base = bucket_block(xs->bucket, block_off); |
1242 | } | 1246 | } |
1243 | if (ocfs2_xattr_is_local(xs->here)) { | 1247 | if (ocfs2_xattr_is_local(xs->here)) { |
@@ -5665,6 +5669,10 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode, | |||
5665 | 5669 | ||
5666 | ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, | 5670 | ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, |
5667 | i, &xv, NULL); | 5671 | i, &xv, NULL); |
5672 | if (ret) { | ||
5673 | mlog_errno(ret); | ||
5674 | break; | ||
5675 | } | ||
5668 | 5676 | ||
5669 | ret = ocfs2_lock_xattr_remove_allocators(inode, xv, | 5677 | ret = ocfs2_lock_xattr_remove_allocators(inode, xv, |
5670 | args->ref_ci, | 5678 | args->ref_ci, |
diff --git a/fs/super.c b/fs/super.c index 2b7dc90ccdbb..928c20f47af9 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -224,7 +224,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) | |||
224 | s->s_maxbytes = MAX_NON_LFS; | 224 | s->s_maxbytes = MAX_NON_LFS; |
225 | s->s_op = &default_op; | 225 | s->s_op = &default_op; |
226 | s->s_time_gran = 1000000000; | 226 | s->s_time_gran = 1000000000; |
227 | s->cleancache_poolid = -1; | 227 | s->cleancache_poolid = CLEANCACHE_NO_POOL; |
228 | 228 | ||
229 | s->s_shrink.seeks = DEFAULT_SEEKS; | 229 | s->s_shrink.seeks = DEFAULT_SEEKS; |
230 | s->s_shrink.scan_objects = super_cache_scan; | 230 | s->s_shrink.scan_objects = super_cache_scan; |
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 4d46085c1b90..39f1d6a2b04d 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h | |||
@@ -6,6 +6,12 @@ | |||
6 | 6 | ||
7 | #include <linux/mm_types.h> | 7 | #include <linux/mm_types.h> |
8 | #include <linux/bug.h> | 8 | #include <linux/bug.h> |
9 | #include <linux/errno.h> | ||
10 | |||
11 | #if 4 - defined(__PAGETABLE_PUD_FOLDED) - defined(__PAGETABLE_PMD_FOLDED) != \ | ||
12 | CONFIG_PGTABLE_LEVELS | ||
13 | #error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{PUD,PMD}_FOLDED | ||
14 | #endif | ||
9 | 15 | ||
10 | /* | 16 | /* |
11 | * On almost all architectures and configurations, 0 can be used as the | 17 | * On almost all architectures and configurations, 0 can be used as the |
@@ -691,6 +697,30 @@ static inline int pmd_protnone(pmd_t pmd) | |||
691 | 697 | ||
692 | #endif /* CONFIG_MMU */ | 698 | #endif /* CONFIG_MMU */ |
693 | 699 | ||
700 | #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP | ||
701 | int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot); | ||
702 | int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot); | ||
703 | int pud_clear_huge(pud_t *pud); | ||
704 | int pmd_clear_huge(pmd_t *pmd); | ||
705 | #else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */ | ||
706 | static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) | ||
707 | { | ||
708 | return 0; | ||
709 | } | ||
710 | static inline int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) | ||
711 | { | ||
712 | return 0; | ||
713 | } | ||
714 | static inline int pud_clear_huge(pud_t *pud) | ||
715 | { | ||
716 | return 0; | ||
717 | } | ||
718 | static inline int pmd_clear_huge(pmd_t *pmd) | ||
719 | { | ||
720 | return 0; | ||
721 | } | ||
722 | #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ | ||
723 | |||
694 | #endif /* !__ASSEMBLY__ */ | 724 | #endif /* !__ASSEMBLY__ */ |
695 | 725 | ||
696 | #ifndef io_remap_pfn_range | 726 | #ifndef io_remap_pfn_range |
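The new preprocessor check in asm-generic/pgtable.h encodes the rule that each folded level subtracts one from the nominal four page-table levels. As a worked example (these configurations are typical cases, not taken from this patch): with neither __PAGETABLE_PUD_FOLDED nor __PAGETABLE_PMD_FOLDED defined the expression is 4 - 0 - 0 = 4, so CONFIG_PGTABLE_LEVELS must be 4; with only the PUD folded it is 4 - 1 - 0 = 3; with both folded it is 4 - 1 - 1 = 2, and any mismatch now fails the build. The pud/pmd_set_huge() and *_clear_huge() stubs below it follow the usual pattern of returning 0 ("not handled") when CONFIG_HAVE_ARCH_HUGE_VMAP is not selected.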
diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h index 4ce9056b31a8..bda5ec0b4b4d 100644 --- a/include/linux/cleancache.h +++ b/include/linux/cleancache.h | |||
@@ -5,6 +5,10 @@ | |||
5 | #include <linux/exportfs.h> | 5 | #include <linux/exportfs.h> |
6 | #include <linux/mm.h> | 6 | #include <linux/mm.h> |
7 | 7 | ||
8 | #define CLEANCACHE_NO_POOL -1 | ||
9 | #define CLEANCACHE_NO_BACKEND -2 | ||
10 | #define CLEANCACHE_NO_BACKEND_SHARED -3 | ||
11 | |||
8 | #define CLEANCACHE_KEY_MAX 6 | 12 | #define CLEANCACHE_KEY_MAX 6 |
9 | 13 | ||
10 | /* | 14 | /* |
@@ -33,10 +37,9 @@ struct cleancache_ops { | |||
33 | void (*invalidate_fs)(int); | 37 | void (*invalidate_fs)(int); |
34 | }; | 38 | }; |
35 | 39 | ||
36 | extern struct cleancache_ops * | 40 | extern int cleancache_register_ops(struct cleancache_ops *ops); |
37 | cleancache_register_ops(struct cleancache_ops *ops); | ||
38 | extern void __cleancache_init_fs(struct super_block *); | 41 | extern void __cleancache_init_fs(struct super_block *); |
39 | extern void __cleancache_init_shared_fs(char *, struct super_block *); | 42 | extern void __cleancache_init_shared_fs(struct super_block *); |
40 | extern int __cleancache_get_page(struct page *); | 43 | extern int __cleancache_get_page(struct page *); |
41 | extern void __cleancache_put_page(struct page *); | 44 | extern void __cleancache_put_page(struct page *); |
42 | extern void __cleancache_invalidate_page(struct address_space *, struct page *); | 45 | extern void __cleancache_invalidate_page(struct address_space *, struct page *); |
@@ -78,10 +81,10 @@ static inline void cleancache_init_fs(struct super_block *sb) | |||
78 | __cleancache_init_fs(sb); | 81 | __cleancache_init_fs(sb); |
79 | } | 82 | } |
80 | 83 | ||
81 | static inline void cleancache_init_shared_fs(char *uuid, struct super_block *sb) | 84 | static inline void cleancache_init_shared_fs(struct super_block *sb) |
82 | { | 85 | { |
83 | if (cleancache_enabled) | 86 | if (cleancache_enabled) |
84 | __cleancache_init_shared_fs(uuid, sb); | 87 | __cleancache_init_shared_fs(sb); |
85 | } | 88 | } |
86 | 89 | ||
87 | static inline int cleancache_get_page(struct page *page) | 90 | static inline int cleancache_get_page(struct page *page) |
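__cleancache_init_shared_fs() no longer takes a separate uuid argument, so the shared-pool key now has to come from sb->s_uuid; that is why the ocfs2 hunk earlier in this patch copies the on-disk UUID into sb->s_uuid before calling cleancache_init_shared_fs(sb). A hedged sketch of what a clustered filesystem's fill_super path would now do (everything except sb->s_uuid and the cleancache call is illustrative):

    #include <linux/fs.h>
    #include <linux/string.h>
    #include <linux/cleancache.h>

    /* Illustrative only: 'ondisk_uuid' stands in for a filesystem's
     * on-disk superblock UUID; the pattern mirrors the ocfs2 change above. */
    static void my_fs_start_cleancache(struct super_block *sb,
                                       const u8 ondisk_uuid[16])
    {
            memcpy(sb->s_uuid, ondisk_uuid, sizeof(sb->s_uuid));
            cleancache_init_shared_fs(sb);
    }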
diff --git a/include/linux/cma.h b/include/linux/cma.h index 9384ba66e975..f7ef093ec49a 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h | |||
@@ -16,16 +16,16 @@ | |||
16 | struct cma; | 16 | struct cma; |
17 | 17 | ||
18 | extern unsigned long totalcma_pages; | 18 | extern unsigned long totalcma_pages; |
19 | extern phys_addr_t cma_get_base(struct cma *cma); | 19 | extern phys_addr_t cma_get_base(const struct cma *cma); |
20 | extern unsigned long cma_get_size(struct cma *cma); | 20 | extern unsigned long cma_get_size(const struct cma *cma); |
21 | 21 | ||
22 | extern int __init cma_declare_contiguous(phys_addr_t base, | 22 | extern int __init cma_declare_contiguous(phys_addr_t base, |
23 | phys_addr_t size, phys_addr_t limit, | 23 | phys_addr_t size, phys_addr_t limit, |
24 | phys_addr_t alignment, unsigned int order_per_bit, | 24 | phys_addr_t alignment, unsigned int order_per_bit, |
25 | bool fixed, struct cma **res_cma); | 25 | bool fixed, struct cma **res_cma); |
26 | extern int cma_init_reserved_mem(phys_addr_t base, | 26 | extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, |
27 | phys_addr_t size, int order_per_bit, | 27 | unsigned int order_per_bit, |
28 | struct cma **res_cma); | 28 | struct cma **res_cma); |
29 | extern struct page *cma_alloc(struct cma *cma, int count, unsigned int align); | 29 | extern struct page *cma_alloc(struct cma *cma, unsigned int count, unsigned int align); |
30 | extern bool cma_release(struct cma *cma, struct page *pages, int count); | 30 | extern bool cma_release(struct cma *cma, const struct page *pages, unsigned int count); |
31 | #endif | 31 | #endif |
diff --git a/include/linux/elf-randomize.h b/include/linux/elf-randomize.h new file mode 100644 index 000000000000..b5f0bda9472e --- /dev/null +++ b/include/linux/elf-randomize.h | |||
@@ -0,0 +1,22 @@ | |||
1 | #ifndef _ELF_RANDOMIZE_H | ||
2 | #define _ELF_RANDOMIZE_H | ||
3 | |||
4 | struct mm_struct; | ||
5 | |||
6 | #ifndef CONFIG_ARCH_HAS_ELF_RANDOMIZE | ||
7 | static inline unsigned long arch_mmap_rnd(void) { return 0; } | ||
8 | # if defined(arch_randomize_brk) && defined(CONFIG_COMPAT_BRK) | ||
9 | # define compat_brk_randomized | ||
10 | # endif | ||
11 | # ifndef arch_randomize_brk | ||
12 | # define arch_randomize_brk(mm) (mm->brk) | ||
13 | # endif | ||
14 | #else | ||
15 | extern unsigned long arch_mmap_rnd(void); | ||
16 | extern unsigned long arch_randomize_brk(struct mm_struct *mm); | ||
17 | # ifdef CONFIG_COMPAT_BRK | ||
18 | # define compat_brk_randomized | ||
19 | # endif | ||
20 | #endif | ||
21 | |||
22 | #endif | ||
diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 51bd1e72a917..97a9373e61e8 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h | |||
@@ -57,8 +57,10 @@ struct vm_area_struct; | |||
57 | * _might_ fail. This depends upon the particular VM implementation. | 57 | * _might_ fail. This depends upon the particular VM implementation. |
58 | * | 58 | * |
59 | * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller | 59 | * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller |
60 | * cannot handle allocation failures. This modifier is deprecated and no new | 60 | * cannot handle allocation failures. New users should be evaluated carefully |
61 | * users should be added. | 61 | * (and the flag should be used only when there is no reasonable failure policy) |
62 | * but it is definitely preferable to use the flag rather than opencode endless | ||
63 | * loop around allocator. | ||
62 | * | 64 | * |
63 | * __GFP_NORETRY: The VM implementation must not retry indefinitely. | 65 | * __GFP_NORETRY: The VM implementation must not retry indefinitely. |
64 | * | 66 | * |
@@ -117,16 +119,6 @@ struct vm_area_struct; | |||
117 | __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \ | 119 | __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \ |
118 | __GFP_NO_KSWAPD) | 120 | __GFP_NO_KSWAPD) |
119 | 121 | ||
120 | /* | ||
121 | * GFP_THISNODE does not perform any reclaim, you most likely want to | ||
122 | * use __GFP_THISNODE to allocate from a given node without fallback! | ||
123 | */ | ||
124 | #ifdef CONFIG_NUMA | ||
125 | #define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY) | ||
126 | #else | ||
127 | #define GFP_THISNODE ((__force gfp_t)0) | ||
128 | #endif | ||
129 | |||
130 | /* This mask makes up all the page movable related flags */ | 122 | /* This mask makes up all the page movable related flags */ |
131 | #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) | 123 | #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) |
132 | 124 | ||
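The reworded __GFP_NOFAIL comment above says that when a caller truly has no failure policy, passing the flag is preferable to open-coding an endless retry loop around the allocator. A minimal sketch of the two shapes being contrasted (purely illustrative; GFP_NOFS is just an example base flag):

    #include <linux/slab.h>
    #include <linux/gfp.h>

    /* Discouraged shape: open-coded endless loop around the allocator. */
    static void *must_succeed_opencoded(size_t size)
    {
            void *p;

            do {
                    p = kmalloc(size, GFP_NOFS);
            } while (!p);

            return p;
    }

    /* Shape preferred by the updated comment: let the allocator retry. */
    static void *must_succeed_nofail(size_t size)
    {
            return kmalloc(size, GFP_NOFS | __GFP_NOFAIL);
    }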
diff --git a/include/linux/io.h b/include/linux/io.h index fa02e55e5a2e..4cc299c598e0 100644 --- a/include/linux/io.h +++ b/include/linux/io.h | |||
@@ -38,6 +38,14 @@ static inline int ioremap_page_range(unsigned long addr, unsigned long end, | |||
38 | } | 38 | } |
39 | #endif | 39 | #endif |
40 | 40 | ||
41 | #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP | ||
42 | void __init ioremap_huge_init(void); | ||
43 | int arch_ioremap_pud_supported(void); | ||
44 | int arch_ioremap_pmd_supported(void); | ||
45 | #else | ||
46 | static inline void ioremap_huge_init(void) { } | ||
47 | #endif | ||
48 | |||
41 | /* | 49 | /* |
42 | * Managed iomap interface | 50 | * Managed iomap interface |
43 | */ | 51 | */ |
diff --git a/include/linux/memblock.h b/include/linux/memblock.h index e8cc45307f8f..9497ec7c77ea 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h | |||
@@ -365,6 +365,14 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo | |||
365 | #define __initdata_memblock | 365 | #define __initdata_memblock |
366 | #endif | 366 | #endif |
367 | 367 | ||
368 | #ifdef CONFIG_MEMTEST | ||
369 | extern void early_memtest(phys_addr_t start, phys_addr_t end); | ||
370 | #else | ||
371 | static inline void early_memtest(phys_addr_t start, phys_addr_t end) | ||
372 | { | ||
373 | } | ||
374 | #endif | ||
375 | |||
368 | #else | 376 | #else |
369 | static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align) | 377 | static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align) |
370 | { | 378 | { |
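With the stub added above, architecture setup code can call early_memtest() unconditionally and it compiles away when CONFIG_MEMTEST is off. A hedged sketch of such a call site (the surrounding function and the range are hypothetical, not lifted from any architecture in this patch):

    #include <linux/init.h>
    #include <linux/memblock.h>

    /* Hypothetical arch hook: pattern-test a physical range before the
     * page allocator takes over; a no-op without CONFIG_MEMTEST. */
    static void __init my_arch_memtest(phys_addr_t start, phys_addr_t end)
    {
            early_memtest(start, end);
    }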
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 8f1a41951df9..6ffa0ac7f7d6 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h | |||
@@ -192,6 +192,9 @@ extern void get_page_bootmem(unsigned long ingo, struct page *page, | |||
192 | void get_online_mems(void); | 192 | void get_online_mems(void); |
193 | void put_online_mems(void); | 193 | void put_online_mems(void); |
194 | 194 | ||
195 | void mem_hotplug_begin(void); | ||
196 | void mem_hotplug_done(void); | ||
197 | |||
195 | #else /* ! CONFIG_MEMORY_HOTPLUG */ | 198 | #else /* ! CONFIG_MEMORY_HOTPLUG */ |
196 | /* | 199 | /* |
197 | * Stub functions for when hotplug is off | 200 | * Stub functions for when hotplug is off |
@@ -231,6 +234,9 @@ static inline int try_online_node(int nid) | |||
231 | static inline void get_online_mems(void) {} | 234 | static inline void get_online_mems(void) {} |
232 | static inline void put_online_mems(void) {} | 235 | static inline void put_online_mems(void) {} |
233 | 236 | ||
237 | static inline void mem_hotplug_begin(void) {} | ||
238 | static inline void mem_hotplug_done(void) {} | ||
239 | |||
234 | #endif /* ! CONFIG_MEMORY_HOTPLUG */ | 240 | #endif /* ! CONFIG_MEMORY_HOTPLUG */ |
235 | 241 | ||
236 | #ifdef CONFIG_MEMORY_HOTREMOVE | 242 | #ifdef CONFIG_MEMORY_HOTREMOVE |
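mem_hotplug_begin()/mem_hotplug_done() are now declared here, with empty stubs when CONFIG_MEMORY_HOTPLUG is off, presumably so callers can bracket work that must not race with memory going on- or off-line. A hedged sketch of that bracketing (the body in the middle is a placeholder, not a real kernel function):

    #include <linux/memory_hotplug.h>

    /* Placeholder for work that must see a stable set of online memory. */
    static void my_scan_memory(void) { }

    static void my_scan_memory_stable(void)
    {
            mem_hotplug_begin();
            my_scan_memory();
            mem_hotplug_done();
    }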
diff --git a/include/linux/mempool.h b/include/linux/mempool.h index 39ed62ab5b8a..b19b3023c880 100644 --- a/include/linux/mempool.h +++ b/include/linux/mempool.h | |||
@@ -29,7 +29,7 @@ extern mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn, | |||
29 | mempool_free_t *free_fn, void *pool_data, | 29 | mempool_free_t *free_fn, void *pool_data, |
30 | gfp_t gfp_mask, int nid); | 30 | gfp_t gfp_mask, int nid); |
31 | 31 | ||
32 | extern int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask); | 32 | extern int mempool_resize(mempool_t *pool, int new_min_nr); |
33 | extern void mempool_destroy(mempool_t *pool); | 33 | extern void mempool_destroy(mempool_t *pool); |
34 | extern void * mempool_alloc(mempool_t *pool, gfp_t gfp_mask); | 34 | extern void * mempool_alloc(mempool_t *pool, gfp_t gfp_mask); |
35 | extern void mempool_free(void *element, mempool_t *pool); | 35 | extern void mempool_free(void *element, mempool_t *pool); |
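mempool_resize() drops its gfp_mask parameter, which implies the resize is now always performed from a blocking, process context rather than with a caller-chosen mask. A hedged usage sketch (the pool and the new size are illustrative):

    #include <linux/mempool.h>

    /* Illustrative: raise a pool's minimum number of reserved elements.
     * With this change the caller no longer supplies a gfp mask. */
    static int my_grow_reserve(mempool_t *pool)
    {
            return mempool_resize(pool, 64);
    }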
diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 78baed5f2952..cac1c0904d5f 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h | |||
@@ -69,7 +69,6 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping, | |||
69 | extern bool pmd_trans_migrating(pmd_t pmd); | 69 | extern bool pmd_trans_migrating(pmd_t pmd); |
70 | extern int migrate_misplaced_page(struct page *page, | 70 | extern int migrate_misplaced_page(struct page *page, |
71 | struct vm_area_struct *vma, int node); | 71 | struct vm_area_struct *vma, int node); |
72 | extern bool migrate_ratelimited(int node); | ||
73 | #else | 72 | #else |
74 | static inline bool pmd_trans_migrating(pmd_t pmd) | 73 | static inline bool pmd_trans_migrating(pmd_t pmd) |
75 | { | 74 | { |
@@ -80,10 +79,6 @@ static inline int migrate_misplaced_page(struct page *page, | |||
80 | { | 79 | { |
81 | return -EAGAIN; /* can't migrate now */ | 80 | return -EAGAIN; /* can't migrate now */ |
82 | } | 81 | } |
83 | static inline bool migrate_ratelimited(int node) | ||
84 | { | ||
85 | return false; | ||
86 | } | ||
87 | #endif /* CONFIG_NUMA_BALANCING */ | 82 | #endif /* CONFIG_NUMA_BALANCING */ |
88 | 83 | ||
89 | #if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE) | 84 | #if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE) |
diff --git a/include/linux/mm.h b/include/linux/mm.h index 47a93928b90f..6571dd78e984 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -1294,9 +1294,11 @@ int __set_page_dirty_no_writeback(struct page *page); | |||
1294 | int redirty_page_for_writepage(struct writeback_control *wbc, | 1294 | int redirty_page_for_writepage(struct writeback_control *wbc, |
1295 | struct page *page); | 1295 | struct page *page); |
1296 | void account_page_dirtied(struct page *page, struct address_space *mapping); | 1296 | void account_page_dirtied(struct page *page, struct address_space *mapping); |
1297 | void account_page_cleaned(struct page *page, struct address_space *mapping); | ||
1297 | int set_page_dirty(struct page *page); | 1298 | int set_page_dirty(struct page *page); |
1298 | int set_page_dirty_lock(struct page *page); | 1299 | int set_page_dirty_lock(struct page *page); |
1299 | int clear_page_dirty_for_io(struct page *page); | 1300 | int clear_page_dirty_for_io(struct page *page); |
1301 | |||
1300 | int get_cmdline(struct task_struct *task, char *buffer, int buflen); | 1302 | int get_cmdline(struct task_struct *task, char *buffer, int buflen); |
1301 | 1303 | ||
1302 | /* Is the vma a continuation of the stack vma above it? */ | 1304 | /* Is the vma a continuation of the stack vma above it? */ |
@@ -2109,7 +2111,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma, | |||
2109 | #define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */ | 2111 | #define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */ |
2110 | #define FOLL_NOWAIT 0x20 /* if a disk transfer is needed, start the IO | 2112 | #define FOLL_NOWAIT 0x20 /* if a disk transfer is needed, start the IO |
2111 | * and return without waiting upon it */ | 2113 | * and return without waiting upon it */ |
2112 | #define FOLL_MLOCK 0x40 /* mark page as mlocked */ | 2114 | #define FOLL_POPULATE 0x40 /* fault in page */ |
2113 | #define FOLL_SPLIT 0x80 /* don't return transhuge pages, split them */ | 2115 | #define FOLL_SPLIT 0x80 /* don't return transhuge pages, split them */ |
2114 | #define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ | 2116 | #define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ |
2115 | #define FOLL_NUMA 0x200 /* force NUMA hinting page fault */ | 2117 | #define FOLL_NUMA 0x200 /* force NUMA hinting page fault */ |
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 199a03aab8dc..590630eb59ba 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h | |||
@@ -364,7 +364,9 @@ struct mm_struct { | |||
364 | atomic_t mm_users; /* How many users with user space? */ | 364 | atomic_t mm_users; /* How many users with user space? */ |
365 | atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */ | 365 | atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */ |
366 | atomic_long_t nr_ptes; /* PTE page table pages */ | 366 | atomic_long_t nr_ptes; /* PTE page table pages */ |
367 | #if CONFIG_PGTABLE_LEVELS > 2 | ||
367 | atomic_long_t nr_pmds; /* PMD page table pages */ | 368 | atomic_long_t nr_pmds; /* PMD page table pages */ |
369 | #endif | ||
368 | int map_count; /* number of VMAs */ | 370 | int map_count; /* number of VMAs */ |
369 | 371 | ||
370 | spinlock_t page_table_lock; /* Protects page tables and some counters */ | 372 | spinlock_t page_table_lock; /* Protects page tables and some counters */ |
diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 9b2022ab4d85..3d46fb4708e0 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h | |||
@@ -25,16 +25,11 @@ static inline void touch_nmi_watchdog(void) | |||
25 | #endif | 25 | #endif |
26 | 26 | ||
27 | #if defined(CONFIG_HARDLOCKUP_DETECTOR) | 27 | #if defined(CONFIG_HARDLOCKUP_DETECTOR) |
28 | extern void watchdog_enable_hardlockup_detector(bool val); | 28 | extern void hardlockup_detector_disable(void); |
29 | extern bool watchdog_hardlockup_detector_is_enabled(void); | ||
30 | #else | 29 | #else |
31 | static inline void watchdog_enable_hardlockup_detector(bool val) | 30 | static inline void hardlockup_detector_disable(void) |
32 | { | 31 | { |
33 | } | 32 | } |
34 | static inline bool watchdog_hardlockup_detector_is_enabled(void) | ||
35 | { | ||
36 | return true; | ||
37 | } | ||
38 | #endif | 33 | #endif |
39 | 34 | ||
40 | /* | 35 | /* |
@@ -68,12 +63,20 @@ static inline bool trigger_allbutself_cpu_backtrace(void) | |||
68 | #ifdef CONFIG_LOCKUP_DETECTOR | 63 | #ifdef CONFIG_LOCKUP_DETECTOR |
69 | int hw_nmi_is_cpu_stuck(struct pt_regs *); | 64 | int hw_nmi_is_cpu_stuck(struct pt_regs *); |
70 | u64 hw_nmi_get_sample_period(int watchdog_thresh); | 65 | u64 hw_nmi_get_sample_period(int watchdog_thresh); |
66 | extern int nmi_watchdog_enabled; | ||
67 | extern int soft_watchdog_enabled; | ||
71 | extern int watchdog_user_enabled; | 68 | extern int watchdog_user_enabled; |
72 | extern int watchdog_thresh; | 69 | extern int watchdog_thresh; |
73 | extern int sysctl_softlockup_all_cpu_backtrace; | 70 | extern int sysctl_softlockup_all_cpu_backtrace; |
74 | struct ctl_table; | 71 | struct ctl_table; |
75 | extern int proc_dowatchdog(struct ctl_table *, int , | 72 | extern int proc_watchdog(struct ctl_table *, int , |
76 | void __user *, size_t *, loff_t *); | 73 | void __user *, size_t *, loff_t *); |
74 | extern int proc_nmi_watchdog(struct ctl_table *, int , | ||
75 | void __user *, size_t *, loff_t *); | ||
76 | extern int proc_soft_watchdog(struct ctl_table *, int , | ||
77 | void __user *, size_t *, loff_t *); | ||
78 | extern int proc_watchdog_thresh(struct ctl_table *, int , | ||
79 | void __user *, size_t *, loff_t *); | ||
77 | #endif | 80 | #endif |
78 | 81 | ||
79 | #ifdef CONFIG_HAVE_ACPI_APEI_NMI | 82 | #ifdef CONFIG_HAVE_ACPI_APEI_NMI |
diff --git a/include/linux/oom.h b/include/linux/oom.h index d5771bed59c9..44b2f6f7bbd8 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h | |||
@@ -66,7 +66,8 @@ extern bool oom_zonelist_trylock(struct zonelist *zonelist, gfp_t gfp_flags); | |||
66 | extern void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_flags); | 66 | extern void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_flags); |
67 | 67 | ||
68 | extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, | 68 | extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, |
69 | int order, const nodemask_t *nodemask); | 69 | int order, const nodemask_t *nodemask, |
70 | struct mem_cgroup *memcg); | ||
70 | 71 | ||
71 | extern enum oom_scan_t oom_scan_process_thread(struct task_struct *task, | 72 | extern enum oom_scan_t oom_scan_process_thread(struct task_struct *task, |
72 | unsigned long totalpages, const nodemask_t *nodemask, | 73 | unsigned long totalpages, const nodemask_t *nodemask, |
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 5ed7bdaf22d5..c851ff92d5b3 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h | |||
@@ -328,8 +328,6 @@ static inline void SetPageUptodate(struct page *page) | |||
328 | 328 | ||
329 | CLEARPAGEFLAG(Uptodate, uptodate) | 329 | CLEARPAGEFLAG(Uptodate, uptodate) |
330 | 330 | ||
331 | extern void cancel_dirty_page(struct page *page, unsigned int account_size); | ||
332 | |||
333 | int test_clear_page_writeback(struct page *page); | 331 | int test_clear_page_writeback(struct page *page); |
334 | int __test_set_page_writeback(struct page *page, bool keep_write); | 332 | int __test_set_page_writeback(struct page *page, bool keep_write); |
335 | 333 | ||
diff --git a/include/linux/slab.h b/include/linux/slab.h index 76f1feeabd38..ffd24c830151 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h | |||
@@ -18,7 +18,7 @@ | |||
18 | 18 | ||
19 | /* | 19 | /* |
20 | * Flags to pass to kmem_cache_create(). | 20 | * Flags to pass to kmem_cache_create(). |
21 | * The ones marked DEBUG are only valid if CONFIG_SLAB_DEBUG is set. | 21 | * The ones marked DEBUG are only valid if CONFIG_DEBUG_SLAB is set. |
22 | */ | 22 | */ |
23 | #define SLAB_DEBUG_FREE 0x00000100UL /* DEBUG: Perform (expensive) checks on free */ | 23 | #define SLAB_DEBUG_FREE 0x00000100UL /* DEBUG: Perform (expensive) checks on free */ |
24 | #define SLAB_RED_ZONE 0x00000400UL /* DEBUG: Red zone objs in a cache */ | 24 | #define SLAB_RED_ZONE 0x00000400UL /* DEBUG: Red zone objs in a cache */ |
diff --git a/include/trace/events/xen.h b/include/trace/events/xen.h index d06b6da5c1e3..bce990f5a35d 100644 --- a/include/trace/events/xen.h +++ b/include/trace/events/xen.h | |||
@@ -224,7 +224,7 @@ TRACE_EVENT(xen_mmu_pmd_clear, | |||
224 | TP_printk("pmdp %p", __entry->pmdp) | 224 | TP_printk("pmdp %p", __entry->pmdp) |
225 | ); | 225 | ); |
226 | 226 | ||
227 | #if PAGETABLE_LEVELS >= 4 | 227 | #if CONFIG_PGTABLE_LEVELS >= 4 |
228 | 228 | ||
229 | TRACE_EVENT(xen_mmu_set_pud, | 229 | TRACE_EVENT(xen_mmu_set_pud, |
230 | TP_PROTO(pud_t *pudp, pud_t pudval), | 230 | TP_PROTO(pud_t *pudp, pud_t pudval), |
diff --git a/init/main.c b/init/main.c index e82171b99874..a7e969d12f51 100644 --- a/init/main.c +++ b/init/main.c | |||
@@ -80,6 +80,7 @@ | |||
80 | #include <linux/list.h> | 80 | #include <linux/list.h> |
81 | #include <linux/integrity.h> | 81 | #include <linux/integrity.h> |
82 | #include <linux/proc_ns.h> | 82 | #include <linux/proc_ns.h> |
83 | #include <linux/io.h> | ||
83 | 84 | ||
84 | #include <asm/io.h> | 85 | #include <asm/io.h> |
85 | #include <asm/bugs.h> | 86 | #include <asm/bugs.h> |
@@ -485,6 +486,7 @@ static void __init mm_init(void) | |||
485 | percpu_init_late(); | 486 | percpu_init_late(); |
486 | pgtable_init(); | 487 | pgtable_init(); |
487 | vmalloc_init(); | 488 | vmalloc_init(); |
489 | ioremap_huge_init(); | ||
488 | } | 490 | } |
489 | 491 | ||
490 | asmlinkage __visible void __init start_kernel(void) | 492 | asmlinkage __visible void __init start_kernel(void) |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index c68f0721df10..ee14e3a35a29 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -2453,20 +2453,12 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs) | |||
2453 | * @node: is this an allowed node? | 2453 | * @node: is this an allowed node? |
2454 | * @gfp_mask: memory allocation flags | 2454 | * @gfp_mask: memory allocation flags |
2455 | * | 2455 | * |
2456 | * If we're in interrupt, yes, we can always allocate. If __GFP_THISNODE is | 2456 | * If we're in interrupt, yes, we can always allocate. If @node is set in |
2457 | * set, yes, we can always allocate. If node is in our task's mems_allowed, | 2457 | * current's mems_allowed, yes. If it's not a __GFP_HARDWALL request and this |
2458 | * yes. If it's not a __GFP_HARDWALL request and this node is in the nearest | 2458 | * node is set in the nearest hardwalled cpuset ancestor to current's cpuset, |
2459 | * hardwalled cpuset ancestor to this task's cpuset, yes. If the task has been | 2459 | * yes. If current has access to memory reserves due to TIF_MEMDIE, yes. |
2460 | * OOM killed and has access to memory reserves as specified by the TIF_MEMDIE | ||
2461 | * flag, yes. | ||
2462 | * Otherwise, no. | 2460 | * Otherwise, no. |
2463 | * | 2461 | * |
2464 | * The __GFP_THISNODE placement logic is really handled elsewhere, | ||
2465 | * by forcibly using a zonelist starting at a specified node, and by | ||
2466 | * (in get_page_from_freelist()) refusing to consider the zones for | ||
2467 | * any node on the zonelist except the first. By the time any such | ||
2468 | * calls get to this routine, we should just shut up and say 'yes'. | ||
2469 | * | ||
2470 | * GFP_USER allocations are marked with the __GFP_HARDWALL bit, | 2462 | * GFP_USER allocations are marked with the __GFP_HARDWALL bit, |
2471 | * and do not allow allocations outside the current tasks cpuset | 2463 | * and do not allow allocations outside the current tasks cpuset |
2472 | * unless the task has been OOM killed as is marked TIF_MEMDIE. | 2464 | * unless the task has been OOM killed as is marked TIF_MEMDIE. |
@@ -2502,7 +2494,7 @@ int __cpuset_node_allowed(int node, gfp_t gfp_mask) | |||
2502 | int allowed; /* is allocation in zone z allowed? */ | 2494 | int allowed; /* is allocation in zone z allowed? */ |
2503 | unsigned long flags; | 2495 | unsigned long flags; |
2504 | 2496 | ||
2505 | if (in_interrupt() || (gfp_mask & __GFP_THISNODE)) | 2497 | if (in_interrupt()) |
2506 | return 1; | 2498 | return 1; |
2507 | if (node_isset(node, current->mems_allowed)) | 2499 | if (node_isset(node, current->mems_allowed)) |
2508 | return 1; | 2500 | return 1; |
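The rewritten comment above lists when a node is allowed: always in interrupt context, if the node is in current's mems_allowed, if current is TIF_MEMDIE, or - for non-__GFP_HARDWALL requests - if the node is in the nearest hardwalled ancestor cpuset. A condensed sketch of that decision (a restatement of the comment, not the real function body; the ancestor helper is hypothetical):

    #include <linux/sched.h>
    #include <linux/nodemask.h>
    #include <linux/gfp.h>
    #include <linux/hardirq.h>

    /* Hypothetical stand-in for the walk to the nearest hardwalled ancestor
     * cpuset, which the real code performs under the cpuset locks. */
    static bool node_in_nearest_hardwall_ancestor(int node)
    {
            return false;
    }

    static bool node_allowed_sketch(int node, gfp_t gfp_mask)
    {
            if (in_interrupt())
                    return true;
            if (node_isset(node, current->mems_allowed))
                    return true;
            if (unlikely(test_thread_flag(TIF_MEMDIE)))
                    return true;
            if (gfp_mask & __GFP_HARDWALL)  /* hardwall request: stop here */
                    return false;
            return node_in_nearest_hardwall_ancestor(node);
    }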
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 4012336de30f..8c0eabd41886 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -847,7 +847,7 @@ static struct ctl_table kern_table[] = { | |||
847 | .data = &watchdog_user_enabled, | 847 | .data = &watchdog_user_enabled, |
848 | .maxlen = sizeof (int), | 848 | .maxlen = sizeof (int), |
849 | .mode = 0644, | 849 | .mode = 0644, |
850 | .proc_handler = proc_dowatchdog, | 850 | .proc_handler = proc_watchdog, |
851 | .extra1 = &zero, | 851 | .extra1 = &zero, |
852 | .extra2 = &one, | 852 | .extra2 = &one, |
853 | }, | 853 | }, |
@@ -856,11 +856,33 @@ static struct ctl_table kern_table[] = { | |||
856 | .data = &watchdog_thresh, | 856 | .data = &watchdog_thresh, |
857 | .maxlen = sizeof(int), | 857 | .maxlen = sizeof(int), |
858 | .mode = 0644, | 858 | .mode = 0644, |
859 | .proc_handler = proc_dowatchdog, | 859 | .proc_handler = proc_watchdog_thresh, |
860 | .extra1 = &zero, | 860 | .extra1 = &zero, |
861 | .extra2 = &sixty, | 861 | .extra2 = &sixty, |
862 | }, | 862 | }, |
863 | { | 863 | { |
864 | .procname = "nmi_watchdog", | ||
865 | .data = &nmi_watchdog_enabled, | ||
866 | .maxlen = sizeof (int), | ||
867 | .mode = 0644, | ||
868 | .proc_handler = proc_nmi_watchdog, | ||
869 | .extra1 = &zero, | ||
870 | #if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) | ||
871 | .extra2 = &one, | ||
872 | #else | ||
873 | .extra2 = &zero, | ||
874 | #endif | ||
875 | }, | ||
876 | { | ||
877 | .procname = "soft_watchdog", | ||
878 | .data = &soft_watchdog_enabled, | ||
879 | .maxlen = sizeof (int), | ||
880 | .mode = 0644, | ||
881 | .proc_handler = proc_soft_watchdog, | ||
882 | .extra1 = &zero, | ||
883 | .extra2 = &one, | ||
884 | }, | ||
885 | { | ||
864 | .procname = "softlockup_panic", | 886 | .procname = "softlockup_panic", |
865 | .data = &softlockup_panic, | 887 | .data = &softlockup_panic, |
866 | .maxlen = sizeof(int), | 888 | .maxlen = sizeof(int), |
@@ -880,15 +902,6 @@ static struct ctl_table kern_table[] = { | |||
880 | .extra2 = &one, | 902 | .extra2 = &one, |
881 | }, | 903 | }, |
882 | #endif /* CONFIG_SMP */ | 904 | #endif /* CONFIG_SMP */ |
883 | { | ||
884 | .procname = "nmi_watchdog", | ||
885 | .data = &watchdog_user_enabled, | ||
886 | .maxlen = sizeof (int), | ||
887 | .mode = 0644, | ||
888 | .proc_handler = proc_dowatchdog, | ||
889 | .extra1 = &zero, | ||
890 | .extra2 = &one, | ||
891 | }, | ||
892 | #endif | 905 | #endif |
893 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) | 906 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) |
894 | { | 907 | { |
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 9a056f5bc02c..2316f50b07a4 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
@@ -24,8 +24,33 @@ | |||
24 | #include <linux/kvm_para.h> | 24 | #include <linux/kvm_para.h> |
25 | #include <linux/perf_event.h> | 25 | #include <linux/perf_event.h> |
26 | 26 | ||
27 | int watchdog_user_enabled = 1; | 27 | /* |
28 | * The run state of the lockup detectors is controlled by the content of the | ||
29 | * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit - | ||
30 | * bit 0 for the hard lockup detector and bit 1 for the soft lockup detector. | ||
31 | * | ||
32 | * 'watchdog_user_enabled', 'nmi_watchdog_enabled' and 'soft_watchdog_enabled' | ||
33 | * are variables that are only used as an 'interface' between the parameters | ||
34 | * in /proc/sys/kernel and the internal state bits in 'watchdog_enabled'. The | ||
35 | * 'watchdog_thresh' variable is handled differently because its value is not | ||
36 | * boolean, and the lockup detectors are 'suspended' while 'watchdog_thresh' | ||
37 | * is equal to zero. | ||
38 | */ | ||
39 | #define NMI_WATCHDOG_ENABLED_BIT 0 | ||
40 | #define SOFT_WATCHDOG_ENABLED_BIT 1 | ||
41 | #define NMI_WATCHDOG_ENABLED (1 << NMI_WATCHDOG_ENABLED_BIT) | ||
42 | #define SOFT_WATCHDOG_ENABLED (1 << SOFT_WATCHDOG_ENABLED_BIT) | ||
43 | |||
44 | #ifdef CONFIG_HARDLOCKUP_DETECTOR | ||
45 | static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED; | ||
46 | #else | ||
47 | static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED; | ||
48 | #endif | ||
49 | int __read_mostly nmi_watchdog_enabled; | ||
50 | int __read_mostly soft_watchdog_enabled; | ||
51 | int __read_mostly watchdog_user_enabled; | ||
28 | int __read_mostly watchdog_thresh = 10; | 52 | int __read_mostly watchdog_thresh = 10; |
53 | |||
29 | #ifdef CONFIG_SMP | 54 | #ifdef CONFIG_SMP |
30 | int __read_mostly sysctl_softlockup_all_cpu_backtrace; | 55 | int __read_mostly sysctl_softlockup_all_cpu_backtrace; |
31 | #else | 56 | #else |
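As a worked example of the scheme described in the new comment: NMI_WATCHDOG_ENABLED is 1 << 0 = 0x1 and SOFT_WATCHDOG_ENABLED is 1 << 1 = 0x2, so the default watchdog_enabled is 0x3 (both detectors) when CONFIG_HARDLOCKUP_DETECTOR is set and 0x2 (soft lockup detector only) otherwise. Later in this patch the 'nowatchdog' boot parameter clears the whole mask to 0, while 'nosoftlockup' clears only bit 1.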
@@ -58,8 +83,6 @@ static unsigned long soft_lockup_nmi_warn; | |||
58 | #ifdef CONFIG_HARDLOCKUP_DETECTOR | 83 | #ifdef CONFIG_HARDLOCKUP_DETECTOR |
59 | static int hardlockup_panic = | 84 | static int hardlockup_panic = |
60 | CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE; | 85 | CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE; |
61 | |||
62 | static bool hardlockup_detector_enabled = true; | ||
63 | /* | 86 | /* |
64 | * We may not want to enable hard lockup detection by default in all cases, | 87 | * We may not want to enable hard lockup detection by default in all cases, |
65 | * for example when running the kernel as a guest on a hypervisor. In these | 88 | * for example when running the kernel as a guest on a hypervisor. In these |
@@ -68,14 +91,9 @@ static bool hardlockup_detector_enabled = true; | |||
68 | * kernel command line parameters are parsed, because otherwise it is not | 91 | * kernel command line parameters are parsed, because otherwise it is not |
69 | * possible to override this in hardlockup_panic_setup(). | 92 | * possible to override this in hardlockup_panic_setup(). |
70 | */ | 93 | */ |
71 | void watchdog_enable_hardlockup_detector(bool val) | 94 | void hardlockup_detector_disable(void) |
72 | { | ||
73 | hardlockup_detector_enabled = val; | ||
74 | } | ||
75 | |||
76 | bool watchdog_hardlockup_detector_is_enabled(void) | ||
77 | { | 95 | { |
78 | return hardlockup_detector_enabled; | 96 | watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; |
79 | } | 97 | } |
80 | 98 | ||
81 | static int __init hardlockup_panic_setup(char *str) | 99 | static int __init hardlockup_panic_setup(char *str) |
@@ -85,15 +103,9 @@ static int __init hardlockup_panic_setup(char *str) | |||
85 | else if (!strncmp(str, "nopanic", 7)) | 103 | else if (!strncmp(str, "nopanic", 7)) |
86 | hardlockup_panic = 0; | 104 | hardlockup_panic = 0; |
87 | else if (!strncmp(str, "0", 1)) | 105 | else if (!strncmp(str, "0", 1)) |
88 | watchdog_user_enabled = 0; | 106 | watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; |
89 | else if (!strncmp(str, "1", 1) || !strncmp(str, "2", 1)) { | 107 | else if (!strncmp(str, "1", 1)) |
90 | /* | 108 | watchdog_enabled |= NMI_WATCHDOG_ENABLED; |
91 | * Setting 'nmi_watchdog=1' or 'nmi_watchdog=2' (legacy option) | ||
92 | * has the same effect. | ||
93 | */ | ||
94 | watchdog_user_enabled = 1; | ||
95 | watchdog_enable_hardlockup_detector(true); | ||
96 | } | ||
97 | return 1; | 109 | return 1; |
98 | } | 110 | } |
99 | __setup("nmi_watchdog=", hardlockup_panic_setup); | 111 | __setup("nmi_watchdog=", hardlockup_panic_setup); |
@@ -112,19 +124,18 @@ __setup("softlockup_panic=", softlockup_panic_setup); | |||
112 | 124 | ||
113 | static int __init nowatchdog_setup(char *str) | 125 | static int __init nowatchdog_setup(char *str) |
114 | { | 126 | { |
115 | watchdog_user_enabled = 0; | 127 | watchdog_enabled = 0; |
116 | return 1; | 128 | return 1; |
117 | } | 129 | } |
118 | __setup("nowatchdog", nowatchdog_setup); | 130 | __setup("nowatchdog", nowatchdog_setup); |
119 | 131 | ||
120 | /* deprecated */ | ||
121 | static int __init nosoftlockup_setup(char *str) | 132 | static int __init nosoftlockup_setup(char *str) |
122 | { | 133 | { |
123 | watchdog_user_enabled = 0; | 134 | watchdog_enabled &= ~SOFT_WATCHDOG_ENABLED; |
124 | return 1; | 135 | return 1; |
125 | } | 136 | } |
126 | __setup("nosoftlockup", nosoftlockup_setup); | 137 | __setup("nosoftlockup", nosoftlockup_setup); |
127 | /* */ | 138 | |
128 | #ifdef CONFIG_SMP | 139 | #ifdef CONFIG_SMP |
129 | static int __init softlockup_all_cpu_backtrace_setup(char *str) | 140 | static int __init softlockup_all_cpu_backtrace_setup(char *str) |
130 | { | 141 | { |
@@ -239,10 +250,11 @@ static int is_softlockup(unsigned long touch_ts) | |||
239 | { | 250 | { |
240 | unsigned long now = get_timestamp(); | 251 | unsigned long now = get_timestamp(); |
241 | 252 | ||
242 | /* Warn about unreasonable delays: */ | 253 | if (watchdog_enabled & SOFT_WATCHDOG_ENABLED) { |
243 | if (time_after(now, touch_ts + get_softlockup_thresh())) | 254 | /* Warn about unreasonable delays. */ |
244 | return now - touch_ts; | 255 | if (time_after(now, touch_ts + get_softlockup_thresh())) |
245 | 256 | return now - touch_ts; | |
257 | } | ||
246 | return 0; | 258 | return 0; |
247 | } | 259 | } |
248 | 260 | ||
@@ -477,6 +489,21 @@ static void watchdog(unsigned int cpu) | |||
477 | __this_cpu_write(soft_lockup_hrtimer_cnt, | 489 | __this_cpu_write(soft_lockup_hrtimer_cnt, |
478 | __this_cpu_read(hrtimer_interrupts)); | 490 | __this_cpu_read(hrtimer_interrupts)); |
479 | __touch_watchdog(); | 491 | __touch_watchdog(); |
492 | |||
493 | /* | ||
494 | * watchdog_nmi_enable() clears the NMI_WATCHDOG_ENABLED bit in the | ||
495 | * failure path. Check for failures that can occur asynchronously - | ||
496 | * for example, when CPUs are on-lined - and shut down the hardware | ||
497 | * perf event on each CPU accordingly. | ||
498 | * | ||
499 | * The only non-obvious place this bit can be cleared is through | ||
500 | * watchdog_nmi_enable(), so a pr_info() is placed there. Placing a | ||
501 | * pr_info here would be too noisy as it would result in a message | ||
502 | * every few seconds if the hardlockup was disabled but the softlockup | ||
503 | * enabled. | ||
504 | */ | ||
505 | if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) | ||
506 | watchdog_nmi_disable(cpu); | ||
480 | } | 507 | } |
481 | 508 | ||
482 | #ifdef CONFIG_HARDLOCKUP_DETECTOR | 509 | #ifdef CONFIG_HARDLOCKUP_DETECTOR |
@@ -492,14 +519,9 @@ static int watchdog_nmi_enable(unsigned int cpu) | |||
492 | struct perf_event_attr *wd_attr; | 519 | struct perf_event_attr *wd_attr; |
493 | struct perf_event *event = per_cpu(watchdog_ev, cpu); | 520 | struct perf_event *event = per_cpu(watchdog_ev, cpu); |
494 | 521 | ||
495 | /* | 522 | /* nothing to do if the hard lockup detector is disabled */ |
496 | * Some kernels need to default hard lockup detection to | 523 | if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) |
497 | * 'disabled', for example a guest on a hypervisor. | 524 | goto out; |
498 | */ | ||
499 | if (!watchdog_hardlockup_detector_is_enabled()) { | ||
500 | event = ERR_PTR(-ENOENT); | ||
501 | goto handle_err; | ||
502 | } | ||
503 | 525 | ||
504 | /* is it already setup and enabled? */ | 526 | /* is it already setup and enabled? */ |
505 | if (event && event->state > PERF_EVENT_STATE_OFF) | 527 | if (event && event->state > PERF_EVENT_STATE_OFF) |
@@ -515,7 +537,6 @@ static int watchdog_nmi_enable(unsigned int cpu) | |||
515 | /* Try to register using hardware perf events */ | 537 | /* Try to register using hardware perf events */ |
516 | event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL); | 538 | event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL); |
517 | 539 | ||
518 | handle_err: | ||
519 | /* save cpu0 error for future comparison */ | 540 | /* save cpu0 error for future comparison */ |
520 | if (cpu == 0 && IS_ERR(event)) | 541 | if (cpu == 0 && IS_ERR(event)) |
521 | cpu0_err = PTR_ERR(event); | 542 | cpu0_err = PTR_ERR(event); |
@@ -527,6 +548,18 @@ handle_err: | |||
527 | goto out_save; | 548 | goto out_save; |
528 | } | 549 | } |
529 | 550 | ||
551 | /* | ||
552 | * Disable the hard lockup detector if _any_ CPU fails to set up | ||
553 | * the hardware perf event. The watchdog() function checks | ||
554 | * the NMI_WATCHDOG_ENABLED bit periodically. | ||
555 | * | ||
556 | * The barriers are for syncing up watchdog_enabled across all the | ||
557 | * cpus, as clear_bit() does not use barriers. | ||
558 | */ | ||
559 | smp_mb__before_atomic(); | ||
560 | clear_bit(NMI_WATCHDOG_ENABLED_BIT, &watchdog_enabled); | ||
561 | smp_mb__after_atomic(); | ||
562 | |||
530 | /* skip displaying the same error again */ | 563 | /* skip displaying the same error again */ |
531 | if (cpu > 0 && (PTR_ERR(event) == cpu0_err)) | 564 | if (cpu > 0 && (PTR_ERR(event) == cpu0_err)) |
532 | return PTR_ERR(event); | 565 | return PTR_ERR(event); |
@@ -540,6 +573,9 @@ handle_err: | |||
540 | else | 573 | else |
541 | pr_err("disabled (cpu%i): unable to create perf event: %ld\n", | 574 | pr_err("disabled (cpu%i): unable to create perf event: %ld\n", |
542 | cpu, PTR_ERR(event)); | 575 | cpu, PTR_ERR(event)); |
576 | |||
577 | pr_info("Shutting down hard lockup detector on all cpus\n"); | ||
578 | |||
543 | return PTR_ERR(event); | 579 | return PTR_ERR(event); |
544 | 580 | ||
545 | /* success path */ | 581 | /* success path */ |
@@ -628,7 +664,7 @@ static void restart_watchdog_hrtimer(void *info) | |||
628 | HRTIMER_MODE_REL_PINNED); | 664 | HRTIMER_MODE_REL_PINNED); |
629 | } | 665 | } |
630 | 666 | ||
631 | static void update_timers(int cpu) | 667 | static void update_watchdog(int cpu) |
632 | { | 668 | { |
633 | /* | 669 | /* |
634 | * Make sure that perf event counter will adapt to a new | 670 | * Make sure that perf event counter will adapt to a new |
@@ -643,17 +679,17 @@ static void update_timers(int cpu) | |||
643 | watchdog_nmi_enable(cpu); | 679 | watchdog_nmi_enable(cpu); |
644 | } | 680 | } |
645 | 681 | ||
646 | static void update_timers_all_cpus(void) | 682 | static void update_watchdog_all_cpus(void) |
647 | { | 683 | { |
648 | int cpu; | 684 | int cpu; |
649 | 685 | ||
650 | get_online_cpus(); | 686 | get_online_cpus(); |
651 | for_each_online_cpu(cpu) | 687 | for_each_online_cpu(cpu) |
652 | update_timers(cpu); | 688 | update_watchdog(cpu); |
653 | put_online_cpus(); | 689 | put_online_cpus(); |
654 | } | 690 | } |
655 | 691 | ||
656 | static int watchdog_enable_all_cpus(bool sample_period_changed) | 692 | static int watchdog_enable_all_cpus(void) |
657 | { | 693 | { |
658 | int err = 0; | 694 | int err = 0; |
659 | 695 | ||
@@ -663,8 +699,12 @@ static int watchdog_enable_all_cpus(bool sample_period_changed) | |||
663 | pr_err("Failed to create watchdog threads, disabled\n"); | 699 | pr_err("Failed to create watchdog threads, disabled\n"); |
664 | else | 700 | else |
665 | watchdog_running = 1; | 701 | watchdog_running = 1; |
666 | } else if (sample_period_changed) { | 702 | } else { |
667 | update_timers_all_cpus(); | 703 | /* |
704 | * Enable/disable the lockup detectors or | ||
705 | * change the sample period 'on the fly'. | ||
706 | */ | ||
707 | update_watchdog_all_cpus(); | ||
668 | } | 708 | } |
669 | 709 | ||
670 | return err; | 710 | return err; |
@@ -682,48 +722,149 @@ static void watchdog_disable_all_cpus(void) | |||
682 | } | 722 | } |
683 | 723 | ||
684 | /* | 724 | /* |
685 | * proc handler for /proc/sys/kernel/nmi_watchdog,watchdog_thresh | 725 | * Update the run state of the lockup detectors. |
726 | */ | ||
727 | static int proc_watchdog_update(void) | ||
728 | { | ||
729 | int err = 0; | ||
730 | |||
731 | /* | ||
732 | * Watchdog threads won't be started if they are already active. | ||
733 | * The 'watchdog_running' variable in watchdog_*_all_cpus() takes | ||
734 | * care of this. If those threads are already active, the sample | ||
735 | * period will be updated and the lockup detectors will be enabled | ||
736 | * or disabled 'on the fly'. | ||
737 | */ | ||
738 | if (watchdog_enabled && watchdog_thresh) | ||
739 | err = watchdog_enable_all_cpus(); | ||
740 | else | ||
741 | watchdog_disable_all_cpus(); | ||
742 | |||
743 | return err; | ||
744 | |||
745 | } | ||
746 | |||
747 | static DEFINE_MUTEX(watchdog_proc_mutex); | ||
748 | |||
749 | /* | ||
750 | * common function for watchdog, nmi_watchdog and soft_watchdog parameter | ||
751 | * | ||
752 | * caller | table->data points to | 'which' contains the flag(s) | ||
753 | * -------------------|-----------------------|----------------------------- | ||
754 | * proc_watchdog | watchdog_user_enabled | NMI_WATCHDOG_ENABLED or'ed | ||
755 | * | | with SOFT_WATCHDOG_ENABLED | ||
756 | * -------------------|-----------------------|----------------------------- | ||
757 | * proc_nmi_watchdog | nmi_watchdog_enabled | NMI_WATCHDOG_ENABLED | ||
758 | * -------------------|-----------------------|----------------------------- | ||
759 | * proc_soft_watchdog | soft_watchdog_enabled | SOFT_WATCHDOG_ENABLED | ||
760 | */ | ||
761 | static int proc_watchdog_common(int which, struct ctl_table *table, int write, | ||
762 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
763 | { | ||
764 | int err, old, new; | ||
765 | int *watchdog_param = (int *)table->data; | ||
766 | |||
767 | mutex_lock(&watchdog_proc_mutex); | ||
768 | |||
769 | /* | ||
770 | * If the parameter is being read return the state of the corresponding | ||
771 | * bit(s) in 'watchdog_enabled', else update 'watchdog_enabled' and the | ||
772 | * run state of the lockup detectors. | ||
773 | */ | ||
774 | if (!write) { | ||
775 | *watchdog_param = (watchdog_enabled & which) != 0; | ||
776 | err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); | ||
777 | } else { | ||
778 | err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); | ||
779 | if (err) | ||
780 | goto out; | ||
781 | |||
782 | /* | ||
783 | * There is a race window between fetching the current value | ||
784 | * from 'watchdog_enabled' and storing the new value. During | ||
785 | * this race window, watchdog_nmi_enable() can sneak in and | ||
786 | * clear the NMI_WATCHDOG_ENABLED bit in 'watchdog_enabled'. | ||
787 | * The 'cmpxchg' detects this race and the loop retries. | ||
788 | */ | ||
789 | do { | ||
790 | old = watchdog_enabled; | ||
791 | /* | ||
792 | * If the parameter value is not zero set the | ||
793 | * corresponding bit(s), else clear it(them). | ||
794 | */ | ||
795 | if (*watchdog_param) | ||
796 | new = old | which; | ||
797 | else | ||
798 | new = old & ~which; | ||
799 | } while (cmpxchg(&watchdog_enabled, old, new) != old); | ||
800 | |||
801 | /* | ||
802 | * Update the run state of the lockup detectors. | ||
803 | * Restore 'watchdog_enabled' on failure. | ||
804 | */ | ||
805 | err = proc_watchdog_update(); | ||
806 | if (err) | ||
807 | watchdog_enabled = old; | ||
808 | } | ||
809 | out: | ||
810 | mutex_unlock(&watchdog_proc_mutex); | ||
811 | return err; | ||
812 | } | ||
813 | |||
814 | /* | ||
815 | * /proc/sys/kernel/watchdog | ||
816 | */ | ||
817 | int proc_watchdog(struct ctl_table *table, int write, | ||
818 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
819 | { | ||
820 | return proc_watchdog_common(NMI_WATCHDOG_ENABLED|SOFT_WATCHDOG_ENABLED, | ||
821 | table, write, buffer, lenp, ppos); | ||
822 | } | ||
823 | |||
824 | /* | ||
825 | * /proc/sys/kernel/nmi_watchdog | ||
686 | */ | 826 | */ |
827 | int proc_nmi_watchdog(struct ctl_table *table, int write, | ||
828 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
829 | { | ||
830 | return proc_watchdog_common(NMI_WATCHDOG_ENABLED, | ||
831 | table, write, buffer, lenp, ppos); | ||
832 | } | ||
833 | |||
834 | /* | ||
835 | * /proc/sys/kernel/soft_watchdog | ||
836 | */ | ||
837 | int proc_soft_watchdog(struct ctl_table *table, int write, | ||
838 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
839 | { | ||
840 | return proc_watchdog_common(SOFT_WATCHDOG_ENABLED, | ||
841 | table, write, buffer, lenp, ppos); | ||
842 | } | ||
687 | 843 | ||
688 | int proc_dowatchdog(struct ctl_table *table, int write, | 844 | /* |
689 | void __user *buffer, size_t *lenp, loff_t *ppos) | 845 | * /proc/sys/kernel/watchdog_thresh |
846 | */ | ||
847 | int proc_watchdog_thresh(struct ctl_table *table, int write, | ||
848 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
690 | { | 849 | { |
691 | int err, old_thresh, old_enabled; | 850 | int err, old; |
692 | bool old_hardlockup; | ||
693 | static DEFINE_MUTEX(watchdog_proc_mutex); | ||
694 | 851 | ||
695 | mutex_lock(&watchdog_proc_mutex); | 852 | mutex_lock(&watchdog_proc_mutex); |
696 | old_thresh = ACCESS_ONCE(watchdog_thresh); | ||
697 | old_enabled = ACCESS_ONCE(watchdog_user_enabled); | ||
698 | old_hardlockup = watchdog_hardlockup_detector_is_enabled(); | ||
699 | 853 | ||
854 | old = ACCESS_ONCE(watchdog_thresh); | ||
700 | err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); | 855 | err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); |
856 | |||
701 | if (err || !write) | 857 | if (err || !write) |
702 | goto out; | 858 | goto out; |
703 | 859 | ||
704 | set_sample_period(); | ||
705 | /* | 860 | /* |
706 | * Watchdog threads shouldn't be enabled if they are | 861 | * Update the sample period. |
707 | * disabled. The 'watchdog_running' variable check in | 862 | * Restore 'watchdog_thresh' on failure. |
708 | * watchdog_*_all_cpus() function takes care of this. | ||
709 | */ | 863 | */ |
710 | if (watchdog_user_enabled && watchdog_thresh) { | 864 | set_sample_period(); |
711 | /* | 865 | err = proc_watchdog_update(); |
712 | * Prevent a change in watchdog_thresh accidentally overriding | 866 | if (err) |
713 | * the enablement of the hardlockup detector. | 867 | watchdog_thresh = old; |
714 | */ | ||
715 | if (watchdog_user_enabled != old_enabled) | ||
716 | watchdog_enable_hardlockup_detector(true); | ||
717 | err = watchdog_enable_all_cpus(old_thresh != watchdog_thresh); | ||
718 | } else | ||
719 | watchdog_disable_all_cpus(); | ||
720 | |||
721 | /* Restore old values on failure */ | ||
722 | if (err) { | ||
723 | watchdog_thresh = old_thresh; | ||
724 | watchdog_user_enabled = old_enabled; | ||
725 | watchdog_enable_hardlockup_detector(old_hardlockup); | ||
726 | } | ||
727 | out: | 868 | out: |
728 | mutex_unlock(&watchdog_proc_mutex); | 869 | mutex_unlock(&watchdog_proc_mutex); |
729 | return err; | 870 | return err; |
@@ -734,6 +875,6 @@ void __init lockup_detector_init(void) | |||
734 | { | 875 | { |
735 | set_sample_period(); | 876 | set_sample_period(); |
736 | 877 | ||
737 | if (watchdog_user_enabled) | 878 | if (watchdog_enabled) |
738 | watchdog_enable_all_cpus(false); | 879 | watchdog_enable_all_cpus(); |
739 | } | 880 | } |
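
The cmpxchg loop added to proc_watchdog_common() above is a generic lost-update guard: re-read, recompute, retry. Below is a minimal userspace sketch of the same set-or-clear-bits pattern, with C11 stdatomic standing in for the kernel's cmpxchg and the bit values assumed purely for illustration.

#include <stdatomic.h>
#include <stdio.h>

#define NMI_WATCHDOG_ENABLED  (1UL << 0)   /* bit layout assumed for this sketch */
#define SOFT_WATCHDOG_ENABLED (1UL << 1)

static _Atomic unsigned long watchdog_enabled =
	NMI_WATCHDOG_ENABLED | SOFT_WATCHDOG_ENABLED;

/* Set or clear 'which' without losing a concurrent update to other bits. */
static void watchdog_set_bits(unsigned long which, int enable)
{
	unsigned long old, new;

	do {
		old = atomic_load(&watchdog_enabled);
		new = enable ? (old | which) : (old & ~which);
		/* retry if another thread changed watchdog_enabled in between */
	} while (!atomic_compare_exchange_weak(&watchdog_enabled, &old, new));
}

int main(void)
{
	watchdog_set_bits(NMI_WATCHDOG_ENABLED, 0);   /* like writing 0 to nmi_watchdog */
	printf("watchdog_enabled = %#lx\n",
	       (unsigned long)atomic_load(&watchdog_enabled));
	return 0;
}

The expected value is re-read on every pass, mirroring the kernel loop's reload of 'old' before each cmpxchg.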
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 93967e634a1e..17670573dda8 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug | |||
@@ -1760,6 +1760,18 @@ config TEST_UDELAY | |||
1760 | 1760 | ||
1761 | If unsure, say N. | 1761 | If unsure, say N. |
1762 | 1762 | ||
1763 | config MEMTEST | ||
1764 | bool "Memtest" | ||
1765 | depends on HAVE_MEMBLOCK | ||
1766 | ---help--- | ||
1767 | This option adds a kernel parameter 'memtest', which selects how | ||
1768 | many memtest patterns are run over free memory at boot. | ||
1769 | memtest=0, means disabled; -- default | ||
1770 | memtest=1, means do 1 test pattern; | ||
1771 | ... | ||
1772 | memtest=17, means do 17 test patterns. | ||
1773 | If you are unsure how to answer this question, answer N. | ||
1774 | |||
1763 | source "samples/Kconfig" | 1775 | source "samples/Kconfig" |
1764 | 1776 | ||
1765 | source "lib/Kconfig.kgdb" | 1777 | source "lib/Kconfig.kgdb" |
diff --git a/lib/ioremap.c b/lib/ioremap.c index 0c9216c48762..86c8911b0e3a 100644 --- a/lib/ioremap.c +++ b/lib/ioremap.c | |||
@@ -13,6 +13,43 @@ | |||
13 | #include <asm/cacheflush.h> | 13 | #include <asm/cacheflush.h> |
14 | #include <asm/pgtable.h> | 14 | #include <asm/pgtable.h> |
15 | 15 | ||
16 | #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP | ||
17 | static int __read_mostly ioremap_pud_capable; | ||
18 | static int __read_mostly ioremap_pmd_capable; | ||
19 | static int __read_mostly ioremap_huge_disabled; | ||
20 | |||
21 | static int __init set_nohugeiomap(char *str) | ||
22 | { | ||
23 | ioremap_huge_disabled = 1; | ||
24 | return 0; | ||
25 | } | ||
26 | early_param("nohugeiomap", set_nohugeiomap); | ||
27 | |||
28 | void __init ioremap_huge_init(void) | ||
29 | { | ||
30 | if (!ioremap_huge_disabled) { | ||
31 | if (arch_ioremap_pud_supported()) | ||
32 | ioremap_pud_capable = 1; | ||
33 | if (arch_ioremap_pmd_supported()) | ||
34 | ioremap_pmd_capable = 1; | ||
35 | } | ||
36 | } | ||
37 | |||
38 | static inline int ioremap_pud_enabled(void) | ||
39 | { | ||
40 | return ioremap_pud_capable; | ||
41 | } | ||
42 | |||
43 | static inline int ioremap_pmd_enabled(void) | ||
44 | { | ||
45 | return ioremap_pmd_capable; | ||
46 | } | ||
47 | |||
48 | #else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */ | ||
49 | static inline int ioremap_pud_enabled(void) { return 0; } | ||
50 | static inline int ioremap_pmd_enabled(void) { return 0; } | ||
51 | #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ | ||
52 | |||
16 | static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, | 53 | static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, |
17 | unsigned long end, phys_addr_t phys_addr, pgprot_t prot) | 54 | unsigned long end, phys_addr_t phys_addr, pgprot_t prot) |
18 | { | 55 | { |
@@ -43,6 +80,14 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, | |||
43 | return -ENOMEM; | 80 | return -ENOMEM; |
44 | do { | 81 | do { |
45 | next = pmd_addr_end(addr, end); | 82 | next = pmd_addr_end(addr, end); |
83 | |||
84 | if (ioremap_pmd_enabled() && | ||
85 | ((next - addr) == PMD_SIZE) && | ||
86 | IS_ALIGNED(phys_addr + addr, PMD_SIZE)) { | ||
87 | if (pmd_set_huge(pmd, phys_addr + addr, prot)) | ||
88 | continue; | ||
89 | } | ||
90 | |||
46 | if (ioremap_pte_range(pmd, addr, next, phys_addr + addr, prot)) | 91 | if (ioremap_pte_range(pmd, addr, next, phys_addr + addr, prot)) |
47 | return -ENOMEM; | 92 | return -ENOMEM; |
48 | } while (pmd++, addr = next, addr != end); | 93 | } while (pmd++, addr = next, addr != end); |
@@ -61,6 +106,14 @@ static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, | |||
61 | return -ENOMEM; | 106 | return -ENOMEM; |
62 | do { | 107 | do { |
63 | next = pud_addr_end(addr, end); | 108 | next = pud_addr_end(addr, end); |
109 | |||
110 | if (ioremap_pud_enabled() && | ||
111 | ((next - addr) == PUD_SIZE) && | ||
112 | IS_ALIGNED(phys_addr + addr, PUD_SIZE)) { | ||
113 | if (pud_set_huge(pud, phys_addr + addr, prot)) | ||
114 | continue; | ||
115 | } | ||
116 | |||
64 | if (ioremap_pmd_range(pud, addr, next, phys_addr + addr, prot)) | 117 | if (ioremap_pmd_range(pud, addr, next, phys_addr + addr, prot)) |
65 | return -ENOMEM; | 118 | return -ENOMEM; |
66 | } while (pud++, addr = next, addr != end); | 119 | } while (pud++, addr = next, addr != end); |
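
The new hunks in ioremap_pmd_range()/ioremap_pud_range() above only take the huge-mapping shortcut when an entire, aligned PMD- or PUD-sized chunk remains. A standalone sketch of that eligibility test, with PMD_SIZE hard-coded to the common 2 MiB value only for illustration:

#include <stdbool.h>
#include <stdio.h>

#define PMD_SIZE (2UL << 20)                      /* 2 MiB assumed for this sketch */
#define IS_ALIGNED(x, a) (((x) & ((a) - 1)) == 0)

/* Mirrors the check made before pmd_set_huge() in the hunk above. */
static bool pmd_mapping_ok(unsigned long addr, unsigned long next,
			   unsigned long long phys_addr)
{
	return (next - addr) == PMD_SIZE &&
	       IS_ALIGNED(phys_addr + addr, PMD_SIZE);
}

int main(void)
{
	printf("%d\n", pmd_mapping_ok(0, PMD_SIZE, 0));       /* 1: whole aligned chunk */
	printf("%d\n", pmd_mapping_ok(0, PMD_SIZE, 0x1000));  /* 0: phys not 2 MiB aligned */
	return 0;
}

If either condition fails, the code falls through to the per-PTE path, so a range that is large but misaligned still maps correctly, just without huge entries.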
diff --git a/mm/Kconfig b/mm/Kconfig index a03131b6ba8e..390214da4546 100644 --- a/mm/Kconfig +++ b/mm/Kconfig | |||
@@ -517,6 +517,12 @@ config CMA_DEBUG | |||
517 | processing calls such as dma_alloc_from_contiguous(). | 517 | processing calls such as dma_alloc_from_contiguous(). |
518 | This option does not affect warning and error messages. | 518 | This option does not affect warning and error messages. |
519 | 519 | ||
520 | config CMA_DEBUGFS | ||
521 | bool "CMA debugfs interface" | ||
522 | depends on CMA && DEBUG_FS | ||
523 | help | ||
524 | Turns on the DebugFS interface for CMA. | ||
525 | |||
520 | config CMA_AREAS | 526 | config CMA_AREAS |
521 | int "Maximum count of the CMA areas" | 527 | int "Maximum count of the CMA areas" |
522 | depends on CMA | 528 | depends on CMA |
diff --git a/mm/Makefile b/mm/Makefile index 15dbe9903c27..98c4eaeabdcb 100644 --- a/mm/Makefile +++ b/mm/Makefile | |||
@@ -55,6 +55,7 @@ obj-$(CONFIG_KMEMCHECK) += kmemcheck.o | |||
55 | obj-$(CONFIG_KASAN) += kasan/ | 55 | obj-$(CONFIG_KASAN) += kasan/ |
56 | obj-$(CONFIG_FAILSLAB) += failslab.o | 56 | obj-$(CONFIG_FAILSLAB) += failslab.o |
57 | obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o | 57 | obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o |
58 | obj-$(CONFIG_MEMTEST) += memtest.o | ||
58 | obj-$(CONFIG_MIGRATION) += migrate.o | 59 | obj-$(CONFIG_MIGRATION) += migrate.o |
59 | obj-$(CONFIG_QUICKLIST) += quicklist.o | 60 | obj-$(CONFIG_QUICKLIST) += quicklist.o |
60 | obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o | 61 | obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o |
@@ -76,3 +77,4 @@ obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o | |||
76 | obj-$(CONFIG_CMA) += cma.o | 77 | obj-$(CONFIG_CMA) += cma.o |
77 | obj-$(CONFIG_MEMORY_BALLOON) += balloon_compaction.o | 78 | obj-$(CONFIG_MEMORY_BALLOON) += balloon_compaction.o |
78 | obj-$(CONFIG_PAGE_EXTENSION) += page_ext.o | 79 | obj-$(CONFIG_PAGE_EXTENSION) += page_ext.o |
80 | obj-$(CONFIG_CMA_DEBUGFS) += cma_debug.o | ||
diff --git a/mm/cleancache.c b/mm/cleancache.c index 053bcd8f12fb..8fc50811119b 100644 --- a/mm/cleancache.c +++ b/mm/cleancache.c | |||
@@ -19,7 +19,7 @@ | |||
19 | #include <linux/cleancache.h> | 19 | #include <linux/cleancache.h> |
20 | 20 | ||
21 | /* | 21 | /* |
22 | * cleancache_ops is set by cleancache_ops_register to contain the pointers | 22 | * cleancache_ops is set by cleancache_register_ops to contain the pointers |
23 | * to the cleancache "backend" implementation functions. | 23 | * to the cleancache "backend" implementation functions. |
24 | */ | 24 | */ |
25 | static struct cleancache_ops *cleancache_ops __read_mostly; | 25 | static struct cleancache_ops *cleancache_ops __read_mostly; |
@@ -34,145 +34,107 @@ static u64 cleancache_failed_gets; | |||
34 | static u64 cleancache_puts; | 34 | static u64 cleancache_puts; |
35 | static u64 cleancache_invalidates; | 35 | static u64 cleancache_invalidates; |
36 | 36 | ||
37 | /* | 37 | static void cleancache_register_ops_sb(struct super_block *sb, void *unused) |
38 | * When no backend is registered all calls to init_fs and init_shared_fs | 38 | { |
39 | * are registered and fake poolids (FAKE_FS_POOLID_OFFSET or | 39 | switch (sb->cleancache_poolid) { |
40 | * FAKE_SHARED_FS_POOLID_OFFSET, plus offset in the respective array | 40 | case CLEANCACHE_NO_BACKEND: |
41 | * [shared_|]fs_poolid_map) are given to the respective super block | 41 | __cleancache_init_fs(sb); |
42 | * (sb->cleancache_poolid) and no tmem_pools are created. When a backend | 42 | break; |
43 | * registers with cleancache the previous calls to init_fs and init_shared_fs | 43 | case CLEANCACHE_NO_BACKEND_SHARED: |
44 | * are executed to create tmem_pools and set the respective poolids. While no | 44 | __cleancache_init_shared_fs(sb); |
45 | * backend is registered all "puts", "gets" and "flushes" are ignored or failed. | 45 | break; |
46 | */ | 46 | } |
47 | #define MAX_INITIALIZABLE_FS 32 | 47 | } |
48 | #define FAKE_FS_POOLID_OFFSET 1000 | ||
49 | #define FAKE_SHARED_FS_POOLID_OFFSET 2000 | ||
50 | |||
51 | #define FS_NO_BACKEND (-1) | ||
52 | #define FS_UNKNOWN (-2) | ||
53 | static int fs_poolid_map[MAX_INITIALIZABLE_FS]; | ||
54 | static int shared_fs_poolid_map[MAX_INITIALIZABLE_FS]; | ||
55 | static char *uuids[MAX_INITIALIZABLE_FS]; | ||
56 | /* | ||
57 | * Mutex for the [shared_|]fs_poolid_map to guard against multiple threads | ||
58 | * invoking umount (and ending in __cleancache_invalidate_fs) and also multiple | ||
59 | * threads calling mount (and ending up in __cleancache_init_[shared|]fs). | ||
60 | */ | ||
61 | static DEFINE_MUTEX(poolid_mutex); | ||
62 | /* | ||
63 | * When set to false (default) all calls to the cleancache functions, except | ||
64 | * the __cleancache_invalidate_fs and __cleancache_init_[shared|]fs are guarded | ||
65 | * by the if (!cleancache_ops) return. This means multiple threads (from | ||
66 | * different filesystems) will be checking cleancache_ops. The usage of a | ||
67 | * bool instead of a atomic_t or a bool guarded by a spinlock is OK - we are | ||
68 | * OK if the time between the backend's have been initialized (and | ||
69 | * cleancache_ops has been set to not NULL) and when the filesystems start | ||
70 | * actually calling the backends. The inverse (when unloading) is obviously | ||
71 | * not good - but this shim does not do that (yet). | ||
72 | */ | ||
73 | |||
74 | /* | ||
75 | * The backends and filesystems work all asynchronously. This is b/c the | ||
76 | * backends can be built as modules. | ||
77 | * The usual sequence of events is: | ||
78 | * a) mount / -> __cleancache_init_fs is called. We set the | ||
79 | * [shared_|]fs_poolid_map and uuids for. | ||
80 | * | ||
81 | * b). user does I/Os -> we call the rest of __cleancache_* functions | ||
82 | * which return immediately as cleancache_ops is false. | ||
83 | * | ||
84 | * c). modprobe zcache -> cleancache_register_ops. We init the backend | ||
85 | * and set cleancache_ops to true, and for any fs_poolid_map | ||
86 | * (which is set by __cleancache_init_fs) we initialize the poolid. | ||
87 | * | ||
88 | * d). user does I/Os -> now that cleancache_ops is true all the | ||
89 | * __cleancache_* functions can call the backend. They all check | ||
90 | * that fs_poolid_map is valid and if so invoke the backend. | ||
91 | * | ||
92 | * e). umount / -> __cleancache_invalidate_fs, the fs_poolid_map is | ||
93 | * reset (which is the second check in the __cleancache_* ops | ||
94 | * to call the backend). | ||
95 | * | ||
96 | * The sequence of event could also be c), followed by a), and d). and e). The | ||
97 | * c) would not happen anymore. There is also the chance of c), and one thread | ||
98 | * doing a) + d), and another doing e). For that case we depend on the | ||
99 | * filesystem calling __cleancache_invalidate_fs in the proper sequence (so | ||
100 | * that it handles all I/Os before it invalidates the fs (which is last part | ||
101 | * of unmounting process). | ||
102 | * | ||
103 | * Note: The acute reader will notice that there is no "rmmod zcache" case. | ||
104 | * This is b/c the functionality for that is not yet implemented and when | ||
105 | * done, will require some extra locking not yet devised. | ||
106 | */ | ||
107 | 48 | ||
108 | /* | 49 | /* |
109 | * Register operations for cleancache, returning previous thus allowing | 50 | * Register operations for cleancache. Returns 0 on success. |
110 | * detection of multiple backends and possible nesting. | ||
111 | */ | 51 | */ |
112 | struct cleancache_ops *cleancache_register_ops(struct cleancache_ops *ops) | 52 | int cleancache_register_ops(struct cleancache_ops *ops) |
113 | { | 53 | { |
114 | struct cleancache_ops *old = cleancache_ops; | 54 | if (cmpxchg(&cleancache_ops, NULL, ops)) |
115 | int i; | 55 | return -EBUSY; |
116 | 56 | ||
117 | mutex_lock(&poolid_mutex); | ||
118 | for (i = 0; i < MAX_INITIALIZABLE_FS; i++) { | ||
119 | if (fs_poolid_map[i] == FS_NO_BACKEND) | ||
120 | fs_poolid_map[i] = ops->init_fs(PAGE_SIZE); | ||
121 | if (shared_fs_poolid_map[i] == FS_NO_BACKEND) | ||
122 | shared_fs_poolid_map[i] = ops->init_shared_fs | ||
123 | (uuids[i], PAGE_SIZE); | ||
124 | } | ||
125 | /* | 57 | /* |
126 | * We MUST set cleancache_ops _after_ we have called the backends | 58 | * A cleancache backend can be built as a module and hence loaded after |
127 | * init_fs or init_shared_fs functions. Otherwise the compiler might | 59 | * a cleancache enabled filesystem has called cleancache_init_fs. To |
128 | * re-order where cleancache_ops is set in this function. | 60 | * handle such a scenario, here we call ->init_fs or ->init_shared_fs |
61 | * for each active super block. To differentiate between local and | ||
62 | * shared filesystems, we temporarily initialize sb->cleancache_poolid | ||
63 | * to CLEANCACHE_NO_BACKEND or CLEANCACHE_NO_BACKEND_SHARED | ||
64 | * respectively in case there is no backend registered at the time | ||
65 | * cleancache_init_fs or cleancache_init_shared_fs is called. | ||
66 | * | ||
67 | * Since filesystems can be mounted concurrently with cleancache | ||
68 | * backend registration, we have to be careful to guarantee that all | ||
69 | * cleancache enabled filesystems that has been mounted by the time | ||
70 | * cleancache_register_ops is called has got and all mounted later will | ||
71 | * get cleancache_poolid. This is assured by the following statements | ||
72 | * tied together: | ||
73 | * | ||
74 | * a) iterate_supers skips only those super blocks that have started | ||
75 | * ->kill_sb | ||
76 | * | ||
77 | * b) if iterate_supers encounters a super block that has not finished | ||
78 | * ->mount yet, it waits until it is finished | ||
79 | * | ||
80 | * c) cleancache_init_fs is called from ->mount and | ||
81 | * cleancache_invalidate_fs is called from ->kill_sb | ||
82 | * | ||
83 | * d) we call iterate_supers after cleancache_ops has been set | ||
84 | * | ||
85 | * From a) it follows that if iterate_supers skips a super block, then | ||
86 | * either the super block is already dead, in which case we do not need | ||
87 | * to bother initializing cleancache for it, or it was mounted after we | ||
88 | * initiated iterate_supers. In the latter case, it must have seen | ||
89 | * cleancache_ops set according to d) and initialized cleancache from | ||
90 | * ->mount by itself according to c). This proves that we call | ||
91 | * ->init_fs at least once for each active super block. | ||
92 | * | ||
93 | * From b) and c) it follows that if iterate_supers encounters a super | ||
94 | * block that has already started ->init_fs, it will wait until ->mount | ||
95 | * and hence ->init_fs has finished, then check cleancache_poolid, see | ||
96 | * that it has already been set and therefore do nothing. This proves | ||
97 | * that we call ->init_fs no more than once for each super block. | ||
98 | * | ||
99 | * Taken together, the last two paragraphs prove the function's | ||
100 | * correctness. | ||
101 | * | ||
102 | * Note that various cleancache callbacks may proceed before this | ||
103 | * function is called or even concurrently with it, but since | ||
104 | * CLEANCACHE_NO_BACKEND is negative, they will all result in a noop | ||
105 | * until the corresponding ->init_fs has been actually called and | ||
106 | * cleancache_ops has been set. | ||
129 | */ | 107 | */ |
130 | barrier(); | 108 | iterate_supers(cleancache_register_ops_sb, NULL); |
131 | cleancache_ops = ops; | 109 | return 0; |
132 | mutex_unlock(&poolid_mutex); | ||
133 | return old; | ||
134 | } | 110 | } |
135 | EXPORT_SYMBOL(cleancache_register_ops); | 111 | EXPORT_SYMBOL(cleancache_register_ops); |
136 | 112 | ||
137 | /* Called by a cleancache-enabled filesystem at time of mount */ | 113 | /* Called by a cleancache-enabled filesystem at time of mount */ |
138 | void __cleancache_init_fs(struct super_block *sb) | 114 | void __cleancache_init_fs(struct super_block *sb) |
139 | { | 115 | { |
140 | int i; | 116 | int pool_id = CLEANCACHE_NO_BACKEND; |
141 | 117 | ||
142 | mutex_lock(&poolid_mutex); | 118 | if (cleancache_ops) { |
143 | for (i = 0; i < MAX_INITIALIZABLE_FS; i++) { | 119 | pool_id = cleancache_ops->init_fs(PAGE_SIZE); |
144 | if (fs_poolid_map[i] == FS_UNKNOWN) { | 120 | if (pool_id < 0) |
145 | sb->cleancache_poolid = i + FAKE_FS_POOLID_OFFSET; | 121 | pool_id = CLEANCACHE_NO_POOL; |
146 | if (cleancache_ops) | ||
147 | fs_poolid_map[i] = cleancache_ops->init_fs(PAGE_SIZE); | ||
148 | else | ||
149 | fs_poolid_map[i] = FS_NO_BACKEND; | ||
150 | break; | ||
151 | } | ||
152 | } | 122 | } |
153 | mutex_unlock(&poolid_mutex); | 123 | sb->cleancache_poolid = pool_id; |
154 | } | 124 | } |
155 | EXPORT_SYMBOL(__cleancache_init_fs); | 125 | EXPORT_SYMBOL(__cleancache_init_fs); |
156 | 126 | ||
157 | /* Called by a cleancache-enabled clustered filesystem at time of mount */ | 127 | /* Called by a cleancache-enabled clustered filesystem at time of mount */ |
158 | void __cleancache_init_shared_fs(char *uuid, struct super_block *sb) | 128 | void __cleancache_init_shared_fs(struct super_block *sb) |
159 | { | 129 | { |
160 | int i; | 130 | int pool_id = CLEANCACHE_NO_BACKEND_SHARED; |
161 | 131 | ||
162 | mutex_lock(&poolid_mutex); | 132 | if (cleancache_ops) { |
163 | for (i = 0; i < MAX_INITIALIZABLE_FS; i++) { | 133 | pool_id = cleancache_ops->init_shared_fs(sb->s_uuid, PAGE_SIZE); |
164 | if (shared_fs_poolid_map[i] == FS_UNKNOWN) { | 134 | if (pool_id < 0) |
165 | sb->cleancache_poolid = i + FAKE_SHARED_FS_POOLID_OFFSET; | 135 | pool_id = CLEANCACHE_NO_POOL; |
166 | uuids[i] = uuid; | ||
167 | if (cleancache_ops) | ||
168 | shared_fs_poolid_map[i] = cleancache_ops->init_shared_fs | ||
169 | (uuid, PAGE_SIZE); | ||
170 | else | ||
171 | shared_fs_poolid_map[i] = FS_NO_BACKEND; | ||
172 | break; | ||
173 | } | ||
174 | } | 136 | } |
175 | mutex_unlock(&poolid_mutex); | 137 | sb->cleancache_poolid = pool_id; |
176 | } | 138 | } |
177 | EXPORT_SYMBOL(__cleancache_init_shared_fs); | 139 | EXPORT_SYMBOL(__cleancache_init_shared_fs); |
178 | 140 | ||
@@ -202,19 +164,6 @@ static int cleancache_get_key(struct inode *inode, | |||
202 | } | 164 | } |
203 | 165 | ||
204 | /* | 166 | /* |
205 | * Returns a pool_id that is associated with a given fake poolid. | ||
206 | */ | ||
207 | static int get_poolid_from_fake(int fake_pool_id) | ||
208 | { | ||
209 | if (fake_pool_id >= FAKE_SHARED_FS_POOLID_OFFSET) | ||
210 | return shared_fs_poolid_map[fake_pool_id - | ||
211 | FAKE_SHARED_FS_POOLID_OFFSET]; | ||
212 | else if (fake_pool_id >= FAKE_FS_POOLID_OFFSET) | ||
213 | return fs_poolid_map[fake_pool_id - FAKE_FS_POOLID_OFFSET]; | ||
214 | return FS_NO_BACKEND; | ||
215 | } | ||
216 | |||
217 | /* | ||
218 | * "Get" data from cleancache associated with the poolid/inode/index | 167 | * "Get" data from cleancache associated with the poolid/inode/index |
219 | * that were specified when the data was put to cleanache and, if | 168 | * that were specified when the data was put to cleanache and, if |
220 | * successful, use it to fill the specified page with data and return 0. | 169 | * successful, use it to fill the specified page with data and return 0. |
@@ -229,7 +178,6 @@ int __cleancache_get_page(struct page *page) | |||
229 | { | 178 | { |
230 | int ret = -1; | 179 | int ret = -1; |
231 | int pool_id; | 180 | int pool_id; |
232 | int fake_pool_id; | ||
233 | struct cleancache_filekey key = { .u.key = { 0 } }; | 181 | struct cleancache_filekey key = { .u.key = { 0 } }; |
234 | 182 | ||
235 | if (!cleancache_ops) { | 183 | if (!cleancache_ops) { |
@@ -238,17 +186,14 @@ int __cleancache_get_page(struct page *page) | |||
238 | } | 186 | } |
239 | 187 | ||
240 | VM_BUG_ON_PAGE(!PageLocked(page), page); | 188 | VM_BUG_ON_PAGE(!PageLocked(page), page); |
241 | fake_pool_id = page->mapping->host->i_sb->cleancache_poolid; | 189 | pool_id = page->mapping->host->i_sb->cleancache_poolid; |
242 | if (fake_pool_id < 0) | 190 | if (pool_id < 0) |
243 | goto out; | 191 | goto out; |
244 | pool_id = get_poolid_from_fake(fake_pool_id); | ||
245 | 192 | ||
246 | if (cleancache_get_key(page->mapping->host, &key) < 0) | 193 | if (cleancache_get_key(page->mapping->host, &key) < 0) |
247 | goto out; | 194 | goto out; |
248 | 195 | ||
249 | if (pool_id >= 0) | 196 | ret = cleancache_ops->get_page(pool_id, key, page->index, page); |
250 | ret = cleancache_ops->get_page(pool_id, | ||
251 | key, page->index, page); | ||
252 | if (ret == 0) | 197 | if (ret == 0) |
253 | cleancache_succ_gets++; | 198 | cleancache_succ_gets++; |
254 | else | 199 | else |
@@ -271,7 +216,6 @@ EXPORT_SYMBOL(__cleancache_get_page); | |||
271 | void __cleancache_put_page(struct page *page) | 216 | void __cleancache_put_page(struct page *page) |
272 | { | 217 | { |
273 | int pool_id; | 218 | int pool_id; |
274 | int fake_pool_id; | ||
275 | struct cleancache_filekey key = { .u.key = { 0 } }; | 219 | struct cleancache_filekey key = { .u.key = { 0 } }; |
276 | 220 | ||
277 | if (!cleancache_ops) { | 221 | if (!cleancache_ops) { |
@@ -280,12 +224,7 @@ void __cleancache_put_page(struct page *page) | |||
280 | } | 224 | } |
281 | 225 | ||
282 | VM_BUG_ON_PAGE(!PageLocked(page), page); | 226 | VM_BUG_ON_PAGE(!PageLocked(page), page); |
283 | fake_pool_id = page->mapping->host->i_sb->cleancache_poolid; | 227 | pool_id = page->mapping->host->i_sb->cleancache_poolid; |
284 | if (fake_pool_id < 0) | ||
285 | return; | ||
286 | |||
287 | pool_id = get_poolid_from_fake(fake_pool_id); | ||
288 | |||
289 | if (pool_id >= 0 && | 228 | if (pool_id >= 0 && |
290 | cleancache_get_key(page->mapping->host, &key) >= 0) { | 229 | cleancache_get_key(page->mapping->host, &key) >= 0) { |
291 | cleancache_ops->put_page(pool_id, key, page->index, page); | 230 | cleancache_ops->put_page(pool_id, key, page->index, page); |
@@ -306,18 +245,13 @@ void __cleancache_invalidate_page(struct address_space *mapping, | |||
306 | struct page *page) | 245 | struct page *page) |
307 | { | 246 | { |
308 | /* careful... page->mapping is NULL sometimes when this is called */ | 247 | /* careful... page->mapping is NULL sometimes when this is called */ |
309 | int pool_id; | 248 | int pool_id = mapping->host->i_sb->cleancache_poolid; |
310 | int fake_pool_id = mapping->host->i_sb->cleancache_poolid; | ||
311 | struct cleancache_filekey key = { .u.key = { 0 } }; | 249 | struct cleancache_filekey key = { .u.key = { 0 } }; |
312 | 250 | ||
313 | if (!cleancache_ops) | 251 | if (!cleancache_ops) |
314 | return; | 252 | return; |
315 | 253 | ||
316 | if (fake_pool_id >= 0) { | 254 | if (pool_id >= 0) { |
317 | pool_id = get_poolid_from_fake(fake_pool_id); | ||
318 | if (pool_id < 0) | ||
319 | return; | ||
320 | |||
321 | VM_BUG_ON_PAGE(!PageLocked(page), page); | 255 | VM_BUG_ON_PAGE(!PageLocked(page), page); |
322 | if (cleancache_get_key(mapping->host, &key) >= 0) { | 256 | if (cleancache_get_key(mapping->host, &key) >= 0) { |
323 | cleancache_ops->invalidate_page(pool_id, | 257 | cleancache_ops->invalidate_page(pool_id, |
@@ -339,18 +273,12 @@ EXPORT_SYMBOL(__cleancache_invalidate_page); | |||
339 | */ | 273 | */ |
340 | void __cleancache_invalidate_inode(struct address_space *mapping) | 274 | void __cleancache_invalidate_inode(struct address_space *mapping) |
341 | { | 275 | { |
342 | int pool_id; | 276 | int pool_id = mapping->host->i_sb->cleancache_poolid; |
343 | int fake_pool_id = mapping->host->i_sb->cleancache_poolid; | ||
344 | struct cleancache_filekey key = { .u.key = { 0 } }; | 277 | struct cleancache_filekey key = { .u.key = { 0 } }; |
345 | 278 | ||
346 | if (!cleancache_ops) | 279 | if (!cleancache_ops) |
347 | return; | 280 | return; |
348 | 281 | ||
349 | if (fake_pool_id < 0) | ||
350 | return; | ||
351 | |||
352 | pool_id = get_poolid_from_fake(fake_pool_id); | ||
353 | |||
354 | if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0) | 282 | if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0) |
355 | cleancache_ops->invalidate_inode(pool_id, key); | 283 | cleancache_ops->invalidate_inode(pool_id, key); |
356 | } | 284 | } |
@@ -363,32 +291,18 @@ EXPORT_SYMBOL(__cleancache_invalidate_inode); | |||
363 | */ | 291 | */ |
364 | void __cleancache_invalidate_fs(struct super_block *sb) | 292 | void __cleancache_invalidate_fs(struct super_block *sb) |
365 | { | 293 | { |
366 | int index; | 294 | int pool_id; |
367 | int fake_pool_id = sb->cleancache_poolid; | ||
368 | int old_poolid = fake_pool_id; | ||
369 | 295 | ||
370 | mutex_lock(&poolid_mutex); | 296 | pool_id = sb->cleancache_poolid; |
371 | if (fake_pool_id >= FAKE_SHARED_FS_POOLID_OFFSET) { | 297 | sb->cleancache_poolid = CLEANCACHE_NO_POOL; |
372 | index = fake_pool_id - FAKE_SHARED_FS_POOLID_OFFSET; | 298 | |
373 | old_poolid = shared_fs_poolid_map[index]; | 299 | if (cleancache_ops && pool_id >= 0) |
374 | shared_fs_poolid_map[index] = FS_UNKNOWN; | 300 | cleancache_ops->invalidate_fs(pool_id); |
375 | uuids[index] = NULL; | ||
376 | } else if (fake_pool_id >= FAKE_FS_POOLID_OFFSET) { | ||
377 | index = fake_pool_id - FAKE_FS_POOLID_OFFSET; | ||
378 | old_poolid = fs_poolid_map[index]; | ||
379 | fs_poolid_map[index] = FS_UNKNOWN; | ||
380 | } | ||
381 | sb->cleancache_poolid = -1; | ||
382 | if (cleancache_ops) | ||
383 | cleancache_ops->invalidate_fs(old_poolid); | ||
384 | mutex_unlock(&poolid_mutex); | ||
385 | } | 301 | } |
386 | EXPORT_SYMBOL(__cleancache_invalidate_fs); | 302 | EXPORT_SYMBOL(__cleancache_invalidate_fs); |
387 | 303 | ||
388 | static int __init init_cleancache(void) | 304 | static int __init init_cleancache(void) |
389 | { | 305 | { |
390 | int i; | ||
391 | |||
392 | #ifdef CONFIG_DEBUG_FS | 306 | #ifdef CONFIG_DEBUG_FS |
393 | struct dentry *root = debugfs_create_dir("cleancache", NULL); | 307 | struct dentry *root = debugfs_create_dir("cleancache", NULL); |
394 | if (root == NULL) | 308 | if (root == NULL) |
@@ -400,10 +314,6 @@ static int __init init_cleancache(void) | |||
400 | debugfs_create_u64("invalidates", S_IRUGO, | 314 | debugfs_create_u64("invalidates", S_IRUGO, |
401 | root, &cleancache_invalidates); | 315 | root, &cleancache_invalidates); |
402 | #endif | 316 | #endif |
403 | for (i = 0; i < MAX_INITIALIZABLE_FS; i++) { | ||
404 | fs_poolid_map[i] = FS_UNKNOWN; | ||
405 | shared_fs_poolid_map[i] = FS_UNKNOWN; | ||
406 | } | ||
407 | return 0; | 317 | return 0; |
408 | } | 318 | } |
409 | module_init(init_cleancache) | 319 | module_init(init_cleancache) |
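
The rewritten cleancache_register_ops() above reduces backend registration to a single compare-and-swap on the ops pointer. Here is a minimal userspace sketch of that "first registration wins, later callers get -EBUSY" pattern; the struct and function names are invented for illustration:

#include <stdatomic.h>
#include <stddef.h>
#include <errno.h>
#include <stdio.h>

struct backend_ops {
	int (*init_fs)(size_t pagesize);
};

static _Atomic(struct backend_ops *) registered_ops;

static int register_ops(struct backend_ops *ops)
{
	struct backend_ops *expected = NULL;

	/* Succeeds only if no backend has been registered yet. */
	if (!atomic_compare_exchange_strong(&registered_ops, &expected, ops))
		return -EBUSY;
	return 0;
}

static int dummy_init_fs(size_t pagesize) { (void)pagesize; return 0; }

int main(void)
{
	struct backend_ops a = { dummy_init_fs }, b = { dummy_init_fs };

	printf("%d\n", register_ops(&a));   /* 0 */
	printf("%d\n", register_ops(&b));   /* -EBUSY: slot already claimed */
	return 0;
}

Once the pointer is published, the per-super-block catch-up is handled by iterate_supers() in the patch above, so the old fake pool-id remapping tables are no longer needed.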
@@ -35,29 +35,24 @@ | |||
35 | #include <linux/highmem.h> | 35 | #include <linux/highmem.h> |
36 | #include <linux/io.h> | 36 | #include <linux/io.h> |
37 | 37 | ||
38 | struct cma { | 38 | #include "cma.h" |
39 | unsigned long base_pfn; | 39 | |
40 | unsigned long count; | 40 | struct cma cma_areas[MAX_CMA_AREAS]; |
41 | unsigned long *bitmap; | 41 | unsigned cma_area_count; |
42 | unsigned int order_per_bit; /* Order of pages represented by one bit */ | ||
43 | struct mutex lock; | ||
44 | }; | ||
45 | |||
46 | static struct cma cma_areas[MAX_CMA_AREAS]; | ||
47 | static unsigned cma_area_count; | ||
48 | static DEFINE_MUTEX(cma_mutex); | 42 | static DEFINE_MUTEX(cma_mutex); |
49 | 43 | ||
50 | phys_addr_t cma_get_base(struct cma *cma) | 44 | phys_addr_t cma_get_base(const struct cma *cma) |
51 | { | 45 | { |
52 | return PFN_PHYS(cma->base_pfn); | 46 | return PFN_PHYS(cma->base_pfn); |
53 | } | 47 | } |
54 | 48 | ||
55 | unsigned long cma_get_size(struct cma *cma) | 49 | unsigned long cma_get_size(const struct cma *cma) |
56 | { | 50 | { |
57 | return cma->count << PAGE_SHIFT; | 51 | return cma->count << PAGE_SHIFT; |
58 | } | 52 | } |
59 | 53 | ||
60 | static unsigned long cma_bitmap_aligned_mask(struct cma *cma, int align_order) | 54 | static unsigned long cma_bitmap_aligned_mask(const struct cma *cma, |
55 | int align_order) | ||
61 | { | 56 | { |
62 | if (align_order <= cma->order_per_bit) | 57 | if (align_order <= cma->order_per_bit) |
63 | return 0; | 58 | return 0; |
@@ -68,7 +63,8 @@ static unsigned long cma_bitmap_aligned_mask(struct cma *cma, int align_order) | |||
68 | * Find a PFN aligned to the specified order and return an offset represented in | 63 | * Find a PFN aligned to the specified order and return an offset represented in |
69 | * order_per_bits. | 64 | * order_per_bits. |
70 | */ | 65 | */ |
71 | static unsigned long cma_bitmap_aligned_offset(struct cma *cma, int align_order) | 66 | static unsigned long cma_bitmap_aligned_offset(const struct cma *cma, |
67 | int align_order) | ||
72 | { | 68 | { |
73 | if (align_order <= cma->order_per_bit) | 69 | if (align_order <= cma->order_per_bit) |
74 | return 0; | 70 | return 0; |
@@ -77,18 +73,14 @@ static unsigned long cma_bitmap_aligned_offset(struct cma *cma, int align_order) | |||
77 | - cma->base_pfn) >> cma->order_per_bit; | 73 | - cma->base_pfn) >> cma->order_per_bit; |
78 | } | 74 | } |
79 | 75 | ||
80 | static unsigned long cma_bitmap_maxno(struct cma *cma) | 76 | static unsigned long cma_bitmap_pages_to_bits(const struct cma *cma, |
81 | { | 77 | unsigned long pages) |
82 | return cma->count >> cma->order_per_bit; | ||
83 | } | ||
84 | |||
85 | static unsigned long cma_bitmap_pages_to_bits(struct cma *cma, | ||
86 | unsigned long pages) | ||
87 | { | 78 | { |
88 | return ALIGN(pages, 1UL << cma->order_per_bit) >> cma->order_per_bit; | 79 | return ALIGN(pages, 1UL << cma->order_per_bit) >> cma->order_per_bit; |
89 | } | 80 | } |
90 | 81 | ||
91 | static void cma_clear_bitmap(struct cma *cma, unsigned long pfn, int count) | 82 | static void cma_clear_bitmap(struct cma *cma, unsigned long pfn, |
83 | unsigned int count) | ||
92 | { | 84 | { |
93 | unsigned long bitmap_no, bitmap_count; | 85 | unsigned long bitmap_no, bitmap_count; |
94 | 86 | ||
@@ -134,6 +126,12 @@ static int __init cma_activate_area(struct cma *cma) | |||
134 | } while (--i); | 126 | } while (--i); |
135 | 127 | ||
136 | mutex_init(&cma->lock); | 128 | mutex_init(&cma->lock); |
129 | |||
130 | #ifdef CONFIG_CMA_DEBUGFS | ||
131 | INIT_HLIST_HEAD(&cma->mem_head); | ||
132 | spin_lock_init(&cma->mem_head_lock); | ||
133 | #endif | ||
134 | |||
137 | return 0; | 135 | return 0; |
138 | 136 | ||
139 | err: | 137 | err: |
@@ -167,7 +165,8 @@ core_initcall(cma_init_reserved_areas); | |||
167 | * This function creates custom contiguous area from already reserved memory. | 165 | * This function creates custom contiguous area from already reserved memory. |
168 | */ | 166 | */ |
169 | int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, | 167 | int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, |
170 | int order_per_bit, struct cma **res_cma) | 168 | unsigned int order_per_bit, |
169 | struct cma **res_cma) | ||
171 | { | 170 | { |
172 | struct cma *cma; | 171 | struct cma *cma; |
173 | phys_addr_t alignment; | 172 | phys_addr_t alignment; |
@@ -358,7 +357,7 @@ err: | |||
358 | * This function allocates part of contiguous memory on specific | 357 | * This function allocates part of contiguous memory on specific |
359 | * contiguous memory area. | 358 | * contiguous memory area. |
360 | */ | 359 | */ |
361 | struct page *cma_alloc(struct cma *cma, int count, unsigned int align) | 360 | struct page *cma_alloc(struct cma *cma, unsigned int count, unsigned int align) |
362 | { | 361 | { |
363 | unsigned long mask, offset, pfn, start = 0; | 362 | unsigned long mask, offset, pfn, start = 0; |
364 | unsigned long bitmap_maxno, bitmap_no, bitmap_count; | 363 | unsigned long bitmap_maxno, bitmap_no, bitmap_count; |
@@ -429,7 +428,7 @@ struct page *cma_alloc(struct cma *cma, int count, unsigned int align) | |||
429 | * It returns false when provided pages do not belong to contiguous area and | 428 | * It returns false when provided pages do not belong to contiguous area and |
430 | * true otherwise. | 429 | * true otherwise. |
431 | */ | 430 | */ |
432 | bool cma_release(struct cma *cma, struct page *pages, int count) | 431 | bool cma_release(struct cma *cma, const struct page *pages, unsigned int count) |
433 | { | 432 | { |
434 | unsigned long pfn; | 433 | unsigned long pfn; |
435 | 434 | ||
diff --git a/mm/cma.h b/mm/cma.h new file mode 100644 index 000000000000..1132d733556d --- /dev/null +++ b/mm/cma.h | |||
@@ -0,0 +1,24 @@ | |||
1 | #ifndef __MM_CMA_H__ | ||
2 | #define __MM_CMA_H__ | ||
3 | |||
4 | struct cma { | ||
5 | unsigned long base_pfn; | ||
6 | unsigned long count; | ||
7 | unsigned long *bitmap; | ||
8 | unsigned int order_per_bit; /* Order of pages represented by one bit */ | ||
9 | struct mutex lock; | ||
10 | #ifdef CONFIG_CMA_DEBUGFS | ||
11 | struct hlist_head mem_head; | ||
12 | spinlock_t mem_head_lock; | ||
13 | #endif | ||
14 | }; | ||
15 | |||
16 | extern struct cma cma_areas[MAX_CMA_AREAS]; | ||
17 | extern unsigned cma_area_count; | ||
18 | |||
19 | static unsigned long cma_bitmap_maxno(struct cma *cma) | ||
20 | { | ||
21 | return cma->count >> cma->order_per_bit; | ||
22 | } | ||
23 | |||
24 | #endif | ||
diff --git a/mm/cma_debug.c b/mm/cma_debug.c new file mode 100644 index 000000000000..0b377536ccde --- /dev/null +++ b/mm/cma_debug.c | |||
@@ -0,0 +1,170 @@ | |||
1 | /* | ||
2 | * CMA DebugFS Interface | ||
3 | * | ||
4 | * Copyright (c) 2015 Sasha Levin <sasha.levin@oracle.com> | ||
5 | */ | ||
6 | |||
7 | |||
8 | #include <linux/debugfs.h> | ||
9 | #include <linux/cma.h> | ||
10 | #include <linux/list.h> | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/slab.h> | ||
13 | #include <linux/mm_types.h> | ||
14 | |||
15 | #include "cma.h" | ||
16 | |||
17 | struct cma_mem { | ||
18 | struct hlist_node node; | ||
19 | struct page *p; | ||
20 | unsigned long n; | ||
21 | }; | ||
22 | |||
23 | static struct dentry *cma_debugfs_root; | ||
24 | |||
25 | static int cma_debugfs_get(void *data, u64 *val) | ||
26 | { | ||
27 | unsigned long *p = data; | ||
28 | |||
29 | *val = *p; | ||
30 | |||
31 | return 0; | ||
32 | } | ||
33 | |||
34 | DEFINE_SIMPLE_ATTRIBUTE(cma_debugfs_fops, cma_debugfs_get, NULL, "%llu\n"); | ||
35 | |||
36 | static void cma_add_to_cma_mem_list(struct cma *cma, struct cma_mem *mem) | ||
37 | { | ||
38 | spin_lock(&cma->mem_head_lock); | ||
39 | hlist_add_head(&mem->node, &cma->mem_head); | ||
40 | spin_unlock(&cma->mem_head_lock); | ||
41 | } | ||
42 | |||
43 | static struct cma_mem *cma_get_entry_from_list(struct cma *cma) | ||
44 | { | ||
45 | struct cma_mem *mem = NULL; | ||
46 | |||
47 | spin_lock(&cma->mem_head_lock); | ||
48 | if (!hlist_empty(&cma->mem_head)) { | ||
49 | mem = hlist_entry(cma->mem_head.first, struct cma_mem, node); | ||
50 | hlist_del_init(&mem->node); | ||
51 | } | ||
52 | spin_unlock(&cma->mem_head_lock); | ||
53 | |||
54 | return mem; | ||
55 | } | ||
56 | |||
57 | static int cma_free_mem(struct cma *cma, int count) | ||
58 | { | ||
59 | struct cma_mem *mem = NULL; | ||
60 | |||
61 | while (count) { | ||
62 | mem = cma_get_entry_from_list(cma); | ||
63 | if (mem == NULL) | ||
64 | return 0; | ||
65 | |||
66 | if (mem->n <= count) { | ||
67 | cma_release(cma, mem->p, mem->n); | ||
68 | count -= mem->n; | ||
69 | kfree(mem); | ||
70 | } else if (cma->order_per_bit == 0) { | ||
71 | cma_release(cma, mem->p, count); | ||
72 | mem->p += count; | ||
73 | mem->n -= count; | ||
74 | count = 0; | ||
75 | cma_add_to_cma_mem_list(cma, mem); | ||
76 | } else { | ||
77 | pr_debug("cma: cannot release partial block when order_per_bit != 0\n"); | ||
78 | cma_add_to_cma_mem_list(cma, mem); | ||
79 | break; | ||
80 | } | ||
81 | } | ||
82 | |||
83 | return 0; | ||
84 | |||
85 | } | ||
86 | |||
87 | static int cma_free_write(void *data, u64 val) | ||
88 | { | ||
89 | int pages = val; | ||
90 | struct cma *cma = data; | ||
91 | |||
92 | return cma_free_mem(cma, pages); | ||
93 | } | ||
94 | |||
95 | DEFINE_SIMPLE_ATTRIBUTE(cma_free_fops, NULL, cma_free_write, "%llu\n"); | ||
96 | |||
97 | static int cma_alloc_mem(struct cma *cma, int count) | ||
98 | { | ||
99 | struct cma_mem *mem; | ||
100 | struct page *p; | ||
101 | |||
102 | mem = kzalloc(sizeof(*mem), GFP_KERNEL); | ||
103 | if (!mem) | ||
104 | return -ENOMEM; | ||
105 | |||
106 | p = cma_alloc(cma, count, 0); | ||
107 | if (!p) { | ||
108 | kfree(mem); | ||
109 | return -ENOMEM; | ||
110 | } | ||
111 | |||
112 | mem->p = p; | ||
113 | mem->n = count; | ||
114 | |||
115 | cma_add_to_cma_mem_list(cma, mem); | ||
116 | |||
117 | return 0; | ||
118 | } | ||
119 | |||
120 | static int cma_alloc_write(void *data, u64 val) | ||
121 | { | ||
122 | int pages = val; | ||
123 | struct cma *cma = data; | ||
124 | |||
125 | return cma_alloc_mem(cma, pages); | ||
126 | } | ||
127 | |||
128 | DEFINE_SIMPLE_ATTRIBUTE(cma_alloc_fops, NULL, cma_alloc_write, "%llu\n"); | ||
129 | |||
130 | static void cma_debugfs_add_one(struct cma *cma, int idx) | ||
131 | { | ||
132 | struct dentry *tmp; | ||
133 | char name[16]; | ||
134 | int u32s; | ||
135 | |||
136 | sprintf(name, "cma-%d", idx); | ||
137 | |||
138 | tmp = debugfs_create_dir(name, cma_debugfs_root); | ||
139 | |||
140 | debugfs_create_file("alloc", S_IWUSR, cma_debugfs_root, cma, | ||
141 | &cma_alloc_fops); | ||
142 | |||
143 | debugfs_create_file("free", S_IWUSR, cma_debugfs_root, cma, | ||
144 | &cma_free_fops); | ||
145 | |||
146 | debugfs_create_file("base_pfn", S_IRUGO, tmp, | ||
147 | &cma->base_pfn, &cma_debugfs_fops); | ||
148 | debugfs_create_file("count", S_IRUGO, tmp, | ||
149 | &cma->count, &cma_debugfs_fops); | ||
150 | debugfs_create_file("order_per_bit", S_IRUGO, tmp, | ||
151 | &cma->order_per_bit, &cma_debugfs_fops); | ||
152 | |||
153 | u32s = DIV_ROUND_UP(cma_bitmap_maxno(cma), BITS_PER_BYTE * sizeof(u32)); | ||
154 | debugfs_create_u32_array("bitmap", S_IRUGO, tmp, (u32*)cma->bitmap, u32s); | ||
155 | } | ||
156 | |||
157 | static int __init cma_debugfs_init(void) | ||
158 | { | ||
159 | int i; | ||
160 | |||
161 | cma_debugfs_root = debugfs_create_dir("cma", NULL); | ||
162 | if (!cma_debugfs_root) | ||
163 | return -ENOMEM; | ||
164 | |||
165 | for (i = 0; i < cma_area_count; i++) | ||
166 | cma_debugfs_add_one(&cma_areas[i], i); | ||
167 | |||
168 | return 0; | ||
169 | } | ||
170 | late_initcall(cma_debugfs_init); | ||
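
The bitmap sizing that cma.h and cma_debug.c rely on above is one bit per 2^order_per_bit pages. An illustrative standalone reimplementation of the arithmetic (not the kernel helpers themselves):

#include <stdio.h>

/* Round x up to a multiple of the power-of-two a. */
#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

static unsigned long bitmap_maxno(unsigned long count, unsigned int order_per_bit)
{
	return count >> order_per_bit;               /* as in cma_bitmap_maxno() */
}

static unsigned long pages_to_bits(unsigned long pages, unsigned int order_per_bit)
{
	/* as in cma_bitmap_pages_to_bits(): a partial block still costs a whole bit */
	return ALIGN_UP(pages, 1UL << order_per_bit) >> order_per_bit;
}

int main(void)
{
	/* e.g. an area of 4096 pages tracked with order_per_bit == 2 */
	printf("bits in bitmap:   %lu\n", bitmap_maxno(4096, 2));  /* 1024 */
	printf("bits for 5 pages: %lu\n", pages_to_bits(5, 2));    /* 2 */
	return 0;
}

This is also why cma_free_mem() above refuses to release a partial block when order_per_bit != 0: the bitmap cannot represent a fraction of a bit.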
diff --git a/mm/compaction.c b/mm/compaction.c index 8c0d9459b54a..a18201a8124e 100644 --- a/mm/compaction.c +++ b/mm/compaction.c | |||
@@ -1174,13 +1174,24 @@ static int __compact_finished(struct zone *zone, struct compact_control *cc, | |||
1174 | /* Direct compactor: Is a suitable page free? */ | 1174 | /* Direct compactor: Is a suitable page free? */ |
1175 | for (order = cc->order; order < MAX_ORDER; order++) { | 1175 | for (order = cc->order; order < MAX_ORDER; order++) { |
1176 | struct free_area *area = &zone->free_area[order]; | 1176 | struct free_area *area = &zone->free_area[order]; |
1177 | bool can_steal; | ||
1177 | 1178 | ||
1178 | /* Job done if page is free of the right migratetype */ | 1179 | /* Job done if page is free of the right migratetype */ |
1179 | if (!list_empty(&area->free_list[migratetype])) | 1180 | if (!list_empty(&area->free_list[migratetype])) |
1180 | return COMPACT_PARTIAL; | 1181 | return COMPACT_PARTIAL; |
1181 | 1182 | ||
1182 | /* Job done if allocation would set block type */ | 1183 | #ifdef CONFIG_CMA |
1183 | if (order >= pageblock_order && area->nr_free) | 1184 | /* MIGRATE_MOVABLE can fallback on MIGRATE_CMA */ |
1185 | if (migratetype == MIGRATE_MOVABLE && | ||
1186 | !list_empty(&area->free_list[MIGRATE_CMA])) | ||
1187 | return COMPACT_PARTIAL; | ||
1188 | #endif | ||
1189 | /* | ||
1190 | * Job done if allocation would steal freepages from | ||
1191 | * other migratetype buddy lists. | ||
1192 | */ | ||
1193 | if (find_suitable_fallback(area, order, migratetype, | ||
1194 | true, &can_steal) != -1) | ||
1184 | return COMPACT_PARTIAL; | 1195 | return COMPACT_PARTIAL; |
1185 | } | 1196 | } |
1186 | 1197 | ||
diff --git a/mm/filemap.c b/mm/filemap.c index 876f4e6f3ed6..12548d03c11d 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -202,16 +202,15 @@ void __delete_from_page_cache(struct page *page, void *shadow) | |||
202 | BUG_ON(page_mapped(page)); | 202 | BUG_ON(page_mapped(page)); |
203 | 203 | ||
204 | /* | 204 | /* |
205 | * Some filesystems seem to re-dirty the page even after | 205 | * At this point page must be either written or cleaned by truncate. |
206 | * the VM has canceled the dirty bit (eg ext3 journaling). | 206 | * Dirty page here signals a bug and loss of unwritten data. |
207 | * | 207 | * |
208 | * Fix it up by doing a final dirty accounting check after | 208 | * This fixes dirty accounting after removing the page entirely but |
209 | * having removed the page entirely. | 209 | * leaves PageDirty set: it has no effect for truncated page and |
210 | * anyway will be cleared before returning page into buddy allocator. | ||
210 | */ | 211 | */ |
211 | if (PageDirty(page) && mapping_cap_account_dirty(mapping)) { | 212 | if (WARN_ON_ONCE(PageDirty(page))) |
212 | dec_zone_page_state(page, NR_FILE_DIRTY); | 213 | account_page_cleaned(page, mapping); |
213 | dec_bdi_stat(inode_to_bdi(mapping->host), BDI_RECLAIMABLE); | ||
214 | } | ||
215 | } | 214 | } |
216 | 215 | ||
217 | /** | 216 | /** |
@@ -92,7 +92,7 @@ retry: | |||
92 | */ | 92 | */ |
93 | mark_page_accessed(page); | 93 | mark_page_accessed(page); |
94 | } | 94 | } |
95 | if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) { | 95 | if ((flags & FOLL_POPULATE) && (vma->vm_flags & VM_LOCKED)) { |
96 | /* | 96 | /* |
97 | * The preliminary mapping check is mainly to avoid the | 97 | * The preliminary mapping check is mainly to avoid the |
98 | * pointless overhead of lock_page on the ZERO_PAGE | 98 | * pointless overhead of lock_page on the ZERO_PAGE |
@@ -265,8 +265,8 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, | |||
265 | unsigned int fault_flags = 0; | 265 | unsigned int fault_flags = 0; |
266 | int ret; | 266 | int ret; |
267 | 267 | ||
268 | /* For mlock, just skip the stack guard page. */ | 268 | /* For mm_populate(), just skip the stack guard page. */ |
269 | if ((*flags & FOLL_MLOCK) && | 269 | if ((*flags & FOLL_POPULATE) && |
270 | (stack_guard_page_start(vma, address) || | 270 | (stack_guard_page_start(vma, address) || |
271 | stack_guard_page_end(vma, address + PAGE_SIZE))) | 271 | stack_guard_page_end(vma, address + PAGE_SIZE))) |
272 | return -ENOENT; | 272 | return -ENOENT; |
@@ -819,6 +819,124 @@ long get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
819 | EXPORT_SYMBOL(get_user_pages); | 819 | EXPORT_SYMBOL(get_user_pages); |
820 | 820 | ||
821 | /** | 821 | /** |
822 | * populate_vma_page_range() - populate a range of pages in the vma. | ||
823 | * @vma: target vma | ||
824 | * @start: start address | ||
825 | * @end: end address | ||
826 | * @nonblocking: | ||
827 | * | ||
828 | * This takes care of mlocking the pages too if VM_LOCKED is set. | ||
829 | * | ||
830 | * return 0 on success, negative error code on error. | ||
831 | * | ||
832 | * vma->vm_mm->mmap_sem must be held. | ||
833 | * | ||
834 | * If @nonblocking is NULL, it may be held for read or write and will | ||
835 | * be unperturbed. | ||
836 | * | ||
837 | * If @nonblocking is non-NULL, it must be held for read only and may be | ||
838 | * released. If it's released, *@nonblocking will be set to 0. | ||
839 | */ | ||
840 | long populate_vma_page_range(struct vm_area_struct *vma, | ||
841 | unsigned long start, unsigned long end, int *nonblocking) | ||
842 | { | ||
843 | struct mm_struct *mm = vma->vm_mm; | ||
844 | unsigned long nr_pages = (end - start) / PAGE_SIZE; | ||
845 | int gup_flags; | ||
846 | |||
847 | VM_BUG_ON(start & ~PAGE_MASK); | ||
848 | VM_BUG_ON(end & ~PAGE_MASK); | ||
849 | VM_BUG_ON_VMA(start < vma->vm_start, vma); | ||
850 | VM_BUG_ON_VMA(end > vma->vm_end, vma); | ||
851 | VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm); | ||
852 | |||
853 | gup_flags = FOLL_TOUCH | FOLL_POPULATE; | ||
854 | /* | ||
855 | * We want to touch writable mappings with a write fault in order | ||
856 | * to break COW, except for shared mappings because these don't COW | ||
857 | * and we would not want to dirty them for nothing. | ||
858 | */ | ||
859 | if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE) | ||
860 | gup_flags |= FOLL_WRITE; | ||
861 | |||
862 | /* | ||
863 | * We want mlock to succeed for regions that have any permissions | ||
864 | * other than PROT_NONE. | ||
865 | */ | ||
866 | if (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)) | ||
867 | gup_flags |= FOLL_FORCE; | ||
868 | |||
869 | /* | ||
870 | * We made sure addr is within a VMA, so the following will | ||
871 | * not result in a stack expansion that recurses back here. | ||
872 | */ | ||
873 | return __get_user_pages(current, mm, start, nr_pages, gup_flags, | ||
874 | NULL, NULL, nonblocking); | ||
875 | } | ||
876 | |||
877 | /* | ||
878 | * __mm_populate - populate and/or mlock pages within a range of address space. | ||
879 | * | ||
880 | * This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap | ||
881 | * flags. VMAs must be already marked with the desired vm_flags, and | ||
882 | * mmap_sem must not be held. | ||
883 | */ | ||
884 | int __mm_populate(unsigned long start, unsigned long len, int ignore_errors) | ||
885 | { | ||
886 | struct mm_struct *mm = current->mm; | ||
887 | unsigned long end, nstart, nend; | ||
888 | struct vm_area_struct *vma = NULL; | ||
889 | int locked = 0; | ||
890 | long ret = 0; | ||
891 | |||
892 | VM_BUG_ON(start & ~PAGE_MASK); | ||
893 | VM_BUG_ON(len != PAGE_ALIGN(len)); | ||
894 | end = start + len; | ||
895 | |||
896 | for (nstart = start; nstart < end; nstart = nend) { | ||
897 | /* | ||
898 | * We want to fault in pages for [nstart; end) address range. | ||
899 | * Find first corresponding VMA. | ||
900 | */ | ||
901 | if (!locked) { | ||
902 | locked = 1; | ||
903 | down_read(&mm->mmap_sem); | ||
904 | vma = find_vma(mm, nstart); | ||
905 | } else if (nstart >= vma->vm_end) | ||
906 | vma = vma->vm_next; | ||
907 | if (!vma || vma->vm_start >= end) | ||
908 | break; | ||
909 | /* | ||
910 | * Set [nstart; nend) to intersection of desired address | ||
911 | * range with the first VMA. Also, skip undesirable VMA types. | ||
912 | */ | ||
913 | nend = min(end, vma->vm_end); | ||
914 | if (vma->vm_flags & (VM_IO | VM_PFNMAP)) | ||
915 | continue; | ||
916 | if (nstart < vma->vm_start) | ||
917 | nstart = vma->vm_start; | ||
918 | /* | ||
919 | * Now fault in a range of pages. populate_vma_page_range() | ||
920 | * double checks the vma flags, so that it won't mlock pages | ||
921 | * if the vma was already munlocked. | ||
922 | */ | ||
923 | ret = populate_vma_page_range(vma, nstart, nend, &locked); | ||
924 | if (ret < 0) { | ||
925 | if (ignore_errors) { | ||
926 | ret = 0; | ||
927 | continue; /* continue at next VMA */ | ||
928 | } | ||
929 | break; | ||
930 | } | ||
931 | nend = nstart + ret * PAGE_SIZE; | ||
932 | ret = 0; | ||
933 | } | ||
934 | if (locked) | ||
935 | up_read(&mm->mmap_sem); | ||
936 | return ret; /* 0 or negative error code */ | ||
937 | } | ||
938 | |||
939 | /** | ||
822 | * get_dump_page() - pin user page in memory while writing it to core dump | 940 | * get_dump_page() - pin user page in memory while writing it to core dump |
823 | * @addr: user address | 941 | * @addr: user address |
824 | * | 942 | * |
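
As the comment above __mm_populate() says, this path backs mlock() and the MAP_POPULATE / MAP_LOCKED mmap flags. A small userspace illustration of a request that now ends up in populate_vma_page_range():

#define _GNU_SOURCE
#include <sys/mman.h>
#include <stdio.h>

int main(void)
{
	size_t len = 4 * 4096;
	/* MAP_POPULATE asks the kernel to pre-fault the whole range up front. */
	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	p[0] = 1;        /* already resident; no page fault is taken here */
	munmap(p, len);
	return 0;
}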
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 6817b0350c71..3afb5cbe1312 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
@@ -1231,7 +1231,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, | |||
1231 | pmd, _pmd, 1)) | 1231 | pmd, _pmd, 1)) |
1232 | update_mmu_cache_pmd(vma, addr, pmd); | 1232 | update_mmu_cache_pmd(vma, addr, pmd); |
1233 | } | 1233 | } |
1234 | if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) { | 1234 | if ((flags & FOLL_POPULATE) && (vma->vm_flags & VM_LOCKED)) { |
1235 | if (page->mapping && trylock_page(page)) { | 1235 | if (page->mapping && trylock_page(page)) { |
1236 | lru_add_drain(); | 1236 | lru_add_drain(); |
1237 | if (page->mapping) | 1237 | if (page->mapping) |
@@ -2109,7 +2109,7 @@ static void release_pte_pages(pte_t *pte, pte_t *_pte) | |||
2109 | { | 2109 | { |
2110 | while (--_pte >= pte) { | 2110 | while (--_pte >= pte) { |
2111 | pte_t pteval = *_pte; | 2111 | pte_t pteval = *_pte; |
2112 | if (!pte_none(pteval)) | 2112 | if (!pte_none(pteval) && !is_zero_pfn(pte_pfn(pteval))) |
2113 | release_pte_page(pte_page(pteval)); | 2113 | release_pte_page(pte_page(pteval)); |
2114 | } | 2114 | } |
2115 | } | 2115 | } |
@@ -2120,13 +2120,13 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, | |||
2120 | { | 2120 | { |
2121 | struct page *page; | 2121 | struct page *page; |
2122 | pte_t *_pte; | 2122 | pte_t *_pte; |
2123 | int none = 0; | 2123 | int none_or_zero = 0; |
2124 | bool referenced = false, writable = false; | 2124 | bool referenced = false, writable = false; |
2125 | for (_pte = pte; _pte < pte+HPAGE_PMD_NR; | 2125 | for (_pte = pte; _pte < pte+HPAGE_PMD_NR; |
2126 | _pte++, address += PAGE_SIZE) { | 2126 | _pte++, address += PAGE_SIZE) { |
2127 | pte_t pteval = *_pte; | 2127 | pte_t pteval = *_pte; |
2128 | if (pte_none(pteval)) { | 2128 | if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) { |
2129 | if (++none <= khugepaged_max_ptes_none) | 2129 | if (++none_or_zero <= khugepaged_max_ptes_none) |
2130 | continue; | 2130 | continue; |
2131 | else | 2131 | else |
2132 | goto out; | 2132 | goto out; |
@@ -2207,9 +2207,21 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page, | |||
2207 | pte_t pteval = *_pte; | 2207 | pte_t pteval = *_pte; |
2208 | struct page *src_page; | 2208 | struct page *src_page; |
2209 | 2209 | ||
2210 | if (pte_none(pteval)) { | 2210 | if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) { |
2211 | clear_user_highpage(page, address); | 2211 | clear_user_highpage(page, address); |
2212 | add_mm_counter(vma->vm_mm, MM_ANONPAGES, 1); | 2212 | add_mm_counter(vma->vm_mm, MM_ANONPAGES, 1); |
2213 | if (is_zero_pfn(pte_pfn(pteval))) { | ||
2214 | /* | ||
2215 | * ptl mostly unnecessary. | ||
2216 | */ | ||
2217 | spin_lock(ptl); | ||
2218 | /* | ||
2219 | * paravirt calls inside pte_clear here are | ||
2220 | * superfluous. | ||
2221 | */ | ||
2222 | pte_clear(vma->vm_mm, address, _pte); | ||
2223 | spin_unlock(ptl); | ||
2224 | } | ||
2213 | } else { | 2225 | } else { |
2214 | src_page = pte_page(pteval); | 2226 | src_page = pte_page(pteval); |
2215 | copy_user_highpage(page, src_page, address, vma); | 2227 | copy_user_highpage(page, src_page, address, vma); |
@@ -2316,8 +2328,14 @@ static struct page | |||
2316 | struct vm_area_struct *vma, unsigned long address, | 2328 | struct vm_area_struct *vma, unsigned long address, |
2317 | int node) | 2329 | int node) |
2318 | { | 2330 | { |
2331 | gfp_t flags; | ||
2332 | |||
2319 | VM_BUG_ON_PAGE(*hpage, *hpage); | 2333 | VM_BUG_ON_PAGE(*hpage, *hpage); |
2320 | 2334 | ||
2335 | /* Only allocate from the target node */ | ||
2336 | flags = alloc_hugepage_gfpmask(khugepaged_defrag(), __GFP_OTHER_NODE) | | ||
2337 | __GFP_THISNODE; | ||
2338 | |||
2321 | /* | 2339 | /* |
2322 | * Before allocating the hugepage, release the mmap_sem read lock. | 2340 | * Before allocating the hugepage, release the mmap_sem read lock. |
2323 | * The allocation can take potentially a long time if it involves | 2341 | * The allocation can take potentially a long time if it involves |
@@ -2326,8 +2344,7 @@ static struct page | |||
2326 | */ | 2344 | */ |
2327 | up_read(&mm->mmap_sem); | 2345 | up_read(&mm->mmap_sem); |
2328 | 2346 | ||
2329 | *hpage = alloc_pages_exact_node(node, alloc_hugepage_gfpmask( | 2347 | *hpage = alloc_pages_exact_node(node, flags, HPAGE_PMD_ORDER); |
2330 | khugepaged_defrag(), __GFP_OTHER_NODE), HPAGE_PMD_ORDER); | ||
2331 | if (unlikely(!*hpage)) { | 2348 | if (unlikely(!*hpage)) { |
2332 | count_vm_event(THP_COLLAPSE_ALLOC_FAILED); | 2349 | count_vm_event(THP_COLLAPSE_ALLOC_FAILED); |
2333 | *hpage = ERR_PTR(-ENOMEM); | 2350 | *hpage = ERR_PTR(-ENOMEM); |
@@ -2543,7 +2560,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, | |||
2543 | { | 2560 | { |
2544 | pmd_t *pmd; | 2561 | pmd_t *pmd; |
2545 | pte_t *pte, *_pte; | 2562 | pte_t *pte, *_pte; |
2546 | int ret = 0, none = 0; | 2563 | int ret = 0, none_or_zero = 0; |
2547 | struct page *page; | 2564 | struct page *page; |
2548 | unsigned long _address; | 2565 | unsigned long _address; |
2549 | spinlock_t *ptl; | 2566 | spinlock_t *ptl; |
@@ -2561,8 +2578,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, | |||
2561 | for (_address = address, _pte = pte; _pte < pte+HPAGE_PMD_NR; | 2578 | for (_address = address, _pte = pte; _pte < pte+HPAGE_PMD_NR; |
2562 | _pte++, _address += PAGE_SIZE) { | 2579 | _pte++, _address += PAGE_SIZE) { |
2563 | pte_t pteval = *_pte; | 2580 | pte_t pteval = *_pte; |
2564 | if (pte_none(pteval)) { | 2581 | if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) { |
2565 | if (++none <= khugepaged_max_ptes_none) | 2582 | if (++none_or_zero <= khugepaged_max_ptes_none) |
2566 | continue; | 2583 | continue; |
2567 | else | 2584 | else |
2568 | goto out_unmap; | 2585 | goto out_unmap; |
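The khugepaged hunks above fold zero-page PTEs into the same "empty enough to collapse" accounting as pte_none() entries, counted in none_or_zero against khugepaged_max_ptes_none. The following is a minimal userspace sketch of that counting rule only, not kernel code; the names worth_collapsing, MAX_PTES_NONE and the pte array are invented for illustration.

    #include <stdbool.h>
    #include <stdio.h>

    #define HPAGE_PMD_NR   512   /* PTEs covered by one huge page (x86-64 value) */
    #define MAX_PTES_NONE  511   /* analogue of khugepaged_max_ptes_none */

    enum pte_kind { PTE_MAPPED, PTE_NONE, PTE_ZERO_PAGE };

    /*
     * Decide whether a PMD range is a collapse candidate: empty PTEs and
     * zero-page PTEs are lumped together and only tolerated up to a limit,
     * mirroring the none_or_zero counter in khugepaged_scan_pmd().
     */
    static bool worth_collapsing(const enum pte_kind pte[HPAGE_PMD_NR])
    {
        int none_or_zero = 0;

        for (int i = 0; i < HPAGE_PMD_NR; i++) {
            if (pte[i] == PTE_NONE || pte[i] == PTE_ZERO_PAGE) {
                if (++none_or_zero > MAX_PTES_NONE)
                    return false;   /* too sparse, collapsing would waste memory */
            }
        }
        return true;
    }

    int main(void)
    {
        enum pte_kind pte[HPAGE_PMD_NR] = { PTE_MAPPED };  /* rest default to PTE_MAPPED */

        pte[7] = PTE_ZERO_PAGE;
        printf("collapse? %s\n", worth_collapsing(pte) ? "yes" : "no");
        return 0;
    }

The point of the change is visible in the threshold check: a zero-page mapping no longer counts as "real" memory when deciding whether a collapse is worthwhile.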
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index c41b2a0ee273..8874c8ad55aa 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -3278,6 +3278,15 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
3278 | struct page *page; | 3278 | struct page *page; |
3279 | 3279 | ||
3280 | /* | 3280 | /* |
3281 | * If we have a pending SIGKILL, don't keep faulting pages and | ||
3282 | * potentially allocating memory. | ||
3283 | */ | ||
3284 | if (unlikely(fatal_signal_pending(current))) { | ||
3285 | remainder = 0; | ||
3286 | break; | ||
3287 | } | ||
3288 | |||
3289 | /* | ||
3281 | * Some archs (sparc64, sh*) have multiple pte_ts to | 3290 | * Some archs (sparc64, sh*) have multiple pte_ts to |
3282 | * each hugepage. We have to make sure we get the | 3291 | * each hugepage. We have to make sure we get the |
3283 | * first, for the page indexing below to work. | 3292 | * first, for the page indexing below to work. |
@@ -3735,8 +3744,7 @@ retry: | |||
3735 | if (!pmd_huge(*pmd)) | 3744 | if (!pmd_huge(*pmd)) |
3736 | goto out; | 3745 | goto out; |
3737 | if (pmd_present(*pmd)) { | 3746 | if (pmd_present(*pmd)) { |
3738 | page = pte_page(*(pte_t *)pmd) + | 3747 | page = pmd_page(*pmd) + ((address & ~PMD_MASK) >> PAGE_SHIFT); |
3739 | ((address & ~PMD_MASK) >> PAGE_SHIFT); | ||
3740 | if (flags & FOLL_GET) | 3748 | if (flags & FOLL_GET) |
3741 | get_page(page); | 3749 | get_page(page); |
3742 | } else { | 3750 | } else { |
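The hugetlb hunk above makes follow_hugetlb_page() stop faulting and allocating once the current task has a pending SIGKILL. A rough userspace analogue of that "bail out of a long loop when a fatal signal arrives" pattern is sketched below; a volatile flag set from a signal handler stands in for fatal_signal_pending(), and all names are illustrative.

    #include <signal.h>
    #include <stdio.h>

    static volatile sig_atomic_t fatal_pending;  /* stand-in for fatal_signal_pending() */

    static void on_term(int sig)
    {
        (void)sig;
        fatal_pending = 1;
    }

    int main(void)
    {
        long remainder = 1L << 20;   /* pretend this many pages remain to fault in */
        long done = 0;

        signal(SIGTERM, on_term);

        while (remainder > 0) {
            /* Check before doing more (potentially allocating) work. */
            if (fatal_pending) {
                remainder = 0;       /* mirrors "remainder = 0; break;" in the patch */
                break;
            }
            done++;                  /* one page's worth of work */
            remainder--;
        }
        printf("processed %ld units, %ld left\n", done, remainder);
        return 0;
    }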
diff --git a/mm/internal.h b/mm/internal.h index a96da5b0029d..edaab69a9c35 100644 --- a/mm/internal.h +++ b/mm/internal.h | |||
@@ -200,6 +200,8 @@ isolate_freepages_range(struct compact_control *cc, | |||
200 | unsigned long | 200 | unsigned long |
201 | isolate_migratepages_range(struct compact_control *cc, | 201 | isolate_migratepages_range(struct compact_control *cc, |
202 | unsigned long low_pfn, unsigned long end_pfn); | 202 | unsigned long low_pfn, unsigned long end_pfn); |
203 | int find_suitable_fallback(struct free_area *area, unsigned int order, | ||
204 | int migratetype, bool only_stealable, bool *can_steal); | ||
203 | 205 | ||
204 | #endif | 206 | #endif |
205 | 207 | ||
@@ -240,7 +242,7 @@ void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma, | |||
240 | struct vm_area_struct *prev, struct rb_node *rb_parent); | 242 | struct vm_area_struct *prev, struct rb_node *rb_parent); |
241 | 243 | ||
242 | #ifdef CONFIG_MMU | 244 | #ifdef CONFIG_MMU |
243 | extern long __mlock_vma_pages_range(struct vm_area_struct *vma, | 245 | extern long populate_vma_page_range(struct vm_area_struct *vma, |
244 | unsigned long start, unsigned long end, int *nonblocking); | 246 | unsigned long start, unsigned long end, int *nonblocking); |
245 | extern void munlock_vma_pages_range(struct vm_area_struct *vma, | 247 | extern void munlock_vma_pages_range(struct vm_area_struct *vma, |
246 | unsigned long start, unsigned long end); | 248 | unsigned long start, unsigned long end); |
diff --git a/mm/memblock.c b/mm/memblock.c index 252b77bdf65e..3f37a0bca5d5 100644 --- a/mm/memblock.c +++ b/mm/memblock.c | |||
@@ -699,14 +699,14 @@ static int __init_memblock memblock_reserve_region(phys_addr_t base, | |||
699 | int nid, | 699 | int nid, |
700 | unsigned long flags) | 700 | unsigned long flags) |
701 | { | 701 | { |
702 | struct memblock_type *_rgn = &memblock.reserved; | 702 | struct memblock_type *type = &memblock.reserved; |
703 | 703 | ||
704 | memblock_dbg("memblock_reserve: [%#016llx-%#016llx] flags %#02lx %pF\n", | 704 | memblock_dbg("memblock_reserve: [%#016llx-%#016llx] flags %#02lx %pF\n", |
705 | (unsigned long long)base, | 705 | (unsigned long long)base, |
706 | (unsigned long long)base + size - 1, | 706 | (unsigned long long)base + size - 1, |
707 | flags, (void *)_RET_IP_); | 707 | flags, (void *)_RET_IP_); |
708 | 708 | ||
709 | return memblock_add_range(_rgn, base, size, nid, flags); | 709 | return memblock_add_range(type, base, size, nid, flags); |
710 | } | 710 | } |
711 | 711 | ||
712 | int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size) | 712 | int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size) |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index b34ef4a32a3b..c3f09b2dda5f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -14,6 +14,12 @@ | |||
14 | * Copyright (C) 2012 Parallels Inc. and Google Inc. | 14 | * Copyright (C) 2012 Parallels Inc. and Google Inc. |
15 | * Authors: Glauber Costa and Suleiman Souhlal | 15 | * Authors: Glauber Costa and Suleiman Souhlal |
16 | * | 16 | * |
17 | * Native page reclaim | ||
18 | * Charge lifetime sanitation | ||
19 | * Lockless page tracking & accounting | ||
20 | * Unified hierarchy configuration model | ||
21 | * Copyright (C) 2015 Red Hat, Inc., Johannes Weiner | ||
22 | * | ||
17 | * This program is free software; you can redistribute it and/or modify | 23 | * This program is free software; you can redistribute it and/or modify |
18 | * it under the terms of the GNU General Public License as published by | 24 | * it under the terms of the GNU General Public License as published by |
19 | * the Free Software Foundation; either version 2 of the License, or | 25 | * the Free Software Foundation; either version 2 of the License, or |
@@ -1436,15 +1442,17 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) | |||
1436 | struct mem_cgroup *iter; | 1442 | struct mem_cgroup *iter; |
1437 | unsigned int i; | 1443 | unsigned int i; |
1438 | 1444 | ||
1439 | if (!p) | ||
1440 | return; | ||
1441 | |||
1442 | mutex_lock(&oom_info_lock); | 1445 | mutex_lock(&oom_info_lock); |
1443 | rcu_read_lock(); | 1446 | rcu_read_lock(); |
1444 | 1447 | ||
1445 | pr_info("Task in "); | 1448 | if (p) { |
1446 | pr_cont_cgroup_path(task_cgroup(p, memory_cgrp_id)); | 1449 | pr_info("Task in "); |
1447 | pr_cont(" killed as a result of limit of "); | 1450 | pr_cont_cgroup_path(task_cgroup(p, memory_cgrp_id)); |
1451 | pr_cont(" killed as a result of limit of "); | ||
1452 | } else { | ||
1453 | pr_info("Memory limit reached of cgroup "); | ||
1454 | } | ||
1455 | |||
1448 | pr_cont_cgroup_path(memcg->css.cgroup); | 1456 | pr_cont_cgroup_path(memcg->css.cgroup); |
1449 | pr_cont("\n"); | 1457 | pr_cont("\n"); |
1450 | 1458 | ||
@@ -1531,7 +1539,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, | |||
1531 | return; | 1539 | return; |
1532 | } | 1540 | } |
1533 | 1541 | ||
1534 | check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL); | 1542 | check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL, memcg); |
1535 | totalpages = mem_cgroup_get_limit(memcg) ? : 1; | 1543 | totalpages = mem_cgroup_get_limit(memcg) ? : 1; |
1536 | for_each_mem_cgroup_tree(iter, memcg) { | 1544 | for_each_mem_cgroup_tree(iter, memcg) { |
1537 | struct css_task_iter it; | 1545 | struct css_task_iter it; |
@@ -2779,92 +2787,6 @@ void mem_cgroup_split_huge_fixup(struct page *head) | |||
2779 | } | 2787 | } |
2780 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ | 2788 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ |
2781 | 2789 | ||
2782 | /** | ||
2783 | * mem_cgroup_move_account - move account of the page | ||
2784 | * @page: the page | ||
2785 | * @nr_pages: number of regular pages (>1 for huge pages) | ||
2786 | * @from: mem_cgroup which the page is moved from. | ||
2787 | * @to: mem_cgroup which the page is moved to. @from != @to. | ||
2788 | * | ||
2789 | * The caller must confirm following. | ||
2790 | * - page is not on LRU (isolate_page() is useful.) | ||
2791 | * - compound_lock is held when nr_pages > 1 | ||
2792 | * | ||
2793 | * This function doesn't do "charge" to new cgroup and doesn't do "uncharge" | ||
2794 | * from old cgroup. | ||
2795 | */ | ||
2796 | static int mem_cgroup_move_account(struct page *page, | ||
2797 | unsigned int nr_pages, | ||
2798 | struct mem_cgroup *from, | ||
2799 | struct mem_cgroup *to) | ||
2800 | { | ||
2801 | unsigned long flags; | ||
2802 | int ret; | ||
2803 | |||
2804 | VM_BUG_ON(from == to); | ||
2805 | VM_BUG_ON_PAGE(PageLRU(page), page); | ||
2806 | /* | ||
2807 | * The page is isolated from LRU. So, collapse function | ||
2808 | * will not handle this page. But page splitting can happen. | ||
2809 | * Do this check under compound_page_lock(). The caller should | ||
2810 | * hold it. | ||
2811 | */ | ||
2812 | ret = -EBUSY; | ||
2813 | if (nr_pages > 1 && !PageTransHuge(page)) | ||
2814 | goto out; | ||
2815 | |||
2816 | /* | ||
2817 | * Prevent mem_cgroup_migrate() from looking at page->mem_cgroup | ||
2818 | * of its source page while we change it: page migration takes | ||
2819 | * both pages off the LRU, but page cache replacement doesn't. | ||
2820 | */ | ||
2821 | if (!trylock_page(page)) | ||
2822 | goto out; | ||
2823 | |||
2824 | ret = -EINVAL; | ||
2825 | if (page->mem_cgroup != from) | ||
2826 | goto out_unlock; | ||
2827 | |||
2828 | spin_lock_irqsave(&from->move_lock, flags); | ||
2829 | |||
2830 | if (!PageAnon(page) && page_mapped(page)) { | ||
2831 | __this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED], | ||
2832 | nr_pages); | ||
2833 | __this_cpu_add(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED], | ||
2834 | nr_pages); | ||
2835 | } | ||
2836 | |||
2837 | if (PageWriteback(page)) { | ||
2838 | __this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_WRITEBACK], | ||
2839 | nr_pages); | ||
2840 | __this_cpu_add(to->stat->count[MEM_CGROUP_STAT_WRITEBACK], | ||
2841 | nr_pages); | ||
2842 | } | ||
2843 | |||
2844 | /* | ||
2845 | * It is safe to change page->mem_cgroup here because the page | ||
2846 | * is referenced, charged, and isolated - we can't race with | ||
2847 | * uncharging, charging, migration, or LRU putback. | ||
2848 | */ | ||
2849 | |||
2850 | /* caller should have done css_get */ | ||
2851 | page->mem_cgroup = to; | ||
2852 | spin_unlock_irqrestore(&from->move_lock, flags); | ||
2853 | |||
2854 | ret = 0; | ||
2855 | |||
2856 | local_irq_disable(); | ||
2857 | mem_cgroup_charge_statistics(to, page, nr_pages); | ||
2858 | memcg_check_events(to, page); | ||
2859 | mem_cgroup_charge_statistics(from, page, -nr_pages); | ||
2860 | memcg_check_events(from, page); | ||
2861 | local_irq_enable(); | ||
2862 | out_unlock: | ||
2863 | unlock_page(page); | ||
2864 | out: | ||
2865 | return ret; | ||
2866 | } | ||
2867 | |||
2868 | #ifdef CONFIG_MEMCG_SWAP | 2790 | #ifdef CONFIG_MEMCG_SWAP |
2869 | static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg, | 2791 | static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg, |
2870 | bool charge) | 2792 | bool charge) |
@@ -4816,6 +4738,92 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma, | |||
4816 | return page; | 4738 | return page; |
4817 | } | 4739 | } |
4818 | 4740 | ||
4741 | /** | ||
4742 | * mem_cgroup_move_account - move account of the page | ||
4743 | * @page: the page | ||
4744 | * @nr_pages: number of regular pages (>1 for huge pages) | ||
4745 | * @from: mem_cgroup which the page is moved from. | ||
4746 | * @to: mem_cgroup which the page is moved to. @from != @to. | ||
4747 | * | ||
4748 | * The caller must confirm following. | ||
4749 | * - page is not on LRU (isolate_page() is useful.) | ||
4750 | * - compound_lock is held when nr_pages > 1 | ||
4751 | * | ||
4752 | * This function doesn't do "charge" to new cgroup and doesn't do "uncharge" | ||
4753 | * from old cgroup. | ||
4754 | */ | ||
4755 | static int mem_cgroup_move_account(struct page *page, | ||
4756 | unsigned int nr_pages, | ||
4757 | struct mem_cgroup *from, | ||
4758 | struct mem_cgroup *to) | ||
4759 | { | ||
4760 | unsigned long flags; | ||
4761 | int ret; | ||
4762 | |||
4763 | VM_BUG_ON(from == to); | ||
4764 | VM_BUG_ON_PAGE(PageLRU(page), page); | ||
4765 | /* | ||
4766 | * The page is isolated from LRU. So, collapse function | ||
4767 | * will not handle this page. But page splitting can happen. | ||
4768 | * Do this check under compound_page_lock(). The caller should | ||
4769 | * hold it. | ||
4770 | */ | ||
4771 | ret = -EBUSY; | ||
4772 | if (nr_pages > 1 && !PageTransHuge(page)) | ||
4773 | goto out; | ||
4774 | |||
4775 | /* | ||
4776 | * Prevent mem_cgroup_migrate() from looking at page->mem_cgroup | ||
4777 | * of its source page while we change it: page migration takes | ||
4778 | * both pages off the LRU, but page cache replacement doesn't. | ||
4779 | */ | ||
4780 | if (!trylock_page(page)) | ||
4781 | goto out; | ||
4782 | |||
4783 | ret = -EINVAL; | ||
4784 | if (page->mem_cgroup != from) | ||
4785 | goto out_unlock; | ||
4786 | |||
4787 | spin_lock_irqsave(&from->move_lock, flags); | ||
4788 | |||
4789 | if (!PageAnon(page) && page_mapped(page)) { | ||
4790 | __this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED], | ||
4791 | nr_pages); | ||
4792 | __this_cpu_add(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED], | ||
4793 | nr_pages); | ||
4794 | } | ||
4795 | |||
4796 | if (PageWriteback(page)) { | ||
4797 | __this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_WRITEBACK], | ||
4798 | nr_pages); | ||
4799 | __this_cpu_add(to->stat->count[MEM_CGROUP_STAT_WRITEBACK], | ||
4800 | nr_pages); | ||
4801 | } | ||
4802 | |||
4803 | /* | ||
4804 | * It is safe to change page->mem_cgroup here because the page | ||
4805 | * is referenced, charged, and isolated - we can't race with | ||
4806 | * uncharging, charging, migration, or LRU putback. | ||
4807 | */ | ||
4808 | |||
4809 | /* caller should have done css_get */ | ||
4810 | page->mem_cgroup = to; | ||
4811 | spin_unlock_irqrestore(&from->move_lock, flags); | ||
4812 | |||
4813 | ret = 0; | ||
4814 | |||
4815 | local_irq_disable(); | ||
4816 | mem_cgroup_charge_statistics(to, page, nr_pages); | ||
4817 | memcg_check_events(to, page); | ||
4818 | mem_cgroup_charge_statistics(from, page, -nr_pages); | ||
4819 | memcg_check_events(from, page); | ||
4820 | local_irq_enable(); | ||
4821 | out_unlock: | ||
4822 | unlock_page(page); | ||
4823 | out: | ||
4824 | return ret; | ||
4825 | } | ||
4826 | |||
4819 | static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma, | 4827 | static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma, |
4820 | unsigned long addr, pte_t ptent, union mc_target *target) | 4828 | unsigned long addr, pte_t ptent, union mc_target *target) |
4821 | { | 4829 | { |
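The memcontrol change above lets mem_cgroup_print_oom_info() run without a victim task, so panic_on_oom can still report which cgroup hit its limit before the system panics. A condensed sketch of that branch structure follows; printf stands in for pr_info/pr_cont, plain strings stand in for cgroup paths, and the struct and function names are invented.

    #include <stdio.h>

    /* Toy stand-ins for the kernel structures involved. */
    struct task  { const char *name; const char *cgroup_path; };
    struct memcg { const char *path; };

    static void print_oom_info(const struct memcg *memcg, const struct task *p)
    {
        if (p)
            printf("Task in %s killed as a result of limit of %s\n",
                   p->cgroup_path, memcg->path);
        else
            /* No victim chosen yet (e.g. panic_on_oom): still name the cgroup. */
            printf("Memory limit reached of cgroup %s\n", memcg->path);
    }

    int main(void)
    {
        struct memcg m = { "/workload/batch" };
        struct task  t = { "worker", "/workload/batch" };

        print_oom_info(&m, &t);
        print_oom_info(&m, NULL);
        return 0;
    }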
diff --git a/mm/memory.c b/mm/memory.c index 97839f5c8c30..ac20b2a6a0c3 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -1983,167 +1983,91 @@ static int do_page_mkwrite(struct vm_area_struct *vma, struct page *page, | |||
1983 | } | 1983 | } |
1984 | 1984 | ||
1985 | /* | 1985 | /* |
1986 | * This routine handles present pages, when users try to write | 1986 | * Handle write page faults for pages that can be reused in the current vma |
1987 | * to a shared page. It is done by copying the page to a new address | ||
1988 | * and decrementing the shared-page counter for the old page. | ||
1989 | * | ||
1990 | * Note that this routine assumes that the protection checks have been | ||
1991 | * done by the caller (the low-level page fault routine in most cases). | ||
1992 | * Thus we can safely just mark it writable once we've done any necessary | ||
1993 | * COW. | ||
1994 | * | 1987 | * |
1995 | * We also mark the page dirty at this point even though the page will | 1988 | * This can happen either due to the mapping being with the VM_SHARED flag, |
1996 | * change only once the write actually happens. This avoids a few races, | 1989 | * or due to us being the last reference standing to the page. In either |
1997 | * and potentially makes it more efficient. | 1990 | * case, all we need to do here is to mark the page as writable and update |
1998 | * | 1991 | * any related book-keeping. |
1999 | * We enter with non-exclusive mmap_sem (to exclude vma changes, | ||
2000 | * but allow concurrent faults), with pte both mapped and locked. | ||
2001 | * We return with mmap_sem still held, but pte unmapped and unlocked. | ||
2002 | */ | 1992 | */ |
2003 | static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, | 1993 | static inline int wp_page_reuse(struct mm_struct *mm, |
2004 | unsigned long address, pte_t *page_table, pmd_t *pmd, | 1994 | struct vm_area_struct *vma, unsigned long address, |
2005 | spinlock_t *ptl, pte_t orig_pte) | 1995 | pte_t *page_table, spinlock_t *ptl, pte_t orig_pte, |
1996 | struct page *page, int page_mkwrite, | ||
1997 | int dirty_shared) | ||
2006 | __releases(ptl) | 1998 | __releases(ptl) |
2007 | { | 1999 | { |
2008 | struct page *old_page, *new_page = NULL; | ||
2009 | pte_t entry; | 2000 | pte_t entry; |
2010 | int ret = 0; | ||
2011 | int page_mkwrite = 0; | ||
2012 | bool dirty_shared = false; | ||
2013 | unsigned long mmun_start = 0; /* For mmu_notifiers */ | ||
2014 | unsigned long mmun_end = 0; /* For mmu_notifiers */ | ||
2015 | struct mem_cgroup *memcg; | ||
2016 | |||
2017 | old_page = vm_normal_page(vma, address, orig_pte); | ||
2018 | if (!old_page) { | ||
2019 | /* | ||
2020 | * VM_MIXEDMAP !pfn_valid() case, or VM_SOFTDIRTY clear on a | ||
2021 | * VM_PFNMAP VMA. | ||
2022 | * | ||
2023 | * We should not cow pages in a shared writeable mapping. | ||
2024 | * Just mark the pages writable as we can't do any dirty | ||
2025 | * accounting on raw pfn maps. | ||
2026 | */ | ||
2027 | if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) == | ||
2028 | (VM_WRITE|VM_SHARED)) | ||
2029 | goto reuse; | ||
2030 | goto gotten; | ||
2031 | } | ||
2032 | |||
2033 | /* | 2001 | /* |
2034 | * Take out anonymous pages first, anonymous shared vmas are | 2002 | * Clear the pages cpupid information as the existing |
2035 | * not dirty accountable. | 2003 | * information potentially belongs to a now completely |
2004 | * unrelated process. | ||
2036 | */ | 2005 | */ |
2037 | if (PageAnon(old_page) && !PageKsm(old_page)) { | 2006 | if (page) |
2038 | if (!trylock_page(old_page)) { | 2007 | page_cpupid_xchg_last(page, (1 << LAST_CPUPID_SHIFT) - 1); |
2039 | page_cache_get(old_page); | ||
2040 | pte_unmap_unlock(page_table, ptl); | ||
2041 | lock_page(old_page); | ||
2042 | page_table = pte_offset_map_lock(mm, pmd, address, | ||
2043 | &ptl); | ||
2044 | if (!pte_same(*page_table, orig_pte)) { | ||
2045 | unlock_page(old_page); | ||
2046 | goto unlock; | ||
2047 | } | ||
2048 | page_cache_release(old_page); | ||
2049 | } | ||
2050 | if (reuse_swap_page(old_page)) { | ||
2051 | /* | ||
2052 | * The page is all ours. Move it to our anon_vma so | ||
2053 | * the rmap code will not search our parent or siblings. | ||
2054 | * Protected against the rmap code by the page lock. | ||
2055 | */ | ||
2056 | page_move_anon_rmap(old_page, vma, address); | ||
2057 | unlock_page(old_page); | ||
2058 | goto reuse; | ||
2059 | } | ||
2060 | unlock_page(old_page); | ||
2061 | } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) == | ||
2062 | (VM_WRITE|VM_SHARED))) { | ||
2063 | page_cache_get(old_page); | ||
2064 | /* | ||
2065 | * Only catch write-faults on shared writable pages, | ||
2066 | * read-only shared pages can get COWed by | ||
2067 | * get_user_pages(.write=1, .force=1). | ||
2068 | */ | ||
2069 | if (vma->vm_ops && vma->vm_ops->page_mkwrite) { | ||
2070 | int tmp; | ||
2071 | |||
2072 | pte_unmap_unlock(page_table, ptl); | ||
2073 | tmp = do_page_mkwrite(vma, old_page, address); | ||
2074 | if (unlikely(!tmp || (tmp & | ||
2075 | (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { | ||
2076 | page_cache_release(old_page); | ||
2077 | return tmp; | ||
2078 | } | ||
2079 | /* | ||
2080 | * Since we dropped the lock we need to revalidate | ||
2081 | * the PTE as someone else may have changed it. If | ||
2082 | * they did, we just return, as we can count on the | ||
2083 | * MMU to tell us if they didn't also make it writable. | ||
2084 | */ | ||
2085 | page_table = pte_offset_map_lock(mm, pmd, address, | ||
2086 | &ptl); | ||
2087 | if (!pte_same(*page_table, orig_pte)) { | ||
2088 | unlock_page(old_page); | ||
2089 | goto unlock; | ||
2090 | } | ||
2091 | page_mkwrite = 1; | ||
2092 | } | ||
2093 | |||
2094 | dirty_shared = true; | ||
2095 | |||
2096 | reuse: | ||
2097 | /* | ||
2098 | * Clear the pages cpupid information as the existing | ||
2099 | * information potentially belongs to a now completely | ||
2100 | * unrelated process. | ||
2101 | */ | ||
2102 | if (old_page) | ||
2103 | page_cpupid_xchg_last(old_page, (1 << LAST_CPUPID_SHIFT) - 1); | ||
2104 | |||
2105 | flush_cache_page(vma, address, pte_pfn(orig_pte)); | ||
2106 | entry = pte_mkyoung(orig_pte); | ||
2107 | entry = maybe_mkwrite(pte_mkdirty(entry), vma); | ||
2108 | if (ptep_set_access_flags(vma, address, page_table, entry,1)) | ||
2109 | update_mmu_cache(vma, address, page_table); | ||
2110 | pte_unmap_unlock(page_table, ptl); | ||
2111 | ret |= VM_FAULT_WRITE; | ||
2112 | 2008 | ||
2113 | if (dirty_shared) { | 2009 | flush_cache_page(vma, address, pte_pfn(orig_pte)); |
2114 | struct address_space *mapping; | 2010 | entry = pte_mkyoung(orig_pte); |
2115 | int dirtied; | 2011 | entry = maybe_mkwrite(pte_mkdirty(entry), vma); |
2012 | if (ptep_set_access_flags(vma, address, page_table, entry, 1)) | ||
2013 | update_mmu_cache(vma, address, page_table); | ||
2014 | pte_unmap_unlock(page_table, ptl); | ||
2116 | 2015 | ||
2117 | if (!page_mkwrite) | 2016 | if (dirty_shared) { |
2118 | lock_page(old_page); | 2017 | struct address_space *mapping; |
2018 | int dirtied; | ||
2119 | 2019 | ||
2120 | dirtied = set_page_dirty(old_page); | 2020 | if (!page_mkwrite) |
2121 | VM_BUG_ON_PAGE(PageAnon(old_page), old_page); | 2021 | lock_page(page); |
2122 | mapping = old_page->mapping; | ||
2123 | unlock_page(old_page); | ||
2124 | page_cache_release(old_page); | ||
2125 | 2022 | ||
2126 | if ((dirtied || page_mkwrite) && mapping) { | 2023 | dirtied = set_page_dirty(page); |
2127 | /* | 2024 | VM_BUG_ON_PAGE(PageAnon(page), page); |
2128 | * Some device drivers do not set page.mapping | 2025 | mapping = page->mapping; |
2129 | * but still dirty their pages | 2026 | unlock_page(page); |
2130 | */ | 2027 | page_cache_release(page); |
2131 | balance_dirty_pages_ratelimited(mapping); | ||
2132 | } | ||
2133 | 2028 | ||
2134 | if (!page_mkwrite) | 2029 | if ((dirtied || page_mkwrite) && mapping) { |
2135 | file_update_time(vma->vm_file); | 2030 | /* |
2031 | * Some device drivers do not set page.mapping | ||
2032 | * but still dirty their pages | ||
2033 | */ | ||
2034 | balance_dirty_pages_ratelimited(mapping); | ||
2136 | } | 2035 | } |
2137 | 2036 | ||
2138 | return ret; | 2037 | if (!page_mkwrite) |
2038 | file_update_time(vma->vm_file); | ||
2139 | } | 2039 | } |
2140 | 2040 | ||
2141 | /* | 2041 | return VM_FAULT_WRITE; |
2142 | * Ok, we need to copy. Oh, well.. | 2042 | } |
2143 | */ | 2043 | |
2144 | page_cache_get(old_page); | 2044 | /* |
2145 | gotten: | 2045 | * Handle the case of a page which we actually need to copy to a new page. |
2146 | pte_unmap_unlock(page_table, ptl); | 2046 | * |
2047 | * Called with mmap_sem locked and the old page referenced, but | ||
2048 | * without the ptl held. | ||
2049 | * | ||
2050 | * High level logic flow: | ||
2051 | * | ||
2052 | * - Allocate a page, copy the content of the old page to the new one. | ||
2053 | * - Handle book keeping and accounting - cgroups, mmu-notifiers, etc. | ||
2054 | * - Take the PTL. If the pte changed, bail out and release the allocated page | ||
2055 | * - If the pte is still the way we remember it, update the page table and all | ||
2056 | * relevant references. This includes dropping the reference the page-table | ||
2057 | * held to the old page, as well as updating the rmap. | ||
2058 | * - In any case, unlock the PTL and drop the reference we took to the old page. | ||
2059 | */ | ||
2060 | static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma, | ||
2061 | unsigned long address, pte_t *page_table, pmd_t *pmd, | ||
2062 | pte_t orig_pte, struct page *old_page) | ||
2063 | { | ||
2064 | struct page *new_page = NULL; | ||
2065 | spinlock_t *ptl = NULL; | ||
2066 | pte_t entry; | ||
2067 | int page_copied = 0; | ||
2068 | const unsigned long mmun_start = address & PAGE_MASK; /* For mmu_notifiers */ | ||
2069 | const unsigned long mmun_end = mmun_start + PAGE_SIZE; /* For mmu_notifiers */ | ||
2070 | struct mem_cgroup *memcg; | ||
2147 | 2071 | ||
2148 | if (unlikely(anon_vma_prepare(vma))) | 2072 | if (unlikely(anon_vma_prepare(vma))) |
2149 | goto oom; | 2073 | goto oom; |
@@ -2163,8 +2087,6 @@ gotten: | |||
2163 | if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg)) | 2087 | if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg)) |
2164 | goto oom_free_new; | 2088 | goto oom_free_new; |
2165 | 2089 | ||
2166 | mmun_start = address & PAGE_MASK; | ||
2167 | mmun_end = mmun_start + PAGE_SIZE; | ||
2168 | mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); | 2090 | mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); |
2169 | 2091 | ||
2170 | /* | 2092 | /* |
@@ -2177,8 +2099,9 @@ gotten: | |||
2177 | dec_mm_counter_fast(mm, MM_FILEPAGES); | 2099 | dec_mm_counter_fast(mm, MM_FILEPAGES); |
2178 | inc_mm_counter_fast(mm, MM_ANONPAGES); | 2100 | inc_mm_counter_fast(mm, MM_ANONPAGES); |
2179 | } | 2101 | } |
2180 | } else | 2102 | } else { |
2181 | inc_mm_counter_fast(mm, MM_ANONPAGES); | 2103 | inc_mm_counter_fast(mm, MM_ANONPAGES); |
2104 | } | ||
2182 | flush_cache_page(vma, address, pte_pfn(orig_pte)); | 2105 | flush_cache_page(vma, address, pte_pfn(orig_pte)); |
2183 | entry = mk_pte(new_page, vma->vm_page_prot); | 2106 | entry = mk_pte(new_page, vma->vm_page_prot); |
2184 | entry = maybe_mkwrite(pte_mkdirty(entry), vma); | 2107 | entry = maybe_mkwrite(pte_mkdirty(entry), vma); |
@@ -2227,29 +2150,29 @@ gotten: | |||
2227 | 2150 | ||
2228 | /* Free the old page.. */ | 2151 | /* Free the old page.. */ |
2229 | new_page = old_page; | 2152 | new_page = old_page; |
2230 | ret |= VM_FAULT_WRITE; | 2153 | page_copied = 1; |
2231 | } else | 2154 | } else { |
2232 | mem_cgroup_cancel_charge(new_page, memcg); | 2155 | mem_cgroup_cancel_charge(new_page, memcg); |
2156 | } | ||
2233 | 2157 | ||
2234 | if (new_page) | 2158 | if (new_page) |
2235 | page_cache_release(new_page); | 2159 | page_cache_release(new_page); |
2236 | unlock: | 2160 | |
2237 | pte_unmap_unlock(page_table, ptl); | 2161 | pte_unmap_unlock(page_table, ptl); |
2238 | if (mmun_end > mmun_start) | 2162 | mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); |
2239 | mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); | ||
2240 | if (old_page) { | 2163 | if (old_page) { |
2241 | /* | 2164 | /* |
2242 | * Don't let another task, with possibly unlocked vma, | 2165 | * Don't let another task, with possibly unlocked vma, |
2243 | * keep the mlocked page. | 2166 | * keep the mlocked page. |
2244 | */ | 2167 | */ |
2245 | if ((ret & VM_FAULT_WRITE) && (vma->vm_flags & VM_LOCKED)) { | 2168 | if (page_copied && (vma->vm_flags & VM_LOCKED)) { |
2246 | lock_page(old_page); /* LRU manipulation */ | 2169 | lock_page(old_page); /* LRU manipulation */ |
2247 | munlock_vma_page(old_page); | 2170 | munlock_vma_page(old_page); |
2248 | unlock_page(old_page); | 2171 | unlock_page(old_page); |
2249 | } | 2172 | } |
2250 | page_cache_release(old_page); | 2173 | page_cache_release(old_page); |
2251 | } | 2174 | } |
2252 | return ret; | 2175 | return page_copied ? VM_FAULT_WRITE : 0; |
2253 | oom_free_new: | 2176 | oom_free_new: |
2254 | page_cache_release(new_page); | 2177 | page_cache_release(new_page); |
2255 | oom: | 2178 | oom: |
@@ -2258,6 +2181,144 @@ oom: | |||
2258 | return VM_FAULT_OOM; | 2181 | return VM_FAULT_OOM; |
2259 | } | 2182 | } |
2260 | 2183 | ||
2184 | static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma, | ||
2185 | unsigned long address, pte_t *page_table, | ||
2186 | pmd_t *pmd, spinlock_t *ptl, pte_t orig_pte, | ||
2187 | struct page *old_page) | ||
2188 | __releases(ptl) | ||
2189 | { | ||
2190 | int page_mkwrite = 0; | ||
2191 | |||
2192 | page_cache_get(old_page); | ||
2193 | |||
2194 | /* | ||
2195 | * Only catch write-faults on shared writable pages, | ||
2196 | * read-only shared pages can get COWed by | ||
2197 | * get_user_pages(.write=1, .force=1). | ||
2198 | */ | ||
2199 | if (vma->vm_ops && vma->vm_ops->page_mkwrite) { | ||
2200 | int tmp; | ||
2201 | |||
2202 | pte_unmap_unlock(page_table, ptl); | ||
2203 | tmp = do_page_mkwrite(vma, old_page, address); | ||
2204 | if (unlikely(!tmp || (tmp & | ||
2205 | (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { | ||
2206 | page_cache_release(old_page); | ||
2207 | return tmp; | ||
2208 | } | ||
2209 | /* | ||
2210 | * Since we dropped the lock we need to revalidate | ||
2211 | * the PTE as someone else may have changed it. If | ||
2212 | * they did, we just return, as we can count on the | ||
2213 | * MMU to tell us if they didn't also make it writable. | ||
2214 | */ | ||
2215 | page_table = pte_offset_map_lock(mm, pmd, address, | ||
2216 | &ptl); | ||
2217 | if (!pte_same(*page_table, orig_pte)) { | ||
2218 | unlock_page(old_page); | ||
2219 | pte_unmap_unlock(page_table, ptl); | ||
2220 | page_cache_release(old_page); | ||
2221 | return 0; | ||
2222 | } | ||
2223 | page_mkwrite = 1; | ||
2224 | } | ||
2225 | |||
2226 | return wp_page_reuse(mm, vma, address, page_table, ptl, | ||
2227 | orig_pte, old_page, page_mkwrite, 1); | ||
2228 | } | ||
2229 | |||
2230 | /* | ||
2231 | * This routine handles present pages, when users try to write | ||
2232 | * to a shared page. It is done by copying the page to a new address | ||
2233 | * and decrementing the shared-page counter for the old page. | ||
2234 | * | ||
2235 | * Note that this routine assumes that the protection checks have been | ||
2236 | * done by the caller (the low-level page fault routine in most cases). | ||
2237 | * Thus we can safely just mark it writable once we've done any necessary | ||
2238 | * COW. | ||
2239 | * | ||
2240 | * We also mark the page dirty at this point even though the page will | ||
2241 | * change only once the write actually happens. This avoids a few races, | ||
2242 | * and potentially makes it more efficient. | ||
2243 | * | ||
2244 | * We enter with non-exclusive mmap_sem (to exclude vma changes, | ||
2245 | * but allow concurrent faults), with pte both mapped and locked. | ||
2246 | * We return with mmap_sem still held, but pte unmapped and unlocked. | ||
2247 | */ | ||
2248 | static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, | ||
2249 | unsigned long address, pte_t *page_table, pmd_t *pmd, | ||
2250 | spinlock_t *ptl, pte_t orig_pte) | ||
2251 | __releases(ptl) | ||
2252 | { | ||
2253 | struct page *old_page; | ||
2254 | |||
2255 | old_page = vm_normal_page(vma, address, orig_pte); | ||
2256 | if (!old_page) { | ||
2257 | /* | ||
2258 | * VM_MIXEDMAP !pfn_valid() case, or VM_SOFTDIRTY clear on a | ||
2259 | * VM_PFNMAP VMA. | ||
2260 | * | ||
2261 | * We should not cow pages in a shared writeable mapping. | ||
2262 | * Just mark the pages writable as we can't do any dirty | ||
2263 | * accounting on raw pfn maps. | ||
2264 | */ | ||
2265 | if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) == | ||
2266 | (VM_WRITE|VM_SHARED)) | ||
2267 | return wp_page_reuse(mm, vma, address, page_table, ptl, | ||
2268 | orig_pte, old_page, 0, 0); | ||
2269 | |||
2270 | pte_unmap_unlock(page_table, ptl); | ||
2271 | return wp_page_copy(mm, vma, address, page_table, pmd, | ||
2272 | orig_pte, old_page); | ||
2273 | } | ||
2274 | |||
2275 | /* | ||
2276 | * Take out anonymous pages first, anonymous shared vmas are | ||
2277 | * not dirty accountable. | ||
2278 | */ | ||
2279 | if (PageAnon(old_page) && !PageKsm(old_page)) { | ||
2280 | if (!trylock_page(old_page)) { | ||
2281 | page_cache_get(old_page); | ||
2282 | pte_unmap_unlock(page_table, ptl); | ||
2283 | lock_page(old_page); | ||
2284 | page_table = pte_offset_map_lock(mm, pmd, address, | ||
2285 | &ptl); | ||
2286 | if (!pte_same(*page_table, orig_pte)) { | ||
2287 | unlock_page(old_page); | ||
2288 | pte_unmap_unlock(page_table, ptl); | ||
2289 | page_cache_release(old_page); | ||
2290 | return 0; | ||
2291 | } | ||
2292 | page_cache_release(old_page); | ||
2293 | } | ||
2294 | if (reuse_swap_page(old_page)) { | ||
2295 | /* | ||
2296 | * The page is all ours. Move it to our anon_vma so | ||
2297 | * the rmap code will not search our parent or siblings. | ||
2298 | * Protected against the rmap code by the page lock. | ||
2299 | */ | ||
2300 | page_move_anon_rmap(old_page, vma, address); | ||
2301 | unlock_page(old_page); | ||
2302 | return wp_page_reuse(mm, vma, address, page_table, ptl, | ||
2303 | orig_pte, old_page, 0, 0); | ||
2304 | } | ||
2305 | unlock_page(old_page); | ||
2306 | } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) == | ||
2307 | (VM_WRITE|VM_SHARED))) { | ||
2308 | return wp_page_shared(mm, vma, address, page_table, pmd, | ||
2309 | ptl, orig_pte, old_page); | ||
2310 | } | ||
2311 | |||
2312 | /* | ||
2313 | * Ok, we need to copy. Oh, well.. | ||
2314 | */ | ||
2315 | page_cache_get(old_page); | ||
2316 | |||
2317 | pte_unmap_unlock(page_table, ptl); | ||
2318 | return wp_page_copy(mm, vma, address, page_table, pmd, | ||
2319 | orig_pte, old_page); | ||
2320 | } | ||
2321 | |||
2261 | static void unmap_mapping_range_vma(struct vm_area_struct *vma, | 2322 | static void unmap_mapping_range_vma(struct vm_area_struct *vma, |
2262 | unsigned long start_addr, unsigned long end_addr, | 2323 | unsigned long start_addr, unsigned long end_addr, |
2263 | struct zap_details *details) | 2324 | struct zap_details *details) |
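The mm/memory.c rework above splits the old monolithic do_wp_page() into a small dispatcher plus wp_page_reuse(), wp_page_shared() and wp_page_copy(). The sketch below models only that three-way dispatch in userspace C, with all locking, rmap and accounting details omitted; the struct and enum names are invented purely for illustration.

    #include <stdbool.h>
    #include <stdio.h>

    /* Invented, simplified fault state. */
    struct fault {
        bool has_page;        /* vm_normal_page() found a struct page    */
        bool anon;            /* PageAnon && !PageKsm                    */
        bool shared_writable; /* vma has VM_WRITE|VM_SHARED              */
        bool sole_owner;      /* reuse_swap_page() said the page is ours */
    };

    enum wp_action { WP_REUSE, WP_SHARED, WP_COPY };

    /* Mirrors the decision order of the new do_wp_page(). */
    static enum wp_action classify_write_fault(const struct fault *f)
    {
        if (!f->has_page)
            return f->shared_writable ? WP_REUSE : WP_COPY;

        if (f->anon)
            return f->sole_owner ? WP_REUSE : WP_COPY;

        if (f->shared_writable)
            return WP_SHARED;   /* wp_page_shared(): page_mkwrite + reuse */

        return WP_COPY;         /* private file page: copy-on-write */
    }

    int main(void)
    {
        struct fault anon_sole   = { true, true,  false, true  };
        struct fault file_shared = { true, false, true,  false };

        printf("%d %d\n", classify_write_fault(&anon_sole),
                          classify_write_fault(&file_shared));
        return 0;
    }

Splitting the function this way keeps each exit path short: reuse returns VM_FAULT_WRITE directly, while the copy path owns its own mmu-notifier window and cleanup.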
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 65842d688b7c..e2e8014fb755 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c | |||
@@ -104,7 +104,7 @@ void put_online_mems(void) | |||
104 | 104 | ||
105 | } | 105 | } |
106 | 106 | ||
107 | static void mem_hotplug_begin(void) | 107 | void mem_hotplug_begin(void) |
108 | { | 108 | { |
109 | mem_hotplug.active_writer = current; | 109 | mem_hotplug.active_writer = current; |
110 | 110 | ||
@@ -119,7 +119,7 @@ static void mem_hotplug_begin(void) | |||
119 | } | 119 | } |
120 | } | 120 | } |
121 | 121 | ||
122 | static void mem_hotplug_done(void) | 122 | void mem_hotplug_done(void) |
123 | { | 123 | { |
124 | mem_hotplug.active_writer = NULL; | 124 | mem_hotplug.active_writer = NULL; |
125 | mutex_unlock(&mem_hotplug.lock); | 125 | mutex_unlock(&mem_hotplug.lock); |
@@ -502,7 +502,7 @@ int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn, | |||
502 | end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1); | 502 | end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1); |
503 | 503 | ||
504 | for (i = start_sec; i <= end_sec; i++) { | 504 | for (i = start_sec; i <= end_sec; i++) { |
505 | err = __add_section(nid, zone, i << PFN_SECTION_SHIFT); | 505 | err = __add_section(nid, zone, section_nr_to_pfn(i)); |
506 | 506 | ||
507 | /* | 507 | /* |
508 | * EEXIST is finally dealt with by ioresource collision | 508 | * EEXIST is finally dealt with by ioresource collision |
@@ -959,6 +959,7 @@ static void node_states_set_node(int node, struct memory_notify *arg) | |||
959 | } | 959 | } |
960 | 960 | ||
961 | 961 | ||
962 | /* Must be protected by mem_hotplug_begin() */ | ||
962 | int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_type) | 963 | int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_type) |
963 | { | 964 | { |
964 | unsigned long flags; | 965 | unsigned long flags; |
@@ -969,7 +970,6 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ | |||
969 | int ret; | 970 | int ret; |
970 | struct memory_notify arg; | 971 | struct memory_notify arg; |
971 | 972 | ||
972 | mem_hotplug_begin(); | ||
973 | /* | 973 | /* |
974 | * This doesn't need a lock to do pfn_to_page(). | 974 | * This doesn't need a lock to do pfn_to_page(). |
975 | * The section can't be removed here because of the | 975 | * The section can't be removed here because of the |
@@ -977,21 +977,20 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ | |||
977 | */ | 977 | */ |
978 | zone = page_zone(pfn_to_page(pfn)); | 978 | zone = page_zone(pfn_to_page(pfn)); |
979 | 979 | ||
980 | ret = -EINVAL; | ||
981 | if ((zone_idx(zone) > ZONE_NORMAL || | 980 | if ((zone_idx(zone) > ZONE_NORMAL || |
982 | online_type == MMOP_ONLINE_MOVABLE) && | 981 | online_type == MMOP_ONLINE_MOVABLE) && |
983 | !can_online_high_movable(zone)) | 982 | !can_online_high_movable(zone)) |
984 | goto out; | 983 | return -EINVAL; |
985 | 984 | ||
986 | if (online_type == MMOP_ONLINE_KERNEL && | 985 | if (online_type == MMOP_ONLINE_KERNEL && |
987 | zone_idx(zone) == ZONE_MOVABLE) { | 986 | zone_idx(zone) == ZONE_MOVABLE) { |
988 | if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages)) | 987 | if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages)) |
989 | goto out; | 988 | return -EINVAL; |
990 | } | 989 | } |
991 | if (online_type == MMOP_ONLINE_MOVABLE && | 990 | if (online_type == MMOP_ONLINE_MOVABLE && |
992 | zone_idx(zone) == ZONE_MOVABLE - 1) { | 991 | zone_idx(zone) == ZONE_MOVABLE - 1) { |
993 | if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages)) | 992 | if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages)) |
994 | goto out; | 993 | return -EINVAL; |
995 | } | 994 | } |
996 | 995 | ||
997 | /* Previous code may changed the zone of the pfn range */ | 996 | /* Previous code may changed the zone of the pfn range */ |
@@ -1007,7 +1006,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ | |||
1007 | ret = notifier_to_errno(ret); | 1006 | ret = notifier_to_errno(ret); |
1008 | if (ret) { | 1007 | if (ret) { |
1009 | memory_notify(MEM_CANCEL_ONLINE, &arg); | 1008 | memory_notify(MEM_CANCEL_ONLINE, &arg); |
1010 | goto out; | 1009 | return ret; |
1011 | } | 1010 | } |
1012 | /* | 1011 | /* |
1013 | * If this zone is not populated, then it is not in zonelist. | 1012 | * If this zone is not populated, then it is not in zonelist. |
@@ -1031,7 +1030,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ | |||
1031 | (((unsigned long long) pfn + nr_pages) | 1030 | (((unsigned long long) pfn + nr_pages) |
1032 | << PAGE_SHIFT) - 1); | 1031 | << PAGE_SHIFT) - 1); |
1033 | memory_notify(MEM_CANCEL_ONLINE, &arg); | 1032 | memory_notify(MEM_CANCEL_ONLINE, &arg); |
1034 | goto out; | 1033 | return ret; |
1035 | } | 1034 | } |
1036 | 1035 | ||
1037 | zone->present_pages += onlined_pages; | 1036 | zone->present_pages += onlined_pages; |
@@ -1061,9 +1060,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ | |||
1061 | 1060 | ||
1062 | if (onlined_pages) | 1061 | if (onlined_pages) |
1063 | memory_notify(MEM_ONLINE, &arg); | 1062 | memory_notify(MEM_ONLINE, &arg); |
1064 | out: | 1063 | return 0; |
1065 | mem_hotplug_done(); | ||
1066 | return ret; | ||
1067 | } | 1064 | } |
1068 | #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ | 1065 | #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ |
1069 | 1066 | ||
@@ -1688,21 +1685,18 @@ static int __ref __offline_pages(unsigned long start_pfn, | |||
1688 | if (!test_pages_in_a_zone(start_pfn, end_pfn)) | 1685 | if (!test_pages_in_a_zone(start_pfn, end_pfn)) |
1689 | return -EINVAL; | 1686 | return -EINVAL; |
1690 | 1687 | ||
1691 | mem_hotplug_begin(); | ||
1692 | |||
1693 | zone = page_zone(pfn_to_page(start_pfn)); | 1688 | zone = page_zone(pfn_to_page(start_pfn)); |
1694 | node = zone_to_nid(zone); | 1689 | node = zone_to_nid(zone); |
1695 | nr_pages = end_pfn - start_pfn; | 1690 | nr_pages = end_pfn - start_pfn; |
1696 | 1691 | ||
1697 | ret = -EINVAL; | ||
1698 | if (zone_idx(zone) <= ZONE_NORMAL && !can_offline_normal(zone, nr_pages)) | 1692 | if (zone_idx(zone) <= ZONE_NORMAL && !can_offline_normal(zone, nr_pages)) |
1699 | goto out; | 1693 | return -EINVAL; |
1700 | 1694 | ||
1701 | /* set above range as isolated */ | 1695 | /* set above range as isolated */ |
1702 | ret = start_isolate_page_range(start_pfn, end_pfn, | 1696 | ret = start_isolate_page_range(start_pfn, end_pfn, |
1703 | MIGRATE_MOVABLE, true); | 1697 | MIGRATE_MOVABLE, true); |
1704 | if (ret) | 1698 | if (ret) |
1705 | goto out; | 1699 | return ret; |
1706 | 1700 | ||
1707 | arg.start_pfn = start_pfn; | 1701 | arg.start_pfn = start_pfn; |
1708 | arg.nr_pages = nr_pages; | 1702 | arg.nr_pages = nr_pages; |
@@ -1795,7 +1789,6 @@ repeat: | |||
1795 | writeback_set_ratelimit(); | 1789 | writeback_set_ratelimit(); |
1796 | 1790 | ||
1797 | memory_notify(MEM_OFFLINE, &arg); | 1791 | memory_notify(MEM_OFFLINE, &arg); |
1798 | mem_hotplug_done(); | ||
1799 | return 0; | 1792 | return 0; |
1800 | 1793 | ||
1801 | failed_removal: | 1794 | failed_removal: |
@@ -1805,12 +1798,10 @@ failed_removal: | |||
1805 | memory_notify(MEM_CANCEL_OFFLINE, &arg); | 1798 | memory_notify(MEM_CANCEL_OFFLINE, &arg); |
1806 | /* pushback to free area */ | 1799 | /* pushback to free area */ |
1807 | undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); | 1800 | undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); |
1808 | |||
1809 | out: | ||
1810 | mem_hotplug_done(); | ||
1811 | return ret; | 1801 | return ret; |
1812 | } | 1802 | } |
1813 | 1803 | ||
1804 | /* Must be protected by mem_hotplug_begin() */ | ||
1814 | int offline_pages(unsigned long start_pfn, unsigned long nr_pages) | 1805 | int offline_pages(unsigned long start_pfn, unsigned long nr_pages) |
1815 | { | 1806 | { |
1816 | return __offline_pages(start_pfn, start_pfn + nr_pages, 120 * HZ); | 1807 | return __offline_pages(start_pfn, start_pfn + nr_pages, 120 * HZ); |
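The memory-hotplug hunks above move the mem_hotplug_begin()/mem_hotplug_done() bracket out of online_pages()/offline_pages() and into their callers, which also lets the functions return errors directly instead of funnelling every failure through an unlock label. A userspace sketch of that "caller holds the lock" convention using a pthread mutex; the function names are invented for illustration.

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t hotplug_lock = PTHREAD_MUTEX_INITIALIZER;

    static void hotplug_begin(void) { pthread_mutex_lock(&hotplug_lock); }
    static void hotplug_done(void)  { pthread_mutex_unlock(&hotplug_lock); }

    /* Must be called with hotplug_begin() held by the caller. */
    static int online_range(unsigned long pfn, unsigned long nr_pages)
    {
        if (nr_pages == 0)
            return -1;          /* early returns no longer need a "goto out" to unlock */
        printf("onlining %lu pages at pfn %lu\n", nr_pages, pfn);
        return 0;
    }

    int main(void)
    {
        int ret;

        hotplug_begin();        /* caller takes the lock ...                  */
        ret = online_range(0x1000, 256);
        hotplug_done();         /* ... and releases it regardless of ret      */

        return ret ? 1 : 0;
    }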
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 4721046a134a..ede26291d4aa 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -945,7 +945,8 @@ static struct page *new_node_page(struct page *page, unsigned long node, int **x | |||
945 | return alloc_huge_page_node(page_hstate(compound_head(page)), | 945 | return alloc_huge_page_node(page_hstate(compound_head(page)), |
946 | node); | 946 | node); |
947 | else | 947 | else |
948 | return alloc_pages_exact_node(node, GFP_HIGHUSER_MOVABLE, 0); | 948 | return alloc_pages_exact_node(node, GFP_HIGHUSER_MOVABLE | |
949 | __GFP_THISNODE, 0); | ||
949 | } | 950 | } |
950 | 951 | ||
951 | /* | 952 | /* |
@@ -1985,7 +1986,8 @@ retry_cpuset: | |||
1985 | nmask = policy_nodemask(gfp, pol); | 1986 | nmask = policy_nodemask(gfp, pol); |
1986 | if (!nmask || node_isset(node, *nmask)) { | 1987 | if (!nmask || node_isset(node, *nmask)) { |
1987 | mpol_cond_put(pol); | 1988 | mpol_cond_put(pol); |
1988 | page = alloc_pages_exact_node(node, gfp, order); | 1989 | page = alloc_pages_exact_node(node, |
1990 | gfp | __GFP_THISNODE, order); | ||
1989 | goto out; | 1991 | goto out; |
1990 | } | 1992 | } |
1991 | } | 1993 | } |
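Both mempolicy call sites above gain __GFP_THISNODE, so an allocation aimed at a specific node fails rather than silently falling back to another node. The toy model below shows only that flag's effect, with made-up flag values and a fake two-node allocator; it is not the kernel allocator.

    #include <stdio.h>

    #define GFP_BASE      0x1u
    #define GFP_THISNODE  0x2u   /* toy analogue of __GFP_THISNODE */

    /* Pretend node 1 is out of memory. */
    static int node_has_memory(int node) { return node == 0; }

    /* Returns the node actually used, or -1 on failure. */
    static int alloc_on_node(int node, unsigned int flags)
    {
        if (node_has_memory(node))
            return node;
        if (flags & GFP_THISNODE)
            return -1;                 /* pinned to the node: fail, do not fall back */
        return node_has_memory(0) ? 0 : -1;   /* otherwise fall back to another node */
    }

    int main(void)
    {
        printf("without THISNODE: node %d\n", alloc_on_node(1, GFP_BASE));
        printf("with THISNODE:    node %d\n", alloc_on_node(1, GFP_BASE | GFP_THISNODE));
        return 0;
    }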
diff --git a/mm/mempool.c b/mm/mempool.c index e209c98c7203..949970db2874 100644 --- a/mm/mempool.c +++ b/mm/mempool.c | |||
@@ -113,23 +113,24 @@ EXPORT_SYMBOL(mempool_create_node); | |||
113 | * mempool_create(). | 113 | * mempool_create(). |
114 | * @new_min_nr: the new minimum number of elements guaranteed to be | 114 | * @new_min_nr: the new minimum number of elements guaranteed to be |
115 | * allocated for this pool. | 115 | * allocated for this pool. |
116 | * @gfp_mask: the usual allocation bitmask. | ||
117 | * | 116 | * |
118 | * This function shrinks/grows the pool. In the case of growing, | 117 | * This function shrinks/grows the pool. In the case of growing, |
119 | * it cannot be guaranteed that the pool will be grown to the new | 118 | * it cannot be guaranteed that the pool will be grown to the new |
120 | * size immediately, but new mempool_free() calls will refill it. | 119 | * size immediately, but new mempool_free() calls will refill it. |
120 | * This function may sleep. | ||
121 | * | 121 | * |
122 | * Note, the caller must guarantee that no mempool_destroy is called | 122 | * Note, the caller must guarantee that no mempool_destroy is called |
123 | * while this function is running. mempool_alloc() & mempool_free() | 123 | * while this function is running. mempool_alloc() & mempool_free() |
124 | * might be called (eg. from IRQ contexts) while this function executes. | 124 | * might be called (eg. from IRQ contexts) while this function executes. |
125 | */ | 125 | */ |
126 | int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask) | 126 | int mempool_resize(mempool_t *pool, int new_min_nr) |
127 | { | 127 | { |
128 | void *element; | 128 | void *element; |
129 | void **new_elements; | 129 | void **new_elements; |
130 | unsigned long flags; | 130 | unsigned long flags; |
131 | 131 | ||
132 | BUG_ON(new_min_nr <= 0); | 132 | BUG_ON(new_min_nr <= 0); |
133 | might_sleep(); | ||
133 | 134 | ||
134 | spin_lock_irqsave(&pool->lock, flags); | 135 | spin_lock_irqsave(&pool->lock, flags); |
135 | if (new_min_nr <= pool->min_nr) { | 136 | if (new_min_nr <= pool->min_nr) { |
@@ -145,7 +146,8 @@ int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask) | |||
145 | spin_unlock_irqrestore(&pool->lock, flags); | 146 | spin_unlock_irqrestore(&pool->lock, flags); |
146 | 147 | ||
147 | /* Grow the pool */ | 148 | /* Grow the pool */ |
148 | new_elements = kmalloc(new_min_nr * sizeof(*new_elements), gfp_mask); | 149 | new_elements = kmalloc_array(new_min_nr, sizeof(*new_elements), |
150 | GFP_KERNEL); | ||
149 | if (!new_elements) | 151 | if (!new_elements) |
150 | return -ENOMEM; | 152 | return -ENOMEM; |
151 | 153 | ||
@@ -164,7 +166,7 @@ int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask) | |||
164 | 166 | ||
165 | while (pool->curr_nr < pool->min_nr) { | 167 | while (pool->curr_nr < pool->min_nr) { |
166 | spin_unlock_irqrestore(&pool->lock, flags); | 168 | spin_unlock_irqrestore(&pool->lock, flags); |
167 | element = pool->alloc(gfp_mask, pool->pool_data); | 169 | element = pool->alloc(GFP_KERNEL, pool->pool_data); |
168 | if (!element) | 170 | if (!element) |
169 | goto out; | 171 | goto out; |
170 | spin_lock_irqsave(&pool->lock, flags); | 172 | spin_lock_irqsave(&pool->lock, flags); |
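After the change above, mempool_resize() no longer takes a gfp_mask: it always allocates with GFP_KERNEL (and therefore may sleep), which removes any temptation to call it from atomic context, and it uses kmalloc_array() for the overflow-checked array allocation. A hedged userspace sketch of the grow path only; the struct and function names are invented, and calloc() plays the role of the overflow-safe array allocation.

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct pool {
        int    min_nr;     /* elements the pool guarantees */
        int    curr_nr;    /* elements currently cached    */
        void **elements;
    };

    /* Always a normal (possibly blocking) allocation, like GFP_KERNEL. */
    static int pool_resize(struct pool *pool, int new_min_nr)
    {
        if (new_min_nr <= 0)
            return -1;

        if (new_min_nr > pool->min_nr) {
            /* calloc(n, size) checks n * size for overflow, like kmalloc_array(). */
            void **new_elements = calloc((size_t)new_min_nr, sizeof(*new_elements));

            if (!new_elements)
                return -1;
            if (pool->curr_nr > 0)
                memcpy(new_elements, pool->elements,
                       (size_t)pool->curr_nr * sizeof(*new_elements));
            free(pool->elements);
            pool->elements = new_elements;
        }
        pool->min_nr = new_min_nr;
        return 0;
    }

    int main(void)
    {
        struct pool p = { 2, 0, calloc(2, sizeof(void *)) };

        if (pool_resize(&p, 8) == 0)
            printf("pool now guarantees %d elements\n", p.min_nr);
        free(p.elements);
        return 0;
    }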
diff --git a/arch/x86/mm/memtest.c b/mm/memtest.c index 1e9da795767a..1997d934b13b 100644 --- a/arch/x86/mm/memtest.c +++ b/mm/memtest.c | |||
@@ -29,7 +29,7 @@ static u64 patterns[] __initdata = { | |||
29 | 0x7a6c7258554e494cULL, /* yeah ;-) */ | 29 | 0x7a6c7258554e494cULL, /* yeah ;-) */ |
30 | }; | 30 | }; |
31 | 31 | ||
32 | static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad) | 32 | static void __init reserve_bad_mem(u64 pattern, phys_addr_t start_bad, phys_addr_t end_bad) |
33 | { | 33 | { |
34 | printk(KERN_INFO " %016llx bad mem addr %010llx - %010llx reserved\n", | 34 | printk(KERN_INFO " %016llx bad mem addr %010llx - %010llx reserved\n", |
35 | (unsigned long long) pattern, | 35 | (unsigned long long) pattern, |
@@ -38,11 +38,11 @@ static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad) | |||
38 | memblock_reserve(start_bad, end_bad - start_bad); | 38 | memblock_reserve(start_bad, end_bad - start_bad); |
39 | } | 39 | } |
40 | 40 | ||
41 | static void __init memtest(u64 pattern, u64 start_phys, u64 size) | 41 | static void __init memtest(u64 pattern, phys_addr_t start_phys, phys_addr_t size) |
42 | { | 42 | { |
43 | u64 *p, *start, *end; | 43 | u64 *p, *start, *end; |
44 | u64 start_bad, last_bad; | 44 | phys_addr_t start_bad, last_bad; |
45 | u64 start_phys_aligned; | 45 | phys_addr_t start_phys_aligned; |
46 | const size_t incr = sizeof(pattern); | 46 | const size_t incr = sizeof(pattern); |
47 | 47 | ||
48 | start_phys_aligned = ALIGN(start_phys, incr); | 48 | start_phys_aligned = ALIGN(start_phys, incr); |
@@ -69,14 +69,14 @@ static void __init memtest(u64 pattern, u64 start_phys, u64 size) | |||
69 | reserve_bad_mem(pattern, start_bad, last_bad + incr); | 69 | reserve_bad_mem(pattern, start_bad, last_bad + incr); |
70 | } | 70 | } |
71 | 71 | ||
72 | static void __init do_one_pass(u64 pattern, u64 start, u64 end) | 72 | static void __init do_one_pass(u64 pattern, phys_addr_t start, phys_addr_t end) |
73 | { | 73 | { |
74 | u64 i; | 74 | u64 i; |
75 | phys_addr_t this_start, this_end; | 75 | phys_addr_t this_start, this_end; |
76 | 76 | ||
77 | for_each_free_mem_range(i, NUMA_NO_NODE, &this_start, &this_end, NULL) { | 77 | for_each_free_mem_range(i, NUMA_NO_NODE, &this_start, &this_end, NULL) { |
78 | this_start = clamp_t(phys_addr_t, this_start, start, end); | 78 | this_start = clamp(this_start, start, end); |
79 | this_end = clamp_t(phys_addr_t, this_end, start, end); | 79 | this_end = clamp(this_end, start, end); |
80 | if (this_start < this_end) { | 80 | if (this_start < this_end) { |
81 | printk(KERN_INFO " %010llx - %010llx pattern %016llx\n", | 81 | printk(KERN_INFO " %010llx - %010llx pattern %016llx\n", |
82 | (unsigned long long)this_start, | 82 | (unsigned long long)this_start, |
@@ -102,7 +102,7 @@ static int __init parse_memtest(char *arg) | |||
102 | 102 | ||
103 | early_param("memtest", parse_memtest); | 103 | early_param("memtest", parse_memtest); |
104 | 104 | ||
105 | void __init early_memtest(unsigned long start, unsigned long end) | 105 | void __init early_memtest(phys_addr_t start, phys_addr_t end) |
106 | { | 106 | { |
107 | unsigned int i; | 107 | unsigned int i; |
108 | unsigned int idx = 0; | 108 | unsigned int idx = 0; |
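With memtest now living in mm/ and taking phys_addr_t throughout, plain clamp() suffices because the range endpoints and the per-region bounds share a type. The core algorithm is simple enough to model in userspace: write a pattern over a region, read it back, and report any miscompares. The sketch below operates on malloc'd memory rather than physical ranges and skips the coalescing of adjacent bad words; the pattern values are taken from the kernel's table, the rest is illustrative.

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Report a bad range [start_bad, end_bad) found while checking `pattern`. */
    static void report_bad(uint64_t pattern, size_t start_bad, size_t end_bad)
    {
        printf("  %016llx bad mem offset %zu - %zu\n",
               (unsigned long long)pattern, start_bad, end_bad);
    }

    /* One memtest pass over `words` 64-bit cells: fill, then verify. */
    static void memtest_pass(uint64_t *mem, size_t words, uint64_t pattern)
    {
        for (size_t i = 0; i < words; i++)
            mem[i] = pattern;

        for (size_t i = 0; i < words; i++) {
            if (mem[i] == pattern)
                continue;
            report_bad(pattern, i * sizeof(uint64_t), (i + 1) * sizeof(uint64_t));
        }
    }

    int main(void)
    {
        enum { WORDS = 1024 };
        uint64_t *mem = malloc(WORDS * sizeof(*mem));
        const uint64_t patterns[] = { 0x0, ~0x0ULL, 0x5555555555555555ULL,
                                      0xaaaaaaaaaaaaaaaaULL };

        if (!mem)
            return 1;
        for (size_t p = 0; p < sizeof(patterns) / sizeof(patterns[0]); p++)
            memtest_pass(mem, WORDS, patterns[p]);
        printf("memtest model finished\n");
        free(mem);
        return 0;
    }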
diff --git a/mm/migrate.c b/mm/migrate.c index 85e042686031..a65ff72ab739 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
@@ -901,12 +901,23 @@ out: | |||
901 | } | 901 | } |
902 | 902 | ||
903 | /* | 903 | /* |
904 | * gcc 4.7 and 4.8 on arm get an ICE when inlining unmap_and_move(). Work | ||
905 | * around it. | ||
906 | */ | ||
907 | #if (GCC_VERSION >= 40700 && GCC_VERSION < 40900) && defined(CONFIG_ARM) | ||
908 | #define ICE_noinline noinline | ||
909 | #else | ||
910 | #define ICE_noinline | ||
911 | #endif | ||
912 | |||
913 | /* | ||
904 | * Obtain the lock on page, remove all ptes and migrate the page | 914 | * Obtain the lock on page, remove all ptes and migrate the page |
905 | * to the newly allocated page in newpage. | 915 | * to the newly allocated page in newpage. |
906 | */ | 916 | */ |
907 | static int unmap_and_move(new_page_t get_new_page, free_page_t put_new_page, | 917 | static ICE_noinline int unmap_and_move(new_page_t get_new_page, |
908 | unsigned long private, struct page *page, int force, | 918 | free_page_t put_new_page, |
909 | enum migrate_mode mode) | 919 | unsigned long private, struct page *page, |
920 | int force, enum migrate_mode mode) | ||
910 | { | 921 | { |
911 | int rc = 0; | 922 | int rc = 0; |
912 | int *result = NULL; | 923 | int *result = NULL; |
@@ -1554,30 +1565,10 @@ static struct page *alloc_misplaced_dst_page(struct page *page, | |||
1554 | * page migration rate limiting control. | 1565 | * page migration rate limiting control. |
1555 | * Do not migrate more than @pages_to_migrate in a @migrate_interval_millisecs | 1566 | * Do not migrate more than @pages_to_migrate in a @migrate_interval_millisecs |
1556 | * window of time. Default here says do not migrate more than 1280M per second. | 1567 | * window of time. Default here says do not migrate more than 1280M per second. |
1557 | * If a node is rate-limited then PTE NUMA updates are also rate-limited. However | ||
1558 | * as it is faults that reset the window, pte updates will happen unconditionally | ||
1559 | * if there has not been a fault since @pteupdate_interval_millisecs after the | ||
1560 | * throttle window closed. | ||
1561 | */ | 1568 | */ |
1562 | static unsigned int migrate_interval_millisecs __read_mostly = 100; | 1569 | static unsigned int migrate_interval_millisecs __read_mostly = 100; |
1563 | static unsigned int pteupdate_interval_millisecs __read_mostly = 1000; | ||
1564 | static unsigned int ratelimit_pages __read_mostly = 128 << (20 - PAGE_SHIFT); | 1570 | static unsigned int ratelimit_pages __read_mostly = 128 << (20 - PAGE_SHIFT); |
1565 | 1571 | ||
1566 | /* Returns true if NUMA migration is currently rate limited */ | ||
1567 | bool migrate_ratelimited(int node) | ||
1568 | { | ||
1569 | pg_data_t *pgdat = NODE_DATA(node); | ||
1570 | |||
1571 | if (time_after(jiffies, pgdat->numabalancing_migrate_next_window + | ||
1572 | msecs_to_jiffies(pteupdate_interval_millisecs))) | ||
1573 | return false; | ||
1574 | |||
1575 | if (pgdat->numabalancing_migrate_nr_pages < ratelimit_pages) | ||
1576 | return false; | ||
1577 | |||
1578 | return true; | ||
1579 | } | ||
1580 | |||
1581 | /* Returns true if the node is migrate rate-limited after the update */ | 1572 | /* Returns true if the node is migrate rate-limited after the update */ |
1582 | static bool numamigrate_update_ratelimit(pg_data_t *pgdat, | 1573 | static bool numamigrate_update_ratelimit(pg_data_t *pgdat, |
1583 | unsigned long nr_pages) | 1574 | unsigned long nr_pages) |
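The ICE_noinline macro introduced above is a common pattern: gate a function attribute on compiler version and configuration so the workaround vanishes everywhere it is not needed. The standalone sketch below shows the same pattern using GCC's built-in version macros (the kernel's GCC_VERSION is derived from these) and a hypothetical 4.7/4.8 miscompilation; it is illustrative, not the kernel's macro.

    #include <stdio.h>

    /*
     * Apply `noinline` only where a specific compiler range is known to
     * miscompile, and expand to nothing everywhere else.
     */
    #if defined(__GNUC__) && !defined(__clang__) && \
        (__GNUC__ == 4 && (__GNUC_MINOR__ == 7 || __GNUC_MINOR__ == 8))
    #define ICE_NOINLINE __attribute__((noinline))
    #else
    #define ICE_NOINLINE
    #endif

    static ICE_NOINLINE int heavy_helper(int x)
    {
        return x * x + 1;   /* stand-in for the function that triggered the ICE */
    }

    int main(void)
    {
        printf("%d\n", heavy_helper(6));
        return 0;
    }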
diff --git a/mm/mlock.c b/mm/mlock.c index 8a54cd214925..6fd2cf15e868 100644 --- a/mm/mlock.c +++ b/mm/mlock.c | |||
@@ -205,62 +205,6 @@ out: | |||
205 | return nr_pages - 1; | 205 | return nr_pages - 1; |
206 | } | 206 | } |
207 | 207 | ||
208 | /** | ||
209 | * __mlock_vma_pages_range() - mlock a range of pages in the vma. | ||
210 | * @vma: target vma | ||
211 | * @start: start address | ||
212 | * @end: end address | ||
213 | * @nonblocking: | ||
214 | * | ||
215 | * This takes care of making the pages present too. | ||
216 | * | ||
217 | * return 0 on success, negative error code on error. | ||
218 | * | ||
219 | * vma->vm_mm->mmap_sem must be held. | ||
220 | * | ||
221 | * If @nonblocking is NULL, it may be held for read or write and will | ||
222 | * be unperturbed. | ||
223 | * | ||
224 | * If @nonblocking is non-NULL, it must held for read only and may be | ||
225 | * released. If it's released, *@nonblocking will be set to 0. | ||
226 | */ | ||
227 | long __mlock_vma_pages_range(struct vm_area_struct *vma, | ||
228 | unsigned long start, unsigned long end, int *nonblocking) | ||
229 | { | ||
230 | struct mm_struct *mm = vma->vm_mm; | ||
231 | unsigned long nr_pages = (end - start) / PAGE_SIZE; | ||
232 | int gup_flags; | ||
233 | |||
234 | VM_BUG_ON(start & ~PAGE_MASK); | ||
235 | VM_BUG_ON(end & ~PAGE_MASK); | ||
236 | VM_BUG_ON_VMA(start < vma->vm_start, vma); | ||
237 | VM_BUG_ON_VMA(end > vma->vm_end, vma); | ||
238 | VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm); | ||
239 | |||
240 | gup_flags = FOLL_TOUCH | FOLL_MLOCK; | ||
241 | /* | ||
242 | * We want to touch writable mappings with a write fault in order | ||
243 | * to break COW, except for shared mappings because these don't COW | ||
244 | * and we would not want to dirty them for nothing. | ||
245 | */ | ||
246 | if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE) | ||
247 | gup_flags |= FOLL_WRITE; | ||
248 | |||
249 | /* | ||
250 | * We want mlock to succeed for regions that have any permissions | ||
251 | * other than PROT_NONE. | ||
252 | */ | ||
253 | if (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)) | ||
254 | gup_flags |= FOLL_FORCE; | ||
255 | |||
256 | /* | ||
257 | * We made sure addr is within a VMA, so the following will | ||
258 | * not result in a stack expansion that recurses back here. | ||
259 | */ | ||
260 | return __get_user_pages(current, mm, start, nr_pages, gup_flags, | ||
261 | NULL, NULL, nonblocking); | ||
262 | } | ||
263 | |||
264 | /* | 208 | /* |
265 | * convert get_user_pages() return value to posix mlock() error | 209 | * convert get_user_pages() return value to posix mlock() error |
266 | */ | 210 | */ |
@@ -596,7 +540,7 @@ success: | |||
596 | /* | 540 | /* |
597 | * vm_flags is protected by the mmap_sem held in write mode. | 541 | * vm_flags is protected by the mmap_sem held in write mode. |
598 | * It's okay if try_to_unmap_one unmaps a page just after we | 542 | * It's okay if try_to_unmap_one unmaps a page just after we |
599 | * set VM_LOCKED, __mlock_vma_pages_range will bring it back. | 543 | * set VM_LOCKED, populate_vma_page_range will bring it back. |
600 | */ | 544 | */ |
601 | 545 | ||
602 | if (lock) | 546 | if (lock) |
@@ -660,69 +604,6 @@ static int do_mlock(unsigned long start, size_t len, int on) | |||
660 | return error; | 604 | return error; |
661 | } | 605 | } |
662 | 606 | ||
663 | /* | ||
664 | * __mm_populate - populate and/or mlock pages within a range of address space. | ||
665 | * | ||
666 | * This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap | ||
667 | * flags. VMAs must be already marked with the desired vm_flags, and | ||
668 | * mmap_sem must not be held. | ||
669 | */ | ||
670 | int __mm_populate(unsigned long start, unsigned long len, int ignore_errors) | ||
671 | { | ||
672 | struct mm_struct *mm = current->mm; | ||
673 | unsigned long end, nstart, nend; | ||
674 | struct vm_area_struct *vma = NULL; | ||
675 | int locked = 0; | ||
676 | long ret = 0; | ||
677 | |||
678 | VM_BUG_ON(start & ~PAGE_MASK); | ||
679 | VM_BUG_ON(len != PAGE_ALIGN(len)); | ||
680 | end = start + len; | ||
681 | |||
682 | for (nstart = start; nstart < end; nstart = nend) { | ||
683 | /* | ||
684 | * We want to fault in pages for [nstart; end) address range. | ||
685 | * Find first corresponding VMA. | ||
686 | */ | ||
687 | if (!locked) { | ||
688 | locked = 1; | ||
689 | down_read(&mm->mmap_sem); | ||
690 | vma = find_vma(mm, nstart); | ||
691 | } else if (nstart >= vma->vm_end) | ||
692 | vma = vma->vm_next; | ||
693 | if (!vma || vma->vm_start >= end) | ||
694 | break; | ||
695 | /* | ||
696 | * Set [nstart; nend) to intersection of desired address | ||
697 | * range with the first VMA. Also, skip undesirable VMA types. | ||
698 | */ | ||
699 | nend = min(end, vma->vm_end); | ||
700 | if (vma->vm_flags & (VM_IO | VM_PFNMAP)) | ||
701 | continue; | ||
702 | if (nstart < vma->vm_start) | ||
703 | nstart = vma->vm_start; | ||
704 | /* | ||
705 | * Now fault in a range of pages. __mlock_vma_pages_range() | ||
706 | * double checks the vma flags, so that it won't mlock pages | ||
707 | * if the vma was already munlocked. | ||
708 | */ | ||
709 | ret = __mlock_vma_pages_range(vma, nstart, nend, &locked); | ||
710 | if (ret < 0) { | ||
711 | if (ignore_errors) { | ||
712 | ret = 0; | ||
713 | continue; /* continue at next VMA */ | ||
714 | } | ||
715 | ret = __mlock_posix_error_return(ret); | ||
716 | break; | ||
717 | } | ||
718 | nend = nstart + ret * PAGE_SIZE; | ||
719 | ret = 0; | ||
720 | } | ||
721 | if (locked) | ||
722 | up_read(&mm->mmap_sem); | ||
723 | return ret; /* 0 or negative error code */ | ||
724 | } | ||
725 | |||
726 | SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len) | 607 | SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len) |
727 | { | 608 | { |
728 | unsigned long locked; | 609 | unsigned long locked; |
@@ -750,9 +631,13 @@ SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len) | |||
750 | error = do_mlock(start, len, 1); | 631 | error = do_mlock(start, len, 1); |
751 | 632 | ||
752 | up_write(¤t->mm->mmap_sem); | 633 | up_write(¤t->mm->mmap_sem); |
753 | if (!error) | 634 | if (error) |
754 | error = __mm_populate(start, len, 0); | 635 | return error; |
755 | return error; | 636 | |
637 | error = __mm_populate(start, len, 0); | ||
638 | if (error) | ||
639 | return __mlock_posix_error_return(error); | ||
640 | return 0; | ||
756 | } | 641 | } |
757 | 642 | ||
758 | SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len) | 643 | SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len) |
@@ -2316,7 +2316,7 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr) | |||
2316 | if (!prev || expand_stack(prev, addr)) | 2316 | if (!prev || expand_stack(prev, addr)) |
2317 | return NULL; | 2317 | return NULL; |
2318 | if (prev->vm_flags & VM_LOCKED) | 2318 | if (prev->vm_flags & VM_LOCKED) |
2319 | __mlock_vma_pages_range(prev, addr, prev->vm_end, NULL); | 2319 | populate_vma_page_range(prev, addr, prev->vm_end, NULL); |
2320 | return prev; | 2320 | return prev; |
2321 | } | 2321 | } |
2322 | #else | 2322 | #else |
@@ -2351,7 +2351,7 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr) | |||
2351 | if (expand_stack(vma, addr)) | 2351 | if (expand_stack(vma, addr)) |
2352 | return NULL; | 2352 | return NULL; |
2353 | if (vma->vm_flags & VM_LOCKED) | 2353 | if (vma->vm_flags & VM_LOCKED) |
2354 | __mlock_vma_pages_range(vma, addr, start, NULL); | 2354 | populate_vma_page_range(vma, addr, start, NULL); |
2355 | return vma; | 2355 | return vma; |
2356 | } | 2356 | } |
2357 | #endif | 2357 | #endif |
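
As context for the relocated error conversion, here is a minimal userspace sketch (not part of this patch; the mapping size and flags are arbitrary) showing the mlock() behaviour that do_mlock() plus __mm_populate() now implement: the VMA is marked VM_LOCKED under mmap_sem, the range is populated afterwards, and any populate failure surfaces as a POSIX errno such as EAGAIN or ENOMEM rather than a raw __get_user_pages() return value.

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 16 * 4096;

	/* Anonymous private mapping, chosen only for illustration. */
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* mlock() both marks the VMA VM_LOCKED and faults the pages in. */
	if (mlock(p, len) != 0)
		fprintf(stderr, "mlock: %s\n", strerror(errno));
	else
		puts("range locked and populated");

	munlock(p, len);
	munmap(p, len);
	return 0;
}
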
diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 642f38cb175a..52628c819bf7 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c | |||
@@ -612,7 +612,8 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, | |||
612 | * Determines whether the kernel must panic because of the panic_on_oom sysctl. | 612 | * Determines whether the kernel must panic because of the panic_on_oom sysctl. |
613 | */ | 613 | */ |
614 | void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, | 614 | void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, |
615 | int order, const nodemask_t *nodemask) | 615 | int order, const nodemask_t *nodemask, |
616 | struct mem_cgroup *memcg) | ||
616 | { | 617 | { |
617 | if (likely(!sysctl_panic_on_oom)) | 618 | if (likely(!sysctl_panic_on_oom)) |
618 | return; | 619 | return; |
@@ -625,7 +626,7 @@ void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, | |||
625 | if (constraint != CONSTRAINT_NONE) | 626 | if (constraint != CONSTRAINT_NONE) |
626 | return; | 627 | return; |
627 | } | 628 | } |
628 | dump_header(NULL, gfp_mask, order, NULL, nodemask); | 629 | dump_header(NULL, gfp_mask, order, memcg, nodemask); |
629 | panic("Out of memory: %s panic_on_oom is enabled\n", | 630 | panic("Out of memory: %s panic_on_oom is enabled\n", |
630 | sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide"); | 631 | sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide"); |
631 | } | 632 | } |
@@ -740,7 +741,7 @@ static void __out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, | |||
740 | constraint = constrained_alloc(zonelist, gfp_mask, nodemask, | 741 | constraint = constrained_alloc(zonelist, gfp_mask, nodemask, |
741 | &totalpages); | 742 | &totalpages); |
742 | mpol_mask = (constraint == CONSTRAINT_MEMORY_POLICY) ? nodemask : NULL; | 743 | mpol_mask = (constraint == CONSTRAINT_MEMORY_POLICY) ? nodemask : NULL; |
743 | check_panic_on_oom(constraint, gfp_mask, order, mpol_mask); | 744 | check_panic_on_oom(constraint, gfp_mask, order, mpol_mask, NULL); |
744 | 745 | ||
745 | if (sysctl_oom_kill_allocating_task && current->mm && | 746 | if (sysctl_oom_kill_allocating_task && current->mm && |
746 | !oom_unkillable_task(current, NULL, nodemask) && | 747 | !oom_unkillable_task(current, NULL, nodemask) && |
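
A hedged sketch of how a memcg-constrained OOM path could use the widened signature; the wrapper name and surrounding code are assumptions for illustration, and only the check_panic_on_oom() call itself reflects the hunk above.

/* Hypothetical caller, not taken from this patch. */
static void memcg_oom_example(struct mem_cgroup *memcg, gfp_t gfp_mask,
			      int order)
{
	/*
	 * Passing the memcg (the global path above still passes NULL)
	 * lets dump_header() name the offending cgroup before a
	 * panic_on_oom panic.
	 */
	check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL, memcg);
}
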
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 644bcb665773..0372411f38fc 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
@@ -2111,6 +2111,25 @@ void account_page_dirtied(struct page *page, struct address_space *mapping) | |||
2111 | EXPORT_SYMBOL(account_page_dirtied); | 2111 | EXPORT_SYMBOL(account_page_dirtied); |
2112 | 2112 | ||
2113 | /* | 2113 | /* |
2114 | * Helper function for deaccounting a dirty page without writeback. | ||
2115 | * | ||
2116 | * Doing this should *normally* only ever be done when a page | ||
2117 | * is truncated, and is not actually mapped anywhere at all. However, | ||
2118 | * fs/buffer.c does this when it notices that somebody has cleaned | ||
2119 | * out all the buffers on a page without actually doing it through | ||
2120 | * the VM. Can you say "ext3 is horribly ugly"? Thought you could. | ||
2121 | */ | ||
2122 | void account_page_cleaned(struct page *page, struct address_space *mapping) | ||
2123 | { | ||
2124 | if (mapping_cap_account_dirty(mapping)) { | ||
2125 | dec_zone_page_state(page, NR_FILE_DIRTY); | ||
2126 | dec_bdi_stat(inode_to_bdi(mapping->host), BDI_RECLAIMABLE); | ||
2127 | task_io_account_cancelled_write(PAGE_CACHE_SIZE); | ||
2128 | } | ||
2129 | } | ||
2130 | EXPORT_SYMBOL(account_page_cleaned); | ||
2131 | |||
2132 | /* | ||
2114 | * For address_spaces which do not use buffers. Just tag the page as dirty in | 2133 | * For address_spaces which do not use buffers. Just tag the page as dirty in |
2115 | * its radix tree. | 2134 | * its radix tree. |
2116 | * | 2135 | * |
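
The calling convention is easy to miss from the helper alone: the caller clears PG_dirty itself and calls account_page_cleaned() only when the bit was actually set, so the zone and bdi counters are decremented exactly once per cleared bit. A minimal sketch of that pattern (the truncate path later in this patch has the same shape):

static void cancel_dirty_example(struct page *page,
				 struct address_space *mapping)
{
	/* Deaccount only if we were the ones to clear the dirty bit. */
	if (TestClearPageDirty(page))
		account_page_cleaned(page, mapping);
}
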
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 40e29429e7b0..1b849500640c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -1032,11 +1032,9 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, | |||
1032 | static int fallbacks[MIGRATE_TYPES][4] = { | 1032 | static int fallbacks[MIGRATE_TYPES][4] = { |
1033 | [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, | 1033 | [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, |
1034 | [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, | 1034 | [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, |
1035 | [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, | ||
1035 | #ifdef CONFIG_CMA | 1036 | #ifdef CONFIG_CMA |
1036 | [MIGRATE_MOVABLE] = { MIGRATE_CMA, MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, | ||
1037 | [MIGRATE_CMA] = { MIGRATE_RESERVE }, /* Never used */ | 1037 | [MIGRATE_CMA] = { MIGRATE_RESERVE }, /* Never used */ |
1038 | #else | ||
1039 | [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, | ||
1040 | #endif | 1038 | #endif |
1041 | [MIGRATE_RESERVE] = { MIGRATE_RESERVE }, /* Never used */ | 1039 | [MIGRATE_RESERVE] = { MIGRATE_RESERVE }, /* Never used */ |
1042 | #ifdef CONFIG_MEMORY_ISOLATION | 1040 | #ifdef CONFIG_MEMORY_ISOLATION |
@@ -1044,6 +1042,17 @@ static int fallbacks[MIGRATE_TYPES][4] = { | |||
1044 | #endif | 1042 | #endif |
1045 | }; | 1043 | }; |
1046 | 1044 | ||
1045 | #ifdef CONFIG_CMA | ||
1046 | static struct page *__rmqueue_cma_fallback(struct zone *zone, | ||
1047 | unsigned int order) | ||
1048 | { | ||
1049 | return __rmqueue_smallest(zone, order, MIGRATE_CMA); | ||
1050 | } | ||
1051 | #else | ||
1052 | static inline struct page *__rmqueue_cma_fallback(struct zone *zone, | ||
1053 | unsigned int order) { return NULL; } | ||
1054 | #endif | ||
1055 | |||
1047 | /* | 1056 | /* |
1048 | * Move the free pages in a range to the free lists of the requested type. | 1057 | * Move the free pages in a range to the free lists of the requested type. |
1049 | * Note that start_page and end_pages are not aligned on a pageblock | 1058 | * Note that start_page and end_pages are not aligned on a pageblock |
@@ -1136,14 +1145,40 @@ static void change_pageblock_range(struct page *pageblock_page, | |||
1136 | * as fragmentation caused by those allocations polluting movable pageblocks | 1145 | * as fragmentation caused by those allocations polluting movable pageblocks |
1137 | * is worse than movable allocations stealing from unmovable and reclaimable | 1146 | * is worse than movable allocations stealing from unmovable and reclaimable |
1138 | * pageblocks. | 1147 | * pageblocks. |
1139 | * | ||
1140 | * If we claim more than half of the pageblock, change pageblock's migratetype | ||
1141 | * as well. | ||
1142 | */ | 1148 | */ |
1143 | static void try_to_steal_freepages(struct zone *zone, struct page *page, | 1149 | static bool can_steal_fallback(unsigned int order, int start_mt) |
1144 | int start_type, int fallback_type) | 1150 | { |
1151 | /* | ||
1152 | * This order check is intentional, even though the check below | ||
1153 | * uses a more relaxed order. The reason is that we can steal a | ||
1154 | * whole pageblock when this condition is met, whereas the check | ||
1155 | * below does not guarantee that and is only a heuristic, so it | ||
1156 | * could be changed at any time. | ||
1157 | */ | ||
1158 | if (order >= pageblock_order) | ||
1159 | return true; | ||
1160 | |||
1161 | if (order >= pageblock_order / 2 || | ||
1162 | start_mt == MIGRATE_RECLAIMABLE || | ||
1163 | start_mt == MIGRATE_UNMOVABLE || | ||
1164 | page_group_by_mobility_disabled) | ||
1165 | return true; | ||
1166 | |||
1167 | return false; | ||
1168 | } | ||
1169 | |||
1170 | /* | ||
1171 | * This function implements the actual steal behaviour. If the order is large | ||
1172 | * enough, we can steal the whole pageblock. If not, we first move the free | ||
1173 | * pages in this pageblock and check whether at least half of the pages were | ||
1174 | * moved. If so, we can change the pageblock's migratetype and permanently | ||
1175 | * use its pages as the requested migratetype in the future. | ||
1176 | */ | ||
1177 | static void steal_suitable_fallback(struct zone *zone, struct page *page, | ||
1178 | int start_type) | ||
1145 | { | 1179 | { |
1146 | int current_order = page_order(page); | 1180 | int current_order = page_order(page); |
1181 | int pages; | ||
1147 | 1182 | ||
1148 | /* Take ownership for orders >= pageblock_order */ | 1183 | /* Take ownership for orders >= pageblock_order */ |
1149 | if (current_order >= pageblock_order) { | 1184 | if (current_order >= pageblock_order) { |
@@ -1151,19 +1186,49 @@ static void try_to_steal_freepages(struct zone *zone, struct page *page, | |||
1151 | return; | 1186 | return; |
1152 | } | 1187 | } |
1153 | 1188 | ||
1154 | if (current_order >= pageblock_order / 2 || | 1189 | pages = move_freepages_block(zone, page, start_type); |
1155 | start_type == MIGRATE_RECLAIMABLE || | 1190 | |
1156 | start_type == MIGRATE_UNMOVABLE || | 1191 | /* Claim the whole block if over half of it is free */ |
1157 | page_group_by_mobility_disabled) { | 1192 | if (pages >= (1 << (pageblock_order-1)) || |
1158 | int pages; | 1193 | page_group_by_mobility_disabled) |
1194 | set_pageblock_migratetype(page, start_type); | ||
1195 | } | ||
1196 | |||
1197 | /* | ||
1198 | * Check whether there is a suitable fallback freepage with requested order. | ||
1199 | * If only_stealable is true, this function returns fallback_mt only if | ||
1200 | * we can steal the other freepages as a whole. This helps to reduce | ||
1201 | * fragmentation due to mixed migratetype pages in one pageblock. | ||
1202 | */ | ||
1203 | int find_suitable_fallback(struct free_area *area, unsigned int order, | ||
1204 | int migratetype, bool only_stealable, bool *can_steal) | ||
1205 | { | ||
1206 | int i; | ||
1207 | int fallback_mt; | ||
1208 | |||
1209 | if (area->nr_free == 0) | ||
1210 | return -1; | ||
1211 | |||
1212 | *can_steal = false; | ||
1213 | for (i = 0;; i++) { | ||
1214 | fallback_mt = fallbacks[migratetype][i]; | ||
1215 | if (fallback_mt == MIGRATE_RESERVE) | ||
1216 | break; | ||
1217 | |||
1218 | if (list_empty(&area->free_list[fallback_mt])) | ||
1219 | continue; | ||
1159 | 1220 | ||
1160 | pages = move_freepages_block(zone, page, start_type); | 1221 | if (can_steal_fallback(order, migratetype)) |
1222 | *can_steal = true; | ||
1161 | 1223 | ||
1162 | /* Claim the whole block if over half of it is free */ | 1224 | if (!only_stealable) |
1163 | if (pages >= (1 << (pageblock_order-1)) || | 1225 | return fallback_mt; |
1164 | page_group_by_mobility_disabled) | 1226 | |
1165 | set_pageblock_migratetype(page, start_type); | 1227 | if (*can_steal) |
1228 | return fallback_mt; | ||
1166 | } | 1229 | } |
1230 | |||
1231 | return -1; | ||
1167 | } | 1232 | } |
1168 | 1233 | ||
1169 | /* Remove an element from the buddy allocator from the fallback list */ | 1234 | /* Remove an element from the buddy allocator from the fallback list */ |
@@ -1173,64 +1238,45 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype) | |||
1173 | struct free_area *area; | 1238 | struct free_area *area; |
1174 | unsigned int current_order; | 1239 | unsigned int current_order; |
1175 | struct page *page; | 1240 | struct page *page; |
1241 | int fallback_mt; | ||
1242 | bool can_steal; | ||
1176 | 1243 | ||
1177 | /* Find the largest possible block of pages in the other list */ | 1244 | /* Find the largest possible block of pages in the other list */ |
1178 | for (current_order = MAX_ORDER-1; | 1245 | for (current_order = MAX_ORDER-1; |
1179 | current_order >= order && current_order <= MAX_ORDER-1; | 1246 | current_order >= order && current_order <= MAX_ORDER-1; |
1180 | --current_order) { | 1247 | --current_order) { |
1181 | int i; | 1248 | area = &(zone->free_area[current_order]); |
1182 | for (i = 0;; i++) { | 1249 | fallback_mt = find_suitable_fallback(area, current_order, |
1183 | int migratetype = fallbacks[start_migratetype][i]; | 1250 | start_migratetype, false, &can_steal); |
1184 | int buddy_type = start_migratetype; | 1251 | if (fallback_mt == -1) |
1185 | 1252 | continue; | |
1186 | /* MIGRATE_RESERVE handled later if necessary */ | ||
1187 | if (migratetype == MIGRATE_RESERVE) | ||
1188 | break; | ||
1189 | |||
1190 | area = &(zone->free_area[current_order]); | ||
1191 | if (list_empty(&area->free_list[migratetype])) | ||
1192 | continue; | ||
1193 | |||
1194 | page = list_entry(area->free_list[migratetype].next, | ||
1195 | struct page, lru); | ||
1196 | area->nr_free--; | ||
1197 | |||
1198 | if (!is_migrate_cma(migratetype)) { | ||
1199 | try_to_steal_freepages(zone, page, | ||
1200 | start_migratetype, | ||
1201 | migratetype); | ||
1202 | } else { | ||
1203 | /* | ||
1204 | * When borrowing from MIGRATE_CMA, we need to | ||
1205 | * release the excess buddy pages to CMA | ||
1206 | * itself, and we do not try to steal extra | ||
1207 | * free pages. | ||
1208 | */ | ||
1209 | buddy_type = migratetype; | ||
1210 | } | ||
1211 | 1253 | ||
1212 | /* Remove the page from the freelists */ | 1254 | page = list_entry(area->free_list[fallback_mt].next, |
1213 | list_del(&page->lru); | 1255 | struct page, lru); |
1214 | rmv_page_order(page); | 1256 | if (can_steal) |
1257 | steal_suitable_fallback(zone, page, start_migratetype); | ||
1215 | 1258 | ||
1216 | expand(zone, page, order, current_order, area, | 1259 | /* Remove the page from the freelists */ |
1217 | buddy_type); | 1260 | area->nr_free--; |
1261 | list_del(&page->lru); | ||
1262 | rmv_page_order(page); | ||
1218 | 1263 | ||
1219 | /* | 1264 | expand(zone, page, order, current_order, area, |
1220 | * The freepage_migratetype may differ from pageblock's | 1265 | start_migratetype); |
1221 | * migratetype depending on the decisions in | 1266 | /* |
1222 | * try_to_steal_freepages(). This is OK as long as it | 1267 | * The freepage_migratetype may differ from pageblock's |
1223 | * does not differ for MIGRATE_CMA pageblocks. For CMA | 1268 | * migratetype depending on the decisions in |
1224 | * we need to make sure unallocated pages flushed from | 1269 | * try_to_steal_freepages(). This is OK as long as it |
1225 | * pcp lists are returned to the correct freelist. | 1270 | * does not differ for MIGRATE_CMA pageblocks. For CMA |
1226 | */ | 1271 | * we need to make sure unallocated pages flushed from |
1227 | set_freepage_migratetype(page, buddy_type); | 1272 | * pcp lists are returned to the correct freelist. |
1273 | */ | ||
1274 | set_freepage_migratetype(page, start_migratetype); | ||
1228 | 1275 | ||
1229 | trace_mm_page_alloc_extfrag(page, order, current_order, | 1276 | trace_mm_page_alloc_extfrag(page, order, current_order, |
1230 | start_migratetype, migratetype); | 1277 | start_migratetype, fallback_mt); |
1231 | 1278 | ||
1232 | return page; | 1279 | return page; |
1233 | } | ||
1234 | } | 1280 | } |
1235 | 1281 | ||
1236 | return NULL; | 1282 | return NULL; |
@@ -1249,7 +1295,11 @@ retry_reserve: | |||
1249 | page = __rmqueue_smallest(zone, order, migratetype); | 1295 | page = __rmqueue_smallest(zone, order, migratetype); |
1250 | 1296 | ||
1251 | if (unlikely(!page) && migratetype != MIGRATE_RESERVE) { | 1297 | if (unlikely(!page) && migratetype != MIGRATE_RESERVE) { |
1252 | page = __rmqueue_fallback(zone, order, migratetype); | 1298 | if (migratetype == MIGRATE_MOVABLE) |
1299 | page = __rmqueue_cma_fallback(zone, order); | ||
1300 | |||
1301 | if (!page) | ||
1302 | page = __rmqueue_fallback(zone, order, migratetype); | ||
1253 | 1303 | ||
1254 | /* | 1304 | /* |
1255 | * Use MIGRATE_RESERVE rather than fail an allocation. goto | 1305 | * Use MIGRATE_RESERVE rather than fail an allocation. goto |
@@ -2362,13 +2412,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, | |||
2362 | *did_some_progress = 1; | 2412 | *did_some_progress = 1; |
2363 | goto out; | 2413 | goto out; |
2364 | } | 2414 | } |
2365 | /* | 2415 | /* The OOM killer may not free memory on a specific node */ |
2366 | * GFP_THISNODE contains __GFP_NORETRY and we never hit this. | ||
2367 | * Sanity check for bare calls of __GFP_THISNODE, not real OOM. | ||
2368 | * The caller should handle page allocation failure by itself if | ||
2369 | * it specifies __GFP_THISNODE. | ||
2370 | * Note: Hugepage uses it but will hit PAGE_ALLOC_COSTLY_ORDER. | ||
2371 | */ | ||
2372 | if (gfp_mask & __GFP_THISNODE) | 2416 | if (gfp_mask & __GFP_THISNODE) |
2373 | goto out; | 2417 | goto out; |
2374 | } | 2418 | } |
@@ -2623,15 +2667,11 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, | |||
2623 | } | 2667 | } |
2624 | 2668 | ||
2625 | /* | 2669 | /* |
2626 | * GFP_THISNODE (meaning __GFP_THISNODE, __GFP_NORETRY and | 2670 | * If this allocation cannot block and it is for a specific node, then |
2627 | * __GFP_NOWARN set) should not cause reclaim since the subsystem | 2671 | * fail early. There's no need to wakeup kswapd or retry for a |
2628 | * (f.e. slab) using GFP_THISNODE may choose to trigger reclaim | 2672 | * speculative node-specific allocation. |
2629 | * using a larger set of nodes after it has established that the | ||
2630 | * allowed per node queues are empty and that nodes are | ||
2631 | * over allocated. | ||
2632 | */ | 2673 | */ |
2633 | if (IS_ENABLED(CONFIG_NUMA) && | 2674 | if (IS_ENABLED(CONFIG_NUMA) && (gfp_mask & __GFP_THISNODE) && !wait) |
2634 | (gfp_mask & GFP_THISNODE) == GFP_THISNODE) | ||
2635 | goto nopage; | 2675 | goto nopage; |
2636 | 2676 | ||
2637 | retry: | 2677 | retry: |
@@ -2824,7 +2864,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, | |||
2824 | /* | 2864 | /* |
2825 | * Check the zones suitable for the gfp_mask contain at least one | 2865 | * Check the zones suitable for the gfp_mask contain at least one |
2826 | * valid zone. It's possible to have an empty zonelist as a result | 2866 | * valid zone. It's possible to have an empty zonelist as a result |
2827 | * of GFP_THISNODE and a memoryless node | 2867 | * of __GFP_THISNODE and a memoryless node |
2828 | */ | 2868 | */ |
2829 | if (unlikely(!zonelist->_zonerefs->zone)) | 2869 | if (unlikely(!zonelist->_zonerefs->zone)) |
2830 | return NULL; | 2870 | return NULL; |
@@ -3201,38 +3241,31 @@ static void show_migration_types(unsigned char type) | |||
3201 | * Show free area list (used inside shift_scroll-lock stuff) | 3241 | * Show free area list (used inside shift_scroll-lock stuff) |
3202 | * We also calculate the percentage fragmentation. We do this by counting the | 3242 | * We also calculate the percentage fragmentation. We do this by counting the |
3203 | * memory on each free list with the exception of the first item on the list. | 3243 | * memory on each free list with the exception of the first item on the list. |
3204 | * Suppresses nodes that are not allowed by current's cpuset if | 3244 | * |
3205 | * SHOW_MEM_FILTER_NODES is passed. | 3245 | * Bits in @filter: |
3246 | * SHOW_MEM_FILTER_NODES: suppress nodes that are not allowed by current's | ||
3247 | * cpuset. | ||
3206 | */ | 3248 | */ |
3207 | void show_free_areas(unsigned int filter) | 3249 | void show_free_areas(unsigned int filter) |
3208 | { | 3250 | { |
3251 | unsigned long free_pcp = 0; | ||
3209 | int cpu; | 3252 | int cpu; |
3210 | struct zone *zone; | 3253 | struct zone *zone; |
3211 | 3254 | ||
3212 | for_each_populated_zone(zone) { | 3255 | for_each_populated_zone(zone) { |
3213 | if (skip_free_areas_node(filter, zone_to_nid(zone))) | 3256 | if (skip_free_areas_node(filter, zone_to_nid(zone))) |
3214 | continue; | 3257 | continue; |
3215 | show_node(zone); | ||
3216 | printk("%s per-cpu:\n", zone->name); | ||
3217 | 3258 | ||
3218 | for_each_online_cpu(cpu) { | 3259 | for_each_online_cpu(cpu) |
3219 | struct per_cpu_pageset *pageset; | 3260 | free_pcp += per_cpu_ptr(zone->pageset, cpu)->pcp.count; |
3220 | |||
3221 | pageset = per_cpu_ptr(zone->pageset, cpu); | ||
3222 | |||
3223 | printk("CPU %4d: hi:%5d, btch:%4d usd:%4d\n", | ||
3224 | cpu, pageset->pcp.high, | ||
3225 | pageset->pcp.batch, pageset->pcp.count); | ||
3226 | } | ||
3227 | } | 3261 | } |
3228 | 3262 | ||
3229 | printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n" | 3263 | printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n" |
3230 | " active_file:%lu inactive_file:%lu isolated_file:%lu\n" | 3264 | " active_file:%lu inactive_file:%lu isolated_file:%lu\n" |
3231 | " unevictable:%lu" | 3265 | " unevictable:%lu dirty:%lu writeback:%lu unstable:%lu\n" |
3232 | " dirty:%lu writeback:%lu unstable:%lu\n" | 3266 | " slab_reclaimable:%lu slab_unreclaimable:%lu\n" |
3233 | " free:%lu slab_reclaimable:%lu slab_unreclaimable:%lu\n" | ||
3234 | " mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n" | 3267 | " mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n" |
3235 | " free_cma:%lu\n", | 3268 | " free:%lu free_pcp:%lu free_cma:%lu\n", |
3236 | global_page_state(NR_ACTIVE_ANON), | 3269 | global_page_state(NR_ACTIVE_ANON), |
3237 | global_page_state(NR_INACTIVE_ANON), | 3270 | global_page_state(NR_INACTIVE_ANON), |
3238 | global_page_state(NR_ISOLATED_ANON), | 3271 | global_page_state(NR_ISOLATED_ANON), |
@@ -3243,13 +3276,14 @@ void show_free_areas(unsigned int filter) | |||
3243 | global_page_state(NR_FILE_DIRTY), | 3276 | global_page_state(NR_FILE_DIRTY), |
3244 | global_page_state(NR_WRITEBACK), | 3277 | global_page_state(NR_WRITEBACK), |
3245 | global_page_state(NR_UNSTABLE_NFS), | 3278 | global_page_state(NR_UNSTABLE_NFS), |
3246 | global_page_state(NR_FREE_PAGES), | ||
3247 | global_page_state(NR_SLAB_RECLAIMABLE), | 3279 | global_page_state(NR_SLAB_RECLAIMABLE), |
3248 | global_page_state(NR_SLAB_UNRECLAIMABLE), | 3280 | global_page_state(NR_SLAB_UNRECLAIMABLE), |
3249 | global_page_state(NR_FILE_MAPPED), | 3281 | global_page_state(NR_FILE_MAPPED), |
3250 | global_page_state(NR_SHMEM), | 3282 | global_page_state(NR_SHMEM), |
3251 | global_page_state(NR_PAGETABLE), | 3283 | global_page_state(NR_PAGETABLE), |
3252 | global_page_state(NR_BOUNCE), | 3284 | global_page_state(NR_BOUNCE), |
3285 | global_page_state(NR_FREE_PAGES), | ||
3286 | free_pcp, | ||
3253 | global_page_state(NR_FREE_CMA_PAGES)); | 3287 | global_page_state(NR_FREE_CMA_PAGES)); |
3254 | 3288 | ||
3255 | for_each_populated_zone(zone) { | 3289 | for_each_populated_zone(zone) { |
@@ -3257,6 +3291,11 @@ void show_free_areas(unsigned int filter) | |||
3257 | 3291 | ||
3258 | if (skip_free_areas_node(filter, zone_to_nid(zone))) | 3292 | if (skip_free_areas_node(filter, zone_to_nid(zone))) |
3259 | continue; | 3293 | continue; |
3294 | |||
3295 | free_pcp = 0; | ||
3296 | for_each_online_cpu(cpu) | ||
3297 | free_pcp += per_cpu_ptr(zone->pageset, cpu)->pcp.count; | ||
3298 | |||
3260 | show_node(zone); | 3299 | show_node(zone); |
3261 | printk("%s" | 3300 | printk("%s" |
3262 | " free:%lukB" | 3301 | " free:%lukB" |
@@ -3283,6 +3322,8 @@ void show_free_areas(unsigned int filter) | |||
3283 | " pagetables:%lukB" | 3322 | " pagetables:%lukB" |
3284 | " unstable:%lukB" | 3323 | " unstable:%lukB" |
3285 | " bounce:%lukB" | 3324 | " bounce:%lukB" |
3325 | " free_pcp:%lukB" | ||
3326 | " local_pcp:%ukB" | ||
3286 | " free_cma:%lukB" | 3327 | " free_cma:%lukB" |
3287 | " writeback_tmp:%lukB" | 3328 | " writeback_tmp:%lukB" |
3288 | " pages_scanned:%lu" | 3329 | " pages_scanned:%lu" |
@@ -3314,6 +3355,8 @@ void show_free_areas(unsigned int filter) | |||
3314 | K(zone_page_state(zone, NR_PAGETABLE)), | 3355 | K(zone_page_state(zone, NR_PAGETABLE)), |
3315 | K(zone_page_state(zone, NR_UNSTABLE_NFS)), | 3356 | K(zone_page_state(zone, NR_UNSTABLE_NFS)), |
3316 | K(zone_page_state(zone, NR_BOUNCE)), | 3357 | K(zone_page_state(zone, NR_BOUNCE)), |
3358 | K(free_pcp), | ||
3359 | K(this_cpu_read(zone->pageset->pcp.count)), | ||
3317 | K(zone_page_state(zone, NR_FREE_CMA_PAGES)), | 3360 | K(zone_page_state(zone, NR_FREE_CMA_PAGES)), |
3318 | K(zone_page_state(zone, NR_WRITEBACK_TEMP)), | 3361 | K(zone_page_state(zone, NR_WRITEBACK_TEMP)), |
3319 | K(zone_page_state(zone, NR_PAGES_SCANNED)), | 3362 | K(zone_page_state(zone, NR_PAGES_SCANNED)), |
@@ -5717,7 +5760,7 @@ static void __setup_per_zone_wmarks(void) | |||
5717 | * value here. | 5760 | * value here. |
5718 | * | 5761 | * |
5719 | * The WMARK_HIGH-WMARK_LOW and (WMARK_LOW-WMARK_MIN) | 5762 | * The WMARK_HIGH-WMARK_LOW and (WMARK_LOW-WMARK_MIN) |
5720 | * deltas controls asynch page reclaim, and so should | 5763 | * deltas control asynch page reclaim, and so should |
5721 | * not be capped for highmem. | 5764 | * not be capped for highmem. |
5722 | */ | 5765 | */ |
5723 | unsigned long min_pages; | 5766 | unsigned long min_pages; |
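
To make the "over half of it is free" threshold in steal_suitable_fallback() concrete, here is a worked example with assumed values (pageblock_order of 9, as on a typical x86_64 configuration):

/*
 * Worked example, values assumed:
 *
 *	pageblock_order            = 9   -> 512 pages per pageblock
 *	1 << (pageblock_order - 1) = 256 -> the "over half" threshold
 *
 * If move_freepages_block() reports fewer than 256 moved pages, only the
 * moved free pages change lists and the pageblock keeps its migratetype;
 * at 256 or more (or with page_group_by_mobility_disabled set) the whole
 * pageblock is claimed for start_type.
 */
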
@@ -857,6 +857,11 @@ static inline void *____cache_alloc_node(struct kmem_cache *cachep, | |||
857 | return NULL; | 857 | return NULL; |
858 | } | 858 | } |
859 | 859 | ||
860 | static inline gfp_t gfp_exact_node(gfp_t flags) | ||
861 | { | ||
862 | return flags; | ||
863 | } | ||
864 | |||
860 | #else /* CONFIG_NUMA */ | 865 | #else /* CONFIG_NUMA */ |
861 | 866 | ||
862 | static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int); | 867 | static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int); |
@@ -1023,6 +1028,15 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) | |||
1023 | 1028 | ||
1024 | return __cache_free_alien(cachep, objp, node, page_node); | 1029 | return __cache_free_alien(cachep, objp, node, page_node); |
1025 | } | 1030 | } |
1031 | |||
1032 | /* | ||
1033 | * Construct gfp mask to allocate from a specific node but do not invoke reclaim | ||
1034 | * or warn about failures. | ||
1035 | */ | ||
1036 | static inline gfp_t gfp_exact_node(gfp_t flags) | ||
1037 | { | ||
1038 | return (flags | __GFP_THISNODE | __GFP_NOWARN) & ~__GFP_WAIT; | ||
1039 | } | ||
1026 | #endif | 1040 | #endif |
1027 | 1041 | ||
1028 | /* | 1042 | /* |
@@ -2825,7 +2839,7 @@ alloc_done: | |||
2825 | if (unlikely(!ac->avail)) { | 2839 | if (unlikely(!ac->avail)) { |
2826 | int x; | 2840 | int x; |
2827 | force_grow: | 2841 | force_grow: |
2828 | x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL); | 2842 | x = cache_grow(cachep, gfp_exact_node(flags), node, NULL); |
2829 | 2843 | ||
2830 | /* cache_grow can reenable interrupts, then ac could change. */ | 2844 | /* cache_grow can reenable interrupts, then ac could change. */ |
2831 | ac = cpu_cache_get(cachep); | 2845 | ac = cpu_cache_get(cachep); |
@@ -3019,7 +3033,7 @@ retry: | |||
3019 | get_node(cache, nid) && | 3033 | get_node(cache, nid) && |
3020 | get_node(cache, nid)->free_objects) { | 3034 | get_node(cache, nid)->free_objects) { |
3021 | obj = ____cache_alloc_node(cache, | 3035 | obj = ____cache_alloc_node(cache, |
3022 | flags | GFP_THISNODE, nid); | 3036 | gfp_exact_node(flags), nid); |
3023 | if (obj) | 3037 | if (obj) |
3024 | break; | 3038 | break; |
3025 | } | 3039 | } |
@@ -3047,7 +3061,7 @@ retry: | |||
3047 | nid = page_to_nid(page); | 3061 | nid = page_to_nid(page); |
3048 | if (cache_grow(cache, flags, nid, page)) { | 3062 | if (cache_grow(cache, flags, nid, page)) { |
3049 | obj = ____cache_alloc_node(cache, | 3063 | obj = ____cache_alloc_node(cache, |
3050 | flags | GFP_THISNODE, nid); | 3064 | gfp_exact_node(flags), nid); |
3051 | if (!obj) | 3065 | if (!obj) |
3052 | /* | 3066 | /* |
3053 | * Another processor may allocate the | 3067 | * Another processor may allocate the |
@@ -3118,7 +3132,7 @@ retry: | |||
3118 | 3132 | ||
3119 | must_grow: | 3133 | must_grow: |
3120 | spin_unlock(&n->list_lock); | 3134 | spin_unlock(&n->list_lock); |
3121 | x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL); | 3135 | x = cache_grow(cachep, gfp_exact_node(flags), nodeid, NULL); |
3122 | if (x) | 3136 | if (x) |
3123 | goto retry; | 3137 | goto retry; |
3124 | 3138 | ||
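
Spelled out with an assumed starting mask, the effect of gfp_exact_node() on the cache_grow() call sites above is:

/*
 * Example only; the flag values are whatever the running kernel defines.
 *
 *	gfp_exact_node(GFP_KERNEL)
 *	    == (GFP_KERNEL | __GFP_THISNODE | __GFP_NOWARN) & ~__GFP_WAIT
 *
 * The page allocation behind cache_grow() is therefore pinned to the
 * requested node, cannot sleep or enter direct reclaim, and fails without
 * a warning, leaving any node fallback to slab itself.
 */
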
@@ -532,7 +532,7 @@ int __kmem_cache_create(struct kmem_cache *c, unsigned long flags) | |||
532 | return 0; | 532 | return 0; |
533 | } | 533 | } |
534 | 534 | ||
535 | void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node) | 535 | static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node) |
536 | { | 536 | { |
537 | void *b; | 537 | void *b; |
538 | 538 | ||
@@ -558,7 +558,6 @@ void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node) | |||
558 | kmemleak_alloc_recursive(b, c->size, 1, c->flags, flags); | 558 | kmemleak_alloc_recursive(b, c->size, 1, c->flags, flags); |
559 | return b; | 559 | return b; |
560 | } | 560 | } |
561 | EXPORT_SYMBOL(slob_alloc_node); | ||
562 | 561 | ||
563 | void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) | 562 | void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) |
564 | { | 563 | { |
@@ -374,7 +374,7 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page | |||
374 | if (cmpxchg_double(&page->freelist, &page->counters, | 374 | if (cmpxchg_double(&page->freelist, &page->counters, |
375 | freelist_old, counters_old, | 375 | freelist_old, counters_old, |
376 | freelist_new, counters_new)) | 376 | freelist_new, counters_new)) |
377 | return 1; | 377 | return true; |
378 | } else | 378 | } else |
379 | #endif | 379 | #endif |
380 | { | 380 | { |
@@ -384,7 +384,7 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page | |||
384 | page->freelist = freelist_new; | 384 | page->freelist = freelist_new; |
385 | set_page_slub_counters(page, counters_new); | 385 | set_page_slub_counters(page, counters_new); |
386 | slab_unlock(page); | 386 | slab_unlock(page); |
387 | return 1; | 387 | return true; |
388 | } | 388 | } |
389 | slab_unlock(page); | 389 | slab_unlock(page); |
390 | } | 390 | } |
@@ -396,7 +396,7 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page | |||
396 | pr_info("%s %s: cmpxchg double redo ", n, s->name); | 396 | pr_info("%s %s: cmpxchg double redo ", n, s->name); |
397 | #endif | 397 | #endif |
398 | 398 | ||
399 | return 0; | 399 | return false; |
400 | } | 400 | } |
401 | 401 | ||
402 | static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page, | 402 | static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page, |
@@ -410,7 +410,7 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page, | |||
410 | if (cmpxchg_double(&page->freelist, &page->counters, | 410 | if (cmpxchg_double(&page->freelist, &page->counters, |
411 | freelist_old, counters_old, | 411 | freelist_old, counters_old, |
412 | freelist_new, counters_new)) | 412 | freelist_new, counters_new)) |
413 | return 1; | 413 | return true; |
414 | } else | 414 | } else |
415 | #endif | 415 | #endif |
416 | { | 416 | { |
@@ -424,7 +424,7 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page, | |||
424 | set_page_slub_counters(page, counters_new); | 424 | set_page_slub_counters(page, counters_new); |
425 | slab_unlock(page); | 425 | slab_unlock(page); |
426 | local_irq_restore(flags); | 426 | local_irq_restore(flags); |
427 | return 1; | 427 | return true; |
428 | } | 428 | } |
429 | slab_unlock(page); | 429 | slab_unlock(page); |
430 | local_irq_restore(flags); | 430 | local_irq_restore(flags); |
@@ -437,7 +437,7 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page, | |||
437 | pr_info("%s %s: cmpxchg double redo ", n, s->name); | 437 | pr_info("%s %s: cmpxchg double redo ", n, s->name); |
438 | #endif | 438 | #endif |
439 | 439 | ||
440 | return 0; | 440 | return false; |
441 | } | 441 | } |
442 | 442 | ||
443 | #ifdef CONFIG_SLUB_DEBUG | 443 | #ifdef CONFIG_SLUB_DEBUG |
@@ -1137,15 +1137,6 @@ static int __init setup_slub_debug(char *str) | |||
1137 | */ | 1137 | */ |
1138 | goto check_slabs; | 1138 | goto check_slabs; |
1139 | 1139 | ||
1140 | if (tolower(*str) == 'o') { | ||
1141 | /* | ||
1142 | * Avoid enabling debugging on caches if its minimum order | ||
1143 | * would increase as a result. | ||
1144 | */ | ||
1145 | disable_higher_order_debug = 1; | ||
1146 | goto out; | ||
1147 | } | ||
1148 | |||
1149 | slub_debug = 0; | 1140 | slub_debug = 0; |
1150 | if (*str == '-') | 1141 | if (*str == '-') |
1151 | /* | 1142 | /* |
@@ -1176,6 +1167,13 @@ static int __init setup_slub_debug(char *str) | |||
1176 | case 'a': | 1167 | case 'a': |
1177 | slub_debug |= SLAB_FAILSLAB; | 1168 | slub_debug |= SLAB_FAILSLAB; |
1178 | break; | 1169 | break; |
1170 | case 'o': | ||
1171 | /* | ||
1172 | * Avoid enabling debugging on caches if their minimum | ||
1173 | * order would increase as a result. | ||
1174 | */ | ||
1175 | disable_higher_order_debug = 1; | ||
1176 | break; | ||
1179 | default: | 1177 | default: |
1180 | pr_err("slub_debug option '%c' unknown. skipped\n", | 1178 | pr_err("slub_debug option '%c' unknown. skipped\n", |
1181 | *str); | 1179 | *str); |
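
The practical effect of folding the 'o' handling into the flag switch is that it now composes with the other debug characters instead of having to be the lone leading modifier: a boot option such as slub_debug=FZO (an assumed combination of the existing flag letters) enables sanity checks and red zoning while still skipping caches whose minimum order would grow, whereas previously an option string beginning with 'o' ended parsing at that character.
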
diff --git a/mm/truncate.c b/mm/truncate.c index ddec5a5966d7..7a9d8a3cb143 100644 --- a/mm/truncate.c +++ b/mm/truncate.c | |||
@@ -93,35 +93,6 @@ void do_invalidatepage(struct page *page, unsigned int offset, | |||
93 | } | 93 | } |
94 | 94 | ||
95 | /* | 95 | /* |
96 | * This cancels just the dirty bit on the kernel page itself, it | ||
97 | * does NOT actually remove dirty bits on any mmap's that may be | ||
98 | * around. It also leaves the page tagged dirty, so any sync | ||
99 | * activity will still find it on the dirty lists, and in particular, | ||
100 | * clear_page_dirty_for_io() will still look at the dirty bits in | ||
101 | * the VM. | ||
102 | * | ||
103 | * Doing this should *normally* only ever be done when a page | ||
104 | * is truncated, and is not actually mapped anywhere at all. However, | ||
105 | * fs/buffer.c does this when it notices that somebody has cleaned | ||
106 | * out all the buffers on a page without actually doing it through | ||
107 | * the VM. Can you say "ext3 is horribly ugly"? Thought you could. | ||
108 | */ | ||
109 | void cancel_dirty_page(struct page *page, unsigned int account_size) | ||
110 | { | ||
111 | if (TestClearPageDirty(page)) { | ||
112 | struct address_space *mapping = page->mapping; | ||
113 | if (mapping && mapping_cap_account_dirty(mapping)) { | ||
114 | dec_zone_page_state(page, NR_FILE_DIRTY); | ||
115 | dec_bdi_stat(inode_to_bdi(mapping->host), | ||
116 | BDI_RECLAIMABLE); | ||
117 | if (account_size) | ||
118 | task_io_account_cancelled_write(account_size); | ||
119 | } | ||
120 | } | ||
121 | } | ||
122 | EXPORT_SYMBOL(cancel_dirty_page); | ||
123 | |||
124 | /* | ||
125 | * If truncate cannot remove the fs-private metadata from the page, the page | 96 | * If truncate cannot remove the fs-private metadata from the page, the page |
126 | * becomes orphaned. It will be left on the LRU and may even be mapped into | 97 | * becomes orphaned. It will be left on the LRU and may even be mapped into |
127 | * user pagetables if we're racing with filemap_fault(). | 98 | * user pagetables if we're racing with filemap_fault(). |
@@ -140,7 +111,13 @@ truncate_complete_page(struct address_space *mapping, struct page *page) | |||
140 | if (page_has_private(page)) | 111 | if (page_has_private(page)) |
141 | do_invalidatepage(page, 0, PAGE_CACHE_SIZE); | 112 | do_invalidatepage(page, 0, PAGE_CACHE_SIZE); |
142 | 113 | ||
143 | cancel_dirty_page(page, PAGE_CACHE_SIZE); | 114 | /* |
115 | * Some filesystems seem to re-dirty the page even after | ||
116 | * the VM has canceled the dirty bit (e.g. ext3 journaling). | ||
117 | * Hence the dirty accounting check is placed after invalidation. | ||
118 | */ | ||
119 | if (TestClearPageDirty(page)) | ||
120 | account_page_cleaned(page, mapping); | ||
144 | 121 | ||
145 | ClearPageMappedToDisk(page); | 122 | ClearPageMappedToDisk(page); |
146 | delete_from_page_cache(page); | 123 | delete_from_page_cache(page); |
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 49abccf29a29..a5bbdd3b5d67 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <linux/atomic.h> | 29 | #include <linux/atomic.h> |
30 | #include <linux/compiler.h> | 30 | #include <linux/compiler.h> |
31 | #include <linux/llist.h> | 31 | #include <linux/llist.h> |
32 | #include <linux/bitops.h> | ||
32 | 33 | ||
33 | #include <asm/uaccess.h> | 34 | #include <asm/uaccess.h> |
34 | #include <asm/tlbflush.h> | 35 | #include <asm/tlbflush.h> |
@@ -74,6 +75,8 @@ static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end) | |||
74 | pmd = pmd_offset(pud, addr); | 75 | pmd = pmd_offset(pud, addr); |
75 | do { | 76 | do { |
76 | next = pmd_addr_end(addr, end); | 77 | next = pmd_addr_end(addr, end); |
78 | if (pmd_clear_huge(pmd)) | ||
79 | continue; | ||
77 | if (pmd_none_or_clear_bad(pmd)) | 80 | if (pmd_none_or_clear_bad(pmd)) |
78 | continue; | 81 | continue; |
79 | vunmap_pte_range(pmd, addr, next); | 82 | vunmap_pte_range(pmd, addr, next); |
@@ -88,6 +91,8 @@ static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end) | |||
88 | pud = pud_offset(pgd, addr); | 91 | pud = pud_offset(pgd, addr); |
89 | do { | 92 | do { |
90 | next = pud_addr_end(addr, end); | 93 | next = pud_addr_end(addr, end); |
94 | if (pud_clear_huge(pud)) | ||
95 | continue; | ||
91 | if (pud_none_or_clear_bad(pud)) | 96 | if (pud_none_or_clear_bad(pud)) |
92 | continue; | 97 | continue; |
93 | vunmap_pmd_range(pud, addr, next); | 98 | vunmap_pmd_range(pud, addr, next); |
@@ -1314,7 +1319,8 @@ static struct vm_struct *__get_vm_area_node(unsigned long size, | |||
1314 | 1319 | ||
1315 | BUG_ON(in_interrupt()); | 1320 | BUG_ON(in_interrupt()); |
1316 | if (flags & VM_IOREMAP) | 1321 | if (flags & VM_IOREMAP) |
1317 | align = 1ul << clamp(fls(size), PAGE_SHIFT, IOREMAP_MAX_ORDER); | 1322 | align = 1ul << clamp_t(int, fls_long(size), |
1323 | PAGE_SHIFT, IOREMAP_MAX_ORDER); | ||
1318 | 1324 | ||
1319 | size = PAGE_ALIGN(size); | 1325 | size = PAGE_ALIGN(size); |
1320 | if (unlikely(!size)) | 1326 | if (unlikely(!size)) |
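
A worked example of why the ioremap alignment calculation switches to fls_long() and clamp_t(); the numbers assume a 64-bit kernel with PAGE_SHIFT of 12 and the generic IOREMAP_MAX_ORDER default of 7 + PAGE_SHIFT = 19:

/*
 * Worked example, values assumed:
 *
 *	size = 8GB = 0x200000000
 *	fls(size)      sees only the low 32 bits -> 0, clamped to 12,
 *	               i.e. a bogus PAGE_SIZE alignment
 *	fls_long(size) -> 34, clamped to 19 -> align = 512KB, as intended
 *
 * clamp_t(int, ...) presumably also sidesteps clamp()'s strict type check
 * between the result of fls_long() and the two order limits.
 */
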
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 50ec42f170a0..2dacc7b5af23 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c | |||
@@ -100,7 +100,9 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, | |||
100 | 100 | ||
101 | new_stats = | 101 | new_stats = |
102 | kmem_cache_alloc_node(flow_stats_cache, | 102 | kmem_cache_alloc_node(flow_stats_cache, |
103 | GFP_THISNODE | | 103 | GFP_NOWAIT | |
104 | __GFP_THISNODE | | ||
105 | __GFP_NOWARN | | ||
104 | __GFP_NOMEMALLOC, | 106 | __GFP_NOMEMALLOC, |
105 | node); | 107 | node); |
106 | if (likely(new_stats)) { | 108 | if (likely(new_stats)) { |
diff --git a/scripts/coccinelle/misc/bugon.cocci b/scripts/coccinelle/misc/bugon.cocci index 3b7eec24fb5a..27c97f1f2767 100644 --- a/scripts/coccinelle/misc/bugon.cocci +++ b/scripts/coccinelle/misc/bugon.cocci | |||
@@ -57,6 +57,6 @@ coccilib.org.print_todo(p[0], "WARNING use BUG_ON") | |||
57 | p << r.p; | 57 | p << r.p; |
58 | @@ | 58 | @@ |
59 | 59 | ||
60 | msg="WARNING: Use BUG_ON" | 60 | msg="WARNING: Use BUG_ON instead of if condition followed by BUG.\nPlease make sure the condition has no side effects (see conditional BUG_ON definition in include/asm-generic/bug.h)" |
61 | coccilib.report.print_report(p[0], msg) | 61 | coccilib.report.print_report(p[0], msg) |
62 | 62 | ||
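
For readers unfamiliar with the rule, the transformation the expanded message describes looks like this (illustrative C, not taken from the tree):

	/* flagged by the rule */
	if (err < 0)
		BUG();

	/* suggested form */
	BUG_ON(err < 0);

	/*
	 * Per the new message, BUG_ON() has a conditional definition in
	 * include/asm-generic/bug.h, so the condition must be free of side
	 * effects: a test such as "if (i++ > max) BUG();" should not be
	 * converted blindly.
	 */
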