153 files changed, 2312 insertions, 1419 deletions
diff --git a/Documentation/cma/debugfs.txt b/Documentation/cma/debugfs.txt
new file mode 100644
index 000000000000..6cef20a8cedc
--- /dev/null
+++ b/Documentation/cma/debugfs.txt
@@ -0,0 +1,21 @@
+The CMA debugfs interface is useful to retrieve basic information out of the
+different CMA areas and to test allocation/release in each of the areas.
+
+Each CMA zone represents a directory under <debugfs>/cma/, indexed by the
+kernel's CMA index. So the first CMA zone would be:
+
+	<debugfs>/cma/cma-0
+
+The structure of the files created under that directory is as follows:
+
+ - [RO] base_pfn: The base PFN (Page Frame Number) of the zone.
+ - [RO] count: Amount of memory in the CMA area.
+ - [RO] order_per_bit: Order of pages represented by one bit.
+ - [RO] bitmap: The bitmap of page states in the zone.
+ - [WO] alloc: Allocate N pages from that CMA area. For example:
+
+	echo 5 > <debugfs>/cma/cma-2/alloc
+
+would try to allocate 5 pages from the cma-2 area.
+
+ - [WO] free: Free N pages from that CMA area, similar to the above.
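A quick way to exercise the interface documented above (illustrative only, not part of the patch, and assuming debugfs is mounted at the conventional /sys/kernel/debug):

    cat /sys/kernel/debug/cma/cma-0/count        # size of the first CMA area
    cat /sys/kernel/debug/cma/cma-0/base_pfn     # its base page frame number
    echo 5 > /sys/kernel/debug/cma/cma-0/alloc   # try to allocate 5 pages from it
    echo 5 > /sys/kernel/debug/cma/cma-0/free    # release those 5 pages again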
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 05c36118f8d7..327556349757 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1989,7 +1989,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 seconds. Use this parameter to check at some
 other rate. 0 disables periodic checking.

-memtest= [KNL,X86] Enable memtest
+memtest= [KNL,X86,ARM] Enable memtest
 Format: <integer>
 default : 0 <disable>
 Specifies the number of memtest passes to be
@@ -2236,8 +2236,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.

 nmi_watchdog= [KNL,BUGS=X86] Debugging features for SMP kernels
 Format: [panic,][nopanic,][num]
-Valid num: 0
+Valid num: 0 or 1
 0 - turn nmi_watchdog off
+1 - turn nmi_watchdog on
 When panic is specified, panic when an NMI watchdog
 timeout occurs (or 'nopanic' to override the opposite
 default).
@@ -2322,6 +2323,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 register save and restore. The kernel will only save
 legacy floating-point registers on task switch.

+nohugeiomap [KNL,x86] Disable kernel huge I/O mappings.
+
 noxsave [BUGS=X86] Disables x86 extended register state save
 and restore using xsave. The kernel will fallback to
 enabling legacy floating-point and sse state.
@@ -2464,7 +2467,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.

 nousb [USB] Disable the USB subsystem

-nowatchdog [KNL] Disable the lockup detector (NMI watchdog).
+nowatchdog [KNL] Disable both lockup detectors, i.e.
+soft-lockup and NMI watchdog (hard-lockup).

 nowb [ARM]

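For illustration only (a sketch, not text from the patch), a boot command line combining the parameters touched above might be:

    memtest=4 nmi_watchdog=panic,1 nohugeiomap

i.e. four memtest passes at boot, the NMI watchdog enabled and set to panic on a hard-lockup timeout, and kernel huge I/O mappings disabled.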
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 83ab25660fc9..99d7eb3a1416 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -77,12 +77,14 @@ show up in /proc/sys/kernel:
 - shmmax [ sysv ipc ]
 - shmmni
 - softlockup_all_cpu_backtrace
+- soft_watchdog
 - stop-a [ SPARC only ]
 - sysrq ==> Documentation/sysrq.txt
 - sysctl_writes_strict
 - tainted
 - threads-max
 - unknown_nmi_panic
+- watchdog
 - watchdog_thresh
 - version

@@ -417,16 +419,23 @@ successful IPC object allocation.

 nmi_watchdog:

-Enables/Disables the NMI watchdog on x86 systems. When the value is
-non-zero the NMI watchdog is enabled and will continuously test all
-online cpus to determine whether or not they are still functioning
-properly. Currently, passing "nmi_watchdog=" parameter at boot time is
-required for this function to work.
+This parameter can be used to control the NMI watchdog
+(i.e. the hard lockup detector) on x86 systems.

-If LAPIC NMI watchdog method is in use (nmi_watchdog=2 kernel
-parameter), the NMI watchdog shares registers with oprofile. By
-disabling the NMI watchdog, oprofile may have more registers to
-utilize.
+0 - disable the hard lockup detector
+1 - enable the hard lockup detector
+
+The hard lockup detector monitors each CPU for its ability to respond to
+timer interrupts. The mechanism utilizes CPU performance counter registers
+that are programmed to generate Non-Maskable Interrupts (NMIs) periodically
+while a CPU is busy. Hence, the alternative name 'NMI watchdog'.
+
+The NMI watchdog is disabled by default if the kernel is running as a guest
+in a KVM virtual machine. This default can be overridden by adding
+
+nmi_watchdog=1
+
+to the guest kernel command line (see Documentation/kernel-parameters.txt).

 ==============================================================

@@ -816,6 +825,22 @@ NMI.

 ==============================================================

+soft_watchdog
+
+This parameter can be used to control the soft lockup detector.
+
+0 - disable the soft lockup detector
+1 - enable the soft lockup detector
+
+The soft lockup detector monitors CPUs for threads that are hogging the CPUs
+without rescheduling voluntarily, and thus prevent the 'watchdog/N' threads
+from running. The mechanism depends on the CPUs ability to respond to timer
+interrupts which are needed for the 'watchdog/N' threads to be woken up by
+the watchdog timer function, otherwise the NMI watchdog - if enabled - can
+detect a hard lockup condition.
+
+==============================================================
+
 tainted:

 Non-zero if the kernel has been tainted. Numeric values, which
@@ -858,6 +883,25 @@ example. If a system hangs up, try pressing the NMI switch.

 ==============================================================

+watchdog:
+
+This parameter can be used to disable or enable the soft lockup detector
+_and_ the NMI watchdog (i.e. the hard lockup detector) at the same time.
+
+0 - disable both lockup detectors
+1 - enable both lockup detectors
+
+The soft lockup detector and the NMI watchdog can also be disabled or
+enabled individually, using the soft_watchdog and nmi_watchdog parameters.
+If the watchdog parameter is read, for example by executing
+
+cat /proc/sys/kernel/watchdog
+
+the output of this command (0 or 1) shows the logical OR of soft_watchdog
+and nmi_watchdog.
+
+==============================================================
+
 watchdog_thresh:

 This value can be used to control the frequency of hrtimer and NMI
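A minimal sketch of driving these knobs at runtime through the usual sysctl files (illustrative only, not part of the patch):

    cat /proc/sys/kernel/watchdog              # 1 if either lockup detector is enabled
    echo 0 > /proc/sys/kernel/soft_watchdog    # disable only the soft lockup detector
    echo 0 > /proc/sys/kernel/nmi_watchdog     # disable only the hard lockup detector
    echo 1 > /proc/sys/kernel/watchdog         # re-enable both at once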
diff --git a/Documentation/vm/cleancache.txt b/Documentation/vm/cleancache.txt
index 01d76282444e..e4b49df7a048 100644
--- a/Documentation/vm/cleancache.txt
+++ b/Documentation/vm/cleancache.txt
@@ -28,9 +28,7 @@ IMPLEMENTATION OVERVIEW
 A cleancache "backend" that provides transcendent memory registers itself
 to the kernel's cleancache "frontend" by calling cleancache_register_ops,
 passing a pointer to a cleancache_ops structure with funcs set appropriately.
-Note that cleancache_register_ops returns the previous settings so that
-chaining can be performed if desired. The functions provided must conform to
-certain semantics as follows:
+The functions provided must conform to certain semantics as follows:

 Most important, cleancache is "ephemeral". Pages which are copied into
 cleancache have an indefinite lifetime which is completely unknowable
diff --git a/Documentation/vm/unevictable-lru.txt b/Documentation/vm/unevictable-lru.txt
index 744f82f86c58..86cb4624fc5a 100644
--- a/Documentation/vm/unevictable-lru.txt
+++ b/Documentation/vm/unevictable-lru.txt
@@ -317,7 +317,7 @@ If the VMA passes some filtering as described in "Filtering Special Vmas"
 below, mlock_fixup() will attempt to merge the VMA with its neighbors or split
 off a subset of the VMA if the range does not cover the entire VMA. Once the
 VMA has been merged or split or neither, mlock_fixup() will call
-__mlock_vma_pages_range() to fault in the pages via get_user_pages() and to
+populate_vma_page_range() to fault in the pages via get_user_pages() and to
 mark the pages as mlocked via mlock_vma_page().

 Note that the VMA being mlocked might be mapped with PROT_NONE. In this case,
@@ -327,7 +327,7 @@ fault path or in vmscan.

 Also note that a page returned by get_user_pages() could be truncated or
 migrated out from under us, while we're trying to mlock it. To detect this,
-__mlock_vma_pages_range() checks page_mapping() after acquiring the page lock.
+populate_vma_page_range() checks page_mapping() after acquiring the page lock.
 If the page is still associated with its mapping, we'll go ahead and call
 mlock_vma_page(). If the mapping is gone, we just unlock the page and move on.
 In the worst case, this will result in a page mapped in a VM_LOCKED VMA
@@ -392,7 +392,7 @@ ignored for munlock.

 If the VMA is VM_LOCKED, mlock_fixup() again attempts to merge or split off the
 specified range. The range is then munlocked via the function
-__mlock_vma_pages_range() - the same function used to mlock a VMA range -
+populate_vma_page_range() - the same function used to mlock a VMA range -
 passing a flag to indicate that munlock() is being performed.

 Because the VMA access protections could have been changed to PROT_NONE after
@@ -402,7 +402,7 @@ get_user_pages() was enhanced to accept a flag to ignore the permissions when
 fetching the pages - all of which should be resident as a result of previous
 mlocking.

-For munlock(), __mlock_vma_pages_range() unlocks individual pages by calling
+For munlock(), populate_vma_page_range() unlocks individual pages by calling
 munlock_vma_page(). munlock_vma_page() unconditionally clears the PG_mlocked
 flag using TestClearPageMlocked(). As with mlock_vma_page(),
 munlock_vma_page() use the Test*PageMlocked() function to handle the case where
@@ -463,21 +463,11 @@ populate the page table.

 To mlock a range of memory under the unevictable/mlock infrastructure, the
 mmap() handler and task address space expansion functions call
-mlock_vma_pages_range() specifying the vma and the address range to mlock.
-mlock_vma_pages_range() filters VMAs like mlock_fixup(), as described above in
-"Filtering Special VMAs". It will clear the VM_LOCKED flag, which will have
-already been set by the caller, in filtered VMAs. Thus these VMA's need not be
-visited for munlock when the region is unmapped.
-
-For "normal" VMAs, mlock_vma_pages_range() calls __mlock_vma_pages_range() to
-fault/allocate the pages and mlock them. Again, like mlock_fixup(),
-mlock_vma_pages_range() downgrades the mmap semaphore to read mode before
-attempting to fault/allocate and mlock the pages and "upgrades" the semaphore
-back to write mode before returning.
-
-The callers of mlock_vma_pages_range() will have already added the memory range
+populate_vma_page_range() specifying the vma and the address range to mlock.
+
+The callers of populate_vma_page_range() will have already added the memory range
 to be mlocked to the task's "locked_vm". To account for filtered VMAs,
-mlock_vma_pages_range() returns the number of pages NOT mlocked. All of the
+populate_vma_page_range() returns the number of pages NOT mlocked. All of the
 callers then subtract a non-negative return value from the task's locked_vm. A
 negative return value represent an error - for example, from get_user_pages()
 attempting to fault in a VMA with PROT_NONE access. In this case, we leave the
diff --git a/arch/Kconfig b/arch/Kconfig
index 05d7a8a458d5..e1068987bad1 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -446,6 +446,9 @@ config HAVE_IRQ_TIME_ACCOUNTING
 config HAVE_ARCH_TRANSPARENT_HUGEPAGE
 bool

+config HAVE_ARCH_HUGE_VMAP
+bool
+
 config HAVE_ARCH_SOFT_DIRTY
 bool

@@ -484,6 +487,18 @@ config HAVE_IRQ_EXIT_ON_IRQ_STACK
 This spares a stack switch and improves cache usage on softirq
 processing.

+config PGTABLE_LEVELS
+int
+default 2
+
+config ARCH_HAS_ELF_RANDOMIZE
+bool
+help
+An architecture supports choosing randomized locations for
+stack, mmap, brk, and ET_DYN. Defined functions:
+- arch_mmap_rnd()
+- arch_randomize_brk()
+
 #
 # ABI hall of shame
 #
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index b7ff9a318c31..bf9e9d3b3792 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -76,6 +76,10 @@ config GENERIC_ISA_DMA
 bool
 default y

+config PGTABLE_LEVELS
+int
+default 3
+
 source "init/Kconfig"
 source "kernel/Kconfig.freezer"

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index cf4c0c99aa25..4b62f4caf0ce 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1,8 +1,8 @@
 config ARM
 bool
 default y
-select ARCH_BINFMT_ELF_RANDOMIZE_PIE
 select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
+select ARCH_HAS_ELF_RANDOMIZE
 select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
 select ARCH_HAVE_CUSTOM_GPIO_H
 select ARCH_HAS_GCOV_PROFILE_ALL
@@ -286,6 +286,11 @@ config GENERIC_BUG
 def_bool y
 depends on BUG

+config PGTABLE_LEVELS
+int
+default 3 if ARM_LPAE
+default 2
+
 source "init/Kconfig"

 source "kernel/Kconfig.freezer"
diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h
index afb9cafd3786..c1ff8ab12914 100644
--- a/arch/arm/include/asm/elf.h
+++ b/arch/arm/include/asm/elf.h
@@ -125,10 +125,6 @@ int dump_task_regs(struct task_struct *t, elf_gregset_t *elfregs);
 extern void elf_set_personality(const struct elf32_hdr *);
 #define SET_PERSONALITY(ex) elf_set_personality(&(ex))

-struct mm_struct;
-extern unsigned long arch_randomize_brk(struct mm_struct *mm);
-#define arch_randomize_brk arch_randomize_brk
-
 #ifdef CONFIG_MMU
 #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
 struct linux_binprm;
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 1609b022a72f..3d0e9aed4b40 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -335,6 +335,9 @@ void __init bootmem_init(void)

 find_limits(&min, &max_low, &max_high);

+early_memtest((phys_addr_t)min << PAGE_SHIFT,
+(phys_addr_t)max_low << PAGE_SHIFT);
+
 /*
 * Sparsemem tries to allocate bootmem in memory_present(),
 * so must be done after the fixed reservations
diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
index 5e85ed371364..407dc786583a 100644
--- a/arch/arm/mm/mmap.c
+++ b/arch/arm/mm/mmap.c
@@ -169,14 +169,22 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 return addr;
 }

+unsigned long arch_mmap_rnd(void)
+{
+unsigned long rnd;
+
+/* 8 bits of randomness in 20 address space bits */
+rnd = (unsigned long)get_random_int() % (1 << 8);
+
+return rnd << PAGE_SHIFT;
+}
+
 void arch_pick_mmap_layout(struct mm_struct *mm)
 {
 unsigned long random_factor = 0UL;

-/* 8 bits of randomness in 20 address space bits */
-if ((current->flags & PF_RANDOMIZE) &&
-!(current->personality & ADDR_NO_RANDOMIZE))
-random_factor = (get_random_int() % (1 << 8)) << PAGE_SHIFT;
+if (current->flags & PF_RANDOMIZE)
+random_factor = arch_mmap_rnd();

 if (mmap_is_legacy()) {
 mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
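For scale (a back-of-the-envelope note, not part of the patch): with 4 KB pages the helper above produces rnd values of 0..255, so rnd << PAGE_SHIFT offsets the mmap base by up to 255 * 4 KB, just under 1 MB, which is what the "8 bits of randomness in 20 address space bits" comment refers to.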
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 1b8e97331ffb..34f487d5d84e 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1,7 +1,7 @@
 config ARM64
 def_bool y
-select ARCH_BINFMT_ELF_RANDOMIZE_PIE
 select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
+select ARCH_HAS_ELF_RANDOMIZE
 select ARCH_HAS_GCOV_PROFILE_ALL
 select ARCH_HAS_SG_CHAIN
 select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
@@ -143,6 +143,13 @@ config KERNEL_MODE_NEON
 config FIX_EARLYCON_MEM
 def_bool y

+config PGTABLE_LEVELS
+int
+default 2 if ARM64_64K_PAGES && ARM64_VA_BITS_42
+default 3 if ARM64_64K_PAGES && ARM64_VA_BITS_48
+default 3 if ARM64_4K_PAGES && ARM64_VA_BITS_39
+default 4 if ARM64_4K_PAGES && ARM64_VA_BITS_48
+
 source "init/Kconfig"

 source "kernel/Kconfig.freezer"
@@ -413,13 +420,6 @@ config ARM64_VA_BITS
 default 42 if ARM64_VA_BITS_42
 default 48 if ARM64_VA_BITS_48

-config ARM64_PGTABLE_LEVELS
-int
-default 2 if ARM64_64K_PAGES && ARM64_VA_BITS_42
-default 3 if ARM64_64K_PAGES && ARM64_VA_BITS_48
-default 3 if ARM64_4K_PAGES && ARM64_VA_BITS_39
-default 4 if ARM64_4K_PAGES && ARM64_VA_BITS_48
-
 config CPU_BIG_ENDIAN
 bool "Build big-endian kernel"
 help
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index 1f65be393139..faad6df49e5b 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -125,7 +125,6 @@ typedef struct user_fpsimd_state elf_fpregset_t;
 * the loader. We need to make sure that it is out of the way of the program
 * that it will "exec", and that there is sufficient room for the brk.
 */
-extern unsigned long randomize_et_dyn(unsigned long base);
 #define ELF_ET_DYN_BASE (2 * TASK_SIZE_64 / 3)

 /*
@@ -157,10 +156,6 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm,
 #define STACK_RND_MASK (0x3ffff >> (PAGE_SHIFT - 12))
 #endif

-struct mm_struct;
-extern unsigned long arch_randomize_brk(struct mm_struct *mm);
-#define arch_randomize_brk arch_randomize_brk
-
 #ifdef CONFIG_COMPAT

 #ifdef __AARCH64EB__
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index bbfb600fa822..36250705dc4c 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -163,12 +163,12 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
 /*
 * If we are concatenating first level stage-2 page tables, we would have less
 * than or equal to 16 pointers in the fake PGD, because that's what the
-* architecture allows. In this case, (4 - CONFIG_ARM64_PGTABLE_LEVELS)
+* architecture allows. In this case, (4 - CONFIG_PGTABLE_LEVELS)
 * represents the first level for the host, and we add 1 to go to the next
 * level (which uses contatenation) for the stage-2 tables.
 */
 #if PTRS_PER_S2_PGD <= 16
-#define KVM_PREALLOC_LEVEL (4 - CONFIG_ARM64_PGTABLE_LEVELS + 1)
+#define KVM_PREALLOC_LEVEL (4 - CONFIG_PGTABLE_LEVELS + 1)
 #else
 #define KVM_PREALLOC_LEVEL (0)
 #endif
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 22b16232bd60..8fc8fa280e92 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -36,9 +36,9 @@
 * for more information).
 */
 #ifdef CONFIG_ARM64_64K_PAGES
-#define SWAPPER_PGTABLE_LEVELS (CONFIG_ARM64_PGTABLE_LEVELS)
+#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS)
 #else
-#define SWAPPER_PGTABLE_LEVELS (CONFIG_ARM64_PGTABLE_LEVELS - 1)
+#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1)
 #endif

 #define SWAPPER_DIR_SIZE (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index e20df38a8ff3..76420568d66a 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -28,7 +28,7 @@

 #define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO)

-#if CONFIG_ARM64_PGTABLE_LEVELS > 2
+#if CONFIG_PGTABLE_LEVELS > 2

 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
@@ -46,9 +46,9 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 set_pud(pud, __pud(__pa(pmd) | PMD_TYPE_TABLE));
 }

-#endif /* CONFIG_ARM64_PGTABLE_LEVELS > 2 */
+#endif /* CONFIG_PGTABLE_LEVELS > 2 */

-#if CONFIG_ARM64_PGTABLE_LEVELS > 3
+#if CONFIG_PGTABLE_LEVELS > 3

 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
@@ -66,7 +66,7 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
 set_pgd(pgd, __pgd(__pa(pud) | PUD_TYPE_TABLE));
 }

-#endif /* CONFIG_ARM64_PGTABLE_LEVELS > 3 */
+#endif /* CONFIG_PGTABLE_LEVELS > 3 */

 extern pgd_t *pgd_alloc(struct mm_struct *mm);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 5f930cc9ea83..80f3d241cff8 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -21,7 +21,7 @@
 /*
 * PMD_SHIFT determines the size a level 2 page table entry can map.
 */
-#if CONFIG_ARM64_PGTABLE_LEVELS > 2
+#if CONFIG_PGTABLE_LEVELS > 2
 #define PMD_SHIFT ((PAGE_SHIFT - 3) * 2 + 3)
 #define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
 #define PMD_MASK (~(PMD_SIZE-1))
@@ -31,7 +31,7 @@
 /*
 * PUD_SHIFT determines the size a level 1 page table entry can map.
 */
-#if CONFIG_ARM64_PGTABLE_LEVELS > 3
+#if CONFIG_PGTABLE_LEVELS > 3
 #define PUD_SHIFT ((PAGE_SHIFT - 3) * 3 + 3)
 #define PUD_SIZE (_AC(1, UL) << PUD_SHIFT)
 #define PUD_MASK (~(PUD_SIZE-1))
@@ -42,7 +42,7 @@
 * PGDIR_SHIFT determines the size a top-level page table entry can map
 * (depending on the configuration, this level can be 0, 1 or 2).
 */
-#define PGDIR_SHIFT ((PAGE_SHIFT - 3) * CONFIG_ARM64_PGTABLE_LEVELS + 3)
+#define PGDIR_SHIFT ((PAGE_SHIFT - 3) * CONFIG_PGTABLE_LEVELS + 3)
 #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
 #define PGDIR_MASK (~(PGDIR_SIZE-1))
 #define PTRS_PER_PGD (1 << (VA_BITS - PGDIR_SHIFT))
diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h
index ca9df80af896..2b1bd7e52c3b 100644
--- a/arch/arm64/include/asm/pgtable-types.h
+++ b/arch/arm64/include/asm/pgtable-types.h
@@ -38,13 +38,13 @@ typedef struct { pteval_t pte; } pte_t;
 #define pte_val(x) ((x).pte)
 #define __pte(x) ((pte_t) { (x) } )

-#if CONFIG_ARM64_PGTABLE_LEVELS > 2
+#if CONFIG_PGTABLE_LEVELS > 2
 typedef struct { pmdval_t pmd; } pmd_t;
 #define pmd_val(x) ((x).pmd)
 #define __pmd(x) ((pmd_t) { (x) } )
 #endif

-#if CONFIG_ARM64_PGTABLE_LEVELS > 3
+#if CONFIG_PGTABLE_LEVELS > 3
 typedef struct { pudval_t pud; } pud_t;
 #define pud_val(x) ((x).pud)
 #define __pud(x) ((pud_t) { (x) } )
@@ -64,13 +64,13 @@ typedef pteval_t pte_t;
 #define pte_val(x) (x)
 #define __pte(x) (x)

-#if CONFIG_ARM64_PGTABLE_LEVELS > 2
+#if CONFIG_PGTABLE_LEVELS > 2
 typedef pmdval_t pmd_t;
 #define pmd_val(x) (x)
 #define __pmd(x) (x)
 #endif

-#if CONFIG_ARM64_PGTABLE_LEVELS > 3
+#if CONFIG_PGTABLE_LEVELS > 3
 typedef pudval_t pud_t;
 #define pud_val(x) (x)
 #define __pud(x) (x)
@@ -86,9 +86,9 @@ typedef pteval_t pgprot_t;

 #endif /* STRICT_MM_TYPECHECKS */

-#if CONFIG_ARM64_PGTABLE_LEVELS == 2
+#if CONFIG_PGTABLE_LEVELS == 2
 #include <asm-generic/pgtable-nopmd.h>
-#elif CONFIG_ARM64_PGTABLE_LEVELS == 3
+#elif CONFIG_PGTABLE_LEVELS == 3
 #include <asm-generic/pgtable-nopud.h>
 #endif

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 800ec0e87ed9..56283f8a675c 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -374,7 +374,7 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd)
 */
 #define mk_pte(page,prot) pfn_pte(page_to_pfn(page),prot)

-#if CONFIG_ARM64_PGTABLE_LEVELS > 2
+#if CONFIG_PGTABLE_LEVELS > 2

 #define pmd_ERROR(pmd) __pmd_error(__FILE__, __LINE__, pmd_val(pmd))

@@ -409,9 +409,9 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)

 #define pud_page(pud) pfn_to_page(__phys_to_pfn(pud_val(pud) & PHYS_MASK))

-#endif /* CONFIG_ARM64_PGTABLE_LEVELS > 2 */
+#endif /* CONFIG_PGTABLE_LEVELS > 2 */

-#if CONFIG_ARM64_PGTABLE_LEVELS > 3
+#if CONFIG_PGTABLE_LEVELS > 3

 #define pud_ERROR(pud) __pud_error(__FILE__, __LINE__, pud_val(pud))

@@ -445,7 +445,7 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr)

 #define pgd_page(pgd) pfn_to_page(__phys_to_pfn(pgd_val(pgd) & PHYS_MASK))

-#endif /* CONFIG_ARM64_PGTABLE_LEVELS > 3 */
+#endif /* CONFIG_PGTABLE_LEVELS > 3 */

 #define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd))

diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index 53d9c354219f..3a0242c7eb8d 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -53,7 +53,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
 tlb_remove_entry(tlb, pte);
 }

-#if CONFIG_ARM64_PGTABLE_LEVELS > 2
+#if CONFIG_PGTABLE_LEVELS > 2
 static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
 unsigned long addr)
 {
@@ -62,7 +62,7 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
 }
 #endif

-#if CONFIG_ARM64_PGTABLE_LEVELS > 3
+#if CONFIG_PGTABLE_LEVELS > 3
 static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp,
 unsigned long addr)
 {
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index ae85da6307bb..597831bdddf3 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -190,6 +190,8 @@ void __init bootmem_init(void)
 min = PFN_UP(memblock_start_of_DRAM());
 max = PFN_DOWN(memblock_end_of_DRAM());

+early_memtest(min << PAGE_SHIFT, max << PAGE_SHIFT);
+
 /*
 * Sparsemem tries to allocate bootmem in memory_present(), so must be
 * done after the fixed reservations.
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index 54922d1275b8..ed177475dd8c 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -47,17 +47,16 @@ static int mmap_is_legacy(void)
 return sysctl_legacy_va_layout;
 }

-static unsigned long mmap_rnd(void)
+unsigned long arch_mmap_rnd(void)
 {
-unsigned long rnd = 0;
+unsigned long rnd;

-if (current->flags & PF_RANDOMIZE)
-rnd = (long)get_random_int() & STACK_RND_MASK;
+rnd = (unsigned long)get_random_int() & STACK_RND_MASK;

 return rnd << PAGE_SHIFT;
 }

-static unsigned long mmap_base(void)
+static unsigned long mmap_base(unsigned long rnd)
 {
 unsigned long gap = rlimit(RLIMIT_STACK);

@@ -66,7 +65,7 @@ static unsigned long mmap_base(void)
 else if (gap > MAX_GAP)
 gap = MAX_GAP;

-return PAGE_ALIGN(STACK_TOP - gap - mmap_rnd());
+return PAGE_ALIGN(STACK_TOP - gap - rnd);
 }

 /*
@@ -75,15 +74,20 @@ static unsigned long mmap_base(void)
 */
 void arch_pick_mmap_layout(struct mm_struct *mm)
 {
+unsigned long random_factor = 0UL;
+
+if (current->flags & PF_RANDOMIZE)
+random_factor = arch_mmap_rnd();
+
 /*
 * Fall back to the standard layout if the personality bit is set, or
 * if the expected stack growth is unlimited:
 */
 if (mmap_is_legacy()) {
-mm->mmap_base = TASK_UNMAPPED_BASE;
+mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
 mm->get_unmapped_area = arch_get_unmapped_area;
 } else {
-mm->mmap_base = mmap_base();
+mm->mmap_base = mmap_base(random_factor);
 mm->get_unmapped_area = arch_get_unmapped_area_topdown;
 }
 }
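A similar back-of-the-envelope note for the arm64 helper above (not part of the patch, and assuming 4 KB pages so that STACK_RND_MASK evaluates to the 0x3ffff value shown earlier in this diff): rnd can be as large as 0x3ffff, so rnd << PAGE_SHIFT allows mmap base offsets of up to roughly 1 GB.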
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index c6daaf6c6f97..79e01163a981 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -550,10 +550,10 @@ void vmemmap_free(unsigned long start, unsigned long end)
 #endif /* CONFIG_SPARSEMEM_VMEMMAP */

 static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
-#if CONFIG_ARM64_PGTABLE_LEVELS > 2
+#if CONFIG_PGTABLE_LEVELS > 2
 static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;
 #endif
-#if CONFIG_ARM64_PGTABLE_LEVELS > 3
+#if CONFIG_PGTABLE_LEVELS > 3
 static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;
 #endif

diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 074e52bf815c..4f9a6661491b 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -1,3 +1,8 @@
+config PGTABLE_LEVELS
+int "Page Table Levels" if !IA64_PAGE_SIZE_64KB
+range 3 4 if !IA64_PAGE_SIZE_64KB
+default 3
+
 source "init/Kconfig"

 source "kernel/Kconfig.freezer"
@@ -286,19 +291,6 @@ config IA64_PAGE_SIZE_64KB

 endchoice

-choice
-prompt "Page Table Levels"
-default PGTABLE_3
-
-config PGTABLE_3
-bool "3 Levels"
-
-config PGTABLE_4
-depends on !IA64_PAGE_SIZE_64KB
-bool "4 Levels"
-
-endchoice
-
 if IA64_HP_SIM
 config HZ
 default 32
diff --git a/arch/ia64/include/asm/page.h b/arch/ia64/include/asm/page.h
index 1f1bf144fe62..ec48bb9f95e1 100644
--- a/arch/ia64/include/asm/page.h
+++ b/arch/ia64/include/asm/page.h
@@ -173,7 +173,7 @@ get_order (unsigned long size)
 */
 typedef struct { unsigned long pte; } pte_t;
 typedef struct { unsigned long pmd; } pmd_t;
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 typedef struct { unsigned long pud; } pud_t;
 #endif
 typedef struct { unsigned long pgd; } pgd_t;
@@ -182,7 +182,7 @@ get_order (unsigned long size)

 # define pte_val(x) ((x).pte)
 # define pmd_val(x) ((x).pmd)
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 # define pud_val(x) ((x).pud)
 #endif
 # define pgd_val(x) ((x).pgd)
diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h
index 5767cdfc08db..f5e70e961948 100644
--- a/arch/ia64/include/asm/pgalloc.h
+++ b/arch/ia64/include/asm/pgalloc.h
@@ -32,7 +32,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 quicklist_free(0, NULL, pgd);
 }

-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 static inline void
 pgd_populate(struct mm_struct *mm, pgd_t * pgd_entry, pud_t * pud)
 {
@@ -49,7 +49,7 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 quicklist_free(0, NULL, pud);
 }
 #define __pud_free_tlb(tlb, pud, address) pud_free((tlb)->mm, pud)
-#endif /* CONFIG_PGTABLE_4 */
+#endif /* CONFIG_PGTABLE_LEVELS == 4 */

 static inline void
 pud_populate(struct mm_struct *mm, pud_t * pud_entry, pmd_t * pmd)
diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h
index 7b6f8801df57..9f3ed9ee8f13 100644
--- a/arch/ia64/include/asm/pgtable.h
+++ b/arch/ia64/include/asm/pgtable.h
@@ -99,7 +99,7 @@
 #define PMD_MASK (~(PMD_SIZE-1))
 #define PTRS_PER_PMD (1UL << (PTRS_PER_PTD_SHIFT))

-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 /*
 * Definitions for second level:
 *
@@ -117,7 +117,7 @@
 *
 * PGDIR_SHIFT determines what a first-level page table entry can map.
 */
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 #define PGDIR_SHIFT (PUD_SHIFT + (PTRS_PER_PTD_SHIFT))
 #else
 #define PGDIR_SHIFT (PMD_SHIFT + (PTRS_PER_PTD_SHIFT))
@@ -180,7 +180,7 @@
 #define __S111 __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RWX)

 #define pgd_ERROR(e) printk("%s:%d: bad pgd %016lx.\n", __FILE__, __LINE__, pgd_val(e))
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 #define pud_ERROR(e) printk("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e))
 #endif
 #define pmd_ERROR(e) printk("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e))
@@ -281,7 +281,7 @@ extern unsigned long VMALLOC_END;
 #define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & _PFN_MASK))
 #define pud_page(pud) virt_to_page((pud_val(pud) + PAGE_OFFSET))

-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 #define pgd_none(pgd) (!pgd_val(pgd))
 #define pgd_bad(pgd) (!ia64_phys_addr_valid(pgd_val(pgd)))
 #define pgd_present(pgd) (pgd_val(pgd) != 0UL)
@@ -384,7 +384,7 @@ pgd_offset (const struct mm_struct *mm, unsigned long address)
 here. */
 #define pgd_offset_gate(mm, addr) pgd_offset_k(addr)

-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 /* Find an entry in the second-level page table.. */
 #define pud_offset(dir,addr) \
 ((pud_t *) pgd_page_vaddr(*(dir)) + (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)))
@@ -586,7 +586,7 @@ extern struct page *zero_page_memmap_ptr;
 #define __HAVE_ARCH_PGD_OFFSET_GATE


-#ifndef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 3
 #include <asm-generic/pgtable-nopud.h>
 #endif
 #include <asm-generic/pgtable.h>
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index 18e794a57248..e42bf7a913f3 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -146,7 +146,7 @@ ENTRY(vhpt_miss)
 (p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=pgd_offset for region 5
 (p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=pgd_offset for region[0-4]
 cmp.eq p7,p6=0,r21 // unused address bits all zeroes?
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 shr.u r28=r22,PUD_SHIFT // shift pud index into position
 #else
 shr.u r18=r22,PMD_SHIFT // shift pmd index into position
@@ -155,7 +155,7 @@ ENTRY(vhpt_miss)
 ld8 r17=[r17] // get *pgd (may be 0)
 ;;
 (p7) cmp.eq p6,p7=r17,r0 // was pgd_present(*pgd) == NULL?
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 dep r28=r28,r17,3,(PAGE_SHIFT-3) // r28=pud_offset(pgd,addr)
 ;;
 shr.u r18=r22,PMD_SHIFT // shift pmd index into position
@@ -222,13 +222,13 @@ ENTRY(vhpt_miss)
 */
 ld8 r25=[r21] // read *pte again
 ld8 r26=[r17] // read *pmd again
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 ld8 r19=[r28] // read *pud again
 #endif
 cmp.ne p6,p7=r0,r0
 ;;
 cmp.ne.or.andcm p6,p7=r26,r20 // did *pmd change
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 cmp.ne.or.andcm p6,p7=r19,r29 // did *pud change
 #endif
 mov r27=PAGE_SHIFT<<2
@@ -476,7 +476,7 @@ ENTRY(nested_dtlb_miss)
 (p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=pgd_offset for region 5
 (p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=pgd_offset for region[0-4]
 cmp.eq p7,p6=0,r21 // unused address bits all zeroes?
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 shr.u r18=r22,PUD_SHIFT // shift pud index into position
 #else
 shr.u r18=r22,PMD_SHIFT // shift pmd index into position
@@ -487,7 +487,7 @@ ENTRY(nested_dtlb_miss)
 (p7) cmp.eq p6,p7=r17,r0 // was pgd_present(*pgd) == NULL?
 dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=p[u|m]d_offset(pgd,addr)
 ;;
-#ifdef CONFIG_PGTABLE_4
+#if CONFIG_PGTABLE_LEVELS == 4
 (p7) ld8 r17=[r17] // get *pud (may be 0)
 shr.u r18=r22,PMD_SHIFT // shift pmd index into position
 ;;
diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c
index 5151a649c96b..b72cd7a07222 100644
--- a/arch/ia64/kernel/machine_kexec.c
+++ b/arch/ia64/kernel/machine_kexec.c
@@ -156,9 +156,9 @@ void arch_crash_save_vmcoreinfo(void)
 VMCOREINFO_OFFSET(node_memblk_s, start_paddr);
 VMCOREINFO_OFFSET(node_memblk_s, size);
 #endif
-#ifdef CONFIG_PGTABLE_3
+#if CONFIG_PGTABLE_LEVELS == 3
 VMCOREINFO_CONFIG(PGTABLE_3);
-#elif defined(CONFIG_PGTABLE_4)
+#elif CONFIG_PGTABLE_LEVELS == 4
 VMCOREINFO_CONFIG(PGTABLE_4);
 #endif
 }
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 87b7c7581b1d..2dd8f63bfbbb 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -67,6 +67,10 @@ config HZ
 default 1000 if CLEOPATRA
 default 100

+config PGTABLE_LEVELS
+default 2 if SUN3 || COLDFIRE
+default 3
+
 source "init/Kconfig"

 source "kernel/Kconfig.freezer"
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index c7a16904cd03..a326c4cb8cf0 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig | |||
| @@ -23,7 +23,7 @@ config MIPS | |||
| 23 | select HAVE_KRETPROBES | 23 | select HAVE_KRETPROBES |
| 24 | select HAVE_DEBUG_KMEMLEAK | 24 | select HAVE_DEBUG_KMEMLEAK |
| 25 | select HAVE_SYSCALL_TRACEPOINTS | 25 | select HAVE_SYSCALL_TRACEPOINTS |
| 26 | select ARCH_BINFMT_ELF_RANDOMIZE_PIE | 26 | select ARCH_HAS_ELF_RANDOMIZE |
| 27 | select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES && 64BIT | 27 | select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES && 64BIT |
| 28 | select RTC_LIB if !MACH_LOONGSON | 28 | select RTC_LIB if !MACH_LOONGSON |
| 29 | select GENERIC_ATOMIC64 if !64BIT | 29 | select GENERIC_ATOMIC64 if !64BIT |
| @@ -2600,6 +2600,11 @@ config STACKTRACE_SUPPORT | |||
| 2600 | bool | 2600 | bool |
| 2601 | default y | 2601 | default y |
| 2602 | 2602 | ||
| 2603 | config PGTABLE_LEVELS | ||
| 2604 | int | ||
| 2605 | default 3 if 64BIT && !PAGE_SIZE_64KB | ||
| 2606 | default 2 | ||
| 2607 | |||
| 2603 | source "init/Kconfig" | 2608 | source "init/Kconfig" |
| 2604 | 2609 | ||
| 2605 | source "kernel/Kconfig.freezer" | 2610 | source "kernel/Kconfig.freezer" |
diff --git a/arch/mips/include/asm/elf.h b/arch/mips/include/asm/elf.h index 535f196ffe02..31d747d46a23 100644 --- a/arch/mips/include/asm/elf.h +++ b/arch/mips/include/asm/elf.h | |||
| @@ -410,10 +410,6 @@ struct linux_binprm; | |||
| 410 | extern int arch_setup_additional_pages(struct linux_binprm *bprm, | 410 | extern int arch_setup_additional_pages(struct linux_binprm *bprm, |
| 411 | int uses_interp); | 411 | int uses_interp); |
| 412 | 412 | ||
| 413 | struct mm_struct; | ||
| 414 | extern unsigned long arch_randomize_brk(struct mm_struct *mm); | ||
| 415 | #define arch_randomize_brk arch_randomize_brk | ||
| 416 | |||
| 417 | struct arch_elf_state { | 413 | struct arch_elf_state { |
| 418 | int fp_abi; | 414 | int fp_abi; |
| 419 | int interp_fp_abi; | 415 | int interp_fp_abi; |
diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c index f1baadd56e82..5c81fdd032c3 100644 --- a/arch/mips/mm/mmap.c +++ b/arch/mips/mm/mmap.c | |||
| @@ -142,18 +142,26 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp, | |||
| 142 | addr0, len, pgoff, flags, DOWN); | 142 | addr0, len, pgoff, flags, DOWN); |
| 143 | } | 143 | } |
| 144 | 144 | ||
| 145 | unsigned long arch_mmap_rnd(void) | ||
| 146 | { | ||
| 147 | unsigned long rnd; | ||
| 148 | |||
| 149 | rnd = (unsigned long)get_random_int(); | ||
| 150 | rnd <<= PAGE_SHIFT; | ||
| 151 | if (TASK_IS_32BIT_ADDR) | ||
| 152 | rnd &= 0xfffffful; | ||
| 153 | else | ||
| 154 | rnd &= 0xffffffful; | ||
| 155 | |||
| 156 | return rnd; | ||
| 157 | } | ||
| 158 | |||
| 145 | void arch_pick_mmap_layout(struct mm_struct *mm) | 159 | void arch_pick_mmap_layout(struct mm_struct *mm) |
| 146 | { | 160 | { |
| 147 | unsigned long random_factor = 0UL; | 161 | unsigned long random_factor = 0UL; |
| 148 | 162 | ||
| 149 | if (current->flags & PF_RANDOMIZE) { | 163 | if (current->flags & PF_RANDOMIZE) |
| 150 | random_factor = get_random_int(); | 164 | random_factor = arch_mmap_rnd(); |
| 151 | random_factor = random_factor << PAGE_SHIFT; | ||
| 152 | if (TASK_IS_32BIT_ADDR) | ||
| 153 | random_factor &= 0xfffffful; | ||
| 154 | else | ||
| 155 | random_factor &= 0xffffffful; | ||
| 156 | } | ||
| 157 | 165 | ||
| 158 | if (mmap_is_legacy()) { | 166 | if (mmap_is_legacy()) { |
| 159 | mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; | 167 | mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; |
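
The new MIPS arch_mmap_rnd() masks the page-shifted random value, so the resulting offset stays page aligned and bounded (under 16 MB for 32-bit tasks, under 256 MB for 64-bit ones). A small user-space sketch of the same arithmetic; PAGE_SHIFT = 12 is assumed and rand() stands in for get_random_int() purely for illustration:

/* Illustrative arithmetic only: mirrors the masking in the MIPS
 * arch_mmap_rnd() above.  Values assume 4 KB pages. */
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SHIFT 12

static unsigned long mmap_rnd(int is_32bit)
{
	unsigned long rnd = (unsigned long)rand();   /* get_random_int() in the kernel */

	rnd <<= PAGE_SHIFT;                          /* keep the offset page aligned */
	rnd &= is_32bit ? 0xfffffful : 0xffffffful;  /* < 16 MB or < 256 MB of slide */
	return rnd;
}

int main(void)
{
	printf("32-bit offset: %#lx\n", mmap_rnd(1));
	printf("64-bit offset: %#lx\n", mmap_rnd(0));
	return 0;
}
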
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 8014727a2743..c36546959e86 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig | |||
| @@ -103,6 +103,11 @@ config ARCH_MAY_HAVE_PC_FDC | |||
| 103 | depends on BROKEN | 103 | depends on BROKEN |
| 104 | default y | 104 | default y |
| 105 | 105 | ||
| 106 | config PGTABLE_LEVELS | ||
| 107 | int | ||
| 108 | default 3 if 64BIT && PARISC_PAGE_SIZE_4KB | ||
| 109 | default 2 | ||
| 110 | |||
| 106 | source "init/Kconfig" | 111 | source "init/Kconfig" |
| 107 | 112 | ||
| 108 | source "kernel/Kconfig.freezer" | 113 | source "kernel/Kconfig.freezer" |
diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h index d17437238a2c..1ba29369257c 100644 --- a/arch/parisc/include/asm/pgalloc.h +++ b/arch/parisc/include/asm/pgalloc.h | |||
| @@ -51,7 +51,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) | |||
| 51 | free_pages((unsigned long)pgd, PGD_ALLOC_ORDER); | 51 | free_pages((unsigned long)pgd, PGD_ALLOC_ORDER); |
| 52 | } | 52 | } |
| 53 | 53 | ||
| 54 | #if PT_NLEVELS == 3 | 54 | #if CONFIG_PGTABLE_LEVELS == 3 |
| 55 | 55 | ||
| 56 | /* Three Level Page Table Support for pmd's */ | 56 | /* Three Level Page Table Support for pmd's */ |
| 57 | 57 | ||
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 15207b9362bf..0a183756d6ec 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h | |||
| @@ -68,13 +68,11 @@ extern void purge_tlb_entries(struct mm_struct *, unsigned long); | |||
| 68 | #define KERNEL_INITIAL_ORDER 24 /* 0 to 1<<24 = 16MB */ | 68 | #define KERNEL_INITIAL_ORDER 24 /* 0 to 1<<24 = 16MB */ |
| 69 | #define KERNEL_INITIAL_SIZE (1 << KERNEL_INITIAL_ORDER) | 69 | #define KERNEL_INITIAL_SIZE (1 << KERNEL_INITIAL_ORDER) |
| 70 | 70 | ||
| 71 | #if defined(CONFIG_64BIT) && defined(CONFIG_PARISC_PAGE_SIZE_4KB) | 71 | #if CONFIG_PGTABLE_LEVELS == 3 |
| 72 | #define PT_NLEVELS 3 | ||
| 73 | #define PGD_ORDER 1 /* Number of pages per pgd */ | 72 | #define PGD_ORDER 1 /* Number of pages per pgd */ |
| 74 | #define PMD_ORDER 1 /* Number of pages per pmd */ | 73 | #define PMD_ORDER 1 /* Number of pages per pmd */ |
| 75 | #define PGD_ALLOC_ORDER 2 /* first pgd contains pmd */ | 74 | #define PGD_ALLOC_ORDER 2 /* first pgd contains pmd */ |
| 76 | #else | 75 | #else |
| 77 | #define PT_NLEVELS 2 | ||
| 78 | #define PGD_ORDER 1 /* Number of pages per pgd */ | 76 | #define PGD_ORDER 1 /* Number of pages per pgd */ |
| 79 | #define PGD_ALLOC_ORDER PGD_ORDER | 77 | #define PGD_ALLOC_ORDER PGD_ORDER |
| 80 | #endif | 78 | #endif |
| @@ -93,7 +91,7 @@ extern void purge_tlb_entries(struct mm_struct *, unsigned long); | |||
| 93 | #define PMD_SHIFT (PLD_SHIFT + BITS_PER_PTE) | 91 | #define PMD_SHIFT (PLD_SHIFT + BITS_PER_PTE) |
| 94 | #define PMD_SIZE (1UL << PMD_SHIFT) | 92 | #define PMD_SIZE (1UL << PMD_SHIFT) |
| 95 | #define PMD_MASK (~(PMD_SIZE-1)) | 93 | #define PMD_MASK (~(PMD_SIZE-1)) |
| 96 | #if PT_NLEVELS == 3 | 94 | #if CONFIG_PGTABLE_LEVELS == 3 |
| 97 | #define BITS_PER_PMD (PAGE_SHIFT + PMD_ORDER - BITS_PER_PMD_ENTRY) | 95 | #define BITS_PER_PMD (PAGE_SHIFT + PMD_ORDER - BITS_PER_PMD_ENTRY) |
| 98 | #else | 96 | #else |
| 99 | #define __PAGETABLE_PMD_FOLDED | 97 | #define __PAGETABLE_PMD_FOLDED |
| @@ -277,7 +275,7 @@ extern unsigned long *empty_zero_page; | |||
| 277 | #define pgd_flag(x) (pgd_val(x) & PxD_FLAG_MASK) | 275 | #define pgd_flag(x) (pgd_val(x) & PxD_FLAG_MASK) |
| 278 | #define pgd_address(x) ((unsigned long)(pgd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT) | 276 | #define pgd_address(x) ((unsigned long)(pgd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT) |
| 279 | 277 | ||
| 280 | #if PT_NLEVELS == 3 | 278 | #if CONFIG_PGTABLE_LEVELS == 3 |
| 281 | /* The first entry of the permanent pmd is not there if it contains | 279 | /* The first entry of the permanent pmd is not there if it contains |
| 282 | * the gateway marker */ | 280 | * the gateway marker */ |
| 283 | #define pmd_none(x) (!pmd_val(x) || pmd_flag(x) == PxD_FLAG_ATTACHED) | 281 | #define pmd_none(x) (!pmd_val(x) || pmd_flag(x) == PxD_FLAG_ATTACHED) |
| @@ -287,7 +285,7 @@ extern unsigned long *empty_zero_page; | |||
| 287 | #define pmd_bad(x) (!(pmd_flag(x) & PxD_FLAG_VALID)) | 285 | #define pmd_bad(x) (!(pmd_flag(x) & PxD_FLAG_VALID)) |
| 288 | #define pmd_present(x) (pmd_flag(x) & PxD_FLAG_PRESENT) | 286 | #define pmd_present(x) (pmd_flag(x) & PxD_FLAG_PRESENT) |
| 289 | static inline void pmd_clear(pmd_t *pmd) { | 287 | static inline void pmd_clear(pmd_t *pmd) { |
| 290 | #if PT_NLEVELS == 3 | 288 | #if CONFIG_PGTABLE_LEVELS == 3 |
| 291 | if (pmd_flag(*pmd) & PxD_FLAG_ATTACHED) | 289 | if (pmd_flag(*pmd) & PxD_FLAG_ATTACHED) |
| 292 | /* This is the entry pointing to the permanent pmd | 290 | /* This is the entry pointing to the permanent pmd |
| 293 | * attached to the pgd; cannot clear it */ | 291 | * attached to the pgd; cannot clear it */ |
| @@ -299,7 +297,7 @@ static inline void pmd_clear(pmd_t *pmd) { | |||
| 299 | 297 | ||
| 300 | 298 | ||
| 301 | 299 | ||
| 302 | #if PT_NLEVELS == 3 | 300 | #if CONFIG_PGTABLE_LEVELS == 3 |
| 303 | #define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_address(pgd))) | 301 | #define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_address(pgd))) |
| 304 | #define pgd_page(pgd) virt_to_page((void *)pgd_page_vaddr(pgd)) | 302 | #define pgd_page(pgd) virt_to_page((void *)pgd_page_vaddr(pgd)) |
| 305 | 303 | ||
| @@ -309,7 +307,7 @@ static inline void pmd_clear(pmd_t *pmd) { | |||
| 309 | #define pgd_bad(x) (!(pgd_flag(x) & PxD_FLAG_VALID)) | 307 | #define pgd_bad(x) (!(pgd_flag(x) & PxD_FLAG_VALID)) |
| 310 | #define pgd_present(x) (pgd_flag(x) & PxD_FLAG_PRESENT) | 308 | #define pgd_present(x) (pgd_flag(x) & PxD_FLAG_PRESENT) |
| 311 | static inline void pgd_clear(pgd_t *pgd) { | 309 | static inline void pgd_clear(pgd_t *pgd) { |
| 312 | #if PT_NLEVELS == 3 | 310 | #if CONFIG_PGTABLE_LEVELS == 3 |
| 313 | if(pgd_flag(*pgd) & PxD_FLAG_ATTACHED) | 311 | if(pgd_flag(*pgd) & PxD_FLAG_ATTACHED) |
| 314 | /* This is the permanent pmd attached to the pgd; cannot | 312 | /* This is the permanent pmd attached to the pgd; cannot |
| 315 | * free it */ | 313 | * free it */ |
| @@ -393,7 +391,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) | |||
| 393 | 391 | ||
| 394 | /* Find an entry in the second-level page table.. */ | 392 | /* Find an entry in the second-level page table.. */ |
| 395 | 393 | ||
| 396 | #if PT_NLEVELS == 3 | 394 | #if CONFIG_PGTABLE_LEVELS == 3 |
| 397 | #define pmd_offset(dir,address) \ | 395 | #define pmd_offset(dir,address) \ |
| 398 | ((pmd_t *) pgd_page_vaddr(*(dir)) + (((address)>>PMD_SHIFT) & (PTRS_PER_PMD-1))) | 396 | ((pmd_t *) pgd_page_vaddr(*(dir)) + (((address)>>PMD_SHIFT) & (PTRS_PER_PMD-1))) |
| 399 | #else | 397 | #else |
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 2ab16bb160a8..75819617f93b 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S | |||
| @@ -398,7 +398,7 @@ | |||
| 398 | * can address up to 1TB | 398 | * can address up to 1TB |
| 399 | */ | 399 | */ |
| 400 | .macro L2_ptep pmd,pte,index,va,fault | 400 | .macro L2_ptep pmd,pte,index,va,fault |
| 401 | #if PT_NLEVELS == 3 | 401 | #if CONFIG_PGTABLE_LEVELS == 3 |
| 402 | extru \va,31-ASM_PMD_SHIFT,ASM_BITS_PER_PMD,\index | 402 | extru \va,31-ASM_PMD_SHIFT,ASM_BITS_PER_PMD,\index |
| 403 | #else | 403 | #else |
| 404 | # if defined(CONFIG_64BIT) | 404 | # if defined(CONFIG_64BIT) |
| @@ -436,7 +436,7 @@ | |||
| 436 | * all ILP32 processes and all the kernel for machines with | 436 | * all ILP32 processes and all the kernel for machines with |
| 437 | * under 4GB of memory) */ | 437 | * under 4GB of memory) */ |
| 438 | .macro L3_ptep pgd,pte,index,va,fault | 438 | .macro L3_ptep pgd,pte,index,va,fault |
| 439 | #if PT_NLEVELS == 3 /* we might have a 2-Level scheme, e.g. with 16kb page size */ | 439 | #if CONFIG_PGTABLE_LEVELS == 3 /* we might have a 2-Level scheme, e.g. with 16kb page size */ |
| 440 | extrd,u \va,63-ASM_PGDIR_SHIFT,ASM_BITS_PER_PGD,\index | 440 | extrd,u \va,63-ASM_PGDIR_SHIFT,ASM_BITS_PER_PGD,\index |
| 441 | copy %r0,\pte | 441 | copy %r0,\pte |
| 442 | extrd,u,*= \va,63-ASM_PGDIR_SHIFT,64-ASM_PGDIR_SHIFT,%r0 | 442 | extrd,u,*= \va,63-ASM_PGDIR_SHIFT,64-ASM_PGDIR_SHIFT,%r0 |
diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S index d4dc588c0dc1..e7d64527aff9 100644 --- a/arch/parisc/kernel/head.S +++ b/arch/parisc/kernel/head.S | |||
| @@ -74,7 +74,7 @@ $bss_loop: | |||
| 74 | mtctl %r4,%cr24 /* Initialize kernel root pointer */ | 74 | mtctl %r4,%cr24 /* Initialize kernel root pointer */ |
| 75 | mtctl %r4,%cr25 /* Initialize user root pointer */ | 75 | mtctl %r4,%cr25 /* Initialize user root pointer */ |
| 76 | 76 | ||
| 77 | #if PT_NLEVELS == 3 | 77 | #if CONFIG_PGTABLE_LEVELS == 3 |
| 78 | /* Set pmd in pgd */ | 78 | /* Set pmd in pgd */ |
| 79 | load32 PA(pmd0),%r5 | 79 | load32 PA(pmd0),%r5 |
| 80 | shrd %r5,PxD_VALUE_SHIFT,%r3 | 80 | shrd %r5,PxD_VALUE_SHIFT,%r3 |
| @@ -97,7 +97,7 @@ $bss_loop: | |||
| 97 | stw %r3,0(%r4) | 97 | stw %r3,0(%r4) |
| 98 | ldo (PAGE_SIZE >> PxD_VALUE_SHIFT)(%r3),%r3 | 98 | ldo (PAGE_SIZE >> PxD_VALUE_SHIFT)(%r3),%r3 |
| 99 | addib,> -1,%r1,1b | 99 | addib,> -1,%r1,1b |
| 100 | #if PT_NLEVELS == 3 | 100 | #if CONFIG_PGTABLE_LEVELS == 3 |
| 101 | ldo ASM_PMD_ENTRY_SIZE(%r4),%r4 | 101 | ldo ASM_PMD_ENTRY_SIZE(%r4),%r4 |
| 102 | #else | 102 | #else |
| 103 | ldo ASM_PGD_ENTRY_SIZE(%r4),%r4 | 103 | ldo ASM_PGD_ENTRY_SIZE(%r4),%r4 |
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 15dbe81cf5f3..c229427fa546 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c | |||
| @@ -34,7 +34,7 @@ | |||
| 34 | extern int data_start; | 34 | extern int data_start; |
| 35 | extern void parisc_kernel_start(void); /* Kernel entry point in head.S */ | 35 | extern void parisc_kernel_start(void); /* Kernel entry point in head.S */ |
| 36 | 36 | ||
| 37 | #if PT_NLEVELS == 3 | 37 | #if CONFIG_PGTABLE_LEVELS == 3 |
| 38 | /* NOTE: This layout exactly conforms to the hybrid L2/L3 page table layout | 38 | /* NOTE: This layout exactly conforms to the hybrid L2/L3 page table layout |
| 39 | * with the first pmd adjacent to the pgd and below it. gcc doesn't actually | 39 | * with the first pmd adjacent to the pgd and below it. gcc doesn't actually |
| 40 | * guarantee that global objects will be laid out in memory in the same order | 40 | * guarantee that global objects will be laid out in memory in the same order |
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 22b0940494bb..e99014adf017 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig | |||
| @@ -88,7 +88,7 @@ config PPC | |||
| 88 | select ARCH_MIGHT_HAVE_PC_PARPORT | 88 | select ARCH_MIGHT_HAVE_PC_PARPORT |
| 89 | select ARCH_MIGHT_HAVE_PC_SERIO | 89 | select ARCH_MIGHT_HAVE_PC_SERIO |
| 90 | select BINFMT_ELF | 90 | select BINFMT_ELF |
| 91 | select ARCH_BINFMT_ELF_RANDOMIZE_PIE | 91 | select ARCH_HAS_ELF_RANDOMIZE |
| 92 | select OF | 92 | select OF |
| 93 | select OF_EARLY_FLATTREE | 93 | select OF_EARLY_FLATTREE |
| 94 | select OF_RESERVED_MEM | 94 | select OF_RESERVED_MEM |
| @@ -297,6 +297,12 @@ config ZONE_DMA32 | |||
| 297 | bool | 297 | bool |
| 298 | default y if PPC64 | 298 | default y if PPC64 |
| 299 | 299 | ||
| 300 | config PGTABLE_LEVELS | ||
| 301 | int | ||
| 302 | default 2 if !PPC64 | ||
| 303 | default 3 if PPC_64K_PAGES | ||
| 304 | default 4 | ||
| 305 | |||
| 300 | source "init/Kconfig" | 306 | source "init/Kconfig" |
| 301 | 307 | ||
| 302 | source "kernel/Kconfig.freezer" | 308 | source "kernel/Kconfig.freezer" |
diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index 57d289acb803..ee46ffef608e 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h | |||
| @@ -128,10 +128,6 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, | |||
| 128 | (0x7ff >> (PAGE_SHIFT - 12)) : \ | 128 | (0x7ff >> (PAGE_SHIFT - 12)) : \ |
| 129 | (0x3ffff >> (PAGE_SHIFT - 12))) | 129 | (0x3ffff >> (PAGE_SHIFT - 12))) |
| 130 | 130 | ||
| 131 | extern unsigned long arch_randomize_brk(struct mm_struct *mm); | ||
| 132 | #define arch_randomize_brk arch_randomize_brk | ||
| 133 | |||
| 134 | |||
| 135 | #ifdef CONFIG_SPU_BASE | 131 | #ifdef CONFIG_SPU_BASE |
| 136 | /* Notes used in ET_CORE. Note name is "SPU/<fd>/<filename>". */ | 132 | /* Notes used in ET_CORE. Note name is "SPU/<fd>/<filename>". */ |
| 137 | #define NT_SPU 1 | 133 | #define NT_SPU 1 |
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c index cb8bdbe4972f..0f0502e12f6c 100644 --- a/arch/powerpc/mm/mmap.c +++ b/arch/powerpc/mm/mmap.c | |||
| @@ -53,21 +53,20 @@ static inline int mmap_is_legacy(void) | |||
| 53 | return sysctl_legacy_va_layout; | 53 | return sysctl_legacy_va_layout; |
| 54 | } | 54 | } |
| 55 | 55 | ||
| 56 | static unsigned long mmap_rnd(void) | 56 | unsigned long arch_mmap_rnd(void) |
| 57 | { | 57 | { |
| 58 | unsigned long rnd = 0; | 58 | unsigned long rnd; |
| 59 | |||
| 60 | /* 8MB for 32bit, 1GB for 64bit */ | ||
| 61 | if (is_32bit_task()) | ||
| 62 | rnd = (unsigned long)get_random_int() % (1<<(23-PAGE_SHIFT)); | ||
| 63 | else | ||
| 64 | rnd = (unsigned long)get_random_int() % (1<<(30-PAGE_SHIFT)); | ||
| 59 | 65 | ||
| 60 | if (current->flags & PF_RANDOMIZE) { | ||
| 61 | /* 8MB for 32bit, 1GB for 64bit */ | ||
| 62 | if (is_32bit_task()) | ||
| 63 | rnd = (long)(get_random_int() % (1<<(23-PAGE_SHIFT))); | ||
| 64 | else | ||
| 65 | rnd = (long)(get_random_int() % (1<<(30-PAGE_SHIFT))); | ||
| 66 | } | ||
| 67 | return rnd << PAGE_SHIFT; | 66 | return rnd << PAGE_SHIFT; |
| 68 | } | 67 | } |
| 69 | 68 | ||
| 70 | static inline unsigned long mmap_base(void) | 69 | static inline unsigned long mmap_base(unsigned long rnd) |
| 71 | { | 70 | { |
| 72 | unsigned long gap = rlimit(RLIMIT_STACK); | 71 | unsigned long gap = rlimit(RLIMIT_STACK); |
| 73 | 72 | ||
| @@ -76,7 +75,7 @@ static inline unsigned long mmap_base(void) | |||
| 76 | else if (gap > MAX_GAP) | 75 | else if (gap > MAX_GAP) |
| 77 | gap = MAX_GAP; | 76 | gap = MAX_GAP; |
| 78 | 77 | ||
| 79 | return PAGE_ALIGN(TASK_SIZE - gap - mmap_rnd()); | 78 | return PAGE_ALIGN(TASK_SIZE - gap - rnd); |
| 80 | } | 79 | } |
| 81 | 80 | ||
| 82 | /* | 81 | /* |
| @@ -85,6 +84,11 @@ static inline unsigned long mmap_base(void) | |||
| 85 | */ | 84 | */ |
| 86 | void arch_pick_mmap_layout(struct mm_struct *mm) | 85 | void arch_pick_mmap_layout(struct mm_struct *mm) |
| 87 | { | 86 | { |
| 87 | unsigned long random_factor = 0UL; | ||
| 88 | |||
| 89 | if (current->flags & PF_RANDOMIZE) | ||
| 90 | random_factor = arch_mmap_rnd(); | ||
| 91 | |||
| 88 | /* | 92 | /* |
| 89 | * Fall back to the standard layout if the personality | 93 | * Fall back to the standard layout if the personality |
| 90 | * bit is set, or if the expected stack growth is unlimited: | 94 | * bit is set, or if the expected stack growth is unlimited: |
| @@ -93,7 +97,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm) | |||
| 93 | mm->mmap_base = TASK_UNMAPPED_BASE; | 97 | mm->mmap_base = TASK_UNMAPPED_BASE; |
| 94 | mm->get_unmapped_area = arch_get_unmapped_area; | 98 | mm->get_unmapped_area = arch_get_unmapped_area; |
| 95 | } else { | 99 | } else { |
| 96 | mm->mmap_base = mmap_base(); | 100 | mm->mmap_base = mmap_base(random_factor); |
| 97 | mm->get_unmapped_area = arch_get_unmapped_area_topdown; | 101 | mm->get_unmapped_area = arch_get_unmapped_area_topdown; |
| 98 | } | 102 | } |
| 99 | } | 103 | } |
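
The powerpc conversion keeps the 8 MB (32-bit) / 1 GB (64-bit) randomisation ranges but uses a modulo rather than a mask, and the PF_RANDOMIZE test moves out of the helper; mmap_base() now just subtracts the pre-computed factor. A user-space sketch of the range arithmetic, with PAGE_SHIFT = 12 assumed and rand() again standing in for get_random_int():

/* Illustrative only: the 8 MB / 1 GB ranges from the powerpc
 * arch_mmap_rnd() above, expressed as plain arithmetic. */
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SHIFT 12

static unsigned long arch_mmap_rnd(int is_32bit)
{
	unsigned long rnd;

	if (is_32bit)
		rnd = (unsigned long)rand() % (1UL << (23 - PAGE_SHIFT)); /* 8 MB worth of pages */
	else
		rnd = (unsigned long)rand() % (1UL << (30 - PAGE_SHIFT)); /* 1 GB worth of pages */

	return rnd << PAGE_SHIFT;   /* back to a byte offset */
}

int main(void)
{
	printf("32-bit slide < %#lx bytes\n", 1UL << 23);
	printf("64-bit slide < %#lx bytes\n", 1UL << 30);
	printf("sample: %#lx\n", arch_mmap_rnd(0));
	return 0;
}
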
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index b2d7ec1669b4..6321fd8bf813 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig | |||
| @@ -65,6 +65,7 @@ config S390 | |||
| 65 | def_bool y | 65 | def_bool y |
| 66 | select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE | 66 | select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE |
| 67 | select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS | 67 | select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS |
| 68 | select ARCH_HAS_ELF_RANDOMIZE | ||
| 68 | select ARCH_HAS_GCOV_PROFILE_ALL | 69 | select ARCH_HAS_GCOV_PROFILE_ALL |
| 69 | select ARCH_HAS_SG_CHAIN | 70 | select ARCH_HAS_SG_CHAIN |
| 70 | select ARCH_HAVE_NMI_SAFE_CMPXCHG | 71 | select ARCH_HAVE_NMI_SAFE_CMPXCHG |
| @@ -156,6 +157,11 @@ config S390 | |||
| 156 | config SCHED_OMIT_FRAME_POINTER | 157 | config SCHED_OMIT_FRAME_POINTER |
| 157 | def_bool y | 158 | def_bool y |
| 158 | 159 | ||
| 160 | config PGTABLE_LEVELS | ||
| 161 | int | ||
| 162 | default 4 if 64BIT | ||
| 163 | default 2 | ||
| 164 | |||
| 159 | source "init/Kconfig" | 165 | source "init/Kconfig" |
| 160 | 166 | ||
| 161 | source "kernel/Kconfig.freezer" | 167 | source "kernel/Kconfig.freezer" |
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index c9c875d9ed31..a5c4978462c1 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h | |||
| @@ -161,10 +161,11 @@ extern unsigned int vdso_enabled; | |||
| 161 | /* This is the location that an ET_DYN program is loaded if exec'ed. Typical | 161 | /* This is the location that an ET_DYN program is loaded if exec'ed. Typical |
| 162 | use of this is to invoke "./ld.so someprog" to test out a new version of | 162 | use of this is to invoke "./ld.so someprog" to test out a new version of |
| 163 | the loader. We need to make sure that it is out of the way of the program | 163 | the loader. We need to make sure that it is out of the way of the program |
| 164 | that it will "exec", and that there is sufficient room for the brk. */ | 164 | that it will "exec", and that there is sufficient room for the brk. 64-bit |
| 165 | 165 | tasks are aligned to 4GB. */ | |
| 166 | extern unsigned long randomize_et_dyn(void); | 166 | #define ELF_ET_DYN_BASE (is_32bit_task() ? \ |
| 167 | #define ELF_ET_DYN_BASE randomize_et_dyn() | 167 | (STACK_TOP / 3 * 2) : \ |
| 168 | (STACK_TOP / 3 * 2) & ~((1UL << 32) - 1)) | ||
| 168 | 169 | ||
| 169 | /* This yields a mask that user programs can use to figure out what | 170 | /* This yields a mask that user programs can use to figure out what |
| 170 | instruction set this CPU supports. */ | 171 | instruction set this CPU supports. */ |
| @@ -225,9 +226,6 @@ struct linux_binprm; | |||
| 225 | #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 | 226 | #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 |
| 226 | int arch_setup_additional_pages(struct linux_binprm *, int); | 227 | int arch_setup_additional_pages(struct linux_binprm *, int); |
| 227 | 228 | ||
| 228 | extern unsigned long arch_randomize_brk(struct mm_struct *mm); | ||
| 229 | #define arch_randomize_brk arch_randomize_brk | ||
| 230 | |||
| 231 | void *fill_cpu_elf_notes(void *ptr, struct save_area *sa, __vector128 *vxrs); | 229 | void *fill_cpu_elf_notes(void *ptr, struct save_area *sa, __vector128 *vxrs); |
| 232 | 230 | ||
| 233 | #endif | 231 | #endif |
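
With randomize_et_dyn() removed, the s390 ET_DYN base becomes a pure compile-time expression: two thirds of STACK_TOP, rounded down to a 4 GB boundary for 64-bit tasks. A quick user-space check of that arithmetic; the STACK_TOP value below is a made-up example, not the real s390 constant:

/* Illustrative only: evaluates the new s390 ELF_ET_DYN_BASE expression
 * with an example STACK_TOP.  The real value depends on the task's
 * address-space size. */
#include <stdio.h>

#define EXAMPLE_STACK_TOP (1UL << 42)   /* assumption for the demo */

int main(void)
{
	unsigned long base32 = EXAMPLE_STACK_TOP / 3 * 2;
	unsigned long base64 = (EXAMPLE_STACK_TOP / 3 * 2) & ~((1UL << 32) - 1);

	printf("32-bit base: %#lx\n", base32);
	printf("64-bit base: %#lx (4 GB aligned)\n", base64);
	return 0;
}
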
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 179a2c20b01f..bb3367c5cb0b 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c | |||
| @@ -60,22 +60,20 @@ static inline int mmap_is_legacy(void) | |||
| 60 | return sysctl_legacy_va_layout; | 60 | return sysctl_legacy_va_layout; |
| 61 | } | 61 | } |
| 62 | 62 | ||
| 63 | static unsigned long mmap_rnd(void) | 63 | unsigned long arch_mmap_rnd(void) |
| 64 | { | 64 | { |
| 65 | if (!(current->flags & PF_RANDOMIZE)) | ||
| 66 | return 0; | ||
| 67 | if (is_32bit_task()) | 65 | if (is_32bit_task()) |
| 68 | return (get_random_int() & 0x7ff) << PAGE_SHIFT; | 66 | return (get_random_int() & 0x7ff) << PAGE_SHIFT; |
| 69 | else | 67 | else |
| 70 | return (get_random_int() & mmap_rnd_mask) << PAGE_SHIFT; | 68 | return (get_random_int() & mmap_rnd_mask) << PAGE_SHIFT; |
| 71 | } | 69 | } |
| 72 | 70 | ||
| 73 | static unsigned long mmap_base_legacy(void) | 71 | static unsigned long mmap_base_legacy(unsigned long rnd) |
| 74 | { | 72 | { |
| 75 | return TASK_UNMAPPED_BASE + mmap_rnd(); | 73 | return TASK_UNMAPPED_BASE + rnd; |
| 76 | } | 74 | } |
| 77 | 75 | ||
| 78 | static inline unsigned long mmap_base(void) | 76 | static inline unsigned long mmap_base(unsigned long rnd) |
| 79 | { | 77 | { |
| 80 | unsigned long gap = rlimit(RLIMIT_STACK); | 78 | unsigned long gap = rlimit(RLIMIT_STACK); |
| 81 | 79 | ||
| @@ -84,7 +82,7 @@ static inline unsigned long mmap_base(void) | |||
| 84 | else if (gap > MAX_GAP) | 82 | else if (gap > MAX_GAP) |
| 85 | gap = MAX_GAP; | 83 | gap = MAX_GAP; |
| 86 | gap &= PAGE_MASK; | 84 | gap &= PAGE_MASK; |
| 87 | return STACK_TOP - stack_maxrandom_size() - mmap_rnd() - gap; | 85 | return STACK_TOP - stack_maxrandom_size() - rnd - gap; |
| 88 | } | 86 | } |
| 89 | 87 | ||
| 90 | unsigned long | 88 | unsigned long |
| @@ -179,17 +177,6 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, | |||
| 179 | return addr; | 177 | return addr; |
| 180 | } | 178 | } |
| 181 | 179 | ||
| 182 | unsigned long randomize_et_dyn(void) | ||
| 183 | { | ||
| 184 | unsigned long base; | ||
| 185 | |||
| 186 | base = STACK_TOP / 3 * 2; | ||
| 187 | if (!is_32bit_task()) | ||
| 188 | /* Align to 4GB */ | ||
| 189 | base &= ~((1UL << 32) - 1); | ||
| 190 | return base + mmap_rnd(); | ||
| 191 | } | ||
| 192 | |||
| 193 | #ifndef CONFIG_64BIT | 180 | #ifndef CONFIG_64BIT |
| 194 | 181 | ||
| 195 | /* | 182 | /* |
| @@ -198,15 +185,20 @@ unsigned long randomize_et_dyn(void) | |||
| 198 | */ | 185 | */ |
| 199 | void arch_pick_mmap_layout(struct mm_struct *mm) | 186 | void arch_pick_mmap_layout(struct mm_struct *mm) |
| 200 | { | 187 | { |
| 188 | unsigned long random_factor = 0UL; | ||
| 189 | |||
| 190 | if (current->flags & PF_RANDOMIZE) | ||
| 191 | random_factor = arch_mmap_rnd(); | ||
| 192 | |||
| 201 | /* | 193 | /* |
| 202 | * Fall back to the standard layout if the personality | 194 | * Fall back to the standard layout if the personality |
| 203 | * bit is set, or if the expected stack growth is unlimited: | 195 | * bit is set, or if the expected stack growth is unlimited: |
| 204 | */ | 196 | */ |
| 205 | if (mmap_is_legacy()) { | 197 | if (mmap_is_legacy()) { |
| 206 | mm->mmap_base = mmap_base_legacy(); | 198 | mm->mmap_base = mmap_base_legacy(random_factor); |
| 207 | mm->get_unmapped_area = arch_get_unmapped_area; | 199 | mm->get_unmapped_area = arch_get_unmapped_area; |
| 208 | } else { | 200 | } else { |
| 209 | mm->mmap_base = mmap_base(); | 201 | mm->mmap_base = mmap_base(random_factor); |
| 210 | mm->get_unmapped_area = arch_get_unmapped_area_topdown; | 202 | mm->get_unmapped_area = arch_get_unmapped_area_topdown; |
| 211 | } | 203 | } |
| 212 | } | 204 | } |
| @@ -273,15 +265,20 @@ s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr, | |||
| 273 | */ | 265 | */ |
| 274 | void arch_pick_mmap_layout(struct mm_struct *mm) | 266 | void arch_pick_mmap_layout(struct mm_struct *mm) |
| 275 | { | 267 | { |
| 268 | unsigned long random_factor = 0UL; | ||
| 269 | |||
| 270 | if (current->flags & PF_RANDOMIZE) | ||
| 271 | random_factor = arch_mmap_rnd(); | ||
| 272 | |||
| 276 | /* | 273 | /* |
| 277 | * Fall back to the standard layout if the personality | 274 | * Fall back to the standard layout if the personality |
| 278 | * bit is set, or if the expected stack growth is unlimited: | 275 | * bit is set, or if the expected stack growth is unlimited: |
| 279 | */ | 276 | */ |
| 280 | if (mmap_is_legacy()) { | 277 | if (mmap_is_legacy()) { |
| 281 | mm->mmap_base = mmap_base_legacy(); | 278 | mm->mmap_base = mmap_base_legacy(random_factor); |
| 282 | mm->get_unmapped_area = s390_get_unmapped_area; | 279 | mm->get_unmapped_area = s390_get_unmapped_area; |
| 283 | } else { | 280 | } else { |
| 284 | mm->mmap_base = mmap_base(); | 281 | mm->mmap_base = mmap_base(random_factor); |
| 285 | mm->get_unmapped_area = s390_get_unmapped_area_topdown; | 282 | mm->get_unmapped_area = s390_get_unmapped_area_topdown; |
| 286 | } | 283 | } |
| 287 | } | 284 | } |
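
Across the arch_pick_mmap_layout() conversions in this series the shape is the same: compute the random factor once, only when PF_RANDOMIZE is set, and pass it down to the base helpers. A condensed user-space model of that calling pattern — the names mirror the kernel code above, but the constants and helper bodies are illustrative assumptions, not the kernel source:

/* Illustrative model of the common arch_pick_mmap_layout() shape after
 * this series: the PF_RANDOMIZE check lives in the caller and the base
 * helpers only consume a pre-computed factor. */
#include <stdio.h>
#include <stdlib.h>

#define TASK_UNMAPPED_BASE 0x40000000UL    /* example value */
#define TASK_SIZE          0x800000000UL   /* example value */

static unsigned long arch_mmap_rnd(void)
{
	return ((unsigned long)rand() & 0x7ff) << 12;   /* s390 32-bit style mask */
}

static unsigned long mmap_base_legacy(unsigned long rnd)
{
	return TASK_UNMAPPED_BASE + rnd;
}

static unsigned long mmap_base(unsigned long rnd)
{
	return TASK_SIZE - 0x1000000UL - rnd;           /* stack-gap handling elided */
}

int main(int argc, char **argv)
{
	int randomize = argc > 1;          /* stands in for PF_RANDOMIZE */
	int legacy = argc > 2;             /* stands in for mmap_is_legacy() */
	unsigned long random_factor = 0UL;

	if (randomize)
		random_factor = arch_mmap_rnd();

	printf("mmap_base = %#lx\n",
	       legacy ? mmap_base_legacy(random_factor) : mmap_base(random_factor));
	return 0;
}
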
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index eb4ef274ae9b..50057fed819d 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig | |||
| @@ -162,6 +162,10 @@ config NEED_DMA_MAP_STATE | |||
| 162 | config NEED_SG_DMA_LENGTH | 162 | config NEED_SG_DMA_LENGTH |
| 163 | def_bool y | 163 | def_bool y |
| 164 | 164 | ||
| 165 | config PGTABLE_LEVELS | ||
| 166 | default 3 if X2TLB | ||
| 167 | default 2 | ||
| 168 | |||
| 165 | source "init/Kconfig" | 169 | source "init/Kconfig" |
| 166 | 170 | ||
| 167 | source "kernel/Kconfig.freezer" | 171 | source "kernel/Kconfig.freezer" |
diff --git a/arch/sh/kernel/dwarf.c b/arch/sh/kernel/dwarf.c index 67a049e75ec1..9d209a07235e 100644 --- a/arch/sh/kernel/dwarf.c +++ b/arch/sh/kernel/dwarf.c | |||
| @@ -993,7 +993,7 @@ static struct unwinder dwarf_unwinder = { | |||
| 993 | .rating = 150, | 993 | .rating = 150, |
| 994 | }; | 994 | }; |
| 995 | 995 | ||
| 996 | static void dwarf_unwinder_cleanup(void) | 996 | static void __init dwarf_unwinder_cleanup(void) |
| 997 | { | 997 | { |
| 998 | struct dwarf_fde *fde, *next_fde; | 998 | struct dwarf_fde *fde, *next_fde; |
| 999 | struct dwarf_cie *cie, *next_cie; | 999 | struct dwarf_cie *cie, *next_cie; |
| @@ -1009,6 +1009,10 @@ static void dwarf_unwinder_cleanup(void) | |||
| 1009 | rbtree_postorder_for_each_entry_safe(cie, next_cie, &cie_root, node) | 1009 | rbtree_postorder_for_each_entry_safe(cie, next_cie, &cie_root, node) |
| 1010 | kfree(cie); | 1010 | kfree(cie); |
| 1011 | 1011 | ||
| 1012 | if (dwarf_reg_pool) | ||
| 1013 | mempool_destroy(dwarf_reg_pool); | ||
| 1014 | if (dwarf_frame_pool) | ||
| 1015 | mempool_destroy(dwarf_frame_pool); | ||
| 1012 | kmem_cache_destroy(dwarf_reg_cachep); | 1016 | kmem_cache_destroy(dwarf_reg_cachep); |
| 1013 | kmem_cache_destroy(dwarf_frame_cachep); | 1017 | kmem_cache_destroy(dwarf_frame_cachep); |
| 1014 | } | 1018 | } |
| @@ -1176,17 +1180,13 @@ static int __init dwarf_unwinder_init(void) | |||
| 1176 | sizeof(struct dwarf_reg), 0, | 1180 | sizeof(struct dwarf_reg), 0, |
| 1177 | SLAB_PANIC | SLAB_HWCACHE_ALIGN | SLAB_NOTRACK, NULL); | 1181 | SLAB_PANIC | SLAB_HWCACHE_ALIGN | SLAB_NOTRACK, NULL); |
| 1178 | 1182 | ||
| 1179 | dwarf_frame_pool = mempool_create(DWARF_FRAME_MIN_REQ, | 1183 | dwarf_frame_pool = mempool_create_slab_pool(DWARF_FRAME_MIN_REQ, |
| 1180 | mempool_alloc_slab, | 1184 | dwarf_frame_cachep); |
| 1181 | mempool_free_slab, | ||
| 1182 | dwarf_frame_cachep); | ||
| 1183 | if (!dwarf_frame_pool) | 1185 | if (!dwarf_frame_pool) |
| 1184 | goto out; | 1186 | goto out; |
| 1185 | 1187 | ||
| 1186 | dwarf_reg_pool = mempool_create(DWARF_REG_MIN_REQ, | 1188 | dwarf_reg_pool = mempool_create_slab_pool(DWARF_REG_MIN_REQ, |
| 1187 | mempool_alloc_slab, | 1189 | dwarf_reg_cachep); |
| 1188 | mempool_free_slab, | ||
| 1189 | dwarf_reg_cachep); | ||
| 1190 | if (!dwarf_reg_pool) | 1190 | if (!dwarf_reg_pool) |
| 1191 | goto out; | 1191 | goto out; |
| 1192 | 1192 | ||
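
The sh dwarf unwinder change is unrelated to page tables: it swaps the open-coded mempool_create(..., mempool_alloc_slab, mempool_free_slab, cache) calls for the mempool_create_slab_pool() shorthand and tears the pools down on the cleanup path, now marked __init. A kernel-context sketch of the shorthand (it will not build in user space); the cache name, object type and minimum count are made up for illustration and are not from this patch:

/* Kernel-context sketch only: mempool_create_slab_pool() is equivalent
 * to mempool_create(min_nr, mempool_alloc_slab, mempool_free_slab, cache). */
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/slab.h>

struct demo_obj { int x; };

static struct kmem_cache *demo_cachep;
static mempool_t *demo_pool;

static int __init demo_pool_init(void)
{
	demo_cachep = kmem_cache_create("demo_obj", sizeof(struct demo_obj),
					0, SLAB_HWCACHE_ALIGN, NULL);
	if (!demo_cachep)
		return -ENOMEM;

	demo_pool = mempool_create_slab_pool(8, demo_cachep);
	if (!demo_pool) {
		kmem_cache_destroy(demo_cachep);
		return -ENOMEM;
	}
	return 0;
}

static void demo_pool_exit(void)
{
	mempool_destroy(demo_pool);
	kmem_cache_destroy(demo_cachep);
}
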
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index efb00ec75805..e49502acbab4 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig | |||
| @@ -146,6 +146,10 @@ config GENERIC_ISA_DMA | |||
| 146 | config ARCH_SUPPORTS_DEBUG_PAGEALLOC | 146 | config ARCH_SUPPORTS_DEBUG_PAGEALLOC |
| 147 | def_bool y if SPARC64 | 147 | def_bool y if SPARC64 |
| 148 | 148 | ||
| 149 | config PGTABLE_LEVELS | ||
| 150 | default 4 if 64BIT | ||
| 151 | default 3 | ||
| 152 | |||
| 149 | source "init/Kconfig" | 153 | source "init/Kconfig" |
| 150 | 154 | ||
| 151 | source "kernel/Kconfig.freezer" | 155 | source "kernel/Kconfig.freezer" |
diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index 99632a87e697..26c80e18d7b1 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c | |||
| @@ -130,26 +130,26 @@ static struct mdesc_mem_ops memblock_mdesc_ops = { | |||
| 130 | static struct mdesc_handle *mdesc_kmalloc(unsigned int mdesc_size) | 130 | static struct mdesc_handle *mdesc_kmalloc(unsigned int mdesc_size) |
| 131 | { | 131 | { |
| 132 | unsigned int handle_size; | 132 | unsigned int handle_size; |
| 133 | struct mdesc_handle *hp; | ||
| 134 | unsigned long addr; | ||
| 133 | void *base; | 135 | void *base; |
| 134 | 136 | ||
| 135 | handle_size = (sizeof(struct mdesc_handle) - | 137 | handle_size = (sizeof(struct mdesc_handle) - |
| 136 | sizeof(struct mdesc_hdr) + | 138 | sizeof(struct mdesc_hdr) + |
| 137 | mdesc_size); | 139 | mdesc_size); |
| 138 | 140 | ||
| 141 | /* | ||
| 142 | * Allocation has to succeed because mdesc update would be missed | ||
| 143 | * and such events are not retransmitted. | ||
| 144 | */ | ||
| 139 | base = kmalloc(handle_size + 15, GFP_KERNEL | __GFP_NOFAIL); | 145 | base = kmalloc(handle_size + 15, GFP_KERNEL | __GFP_NOFAIL); |
| 140 | if (base) { | 146 | addr = (unsigned long)base; |
| 141 | struct mdesc_handle *hp; | 147 | addr = (addr + 15UL) & ~15UL; |
| 142 | unsigned long addr; | 148 | hp = (struct mdesc_handle *) addr; |
| 143 | |||
| 144 | addr = (unsigned long)base; | ||
| 145 | addr = (addr + 15UL) & ~15UL; | ||
| 146 | hp = (struct mdesc_handle *) addr; | ||
| 147 | 149 | ||
| 148 | mdesc_handle_init(hp, handle_size, base); | 150 | mdesc_handle_init(hp, handle_size, base); |
| 149 | return hp; | ||
| 150 | } | ||
| 151 | 151 | ||
| 152 | return NULL; | 152 | return hp; |
| 153 | } | 153 | } |
| 154 | 154 | ||
| 155 | static void mdesc_kfree(struct mdesc_handle *hp) | 155 | static void mdesc_kfree(struct mdesc_handle *hp) |
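
The mdesc_kmalloc() rewrite relies on __GFP_NOFAIL, so the NULL check and the nested block it forced disappear, and the new comment records why the allocation must not fail. The 16-byte alignment fix-up is ordinary round-up arithmetic, sketched below in user space with malloc() in place of kmalloc():

/* Illustrative only: the over-allocate-and-round-up idiom used by
 * mdesc_kmalloc() above. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	size_t need = 100;
	void *base = malloc(need + 15);         /* slack for alignment */
	uintptr_t addr = (uintptr_t)base;

	if (!base)
		return 1;
	addr = (addr + 15) & ~(uintptr_t)15;    /* round up to a 16-byte boundary */
	printf("raw %p -> aligned %#lx\n", base, (unsigned long)addr);

	free(base);                             /* always free the original pointer */
	return 0;
}
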
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 7cca41842a9e..0142d578b5a8 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig | |||
| @@ -147,6 +147,11 @@ config ARCH_DEFCONFIG | |||
| 147 | default "arch/tile/configs/tilepro_defconfig" if !TILEGX | 147 | default "arch/tile/configs/tilepro_defconfig" if !TILEGX |
| 148 | default "arch/tile/configs/tilegx_defconfig" if TILEGX | 148 | default "arch/tile/configs/tilegx_defconfig" if TILEGX |
| 149 | 149 | ||
| 150 | config PGTABLE_LEVELS | ||
| 151 | int | ||
| 152 | default 3 if 64BIT | ||
| 153 | default 2 | ||
| 154 | |||
| 150 | source "init/Kconfig" | 155 | source "init/Kconfig" |
| 151 | 156 | ||
| 152 | source "kernel/Kconfig.freezer" | 157 | source "kernel/Kconfig.freezer" |
diff --git a/arch/um/Kconfig.um b/arch/um/Kconfig.um index a7520c90f62d..5dbfe3d9107c 100644 --- a/arch/um/Kconfig.um +++ b/arch/um/Kconfig.um | |||
| @@ -155,3 +155,8 @@ config MMAPPER | |||
| 155 | 155 | ||
| 156 | config NO_DMA | 156 | config NO_DMA |
| 157 | def_bool y | 157 | def_bool y |
| 158 | |||
| 159 | config PGTABLE_LEVELS | ||
| 160 | int | ||
| 161 | default 3 if 3_LEVEL_PGTABLES | ||
| 162 | default 2 | ||
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index faff6934c05a..d43e7e1c784b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
| @@ -87,7 +87,7 @@ config X86 | |||
| 87 | select HAVE_ARCH_KMEMCHECK | 87 | select HAVE_ARCH_KMEMCHECK |
| 88 | select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP | 88 | select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP |
| 89 | select HAVE_USER_RETURN_NOTIFIER | 89 | select HAVE_USER_RETURN_NOTIFIER |
| 90 | select ARCH_BINFMT_ELF_RANDOMIZE_PIE | 90 | select ARCH_HAS_ELF_RANDOMIZE |
| 91 | select HAVE_ARCH_JUMP_LABEL | 91 | select HAVE_ARCH_JUMP_LABEL |
| 92 | select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE | 92 | select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE |
| 93 | select SPARSE_IRQ | 93 | select SPARSE_IRQ |
| @@ -99,6 +99,7 @@ config X86 | |||
| 99 | select IRQ_FORCED_THREADING | 99 | select IRQ_FORCED_THREADING |
| 100 | select HAVE_BPF_JIT if X86_64 | 100 | select HAVE_BPF_JIT if X86_64 |
| 101 | select HAVE_ARCH_TRANSPARENT_HUGEPAGE | 101 | select HAVE_ARCH_TRANSPARENT_HUGEPAGE |
| 102 | select HAVE_ARCH_HUGE_VMAP if X86_64 || (X86_32 && X86_PAE) | ||
| 102 | select ARCH_HAS_SG_CHAIN | 103 | select ARCH_HAS_SG_CHAIN |
| 103 | select CLKEVT_I8253 | 104 | select CLKEVT_I8253 |
| 104 | select ARCH_HAVE_NMI_SAFE_CMPXCHG | 105 | select ARCH_HAVE_NMI_SAFE_CMPXCHG |
| @@ -277,6 +278,12 @@ config ARCH_SUPPORTS_UPROBES | |||
| 277 | config FIX_EARLYCON_MEM | 278 | config FIX_EARLYCON_MEM |
| 278 | def_bool y | 279 | def_bool y |
| 279 | 280 | ||
| 281 | config PGTABLE_LEVELS | ||
| 282 | int | ||
| 283 | default 4 if X86_64 | ||
| 284 | default 3 if X86_PAE | ||
| 285 | default 2 | ||
| 286 | |||
| 280 | source "init/Kconfig" | 287 | source "init/Kconfig" |
| 281 | source "kernel/Kconfig.freezer" | 288 | source "kernel/Kconfig.freezer" |
| 282 | 289 | ||
| @@ -714,17 +721,6 @@ endif #HYPERVISOR_GUEST | |||
| 714 | config NO_BOOTMEM | 721 | config NO_BOOTMEM |
| 715 | def_bool y | 722 | def_bool y |
| 716 | 723 | ||
| 717 | config MEMTEST | ||
| 718 | bool "Memtest" | ||
| 719 | ---help--- | ||
| 720 | This option adds a kernel parameter 'memtest', which allows memtest | ||
| 721 | to be set. | ||
| 722 | memtest=0, mean disabled; -- default | ||
| 723 | memtest=1, mean do 1 test pattern; | ||
| 724 | ... | ||
| 725 | memtest=4, mean do 4 test patterns. | ||
| 726 | If you are unsure how to answer this question, answer N. | ||
| 727 | |||
| 728 | source "arch/x86/Kconfig.cpu" | 724 | source "arch/x86/Kconfig.cpu" |
| 729 | 725 | ||
| 730 | config HPET_TIMER | 726 | config HPET_TIMER |
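
The MEMTEST Kconfig entry leaves arch/x86 here because the series moves memtest into common mm code; the generic side of the move is not part of this excerpt. The test itself is a simple pattern fill-and-verify pass over free memory. A rough user-space illustration of that idea — not the kernel implementation, which walks physical ranges via memblock and reserves anything that reads back wrong:

/* Rough illustration only of the fill-and-verify idea behind a memtest pass. */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	size_t words = 1 << 20;
	unsigned long *buf = malloc(words * sizeof(*buf));
	unsigned long pattern = 0x5555555555555555UL;
	size_t i, bad = 0;

	if (!buf)
		return 1;
	for (i = 0; i < words; i++)
		buf[i] = pattern;               /* fill */
	for (i = 0; i < words; i++)
		if (buf[i] != pattern)          /* verify */
			bad++;
	printf("%zu bad words\n", bad);
	free(buf);
	return 0;
}
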
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index 779c2efe2e97..3ab0537872fb 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h | |||
| @@ -40,14 +40,6 @@ static inline void e820_mark_nosave_regions(unsigned long limit_pfn) | |||
| 40 | } | 40 | } |
| 41 | #endif | 41 | #endif |
| 42 | 42 | ||
| 43 | #ifdef CONFIG_MEMTEST | ||
| 44 | extern void early_memtest(unsigned long start, unsigned long end); | ||
| 45 | #else | ||
| 46 | static inline void early_memtest(unsigned long start, unsigned long end) | ||
| 47 | { | ||
| 48 | } | ||
| 49 | #endif | ||
| 50 | |||
| 51 | extern unsigned long e820_end_of_ram_pfn(void); | 43 | extern unsigned long e820_end_of_ram_pfn(void); |
| 52 | extern unsigned long e820_end_of_low_ram_pfn(void); | 44 | extern unsigned long e820_end_of_low_ram_pfn(void); |
| 53 | extern u64 early_reserve_e820(u64 sizet, u64 align); | 45 | extern u64 early_reserve_e820(u64 sizet, u64 align); |
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 935588d95c82..f161c189c27b 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h | |||
| @@ -339,9 +339,6 @@ extern int compat_arch_setup_additional_pages(struct linux_binprm *bprm, | |||
| 339 | int uses_interp); | 339 | int uses_interp); |
| 340 | #define compat_arch_setup_additional_pages compat_arch_setup_additional_pages | 340 | #define compat_arch_setup_additional_pages compat_arch_setup_additional_pages |
| 341 | 341 | ||
| 342 | extern unsigned long arch_randomize_brk(struct mm_struct *mm); | ||
| 343 | #define arch_randomize_brk arch_randomize_brk | ||
| 344 | |||
| 345 | /* | 342 | /* |
| 346 | * True on X86_32 or when emulating IA32 on X86_64 | 343 | * True on X86_32 or when emulating IA32 on X86_64 |
| 347 | */ | 344 | */ |
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index f97fbe3abb67..c7c712f2648b 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h | |||
| @@ -40,8 +40,10 @@ | |||
| 40 | 40 | ||
| 41 | #ifdef CONFIG_X86_64 | 41 | #ifdef CONFIG_X86_64 |
| 42 | #include <asm/page_64_types.h> | 42 | #include <asm/page_64_types.h> |
| 43 | #define IOREMAP_MAX_ORDER (PUD_SHIFT) | ||
| 43 | #else | 44 | #else |
| 44 | #include <asm/page_32_types.h> | 45 | #include <asm/page_32_types.h> |
| 46 | #define IOREMAP_MAX_ORDER (PMD_SHIFT) | ||
| 45 | #endif /* CONFIG_X86_64 */ | 47 | #endif /* CONFIG_X86_64 */ |
| 46 | 48 | ||
| 47 | #ifndef __ASSEMBLY__ | 49 | #ifndef __ASSEMBLY__ |
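
IOREMAP_MAX_ORDER caps the largest mapping granule ioremap() may use: PUD_SHIFT (1 GB pages) on 64-bit and PMD_SHIFT on 32-bit (2 MB with PAE, 4 MB without). A quick user-space check of the implied sizes; the shift values below are typical x86 assumptions, not taken from this hunk:

/* Illustrative only: mapping sizes implied by IOREMAP_MAX_ORDER above. */
#include <stdio.h>

#define PUD_SHIFT 30   /* x86_64 */
#define PMD_SHIFT 21   /* x86_32 with PAE */

int main(void)
{
	printf("64-bit max ioremap granule: %lu MB\n", (1UL << PUD_SHIFT) >> 20);
	printf("32-bit max ioremap granule: %lu MB\n", (1UL << PMD_SHIFT) >> 20);
	return 0;
}
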
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 5f6051d5d139..8957810ad7d1 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h | |||
| @@ -545,7 +545,7 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) | |||
| 545 | PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, val); | 545 | PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, val); |
| 546 | } | 546 | } |
| 547 | 547 | ||
| 548 | #if PAGETABLE_LEVELS >= 3 | 548 | #if CONFIG_PGTABLE_LEVELS >= 3 |
| 549 | static inline pmd_t __pmd(pmdval_t val) | 549 | static inline pmd_t __pmd(pmdval_t val) |
| 550 | { | 550 | { |
| 551 | pmdval_t ret; | 551 | pmdval_t ret; |
| @@ -585,7 +585,7 @@ static inline void set_pud(pud_t *pudp, pud_t pud) | |||
| 585 | PVOP_VCALL2(pv_mmu_ops.set_pud, pudp, | 585 | PVOP_VCALL2(pv_mmu_ops.set_pud, pudp, |
| 586 | val); | 586 | val); |
| 587 | } | 587 | } |
| 588 | #if PAGETABLE_LEVELS == 4 | 588 | #if CONFIG_PGTABLE_LEVELS == 4 |
| 589 | static inline pud_t __pud(pudval_t val) | 589 | static inline pud_t __pud(pudval_t val) |
| 590 | { | 590 | { |
| 591 | pudval_t ret; | 591 | pudval_t ret; |
| @@ -636,9 +636,9 @@ static inline void pud_clear(pud_t *pudp) | |||
| 636 | set_pud(pudp, __pud(0)); | 636 | set_pud(pudp, __pud(0)); |
| 637 | } | 637 | } |
| 638 | 638 | ||
| 639 | #endif /* PAGETABLE_LEVELS == 4 */ | 639 | #endif /* CONFIG_PGTABLE_LEVELS == 4 */ |
| 640 | 640 | ||
| 641 | #endif /* PAGETABLE_LEVELS >= 3 */ | 641 | #endif /* CONFIG_PGTABLE_LEVELS >= 3 */ |
| 642 | 642 | ||
| 643 | #ifdef CONFIG_X86_PAE | 643 | #ifdef CONFIG_X86_PAE |
| 644 | /* Special-case pte-setting operations for PAE, which can't update a | 644 | /* Special-case pte-setting operations for PAE, which can't update a |
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 7549b8b369e4..f7b0b5c112f2 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h | |||
| @@ -294,7 +294,7 @@ struct pv_mmu_ops { | |||
| 294 | struct paravirt_callee_save pgd_val; | 294 | struct paravirt_callee_save pgd_val; |
| 295 | struct paravirt_callee_save make_pgd; | 295 | struct paravirt_callee_save make_pgd; |
| 296 | 296 | ||
| 297 | #if PAGETABLE_LEVELS >= 3 | 297 | #if CONFIG_PGTABLE_LEVELS >= 3 |
| 298 | #ifdef CONFIG_X86_PAE | 298 | #ifdef CONFIG_X86_PAE |
| 299 | void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); | 299 | void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); |
| 300 | void (*pte_clear)(struct mm_struct *mm, unsigned long addr, | 300 | void (*pte_clear)(struct mm_struct *mm, unsigned long addr, |
| @@ -308,13 +308,13 @@ struct pv_mmu_ops { | |||
| 308 | struct paravirt_callee_save pmd_val; | 308 | struct paravirt_callee_save pmd_val; |
| 309 | struct paravirt_callee_save make_pmd; | 309 | struct paravirt_callee_save make_pmd; |
| 310 | 310 | ||
| 311 | #if PAGETABLE_LEVELS == 4 | 311 | #if CONFIG_PGTABLE_LEVELS == 4 |
| 312 | struct paravirt_callee_save pud_val; | 312 | struct paravirt_callee_save pud_val; |
| 313 | struct paravirt_callee_save make_pud; | 313 | struct paravirt_callee_save make_pud; |
| 314 | 314 | ||
| 315 | void (*set_pgd)(pgd_t *pudp, pgd_t pgdval); | 315 | void (*set_pgd)(pgd_t *pudp, pgd_t pgdval); |
| 316 | #endif /* PAGETABLE_LEVELS == 4 */ | 316 | #endif /* CONFIG_PGTABLE_LEVELS == 4 */ |
| 317 | #endif /* PAGETABLE_LEVELS >= 3 */ | 317 | #endif /* CONFIG_PGTABLE_LEVELS >= 3 */ |
| 318 | 318 | ||
| 319 | struct pv_lazy_ops lazy_mode; | 319 | struct pv_lazy_ops lazy_mode; |
| 320 | 320 | ||
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index c4412e972bbd..bf7f8b55b0f9 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h | |||
| @@ -77,7 +77,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, | |||
| 77 | 77 | ||
| 78 | #define pmd_pgtable(pmd) pmd_page(pmd) | 78 | #define pmd_pgtable(pmd) pmd_page(pmd) |
| 79 | 79 | ||
| 80 | #if PAGETABLE_LEVELS > 2 | 80 | #if CONFIG_PGTABLE_LEVELS > 2 |
| 81 | static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) | 81 | static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) |
| 82 | { | 82 | { |
| 83 | struct page *page; | 83 | struct page *page; |
| @@ -116,7 +116,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) | |||
| 116 | } | 116 | } |
| 117 | #endif /* CONFIG_X86_PAE */ | 117 | #endif /* CONFIG_X86_PAE */ |
| 118 | 118 | ||
| 119 | #if PAGETABLE_LEVELS > 3 | 119 | #if CONFIG_PGTABLE_LEVELS > 3 |
| 120 | static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) | 120 | static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) |
| 121 | { | 121 | { |
| 122 | paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT); | 122 | paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT); |
| @@ -142,7 +142,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, | |||
| 142 | ___pud_free_tlb(tlb, pud); | 142 | ___pud_free_tlb(tlb, pud); |
| 143 | } | 143 | } |
| 144 | 144 | ||
| 145 | #endif /* PAGETABLE_LEVELS > 3 */ | 145 | #endif /* CONFIG_PGTABLE_LEVELS > 3 */ |
| 146 | #endif /* PAGETABLE_LEVELS > 2 */ | 146 | #endif /* CONFIG_PGTABLE_LEVELS > 2 */ |
| 147 | 147 | ||
| 148 | #endif /* _ASM_X86_PGALLOC_H */ | 148 | #endif /* _ASM_X86_PGALLOC_H */ |
diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h index daacc23e3fb9..392576433e77 100644 --- a/arch/x86/include/asm/pgtable-2level_types.h +++ b/arch/x86/include/asm/pgtable-2level_types.h | |||
| @@ -17,7 +17,6 @@ typedef union { | |||
| 17 | #endif /* !__ASSEMBLY__ */ | 17 | #endif /* !__ASSEMBLY__ */ |
| 18 | 18 | ||
| 19 | #define SHARED_KERNEL_PMD 0 | 19 | #define SHARED_KERNEL_PMD 0 |
| 20 | #define PAGETABLE_LEVELS 2 | ||
| 21 | 20 | ||
| 22 | /* | 21 | /* |
| 23 | * traditional i386 two-level paging structure: | 22 | * traditional i386 two-level paging structure: |
diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h index 1bd5876c8649..bcc89625ebe5 100644 --- a/arch/x86/include/asm/pgtable-3level_types.h +++ b/arch/x86/include/asm/pgtable-3level_types.h | |||
| @@ -24,8 +24,6 @@ typedef union { | |||
| 24 | #define SHARED_KERNEL_PMD 1 | 24 | #define SHARED_KERNEL_PMD 1 |
| 25 | #endif | 25 | #endif |
| 26 | 26 | ||
| 27 | #define PAGETABLE_LEVELS 3 | ||
| 28 | |||
| 29 | /* | 27 | /* |
| 30 | * PGDIR_SHIFT determines what a top-level page table entry can map | 28 | * PGDIR_SHIFT determines what a top-level page table entry can map |
| 31 | */ | 29 | */ |
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index a0c35bf6cb92..fe57e7a98839 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h | |||
| @@ -551,7 +551,7 @@ static inline unsigned long pages_to_mb(unsigned long npg) | |||
| 551 | return npg >> (20 - PAGE_SHIFT); | 551 | return npg >> (20 - PAGE_SHIFT); |
| 552 | } | 552 | } |
| 553 | 553 | ||
| 554 | #if PAGETABLE_LEVELS > 2 | 554 | #if CONFIG_PGTABLE_LEVELS > 2 |
| 555 | static inline int pud_none(pud_t pud) | 555 | static inline int pud_none(pud_t pud) |
| 556 | { | 556 | { |
| 557 | return native_pud_val(pud) == 0; | 557 | return native_pud_val(pud) == 0; |
| @@ -594,9 +594,9 @@ static inline int pud_large(pud_t pud) | |||
| 594 | { | 594 | { |
| 595 | return 0; | 595 | return 0; |
| 596 | } | 596 | } |
| 597 | #endif /* PAGETABLE_LEVELS > 2 */ | 597 | #endif /* CONFIG_PGTABLE_LEVELS > 2 */ |
| 598 | 598 | ||
| 599 | #if PAGETABLE_LEVELS > 3 | 599 | #if CONFIG_PGTABLE_LEVELS > 3 |
| 600 | static inline int pgd_present(pgd_t pgd) | 600 | static inline int pgd_present(pgd_t pgd) |
| 601 | { | 601 | { |
| 602 | return pgd_flags(pgd) & _PAGE_PRESENT; | 602 | return pgd_flags(pgd) & _PAGE_PRESENT; |
| @@ -633,7 +633,7 @@ static inline int pgd_none(pgd_t pgd) | |||
| 633 | { | 633 | { |
| 634 | return !native_pgd_val(pgd); | 634 | return !native_pgd_val(pgd); |
| 635 | } | 635 | } |
| 636 | #endif /* PAGETABLE_LEVELS > 3 */ | 636 | #endif /* CONFIG_PGTABLE_LEVELS > 3 */ |
| 637 | 637 | ||
| 638 | #endif /* __ASSEMBLY__ */ | 638 | #endif /* __ASSEMBLY__ */ |
| 639 | 639 | ||
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 602b6028c5b6..e6844dfb4471 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h | |||
| @@ -20,7 +20,6 @@ typedef struct { pteval_t pte; } pte_t; | |||
| 20 | #endif /* !__ASSEMBLY__ */ | 20 | #endif /* !__ASSEMBLY__ */ |
| 21 | 21 | ||
| 22 | #define SHARED_KERNEL_PMD 0 | 22 | #define SHARED_KERNEL_PMD 0 |
| 23 | #define PAGETABLE_LEVELS 4 | ||
| 24 | 23 | ||
| 25 | /* | 24 | /* |
| 26 | * PGDIR_SHIFT determines what a top-level page table entry can map | 25 | * PGDIR_SHIFT determines what a top-level page table entry can map |
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 8c7c10802e9c..78f0c8cbe316 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h | |||
| @@ -234,7 +234,7 @@ static inline pgdval_t pgd_flags(pgd_t pgd) | |||
| 234 | return native_pgd_val(pgd) & PTE_FLAGS_MASK; | 234 | return native_pgd_val(pgd) & PTE_FLAGS_MASK; |
| 235 | } | 235 | } |
| 236 | 236 | ||
| 237 | #if PAGETABLE_LEVELS > 3 | 237 | #if CONFIG_PGTABLE_LEVELS > 3 |
| 238 | typedef struct { pudval_t pud; } pud_t; | 238 | typedef struct { pudval_t pud; } pud_t; |
| 239 | 239 | ||
| 240 | static inline pud_t native_make_pud(pmdval_t val) | 240 | static inline pud_t native_make_pud(pmdval_t val) |
| @@ -255,7 +255,7 @@ static inline pudval_t native_pud_val(pud_t pud) | |||
| 255 | } | 255 | } |
| 256 | #endif | 256 | #endif |
| 257 | 257 | ||
| 258 | #if PAGETABLE_LEVELS > 2 | 258 | #if CONFIG_PGTABLE_LEVELS > 2 |
| 259 | typedef struct { pmdval_t pmd; } pmd_t; | 259 | typedef struct { pmdval_t pmd; } pmd_t; |
| 260 | 260 | ||
| 261 | static inline pmd_t native_make_pmd(pmdval_t val) | 261 | static inline pmd_t native_make_pmd(pmdval_t val) |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index e354cc6446ab..9435620062df 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
| @@ -513,7 +513,7 @@ void __init kvm_guest_init(void) | |||
| 513 | * can get false positives too easily, for example if the host is | 513 | * can get false positives too easily, for example if the host is |
| 514 | * overcommitted. | 514 | * overcommitted. |
| 515 | */ | 515 | */ |
| 516 | watchdog_enable_hardlockup_detector(false); | 516 | hardlockup_detector_disable(); |
| 517 | } | 517 | } |
| 518 | 518 | ||
| 519 | static noinline uint32_t __kvm_cpuid_base(void) | 519 | static noinline uint32_t __kvm_cpuid_base(void) |
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 548d25f00c90..c614dd492f5f 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
| @@ -443,7 +443,7 @@ struct pv_mmu_ops pv_mmu_ops = { | |||
| 443 | .ptep_modify_prot_start = __ptep_modify_prot_start, | 443 | .ptep_modify_prot_start = __ptep_modify_prot_start, |
| 444 | .ptep_modify_prot_commit = __ptep_modify_prot_commit, | 444 | .ptep_modify_prot_commit = __ptep_modify_prot_commit, |
| 445 | 445 | ||
| 446 | #if PAGETABLE_LEVELS >= 3 | 446 | #if CONFIG_PGTABLE_LEVELS >= 3 |
| 447 | #ifdef CONFIG_X86_PAE | 447 | #ifdef CONFIG_X86_PAE |
| 448 | .set_pte_atomic = native_set_pte_atomic, | 448 | .set_pte_atomic = native_set_pte_atomic, |
| 449 | .pte_clear = native_pte_clear, | 449 | .pte_clear = native_pte_clear, |
| @@ -454,13 +454,13 @@ struct pv_mmu_ops pv_mmu_ops = { | |||
| 454 | .pmd_val = PTE_IDENT, | 454 | .pmd_val = PTE_IDENT, |
| 455 | .make_pmd = PTE_IDENT, | 455 | .make_pmd = PTE_IDENT, |
| 456 | 456 | ||
| 457 | #if PAGETABLE_LEVELS == 4 | 457 | #if CONFIG_PGTABLE_LEVELS == 4 |
| 458 | .pud_val = PTE_IDENT, | 458 | .pud_val = PTE_IDENT, |
| 459 | .make_pud = PTE_IDENT, | 459 | .make_pud = PTE_IDENT, |
| 460 | 460 | ||
| 461 | .set_pgd = native_set_pgd, | 461 | .set_pgd = native_set_pgd, |
| 462 | #endif | 462 | #endif |
| 463 | #endif /* PAGETABLE_LEVELS >= 3 */ | 463 | #endif /* CONFIG_PGTABLE_LEVELS >= 3 */ |
| 464 | 464 | ||
| 465 | .pte_val = PTE_IDENT, | 465 | .pte_val = PTE_IDENT, |
| 466 | .pgd_val = PTE_IDENT, | 466 | .pgd_val = PTE_IDENT, |
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index c4cc74006c61..a482d105172b 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile | |||
| @@ -32,6 +32,4 @@ obj-$(CONFIG_AMD_NUMA) += amdtopology.o | |||
| 32 | obj-$(CONFIG_ACPI_NUMA) += srat.o | 32 | obj-$(CONFIG_ACPI_NUMA) += srat.o |
| 33 | obj-$(CONFIG_NUMA_EMU) += numa_emulation.o | 33 | obj-$(CONFIG_NUMA_EMU) += numa_emulation.o |
| 34 | 34 | ||
| 35 | obj-$(CONFIG_MEMTEST) += memtest.o | ||
| 36 | |||
| 37 | obj-$(CONFIG_X86_INTEL_MPX) += mpx.o | 35 | obj-$(CONFIG_X86_INTEL_MPX) += mpx.o |
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index fdf617c00e2f..5ead4d6cf3a7 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c | |||
| @@ -67,8 +67,13 @@ static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages, | |||
| 67 | 67 | ||
| 68 | /* | 68 | /* |
| 69 | * Remap an arbitrary physical address space into the kernel virtual | 69 | * Remap an arbitrary physical address space into the kernel virtual |
| 70 | * address space. Needed when the kernel wants to access high addresses | 70 | * address space. It transparently creates kernel huge I/O mapping when |
| 71 | * directly. | 71 | * the physical address is aligned by a huge page size (1GB or 2MB) and |
| 72 | * the requested size is at least the huge page size. | ||
| 73 | * | ||
| 74 | * NOTE: MTRRs can override PAT memory types with a 4KB granularity. | ||
| 75 | * Therefore, the mapping code falls back to use a smaller page toward 4KB | ||
| 76 | * when a mapping range is covered by non-WB type of MTRRs. | ||
| 72 | * | 77 | * |
| 73 | * NOTE! We need to allow non-page-aligned mappings too: we will obviously | 78 | * NOTE! We need to allow non-page-aligned mappings too: we will obviously |
| 74 | * have to convert them into an offset in a page-aligned mapping, but the | 79 | * have to convert them into an offset in a page-aligned mapping, but the |
| @@ -326,6 +331,20 @@ void iounmap(volatile void __iomem *addr) | |||
| 326 | } | 331 | } |
| 327 | EXPORT_SYMBOL(iounmap); | 332 | EXPORT_SYMBOL(iounmap); |
| 328 | 333 | ||
| 334 | int arch_ioremap_pud_supported(void) | ||
| 335 | { | ||
| 336 | #ifdef CONFIG_X86_64 | ||
| 337 | return cpu_has_gbpages; | ||
| 338 | #else | ||
| 339 | return 0; | ||
| 340 | #endif | ||
| 341 | } | ||
| 342 | |||
| 343 | int arch_ioremap_pmd_supported(void) | ||
| 344 | { | ||
| 345 | return cpu_has_pse; | ||
| 346 | } | ||
| 347 | |||
| 329 | /* | 348 | /* |
| 330 | * Convert a physical pointer to a virtual kernel pointer for /dev/mem | 349 | * Convert a physical pointer to a virtual kernel pointer for /dev/mem |
| 331 | * access | 350 | * access |
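
The two new predicates let generic code ask whether the CPU can actually back a huge ioremap: gbpages for 1 GB (PUD) mappings on 64-bit, PSE for 2 MB (PMD) mappings. A hedged kernel-context sketch of how a generic caller might consult them; only the two predicate names come from this patch, the caller, its policy and the shift macros are assumed from standard kernel headers:

/* Kernel-context sketch, not from this patch: one plausible way generic
 * ioremap code could pick a mapping granule from the new predicates. */
static unsigned long pick_ioremap_granule(unsigned long phys, unsigned long size)
{
	if (arch_ioremap_pud_supported() &&
	    !(phys & ((1UL << PUD_SHIFT) - 1)) && size >= (1UL << PUD_SHIFT))
		return 1UL << PUD_SHIFT;        /* 1 GB mapping */

	if (arch_ioremap_pmd_supported() &&
	    !(phys & ((1UL << PMD_SHIFT) - 1)) && size >= (1UL << PMD_SHIFT))
		return 1UL << PMD_SHIFT;        /* 2 MB mapping */

	return 1UL << PAGE_SHIFT;               /* fall back to 4 KB */
}
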
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index df4552bd239e..9d518d693b4b 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c | |||
| @@ -65,24 +65,23 @@ static int mmap_is_legacy(void) | |||
| 65 | return sysctl_legacy_va_layout; | 65 | return sysctl_legacy_va_layout; |
| 66 | } | 66 | } |
| 67 | 67 | ||
| 68 | static unsigned long mmap_rnd(void) | 68 | unsigned long arch_mmap_rnd(void) |
| 69 | { | 69 | { |
| 70 | unsigned long rnd = 0; | 70 | unsigned long rnd; |
| 71 | 71 | ||
| 72 | /* | 72 | /* |
| 73 | * 8 bits of randomness in 32bit mmaps, 20 address space bits | 73 | * 8 bits of randomness in 32bit mmaps, 20 address space bits |
| 74 | * 28 bits of randomness in 64bit mmaps, 40 address space bits | 74 | * 28 bits of randomness in 64bit mmaps, 40 address space bits |
| 75 | */ | 75 | */ |
| 76 | if (current->flags & PF_RANDOMIZE) { | 76 | if (mmap_is_ia32()) |
| 77 | if (mmap_is_ia32()) | 77 | rnd = (unsigned long)get_random_int() % (1<<8); |
| 78 | rnd = get_random_int() % (1<<8); | 78 | else |
| 79 | else | 79 | rnd = (unsigned long)get_random_int() % (1<<28); |
| 80 | rnd = get_random_int() % (1<<28); | 80 | |
| 81 | } | ||
| 82 | return rnd << PAGE_SHIFT; | 81 | return rnd << PAGE_SHIFT; |
| 83 | } | 82 | } |
| 84 | 83 | ||
| 85 | static unsigned long mmap_base(void) | 84 | static unsigned long mmap_base(unsigned long rnd) |
| 86 | { | 85 | { |
| 87 | unsigned long gap = rlimit(RLIMIT_STACK); | 86 | unsigned long gap = rlimit(RLIMIT_STACK); |
| 88 | 87 | ||
| @@ -91,19 +90,19 @@ static unsigned long mmap_base(void) | |||
| 91 | else if (gap > MAX_GAP) | 90 | else if (gap > MAX_GAP) |
| 92 | gap = MAX_GAP; | 91 | gap = MAX_GAP; |
| 93 | 92 | ||
| 94 | return PAGE_ALIGN(TASK_SIZE - gap - mmap_rnd()); | 93 | return PAGE_ALIGN(TASK_SIZE - gap - rnd); |
| 95 | } | 94 | } |
| 96 | 95 | ||
| 97 | /* | 96 | /* |
| 98 | * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64 | 97 | * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64 |
| 99 | * does, but not when emulating X86_32 | 98 | * does, but not when emulating X86_32 |
| 100 | */ | 99 | */ |
| 101 | static unsigned long mmap_legacy_base(void) | 100 | static unsigned long mmap_legacy_base(unsigned long rnd) |
| 102 | { | 101 | { |
| 103 | if (mmap_is_ia32()) | 102 | if (mmap_is_ia32()) |
| 104 | return TASK_UNMAPPED_BASE; | 103 | return TASK_UNMAPPED_BASE; |
| 105 | else | 104 | else |
| 106 | return TASK_UNMAPPED_BASE + mmap_rnd(); | 105 | return TASK_UNMAPPED_BASE + rnd; |
| 107 | } | 106 | } |
| 108 | 107 | ||
| 109 | /* | 108 | /* |
| @@ -112,13 +111,18 @@ static unsigned long mmap_legacy_base(void) | |||
| 112 | */ | 111 | */ |
| 113 | void arch_pick_mmap_layout(struct mm_struct *mm) | 112 | void arch_pick_mmap_layout(struct mm_struct *mm) |
| 114 | { | 113 | { |
| 115 | mm->mmap_legacy_base = mmap_legacy_base(); | 114 | unsigned long random_factor = 0UL; |
| 116 | mm->mmap_base = mmap_base(); | 115 | |
| 116 | if (current->flags & PF_RANDOMIZE) | ||
| 117 | random_factor = arch_mmap_rnd(); | ||
| 118 | |||
| 119 | mm->mmap_legacy_base = mmap_legacy_base(random_factor); | ||
| 117 | 120 | ||
| 118 | if (mmap_is_legacy()) { | 121 | if (mmap_is_legacy()) { |
| 119 | mm->mmap_base = mm->mmap_legacy_base; | 122 | mm->mmap_base = mm->mmap_legacy_base; |
| 120 | mm->get_unmapped_area = arch_get_unmapped_area; | 123 | mm->get_unmapped_area = arch_get_unmapped_area; |
| 121 | } else { | 124 | } else { |
| 125 | mm->mmap_base = mmap_base(random_factor); | ||
| 122 | mm->get_unmapped_area = arch_get_unmapped_area_topdown; | 126 | mm->get_unmapped_area = arch_get_unmapped_area_topdown; |
| 123 | } | 127 | } |
| 124 | } | 128 | } |
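The comment retained above encodes the arithmetic: 8 random bits shifted left by PAGE_SHIFT randomize bits 12-19 of the mmap base, 28 bits randomize bits 12-39. A small standalone program (plain C, assuming 4KB pages, i.e. PAGE_SHIFT == 12) that just prints the resulting spans:

#include <stdio.h>

int main(void)
{
	unsigned long long page_shift = 12;  /* PAGE_SHIFT for 4KB pages */

	/* 8 random bits << 12: offsets cover 2^20 bytes (bits 12..19 vary) */
	printf("32-bit span: %llu bytes\n", (1ULL << 8) << page_shift);
	/* 28 random bits << 12: offsets cover 2^40 bytes (bits 12..39 vary) */
	printf("64-bit span: %llu bytes\n", (1ULL << 28) << page_shift);
	return 0;
}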
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 5a7e5252c878..0b97d2c75df3 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | #include <asm/pgtable.h> | 4 | #include <asm/pgtable.h> |
| 5 | #include <asm/tlb.h> | 5 | #include <asm/tlb.h> |
| 6 | #include <asm/fixmap.h> | 6 | #include <asm/fixmap.h> |
| 7 | #include <asm/mtrr.h> | ||
| 7 | 8 | ||
| 8 | #define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO | 9 | #define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO |
| 9 | 10 | ||
| @@ -58,7 +59,7 @@ void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte) | |||
| 58 | tlb_remove_page(tlb, pte); | 59 | tlb_remove_page(tlb, pte); |
| 59 | } | 60 | } |
| 60 | 61 | ||
| 61 | #if PAGETABLE_LEVELS > 2 | 62 | #if CONFIG_PGTABLE_LEVELS > 2 |
| 62 | void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) | 63 | void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) |
| 63 | { | 64 | { |
| 64 | struct page *page = virt_to_page(pmd); | 65 | struct page *page = virt_to_page(pmd); |
| @@ -74,14 +75,14 @@ void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) | |||
| 74 | tlb_remove_page(tlb, page); | 75 | tlb_remove_page(tlb, page); |
| 75 | } | 76 | } |
| 76 | 77 | ||
| 77 | #if PAGETABLE_LEVELS > 3 | 78 | #if CONFIG_PGTABLE_LEVELS > 3 |
| 78 | void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) | 79 | void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) |
| 79 | { | 80 | { |
| 80 | paravirt_release_pud(__pa(pud) >> PAGE_SHIFT); | 81 | paravirt_release_pud(__pa(pud) >> PAGE_SHIFT); |
| 81 | tlb_remove_page(tlb, virt_to_page(pud)); | 82 | tlb_remove_page(tlb, virt_to_page(pud)); |
| 82 | } | 83 | } |
| 83 | #endif /* PAGETABLE_LEVELS > 3 */ | 84 | #endif /* CONFIG_PGTABLE_LEVELS > 3 */ |
| 84 | #endif /* PAGETABLE_LEVELS > 2 */ | 85 | #endif /* CONFIG_PGTABLE_LEVELS > 2 */ |
| 85 | 86 | ||
| 86 | static inline void pgd_list_add(pgd_t *pgd) | 87 | static inline void pgd_list_add(pgd_t *pgd) |
| 87 | { | 88 | { |
| @@ -117,9 +118,9 @@ static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd) | |||
| 117 | /* If the pgd points to a shared pagetable level (either the | 118 | /* If the pgd points to a shared pagetable level (either the |
| 118 | ptes in non-PAE, or shared PMD in PAE), then just copy the | 119 | ptes in non-PAE, or shared PMD in PAE), then just copy the |
| 119 | references from swapper_pg_dir. */ | 120 | references from swapper_pg_dir. */ |
| 120 | if (PAGETABLE_LEVELS == 2 || | 121 | if (CONFIG_PGTABLE_LEVELS == 2 || |
| 121 | (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD) || | 122 | (CONFIG_PGTABLE_LEVELS == 3 && SHARED_KERNEL_PMD) || |
| 122 | PAGETABLE_LEVELS == 4) { | 123 | CONFIG_PGTABLE_LEVELS == 4) { |
| 123 | clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY, | 124 | clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY, |
| 124 | swapper_pg_dir + KERNEL_PGD_BOUNDARY, | 125 | swapper_pg_dir + KERNEL_PGD_BOUNDARY, |
| 125 | KERNEL_PGD_PTRS); | 126 | KERNEL_PGD_PTRS); |
| @@ -560,3 +561,67 @@ void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys, | |||
| 560 | { | 561 | { |
| 561 | __native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags)); | 562 | __native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags)); |
| 562 | } | 563 | } |
| 564 | |||
| 565 | #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP | ||
| 566 | int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) | ||
| 567 | { | ||
| 568 | u8 mtrr; | ||
| 569 | |||
| 570 | /* | ||
| 571 | * Do not use a huge page when the range is covered by non-WB type | ||
| 572 | * of MTRRs. | ||
| 573 | */ | ||
| 574 | mtrr = mtrr_type_lookup(addr, addr + PUD_SIZE); | ||
| 575 | if ((mtrr != MTRR_TYPE_WRBACK) && (mtrr != 0xFF)) | ||
| 576 | return 0; | ||
| 577 | |||
| 578 | prot = pgprot_4k_2_large(prot); | ||
| 579 | |||
| 580 | set_pte((pte_t *)pud, pfn_pte( | ||
| 581 | (u64)addr >> PAGE_SHIFT, | ||
| 582 | __pgprot(pgprot_val(prot) | _PAGE_PSE))); | ||
| 583 | |||
| 584 | return 1; | ||
| 585 | } | ||
| 586 | |||
| 587 | int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) | ||
| 588 | { | ||
| 589 | u8 mtrr; | ||
| 590 | |||
| 591 | /* | ||
| 592 | * Do not use a huge page when the range is covered by non-WB type | ||
| 593 | * of MTRRs. | ||
| 594 | */ | ||
| 595 | mtrr = mtrr_type_lookup(addr, addr + PMD_SIZE); | ||
| 596 | if ((mtrr != MTRR_TYPE_WRBACK) && (mtrr != 0xFF)) | ||
| 597 | return 0; | ||
| 598 | |||
| 599 | prot = pgprot_4k_2_large(prot); | ||
| 600 | |||
| 601 | set_pte((pte_t *)pmd, pfn_pte( | ||
| 602 | (u64)addr >> PAGE_SHIFT, | ||
| 603 | __pgprot(pgprot_val(prot) | _PAGE_PSE))); | ||
| 604 | |||
| 605 | return 1; | ||
| 606 | } | ||
| 607 | |||
| 608 | int pud_clear_huge(pud_t *pud) | ||
| 609 | { | ||
| 610 | if (pud_large(*pud)) { | ||
| 611 | pud_clear(pud); | ||
| 612 | return 1; | ||
| 613 | } | ||
| 614 | |||
| 615 | return 0; | ||
| 616 | } | ||
| 617 | |||
| 618 | int pmd_clear_huge(pmd_t *pmd) | ||
| 619 | { | ||
| 620 | if (pmd_large(*pmd)) { | ||
| 621 | pmd_clear(pmd); | ||
| 622 | return 1; | ||
| 623 | } | ||
| 624 | |||
| 625 | return 0; | ||
| 626 | } | ||
| 627 | #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ | ||
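pud_set_huge()/pmd_set_huge() return 1 when they installed a large entry and 0 when the caller must fall back, for example because a non-WB MTRR covers the range. A hedged sketch of a caller, with map_ptes_4k() as a hypothetical stand-in for the 4KB fallback path:

static int map_pmd_range(pmd_t *pmd, phys_addr_t phys, unsigned long size,
			 pgprot_t prot)
{
	if (size >= PMD_SIZE && !(phys & ~PMD_MASK) &&
	    pmd_set_huge(pmd, phys, prot))
		return 0;                       /* mapped with one 2MB entry */

	return map_ptes_4k(pmd, phys, size, prot);  /* hypothetical fallback */
}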
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index adca9e2b6553..65083ad63b6f 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
| @@ -502,7 +502,7 @@ __visible pmd_t xen_make_pmd(pmdval_t pmd) | |||
| 502 | } | 502 | } |
| 503 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd); | 503 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd); |
| 504 | 504 | ||
| 505 | #if PAGETABLE_LEVELS == 4 | 505 | #if CONFIG_PGTABLE_LEVELS == 4 |
| 506 | __visible pudval_t xen_pud_val(pud_t pud) | 506 | __visible pudval_t xen_pud_val(pud_t pud) |
| 507 | { | 507 | { |
| 508 | return pte_mfn_to_pfn(pud.pud); | 508 | return pte_mfn_to_pfn(pud.pud); |
| @@ -589,7 +589,7 @@ static void xen_set_pgd(pgd_t *ptr, pgd_t val) | |||
| 589 | 589 | ||
| 590 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 590 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
| 591 | } | 591 | } |
| 592 | #endif /* PAGETABLE_LEVELS == 4 */ | 592 | #endif /* CONFIG_PGTABLE_LEVELS == 4 */ |
| 593 | 593 | ||
| 594 | /* | 594 | /* |
| 595 | * (Yet another) pagetable walker. This one is intended for pinning a | 595 | * (Yet another) pagetable walker. This one is intended for pinning a |
| @@ -1628,7 +1628,7 @@ static void xen_release_pmd(unsigned long pfn) | |||
| 1628 | xen_release_ptpage(pfn, PT_PMD); | 1628 | xen_release_ptpage(pfn, PT_PMD); |
| 1629 | } | 1629 | } |
| 1630 | 1630 | ||
| 1631 | #if PAGETABLE_LEVELS == 4 | 1631 | #if CONFIG_PGTABLE_LEVELS == 4 |
| 1632 | static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn) | 1632 | static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn) |
| 1633 | { | 1633 | { |
| 1634 | xen_alloc_ptpage(mm, pfn, PT_PUD); | 1634 | xen_alloc_ptpage(mm, pfn, PT_PUD); |
| @@ -2046,7 +2046,7 @@ static void __init xen_post_allocator_init(void) | |||
| 2046 | pv_mmu_ops.set_pte = xen_set_pte; | 2046 | pv_mmu_ops.set_pte = xen_set_pte; |
| 2047 | pv_mmu_ops.set_pmd = xen_set_pmd; | 2047 | pv_mmu_ops.set_pmd = xen_set_pmd; |
| 2048 | pv_mmu_ops.set_pud = xen_set_pud; | 2048 | pv_mmu_ops.set_pud = xen_set_pud; |
| 2049 | #if PAGETABLE_LEVELS == 4 | 2049 | #if CONFIG_PGTABLE_LEVELS == 4 |
| 2050 | pv_mmu_ops.set_pgd = xen_set_pgd; | 2050 | pv_mmu_ops.set_pgd = xen_set_pgd; |
| 2051 | #endif | 2051 | #endif |
| 2052 | 2052 | ||
| @@ -2056,7 +2056,7 @@ static void __init xen_post_allocator_init(void) | |||
| 2056 | pv_mmu_ops.alloc_pmd = xen_alloc_pmd; | 2056 | pv_mmu_ops.alloc_pmd = xen_alloc_pmd; |
| 2057 | pv_mmu_ops.release_pte = xen_release_pte; | 2057 | pv_mmu_ops.release_pte = xen_release_pte; |
| 2058 | pv_mmu_ops.release_pmd = xen_release_pmd; | 2058 | pv_mmu_ops.release_pmd = xen_release_pmd; |
| 2059 | #if PAGETABLE_LEVELS == 4 | 2059 | #if CONFIG_PGTABLE_LEVELS == 4 |
| 2060 | pv_mmu_ops.alloc_pud = xen_alloc_pud; | 2060 | pv_mmu_ops.alloc_pud = xen_alloc_pud; |
| 2061 | pv_mmu_ops.release_pud = xen_release_pud; | 2061 | pv_mmu_ops.release_pud = xen_release_pud; |
| 2062 | #endif | 2062 | #endif |
| @@ -2122,14 +2122,14 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { | |||
| 2122 | .make_pmd = PV_CALLEE_SAVE(xen_make_pmd), | 2122 | .make_pmd = PV_CALLEE_SAVE(xen_make_pmd), |
| 2123 | .pmd_val = PV_CALLEE_SAVE(xen_pmd_val), | 2123 | .pmd_val = PV_CALLEE_SAVE(xen_pmd_val), |
| 2124 | 2124 | ||
| 2125 | #if PAGETABLE_LEVELS == 4 | 2125 | #if CONFIG_PGTABLE_LEVELS == 4 |
| 2126 | .pud_val = PV_CALLEE_SAVE(xen_pud_val), | 2126 | .pud_val = PV_CALLEE_SAVE(xen_pud_val), |
| 2127 | .make_pud = PV_CALLEE_SAVE(xen_make_pud), | 2127 | .make_pud = PV_CALLEE_SAVE(xen_make_pud), |
| 2128 | .set_pgd = xen_set_pgd_hyper, | 2128 | .set_pgd = xen_set_pgd_hyper, |
| 2129 | 2129 | ||
| 2130 | .alloc_pud = xen_alloc_pmd_init, | 2130 | .alloc_pud = xen_alloc_pmd_init, |
| 2131 | .release_pud = xen_release_pmd_init, | 2131 | .release_pud = xen_release_pmd_init, |
| 2132 | #endif /* PAGETABLE_LEVELS == 4 */ | 2132 | #endif /* CONFIG_PGTABLE_LEVELS == 4 */ |
| 2133 | 2133 | ||
| 2134 | .activate_mm = xen_activate_mm, | 2134 | .activate_mm = xen_activate_mm, |
| 2135 | .dup_mmap = xen_dup_mmap, | 2135 | .dup_mmap = xen_dup_mmap, |
diff --git a/drivers/base/memory.c b/drivers/base/memory.c index af9c911cd6b5..2804aed3f416 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c | |||
| @@ -219,6 +219,7 @@ static bool pages_correctly_reserved(unsigned long start_pfn) | |||
| 219 | /* | 219 | /* |
| 220 | * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is | 220 | * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is |
| 221 | * OK to have direct references to sparsemem variables in here. | 221 | * OK to have direct references to sparsemem variables in here. |
| 222 | * Must already be protected by mem_hotplug_begin(). | ||
| 222 | */ | 223 | */ |
| 223 | static int | 224 | static int |
| 224 | memory_block_action(unsigned long phys_index, unsigned long action, int online_type) | 225 | memory_block_action(unsigned long phys_index, unsigned long action, int online_type) |
| @@ -228,7 +229,7 @@ memory_block_action(unsigned long phys_index, unsigned long action, int online_t | |||
| 228 | struct page *first_page; | 229 | struct page *first_page; |
| 229 | int ret; | 230 | int ret; |
| 230 | 231 | ||
| 231 | start_pfn = phys_index << PFN_SECTION_SHIFT; | 232 | start_pfn = section_nr_to_pfn(phys_index); |
| 232 | first_page = pfn_to_page(start_pfn); | 233 | first_page = pfn_to_page(start_pfn); |
| 233 | 234 | ||
| 234 | switch (action) { | 235 | switch (action) { |
| @@ -286,6 +287,7 @@ static int memory_subsys_online(struct device *dev) | |||
| 286 | if (mem->online_type < 0) | 287 | if (mem->online_type < 0) |
| 287 | mem->online_type = MMOP_ONLINE_KEEP; | 288 | mem->online_type = MMOP_ONLINE_KEEP; |
| 288 | 289 | ||
| 290 | /* Already under protection of mem_hotplug_begin() */ | ||
| 289 | ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE); | 291 | ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE); |
| 290 | 292 | ||
| 291 | /* clear online_type */ | 293 | /* clear online_type */ |
| @@ -328,17 +330,19 @@ store_mem_state(struct device *dev, | |||
| 328 | goto err; | 330 | goto err; |
| 329 | } | 331 | } |
| 330 | 332 | ||
| 333 | /* | ||
| 334 | * Memory hotplug needs to hold mem_hotplug_begin() for probe to find | ||
| 335 | * the correct memory block to online before doing device_online(dev), | ||
| 336 | * which will take dev->mutex. Take the lock early to prevent an | ||
| 337 | * inversion; the memory_subsys_online() callback is implemented | ||
| 338 | * assuming the lock is already held. | ||
| 339 | */ | ||
| 340 | mem_hotplug_begin(); | ||
| 341 | |||
| 331 | switch (online_type) { | 342 | switch (online_type) { |
| 332 | case MMOP_ONLINE_KERNEL: | 343 | case MMOP_ONLINE_KERNEL: |
| 333 | case MMOP_ONLINE_MOVABLE: | 344 | case MMOP_ONLINE_MOVABLE: |
| 334 | case MMOP_ONLINE_KEEP: | 345 | case MMOP_ONLINE_KEEP: |
| 335 | /* | ||
| 336 | * mem->online_type is not protected so there can be a | ||
| 337 | * race here. However, when racing online, the first | ||
| 338 | * will succeed and the second will just return as the | ||
| 339 | * block will already be online. The online type | ||
| 340 | * could be either one, but that is expected. | ||
| 341 | */ | ||
| 342 | mem->online_type = online_type; | 346 | mem->online_type = online_type; |
| 343 | ret = device_online(&mem->dev); | 347 | ret = device_online(&mem->dev); |
| 344 | break; | 348 | break; |
| @@ -349,6 +353,7 @@ store_mem_state(struct device *dev, | |||
| 349 | ret = -EINVAL; /* should never happen */ | 353 | ret = -EINVAL; /* should never happen */ |
| 350 | } | 354 | } |
| 351 | 355 | ||
| 356 | mem_hotplug_done(); | ||
| 352 | err: | 357 | err: |
| 353 | unlock_device_hotplug(); | 358 | unlock_device_hotplug(); |
| 354 | 359 | ||
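The comments added above describe a lock-ordering rule: take mem_hotplug_begin() before device_online(), which acquires dev->mutex internally, so the store path uses the same order as the probe path. A minimal sketch of that pattern, with error handling trimmed:

static int online_block(struct memory_block *mem, int online_type)
{
	int ret;

	mem_hotplug_begin();             /* 1: memory hotplug lock */
	mem->online_type = online_type;
	ret = device_online(&mem->dev);  /* 2: takes dev->mutex internally */
	mem_hotplug_done();

	return ret;
}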
diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index 2c5d4567d1da..acde3f5d6e9e 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c | |||
| @@ -738,11 +738,11 @@ static int zfcp_erp_adapter_strategy_open_fsf(struct zfcp_erp_action *act) | |||
| 738 | return ZFCP_ERP_FAILED; | 738 | return ZFCP_ERP_FAILED; |
| 739 | 739 | ||
| 740 | if (mempool_resize(act->adapter->pool.sr_data, | 740 | if (mempool_resize(act->adapter->pool.sr_data, |
| 741 | act->adapter->stat_read_buf_num, GFP_KERNEL)) | 741 | act->adapter->stat_read_buf_num)) |
| 742 | return ZFCP_ERP_FAILED; | 742 | return ZFCP_ERP_FAILED; |
| 743 | 743 | ||
| 744 | if (mempool_resize(act->adapter->pool.status_read_req, | 744 | if (mempool_resize(act->adapter->pool.status_read_req, |
| 745 | act->adapter->stat_read_buf_num, GFP_KERNEL)) | 745 | act->adapter->stat_read_buf_num)) |
| 746 | return ZFCP_ERP_FAILED; | 746 | return ZFCP_ERP_FAILED; |
| 747 | 747 | ||
| 748 | atomic_set(&act->adapter->stat_miss, act->adapter->stat_read_buf_num); | 748 | atomic_set(&act->adapter->stat_miss, act->adapter->stat_read_buf_num); |
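This hunk reflects an API change elsewhere in the series: mempool_resize() no longer takes a gfp_mask. A minimal sketch of the updated call, assuming the usual convention that 0 means success:

static int grow_request_pool(mempool_t *pool, int new_min_nr)
{
	/* old form was mempool_resize(pool, new_min_nr, GFP_KERNEL) */
	return mempool_resize(pool, new_min_nr);
}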
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h b/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h index a260e99a4447..d72605864b0a 100644 --- a/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h +++ b/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h | |||
| @@ -55,7 +55,9 @@ truncate_complete_page(struct address_space *mapping, struct page *page) | |||
| 55 | if (PagePrivate(page)) | 55 | if (PagePrivate(page)) |
| 56 | page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE); | 56 | page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE); |
| 57 | 57 | ||
| 58 | cancel_dirty_page(page, PAGE_SIZE); | 58 | if (TestClearPageDirty(page)) |
| 59 | account_page_cleaned(page, mapping); | ||
| 60 | |||
| 59 | ClearPageMappedToDisk(page); | 61 | ClearPageMappedToDisk(page); |
| 60 | ll_delete_from_page_cache(page); | 62 | ll_delete_from_page_cache(page); |
| 61 | } | 63 | } |
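The same open-coded replacement for cancel_dirty_page() appears here and in the fs/buffer.c hunk below: clear the dirty bit and, only if it was actually set, undo the dirty-page accounting. A hedged sketch of that pattern factored into a helper:

static void drop_dirty_accounting(struct page *page,
				  struct address_space *mapping)
{
	/* only pages that really were dirty have accounting to undo */
	if (TestClearPageDirty(page))
		account_page_cleaned(page, mapping);
}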
diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c index 8a65423bc696..c4211a31612d 100644 --- a/drivers/xen/tmem.c +++ b/drivers/xen/tmem.c | |||
| @@ -397,13 +397,15 @@ static int __init xen_tmem_init(void) | |||
| 397 | #ifdef CONFIG_CLEANCACHE | 397 | #ifdef CONFIG_CLEANCACHE |
| 398 | BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid)); | 398 | BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid)); |
| 399 | if (tmem_enabled && cleancache) { | 399 | if (tmem_enabled && cleancache) { |
| 400 | char *s = ""; | 400 | int err; |
| 401 | struct cleancache_ops *old_ops = | 401 | |
| 402 | cleancache_register_ops(&tmem_cleancache_ops); | 402 | err = cleancache_register_ops(&tmem_cleancache_ops); |
| 403 | if (old_ops) | 403 | if (err) |
| 404 | s = " (WARNING: cleancache_ops overridden)"; | 404 | pr_warn("xen-tmem: failed to enable cleancache: %d\n", |
| 405 | pr_info("cleancache enabled, RAM provided by Xen Transcendent Memory%s\n", | 405 | err); |
| 406 | s); | 406 | else |
| 407 | pr_info("cleancache enabled, RAM provided by " | ||
| 408 | "Xen Transcendent Memory\n"); | ||
| 407 | } | 409 | } |
| 408 | #endif | 410 | #endif |
| 409 | #ifdef CONFIG_XEN_SELFBALLOONING | 411 | #ifdef CONFIG_XEN_SELFBALLOONING |
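With this series cleancache_register_ops() returns an errno instead of the previously registered ops, which is what the rewritten branch above checks. A hedged sketch of the registration pattern from a backend's point of view:

static int register_my_cleancache(struct cleancache_ops *ops)
{
	int err = cleancache_register_ops(ops);

	if (err)
		pr_warn("cleancache registration failed: %d\n", err);
	return err;  /* 0 on success */
}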
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 270c48148f79..2d0cbbd14cfc 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt | |||
| @@ -27,9 +27,6 @@ config COMPAT_BINFMT_ELF | |||
| 27 | bool | 27 | bool |
| 28 | depends on COMPAT && BINFMT_ELF | 28 | depends on COMPAT && BINFMT_ELF |
| 29 | 29 | ||
| 30 | config ARCH_BINFMT_ELF_RANDOMIZE_PIE | ||
| 31 | bool | ||
| 32 | |||
| 33 | config ARCH_BINFMT_ELF_STATE | 30 | config ARCH_BINFMT_ELF_STATE |
| 34 | bool | 31 | bool |
| 35 | 32 | ||
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 995986b8e36b..241ef68d2893 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c | |||
| @@ -31,6 +31,7 @@ | |||
| 31 | #include <linux/security.h> | 31 | #include <linux/security.h> |
| 32 | #include <linux/random.h> | 32 | #include <linux/random.h> |
| 33 | #include <linux/elf.h> | 33 | #include <linux/elf.h> |
| 34 | #include <linux/elf-randomize.h> | ||
| 34 | #include <linux/utsname.h> | 35 | #include <linux/utsname.h> |
| 35 | #include <linux/coredump.h> | 36 | #include <linux/coredump.h> |
| 36 | #include <linux/sched.h> | 37 | #include <linux/sched.h> |
| @@ -862,6 +863,7 @@ static int load_elf_binary(struct linux_binprm *bprm) | |||
| 862 | i < loc->elf_ex.e_phnum; i++, elf_ppnt++) { | 863 | i < loc->elf_ex.e_phnum; i++, elf_ppnt++) { |
| 863 | int elf_prot = 0, elf_flags; | 864 | int elf_prot = 0, elf_flags; |
| 864 | unsigned long k, vaddr; | 865 | unsigned long k, vaddr; |
| 866 | unsigned long total_size = 0; | ||
| 865 | 867 | ||
| 866 | if (elf_ppnt->p_type != PT_LOAD) | 868 | if (elf_ppnt->p_type != PT_LOAD) |
| 867 | continue; | 869 | continue; |
| @@ -909,25 +911,20 @@ static int load_elf_binary(struct linux_binprm *bprm) | |||
| 909 | * default mmap base, as well as whatever program they | 911 | * default mmap base, as well as whatever program they |
| 910 | * might try to exec. This is because the brk will | 912 | * might try to exec. This is because the brk will |
| 911 | * follow the loader, and is not movable. */ | 913 | * follow the loader, and is not movable. */ |
| 912 | #ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE | 914 | load_bias = ELF_ET_DYN_BASE - vaddr; |
| 913 | /* Memory randomization might have been switched off | ||
| 914 | * in runtime via sysctl or explicit setting of | ||
| 915 | * personality flags. | ||
| 916 | * If that is the case, retain the original non-zero | ||
| 917 | * load_bias value in order to establish proper | ||
| 918 | * non-randomized mappings. | ||
| 919 | */ | ||
| 920 | if (current->flags & PF_RANDOMIZE) | 915 | if (current->flags & PF_RANDOMIZE) |
| 921 | load_bias = 0; | 916 | load_bias += arch_mmap_rnd(); |
| 922 | else | 917 | load_bias = ELF_PAGESTART(load_bias); |
| 923 | load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); | 918 | total_size = total_mapping_size(elf_phdata, |
| 924 | #else | 919 | loc->elf_ex.e_phnum); |
| 925 | load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); | 920 | if (!total_size) { |
| 926 | #endif | 921 | error = -EINVAL; |
| 922 | goto out_free_dentry; | ||
| 923 | } | ||
| 927 | } | 924 | } |
| 928 | 925 | ||
| 929 | error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, | 926 | error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, |
| 930 | elf_prot, elf_flags, 0); | 927 | elf_prot, elf_flags, total_size); |
| 931 | if (BAD_ADDR(error)) { | 928 | if (BAD_ADDR(error)) { |
| 932 | retval = IS_ERR((void *)error) ? | 929 | retval = IS_ERR((void *)error) ? |
| 933 | PTR_ERR((void*)error) : -EINVAL; | 930 | PTR_ERR((void*)error) : -EINVAL; |
| @@ -1053,15 +1050,13 @@ static int load_elf_binary(struct linux_binprm *bprm) | |||
| 1053 | current->mm->end_data = end_data; | 1050 | current->mm->end_data = end_data; |
| 1054 | current->mm->start_stack = bprm->p; | 1051 | current->mm->start_stack = bprm->p; |
| 1055 | 1052 | ||
| 1056 | #ifdef arch_randomize_brk | ||
| 1057 | if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) { | 1053 | if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) { |
| 1058 | current->mm->brk = current->mm->start_brk = | 1054 | current->mm->brk = current->mm->start_brk = |
| 1059 | arch_randomize_brk(current->mm); | 1055 | arch_randomize_brk(current->mm); |
| 1060 | #ifdef CONFIG_COMPAT_BRK | 1056 | #ifdef compat_brk_randomized |
| 1061 | current->brk_randomized = 1; | 1057 | current->brk_randomized = 1; |
| 1062 | #endif | 1058 | #endif |
| 1063 | } | 1059 | } |
| 1064 | #endif | ||
| 1065 | 1060 | ||
| 1066 | if (current->personality & MMAP_PAGE_ZERO) { | 1061 | if (current->personality & MMAP_PAGE_ZERO) { |
| 1067 | /* Why this, you ask??? Well SVr4 maps page 0 as read-only, | 1062 | /* Why this, you ask??? Well SVr4 maps page 0 as read-only, |
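The rewritten PT_LOAD branch above computes the ET_DYN load bias in three steps: start from ELF_ET_DYN_BASE - vaddr, add the mmap randomization offset when PF_RANDOMIZE is set, then page-align the result. A standalone sketch of just that arithmetic; the values are illustrative and ELF_PAGESTART() is approximated for 4KB pages:

unsigned long compute_load_bias(unsigned long et_dyn_base,
				unsigned long vaddr,
				unsigned long rnd,   /* arch_mmap_rnd() result */
				int randomize)       /* PF_RANDOMIZE set? */
{
	unsigned long load_bias = et_dyn_base - vaddr;

	if (randomize)
		load_bias += rnd;

	return load_bias & ~(4096UL - 1);  /* ELF_PAGESTART() for 4KB pages */
}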
diff --git a/fs/buffer.c b/fs/buffer.c index 20805db2c987..c7a5602d01ee 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
| @@ -3243,8 +3243,8 @@ int try_to_free_buffers(struct page *page) | |||
| 3243 | * to synchronise against __set_page_dirty_buffers and prevent the | 3243 | * to synchronise against __set_page_dirty_buffers and prevent the |
| 3244 | * dirty bit from being lost. | 3244 | * dirty bit from being lost. |
| 3245 | */ | 3245 | */ |
| 3246 | if (ret) | 3246 | if (ret && TestClearPageDirty(page)) |
| 3247 | cancel_dirty_page(page, PAGE_CACHE_SIZE); | 3247 | account_page_cleaned(page, mapping); |
| 3248 | spin_unlock(&mapping->private_lock); | 3248 | spin_unlock(&mapping->private_lock); |
| 3249 | out: | 3249 | out: |
| 3250 | if (buffers_to_free) { | 3250 | if (buffers_to_free) { |
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 480cf9c81d50..f3bfe08e177b 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
| @@ -773,8 +773,7 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server) | |||
| 773 | 773 | ||
| 774 | length = atomic_dec_return(&tcpSesAllocCount); | 774 | length = atomic_dec_return(&tcpSesAllocCount); |
| 775 | if (length > 0) | 775 | if (length > 0) |
| 776 | mempool_resize(cifs_req_poolp, length + cifs_min_rcv, | 776 | mempool_resize(cifs_req_poolp, length + cifs_min_rcv); |
| 777 | GFP_KERNEL); | ||
| 778 | } | 777 | } |
| 779 | 778 | ||
| 780 | static int | 779 | static int |
| @@ -848,8 +847,7 @@ cifs_demultiplex_thread(void *p) | |||
| 848 | 847 | ||
| 849 | length = atomic_inc_return(&tcpSesAllocCount); | 848 | length = atomic_inc_return(&tcpSesAllocCount); |
| 850 | if (length > 1) | 849 | if (length > 1) |
| 851 | mempool_resize(cifs_req_poolp, length + cifs_min_rcv, | 850 | mempool_resize(cifs_req_poolp, length + cifs_min_rcv); |
| 852 | GFP_KERNEL); | ||
| 853 | 851 | ||
| 854 | set_freezable(); | 852 | set_freezable(); |
| 855 | while (server->tcpStatus != CifsExiting) { | 853 | while (server->tcpStatus != CifsExiting) { |
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index c274aca8e8dc..db76cec3ce21 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
| @@ -319,7 +319,7 @@ static int hugetlbfs_write_end(struct file *file, struct address_space *mapping, | |||
| 319 | 319 | ||
| 320 | static void truncate_huge_page(struct page *page) | 320 | static void truncate_huge_page(struct page *page) |
| 321 | { | 321 | { |
| 322 | cancel_dirty_page(page, /* No IO accounting for huge pages? */0); | 322 | ClearPageDirty(page); |
| 323 | ClearPageUptodate(page); | 323 | ClearPageUptodate(page); |
| 324 | delete_from_page_cache(page); | 324 | delete_from_page_cache(page); |
| 325 | } | 325 | } |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 849ed784d6ac..759931088094 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
| @@ -1876,11 +1876,6 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) | |||
| 1876 | * request from the inode / page_private pointer and | 1876 | * request from the inode / page_private pointer and |
| 1877 | * release it */ | 1877 | * release it */ |
| 1878 | nfs_inode_remove_request(req); | 1878 | nfs_inode_remove_request(req); |
| 1879 | /* | ||
| 1880 | * In case nfs_inode_remove_request has marked the | ||
| 1881 | * page as being dirty | ||
| 1882 | */ | ||
| 1883 | cancel_dirty_page(page, PAGE_CACHE_SIZE); | ||
| 1884 | nfs_unlock_and_release_request(req); | 1879 | nfs_unlock_and_release_request(req); |
| 1885 | } | 1880 | } |
| 1886 | 1881 | ||
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 044158bd22be..2d7f76e52c37 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
| @@ -3370,7 +3370,7 @@ static int ocfs2_merge_rec_right(struct ocfs2_path *left_path, | |||
| 3370 | ret = ocfs2_get_right_path(et, left_path, &right_path); | 3370 | ret = ocfs2_get_right_path(et, left_path, &right_path); |
| 3371 | if (ret) { | 3371 | if (ret) { |
| 3372 | mlog_errno(ret); | 3372 | mlog_errno(ret); |
| 3373 | goto out; | 3373 | return ret; |
| 3374 | } | 3374 | } |
| 3375 | 3375 | ||
| 3376 | right_el = path_leaf_el(right_path); | 3376 | right_el = path_leaf_el(right_path); |
| @@ -3453,8 +3453,7 @@ static int ocfs2_merge_rec_right(struct ocfs2_path *left_path, | |||
| 3453 | subtree_index); | 3453 | subtree_index); |
| 3454 | } | 3454 | } |
| 3455 | out: | 3455 | out: |
| 3456 | if (right_path) | 3456 | ocfs2_free_path(right_path); |
| 3457 | ocfs2_free_path(right_path); | ||
| 3458 | return ret; | 3457 | return ret; |
| 3459 | } | 3458 | } |
| 3460 | 3459 | ||
| @@ -3536,7 +3535,7 @@ static int ocfs2_merge_rec_left(struct ocfs2_path *right_path, | |||
| 3536 | ret = ocfs2_get_left_path(et, right_path, &left_path); | 3535 | ret = ocfs2_get_left_path(et, right_path, &left_path); |
| 3537 | if (ret) { | 3536 | if (ret) { |
| 3538 | mlog_errno(ret); | 3537 | mlog_errno(ret); |
| 3539 | goto out; | 3538 | return ret; |
| 3540 | } | 3539 | } |
| 3541 | 3540 | ||
| 3542 | left_el = path_leaf_el(left_path); | 3541 | left_el = path_leaf_el(left_path); |
| @@ -3647,8 +3646,7 @@ static int ocfs2_merge_rec_left(struct ocfs2_path *right_path, | |||
| 3647 | right_path, subtree_index); | 3646 | right_path, subtree_index); |
| 3648 | } | 3647 | } |
| 3649 | out: | 3648 | out: |
| 3650 | if (left_path) | 3649 | ocfs2_free_path(left_path); |
| 3651 | ocfs2_free_path(left_path); | ||
| 3652 | return ret; | 3650 | return ret; |
| 3653 | } | 3651 | } |
| 3654 | 3652 | ||
| @@ -4334,17 +4332,17 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et, | |||
| 4334 | } else if (path->p_tree_depth > 0) { | 4332 | } else if (path->p_tree_depth > 0) { |
| 4335 | status = ocfs2_find_cpos_for_left_leaf(sb, path, &left_cpos); | 4333 | status = ocfs2_find_cpos_for_left_leaf(sb, path, &left_cpos); |
| 4336 | if (status) | 4334 | if (status) |
| 4337 | goto out; | 4335 | goto exit; |
| 4338 | 4336 | ||
| 4339 | if (left_cpos != 0) { | 4337 | if (left_cpos != 0) { |
| 4340 | left_path = ocfs2_new_path_from_path(path); | 4338 | left_path = ocfs2_new_path_from_path(path); |
| 4341 | if (!left_path) | 4339 | if (!left_path) |
| 4342 | goto out; | 4340 | goto exit; |
| 4343 | 4341 | ||
| 4344 | status = ocfs2_find_path(et->et_ci, left_path, | 4342 | status = ocfs2_find_path(et->et_ci, left_path, |
| 4345 | left_cpos); | 4343 | left_cpos); |
| 4346 | if (status) | 4344 | if (status) |
| 4347 | goto out; | 4345 | goto free_left_path; |
| 4348 | 4346 | ||
| 4349 | new_el = path_leaf_el(left_path); | 4347 | new_el = path_leaf_el(left_path); |
| 4350 | 4348 | ||
| @@ -4361,7 +4359,7 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et, | |||
| 4361 | le16_to_cpu(new_el->l_next_free_rec), | 4359 | le16_to_cpu(new_el->l_next_free_rec), |
| 4362 | le16_to_cpu(new_el->l_count)); | 4360 | le16_to_cpu(new_el->l_count)); |
| 4363 | status = -EINVAL; | 4361 | status = -EINVAL; |
| 4364 | goto out; | 4362 | goto free_left_path; |
| 4365 | } | 4363 | } |
| 4366 | rec = &new_el->l_recs[ | 4364 | rec = &new_el->l_recs[ |
| 4367 | le16_to_cpu(new_el->l_next_free_rec) - 1]; | 4365 | le16_to_cpu(new_el->l_next_free_rec) - 1]; |
| @@ -4388,18 +4386,18 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et, | |||
| 4388 | path->p_tree_depth > 0) { | 4386 | path->p_tree_depth > 0) { |
| 4389 | status = ocfs2_find_cpos_for_right_leaf(sb, path, &right_cpos); | 4387 | status = ocfs2_find_cpos_for_right_leaf(sb, path, &right_cpos); |
| 4390 | if (status) | 4388 | if (status) |
| 4391 | goto out; | 4389 | goto free_left_path; |
| 4392 | 4390 | ||
| 4393 | if (right_cpos == 0) | 4391 | if (right_cpos == 0) |
| 4394 | goto out; | 4392 | goto free_left_path; |
| 4395 | 4393 | ||
| 4396 | right_path = ocfs2_new_path_from_path(path); | 4394 | right_path = ocfs2_new_path_from_path(path); |
| 4397 | if (!right_path) | 4395 | if (!right_path) |
| 4398 | goto out; | 4396 | goto free_left_path; |
| 4399 | 4397 | ||
| 4400 | status = ocfs2_find_path(et->et_ci, right_path, right_cpos); | 4398 | status = ocfs2_find_path(et->et_ci, right_path, right_cpos); |
| 4401 | if (status) | 4399 | if (status) |
| 4402 | goto out; | 4400 | goto free_right_path; |
| 4403 | 4401 | ||
| 4404 | new_el = path_leaf_el(right_path); | 4402 | new_el = path_leaf_el(right_path); |
| 4405 | rec = &new_el->l_recs[0]; | 4403 | rec = &new_el->l_recs[0]; |
| @@ -4413,7 +4411,7 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et, | |||
| 4413 | (unsigned long long)le64_to_cpu(eb->h_blkno), | 4411 | (unsigned long long)le64_to_cpu(eb->h_blkno), |
| 4414 | le16_to_cpu(new_el->l_next_free_rec)); | 4412 | le16_to_cpu(new_el->l_next_free_rec)); |
| 4415 | status = -EINVAL; | 4413 | status = -EINVAL; |
| 4416 | goto out; | 4414 | goto free_right_path; |
| 4417 | } | 4415 | } |
| 4418 | rec = &new_el->l_recs[1]; | 4416 | rec = &new_el->l_recs[1]; |
| 4419 | } | 4417 | } |
| @@ -4430,12 +4428,11 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et, | |||
| 4430 | ret = contig_type; | 4428 | ret = contig_type; |
| 4431 | } | 4429 | } |
| 4432 | 4430 | ||
| 4433 | out: | 4431 | free_right_path: |
| 4434 | if (left_path) | 4432 | ocfs2_free_path(right_path); |
| 4435 | ocfs2_free_path(left_path); | 4433 | free_left_path: |
| 4436 | if (right_path) | 4434 | ocfs2_free_path(left_path); |
| 4437 | ocfs2_free_path(right_path); | 4435 | exit: |
| 4438 | |||
| 4439 | return ret; | 4436 | return ret; |
| 4440 | } | 4437 | } |
| 4441 | 4438 | ||
| @@ -6858,13 +6855,13 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, | |||
| 6858 | if (pages == NULL) { | 6855 | if (pages == NULL) { |
| 6859 | ret = -ENOMEM; | 6856 | ret = -ENOMEM; |
| 6860 | mlog_errno(ret); | 6857 | mlog_errno(ret); |
| 6861 | goto out; | 6858 | return ret; |
| 6862 | } | 6859 | } |
| 6863 | 6860 | ||
| 6864 | ret = ocfs2_reserve_clusters(osb, 1, &data_ac); | 6861 | ret = ocfs2_reserve_clusters(osb, 1, &data_ac); |
| 6865 | if (ret) { | 6862 | if (ret) { |
| 6866 | mlog_errno(ret); | 6863 | mlog_errno(ret); |
| 6867 | goto out; | 6864 | goto free_pages; |
| 6868 | } | 6865 | } |
| 6869 | } | 6866 | } |
| 6870 | 6867 | ||
| @@ -6996,9 +6993,8 @@ out_commit: | |||
| 6996 | out: | 6993 | out: |
| 6997 | if (data_ac) | 6994 | if (data_ac) |
| 6998 | ocfs2_free_alloc_context(data_ac); | 6995 | ocfs2_free_alloc_context(data_ac); |
| 6999 | if (pages) | 6996 | free_pages: |
| 7000 | kfree(pages); | 6997 | kfree(pages); |
| 7001 | |||
| 7002 | return ret; | 6998 | return ret; |
| 7003 | } | 6999 | } |
| 7004 | 7000 | ||
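Several hunks above replace a single catch-all "out:" label with one label per allocation, so each exit path frees exactly what was set up before the failure and the "if (ptr) free(ptr)" guards disappear. A hedged sketch of that layered-unwind shape; get_path()/free_path() are hypothetical stand-ins for ocfs2_new_path_from_path()/ocfs2_free_path():

int walk_both_leaves(void)
{
	void *left, *right;
	int ret = 0;

	left = get_path();            /* hypothetical allocation #1 */
	if (!left) {
		ret = -ENOMEM;
		goto exit;            /* nothing to free yet */
	}

	right = get_path();           /* hypothetical allocation #2 */
	if (!right) {
		ret = -ENOMEM;
		goto free_left_path;  /* only the left path exists */
	}

	/* ... work with both paths ... */

	free_path(right);
free_left_path:
	free_path(left);
exit:
	return ret;
}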
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index e1bf18c5d25e..8d2bc840c288 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
| @@ -664,6 +664,117 @@ static int ocfs2_is_overwrite(struct ocfs2_super *osb, | |||
| 664 | return 0; | 664 | return 0; |
| 665 | } | 665 | } |
| 666 | 666 | ||
| 667 | static int ocfs2_direct_IO_zero_extend(struct ocfs2_super *osb, | ||
| 668 | struct inode *inode, loff_t offset, | ||
| 669 | u64 zero_len, int cluster_align) | ||
| 670 | { | ||
| 671 | u32 p_cpos = 0; | ||
| 672 | u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, i_size_read(inode)); | ||
| 673 | unsigned int num_clusters = 0; | ||
| 674 | unsigned int ext_flags = 0; | ||
| 675 | int ret = 0; | ||
| 676 | |||
| 677 | if (offset <= i_size_read(inode) || cluster_align) | ||
| 678 | return 0; | ||
| 679 | |||
| 680 | ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, &num_clusters, | ||
| 681 | &ext_flags); | ||
| 682 | if (ret < 0) { | ||
| 683 | mlog_errno(ret); | ||
| 684 | return ret; | ||
| 685 | } | ||
| 686 | |||
| 687 | if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) { | ||
| 688 | u64 s = i_size_read(inode); | ||
| 689 | sector_t sector = (p_cpos << (osb->s_clustersize_bits - 9)) + | ||
| 690 | (do_div(s, osb->s_clustersize) >> 9); | ||
| 691 | |||
| 692 | ret = blkdev_issue_zeroout(osb->sb->s_bdev, sector, | ||
| 693 | zero_len >> 9, GFP_NOFS, false); | ||
| 694 | if (ret < 0) | ||
| 695 | mlog_errno(ret); | ||
| 696 | } | ||
| 697 | |||
| 698 | return ret; | ||
| 699 | } | ||
| 700 | |||
| 701 | static int ocfs2_direct_IO_extend_no_holes(struct ocfs2_super *osb, | ||
| 702 | struct inode *inode, loff_t offset) | ||
| 703 | { | ||
| 704 | u64 zero_start, zero_len, total_zero_len; | ||
| 705 | u32 p_cpos = 0, clusters_to_add; | ||
| 706 | u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, i_size_read(inode)); | ||
| 707 | unsigned int num_clusters = 0; | ||
| 708 | unsigned int ext_flags = 0; | ||
| 709 | u32 size_div, offset_div; | ||
| 710 | int ret = 0; | ||
| 711 | |||
| 712 | { | ||
| 713 | u64 o = offset; | ||
| 714 | u64 s = i_size_read(inode); | ||
| 715 | |||
| 716 | offset_div = do_div(o, osb->s_clustersize); | ||
| 717 | size_div = do_div(s, osb->s_clustersize); | ||
| 718 | } | ||
| 719 | |||
| 720 | if (offset <= i_size_read(inode)) | ||
| 721 | return 0; | ||
| 722 | |||
| 723 | clusters_to_add = ocfs2_bytes_to_clusters(inode->i_sb, offset) - | ||
| 724 | ocfs2_bytes_to_clusters(inode->i_sb, i_size_read(inode)); | ||
| 725 | total_zero_len = offset - i_size_read(inode); | ||
| 726 | if (clusters_to_add) | ||
| 727 | total_zero_len -= offset_div; | ||
| 728 | |||
| 729 | /* Allocate clusters to fill out holes, and this is only needed | ||
| 730 | * when we add more than one cluster. Otherwise the cluster will | ||
| 731 | * be allocated during direct IO */ | ||
| 732 | if (clusters_to_add > 1) { | ||
| 733 | ret = ocfs2_extend_allocation(inode, | ||
| 734 | OCFS2_I(inode)->ip_clusters, | ||
| 735 | clusters_to_add - 1, 0); | ||
| 736 | if (ret) { | ||
| 737 | mlog_errno(ret); | ||
| 738 | goto out; | ||
| 739 | } | ||
| 740 | } | ||
| 741 | |||
| 742 | while (total_zero_len) { | ||
| 743 | ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, &num_clusters, | ||
| 744 | &ext_flags); | ||
| 745 | if (ret < 0) { | ||
| 746 | mlog_errno(ret); | ||
| 747 | goto out; | ||
| 748 | } | ||
| 749 | |||
| 750 | zero_start = ocfs2_clusters_to_bytes(osb->sb, p_cpos) + | ||
| 751 | size_div; | ||
| 752 | zero_len = ocfs2_clusters_to_bytes(osb->sb, num_clusters) - | ||
| 753 | size_div; | ||
| 754 | zero_len = min(total_zero_len, zero_len); | ||
| 755 | |||
| 756 | if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) { | ||
| 757 | ret = blkdev_issue_zeroout(osb->sb->s_bdev, | ||
| 758 | zero_start >> 9, zero_len >> 9, | ||
| 759 | GFP_NOFS, false); | ||
| 760 | if (ret < 0) { | ||
| 761 | mlog_errno(ret); | ||
| 762 | goto out; | ||
| 763 | } | ||
| 764 | } | ||
| 765 | |||
| 766 | total_zero_len -= zero_len; | ||
| 767 | v_cpos += ocfs2_bytes_to_clusters(osb->sb, zero_len + size_div); | ||
| 768 | |||
| 769 | /* Only the first iteration can start at a non-cluster-aligned | ||
| 770 | * offset, so set size_div to 0 for the rest */ | ||
| 771 | size_div = 0; | ||
| 772 | } | ||
| 773 | |||
| 774 | out: | ||
| 775 | return ret; | ||
| 776 | } | ||
| 777 | |||
| 667 | static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, | 778 | static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, |
| 668 | struct iov_iter *iter, | 779 | struct iov_iter *iter, |
| 669 | loff_t offset) | 780 | loff_t offset) |
| @@ -678,8 +789,8 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, | |||
| 678 | struct buffer_head *di_bh = NULL; | 789 | struct buffer_head *di_bh = NULL; |
| 679 | size_t count = iter->count; | 790 | size_t count = iter->count; |
| 680 | journal_t *journal = osb->journal->j_journal; | 791 | journal_t *journal = osb->journal->j_journal; |
| 681 | u32 zero_len; | 792 | u64 zero_len_head, zero_len_tail; |
| 682 | int cluster_align; | 793 | int cluster_align_head, cluster_align_tail; |
| 683 | loff_t final_size = offset + count; | 794 | loff_t final_size = offset + count; |
| 684 | int append_write = offset >= i_size_read(inode) ? 1 : 0; | 795 | int append_write = offset >= i_size_read(inode) ? 1 : 0; |
| 685 | unsigned int num_clusters = 0; | 796 | unsigned int num_clusters = 0; |
| @@ -687,9 +798,16 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, | |||
| 687 | 798 | ||
| 688 | { | 799 | { |
| 689 | u64 o = offset; | 800 | u64 o = offset; |
| 801 | u64 s = i_size_read(inode); | ||
| 802 | |||
| 803 | zero_len_head = do_div(o, 1 << osb->s_clustersize_bits); | ||
| 804 | cluster_align_head = !zero_len_head; | ||
| 690 | 805 | ||
| 691 | zero_len = do_div(o, 1 << osb->s_clustersize_bits); | 806 | zero_len_tail = osb->s_clustersize - |
| 692 | cluster_align = !zero_len; | 807 | do_div(s, osb->s_clustersize); |
| 808 | if ((offset - i_size_read(inode)) < zero_len_tail) | ||
| 809 | zero_len_tail = offset - i_size_read(inode); | ||
| 810 | cluster_align_tail = !zero_len_tail; | ||
| 693 | } | 811 | } |
| 694 | 812 | ||
| 695 | /* | 813 | /* |
| @@ -707,21 +825,23 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, | |||
| 707 | } | 825 | } |
| 708 | 826 | ||
| 709 | if (append_write) { | 827 | if (append_write) { |
| 710 | ret = ocfs2_inode_lock(inode, &di_bh, 1); | 828 | ret = ocfs2_inode_lock(inode, NULL, 1); |
| 711 | if (ret < 0) { | 829 | if (ret < 0) { |
| 712 | mlog_errno(ret); | 830 | mlog_errno(ret); |
| 713 | goto clean_orphan; | 831 | goto clean_orphan; |
| 714 | } | 832 | } |
| 715 | 833 | ||
| 834 | /* zero out the tail of the previously allocated cluster | ||
| 835 | * that has not been zeroed yet */ | ||
| 716 | if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) | 836 | if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) |
| 717 | ret = ocfs2_zero_extend(inode, di_bh, offset); | 837 | ret = ocfs2_direct_IO_zero_extend(osb, inode, offset, |
| 838 | zero_len_tail, cluster_align_tail); | ||
| 718 | else | 839 | else |
| 719 | ret = ocfs2_extend_no_holes(inode, di_bh, offset, | 840 | ret = ocfs2_direct_IO_extend_no_holes(osb, inode, |
| 720 | offset); | 841 | offset); |
| 721 | if (ret < 0) { | 842 | if (ret < 0) { |
| 722 | mlog_errno(ret); | 843 | mlog_errno(ret); |
| 723 | ocfs2_inode_unlock(inode, 1); | 844 | ocfs2_inode_unlock(inode, 1); |
| 724 | brelse(di_bh); | ||
| 725 | goto clean_orphan; | 845 | goto clean_orphan; |
| 726 | } | 846 | } |
| 727 | 847 | ||
| @@ -729,13 +849,10 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, | |||
| 729 | if (is_overwrite < 0) { | 849 | if (is_overwrite < 0) { |
| 730 | mlog_errno(is_overwrite); | 850 | mlog_errno(is_overwrite); |
| 731 | ocfs2_inode_unlock(inode, 1); | 851 | ocfs2_inode_unlock(inode, 1); |
| 732 | brelse(di_bh); | ||
| 733 | goto clean_orphan; | 852 | goto clean_orphan; |
| 734 | } | 853 | } |
| 735 | 854 | ||
| 736 | ocfs2_inode_unlock(inode, 1); | 855 | ocfs2_inode_unlock(inode, 1); |
| 737 | brelse(di_bh); | ||
| 738 | di_bh = NULL; | ||
| 739 | } | 856 | } |
| 740 | 857 | ||
| 741 | written = __blockdev_direct_IO(WRITE, iocb, inode, inode->i_sb->s_bdev, | 858 | written = __blockdev_direct_IO(WRITE, iocb, inode, inode->i_sb->s_bdev, |
| @@ -772,15 +889,23 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, | |||
| 772 | if (ret < 0) | 889 | if (ret < 0) |
| 773 | mlog_errno(ret); | 890 | mlog_errno(ret); |
| 774 | } | 891 | } |
| 775 | } else if (written < 0 && append_write && !is_overwrite && | 892 | } else if (written > 0 && append_write && !is_overwrite && |
| 776 | !cluster_align) { | 893 | !cluster_align_head) { |
| 894 | /* zeroing out the allocated cluster head */ | ||
| 777 | u32 p_cpos = 0; | 895 | u32 p_cpos = 0; |
| 778 | u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset); | 896 | u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset); |
| 779 | 897 | ||
| 898 | ret = ocfs2_inode_lock(inode, NULL, 0); | ||
| 899 | if (ret < 0) { | ||
| 900 | mlog_errno(ret); | ||
| 901 | goto clean_orphan; | ||
| 902 | } | ||
| 903 | |||
| 780 | ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, | 904 | ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, |
| 781 | &num_clusters, &ext_flags); | 905 | &num_clusters, &ext_flags); |
| 782 | if (ret < 0) { | 906 | if (ret < 0) { |
| 783 | mlog_errno(ret); | 907 | mlog_errno(ret); |
| 908 | ocfs2_inode_unlock(inode, 0); | ||
| 784 | goto clean_orphan; | 909 | goto clean_orphan; |
| 785 | } | 910 | } |
| 786 | 911 | ||
| @@ -788,9 +913,11 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, | |||
| 788 | 913 | ||
| 789 | ret = blkdev_issue_zeroout(osb->sb->s_bdev, | 914 | ret = blkdev_issue_zeroout(osb->sb->s_bdev, |
| 790 | p_cpos << (osb->s_clustersize_bits - 9), | 915 | p_cpos << (osb->s_clustersize_bits - 9), |
| 791 | zero_len >> 9, GFP_KERNEL, false); | 916 | zero_len_head >> 9, GFP_NOFS, false); |
| 792 | if (ret < 0) | 917 | if (ret < 0) |
| 793 | mlog_errno(ret); | 918 | mlog_errno(ret); |
| 919 | |||
| 920 | ocfs2_inode_unlock(inode, 0); | ||
| 794 | } | 921 | } |
| 795 | 922 | ||
| 796 | clean_orphan: | 923 | clean_orphan: |
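The head/tail variables introduced above split the old single zero_len: the head gap is the write offset modulo the cluster size, while the tail gap is the unwritten remainder of the cluster holding the current EOF, capped at the size of the hole the append creates. A standalone arithmetic check with made-up example values (plain C, using % where the kernel code must use do_div()):

#include <stdio.h>

int main(void)
{
	unsigned long long clustersize = 32768;  /* example s_clustersize */
	unsigned long long offset     = 100000;  /* direct-IO write offset */
	unsigned long long i_size     = 40000;   /* current file size */

	unsigned long long zero_len_head = offset % clustersize;
	unsigned long long zero_len_tail = clustersize - (i_size % clustersize);

	if (offset - i_size < zero_len_tail)
		zero_len_tail = offset - i_size;

	/* prints: head gap 1696, tail gap 25536 */
	printf("head gap %llu, tail gap %llu\n", zero_len_head, zero_len_tail);
	return 0;
}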
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 16eff45727ee..8e19b9d7aba8 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
| @@ -1312,7 +1312,9 @@ static int o2hb_debug_init(void) | |||
| 1312 | int ret = -ENOMEM; | 1312 | int ret = -ENOMEM; |
| 1313 | 1313 | ||
| 1314 | o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL); | 1314 | o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL); |
| 1315 | if (!o2hb_debug_dir) { | 1315 | if (IS_ERR_OR_NULL(o2hb_debug_dir)) { |
| 1316 | ret = o2hb_debug_dir ? | ||
| 1317 | PTR_ERR(o2hb_debug_dir) : -ENOMEM; | ||
| 1316 | mlog_errno(ret); | 1318 | mlog_errno(ret); |
| 1317 | goto bail; | 1319 | goto bail; |
| 1318 | } | 1320 | } |
| @@ -1325,7 +1327,9 @@ static int o2hb_debug_init(void) | |||
| 1325 | sizeof(o2hb_live_node_bitmap), | 1327 | sizeof(o2hb_live_node_bitmap), |
| 1326 | O2NM_MAX_NODES, | 1328 | O2NM_MAX_NODES, |
| 1327 | o2hb_live_node_bitmap); | 1329 | o2hb_live_node_bitmap); |
| 1328 | if (!o2hb_debug_livenodes) { | 1330 | if (IS_ERR_OR_NULL(o2hb_debug_livenodes)) { |
| 1331 | ret = o2hb_debug_livenodes ? | ||
| 1332 | PTR_ERR(o2hb_debug_livenodes) : -ENOMEM; | ||
| 1329 | mlog_errno(ret); | 1333 | mlog_errno(ret); |
| 1330 | goto bail; | 1334 | goto bail; |
| 1331 | } | 1335 | } |
| @@ -1338,7 +1342,9 @@ static int o2hb_debug_init(void) | |||
| 1338 | sizeof(o2hb_live_region_bitmap), | 1342 | sizeof(o2hb_live_region_bitmap), |
| 1339 | O2NM_MAX_REGIONS, | 1343 | O2NM_MAX_REGIONS, |
| 1340 | o2hb_live_region_bitmap); | 1344 | o2hb_live_region_bitmap); |
| 1341 | if (!o2hb_debug_liveregions) { | 1345 | if (IS_ERR_OR_NULL(o2hb_debug_liveregions)) { |
| 1346 | ret = o2hb_debug_liveregions ? | ||
| 1347 | PTR_ERR(o2hb_debug_liveregions) : -ENOMEM; | ||
| 1342 | mlog_errno(ret); | 1348 | mlog_errno(ret); |
| 1343 | goto bail; | 1349 | goto bail; |
| 1344 | } | 1350 | } |
| @@ -1352,7 +1358,9 @@ static int o2hb_debug_init(void) | |||
| 1352 | sizeof(o2hb_quorum_region_bitmap), | 1358 | sizeof(o2hb_quorum_region_bitmap), |
| 1353 | O2NM_MAX_REGIONS, | 1359 | O2NM_MAX_REGIONS, |
| 1354 | o2hb_quorum_region_bitmap); | 1360 | o2hb_quorum_region_bitmap); |
| 1355 | if (!o2hb_debug_quorumregions) { | 1361 | if (IS_ERR_OR_NULL(o2hb_debug_quorumregions)) { |
| 1362 | ret = o2hb_debug_quorumregions ? | ||
| 1363 | PTR_ERR(o2hb_debug_quorumregions) : -ENOMEM; | ||
| 1356 | mlog_errno(ret); | 1364 | mlog_errno(ret); |
| 1357 | goto bail; | 1365 | goto bail; |
| 1358 | } | 1366 | } |
| @@ -1366,7 +1374,9 @@ static int o2hb_debug_init(void) | |||
| 1366 | sizeof(o2hb_failed_region_bitmap), | 1374 | sizeof(o2hb_failed_region_bitmap), |
| 1367 | O2NM_MAX_REGIONS, | 1375 | O2NM_MAX_REGIONS, |
| 1368 | o2hb_failed_region_bitmap); | 1376 | o2hb_failed_region_bitmap); |
| 1369 | if (!o2hb_debug_failedregions) { | 1377 | if (IS_ERR_OR_NULL(o2hb_debug_failedregions)) { |
| 1378 | ret = o2hb_debug_failedregions ? | ||
| 1379 | PTR_ERR(o2hb_debug_failedregions) : -ENOMEM; | ||
| 1370 | mlog_errno(ret); | 1380 | mlog_errno(ret); |
| 1371 | goto bail; | 1381 | goto bail; |
| 1372 | } | 1382 | } |
| @@ -2000,7 +2010,8 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) | |||
| 2000 | 2010 | ||
| 2001 | reg->hr_debug_dir = | 2011 | reg->hr_debug_dir = |
| 2002 | debugfs_create_dir(config_item_name(®->hr_item), dir); | 2012 | debugfs_create_dir(config_item_name(®->hr_item), dir); |
| 2003 | if (!reg->hr_debug_dir) { | 2013 | if (IS_ERR_OR_NULL(reg->hr_debug_dir)) { |
| 2014 | ret = reg->hr_debug_dir ? PTR_ERR(reg->hr_debug_dir) : -ENOMEM; | ||
| 2004 | mlog_errno(ret); | 2015 | mlog_errno(ret); |
| 2005 | goto bail; | 2016 | goto bail; |
| 2006 | } | 2017 | } |
| @@ -2013,7 +2024,9 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) | |||
| 2013 | O2HB_DB_TYPE_REGION_LIVENODES, | 2024 | O2HB_DB_TYPE_REGION_LIVENODES, |
| 2014 | sizeof(reg->hr_live_node_bitmap), | 2025 | sizeof(reg->hr_live_node_bitmap), |
| 2015 | O2NM_MAX_NODES, reg); | 2026 | O2NM_MAX_NODES, reg); |
| 2016 | if (!reg->hr_debug_livenodes) { | 2027 | if (IS_ERR_OR_NULL(reg->hr_debug_livenodes)) { |
| 2028 | ret = reg->hr_debug_livenodes ? | ||
| 2029 | PTR_ERR(reg->hr_debug_livenodes) : -ENOMEM; | ||
| 2017 | mlog_errno(ret); | 2030 | mlog_errno(ret); |
| 2018 | goto bail; | 2031 | goto bail; |
| 2019 | } | 2032 | } |
| @@ -2025,7 +2038,9 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) | |||
| 2025 | sizeof(*(reg->hr_db_regnum)), | 2038 | sizeof(*(reg->hr_db_regnum)), |
| 2026 | O2HB_DB_TYPE_REGION_NUMBER, | 2039 | O2HB_DB_TYPE_REGION_NUMBER, |
| 2027 | 0, O2NM_MAX_NODES, reg); | 2040 | 0, O2NM_MAX_NODES, reg); |
| 2028 | if (!reg->hr_debug_regnum) { | 2041 | if (IS_ERR_OR_NULL(reg->hr_debug_regnum)) { |
| 2042 | ret = reg->hr_debug_regnum ? | ||
| 2043 | PTR_ERR(reg->hr_debug_regnum) : -ENOMEM; | ||
| 2029 | mlog_errno(ret); | 2044 | mlog_errno(ret); |
| 2030 | goto bail; | 2045 | goto bail; |
| 2031 | } | 2046 | } |
| @@ -2037,7 +2052,9 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) | |||
| 2037 | sizeof(*(reg->hr_db_elapsed_time)), | 2052 | sizeof(*(reg->hr_db_elapsed_time)), |
| 2038 | O2HB_DB_TYPE_REGION_ELAPSED_TIME, | 2053 | O2HB_DB_TYPE_REGION_ELAPSED_TIME, |
| 2039 | 0, 0, reg); | 2054 | 0, 0, reg); |
| 2040 | if (!reg->hr_debug_elapsed_time) { | 2055 | if (IS_ERR_OR_NULL(reg->hr_debug_elapsed_time)) { |
| 2056 | ret = reg->hr_debug_elapsed_time ? | ||
| 2057 | PTR_ERR(reg->hr_debug_elapsed_time) : -ENOMEM; | ||
| 2041 | mlog_errno(ret); | 2058 | mlog_errno(ret); |
| 2042 | goto bail; | 2059 | goto bail; |
| 2043 | } | 2060 | } |
| @@ -2049,13 +2066,16 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) | |||
| 2049 | sizeof(*(reg->hr_db_pinned)), | 2066 | sizeof(*(reg->hr_db_pinned)), |
| 2050 | O2HB_DB_TYPE_REGION_PINNED, | 2067 | O2HB_DB_TYPE_REGION_PINNED, |
| 2051 | 0, 0, reg); | 2068 | 0, 0, reg); |
| 2052 | if (!reg->hr_debug_pinned) { | 2069 | if (IS_ERR_OR_NULL(reg->hr_debug_pinned)) { |
| 2070 | ret = reg->hr_debug_pinned ? | ||
| 2071 | PTR_ERR(reg->hr_debug_pinned) : -ENOMEM; | ||
| 2053 | mlog_errno(ret); | 2072 | mlog_errno(ret); |
| 2054 | goto bail; | 2073 | goto bail; |
| 2055 | } | 2074 | } |
| 2056 | 2075 | ||
| 2057 | ret = 0; | 2076 | return 0; |
| 2058 | bail: | 2077 | bail: |
| 2078 | debugfs_remove_recursive(reg->hr_debug_dir); | ||
| 2059 | return ret; | 2079 | return ret; |
| 2060 | } | 2080 | } |
| 2061 | 2081 | ||
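The conversions above all follow one pattern: debugfs creation helpers can hand back an ERR_PTR as well as NULL, so the caller must distinguish the two to report a useful errno. A hedged sketch of that check factored into a helper:

static int create_debug_dir(struct dentry **out, const char *name)
{
	struct dentry *d = debugfs_create_dir(name, NULL);

	if (IS_ERR_OR_NULL(d))
		return d ? PTR_ERR(d) : -ENOMEM;  /* ERR_PTR vs. plain NULL */

	*out = d;
	return 0;
}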
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index 2260fb9e6508..7fdc25a4d8c0 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h | |||
| @@ -196,13 +196,14 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits; | |||
| 196 | } \ | 196 | } \ |
| 197 | } while (0) | 197 | } while (0) |
| 198 | 198 | ||
| 199 | #define mlog_errno(st) do { \ | 199 | #define mlog_errno(st) ({ \ |
| 200 | int _st = (st); \ | 200 | int _st = (st); \ |
| 201 | if (_st != -ERESTARTSYS && _st != -EINTR && \ | 201 | if (_st != -ERESTARTSYS && _st != -EINTR && \ |
| 202 | _st != AOP_TRUNCATED_PAGE && _st != -ENOSPC && \ | 202 | _st != AOP_TRUNCATED_PAGE && _st != -ENOSPC && \ |
| 203 | _st != -EDQUOT) \ | 203 | _st != -EDQUOT) \ |
| 204 | mlog(ML_ERROR, "status = %lld\n", (long long)_st); \ | 204 | mlog(ML_ERROR, "status = %lld\n", (long long)_st); \ |
| 205 | } while (0) | 205 | _st; \ |
| 206 | }) | ||
| 206 | 207 | ||
| 207 | #define mlog_bug_on_msg(cond, fmt, args...) do { \ | 208 | #define mlog_bug_on_msg(cond, fmt, args...) do { \ |
| 208 | if (cond) { \ | 209 | if (cond) { \ |
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index b08050bd3f2e..ccd4dcfc3645 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c | |||
| @@ -18,7 +18,7 @@ | |||
| 18 | * | 18 | * |
| 19 | * linux/fs/minix/dir.c | 19 | * linux/fs/minix/dir.c |
| 20 | * | 20 | * |
| 21 | * Copyright (C) 1991, 1992 Linux Torvalds | 21 | * Copyright (C) 1991, 1992 Linus Torvalds |
| 22 | * | 22 | * |
| 23 | * This program is free software; you can redistribute it and/or | 23 | * This program is free software; you can redistribute it and/or |
| 24 | * modify it under the terms of the GNU General Public | 24 | * modify it under the terms of the GNU General Public |
| @@ -2047,22 +2047,19 @@ int ocfs2_check_dir_for_entry(struct inode *dir, | |||
| 2047 | const char *name, | 2047 | const char *name, |
| 2048 | int namelen) | 2048 | int namelen) |
| 2049 | { | 2049 | { |
| 2050 | int ret; | 2050 | int ret = 0; |
| 2051 | struct ocfs2_dir_lookup_result lookup = { NULL, }; | 2051 | struct ocfs2_dir_lookup_result lookup = { NULL, }; |
| 2052 | 2052 | ||
| 2053 | trace_ocfs2_check_dir_for_entry( | 2053 | trace_ocfs2_check_dir_for_entry( |
| 2054 | (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name); | 2054 | (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name); |
| 2055 | 2055 | ||
| 2056 | ret = -EEXIST; | 2056 | if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0) { |
| 2057 | if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0) | 2057 | ret = -EEXIST; |
| 2058 | goto bail; | 2058 | mlog_errno(ret); |
| 2059 | } | ||
| 2059 | 2060 | ||
| 2060 | ret = 0; | ||
| 2061 | bail: | ||
| 2062 | ocfs2_free_dir_lookup_result(&lookup); | 2061 | ocfs2_free_dir_lookup_result(&lookup); |
| 2063 | 2062 | ||
| 2064 | if (ret) | ||
| 2065 | mlog_errno(ret); | ||
| 2066 | return ret; | 2063 | return ret; |
| 2067 | } | 2064 | } |
| 2068 | 2065 | ||
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 11849a44dc5a..956edf67be20 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
| @@ -1391,6 +1391,11 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb, | |||
| 1391 | int noqueue_attempted = 0; | 1391 | int noqueue_attempted = 0; |
| 1392 | int dlm_locked = 0; | 1392 | int dlm_locked = 0; |
| 1393 | 1393 | ||
| 1394 | if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) { | ||
| 1395 | mlog_errno(-EINVAL); | ||
| 1396 | return -EINVAL; | ||
| 1397 | } | ||
| 1398 | |||
| 1394 | ocfs2_init_mask_waiter(&mw); | 1399 | ocfs2_init_mask_waiter(&mw); |
| 1395 | 1400 | ||
| 1396 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) | 1401 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) |
| @@ -2954,7 +2959,7 @@ static int ocfs2_dlm_init_debug(struct ocfs2_super *osb) | |||
| 2954 | osb->osb_debug_root, | 2959 | osb->osb_debug_root, |
| 2955 | osb, | 2960 | osb, |
| 2956 | &ocfs2_dlm_debug_fops); | 2961 | &ocfs2_dlm_debug_fops); |
| 2957 | if (!dlm_debug->d_locking_state) { | 2962 | if (IS_ERR_OR_NULL(dlm_debug->d_locking_state)) { |
| 2958 | ret = -EINVAL; | 2963 | ret = -EINVAL; |
| 2959 | mlog(ML_ERROR, | 2964 | mlog(ML_ERROR, |
| 2960 | "Unable to create locking state debugfs file.\n"); | 2965 | "Unable to create locking state debugfs file.\n"); |
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 29651167190d..540dc4bdd042 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c | |||
| @@ -82,7 +82,6 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, | |||
| 82 | } | 82 | } |
| 83 | 83 | ||
| 84 | status = ocfs2_test_inode_bit(osb, blkno, &set); | 84 | status = ocfs2_test_inode_bit(osb, blkno, &set); |
| 85 | trace_ocfs2_get_dentry_test_bit(status, set); | ||
| 86 | if (status < 0) { | 85 | if (status < 0) { |
| 87 | if (status == -EINVAL) { | 86 | if (status == -EINVAL) { |
| 88 | /* | 87 | /* |
| @@ -96,6 +95,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, | |||
| 96 | goto unlock_nfs_sync; | 95 | goto unlock_nfs_sync; |
| 97 | } | 96 | } |
| 98 | 97 | ||
| 98 | trace_ocfs2_get_dentry_test_bit(status, set); | ||
| 99 | /* If the inode allocator bit is clear, this inode must be stale */ | 99 | /* If the inode allocator bit is clear, this inode must be stale */ |
| 100 | if (!set) { | 100 | if (!set) { |
| 101 | status = -ESTALE; | 101 | status = -ESTALE; |
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 3025c0da6b8a..be71ca0937f7 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c | |||
| @@ -624,7 +624,7 @@ static int ocfs2_remove_inode(struct inode *inode, | |||
| 624 | ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE, | 624 | ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE, |
| 625 | le16_to_cpu(di->i_suballoc_slot)); | 625 | le16_to_cpu(di->i_suballoc_slot)); |
| 626 | if (!inode_alloc_inode) { | 626 | if (!inode_alloc_inode) { |
| 627 | status = -EEXIST; | 627 | status = -ENOENT; |
| 628 | mlog_errno(status); | 628 | mlog_errno(status); |
| 629 | goto bail; | 629 | goto bail; |
| 630 | } | 630 | } |
| @@ -742,7 +742,7 @@ static int ocfs2_wipe_inode(struct inode *inode, | |||
| 742 | ORPHAN_DIR_SYSTEM_INODE, | 742 | ORPHAN_DIR_SYSTEM_INODE, |
| 743 | orphaned_slot); | 743 | orphaned_slot); |
| 744 | if (!orphan_dir_inode) { | 744 | if (!orphan_dir_inode) { |
| 745 | status = -EEXIST; | 745 | status = -ENOENT; |
| 746 | mlog_errno(status); | 746 | mlog_errno(status); |
| 747 | goto bail; | 747 | goto bail; |
| 748 | } | 748 | } |
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 044013455621..857bbbcd39f3 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c | |||
| @@ -666,7 +666,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb, | |||
| 666 | if (le32_to_cpu(alloc->id1.bitmap1.i_used) != | 666 | if (le32_to_cpu(alloc->id1.bitmap1.i_used) != |
| 667 | ocfs2_local_alloc_count_bits(alloc)) { | 667 | ocfs2_local_alloc_count_bits(alloc)) { |
| 668 | ocfs2_error(osb->sb, "local alloc inode %llu says it has " | 668 | ocfs2_error(osb->sb, "local alloc inode %llu says it has " |
| 669 | "%u free bits, but a count shows %u", | 669 | "%u used bits, but a count shows %u", |
| 670 | (unsigned long long)le64_to_cpu(alloc->i_blkno), | 670 | (unsigned long long)le64_to_cpu(alloc->i_blkno), |
| 671 | le32_to_cpu(alloc->id1.bitmap1.i_used), | 671 | le32_to_cpu(alloc->id1.bitmap1.i_used), |
| 672 | ocfs2_local_alloc_count_bits(alloc)); | 672 | ocfs2_local_alloc_count_bits(alloc)); |
| @@ -839,7 +839,7 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, | |||
| 839 | u32 *numbits, | 839 | u32 *numbits, |
| 840 | struct ocfs2_alloc_reservation *resv) | 840 | struct ocfs2_alloc_reservation *resv) |
| 841 | { | 841 | { |
| 842 | int numfound, bitoff, left, startoff, lastzero; | 842 | int numfound = 0, bitoff, left, startoff, lastzero; |
| 843 | int local_resv = 0; | 843 | int local_resv = 0; |
| 844 | struct ocfs2_alloc_reservation r; | 844 | struct ocfs2_alloc_reservation r; |
| 845 | void *bitmap = NULL; | 845 | void *bitmap = NULL; |
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index b5c3a5ea3ee6..09f90cbf0e24 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
| @@ -2322,10 +2322,10 @@ int ocfs2_orphan_del(struct ocfs2_super *osb, | |||
| 2322 | 2322 | ||
| 2323 | trace_ocfs2_orphan_del( | 2323 | trace_ocfs2_orphan_del( |
| 2324 | (unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno, | 2324 | (unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno, |
| 2325 | name, namelen); | 2325 | name, strlen(name)); |
| 2326 | 2326 | ||
| 2327 | /* find it's spot in the orphan directory */ | 2327 | /* find it's spot in the orphan directory */ |
| 2328 | status = ocfs2_find_entry(name, namelen, orphan_dir_inode, | 2328 | status = ocfs2_find_entry(name, strlen(name), orphan_dir_inode, |
| 2329 | &lookup); | 2329 | &lookup); |
| 2330 | if (status) { | 2330 | if (status) { |
| 2331 | mlog_errno(status); | 2331 | mlog_errno(status); |
| @@ -2808,7 +2808,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, | |||
| 2808 | ORPHAN_DIR_SYSTEM_INODE, | 2808 | ORPHAN_DIR_SYSTEM_INODE, |
| 2809 | osb->slot_num); | 2809 | osb->slot_num); |
| 2810 | if (!orphan_dir_inode) { | 2810 | if (!orphan_dir_inode) { |
| 2811 | status = -EEXIST; | 2811 | status = -ENOENT; |
| 2812 | mlog_errno(status); | 2812 | mlog_errno(status); |
| 2813 | goto leave; | 2813 | goto leave; |
| 2814 | } | 2814 | } |
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index ee541f92dab4..df3a500789c7 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c | |||
| @@ -4276,7 +4276,7 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir, | |||
| 4276 | error = posix_acl_create(dir, &mode, &default_acl, &acl); | 4276 | error = posix_acl_create(dir, &mode, &default_acl, &acl); |
| 4277 | if (error) { | 4277 | if (error) { |
| 4278 | mlog_errno(error); | 4278 | mlog_errno(error); |
| 4279 | goto out; | 4279 | return error; |
| 4280 | } | 4280 | } |
| 4281 | 4281 | ||
| 4282 | error = ocfs2_create_inode_in_orphan(dir, mode, | 4282 | error = ocfs2_create_inode_in_orphan(dir, mode, |
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index d5493e361a38..e78a203d44c8 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c | |||
| @@ -427,7 +427,7 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb) | |||
| 427 | if (!si) { | 427 | if (!si) { |
| 428 | status = -ENOMEM; | 428 | status = -ENOMEM; |
| 429 | mlog_errno(status); | 429 | mlog_errno(status); |
| 430 | goto bail; | 430 | return status; |
| 431 | } | 431 | } |
| 432 | 432 | ||
| 433 | si->si_extended = ocfs2_uses_extended_slot_map(osb); | 433 | si->si_extended = ocfs2_uses_extended_slot_map(osb); |
| @@ -452,7 +452,7 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb) | |||
| 452 | 452 | ||
| 453 | osb->slot_info = (struct ocfs2_slot_info *)si; | 453 | osb->slot_info = (struct ocfs2_slot_info *)si; |
| 454 | bail: | 454 | bail: |
| 455 | if (status < 0 && si) | 455 | if (status < 0) |
| 456 | __ocfs2_free_slot_info(si); | 456 | __ocfs2_free_slot_info(si); |
| 457 | 457 | ||
| 458 | return status; | 458 | return status; |
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index 1724d43d3da1..220cae7bbdbc 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c | |||
| @@ -295,7 +295,7 @@ static int o2cb_cluster_check(void) | |||
| 295 | set_bit(node_num, netmap); | 295 | set_bit(node_num, netmap); |
| 296 | if (!memcmp(hbmap, netmap, sizeof(hbmap))) | 296 | if (!memcmp(hbmap, netmap, sizeof(hbmap))) |
| 297 | return 0; | 297 | return 0; |
| 298 | if (i < O2CB_MAP_STABILIZE_COUNT) | 298 | if (i < O2CB_MAP_STABILIZE_COUNT - 1) |
| 299 | msleep(1000); | 299 | msleep(1000); |
| 300 | } | 300 | } |
| 301 | 301 | ||
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index 720aa389e0ea..2768eb1da2b8 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c | |||
| @@ -1004,10 +1004,8 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn) | |||
| 1004 | BUG_ON(conn == NULL); | 1004 | BUG_ON(conn == NULL); |
| 1005 | 1005 | ||
| 1006 | lc = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL); | 1006 | lc = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL); |
| 1007 | if (!lc) { | 1007 | if (!lc) |
| 1008 | rc = -ENOMEM; | 1008 | return -ENOMEM; |
| 1009 | goto out; | ||
| 1010 | } | ||
| 1011 | 1009 | ||
| 1012 | init_waitqueue_head(&lc->oc_wait); | 1010 | init_waitqueue_head(&lc->oc_wait); |
| 1013 | init_completion(&lc->oc_sync_wait); | 1011 | init_completion(&lc->oc_sync_wait); |
| @@ -1063,7 +1061,7 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn) | |||
| 1063 | } | 1061 | } |
| 1064 | 1062 | ||
| 1065 | out: | 1063 | out: |
| 1066 | if (rc && lc) | 1064 | if (rc) |
| 1067 | kfree(lc); | 1065 | kfree(lc); |
| 1068 | return rc; | 1066 | return rc; |
| 1069 | } | 1067 | } |
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 0cb889a17ae1..4479029630bb 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
| @@ -2499,6 +2499,8 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle, | |||
| 2499 | alloc_bh, OCFS2_JOURNAL_ACCESS_WRITE); | 2499 | alloc_bh, OCFS2_JOURNAL_ACCESS_WRITE); |
| 2500 | if (status < 0) { | 2500 | if (status < 0) { |
| 2501 | mlog_errno(status); | 2501 | mlog_errno(status); |
| 2502 | ocfs2_block_group_set_bits(handle, alloc_inode, group, group_bh, | ||
| 2503 | start_bit, count); | ||
| 2502 | goto bail; | 2504 | goto bail; |
| 2503 | } | 2505 | } |
| 2504 | 2506 | ||
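The added ocfs2_block_group_set_bits() call reads as an undo step: the corresponding bits in the group bitmap were already cleared earlier in this function, so a journal-access failure at this point has to re-set them, otherwise the cached bitmap would disagree with what ends up on disk. The general shape, sketched with placeholder helpers rather than the real ocfs2 ones:

    ret = group_clear_bits(group, start_bit, count);   /* step A: mutate state */
    if (ret)
        goto bail;

    ret = journal_access(handle, alloc_bh);            /* step B: may fail */
    if (ret) {
        /* Roll back step A so cached and on-disk state stay consistent. */
        group_set_bits(group, start_bit, count);
        goto bail;
    }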
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 26675185b886..837ddce4b659 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
| @@ -1112,7 +1112,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
| 1112 | 1112 | ||
| 1113 | osb->osb_debug_root = debugfs_create_dir(osb->uuid_str, | 1113 | osb->osb_debug_root = debugfs_create_dir(osb->uuid_str, |
| 1114 | ocfs2_debugfs_root); | 1114 | ocfs2_debugfs_root); |
| 1115 | if (!osb->osb_debug_root) { | 1115 | if (IS_ERR_OR_NULL(osb->osb_debug_root)) { |
| 1116 | status = -EINVAL; | 1116 | status = -EINVAL; |
| 1117 | mlog(ML_ERROR, "Unable to create per-mount debugfs root.\n"); | 1117 | mlog(ML_ERROR, "Unable to create per-mount debugfs root.\n"); |
| 1118 | goto read_super_error; | 1118 | goto read_super_error; |
| @@ -1122,7 +1122,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
| 1122 | osb->osb_debug_root, | 1122 | osb->osb_debug_root, |
| 1123 | osb, | 1123 | osb, |
| 1124 | &ocfs2_osb_debug_fops); | 1124 | &ocfs2_osb_debug_fops); |
| 1125 | if (!osb->osb_ctxt) { | 1125 | if (IS_ERR_OR_NULL(osb->osb_ctxt)) { |
| 1126 | status = -EINVAL; | 1126 | status = -EINVAL; |
| 1127 | mlog_errno(status); | 1127 | mlog_errno(status); |
| 1128 | goto read_super_error; | 1128 | goto read_super_error; |
| @@ -1606,8 +1606,9 @@ static int __init ocfs2_init(void) | |||
| 1606 | } | 1606 | } |
| 1607 | 1607 | ||
| 1608 | ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL); | 1608 | ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL); |
| 1609 | if (!ocfs2_debugfs_root) { | 1609 | if (IS_ERR_OR_NULL(ocfs2_debugfs_root)) { |
| 1610 | status = -ENOMEM; | 1610 | status = ocfs2_debugfs_root ? |
| 1611 | PTR_ERR(ocfs2_debugfs_root) : -ENOMEM; | ||
| 1611 | mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n"); | 1612 | mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n"); |
| 1612 | goto out4; | 1613 | goto out4; |
| 1613 | } | 1614 | } |
| @@ -2069,6 +2070,8 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
| 2069 | cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits); | 2070 | cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits); |
| 2070 | bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits); | 2071 | bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits); |
| 2071 | sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits); | 2072 | sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits); |
| 2073 | memcpy(sb->s_uuid, di->id2.i_super.s_uuid, | ||
| 2074 | sizeof(di->id2.i_super.s_uuid)); | ||
| 2072 | 2075 | ||
| 2073 | osb->osb_dx_mask = (1 << (cbits - bbits)) - 1; | 2076 | osb->osb_dx_mask = (1 << (cbits - bbits)) - 1; |
| 2074 | 2077 | ||
| @@ -2333,7 +2336,7 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
| 2333 | mlog_errno(status); | 2336 | mlog_errno(status); |
| 2334 | goto bail; | 2337 | goto bail; |
| 2335 | } | 2338 | } |
| 2336 | cleancache_init_shared_fs((char *)&di->id2.i_super.s_uuid, sb); | 2339 | cleancache_init_shared_fs(sb); |
| 2337 | 2340 | ||
| 2338 | bail: | 2341 | bail: |
| 2339 | return status; | 2342 | return status; |
| @@ -2563,22 +2566,22 @@ static void ocfs2_handle_error(struct super_block *sb) | |||
| 2563 | ocfs2_set_ro_flag(osb, 0); | 2566 | ocfs2_set_ro_flag(osb, 0); |
| 2564 | } | 2567 | } |
| 2565 | 2568 | ||
| 2566 | static char error_buf[1024]; | 2569 | void __ocfs2_error(struct super_block *sb, const char *function, |
| 2567 | 2570 | const char *fmt, ...) | |
| 2568 | void __ocfs2_error(struct super_block *sb, | ||
| 2569 | const char *function, | ||
| 2570 | const char *fmt, ...) | ||
| 2571 | { | 2571 | { |
| 2572 | struct va_format vaf; | ||
| 2572 | va_list args; | 2573 | va_list args; |
| 2573 | 2574 | ||
| 2574 | va_start(args, fmt); | 2575 | va_start(args, fmt); |
| 2575 | vsnprintf(error_buf, sizeof(error_buf), fmt, args); | 2576 | vaf.fmt = fmt; |
| 2576 | va_end(args); | 2577 | vaf.va = &args; |
| 2577 | 2578 | ||
| 2578 | /* Not using mlog here because we want to show the actual | 2579 | /* Not using mlog here because we want to show the actual |
| 2579 | * function the error came from. */ | 2580 | * function the error came from. */ |
| 2580 | printk(KERN_CRIT "OCFS2: ERROR (device %s): %s: %s\n", | 2581 | printk(KERN_CRIT "OCFS2: ERROR (device %s): %s: %pV\n", |
| 2581 | sb->s_id, function, error_buf); | 2582 | sb->s_id, function, &vaf); |
| 2583 | |||
| 2584 | va_end(args); | ||
| 2582 | 2585 | ||
| 2583 | ocfs2_handle_error(sb); | 2586 | ocfs2_handle_error(sb); |
| 2584 | } | 2587 | } |
| @@ -2586,18 +2589,21 @@ void __ocfs2_error(struct super_block *sb, | |||
| 2586 | /* Handle critical errors. This is intentionally more drastic than | 2589 | /* Handle critical errors. This is intentionally more drastic than |
| 2587 | * ocfs2_handle_error, so we only use for things like journal errors, | 2590 | * ocfs2_handle_error, so we only use for things like journal errors, |
| 2588 | * etc. */ | 2591 | * etc. */ |
| 2589 | void __ocfs2_abort(struct super_block* sb, | 2592 | void __ocfs2_abort(struct super_block *sb, const char *function, |
| 2590 | const char *function, | ||
| 2591 | const char *fmt, ...) | 2593 | const char *fmt, ...) |
| 2592 | { | 2594 | { |
| 2595 | struct va_format vaf; | ||
| 2593 | va_list args; | 2596 | va_list args; |
| 2594 | 2597 | ||
| 2595 | va_start(args, fmt); | 2598 | va_start(args, fmt); |
| 2596 | vsnprintf(error_buf, sizeof(error_buf), fmt, args); | ||
| 2597 | va_end(args); | ||
| 2598 | 2599 | ||
| 2599 | printk(KERN_CRIT "OCFS2: abort (device %s): %s: %s\n", | 2600 | vaf.fmt = fmt; |
| 2600 | sb->s_id, function, error_buf); | 2601 | vaf.va = &args; |
| 2602 | |||
| 2603 | printk(KERN_CRIT "OCFS2: abort (device %s): %s: %pV\n", | ||
| 2604 | sb->s_id, function, &vaf); | ||
| 2605 | |||
| 2606 | va_end(args); | ||
| 2601 | 2607 | ||
| 2602 | /* We don't have the cluster support yet to go straight to | 2608 | /* We don't have the cluster support yet to go straight to |
| 2603 | * hard readonly in here. Until then, we want to keep | 2609 | * hard readonly in here. Until then, we want to keep |
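Dropping the shared static error_buf in favour of struct va_format and printk's %pV extension removes a 1 KB static buffer and, more importantly, the unlocked sharing of it between concurrent __ocfs2_error()/__ocfs2_abort() callers; %pV lets printk expand the caller's format string and va_list in one pass. The idiom in isolation, using an illustrative wrapper name:

    static void my_warn(const char *prefix, const char *fmt, ...)
    {
        struct va_format vaf;
        va_list args;

        va_start(args, fmt);
        vaf.fmt = fmt;
        vaf.va = &args;
        /* printk() expands the nested (fmt, va_list) pair via %pV. */
        printk(KERN_WARNING "%s: %pV\n", prefix, &vaf);
        va_end(args);   /* args must stay live until after the printk */
    }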
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 85b190dc132f..4ca7533be479 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c | |||
| @@ -1238,6 +1238,10 @@ static int ocfs2_xattr_block_get(struct inode *inode, | |||
| 1238 | i, | 1238 | i, |
| 1239 | &block_off, | 1239 | &block_off, |
| 1240 | &name_offset); | 1240 | &name_offset); |
| 1241 | if (ret) { | ||
| 1242 | mlog_errno(ret); | ||
| 1243 | goto cleanup; | ||
| 1244 | } | ||
| 1241 | xs->base = bucket_block(xs->bucket, block_off); | 1245 | xs->base = bucket_block(xs->bucket, block_off); |
| 1242 | } | 1246 | } |
| 1243 | if (ocfs2_xattr_is_local(xs->here)) { | 1247 | if (ocfs2_xattr_is_local(xs->here)) { |
| @@ -5665,6 +5669,10 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode, | |||
| 5665 | 5669 | ||
| 5666 | ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, | 5670 | ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, |
| 5667 | i, &xv, NULL); | 5671 | i, &xv, NULL); |
| 5672 | if (ret) { | ||
| 5673 | mlog_errno(ret); | ||
| 5674 | break; | ||
| 5675 | } | ||
| 5668 | 5676 | ||
| 5669 | ret = ocfs2_lock_xattr_remove_allocators(inode, xv, | 5677 | ret = ocfs2_lock_xattr_remove_allocators(inode, xv, |
| 5670 | args->ref_ci, | 5678 | args->ref_ci, |
diff --git a/fs/super.c b/fs/super.c index 2b7dc90ccdbb..928c20f47af9 100644 --- a/fs/super.c +++ b/fs/super.c | |||
| @@ -224,7 +224,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) | |||
| 224 | s->s_maxbytes = MAX_NON_LFS; | 224 | s->s_maxbytes = MAX_NON_LFS; |
| 225 | s->s_op = &default_op; | 225 | s->s_op = &default_op; |
| 226 | s->s_time_gran = 1000000000; | 226 | s->s_time_gran = 1000000000; |
| 227 | s->cleancache_poolid = -1; | 227 | s->cleancache_poolid = CLEANCACHE_NO_POOL; |
| 228 | 228 | ||
| 229 | s->s_shrink.seeks = DEFAULT_SEEKS; | 229 | s->s_shrink.seeks = DEFAULT_SEEKS; |
| 230 | s->s_shrink.scan_objects = super_cache_scan; | 230 | s->s_shrink.scan_objects = super_cache_scan; |
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 4d46085c1b90..39f1d6a2b04d 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h | |||
| @@ -6,6 +6,12 @@ | |||
| 6 | 6 | ||
| 7 | #include <linux/mm_types.h> | 7 | #include <linux/mm_types.h> |
| 8 | #include <linux/bug.h> | 8 | #include <linux/bug.h> |
| 9 | #include <linux/errno.h> | ||
| 10 | |||
| 11 | #if 4 - defined(__PAGETABLE_PUD_FOLDED) - defined(__PAGETABLE_PMD_FOLDED) != \ | ||
| 12 | CONFIG_PGTABLE_LEVELS | ||
| 13 | #error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{PUD,PMD}_FOLDED | ||
| 14 | #endif | ||
| 9 | 15 | ||
| 10 | /* | 16 | /* |
| 11 | * On almost all architectures and configurations, 0 can be used as the | 17 | * On almost all architectures and configurations, 0 can be used as the |
| @@ -691,6 +697,30 @@ static inline int pmd_protnone(pmd_t pmd) | |||
| 691 | 697 | ||
| 692 | #endif /* CONFIG_MMU */ | 698 | #endif /* CONFIG_MMU */ |
| 693 | 699 | ||
| 700 | #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP | ||
| 701 | int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot); | ||
| 702 | int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot); | ||
| 703 | int pud_clear_huge(pud_t *pud); | ||
| 704 | int pmd_clear_huge(pmd_t *pmd); | ||
| 705 | #else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */ | ||
| 706 | static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) | ||
| 707 | { | ||
| 708 | return 0; | ||
| 709 | } | ||
| 710 | static inline int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) | ||
| 711 | { | ||
| 712 | return 0; | ||
| 713 | } | ||
| 714 | static inline int pud_clear_huge(pud_t *pud) | ||
| 715 | { | ||
| 716 | return 0; | ||
| 717 | } | ||
| 718 | static inline int pmd_clear_huge(pmd_t *pmd) | ||
| 719 | { | ||
| 720 | return 0; | ||
| 721 | } | ||
| 722 | #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ | ||
| 723 | |||
| 694 | #endif /* !__ASSEMBLY__ */ | 724 | #endif /* !__ASSEMBLY__ */ |
| 695 | 725 | ||
| 696 | #ifndef io_remap_pfn_range | 726 | #ifndef io_remap_pfn_range |
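Two patterns in this hunk recur elsewhere in the series. The #if arithmetic at the top works because defined(X) evaluates to 1 or 0 inside a preprocessor expression, so 4 minus the number of folded levels must equal CONFIG_PGTABLE_LEVELS or the build stops. The stub block is the usual config-gated shape — real declarations when CONFIG_HAVE_ARCH_HUGE_VMAP is set, empty static inlines otherwise — which the io.h, memblock.h and memory_hotplug.h hunks below repeat. Roughly, with placeholder names:

    #ifdef CONFIG_HAVE_FOO
    void __init foo_init(void);                    /* real implementation elsewhere */
    int foo_supported(void);
    #else
    static inline void foo_init(void) { }          /* compiles away when FOO is off */
    static inline int foo_supported(void) { return 0; }
    #endif

Callers can then invoke foo_init() unconditionally, with no #ifdef at the call site.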
diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h index 4ce9056b31a8..bda5ec0b4b4d 100644 --- a/include/linux/cleancache.h +++ b/include/linux/cleancache.h | |||
| @@ -5,6 +5,10 @@ | |||
| 5 | #include <linux/exportfs.h> | 5 | #include <linux/exportfs.h> |
| 6 | #include <linux/mm.h> | 6 | #include <linux/mm.h> |
| 7 | 7 | ||
| 8 | #define CLEANCACHE_NO_POOL -1 | ||
| 9 | #define CLEANCACHE_NO_BACKEND -2 | ||
| 10 | #define CLEANCACHE_NO_BACKEND_SHARED -3 | ||
| 11 | |||
| 8 | #define CLEANCACHE_KEY_MAX 6 | 12 | #define CLEANCACHE_KEY_MAX 6 |
| 9 | 13 | ||
| 10 | /* | 14 | /* |
| @@ -33,10 +37,9 @@ struct cleancache_ops { | |||
| 33 | void (*invalidate_fs)(int); | 37 | void (*invalidate_fs)(int); |
| 34 | }; | 38 | }; |
| 35 | 39 | ||
| 36 | extern struct cleancache_ops * | 40 | extern int cleancache_register_ops(struct cleancache_ops *ops); |
| 37 | cleancache_register_ops(struct cleancache_ops *ops); | ||
| 38 | extern void __cleancache_init_fs(struct super_block *); | 41 | extern void __cleancache_init_fs(struct super_block *); |
| 39 | extern void __cleancache_init_shared_fs(char *, struct super_block *); | 42 | extern void __cleancache_init_shared_fs(struct super_block *); |
| 40 | extern int __cleancache_get_page(struct page *); | 43 | extern int __cleancache_get_page(struct page *); |
| 41 | extern void __cleancache_put_page(struct page *); | 44 | extern void __cleancache_put_page(struct page *); |
| 42 | extern void __cleancache_invalidate_page(struct address_space *, struct page *); | 45 | extern void __cleancache_invalidate_page(struct address_space *, struct page *); |
| @@ -78,10 +81,10 @@ static inline void cleancache_init_fs(struct super_block *sb) | |||
| 78 | __cleancache_init_fs(sb); | 81 | __cleancache_init_fs(sb); |
| 79 | } | 82 | } |
| 80 | 83 | ||
| 81 | static inline void cleancache_init_shared_fs(char *uuid, struct super_block *sb) | 84 | static inline void cleancache_init_shared_fs(struct super_block *sb) |
| 82 | { | 85 | { |
| 83 | if (cleancache_enabled) | 86 | if (cleancache_enabled) |
| 84 | __cleancache_init_shared_fs(uuid, sb); | 87 | __cleancache_init_shared_fs(sb); |
| 85 | } | 88 | } |
| 86 | 89 | ||
| 87 | static inline int cleancache_get_page(struct page *page) | 90 | static inline int cleancache_get_page(struct page *page) |
diff --git a/include/linux/cma.h b/include/linux/cma.h index 9384ba66e975..f7ef093ec49a 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h | |||
| @@ -16,16 +16,16 @@ | |||
| 16 | struct cma; | 16 | struct cma; |
| 17 | 17 | ||
| 18 | extern unsigned long totalcma_pages; | 18 | extern unsigned long totalcma_pages; |
| 19 | extern phys_addr_t cma_get_base(struct cma *cma); | 19 | extern phys_addr_t cma_get_base(const struct cma *cma); |
| 20 | extern unsigned long cma_get_size(struct cma *cma); | 20 | extern unsigned long cma_get_size(const struct cma *cma); |
| 21 | 21 | ||
| 22 | extern int __init cma_declare_contiguous(phys_addr_t base, | 22 | extern int __init cma_declare_contiguous(phys_addr_t base, |
| 23 | phys_addr_t size, phys_addr_t limit, | 23 | phys_addr_t size, phys_addr_t limit, |
| 24 | phys_addr_t alignment, unsigned int order_per_bit, | 24 | phys_addr_t alignment, unsigned int order_per_bit, |
| 25 | bool fixed, struct cma **res_cma); | 25 | bool fixed, struct cma **res_cma); |
| 26 | extern int cma_init_reserved_mem(phys_addr_t base, | 26 | extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, |
| 27 | phys_addr_t size, int order_per_bit, | 27 | unsigned int order_per_bit, |
| 28 | struct cma **res_cma); | 28 | struct cma **res_cma); |
| 29 | extern struct page *cma_alloc(struct cma *cma, int count, unsigned int align); | 29 | extern struct page *cma_alloc(struct cma *cma, unsigned int count, unsigned int align); |
| 30 | extern bool cma_release(struct cma *cma, struct page *pages, int count); | 30 | extern bool cma_release(struct cma *cma, const struct page *pages, unsigned int count); |
| 31 | #endif | 31 | #endif |
diff --git a/include/linux/elf-randomize.h b/include/linux/elf-randomize.h new file mode 100644 index 000000000000..b5f0bda9472e --- /dev/null +++ b/include/linux/elf-randomize.h | |||
| @@ -0,0 +1,22 @@ | |||
| 1 | #ifndef _ELF_RANDOMIZE_H | ||
| 2 | #define _ELF_RANDOMIZE_H | ||
| 3 | |||
| 4 | struct mm_struct; | ||
| 5 | |||
| 6 | #ifndef CONFIG_ARCH_HAS_ELF_RANDOMIZE | ||
| 7 | static inline unsigned long arch_mmap_rnd(void) { return 0; } | ||
| 8 | # if defined(arch_randomize_brk) && defined(CONFIG_COMPAT_BRK) | ||
| 9 | # define compat_brk_randomized | ||
| 10 | # endif | ||
| 11 | # ifndef arch_randomize_brk | ||
| 12 | # define arch_randomize_brk(mm) (mm->brk) | ||
| 13 | # endif | ||
| 14 | #else | ||
| 15 | extern unsigned long arch_mmap_rnd(void); | ||
| 16 | extern unsigned long arch_randomize_brk(struct mm_struct *mm); | ||
| 17 | # ifdef CONFIG_COMPAT_BRK | ||
| 18 | # define compat_brk_randomized | ||
| 19 | # endif | ||
| 20 | #endif | ||
| 21 | |||
| 22 | #endif | ||
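The new header gathers the ELF ASLR hooks in one place: architectures selecting CONFIG_ARCH_HAS_ELF_RANDOMIZE supply arch_mmap_rnd() and arch_randomize_brk(), everyone else falls back to zero randomness and an identity brk. A hedged sketch of a consumer (the surrounding function is hypothetical):

    static unsigned long pick_mmap_base(unsigned long base)
    {
        unsigned long rnd = 0;

        if (current->flags & PF_RANDOMIZE)
            rnd = arch_mmap_rnd();      /* 0 unless the arch implements it */

        return base - rnd;
    }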
diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 51bd1e72a917..97a9373e61e8 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h | |||
| @@ -57,8 +57,10 @@ struct vm_area_struct; | |||
| 57 | * _might_ fail. This depends upon the particular VM implementation. | 57 | * _might_ fail. This depends upon the particular VM implementation. |
| 58 | * | 58 | * |
| 59 | * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller | 59 | * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller |
| 60 | * cannot handle allocation failures. This modifier is deprecated and no new | 60 | * cannot handle allocation failures. New users should be evaluated carefully |
| 61 | * users should be added. | 61 | * (and the flag should be used only when there is no reasonable failure policy) |
| 62 | * but it is definitely preferable to use the flag rather than opencode endless | ||
| 63 | * loop around allocator. | ||
| 62 | * | 64 | * |
| 63 | * __GFP_NORETRY: The VM implementation must not retry indefinitely. | 65 | * __GFP_NORETRY: The VM implementation must not retry indefinitely. |
| 64 | * | 66 | * |
| @@ -117,16 +119,6 @@ struct vm_area_struct; | |||
| 117 | __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \ | 119 | __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \ |
| 118 | __GFP_NO_KSWAPD) | 120 | __GFP_NO_KSWAPD) |
| 119 | 121 | ||
| 120 | /* | ||
| 121 | * GFP_THISNODE does not perform any reclaim, you most likely want to | ||
| 122 | * use __GFP_THISNODE to allocate from a given node without fallback! | ||
| 123 | */ | ||
| 124 | #ifdef CONFIG_NUMA | ||
| 125 | #define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY) | ||
| 126 | #else | ||
| 127 | #define GFP_THISNODE ((__force gfp_t)0) | ||
| 128 | #endif | ||
| 129 | |||
| 130 | /* This mask makes up all the page movable related flags */ | 122 | /* This mask makes up all the page movable related flags */ |
| 131 | #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) | 123 | #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) |
| 132 | 124 | ||
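The reworded __GFP_NOFAIL comment steers callers that truly cannot tolerate failure toward the flag instead of an open-coded retry loop: the allocator then sees the constraint explicitly and can make better progress decisions. The two shapes side by side, with illustrative function names and assuming a caller that has no reasonable failure policy:

    /* Discouraged: opencoded endless loop around the allocator. */
    static void *must_alloc_loop(size_t size)
    {
        void *p;

        do {
            p = kmalloc(size, GFP_NOFS);
        } while (!p);
        return p;
    }

    /* Preferred when no failure policy exists: let the allocator retry. */
    static void *must_alloc_nofail(size_t size)
    {
        return kmalloc(size, GFP_NOFS | __GFP_NOFAIL);
    }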
diff --git a/include/linux/io.h b/include/linux/io.h index fa02e55e5a2e..4cc299c598e0 100644 --- a/include/linux/io.h +++ b/include/linux/io.h | |||
| @@ -38,6 +38,14 @@ static inline int ioremap_page_range(unsigned long addr, unsigned long end, | |||
| 38 | } | 38 | } |
| 39 | #endif | 39 | #endif |
| 40 | 40 | ||
| 41 | #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP | ||
| 42 | void __init ioremap_huge_init(void); | ||
| 43 | int arch_ioremap_pud_supported(void); | ||
| 44 | int arch_ioremap_pmd_supported(void); | ||
| 45 | #else | ||
| 46 | static inline void ioremap_huge_init(void) { } | ||
| 47 | #endif | ||
| 48 | |||
| 41 | /* | 49 | /* |
| 42 | * Managed iomap interface | 50 | * Managed iomap interface |
| 43 | */ | 51 | */ |
diff --git a/include/linux/memblock.h b/include/linux/memblock.h index e8cc45307f8f..9497ec7c77ea 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h | |||
| @@ -365,6 +365,14 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo | |||
| 365 | #define __initdata_memblock | 365 | #define __initdata_memblock |
| 366 | #endif | 366 | #endif |
| 367 | 367 | ||
| 368 | #ifdef CONFIG_MEMTEST | ||
| 369 | extern void early_memtest(phys_addr_t start, phys_addr_t end); | ||
| 370 | #else | ||
| 371 | static inline void early_memtest(phys_addr_t start, phys_addr_t end) | ||
| 372 | { | ||
| 373 | } | ||
| 374 | #endif | ||
| 375 | |||
| 368 | #else | 376 | #else |
| 369 | static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align) | 377 | static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align) |
| 370 | { | 378 | { |
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 8f1a41951df9..6ffa0ac7f7d6 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h | |||
| @@ -192,6 +192,9 @@ extern void get_page_bootmem(unsigned long ingo, struct page *page, | |||
| 192 | void get_online_mems(void); | 192 | void get_online_mems(void); |
| 193 | void put_online_mems(void); | 193 | void put_online_mems(void); |
| 194 | 194 | ||
| 195 | void mem_hotplug_begin(void); | ||
| 196 | void mem_hotplug_done(void); | ||
| 197 | |||
| 195 | #else /* ! CONFIG_MEMORY_HOTPLUG */ | 198 | #else /* ! CONFIG_MEMORY_HOTPLUG */ |
| 196 | /* | 199 | /* |
| 197 | * Stub functions for when hotplug is off | 200 | * Stub functions for when hotplug is off |
| @@ -231,6 +234,9 @@ static inline int try_online_node(int nid) | |||
| 231 | static inline void get_online_mems(void) {} | 234 | static inline void get_online_mems(void) {} |
| 232 | static inline void put_online_mems(void) {} | 235 | static inline void put_online_mems(void) {} |
| 233 | 236 | ||
| 237 | static inline void mem_hotplug_begin(void) {} | ||
| 238 | static inline void mem_hotplug_done(void) {} | ||
| 239 | |||
| 234 | #endif /* ! CONFIG_MEMORY_HOTPLUG */ | 240 | #endif /* ! CONFIG_MEMORY_HOTPLUG */ |
| 235 | 241 | ||
| 236 | #ifdef CONFIG_MEMORY_HOTREMOVE | 242 | #ifdef CONFIG_MEMORY_HOTREMOVE |
diff --git a/include/linux/mempool.h b/include/linux/mempool.h index 39ed62ab5b8a..b19b3023c880 100644 --- a/include/linux/mempool.h +++ b/include/linux/mempool.h | |||
| @@ -29,7 +29,7 @@ extern mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn, | |||
| 29 | mempool_free_t *free_fn, void *pool_data, | 29 | mempool_free_t *free_fn, void *pool_data, |
| 30 | gfp_t gfp_mask, int nid); | 30 | gfp_t gfp_mask, int nid); |
| 31 | 31 | ||
| 32 | extern int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask); | 32 | extern int mempool_resize(mempool_t *pool, int new_min_nr); |
| 33 | extern void mempool_destroy(mempool_t *pool); | 33 | extern void mempool_destroy(mempool_t *pool); |
| 34 | extern void * mempool_alloc(mempool_t *pool, gfp_t gfp_mask); | 34 | extern void * mempool_alloc(mempool_t *pool, gfp_t gfp_mask); |
| 35 | extern void mempool_free(void *element, mempool_t *pool); | 35 | extern void mempool_free(void *element, mempool_t *pool); |
diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 78baed5f2952..cac1c0904d5f 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h | |||
| @@ -69,7 +69,6 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping, | |||
| 69 | extern bool pmd_trans_migrating(pmd_t pmd); | 69 | extern bool pmd_trans_migrating(pmd_t pmd); |
| 70 | extern int migrate_misplaced_page(struct page *page, | 70 | extern int migrate_misplaced_page(struct page *page, |
| 71 | struct vm_area_struct *vma, int node); | 71 | struct vm_area_struct *vma, int node); |
| 72 | extern bool migrate_ratelimited(int node); | ||
| 73 | #else | 72 | #else |
| 74 | static inline bool pmd_trans_migrating(pmd_t pmd) | 73 | static inline bool pmd_trans_migrating(pmd_t pmd) |
| 75 | { | 74 | { |
| @@ -80,10 +79,6 @@ static inline int migrate_misplaced_page(struct page *page, | |||
| 80 | { | 79 | { |
| 81 | return -EAGAIN; /* can't migrate now */ | 80 | return -EAGAIN; /* can't migrate now */ |
| 82 | } | 81 | } |
| 83 | static inline bool migrate_ratelimited(int node) | ||
| 84 | { | ||
| 85 | return false; | ||
| 86 | } | ||
| 87 | #endif /* CONFIG_NUMA_BALANCING */ | 82 | #endif /* CONFIG_NUMA_BALANCING */ |
| 88 | 83 | ||
| 89 | #if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE) | 84 | #if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE) |
diff --git a/include/linux/mm.h b/include/linux/mm.h index 47a93928b90f..6571dd78e984 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
| @@ -1294,9 +1294,11 @@ int __set_page_dirty_no_writeback(struct page *page); | |||
| 1294 | int redirty_page_for_writepage(struct writeback_control *wbc, | 1294 | int redirty_page_for_writepage(struct writeback_control *wbc, |
| 1295 | struct page *page); | 1295 | struct page *page); |
| 1296 | void account_page_dirtied(struct page *page, struct address_space *mapping); | 1296 | void account_page_dirtied(struct page *page, struct address_space *mapping); |
| 1297 | void account_page_cleaned(struct page *page, struct address_space *mapping); | ||
| 1297 | int set_page_dirty(struct page *page); | 1298 | int set_page_dirty(struct page *page); |
| 1298 | int set_page_dirty_lock(struct page *page); | 1299 | int set_page_dirty_lock(struct page *page); |
| 1299 | int clear_page_dirty_for_io(struct page *page); | 1300 | int clear_page_dirty_for_io(struct page *page); |
| 1301 | |||
| 1300 | int get_cmdline(struct task_struct *task, char *buffer, int buflen); | 1302 | int get_cmdline(struct task_struct *task, char *buffer, int buflen); |
| 1301 | 1303 | ||
| 1302 | /* Is the vma a continuation of the stack vma above it? */ | 1304 | /* Is the vma a continuation of the stack vma above it? */ |
| @@ -2109,7 +2111,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma, | |||
| 2109 | #define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */ | 2111 | #define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */ |
| 2110 | #define FOLL_NOWAIT 0x20 /* if a disk transfer is needed, start the IO | 2112 | #define FOLL_NOWAIT 0x20 /* if a disk transfer is needed, start the IO |
| 2111 | * and return without waiting upon it */ | 2113 | * and return without waiting upon it */ |
| 2112 | #define FOLL_MLOCK 0x40 /* mark page as mlocked */ | 2114 | #define FOLL_POPULATE 0x40 /* fault in page */ |
| 2113 | #define FOLL_SPLIT 0x80 /* don't return transhuge pages, split them */ | 2115 | #define FOLL_SPLIT 0x80 /* don't return transhuge pages, split them */ |
| 2114 | #define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ | 2116 | #define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ |
| 2115 | #define FOLL_NUMA 0x200 /* force NUMA hinting page fault */ | 2117 | #define FOLL_NUMA 0x200 /* force NUMA hinting page fault */ |
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 199a03aab8dc..590630eb59ba 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h | |||
| @@ -364,7 +364,9 @@ struct mm_struct { | |||
| 364 | atomic_t mm_users; /* How many users with user space? */ | 364 | atomic_t mm_users; /* How many users with user space? */ |
| 365 | atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */ | 365 | atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */ |
| 366 | atomic_long_t nr_ptes; /* PTE page table pages */ | 366 | atomic_long_t nr_ptes; /* PTE page table pages */ |
| 367 | #if CONFIG_PGTABLE_LEVELS > 2 | ||
| 367 | atomic_long_t nr_pmds; /* PMD page table pages */ | 368 | atomic_long_t nr_pmds; /* PMD page table pages */ |
| 369 | #endif | ||
| 368 | int map_count; /* number of VMAs */ | 370 | int map_count; /* number of VMAs */ |
| 369 | 371 | ||
| 370 | spinlock_t page_table_lock; /* Protects page tables and some counters */ | 372 | spinlock_t page_table_lock; /* Protects page tables and some counters */ |
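Compiling nr_pmds only when CONFIG_PGTABLE_LEVELS > 2 shrinks mm_struct on two-level architectures, but it also means the counter cannot be touched unconditionally; the natural companion is an accessor that becomes a no-op when PMDs are folded. A sketch, with the helper name chosen here purely for illustration:

    #if CONFIG_PGTABLE_LEVELS > 2
    static inline void mm_inc_nr_pmds(struct mm_struct *mm)
    {
        atomic_long_inc(&mm->nr_pmds);
    }
    #else
    static inline void mm_inc_nr_pmds(struct mm_struct *mm) { }   /* field absent */
    #endif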
diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 9b2022ab4d85..3d46fb4708e0 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h | |||
| @@ -25,16 +25,11 @@ static inline void touch_nmi_watchdog(void) | |||
| 25 | #endif | 25 | #endif |
| 26 | 26 | ||
| 27 | #if defined(CONFIG_HARDLOCKUP_DETECTOR) | 27 | #if defined(CONFIG_HARDLOCKUP_DETECTOR) |
| 28 | extern void watchdog_enable_hardlockup_detector(bool val); | 28 | extern void hardlockup_detector_disable(void); |
| 29 | extern bool watchdog_hardlockup_detector_is_enabled(void); | ||
| 30 | #else | 29 | #else |
| 31 | static inline void watchdog_enable_hardlockup_detector(bool val) | 30 | static inline void hardlockup_detector_disable(void) |
| 32 | { | 31 | { |
| 33 | } | 32 | } |
| 34 | static inline bool watchdog_hardlockup_detector_is_enabled(void) | ||
| 35 | { | ||
| 36 | return true; | ||
| 37 | } | ||
| 38 | #endif | 33 | #endif |
| 39 | 34 | ||
| 40 | /* | 35 | /* |
| @@ -68,12 +63,20 @@ static inline bool trigger_allbutself_cpu_backtrace(void) | |||
| 68 | #ifdef CONFIG_LOCKUP_DETECTOR | 63 | #ifdef CONFIG_LOCKUP_DETECTOR |
| 69 | int hw_nmi_is_cpu_stuck(struct pt_regs *); | 64 | int hw_nmi_is_cpu_stuck(struct pt_regs *); |
| 70 | u64 hw_nmi_get_sample_period(int watchdog_thresh); | 65 | u64 hw_nmi_get_sample_period(int watchdog_thresh); |
| 66 | extern int nmi_watchdog_enabled; | ||
| 67 | extern int soft_watchdog_enabled; | ||
| 71 | extern int watchdog_user_enabled; | 68 | extern int watchdog_user_enabled; |
| 72 | extern int watchdog_thresh; | 69 | extern int watchdog_thresh; |
| 73 | extern int sysctl_softlockup_all_cpu_backtrace; | 70 | extern int sysctl_softlockup_all_cpu_backtrace; |
| 74 | struct ctl_table; | 71 | struct ctl_table; |
| 75 | extern int proc_dowatchdog(struct ctl_table *, int , | 72 | extern int proc_watchdog(struct ctl_table *, int , |
| 76 | void __user *, size_t *, loff_t *); | 73 | void __user *, size_t *, loff_t *); |
| 74 | extern int proc_nmi_watchdog(struct ctl_table *, int , | ||
| 75 | void __user *, size_t *, loff_t *); | ||
| 76 | extern int proc_soft_watchdog(struct ctl_table *, int , | ||
| 77 | void __user *, size_t *, loff_t *); | ||
| 78 | extern int proc_watchdog_thresh(struct ctl_table *, int , | ||
| 79 | void __user *, size_t *, loff_t *); | ||
| 77 | #endif | 80 | #endif |
| 78 | 81 | ||
| 79 | #ifdef CONFIG_HAVE_ACPI_APEI_NMI | 82 | #ifdef CONFIG_HAVE_ACPI_APEI_NMI |
diff --git a/include/linux/oom.h b/include/linux/oom.h index d5771bed59c9..44b2f6f7bbd8 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h | |||
| @@ -66,7 +66,8 @@ extern bool oom_zonelist_trylock(struct zonelist *zonelist, gfp_t gfp_flags); | |||
| 66 | extern void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_flags); | 66 | extern void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_flags); |
| 67 | 67 | ||
| 68 | extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, | 68 | extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, |
| 69 | int order, const nodemask_t *nodemask); | 69 | int order, const nodemask_t *nodemask, |
| 70 | struct mem_cgroup *memcg); | ||
| 70 | 71 | ||
| 71 | extern enum oom_scan_t oom_scan_process_thread(struct task_struct *task, | 72 | extern enum oom_scan_t oom_scan_process_thread(struct task_struct *task, |
| 72 | unsigned long totalpages, const nodemask_t *nodemask, | 73 | unsigned long totalpages, const nodemask_t *nodemask, |
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 5ed7bdaf22d5..c851ff92d5b3 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h | |||
| @@ -328,8 +328,6 @@ static inline void SetPageUptodate(struct page *page) | |||
| 328 | 328 | ||
| 329 | CLEARPAGEFLAG(Uptodate, uptodate) | 329 | CLEARPAGEFLAG(Uptodate, uptodate) |
| 330 | 330 | ||
| 331 | extern void cancel_dirty_page(struct page *page, unsigned int account_size); | ||
| 332 | |||
| 333 | int test_clear_page_writeback(struct page *page); | 331 | int test_clear_page_writeback(struct page *page); |
| 334 | int __test_set_page_writeback(struct page *page, bool keep_write); | 332 | int __test_set_page_writeback(struct page *page, bool keep_write); |
| 335 | 333 | ||
diff --git a/include/linux/slab.h b/include/linux/slab.h index 76f1feeabd38..ffd24c830151 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h | |||
| @@ -18,7 +18,7 @@ | |||
| 18 | 18 | ||
| 19 | /* | 19 | /* |
| 20 | * Flags to pass to kmem_cache_create(). | 20 | * Flags to pass to kmem_cache_create(). |
| 21 | * The ones marked DEBUG are only valid if CONFIG_SLAB_DEBUG is set. | 21 | * The ones marked DEBUG are only valid if CONFIG_DEBUG_SLAB is set. |
| 22 | */ | 22 | */ |
| 23 | #define SLAB_DEBUG_FREE 0x00000100UL /* DEBUG: Perform (expensive) checks on free */ | 23 | #define SLAB_DEBUG_FREE 0x00000100UL /* DEBUG: Perform (expensive) checks on free */ |
| 24 | #define SLAB_RED_ZONE 0x00000400UL /* DEBUG: Red zone objs in a cache */ | 24 | #define SLAB_RED_ZONE 0x00000400UL /* DEBUG: Red zone objs in a cache */ |
diff --git a/include/trace/events/xen.h b/include/trace/events/xen.h index d06b6da5c1e3..bce990f5a35d 100644 --- a/include/trace/events/xen.h +++ b/include/trace/events/xen.h | |||
| @@ -224,7 +224,7 @@ TRACE_EVENT(xen_mmu_pmd_clear, | |||
| 224 | TP_printk("pmdp %p", __entry->pmdp) | 224 | TP_printk("pmdp %p", __entry->pmdp) |
| 225 | ); | 225 | ); |
| 226 | 226 | ||
| 227 | #if PAGETABLE_LEVELS >= 4 | 227 | #if CONFIG_PGTABLE_LEVELS >= 4 |
| 228 | 228 | ||
| 229 | TRACE_EVENT(xen_mmu_set_pud, | 229 | TRACE_EVENT(xen_mmu_set_pud, |
| 230 | TP_PROTO(pud_t *pudp, pud_t pudval), | 230 | TP_PROTO(pud_t *pudp, pud_t pudval), |
diff --git a/init/main.c b/init/main.c index e82171b99874..a7e969d12f51 100644 --- a/init/main.c +++ b/init/main.c | |||
| @@ -80,6 +80,7 @@ | |||
| 80 | #include <linux/list.h> | 80 | #include <linux/list.h> |
| 81 | #include <linux/integrity.h> | 81 | #include <linux/integrity.h> |
| 82 | #include <linux/proc_ns.h> | 82 | #include <linux/proc_ns.h> |
| 83 | #include <linux/io.h> | ||
| 83 | 84 | ||
| 84 | #include <asm/io.h> | 85 | #include <asm/io.h> |
| 85 | #include <asm/bugs.h> | 86 | #include <asm/bugs.h> |
| @@ -485,6 +486,7 @@ static void __init mm_init(void) | |||
| 485 | percpu_init_late(); | 486 | percpu_init_late(); |
| 486 | pgtable_init(); | 487 | pgtable_init(); |
| 487 | vmalloc_init(); | 488 | vmalloc_init(); |
| 489 | ioremap_huge_init(); | ||
| 488 | } | 490 | } |
| 489 | 491 | ||
| 490 | asmlinkage __visible void __init start_kernel(void) | 492 | asmlinkage __visible void __init start_kernel(void) |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index c68f0721df10..ee14e3a35a29 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
| @@ -2453,20 +2453,12 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs) | |||
| 2453 | * @node: is this an allowed node? | 2453 | * @node: is this an allowed node? |
| 2454 | * @gfp_mask: memory allocation flags | 2454 | * @gfp_mask: memory allocation flags |
| 2455 | * | 2455 | * |
| 2456 | * If we're in interrupt, yes, we can always allocate. If __GFP_THISNODE is | 2456 | * If we're in interrupt, yes, we can always allocate. If @node is set in |
| 2457 | * set, yes, we can always allocate. If node is in our task's mems_allowed, | 2457 | * current's mems_allowed, yes. If it's not a __GFP_HARDWALL request and this |
| 2458 | * yes. If it's not a __GFP_HARDWALL request and this node is in the nearest | 2458 | * node is set in the nearest hardwalled cpuset ancestor to current's cpuset, |
| 2459 | * hardwalled cpuset ancestor to this task's cpuset, yes. If the task has been | 2459 | * yes. If current has access to memory reserves due to TIF_MEMDIE, yes. |
| 2460 | * OOM killed and has access to memory reserves as specified by the TIF_MEMDIE | ||
| 2461 | * flag, yes. | ||
| 2462 | * Otherwise, no. | 2460 | * Otherwise, no. |
| 2463 | * | 2461 | * |
| 2464 | * The __GFP_THISNODE placement logic is really handled elsewhere, | ||
| 2465 | * by forcibly using a zonelist starting at a specified node, and by | ||
| 2466 | * (in get_page_from_freelist()) refusing to consider the zones for | ||
| 2467 | * any node on the zonelist except the first. By the time any such | ||
| 2468 | * calls get to this routine, we should just shut up and say 'yes'. | ||
| 2469 | * | ||
| 2470 | * GFP_USER allocations are marked with the __GFP_HARDWALL bit, | 2462 | * GFP_USER allocations are marked with the __GFP_HARDWALL bit, |
| 2471 | * and do not allow allocations outside the current tasks cpuset | 2463 | * and do not allow allocations outside the current tasks cpuset |
| 2472 | * unless the task has been OOM killed as is marked TIF_MEMDIE. | 2464 | * unless the task has been OOM killed as is marked TIF_MEMDIE. |
| @@ -2502,7 +2494,7 @@ int __cpuset_node_allowed(int node, gfp_t gfp_mask) | |||
| 2502 | int allowed; /* is allocation in zone z allowed? */ | 2494 | int allowed; /* is allocation in zone z allowed? */ |
| 2503 | unsigned long flags; | 2495 | unsigned long flags; |
| 2504 | 2496 | ||
| 2505 | if (in_interrupt() || (gfp_mask & __GFP_THISNODE)) | 2497 | if (in_interrupt()) |
| 2506 | return 1; | 2498 | return 1; |
| 2507 | if (node_isset(node, current->mems_allowed)) | 2499 | if (node_isset(node, current->mems_allowed)) |
| 2508 | return 1; | 2500 | return 1; |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 4012336de30f..8c0eabd41886 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
| @@ -847,7 +847,7 @@ static struct ctl_table kern_table[] = { | |||
| 847 | .data = &watchdog_user_enabled, | 847 | .data = &watchdog_user_enabled, |
| 848 | .maxlen = sizeof (int), | 848 | .maxlen = sizeof (int), |
| 849 | .mode = 0644, | 849 | .mode = 0644, |
| 850 | .proc_handler = proc_dowatchdog, | 850 | .proc_handler = proc_watchdog, |
| 851 | .extra1 = &zero, | 851 | .extra1 = &zero, |
| 852 | .extra2 = &one, | 852 | .extra2 = &one, |
| 853 | }, | 853 | }, |
| @@ -856,11 +856,33 @@ static struct ctl_table kern_table[] = { | |||
| 856 | .data = &watchdog_thresh, | 856 | .data = &watchdog_thresh, |
| 857 | .maxlen = sizeof(int), | 857 | .maxlen = sizeof(int), |
| 858 | .mode = 0644, | 858 | .mode = 0644, |
| 859 | .proc_handler = proc_dowatchdog, | 859 | .proc_handler = proc_watchdog_thresh, |
| 860 | .extra1 = &zero, | 860 | .extra1 = &zero, |
| 861 | .extra2 = &sixty, | 861 | .extra2 = &sixty, |
| 862 | }, | 862 | }, |
| 863 | { | 863 | { |
| 864 | .procname = "nmi_watchdog", | ||
| 865 | .data = &nmi_watchdog_enabled, | ||
| 866 | .maxlen = sizeof (int), | ||
| 867 | .mode = 0644, | ||
| 868 | .proc_handler = proc_nmi_watchdog, | ||
| 869 | .extra1 = &zero, | ||
| 870 | #if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) | ||
| 871 | .extra2 = &one, | ||
| 872 | #else | ||
| 873 | .extra2 = &zero, | ||
| 874 | #endif | ||
| 875 | }, | ||
| 876 | { | ||
| 877 | .procname = "soft_watchdog", | ||
| 878 | .data = &soft_watchdog_enabled, | ||
| 879 | .maxlen = sizeof (int), | ||
| 880 | .mode = 0644, | ||
| 881 | .proc_handler = proc_soft_watchdog, | ||
| 882 | .extra1 = &zero, | ||
| 883 | .extra2 = &one, | ||
| 884 | }, | ||
| 885 | { | ||
| 864 | .procname = "softlockup_panic", | 886 | .procname = "softlockup_panic", |
| 865 | .data = &softlockup_panic, | 887 | .data = &softlockup_panic, |
| 866 | .maxlen = sizeof(int), | 888 | .maxlen = sizeof(int), |
| @@ -880,15 +902,6 @@ static struct ctl_table kern_table[] = { | |||
| 880 | .extra2 = &one, | 902 | .extra2 = &one, |
| 881 | }, | 903 | }, |
| 882 | #endif /* CONFIG_SMP */ | 904 | #endif /* CONFIG_SMP */ |
| 883 | { | ||
| 884 | .procname = "nmi_watchdog", | ||
| 885 | .data = &watchdog_user_enabled, | ||
| 886 | .maxlen = sizeof (int), | ||
| 887 | .mode = 0644, | ||
| 888 | .proc_handler = proc_dowatchdog, | ||
| 889 | .extra1 = &zero, | ||
| 890 | .extra2 = &one, | ||
| 891 | }, | ||
| 892 | #endif | 905 | #endif |
| 893 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) | 906 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) |
| 894 | { | 907 | { |
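Splitting the old proc_dowatchdog handler into proc_watchdog, proc_nmi_watchdog, proc_soft_watchdog and proc_watchdog_thresh gives each /proc/sys/kernel knob its own handler and backing integer, with extra1/extra2 clamping the accepted range — note how the nmi_watchdog maximum collapses to zero on kernels without a hard lockup detector. For reference, the general shape of such a table entry, with placeholder names and a generic handler:

    static int my_knob;
    static int zero, one = 1;

    static struct ctl_table my_table[] = {
        {
            .procname     = "my_knob",
            .data         = &my_knob,
            .maxlen       = sizeof(int),
            .mode         = 0644,
            .proc_handler = proc_dointvec_minmax,
            .extra1       = &zero,    /* minimum accepted value */
            .extra2       = &one,     /* maximum accepted value */
        },
        { }
    };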
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 9a056f5bc02c..2316f50b07a4 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
| @@ -24,8 +24,33 @@ | |||
| 24 | #include <linux/kvm_para.h> | 24 | #include <linux/kvm_para.h> |
| 25 | #include <linux/perf_event.h> | 25 | #include <linux/perf_event.h> |
| 26 | 26 | ||
| 27 | int watchdog_user_enabled = 1; | 27 | /* |
| 28 | * The run state of the lockup detectors is controlled by the content of the | ||
| 29 | * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit - | ||
| 30 | * bit 0 for the hard lockup detector and bit 1 for the soft lockup detector. | ||
| 31 | * | ||
| 32 | * 'watchdog_user_enabled', 'nmi_watchdog_enabled' and 'soft_watchdog_enabled' | ||
| 33 | * are variables that are only used as an 'interface' between the parameters | ||
| 34 | * in /proc/sys/kernel and the internal state bits in 'watchdog_enabled'. The | ||
| 35 | * 'watchdog_thresh' variable is handled differently because its value is not | ||
| 36 | * boolean, and the lockup detectors are 'suspended' while 'watchdog_thresh' | ||
| 37 | * is equal zero. | ||
| 38 | */ | ||
| 39 | #define NMI_WATCHDOG_ENABLED_BIT 0 | ||
| 40 | #define SOFT_WATCHDOG_ENABLED_BIT 1 | ||
| 41 | #define NMI_WATCHDOG_ENABLED (1 << NMI_WATCHDOG_ENABLED_BIT) | ||
| 42 | #define SOFT_WATCHDOG_ENABLED (1 << SOFT_WATCHDOG_ENABLED_BIT) | ||
| 43 | |||
| 44 | #ifdef CONFIG_HARDLOCKUP_DETECTOR | ||
| 45 | static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED; | ||
| 46 | #else | ||
| 47 | static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED; | ||
| 48 | #endif | ||
| 49 | int __read_mostly nmi_watchdog_enabled; | ||
| 50 | int __read_mostly soft_watchdog_enabled; | ||
| 51 | int __read_mostly watchdog_user_enabled; | ||
| 28 | int __read_mostly watchdog_thresh = 10; | 52 | int __read_mostly watchdog_thresh = 10; |
| 53 | |||
| 29 | #ifdef CONFIG_SMP | 54 | #ifdef CONFIG_SMP |
| 30 | int __read_mostly sysctl_softlockup_all_cpu_backtrace; | 55 | int __read_mostly sysctl_softlockup_all_cpu_backtrace; |
| 31 | #else | 56 | #else |
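With the run state folded into a single watchdog_enabled bitmask, switching either detector on or off becomes a mask operation — hardlockup_detector_disable() further down is just a clear of the NMI bit, and the boot parameters below manipulate the same bits. A small sketch of how the mask is meant to be consumed (helper names are hypothetical):

    static inline bool nmi_watchdog_wanted(unsigned long enabled)
    {
        return enabled & NMI_WATCHDOG_ENABLED;
    }

    static inline unsigned long clear_nmi_watchdog(unsigned long enabled)
    {
        return enabled & ~NMI_WATCHDOG_ENABLED;   /* what hardlockup_detector_disable() does */
    }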
| @@ -58,8 +83,6 @@ static unsigned long soft_lockup_nmi_warn; | |||
| 58 | #ifdef CONFIG_HARDLOCKUP_DETECTOR | 83 | #ifdef CONFIG_HARDLOCKUP_DETECTOR |
| 59 | static int hardlockup_panic = | 84 | static int hardlockup_panic = |
| 60 | CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE; | 85 | CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE; |
| 61 | |||
| 62 | static bool hardlockup_detector_enabled = true; | ||
| 63 | /* | 86 | /* |
| 64 | * We may not want to enable hard lockup detection by default in all cases, | 87 | * We may not want to enable hard lockup detection by default in all cases, |
| 65 | * for example when running the kernel as a guest on a hypervisor. In these | 88 | * for example when running the kernel as a guest on a hypervisor. In these |
| @@ -68,14 +91,9 @@ static bool hardlockup_detector_enabled = true; | |||
| 68 | * kernel command line parameters are parsed, because otherwise it is not | 91 | * kernel command line parameters are parsed, because otherwise it is not |
| 69 | * possible to override this in hardlockup_panic_setup(). | 92 | * possible to override this in hardlockup_panic_setup(). |
| 70 | */ | 93 | */ |
| 71 | void watchdog_enable_hardlockup_detector(bool val) | 94 | void hardlockup_detector_disable(void) |
| 72 | { | ||
| 73 | hardlockup_detector_enabled = val; | ||
| 74 | } | ||
| 75 | |||
| 76 | bool watchdog_hardlockup_detector_is_enabled(void) | ||
| 77 | { | 95 | { |
| 78 | return hardlockup_detector_enabled; | 96 | watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; |
| 79 | } | 97 | } |
| 80 | 98 | ||
| 81 | static int __init hardlockup_panic_setup(char *str) | 99 | static int __init hardlockup_panic_setup(char *str) |
| @@ -85,15 +103,9 @@ static int __init hardlockup_panic_setup(char *str) | |||
| 85 | else if (!strncmp(str, "nopanic", 7)) | 103 | else if (!strncmp(str, "nopanic", 7)) |
| 86 | hardlockup_panic = 0; | 104 | hardlockup_panic = 0; |
| 87 | else if (!strncmp(str, "0", 1)) | 105 | else if (!strncmp(str, "0", 1)) |
| 88 | watchdog_user_enabled = 0; | 106 | watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; |
| 89 | else if (!strncmp(str, "1", 1) || !strncmp(str, "2", 1)) { | 107 | else if (!strncmp(str, "1", 1)) |
| 90 | /* | 108 | watchdog_enabled |= NMI_WATCHDOG_ENABLED; |
| 91 | * Setting 'nmi_watchdog=1' or 'nmi_watchdog=2' (legacy option) | ||
| 92 | * has the same effect. | ||
| 93 | */ | ||
| 94 | watchdog_user_enabled = 1; | ||
| 95 | watchdog_enable_hardlockup_detector(true); | ||
| 96 | } | ||
| 97 | return 1; | 109 | return 1; |
| 98 | } | 110 | } |
| 99 | __setup("nmi_watchdog=", hardlockup_panic_setup); | 111 | __setup("nmi_watchdog=", hardlockup_panic_setup); |
| @@ -112,19 +124,18 @@ __setup("softlockup_panic=", softlockup_panic_setup); | |||
| 112 | 124 | ||
| 113 | static int __init nowatchdog_setup(char *str) | 125 | static int __init nowatchdog_setup(char *str) |
| 114 | { | 126 | { |
| 115 | watchdog_user_enabled = 0; | 127 | watchdog_enabled = 0; |
| 116 | return 1; | 128 | return 1; |
| 117 | } | 129 | } |
| 118 | __setup("nowatchdog", nowatchdog_setup); | 130 | __setup("nowatchdog", nowatchdog_setup); |
| 119 | 131 | ||
| 120 | /* deprecated */ | ||
| 121 | static int __init nosoftlockup_setup(char *str) | 132 | static int __init nosoftlockup_setup(char *str) |
| 122 | { | 133 | { |
| 123 | watchdog_user_enabled = 0; | 134 | watchdog_enabled &= ~SOFT_WATCHDOG_ENABLED; |
| 124 | return 1; | 135 | return 1; |
| 125 | } | 136 | } |
| 126 | __setup("nosoftlockup", nosoftlockup_setup); | 137 | __setup("nosoftlockup", nosoftlockup_setup); |
| 127 | /* */ | 138 | |
| 128 | #ifdef CONFIG_SMP | 139 | #ifdef CONFIG_SMP |
| 129 | static int __init softlockup_all_cpu_backtrace_setup(char *str) | 140 | static int __init softlockup_all_cpu_backtrace_setup(char *str) |
| 130 | { | 141 | { |
| @@ -239,10 +250,11 @@ static int is_softlockup(unsigned long touch_ts) | |||
| 239 | { | 250 | { |
| 240 | unsigned long now = get_timestamp(); | 251 | unsigned long now = get_timestamp(); |
| 241 | 252 | ||
| 242 | /* Warn about unreasonable delays: */ | 253 | if (watchdog_enabled & SOFT_WATCHDOG_ENABLED) { |
| 243 | if (time_after(now, touch_ts + get_softlockup_thresh())) | 254 | /* Warn about unreasonable delays. */ |
| 244 | return now - touch_ts; | 255 | if (time_after(now, touch_ts + get_softlockup_thresh())) |
| 245 | 256 | return now - touch_ts; | |
| 257 | } | ||
| 246 | return 0; | 258 | return 0; |
| 247 | } | 259 | } |
| 248 | 260 | ||
| @@ -477,6 +489,21 @@ static void watchdog(unsigned int cpu) | |||
| 477 | __this_cpu_write(soft_lockup_hrtimer_cnt, | 489 | __this_cpu_write(soft_lockup_hrtimer_cnt, |
| 478 | __this_cpu_read(hrtimer_interrupts)); | 490 | __this_cpu_read(hrtimer_interrupts)); |
| 479 | __touch_watchdog(); | 491 | __touch_watchdog(); |
| 492 | |||
| 493 | /* | ||
| 494 | * watchdog_nmi_enable() clears the NMI_WATCHDOG_ENABLED bit in the | ||
| 495 | * failure path. Check for failures that can occur asynchronously - | ||
| 496 | * for example, when CPUs are on-lined - and shut down the hardware | ||
| 497 | * perf event on each CPU accordingly. | ||
| 498 | * | ||
| 499 | * The only non-obvious place this bit can be cleared is through | ||
| 500 | * watchdog_nmi_enable(), so a pr_info() is placed there. Placing a | ||
| 501 | * pr_info here would be too noisy as it would result in a message | ||
| 502 | * every few seconds if the hardlockup was disabled but the softlockup | ||
| 503 | * enabled. | ||
| 504 | */ | ||
| 505 | if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) | ||
| 506 | watchdog_nmi_disable(cpu); | ||
| 480 | } | 507 | } |
| 481 | 508 | ||
| 482 | #ifdef CONFIG_HARDLOCKUP_DETECTOR | 509 | #ifdef CONFIG_HARDLOCKUP_DETECTOR |
| @@ -492,14 +519,9 @@ static int watchdog_nmi_enable(unsigned int cpu) | |||
| 492 | struct perf_event_attr *wd_attr; | 519 | struct perf_event_attr *wd_attr; |
| 493 | struct perf_event *event = per_cpu(watchdog_ev, cpu); | 520 | struct perf_event *event = per_cpu(watchdog_ev, cpu); |
| 494 | 521 | ||
| 495 | /* | 522 | /* nothing to do if the hard lockup detector is disabled */ |
| 496 | * Some kernels need to default hard lockup detection to | 523 | if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) |
| 497 | * 'disabled', for example a guest on a hypervisor. | 524 | goto out; |
| 498 | */ | ||
| 499 | if (!watchdog_hardlockup_detector_is_enabled()) { | ||
| 500 | event = ERR_PTR(-ENOENT); | ||
| 501 | goto handle_err; | ||
| 502 | } | ||
| 503 | 525 | ||
| 504 | /* is it already setup and enabled? */ | 526 | /* is it already setup and enabled? */ |
| 505 | if (event && event->state > PERF_EVENT_STATE_OFF) | 527 | if (event && event->state > PERF_EVENT_STATE_OFF) |
| @@ -515,7 +537,6 @@ static int watchdog_nmi_enable(unsigned int cpu) | |||
| 515 | /* Try to register using hardware perf events */ | 537 | /* Try to register using hardware perf events */ |
| 516 | event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL); | 538 | event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL); |
| 517 | 539 | ||
| 518 | handle_err: | ||
| 519 | /* save cpu0 error for future comparision */ | 540 | /* save cpu0 error for future comparision */ |
| 520 | if (cpu == 0 && IS_ERR(event)) | 541 | if (cpu == 0 && IS_ERR(event)) |
| 521 | cpu0_err = PTR_ERR(event); | 542 | cpu0_err = PTR_ERR(event); |
| @@ -527,6 +548,18 @@ handle_err: | |||
| 527 | goto out_save; | 548 | goto out_save; |
| 528 | } | 549 | } |
| 529 | 550 | ||
| 551 | /* | ||
| 552 | * Disable the hard lockup detector if _any_ CPU fails to set up | ||
| 553 | * set up the hardware perf event. The watchdog() function checks | ||
| 554 | * the NMI_WATCHDOG_ENABLED bit periodically. | ||
| 555 | * | ||
| 556 | * The barriers are for syncing up watchdog_enabled across all the | ||
| 557 | * cpus, as clear_bit() does not use barriers. | ||
| 558 | */ | ||
| 559 | smp_mb__before_atomic(); | ||
| 560 | clear_bit(NMI_WATCHDOG_ENABLED_BIT, &watchdog_enabled); | ||
| 561 | smp_mb__after_atomic(); | ||
| 562 | |||
| 530 | /* skip displaying the same error again */ | 563 | /* skip displaying the same error again */ |
| 531 | if (cpu > 0 && (PTR_ERR(event) == cpu0_err)) | 564 | if (cpu > 0 && (PTR_ERR(event) == cpu0_err)) |
| 532 | return PTR_ERR(event); | 565 | return PTR_ERR(event); |
| @@ -540,6 +573,9 @@ handle_err: | |||
| 540 | else | 573 | else |
| 541 | pr_err("disabled (cpu%i): unable to create perf event: %ld\n", | 574 | pr_err("disabled (cpu%i): unable to create perf event: %ld\n", |
| 542 | cpu, PTR_ERR(event)); | 575 | cpu, PTR_ERR(event)); |
| 576 | |||
| 577 | pr_info("Shutting down hard lockup detector on all cpus\n"); | ||
| 578 | |||
| 543 | return PTR_ERR(event); | 579 | return PTR_ERR(event); |
| 544 | 580 | ||
| 545 | /* success path */ | 581 | /* success path */ |
| @@ -628,7 +664,7 @@ static void restart_watchdog_hrtimer(void *info) | |||
| 628 | HRTIMER_MODE_REL_PINNED); | 664 | HRTIMER_MODE_REL_PINNED); |
| 629 | } | 665 | } |
| 630 | 666 | ||
| 631 | static void update_timers(int cpu) | 667 | static void update_watchdog(int cpu) |
| 632 | { | 668 | { |
| 633 | /* | 669 | /* |
| 634 | * Make sure that perf event counter will adopt to a new | 670 | * Make sure that perf event counter will adopt to a new |
| @@ -643,17 +679,17 @@ static void update_timers(int cpu) | |||
| 643 | watchdog_nmi_enable(cpu); | 679 | watchdog_nmi_enable(cpu); |
| 644 | } | 680 | } |
| 645 | 681 | ||
| 646 | static void update_timers_all_cpus(void) | 682 | static void update_watchdog_all_cpus(void) |
| 647 | { | 683 | { |
| 648 | int cpu; | 684 | int cpu; |
| 649 | 685 | ||
| 650 | get_online_cpus(); | 686 | get_online_cpus(); |
| 651 | for_each_online_cpu(cpu) | 687 | for_each_online_cpu(cpu) |
| 652 | update_timers(cpu); | 688 | update_watchdog(cpu); |
| 653 | put_online_cpus(); | 689 | put_online_cpus(); |
| 654 | } | 690 | } |
| 655 | 691 | ||
| 656 | static int watchdog_enable_all_cpus(bool sample_period_changed) | 692 | static int watchdog_enable_all_cpus(void) |
| 657 | { | 693 | { |
| 658 | int err = 0; | 694 | int err = 0; |
| 659 | 695 | ||
| @@ -663,8 +699,12 @@ static int watchdog_enable_all_cpus(bool sample_period_changed) | |||
| 663 | pr_err("Failed to create watchdog threads, disabled\n"); | 699 | pr_err("Failed to create watchdog threads, disabled\n"); |
| 664 | else | 700 | else |
| 665 | watchdog_running = 1; | 701 | watchdog_running = 1; |
| 666 | } else if (sample_period_changed) { | 702 | } else { |
| 667 | update_timers_all_cpus(); | 703 | /* |
| 704 | * Enable/disable the lockup detectors or | ||
| 705 | * change the sample period 'on the fly'. | ||
| 706 | */ | ||
| 707 | update_watchdog_all_cpus(); | ||
| 668 | } | 708 | } |
| 669 | 709 | ||
| 670 | return err; | 710 | return err; |
| @@ -682,48 +722,149 @@ static void watchdog_disable_all_cpus(void) | |||
| 682 | } | 722 | } |
| 683 | 723 | ||
| 684 | /* | 724 | /* |
| 685 | * proc handler for /proc/sys/kernel/nmi_watchdog,watchdog_thresh | 725 | * Update the run state of the lockup detectors. |
| 726 | */ | ||
| 727 | static int proc_watchdog_update(void) | ||
| 728 | { | ||
| 729 | int err = 0; | ||
| 730 | |||
| 731 | /* | ||
| 732 | * Watchdog threads won't be started if they are already active. | ||
| 733 | * The 'watchdog_running' variable in watchdog_*_all_cpus() takes | ||
| 734 | * care of this. If those threads are already active, the sample | ||
| 735 | * period will be updated and the lockup detectors will be enabled | ||
| 736 | * or disabled 'on the fly'. | ||
| 737 | */ | ||
| 738 | if (watchdog_enabled && watchdog_thresh) | ||
| 739 | err = watchdog_enable_all_cpus(); | ||
| 740 | else | ||
| 741 | watchdog_disable_all_cpus(); | ||
| 742 | |||
| 743 | return err; | ||
| 744 | |||
| 745 | } | ||
| 746 | |||
| 747 | static DEFINE_MUTEX(watchdog_proc_mutex); | ||
| 748 | |||
| 749 | /* | ||
| 750 | * common function for watchdog, nmi_watchdog and soft_watchdog parameter | ||
| 751 | * | ||
| 752 | * caller | table->data points to | 'which' contains the flag(s) | ||
| 753 | * -------------------|-----------------------|----------------------------- | ||
| 754 | * proc_watchdog | watchdog_user_enabled | NMI_WATCHDOG_ENABLED or'ed | ||
| 755 | * | | with SOFT_WATCHDOG_ENABLED | ||
| 756 | * -------------------|-----------------------|----------------------------- | ||
| 757 | * proc_nmi_watchdog | nmi_watchdog_enabled | NMI_WATCHDOG_ENABLED | ||
| 758 | * -------------------|-----------------------|----------------------------- | ||
| 759 | * proc_soft_watchdog | soft_watchdog_enabled | SOFT_WATCHDOG_ENABLED | ||
| 760 | */ | ||
| 761 | static int proc_watchdog_common(int which, struct ctl_table *table, int write, | ||
| 762 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
| 763 | { | ||
| 764 | int err, old, new; | ||
| 765 | int *watchdog_param = (int *)table->data; | ||
| 766 | |||
| 767 | mutex_lock(&watchdog_proc_mutex); | ||
| 768 | |||
| 769 | /* | ||
| 770 | * If the parameter is being read, return the state of the corresponding | ||
| 771 | * bit(s) in 'watchdog_enabled', else update 'watchdog_enabled' and the | ||
| 772 | * run state of the lockup detectors. | ||
| 773 | */ | ||
| 774 | if (!write) { | ||
| 775 | *watchdog_param = (watchdog_enabled & which) != 0; | ||
| 776 | err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); | ||
| 777 | } else { | ||
| 778 | err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); | ||
| 779 | if (err) | ||
| 780 | goto out; | ||
| 781 | |||
| 782 | /* | ||
| 783 | * There is a race window between fetching the current value | ||
| 784 | * from 'watchdog_enabled' and storing the new value. During | ||
| 785 | * this race window, watchdog_nmi_enable() can sneak in and | ||
| 786 | * clear the NMI_WATCHDOG_ENABLED bit in 'watchdog_enabled'. | ||
| 787 | * The 'cmpxchg' detects this race and the loop retries. | ||
| 788 | */ | ||
| 789 | do { | ||
| 790 | old = watchdog_enabled; | ||
| 791 | /* | ||
| 792 | * If the parameter value is not zero set the | ||
| 793 | * corresponding bit(s), else clear it(them). | ||
| 794 | */ | ||
| 795 | if (*watchdog_param) | ||
| 796 | new = old | which; | ||
| 797 | else | ||
| 798 | new = old & ~which; | ||
| 799 | } while (cmpxchg(&watchdog_enabled, old, new) != old); | ||
| 800 | |||
| 801 | /* | ||
| 802 | * Update the run state of the lockup detectors. | ||
| 803 | * Restore 'watchdog_enabled' on failure. | ||
| 804 | */ | ||
| 805 | err = proc_watchdog_update(); | ||
| 806 | if (err) | ||
| 807 | watchdog_enabled = old; | ||
| 808 | } | ||
| 809 | out: | ||
| 810 | mutex_unlock(&watchdog_proc_mutex); | ||
| 811 | return err; | ||
| 812 | } | ||
| 813 | |||
| 814 | /* | ||
| 815 | * /proc/sys/kernel/watchdog | ||
| 816 | */ | ||
| 817 | int proc_watchdog(struct ctl_table *table, int write, | ||
| 818 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
| 819 | { | ||
| 820 | return proc_watchdog_common(NMI_WATCHDOG_ENABLED|SOFT_WATCHDOG_ENABLED, | ||
| 821 | table, write, buffer, lenp, ppos); | ||
| 822 | } | ||
| 823 | |||
| 824 | /* | ||
| 825 | * /proc/sys/kernel/nmi_watchdog | ||
| 686 | */ | 826 | */ |
| 827 | int proc_nmi_watchdog(struct ctl_table *table, int write, | ||
| 828 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
| 829 | { | ||
| 830 | return proc_watchdog_common(NMI_WATCHDOG_ENABLED, | ||
| 831 | table, write, buffer, lenp, ppos); | ||
| 832 | } | ||
| 833 | |||
| 834 | /* | ||
| 835 | * /proc/sys/kernel/soft_watchdog | ||
| 836 | */ | ||
| 837 | int proc_soft_watchdog(struct ctl_table *table, int write, | ||
| 838 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
| 839 | { | ||
| 840 | return proc_watchdog_common(SOFT_WATCHDOG_ENABLED, | ||
| 841 | table, write, buffer, lenp, ppos); | ||
| 842 | } | ||
| 687 | 843 | ||
| 688 | int proc_dowatchdog(struct ctl_table *table, int write, | 844 | /* |
| 689 | void __user *buffer, size_t *lenp, loff_t *ppos) | 845 | * /proc/sys/kernel/watchdog_thresh |
| 846 | */ | ||
| 847 | int proc_watchdog_thresh(struct ctl_table *table, int write, | ||
| 848 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
| 690 | { | 849 | { |
| 691 | int err, old_thresh, old_enabled; | 850 | int err, old; |
| 692 | bool old_hardlockup; | ||
| 693 | static DEFINE_MUTEX(watchdog_proc_mutex); | ||
| 694 | 851 | ||
| 695 | mutex_lock(&watchdog_proc_mutex); | 852 | mutex_lock(&watchdog_proc_mutex); |
| 696 | old_thresh = ACCESS_ONCE(watchdog_thresh); | ||
| 697 | old_enabled = ACCESS_ONCE(watchdog_user_enabled); | ||
| 698 | old_hardlockup = watchdog_hardlockup_detector_is_enabled(); | ||
| 699 | 853 | ||
| 854 | old = ACCESS_ONCE(watchdog_thresh); | ||
| 700 | err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); | 855 | err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); |
| 856 | |||
| 701 | if (err || !write) | 857 | if (err || !write) |
| 702 | goto out; | 858 | goto out; |
| 703 | 859 | ||
| 704 | set_sample_period(); | ||
| 705 | /* | 860 | /* |
| 706 | * Watchdog threads shouldn't be enabled if they are | 861 | * Update the sample period. |
| 707 | * disabled. The 'watchdog_running' variable check in | 862 | * Restore 'watchdog_thresh' on failure. |
| 708 | * watchdog_*_all_cpus() function takes care of this. | ||
| 709 | */ | 863 | */ |
| 710 | if (watchdog_user_enabled && watchdog_thresh) { | 864 | set_sample_period(); |
| 711 | /* | 865 | err = proc_watchdog_update(); |
| 712 | * Prevent a change in watchdog_thresh accidentally overriding | 866 | if (err) |
| 713 | * the enablement of the hardlockup detector. | 867 | watchdog_thresh = old; |
| 714 | */ | ||
| 715 | if (watchdog_user_enabled != old_enabled) | ||
| 716 | watchdog_enable_hardlockup_detector(true); | ||
| 717 | err = watchdog_enable_all_cpus(old_thresh != watchdog_thresh); | ||
| 718 | } else | ||
| 719 | watchdog_disable_all_cpus(); | ||
| 720 | |||
| 721 | /* Restore old values on failure */ | ||
| 722 | if (err) { | ||
| 723 | watchdog_thresh = old_thresh; | ||
| 724 | watchdog_user_enabled = old_enabled; | ||
| 725 | watchdog_enable_hardlockup_detector(old_hardlockup); | ||
| 726 | } | ||
| 727 | out: | 868 | out: |
| 728 | mutex_unlock(&watchdog_proc_mutex); | 869 | mutex_unlock(&watchdog_proc_mutex); |
| 729 | return err; | 870 | return err; |
| @@ -734,6 +875,6 @@ void __init lockup_detector_init(void) | |||
| 734 | { | 875 | { |
| 735 | set_sample_period(); | 876 | set_sample_period(); |
| 736 | 877 | ||
| 737 | if (watchdog_user_enabled) | 878 | if (watchdog_enabled) |
| 738 | watchdog_enable_all_cpus(false); | 879 | watchdog_enable_all_cpus(); |
| 739 | } | 880 | } |
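The cmpxchg() retry loop in proc_watchdog_common() above is the usual lock-free read-modify-write: snapshot the word, derive the new value, and retry if a concurrent writer (such as watchdog_nmi_enable() clearing NMI_WATCHDOG_ENABLED) slipped in between the load and the store. A minimal stand-alone sketch of the same pattern follows; the helper name is made up for illustration and is not part of the patch:

    /* illustrative only: set or clear flag bits in a shared word
     * without losing a concurrent update; retries on a lost race */
    static void update_flag_bits(unsigned long *word, unsigned long which,
                                 bool enable)
    {
            unsigned long old, new;

            do {
                    old = *word;                    /* snapshot           */
                    new = enable ? (old | which)    /* set the bit(s)     */
                                 : (old & ~which);  /* or clear them      */
            } while (cmpxchg(word, old, new) != old);
    }

In the patch itself the word is watchdog_enabled, and 'which' carries the NMI_WATCHDOG_ENABLED and/or SOFT_WATCHDOG_ENABLED bits handed in by the three proc handlers.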
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 93967e634a1e..17670573dda8 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug | |||
| @@ -1760,6 +1760,18 @@ config TEST_UDELAY | |||
| 1760 | 1760 | ||
| 1761 | If unsure, say N. | 1761 | If unsure, say N. |
| 1762 | 1762 | ||
| 1763 | config MEMTEST | ||
| 1764 | bool "Memtest" | ||
| 1765 | depends on HAVE_MEMBLOCK | ||
| 1766 | ---help--- | ||
| 1767 | This option adds a kernel parameter 'memtest', which allows the | ||
| 1768 | number of memtest passes to be set. | ||
| 1769 | memtest=0, means disabled (default); | ||
| 1770 | memtest=1, means do 1 test pattern; | ||
| 1771 | ... | ||
| 1772 | memtest=17, means do 17 test patterns. | ||
| 1773 | If you are unsure how to answer this question, answer N. | ||
| 1774 | |||
| 1763 | source "samples/Kconfig" | 1775 | source "samples/Kconfig" |
| 1764 | 1776 | ||
| 1765 | source "lib/Kconfig.kgdb" | 1777 | source "lib/Kconfig.kgdb" |
diff --git a/lib/ioremap.c b/lib/ioremap.c index 0c9216c48762..86c8911b0e3a 100644 --- a/lib/ioremap.c +++ b/lib/ioremap.c | |||
| @@ -13,6 +13,43 @@ | |||
| 13 | #include <asm/cacheflush.h> | 13 | #include <asm/cacheflush.h> |
| 14 | #include <asm/pgtable.h> | 14 | #include <asm/pgtable.h> |
| 15 | 15 | ||
| 16 | #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP | ||
| 17 | static int __read_mostly ioremap_pud_capable; | ||
| 18 | static int __read_mostly ioremap_pmd_capable; | ||
| 19 | static int __read_mostly ioremap_huge_disabled; | ||
| 20 | |||
| 21 | static int __init set_nohugeiomap(char *str) | ||
| 22 | { | ||
| 23 | ioremap_huge_disabled = 1; | ||
| 24 | return 0; | ||
| 25 | } | ||
| 26 | early_param("nohugeiomap", set_nohugeiomap); | ||
| 27 | |||
| 28 | void __init ioremap_huge_init(void) | ||
| 29 | { | ||
| 30 | if (!ioremap_huge_disabled) { | ||
| 31 | if (arch_ioremap_pud_supported()) | ||
| 32 | ioremap_pud_capable = 1; | ||
| 33 | if (arch_ioremap_pmd_supported()) | ||
| 34 | ioremap_pmd_capable = 1; | ||
| 35 | } | ||
| 36 | } | ||
| 37 | |||
| 38 | static inline int ioremap_pud_enabled(void) | ||
| 39 | { | ||
| 40 | return ioremap_pud_capable; | ||
| 41 | } | ||
| 42 | |||
| 43 | static inline int ioremap_pmd_enabled(void) | ||
| 44 | { | ||
| 45 | return ioremap_pmd_capable; | ||
| 46 | } | ||
| 47 | |||
| 48 | #else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */ | ||
| 49 | static inline int ioremap_pud_enabled(void) { return 0; } | ||
| 50 | static inline int ioremap_pmd_enabled(void) { return 0; } | ||
| 51 | #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ | ||
| 52 | |||
| 16 | static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, | 53 | static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, |
| 17 | unsigned long end, phys_addr_t phys_addr, pgprot_t prot) | 54 | unsigned long end, phys_addr_t phys_addr, pgprot_t prot) |
| 18 | { | 55 | { |
| @@ -43,6 +80,14 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, | |||
| 43 | return -ENOMEM; | 80 | return -ENOMEM; |
| 44 | do { | 81 | do { |
| 45 | next = pmd_addr_end(addr, end); | 82 | next = pmd_addr_end(addr, end); |
| 83 | |||
| 84 | if (ioremap_pmd_enabled() && | ||
| 85 | ((next - addr) == PMD_SIZE) && | ||
| 86 | IS_ALIGNED(phys_addr + addr, PMD_SIZE)) { | ||
| 87 | if (pmd_set_huge(pmd, phys_addr + addr, prot)) | ||
| 88 | continue; | ||
| 89 | } | ||
| 90 | |||
| 46 | if (ioremap_pte_range(pmd, addr, next, phys_addr + addr, prot)) | 91 | if (ioremap_pte_range(pmd, addr, next, phys_addr + addr, prot)) |
| 47 | return -ENOMEM; | 92 | return -ENOMEM; |
| 48 | } while (pmd++, addr = next, addr != end); | 93 | } while (pmd++, addr = next, addr != end); |
| @@ -61,6 +106,14 @@ static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, | |||
| 61 | return -ENOMEM; | 106 | return -ENOMEM; |
| 62 | do { | 107 | do { |
| 63 | next = pud_addr_end(addr, end); | 108 | next = pud_addr_end(addr, end); |
| 109 | |||
| 110 | if (ioremap_pud_enabled() && | ||
| 111 | ((next - addr) == PUD_SIZE) && | ||
| 112 | IS_ALIGNED(phys_addr + addr, PUD_SIZE)) { | ||
| 113 | if (pud_set_huge(pud, phys_addr + addr, prot)) | ||
| 114 | continue; | ||
| 115 | } | ||
| 116 | |||
| 64 | if (ioremap_pmd_range(pud, addr, next, phys_addr + addr, prot)) | 117 | if (ioremap_pmd_range(pud, addr, next, phys_addr + addr, prot)) |
| 65 | return -ENOMEM; | 118 | return -ENOMEM; |
| 66 | } while (pud++, addr = next, addr != end); | 119 | } while (pud++, addr = next, addr != end); |
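Both ioremap hunks apply the same gate before trying a huge entry: the remaining virtual range must cover exactly one PMD (or PUD) and the physical address being mapped must have matching alignment; otherwise the walk falls through to the next level as before. A sketch of the PMD-level test pulled out into a helper; the helper itself is illustrative and not something the patch adds ('phys' stands in for the patch's 'phys_addr + addr'):

    /* illustrative helper: should this [addr, next) chunk be mapped
     * with a single huge PMD entry instead of a PTE page? */
    static bool want_pmd_mapping(unsigned long addr, unsigned long next,
                                 phys_addr_t phys)
    {
            return ioremap_pmd_enabled() &&         /* arch support, no nohugeiomap */
                   (next - addr) == PMD_SIZE &&     /* a full PMD worth of VA       */
                   IS_ALIGNED(phys, PMD_SIZE);      /* PA aligned the same way      */
    }

The PUD-level case is identical with PUD_SIZE, and pmd_set_huge()/pud_set_huge() may still decline, in which case the code continues with the regular page-table population.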
diff --git a/mm/Kconfig b/mm/Kconfig index a03131b6ba8e..390214da4546 100644 --- a/mm/Kconfig +++ b/mm/Kconfig | |||
| @@ -517,6 +517,12 @@ config CMA_DEBUG | |||
| 517 | processing calls such as dma_alloc_from_contiguous(). | 517 | processing calls such as dma_alloc_from_contiguous(). |
| 518 | This option does not affect warning and error messages. | 518 | This option does not affect warning and error messages. |
| 519 | 519 | ||
| 520 | config CMA_DEBUGFS | ||
| 521 | bool "CMA debugfs interface" | ||
| 522 | depends on CMA && DEBUG_FS | ||
| 523 | help | ||
| 524 | Turns on the DebugFS interface for CMA. | ||
| 525 | |||
| 520 | config CMA_AREAS | 526 | config CMA_AREAS |
| 521 | int "Maximum count of the CMA areas" | 527 | int "Maximum count of the CMA areas" |
| 522 | depends on CMA | 528 | depends on CMA |
diff --git a/mm/Makefile b/mm/Makefile index 15dbe9903c27..98c4eaeabdcb 100644 --- a/mm/Makefile +++ b/mm/Makefile | |||
| @@ -55,6 +55,7 @@ obj-$(CONFIG_KMEMCHECK) += kmemcheck.o | |||
| 55 | obj-$(CONFIG_KASAN) += kasan/ | 55 | obj-$(CONFIG_KASAN) += kasan/ |
| 56 | obj-$(CONFIG_FAILSLAB) += failslab.o | 56 | obj-$(CONFIG_FAILSLAB) += failslab.o |
| 57 | obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o | 57 | obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o |
| 58 | obj-$(CONFIG_MEMTEST) += memtest.o | ||
| 58 | obj-$(CONFIG_MIGRATION) += migrate.o | 59 | obj-$(CONFIG_MIGRATION) += migrate.o |
| 59 | obj-$(CONFIG_QUICKLIST) += quicklist.o | 60 | obj-$(CONFIG_QUICKLIST) += quicklist.o |
| 60 | obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o | 61 | obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o |
| @@ -76,3 +77,4 @@ obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o | |||
| 76 | obj-$(CONFIG_CMA) += cma.o | 77 | obj-$(CONFIG_CMA) += cma.o |
| 77 | obj-$(CONFIG_MEMORY_BALLOON) += balloon_compaction.o | 78 | obj-$(CONFIG_MEMORY_BALLOON) += balloon_compaction.o |
| 78 | obj-$(CONFIG_PAGE_EXTENSION) += page_ext.o | 79 | obj-$(CONFIG_PAGE_EXTENSION) += page_ext.o |
| 80 | obj-$(CONFIG_CMA_DEBUGFS) += cma_debug.o | ||
diff --git a/mm/cleancache.c b/mm/cleancache.c index 053bcd8f12fb..8fc50811119b 100644 --- a/mm/cleancache.c +++ b/mm/cleancache.c | |||
| @@ -19,7 +19,7 @@ | |||
| 19 | #include <linux/cleancache.h> | 19 | #include <linux/cleancache.h> |
| 20 | 20 | ||
| 21 | /* | 21 | /* |
| 22 | * cleancache_ops is set by cleancache_ops_register to contain the pointers | 22 | * cleancache_ops is set by cleancache_register_ops to contain the pointers |
| 23 | * to the cleancache "backend" implementation functions. | 23 | * to the cleancache "backend" implementation functions. |
| 24 | */ | 24 | */ |
| 25 | static struct cleancache_ops *cleancache_ops __read_mostly; | 25 | static struct cleancache_ops *cleancache_ops __read_mostly; |
| @@ -34,145 +34,107 @@ static u64 cleancache_failed_gets; | |||
| 34 | static u64 cleancache_puts; | 34 | static u64 cleancache_puts; |
| 35 | static u64 cleancache_invalidates; | 35 | static u64 cleancache_invalidates; |
| 36 | 36 | ||
| 37 | /* | 37 | static void cleancache_register_ops_sb(struct super_block *sb, void *unused) |
| 38 | * When no backend is registered all calls to init_fs and init_shared_fs | 38 | { |
| 39 | * are registered and fake poolids (FAKE_FS_POOLID_OFFSET or | 39 | switch (sb->cleancache_poolid) { |
| 40 | * FAKE_SHARED_FS_POOLID_OFFSET, plus offset in the respective array | 40 | case CLEANCACHE_NO_BACKEND: |
| 41 | * [shared_|]fs_poolid_map) are given to the respective super block | 41 | __cleancache_init_fs(sb); |
| 42 | * (sb->cleancache_poolid) and no tmem_pools are created. When a backend | 42 | break; |
| 43 | * registers with cleancache the previous calls to init_fs and init_shared_fs | 43 | case CLEANCACHE_NO_BACKEND_SHARED: |
| 44 | * are executed to create tmem_pools and set the respective poolids. While no | 44 | __cleancache_init_shared_fs(sb); |
| 45 | * backend is registered all "puts", "gets" and "flushes" are ignored or failed. | 45 | break; |
| 46 | */ | 46 | } |
| 47 | #define MAX_INITIALIZABLE_FS 32 | 47 | } |
| 48 | #define FAKE_FS_POOLID_OFFSET 1000 | ||
| 49 | #define FAKE_SHARED_FS_POOLID_OFFSET 2000 | ||
| 50 | |||
| 51 | #define FS_NO_BACKEND (-1) | ||
| 52 | #define FS_UNKNOWN (-2) | ||
| 53 | static int fs_poolid_map[MAX_INITIALIZABLE_FS]; | ||
| 54 | static int shared_fs_poolid_map[MAX_INITIALIZABLE_FS]; | ||
| 55 | static char *uuids[MAX_INITIALIZABLE_FS]; | ||
| 56 | /* | ||
| 57 | * Mutex for the [shared_|]fs_poolid_map to guard against multiple threads | ||
| 58 | * invoking umount (and ending in __cleancache_invalidate_fs) and also multiple | ||
| 59 | * threads calling mount (and ending up in __cleancache_init_[shared|]fs). | ||
| 60 | */ | ||
| 61 | static DEFINE_MUTEX(poolid_mutex); | ||
| 62 | /* | ||
| 63 | * When set to false (default) all calls to the cleancache functions, except | ||
| 64 | * the __cleancache_invalidate_fs and __cleancache_init_[shared|]fs are guarded | ||
| 65 | * by the if (!cleancache_ops) return. This means multiple threads (from | ||
| 66 | * different filesystems) will be checking cleancache_ops. The usage of a | ||
| 67 | * bool instead of a atomic_t or a bool guarded by a spinlock is OK - we are | ||
| 68 | * OK if the time between the backend's have been initialized (and | ||
| 69 | * cleancache_ops has been set to not NULL) and when the filesystems start | ||
| 70 | * actually calling the backends. The inverse (when unloading) is obviously | ||
| 71 | * not good - but this shim does not do that (yet). | ||
| 72 | */ | ||
| 73 | |||
| 74 | /* | ||
| 75 | * The backends and filesystems work all asynchronously. This is b/c the | ||
| 76 | * backends can be built as modules. | ||
| 77 | * The usual sequence of events is: | ||
| 78 | * a) mount / -> __cleancache_init_fs is called. We set the | ||
| 79 | * [shared_|]fs_poolid_map and uuids for. | ||
| 80 | * | ||
| 81 | * b). user does I/Os -> we call the rest of __cleancache_* functions | ||
| 82 | * which return immediately as cleancache_ops is false. | ||
| 83 | * | ||
| 84 | * c). modprobe zcache -> cleancache_register_ops. We init the backend | ||
| 85 | * and set cleancache_ops to true, and for any fs_poolid_map | ||
| 86 | * (which is set by __cleancache_init_fs) we initialize the poolid. | ||
| 87 | * | ||
| 88 | * d). user does I/Os -> now that cleancache_ops is true all the | ||
| 89 | * __cleancache_* functions can call the backend. They all check | ||
| 90 | * that fs_poolid_map is valid and if so invoke the backend. | ||
| 91 | * | ||
| 92 | * e). umount / -> __cleancache_invalidate_fs, the fs_poolid_map is | ||
| 93 | * reset (which is the second check in the __cleancache_* ops | ||
| 94 | * to call the backend). | ||
| 95 | * | ||
| 96 | * The sequence of event could also be c), followed by a), and d). and e). The | ||
| 97 | * c) would not happen anymore. There is also the chance of c), and one thread | ||
| 98 | * doing a) + d), and another doing e). For that case we depend on the | ||
| 99 | * filesystem calling __cleancache_invalidate_fs in the proper sequence (so | ||
| 100 | * that it handles all I/Os before it invalidates the fs (which is last part | ||
| 101 | * of unmounting process). | ||
| 102 | * | ||
| 103 | * Note: The acute reader will notice that there is no "rmmod zcache" case. | ||
| 104 | * This is b/c the functionality for that is not yet implemented and when | ||
| 105 | * done, will require some extra locking not yet devised. | ||
| 106 | */ | ||
| 107 | 48 | ||
| 108 | /* | 49 | /* |
| 109 | * Register operations for cleancache, returning previous thus allowing | 50 | * Register operations for cleancache. Returns 0 on success. |
| 110 | * detection of multiple backends and possible nesting. | ||
| 111 | */ | 51 | */ |
| 112 | struct cleancache_ops *cleancache_register_ops(struct cleancache_ops *ops) | 52 | int cleancache_register_ops(struct cleancache_ops *ops) |
| 113 | { | 53 | { |
| 114 | struct cleancache_ops *old = cleancache_ops; | 54 | if (cmpxchg(&cleancache_ops, NULL, ops)) |
| 115 | int i; | 55 | return -EBUSY; |
| 116 | 56 | ||
| 117 | mutex_lock(&poolid_mutex); | ||
| 118 | for (i = 0; i < MAX_INITIALIZABLE_FS; i++) { | ||
| 119 | if (fs_poolid_map[i] == FS_NO_BACKEND) | ||
| 120 | fs_poolid_map[i] = ops->init_fs(PAGE_SIZE); | ||
| 121 | if (shared_fs_poolid_map[i] == FS_NO_BACKEND) | ||
| 122 | shared_fs_poolid_map[i] = ops->init_shared_fs | ||
| 123 | (uuids[i], PAGE_SIZE); | ||
| 124 | } | ||
| 125 | /* | 57 | /* |
| 126 | * We MUST set cleancache_ops _after_ we have called the backends | 58 | * A cleancache backend can be built as a module and hence loaded after |
| 127 | * init_fs or init_shared_fs functions. Otherwise the compiler might | 59 | * a cleancache enabled filesystem has called cleancache_init_fs. To |
| 128 | * re-order where cleancache_ops is set in this function. | 60 | * handle such a scenario, here we call ->init_fs or ->init_shared_fs |
| 61 | * for each active super block. To differentiate between local and | ||
| 62 | * shared filesystems, we temporarily initialize sb->cleancache_poolid | ||
| 63 | * to CLEANCACHE_NO_BACKEND or CLEANCACHE_NO_BACKEND_SHARED | ||
| 64 | * respectively in case there is no backend registered at the time | ||
| 65 | * cleancache_init_fs or cleancache_init_shared_fs is called. | ||
| 66 | * | ||
| 67 | * Since filesystems can be mounted concurrently with cleancache | ||
| 68 | * backend registration, we have to be careful to guarantee that all | ||
| 69 | * cleancache enabled filesystems that has been mounted by the time | ||
| 70 | * cleancache_register_ops is called has got and all mounted later will | ||
| 71 | * get cleancache_poolid. This is assured by the following statements | ||
| 72 | * tied together: | ||
| 73 | * | ||
| 74 | * a) iterate_supers skips only those super blocks that have started | ||
| 75 | * ->kill_sb | ||
| 76 | * | ||
| 77 | * b) if iterate_supers encounters a super block that has not finished | ||
| 78 | * ->mount yet, it waits until it is finished | ||
| 79 | * | ||
| 80 | * c) cleancache_init_fs is called from ->mount and | ||
| 81 | * cleancache_invalidate_fs is called from ->kill_sb | ||
| 82 | * | ||
| 83 | * d) we call iterate_supers after cleancache_ops has been set | ||
| 84 | * | ||
| 85 | * From a) it follows that if iterate_supers skips a super block, then | ||
| 86 | * either the super block is already dead, in which case we do not need | ||
| 87 | * to bother initializing cleancache for it, or it was mounted after we | ||
| 88 | * initiated iterate_supers. In the latter case, it must have seen | ||
| 89 | * cleancache_ops set according to d) and initialized cleancache from | ||
| 90 | * ->mount by itself according to c). This proves that we call | ||
| 91 | * ->init_fs at least once for each active super block. | ||
| 92 | * | ||
| 93 | * From b) and c) it follows that if iterate_supers encounters a super | ||
| 94 | * block that has already started ->init_fs, it will wait until ->mount | ||
| 95 | * and hence ->init_fs has finished, then check cleancache_poolid, see | ||
| 96 | * that it has already been set and therefore do nothing. This proves | ||
| 97 | * that we call ->init_fs no more than once for each super block. | ||
| 98 | * | ||
| 99 | * Combined together, the last two paragraphs prove the function | ||
| 100 | * correctness. | ||
| 101 | * | ||
| 102 | * Note that various cleancache callbacks may proceed before this | ||
| 103 | * function is called or even concurrently with it, but since | ||
| 104 | * CLEANCACHE_NO_BACKEND is negative, they will all result in a noop | ||
| 105 | * until the corresponding ->init_fs has been actually called and | ||
| 106 | * cleancache_ops has been set. | ||
| 129 | */ | 107 | */ |
| 130 | barrier(); | 108 | iterate_supers(cleancache_register_ops_sb, NULL); |
| 131 | cleancache_ops = ops; | 109 | return 0; |
| 132 | mutex_unlock(&poolid_mutex); | ||
| 133 | return old; | ||
| 134 | } | 110 | } |
| 135 | EXPORT_SYMBOL(cleancache_register_ops); | 111 | EXPORT_SYMBOL(cleancache_register_ops); |
| 136 | 112 | ||
| 137 | /* Called by a cleancache-enabled filesystem at time of mount */ | 113 | /* Called by a cleancache-enabled filesystem at time of mount */ |
| 138 | void __cleancache_init_fs(struct super_block *sb) | 114 | void __cleancache_init_fs(struct super_block *sb) |
| 139 | { | 115 | { |
| 140 | int i; | 116 | int pool_id = CLEANCACHE_NO_BACKEND; |
| 141 | 117 | ||
| 142 | mutex_lock(&poolid_mutex); | 118 | if (cleancache_ops) { |
| 143 | for (i = 0; i < MAX_INITIALIZABLE_FS; i++) { | 119 | pool_id = cleancache_ops->init_fs(PAGE_SIZE); |
| 144 | if (fs_poolid_map[i] == FS_UNKNOWN) { | 120 | if (pool_id < 0) |
| 145 | sb->cleancache_poolid = i + FAKE_FS_POOLID_OFFSET; | 121 | pool_id = CLEANCACHE_NO_POOL; |
| 146 | if (cleancache_ops) | ||
| 147 | fs_poolid_map[i] = cleancache_ops->init_fs(PAGE_SIZE); | ||
| 148 | else | ||
| 149 | fs_poolid_map[i] = FS_NO_BACKEND; | ||
| 150 | break; | ||
| 151 | } | ||
| 152 | } | 122 | } |
| 153 | mutex_unlock(&poolid_mutex); | 123 | sb->cleancache_poolid = pool_id; |
| 154 | } | 124 | } |
| 155 | EXPORT_SYMBOL(__cleancache_init_fs); | 125 | EXPORT_SYMBOL(__cleancache_init_fs); |
| 156 | 126 | ||
| 157 | /* Called by a cleancache-enabled clustered filesystem at time of mount */ | 127 | /* Called by a cleancache-enabled clustered filesystem at time of mount */ |
| 158 | void __cleancache_init_shared_fs(char *uuid, struct super_block *sb) | 128 | void __cleancache_init_shared_fs(struct super_block *sb) |
| 159 | { | 129 | { |
| 160 | int i; | 130 | int pool_id = CLEANCACHE_NO_BACKEND_SHARED; |
| 161 | 131 | ||
| 162 | mutex_lock(&poolid_mutex); | 132 | if (cleancache_ops) { |
| 163 | for (i = 0; i < MAX_INITIALIZABLE_FS; i++) { | 133 | pool_id = cleancache_ops->init_shared_fs(sb->s_uuid, PAGE_SIZE); |
| 164 | if (shared_fs_poolid_map[i] == FS_UNKNOWN) { | 134 | if (pool_id < 0) |
| 165 | sb->cleancache_poolid = i + FAKE_SHARED_FS_POOLID_OFFSET; | 135 | pool_id = CLEANCACHE_NO_POOL; |
| 166 | uuids[i] = uuid; | ||
| 167 | if (cleancache_ops) | ||
| 168 | shared_fs_poolid_map[i] = cleancache_ops->init_shared_fs | ||
| 169 | (uuid, PAGE_SIZE); | ||
| 170 | else | ||
| 171 | shared_fs_poolid_map[i] = FS_NO_BACKEND; | ||
| 172 | break; | ||
| 173 | } | ||
| 174 | } | 136 | } |
| 175 | mutex_unlock(&poolid_mutex); | 137 | sb->cleancache_poolid = pool_id; |
| 176 | } | 138 | } |
| 177 | EXPORT_SYMBOL(__cleancache_init_shared_fs); | 139 | EXPORT_SYMBOL(__cleancache_init_shared_fs); |
| 178 | 140 | ||
| @@ -202,19 +164,6 @@ static int cleancache_get_key(struct inode *inode, | |||
| 202 | } | 164 | } |
| 203 | 165 | ||
| 204 | /* | 166 | /* |
| 205 | * Returns a pool_id that is associated with a given fake poolid. | ||
| 206 | */ | ||
| 207 | static int get_poolid_from_fake(int fake_pool_id) | ||
| 208 | { | ||
| 209 | if (fake_pool_id >= FAKE_SHARED_FS_POOLID_OFFSET) | ||
| 210 | return shared_fs_poolid_map[fake_pool_id - | ||
| 211 | FAKE_SHARED_FS_POOLID_OFFSET]; | ||
| 212 | else if (fake_pool_id >= FAKE_FS_POOLID_OFFSET) | ||
| 213 | return fs_poolid_map[fake_pool_id - FAKE_FS_POOLID_OFFSET]; | ||
| 214 | return FS_NO_BACKEND; | ||
| 215 | } | ||
| 216 | |||
| 217 | /* | ||
| 218 | * "Get" data from cleancache associated with the poolid/inode/index | 167 | * "Get" data from cleancache associated with the poolid/inode/index |
| 219 | * that were specified when the data was put to cleanache and, if | 168 | * that were specified when the data was put to cleanache and, if |
| 220 | * successful, use it to fill the specified page with data and return 0. | 169 | * successful, use it to fill the specified page with data and return 0. |
| @@ -229,7 +178,6 @@ int __cleancache_get_page(struct page *page) | |||
| 229 | { | 178 | { |
| 230 | int ret = -1; | 179 | int ret = -1; |
| 231 | int pool_id; | 180 | int pool_id; |
| 232 | int fake_pool_id; | ||
| 233 | struct cleancache_filekey key = { .u.key = { 0 } }; | 181 | struct cleancache_filekey key = { .u.key = { 0 } }; |
| 234 | 182 | ||
| 235 | if (!cleancache_ops) { | 183 | if (!cleancache_ops) { |
| @@ -238,17 +186,14 @@ int __cleancache_get_page(struct page *page) | |||
| 238 | } | 186 | } |
| 239 | 187 | ||
| 240 | VM_BUG_ON_PAGE(!PageLocked(page), page); | 188 | VM_BUG_ON_PAGE(!PageLocked(page), page); |
| 241 | fake_pool_id = page->mapping->host->i_sb->cleancache_poolid; | 189 | pool_id = page->mapping->host->i_sb->cleancache_poolid; |
| 242 | if (fake_pool_id < 0) | 190 | if (pool_id < 0) |
| 243 | goto out; | 191 | goto out; |
| 244 | pool_id = get_poolid_from_fake(fake_pool_id); | ||
| 245 | 192 | ||
| 246 | if (cleancache_get_key(page->mapping->host, &key) < 0) | 193 | if (cleancache_get_key(page->mapping->host, &key) < 0) |
| 247 | goto out; | 194 | goto out; |
| 248 | 195 | ||
| 249 | if (pool_id >= 0) | 196 | ret = cleancache_ops->get_page(pool_id, key, page->index, page); |
| 250 | ret = cleancache_ops->get_page(pool_id, | ||
| 251 | key, page->index, page); | ||
| 252 | if (ret == 0) | 197 | if (ret == 0) |
| 253 | cleancache_succ_gets++; | 198 | cleancache_succ_gets++; |
| 254 | else | 199 | else |
| @@ -271,7 +216,6 @@ EXPORT_SYMBOL(__cleancache_get_page); | |||
| 271 | void __cleancache_put_page(struct page *page) | 216 | void __cleancache_put_page(struct page *page) |
| 272 | { | 217 | { |
| 273 | int pool_id; | 218 | int pool_id; |
| 274 | int fake_pool_id; | ||
| 275 | struct cleancache_filekey key = { .u.key = { 0 } }; | 219 | struct cleancache_filekey key = { .u.key = { 0 } }; |
| 276 | 220 | ||
| 277 | if (!cleancache_ops) { | 221 | if (!cleancache_ops) { |
| @@ -280,12 +224,7 @@ void __cleancache_put_page(struct page *page) | |||
| 280 | } | 224 | } |
| 281 | 225 | ||
| 282 | VM_BUG_ON_PAGE(!PageLocked(page), page); | 226 | VM_BUG_ON_PAGE(!PageLocked(page), page); |
| 283 | fake_pool_id = page->mapping->host->i_sb->cleancache_poolid; | 227 | pool_id = page->mapping->host->i_sb->cleancache_poolid; |
| 284 | if (fake_pool_id < 0) | ||
| 285 | return; | ||
| 286 | |||
| 287 | pool_id = get_poolid_from_fake(fake_pool_id); | ||
| 288 | |||
| 289 | if (pool_id >= 0 && | 228 | if (pool_id >= 0 && |
| 290 | cleancache_get_key(page->mapping->host, &key) >= 0) { | 229 | cleancache_get_key(page->mapping->host, &key) >= 0) { |
| 291 | cleancache_ops->put_page(pool_id, key, page->index, page); | 230 | cleancache_ops->put_page(pool_id, key, page->index, page); |
| @@ -306,18 +245,13 @@ void __cleancache_invalidate_page(struct address_space *mapping, | |||
| 306 | struct page *page) | 245 | struct page *page) |
| 307 | { | 246 | { |
| 308 | /* careful... page->mapping is NULL sometimes when this is called */ | 247 | /* careful... page->mapping is NULL sometimes when this is called */ |
| 309 | int pool_id; | 248 | int pool_id = mapping->host->i_sb->cleancache_poolid; |
| 310 | int fake_pool_id = mapping->host->i_sb->cleancache_poolid; | ||
| 311 | struct cleancache_filekey key = { .u.key = { 0 } }; | 249 | struct cleancache_filekey key = { .u.key = { 0 } }; |
| 312 | 250 | ||
| 313 | if (!cleancache_ops) | 251 | if (!cleancache_ops) |
| 314 | return; | 252 | return; |
| 315 | 253 | ||
| 316 | if (fake_pool_id >= 0) { | 254 | if (pool_id >= 0) { |
| 317 | pool_id = get_poolid_from_fake(fake_pool_id); | ||
| 318 | if (pool_id < 0) | ||
| 319 | return; | ||
| 320 | |||
| 321 | VM_BUG_ON_PAGE(!PageLocked(page), page); | 255 | VM_BUG_ON_PAGE(!PageLocked(page), page); |
| 322 | if (cleancache_get_key(mapping->host, &key) >= 0) { | 256 | if (cleancache_get_key(mapping->host, &key) >= 0) { |
| 323 | cleancache_ops->invalidate_page(pool_id, | 257 | cleancache_ops->invalidate_page(pool_id, |
| @@ -339,18 +273,12 @@ EXPORT_SYMBOL(__cleancache_invalidate_page); | |||
| 339 | */ | 273 | */ |
| 340 | void __cleancache_invalidate_inode(struct address_space *mapping) | 274 | void __cleancache_invalidate_inode(struct address_space *mapping) |
| 341 | { | 275 | { |
| 342 | int pool_id; | 276 | int pool_id = mapping->host->i_sb->cleancache_poolid; |
| 343 | int fake_pool_id = mapping->host->i_sb->cleancache_poolid; | ||
| 344 | struct cleancache_filekey key = { .u.key = { 0 } }; | 277 | struct cleancache_filekey key = { .u.key = { 0 } }; |
| 345 | 278 | ||
| 346 | if (!cleancache_ops) | 279 | if (!cleancache_ops) |
| 347 | return; | 280 | return; |
| 348 | 281 | ||
| 349 | if (fake_pool_id < 0) | ||
| 350 | return; | ||
| 351 | |||
| 352 | pool_id = get_poolid_from_fake(fake_pool_id); | ||
| 353 | |||
| 354 | if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0) | 282 | if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0) |
| 355 | cleancache_ops->invalidate_inode(pool_id, key); | 283 | cleancache_ops->invalidate_inode(pool_id, key); |
| 356 | } | 284 | } |
| @@ -363,32 +291,18 @@ EXPORT_SYMBOL(__cleancache_invalidate_inode); | |||
| 363 | */ | 291 | */ |
| 364 | void __cleancache_invalidate_fs(struct super_block *sb) | 292 | void __cleancache_invalidate_fs(struct super_block *sb) |
| 365 | { | 293 | { |
| 366 | int index; | 294 | int pool_id; |
| 367 | int fake_pool_id = sb->cleancache_poolid; | ||
| 368 | int old_poolid = fake_pool_id; | ||
| 369 | 295 | ||
| 370 | mutex_lock(&poolid_mutex); | 296 | pool_id = sb->cleancache_poolid; |
| 371 | if (fake_pool_id >= FAKE_SHARED_FS_POOLID_OFFSET) { | 297 | sb->cleancache_poolid = CLEANCACHE_NO_POOL; |
| 372 | index = fake_pool_id - FAKE_SHARED_FS_POOLID_OFFSET; | 298 | |
| 373 | old_poolid = shared_fs_poolid_map[index]; | 299 | if (cleancache_ops && pool_id >= 0) |
| 374 | shared_fs_poolid_map[index] = FS_UNKNOWN; | 300 | cleancache_ops->invalidate_fs(pool_id); |
| 375 | uuids[index] = NULL; | ||
| 376 | } else if (fake_pool_id >= FAKE_FS_POOLID_OFFSET) { | ||
| 377 | index = fake_pool_id - FAKE_FS_POOLID_OFFSET; | ||
| 378 | old_poolid = fs_poolid_map[index]; | ||
| 379 | fs_poolid_map[index] = FS_UNKNOWN; | ||
| 380 | } | ||
| 381 | sb->cleancache_poolid = -1; | ||
| 382 | if (cleancache_ops) | ||
| 383 | cleancache_ops->invalidate_fs(old_poolid); | ||
| 384 | mutex_unlock(&poolid_mutex); | ||
| 385 | } | 301 | } |
| 386 | EXPORT_SYMBOL(__cleancache_invalidate_fs); | 302 | EXPORT_SYMBOL(__cleancache_invalidate_fs); |
| 387 | 303 | ||
| 388 | static int __init init_cleancache(void) | 304 | static int __init init_cleancache(void) |
| 389 | { | 305 | { |
| 390 | int i; | ||
| 391 | |||
| 392 | #ifdef CONFIG_DEBUG_FS | 306 | #ifdef CONFIG_DEBUG_FS |
| 393 | struct dentry *root = debugfs_create_dir("cleancache", NULL); | 307 | struct dentry *root = debugfs_create_dir("cleancache", NULL); |
| 394 | if (root == NULL) | 308 | if (root == NULL) |
| @@ -400,10 +314,6 @@ static int __init init_cleancache(void) | |||
| 400 | debugfs_create_u64("invalidates", S_IRUGO, | 314 | debugfs_create_u64("invalidates", S_IRUGO, |
| 401 | root, &cleancache_invalidates); | 315 | root, &cleancache_invalidates); |
| 402 | #endif | 316 | #endif |
| 403 | for (i = 0; i < MAX_INITIALIZABLE_FS; i++) { | ||
| 404 | fs_poolid_map[i] = FS_UNKNOWN; | ||
| 405 | shared_fs_poolid_map[i] = FS_UNKNOWN; | ||
| 406 | } | ||
| 407 | return 0; | 317 | return 0; |
| 408 | } | 318 | } |
| 409 | module_init(init_cleancache) | 319 | module_init(init_cleancache) |
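With the fake-poolid indirection gone, a backend hands its ops to cleancache_register_ops() once and checks the return value; -EBUSY now means another backend already won the cmpxchg(). A hypothetical backend module's init path might look like the sketch below (my_backend_ops and the module name are stand-ins, not part of this patch):

    /* hypothetical backend: my_backend_ops is a fully populated
     * struct cleancache_ops defined elsewhere in the module */
    static int __init my_backend_init(void)
    {
            int err;

            err = cleancache_register_ops(&my_backend_ops);
            if (err) {
                    pr_err("my_backend: a cleancache backend is already registered (%d)\n",
                           err);
                    return err;
            }

            /*
             * By the time this returns, the core has walked the active
             * super blocks via iterate_supers() and issued ->init_fs /
             * ->init_shared_fs for filesystems mounted before the module
             * was loaded.
             */
            return 0;
    }
    module_init(my_backend_init);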
| @@ -35,29 +35,24 @@ | |||
| 35 | #include <linux/highmem.h> | 35 | #include <linux/highmem.h> |
| 36 | #include <linux/io.h> | 36 | #include <linux/io.h> |
| 37 | 37 | ||
| 38 | struct cma { | 38 | #include "cma.h" |
| 39 | unsigned long base_pfn; | 39 | |
| 40 | unsigned long count; | 40 | struct cma cma_areas[MAX_CMA_AREAS]; |
| 41 | unsigned long *bitmap; | 41 | unsigned cma_area_count; |
| 42 | unsigned int order_per_bit; /* Order of pages represented by one bit */ | ||
| 43 | struct mutex lock; | ||
| 44 | }; | ||
| 45 | |||
| 46 | static struct cma cma_areas[MAX_CMA_AREAS]; | ||
| 47 | static unsigned cma_area_count; | ||
| 48 | static DEFINE_MUTEX(cma_mutex); | 42 | static DEFINE_MUTEX(cma_mutex); |
| 49 | 43 | ||
| 50 | phys_addr_t cma_get_base(struct cma *cma) | 44 | phys_addr_t cma_get_base(const struct cma *cma) |
| 51 | { | 45 | { |
| 52 | return PFN_PHYS(cma->base_pfn); | 46 | return PFN_PHYS(cma->base_pfn); |
| 53 | } | 47 | } |
| 54 | 48 | ||
| 55 | unsigned long cma_get_size(struct cma *cma) | 49 | unsigned long cma_get_size(const struct cma *cma) |
| 56 | { | 50 | { |
| 57 | return cma->count << PAGE_SHIFT; | 51 | return cma->count << PAGE_SHIFT; |
| 58 | } | 52 | } |
| 59 | 53 | ||
| 60 | static unsigned long cma_bitmap_aligned_mask(struct cma *cma, int align_order) | 54 | static unsigned long cma_bitmap_aligned_mask(const struct cma *cma, |
| 55 | int align_order) | ||
| 61 | { | 56 | { |
| 62 | if (align_order <= cma->order_per_bit) | 57 | if (align_order <= cma->order_per_bit) |
| 63 | return 0; | 58 | return 0; |
| @@ -68,7 +63,8 @@ static unsigned long cma_bitmap_aligned_mask(struct cma *cma, int align_order) | |||
| 68 | * Find a PFN aligned to the specified order and return an offset represented in | 63 | * Find a PFN aligned to the specified order and return an offset represented in |
| 69 | * order_per_bits. | 64 | * order_per_bits. |
| 70 | */ | 65 | */ |
| 71 | static unsigned long cma_bitmap_aligned_offset(struct cma *cma, int align_order) | 66 | static unsigned long cma_bitmap_aligned_offset(const struct cma *cma, |
| 67 | int align_order) | ||
| 72 | { | 68 | { |
| 73 | if (align_order <= cma->order_per_bit) | 69 | if (align_order <= cma->order_per_bit) |
| 74 | return 0; | 70 | return 0; |
| @@ -77,18 +73,14 @@ static unsigned long cma_bitmap_aligned_offset(struct cma *cma, int align_order) | |||
| 77 | - cma->base_pfn) >> cma->order_per_bit; | 73 | - cma->base_pfn) >> cma->order_per_bit; |
| 78 | } | 74 | } |
| 79 | 75 | ||
| 80 | static unsigned long cma_bitmap_maxno(struct cma *cma) | 76 | static unsigned long cma_bitmap_pages_to_bits(const struct cma *cma, |
| 81 | { | 77 | unsigned long pages) |
| 82 | return cma->count >> cma->order_per_bit; | ||
| 83 | } | ||
| 84 | |||
| 85 | static unsigned long cma_bitmap_pages_to_bits(struct cma *cma, | ||
| 86 | unsigned long pages) | ||
| 87 | { | 78 | { |
| 88 | return ALIGN(pages, 1UL << cma->order_per_bit) >> cma->order_per_bit; | 79 | return ALIGN(pages, 1UL << cma->order_per_bit) >> cma->order_per_bit; |
| 89 | } | 80 | } |
| 90 | 81 | ||
| 91 | static void cma_clear_bitmap(struct cma *cma, unsigned long pfn, int count) | 82 | static void cma_clear_bitmap(struct cma *cma, unsigned long pfn, |
| 83 | unsigned int count) | ||
| 92 | { | 84 | { |
| 93 | unsigned long bitmap_no, bitmap_count; | 85 | unsigned long bitmap_no, bitmap_count; |
| 94 | 86 | ||
| @@ -134,6 +126,12 @@ static int __init cma_activate_area(struct cma *cma) | |||
| 134 | } while (--i); | 126 | } while (--i); |
| 135 | 127 | ||
| 136 | mutex_init(&cma->lock); | 128 | mutex_init(&cma->lock); |
| 129 | |||
| 130 | #ifdef CONFIG_CMA_DEBUGFS | ||
| 131 | INIT_HLIST_HEAD(&cma->mem_head); | ||
| 132 | spin_lock_init(&cma->mem_head_lock); | ||
| 133 | #endif | ||
| 134 | |||
| 137 | return 0; | 135 | return 0; |
| 138 | 136 | ||
| 139 | err: | 137 | err: |
| @@ -167,7 +165,8 @@ core_initcall(cma_init_reserved_areas); | |||
| 167 | * This function creates custom contiguous area from already reserved memory. | 165 | * This function creates custom contiguous area from already reserved memory. |
| 168 | */ | 166 | */ |
| 169 | int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, | 167 | int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, |
| 170 | int order_per_bit, struct cma **res_cma) | 168 | unsigned int order_per_bit, |
| 169 | struct cma **res_cma) | ||
| 171 | { | 170 | { |
| 172 | struct cma *cma; | 171 | struct cma *cma; |
| 173 | phys_addr_t alignment; | 172 | phys_addr_t alignment; |
| @@ -358,7 +357,7 @@ err: | |||
| 358 | * This function allocates part of contiguous memory on specific | 357 | * This function allocates part of contiguous memory on specific |
| 359 | * contiguous memory area. | 358 | * contiguous memory area. |
| 360 | */ | 359 | */ |
| 361 | struct page *cma_alloc(struct cma *cma, int count, unsigned int align) | 360 | struct page *cma_alloc(struct cma *cma, unsigned int count, unsigned int align) |
| 362 | { | 361 | { |
| 363 | unsigned long mask, offset, pfn, start = 0; | 362 | unsigned long mask, offset, pfn, start = 0; |
| 364 | unsigned long bitmap_maxno, bitmap_no, bitmap_count; | 363 | unsigned long bitmap_maxno, bitmap_no, bitmap_count; |
| @@ -429,7 +428,7 @@ struct page *cma_alloc(struct cma *cma, int count, unsigned int align) | |||
| 429 | * It returns false when provided pages do not belong to contiguous area and | 428 | * It returns false when provided pages do not belong to contiguous area and |
| 430 | * true otherwise. | 429 | * true otherwise. |
| 431 | */ | 430 | */ |
| 432 | bool cma_release(struct cma *cma, struct page *pages, int count) | 431 | bool cma_release(struct cma *cma, const struct page *pages, unsigned int count) |
| 433 | { | 432 | { |
| 434 | unsigned long pfn; | 433 | unsigned long pfn; |
| 435 | 434 | ||
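The cma.c changes above are type tightening (const pointers, unsigned counts) rather than behavioural; callers use the API exactly as before. A driver-side sketch against the new prototypes; the area pointer, sizes and the smoke-test framing are hypothetical:

    /* illustrative caller: grab 16 contiguous pages from a CMA area
     * (alignment given as a page order) and release them again */
    static int cma_smoke_test(struct cma *area)
    {
            const unsigned int nr_pages = 16;
            struct page *pages;

            pages = cma_alloc(area, nr_pages, get_order(SZ_64K));
            if (!pages)
                    return -ENOMEM;

            /* ... use the physically contiguous buffer ... */

            if (!cma_release(area, pages, nr_pages))
                    pr_warn("cma_smoke_test: pages were not from this area\n");

            return 0;
    }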
diff --git a/mm/cma.h b/mm/cma.h new file mode 100644 index 000000000000..1132d733556d --- /dev/null +++ b/mm/cma.h | |||
| @@ -0,0 +1,24 @@ | |||
| 1 | #ifndef __MM_CMA_H__ | ||
| 2 | #define __MM_CMA_H__ | ||
| 3 | |||
| 4 | struct cma { | ||
| 5 | unsigned long base_pfn; | ||
| 6 | unsigned long count; | ||
| 7 | unsigned long *bitmap; | ||
| 8 | unsigned int order_per_bit; /* Order of pages represented by one bit */ | ||
| 9 | struct mutex lock; | ||
| 10 | #ifdef CONFIG_CMA_DEBUGFS | ||
| 11 | struct hlist_head mem_head; | ||
| 12 | spinlock_t mem_head_lock; | ||
| 13 | #endif | ||
| 14 | }; | ||
| 15 | |||
| 16 | extern struct cma cma_areas[MAX_CMA_AREAS]; | ||
| 17 | extern unsigned cma_area_count; | ||
| 18 | |||
| 19 | static unsigned long cma_bitmap_maxno(struct cma *cma) | ||
| 20 | { | ||
| 21 | return cma->count >> cma->order_per_bit; | ||
| 22 | } | ||
| 23 | |||
| 24 | #endif | ||
diff --git a/mm/cma_debug.c b/mm/cma_debug.c new file mode 100644 index 000000000000..0b377536ccde --- /dev/null +++ b/mm/cma_debug.c | |||
| @@ -0,0 +1,170 @@ | |||
| 1 | /* | ||
| 2 | * CMA DebugFS Interface | ||
| 3 | * | ||
| 4 | * Copyright (c) 2015 Sasha Levin <sasha.levin@oracle.com> | ||
| 5 | */ | ||
| 6 | |||
| 7 | |||
| 8 | #include <linux/debugfs.h> | ||
| 9 | #include <linux/cma.h> | ||
| 10 | #include <linux/list.h> | ||
| 11 | #include <linux/kernel.h> | ||
| 12 | #include <linux/slab.h> | ||
| 13 | #include <linux/mm_types.h> | ||
| 14 | |||
| 15 | #include "cma.h" | ||
| 16 | |||
| 17 | struct cma_mem { | ||
| 18 | struct hlist_node node; | ||
| 19 | struct page *p; | ||
| 20 | unsigned long n; | ||
| 21 | }; | ||
| 22 | |||
| 23 | static struct dentry *cma_debugfs_root; | ||
| 24 | |||
| 25 | static int cma_debugfs_get(void *data, u64 *val) | ||
| 26 | { | ||
| 27 | unsigned long *p = data; | ||
| 28 | |||
| 29 | *val = *p; | ||
| 30 | |||
| 31 | return 0; | ||
| 32 | } | ||
| 33 | |||
| 34 | DEFINE_SIMPLE_ATTRIBUTE(cma_debugfs_fops, cma_debugfs_get, NULL, "%llu\n"); | ||
| 35 | |||
| 36 | static void cma_add_to_cma_mem_list(struct cma *cma, struct cma_mem *mem) | ||
| 37 | { | ||
| 38 | spin_lock(&cma->mem_head_lock); | ||
| 39 | hlist_add_head(&mem->node, &cma->mem_head); | ||
| 40 | spin_unlock(&cma->mem_head_lock); | ||
| 41 | } | ||
| 42 | |||
| 43 | static struct cma_mem *cma_get_entry_from_list(struct cma *cma) | ||
| 44 | { | ||
| 45 | struct cma_mem *mem = NULL; | ||
| 46 | |||
| 47 | spin_lock(&cma->mem_head_lock); | ||
| 48 | if (!hlist_empty(&cma->mem_head)) { | ||
| 49 | mem = hlist_entry(cma->mem_head.first, struct cma_mem, node); | ||
| 50 | hlist_del_init(&mem->node); | ||
| 51 | } | ||
| 52 | spin_unlock(&cma->mem_head_lock); | ||
| 53 | |||
| 54 | return mem; | ||
| 55 | } | ||
| 56 | |||
| 57 | static int cma_free_mem(struct cma *cma, int count) | ||
| 58 | { | ||
| 59 | struct cma_mem *mem = NULL; | ||
| 60 | |||
| 61 | while (count) { | ||
| 62 | mem = cma_get_entry_from_list(cma); | ||
| 63 | if (mem == NULL) | ||
| 64 | return 0; | ||
| 65 | |||
| 66 | if (mem->n <= count) { | ||
| 67 | cma_release(cma, mem->p, mem->n); | ||
| 68 | count -= mem->n; | ||
| 69 | kfree(mem); | ||
| 70 | } else if (cma->order_per_bit == 0) { | ||
| 71 | cma_release(cma, mem->p, count); | ||
| 72 | mem->p += count; | ||
| 73 | mem->n -= count; | ||
| 74 | count = 0; | ||
| 75 | cma_add_to_cma_mem_list(cma, mem); | ||
| 76 | } else { | ||
| 77 | pr_debug("cma: cannot release partial block when order_per_bit != 0\n"); | ||
| 78 | cma_add_to_cma_mem_list(cma, mem); | ||
| 79 | break; | ||
| 80 | } | ||
| 81 | } | ||
| 82 | |||
| 83 | return 0; | ||
| 84 | |||
| 85 | } | ||
| 86 | |||
| 87 | static int cma_free_write(void *data, u64 val) | ||
| 88 | { | ||
| 89 | int pages = val; | ||
| 90 | struct cma *cma = data; | ||
| 91 | |||
| 92 | return cma_free_mem(cma, pages); | ||
| 93 | } | ||
| 94 | |||
| 95 | DEFINE_SIMPLE_ATTRIBUTE(cma_free_fops, NULL, cma_free_write, "%llu\n"); | ||
| 96 | |||
| 97 | static int cma_alloc_mem(struct cma *cma, int count) | ||
| 98 | { | ||
| 99 | struct cma_mem *mem; | ||
| 100 | struct page *p; | ||
| 101 | |||
| 102 | mem = kzalloc(sizeof(*mem), GFP_KERNEL); | ||
| 103 | if (!mem) | ||
| 104 | return -ENOMEM; | ||
| 105 | |||
| 106 | p = cma_alloc(cma, count, 0); | ||
| 107 | if (!p) { | ||
| 108 | kfree(mem); | ||
| 109 | return -ENOMEM; | ||
| 110 | } | ||
| 111 | |||
| 112 | mem->p = p; | ||
| 113 | mem->n = count; | ||
| 114 | |||
| 115 | cma_add_to_cma_mem_list(cma, mem); | ||
| 116 | |||
| 117 | return 0; | ||
| 118 | } | ||
| 119 | |||
| 120 | static int cma_alloc_write(void *data, u64 val) | ||
| 121 | { | ||
| 122 | int pages = val; | ||
| 123 | struct cma *cma = data; | ||
| 124 | |||
| 125 | return cma_alloc_mem(cma, pages); | ||
| 126 | } | ||
| 127 | |||
| 128 | DEFINE_SIMPLE_ATTRIBUTE(cma_alloc_fops, NULL, cma_alloc_write, "%llu\n"); | ||
| 129 | |||
| 130 | static void cma_debugfs_add_one(struct cma *cma, int idx) | ||
| 131 | { | ||
| 132 | struct dentry *tmp; | ||
| 133 | char name[16]; | ||
| 134 | int u32s; | ||
| 135 | |||
| 136 | sprintf(name, "cma-%d", idx); | ||
| 137 | |||
| 138 | tmp = debugfs_create_dir(name, cma_debugfs_root); | ||
| 139 | |||
| 140 | debugfs_create_file("alloc", S_IWUSR, cma_debugfs_root, cma, | ||
| 141 | &cma_alloc_fops); | ||
| 142 | |||
| 143 | debugfs_create_file("free", S_IWUSR, cma_debugfs_root, cma, | ||
| 144 | &cma_free_fops); | ||
| 145 | |||
| 146 | debugfs_create_file("base_pfn", S_IRUGO, tmp, | ||
| 147 | &cma->base_pfn, &cma_debugfs_fops); | ||
| 148 | debugfs_create_file("count", S_IRUGO, tmp, | ||
| 149 | &cma->count, &cma_debugfs_fops); | ||
| 150 | debugfs_create_file("order_per_bit", S_IRUGO, tmp, | ||
| 151 | &cma->order_per_bit, &cma_debugfs_fops); | ||
| 152 | |||
| 153 | u32s = DIV_ROUND_UP(cma_bitmap_maxno(cma), BITS_PER_BYTE * sizeof(u32)); | ||
| 154 | debugfs_create_u32_array("bitmap", S_IRUGO, tmp, (u32*)cma->bitmap, u32s); | ||
| 155 | } | ||
| 156 | |||
| 157 | static int __init cma_debugfs_init(void) | ||
| 158 | { | ||
| 159 | int i; | ||
| 160 | |||
| 161 | cma_debugfs_root = debugfs_create_dir("cma", NULL); | ||
| 162 | if (!cma_debugfs_root) | ||
| 163 | return -ENOMEM; | ||
| 164 | |||
| 165 | for (i = 0; i < cma_area_count; i++) | ||
| 166 | cma_debugfs_add_one(&cma_areas[i], i); | ||
| 167 | |||
| 168 | return 0; | ||
| 169 | } | ||
| 170 | late_initcall(cma_debugfs_init); | ||
diff --git a/mm/compaction.c b/mm/compaction.c index 8c0d9459b54a..a18201a8124e 100644 --- a/mm/compaction.c +++ b/mm/compaction.c | |||
| @@ -1174,13 +1174,24 @@ static int __compact_finished(struct zone *zone, struct compact_control *cc, | |||
| 1174 | /* Direct compactor: Is a suitable page free? */ | 1174 | /* Direct compactor: Is a suitable page free? */ |
| 1175 | for (order = cc->order; order < MAX_ORDER; order++) { | 1175 | for (order = cc->order; order < MAX_ORDER; order++) { |
| 1176 | struct free_area *area = &zone->free_area[order]; | 1176 | struct free_area *area = &zone->free_area[order]; |
| 1177 | bool can_steal; | ||
| 1177 | 1178 | ||
| 1178 | /* Job done if page is free of the right migratetype */ | 1179 | /* Job done if page is free of the right migratetype */ |
| 1179 | if (!list_empty(&area->free_list[migratetype])) | 1180 | if (!list_empty(&area->free_list[migratetype])) |
| 1180 | return COMPACT_PARTIAL; | 1181 | return COMPACT_PARTIAL; |
| 1181 | 1182 | ||
| 1182 | /* Job done if allocation would set block type */ | 1183 | #ifdef CONFIG_CMA |
| 1183 | if (order >= pageblock_order && area->nr_free) | 1184 | /* MIGRATE_MOVABLE can fallback on MIGRATE_CMA */ |
| 1185 | if (migratetype == MIGRATE_MOVABLE && | ||
| 1186 | !list_empty(&area->free_list[MIGRATE_CMA])) | ||
| 1187 | return COMPACT_PARTIAL; | ||
| 1188 | #endif | ||
| 1189 | /* | ||
| 1190 | * Job done if allocation would steal freepages from | ||
| 1191 | * other migratetype buddy lists. | ||
| 1192 | */ | ||
| 1193 | if (find_suitable_fallback(area, order, migratetype, | ||
| 1194 | true, &can_steal) != -1) | ||
| 1184 | return COMPACT_PARTIAL; | 1195 | return COMPACT_PARTIAL; |
| 1185 | } | 1196 | } |
| 1186 | 1197 | ||
diff --git a/mm/filemap.c b/mm/filemap.c index 876f4e6f3ed6..12548d03c11d 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
| @@ -202,16 +202,15 @@ void __delete_from_page_cache(struct page *page, void *shadow) | |||
| 202 | BUG_ON(page_mapped(page)); | 202 | BUG_ON(page_mapped(page)); |
| 203 | 203 | ||
| 204 | /* | 204 | /* |
| 205 | * Some filesystems seem to re-dirty the page even after | 205 | * At this point page must be either written or cleaned by truncate. |
| 206 | * the VM has canceled the dirty bit (eg ext3 journaling). | 206 | * Dirty page here signals a bug and loss of unwritten data. |
| 207 | * | 207 | * |
| 208 | * Fix it up by doing a final dirty accounting check after | 208 | * This fixes dirty accounting after removing the page entirely but |
| 209 | * having removed the page entirely. | 209 | * leaves PageDirty set: it has no effect for truncated page and |
| 210 | * anyway will be cleared before returning page into buddy allocator. | ||
| 210 | */ | 211 | */ |
| 211 | if (PageDirty(page) && mapping_cap_account_dirty(mapping)) { | 212 | if (WARN_ON_ONCE(PageDirty(page))) |
| 212 | dec_zone_page_state(page, NR_FILE_DIRTY); | 213 | account_page_cleaned(page, mapping); |
| 213 | dec_bdi_stat(inode_to_bdi(mapping->host), BDI_RECLAIMABLE); | ||
| 214 | } | ||
| 215 | } | 214 | } |
| 216 | 215 | ||
| 217 | /** | 216 | /** |
| @@ -92,7 +92,7 @@ retry: | |||
| 92 | */ | 92 | */ |
| 93 | mark_page_accessed(page); | 93 | mark_page_accessed(page); |
| 94 | } | 94 | } |
| 95 | if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) { | 95 | if ((flags & FOLL_POPULATE) && (vma->vm_flags & VM_LOCKED)) { |
| 96 | /* | 96 | /* |
| 97 | * The preliminary mapping check is mainly to avoid the | 97 | * The preliminary mapping check is mainly to avoid the |
| 98 | * pointless overhead of lock_page on the ZERO_PAGE | 98 | * pointless overhead of lock_page on the ZERO_PAGE |
| @@ -265,8 +265,8 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, | |||
| 265 | unsigned int fault_flags = 0; | 265 | unsigned int fault_flags = 0; |
| 266 | int ret; | 266 | int ret; |
| 267 | 267 | ||
| 268 | /* For mlock, just skip the stack guard page. */ | 268 | /* For mm_populate(), just skip the stack guard page. */ |
| 269 | if ((*flags & FOLL_MLOCK) && | 269 | if ((*flags & FOLL_POPULATE) && |
| 270 | (stack_guard_page_start(vma, address) || | 270 | (stack_guard_page_start(vma, address) || |
| 271 | stack_guard_page_end(vma, address + PAGE_SIZE))) | 271 | stack_guard_page_end(vma, address + PAGE_SIZE))) |
| 272 | return -ENOENT; | 272 | return -ENOENT; |
| @@ -819,6 +819,124 @@ long get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
| 819 | EXPORT_SYMBOL(get_user_pages); | 819 | EXPORT_SYMBOL(get_user_pages); |
| 820 | 820 | ||
| 821 | /** | 821 | /** |
| 822 | * populate_vma_page_range() - populate a range of pages in the vma. | ||
| 823 | * @vma: target vma | ||
| 824 | * @start: start address | ||
| 825 | * @end: end address | ||
| 826 | * @nonblocking: | ||
| 827 | * | ||
| 828 | * This takes care of mlocking the pages too if VM_LOCKED is set. | ||
| 829 | * | ||
| 830 | * return 0 on success, negative error code on error. | ||
| 831 | * | ||
| 832 | * vma->vm_mm->mmap_sem must be held. | ||
| 833 | * | ||
| 834 | * If @nonblocking is NULL, it may be held for read or write and will | ||
| 835 | * be unperturbed. | ||
| 836 | * | ||
| 837 | * If @nonblocking is non-NULL, it must be held for read only and may be | ||
| 838 | * released. If it's released, *@nonblocking will be set to 0. | ||
| 839 | */ | ||
| 840 | long populate_vma_page_range(struct vm_area_struct *vma, | ||
| 841 | unsigned long start, unsigned long end, int *nonblocking) | ||
| 842 | { | ||
| 843 | struct mm_struct *mm = vma->vm_mm; | ||
| 844 | unsigned long nr_pages = (end - start) / PAGE_SIZE; | ||
| 845 | int gup_flags; | ||
| 846 | |||
| 847 | VM_BUG_ON(start & ~PAGE_MASK); | ||
| 848 | VM_BUG_ON(end & ~PAGE_MASK); | ||
| 849 | VM_BUG_ON_VMA(start < vma->vm_start, vma); | ||
| 850 | VM_BUG_ON_VMA(end > vma->vm_end, vma); | ||
| 851 | VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm); | ||
| 852 | |||
| 853 | gup_flags = FOLL_TOUCH | FOLL_POPULATE; | ||
| 854 | /* | ||
| 855 | * We want to touch writable mappings with a write fault in order | ||
| 856 | * to break COW, except for shared mappings because these don't COW | ||
| 857 | * and we would not want to dirty them for nothing. | ||
| 858 | */ | ||
| 859 | if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE) | ||
| 860 | gup_flags |= FOLL_WRITE; | ||
| 861 | |||
| 862 | /* | ||
| 863 | * We want mlock to succeed for regions that have any permissions | ||
| 864 | * other than PROT_NONE. | ||
| 865 | */ | ||
| 866 | if (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)) | ||
| 867 | gup_flags |= FOLL_FORCE; | ||
| 868 | |||
| 869 | /* | ||
| 870 | * We made sure addr is within a VMA, so the following will | ||
| 871 | * not result in a stack expansion that recurses back here. | ||
| 872 | */ | ||
| 873 | return __get_user_pages(current, mm, start, nr_pages, gup_flags, | ||
| 874 | NULL, NULL, nonblocking); | ||
| 875 | } | ||
| 876 | |||
| 877 | /* | ||
| 878 | * __mm_populate - populate and/or mlock pages within a range of address space. | ||
| 879 | * | ||
| 880 | * This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap | ||
| 881 | * flags. VMAs must be already marked with the desired vm_flags, and | ||
| 882 | * mmap_sem must not be held. | ||
| 883 | */ | ||
| 884 | int __mm_populate(unsigned long start, unsigned long len, int ignore_errors) | ||
| 885 | { | ||
| 886 | struct mm_struct *mm = current->mm; | ||
| 887 | unsigned long end, nstart, nend; | ||
| 888 | struct vm_area_struct *vma = NULL; | ||
| 889 | int locked = 0; | ||
| 890 | long ret = 0; | ||
| 891 | |||
| 892 | VM_BUG_ON(start & ~PAGE_MASK); | ||
| 893 | VM_BUG_ON(len != PAGE_ALIGN(len)); | ||
| 894 | end = start + len; | ||
| 895 | |||
| 896 | for (nstart = start; nstart < end; nstart = nend) { | ||
| 897 | /* | ||
| 898 | * We want to fault in pages for [nstart; end) address range. | ||
| 899 | * Find first corresponding VMA. | ||
| 900 | */ | ||
| 901 | if (!locked) { | ||
| 902 | locked = 1; | ||
| 903 | down_read(&mm->mmap_sem); | ||
| 904 | vma = find_vma(mm, nstart); | ||
| 905 | } else if (nstart >= vma->vm_end) | ||
| 906 | vma = vma->vm_next; | ||
| 907 | if (!vma || vma->vm_start >= end) | ||
| 908 | break; | ||
| 909 | /* | ||
| 910 | * Set [nstart; nend) to intersection of desired address | ||
| 911 | * range with the first VMA. Also, skip undesirable VMA types. | ||
| 912 | */ | ||
| 913 | nend = min(end, vma->vm_end); | ||
| 914 | if (vma->vm_flags & (VM_IO | VM_PFNMAP)) | ||
| 915 | continue; | ||
| 916 | if (nstart < vma->vm_start) | ||
| 917 | nstart = vma->vm_start; | ||
| 918 | /* | ||
| 919 | * Now fault in a range of pages. populate_vma_page_range() | ||
| 920 | * double checks the vma flags, so that it won't mlock pages | ||
| 921 | * if the vma was already munlocked. | ||
| 922 | */ | ||
| 923 | ret = populate_vma_page_range(vma, nstart, nend, &locked); | ||
| 924 | if (ret < 0) { | ||
| 925 | if (ignore_errors) { | ||
| 926 | ret = 0; | ||
| 927 | continue; /* continue at next VMA */ | ||
| 928 | } | ||
| 929 | break; | ||
| 930 | } | ||
| 931 | nend = nstart + ret * PAGE_SIZE; | ||
| 932 | ret = 0; | ||
| 933 | } | ||
| 934 | if (locked) | ||
| 935 | up_read(&mm->mmap_sem); | ||
| 936 | return ret; /* 0 or negative error code */ | ||
| 937 | } | ||
| 938 | |||
| 939 | /** | ||
| 822 | * get_dump_page() - pin user page in memory while writing it to core dump | 940 | * get_dump_page() - pin user page in memory while writing it to core dump |
| 823 | * @addr: user address | 941 | * @addr: user address |
| 824 | * | 942 | * |
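
The two helpers above replace the old __mlock_vma_pages_range() path: populate_vma_page_range() faults in one VMA's slice and __mm_populate() walks a whole address range, dropping and retaking mmap_sem as needed. A minimal caller sketch, not part of this patch (the real callers are mlock() and the MAP_POPULATE/MAP_LOCKED mmap paths); the function name here is hypothetical and the range is assumed to be already marked with the desired vm_flags:

    #include <linux/mm.h>

    /* Sketch: fault in an already-flagged range, ignoring per-VMA errors the
     * way MAP_POPULATE does. mmap_sem must not be held by the caller;
     * __mm_populate() takes it for read itself and calls
     * populate_vma_page_range() on each intersecting VMA. */
    static int example_populate_range(unsigned long start, size_t len)
    {
            len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
            start &= PAGE_MASK;

            return __mm_populate(start, len, 1 /* ignore_errors */);
    }
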
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 6817b0350c71..3afb5cbe1312 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
| @@ -1231,7 +1231,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, | |||
| 1231 | pmd, _pmd, 1)) | 1231 | pmd, _pmd, 1)) |
| 1232 | update_mmu_cache_pmd(vma, addr, pmd); | 1232 | update_mmu_cache_pmd(vma, addr, pmd); |
| 1233 | } | 1233 | } |
| 1234 | if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) { | 1234 | if ((flags & FOLL_POPULATE) && (vma->vm_flags & VM_LOCKED)) { |
| 1235 | if (page->mapping && trylock_page(page)) { | 1235 | if (page->mapping && trylock_page(page)) { |
| 1236 | lru_add_drain(); | 1236 | lru_add_drain(); |
| 1237 | if (page->mapping) | 1237 | if (page->mapping) |
| @@ -2109,7 +2109,7 @@ static void release_pte_pages(pte_t *pte, pte_t *_pte) | |||
| 2109 | { | 2109 | { |
| 2110 | while (--_pte >= pte) { | 2110 | while (--_pte >= pte) { |
| 2111 | pte_t pteval = *_pte; | 2111 | pte_t pteval = *_pte; |
| 2112 | if (!pte_none(pteval)) | 2112 | if (!pte_none(pteval) && !is_zero_pfn(pte_pfn(pteval))) |
| 2113 | release_pte_page(pte_page(pteval)); | 2113 | release_pte_page(pte_page(pteval)); |
| 2114 | } | 2114 | } |
| 2115 | } | 2115 | } |
| @@ -2120,13 +2120,13 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, | |||
| 2120 | { | 2120 | { |
| 2121 | struct page *page; | 2121 | struct page *page; |
| 2122 | pte_t *_pte; | 2122 | pte_t *_pte; |
| 2123 | int none = 0; | 2123 | int none_or_zero = 0; |
| 2124 | bool referenced = false, writable = false; | 2124 | bool referenced = false, writable = false; |
| 2125 | for (_pte = pte; _pte < pte+HPAGE_PMD_NR; | 2125 | for (_pte = pte; _pte < pte+HPAGE_PMD_NR; |
| 2126 | _pte++, address += PAGE_SIZE) { | 2126 | _pte++, address += PAGE_SIZE) { |
| 2127 | pte_t pteval = *_pte; | 2127 | pte_t pteval = *_pte; |
| 2128 | if (pte_none(pteval)) { | 2128 | if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) { |
| 2129 | if (++none <= khugepaged_max_ptes_none) | 2129 | if (++none_or_zero <= khugepaged_max_ptes_none) |
| 2130 | continue; | 2130 | continue; |
| 2131 | else | 2131 | else |
| 2132 | goto out; | 2132 | goto out; |
| @@ -2207,9 +2207,21 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page, | |||
| 2207 | pte_t pteval = *_pte; | 2207 | pte_t pteval = *_pte; |
| 2208 | struct page *src_page; | 2208 | struct page *src_page; |
| 2209 | 2209 | ||
| 2210 | if (pte_none(pteval)) { | 2210 | if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) { |
| 2211 | clear_user_highpage(page, address); | 2211 | clear_user_highpage(page, address); |
| 2212 | add_mm_counter(vma->vm_mm, MM_ANONPAGES, 1); | 2212 | add_mm_counter(vma->vm_mm, MM_ANONPAGES, 1); |
| 2213 | if (is_zero_pfn(pte_pfn(pteval))) { | ||
| 2214 | /* | ||
| 2215 | * ptl mostly unnecessary. | ||
| 2216 | */ | ||
| 2217 | spin_lock(ptl); | ||
| 2218 | /* | ||
| 2219 | * paravirt calls inside pte_clear here are | ||
| 2220 | * superfluous. | ||
| 2221 | */ | ||
| 2222 | pte_clear(vma->vm_mm, address, _pte); | ||
| 2223 | spin_unlock(ptl); | ||
| 2224 | } | ||
| 2213 | } else { | 2225 | } else { |
| 2214 | src_page = pte_page(pteval); | 2226 | src_page = pte_page(pteval); |
| 2215 | copy_user_highpage(page, src_page, address, vma); | 2227 | copy_user_highpage(page, src_page, address, vma); |
| @@ -2316,8 +2328,14 @@ static struct page | |||
| 2316 | struct vm_area_struct *vma, unsigned long address, | 2328 | struct vm_area_struct *vma, unsigned long address, |
| 2317 | int node) | 2329 | int node) |
| 2318 | { | 2330 | { |
| 2331 | gfp_t flags; | ||
| 2332 | |||
| 2319 | VM_BUG_ON_PAGE(*hpage, *hpage); | 2333 | VM_BUG_ON_PAGE(*hpage, *hpage); |
| 2320 | 2334 | ||
| 2335 | /* Only allocate from the target node */ | ||
| 2336 | flags = alloc_hugepage_gfpmask(khugepaged_defrag(), __GFP_OTHER_NODE) | | ||
| 2337 | __GFP_THISNODE; | ||
| 2338 | |||
| 2321 | /* | 2339 | /* |
| 2322 | * Before allocating the hugepage, release the mmap_sem read lock. | 2340 | * Before allocating the hugepage, release the mmap_sem read lock. |
| 2323 | * The allocation can take potentially a long time if it involves | 2341 | * The allocation can take potentially a long time if it involves |
| @@ -2326,8 +2344,7 @@ static struct page | |||
| 2326 | */ | 2344 | */ |
| 2327 | up_read(&mm->mmap_sem); | 2345 | up_read(&mm->mmap_sem); |
| 2328 | 2346 | ||
| 2329 | *hpage = alloc_pages_exact_node(node, alloc_hugepage_gfpmask( | 2347 | *hpage = alloc_pages_exact_node(node, flags, HPAGE_PMD_ORDER); |
| 2330 | khugepaged_defrag(), __GFP_OTHER_NODE), HPAGE_PMD_ORDER); | ||
| 2331 | if (unlikely(!*hpage)) { | 2348 | if (unlikely(!*hpage)) { |
| 2332 | count_vm_event(THP_COLLAPSE_ALLOC_FAILED); | 2349 | count_vm_event(THP_COLLAPSE_ALLOC_FAILED); |
| 2333 | *hpage = ERR_PTR(-ENOMEM); | 2350 | *hpage = ERR_PTR(-ENOMEM); |
| @@ -2543,7 +2560,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, | |||
| 2543 | { | 2560 | { |
| 2544 | pmd_t *pmd; | 2561 | pmd_t *pmd; |
| 2545 | pte_t *pte, *_pte; | 2562 | pte_t *pte, *_pte; |
| 2546 | int ret = 0, none = 0; | 2563 | int ret = 0, none_or_zero = 0; |
| 2547 | struct page *page; | 2564 | struct page *page; |
| 2548 | unsigned long _address; | 2565 | unsigned long _address; |
| 2549 | spinlock_t *ptl; | 2566 | spinlock_t *ptl; |
| @@ -2561,8 +2578,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, | |||
| 2561 | for (_address = address, _pte = pte; _pte < pte+HPAGE_PMD_NR; | 2578 | for (_address = address, _pte = pte; _pte < pte+HPAGE_PMD_NR; |
| 2562 | _pte++, _address += PAGE_SIZE) { | 2579 | _pte++, _address += PAGE_SIZE) { |
| 2563 | pte_t pteval = *_pte; | 2580 | pte_t pteval = *_pte; |
| 2564 | if (pte_none(pteval)) { | 2581 | if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) { |
| 2565 | if (++none <= khugepaged_max_ptes_none) | 2582 | if (++none_or_zero <= khugepaged_max_ptes_none) |
| 2566 | continue; | 2583 | continue; |
| 2567 | else | 2584 | else |
| 2568 | goto out_unmap; | 2585 | goto out_unmap; |
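
The khugepaged hunks above apply the same new test in three places (__collapse_huge_page_isolate(), __collapse_huge_page_copy() and khugepaged_scan_pmd()): a PTE that maps the shared zero page is now treated like an empty PTE and counted against khugepaged_max_ptes_none. As an illustrative sketch only (this helper is not part of the patch), the rule factored into one predicate:

    #include <linux/mm.h>
    #include <asm/pgtable.h>

    /* Sketch: "none or zero" means the PTE is empty or maps the zero page. */
    static inline bool pte_none_or_zero(pte_t pteval)
    {
            return pte_none(pteval) || is_zero_pfn(pte_pfn(pteval));
    }
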
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index c41b2a0ee273..8874c8ad55aa 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
| @@ -3278,6 +3278,15 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 3278 | struct page *page; | 3278 | struct page *page; |
| 3279 | 3279 | ||
| 3280 | /* | 3280 | /* |
| 3281 | * If we have a pending SIGKILL, don't keep faulting pages and | ||
| 3282 | * potentially allocating memory. | ||
| 3283 | */ | ||
| 3284 | if (unlikely(fatal_signal_pending(current))) { | ||
| 3285 | remainder = 0; | ||
| 3286 | break; | ||
| 3287 | } | ||
| 3288 | |||
| 3289 | /* | ||
| 3281 | * Some archs (sparc64, sh*) have multiple pte_ts to | 3290 | * Some archs (sparc64, sh*) have multiple pte_ts to |
| 3282 | * each hugepage. We have to make sure we get the | 3291 | * each hugepage. We have to make sure we get the |
| 3283 | * first, for the page indexing below to work. | 3292 | * first, for the page indexing below to work. |
| @@ -3735,8 +3744,7 @@ retry: | |||
| 3735 | if (!pmd_huge(*pmd)) | 3744 | if (!pmd_huge(*pmd)) |
| 3736 | goto out; | 3745 | goto out; |
| 3737 | if (pmd_present(*pmd)) { | 3746 | if (pmd_present(*pmd)) { |
| 3738 | page = pte_page(*(pte_t *)pmd) + | 3747 | page = pmd_page(*pmd) + ((address & ~PMD_MASK) >> PAGE_SHIFT); |
| 3739 | ((address & ~PMD_MASK) >> PAGE_SHIFT); | ||
| 3740 | if (flags & FOLL_GET) | 3748 | if (flags & FOLL_GET) |
| 3741 | get_page(page); | 3749 | get_page(page); |
| 3742 | } else { | 3750 | } else { |
diff --git a/mm/internal.h b/mm/internal.h index a96da5b0029d..edaab69a9c35 100644 --- a/mm/internal.h +++ b/mm/internal.h | |||
| @@ -200,6 +200,8 @@ isolate_freepages_range(struct compact_control *cc, | |||
| 200 | unsigned long | 200 | unsigned long |
| 201 | isolate_migratepages_range(struct compact_control *cc, | 201 | isolate_migratepages_range(struct compact_control *cc, |
| 202 | unsigned long low_pfn, unsigned long end_pfn); | 202 | unsigned long low_pfn, unsigned long end_pfn); |
| 203 | int find_suitable_fallback(struct free_area *area, unsigned int order, | ||
| 204 | int migratetype, bool only_stealable, bool *can_steal); | ||
| 203 | 205 | ||
| 204 | #endif | 206 | #endif |
| 205 | 207 | ||
| @@ -240,7 +242,7 @@ void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 240 | struct vm_area_struct *prev, struct rb_node *rb_parent); | 242 | struct vm_area_struct *prev, struct rb_node *rb_parent); |
| 241 | 243 | ||
| 242 | #ifdef CONFIG_MMU | 244 | #ifdef CONFIG_MMU |
| 243 | extern long __mlock_vma_pages_range(struct vm_area_struct *vma, | 245 | extern long populate_vma_page_range(struct vm_area_struct *vma, |
| 244 | unsigned long start, unsigned long end, int *nonblocking); | 246 | unsigned long start, unsigned long end, int *nonblocking); |
| 245 | extern void munlock_vma_pages_range(struct vm_area_struct *vma, | 247 | extern void munlock_vma_pages_range(struct vm_area_struct *vma, |
| 246 | unsigned long start, unsigned long end); | 248 | unsigned long start, unsigned long end); |
diff --git a/mm/memblock.c b/mm/memblock.c index 252b77bdf65e..3f37a0bca5d5 100644 --- a/mm/memblock.c +++ b/mm/memblock.c | |||
| @@ -699,14 +699,14 @@ static int __init_memblock memblock_reserve_region(phys_addr_t base, | |||
| 699 | int nid, | 699 | int nid, |
| 700 | unsigned long flags) | 700 | unsigned long flags) |
| 701 | { | 701 | { |
| 702 | struct memblock_type *_rgn = &memblock.reserved; | 702 | struct memblock_type *type = &memblock.reserved; |
| 703 | 703 | ||
| 704 | memblock_dbg("memblock_reserve: [%#016llx-%#016llx] flags %#02lx %pF\n", | 704 | memblock_dbg("memblock_reserve: [%#016llx-%#016llx] flags %#02lx %pF\n", |
| 705 | (unsigned long long)base, | 705 | (unsigned long long)base, |
| 706 | (unsigned long long)base + size - 1, | 706 | (unsigned long long)base + size - 1, |
| 707 | flags, (void *)_RET_IP_); | 707 | flags, (void *)_RET_IP_); |
| 708 | 708 | ||
| 709 | return memblock_add_range(_rgn, base, size, nid, flags); | 709 | return memblock_add_range(type, base, size, nid, flags); |
| 710 | } | 710 | } |
| 711 | 711 | ||
| 712 | int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size) | 712 | int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size) |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index b34ef4a32a3b..c3f09b2dda5f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
| @@ -14,6 +14,12 @@ | |||
| 14 | * Copyright (C) 2012 Parallels Inc. and Google Inc. | 14 | * Copyright (C) 2012 Parallels Inc. and Google Inc. |
| 15 | * Authors: Glauber Costa and Suleiman Souhlal | 15 | * Authors: Glauber Costa and Suleiman Souhlal |
| 16 | * | 16 | * |
| 17 | * Native page reclaim | ||
| 18 | * Charge lifetime sanitation | ||
| 19 | * Lockless page tracking & accounting | ||
| 20 | * Unified hierarchy configuration model | ||
| 21 | * Copyright (C) 2015 Red Hat, Inc., Johannes Weiner | ||
| 22 | * | ||
| 17 | * This program is free software; you can redistribute it and/or modify | 23 | * This program is free software; you can redistribute it and/or modify |
| 18 | * it under the terms of the GNU General Public License as published by | 24 | * it under the terms of the GNU General Public License as published by |
| 19 | * the Free Software Foundation; either version 2 of the License, or | 25 | * the Free Software Foundation; either version 2 of the License, or |
| @@ -1436,15 +1442,17 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) | |||
| 1436 | struct mem_cgroup *iter; | 1442 | struct mem_cgroup *iter; |
| 1437 | unsigned int i; | 1443 | unsigned int i; |
| 1438 | 1444 | ||
| 1439 | if (!p) | ||
| 1440 | return; | ||
| 1441 | |||
| 1442 | mutex_lock(&oom_info_lock); | 1445 | mutex_lock(&oom_info_lock); |
| 1443 | rcu_read_lock(); | 1446 | rcu_read_lock(); |
| 1444 | 1447 | ||
| 1445 | pr_info("Task in "); | 1448 | if (p) { |
| 1446 | pr_cont_cgroup_path(task_cgroup(p, memory_cgrp_id)); | 1449 | pr_info("Task in "); |
| 1447 | pr_cont(" killed as a result of limit of "); | 1450 | pr_cont_cgroup_path(task_cgroup(p, memory_cgrp_id)); |
| 1451 | pr_cont(" killed as a result of limit of "); | ||
| 1452 | } else { | ||
| 1453 | pr_info("Memory limit reached of cgroup "); | ||
| 1454 | } | ||
| 1455 | |||
| 1448 | pr_cont_cgroup_path(memcg->css.cgroup); | 1456 | pr_cont_cgroup_path(memcg->css.cgroup); |
| 1449 | pr_cont("\n"); | 1457 | pr_cont("\n"); |
| 1450 | 1458 | ||
| @@ -1531,7 +1539,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, | |||
| 1531 | return; | 1539 | return; |
| 1532 | } | 1540 | } |
| 1533 | 1541 | ||
| 1534 | check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL); | 1542 | check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL, memcg); |
| 1535 | totalpages = mem_cgroup_get_limit(memcg) ? : 1; | 1543 | totalpages = mem_cgroup_get_limit(memcg) ? : 1; |
| 1536 | for_each_mem_cgroup_tree(iter, memcg) { | 1544 | for_each_mem_cgroup_tree(iter, memcg) { |
| 1537 | struct css_task_iter it; | 1545 | struct css_task_iter it; |
| @@ -2779,92 +2787,6 @@ void mem_cgroup_split_huge_fixup(struct page *head) | |||
| 2779 | } | 2787 | } |
| 2780 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ | 2788 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ |
| 2781 | 2789 | ||
| 2782 | /** | ||
| 2783 | * mem_cgroup_move_account - move account of the page | ||
| 2784 | * @page: the page | ||
| 2785 | * @nr_pages: number of regular pages (>1 for huge pages) | ||
| 2786 | * @from: mem_cgroup which the page is moved from. | ||
| 2787 | * @to: mem_cgroup which the page is moved to. @from != @to. | ||
| 2788 | * | ||
| 2789 | * The caller must confirm following. | ||
| 2790 | * - page is not on LRU (isolate_page() is useful.) | ||
| 2791 | * - compound_lock is held when nr_pages > 1 | ||
| 2792 | * | ||
| 2793 | * This function doesn't do "charge" to new cgroup and doesn't do "uncharge" | ||
| 2794 | * from old cgroup. | ||
| 2795 | */ | ||
| 2796 | static int mem_cgroup_move_account(struct page *page, | ||
| 2797 | unsigned int nr_pages, | ||
| 2798 | struct mem_cgroup *from, | ||
| 2799 | struct mem_cgroup *to) | ||
| 2800 | { | ||
| 2801 | unsigned long flags; | ||
| 2802 | int ret; | ||
| 2803 | |||
| 2804 | VM_BUG_ON(from == to); | ||
| 2805 | VM_BUG_ON_PAGE(PageLRU(page), page); | ||
| 2806 | /* | ||
| 2807 | * The page is isolated from LRU. So, collapse function | ||
| 2808 | * will not handle this page. But page splitting can happen. | ||
| 2809 | * Do this check under compound_page_lock(). The caller should | ||
| 2810 | * hold it. | ||
| 2811 | */ | ||
| 2812 | ret = -EBUSY; | ||
| 2813 | if (nr_pages > 1 && !PageTransHuge(page)) | ||
| 2814 | goto out; | ||
| 2815 | |||
| 2816 | /* | ||
| 2817 | * Prevent mem_cgroup_migrate() from looking at page->mem_cgroup | ||
| 2818 | * of its source page while we change it: page migration takes | ||
| 2819 | * both pages off the LRU, but page cache replacement doesn't. | ||
| 2820 | */ | ||
| 2821 | if (!trylock_page(page)) | ||
| 2822 | goto out; | ||
| 2823 | |||
| 2824 | ret = -EINVAL; | ||
| 2825 | if (page->mem_cgroup != from) | ||
| 2826 | goto out_unlock; | ||
| 2827 | |||
| 2828 | spin_lock_irqsave(&from->move_lock, flags); | ||
| 2829 | |||
| 2830 | if (!PageAnon(page) && page_mapped(page)) { | ||
| 2831 | __this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED], | ||
| 2832 | nr_pages); | ||
| 2833 | __this_cpu_add(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED], | ||
| 2834 | nr_pages); | ||
| 2835 | } | ||
| 2836 | |||
| 2837 | if (PageWriteback(page)) { | ||
| 2838 | __this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_WRITEBACK], | ||
| 2839 | nr_pages); | ||
| 2840 | __this_cpu_add(to->stat->count[MEM_CGROUP_STAT_WRITEBACK], | ||
| 2841 | nr_pages); | ||
| 2842 | } | ||
| 2843 | |||
| 2844 | /* | ||
| 2845 | * It is safe to change page->mem_cgroup here because the page | ||
| 2846 | * is referenced, charged, and isolated - we can't race with | ||
| 2847 | * uncharging, charging, migration, or LRU putback. | ||
| 2848 | */ | ||
| 2849 | |||
| 2850 | /* caller should have done css_get */ | ||
| 2851 | page->mem_cgroup = to; | ||
| 2852 | spin_unlock_irqrestore(&from->move_lock, flags); | ||
| 2853 | |||
| 2854 | ret = 0; | ||
| 2855 | |||
| 2856 | local_irq_disable(); | ||
| 2857 | mem_cgroup_charge_statistics(to, page, nr_pages); | ||
| 2858 | memcg_check_events(to, page); | ||
| 2859 | mem_cgroup_charge_statistics(from, page, -nr_pages); | ||
| 2860 | memcg_check_events(from, page); | ||
| 2861 | local_irq_enable(); | ||
| 2862 | out_unlock: | ||
| 2863 | unlock_page(page); | ||
| 2864 | out: | ||
| 2865 | return ret; | ||
| 2866 | } | ||
| 2867 | |||
| 2868 | #ifdef CONFIG_MEMCG_SWAP | 2790 | #ifdef CONFIG_MEMCG_SWAP |
| 2869 | static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg, | 2791 | static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg, |
| 2870 | bool charge) | 2792 | bool charge) |
| @@ -4816,6 +4738,92 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma, | |||
| 4816 | return page; | 4738 | return page; |
| 4817 | } | 4739 | } |
| 4818 | 4740 | ||
| 4741 | /** | ||
| 4742 | * mem_cgroup_move_account - move account of the page | ||
| 4743 | * @page: the page | ||
| 4744 | * @nr_pages: number of regular pages (>1 for huge pages) | ||
| 4745 | * @from: mem_cgroup which the page is moved from. | ||
| 4746 | * @to: mem_cgroup which the page is moved to. @from != @to. | ||
| 4747 | * | ||
| 4748 | * The caller must confirm the following. | ||
| 4749 | * - page is not on LRU (isolate_page() is useful.) | ||
| 4750 | * - compound_lock is held when nr_pages > 1 | ||
| 4751 | * | ||
| 4752 | * This function doesn't do "charge" to new cgroup and doesn't do "uncharge" | ||
| 4753 | * from old cgroup. | ||
| 4754 | */ | ||
| 4755 | static int mem_cgroup_move_account(struct page *page, | ||
| 4756 | unsigned int nr_pages, | ||
| 4757 | struct mem_cgroup *from, | ||
| 4758 | struct mem_cgroup *to) | ||
| 4759 | { | ||
| 4760 | unsigned long flags; | ||
| 4761 | int ret; | ||
| 4762 | |||
| 4763 | VM_BUG_ON(from == to); | ||
| 4764 | VM_BUG_ON_PAGE(PageLRU(page), page); | ||
| 4765 | /* | ||
| 4766 | * The page is isolated from LRU. So, collapse function | ||
| 4767 | * will not handle this page. But page splitting can happen. | ||
| 4768 | * Do this check under compound_page_lock(). The caller should | ||
| 4769 | * hold it. | ||
| 4770 | */ | ||
| 4771 | ret = -EBUSY; | ||
| 4772 | if (nr_pages > 1 && !PageTransHuge(page)) | ||
| 4773 | goto out; | ||
| 4774 | |||
| 4775 | /* | ||
| 4776 | * Prevent mem_cgroup_migrate() from looking at page->mem_cgroup | ||
| 4777 | * of its source page while we change it: page migration takes | ||
| 4778 | * both pages off the LRU, but page cache replacement doesn't. | ||
| 4779 | */ | ||
| 4780 | if (!trylock_page(page)) | ||
| 4781 | goto out; | ||
| 4782 | |||
| 4783 | ret = -EINVAL; | ||
| 4784 | if (page->mem_cgroup != from) | ||
| 4785 | goto out_unlock; | ||
| 4786 | |||
| 4787 | spin_lock_irqsave(&from->move_lock, flags); | ||
| 4788 | |||
| 4789 | if (!PageAnon(page) && page_mapped(page)) { | ||
| 4790 | __this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED], | ||
| 4791 | nr_pages); | ||
| 4792 | __this_cpu_add(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED], | ||
| 4793 | nr_pages); | ||
| 4794 | } | ||
| 4795 | |||
| 4796 | if (PageWriteback(page)) { | ||
| 4797 | __this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_WRITEBACK], | ||
| 4798 | nr_pages); | ||
| 4799 | __this_cpu_add(to->stat->count[MEM_CGROUP_STAT_WRITEBACK], | ||
| 4800 | nr_pages); | ||
| 4801 | } | ||
| 4802 | |||
| 4803 | /* | ||
| 4804 | * It is safe to change page->mem_cgroup here because the page | ||
| 4805 | * is referenced, charged, and isolated - we can't race with | ||
| 4806 | * uncharging, charging, migration, or LRU putback. | ||
| 4807 | */ | ||
| 4808 | |||
| 4809 | /* caller should have done css_get */ | ||
| 4810 | page->mem_cgroup = to; | ||
| 4811 | spin_unlock_irqrestore(&from->move_lock, flags); | ||
| 4812 | |||
| 4813 | ret = 0; | ||
| 4814 | |||
| 4815 | local_irq_disable(); | ||
| 4816 | mem_cgroup_charge_statistics(to, page, nr_pages); | ||
| 4817 | memcg_check_events(to, page); | ||
| 4818 | mem_cgroup_charge_statistics(from, page, -nr_pages); | ||
| 4819 | memcg_check_events(from, page); | ||
| 4820 | local_irq_enable(); | ||
| 4821 | out_unlock: | ||
| 4822 | unlock_page(page); | ||
| 4823 | out: | ||
| 4824 | return ret; | ||
| 4825 | } | ||
| 4826 | |||
| 4819 | static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma, | 4827 | static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma, |
| 4820 | unsigned long addr, pte_t ptent, union mc_target *target) | 4828 | unsigned long addr, pte_t ptent, union mc_target *target) |
| 4821 | { | 4829 | { |
diff --git a/mm/memory.c b/mm/memory.c index 97839f5c8c30..ac20b2a6a0c3 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
| @@ -1983,167 +1983,91 @@ static int do_page_mkwrite(struct vm_area_struct *vma, struct page *page, | |||
| 1983 | } | 1983 | } |
| 1984 | 1984 | ||
| 1985 | /* | 1985 | /* |
| 1986 | * This routine handles present pages, when users try to write | 1986 | * Handle write page faults for pages that can be reused in the current vma |
| 1987 | * to a shared page. It is done by copying the page to a new address | ||
| 1988 | * and decrementing the shared-page counter for the old page. | ||
| 1989 | * | ||
| 1990 | * Note that this routine assumes that the protection checks have been | ||
| 1991 | * done by the caller (the low-level page fault routine in most cases). | ||
| 1992 | * Thus we can safely just mark it writable once we've done any necessary | ||
| 1993 | * COW. | ||
| 1994 | * | 1987 | * |
| 1995 | * We also mark the page dirty at this point even though the page will | 1988 | * This can happen either due to the mapping being with the VM_SHARED flag, |
| 1996 | * change only once the write actually happens. This avoids a few races, | 1989 | * or due to us being the last reference standing to the page. In either |
| 1997 | * and potentially makes it more efficient. | 1990 | * case, all we need to do here is to mark the page as writable and update |
| 1998 | * | 1991 | * any related book-keeping. |
| 1999 | * We enter with non-exclusive mmap_sem (to exclude vma changes, | ||
| 2000 | * but allow concurrent faults), with pte both mapped and locked. | ||
| 2001 | * We return with mmap_sem still held, but pte unmapped and unlocked. | ||
| 2002 | */ | 1992 | */ |
| 2003 | static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, | 1993 | static inline int wp_page_reuse(struct mm_struct *mm, |
| 2004 | unsigned long address, pte_t *page_table, pmd_t *pmd, | 1994 | struct vm_area_struct *vma, unsigned long address, |
| 2005 | spinlock_t *ptl, pte_t orig_pte) | 1995 | pte_t *page_table, spinlock_t *ptl, pte_t orig_pte, |
| 1996 | struct page *page, int page_mkwrite, | ||
| 1997 | int dirty_shared) | ||
| 2006 | __releases(ptl) | 1998 | __releases(ptl) |
| 2007 | { | 1999 | { |
| 2008 | struct page *old_page, *new_page = NULL; | ||
| 2009 | pte_t entry; | 2000 | pte_t entry; |
| 2010 | int ret = 0; | ||
| 2011 | int page_mkwrite = 0; | ||
| 2012 | bool dirty_shared = false; | ||
| 2013 | unsigned long mmun_start = 0; /* For mmu_notifiers */ | ||
| 2014 | unsigned long mmun_end = 0; /* For mmu_notifiers */ | ||
| 2015 | struct mem_cgroup *memcg; | ||
| 2016 | |||
| 2017 | old_page = vm_normal_page(vma, address, orig_pte); | ||
| 2018 | if (!old_page) { | ||
| 2019 | /* | ||
| 2020 | * VM_MIXEDMAP !pfn_valid() case, or VM_SOFTDIRTY clear on a | ||
| 2021 | * VM_PFNMAP VMA. | ||
| 2022 | * | ||
| 2023 | * We should not cow pages in a shared writeable mapping. | ||
| 2024 | * Just mark the pages writable as we can't do any dirty | ||
| 2025 | * accounting on raw pfn maps. | ||
| 2026 | */ | ||
| 2027 | if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) == | ||
| 2028 | (VM_WRITE|VM_SHARED)) | ||
| 2029 | goto reuse; | ||
| 2030 | goto gotten; | ||
| 2031 | } | ||
| 2032 | |||
| 2033 | /* | 2001 | /* |
| 2034 | * Take out anonymous pages first, anonymous shared vmas are | 2002 | * Clear the pages cpupid information as the existing |
| 2035 | * not dirty accountable. | 2003 | * information potentially belongs to a now completely |
| 2004 | * unrelated process. | ||
| 2036 | */ | 2005 | */ |
| 2037 | if (PageAnon(old_page) && !PageKsm(old_page)) { | 2006 | if (page) |
| 2038 | if (!trylock_page(old_page)) { | 2007 | page_cpupid_xchg_last(page, (1 << LAST_CPUPID_SHIFT) - 1); |
| 2039 | page_cache_get(old_page); | ||
| 2040 | pte_unmap_unlock(page_table, ptl); | ||
| 2041 | lock_page(old_page); | ||
| 2042 | page_table = pte_offset_map_lock(mm, pmd, address, | ||
| 2043 | &ptl); | ||
| 2044 | if (!pte_same(*page_table, orig_pte)) { | ||
| 2045 | unlock_page(old_page); | ||
| 2046 | goto unlock; | ||
| 2047 | } | ||
| 2048 | page_cache_release(old_page); | ||
| 2049 | } | ||
| 2050 | if (reuse_swap_page(old_page)) { | ||
| 2051 | /* | ||
| 2052 | * The page is all ours. Move it to our anon_vma so | ||
| 2053 | * the rmap code will not search our parent or siblings. | ||
| 2054 | * Protected against the rmap code by the page lock. | ||
| 2055 | */ | ||
| 2056 | page_move_anon_rmap(old_page, vma, address); | ||
| 2057 | unlock_page(old_page); | ||
| 2058 | goto reuse; | ||
| 2059 | } | ||
| 2060 | unlock_page(old_page); | ||
| 2061 | } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) == | ||
| 2062 | (VM_WRITE|VM_SHARED))) { | ||
| 2063 | page_cache_get(old_page); | ||
| 2064 | /* | ||
| 2065 | * Only catch write-faults on shared writable pages, | ||
| 2066 | * read-only shared pages can get COWed by | ||
| 2067 | * get_user_pages(.write=1, .force=1). | ||
| 2068 | */ | ||
| 2069 | if (vma->vm_ops && vma->vm_ops->page_mkwrite) { | ||
| 2070 | int tmp; | ||
| 2071 | |||
| 2072 | pte_unmap_unlock(page_table, ptl); | ||
| 2073 | tmp = do_page_mkwrite(vma, old_page, address); | ||
| 2074 | if (unlikely(!tmp || (tmp & | ||
| 2075 | (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { | ||
| 2076 | page_cache_release(old_page); | ||
| 2077 | return tmp; | ||
| 2078 | } | ||
| 2079 | /* | ||
| 2080 | * Since we dropped the lock we need to revalidate | ||
| 2081 | * the PTE as someone else may have changed it. If | ||
| 2082 | * they did, we just return, as we can count on the | ||
| 2083 | * MMU to tell us if they didn't also make it writable. | ||
| 2084 | */ | ||
| 2085 | page_table = pte_offset_map_lock(mm, pmd, address, | ||
| 2086 | &ptl); | ||
| 2087 | if (!pte_same(*page_table, orig_pte)) { | ||
| 2088 | unlock_page(old_page); | ||
| 2089 | goto unlock; | ||
| 2090 | } | ||
| 2091 | page_mkwrite = 1; | ||
| 2092 | } | ||
| 2093 | |||
| 2094 | dirty_shared = true; | ||
| 2095 | |||
| 2096 | reuse: | ||
| 2097 | /* | ||
| 2098 | * Clear the pages cpupid information as the existing | ||
| 2099 | * information potentially belongs to a now completely | ||
| 2100 | * unrelated process. | ||
| 2101 | */ | ||
| 2102 | if (old_page) | ||
| 2103 | page_cpupid_xchg_last(old_page, (1 << LAST_CPUPID_SHIFT) - 1); | ||
| 2104 | |||
| 2105 | flush_cache_page(vma, address, pte_pfn(orig_pte)); | ||
| 2106 | entry = pte_mkyoung(orig_pte); | ||
| 2107 | entry = maybe_mkwrite(pte_mkdirty(entry), vma); | ||
| 2108 | if (ptep_set_access_flags(vma, address, page_table, entry,1)) | ||
| 2109 | update_mmu_cache(vma, address, page_table); | ||
| 2110 | pte_unmap_unlock(page_table, ptl); | ||
| 2111 | ret |= VM_FAULT_WRITE; | ||
| 2112 | 2008 | ||
| 2113 | if (dirty_shared) { | 2009 | flush_cache_page(vma, address, pte_pfn(orig_pte)); |
| 2114 | struct address_space *mapping; | 2010 | entry = pte_mkyoung(orig_pte); |
| 2115 | int dirtied; | 2011 | entry = maybe_mkwrite(pte_mkdirty(entry), vma); |
| 2012 | if (ptep_set_access_flags(vma, address, page_table, entry, 1)) | ||
| 2013 | update_mmu_cache(vma, address, page_table); | ||
| 2014 | pte_unmap_unlock(page_table, ptl); | ||
| 2116 | 2015 | ||
| 2117 | if (!page_mkwrite) | 2016 | if (dirty_shared) { |
| 2118 | lock_page(old_page); | 2017 | struct address_space *mapping; |
| 2018 | int dirtied; | ||
| 2119 | 2019 | ||
| 2120 | dirtied = set_page_dirty(old_page); | 2020 | if (!page_mkwrite) |
| 2121 | VM_BUG_ON_PAGE(PageAnon(old_page), old_page); | 2021 | lock_page(page); |
| 2122 | mapping = old_page->mapping; | ||
| 2123 | unlock_page(old_page); | ||
| 2124 | page_cache_release(old_page); | ||
| 2125 | 2022 | ||
| 2126 | if ((dirtied || page_mkwrite) && mapping) { | 2023 | dirtied = set_page_dirty(page); |
| 2127 | /* | 2024 | VM_BUG_ON_PAGE(PageAnon(page), page); |
| 2128 | * Some device drivers do not set page.mapping | 2025 | mapping = page->mapping; |
| 2129 | * but still dirty their pages | 2026 | unlock_page(page); |
| 2130 | */ | 2027 | page_cache_release(page); |
| 2131 | balance_dirty_pages_ratelimited(mapping); | ||
| 2132 | } | ||
| 2133 | 2028 | ||
| 2134 | if (!page_mkwrite) | 2029 | if ((dirtied || page_mkwrite) && mapping) { |
| 2135 | file_update_time(vma->vm_file); | 2030 | /* |
| 2031 | * Some device drivers do not set page.mapping | ||
| 2032 | * but still dirty their pages | ||
| 2033 | */ | ||
| 2034 | balance_dirty_pages_ratelimited(mapping); | ||
| 2136 | } | 2035 | } |
| 2137 | 2036 | ||
| 2138 | return ret; | 2037 | if (!page_mkwrite) |
| 2038 | file_update_time(vma->vm_file); | ||
| 2139 | } | 2039 | } |
| 2140 | 2040 | ||
| 2141 | /* | 2041 | return VM_FAULT_WRITE; |
| 2142 | * Ok, we need to copy. Oh, well.. | 2042 | } |
| 2143 | */ | 2043 | |
| 2144 | page_cache_get(old_page); | 2044 | /* |
| 2145 | gotten: | 2045 | * Handle the case of a page which we actually need to copy to a new page. |
| 2146 | pte_unmap_unlock(page_table, ptl); | 2046 | * |
| 2047 | * Called with mmap_sem locked and the old page referenced, but | ||
| 2048 | * without the ptl held. | ||
| 2049 | * | ||
| 2050 | * High level logic flow: | ||
| 2051 | * | ||
| 2052 | * - Allocate a page, copy the content of the old page to the new one. | ||
| 2053 | * - Handle book keeping and accounting - cgroups, mmu-notifiers, etc. | ||
| 2054 | * - Take the PTL. If the pte changed, bail out and release the allocated page | ||
| 2055 | * - If the pte is still the way we remember it, update the page table and all | ||
| 2056 | * relevant references. This includes dropping the reference the page-table | ||
| 2057 | * held to the old page, as well as updating the rmap. | ||
| 2058 | * - In any case, unlock the PTL and drop the reference we took to the old page. | ||
| 2059 | */ | ||
| 2060 | static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma, | ||
| 2061 | unsigned long address, pte_t *page_table, pmd_t *pmd, | ||
| 2062 | pte_t orig_pte, struct page *old_page) | ||
| 2063 | { | ||
| 2064 | struct page *new_page = NULL; | ||
| 2065 | spinlock_t *ptl = NULL; | ||
| 2066 | pte_t entry; | ||
| 2067 | int page_copied = 0; | ||
| 2068 | const unsigned long mmun_start = address & PAGE_MASK; /* For mmu_notifiers */ | ||
| 2069 | const unsigned long mmun_end = mmun_start + PAGE_SIZE; /* For mmu_notifiers */ | ||
| 2070 | struct mem_cgroup *memcg; | ||
| 2147 | 2071 | ||
| 2148 | if (unlikely(anon_vma_prepare(vma))) | 2072 | if (unlikely(anon_vma_prepare(vma))) |
| 2149 | goto oom; | 2073 | goto oom; |
| @@ -2163,8 +2087,6 @@ gotten: | |||
| 2163 | if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg)) | 2087 | if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg)) |
| 2164 | goto oom_free_new; | 2088 | goto oom_free_new; |
| 2165 | 2089 | ||
| 2166 | mmun_start = address & PAGE_MASK; | ||
| 2167 | mmun_end = mmun_start + PAGE_SIZE; | ||
| 2168 | mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); | 2090 | mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); |
| 2169 | 2091 | ||
| 2170 | /* | 2092 | /* |
| @@ -2177,8 +2099,9 @@ gotten: | |||
| 2177 | dec_mm_counter_fast(mm, MM_FILEPAGES); | 2099 | dec_mm_counter_fast(mm, MM_FILEPAGES); |
| 2178 | inc_mm_counter_fast(mm, MM_ANONPAGES); | 2100 | inc_mm_counter_fast(mm, MM_ANONPAGES); |
| 2179 | } | 2101 | } |
| 2180 | } else | 2102 | } else { |
| 2181 | inc_mm_counter_fast(mm, MM_ANONPAGES); | 2103 | inc_mm_counter_fast(mm, MM_ANONPAGES); |
| 2104 | } | ||
| 2182 | flush_cache_page(vma, address, pte_pfn(orig_pte)); | 2105 | flush_cache_page(vma, address, pte_pfn(orig_pte)); |
| 2183 | entry = mk_pte(new_page, vma->vm_page_prot); | 2106 | entry = mk_pte(new_page, vma->vm_page_prot); |
| 2184 | entry = maybe_mkwrite(pte_mkdirty(entry), vma); | 2107 | entry = maybe_mkwrite(pte_mkdirty(entry), vma); |
| @@ -2227,29 +2150,29 @@ gotten: | |||
| 2227 | 2150 | ||
| 2228 | /* Free the old page.. */ | 2151 | /* Free the old page.. */ |
| 2229 | new_page = old_page; | 2152 | new_page = old_page; |
| 2230 | ret |= VM_FAULT_WRITE; | 2153 | page_copied = 1; |
| 2231 | } else | 2154 | } else { |
| 2232 | mem_cgroup_cancel_charge(new_page, memcg); | 2155 | mem_cgroup_cancel_charge(new_page, memcg); |
| 2156 | } | ||
| 2233 | 2157 | ||
| 2234 | if (new_page) | 2158 | if (new_page) |
| 2235 | page_cache_release(new_page); | 2159 | page_cache_release(new_page); |
| 2236 | unlock: | 2160 | |
| 2237 | pte_unmap_unlock(page_table, ptl); | 2161 | pte_unmap_unlock(page_table, ptl); |
| 2238 | if (mmun_end > mmun_start) | 2162 | mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); |
| 2239 | mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); | ||
| 2240 | if (old_page) { | 2163 | if (old_page) { |
| 2241 | /* | 2164 | /* |
| 2242 | * Don't let another task, with possibly unlocked vma, | 2165 | * Don't let another task, with possibly unlocked vma, |
| 2243 | * keep the mlocked page. | 2166 | * keep the mlocked page. |
| 2244 | */ | 2167 | */ |
| 2245 | if ((ret & VM_FAULT_WRITE) && (vma->vm_flags & VM_LOCKED)) { | 2168 | if (page_copied && (vma->vm_flags & VM_LOCKED)) { |
| 2246 | lock_page(old_page); /* LRU manipulation */ | 2169 | lock_page(old_page); /* LRU manipulation */ |
| 2247 | munlock_vma_page(old_page); | 2170 | munlock_vma_page(old_page); |
| 2248 | unlock_page(old_page); | 2171 | unlock_page(old_page); |
| 2249 | } | 2172 | } |
| 2250 | page_cache_release(old_page); | 2173 | page_cache_release(old_page); |
| 2251 | } | 2174 | } |
| 2252 | return ret; | 2175 | return page_copied ? VM_FAULT_WRITE : 0; |
| 2253 | oom_free_new: | 2176 | oom_free_new: |
| 2254 | page_cache_release(new_page); | 2177 | page_cache_release(new_page); |
| 2255 | oom: | 2178 | oom: |
| @@ -2258,6 +2181,144 @@ oom: | |||
| 2258 | return VM_FAULT_OOM; | 2181 | return VM_FAULT_OOM; |
| 2259 | } | 2182 | } |
| 2260 | 2183 | ||
| 2184 | static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma, | ||
| 2185 | unsigned long address, pte_t *page_table, | ||
| 2186 | pmd_t *pmd, spinlock_t *ptl, pte_t orig_pte, | ||
| 2187 | struct page *old_page) | ||
| 2188 | __releases(ptl) | ||
| 2189 | { | ||
| 2190 | int page_mkwrite = 0; | ||
| 2191 | |||
| 2192 | page_cache_get(old_page); | ||
| 2193 | |||
| 2194 | /* | ||
| 2195 | * Only catch write-faults on shared writable pages, | ||
| 2196 | * read-only shared pages can get COWed by | ||
| 2197 | * get_user_pages(.write=1, .force=1). | ||
| 2198 | */ | ||
| 2199 | if (vma->vm_ops && vma->vm_ops->page_mkwrite) { | ||
| 2200 | int tmp; | ||
| 2201 | |||
| 2202 | pte_unmap_unlock(page_table, ptl); | ||
| 2203 | tmp = do_page_mkwrite(vma, old_page, address); | ||
| 2204 | if (unlikely(!tmp || (tmp & | ||
| 2205 | (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { | ||
| 2206 | page_cache_release(old_page); | ||
| 2207 | return tmp; | ||
| 2208 | } | ||
| 2209 | /* | ||
| 2210 | * Since we dropped the lock we need to revalidate | ||
| 2211 | * the PTE as someone else may have changed it. If | ||
| 2212 | * they did, we just return, as we can count on the | ||
| 2213 | * MMU to tell us if they didn't also make it writable. | ||
| 2214 | */ | ||
| 2215 | page_table = pte_offset_map_lock(mm, pmd, address, | ||
| 2216 | &ptl); | ||
| 2217 | if (!pte_same(*page_table, orig_pte)) { | ||
| 2218 | unlock_page(old_page); | ||
| 2219 | pte_unmap_unlock(page_table, ptl); | ||
| 2220 | page_cache_release(old_page); | ||
| 2221 | return 0; | ||
| 2222 | } | ||
| 2223 | page_mkwrite = 1; | ||
| 2224 | } | ||
| 2225 | |||
| 2226 | return wp_page_reuse(mm, vma, address, page_table, ptl, | ||
| 2227 | orig_pte, old_page, page_mkwrite, 1); | ||
| 2228 | } | ||
| 2229 | |||
| 2230 | /* | ||
| 2231 | * This routine handles present pages, when users try to write | ||
| 2232 | * to a shared page. It is done by copying the page to a new address | ||
| 2233 | * and decrementing the shared-page counter for the old page. | ||
| 2234 | * | ||
| 2235 | * Note that this routine assumes that the protection checks have been | ||
| 2236 | * done by the caller (the low-level page fault routine in most cases). | ||
| 2237 | * Thus we can safely just mark it writable once we've done any necessary | ||
| 2238 | * COW. | ||
| 2239 | * | ||
| 2240 | * We also mark the page dirty at this point even though the page will | ||
| 2241 | * change only once the write actually happens. This avoids a few races, | ||
| 2242 | * and potentially makes it more efficient. | ||
| 2243 | * | ||
| 2244 | * We enter with non-exclusive mmap_sem (to exclude vma changes, | ||
| 2245 | * but allow concurrent faults), with pte both mapped and locked. | ||
| 2246 | * We return with mmap_sem still held, but pte unmapped and unlocked. | ||
| 2247 | */ | ||
| 2248 | static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, | ||
| 2249 | unsigned long address, pte_t *page_table, pmd_t *pmd, | ||
| 2250 | spinlock_t *ptl, pte_t orig_pte) | ||
| 2251 | __releases(ptl) | ||
| 2252 | { | ||
| 2253 | struct page *old_page; | ||
| 2254 | |||
| 2255 | old_page = vm_normal_page(vma, address, orig_pte); | ||
| 2256 | if (!old_page) { | ||
| 2257 | /* | ||
| 2258 | * VM_MIXEDMAP !pfn_valid() case, or VM_SOFTDIRTY clear on a | ||
| 2259 | * VM_PFNMAP VMA. | ||
| 2260 | * | ||
| 2261 | * We should not cow pages in a shared writeable mapping. | ||
| 2262 | * Just mark the pages writable as we can't do any dirty | ||
| 2263 | * accounting on raw pfn maps. | ||
| 2264 | */ | ||
| 2265 | if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) == | ||
| 2266 | (VM_WRITE|VM_SHARED)) | ||
| 2267 | return wp_page_reuse(mm, vma, address, page_table, ptl, | ||
| 2268 | orig_pte, old_page, 0, 0); | ||
| 2269 | |||
| 2270 | pte_unmap_unlock(page_table, ptl); | ||
| 2271 | return wp_page_copy(mm, vma, address, page_table, pmd, | ||
| 2272 | orig_pte, old_page); | ||
| 2273 | } | ||
| 2274 | |||
| 2275 | /* | ||
| 2276 | * Take out anonymous pages first, anonymous shared vmas are | ||
| 2277 | * not dirty accountable. | ||
| 2278 | */ | ||
| 2279 | if (PageAnon(old_page) && !PageKsm(old_page)) { | ||
| 2280 | if (!trylock_page(old_page)) { | ||
| 2281 | page_cache_get(old_page); | ||
| 2282 | pte_unmap_unlock(page_table, ptl); | ||
| 2283 | lock_page(old_page); | ||
| 2284 | page_table = pte_offset_map_lock(mm, pmd, address, | ||
| 2285 | &ptl); | ||
| 2286 | if (!pte_same(*page_table, orig_pte)) { | ||
| 2287 | unlock_page(old_page); | ||
| 2288 | pte_unmap_unlock(page_table, ptl); | ||
| 2289 | page_cache_release(old_page); | ||
| 2290 | return 0; | ||
| 2291 | } | ||
| 2292 | page_cache_release(old_page); | ||
| 2293 | } | ||
| 2294 | if (reuse_swap_page(old_page)) { | ||
| 2295 | /* | ||
| 2296 | * The page is all ours. Move it to our anon_vma so | ||
| 2297 | * the rmap code will not search our parent or siblings. | ||
| 2298 | * Protected against the rmap code by the page lock. | ||
| 2299 | */ | ||
| 2300 | page_move_anon_rmap(old_page, vma, address); | ||
| 2301 | unlock_page(old_page); | ||
| 2302 | return wp_page_reuse(mm, vma, address, page_table, ptl, | ||
| 2303 | orig_pte, old_page, 0, 0); | ||
| 2304 | } | ||
| 2305 | unlock_page(old_page); | ||
| 2306 | } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) == | ||
| 2307 | (VM_WRITE|VM_SHARED))) { | ||
| 2308 | return wp_page_shared(mm, vma, address, page_table, pmd, | ||
| 2309 | ptl, orig_pte, old_page); | ||
| 2310 | } | ||
| 2311 | |||
| 2312 | /* | ||
| 2313 | * Ok, we need to copy. Oh, well.. | ||
| 2314 | */ | ||
| 2315 | page_cache_get(old_page); | ||
| 2316 | |||
| 2317 | pte_unmap_unlock(page_table, ptl); | ||
| 2318 | return wp_page_copy(mm, vma, address, page_table, pmd, | ||
| 2319 | orig_pte, old_page); | ||
| 2320 | } | ||
| 2321 | |||
| 2261 | static void unmap_mapping_range_vma(struct vm_area_struct *vma, | 2322 | static void unmap_mapping_range_vma(struct vm_area_struct *vma, |
| 2262 | unsigned long start_addr, unsigned long end_addr, | 2323 | unsigned long start_addr, unsigned long end_addr, |
| 2263 | struct zap_details *details) | 2324 | struct zap_details *details) |
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 65842d688b7c..e2e8014fb755 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c | |||
| @@ -104,7 +104,7 @@ void put_online_mems(void) | |||
| 104 | 104 | ||
| 105 | } | 105 | } |
| 106 | 106 | ||
| 107 | static void mem_hotplug_begin(void) | 107 | void mem_hotplug_begin(void) |
| 108 | { | 108 | { |
| 109 | mem_hotplug.active_writer = current; | 109 | mem_hotplug.active_writer = current; |
| 110 | 110 | ||
| @@ -119,7 +119,7 @@ static void mem_hotplug_begin(void) | |||
| 119 | } | 119 | } |
| 120 | } | 120 | } |
| 121 | 121 | ||
| 122 | static void mem_hotplug_done(void) | 122 | void mem_hotplug_done(void) |
| 123 | { | 123 | { |
| 124 | mem_hotplug.active_writer = NULL; | 124 | mem_hotplug.active_writer = NULL; |
| 125 | mutex_unlock(&mem_hotplug.lock); | 125 | mutex_unlock(&mem_hotplug.lock); |
| @@ -502,7 +502,7 @@ int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn, | |||
| 502 | end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1); | 502 | end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1); |
| 503 | 503 | ||
| 504 | for (i = start_sec; i <= end_sec; i++) { | 504 | for (i = start_sec; i <= end_sec; i++) { |
| 505 | err = __add_section(nid, zone, i << PFN_SECTION_SHIFT); | 505 | err = __add_section(nid, zone, section_nr_to_pfn(i)); |
| 506 | 506 | ||
| 507 | /* | 507 | /* |
| 508 | * EEXIST is finally dealt with by ioresource collision | 508 | * EEXIST is finally dealt with by ioresource collision |
| @@ -959,6 +959,7 @@ static void node_states_set_node(int node, struct memory_notify *arg) | |||
| 959 | } | 959 | } |
| 960 | 960 | ||
| 961 | 961 | ||
| 962 | /* Must be protected by mem_hotplug_begin() */ | ||
| 962 | int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_type) | 963 | int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_type) |
| 963 | { | 964 | { |
| 964 | unsigned long flags; | 965 | unsigned long flags; |
| @@ -969,7 +970,6 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ | |||
| 969 | int ret; | 970 | int ret; |
| 970 | struct memory_notify arg; | 971 | struct memory_notify arg; |
| 971 | 972 | ||
| 972 | mem_hotplug_begin(); | ||
| 973 | /* | 973 | /* |
| 974 | * This doesn't need a lock to do pfn_to_page(). | 974 | * This doesn't need a lock to do pfn_to_page(). |
| 975 | * The section can't be removed here because of the | 975 | * The section can't be removed here because of the |
| @@ -977,21 +977,20 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ | |||
| 977 | */ | 977 | */ |
| 978 | zone = page_zone(pfn_to_page(pfn)); | 978 | zone = page_zone(pfn_to_page(pfn)); |
| 979 | 979 | ||
| 980 | ret = -EINVAL; | ||
| 981 | if ((zone_idx(zone) > ZONE_NORMAL || | 980 | if ((zone_idx(zone) > ZONE_NORMAL || |
| 982 | online_type == MMOP_ONLINE_MOVABLE) && | 981 | online_type == MMOP_ONLINE_MOVABLE) && |
| 983 | !can_online_high_movable(zone)) | 982 | !can_online_high_movable(zone)) |
| 984 | goto out; | 983 | return -EINVAL; |
| 985 | 984 | ||
| 986 | if (online_type == MMOP_ONLINE_KERNEL && | 985 | if (online_type == MMOP_ONLINE_KERNEL && |
| 987 | zone_idx(zone) == ZONE_MOVABLE) { | 986 | zone_idx(zone) == ZONE_MOVABLE) { |
| 988 | if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages)) | 987 | if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages)) |
| 989 | goto out; | 988 | return -EINVAL; |
| 990 | } | 989 | } |
| 991 | if (online_type == MMOP_ONLINE_MOVABLE && | 990 | if (online_type == MMOP_ONLINE_MOVABLE && |
| 992 | zone_idx(zone) == ZONE_MOVABLE - 1) { | 991 | zone_idx(zone) == ZONE_MOVABLE - 1) { |
| 993 | if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages)) | 992 | if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages)) |
| 994 | goto out; | 993 | return -EINVAL; |
| 995 | } | 994 | } |
| 996 | 995 | ||
| 997 | /* Previous code may changed the zone of the pfn range */ | 996 | /* Previous code may changed the zone of the pfn range */ |
| @@ -1007,7 +1006,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ | |||
| 1007 | ret = notifier_to_errno(ret); | 1006 | ret = notifier_to_errno(ret); |
| 1008 | if (ret) { | 1007 | if (ret) { |
| 1009 | memory_notify(MEM_CANCEL_ONLINE, &arg); | 1008 | memory_notify(MEM_CANCEL_ONLINE, &arg); |
| 1010 | goto out; | 1009 | return ret; |
| 1011 | } | 1010 | } |
| 1012 | /* | 1011 | /* |
| 1013 | * If this zone is not populated, then it is not in zonelist. | 1012 | * If this zone is not populated, then it is not in zonelist. |
| @@ -1031,7 +1030,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ | |||
| 1031 | (((unsigned long long) pfn + nr_pages) | 1030 | (((unsigned long long) pfn + nr_pages) |
| 1032 | << PAGE_SHIFT) - 1); | 1031 | << PAGE_SHIFT) - 1); |
| 1033 | memory_notify(MEM_CANCEL_ONLINE, &arg); | 1032 | memory_notify(MEM_CANCEL_ONLINE, &arg); |
| 1034 | goto out; | 1033 | return ret; |
| 1035 | } | 1034 | } |
| 1036 | 1035 | ||
| 1037 | zone->present_pages += onlined_pages; | 1036 | zone->present_pages += onlined_pages; |
| @@ -1061,9 +1060,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ | |||
| 1061 | 1060 | ||
| 1062 | if (onlined_pages) | 1061 | if (onlined_pages) |
| 1063 | memory_notify(MEM_ONLINE, &arg); | 1062 | memory_notify(MEM_ONLINE, &arg); |
| 1064 | out: | 1063 | return 0; |
| 1065 | mem_hotplug_done(); | ||
| 1066 | return ret; | ||
| 1067 | } | 1064 | } |
| 1068 | #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ | 1065 | #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ |
| 1069 | 1066 | ||
| @@ -1688,21 +1685,18 @@ static int __ref __offline_pages(unsigned long start_pfn, | |||
| 1688 | if (!test_pages_in_a_zone(start_pfn, end_pfn)) | 1685 | if (!test_pages_in_a_zone(start_pfn, end_pfn)) |
| 1689 | return -EINVAL; | 1686 | return -EINVAL; |
| 1690 | 1687 | ||
| 1691 | mem_hotplug_begin(); | ||
| 1692 | |||
| 1693 | zone = page_zone(pfn_to_page(start_pfn)); | 1688 | zone = page_zone(pfn_to_page(start_pfn)); |
| 1694 | node = zone_to_nid(zone); | 1689 | node = zone_to_nid(zone); |
| 1695 | nr_pages = end_pfn - start_pfn; | 1690 | nr_pages = end_pfn - start_pfn; |
| 1696 | 1691 | ||
| 1697 | ret = -EINVAL; | ||
| 1698 | if (zone_idx(zone) <= ZONE_NORMAL && !can_offline_normal(zone, nr_pages)) | 1692 | if (zone_idx(zone) <= ZONE_NORMAL && !can_offline_normal(zone, nr_pages)) |
| 1699 | goto out; | 1693 | return -EINVAL; |
| 1700 | 1694 | ||
| 1701 | /* set above range as isolated */ | 1695 | /* set above range as isolated */ |
| 1702 | ret = start_isolate_page_range(start_pfn, end_pfn, | 1696 | ret = start_isolate_page_range(start_pfn, end_pfn, |
| 1703 | MIGRATE_MOVABLE, true); | 1697 | MIGRATE_MOVABLE, true); |
| 1704 | if (ret) | 1698 | if (ret) |
| 1705 | goto out; | 1699 | return ret; |
| 1706 | 1700 | ||
| 1707 | arg.start_pfn = start_pfn; | 1701 | arg.start_pfn = start_pfn; |
| 1708 | arg.nr_pages = nr_pages; | 1702 | arg.nr_pages = nr_pages; |
| @@ -1795,7 +1789,6 @@ repeat: | |||
| 1795 | writeback_set_ratelimit(); | 1789 | writeback_set_ratelimit(); |
| 1796 | 1790 | ||
| 1797 | memory_notify(MEM_OFFLINE, &arg); | 1791 | memory_notify(MEM_OFFLINE, &arg); |
| 1798 | mem_hotplug_done(); | ||
| 1799 | return 0; | 1792 | return 0; |
| 1800 | 1793 | ||
| 1801 | failed_removal: | 1794 | failed_removal: |
| @@ -1805,12 +1798,10 @@ failed_removal: | |||
| 1805 | memory_notify(MEM_CANCEL_OFFLINE, &arg); | 1798 | memory_notify(MEM_CANCEL_OFFLINE, &arg); |
| 1806 | /* pushback to free area */ | 1799 | /* pushback to free area */ |
| 1807 | undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); | 1800 | undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); |
| 1808 | |||
| 1809 | out: | ||
| 1810 | mem_hotplug_done(); | ||
| 1811 | return ret; | 1801 | return ret; |
| 1812 | } | 1802 | } |
| 1813 | 1803 | ||
| 1804 | /* Must be protected by mem_hotplug_begin() */ | ||
| 1814 | int offline_pages(unsigned long start_pfn, unsigned long nr_pages) | 1805 | int offline_pages(unsigned long start_pfn, unsigned long nr_pages) |
| 1815 | { | 1806 | { |
| 1816 | return __offline_pages(start_pfn, start_pfn + nr_pages, 120 * HZ); | 1807 | return __offline_pages(start_pfn, start_pfn + nr_pages, 120 * HZ); |
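
Since online_pages() and offline_pages() no longer take the hotplug lock themselves, and mem_hotplug_begin()/mem_hotplug_done() are now non-static, callers are expected to hold the lock around these calls. A minimal caller sketch under that assumption (the function name is hypothetical; the real caller is the memory_block device code):

    #include <linux/memory_hotplug.h>

    static int example_online_range(unsigned long start_pfn, unsigned long nr_pages)
    {
            int ret;

            mem_hotplug_begin();
            ret = online_pages(start_pfn, nr_pages, MMOP_ONLINE_KERNEL);
            mem_hotplug_done();

            return ret;
    }
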
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 4721046a134a..ede26291d4aa 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
| @@ -945,7 +945,8 @@ static struct page *new_node_page(struct page *page, unsigned long node, int **x | |||
| 945 | return alloc_huge_page_node(page_hstate(compound_head(page)), | 945 | return alloc_huge_page_node(page_hstate(compound_head(page)), |
| 946 | node); | 946 | node); |
| 947 | else | 947 | else |
| 948 | return alloc_pages_exact_node(node, GFP_HIGHUSER_MOVABLE, 0); | 948 | return alloc_pages_exact_node(node, GFP_HIGHUSER_MOVABLE | |
| 949 | __GFP_THISNODE, 0); | ||
| 949 | } | 950 | } |
| 950 | 951 | ||
| 951 | /* | 952 | /* |
| @@ -1985,7 +1986,8 @@ retry_cpuset: | |||
| 1985 | nmask = policy_nodemask(gfp, pol); | 1986 | nmask = policy_nodemask(gfp, pol); |
| 1986 | if (!nmask || node_isset(node, *nmask)) { | 1987 | if (!nmask || node_isset(node, *nmask)) { |
| 1987 | mpol_cond_put(pol); | 1988 | mpol_cond_put(pol); |
| 1988 | page = alloc_pages_exact_node(node, gfp, order); | 1989 | page = alloc_pages_exact_node(node, |
| 1990 | gfp | __GFP_THISNODE, order); | ||
| 1989 | goto out; | 1991 | goto out; |
| 1990 | } | 1992 | } |
| 1991 | } | 1993 | } |
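
Both mempolicy call sites above add __GFP_THISNODE so the page must come from the requested node instead of falling back through the zonelist. The pattern as a stand-alone sketch (hypothetical wrapper, not part of the patch):

    #include <linux/gfp.h>

    /* Sketch: allocate one page strictly on @nid; fail rather than fall back
     * to a different node. */
    static struct page *example_alloc_on_node(int nid, gfp_t gfp)
    {
            return alloc_pages_exact_node(nid, gfp | __GFP_THISNODE, 0);
    }
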
diff --git a/mm/mempool.c b/mm/mempool.c index e209c98c7203..949970db2874 100644 --- a/mm/mempool.c +++ b/mm/mempool.c | |||
| @@ -113,23 +113,24 @@ EXPORT_SYMBOL(mempool_create_node); | |||
| 113 | * mempool_create(). | 113 | * mempool_create(). |
| 114 | * @new_min_nr: the new minimum number of elements guaranteed to be | 114 | * @new_min_nr: the new minimum number of elements guaranteed to be |
| 115 | * allocated for this pool. | 115 | * allocated for this pool. |
| 116 | * @gfp_mask: the usual allocation bitmask. | ||
| 117 | * | 116 | * |
| 118 | * This function shrinks/grows the pool. In the case of growing, | 117 | * This function shrinks/grows the pool. In the case of growing, |
| 119 | * it cannot be guaranteed that the pool will be grown to the new | 118 | * it cannot be guaranteed that the pool will be grown to the new |
| 120 | * size immediately, but new mempool_free() calls will refill it. | 119 | * size immediately, but new mempool_free() calls will refill it. |
| 120 | * This function may sleep. | ||
| 121 | * | 121 | * |
| 122 | * Note, the caller must guarantee that no mempool_destroy is called | 122 | * Note, the caller must guarantee that no mempool_destroy is called |
| 123 | * while this function is running. mempool_alloc() & mempool_free() | 123 | * while this function is running. mempool_alloc() & mempool_free() |
| 124 | * might be called (eg. from IRQ contexts) while this function executes. | 124 | * might be called (eg. from IRQ contexts) while this function executes. |
| 125 | */ | 125 | */ |
| 126 | int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask) | 126 | int mempool_resize(mempool_t *pool, int new_min_nr) |
| 127 | { | 127 | { |
| 128 | void *element; | 128 | void *element; |
| 129 | void **new_elements; | 129 | void **new_elements; |
| 130 | unsigned long flags; | 130 | unsigned long flags; |
| 131 | 131 | ||
| 132 | BUG_ON(new_min_nr <= 0); | 132 | BUG_ON(new_min_nr <= 0); |
| 133 | might_sleep(); | ||
| 133 | 134 | ||
| 134 | spin_lock_irqsave(&pool->lock, flags); | 135 | spin_lock_irqsave(&pool->lock, flags); |
| 135 | if (new_min_nr <= pool->min_nr) { | 136 | if (new_min_nr <= pool->min_nr) { |
| @@ -145,7 +146,8 @@ int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask) | |||
| 145 | spin_unlock_irqrestore(&pool->lock, flags); | 146 | spin_unlock_irqrestore(&pool->lock, flags); |
| 146 | 147 | ||
| 147 | /* Grow the pool */ | 148 | /* Grow the pool */ |
| 148 | new_elements = kmalloc(new_min_nr * sizeof(*new_elements), gfp_mask); | 149 | new_elements = kmalloc_array(new_min_nr, sizeof(*new_elements), |
| 150 | GFP_KERNEL); | ||
| 149 | if (!new_elements) | 151 | if (!new_elements) |
| 150 | return -ENOMEM; | 152 | return -ENOMEM; |
| 151 | 153 | ||
| @@ -164,7 +166,7 @@ int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask) | |||
| 164 | 166 | ||
| 165 | while (pool->curr_nr < pool->min_nr) { | 167 | while (pool->curr_nr < pool->min_nr) { |
| 166 | spin_unlock_irqrestore(&pool->lock, flags); | 168 | spin_unlock_irqrestore(&pool->lock, flags); |
| 167 | element = pool->alloc(gfp_mask, pool->pool_data); | 169 | element = pool->alloc(GFP_KERNEL, pool->pool_data); |
| 168 | if (!element) | 170 | if (!element) |
| 169 | goto out; | 171 | goto out; |
| 170 | spin_lock_irqsave(&pool->lock, flags); | 172 | spin_lock_irqsave(&pool->lock, flags); |
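
With the gfp_mask argument gone, mempool_resize() always allocates with GFP_KERNEL and may sleep, so it can only be called from process context. A caller sketch under that assumption (the function name is made up):

    #include <linux/mempool.h>

    /* Sketch: grow a pool with the new two-argument mempool_resize().
     * Old form: mempool_resize(pool, new_min_nr, GFP_KERNEL); */
    static int example_grow_pool(mempool_t *pool, int new_min_nr)
    {
            return mempool_resize(pool, new_min_nr);
    }
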
diff --git a/arch/x86/mm/memtest.c b/mm/memtest.c index 1e9da795767a..1997d934b13b 100644 --- a/arch/x86/mm/memtest.c +++ b/mm/memtest.c | |||
| @@ -29,7 +29,7 @@ static u64 patterns[] __initdata = { | |||
| 29 | 0x7a6c7258554e494cULL, /* yeah ;-) */ | 29 | 0x7a6c7258554e494cULL, /* yeah ;-) */ |
| 30 | }; | 30 | }; |
| 31 | 31 | ||
| 32 | static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad) | 32 | static void __init reserve_bad_mem(u64 pattern, phys_addr_t start_bad, phys_addr_t end_bad) |
| 33 | { | 33 | { |
| 34 | printk(KERN_INFO " %016llx bad mem addr %010llx - %010llx reserved\n", | 34 | printk(KERN_INFO " %016llx bad mem addr %010llx - %010llx reserved\n", |
| 35 | (unsigned long long) pattern, | 35 | (unsigned long long) pattern, |
| @@ -38,11 +38,11 @@ static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad) | |||
| 38 | memblock_reserve(start_bad, end_bad - start_bad); | 38 | memblock_reserve(start_bad, end_bad - start_bad); |
| 39 | } | 39 | } |
| 40 | 40 | ||
| 41 | static void __init memtest(u64 pattern, u64 start_phys, u64 size) | 41 | static void __init memtest(u64 pattern, phys_addr_t start_phys, phys_addr_t size) |
| 42 | { | 42 | { |
| 43 | u64 *p, *start, *end; | 43 | u64 *p, *start, *end; |
| 44 | u64 start_bad, last_bad; | 44 | phys_addr_t start_bad, last_bad; |
| 45 | u64 start_phys_aligned; | 45 | phys_addr_t start_phys_aligned; |
| 46 | const size_t incr = sizeof(pattern); | 46 | const size_t incr = sizeof(pattern); |
| 47 | 47 | ||
| 48 | start_phys_aligned = ALIGN(start_phys, incr); | 48 | start_phys_aligned = ALIGN(start_phys, incr); |
| @@ -69,14 +69,14 @@ static void __init memtest(u64 pattern, u64 start_phys, u64 size) | |||
| 69 | reserve_bad_mem(pattern, start_bad, last_bad + incr); | 69 | reserve_bad_mem(pattern, start_bad, last_bad + incr); |
| 70 | } | 70 | } |
| 71 | 71 | ||
| 72 | static void __init do_one_pass(u64 pattern, u64 start, u64 end) | 72 | static void __init do_one_pass(u64 pattern, phys_addr_t start, phys_addr_t end) |
| 73 | { | 73 | { |
| 74 | u64 i; | 74 | u64 i; |
| 75 | phys_addr_t this_start, this_end; | 75 | phys_addr_t this_start, this_end; |
| 76 | 76 | ||
| 77 | for_each_free_mem_range(i, NUMA_NO_NODE, &this_start, &this_end, NULL) { | 77 | for_each_free_mem_range(i, NUMA_NO_NODE, &this_start, &this_end, NULL) { |
| 78 | this_start = clamp_t(phys_addr_t, this_start, start, end); | 78 | this_start = clamp(this_start, start, end); |
| 79 | this_end = clamp_t(phys_addr_t, this_end, start, end); | 79 | this_end = clamp(this_end, start, end); |
| 80 | if (this_start < this_end) { | 80 | if (this_start < this_end) { |
| 81 | printk(KERN_INFO " %010llx - %010llx pattern %016llx\n", | 81 | printk(KERN_INFO " %010llx - %010llx pattern %016llx\n", |
| 82 | (unsigned long long)this_start, | 82 | (unsigned long long)this_start, |
| @@ -102,7 +102,7 @@ static int __init parse_memtest(char *arg) | |||
| 102 | 102 | ||
| 103 | early_param("memtest", parse_memtest); | 103 | early_param("memtest", parse_memtest); |
| 104 | 104 | ||
| 105 | void __init early_memtest(unsigned long start, unsigned long end) | 105 | void __init early_memtest(phys_addr_t start, phys_addr_t end) |
| 106 | { | 106 | { |
| 107 | unsigned int i; | 107 | unsigned int i; |
| 108 | unsigned int idx = 0; | 108 | unsigned int idx = 0; |
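Besides moving arch/x86/mm/memtest.c to mm/memtest.c, the hunks above widen the address parameters from u64/unsigned long to phys_addr_t, so the shared code also covers configurations where physical addresses are wider than unsigned long. The test itself is unchanged; conceptually each pass does the following (a simplified sketch of the existing logic, not new code):

    /*
     * Sketch of one memtest pass over an already-mapped region; the real
     * code walks free memblock ranges and reserves the words that fail.
     */
    static void __init memtest_pass_sketch(u64 pattern, u64 *start, u64 *end)
    {
            u64 *p;

            for (p = start; p < end; p++)
                    *p = pattern;
            for (p = start; p < end; p++)
                    if (*p != pattern)
                            pr_info("bad mem word at %p\n", p);
    }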
diff --git a/mm/migrate.c b/mm/migrate.c index 85e042686031..a65ff72ab739 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
| @@ -901,12 +901,23 @@ out: | |||
| 901 | } | 901 | } |
| 902 | 902 | ||
| 903 | /* | 903 | /* |
| 904 | * gcc 4.7 and 4.8 on arm get ICEs when inlining unmap_and_move(). Work | ||
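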
| 905 | * around it. | ||
| 906 | */ | ||
| 907 | #if (GCC_VERSION >= 40700 && GCC_VERSION < 40900) && defined(CONFIG_ARM) | ||
| 908 | #define ICE_noinline noinline | ||
| 909 | #else | ||
| 910 | #define ICE_noinline | ||
| 911 | #endif | ||
| 912 | |||
| 913 | /* | ||
| 904 | * Obtain the lock on page, remove all ptes and migrate the page | 914 | * Obtain the lock on page, remove all ptes and migrate the page |
| 905 | * to the newly allocated page in newpage. | 915 | * to the newly allocated page in newpage. |
| 906 | */ | 916 | */ |
| 907 | static int unmap_and_move(new_page_t get_new_page, free_page_t put_new_page, | 917 | static ICE_noinline int unmap_and_move(new_page_t get_new_page, |
| 908 | unsigned long private, struct page *page, int force, | 918 | free_page_t put_new_page, |
| 909 | enum migrate_mode mode) | 919 | unsigned long private, struct page *page, |
| 920 | int force, enum migrate_mode mode) | ||
| 910 | { | 921 | { |
| 911 | int rc = 0; | 922 | int rc = 0; |
| 912 | int *result = NULL; | 923 | int *result = NULL; |
| @@ -1554,30 +1565,10 @@ static struct page *alloc_misplaced_dst_page(struct page *page, | |||
| 1554 | * page migration rate limiting control. | 1565 | * page migration rate limiting control. |
| 1555 | * Do not migrate more than @pages_to_migrate in a @migrate_interval_millisecs | 1566 | * Do not migrate more than @pages_to_migrate in a @migrate_interval_millisecs |
| 1556 | * window of time. Default here says do not migrate more than 1280M per second. | 1567 | * window of time. Default here says do not migrate more than 1280M per second. |
| 1557 | * If a node is rate-limited then PTE NUMA updates are also rate-limited. However | ||
| 1558 | * as it is faults that reset the window, pte updates will happen unconditionally | ||
| 1559 | * if there has not been a fault since @pteupdate_interval_millisecs after the | ||
| 1560 | * throttle window closed. | ||
| 1561 | */ | 1568 | */ |
| 1562 | static unsigned int migrate_interval_millisecs __read_mostly = 100; | 1569 | static unsigned int migrate_interval_millisecs __read_mostly = 100; |
| 1563 | static unsigned int pteupdate_interval_millisecs __read_mostly = 1000; | ||
| 1564 | static unsigned int ratelimit_pages __read_mostly = 128 << (20 - PAGE_SHIFT); | 1570 | static unsigned int ratelimit_pages __read_mostly = 128 << (20 - PAGE_SHIFT); |
| 1565 | 1571 | ||
| 1566 | /* Returns true if NUMA migration is currently rate limited */ | ||
| 1567 | bool migrate_ratelimited(int node) | ||
| 1568 | { | ||
| 1569 | pg_data_t *pgdat = NODE_DATA(node); | ||
| 1570 | |||
| 1571 | if (time_after(jiffies, pgdat->numabalancing_migrate_next_window + | ||
| 1572 | msecs_to_jiffies(pteupdate_interval_millisecs))) | ||
| 1573 | return false; | ||
| 1574 | |||
| 1575 | if (pgdat->numabalancing_migrate_nr_pages < ratelimit_pages) | ||
| 1576 | return false; | ||
| 1577 | |||
| 1578 | return true; | ||
| 1579 | } | ||
| 1580 | |||
| 1581 | /* Returns true if the node is migrate rate-limited after the update */ | 1572 | /* Returns true if the node is migrate rate-limited after the update */ |
| 1582 | static bool numamigrate_update_ratelimit(pg_data_t *pgdat, | 1573 | static bool numamigrate_update_ratelimit(pg_data_t *pgdat, |
| 1583 | unsigned long nr_pages) | 1574 | unsigned long nr_pages) |
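The ICE_noinline macro introduced above applies the workaround only where the compiler bug exists. GCC_VERSION (from <linux/compiler-gcc.h>) is __GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__, so for example:

    /* gcc 4.8.2 on ARM: GCC_VERSION == 40802, inside [40700, 40900), so
     * unmap_and_move() is marked noinline to dodge the ICE.
     * gcc 4.9.0: GCC_VERSION == 40900, outside the range, the macro is
     * empty and the compiler may inline as before.
     */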
diff --git a/mm/mlock.c b/mm/mlock.c index 8a54cd214925..6fd2cf15e868 100644 --- a/mm/mlock.c +++ b/mm/mlock.c | |||
| @@ -205,62 +205,6 @@ out: | |||
| 205 | return nr_pages - 1; | 205 | return nr_pages - 1; |
| 206 | } | 206 | } |
| 207 | 207 | ||
| 208 | /** | ||
| 209 | * __mlock_vma_pages_range() - mlock a range of pages in the vma. | ||
| 210 | * @vma: target vma | ||
| 211 | * @start: start address | ||
| 212 | * @end: end address | ||
| 213 | * @nonblocking: | ||
| 214 | * | ||
| 215 | * This takes care of making the pages present too. | ||
| 216 | * | ||
| 217 | * return 0 on success, negative error code on error. | ||
| 218 | * | ||
| 219 | * vma->vm_mm->mmap_sem must be held. | ||
| 220 | * | ||
| 221 | * If @nonblocking is NULL, it may be held for read or write and will | ||
| 222 | * be unperturbed. | ||
| 223 | * | ||
| 224 | * If @nonblocking is non-NULL, it must held for read only and may be | ||
| 225 | * released. If it's released, *@nonblocking will be set to 0. | ||
| 226 | */ | ||
| 227 | long __mlock_vma_pages_range(struct vm_area_struct *vma, | ||
| 228 | unsigned long start, unsigned long end, int *nonblocking) | ||
| 229 | { | ||
| 230 | struct mm_struct *mm = vma->vm_mm; | ||
| 231 | unsigned long nr_pages = (end - start) / PAGE_SIZE; | ||
| 232 | int gup_flags; | ||
| 233 | |||
| 234 | VM_BUG_ON(start & ~PAGE_MASK); | ||
| 235 | VM_BUG_ON(end & ~PAGE_MASK); | ||
| 236 | VM_BUG_ON_VMA(start < vma->vm_start, vma); | ||
| 237 | VM_BUG_ON_VMA(end > vma->vm_end, vma); | ||
| 238 | VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm); | ||
| 239 | |||
| 240 | gup_flags = FOLL_TOUCH | FOLL_MLOCK; | ||
| 241 | /* | ||
| 242 | * We want to touch writable mappings with a write fault in order | ||
| 243 | * to break COW, except for shared mappings because these don't COW | ||
| 244 | * and we would not want to dirty them for nothing. | ||
| 245 | */ | ||
| 246 | if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE) | ||
| 247 | gup_flags |= FOLL_WRITE; | ||
| 248 | |||
| 249 | /* | ||
| 250 | * We want mlock to succeed for regions that have any permissions | ||
| 251 | * other than PROT_NONE. | ||
| 252 | */ | ||
| 253 | if (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)) | ||
| 254 | gup_flags |= FOLL_FORCE; | ||
| 255 | |||
| 256 | /* | ||
| 257 | * We made sure addr is within a VMA, so the following will | ||
| 258 | * not result in a stack expansion that recurses back here. | ||
| 259 | */ | ||
| 260 | return __get_user_pages(current, mm, start, nr_pages, gup_flags, | ||
| 261 | NULL, NULL, nonblocking); | ||
| 262 | } | ||
| 263 | |||
| 264 | /* | 208 | /* |
| 265 | * convert get_user_pages() return value to posix mlock() error | 209 | * convert get_user_pages() return value to posix mlock() error |
| 266 | */ | 210 | */ |
| @@ -596,7 +540,7 @@ success: | |||
| 596 | /* | 540 | /* |
| 597 | * vm_flags is protected by the mmap_sem held in write mode. | 541 | * vm_flags is protected by the mmap_sem held in write mode. |
| 598 | * It's okay if try_to_unmap_one unmaps a page just after we | 542 | * It's okay if try_to_unmap_one unmaps a page just after we |
| 599 | * set VM_LOCKED, __mlock_vma_pages_range will bring it back. | 543 | * set VM_LOCKED, populate_vma_page_range will bring it back. |
| 600 | */ | 544 | */ |
| 601 | 545 | ||
| 602 | if (lock) | 546 | if (lock) |
| @@ -660,69 +604,6 @@ static int do_mlock(unsigned long start, size_t len, int on) | |||
| 660 | return error; | 604 | return error; |
| 661 | } | 605 | } |
| 662 | 606 | ||
| 663 | /* | ||
| 664 | * __mm_populate - populate and/or mlock pages within a range of address space. | ||
| 665 | * | ||
| 666 | * This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap | ||
| 667 | * flags. VMAs must be already marked with the desired vm_flags, and | ||
| 668 | * mmap_sem must not be held. | ||
| 669 | */ | ||
| 670 | int __mm_populate(unsigned long start, unsigned long len, int ignore_errors) | ||
| 671 | { | ||
| 672 | struct mm_struct *mm = current->mm; | ||
| 673 | unsigned long end, nstart, nend; | ||
| 674 | struct vm_area_struct *vma = NULL; | ||
| 675 | int locked = 0; | ||
| 676 | long ret = 0; | ||
| 677 | |||
| 678 | VM_BUG_ON(start & ~PAGE_MASK); | ||
| 679 | VM_BUG_ON(len != PAGE_ALIGN(len)); | ||
| 680 | end = start + len; | ||
| 681 | |||
| 682 | for (nstart = start; nstart < end; nstart = nend) { | ||
| 683 | /* | ||
| 684 | * We want to fault in pages for [nstart; end) address range. | ||
| 685 | * Find first corresponding VMA. | ||
| 686 | */ | ||
| 687 | if (!locked) { | ||
| 688 | locked = 1; | ||
| 689 | down_read(&mm->mmap_sem); | ||
| 690 | vma = find_vma(mm, nstart); | ||
| 691 | } else if (nstart >= vma->vm_end) | ||
| 692 | vma = vma->vm_next; | ||
| 693 | if (!vma || vma->vm_start >= end) | ||
| 694 | break; | ||
| 695 | /* | ||
| 696 | * Set [nstart; nend) to intersection of desired address | ||
| 697 | * range with the first VMA. Also, skip undesirable VMA types. | ||
| 698 | */ | ||
| 699 | nend = min(end, vma->vm_end); | ||
| 700 | if (vma->vm_flags & (VM_IO | VM_PFNMAP)) | ||
| 701 | continue; | ||
| 702 | if (nstart < vma->vm_start) | ||
| 703 | nstart = vma->vm_start; | ||
| 704 | /* | ||
| 705 | * Now fault in a range of pages. __mlock_vma_pages_range() | ||
| 706 | * double checks the vma flags, so that it won't mlock pages | ||
| 707 | * if the vma was already munlocked. | ||
| 708 | */ | ||
| 709 | ret = __mlock_vma_pages_range(vma, nstart, nend, &locked); | ||
| 710 | if (ret < 0) { | ||
| 711 | if (ignore_errors) { | ||
| 712 | ret = 0; | ||
| 713 | continue; /* continue at next VMA */ | ||
| 714 | } | ||
| 715 | ret = __mlock_posix_error_return(ret); | ||
| 716 | break; | ||
| 717 | } | ||
| 718 | nend = nstart + ret * PAGE_SIZE; | ||
| 719 | ret = 0; | ||
| 720 | } | ||
| 721 | if (locked) | ||
| 722 | up_read(&mm->mmap_sem); | ||
| 723 | return ret; /* 0 or negative error code */ | ||
| 724 | } | ||
| 725 | |||
| 726 | SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len) | 607 | SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len) |
| 727 | { | 608 | { |
| 728 | unsigned long locked; | 609 | unsigned long locked; |
| @@ -750,9 +631,13 @@ SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len) | |||
| 750 | error = do_mlock(start, len, 1); | 631 | error = do_mlock(start, len, 1); |
| 751 | 632 | ||
| 752 | up_write(¤t->mm->mmap_sem); | 633 | up_write(¤t->mm->mmap_sem); |
| 753 | if (!error) | 634 | if (error) |
| 754 | error = __mm_populate(start, len, 0); | 635 | return error; |
| 755 | return error; | 636 | |
| 637 | error = __mm_populate(start, len, 0); | ||
| 638 | if (error) | ||
| 639 | return __mlock_posix_error_return(error); | ||
| 640 | return 0; | ||
| 756 | } | 641 | } |
| 757 | 642 | ||
| 758 | SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len) | 643 | SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len) |
| @@ -2316,7 +2316,7 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr) | |||
| 2316 | if (!prev || expand_stack(prev, addr)) | 2316 | if (!prev || expand_stack(prev, addr)) |
| 2317 | return NULL; | 2317 | return NULL; |
| 2318 | if (prev->vm_flags & VM_LOCKED) | 2318 | if (prev->vm_flags & VM_LOCKED) |
| 2319 | __mlock_vma_pages_range(prev, addr, prev->vm_end, NULL); | 2319 | populate_vma_page_range(prev, addr, prev->vm_end, NULL); |
| 2320 | return prev; | 2320 | return prev; |
| 2321 | } | 2321 | } |
| 2322 | #else | 2322 | #else |
| @@ -2351,7 +2351,7 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr) | |||
| 2351 | if (expand_stack(vma, addr)) | 2351 | if (expand_stack(vma, addr)) |
| 2352 | return NULL; | 2352 | return NULL; |
| 2353 | if (vma->vm_flags & VM_LOCKED) | 2353 | if (vma->vm_flags & VM_LOCKED) |
| 2354 | __mlock_vma_pages_range(vma, addr, start, NULL); | 2354 | populate_vma_page_range(vma, addr, start, NULL); |
| 2355 | return vma; | 2355 | return vma; |
| 2356 | } | 2356 | } |
| 2357 | #endif | 2357 | #endif |
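With __mlock_vma_pages_range() and __mm_populate() gone from mm/mlock.c, the population helper survives as populate_vma_page_range() (used by the find_extend_vma() hunks above), and sys_mlock() now applies __mlock_posix_error_return() to the __mm_populate() result itself; the removed helper used to do that conversion internally, as the deleted lines show. The caller-visible behaviour is unchanged: population failures still surface as the usual POSIX mlock() errors, e.g. (illustrative userspace fragment):

    if (mlock(addr, len) != 0)
            perror("mlock");        /* e.g. ENOMEM or EAGAIN */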
diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 642f38cb175a..52628c819bf7 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c | |||
| @@ -612,7 +612,8 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, | |||
| 612 | * Determines whether the kernel must panic because of the panic_on_oom sysctl. | 612 | * Determines whether the kernel must panic because of the panic_on_oom sysctl. |
| 613 | */ | 613 | */ |
| 614 | void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, | 614 | void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, |
| 615 | int order, const nodemask_t *nodemask) | 615 | int order, const nodemask_t *nodemask, |
| 616 | struct mem_cgroup *memcg) | ||
| 616 | { | 617 | { |
| 617 | if (likely(!sysctl_panic_on_oom)) | 618 | if (likely(!sysctl_panic_on_oom)) |
| 618 | return; | 619 | return; |
| @@ -625,7 +626,7 @@ void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, | |||
| 625 | if (constraint != CONSTRAINT_NONE) | 626 | if (constraint != CONSTRAINT_NONE) |
| 626 | return; | 627 | return; |
| 627 | } | 628 | } |
| 628 | dump_header(NULL, gfp_mask, order, NULL, nodemask); | 629 | dump_header(NULL, gfp_mask, order, memcg, nodemask); |
| 629 | panic("Out of memory: %s panic_on_oom is enabled\n", | 630 | panic("Out of memory: %s panic_on_oom is enabled\n", |
| 630 | sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide"); | 631 | sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide"); |
| 631 | } | 632 | } |
| @@ -740,7 +741,7 @@ static void __out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, | |||
| 740 | constraint = constrained_alloc(zonelist, gfp_mask, nodemask, | 741 | constraint = constrained_alloc(zonelist, gfp_mask, nodemask, |
| 741 | &totalpages); | 742 | &totalpages); |
| 742 | mpol_mask = (constraint == CONSTRAINT_MEMORY_POLICY) ? nodemask : NULL; | 743 | mpol_mask = (constraint == CONSTRAINT_MEMORY_POLICY) ? nodemask : NULL; |
| 743 | check_panic_on_oom(constraint, gfp_mask, order, mpol_mask); | 744 | check_panic_on_oom(constraint, gfp_mask, order, mpol_mask, NULL); |
| 744 | 745 | ||
| 745 | if (sysctl_oom_kill_allocating_task && current->mm && | 746 | if (sysctl_oom_kill_allocating_task && current->mm && |
| 746 | !oom_unkillable_task(current, NULL, nodemask) && | 747 | !oom_unkillable_task(current, NULL, nodemask) && |
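check_panic_on_oom() now takes a struct mem_cgroup * so that a panic triggered from a memcg OOM can include the cgroup's state in dump_header(); the global path above simply passes NULL. A hypothetical memcg-side caller under that assumption (the memcg hunk is not part of this excerpt):

    /* Hypothetical memcg OOM path: hand the memcg to check_panic_on_oom()
     * so a panic report can show the cgroup's usage via dump_header(). */
    check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL, memcg);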
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 644bcb665773..0372411f38fc 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
| @@ -2111,6 +2111,25 @@ void account_page_dirtied(struct page *page, struct address_space *mapping) | |||
| 2111 | EXPORT_SYMBOL(account_page_dirtied); | 2111 | EXPORT_SYMBOL(account_page_dirtied); |
| 2112 | 2112 | ||
| 2113 | /* | 2113 | /* |
| 2114 | * Helper function for de-accounting a dirty page without writeback. | ||
| 2115 | * | ||
| 2116 | * Doing this should *normally* only ever be done when a page | ||
| 2117 | * is truncated, and is not actually mapped anywhere at all. However, | ||
| 2118 | * fs/buffer.c does this when it notices that somebody has cleaned | ||
| 2119 | * out all the buffers on a page without actually doing it through | ||
| 2120 | * the VM. Can you say "ext3 is horribly ugly"? Thought you could. | ||
| 2121 | */ | ||
| 2122 | void account_page_cleaned(struct page *page, struct address_space *mapping) | ||
| 2123 | { | ||
| 2124 | if (mapping_cap_account_dirty(mapping)) { | ||
| 2125 | dec_zone_page_state(page, NR_FILE_DIRTY); | ||
| 2126 | dec_bdi_stat(inode_to_bdi(mapping->host), BDI_RECLAIMABLE); | ||
| 2127 | task_io_account_cancelled_write(PAGE_CACHE_SIZE); | ||
| 2128 | } | ||
| 2129 | } | ||
| 2130 | EXPORT_SYMBOL(account_page_cleaned); | ||
| 2131 | |||
| 2132 | /* | ||
| 2114 | * For address_spaces which do not use buffers. Just tag the page as dirty in | 2133 | * For address_spaces which do not use buffers. Just tag the page as dirty in |
| 2115 | * its radix tree. | 2134 | * its radix tree. |
| 2116 | * | 2135 | * |
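account_page_cleaned() takes over the dirty-accounting rollback that used to live in cancel_dirty_page() (removed from mm/truncate.c further down); callers are now expected to clear the dirty bit themselves first. The caller pattern, exactly as the truncate_complete_page() hunk later in this diff uses it:

    /* Clear the dirty bit, then undo the dirty accounting without writeback. */
    if (TestClearPageDirty(page))
            account_page_cleaned(page, mapping);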
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 40e29429e7b0..1b849500640c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
| @@ -1032,11 +1032,9 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, | |||
| 1032 | static int fallbacks[MIGRATE_TYPES][4] = { | 1032 | static int fallbacks[MIGRATE_TYPES][4] = { |
| 1033 | [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, | 1033 | [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, |
| 1034 | [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, | 1034 | [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, |
| 1035 | [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, | ||
| 1035 | #ifdef CONFIG_CMA | 1036 | #ifdef CONFIG_CMA |
| 1036 | [MIGRATE_MOVABLE] = { MIGRATE_CMA, MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, | ||
| 1037 | [MIGRATE_CMA] = { MIGRATE_RESERVE }, /* Never used */ | 1037 | [MIGRATE_CMA] = { MIGRATE_RESERVE }, /* Never used */ |
| 1038 | #else | ||
| 1039 | [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, | ||
| 1040 | #endif | 1038 | #endif |
| 1041 | [MIGRATE_RESERVE] = { MIGRATE_RESERVE }, /* Never used */ | 1039 | [MIGRATE_RESERVE] = { MIGRATE_RESERVE }, /* Never used */ |
| 1042 | #ifdef CONFIG_MEMORY_ISOLATION | 1040 | #ifdef CONFIG_MEMORY_ISOLATION |
| @@ -1044,6 +1042,17 @@ static int fallbacks[MIGRATE_TYPES][4] = { | |||
| 1044 | #endif | 1042 | #endif |
| 1045 | }; | 1043 | }; |
| 1046 | 1044 | ||
| 1045 | #ifdef CONFIG_CMA | ||
| 1046 | static struct page *__rmqueue_cma_fallback(struct zone *zone, | ||
| 1047 | unsigned int order) | ||
| 1048 | { | ||
| 1049 | return __rmqueue_smallest(zone, order, MIGRATE_CMA); | ||
| 1050 | } | ||
| 1051 | #else | ||
| 1052 | static inline struct page *__rmqueue_cma_fallback(struct zone *zone, | ||
| 1053 | unsigned int order) { return NULL; } | ||
| 1054 | #endif | ||
| 1055 | |||
| 1047 | /* | 1056 | /* |
| 1048 | * Move the free pages in a range to the free lists of the requested type. | 1057 | * Move the free pages in a range to the free lists of the requested type. |
| 1049 | * Note that start_page and end_pages are not aligned on a pageblock | 1058 | * Note that start_page and end_pages are not aligned on a pageblock |
| @@ -1136,14 +1145,40 @@ static void change_pageblock_range(struct page *pageblock_page, | |||
| 1136 | * as fragmentation caused by those allocations polluting movable pageblocks | 1145 | * as fragmentation caused by those allocations polluting movable pageblocks |
| 1137 | * is worse than movable allocations stealing from unmovable and reclaimable | 1146 | * is worse than movable allocations stealing from unmovable and reclaimable |
| 1138 | * pageblocks. | 1147 | * pageblocks. |
| 1139 | * | ||
| 1140 | * If we claim more than half of the pageblock, change pageblock's migratetype | ||
| 1141 | * as well. | ||
| 1142 | */ | 1148 | */ |
| 1143 | static void try_to_steal_freepages(struct zone *zone, struct page *page, | 1149 | static bool can_steal_fallback(unsigned int order, int start_mt) |
| 1144 | int start_type, int fallback_type) | 1150 | { |
| 1151 | /* | ||
| 1152 | * Leaving this order check here is intentional, even though the | ||
| 1153 | * check below is a more relaxed version of it. The reason is that | ||
| 1154 | * we can actually steal the whole pageblock if this condition is | ||
| 1155 | * met, but the check below does not guarantee that and is only a | ||
| 1156 | * heuristic, so it could be changed at any time. | ||
| 1157 | */ | ||
| 1158 | if (order >= pageblock_order) | ||
| 1159 | return true; | ||
| 1160 | |||
| 1161 | if (order >= pageblock_order / 2 || | ||
| 1162 | start_mt == MIGRATE_RECLAIMABLE || | ||
| 1163 | start_mt == MIGRATE_UNMOVABLE || | ||
| 1164 | page_group_by_mobility_disabled) | ||
| 1165 | return true; | ||
| 1166 | |||
| 1167 | return false; | ||
| 1168 | } | ||
| 1169 | |||
| 1170 | /* | ||
| 1171 | * This function implements the actual steal behaviour. If the order is | ||
| 1172 | * large enough, we can steal the whole pageblock. If not, we first move | ||
| 1173 | * the free pages in this pageblock and check whether at least half of | ||
| 1174 | * them were moved. If so, we can change the migratetype of the pageblock | ||
| 1175 | * and permanently use its pages as the requested migratetype in the future. | ||
| 1176 | */ | ||
| 1177 | static void steal_suitable_fallback(struct zone *zone, struct page *page, | ||
| 1178 | int start_type) | ||
| 1145 | { | 1179 | { |
| 1146 | int current_order = page_order(page); | 1180 | int current_order = page_order(page); |
| 1181 | int pages; | ||
| 1147 | 1182 | ||
| 1148 | /* Take ownership for orders >= pageblock_order */ | 1183 | /* Take ownership for orders >= pageblock_order */ |
| 1149 | if (current_order >= pageblock_order) { | 1184 | if (current_order >= pageblock_order) { |
| @@ -1151,19 +1186,49 @@ static void try_to_steal_freepages(struct zone *zone, struct page *page, | |||
| 1151 | return; | 1186 | return; |
| 1152 | } | 1187 | } |
| 1153 | 1188 | ||
| 1154 | if (current_order >= pageblock_order / 2 || | 1189 | pages = move_freepages_block(zone, page, start_type); |
| 1155 | start_type == MIGRATE_RECLAIMABLE || | 1190 | |
| 1156 | start_type == MIGRATE_UNMOVABLE || | 1191 | /* Claim the whole block if over half of it is free */ |
| 1157 | page_group_by_mobility_disabled) { | 1192 | if (pages >= (1 << (pageblock_order-1)) || |
| 1158 | int pages; | 1193 | page_group_by_mobility_disabled) |
| 1194 | set_pageblock_migratetype(page, start_type); | ||
| 1195 | } | ||
| 1196 | |||
| 1197 | /* | ||
| 1198 | * Check whether there is a suitable fallback freepage of the requested order. | ||
| 1199 | * If only_stealable is true, this function returns fallback_mt only if | ||
| 1200 | * we can steal the other freepages altogether. This helps to reduce | ||
| 1201 | * fragmentation due to mixed migratetype pages in one pageblock. | ||
| 1202 | */ | ||
| 1203 | int find_suitable_fallback(struct free_area *area, unsigned int order, | ||
| 1204 | int migratetype, bool only_stealable, bool *can_steal) | ||
| 1205 | { | ||
| 1206 | int i; | ||
| 1207 | int fallback_mt; | ||
| 1208 | |||
| 1209 | if (area->nr_free == 0) | ||
| 1210 | return -1; | ||
| 1211 | |||
| 1212 | *can_steal = false; | ||
| 1213 | for (i = 0;; i++) { | ||
| 1214 | fallback_mt = fallbacks[migratetype][i]; | ||
| 1215 | if (fallback_mt == MIGRATE_RESERVE) | ||
| 1216 | break; | ||
| 1217 | |||
| 1218 | if (list_empty(&area->free_list[fallback_mt])) | ||
| 1219 | continue; | ||
| 1159 | 1220 | ||
| 1160 | pages = move_freepages_block(zone, page, start_type); | 1221 | if (can_steal_fallback(order, migratetype)) |
| 1222 | *can_steal = true; | ||
| 1161 | 1223 | ||
| 1162 | /* Claim the whole block if over half of it is free */ | 1224 | if (!only_stealable) |
| 1163 | if (pages >= (1 << (pageblock_order-1)) || | 1225 | return fallback_mt; |
| 1164 | page_group_by_mobility_disabled) | 1226 | |
| 1165 | set_pageblock_migratetype(page, start_type); | 1227 | if (*can_steal) |
| 1228 | return fallback_mt; | ||
| 1166 | } | 1229 | } |
| 1230 | |||
| 1231 | return -1; | ||
| 1167 | } | 1232 | } |
| 1168 | 1233 | ||
| 1169 | /* Remove an element from the buddy allocator from the fallback list */ | 1234 | /* Remove an element from the buddy allocator from the fallback list */ |
| @@ -1173,64 +1238,45 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype) | |||
| 1173 | struct free_area *area; | 1238 | struct free_area *area; |
| 1174 | unsigned int current_order; | 1239 | unsigned int current_order; |
| 1175 | struct page *page; | 1240 | struct page *page; |
| 1241 | int fallback_mt; | ||
| 1242 | bool can_steal; | ||
| 1176 | 1243 | ||
| 1177 | /* Find the largest possible block of pages in the other list */ | 1244 | /* Find the largest possible block of pages in the other list */ |
| 1178 | for (current_order = MAX_ORDER-1; | 1245 | for (current_order = MAX_ORDER-1; |
| 1179 | current_order >= order && current_order <= MAX_ORDER-1; | 1246 | current_order >= order && current_order <= MAX_ORDER-1; |
| 1180 | --current_order) { | 1247 | --current_order) { |
| 1181 | int i; | 1248 | area = &(zone->free_area[current_order]); |
| 1182 | for (i = 0;; i++) { | 1249 | fallback_mt = find_suitable_fallback(area, current_order, |
| 1183 | int migratetype = fallbacks[start_migratetype][i]; | 1250 | start_migratetype, false, &can_steal); |
| 1184 | int buddy_type = start_migratetype; | 1251 | if (fallback_mt == -1) |
| 1185 | 1252 | continue; | |
| 1186 | /* MIGRATE_RESERVE handled later if necessary */ | ||
| 1187 | if (migratetype == MIGRATE_RESERVE) | ||
| 1188 | break; | ||
| 1189 | |||
| 1190 | area = &(zone->free_area[current_order]); | ||
| 1191 | if (list_empty(&area->free_list[migratetype])) | ||
| 1192 | continue; | ||
| 1193 | |||
| 1194 | page = list_entry(area->free_list[migratetype].next, | ||
| 1195 | struct page, lru); | ||
| 1196 | area->nr_free--; | ||
| 1197 | |||
| 1198 | if (!is_migrate_cma(migratetype)) { | ||
| 1199 | try_to_steal_freepages(zone, page, | ||
| 1200 | start_migratetype, | ||
| 1201 | migratetype); | ||
| 1202 | } else { | ||
| 1203 | /* | ||
| 1204 | * When borrowing from MIGRATE_CMA, we need to | ||
| 1205 | * release the excess buddy pages to CMA | ||
| 1206 | * itself, and we do not try to steal extra | ||
| 1207 | * free pages. | ||
| 1208 | */ | ||
| 1209 | buddy_type = migratetype; | ||
| 1210 | } | ||
| 1211 | 1253 | ||
| 1212 | /* Remove the page from the freelists */ | 1254 | page = list_entry(area->free_list[fallback_mt].next, |
| 1213 | list_del(&page->lru); | 1255 | struct page, lru); |
| 1214 | rmv_page_order(page); | 1256 | if (can_steal) |
| 1257 | steal_suitable_fallback(zone, page, start_migratetype); | ||
| 1215 | 1258 | ||
| 1216 | expand(zone, page, order, current_order, area, | 1259 | /* Remove the page from the freelists */ |
| 1217 | buddy_type); | 1260 | area->nr_free--; |
| 1261 | list_del(&page->lru); | ||
| 1262 | rmv_page_order(page); | ||
| 1218 | 1263 | ||
| 1219 | /* | 1264 | expand(zone, page, order, current_order, area, |
| 1220 | * The freepage_migratetype may differ from pageblock's | 1265 | start_migratetype); |
| 1221 | * migratetype depending on the decisions in | 1266 | /* |
| 1222 | * try_to_steal_freepages(). This is OK as long as it | 1267 | * The freepage_migratetype may differ from pageblock's |
| 1223 | * does not differ for MIGRATE_CMA pageblocks. For CMA | 1268 | * migratetype depending on the decisions in |
| 1224 | * we need to make sure unallocated pages flushed from | 1269 | * try_to_steal_freepages(). This is OK as long as it |
| 1225 | * pcp lists are returned to the correct freelist. | 1270 | * does not differ for MIGRATE_CMA pageblocks. For CMA |
| 1226 | */ | 1271 | * we need to make sure unallocated pages flushed from |
| 1227 | set_freepage_migratetype(page, buddy_type); | 1272 | * pcp lists are returned to the correct freelist. |
| 1273 | */ | ||
| 1274 | set_freepage_migratetype(page, start_migratetype); | ||
| 1228 | 1275 | ||
| 1229 | trace_mm_page_alloc_extfrag(page, order, current_order, | 1276 | trace_mm_page_alloc_extfrag(page, order, current_order, |
| 1230 | start_migratetype, migratetype); | 1277 | start_migratetype, fallback_mt); |
| 1231 | 1278 | ||
| 1232 | return page; | 1279 | return page; |
| 1233 | } | ||
| 1234 | } | 1280 | } |
| 1235 | 1281 | ||
| 1236 | return NULL; | 1282 | return NULL; |
| @@ -1249,7 +1295,11 @@ retry_reserve: | |||
| 1249 | page = __rmqueue_smallest(zone, order, migratetype); | 1295 | page = __rmqueue_smallest(zone, order, migratetype); |
| 1250 | 1296 | ||
| 1251 | if (unlikely(!page) && migratetype != MIGRATE_RESERVE) { | 1297 | if (unlikely(!page) && migratetype != MIGRATE_RESERVE) { |
| 1252 | page = __rmqueue_fallback(zone, order, migratetype); | 1298 | if (migratetype == MIGRATE_MOVABLE) |
| 1299 | page = __rmqueue_cma_fallback(zone, order); | ||
| 1300 | |||
| 1301 | if (!page) | ||
| 1302 | page = __rmqueue_fallback(zone, order, migratetype); | ||
| 1253 | 1303 | ||
| 1254 | /* | 1304 | /* |
| 1255 | * Use MIGRATE_RESERVE rather than fail an allocation. goto | 1305 | * Use MIGRATE_RESERVE rather than fail an allocation. goto |
| @@ -2362,13 +2412,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, | |||
| 2362 | *did_some_progress = 1; | 2412 | *did_some_progress = 1; |
| 2363 | goto out; | 2413 | goto out; |
| 2364 | } | 2414 | } |
| 2365 | /* | 2415 | /* The OOM killer may not free memory on a specific node */ |
| 2366 | * GFP_THISNODE contains __GFP_NORETRY and we never hit this. | ||
| 2367 | * Sanity check for bare calls of __GFP_THISNODE, not real OOM. | ||
| 2368 | * The caller should handle page allocation failure by itself if | ||
| 2369 | * it specifies __GFP_THISNODE. | ||
| 2370 | * Note: Hugepage uses it but will hit PAGE_ALLOC_COSTLY_ORDER. | ||
| 2371 | */ | ||
| 2372 | if (gfp_mask & __GFP_THISNODE) | 2416 | if (gfp_mask & __GFP_THISNODE) |
| 2373 | goto out; | 2417 | goto out; |
| 2374 | } | 2418 | } |
| @@ -2623,15 +2667,11 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, | |||
| 2623 | } | 2667 | } |
| 2624 | 2668 | ||
| 2625 | /* | 2669 | /* |
| 2626 | * GFP_THISNODE (meaning __GFP_THISNODE, __GFP_NORETRY and | 2670 | * If this allocation cannot block and it is for a specific node, then |
| 2627 | * __GFP_NOWARN set) should not cause reclaim since the subsystem | 2671 | * fail early. There's no need to wakeup kswapd or retry for a |
| 2628 | * (f.e. slab) using GFP_THISNODE may choose to trigger reclaim | 2672 | * speculative node-specific allocation. |
| 2629 | * using a larger set of nodes after it has established that the | ||
| 2630 | * allowed per node queues are empty and that nodes are | ||
| 2631 | * over allocated. | ||
| 2632 | */ | 2673 | */ |
| 2633 | if (IS_ENABLED(CONFIG_NUMA) && | 2674 | if (IS_ENABLED(CONFIG_NUMA) && (gfp_mask & __GFP_THISNODE) && !wait) |
| 2634 | (gfp_mask & GFP_THISNODE) == GFP_THISNODE) | ||
| 2635 | goto nopage; | 2675 | goto nopage; |
| 2636 | 2676 | ||
| 2637 | retry: | 2677 | retry: |
| @@ -2824,7 +2864,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, | |||
| 2824 | /* | 2864 | /* |
| 2825 | * Check the zones suitable for the gfp_mask contain at least one | 2865 | * Check the zones suitable for the gfp_mask contain at least one |
| 2826 | * valid zone. It's possible to have an empty zonelist as a result | 2866 | * valid zone. It's possible to have an empty zonelist as a result |
| 2827 | * of GFP_THISNODE and a memoryless node | 2867 | * of __GFP_THISNODE and a memoryless node |
| 2828 | */ | 2868 | */ |
| 2829 | if (unlikely(!zonelist->_zonerefs->zone)) | 2869 | if (unlikely(!zonelist->_zonerefs->zone)) |
| 2830 | return NULL; | 2870 | return NULL; |
| @@ -3201,38 +3241,31 @@ static void show_migration_types(unsigned char type) | |||
| 3201 | * Show free area list (used inside shift_scroll-lock stuff) | 3241 | * Show free area list (used inside shift_scroll-lock stuff) |
| 3202 | * We also calculate the percentage fragmentation. We do this by counting the | 3242 | * We also calculate the percentage fragmentation. We do this by counting the |
| 3203 | * memory on each free list with the exception of the first item on the list. | 3243 | * memory on each free list with the exception of the first item on the list. |
| 3204 | * Suppresses nodes that are not allowed by current's cpuset if | 3244 | * |
| 3205 | * SHOW_MEM_FILTER_NODES is passed. | 3245 | * Bits in @filter: |
| 3246 | * SHOW_MEM_FILTER_NODES: suppress nodes that are not allowed by current's | ||
| 3247 | * cpuset. | ||
| 3206 | */ | 3248 | */ |
| 3207 | void show_free_areas(unsigned int filter) | 3249 | void show_free_areas(unsigned int filter) |
| 3208 | { | 3250 | { |
| 3251 | unsigned long free_pcp = 0; | ||
| 3209 | int cpu; | 3252 | int cpu; |
| 3210 | struct zone *zone; | 3253 | struct zone *zone; |
| 3211 | 3254 | ||
| 3212 | for_each_populated_zone(zone) { | 3255 | for_each_populated_zone(zone) { |
| 3213 | if (skip_free_areas_node(filter, zone_to_nid(zone))) | 3256 | if (skip_free_areas_node(filter, zone_to_nid(zone))) |
| 3214 | continue; | 3257 | continue; |
| 3215 | show_node(zone); | ||
| 3216 | printk("%s per-cpu:\n", zone->name); | ||
| 3217 | 3258 | ||
| 3218 | for_each_online_cpu(cpu) { | 3259 | for_each_online_cpu(cpu) |
| 3219 | struct per_cpu_pageset *pageset; | 3260 | free_pcp += per_cpu_ptr(zone->pageset, cpu)->pcp.count; |
| 3220 | |||
| 3221 | pageset = per_cpu_ptr(zone->pageset, cpu); | ||
| 3222 | |||
| 3223 | printk("CPU %4d: hi:%5d, btch:%4d usd:%4d\n", | ||
| 3224 | cpu, pageset->pcp.high, | ||
| 3225 | pageset->pcp.batch, pageset->pcp.count); | ||
| 3226 | } | ||
| 3227 | } | 3261 | } |
| 3228 | 3262 | ||
| 3229 | printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n" | 3263 | printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n" |
| 3230 | " active_file:%lu inactive_file:%lu isolated_file:%lu\n" | 3264 | " active_file:%lu inactive_file:%lu isolated_file:%lu\n" |
| 3231 | " unevictable:%lu" | 3265 | " unevictable:%lu dirty:%lu writeback:%lu unstable:%lu\n" |
| 3232 | " dirty:%lu writeback:%lu unstable:%lu\n" | 3266 | " slab_reclaimable:%lu slab_unreclaimable:%lu\n" |
| 3233 | " free:%lu slab_reclaimable:%lu slab_unreclaimable:%lu\n" | ||
| 3234 | " mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n" | 3267 | " mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n" |
| 3235 | " free_cma:%lu\n", | 3268 | " free:%lu free_pcp:%lu free_cma:%lu\n", |
| 3236 | global_page_state(NR_ACTIVE_ANON), | 3269 | global_page_state(NR_ACTIVE_ANON), |
| 3237 | global_page_state(NR_INACTIVE_ANON), | 3270 | global_page_state(NR_INACTIVE_ANON), |
| 3238 | global_page_state(NR_ISOLATED_ANON), | 3271 | global_page_state(NR_ISOLATED_ANON), |
| @@ -3243,13 +3276,14 @@ void show_free_areas(unsigned int filter) | |||
| 3243 | global_page_state(NR_FILE_DIRTY), | 3276 | global_page_state(NR_FILE_DIRTY), |
| 3244 | global_page_state(NR_WRITEBACK), | 3277 | global_page_state(NR_WRITEBACK), |
| 3245 | global_page_state(NR_UNSTABLE_NFS), | 3278 | global_page_state(NR_UNSTABLE_NFS), |
| 3246 | global_page_state(NR_FREE_PAGES), | ||
| 3247 | global_page_state(NR_SLAB_RECLAIMABLE), | 3279 | global_page_state(NR_SLAB_RECLAIMABLE), |
| 3248 | global_page_state(NR_SLAB_UNRECLAIMABLE), | 3280 | global_page_state(NR_SLAB_UNRECLAIMABLE), |
| 3249 | global_page_state(NR_FILE_MAPPED), | 3281 | global_page_state(NR_FILE_MAPPED), |
| 3250 | global_page_state(NR_SHMEM), | 3282 | global_page_state(NR_SHMEM), |
| 3251 | global_page_state(NR_PAGETABLE), | 3283 | global_page_state(NR_PAGETABLE), |
| 3252 | global_page_state(NR_BOUNCE), | 3284 | global_page_state(NR_BOUNCE), |
| 3285 | global_page_state(NR_FREE_PAGES), | ||
| 3286 | free_pcp, | ||
| 3253 | global_page_state(NR_FREE_CMA_PAGES)); | 3287 | global_page_state(NR_FREE_CMA_PAGES)); |
| 3254 | 3288 | ||
| 3255 | for_each_populated_zone(zone) { | 3289 | for_each_populated_zone(zone) { |
| @@ -3257,6 +3291,11 @@ void show_free_areas(unsigned int filter) | |||
| 3257 | 3291 | ||
| 3258 | if (skip_free_areas_node(filter, zone_to_nid(zone))) | 3292 | if (skip_free_areas_node(filter, zone_to_nid(zone))) |
| 3259 | continue; | 3293 | continue; |
| 3294 | |||
| 3295 | free_pcp = 0; | ||
| 3296 | for_each_online_cpu(cpu) | ||
| 3297 | free_pcp += per_cpu_ptr(zone->pageset, cpu)->pcp.count; | ||
| 3298 | |||
| 3260 | show_node(zone); | 3299 | show_node(zone); |
| 3261 | printk("%s" | 3300 | printk("%s" |
| 3262 | " free:%lukB" | 3301 | " free:%lukB" |
| @@ -3283,6 +3322,8 @@ void show_free_areas(unsigned int filter) | |||
| 3283 | " pagetables:%lukB" | 3322 | " pagetables:%lukB" |
| 3284 | " unstable:%lukB" | 3323 | " unstable:%lukB" |
| 3285 | " bounce:%lukB" | 3324 | " bounce:%lukB" |
| 3325 | " free_pcp:%lukB" | ||
| 3326 | " local_pcp:%ukB" | ||
| 3286 | " free_cma:%lukB" | 3327 | " free_cma:%lukB" |
| 3287 | " writeback_tmp:%lukB" | 3328 | " writeback_tmp:%lukB" |
| 3288 | " pages_scanned:%lu" | 3329 | " pages_scanned:%lu" |
| @@ -3314,6 +3355,8 @@ void show_free_areas(unsigned int filter) | |||
| 3314 | K(zone_page_state(zone, NR_PAGETABLE)), | 3355 | K(zone_page_state(zone, NR_PAGETABLE)), |
| 3315 | K(zone_page_state(zone, NR_UNSTABLE_NFS)), | 3356 | K(zone_page_state(zone, NR_UNSTABLE_NFS)), |
| 3316 | K(zone_page_state(zone, NR_BOUNCE)), | 3357 | K(zone_page_state(zone, NR_BOUNCE)), |
| 3358 | K(free_pcp), | ||
| 3359 | K(this_cpu_read(zone->pageset->pcp.count)), | ||
| 3317 | K(zone_page_state(zone, NR_FREE_CMA_PAGES)), | 3360 | K(zone_page_state(zone, NR_FREE_CMA_PAGES)), |
| 3318 | K(zone_page_state(zone, NR_WRITEBACK_TEMP)), | 3361 | K(zone_page_state(zone, NR_WRITEBACK_TEMP)), |
| 3319 | K(zone_page_state(zone, NR_PAGES_SCANNED)), | 3362 | K(zone_page_state(zone, NR_PAGES_SCANNED)), |
| @@ -5717,7 +5760,7 @@ static void __setup_per_zone_wmarks(void) | |||
| 5717 | * value here. | 5760 | * value here. |
| 5718 | * | 5761 | * |
| 5719 | * The WMARK_HIGH-WMARK_LOW and (WMARK_LOW-WMARK_MIN) | 5762 | * The WMARK_HIGH-WMARK_LOW and (WMARK_LOW-WMARK_MIN) |
| 5720 | * deltas controls asynch page reclaim, and so should | 5763 | * deltas control asynch page reclaim, and so should |
| 5721 | * not be capped for highmem. | 5764 | * not be capped for highmem. |
| 5722 | */ | 5765 | */ |
| 5723 | unsigned long min_pages; | 5766 | unsigned long min_pages; |
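Taken together, the page_alloc.c hunks split the old try_to_steal_freepages() into a policy check (can_steal_fallback()), the stealing itself (steal_suitable_fallback()) and a search helper (find_suitable_fallback()), and drop MIGRATE_CMA from the generic fallback table: movable allocations now try CMA explicitly before falling back. The resulting order in __rmqueue(), restated from the hunks above:

    /* Outline of the new __rmqueue() fallback sequence. */
    page = __rmqueue_smallest(zone, order, migratetype);
    if (unlikely(!page) && migratetype != MIGRATE_RESERVE) {
            if (migratetype == MIGRATE_MOVABLE)
                    page = __rmqueue_cma_fallback(zone, order);
            if (!page)
                    page = __rmqueue_fallback(zone, order, migratetype);
    }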
| @@ -857,6 +857,11 @@ static inline void *____cache_alloc_node(struct kmem_cache *cachep, | |||
| 857 | return NULL; | 857 | return NULL; |
| 858 | } | 858 | } |
| 859 | 859 | ||
| 860 | static inline gfp_t gfp_exact_node(gfp_t flags) | ||
| 861 | { | ||
| 862 | return flags; | ||
| 863 | } | ||
| 864 | |||
| 860 | #else /* CONFIG_NUMA */ | 865 | #else /* CONFIG_NUMA */ |
| 861 | 866 | ||
| 862 | static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int); | 867 | static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int); |
| @@ -1023,6 +1028,15 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) | |||
| 1023 | 1028 | ||
| 1024 | return __cache_free_alien(cachep, objp, node, page_node); | 1029 | return __cache_free_alien(cachep, objp, node, page_node); |
| 1025 | } | 1030 | } |
| 1031 | |||
| 1032 | /* | ||
| 1033 | * Construct gfp mask to allocate from a specific node but do not invoke reclaim | ||
| 1034 | * or warn about failures. | ||
| 1035 | */ | ||
| 1036 | static inline gfp_t gfp_exact_node(gfp_t flags) | ||
| 1037 | { | ||
| 1038 | return (flags | __GFP_THISNODE | __GFP_NOWARN) & ~__GFP_WAIT; | ||
| 1039 | } | ||
| 1026 | #endif | 1040 | #endif |
| 1027 | 1041 | ||
| 1028 | /* | 1042 | /* |
| @@ -2825,7 +2839,7 @@ alloc_done: | |||
| 2825 | if (unlikely(!ac->avail)) { | 2839 | if (unlikely(!ac->avail)) { |
| 2826 | int x; | 2840 | int x; |
| 2827 | force_grow: | 2841 | force_grow: |
| 2828 | x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL); | 2842 | x = cache_grow(cachep, gfp_exact_node(flags), node, NULL); |
| 2829 | 2843 | ||
| 2830 | /* cache_grow can reenable interrupts, then ac could change. */ | 2844 | /* cache_grow can reenable interrupts, then ac could change. */ |
| 2831 | ac = cpu_cache_get(cachep); | 2845 | ac = cpu_cache_get(cachep); |
| @@ -3019,7 +3033,7 @@ retry: | |||
| 3019 | get_node(cache, nid) && | 3033 | get_node(cache, nid) && |
| 3020 | get_node(cache, nid)->free_objects) { | 3034 | get_node(cache, nid)->free_objects) { |
| 3021 | obj = ____cache_alloc_node(cache, | 3035 | obj = ____cache_alloc_node(cache, |
| 3022 | flags | GFP_THISNODE, nid); | 3036 | gfp_exact_node(flags), nid); |
| 3023 | if (obj) | 3037 | if (obj) |
| 3024 | break; | 3038 | break; |
| 3025 | } | 3039 | } |
| @@ -3047,7 +3061,7 @@ retry: | |||
| 3047 | nid = page_to_nid(page); | 3061 | nid = page_to_nid(page); |
| 3048 | if (cache_grow(cache, flags, nid, page)) { | 3062 | if (cache_grow(cache, flags, nid, page)) { |
| 3049 | obj = ____cache_alloc_node(cache, | 3063 | obj = ____cache_alloc_node(cache, |
| 3050 | flags | GFP_THISNODE, nid); | 3064 | gfp_exact_node(flags), nid); |
| 3051 | if (!obj) | 3065 | if (!obj) |
| 3052 | /* | 3066 | /* |
| 3053 | * Another processor may allocate the | 3067 | * Another processor may allocate the |
| @@ -3118,7 +3132,7 @@ retry: | |||
| 3118 | 3132 | ||
| 3119 | must_grow: | 3133 | must_grow: |
| 3120 | spin_unlock(&n->list_lock); | 3134 | spin_unlock(&n->list_lock); |
| 3121 | x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL); | 3135 | x = cache_grow(cachep, gfp_exact_node(flags), nodeid, NULL); |
| 3122 | if (x) | 3136 | if (x) |
| 3123 | goto retry; | 3137 | goto retry; |
| 3124 | 3138 | ||
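gfp_exact_node() gives mm/slab.c one place that turns a caller's flags into a strictly node-local, non-warning, non-sleeping mask for cache_grow() and ____cache_alloc_node(); the !CONFIG_NUMA stub returns the flags untouched. As flag arithmetic, assuming GFP_KERNEL's definition in this era (__GFP_WAIT | __GFP_IO | __GFP_FS):

    /* On a NUMA build:
     *   gfp_exact_node(GFP_KERNEL)
     *     == (GFP_KERNEL | __GFP_THISNODE | __GFP_NOWARN) & ~__GFP_WAIT
     *     ==  __GFP_IO | __GFP_FS | __GFP_THISNODE | __GFP_NOWARN
     * i.e. node-exact, silent on failure, and never sleeping.
     */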
| @@ -532,7 +532,7 @@ int __kmem_cache_create(struct kmem_cache *c, unsigned long flags) | |||
| 532 | return 0; | 532 | return 0; |
| 533 | } | 533 | } |
| 534 | 534 | ||
| 535 | void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node) | 535 | static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node) |
| 536 | { | 536 | { |
| 537 | void *b; | 537 | void *b; |
| 538 | 538 | ||
| @@ -558,7 +558,6 @@ void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node) | |||
| 558 | kmemleak_alloc_recursive(b, c->size, 1, c->flags, flags); | 558 | kmemleak_alloc_recursive(b, c->size, 1, c->flags, flags); |
| 559 | return b; | 559 | return b; |
| 560 | } | 560 | } |
| 561 | EXPORT_SYMBOL(slob_alloc_node); | ||
| 562 | 561 | ||
| 563 | void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) | 562 | void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) |
| 564 | { | 563 | { |
| @@ -374,7 +374,7 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page | |||
| 374 | if (cmpxchg_double(&page->freelist, &page->counters, | 374 | if (cmpxchg_double(&page->freelist, &page->counters, |
| 375 | freelist_old, counters_old, | 375 | freelist_old, counters_old, |
| 376 | freelist_new, counters_new)) | 376 | freelist_new, counters_new)) |
| 377 | return 1; | 377 | return true; |
| 378 | } else | 378 | } else |
| 379 | #endif | 379 | #endif |
| 380 | { | 380 | { |
| @@ -384,7 +384,7 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page | |||
| 384 | page->freelist = freelist_new; | 384 | page->freelist = freelist_new; |
| 385 | set_page_slub_counters(page, counters_new); | 385 | set_page_slub_counters(page, counters_new); |
| 386 | slab_unlock(page); | 386 | slab_unlock(page); |
| 387 | return 1; | 387 | return true; |
| 388 | } | 388 | } |
| 389 | slab_unlock(page); | 389 | slab_unlock(page); |
| 390 | } | 390 | } |
| @@ -396,7 +396,7 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page | |||
| 396 | pr_info("%s %s: cmpxchg double redo ", n, s->name); | 396 | pr_info("%s %s: cmpxchg double redo ", n, s->name); |
| 397 | #endif | 397 | #endif |
| 398 | 398 | ||
| 399 | return 0; | 399 | return false; |
| 400 | } | 400 | } |
| 401 | 401 | ||
| 402 | static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page, | 402 | static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page, |
| @@ -410,7 +410,7 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page, | |||
| 410 | if (cmpxchg_double(&page->freelist, &page->counters, | 410 | if (cmpxchg_double(&page->freelist, &page->counters, |
| 411 | freelist_old, counters_old, | 411 | freelist_old, counters_old, |
| 412 | freelist_new, counters_new)) | 412 | freelist_new, counters_new)) |
| 413 | return 1; | 413 | return true; |
| 414 | } else | 414 | } else |
| 415 | #endif | 415 | #endif |
| 416 | { | 416 | { |
| @@ -424,7 +424,7 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page, | |||
| 424 | set_page_slub_counters(page, counters_new); | 424 | set_page_slub_counters(page, counters_new); |
| 425 | slab_unlock(page); | 425 | slab_unlock(page); |
| 426 | local_irq_restore(flags); | 426 | local_irq_restore(flags); |
| 427 | return 1; | 427 | return true; |
| 428 | } | 428 | } |
| 429 | slab_unlock(page); | 429 | slab_unlock(page); |
| 430 | local_irq_restore(flags); | 430 | local_irq_restore(flags); |
| @@ -437,7 +437,7 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page, | |||
| 437 | pr_info("%s %s: cmpxchg double redo ", n, s->name); | 437 | pr_info("%s %s: cmpxchg double redo ", n, s->name); |
| 438 | #endif | 438 | #endif |
| 439 | 439 | ||
| 440 | return 0; | 440 | return false; |
| 441 | } | 441 | } |
| 442 | 442 | ||
| 443 | #ifdef CONFIG_SLUB_DEBUG | 443 | #ifdef CONFIG_SLUB_DEBUG |
| @@ -1137,15 +1137,6 @@ static int __init setup_slub_debug(char *str) | |||
| 1137 | */ | 1137 | */ |
| 1138 | goto check_slabs; | 1138 | goto check_slabs; |
| 1139 | 1139 | ||
| 1140 | if (tolower(*str) == 'o') { | ||
| 1141 | /* | ||
| 1142 | * Avoid enabling debugging on caches if its minimum order | ||
| 1143 | * would increase as a result. | ||
| 1144 | */ | ||
| 1145 | disable_higher_order_debug = 1; | ||
| 1146 | goto out; | ||
| 1147 | } | ||
| 1148 | |||
| 1149 | slub_debug = 0; | 1140 | slub_debug = 0; |
| 1150 | if (*str == '-') | 1141 | if (*str == '-') |
| 1151 | /* | 1142 | /* |
| @@ -1176,6 +1167,13 @@ static int __init setup_slub_debug(char *str) | |||
| 1176 | case 'a': | 1167 | case 'a': |
| 1177 | slub_debug |= SLAB_FAILSLAB; | 1168 | slub_debug |= SLAB_FAILSLAB; |
| 1178 | break; | 1169 | break; |
| 1170 | case 'o': | ||
| 1171 | /* | ||
| 1172 | * Avoid enabling debugging on caches if its minimum | ||
| 1173 | * order would increase as a result. | ||
| 1174 | */ | ||
| 1175 | disable_higher_order_debug = 1; | ||
| 1176 | break; | ||
| 1179 | default: | 1177 | default: |
| 1180 | pr_err("slub_debug option '%c' unknown. skipped\n", | 1178 | pr_err("slub_debug option '%c' unknown. skipped\n", |
| 1181 | *str); | 1179 | *str); |
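Previously the 'o'/'O' modifier was only honoured when it was the leading character of the slub_debug= string, and it aborted the rest of the parse (the removed goto out); as a switch case it can now be combined with the other flag characters. An illustrative boot parameter, assuming the usual flag letters from Documentation/vm/slub.txt (F sanity checks, Z red zoning, P poisoning):

    slub_debug=FZPO
        Enable sanity checks, red zoning and poisoning, but skip any cache
        whose minimum slab order would have to grow as a result.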
diff --git a/mm/truncate.c b/mm/truncate.c index ddec5a5966d7..7a9d8a3cb143 100644 --- a/mm/truncate.c +++ b/mm/truncate.c | |||
| @@ -93,35 +93,6 @@ void do_invalidatepage(struct page *page, unsigned int offset, | |||
| 93 | } | 93 | } |
| 94 | 94 | ||
| 95 | /* | 95 | /* |
| 96 | * This cancels just the dirty bit on the kernel page itself, it | ||
| 97 | * does NOT actually remove dirty bits on any mmap's that may be | ||
| 98 | * around. It also leaves the page tagged dirty, so any sync | ||
| 99 | * activity will still find it on the dirty lists, and in particular, | ||
| 100 | * clear_page_dirty_for_io() will still look at the dirty bits in | ||
| 101 | * the VM. | ||
| 102 | * | ||
| 103 | * Doing this should *normally* only ever be done when a page | ||
| 104 | * is truncated, and is not actually mapped anywhere at all. However, | ||
| 105 | * fs/buffer.c does this when it notices that somebody has cleaned | ||
| 106 | * out all the buffers on a page without actually doing it through | ||
| 107 | * the VM. Can you say "ext3 is horribly ugly"? Tought you could. | ||
| 108 | */ | ||
| 109 | void cancel_dirty_page(struct page *page, unsigned int account_size) | ||
| 110 | { | ||
| 111 | if (TestClearPageDirty(page)) { | ||
| 112 | struct address_space *mapping = page->mapping; | ||
| 113 | if (mapping && mapping_cap_account_dirty(mapping)) { | ||
| 114 | dec_zone_page_state(page, NR_FILE_DIRTY); | ||
| 115 | dec_bdi_stat(inode_to_bdi(mapping->host), | ||
| 116 | BDI_RECLAIMABLE); | ||
| 117 | if (account_size) | ||
| 118 | task_io_account_cancelled_write(account_size); | ||
| 119 | } | ||
| 120 | } | ||
| 121 | } | ||
| 122 | EXPORT_SYMBOL(cancel_dirty_page); | ||
| 123 | |||
| 124 | /* | ||
| 125 | * If truncate cannot remove the fs-private metadata from the page, the page | 96 | * If truncate cannot remove the fs-private metadata from the page, the page |
| 126 | * becomes orphaned. It will be left on the LRU and may even be mapped into | 97 | * becomes orphaned. It will be left on the LRU and may even be mapped into |
| 127 | * user pagetables if we're racing with filemap_fault(). | 98 | * user pagetables if we're racing with filemap_fault(). |
| @@ -140,7 +111,13 @@ truncate_complete_page(struct address_space *mapping, struct page *page) | |||
| 140 | if (page_has_private(page)) | 111 | if (page_has_private(page)) |
| 141 | do_invalidatepage(page, 0, PAGE_CACHE_SIZE); | 112 | do_invalidatepage(page, 0, PAGE_CACHE_SIZE); |
| 142 | 113 | ||
| 143 | cancel_dirty_page(page, PAGE_CACHE_SIZE); | 114 | /* |
| 115 | * Some filesystems seem to re-dirty the page even after | ||
| 116 | * the VM has canceled the dirty bit (eg ext3 journaling). | ||
| 117 | * Hence the dirty accounting check is placed after the invalidation. | ||
| 118 | */ | ||
| 119 | if (TestClearPageDirty(page)) | ||
| 120 | account_page_cleaned(page, mapping); | ||
| 144 | 121 | ||
| 145 | ClearPageMappedToDisk(page); | 122 | ClearPageMappedToDisk(page); |
| 146 | delete_from_page_cache(page); | 123 | delete_from_page_cache(page); |
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 49abccf29a29..a5bbdd3b5d67 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c | |||
| @@ -29,6 +29,7 @@ | |||
| 29 | #include <linux/atomic.h> | 29 | #include <linux/atomic.h> |
| 30 | #include <linux/compiler.h> | 30 | #include <linux/compiler.h> |
| 31 | #include <linux/llist.h> | 31 | #include <linux/llist.h> |
| 32 | #include <linux/bitops.h> | ||
| 32 | 33 | ||
| 33 | #include <asm/uaccess.h> | 34 | #include <asm/uaccess.h> |
| 34 | #include <asm/tlbflush.h> | 35 | #include <asm/tlbflush.h> |
| @@ -74,6 +75,8 @@ static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end) | |||
| 74 | pmd = pmd_offset(pud, addr); | 75 | pmd = pmd_offset(pud, addr); |
| 75 | do { | 76 | do { |
| 76 | next = pmd_addr_end(addr, end); | 77 | next = pmd_addr_end(addr, end); |
| 78 | if (pmd_clear_huge(pmd)) | ||
| 79 | continue; | ||
| 77 | if (pmd_none_or_clear_bad(pmd)) | 80 | if (pmd_none_or_clear_bad(pmd)) |
| 78 | continue; | 81 | continue; |
| 79 | vunmap_pte_range(pmd, addr, next); | 82 | vunmap_pte_range(pmd, addr, next); |
| @@ -88,6 +91,8 @@ static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end) | |||
| 88 | pud = pud_offset(pgd, addr); | 91 | pud = pud_offset(pgd, addr); |
| 89 | do { | 92 | do { |
| 90 | next = pud_addr_end(addr, end); | 93 | next = pud_addr_end(addr, end); |
| 94 | if (pud_clear_huge(pud)) | ||
| 95 | continue; | ||
| 91 | if (pud_none_or_clear_bad(pud)) | 96 | if (pud_none_or_clear_bad(pud)) |
| 92 | continue; | 97 | continue; |
| 93 | vunmap_pmd_range(pud, addr, next); | 98 | vunmap_pmd_range(pud, addr, next); |
| @@ -1314,7 +1319,8 @@ static struct vm_struct *__get_vm_area_node(unsigned long size, | |||
| 1314 | 1319 | ||
| 1315 | BUG_ON(in_interrupt()); | 1320 | BUG_ON(in_interrupt()); |
| 1316 | if (flags & VM_IOREMAP) | 1321 | if (flags & VM_IOREMAP) |
| 1317 | align = 1ul << clamp(fls(size), PAGE_SHIFT, IOREMAP_MAX_ORDER); | 1322 | align = 1ul << clamp_t(int, fls_long(size), |
| 1323 | PAGE_SHIFT, IOREMAP_MAX_ORDER); | ||
| 1318 | 1324 | ||
| 1319 | size = PAGE_ALIGN(size); | 1325 | size = PAGE_ALIGN(size); |
| 1320 | if (unlikely(!size)) | 1326 | if (unlikely(!size)) |
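The vmalloc.c hunks let vunmap tear down huge PMD/PUD mappings created for I/O ranges (pmd_clear_huge()/pud_clear_huge()), and make the VM_IOREMAP alignment computation safe for sizes that do not fit in an int by using fls_long() with clamp_t(). A worked example of the alignment, assuming 4 KiB pages:

    /* size = 24 KiB = 0x6000, fls_long(0x6000) == 15, so
     * align = 1ul << clamp_t(int, 15, PAGE_SHIFT, IOREMAP_MAX_ORDER) == 32 KiB.
     * fls() would give the same answer here, but it takes an int and so
     * mishandled sizes of 4 GiB and above on 64-bit kernels.
     */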
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 50ec42f170a0..2dacc7b5af23 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c | |||
| @@ -100,7 +100,9 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, | |||
| 100 | 100 | ||
| 101 | new_stats = | 101 | new_stats = |
| 102 | kmem_cache_alloc_node(flow_stats_cache, | 102 | kmem_cache_alloc_node(flow_stats_cache, |
| 103 | GFP_THISNODE | | 103 | GFP_NOWAIT | |
| 104 | __GFP_THISNODE | | ||
| 105 | __GFP_NOWARN | | ||
| 104 | __GFP_NOMEMALLOC, | 106 | __GFP_NOMEMALLOC, |
| 105 | node); | 107 | node); |
| 106 | if (likely(new_stats)) { | 108 | if (likely(new_stats)) { |
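The flow-stats allocation stops using the GFP_THISNODE composite, which on NUMA builds historically expanded to __GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY, and instead spells out the flags it wants; this matches the broader move in this series away from GFP_THISNODE outside the slab allocator. As a before/after comparison:

    /* Old mask:  GFP_THISNODE | __GFP_NOMEMALLOC
     * New mask:  GFP_NOWAIT | __GFP_THISNODE | __GFP_NOWARN | __GFP_NOMEMALLOC
     * Neither mask allows sleeping; the new one simply says so explicitly
     * instead of relying on the composite's historical contents.
     */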
diff --git a/scripts/coccinelle/misc/bugon.cocci b/scripts/coccinelle/misc/bugon.cocci index 3b7eec24fb5a..27c97f1f2767 100644 --- a/scripts/coccinelle/misc/bugon.cocci +++ b/scripts/coccinelle/misc/bugon.cocci | |||
| @@ -57,6 +57,6 @@ coccilib.org.print_todo(p[0], "WARNING use BUG_ON") | |||
| 57 | p << r.p; | 57 | p << r.p; |
| 58 | @@ | 58 | @@ |
| 59 | 59 | ||
| 60 | msg="WARNING: Use BUG_ON" | 60 | msg="WARNING: Use BUG_ON instead of if condition followed by BUG.\nPlease make sure the condition has no side effects (see conditional BUG_ON definition in include/asm-generic/bug.h)" |
| 61 | coccilib.report.print_report(p[0], msg) | 61 | coccilib.report.print_report(p[0], msg) |
| 62 | 62 | ||
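The longer report message spells out the caveat from the conditional BUG_ON() definition in include/asm-generic/bug.h: the rewrite is only valid when the condition has no side effects. The shape of the transformation the rule flags is:

    /* Reported pattern */
    if (err < 0)
            BUG();

    /* Suggested replacement -- safe only because the condition is side-effect free */
    BUG_ON(err < 0);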
