diff options
119 files changed, 3139 insertions, 1805 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4d9ca7d92a20..5b47acb86111 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
| @@ -1759,7 +1759,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
| 1759 | 1759 | ||
| 1760 | keepinitrd [HW,ARM] | 1760 | keepinitrd [HW,ARM] |
| 1761 | 1761 | ||
| 1762 | kernelcore=nn[KMG] [KNL,X86,IA-64,PPC] This parameter | 1762 | kernelcore= [KNL,X86,IA-64,PPC] |
| 1763 | Format: nn[KMGTPE] | "mirror" | ||
| 1764 | This parameter | ||
| 1763 | specifies the amount of memory usable by the kernel | 1765 | specifies the amount of memory usable by the kernel |
| 1764 | for non-movable allocations. The requested amount is | 1766 | for non-movable allocations. The requested amount is |
| 1765 | spread evenly throughout all nodes in the system. The | 1767 | spread evenly throughout all nodes in the system. The |
| @@ -1775,6 +1777,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
| 1775 | use the HighMem zone if it exists, and the Normal | 1777 | use the HighMem zone if it exists, and the Normal |
| 1776 | zone if it does not. | 1778 | zone if it does not. |
| 1777 | 1779 | ||
| 1780 | Instead of specifying the amount of memory (nn[KMGTPE]), | ||
| 1781 | you can specify the "mirror" option. If the "mirror" | ||
| 1782 | option is specified, mirrored (reliable) memory is used | ||
| 1783 | for non-movable allocations and remaining memory is used | ||
| 1784 | for Movable pages. nn[KMGTPE] and "mirror" are exclusive, | ||
| 1785 | so you can NOT specify nn[KMGTPE] and "mirror" at the same | ||
| 1786 | time. | ||
| 1787 | |||
| 1778 | kgdbdbgp= [KGDB,HW] kgdb over EHCI usb debug port. | 1788 | kgdbdbgp= [KGDB,HW] kgdb over EHCI usb debug port. |
| 1779 | Format: <Controller#>[,poll interval] | 1789 | Format: <Controller#>[,poll interval] |
| 1780 | The controller # is the number of the ehci usb debug | 1790 | The controller # is the number of the ehci usb debug |
| @@ -2732,6 +2742,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
| 2732 | we can turn it on. | 2742 | we can turn it on. |
| 2733 | on: enable the feature | 2743 | on: enable the feature |
| 2734 | 2744 | ||
| 2745 | page_poison= [KNL] Boot-time parameter changing the state of | ||
| 2746 | poisoning on the buddy allocator. | ||
| 2747 | off: turn off poisoning | ||
| 2748 | on: turn on poisoning | ||
| 2749 | |||
| 2735 | panic= [KNL] Kernel behaviour on panic: delay <timeout> | 2750 | panic= [KNL] Kernel behaviour on panic: delay <timeout> |
| 2736 | timeout > 0: seconds before rebooting | 2751 | timeout > 0: seconds before rebooting |
| 2737 | timeout = 0: wait forever | 2752 | timeout = 0: wait forever |
diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt index ce2cfcf35c27..443f4b44ad97 100644 --- a/Documentation/memory-hotplug.txt +++ b/Documentation/memory-hotplug.txt | |||
| @@ -256,10 +256,27 @@ If the memory block is offline, you'll read "offline". | |||
| 256 | 256 | ||
| 257 | 5.2. How to online memory | 257 | 5.2. How to online memory |
| 258 | ------------ | 258 | ------------ |
| 259 | Even if the memory is hot-added, it is not at ready-to-use state. | 259 | When the memory is hot-added, the kernel decides whether or not to "online" |
| 260 | For using newly added memory, you have to "online" the memory block. | 260 | it according to the policy which can be read from "auto_online_blocks" file: |
| 261 | 261 | ||
| 262 | For onlining, you have to write "online" to the memory block's state file as: | 262 | % cat /sys/devices/system/memory/auto_online_blocks |
| 263 | |||
| 264 | The default is "offline" which means the newly added memory is not in a | ||
| 265 | ready-to-use state and you have to "online" the newly added memory blocks | ||
| 266 | manually. Automatic onlining can be requested by writing "online" to | ||
| 267 | "auto_online_blocks" file: | ||
| 268 | |||
| 269 | % echo online > /sys/devices/system/memory/auto_online_blocks | ||
| 270 | |||
| 271 | This sets a global policy and impacts all memory blocks that will subsequently | ||
| 272 | be hotplugged. Currently offline blocks keep their state. It is possible, under | ||
| 273 | certain circumstances, that some memory blocks will be added but will fail to | ||
| 274 | online. User space tools can check their "state" files | ||
| 275 | (/sys/devices/system/memory/memoryXXX/state) and try to online them manually. | ||
| 276 | |||
| 277 | If the automatic onlining wasn't requested, failed, or some memory block was | ||
| 278 | offlined, it is possible to change the individual block's state by writing to the | ||
| 279 | "state" file: | ||
| 263 | 280 | ||
| 264 | % echo online > /sys/devices/system/memory/memoryXXX/state | 281 | % echo online > /sys/devices/system/memory/memoryXXX/state |
| 265 | 282 | ||
diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt index 5d1128bf0282..5962949944fd 100644 --- a/Documentation/printk-formats.txt +++ b/Documentation/printk-formats.txt | |||
| @@ -298,6 +298,24 @@ bitmap and its derivatives such as cpumask and nodemask: | |||
| 298 | 298 | ||
| 299 | Passed by reference. | 299 | Passed by reference. |
| 300 | 300 | ||
| 301 | Flags bitfields such as page flags, gfp_flags: | ||
| 302 | |||
| 303 | %pGp referenced|uptodate|lru|active|private | ||
| 304 | %pGg GFP_USER|GFP_DMA32|GFP_NOWARN | ||
| 305 | %pGv read|exec|mayread|maywrite|mayexec|denywrite | ||
| 306 | |||
| 307 | For printing flags bitfields as a collection of symbolic constants that | ||
| 308 | would construct the value. The type of flags is given by the third | ||
| 309 | character. Currently supported are [p]age flags, [v]ma_flags (both | ||
| 310 | expect unsigned long *) and [g]fp_flags (expects gfp_t *). The flag | ||
| 311 | names and print order depend on the particular type. | ||
| 312 | |||
| 313 | Note that this format should not be used directly in TP_printk() part | ||
| 314 | of a tracepoint. Instead, use the show_*_flags() functions from | ||
| 315 | <trace/events/mmflags.h>. | ||
| 316 | |||
| 317 | Passed by reference. | ||
| 318 | |||
| 301 | Network device features: | 319 | Network device features: |
| 302 | 320 | ||
| 303 | %pNF 0x000000000000c000 | 321 | %pNF 0x000000000000c000 |
diff --git a/Documentation/vm/page_owner.txt b/Documentation/vm/page_owner.txt index 8f3ce9b3aa11..ffff1439076a 100644 --- a/Documentation/vm/page_owner.txt +++ b/Documentation/vm/page_owner.txt | |||
| @@ -28,10 +28,11 @@ with page owner and page owner is disabled in runtime due to no enabling | |||
| 28 | boot option, runtime overhead is marginal. If disabled in runtime, it | 28 | boot option, runtime overhead is marginal. If disabled in runtime, it |
| 29 | doesn't require memory to store owner information, so there is no runtime | 29 | doesn't require memory to store owner information, so there is no runtime |
| 30 | memory overhead. And, page owner inserts just two unlikely branches into | 30 | memory overhead. And, page owner inserts just two unlikely branches into |
| 31 | the page allocator hotpath and if it returns false then allocation is | 31 | the page allocator hotpath and if not enabled, then allocation is done |
| 32 | done like as the kernel without page owner. These two unlikely branches | 32 | like the kernel without page owner. These two unlikely branches should |
| 33 | would not affect to allocation performance. Following is the kernel's | 33 | not affect allocation performance, especially if the static keys jump |
| 34 | code size change due to this facility. | 34 | label patching functionality is available. Following is the kernel's code |
| 35 | size change due to this facility. | ||
| 35 | 36 | ||
| 36 | - Without page owner | 37 | - Without page owner |
| 37 | text data bss dec hex filename | 38 | text data bss dec hex filename |
diff --git a/Documentation/vm/slub.txt b/Documentation/vm/slub.txt index f0d340959319..84652419bff2 100644 --- a/Documentation/vm/slub.txt +++ b/Documentation/vm/slub.txt | |||
| @@ -35,8 +35,8 @@ slub_debug=<Debug-Options>,<slab name> | |||
| 35 | Enable options only for select slabs | 35 | Enable options only for select slabs |
| 36 | 36 | ||
| 37 | Possible debug options are | 37 | Possible debug options are |
| 38 | F Sanity checks on (enables SLAB_DEBUG_FREE. Sorry | 38 | F Sanity checks on (enables SLAB_DEBUG_CONSISTENCY_CHECKS |
| 39 | SLAB legacy issues) | 39 | Sorry SLAB legacy issues) |
| 40 | Z Red zoning | 40 | Z Red zoning |
| 41 | P Poisoning (object and padding) | 41 | P Poisoning (object and padding) |
| 42 | U User tracking (free and alloc) | 42 | U User tracking (free and alloc) |
diff --git a/arch/blackfin/include/asm/pgtable.h b/arch/blackfin/include/asm/pgtable.h index b88a1558b0b9..c1ee3d6533fb 100644 --- a/arch/blackfin/include/asm/pgtable.h +++ b/arch/blackfin/include/asm/pgtable.h | |||
| @@ -97,6 +97,8 @@ extern unsigned long get_fb_unmapped_area(struct file *filp, unsigned long, | |||
| 97 | unsigned long); | 97 | unsigned long); |
| 98 | #define HAVE_ARCH_FB_UNMAPPED_AREA | 98 | #define HAVE_ARCH_FB_UNMAPPED_AREA |
| 99 | 99 | ||
| 100 | #define pgprot_writecombine pgprot_noncached | ||
| 101 | |||
| 100 | #include <asm-generic/pgtable.h> | 102 | #include <asm-generic/pgtable.h> |
| 101 | 103 | ||
| 102 | #endif /* _BLACKFIN_PGTABLE_H */ | 104 | #endif /* _BLACKFIN_PGTABLE_H */ |
diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c index 0d4146f644dc..11fa717d93b1 100644 --- a/arch/m32r/mm/init.c +++ b/arch/m32r/mm/init.c | |||
| @@ -59,21 +59,24 @@ void free_initrd_mem(unsigned long, unsigned long); | |||
| 59 | void __init zone_sizes_init(void) | 59 | void __init zone_sizes_init(void) |
| 60 | { | 60 | { |
| 61 | unsigned long zones_size[MAX_NR_ZONES] = {0, }; | 61 | unsigned long zones_size[MAX_NR_ZONES] = {0, }; |
| 62 | unsigned long max_dma; | ||
| 63 | unsigned long low; | ||
| 64 | unsigned long start_pfn; | 62 | unsigned long start_pfn; |
| 65 | 63 | ||
| 66 | #ifdef CONFIG_MMU | 64 | #ifdef CONFIG_MMU |
| 67 | start_pfn = START_PFN(0); | 65 | { |
| 68 | max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; | 66 | unsigned long low; |
| 69 | low = MAX_LOW_PFN(0); | 67 | unsigned long max_dma; |
| 70 | 68 | ||
| 71 | if (low < max_dma){ | 69 | start_pfn = START_PFN(0); |
| 72 | zones_size[ZONE_DMA] = low - start_pfn; | 70 | max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; |
| 73 | zones_size[ZONE_NORMAL] = 0; | 71 | low = MAX_LOW_PFN(0); |
| 74 | } else { | 72 | |
| 75 | zones_size[ZONE_DMA] = low - start_pfn; | 73 | if (low < max_dma) { |
| 76 | zones_size[ZONE_NORMAL] = low - max_dma; | 74 | zones_size[ZONE_DMA] = low - start_pfn; |
| 75 | zones_size[ZONE_NORMAL] = 0; | ||
| 76 | } else { | ||
| 77 | zones_size[ZONE_DMA] = low - start_pfn; | ||
| 78 | zones_size[ZONE_NORMAL] = low - max_dma; | ||
| 79 | } | ||
| 77 | } | 80 | } |
| 78 | #else | 81 | #else |
| 79 | zones_size[ZONE_DMA] = 0 >> PAGE_SHIFT; | 82 | zones_size[ZONE_DMA] = 0 >> PAGE_SHIFT; |
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 2150b0139a0b..1b6081c0aff9 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include <linux/export.h> | 11 | #include <linux/export.h> |
| 12 | #include <linux/kdebug.h> | 12 | #include <linux/kdebug.h> |
| 13 | #include <linux/ptrace.h> | 13 | #include <linux/ptrace.h> |
| 14 | #include <linux/mm.h> | ||
| 14 | #include <linux/module.h> | 15 | #include <linux/module.h> |
| 15 | #include <linux/sched.h> | 16 | #include <linux/sched.h> |
| 16 | #include <asm/processor.h> | 17 | #include <asm/processor.h> |
| @@ -189,9 +190,8 @@ void die(struct pt_regs *regs, const char *str) | |||
| 189 | #ifdef CONFIG_SMP | 190 | #ifdef CONFIG_SMP |
| 190 | printk("SMP "); | 191 | printk("SMP "); |
| 191 | #endif | 192 | #endif |
| 192 | #ifdef CONFIG_DEBUG_PAGEALLOC | 193 | if (debug_pagealloc_enabled()) |
| 193 | printk("DEBUG_PAGEALLOC"); | 194 | printk("DEBUG_PAGEALLOC"); |
| 194 | #endif | ||
| 195 | printk("\n"); | 195 | printk("\n"); |
| 196 | notify_die(DIE_OOPS, str, regs, 0, regs->int_code & 0xffff, SIGSEGV); | 196 | notify_die(DIE_OOPS, str, regs, 0, regs->int_code & 0xffff, SIGSEGV); |
| 197 | print_modules(); | 197 | print_modules(); |
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index ef7d6c8fea66..d27fccbad7c1 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c | |||
| @@ -94,16 +94,15 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) | |||
| 94 | pgd_populate(&init_mm, pg_dir, pu_dir); | 94 | pgd_populate(&init_mm, pg_dir, pu_dir); |
| 95 | } | 95 | } |
| 96 | pu_dir = pud_offset(pg_dir, address); | 96 | pu_dir = pud_offset(pg_dir, address); |
| 97 | #ifndef CONFIG_DEBUG_PAGEALLOC | ||
| 98 | if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address && | 97 | if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address && |
| 99 | !(address & ~PUD_MASK) && (address + PUD_SIZE <= end)) { | 98 | !(address & ~PUD_MASK) && (address + PUD_SIZE <= end) && |
| 99 | !debug_pagealloc_enabled()) { | ||
| 100 | pud_val(*pu_dir) = __pa(address) | | 100 | pud_val(*pu_dir) = __pa(address) | |
| 101 | _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE | | 101 | _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE | |
| 102 | (ro ? _REGION_ENTRY_PROTECT : 0); | 102 | (ro ? _REGION_ENTRY_PROTECT : 0); |
| 103 | address += PUD_SIZE; | 103 | address += PUD_SIZE; |
| 104 | continue; | 104 | continue; |
| 105 | } | 105 | } |
| 106 | #endif | ||
| 107 | if (pud_none(*pu_dir)) { | 106 | if (pud_none(*pu_dir)) { |
| 108 | pm_dir = vmem_pmd_alloc(); | 107 | pm_dir = vmem_pmd_alloc(); |
| 109 | if (!pm_dir) | 108 | if (!pm_dir) |
| @@ -111,9 +110,9 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) | |||
| 111 | pud_populate(&init_mm, pu_dir, pm_dir); | 110 | pud_populate(&init_mm, pu_dir, pm_dir); |
| 112 | } | 111 | } |
| 113 | pm_dir = pmd_offset(pu_dir, address); | 112 | pm_dir = pmd_offset(pu_dir, address); |
| 114 | #ifndef CONFIG_DEBUG_PAGEALLOC | ||
| 115 | if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address && | 113 | if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address && |
| 116 | !(address & ~PMD_MASK) && (address + PMD_SIZE <= end)) { | 114 | !(address & ~PMD_MASK) && (address + PMD_SIZE <= end) && |
| 115 | !debug_pagealloc_enabled()) { | ||
| 117 | pmd_val(*pm_dir) = __pa(address) | | 116 | pmd_val(*pm_dir) = __pa(address) | |
| 118 | _SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE | | 117 | _SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE | |
| 119 | _SEGMENT_ENTRY_YOUNG | | 118 | _SEGMENT_ENTRY_YOUNG | |
| @@ -121,7 +120,6 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) | |||
| 121 | address += PMD_SIZE; | 120 | address += PMD_SIZE; |
| 122 | continue; | 121 | continue; |
| 123 | } | 122 | } |
| 124 | #endif | ||
| 125 | if (pmd_none(*pm_dir)) { | 123 | if (pmd_none(*pm_dir)) { |
| 126 | pt_dir = vmem_pte_alloc(address); | 124 | pt_dir = vmem_pte_alloc(address); |
| 127 | if (!pt_dir) | 125 | if (!pt_dir) |
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 9c30acfadae2..32e5699eadfe 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c | |||
| @@ -265,9 +265,8 @@ int __die(const char *str, struct pt_regs *regs, long err) | |||
| 265 | #ifdef CONFIG_SMP | 265 | #ifdef CONFIG_SMP |
| 266 | printk("SMP "); | 266 | printk("SMP "); |
| 267 | #endif | 267 | #endif |
| 268 | #ifdef CONFIG_DEBUG_PAGEALLOC | 268 | if (debug_pagealloc_enabled()) |
| 269 | printk("DEBUG_PAGEALLOC "); | 269 | printk("DEBUG_PAGEALLOC "); |
| 270 | #endif | ||
| 271 | #ifdef CONFIG_KASAN | 270 | #ifdef CONFIG_KASAN |
| 272 | printk("KASAN"); | 271 | printk("KASAN"); |
| 273 | #endif | 272 | #endif |
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 493f54172b4a..9d56f271d519 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
| @@ -150,13 +150,14 @@ static int page_size_mask; | |||
| 150 | 150 | ||
| 151 | static void __init probe_page_size_mask(void) | 151 | static void __init probe_page_size_mask(void) |
| 152 | { | 152 | { |
| 153 | #if !defined(CONFIG_DEBUG_PAGEALLOC) && !defined(CONFIG_KMEMCHECK) | 153 | #if !defined(CONFIG_KMEMCHECK) |
| 154 | /* | 154 | /* |
| 155 | * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. | 155 | * For CONFIG_KMEMCHECK or pagealloc debugging, identity mapping will |
| 156 | * use small pages. | ||
| 156 | * This will simplify cpa(), which otherwise needs to support splitting | 157 | * This will simplify cpa(), which otherwise needs to support splitting |
| 157 | * large pages into small in interrupt context, etc. | 158 | * large pages into small in interrupt context, etc. |
| 158 | */ | 159 | */ |
| 159 | if (cpu_has_pse) | 160 | if (cpu_has_pse && !debug_pagealloc_enabled()) |
| 160 | page_size_mask |= 1 << PG_LEVEL_2M; | 161 | page_size_mask |= 1 << PG_LEVEL_2M; |
| 161 | #endif | 162 | #endif |
| 162 | 163 | ||
| @@ -666,21 +667,22 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) | |||
| 666 | * mark them not present - any buggy init-section access will | 667 | * mark them not present - any buggy init-section access will |
| 667 | * create a kernel page fault: | 668 | * create a kernel page fault: |
| 668 | */ | 669 | */ |
| 669 | #ifdef CONFIG_DEBUG_PAGEALLOC | 670 | if (debug_pagealloc_enabled()) { |
| 670 | printk(KERN_INFO "debug: unmapping init [mem %#010lx-%#010lx]\n", | 671 | pr_info("debug: unmapping init [mem %#010lx-%#010lx]\n", |
| 671 | begin, end - 1); | 672 | begin, end - 1); |
| 672 | set_memory_np(begin, (end - begin) >> PAGE_SHIFT); | 673 | set_memory_np(begin, (end - begin) >> PAGE_SHIFT); |
| 673 | #else | 674 | } else { |
| 674 | /* | 675 | /* |
| 675 | * We just marked the kernel text read only above, now that | 676 | * We just marked the kernel text read only above, now that |
| 676 | * we are going to free part of that, we need to make that | 677 | * we are going to free part of that, we need to make that |
| 677 | * writeable and non-executable first. | 678 | * writeable and non-executable first. |
| 678 | */ | 679 | */ |
| 679 | set_memory_nx(begin, (end - begin) >> PAGE_SHIFT); | 680 | set_memory_nx(begin, (end - begin) >> PAGE_SHIFT); |
| 680 | set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); | 681 | set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); |
| 681 | 682 | ||
| 682 | free_reserved_area((void *)begin, (void *)end, POISON_FREE_INITMEM, what); | 683 | free_reserved_area((void *)begin, (void *)end, |
| 683 | #endif | 684 | POISON_FREE_INITMEM, what); |
| 685 | } | ||
| 684 | } | 686 | } |
| 685 | 687 | ||
| 686 | void free_initmem(void) | 688 | void free_initmem(void) |
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 007ebe2d8157..4d0b26253042 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
| @@ -106,12 +106,6 @@ static inline unsigned long highmap_end_pfn(void) | |||
| 106 | 106 | ||
| 107 | #endif | 107 | #endif |
| 108 | 108 | ||
| 109 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
| 110 | # define debug_pagealloc 1 | ||
| 111 | #else | ||
| 112 | # define debug_pagealloc 0 | ||
| 113 | #endif | ||
| 114 | |||
| 115 | static inline int | 109 | static inline int |
| 116 | within(unsigned long addr, unsigned long start, unsigned long end) | 110 | within(unsigned long addr, unsigned long start, unsigned long end) |
| 117 | { | 111 | { |
| @@ -714,10 +708,10 @@ static int split_large_page(struct cpa_data *cpa, pte_t *kpte, | |||
| 714 | { | 708 | { |
| 715 | struct page *base; | 709 | struct page *base; |
| 716 | 710 | ||
| 717 | if (!debug_pagealloc) | 711 | if (!debug_pagealloc_enabled()) |
| 718 | spin_unlock(&cpa_lock); | 712 | spin_unlock(&cpa_lock); |
| 719 | base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0); | 713 | base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0); |
| 720 | if (!debug_pagealloc) | 714 | if (!debug_pagealloc_enabled()) |
| 721 | spin_lock(&cpa_lock); | 715 | spin_lock(&cpa_lock); |
| 722 | if (!base) | 716 | if (!base) |
| 723 | return -ENOMEM; | 717 | return -ENOMEM; |
| @@ -1339,10 +1333,10 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) | |||
| 1339 | if (cpa->flags & (CPA_ARRAY | CPA_PAGES_ARRAY)) | 1333 | if (cpa->flags & (CPA_ARRAY | CPA_PAGES_ARRAY)) |
| 1340 | cpa->numpages = 1; | 1334 | cpa->numpages = 1; |
| 1341 | 1335 | ||
| 1342 | if (!debug_pagealloc) | 1336 | if (!debug_pagealloc_enabled()) |
| 1343 | spin_lock(&cpa_lock); | 1337 | spin_lock(&cpa_lock); |
| 1344 | ret = __change_page_attr(cpa, checkalias); | 1338 | ret = __change_page_attr(cpa, checkalias); |
| 1345 | if (!debug_pagealloc) | 1339 | if (!debug_pagealloc_enabled()) |
| 1346 | spin_unlock(&cpa_lock); | 1340 | spin_unlock(&cpa_lock); |
| 1347 | if (ret) | 1341 | if (ret) |
| 1348 | return ret; | 1342 | return ret; |
diff --git a/block/partition-generic.c b/block/partition-generic.c index fefd01b496a0..5d8701941054 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c | |||
| @@ -217,10 +217,21 @@ static void part_release(struct device *dev) | |||
| 217 | kfree(p); | 217 | kfree(p); |
| 218 | } | 218 | } |
| 219 | 219 | ||
| 220 | static int part_uevent(struct device *dev, struct kobj_uevent_env *env) | ||
| 221 | { | ||
| 222 | struct hd_struct *part = dev_to_part(dev); | ||
| 223 | |||
| 224 | add_uevent_var(env, "PARTN=%u", part->partno); | ||
| 225 | if (part->info && part->info->volname[0]) | ||
| 226 | add_uevent_var(env, "PARTNAME=%s", part->info->volname); | ||
| 227 | return 0; | ||
| 228 | } | ||
| 229 | |||
| 220 | struct device_type part_type = { | 230 | struct device_type part_type = { |
| 221 | .name = "partition", | 231 | .name = "partition", |
| 222 | .groups = part_attr_groups, | 232 | .groups = part_attr_groups, |
| 223 | .release = part_release, | 233 | .release = part_release, |
| 234 | .uevent = part_uevent, | ||
| 224 | }; | 235 | }; |
| 225 | 236 | ||
| 226 | static void delete_partition_rcu_cb(struct rcu_head *head) | 237 | static void delete_partition_rcu_cb(struct rcu_head *head) |
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 175c86bee3a9..9ca2b2fefd76 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c | |||
| @@ -61,8 +61,8 @@ module_param(latency_factor, uint, 0644); | |||
| 61 | 61 | ||
| 62 | static DEFINE_PER_CPU(struct cpuidle_device *, acpi_cpuidle_device); | 62 | static DEFINE_PER_CPU(struct cpuidle_device *, acpi_cpuidle_device); |
| 63 | 63 | ||
| 64 | static DEFINE_PER_CPU(struct acpi_processor_cx * [CPUIDLE_STATE_MAX], | 64 | static |
| 65 | acpi_cstate); | 65 | DEFINE_PER_CPU(struct acpi_processor_cx * [CPUIDLE_STATE_MAX], acpi_cstate); |
| 66 | 66 | ||
| 67 | static int disabled_by_idle_boot_param(void) | 67 | static int disabled_by_idle_boot_param(void) |
| 68 | { | 68 | { |
diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 213456c2b123..f46dba8b7092 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c | |||
| @@ -251,7 +251,7 @@ memory_block_action(unsigned long phys_index, unsigned long action, int online_t | |||
| 251 | return ret; | 251 | return ret; |
| 252 | } | 252 | } |
| 253 | 253 | ||
| 254 | static int memory_block_change_state(struct memory_block *mem, | 254 | int memory_block_change_state(struct memory_block *mem, |
| 255 | unsigned long to_state, unsigned long from_state_req) | 255 | unsigned long to_state, unsigned long from_state_req) |
| 256 | { | 256 | { |
| 257 | int ret = 0; | 257 | int ret = 0; |
| @@ -439,6 +439,37 @@ print_block_size(struct device *dev, struct device_attribute *attr, | |||
| 439 | static DEVICE_ATTR(block_size_bytes, 0444, print_block_size, NULL); | 439 | static DEVICE_ATTR(block_size_bytes, 0444, print_block_size, NULL); |
| 440 | 440 | ||
| 441 | /* | 441 | /* |
| 442 | * Memory auto online policy. | ||
| 443 | */ | ||
| 444 | |||
| 445 | static ssize_t | ||
| 446 | show_auto_online_blocks(struct device *dev, struct device_attribute *attr, | ||
| 447 | char *buf) | ||
| 448 | { | ||
| 449 | if (memhp_auto_online) | ||
| 450 | return sprintf(buf, "online\n"); | ||
| 451 | else | ||
| 452 | return sprintf(buf, "offline\n"); | ||
| 453 | } | ||
| 454 | |||
| 455 | static ssize_t | ||
| 456 | store_auto_online_blocks(struct device *dev, struct device_attribute *attr, | ||
| 457 | const char *buf, size_t count) | ||
| 458 | { | ||
| 459 | if (sysfs_streq(buf, "online")) | ||
| 460 | memhp_auto_online = true; | ||
| 461 | else if (sysfs_streq(buf, "offline")) | ||
| 462 | memhp_auto_online = false; | ||
| 463 | else | ||
| 464 | return -EINVAL; | ||
| 465 | |||
| 466 | return count; | ||
| 467 | } | ||
| 468 | |||
| 469 | static DEVICE_ATTR(auto_online_blocks, 0644, show_auto_online_blocks, | ||
| 470 | store_auto_online_blocks); | ||
| 471 | |||
| 472 | /* | ||
| 442 | * Some architectures will have custom drivers to do this, and | 473 | * Some architectures will have custom drivers to do this, and |
| 443 | * will not need to do it from userspace. The fake hot-add code | 474 | * will not need to do it from userspace. The fake hot-add code |
| 444 | * as well as ppc64 will do all of their discovery in userspace | 475 | * as well as ppc64 will do all of their discovery in userspace |
| @@ -746,6 +777,7 @@ static struct attribute *memory_root_attrs[] = { | |||
| 746 | #endif | 777 | #endif |
| 747 | 778 | ||
| 748 | &dev_attr_block_size_bytes.attr, | 779 | &dev_attr_block_size_bytes.attr, |
| 780 | &dev_attr_auto_online_blocks.attr, | ||
| 749 | NULL | 781 | NULL |
| 750 | }; | 782 | }; |
| 751 | 783 | ||
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 562b5a4ca7b7..78a39f736c64 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c | |||
| @@ -126,7 +126,7 @@ | |||
| 126 | */ | 126 | */ |
| 127 | #include <linux/types.h> | 127 | #include <linux/types.h> |
| 128 | 128 | ||
| 129 | static bool verbose = 0; | 129 | static int verbose = 0; |
| 130 | static int major = PD_MAJOR; | 130 | static int major = PD_MAJOR; |
| 131 | static char *name = PD_NAME; | 131 | static char *name = PD_NAME; |
| 132 | static int cluster = 64; | 132 | static int cluster = 64; |
| @@ -161,7 +161,7 @@ enum {D_PRT, D_PRO, D_UNI, D_MOD, D_GEO, D_SBY, D_DLY, D_SLV}; | |||
| 161 | static DEFINE_MUTEX(pd_mutex); | 161 | static DEFINE_MUTEX(pd_mutex); |
| 162 | static DEFINE_SPINLOCK(pd_lock); | 162 | static DEFINE_SPINLOCK(pd_lock); |
| 163 | 163 | ||
| 164 | module_param(verbose, bool, 0); | 164 | module_param(verbose, int, 0); |
| 165 | module_param(major, int, 0); | 165 | module_param(major, int, 0); |
| 166 | module_param(name, charp, 0); | 166 | module_param(name, charp, 0); |
| 167 | module_param(cluster, int, 0); | 167 | module_param(cluster, int, 0); |
diff --git a/drivers/block/paride/pt.c b/drivers/block/paride/pt.c index 1740d75e8a32..216a94fed5b4 100644 --- a/drivers/block/paride/pt.c +++ b/drivers/block/paride/pt.c | |||
| @@ -117,7 +117,7 @@ | |||
| 117 | 117 | ||
| 118 | */ | 118 | */ |
| 119 | 119 | ||
| 120 | static bool verbose = 0; | 120 | static int verbose = 0; |
| 121 | static int major = PT_MAJOR; | 121 | static int major = PT_MAJOR; |
| 122 | static char *name = PT_NAME; | 122 | static char *name = PT_NAME; |
| 123 | static int disable = 0; | 123 | static int disable = 0; |
| @@ -152,7 +152,7 @@ static int (*drives[4])[6] = {&drive0, &drive1, &drive2, &drive3}; | |||
| 152 | 152 | ||
| 153 | #include <asm/uaccess.h> | 153 | #include <asm/uaccess.h> |
| 154 | 154 | ||
| 155 | module_param(verbose, bool, 0); | 155 | module_param(verbose, int, 0); |
| 156 | module_param(major, int, 0); | 156 | module_param(major, int, 0); |
| 157 | module_param(name, charp, 0); | 157 | module_param(name, charp, 0); |
| 158 | module_param_array(drive0, int, NULL, 0); | 158 | module_param_array(drive0, int, NULL, 0); |
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 73708acce3ca..979a8317204f 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig | |||
| @@ -37,23 +37,30 @@ config XEN_BALLOON_MEMORY_HOTPLUG | |||
| 37 | 37 | ||
| 38 | Memory could be hotplugged in following steps: | 38 | Memory could be hotplugged in following steps: |
| 39 | 39 | ||
| 40 | 1) dom0: xl mem-max <domU> <maxmem> | 40 | 1) target domain: ensure that memory auto online policy is in |
| 41 | effect by checking /sys/devices/system/memory/auto_online_blocks | ||
| 42 | file (should be 'online'). | ||
| 43 | |||
| 44 | 2) control domain: xl mem-max <target-domain> <maxmem> | ||
| 41 | where <maxmem> is >= requested memory size, | 45 | where <maxmem> is >= requested memory size, |
| 42 | 46 | ||
| 43 | 2) dom0: xl mem-set <domU> <memory> | 47 | 3) control domain: xl mem-set <target-domain> <memory> |
| 44 | where <memory> is requested memory size; alternatively memory | 48 | where <memory> is requested memory size; alternatively memory |
| 45 | could be added by writing proper value to | 49 | could be added by writing proper value to |
| 46 | /sys/devices/system/xen_memory/xen_memory0/target or | 50 | /sys/devices/system/xen_memory/xen_memory0/target or |
| 47 | /sys/devices/system/xen_memory/xen_memory0/target_kb on dumU, | 51 | /sys/devices/system/xen_memory/xen_memory0/target_kb on the |
| 52 | target domain. | ||
| 48 | 53 | ||
| 49 | 3) domU: for i in /sys/devices/system/memory/memory*/state; do \ | 54 | Alternatively, if memory auto onlining was not requested at step 1 |
| 50 | [ "`cat "$i"`" = offline ] && echo online > "$i"; done | 55 | the newly added memory can be manually onlined in the target domain |
| 56 | by doing the following: | ||
| 51 | 57 | ||
| 52 | Memory could be onlined automatically on domU by adding following line to udev rules: | 58 | for i in /sys/devices/system/memory/memory*/state; do \ |
| 59 | [ "`cat "$i"`" = offline ] && echo online > "$i"; done | ||
| 53 | 60 | ||
| 54 | SUBSYSTEM=="memory", ACTION=="add", RUN+="/bin/sh -c '[ -f /sys$devpath/state ] && echo online > /sys$devpath/state'" | 61 | or by adding the following line to udev rules: |
| 55 | 62 | ||
| 56 | In that case step 3 should be omitted. | 63 | SUBSYSTEM=="memory", ACTION=="add", RUN+="/bin/sh -c '[ -f /sys$devpath/state ] && echo online > /sys$devpath/state'" |
| 57 | 64 | ||
| 58 | config XEN_BALLOON_MEMORY_HOTPLUG_LIMIT | 65 | config XEN_BALLOON_MEMORY_HOTPLUG_LIMIT |
| 59 | int "Hotplugged memory limit (in GiB) for a PV guest" | 66 | int "Hotplugged memory limit (in GiB) for a PV guest" |
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index dc4305b407bf..7c8a2cf16f58 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c | |||
| @@ -338,7 +338,16 @@ static enum bp_state reserve_additional_memory(void) | |||
| 338 | } | 338 | } |
| 339 | #endif | 339 | #endif |
| 340 | 340 | ||
| 341 | rc = add_memory_resource(nid, resource); | 341 | /* |
| 342 | * add_memory_resource() will call online_pages() which in its turn | ||
| 343 | * will call xen_online_page() callback causing deadlock if we don't | ||
| 344 | * release balloon_mutex here. Unlocking here is safe because the | ||
| 345 | * callers drop the mutex before trying again. | ||
| 346 | */ | ||
| 347 | mutex_unlock(&balloon_mutex); | ||
| 348 | rc = add_memory_resource(nid, resource, memhp_auto_online); | ||
| 349 | mutex_lock(&balloon_mutex); | ||
| 350 | |||
| 342 | if (rc) { | 351 | if (rc) { |
| 343 | pr_warn("Cannot add additional memory (%i)\n", rc); | 352 | pr_warn("Cannot add additional memory (%i)\n", rc); |
| 344 | goto err; | 353 | goto err; |
diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c index 7dd46312c180..403fe3955393 100644 --- a/drivers/xen/events/events_2l.c +++ b/drivers/xen/events/events_2l.c | |||
| @@ -38,8 +38,9 @@ | |||
| 38 | /* Find the first set bit in a evtchn mask */ | 38 | /* Find the first set bit in a evtchn mask */ |
| 39 | #define EVTCHN_FIRST_BIT(w) find_first_bit(BM(&(w)), BITS_PER_EVTCHN_WORD) | 39 | #define EVTCHN_FIRST_BIT(w) find_first_bit(BM(&(w)), BITS_PER_EVTCHN_WORD) |
| 40 | 40 | ||
| 41 | static DEFINE_PER_CPU(xen_ulong_t [EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD], | 41 | #define EVTCHN_MASK_SIZE (EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD) |
| 42 | cpu_evtchn_mask); | 42 | |
| 43 | static DEFINE_PER_CPU(xen_ulong_t [EVTCHN_MASK_SIZE], cpu_evtchn_mask); | ||
| 43 | 44 | ||
| 44 | static unsigned evtchn_2l_max_channels(void) | 45 | static unsigned evtchn_2l_max_channels(void) |
| 45 | { | 46 | { |
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index c37149b929be..f0d268b97d19 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h | |||
| @@ -1,15 +1,11 @@ | |||
| 1 | /* -*- c -*- ------------------------------------------------------------- * | 1 | /* |
| 2 | * | 2 | * Copyright 1997-1998 Transmeta Corporation - All Rights Reserved |
| 3 | * linux/fs/autofs/autofs_i.h | 3 | * Copyright 2005-2006 Ian Kent <raven@themaw.net> |
| 4 | * | ||
| 5 | * Copyright 1997-1998 Transmeta Corporation - All Rights Reserved | ||
| 6 | * Copyright 2005-2006 Ian Kent <raven@themaw.net> | ||
| 7 | * | 4 | * |
| 8 | * This file is part of the Linux kernel and is made available under | 5 | * This file is part of the Linux kernel and is made available under |
| 9 | * the terms of the GNU General Public License, version 2, or at your | 6 | * the terms of the GNU General Public License, version 2, or at your |
| 10 | * option, any later version, incorporated herein by reference. | 7 | * option, any later version, incorporated herein by reference. |
| 11 | * | 8 | */ |
| 12 | * ----------------------------------------------------------------------- */ | ||
| 13 | 9 | ||
| 14 | /* Internal header file for autofs */ | 10 | /* Internal header file for autofs */ |
| 15 | 11 | ||
| @@ -35,28 +31,23 @@ | |||
| 35 | #include <linux/mount.h> | 31 | #include <linux/mount.h> |
| 36 | #include <linux/namei.h> | 32 | #include <linux/namei.h> |
| 37 | #include <asm/current.h> | 33 | #include <asm/current.h> |
| 38 | #include <asm/uaccess.h> | 34 | #include <linux/uaccess.h> |
| 39 | 35 | ||
| 40 | /* #define DEBUG */ | 36 | /* #define DEBUG */ |
| 41 | 37 | ||
| 42 | #define DPRINTK(fmt, ...) \ | 38 | #ifdef pr_fmt |
| 43 | pr_debug("pid %d: %s: " fmt "\n", \ | 39 | #undef pr_fmt |
| 44 | current->pid, __func__, ##__VA_ARGS__) | 40 | #endif |
| 45 | 41 | #define pr_fmt(fmt) KBUILD_MODNAME ":pid:%d:%s: " fmt, current->pid, __func__ | |
| 46 | #define AUTOFS_WARN(fmt, ...) \ | 42 | |
| 47 | printk(KERN_WARNING "pid %d: %s: " fmt "\n", \ | 43 | /* |
| 48 | current->pid, __func__, ##__VA_ARGS__) | 44 | * Unified info structure. This is pointed to by both the dentry and |
| 49 | 45 | * inode structures. Each file in the filesystem has an instance of this | |
| 50 | #define AUTOFS_ERROR(fmt, ...) \ | 46 | * structure. It holds a reference to the dentry, so dentries are never |
| 51 | printk(KERN_ERR "pid %d: %s: " fmt "\n", \ | 47 | * flushed while the file exists. All name lookups are dealt with at the |
| 52 | current->pid, __func__, ##__VA_ARGS__) | 48 | * dentry level, although the filesystem can interfere in the validation |
| 53 | 49 | * process. Readdir is implemented by traversing the dentry lists. | |
| 54 | /* Unified info structure. This is pointed to by both the dentry and | 50 | */ |
| 55 | inode structures. Each file in the filesystem has an instance of this | ||
| 56 | structure. It holds a reference to the dentry, so dentries are never | ||
| 57 | flushed while the file exists. All name lookups are dealt with at the | ||
| 58 | dentry level, although the filesystem can interfere in the validation | ||
| 59 | process. Readdir is implemented by traversing the dentry lists. */ | ||
| 60 | struct autofs_info { | 51 | struct autofs_info { |
| 61 | struct dentry *dentry; | 52 | struct dentry *dentry; |
| 62 | struct inode *inode; | 53 | struct inode *inode; |
| @@ -78,7 +69,7 @@ struct autofs_info { | |||
| 78 | kgid_t gid; | 69 | kgid_t gid; |
| 79 | }; | 70 | }; |
| 80 | 71 | ||
| 81 | #define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */ | 72 | #define AUTOFS_INF_EXPIRING (1<<0) /* dentry in the process of expiring */ |
| 82 | #define AUTOFS_INF_NO_RCU (1<<1) /* the dentry is being considered | 73 | #define AUTOFS_INF_NO_RCU (1<<1) /* the dentry is being considered |
| 83 | * for expiry, so RCU_walk is | 74 | * for expiry, so RCU_walk is |
| 84 | * not permitted | 75 | * not permitted |
| @@ -140,10 +131,11 @@ static inline struct autofs_info *autofs4_dentry_ino(struct dentry *dentry) | |||
| 140 | } | 131 | } |
| 141 | 132 | ||
| 142 | /* autofs4_oz_mode(): do we see the man behind the curtain? (The | 133 | /* autofs4_oz_mode(): do we see the man behind the curtain? (The |
| 143 | processes which do manipulations for us in user space sees the raw | 134 | * processes which do manipulations for us in user space sees the raw |
| 144 | filesystem without "magic".) */ | 135 | * filesystem without "magic".) |
| 145 | 136 | */ | |
| 146 | static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) { | 137 | static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) |
| 138 | { | ||
| 147 | return sbi->catatonic || task_pgrp(current) == sbi->oz_pgrp; | 139 | return sbi->catatonic || task_pgrp(current) == sbi->oz_pgrp; |
| 148 | } | 140 | } |
| 149 | 141 | ||
| @@ -154,12 +146,12 @@ void autofs4_free_ino(struct autofs_info *); | |||
| 154 | int is_autofs4_dentry(struct dentry *); | 146 | int is_autofs4_dentry(struct dentry *); |
| 155 | int autofs4_expire_wait(struct dentry *dentry, int rcu_walk); | 147 | int autofs4_expire_wait(struct dentry *dentry, int rcu_walk); |
| 156 | int autofs4_expire_run(struct super_block *, struct vfsmount *, | 148 | int autofs4_expire_run(struct super_block *, struct vfsmount *, |
| 157 | struct autofs_sb_info *, | 149 | struct autofs_sb_info *, |
| 158 | struct autofs_packet_expire __user *); | 150 | struct autofs_packet_expire __user *); |
| 159 | int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt, | 151 | int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt, |
| 160 | struct autofs_sb_info *sbi, int when); | 152 | struct autofs_sb_info *sbi, int when); |
| 161 | int autofs4_expire_multi(struct super_block *, struct vfsmount *, | 153 | int autofs4_expire_multi(struct super_block *, struct vfsmount *, |
| 162 | struct autofs_sb_info *, int __user *); | 154 | struct autofs_sb_info *, int __user *); |
| 163 | struct dentry *autofs4_expire_direct(struct super_block *sb, | 155 | struct dentry *autofs4_expire_direct(struct super_block *sb, |
| 164 | struct vfsmount *mnt, | 156 | struct vfsmount *mnt, |
| 165 | struct autofs_sb_info *sbi, int how); | 157 | struct autofs_sb_info *sbi, int how); |
| @@ -224,8 +216,8 @@ static inline int autofs_prepare_pipe(struct file *pipe) | |||
| 224 | 216 | ||
| 225 | /* Queue management functions */ | 217 | /* Queue management functions */ |
| 226 | 218 | ||
| 227 | int autofs4_wait(struct autofs_sb_info *,struct dentry *, enum autofs_notify); | 219 | int autofs4_wait(struct autofs_sb_info *, struct dentry *, enum autofs_notify); |
| 228 | int autofs4_wait_release(struct autofs_sb_info *,autofs_wqt_t,int); | 220 | int autofs4_wait_release(struct autofs_sb_info *, autofs_wqt_t, int); |
| 229 | void autofs4_catatonic_mode(struct autofs_sb_info *); | 221 | void autofs4_catatonic_mode(struct autofs_sb_info *); |
| 230 | 222 | ||
| 231 | static inline u32 autofs4_get_dev(struct autofs_sb_info *sbi) | 223 | static inline u32 autofs4_get_dev(struct autofs_sb_info *sbi) |
| @@ -242,37 +234,37 @@ static inline void __autofs4_add_expiring(struct dentry *dentry) | |||
| 242 | { | 234 | { |
| 243 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); | 235 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); |
| 244 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | 236 | struct autofs_info *ino = autofs4_dentry_ino(dentry); |
| 237 | |||
| 245 | if (ino) { | 238 | if (ino) { |
| 246 | if (list_empty(&ino->expiring)) | 239 | if (list_empty(&ino->expiring)) |
| 247 | list_add(&ino->expiring, &sbi->expiring_list); | 240 | list_add(&ino->expiring, &sbi->expiring_list); |
| 248 | } | 241 | } |
| 249 | return; | ||
| 250 | } | 242 | } |
| 251 | 243 | ||
| 252 | static inline void autofs4_add_expiring(struct dentry *dentry) | 244 | static inline void autofs4_add_expiring(struct dentry *dentry) |
| 253 | { | 245 | { |
| 254 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); | 246 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); |
| 255 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | 247 | struct autofs_info *ino = autofs4_dentry_ino(dentry); |
| 248 | |||
| 256 | if (ino) { | 249 | if (ino) { |
| 257 | spin_lock(&sbi->lookup_lock); | 250 | spin_lock(&sbi->lookup_lock); |
| 258 | if (list_empty(&ino->expiring)) | 251 | if (list_empty(&ino->expiring)) |
| 259 | list_add(&ino->expiring, &sbi->expiring_list); | 252 | list_add(&ino->expiring, &sbi->expiring_list); |
| 260 | spin_unlock(&sbi->lookup_lock); | 253 | spin_unlock(&sbi->lookup_lock); |
| 261 | } | 254 | } |
| 262 | return; | ||
| 263 | } | 255 | } |
| 264 | 256 | ||
| 265 | static inline void autofs4_del_expiring(struct dentry *dentry) | 257 | static inline void autofs4_del_expiring(struct dentry *dentry) |
| 266 | { | 258 | { |
| 267 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); | 259 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); |
| 268 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | 260 | struct autofs_info *ino = autofs4_dentry_ino(dentry); |
| 261 | |||
| 269 | if (ino) { | 262 | if (ino) { |
| 270 | spin_lock(&sbi->lookup_lock); | 263 | spin_lock(&sbi->lookup_lock); |
| 271 | if (!list_empty(&ino->expiring)) | 264 | if (!list_empty(&ino->expiring)) |
| 272 | list_del_init(&ino->expiring); | 265 | list_del_init(&ino->expiring); |
| 273 | spin_unlock(&sbi->lookup_lock); | 266 | spin_unlock(&sbi->lookup_lock); |
| 274 | } | 267 | } |
| 275 | return; | ||
| 276 | } | 268 | } |
| 277 | 269 | ||
| 278 | extern void autofs4_kill_sb(struct super_block *); | 270 | extern void autofs4_kill_sb(struct super_block *); |
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index ac7d921ed984..c7fcc7438843 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c | |||
| @@ -72,13 +72,13 @@ static int check_dev_ioctl_version(int cmd, struct autofs_dev_ioctl *param) | |||
| 72 | { | 72 | { |
| 73 | int err = 0; | 73 | int err = 0; |
| 74 | 74 | ||
| 75 | if ((AUTOFS_DEV_IOCTL_VERSION_MAJOR != param->ver_major) || | 75 | if ((param->ver_major != AUTOFS_DEV_IOCTL_VERSION_MAJOR) || |
| 76 | (AUTOFS_DEV_IOCTL_VERSION_MINOR < param->ver_minor)) { | 76 | (param->ver_minor > AUTOFS_DEV_IOCTL_VERSION_MINOR)) { |
| 77 | AUTOFS_WARN("ioctl control interface version mismatch: " | 77 | pr_warn("ioctl control interface version mismatch: " |
| 78 | "kernel(%u.%u), user(%u.%u), cmd(%d)", | 78 | "kernel(%u.%u), user(%u.%u), cmd(%d)\n", |
| 79 | AUTOFS_DEV_IOCTL_VERSION_MAJOR, | 79 | AUTOFS_DEV_IOCTL_VERSION_MAJOR, |
| 80 | AUTOFS_DEV_IOCTL_VERSION_MINOR, | 80 | AUTOFS_DEV_IOCTL_VERSION_MINOR, |
| 81 | param->ver_major, param->ver_minor, cmd); | 81 | param->ver_major, param->ver_minor, cmd); |
| 82 | err = -EINVAL; | 82 | err = -EINVAL; |
| 83 | } | 83 | } |
| 84 | 84 | ||
| @@ -93,7 +93,8 @@ static int check_dev_ioctl_version(int cmd, struct autofs_dev_ioctl *param) | |||
| 93 | * Copy parameter control struct, including a possible path allocated | 93 | * Copy parameter control struct, including a possible path allocated |
| 94 | * at the end of the struct. | 94 | * at the end of the struct. |
| 95 | */ | 95 | */ |
| 96 | static struct autofs_dev_ioctl *copy_dev_ioctl(struct autofs_dev_ioctl __user *in) | 96 | static struct autofs_dev_ioctl * |
| 97 | copy_dev_ioctl(struct autofs_dev_ioctl __user *in) | ||
| 97 | { | 98 | { |
| 98 | struct autofs_dev_ioctl tmp, *res; | 99 | struct autofs_dev_ioctl tmp, *res; |
| 99 | 100 | ||
| @@ -116,7 +117,6 @@ static struct autofs_dev_ioctl *copy_dev_ioctl(struct autofs_dev_ioctl __user *i | |||
| 116 | static inline void free_dev_ioctl(struct autofs_dev_ioctl *param) | 117 | static inline void free_dev_ioctl(struct autofs_dev_ioctl *param) |
| 117 | { | 118 | { |
| 118 | kfree(param); | 119 | kfree(param); |
| 119 | return; | ||
| 120 | } | 120 | } |
| 121 | 121 | ||
| 122 | /* | 122 | /* |
| @@ -129,24 +129,24 @@ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param) | |||
| 129 | 129 | ||
| 130 | err = check_dev_ioctl_version(cmd, param); | 130 | err = check_dev_ioctl_version(cmd, param); |
| 131 | if (err) { | 131 | if (err) { |
| 132 | AUTOFS_WARN("invalid device control module version " | 132 | pr_warn("invalid device control module version " |
| 133 | "supplied for cmd(0x%08x)", cmd); | 133 | "supplied for cmd(0x%08x)\n", cmd); |
| 134 | goto out; | 134 | goto out; |
| 135 | } | 135 | } |
| 136 | 136 | ||
| 137 | if (param->size > sizeof(*param)) { | 137 | if (param->size > sizeof(*param)) { |
| 138 | err = invalid_str(param->path, param->size - sizeof(*param)); | 138 | err = invalid_str(param->path, param->size - sizeof(*param)); |
| 139 | if (err) { | 139 | if (err) { |
| 140 | AUTOFS_WARN( | 140 | pr_warn( |
| 141 | "path string terminator missing for cmd(0x%08x)", | 141 | "path string terminator missing for cmd(0x%08x)\n", |
| 142 | cmd); | 142 | cmd); |
| 143 | goto out; | 143 | goto out; |
| 144 | } | 144 | } |
| 145 | 145 | ||
| 146 | err = check_name(param->path); | 146 | err = check_name(param->path); |
| 147 | if (err) { | 147 | if (err) { |
| 148 | AUTOFS_WARN("invalid path supplied for cmd(0x%08x)", | 148 | pr_warn("invalid path supplied for cmd(0x%08x)\n", |
| 149 | cmd); | 149 | cmd); |
| 150 | goto out; | 150 | goto out; |
| 151 | } | 151 | } |
| 152 | } | 152 | } |
| @@ -197,7 +197,9 @@ static int find_autofs_mount(const char *pathname, | |||
| 197 | void *data) | 197 | void *data) |
| 198 | { | 198 | { |
| 199 | struct path path; | 199 | struct path path; |
| 200 | int err = kern_path_mountpoint(AT_FDCWD, pathname, &path, 0); | 200 | int err; |
| 201 | |||
| 202 | err = kern_path_mountpoint(AT_FDCWD, pathname, &path, 0); | ||
| 201 | if (err) | 203 | if (err) |
| 202 | return err; | 204 | return err; |
| 203 | err = -ENOENT; | 205 | err = -ENOENT; |
| @@ -225,6 +227,7 @@ static int test_by_dev(struct path *path, void *p) | |||
| 225 | static int test_by_type(struct path *path, void *p) | 227 | static int test_by_type(struct path *path, void *p) |
| 226 | { | 228 | { |
| 227 | struct autofs_info *ino = autofs4_dentry_ino(path->dentry); | 229 | struct autofs_info *ino = autofs4_dentry_ino(path->dentry); |
| 230 | |||
| 228 | return ino && ino->sbi->type & *(unsigned *)p; | 231 | return ino && ino->sbi->type & *(unsigned *)p; |
| 229 | } | 232 | } |
| 230 | 233 | ||
| @@ -370,7 +373,7 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp, | |||
| 370 | new_pid = get_task_pid(current, PIDTYPE_PGID); | 373 | new_pid = get_task_pid(current, PIDTYPE_PGID); |
| 371 | 374 | ||
| 372 | if (ns_of_pid(new_pid) != ns_of_pid(sbi->oz_pgrp)) { | 375 | if (ns_of_pid(new_pid) != ns_of_pid(sbi->oz_pgrp)) { |
| 373 | AUTOFS_WARN("Not allowed to change PID namespace"); | 376 | pr_warn("not allowed to change PID namespace\n"); |
| 374 | err = -EINVAL; | 377 | err = -EINVAL; |
| 375 | goto out; | 378 | goto out; |
| 376 | } | 379 | } |
| @@ -456,8 +459,10 @@ static int autofs_dev_ioctl_requester(struct file *fp, | |||
| 456 | err = 0; | 459 | err = 0; |
| 457 | autofs4_expire_wait(path.dentry, 0); | 460 | autofs4_expire_wait(path.dentry, 0); |
| 458 | spin_lock(&sbi->fs_lock); | 461 | spin_lock(&sbi->fs_lock); |
| 459 | param->requester.uid = from_kuid_munged(current_user_ns(), ino->uid); | 462 | param->requester.uid = |
| 460 | param->requester.gid = from_kgid_munged(current_user_ns(), ino->gid); | 463 | from_kuid_munged(current_user_ns(), ino->uid); |
| 464 | param->requester.gid = | ||
| 465 | from_kgid_munged(current_user_ns(), ino->gid); | ||
| 461 | spin_unlock(&sbi->fs_lock); | 466 | spin_unlock(&sbi->fs_lock); |
| 462 | } | 467 | } |
| 463 | path_put(&path); | 468 | path_put(&path); |
| @@ -619,7 +624,8 @@ static ioctl_fn lookup_dev_ioctl(unsigned int cmd) | |||
| 619 | } | 624 | } |
| 620 | 625 | ||
| 621 | /* ioctl dispatcher */ | 626 | /* ioctl dispatcher */ |
| 622 | static int _autofs_dev_ioctl(unsigned int command, struct autofs_dev_ioctl __user *user) | 627 | static int _autofs_dev_ioctl(unsigned int command, |
| 628 | struct autofs_dev_ioctl __user *user) | ||
| 623 | { | 629 | { |
| 624 | struct autofs_dev_ioctl *param; | 630 | struct autofs_dev_ioctl *param; |
| 625 | struct file *fp; | 631 | struct file *fp; |
| @@ -655,7 +661,7 @@ static int _autofs_dev_ioctl(unsigned int command, struct autofs_dev_ioctl __use | |||
| 655 | 661 | ||
| 656 | fn = lookup_dev_ioctl(cmd); | 662 | fn = lookup_dev_ioctl(cmd); |
| 657 | if (!fn) { | 663 | if (!fn) { |
| 658 | AUTOFS_WARN("unknown command 0x%08x", command); | 664 | pr_warn("unknown command 0x%08x\n", command); |
| 659 | return -ENOTTY; | 665 | return -ENOTTY; |
| 660 | } | 666 | } |
| 661 | 667 | ||
| @@ -711,6 +717,7 @@ out: | |||
| 711 | static long autofs_dev_ioctl(struct file *file, uint command, ulong u) | 717 | static long autofs_dev_ioctl(struct file *file, uint command, ulong u) |
| 712 | { | 718 | { |
| 713 | int err; | 719 | int err; |
| 720 | |||
| 714 | err = _autofs_dev_ioctl(command, (struct autofs_dev_ioctl __user *) u); | 721 | err = _autofs_dev_ioctl(command, (struct autofs_dev_ioctl __user *) u); |
| 715 | return (long) err; | 722 | return (long) err; |
| 716 | } | 723 | } |
| @@ -733,8 +740,8 @@ static const struct file_operations _dev_ioctl_fops = { | |||
| 733 | 740 | ||
| 734 | static struct miscdevice _autofs_dev_ioctl_misc = { | 741 | static struct miscdevice _autofs_dev_ioctl_misc = { |
| 735 | .minor = AUTOFS_MINOR, | 742 | .minor = AUTOFS_MINOR, |
| 736 | .name = AUTOFS_DEVICE_NAME, | 743 | .name = AUTOFS_DEVICE_NAME, |
| 737 | .fops = &_dev_ioctl_fops | 744 | .fops = &_dev_ioctl_fops |
| 738 | }; | 745 | }; |
| 739 | 746 | ||
| 740 | MODULE_ALIAS_MISCDEV(AUTOFS_MINOR); | 747 | MODULE_ALIAS_MISCDEV(AUTOFS_MINOR); |
| @@ -747,7 +754,7 @@ int __init autofs_dev_ioctl_init(void) | |||
| 747 | 754 | ||
| 748 | r = misc_register(&_autofs_dev_ioctl_misc); | 755 | r = misc_register(&_autofs_dev_ioctl_misc); |
| 749 | if (r) { | 756 | if (r) { |
| 750 | AUTOFS_ERROR("misc_register failed for control device"); | 757 | pr_err("misc_register failed for control device\n"); |
| 751 | return r; | 758 | return r; |
| 752 | } | 759 | } |
| 753 | 760 | ||
| @@ -757,6 +764,4 @@ int __init autofs_dev_ioctl_init(void) | |||
| 757 | void autofs_dev_ioctl_exit(void) | 764 | void autofs_dev_ioctl_exit(void) |
| 758 | { | 765 | { |
| 759 | misc_deregister(&_autofs_dev_ioctl_misc); | 766 | misc_deregister(&_autofs_dev_ioctl_misc); |
| 760 | return; | ||
| 761 | } | 767 | } |
| 762 | |||
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 1cebc3c52fa5..9510d8d2e9cd 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c | |||
| @@ -1,16 +1,12 @@ | |||
| 1 | /* -*- c -*- --------------------------------------------------------------- * | 1 | /* |
| 2 | * | 2 | * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved |
| 3 | * linux/fs/autofs/expire.c | 3 | * Copyright 1999-2000 Jeremy Fitzhardinge <jeremy@goop.org> |
| 4 | * | 4 | * Copyright 2001-2006 Ian Kent <raven@themaw.net> |
| 5 | * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved | ||
| 6 | * Copyright 1999-2000 Jeremy Fitzhardinge <jeremy@goop.org> | ||
| 7 | * Copyright 2001-2006 Ian Kent <raven@themaw.net> | ||
| 8 | * | 5 | * |
| 9 | * This file is part of the Linux kernel and is made available under | 6 | * This file is part of the Linux kernel and is made available under |
| 10 | * the terms of the GNU General Public License, version 2, or at your | 7 | * the terms of the GNU General Public License, version 2, or at your |
| 11 | * option, any later version, incorporated herein by reference. | 8 | * option, any later version, incorporated herein by reference. |
| 12 | * | 9 | */ |
| 13 | * ------------------------------------------------------------------------- */ | ||
| 14 | 10 | ||
| 15 | #include "autofs_i.h" | 11 | #include "autofs_i.h" |
| 16 | 12 | ||
| @@ -18,7 +14,7 @@ static unsigned long now; | |||
| 18 | 14 | ||
| 19 | /* Check if a dentry can be expired */ | 15 | /* Check if a dentry can be expired */ |
| 20 | static inline int autofs4_can_expire(struct dentry *dentry, | 16 | static inline int autofs4_can_expire(struct dentry *dentry, |
| 21 | unsigned long timeout, int do_now) | 17 | unsigned long timeout, int do_now) |
| 22 | { | 18 | { |
| 23 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | 19 | struct autofs_info *ino = autofs4_dentry_ino(dentry); |
| 24 | 20 | ||
| @@ -41,7 +37,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) | |||
| 41 | struct path path = {.mnt = mnt, .dentry = dentry}; | 37 | struct path path = {.mnt = mnt, .dentry = dentry}; |
| 42 | int status = 1; | 38 | int status = 1; |
| 43 | 39 | ||
| 44 | DPRINTK("dentry %p %pd", dentry, dentry); | 40 | pr_debug("dentry %p %pd\n", dentry, dentry); |
| 45 | 41 | ||
| 46 | path_get(&path); | 42 | path_get(&path); |
| 47 | 43 | ||
| @@ -58,14 +54,16 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) | |||
| 58 | 54 | ||
| 59 | /* Update the expiry counter if fs is busy */ | 55 | /* Update the expiry counter if fs is busy */ |
| 60 | if (!may_umount_tree(path.mnt)) { | 56 | if (!may_umount_tree(path.mnt)) { |
| 61 | struct autofs_info *ino = autofs4_dentry_ino(top); | 57 | struct autofs_info *ino; |
| 58 | |||
| 59 | ino = autofs4_dentry_ino(top); | ||
| 62 | ino->last_used = jiffies; | 60 | ino->last_used = jiffies; |
| 63 | goto done; | 61 | goto done; |
| 64 | } | 62 | } |
| 65 | 63 | ||
| 66 | status = 0; | 64 | status = 0; |
| 67 | done: | 65 | done: |
| 68 | DPRINTK("returning = %d", status); | 66 | pr_debug("returning = %d\n", status); |
| 69 | path_put(&path); | 67 | path_put(&path); |
| 70 | return status; | 68 | return status; |
| 71 | } | 69 | } |
| @@ -74,7 +72,7 @@ done: | |||
| 74 | * Calculate and dget next entry in the subdirs list under root. | 72 | * Calculate and dget next entry in the subdirs list under root. |
| 75 | */ | 73 | */ |
| 76 | static struct dentry *get_next_positive_subdir(struct dentry *prev, | 74 | static struct dentry *get_next_positive_subdir(struct dentry *prev, |
| 77 | struct dentry *root) | 75 | struct dentry *root) |
| 78 | { | 76 | { |
| 79 | struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb); | 77 | struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb); |
| 80 | struct list_head *next; | 78 | struct list_head *next; |
| @@ -121,7 +119,7 @@ cont: | |||
| 121 | * Calculate and dget next entry in top down tree traversal. | 119 | * Calculate and dget next entry in top down tree traversal. |
| 122 | */ | 120 | */ |
| 123 | static struct dentry *get_next_positive_dentry(struct dentry *prev, | 121 | static struct dentry *get_next_positive_dentry(struct dentry *prev, |
| 124 | struct dentry *root) | 122 | struct dentry *root) |
| 125 | { | 123 | { |
| 126 | struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb); | 124 | struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb); |
| 127 | struct list_head *next; | 125 | struct list_head *next; |
| @@ -187,15 +185,17 @@ again: | |||
| 187 | * autofs submounts. | 185 | * autofs submounts. |
| 188 | */ | 186 | */ |
| 189 | static int autofs4_direct_busy(struct vfsmount *mnt, | 187 | static int autofs4_direct_busy(struct vfsmount *mnt, |
| 190 | struct dentry *top, | 188 | struct dentry *top, |
| 191 | unsigned long timeout, | 189 | unsigned long timeout, |
| 192 | int do_now) | 190 | int do_now) |
| 193 | { | 191 | { |
| 194 | DPRINTK("top %p %pd", top, top); | 192 | pr_debug("top %p %pd\n", top, top); |
| 195 | 193 | ||
| 196 | /* If it's busy update the expiry counters */ | 194 | /* If it's busy update the expiry counters */ |
| 197 | if (!may_umount_tree(mnt)) { | 195 | if (!may_umount_tree(mnt)) { |
| 198 | struct autofs_info *ino = autofs4_dentry_ino(top); | 196 | struct autofs_info *ino; |
| 197 | |||
| 198 | ino = autofs4_dentry_ino(top); | ||
| 199 | if (ino) | 199 | if (ino) |
| 200 | ino->last_used = jiffies; | 200 | ino->last_used = jiffies; |
| 201 | return 1; | 201 | return 1; |
| @@ -208,7 +208,8 @@ static int autofs4_direct_busy(struct vfsmount *mnt, | |||
| 208 | return 0; | 208 | return 0; |
| 209 | } | 209 | } |
| 210 | 210 | ||
| 211 | /* Check a directory tree of mount points for busyness | 211 | /* |
| 212 | * Check a directory tree of mount points for busyness | ||
| 212 | * The tree is not busy iff no mountpoints are busy | 213 | * The tree is not busy iff no mountpoints are busy |
| 213 | */ | 214 | */ |
| 214 | static int autofs4_tree_busy(struct vfsmount *mnt, | 215 | static int autofs4_tree_busy(struct vfsmount *mnt, |
| @@ -219,7 +220,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt, | |||
| 219 | struct autofs_info *top_ino = autofs4_dentry_ino(top); | 220 | struct autofs_info *top_ino = autofs4_dentry_ino(top); |
| 220 | struct dentry *p; | 221 | struct dentry *p; |
| 221 | 222 | ||
| 222 | DPRINTK("top %p %pd", top, top); | 223 | pr_debug("top %p %pd\n", top, top); |
| 223 | 224 | ||
| 224 | /* Negative dentry - give up */ | 225 | /* Negative dentry - give up */ |
| 225 | if (!simple_positive(top)) | 226 | if (!simple_positive(top)) |
| @@ -227,7 +228,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt, | |||
| 227 | 228 | ||
| 228 | p = NULL; | 229 | p = NULL; |
| 229 | while ((p = get_next_positive_dentry(p, top))) { | 230 | while ((p = get_next_positive_dentry(p, top))) { |
| 230 | DPRINTK("dentry %p %pd", p, p); | 231 | pr_debug("dentry %p %pd\n", p, p); |
| 231 | 232 | ||
| 232 | /* | 233 | /* |
| 233 | * Is someone visiting anywhere in the subtree ? | 234 | * Is someone visiting anywhere in the subtree ? |
| @@ -273,11 +274,11 @@ static struct dentry *autofs4_check_leaves(struct vfsmount *mnt, | |||
| 273 | { | 274 | { |
| 274 | struct dentry *p; | 275 | struct dentry *p; |
| 275 | 276 | ||
| 276 | DPRINTK("parent %p %pd", parent, parent); | 277 | pr_debug("parent %p %pd\n", parent, parent); |
| 277 | 278 | ||
| 278 | p = NULL; | 279 | p = NULL; |
| 279 | while ((p = get_next_positive_dentry(p, parent))) { | 280 | while ((p = get_next_positive_dentry(p, parent))) { |
| 280 | DPRINTK("dentry %p %pd", p, p); | 281 | pr_debug("dentry %p %pd\n", p, p); |
| 281 | 282 | ||
| 282 | if (d_mountpoint(p)) { | 283 | if (d_mountpoint(p)) { |
| 283 | /* Can we umount this guy */ | 284 | /* Can we umount this guy */ |
| @@ -362,7 +363,7 @@ static struct dentry *should_expire(struct dentry *dentry, | |||
| 362 | * offset (autofs-5.0+). | 363 | * offset (autofs-5.0+). |
| 363 | */ | 364 | */ |
| 364 | if (d_mountpoint(dentry)) { | 365 | if (d_mountpoint(dentry)) { |
| 365 | DPRINTK("checking mountpoint %p %pd", dentry, dentry); | 366 | pr_debug("checking mountpoint %p %pd\n", dentry, dentry); |
| 366 | 367 | ||
| 367 | /* Can we umount this guy */ | 368 | /* Can we umount this guy */ |
| 368 | if (autofs4_mount_busy(mnt, dentry)) | 369 | if (autofs4_mount_busy(mnt, dentry)) |
| @@ -375,7 +376,7 @@ static struct dentry *should_expire(struct dentry *dentry, | |||
| 375 | } | 376 | } |
| 376 | 377 | ||
| 377 | if (d_really_is_positive(dentry) && d_is_symlink(dentry)) { | 378 | if (d_really_is_positive(dentry) && d_is_symlink(dentry)) { |
| 378 | DPRINTK("checking symlink %p %pd", dentry, dentry); | 379 | pr_debug("checking symlink %p %pd\n", dentry, dentry); |
| 379 | /* | 380 | /* |
| 380 | * A symlink can't be "busy" in the usual sense so | 381 | * A symlink can't be "busy" in the usual sense so |
| 381 | * just check last used for expire timeout. | 382 | * just check last used for expire timeout. |
| @@ -404,6 +405,7 @@ static struct dentry *should_expire(struct dentry *dentry, | |||
| 404 | } else { | 405 | } else { |
| 405 | /* Path walk currently on this dentry? */ | 406 | /* Path walk currently on this dentry? */ |
| 406 | struct dentry *expired; | 407 | struct dentry *expired; |
| 408 | |||
| 407 | ino_count = atomic_read(&ino->count) + 1; | 409 | ino_count = atomic_read(&ino->count) + 1; |
| 408 | if (d_count(dentry) > ino_count) | 410 | if (d_count(dentry) > ino_count) |
| 409 | return NULL; | 411 | return NULL; |
| @@ -471,7 +473,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, | |||
| 471 | return NULL; | 473 | return NULL; |
| 472 | 474 | ||
| 473 | found: | 475 | found: |
| 474 | DPRINTK("returning %p %pd", expired, expired); | 476 | pr_debug("returning %p %pd\n", expired, expired); |
| 475 | ino->flags |= AUTOFS_INF_EXPIRING; | 477 | ino->flags |= AUTOFS_INF_EXPIRING; |
| 476 | smp_mb(); | 478 | smp_mb(); |
| 477 | ino->flags &= ~AUTOFS_INF_NO_RCU; | 479 | ino->flags &= ~AUTOFS_INF_NO_RCU; |
| @@ -503,12 +505,12 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk) | |||
| 503 | if (ino->flags & AUTOFS_INF_EXPIRING) { | 505 | if (ino->flags & AUTOFS_INF_EXPIRING) { |
| 504 | spin_unlock(&sbi->fs_lock); | 506 | spin_unlock(&sbi->fs_lock); |
| 505 | 507 | ||
| 506 | DPRINTK("waiting for expire %p name=%pd", dentry, dentry); | 508 | pr_debug("waiting for expire %p name=%pd\n", dentry, dentry); |
| 507 | 509 | ||
| 508 | status = autofs4_wait(sbi, dentry, NFY_NONE); | 510 | status = autofs4_wait(sbi, dentry, NFY_NONE); |
| 509 | wait_for_completion(&ino->expire_complete); | 511 | wait_for_completion(&ino->expire_complete); |
| 510 | 512 | ||
| 511 | DPRINTK("expire done status=%d", status); | 513 | pr_debug("expire done status=%d\n", status); |
| 512 | 514 | ||
| 513 | if (d_unhashed(dentry)) | 515 | if (d_unhashed(dentry)) |
| 514 | return -EAGAIN; | 516 | return -EAGAIN; |
| @@ -522,21 +524,22 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk) | |||
| 522 | 524 | ||
| 523 | /* Perform an expiry operation */ | 525 | /* Perform an expiry operation */ |
| 524 | int autofs4_expire_run(struct super_block *sb, | 526 | int autofs4_expire_run(struct super_block *sb, |
| 525 | struct vfsmount *mnt, | 527 | struct vfsmount *mnt, |
| 526 | struct autofs_sb_info *sbi, | 528 | struct autofs_sb_info *sbi, |
| 527 | struct autofs_packet_expire __user *pkt_p) | 529 | struct autofs_packet_expire __user *pkt_p) |
| 528 | { | 530 | { |
| 529 | struct autofs_packet_expire pkt; | 531 | struct autofs_packet_expire pkt; |
| 530 | struct autofs_info *ino; | 532 | struct autofs_info *ino; |
| 531 | struct dentry *dentry; | 533 | struct dentry *dentry; |
| 532 | int ret = 0; | 534 | int ret = 0; |
| 533 | 535 | ||
| 534 | memset(&pkt,0,sizeof pkt); | 536 | memset(&pkt, 0, sizeof(pkt)); |
| 535 | 537 | ||
| 536 | pkt.hdr.proto_version = sbi->version; | 538 | pkt.hdr.proto_version = sbi->version; |
| 537 | pkt.hdr.type = autofs_ptype_expire; | 539 | pkt.hdr.type = autofs_ptype_expire; |
| 538 | 540 | ||
| 539 | if ((dentry = autofs4_expire_indirect(sb, mnt, sbi, 0)) == NULL) | 541 | dentry = autofs4_expire_indirect(sb, mnt, sbi, 0); |
| 542 | if (!dentry) | ||
| 540 | return -EAGAIN; | 543 | return -EAGAIN; |
| 541 | 544 | ||
| 542 | pkt.len = dentry->d_name.len; | 545 | pkt.len = dentry->d_name.len; |
| @@ -544,7 +547,7 @@ int autofs4_expire_run(struct super_block *sb, | |||
| 544 | pkt.name[pkt.len] = '\0'; | 547 | pkt.name[pkt.len] = '\0'; |
| 545 | dput(dentry); | 548 | dput(dentry); |
| 546 | 549 | ||
| 547 | if ( copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire)) ) | 550 | if (copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire))) |
| 548 | ret = -EFAULT; | 551 | ret = -EFAULT; |
| 549 | 552 | ||
| 550 | spin_lock(&sbi->fs_lock); | 553 | spin_lock(&sbi->fs_lock); |
| @@ -573,7 +576,8 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt, | |||
| 573 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | 576 | struct autofs_info *ino = autofs4_dentry_ino(dentry); |
| 574 | 577 | ||
| 575 | /* This is synchronous because it makes the daemon a | 578 | /* This is synchronous because it makes the daemon a |
| 576 | little easier */ | 579 | * little easier |
| 580 | */ | ||
| 577 | ret = autofs4_wait(sbi, dentry, NFY_EXPIRE); | 581 | ret = autofs4_wait(sbi, dentry, NFY_EXPIRE); |
| 578 | 582 | ||
| 579 | spin_lock(&sbi->fs_lock); | 583 | spin_lock(&sbi->fs_lock); |
| @@ -588,8 +592,10 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt, | |||
| 588 | return ret; | 592 | return ret; |
| 589 | } | 593 | } |
| 590 | 594 | ||
| 591 | /* Call repeatedly until it returns -EAGAIN, meaning there's nothing | 595 | /* |
| 592 | more to be done */ | 596 | * Call repeatedly until it returns -EAGAIN, meaning there's nothing |
| 597 | * more to be done. | ||
| 598 | */ | ||
| 593 | int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt, | 599 | int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt, |
| 594 | struct autofs_sb_info *sbi, int __user *arg) | 600 | struct autofs_sb_info *sbi, int __user *arg) |
| 595 | { | 601 | { |
diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c index b3db517e89ec..8cf0e63389ae 100644 --- a/fs/autofs4/init.c +++ b/fs/autofs4/init.c | |||
| @@ -1,14 +1,10 @@ | |||
| 1 | /* -*- c -*- --------------------------------------------------------------- * | 1 | /* |
| 2 | * | 2 | * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved |
| 3 | * linux/fs/autofs/init.c | ||
| 4 | * | ||
| 5 | * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved | ||
| 6 | * | 3 | * |
| 7 | * This file is part of the Linux kernel and is made available under | 4 | * This file is part of the Linux kernel and is made available under |
| 8 | * the terms of the GNU General Public License, version 2, or at your | 5 | * the terms of the GNU General Public License, version 2, or at your |
| 9 | * option, any later version, incorporated herein by reference. | 6 | * option, any later version, incorporated herein by reference. |
| 10 | * | 7 | */ |
| 11 | * ------------------------------------------------------------------------- */ | ||
| 12 | 8 | ||
| 13 | #include <linux/module.h> | 9 | #include <linux/module.h> |
| 14 | #include <linux/init.h> | 10 | #include <linux/init.h> |
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index a3ae0b2aeb5a..61b21051bd5a 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c | |||
| @@ -1,15 +1,11 @@ | |||
| 1 | /* -*- c -*- --------------------------------------------------------------- * | 1 | /* |
| 2 | * | 2 | * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved |
| 3 | * linux/fs/autofs/inode.c | 3 | * Copyright 2005-2006 Ian Kent <raven@themaw.net> |
| 4 | * | ||
| 5 | * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved | ||
| 6 | * Copyright 2005-2006 Ian Kent <raven@themaw.net> | ||
| 7 | * | 4 | * |
| 8 | * This file is part of the Linux kernel and is made available under | 5 | * This file is part of the Linux kernel and is made available under |
| 9 | * the terms of the GNU General Public License, version 2, or at your | 6 | * the terms of the GNU General Public License, version 2, or at your |
| 10 | * option, any later version, incorporated herein by reference. | 7 | * option, any later version, incorporated herein by reference. |
| 11 | * | 8 | */ |
| 12 | * ------------------------------------------------------------------------- */ | ||
| 13 | 9 | ||
| 14 | #include <linux/kernel.h> | 10 | #include <linux/kernel.h> |
| 15 | #include <linux/slab.h> | 11 | #include <linux/slab.h> |
| @@ -24,7 +20,9 @@ | |||
| 24 | 20 | ||
| 25 | struct autofs_info *autofs4_new_ino(struct autofs_sb_info *sbi) | 21 | struct autofs_info *autofs4_new_ino(struct autofs_sb_info *sbi) |
| 26 | { | 22 | { |
| 27 | struct autofs_info *ino = kzalloc(sizeof(*ino), GFP_KERNEL); | 23 | struct autofs_info *ino; |
| 24 | |||
| 25 | ino = kzalloc(sizeof(*ino), GFP_KERNEL); | ||
| 28 | if (ino) { | 26 | if (ino) { |
| 29 | INIT_LIST_HEAD(&ino->active); | 27 | INIT_LIST_HEAD(&ino->active); |
| 30 | INIT_LIST_HEAD(&ino->expiring); | 28 | INIT_LIST_HEAD(&ino->expiring); |
| @@ -62,7 +60,7 @@ void autofs4_kill_sb(struct super_block *sb) | |||
| 62 | put_pid(sbi->oz_pgrp); | 60 | put_pid(sbi->oz_pgrp); |
| 63 | } | 61 | } |
| 64 | 62 | ||
| 65 | DPRINTK("shutting down"); | 63 | pr_debug("shutting down\n"); |
| 66 | kill_litter_super(sb); | 64 | kill_litter_super(sb); |
| 67 | if (sbi) | 65 | if (sbi) |
| 68 | kfree_rcu(sbi, rcu); | 66 | kfree_rcu(sbi, rcu); |
| @@ -94,7 +92,12 @@ static int autofs4_show_options(struct seq_file *m, struct dentry *root) | |||
| 94 | seq_printf(m, ",direct"); | 92 | seq_printf(m, ",direct"); |
| 95 | else | 93 | else |
| 96 | seq_printf(m, ",indirect"); | 94 | seq_printf(m, ",indirect"); |
| 97 | 95 | #ifdef CONFIG_CHECKPOINT_RESTORE | |
| 96 | if (sbi->pipe) | ||
| 97 | seq_printf(m, ",pipe_ino=%ld", sbi->pipe->f_inode->i_ino); | ||
| 98 | else | ||
| 99 | seq_printf(m, ",pipe_ino=-1"); | ||
| 100 | #endif | ||
| 98 | return 0; | 101 | return 0; |
| 99 | } | 102 | } |
| 100 | 103 | ||
| @@ -147,6 +150,7 @@ static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid, | |||
| 147 | 150 | ||
| 148 | while ((p = strsep(&options, ",")) != NULL) { | 151 | while ((p = strsep(&options, ",")) != NULL) { |
| 149 | int token; | 152 | int token; |
| 153 | |||
| 150 | if (!*p) | 154 | if (!*p) |
| 151 | continue; | 155 | continue; |
| 152 | 156 | ||
| @@ -204,9 +208,9 @@ static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid, | |||
| 204 | 208 | ||
| 205 | int autofs4_fill_super(struct super_block *s, void *data, int silent) | 209 | int autofs4_fill_super(struct super_block *s, void *data, int silent) |
| 206 | { | 210 | { |
| 207 | struct inode * root_inode; | 211 | struct inode *root_inode; |
| 208 | struct dentry * root; | 212 | struct dentry *root; |
| 209 | struct file * pipe; | 213 | struct file *pipe; |
| 210 | int pipefd; | 214 | int pipefd; |
| 211 | struct autofs_sb_info *sbi; | 215 | struct autofs_sb_info *sbi; |
| 212 | struct autofs_info *ino; | 216 | struct autofs_info *ino; |
| @@ -217,7 +221,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) | |||
| 217 | sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); | 221 | sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); |
| 218 | if (!sbi) | 222 | if (!sbi) |
| 219 | return -ENOMEM; | 223 | return -ENOMEM; |
| 220 | DPRINTK("starting up, sbi = %p",sbi); | 224 | pr_debug("starting up, sbi = %p\n", sbi); |
| 221 | 225 | ||
| 222 | s->s_fs_info = sbi; | 226 | s->s_fs_info = sbi; |
| 223 | sbi->magic = AUTOFS_SBI_MAGIC; | 227 | sbi->magic = AUTOFS_SBI_MAGIC; |
| @@ -266,14 +270,14 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) | |||
| 266 | if (parse_options(data, &pipefd, &root_inode->i_uid, &root_inode->i_gid, | 270 | if (parse_options(data, &pipefd, &root_inode->i_uid, &root_inode->i_gid, |
| 267 | &pgrp, &pgrp_set, &sbi->type, &sbi->min_proto, | 271 | &pgrp, &pgrp_set, &sbi->type, &sbi->min_proto, |
| 268 | &sbi->max_proto)) { | 272 | &sbi->max_proto)) { |
| 269 | printk("autofs: called with bogus options\n"); | 273 | pr_err("called with bogus options\n"); |
| 270 | goto fail_dput; | 274 | goto fail_dput; |
| 271 | } | 275 | } |
| 272 | 276 | ||
| 273 | if (pgrp_set) { | 277 | if (pgrp_set) { |
| 274 | sbi->oz_pgrp = find_get_pid(pgrp); | 278 | sbi->oz_pgrp = find_get_pid(pgrp); |
| 275 | if (!sbi->oz_pgrp) { | 279 | if (!sbi->oz_pgrp) { |
| 276 | pr_warn("autofs: could not find process group %d\n", | 280 | pr_err("could not find process group %d\n", |
| 277 | pgrp); | 281 | pgrp); |
| 278 | goto fail_dput; | 282 | goto fail_dput; |
| 279 | } | 283 | } |
| @@ -290,10 +294,10 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) | |||
| 290 | /* Couldn't this be tested earlier? */ | 294 | /* Couldn't this be tested earlier? */ |
| 291 | if (sbi->max_proto < AUTOFS_MIN_PROTO_VERSION || | 295 | if (sbi->max_proto < AUTOFS_MIN_PROTO_VERSION || |
| 292 | sbi->min_proto > AUTOFS_MAX_PROTO_VERSION) { | 296 | sbi->min_proto > AUTOFS_MAX_PROTO_VERSION) { |
| 293 | printk("autofs: kernel does not match daemon version " | 297 | pr_err("kernel does not match daemon version " |
| 294 | "daemon (%d, %d) kernel (%d, %d)\n", | 298 | "daemon (%d, %d) kernel (%d, %d)\n", |
| 295 | sbi->min_proto, sbi->max_proto, | 299 | sbi->min_proto, sbi->max_proto, |
| 296 | AUTOFS_MIN_PROTO_VERSION, AUTOFS_MAX_PROTO_VERSION); | 300 | AUTOFS_MIN_PROTO_VERSION, AUTOFS_MAX_PROTO_VERSION); |
| 297 | goto fail_dput; | 301 | goto fail_dput; |
| 298 | } | 302 | } |
| 299 | 303 | ||
| @@ -304,11 +308,11 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) | |||
| 304 | sbi->version = sbi->max_proto; | 308 | sbi->version = sbi->max_proto; |
| 305 | sbi->sub_version = AUTOFS_PROTO_SUBVERSION; | 309 | sbi->sub_version = AUTOFS_PROTO_SUBVERSION; |
| 306 | 310 | ||
| 307 | DPRINTK("pipe fd = %d, pgrp = %u", pipefd, pid_nr(sbi->oz_pgrp)); | 311 | pr_debug("pipe fd = %d, pgrp = %u\n", pipefd, pid_nr(sbi->oz_pgrp)); |
| 308 | pipe = fget(pipefd); | 312 | pipe = fget(pipefd); |
| 309 | 313 | ||
| 310 | if (!pipe) { | 314 | if (!pipe) { |
| 311 | printk("autofs: could not open pipe file descriptor\n"); | 315 | pr_err("could not open pipe file descriptor\n"); |
| 312 | goto fail_dput; | 316 | goto fail_dput; |
| 313 | } | 317 | } |
| 314 | ret = autofs_prepare_pipe(pipe); | 318 | ret = autofs_prepare_pipe(pipe); |
| @@ -323,12 +327,12 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) | |||
| 323 | */ | 327 | */ |
| 324 | s->s_root = root; | 328 | s->s_root = root; |
| 325 | return 0; | 329 | return 0; |
| 326 | 330 | ||
| 327 | /* | 331 | /* |
| 328 | * Failure ... clean up. | 332 | * Failure ... clean up. |
| 329 | */ | 333 | */ |
| 330 | fail_fput: | 334 | fail_fput: |
| 331 | printk("autofs: pipe file descriptor does not contain proper ops\n"); | 335 | pr_err("pipe file descriptor does not contain proper ops\n"); |
| 332 | fput(pipe); | 336 | fput(pipe); |
| 333 | /* fall through */ | 337 | /* fall through */ |
| 334 | fail_dput: | 338 | fail_dput: |
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index c6d7d3dbd52a..9328b5861c7a 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c | |||
| @@ -1,16 +1,12 @@ | |||
| 1 | /* -*- c -*- --------------------------------------------------------------- * | 1 | /* |
| 2 | * | 2 | * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved |
| 3 | * linux/fs/autofs/root.c | 3 | * Copyright 1999-2000 Jeremy Fitzhardinge <jeremy@goop.org> |
| 4 | * | 4 | * Copyright 2001-2006 Ian Kent <raven@themaw.net> |
| 5 | * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved | ||
| 6 | * Copyright 1999-2000 Jeremy Fitzhardinge <jeremy@goop.org> | ||
| 7 | * Copyright 2001-2006 Ian Kent <raven@themaw.net> | ||
| 8 | * | 5 | * |
| 9 | * This file is part of the Linux kernel and is made available under | 6 | * This file is part of the Linux kernel and is made available under |
| 10 | * the terms of the GNU General Public License, version 2, or at your | 7 | * the terms of the GNU General Public License, version 2, or at your |
| 11 | * option, any later version, incorporated herein by reference. | 8 | * option, any later version, incorporated herein by reference. |
| 12 | * | 9 | */ |
| 13 | * ------------------------------------------------------------------------- */ | ||
| 14 | 10 | ||
| 15 | #include <linux/capability.h> | 11 | #include <linux/capability.h> |
| 16 | #include <linux/errno.h> | 12 | #include <linux/errno.h> |
| @@ -23,16 +19,18 @@ | |||
| 23 | 19 | ||
| 24 | #include "autofs_i.h" | 20 | #include "autofs_i.h" |
| 25 | 21 | ||
| 26 | static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *); | 22 | static int autofs4_dir_symlink(struct inode *, struct dentry *, const char *); |
| 27 | static int autofs4_dir_unlink(struct inode *,struct dentry *); | 23 | static int autofs4_dir_unlink(struct inode *, struct dentry *); |
| 28 | static int autofs4_dir_rmdir(struct inode *,struct dentry *); | 24 | static int autofs4_dir_rmdir(struct inode *, struct dentry *); |
| 29 | static int autofs4_dir_mkdir(struct inode *,struct dentry *,umode_t); | 25 | static int autofs4_dir_mkdir(struct inode *, struct dentry *, umode_t); |
| 30 | static long autofs4_root_ioctl(struct file *,unsigned int,unsigned long); | 26 | static long autofs4_root_ioctl(struct file *, unsigned int, unsigned long); |
| 31 | #ifdef CONFIG_COMPAT | 27 | #ifdef CONFIG_COMPAT |
| 32 | static long autofs4_root_compat_ioctl(struct file *,unsigned int,unsigned long); | 28 | static long autofs4_root_compat_ioctl(struct file *, |
| 29 | unsigned int, unsigned long); | ||
| 33 | #endif | 30 | #endif |
| 34 | static int autofs4_dir_open(struct inode *inode, struct file *file); | 31 | static int autofs4_dir_open(struct inode *inode, struct file *file); |
| 35 | static struct dentry *autofs4_lookup(struct inode *,struct dentry *, unsigned int); | 32 | static struct dentry *autofs4_lookup(struct inode *, |
| 33 | struct dentry *, unsigned int); | ||
| 36 | static struct vfsmount *autofs4_d_automount(struct path *); | 34 | static struct vfsmount *autofs4_d_automount(struct path *); |
| 37 | static int autofs4_d_manage(struct dentry *, bool); | 35 | static int autofs4_d_manage(struct dentry *, bool); |
| 38 | static void autofs4_dentry_release(struct dentry *); | 36 | static void autofs4_dentry_release(struct dentry *); |
| @@ -74,7 +72,9 @@ const struct dentry_operations autofs4_dentry_operations = { | |||
| 74 | static void autofs4_add_active(struct dentry *dentry) | 72 | static void autofs4_add_active(struct dentry *dentry) |
| 75 | { | 73 | { |
| 76 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); | 74 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); |
| 77 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | 75 | struct autofs_info *ino; |
| 76 | |||
| 77 | ino = autofs4_dentry_ino(dentry); | ||
| 78 | if (ino) { | 78 | if (ino) { |
| 79 | spin_lock(&sbi->lookup_lock); | 79 | spin_lock(&sbi->lookup_lock); |
| 80 | if (!ino->active_count) { | 80 | if (!ino->active_count) { |
| @@ -84,13 +84,14 @@ static void autofs4_add_active(struct dentry *dentry) | |||
| 84 | ino->active_count++; | 84 | ino->active_count++; |
| 85 | spin_unlock(&sbi->lookup_lock); | 85 | spin_unlock(&sbi->lookup_lock); |
| 86 | } | 86 | } |
| 87 | return; | ||
| 88 | } | 87 | } |
| 89 | 88 | ||
| 90 | static void autofs4_del_active(struct dentry *dentry) | 89 | static void autofs4_del_active(struct dentry *dentry) |
| 91 | { | 90 | { |
| 92 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); | 91 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); |
| 93 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | 92 | struct autofs_info *ino; |
| 93 | |||
| 94 | ino = autofs4_dentry_ino(dentry); | ||
| 94 | if (ino) { | 95 | if (ino) { |
| 95 | spin_lock(&sbi->lookup_lock); | 96 | spin_lock(&sbi->lookup_lock); |
| 96 | ino->active_count--; | 97 | ino->active_count--; |
| @@ -100,7 +101,6 @@ static void autofs4_del_active(struct dentry *dentry) | |||
| 100 | } | 101 | } |
| 101 | spin_unlock(&sbi->lookup_lock); | 102 | spin_unlock(&sbi->lookup_lock); |
| 102 | } | 103 | } |
| 103 | return; | ||
| 104 | } | 104 | } |
| 105 | 105 | ||
| 106 | static int autofs4_dir_open(struct inode *inode, struct file *file) | 106 | static int autofs4_dir_open(struct inode *inode, struct file *file) |
| @@ -108,7 +108,7 @@ static int autofs4_dir_open(struct inode *inode, struct file *file) | |||
| 108 | struct dentry *dentry = file->f_path.dentry; | 108 | struct dentry *dentry = file->f_path.dentry; |
| 109 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); | 109 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); |
| 110 | 110 | ||
| 111 | DPRINTK("file=%p dentry=%p %pd", file, dentry, dentry); | 111 | pr_debug("file=%p dentry=%p %pd\n", file, dentry, dentry); |
| 112 | 112 | ||
| 113 | if (autofs4_oz_mode(sbi)) | 113 | if (autofs4_oz_mode(sbi)) |
| 114 | goto out; | 114 | goto out; |
| @@ -138,7 +138,7 @@ static void autofs4_dentry_release(struct dentry *de) | |||
| 138 | struct autofs_info *ino = autofs4_dentry_ino(de); | 138 | struct autofs_info *ino = autofs4_dentry_ino(de); |
| 139 | struct autofs_sb_info *sbi = autofs4_sbi(de->d_sb); | 139 | struct autofs_sb_info *sbi = autofs4_sbi(de->d_sb); |
| 140 | 140 | ||
| 141 | DPRINTK("releasing %p", de); | 141 | pr_debug("releasing %p\n", de); |
| 142 | 142 | ||
| 143 | if (!ino) | 143 | if (!ino) |
| 144 | return; | 144 | return; |
| @@ -278,9 +278,9 @@ static int autofs4_mount_wait(struct dentry *dentry, bool rcu_walk) | |||
| 278 | if (ino->flags & AUTOFS_INF_PENDING) { | 278 | if (ino->flags & AUTOFS_INF_PENDING) { |
| 279 | if (rcu_walk) | 279 | if (rcu_walk) |
| 280 | return -ECHILD; | 280 | return -ECHILD; |
| 281 | DPRINTK("waiting for mount name=%pd", dentry); | 281 | pr_debug("waiting for mount name=%pd\n", dentry); |
| 282 | status = autofs4_wait(sbi, dentry, NFY_MOUNT); | 282 | status = autofs4_wait(sbi, dentry, NFY_MOUNT); |
| 283 | DPRINTK("mount wait done status=%d", status); | 283 | pr_debug("mount wait done status=%d\n", status); |
| 284 | } | 284 | } |
| 285 | ino->last_used = jiffies; | 285 | ino->last_used = jiffies; |
| 286 | return status; | 286 | return status; |
| @@ -320,7 +320,9 @@ static struct dentry *autofs4_mountpoint_changed(struct path *path) | |||
| 320 | if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) { | 320 | if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) { |
| 321 | struct dentry *parent = dentry->d_parent; | 321 | struct dentry *parent = dentry->d_parent; |
| 322 | struct autofs_info *ino; | 322 | struct autofs_info *ino; |
| 323 | struct dentry *new = d_lookup(parent, &dentry->d_name); | 323 | struct dentry *new; |
| 324 | |||
| 325 | new = d_lookup(parent, &dentry->d_name); | ||
| 324 | if (!new) | 326 | if (!new) |
| 325 | return NULL; | 327 | return NULL; |
| 326 | ino = autofs4_dentry_ino(new); | 328 | ino = autofs4_dentry_ino(new); |
| @@ -338,7 +340,7 @@ static struct vfsmount *autofs4_d_automount(struct path *path) | |||
| 338 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | 340 | struct autofs_info *ino = autofs4_dentry_ino(dentry); |
| 339 | int status; | 341 | int status; |
| 340 | 342 | ||
| 341 | DPRINTK("dentry=%p %pd", dentry, dentry); | 343 | pr_debug("dentry=%p %pd\n", dentry, dentry); |
| 342 | 344 | ||
| 343 | /* The daemon never triggers a mount. */ | 345 | /* The daemon never triggers a mount. */ |
| 344 | if (autofs4_oz_mode(sbi)) | 346 | if (autofs4_oz_mode(sbi)) |
| @@ -425,7 +427,7 @@ static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk) | |||
| 425 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | 427 | struct autofs_info *ino = autofs4_dentry_ino(dentry); |
| 426 | int status; | 428 | int status; |
| 427 | 429 | ||
| 428 | DPRINTK("dentry=%p %pd", dentry, dentry); | 430 | pr_debug("dentry=%p %pd\n", dentry, dentry); |
| 429 | 431 | ||
| 430 | /* The daemon never waits. */ | 432 | /* The daemon never waits. */ |
| 431 | if (autofs4_oz_mode(sbi)) { | 433 | if (autofs4_oz_mode(sbi)) { |
| @@ -455,6 +457,7 @@ static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk) | |||
| 455 | * a mount-trap. | 457 | * a mount-trap. |
| 456 | */ | 458 | */ |
| 457 | struct inode *inode; | 459 | struct inode *inode; |
| 460 | |||
| 458 | if (ino->flags & (AUTOFS_INF_EXPIRING | AUTOFS_INF_NO_RCU)) | 461 | if (ino->flags & (AUTOFS_INF_EXPIRING | AUTOFS_INF_NO_RCU)) |
| 459 | return 0; | 462 | return 0; |
| 460 | if (d_mountpoint(dentry)) | 463 | if (d_mountpoint(dentry)) |
| @@ -494,13 +497,14 @@ static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk) | |||
| 494 | } | 497 | } |
| 495 | 498 | ||
| 496 | /* Lookups in the root directory */ | 499 | /* Lookups in the root directory */ |
| 497 | static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) | 500 | static struct dentry *autofs4_lookup(struct inode *dir, |
| 501 | struct dentry *dentry, unsigned int flags) | ||
| 498 | { | 502 | { |
| 499 | struct autofs_sb_info *sbi; | 503 | struct autofs_sb_info *sbi; |
| 500 | struct autofs_info *ino; | 504 | struct autofs_info *ino; |
| 501 | struct dentry *active; | 505 | struct dentry *active; |
| 502 | 506 | ||
| 503 | DPRINTK("name = %pd", dentry); | 507 | pr_debug("name = %pd\n", dentry); |
| 504 | 508 | ||
| 505 | /* File name too long to exist */ | 509 | /* File name too long to exist */ |
| 506 | if (dentry->d_name.len > NAME_MAX) | 510 | if (dentry->d_name.len > NAME_MAX) |
| @@ -508,14 +512,14 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, u | |||
| 508 | 512 | ||
| 509 | sbi = autofs4_sbi(dir->i_sb); | 513 | sbi = autofs4_sbi(dir->i_sb); |
| 510 | 514 | ||
| 511 | DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d", | 515 | pr_debug("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d\n", |
| 512 | current->pid, task_pgrp_nr(current), sbi->catatonic, | 516 | current->pid, task_pgrp_nr(current), sbi->catatonic, |
| 513 | autofs4_oz_mode(sbi)); | 517 | autofs4_oz_mode(sbi)); |
| 514 | 518 | ||
| 515 | active = autofs4_lookup_active(dentry); | 519 | active = autofs4_lookup_active(dentry); |
| 516 | if (active) { | 520 | if (active) |
| 517 | return active; | 521 | return active; |
| 518 | } else { | 522 | else { |
| 519 | /* | 523 | /* |
| 520 | * A dentry that is not within the root can never trigger a | 524 | * A dentry that is not within the root can never trigger a |
| 521 | * mount operation, unless the directory already exists, so we | 525 | * mount operation, unless the directory already exists, so we |
| @@ -526,7 +530,8 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, u | |||
| 526 | return ERR_PTR(-ENOENT); | 530 | return ERR_PTR(-ENOENT); |
| 527 | 531 | ||
| 528 | /* Mark entries in the root as mount triggers */ | 532 | /* Mark entries in the root as mount triggers */ |
| 529 | if (autofs_type_indirect(sbi->type) && IS_ROOT(dentry->d_parent)) | 533 | if (IS_ROOT(dentry->d_parent) && |
| 534 | autofs_type_indirect(sbi->type)) | ||
| 530 | __managed_dentry_set_managed(dentry); | 535 | __managed_dentry_set_managed(dentry); |
| 531 | 536 | ||
| 532 | ino = autofs4_new_ino(sbi); | 537 | ino = autofs4_new_ino(sbi); |
| @@ -554,7 +559,7 @@ static int autofs4_dir_symlink(struct inode *dir, | |||
| 554 | size_t size = strlen(symname); | 559 | size_t size = strlen(symname); |
| 555 | char *cp; | 560 | char *cp; |
| 556 | 561 | ||
| 557 | DPRINTK("%s <- %pd", symname, dentry); | 562 | pr_debug("%s <- %pd\n", symname, dentry); |
| 558 | 563 | ||
| 559 | if (!autofs4_oz_mode(sbi)) | 564 | if (!autofs4_oz_mode(sbi)) |
| 560 | return -EACCES; | 565 | return -EACCES; |
| @@ -613,7 +618,7 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry) | |||
| 613 | struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); | 618 | struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); |
| 614 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | 619 | struct autofs_info *ino = autofs4_dentry_ino(dentry); |
| 615 | struct autofs_info *p_ino; | 620 | struct autofs_info *p_ino; |
| 616 | 621 | ||
| 617 | /* This allows root to remove symlinks */ | 622 | /* This allows root to remove symlinks */ |
| 618 | if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) | 623 | if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) |
| 619 | return -EPERM; | 624 | return -EPERM; |
| @@ -664,7 +669,6 @@ static void autofs_set_leaf_automount_flags(struct dentry *dentry) | |||
| 664 | if (IS_ROOT(parent->d_parent)) | 669 | if (IS_ROOT(parent->d_parent)) |
| 665 | return; | 670 | return; |
| 666 | managed_dentry_clear_managed(parent); | 671 | managed_dentry_clear_managed(parent); |
| 667 | return; | ||
| 668 | } | 672 | } |
| 669 | 673 | ||
| 670 | static void autofs_clear_leaf_automount_flags(struct dentry *dentry) | 674 | static void autofs_clear_leaf_automount_flags(struct dentry *dentry) |
| @@ -687,7 +691,6 @@ static void autofs_clear_leaf_automount_flags(struct dentry *dentry) | |||
| 687 | if (d_child->next == &parent->d_subdirs && | 691 | if (d_child->next == &parent->d_subdirs && |
| 688 | d_child->prev == &parent->d_subdirs) | 692 | d_child->prev == &parent->d_subdirs) |
| 689 | managed_dentry_set_managed(parent); | 693 | managed_dentry_set_managed(parent); |
| 690 | return; | ||
| 691 | } | 694 | } |
| 692 | 695 | ||
| 693 | static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry) | 696 | static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry) |
| @@ -695,8 +698,8 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry) | |||
| 695 | struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); | 698 | struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); |
| 696 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | 699 | struct autofs_info *ino = autofs4_dentry_ino(dentry); |
| 697 | struct autofs_info *p_ino; | 700 | struct autofs_info *p_ino; |
| 698 | 701 | ||
| 699 | DPRINTK("dentry %p, removing %pd", dentry, dentry); | 702 | pr_debug("dentry %p, removing %pd\n", dentry, dentry); |
| 700 | 703 | ||
| 701 | if (!autofs4_oz_mode(sbi)) | 704 | if (!autofs4_oz_mode(sbi)) |
| 702 | return -EACCES; | 705 | return -EACCES; |
| @@ -728,7 +731,8 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry) | |||
| 728 | return 0; | 731 | return 0; |
| 729 | } | 732 | } |
| 730 | 733 | ||
| 731 | static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | 734 | static int autofs4_dir_mkdir(struct inode *dir, |
| 735 | struct dentry *dentry, umode_t mode) | ||
| 732 | { | 736 | { |
| 733 | struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); | 737 | struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); |
| 734 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | 738 | struct autofs_info *ino = autofs4_dentry_ino(dentry); |
| @@ -738,7 +742,7 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, umode_t m | |||
| 738 | if (!autofs4_oz_mode(sbi)) | 742 | if (!autofs4_oz_mode(sbi)) |
| 739 | return -EACCES; | 743 | return -EACCES; |
| 740 | 744 | ||
| 741 | DPRINTK("dentry %p, creating %pd", dentry, dentry); | 745 | pr_debug("dentry %p, creating %pd\n", dentry, dentry); |
| 742 | 746 | ||
| 743 | BUG_ON(!ino); | 747 | BUG_ON(!ino); |
| 744 | 748 | ||
| @@ -768,14 +772,18 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, umode_t m | |||
| 768 | /* Get/set timeout ioctl() operation */ | 772 | /* Get/set timeout ioctl() operation */ |
| 769 | #ifdef CONFIG_COMPAT | 773 | #ifdef CONFIG_COMPAT |
| 770 | static inline int autofs4_compat_get_set_timeout(struct autofs_sb_info *sbi, | 774 | static inline int autofs4_compat_get_set_timeout(struct autofs_sb_info *sbi, |
| 771 | compat_ulong_t __user *p) | 775 | compat_ulong_t __user *p) |
| 772 | { | 776 | { |
| 773 | int rv; | ||
| 774 | unsigned long ntimeout; | 777 | unsigned long ntimeout; |
| 778 | int rv; | ||
| 775 | 779 | ||
| 776 | if ((rv = get_user(ntimeout, p)) || | 780 | rv = get_user(ntimeout, p); |
| 777 | (rv = put_user(sbi->exp_timeout/HZ, p))) | 781 | if (rv) |
| 778 | return rv; | 782 | goto error; |
| 783 | |||
| 784 | rv = put_user(sbi->exp_timeout/HZ, p); | ||
| 785 | if (rv) | ||
| 786 | goto error; | ||
| 779 | 787 | ||
| 780 | if (ntimeout > UINT_MAX/HZ) | 788 | if (ntimeout > UINT_MAX/HZ) |
| 781 | sbi->exp_timeout = 0; | 789 | sbi->exp_timeout = 0; |
| @@ -783,18 +791,24 @@ static inline int autofs4_compat_get_set_timeout(struct autofs_sb_info *sbi, | |||
| 783 | sbi->exp_timeout = ntimeout * HZ; | 791 | sbi->exp_timeout = ntimeout * HZ; |
| 784 | 792 | ||
| 785 | return 0; | 793 | return 0; |
| 794 | error: | ||
| 795 | return rv; | ||
| 786 | } | 796 | } |
| 787 | #endif | 797 | #endif |
| 788 | 798 | ||
| 789 | static inline int autofs4_get_set_timeout(struct autofs_sb_info *sbi, | 799 | static inline int autofs4_get_set_timeout(struct autofs_sb_info *sbi, |
| 790 | unsigned long __user *p) | 800 | unsigned long __user *p) |
| 791 | { | 801 | { |
| 792 | int rv; | ||
| 793 | unsigned long ntimeout; | 802 | unsigned long ntimeout; |
| 803 | int rv; | ||
| 794 | 804 | ||
| 795 | if ((rv = get_user(ntimeout, p)) || | 805 | rv = get_user(ntimeout, p); |
| 796 | (rv = put_user(sbi->exp_timeout/HZ, p))) | 806 | if (rv) |
| 797 | return rv; | 807 | goto error; |
| 808 | |||
| 809 | rv = put_user(sbi->exp_timeout/HZ, p); | ||
| 810 | if (rv) | ||
| 811 | goto error; | ||
| 798 | 812 | ||
| 799 | if (ntimeout > ULONG_MAX/HZ) | 813 | if (ntimeout > ULONG_MAX/HZ) |
| 800 | sbi->exp_timeout = 0; | 814 | sbi->exp_timeout = 0; |
| @@ -802,16 +816,20 @@ static inline int autofs4_get_set_timeout(struct autofs_sb_info *sbi, | |||
| 802 | sbi->exp_timeout = ntimeout * HZ; | 816 | sbi->exp_timeout = ntimeout * HZ; |
| 803 | 817 | ||
| 804 | return 0; | 818 | return 0; |
| 819 | error: | ||
| 820 | return rv; | ||
| 805 | } | 821 | } |
| 806 | 822 | ||
| 807 | /* Return protocol version */ | 823 | /* Return protocol version */ |
| 808 | static inline int autofs4_get_protover(struct autofs_sb_info *sbi, int __user *p) | 824 | static inline int autofs4_get_protover(struct autofs_sb_info *sbi, |
| 825 | int __user *p) | ||
| 809 | { | 826 | { |
| 810 | return put_user(sbi->version, p); | 827 | return put_user(sbi->version, p); |
| 811 | } | 828 | } |
| 812 | 829 | ||
| 813 | /* Return protocol sub version */ | 830 | /* Return protocol sub version */ |
| 814 | static inline int autofs4_get_protosubver(struct autofs_sb_info *sbi, int __user *p) | 831 | static inline int autofs4_get_protosubver(struct autofs_sb_info *sbi, |
| 832 | int __user *p) | ||
| 815 | { | 833 | { |
| 816 | return put_user(sbi->sub_version, p); | 834 | return put_user(sbi->sub_version, p); |
| 817 | } | 835 | } |
| @@ -826,7 +844,7 @@ static inline int autofs4_ask_umount(struct vfsmount *mnt, int __user *p) | |||
| 826 | if (may_umount(mnt)) | 844 | if (may_umount(mnt)) |
| 827 | status = 1; | 845 | status = 1; |
| 828 | 846 | ||
| 829 | DPRINTK("returning %d", status); | 847 | pr_debug("returning %d\n", status); |
| 830 | 848 | ||
| 831 | status = put_user(status, p); | 849 | status = put_user(status, p); |
| 832 | 850 | ||
| @@ -834,9 +852,9 @@ static inline int autofs4_ask_umount(struct vfsmount *mnt, int __user *p) | |||
| 834 | } | 852 | } |
| 835 | 853 | ||
| 836 | /* Identify autofs4_dentries - this is so we can tell if there's | 854 | /* Identify autofs4_dentries - this is so we can tell if there's |
| 837 | an extra dentry refcount or not. We only hold a refcount on the | 855 | * an extra dentry refcount or not. We only hold a refcount on the |
| 838 | dentry if its non-negative (ie, d_inode != NULL) | 856 | * dentry if its non-negative (ie, d_inode != NULL) |
| 839 | */ | 857 | */ |
| 840 | int is_autofs4_dentry(struct dentry *dentry) | 858 | int is_autofs4_dentry(struct dentry *dentry) |
| 841 | { | 859 | { |
| 842 | return dentry && d_really_is_positive(dentry) && | 860 | return dentry && d_really_is_positive(dentry) && |
| @@ -854,21 +872,21 @@ static int autofs4_root_ioctl_unlocked(struct inode *inode, struct file *filp, | |||
| 854 | struct autofs_sb_info *sbi = autofs4_sbi(inode->i_sb); | 872 | struct autofs_sb_info *sbi = autofs4_sbi(inode->i_sb); |
| 855 | void __user *p = (void __user *)arg; | 873 | void __user *p = (void __user *)arg; |
| 856 | 874 | ||
| 857 | DPRINTK("cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u", | 875 | pr_debug("cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u\n", |
| 858 | cmd,arg,sbi,task_pgrp_nr(current)); | 876 | cmd, arg, sbi, task_pgrp_nr(current)); |
| 859 | 877 | ||
| 860 | if (_IOC_TYPE(cmd) != _IOC_TYPE(AUTOFS_IOC_FIRST) || | 878 | if (_IOC_TYPE(cmd) != _IOC_TYPE(AUTOFS_IOC_FIRST) || |
| 861 | _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT) | 879 | _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT) |
| 862 | return -ENOTTY; | 880 | return -ENOTTY; |
| 863 | 881 | ||
| 864 | if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) | 882 | if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) |
| 865 | return -EPERM; | 883 | return -EPERM; |
| 866 | 884 | ||
| 867 | switch(cmd) { | 885 | switch (cmd) { |
| 868 | case AUTOFS_IOC_READY: /* Wait queue: go ahead and retry */ | 886 | case AUTOFS_IOC_READY: /* Wait queue: go ahead and retry */ |
| 869 | return autofs4_wait_release(sbi,(autofs_wqt_t)arg,0); | 887 | return autofs4_wait_release(sbi, (autofs_wqt_t) arg, 0); |
| 870 | case AUTOFS_IOC_FAIL: /* Wait queue: fail with ENOENT */ | 888 | case AUTOFS_IOC_FAIL: /* Wait queue: fail with ENOENT */ |
| 871 | return autofs4_wait_release(sbi,(autofs_wqt_t)arg,-ENOENT); | 889 | return autofs4_wait_release(sbi, (autofs_wqt_t) arg, -ENOENT); |
| 872 | case AUTOFS_IOC_CATATONIC: /* Enter catatonic mode (daemon shutdown) */ | 890 | case AUTOFS_IOC_CATATONIC: /* Enter catatonic mode (daemon shutdown) */ |
| 873 | autofs4_catatonic_mode(sbi); | 891 | autofs4_catatonic_mode(sbi); |
| 874 | return 0; | 892 | return 0; |
| @@ -888,13 +906,15 @@ static int autofs4_root_ioctl_unlocked(struct inode *inode, struct file *filp, | |||
| 888 | 906 | ||
| 889 | /* return a single thing to expire */ | 907 | /* return a single thing to expire */ |
| 890 | case AUTOFS_IOC_EXPIRE: | 908 | case AUTOFS_IOC_EXPIRE: |
| 891 | return autofs4_expire_run(inode->i_sb,filp->f_path.mnt,sbi, p); | 909 | return autofs4_expire_run(inode->i_sb, |
| 910 | filp->f_path.mnt, sbi, p); | ||
| 892 | /* same as above, but can send multiple expires through pipe */ | 911 | /* same as above, but can send multiple expires through pipe */ |
| 893 | case AUTOFS_IOC_EXPIRE_MULTI: | 912 | case AUTOFS_IOC_EXPIRE_MULTI: |
| 894 | return autofs4_expire_multi(inode->i_sb,filp->f_path.mnt,sbi, p); | 913 | return autofs4_expire_multi(inode->i_sb, |
| 914 | filp->f_path.mnt, sbi, p); | ||
| 895 | 915 | ||
| 896 | default: | 916 | default: |
| 897 | return -ENOSYS; | 917 | return -EINVAL; |
| 898 | } | 918 | } |
| 899 | } | 919 | } |
| 900 | 920 | ||
| @@ -902,12 +922,13 @@ static long autofs4_root_ioctl(struct file *filp, | |||
| 902 | unsigned int cmd, unsigned long arg) | 922 | unsigned int cmd, unsigned long arg) |
| 903 | { | 923 | { |
| 904 | struct inode *inode = file_inode(filp); | 924 | struct inode *inode = file_inode(filp); |
| 925 | |||
| 905 | return autofs4_root_ioctl_unlocked(inode, filp, cmd, arg); | 926 | return autofs4_root_ioctl_unlocked(inode, filp, cmd, arg); |
| 906 | } | 927 | } |
| 907 | 928 | ||
| 908 | #ifdef CONFIG_COMPAT | 929 | #ifdef CONFIG_COMPAT |
| 909 | static long autofs4_root_compat_ioctl(struct file *filp, | 930 | static long autofs4_root_compat_ioctl(struct file *filp, |
| 910 | unsigned int cmd, unsigned long arg) | 931 | unsigned int cmd, unsigned long arg) |
| 911 | { | 932 | { |
| 912 | struct inode *inode = file_inode(filp); | 933 | struct inode *inode = file_inode(filp); |
| 913 | int ret; | 934 | int ret; |
| @@ -916,7 +937,7 @@ static long autofs4_root_compat_ioctl(struct file *filp, | |||
| 916 | ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, arg); | 937 | ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, arg); |
| 917 | else | 938 | else |
| 918 | ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, | 939 | ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, |
| 919 | (unsigned long)compat_ptr(arg)); | 940 | (unsigned long) compat_ptr(arg)); |
| 920 | 941 | ||
| 921 | return ret; | 942 | return ret; |
| 922 | } | 943 | } |
diff --git a/fs/autofs4/symlink.c b/fs/autofs4/symlink.c index 84e037d1d129..99aab00dc217 100644 --- a/fs/autofs4/symlink.c +++ b/fs/autofs4/symlink.c | |||
| @@ -1,14 +1,10 @@ | |||
| 1 | /* -*- c -*- --------------------------------------------------------------- * | 1 | /* |
| 2 | * | 2 | * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved |
| 3 | * linux/fs/autofs/symlink.c | ||
| 4 | * | ||
| 5 | * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved | ||
| 6 | * | 3 | * |
| 7 | * This file is part of the Linux kernel and is made available under | 4 | * This file is part of the Linux kernel and is made available under |
| 8 | * the terms of the GNU General Public License, version 2, or at your | 5 | * the terms of the GNU General Public License, version 2, or at your |
| 9 | * option, any later version, incorporated herein by reference. | 6 | * option, any later version, incorporated herein by reference. |
| 10 | * | 7 | */ |
| 11 | * ------------------------------------------------------------------------- */ | ||
| 12 | 8 | ||
| 13 | #include "autofs_i.h" | 9 | #include "autofs_i.h" |
| 14 | 10 | ||
| @@ -18,6 +14,7 @@ static const char *autofs4_get_link(struct dentry *dentry, | |||
| 18 | { | 14 | { |
| 19 | struct autofs_sb_info *sbi; | 15 | struct autofs_sb_info *sbi; |
| 20 | struct autofs_info *ino; | 16 | struct autofs_info *ino; |
| 17 | |||
| 21 | if (!dentry) | 18 | if (!dentry) |
| 22 | return ERR_PTR(-ECHILD); | 19 | return ERR_PTR(-ECHILD); |
| 23 | sbi = autofs4_sbi(dentry->d_sb); | 20 | sbi = autofs4_sbi(dentry->d_sb); |
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 35b755e79c2d..0146d911f468 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c | |||
| @@ -1,15 +1,11 @@ | |||
| 1 | /* -*- c -*- --------------------------------------------------------------- * | 1 | /* |
| 2 | * | 2 | * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved |
| 3 | * linux/fs/autofs/waitq.c | 3 | * Copyright 2001-2006 Ian Kent <raven@themaw.net> |
| 4 | * | ||
| 5 | * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved | ||
| 6 | * Copyright 2001-2006 Ian Kent <raven@themaw.net> | ||
| 7 | * | 4 | * |
| 8 | * This file is part of the Linux kernel and is made available under | 5 | * This file is part of the Linux kernel and is made available under |
| 9 | * the terms of the GNU General Public License, version 2, or at your | 6 | * the terms of the GNU General Public License, version 2, or at your |
| 10 | * option, any later version, incorporated herein by reference. | 7 | * option, any later version, incorporated herein by reference. |
| 11 | * | 8 | */ |
| 12 | * ------------------------------------------------------------------------- */ | ||
| 13 | 9 | ||
| 14 | #include <linux/slab.h> | 10 | #include <linux/slab.h> |
| 15 | #include <linux/time.h> | 11 | #include <linux/time.h> |
| @@ -18,7 +14,8 @@ | |||
| 18 | #include "autofs_i.h" | 14 | #include "autofs_i.h" |
| 19 | 15 | ||
| 20 | /* We make this a static variable rather than a part of the superblock; it | 16 | /* We make this a static variable rather than a part of the superblock; it |
| 21 | is better if we don't reassign numbers easily even across filesystems */ | 17 | * is better if we don't reassign numbers easily even across filesystems |
| 18 | */ | ||
| 22 | static autofs_wqt_t autofs4_next_wait_queue = 1; | 19 | static autofs_wqt_t autofs4_next_wait_queue = 1; |
| 23 | 20 | ||
| 24 | /* These are the signals we allow interrupting a pending mount */ | 21 | /* These are the signals we allow interrupting a pending mount */ |
| @@ -34,7 +31,7 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi) | |||
| 34 | return; | 31 | return; |
| 35 | } | 32 | } |
| 36 | 33 | ||
| 37 | DPRINTK("entering catatonic mode"); | 34 | pr_debug("entering catatonic mode\n"); |
| 38 | 35 | ||
| 39 | sbi->catatonic = 1; | 36 | sbi->catatonic = 1; |
| 40 | wq = sbi->queues; | 37 | wq = sbi->queues; |
| @@ -69,17 +66,19 @@ static int autofs4_write(struct autofs_sb_info *sbi, | |||
| 69 | set_fs(KERNEL_DS); | 66 | set_fs(KERNEL_DS); |
| 70 | 67 | ||
| 71 | mutex_lock(&sbi->pipe_mutex); | 68 | mutex_lock(&sbi->pipe_mutex); |
| 72 | while (bytes && | 69 | wr = __vfs_write(file, data, bytes, &file->f_pos); |
| 73 | (wr = __vfs_write(file,data,bytes,&file->f_pos)) > 0) { | 70 | while (bytes && wr) { |
| 74 | data += wr; | 71 | data += wr; |
| 75 | bytes -= wr; | 72 | bytes -= wr; |
| 73 | wr = __vfs_write(file, data, bytes, &file->f_pos); | ||
| 76 | } | 74 | } |
| 77 | mutex_unlock(&sbi->pipe_mutex); | 75 | mutex_unlock(&sbi->pipe_mutex); |
| 78 | 76 | ||
| 79 | set_fs(fs); | 77 | set_fs(fs); |
| 80 | 78 | ||
| 81 | /* Keep the currently executing process from receiving a | 79 | /* Keep the currently executing process from receiving a |
| 82 | SIGPIPE unless it was already supposed to get one */ | 80 | * SIGPIPE unless it was already supposed to get one |
| 81 | */ | ||
| 83 | if (wr == -EPIPE && !sigpipe) { | 82 | if (wr == -EPIPE && !sigpipe) { |
| 84 | spin_lock_irqsave(¤t->sighand->siglock, flags); | 83 | spin_lock_irqsave(¤t->sighand->siglock, flags); |
| 85 | sigdelset(¤t->pending.signal, SIGPIPE); | 84 | sigdelset(¤t->pending.signal, SIGPIPE); |
| @@ -89,7 +88,7 @@ static int autofs4_write(struct autofs_sb_info *sbi, | |||
| 89 | 88 | ||
| 90 | return (bytes > 0); | 89 | return (bytes > 0); |
| 91 | } | 90 | } |
| 92 | 91 | ||
| 93 | static void autofs4_notify_daemon(struct autofs_sb_info *sbi, | 92 | static void autofs4_notify_daemon(struct autofs_sb_info *sbi, |
| 94 | struct autofs_wait_queue *wq, | 93 | struct autofs_wait_queue *wq, |
| 95 | int type) | 94 | int type) |
| @@ -102,10 +101,11 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, | |||
| 102 | struct file *pipe = NULL; | 101 | struct file *pipe = NULL; |
| 103 | size_t pktsz; | 102 | size_t pktsz; |
| 104 | 103 | ||
| 105 | DPRINTK("wait id = 0x%08lx, name = %.*s, type=%d", | 104 | pr_debug("wait id = 0x%08lx, name = %.*s, type=%d\n", |
| 106 | (unsigned long) wq->wait_queue_token, wq->name.len, wq->name.name, type); | 105 | (unsigned long) wq->wait_queue_token, |
| 106 | wq->name.len, wq->name.name, type); | ||
| 107 | 107 | ||
| 108 | memset(&pkt,0,sizeof pkt); /* For security reasons */ | 108 | memset(&pkt, 0, sizeof(pkt)); /* For security reasons */ |
| 109 | 109 | ||
| 110 | pkt.hdr.proto_version = sbi->version; | 110 | pkt.hdr.proto_version = sbi->version; |
| 111 | pkt.hdr.type = type; | 111 | pkt.hdr.type = type; |
| @@ -126,7 +126,8 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, | |||
| 126 | } | 126 | } |
| 127 | case autofs_ptype_expire_multi: | 127 | case autofs_ptype_expire_multi: |
| 128 | { | 128 | { |
| 129 | struct autofs_packet_expire_multi *ep = &pkt.v4_pkt.expire_multi; | 129 | struct autofs_packet_expire_multi *ep = |
| 130 | &pkt.v4_pkt.expire_multi; | ||
| 130 | 131 | ||
| 131 | pktsz = sizeof(*ep); | 132 | pktsz = sizeof(*ep); |
| 132 | 133 | ||
| @@ -163,7 +164,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, | |||
| 163 | break; | 164 | break; |
| 164 | } | 165 | } |
| 165 | default: | 166 | default: |
| 166 | printk("autofs4_notify_daemon: bad type %d!\n", type); | 167 | pr_warn("bad type %d!\n", type); |
| 167 | mutex_unlock(&sbi->wq_mutex); | 168 | mutex_unlock(&sbi->wq_mutex); |
| 168 | return; | 169 | return; |
| 169 | } | 170 | } |
| @@ -231,7 +232,7 @@ autofs4_find_wait(struct autofs_sb_info *sbi, struct qstr *qstr) | |||
| 231 | if (wq->name.hash == qstr->hash && | 232 | if (wq->name.hash == qstr->hash && |
| 232 | wq->name.len == qstr->len && | 233 | wq->name.len == qstr->len && |
| 233 | wq->name.name && | 234 | wq->name.name && |
| 234 | !memcmp(wq->name.name, qstr->name, qstr->len)) | 235 | !memcmp(wq->name.name, qstr->name, qstr->len)) |
| 235 | break; | 236 | break; |
| 236 | } | 237 | } |
| 237 | return wq; | 238 | return wq; |
| @@ -248,7 +249,7 @@ autofs4_find_wait(struct autofs_sb_info *sbi, struct qstr *qstr) | |||
| 248 | static int validate_request(struct autofs_wait_queue **wait, | 249 | static int validate_request(struct autofs_wait_queue **wait, |
| 249 | struct autofs_sb_info *sbi, | 250 | struct autofs_sb_info *sbi, |
| 250 | struct qstr *qstr, | 251 | struct qstr *qstr, |
| 251 | struct dentry*dentry, enum autofs_notify notify) | 252 | struct dentry *dentry, enum autofs_notify notify) |
| 252 | { | 253 | { |
| 253 | struct autofs_wait_queue *wq; | 254 | struct autofs_wait_queue *wq; |
| 254 | struct autofs_info *ino; | 255 | struct autofs_info *ino; |
| @@ -322,8 +323,10 @@ static int validate_request(struct autofs_wait_queue **wait, | |||
| 322 | * continue on and create a new request. | 323 | * continue on and create a new request. |
| 323 | */ | 324 | */ |
| 324 | if (!IS_ROOT(dentry)) { | 325 | if (!IS_ROOT(dentry)) { |
| 325 | if (d_really_is_positive(dentry) && d_unhashed(dentry)) { | 326 | if (d_unhashed(dentry) && |
| 327 | d_really_is_positive(dentry)) { | ||
| 326 | struct dentry *parent = dentry->d_parent; | 328 | struct dentry *parent = dentry->d_parent; |
| 329 | |||
| 327 | new = d_lookup(parent, &dentry->d_name); | 330 | new = d_lookup(parent, &dentry->d_name); |
| 328 | if (new) | 331 | if (new) |
| 329 | dentry = new; | 332 | dentry = new; |
| @@ -340,8 +343,8 @@ static int validate_request(struct autofs_wait_queue **wait, | |||
| 340 | return 1; | 343 | return 1; |
| 341 | } | 344 | } |
| 342 | 345 | ||
| 343 | int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, | 346 | int autofs4_wait(struct autofs_sb_info *sbi, |
| 344 | enum autofs_notify notify) | 347 | struct dentry *dentry, enum autofs_notify notify) |
| 345 | { | 348 | { |
| 346 | struct autofs_wait_queue *wq; | 349 | struct autofs_wait_queue *wq; |
| 347 | struct qstr qstr; | 350 | struct qstr qstr; |
| @@ -411,7 +414,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, | |||
| 411 | 414 | ||
| 412 | if (!wq) { | 415 | if (!wq) { |
| 413 | /* Create a new wait queue */ | 416 | /* Create a new wait queue */ |
| 414 | wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL); | 417 | wq = kmalloc(sizeof(struct autofs_wait_queue), GFP_KERNEL); |
| 415 | if (!wq) { | 418 | if (!wq) { |
| 416 | kfree(qstr.name); | 419 | kfree(qstr.name); |
| 417 | mutex_unlock(&sbi->wq_mutex); | 420 | mutex_unlock(&sbi->wq_mutex); |
| @@ -450,17 +453,19 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, | |||
| 450 | autofs_ptype_expire_indirect; | 453 | autofs_ptype_expire_indirect; |
| 451 | } | 454 | } |
| 452 | 455 | ||
| 453 | DPRINTK("new wait id = 0x%08lx, name = %.*s, nfy=%d\n", | 456 | pr_debug("new wait id = 0x%08lx, name = %.*s, nfy=%d\n", |
| 454 | (unsigned long) wq->wait_queue_token, wq->name.len, | 457 | (unsigned long) wq->wait_queue_token, wq->name.len, |
| 455 | wq->name.name, notify); | 458 | wq->name.name, notify); |
| 456 | 459 | ||
| 457 | /* autofs4_notify_daemon() may block; it will unlock ->wq_mutex */ | 460 | /* |
| 461 | * autofs4_notify_daemon() may block; it will unlock ->wq_mutex | ||
| 462 | */ | ||
| 458 | autofs4_notify_daemon(sbi, wq, type); | 463 | autofs4_notify_daemon(sbi, wq, type); |
| 459 | } else { | 464 | } else { |
| 460 | wq->wait_ctr++; | 465 | wq->wait_ctr++; |
| 461 | DPRINTK("existing wait id = 0x%08lx, name = %.*s, nfy=%d", | 466 | pr_debug("existing wait id = 0x%08lx, name = %.*s, nfy=%d\n", |
| 462 | (unsigned long) wq->wait_queue_token, wq->name.len, | 467 | (unsigned long) wq->wait_queue_token, wq->name.len, |
| 463 | wq->name.name, notify); | 468 | wq->name.name, notify); |
| 464 | mutex_unlock(&sbi->wq_mutex); | 469 | mutex_unlock(&sbi->wq_mutex); |
| 465 | kfree(qstr.name); | 470 | kfree(qstr.name); |
| 466 | } | 471 | } |
| @@ -471,12 +476,14 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, | |||
| 471 | */ | 476 | */ |
| 472 | if (wq->name.name) { | 477 | if (wq->name.name) { |
| 473 | /* Block all but "shutdown" signals while waiting */ | 478 | /* Block all but "shutdown" signals while waiting */ |
| 474 | sigset_t oldset; | 479 | unsigned long shutdown_sigs_mask; |
| 475 | unsigned long irqflags; | 480 | unsigned long irqflags; |
| 481 | sigset_t oldset; | ||
| 476 | 482 | ||
| 477 | spin_lock_irqsave(¤t->sighand->siglock, irqflags); | 483 | spin_lock_irqsave(¤t->sighand->siglock, irqflags); |
| 478 | oldset = current->blocked; | 484 | oldset = current->blocked; |
| 479 | siginitsetinv(¤t->blocked, SHUTDOWN_SIGS & ~oldset.sig[0]); | 485 | shutdown_sigs_mask = SHUTDOWN_SIGS & ~oldset.sig[0]; |
| 486 | siginitsetinv(¤t->blocked, shutdown_sigs_mask); | ||
| 480 | recalc_sigpending(); | 487 | recalc_sigpending(); |
| 481 | spin_unlock_irqrestore(¤t->sighand->siglock, irqflags); | 488 | spin_unlock_irqrestore(¤t->sighand->siglock, irqflags); |
| 482 | 489 | ||
| @@ -487,7 +494,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, | |||
| 487 | recalc_sigpending(); | 494 | recalc_sigpending(); |
| 488 | spin_unlock_irqrestore(¤t->sighand->siglock, irqflags); | 495 | spin_unlock_irqrestore(¤t->sighand->siglock, irqflags); |
| 489 | } else { | 496 | } else { |
| 490 | DPRINTK("skipped sleeping"); | 497 | pr_debug("skipped sleeping\n"); |
| 491 | } | 498 | } |
| 492 | 499 | ||
| 493 | status = wq->status; | 500 | status = wq->status; |
| @@ -562,4 +569,3 @@ int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_tok | |||
| 562 | 569 | ||
| 563 | return 0; | 570 | return 0; |
| 564 | } | 571 | } |
| 565 | |||
diff --git a/fs/buffer.c b/fs/buffer.c index e1632abb4ca9..33be29675358 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
| @@ -621,17 +621,17 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode); | |||
| 621 | * If warn is true, then emit a warning if the page is not uptodate and has | 621 | * If warn is true, then emit a warning if the page is not uptodate and has |
| 622 | * not been truncated. | 622 | * not been truncated. |
| 623 | * | 623 | * |
| 624 | * The caller must hold mem_cgroup_begin_page_stat() lock. | 624 | * The caller must hold lock_page_memcg(). |
| 625 | */ | 625 | */ |
| 626 | static void __set_page_dirty(struct page *page, struct address_space *mapping, | 626 | static void __set_page_dirty(struct page *page, struct address_space *mapping, |
| 627 | struct mem_cgroup *memcg, int warn) | 627 | int warn) |
| 628 | { | 628 | { |
| 629 | unsigned long flags; | 629 | unsigned long flags; |
| 630 | 630 | ||
| 631 | spin_lock_irqsave(&mapping->tree_lock, flags); | 631 | spin_lock_irqsave(&mapping->tree_lock, flags); |
| 632 | if (page->mapping) { /* Race with truncate? */ | 632 | if (page->mapping) { /* Race with truncate? */ |
| 633 | WARN_ON_ONCE(warn && !PageUptodate(page)); | 633 | WARN_ON_ONCE(warn && !PageUptodate(page)); |
| 634 | account_page_dirtied(page, mapping, memcg); | 634 | account_page_dirtied(page, mapping); |
| 635 | radix_tree_tag_set(&mapping->page_tree, | 635 | radix_tree_tag_set(&mapping->page_tree, |
| 636 | page_index(page), PAGECACHE_TAG_DIRTY); | 636 | page_index(page), PAGECACHE_TAG_DIRTY); |
| 637 | } | 637 | } |
| @@ -666,7 +666,6 @@ static void __set_page_dirty(struct page *page, struct address_space *mapping, | |||
| 666 | int __set_page_dirty_buffers(struct page *page) | 666 | int __set_page_dirty_buffers(struct page *page) |
| 667 | { | 667 | { |
| 668 | int newly_dirty; | 668 | int newly_dirty; |
| 669 | struct mem_cgroup *memcg; | ||
| 670 | struct address_space *mapping = page_mapping(page); | 669 | struct address_space *mapping = page_mapping(page); |
| 671 | 670 | ||
| 672 | if (unlikely(!mapping)) | 671 | if (unlikely(!mapping)) |
| @@ -683,17 +682,17 @@ int __set_page_dirty_buffers(struct page *page) | |||
| 683 | } while (bh != head); | 682 | } while (bh != head); |
| 684 | } | 683 | } |
| 685 | /* | 684 | /* |
| 686 | * Use mem_group_begin_page_stat() to keep PageDirty synchronized with | 685 | * Lock out page->mem_cgroup migration to keep PageDirty |
| 687 | * per-memcg dirty page counters. | 686 | * synchronized with per-memcg dirty page counters. |
| 688 | */ | 687 | */ |
| 689 | memcg = mem_cgroup_begin_page_stat(page); | 688 | lock_page_memcg(page); |
| 690 | newly_dirty = !TestSetPageDirty(page); | 689 | newly_dirty = !TestSetPageDirty(page); |
| 691 | spin_unlock(&mapping->private_lock); | 690 | spin_unlock(&mapping->private_lock); |
| 692 | 691 | ||
| 693 | if (newly_dirty) | 692 | if (newly_dirty) |
| 694 | __set_page_dirty(page, mapping, memcg, 1); | 693 | __set_page_dirty(page, mapping, 1); |
| 695 | 694 | ||
| 696 | mem_cgroup_end_page_stat(memcg); | 695 | unlock_page_memcg(page); |
| 697 | 696 | ||
| 698 | if (newly_dirty) | 697 | if (newly_dirty) |
| 699 | __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); | 698 | __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); |
| @@ -1167,15 +1166,14 @@ void mark_buffer_dirty(struct buffer_head *bh) | |||
| 1167 | if (!test_set_buffer_dirty(bh)) { | 1166 | if (!test_set_buffer_dirty(bh)) { |
| 1168 | struct page *page = bh->b_page; | 1167 | struct page *page = bh->b_page; |
| 1169 | struct address_space *mapping = NULL; | 1168 | struct address_space *mapping = NULL; |
| 1170 | struct mem_cgroup *memcg; | ||
| 1171 | 1169 | ||
| 1172 | memcg = mem_cgroup_begin_page_stat(page); | 1170 | lock_page_memcg(page); |
| 1173 | if (!TestSetPageDirty(page)) { | 1171 | if (!TestSetPageDirty(page)) { |
| 1174 | mapping = page_mapping(page); | 1172 | mapping = page_mapping(page); |
| 1175 | if (mapping) | 1173 | if (mapping) |
| 1176 | __set_page_dirty(page, mapping, memcg, 0); | 1174 | __set_page_dirty(page, mapping, 0); |
| 1177 | } | 1175 | } |
| 1178 | mem_cgroup_end_page_stat(memcg); | 1176 | unlock_page_memcg(page); |
| 1179 | if (mapping) | 1177 | if (mapping) |
| 1180 | __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); | 1178 | __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); |
| 1181 | } | 1179 | } |
diff --git a/fs/mpage.c b/fs/mpage.c index 1480d3a18037..6bd9fd90964e 100644 --- a/fs/mpage.c +++ b/fs/mpage.c | |||
| @@ -24,6 +24,7 @@ | |||
| 24 | #include <linux/highmem.h> | 24 | #include <linux/highmem.h> |
| 25 | #include <linux/prefetch.h> | 25 | #include <linux/prefetch.h> |
| 26 | #include <linux/mpage.h> | 26 | #include <linux/mpage.h> |
| 27 | #include <linux/mm_inline.h> | ||
| 27 | #include <linux/writeback.h> | 28 | #include <linux/writeback.h> |
| 28 | #include <linux/backing-dev.h> | 29 | #include <linux/backing-dev.h> |
| 29 | #include <linux/pagevec.h> | 30 | #include <linux/pagevec.h> |
| @@ -366,7 +367,7 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages, | |||
| 366 | map_bh.b_state = 0; | 367 | map_bh.b_state = 0; |
| 367 | map_bh.b_size = 0; | 368 | map_bh.b_size = 0; |
| 368 | for (page_idx = 0; page_idx < nr_pages; page_idx++) { | 369 | for (page_idx = 0; page_idx < nr_pages; page_idx++) { |
| 369 | struct page *page = list_entry(pages->prev, struct page, lru); | 370 | struct page *page = lru_to_page(pages); |
| 370 | 371 | ||
| 371 | prefetchw(&page->flags); | 372 | prefetchw(&page->flags); |
| 372 | list_del(&page->lru); | 373 | list_del(&page->lru); |
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index a76b9ea7722e..ef6a2ec494de 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
| @@ -287,7 +287,6 @@ struct o2hb_bio_wait_ctxt { | |||
| 287 | static void o2hb_write_timeout(struct work_struct *work) | 287 | static void o2hb_write_timeout(struct work_struct *work) |
| 288 | { | 288 | { |
| 289 | int failed, quorum; | 289 | int failed, quorum; |
| 290 | unsigned long flags; | ||
| 291 | struct o2hb_region *reg = | 290 | struct o2hb_region *reg = |
| 292 | container_of(work, struct o2hb_region, | 291 | container_of(work, struct o2hb_region, |
| 293 | hr_write_timeout_work.work); | 292 | hr_write_timeout_work.work); |
| @@ -297,14 +296,14 @@ static void o2hb_write_timeout(struct work_struct *work) | |||
| 297 | jiffies_to_msecs(jiffies - reg->hr_last_timeout_start)); | 296 | jiffies_to_msecs(jiffies - reg->hr_last_timeout_start)); |
| 298 | 297 | ||
| 299 | if (o2hb_global_heartbeat_active()) { | 298 | if (o2hb_global_heartbeat_active()) { |
| 300 | spin_lock_irqsave(&o2hb_live_lock, flags); | 299 | spin_lock(&o2hb_live_lock); |
| 301 | if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap)) | 300 | if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap)) |
| 302 | set_bit(reg->hr_region_num, o2hb_failed_region_bitmap); | 301 | set_bit(reg->hr_region_num, o2hb_failed_region_bitmap); |
| 303 | failed = bitmap_weight(o2hb_failed_region_bitmap, | 302 | failed = bitmap_weight(o2hb_failed_region_bitmap, |
| 304 | O2NM_MAX_REGIONS); | 303 | O2NM_MAX_REGIONS); |
| 305 | quorum = bitmap_weight(o2hb_quorum_region_bitmap, | 304 | quorum = bitmap_weight(o2hb_quorum_region_bitmap, |
| 306 | O2NM_MAX_REGIONS); | 305 | O2NM_MAX_REGIONS); |
| 307 | spin_unlock_irqrestore(&o2hb_live_lock, flags); | 306 | spin_unlock(&o2hb_live_lock); |
| 308 | 307 | ||
| 309 | mlog(ML_HEARTBEAT, "Number of regions %d, failed regions %d\n", | 308 | mlog(ML_HEARTBEAT, "Number of regions %d, failed regions %d\n", |
| 310 | quorum, failed); | 309 | quorum, failed); |
| @@ -2425,11 +2424,10 @@ EXPORT_SYMBOL_GPL(o2hb_check_node_heartbeating); | |||
| 2425 | int o2hb_check_node_heartbeating_no_sem(u8 node_num) | 2424 | int o2hb_check_node_heartbeating_no_sem(u8 node_num) |
| 2426 | { | 2425 | { |
| 2427 | unsigned long testing_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 2426 | unsigned long testing_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
| 2428 | unsigned long flags; | ||
| 2429 | 2427 | ||
| 2430 | spin_lock_irqsave(&o2hb_live_lock, flags); | 2428 | spin_lock(&o2hb_live_lock); |
| 2431 | o2hb_fill_node_map_from_callback(testing_map, sizeof(testing_map)); | 2429 | o2hb_fill_node_map_from_callback(testing_map, sizeof(testing_map)); |
| 2432 | spin_unlock_irqrestore(&o2hb_live_lock, flags); | 2430 | spin_unlock(&o2hb_live_lock); |
| 2433 | if (!test_bit(node_num, testing_map)) { | 2431 | if (!test_bit(node_num, testing_map)) { |
| 2434 | mlog(ML_HEARTBEAT, | 2432 | mlog(ML_HEARTBEAT, |
| 2435 | "node (%u) does not have heartbeating enabled.\n", | 2433 | "node (%u) does not have heartbeating enabled.\n", |
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index 68c607e63ff6..004f2cbe8f71 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h | |||
| @@ -282,6 +282,7 @@ static inline void __dlm_set_joining_node(struct dlm_ctxt *dlm, | |||
| 282 | #define DLM_LOCK_RES_DROPPING_REF 0x00000040 | 282 | #define DLM_LOCK_RES_DROPPING_REF 0x00000040 |
| 283 | #define DLM_LOCK_RES_BLOCK_DIRTY 0x00001000 | 283 | #define DLM_LOCK_RES_BLOCK_DIRTY 0x00001000 |
| 284 | #define DLM_LOCK_RES_SETREF_INPROG 0x00002000 | 284 | #define DLM_LOCK_RES_SETREF_INPROG 0x00002000 |
| 285 | #define DLM_LOCK_RES_RECOVERY_WAITING 0x00004000 | ||
| 285 | 286 | ||
| 286 | /* max milliseconds to wait to sync up a network failure with a node death */ | 287 | /* max milliseconds to wait to sync up a network failure with a node death */ |
| 287 | #define DLM_NODE_DEATH_WAIT_MAX (5 * 1000) | 288 | #define DLM_NODE_DEATH_WAIT_MAX (5 * 1000) |
| @@ -451,6 +452,7 @@ enum { | |||
| 451 | DLM_QUERY_REGION = 519, | 452 | DLM_QUERY_REGION = 519, |
| 452 | DLM_QUERY_NODEINFO = 520, | 453 | DLM_QUERY_NODEINFO = 520, |
| 453 | DLM_BEGIN_EXIT_DOMAIN_MSG = 521, | 454 | DLM_BEGIN_EXIT_DOMAIN_MSG = 521, |
| 455 | DLM_DEREF_LOCKRES_DONE = 522, | ||
| 454 | }; | 456 | }; |
| 455 | 457 | ||
| 456 | struct dlm_reco_node_data | 458 | struct dlm_reco_node_data |
| @@ -545,7 +547,7 @@ struct dlm_master_requery | |||
| 545 | * }; | 547 | * }; |
| 546 | * | 548 | * |
| 547 | * from ../cluster/tcp.h | 549 | * from ../cluster/tcp.h |
| 548 | * NET_MAX_PAYLOAD_BYTES (4096 - sizeof(net_msg)) | 550 | * O2NET_MAX_PAYLOAD_BYTES (4096 - sizeof(net_msg)) |
| 549 | * (roughly 4080 bytes) | 551 | * (roughly 4080 bytes) |
| 550 | * and sizeof(dlm_migratable_lockres) = 112 bytes | 552 | * and sizeof(dlm_migratable_lockres) = 112 bytes |
| 551 | * and sizeof(dlm_migratable_lock) = 16 bytes | 553 | * and sizeof(dlm_migratable_lock) = 16 bytes |
| @@ -586,7 +588,7 @@ struct dlm_migratable_lockres | |||
| 586 | 588 | ||
| 587 | /* from above, 128 bytes | 589 | /* from above, 128 bytes |
| 588 | * for some undetermined future use */ | 590 | * for some undetermined future use */ |
| 589 | #define DLM_MIG_LOCKRES_RESERVED (NET_MAX_PAYLOAD_BYTES - \ | 591 | #define DLM_MIG_LOCKRES_RESERVED (O2NET_MAX_PAYLOAD_BYTES - \ |
| 590 | DLM_MIG_LOCKRES_MAX_LEN) | 592 | DLM_MIG_LOCKRES_MAX_LEN) |
| 591 | 593 | ||
| 592 | struct dlm_create_lock | 594 | struct dlm_create_lock |
| @@ -782,6 +784,20 @@ struct dlm_deref_lockres | |||
| 782 | u8 name[O2NM_MAX_NAME_LEN]; | 784 | u8 name[O2NM_MAX_NAME_LEN]; |
| 783 | }; | 785 | }; |
| 784 | 786 | ||
| 787 | enum { | ||
| 788 | DLM_DEREF_RESPONSE_DONE = 0, | ||
| 789 | DLM_DEREF_RESPONSE_INPROG = 1, | ||
| 790 | }; | ||
| 791 | |||
| 792 | struct dlm_deref_lockres_done { | ||
| 793 | u32 pad1; | ||
| 794 | u16 pad2; | ||
| 795 | u8 node_idx; | ||
| 796 | u8 namelen; | ||
| 797 | |||
| 798 | u8 name[O2NM_MAX_NAME_LEN]; | ||
| 799 | }; | ||
| 800 | |||
| 785 | static inline enum dlm_status | 801 | static inline enum dlm_status |
| 786 | __dlm_lockres_state_to_status(struct dlm_lock_resource *res) | 802 | __dlm_lockres_state_to_status(struct dlm_lock_resource *res) |
| 787 | { | 803 | { |
| @@ -789,7 +805,8 @@ __dlm_lockres_state_to_status(struct dlm_lock_resource *res) | |||
| 789 | 805 | ||
| 790 | assert_spin_locked(&res->spinlock); | 806 | assert_spin_locked(&res->spinlock); |
| 791 | 807 | ||
| 792 | if (res->state & DLM_LOCK_RES_RECOVERING) | 808 | if (res->state & (DLM_LOCK_RES_RECOVERING| |
| 809 | DLM_LOCK_RES_RECOVERY_WAITING)) | ||
| 793 | status = DLM_RECOVERING; | 810 | status = DLM_RECOVERING; |
| 794 | else if (res->state & DLM_LOCK_RES_MIGRATING) | 811 | else if (res->state & DLM_LOCK_RES_MIGRATING) |
| 795 | status = DLM_MIGRATING; | 812 | status = DLM_MIGRATING; |
| @@ -968,6 +985,8 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data, | |||
| 968 | void dlm_assert_master_post_handler(int status, void *data, void *ret_data); | 985 | void dlm_assert_master_post_handler(int status, void *data, void *ret_data); |
| 969 | int dlm_deref_lockres_handler(struct o2net_msg *msg, u32 len, void *data, | 986 | int dlm_deref_lockres_handler(struct o2net_msg *msg, u32 len, void *data, |
| 970 | void **ret_data); | 987 | void **ret_data); |
| 988 | int dlm_deref_lockres_done_handler(struct o2net_msg *msg, u32 len, void *data, | ||
| 989 | void **ret_data); | ||
| 971 | int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data, | 990 | int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data, |
| 972 | void **ret_data); | 991 | void **ret_data); |
| 973 | int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data, | 992 | int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data, |
| @@ -1009,6 +1028,7 @@ static inline void __dlm_wait_on_lockres(struct dlm_lock_resource *res) | |||
| 1009 | { | 1028 | { |
| 1010 | __dlm_wait_on_lockres_flags(res, (DLM_LOCK_RES_IN_PROGRESS| | 1029 | __dlm_wait_on_lockres_flags(res, (DLM_LOCK_RES_IN_PROGRESS| |
| 1011 | DLM_LOCK_RES_RECOVERING| | 1030 | DLM_LOCK_RES_RECOVERING| |
| 1031 | DLM_LOCK_RES_RECOVERY_WAITING| | ||
| 1012 | DLM_LOCK_RES_MIGRATING)); | 1032 | DLM_LOCK_RES_MIGRATING)); |
| 1013 | } | 1033 | } |
| 1014 | 1034 | ||
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 2ee7fe747cea..12e064b8be9a 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c | |||
| @@ -132,10 +132,13 @@ static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events); | |||
| 132 | * - Message DLM_QUERY_NODEINFO added to allow online node removes | 132 | * - Message DLM_QUERY_NODEINFO added to allow online node removes |
| 133 | * New in version 1.2: | 133 | * New in version 1.2: |
| 134 | * - Message DLM_BEGIN_EXIT_DOMAIN_MSG added to mark start of exit domain | 134 | * - Message DLM_BEGIN_EXIT_DOMAIN_MSG added to mark start of exit domain |
| 135 | * New in version 1.3: | ||
| 136 | * - Message DLM_DEREF_LOCKRES_DONE added to inform non-master that the | ||
| 137 | * refmap is cleared | ||
| 135 | */ | 138 | */ |
| 136 | static const struct dlm_protocol_version dlm_protocol = { | 139 | static const struct dlm_protocol_version dlm_protocol = { |
| 137 | .pv_major = 1, | 140 | .pv_major = 1, |
| 138 | .pv_minor = 2, | 141 | .pv_minor = 3, |
| 139 | }; | 142 | }; |
| 140 | 143 | ||
| 141 | #define DLM_DOMAIN_BACKOFF_MS 200 | 144 | #define DLM_DOMAIN_BACKOFF_MS 200 |
| @@ -1396,7 +1399,7 @@ static int dlm_send_join_cancels(struct dlm_ctxt *dlm, | |||
| 1396 | unsigned int map_size) | 1399 | unsigned int map_size) |
| 1397 | { | 1400 | { |
| 1398 | int status, tmpstat; | 1401 | int status, tmpstat; |
| 1399 | unsigned int node; | 1402 | int node; |
| 1400 | 1403 | ||
| 1401 | if (map_size != (BITS_TO_LONGS(O2NM_MAX_NODES) * | 1404 | if (map_size != (BITS_TO_LONGS(O2NM_MAX_NODES) * |
| 1402 | sizeof(unsigned long))) { | 1405 | sizeof(unsigned long))) { |
| @@ -1853,7 +1856,13 @@ static int dlm_register_domain_handlers(struct dlm_ctxt *dlm) | |||
| 1853 | sizeof(struct dlm_exit_domain), | 1856 | sizeof(struct dlm_exit_domain), |
| 1854 | dlm_begin_exit_domain_handler, | 1857 | dlm_begin_exit_domain_handler, |
| 1855 | dlm, NULL, &dlm->dlm_domain_handlers); | 1858 | dlm, NULL, &dlm->dlm_domain_handlers); |
| 1859 | if (status) | ||
| 1860 | goto bail; | ||
| 1856 | 1861 | ||
| 1862 | status = o2net_register_handler(DLM_DEREF_LOCKRES_DONE, dlm->key, | ||
| 1863 | sizeof(struct dlm_deref_lockres_done), | ||
| 1864 | dlm_deref_lockres_done_handler, | ||
| 1865 | dlm, NULL, &dlm->dlm_domain_handlers); | ||
| 1857 | bail: | 1866 | bail: |
| 1858 | if (status) | 1867 | if (status) |
| 1859 | dlm_unregister_domain_handlers(dlm); | 1868 | dlm_unregister_domain_handlers(dlm); |
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 9477d6e1de37..9aed6e202201 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
| @@ -2278,7 +2278,7 @@ int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) | |||
| 2278 | dlm_print_one_lock_resource(res); | 2278 | dlm_print_one_lock_resource(res); |
| 2279 | BUG(); | 2279 | BUG(); |
| 2280 | } | 2280 | } |
| 2281 | return ret; | 2281 | return ret ? ret : r; |
| 2282 | } | 2282 | } |
| 2283 | 2283 | ||
| 2284 | int dlm_deref_lockres_handler(struct o2net_msg *msg, u32 len, void *data, | 2284 | int dlm_deref_lockres_handler(struct o2net_msg *msg, u32 len, void *data, |
| @@ -2345,7 +2345,7 @@ int dlm_deref_lockres_handler(struct o2net_msg *msg, u32 len, void *data, | |||
| 2345 | res->lockname.len, res->lockname.name, node); | 2345 | res->lockname.len, res->lockname.name, node); |
| 2346 | dlm_print_one_lock_resource(res); | 2346 | dlm_print_one_lock_resource(res); |
| 2347 | } | 2347 | } |
| 2348 | ret = 0; | 2348 | ret = DLM_DEREF_RESPONSE_DONE; |
| 2349 | goto done; | 2349 | goto done; |
| 2350 | } | 2350 | } |
| 2351 | 2351 | ||
| @@ -2365,7 +2365,7 @@ int dlm_deref_lockres_handler(struct o2net_msg *msg, u32 len, void *data, | |||
| 2365 | spin_unlock(&dlm->work_lock); | 2365 | spin_unlock(&dlm->work_lock); |
| 2366 | 2366 | ||
| 2367 | queue_work(dlm->dlm_worker, &dlm->dispatched_work); | 2367 | queue_work(dlm->dlm_worker, &dlm->dispatched_work); |
| 2368 | return 0; | 2368 | return DLM_DEREF_RESPONSE_INPROG; |
| 2369 | 2369 | ||
| 2370 | done: | 2370 | done: |
| 2371 | if (res) | 2371 | if (res) |
| @@ -2375,6 +2375,122 @@ done: | |||
| 2375 | return ret; | 2375 | return ret; |
| 2376 | } | 2376 | } |
| 2377 | 2377 | ||
| 2378 | int dlm_deref_lockres_done_handler(struct o2net_msg *msg, u32 len, void *data, | ||
| 2379 | void **ret_data) | ||
| 2380 | { | ||
| 2381 | struct dlm_ctxt *dlm = data; | ||
| 2382 | struct dlm_deref_lockres_done *deref | ||
| 2383 | = (struct dlm_deref_lockres_done *)msg->buf; | ||
| 2384 | struct dlm_lock_resource *res = NULL; | ||
| 2385 | char *name; | ||
| 2386 | unsigned int namelen; | ||
| 2387 | int ret = -EINVAL; | ||
| 2388 | u8 node; | ||
| 2389 | unsigned int hash; | ||
| 2390 | |||
| 2391 | if (!dlm_grab(dlm)) | ||
| 2392 | return 0; | ||
| 2393 | |||
| 2394 | name = deref->name; | ||
| 2395 | namelen = deref->namelen; | ||
| 2396 | node = deref->node_idx; | ||
| 2397 | |||
| 2398 | if (namelen > DLM_LOCKID_NAME_MAX) { | ||
| 2399 | mlog(ML_ERROR, "Invalid name length!"); | ||
| 2400 | goto done; | ||
| 2401 | } | ||
| 2402 | if (deref->node_idx >= O2NM_MAX_NODES) { | ||
| 2403 | mlog(ML_ERROR, "Invalid node number: %u\n", node); | ||
| 2404 | goto done; | ||
| 2405 | } | ||
| 2406 | |||
| 2407 | hash = dlm_lockid_hash(name, namelen); | ||
| 2408 | |||
| 2409 | spin_lock(&dlm->spinlock); | ||
| 2410 | res = __dlm_lookup_lockres_full(dlm, name, namelen, hash); | ||
| 2411 | if (!res) { | ||
| 2412 | spin_unlock(&dlm->spinlock); | ||
| 2413 | mlog(ML_ERROR, "%s:%.*s: bad lockres name\n", | ||
| 2414 | dlm->name, namelen, name); | ||
| 2415 | goto done; | ||
| 2416 | } | ||
| 2417 | |||
| 2418 | spin_lock(&res->spinlock); | ||
| 2419 | BUG_ON(!(res->state & DLM_LOCK_RES_DROPPING_REF)); | ||
| 2420 | if (!list_empty(&res->purge)) { | ||
| 2421 | mlog(0, "%s: Removing res %.*s from purgelist\n", | ||
| 2422 | dlm->name, res->lockname.len, res->lockname.name); | ||
| 2423 | list_del_init(&res->purge); | ||
| 2424 | dlm_lockres_put(res); | ||
| 2425 | dlm->purge_count--; | ||
| 2426 | } | ||
| 2427 | |||
| 2428 | if (!__dlm_lockres_unused(res)) { | ||
| 2429 | mlog(ML_ERROR, "%s: res %.*s in use after deref\n", | ||
| 2430 | dlm->name, res->lockname.len, res->lockname.name); | ||
| 2431 | __dlm_print_one_lock_resource(res); | ||
| 2432 | BUG(); | ||
| 2433 | } | ||
| 2434 | |||
| 2435 | __dlm_unhash_lockres(dlm, res); | ||
| 2436 | |||
| 2437 | spin_lock(&dlm->track_lock); | ||
| 2438 | if (!list_empty(&res->tracking)) | ||
| 2439 | list_del_init(&res->tracking); | ||
| 2440 | else { | ||
| 2441 | mlog(ML_ERROR, "%s: Resource %.*s not on the Tracking list\n", | ||
| 2442 | dlm->name, res->lockname.len, res->lockname.name); | ||
| 2443 | __dlm_print_one_lock_resource(res); | ||
| 2444 | } | ||
| 2445 | spin_unlock(&dlm->track_lock); | ||
| 2446 | |||
| 2447 | /* lockres is not in the hash now. drop the flag and wake up | ||
| 2448 | * any processes waiting in dlm_get_lock_resource. | ||
| 2449 | */ | ||
| 2450 | res->state &= ~DLM_LOCK_RES_DROPPING_REF; | ||
| 2451 | spin_unlock(&res->spinlock); | ||
| 2452 | wake_up(&res->wq); | ||
| 2453 | |||
| 2454 | dlm_lockres_put(res); | ||
| 2455 | |||
| 2456 | spin_unlock(&dlm->spinlock); | ||
| 2457 | |||
| 2458 | done: | ||
| 2459 | dlm_put(dlm); | ||
| 2460 | return ret; | ||
| 2461 | } | ||
| 2462 | |||
| 2463 | static void dlm_drop_lockres_ref_done(struct dlm_ctxt *dlm, | ||
| 2464 | struct dlm_lock_resource *res, u8 node) | ||
| 2465 | { | ||
| 2466 | struct dlm_deref_lockres_done deref; | ||
| 2467 | int ret = 0, r; | ||
| 2468 | const char *lockname; | ||
| 2469 | unsigned int namelen; | ||
| 2470 | |||
| 2471 | lockname = res->lockname.name; | ||
| 2472 | namelen = res->lockname.len; | ||
| 2473 | BUG_ON(namelen > O2NM_MAX_NAME_LEN); | ||
| 2474 | |||
| 2475 | memset(&deref, 0, sizeof(deref)); | ||
| 2476 | deref.node_idx = dlm->node_num; | ||
| 2477 | deref.namelen = namelen; | ||
| 2478 | memcpy(deref.name, lockname, namelen); | ||
| 2479 | |||
| 2480 | ret = o2net_send_message(DLM_DEREF_LOCKRES_DONE, dlm->key, | ||
| 2481 | &deref, sizeof(deref), node, &r); | ||
| 2482 | if (ret < 0) { | ||
| 2483 | mlog(ML_ERROR, "%s: res %.*s, error %d send DEREF DONE " | ||
| 2484 | " to node %u\n", dlm->name, namelen, | ||
| 2485 | lockname, ret, node); | ||
| 2486 | } else if (r < 0) { | ||
| 2487 | /* ignore the error */ | ||
| 2488 | mlog(ML_ERROR, "%s: res %.*s, DEREF to node %u got %d\n", | ||
| 2489 | dlm->name, namelen, lockname, node, r); | ||
| 2490 | dlm_print_one_lock_resource(res); | ||
| 2491 | } | ||
| 2492 | } | ||
| 2493 | |||
| 2378 | static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data) | 2494 | static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data) |
| 2379 | { | 2495 | { |
| 2380 | struct dlm_ctxt *dlm; | 2496 | struct dlm_ctxt *dlm; |
| @@ -2395,6 +2511,8 @@ static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data) | |||
| 2395 | } | 2511 | } |
| 2396 | spin_unlock(&res->spinlock); | 2512 | spin_unlock(&res->spinlock); |
| 2397 | 2513 | ||
| 2514 | dlm_drop_lockres_ref_done(dlm, res, node); | ||
| 2515 | |||
| 2398 | if (cleared) { | 2516 | if (cleared) { |
| 2399 | mlog(0, "%s:%.*s node %u ref dropped in dispatch\n", | 2517 | mlog(0, "%s:%.*s node %u ref dropped in dispatch\n", |
| 2400 | dlm->name, res->lockname.len, res->lockname.name, node); | 2518 | dlm->name, res->lockname.len, res->lockname.name, node); |
| @@ -2432,7 +2550,8 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, | |||
| 2432 | return 0; | 2550 | return 0; |
| 2433 | 2551 | ||
| 2434 | /* delay migration when the lockres is in RECOCERING state */ | 2552 | /* delay migration when the lockres is in RECOCERING state */ |
| 2435 | if (res->state & DLM_LOCK_RES_RECOVERING) | 2553 | if (res->state & (DLM_LOCK_RES_RECOVERING| |
| 2554 | DLM_LOCK_RES_RECOVERY_WAITING)) | ||
| 2436 | return 0; | 2555 | return 0; |
| 2437 | 2556 | ||
| 2438 | if (res->owner != dlm->node_num) | 2557 | if (res->owner != dlm->node_num) |
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index b94a425f0175..cd38488a10fc 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
| @@ -1403,12 +1403,24 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data, | |||
| 1403 | * and RECOVERY flag changed when it completes. */ | 1403 | * and RECOVERY flag changed when it completes. */ |
| 1404 | hash = dlm_lockid_hash(mres->lockname, mres->lockname_len); | 1404 | hash = dlm_lockid_hash(mres->lockname, mres->lockname_len); |
| 1405 | spin_lock(&dlm->spinlock); | 1405 | spin_lock(&dlm->spinlock); |
| 1406 | res = __dlm_lookup_lockres(dlm, mres->lockname, mres->lockname_len, | 1406 | res = __dlm_lookup_lockres_full(dlm, mres->lockname, mres->lockname_len, |
| 1407 | hash); | 1407 | hash); |
| 1408 | if (res) { | 1408 | if (res) { |
| 1409 | /* this will get a ref on res */ | 1409 | /* this will get a ref on res */ |
| 1410 | /* mark it as recovering/migrating and hash it */ | 1410 | /* mark it as recovering/migrating and hash it */ |
| 1411 | spin_lock(&res->spinlock); | 1411 | spin_lock(&res->spinlock); |
| 1412 | if (res->state & DLM_LOCK_RES_DROPPING_REF) { | ||
| 1413 | mlog(0, "%s: node is attempting to migrate " | ||
| 1414 | "lockres %.*s, but marked as dropping " | ||
| 1415 | " ref!\n", dlm->name, | ||
| 1416 | mres->lockname_len, mres->lockname); | ||
| 1417 | ret = -EINVAL; | ||
| 1418 | spin_unlock(&res->spinlock); | ||
| 1419 | spin_unlock(&dlm->spinlock); | ||
| 1420 | dlm_lockres_put(res); | ||
| 1421 | goto leave; | ||
| 1422 | } | ||
| 1423 | |||
| 1412 | if (mres->flags & DLM_MRES_RECOVERY) { | 1424 | if (mres->flags & DLM_MRES_RECOVERY) { |
| 1413 | res->state |= DLM_LOCK_RES_RECOVERING; | 1425 | res->state |= DLM_LOCK_RES_RECOVERING; |
| 1414 | } else { | 1426 | } else { |
| @@ -2163,6 +2175,13 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm, | |||
| 2163 | for (i = 0; i < DLM_HASH_BUCKETS; i++) { | 2175 | for (i = 0; i < DLM_HASH_BUCKETS; i++) { |
| 2164 | bucket = dlm_lockres_hash(dlm, i); | 2176 | bucket = dlm_lockres_hash(dlm, i); |
| 2165 | hlist_for_each_entry(res, bucket, hash_node) { | 2177 | hlist_for_each_entry(res, bucket, hash_node) { |
| 2178 | if (res->state & DLM_LOCK_RES_RECOVERY_WAITING) { | ||
| 2179 | spin_lock(&res->spinlock); | ||
| 2180 | res->state &= ~DLM_LOCK_RES_RECOVERY_WAITING; | ||
| 2181 | spin_unlock(&res->spinlock); | ||
| 2182 | wake_up(&res->wq); | ||
| 2183 | } | ||
| 2184 | |||
| 2166 | if (!(res->state & DLM_LOCK_RES_RECOVERING)) | 2185 | if (!(res->state & DLM_LOCK_RES_RECOVERING)) |
| 2167 | continue; | 2186 | continue; |
| 2168 | 2187 | ||
| @@ -2300,6 +2319,7 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm, | |||
| 2300 | res->lockname.len, res->lockname.name, freed, dead_node); | 2319 | res->lockname.len, res->lockname.name, freed, dead_node); |
| 2301 | __dlm_print_one_lock_resource(res); | 2320 | __dlm_print_one_lock_resource(res); |
| 2302 | } | 2321 | } |
| 2322 | res->state |= DLM_LOCK_RES_RECOVERY_WAITING; | ||
| 2303 | dlm_lockres_clear_refmap_bit(dlm, res, dead_node); | 2323 | dlm_lockres_clear_refmap_bit(dlm, res, dead_node); |
| 2304 | } else if (test_bit(dead_node, res->refmap)) { | 2324 | } else if (test_bit(dead_node, res->refmap)) { |
| 2305 | mlog(0, "%s:%.*s: dead node %u had a ref, but had " | 2325 | mlog(0, "%s:%.*s: dead node %u had a ref, but had " |
| @@ -2377,14 +2397,16 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) | |||
| 2377 | dlm_revalidate_lvb(dlm, res, dead_node); | 2397 | dlm_revalidate_lvb(dlm, res, dead_node); |
| 2378 | if (res->owner == dead_node) { | 2398 | if (res->owner == dead_node) { |
| 2379 | if (res->state & DLM_LOCK_RES_DROPPING_REF) { | 2399 | if (res->state & DLM_LOCK_RES_DROPPING_REF) { |
| 2380 | mlog(ML_NOTICE, "%s: res %.*s, Skip " | 2400 | mlog(0, "%s:%.*s: owned by " |
| 2381 | "recovery as it is being freed\n", | 2401 | "dead node %u, this node was " |
| 2382 | dlm->name, res->lockname.len, | 2402 | "dropping its ref when it died. " |
| 2383 | res->lockname.name); | 2403 | "continue, dropping the flag.\n", |
| 2384 | } else | 2404 | dlm->name, res->lockname.len, |
| 2385 | dlm_move_lockres_to_recovery_list(dlm, | 2405 | res->lockname.name, dead_node); |
| 2386 | res); | 2406 | } |
| 2387 | 2407 | res->state &= ~DLM_LOCK_RES_DROPPING_REF; | |
| 2408 | dlm_move_lockres_to_recovery_list(dlm, | ||
| 2409 | res); | ||
| 2388 | } else if (res->owner == dlm->node_num) { | 2410 | } else if (res->owner == dlm->node_num) { |
| 2389 | dlm_free_dead_locks(dlm, res, dead_node); | 2411 | dlm_free_dead_locks(dlm, res, dead_node); |
| 2390 | __dlm_lockres_calc_usage(dlm, res); | 2412 | __dlm_lockres_calc_usage(dlm, res); |
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index c5f6c241ecd7..68d239ba0c63 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c | |||
| @@ -106,7 +106,8 @@ int __dlm_lockres_unused(struct dlm_lock_resource *res) | |||
| 106 | if (!list_empty(&res->dirty) || res->state & DLM_LOCK_RES_DIRTY) | 106 | if (!list_empty(&res->dirty) || res->state & DLM_LOCK_RES_DIRTY) |
| 107 | return 0; | 107 | return 0; |
| 108 | 108 | ||
| 109 | if (res->state & DLM_LOCK_RES_RECOVERING) | 109 | if (res->state & (DLM_LOCK_RES_RECOVERING| |
| 110 | DLM_LOCK_RES_RECOVERY_WAITING)) | ||
| 110 | return 0; | 111 | return 0; |
| 111 | 112 | ||
| 112 | /* Another node has this resource with this node as the master */ | 113 | /* Another node has this resource with this node as the master */ |
| @@ -202,6 +203,13 @@ static void dlm_purge_lockres(struct dlm_ctxt *dlm, | |||
| 202 | dlm->purge_count--; | 203 | dlm->purge_count--; |
| 203 | } | 204 | } |
| 204 | 205 | ||
| 206 | if (!master && ret != 0) { | ||
| 207 | mlog(0, "%s: deref %.*s in progress or master goes down\n", | ||
| 208 | dlm->name, res->lockname.len, res->lockname.name); | ||
| 209 | spin_unlock(&res->spinlock); | ||
| 210 | return; | ||
| 211 | } | ||
| 212 | |||
| 205 | if (!__dlm_lockres_unused(res)) { | 213 | if (!__dlm_lockres_unused(res)) { |
| 206 | mlog(ML_ERROR, "%s: res %.*s in use after deref\n", | 214 | mlog(ML_ERROR, "%s: res %.*s in use after deref\n", |
| 207 | dlm->name, res->lockname.len, res->lockname.name); | 215 | dlm->name, res->lockname.len, res->lockname.name); |
| @@ -700,7 +708,8 @@ static int dlm_thread(void *data) | |||
| 700 | * dirty for a short while. */ | 708 | * dirty for a short while. */ |
| 701 | BUG_ON(res->state & DLM_LOCK_RES_MIGRATING); | 709 | BUG_ON(res->state & DLM_LOCK_RES_MIGRATING); |
| 702 | if (res->state & (DLM_LOCK_RES_IN_PROGRESS | | 710 | if (res->state & (DLM_LOCK_RES_IN_PROGRESS | |
| 703 | DLM_LOCK_RES_RECOVERING)) { | 711 | DLM_LOCK_RES_RECOVERING | |
| 712 | DLM_LOCK_RES_RECOVERY_WAITING)) { | ||
| 704 | /* move it to the tail and keep going */ | 713 | /* move it to the tail and keep going */ |
| 705 | res->state &= ~DLM_LOCK_RES_DIRTY; | 714 | res->state &= ~DLM_LOCK_RES_DIRTY; |
| 706 | spin_unlock(&res->spinlock); | 715 | spin_unlock(&res->spinlock); |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index faa1365097bc..302854ee0985 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
| @@ -236,6 +236,7 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) | |||
| 236 | struct ocfs2_recovery_map *rm = osb->recovery_map; | 236 | struct ocfs2_recovery_map *rm = osb->recovery_map; |
| 237 | struct ocfs2_orphan_scan *os = &osb->osb_orphan_scan; | 237 | struct ocfs2_orphan_scan *os = &osb->osb_orphan_scan; |
| 238 | int i, out = 0; | 238 | int i, out = 0; |
| 239 | unsigned long flags; | ||
| 239 | 240 | ||
| 240 | out += snprintf(buf + out, len - out, | 241 | out += snprintf(buf + out, len - out, |
| 241 | "%10s => Id: %-s Uuid: %-s Gen: 0x%X Label: %-s\n", | 242 | "%10s => Id: %-s Uuid: %-s Gen: 0x%X Label: %-s\n", |
| @@ -271,14 +272,14 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) | |||
| 271 | cconn->cc_version.pv_minor); | 272 | cconn->cc_version.pv_minor); |
| 272 | } | 273 | } |
| 273 | 274 | ||
| 274 | spin_lock(&osb->dc_task_lock); | 275 | spin_lock_irqsave(&osb->dc_task_lock, flags); |
| 275 | out += snprintf(buf + out, len - out, | 276 | out += snprintf(buf + out, len - out, |
| 276 | "%10s => Pid: %d Count: %lu WakeSeq: %lu " | 277 | "%10s => Pid: %d Count: %lu WakeSeq: %lu " |
| 277 | "WorkSeq: %lu\n", "DownCnvt", | 278 | "WorkSeq: %lu\n", "DownCnvt", |
| 278 | (osb->dc_task ? task_pid_nr(osb->dc_task) : -1), | 279 | (osb->dc_task ? task_pid_nr(osb->dc_task) : -1), |
| 279 | osb->blocked_lock_count, osb->dc_wake_sequence, | 280 | osb->blocked_lock_count, osb->dc_wake_sequence, |
| 280 | osb->dc_work_sequence); | 281 | osb->dc_work_sequence); |
| 281 | spin_unlock(&osb->dc_task_lock); | 282 | spin_unlock_irqrestore(&osb->dc_task_lock, flags); |
| 282 | 283 | ||
| 283 | spin_lock(&osb->osb_lock); | 284 | spin_lock(&osb->osb_lock); |
| 284 | out += snprintf(buf + out, len - out, "%10s => Pid: %d Nodes:", | 285 | out += snprintf(buf + out, len - out, "%10s => Pid: %d Nodes:", |
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index a9ebabfe7587..5c57b7b40728 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c | |||
| @@ -1957,7 +1957,6 @@ xfs_vm_set_page_dirty( | |||
| 1957 | loff_t end_offset; | 1957 | loff_t end_offset; |
| 1958 | loff_t offset; | 1958 | loff_t offset; |
| 1959 | int newly_dirty; | 1959 | int newly_dirty; |
| 1960 | struct mem_cgroup *memcg; | ||
| 1961 | 1960 | ||
| 1962 | if (unlikely(!mapping)) | 1961 | if (unlikely(!mapping)) |
| 1963 | return !TestSetPageDirty(page); | 1962 | return !TestSetPageDirty(page); |
| @@ -1978,10 +1977,10 @@ xfs_vm_set_page_dirty( | |||
| 1978 | } while (bh != head); | 1977 | } while (bh != head); |
| 1979 | } | 1978 | } |
| 1980 | /* | 1979 | /* |
| 1981 | * Use mem_group_begin_page_stat() to keep PageDirty synchronized with | 1980 | * Lock out page->mem_cgroup migration to keep PageDirty |
| 1982 | * per-memcg dirty page counters. | 1981 | * synchronized with per-memcg dirty page counters. |
| 1983 | */ | 1982 | */ |
| 1984 | memcg = mem_cgroup_begin_page_stat(page); | 1983 | lock_page_memcg(page); |
| 1985 | newly_dirty = !TestSetPageDirty(page); | 1984 | newly_dirty = !TestSetPageDirty(page); |
| 1986 | spin_unlock(&mapping->private_lock); | 1985 | spin_unlock(&mapping->private_lock); |
| 1987 | 1986 | ||
| @@ -1992,13 +1991,13 @@ xfs_vm_set_page_dirty( | |||
| 1992 | spin_lock_irqsave(&mapping->tree_lock, flags); | 1991 | spin_lock_irqsave(&mapping->tree_lock, flags); |
| 1993 | if (page->mapping) { /* Race with truncate? */ | 1992 | if (page->mapping) { /* Race with truncate? */ |
| 1994 | WARN_ON_ONCE(!PageUptodate(page)); | 1993 | WARN_ON_ONCE(!PageUptodate(page)); |
| 1995 | account_page_dirtied(page, mapping, memcg); | 1994 | account_page_dirtied(page, mapping); |
| 1996 | radix_tree_tag_set(&mapping->page_tree, | 1995 | radix_tree_tag_set(&mapping->page_tree, |
| 1997 | page_index(page), PAGECACHE_TAG_DIRTY); | 1996 | page_index(page), PAGECACHE_TAG_DIRTY); |
| 1998 | } | 1997 | } |
| 1999 | spin_unlock_irqrestore(&mapping->tree_lock, flags); | 1998 | spin_unlock_irqrestore(&mapping->tree_lock, flags); |
| 2000 | } | 1999 | } |
| 2001 | mem_cgroup_end_page_stat(memcg); | 2000 | unlock_page_memcg(page); |
| 2002 | if (newly_dirty) | 2001 | if (newly_dirty) |
| 2003 | __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); | 2002 | __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); |
| 2004 | return newly_dirty; | 2003 | return newly_dirty; |
diff --git a/include/linux/auto_dev-ioctl.h b/include/linux/auto_dev-ioctl.h index 850f39b33e74..7caaf298f539 100644 --- a/include/linux/auto_dev-ioctl.h +++ b/include/linux/auto_dev-ioctl.h | |||
| @@ -11,12 +11,7 @@ | |||
| 11 | #define _LINUX_AUTO_DEV_IOCTL_H | 11 | #define _LINUX_AUTO_DEV_IOCTL_H |
| 12 | 12 | ||
| 13 | #include <linux/auto_fs.h> | 13 | #include <linux/auto_fs.h> |
| 14 | |||
| 15 | #ifdef __KERNEL__ | ||
| 16 | #include <linux/string.h> | 14 | #include <linux/string.h> |
| 17 | #else | ||
| 18 | #include <string.h> | ||
| 19 | #endif /* __KERNEL__ */ | ||
| 20 | 15 | ||
| 21 | #define AUTOFS_DEVICE_NAME "autofs" | 16 | #define AUTOFS_DEVICE_NAME "autofs" |
| 22 | 17 | ||
| @@ -125,7 +120,6 @@ static inline void init_autofs_dev_ioctl(struct autofs_dev_ioctl *in) | |||
| 125 | in->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR; | 120 | in->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR; |
| 126 | in->size = sizeof(struct autofs_dev_ioctl); | 121 | in->size = sizeof(struct autofs_dev_ioctl); |
| 127 | in->ioctlfd = -1; | 122 | in->ioctlfd = -1; |
| 128 | return; | ||
| 129 | } | 123 | } |
| 130 | 124 | ||
| 131 | /* | 125 | /* |
diff --git a/include/linux/auto_fs.h b/include/linux/auto_fs.h index fcd704d354c4..b4066bb89083 100644 --- a/include/linux/auto_fs.h +++ b/include/linux/auto_fs.h | |||
| @@ -1,14 +1,10 @@ | |||
| 1 | /* -*- linux-c -*- ------------------------------------------------------- * | 1 | /* |
| 2 | * | 2 | * Copyright 1997 Transmeta Corporation - All Rights Reserved |
| 3 | * linux/include/linux/auto_fs.h | ||
| 4 | * | ||
| 5 | * Copyright 1997 Transmeta Corporation - All Rights Reserved | ||
| 6 | * | 3 | * |
| 7 | * This file is part of the Linux kernel and is made available under | 4 | * This file is part of the Linux kernel and is made available under |
| 8 | * the terms of the GNU General Public License, version 2, or at your | 5 | * the terms of the GNU General Public License, version 2, or at your |
| 9 | * option, any later version, incorporated herein by reference. | 6 | * option, any later version, incorporated herein by reference. |
| 10 | * | 7 | */ |
| 11 | * ----------------------------------------------------------------------- */ | ||
| 12 | 8 | ||
| 13 | #ifndef _LINUX_AUTO_FS_H | 9 | #ifndef _LINUX_AUTO_FS_H |
| 14 | #define _LINUX_AUTO_FS_H | 10 | #define _LINUX_AUTO_FS_H |
diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 3159a7dba034..9f4956d8601c 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h | |||
| @@ -62,10 +62,9 @@ static inline struct dentry *fault_create_debugfs_attr(const char *name, | |||
| 62 | #endif /* CONFIG_FAULT_INJECTION */ | 62 | #endif /* CONFIG_FAULT_INJECTION */ |
| 63 | 63 | ||
| 64 | #ifdef CONFIG_FAILSLAB | 64 | #ifdef CONFIG_FAILSLAB |
| 65 | extern bool should_failslab(size_t size, gfp_t gfpflags, unsigned long flags); | 65 | extern bool should_failslab(struct kmem_cache *s, gfp_t gfpflags); |
| 66 | #else | 66 | #else |
| 67 | static inline bool should_failslab(size_t size, gfp_t gfpflags, | 67 | static inline bool should_failslab(struct kmem_cache *s, gfp_t gfpflags) |
| 68 | unsigned long flags) | ||
| 69 | { | 68 | { |
| 70 | return false; | 69 | return false; |
| 71 | } | 70 | } |
diff --git a/include/linux/gfp.h b/include/linux/gfp.h index af1f2b24bbe4..bb16dfeb917e 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h | |||
| @@ -9,6 +9,11 @@ | |||
| 9 | 9 | ||
| 10 | struct vm_area_struct; | 10 | struct vm_area_struct; |
| 11 | 11 | ||
| 12 | /* | ||
| 13 | * In case of changes, please don't forget to update | ||
| 14 | * include/trace/events/mmflags.h and tools/perf/builtin-kmem.c | ||
| 15 | */ | ||
| 16 | |||
| 12 | /* Plain integer GFP bitmasks. Do not use this directly. */ | 17 | /* Plain integer GFP bitmasks. Do not use this directly. */ |
| 13 | #define ___GFP_DMA 0x01u | 18 | #define ___GFP_DMA 0x01u |
| 14 | #define ___GFP_HIGHMEM 0x02u | 19 | #define ___GFP_HIGHMEM 0x02u |
| @@ -48,7 +53,6 @@ struct vm_area_struct; | |||
| 48 | #define __GFP_DMA ((__force gfp_t)___GFP_DMA) | 53 | #define __GFP_DMA ((__force gfp_t)___GFP_DMA) |
| 49 | #define __GFP_HIGHMEM ((__force gfp_t)___GFP_HIGHMEM) | 54 | #define __GFP_HIGHMEM ((__force gfp_t)___GFP_HIGHMEM) |
| 50 | #define __GFP_DMA32 ((__force gfp_t)___GFP_DMA32) | 55 | #define __GFP_DMA32 ((__force gfp_t)___GFP_DMA32) |
| 51 | #define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* Page is movable */ | ||
| 52 | #define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* ZONE_MOVABLE allowed */ | 56 | #define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* ZONE_MOVABLE allowed */ |
| 53 | #define GFP_ZONEMASK (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE) | 57 | #define GFP_ZONEMASK (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE) |
| 54 | 58 | ||
| @@ -515,13 +519,7 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp); | |||
| 515 | void drain_all_pages(struct zone *zone); | 519 | void drain_all_pages(struct zone *zone); |
| 516 | void drain_local_pages(struct zone *zone); | 520 | void drain_local_pages(struct zone *zone); |
| 517 | 521 | ||
| 518 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT | ||
| 519 | void page_alloc_init_late(void); | 522 | void page_alloc_init_late(void); |
| 520 | #else | ||
| 521 | static inline void page_alloc_init_late(void) | ||
| 522 | { | ||
| 523 | } | ||
| 524 | #endif | ||
| 525 | 523 | ||
| 526 | /* | 524 | /* |
| 527 | * gfp_allowed_mask is set to GFP_BOOT_MASK during early boot to restrict what | 525 | * gfp_allowed_mask is set to GFP_BOOT_MASK during early boot to restrict what |
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 792c8981e633..f0c4bec6565b 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h | |||
| @@ -28,6 +28,7 @@ | |||
| 28 | #include <linux/eventfd.h> | 28 | #include <linux/eventfd.h> |
| 29 | #include <linux/mmzone.h> | 29 | #include <linux/mmzone.h> |
| 30 | #include <linux/writeback.h> | 30 | #include <linux/writeback.h> |
| 31 | #include <linux/page-flags.h> | ||
| 31 | 32 | ||
| 32 | struct mem_cgroup; | 33 | struct mem_cgroup; |
| 33 | struct page; | 34 | struct page; |
| @@ -89,6 +90,10 @@ enum mem_cgroup_events_target { | |||
| 89 | }; | 90 | }; |
| 90 | 91 | ||
| 91 | #ifdef CONFIG_MEMCG | 92 | #ifdef CONFIG_MEMCG |
| 93 | |||
| 94 | #define MEM_CGROUP_ID_SHIFT 16 | ||
| 95 | #define MEM_CGROUP_ID_MAX USHRT_MAX | ||
| 96 | |||
| 92 | struct mem_cgroup_stat_cpu { | 97 | struct mem_cgroup_stat_cpu { |
| 93 | long count[MEMCG_NR_STAT]; | 98 | long count[MEMCG_NR_STAT]; |
| 94 | unsigned long events[MEMCG_NR_EVENTS]; | 99 | unsigned long events[MEMCG_NR_EVENTS]; |
| @@ -265,6 +270,11 @@ struct mem_cgroup { | |||
| 265 | 270 | ||
| 266 | extern struct mem_cgroup *root_mem_cgroup; | 271 | extern struct mem_cgroup *root_mem_cgroup; |
| 267 | 272 | ||
| 273 | static inline bool mem_cgroup_disabled(void) | ||
| 274 | { | ||
| 275 | return !cgroup_subsys_enabled(memory_cgrp_subsys); | ||
| 276 | } | ||
| 277 | |||
| 268 | /** | 278 | /** |
| 269 | * mem_cgroup_events - count memory events against a cgroup | 279 | * mem_cgroup_events - count memory events against a cgroup |
| 270 | * @memcg: the memory cgroup | 280 | * @memcg: the memory cgroup |
| @@ -291,7 +301,7 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg, | |||
| 291 | void mem_cgroup_uncharge(struct page *page); | 301 | void mem_cgroup_uncharge(struct page *page); |
| 292 | void mem_cgroup_uncharge_list(struct list_head *page_list); | 302 | void mem_cgroup_uncharge_list(struct list_head *page_list); |
| 293 | 303 | ||
| 294 | void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage); | 304 | void mem_cgroup_migrate(struct page *oldpage, struct page *newpage); |
| 295 | 305 | ||
| 296 | struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *); | 306 | struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *); |
| 297 | struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *); | 307 | struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *); |
| @@ -312,6 +322,28 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *, | |||
| 312 | struct mem_cgroup_reclaim_cookie *); | 322 | struct mem_cgroup_reclaim_cookie *); |
| 313 | void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *); | 323 | void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *); |
| 314 | 324 | ||
| 325 | static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg) | ||
| 326 | { | ||
| 327 | if (mem_cgroup_disabled()) | ||
| 328 | return 0; | ||
| 329 | |||
| 330 | return memcg->css.id; | ||
| 331 | } | ||
| 332 | |||
| 333 | /** | ||
| 334 | * mem_cgroup_from_id - look up a memcg from an id | ||
| 335 | * @id: the id to look up | ||
| 336 | * | ||
| 337 | * Caller must hold rcu_read_lock() and use css_tryget() as necessary. | ||
| 338 | */ | ||
| 339 | static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id) | ||
| 340 | { | ||
| 341 | struct cgroup_subsys_state *css; | ||
| 342 | |||
| 343 | css = css_from_id(id, &memory_cgrp_subsys); | ||
| 344 | return mem_cgroup_from_css(css); | ||
| 345 | } | ||
| 346 | |||
| 315 | /** | 347 | /** |
| 316 | * parent_mem_cgroup - find the accounting parent of a memcg | 348 | * parent_mem_cgroup - find the accounting parent of a memcg |
| 317 | * @memcg: memcg whose parent to find | 349 | * @memcg: memcg whose parent to find |
| @@ -353,11 +385,6 @@ static inline bool mm_match_cgroup(struct mm_struct *mm, | |||
| 353 | struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page); | 385 | struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page); |
| 354 | ino_t page_cgroup_ino(struct page *page); | 386 | ino_t page_cgroup_ino(struct page *page); |
| 355 | 387 | ||
| 356 | static inline bool mem_cgroup_disabled(void) | ||
| 357 | { | ||
| 358 | return !cgroup_subsys_enabled(memory_cgrp_subsys); | ||
| 359 | } | ||
| 360 | |||
| 361 | static inline bool mem_cgroup_online(struct mem_cgroup *memcg) | 388 | static inline bool mem_cgroup_online(struct mem_cgroup *memcg) |
| 362 | { | 389 | { |
| 363 | if (mem_cgroup_disabled()) | 390 | if (mem_cgroup_disabled()) |
| @@ -429,36 +456,43 @@ bool mem_cgroup_oom_synchronize(bool wait); | |||
| 429 | extern int do_swap_account; | 456 | extern int do_swap_account; |
| 430 | #endif | 457 | #endif |
| 431 | 458 | ||
| 432 | struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page); | 459 | void lock_page_memcg(struct page *page); |
| 433 | void mem_cgroup_end_page_stat(struct mem_cgroup *memcg); | 460 | void unlock_page_memcg(struct page *page); |
| 434 | 461 | ||
| 435 | /** | 462 | /** |
| 436 | * mem_cgroup_update_page_stat - update page state statistics | 463 | * mem_cgroup_update_page_stat - update page state statistics |
| 437 | * @memcg: memcg to account against | 464 | * @page: the page |
| 438 | * @idx: page state item to account | 465 | * @idx: page state item to account |
| 439 | * @val: number of pages (positive or negative) | 466 | * @val: number of pages (positive or negative) |
| 440 | * | 467 | * |
| 441 | * See mem_cgroup_begin_page_stat() for locking requirements. | 468 | * The @page must be locked or the caller must use lock_page_memcg() |
| 469 | * to prevent double accounting when the page is concurrently being | ||
| 470 | * moved to another memcg: | ||
| 471 | * | ||
| 472 | * lock_page(page) or lock_page_memcg(page) | ||
| 473 | * if (TestClearPageState(page)) | ||
| 474 | * mem_cgroup_update_page_stat(page, state, -1); | ||
| 475 | * unlock_page(page) or unlock_page_memcg(page) | ||
| 442 | */ | 476 | */ |
| 443 | static inline void mem_cgroup_update_page_stat(struct mem_cgroup *memcg, | 477 | static inline void mem_cgroup_update_page_stat(struct page *page, |
| 444 | enum mem_cgroup_stat_index idx, int val) | 478 | enum mem_cgroup_stat_index idx, int val) |
| 445 | { | 479 | { |
| 446 | VM_BUG_ON(!rcu_read_lock_held()); | 480 | VM_BUG_ON(!(rcu_read_lock_held() || PageLocked(page))); |
| 447 | 481 | ||
| 448 | if (memcg) | 482 | if (page->mem_cgroup) |
| 449 | this_cpu_add(memcg->stat->count[idx], val); | 483 | this_cpu_add(page->mem_cgroup->stat->count[idx], val); |
| 450 | } | 484 | } |
| 451 | 485 | ||
| 452 | static inline void mem_cgroup_inc_page_stat(struct mem_cgroup *memcg, | 486 | static inline void mem_cgroup_inc_page_stat(struct page *page, |
| 453 | enum mem_cgroup_stat_index idx) | 487 | enum mem_cgroup_stat_index idx) |
| 454 | { | 488 | { |
| 455 | mem_cgroup_update_page_stat(memcg, idx, 1); | 489 | mem_cgroup_update_page_stat(page, idx, 1); |
| 456 | } | 490 | } |
| 457 | 491 | ||
| 458 | static inline void mem_cgroup_dec_page_stat(struct mem_cgroup *memcg, | 492 | static inline void mem_cgroup_dec_page_stat(struct page *page, |
| 459 | enum mem_cgroup_stat_index idx) | 493 | enum mem_cgroup_stat_index idx) |
| 460 | { | 494 | { |
| 461 | mem_cgroup_update_page_stat(memcg, idx, -1); | 495 | mem_cgroup_update_page_stat(page, idx, -1); |
| 462 | } | 496 | } |
| 463 | 497 | ||
| 464 | unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, | 498 | unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, |
| @@ -496,8 +530,17 @@ void mem_cgroup_split_huge_fixup(struct page *head); | |||
| 496 | #endif | 530 | #endif |
| 497 | 531 | ||
| 498 | #else /* CONFIG_MEMCG */ | 532 | #else /* CONFIG_MEMCG */ |
| 533 | |||
| 534 | #define MEM_CGROUP_ID_SHIFT 0 | ||
| 535 | #define MEM_CGROUP_ID_MAX 0 | ||
| 536 | |||
| 499 | struct mem_cgroup; | 537 | struct mem_cgroup; |
| 500 | 538 | ||
| 539 | static inline bool mem_cgroup_disabled(void) | ||
| 540 | { | ||
| 541 | return true; | ||
| 542 | } | ||
| 543 | |||
| 501 | static inline void mem_cgroup_events(struct mem_cgroup *memcg, | 544 | static inline void mem_cgroup_events(struct mem_cgroup *memcg, |
| 502 | enum mem_cgroup_events_index idx, | 545 | enum mem_cgroup_events_index idx, |
| 503 | unsigned int nr) | 546 | unsigned int nr) |
| @@ -539,7 +582,7 @@ static inline void mem_cgroup_uncharge_list(struct list_head *page_list) | |||
| 539 | { | 582 | { |
| 540 | } | 583 | } |
| 541 | 584 | ||
| 542 | static inline void mem_cgroup_replace_page(struct page *old, struct page *new) | 585 | static inline void mem_cgroup_migrate(struct page *old, struct page *new) |
| 543 | { | 586 | { |
| 544 | } | 587 | } |
| 545 | 588 | ||
| @@ -580,9 +623,16 @@ static inline void mem_cgroup_iter_break(struct mem_cgroup *root, | |||
| 580 | { | 623 | { |
| 581 | } | 624 | } |
| 582 | 625 | ||
| 583 | static inline bool mem_cgroup_disabled(void) | 626 | static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg) |
| 584 | { | 627 | { |
| 585 | return true; | 628 | return 0; |
| 629 | } | ||
| 630 | |||
| 631 | static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id) | ||
| 632 | { | ||
| 633 | WARN_ON_ONCE(id); | ||
| 634 | /* XXX: This should always return root_mem_cgroup */ | ||
| 635 | return NULL; | ||
| 586 | } | 636 | } |
| 587 | 637 | ||
| 588 | static inline bool mem_cgroup_online(struct mem_cgroup *memcg) | 638 | static inline bool mem_cgroup_online(struct mem_cgroup *memcg) |
| @@ -613,12 +663,11 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) | |||
| 613 | { | 663 | { |
| 614 | } | 664 | } |
| 615 | 665 | ||
| 616 | static inline struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page) | 666 | static inline void lock_page_memcg(struct page *page) |
| 617 | { | 667 | { |
| 618 | return NULL; | ||
| 619 | } | 668 | } |
| 620 | 669 | ||
| 621 | static inline void mem_cgroup_end_page_stat(struct mem_cgroup *memcg) | 670 | static inline void unlock_page_memcg(struct page *page) |
| 622 | { | 671 | { |
| 623 | } | 672 | } |
| 624 | 673 | ||
| @@ -644,12 +693,12 @@ static inline bool mem_cgroup_oom_synchronize(bool wait) | |||
| 644 | return false; | 693 | return false; |
| 645 | } | 694 | } |
| 646 | 695 | ||
| 647 | static inline void mem_cgroup_inc_page_stat(struct mem_cgroup *memcg, | 696 | static inline void mem_cgroup_inc_page_stat(struct page *page, |
| 648 | enum mem_cgroup_stat_index idx) | 697 | enum mem_cgroup_stat_index idx) |
| 649 | { | 698 | { |
| 650 | } | 699 | } |
| 651 | 700 | ||
| 652 | static inline void mem_cgroup_dec_page_stat(struct mem_cgroup *memcg, | 701 | static inline void mem_cgroup_dec_page_stat(struct page *page, |
| 653 | enum mem_cgroup_stat_index idx) | 702 | enum mem_cgroup_stat_index idx) |
| 654 | { | 703 | { |
| 655 | } | 704 | } |
| @@ -765,7 +814,7 @@ int __memcg_kmem_charge(struct page *page, gfp_t gfp, int order); | |||
| 765 | void __memcg_kmem_uncharge(struct page *page, int order); | 814 | void __memcg_kmem_uncharge(struct page *page, int order); |
| 766 | 815 | ||
| 767 | /* | 816 | /* |
| 768 | * helper for acessing a memcg's index. It will be used as an index in the | 817 | * helper for accessing a memcg's index. It will be used as an index in the |
| 769 | * child cache array in kmem_cache, and also to derive its name. This function | 818 | * child cache array in kmem_cache, and also to derive its name. This function |
| 770 | * will return -1 when this is not a kmem-limited memcg. | 819 | * will return -1 when this is not a kmem-limited memcg. |
| 771 | */ | 820 | */ |
diff --git a/include/linux/memory.h b/include/linux/memory.h index 8b8d8d12348e..82730adba950 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h | |||
| @@ -109,6 +109,9 @@ extern void unregister_memory_notifier(struct notifier_block *nb); | |||
| 109 | extern int register_memory_isolate_notifier(struct notifier_block *nb); | 109 | extern int register_memory_isolate_notifier(struct notifier_block *nb); |
| 110 | extern void unregister_memory_isolate_notifier(struct notifier_block *nb); | 110 | extern void unregister_memory_isolate_notifier(struct notifier_block *nb); |
| 111 | extern int register_new_memory(int, struct mem_section *); | 111 | extern int register_new_memory(int, struct mem_section *); |
| 112 | extern int memory_block_change_state(struct memory_block *mem, | ||
| 113 | unsigned long to_state, | ||
| 114 | unsigned long from_state_req); | ||
| 112 | #ifdef CONFIG_MEMORY_HOTREMOVE | 115 | #ifdef CONFIG_MEMORY_HOTREMOVE |
| 113 | extern int unregister_memory_section(struct mem_section *); | 116 | extern int unregister_memory_section(struct mem_section *); |
| 114 | #endif | 117 | #endif |
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 43405992d027..adbef586e696 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h | |||
| @@ -99,6 +99,8 @@ extern void __online_page_free(struct page *page); | |||
| 99 | 99 | ||
| 100 | extern int try_online_node(int nid); | 100 | extern int try_online_node(int nid); |
| 101 | 101 | ||
| 102 | extern bool memhp_auto_online; | ||
| 103 | |||
| 102 | #ifdef CONFIG_MEMORY_HOTREMOVE | 104 | #ifdef CONFIG_MEMORY_HOTREMOVE |
| 103 | extern bool is_pageblock_removable_nolock(struct page *page); | 105 | extern bool is_pageblock_removable_nolock(struct page *page); |
| 104 | extern int arch_remove_memory(u64 start, u64 size); | 106 | extern int arch_remove_memory(u64 start, u64 size); |
| @@ -196,6 +198,9 @@ void put_online_mems(void); | |||
| 196 | void mem_hotplug_begin(void); | 198 | void mem_hotplug_begin(void); |
| 197 | void mem_hotplug_done(void); | 199 | void mem_hotplug_done(void); |
| 198 | 200 | ||
| 201 | extern void set_zone_contiguous(struct zone *zone); | ||
| 202 | extern void clear_zone_contiguous(struct zone *zone); | ||
| 203 | |||
| 199 | #else /* ! CONFIG_MEMORY_HOTPLUG */ | 204 | #else /* ! CONFIG_MEMORY_HOTPLUG */ |
| 200 | /* | 205 | /* |
| 201 | * Stub functions for when hotplug is off | 206 | * Stub functions for when hotplug is off |
| @@ -267,7 +272,7 @@ static inline void remove_memory(int nid, u64 start, u64 size) {} | |||
| 267 | extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, | 272 | extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, |
| 268 | void *arg, int (*func)(struct memory_block *, void *)); | 273 | void *arg, int (*func)(struct memory_block *, void *)); |
| 269 | extern int add_memory(int nid, u64 start, u64 size); | 274 | extern int add_memory(int nid, u64 start, u64 size); |
| 270 | extern int add_memory_resource(int nid, struct resource *resource); | 275 | extern int add_memory_resource(int nid, struct resource *resource, bool online); |
| 271 | extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default, | 276 | extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default, |
| 272 | bool for_device); | 277 | bool for_device); |
| 273 | extern int arch_add_memory(int nid, u64 start, u64 size, bool for_device); | 278 | extern int arch_add_memory(int nid, u64 start, u64 size, bool for_device); |
diff --git a/include/linux/migrate.h b/include/linux/migrate.h index cac1c0904d5f..9b50325e4ddf 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h | |||
| @@ -23,9 +23,13 @@ enum migrate_reason { | |||
| 23 | MR_SYSCALL, /* also applies to cpusets */ | 23 | MR_SYSCALL, /* also applies to cpusets */ |
| 24 | MR_MEMPOLICY_MBIND, | 24 | MR_MEMPOLICY_MBIND, |
| 25 | MR_NUMA_MISPLACED, | 25 | MR_NUMA_MISPLACED, |
| 26 | MR_CMA | 26 | MR_CMA, |
| 27 | MR_TYPES | ||
| 27 | }; | 28 | }; |
| 28 | 29 | ||
| 30 | /* In mm/debug.c; also keep sync with include/trace/events/migrate.h */ | ||
| 31 | extern char *migrate_reason_names[MR_TYPES]; | ||
| 32 | |||
| 29 | #ifdef CONFIG_MIGRATION | 33 | #ifdef CONFIG_MIGRATION |
| 30 | 34 | ||
| 31 | extern void putback_movable_pages(struct list_head *l); | 35 | extern void putback_movable_pages(struct list_head *l); |
diff --git a/include/linux/mm.h b/include/linux/mm.h index 3579d1e2fe3a..dbf1eddab964 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
| @@ -905,20 +905,11 @@ static inline struct mem_cgroup *page_memcg(struct page *page) | |||
| 905 | { | 905 | { |
| 906 | return page->mem_cgroup; | 906 | return page->mem_cgroup; |
| 907 | } | 907 | } |
| 908 | |||
| 909 | static inline void set_page_memcg(struct page *page, struct mem_cgroup *memcg) | ||
| 910 | { | ||
| 911 | page->mem_cgroup = memcg; | ||
| 912 | } | ||
| 913 | #else | 908 | #else |
| 914 | static inline struct mem_cgroup *page_memcg(struct page *page) | 909 | static inline struct mem_cgroup *page_memcg(struct page *page) |
| 915 | { | 910 | { |
| 916 | return NULL; | 911 | return NULL; |
| 917 | } | 912 | } |
| 918 | |||
| 919 | static inline void set_page_memcg(struct page *page, struct mem_cgroup *memcg) | ||
| 920 | { | ||
| 921 | } | ||
| 922 | #endif | 913 | #endif |
| 923 | 914 | ||
| 924 | /* | 915 | /* |
| @@ -1300,10 +1291,9 @@ int __set_page_dirty_nobuffers(struct page *page); | |||
| 1300 | int __set_page_dirty_no_writeback(struct page *page); | 1291 | int __set_page_dirty_no_writeback(struct page *page); |
| 1301 | int redirty_page_for_writepage(struct writeback_control *wbc, | 1292 | int redirty_page_for_writepage(struct writeback_control *wbc, |
| 1302 | struct page *page); | 1293 | struct page *page); |
| 1303 | void account_page_dirtied(struct page *page, struct address_space *mapping, | 1294 | void account_page_dirtied(struct page *page, struct address_space *mapping); |
| 1304 | struct mem_cgroup *memcg); | ||
| 1305 | void account_page_cleaned(struct page *page, struct address_space *mapping, | 1295 | void account_page_cleaned(struct page *page, struct address_space *mapping, |
| 1306 | struct mem_cgroup *memcg, struct bdi_writeback *wb); | 1296 | struct bdi_writeback *wb); |
| 1307 | int set_page_dirty(struct page *page); | 1297 | int set_page_dirty(struct page *page); |
| 1308 | int set_page_dirty_lock(struct page *page); | 1298 | int set_page_dirty_lock(struct page *page); |
| 1309 | void cancel_dirty_page(struct page *page); | 1299 | void cancel_dirty_page(struct page *page); |
| @@ -2178,6 +2168,17 @@ extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, | |||
| 2178 | unsigned long size, pte_fn_t fn, void *data); | 2168 | unsigned long size, pte_fn_t fn, void *data); |
| 2179 | 2169 | ||
| 2180 | 2170 | ||
| 2171 | #ifdef CONFIG_PAGE_POISONING | ||
| 2172 | extern bool page_poisoning_enabled(void); | ||
| 2173 | extern void kernel_poison_pages(struct page *page, int numpages, int enable); | ||
| 2174 | extern bool page_is_poisoned(struct page *page); | ||
| 2175 | #else | ||
| 2176 | static inline bool page_poisoning_enabled(void) { return false; } | ||
| 2177 | static inline void kernel_poison_pages(struct page *page, int numpages, | ||
| 2178 | int enable) { } | ||
| 2179 | static inline bool page_is_poisoned(struct page *page) { return false; } | ||
| 2180 | #endif | ||
| 2181 | |||
| 2181 | #ifdef CONFIG_DEBUG_PAGEALLOC | 2182 | #ifdef CONFIG_DEBUG_PAGEALLOC |
| 2182 | extern bool _debug_pagealloc_enabled; | 2183 | extern bool _debug_pagealloc_enabled; |
| 2183 | extern void __kernel_map_pages(struct page *page, int numpages, int enable); | 2184 | extern void __kernel_map_pages(struct page *page, int numpages, int enable); |
| @@ -2197,14 +2198,18 @@ kernel_map_pages(struct page *page, int numpages, int enable) | |||
| 2197 | } | 2198 | } |
| 2198 | #ifdef CONFIG_HIBERNATION | 2199 | #ifdef CONFIG_HIBERNATION |
| 2199 | extern bool kernel_page_present(struct page *page); | 2200 | extern bool kernel_page_present(struct page *page); |
| 2200 | #endif /* CONFIG_HIBERNATION */ | 2201 | #endif /* CONFIG_HIBERNATION */ |
| 2201 | #else | 2202 | #else /* CONFIG_DEBUG_PAGEALLOC */ |
| 2202 | static inline void | 2203 | static inline void |
| 2203 | kernel_map_pages(struct page *page, int numpages, int enable) {} | 2204 | kernel_map_pages(struct page *page, int numpages, int enable) {} |
| 2204 | #ifdef CONFIG_HIBERNATION | 2205 | #ifdef CONFIG_HIBERNATION |
| 2205 | static inline bool kernel_page_present(struct page *page) { return true; } | 2206 | static inline bool kernel_page_present(struct page *page) { return true; } |
| 2206 | #endif /* CONFIG_HIBERNATION */ | 2207 | #endif /* CONFIG_HIBERNATION */ |
| 2207 | #endif | 2208 | static inline bool debug_pagealloc_enabled(void) |
| 2209 | { | ||
| 2210 | return false; | ||
| 2211 | } | ||
| 2212 | #endif /* CONFIG_DEBUG_PAGEALLOC */ | ||
| 2208 | 2213 | ||
| 2209 | #ifdef __HAVE_ARCH_GATE_AREA | 2214 | #ifdef __HAVE_ARCH_GATE_AREA |
| 2210 | extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm); | 2215 | extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm); |
diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index 053824b0a412..de7be78c6f0e 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h | |||
| @@ -9,8 +9,7 @@ struct vm_area_struct; | |||
| 9 | struct mm_struct; | 9 | struct mm_struct; |
| 10 | 10 | ||
| 11 | extern void dump_page(struct page *page, const char *reason); | 11 | extern void dump_page(struct page *page, const char *reason); |
| 12 | extern void dump_page_badflags(struct page *page, const char *reason, | 12 | extern void __dump_page(struct page *page, const char *reason); |
| 13 | unsigned long badflags); | ||
| 14 | void dump_vma(const struct vm_area_struct *vma); | 13 | void dump_vma(const struct vm_area_struct *vma); |
| 15 | void dump_mm(const struct mm_struct *mm); | 14 | void dump_mm(const struct mm_struct *mm); |
| 16 | 15 | ||
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 7b6c2cfee390..6de02ac378a0 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h | |||
| @@ -63,6 +63,9 @@ enum { | |||
| 63 | MIGRATE_TYPES | 63 | MIGRATE_TYPES |
| 64 | }; | 64 | }; |
| 65 | 65 | ||
| 66 | /* In mm/page_alloc.c; keep in sync also with show_migration_types() there */ | ||
| 67 | extern char * const migratetype_names[MIGRATE_TYPES]; | ||
| 68 | |||
| 66 | #ifdef CONFIG_CMA | 69 | #ifdef CONFIG_CMA |
| 67 | # define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA) | 70 | # define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA) |
| 68 | #else | 71 | #else |
| @@ -209,10 +212,12 @@ struct zone_reclaim_stat { | |||
| 209 | }; | 212 | }; |
| 210 | 213 | ||
| 211 | struct lruvec { | 214 | struct lruvec { |
| 212 | struct list_head lists[NR_LRU_LISTS]; | 215 | struct list_head lists[NR_LRU_LISTS]; |
| 213 | struct zone_reclaim_stat reclaim_stat; | 216 | struct zone_reclaim_stat reclaim_stat; |
| 217 | /* Evictions & activations on the inactive file list */ | ||
| 218 | atomic_long_t inactive_age; | ||
| 214 | #ifdef CONFIG_MEMCG | 219 | #ifdef CONFIG_MEMCG |
| 215 | struct zone *zone; | 220 | struct zone *zone; |
| 216 | #endif | 221 | #endif |
| 217 | }; | 222 | }; |
| 218 | 223 | ||
| @@ -487,9 +492,6 @@ struct zone { | |||
| 487 | spinlock_t lru_lock; | 492 | spinlock_t lru_lock; |
| 488 | struct lruvec lruvec; | 493 | struct lruvec lruvec; |
| 489 | 494 | ||
| 490 | /* Evictions & activations on the inactive file list */ | ||
| 491 | atomic_long_t inactive_age; | ||
| 492 | |||
| 493 | /* | 495 | /* |
| 494 | * When free pages are below this point, additional steps are taken | 496 | * When free pages are below this point, additional steps are taken |
| 495 | * when reading the number of free pages to avoid per-cpu counter | 497 | * when reading the number of free pages to avoid per-cpu counter |
| @@ -520,6 +522,8 @@ struct zone { | |||
| 520 | bool compact_blockskip_flush; | 522 | bool compact_blockskip_flush; |
| 521 | #endif | 523 | #endif |
| 522 | 524 | ||
| 525 | bool contiguous; | ||
| 526 | |||
| 523 | ZONE_PADDING(_pad3_) | 527 | ZONE_PADDING(_pad3_) |
| 524 | /* Zone statistics */ | 528 | /* Zone statistics */ |
| 525 | atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; | 529 | atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; |
| @@ -758,6 +762,8 @@ static inline struct zone *lruvec_zone(struct lruvec *lruvec) | |||
| 758 | #endif | 762 | #endif |
| 759 | } | 763 | } |
| 760 | 764 | ||
| 765 | extern unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru); | ||
| 766 | |||
| 761 | #ifdef CONFIG_HAVE_MEMORY_PRESENT | 767 | #ifdef CONFIG_HAVE_MEMORY_PRESENT |
| 762 | void memory_present(int nid, unsigned long start, unsigned long end); | 768 | void memory_present(int nid, unsigned long start, unsigned long end); |
| 763 | #else | 769 | #else |
diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h index 17f118a82854..e1fe7cf5bddf 100644 --- a/include/linux/page_ext.h +++ b/include/linux/page_ext.h | |||
| @@ -45,6 +45,7 @@ struct page_ext { | |||
| 45 | unsigned int order; | 45 | unsigned int order; |
| 46 | gfp_t gfp_mask; | 46 | gfp_t gfp_mask; |
| 47 | unsigned int nr_entries; | 47 | unsigned int nr_entries; |
| 48 | int last_migrate_reason; | ||
| 48 | unsigned long trace_entries[8]; | 49 | unsigned long trace_entries[8]; |
| 49 | #endif | 50 | #endif |
| 50 | }; | 51 | }; |
diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h index cacaabea8a09..46f1b939948c 100644 --- a/include/linux/page_owner.h +++ b/include/linux/page_owner.h | |||
| @@ -1,38 +1,54 @@ | |||
| 1 | #ifndef __LINUX_PAGE_OWNER_H | 1 | #ifndef __LINUX_PAGE_OWNER_H |
| 2 | #define __LINUX_PAGE_OWNER_H | 2 | #define __LINUX_PAGE_OWNER_H |
| 3 | 3 | ||
| 4 | #include <linux/jump_label.h> | ||
| 5 | |||
| 4 | #ifdef CONFIG_PAGE_OWNER | 6 | #ifdef CONFIG_PAGE_OWNER |
| 5 | extern bool page_owner_inited; | 7 | extern struct static_key_false page_owner_inited; |
| 6 | extern struct page_ext_operations page_owner_ops; | 8 | extern struct page_ext_operations page_owner_ops; |
| 7 | 9 | ||
| 8 | extern void __reset_page_owner(struct page *page, unsigned int order); | 10 | extern void __reset_page_owner(struct page *page, unsigned int order); |
| 9 | extern void __set_page_owner(struct page *page, | 11 | extern void __set_page_owner(struct page *page, |
| 10 | unsigned int order, gfp_t gfp_mask); | 12 | unsigned int order, gfp_t gfp_mask); |
| 11 | extern gfp_t __get_page_owner_gfp(struct page *page); | 13 | extern gfp_t __get_page_owner_gfp(struct page *page); |
| 14 | extern void __copy_page_owner(struct page *oldpage, struct page *newpage); | ||
| 15 | extern void __set_page_owner_migrate_reason(struct page *page, int reason); | ||
| 16 | extern void __dump_page_owner(struct page *page); | ||
| 12 | 17 | ||
| 13 | static inline void reset_page_owner(struct page *page, unsigned int order) | 18 | static inline void reset_page_owner(struct page *page, unsigned int order) |
| 14 | { | 19 | { |
| 15 | if (likely(!page_owner_inited)) | 20 | if (static_branch_unlikely(&page_owner_inited)) |
| 16 | return; | 21 | __reset_page_owner(page, order); |
| 17 | |||
| 18 | __reset_page_owner(page, order); | ||
| 19 | } | 22 | } |
| 20 | 23 | ||
| 21 | static inline void set_page_owner(struct page *page, | 24 | static inline void set_page_owner(struct page *page, |
| 22 | unsigned int order, gfp_t gfp_mask) | 25 | unsigned int order, gfp_t gfp_mask) |
| 23 | { | 26 | { |
| 24 | if (likely(!page_owner_inited)) | 27 | if (static_branch_unlikely(&page_owner_inited)) |
| 25 | return; | 28 | __set_page_owner(page, order, gfp_mask); |
| 26 | |||
| 27 | __set_page_owner(page, order, gfp_mask); | ||
| 28 | } | 29 | } |
| 29 | 30 | ||
| 30 | static inline gfp_t get_page_owner_gfp(struct page *page) | 31 | static inline gfp_t get_page_owner_gfp(struct page *page) |
| 31 | { | 32 | { |
| 32 | if (likely(!page_owner_inited)) | 33 | if (static_branch_unlikely(&page_owner_inited)) |
| 34 | return __get_page_owner_gfp(page); | ||
| 35 | else | ||
| 33 | return 0; | 36 | return 0; |
| 34 | 37 | } | |
| 35 | return __get_page_owner_gfp(page); | 38 | static inline void copy_page_owner(struct page *oldpage, struct page *newpage) |
| 39 | { | ||
| 40 | if (static_branch_unlikely(&page_owner_inited)) | ||
| 41 | __copy_page_owner(oldpage, newpage); | ||
| 42 | } | ||
| 43 | static inline void set_page_owner_migrate_reason(struct page *page, int reason) | ||
| 44 | { | ||
| 45 | if (static_branch_unlikely(&page_owner_inited)) | ||
| 46 | __set_page_owner_migrate_reason(page, reason); | ||
| 47 | } | ||
| 48 | static inline void dump_page_owner(struct page *page) | ||
| 49 | { | ||
| 50 | if (static_branch_unlikely(&page_owner_inited)) | ||
| 51 | __dump_page_owner(page); | ||
| 36 | } | 52 | } |
| 37 | #else | 53 | #else |
| 38 | static inline void reset_page_owner(struct page *page, unsigned int order) | 54 | static inline void reset_page_owner(struct page *page, unsigned int order) |
| @@ -46,6 +62,14 @@ static inline gfp_t get_page_owner_gfp(struct page *page) | |||
| 46 | { | 62 | { |
| 47 | return 0; | 63 | return 0; |
| 48 | } | 64 | } |
| 49 | 65 | static inline void copy_page_owner(struct page *oldpage, struct page *newpage) | |
| 66 | { | ||
| 67 | } | ||
| 68 | static inline void set_page_owner_migrate_reason(struct page *page, int reason) | ||
| 69 | { | ||
| 70 | } | ||
| 71 | static inline void dump_page_owner(struct page *page) | ||
| 72 | { | ||
| 73 | } | ||
| 50 | #endif /* CONFIG_PAGE_OWNER */ | 74 | #endif /* CONFIG_PAGE_OWNER */ |
| 51 | #endif /* __LINUX_PAGE_OWNER_H */ | 75 | #endif /* __LINUX_PAGE_OWNER_H */ |
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 92395a0a7dc5..183b15ea052b 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h | |||
| @@ -663,8 +663,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping, | |||
| 663 | int add_to_page_cache_lru(struct page *page, struct address_space *mapping, | 663 | int add_to_page_cache_lru(struct page *page, struct address_space *mapping, |
| 664 | pgoff_t index, gfp_t gfp_mask); | 664 | pgoff_t index, gfp_t gfp_mask); |
| 665 | extern void delete_from_page_cache(struct page *page); | 665 | extern void delete_from_page_cache(struct page *page); |
| 666 | extern void __delete_from_page_cache(struct page *page, void *shadow, | 666 | extern void __delete_from_page_cache(struct page *page, void *shadow); |
| 667 | struct mem_cgroup *memcg); | ||
| 668 | int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask); | 667 | int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask); |
| 669 | 668 | ||
| 670 | /* | 669 | /* |
diff --git a/include/linux/poison.h b/include/linux/poison.h index 4a27153574e2..51334edec506 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h | |||
| @@ -30,7 +30,11 @@ | |||
| 30 | #define TIMER_ENTRY_STATIC ((void *) 0x300 + POISON_POINTER_DELTA) | 30 | #define TIMER_ENTRY_STATIC ((void *) 0x300 + POISON_POINTER_DELTA) |
| 31 | 31 | ||
| 32 | /********** mm/debug-pagealloc.c **********/ | 32 | /********** mm/debug-pagealloc.c **********/ |
| 33 | #ifdef CONFIG_PAGE_POISONING_ZERO | ||
| 34 | #define PAGE_POISON 0x00 | ||
| 35 | #else | ||
| 33 | #define PAGE_POISON 0xaa | 36 | #define PAGE_POISON 0xaa |
| 37 | #endif | ||
| 34 | 38 | ||
| 35 | /********** mm/page_alloc.c ************/ | 39 | /********** mm/page_alloc.c ************/ |
| 36 | 40 | ||
diff --git a/include/linux/slab.h b/include/linux/slab.h index 3627d5c1bc47..e4b568738ca3 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h | |||
| @@ -20,7 +20,7 @@ | |||
| 20 | * Flags to pass to kmem_cache_create(). | 20 | * Flags to pass to kmem_cache_create(). |
| 21 | * The ones marked DEBUG are only valid if CONFIG_DEBUG_SLAB is set. | 21 | * The ones marked DEBUG are only valid if CONFIG_DEBUG_SLAB is set. |
| 22 | */ | 22 | */ |
| 23 | #define SLAB_DEBUG_FREE 0x00000100UL /* DEBUG: Perform (expensive) checks on free */ | 23 | #define SLAB_CONSISTENCY_CHECKS 0x00000100UL /* DEBUG: Perform (expensive) checks on alloc/free */ |
| 24 | #define SLAB_RED_ZONE 0x00000400UL /* DEBUG: Red zone objs in a cache */ | 24 | #define SLAB_RED_ZONE 0x00000400UL /* DEBUG: Red zone objs in a cache */ |
| 25 | #define SLAB_POISON 0x00000800UL /* DEBUG: Poison objects */ | 25 | #define SLAB_POISON 0x00000800UL /* DEBUG: Poison objects */ |
| 26 | #define SLAB_HWCACHE_ALIGN 0x00002000UL /* Align objs on cache lines */ | 26 | #define SLAB_HWCACHE_ALIGN 0x00002000UL /* Align objs on cache lines */ |
| @@ -314,7 +314,7 @@ void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags) __assume_slab_alignment | |||
| 314 | void kmem_cache_free(struct kmem_cache *, void *); | 314 | void kmem_cache_free(struct kmem_cache *, void *); |
| 315 | 315 | ||
| 316 | /* | 316 | /* |
| 317 | * Bulk allocation and freeing operations. These are accellerated in an | 317 | * Bulk allocation and freeing operations. These are accelerated in an |
| 318 | * allocator specific way to avoid taking locks repeatedly or building | 318 | * allocator specific way to avoid taking locks repeatedly or building |
| 319 | * metadata structures unnecessarily. | 319 | * metadata structures unnecessarily. |
| 320 | * | 320 | * |
| @@ -323,6 +323,15 @@ void kmem_cache_free(struct kmem_cache *, void *); | |||
| 323 | void kmem_cache_free_bulk(struct kmem_cache *, size_t, void **); | 323 | void kmem_cache_free_bulk(struct kmem_cache *, size_t, void **); |
| 324 | int kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); | 324 | int kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); |
| 325 | 325 | ||
| 326 | /* | ||
| 327 | * Caller must not use kfree_bulk() on memory not originally allocated | ||
| 328 | * by kmalloc(), because the SLOB allocator cannot handle this. | ||
| 329 | */ | ||
| 330 | static __always_inline void kfree_bulk(size_t size, void **p) | ||
| 331 | { | ||
| 332 | kmem_cache_free_bulk(NULL, size, p); | ||
| 333 | } | ||
| 334 | |||
| 326 | #ifdef CONFIG_NUMA | 335 | #ifdef CONFIG_NUMA |
| 327 | void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment; | 336 | void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment; |
| 328 | void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node) __assume_slab_alignment; | 337 | void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node) __assume_slab_alignment; |
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index cf139d3fa513..e878ba35ae91 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h | |||
| @@ -60,6 +60,9 @@ struct kmem_cache { | |||
| 60 | atomic_t allocmiss; | 60 | atomic_t allocmiss; |
| 61 | atomic_t freehit; | 61 | atomic_t freehit; |
| 62 | atomic_t freemiss; | 62 | atomic_t freemiss; |
| 63 | #ifdef CONFIG_DEBUG_SLAB_LEAK | ||
| 64 | atomic_t store_user_clean; | ||
| 65 | #endif | ||
| 63 | 66 | ||
| 64 | /* | 67 | /* |
| 65 | * If debugging is enabled, then the allocator can add additional | 68 | * If debugging is enabled, then the allocator can add additional |
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index b7e57927f521..ac5143f95ee6 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h | |||
| @@ -81,6 +81,7 @@ struct kmem_cache { | |||
| 81 | int reserved; /* Reserved bytes at the end of slabs */ | 81 | int reserved; /* Reserved bytes at the end of slabs */ |
| 82 | const char *name; /* Name (only for display!) */ | 82 | const char *name; /* Name (only for display!) */ |
| 83 | struct list_head list; /* List of slab caches */ | 83 | struct list_head list; /* List of slab caches */ |
| 84 | int red_left_pad; /* Left redzone padding size */ | ||
| 84 | #ifdef CONFIG_SYSFS | 85 | #ifdef CONFIG_SYSFS |
| 85 | struct kobject kobj; /* For sysfs */ | 86 | struct kobject kobj; /* For sysfs */ |
| 86 | #endif | 87 | #endif |
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 925730bc9fc1..705df7db4482 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h | |||
| @@ -15,16 +15,6 @@ struct tracer; | |||
| 15 | struct dentry; | 15 | struct dentry; |
| 16 | struct bpf_prog; | 16 | struct bpf_prog; |
| 17 | 17 | ||
| 18 | struct trace_print_flags { | ||
| 19 | unsigned long mask; | ||
| 20 | const char *name; | ||
| 21 | }; | ||
| 22 | |||
| 23 | struct trace_print_flags_u64 { | ||
| 24 | unsigned long long mask; | ||
| 25 | const char *name; | ||
| 26 | }; | ||
| 27 | |||
| 28 | const char *trace_print_flags_seq(struct trace_seq *p, const char *delim, | 18 | const char *trace_print_flags_seq(struct trace_seq *p, const char *delim, |
| 29 | unsigned long flags, | 19 | unsigned long flags, |
| 30 | const struct trace_print_flags *flag_array); | 20 | const struct trace_print_flags *flag_array); |
diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h index e1ee97c713bf..4ac89acb6136 100644 --- a/include/linux/tracepoint-defs.h +++ b/include/linux/tracepoint-defs.h | |||
| @@ -3,13 +3,23 @@ | |||
| 3 | 3 | ||
| 4 | /* | 4 | /* |
| 5 | * File can be included directly by headers who only want to access | 5 | * File can be included directly by headers who only want to access |
| 6 | * tracepoint->key to guard out of line trace calls. Otherwise | 6 | * tracepoint->key to guard out of line trace calls, or the definition of |
| 7 | * linux/tracepoint.h should be used. | 7 | * trace_print_flags{_u64}. Otherwise linux/tracepoint.h should be used. |
| 8 | */ | 8 | */ |
| 9 | 9 | ||
| 10 | #include <linux/atomic.h> | 10 | #include <linux/atomic.h> |
| 11 | #include <linux/static_key.h> | 11 | #include <linux/static_key.h> |
| 12 | 12 | ||
| 13 | struct trace_print_flags { | ||
| 14 | unsigned long mask; | ||
| 15 | const char *name; | ||
| 16 | }; | ||
| 17 | |||
| 18 | struct trace_print_flags_u64 { | ||
| 19 | unsigned long long mask; | ||
| 20 | const char *name; | ||
| 21 | }; | ||
| 22 | |||
| 13 | struct tracepoint_func { | 23 | struct tracepoint_func { |
| 14 | void *func; | 24 | void *func; |
| 15 | void *data; | 25 | void *data; |
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index d866f21efbbf..677807f29a1c 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h | |||
| @@ -6,7 +6,7 @@ | |||
| 6 | 6 | ||
| 7 | #include <linux/writeback.h> | 7 | #include <linux/writeback.h> |
| 8 | #include <linux/tracepoint.h> | 8 | #include <linux/tracepoint.h> |
| 9 | #include <trace/events/gfpflags.h> | 9 | #include <trace/events/mmflags.h> |
| 10 | 10 | ||
| 11 | struct btrfs_root; | 11 | struct btrfs_root; |
| 12 | struct btrfs_fs_info; | 12 | struct btrfs_fs_info; |
diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h index c92d1e1cbad9..111e5666e5eb 100644 --- a/include/trace/events/compaction.h +++ b/include/trace/events/compaction.h | |||
| @@ -7,7 +7,7 @@ | |||
| 7 | #include <linux/types.h> | 7 | #include <linux/types.h> |
| 8 | #include <linux/list.h> | 8 | #include <linux/list.h> |
| 9 | #include <linux/tracepoint.h> | 9 | #include <linux/tracepoint.h> |
| 10 | #include <trace/events/gfpflags.h> | 10 | #include <trace/events/mmflags.h> |
| 11 | 11 | ||
| 12 | #define COMPACTION_STATUS \ | 12 | #define COMPACTION_STATUS \ |
| 13 | EM( COMPACT_DEFERRED, "deferred") \ | 13 | EM( COMPACT_DEFERRED, "deferred") \ |
diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h deleted file mode 100644 index dde6bf092c8a..000000000000 --- a/include/trace/events/gfpflags.h +++ /dev/null | |||
| @@ -1,43 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * The order of these masks is important. Matching masks will be seen | ||
| 3 | * first and the left over flags will end up showing by themselves. | ||
| 4 | * | ||
| 5 | * For example, if we have GFP_KERNEL before GFP_USER we wil get: | ||
| 6 | * | ||
| 7 | * GFP_KERNEL|GFP_HARDWALL | ||
| 8 | * | ||
| 9 | * Thus most bits set go first. | ||
| 10 | */ | ||
| 11 | #define show_gfp_flags(flags) \ | ||
| 12 | (flags) ? __print_flags(flags, "|", \ | ||
| 13 | {(unsigned long)GFP_TRANSHUGE, "GFP_TRANSHUGE"}, \ | ||
| 14 | {(unsigned long)GFP_HIGHUSER_MOVABLE, "GFP_HIGHUSER_MOVABLE"}, \ | ||
| 15 | {(unsigned long)GFP_HIGHUSER, "GFP_HIGHUSER"}, \ | ||
| 16 | {(unsigned long)GFP_USER, "GFP_USER"}, \ | ||
| 17 | {(unsigned long)GFP_TEMPORARY, "GFP_TEMPORARY"}, \ | ||
| 18 | {(unsigned long)GFP_KERNEL, "GFP_KERNEL"}, \ | ||
| 19 | {(unsigned long)GFP_NOFS, "GFP_NOFS"}, \ | ||
| 20 | {(unsigned long)GFP_ATOMIC, "GFP_ATOMIC"}, \ | ||
| 21 | {(unsigned long)GFP_NOIO, "GFP_NOIO"}, \ | ||
| 22 | {(unsigned long)__GFP_HIGH, "GFP_HIGH"}, \ | ||
| 23 | {(unsigned long)__GFP_ATOMIC, "GFP_ATOMIC"}, \ | ||
| 24 | {(unsigned long)__GFP_IO, "GFP_IO"}, \ | ||
| 25 | {(unsigned long)__GFP_COLD, "GFP_COLD"}, \ | ||
| 26 | {(unsigned long)__GFP_NOWARN, "GFP_NOWARN"}, \ | ||
| 27 | {(unsigned long)__GFP_REPEAT, "GFP_REPEAT"}, \ | ||
| 28 | {(unsigned long)__GFP_NOFAIL, "GFP_NOFAIL"}, \ | ||
| 29 | {(unsigned long)__GFP_NORETRY, "GFP_NORETRY"}, \ | ||
| 30 | {(unsigned long)__GFP_COMP, "GFP_COMP"}, \ | ||
| 31 | {(unsigned long)__GFP_ZERO, "GFP_ZERO"}, \ | ||
| 32 | {(unsigned long)__GFP_NOMEMALLOC, "GFP_NOMEMALLOC"}, \ | ||
| 33 | {(unsigned long)__GFP_MEMALLOC, "GFP_MEMALLOC"}, \ | ||
| 34 | {(unsigned long)__GFP_HARDWALL, "GFP_HARDWALL"}, \ | ||
| 35 | {(unsigned long)__GFP_THISNODE, "GFP_THISNODE"}, \ | ||
| 36 | {(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \ | ||
| 37 | {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"}, \ | ||
| 38 | {(unsigned long)__GFP_NOTRACK, "GFP_NOTRACK"}, \ | ||
| 39 | {(unsigned long)__GFP_DIRECT_RECLAIM, "GFP_DIRECT_RECLAIM"}, \ | ||
| 40 | {(unsigned long)__GFP_KSWAPD_RECLAIM, "GFP_KSWAPD_RECLAIM"}, \ | ||
| 41 | {(unsigned long)__GFP_OTHER_NODE, "GFP_OTHER_NODE"} \ | ||
| 42 | ) : "GFP_NOWAIT" | ||
| 43 | |||
diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h index 47c6212d8f3c..551ba4acde4d 100644 --- a/include/trace/events/huge_memory.h +++ b/include/trace/events/huge_memory.h | |||
| @@ -6,8 +6,6 @@ | |||
| 6 | 6 | ||
| 7 | #include <linux/tracepoint.h> | 7 | #include <linux/tracepoint.h> |
| 8 | 8 | ||
| 9 | #include <trace/events/gfpflags.h> | ||
| 10 | |||
| 11 | #define SCAN_STATUS \ | 9 | #define SCAN_STATUS \ |
| 12 | EM( SCAN_FAIL, "failed") \ | 10 | EM( SCAN_FAIL, "failed") \ |
| 13 | EM( SCAN_SUCCEED, "succeeded") \ | 11 | EM( SCAN_SUCCEED, "succeeded") \ |
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index f7554fd7fc62..ca7217389067 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h | |||
| @@ -6,7 +6,7 @@ | |||
| 6 | 6 | ||
| 7 | #include <linux/types.h> | 7 | #include <linux/types.h> |
| 8 | #include <linux/tracepoint.h> | 8 | #include <linux/tracepoint.h> |
| 9 | #include <trace/events/gfpflags.h> | 9 | #include <trace/events/mmflags.h> |
| 10 | 10 | ||
| 11 | DECLARE_EVENT_CLASS(kmem_alloc, | 11 | DECLARE_EVENT_CLASS(kmem_alloc, |
| 12 | 12 | ||
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h new file mode 100644 index 000000000000..a849185c82f0 --- /dev/null +++ b/include/trace/events/mmflags.h | |||
| @@ -0,0 +1,164 @@ | |||
| 1 | /* | ||
| 2 | * The order of these masks is important. Matching masks will be seen | ||
| 3 | * first and the left over flags will end up showing by themselves. | ||
| 4 | * | ||
| 5 | * For example, if we have GFP_KERNEL before GFP_USER we will get: | ||
| 6 | * | ||
| 7 | * GFP_KERNEL|GFP_HARDWALL | ||
| 8 | * | ||
| 9 | * Thus most bits set go first. | ||
| 10 | */ | ||
| 11 | |||
| 12 | #define __def_gfpflag_names \ | ||
| 13 | {(unsigned long)GFP_TRANSHUGE, "GFP_TRANSHUGE"}, \ | ||
| 14 | {(unsigned long)GFP_HIGHUSER_MOVABLE, "GFP_HIGHUSER_MOVABLE"},\ | ||
| 15 | {(unsigned long)GFP_HIGHUSER, "GFP_HIGHUSER"}, \ | ||
| 16 | {(unsigned long)GFP_USER, "GFP_USER"}, \ | ||
| 17 | {(unsigned long)GFP_TEMPORARY, "GFP_TEMPORARY"}, \ | ||
| 18 | {(unsigned long)GFP_KERNEL_ACCOUNT, "GFP_KERNEL_ACCOUNT"}, \ | ||
| 19 | {(unsigned long)GFP_KERNEL, "GFP_KERNEL"}, \ | ||
| 20 | {(unsigned long)GFP_NOFS, "GFP_NOFS"}, \ | ||
| 21 | {(unsigned long)GFP_ATOMIC, "GFP_ATOMIC"}, \ | ||
| 22 | {(unsigned long)GFP_NOIO, "GFP_NOIO"}, \ | ||
| 23 | {(unsigned long)GFP_NOWAIT, "GFP_NOWAIT"}, \ | ||
| 24 | {(unsigned long)GFP_DMA, "GFP_DMA"}, \ | ||
| 25 | {(unsigned long)__GFP_HIGHMEM, "__GFP_HIGHMEM"}, \ | ||
| 26 | {(unsigned long)GFP_DMA32, "GFP_DMA32"}, \ | ||
| 27 | {(unsigned long)__GFP_HIGH, "__GFP_HIGH"}, \ | ||
| 28 | {(unsigned long)__GFP_ATOMIC, "__GFP_ATOMIC"}, \ | ||
| 29 | {(unsigned long)__GFP_IO, "__GFP_IO"}, \ | ||
| 30 | {(unsigned long)__GFP_FS, "__GFP_FS"}, \ | ||
| 31 | {(unsigned long)__GFP_COLD, "__GFP_COLD"}, \ | ||
| 32 | {(unsigned long)__GFP_NOWARN, "__GFP_NOWARN"}, \ | ||
| 33 | {(unsigned long)__GFP_REPEAT, "__GFP_REPEAT"}, \ | ||
| 34 | {(unsigned long)__GFP_NOFAIL, "__GFP_NOFAIL"}, \ | ||
| 35 | {(unsigned long)__GFP_NORETRY, "__GFP_NORETRY"}, \ | ||
| 36 | {(unsigned long)__GFP_COMP, "__GFP_COMP"}, \ | ||
| 37 | {(unsigned long)__GFP_ZERO, "__GFP_ZERO"}, \ | ||
| 38 | {(unsigned long)__GFP_NOMEMALLOC, "__GFP_NOMEMALLOC"}, \ | ||
| 39 | {(unsigned long)__GFP_MEMALLOC, "__GFP_MEMALLOC"}, \ | ||
| 40 | {(unsigned long)__GFP_HARDWALL, "__GFP_HARDWALL"}, \ | ||
| 41 | {(unsigned long)__GFP_THISNODE, "__GFP_THISNODE"}, \ | ||
| 42 | {(unsigned long)__GFP_RECLAIMABLE, "__GFP_RECLAIMABLE"}, \ | ||
| 43 | {(unsigned long)__GFP_MOVABLE, "__GFP_MOVABLE"}, \ | ||
| 44 | {(unsigned long)__GFP_ACCOUNT, "__GFP_ACCOUNT"}, \ | ||
| 45 | {(unsigned long)__GFP_NOTRACK, "__GFP_NOTRACK"}, \ | ||
| 46 | {(unsigned long)__GFP_WRITE, "__GFP_WRITE"}, \ | ||
| 47 | {(unsigned long)__GFP_RECLAIM, "__GFP_RECLAIM"}, \ | ||
| 48 | {(unsigned long)__GFP_DIRECT_RECLAIM, "__GFP_DIRECT_RECLAIM"},\ | ||
| 49 | {(unsigned long)__GFP_KSWAPD_RECLAIM, "__GFP_KSWAPD_RECLAIM"},\ | ||
| 50 | {(unsigned long)__GFP_OTHER_NODE, "__GFP_OTHER_NODE"} \ | ||
| 51 | |||
| 52 | #define show_gfp_flags(flags) \ | ||
| 53 | (flags) ? __print_flags(flags, "|", \ | ||
| 54 | __def_gfpflag_names \ | ||
| 55 | ) : "none" | ||
| 56 | |||
| 57 | #ifdef CONFIG_MMU | ||
| 58 | #define IF_HAVE_PG_MLOCK(flag,string) ,{1UL << flag, string} | ||
| 59 | #else | ||
| 60 | #define IF_HAVE_PG_MLOCK(flag,string) | ||
| 61 | #endif | ||
| 62 | |||
| 63 | #ifdef CONFIG_ARCH_USES_PG_UNCACHED | ||
| 64 | #define IF_HAVE_PG_UNCACHED(flag,string) ,{1UL << flag, string} | ||
| 65 | #else | ||
| 66 | #define IF_HAVE_PG_UNCACHED(flag,string) | ||
| 67 | #endif | ||
| 68 | |||
| 69 | #ifdef CONFIG_MEMORY_FAILURE | ||
| 70 | #define IF_HAVE_PG_HWPOISON(flag,string) ,{1UL << flag, string} | ||
| 71 | #else | ||
| 72 | #define IF_HAVE_PG_HWPOISON(flag,string) | ||
| 73 | #endif | ||
| 74 | |||
| 75 | #if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT) | ||
| 76 | #define IF_HAVE_PG_IDLE(flag,string) ,{1UL << flag, string} | ||
| 77 | #else | ||
| 78 | #define IF_HAVE_PG_IDLE(flag,string) | ||
| 79 | #endif | ||
| 80 | |||
| 81 | #define __def_pageflag_names \ | ||
| 82 | {1UL << PG_locked, "locked" }, \ | ||
| 83 | {1UL << PG_error, "error" }, \ | ||
| 84 | {1UL << PG_referenced, "referenced" }, \ | ||
| 85 | {1UL << PG_uptodate, "uptodate" }, \ | ||
| 86 | {1UL << PG_dirty, "dirty" }, \ | ||
| 87 | {1UL << PG_lru, "lru" }, \ | ||
| 88 | {1UL << PG_active, "active" }, \ | ||
| 89 | {1UL << PG_slab, "slab" }, \ | ||
| 90 | {1UL << PG_owner_priv_1, "owner_priv_1" }, \ | ||
| 91 | {1UL << PG_arch_1, "arch_1" }, \ | ||
| 92 | {1UL << PG_reserved, "reserved" }, \ | ||
| 93 | {1UL << PG_private, "private" }, \ | ||
| 94 | {1UL << PG_private_2, "private_2" }, \ | ||
| 95 | {1UL << PG_writeback, "writeback" }, \ | ||
| 96 | {1UL << PG_head, "head" }, \ | ||
| 97 | {1UL << PG_swapcache, "swapcache" }, \ | ||
| 98 | {1UL << PG_mappedtodisk, "mappedtodisk" }, \ | ||
| 99 | {1UL << PG_reclaim, "reclaim" }, \ | ||
| 100 | {1UL << PG_swapbacked, "swapbacked" }, \ | ||
| 101 | {1UL << PG_unevictable, "unevictable" } \ | ||
| 102 | IF_HAVE_PG_MLOCK(PG_mlocked, "mlocked" ) \ | ||
| 103 | IF_HAVE_PG_UNCACHED(PG_uncached, "uncached" ) \ | ||
| 104 | IF_HAVE_PG_HWPOISON(PG_hwpoison, "hwpoison" ) \ | ||
| 105 | IF_HAVE_PG_IDLE(PG_young, "young" ) \ | ||
| 106 | IF_HAVE_PG_IDLE(PG_idle, "idle" ) | ||
| 107 | |||
| 108 | #define show_page_flags(flags) \ | ||
| 109 | (flags) ? __print_flags(flags, "|", \ | ||
| 110 | __def_pageflag_names \ | ||
| 111 | ) : "none" | ||
| 112 | |||
| 113 | #if defined(CONFIG_X86) | ||
| 114 | #define __VM_ARCH_SPECIFIC {VM_PAT, "pat" } | ||
| 115 | #elif defined(CONFIG_PPC) | ||
| 116 | #define __VM_ARCH_SPECIFIC {VM_SAO, "sao" } | ||
| 117 | #elif defined(CONFIG_PARISC) || defined(CONFIG_METAG) || defined(CONFIG_IA64) | ||
| 118 | #define __VM_ARCH_SPECIFIC {VM_GROWSUP, "growsup" } | ||
| 119 | #elif !defined(CONFIG_MMU) | ||
| 120 | #define __VM_ARCH_SPECIFIC {VM_MAPPED_COPY,"mappedcopy" } | ||
| 121 | #else | ||
| 122 | #define __VM_ARCH_SPECIFIC {VM_ARCH_1, "arch_1" } | ||
| 123 | #endif | ||
| 124 | |||
| 125 | #ifdef CONFIG_MEM_SOFT_DIRTY | ||
| 126 | #define IF_HAVE_VM_SOFTDIRTY(flag,name) {flag, name }, | ||
| 127 | #else | ||
| 128 | #define IF_HAVE_VM_SOFTDIRTY(flag,name) | ||
| 129 | #endif | ||
| 130 | |||
| 131 | #define __def_vmaflag_names \ | ||
| 132 | {VM_READ, "read" }, \ | ||
| 133 | {VM_WRITE, "write" }, \ | ||
| 134 | {VM_EXEC, "exec" }, \ | ||
| 135 | {VM_SHARED, "shared" }, \ | ||
| 136 | {VM_MAYREAD, "mayread" }, \ | ||
| 137 | {VM_MAYWRITE, "maywrite" }, \ | ||
| 138 | {VM_MAYEXEC, "mayexec" }, \ | ||
| 139 | {VM_MAYSHARE, "mayshare" }, \ | ||
| 140 | {VM_GROWSDOWN, "growsdown" }, \ | ||
| 141 | {VM_PFNMAP, "pfnmap" }, \ | ||
| 142 | {VM_DENYWRITE, "denywrite" }, \ | ||
| 143 | {VM_LOCKONFAULT, "lockonfault" }, \ | ||
| 144 | {VM_LOCKED, "locked" }, \ | ||
| 145 | {VM_IO, "io" }, \ | ||
| 146 | {VM_SEQ_READ, "seqread" }, \ | ||
| 147 | {VM_RAND_READ, "randread" }, \ | ||
| 148 | {VM_DONTCOPY, "dontcopy" }, \ | ||
| 149 | {VM_DONTEXPAND, "dontexpand" }, \ | ||
| 150 | {VM_ACCOUNT, "account" }, \ | ||
| 151 | {VM_NORESERVE, "noreserve" }, \ | ||
| 152 | {VM_HUGETLB, "hugetlb" }, \ | ||
| 153 | __VM_ARCH_SPECIFIC , \ | ||
| 154 | {VM_DONTDUMP, "dontdump" }, \ | ||
| 155 | IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \ | ||
| 156 | {VM_MIXEDMAP, "mixedmap" }, \ | ||
| 157 | {VM_HUGEPAGE, "hugepage" }, \ | ||
| 158 | {VM_NOHUGEPAGE, "nohugepage" }, \ | ||
| 159 | {VM_MERGEABLE, "mergeable" } \ | ||
| 160 | |||
| 161 | #define show_vma_flags(flags) \ | ||
| 162 | (flags) ? __print_flags(flags, "|", \ | ||
| 163 | __def_vmaflag_names \ | ||
| 164 | ) : "none" | ||
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index 31763dd8db1c..0101ef37f1ee 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h | |||
| @@ -8,7 +8,7 @@ | |||
| 8 | #include <linux/tracepoint.h> | 8 | #include <linux/tracepoint.h> |
| 9 | #include <linux/mm.h> | 9 | #include <linux/mm.h> |
| 10 | #include <linux/memcontrol.h> | 10 | #include <linux/memcontrol.h> |
| 11 | #include <trace/events/gfpflags.h> | 11 | #include <trace/events/mmflags.h> |
| 12 | 12 | ||
| 13 | #define RECLAIM_WB_ANON 0x0001u | 13 | #define RECLAIM_WB_ANON 0x0001u |
| 14 | #define RECLAIM_WB_FILE 0x0002u | 14 | #define RECLAIM_WB_FILE 0x0002u |
diff --git a/include/uapi/linux/auto_fs.h b/include/uapi/linux/auto_fs.h index bb991dfe134f..9175a1b4dc69 100644 --- a/include/uapi/linux/auto_fs.h +++ b/include/uapi/linux/auto_fs.h | |||
| @@ -1,7 +1,4 @@ | |||
| 1 | /* -*- linux-c -*- ------------------------------------------------------- * | 1 | /* |
| 2 | * | ||
| 3 | * linux/include/linux/auto_fs.h | ||
| 4 | * | ||
| 5 | * Copyright 1997 Transmeta Corporation - All Rights Reserved | 2 | * Copyright 1997 Transmeta Corporation - All Rights Reserved |
| 6 | * | 3 | * |
| 7 | * This file is part of the Linux kernel and is made available under | 4 | * This file is part of the Linux kernel and is made available under |
| @@ -51,7 +48,7 @@ struct autofs_packet_hdr { | |||
| 51 | 48 | ||
| 52 | struct autofs_packet_missing { | 49 | struct autofs_packet_missing { |
| 53 | struct autofs_packet_hdr hdr; | 50 | struct autofs_packet_hdr hdr; |
| 54 | autofs_wqt_t wait_queue_token; | 51 | autofs_wqt_t wait_queue_token; |
| 55 | int len; | 52 | int len; |
| 56 | char name[NAME_MAX+1]; | 53 | char name[NAME_MAX+1]; |
| 57 | }; | 54 | }; |
| @@ -63,12 +60,12 @@ struct autofs_packet_expire { | |||
| 63 | char name[NAME_MAX+1]; | 60 | char name[NAME_MAX+1]; |
| 64 | }; | 61 | }; |
| 65 | 62 | ||
| 66 | #define AUTOFS_IOC_READY _IO(0x93,0x60) | 63 | #define AUTOFS_IOC_READY _IO(0x93, 0x60) |
| 67 | #define AUTOFS_IOC_FAIL _IO(0x93,0x61) | 64 | #define AUTOFS_IOC_FAIL _IO(0x93, 0x61) |
| 68 | #define AUTOFS_IOC_CATATONIC _IO(0x93,0x62) | 65 | #define AUTOFS_IOC_CATATONIC _IO(0x93, 0x62) |
| 69 | #define AUTOFS_IOC_PROTOVER _IOR(0x93,0x63,int) | 66 | #define AUTOFS_IOC_PROTOVER _IOR(0x93, 0x63, int) |
| 70 | #define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93,0x64,compat_ulong_t) | 67 | #define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93, 0x64, compat_ulong_t) |
| 71 | #define AUTOFS_IOC_SETTIMEOUT _IOWR(0x93,0x64,unsigned long) | 68 | #define AUTOFS_IOC_SETTIMEOUT _IOWR(0x93, 0x64, unsigned long) |
| 72 | #define AUTOFS_IOC_EXPIRE _IOR(0x93,0x65,struct autofs_packet_expire) | 69 | #define AUTOFS_IOC_EXPIRE _IOR(0x93, 0x65, struct autofs_packet_expire) |
| 73 | 70 | ||
| 74 | #endif /* _UAPI_LINUX_AUTO_FS_H */ | 71 | #endif /* _UAPI_LINUX_AUTO_FS_H */ |
diff --git a/include/uapi/linux/auto_fs4.h b/include/uapi/linux/auto_fs4.h index e02982fa2953..8f8f1bdcca8c 100644 --- a/include/uapi/linux/auto_fs4.h +++ b/include/uapi/linux/auto_fs4.h | |||
| @@ -1,6 +1,4 @@ | |||
| 1 | /* -*- c -*- | 1 | /* |
| 2 | * linux/include/linux/auto_fs4.h | ||
| 3 | * | ||
| 4 | * Copyright 1999-2000 Jeremy Fitzhardinge <jeremy@goop.org> | 2 | * Copyright 1999-2000 Jeremy Fitzhardinge <jeremy@goop.org> |
| 5 | * | 3 | * |
| 6 | * This file is part of the Linux kernel and is made available under | 4 | * This file is part of the Linux kernel and is made available under |
| @@ -38,7 +36,6 @@ | |||
| 38 | static inline void set_autofs_type_indirect(unsigned int *type) | 36 | static inline void set_autofs_type_indirect(unsigned int *type) |
| 39 | { | 37 | { |
| 40 | *type = AUTOFS_TYPE_INDIRECT; | 38 | *type = AUTOFS_TYPE_INDIRECT; |
| 41 | return; | ||
| 42 | } | 39 | } |
| 43 | 40 | ||
| 44 | static inline unsigned int autofs_type_indirect(unsigned int type) | 41 | static inline unsigned int autofs_type_indirect(unsigned int type) |
| @@ -49,7 +46,6 @@ static inline unsigned int autofs_type_indirect(unsigned int type) | |||
| 49 | static inline void set_autofs_type_direct(unsigned int *type) | 46 | static inline void set_autofs_type_direct(unsigned int *type) |
| 50 | { | 47 | { |
| 51 | *type = AUTOFS_TYPE_DIRECT; | 48 | *type = AUTOFS_TYPE_DIRECT; |
| 52 | return; | ||
| 53 | } | 49 | } |
| 54 | 50 | ||
| 55 | static inline unsigned int autofs_type_direct(unsigned int type) | 51 | static inline unsigned int autofs_type_direct(unsigned int type) |
| @@ -60,7 +56,6 @@ static inline unsigned int autofs_type_direct(unsigned int type) | |||
| 60 | static inline void set_autofs_type_offset(unsigned int *type) | 56 | static inline void set_autofs_type_offset(unsigned int *type) |
| 61 | { | 57 | { |
| 62 | *type = AUTOFS_TYPE_OFFSET; | 58 | *type = AUTOFS_TYPE_OFFSET; |
| 63 | return; | ||
| 64 | } | 59 | } |
| 65 | 60 | ||
| 66 | static inline unsigned int autofs_type_offset(unsigned int type) | 61 | static inline unsigned int autofs_type_offset(unsigned int type) |
| @@ -81,7 +76,6 @@ static inline unsigned int autofs_type_trigger(unsigned int type) | |||
| 81 | static inline void set_autofs_type_any(unsigned int *type) | 76 | static inline void set_autofs_type_any(unsigned int *type) |
| 82 | { | 77 | { |
| 83 | *type = AUTOFS_TYPE_ANY; | 78 | *type = AUTOFS_TYPE_ANY; |
| 84 | return; | ||
| 85 | } | 79 | } |
| 86 | 80 | ||
| 87 | static inline unsigned int autofs_type_any(unsigned int type) | 81 | static inline unsigned int autofs_type_any(unsigned int type) |
| @@ -114,7 +108,7 @@ enum autofs_notify { | |||
| 114 | /* v4 multi expire (via pipe) */ | 108 | /* v4 multi expire (via pipe) */ |
| 115 | struct autofs_packet_expire_multi { | 109 | struct autofs_packet_expire_multi { |
| 116 | struct autofs_packet_hdr hdr; | 110 | struct autofs_packet_hdr hdr; |
| 117 | autofs_wqt_t wait_queue_token; | 111 | autofs_wqt_t wait_queue_token; |
| 118 | int len; | 112 | int len; |
| 119 | char name[NAME_MAX+1]; | 113 | char name[NAME_MAX+1]; |
| 120 | }; | 114 | }; |
| @@ -154,11 +148,10 @@ union autofs_v5_packet_union { | |||
| 154 | autofs_packet_expire_direct_t expire_direct; | 148 | autofs_packet_expire_direct_t expire_direct; |
| 155 | }; | 149 | }; |
| 156 | 150 | ||
| 157 | #define AUTOFS_IOC_EXPIRE_MULTI _IOW(0x93,0x66,int) | 151 | #define AUTOFS_IOC_EXPIRE_MULTI _IOW(0x93, 0x66, int) |
| 158 | #define AUTOFS_IOC_EXPIRE_INDIRECT AUTOFS_IOC_EXPIRE_MULTI | 152 | #define AUTOFS_IOC_EXPIRE_INDIRECT AUTOFS_IOC_EXPIRE_MULTI |
| 159 | #define AUTOFS_IOC_EXPIRE_DIRECT AUTOFS_IOC_EXPIRE_MULTI | 153 | #define AUTOFS_IOC_EXPIRE_DIRECT AUTOFS_IOC_EXPIRE_MULTI |
| 160 | #define AUTOFS_IOC_PROTOSUBVER _IOR(0x93,0x67,int) | 154 | #define AUTOFS_IOC_PROTOSUBVER _IOR(0x93, 0x67, int) |
| 161 | #define AUTOFS_IOC_ASKUMOUNT _IOR(0x93,0x70,int) | 155 | #define AUTOFS_IOC_ASKUMOUNT _IOR(0x93, 0x70, int) |
| 162 | |||
| 163 | 156 | ||
| 164 | #endif /* _LINUX_AUTO_FS4_H */ | 157 | #endif /* _LINUX_AUTO_FS4_H */ |
diff --git a/init/Kconfig b/init/Kconfig index 22320804fbaf..fd664b3ab99e 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
| @@ -1420,6 +1420,28 @@ config KALLSYMS_ALL | |||
| 1420 | 1420 | ||
| 1421 | Say N unless you really need all symbols. | 1421 | Say N unless you really need all symbols. |
| 1422 | 1422 | ||
| 1423 | config KALLSYMS_ABSOLUTE_PERCPU | ||
| 1424 | bool | ||
| 1425 | default X86_64 && SMP | ||
| 1426 | |||
| 1427 | config KALLSYMS_BASE_RELATIVE | ||
| 1428 | bool | ||
| 1429 | depends on KALLSYMS | ||
| 1430 | default !IA64 && !(TILE && 64BIT) | ||
| 1431 | help | ||
| 1432 | Instead of emitting them as absolute values in the native word size, | ||
| 1433 | emit the symbol references in the kallsyms table as 32-bit entries, | ||
| 1434 | each containing a relative value in the range [base, base + U32_MAX] | ||
| 1435 | or, when KALLSYMS_ABSOLUTE_PERCPU is in effect, each containing either | ||
| 1436 | an absolute value in the range [0, S32_MAX] or a relative value in the | ||
| 1437 | range [base, base + S32_MAX], where base is the lowest relative symbol | ||
| 1438 | address encountered in the image. | ||
| 1439 | |||
| 1440 | On 64-bit builds, this reduces the size of the address table by 50%, | ||
| 1441 | but more importantly, it results in entries whose values are build | ||
| 1442 | time constants, and no relocation pass is required at runtime to fix | ||
| 1443 | up the entries based on the runtime load address of the kernel. | ||
| 1444 | |||
| 1423 | config PRINTK | 1445 | config PRINTK |
| 1424 | default y | 1446 | default y |
| 1425 | bool "Enable support for printk" if EXPERT | 1447 | bool "Enable support for printk" if EXPERT |
diff --git a/init/main.c b/init/main.c index 8dc93df20f7f..b3c6e363ae18 100644 --- a/init/main.c +++ b/init/main.c | |||
| @@ -705,7 +705,6 @@ static int __init initcall_blacklist(char *str) | |||
| 705 | 705 | ||
| 706 | static bool __init_or_module initcall_blacklisted(initcall_t fn) | 706 | static bool __init_or_module initcall_blacklisted(initcall_t fn) |
| 707 | { | 707 | { |
| 708 | struct list_head *tmp; | ||
| 709 | struct blacklist_entry *entry; | 708 | struct blacklist_entry *entry; |
| 710 | char *fn_name; | 709 | char *fn_name; |
| 711 | 710 | ||
| @@ -713,8 +712,7 @@ static bool __init_or_module initcall_blacklisted(initcall_t fn) | |||
| 713 | if (!fn_name) | 712 | if (!fn_name) |
| 714 | return false; | 713 | return false; |
| 715 | 714 | ||
| 716 | list_for_each(tmp, &blacklisted_initcalls) { | 715 | list_for_each_entry(entry, &blacklisted_initcalls, next) { |
| 717 | entry = list_entry(tmp, struct blacklist_entry, next); | ||
| 718 | if (!strcmp(fn_name, entry->buf)) { | 716 | if (!strcmp(fn_name, entry->buf)) { |
| 719 | pr_debug("initcall %s blacklisted\n", fn_name); | 717 | pr_debug("initcall %s blacklisted\n", fn_name); |
| 720 | kfree(fn_name); | 718 | kfree(fn_name); |
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 5c5987f10819..fafd1a3ef0da 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c | |||
| @@ -38,6 +38,7 @@ | |||
| 38 | * during the second link stage. | 38 | * during the second link stage. |
| 39 | */ | 39 | */ |
| 40 | extern const unsigned long kallsyms_addresses[] __weak; | 40 | extern const unsigned long kallsyms_addresses[] __weak; |
| 41 | extern const int kallsyms_offsets[] __weak; | ||
| 41 | extern const u8 kallsyms_names[] __weak; | 42 | extern const u8 kallsyms_names[] __weak; |
| 42 | 43 | ||
| 43 | /* | 44 | /* |
| @@ -47,6 +48,9 @@ extern const u8 kallsyms_names[] __weak; | |||
| 47 | extern const unsigned long kallsyms_num_syms | 48 | extern const unsigned long kallsyms_num_syms |
| 48 | __attribute__((weak, section(".rodata"))); | 49 | __attribute__((weak, section(".rodata"))); |
| 49 | 50 | ||
| 51 | extern const unsigned long kallsyms_relative_base | ||
| 52 | __attribute__((weak, section(".rodata"))); | ||
| 53 | |||
| 50 | extern const u8 kallsyms_token_table[] __weak; | 54 | extern const u8 kallsyms_token_table[] __weak; |
| 51 | extern const u16 kallsyms_token_index[] __weak; | 55 | extern const u16 kallsyms_token_index[] __weak; |
| 52 | 56 | ||
| @@ -176,6 +180,23 @@ static unsigned int get_symbol_offset(unsigned long pos) | |||
| 176 | return name - kallsyms_names; | 180 | return name - kallsyms_names; |
| 177 | } | 181 | } |
| 178 | 182 | ||
| 183 | static unsigned long kallsyms_sym_address(int idx) | ||
| 184 | { | ||
| 185 | if (!IS_ENABLED(CONFIG_KALLSYMS_BASE_RELATIVE)) | ||
| 186 | return kallsyms_addresses[idx]; | ||
| 187 | |||
| 188 | /* values are unsigned offsets if --absolute-percpu is not in effect */ | ||
| 189 | if (!IS_ENABLED(CONFIG_KALLSYMS_ABSOLUTE_PERCPU)) | ||
| 190 | return kallsyms_relative_base + (u32)kallsyms_offsets[idx]; | ||
| 191 | |||
| 192 | /* ...otherwise, positive offsets are absolute values */ | ||
| 193 | if (kallsyms_offsets[idx] >= 0) | ||
| 194 | return kallsyms_offsets[idx]; | ||
| 195 | |||
| 196 | /* ...and negative offsets are relative to kallsyms_relative_base - 1 */ | ||
| 197 | return kallsyms_relative_base - 1 - kallsyms_offsets[idx]; | ||
| 198 | } | ||
| 199 | |||
| 179 | /* Lookup the address for this symbol. Returns 0 if not found. */ | 200 | /* Lookup the address for this symbol. Returns 0 if not found. */ |
| 180 | unsigned long kallsyms_lookup_name(const char *name) | 201 | unsigned long kallsyms_lookup_name(const char *name) |
| 181 | { | 202 | { |
| @@ -187,7 +208,7 @@ unsigned long kallsyms_lookup_name(const char *name) | |||
| 187 | off = kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf)); | 208 | off = kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf)); |
| 188 | 209 | ||
| 189 | if (strcmp(namebuf, name) == 0) | 210 | if (strcmp(namebuf, name) == 0) |
| 190 | return kallsyms_addresses[i]; | 211 | return kallsyms_sym_address(i); |
| 191 | } | 212 | } |
| 192 | return module_kallsyms_lookup_name(name); | 213 | return module_kallsyms_lookup_name(name); |
| 193 | } | 214 | } |
| @@ -204,7 +225,7 @@ int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, | |||
| 204 | 225 | ||
| 205 | for (i = 0, off = 0; i < kallsyms_num_syms; i++) { | 226 | for (i = 0, off = 0; i < kallsyms_num_syms; i++) { |
| 206 | off = kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf)); | 227 | off = kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf)); |
| 207 | ret = fn(data, namebuf, NULL, kallsyms_addresses[i]); | 228 | ret = fn(data, namebuf, NULL, kallsyms_sym_address(i)); |
| 208 | if (ret != 0) | 229 | if (ret != 0) |
| 209 | return ret; | 230 | return ret; |
| 210 | } | 231 | } |
| @@ -220,7 +241,10 @@ static unsigned long get_symbol_pos(unsigned long addr, | |||
| 220 | unsigned long i, low, high, mid; | 241 | unsigned long i, low, high, mid; |
| 221 | 242 | ||
| 222 | /* This kernel should never had been booted. */ | 243 | /* This kernel should never had been booted. */ |
| 223 | BUG_ON(!kallsyms_addresses); | 244 | if (!IS_ENABLED(CONFIG_KALLSYMS_BASE_RELATIVE)) |
| 245 | BUG_ON(!kallsyms_addresses); | ||
| 246 | else | ||
| 247 | BUG_ON(!kallsyms_offsets); | ||
| 224 | 248 | ||
| 225 | /* Do a binary search on the sorted kallsyms_addresses array. */ | 249 | /* Do a binary search on the sorted kallsyms_addresses array. */ |
| 226 | low = 0; | 250 | low = 0; |
| @@ -228,7 +252,7 @@ static unsigned long get_symbol_pos(unsigned long addr, | |||
| 228 | 252 | ||
| 229 | while (high - low > 1) { | 253 | while (high - low > 1) { |
| 230 | mid = low + (high - low) / 2; | 254 | mid = low + (high - low) / 2; |
| 231 | if (kallsyms_addresses[mid] <= addr) | 255 | if (kallsyms_sym_address(mid) <= addr) |
| 232 | low = mid; | 256 | low = mid; |
| 233 | else | 257 | else |
| 234 | high = mid; | 258 | high = mid; |
| @@ -238,15 +262,15 @@ static unsigned long get_symbol_pos(unsigned long addr, | |||
| 238 | * Search for the first aliased symbol. Aliased | 262 | * Search for the first aliased symbol. Aliased |
| 239 | * symbols are symbols with the same address. | 263 | * symbols are symbols with the same address. |
| 240 | */ | 264 | */ |
| 241 | while (low && kallsyms_addresses[low-1] == kallsyms_addresses[low]) | 265 | while (low && kallsyms_sym_address(low-1) == kallsyms_sym_address(low)) |
| 242 | --low; | 266 | --low; |
| 243 | 267 | ||
| 244 | symbol_start = kallsyms_addresses[low]; | 268 | symbol_start = kallsyms_sym_address(low); |
| 245 | 269 | ||
| 246 | /* Search for next non-aliased symbol. */ | 270 | /* Search for next non-aliased symbol. */ |
| 247 | for (i = low + 1; i < kallsyms_num_syms; i++) { | 271 | for (i = low + 1; i < kallsyms_num_syms; i++) { |
| 248 | if (kallsyms_addresses[i] > symbol_start) { | 272 | if (kallsyms_sym_address(i) > symbol_start) { |
| 249 | symbol_end = kallsyms_addresses[i]; | 273 | symbol_end = kallsyms_sym_address(i); |
| 250 | break; | 274 | break; |
| 251 | } | 275 | } |
| 252 | } | 276 | } |
| @@ -470,7 +494,7 @@ static unsigned long get_ksymbol_core(struct kallsym_iter *iter) | |||
| 470 | unsigned off = iter->nameoff; | 494 | unsigned off = iter->nameoff; |
| 471 | 495 | ||
| 472 | iter->module_name[0] = '\0'; | 496 | iter->module_name[0] = '\0'; |
| 473 | iter->value = kallsyms_addresses[iter->pos]; | 497 | iter->value = kallsyms_sym_address(iter->pos); |
| 474 | 498 | ||
| 475 | iter->type = kallsyms_get_symbol_type(off); | 499 | iter->type = kallsyms_get_symbol_type(off); |
| 476 | 500 | ||
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index f894a2cd9b2a..53ab2f85d77e 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c | |||
| @@ -148,8 +148,7 @@ static inline struct lock_class *hlock_class(struct held_lock *hlock) | |||
| 148 | } | 148 | } |
| 149 | 149 | ||
| 150 | #ifdef CONFIG_LOCK_STAT | 150 | #ifdef CONFIG_LOCK_STAT |
| 151 | static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], | 151 | static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], cpu_lock_stats); |
| 152 | cpu_lock_stats); | ||
| 153 | 152 | ||
| 154 | static inline u64 lockstat_clock(void) | 153 | static inline u64 lockstat_clock(void) |
| 155 | { | 154 | { |
diff --git a/kernel/memremap.c b/kernel/memremap.c index fb9b88787ebc..584febd13e2e 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c | |||
| @@ -391,7 +391,7 @@ struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start) | |||
| 391 | /* | 391 | /* |
| 392 | * 'memmap_start' is the virtual address for the first "struct | 392 | * 'memmap_start' is the virtual address for the first "struct |
| 393 | * page" in this range of the vmemmap array. In the case of | 393 | * page" in this range of the vmemmap array. In the case of |
| 394 | * CONFIG_SPARSE_VMEMMAP a page_to_pfn conversion is simple | 394 | * CONFIG_SPARSEMEM_VMEMMAP a page_to_pfn conversion is simple |
| 395 | * pointer arithmetic, so we can perform this to_vmem_altmap() | 395 | * pointer arithmetic, so we can perform this to_vmem_altmap() |
| 396 | * conversion without concern for the initialization state of | 396 | * conversion without concern for the initialization state of |
| 397 | * the struct page fields. | 397 | * the struct page fields. |
| @@ -400,7 +400,7 @@ struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start) | |||
| 400 | struct dev_pagemap *pgmap; | 400 | struct dev_pagemap *pgmap; |
| 401 | 401 | ||
| 402 | /* | 402 | /* |
| 403 | * Uncoditionally retrieve a dev_pagemap associated with the | 403 | * Unconditionally retrieve a dev_pagemap associated with the |
| 404 | * given physical address, this is only for use in the | 404 | * given physical address, this is only for use in the |
| 405 | * arch_{add|remove}_memory() for setting up and tearing down | 405 | * arch_{add|remove}_memory() for setting up and tearing down |
| 406 | * the memmap. | 406 | * the memmap. |
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index b7342a24f559..aa0f26b58426 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c | |||
| @@ -1158,6 +1158,22 @@ static int __init kaslr_nohibernate_setup(char *str) | |||
| 1158 | return nohibernate_setup(str); | 1158 | return nohibernate_setup(str); |
| 1159 | } | 1159 | } |
| 1160 | 1160 | ||
| 1161 | static int __init page_poison_nohibernate_setup(char *str) | ||
| 1162 | { | ||
| 1163 | #ifdef CONFIG_PAGE_POISONING_ZERO | ||
| 1164 | /* | ||
| 1165 | * The zeroing option for page poison skips the checks on alloc. | ||
| 1166 | * since hibernation doesn't save free pages there's no way to | ||
| 1167 | * guarantee the pages will still be zeroed. | ||
| 1168 | */ | ||
| 1169 | if (!strcmp(str, "on")) { | ||
| 1170 | pr_info("Disabling hibernation due to page poisoning\n"); | ||
| 1171 | return nohibernate_setup(str); | ||
| 1172 | } | ||
| 1173 | #endif | ||
| 1174 | return 1; | ||
| 1175 | } | ||
| 1176 | |||
| 1161 | __setup("noresume", noresume_setup); | 1177 | __setup("noresume", noresume_setup); |
| 1162 | __setup("resume_offset=", resume_offset_setup); | 1178 | __setup("resume_offset=", resume_offset_setup); |
| 1163 | __setup("resume=", resume_setup); | 1179 | __setup("resume=", resume_setup); |
| @@ -1166,3 +1182,4 @@ __setup("resumewait", resumewait_setup); | |||
| 1166 | __setup("resumedelay=", resumedelay_setup); | 1182 | __setup("resumedelay=", resumedelay_setup); |
| 1167 | __setup("nohibernate", nohibernate_setup); | 1183 | __setup("nohibernate", nohibernate_setup); |
| 1168 | __setup("kaslr", kaslr_nohibernate_setup); | 1184 | __setup("kaslr", kaslr_nohibernate_setup); |
| 1185 | __setup("page_poison=", page_poison_nohibernate_setup); | ||
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 65ae0e5c35da..250ea67c1615 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c | |||
| @@ -130,10 +130,8 @@ static struct rcu_torture __rcu *rcu_torture_current; | |||
| 130 | static unsigned long rcu_torture_current_version; | 130 | static unsigned long rcu_torture_current_version; |
| 131 | static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN]; | 131 | static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN]; |
| 132 | static DEFINE_SPINLOCK(rcu_torture_lock); | 132 | static DEFINE_SPINLOCK(rcu_torture_lock); |
| 133 | static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], | 133 | static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_count) = { 0 }; |
| 134 | rcu_torture_count) = { 0 }; | 134 | static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_batch) = { 0 }; |
| 135 | static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], | ||
| 136 | rcu_torture_batch) = { 0 }; | ||
| 137 | static atomic_t rcu_torture_wcount[RCU_TORTURE_PIPE_LEN + 1]; | 135 | static atomic_t rcu_torture_wcount[RCU_TORTURE_PIPE_LEN + 1]; |
| 138 | static atomic_t n_rcu_torture_alloc; | 136 | static atomic_t n_rcu_torture_alloc; |
| 139 | static atomic_t n_rcu_torture_alloc_fail; | 137 | static atomic_t n_rcu_torture_alloc_fail; |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 7ff5dc7d2ac5..16e13d8628a3 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
| @@ -320,8 +320,7 @@ static bool wq_debug_force_rr_cpu = false; | |||
| 320 | module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644); | 320 | module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644); |
| 321 | 321 | ||
| 322 | /* the per-cpu worker pools */ | 322 | /* the per-cpu worker pools */ |
| 323 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], | 323 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], cpu_worker_pools); |
| 324 | cpu_worker_pools); | ||
| 325 | 324 | ||
| 326 | static DEFINE_IDR(worker_pool_idr); /* PR: idr of all pools */ | 325 | static DEFINE_IDR(worker_pool_idr); /* PR: idr of all pools */ |
| 327 | 326 | ||
diff --git a/lib/test_printf.c b/lib/test_printf.c index 4f6ae60433bc..563f10e6876a 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c | |||
| @@ -17,6 +17,9 @@ | |||
| 17 | #include <linux/socket.h> | 17 | #include <linux/socket.h> |
| 18 | #include <linux/in.h> | 18 | #include <linux/in.h> |
| 19 | 19 | ||
| 20 | #include <linux/gfp.h> | ||
| 21 | #include <linux/mm.h> | ||
| 22 | |||
| 20 | #define BUF_SIZE 256 | 23 | #define BUF_SIZE 256 |
| 21 | #define PAD_SIZE 16 | 24 | #define PAD_SIZE 16 |
| 22 | #define FILL_CHAR '$' | 25 | #define FILL_CHAR '$' |
| @@ -411,6 +414,55 @@ netdev_features(void) | |||
| 411 | } | 414 | } |
| 412 | 415 | ||
| 413 | static void __init | 416 | static void __init |
| 417 | flags(void) | ||
| 418 | { | ||
| 419 | unsigned long flags; | ||
| 420 | gfp_t gfp; | ||
| 421 | char *cmp_buffer; | ||
| 422 | |||
| 423 | flags = 0; | ||
| 424 | test("", "%pGp", &flags); | ||
| 425 | |||
| 426 | /* Page flags should filter the zone id */ | ||
| 427 | flags = 1UL << NR_PAGEFLAGS; | ||
| 428 | test("", "%pGp", &flags); | ||
| 429 | |||
| 430 | flags |= 1UL << PG_uptodate | 1UL << PG_dirty | 1UL << PG_lru | ||
| 431 | | 1UL << PG_active | 1UL << PG_swapbacked; | ||
| 432 | test("uptodate|dirty|lru|active|swapbacked", "%pGp", &flags); | ||
| 433 | |||
| 434 | |||
| 435 | flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC | ||
| 436 | | VM_DENYWRITE; | ||
| 437 | test("read|exec|mayread|maywrite|mayexec|denywrite", "%pGv", &flags); | ||
| 438 | |||
| 439 | gfp = GFP_TRANSHUGE; | ||
| 440 | test("GFP_TRANSHUGE", "%pGg", &gfp); | ||
| 441 | |||
| 442 | gfp = GFP_ATOMIC|__GFP_DMA; | ||
| 443 | test("GFP_ATOMIC|GFP_DMA", "%pGg", &gfp); | ||
| 444 | |||
| 445 | gfp = __GFP_ATOMIC; | ||
| 446 | test("__GFP_ATOMIC", "%pGg", &gfp); | ||
| 447 | |||
| 448 | cmp_buffer = kmalloc(BUF_SIZE, GFP_KERNEL); | ||
| 449 | if (!cmp_buffer) | ||
| 450 | return; | ||
| 451 | |||
| 452 | /* Any flags not translated by the table should remain numeric */ | ||
| 453 | gfp = ~__GFP_BITS_MASK; | ||
| 454 | snprintf(cmp_buffer, BUF_SIZE, "%#lx", (unsigned long) gfp); | ||
| 455 | test(cmp_buffer, "%pGg", &gfp); | ||
| 456 | |||
| 457 | snprintf(cmp_buffer, BUF_SIZE, "__GFP_ATOMIC|%#lx", | ||
| 458 | (unsigned long) gfp); | ||
| 459 | gfp |= __GFP_ATOMIC; | ||
| 460 | test(cmp_buffer, "%pGg", &gfp); | ||
| 461 | |||
| 462 | kfree(cmp_buffer); | ||
| 463 | } | ||
| 464 | |||
| 465 | static void __init | ||
| 414 | test_pointer(void) | 466 | test_pointer(void) |
| 415 | { | 467 | { |
| 416 | plain(); | 468 | plain(); |
| @@ -428,6 +480,7 @@ test_pointer(void) | |||
| 428 | struct_clk(); | 480 | struct_clk(); |
| 429 | bitmap(); | 481 | bitmap(); |
| 430 | netdev_features(); | 482 | netdev_features(); |
| 483 | flags(); | ||
| 431 | } | 484 | } |
| 432 | 485 | ||
| 433 | static int __init | 486 | static int __init |
diff --git a/lib/vsprintf.c b/lib/vsprintf.c index f44e178e6ede..525c8e19bda2 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c | |||
| @@ -35,6 +35,8 @@ | |||
| 35 | #include <linux/blkdev.h> | 35 | #include <linux/blkdev.h> |
| 36 | #endif | 36 | #endif |
| 37 | 37 | ||
| 38 | #include "../mm/internal.h" /* For the trace_print_flags arrays */ | ||
| 39 | |||
| 38 | #include <asm/page.h> /* for PAGE_SIZE */ | 40 | #include <asm/page.h> /* for PAGE_SIZE */ |
| 39 | #include <asm/sections.h> /* for dereference_function_descriptor() */ | 41 | #include <asm/sections.h> /* for dereference_function_descriptor() */ |
| 40 | #include <asm/byteorder.h> /* cpu_to_le16 */ | 42 | #include <asm/byteorder.h> /* cpu_to_le16 */ |
| @@ -1407,6 +1409,72 @@ char *clock(char *buf, char *end, struct clk *clk, struct printf_spec spec, | |||
| 1407 | } | 1409 | } |
| 1408 | } | 1410 | } |
| 1409 | 1411 | ||
| 1412 | static | ||
| 1413 | char *format_flags(char *buf, char *end, unsigned long flags, | ||
| 1414 | const struct trace_print_flags *names) | ||
| 1415 | { | ||
| 1416 | unsigned long mask; | ||
| 1417 | const struct printf_spec strspec = { | ||
| 1418 | .field_width = -1, | ||
| 1419 | .precision = -1, | ||
| 1420 | }; | ||
| 1421 | const struct printf_spec numspec = { | ||
| 1422 | .flags = SPECIAL|SMALL, | ||
| 1423 | .field_width = -1, | ||
| 1424 | .precision = -1, | ||
| 1425 | .base = 16, | ||
| 1426 | }; | ||
| 1427 | |||
| 1428 | for ( ; flags && names->name; names++) { | ||
| 1429 | mask = names->mask; | ||
| 1430 | if ((flags & mask) != mask) | ||
| 1431 | continue; | ||
| 1432 | |||
| 1433 | buf = string(buf, end, names->name, strspec); | ||
| 1434 | |||
| 1435 | flags &= ~mask; | ||
| 1436 | if (flags) { | ||
| 1437 | if (buf < end) | ||
| 1438 | *buf = '|'; | ||
| 1439 | buf++; | ||
| 1440 | } | ||
| 1441 | } | ||
| 1442 | |||
| 1443 | if (flags) | ||
| 1444 | buf = number(buf, end, flags, numspec); | ||
| 1445 | |||
| 1446 | return buf; | ||
| 1447 | } | ||
| 1448 | |||
| 1449 | static noinline_for_stack | ||
| 1450 | char *flags_string(char *buf, char *end, void *flags_ptr, const char *fmt) | ||
| 1451 | { | ||
| 1452 | unsigned long flags; | ||
| 1453 | const struct trace_print_flags *names; | ||
| 1454 | |||
| 1455 | switch (fmt[1]) { | ||
| 1456 | case 'p': | ||
| 1457 | flags = *(unsigned long *)flags_ptr; | ||
| 1458 | /* Remove zone id */ | ||
| 1459 | flags &= (1UL << NR_PAGEFLAGS) - 1; | ||
| 1460 | names = pageflag_names; | ||
| 1461 | break; | ||
| 1462 | case 'v': | ||
| 1463 | flags = *(unsigned long *)flags_ptr; | ||
| 1464 | names = vmaflag_names; | ||
| 1465 | break; | ||
| 1466 | case 'g': | ||
| 1467 | flags = *(gfp_t *)flags_ptr; | ||
| 1468 | names = gfpflag_names; | ||
| 1469 | break; | ||
| 1470 | default: | ||
| 1471 | WARN_ONCE(1, "Unsupported flags modifier: %c\n", fmt[1]); | ||
| 1472 | return buf; | ||
| 1473 | } | ||
| 1474 | |||
| 1475 | return format_flags(buf, end, flags, names); | ||
| 1476 | } | ||
| 1477 | |||
| 1410 | int kptr_restrict __read_mostly; | 1478 | int kptr_restrict __read_mostly; |
| 1411 | 1479 | ||
| 1412 | /* | 1480 | /* |
| @@ -1495,6 +1563,11 @@ int kptr_restrict __read_mostly; | |||
| 1495 | * - 'Cn' For a clock, it prints the name (Common Clock Framework) or address | 1563 | * - 'Cn' For a clock, it prints the name (Common Clock Framework) or address |
| 1496 | * (legacy clock framework) of the clock | 1564 | * (legacy clock framework) of the clock |
| 1497 | * - 'Cr' For a clock, it prints the current rate of the clock | 1565 | * - 'Cr' For a clock, it prints the current rate of the clock |
| 1566 | * - 'G' For flags to be printed as a collection of symbolic strings that would | ||
| 1567 | * construct the specific value. Supported flags given by option: | ||
| 1568 | * p page flags (see struct page) given as pointer to unsigned long | ||
| 1569 | * g gfp flags (GFP_* and __GFP_*) given as pointer to gfp_t | ||
| 1570 | * v vma flags (VM_*) given as pointer to unsigned long | ||
| 1498 | * | 1571 | * |
| 1499 | * ** Please update also Documentation/printk-formats.txt when making changes ** | 1572 | * ** Please update also Documentation/printk-formats.txt when making changes ** |
| 1500 | * | 1573 | * |
| @@ -1648,6 +1721,8 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, | |||
| 1648 | return bdev_name(buf, end, ptr, spec, fmt); | 1721 | return bdev_name(buf, end, ptr, spec, fmt); |
| 1649 | #endif | 1722 | #endif |
| 1650 | 1723 | ||
| 1724 | case 'G': | ||
| 1725 | return flags_string(buf, end, ptr, fmt); | ||
| 1651 | } | 1726 | } |
| 1652 | spec.flags |= SMALL; | 1727 | spec.flags |= SMALL; |
| 1653 | if (spec.field_width == -1) { | 1728 | if (spec.field_width == -1) { |
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug index 957d3da53ddd..5c50b238b770 100644 --- a/mm/Kconfig.debug +++ b/mm/Kconfig.debug | |||
| @@ -16,8 +16,8 @@ config DEBUG_PAGEALLOC | |||
| 16 | select PAGE_POISONING if !ARCH_SUPPORTS_DEBUG_PAGEALLOC | 16 | select PAGE_POISONING if !ARCH_SUPPORTS_DEBUG_PAGEALLOC |
| 17 | ---help--- | 17 | ---help--- |
| 18 | Unmap pages from the kernel linear mapping after free_pages(). | 18 | Unmap pages from the kernel linear mapping after free_pages(). |
| 19 | This results in a large slowdown, but helps to find certain types | 19 | Depending on runtime enablement, this results in a small or large |
| 20 | of memory corruption. | 20 | slowdown, but helps to find certain types of memory corruption. |
| 21 | 21 | ||
| 22 | For architectures which don't enable ARCH_SUPPORTS_DEBUG_PAGEALLOC, | 22 | For architectures which don't enable ARCH_SUPPORTS_DEBUG_PAGEALLOC, |
| 23 | fill the pages with poison patterns after free_pages() and verify | 23 | fill the pages with poison patterns after free_pages() and verify |
| @@ -26,5 +26,56 @@ config DEBUG_PAGEALLOC | |||
| 26 | that would result in incorrect warnings of memory corruption after | 26 | that would result in incorrect warnings of memory corruption after |
| 27 | a resume because free pages are not saved to the suspend image. | 27 | a resume because free pages are not saved to the suspend image. |
| 28 | 28 | ||
| 29 | By default this option will have a small overhead, e.g. by not | ||
| 30 | allowing the kernel mapping to be backed by large pages on some | ||
| 31 | architectures. Even bigger overhead comes when the debugging is | ||
| 32 | enabled by DEBUG_PAGEALLOC_ENABLE_DEFAULT or the debug_pagealloc | ||
| 33 | command line parameter. | ||
| 34 | |||
| 35 | config DEBUG_PAGEALLOC_ENABLE_DEFAULT | ||
| 36 | bool "Enable debug page memory allocations by default?" | ||
| 37 | default n | ||
| 38 | depends on DEBUG_PAGEALLOC | ||
| 39 | ---help--- | ||
| 40 | Enable debug page memory allocations by default? This value | ||
| 41 | can be overridden by debug_pagealloc=off|on. | ||
| 42 | |||
| 29 | config PAGE_POISONING | 43 | config PAGE_POISONING |
| 30 | bool | 44 | bool "Poison pages after freeing" |
| 45 | select PAGE_EXTENSION | ||
| 46 | select PAGE_POISONING_NO_SANITY if HIBERNATION | ||
| 47 | ---help--- | ||
| 48 | Fill the pages with poison patterns after free_pages() and verify | ||
| 49 | the patterns before alloc_pages. The filling of the memory helps | ||
| 50 | reduce the risk of information leaks from freed data. This does | ||
| 51 | have a potential performance impact. | ||
| 52 | |||
| 53 | Note that "poison" here is not the same thing as the "HWPoison" | ||
| 54 | for CONFIG_MEMORY_FAILURE. This is software poisoning only. | ||
| 55 | |||
| 56 | If unsure, say N | ||
| 57 | |||
| 58 | config PAGE_POISONING_NO_SANITY | ||
| 59 | depends on PAGE_POISONING | ||
| 60 | bool "Only poison, don't sanity check" | ||
| 61 | ---help--- | ||
| 62 | Skip the sanity checking on alloc, only fill the pages with | ||
| 63 | poison on free. This reduces some of the overhead of the | ||
| 64 | poisoning feature. | ||
| 65 | |||
| 66 | If you are only interested in sanitization, say Y. Otherwise | ||
| 67 | say N. | ||
| 68 | |||
| 69 | config PAGE_POISONING_ZERO | ||
| 70 | bool "Use zero for poisoning instead of random data" | ||
| 71 | depends on PAGE_POISONING | ||
| 72 | ---help--- | ||
| 73 | Instead of using the existing poison value, fill the pages with | ||
| 74 | zeros. This makes it harder to detect when errors are occurring | ||
| 75 | due to sanitization but the zeroing at free means that it is | ||
| 76 | no longer necessary to write zeros when GFP_ZERO is used on | ||
| 77 | allocation. | ||
| 78 | |||
| 79 | Enabling page poisoning with this option will disable hibernation | ||
| 80 | |||
| 81 | If unsure, say N | ||
diff --git a/mm/Makefile b/mm/Makefile index 2ed43191fc3b..cfdd481d27a5 100644 --- a/mm/Makefile +++ b/mm/Makefile | |||
| @@ -48,7 +48,7 @@ obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o | |||
| 48 | obj-$(CONFIG_SLOB) += slob.o | 48 | obj-$(CONFIG_SLOB) += slob.o |
| 49 | obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o | 49 | obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o |
| 50 | obj-$(CONFIG_KSM) += ksm.o | 50 | obj-$(CONFIG_KSM) += ksm.o |
| 51 | obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o | 51 | obj-$(CONFIG_PAGE_POISONING) += page_poison.o |
| 52 | obj-$(CONFIG_SLAB) += slab.o | 52 | obj-$(CONFIG_SLAB) += slab.o |
| 53 | obj-$(CONFIG_SLUB) += slub.o | 53 | obj-$(CONFIG_SLUB) += slub.o |
| 54 | obj-$(CONFIG_KMEMCHECK) += kmemcheck.o | 54 | obj-$(CONFIG_KMEMCHECK) += kmemcheck.o |
diff --git a/mm/compaction.c b/mm/compaction.c index 585de54dbe8c..93f71d968098 100644 --- a/mm/compaction.c +++ b/mm/compaction.c | |||
| @@ -71,49 +71,6 @@ static inline bool migrate_async_suitable(int migratetype) | |||
| 71 | return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE; | 71 | return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE; |
| 72 | } | 72 | } |
| 73 | 73 | ||
| 74 | /* | ||
| 75 | * Check that the whole (or subset of) a pageblock given by the interval of | ||
| 76 | * [start_pfn, end_pfn) is valid and within the same zone, before scanning it | ||
| 77 | * with the migration of free compaction scanner. The scanners then need to | ||
| 78 | * use only pfn_valid_within() check for arches that allow holes within | ||
| 79 | * pageblocks. | ||
| 80 | * | ||
| 81 | * Return struct page pointer of start_pfn, or NULL if checks were not passed. | ||
| 82 | * | ||
| 83 | * It's possible on some configurations to have a setup like node0 node1 node0 | ||
| 84 | * i.e. it's possible that all pages within a zones range of pages do not | ||
| 85 | * belong to a single zone. We assume that a border between node0 and node1 | ||
| 86 | * can occur within a single pageblock, but not a node0 node1 node0 | ||
| 87 | * interleaving within a single pageblock. It is therefore sufficient to check | ||
| 88 | * the first and last page of a pageblock and avoid checking each individual | ||
| 89 | * page in a pageblock. | ||
| 90 | */ | ||
| 91 | static struct page *pageblock_pfn_to_page(unsigned long start_pfn, | ||
| 92 | unsigned long end_pfn, struct zone *zone) | ||
| 93 | { | ||
| 94 | struct page *start_page; | ||
| 95 | struct page *end_page; | ||
| 96 | |||
| 97 | /* end_pfn is one past the range we are checking */ | ||
| 98 | end_pfn--; | ||
| 99 | |||
| 100 | if (!pfn_valid(start_pfn) || !pfn_valid(end_pfn)) | ||
| 101 | return NULL; | ||
| 102 | |||
| 103 | start_page = pfn_to_page(start_pfn); | ||
| 104 | |||
| 105 | if (page_zone(start_page) != zone) | ||
| 106 | return NULL; | ||
| 107 | |||
| 108 | end_page = pfn_to_page(end_pfn); | ||
| 109 | |||
| 110 | /* This gives a shorter code than deriving page_zone(end_page) */ | ||
| 111 | if (page_zone_id(start_page) != page_zone_id(end_page)) | ||
| 112 | return NULL; | ||
| 113 | |||
| 114 | return start_page; | ||
| 115 | } | ||
| 116 | |||
| 117 | #ifdef CONFIG_COMPACTION | 74 | #ifdef CONFIG_COMPACTION |
| 118 | 75 | ||
| 119 | /* Do not skip compaction more than 64 times */ | 76 | /* Do not skip compaction more than 64 times */ |
| @@ -200,7 +157,8 @@ static void reset_cached_positions(struct zone *zone) | |||
| 200 | { | 157 | { |
| 201 | zone->compact_cached_migrate_pfn[0] = zone->zone_start_pfn; | 158 | zone->compact_cached_migrate_pfn[0] = zone->zone_start_pfn; |
| 202 | zone->compact_cached_migrate_pfn[1] = zone->zone_start_pfn; | 159 | zone->compact_cached_migrate_pfn[1] = zone->zone_start_pfn; |
| 203 | zone->compact_cached_free_pfn = zone_end_pfn(zone); | 160 | zone->compact_cached_free_pfn = |
| 161 | round_down(zone_end_pfn(zone) - 1, pageblock_nr_pages); | ||
| 204 | } | 162 | } |
| 205 | 163 | ||
| 206 | /* | 164 | /* |
| @@ -554,13 +512,17 @@ unsigned long | |||
| 554 | isolate_freepages_range(struct compact_control *cc, | 512 | isolate_freepages_range(struct compact_control *cc, |
| 555 | unsigned long start_pfn, unsigned long end_pfn) | 513 | unsigned long start_pfn, unsigned long end_pfn) |
| 556 | { | 514 | { |
| 557 | unsigned long isolated, pfn, block_end_pfn; | 515 | unsigned long isolated, pfn, block_start_pfn, block_end_pfn; |
| 558 | LIST_HEAD(freelist); | 516 | LIST_HEAD(freelist); |
| 559 | 517 | ||
| 560 | pfn = start_pfn; | 518 | pfn = start_pfn; |
| 519 | block_start_pfn = pfn & ~(pageblock_nr_pages - 1); | ||
| 520 | if (block_start_pfn < cc->zone->zone_start_pfn) | ||
| 521 | block_start_pfn = cc->zone->zone_start_pfn; | ||
| 561 | block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); | 522 | block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); |
| 562 | 523 | ||
| 563 | for (; pfn < end_pfn; pfn += isolated, | 524 | for (; pfn < end_pfn; pfn += isolated, |
| 525 | block_start_pfn = block_end_pfn, | ||
| 564 | block_end_pfn += pageblock_nr_pages) { | 526 | block_end_pfn += pageblock_nr_pages) { |
| 565 | /* Protect pfn from changing by isolate_freepages_block */ | 527 | /* Protect pfn from changing by isolate_freepages_block */ |
| 566 | unsigned long isolate_start_pfn = pfn; | 528 | unsigned long isolate_start_pfn = pfn; |
| @@ -573,11 +535,13 @@ isolate_freepages_range(struct compact_control *cc, | |||
| 573 | * scanning range to right one. | 535 | * scanning range to right one. |
| 574 | */ | 536 | */ |
| 575 | if (pfn >= block_end_pfn) { | 537 | if (pfn >= block_end_pfn) { |
| 538 | block_start_pfn = pfn & ~(pageblock_nr_pages - 1); | ||
| 576 | block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); | 539 | block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); |
| 577 | block_end_pfn = min(block_end_pfn, end_pfn); | 540 | block_end_pfn = min(block_end_pfn, end_pfn); |
| 578 | } | 541 | } |
| 579 | 542 | ||
| 580 | if (!pageblock_pfn_to_page(pfn, block_end_pfn, cc->zone)) | 543 | if (!pageblock_pfn_to_page(block_start_pfn, |
| 544 | block_end_pfn, cc->zone)) | ||
| 581 | break; | 545 | break; |
| 582 | 546 | ||
| 583 | isolated = isolate_freepages_block(cc, &isolate_start_pfn, | 547 | isolated = isolate_freepages_block(cc, &isolate_start_pfn, |
| @@ -863,18 +827,23 @@ unsigned long | |||
| 863 | isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn, | 827 | isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn, |
| 864 | unsigned long end_pfn) | 828 | unsigned long end_pfn) |
| 865 | { | 829 | { |
| 866 | unsigned long pfn, block_end_pfn; | 830 | unsigned long pfn, block_start_pfn, block_end_pfn; |
| 867 | 831 | ||
| 868 | /* Scan block by block. First and last block may be incomplete */ | 832 | /* Scan block by block. First and last block may be incomplete */ |
| 869 | pfn = start_pfn; | 833 | pfn = start_pfn; |
| 834 | block_start_pfn = pfn & ~(pageblock_nr_pages - 1); | ||
| 835 | if (block_start_pfn < cc->zone->zone_start_pfn) | ||
| 836 | block_start_pfn = cc->zone->zone_start_pfn; | ||
| 870 | block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); | 837 | block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); |
| 871 | 838 | ||
| 872 | for (; pfn < end_pfn; pfn = block_end_pfn, | 839 | for (; pfn < end_pfn; pfn = block_end_pfn, |
| 840 | block_start_pfn = block_end_pfn, | ||
| 873 | block_end_pfn += pageblock_nr_pages) { | 841 | block_end_pfn += pageblock_nr_pages) { |
| 874 | 842 | ||
| 875 | block_end_pfn = min(block_end_pfn, end_pfn); | 843 | block_end_pfn = min(block_end_pfn, end_pfn); |
| 876 | 844 | ||
| 877 | if (!pageblock_pfn_to_page(pfn, block_end_pfn, cc->zone)) | 845 | if (!pageblock_pfn_to_page(block_start_pfn, |
| 846 | block_end_pfn, cc->zone)) | ||
| 878 | continue; | 847 | continue; |
| 879 | 848 | ||
| 880 | pfn = isolate_migratepages_block(cc, pfn, block_end_pfn, | 849 | pfn = isolate_migratepages_block(cc, pfn, block_end_pfn, |
| @@ -1103,7 +1072,9 @@ int sysctl_compact_unevictable_allowed __read_mostly = 1; | |||
| 1103 | static isolate_migrate_t isolate_migratepages(struct zone *zone, | 1072 | static isolate_migrate_t isolate_migratepages(struct zone *zone, |
| 1104 | struct compact_control *cc) | 1073 | struct compact_control *cc) |
| 1105 | { | 1074 | { |
| 1106 | unsigned long low_pfn, end_pfn; | 1075 | unsigned long block_start_pfn; |
| 1076 | unsigned long block_end_pfn; | ||
| 1077 | unsigned long low_pfn; | ||
| 1107 | unsigned long isolate_start_pfn; | 1078 | unsigned long isolate_start_pfn; |
| 1108 | struct page *page; | 1079 | struct page *page; |
| 1109 | const isolate_mode_t isolate_mode = | 1080 | const isolate_mode_t isolate_mode = |
| @@ -1115,16 +1086,21 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, | |||
| 1115 | * initialized by compact_zone() | 1086 | * initialized by compact_zone() |
| 1116 | */ | 1087 | */ |
| 1117 | low_pfn = cc->migrate_pfn; | 1088 | low_pfn = cc->migrate_pfn; |
| 1089 | block_start_pfn = cc->migrate_pfn & ~(pageblock_nr_pages - 1); | ||
| 1090 | if (block_start_pfn < zone->zone_start_pfn) | ||
| 1091 | block_start_pfn = zone->zone_start_pfn; | ||
| 1118 | 1092 | ||
| 1119 | /* Only scan within a pageblock boundary */ | 1093 | /* Only scan within a pageblock boundary */ |
| 1120 | end_pfn = ALIGN(low_pfn + 1, pageblock_nr_pages); | 1094 | block_end_pfn = ALIGN(low_pfn + 1, pageblock_nr_pages); |
| 1121 | 1095 | ||
| 1122 | /* | 1096 | /* |
| 1123 | * Iterate over whole pageblocks until we find the first suitable. | 1097 | * Iterate over whole pageblocks until we find the first suitable. |
| 1124 | * Do not cross the free scanner. | 1098 | * Do not cross the free scanner. |
| 1125 | */ | 1099 | */ |
| 1126 | for (; end_pfn <= cc->free_pfn; | 1100 | for (; block_end_pfn <= cc->free_pfn; |
| 1127 | low_pfn = end_pfn, end_pfn += pageblock_nr_pages) { | 1101 | low_pfn = block_end_pfn, |
| 1102 | block_start_pfn = block_end_pfn, | ||
| 1103 | block_end_pfn += pageblock_nr_pages) { | ||
| 1128 | 1104 | ||
| 1129 | /* | 1105 | /* |
| 1130 | * This can potentially iterate a massively long zone with | 1106 | * This can potentially iterate a massively long zone with |
| @@ -1135,7 +1111,8 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, | |||
| 1135 | && compact_should_abort(cc)) | 1111 | && compact_should_abort(cc)) |
| 1136 | break; | 1112 | break; |
| 1137 | 1113 | ||
| 1138 | page = pageblock_pfn_to_page(low_pfn, end_pfn, zone); | 1114 | page = pageblock_pfn_to_page(block_start_pfn, block_end_pfn, |
| 1115 | zone); | ||
| 1139 | if (!page) | 1116 | if (!page) |
| 1140 | continue; | 1117 | continue; |
| 1141 | 1118 | ||
| @@ -1154,8 +1131,8 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, | |||
| 1154 | 1131 | ||
| 1155 | /* Perform the isolation */ | 1132 | /* Perform the isolation */ |
| 1156 | isolate_start_pfn = low_pfn; | 1133 | isolate_start_pfn = low_pfn; |
| 1157 | low_pfn = isolate_migratepages_block(cc, low_pfn, end_pfn, | 1134 | low_pfn = isolate_migratepages_block(cc, low_pfn, |
| 1158 | isolate_mode); | 1135 | block_end_pfn, isolate_mode); |
| 1159 | 1136 | ||
| 1160 | if (!low_pfn || cc->contended) { | 1137 | if (!low_pfn || cc->contended) { |
| 1161 | acct_isolated(zone, cc); | 1138 | acct_isolated(zone, cc); |
| @@ -1371,11 +1348,11 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) | |||
| 1371 | */ | 1348 | */ |
| 1372 | cc->migrate_pfn = zone->compact_cached_migrate_pfn[sync]; | 1349 | cc->migrate_pfn = zone->compact_cached_migrate_pfn[sync]; |
| 1373 | cc->free_pfn = zone->compact_cached_free_pfn; | 1350 | cc->free_pfn = zone->compact_cached_free_pfn; |
| 1374 | if (cc->free_pfn < start_pfn || cc->free_pfn > end_pfn) { | 1351 | if (cc->free_pfn < start_pfn || cc->free_pfn >= end_pfn) { |
| 1375 | cc->free_pfn = end_pfn & ~(pageblock_nr_pages-1); | 1352 | cc->free_pfn = round_down(end_pfn - 1, pageblock_nr_pages); |
| 1376 | zone->compact_cached_free_pfn = cc->free_pfn; | 1353 | zone->compact_cached_free_pfn = cc->free_pfn; |
| 1377 | } | 1354 | } |
| 1378 | if (cc->migrate_pfn < start_pfn || cc->migrate_pfn > end_pfn) { | 1355 | if (cc->migrate_pfn < start_pfn || cc->migrate_pfn >= end_pfn) { |
| 1379 | cc->migrate_pfn = start_pfn; | 1356 | cc->migrate_pfn = start_pfn; |
| 1380 | zone->compact_cached_migrate_pfn[0] = cc->migrate_pfn; | 1357 | zone->compact_cached_migrate_pfn[0] = cc->migrate_pfn; |
| 1381 | zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn; | 1358 | zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn; |
diff --git a/mm/debug.c b/mm/debug.c index f05b2d5d6481..df7247b0b532 100644 --- a/mm/debug.c +++ b/mm/debug.c | |||
| @@ -9,75 +9,38 @@ | |||
| 9 | #include <linux/mm.h> | 9 | #include <linux/mm.h> |
| 10 | #include <linux/trace_events.h> | 10 | #include <linux/trace_events.h> |
| 11 | #include <linux/memcontrol.h> | 11 | #include <linux/memcontrol.h> |
| 12 | 12 | #include <trace/events/mmflags.h> | |
| 13 | static const struct trace_print_flags pageflag_names[] = { | 13 | #include <linux/migrate.h> |
| 14 | {1UL << PG_locked, "locked" }, | 14 | #include <linux/page_owner.h> |
| 15 | {1UL << PG_error, "error" }, | 15 | |
| 16 | {1UL << PG_referenced, "referenced" }, | 16 | #include "internal.h" |
| 17 | {1UL << PG_uptodate, "uptodate" }, | 17 | |
| 18 | {1UL << PG_dirty, "dirty" }, | 18 | char *migrate_reason_names[MR_TYPES] = { |
| 19 | {1UL << PG_lru, "lru" }, | 19 | "compaction", |
| 20 | {1UL << PG_active, "active" }, | 20 | "memory_failure", |
| 21 | {1UL << PG_slab, "slab" }, | 21 | "memory_hotplug", |
| 22 | {1UL << PG_owner_priv_1, "owner_priv_1" }, | 22 | "syscall_or_cpuset", |
| 23 | {1UL << PG_arch_1, "arch_1" }, | 23 | "mempolicy_mbind", |
| 24 | {1UL << PG_reserved, "reserved" }, | 24 | "numa_misplaced", |
| 25 | {1UL << PG_private, "private" }, | 25 | "cma", |
| 26 | {1UL << PG_private_2, "private_2" }, | ||
| 27 | {1UL << PG_writeback, "writeback" }, | ||
| 28 | {1UL << PG_head, "head" }, | ||
| 29 | {1UL << PG_swapcache, "swapcache" }, | ||
| 30 | {1UL << PG_mappedtodisk, "mappedtodisk" }, | ||
| 31 | {1UL << PG_reclaim, "reclaim" }, | ||
| 32 | {1UL << PG_swapbacked, "swapbacked" }, | ||
| 33 | {1UL << PG_unevictable, "unevictable" }, | ||
| 34 | #ifdef CONFIG_MMU | ||
| 35 | {1UL << PG_mlocked, "mlocked" }, | ||
| 36 | #endif | ||
| 37 | #ifdef CONFIG_ARCH_USES_PG_UNCACHED | ||
| 38 | {1UL << PG_uncached, "uncached" }, | ||
| 39 | #endif | ||
| 40 | #ifdef CONFIG_MEMORY_FAILURE | ||
| 41 | {1UL << PG_hwpoison, "hwpoison" }, | ||
| 42 | #endif | ||
| 43 | #if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT) | ||
| 44 | {1UL << PG_young, "young" }, | ||
| 45 | {1UL << PG_idle, "idle" }, | ||
| 46 | #endif | ||
| 47 | }; | 26 | }; |
| 48 | 27 | ||
| 49 | static void dump_flags(unsigned long flags, | 28 | const struct trace_print_flags pageflag_names[] = { |
| 50 | const struct trace_print_flags *names, int count) | 29 | __def_pageflag_names, |
| 51 | { | 30 | {0, NULL} |
| 52 | const char *delim = ""; | 31 | }; |
| 53 | unsigned long mask; | ||
| 54 | int i; | ||
| 55 | |||
| 56 | pr_emerg("flags: %#lx(", flags); | ||
| 57 | |||
| 58 | /* remove zone id */ | ||
| 59 | flags &= (1UL << NR_PAGEFLAGS) - 1; | ||
| 60 | |||
| 61 | for (i = 0; i < count && flags; i++) { | ||
| 62 | |||
| 63 | mask = names[i].mask; | ||
| 64 | if ((flags & mask) != mask) | ||
| 65 | continue; | ||
| 66 | |||
| 67 | flags &= ~mask; | ||
| 68 | pr_cont("%s%s", delim, names[i].name); | ||
| 69 | delim = "|"; | ||
| 70 | } | ||
| 71 | 32 | ||
| 72 | /* check for left over flags */ | 33 | const struct trace_print_flags gfpflag_names[] = { |
| 73 | if (flags) | 34 | __def_gfpflag_names, |
| 74 | pr_cont("%s%#lx", delim, flags); | 35 | {0, NULL} |
| 36 | }; | ||
| 75 | 37 | ||
| 76 | pr_cont(")\n"); | 38 | const struct trace_print_flags vmaflag_names[] = { |
| 77 | } | 39 | __def_vmaflag_names, |
| 40 | {0, NULL} | ||
| 41 | }; | ||
| 78 | 42 | ||
| 79 | void dump_page_badflags(struct page *page, const char *reason, | 43 | void __dump_page(struct page *page, const char *reason) |
| 80 | unsigned long badflags) | ||
| 81 | { | 44 | { |
| 82 | pr_emerg("page:%p count:%d mapcount:%d mapping:%p index:%#lx", | 45 | pr_emerg("page:%p count:%d mapcount:%d mapping:%p index:%#lx", |
| 83 | page, atomic_read(&page->_count), page_mapcount(page), | 46 | page, atomic_read(&page->_count), page_mapcount(page), |
| @@ -85,15 +48,13 @@ void dump_page_badflags(struct page *page, const char *reason, | |||
| 85 | if (PageCompound(page)) | 48 | if (PageCompound(page)) |
| 86 | pr_cont(" compound_mapcount: %d", compound_mapcount(page)); | 49 | pr_cont(" compound_mapcount: %d", compound_mapcount(page)); |
| 87 | pr_cont("\n"); | 50 | pr_cont("\n"); |
| 88 | BUILD_BUG_ON(ARRAY_SIZE(pageflag_names) != __NR_PAGEFLAGS); | 51 | BUILD_BUG_ON(ARRAY_SIZE(pageflag_names) != __NR_PAGEFLAGS + 1); |
| 89 | dump_flags(page->flags, pageflag_names, ARRAY_SIZE(pageflag_names)); | 52 | |
| 53 | pr_emerg("flags: %#lx(%pGp)\n", page->flags, &page->flags); | ||
| 54 | |||
| 90 | if (reason) | 55 | if (reason) |
| 91 | pr_alert("page dumped because: %s\n", reason); | 56 | pr_alert("page dumped because: %s\n", reason); |
| 92 | if (page->flags & badflags) { | 57 | |
| 93 | pr_alert("bad because of flags:\n"); | ||
| 94 | dump_flags(page->flags & badflags, | ||
| 95 | pageflag_names, ARRAY_SIZE(pageflag_names)); | ||
| 96 | } | ||
| 97 | #ifdef CONFIG_MEMCG | 58 | #ifdef CONFIG_MEMCG |
| 98 | if (page->mem_cgroup) | 59 | if (page->mem_cgroup) |
| 99 | pr_alert("page->mem_cgroup:%p\n", page->mem_cgroup); | 60 | pr_alert("page->mem_cgroup:%p\n", page->mem_cgroup); |
| @@ -102,67 +63,26 @@ void dump_page_badflags(struct page *page, const char *reason, | |||
| 102 | 63 | ||
| 103 | void dump_page(struct page *page, const char *reason) | 64 | void dump_page(struct page *page, const char *reason) |
| 104 | { | 65 | { |
| 105 | dump_page_badflags(page, reason, 0); | 66 | __dump_page(page, reason); |
| 67 | dump_page_owner(page); | ||
| 106 | } | 68 | } |
| 107 | EXPORT_SYMBOL(dump_page); | 69 | EXPORT_SYMBOL(dump_page); |
| 108 | 70 | ||
| 109 | #ifdef CONFIG_DEBUG_VM | 71 | #ifdef CONFIG_DEBUG_VM |
| 110 | 72 | ||
| 111 | static const struct trace_print_flags vmaflags_names[] = { | ||
| 112 | {VM_READ, "read" }, | ||
| 113 | {VM_WRITE, "write" }, | ||
| 114 | {VM_EXEC, "exec" }, | ||
| 115 | {VM_SHARED, "shared" }, | ||
| 116 | {VM_MAYREAD, "mayread" }, | ||
| 117 | {VM_MAYWRITE, "maywrite" }, | ||
| 118 | {VM_MAYEXEC, "mayexec" }, | ||
| 119 | {VM_MAYSHARE, "mayshare" }, | ||
| 120 | {VM_GROWSDOWN, "growsdown" }, | ||
| 121 | {VM_PFNMAP, "pfnmap" }, | ||
| 122 | {VM_DENYWRITE, "denywrite" }, | ||
| 123 | {VM_LOCKONFAULT, "lockonfault" }, | ||
| 124 | {VM_LOCKED, "locked" }, | ||
| 125 | {VM_IO, "io" }, | ||
| 126 | {VM_SEQ_READ, "seqread" }, | ||
| 127 | {VM_RAND_READ, "randread" }, | ||
| 128 | {VM_DONTCOPY, "dontcopy" }, | ||
| 129 | {VM_DONTEXPAND, "dontexpand" }, | ||
| 130 | {VM_ACCOUNT, "account" }, | ||
| 131 | {VM_NORESERVE, "noreserve" }, | ||
| 132 | {VM_HUGETLB, "hugetlb" }, | ||
| 133 | #if defined(CONFIG_X86) | ||
| 134 | {VM_PAT, "pat" }, | ||
| 135 | #elif defined(CONFIG_PPC) | ||
| 136 | {VM_SAO, "sao" }, | ||
| 137 | #elif defined(CONFIG_PARISC) || defined(CONFIG_METAG) || defined(CONFIG_IA64) | ||
| 138 | {VM_GROWSUP, "growsup" }, | ||
| 139 | #elif !defined(CONFIG_MMU) | ||
| 140 | {VM_MAPPED_COPY, "mappedcopy" }, | ||
| 141 | #else | ||
| 142 | {VM_ARCH_1, "arch_1" }, | ||
| 143 | #endif | ||
| 144 | {VM_DONTDUMP, "dontdump" }, | ||
| 145 | #ifdef CONFIG_MEM_SOFT_DIRTY | ||
| 146 | {VM_SOFTDIRTY, "softdirty" }, | ||
| 147 | #endif | ||
| 148 | {VM_MIXEDMAP, "mixedmap" }, | ||
| 149 | {VM_HUGEPAGE, "hugepage" }, | ||
| 150 | {VM_NOHUGEPAGE, "nohugepage" }, | ||
| 151 | {VM_MERGEABLE, "mergeable" }, | ||
| 152 | }; | ||
| 153 | |||
| 154 | void dump_vma(const struct vm_area_struct *vma) | 73 | void dump_vma(const struct vm_area_struct *vma) |
| 155 | { | 74 | { |
| 156 | pr_emerg("vma %p start %p end %p\n" | 75 | pr_emerg("vma %p start %p end %p\n" |
| 157 | "next %p prev %p mm %p\n" | 76 | "next %p prev %p mm %p\n" |
| 158 | "prot %lx anon_vma %p vm_ops %p\n" | 77 | "prot %lx anon_vma %p vm_ops %p\n" |
| 159 | "pgoff %lx file %p private_data %p\n", | 78 | "pgoff %lx file %p private_data %p\n" |
| 79 | "flags: %#lx(%pGv)\n", | ||
| 160 | vma, (void *)vma->vm_start, (void *)vma->vm_end, vma->vm_next, | 80 | vma, (void *)vma->vm_start, (void *)vma->vm_end, vma->vm_next, |
| 161 | vma->vm_prev, vma->vm_mm, | 81 | vma->vm_prev, vma->vm_mm, |
| 162 | (unsigned long)pgprot_val(vma->vm_page_prot), | 82 | (unsigned long)pgprot_val(vma->vm_page_prot), |
| 163 | vma->anon_vma, vma->vm_ops, vma->vm_pgoff, | 83 | vma->anon_vma, vma->vm_ops, vma->vm_pgoff, |
| 164 | vma->vm_file, vma->vm_private_data); | 84 | vma->vm_file, vma->vm_private_data, |
| 165 | dump_flags(vma->vm_flags, vmaflags_names, ARRAY_SIZE(vmaflags_names)); | 85 | vma->vm_flags, &vma->vm_flags); |
| 166 | } | 86 | } |
| 167 | EXPORT_SYMBOL(dump_vma); | 87 | EXPORT_SYMBOL(dump_vma); |
| 168 | 88 | ||
| @@ -196,7 +116,7 @@ void dump_mm(const struct mm_struct *mm) | |||
| 196 | #if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION) | 116 | #if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION) |
| 197 | "tlb_flush_pending %d\n" | 117 | "tlb_flush_pending %d\n" |
| 198 | #endif | 118 | #endif |
| 199 | "%s", /* This is here to hold the comma */ | 119 | "def_flags: %#lx(%pGv)\n", |
| 200 | 120 | ||
| 201 | mm, mm->mmap, mm->vmacache_seqnum, mm->task_size, | 121 | mm, mm->mmap, mm->vmacache_seqnum, mm->task_size, |
| 202 | #ifdef CONFIG_MMU | 122 | #ifdef CONFIG_MMU |
| @@ -230,11 +150,8 @@ void dump_mm(const struct mm_struct *mm) | |||
| 230 | #if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION) | 150 | #if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION) |
| 231 | mm->tlb_flush_pending, | 151 | mm->tlb_flush_pending, |
| 232 | #endif | 152 | #endif |
| 233 | "" /* This is here to not have a comma! */ | 153 | mm->def_flags, &mm->def_flags |
| 234 | ); | 154 | ); |
| 235 | |||
| 236 | dump_flags(mm->def_flags, vmaflags_names, | ||
| 237 | ARRAY_SIZE(vmaflags_names)); | ||
| 238 | } | 155 | } |
| 239 | 156 | ||
| 240 | #endif /* CONFIG_DEBUG_VM */ | 157 | #endif /* CONFIG_DEBUG_VM */ |
diff --git a/mm/failslab.c b/mm/failslab.c index 79171b4a5826..b0fac98cd938 100644 --- a/mm/failslab.c +++ b/mm/failslab.c | |||
| @@ -1,5 +1,7 @@ | |||
| 1 | #include <linux/fault-inject.h> | 1 | #include <linux/fault-inject.h> |
| 2 | #include <linux/slab.h> | 2 | #include <linux/slab.h> |
| 3 | #include <linux/mm.h> | ||
| 4 | #include "slab.h" | ||
| 3 | 5 | ||
| 4 | static struct { | 6 | static struct { |
| 5 | struct fault_attr attr; | 7 | struct fault_attr attr; |
| @@ -11,18 +13,22 @@ static struct { | |||
| 11 | .cache_filter = false, | 13 | .cache_filter = false, |
| 12 | }; | 14 | }; |
| 13 | 15 | ||
| 14 | bool should_failslab(size_t size, gfp_t gfpflags, unsigned long cache_flags) | 16 | bool should_failslab(struct kmem_cache *s, gfp_t gfpflags) |
| 15 | { | 17 | { |
| 18 | /* No fault-injection for bootstrap cache */ | ||
| 19 | if (unlikely(s == kmem_cache)) | ||
| 20 | return false; | ||
| 21 | |||
| 16 | if (gfpflags & __GFP_NOFAIL) | 22 | if (gfpflags & __GFP_NOFAIL) |
| 17 | return false; | 23 | return false; |
| 18 | 24 | ||
| 19 | if (failslab.ignore_gfp_reclaim && (gfpflags & __GFP_RECLAIM)) | 25 | if (failslab.ignore_gfp_reclaim && (gfpflags & __GFP_RECLAIM)) |
| 20 | return false; | 26 | return false; |
| 21 | 27 | ||
| 22 | if (failslab.cache_filter && !(cache_flags & SLAB_FAILSLAB)) | 28 | if (failslab.cache_filter && !(s->flags & SLAB_FAILSLAB)) |
| 23 | return false; | 29 | return false; |
| 24 | 30 | ||
| 25 | return should_fail(&failslab.attr, size); | 31 | return should_fail(&failslab.attr, s->object_size); |
| 26 | } | 32 | } |
| 27 | 33 | ||
| 28 | static int __init setup_failslab(char *str) | 34 | static int __init setup_failslab(char *str) |
diff --git a/mm/filemap.c b/mm/filemap.c index da7a35d83de7..61b441b191ad 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
| @@ -101,7 +101,7 @@ | |||
| 101 | * ->tree_lock (page_remove_rmap->set_page_dirty) | 101 | * ->tree_lock (page_remove_rmap->set_page_dirty) |
| 102 | * bdi.wb->list_lock (page_remove_rmap->set_page_dirty) | 102 | * bdi.wb->list_lock (page_remove_rmap->set_page_dirty) |
| 103 | * ->inode->i_lock (page_remove_rmap->set_page_dirty) | 103 | * ->inode->i_lock (page_remove_rmap->set_page_dirty) |
| 104 | * ->memcg->move_lock (page_remove_rmap->mem_cgroup_begin_page_stat) | 104 | * ->memcg->move_lock (page_remove_rmap->lock_page_memcg) |
| 105 | * bdi.wb->list_lock (zap_pte_range->set_page_dirty) | 105 | * bdi.wb->list_lock (zap_pte_range->set_page_dirty) |
| 106 | * ->inode->i_lock (zap_pte_range->set_page_dirty) | 106 | * ->inode->i_lock (zap_pte_range->set_page_dirty) |
| 107 | * ->private_lock (zap_pte_range->__set_page_dirty_buffers) | 107 | * ->private_lock (zap_pte_range->__set_page_dirty_buffers) |
| @@ -176,11 +176,9 @@ static void page_cache_tree_delete(struct address_space *mapping, | |||
| 176 | /* | 176 | /* |
| 177 | * Delete a page from the page cache and free it. Caller has to make | 177 | * Delete a page from the page cache and free it. Caller has to make |
| 178 | * sure the page is locked and that nobody else uses it - or that usage | 178 | * sure the page is locked and that nobody else uses it - or that usage |
| 179 | * is safe. The caller must hold the mapping's tree_lock and | 179 | * is safe. The caller must hold the mapping's tree_lock. |
| 180 | * mem_cgroup_begin_page_stat(). | ||
| 181 | */ | 180 | */ |
| 182 | void __delete_from_page_cache(struct page *page, void *shadow, | 181 | void __delete_from_page_cache(struct page *page, void *shadow) |
| 183 | struct mem_cgroup *memcg) | ||
| 184 | { | 182 | { |
| 185 | struct address_space *mapping = page->mapping; | 183 | struct address_space *mapping = page->mapping; |
| 186 | 184 | ||
| @@ -239,8 +237,7 @@ void __delete_from_page_cache(struct page *page, void *shadow, | |||
| 239 | * anyway will be cleared before returning page into buddy allocator. | 237 | * anyway will be cleared before returning page into buddy allocator. |
| 240 | */ | 238 | */ |
| 241 | if (WARN_ON_ONCE(PageDirty(page))) | 239 | if (WARN_ON_ONCE(PageDirty(page))) |
| 242 | account_page_cleaned(page, mapping, memcg, | 240 | account_page_cleaned(page, mapping, inode_to_wb(mapping->host)); |
| 243 | inode_to_wb(mapping->host)); | ||
| 244 | } | 241 | } |
| 245 | 242 | ||
| 246 | /** | 243 | /** |
| @@ -254,7 +251,6 @@ void __delete_from_page_cache(struct page *page, void *shadow, | |||
| 254 | void delete_from_page_cache(struct page *page) | 251 | void delete_from_page_cache(struct page *page) |
| 255 | { | 252 | { |
| 256 | struct address_space *mapping = page->mapping; | 253 | struct address_space *mapping = page->mapping; |
| 257 | struct mem_cgroup *memcg; | ||
| 258 | unsigned long flags; | 254 | unsigned long flags; |
| 259 | 255 | ||
| 260 | void (*freepage)(struct page *); | 256 | void (*freepage)(struct page *); |
| @@ -263,11 +259,9 @@ void delete_from_page_cache(struct page *page) | |||
| 263 | 259 | ||
| 264 | freepage = mapping->a_ops->freepage; | 260 | freepage = mapping->a_ops->freepage; |
| 265 | 261 | ||
| 266 | memcg = mem_cgroup_begin_page_stat(page); | ||
| 267 | spin_lock_irqsave(&mapping->tree_lock, flags); | 262 | spin_lock_irqsave(&mapping->tree_lock, flags); |
| 268 | __delete_from_page_cache(page, NULL, memcg); | 263 | __delete_from_page_cache(page, NULL); |
| 269 | spin_unlock_irqrestore(&mapping->tree_lock, flags); | 264 | spin_unlock_irqrestore(&mapping->tree_lock, flags); |
| 270 | mem_cgroup_end_page_stat(memcg); | ||
| 271 | 265 | ||
| 272 | if (freepage) | 266 | if (freepage) |
| 273 | freepage(page); | 267 | freepage(page); |
| @@ -551,7 +545,6 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) | |||
| 551 | if (!error) { | 545 | if (!error) { |
| 552 | struct address_space *mapping = old->mapping; | 546 | struct address_space *mapping = old->mapping; |
| 553 | void (*freepage)(struct page *); | 547 | void (*freepage)(struct page *); |
| 554 | struct mem_cgroup *memcg; | ||
| 555 | unsigned long flags; | 548 | unsigned long flags; |
| 556 | 549 | ||
| 557 | pgoff_t offset = old->index; | 550 | pgoff_t offset = old->index; |
| @@ -561,9 +554,8 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) | |||
| 561 | new->mapping = mapping; | 554 | new->mapping = mapping; |
| 562 | new->index = offset; | 555 | new->index = offset; |
| 563 | 556 | ||
| 564 | memcg = mem_cgroup_begin_page_stat(old); | ||
| 565 | spin_lock_irqsave(&mapping->tree_lock, flags); | 557 | spin_lock_irqsave(&mapping->tree_lock, flags); |
| 566 | __delete_from_page_cache(old, NULL, memcg); | 558 | __delete_from_page_cache(old, NULL); |
| 567 | error = radix_tree_insert(&mapping->page_tree, offset, new); | 559 | error = radix_tree_insert(&mapping->page_tree, offset, new); |
| 568 | BUG_ON(error); | 560 | BUG_ON(error); |
| 569 | mapping->nrpages++; | 561 | mapping->nrpages++; |
| @@ -576,8 +568,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) | |||
| 576 | if (PageSwapBacked(new)) | 568 | if (PageSwapBacked(new)) |
| 577 | __inc_zone_page_state(new, NR_SHMEM); | 569 | __inc_zone_page_state(new, NR_SHMEM); |
| 578 | spin_unlock_irqrestore(&mapping->tree_lock, flags); | 570 | spin_unlock_irqrestore(&mapping->tree_lock, flags); |
| 579 | mem_cgroup_end_page_stat(memcg); | 571 | mem_cgroup_migrate(old, new); |
| 580 | mem_cgroup_replace_page(old, new); | ||
| 581 | radix_tree_preload_end(); | 572 | radix_tree_preload_end(); |
| 582 | if (freepage) | 573 | if (freepage) |
| 583 | freepage(old); | 574 | freepage(old); |
| @@ -1668,6 +1659,15 @@ find_page: | |||
| 1668 | index, last_index - index); | 1659 | index, last_index - index); |
| 1669 | } | 1660 | } |
| 1670 | if (!PageUptodate(page)) { | 1661 | if (!PageUptodate(page)) { |
| 1662 | /* | ||
| 1663 | * See comment in do_read_cache_page on why | ||
| 1664 | * wait_on_page_locked is used to avoid unnecessary | ||
| 1665 | * serialisations and why it's safe. | ||
| 1666 | */ | ||
| 1667 | wait_on_page_locked_killable(page); | ||
| 1668 | if (PageUptodate(page)) | ||
| 1669 | goto page_ok; | ||
| 1670 | |||
| 1671 | if (inode->i_blkbits == PAGE_CACHE_SHIFT || | 1671 | if (inode->i_blkbits == PAGE_CACHE_SHIFT || |
| 1672 | !mapping->a_ops->is_partially_uptodate) | 1672 | !mapping->a_ops->is_partially_uptodate) |
| 1673 | goto page_not_up_to_date; | 1673 | goto page_not_up_to_date; |
| @@ -2303,7 +2303,7 @@ static struct page *wait_on_page_read(struct page *page) | |||
| 2303 | return page; | 2303 | return page; |
| 2304 | } | 2304 | } |
| 2305 | 2305 | ||
| 2306 | static struct page *__read_cache_page(struct address_space *mapping, | 2306 | static struct page *do_read_cache_page(struct address_space *mapping, |
| 2307 | pgoff_t index, | 2307 | pgoff_t index, |
| 2308 | int (*filler)(void *, struct page *), | 2308 | int (*filler)(void *, struct page *), |
| 2309 | void *data, | 2309 | void *data, |
| @@ -2325,53 +2325,74 @@ repeat: | |||
| 2325 | /* Presumably ENOMEM for radix tree node */ | 2325 | /* Presumably ENOMEM for radix tree node */ |
| 2326 | return ERR_PTR(err); | 2326 | return ERR_PTR(err); |
| 2327 | } | 2327 | } |
| 2328 | |||
| 2329 | filler: | ||
| 2328 | err = filler(data, page); | 2330 | err = filler(data, page); |
| 2329 | if (err < 0) { | 2331 | if (err < 0) { |
| 2330 | page_cache_release(page); | 2332 | page_cache_release(page); |
| 2331 | page = ERR_PTR(err); | 2333 | return ERR_PTR(err); |
| 2332 | } else { | ||
| 2333 | page = wait_on_page_read(page); | ||
| 2334 | } | 2334 | } |
| 2335 | } | ||
| 2336 | return page; | ||
| 2337 | } | ||
| 2338 | |||
| 2339 | static struct page *do_read_cache_page(struct address_space *mapping, | ||
| 2340 | pgoff_t index, | ||
| 2341 | int (*filler)(void *, struct page *), | ||
| 2342 | void *data, | ||
| 2343 | gfp_t gfp) | ||
| 2344 | 2335 | ||
| 2345 | { | 2336 | page = wait_on_page_read(page); |
| 2346 | struct page *page; | 2337 | if (IS_ERR(page)) |
| 2347 | int err; | 2338 | return page; |
| 2339 | goto out; | ||
| 2340 | } | ||
| 2341 | if (PageUptodate(page)) | ||
| 2342 | goto out; | ||
| 2348 | 2343 | ||
| 2349 | retry: | 2344 | /* |
| 2350 | page = __read_cache_page(mapping, index, filler, data, gfp); | 2345 | * Page is not up to date and may be locked due one of the following |
| 2351 | if (IS_ERR(page)) | 2346 | * case a: Page is being filled and the page lock is held |
| 2352 | return page; | 2347 | * case b: Read/write error clearing the page uptodate status |
| 2348 | * case c: Truncation in progress (page locked) | ||
| 2349 | * case d: Reclaim in progress | ||
| 2350 | * | ||
| 2351 | * Case a, the page will be up to date when the page is unlocked. | ||
| 2352 | * There is no need to serialise on the page lock here as the page | ||
| 2353 | * is pinned so the lock gives no additional protection. Even if the | ||
| 2354 | * the page is truncated, the data is still valid if PageUptodate as | ||
| 2355 | * it's a race vs truncate race. | ||
| 2356 | * Case b, the page will not be up to date | ||
| 2357 | * Case c, the page may be truncated but in itself, the data may still | ||
| 2358 | * be valid after IO completes as it's a read vs truncate race. The | ||
| 2359 | * operation must restart if the page is not uptodate on unlock but | ||
| 2360 | * otherwise serialising on page lock to stabilise the mapping gives | ||
| 2361 | * no additional guarantees to the caller as the page lock is | ||
| 2362 | * released before return. | ||
| 2363 | * Case d, similar to truncation. If reclaim holds the page lock, it | ||
| 2364 | * will be a race with remove_mapping that determines if the mapping | ||
| 2365 | * is valid on unlock but otherwise the data is valid and there is | ||
| 2366 | * no need to serialise with page lock. | ||
| 2367 | * | ||
| 2368 | * As the page lock gives no additional guarantee, we optimistically | ||
| 2369 | * wait on the page to be unlocked and check if it's up to date and | ||
| 2370 | * use the page if it is. Otherwise, the page lock is required to | ||
| 2371 | * distinguish between the different cases. The motivation is that we | ||
| 2372 | * avoid spurious serialisations and wakeups when multiple processes | ||
| 2373 | * wait on the same page for IO to complete. | ||
| 2374 | */ | ||
| 2375 | wait_on_page_locked(page); | ||
| 2353 | if (PageUptodate(page)) | 2376 | if (PageUptodate(page)) |
| 2354 | goto out; | 2377 | goto out; |
| 2355 | 2378 | ||
| 2379 | /* Distinguish between all the cases under the safety of the lock */ | ||
| 2356 | lock_page(page); | 2380 | lock_page(page); |
| 2381 | |||
| 2382 | /* Case c or d, restart the operation */ | ||
| 2357 | if (!page->mapping) { | 2383 | if (!page->mapping) { |
| 2358 | unlock_page(page); | 2384 | unlock_page(page); |
| 2359 | page_cache_release(page); | 2385 | page_cache_release(page); |
| 2360 | goto retry; | 2386 | goto repeat; |
| 2361 | } | 2387 | } |
| 2388 | |||
| 2389 | /* Someone else locked and filled the page in a very small window */ | ||
| 2362 | if (PageUptodate(page)) { | 2390 | if (PageUptodate(page)) { |
| 2363 | unlock_page(page); | 2391 | unlock_page(page); |
| 2364 | goto out; | 2392 | goto out; |
| 2365 | } | 2393 | } |
| 2366 | err = filler(data, page); | 2394 | goto filler; |
| 2367 | if (err < 0) { | 2395 | |
| 2368 | page_cache_release(page); | ||
| 2369 | return ERR_PTR(err); | ||
| 2370 | } else { | ||
| 2371 | page = wait_on_page_read(page); | ||
| 2372 | if (IS_ERR(page)) | ||
| 2373 | return page; | ||
| 2374 | } | ||
| 2375 | out: | 2396 | out: |
| 2376 | mark_page_accessed(page); | 2397 | mark_page_accessed(page); |
| 2377 | return page; | 2398 | return page; |
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index e10a4fee88d2..1ea21e203a70 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
| @@ -3220,28 +3220,26 @@ static void unfreeze_page(struct anon_vma *anon_vma, struct page *page) | |||
| 3220 | } | 3220 | } |
| 3221 | } | 3221 | } |
| 3222 | 3222 | ||
| 3223 | static int __split_huge_page_tail(struct page *head, int tail, | 3223 | static void __split_huge_page_tail(struct page *head, int tail, |
| 3224 | struct lruvec *lruvec, struct list_head *list) | 3224 | struct lruvec *lruvec, struct list_head *list) |
| 3225 | { | 3225 | { |
| 3226 | int mapcount; | ||
| 3227 | struct page *page_tail = head + tail; | 3226 | struct page *page_tail = head + tail; |
| 3228 | 3227 | ||
| 3229 | mapcount = atomic_read(&page_tail->_mapcount) + 1; | 3228 | VM_BUG_ON_PAGE(atomic_read(&page_tail->_mapcount) != -1, page_tail); |
| 3230 | VM_BUG_ON_PAGE(atomic_read(&page_tail->_count) != 0, page_tail); | 3229 | VM_BUG_ON_PAGE(atomic_read(&page_tail->_count) != 0, page_tail); |
| 3231 | 3230 | ||
| 3232 | /* | 3231 | /* |
| 3233 | * tail_page->_count is zero and not changing from under us. But | 3232 | * tail_page->_count is zero and not changing from under us. But |
| 3234 | * get_page_unless_zero() may be running from under us on the | 3233 | * get_page_unless_zero() may be running from under us on the |
| 3235 | * tail_page. If we used atomic_set() below instead of atomic_add(), we | 3234 | * tail_page. If we used atomic_set() below instead of atomic_inc(), we |
| 3236 | * would then run atomic_set() concurrently with | 3235 | * would then run atomic_set() concurrently with |
| 3237 | * get_page_unless_zero(), and atomic_set() is implemented in C not | 3236 | * get_page_unless_zero(), and atomic_set() is implemented in C not |
| 3238 | * using locked ops. spin_unlock on x86 sometimes uses locked ops | 3237 | * using locked ops. spin_unlock on x86 sometimes uses locked ops |
| 3239 | * because of PPro errata 66, 92, so unless somebody can guarantee | 3238 | * because of PPro errata 66, 92, so unless somebody can guarantee |
| 3240 | * atomic_set() here would be safe on all archs (and not only on x86), | 3239 | * atomic_set() here would be safe on all archs (and not only on x86), |
| 3241 | * it's safer to use atomic_add(). | 3240 | * it's safer to use atomic_inc(). |
| 3242 | */ | 3241 | */ |
| 3243 | atomic_add(mapcount + 1, &page_tail->_count); | 3242 | atomic_inc(&page_tail->_count); |
| 3244 | |||
| 3245 | 3243 | ||
| 3246 | page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; | 3244 | page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; |
| 3247 | page_tail->flags |= (head->flags & | 3245 | page_tail->flags |= (head->flags & |
| @@ -3275,8 +3273,6 @@ static int __split_huge_page_tail(struct page *head, int tail, | |||
| 3275 | page_tail->index = head->index + tail; | 3273 | page_tail->index = head->index + tail; |
| 3276 | page_cpupid_xchg_last(page_tail, page_cpupid_last(head)); | 3274 | page_cpupid_xchg_last(page_tail, page_cpupid_last(head)); |
| 3277 | lru_add_page_tail(head, page_tail, lruvec, list); | 3275 | lru_add_page_tail(head, page_tail, lruvec, list); |
| 3278 | |||
| 3279 | return mapcount; | ||
| 3280 | } | 3276 | } |
| 3281 | 3277 | ||
| 3282 | static void __split_huge_page(struct page *page, struct list_head *list) | 3278 | static void __split_huge_page(struct page *page, struct list_head *list) |
| @@ -3284,7 +3280,7 @@ static void __split_huge_page(struct page *page, struct list_head *list) | |||
| 3284 | struct page *head = compound_head(page); | 3280 | struct page *head = compound_head(page); |
| 3285 | struct zone *zone = page_zone(head); | 3281 | struct zone *zone = page_zone(head); |
| 3286 | struct lruvec *lruvec; | 3282 | struct lruvec *lruvec; |
| 3287 | int i, tail_mapcount; | 3283 | int i; |
| 3288 | 3284 | ||
| 3289 | /* prevent PageLRU to go away from under us, and freeze lru stats */ | 3285 | /* prevent PageLRU to go away from under us, and freeze lru stats */ |
| 3290 | spin_lock_irq(&zone->lru_lock); | 3286 | spin_lock_irq(&zone->lru_lock); |
| @@ -3293,10 +3289,8 @@ static void __split_huge_page(struct page *page, struct list_head *list) | |||
| 3293 | /* complete memcg works before add pages to LRU */ | 3289 | /* complete memcg works before add pages to LRU */ |
| 3294 | mem_cgroup_split_huge_fixup(head); | 3290 | mem_cgroup_split_huge_fixup(head); |
| 3295 | 3291 | ||
| 3296 | tail_mapcount = 0; | ||
| 3297 | for (i = HPAGE_PMD_NR - 1; i >= 1; i--) | 3292 | for (i = HPAGE_PMD_NR - 1; i >= 1; i--) |
| 3298 | tail_mapcount += __split_huge_page_tail(head, i, lruvec, list); | 3293 | __split_huge_page_tail(head, i, lruvec, list); |
| 3299 | atomic_sub(tail_mapcount, &head->_count); | ||
| 3300 | 3294 | ||
| 3301 | ClearPageCompound(head); | 3295 | ClearPageCompound(head); |
| 3302 | spin_unlock_irq(&zone->lru_lock); | 3296 | spin_unlock_irq(&zone->lru_lock); |
diff --git a/mm/internal.h b/mm/internal.h index a38a21ebddb4..ad9400d759c8 100644 --- a/mm/internal.h +++ b/mm/internal.h | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include <linux/fs.h> | 14 | #include <linux/fs.h> |
| 15 | #include <linux/mm.h> | 15 | #include <linux/mm.h> |
| 16 | #include <linux/pagemap.h> | 16 | #include <linux/pagemap.h> |
| 17 | #include <linux/tracepoint-defs.h> | ||
| 17 | 18 | ||
| 18 | /* | 19 | /* |
| 19 | * The set of flags that only affect watermark checking and reclaim | 20 | * The set of flags that only affect watermark checking and reclaim |
| @@ -131,6 +132,18 @@ __find_buddy_index(unsigned long page_idx, unsigned int order) | |||
| 131 | return page_idx ^ (1 << order); | 132 | return page_idx ^ (1 << order); |
| 132 | } | 133 | } |
| 133 | 134 | ||
| 135 | extern struct page *__pageblock_pfn_to_page(unsigned long start_pfn, | ||
| 136 | unsigned long end_pfn, struct zone *zone); | ||
| 137 | |||
| 138 | static inline struct page *pageblock_pfn_to_page(unsigned long start_pfn, | ||
| 139 | unsigned long end_pfn, struct zone *zone) | ||
| 140 | { | ||
| 141 | if (zone->contiguous) | ||
| 142 | return pfn_to_page(start_pfn); | ||
| 143 | |||
| 144 | return __pageblock_pfn_to_page(start_pfn, end_pfn, zone); | ||
| 145 | } | ||
| 146 | |||
| 134 | extern int __isolate_free_page(struct page *page, unsigned int order); | 147 | extern int __isolate_free_page(struct page *page, unsigned int order); |
| 135 | extern void __free_pages_bootmem(struct page *page, unsigned long pfn, | 148 | extern void __free_pages_bootmem(struct page *page, unsigned long pfn, |
| 136 | unsigned int order); | 149 | unsigned int order); |
| @@ -466,4 +479,9 @@ static inline void try_to_unmap_flush_dirty(void) | |||
| 466 | } | 479 | } |
| 467 | 480 | ||
| 468 | #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */ | 481 | #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */ |
| 482 | |||
| 483 | extern const struct trace_print_flags pageflag_names[]; | ||
| 484 | extern const struct trace_print_flags vmaflag_names[]; | ||
| 485 | extern const struct trace_print_flags gfpflag_names[]; | ||
| 486 | |||
| 469 | #endif /* __MM_INTERNAL_H */ | 487 | #endif /* __MM_INTERNAL_H */ |
diff --git a/mm/kmemcheck.c b/mm/kmemcheck.c index cab58bb592d8..6f4f424037c0 100644 --- a/mm/kmemcheck.c +++ b/mm/kmemcheck.c | |||
| @@ -60,6 +60,9 @@ void kmemcheck_free_shadow(struct page *page, int order) | |||
| 60 | void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object, | 60 | void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object, |
| 61 | size_t size) | 61 | size_t size) |
| 62 | { | 62 | { |
| 63 | if (unlikely(!object)) /* Skip object if allocation failed */ | ||
| 64 | return; | ||
| 65 | |||
| 63 | /* | 66 | /* |
| 64 | * Has already been memset(), which initializes the shadow for us | 67 | * Has already been memset(), which initializes the shadow for us |
| 65 | * as well. | 68 | * as well. |
diff --git a/mm/madvise.c b/mm/madvise.c index f56825b6d2e1..a01147359f3b 100644 --- a/mm/madvise.c +++ b/mm/madvise.c | |||
| @@ -555,8 +555,9 @@ static int madvise_hwpoison(int bhv, unsigned long start, unsigned long end) | |||
| 555 | } | 555 | } |
| 556 | pr_info("Injecting memory failure for page %#lx at %#lx\n", | 556 | pr_info("Injecting memory failure for page %#lx at %#lx\n", |
| 557 | page_to_pfn(p), start); | 557 | page_to_pfn(p), start); |
| 558 | /* Ignore return value for now */ | 558 | ret = memory_failure(page_to_pfn(p), 0, MF_COUNT_INCREASED); |
| 559 | memory_failure(page_to_pfn(p), 0, MF_COUNT_INCREASED); | 559 | if (ret) |
| 560 | return ret; | ||
| 560 | } | 561 | } |
| 561 | return 0; | 562 | return 0; |
| 562 | } | 563 | } |
| @@ -638,14 +639,28 @@ madvise_behavior_valid(int behavior) | |||
| 638 | * some pages ahead. | 639 | * some pages ahead. |
| 639 | * MADV_DONTNEED - the application is finished with the given range, | 640 | * MADV_DONTNEED - the application is finished with the given range, |
| 640 | * so the kernel can free resources associated with it. | 641 | * so the kernel can free resources associated with it. |
| 642 | * MADV_FREE - the application marks pages in the given range as lazy free, | ||
| 643 | * where actual purges are postponed until memory pressure happens. | ||
| 641 | * MADV_REMOVE - the application wants to free up the given range of | 644 | * MADV_REMOVE - the application wants to free up the given range of |
| 642 | * pages and associated backing store. | 645 | * pages and associated backing store. |
| 643 | * MADV_DONTFORK - omit this area from child's address space when forking: | 646 | * MADV_DONTFORK - omit this area from child's address space when forking: |
| 644 | * typically, to avoid COWing pages pinned by get_user_pages(). | 647 | * typically, to avoid COWing pages pinned by get_user_pages(). |
| 645 | * MADV_DOFORK - cancel MADV_DONTFORK: no longer omit this area when forking. | 648 | * MADV_DOFORK - cancel MADV_DONTFORK: no longer omit this area when forking. |
| 649 | * MADV_HWPOISON - trigger memory error handler as if the given memory range | ||
| 650 | * were corrupted by unrecoverable hardware memory failure. | ||
| 651 | * MADV_SOFT_OFFLINE - try to soft-offline the given range of memory. | ||
| 646 | * MADV_MERGEABLE - the application recommends that KSM try to merge pages in | 652 | * MADV_MERGEABLE - the application recommends that KSM try to merge pages in |
| 647 | * this area with pages of identical content from other such areas. | 653 | * this area with pages of identical content from other such areas. |
| 648 | * MADV_UNMERGEABLE- cancel MADV_MERGEABLE: no longer merge pages with others. | 654 | * MADV_UNMERGEABLE- cancel MADV_MERGEABLE: no longer merge pages with others. |
| 655 | * MADV_HUGEPAGE - the application wants to back the given range by transparent | ||
| 656 | * huge pages in the future. Existing pages might be coalesced and | ||
| 657 | * new pages might be allocated as THP. | ||
| 658 | * MADV_NOHUGEPAGE - mark the given range as not worth being backed by | ||
| 659 | * transparent huge pages so the existing pages will not be | ||
| 660 | * coalesced into THP and new pages will not be allocated as THP. | ||
| 661 | * MADV_DONTDUMP - the application wants to prevent pages in the given range | ||
| 662 | * from being included in its core dump. | ||
| 663 | * MADV_DODUMP - cancel MADV_DONTDUMP: no longer exclude from core dump. | ||
| 649 | * | 664 | * |
| 650 | * return values: | 665 | * return values: |
| 651 | * zero - success | 666 | * zero - success |
diff --git a/mm/memblock.c b/mm/memblock.c index dd7989929f13..fc7824fa1b42 100644 --- a/mm/memblock.c +++ b/mm/memblock.c | |||
| @@ -612,14 +612,12 @@ static int __init_memblock memblock_add_region(phys_addr_t base, | |||
| 612 | int nid, | 612 | int nid, |
| 613 | unsigned long flags) | 613 | unsigned long flags) |
| 614 | { | 614 | { |
| 615 | struct memblock_type *type = &memblock.memory; | ||
| 616 | |||
| 617 | memblock_dbg("memblock_add: [%#016llx-%#016llx] flags %#02lx %pF\n", | 615 | memblock_dbg("memblock_add: [%#016llx-%#016llx] flags %#02lx %pF\n", |
| 618 | (unsigned long long)base, | 616 | (unsigned long long)base, |
| 619 | (unsigned long long)base + size - 1, | 617 | (unsigned long long)base + size - 1, |
| 620 | flags, (void *)_RET_IP_); | 618 | flags, (void *)_RET_IP_); |
| 621 | 619 | ||
| 622 | return memblock_add_range(type, base, size, nid, flags); | 620 | return memblock_add_range(&memblock.memory, base, size, nid, flags); |
| 623 | } | 621 | } |
| 624 | 622 | ||
| 625 | int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size) | 623 | int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size) |
| @@ -740,14 +738,12 @@ static int __init_memblock memblock_reserve_region(phys_addr_t base, | |||
| 740 | int nid, | 738 | int nid, |
| 741 | unsigned long flags) | 739 | unsigned long flags) |
| 742 | { | 740 | { |
| 743 | struct memblock_type *type = &memblock.reserved; | ||
| 744 | |||
| 745 | memblock_dbg("memblock_reserve: [%#016llx-%#016llx] flags %#02lx %pF\n", | 741 | memblock_dbg("memblock_reserve: [%#016llx-%#016llx] flags %#02lx %pF\n", |
| 746 | (unsigned long long)base, | 742 | (unsigned long long)base, |
| 747 | (unsigned long long)base + size - 1, | 743 | (unsigned long long)base + size - 1, |
| 748 | flags, (void *)_RET_IP_); | 744 | flags, (void *)_RET_IP_); |
| 749 | 745 | ||
| 750 | return memblock_add_range(type, base, size, nid, flags); | 746 | return memblock_add_range(&memblock.reserved, base, size, nid, flags); |
| 751 | } | 747 | } |
| 752 | 748 | ||
| 753 | int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size) | 749 | int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size) |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d06cae2de783..42882c1e7fce 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
| @@ -268,31 +268,6 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) | |||
| 268 | return (memcg == root_mem_cgroup); | 268 | return (memcg == root_mem_cgroup); |
| 269 | } | 269 | } |
| 270 | 270 | ||
| 271 | /* | ||
| 272 | * We restrict the id in the range of [1, 65535], so it can fit into | ||
| 273 | * an unsigned short. | ||
| 274 | */ | ||
| 275 | #define MEM_CGROUP_ID_MAX USHRT_MAX | ||
| 276 | |||
| 277 | static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg) | ||
| 278 | { | ||
| 279 | return memcg->css.id; | ||
| 280 | } | ||
| 281 | |||
| 282 | /* | ||
| 283 | * A helper function to get mem_cgroup from ID. must be called under | ||
| 284 | * rcu_read_lock(). The caller is responsible for calling | ||
| 285 | * css_tryget_online() if the mem_cgroup is used for charging. (dropping | ||
| 286 | * refcnt from swap can be called against removed memcg.) | ||
| 287 | */ | ||
| 288 | static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id) | ||
| 289 | { | ||
| 290 | struct cgroup_subsys_state *css; | ||
| 291 | |||
| 292 | css = css_from_id(id, &memory_cgrp_subsys); | ||
| 293 | return mem_cgroup_from_css(css); | ||
| 294 | } | ||
| 295 | |||
| 296 | #ifndef CONFIG_SLOB | 271 | #ifndef CONFIG_SLOB |
| 297 | /* | 272 | /* |
| 298 | * This will be the memcg's index in each cache's ->memcg_params.memcg_caches. | 273 | * This will be the memcg's index in each cache's ->memcg_params.memcg_caches. |
| @@ -1709,19 +1684,13 @@ cleanup: | |||
| 1709 | } | 1684 | } |
| 1710 | 1685 | ||
| 1711 | /** | 1686 | /** |
| 1712 | * mem_cgroup_begin_page_stat - begin a page state statistics transaction | 1687 | * lock_page_memcg - lock a page->mem_cgroup binding |
| 1713 | * @page: page that is going to change accounted state | 1688 | * @page: the page |
| 1714 | * | ||
| 1715 | * This function must mark the beginning of an accounted page state | ||
| 1716 | * change to prevent double accounting when the page is concurrently | ||
| 1717 | * being moved to another memcg: | ||
| 1718 | * | 1689 | * |
| 1719 | * memcg = mem_cgroup_begin_page_stat(page); | 1690 | * This function protects unlocked LRU pages from being moved to |
| 1720 | * if (TestClearPageState(page)) | 1691 | * another cgroup and stabilizes their page->mem_cgroup binding. |
| 1721 | * mem_cgroup_update_page_stat(memcg, state, -1); | ||
| 1722 | * mem_cgroup_end_page_stat(memcg); | ||
| 1723 | */ | 1692 | */ |
| 1724 | struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page) | 1693 | void lock_page_memcg(struct page *page) |
| 1725 | { | 1694 | { |
| 1726 | struct mem_cgroup *memcg; | 1695 | struct mem_cgroup *memcg; |
| 1727 | unsigned long flags; | 1696 | unsigned long flags; |
| @@ -1730,25 +1699,18 @@ struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page) | |||
| 1730 | * The RCU lock is held throughout the transaction. The fast | 1699 | * The RCU lock is held throughout the transaction. The fast |
| 1731 | * path can get away without acquiring the memcg->move_lock | 1700 | * path can get away without acquiring the memcg->move_lock |
| 1732 | * because page moving starts with an RCU grace period. | 1701 | * because page moving starts with an RCU grace period. |
| 1733 | * | ||
| 1734 | * The RCU lock also protects the memcg from being freed when | ||
| 1735 | * the page state that is going to change is the only thing | ||
| 1736 | * preventing the page from being uncharged. | ||
| 1737 | * E.g. end-writeback clearing PageWriteback(), which allows | ||
| 1738 | * migration to go ahead and uncharge the page before the | ||
| 1739 | * account transaction might be complete. | ||
| 1740 | */ | 1702 | */ |
| 1741 | rcu_read_lock(); | 1703 | rcu_read_lock(); |
| 1742 | 1704 | ||
| 1743 | if (mem_cgroup_disabled()) | 1705 | if (mem_cgroup_disabled()) |
| 1744 | return NULL; | 1706 | return; |
| 1745 | again: | 1707 | again: |
| 1746 | memcg = page->mem_cgroup; | 1708 | memcg = page->mem_cgroup; |
| 1747 | if (unlikely(!memcg)) | 1709 | if (unlikely(!memcg)) |
| 1748 | return NULL; | 1710 | return; |
| 1749 | 1711 | ||
| 1750 | if (atomic_read(&memcg->moving_account) <= 0) | 1712 | if (atomic_read(&memcg->moving_account) <= 0) |
| 1751 | return memcg; | 1713 | return; |
| 1752 | 1714 | ||
| 1753 | spin_lock_irqsave(&memcg->move_lock, flags); | 1715 | spin_lock_irqsave(&memcg->move_lock, flags); |
| 1754 | if (memcg != page->mem_cgroup) { | 1716 | if (memcg != page->mem_cgroup) { |
| @@ -1759,21 +1721,23 @@ again: | |||
| 1759 | /* | 1721 | /* |
| 1760 | * When charge migration first begins, we can have locked and | 1722 | * When charge migration first begins, we can have locked and |
| 1761 | * unlocked page stat updates happening concurrently. Track | 1723 | * unlocked page stat updates happening concurrently. Track |
| 1762 | * the task who has the lock for mem_cgroup_end_page_stat(). | 1724 | * the task who has the lock for unlock_page_memcg(). |
| 1763 | */ | 1725 | */ |
| 1764 | memcg->move_lock_task = current; | 1726 | memcg->move_lock_task = current; |
| 1765 | memcg->move_lock_flags = flags; | 1727 | memcg->move_lock_flags = flags; |
| 1766 | 1728 | ||
| 1767 | return memcg; | 1729 | return; |
| 1768 | } | 1730 | } |
| 1769 | EXPORT_SYMBOL(mem_cgroup_begin_page_stat); | 1731 | EXPORT_SYMBOL(lock_page_memcg); |
| 1770 | 1732 | ||
| 1771 | /** | 1733 | /** |
| 1772 | * mem_cgroup_end_page_stat - finish a page state statistics transaction | 1734 | * unlock_page_memcg - unlock a page->mem_cgroup binding |
| 1773 | * @memcg: the memcg that was accounted against | 1735 | * @page: the page |
| 1774 | */ | 1736 | */ |
| 1775 | void mem_cgroup_end_page_stat(struct mem_cgroup *memcg) | 1737 | void unlock_page_memcg(struct page *page) |
| 1776 | { | 1738 | { |
| 1739 | struct mem_cgroup *memcg = page->mem_cgroup; | ||
| 1740 | |||
| 1777 | if (memcg && memcg->move_lock_task == current) { | 1741 | if (memcg && memcg->move_lock_task == current) { |
| 1778 | unsigned long flags = memcg->move_lock_flags; | 1742 | unsigned long flags = memcg->move_lock_flags; |
| 1779 | 1743 | ||
| @@ -1785,7 +1749,7 @@ void mem_cgroup_end_page_stat(struct mem_cgroup *memcg) | |||
| 1785 | 1749 | ||
| 1786 | rcu_read_unlock(); | 1750 | rcu_read_unlock(); |
| 1787 | } | 1751 | } |
| 1788 | EXPORT_SYMBOL(mem_cgroup_end_page_stat); | 1752 | EXPORT_SYMBOL(unlock_page_memcg); |
| 1789 | 1753 | ||
| 1790 | /* | 1754 | /* |
| 1791 | * size of first charge trial. "32" comes from vmscan.c's magic value. | 1755 | * size of first charge trial. "32" comes from vmscan.c's magic value. |
| @@ -4488,7 +4452,7 @@ static int mem_cgroup_move_account(struct page *page, | |||
| 4488 | VM_BUG_ON(compound && !PageTransHuge(page)); | 4452 | VM_BUG_ON(compound && !PageTransHuge(page)); |
| 4489 | 4453 | ||
| 4490 | /* | 4454 | /* |
| 4491 | * Prevent mem_cgroup_replace_page() from looking at | 4455 | * Prevent mem_cgroup_migrate() from looking at |
| 4492 | * page->mem_cgroup of its source page while we change it. | 4456 | * page->mem_cgroup of its source page while we change it. |
| 4493 | */ | 4457 | */ |
| 4494 | ret = -EBUSY; | 4458 | ret = -EBUSY; |
| @@ -4923,9 +4887,9 @@ static void mem_cgroup_move_charge(struct mm_struct *mm) | |||
| 4923 | 4887 | ||
| 4924 | lru_add_drain_all(); | 4888 | lru_add_drain_all(); |
| 4925 | /* | 4889 | /* |
| 4926 | * Signal mem_cgroup_begin_page_stat() to take the memcg's | 4890 | * Signal lock_page_memcg() to take the memcg's move_lock |
| 4927 | * move_lock while we're moving its pages to another memcg. | 4891 | * while we're moving its pages to another memcg. Then wait |
| 4928 | * Then wait for already started RCU-only updates to finish. | 4892 | * for already started RCU-only updates to finish. |
| 4929 | */ | 4893 | */ |
| 4930 | atomic_inc(&mc.from->moving_account); | 4894 | atomic_inc(&mc.from->moving_account); |
| 4931 | synchronize_rcu(); | 4895 | synchronize_rcu(); |
| @@ -5517,16 +5481,16 @@ void mem_cgroup_uncharge_list(struct list_head *page_list) | |||
| 5517 | } | 5481 | } |
| 5518 | 5482 | ||
| 5519 | /** | 5483 | /** |
| 5520 | * mem_cgroup_replace_page - migrate a charge to another page | 5484 | * mem_cgroup_migrate - charge a page's replacement |
| 5521 | * @oldpage: currently charged page | 5485 | * @oldpage: currently circulating page |
| 5522 | * @newpage: page to transfer the charge to | 5486 | * @newpage: replacement page |
| 5523 | * | 5487 | * |
| 5524 | * Migrate the charge from @oldpage to @newpage. | 5488 | * Charge @newpage as a replacement page for @oldpage. @oldpage will |
| 5489 | * be uncharged upon free. | ||
| 5525 | * | 5490 | * |
| 5526 | * Both pages must be locked, @newpage->mapping must be set up. | 5491 | * Both pages must be locked, @newpage->mapping must be set up. |
| 5527 | * Either or both pages might be on the LRU already. | ||
| 5528 | */ | 5492 | */ |
| 5529 | void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage) | 5493 | void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) |
| 5530 | { | 5494 | { |
| 5531 | struct mem_cgroup *memcg; | 5495 | struct mem_cgroup *memcg; |
| 5532 | unsigned int nr_pages; | 5496 | unsigned int nr_pages; |
| @@ -5559,7 +5523,7 @@ void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage) | |||
| 5559 | page_counter_charge(&memcg->memsw, nr_pages); | 5523 | page_counter_charge(&memcg->memsw, nr_pages); |
| 5560 | css_get_many(&memcg->css, nr_pages); | 5524 | css_get_many(&memcg->css, nr_pages); |
| 5561 | 5525 | ||
| 5562 | commit_charge(newpage, memcg, true); | 5526 | commit_charge(newpage, memcg, false); |
| 5563 | 5527 | ||
| 5564 | local_irq_disable(); | 5528 | local_irq_disable(); |
| 5565 | mem_cgroup_charge_statistics(memcg, newpage, compound, nr_pages); | 5529 | mem_cgroup_charge_statistics(memcg, newpage, compound, nr_pages); |
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index ac595e7a3a95..67c30eb993f0 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c | |||
| @@ -826,8 +826,6 @@ static struct page_state { | |||
| 826 | #undef lru | 826 | #undef lru |
| 827 | #undef swapbacked | 827 | #undef swapbacked |
| 828 | #undef head | 828 | #undef head |
| 829 | #undef tail | ||
| 830 | #undef compound | ||
| 831 | #undef slab | 829 | #undef slab |
| 832 | #undef reserved | 830 | #undef reserved |
| 833 | 831 | ||
diff --git a/mm/memory.c b/mm/memory.c index 906d8e3b42c0..0e247642ed5b 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
| @@ -1897,7 +1897,9 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr, | |||
| 1897 | unsigned long end = addr + size; | 1897 | unsigned long end = addr + size; |
| 1898 | int err; | 1898 | int err; |
| 1899 | 1899 | ||
| 1900 | BUG_ON(addr >= end); | 1900 | if (WARN_ON(addr >= end)) |
| 1901 | return -EINVAL; | ||
| 1902 | |||
| 1901 | pgd = pgd_offset(mm, addr); | 1903 | pgd = pgd_offset(mm, addr); |
| 1902 | do { | 1904 | do { |
| 1903 | next = pgd_addr_end(addr, end); | 1905 | next = pgd_addr_end(addr, end); |
| @@ -3143,8 +3145,7 @@ static int do_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 3143 | unsigned long address, pte_t *page_table, pmd_t *pmd, | 3145 | unsigned long address, pte_t *page_table, pmd_t *pmd, |
| 3144 | unsigned int flags, pte_t orig_pte) | 3146 | unsigned int flags, pte_t orig_pte) |
| 3145 | { | 3147 | { |
| 3146 | pgoff_t pgoff = (((address & PAGE_MASK) | 3148 | pgoff_t pgoff = linear_page_index(vma, address); |
| 3147 | - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; | ||
| 3148 | 3149 | ||
| 3149 | pte_unmap(page_table); | 3150 | pte_unmap(page_table); |
| 3150 | /* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */ | 3151 | /* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */ |
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 979b18cbd343..24ea06393816 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c | |||
| @@ -77,6 +77,9 @@ static struct { | |||
| 77 | #define memhp_lock_acquire() lock_map_acquire(&mem_hotplug.dep_map) | 77 | #define memhp_lock_acquire() lock_map_acquire(&mem_hotplug.dep_map) |
| 78 | #define memhp_lock_release() lock_map_release(&mem_hotplug.dep_map) | 78 | #define memhp_lock_release() lock_map_release(&mem_hotplug.dep_map) |
| 79 | 79 | ||
| 80 | bool memhp_auto_online; | ||
| 81 | EXPORT_SYMBOL_GPL(memhp_auto_online); | ||
| 82 | |||
| 80 | void get_online_mems(void) | 83 | void get_online_mems(void) |
| 81 | { | 84 | { |
| 82 | might_sleep(); | 85 | might_sleep(); |
| @@ -509,6 +512,8 @@ int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn, | |||
| 509 | int start_sec, end_sec; | 512 | int start_sec, end_sec; |
| 510 | struct vmem_altmap *altmap; | 513 | struct vmem_altmap *altmap; |
| 511 | 514 | ||
| 515 | clear_zone_contiguous(zone); | ||
| 516 | |||
| 512 | /* during initialize mem_map, align hot-added range to section */ | 517 | /* during initialize mem_map, align hot-added range to section */ |
| 513 | start_sec = pfn_to_section_nr(phys_start_pfn); | 518 | start_sec = pfn_to_section_nr(phys_start_pfn); |
| 514 | end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1); | 519 | end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1); |
| @@ -521,7 +526,8 @@ int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn, | |||
| 521 | if (altmap->base_pfn != phys_start_pfn | 526 | if (altmap->base_pfn != phys_start_pfn |
| 522 | || vmem_altmap_offset(altmap) > nr_pages) { | 527 | || vmem_altmap_offset(altmap) > nr_pages) { |
| 523 | pr_warn_once("memory add fail, invalid altmap\n"); | 528 | pr_warn_once("memory add fail, invalid altmap\n"); |
| 524 | return -EINVAL; | 529 | err = -EINVAL; |
| 530 | goto out; | ||
| 525 | } | 531 | } |
| 526 | altmap->alloc = 0; | 532 | altmap->alloc = 0; |
| 527 | } | 533 | } |
| @@ -539,7 +545,8 @@ int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn, | |||
| 539 | err = 0; | 545 | err = 0; |
| 540 | } | 546 | } |
| 541 | vmemmap_populate_print_last(); | 547 | vmemmap_populate_print_last(); |
| 542 | 548 | out: | |
| 549 | set_zone_contiguous(zone); | ||
| 543 | return err; | 550 | return err; |
| 544 | } | 551 | } |
| 545 | EXPORT_SYMBOL_GPL(__add_pages); | 552 | EXPORT_SYMBOL_GPL(__add_pages); |
| @@ -811,6 +818,8 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, | |||
| 811 | } | 818 | } |
| 812 | } | 819 | } |
| 813 | 820 | ||
| 821 | clear_zone_contiguous(zone); | ||
| 822 | |||
| 814 | /* | 823 | /* |
| 815 | * We can only remove entire sections | 824 | * We can only remove entire sections |
| 816 | */ | 825 | */ |
| @@ -826,6 +835,9 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, | |||
| 826 | if (ret) | 835 | if (ret) |
| 827 | break; | 836 | break; |
| 828 | } | 837 | } |
| 838 | |||
| 839 | set_zone_contiguous(zone); | ||
| 840 | |||
| 829 | return ret; | 841 | return ret; |
| 830 | } | 842 | } |
| 831 | EXPORT_SYMBOL_GPL(__remove_pages); | 843 | EXPORT_SYMBOL_GPL(__remove_pages); |
| @@ -1261,8 +1273,13 @@ int zone_for_memory(int nid, u64 start, u64 size, int zone_default, | |||
| 1261 | return zone_default; | 1273 | return zone_default; |
| 1262 | } | 1274 | } |
| 1263 | 1275 | ||
| 1276 | static int online_memory_block(struct memory_block *mem, void *arg) | ||
| 1277 | { | ||
| 1278 | return memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE); | ||
| 1279 | } | ||
| 1280 | |||
| 1264 | /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ | 1281 | /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ |
| 1265 | int __ref add_memory_resource(int nid, struct resource *res) | 1282 | int __ref add_memory_resource(int nid, struct resource *res, bool online) |
| 1266 | { | 1283 | { |
| 1267 | u64 start, size; | 1284 | u64 start, size; |
| 1268 | pg_data_t *pgdat = NULL; | 1285 | pg_data_t *pgdat = NULL; |
| @@ -1322,6 +1339,11 @@ int __ref add_memory_resource(int nid, struct resource *res) | |||
| 1322 | /* create new memmap entry */ | 1339 | /* create new memmap entry */ |
| 1323 | firmware_map_add_hotplug(start, start + size, "System RAM"); | 1340 | firmware_map_add_hotplug(start, start + size, "System RAM"); |
| 1324 | 1341 | ||
| 1342 | /* online pages if requested */ | ||
| 1343 | if (online) | ||
| 1344 | walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), | ||
| 1345 | NULL, online_memory_block); | ||
| 1346 | |||
| 1325 | goto out; | 1347 | goto out; |
| 1326 | 1348 | ||
| 1327 | error: | 1349 | error: |
| @@ -1345,7 +1367,7 @@ int __ref add_memory(int nid, u64 start, u64 size) | |||
| 1345 | if (IS_ERR(res)) | 1367 | if (IS_ERR(res)) |
| 1346 | return PTR_ERR(res); | 1368 | return PTR_ERR(res); |
| 1347 | 1369 | ||
| 1348 | ret = add_memory_resource(nid, res); | 1370 | ret = add_memory_resource(nid, res, memhp_auto_online); |
| 1349 | if (ret < 0) | 1371 | if (ret < 0) |
| 1350 | release_memory_resource(res); | 1372 | release_memory_resource(res); |
| 1351 | return ret; | 1373 | return ret; |
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 9a3f6b90e628..8cbc74387df3 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
| @@ -643,7 +643,9 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end, | |||
| 643 | 643 | ||
| 644 | if (flags & MPOL_MF_LAZY) { | 644 | if (flags & MPOL_MF_LAZY) { |
| 645 | /* Similar to task_numa_work, skip inaccessible VMAs */ | 645 | /* Similar to task_numa_work, skip inaccessible VMAs */ |
| 646 | if (vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)) | 646 | if (!is_vm_hugetlb_page(vma) && |
| 647 | (vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)) && | ||
| 648 | !(vma->vm_flags & VM_MIXEDMAP)) | ||
| 647 | change_prot_numa(vma, start, endvma); | 649 | change_prot_numa(vma, start, endvma); |
| 648 | return 1; | 650 | return 1; |
| 649 | } | 651 | } |
diff --git a/mm/migrate.c b/mm/migrate.c index 3ad0fea5c438..568284ec75d4 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
| @@ -38,6 +38,7 @@ | |||
| 38 | #include <linux/balloon_compaction.h> | 38 | #include <linux/balloon_compaction.h> |
| 39 | #include <linux/mmu_notifier.h> | 39 | #include <linux/mmu_notifier.h> |
| 40 | #include <linux/page_idle.h> | 40 | #include <linux/page_idle.h> |
| 41 | #include <linux/page_owner.h> | ||
| 41 | 42 | ||
| 42 | #include <asm/tlbflush.h> | 43 | #include <asm/tlbflush.h> |
| 43 | 44 | ||
| @@ -325,7 +326,6 @@ int migrate_page_move_mapping(struct address_space *mapping, | |||
| 325 | return -EAGAIN; | 326 | return -EAGAIN; |
| 326 | 327 | ||
| 327 | /* No turning back from here */ | 328 | /* No turning back from here */ |
| 328 | set_page_memcg(newpage, page_memcg(page)); | ||
| 329 | newpage->index = page->index; | 329 | newpage->index = page->index; |
| 330 | newpage->mapping = page->mapping; | 330 | newpage->mapping = page->mapping; |
| 331 | if (PageSwapBacked(page)) | 331 | if (PageSwapBacked(page)) |
| @@ -372,7 +372,6 @@ int migrate_page_move_mapping(struct address_space *mapping, | |||
| 372 | * Now we know that no one else is looking at the page: | 372 | * Now we know that no one else is looking at the page: |
| 373 | * no turning back from here. | 373 | * no turning back from here. |
| 374 | */ | 374 | */ |
| 375 | set_page_memcg(newpage, page_memcg(page)); | ||
| 376 | newpage->index = page->index; | 375 | newpage->index = page->index; |
| 377 | newpage->mapping = page->mapping; | 376 | newpage->mapping = page->mapping; |
| 378 | if (PageSwapBacked(page)) | 377 | if (PageSwapBacked(page)) |
| @@ -457,9 +456,9 @@ int migrate_huge_page_move_mapping(struct address_space *mapping, | |||
| 457 | return -EAGAIN; | 456 | return -EAGAIN; |
| 458 | } | 457 | } |
| 459 | 458 | ||
| 460 | set_page_memcg(newpage, page_memcg(page)); | ||
| 461 | newpage->index = page->index; | 459 | newpage->index = page->index; |
| 462 | newpage->mapping = page->mapping; | 460 | newpage->mapping = page->mapping; |
| 461 | |||
| 463 | get_page(newpage); | 462 | get_page(newpage); |
| 464 | 463 | ||
| 465 | radix_tree_replace_slot(pslot, newpage); | 464 | radix_tree_replace_slot(pslot, newpage); |
| @@ -467,6 +466,7 @@ int migrate_huge_page_move_mapping(struct address_space *mapping, | |||
| 467 | page_unfreeze_refs(page, expected_count - 1); | 466 | page_unfreeze_refs(page, expected_count - 1); |
| 468 | 467 | ||
| 469 | spin_unlock_irq(&mapping->tree_lock); | 468 | spin_unlock_irq(&mapping->tree_lock); |
| 469 | |||
| 470 | return MIGRATEPAGE_SUCCESS; | 470 | return MIGRATEPAGE_SUCCESS; |
| 471 | } | 471 | } |
| 472 | 472 | ||
| @@ -578,6 +578,10 @@ void migrate_page_copy(struct page *newpage, struct page *page) | |||
| 578 | */ | 578 | */ |
| 579 | if (PageWriteback(newpage)) | 579 | if (PageWriteback(newpage)) |
| 580 | end_page_writeback(newpage); | 580 | end_page_writeback(newpage); |
| 581 | |||
| 582 | copy_page_owner(page, newpage); | ||
| 583 | |||
| 584 | mem_cgroup_migrate(page, newpage); | ||
| 581 | } | 585 | } |
| 582 | 586 | ||
| 583 | /************************************************************ | 587 | /************************************************************ |
| @@ -772,7 +776,6 @@ static int move_to_new_page(struct page *newpage, struct page *page, | |||
| 772 | * page is freed; but stats require that PageAnon be left as PageAnon. | 776 | * page is freed; but stats require that PageAnon be left as PageAnon. |
| 773 | */ | 777 | */ |
| 774 | if (rc == MIGRATEPAGE_SUCCESS) { | 778 | if (rc == MIGRATEPAGE_SUCCESS) { |
| 775 | set_page_memcg(page, NULL); | ||
| 776 | if (!PageAnon(page)) | 779 | if (!PageAnon(page)) |
| 777 | page->mapping = NULL; | 780 | page->mapping = NULL; |
| 778 | } | 781 | } |
| @@ -952,8 +955,10 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page, | |||
| 952 | } | 955 | } |
| 953 | 956 | ||
| 954 | rc = __unmap_and_move(page, newpage, force, mode); | 957 | rc = __unmap_and_move(page, newpage, force, mode); |
| 955 | if (rc == MIGRATEPAGE_SUCCESS) | 958 | if (rc == MIGRATEPAGE_SUCCESS) { |
| 956 | put_new_page = NULL; | 959 | put_new_page = NULL; |
| 960 | set_page_owner_migrate_reason(newpage, reason); | ||
| 961 | } | ||
| 957 | 962 | ||
| 958 | out: | 963 | out: |
| 959 | if (rc != -EAGAIN) { | 964 | if (rc != -EAGAIN) { |
| @@ -1018,7 +1023,7 @@ out: | |||
| 1018 | static int unmap_and_move_huge_page(new_page_t get_new_page, | 1023 | static int unmap_and_move_huge_page(new_page_t get_new_page, |
| 1019 | free_page_t put_new_page, unsigned long private, | 1024 | free_page_t put_new_page, unsigned long private, |
| 1020 | struct page *hpage, int force, | 1025 | struct page *hpage, int force, |
| 1021 | enum migrate_mode mode) | 1026 | enum migrate_mode mode, int reason) |
| 1022 | { | 1027 | { |
| 1023 | int rc = -EAGAIN; | 1028 | int rc = -EAGAIN; |
| 1024 | int *result = NULL; | 1029 | int *result = NULL; |
| @@ -1076,6 +1081,7 @@ put_anon: | |||
| 1076 | if (rc == MIGRATEPAGE_SUCCESS) { | 1081 | if (rc == MIGRATEPAGE_SUCCESS) { |
| 1077 | hugetlb_cgroup_migrate(hpage, new_hpage); | 1082 | hugetlb_cgroup_migrate(hpage, new_hpage); |
| 1078 | put_new_page = NULL; | 1083 | put_new_page = NULL; |
| 1084 | set_page_owner_migrate_reason(new_hpage, reason); | ||
| 1079 | } | 1085 | } |
| 1080 | 1086 | ||
| 1081 | unlock_page(hpage); | 1087 | unlock_page(hpage); |
| @@ -1148,7 +1154,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page, | |||
| 1148 | if (PageHuge(page)) | 1154 | if (PageHuge(page)) |
| 1149 | rc = unmap_and_move_huge_page(get_new_page, | 1155 | rc = unmap_and_move_huge_page(get_new_page, |
| 1150 | put_new_page, private, page, | 1156 | put_new_page, private, page, |
| 1151 | pass > 2, mode); | 1157 | pass > 2, mode, reason); |
| 1152 | else | 1158 | else |
| 1153 | rc = unmap_and_move(get_new_page, put_new_page, | 1159 | rc = unmap_and_move(get_new_page, put_new_page, |
| 1154 | private, page, pass > 2, mode, | 1160 | private, page, pass > 2, mode, |
| @@ -1836,9 +1842,8 @@ fail_putback: | |||
| 1836 | } | 1842 | } |
| 1837 | 1843 | ||
| 1838 | mlock_migrate_page(new_page, page); | 1844 | mlock_migrate_page(new_page, page); |
| 1839 | set_page_memcg(new_page, page_memcg(page)); | ||
| 1840 | set_page_memcg(page, NULL); | ||
| 1841 | page_remove_rmap(page, true); | 1845 | page_remove_rmap(page, true); |
| 1846 | set_page_owner_migrate_reason(new_page, MR_NUMA_MISPLACED); | ||
| 1842 | 1847 | ||
| 1843 | spin_unlock(ptl); | 1848 | spin_unlock(ptl); |
| 1844 | mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); | 1849 | mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); |
diff --git a/mm/oom_kill.c b/mm/oom_kill.c index dc490c06941b..e97a05d9621f 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c | |||
| @@ -386,10 +386,11 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask) | |||
| 386 | static void dump_header(struct oom_control *oc, struct task_struct *p, | 386 | static void dump_header(struct oom_control *oc, struct task_struct *p, |
| 387 | struct mem_cgroup *memcg) | 387 | struct mem_cgroup *memcg) |
| 388 | { | 388 | { |
| 389 | pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, " | 389 | pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), order=%d, " |
| 390 | "oom_score_adj=%hd\n", | 390 | "oom_score_adj=%hd\n", |
| 391 | current->comm, oc->gfp_mask, oc->order, | 391 | current->comm, oc->gfp_mask, &oc->gfp_mask, oc->order, |
| 392 | current->signal->oom_score_adj); | 392 | current->signal->oom_score_adj); |
| 393 | |||
| 393 | cpuset_print_current_mems_allowed(); | 394 | cpuset_print_current_mems_allowed(); |
| 394 | dump_stack(); | 395 | dump_stack(); |
| 395 | if (memcg) | 396 | if (memcg) |
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 6fe7d15bd1f7..11ff8f758631 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
| @@ -1169,6 +1169,7 @@ static void wb_update_dirty_ratelimit(struct dirty_throttle_control *dtc, | |||
| 1169 | unsigned long balanced_dirty_ratelimit; | 1169 | unsigned long balanced_dirty_ratelimit; |
| 1170 | unsigned long step; | 1170 | unsigned long step; |
| 1171 | unsigned long x; | 1171 | unsigned long x; |
| 1172 | unsigned long shift; | ||
| 1172 | 1173 | ||
| 1173 | /* | 1174 | /* |
| 1174 | * The dirty rate will match the writeout rate in long term, except | 1175 | * The dirty rate will match the writeout rate in long term, except |
| @@ -1293,11 +1294,11 @@ static void wb_update_dirty_ratelimit(struct dirty_throttle_control *dtc, | |||
| 1293 | * rate itself is constantly fluctuating. So decrease the track speed | 1294 | * rate itself is constantly fluctuating. So decrease the track speed |
| 1294 | * when it gets close to the target. Helps eliminate pointless tremors. | 1295 | * when it gets close to the target. Helps eliminate pointless tremors. |
| 1295 | */ | 1296 | */ |
| 1296 | step >>= dirty_ratelimit / (2 * step + 1); | 1297 | shift = dirty_ratelimit / (2 * step + 1); |
| 1297 | /* | 1298 | if (shift < BITS_PER_LONG) |
| 1298 | * Limit the tracking speed to avoid overshooting. | 1299 | step = DIV_ROUND_UP(step >> shift, 8); |
| 1299 | */ | 1300 | else |
| 1300 | step = (step + 7) / 8; | 1301 | step = 0; |
| 1301 | 1302 | ||
| 1302 | if (dirty_ratelimit < balanced_dirty_ratelimit) | 1303 | if (dirty_ratelimit < balanced_dirty_ratelimit) |
| 1303 | dirty_ratelimit += step; | 1304 | dirty_ratelimit += step; |
| @@ -2409,12 +2410,11 @@ int __set_page_dirty_no_writeback(struct page *page) | |||
| 2409 | /* | 2410 | /* |
| 2410 | * Helper function for set_page_dirty family. | 2411 | * Helper function for set_page_dirty family. |
| 2411 | * | 2412 | * |
| 2412 | * Caller must hold mem_cgroup_begin_page_stat(). | 2413 | * Caller must hold lock_page_memcg(). |
| 2413 | * | 2414 | * |
| 2414 | * NOTE: This relies on being atomic wrt interrupts. | 2415 | * NOTE: This relies on being atomic wrt interrupts. |
| 2415 | */ | 2416 | */ |
| 2416 | void account_page_dirtied(struct page *page, struct address_space *mapping, | 2417 | void account_page_dirtied(struct page *page, struct address_space *mapping) |
| 2417 | struct mem_cgroup *memcg) | ||
| 2418 | { | 2418 | { |
| 2419 | struct inode *inode = mapping->host; | 2419 | struct inode *inode = mapping->host; |
| 2420 | 2420 | ||
| @@ -2426,7 +2426,7 @@ void account_page_dirtied(struct page *page, struct address_space *mapping, | |||
| 2426 | inode_attach_wb(inode, page); | 2426 | inode_attach_wb(inode, page); |
| 2427 | wb = inode_to_wb(inode); | 2427 | wb = inode_to_wb(inode); |
| 2428 | 2428 | ||
| 2429 | mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_DIRTY); | 2429 | mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_DIRTY); |
| 2430 | __inc_zone_page_state(page, NR_FILE_DIRTY); | 2430 | __inc_zone_page_state(page, NR_FILE_DIRTY); |
| 2431 | __inc_zone_page_state(page, NR_DIRTIED); | 2431 | __inc_zone_page_state(page, NR_DIRTIED); |
| 2432 | __inc_wb_stat(wb, WB_RECLAIMABLE); | 2432 | __inc_wb_stat(wb, WB_RECLAIMABLE); |
| @@ -2441,13 +2441,13 @@ EXPORT_SYMBOL(account_page_dirtied); | |||
| 2441 | /* | 2441 | /* |
| 2442 | * Helper function for deaccounting dirty page without writeback. | 2442 | * Helper function for deaccounting dirty page without writeback. |
| 2443 | * | 2443 | * |
| 2444 | * Caller must hold mem_cgroup_begin_page_stat(). | 2444 | * Caller must hold lock_page_memcg(). |
| 2445 | */ | 2445 | */ |
| 2446 | void account_page_cleaned(struct page *page, struct address_space *mapping, | 2446 | void account_page_cleaned(struct page *page, struct address_space *mapping, |
| 2447 | struct mem_cgroup *memcg, struct bdi_writeback *wb) | 2447 | struct bdi_writeback *wb) |
| 2448 | { | 2448 | { |
| 2449 | if (mapping_cap_account_dirty(mapping)) { | 2449 | if (mapping_cap_account_dirty(mapping)) { |
| 2450 | mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_DIRTY); | 2450 | mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_DIRTY); |
| 2451 | dec_zone_page_state(page, NR_FILE_DIRTY); | 2451 | dec_zone_page_state(page, NR_FILE_DIRTY); |
| 2452 | dec_wb_stat(wb, WB_RECLAIMABLE); | 2452 | dec_wb_stat(wb, WB_RECLAIMABLE); |
| 2453 | task_io_account_cancelled_write(PAGE_CACHE_SIZE); | 2453 | task_io_account_cancelled_write(PAGE_CACHE_SIZE); |
| @@ -2468,26 +2468,24 @@ void account_page_cleaned(struct page *page, struct address_space *mapping, | |||
| 2468 | */ | 2468 | */ |
| 2469 | int __set_page_dirty_nobuffers(struct page *page) | 2469 | int __set_page_dirty_nobuffers(struct page *page) |
| 2470 | { | 2470 | { |
| 2471 | struct mem_cgroup *memcg; | 2471 | lock_page_memcg(page); |
| 2472 | |||
| 2473 | memcg = mem_cgroup_begin_page_stat(page); | ||
| 2474 | if (!TestSetPageDirty(page)) { | 2472 | if (!TestSetPageDirty(page)) { |
| 2475 | struct address_space *mapping = page_mapping(page); | 2473 | struct address_space *mapping = page_mapping(page); |
| 2476 | unsigned long flags; | 2474 | unsigned long flags; |
| 2477 | 2475 | ||
| 2478 | if (!mapping) { | 2476 | if (!mapping) { |
| 2479 | mem_cgroup_end_page_stat(memcg); | 2477 | unlock_page_memcg(page); |
| 2480 | return 1; | 2478 | return 1; |
| 2481 | } | 2479 | } |
| 2482 | 2480 | ||
| 2483 | spin_lock_irqsave(&mapping->tree_lock, flags); | 2481 | spin_lock_irqsave(&mapping->tree_lock, flags); |
| 2484 | BUG_ON(page_mapping(page) != mapping); | 2482 | BUG_ON(page_mapping(page) != mapping); |
| 2485 | WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page)); | 2483 | WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page)); |
| 2486 | account_page_dirtied(page, mapping, memcg); | 2484 | account_page_dirtied(page, mapping); |
| 2487 | radix_tree_tag_set(&mapping->page_tree, page_index(page), | 2485 | radix_tree_tag_set(&mapping->page_tree, page_index(page), |
| 2488 | PAGECACHE_TAG_DIRTY); | 2486 | PAGECACHE_TAG_DIRTY); |
| 2489 | spin_unlock_irqrestore(&mapping->tree_lock, flags); | 2487 | spin_unlock_irqrestore(&mapping->tree_lock, flags); |
| 2490 | mem_cgroup_end_page_stat(memcg); | 2488 | unlock_page_memcg(page); |
| 2491 | 2489 | ||
| 2492 | if (mapping->host) { | 2490 | if (mapping->host) { |
| 2493 | /* !PageAnon && !swapper_space */ | 2491 | /* !PageAnon && !swapper_space */ |
| @@ -2495,7 +2493,7 @@ int __set_page_dirty_nobuffers(struct page *page) | |||
| 2495 | } | 2493 | } |
| 2496 | return 1; | 2494 | return 1; |
| 2497 | } | 2495 | } |
| 2498 | mem_cgroup_end_page_stat(memcg); | 2496 | unlock_page_memcg(page); |
| 2499 | return 0; | 2497 | return 0; |
| 2500 | } | 2498 | } |
| 2501 | EXPORT_SYMBOL(__set_page_dirty_nobuffers); | 2499 | EXPORT_SYMBOL(__set_page_dirty_nobuffers); |
| @@ -2625,17 +2623,16 @@ void cancel_dirty_page(struct page *page) | |||
| 2625 | if (mapping_cap_account_dirty(mapping)) { | 2623 | if (mapping_cap_account_dirty(mapping)) { |
| 2626 | struct inode *inode = mapping->host; | 2624 | struct inode *inode = mapping->host; |
| 2627 | struct bdi_writeback *wb; | 2625 | struct bdi_writeback *wb; |
| 2628 | struct mem_cgroup *memcg; | ||
| 2629 | bool locked; | 2626 | bool locked; |
| 2630 | 2627 | ||
| 2631 | memcg = mem_cgroup_begin_page_stat(page); | 2628 | lock_page_memcg(page); |
| 2632 | wb = unlocked_inode_to_wb_begin(inode, &locked); | 2629 | wb = unlocked_inode_to_wb_begin(inode, &locked); |
| 2633 | 2630 | ||
| 2634 | if (TestClearPageDirty(page)) | 2631 | if (TestClearPageDirty(page)) |
| 2635 | account_page_cleaned(page, mapping, memcg, wb); | 2632 | account_page_cleaned(page, mapping, wb); |
| 2636 | 2633 | ||
| 2637 | unlocked_inode_to_wb_end(inode, locked); | 2634 | unlocked_inode_to_wb_end(inode, locked); |
| 2638 | mem_cgroup_end_page_stat(memcg); | 2635 | unlock_page_memcg(page); |
| 2639 | } else { | 2636 | } else { |
| 2640 | ClearPageDirty(page); | 2637 | ClearPageDirty(page); |
| 2641 | } | 2638 | } |
| @@ -2666,7 +2663,6 @@ int clear_page_dirty_for_io(struct page *page) | |||
| 2666 | if (mapping && mapping_cap_account_dirty(mapping)) { | 2663 | if (mapping && mapping_cap_account_dirty(mapping)) { |
| 2667 | struct inode *inode = mapping->host; | 2664 | struct inode *inode = mapping->host; |
| 2668 | struct bdi_writeback *wb; | 2665 | struct bdi_writeback *wb; |
| 2669 | struct mem_cgroup *memcg; | ||
| 2670 | bool locked; | 2666 | bool locked; |
| 2671 | 2667 | ||
| 2672 | /* | 2668 | /* |
| @@ -2704,16 +2700,14 @@ int clear_page_dirty_for_io(struct page *page) | |||
| 2704 | * always locked coming in here, so we get the desired | 2700 | * always locked coming in here, so we get the desired |
| 2705 | * exclusion. | 2701 | * exclusion. |
| 2706 | */ | 2702 | */ |
| 2707 | memcg = mem_cgroup_begin_page_stat(page); | ||
| 2708 | wb = unlocked_inode_to_wb_begin(inode, &locked); | 2703 | wb = unlocked_inode_to_wb_begin(inode, &locked); |
| 2709 | if (TestClearPageDirty(page)) { | 2704 | if (TestClearPageDirty(page)) { |
| 2710 | mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_DIRTY); | 2705 | mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_DIRTY); |
| 2711 | dec_zone_page_state(page, NR_FILE_DIRTY); | 2706 | dec_zone_page_state(page, NR_FILE_DIRTY); |
| 2712 | dec_wb_stat(wb, WB_RECLAIMABLE); | 2707 | dec_wb_stat(wb, WB_RECLAIMABLE); |
| 2713 | ret = 1; | 2708 | ret = 1; |
| 2714 | } | 2709 | } |
| 2715 | unlocked_inode_to_wb_end(inode, locked); | 2710 | unlocked_inode_to_wb_end(inode, locked); |
| 2716 | mem_cgroup_end_page_stat(memcg); | ||
| 2717 | return ret; | 2711 | return ret; |
| 2718 | } | 2712 | } |
| 2719 | return TestClearPageDirty(page); | 2713 | return TestClearPageDirty(page); |
| @@ -2723,10 +2717,9 @@ EXPORT_SYMBOL(clear_page_dirty_for_io); | |||
| 2723 | int test_clear_page_writeback(struct page *page) | 2717 | int test_clear_page_writeback(struct page *page) |
| 2724 | { | 2718 | { |
| 2725 | struct address_space *mapping = page_mapping(page); | 2719 | struct address_space *mapping = page_mapping(page); |
| 2726 | struct mem_cgroup *memcg; | ||
| 2727 | int ret; | 2720 | int ret; |
| 2728 | 2721 | ||
| 2729 | memcg = mem_cgroup_begin_page_stat(page); | 2722 | lock_page_memcg(page); |
| 2730 | if (mapping) { | 2723 | if (mapping) { |
| 2731 | struct inode *inode = mapping->host; | 2724 | struct inode *inode = mapping->host; |
| 2732 | struct backing_dev_info *bdi = inode_to_bdi(inode); | 2725 | struct backing_dev_info *bdi = inode_to_bdi(inode); |
| @@ -2750,21 +2743,20 @@ int test_clear_page_writeback(struct page *page) | |||
| 2750 | ret = TestClearPageWriteback(page); | 2743 | ret = TestClearPageWriteback(page); |
| 2751 | } | 2744 | } |
| 2752 | if (ret) { | 2745 | if (ret) { |
| 2753 | mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_WRITEBACK); | 2746 | mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_WRITEBACK); |
| 2754 | dec_zone_page_state(page, NR_WRITEBACK); | 2747 | dec_zone_page_state(page, NR_WRITEBACK); |
| 2755 | inc_zone_page_state(page, NR_WRITTEN); | 2748 | inc_zone_page_state(page, NR_WRITTEN); |
| 2756 | } | 2749 | } |
| 2757 | mem_cgroup_end_page_stat(memcg); | 2750 | unlock_page_memcg(page); |
| 2758 | return ret; | 2751 | return ret; |
| 2759 | } | 2752 | } |
| 2760 | 2753 | ||
| 2761 | int __test_set_page_writeback(struct page *page, bool keep_write) | 2754 | int __test_set_page_writeback(struct page *page, bool keep_write) |
| 2762 | { | 2755 | { |
| 2763 | struct address_space *mapping = page_mapping(page); | 2756 | struct address_space *mapping = page_mapping(page); |
| 2764 | struct mem_cgroup *memcg; | ||
| 2765 | int ret; | 2757 | int ret; |
| 2766 | 2758 | ||
| 2767 | memcg = mem_cgroup_begin_page_stat(page); | 2759 | lock_page_memcg(page); |
| 2768 | if (mapping) { | 2760 | if (mapping) { |
| 2769 | struct inode *inode = mapping->host; | 2761 | struct inode *inode = mapping->host; |
| 2770 | struct backing_dev_info *bdi = inode_to_bdi(inode); | 2762 | struct backing_dev_info *bdi = inode_to_bdi(inode); |
| @@ -2792,10 +2784,10 @@ int __test_set_page_writeback(struct page *page, bool keep_write) | |||
| 2792 | ret = TestSetPageWriteback(page); | 2784 | ret = TestSetPageWriteback(page); |
| 2793 | } | 2785 | } |
| 2794 | if (!ret) { | 2786 | if (!ret) { |
| 2795 | mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_WRITEBACK); | 2787 | mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_WRITEBACK); |
| 2796 | inc_zone_page_state(page, NR_WRITEBACK); | 2788 | inc_zone_page_state(page, NR_WRITEBACK); |
| 2797 | } | 2789 | } |
| 2798 | mem_cgroup_end_page_stat(memcg); | 2790 | unlock_page_memcg(page); |
| 2799 | return ret; | 2791 | return ret; |
| 2800 | 2792 | ||
| 2801 | } | 2793 | } |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 838ca8bb64f7..c46b75d14b6f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
| @@ -223,6 +223,19 @@ static char * const zone_names[MAX_NR_ZONES] = { | |||
| 223 | #endif | 223 | #endif |
| 224 | }; | 224 | }; |
| 225 | 225 | ||
| 226 | char * const migratetype_names[MIGRATE_TYPES] = { | ||
| 227 | "Unmovable", | ||
| 228 | "Movable", | ||
| 229 | "Reclaimable", | ||
| 230 | "HighAtomic", | ||
| 231 | #ifdef CONFIG_CMA | ||
| 232 | "CMA", | ||
| 233 | #endif | ||
| 234 | #ifdef CONFIG_MEMORY_ISOLATION | ||
| 235 | "Isolate", | ||
| 236 | #endif | ||
| 237 | }; | ||
| 238 | |||
| 226 | compound_page_dtor * const compound_page_dtors[] = { | 239 | compound_page_dtor * const compound_page_dtors[] = { |
| 227 | NULL, | 240 | NULL, |
| 228 | free_compound_page, | 241 | free_compound_page, |
| @@ -247,6 +260,7 @@ static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES]; | |||
| 247 | static unsigned long __initdata required_kernelcore; | 260 | static unsigned long __initdata required_kernelcore; |
| 248 | static unsigned long __initdata required_movablecore; | 261 | static unsigned long __initdata required_movablecore; |
| 249 | static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES]; | 262 | static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES]; |
| 263 | static bool mirrored_kernelcore; | ||
| 250 | 264 | ||
| 251 | /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */ | 265 | /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */ |
| 252 | int movable_zone; | 266 | int movable_zone; |
| @@ -416,7 +430,7 @@ static void bad_page(struct page *page, const char *reason, | |||
| 416 | goto out; | 430 | goto out; |
| 417 | } | 431 | } |
| 418 | if (nr_unshown) { | 432 | if (nr_unshown) { |
| 419 | printk(KERN_ALERT | 433 | pr_alert( |
| 420 | "BUG: Bad page state: %lu messages suppressed\n", | 434 | "BUG: Bad page state: %lu messages suppressed\n", |
| 421 | nr_unshown); | 435 | nr_unshown); |
| 422 | nr_unshown = 0; | 436 | nr_unshown = 0; |
| @@ -426,9 +440,14 @@ static void bad_page(struct page *page, const char *reason, | |||
| 426 | if (nr_shown++ == 0) | 440 | if (nr_shown++ == 0) |
| 427 | resume = jiffies + 60 * HZ; | 441 | resume = jiffies + 60 * HZ; |
| 428 | 442 | ||
| 429 | printk(KERN_ALERT "BUG: Bad page state in process %s pfn:%05lx\n", | 443 | pr_alert("BUG: Bad page state in process %s pfn:%05lx\n", |
| 430 | current->comm, page_to_pfn(page)); | 444 | current->comm, page_to_pfn(page)); |
| 431 | dump_page_badflags(page, reason, bad_flags); | 445 | __dump_page(page, reason); |
| 446 | bad_flags &= page->flags; | ||
| 447 | if (bad_flags) | ||
| 448 | pr_alert("bad because of flags: %#lx(%pGp)\n", | ||
| 449 | bad_flags, &bad_flags); | ||
| 450 | dump_page_owner(page); | ||
| 432 | 451 | ||
| 433 | print_modules(); | 452 | print_modules(); |
| 434 | dump_stack(); | 453 | dump_stack(); |
| @@ -477,7 +496,8 @@ void prep_compound_page(struct page *page, unsigned int order) | |||
| 477 | 496 | ||
| 478 | #ifdef CONFIG_DEBUG_PAGEALLOC | 497 | #ifdef CONFIG_DEBUG_PAGEALLOC |
| 479 | unsigned int _debug_guardpage_minorder; | 498 | unsigned int _debug_guardpage_minorder; |
| 480 | bool _debug_pagealloc_enabled __read_mostly; | 499 | bool _debug_pagealloc_enabled __read_mostly |
| 500 | = IS_ENABLED(CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT); | ||
| 481 | bool _debug_guardpage_enabled __read_mostly; | 501 | bool _debug_guardpage_enabled __read_mostly; |
| 482 | 502 | ||
| 483 | static int __init early_debug_pagealloc(char *buf) | 503 | static int __init early_debug_pagealloc(char *buf) |
| @@ -488,6 +508,9 @@ static int __init early_debug_pagealloc(char *buf) | |||
| 488 | if (strcmp(buf, "on") == 0) | 508 | if (strcmp(buf, "on") == 0) |
| 489 | _debug_pagealloc_enabled = true; | 509 | _debug_pagealloc_enabled = true; |
| 490 | 510 | ||
| 511 | if (strcmp(buf, "off") == 0) | ||
| 512 | _debug_pagealloc_enabled = false; | ||
| 513 | |||
| 491 | return 0; | 514 | return 0; |
| 492 | } | 515 | } |
| 493 | early_param("debug_pagealloc", early_debug_pagealloc); | 516 | early_param("debug_pagealloc", early_debug_pagealloc); |
| @@ -1002,6 +1025,7 @@ static bool free_pages_prepare(struct page *page, unsigned int order) | |||
| 1002 | PAGE_SIZE << order); | 1025 | PAGE_SIZE << order); |
| 1003 | } | 1026 | } |
| 1004 | arch_free_page(page, order); | 1027 | arch_free_page(page, order); |
| 1028 | kernel_poison_pages(page, 1 << order, 0); | ||
| 1005 | kernel_map_pages(page, 1 << order, 0); | 1029 | kernel_map_pages(page, 1 << order, 0); |
| 1006 | 1030 | ||
| 1007 | return true; | 1031 | return true; |
| @@ -1104,6 +1128,75 @@ void __init __free_pages_bootmem(struct page *page, unsigned long pfn, | |||
| 1104 | return __free_pages_boot_core(page, pfn, order); | 1128 | return __free_pages_boot_core(page, pfn, order); |
| 1105 | } | 1129 | } |
| 1106 | 1130 | ||
| 1131 | /* | ||
| 1132 | * Check that the whole (or subset of) a pageblock given by the interval of | ||
| 1133 | * [start_pfn, end_pfn) is valid and within the same zone, before scanning it | ||
| 1134 | * with the migration of free compaction scanner. The scanners then need to | ||
| 1135 | * use only pfn_valid_within() check for arches that allow holes within | ||
| 1136 | * pageblocks. | ||
| 1137 | * | ||
| 1138 | * Return struct page pointer of start_pfn, or NULL if checks were not passed. | ||
| 1139 | * | ||
| 1140 | * It's possible on some configurations to have a setup like node0 node1 node0 | ||
| 1141 | * i.e. it's possible that all pages within a zones range of pages do not | ||
| 1142 | * belong to a single zone. We assume that a border between node0 and node1 | ||
| 1143 | * can occur within a single pageblock, but not a node0 node1 node0 | ||
| 1144 | * interleaving within a single pageblock. It is therefore sufficient to check | ||
| 1145 | * the first and last page of a pageblock and avoid checking each individual | ||
| 1146 | * page in a pageblock. | ||
| 1147 | */ | ||
| 1148 | struct page *__pageblock_pfn_to_page(unsigned long start_pfn, | ||
| 1149 | unsigned long end_pfn, struct zone *zone) | ||
| 1150 | { | ||
| 1151 | struct page *start_page; | ||
| 1152 | struct page *end_page; | ||
| 1153 | |||
| 1154 | /* end_pfn is one past the range we are checking */ | ||
| 1155 | end_pfn--; | ||
| 1156 | |||
| 1157 | if (!pfn_valid(start_pfn) || !pfn_valid(end_pfn)) | ||
| 1158 | return NULL; | ||
| 1159 | |||
| 1160 | start_page = pfn_to_page(start_pfn); | ||
| 1161 | |||
| 1162 | if (page_zone(start_page) != zone) | ||
| 1163 | return NULL; | ||
| 1164 | |||
| 1165 | end_page = pfn_to_page(end_pfn); | ||
| 1166 | |||
| 1167 | /* This gives a shorter code than deriving page_zone(end_page) */ | ||
| 1168 | if (page_zone_id(start_page) != page_zone_id(end_page)) | ||
| 1169 | return NULL; | ||
| 1170 | |||
| 1171 | return start_page; | ||
| 1172 | } | ||
| 1173 | |||
| 1174 | void set_zone_contiguous(struct zone *zone) | ||
| 1175 | { | ||
| 1176 | unsigned long block_start_pfn = zone->zone_start_pfn; | ||
| 1177 | unsigned long block_end_pfn; | ||
| 1178 | |||
| 1179 | block_end_pfn = ALIGN(block_start_pfn + 1, pageblock_nr_pages); | ||
| 1180 | for (; block_start_pfn < zone_end_pfn(zone); | ||
| 1181 | block_start_pfn = block_end_pfn, | ||
| 1182 | block_end_pfn += pageblock_nr_pages) { | ||
| 1183 | |||
| 1184 | block_end_pfn = min(block_end_pfn, zone_end_pfn(zone)); | ||
| 1185 | |||
| 1186 | if (!__pageblock_pfn_to_page(block_start_pfn, | ||
| 1187 | block_end_pfn, zone)) | ||
| 1188 | return; | ||
| 1189 | } | ||
| 1190 | |||
| 1191 | /* We confirm that there is no hole */ | ||
| 1192 | zone->contiguous = true; | ||
| 1193 | } | ||
| 1194 | |||
| 1195 | void clear_zone_contiguous(struct zone *zone) | ||
| 1196 | { | ||
| 1197 | zone->contiguous = false; | ||
| 1198 | } | ||
| 1199 | |||
| 1107 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT | 1200 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT |
| 1108 | static void __init deferred_free_range(struct page *page, | 1201 | static void __init deferred_free_range(struct page *page, |
| 1109 | unsigned long pfn, int nr_pages) | 1202 | unsigned long pfn, int nr_pages) |
| @@ -1254,9 +1347,13 @@ free_range: | |||
| 1254 | pgdat_init_report_one_done(); | 1347 | pgdat_init_report_one_done(); |
| 1255 | return 0; | 1348 | return 0; |
| 1256 | } | 1349 | } |
| 1350 | #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ | ||
| 1257 | 1351 | ||
| 1258 | void __init page_alloc_init_late(void) | 1352 | void __init page_alloc_init_late(void) |
| 1259 | { | 1353 | { |
| 1354 | struct zone *zone; | ||
| 1355 | |||
| 1356 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT | ||
| 1260 | int nid; | 1357 | int nid; |
| 1261 | 1358 | ||
| 1262 | /* There will be num_node_state(N_MEMORY) threads */ | 1359 | /* There will be num_node_state(N_MEMORY) threads */ |
| @@ -1270,8 +1367,11 @@ void __init page_alloc_init_late(void) | |||
| 1270 | 1367 | ||
| 1271 | /* Reinit limits that are based on free pages after the kernel is up */ | 1368 | /* Reinit limits that are based on free pages after the kernel is up */ |
| 1272 | files_maxfiles_init(); | 1369 | files_maxfiles_init(); |
| 1370 | #endif | ||
| 1371 | |||
| 1372 | for_each_populated_zone(zone) | ||
| 1373 | set_zone_contiguous(zone); | ||
| 1273 | } | 1374 | } |
| 1274 | #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ | ||
| 1275 | 1375 | ||
| 1276 | #ifdef CONFIG_CMA | 1376 | #ifdef CONFIG_CMA |
| 1277 | /* Free whole pageblock and set its migration type to MIGRATE_CMA. */ | 1377 | /* Free whole pageblock and set its migration type to MIGRATE_CMA. */ |
| @@ -1381,15 +1481,24 @@ static inline int check_new_page(struct page *page) | |||
| 1381 | return 0; | 1481 | return 0; |
| 1382 | } | 1482 | } |
| 1383 | 1483 | ||
| 1484 | static inline bool free_pages_prezeroed(bool poisoned) | ||
| 1485 | { | ||
| 1486 | return IS_ENABLED(CONFIG_PAGE_POISONING_ZERO) && | ||
| 1487 | page_poisoning_enabled() && poisoned; | ||
| 1488 | } | ||
| 1489 | |||
| 1384 | static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags, | 1490 | static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags, |
| 1385 | int alloc_flags) | 1491 | int alloc_flags) |
| 1386 | { | 1492 | { |
| 1387 | int i; | 1493 | int i; |
| 1494 | bool poisoned = true; | ||
| 1388 | 1495 | ||
| 1389 | for (i = 0; i < (1 << order); i++) { | 1496 | for (i = 0; i < (1 << order); i++) { |
| 1390 | struct page *p = page + i; | 1497 | struct page *p = page + i; |
| 1391 | if (unlikely(check_new_page(p))) | 1498 | if (unlikely(check_new_page(p))) |
| 1392 | return 1; | 1499 | return 1; |
| 1500 | if (poisoned) | ||
| 1501 | poisoned &= page_is_poisoned(p); | ||
| 1393 | } | 1502 | } |
| 1394 | 1503 | ||
| 1395 | set_page_private(page, 0); | 1504 | set_page_private(page, 0); |
| @@ -1397,9 +1506,10 @@ static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags, | |||
| 1397 | 1506 | ||
| 1398 | arch_alloc_page(page, order); | 1507 | arch_alloc_page(page, order); |
| 1399 | kernel_map_pages(page, 1 << order, 1); | 1508 | kernel_map_pages(page, 1 << order, 1); |
| 1509 | kernel_poison_pages(page, 1 << order, 1); | ||
| 1400 | kasan_alloc_pages(page, order); | 1510 | kasan_alloc_pages(page, order); |
| 1401 | 1511 | ||
| 1402 | if (gfp_flags & __GFP_ZERO) | 1512 | if (!free_pages_prezeroed(poisoned) && (gfp_flags & __GFP_ZERO)) |
| 1403 | for (i = 0; i < (1 << order); i++) | 1513 | for (i = 0; i < (1 << order); i++) |
| 1404 | clear_highpage(page + i); | 1514 | clear_highpage(page + i); |
| 1405 | 1515 | ||
| @@ -2690,9 +2800,8 @@ void warn_alloc_failed(gfp_t gfp_mask, unsigned int order, const char *fmt, ...) | |||
| 2690 | va_end(args); | 2800 | va_end(args); |
| 2691 | } | 2801 | } |
| 2692 | 2802 | ||
| 2693 | pr_warn("%s: page allocation failure: order:%u, mode:0x%x\n", | 2803 | pr_warn("%s: page allocation failure: order:%u, mode:%#x(%pGg)\n", |
| 2694 | current->comm, order, gfp_mask); | 2804 | current->comm, order, gfp_mask, &gfp_mask); |
| 2695 | |||
| 2696 | dump_stack(); | 2805 | dump_stack(); |
| 2697 | if (!should_suppress_show_mem()) | 2806 | if (!should_suppress_show_mem()) |
| 2698 | show_mem(filter); | 2807 | show_mem(filter); |
| @@ -4491,6 +4600,9 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, | |||
| 4491 | pg_data_t *pgdat = NODE_DATA(nid); | 4600 | pg_data_t *pgdat = NODE_DATA(nid); |
| 4492 | unsigned long pfn; | 4601 | unsigned long pfn; |
| 4493 | unsigned long nr_initialised = 0; | 4602 | unsigned long nr_initialised = 0; |
| 4603 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP | ||
| 4604 | struct memblock_region *r = NULL, *tmp; | ||
| 4605 | #endif | ||
| 4494 | 4606 | ||
| 4495 | if (highest_memmap_pfn < end_pfn - 1) | 4607 | if (highest_memmap_pfn < end_pfn - 1) |
| 4496 | highest_memmap_pfn = end_pfn - 1; | 4608 | highest_memmap_pfn = end_pfn - 1; |
| @@ -4504,20 +4616,51 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, | |||
| 4504 | 4616 | ||
| 4505 | for (pfn = start_pfn; pfn < end_pfn; pfn++) { | 4617 | for (pfn = start_pfn; pfn < end_pfn; pfn++) { |
| 4506 | /* | 4618 | /* |
| 4507 | * There can be holes in boot-time mem_map[]s | 4619 | * There can be holes in boot-time mem_map[]s handed to this |
| 4508 | * handed to this function. They do not | 4620 | * function. They do not exist on hotplugged memory. |
| 4509 | * exist on hotplugged memory. | ||
| 4510 | */ | 4621 | */ |
| 4511 | if (context == MEMMAP_EARLY) { | 4622 | if (context != MEMMAP_EARLY) |
| 4512 | if (!early_pfn_valid(pfn)) | 4623 | goto not_early; |
| 4624 | |||
| 4625 | if (!early_pfn_valid(pfn)) | ||
| 4626 | continue; | ||
| 4627 | if (!early_pfn_in_nid(pfn, nid)) | ||
| 4628 | continue; | ||
| 4629 | if (!update_defer_init(pgdat, pfn, end_pfn, &nr_initialised)) | ||
| 4630 | break; | ||
| 4631 | |||
| 4632 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP | ||
| 4633 | /* | ||
| 4634 | * If not mirrored_kernelcore and ZONE_MOVABLE exists, range | ||
| 4635 | * from zone_movable_pfn[nid] to end of each node should be | ||
| 4636 | * ZONE_MOVABLE not ZONE_NORMAL. skip it. | ||
| 4637 | */ | ||
| 4638 | if (!mirrored_kernelcore && zone_movable_pfn[nid]) | ||
| 4639 | if (zone == ZONE_NORMAL && pfn >= zone_movable_pfn[nid]) | ||
| 4513 | continue; | 4640 | continue; |
| 4514 | if (!early_pfn_in_nid(pfn, nid)) | 4641 | |
| 4642 | /* | ||
| 4643 | * Check given memblock attribute by firmware which can affect | ||
| 4644 | * kernel memory layout. If zone==ZONE_MOVABLE but memory is | ||
| 4645 | * mirrored, it's an overlapped memmap init. skip it. | ||
| 4646 | */ | ||
| 4647 | if (mirrored_kernelcore && zone == ZONE_MOVABLE) { | ||
| 4648 | if (!r || pfn >= memblock_region_memory_end_pfn(r)) { | ||
| 4649 | for_each_memblock(memory, tmp) | ||
| 4650 | if (pfn < memblock_region_memory_end_pfn(tmp)) | ||
| 4651 | break; | ||
| 4652 | r = tmp; | ||
| 4653 | } | ||
| 4654 | if (pfn >= memblock_region_memory_base_pfn(r) && | ||
| 4655 | memblock_is_mirror(r)) { | ||
| 4656 | /* already initialized as NORMAL */ | ||
| 4657 | pfn = memblock_region_memory_end_pfn(r); | ||
| 4515 | continue; | 4658 | continue; |
| 4516 | if (!update_defer_init(pgdat, pfn, end_pfn, | 4659 | } |
| 4517 | &nr_initialised)) | ||
| 4518 | break; | ||
| 4519 | } | 4660 | } |
| 4661 | #endif | ||
| 4520 | 4662 | ||
| 4663 | not_early: | ||
| 4521 | /* | 4664 | /* |
| 4522 | * Mark the block movable so that blocks are reserved for | 4665 | * Mark the block movable so that blocks are reserved for |
| 4523 | * movable at startup. This will force kernel allocations | 4666 | * movable at startup. This will force kernel allocations |
| @@ -4934,11 +5077,6 @@ static void __meminit adjust_zone_range_for_zone_movable(int nid, | |||
| 4934 | *zone_end_pfn = min(node_end_pfn, | 5077 | *zone_end_pfn = min(node_end_pfn, |
| 4935 | arch_zone_highest_possible_pfn[movable_zone]); | 5078 | arch_zone_highest_possible_pfn[movable_zone]); |
| 4936 | 5079 | ||
| 4937 | /* Adjust for ZONE_MOVABLE starting within this range */ | ||
| 4938 | } else if (*zone_start_pfn < zone_movable_pfn[nid] && | ||
| 4939 | *zone_end_pfn > zone_movable_pfn[nid]) { | ||
| 4940 | *zone_end_pfn = zone_movable_pfn[nid]; | ||
| 4941 | |||
| 4942 | /* Check if this whole range is within ZONE_MOVABLE */ | 5080 | /* Check if this whole range is within ZONE_MOVABLE */ |
| 4943 | } else if (*zone_start_pfn >= zone_movable_pfn[nid]) | 5081 | } else if (*zone_start_pfn >= zone_movable_pfn[nid]) |
| 4944 | *zone_start_pfn = *zone_end_pfn; | 5082 | *zone_start_pfn = *zone_end_pfn; |
| @@ -4953,31 +5091,31 @@ static unsigned long __meminit zone_spanned_pages_in_node(int nid, | |||
| 4953 | unsigned long zone_type, | 5091 | unsigned long zone_type, |
| 4954 | unsigned long node_start_pfn, | 5092 | unsigned long node_start_pfn, |
| 4955 | unsigned long node_end_pfn, | 5093 | unsigned long node_end_pfn, |
| 5094 | unsigned long *zone_start_pfn, | ||
| 5095 | unsigned long *zone_end_pfn, | ||
| 4956 | unsigned long *ignored) | 5096 | unsigned long *ignored) |
| 4957 | { | 5097 | { |
| 4958 | unsigned long zone_start_pfn, zone_end_pfn; | ||
| 4959 | |||
| 4960 | /* When hotadd a new node from cpu_up(), the node should be empty */ | 5098 | /* When hotadd a new node from cpu_up(), the node should be empty */ |
| 4961 | if (!node_start_pfn && !node_end_pfn) | 5099 | if (!node_start_pfn && !node_end_pfn) |
| 4962 | return 0; | 5100 | return 0; |
| 4963 | 5101 | ||
| 4964 | /* Get the start and end of the zone */ | 5102 | /* Get the start and end of the zone */ |
| 4965 | zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type]; | 5103 | *zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type]; |
| 4966 | zone_end_pfn = arch_zone_highest_possible_pfn[zone_type]; | 5104 | *zone_end_pfn = arch_zone_highest_possible_pfn[zone_type]; |
| 4967 | adjust_zone_range_for_zone_movable(nid, zone_type, | 5105 | adjust_zone_range_for_zone_movable(nid, zone_type, |
| 4968 | node_start_pfn, node_end_pfn, | 5106 | node_start_pfn, node_end_pfn, |
| 4969 | &zone_start_pfn, &zone_end_pfn); | 5107 | zone_start_pfn, zone_end_pfn); |
| 4970 | 5108 | ||
| 4971 | /* Check that this node has pages within the zone's required range */ | 5109 | /* Check that this node has pages within the zone's required range */ |
| 4972 | if (zone_end_pfn < node_start_pfn || zone_start_pfn > node_end_pfn) | 5110 | if (*zone_end_pfn < node_start_pfn || *zone_start_pfn > node_end_pfn) |
| 4973 | return 0; | 5111 | return 0; |
| 4974 | 5112 | ||
| 4975 | /* Move the zone boundaries inside the node if necessary */ | 5113 | /* Move the zone boundaries inside the node if necessary */ |
| 4976 | zone_end_pfn = min(zone_end_pfn, node_end_pfn); | 5114 | *zone_end_pfn = min(*zone_end_pfn, node_end_pfn); |
| 4977 | zone_start_pfn = max(zone_start_pfn, node_start_pfn); | 5115 | *zone_start_pfn = max(*zone_start_pfn, node_start_pfn); |
| 4978 | 5116 | ||
| 4979 | /* Return the spanned pages */ | 5117 | /* Return the spanned pages */ |
| 4980 | return zone_end_pfn - zone_start_pfn; | 5118 | return *zone_end_pfn - *zone_start_pfn; |
| 4981 | } | 5119 | } |
| 4982 | 5120 | ||
| 4983 | /* | 5121 | /* |
| @@ -5023,6 +5161,7 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid, | |||
| 5023 | unsigned long zone_low = arch_zone_lowest_possible_pfn[zone_type]; | 5161 | unsigned long zone_low = arch_zone_lowest_possible_pfn[zone_type]; |
| 5024 | unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type]; | 5162 | unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type]; |
| 5025 | unsigned long zone_start_pfn, zone_end_pfn; | 5163 | unsigned long zone_start_pfn, zone_end_pfn; |
| 5164 | unsigned long nr_absent; | ||
| 5026 | 5165 | ||
| 5027 | /* When hotadd a new node from cpu_up(), the node should be empty */ | 5166 | /* When hotadd a new node from cpu_up(), the node should be empty */ |
| 5028 | if (!node_start_pfn && !node_end_pfn) | 5167 | if (!node_start_pfn && !node_end_pfn) |
| @@ -5034,7 +5173,39 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid, | |||
| 5034 | adjust_zone_range_for_zone_movable(nid, zone_type, | 5173 | adjust_zone_range_for_zone_movable(nid, zone_type, |
| 5035 | node_start_pfn, node_end_pfn, | 5174 | node_start_pfn, node_end_pfn, |
| 5036 | &zone_start_pfn, &zone_end_pfn); | 5175 | &zone_start_pfn, &zone_end_pfn); |
| 5037 | return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn); | 5176 | nr_absent = __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn); |
| 5177 | |||
| 5178 | /* | ||
| 5179 | * ZONE_MOVABLE handling. | ||
| 5180 | * Treat pages to be ZONE_MOVABLE in ZONE_NORMAL as absent pages | ||
| 5181 | * and vice versa. | ||
| 5182 | */ | ||
| 5183 | if (zone_movable_pfn[nid]) { | ||
| 5184 | if (mirrored_kernelcore) { | ||
| 5185 | unsigned long start_pfn, end_pfn; | ||
| 5186 | struct memblock_region *r; | ||
| 5187 | |||
| 5188 | for_each_memblock(memory, r) { | ||
| 5189 | start_pfn = clamp(memblock_region_memory_base_pfn(r), | ||
| 5190 | zone_start_pfn, zone_end_pfn); | ||
| 5191 | end_pfn = clamp(memblock_region_memory_end_pfn(r), | ||
| 5192 | zone_start_pfn, zone_end_pfn); | ||
| 5193 | |||
| 5194 | if (zone_type == ZONE_MOVABLE && | ||
| 5195 | memblock_is_mirror(r)) | ||
| 5196 | nr_absent += end_pfn - start_pfn; | ||
| 5197 | |||
| 5198 | if (zone_type == ZONE_NORMAL && | ||
| 5199 | !memblock_is_mirror(r)) | ||
| 5200 | nr_absent += end_pfn - start_pfn; | ||
| 5201 | } | ||
| 5202 | } else { | ||
| 5203 | if (zone_type == ZONE_NORMAL) | ||
| 5204 | nr_absent += node_end_pfn - zone_movable_pfn[nid]; | ||
| 5205 | } | ||
| 5206 | } | ||
| 5207 | |||
| 5208 | return nr_absent; | ||
| 5038 | } | 5209 | } |
| 5039 | 5210 | ||
| 5040 | #else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ | 5211 | #else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ |
| @@ -5042,8 +5213,18 @@ static inline unsigned long __meminit zone_spanned_pages_in_node(int nid, | |||
| 5042 | unsigned long zone_type, | 5213 | unsigned long zone_type, |
| 5043 | unsigned long node_start_pfn, | 5214 | unsigned long node_start_pfn, |
| 5044 | unsigned long node_end_pfn, | 5215 | unsigned long node_end_pfn, |
| 5216 | unsigned long *zone_start_pfn, | ||
| 5217 | unsigned long *zone_end_pfn, | ||
| 5045 | unsigned long *zones_size) | 5218 | unsigned long *zones_size) |
| 5046 | { | 5219 | { |
| 5220 | unsigned int zone; | ||
| 5221 | |||
| 5222 | *zone_start_pfn = node_start_pfn; | ||
| 5223 | for (zone = 0; zone < zone_type; zone++) | ||
| 5224 | *zone_start_pfn += zones_size[zone]; | ||
| 5225 | |||
| 5226 | *zone_end_pfn = *zone_start_pfn + zones_size[zone_type]; | ||
| 5227 | |||
| 5047 | return zones_size[zone_type]; | 5228 | return zones_size[zone_type]; |
| 5048 | } | 5229 | } |
| 5049 | 5230 | ||
| @@ -5072,15 +5253,22 @@ static void __meminit calculate_node_totalpages(struct pglist_data *pgdat, | |||
| 5072 | 5253 | ||
| 5073 | for (i = 0; i < MAX_NR_ZONES; i++) { | 5254 | for (i = 0; i < MAX_NR_ZONES; i++) { |
| 5074 | struct zone *zone = pgdat->node_zones + i; | 5255 | struct zone *zone = pgdat->node_zones + i; |
| 5256 | unsigned long zone_start_pfn, zone_end_pfn; | ||
| 5075 | unsigned long size, real_size; | 5257 | unsigned long size, real_size; |
| 5076 | 5258 | ||
| 5077 | size = zone_spanned_pages_in_node(pgdat->node_id, i, | 5259 | size = zone_spanned_pages_in_node(pgdat->node_id, i, |
| 5078 | node_start_pfn, | 5260 | node_start_pfn, |
| 5079 | node_end_pfn, | 5261 | node_end_pfn, |
| 5262 | &zone_start_pfn, | ||
| 5263 | &zone_end_pfn, | ||
| 5080 | zones_size); | 5264 | zones_size); |
| 5081 | real_size = size - zone_absent_pages_in_node(pgdat->node_id, i, | 5265 | real_size = size - zone_absent_pages_in_node(pgdat->node_id, i, |
| 5082 | node_start_pfn, node_end_pfn, | 5266 | node_start_pfn, node_end_pfn, |
| 5083 | zholes_size); | 5267 | zholes_size); |
| 5268 | if (size) | ||
| 5269 | zone->zone_start_pfn = zone_start_pfn; | ||
| 5270 | else | ||
| 5271 | zone->zone_start_pfn = 0; | ||
| 5084 | zone->spanned_pages = size; | 5272 | zone->spanned_pages = size; |
| 5085 | zone->present_pages = real_size; | 5273 | zone->present_pages = real_size; |
| 5086 | 5274 | ||
| @@ -5201,7 +5389,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat) | |||
| 5201 | { | 5389 | { |
| 5202 | enum zone_type j; | 5390 | enum zone_type j; |
| 5203 | int nid = pgdat->node_id; | 5391 | int nid = pgdat->node_id; |
| 5204 | unsigned long zone_start_pfn = pgdat->node_start_pfn; | ||
| 5205 | int ret; | 5392 | int ret; |
| 5206 | 5393 | ||
| 5207 | pgdat_resize_init(pgdat); | 5394 | pgdat_resize_init(pgdat); |
| @@ -5222,6 +5409,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat) | |||
| 5222 | for (j = 0; j < MAX_NR_ZONES; j++) { | 5409 | for (j = 0; j < MAX_NR_ZONES; j++) { |
| 5223 | struct zone *zone = pgdat->node_zones + j; | 5410 | struct zone *zone = pgdat->node_zones + j; |
| 5224 | unsigned long size, realsize, freesize, memmap_pages; | 5411 | unsigned long size, realsize, freesize, memmap_pages; |
| 5412 | unsigned long zone_start_pfn = zone->zone_start_pfn; | ||
| 5225 | 5413 | ||
| 5226 | size = zone->spanned_pages; | 5414 | size = zone->spanned_pages; |
| 5227 | realsize = freesize = zone->present_pages; | 5415 | realsize = freesize = zone->present_pages; |
| @@ -5290,7 +5478,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat) | |||
| 5290 | ret = init_currently_empty_zone(zone, zone_start_pfn, size); | 5478 | ret = init_currently_empty_zone(zone, zone_start_pfn, size); |
| 5291 | BUG_ON(ret); | 5479 | BUG_ON(ret); |
| 5292 | memmap_init(size, nid, j, zone_start_pfn); | 5480 | memmap_init(size, nid, j, zone_start_pfn); |
| 5293 | zone_start_pfn += size; | ||
| 5294 | } | 5481 | } |
| 5295 | } | 5482 | } |
| 5296 | 5483 | ||
| @@ -5358,6 +5545,8 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size, | |||
| 5358 | pr_info("Initmem setup node %d [mem %#018Lx-%#018Lx]\n", nid, | 5545 | pr_info("Initmem setup node %d [mem %#018Lx-%#018Lx]\n", nid, |
| 5359 | (u64)start_pfn << PAGE_SHIFT, | 5546 | (u64)start_pfn << PAGE_SHIFT, |
| 5360 | end_pfn ? ((u64)end_pfn << PAGE_SHIFT) - 1 : 0); | 5547 | end_pfn ? ((u64)end_pfn << PAGE_SHIFT) - 1 : 0); |
| 5548 | #else | ||
| 5549 | start_pfn = node_start_pfn; | ||
| 5361 | #endif | 5550 | #endif |
| 5362 | calculate_node_totalpages(pgdat, start_pfn, end_pfn, | 5551 | calculate_node_totalpages(pgdat, start_pfn, end_pfn, |
| 5363 | zones_size, zholes_size); | 5552 | zones_size, zholes_size); |
| @@ -5529,6 +5718,36 @@ static void __init find_zone_movable_pfns_for_nodes(void) | |||
| 5529 | } | 5718 | } |
| 5530 | 5719 | ||
| 5531 | /* | 5720 | /* |
| 5721 | * If kernelcore=mirror is specified, ignore movablecore option | ||
| 5722 | */ | ||
| 5723 | if (mirrored_kernelcore) { | ||
| 5724 | bool mem_below_4gb_not_mirrored = false; | ||
| 5725 | |||
| 5726 | for_each_memblock(memory, r) { | ||
| 5727 | if (memblock_is_mirror(r)) | ||
| 5728 | continue; | ||
| 5729 | |||
| 5730 | nid = r->nid; | ||
| 5731 | |||
| 5732 | usable_startpfn = memblock_region_memory_base_pfn(r); | ||
| 5733 | |||
| 5734 | if (usable_startpfn < 0x100000) { | ||
| 5735 | mem_below_4gb_not_mirrored = true; | ||
| 5736 | continue; | ||
| 5737 | } | ||
| 5738 | |||
| 5739 | zone_movable_pfn[nid] = zone_movable_pfn[nid] ? | ||
| 5740 | min(usable_startpfn, zone_movable_pfn[nid]) : | ||
| 5741 | usable_startpfn; | ||
| 5742 | } | ||
| 5743 | |||
| 5744 | if (mem_below_4gb_not_mirrored) | ||
| 5745 | pr_warn("This configuration results in unmirrored kernel memory."); | ||
| 5746 | |||
| 5747 | goto out2; | ||
| 5748 | } | ||
| 5749 | |||
| 5750 | /* | ||
| 5532 | * If movablecore=nn[KMG] was specified, calculate what size of | 5751 | * If movablecore=nn[KMG] was specified, calculate what size of |
| 5533 | * kernelcore that corresponds so that memory usable for | 5752 | * kernelcore that corresponds so that memory usable for |
| 5534 | * any allocation type is evenly spread. If both kernelcore | 5753 | * any allocation type is evenly spread. If both kernelcore |
| @@ -5788,6 +6007,12 @@ static int __init cmdline_parse_core(char *p, unsigned long *core) | |||
| 5788 | */ | 6007 | */ |
| 5789 | static int __init cmdline_parse_kernelcore(char *p) | 6008 | static int __init cmdline_parse_kernelcore(char *p) |
| 5790 | { | 6009 | { |
| 6010 | /* parse kernelcore=mirror */ | ||
| 6011 | if (parse_option_str(p, "mirror")) { | ||
| 6012 | mirrored_kernelcore = true; | ||
| 6013 | return 0; | ||
| 6014 | } | ||
| 6015 | |||
| 5791 | return cmdline_parse_core(p, &required_kernelcore); | 6016 | return cmdline_parse_core(p, &required_kernelcore); |
| 5792 | } | 6017 | } |
| 5793 | 6018 | ||
diff --git a/mm/page_ext.c b/mm/page_ext.c index 292ca7b8debd..2d864e64f7fe 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c | |||
| @@ -106,12 +106,15 @@ struct page_ext *lookup_page_ext(struct page *page) | |||
| 106 | struct page_ext *base; | 106 | struct page_ext *base; |
| 107 | 107 | ||
| 108 | base = NODE_DATA(page_to_nid(page))->node_page_ext; | 108 | base = NODE_DATA(page_to_nid(page))->node_page_ext; |
| 109 | #ifdef CONFIG_DEBUG_VM | 109 | #if defined(CONFIG_DEBUG_VM) || defined(CONFIG_PAGE_POISONING) |
| 110 | /* | 110 | /* |
| 111 | * The sanity checks the page allocator does upon freeing a | 111 | * The sanity checks the page allocator does upon freeing a |
| 112 | * page can reach here before the page_ext arrays are | 112 | * page can reach here before the page_ext arrays are |
| 113 | * allocated when feeding a range of pages to the allocator | 113 | * allocated when feeding a range of pages to the allocator |
| 114 | * for the first time during bootup or memory hotplug. | 114 | * for the first time during bootup or memory hotplug. |
| 115 | * | ||
| 116 | * This check is also necessary for ensuring page poisoning | ||
| 117 | * works as expected when enabled | ||
| 115 | */ | 118 | */ |
| 116 | if (unlikely(!base)) | 119 | if (unlikely(!base)) |
| 117 | return NULL; | 120 | return NULL; |
| @@ -180,12 +183,15 @@ struct page_ext *lookup_page_ext(struct page *page) | |||
| 180 | { | 183 | { |
| 181 | unsigned long pfn = page_to_pfn(page); | 184 | unsigned long pfn = page_to_pfn(page); |
| 182 | struct mem_section *section = __pfn_to_section(pfn); | 185 | struct mem_section *section = __pfn_to_section(pfn); |
| 183 | #ifdef CONFIG_DEBUG_VM | 186 | #if defined(CONFIG_DEBUG_VM) || defined(CONFIG_PAGE_POISONING) |
| 184 | /* | 187 | /* |
| 185 | * The sanity checks the page allocator does upon freeing a | 188 | * The sanity checks the page allocator does upon freeing a |
| 186 | * page can reach here before the page_ext arrays are | 189 | * page can reach here before the page_ext arrays are |
| 187 | * allocated when feeding a range of pages to the allocator | 190 | * allocated when feeding a range of pages to the allocator |
| 188 | * for the first time during bootup or memory hotplug. | 191 | * for the first time during bootup or memory hotplug. |
| 192 | * | ||
| 193 | * This check is also necessary for ensuring page poisoning | ||
| 194 | * works as expected when enabled | ||
| 189 | */ | 195 | */ |
| 190 | if (!section->page_ext) | 196 | if (!section->page_ext) |
| 191 | return NULL; | 197 | return NULL; |
diff --git a/mm/page_owner.c b/mm/page_owner.c index 983c3a10fa07..44ad1f00c4e1 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c | |||
| @@ -5,10 +5,12 @@ | |||
| 5 | #include <linux/bootmem.h> | 5 | #include <linux/bootmem.h> |
| 6 | #include <linux/stacktrace.h> | 6 | #include <linux/stacktrace.h> |
| 7 | #include <linux/page_owner.h> | 7 | #include <linux/page_owner.h> |
| 8 | #include <linux/jump_label.h> | ||
| 9 | #include <linux/migrate.h> | ||
| 8 | #include "internal.h" | 10 | #include "internal.h" |
| 9 | 11 | ||
| 10 | static bool page_owner_disabled = true; | 12 | static bool page_owner_disabled = true; |
| 11 | bool page_owner_inited __read_mostly; | 13 | DEFINE_STATIC_KEY_FALSE(page_owner_inited); |
| 12 | 14 | ||
| 13 | static void init_early_allocated_pages(void); | 15 | static void init_early_allocated_pages(void); |
| 14 | 16 | ||
| @@ -37,7 +39,7 @@ static void init_page_owner(void) | |||
| 37 | if (page_owner_disabled) | 39 | if (page_owner_disabled) |
| 38 | return; | 40 | return; |
| 39 | 41 | ||
| 40 | page_owner_inited = true; | 42 | static_branch_enable(&page_owner_inited); |
| 41 | init_early_allocated_pages(); | 43 | init_early_allocated_pages(); |
| 42 | } | 44 | } |
| 43 | 45 | ||
| @@ -72,10 +74,18 @@ void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) | |||
| 72 | page_ext->order = order; | 74 | page_ext->order = order; |
| 73 | page_ext->gfp_mask = gfp_mask; | 75 | page_ext->gfp_mask = gfp_mask; |
| 74 | page_ext->nr_entries = trace.nr_entries; | 76 | page_ext->nr_entries = trace.nr_entries; |
| 77 | page_ext->last_migrate_reason = -1; | ||
| 75 | 78 | ||
| 76 | __set_bit(PAGE_EXT_OWNER, &page_ext->flags); | 79 | __set_bit(PAGE_EXT_OWNER, &page_ext->flags); |
| 77 | } | 80 | } |
| 78 | 81 | ||
| 82 | void __set_page_owner_migrate_reason(struct page *page, int reason) | ||
| 83 | { | ||
| 84 | struct page_ext *page_ext = lookup_page_ext(page); | ||
| 85 | |||
| 86 | page_ext->last_migrate_reason = reason; | ||
| 87 | } | ||
| 88 | |||
| 79 | gfp_t __get_page_owner_gfp(struct page *page) | 89 | gfp_t __get_page_owner_gfp(struct page *page) |
| 80 | { | 90 | { |
| 81 | struct page_ext *page_ext = lookup_page_ext(page); | 91 | struct page_ext *page_ext = lookup_page_ext(page); |
| @@ -83,6 +93,31 @@ gfp_t __get_page_owner_gfp(struct page *page) | |||
| 83 | return page_ext->gfp_mask; | 93 | return page_ext->gfp_mask; |
| 84 | } | 94 | } |
| 85 | 95 | ||
| 96 | void __copy_page_owner(struct page *oldpage, struct page *newpage) | ||
| 97 | { | ||
| 98 | struct page_ext *old_ext = lookup_page_ext(oldpage); | ||
| 99 | struct page_ext *new_ext = lookup_page_ext(newpage); | ||
| 100 | int i; | ||
| 101 | |||
| 102 | new_ext->order = old_ext->order; | ||
| 103 | new_ext->gfp_mask = old_ext->gfp_mask; | ||
| 104 | new_ext->nr_entries = old_ext->nr_entries; | ||
| 105 | |||
| 106 | for (i = 0; i < ARRAY_SIZE(new_ext->trace_entries); i++) | ||
| 107 | new_ext->trace_entries[i] = old_ext->trace_entries[i]; | ||
| 108 | |||
| 109 | /* | ||
| 110 | * We don't clear the bit on the oldpage as it's going to be freed | ||
| 111 | * after migration. Until then, the info can be useful in case of | ||
| 112 | * a bug, and the overall stats will be off a bit only temporarily. | ||
| 113 | * Also, migrate_misplaced_transhuge_page() can still fail the | ||
| 114 | * migration and then we want the oldpage to retain the info. But | ||
| 115 | * in that case we also don't need to explicitly clear the info from | ||
| 116 | * the new page, which will be freed. | ||
| 117 | */ | ||
| 118 | __set_bit(PAGE_EXT_OWNER, &new_ext->flags); | ||
| 119 | } | ||
| 120 | |||
| 86 | static ssize_t | 121 | static ssize_t |
| 87 | print_page_owner(char __user *buf, size_t count, unsigned long pfn, | 122 | print_page_owner(char __user *buf, size_t count, unsigned long pfn, |
| 88 | struct page *page, struct page_ext *page_ext) | 123 | struct page *page, struct page_ext *page_ext) |
| @@ -100,8 +135,9 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn, | |||
| 100 | return -ENOMEM; | 135 | return -ENOMEM; |
| 101 | 136 | ||
| 102 | ret = snprintf(kbuf, count, | 137 | ret = snprintf(kbuf, count, |
| 103 | "Page allocated via order %u, mask 0x%x\n", | 138 | "Page allocated via order %u, mask %#x(%pGg)\n", |
| 104 | page_ext->order, page_ext->gfp_mask); | 139 | page_ext->order, page_ext->gfp_mask, |
| 140 | &page_ext->gfp_mask); | ||
| 105 | 141 | ||
| 106 | if (ret >= count) | 142 | if (ret >= count) |
| 107 | goto err; | 143 | goto err; |
| @@ -110,23 +146,12 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn, | |||
| 110 | pageblock_mt = get_pfnblock_migratetype(page, pfn); | 146 | pageblock_mt = get_pfnblock_migratetype(page, pfn); |
| 111 | page_mt = gfpflags_to_migratetype(page_ext->gfp_mask); | 147 | page_mt = gfpflags_to_migratetype(page_ext->gfp_mask); |
| 112 | ret += snprintf(kbuf + ret, count - ret, | 148 | ret += snprintf(kbuf + ret, count - ret, |
| 113 | "PFN %lu Block %lu type %d %s Flags %s%s%s%s%s%s%s%s%s%s%s%s\n", | 149 | "PFN %lu type %s Block %lu type %s Flags %#lx(%pGp)\n", |
| 114 | pfn, | 150 | pfn, |
| 151 | migratetype_names[page_mt], | ||
| 115 | pfn >> pageblock_order, | 152 | pfn >> pageblock_order, |
| 116 | pageblock_mt, | 153 | migratetype_names[pageblock_mt], |
| 117 | pageblock_mt != page_mt ? "Fallback" : " ", | 154 | page->flags, &page->flags); |
| 118 | PageLocked(page) ? "K" : " ", | ||
| 119 | PageError(page) ? "E" : " ", | ||
| 120 | PageReferenced(page) ? "R" : " ", | ||
| 121 | PageUptodate(page) ? "U" : " ", | ||
| 122 | PageDirty(page) ? "D" : " ", | ||
| 123 | PageLRU(page) ? "L" : " ", | ||
| 124 | PageActive(page) ? "A" : " ", | ||
| 125 | PageSlab(page) ? "S" : " ", | ||
| 126 | PageWriteback(page) ? "W" : " ", | ||
| 127 | PageCompound(page) ? "C" : " ", | ||
| 128 | PageSwapCache(page) ? "B" : " ", | ||
| 129 | PageMappedToDisk(page) ? "M" : " "); | ||
| 130 | 155 | ||
| 131 | if (ret >= count) | 156 | if (ret >= count) |
| 132 | goto err; | 157 | goto err; |
| @@ -135,6 +160,14 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn, | |||
| 135 | if (ret >= count) | 160 | if (ret >= count) |
| 136 | goto err; | 161 | goto err; |
| 137 | 162 | ||
| 163 | if (page_ext->last_migrate_reason != -1) { | ||
| 164 | ret += snprintf(kbuf + ret, count - ret, | ||
| 165 | "Page has been migrated, last migrate reason: %s\n", | ||
| 166 | migrate_reason_names[page_ext->last_migrate_reason]); | ||
| 167 | if (ret >= count) | ||
| 168 | goto err; | ||
| 169 | } | ||
| 170 | |||
| 138 | ret += snprintf(kbuf + ret, count - ret, "\n"); | 171 | ret += snprintf(kbuf + ret, count - ret, "\n"); |
| 139 | if (ret >= count) | 172 | if (ret >= count) |
| 140 | goto err; | 173 | goto err; |
| @@ -150,6 +183,31 @@ err: | |||
| 150 | return -ENOMEM; | 183 | return -ENOMEM; |
| 151 | } | 184 | } |
| 152 | 185 | ||
| 186 | void __dump_page_owner(struct page *page) | ||
| 187 | { | ||
| 188 | struct page_ext *page_ext = lookup_page_ext(page); | ||
| 189 | struct stack_trace trace = { | ||
| 190 | .nr_entries = page_ext->nr_entries, | ||
| 191 | .entries = &page_ext->trace_entries[0], | ||
| 192 | }; | ||
| 193 | gfp_t gfp_mask = page_ext->gfp_mask; | ||
| 194 | int mt = gfpflags_to_migratetype(gfp_mask); | ||
| 195 | |||
| 196 | if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) { | ||
| 197 | pr_alert("page_owner info is not active (free page?)\n"); | ||
| 198 | return; | ||
| 199 | } | ||
| 200 | |||
| 201 | pr_alert("page allocated via order %u, migratetype %s, " | ||
| 202 | "gfp_mask %#x(%pGg)\n", page_ext->order, | ||
| 203 | migratetype_names[mt], gfp_mask, &gfp_mask); | ||
| 204 | print_stack_trace(&trace, 0); | ||
| 205 | |||
| 206 | if (page_ext->last_migrate_reason != -1) | ||
| 207 | pr_alert("page has been migrated, last migrate reason: %s\n", | ||
| 208 | migrate_reason_names[page_ext->last_migrate_reason]); | ||
| 209 | } | ||
| 210 | |||
| 153 | static ssize_t | 211 | static ssize_t |
| 154 | read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos) | 212 | read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos) |
| 155 | { | 213 | { |
| @@ -157,7 +215,7 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos) | |||
| 157 | struct page *page; | 215 | struct page *page; |
| 158 | struct page_ext *page_ext; | 216 | struct page_ext *page_ext; |
| 159 | 217 | ||
| 160 | if (!page_owner_inited) | 218 | if (!static_branch_unlikely(&page_owner_inited)) |
| 161 | return -EINVAL; | 219 | return -EINVAL; |
| 162 | 220 | ||
| 163 | page = NULL; | 221 | page = NULL; |
| @@ -305,7 +363,7 @@ static int __init pageowner_init(void) | |||
| 305 | { | 363 | { |
| 306 | struct dentry *dentry; | 364 | struct dentry *dentry; |
| 307 | 365 | ||
| 308 | if (!page_owner_inited) { | 366 | if (!static_branch_unlikely(&page_owner_inited)) { |
| 309 | pr_info("page_owner is disabled\n"); | 367 | pr_info("page_owner is disabled\n"); |
| 310 | return 0; | 368 | return 0; |
| 311 | } | 369 | } |
diff --git a/mm/debug-pagealloc.c b/mm/page_poison.c index 5bf5906ce13b..479e7ea2bea6 100644 --- a/mm/debug-pagealloc.c +++ b/mm/page_poison.c | |||
| @@ -6,22 +6,48 @@ | |||
| 6 | #include <linux/poison.h> | 6 | #include <linux/poison.h> |
| 7 | #include <linux/ratelimit.h> | 7 | #include <linux/ratelimit.h> |
| 8 | 8 | ||
| 9 | static bool page_poisoning_enabled __read_mostly; | 9 | static bool __page_poisoning_enabled __read_mostly; |
| 10 | static bool want_page_poisoning __read_mostly; | ||
| 10 | 11 | ||
| 11 | static bool need_page_poisoning(void) | 12 | static int early_page_poison_param(char *buf) |
| 12 | { | 13 | { |
| 13 | if (!debug_pagealloc_enabled()) | 14 | if (!buf) |
| 14 | return false; | 15 | return -EINVAL; |
| 16 | |||
| 17 | if (strcmp(buf, "on") == 0) | ||
| 18 | want_page_poisoning = true; | ||
| 19 | else if (strcmp(buf, "off") == 0) | ||
| 20 | want_page_poisoning = false; | ||
| 15 | 21 | ||
| 16 | return true; | 22 | return 0; |
| 23 | } | ||
| 24 | early_param("page_poison", early_page_poison_param); | ||
| 25 | |||
| 26 | bool page_poisoning_enabled(void) | ||
| 27 | { | ||
| 28 | return __page_poisoning_enabled; | ||
| 29 | } | ||
| 30 | |||
| 31 | static bool need_page_poisoning(void) | ||
| 32 | { | ||
| 33 | return want_page_poisoning; | ||
| 17 | } | 34 | } |
| 18 | 35 | ||
| 19 | static void init_page_poisoning(void) | 36 | static void init_page_poisoning(void) |
| 20 | { | 37 | { |
| 21 | if (!debug_pagealloc_enabled()) | 38 | /* |
| 22 | return; | 39 | * page poisoning is debug page alloc for some arches. If either |
| 40 | * of those options is enabled, enable poisoning | ||
| 41 | */ | ||
| 42 | if (!IS_ENABLED(CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC)) { | ||
| 43 | if (!want_page_poisoning && !debug_pagealloc_enabled()) | ||
| 44 | return; | ||
| 45 | } else { | ||
| 46 | if (!want_page_poisoning) | ||
| 47 | return; | ||
| 48 | } | ||
| 23 | 49 | ||
| 24 | page_poisoning_enabled = true; | 50 | __page_poisoning_enabled = true; |
| 25 | } | 51 | } |
| 26 | 52 | ||
| 27 | struct page_ext_operations page_poisoning_ops = { | 53 | struct page_ext_operations page_poisoning_ops = { |
| @@ -45,11 +71,14 @@ static inline void clear_page_poison(struct page *page) | |||
| 45 | __clear_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags); | 71 | __clear_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags); |
| 46 | } | 72 | } |
| 47 | 73 | ||
| 48 | static inline bool page_poison(struct page *page) | 74 | bool page_is_poisoned(struct page *page) |
| 49 | { | 75 | { |
| 50 | struct page_ext *page_ext; | 76 | struct page_ext *page_ext; |
| 51 | 77 | ||
| 52 | page_ext = lookup_page_ext(page); | 78 | page_ext = lookup_page_ext(page); |
| 79 | if (!page_ext) | ||
| 80 | return false; | ||
| 81 | |||
| 53 | return test_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags); | 82 | return test_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags); |
| 54 | } | 83 | } |
| 55 | 84 | ||
| @@ -83,6 +112,9 @@ static void check_poison_mem(unsigned char *mem, size_t bytes) | |||
| 83 | unsigned char *start; | 112 | unsigned char *start; |
| 84 | unsigned char *end; | 113 | unsigned char *end; |
| 85 | 114 | ||
| 115 | if (IS_ENABLED(CONFIG_PAGE_POISONING_NO_SANITY)) | ||
| 116 | return; | ||
| 117 | |||
| 86 | start = memchr_inv(mem, PAGE_POISON, bytes); | 118 | start = memchr_inv(mem, PAGE_POISON, bytes); |
| 87 | if (!start) | 119 | if (!start) |
| 88 | return; | 120 | return; |
| @@ -95,9 +127,9 @@ static void check_poison_mem(unsigned char *mem, size_t bytes) | |||
| 95 | if (!__ratelimit(&ratelimit)) | 127 | if (!__ratelimit(&ratelimit)) |
| 96 | return; | 128 | return; |
| 97 | else if (start == end && single_bit_flip(*start, PAGE_POISON)) | 129 | else if (start == end && single_bit_flip(*start, PAGE_POISON)) |
| 98 | printk(KERN_ERR "pagealloc: single bit error\n"); | 130 | pr_err("pagealloc: single bit error\n"); |
| 99 | else | 131 | else |
| 100 | printk(KERN_ERR "pagealloc: memory corruption\n"); | 132 | pr_err("pagealloc: memory corruption\n"); |
| 101 | 133 | ||
| 102 | print_hex_dump(KERN_ERR, "", DUMP_PREFIX_ADDRESS, 16, 1, start, | 134 | print_hex_dump(KERN_ERR, "", DUMP_PREFIX_ADDRESS, 16, 1, start, |
| 103 | end - start + 1, 1); | 135 | end - start + 1, 1); |
| @@ -108,7 +140,7 @@ static void unpoison_page(struct page *page) | |||
| 108 | { | 140 | { |
| 109 | void *addr; | 141 | void *addr; |
| 110 | 142 | ||
| 111 | if (!page_poison(page)) | 143 | if (!page_is_poisoned(page)) |
| 112 | return; | 144 | return; |
| 113 | 145 | ||
| 114 | addr = kmap_atomic(page); | 146 | addr = kmap_atomic(page); |
| @@ -125,9 +157,9 @@ static void unpoison_pages(struct page *page, int n) | |||
| 125 | unpoison_page(page + i); | 157 | unpoison_page(page + i); |
| 126 | } | 158 | } |
| 127 | 159 | ||
| 128 | void __kernel_map_pages(struct page *page, int numpages, int enable) | 160 | void kernel_poison_pages(struct page *page, int numpages, int enable) |
| 129 | { | 161 | { |
| 130 | if (!page_poisoning_enabled) | 162 | if (!page_poisoning_enabled()) |
| 131 | return; | 163 | return; |
| 132 | 164 | ||
| 133 | if (enable) | 165 | if (enable) |
| @@ -135,3 +167,10 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) | |||
| 135 | else | 167 | else |
| 136 | poison_pages(page, numpages); | 168 | poison_pages(page, numpages); |
| 137 | } | 169 | } |
| 170 | |||
| 171 | #ifndef CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC | ||
| 172 | void __kernel_map_pages(struct page *page, int numpages, int enable) | ||
| 173 | { | ||
| 174 | /* This function does nothing, all work is done via poison pages */ | ||
| 175 | } | ||
| 176 | #endif | ||
| @@ -1287,21 +1287,17 @@ void page_add_new_anon_rmap(struct page *page, | |||
| 1287 | */ | 1287 | */ |
| 1288 | void page_add_file_rmap(struct page *page) | 1288 | void page_add_file_rmap(struct page *page) |
| 1289 | { | 1289 | { |
| 1290 | struct mem_cgroup *memcg; | 1290 | lock_page_memcg(page); |
| 1291 | |||
| 1292 | memcg = mem_cgroup_begin_page_stat(page); | ||
| 1293 | if (atomic_inc_and_test(&page->_mapcount)) { | 1291 | if (atomic_inc_and_test(&page->_mapcount)) { |
| 1294 | __inc_zone_page_state(page, NR_FILE_MAPPED); | 1292 | __inc_zone_page_state(page, NR_FILE_MAPPED); |
| 1295 | mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED); | 1293 | mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED); |
| 1296 | } | 1294 | } |
| 1297 | mem_cgroup_end_page_stat(memcg); | 1295 | unlock_page_memcg(page); |
| 1298 | } | 1296 | } |
| 1299 | 1297 | ||
| 1300 | static void page_remove_file_rmap(struct page *page) | 1298 | static void page_remove_file_rmap(struct page *page) |
| 1301 | { | 1299 | { |
| 1302 | struct mem_cgroup *memcg; | 1300 | lock_page_memcg(page); |
| 1303 | |||
| 1304 | memcg = mem_cgroup_begin_page_stat(page); | ||
| 1305 | 1301 | ||
| 1306 | /* Hugepages are not counted in NR_FILE_MAPPED for now. */ | 1302 | /* Hugepages are not counted in NR_FILE_MAPPED for now. */ |
| 1307 | if (unlikely(PageHuge(page))) { | 1303 | if (unlikely(PageHuge(page))) { |
| @@ -1320,12 +1316,12 @@ static void page_remove_file_rmap(struct page *page) | |||
| 1320 | * pte lock(a spinlock) is held, which implies preemption disabled. | 1316 | * pte lock(a spinlock) is held, which implies preemption disabled. |
| 1321 | */ | 1317 | */ |
| 1322 | __dec_zone_page_state(page, NR_FILE_MAPPED); | 1318 | __dec_zone_page_state(page, NR_FILE_MAPPED); |
| 1323 | mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED); | 1319 | mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED); |
| 1324 | 1320 | ||
| 1325 | if (unlikely(PageMlocked(page))) | 1321 | if (unlikely(PageMlocked(page))) |
| 1326 | clear_page_mlock(page); | 1322 | clear_page_mlock(page); |
| 1327 | out: | 1323 | out: |
| 1328 | mem_cgroup_end_page_stat(memcg); | 1324 | unlock_page_memcg(page); |
| 1329 | } | 1325 | } |
| 1330 | 1326 | ||
| 1331 | static void page_remove_anon_compound_rmap(struct page *page) | 1327 | static void page_remove_anon_compound_rmap(struct page *page) |
diff --git a/mm/shmem.c b/mm/shmem.c index 440e2a7e6c1c..1acfdbc4bd9e 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
| @@ -1116,7 +1116,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, | |||
| 1116 | */ | 1116 | */ |
| 1117 | oldpage = newpage; | 1117 | oldpage = newpage; |
| 1118 | } else { | 1118 | } else { |
| 1119 | mem_cgroup_replace_page(oldpage, newpage); | 1119 | mem_cgroup_migrate(oldpage, newpage); |
| 1120 | lru_cache_add_anon(newpage); | 1120 | lru_cache_add_anon(newpage); |
| 1121 | *pagep = newpage; | 1121 | *pagep = newpage; |
| 1122 | } | 1122 | } |
| @@ -169,12 +169,6 @@ typedef unsigned short freelist_idx_t; | |||
| 169 | #define SLAB_OBJ_MAX_NUM ((1 << sizeof(freelist_idx_t) * BITS_PER_BYTE) - 1) | 169 | #define SLAB_OBJ_MAX_NUM ((1 << sizeof(freelist_idx_t) * BITS_PER_BYTE) - 1) |
| 170 | 170 | ||
| 171 | /* | 171 | /* |
| 172 | * true if a page was allocated from pfmemalloc reserves for network-based | ||
| 173 | * swap | ||
| 174 | */ | ||
| 175 | static bool pfmemalloc_active __read_mostly; | ||
| 176 | |||
| 177 | /* | ||
| 178 | * struct array_cache | 172 | * struct array_cache |
| 179 | * | 173 | * |
| 180 | * Purpose: | 174 | * Purpose: |
| @@ -195,10 +189,6 @@ struct array_cache { | |||
| 195 | * Must have this definition in here for the proper | 189 | * Must have this definition in here for the proper |
| 196 | * alignment of array_cache. Also simplifies accessing | 190 | * alignment of array_cache. Also simplifies accessing |
| 197 | * the entries. | 191 | * the entries. |
| 198 | * | ||
| 199 | * Entries should not be directly dereferenced as | ||
| 200 | * entries belonging to slabs marked pfmemalloc will | ||
| 201 | * have the lower bits set SLAB_OBJ_PFMEMALLOC | ||
| 202 | */ | 192 | */ |
| 203 | }; | 193 | }; |
| 204 | 194 | ||
| @@ -207,33 +197,6 @@ struct alien_cache { | |||
| 207 | struct array_cache ac; | 197 | struct array_cache ac; |
| 208 | }; | 198 | }; |
| 209 | 199 | ||
| 210 | #define SLAB_OBJ_PFMEMALLOC 1 | ||
| 211 | static inline bool is_obj_pfmemalloc(void *objp) | ||
| 212 | { | ||
| 213 | return (unsigned long)objp & SLAB_OBJ_PFMEMALLOC; | ||
| 214 | } | ||
| 215 | |||
| 216 | static inline void set_obj_pfmemalloc(void **objp) | ||
| 217 | { | ||
| 218 | *objp = (void *)((unsigned long)*objp | SLAB_OBJ_PFMEMALLOC); | ||
| 219 | return; | ||
| 220 | } | ||
| 221 | |||
| 222 | static inline void clear_obj_pfmemalloc(void **objp) | ||
| 223 | { | ||
| 224 | *objp = (void *)((unsigned long)*objp & ~SLAB_OBJ_PFMEMALLOC); | ||
| 225 | } | ||
| 226 | |||
| 227 | /* | ||
| 228 | * bootstrap: The caches do not work without cpuarrays anymore, but the | ||
| 229 | * cpuarrays are allocated from the generic caches... | ||
| 230 | */ | ||
| 231 | #define BOOT_CPUCACHE_ENTRIES 1 | ||
| 232 | struct arraycache_init { | ||
| 233 | struct array_cache cache; | ||
| 234 | void *entries[BOOT_CPUCACHE_ENTRIES]; | ||
| 235 | }; | ||
| 236 | |||
| 237 | /* | 200 | /* |
| 238 | * Need this for bootstrapping a per node allocator. | 201 | * Need this for bootstrapping a per node allocator. |
| 239 | */ | 202 | */ |
| @@ -280,9 +243,10 @@ static void kmem_cache_node_init(struct kmem_cache_node *parent) | |||
| 280 | MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \ | 243 | MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \ |
| 281 | } while (0) | 244 | } while (0) |
| 282 | 245 | ||
| 246 | #define CFLGS_OBJFREELIST_SLAB (0x40000000UL) | ||
| 283 | #define CFLGS_OFF_SLAB (0x80000000UL) | 247 | #define CFLGS_OFF_SLAB (0x80000000UL) |
| 248 | #define OBJFREELIST_SLAB(x) ((x)->flags & CFLGS_OBJFREELIST_SLAB) | ||
| 284 | #define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB) | 249 | #define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB) |
| 285 | #define OFF_SLAB_MIN_SIZE (max_t(size_t, PAGE_SIZE >> 5, KMALLOC_MIN_SIZE + 1)) | ||
| 286 | 250 | ||
| 287 | #define BATCHREFILL_LIMIT 16 | 251 | #define BATCHREFILL_LIMIT 16 |
| 288 | /* | 252 | /* |
| @@ -390,36 +354,26 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp) | |||
| 390 | 354 | ||
| 391 | #endif | 355 | #endif |
| 392 | 356 | ||
| 393 | #define OBJECT_FREE (0) | ||
| 394 | #define OBJECT_ACTIVE (1) | ||
| 395 | |||
| 396 | #ifdef CONFIG_DEBUG_SLAB_LEAK | 357 | #ifdef CONFIG_DEBUG_SLAB_LEAK |
| 397 | 358 | ||
| 398 | static void set_obj_status(struct page *page, int idx, int val) | 359 | static inline bool is_store_user_clean(struct kmem_cache *cachep) |
| 399 | { | 360 | { |
| 400 | int freelist_size; | 361 | return atomic_read(&cachep->store_user_clean) == 1; |
| 401 | char *status; | ||
| 402 | struct kmem_cache *cachep = page->slab_cache; | ||
| 403 | |||
| 404 | freelist_size = cachep->num * sizeof(freelist_idx_t); | ||
| 405 | status = (char *)page->freelist + freelist_size; | ||
| 406 | status[idx] = val; | ||
| 407 | } | 362 | } |
| 408 | 363 | ||
| 409 | static inline unsigned int get_obj_status(struct page *page, int idx) | 364 | static inline void set_store_user_clean(struct kmem_cache *cachep) |
| 410 | { | 365 | { |
| 411 | int freelist_size; | 366 | atomic_set(&cachep->store_user_clean, 1); |
| 412 | char *status; | 367 | } |
| 413 | struct kmem_cache *cachep = page->slab_cache; | ||
| 414 | |||
| 415 | freelist_size = cachep->num * sizeof(freelist_idx_t); | ||
| 416 | status = (char *)page->freelist + freelist_size; | ||
| 417 | 368 | ||
| 418 | return status[idx]; | 369 | static inline void set_store_user_dirty(struct kmem_cache *cachep) |
| 370 | { | ||
| 371 | if (is_store_user_clean(cachep)) | ||
| 372 | atomic_set(&cachep->store_user_clean, 0); | ||
| 419 | } | 373 | } |
| 420 | 374 | ||
| 421 | #else | 375 | #else |
| 422 | static inline void set_obj_status(struct page *page, int idx, int val) {} | 376 | static inline void set_store_user_dirty(struct kmem_cache *cachep) {} |
| 423 | 377 | ||
| 424 | #endif | 378 | #endif |
| 425 | 379 | ||
| @@ -457,6 +411,7 @@ static inline unsigned int obj_to_index(const struct kmem_cache *cache, | |||
| 457 | return reciprocal_divide(offset, cache->reciprocal_buffer_size); | 411 | return reciprocal_divide(offset, cache->reciprocal_buffer_size); |
| 458 | } | 412 | } |
| 459 | 413 | ||
| 414 | #define BOOT_CPUCACHE_ENTRIES 1 | ||
| 460 | /* internal cache of cache description objs */ | 415 | /* internal cache of cache description objs */ |
| 461 | static struct kmem_cache kmem_cache_boot = { | 416 | static struct kmem_cache kmem_cache_boot = { |
| 462 | .batchcount = 1, | 417 | .batchcount = 1, |
| @@ -475,61 +430,13 @@ static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) | |||
| 475 | return this_cpu_ptr(cachep->cpu_cache); | 430 | return this_cpu_ptr(cachep->cpu_cache); |
| 476 | } | 431 | } |
| 477 | 432 | ||
| 478 | static size_t calculate_freelist_size(int nr_objs, size_t align) | ||
| 479 | { | ||
| 480 | size_t freelist_size; | ||
| 481 | |||
| 482 | freelist_size = nr_objs * sizeof(freelist_idx_t); | ||
| 483 | if (IS_ENABLED(CONFIG_DEBUG_SLAB_LEAK)) | ||
| 484 | freelist_size += nr_objs * sizeof(char); | ||
| 485 | |||
| 486 | if (align) | ||
| 487 | freelist_size = ALIGN(freelist_size, align); | ||
| 488 | |||
| 489 | return freelist_size; | ||
| 490 | } | ||
| 491 | |||
| 492 | static int calculate_nr_objs(size_t slab_size, size_t buffer_size, | ||
| 493 | size_t idx_size, size_t align) | ||
| 494 | { | ||
| 495 | int nr_objs; | ||
| 496 | size_t remained_size; | ||
| 497 | size_t freelist_size; | ||
| 498 | int extra_space = 0; | ||
| 499 | |||
| 500 | if (IS_ENABLED(CONFIG_DEBUG_SLAB_LEAK)) | ||
| 501 | extra_space = sizeof(char); | ||
| 502 | /* | ||
| 503 | * Ignore padding for the initial guess. The padding | ||
| 504 | * is at most @align-1 bytes, and @buffer_size is at | ||
| 505 | * least @align. In the worst case, this result will | ||
| 506 | * be one greater than the number of objects that fit | ||
| 507 | * into the memory allocation when taking the padding | ||
| 508 | * into account. | ||
| 509 | */ | ||
| 510 | nr_objs = slab_size / (buffer_size + idx_size + extra_space); | ||
| 511 | |||
| 512 | /* | ||
| 513 | * This calculated number will be either the right | ||
| 514 | * amount, or one greater than what we want. | ||
| 515 | */ | ||
| 516 | remained_size = slab_size - nr_objs * buffer_size; | ||
| 517 | freelist_size = calculate_freelist_size(nr_objs, align); | ||
| 518 | if (remained_size < freelist_size) | ||
| 519 | nr_objs--; | ||
| 520 | |||
| 521 | return nr_objs; | ||
| 522 | } | ||
| 523 | |||
| 524 | /* | 433 | /* |
| 525 | * Calculate the number of objects and left-over bytes for a given buffer size. | 434 | * Calculate the number of objects and left-over bytes for a given buffer size. |
| 526 | */ | 435 | */ |
| 527 | static void cache_estimate(unsigned long gfporder, size_t buffer_size, | 436 | static unsigned int cache_estimate(unsigned long gfporder, size_t buffer_size, |
| 528 | size_t align, int flags, size_t *left_over, | 437 | unsigned long flags, size_t *left_over) |
| 529 | unsigned int *num) | ||
| 530 | { | 438 | { |
| 531 | int nr_objs; | 439 | unsigned int num; |
| 532 | size_t mgmt_size; | ||
| 533 | size_t slab_size = PAGE_SIZE << gfporder; | 440 | size_t slab_size = PAGE_SIZE << gfporder; |
| 534 | 441 | ||
| 535 | /* | 442 | /* |
| @@ -537,26 +444,28 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size, | |||
| 537 | * on it. For the latter case, the memory allocated for a | 444 | * on it. For the latter case, the memory allocated for a |
| 538 | * slab is used for: | 445 | * slab is used for: |
| 539 | * | 446 | * |
| 540 | * - One unsigned int for each object | ||
| 541 | * - Padding to respect alignment of @align | ||
| 542 | * - @buffer_size bytes for each object | 447 | * - @buffer_size bytes for each object |
| 448 | * - One freelist_idx_t for each object | ||
| 449 | * | ||
| 450 | * We don't need to consider alignment of freelist because | ||
| 451 | * freelist will be at the end of slab page. The objects will be | ||
| 452 | * at the correct alignment. | ||
| 543 | * | 453 | * |
| 544 | * If the slab management structure is off the slab, then the | 454 | * If the slab management structure is off the slab, then the |
| 545 | * alignment will already be calculated into the size. Because | 455 | * alignment will already be calculated into the size. Because |
| 546 | * the slabs are all pages aligned, the objects will be at the | 456 | * the slabs are all pages aligned, the objects will be at the |
| 547 | * correct alignment when allocated. | 457 | * correct alignment when allocated. |
| 548 | */ | 458 | */ |
| 549 | if (flags & CFLGS_OFF_SLAB) { | 459 | if (flags & (CFLGS_OBJFREELIST_SLAB | CFLGS_OFF_SLAB)) { |
| 550 | mgmt_size = 0; | 460 | num = slab_size / buffer_size; |
| 551 | nr_objs = slab_size / buffer_size; | 461 | *left_over = slab_size % buffer_size; |
| 552 | |||
| 553 | } else { | 462 | } else { |
| 554 | nr_objs = calculate_nr_objs(slab_size, buffer_size, | 463 | num = slab_size / (buffer_size + sizeof(freelist_idx_t)); |
| 555 | sizeof(freelist_idx_t), align); | 464 | *left_over = slab_size % |
| 556 | mgmt_size = calculate_freelist_size(nr_objs, align); | 465 | (buffer_size + sizeof(freelist_idx_t)); |
| 557 | } | 466 | } |
| 558 | *num = nr_objs; | 467 | |
| 559 | *left_over = slab_size - nr_objs*buffer_size - mgmt_size; | 468 | return num; |
| 560 | } | 469 | } |
| 561 | 470 | ||
| 562 | #if DEBUG | 471 | #if DEBUG |
| @@ -687,120 +596,21 @@ static struct array_cache *alloc_arraycache(int node, int entries, | |||
| 687 | return ac; | 596 | return ac; |
| 688 | } | 597 | } |
| 689 | 598 | ||
| 690 | static inline bool is_slab_pfmemalloc(struct page *page) | 599 | static noinline void cache_free_pfmemalloc(struct kmem_cache *cachep, |
| 691 | { | 600 | struct page *page, void *objp) |
| 692 | return PageSlabPfmemalloc(page); | ||
| 693 | } | ||
| 694 | |||
| 695 | /* Clears pfmemalloc_active if no slabs have pfmalloc set */ | ||
| 696 | static void recheck_pfmemalloc_active(struct kmem_cache *cachep, | ||
| 697 | struct array_cache *ac) | ||
| 698 | { | ||
| 699 | struct kmem_cache_node *n = get_node(cachep, numa_mem_id()); | ||
| 700 | struct page *page; | ||
| 701 | unsigned long flags; | ||
| 702 | |||
| 703 | if (!pfmemalloc_active) | ||
| 704 | return; | ||
| 705 | |||
| 706 | spin_lock_irqsave(&n->list_lock, flags); | ||
| 707 | list_for_each_entry(page, &n->slabs_full, lru) | ||
| 708 | if (is_slab_pfmemalloc(page)) | ||
| 709 | goto out; | ||
| 710 | |||
| 711 | list_for_each_entry(page, &n->slabs_partial, lru) | ||
| 712 | if (is_slab_pfmemalloc(page)) | ||
| 713 | goto out; | ||
| 714 | |||
| 715 | list_for_each_entry(page, &n->slabs_free, lru) | ||
| 716 | if (is_slab_pfmemalloc(page)) | ||
| 717 | goto out; | ||
| 718 | |||
| 719 | pfmemalloc_active = false; | ||
| 720 | out: | ||
| 721 | spin_unlock_irqrestore(&n->list_lock, flags); | ||
| 722 | } | ||
| 723 | |||
| 724 | static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac, | ||
| 725 | gfp_t flags, bool force_refill) | ||
| 726 | { | 601 | { |
| 727 | int i; | 602 | struct kmem_cache_node *n; |
| 728 | void *objp = ac->entry[--ac->avail]; | 603 | int page_node; |
| 729 | 604 | LIST_HEAD(list); | |
| 730 | /* Ensure the caller is allowed to use objects from PFMEMALLOC slab */ | ||
| 731 | if (unlikely(is_obj_pfmemalloc(objp))) { | ||
| 732 | struct kmem_cache_node *n; | ||
| 733 | |||
| 734 | if (gfp_pfmemalloc_allowed(flags)) { | ||
| 735 | clear_obj_pfmemalloc(&objp); | ||
| 736 | return objp; | ||
| 737 | } | ||
| 738 | |||
| 739 | /* The caller cannot use PFMEMALLOC objects, find another one */ | ||
| 740 | for (i = 0; i < ac->avail; i++) { | ||
| 741 | /* If a !PFMEMALLOC object is found, swap them */ | ||
| 742 | if (!is_obj_pfmemalloc(ac->entry[i])) { | ||
| 743 | objp = ac->entry[i]; | ||
| 744 | ac->entry[i] = ac->entry[ac->avail]; | ||
| 745 | ac->entry[ac->avail] = objp; | ||
| 746 | return objp; | ||
| 747 | } | ||
| 748 | } | ||
| 749 | |||
| 750 | /* | ||
| 751 | * If there are empty slabs on the slabs_free list and we are | ||
| 752 | * being forced to refill the cache, mark this one !pfmemalloc. | ||
| 753 | */ | ||
| 754 | n = get_node(cachep, numa_mem_id()); | ||
| 755 | if (!list_empty(&n->slabs_free) && force_refill) { | ||
| 756 | struct page *page = virt_to_head_page(objp); | ||
| 757 | ClearPageSlabPfmemalloc(page); | ||
| 758 | clear_obj_pfmemalloc(&objp); | ||
| 759 | recheck_pfmemalloc_active(cachep, ac); | ||
| 760 | return objp; | ||
| 761 | } | ||
| 762 | |||
| 763 | /* No !PFMEMALLOC objects available */ | ||
| 764 | ac->avail++; | ||
| 765 | objp = NULL; | ||
| 766 | } | ||
| 767 | |||
| 768 | return objp; | ||
| 769 | } | ||
| 770 | |||
| 771 | static inline void *ac_get_obj(struct kmem_cache *cachep, | ||
| 772 | struct array_cache *ac, gfp_t flags, bool force_refill) | ||
| 773 | { | ||
| 774 | void *objp; | ||
| 775 | |||
| 776 | if (unlikely(sk_memalloc_socks())) | ||
| 777 | objp = __ac_get_obj(cachep, ac, flags, force_refill); | ||
| 778 | else | ||
| 779 | objp = ac->entry[--ac->avail]; | ||
| 780 | |||
| 781 | return objp; | ||
| 782 | } | ||
| 783 | |||
| 784 | static noinline void *__ac_put_obj(struct kmem_cache *cachep, | ||
| 785 | struct array_cache *ac, void *objp) | ||
| 786 | { | ||
| 787 | if (unlikely(pfmemalloc_active)) { | ||
| 788 | /* Some pfmemalloc slabs exist, check if this is one */ | ||
| 789 | struct page *page = virt_to_head_page(objp); | ||
| 790 | if (PageSlabPfmemalloc(page)) | ||
| 791 | set_obj_pfmemalloc(&objp); | ||
| 792 | } | ||
| 793 | 605 | ||
| 794 | return objp; | 606 | page_node = page_to_nid(page); |
| 795 | } | 607 | n = get_node(cachep, page_node); |
| 796 | 608 | ||
| 797 | static inline void ac_put_obj(struct kmem_cache *cachep, struct array_cache *ac, | 609 | spin_lock(&n->list_lock); |
| 798 | void *objp) | 610 | free_block(cachep, &objp, 1, page_node, &list); |
| 799 | { | 611 | spin_unlock(&n->list_lock); |
| 800 | if (unlikely(sk_memalloc_socks())) | ||
| 801 | objp = __ac_put_obj(cachep, ac, objp); | ||
| 802 | 612 | ||
| 803 | ac->entry[ac->avail++] = objp; | 613 | slabs_destroy(cachep, &list); |
| 804 | } | 614 | } |
| 805 | 615 | ||
| 806 | /* | 616 | /* |
| @@ -1003,7 +813,7 @@ static int __cache_free_alien(struct kmem_cache *cachep, void *objp, | |||
| 1003 | STATS_INC_ACOVERFLOW(cachep); | 813 | STATS_INC_ACOVERFLOW(cachep); |
| 1004 | __drain_alien_cache(cachep, ac, page_node, &list); | 814 | __drain_alien_cache(cachep, ac, page_node, &list); |
| 1005 | } | 815 | } |
| 1006 | ac_put_obj(cachep, ac, objp); | 816 | ac->entry[ac->avail++] = objp; |
| 1007 | spin_unlock(&alien->lock); | 817 | spin_unlock(&alien->lock); |
| 1008 | slabs_destroy(cachep, &list); | 818 | slabs_destroy(cachep, &list); |
| 1009 | } else { | 819 | } else { |
| @@ -1540,10 +1350,9 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) | |||
| 1540 | if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slab_oom_rs)) | 1350 | if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slab_oom_rs)) |
| 1541 | return; | 1351 | return; |
| 1542 | 1352 | ||
| 1543 | printk(KERN_WARNING | 1353 | pr_warn("SLAB: Unable to allocate memory on node %d, gfp=%#x(%pGg)\n", |
| 1544 | "SLAB: Unable to allocate memory on node %d (gfp=0x%x)\n", | 1354 | nodeid, gfpflags, &gfpflags); |
| 1545 | nodeid, gfpflags); | 1355 | pr_warn(" cache: %s, object size: %d, order: %d\n", |
| 1546 | printk(KERN_WARNING " cache: %s, object size: %d, order: %d\n", | ||
| 1547 | cachep->name, cachep->size, cachep->gfporder); | 1356 | cachep->name, cachep->size, cachep->gfporder); |
| 1548 | 1357 | ||
| 1549 | for_each_kmem_cache_node(cachep, node, n) { | 1358 | for_each_kmem_cache_node(cachep, node, n) { |
| @@ -1567,8 +1376,7 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) | |||
| 1567 | 1376 | ||
| 1568 | num_slabs += active_slabs; | 1377 | num_slabs += active_slabs; |
| 1569 | num_objs = num_slabs * cachep->num; | 1378 | num_objs = num_slabs * cachep->num; |
| 1570 | printk(KERN_WARNING | 1379 | pr_warn(" node %d: slabs: %ld/%ld, objs: %ld/%ld, free: %ld\n", |
| 1571 | " node %d: slabs: %ld/%ld, objs: %ld/%ld, free: %ld\n", | ||
| 1572 | node, active_slabs, num_slabs, active_objs, num_objs, | 1380 | node, active_slabs, num_slabs, active_objs, num_objs, |
| 1573 | free_objects); | 1381 | free_objects); |
| 1574 | } | 1382 | } |
| @@ -1604,10 +1412,6 @@ static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, | |||
| 1604 | return NULL; | 1412 | return NULL; |
| 1605 | } | 1413 | } |
| 1606 | 1414 | ||
| 1607 | /* Record if ALLOC_NO_WATERMARKS was set when allocating the slab */ | ||
| 1608 | if (page_is_pfmemalloc(page)) | ||
| 1609 | pfmemalloc_active = true; | ||
| 1610 | |||
| 1611 | nr_pages = (1 << cachep->gfporder); | 1415 | nr_pages = (1 << cachep->gfporder); |
| 1612 | if (cachep->flags & SLAB_RECLAIM_ACCOUNT) | 1416 | if (cachep->flags & SLAB_RECLAIM_ACCOUNT) |
| 1613 | add_zone_page_state(page_zone(page), | 1417 | add_zone_page_state(page_zone(page), |
| @@ -1615,8 +1419,10 @@ static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, | |||
| 1615 | else | 1419 | else |
| 1616 | add_zone_page_state(page_zone(page), | 1420 | add_zone_page_state(page_zone(page), |
| 1617 | NR_SLAB_UNRECLAIMABLE, nr_pages); | 1421 | NR_SLAB_UNRECLAIMABLE, nr_pages); |
| 1422 | |||
| 1618 | __SetPageSlab(page); | 1423 | __SetPageSlab(page); |
| 1619 | if (page_is_pfmemalloc(page)) | 1424 | /* Record if ALLOC_NO_WATERMARKS was set when allocating the slab */ |
| 1425 | if (sk_memalloc_socks() && page_is_pfmemalloc(page)) | ||
| 1620 | SetPageSlabPfmemalloc(page); | 1426 | SetPageSlabPfmemalloc(page); |
| 1621 | 1427 | ||
| 1622 | if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) { | 1428 | if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) { |
| @@ -1670,6 +1476,14 @@ static void kmem_rcu_free(struct rcu_head *head) | |||
| 1670 | } | 1476 | } |
| 1671 | 1477 | ||
| 1672 | #if DEBUG | 1478 | #if DEBUG |
| 1479 | static bool is_debug_pagealloc_cache(struct kmem_cache *cachep) | ||
| 1480 | { | ||
| 1481 | if (debug_pagealloc_enabled() && OFF_SLAB(cachep) && | ||
| 1482 | (cachep->size % PAGE_SIZE) == 0) | ||
| 1483 | return true; | ||
| 1484 | |||
| 1485 | return false; | ||
| 1486 | } | ||
| 1673 | 1487 | ||
| 1674 | #ifdef CONFIG_DEBUG_PAGEALLOC | 1488 | #ifdef CONFIG_DEBUG_PAGEALLOC |
| 1675 | static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr, | 1489 | static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr, |
| @@ -1703,6 +1517,23 @@ static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr, | |||
| 1703 | } | 1517 | } |
| 1704 | *addr++ = 0x87654321; | 1518 | *addr++ = 0x87654321; |
| 1705 | } | 1519 | } |
| 1520 | |||
| 1521 | static void slab_kernel_map(struct kmem_cache *cachep, void *objp, | ||
| 1522 | int map, unsigned long caller) | ||
| 1523 | { | ||
| 1524 | if (!is_debug_pagealloc_cache(cachep)) | ||
| 1525 | return; | ||
| 1526 | |||
| 1527 | if (caller) | ||
| 1528 | store_stackinfo(cachep, objp, caller); | ||
| 1529 | |||
| 1530 | kernel_map_pages(virt_to_page(objp), cachep->size / PAGE_SIZE, map); | ||
| 1531 | } | ||
| 1532 | |||
| 1533 | #else | ||
| 1534 | static inline void slab_kernel_map(struct kmem_cache *cachep, void *objp, | ||
| 1535 | int map, unsigned long caller) {} | ||
| 1536 | |||
| 1706 | #endif | 1537 | #endif |
| 1707 | 1538 | ||
| 1708 | static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val) | 1539 | static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val) |
| @@ -1781,6 +1612,9 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp) | |||
| 1781 | int size, i; | 1612 | int size, i; |
| 1782 | int lines = 0; | 1613 | int lines = 0; |
| 1783 | 1614 | ||
| 1615 | if (is_debug_pagealloc_cache(cachep)) | ||
| 1616 | return; | ||
| 1617 | |||
| 1784 | realobj = (char *)objp + obj_offset(cachep); | 1618 | realobj = (char *)objp + obj_offset(cachep); |
| 1785 | size = cachep->object_size; | 1619 | size = cachep->object_size; |
| 1786 | 1620 | ||
| @@ -1842,20 +1676,18 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, | |||
| 1842 | struct page *page) | 1676 | struct page *page) |
| 1843 | { | 1677 | { |
| 1844 | int i; | 1678 | int i; |
| 1679 | |||
| 1680 | if (OBJFREELIST_SLAB(cachep) && cachep->flags & SLAB_POISON) { | ||
| 1681 | poison_obj(cachep, page->freelist - obj_offset(cachep), | ||
| 1682 | POISON_FREE); | ||
| 1683 | } | ||
| 1684 | |||
| 1845 | for (i = 0; i < cachep->num; i++) { | 1685 | for (i = 0; i < cachep->num; i++) { |
| 1846 | void *objp = index_to_obj(cachep, page, i); | 1686 | void *objp = index_to_obj(cachep, page, i); |
| 1847 | 1687 | ||
| 1848 | if (cachep->flags & SLAB_POISON) { | 1688 | if (cachep->flags & SLAB_POISON) { |
| 1849 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
| 1850 | if (cachep->size % PAGE_SIZE == 0 && | ||
| 1851 | OFF_SLAB(cachep)) | ||
| 1852 | kernel_map_pages(virt_to_page(objp), | ||
| 1853 | cachep->size / PAGE_SIZE, 1); | ||
| 1854 | else | ||
| 1855 | check_poison_obj(cachep, objp); | ||
| 1856 | #else | ||
| 1857 | check_poison_obj(cachep, objp); | 1689 | check_poison_obj(cachep, objp); |
| 1858 | #endif | 1690 | slab_kernel_map(cachep, objp, 1, 0); |
| 1859 | } | 1691 | } |
| 1860 | if (cachep->flags & SLAB_RED_ZONE) { | 1692 | if (cachep->flags & SLAB_RED_ZONE) { |
| 1861 | if (*dbg_redzone1(cachep, objp) != RED_INACTIVE) | 1693 | if (*dbg_redzone1(cachep, objp) != RED_INACTIVE) |
| @@ -1916,7 +1748,6 @@ static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list) | |||
| 1916 | * calculate_slab_order - calculate size (page order) of slabs | 1748 | * calculate_slab_order - calculate size (page order) of slabs |
| 1917 | * @cachep: pointer to the cache that is being created | 1749 | * @cachep: pointer to the cache that is being created |
| 1918 | * @size: size of objects to be created in this cache. | 1750 | * @size: size of objects to be created in this cache. |
| 1919 | * @align: required alignment for the objects. | ||
| 1920 | * @flags: slab allocation flags | 1751 | * @flags: slab allocation flags |
| 1921 | * | 1752 | * |
| 1922 | * Also calculates the number of objects per slab. | 1753 | * Also calculates the number of objects per slab. |
| @@ -1926,9 +1757,8 @@ static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list) | |||
| 1926 | * towards high-order requests, this should be changed. | 1757 | * towards high-order requests, this should be changed. |
| 1927 | */ | 1758 | */ |
| 1928 | static size_t calculate_slab_order(struct kmem_cache *cachep, | 1759 | static size_t calculate_slab_order(struct kmem_cache *cachep, |
| 1929 | size_t size, size_t align, unsigned long flags) | 1760 | size_t size, unsigned long flags) |
| 1930 | { | 1761 | { |
| 1931 | unsigned long offslab_limit; | ||
| 1932 | size_t left_over = 0; | 1762 | size_t left_over = 0; |
| 1933 | int gfporder; | 1763 | int gfporder; |
| 1934 | 1764 | ||
| @@ -1936,7 +1766,7 @@ static size_t calculate_slab_order(struct kmem_cache *cachep, | |||
| 1936 | unsigned int num; | 1766 | unsigned int num; |
| 1937 | size_t remainder; | 1767 | size_t remainder; |
| 1938 | 1768 | ||
| 1939 | cache_estimate(gfporder, size, align, flags, &remainder, &num); | 1769 | num = cache_estimate(gfporder, size, flags, &remainder); |
| 1940 | if (!num) | 1770 | if (!num) |
| 1941 | continue; | 1771 | continue; |
| 1942 | 1772 | ||
| @@ -1945,19 +1775,24 @@ static size_t calculate_slab_order(struct kmem_cache *cachep, | |||
| 1945 | break; | 1775 | break; |
| 1946 | 1776 | ||
| 1947 | if (flags & CFLGS_OFF_SLAB) { | 1777 | if (flags & CFLGS_OFF_SLAB) { |
| 1948 | size_t freelist_size_per_obj = sizeof(freelist_idx_t); | 1778 | struct kmem_cache *freelist_cache; |
| 1779 | size_t freelist_size; | ||
| 1780 | |||
| 1781 | freelist_size = num * sizeof(freelist_idx_t); | ||
| 1782 | freelist_cache = kmalloc_slab(freelist_size, 0u); | ||
| 1783 | if (!freelist_cache) | ||
| 1784 | continue; | ||
| 1785 | |||
| 1949 | /* | 1786 | /* |
| 1950 | * Max number of objs-per-slab for caches which | 1787 | * Needed to avoid possible looping condition |
| 1951 | * use off-slab slabs. Needed to avoid a possible | 1788 | * in cache_grow() |
| 1952 | * looping condition in cache_grow(). | ||
| 1953 | */ | 1789 | */ |
| 1954 | if (IS_ENABLED(CONFIG_DEBUG_SLAB_LEAK)) | 1790 | if (OFF_SLAB(freelist_cache)) |
| 1955 | freelist_size_per_obj += sizeof(char); | 1791 | continue; |
| 1956 | offslab_limit = size; | ||
| 1957 | offslab_limit /= freelist_size_per_obj; | ||
| 1958 | 1792 | ||
| 1959 | if (num > offslab_limit) | 1793 | /* check if off slab has enough benefit */ |
| 1960 | break; | 1794 | if (freelist_cache->size > cachep->size / 2) |
| 1795 | continue; | ||
| 1961 | } | 1796 | } |
| 1962 | 1797 | ||
| 1963 | /* Found something acceptable - save it away */ | 1798 | /* Found something acceptable - save it away */ |
| @@ -2075,6 +1910,79 @@ __kmem_cache_alias(const char *name, size_t size, size_t align, | |||
| 2075 | return cachep; | 1910 | return cachep; |
| 2076 | } | 1911 | } |
| 2077 | 1912 | ||
| 1913 | static bool set_objfreelist_slab_cache(struct kmem_cache *cachep, | ||
| 1914 | size_t size, unsigned long flags) | ||
| 1915 | { | ||
| 1916 | size_t left; | ||
| 1917 | |||
| 1918 | cachep->num = 0; | ||
| 1919 | |||
| 1920 | if (cachep->ctor || flags & SLAB_DESTROY_BY_RCU) | ||
| 1921 | return false; | ||
| 1922 | |||
| 1923 | left = calculate_slab_order(cachep, size, | ||
| 1924 | flags | CFLGS_OBJFREELIST_SLAB); | ||
| 1925 | if (!cachep->num) | ||
| 1926 | return false; | ||
| 1927 | |||
| 1928 | if (cachep->num * sizeof(freelist_idx_t) > cachep->object_size) | ||
| 1929 | return false; | ||
| 1930 | |||
| 1931 | cachep->colour = left / cachep->colour_off; | ||
| 1932 | |||
| 1933 | return true; | ||
| 1934 | } | ||
| 1935 | |||
| 1936 | static bool set_off_slab_cache(struct kmem_cache *cachep, | ||
| 1937 | size_t size, unsigned long flags) | ||
| 1938 | { | ||
| 1939 | size_t left; | ||
| 1940 | |||
| 1941 | cachep->num = 0; | ||
| 1942 | |||
| 1943 | /* | ||
| 1944 | * Always use on-slab management when SLAB_NOLEAKTRACE | ||
| 1945 | * to avoid recursive calls into kmemleak. | ||
| 1946 | */ | ||
| 1947 | if (flags & SLAB_NOLEAKTRACE) | ||
| 1948 | return false; | ||
| 1949 | |||
| 1950 | /* | ||
| 1951 | * Size is large, assume best to place the slab management obj | ||
| 1952 | * off-slab (should allow better packing of objs). | ||
| 1953 | */ | ||
| 1954 | left = calculate_slab_order(cachep, size, flags | CFLGS_OFF_SLAB); | ||
| 1955 | if (!cachep->num) | ||
| 1956 | return false; | ||
| 1957 | |||
| 1958 | /* | ||
| 1959 | * If the slab has been placed off-slab, and we have enough space then | ||
| 1960 | * move it on-slab. This is at the expense of any extra colouring. | ||
| 1961 | */ | ||
| 1962 | if (left >= cachep->num * sizeof(freelist_idx_t)) | ||
| 1963 | return false; | ||
| 1964 | |||
| 1965 | cachep->colour = left / cachep->colour_off; | ||
| 1966 | |||
| 1967 | return true; | ||
| 1968 | } | ||
| 1969 | |||
| 1970 | static bool set_on_slab_cache(struct kmem_cache *cachep, | ||
| 1971 | size_t size, unsigned long flags) | ||
| 1972 | { | ||
| 1973 | size_t left; | ||
| 1974 | |||
| 1975 | cachep->num = 0; | ||
| 1976 | |||
| 1977 | left = calculate_slab_order(cachep, size, flags); | ||
| 1978 | if (!cachep->num) | ||
| 1979 | return false; | ||
| 1980 | |||
| 1981 | cachep->colour = left / cachep->colour_off; | ||
| 1982 | |||
| 1983 | return true; | ||
| 1984 | } | ||
| 1985 | |||
| 2078 | /** | 1986 | /** |
| 2079 | * __kmem_cache_create - Create a cache. | 1987 | * __kmem_cache_create - Create a cache. |
| 2080 | * @cachep: cache management descriptor | 1988 | * @cachep: cache management descriptor |
| @@ -2099,7 +2007,6 @@ __kmem_cache_alias(const char *name, size_t size, size_t align, | |||
| 2099 | int | 2007 | int |
| 2100 | __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) | 2008 | __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) |
| 2101 | { | 2009 | { |
| 2102 | size_t left_over, freelist_size; | ||
| 2103 | size_t ralign = BYTES_PER_WORD; | 2010 | size_t ralign = BYTES_PER_WORD; |
| 2104 | gfp_t gfp; | 2011 | gfp_t gfp; |
| 2105 | int err; | 2012 | int err; |
| @@ -2119,8 +2026,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) | |||
| 2119 | if (!(flags & SLAB_DESTROY_BY_RCU)) | 2026 | if (!(flags & SLAB_DESTROY_BY_RCU)) |
| 2120 | flags |= SLAB_POISON; | 2027 | flags |= SLAB_POISON; |
| 2121 | #endif | 2028 | #endif |
| 2122 | if (flags & SLAB_DESTROY_BY_RCU) | ||
| 2123 | BUG_ON(flags & SLAB_POISON); | ||
| 2124 | #endif | 2029 | #endif |
| 2125 | 2030 | ||
| 2126 | /* | 2031 | /* |
| @@ -2152,6 +2057,10 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) | |||
| 2152 | * 4) Store it. | 2057 | * 4) Store it. |
| 2153 | */ | 2058 | */ |
| 2154 | cachep->align = ralign; | 2059 | cachep->align = ralign; |
| 2060 | cachep->colour_off = cache_line_size(); | ||
| 2061 | /* Offset must be a multiple of the alignment. */ | ||
| 2062 | if (cachep->colour_off < cachep->align) | ||
| 2063 | cachep->colour_off = cachep->align; | ||
| 2155 | 2064 | ||
| 2156 | if (slab_is_available()) | 2065 | if (slab_is_available()) |
| 2157 | gfp = GFP_KERNEL; | 2066 | gfp = GFP_KERNEL; |
| @@ -2179,37 +2088,8 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) | |||
| 2179 | else | 2088 | else |
| 2180 | size += BYTES_PER_WORD; | 2089 | size += BYTES_PER_WORD; |
| 2181 | } | 2090 | } |
| 2182 | #if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC) | ||
| 2183 | /* | ||
| 2184 | * To activate debug pagealloc, off-slab management is necessary | ||
| 2185 | * requirement. In early phase of initialization, small sized slab | ||
| 2186 | * doesn't get initialized so it would not be possible. So, we need | ||
| 2187 | * to check size >= 256. It guarantees that all necessary small | ||
| 2188 | * sized slab is initialized in current slab initialization sequence. | ||
| 2189 | */ | ||
| 2190 | if (!slab_early_init && size >= kmalloc_size(INDEX_NODE) && | ||
| 2191 | size >= 256 && cachep->object_size > cache_line_size() && | ||
| 2192 | ALIGN(size, cachep->align) < PAGE_SIZE) { | ||
| 2193 | cachep->obj_offset += PAGE_SIZE - ALIGN(size, cachep->align); | ||
| 2194 | size = PAGE_SIZE; | ||
| 2195 | } | ||
| 2196 | #endif | ||
| 2197 | #endif | 2091 | #endif |
| 2198 | 2092 | ||
| 2199 | /* | ||
| 2200 | * Determine if the slab management is 'on' or 'off' slab. | ||
| 2201 | * (bootstrapping cannot cope with offslab caches so don't do | ||
| 2202 | * it too early on. Always use on-slab management when | ||
| 2203 | * SLAB_NOLEAKTRACE to avoid recursive calls into kmemleak) | ||
| 2204 | */ | ||
| 2205 | if (size >= OFF_SLAB_MIN_SIZE && !slab_early_init && | ||
| 2206 | !(flags & SLAB_NOLEAKTRACE)) | ||
| 2207 | /* | ||
| 2208 | * Size is large, assume best to place the slab management obj | ||
| 2209 | * off-slab (should allow better packing of objs). | ||
| 2210 | */ | ||
| 2211 | flags |= CFLGS_OFF_SLAB; | ||
| 2212 | |||
| 2213 | size = ALIGN(size, cachep->align); | 2093 | size = ALIGN(size, cachep->align); |
| 2214 | /* | 2094 | /* |
| 2215 | * We should restrict the number of objects in a slab to implement | 2095 | * We should restrict the number of objects in a slab to implement |
| @@ -2218,42 +2098,46 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) | |||
| 2218 | if (FREELIST_BYTE_INDEX && size < SLAB_OBJ_MIN_SIZE) | 2098 | if (FREELIST_BYTE_INDEX && size < SLAB_OBJ_MIN_SIZE) |
| 2219 | size = ALIGN(SLAB_OBJ_MIN_SIZE, cachep->align); | 2099 | size = ALIGN(SLAB_OBJ_MIN_SIZE, cachep->align); |
| 2220 | 2100 | ||
| 2221 | left_over = calculate_slab_order(cachep, size, cachep->align, flags); | 2101 | #if DEBUG |
| 2222 | |||
| 2223 | if (!cachep->num) | ||
| 2224 | return -E2BIG; | ||
| 2225 | |||
| 2226 | freelist_size = calculate_freelist_size(cachep->num, cachep->align); | ||
| 2227 | |||
| 2228 | /* | 2102 | /* |
| 2229 | * If the slab has been placed off-slab, and we have enough space then | 2103 | * To activate debug pagealloc, off-slab management is necessary |
| 2230 | * move it on-slab. This is at the expense of any extra colouring. | 2104 | * requirement. In early phase of initialization, small sized slab |
| 2105 | * doesn't get initialized so it would not be possible. So, we need | ||
| 2106 | * to check size >= 256. It guarantees that all necessary small | ||
| 2107 | * sized slab is initialized in current slab initialization sequence. | ||
| 2231 | */ | 2108 | */ |
| 2232 | if (flags & CFLGS_OFF_SLAB && left_over >= freelist_size) { | 2109 | if (debug_pagealloc_enabled() && (flags & SLAB_POISON) && |
| 2233 | flags &= ~CFLGS_OFF_SLAB; | 2110 | size >= 256 && cachep->object_size > cache_line_size()) { |
| 2234 | left_over -= freelist_size; | 2111 | if (size < PAGE_SIZE || size % PAGE_SIZE == 0) { |
| 2112 | size_t tmp_size = ALIGN(size, PAGE_SIZE); | ||
| 2113 | |||
| 2114 | if (set_off_slab_cache(cachep, tmp_size, flags)) { | ||
| 2115 | flags |= CFLGS_OFF_SLAB; | ||
| 2116 | cachep->obj_offset += tmp_size - size; | ||
| 2117 | size = tmp_size; | ||
| 2118 | goto done; | ||
| 2119 | } | ||
| 2120 | } | ||
| 2235 | } | 2121 | } |
| 2122 | #endif | ||
| 2236 | 2123 | ||
| 2237 | if (flags & CFLGS_OFF_SLAB) { | 2124 | if (set_objfreelist_slab_cache(cachep, size, flags)) { |
| 2238 | /* really off slab. No need for manual alignment */ | 2125 | flags |= CFLGS_OBJFREELIST_SLAB; |
| 2239 | freelist_size = calculate_freelist_size(cachep->num, 0); | 2126 | goto done; |
| 2127 | } | ||
| 2240 | 2128 | ||
| 2241 | #ifdef CONFIG_PAGE_POISONING | 2129 | if (set_off_slab_cache(cachep, size, flags)) { |
| 2242 | /* If we're going to use the generic kernel_map_pages() | 2130 | flags |= CFLGS_OFF_SLAB; |
| 2243 | * poisoning, then it's going to smash the contents of | 2131 | goto done; |
| 2244 | * the redzone and userword anyhow, so switch them off. | ||
| 2245 | */ | ||
| 2246 | if (size % PAGE_SIZE == 0 && flags & SLAB_POISON) | ||
| 2247 | flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); | ||
| 2248 | #endif | ||
| 2249 | } | 2132 | } |
| 2250 | 2133 | ||
| 2251 | cachep->colour_off = cache_line_size(); | 2134 | if (set_on_slab_cache(cachep, size, flags)) |
| 2252 | /* Offset must be a multiple of the alignment. */ | 2135 | goto done; |
| 2253 | if (cachep->colour_off < cachep->align) | 2136 | |
| 2254 | cachep->colour_off = cachep->align; | 2137 | return -E2BIG; |
| 2255 | cachep->colour = left_over / cachep->colour_off; | 2138 | |
| 2256 | cachep->freelist_size = freelist_size; | 2139 | done: |
| 2140 | cachep->freelist_size = cachep->num * sizeof(freelist_idx_t); | ||
| 2257 | cachep->flags = flags; | 2141 | cachep->flags = flags; |
| 2258 | cachep->allocflags = __GFP_COMP; | 2142 | cachep->allocflags = __GFP_COMP; |
| 2259 | if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA)) | 2143 | if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA)) |
| @@ -2261,16 +2145,21 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) | |||
| 2261 | cachep->size = size; | 2145 | cachep->size = size; |
| 2262 | cachep->reciprocal_buffer_size = reciprocal_value(size); | 2146 | cachep->reciprocal_buffer_size = reciprocal_value(size); |
| 2263 | 2147 | ||
| 2264 | if (flags & CFLGS_OFF_SLAB) { | 2148 | #if DEBUG |
| 2265 | cachep->freelist_cache = kmalloc_slab(freelist_size, 0u); | 2149 | /* |
| 2266 | /* | 2150 | * If we're going to use the generic kernel_map_pages() |
| 2267 | * This is a possibility for one of the kmalloc_{dma,}_caches. | 2151 | * poisoning, then it's going to smash the contents of |
| 2268 | * But since we go off slab only for object size greater than | 2152 | * the redzone and userword anyhow, so switch them off. |
| 2269 | * OFF_SLAB_MIN_SIZE, and kmalloc_{dma,}_caches get created | 2153 | */ |
| 2270 | * in ascending order,this should not happen at all. | 2154 | if (IS_ENABLED(CONFIG_PAGE_POISONING) && |
| 2271 | * But leave a BUG_ON for some lucky dude. | 2155 | (cachep->flags & SLAB_POISON) && |
| 2272 | */ | 2156 | is_debug_pagealloc_cache(cachep)) |
| 2273 | BUG_ON(ZERO_OR_NULL_PTR(cachep->freelist_cache)); | 2157 | cachep->flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); |
| 2158 | #endif | ||
| 2159 | |||
| 2160 | if (OFF_SLAB(cachep)) { | ||
| 2161 | cachep->freelist_cache = | ||
| 2162 | kmalloc_slab(cachep->freelist_size, 0u); | ||
| 2274 | } | 2163 | } |
| 2275 | 2164 | ||
| 2276 | err = setup_cpu_cache(cachep, gfp); | 2165 | err = setup_cpu_cache(cachep, gfp); |
| @@ -2377,9 +2266,6 @@ static int drain_freelist(struct kmem_cache *cache, | |||
| 2377 | } | 2266 | } |
| 2378 | 2267 | ||
| 2379 | page = list_entry(p, struct page, lru); | 2268 | page = list_entry(p, struct page, lru); |
| 2380 | #if DEBUG | ||
| 2381 | BUG_ON(page->active); | ||
| 2382 | #endif | ||
| 2383 | list_del(&page->lru); | 2269 | list_del(&page->lru); |
| 2384 | /* | 2270 | /* |
| 2385 | * Safe to drop the lock. The slab is no longer linked | 2271 | * Safe to drop the lock. The slab is no longer linked |
| @@ -2454,18 +2340,23 @@ static void *alloc_slabmgmt(struct kmem_cache *cachep, | |||
| 2454 | void *freelist; | 2340 | void *freelist; |
| 2455 | void *addr = page_address(page); | 2341 | void *addr = page_address(page); |
| 2456 | 2342 | ||
| 2457 | if (OFF_SLAB(cachep)) { | 2343 | page->s_mem = addr + colour_off; |
| 2344 | page->active = 0; | ||
| 2345 | |||
| 2346 | if (OBJFREELIST_SLAB(cachep)) | ||
| 2347 | freelist = NULL; | ||
| 2348 | else if (OFF_SLAB(cachep)) { | ||
| 2458 | /* Slab management obj is off-slab. */ | 2349 | /* Slab management obj is off-slab. */ |
| 2459 | freelist = kmem_cache_alloc_node(cachep->freelist_cache, | 2350 | freelist = kmem_cache_alloc_node(cachep->freelist_cache, |
| 2460 | local_flags, nodeid); | 2351 | local_flags, nodeid); |
| 2461 | if (!freelist) | 2352 | if (!freelist) |
| 2462 | return NULL; | 2353 | return NULL; |
| 2463 | } else { | 2354 | } else { |
| 2464 | freelist = addr + colour_off; | 2355 | /* We will use last bytes at the slab for freelist */ |
| 2465 | colour_off += cachep->freelist_size; | 2356 | freelist = addr + (PAGE_SIZE << cachep->gfporder) - |
| 2357 | cachep->freelist_size; | ||
| 2466 | } | 2358 | } |
| 2467 | page->active = 0; | 2359 | |
| 2468 | page->s_mem = addr + colour_off; | ||
| 2469 | return freelist; | 2360 | return freelist; |
| 2470 | } | 2361 | } |
| 2471 | 2362 | ||
| @@ -2480,17 +2371,14 @@ static inline void set_free_obj(struct page *page, | |||
| 2480 | ((freelist_idx_t *)(page->freelist))[idx] = val; | 2371 | ((freelist_idx_t *)(page->freelist))[idx] = val; |
| 2481 | } | 2372 | } |
| 2482 | 2373 | ||
| 2483 | static void cache_init_objs(struct kmem_cache *cachep, | 2374 | static void cache_init_objs_debug(struct kmem_cache *cachep, struct page *page) |
| 2484 | struct page *page) | ||
| 2485 | { | 2375 | { |
| 2376 | #if DEBUG | ||
| 2486 | int i; | 2377 | int i; |
| 2487 | 2378 | ||
| 2488 | for (i = 0; i < cachep->num; i++) { | 2379 | for (i = 0; i < cachep->num; i++) { |
| 2489 | void *objp = index_to_obj(cachep, page, i); | 2380 | void *objp = index_to_obj(cachep, page, i); |
| 2490 | #if DEBUG | 2381 | |
| 2491 | /* need to poison the objs? */ | ||
| 2492 | if (cachep->flags & SLAB_POISON) | ||
| 2493 | poison_obj(cachep, objp, POISON_FREE); | ||
| 2494 | if (cachep->flags & SLAB_STORE_USER) | 2382 | if (cachep->flags & SLAB_STORE_USER) |
| 2495 | *dbg_userword(cachep, objp) = NULL; | 2383 | *dbg_userword(cachep, objp) = NULL; |
| 2496 | 2384 | ||
| @@ -2514,15 +2402,32 @@ static void cache_init_objs(struct kmem_cache *cachep, | |||
| 2514 | slab_error(cachep, "constructor overwrote the" | 2402 | slab_error(cachep, "constructor overwrote the" |
| 2515 | " start of an object"); | 2403 | " start of an object"); |
| 2516 | } | 2404 | } |
| 2517 | if ((cachep->size % PAGE_SIZE) == 0 && | 2405 | /* need to poison the objs? */ |
| 2518 | OFF_SLAB(cachep) && cachep->flags & SLAB_POISON) | 2406 | if (cachep->flags & SLAB_POISON) { |
| 2519 | kernel_map_pages(virt_to_page(objp), | 2407 | poison_obj(cachep, objp, POISON_FREE); |
| 2520 | cachep->size / PAGE_SIZE, 0); | 2408 | slab_kernel_map(cachep, objp, 0, 0); |
| 2521 | #else | 2409 | } |
| 2522 | if (cachep->ctor) | 2410 | } |
| 2523 | cachep->ctor(objp); | ||
| 2524 | #endif | 2411 | #endif |
| 2525 | set_obj_status(page, i, OBJECT_FREE); | 2412 | } |
| 2413 | |||
| 2414 | static void cache_init_objs(struct kmem_cache *cachep, | ||
| 2415 | struct page *page) | ||
| 2416 | { | ||
| 2417 | int i; | ||
| 2418 | |||
| 2419 | cache_init_objs_debug(cachep, page); | ||
| 2420 | |||
| 2421 | if (OBJFREELIST_SLAB(cachep)) { | ||
| 2422 | page->freelist = index_to_obj(cachep, page, cachep->num - 1) + | ||
| 2423 | obj_offset(cachep); | ||
| 2424 | } | ||
| 2425 | |||
| 2426 | for (i = 0; i < cachep->num; i++) { | ||
| 2427 | /* constructor could break poison info */ | ||
| 2428 | if (DEBUG == 0 && cachep->ctor) | ||
| 2429 | cachep->ctor(index_to_obj(cachep, page, i)); | ||
| 2430 | |||
| 2526 | set_free_obj(page, i, i); | 2431 | set_free_obj(page, i, i); |
| 2527 | } | 2432 | } |
| 2528 | } | 2433 | } |
| @@ -2537,30 +2442,28 @@ static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) | |||
| 2537 | } | 2442 | } |
| 2538 | } | 2443 | } |
| 2539 | 2444 | ||
| 2540 | static void *slab_get_obj(struct kmem_cache *cachep, struct page *page, | 2445 | static void *slab_get_obj(struct kmem_cache *cachep, struct page *page) |
| 2541 | int nodeid) | ||
| 2542 | { | 2446 | { |
| 2543 | void *objp; | 2447 | void *objp; |
| 2544 | 2448 | ||
| 2545 | objp = index_to_obj(cachep, page, get_free_obj(page, page->active)); | 2449 | objp = index_to_obj(cachep, page, get_free_obj(page, page->active)); |
| 2546 | page->active++; | 2450 | page->active++; |
| 2451 | |||
| 2547 | #if DEBUG | 2452 | #if DEBUG |
| 2548 | WARN_ON(page_to_nid(virt_to_page(objp)) != nodeid); | 2453 | if (cachep->flags & SLAB_STORE_USER) |
| 2454 | set_store_user_dirty(cachep); | ||
| 2549 | #endif | 2455 | #endif |
| 2550 | 2456 | ||
| 2551 | return objp; | 2457 | return objp; |
| 2552 | } | 2458 | } |
| 2553 | 2459 | ||
| 2554 | static void slab_put_obj(struct kmem_cache *cachep, struct page *page, | 2460 | static void slab_put_obj(struct kmem_cache *cachep, |
| 2555 | void *objp, int nodeid) | 2461 | struct page *page, void *objp) |
| 2556 | { | 2462 | { |
| 2557 | unsigned int objnr = obj_to_index(cachep, page, objp); | 2463 | unsigned int objnr = obj_to_index(cachep, page, objp); |
| 2558 | #if DEBUG | 2464 | #if DEBUG |
| 2559 | unsigned int i; | 2465 | unsigned int i; |
| 2560 | 2466 | ||
| 2561 | /* Verify that the slab belongs to the intended node */ | ||
| 2562 | WARN_ON(page_to_nid(virt_to_page(objp)) != nodeid); | ||
| 2563 | |||
| 2564 | /* Verify double free bug */ | 2467 | /* Verify double free bug */ |
| 2565 | for (i = page->active; i < cachep->num; i++) { | 2468 | for (i = page->active; i < cachep->num; i++) { |
| 2566 | if (get_free_obj(page, i) == objnr) { | 2469 | if (get_free_obj(page, i) == objnr) { |
| @@ -2571,6 +2474,9 @@ static void slab_put_obj(struct kmem_cache *cachep, struct page *page, | |||
| 2571 | } | 2474 | } |
| 2572 | #endif | 2475 | #endif |
| 2573 | page->active--; | 2476 | page->active--; |
| 2477 | if (!page->freelist) | ||
| 2478 | page->freelist = objp + obj_offset(cachep); | ||
| 2479 | |||
| 2574 | set_free_obj(page, page->active, objnr); | 2480 | set_free_obj(page, page->active, objnr); |
| 2575 | } | 2481 | } |
| 2576 | 2482 | ||
| @@ -2645,7 +2551,7 @@ static int cache_grow(struct kmem_cache *cachep, | |||
| 2645 | /* Get slab management. */ | 2551 | /* Get slab management. */ |
| 2646 | freelist = alloc_slabmgmt(cachep, page, offset, | 2552 | freelist = alloc_slabmgmt(cachep, page, offset, |
| 2647 | local_flags & ~GFP_CONSTRAINT_MASK, nodeid); | 2553 | local_flags & ~GFP_CONSTRAINT_MASK, nodeid); |
| 2648 | if (!freelist) | 2554 | if (OFF_SLAB(cachep) && !freelist) |
| 2649 | goto opps1; | 2555 | goto opps1; |
| 2650 | 2556 | ||
| 2651 | slab_map_pages(cachep, page, freelist); | 2557 | slab_map_pages(cachep, page, freelist); |
| @@ -2726,27 +2632,19 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, | |||
| 2726 | *dbg_redzone1(cachep, objp) = RED_INACTIVE; | 2632 | *dbg_redzone1(cachep, objp) = RED_INACTIVE; |
| 2727 | *dbg_redzone2(cachep, objp) = RED_INACTIVE; | 2633 | *dbg_redzone2(cachep, objp) = RED_INACTIVE; |
| 2728 | } | 2634 | } |
| 2729 | if (cachep->flags & SLAB_STORE_USER) | 2635 | if (cachep->flags & SLAB_STORE_USER) { |
| 2636 | set_store_user_dirty(cachep); | ||
| 2730 | *dbg_userword(cachep, objp) = (void *)caller; | 2637 | *dbg_userword(cachep, objp) = (void *)caller; |
| 2638 | } | ||
| 2731 | 2639 | ||
| 2732 | objnr = obj_to_index(cachep, page, objp); | 2640 | objnr = obj_to_index(cachep, page, objp); |
| 2733 | 2641 | ||
| 2734 | BUG_ON(objnr >= cachep->num); | 2642 | BUG_ON(objnr >= cachep->num); |
| 2735 | BUG_ON(objp != index_to_obj(cachep, page, objnr)); | 2643 | BUG_ON(objp != index_to_obj(cachep, page, objnr)); |
| 2736 | 2644 | ||
| 2737 | set_obj_status(page, objnr, OBJECT_FREE); | ||
| 2738 | if (cachep->flags & SLAB_POISON) { | 2645 | if (cachep->flags & SLAB_POISON) { |
| 2739 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
| 2740 | if ((cachep->size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) { | ||
| 2741 | store_stackinfo(cachep, objp, caller); | ||
| 2742 | kernel_map_pages(virt_to_page(objp), | ||
| 2743 | cachep->size / PAGE_SIZE, 0); | ||
| 2744 | } else { | ||
| 2745 | poison_obj(cachep, objp, POISON_FREE); | ||
| 2746 | } | ||
| 2747 | #else | ||
| 2748 | poison_obj(cachep, objp, POISON_FREE); | 2646 | poison_obj(cachep, objp, POISON_FREE); |
| 2749 | #endif | 2647 | slab_kernel_map(cachep, objp, 0, caller); |
| 2750 | } | 2648 | } |
| 2751 | return objp; | 2649 | return objp; |
| 2752 | } | 2650 | } |
| @@ -2756,7 +2654,85 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, | |||
| 2756 | #define cache_free_debugcheck(x,objp,z) (objp) | 2654 | #define cache_free_debugcheck(x,objp,z) (objp) |
| 2757 | #endif | 2655 | #endif |
| 2758 | 2656 | ||
| 2759 | static struct page *get_first_slab(struct kmem_cache_node *n) | 2657 | static inline void fixup_objfreelist_debug(struct kmem_cache *cachep, |
| 2658 | void **list) | ||
| 2659 | { | ||
| 2660 | #if DEBUG | ||
| 2661 | void *next = *list; | ||
| 2662 | void *objp; | ||
| 2663 | |||
| 2664 | while (next) { | ||
| 2665 | objp = next - obj_offset(cachep); | ||
| 2666 | next = *(void **)next; | ||
| 2667 | poison_obj(cachep, objp, POISON_FREE); | ||
| 2668 | } | ||
| 2669 | #endif | ||
| 2670 | } | ||
| 2671 | |||
| 2672 | static inline void fixup_slab_list(struct kmem_cache *cachep, | ||
| 2673 | struct kmem_cache_node *n, struct page *page, | ||
| 2674 | void **list) | ||
| 2675 | { | ||
| 2676 | /* move slabp to correct slabp list: */ | ||
| 2677 | list_del(&page->lru); | ||
| 2678 | if (page->active == cachep->num) { | ||
| 2679 | list_add(&page->lru, &n->slabs_full); | ||
| 2680 | if (OBJFREELIST_SLAB(cachep)) { | ||
| 2681 | #if DEBUG | ||
| 2682 | /* Poisoning will be done without holding the lock */ | ||
| 2683 | if (cachep->flags & SLAB_POISON) { | ||
| 2684 | void **objp = page->freelist; | ||
| 2685 | |||
| 2686 | *objp = *list; | ||
| 2687 | *list = objp; | ||
| 2688 | } | ||
| 2689 | #endif | ||
| 2690 | page->freelist = NULL; | ||
| 2691 | } | ||
| 2692 | } else | ||
| 2693 | list_add(&page->lru, &n->slabs_partial); | ||
| 2694 | } | ||
| 2695 | |||
| 2696 | /* Try to find non-pfmemalloc slab if needed */ | ||
| 2697 | static noinline struct page *get_valid_first_slab(struct kmem_cache_node *n, | ||
| 2698 | struct page *page, bool pfmemalloc) | ||
| 2699 | { | ||
| 2700 | if (!page) | ||
| 2701 | return NULL; | ||
| 2702 | |||
| 2703 | if (pfmemalloc) | ||
| 2704 | return page; | ||
| 2705 | |||
| 2706 | if (!PageSlabPfmemalloc(page)) | ||
| 2707 | return page; | ||
| 2708 | |||
| 2709 | /* No need to keep pfmemalloc slab if we have enough free objects */ | ||
| 2710 | if (n->free_objects > n->free_limit) { | ||
| 2711 | ClearPageSlabPfmemalloc(page); | ||
| 2712 | return page; | ||
| 2713 | } | ||
| 2714 | |||
| 2715 | /* Move pfmemalloc slab to the end of list to speed up next search */ | ||
| 2716 | list_del(&page->lru); | ||
| 2717 | if (!page->active) | ||
| 2718 | list_add_tail(&page->lru, &n->slabs_free); | ||
| 2719 | else | ||
| 2720 | list_add_tail(&page->lru, &n->slabs_partial); | ||
| 2721 | |||
| 2722 | list_for_each_entry(page, &n->slabs_partial, lru) { | ||
| 2723 | if (!PageSlabPfmemalloc(page)) | ||
| 2724 | return page; | ||
| 2725 | } | ||
| 2726 | |||
| 2727 | list_for_each_entry(page, &n->slabs_free, lru) { | ||
| 2728 | if (!PageSlabPfmemalloc(page)) | ||
| 2729 | return page; | ||
| 2730 | } | ||
| 2731 | |||
| 2732 | return NULL; | ||
| 2733 | } | ||
| 2734 | |||
| 2735 | static struct page *get_first_slab(struct kmem_cache_node *n, bool pfmemalloc) | ||
| 2760 | { | 2736 | { |
| 2761 | struct page *page; | 2737 | struct page *page; |
| 2762 | 2738 | ||
| @@ -2768,21 +2744,51 @@ static struct page *get_first_slab(struct kmem_cache_node *n) | |||
| 2768 | struct page, lru); | 2744 | struct page, lru); |
| 2769 | } | 2745 | } |
| 2770 | 2746 | ||
| 2747 | if (sk_memalloc_socks()) | ||
| 2748 | return get_valid_first_slab(n, page, pfmemalloc); | ||
| 2749 | |||
| 2771 | return page; | 2750 | return page; |
| 2772 | } | 2751 | } |
| 2773 | 2752 | ||
| 2774 | static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags, | 2753 | static noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep, |
| 2775 | bool force_refill) | 2754 | struct kmem_cache_node *n, gfp_t flags) |
| 2755 | { | ||
| 2756 | struct page *page; | ||
| 2757 | void *obj; | ||
| 2758 | void *list = NULL; | ||
| 2759 | |||
| 2760 | if (!gfp_pfmemalloc_allowed(flags)) | ||
| 2761 | return NULL; | ||
| 2762 | |||
| 2763 | spin_lock(&n->list_lock); | ||
| 2764 | page = get_first_slab(n, true); | ||
| 2765 | if (!page) { | ||
| 2766 | spin_unlock(&n->list_lock); | ||
| 2767 | return NULL; | ||
| 2768 | } | ||
| 2769 | |||
| 2770 | obj = slab_get_obj(cachep, page); | ||
| 2771 | n->free_objects--; | ||
| 2772 | |||
| 2773 | fixup_slab_list(cachep, n, page, &list); | ||
| 2774 | |||
| 2775 | spin_unlock(&n->list_lock); | ||
| 2776 | fixup_objfreelist_debug(cachep, &list); | ||
| 2777 | |||
| 2778 | return obj; | ||
| 2779 | } | ||
| 2780 | |||
| 2781 | static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) | ||
| 2776 | { | 2782 | { |
| 2777 | int batchcount; | 2783 | int batchcount; |
| 2778 | struct kmem_cache_node *n; | 2784 | struct kmem_cache_node *n; |
| 2779 | struct array_cache *ac; | 2785 | struct array_cache *ac; |
| 2780 | int node; | 2786 | int node; |
| 2787 | void *list = NULL; | ||
| 2781 | 2788 | ||
| 2782 | check_irq_off(); | 2789 | check_irq_off(); |
| 2783 | node = numa_mem_id(); | 2790 | node = numa_mem_id(); |
| 2784 | if (unlikely(force_refill)) | 2791 | |
| 2785 | goto force_grow; | ||
| 2786 | retry: | 2792 | retry: |
| 2787 | ac = cpu_cache_get(cachep); | 2793 | ac = cpu_cache_get(cachep); |
| 2788 | batchcount = ac->batchcount; | 2794 | batchcount = ac->batchcount; |
| @@ -2808,7 +2814,7 @@ retry: | |||
| 2808 | while (batchcount > 0) { | 2814 | while (batchcount > 0) { |
| 2809 | struct page *page; | 2815 | struct page *page; |
| 2810 | /* Get slab alloc is to come from. */ | 2816 | /* Get slab alloc is to come from. */ |
| 2811 | page = get_first_slab(n); | 2817 | page = get_first_slab(n, false); |
| 2812 | if (!page) | 2818 | if (!page) |
| 2813 | goto must_grow; | 2819 | goto must_grow; |
| 2814 | 2820 | ||
| @@ -2826,26 +2832,29 @@ retry: | |||
| 2826 | STATS_INC_ACTIVE(cachep); | 2832 | STATS_INC_ACTIVE(cachep); |
| 2827 | STATS_SET_HIGH(cachep); | 2833 | STATS_SET_HIGH(cachep); |
| 2828 | 2834 | ||
| 2829 | ac_put_obj(cachep, ac, slab_get_obj(cachep, page, | 2835 | ac->entry[ac->avail++] = slab_get_obj(cachep, page); |
| 2830 | node)); | ||
| 2831 | } | 2836 | } |
| 2832 | 2837 | ||
| 2833 | /* move slabp to correct slabp list: */ | 2838 | fixup_slab_list(cachep, n, page, &list); |
| 2834 | list_del(&page->lru); | ||
| 2835 | if (page->active == cachep->num) | ||
| 2836 | list_add(&page->lru, &n->slabs_full); | ||
| 2837 | else | ||
| 2838 | list_add(&page->lru, &n->slabs_partial); | ||
| 2839 | } | 2839 | } |
| 2840 | 2840 | ||
| 2841 | must_grow: | 2841 | must_grow: |
| 2842 | n->free_objects -= ac->avail; | 2842 | n->free_objects -= ac->avail; |
| 2843 | alloc_done: | 2843 | alloc_done: |
| 2844 | spin_unlock(&n->list_lock); | 2844 | spin_unlock(&n->list_lock); |
| 2845 | fixup_objfreelist_debug(cachep, &list); | ||
| 2845 | 2846 | ||
| 2846 | if (unlikely(!ac->avail)) { | 2847 | if (unlikely(!ac->avail)) { |
| 2847 | int x; | 2848 | int x; |
| 2848 | force_grow: | 2849 | |
| 2850 | /* Check if we can use obj in pfmemalloc slab */ | ||
| 2851 | if (sk_memalloc_socks()) { | ||
| 2852 | void *obj = cache_alloc_pfmemalloc(cachep, n, flags); | ||
| 2853 | |||
| 2854 | if (obj) | ||
| 2855 | return obj; | ||
| 2856 | } | ||
| 2857 | |||
| 2849 | x = cache_grow(cachep, gfp_exact_node(flags), node, NULL); | 2858 | x = cache_grow(cachep, gfp_exact_node(flags), node, NULL); |
| 2850 | 2859 | ||
| 2851 | /* cache_grow can reenable interrupts, then ac could change. */ | 2860 | /* cache_grow can reenable interrupts, then ac could change. */ |
| @@ -2853,7 +2862,7 @@ force_grow: | |||
| 2853 | node = numa_mem_id(); | 2862 | node = numa_mem_id(); |
| 2854 | 2863 | ||
| 2855 | /* no objects in sight? abort */ | 2864 | /* no objects in sight? abort */ |
| 2856 | if (!x && (ac->avail == 0 || force_refill)) | 2865 | if (!x && ac->avail == 0) |
| 2857 | return NULL; | 2866 | return NULL; |
| 2858 | 2867 | ||
| 2859 | if (!ac->avail) /* objects refilled by interrupt? */ | 2868 | if (!ac->avail) /* objects refilled by interrupt? */ |
| @@ -2861,7 +2870,7 @@ force_grow: | |||
| 2861 | } | 2870 | } |
| 2862 | ac->touched = 1; | 2871 | ac->touched = 1; |
| 2863 | 2872 | ||
| 2864 | return ac_get_obj(cachep, ac, flags, force_refill); | 2873 | return ac->entry[--ac->avail]; |
| 2865 | } | 2874 | } |
| 2866 | 2875 | ||
| 2867 | static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep, | 2876 | static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep, |
| @@ -2877,20 +2886,11 @@ static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep, | |||
| 2877 | static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, | 2886 | static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, |
| 2878 | gfp_t flags, void *objp, unsigned long caller) | 2887 | gfp_t flags, void *objp, unsigned long caller) |
| 2879 | { | 2888 | { |
| 2880 | struct page *page; | ||
| 2881 | |||
| 2882 | if (!objp) | 2889 | if (!objp) |
| 2883 | return objp; | 2890 | return objp; |
| 2884 | if (cachep->flags & SLAB_POISON) { | 2891 | if (cachep->flags & SLAB_POISON) { |
| 2885 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
| 2886 | if ((cachep->size % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) | ||
| 2887 | kernel_map_pages(virt_to_page(objp), | ||
| 2888 | cachep->size / PAGE_SIZE, 1); | ||
| 2889 | else | ||
| 2890 | check_poison_obj(cachep, objp); | ||
| 2891 | #else | ||
| 2892 | check_poison_obj(cachep, objp); | 2892 | check_poison_obj(cachep, objp); |
| 2893 | #endif | 2893 | slab_kernel_map(cachep, objp, 1, 0); |
| 2894 | poison_obj(cachep, objp, POISON_INUSE); | 2894 | poison_obj(cachep, objp, POISON_INUSE); |
| 2895 | } | 2895 | } |
| 2896 | if (cachep->flags & SLAB_STORE_USER) | 2896 | if (cachep->flags & SLAB_STORE_USER) |
| @@ -2910,8 +2910,6 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, | |||
| 2910 | *dbg_redzone2(cachep, objp) = RED_ACTIVE; | 2910 | *dbg_redzone2(cachep, objp) = RED_ACTIVE; |
| 2911 | } | 2911 | } |
| 2912 | 2912 | ||
| 2913 | page = virt_to_head_page(objp); | ||
| 2914 | set_obj_status(page, obj_to_index(cachep, page, objp), OBJECT_ACTIVE); | ||
| 2915 | objp += obj_offset(cachep); | 2913 | objp += obj_offset(cachep); |
| 2916 | if (cachep->ctor && cachep->flags & SLAB_POISON) | 2914 | if (cachep->ctor && cachep->flags & SLAB_POISON) |
| 2917 | cachep->ctor(objp); | 2915 | cachep->ctor(objp); |
| @@ -2926,40 +2924,24 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, | |||
| 2926 | #define cache_alloc_debugcheck_after(a,b,objp,d) (objp) | 2924 | #define cache_alloc_debugcheck_after(a,b,objp,d) (objp) |
| 2927 | #endif | 2925 | #endif |
| 2928 | 2926 | ||
| 2929 | static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags) | ||
| 2930 | { | ||
| 2931 | if (unlikely(cachep == kmem_cache)) | ||
| 2932 | return false; | ||
| 2933 | |||
| 2934 | return should_failslab(cachep->object_size, flags, cachep->flags); | ||
| 2935 | } | ||
| 2936 | |||
| 2937 | static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) | 2927 | static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) |
| 2938 | { | 2928 | { |
| 2939 | void *objp; | 2929 | void *objp; |
| 2940 | struct array_cache *ac; | 2930 | struct array_cache *ac; |
| 2941 | bool force_refill = false; | ||
| 2942 | 2931 | ||
| 2943 | check_irq_off(); | 2932 | check_irq_off(); |
| 2944 | 2933 | ||
| 2945 | ac = cpu_cache_get(cachep); | 2934 | ac = cpu_cache_get(cachep); |
| 2946 | if (likely(ac->avail)) { | 2935 | if (likely(ac->avail)) { |
| 2947 | ac->touched = 1; | 2936 | ac->touched = 1; |
| 2948 | objp = ac_get_obj(cachep, ac, flags, false); | 2937 | objp = ac->entry[--ac->avail]; |
| 2949 | 2938 | ||
| 2950 | /* | 2939 | STATS_INC_ALLOCHIT(cachep); |
| 2951 | * Allow for the possibility all avail objects are not allowed | 2940 | goto out; |
| 2952 | * by the current flags | ||
| 2953 | */ | ||
| 2954 | if (objp) { | ||
| 2955 | STATS_INC_ALLOCHIT(cachep); | ||
| 2956 | goto out; | ||
| 2957 | } | ||
| 2958 | force_refill = true; | ||
| 2959 | } | 2941 | } |
| 2960 | 2942 | ||
| 2961 | STATS_INC_ALLOCMISS(cachep); | 2943 | STATS_INC_ALLOCMISS(cachep); |
| 2962 | objp = cache_alloc_refill(cachep, flags, force_refill); | 2944 | objp = cache_alloc_refill(cachep, flags); |
| 2963 | /* | 2945 | /* |
| 2964 | * the 'ac' may be updated by cache_alloc_refill(), | 2946 | * the 'ac' may be updated by cache_alloc_refill(), |
| 2965 | * and kmemleak_erase() requires its correct value. | 2947 | * and kmemleak_erase() requires its correct value. |
| @@ -3097,6 +3079,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, | |||
| 3097 | struct page *page; | 3079 | struct page *page; |
| 3098 | struct kmem_cache_node *n; | 3080 | struct kmem_cache_node *n; |
| 3099 | void *obj; | 3081 | void *obj; |
| 3082 | void *list = NULL; | ||
| 3100 | int x; | 3083 | int x; |
| 3101 | 3084 | ||
| 3102 | VM_BUG_ON(nodeid < 0 || nodeid >= MAX_NUMNODES); | 3085 | VM_BUG_ON(nodeid < 0 || nodeid >= MAX_NUMNODES); |
| @@ -3106,7 +3089,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, | |||
| 3106 | retry: | 3089 | retry: |
| 3107 | check_irq_off(); | 3090 | check_irq_off(); |
| 3108 | spin_lock(&n->list_lock); | 3091 | spin_lock(&n->list_lock); |
| 3109 | page = get_first_slab(n); | 3092 | page = get_first_slab(n, false); |
| 3110 | if (!page) | 3093 | if (!page) |
| 3111 | goto must_grow; | 3094 | goto must_grow; |
| 3112 | 3095 | ||
| @@ -3118,17 +3101,13 @@ retry: | |||
| 3118 | 3101 | ||
| 3119 | BUG_ON(page->active == cachep->num); | 3102 | BUG_ON(page->active == cachep->num); |
| 3120 | 3103 | ||
| 3121 | obj = slab_get_obj(cachep, page, nodeid); | 3104 | obj = slab_get_obj(cachep, page); |
| 3122 | n->free_objects--; | 3105 | n->free_objects--; |
| 3123 | /* move slabp to correct slabp list: */ | ||
| 3124 | list_del(&page->lru); | ||
| 3125 | 3106 | ||
| 3126 | if (page->active == cachep->num) | 3107 | fixup_slab_list(cachep, n, page, &list); |
| 3127 | list_add(&page->lru, &n->slabs_full); | ||
| 3128 | else | ||
| 3129 | list_add(&page->lru, &n->slabs_partial); | ||
| 3130 | 3108 | ||
| 3131 | spin_unlock(&n->list_lock); | 3109 | spin_unlock(&n->list_lock); |
| 3110 | fixup_objfreelist_debug(cachep, &list); | ||
| 3132 | goto done; | 3111 | goto done; |
| 3133 | 3112 | ||
| 3134 | must_grow: | 3113 | must_grow: |
| @@ -3152,14 +3131,10 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, | |||
| 3152 | int slab_node = numa_mem_id(); | 3131 | int slab_node = numa_mem_id(); |
| 3153 | 3132 | ||
| 3154 | flags &= gfp_allowed_mask; | 3133 | flags &= gfp_allowed_mask; |
| 3155 | 3134 | cachep = slab_pre_alloc_hook(cachep, flags); | |
| 3156 | lockdep_trace_alloc(flags); | 3135 | if (unlikely(!cachep)) |
| 3157 | |||
| 3158 | if (slab_should_failslab(cachep, flags)) | ||
| 3159 | return NULL; | 3136 | return NULL; |
| 3160 | 3137 | ||
| 3161 | cachep = memcg_kmem_get_cache(cachep, flags); | ||
| 3162 | |||
| 3163 | cache_alloc_debugcheck_before(cachep, flags); | 3138 | cache_alloc_debugcheck_before(cachep, flags); |
| 3164 | local_irq_save(save_flags); | 3139 | local_irq_save(save_flags); |
| 3165 | 3140 | ||
| @@ -3188,16 +3163,11 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, | |||
| 3188 | out: | 3163 | out: |
| 3189 | local_irq_restore(save_flags); | 3164 | local_irq_restore(save_flags); |
| 3190 | ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller); | 3165 | ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller); |
| 3191 | kmemleak_alloc_recursive(ptr, cachep->object_size, 1, cachep->flags, | ||
| 3192 | flags); | ||
| 3193 | 3166 | ||
| 3194 | if (likely(ptr)) { | 3167 | if (unlikely(flags & __GFP_ZERO) && ptr) |
| 3195 | kmemcheck_slab_alloc(cachep, flags, ptr, cachep->object_size); | 3168 | memset(ptr, 0, cachep->object_size); |
| 3196 | if (unlikely(flags & __GFP_ZERO)) | ||
| 3197 | memset(ptr, 0, cachep->object_size); | ||
| 3198 | } | ||
| 3199 | 3169 | ||
| 3200 | memcg_kmem_put_cache(cachep); | 3170 | slab_post_alloc_hook(cachep, flags, 1, &ptr); |
| 3201 | return ptr; | 3171 | return ptr; |
| 3202 | } | 3172 | } |
| 3203 | 3173 | ||
| @@ -3240,30 +3210,21 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller) | |||
| 3240 | void *objp; | 3210 | void *objp; |
| 3241 | 3211 | ||
| 3242 | flags &= gfp_allowed_mask; | 3212 | flags &= gfp_allowed_mask; |
| 3243 | 3213 | cachep = slab_pre_alloc_hook(cachep, flags); | |
| 3244 | lockdep_trace_alloc(flags); | 3214 | if (unlikely(!cachep)) |
| 3245 | |||
| 3246 | if (slab_should_failslab(cachep, flags)) | ||
| 3247 | return NULL; | 3215 | return NULL; |
| 3248 | 3216 | ||
| 3249 | cachep = memcg_kmem_get_cache(cachep, flags); | ||
| 3250 | |||
| 3251 | cache_alloc_debugcheck_before(cachep, flags); | 3217 | cache_alloc_debugcheck_before(cachep, flags); |
| 3252 | local_irq_save(save_flags); | 3218 | local_irq_save(save_flags); |
| 3253 | objp = __do_cache_alloc(cachep, flags); | 3219 | objp = __do_cache_alloc(cachep, flags); |
| 3254 | local_irq_restore(save_flags); | 3220 | local_irq_restore(save_flags); |
| 3255 | objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller); | 3221 | objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller); |
| 3256 | kmemleak_alloc_recursive(objp, cachep->object_size, 1, cachep->flags, | ||
| 3257 | flags); | ||
| 3258 | prefetchw(objp); | 3222 | prefetchw(objp); |
| 3259 | 3223 | ||
| 3260 | if (likely(objp)) { | 3224 | if (unlikely(flags & __GFP_ZERO) && objp) |
| 3261 | kmemcheck_slab_alloc(cachep, flags, objp, cachep->object_size); | 3225 | memset(objp, 0, cachep->object_size); |
| 3262 | if (unlikely(flags & __GFP_ZERO)) | ||
| 3263 | memset(objp, 0, cachep->object_size); | ||
| 3264 | } | ||
| 3265 | 3226 | ||
| 3266 | memcg_kmem_put_cache(cachep); | 3227 | slab_post_alloc_hook(cachep, flags, 1, &objp); |
| 3267 | return objp; | 3228 | return objp; |
| 3268 | } | 3229 | } |
| 3269 | 3230 | ||
| @@ -3281,13 +3242,12 @@ static void free_block(struct kmem_cache *cachep, void **objpp, | |||
| 3281 | void *objp; | 3242 | void *objp; |
| 3282 | struct page *page; | 3243 | struct page *page; |
| 3283 | 3244 | ||
| 3284 | clear_obj_pfmemalloc(&objpp[i]); | ||
| 3285 | objp = objpp[i]; | 3245 | objp = objpp[i]; |
| 3286 | 3246 | ||
| 3287 | page = virt_to_head_page(objp); | 3247 | page = virt_to_head_page(objp); |
| 3288 | list_del(&page->lru); | 3248 | list_del(&page->lru); |
| 3289 | check_spinlock_acquired_node(cachep, node); | 3249 | check_spinlock_acquired_node(cachep, node); |
| 3290 | slab_put_obj(cachep, page, objp, node); | 3250 | slab_put_obj(cachep, page, objp); |
| 3291 | STATS_DEC_ACTIVE(cachep); | 3251 | STATS_DEC_ACTIVE(cachep); |
| 3292 | n->free_objects++; | 3252 | n->free_objects++; |
| 3293 | 3253 | ||
| @@ -3317,9 +3277,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) | |||
| 3317 | LIST_HEAD(list); | 3277 | LIST_HEAD(list); |
| 3318 | 3278 | ||
| 3319 | batchcount = ac->batchcount; | 3279 | batchcount = ac->batchcount; |
| 3320 | #if DEBUG | 3280 | |
| 3321 | BUG_ON(!batchcount || batchcount > ac->avail); | ||
| 3322 | #endif | ||
| 3323 | check_irq_off(); | 3281 | check_irq_off(); |
| 3324 | n = get_node(cachep, node); | 3282 | n = get_node(cachep, node); |
| 3325 | spin_lock(&n->list_lock); | 3283 | spin_lock(&n->list_lock); |
| @@ -3389,7 +3347,16 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp, | |||
| 3389 | cache_flusharray(cachep, ac); | 3347 | cache_flusharray(cachep, ac); |
| 3390 | } | 3348 | } |
| 3391 | 3349 | ||
| 3392 | ac_put_obj(cachep, ac, objp); | 3350 | if (sk_memalloc_socks()) { |
| 3351 | struct page *page = virt_to_head_page(objp); | ||
| 3352 | |||
| 3353 | if (unlikely(PageSlabPfmemalloc(page))) { | ||
| 3354 | cache_free_pfmemalloc(cachep, page, objp); | ||
| 3355 | return; | ||
| 3356 | } | ||
| 3357 | } | ||
| 3358 | |||
| 3359 | ac->entry[ac->avail++] = objp; | ||
| 3393 | } | 3360 | } |
| 3394 | 3361 | ||
| 3395 | /** | 3362 | /** |
| @@ -3411,16 +3378,53 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) | |||
| 3411 | } | 3378 | } |
| 3412 | EXPORT_SYMBOL(kmem_cache_alloc); | 3379 | EXPORT_SYMBOL(kmem_cache_alloc); |
| 3413 | 3380 | ||
| 3414 | void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) | 3381 | static __always_inline void |
| 3382 | cache_alloc_debugcheck_after_bulk(struct kmem_cache *s, gfp_t flags, | ||
| 3383 | size_t size, void **p, unsigned long caller) | ||
| 3415 | { | 3384 | { |
| 3416 | __kmem_cache_free_bulk(s, size, p); | 3385 | size_t i; |
| 3386 | |||
| 3387 | for (i = 0; i < size; i++) | ||
| 3388 | p[i] = cache_alloc_debugcheck_after(s, flags, p[i], caller); | ||
| 3417 | } | 3389 | } |
| 3418 | EXPORT_SYMBOL(kmem_cache_free_bulk); | ||
| 3419 | 3390 | ||
| 3420 | int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, | 3391 | int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, |
| 3421 | void **p) | 3392 | void **p) |
| 3422 | { | 3393 | { |
| 3423 | return __kmem_cache_alloc_bulk(s, flags, size, p); | 3394 | size_t i; |
| 3395 | |||
| 3396 | s = slab_pre_alloc_hook(s, flags); | ||
| 3397 | if (!s) | ||
| 3398 | return 0; | ||
| 3399 | |||
| 3400 | cache_alloc_debugcheck_before(s, flags); | ||
| 3401 | |||
| 3402 | local_irq_disable(); | ||
| 3403 | for (i = 0; i < size; i++) { | ||
| 3404 | void *objp = __do_cache_alloc(s, flags); | ||
| 3405 | |||
| 3406 | if (unlikely(!objp)) | ||
| 3407 | goto error; | ||
| 3408 | p[i] = objp; | ||
| 3409 | } | ||
| 3410 | local_irq_enable(); | ||
| 3411 | |||
| 3412 | cache_alloc_debugcheck_after_bulk(s, flags, size, p, _RET_IP_); | ||
| 3413 | |||
| 3414 | /* Clear memory outside IRQ disabled section */ | ||
| 3415 | if (unlikely(flags & __GFP_ZERO)) | ||
| 3416 | for (i = 0; i < size; i++) | ||
| 3417 | memset(p[i], 0, s->object_size); | ||
| 3418 | |||
| 3419 | slab_post_alloc_hook(s, flags, size, p); | ||
| 3420 | /* FIXME: Trace call missing. Christoph would like a bulk variant */ | ||
| 3421 | return size; | ||
| 3422 | error: | ||
| 3423 | local_irq_enable(); | ||
| 3424 | cache_alloc_debugcheck_after_bulk(s, flags, i, p, _RET_IP_); | ||
| 3425 | slab_post_alloc_hook(s, flags, i, p); | ||
| 3426 | __kmem_cache_free_bulk(s, i, p); | ||
| 3427 | return 0; | ||
| 3424 | } | 3428 | } |
| 3425 | EXPORT_SYMBOL(kmem_cache_alloc_bulk); | 3429 | EXPORT_SYMBOL(kmem_cache_alloc_bulk); |
| 3426 | 3430 | ||
| @@ -3567,6 +3571,32 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp) | |||
| 3567 | } | 3571 | } |
| 3568 | EXPORT_SYMBOL(kmem_cache_free); | 3572 | EXPORT_SYMBOL(kmem_cache_free); |
| 3569 | 3573 | ||
| 3574 | void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p) | ||
| 3575 | { | ||
| 3576 | struct kmem_cache *s; | ||
| 3577 | size_t i; | ||
| 3578 | |||
| 3579 | local_irq_disable(); | ||
| 3580 | for (i = 0; i < size; i++) { | ||
| 3581 | void *objp = p[i]; | ||
| 3582 | |||
| 3583 | if (!orig_s) /* called via kfree_bulk */ | ||
| 3584 | s = virt_to_cache(objp); | ||
| 3585 | else | ||
| 3586 | s = cache_from_obj(orig_s, objp); | ||
| 3587 | |||
| 3588 | debug_check_no_locks_freed(objp, s->object_size); | ||
| 3589 | if (!(s->flags & SLAB_DEBUG_OBJECTS)) | ||
| 3590 | debug_check_no_obj_freed(objp, s->object_size); | ||
| 3591 | |||
| 3592 | __cache_free(s, objp, _RET_IP_); | ||
| 3593 | } | ||
| 3594 | local_irq_enable(); | ||
| 3595 | |||
| 3596 | /* FIXME: add tracing */ | ||
| 3597 | } | ||
| 3598 | EXPORT_SYMBOL(kmem_cache_free_bulk); | ||
| 3599 | |||
| 3570 | /** | 3600 | /** |
| 3571 | * kfree - free previously allocated memory | 3601 | * kfree - free previously allocated memory |
| 3572 | * @objp: pointer returned by kmalloc. | 3602 | * @objp: pointer returned by kmalloc. |
| @@ -4102,15 +4132,34 @@ static void handle_slab(unsigned long *n, struct kmem_cache *c, | |||
| 4102 | struct page *page) | 4132 | struct page *page) |
| 4103 | { | 4133 | { |
| 4104 | void *p; | 4134 | void *p; |
| 4105 | int i; | 4135 | int i, j; |
| 4136 | unsigned long v; | ||
| 4106 | 4137 | ||
| 4107 | if (n[0] == n[1]) | 4138 | if (n[0] == n[1]) |
| 4108 | return; | 4139 | return; |
| 4109 | for (i = 0, p = page->s_mem; i < c->num; i++, p += c->size) { | 4140 | for (i = 0, p = page->s_mem; i < c->num; i++, p += c->size) { |
| 4110 | if (get_obj_status(page, i) != OBJECT_ACTIVE) | 4141 | bool active = true; |
| 4142 | |||
| 4143 | for (j = page->active; j < c->num; j++) { | ||
| 4144 | if (get_free_obj(page, j) == i) { | ||
| 4145 | active = false; | ||
| 4146 | break; | ||
| 4147 | } | ||
| 4148 | } | ||
| 4149 | |||
| 4150 | if (!active) | ||
| 4151 | continue; | ||
| 4152 | |||
| 4153 | /* | ||
| 4154 | * probe_kernel_read() is used for DEBUG_PAGEALLOC. page table | ||
| 4155 | * mapping is established when actual object allocation and | ||
| 4156 | * we could mistakenly access the unmapped object in the cpu | ||
| 4157 | * cache. | ||
| 4158 | */ | ||
| 4159 | if (probe_kernel_read(&v, dbg_userword(c, p), sizeof(v))) | ||
| 4111 | continue; | 4160 | continue; |
| 4112 | 4161 | ||
| 4113 | if (!add_caller(n, (unsigned long)*dbg_userword(c, p))) | 4162 | if (!add_caller(n, v)) |
| 4114 | return; | 4163 | return; |
| 4115 | } | 4164 | } |
| 4116 | } | 4165 | } |
| @@ -4146,21 +4195,31 @@ static int leaks_show(struct seq_file *m, void *p) | |||
| 4146 | if (!(cachep->flags & SLAB_RED_ZONE)) | 4195 | if (!(cachep->flags & SLAB_RED_ZONE)) |
| 4147 | return 0; | 4196 | return 0; |
| 4148 | 4197 | ||
| 4149 | /* OK, we can do it */ | 4198 | /* |
| 4199 | * Set store_user_clean and start to grab stored user information | ||
| 4200 | * for all objects on this cache. If some alloc/free requests comes | ||
| 4201 | * during the processing, information would be wrong so restart | ||
| 4202 | * whole processing. | ||
| 4203 | */ | ||
| 4204 | do { | ||
| 4205 | set_store_user_clean(cachep); | ||
| 4206 | drain_cpu_caches(cachep); | ||
| 4150 | 4207 | ||
| 4151 | x[1] = 0; | 4208 | x[1] = 0; |
| 4152 | 4209 | ||
| 4153 | for_each_kmem_cache_node(cachep, node, n) { | 4210 | for_each_kmem_cache_node(cachep, node, n) { |
| 4154 | 4211 | ||
| 4155 | check_irq_on(); | 4212 | check_irq_on(); |
| 4156 | spin_lock_irq(&n->list_lock); | 4213 | spin_lock_irq(&n->list_lock); |
| 4214 | |||
| 4215 | list_for_each_entry(page, &n->slabs_full, lru) | ||
| 4216 | handle_slab(x, cachep, page); | ||
| 4217 | list_for_each_entry(page, &n->slabs_partial, lru) | ||
| 4218 | handle_slab(x, cachep, page); | ||
| 4219 | spin_unlock_irq(&n->list_lock); | ||
| 4220 | } | ||
| 4221 | } while (!is_store_user_clean(cachep)); | ||
| 4157 | 4222 | ||
| 4158 | list_for_each_entry(page, &n->slabs_full, lru) | ||
| 4159 | handle_slab(x, cachep, page); | ||
| 4160 | list_for_each_entry(page, &n->slabs_partial, lru) | ||
| 4161 | handle_slab(x, cachep, page); | ||
| 4162 | spin_unlock_irq(&n->list_lock); | ||
| 4163 | } | ||
| 4164 | name = cachep->name; | 4223 | name = cachep->name; |
| 4165 | if (x[0] == x[1]) { | 4224 | if (x[0] == x[1]) { |
| 4166 | /* Increase the buffer size */ | 4225 | /* Increase the buffer size */ |
| @@ -38,6 +38,10 @@ struct kmem_cache { | |||
| 38 | #endif | 38 | #endif |
| 39 | 39 | ||
| 40 | #include <linux/memcontrol.h> | 40 | #include <linux/memcontrol.h> |
| 41 | #include <linux/fault-inject.h> | ||
| 42 | #include <linux/kmemcheck.h> | ||
| 43 | #include <linux/kasan.h> | ||
| 44 | #include <linux/kmemleak.h> | ||
| 41 | 45 | ||
| 42 | /* | 46 | /* |
| 43 | * State of the slab allocator. | 47 | * State of the slab allocator. |
| @@ -121,7 +125,7 @@ static inline unsigned long kmem_cache_flags(unsigned long object_size, | |||
| 121 | #define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER) | 125 | #define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER) |
| 122 | #elif defined(CONFIG_SLUB_DEBUG) | 126 | #elif defined(CONFIG_SLUB_DEBUG) |
| 123 | #define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ | 127 | #define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ |
| 124 | SLAB_TRACE | SLAB_DEBUG_FREE) | 128 | SLAB_TRACE | SLAB_CONSISTENCY_CHECKS) |
| 125 | #else | 129 | #else |
| 126 | #define SLAB_DEBUG_FLAGS (0) | 130 | #define SLAB_DEBUG_FLAGS (0) |
| 127 | #endif | 131 | #endif |
| @@ -168,7 +172,7 @@ ssize_t slabinfo_write(struct file *file, const char __user *buffer, | |||
| 168 | /* | 172 | /* |
| 169 | * Generic implementation of bulk operations | 173 | * Generic implementation of bulk operations |
| 170 | * These are useful for situations in which the allocator cannot | 174 | * These are useful for situations in which the allocator cannot |
| 171 | * perform optimizations. In that case segments of the objecct listed | 175 | * perform optimizations. In that case segments of the object listed |
| 172 | * may be allocated or freed using these operations. | 176 | * may be allocated or freed using these operations. |
| 173 | */ | 177 | */ |
| 174 | void __kmem_cache_free_bulk(struct kmem_cache *, size_t, void **); | 178 | void __kmem_cache_free_bulk(struct kmem_cache *, size_t, void **); |
| @@ -307,7 +311,8 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x) | |||
| 307 | * to not do even the assignment. In that case, slab_equal_or_root | 311 | * to not do even the assignment. In that case, slab_equal_or_root |
| 308 | * will also be a constant. | 312 | * will also be a constant. |
| 309 | */ | 313 | */ |
| 310 | if (!memcg_kmem_enabled() && !unlikely(s->flags & SLAB_DEBUG_FREE)) | 314 | if (!memcg_kmem_enabled() && |
| 315 | !unlikely(s->flags & SLAB_CONSISTENCY_CHECKS)) | ||
| 311 | return s; | 316 | return s; |
| 312 | 317 | ||
| 313 | page = virt_to_head_page(x); | 318 | page = virt_to_head_page(x); |
| @@ -321,6 +326,64 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x) | |||
| 321 | return s; | 326 | return s; |
| 322 | } | 327 | } |
| 323 | 328 | ||
| 329 | static inline size_t slab_ksize(const struct kmem_cache *s) | ||
| 330 | { | ||
| 331 | #ifndef CONFIG_SLUB | ||
| 332 | return s->object_size; | ||
| 333 | |||
| 334 | #else /* CONFIG_SLUB */ | ||
| 335 | # ifdef CONFIG_SLUB_DEBUG | ||
| 336 | /* | ||
| 337 | * Debugging requires use of the padding between object | ||
| 338 | * and whatever may come after it. | ||
| 339 | */ | ||
| 340 | if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) | ||
| 341 | return s->object_size; | ||
| 342 | # endif | ||
| 343 | /* | ||
| 344 | * If we have the need to store the freelist pointer | ||
| 345 | * back there or track user information then we can | ||
| 346 | * only use the space before that information. | ||
| 347 | */ | ||
| 348 | if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER)) | ||
| 349 | return s->inuse; | ||
| 350 | /* | ||
| 351 | * Else we can use all the padding etc for the allocation | ||
| 352 | */ | ||
| 353 | return s->size; | ||
| 354 | #endif | ||
| 355 | } | ||
| 356 | |||
| 357 | static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, | ||
| 358 | gfp_t flags) | ||
| 359 | { | ||
| 360 | flags &= gfp_allowed_mask; | ||
| 361 | lockdep_trace_alloc(flags); | ||
| 362 | might_sleep_if(gfpflags_allow_blocking(flags)); | ||
| 363 | |||
| 364 | if (should_failslab(s, flags)) | ||
| 365 | return NULL; | ||
| 366 | |||
| 367 | return memcg_kmem_get_cache(s, flags); | ||
| 368 | } | ||
| 369 | |||
| 370 | static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, | ||
| 371 | size_t size, void **p) | ||
| 372 | { | ||
| 373 | size_t i; | ||
| 374 | |||
| 375 | flags &= gfp_allowed_mask; | ||
| 376 | for (i = 0; i < size; i++) { | ||
| 377 | void *object = p[i]; | ||
| 378 | |||
| 379 | kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); | ||
| 380 | kmemleak_alloc_recursive(object, s->object_size, 1, | ||
| 381 | s->flags, flags); | ||
| 382 | kasan_slab_alloc(s, object); | ||
| 383 | } | ||
| 384 | memcg_kmem_put_cache(s); | ||
| 385 | } | ||
| 386 | |||
| 324 | #ifndef CONFIG_SLOB | 387 | #ifndef CONFIG_SLOB |
| 325 | /* | 388 | /* |
| 326 | * The slab lists for all objects. | 389 | * The slab lists for all objects. |
diff --git a/mm/slab_common.c b/mm/slab_common.c index 065b7bdabdc3..6afb2263a5c5 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c | |||
| @@ -109,8 +109,12 @@ void __kmem_cache_free_bulk(struct kmem_cache *s, size_t nr, void **p) | |||
| 109 | { | 109 | { |
| 110 | size_t i; | 110 | size_t i; |
| 111 | 111 | ||
| 112 | for (i = 0; i < nr; i++) | 112 | for (i = 0; i < nr; i++) { |
| 113 | kmem_cache_free(s, p[i]); | 113 | if (s) |
| 114 | kmem_cache_free(s, p[i]); | ||
| 115 | else | ||
| 116 | kfree(p[i]); | ||
| 117 | } | ||
| 114 | } | 118 | } |
| 115 | 119 | ||
| 116 | int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr, | 120 | int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr, |
| @@ -124,6 +124,14 @@ static inline int kmem_cache_debug(struct kmem_cache *s) | |||
| 124 | #endif | 124 | #endif |
| 125 | } | 125 | } |
| 126 | 126 | ||
| 127 | static inline void *fixup_red_left(struct kmem_cache *s, void *p) | ||
| 128 | { | ||
| 129 | if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) | ||
| 130 | p += s->red_left_pad; | ||
| 131 | |||
| 132 | return p; | ||
| 133 | } | ||
| 134 | |||
| 127 | static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s) | 135 | static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s) |
| 128 | { | 136 | { |
| 129 | #ifdef CONFIG_SLUB_CPU_PARTIAL | 137 | #ifdef CONFIG_SLUB_CPU_PARTIAL |
| @@ -160,10 +168,18 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s) | |||
| 160 | */ | 168 | */ |
| 161 | #define MAX_PARTIAL 10 | 169 | #define MAX_PARTIAL 10 |
| 162 | 170 | ||
| 163 | #define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \ | 171 | #define DEBUG_DEFAULT_FLAGS (SLAB_CONSISTENCY_CHECKS | SLAB_RED_ZONE | \ |
| 164 | SLAB_POISON | SLAB_STORE_USER) | 172 | SLAB_POISON | SLAB_STORE_USER) |
| 165 | 173 | ||
| 166 | /* | 174 | /* |
| 175 | * These debug flags cannot use CMPXCHG because there might be consistency | ||
| 176 | * issues when checking or reading debug information | ||
| 177 | */ | ||
| 178 | #define SLAB_NO_CMPXCHG (SLAB_CONSISTENCY_CHECKS | SLAB_STORE_USER | \ | ||
| 179 | SLAB_TRACE) | ||
| 180 | |||
| 181 | |||
| 182 | /* | ||
| 167 | * Debugging flags that require metadata to be stored in the slab. These get | 183 | * Debugging flags that require metadata to be stored in the slab. These get |
| 168 | * disabled when slub_debug=O is used and a cache's min order increases with | 184 | * disabled when slub_debug=O is used and a cache's min order increases with |
| 169 | * metadata. | 185 | * metadata. |
| @@ -224,24 +240,6 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si) | |||
| 224 | * Core slab cache functions | 240 | * Core slab cache functions |
| 225 | *******************************************************************/ | 241 | *******************************************************************/ |
| 226 | 242 | ||
| 227 | /* Verify that a pointer has an address that is valid within a slab page */ | ||
| 228 | static inline int check_valid_pointer(struct kmem_cache *s, | ||
| 229 | struct page *page, const void *object) | ||
| 230 | { | ||
| 231 | void *base; | ||
| 232 | |||
| 233 | if (!object) | ||
| 234 | return 1; | ||
| 235 | |||
| 236 | base = page_address(page); | ||
| 237 | if (object < base || object >= base + page->objects * s->size || | ||
| 238 | (object - base) % s->size) { | ||
| 239 | return 0; | ||
| 240 | } | ||
| 241 | |||
| 242 | return 1; | ||
| 243 | } | ||
| 244 | |||
| 245 | static inline void *get_freepointer(struct kmem_cache *s, void *object) | 243 | static inline void *get_freepointer(struct kmem_cache *s, void *object) |
| 246 | { | 244 | { |
| 247 | return *(void **)(object + s->offset); | 245 | return *(void **)(object + s->offset); |
| @@ -271,12 +269,14 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) | |||
| 271 | 269 | ||
| 272 | /* Loop over all objects in a slab */ | 270 | /* Loop over all objects in a slab */ |
| 273 | #define for_each_object(__p, __s, __addr, __objects) \ | 271 | #define for_each_object(__p, __s, __addr, __objects) \ |
| 274 | for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\ | 272 | for (__p = fixup_red_left(__s, __addr); \ |
| 275 | __p += (__s)->size) | 273 | __p < (__addr) + (__objects) * (__s)->size; \ |
| 274 | __p += (__s)->size) | ||
| 276 | 275 | ||
| 277 | #define for_each_object_idx(__p, __idx, __s, __addr, __objects) \ | 276 | #define for_each_object_idx(__p, __idx, __s, __addr, __objects) \ |
| 278 | for (__p = (__addr), __idx = 1; __idx <= __objects;\ | 277 | for (__p = fixup_red_left(__s, __addr), __idx = 1; \ |
| 279 | __p += (__s)->size, __idx++) | 278 | __idx <= __objects; \ |
| 279 | __p += (__s)->size, __idx++) | ||
| 280 | 280 | ||
| 281 | /* Determine object index from a given position */ | 281 | /* Determine object index from a given position */ |
| 282 | static inline int slab_index(void *p, struct kmem_cache *s, void *addr) | 282 | static inline int slab_index(void *p, struct kmem_cache *s, void *addr) |
| @@ -284,30 +284,6 @@ static inline int slab_index(void *p, struct kmem_cache *s, void *addr) | |||
| 284 | return (p - addr) / s->size; | 284 | return (p - addr) / s->size; |
| 285 | } | 285 | } |
| 286 | 286 | ||
| 287 | static inline size_t slab_ksize(const struct kmem_cache *s) | ||
| 288 | { | ||
| 289 | #ifdef CONFIG_SLUB_DEBUG | ||
| 290 | /* | ||
| 291 | * Debugging requires use of the padding between object | ||
| 292 | * and whatever may come after it. | ||
| 293 | */ | ||
| 294 | if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) | ||
| 295 | return s->object_size; | ||
| 296 | |||
| 297 | #endif | ||
| 298 | /* | ||
| 299 | * If we have the need to store the freelist pointer | ||
| 300 | * back there or track user information then we can | ||
| 301 | * only use the space before that information. | ||
| 302 | */ | ||
| 303 | if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER)) | ||
| 304 | return s->inuse; | ||
| 305 | /* | ||
| 306 | * Else we can use all the padding etc for the allocation | ||
| 307 | */ | ||
| 308 | return s->size; | ||
| 309 | } | ||
| 310 | |||
| 311 | static inline int order_objects(int order, unsigned long size, int reserved) | 287 | static inline int order_objects(int order, unsigned long size, int reserved) |
| 312 | { | 288 | { |
| 313 | return ((PAGE_SIZE << order) - reserved) / size; | 289 | return ((PAGE_SIZE << order) - reserved) / size; |
| @@ -458,6 +434,22 @@ static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map) | |||
| 458 | set_bit(slab_index(p, s, addr), map); | 434 | set_bit(slab_index(p, s, addr), map); |
| 459 | } | 435 | } |
| 460 | 436 | ||
| 437 | static inline int size_from_object(struct kmem_cache *s) | ||
| 438 | { | ||
| 439 | if (s->flags & SLAB_RED_ZONE) | ||
| 440 | return s->size - s->red_left_pad; | ||
| 441 | |||
| 442 | return s->size; | ||
| 443 | } | ||
| 444 | |||
| 445 | static inline void *restore_red_left(struct kmem_cache *s, void *p) | ||
| 446 | { | ||
| 447 | if (s->flags & SLAB_RED_ZONE) | ||
| 448 | p -= s->red_left_pad; | ||
| 449 | |||
| 450 | return p; | ||
| 451 | } | ||
| 452 | |||
| 461 | /* | 453 | /* |
| 462 | * Debug settings: | 454 | * Debug settings: |
| 463 | */ | 455 | */ |
| @@ -491,6 +483,26 @@ static inline void metadata_access_disable(void) | |||
| 491 | /* | 483 | /* |
| 492 | * Object debugging | 484 | * Object debugging |
| 493 | */ | 485 | */ |
| 486 | |||
| 487 | /* Verify that a pointer has an address that is valid within a slab page */ | ||
| 488 | static inline int check_valid_pointer(struct kmem_cache *s, | ||
| 489 | struct page *page, void *object) | ||
| 490 | { | ||
| 491 | void *base; | ||
| 492 | |||
| 493 | if (!object) | ||
| 494 | return 1; | ||
| 495 | |||
| 496 | base = page_address(page); | ||
| 497 | object = restore_red_left(s, object); | ||
| 498 | if (object < base || object >= base + page->objects * s->size || | ||
| 499 | (object - base) % s->size) { | ||
| 500 | return 0; | ||
| 501 | } | ||
| 502 | |||
| 503 | return 1; | ||
| 504 | } | ||
| 505 | |||
| 494 | static void print_section(char *text, u8 *addr, unsigned int length) | 506 | static void print_section(char *text, u8 *addr, unsigned int length) |
| 495 | { | 507 | { |
| 496 | metadata_access_enable(); | 508 | metadata_access_enable(); |
| @@ -630,7 +642,9 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) | |||
| 630 | pr_err("INFO: Object 0x%p @offset=%tu fp=0x%p\n\n", | 642 | pr_err("INFO: Object 0x%p @offset=%tu fp=0x%p\n\n", |
| 631 | p, p - addr, get_freepointer(s, p)); | 643 | p, p - addr, get_freepointer(s, p)); |
| 632 | 644 | ||
| 633 | if (p > addr + 16) | 645 | if (s->flags & SLAB_RED_ZONE) |
| 646 | print_section("Redzone ", p - s->red_left_pad, s->red_left_pad); | ||
| 647 | else if (p > addr + 16) | ||
| 634 | print_section("Bytes b4 ", p - 16, 16); | 648 | print_section("Bytes b4 ", p - 16, 16); |
| 635 | 649 | ||
| 636 | print_section("Object ", p, min_t(unsigned long, s->object_size, | 650 | print_section("Object ", p, min_t(unsigned long, s->object_size, |
| @@ -647,9 +661,9 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) | |||
| 647 | if (s->flags & SLAB_STORE_USER) | 661 | if (s->flags & SLAB_STORE_USER) |
| 648 | off += 2 * sizeof(struct track); | 662 | off += 2 * sizeof(struct track); |
| 649 | 663 | ||
| 650 | if (off != s->size) | 664 | if (off != size_from_object(s)) |
| 651 | /* Beginning of the filler is the free pointer */ | 665 | /* Beginning of the filler is the free pointer */ |
| 652 | print_section("Padding ", p + off, s->size - off); | 666 | print_section("Padding ", p + off, size_from_object(s) - off); |
| 653 | 667 | ||
| 654 | dump_stack(); | 668 | dump_stack(); |
| 655 | } | 669 | } |
| @@ -679,6 +693,9 @@ static void init_object(struct kmem_cache *s, void *object, u8 val) | |||
| 679 | { | 693 | { |
| 680 | u8 *p = object; | 694 | u8 *p = object; |
| 681 | 695 | ||
| 696 | if (s->flags & SLAB_RED_ZONE) | ||
| 697 | memset(p - s->red_left_pad, val, s->red_left_pad); | ||
| 698 | |||
| 682 | if (s->flags & __OBJECT_POISON) { | 699 | if (s->flags & __OBJECT_POISON) { |
| 683 | memset(p, POISON_FREE, s->object_size - 1); | 700 | memset(p, POISON_FREE, s->object_size - 1); |
| 684 | p[s->object_size - 1] = POISON_END; | 701 | p[s->object_size - 1] = POISON_END; |
| @@ -771,11 +788,11 @@ static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p) | |||
| 771 | /* We also have user information there */ | 788 | /* We also have user information there */ |
| 772 | off += 2 * sizeof(struct track); | 789 | off += 2 * sizeof(struct track); |
| 773 | 790 | ||
| 774 | if (s->size == off) | 791 | if (size_from_object(s) == off) |
| 775 | return 1; | 792 | return 1; |
| 776 | 793 | ||
| 777 | return check_bytes_and_report(s, page, p, "Object padding", | 794 | return check_bytes_and_report(s, page, p, "Object padding", |
| 778 | p + off, POISON_INUSE, s->size - off); | 795 | p + off, POISON_INUSE, size_from_object(s) - off); |
| 779 | } | 796 | } |
| 780 | 797 | ||
| 781 | /* Check the pad bytes at the end of a slab page */ | 798 | /* Check the pad bytes at the end of a slab page */ |
| @@ -820,6 +837,10 @@ static int check_object(struct kmem_cache *s, struct page *page, | |||
| 820 | 837 | ||
| 821 | if (s->flags & SLAB_RED_ZONE) { | 838 | if (s->flags & SLAB_RED_ZONE) { |
| 822 | if (!check_bytes_and_report(s, page, object, "Redzone", | 839 | if (!check_bytes_and_report(s, page, object, "Redzone", |
| 840 | object - s->red_left_pad, val, s->red_left_pad)) | ||
| 841 | return 0; | ||
| 842 | |||
| 843 | if (!check_bytes_and_report(s, page, object, "Redzone", | ||
| 823 | endobject, val, s->inuse - s->object_size)) | 844 | endobject, val, s->inuse - s->object_size)) |
| 824 | return 0; | 845 | return 0; |
| 825 | } else { | 846 | } else { |
| @@ -1031,20 +1052,32 @@ static void setup_object_debug(struct kmem_cache *s, struct page *page, | |||
| 1031 | init_tracking(s, object); | 1052 | init_tracking(s, object); |
| 1032 | } | 1053 | } |
| 1033 | 1054 | ||
| 1034 | static noinline int alloc_debug_processing(struct kmem_cache *s, | 1055 | static inline int alloc_consistency_checks(struct kmem_cache *s, |
| 1035 | struct page *page, | 1056 | struct page *page, |
| 1036 | void *object, unsigned long addr) | 1057 | void *object, unsigned long addr) |
| 1037 | { | 1058 | { |
| 1038 | if (!check_slab(s, page)) | 1059 | if (!check_slab(s, page)) |
| 1039 | goto bad; | 1060 | return 0; |
| 1040 | 1061 | ||
| 1041 | if (!check_valid_pointer(s, page, object)) { | 1062 | if (!check_valid_pointer(s, page, object)) { |
| 1042 | object_err(s, page, object, "Freelist Pointer check fails"); | 1063 | object_err(s, page, object, "Freelist Pointer check fails"); |
| 1043 | goto bad; | 1064 | return 0; |
| 1044 | } | 1065 | } |
| 1045 | 1066 | ||
| 1046 | if (!check_object(s, page, object, SLUB_RED_INACTIVE)) | 1067 | if (!check_object(s, page, object, SLUB_RED_INACTIVE)) |
| 1047 | goto bad; | 1068 | return 0; |
| 1069 | |||
| 1070 | return 1; | ||
| 1071 | } | ||
| 1072 | |||
| 1073 | static noinline int alloc_debug_processing(struct kmem_cache *s, | ||
| 1074 | struct page *page, | ||
| 1075 | void *object, unsigned long addr) | ||
| 1076 | { | ||
| 1077 | if (s->flags & SLAB_CONSISTENCY_CHECKS) { | ||
| 1078 | if (!alloc_consistency_checks(s, page, object, addr)) | ||
| 1079 | goto bad; | ||
| 1080 | } | ||
| 1048 | 1081 | ||
| 1049 | /* Success perform special debug activities for allocs */ | 1082 | /* Success perform special debug activities for allocs */ |
| 1050 | if (s->flags & SLAB_STORE_USER) | 1083 | if (s->flags & SLAB_STORE_USER) |
| @@ -1067,37 +1100,21 @@ bad: | |||
| 1067 | return 0; | 1100 | return 0; |
| 1068 | } | 1101 | } |
| 1069 | 1102 | ||
| 1070 | /* Supports checking bulk free of a constructed freelist */ | 1103 | static inline int free_consistency_checks(struct kmem_cache *s, |
| 1071 | static noinline struct kmem_cache_node *free_debug_processing( | 1104 | struct page *page, void *object, unsigned long addr) |
| 1072 | struct kmem_cache *s, struct page *page, | ||
| 1073 | void *head, void *tail, int bulk_cnt, | ||
| 1074 | unsigned long addr, unsigned long *flags) | ||
| 1075 | { | 1105 | { |
| 1076 | struct kmem_cache_node *n = get_node(s, page_to_nid(page)); | ||
| 1077 | void *object = head; | ||
| 1078 | int cnt = 0; | ||
| 1079 | |||
| 1080 | spin_lock_irqsave(&n->list_lock, *flags); | ||
| 1081 | slab_lock(page); | ||
| 1082 | |||
| 1083 | if (!check_slab(s, page)) | ||
| 1084 | goto fail; | ||
| 1085 | |||
| 1086 | next_object: | ||
| 1087 | cnt++; | ||
| 1088 | |||
| 1089 | if (!check_valid_pointer(s, page, object)) { | 1106 | if (!check_valid_pointer(s, page, object)) { |
| 1090 | slab_err(s, page, "Invalid object pointer 0x%p", object); | 1107 | slab_err(s, page, "Invalid object pointer 0x%p", object); |
| 1091 | goto fail; | 1108 | return 0; |
| 1092 | } | 1109 | } |
| 1093 | 1110 | ||
| 1094 | if (on_freelist(s, page, object)) { | 1111 | if (on_freelist(s, page, object)) { |
| 1095 | object_err(s, page, object, "Object already free"); | 1112 | object_err(s, page, object, "Object already free"); |
| 1096 | goto fail; | 1113 | return 0; |
| 1097 | } | 1114 | } |
| 1098 | 1115 | ||
| 1099 | if (!check_object(s, page, object, SLUB_RED_ACTIVE)) | 1116 | if (!check_object(s, page, object, SLUB_RED_ACTIVE)) |
| 1100 | goto out; | 1117 | return 0; |
| 1101 | 1118 | ||
| 1102 | if (unlikely(s != page->slab_cache)) { | 1119 | if (unlikely(s != page->slab_cache)) { |
| 1103 | if (!PageSlab(page)) { | 1120 | if (!PageSlab(page)) { |
| @@ -1110,7 +1127,37 @@ next_object: | |||
| 1110 | } else | 1127 | } else |
| 1111 | object_err(s, page, object, | 1128 | object_err(s, page, object, |
| 1112 | "page slab pointer corrupt."); | 1129 | "page slab pointer corrupt."); |
| 1113 | goto fail; | 1130 | return 0; |
| 1131 | } | ||
| 1132 | return 1; | ||
| 1133 | } | ||
| 1134 | |||
| 1135 | /* Supports checking bulk free of a constructed freelist */ | ||
| 1136 | static noinline int free_debug_processing( | ||
| 1137 | struct kmem_cache *s, struct page *page, | ||
| 1138 | void *head, void *tail, int bulk_cnt, | ||
| 1139 | unsigned long addr) | ||
| 1140 | { | ||
| 1141 | struct kmem_cache_node *n = get_node(s, page_to_nid(page)); | ||
| 1142 | void *object = head; | ||
| 1143 | int cnt = 0; | ||
| 1144 | unsigned long uninitialized_var(flags); | ||
| 1145 | int ret = 0; | ||
| 1146 | |||
| 1147 | spin_lock_irqsave(&n->list_lock, flags); | ||
| 1148 | slab_lock(page); | ||
| 1149 | |||
| 1150 | if (s->flags & SLAB_CONSISTENCY_CHECKS) { | ||
| 1151 | if (!check_slab(s, page)) | ||
| 1152 | goto out; | ||
| 1153 | } | ||
| 1154 | |||
| 1155 | next_object: | ||
| 1156 | cnt++; | ||
| 1157 | |||
| 1158 | if (s->flags & SLAB_CONSISTENCY_CHECKS) { | ||
| 1159 | if (!free_consistency_checks(s, page, object, addr)) | ||
| 1160 | goto out; | ||
| 1114 | } | 1161 | } |
| 1115 | 1162 | ||
| 1116 | if (s->flags & SLAB_STORE_USER) | 1163 | if (s->flags & SLAB_STORE_USER) |
| @@ -1124,23 +1171,18 @@ next_object: | |||
| 1124 | object = get_freepointer(s, object); | 1171 | object = get_freepointer(s, object); |
| 1125 | goto next_object; | 1172 | goto next_object; |
| 1126 | } | 1173 | } |
| 1174 | ret = 1; | ||
| 1175 | |||
| 1127 | out: | 1176 | out: |
| 1128 | if (cnt != bulk_cnt) | 1177 | if (cnt != bulk_cnt) |
| 1129 | slab_err(s, page, "Bulk freelist count(%d) invalid(%d)\n", | 1178 | slab_err(s, page, "Bulk freelist count(%d) invalid(%d)\n", |
| 1130 | bulk_cnt, cnt); | 1179 | bulk_cnt, cnt); |
| 1131 | 1180 | ||
| 1132 | slab_unlock(page); | 1181 | slab_unlock(page); |
| 1133 | /* | 1182 | spin_unlock_irqrestore(&n->list_lock, flags); |
| 1134 | * Keep node_lock to preserve integrity | 1183 | if (!ret) |
| 1135 | * until the object is actually freed | 1184 | slab_fix(s, "Object at 0x%p not freed", object); |
| 1136 | */ | 1185 | return ret; |
| 1137 | return n; | ||
| 1138 | |||
| 1139 | fail: | ||
| 1140 | slab_unlock(page); | ||
| 1141 | spin_unlock_irqrestore(&n->list_lock, *flags); | ||
| 1142 | slab_fix(s, "Object at 0x%p not freed", object); | ||
| 1143 | return NULL; | ||
| 1144 | } | 1186 | } |
| 1145 | 1187 | ||
| 1146 | static int __init setup_slub_debug(char *str) | 1188 | static int __init setup_slub_debug(char *str) |
| @@ -1172,7 +1214,7 @@ static int __init setup_slub_debug(char *str) | |||
| 1172 | for (; *str && *str != ','; str++) { | 1214 | for (; *str && *str != ','; str++) { |
| 1173 | switch (tolower(*str)) { | 1215 | switch (tolower(*str)) { |
| 1174 | case 'f': | 1216 | case 'f': |
| 1175 | slub_debug |= SLAB_DEBUG_FREE; | 1217 | slub_debug |= SLAB_CONSISTENCY_CHECKS; |
| 1176 | break; | 1218 | break; |
| 1177 | case 'z': | 1219 | case 'z': |
| 1178 | slub_debug |= SLAB_RED_ZONE; | 1220 | slub_debug |= SLAB_RED_ZONE; |
| @@ -1231,10 +1273,10 @@ static inline void setup_object_debug(struct kmem_cache *s, | |||
| 1231 | static inline int alloc_debug_processing(struct kmem_cache *s, | 1273 | static inline int alloc_debug_processing(struct kmem_cache *s, |
| 1232 | struct page *page, void *object, unsigned long addr) { return 0; } | 1274 | struct page *page, void *object, unsigned long addr) { return 0; } |
| 1233 | 1275 | ||
| 1234 | static inline struct kmem_cache_node *free_debug_processing( | 1276 | static inline int free_debug_processing( |
| 1235 | struct kmem_cache *s, struct page *page, | 1277 | struct kmem_cache *s, struct page *page, |
| 1236 | void *head, void *tail, int bulk_cnt, | 1278 | void *head, void *tail, int bulk_cnt, |
| 1237 | unsigned long addr, unsigned long *flags) { return NULL; } | 1279 | unsigned long addr) { return 0; } |
| 1238 | 1280 | ||
| 1239 | static inline int slab_pad_check(struct kmem_cache *s, struct page *page) | 1281 | static inline int slab_pad_check(struct kmem_cache *s, struct page *page) |
| 1240 | { return 1; } | 1282 | { return 1; } |
| @@ -1281,36 +1323,6 @@ static inline void kfree_hook(const void *x) | |||
| 1281 | kasan_kfree_large(x); | 1323 | kasan_kfree_large(x); |
| 1282 | } | 1324 | } |
| 1283 | 1325 | ||
| 1284 | static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, | ||
| 1285 | gfp_t flags) | ||
| 1286 | { | ||
| 1287 | flags &= gfp_allowed_mask; | ||
| 1288 | lockdep_trace_alloc(flags); | ||
| 1289 | might_sleep_if(gfpflags_allow_blocking(flags)); | ||
| 1290 | |||
| 1291 | if (should_failslab(s->object_size, flags, s->flags)) | ||
| 1292 | return NULL; | ||
| 1293 | |||
| 1294 | return memcg_kmem_get_cache(s, flags); | ||
| 1295 | } | ||
| 1296 | |||
| 1297 | static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, | ||
| 1298 | size_t size, void **p) | ||
| 1299 | { | ||
| 1300 | size_t i; | ||
| 1301 | |||
| 1302 | flags &= gfp_allowed_mask; | ||
| 1303 | for (i = 0; i < size; i++) { | ||
| 1304 | void *object = p[i]; | ||
| 1305 | |||
| 1306 | kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); | ||
| 1307 | kmemleak_alloc_recursive(object, s->object_size, 1, | ||
| 1308 | s->flags, flags); | ||
| 1309 | kasan_slab_alloc(s, object); | ||
| 1310 | } | ||
| 1311 | memcg_kmem_put_cache(s); | ||
| 1312 | } | ||
| 1313 | |||
| 1314 | static inline void slab_free_hook(struct kmem_cache *s, void *x) | 1326 | static inline void slab_free_hook(struct kmem_cache *s, void *x) |
| 1315 | { | 1327 | { |
| 1316 | kmemleak_free_recursive(x, s->flags); | 1328 | kmemleak_free_recursive(x, s->flags); |
| @@ -1470,7 +1482,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) | |||
| 1470 | set_freepointer(s, p, NULL); | 1482 | set_freepointer(s, p, NULL); |
| 1471 | } | 1483 | } |
| 1472 | 1484 | ||
| 1473 | page->freelist = start; | 1485 | page->freelist = fixup_red_left(s, start); |
| 1474 | page->inuse = page->objects; | 1486 | page->inuse = page->objects; |
| 1475 | page->frozen = 1; | 1487 | page->frozen = 1; |
| 1476 | 1488 | ||
| @@ -1506,7 +1518,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page) | |||
| 1506 | int order = compound_order(page); | 1518 | int order = compound_order(page); |
| 1507 | int pages = 1 << order; | 1519 | int pages = 1 << order; |
| 1508 | 1520 | ||
| 1509 | if (kmem_cache_debug(s)) { | 1521 | if (s->flags & SLAB_CONSISTENCY_CHECKS) { |
| 1510 | void *p; | 1522 | void *p; |
| 1511 | 1523 | ||
| 1512 | slab_pad_check(s, page); | 1524 | slab_pad_check(s, page); |
| @@ -2224,8 +2236,8 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) | |||
| 2224 | if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slub_oom_rs)) | 2236 | if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slub_oom_rs)) |
| 2225 | return; | 2237 | return; |
| 2226 | 2238 | ||
| 2227 | pr_warn("SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n", | 2239 | pr_warn("SLUB: Unable to allocate memory on node %d, gfp=%#x(%pGg)\n", |
| 2228 | nid, gfpflags); | 2240 | nid, gfpflags, &gfpflags); |
| 2229 | pr_warn(" cache: %s, object size: %d, buffer size: %d, default order: %d, min order: %d\n", | 2241 | pr_warn(" cache: %s, object size: %d, buffer size: %d, default order: %d, min order: %d\n", |
| 2230 | s->name, s->object_size, s->size, oo_order(s->oo), | 2242 | s->name, s->object_size, s->size, oo_order(s->oo), |
| 2231 | oo_order(s->min)); | 2243 | oo_order(s->min)); |
| @@ -2642,8 +2654,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, | |||
| 2642 | stat(s, FREE_SLOWPATH); | 2654 | stat(s, FREE_SLOWPATH); |
| 2643 | 2655 | ||
| 2644 | if (kmem_cache_debug(s) && | 2656 | if (kmem_cache_debug(s) && |
| 2645 | !(n = free_debug_processing(s, page, head, tail, cnt, | 2657 | !free_debug_processing(s, page, head, tail, cnt, addr)) |
| 2646 | addr, &flags))) | ||
| 2647 | return; | 2658 | return; |
| 2648 | 2659 | ||
| 2649 | do { | 2660 | do { |
| @@ -2815,6 +2826,7 @@ struct detached_freelist { | |||
| 2815 | void *tail; | 2826 | void *tail; |
| 2816 | void *freelist; | 2827 | void *freelist; |
| 2817 | int cnt; | 2828 | int cnt; |
| 2829 | struct kmem_cache *s; | ||
| 2818 | }; | 2830 | }; |
| 2819 | 2831 | ||
| 2820 | /* | 2832 | /* |
| @@ -2829,26 +2841,45 @@ struct detached_freelist { | |||
| 2829 | * synchronization primitive. Look ahead in the array is limited due | 2841 | * synchronization primitive. Look ahead in the array is limited due |
| 2830 | * to performance reasons. | 2842 | * to performance reasons. |
| 2831 | */ | 2843 | */ |
| 2832 | static int build_detached_freelist(struct kmem_cache *s, size_t size, | 2844 | static inline |
| 2833 | void **p, struct detached_freelist *df) | 2845 | int build_detached_freelist(struct kmem_cache *s, size_t size, |
| 2846 | void **p, struct detached_freelist *df) | ||
| 2834 | { | 2847 | { |
| 2835 | size_t first_skipped_index = 0; | 2848 | size_t first_skipped_index = 0; |
| 2836 | int lookahead = 3; | 2849 | int lookahead = 3; |
| 2837 | void *object; | 2850 | void *object; |
| 2851 | struct page *page; | ||
| 2838 | 2852 | ||
| 2839 | /* Always re-init detached_freelist */ | 2853 | /* Always re-init detached_freelist */ |
| 2840 | df->page = NULL; | 2854 | df->page = NULL; |
| 2841 | 2855 | ||
| 2842 | do { | 2856 | do { |
| 2843 | object = p[--size]; | 2857 | object = p[--size]; |
| 2858 | /* Do we need !ZERO_OR_NULL_PTR(object) here? (for kfree) */ | ||
| 2844 | } while (!object && size); | 2859 | } while (!object && size); |
| 2845 | 2860 | ||
| 2846 | if (!object) | 2861 | if (!object) |
| 2847 | return 0; | 2862 | return 0; |
| 2848 | 2863 | ||
| 2864 | page = virt_to_head_page(object); | ||
| 2865 | if (!s) { | ||
| 2866 | /* Handle kalloc'ed objects */ | ||
| 2867 | if (unlikely(!PageSlab(page))) { | ||
| 2868 | BUG_ON(!PageCompound(page)); | ||
| 2869 | kfree_hook(object); | ||
| 2870 | __free_kmem_pages(page, compound_order(page)); | ||
| 2871 | p[size] = NULL; /* mark object processed */ | ||
| 2872 | return size; | ||
| 2873 | } | ||
| 2874 | /* Derive kmem_cache from object */ | ||
| 2875 | df->s = page->slab_cache; | ||
| 2876 | } else { | ||
| 2877 | df->s = cache_from_obj(s, object); /* Support for memcg */ | ||
| 2878 | } | ||
| 2879 | |||
| 2849 | /* Start new detached freelist */ | 2880 | /* Start new detached freelist */ |
| 2850 | set_freepointer(s, object, NULL); | 2881 | df->page = page; |
| 2851 | df->page = virt_to_head_page(object); | 2882 | set_freepointer(df->s, object, NULL); |
| 2852 | df->tail = object; | 2883 | df->tail = object; |
| 2853 | df->freelist = object; | 2884 | df->freelist = object; |
| 2854 | p[size] = NULL; /* mark object processed */ | 2885 | p[size] = NULL; /* mark object processed */ |
| @@ -2862,7 +2893,7 @@ static int build_detached_freelist(struct kmem_cache *s, size_t size, | |||
| 2862 | /* df->page is always set at this point */ | 2893 | /* df->page is always set at this point */ |
| 2863 | if (df->page == virt_to_head_page(object)) { | 2894 | if (df->page == virt_to_head_page(object)) { |
| 2864 | /* Opportunity build freelist */ | 2895 | /* Opportunity build freelist */ |
| 2865 | set_freepointer(s, object, df->freelist); | 2896 | set_freepointer(df->s, object, df->freelist); |
| 2866 | df->freelist = object; | 2897 | df->freelist = object; |
| 2867 | df->cnt++; | 2898 | df->cnt++; |
| 2868 | p[size] = NULL; /* mark object processed */ | 2899 | p[size] = NULL; /* mark object processed */ |
| @@ -2881,25 +2912,20 @@ static int build_detached_freelist(struct kmem_cache *s, size_t size, | |||
| 2881 | return first_skipped_index; | 2912 | return first_skipped_index; |
| 2882 | } | 2913 | } |
| 2883 | 2914 | ||
| 2884 | |||
| 2885 | /* Note that interrupts must be enabled when calling this function. */ | 2915 | /* Note that interrupts must be enabled when calling this function. */ |
| 2886 | void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p) | 2916 | void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) |
| 2887 | { | 2917 | { |
| 2888 | if (WARN_ON(!size)) | 2918 | if (WARN_ON(!size)) |
| 2889 | return; | 2919 | return; |
| 2890 | 2920 | ||
| 2891 | do { | 2921 | do { |
| 2892 | struct detached_freelist df; | 2922 | struct detached_freelist df; |
| 2893 | struct kmem_cache *s; | ||
| 2894 | |||
| 2895 | /* Support for memcg */ | ||
| 2896 | s = cache_from_obj(orig_s, p[size - 1]); | ||
| 2897 | 2923 | ||
| 2898 | size = build_detached_freelist(s, size, p, &df); | 2924 | size = build_detached_freelist(s, size, p, &df); |
| 2899 | if (unlikely(!df.page)) | 2925 | if (unlikely(!df.page)) |
| 2900 | continue; | 2926 | continue; |
| 2901 | 2927 | ||
| 2902 | slab_free(s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_); | 2928 | slab_free(df.s, df.page, df.freelist, df.tail, df.cnt,_RET_IP_); |
| 2903 | } while (likely(size)); | 2929 | } while (likely(size)); |
| 2904 | } | 2930 | } |
| 2905 | EXPORT_SYMBOL(kmem_cache_free_bulk); | 2931 | EXPORT_SYMBOL(kmem_cache_free_bulk); |
| @@ -3285,7 +3311,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) | |||
| 3285 | */ | 3311 | */ |
| 3286 | size += 2 * sizeof(struct track); | 3312 | size += 2 * sizeof(struct track); |
| 3287 | 3313 | ||
| 3288 | if (flags & SLAB_RED_ZONE) | 3314 | if (flags & SLAB_RED_ZONE) { |
| 3289 | /* | 3315 | /* |
| 3290 | * Add some empty padding so that we can catch | 3316 | * Add some empty padding so that we can catch |
| 3291 | * overwrites from earlier objects rather than let | 3317 | * overwrites from earlier objects rather than let |
| @@ -3294,6 +3320,11 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) | |||
| 3294 | * of the object. | 3320 | * of the object. |
| 3295 | */ | 3321 | */ |
| 3296 | size += sizeof(void *); | 3322 | size += sizeof(void *); |
| 3323 | |||
| 3324 | s->red_left_pad = sizeof(void *); | ||
| 3325 | s->red_left_pad = ALIGN(s->red_left_pad, s->align); | ||
| 3326 | size += s->red_left_pad; | ||
| 3327 | } | ||
| 3297 | #endif | 3328 | #endif |
| 3298 | 3329 | ||
| 3299 | /* | 3330 | /* |
| @@ -3357,7 +3388,7 @@ static int kmem_cache_open(struct kmem_cache *s, unsigned long flags) | |||
| 3357 | 3388 | ||
| 3358 | #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \ | 3389 | #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \ |
| 3359 | defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) | 3390 | defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) |
| 3360 | if (system_has_cmpxchg_double() && (s->flags & SLAB_DEBUG_FLAGS) == 0) | 3391 | if (system_has_cmpxchg_double() && (s->flags & SLAB_NO_CMPXCHG) == 0) |
| 3361 | /* Enable fast mode */ | 3392 | /* Enable fast mode */ |
| 3362 | s->flags |= __CMPXCHG_DOUBLE; | 3393 | s->flags |= __CMPXCHG_DOUBLE; |
| 3363 | #endif | 3394 | #endif |
| @@ -4812,16 +4843,16 @@ SLAB_ATTR_RO(total_objects); | |||
| 4812 | 4843 | ||
| 4813 | static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf) | 4844 | static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf) |
| 4814 | { | 4845 | { |
| 4815 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE)); | 4846 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_CONSISTENCY_CHECKS)); |
| 4816 | } | 4847 | } |
| 4817 | 4848 | ||
| 4818 | static ssize_t sanity_checks_store(struct kmem_cache *s, | 4849 | static ssize_t sanity_checks_store(struct kmem_cache *s, |
| 4819 | const char *buf, size_t length) | 4850 | const char *buf, size_t length) |
| 4820 | { | 4851 | { |
| 4821 | s->flags &= ~SLAB_DEBUG_FREE; | 4852 | s->flags &= ~SLAB_CONSISTENCY_CHECKS; |
| 4822 | if (buf[0] == '1') { | 4853 | if (buf[0] == '1') { |
| 4823 | s->flags &= ~__CMPXCHG_DOUBLE; | 4854 | s->flags &= ~__CMPXCHG_DOUBLE; |
| 4824 | s->flags |= SLAB_DEBUG_FREE; | 4855 | s->flags |= SLAB_CONSISTENCY_CHECKS; |
| 4825 | } | 4856 | } |
| 4826 | return length; | 4857 | return length; |
| 4827 | } | 4858 | } |
| @@ -4865,7 +4896,6 @@ static ssize_t red_zone_store(struct kmem_cache *s, | |||
| 4865 | 4896 | ||
| 4866 | s->flags &= ~SLAB_RED_ZONE; | 4897 | s->flags &= ~SLAB_RED_ZONE; |
| 4867 | if (buf[0] == '1') { | 4898 | if (buf[0] == '1') { |
| 4868 | s->flags &= ~__CMPXCHG_DOUBLE; | ||
| 4869 | s->flags |= SLAB_RED_ZONE; | 4899 | s->flags |= SLAB_RED_ZONE; |
| 4870 | } | 4900 | } |
| 4871 | calculate_sizes(s, -1); | 4901 | calculate_sizes(s, -1); |
| @@ -4886,7 +4916,6 @@ static ssize_t poison_store(struct kmem_cache *s, | |||
| 4886 | 4916 | ||
| 4887 | s->flags &= ~SLAB_POISON; | 4917 | s->flags &= ~SLAB_POISON; |
| 4888 | if (buf[0] == '1') { | 4918 | if (buf[0] == '1') { |
| 4889 | s->flags &= ~__CMPXCHG_DOUBLE; | ||
| 4890 | s->flags |= SLAB_POISON; | 4919 | s->flags |= SLAB_POISON; |
| 4891 | } | 4920 | } |
| 4892 | calculate_sizes(s, -1); | 4921 | calculate_sizes(s, -1); |
| @@ -5356,7 +5385,7 @@ static char *create_unique_id(struct kmem_cache *s) | |||
| 5356 | *p++ = 'd'; | 5385 | *p++ = 'd'; |
| 5357 | if (s->flags & SLAB_RECLAIM_ACCOUNT) | 5386 | if (s->flags & SLAB_RECLAIM_ACCOUNT) |
| 5358 | *p++ = 'a'; | 5387 | *p++ = 'a'; |
| 5359 | if (s->flags & SLAB_DEBUG_FREE) | 5388 | if (s->flags & SLAB_CONSISTENCY_CHECKS) |
| 5360 | *p++ = 'F'; | 5389 | *p++ = 'F'; |
| 5361 | if (!(s->flags & SLAB_NOTRACK)) | 5390 | if (!(s->flags & SLAB_NOTRACK)) |
| 5362 | *p++ = 't'; | 5391 | *p++ = 't'; |
diff --git a/mm/truncate.c b/mm/truncate.c index e3ee0e27cd17..7598b552ae03 100644 --- a/mm/truncate.c +++ b/mm/truncate.c | |||
| @@ -519,7 +519,6 @@ EXPORT_SYMBOL(invalidate_mapping_pages); | |||
| 519 | static int | 519 | static int |
| 520 | invalidate_complete_page2(struct address_space *mapping, struct page *page) | 520 | invalidate_complete_page2(struct address_space *mapping, struct page *page) |
| 521 | { | 521 | { |
| 522 | struct mem_cgroup *memcg; | ||
| 523 | unsigned long flags; | 522 | unsigned long flags; |
| 524 | 523 | ||
| 525 | if (page->mapping != mapping) | 524 | if (page->mapping != mapping) |
| @@ -528,15 +527,13 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) | |||
| 528 | if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL)) | 527 | if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL)) |
| 529 | return 0; | 528 | return 0; |
| 530 | 529 | ||
| 531 | memcg = mem_cgroup_begin_page_stat(page); | ||
| 532 | spin_lock_irqsave(&mapping->tree_lock, flags); | 530 | spin_lock_irqsave(&mapping->tree_lock, flags); |
| 533 | if (PageDirty(page)) | 531 | if (PageDirty(page)) |
| 534 | goto failed; | 532 | goto failed; |
| 535 | 533 | ||
| 536 | BUG_ON(page_has_private(page)); | 534 | BUG_ON(page_has_private(page)); |
| 537 | __delete_from_page_cache(page, NULL, memcg); | 535 | __delete_from_page_cache(page, NULL); |
| 538 | spin_unlock_irqrestore(&mapping->tree_lock, flags); | 536 | spin_unlock_irqrestore(&mapping->tree_lock, flags); |
| 539 | mem_cgroup_end_page_stat(memcg); | ||
| 540 | 537 | ||
| 541 | if (mapping->a_ops->freepage) | 538 | if (mapping->a_ops->freepage) |
| 542 | mapping->a_ops->freepage(page); | 539 | mapping->a_ops->freepage(page); |
| @@ -545,7 +542,6 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) | |||
| 545 | return 1; | 542 | return 1; |
| 546 | failed: | 543 | failed: |
| 547 | spin_unlock_irqrestore(&mapping->tree_lock, flags); | 544 | spin_unlock_irqrestore(&mapping->tree_lock, flags); |
| 548 | mem_cgroup_end_page_stat(memcg); | ||
| 549 | return 0; | 545 | return 0; |
| 550 | } | 546 | } |
| 551 | 547 | ||
diff --git a/mm/vmscan.c b/mm/vmscan.c index 71b1c29948db..dd984470248f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
| @@ -195,25 +195,25 @@ static unsigned long zone_reclaimable_pages(struct zone *zone) | |||
| 195 | { | 195 | { |
| 196 | unsigned long nr; | 196 | unsigned long nr; |
| 197 | 197 | ||
| 198 | nr = zone_page_state(zone, NR_ACTIVE_FILE) + | 198 | nr = zone_page_state_snapshot(zone, NR_ACTIVE_FILE) + |
| 199 | zone_page_state(zone, NR_INACTIVE_FILE) + | 199 | zone_page_state_snapshot(zone, NR_INACTIVE_FILE) + |
| 200 | zone_page_state(zone, NR_ISOLATED_FILE); | 200 | zone_page_state_snapshot(zone, NR_ISOLATED_FILE); |
| 201 | 201 | ||
| 202 | if (get_nr_swap_pages() > 0) | 202 | if (get_nr_swap_pages() > 0) |
| 203 | nr += zone_page_state(zone, NR_ACTIVE_ANON) + | 203 | nr += zone_page_state_snapshot(zone, NR_ACTIVE_ANON) + |
| 204 | zone_page_state(zone, NR_INACTIVE_ANON) + | 204 | zone_page_state_snapshot(zone, NR_INACTIVE_ANON) + |
| 205 | zone_page_state(zone, NR_ISOLATED_ANON); | 205 | zone_page_state_snapshot(zone, NR_ISOLATED_ANON); |
| 206 | 206 | ||
| 207 | return nr; | 207 | return nr; |
| 208 | } | 208 | } |
| 209 | 209 | ||
| 210 | bool zone_reclaimable(struct zone *zone) | 210 | bool zone_reclaimable(struct zone *zone) |
| 211 | { | 211 | { |
| 212 | return zone_page_state(zone, NR_PAGES_SCANNED) < | 212 | return zone_page_state_snapshot(zone, NR_PAGES_SCANNED) < |
| 213 | zone_reclaimable_pages(zone) * 6; | 213 | zone_reclaimable_pages(zone) * 6; |
| 214 | } | 214 | } |
| 215 | 215 | ||
| 216 | static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru) | 216 | unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru) |
| 217 | { | 217 | { |
| 218 | if (!mem_cgroup_disabled()) | 218 | if (!mem_cgroup_disabled()) |
| 219 | return mem_cgroup_get_lru_size(lruvec, lru); | 219 | return mem_cgroup_get_lru_size(lruvec, lru); |
| @@ -228,14 +228,6 @@ int register_shrinker(struct shrinker *shrinker) | |||
| 228 | { | 228 | { |
| 229 | size_t size = sizeof(*shrinker->nr_deferred); | 229 | size_t size = sizeof(*shrinker->nr_deferred); |
| 230 | 230 | ||
| 231 | /* | ||
| 232 | * If we only have one possible node in the system anyway, save | ||
| 233 | * ourselves the trouble and disable NUMA aware behavior. This way we | ||
| 234 | * will save memory and some small loop time later. | ||
| 235 | */ | ||
| 236 | if (nr_node_ids == 1) | ||
| 237 | shrinker->flags &= ~SHRINKER_NUMA_AWARE; | ||
| 238 | |||
| 239 | if (shrinker->flags & SHRINKER_NUMA_AWARE) | 231 | if (shrinker->flags & SHRINKER_NUMA_AWARE) |
| 240 | size *= nr_node_ids; | 232 | size *= nr_node_ids; |
| 241 | 233 | ||
| @@ -611,12 +603,10 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, | |||
| 611 | bool reclaimed) | 603 | bool reclaimed) |
| 612 | { | 604 | { |
| 613 | unsigned long flags; | 605 | unsigned long flags; |
| 614 | struct mem_cgroup *memcg; | ||
| 615 | 606 | ||
| 616 | BUG_ON(!PageLocked(page)); | 607 | BUG_ON(!PageLocked(page)); |
| 617 | BUG_ON(mapping != page_mapping(page)); | 608 | BUG_ON(mapping != page_mapping(page)); |
| 618 | 609 | ||
| 619 | memcg = mem_cgroup_begin_page_stat(page); | ||
| 620 | spin_lock_irqsave(&mapping->tree_lock, flags); | 610 | spin_lock_irqsave(&mapping->tree_lock, flags); |
| 621 | /* | 611 | /* |
| 622 | * The non racy check for a busy page. | 612 | * The non racy check for a busy page. |
| @@ -656,7 +646,6 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, | |||
| 656 | mem_cgroup_swapout(page, swap); | 646 | mem_cgroup_swapout(page, swap); |
| 657 | __delete_from_swap_cache(page); | 647 | __delete_from_swap_cache(page); |
| 658 | spin_unlock_irqrestore(&mapping->tree_lock, flags); | 648 | spin_unlock_irqrestore(&mapping->tree_lock, flags); |
| 659 | mem_cgroup_end_page_stat(memcg); | ||
| 660 | swapcache_free(swap); | 649 | swapcache_free(swap); |
| 661 | } else { | 650 | } else { |
| 662 | void (*freepage)(struct page *); | 651 | void (*freepage)(struct page *); |
| @@ -682,9 +671,8 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, | |||
| 682 | if (reclaimed && page_is_file_cache(page) && | 671 | if (reclaimed && page_is_file_cache(page) && |
| 683 | !mapping_exiting(mapping) && !dax_mapping(mapping)) | 672 | !mapping_exiting(mapping) && !dax_mapping(mapping)) |
| 684 | shadow = workingset_eviction(mapping, page); | 673 | shadow = workingset_eviction(mapping, page); |
| 685 | __delete_from_page_cache(page, shadow, memcg); | 674 | __delete_from_page_cache(page, shadow); |
| 686 | spin_unlock_irqrestore(&mapping->tree_lock, flags); | 675 | spin_unlock_irqrestore(&mapping->tree_lock, flags); |
| 687 | mem_cgroup_end_page_stat(memcg); | ||
| 688 | 676 | ||
| 689 | if (freepage != NULL) | 677 | if (freepage != NULL) |
| 690 | freepage(page); | 678 | freepage(page); |
| @@ -694,7 +682,6 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, | |||
| 694 | 682 | ||
| 695 | cannot_free: | 683 | cannot_free: |
| 696 | spin_unlock_irqrestore(&mapping->tree_lock, flags); | 684 | spin_unlock_irqrestore(&mapping->tree_lock, flags); |
| 697 | mem_cgroup_end_page_stat(memcg); | ||
| 698 | return 0; | 685 | return 0; |
| 699 | } | 686 | } |
| 700 | 687 | ||
| @@ -1931,8 +1918,8 @@ static bool inactive_file_is_low(struct lruvec *lruvec) | |||
| 1931 | unsigned long inactive; | 1918 | unsigned long inactive; |
| 1932 | unsigned long active; | 1919 | unsigned long active; |
| 1933 | 1920 | ||
| 1934 | inactive = get_lru_size(lruvec, LRU_INACTIVE_FILE); | 1921 | inactive = lruvec_lru_size(lruvec, LRU_INACTIVE_FILE); |
| 1935 | active = get_lru_size(lruvec, LRU_ACTIVE_FILE); | 1922 | active = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE); |
| 1936 | 1923 | ||
| 1937 | return active > inactive; | 1924 | return active > inactive; |
| 1938 | } | 1925 | } |
| @@ -2071,7 +2058,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, | |||
| 2071 | * system is under heavy pressure. | 2058 | * system is under heavy pressure. |
| 2072 | */ | 2059 | */ |
| 2073 | if (!inactive_file_is_low(lruvec) && | 2060 | if (!inactive_file_is_low(lruvec) && |
| 2074 | get_lru_size(lruvec, LRU_INACTIVE_FILE) >> sc->priority) { | 2061 | lruvec_lru_size(lruvec, LRU_INACTIVE_FILE) >> sc->priority) { |
| 2075 | scan_balance = SCAN_FILE; | 2062 | scan_balance = SCAN_FILE; |
| 2076 | goto out; | 2063 | goto out; |
| 2077 | } | 2064 | } |
| @@ -2097,10 +2084,10 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, | |||
| 2097 | * anon in [0], file in [1] | 2084 | * anon in [0], file in [1] |
| 2098 | */ | 2085 | */ |
| 2099 | 2086 | ||
| 2100 | anon = get_lru_size(lruvec, LRU_ACTIVE_ANON) + | 2087 | anon = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON) + |
| 2101 | get_lru_size(lruvec, LRU_INACTIVE_ANON); | 2088 | lruvec_lru_size(lruvec, LRU_INACTIVE_ANON); |
| 2102 | file = get_lru_size(lruvec, LRU_ACTIVE_FILE) + | 2089 | file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE) + |
| 2103 | get_lru_size(lruvec, LRU_INACTIVE_FILE); | 2090 | lruvec_lru_size(lruvec, LRU_INACTIVE_FILE); |
| 2104 | 2091 | ||
| 2105 | spin_lock_irq(&zone->lru_lock); | 2092 | spin_lock_irq(&zone->lru_lock); |
| 2106 | if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) { | 2093 | if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) { |
| @@ -2138,7 +2125,7 @@ out: | |||
| 2138 | unsigned long size; | 2125 | unsigned long size; |
| 2139 | unsigned long scan; | 2126 | unsigned long scan; |
| 2140 | 2127 | ||
| 2141 | size = get_lru_size(lruvec, lru); | 2128 | size = lruvec_lru_size(lruvec, lru); |
| 2142 | scan = size >> sc->priority; | 2129 | scan = size >> sc->priority; |
| 2143 | 2130 | ||
| 2144 | if (!scan && pass && force_scan) | 2131 | if (!scan && pass && force_scan) |
diff --git a/mm/vmstat.c b/mm/vmstat.c index 084c6725b373..69ce64f7b8d7 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c | |||
| @@ -924,19 +924,6 @@ static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat, | |||
| 924 | #endif | 924 | #endif |
| 925 | 925 | ||
| 926 | #ifdef CONFIG_PROC_FS | 926 | #ifdef CONFIG_PROC_FS |
| 927 | static char * const migratetype_names[MIGRATE_TYPES] = { | ||
| 928 | "Unmovable", | ||
| 929 | "Movable", | ||
| 930 | "Reclaimable", | ||
| 931 | "HighAtomic", | ||
| 932 | #ifdef CONFIG_CMA | ||
| 933 | "CMA", | ||
| 934 | #endif | ||
| 935 | #ifdef CONFIG_MEMORY_ISOLATION | ||
| 936 | "Isolate", | ||
| 937 | #endif | ||
| 938 | }; | ||
| 939 | |||
| 940 | static void frag_show_print(struct seq_file *m, pg_data_t *pgdat, | 927 | static void frag_show_print(struct seq_file *m, pg_data_t *pgdat, |
| 941 | struct zone *zone) | 928 | struct zone *zone) |
| 942 | { | 929 | { |
| @@ -1133,7 +1120,7 @@ static void pagetypeinfo_showmixedcount(struct seq_file *m, pg_data_t *pgdat) | |||
| 1133 | #ifdef CONFIG_PAGE_OWNER | 1120 | #ifdef CONFIG_PAGE_OWNER |
| 1134 | int mtype; | 1121 | int mtype; |
| 1135 | 1122 | ||
| 1136 | if (!page_owner_inited) | 1123 | if (!static_branch_unlikely(&page_owner_inited)) |
| 1137 | return; | 1124 | return; |
| 1138 | 1125 | ||
| 1139 | drain_all_pages(NULL); | 1126 | drain_all_pages(NULL); |
diff --git a/mm/workingset.c b/mm/workingset.c index 61ead9e5549d..6130ba0b2641 100644 --- a/mm/workingset.c +++ b/mm/workingset.c | |||
| @@ -152,8 +152,25 @@ | |||
| 152 | * refault distance will immediately activate the refaulting page. | 152 | * refault distance will immediately activate the refaulting page. |
| 153 | */ | 153 | */ |
| 154 | 154 | ||
| 155 | static void *pack_shadow(unsigned long eviction, struct zone *zone) | 155 | #define EVICTION_SHIFT (RADIX_TREE_EXCEPTIONAL_ENTRY + \ |
| 156 | ZONES_SHIFT + NODES_SHIFT + \ | ||
| 157 | MEM_CGROUP_ID_SHIFT) | ||
| 158 | #define EVICTION_MASK (~0UL >> EVICTION_SHIFT) | ||
| 159 | |||
| 160 | /* | ||
| 161 | * Eviction timestamps need to be able to cover the full range of | ||
| 162 | * actionable refaults. However, bits are tight in the radix tree | ||
| 163 | * entry, and after storing the identifier for the lruvec there might | ||
| 164 | * not be enough left to represent every single actionable refault. In | ||
| 165 | * that case, we have to sacrifice granularity for distance, and group | ||
| 166 | * evictions into coarser buckets by shaving off lower timestamp bits. | ||
| 167 | */ | ||
| 168 | static unsigned int bucket_order __read_mostly; | ||
| 169 | |||
| 170 | static void *pack_shadow(int memcgid, struct zone *zone, unsigned long eviction) | ||
| 156 | { | 171 | { |
| 172 | eviction >>= bucket_order; | ||
| 173 | eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid; | ||
| 157 | eviction = (eviction << NODES_SHIFT) | zone_to_nid(zone); | 174 | eviction = (eviction << NODES_SHIFT) | zone_to_nid(zone); |
| 158 | eviction = (eviction << ZONES_SHIFT) | zone_idx(zone); | 175 | eviction = (eviction << ZONES_SHIFT) | zone_idx(zone); |
| 159 | eviction = (eviction << RADIX_TREE_EXCEPTIONAL_SHIFT); | 176 | eviction = (eviction << RADIX_TREE_EXCEPTIONAL_SHIFT); |
| @@ -161,45 +178,23 @@ static void *pack_shadow(unsigned long eviction, struct zone *zone) | |||
| 161 | return (void *)(eviction | RADIX_TREE_EXCEPTIONAL_ENTRY); | 178 | return (void *)(eviction | RADIX_TREE_EXCEPTIONAL_ENTRY); |
| 162 | } | 179 | } |
| 163 | 180 | ||
| 164 | static void unpack_shadow(void *shadow, | 181 | static void unpack_shadow(void *shadow, int *memcgidp, struct zone **zonep, |
| 165 | struct zone **zone, | 182 | unsigned long *evictionp) |
| 166 | unsigned long *distance) | ||
| 167 | { | 183 | { |
| 168 | unsigned long entry = (unsigned long)shadow; | 184 | unsigned long entry = (unsigned long)shadow; |
| 169 | unsigned long eviction; | 185 | int memcgid, nid, zid; |
| 170 | unsigned long refault; | ||
| 171 | unsigned long mask; | ||
| 172 | int zid, nid; | ||
| 173 | 186 | ||
| 174 | entry >>= RADIX_TREE_EXCEPTIONAL_SHIFT; | 187 | entry >>= RADIX_TREE_EXCEPTIONAL_SHIFT; |
| 175 | zid = entry & ((1UL << ZONES_SHIFT) - 1); | 188 | zid = entry & ((1UL << ZONES_SHIFT) - 1); |
| 176 | entry >>= ZONES_SHIFT; | 189 | entry >>= ZONES_SHIFT; |
| 177 | nid = entry & ((1UL << NODES_SHIFT) - 1); | 190 | nid = entry & ((1UL << NODES_SHIFT) - 1); |
| 178 | entry >>= NODES_SHIFT; | 191 | entry >>= NODES_SHIFT; |
| 179 | eviction = entry; | 192 | memcgid = entry & ((1UL << MEM_CGROUP_ID_SHIFT) - 1); |
| 193 | entry >>= MEM_CGROUP_ID_SHIFT; | ||
| 180 | 194 | ||
| 181 | *zone = NODE_DATA(nid)->node_zones + zid; | 195 | *memcgidp = memcgid; |
| 182 | 196 | *zonep = NODE_DATA(nid)->node_zones + zid; | |
| 183 | refault = atomic_long_read(&(*zone)->inactive_age); | 197 | *evictionp = entry << bucket_order; |
| 184 | mask = ~0UL >> (NODES_SHIFT + ZONES_SHIFT + | ||
| 185 | RADIX_TREE_EXCEPTIONAL_SHIFT); | ||
| 186 | /* | ||
| 187 | * The unsigned subtraction here gives an accurate distance | ||
| 188 | * across inactive_age overflows in most cases. | ||
| 189 | * | ||
| 190 | * There is a special case: usually, shadow entries have a | ||
| 191 | * short lifetime and are either refaulted or reclaimed along | ||
| 192 | * with the inode before they get too old. But it is not | ||
| 193 | * impossible for the inactive_age to lap a shadow entry in | ||
| 194 | * the field, which can then can result in a false small | ||
| 195 | * refault distance, leading to a false activation should this | ||
| 196 | * old entry actually refault again. However, earlier kernels | ||
| 197 | * used to deactivate unconditionally with *every* reclaim | ||
| 198 | * invocation for the longest time, so the occasional | ||
| 199 | * inappropriate activation leading to pressure on the active | ||
| 200 | * list is not a problem. | ||
| 201 | */ | ||
| 202 | *distance = (refault - eviction) & mask; | ||
| 203 | } | 198 | } |
| 204 | 199 | ||
| 205 | /** | 200 | /** |
| @@ -212,11 +207,20 @@ static void unpack_shadow(void *shadow, | |||
| 212 | */ | 207 | */ |
| 213 | void *workingset_eviction(struct address_space *mapping, struct page *page) | 208 | void *workingset_eviction(struct address_space *mapping, struct page *page) |
| 214 | { | 209 | { |
| 210 | struct mem_cgroup *memcg = page_memcg(page); | ||
| 215 | struct zone *zone = page_zone(page); | 211 | struct zone *zone = page_zone(page); |
| 212 | int memcgid = mem_cgroup_id(memcg); | ||
| 216 | unsigned long eviction; | 213 | unsigned long eviction; |
| 214 | struct lruvec *lruvec; | ||
| 217 | 215 | ||
| 218 | eviction = atomic_long_inc_return(&zone->inactive_age); | 216 | /* Page is fully exclusive and pins page->mem_cgroup */ |
| 219 | return pack_shadow(eviction, zone); | 217 | VM_BUG_ON_PAGE(PageLRU(page), page); |
| 218 | VM_BUG_ON_PAGE(page_count(page), page); | ||
| 219 | VM_BUG_ON_PAGE(!PageLocked(page), page); | ||
| 220 | |||
| 221 | lruvec = mem_cgroup_zone_lruvec(zone, memcg); | ||
| 222 | eviction = atomic_long_inc_return(&lruvec->inactive_age); | ||
| 223 | return pack_shadow(memcgid, zone, eviction); | ||
| 220 | } | 224 | } |
| 221 | 225 | ||
| 222 | /** | 226 | /** |
| @@ -231,12 +235,64 @@ void *workingset_eviction(struct address_space *mapping, struct page *page) | |||
| 231 | bool workingset_refault(void *shadow) | 235 | bool workingset_refault(void *shadow) |
| 232 | { | 236 | { |
| 233 | unsigned long refault_distance; | 237 | unsigned long refault_distance; |
| 238 | unsigned long active_file; | ||
| 239 | struct mem_cgroup *memcg; | ||
| 240 | unsigned long eviction; | ||
| 241 | struct lruvec *lruvec; | ||
| 242 | unsigned long refault; | ||
| 234 | struct zone *zone; | 243 | struct zone *zone; |
| 244 | int memcgid; | ||
| 245 | |||
| 246 | unpack_shadow(shadow, &memcgid, &zone, &eviction); | ||
| 247 | |||
| 248 | rcu_read_lock(); | ||
| 249 | /* | ||
| 250 | * Look up the memcg associated with the stored ID. It might | ||
| 251 | * have been deleted since the page's eviction. | ||
| 252 | * | ||
| 253 | * Note that in rare events the ID could have been recycled | ||
| 254 | * for a new cgroup that refaults a shared page. This is | ||
| 255 | * impossible to tell from the available data. However, this | ||
| 256 | * should be a rare and limited disturbance, and activations | ||
| 257 | * are always speculative anyway. Ultimately, it's the aging | ||
| 258 | * algorithm's job to shake out the minimum access frequency | ||
| 259 | * for the active cache. | ||
| 260 | * | ||
| 261 | * XXX: On !CONFIG_MEMCG, this will always return NULL; it | ||
| 262 | * would be better if the root_mem_cgroup existed in all | ||
| 263 | * configurations instead. | ||
| 264 | */ | ||
| 265 | memcg = mem_cgroup_from_id(memcgid); | ||
| 266 | if (!mem_cgroup_disabled() && !memcg) { | ||
| 267 | rcu_read_unlock(); | ||
| 268 | return false; | ||
| 269 | } | ||
| 270 | lruvec = mem_cgroup_zone_lruvec(zone, memcg); | ||
| 271 | refault = atomic_long_read(&lruvec->inactive_age); | ||
| 272 | active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE); | ||
| 273 | rcu_read_unlock(); | ||
| 274 | |||
| 275 | /* | ||
| 276 | * The unsigned subtraction here gives an accurate distance | ||
| 277 | * across inactive_age overflows in most cases. | ||
| 278 | * | ||
| 279 | * There is a special case: usually, shadow entries have a | ||
| 280 | * short lifetime and are either refaulted or reclaimed along | ||
| 281 | * with the inode before they get too old. But it is not | ||
| 282 | * impossible for the inactive_age to lap a shadow entry in | ||
| 283 | * the field, which can then can result in a false small | ||
| 284 | * refault distance, leading to a false activation should this | ||
| 285 | * old entry actually refault again. However, earlier kernels | ||
| 286 | * used to deactivate unconditionally with *every* reclaim | ||
| 287 | * invocation for the longest time, so the occasional | ||
| 288 | * inappropriate activation leading to pressure on the active | ||
| 289 | * list is not a problem. | ||
| 290 | */ | ||
| 291 | refault_distance = (refault - eviction) & EVICTION_MASK; | ||
| 235 | 292 | ||
| 236 | unpack_shadow(shadow, &zone, &refault_distance); | ||
| 237 | inc_zone_state(zone, WORKINGSET_REFAULT); | 293 | inc_zone_state(zone, WORKINGSET_REFAULT); |
| 238 | 294 | ||
| 239 | if (refault_distance <= zone_page_state(zone, NR_ACTIVE_FILE)) { | 295 | if (refault_distance <= active_file) { |
| 240 | inc_zone_state(zone, WORKINGSET_ACTIVATE); | 296 | inc_zone_state(zone, WORKINGSET_ACTIVATE); |
| 241 | return true; | 297 | return true; |
| 242 | } | 298 | } |
| @@ -249,7 +305,22 @@ bool workingset_refault(void *shadow) | |||
| 249 | */ | 305 | */ |
| 250 | void workingset_activation(struct page *page) | 306 | void workingset_activation(struct page *page) |
| 251 | { | 307 | { |
| 252 | atomic_long_inc(&page_zone(page)->inactive_age); | 308 | struct lruvec *lruvec; |
| 309 | |||
| 310 | lock_page_memcg(page); | ||
| 311 | /* | ||
| 312 | * Filter non-memcg pages here, e.g. unmap can call | ||
| 313 | * mark_page_accessed() on VDSO pages. | ||
| 314 | * | ||
| 315 | * XXX: See workingset_refault() - this should return | ||
| 316 | * root_mem_cgroup even for !CONFIG_MEMCG. | ||
| 317 | */ | ||
| 318 | if (!mem_cgroup_disabled() && !page_memcg(page)) | ||
| 319 | goto out; | ||
| 320 | lruvec = mem_cgroup_zone_lruvec(page_zone(page), page_memcg(page)); | ||
| 321 | atomic_long_inc(&lruvec->inactive_age); | ||
| 322 | out: | ||
| 323 | unlock_page_memcg(page); | ||
| 253 | } | 324 | } |
| 254 | 325 | ||
| 255 | /* | 326 | /* |
| @@ -398,8 +469,25 @@ static struct lock_class_key shadow_nodes_key; | |||
| 398 | 469 | ||
| 399 | static int __init workingset_init(void) | 470 | static int __init workingset_init(void) |
| 400 | { | 471 | { |
| 472 | unsigned int timestamp_bits; | ||
| 473 | unsigned int max_order; | ||
| 401 | int ret; | 474 | int ret; |
| 402 | 475 | ||
| 476 | BUILD_BUG_ON(BITS_PER_LONG < EVICTION_SHIFT); | ||
| 477 | /* | ||
| 478 | * Calculate the eviction bucket size to cover the longest | ||
| 479 | * actionable refault distance, which is currently half of | ||
| 480 | * memory (totalram_pages/2). However, memory hotplug may add | ||
| 481 | * some more pages at runtime, so keep working with up to | ||
| 482 | * double the initial memory by using totalram_pages as-is. | ||
| 483 | */ | ||
| 484 | timestamp_bits = BITS_PER_LONG - EVICTION_SHIFT; | ||
| 485 | max_order = fls_long(totalram_pages - 1); | ||
| 486 | if (max_order > timestamp_bits) | ||
| 487 | bucket_order = max_order - timestamp_bits; | ||
| 488 | printk("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n", | ||
| 489 | timestamp_bits, max_order, bucket_order); | ||
| 490 | |||
| 403 | ret = list_lru_init_key(&workingset_shadow_nodes, &shadow_nodes_key); | 491 | ret = list_lru_init_key(&workingset_shadow_nodes, &shadow_nodes_key); |
| 404 | if (ret) | 492 | if (ret) |
| 405 | goto err; | 493 | goto err; |
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 643a86c49020..2d5589b61e9f 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c | |||
| @@ -50,8 +50,7 @@ static u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS] __read_mostly; | |||
| 50 | #define TSBITS 6 | 50 | #define TSBITS 6 |
| 51 | #define TSMASK (((__u32)1 << TSBITS) - 1) | 51 | #define TSMASK (((__u32)1 << TSBITS) - 1) |
| 52 | 52 | ||
| 53 | static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], | 53 | static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], ipv4_cookie_scratch); |
| 54 | ipv4_cookie_scratch); | ||
| 55 | 54 | ||
| 56 | static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, | 55 | static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, |
| 57 | u32 count, int c) | 56 | u32 count, int c) |
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 2906ef20795e..aae3e5ca63ea 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c | |||
| @@ -41,8 +41,7 @@ static __u16 const msstab[] = { | |||
| 41 | 9000 - 60, | 41 | 9000 - 60, |
| 42 | }; | 42 | }; |
| 43 | 43 | ||
| 44 | static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], | 44 | static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], ipv6_cookie_scratch); |
| 45 | ipv6_cookie_scratch); | ||
| 46 | 45 | ||
| 47 | static u32 cookie_hash(const struct in6_addr *saddr, const struct in6_addr *daddr, | 46 | static u32 cookie_hash(const struct in6_addr *saddr, const struct in6_addr *daddr, |
| 48 | __be16 sport, __be16 dport, u32 count, int c) | 47 | __be16 sport, __be16 dport, u32 count, int c) |
diff --git a/net/rds/page.c b/net/rds/page.c index 5a14e6d6a926..616f21f4e7d7 100644 --- a/net/rds/page.c +++ b/net/rds/page.c | |||
| @@ -42,8 +42,8 @@ struct rds_page_remainder { | |||
| 42 | unsigned long r_offset; | 42 | unsigned long r_offset; |
| 43 | }; | 43 | }; |
| 44 | 44 | ||
| 45 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_page_remainder, | 45 | static |
| 46 | rds_page_remainders); | 46 | DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_page_remainder, rds_page_remainders); |
| 47 | 47 | ||
| 48 | /* | 48 | /* |
| 49 | * returns 0 on success or -errno on failure. | 49 | * returns 0 on success or -errno on failure. |
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 874132b26d23..d574d13ba963 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl | |||
| @@ -3240,6 +3240,30 @@ sub process { | |||
| 3240 | #ignore lines not being added | 3240 | #ignore lines not being added |
| 3241 | next if ($line =~ /^[^\+]/); | 3241 | next if ($line =~ /^[^\+]/); |
| 3242 | 3242 | ||
| 3243 | # check for declarations of signed or unsigned without int | ||
| 3244 | while ($line =~ m{($Declare)\s*(?!char\b|short\b|int\b|long\b)\s*($Ident)?\s*[=,;\[\)\(]}g) { | ||
| 3245 | my $type = $1; | ||
| 3246 | my $var = $2; | ||
| 3247 | $var = "" if (!defined $var); | ||
| 3248 | if ($type =~ /^(?:(?:$Storage|$Inline|$Attribute)\s+)*((?:un)?signed)((?:\s*\*)*)\s*$/) { | ||
| 3249 | my $sign = $1; | ||
| 3250 | my $pointer = $2; | ||
| 3251 | |||
| 3252 | $pointer = "" if (!defined $pointer); | ||
| 3253 | |||
| 3254 | if (WARN("UNSPECIFIED_INT", | ||
| 3255 | "Prefer '" . trim($sign) . " int" . rtrim($pointer) . "' to bare use of '$sign" . rtrim($pointer) . "'\n" . $herecurr) && | ||
| 3256 | $fix) { | ||
| 3257 | my $decl = trim($sign) . " int "; | ||
| 3258 | my $comp_pointer = $pointer; | ||
| 3259 | $comp_pointer =~ s/\s//g; | ||
| 3260 | $decl .= $comp_pointer; | ||
| 3261 | $decl = rtrim($decl) if ($var eq ""); | ||
| 3262 | $fixed[$fixlinenr] =~ s@\b$sign\s*\Q$pointer\E\s*$var\b@$decl$var@; | ||
| 3263 | } | ||
| 3264 | } | ||
| 3265 | } | ||
| 3266 | |||
| 3243 | # TEST: allow direct testing of the type matcher. | 3267 | # TEST: allow direct testing of the type matcher. |
| 3244 | if ($dbg_type) { | 3268 | if ($dbg_type) { |
| 3245 | if ($line =~ /^.\s*$Declare\s*$/) { | 3269 | if ($line =~ /^.\s*$Declare\s*$/) { |
| @@ -4109,7 +4133,7 @@ sub process { | |||
| 4109 | ## } | 4133 | ## } |
| 4110 | 4134 | ||
| 4111 | #need space before brace following if, while, etc | 4135 | #need space before brace following if, while, etc |
| 4112 | if (($line =~ /\(.*\)\{/ && $line !~ /\($Type\){/) || | 4136 | if (($line =~ /\(.*\)\{/ && $line !~ /\($Type\)\{/) || |
| 4113 | $line =~ /do\{/) { | 4137 | $line =~ /do\{/) { |
| 4114 | if (ERROR("SPACING", | 4138 | if (ERROR("SPACING", |
| 4115 | "space required before the open brace '{'\n" . $herecurr) && | 4139 | "space required before the open brace '{'\n" . $herecurr) && |
| @@ -4561,6 +4585,9 @@ sub process { | |||
| 4561 | { | 4585 | { |
| 4562 | } | 4586 | } |
| 4563 | 4587 | ||
| 4588 | # Make asm volatile uses seem like a generic function | ||
| 4589 | $dstat =~ s/\b_*asm_*\s+_*volatile_*\b/asm_volatile/g; | ||
| 4590 | |||
| 4564 | my $exceptions = qr{ | 4591 | my $exceptions = qr{ |
| 4565 | $Declare| | 4592 | $Declare| |
| 4566 | module_param_named| | 4593 | module_param_named| |
diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 8fa81e84e295..638b143ee60f 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c | |||
| @@ -22,6 +22,7 @@ | |||
| 22 | #include <stdlib.h> | 22 | #include <stdlib.h> |
| 23 | #include <string.h> | 23 | #include <string.h> |
| 24 | #include <ctype.h> | 24 | #include <ctype.h> |
| 25 | #include <limits.h> | ||
| 25 | 26 | ||
| 26 | #ifndef ARRAY_SIZE | 27 | #ifndef ARRAY_SIZE |
| 27 | #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) | 28 | #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) |
| @@ -34,6 +35,7 @@ struct sym_entry { | |||
| 34 | unsigned int len; | 35 | unsigned int len; |
| 35 | unsigned int start_pos; | 36 | unsigned int start_pos; |
| 36 | unsigned char *sym; | 37 | unsigned char *sym; |
| 38 | unsigned int percpu_absolute; | ||
| 37 | }; | 39 | }; |
| 38 | 40 | ||
| 39 | struct addr_range { | 41 | struct addr_range { |
| @@ -42,6 +44,7 @@ struct addr_range { | |||
| 42 | }; | 44 | }; |
| 43 | 45 | ||
| 44 | static unsigned long long _text; | 46 | static unsigned long long _text; |
| 47 | static unsigned long long relative_base; | ||
| 45 | static struct addr_range text_ranges[] = { | 48 | static struct addr_range text_ranges[] = { |
| 46 | { "_stext", "_etext" }, | 49 | { "_stext", "_etext" }, |
| 47 | { "_sinittext", "_einittext" }, | 50 | { "_sinittext", "_einittext" }, |
| @@ -61,6 +64,7 @@ static int all_symbols = 0; | |||
| 61 | static int absolute_percpu = 0; | 64 | static int absolute_percpu = 0; |
| 62 | static char symbol_prefix_char = '\0'; | 65 | static char symbol_prefix_char = '\0'; |
| 63 | static unsigned long long kernel_start_addr = 0; | 66 | static unsigned long long kernel_start_addr = 0; |
| 67 | static int base_relative = 0; | ||
| 64 | 68 | ||
| 65 | int token_profit[0x10000]; | 69 | int token_profit[0x10000]; |
| 66 | 70 | ||
| @@ -74,7 +78,7 @@ static void usage(void) | |||
| 74 | fprintf(stderr, "Usage: kallsyms [--all-symbols] " | 78 | fprintf(stderr, "Usage: kallsyms [--all-symbols] " |
| 75 | "[--symbol-prefix=<prefix char>] " | 79 | "[--symbol-prefix=<prefix char>] " |
| 76 | "[--page-offset=<CONFIG_PAGE_OFFSET>] " | 80 | "[--page-offset=<CONFIG_PAGE_OFFSET>] " |
| 77 | "< in.map > out.S\n"); | 81 | "[--base-relative] < in.map > out.S\n"); |
| 78 | exit(1); | 82 | exit(1); |
| 79 | } | 83 | } |
| 80 | 84 | ||
| @@ -171,6 +175,8 @@ static int read_symbol(FILE *in, struct sym_entry *s) | |||
| 171 | strcpy((char *)s->sym + 1, str); | 175 | strcpy((char *)s->sym + 1, str); |
| 172 | s->sym[0] = stype; | 176 | s->sym[0] = stype; |
| 173 | 177 | ||
| 178 | s->percpu_absolute = 0; | ||
| 179 | |||
| 174 | /* Record if we've found __per_cpu_start/end. */ | 180 | /* Record if we've found __per_cpu_start/end. */ |
| 175 | check_symbol_range(sym, s->addr, &percpu_range, 1); | 181 | check_symbol_range(sym, s->addr, &percpu_range, 1); |
| 176 | 182 | ||
| @@ -202,6 +208,8 @@ static int symbol_valid(struct sym_entry *s) | |||
| 202 | */ | 208 | */ |
| 203 | static char *special_symbols[] = { | 209 | static char *special_symbols[] = { |
| 204 | "kallsyms_addresses", | 210 | "kallsyms_addresses", |
| 211 | "kallsyms_offsets", | ||
| 212 | "kallsyms_relative_base", | ||
| 205 | "kallsyms_num_syms", | 213 | "kallsyms_num_syms", |
| 206 | "kallsyms_names", | 214 | "kallsyms_names", |
| 207 | "kallsyms_markers", | 215 | "kallsyms_markers", |
| @@ -325,7 +333,7 @@ static int expand_symbol(unsigned char *data, int len, char *result) | |||
| 325 | 333 | ||
| 326 | static int symbol_absolute(struct sym_entry *s) | 334 | static int symbol_absolute(struct sym_entry *s) |
| 327 | { | 335 | { |
| 328 | return toupper(s->sym[0]) == 'A'; | 336 | return s->percpu_absolute; |
| 329 | } | 337 | } |
| 330 | 338 | ||
| 331 | static void write_src(void) | 339 | static void write_src(void) |
| @@ -346,16 +354,48 @@ static void write_src(void) | |||
| 346 | 354 | ||
| 347 | printf("\t.section .rodata, \"a\"\n"); | 355 | printf("\t.section .rodata, \"a\"\n"); |
| 348 | 356 | ||
| 349 | /* Provide proper symbols relocatability by their '_text' | 357 | /* Provide proper symbols relocatability by their relativeness |
| 350 | * relativeness. The symbol names cannot be used to construct | 358 | * to a fixed anchor point in the runtime image, either '_text' |
| 351 | * normal symbol references as the list of symbols contains | 359 | * for absolute address tables, in which case the linker will |
| 352 | * symbols that are declared static and are private to their | 360 | * emit the final addresses at build time. Otherwise, use the |
| 353 | * .o files. This prevents .tmp_kallsyms.o or any other | 361 | * offset relative to the lowest value encountered of all relative |
| 354 | * object from referencing them. | 362 | * symbols, and emit non-relocatable fixed offsets that will be fixed |
| 363 | * up at runtime. | ||
| 364 | * | ||
| 365 | * The symbol names cannot be used to construct normal symbol | ||
| 366 | * references as the list of symbols contains symbols that are | ||
| 367 | * declared static and are private to their .o files. This prevents | ||
| 368 | * .tmp_kallsyms.o or any other object from referencing them. | ||
| 355 | */ | 369 | */ |
| 356 | output_label("kallsyms_addresses"); | 370 | if (!base_relative) |
| 371 | output_label("kallsyms_addresses"); | ||
| 372 | else | ||
| 373 | output_label("kallsyms_offsets"); | ||
| 374 | |||
| 357 | for (i = 0; i < table_cnt; i++) { | 375 | for (i = 0; i < table_cnt; i++) { |
| 358 | if (!symbol_absolute(&table[i])) { | 376 | if (base_relative) { |
| 377 | long long offset; | ||
| 378 | int overflow; | ||
| 379 | |||
| 380 | if (!absolute_percpu) { | ||
| 381 | offset = table[i].addr - relative_base; | ||
| 382 | overflow = (offset < 0 || offset > UINT_MAX); | ||
| 383 | } else if (symbol_absolute(&table[i])) { | ||
| 384 | offset = table[i].addr; | ||
| 385 | overflow = (offset < 0 || offset > INT_MAX); | ||
| 386 | } else { | ||
| 387 | offset = relative_base - table[i].addr - 1; | ||
| 388 | overflow = (offset < INT_MIN || offset >= 0); | ||
| 389 | } | ||
| 390 | if (overflow) { | ||
| 391 | fprintf(stderr, "kallsyms failure: " | ||
| 392 | "%s symbol value %#llx out of range in relative mode\n", | ||
| 393 | symbol_absolute(&table[i]) ? "absolute" : "relative", | ||
| 394 | table[i].addr); | ||
| 395 | exit(EXIT_FAILURE); | ||
| 396 | } | ||
| 397 | printf("\t.long\t%#x\n", (int)offset); | ||
| 398 | } else if (!symbol_absolute(&table[i])) { | ||
| 359 | if (_text <= table[i].addr) | 399 | if (_text <= table[i].addr) |
| 360 | printf("\tPTR\t_text + %#llx\n", | 400 | printf("\tPTR\t_text + %#llx\n", |
| 361 | table[i].addr - _text); | 401 | table[i].addr - _text); |
| @@ -368,6 +408,12 @@ static void write_src(void) | |||
| 368 | } | 408 | } |
| 369 | printf("\n"); | 409 | printf("\n"); |
| 370 | 410 | ||
| 411 | if (base_relative) { | ||
| 412 | output_label("kallsyms_relative_base"); | ||
| 413 | printf("\tPTR\t_text - %#llx\n", _text - relative_base); | ||
| 414 | printf("\n"); | ||
| 415 | } | ||
| 416 | |||
| 371 | output_label("kallsyms_num_syms"); | 417 | output_label("kallsyms_num_syms"); |
| 372 | printf("\tPTR\t%d\n", table_cnt); | 418 | printf("\tPTR\t%d\n", table_cnt); |
| 373 | printf("\n"); | 419 | printf("\n"); |
| @@ -681,8 +727,27 @@ static void make_percpus_absolute(void) | |||
| 681 | unsigned int i; | 727 | unsigned int i; |
| 682 | 728 | ||
| 683 | for (i = 0; i < table_cnt; i++) | 729 | for (i = 0; i < table_cnt; i++) |
| 684 | if (symbol_in_range(&table[i], &percpu_range, 1)) | 730 | if (symbol_in_range(&table[i], &percpu_range, 1)) { |
| 731 | /* | ||
| 732 | * Keep the 'A' override for percpu symbols to | ||
| 733 | * ensure consistent behavior compared to older | ||
| 734 | * versions of this tool. | ||
| 735 | */ | ||
| 685 | table[i].sym[0] = 'A'; | 736 | table[i].sym[0] = 'A'; |
| 737 | table[i].percpu_absolute = 1; | ||
| 738 | } | ||
| 739 | } | ||
| 740 | |||
| 741 | /* find the minimum non-absolute symbol address */ | ||
| 742 | static void record_relative_base(void) | ||
| 743 | { | ||
| 744 | unsigned int i; | ||
| 745 | |||
| 746 | relative_base = -1ULL; | ||
| 747 | for (i = 0; i < table_cnt; i++) | ||
| 748 | if (!symbol_absolute(&table[i]) && | ||
| 749 | table[i].addr < relative_base) | ||
| 750 | relative_base = table[i].addr; | ||
| 686 | } | 751 | } |
| 687 | 752 | ||
| 688 | int main(int argc, char **argv) | 753 | int main(int argc, char **argv) |
| @@ -703,7 +768,9 @@ int main(int argc, char **argv) | |||
| 703 | } else if (strncmp(argv[i], "--page-offset=", 14) == 0) { | 768 | } else if (strncmp(argv[i], "--page-offset=", 14) == 0) { |
| 704 | const char *p = &argv[i][14]; | 769 | const char *p = &argv[i][14]; |
| 705 | kernel_start_addr = strtoull(p, NULL, 16); | 770 | kernel_start_addr = strtoull(p, NULL, 16); |
| 706 | } else | 771 | } else if (strcmp(argv[i], "--base-relative") == 0) |
| 772 | base_relative = 1; | ||
| 773 | else | ||
| 707 | usage(); | 774 | usage(); |
| 708 | } | 775 | } |
| 709 | } else if (argc != 1) | 776 | } else if (argc != 1) |
| @@ -712,6 +779,8 @@ int main(int argc, char **argv) | |||
| 712 | read_map(stdin); | 779 | read_map(stdin); |
| 713 | if (absolute_percpu) | 780 | if (absolute_percpu) |
| 714 | make_percpus_absolute(); | 781 | make_percpus_absolute(); |
| 782 | if (base_relative) | ||
| 783 | record_relative_base(); | ||
| 715 | sort_symbols(); | 784 | sort_symbols(); |
| 716 | optimize_token_table(); | 785 | optimize_token_table(); |
| 717 | write_src(); | 786 | write_src(); |
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index ba6c34ea5429..453ede9d2f3d 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh | |||
| @@ -86,10 +86,14 @@ kallsyms() | |||
| 86 | kallsymopt="${kallsymopt} --page-offset=$CONFIG_PAGE_OFFSET" | 86 | kallsymopt="${kallsymopt} --page-offset=$CONFIG_PAGE_OFFSET" |
| 87 | fi | 87 | fi |
| 88 | 88 | ||
| 89 | if [ -n "${CONFIG_X86_64}" ]; then | 89 | if [ -n "${CONFIG_KALLSYMS_ABSOLUTE_PERCPU}" ]; then |
| 90 | kallsymopt="${kallsymopt} --absolute-percpu" | 90 | kallsymopt="${kallsymopt} --absolute-percpu" |
| 91 | fi | 91 | fi |
| 92 | 92 | ||
| 93 | if [ -n "${CONFIG_KALLSYMS_BASE_RELATIVE}" ]; then | ||
| 94 | kallsymopt="${kallsymopt} --base-relative" | ||
| 95 | fi | ||
| 96 | |||
| 93 | local aflags="${KBUILD_AFLAGS} ${KBUILD_AFLAGS_KERNEL} \ | 97 | local aflags="${KBUILD_AFLAGS} ${KBUILD_AFLAGS_KERNEL} \ |
| 94 | ${NOSTDINC_FLAGS} ${LINUXINCLUDE} ${KBUILD_CPPFLAGS}" | 98 | ${NOSTDINC_FLAGS} ${LINUXINCLUDE} ${KBUILD_CPPFLAGS}" |
| 95 | 99 | ||
diff --git a/scripts/namespace.pl b/scripts/namespace.pl index a71be6b7cdec..9f3c9d47a4a5 100755 --- a/scripts/namespace.pl +++ b/scripts/namespace.pl | |||
| @@ -117,6 +117,8 @@ my %nameexception = ( | |||
| 117 | 'kallsyms_names' => 1, | 117 | 'kallsyms_names' => 1, |
| 118 | 'kallsyms_num_syms' => 1, | 118 | 'kallsyms_num_syms' => 1, |
| 119 | 'kallsyms_addresses'=> 1, | 119 | 'kallsyms_addresses'=> 1, |
| 120 | 'kallsyms_offsets' => 1, | ||
| 121 | 'kallsyms_relative_base'=> 1, | ||
| 120 | '__this_module' => 1, | 122 | '__this_module' => 1, |
| 121 | '_etext' => 1, | 123 | '_etext' => 1, |
| 122 | '_edata' => 1, | 124 | '_edata' => 1, |
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 4d3340cce9a0..c9cb3be47cff 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c | |||
| @@ -602,7 +602,7 @@ static int gfpcmp(const void *a, const void *b) | |||
| 602 | return fa->flags - fb->flags; | 602 | return fa->flags - fb->flags; |
| 603 | } | 603 | } |
| 604 | 604 | ||
| 605 | /* see include/trace/events/gfpflags.h */ | 605 | /* see include/trace/events/mmflags.h */ |
| 606 | static const struct { | 606 | static const struct { |
| 607 | const char *original; | 607 | const char *original; |
| 608 | const char *compact; | 608 | const char *compact; |
| @@ -612,30 +612,39 @@ static const struct { | |||
| 612 | { "GFP_HIGHUSER", "HU" }, | 612 | { "GFP_HIGHUSER", "HU" }, |
| 613 | { "GFP_USER", "U" }, | 613 | { "GFP_USER", "U" }, |
| 614 | { "GFP_TEMPORARY", "TMP" }, | 614 | { "GFP_TEMPORARY", "TMP" }, |
| 615 | { "GFP_KERNEL_ACCOUNT", "KAC" }, | ||
| 615 | { "GFP_KERNEL", "K" }, | 616 | { "GFP_KERNEL", "K" }, |
| 616 | { "GFP_NOFS", "NF" }, | 617 | { "GFP_NOFS", "NF" }, |
| 617 | { "GFP_ATOMIC", "A" }, | 618 | { "GFP_ATOMIC", "A" }, |
| 618 | { "GFP_NOIO", "NI" }, | 619 | { "GFP_NOIO", "NI" }, |
| 619 | { "GFP_HIGH", "H" }, | ||
| 620 | { "GFP_WAIT", "W" }, | ||
| 621 | { "GFP_IO", "I" }, | ||
| 622 | { "GFP_COLD", "CO" }, | ||
| 623 | { "GFP_NOWARN", "NWR" }, | ||
| 624 | { "GFP_REPEAT", "R" }, | ||
| 625 | { "GFP_NOFAIL", "NF" }, | ||
| 626 | { "GFP_NORETRY", "NR" }, | ||
| 627 | { "GFP_COMP", "C" }, | ||
| 628 | { "GFP_ZERO", "Z" }, | ||
| 629 | { "GFP_NOMEMALLOC", "NMA" }, | ||
| 630 | { "GFP_MEMALLOC", "MA" }, | ||
| 631 | { "GFP_HARDWALL", "HW" }, | ||
| 632 | { "GFP_THISNODE", "TN" }, | ||
| 633 | { "GFP_RECLAIMABLE", "RC" }, | ||
| 634 | { "GFP_MOVABLE", "M" }, | ||
| 635 | { "GFP_NOTRACK", "NT" }, | ||
| 636 | { "GFP_NO_KSWAPD", "NK" }, | ||
| 637 | { "GFP_OTHER_NODE", "ON" }, | ||
| 638 | { "GFP_NOWAIT", "NW" }, | 620 | { "GFP_NOWAIT", "NW" }, |
| 621 | { "GFP_DMA", "D" }, | ||
| 622 | { "__GFP_HIGHMEM", "HM" }, | ||
| 623 | { "GFP_DMA32", "D32" }, | ||
| 624 | { "__GFP_HIGH", "H" }, | ||
| 625 | { "__GFP_ATOMIC", "_A" }, | ||
| 626 | { "__GFP_IO", "I" }, | ||
| 627 | { "__GFP_FS", "F" }, | ||
| 628 | { "__GFP_COLD", "CO" }, | ||
| 629 | { "__GFP_NOWARN", "NWR" }, | ||
| 630 | { "__GFP_REPEAT", "R" }, | ||
| 631 | { "__GFP_NOFAIL", "NF" }, | ||
| 632 | { "__GFP_NORETRY", "NR" }, | ||
| 633 | { "__GFP_COMP", "C" }, | ||
| 634 | { "__GFP_ZERO", "Z" }, | ||
| 635 | { "__GFP_NOMEMALLOC", "NMA" }, | ||
| 636 | { "__GFP_MEMALLOC", "MA" }, | ||
| 637 | { "__GFP_HARDWALL", "HW" }, | ||
| 638 | { "__GFP_THISNODE", "TN" }, | ||
| 639 | { "__GFP_RECLAIMABLE", "RC" }, | ||
| 640 | { "__GFP_MOVABLE", "M" }, | ||
| 641 | { "__GFP_ACCOUNT", "AC" }, | ||
| 642 | { "__GFP_NOTRACK", "NT" }, | ||
| 643 | { "__GFP_WRITE", "WR" }, | ||
| 644 | { "__GFP_RECLAIM", "R" }, | ||
| 645 | { "__GFP_DIRECT_RECLAIM", "DR" }, | ||
| 646 | { "__GFP_KSWAPD_RECLAIM", "KR" }, | ||
| 647 | { "__GFP_OTHER_NODE", "ON" }, | ||
| 639 | }; | 648 | }; |
| 640 | 649 | ||
| 641 | static size_t max_gfp_len; | 650 | static size_t max_gfp_len; |
diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c index 86e698d07e20..1889163f2f05 100644 --- a/tools/vm/slabinfo.c +++ b/tools/vm/slabinfo.c | |||
| @@ -135,7 +135,7 @@ static void usage(void) | |||
| 135 | "\nValid debug options (FZPUT may be combined)\n" | 135 | "\nValid debug options (FZPUT may be combined)\n" |
| 136 | "a / A Switch on all debug options (=FZUP)\n" | 136 | "a / A Switch on all debug options (=FZUP)\n" |
| 137 | "- Switch off all debug options\n" | 137 | "- Switch off all debug options\n" |
| 138 | "f / F Sanity Checks (SLAB_DEBUG_FREE)\n" | 138 | "f / F Sanity Checks (SLAB_CONSISTENCY_CHECKS)\n" |
| 139 | "z / Z Redzoning\n" | 139 | "z / Z Redzoning\n" |
| 140 | "p / P Poisoning\n" | 140 | "p / P Poisoning\n" |
| 141 | "u / U Tracking\n" | 141 | "u / U Tracking\n" |
