author		Linus Torvalds <torvalds@linux-foundation.org>	2016-03-16 14:51:08 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-03-16 14:51:08 -0400
commit		271ecc5253e2b317d729d366560789cd7f93836c (patch)
tree		d3a60bc4dfa8245ff934f357f2367db76b59e7cf
parent		aa6865d836418eb2ba888a4cb1318a28e9aa2e0c (diff)
parent		63c06227a22b098a3849c5c99e836aea161ca0d7 (diff)
Merge branch 'akpm' (patches from Andrew)
Merge first patch-bomb from Andrew Morton:
- some misc things
- ocfs2 updates
- about half of MM
- checkpatch updates
- autofs4 update
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (120 commits)
autofs4: fix string.h include in auto_dev-ioctl.h
autofs4: use pr_xxx() macros directly for logging
autofs4: change log print macros to not insert newline
autofs4: make autofs log prints consistent
autofs4: fix some white space errors
autofs4: fix invalid ioctl return in autofs4_root_ioctl_unlocked()
autofs4: fix coding style line length in autofs4_wait()
autofs4: fix coding style problem in autofs4_get_set_timeout()
autofs4: coding style fixes
autofs: show pipe inode in mount options
kallsyms: add support for relative offsets in kallsyms address table
kallsyms: don't overload absolute symbol type for percpu symbols
x86: kallsyms: disable absolute percpu symbols on !SMP
checkpatch: fix another left brace warning
checkpatch: improve UNSPECIFIED_INT test for bare signed/unsigned uses
checkpatch: warn on bare unsigned or signed declarations without int
checkpatch: exclude asm volatile from complex macro check
mm: memcontrol: drop unnecessary lru locking from mem_cgroup_migrate()
mm: migrate: consolidate mem_cgroup_migrate() calls
mm/compaction: speed up pageblock_pfn_to_page() when zone is contiguous
...
119 files changed, 3139 insertions(+), 1805 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 4d9ca7d92a20..5b47acb86111 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1759,7 +1759,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
 	keepinitrd	[HW,ARM]
 
-	kernelcore=nn[KMG]	[KNL,X86,IA-64,PPC] This parameter
+	kernelcore=	[KNL,X86,IA-64,PPC]
+			Format: nn[KMGTPE] | "mirror"
+			This parameter
 			specifies the amount of memory usable by the kernel
 			for non-movable allocations. The requested amount is
 			spread evenly throughout all nodes in the system. The
@@ -1775,6 +1777,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			use the HighMem zone if it exists, and the Normal
 			zone if it does not.
 
+			Instead of specifying the amount of memory (nn[KMGTPE]),
+			you can specify "mirror" option. In case "mirror"
+			option is specified, mirrored (reliable) memory is used
+			for non-movable allocations and remaining memory is used
+			for Movable pages. nn[KMGTPE] and "mirror" are exclusive,
+			so you can NOT specify nn[KMGTPE] and "mirror" at the same
+			time.
+
 	kgdbdbgp=	[KGDB,HW] kgdb over EHCI usb debug port.
 			Format: <Controller#>[,poll interval]
 			The controller # is the number of the ehci usb debug
@@ -2732,6 +2742,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			we can turn it on.
 			on: enable the feature
 
+	page_poison=	[KNL] Boot-time parameter changing the state of
+			poisoning on the buddy allocator.
+			off: turn off poisoning
+			on: turn on poisoning
+
 	panic=		[KNL] Kernel behaviour on panic: delay <timeout>
 			timeout > 0: seconds before rebooting
 			timeout = 0: wait forever
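
As the text above notes, nn[KMGTPE] and "mirror" are mutually exclusive forms of the same parameter. A minimal sketch, under the assumption (the mm-side parsing code is not part of this extract) that a boot-parameter handler distinguishes the two forms roughly like this — names here are illustrative, not the actual mm/page_alloc.c symbols:

```c
#include <linux/init.h>
#include <linux/kernel.h>	/* memparse() */
#include <linux/string.h>

/* Illustrative only: accept either a size ("512M") or the bare
 * keyword "mirror" for a kernelcore-style boot parameter. */
static unsigned long long demo_kernelcore;
static bool demo_mirrored;

static int __init demo_parse_kernelcore(char *arg)
{
	if (!arg)
		return -EINVAL;
	if (!strcmp(arg, "mirror")) {		/* policy form */
		demo_mirrored = true;
		return 0;
	}
	demo_kernelcore = memparse(arg, &arg);	/* size form, e.g. 512M */
	return 0;
}
early_param("demo_kernelcore", demo_parse_kernelcore);
```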
diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt
index ce2cfcf35c27..443f4b44ad97 100644
--- a/Documentation/memory-hotplug.txt
+++ b/Documentation/memory-hotplug.txt
@@ -256,10 +256,27 @@ If the memory block is offline, you'll read "offline".
 
 5.2. How to online memory
 ------------
-Even if the memory is hot-added, it is not at ready-to-use state.
-For using newly added memory, you have to "online" the memory block.
+When the memory is hot-added, the kernel decides whether or not to "online"
+it according to the policy which can be read from "auto_online_blocks" file:
 
-For onlining, you have to write "online" to the memory block's state file as:
+% cat /sys/devices/system/memory/auto_online_blocks
+
+The default is "offline" which means the newly added memory is not in a
+ready-to-use state and you have to "online" the newly added memory blocks
+manually. Automatic onlining can be requested by writing "online" to
+"auto_online_blocks" file:
+
+% echo online > /sys/devices/system/memory/auto_online_blocks
+
+This sets a global policy and impacts all memory blocks that will subsequently
+be hotplugged. Currently offline blocks keep their state. It is possible, under
+certain circumstances, that some memory blocks will be added but will fail to
+online. User space tools can check their "state" files
+(/sys/devices/system/memory/memoryXXX/state) and try to online them manually.
+
+If the automatic onlining wasn't requested, failed, or some memory block was
+offlined it is possible to change the individual block's state by writing to the
+"state" file:
 
 % echo online > /sys/devices/system/memory/memoryXXX/state
 
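
The onlining flow described above is driven entirely through sysfs, so the manual fallback can be implemented by a small userspace helper. A sketch in plain C, assuming only the sysfs paths documented above (error handling trimmed; needs root):

```c
#include <stdio.h>
#include <string.h>
#include <glob.h>

/* Online every memory block whose "state" file still reads "offline". */
int main(void)
{
	glob_t g;
	size_t i;

	if (glob("/sys/devices/system/memory/memory*/state", 0, NULL, &g))
		return 1;
	for (i = 0; i < g.gl_pathc; i++) {
		char state[16] = "";
		FILE *f = fopen(g.gl_pathv[i], "r+");

		if (!f)
			continue;
		if (fgets(state, sizeof(state), f) &&
		    strncmp(state, "offline", 7) == 0) {
			fseek(f, 0, SEEK_SET);	/* required between read and write */
			fputs("online", f);
		}
		fclose(f);
	}
	globfree(&g);
	return 0;
}
```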
diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt
index 5d1128bf0282..5962949944fd 100644
--- a/Documentation/printk-formats.txt
+++ b/Documentation/printk-formats.txt
@@ -298,6 +298,24 @@ bitmap and its derivatives such as cpumask and nodemask:
 
 	Passed by reference.
 
+Flags bitfields such as page flags, gfp_flags:
+
+	%pGp	referenced|uptodate|lru|active|private
+	%pGg	GFP_USER|GFP_DMA32|GFP_NOWARN
+	%pGv	read|exec|mayread|maywrite|mayexec|denywrite
+
+	For printing flags bitfields as a collection of symbolic constants that
+	would construct the value. The type of flags is given by the third
+	character. Currently supported are [p]age flags, [v]ma_flags (both
+	expect unsigned long *) and [g]fp_flags (expects gfp_t *). The flag
+	names and print order depends on the particular type.
+
+	Note that this format should not be used directly in TP_printk() part
+	of a tracepoint. Instead, use the show_*_flags() functions from
+	<trace/events/mmflags.h>.
+
+	Passed by reference.
+
 Network device features:
 
 	%pNF	0x000000000000c000
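
From kernel code the new specifiers are used like any other %p extension, except that the argument is a pointer to the flags word, as the table above states. A minimal sketch (the function name is illustrative):

```c
#include <linux/mm.h>
#include <linux/printk.h>

/* Illustrative: dump a page's flags and an allocation's gfp mask
 * symbolically. %pGp wants unsigned long *, %pGg wants gfp_t *. */
static void demo_dump_flags(struct page *page, gfp_t gfp_mask)
{
	pr_info("page flags: %pGp\n", &page->flags);
	pr_info("gfp flags:  %pGg\n", &gfp_mask);
}
```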
diff --git a/Documentation/vm/page_owner.txt b/Documentation/vm/page_owner.txt
index 8f3ce9b3aa11..ffff1439076a 100644
--- a/Documentation/vm/page_owner.txt
+++ b/Documentation/vm/page_owner.txt
@@ -28,10 +28,11 @@ with page owner and page owner is disabled in runtime due to no enabling
 boot option, runtime overhead is marginal. If disabled in runtime, it
 doesn't require memory to store owner information, so there is no runtime
 memory overhead. And, page owner inserts just two unlikely branches into
-the page allocator hotpath and if it returns false then allocation is
-done like as the kernel without page owner. These two unlikely branches
-would not affect to allocation performance. Following is the kernel's
-code size change due to this facility.
+the page allocator hotpath and if not enabled, then allocation is done
+like as the kernel without page owner. These two unlikely branches should
+not affect to allocation performance, especially if the static keys jump
+label patching functionality is available. Following is the kernel's code
+size change due to this facility.
 
 - Without page owner
    text    data     bss     dec     hex filename
diff --git a/Documentation/vm/slub.txt b/Documentation/vm/slub.txt
index f0d340959319..84652419bff2 100644
--- a/Documentation/vm/slub.txt
+++ b/Documentation/vm/slub.txt
@@ -35,8 +35,8 @@ slub_debug=<Debug-Options>,<slab name>
 				Enable options only for select slabs
 
 Possible debug options are
-	F		Sanity checks on (enables SLAB_DEBUG_FREE. Sorry
-			SLAB legacy issues)
+	F		Sanity checks on (enables SLAB_DEBUG_CONSISTENCY_CHECKS
+			Sorry SLAB legacy issues)
 	Z		Red zoning
 	P		Poisoning (object and padding)
 	U		User tracking (free and alloc)
diff --git a/arch/blackfin/include/asm/pgtable.h b/arch/blackfin/include/asm/pgtable.h
index b88a1558b0b9..c1ee3d6533fb 100644
--- a/arch/blackfin/include/asm/pgtable.h
+++ b/arch/blackfin/include/asm/pgtable.h
@@ -97,6 +97,8 @@ extern unsigned long get_fb_unmapped_area(struct file *filp, unsigned long,
 				      unsigned long);
 #define HAVE_ARCH_FB_UNMAPPED_AREA
 
+#define pgprot_writecombine pgprot_noncached
+
 #include <asm-generic/pgtable.h>
 
 #endif /* _BLACKFIN_PGTABLE_H */
diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c
index 0d4146f644dc..11fa717d93b1 100644
--- a/arch/m32r/mm/init.c
+++ b/arch/m32r/mm/init.c
@@ -59,21 +59,24 @@ void free_initrd_mem(unsigned long, unsigned long);
 void __init zone_sizes_init(void)
 {
 	unsigned long zones_size[MAX_NR_ZONES] = {0, };
-	unsigned long max_dma;
-	unsigned long low;
 	unsigned long start_pfn;
 
 #ifdef CONFIG_MMU
-	start_pfn = START_PFN(0);
-	max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
-	low = MAX_LOW_PFN(0);
+	{
+		unsigned long low;
+		unsigned long max_dma;
 
-	if (low < max_dma){
-		zones_size[ZONE_DMA] = low - start_pfn;
-		zones_size[ZONE_NORMAL] = 0;
-	} else {
-		zones_size[ZONE_DMA] = low - start_pfn;
-		zones_size[ZONE_NORMAL] = low - max_dma;
+		start_pfn = START_PFN(0);
+		max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+		low = MAX_LOW_PFN(0);
+
+		if (low < max_dma) {
+			zones_size[ZONE_DMA] = low - start_pfn;
+			zones_size[ZONE_NORMAL] = 0;
+		} else {
+			zones_size[ZONE_DMA] = low - start_pfn;
+			zones_size[ZONE_NORMAL] = low - max_dma;
+		}
 	}
 #else
 	zones_size[ZONE_DMA] = 0 >> PAGE_SHIFT;
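
The m32r change is purely structural: the DMA-sizing locals move into the CONFIG_MMU-only block, so the !CONFIG_MMU build never declares variables it cannot use. The same pattern in a self-contained sketch (names and the CONFIG_DEMO_MMU switch are illustrative):

```c
/* Illustrative pattern: scope locals to the conditionally compiled
 * block so the other configuration never sees (or warns about) them. */
#define CONFIG_DEMO_MMU 1	/* set to 0 to mimic the !MMU build */

static unsigned long demo_low_pfn(void) { return 4096; }

unsigned long demo_zone_size(void)
{
#if CONFIG_DEMO_MMU
	{
		unsigned long low = demo_low_pfn();	/* MMU-only local */

		return low / 2;
	}
#else
	return 0;	/* no unused locals declared in this build */
#endif
}
```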
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index 2150b0139a0b..1b6081c0aff9 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -11,6 +11,7 @@
 #include <linux/export.h>
 #include <linux/kdebug.h>
 #include <linux/ptrace.h>
+#include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <asm/processor.h>
@@ -189,9 +190,8 @@ void die(struct pt_regs *regs, const char *str)
 #ifdef CONFIG_SMP
 	printk("SMP ");
 #endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
-	printk("DEBUG_PAGEALLOC");
-#endif
+	if (debug_pagealloc_enabled())
+		printk("DEBUG_PAGEALLOC");
 	printk("\n");
 	notify_die(DIE_OOPS, str, regs, 0, regs->int_code & 0xffff, SIGSEGV);
 	print_modules();
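
This hunk is the first of several in this series that convert a compile-time #ifdef CONFIG_DEBUG_PAGEALLOC into the runtime debug_pagealloc_enabled() test, so one kernel binary can carry the feature and enable it per boot. The shape of the conversion, as a sketch:

```c
#include <linux/mm.h>		/* debug_pagealloc_enabled() */
#include <linux/printk.h>

/* Before this series the line only existed in CONFIG_DEBUG_PAGEALLOC
 * builds; now it is always built, and the runtime test keeps the cost
 * negligible when the feature is compiled in but booted off. */
static void demo_report(void)
{
	if (debug_pagealloc_enabled())
		printk("DEBUG_PAGEALLOC ");
	printk("\n");
}
```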
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index ef7d6c8fea66..d27fccbad7c1 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -94,16 +94,15 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
 			pgd_populate(&init_mm, pg_dir, pu_dir);
 		}
 		pu_dir = pud_offset(pg_dir, address);
-#ifndef CONFIG_DEBUG_PAGEALLOC
 		if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address &&
-		    !(address & ~PUD_MASK) && (address + PUD_SIZE <= end)) {
+		    !(address & ~PUD_MASK) && (address + PUD_SIZE <= end) &&
+		    !debug_pagealloc_enabled()) {
 			pud_val(*pu_dir) = __pa(address) |
 				_REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE |
 				(ro ? _REGION_ENTRY_PROTECT : 0);
 			address += PUD_SIZE;
 			continue;
 		}
-#endif
 		if (pud_none(*pu_dir)) {
 			pm_dir = vmem_pmd_alloc();
 			if (!pm_dir)
@@ -111,9 +110,9 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
 			pud_populate(&init_mm, pu_dir, pm_dir);
 		}
 		pm_dir = pmd_offset(pu_dir, address);
-#ifndef CONFIG_DEBUG_PAGEALLOC
 		if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address &&
-		    !(address & ~PMD_MASK) && (address + PMD_SIZE <= end)) {
+		    !(address & ~PMD_MASK) && (address + PMD_SIZE <= end) &&
+		    !debug_pagealloc_enabled()) {
 			pmd_val(*pm_dir) = __pa(address) |
 				_SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE |
 				_SEGMENT_ENTRY_YOUNG |
@@ -121,7 +120,6 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
 			address += PMD_SIZE;
 			continue;
 		}
-#endif
 		if (pmd_none(*pm_dir)) {
 			pt_dir = vmem_pte_alloc(address);
 			if (!pt_dir)
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 9c30acfadae2..32e5699eadfe 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -265,9 +265,8 @@ int __die(const char *str, struct pt_regs *regs, long err)
 #ifdef CONFIG_SMP
 	printk("SMP ");
 #endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
-	printk("DEBUG_PAGEALLOC ");
-#endif
+	if (debug_pagealloc_enabled())
+		printk("DEBUG_PAGEALLOC ");
 #ifdef CONFIG_KASAN
 	printk("KASAN");
 #endif
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 493f54172b4a..9d56f271d519 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -150,13 +150,14 @@ static int page_size_mask;
 
 static void __init probe_page_size_mask(void)
 {
-#if !defined(CONFIG_DEBUG_PAGEALLOC) && !defined(CONFIG_KMEMCHECK)
+#if !defined(CONFIG_KMEMCHECK)
 	/*
-	 * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
+	 * For CONFIG_KMEMCHECK or pagealloc debugging, identity mapping will
+	 * use small pages.
 	 * This will simplify cpa(), which otherwise needs to support splitting
 	 * large pages into small in interrupt context, etc.
 	 */
-	if (cpu_has_pse)
+	if (cpu_has_pse && !debug_pagealloc_enabled())
 		page_size_mask |= 1 << PG_LEVEL_2M;
 #endif
 
@@ -666,21 +667,22 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
 	 * mark them not present - any buggy init-section access will
 	 * create a kernel page fault:
 	 */
-#ifdef CONFIG_DEBUG_PAGEALLOC
-	printk(KERN_INFO "debug: unmapping init [mem %#010lx-%#010lx]\n",
-		begin, end - 1);
-	set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
-#else
+	if (debug_pagealloc_enabled()) {
+		pr_info("debug: unmapping init [mem %#010lx-%#010lx]\n",
+			begin, end - 1);
+		set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
+	} else {
 	/*
 	 * We just marked the kernel text read only above, now that
 	 * we are going to free part of that, we need to make that
 	 * writeable and non-executable first.
 	 */
 	set_memory_nx(begin, (end - begin) >> PAGE_SHIFT);
 	set_memory_rw(begin, (end - begin) >> PAGE_SHIFT);
 
-	free_reserved_area((void *)begin, (void *)end, POISON_FREE_INITMEM, what);
-#endif
+		free_reserved_area((void *)begin, (void *)end,
+				   POISON_FREE_INITMEM, what);
+	}
 }
 
 void free_initmem(void)
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 007ebe2d8157..4d0b26253042 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -106,12 +106,6 @@ static inline unsigned long highmap_end_pfn(void)
 
 #endif
 
-#ifdef CONFIG_DEBUG_PAGEALLOC
-# define debug_pagealloc 1
-#else
-# define debug_pagealloc 0
-#endif
-
 static inline int
 within(unsigned long addr, unsigned long start, unsigned long end)
 {
@@ -714,10 +708,10 @@ static int split_large_page(struct cpa_data *cpa, pte_t *kpte,
 {
 	struct page *base;
 
-	if (!debug_pagealloc)
+	if (!debug_pagealloc_enabled())
 		spin_unlock(&cpa_lock);
 	base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0);
-	if (!debug_pagealloc)
+	if (!debug_pagealloc_enabled())
 		spin_lock(&cpa_lock);
 	if (!base)
 		return -ENOMEM;
@@ -1339,10 +1333,10 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
 	if (cpa->flags & (CPA_ARRAY | CPA_PAGES_ARRAY))
 		cpa->numpages = 1;
 
-	if (!debug_pagealloc)
+	if (!debug_pagealloc_enabled())
 		spin_lock(&cpa_lock);
 	ret = __change_page_attr(cpa, checkalias);
-	if (!debug_pagealloc)
+	if (!debug_pagealloc_enabled())
 		spin_unlock(&cpa_lock);
 	if (ret)
 		return ret;
diff --git a/block/partition-generic.c b/block/partition-generic.c
index fefd01b496a0..5d8701941054 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -217,10 +217,21 @@ static void part_release(struct device *dev)
 	kfree(p);
 }
 
+static int part_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+	struct hd_struct *part = dev_to_part(dev);
+
+	add_uevent_var(env, "PARTN=%u", part->partno);
+	if (part->info && part->info->volname[0])
+		add_uevent_var(env, "PARTNAME=%s", part->info->volname);
+	return 0;
+}
+
 struct device_type part_type = {
 	.name		= "partition",
 	.groups		= part_attr_groups,
 	.release	= part_release,
+	.uevent		= part_uevent,
 };
 
 static void delete_partition_rcu_cb(struct rcu_head *head)
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 175c86bee3a9..9ca2b2fefd76 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -61,8 +61,8 @@ module_param(latency_factor, uint, 0644);
 
 static DEFINE_PER_CPU(struct cpuidle_device *, acpi_cpuidle_device);
 
-static DEFINE_PER_CPU(struct acpi_processor_cx * [CPUIDLE_STATE_MAX],
-		      acpi_cstate);
+static
+DEFINE_PER_CPU(struct acpi_processor_cx * [CPUIDLE_STATE_MAX], acpi_cstate);
 
 static int disabled_by_idle_boot_param(void)
 {
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 213456c2b123..f46dba8b7092 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -251,7 +251,7 @@ memory_block_action(unsigned long phys_index, unsigned long action, int online_t
 	return ret;
 }
 
-static int memory_block_change_state(struct memory_block *mem,
+int memory_block_change_state(struct memory_block *mem,
 		unsigned long to_state, unsigned long from_state_req)
 {
 	int ret = 0;
@@ -439,6 +439,37 @@ print_block_size(struct device *dev, struct device_attribute *attr,
 static DEVICE_ATTR(block_size_bytes, 0444, print_block_size, NULL);
 
 /*
+ * Memory auto online policy.
+ */
+
+static ssize_t
+show_auto_online_blocks(struct device *dev, struct device_attribute *attr,
+			char *buf)
+{
+	if (memhp_auto_online)
+		return sprintf(buf, "online\n");
+	else
+		return sprintf(buf, "offline\n");
+}
+
+static ssize_t
+store_auto_online_blocks(struct device *dev, struct device_attribute *attr,
+			 const char *buf, size_t count)
+{
+	if (sysfs_streq(buf, "online"))
+		memhp_auto_online = true;
+	else if (sysfs_streq(buf, "offline"))
+		memhp_auto_online = false;
+	else
+		return -EINVAL;
+
+	return count;
+}
+
+static DEVICE_ATTR(auto_online_blocks, 0644, show_auto_online_blocks,
+		   store_auto_online_blocks);
+
+/*
  * Some architectures will have custom drivers to do this, and
  * will not need to do it from userspace. The fake hot-add code
  * as well as ppc64 will do all of their discovery in userspace
@@ -746,6 +777,7 @@ static struct attribute *memory_root_attrs[] = {
 #endif
 
 	&dev_attr_block_size_bytes.attr,
+	&dev_attr_auto_online_blocks.attr,
 	NULL
 };
 
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 562b5a4ca7b7..78a39f736c64 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -126,7 +126,7 @@
 */
 #include <linux/types.h>
 
-static bool verbose = 0;
+static int verbose = 0;
 static int major = PD_MAJOR;
 static char *name = PD_NAME;
 static int cluster = 64;
@@ -161,7 +161,7 @@ enum {D_PRT, D_PRO, D_UNI, D_MOD, D_GEO, D_SBY, D_DLY, D_SLV};
 static DEFINE_MUTEX(pd_mutex);
 static DEFINE_SPINLOCK(pd_lock);
 
-module_param(verbose, bool, 0);
+module_param(verbose, int, 0);
 module_param(major, int, 0);
 module_param(name, charp, 0);
 module_param(cluster, int, 0);
diff --git a/drivers/block/paride/pt.c b/drivers/block/paride/pt.c
index 1740d75e8a32..216a94fed5b4 100644
--- a/drivers/block/paride/pt.c
+++ b/drivers/block/paride/pt.c
@@ -117,7 +117,7 @@
 
 */
 
-static bool verbose = 0;
+static int verbose = 0;
 static int major = PT_MAJOR;
 static char *name = PT_NAME;
 static int disable = 0;
@@ -152,7 +152,7 @@ static int (*drives[4])[6] = {&drive0, &drive1, &drive2, &drive3};
 
 #include <asm/uaccess.h>
 
-module_param(verbose, bool, 0);
+module_param(verbose, int, 0);
 module_param(major, int, 0);
 module_param(name, charp, 0);
 module_param_array(drive0, int, NULL, 0);
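
Both paride fixes restore agreement between the variable's C type and the type named in module_param(); the two must match, and an int also permits the drivers' verbosity levels above 1. In miniature:

```c
#include <linux/module.h>
#include <linux/moduleparam.h>

/* The storage type and the module_param() type name must agree:
 * "int verbose" pairs with module_param(verbose, int, ...).
 * An int also accepts levels beyond 0/1 (e.g. verbose=2). */
static int verbose;
module_param(verbose, int, 0);
MODULE_PARM_DESC(verbose, "verbosity level");
```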
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 73708acce3ca..979a8317204f 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -37,23 +37,30 @@ config XEN_BALLOON_MEMORY_HOTPLUG
 
 	  Memory could be hotplugged in following steps:
 
-	  1) dom0: xl mem-max <domU> <maxmem>
+	  1) target domain: ensure that memory auto online policy is in
+	     effect by checking /sys/devices/system/memory/auto_online_blocks
+	     file (should be 'online').
+
+	  2) control domain: xl mem-max <target-domain> <maxmem>
 	     where <maxmem> is >= requested memory size,
 
-	  2) dom0: xl mem-set <domU> <memory>
+	  3) control domain: xl mem-set <target-domain> <memory>
 	     where <memory> is requested memory size; alternatively memory
 	     could be added by writing proper value to
 	     /sys/devices/system/xen_memory/xen_memory0/target or
-	     /sys/devices/system/xen_memory/xen_memory0/target_kb on dumU,
+	     /sys/devices/system/xen_memory/xen_memory0/target_kb on the
+	     target domain.
 
-	  3) domU: for i in /sys/devices/system/memory/memory*/state; do \
-	     [ "`cat "$i"`" = offline ] && echo online > "$i"; done
+	  Alternatively, if memory auto onlining was not requested at step 1
+	  the newly added memory can be manually onlined in the target domain
+	  by doing the following:
 
-	  Memory could be onlined automatically on domU by adding following line to udev rules:
+	  for i in /sys/devices/system/memory/memory*/state; do \
+	    [ "`cat "$i"`" = offline ] && echo online > "$i"; done
 
-	  SUBSYSTEM=="memory", ACTION=="add", RUN+="/bin/sh -c '[ -f /sys$devpath/state ] && echo online > /sys$devpath/state'"
+	  or by adding the following line to udev rules:
 
-	  In that case step 3 should be omitted.
+	  SUBSYSTEM=="memory", ACTION=="add", RUN+="/bin/sh -c '[ -f /sys$devpath/state ] && echo online > /sys$devpath/state'"
 
 config XEN_BALLOON_MEMORY_HOTPLUG_LIMIT
 	int "Hotplugged memory limit (in GiB) for a PV guest"
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index dc4305b407bf..7c8a2cf16f58 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -338,7 +338,16 @@ static enum bp_state reserve_additional_memory(void)
 	}
 #endif
 
-	rc = add_memory_resource(nid, resource);
+	/*
+	 * add_memory_resource() will call online_pages() which in its turn
+	 * will call xen_online_page() callback causing deadlock if we don't
+	 * release balloon_mutex here. Unlocking here is safe because the
+	 * callers drop the mutex before trying again.
+	 */
+	mutex_unlock(&balloon_mutex);
+	rc = add_memory_resource(nid, resource, memhp_auto_online);
+	mutex_lock(&balloon_mutex);
+
 	if (rc) {
 		pr_warn("Cannot add additional memory (%i)\n", rc);
 		goto err;
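
The comment added above documents a classic re-entrancy deadlock: add_memory_resource() can end up in xen_online_page(), which takes balloon_mutex again. The drop-and-retake pattern in a generic sketch (names illustrative; callers must tolerate state changing in the gap):

```c
#include <linux/mutex.h>

static DEFINE_MUTEX(demo_mutex);

/* demo_callback() also takes demo_mutex, so demo_add() must release
 * the lock around the call that can re-enter us. */
static void demo_callback(void)
{
	mutex_lock(&demo_mutex);
	/* ... adjust state ... */
	mutex_unlock(&demo_mutex);
}

static void demo_add(void)
{
	mutex_lock(&demo_mutex);
	/* ... prepare under the lock ... */

	mutex_unlock(&demo_mutex);
	demo_callback();	/* would deadlock if we still held the lock */
	mutex_lock(&demo_mutex);

	/* ... revalidate and finish under the lock ... */
	mutex_unlock(&demo_mutex);
}
```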
diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c
index 7dd46312c180..403fe3955393 100644
--- a/drivers/xen/events/events_2l.c
+++ b/drivers/xen/events/events_2l.c
@@ -38,8 +38,9 @@
 /* Find the first set bit in a evtchn mask */
 #define EVTCHN_FIRST_BIT(w) find_first_bit(BM(&(w)), BITS_PER_EVTCHN_WORD)
 
-static DEFINE_PER_CPU(xen_ulong_t [EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD],
-		      cpu_evtchn_mask);
+#define EVTCHN_MASK_SIZE (EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD)
+
+static DEFINE_PER_CPU(xen_ulong_t [EVTCHN_MASK_SIZE], cpu_evtchn_mask);
 
 static unsigned evtchn_2l_max_channels(void)
 {
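
This hunk and the processor_idle.c one above are readability fixes for per-CPU array declarations; naming the array length keeps the DEFINE_PER_CPU() line short. Usage is unchanged — roughly, as a sketch with illustrative names:

```c
#include <linux/bitops.h>	/* BITS_PER_LONG */
#include <linux/percpu.h>

#define DEMO_MASK_SIZE 8	/* illustrative word count */

/* Each possible CPU gets its own copy of this word array. */
static DEFINE_PER_CPU(unsigned long [DEMO_MASK_SIZE], demo_mask);

static void demo_set_bit(unsigned int cpu, unsigned int bit)
{
	unsigned long *words = per_cpu(demo_mask, cpu);

	words[bit / BITS_PER_LONG] |= 1UL << (bit % BITS_PER_LONG);
}
```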
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index c37149b929be..f0d268b97d19 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -1,15 +1,11 @@
-/* -*- c -*- ------------------------------------------------------------- *
- *
- * linux/fs/autofs/autofs_i.h
- *
- * Copyright 1997-1998 Transmeta Corporation - All Rights Reserved
- * Copyright 2005-2006 Ian Kent <raven@themaw.net>
+/*
+ * Copyright 1997-1998 Transmeta Corporation - All Rights Reserved
+ * Copyright 2005-2006 Ian Kent <raven@themaw.net>
  *
  * This file is part of the Linux kernel and is made available under
  * the terms of the GNU General Public License, version 2, or at your
  * option, any later version, incorporated herein by reference.
- *
- * ----------------------------------------------------------------------- */
+ */
 
 /* Internal header file for autofs */
 
@@ -35,28 +31,23 @@
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <asm/current.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /* #define DEBUG */
 
-#define DPRINTK(fmt, ...)				\
-	pr_debug("pid %d: %s: " fmt "\n",		\
-		current->pid, __func__, ##__VA_ARGS__)
-
-#define AUTOFS_WARN(fmt, ...)				\
-	printk(KERN_WARNING "pid %d: %s: " fmt "\n",	\
-		current->pid, __func__, ##__VA_ARGS__)
-
-#define AUTOFS_ERROR(fmt, ...)				\
-	printk(KERN_ERR "pid %d: %s: " fmt "\n",	\
-		current->pid, __func__, ##__VA_ARGS__)
-
-/* Unified info structure. This is pointed to by both the dentry and
-   inode structures. Each file in the filesystem has an instance of this
-   structure. It holds a reference to the dentry, so dentries are never
-   flushed while the file exists. All name lookups are dealt with at the
-   dentry level, although the filesystem can interfere in the validation
-   process. Readdir is implemented by traversing the dentry lists. */
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+#define pr_fmt(fmt) KBUILD_MODNAME ":pid:%d:%s: " fmt, current->pid, __func__
+
+/*
+ * Unified info structure. This is pointed to by both the dentry and
+ * inode structures. Each file in the filesystem has an instance of this
+ * structure. It holds a reference to the dentry, so dentries are never
+ * flushed while the file exists. All name lookups are dealt with at the
+ * dentry level, although the filesystem can interfere in the validation
+ * process. Readdir is implemented by traversing the dentry lists.
+ */
 struct autofs_info {
 	struct dentry	*dentry;
 	struct inode	*inode;
@@ -78,7 +69,7 @@ struct autofs_info {
 	kgid_t gid;
 };
 
-#define AUTOFS_INF_EXPIRING	(1<<0) /* dentry is in the process of expiring */
+#define AUTOFS_INF_EXPIRING	(1<<0) /* dentry in the process of expiring */
 #define AUTOFS_INF_NO_RCU	(1<<1) /* the dentry is being considered
 					* for expiry, so RCU_walk is
 					* not permitted
@@ -140,10 +131,11 @@ static inline struct autofs_info *autofs4_dentry_ino(struct dentry *dentry)
 }
 
 /* autofs4_oz_mode(): do we see the man behind the curtain?  (The
-   processes which do manipulations for us in user space sees the raw
-   filesystem without "magic".) */
-
-static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) {
+ * processes which do manipulations for us in user space sees the raw
+ * filesystem without "magic".)
+ */
+static inline int autofs4_oz_mode(struct autofs_sb_info *sbi)
+{
 	return sbi->catatonic || task_pgrp(current) == sbi->oz_pgrp;
 }
 
@@ -154,12 +146,12 @@ void autofs4_free_ino(struct autofs_info *);
 int is_autofs4_dentry(struct dentry *);
 int autofs4_expire_wait(struct dentry *dentry, int rcu_walk);
 int autofs4_expire_run(struct super_block *, struct vfsmount *,
-			struct autofs_sb_info *,
-			struct autofs_packet_expire __user *);
+		       struct autofs_sb_info *,
+		       struct autofs_packet_expire __user *);
 int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
-			struct autofs_sb_info *sbi, int when);
+			    struct autofs_sb_info *sbi, int when);
 int autofs4_expire_multi(struct super_block *, struct vfsmount *,
-			struct autofs_sb_info *, int __user *);
+			 struct autofs_sb_info *, int __user *);
 struct dentry *autofs4_expire_direct(struct super_block *sb,
-					struct vfsmount *mnt,
-					struct autofs_sb_info *sbi, int how);
+				     struct vfsmount *mnt,
+				     struct autofs_sb_info *sbi, int how);
@@ -224,8 +216,8 @@ static inline int autofs_prepare_pipe(struct file *pipe)
 
 /* Queue management functions */
 
-int autofs4_wait(struct autofs_sb_info *,struct dentry *, enum autofs_notify);
-int autofs4_wait_release(struct autofs_sb_info *,autofs_wqt_t,int);
+int autofs4_wait(struct autofs_sb_info *, struct dentry *, enum autofs_notify);
+int autofs4_wait_release(struct autofs_sb_info *, autofs_wqt_t, int);
 void autofs4_catatonic_mode(struct autofs_sb_info *);
 
 static inline u32 autofs4_get_dev(struct autofs_sb_info *sbi)
@@ -242,37 +234,37 @@ static inline void __autofs4_add_expiring(struct dentry *dentry)
 {
 	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
+
 	if (ino) {
 		if (list_empty(&ino->expiring))
 			list_add(&ino->expiring, &sbi->expiring_list);
 	}
-	return;
 }
 
 static inline void autofs4_add_expiring(struct dentry *dentry)
 {
	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
+
 	if (ino) {
 		spin_lock(&sbi->lookup_lock);
 		if (list_empty(&ino->expiring))
 			list_add(&ino->expiring, &sbi->expiring_list);
 		spin_unlock(&sbi->lookup_lock);
 	}
-	return;
 }
 
 static inline void autofs4_del_expiring(struct dentry *dentry)
 {
 	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
+
 	if (ino) {
 		spin_lock(&sbi->lookup_lock);
 		if (!list_empty(&ino->expiring))
 			list_del_init(&ino->expiring);
 		spin_unlock(&sbi->lookup_lock);
 	}
-	return;
 }
 
 extern void autofs4_kill_sb(struct super_block *);
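
Defining pr_fmt() this way gives every subsequent pr_warn()/pr_debug() in the autofs4 sources the module, pid, and function prefix without repeating it at each call site — which is what lets the following patches replace DPRINTK/AUTOFS_WARN/AUTOFS_ERROR with plain pr_*() calls. The mechanism in miniature:

```c
/* pr_fmt() must be defined before printk.h is included; pr_*()
 * then expands to printk(LEVEL pr_fmt(fmt), ...), so the prefix
 * format (and its arguments) are prepended automatically. */
#define pr_fmt(fmt) KBUILD_MODNAME ": %s: " fmt, __func__

#include <linux/printk.h>

static void demo(void)
{
	pr_info("ready\n");	/* emits e.g. "<module>: demo: ready" */
}
```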
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index ac7d921ed984..c7fcc7438843 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -72,13 +72,13 @@ static int check_dev_ioctl_version(int cmd, struct autofs_dev_ioctl *param)
 {
 	int err = 0;
 
-	if ((AUTOFS_DEV_IOCTL_VERSION_MAJOR != param->ver_major) ||
-	    (AUTOFS_DEV_IOCTL_VERSION_MINOR < param->ver_minor)) {
-		AUTOFS_WARN("ioctl control interface version mismatch: "
-			   "kernel(%u.%u), user(%u.%u), cmd(%d)",
+	if ((param->ver_major != AUTOFS_DEV_IOCTL_VERSION_MAJOR) ||
+	    (param->ver_minor > AUTOFS_DEV_IOCTL_VERSION_MINOR)) {
+		pr_warn("ioctl control interface version mismatch: "
+			"kernel(%u.%u), user(%u.%u), cmd(%d)\n",
 			AUTOFS_DEV_IOCTL_VERSION_MAJOR,
 			AUTOFS_DEV_IOCTL_VERSION_MINOR,
 			param->ver_major, param->ver_minor, cmd);
 		err = -EINVAL;
 	}
 
@@ -93,7 +93,8 @@ static int check_dev_ioctl_version(int cmd, struct autofs_dev_ioctl *param)
  * Copy parameter control struct, including a possible path allocated
  * at the end of the struct.
  */
-static struct autofs_dev_ioctl *copy_dev_ioctl(struct autofs_dev_ioctl __user *in)
+static struct autofs_dev_ioctl *
+copy_dev_ioctl(struct autofs_dev_ioctl __user *in)
 {
 	struct autofs_dev_ioctl tmp, *res;
 
@@ -116,7 +117,6 @@ static struct autofs_dev_ioctl *copy_dev_ioctl(struct autofs_dev_ioctl __user *i
 static inline void free_dev_ioctl(struct autofs_dev_ioctl *param)
 {
 	kfree(param);
-	return;
 }
 
 /*
@@ -129,24 +129,24 @@ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param)
 
 	err = check_dev_ioctl_version(cmd, param);
 	if (err) {
-		AUTOFS_WARN("invalid device control module version "
-			  "supplied for cmd(0x%08x)", cmd);
+		pr_warn("invalid device control module version "
+			"supplied for cmd(0x%08x)\n", cmd);
 		goto out;
 	}
 
 	if (param->size > sizeof(*param)) {
 		err = invalid_str(param->path, param->size - sizeof(*param));
 		if (err) {
-			AUTOFS_WARN(
-			  "path string terminator missing for cmd(0x%08x)",
+			pr_warn(
+			  "path string terminator missing for cmd(0x%08x)\n",
 			  cmd);
 			goto out;
 		}
 
 		err = check_name(param->path);
 		if (err) {
-			AUTOFS_WARN("invalid path supplied for cmd(0x%08x)",
+			pr_warn("invalid path supplied for cmd(0x%08x)\n",
 				cmd);
 			goto out;
 		}
 	}
@@ -197,7 +197,9 @@ static int find_autofs_mount(const char *pathname,
 			     void *data)
 {
 	struct path path;
-	int err = kern_path_mountpoint(AT_FDCWD, pathname, &path, 0);
+	int err;
+
+	err = kern_path_mountpoint(AT_FDCWD, pathname, &path, 0);
 	if (err)
 		return err;
 	err = -ENOENT;
@@ -225,6 +227,7 @@ static int test_by_dev(struct path *path, void *p)
 static int test_by_type(struct path *path, void *p)
 {
 	struct autofs_info *ino = autofs4_dentry_ino(path->dentry);
+
 	return ino && ino->sbi->type & *(unsigned *)p;
 }
 
@@ -370,7 +373,7 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
 	new_pid = get_task_pid(current, PIDTYPE_PGID);
 
 	if (ns_of_pid(new_pid) != ns_of_pid(sbi->oz_pgrp)) {
-		AUTOFS_WARN("Not allowed to change PID namespace");
+		pr_warn("not allowed to change PID namespace\n");
 		err = -EINVAL;
 		goto out;
 	}
@@ -456,8 +459,10 @@ static int autofs_dev_ioctl_requester(struct file *fp,
 		err = 0;
 		autofs4_expire_wait(path.dentry, 0);
 		spin_lock(&sbi->fs_lock);
-		param->requester.uid = from_kuid_munged(current_user_ns(), ino->uid);
-		param->requester.gid = from_kgid_munged(current_user_ns(), ino->gid);
+		param->requester.uid =
+			from_kuid_munged(current_user_ns(), ino->uid);
+		param->requester.gid =
+			from_kgid_munged(current_user_ns(), ino->gid);
 		spin_unlock(&sbi->fs_lock);
 	}
 	path_put(&path);
@@ -619,7 +624,8 @@ static ioctl_fn lookup_dev_ioctl(unsigned int cmd)
 }
 
 /* ioctl dispatcher */
-static int _autofs_dev_ioctl(unsigned int command, struct autofs_dev_ioctl __user *user)
+static int _autofs_dev_ioctl(unsigned int command,
+			     struct autofs_dev_ioctl __user *user)
 {
 	struct autofs_dev_ioctl *param;
 	struct file *fp;
@@ -655,7 +661,7 @@ static int _autofs_dev_ioctl(unsigned int command, struct autofs_dev_ioctl __use
 
 	fn = lookup_dev_ioctl(cmd);
 	if (!fn) {
-		AUTOFS_WARN("unknown command 0x%08x", command);
+		pr_warn("unknown command 0x%08x\n", command);
 		return -ENOTTY;
 	}
 
@@ -711,6 +717,7 @@ out:
 static long autofs_dev_ioctl(struct file *file, uint command, ulong u)
 {
 	int err;
+
 	err = _autofs_dev_ioctl(command, (struct autofs_dev_ioctl __user *) u);
 	return (long) err;
 }
@@ -733,8 +740,8 @@ static const struct file_operations _dev_ioctl_fops = {
 
 static struct miscdevice _autofs_dev_ioctl_misc = {
 	.minor		= AUTOFS_MINOR,
-	.name 		= AUTOFS_DEVICE_NAME,
-	.fops 		= &_dev_ioctl_fops
+	.name		= AUTOFS_DEVICE_NAME,
+	.fops		= &_dev_ioctl_fops
 };
 
 MODULE_ALIAS_MISCDEV(AUTOFS_MINOR);
@@ -747,7 +754,7 @@ int __init autofs_dev_ioctl_init(void)
 
 	r = misc_register(&_autofs_dev_ioctl_misc);
 	if (r) {
-		AUTOFS_ERROR("misc_register failed for control device");
+		pr_err("misc_register failed for control device\n");
 		return r;
 	}
 
@@ -757,6 +764,4 @@ int __init autofs_dev_ioctl_init(void)
 void autofs_dev_ioctl_exit(void)
 {
 	misc_deregister(&_autofs_dev_ioctl_misc);
-	return;
 }
-
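
The reordered comparisons at the top of this file encode the interface's compatibility rule: the major versions must match exactly, and a caller's minor version may be older than the kernel's but never newer. Restated as a tiny standalone predicate:

```c
#include <stdbool.h>

/* Compatibility rule from check_dev_ioctl_version(): equal major,
 * caller's minor not newer than the kernel's. */
static bool dev_ioctl_compatible(unsigned kmajor, unsigned kminor,
				 unsigned umajor, unsigned uminor)
{
	return umajor == kmajor && uminor <= kminor;
}
```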
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 1cebc3c52fa5..9510d8d2e9cd 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -1,16 +1,12 @@
-/* -*- c -*- --------------------------------------------------------------- *
- *
- * linux/fs/autofs/expire.c
- *
- * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
- * Copyright 1999-2000 Jeremy Fitzhardinge <jeremy@goop.org>
- * Copyright 2001-2006 Ian Kent <raven@themaw.net>
+/*
+ * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
+ * Copyright 1999-2000 Jeremy Fitzhardinge <jeremy@goop.org>
+ * Copyright 2001-2006 Ian Kent <raven@themaw.net>
  *
  * This file is part of the Linux kernel and is made available under
  * the terms of the GNU General Public License, version 2, or at your
  * option, any later version, incorporated herein by reference.
- *
- * ------------------------------------------------------------------------- */
+ */
 
 #include "autofs_i.h"
 
@@ -18,7 +14,7 @@ static unsigned long now;
 
 /* Check if a dentry can be expired */
 static inline int autofs4_can_expire(struct dentry *dentry,
-					unsigned long timeout, int do_now)
+				     unsigned long timeout, int do_now)
 {
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
 
@@ -41,7 +37,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
 	struct path path = {.mnt = mnt, .dentry = dentry};
 	int status = 1;
 
-	DPRINTK("dentry %p %pd", dentry, dentry);
+	pr_debug("dentry %p %pd\n", dentry, dentry);
 
 	path_get(&path);
 
@@ -58,14 +54,16 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
 
 	/* Update the expiry counter if fs is busy */
 	if (!may_umount_tree(path.mnt)) {
-		struct autofs_info *ino = autofs4_dentry_ino(top);
+		struct autofs_info *ino;
+
+		ino = autofs4_dentry_ino(top);
 		ino->last_used = jiffies;
 		goto done;
 	}
 
 	status = 0;
 done:
-	DPRINTK("returning = %d", status);
+	pr_debug("returning = %d\n", status);
 	path_put(&path);
 	return status;
 }
@@ -74,7 +72,7 @@ done:
  * Calculate and dget next entry in the subdirs list under root.
 */
 static struct dentry *get_next_positive_subdir(struct dentry *prev,
-						struct dentry *root)
+					       struct dentry *root)
 {
 	struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb);
 	struct list_head *next;
@@ -121,7 +119,7 @@ cont:
  * Calculate and dget next entry in top down tree traversal.
 */
 static struct dentry *get_next_positive_dentry(struct dentry *prev,
-						struct dentry *root)
+					       struct dentry *root)
 {
 	struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb);
 	struct list_head *next;
@@ -187,15 +185,17 @@ again:
  * autofs submounts.
 */
 static int autofs4_direct_busy(struct vfsmount *mnt,
-				struct dentry *top,
-				unsigned long timeout,
-				int do_now)
+			       struct dentry *top,
+			       unsigned long timeout,
+			       int do_now)
 {
-	DPRINTK("top %p %pd", top, top);
+	pr_debug("top %p %pd\n", top, top);
 
 	/* If it's busy update the expiry counters */
 	if (!may_umount_tree(mnt)) {
-		struct autofs_info *ino = autofs4_dentry_ino(top);
+		struct autofs_info *ino;
+
+		ino = autofs4_dentry_ino(top);
 		if (ino)
 			ino->last_used = jiffies;
 		return 1;
@@ -208,7 +208,8 @@ static int autofs4_direct_busy(struct vfsmount *mnt,
 	return 0;
 }
 
-/* Check a directory tree of mount points for busyness
+/*
+ * Check a directory tree of mount points for busyness
  * The tree is not busy iff no mountpoints are busy
  */
 static int autofs4_tree_busy(struct vfsmount *mnt,
@@ -219,7 +220,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
 	struct autofs_info *top_ino = autofs4_dentry_ino(top);
 	struct dentry *p;
 
-	DPRINTK("top %p %pd", top, top);
+	pr_debug("top %p %pd\n", top, top);
 
 	/* Negative dentry - give up */
 	if (!simple_positive(top))
@@ -227,7 +228,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
 
 	p = NULL;
 	while ((p = get_next_positive_dentry(p, top))) {
-		DPRINTK("dentry %p %pd", p, p);
+		pr_debug("dentry %p %pd\n", p, p);
 
 		/*
 		 * Is someone visiting anywhere in the subtree ?
@@ -273,11 +274,11 @@ static struct dentry *autofs4_check_leaves(struct vfsmount *mnt,
 {
 	struct dentry *p;
 
-	DPRINTK("parent %p %pd", parent, parent);
+	pr_debug("parent %p %pd\n", parent, parent);
 
 	p = NULL;
 	while ((p = get_next_positive_dentry(p, parent))) {
-		DPRINTK("dentry %p %pd", p, p);
+		pr_debug("dentry %p %pd\n", p, p);
 
 		if (d_mountpoint(p)) {
 			/* Can we umount this guy */
@@ -362,7 +363,7 @@ static struct dentry *should_expire(struct dentry *dentry,
 	 * offset (autofs-5.0+).
 	 */
 	if (d_mountpoint(dentry)) {
-		DPRINTK("checking mountpoint %p %pd", dentry, dentry);
+		pr_debug("checking mountpoint %p %pd\n", dentry, dentry);
 
 		/* Can we umount this guy */
 		if (autofs4_mount_busy(mnt, dentry))
@@ -375,7 +376,7 @@ static struct dentry *should_expire(struct dentry *dentry,
 	}
 
 	if (d_really_is_positive(dentry) && d_is_symlink(dentry)) {
-		DPRINTK("checking symlink %p %pd", dentry, dentry);
+		pr_debug("checking symlink %p %pd\n", dentry, dentry);
 		/*
 		 * A symlink can't be "busy" in the usual sense so
 		 * just check last used for expire timeout.
@@ -404,6 +405,7 @@ static struct dentry *should_expire(struct dentry *dentry, | |||
404 | } else { | 405 | } else { |
405 | /* Path walk currently on this dentry? */ | 406 | /* Path walk currently on this dentry? */ |
406 | struct dentry *expired; | 407 | struct dentry *expired; |
408 | |||
407 | ino_count = atomic_read(&ino->count) + 1; | 409 | ino_count = atomic_read(&ino->count) + 1; |
408 | if (d_count(dentry) > ino_count) | 410 | if (d_count(dentry) > ino_count) |
409 | return NULL; | 411 | return NULL; |
@@ -471,7 +473,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, | |||
471 | return NULL; | 473 | return NULL; |
472 | 474 | ||
473 | found: | 475 | found: |
474 | DPRINTK("returning %p %pd", expired, expired); | 476 | pr_debug("returning %p %pd\n", expired, expired); |
475 | ino->flags |= AUTOFS_INF_EXPIRING; | 477 | ino->flags |= AUTOFS_INF_EXPIRING; |
476 | smp_mb(); | 478 | smp_mb(); |
477 | ino->flags &= ~AUTOFS_INF_NO_RCU; | 479 | ino->flags &= ~AUTOFS_INF_NO_RCU; |
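In the found: path above, AUTOFS_INF_EXPIRING is set before AUTOFS_INF_NO_RCU is cleared, with smp_mb() between the two stores. The ordering appears intended so that an RCU-walk lookup which observes NO_RCU already clear cannot miss the EXPIRING flag; read this as a sketch of the publication pattern, not a full proof (the reader side needs a pairing barrier of its own):

    ino->flags |= AUTOFS_INF_EXPIRING;    /* publish "being expired" first */
    smp_mb();                             /* order the two flag updates */
    ino->flags &= ~AUTOFS_INF_NO_RCU;     /* then let RCU-walk proceed */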
@@ -503,12 +505,12 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk) | |||
503 | if (ino->flags & AUTOFS_INF_EXPIRING) { | 505 | if (ino->flags & AUTOFS_INF_EXPIRING) { |
504 | spin_unlock(&sbi->fs_lock); | 506 | spin_unlock(&sbi->fs_lock); |
505 | 507 | ||
506 | DPRINTK("waiting for expire %p name=%pd", dentry, dentry); | 508 | pr_debug("waiting for expire %p name=%pd\n", dentry, dentry); |
507 | 509 | ||
508 | status = autofs4_wait(sbi, dentry, NFY_NONE); | 510 | status = autofs4_wait(sbi, dentry, NFY_NONE); |
509 | wait_for_completion(&ino->expire_complete); | 511 | wait_for_completion(&ino->expire_complete); |
510 | 512 | ||
511 | DPRINTK("expire done status=%d", status); | 513 | pr_debug("expire done status=%d\n", status); |
512 | 514 | ||
513 | if (d_unhashed(dentry)) | 515 | if (d_unhashed(dentry)) |
514 | return -EAGAIN; | 516 | return -EAGAIN; |
@@ -522,21 +524,22 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk) | |||
522 | 524 | ||
523 | /* Perform an expiry operation */ | 525 | /* Perform an expiry operation */ |
524 | int autofs4_expire_run(struct super_block *sb, | 526 | int autofs4_expire_run(struct super_block *sb, |
525 | struct vfsmount *mnt, | 527 | struct vfsmount *mnt, |
526 | struct autofs_sb_info *sbi, | 528 | struct autofs_sb_info *sbi, |
527 | struct autofs_packet_expire __user *pkt_p) | 529 | struct autofs_packet_expire __user *pkt_p) |
528 | { | 530 | { |
529 | struct autofs_packet_expire pkt; | 531 | struct autofs_packet_expire pkt; |
530 | struct autofs_info *ino; | 532 | struct autofs_info *ino; |
531 | struct dentry *dentry; | 533 | struct dentry *dentry; |
532 | int ret = 0; | 534 | int ret = 0; |
533 | 535 | ||
534 | memset(&pkt,0,sizeof pkt); | 536 | memset(&pkt, 0, sizeof(pkt)); |
535 | 537 | ||
536 | pkt.hdr.proto_version = sbi->version; | 538 | pkt.hdr.proto_version = sbi->version; |
537 | pkt.hdr.type = autofs_ptype_expire; | 539 | pkt.hdr.type = autofs_ptype_expire; |
538 | 540 | ||
539 | if ((dentry = autofs4_expire_indirect(sb, mnt, sbi, 0)) == NULL) | 541 | dentry = autofs4_expire_indirect(sb, mnt, sbi, 0); |
542 | if (!dentry) | ||
540 | return -EAGAIN; | 543 | return -EAGAIN; |
541 | 544 | ||
542 | pkt.len = dentry->d_name.len; | 545 | pkt.len = dentry->d_name.len; |
@@ -544,7 +547,7 @@ int autofs4_expire_run(struct super_block *sb, | |||
544 | pkt.name[pkt.len] = '\0'; | 547 | pkt.name[pkt.len] = '\0'; |
545 | dput(dentry); | 548 | dput(dentry); |
546 | 549 | ||
547 | if ( copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire)) ) | 550 | if (copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire))) |
548 | ret = -EFAULT; | 551 | ret = -EFAULT; |
549 | 552 | ||
550 | spin_lock(&sbi->fs_lock); | 553 | spin_lock(&sbi->fs_lock); |
@@ -573,7 +576,8 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt, | |||
573 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | 576 | struct autofs_info *ino = autofs4_dentry_ino(dentry); |
574 | 577 | ||
575 | /* This is synchronous because it makes the daemon a | 578 | /* This is synchronous because it makes the daemon a |
576 | little easier */ | 579 | * little easier |
580 | */ | ||
577 | ret = autofs4_wait(sbi, dentry, NFY_EXPIRE); | 581 | ret = autofs4_wait(sbi, dentry, NFY_EXPIRE); |
578 | 582 | ||
579 | spin_lock(&sbi->fs_lock); | 583 | spin_lock(&sbi->fs_lock); |
@@ -588,8 +592,10 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt, | |||
588 | return ret; | 592 | return ret; |
589 | } | 593 | } |
590 | 594 | ||
591 | /* Call repeatedly until it returns -EAGAIN, meaning there's nothing | 595 | /* |
592 | more to be done */ | 596 | * Call repeatedly until it returns -EAGAIN, meaning there's nothing |
597 | * more to be done. | ||
598 | */ | ||
593 | int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt, | 599 | int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt, |
594 | struct autofs_sb_info *sbi, int __user *arg) | 600 | struct autofs_sb_info *sbi, int __user *arg) |
595 | { | 601 | { |
diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c index b3db517e89ec..8cf0e63389ae 100644 --- a/fs/autofs4/init.c +++ b/fs/autofs4/init.c | |||
@@ -1,14 +1,10 @@ | |||
1 | /* -*- c -*- --------------------------------------------------------------- * | 1 | /* |
2 | * | 2 | * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved |
3 | * linux/fs/autofs/init.c | ||
4 | * | ||
5 | * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved | ||
6 | * | 3 | * |
7 | * This file is part of the Linux kernel and is made available under | 4 | * This file is part of the Linux kernel and is made available under |
8 | * the terms of the GNU General Public License, version 2, or at your | 5 | * the terms of the GNU General Public License, version 2, or at your |
9 | * option, any later version, incorporated herein by reference. | 6 | * option, any later version, incorporated herein by reference. |
10 | * | 7 | */ |
11 | * ------------------------------------------------------------------------- */ | ||
12 | 8 | ||
13 | #include <linux/module.h> | 9 | #include <linux/module.h> |
14 | #include <linux/init.h> | 10 | #include <linux/init.h> |
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index a3ae0b2aeb5a..61b21051bd5a 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c | |||
@@ -1,15 +1,11 @@ | |||
1 | /* -*- c -*- --------------------------------------------------------------- * | 1 | /* |
2 | * | 2 | * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved |
3 | * linux/fs/autofs/inode.c | 3 | * Copyright 2005-2006 Ian Kent <raven@themaw.net> |
4 | * | ||
5 | * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved | ||
6 | * Copyright 2005-2006 Ian Kent <raven@themaw.net> | ||
7 | * | 4 | * |
8 | * This file is part of the Linux kernel and is made available under | 5 | * This file is part of the Linux kernel and is made available under |
9 | * the terms of the GNU General Public License, version 2, or at your | 6 | * the terms of the GNU General Public License, version 2, or at your |
10 | * option, any later version, incorporated herein by reference. | 7 | * option, any later version, incorporated herein by reference. |
11 | * | 8 | */ |
12 | * ------------------------------------------------------------------------- */ | ||
13 | 9 | ||
14 | #include <linux/kernel.h> | 10 | #include <linux/kernel.h> |
15 | #include <linux/slab.h> | 11 | #include <linux/slab.h> |
@@ -24,7 +20,9 @@ | |||
24 | 20 | ||
25 | struct autofs_info *autofs4_new_ino(struct autofs_sb_info *sbi) | 21 | struct autofs_info *autofs4_new_ino(struct autofs_sb_info *sbi) |
26 | { | 22 | { |
27 | struct autofs_info *ino = kzalloc(sizeof(*ino), GFP_KERNEL); | 23 | struct autofs_info *ino; |
24 | |||
25 | ino = kzalloc(sizeof(*ino), GFP_KERNEL); | ||
28 | if (ino) { | 26 | if (ino) { |
29 | INIT_LIST_HEAD(&ino->active); | 27 | INIT_LIST_HEAD(&ino->active); |
30 | INIT_LIST_HEAD(&ino->expiring); | 28 | INIT_LIST_HEAD(&ino->expiring); |
@@ -62,7 +60,7 @@ void autofs4_kill_sb(struct super_block *sb) | |||
62 | put_pid(sbi->oz_pgrp); | 60 | put_pid(sbi->oz_pgrp); |
63 | } | 61 | } |
64 | 62 | ||
65 | DPRINTK("shutting down"); | 63 | pr_debug("shutting down\n"); |
66 | kill_litter_super(sb); | 64 | kill_litter_super(sb); |
67 | if (sbi) | 65 | if (sbi) |
68 | kfree_rcu(sbi, rcu); | 66 | kfree_rcu(sbi, rcu); |
@@ -94,7 +92,12 @@ static int autofs4_show_options(struct seq_file *m, struct dentry *root) | |||
94 | seq_printf(m, ",direct"); | 92 | seq_printf(m, ",direct"); |
95 | else | 93 | else |
96 | seq_printf(m, ",indirect"); | 94 | seq_printf(m, ",indirect"); |
97 | 95 | #ifdef CONFIG_CHECKPOINT_RESTORE | |
96 | if (sbi->pipe) | ||
97 | seq_printf(m, ",pipe_ino=%ld", sbi->pipe->f_inode->i_ino); | ||
98 | else | ||
99 | seq_printf(m, ",pipe_ino=-1"); | ||
100 | #endif | ||
98 | return 0; | 101 | return 0; |
99 | } | 102 | } |
100 | 103 | ||
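The new #ifdef CONFIG_CHECKPOINT_RESTORE block publishes the daemon pipe's inode number as a pipe_ino= mount option, with -1 standing in when no pipe is attached (e.g. a catatonic mount). A checkpoint/restore tool can then match the autofs mount to the open pipe fd it finds in the daemon. An illustrative /proc/mounts line, with invented values:

    automount /mnt/net autofs rw,relatime,fd=6,pgrp=310,timeout=300,minproto=5,maxproto=5,indirect,pipe_ino=18434 0 0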
@@ -147,6 +150,7 @@ static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid, | |||
147 | 150 | ||
148 | while ((p = strsep(&options, ",")) != NULL) { | 151 | while ((p = strsep(&options, ",")) != NULL) { |
149 | int token; | 152 | int token; |
153 | |||
150 | if (!*p) | 154 | if (!*p) |
151 | continue; | 155 | continue; |
152 | 156 | ||
@@ -204,9 +208,9 @@ static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid, | |||
204 | 208 | ||
205 | int autofs4_fill_super(struct super_block *s, void *data, int silent) | 209 | int autofs4_fill_super(struct super_block *s, void *data, int silent) |
206 | { | 210 | { |
207 | struct inode * root_inode; | 211 | struct inode *root_inode; |
208 | struct dentry * root; | 212 | struct dentry *root; |
209 | struct file * pipe; | 213 | struct file *pipe; |
210 | int pipefd; | 214 | int pipefd; |
211 | struct autofs_sb_info *sbi; | 215 | struct autofs_sb_info *sbi; |
212 | struct autofs_info *ino; | 216 | struct autofs_info *ino; |
@@ -217,7 +221,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) | |||
217 | sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); | 221 | sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); |
218 | if (!sbi) | 222 | if (!sbi) |
219 | return -ENOMEM; | 223 | return -ENOMEM; |
220 | DPRINTK("starting up, sbi = %p",sbi); | 224 | pr_debug("starting up, sbi = %p\n", sbi); |
221 | 225 | ||
222 | s->s_fs_info = sbi; | 226 | s->s_fs_info = sbi; |
223 | sbi->magic = AUTOFS_SBI_MAGIC; | 227 | sbi->magic = AUTOFS_SBI_MAGIC; |
@@ -266,14 +270,14 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) | |||
266 | if (parse_options(data, &pipefd, &root_inode->i_uid, &root_inode->i_gid, | 270 | if (parse_options(data, &pipefd, &root_inode->i_uid, &root_inode->i_gid, |
267 | &pgrp, &pgrp_set, &sbi->type, &sbi->min_proto, | 271 | &pgrp, &pgrp_set, &sbi->type, &sbi->min_proto, |
268 | &sbi->max_proto)) { | 272 | &sbi->max_proto)) { |
269 | printk("autofs: called with bogus options\n"); | 273 | pr_err("called with bogus options\n"); |
270 | goto fail_dput; | 274 | goto fail_dput; |
271 | } | 275 | } |
272 | 276 | ||
273 | if (pgrp_set) { | 277 | if (pgrp_set) { |
274 | sbi->oz_pgrp = find_get_pid(pgrp); | 278 | sbi->oz_pgrp = find_get_pid(pgrp); |
275 | if (!sbi->oz_pgrp) { | 279 | if (!sbi->oz_pgrp) { |
276 | pr_warn("autofs: could not find process group %d\n", | 280 | pr_err("could not find process group %d\n", |
277 | pgrp); | 281 | pgrp); |
278 | goto fail_dput; | 282 | goto fail_dput; |
279 | } | 283 | } |
@@ -290,10 +294,10 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) | |||
290 | /* Couldn't this be tested earlier? */ | 294 | /* Couldn't this be tested earlier? */ |
291 | if (sbi->max_proto < AUTOFS_MIN_PROTO_VERSION || | 295 | if (sbi->max_proto < AUTOFS_MIN_PROTO_VERSION || |
292 | sbi->min_proto > AUTOFS_MAX_PROTO_VERSION) { | 296 | sbi->min_proto > AUTOFS_MAX_PROTO_VERSION) { |
293 | printk("autofs: kernel does not match daemon version " | 297 | pr_err("kernel does not match daemon version " |
294 | "daemon (%d, %d) kernel (%d, %d)\n", | 298 | "daemon (%d, %d) kernel (%d, %d)\n", |
295 | sbi->min_proto, sbi->max_proto, | 299 | sbi->min_proto, sbi->max_proto, |
296 | AUTOFS_MIN_PROTO_VERSION, AUTOFS_MAX_PROTO_VERSION); | 300 | AUTOFS_MIN_PROTO_VERSION, AUTOFS_MAX_PROTO_VERSION); |
297 | goto fail_dput; | 301 | goto fail_dput; |
298 | } | 302 | } |
299 | 303 | ||
@@ -304,11 +308,11 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) | |||
304 | sbi->version = sbi->max_proto; | 308 | sbi->version = sbi->max_proto; |
305 | sbi->sub_version = AUTOFS_PROTO_SUBVERSION; | 309 | sbi->sub_version = AUTOFS_PROTO_SUBVERSION; |
306 | 310 | ||
307 | DPRINTK("pipe fd = %d, pgrp = %u", pipefd, pid_nr(sbi->oz_pgrp)); | 311 | pr_debug("pipe fd = %d, pgrp = %u\n", pipefd, pid_nr(sbi->oz_pgrp)); |
308 | pipe = fget(pipefd); | 312 | pipe = fget(pipefd); |
309 | 313 | ||
310 | if (!pipe) { | 314 | if (!pipe) { |
311 | printk("autofs: could not open pipe file descriptor\n"); | 315 | pr_err("could not open pipe file descriptor\n"); |
312 | goto fail_dput; | 316 | goto fail_dput; |
313 | } | 317 | } |
314 | ret = autofs_prepare_pipe(pipe); | 318 | ret = autofs_prepare_pipe(pipe); |
@@ -323,12 +327,12 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) | |||
323 | */ | 327 | */ |
324 | s->s_root = root; | 328 | s->s_root = root; |
325 | return 0; | 329 | return 0; |
326 | 330 | ||
327 | /* | 331 | /* |
328 | * Failure ... clean up. | 332 | * Failure ... clean up. |
329 | */ | 333 | */ |
330 | fail_fput: | 334 | fail_fput: |
331 | printk("autofs: pipe file descriptor does not contain proper ops\n"); | 335 | pr_err("pipe file descriptor does not contain proper ops\n"); |
332 | fput(pipe); | 336 | fput(pipe); |
333 | /* fall through */ | 337 | /* fall through */ |
334 | fail_dput: | 338 | fail_dput: |
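autofs4_fill_super() unwinds failures through a chain of labels, each releasing one more resource and falling through to the next. A minimal sketch of the shape, with invented helper names (example_prepare_pipe() is hypothetical):

    struct example_info { struct file *pipe; };

    static int example_fill_super(int pipefd)
    {
            struct example_info *info;
            struct file *pipe;

            info = kzalloc(sizeof(*info), GFP_KERNEL);
            if (!info)
                    return -ENOMEM;

            pipe = fget(pipefd);            /* takes a reference on success */
            if (!pipe)
                    goto fail_free;

            if (example_prepare_pipe(pipe)) /* hypothetical validation helper */
                    goto fail_fput;

            info->pipe = pipe;
            return 0;

    fail_fput:
            fput(pipe);                     /* drop the reference taken by fget() */
            /* fall through */
    fail_free:
            kfree(info);
            return -EINVAL;
    }

Ordering the labels from most- to least-recently acquired resource keeps every early exit correct without duplicating cleanup code.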
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index c6d7d3dbd52a..9328b5861c7a 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c | |||
@@ -1,16 +1,12 @@ | |||
1 | /* -*- c -*- --------------------------------------------------------------- * | 1 | /* |
2 | * | 2 | * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved |
3 | * linux/fs/autofs/root.c | 3 | * Copyright 1999-2000 Jeremy Fitzhardinge <jeremy@goop.org> |
4 | * | 4 | * Copyright 2001-2006 Ian Kent <raven@themaw.net> |
5 | * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved | ||
6 | * Copyright 1999-2000 Jeremy Fitzhardinge <jeremy@goop.org> | ||
7 | * Copyright 2001-2006 Ian Kent <raven@themaw.net> | ||
8 | * | 5 | * |
9 | * This file is part of the Linux kernel and is made available under | 6 | * This file is part of the Linux kernel and is made available under |
10 | * the terms of the GNU General Public License, version 2, or at your | 7 | * the terms of the GNU General Public License, version 2, or at your |
11 | * option, any later version, incorporated herein by reference. | 8 | * option, any later version, incorporated herein by reference. |
12 | * | 9 | */ |
13 | * ------------------------------------------------------------------------- */ | ||
14 | 10 | ||
15 | #include <linux/capability.h> | 11 | #include <linux/capability.h> |
16 | #include <linux/errno.h> | 12 | #include <linux/errno.h> |
@@ -23,16 +19,18 @@ | |||
23 | 19 | ||
24 | #include "autofs_i.h" | 20 | #include "autofs_i.h" |
25 | 21 | ||
26 | static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *); | 22 | static int autofs4_dir_symlink(struct inode *, struct dentry *, const char *); |
27 | static int autofs4_dir_unlink(struct inode *,struct dentry *); | 23 | static int autofs4_dir_unlink(struct inode *, struct dentry *); |
28 | static int autofs4_dir_rmdir(struct inode *,struct dentry *); | 24 | static int autofs4_dir_rmdir(struct inode *, struct dentry *); |
29 | static int autofs4_dir_mkdir(struct inode *,struct dentry *,umode_t); | 25 | static int autofs4_dir_mkdir(struct inode *, struct dentry *, umode_t); |
30 | static long autofs4_root_ioctl(struct file *,unsigned int,unsigned long); | 26 | static long autofs4_root_ioctl(struct file *, unsigned int, unsigned long); |
31 | #ifdef CONFIG_COMPAT | 27 | #ifdef CONFIG_COMPAT |
32 | static long autofs4_root_compat_ioctl(struct file *,unsigned int,unsigned long); | 28 | static long autofs4_root_compat_ioctl(struct file *, |
29 | unsigned int, unsigned long); | ||
33 | #endif | 30 | #endif |
34 | static int autofs4_dir_open(struct inode *inode, struct file *file); | 31 | static int autofs4_dir_open(struct inode *inode, struct file *file); |
35 | static struct dentry *autofs4_lookup(struct inode *,struct dentry *, unsigned int); | 32 | static struct dentry *autofs4_lookup(struct inode *, |
33 | struct dentry *, unsigned int); | ||
36 | static struct vfsmount *autofs4_d_automount(struct path *); | 34 | static struct vfsmount *autofs4_d_automount(struct path *); |
37 | static int autofs4_d_manage(struct dentry *, bool); | 35 | static int autofs4_d_manage(struct dentry *, bool); |
38 | static void autofs4_dentry_release(struct dentry *); | 36 | static void autofs4_dentry_release(struct dentry *); |
@@ -74,7 +72,9 @@ const struct dentry_operations autofs4_dentry_operations = { | |||
74 | static void autofs4_add_active(struct dentry *dentry) | 72 | static void autofs4_add_active(struct dentry *dentry) |
75 | { | 73 | { |
76 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); | 74 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); |
77 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | 75 | struct autofs_info *ino; |
76 | |||
77 | ino = autofs4_dentry_ino(dentry); | ||
78 | if (ino) { | 78 | if (ino) { |
79 | spin_lock(&sbi->lookup_lock); | 79 | spin_lock(&sbi->lookup_lock); |
80 | if (!ino->active_count) { | 80 | if (!ino->active_count) { |
@@ -84,13 +84,14 @@ static void autofs4_add_active(struct dentry *dentry) | |||
84 | ino->active_count++; | 84 | ino->active_count++; |
85 | spin_unlock(&sbi->lookup_lock); | 85 | spin_unlock(&sbi->lookup_lock); |
86 | } | 86 | } |
87 | return; | ||
88 | } | 87 | } |
89 | 88 | ||
90 | static void autofs4_del_active(struct dentry *dentry) | 89 | static void autofs4_del_active(struct dentry *dentry) |
91 | { | 90 | { |
92 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); | 91 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); |
93 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | 92 | struct autofs_info *ino; |
93 | |||
94 | ino = autofs4_dentry_ino(dentry); | ||
94 | if (ino) { | 95 | if (ino) { |
95 | spin_lock(&sbi->lookup_lock); | 96 | spin_lock(&sbi->lookup_lock); |
96 | ino->active_count--; | 97 | ino->active_count--; |
@@ -100,7 +101,6 @@ static void autofs4_del_active(struct dentry *dentry) | |||
100 | } | 101 | } |
101 | spin_unlock(&sbi->lookup_lock); | 102 | spin_unlock(&sbi->lookup_lock); |
102 | } | 103 | } |
103 | return; | ||
104 | } | 104 | } |
105 | 105 | ||
106 | static int autofs4_dir_open(struct inode *inode, struct file *file) | 106 | static int autofs4_dir_open(struct inode *inode, struct file *file) |
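The add_active/del_active hunks above (and the matching ones further down in root.c) also delete a bare return; as the last statement of a void function, another checkpatch nit. Minimal illustration:

    static void example_void(void)
    {
            do_work();      /* hypothetical */
            return;         /* redundant; this is the line the patch drops */
    }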
@@ -108,7 +108,7 @@ static int autofs4_dir_open(struct inode *inode, struct file *file) | |||
108 | struct dentry *dentry = file->f_path.dentry; | 108 | struct dentry *dentry = file->f_path.dentry; |
109 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); | 109 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); |
110 | 110 | ||
111 | DPRINTK("file=%p dentry=%p %pd", file, dentry, dentry); | 111 | pr_debug("file=%p dentry=%p %pd\n", file, dentry, dentry); |
112 | 112 | ||
113 | if (autofs4_oz_mode(sbi)) | 113 | if (autofs4_oz_mode(sbi)) |
114 | goto out; | 114 | goto out; |
@@ -138,7 +138,7 @@ static void autofs4_dentry_release(struct dentry *de) | |||
138 | struct autofs_info *ino = autofs4_dentry_ino(de); | 138 | struct autofs_info *ino = autofs4_dentry_ino(de); |
139 | struct autofs_sb_info *sbi = autofs4_sbi(de->d_sb); | 139 | struct autofs_sb_info *sbi = autofs4_sbi(de->d_sb); |
140 | 140 | ||
141 | DPRINTK("releasing %p", de); | 141 | pr_debug("releasing %p\n", de); |
142 | 142 | ||
143 | if (!ino) | 143 | if (!ino) |
144 | return; | 144 | return; |
@@ -278,9 +278,9 @@ static int autofs4_mount_wait(struct dentry *dentry, bool rcu_walk) | |||
278 | if (ino->flags & AUTOFS_INF_PENDING) { | 278 | if (ino->flags & AUTOFS_INF_PENDING) { |
279 | if (rcu_walk) | 279 | if (rcu_walk) |
280 | return -ECHILD; | 280 | return -ECHILD; |
281 | DPRINTK("waiting for mount name=%pd", dentry); | 281 | pr_debug("waiting for mount name=%pd\n", dentry); |
282 | status = autofs4_wait(sbi, dentry, NFY_MOUNT); | 282 | status = autofs4_wait(sbi, dentry, NFY_MOUNT); |
283 | DPRINTK("mount wait done status=%d", status); | 283 | pr_debug("mount wait done status=%d\n", status); |
284 | } | 284 | } |
285 | ino->last_used = jiffies; | 285 | ino->last_used = jiffies; |
286 | return status; | 286 | return status; |
@@ -320,7 +320,9 @@ static struct dentry *autofs4_mountpoint_changed(struct path *path) | |||
320 | if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) { | 320 | if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) { |
321 | struct dentry *parent = dentry->d_parent; | 321 | struct dentry *parent = dentry->d_parent; |
322 | struct autofs_info *ino; | 322 | struct autofs_info *ino; |
323 | struct dentry *new = d_lookup(parent, &dentry->d_name); | 323 | struct dentry *new; |
324 | |||
325 | new = d_lookup(parent, &dentry->d_name); | ||
324 | if (!new) | 326 | if (!new) |
325 | return NULL; | 327 | return NULL; |
326 | ino = autofs4_dentry_ino(new); | 328 | ino = autofs4_dentry_ino(new); |
@@ -338,7 +340,7 @@ static struct vfsmount *autofs4_d_automount(struct path *path) | |||
338 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | 340 | struct autofs_info *ino = autofs4_dentry_ino(dentry); |
339 | int status; | 341 | int status; |
340 | 342 | ||
341 | DPRINTK("dentry=%p %pd", dentry, dentry); | 343 | pr_debug("dentry=%p %pd\n", dentry, dentry); |
342 | 344 | ||
343 | /* The daemon never triggers a mount. */ | 345 | /* The daemon never triggers a mount. */ |
344 | if (autofs4_oz_mode(sbi)) | 346 | if (autofs4_oz_mode(sbi)) |
@@ -425,7 +427,7 @@ static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk) | |||
425 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | 427 | struct autofs_info *ino = autofs4_dentry_ino(dentry); |
426 | int status; | 428 | int status; |
427 | 429 | ||
428 | DPRINTK("dentry=%p %pd", dentry, dentry); | 430 | pr_debug("dentry=%p %pd\n", dentry, dentry); |
429 | 431 | ||
430 | /* The daemon never waits. */ | 432 | /* The daemon never waits. */ |
431 | if (autofs4_oz_mode(sbi)) { | 433 | if (autofs4_oz_mode(sbi)) { |
@@ -455,6 +457,7 @@ static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk) | |||
455 | * a mount-trap. | 457 | * a mount-trap. |
456 | */ | 458 | */ |
457 | struct inode *inode; | 459 | struct inode *inode; |
460 | |||
458 | if (ino->flags & (AUTOFS_INF_EXPIRING | AUTOFS_INF_NO_RCU)) | 461 | if (ino->flags & (AUTOFS_INF_EXPIRING | AUTOFS_INF_NO_RCU)) |
459 | return 0; | 462 | return 0; |
460 | if (d_mountpoint(dentry)) | 463 | if (d_mountpoint(dentry)) |
@@ -494,13 +497,14 @@ static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk) | |||
494 | } | 497 | } |
495 | 498 | ||
496 | /* Lookups in the root directory */ | 499 | /* Lookups in the root directory */ |
497 | static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) | 500 | static struct dentry *autofs4_lookup(struct inode *dir, |
501 | struct dentry *dentry, unsigned int flags) | ||
498 | { | 502 | { |
499 | struct autofs_sb_info *sbi; | 503 | struct autofs_sb_info *sbi; |
500 | struct autofs_info *ino; | 504 | struct autofs_info *ino; |
501 | struct dentry *active; | 505 | struct dentry *active; |
502 | 506 | ||
503 | DPRINTK("name = %pd", dentry); | 507 | pr_debug("name = %pd\n", dentry); |
504 | 508 | ||
505 | /* File name too long to exist */ | 509 | /* File name too long to exist */ |
506 | if (dentry->d_name.len > NAME_MAX) | 510 | if (dentry->d_name.len > NAME_MAX) |
@@ -508,14 +512,14 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, u | |||
508 | 512 | ||
509 | sbi = autofs4_sbi(dir->i_sb); | 513 | sbi = autofs4_sbi(dir->i_sb); |
510 | 514 | ||
511 | DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d", | 515 | pr_debug("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d\n", |
512 | current->pid, task_pgrp_nr(current), sbi->catatonic, | 516 | current->pid, task_pgrp_nr(current), sbi->catatonic, |
513 | autofs4_oz_mode(sbi)); | 517 | autofs4_oz_mode(sbi)); |
514 | 518 | ||
515 | active = autofs4_lookup_active(dentry); | 519 | active = autofs4_lookup_active(dentry); |
516 | if (active) { | 520 | if (active) |
517 | return active; | 521 | return active; |
518 | } else { | 522 | else { |
519 | /* | 523 | /* |
520 | * A dentry that is not within the root can never trigger a | 524 | * A dentry that is not within the root can never trigger a |
521 | * mount operation, unless the directory already exists, so we | 525 | * mount operation, unless the directory already exists, so we |
@@ -526,7 +530,8 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, u | |||
526 | return ERR_PTR(-ENOENT); | 530 | return ERR_PTR(-ENOENT); |
527 | 531 | ||
528 | /* Mark entries in the root as mount triggers */ | 532 | /* Mark entries in the root as mount triggers */ |
529 | if (autofs_type_indirect(sbi->type) && IS_ROOT(dentry->d_parent)) | 533 | if (IS_ROOT(dentry->d_parent) && |
534 | autofs_type_indirect(sbi->type)) | ||
530 | __managed_dentry_set_managed(dentry); | 535 | __managed_dentry_set_managed(dentry); |
531 | 536 | ||
532 | ino = autofs4_new_ino(sbi); | 537 | ino = autofs4_new_ino(sbi); |
@@ -554,7 +559,7 @@ static int autofs4_dir_symlink(struct inode *dir, | |||
554 | size_t size = strlen(symname); | 559 | size_t size = strlen(symname); |
555 | char *cp; | 560 | char *cp; |
556 | 561 | ||
557 | DPRINTK("%s <- %pd", symname, dentry); | 562 | pr_debug("%s <- %pd\n", symname, dentry); |
558 | 563 | ||
559 | if (!autofs4_oz_mode(sbi)) | 564 | if (!autofs4_oz_mode(sbi)) |
560 | return -EACCES; | 565 | return -EACCES; |
@@ -613,7 +618,7 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry) | |||
613 | struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); | 618 | struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); |
614 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | 619 | struct autofs_info *ino = autofs4_dentry_ino(dentry); |
615 | struct autofs_info *p_ino; | 620 | struct autofs_info *p_ino; |
616 | 621 | ||
617 | /* This allows root to remove symlinks */ | 622 | /* This allows root to remove symlinks */ |
618 | if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) | 623 | if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) |
619 | return -EPERM; | 624 | return -EPERM; |
@@ -664,7 +669,6 @@ static void autofs_set_leaf_automount_flags(struct dentry *dentry) | |||
664 | if (IS_ROOT(parent->d_parent)) | 669 | if (IS_ROOT(parent->d_parent)) |
665 | return; | 670 | return; |
666 | managed_dentry_clear_managed(parent); | 671 | managed_dentry_clear_managed(parent); |
667 | return; | ||
668 | } | 672 | } |
669 | 673 | ||
670 | static void autofs_clear_leaf_automount_flags(struct dentry *dentry) | 674 | static void autofs_clear_leaf_automount_flags(struct dentry *dentry) |
@@ -687,7 +691,6 @@ static void autofs_clear_leaf_automount_flags(struct dentry *dentry) | |||
687 | if (d_child->next == &parent->d_subdirs && | 691 | if (d_child->next == &parent->d_subdirs && |
688 | d_child->prev == &parent->d_subdirs) | 692 | d_child->prev == &parent->d_subdirs) |
689 | managed_dentry_set_managed(parent); | 693 | managed_dentry_set_managed(parent); |
690 | return; | ||
691 | } | 694 | } |
692 | 695 | ||
693 | static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry) | 696 | static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry) |
@@ -695,8 +698,8 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry) | |||
695 | struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); | 698 | struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); |
696 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | 699 | struct autofs_info *ino = autofs4_dentry_ino(dentry); |
697 | struct autofs_info *p_ino; | 700 | struct autofs_info *p_ino; |
698 | 701 | ||
699 | DPRINTK("dentry %p, removing %pd", dentry, dentry); | 702 | pr_debug("dentry %p, removing %pd\n", dentry, dentry); |
700 | 703 | ||
701 | if (!autofs4_oz_mode(sbi)) | 704 | if (!autofs4_oz_mode(sbi)) |
702 | return -EACCES; | 705 | return -EACCES; |
@@ -728,7 +731,8 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry) | |||
728 | return 0; | 731 | return 0; |
729 | } | 732 | } |
730 | 733 | ||
731 | static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | 734 | static int autofs4_dir_mkdir(struct inode *dir, |
735 | struct dentry *dentry, umode_t mode) | ||
732 | { | 736 | { |
733 | struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); | 737 | struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); |
734 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | 738 | struct autofs_info *ino = autofs4_dentry_ino(dentry); |
@@ -738,7 +742,7 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, umode_t m | |||
738 | if (!autofs4_oz_mode(sbi)) | 742 | if (!autofs4_oz_mode(sbi)) |
739 | return -EACCES; | 743 | return -EACCES; |
740 | 744 | ||
741 | DPRINTK("dentry %p, creating %pd", dentry, dentry); | 745 | pr_debug("dentry %p, creating %pd\n", dentry, dentry); |
742 | 746 | ||
743 | BUG_ON(!ino); | 747 | BUG_ON(!ino); |
744 | 748 | ||
@@ -768,14 +772,18 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, umode_t m | |||
768 | /* Get/set timeout ioctl() operation */ | 772 | /* Get/set timeout ioctl() operation */ |
769 | #ifdef CONFIG_COMPAT | 773 | #ifdef CONFIG_COMPAT |
770 | static inline int autofs4_compat_get_set_timeout(struct autofs_sb_info *sbi, | 774 | static inline int autofs4_compat_get_set_timeout(struct autofs_sb_info *sbi, |
771 | compat_ulong_t __user *p) | 775 | compat_ulong_t __user *p) |
772 | { | 776 | { |
773 | int rv; | ||
774 | unsigned long ntimeout; | 777 | unsigned long ntimeout; |
778 | int rv; | ||
775 | 779 | ||
776 | if ((rv = get_user(ntimeout, p)) || | 780 | rv = get_user(ntimeout, p); |
777 | (rv = put_user(sbi->exp_timeout/HZ, p))) | 781 | if (rv) |
778 | return rv; | 782 | goto error; |
783 | |||
784 | rv = put_user(sbi->exp_timeout/HZ, p); | ||
785 | if (rv) | ||
786 | goto error; | ||
779 | 787 | ||
780 | if (ntimeout > UINT_MAX/HZ) | 788 | if (ntimeout > UINT_MAX/HZ) |
781 | sbi->exp_timeout = 0; | 789 | sbi->exp_timeout = 0; |
@@ -783,18 +791,24 @@ static inline int autofs4_compat_get_set_timeout(struct autofs_sb_info *sbi, | |||
783 | sbi->exp_timeout = ntimeout * HZ; | 791 | sbi->exp_timeout = ntimeout * HZ; |
784 | 792 | ||
785 | return 0; | 793 | return 0; |
794 | error: | ||
795 | return rv; | ||
786 | } | 796 | } |
787 | #endif | 797 | #endif |
788 | 798 | ||
789 | static inline int autofs4_get_set_timeout(struct autofs_sb_info *sbi, | 799 | static inline int autofs4_get_set_timeout(struct autofs_sb_info *sbi, |
790 | unsigned long __user *p) | 800 | unsigned long __user *p) |
791 | { | 801 | { |
792 | int rv; | ||
793 | unsigned long ntimeout; | 802 | unsigned long ntimeout; |
803 | int rv; | ||
794 | 804 | ||
795 | if ((rv = get_user(ntimeout, p)) || | 805 | rv = get_user(ntimeout, p); |
796 | (rv = put_user(sbi->exp_timeout/HZ, p))) | 806 | if (rv) |
797 | return rv; | 807 | goto error; |
808 | |||
809 | rv = put_user(sbi->exp_timeout/HZ, p); | ||
810 | if (rv) | ||
811 | goto error; | ||
798 | 812 | ||
799 | if (ntimeout > ULONG_MAX/HZ) | 813 | if (ntimeout > ULONG_MAX/HZ) |
800 | sbi->exp_timeout = 0; | 814 | sbi->exp_timeout = 0; |
@@ -802,16 +816,20 @@ static inline int autofs4_get_set_timeout(struct autofs_sb_info *sbi, | |||
802 | sbi->exp_timeout = ntimeout * HZ; | 816 | sbi->exp_timeout = ntimeout * HZ; |
803 | 817 | ||
804 | return 0; | 818 | return 0; |
819 | error: | ||
820 | return rv; | ||
805 | } | 821 | } |
806 | 822 | ||
807 | /* Return protocol version */ | 823 | /* Return protocol version */ |
808 | static inline int autofs4_get_protover(struct autofs_sb_info *sbi, int __user *p) | 824 | static inline int autofs4_get_protover(struct autofs_sb_info *sbi, |
825 | int __user *p) | ||
809 | { | 826 | { |
810 | return put_user(sbi->version, p); | 827 | return put_user(sbi->version, p); |
811 | } | 828 | } |
812 | 829 | ||
813 | /* Return protocol sub version */ | 830 | /* Return protocol sub version */ |
814 | static inline int autofs4_get_protosubver(struct autofs_sb_info *sbi, int __user *p) | 831 | static inline int autofs4_get_protosubver(struct autofs_sb_info *sbi, |
832 | int __user *p) | ||
815 | { | 833 | { |
816 | return put_user(sbi->sub_version, p); | 834 | return put_user(sbi->sub_version, p); |
817 | } | 835 | } |
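Both timeout helpers above (compat and native) replace assignments buried in an if-condition with two explicit steps sharing one error label. The semantics are unchanged: get_user() first reads the daemon's new timeout, then put_user() writes the old one back, so the ioctl is a get-and-set in a single call. Sketch of the transformation:

    /* before: assignments inside the condition */
    if ((rv = get_user(ntimeout, p)) ||
        (rv = put_user(sbi->exp_timeout / HZ, p)))
            return rv;

    /* after: sequential checks, one exit path */
    rv = get_user(ntimeout, p);
    if (rv)
            goto error;
    rv = put_user(sbi->exp_timeout / HZ, p);
    if (rv)
            goto error;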
@@ -826,7 +844,7 @@ static inline int autofs4_ask_umount(struct vfsmount *mnt, int __user *p) | |||
826 | if (may_umount(mnt)) | 844 | if (may_umount(mnt)) |
827 | status = 1; | 845 | status = 1; |
828 | 846 | ||
829 | DPRINTK("returning %d", status); | 847 | pr_debug("returning %d\n", status); |
830 | 848 | ||
831 | status = put_user(status, p); | 849 | status = put_user(status, p); |
832 | 850 | ||
@@ -834,9 +852,9 @@ static inline int autofs4_ask_umount(struct vfsmount *mnt, int __user *p) | |||
834 | } | 852 | } |
835 | 853 | ||
836 | /* Identify autofs4_dentries - this is so we can tell if there's | 854 | /* Identify autofs4_dentries - this is so we can tell if there's |
837 | an extra dentry refcount or not. We only hold a refcount on the | 855 | * an extra dentry refcount or not. We only hold a refcount on the |
838 | dentry if its non-negative (ie, d_inode != NULL) | 856 | * dentry if its non-negative (ie, d_inode != NULL) |
839 | */ | 857 | */ |
840 | int is_autofs4_dentry(struct dentry *dentry) | 858 | int is_autofs4_dentry(struct dentry *dentry) |
841 | { | 859 | { |
842 | return dentry && d_really_is_positive(dentry) && | 860 | return dentry && d_really_is_positive(dentry) && |
@@ -854,21 +872,21 @@ static int autofs4_root_ioctl_unlocked(struct inode *inode, struct file *filp, | |||
854 | struct autofs_sb_info *sbi = autofs4_sbi(inode->i_sb); | 872 | struct autofs_sb_info *sbi = autofs4_sbi(inode->i_sb); |
855 | void __user *p = (void __user *)arg; | 873 | void __user *p = (void __user *)arg; |
856 | 874 | ||
857 | DPRINTK("cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u", | 875 | pr_debug("cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u\n", |
858 | cmd,arg,sbi,task_pgrp_nr(current)); | 876 | cmd, arg, sbi, task_pgrp_nr(current)); |
859 | 877 | ||
860 | if (_IOC_TYPE(cmd) != _IOC_TYPE(AUTOFS_IOC_FIRST) || | 878 | if (_IOC_TYPE(cmd) != _IOC_TYPE(AUTOFS_IOC_FIRST) || |
861 | _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT) | 879 | _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT) |
862 | return -ENOTTY; | 880 | return -ENOTTY; |
863 | 881 | ||
864 | if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) | 882 | if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) |
865 | return -EPERM; | 883 | return -EPERM; |
866 | 884 | ||
867 | switch(cmd) { | 885 | switch (cmd) { |
868 | case AUTOFS_IOC_READY: /* Wait queue: go ahead and retry */ | 886 | case AUTOFS_IOC_READY: /* Wait queue: go ahead and retry */ |
869 | return autofs4_wait_release(sbi,(autofs_wqt_t)arg,0); | 887 | return autofs4_wait_release(sbi, (autofs_wqt_t) arg, 0); |
870 | case AUTOFS_IOC_FAIL: /* Wait queue: fail with ENOENT */ | 888 | case AUTOFS_IOC_FAIL: /* Wait queue: fail with ENOENT */ |
871 | return autofs4_wait_release(sbi,(autofs_wqt_t)arg,-ENOENT); | 889 | return autofs4_wait_release(sbi, (autofs_wqt_t) arg, -ENOENT); |
872 | case AUTOFS_IOC_CATATONIC: /* Enter catatonic mode (daemon shutdown) */ | 890 | case AUTOFS_IOC_CATATONIC: /* Enter catatonic mode (daemon shutdown) */ |
873 | autofs4_catatonic_mode(sbi); | 891 | autofs4_catatonic_mode(sbi); |
874 | return 0; | 892 | return 0; |
@@ -888,13 +906,15 @@ static int autofs4_root_ioctl_unlocked(struct inode *inode, struct file *filp, | |||
888 | 906 | ||
889 | /* return a single thing to expire */ | 907 | /* return a single thing to expire */ |
890 | case AUTOFS_IOC_EXPIRE: | 908 | case AUTOFS_IOC_EXPIRE: |
891 | return autofs4_expire_run(inode->i_sb,filp->f_path.mnt,sbi, p); | 909 | return autofs4_expire_run(inode->i_sb, |
910 | filp->f_path.mnt, sbi, p); | ||
892 | /* same as above, but can send multiple expires through pipe */ | 911 | /* same as above, but can send multiple expires through pipe */ |
893 | case AUTOFS_IOC_EXPIRE_MULTI: | 912 | case AUTOFS_IOC_EXPIRE_MULTI: |
894 | return autofs4_expire_multi(inode->i_sb,filp->f_path.mnt,sbi, p); | 913 | return autofs4_expire_multi(inode->i_sb, |
914 | filp->f_path.mnt, sbi, p); | ||
895 | 915 | ||
896 | default: | 916 | default: |
897 | return -ENOSYS; | 917 | return -EINVAL; |
898 | } | 918 | } |
899 | } | 919 | } |
900 | 920 | ||
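Two things change in the dispatch above: the long AUTOFS_IOC_EXPIRE lines are wrapped, and the default case now returns -EINVAL instead of -ENOSYS. -ENOSYS is conventionally reserved for nonexistent system calls; commands outside the autofs range are already rejected with -ENOTTY by the _IOC_TYPE()/_IOC_NR() guard at the top, so an in-range but unimplemented command is reported as an invalid argument. From userspace (the command name here is invented for illustration):

    if (ioctl(fd, AUTOFS_IOC_FUTURE_CMD, &arg) < 0 && errno == EINVAL)
            fprintf(stderr, "autofs: ioctl not supported by this kernel\n");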
@@ -902,12 +922,13 @@ static long autofs4_root_ioctl(struct file *filp, | |||
902 | unsigned int cmd, unsigned long arg) | 922 | unsigned int cmd, unsigned long arg) |
903 | { | 923 | { |
904 | struct inode *inode = file_inode(filp); | 924 | struct inode *inode = file_inode(filp); |
925 | |||
905 | return autofs4_root_ioctl_unlocked(inode, filp, cmd, arg); | 926 | return autofs4_root_ioctl_unlocked(inode, filp, cmd, arg); |
906 | } | 927 | } |
907 | 928 | ||
908 | #ifdef CONFIG_COMPAT | 929 | #ifdef CONFIG_COMPAT |
909 | static long autofs4_root_compat_ioctl(struct file *filp, | 930 | static long autofs4_root_compat_ioctl(struct file *filp, |
910 | unsigned int cmd, unsigned long arg) | 931 | unsigned int cmd, unsigned long arg) |
911 | { | 932 | { |
912 | struct inode *inode = file_inode(filp); | 933 | struct inode *inode = file_inode(filp); |
913 | int ret; | 934 | int ret; |
@@ -916,7 +937,7 @@ static long autofs4_root_compat_ioctl(struct file *filp, | |||
916 | ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, arg); | 937 | ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, arg); |
917 | else | 938 | else |
918 | ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, | 939 | ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, |
919 | (unsigned long)compat_ptr(arg)); | 940 | (unsigned long) compat_ptr(arg)); |
920 | 941 | ||
921 | return ret; | 942 | return ret; |
922 | } | 943 | } |
diff --git a/fs/autofs4/symlink.c b/fs/autofs4/symlink.c index 84e037d1d129..99aab00dc217 100644 --- a/fs/autofs4/symlink.c +++ b/fs/autofs4/symlink.c | |||
@@ -1,14 +1,10 @@ | |||
1 | /* -*- c -*- --------------------------------------------------------------- * | 1 | /* |
2 | * | 2 | * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved |
3 | * linux/fs/autofs/symlink.c | ||
4 | * | ||
5 | * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved | ||
6 | * | 3 | * |
7 | * This file is part of the Linux kernel and is made available under | 4 | * This file is part of the Linux kernel and is made available under |
8 | * the terms of the GNU General Public License, version 2, or at your | 5 | * the terms of the GNU General Public License, version 2, or at your |
9 | * option, any later version, incorporated herein by reference. | 6 | * option, any later version, incorporated herein by reference. |
10 | * | 7 | */ |
11 | * ------------------------------------------------------------------------- */ | ||
12 | 8 | ||
13 | #include "autofs_i.h" | 9 | #include "autofs_i.h" |
14 | 10 | ||
@@ -18,6 +14,7 @@ static const char *autofs4_get_link(struct dentry *dentry, | |||
18 | { | 14 | { |
19 | struct autofs_sb_info *sbi; | 15 | struct autofs_sb_info *sbi; |
20 | struct autofs_info *ino; | 16 | struct autofs_info *ino; |
17 | |||
21 | if (!dentry) | 18 | if (!dentry) |
22 | return ERR_PTR(-ECHILD); | 19 | return ERR_PTR(-ECHILD); |
23 | sbi = autofs4_sbi(dentry->d_sb); | 20 | sbi = autofs4_sbi(dentry->d_sb); |
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 35b755e79c2d..0146d911f468 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c | |||
@@ -1,15 +1,11 @@ | |||
1 | /* -*- c -*- --------------------------------------------------------------- * | 1 | /* |
2 | * | 2 | * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved |
3 | * linux/fs/autofs/waitq.c | 3 | * Copyright 2001-2006 Ian Kent <raven@themaw.net> |
4 | * | ||
5 | * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved | ||
6 | * Copyright 2001-2006 Ian Kent <raven@themaw.net> | ||
7 | * | 4 | * |
8 | * This file is part of the Linux kernel and is made available under | 5 | * This file is part of the Linux kernel and is made available under |
9 | * the terms of the GNU General Public License, version 2, or at your | 6 | * the terms of the GNU General Public License, version 2, or at your |
10 | * option, any later version, incorporated herein by reference. | 7 | * option, any later version, incorporated herein by reference. |
11 | * | 8 | */ |
12 | * ------------------------------------------------------------------------- */ | ||
13 | 9 | ||
14 | #include <linux/slab.h> | 10 | #include <linux/slab.h> |
15 | #include <linux/time.h> | 11 | #include <linux/time.h> |
@@ -18,7 +14,8 @@ | |||
18 | #include "autofs_i.h" | 14 | #include "autofs_i.h" |
19 | 15 | ||
20 | /* We make this a static variable rather than a part of the superblock; it | 16 | /* We make this a static variable rather than a part of the superblock; it |
21 | is better if we don't reassign numbers easily even across filesystems */ | 17 | * is better if we don't reassign numbers easily even across filesystems |
18 | */ | ||
22 | static autofs_wqt_t autofs4_next_wait_queue = 1; | 19 | static autofs_wqt_t autofs4_next_wait_queue = 1; |
23 | 20 | ||
24 | /* These are the signals we allow interrupting a pending mount */ | 21 | /* These are the signals we allow interrupting a pending mount */ |
@@ -34,7 +31,7 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi) | |||
34 | return; | 31 | return; |
35 | } | 32 | } |
36 | 33 | ||
37 | DPRINTK("entering catatonic mode"); | 34 | pr_debug("entering catatonic mode\n"); |
38 | 35 | ||
39 | sbi->catatonic = 1; | 36 | sbi->catatonic = 1; |
40 | wq = sbi->queues; | 37 | wq = sbi->queues; |
@@ -69,17 +66,19 @@ static int autofs4_write(struct autofs_sb_info *sbi, | |||
69 | set_fs(KERNEL_DS); | 66 | set_fs(KERNEL_DS); |
70 | 67 | ||
71 | mutex_lock(&sbi->pipe_mutex); | 68 | mutex_lock(&sbi->pipe_mutex); |
72 | while (bytes && | 69 | wr = __vfs_write(file, data, bytes, &file->f_pos); |
73 | (wr = __vfs_write(file,data,bytes,&file->f_pos)) > 0) { | 70 | while (bytes && wr) { |
74 | data += wr; | 71 | data += wr; |
75 | bytes -= wr; | 72 | bytes -= wr; |
73 | wr = __vfs_write(file, data, bytes, &file->f_pos); | ||
76 | } | 74 | } |
77 | mutex_unlock(&sbi->pipe_mutex); | 75 | mutex_unlock(&sbi->pipe_mutex); |
78 | 76 | ||
79 | set_fs(fs); | 77 | set_fs(fs); |
80 | 78 | ||
81 | /* Keep the currently executing process from receiving a | 79 | /* Keep the currently executing process from receiving a |
82 | SIGPIPE unless it was already supposed to get one */ | 80 | * SIGPIPE unless it was already supposed to get one |
81 | */ | ||
83 | if (wr == -EPIPE && !sigpipe) { | 82 | if (wr == -EPIPE && !sigpipe) { |
84 | spin_lock_irqsave(¤t->sighand->siglock, flags); | 83 | spin_lock_irqsave(¤t->sighand->siglock, flags); |
85 | sigdelset(¤t->pending.signal, SIGPIPE); | 84 | sigdelset(¤t->pending.signal, SIGPIPE); |
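One behavioural nuance in the rewritten write loop above: the original condition tested (wr = __vfs_write(...)) > 0, while the new loop runs while (bytes && wr), i.e. until wr is exactly zero. A negative return such as -EPIPE would therefore be added to the data pointer and subtracted from bytes on the next pass. A defensive sketch that keeps the style fix but preserves the old termination test:

    mutex_lock(&sbi->pipe_mutex);
    while (bytes) {
            wr = __vfs_write(file, data, bytes, &file->f_pos);
            if (wr <= 0)
                    break;          /* stop on error or EOF, as the old "> 0" test did */
            data += wr;
            bytes -= wr;
    }
    mutex_unlock(&sbi->pipe_mutex);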
@@ -89,7 +88,7 @@ static int autofs4_write(struct autofs_sb_info *sbi, | |||
89 | 88 | ||
90 | return (bytes > 0); | 89 | return (bytes > 0); |
91 | } | 90 | } |
92 | 91 | ||
93 | static void autofs4_notify_daemon(struct autofs_sb_info *sbi, | 92 | static void autofs4_notify_daemon(struct autofs_sb_info *sbi, |
94 | struct autofs_wait_queue *wq, | 93 | struct autofs_wait_queue *wq, |
95 | int type) | 94 | int type) |
@@ -102,10 +101,11 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, | |||
102 | struct file *pipe = NULL; | 101 | struct file *pipe = NULL; |
103 | size_t pktsz; | 102 | size_t pktsz; |
104 | 103 | ||
105 | DPRINTK("wait id = 0x%08lx, name = %.*s, type=%d", | 104 | pr_debug("wait id = 0x%08lx, name = %.*s, type=%d\n", |
106 | (unsigned long) wq->wait_queue_token, wq->name.len, wq->name.name, type); | 105 | (unsigned long) wq->wait_queue_token, |
106 | wq->name.len, wq->name.name, type); | ||
107 | 107 | ||
108 | memset(&pkt,0,sizeof pkt); /* For security reasons */ | 108 | memset(&pkt, 0, sizeof(pkt)); /* For security reasons */ |
109 | 109 | ||
110 | pkt.hdr.proto_version = sbi->version; | 110 | pkt.hdr.proto_version = sbi->version; |
111 | pkt.hdr.type = type; | 111 | pkt.hdr.type = type; |
@@ -126,7 +126,8 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, | |||
126 | } | 126 | } |
127 | case autofs_ptype_expire_multi: | 127 | case autofs_ptype_expire_multi: |
128 | { | 128 | { |
129 | struct autofs_packet_expire_multi *ep = &pkt.v4_pkt.expire_multi; | 129 | struct autofs_packet_expire_multi *ep = |
130 | &pkt.v4_pkt.expire_multi; | ||
130 | 131 | ||
131 | pktsz = sizeof(*ep); | 132 | pktsz = sizeof(*ep); |
132 | 133 | ||
@@ -163,7 +164,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, | |||
163 | break; | 164 | break; |
164 | } | 165 | } |
165 | default: | 166 | default: |
166 | printk("autofs4_notify_daemon: bad type %d!\n", type); | 167 | pr_warn("bad type %d!\n", type); |
167 | mutex_unlock(&sbi->wq_mutex); | 168 | mutex_unlock(&sbi->wq_mutex); |
168 | return; | 169 | return; |
169 | } | 170 | } |
@@ -231,7 +232,7 @@ autofs4_find_wait(struct autofs_sb_info *sbi, struct qstr *qstr) | |||
231 | if (wq->name.hash == qstr->hash && | 232 | if (wq->name.hash == qstr->hash && |
232 | wq->name.len == qstr->len && | 233 | wq->name.len == qstr->len && |
233 | wq->name.name && | 234 | wq->name.name && |
234 | !memcmp(wq->name.name, qstr->name, qstr->len)) | 235 | !memcmp(wq->name.name, qstr->name, qstr->len)) |
235 | break; | 236 | break; |
236 | } | 237 | } |
237 | return wq; | 238 | return wq; |
@@ -248,7 +249,7 @@ autofs4_find_wait(struct autofs_sb_info *sbi, struct qstr *qstr) | |||
248 | static int validate_request(struct autofs_wait_queue **wait, | 249 | static int validate_request(struct autofs_wait_queue **wait, |
249 | struct autofs_sb_info *sbi, | 250 | struct autofs_sb_info *sbi, |
250 | struct qstr *qstr, | 251 | struct qstr *qstr, |
251 | struct dentry*dentry, enum autofs_notify notify) | 252 | struct dentry *dentry, enum autofs_notify notify) |
252 | { | 253 | { |
253 | struct autofs_wait_queue *wq; | 254 | struct autofs_wait_queue *wq; |
254 | struct autofs_info *ino; | 255 | struct autofs_info *ino; |
@@ -322,8 +323,10 @@ static int validate_request(struct autofs_wait_queue **wait, | |||
322 | * continue on and create a new request. | 323 | * continue on and create a new request. |
323 | */ | 324 | */ |
324 | if (!IS_ROOT(dentry)) { | 325 | if (!IS_ROOT(dentry)) { |
325 | if (d_really_is_positive(dentry) && d_unhashed(dentry)) { | 326 | if (d_unhashed(dentry) && |
327 | d_really_is_positive(dentry)) { | ||
326 | struct dentry *parent = dentry->d_parent; | 328 | struct dentry *parent = dentry->d_parent; |
329 | |||
327 | new = d_lookup(parent, &dentry->d_name); | 330 | new = d_lookup(parent, &dentry->d_name); |
328 | if (new) | 331 | if (new) |
329 | dentry = new; | 332 | dentry = new; |
@@ -340,8 +343,8 @@ static int validate_request(struct autofs_wait_queue **wait, | |||
340 | return 1; | 343 | return 1; |
341 | } | 344 | } |
342 | 345 | ||
343 | int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, | 346 | int autofs4_wait(struct autofs_sb_info *sbi, |
344 | enum autofs_notify notify) | 347 | struct dentry *dentry, enum autofs_notify notify) |
345 | { | 348 | { |
346 | struct autofs_wait_queue *wq; | 349 | struct autofs_wait_queue *wq; |
347 | struct qstr qstr; | 350 | struct qstr qstr; |
@@ -411,7 +414,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, | |||
411 | 414 | ||
412 | if (!wq) { | 415 | if (!wq) { |
413 | /* Create a new wait queue */ | 416 | /* Create a new wait queue */ |
414 | wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL); | 417 | wq = kmalloc(sizeof(struct autofs_wait_queue), GFP_KERNEL); |
415 | if (!wq) { | 418 | if (!wq) { |
416 | kfree(qstr.name); | 419 | kfree(qstr.name); |
417 | mutex_unlock(&sbi->wq_mutex); | 420 | mutex_unlock(&sbi->wq_mutex); |
@@ -450,17 +453,19 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, | |||
450 | autofs_ptype_expire_indirect; | 453 | autofs_ptype_expire_indirect; |
451 | } | 454 | } |
452 | 455 | ||
453 | DPRINTK("new wait id = 0x%08lx, name = %.*s, nfy=%d\n", | 456 | pr_debug("new wait id = 0x%08lx, name = %.*s, nfy=%d\n", |
454 | (unsigned long) wq->wait_queue_token, wq->name.len, | 457 | (unsigned long) wq->wait_queue_token, wq->name.len, |
455 | wq->name.name, notify); | 458 | wq->name.name, notify); |
456 | 459 | ||
457 | /* autofs4_notify_daemon() may block; it will unlock ->wq_mutex */ | 460 | /* |
461 | * autofs4_notify_daemon() may block; it will unlock ->wq_mutex | ||
462 | */ | ||
458 | autofs4_notify_daemon(sbi, wq, type); | 463 | autofs4_notify_daemon(sbi, wq, type); |
459 | } else { | 464 | } else { |
460 | wq->wait_ctr++; | 465 | wq->wait_ctr++; |
461 | DPRINTK("existing wait id = 0x%08lx, name = %.*s, nfy=%d", | 466 | pr_debug("existing wait id = 0x%08lx, name = %.*s, nfy=%d\n", |
462 | (unsigned long) wq->wait_queue_token, wq->name.len, | 467 | (unsigned long) wq->wait_queue_token, wq->name.len, |
463 | wq->name.name, notify); | 468 | wq->name.name, notify); |
464 | mutex_unlock(&sbi->wq_mutex); | 469 | mutex_unlock(&sbi->wq_mutex); |
465 | kfree(qstr.name); | 470 | kfree(qstr.name); |
466 | } | 471 | } |
@@ -471,12 +476,14 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, | |||
471 | */ | 476 | */ |
472 | if (wq->name.name) { | 477 | if (wq->name.name) { |
473 | /* Block all but "shutdown" signals while waiting */ | 478 | /* Block all but "shutdown" signals while waiting */ |
474 | sigset_t oldset; | 479 | unsigned long shutdown_sigs_mask; |
475 | unsigned long irqflags; | 480 | unsigned long irqflags; |
481 | sigset_t oldset; | ||
476 | 482 | ||
477 | spin_lock_irqsave(¤t->sighand->siglock, irqflags); | 483 | spin_lock_irqsave(¤t->sighand->siglock, irqflags); |
478 | oldset = current->blocked; | 484 | oldset = current->blocked; |
479 | siginitsetinv(¤t->blocked, SHUTDOWN_SIGS & ~oldset.sig[0]); | 485 | shutdown_sigs_mask = SHUTDOWN_SIGS & ~oldset.sig[0]; |
486 | siginitsetinv(¤t->blocked, shutdown_sigs_mask); | ||
480 | recalc_sigpending(); | 487 | recalc_sigpending(); |
481 | spin_unlock_irqrestore(¤t->sighand->siglock, irqflags); | 488 | spin_unlock_irqrestore(¤t->sighand->siglock, irqflags); |
482 | 489 | ||
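The hunk above only pulls the mask computation into a named local so the siginitsetinv() call fits the line limit; the behaviour is unchanged. siginitsetinv() initializes the blocked set to the complement of its mask, so after these two lines every signal is blocked except the SHUTDOWN_SIGS that were not already blocked beforehand:

    shutdown_sigs_mask = SHUTDOWN_SIGS & ~oldset.sig[0];   /* shutdown signals not already blocked */
    siginitsetinv(&current->blocked, shutdown_sigs_mask);  /* block everything else while sleeping */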
@@ -487,7 +494,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, | |||
487 | recalc_sigpending(); | 494 | recalc_sigpending(); |
488 | spin_unlock_irqrestore(¤t->sighand->siglock, irqflags); | 495 | spin_unlock_irqrestore(¤t->sighand->siglock, irqflags); |
489 | } else { | 496 | } else { |
490 | DPRINTK("skipped sleeping"); | 497 | pr_debug("skipped sleeping\n"); |
491 | } | 498 | } |
492 | 499 | ||
493 | status = wq->status; | 500 | status = wq->status; |
@@ -562,4 +569,3 @@ int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_tok | |||
562 | 569 | ||
563 | return 0; | 570 | return 0; |
564 | } | 571 | } |
565 | |||
diff --git a/fs/buffer.c b/fs/buffer.c index e1632abb4ca9..33be29675358 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -621,17 +621,17 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode); | |||
621 | * If warn is true, then emit a warning if the page is not uptodate and has | 621 | * If warn is true, then emit a warning if the page is not uptodate and has |
622 | * not been truncated. | 622 | * not been truncated. |
623 | * | 623 | * |
624 | * The caller must hold mem_cgroup_begin_page_stat() lock. | 624 | * The caller must hold lock_page_memcg(). |
625 | */ | 625 | */ |
626 | static void __set_page_dirty(struct page *page, struct address_space *mapping, | 626 | static void __set_page_dirty(struct page *page, struct address_space *mapping, |
627 | struct mem_cgroup *memcg, int warn) | 627 | int warn) |
628 | { | 628 | { |
629 | unsigned long flags; | 629 | unsigned long flags; |
630 | 630 | ||
631 | spin_lock_irqsave(&mapping->tree_lock, flags); | 631 | spin_lock_irqsave(&mapping->tree_lock, flags); |
632 | if (page->mapping) { /* Race with truncate? */ | 632 | if (page->mapping) { /* Race with truncate? */ |
633 | WARN_ON_ONCE(warn && !PageUptodate(page)); | 633 | WARN_ON_ONCE(warn && !PageUptodate(page)); |
634 | account_page_dirtied(page, mapping, memcg); | 634 | account_page_dirtied(page, mapping); |
635 | radix_tree_tag_set(&mapping->page_tree, | 635 | radix_tree_tag_set(&mapping->page_tree, |
636 | page_index(page), PAGECACHE_TAG_DIRTY); | 636 | page_index(page), PAGECACHE_TAG_DIRTY); |
637 | } | 637 | } |
@@ -666,7 +666,6 @@ static void __set_page_dirty(struct page *page, struct address_space *mapping, | |||
666 | int __set_page_dirty_buffers(struct page *page) | 666 | int __set_page_dirty_buffers(struct page *page) |
667 | { | 667 | { |
668 | int newly_dirty; | 668 | int newly_dirty; |
669 | struct mem_cgroup *memcg; | ||
670 | struct address_space *mapping = page_mapping(page); | 669 | struct address_space *mapping = page_mapping(page); |
671 | 670 | ||
672 | if (unlikely(!mapping)) | 671 | if (unlikely(!mapping)) |
@@ -683,17 +682,17 @@ int __set_page_dirty_buffers(struct page *page) | |||
683 | } while (bh != head); | 682 | } while (bh != head); |
684 | } | 683 | } |
685 | /* | 684 | /* |
686 | * Use mem_group_begin_page_stat() to keep PageDirty synchronized with | 685 | * Lock out page->mem_cgroup migration to keep PageDirty |
687 | * per-memcg dirty page counters. | 686 | * synchronized with per-memcg dirty page counters. |
688 | */ | 687 | */ |
689 | memcg = mem_cgroup_begin_page_stat(page); | 688 | lock_page_memcg(page); |
690 | newly_dirty = !TestSetPageDirty(page); | 689 | newly_dirty = !TestSetPageDirty(page); |
691 | spin_unlock(&mapping->private_lock); | 690 | spin_unlock(&mapping->private_lock); |
692 | 691 | ||
693 | if (newly_dirty) | 692 | if (newly_dirty) |
694 | __set_page_dirty(page, mapping, memcg, 1); | 693 | __set_page_dirty(page, mapping, 1); |
695 | 694 | ||
696 | mem_cgroup_end_page_stat(memcg); | 695 | unlock_page_memcg(page); |
697 | 696 | ||
698 | if (newly_dirty) | 697 | if (newly_dirty) |
699 | __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); | 698 | __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); |
@@ -1167,15 +1166,14 @@ void mark_buffer_dirty(struct buffer_head *bh) | |||
1167 | if (!test_set_buffer_dirty(bh)) { | 1166 | if (!test_set_buffer_dirty(bh)) { |
1168 | struct page *page = bh->b_page; | 1167 | struct page *page = bh->b_page; |
1169 | struct address_space *mapping = NULL; | 1168 | struct address_space *mapping = NULL; |
1170 | struct mem_cgroup *memcg; | ||
1171 | 1169 | ||
1172 | memcg = mem_cgroup_begin_page_stat(page); | 1170 | lock_page_memcg(page); |
1173 | if (!TestSetPageDirty(page)) { | 1171 | if (!TestSetPageDirty(page)) { |
1174 | mapping = page_mapping(page); | 1172 | mapping = page_mapping(page); |
1175 | if (mapping) | 1173 | if (mapping) |
1176 | __set_page_dirty(page, mapping, memcg, 0); | 1174 | __set_page_dirty(page, mapping, 0); |
1177 | } | 1175 | } |
1178 | mem_cgroup_end_page_stat(memcg); | 1176 | unlock_page_memcg(page); |
1179 | if (mapping) | 1177 | if (mapping) |
1180 | __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); | 1178 | __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); |
1181 | } | 1179 | } |
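The buffer.c conversion is the general shape of the new memcg page-stat API: instead of threading the struct mem_cgroup * returned by mem_cgroup_begin_page_stat() through every callee, the page's memcg binding is pinned in place around the dirtying. A condensed, hedged sketch of the resulting pattern (the function is a hypothetical wrapper; __set_page_dirty() stands in for buffer.c's static helper):

#include <linux/fs.h>
#include <linux/memcontrol.h>
#include <linux/page-flags.h>

static int set_page_dirty_accounted(struct page *page,
				    struct address_space *mapping)
{
	int newly_dirty;

	lock_page_memcg(page);		/* pins page->mem_cgroup */
	newly_dirty = !TestSetPageDirty(page);
	if (newly_dirty)
		__set_page_dirty(page, mapping, 1);	/* no memcg argument */
	unlock_page_memcg(page);

	if (newly_dirty)
		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
	return newly_dirty;
}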
diff --git a/fs/mpage.c b/fs/mpage.c index 1480d3a18037..6bd9fd90964e 100644 --- a/fs/mpage.c +++ b/fs/mpage.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/highmem.h> | 24 | #include <linux/highmem.h> |
25 | #include <linux/prefetch.h> | 25 | #include <linux/prefetch.h> |
26 | #include <linux/mpage.h> | 26 | #include <linux/mpage.h> |
27 | #include <linux/mm_inline.h> | ||
27 | #include <linux/writeback.h> | 28 | #include <linux/writeback.h> |
28 | #include <linux/backing-dev.h> | 29 | #include <linux/backing-dev.h> |
29 | #include <linux/pagevec.h> | 30 | #include <linux/pagevec.h> |
@@ -366,7 +367,7 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages, | |||
366 | map_bh.b_state = 0; | 367 | map_bh.b_state = 0; |
367 | map_bh.b_size = 0; | 368 | map_bh.b_size = 0; |
368 | for (page_idx = 0; page_idx < nr_pages; page_idx++) { | 369 | for (page_idx = 0; page_idx < nr_pages; page_idx++) { |
369 | struct page *page = list_entry(pages->prev, struct page, lru); | 370 | struct page *page = lru_to_page(pages); |
370 | 371 | ||
371 | prefetchw(&page->flags); | 372 | prefetchw(&page->flags); |
372 | list_del(&page->lru); | 373 | list_del(&page->lru); |
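lru_to_page() comes from the newly included <linux/mm_inline.h> and appears to be a pure readability wrapper over the open-coded list_entry() it replaces, roughly:

/* last page on the list, i.e. the next one to process */
#define lru_to_page(head) (list_entry((head)->prev, struct page, lru))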
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index a76b9ea7722e..ef6a2ec494de 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
@@ -287,7 +287,6 @@ struct o2hb_bio_wait_ctxt { | |||
287 | static void o2hb_write_timeout(struct work_struct *work) | 287 | static void o2hb_write_timeout(struct work_struct *work) |
288 | { | 288 | { |
289 | int failed, quorum; | 289 | int failed, quorum; |
290 | unsigned long flags; | ||
291 | struct o2hb_region *reg = | 290 | struct o2hb_region *reg = |
292 | container_of(work, struct o2hb_region, | 291 | container_of(work, struct o2hb_region, |
293 | hr_write_timeout_work.work); | 292 | hr_write_timeout_work.work); |
@@ -297,14 +296,14 @@ static void o2hb_write_timeout(struct work_struct *work) | |||
297 | jiffies_to_msecs(jiffies - reg->hr_last_timeout_start)); | 296 | jiffies_to_msecs(jiffies - reg->hr_last_timeout_start)); |
298 | 297 | ||
299 | if (o2hb_global_heartbeat_active()) { | 298 | if (o2hb_global_heartbeat_active()) { |
300 | spin_lock_irqsave(&o2hb_live_lock, flags); | 299 | spin_lock(&o2hb_live_lock); |
301 | if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap)) | 300 | if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap)) |
302 | set_bit(reg->hr_region_num, o2hb_failed_region_bitmap); | 301 | set_bit(reg->hr_region_num, o2hb_failed_region_bitmap); |
303 | failed = bitmap_weight(o2hb_failed_region_bitmap, | 302 | failed = bitmap_weight(o2hb_failed_region_bitmap, |
304 | O2NM_MAX_REGIONS); | 303 | O2NM_MAX_REGIONS); |
305 | quorum = bitmap_weight(o2hb_quorum_region_bitmap, | 304 | quorum = bitmap_weight(o2hb_quorum_region_bitmap, |
306 | O2NM_MAX_REGIONS); | 305 | O2NM_MAX_REGIONS); |
307 | spin_unlock_irqrestore(&o2hb_live_lock, flags); | 306 | spin_unlock(&o2hb_live_lock); |
308 | 307 | ||
309 | mlog(ML_HEARTBEAT, "Number of regions %d, failed regions %d\n", | 308 | mlog(ML_HEARTBEAT, "Number of regions %d, failed regions %d\n", |
310 | quorum, failed); | 309 | quorum, failed); |
@@ -2425,11 +2424,10 @@ EXPORT_SYMBOL_GPL(o2hb_check_node_heartbeating); | |||
2425 | int o2hb_check_node_heartbeating_no_sem(u8 node_num) | 2424 | int o2hb_check_node_heartbeating_no_sem(u8 node_num) |
2426 | { | 2425 | { |
2427 | unsigned long testing_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 2426 | unsigned long testing_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
2428 | unsigned long flags; | ||
2429 | 2427 | ||
2430 | spin_lock_irqsave(&o2hb_live_lock, flags); | 2428 | spin_lock(&o2hb_live_lock); |
2431 | o2hb_fill_node_map_from_callback(testing_map, sizeof(testing_map)); | 2429 | o2hb_fill_node_map_from_callback(testing_map, sizeof(testing_map)); |
2432 | spin_unlock_irqrestore(&o2hb_live_lock, flags); | 2430 | spin_unlock(&o2hb_live_lock); |
2433 | if (!test_bit(node_num, testing_map)) { | 2431 | if (!test_bit(node_num, testing_map)) { |
2434 | mlog(ML_HEARTBEAT, | 2432 | mlog(ML_HEARTBEAT, |
2435 | "node (%u) does not have heartbeating enabled.\n", | 2433 | "node (%u) does not have heartbeating enabled.\n", |
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index 68c607e63ff6..004f2cbe8f71 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h | |||
@@ -282,6 +282,7 @@ static inline void __dlm_set_joining_node(struct dlm_ctxt *dlm, | |||
282 | #define DLM_LOCK_RES_DROPPING_REF 0x00000040 | 282 | #define DLM_LOCK_RES_DROPPING_REF 0x00000040 |
283 | #define DLM_LOCK_RES_BLOCK_DIRTY 0x00001000 | 283 | #define DLM_LOCK_RES_BLOCK_DIRTY 0x00001000 |
284 | #define DLM_LOCK_RES_SETREF_INPROG 0x00002000 | 284 | #define DLM_LOCK_RES_SETREF_INPROG 0x00002000 |
285 | #define DLM_LOCK_RES_RECOVERY_WAITING 0x00004000 | ||
285 | 286 | ||
286 | /* max milliseconds to wait to sync up a network failure with a node death */ | 287 | /* max milliseconds to wait to sync up a network failure with a node death */ |
287 | #define DLM_NODE_DEATH_WAIT_MAX (5 * 1000) | 288 | #define DLM_NODE_DEATH_WAIT_MAX (5 * 1000) |
@@ -451,6 +452,7 @@ enum { | |||
451 | DLM_QUERY_REGION = 519, | 452 | DLM_QUERY_REGION = 519, |
452 | DLM_QUERY_NODEINFO = 520, | 453 | DLM_QUERY_NODEINFO = 520, |
453 | DLM_BEGIN_EXIT_DOMAIN_MSG = 521, | 454 | DLM_BEGIN_EXIT_DOMAIN_MSG = 521, |
455 | DLM_DEREF_LOCKRES_DONE = 522, | ||
454 | }; | 456 | }; |
455 | 457 | ||
456 | struct dlm_reco_node_data | 458 | struct dlm_reco_node_data |
@@ -545,7 +547,7 @@ struct dlm_master_requery | |||
545 | * }; | 547 | * }; |
546 | * | 548 | * |
547 | * from ../cluster/tcp.h | 549 | * from ../cluster/tcp.h |
548 | * NET_MAX_PAYLOAD_BYTES (4096 - sizeof(net_msg)) | 550 | * O2NET_MAX_PAYLOAD_BYTES (4096 - sizeof(net_msg)) |
549 | * (roughly 4080 bytes) | 551 | * (roughly 4080 bytes) |
550 | * and sizeof(dlm_migratable_lockres) = 112 bytes | 552 | * and sizeof(dlm_migratable_lockres) = 112 bytes |
551 | * and sizeof(dlm_migratable_lock) = 16 bytes | 553 | * and sizeof(dlm_migratable_lock) = 16 bytes |
@@ -586,7 +588,7 @@ struct dlm_migratable_lockres | |||
586 | 588 | ||
587 | /* from above, 128 bytes | 589 | /* from above, 128 bytes |
588 | * for some undetermined future use */ | 590 | * for some undetermined future use */ |
589 | #define DLM_MIG_LOCKRES_RESERVED (NET_MAX_PAYLOAD_BYTES - \ | 591 | #define DLM_MIG_LOCKRES_RESERVED (O2NET_MAX_PAYLOAD_BYTES - \ |
590 | DLM_MIG_LOCKRES_MAX_LEN) | 592 | DLM_MIG_LOCKRES_MAX_LEN) |
591 | 593 | ||
592 | struct dlm_create_lock | 594 | struct dlm_create_lock |
@@ -782,6 +784,20 @@ struct dlm_deref_lockres | |||
782 | u8 name[O2NM_MAX_NAME_LEN]; | 784 | u8 name[O2NM_MAX_NAME_LEN]; |
783 | }; | 785 | }; |
784 | 786 | ||
787 | enum { | ||
788 | DLM_DEREF_RESPONSE_DONE = 0, | ||
789 | DLM_DEREF_RESPONSE_INPROG = 1, | ||
790 | }; | ||
791 | |||
792 | struct dlm_deref_lockres_done { | ||
793 | u32 pad1; | ||
794 | u16 pad2; | ||
795 | u8 node_idx; | ||
796 | u8 namelen; | ||
797 | |||
798 | u8 name[O2NM_MAX_NAME_LEN]; | ||
799 | }; | ||
800 | |||
785 | static inline enum dlm_status | 801 | static inline enum dlm_status |
786 | __dlm_lockres_state_to_status(struct dlm_lock_resource *res) | 802 | __dlm_lockres_state_to_status(struct dlm_lock_resource *res) |
787 | { | 803 | { |
@@ -789,7 +805,8 @@ __dlm_lockres_state_to_status(struct dlm_lock_resource *res) | |||
789 | 805 | ||
790 | assert_spin_locked(&res->spinlock); | 806 | assert_spin_locked(&res->spinlock); |
791 | 807 | ||
792 | if (res->state & DLM_LOCK_RES_RECOVERING) | 808 | if (res->state & (DLM_LOCK_RES_RECOVERING| |
809 | DLM_LOCK_RES_RECOVERY_WAITING)) | ||
793 | status = DLM_RECOVERING; | 810 | status = DLM_RECOVERING; |
794 | else if (res->state & DLM_LOCK_RES_MIGRATING) | 811 | else if (res->state & DLM_LOCK_RES_MIGRATING) |
795 | status = DLM_MIGRATING; | 812 | status = DLM_MIGRATING; |
@@ -968,6 +985,8 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data, | |||
968 | void dlm_assert_master_post_handler(int status, void *data, void *ret_data); | 985 | void dlm_assert_master_post_handler(int status, void *data, void *ret_data); |
969 | int dlm_deref_lockres_handler(struct o2net_msg *msg, u32 len, void *data, | 986 | int dlm_deref_lockres_handler(struct o2net_msg *msg, u32 len, void *data, |
970 | void **ret_data); | 987 | void **ret_data); |
988 | int dlm_deref_lockres_done_handler(struct o2net_msg *msg, u32 len, void *data, | ||
989 | void **ret_data); | ||
971 | int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data, | 990 | int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data, |
972 | void **ret_data); | 991 | void **ret_data); |
973 | int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data, | 992 | int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data, |
@@ -1009,6 +1028,7 @@ static inline void __dlm_wait_on_lockres(struct dlm_lock_resource *res) | |||
1009 | { | 1028 | { |
1010 | __dlm_wait_on_lockres_flags(res, (DLM_LOCK_RES_IN_PROGRESS| | 1029 | __dlm_wait_on_lockres_flags(res, (DLM_LOCK_RES_IN_PROGRESS| |
1011 | DLM_LOCK_RES_RECOVERING| | 1030 | DLM_LOCK_RES_RECOVERING| |
1031 | DLM_LOCK_RES_RECOVERY_WAITING| | ||
1012 | DLM_LOCK_RES_MIGRATING)); | 1032 | DLM_LOCK_RES_MIGRATING)); |
1013 | } | 1033 | } |
1014 | 1034 | ||
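Like the other o2net wire structures in this header, the new dlm_deref_lockres_done is explicitly padded so every node computes the same layout. A hedged compile-time check of that intent (the function name is illustrative):

#include <linux/bug.h>
#include <linux/stddef.h>

static inline void dlm_deref_done_layout_check(void)
{
	/* 4 (pad1) + 2 (pad2) + 1 (node_idx) + 1 (namelen) == 8 */
	BUILD_BUG_ON(offsetof(struct dlm_deref_lockres_done, name) != 8);
}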
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 2ee7fe747cea..12e064b8be9a 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c | |||
@@ -132,10 +132,13 @@ static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events); | |||
132 | * - Message DLM_QUERY_NODEINFO added to allow online node removes | 132 | * - Message DLM_QUERY_NODEINFO added to allow online node removes |
133 | * New in version 1.2: | 133 | * New in version 1.2: |
134 | * - Message DLM_BEGIN_EXIT_DOMAIN_MSG added to mark start of exit domain | 134 | * - Message DLM_BEGIN_EXIT_DOMAIN_MSG added to mark start of exit domain |
135 | * New in version 1.3: | ||
136 | * - Message DLM_DEREF_LOCKRES_DONE added to inform non-master that the | ||
137 | * refmap is cleared | ||
135 | */ | 138 | */ |
136 | static const struct dlm_protocol_version dlm_protocol = { | 139 | static const struct dlm_protocol_version dlm_protocol = { |
137 | .pv_major = 1, | 140 | .pv_major = 1, |
138 | .pv_minor = 2, | 141 | .pv_minor = 3, |
139 | }; | 142 | }; |
140 | 143 | ||
141 | #define DLM_DOMAIN_BACKOFF_MS 200 | 144 | #define DLM_DOMAIN_BACKOFF_MS 200 |
@@ -1396,7 +1399,7 @@ static int dlm_send_join_cancels(struct dlm_ctxt *dlm, | |||
1396 | unsigned int map_size) | 1399 | unsigned int map_size) |
1397 | { | 1400 | { |
1398 | int status, tmpstat; | 1401 | int status, tmpstat; |
1399 | unsigned int node; | 1402 | int node; |
1400 | 1403 | ||
1401 | if (map_size != (BITS_TO_LONGS(O2NM_MAX_NODES) * | 1404 | if (map_size != (BITS_TO_LONGS(O2NM_MAX_NODES) * |
1402 | sizeof(unsigned long))) { | 1405 | sizeof(unsigned long))) { |
@@ -1853,7 +1856,13 @@ static int dlm_register_domain_handlers(struct dlm_ctxt *dlm) | |||
1853 | sizeof(struct dlm_exit_domain), | 1856 | sizeof(struct dlm_exit_domain), |
1854 | dlm_begin_exit_domain_handler, | 1857 | dlm_begin_exit_domain_handler, |
1855 | dlm, NULL, &dlm->dlm_domain_handlers); | 1858 | dlm, NULL, &dlm->dlm_domain_handlers); |
1859 | if (status) | ||
1860 | goto bail; | ||
1856 | 1861 | ||
1862 | status = o2net_register_handler(DLM_DEREF_LOCKRES_DONE, dlm->key, | ||
1863 | sizeof(struct dlm_deref_lockres_done), | ||
1864 | dlm_deref_lockres_done_handler, | ||
1865 | dlm, NULL, &dlm->dlm_domain_handlers); | ||
1857 | bail: | 1866 | bail: |
1858 | if (status) | 1867 | if (status) |
1859 | dlm_unregister_domain_handlers(dlm); | 1868 | dlm_unregister_domain_handlers(dlm); |
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 9477d6e1de37..9aed6e202201 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
@@ -2278,7 +2278,7 @@ int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) | |||
2278 | dlm_print_one_lock_resource(res); | 2278 | dlm_print_one_lock_resource(res); |
2279 | BUG(); | 2279 | BUG(); |
2280 | } | 2280 | } |
2281 | return ret; | 2281 | return ret ? ret : r; |
2282 | } | 2282 | } |
2283 | 2283 | ||
2284 | int dlm_deref_lockres_handler(struct o2net_msg *msg, u32 len, void *data, | 2284 | int dlm_deref_lockres_handler(struct o2net_msg *msg, u32 len, void *data, |
@@ -2345,7 +2345,7 @@ int dlm_deref_lockres_handler(struct o2net_msg *msg, u32 len, void *data, | |||
2345 | res->lockname.len, res->lockname.name, node); | 2345 | res->lockname.len, res->lockname.name, node); |
2346 | dlm_print_one_lock_resource(res); | 2346 | dlm_print_one_lock_resource(res); |
2347 | } | 2347 | } |
2348 | ret = 0; | 2348 | ret = DLM_DEREF_RESPONSE_DONE; |
2349 | goto done; | 2349 | goto done; |
2350 | } | 2350 | } |
2351 | 2351 | ||
@@ -2365,7 +2365,7 @@ int dlm_deref_lockres_handler(struct o2net_msg *msg, u32 len, void *data, | |||
2365 | spin_unlock(&dlm->work_lock); | 2365 | spin_unlock(&dlm->work_lock); |
2366 | 2366 | ||
2367 | queue_work(dlm->dlm_worker, &dlm->dispatched_work); | 2367 | queue_work(dlm->dlm_worker, &dlm->dispatched_work); |
2368 | return 0; | 2368 | return DLM_DEREF_RESPONSE_INPROG; |
2369 | 2369 | ||
2370 | done: | 2370 | done: |
2371 | if (res) | 2371 | if (res) |
@@ -2375,6 +2375,122 @@ done: | |||
2375 | return ret; | 2375 | return ret; |
2376 | } | 2376 | } |
2377 | 2377 | ||
2378 | int dlm_deref_lockres_done_handler(struct o2net_msg *msg, u32 len, void *data, | ||
2379 | void **ret_data) | ||
2380 | { | ||
2381 | struct dlm_ctxt *dlm = data; | ||
2382 | struct dlm_deref_lockres_done *deref | ||
2383 | = (struct dlm_deref_lockres_done *)msg->buf; | ||
2384 | struct dlm_lock_resource *res = NULL; | ||
2385 | char *name; | ||
2386 | unsigned int namelen; | ||
2387 | int ret = -EINVAL; | ||
2388 | u8 node; | ||
2389 | unsigned int hash; | ||
2390 | |||
2391 | if (!dlm_grab(dlm)) | ||
2392 | return 0; | ||
2393 | |||
2394 | name = deref->name; | ||
2395 | namelen = deref->namelen; | ||
2396 | node = deref->node_idx; | ||
2397 | |||
2398 | if (namelen > DLM_LOCKID_NAME_MAX) { | ||
2399 | mlog(ML_ERROR, "Invalid name length!"); | ||
2400 | goto done; | ||
2401 | } | ||
2402 | if (deref->node_idx >= O2NM_MAX_NODES) { | ||
2403 | mlog(ML_ERROR, "Invalid node number: %u\n", node); | ||
2404 | goto done; | ||
2405 | } | ||
2406 | |||
2407 | hash = dlm_lockid_hash(name, namelen); | ||
2408 | |||
2409 | spin_lock(&dlm->spinlock); | ||
2410 | res = __dlm_lookup_lockres_full(dlm, name, namelen, hash); | ||
2411 | if (!res) { | ||
2412 | spin_unlock(&dlm->spinlock); | ||
2413 | mlog(ML_ERROR, "%s:%.*s: bad lockres name\n", | ||
2414 | dlm->name, namelen, name); | ||
2415 | goto done; | ||
2416 | } | ||
2417 | |||
2418 | spin_lock(&res->spinlock); | ||
2419 | BUG_ON(!(res->state & DLM_LOCK_RES_DROPPING_REF)); | ||
2420 | if (!list_empty(&res->purge)) { | ||
2421 | mlog(0, "%s: Removing res %.*s from purgelist\n", | ||
2422 | dlm->name, res->lockname.len, res->lockname.name); | ||
2423 | list_del_init(&res->purge); | ||
2424 | dlm_lockres_put(res); | ||
2425 | dlm->purge_count--; | ||
2426 | } | ||
2427 | |||
2428 | if (!__dlm_lockres_unused(res)) { | ||
2429 | mlog(ML_ERROR, "%s: res %.*s in use after deref\n", | ||
2430 | dlm->name, res->lockname.len, res->lockname.name); | ||
2431 | __dlm_print_one_lock_resource(res); | ||
2432 | BUG(); | ||
2433 | } | ||
2434 | |||
2435 | __dlm_unhash_lockres(dlm, res); | ||
2436 | |||
2437 | spin_lock(&dlm->track_lock); | ||
2438 | if (!list_empty(&res->tracking)) | ||
2439 | list_del_init(&res->tracking); | ||
2440 | else { | ||
2441 | mlog(ML_ERROR, "%s: Resource %.*s not on the Tracking list\n", | ||
2442 | dlm->name, res->lockname.len, res->lockname.name); | ||
2443 | __dlm_print_one_lock_resource(res); | ||
2444 | } | ||
2445 | spin_unlock(&dlm->track_lock); | ||
2446 | |||
2447 | /* lockres is not in the hash now. drop the flag and wake up | ||
2448 | * any processes waiting in dlm_get_lock_resource. | ||
2449 | */ | ||
2450 | res->state &= ~DLM_LOCK_RES_DROPPING_REF; | ||
2451 | spin_unlock(&res->spinlock); | ||
2452 | wake_up(&res->wq); | ||
2453 | |||
2454 | dlm_lockres_put(res); | ||
2455 | |||
2456 | spin_unlock(&dlm->spinlock); | ||
2457 | |||
2458 | done: | ||
2459 | dlm_put(dlm); | ||
2460 | return ret; | ||
2461 | } | ||
2462 | |||
2463 | static void dlm_drop_lockres_ref_done(struct dlm_ctxt *dlm, | ||
2464 | struct dlm_lock_resource *res, u8 node) | ||
2465 | { | ||
2466 | struct dlm_deref_lockres_done deref; | ||
2467 | int ret = 0, r; | ||
2468 | const char *lockname; | ||
2469 | unsigned int namelen; | ||
2470 | |||
2471 | lockname = res->lockname.name; | ||
2472 | namelen = res->lockname.len; | ||
2473 | BUG_ON(namelen > O2NM_MAX_NAME_LEN); | ||
2474 | |||
2475 | memset(&deref, 0, sizeof(deref)); | ||
2476 | deref.node_idx = dlm->node_num; | ||
2477 | deref.namelen = namelen; | ||
2478 | memcpy(deref.name, lockname, namelen); | ||
2479 | |||
2480 | ret = o2net_send_message(DLM_DEREF_LOCKRES_DONE, dlm->key, | ||
2481 | &deref, sizeof(deref), node, &r); | ||
2482 | if (ret < 0) { | ||
2483 | mlog(ML_ERROR, "%s: res %.*s, error %d send DEREF DONE " | ||
2484 | " to node %u\n", dlm->name, namelen, | ||
2485 | lockname, ret, node); | ||
2486 | } else if (r < 0) { | ||
2487 | /* ignore the error */ | ||
2488 | mlog(ML_ERROR, "%s: res %.*s, DEREF to node %u got %d\n", | ||
2489 | dlm->name, namelen, lockname, node, r); | ||
2490 | dlm_print_one_lock_resource(res); | ||
2491 | } | ||
2492 | } | ||
2493 | |||
2378 | static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data) | 2494 | static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data) |
2379 | { | 2495 | { |
2380 | struct dlm_ctxt *dlm; | 2496 | struct dlm_ctxt *dlm; |
@@ -2395,6 +2511,8 @@ static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data) | |||
2395 | } | 2511 | } |
2396 | spin_unlock(&res->spinlock); | 2512 | spin_unlock(&res->spinlock); |
2397 | 2513 | ||
2514 | dlm_drop_lockres_ref_done(dlm, res, node); | ||
2515 | |||
2398 | if (cleared) { | 2516 | if (cleared) { |
2399 | mlog(0, "%s:%.*s node %u ref dropped in dispatch\n", | 2517 | mlog(0, "%s:%.*s node %u ref dropped in dispatch\n", |
2400 | dlm->name, res->lockname.len, res->lockname.name, node); | 2518 | dlm->name, res->lockname.len, res->lockname.name, node); |
@@ -2432,7 +2550,8 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, | |||
2432 | return 0; | 2550 | return 0; |
2433 | 2551 | ||
2434 | /* delay migration when the lockres is in RECOVERING state */ | 2552 |
2435 | if (res->state & DLM_LOCK_RES_RECOVERING) | 2553 | if (res->state & (DLM_LOCK_RES_RECOVERING| |
2554 | DLM_LOCK_RES_RECOVERY_WAITING)) | ||
2436 | return 0; | 2555 | return 0; |
2437 | 2556 | ||
2438 | if (res->owner != dlm->node_num) | 2557 | if (res->owner != dlm->node_num) |
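With dlm_drop_lockres_ref() now returning "ret ? ret : r", a caller can tell the master's answer apart from a transport failure. A hedged sketch of the resulting decision, roughly what the dlmthread.c hunk further below acts on (the helper name is illustrative; declarations come from dlmcommon.h):

static void purge_after_deref(struct dlm_ctxt *dlm,
			      struct dlm_lock_resource *res)
{
	int ret = dlm_drop_lockres_ref(dlm, res);

	if (ret < 0)
		return;	/* send failed or the master went down */
	if (ret == DLM_DEREF_RESPONSE_INPROG)
		return;	/* master queued the deref; wait for DLM_DEREF_LOCKRES_DONE */
	/* DLM_DEREF_RESPONSE_DONE: refmap already cleared, safe to purge */
}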
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index b94a425f0175..cd38488a10fc 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
@@ -1403,12 +1403,24 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data, | |||
1403 | * and RECOVERY flag changed when it completes. */ | 1403 | * and RECOVERY flag changed when it completes. */ |
1404 | hash = dlm_lockid_hash(mres->lockname, mres->lockname_len); | 1404 | hash = dlm_lockid_hash(mres->lockname, mres->lockname_len); |
1405 | spin_lock(&dlm->spinlock); | 1405 | spin_lock(&dlm->spinlock); |
1406 | res = __dlm_lookup_lockres(dlm, mres->lockname, mres->lockname_len, | 1406 | res = __dlm_lookup_lockres_full(dlm, mres->lockname, mres->lockname_len, |
1407 | hash); | 1407 | hash); |
1408 | if (res) { | 1408 | if (res) { |
1409 | /* this will get a ref on res */ | 1409 | /* this will get a ref on res */ |
1410 | /* mark it as recovering/migrating and hash it */ | 1410 | /* mark it as recovering/migrating and hash it */ |
1411 | spin_lock(&res->spinlock); | 1411 | spin_lock(&res->spinlock); |
1412 | if (res->state & DLM_LOCK_RES_DROPPING_REF) { | ||
1413 | mlog(0, "%s: node is attempting to migrate " | ||
1414 | "lockres %.*s, but marked as dropping " | ||
1415 | " ref!\n", dlm->name, | ||
1416 | mres->lockname_len, mres->lockname); | ||
1417 | ret = -EINVAL; | ||
1418 | spin_unlock(&res->spinlock); | ||
1419 | spin_unlock(&dlm->spinlock); | ||
1420 | dlm_lockres_put(res); | ||
1421 | goto leave; | ||
1422 | } | ||
1423 | |||
1412 | if (mres->flags & DLM_MRES_RECOVERY) { | 1424 | if (mres->flags & DLM_MRES_RECOVERY) { |
1413 | res->state |= DLM_LOCK_RES_RECOVERING; | 1425 | res->state |= DLM_LOCK_RES_RECOVERING; |
1414 | } else { | 1426 | } else { |
@@ -2163,6 +2175,13 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm, | |||
2163 | for (i = 0; i < DLM_HASH_BUCKETS; i++) { | 2175 | for (i = 0; i < DLM_HASH_BUCKETS; i++) { |
2164 | bucket = dlm_lockres_hash(dlm, i); | 2176 | bucket = dlm_lockres_hash(dlm, i); |
2165 | hlist_for_each_entry(res, bucket, hash_node) { | 2177 | hlist_for_each_entry(res, bucket, hash_node) { |
2178 | if (res->state & DLM_LOCK_RES_RECOVERY_WAITING) { | ||
2179 | spin_lock(&res->spinlock); | ||
2180 | res->state &= ~DLM_LOCK_RES_RECOVERY_WAITING; | ||
2181 | spin_unlock(&res->spinlock); | ||
2182 | wake_up(&res->wq); | ||
2183 | } | ||
2184 | |||
2166 | if (!(res->state & DLM_LOCK_RES_RECOVERING)) | 2185 | if (!(res->state & DLM_LOCK_RES_RECOVERING)) |
2167 | continue; | 2186 | continue; |
2168 | 2187 | ||
@@ -2300,6 +2319,7 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm, | |||
2300 | res->lockname.len, res->lockname.name, freed, dead_node); | 2319 | res->lockname.len, res->lockname.name, freed, dead_node); |
2301 | __dlm_print_one_lock_resource(res); | 2320 | __dlm_print_one_lock_resource(res); |
2302 | } | 2321 | } |
2322 | res->state |= DLM_LOCK_RES_RECOVERY_WAITING; | ||
2303 | dlm_lockres_clear_refmap_bit(dlm, res, dead_node); | 2323 | dlm_lockres_clear_refmap_bit(dlm, res, dead_node); |
2304 | } else if (test_bit(dead_node, res->refmap)) { | 2324 | } else if (test_bit(dead_node, res->refmap)) { |
2305 | mlog(0, "%s:%.*s: dead node %u had a ref, but had " | 2325 | mlog(0, "%s:%.*s: dead node %u had a ref, but had " |
@@ -2377,14 +2397,16 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) | |||
2377 | dlm_revalidate_lvb(dlm, res, dead_node); | 2397 | dlm_revalidate_lvb(dlm, res, dead_node); |
2378 | if (res->owner == dead_node) { | 2398 | if (res->owner == dead_node) { |
2379 | if (res->state & DLM_LOCK_RES_DROPPING_REF) { | 2399 | if (res->state & DLM_LOCK_RES_DROPPING_REF) { |
2380 | mlog(ML_NOTICE, "%s: res %.*s, Skip " | 2400 | mlog(0, "%s:%.*s: owned by " |
2381 | "recovery as it is being freed\n", | 2401 | "dead node %u, this node was " |
2382 | dlm->name, res->lockname.len, | 2402 | "dropping its ref when it died. " |
2383 | res->lockname.name); | 2403 | "continue, dropping the flag.\n", |
2384 | } else | 2404 | dlm->name, res->lockname.len, |
2385 | dlm_move_lockres_to_recovery_list(dlm, | 2405 | res->lockname.name, dead_node); |
2386 | res); | 2406 | } |
2387 | 2407 | res->state &= ~DLM_LOCK_RES_DROPPING_REF; | |
2408 | dlm_move_lockres_to_recovery_list(dlm, | ||
2409 | res); | ||
2388 | } else if (res->owner == dlm->node_num) { | 2410 | } else if (res->owner == dlm->node_num) { |
2389 | dlm_free_dead_locks(dlm, res, dead_node); | 2411 | dlm_free_dead_locks(dlm, res, dead_node); |
2390 | __dlm_lockres_calc_usage(dlm, res); | 2412 | __dlm_lockres_calc_usage(dlm, res); |
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index c5f6c241ecd7..68d239ba0c63 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c | |||
@@ -106,7 +106,8 @@ int __dlm_lockres_unused(struct dlm_lock_resource *res) | |||
106 | if (!list_empty(&res->dirty) || res->state & DLM_LOCK_RES_DIRTY) | 106 | if (!list_empty(&res->dirty) || res->state & DLM_LOCK_RES_DIRTY) |
107 | return 0; | 107 | return 0; |
108 | 108 | ||
109 | if (res->state & DLM_LOCK_RES_RECOVERING) | 109 | if (res->state & (DLM_LOCK_RES_RECOVERING| |
110 | DLM_LOCK_RES_RECOVERY_WAITING)) | ||
110 | return 0; | 111 | return 0; |
111 | 112 | ||
112 | /* Another node has this resource with this node as the master */ | 113 | /* Another node has this resource with this node as the master */ |
@@ -202,6 +203,13 @@ static void dlm_purge_lockres(struct dlm_ctxt *dlm, | |||
202 | dlm->purge_count--; | 203 | dlm->purge_count--; |
203 | } | 204 | } |
204 | 205 | ||
206 | if (!master && ret != 0) { | ||
207 | mlog(0, "%s: deref %.*s in progress or master goes down\n", | ||
208 | dlm->name, res->lockname.len, res->lockname.name); | ||
209 | spin_unlock(&res->spinlock); | ||
210 | return; | ||
211 | } | ||
212 | |||
205 | if (!__dlm_lockres_unused(res)) { | 213 | if (!__dlm_lockres_unused(res)) { |
206 | mlog(ML_ERROR, "%s: res %.*s in use after deref\n", | 214 | mlog(ML_ERROR, "%s: res %.*s in use after deref\n", |
207 | dlm->name, res->lockname.len, res->lockname.name); | 215 | dlm->name, res->lockname.len, res->lockname.name); |
@@ -700,7 +708,8 @@ static int dlm_thread(void *data) | |||
700 | * dirty for a short while. */ | 708 | * dirty for a short while. */ |
701 | BUG_ON(res->state & DLM_LOCK_RES_MIGRATING); | 709 | BUG_ON(res->state & DLM_LOCK_RES_MIGRATING); |
702 | if (res->state & (DLM_LOCK_RES_IN_PROGRESS | | 710 | if (res->state & (DLM_LOCK_RES_IN_PROGRESS | |
703 | DLM_LOCK_RES_RECOVERING)) { | 711 | DLM_LOCK_RES_RECOVERING | |
712 | DLM_LOCK_RES_RECOVERY_WAITING)) { | ||
704 | /* move it to the tail and keep going */ | 713 | /* move it to the tail and keep going */ |
705 | res->state &= ~DLM_LOCK_RES_DIRTY; | 714 | res->state &= ~DLM_LOCK_RES_DIRTY; |
706 | spin_unlock(&res->spinlock); | 715 | spin_unlock(&res->spinlock); |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index faa1365097bc..302854ee0985 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -236,6 +236,7 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) | |||
236 | struct ocfs2_recovery_map *rm = osb->recovery_map; | 236 | struct ocfs2_recovery_map *rm = osb->recovery_map; |
237 | struct ocfs2_orphan_scan *os = &osb->osb_orphan_scan; | 237 | struct ocfs2_orphan_scan *os = &osb->osb_orphan_scan; |
238 | int i, out = 0; | 238 | int i, out = 0; |
239 | unsigned long flags; | ||
239 | 240 | ||
240 | out += snprintf(buf + out, len - out, | 241 | out += snprintf(buf + out, len - out, |
241 | "%10s => Id: %-s Uuid: %-s Gen: 0x%X Label: %-s\n", | 242 | "%10s => Id: %-s Uuid: %-s Gen: 0x%X Label: %-s\n", |
@@ -271,14 +272,14 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) | |||
271 | cconn->cc_version.pv_minor); | 272 | cconn->cc_version.pv_minor); |
272 | } | 273 | } |
273 | 274 | ||
274 | spin_lock(&osb->dc_task_lock); | 275 | spin_lock_irqsave(&osb->dc_task_lock, flags); |
275 | out += snprintf(buf + out, len - out, | 276 | out += snprintf(buf + out, len - out, |
276 | "%10s => Pid: %d Count: %lu WakeSeq: %lu " | 277 | "%10s => Pid: %d Count: %lu WakeSeq: %lu " |
277 | "WorkSeq: %lu\n", "DownCnvt", | 278 | "WorkSeq: %lu\n", "DownCnvt", |
278 | (osb->dc_task ? task_pid_nr(osb->dc_task) : -1), | 279 | (osb->dc_task ? task_pid_nr(osb->dc_task) : -1), |
279 | osb->blocked_lock_count, osb->dc_wake_sequence, | 280 | osb->blocked_lock_count, osb->dc_wake_sequence, |
280 | osb->dc_work_sequence); | 281 | osb->dc_work_sequence); |
281 | spin_unlock(&osb->dc_task_lock); | 282 | spin_unlock_irqrestore(&osb->dc_task_lock, flags); |
282 | 283 | ||
283 | spin_lock(&osb->osb_lock); | 284 | spin_lock(&osb->osb_lock); |
284 | out += snprintf(buf + out, len - out, "%10s => Pid: %d Nodes:", | 285 | out += snprintf(buf + out, len - out, "%10s => Pid: %d Nodes:", |
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index a9ebabfe7587..5c57b7b40728 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c | |||
@@ -1957,7 +1957,6 @@ xfs_vm_set_page_dirty( | |||
1957 | loff_t end_offset; | 1957 | loff_t end_offset; |
1958 | loff_t offset; | 1958 | loff_t offset; |
1959 | int newly_dirty; | 1959 | int newly_dirty; |
1960 | struct mem_cgroup *memcg; | ||
1961 | 1960 | ||
1962 | if (unlikely(!mapping)) | 1961 | if (unlikely(!mapping)) |
1963 | return !TestSetPageDirty(page); | 1962 | return !TestSetPageDirty(page); |
@@ -1978,10 +1977,10 @@ xfs_vm_set_page_dirty( | |||
1978 | } while (bh != head); | 1977 | } while (bh != head); |
1979 | } | 1978 | } |
1980 | /* | 1979 | /* |
1981 | * Use mem_group_begin_page_stat() to keep PageDirty synchronized with | 1980 | * Lock out page->mem_cgroup migration to keep PageDirty |
1982 | * per-memcg dirty page counters. | 1981 | * synchronized with per-memcg dirty page counters. |
1983 | */ | 1982 | */ |
1984 | memcg = mem_cgroup_begin_page_stat(page); | 1983 | lock_page_memcg(page); |
1985 | newly_dirty = !TestSetPageDirty(page); | 1984 | newly_dirty = !TestSetPageDirty(page); |
1986 | spin_unlock(&mapping->private_lock); | 1985 | spin_unlock(&mapping->private_lock); |
1987 | 1986 | ||
@@ -1992,13 +1991,13 @@ xfs_vm_set_page_dirty( | |||
1992 | spin_lock_irqsave(&mapping->tree_lock, flags); | 1991 | spin_lock_irqsave(&mapping->tree_lock, flags); |
1993 | if (page->mapping) { /* Race with truncate? */ | 1992 | if (page->mapping) { /* Race with truncate? */ |
1994 | WARN_ON_ONCE(!PageUptodate(page)); | 1993 | WARN_ON_ONCE(!PageUptodate(page)); |
1995 | account_page_dirtied(page, mapping, memcg); | 1994 | account_page_dirtied(page, mapping); |
1996 | radix_tree_tag_set(&mapping->page_tree, | 1995 | radix_tree_tag_set(&mapping->page_tree, |
1997 | page_index(page), PAGECACHE_TAG_DIRTY); | 1996 | page_index(page), PAGECACHE_TAG_DIRTY); |
1998 | } | 1997 | } |
1999 | spin_unlock_irqrestore(&mapping->tree_lock, flags); | 1998 | spin_unlock_irqrestore(&mapping->tree_lock, flags); |
2000 | } | 1999 | } |
2001 | mem_cgroup_end_page_stat(memcg); | 2000 | unlock_page_memcg(page); |
2002 | if (newly_dirty) | 2001 | if (newly_dirty) |
2003 | __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); | 2002 | __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); |
2004 | return newly_dirty; | 2003 | return newly_dirty; |
diff --git a/include/linux/auto_dev-ioctl.h b/include/linux/auto_dev-ioctl.h index 850f39b33e74..7caaf298f539 100644 --- a/include/linux/auto_dev-ioctl.h +++ b/include/linux/auto_dev-ioctl.h | |||
@@ -11,12 +11,7 @@ | |||
11 | #define _LINUX_AUTO_DEV_IOCTL_H | 11 | #define _LINUX_AUTO_DEV_IOCTL_H |
12 | 12 | ||
13 | #include <linux/auto_fs.h> | 13 | #include <linux/auto_fs.h> |
14 | |||
15 | #ifdef __KERNEL__ | ||
16 | #include <linux/string.h> | 14 | #include <linux/string.h> |
17 | #else | ||
18 | #include <string.h> | ||
19 | #endif /* __KERNEL__ */ | ||
20 | 15 | ||
21 | #define AUTOFS_DEVICE_NAME "autofs" | 16 | #define AUTOFS_DEVICE_NAME "autofs" |
22 | 17 | ||
@@ -125,7 +120,6 @@ static inline void init_autofs_dev_ioctl(struct autofs_dev_ioctl *in) | |||
125 | in->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR; | 120 | in->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR; |
126 | in->size = sizeof(struct autofs_dev_ioctl); | 121 | in->size = sizeof(struct autofs_dev_ioctl); |
127 | in->ioctlfd = -1; | 122 | in->ioctlfd = -1; |
128 | return; | ||
129 | } | 123 | } |
130 | 124 | ||
131 | /* | 125 | /* |
diff --git a/include/linux/auto_fs.h b/include/linux/auto_fs.h index fcd704d354c4..b4066bb89083 100644 --- a/include/linux/auto_fs.h +++ b/include/linux/auto_fs.h | |||
@@ -1,14 +1,10 @@ | |||
1 | /* -*- linux-c -*- ------------------------------------------------------- * | 1 | /* |
2 | * | 2 | * Copyright 1997 Transmeta Corporation - All Rights Reserved |
3 | * linux/include/linux/auto_fs.h | ||
4 | * | ||
5 | * Copyright 1997 Transmeta Corporation - All Rights Reserved | ||
6 | * | 3 | * |
7 | * This file is part of the Linux kernel and is made available under | 4 | * This file is part of the Linux kernel and is made available under |
8 | * the terms of the GNU General Public License, version 2, or at your | 5 | * the terms of the GNU General Public License, version 2, or at your |
9 | * option, any later version, incorporated herein by reference. | 6 | * option, any later version, incorporated herein by reference. |
10 | * | 7 | */ |
11 | * ----------------------------------------------------------------------- */ | ||
12 | 8 | ||
13 | #ifndef _LINUX_AUTO_FS_H | 9 | #ifndef _LINUX_AUTO_FS_H |
14 | #define _LINUX_AUTO_FS_H | 10 | #define _LINUX_AUTO_FS_H |
diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 3159a7dba034..9f4956d8601c 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h | |||
@@ -62,10 +62,9 @@ static inline struct dentry *fault_create_debugfs_attr(const char *name, | |||
62 | #endif /* CONFIG_FAULT_INJECTION */ | 62 | #endif /* CONFIG_FAULT_INJECTION */ |
63 | 63 | ||
64 | #ifdef CONFIG_FAILSLAB | 64 | #ifdef CONFIG_FAILSLAB |
65 | extern bool should_failslab(size_t size, gfp_t gfpflags, unsigned long flags); | 65 | extern bool should_failslab(struct kmem_cache *s, gfp_t gfpflags); |
66 | #else | 66 | #else |
67 | static inline bool should_failslab(size_t size, gfp_t gfpflags, | 67 | static inline bool should_failslab(struct kmem_cache *s, gfp_t gfpflags) |
68 | unsigned long flags) | ||
69 | { | 68 | { |
70 | return false; | 69 | return false; |
71 | } | 70 | } |
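The new signature hands the fault-injection hook the kmem_cache itself rather than a size/flags pair, which lets failslab consult per-cache state. A hedged sketch of the call-site shape (the wrapper name is illustrative):

#include <linux/fault-inject.h>
#include <linux/slab.h>

static inline void *kmem_cache_alloc_injected(struct kmem_cache *s, gfp_t flags)
{
	if (should_failslab(s, flags))	/* fault-injection hook */
		return NULL;
	return kmem_cache_alloc(s, flags);
}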
diff --git a/include/linux/gfp.h b/include/linux/gfp.h index af1f2b24bbe4..bb16dfeb917e 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h | |||
@@ -9,6 +9,11 @@ | |||
9 | 9 | ||
10 | struct vm_area_struct; | 10 | struct vm_area_struct; |
11 | 11 | ||
12 | /* | ||
13 | * In case of changes, please don't forget to update | ||
14 | * include/trace/events/mmflags.h and tools/perf/builtin-kmem.c | ||
15 | */ | ||
16 | |||
12 | /* Plain integer GFP bitmasks. Do not use this directly. */ | 17 | /* Plain integer GFP bitmasks. Do not use this directly. */ |
13 | #define ___GFP_DMA 0x01u | 18 | #define ___GFP_DMA 0x01u |
14 | #define ___GFP_HIGHMEM 0x02u | 19 | #define ___GFP_HIGHMEM 0x02u |
@@ -48,7 +53,6 @@ struct vm_area_struct; | |||
48 | #define __GFP_DMA ((__force gfp_t)___GFP_DMA) | 53 | #define __GFP_DMA ((__force gfp_t)___GFP_DMA) |
49 | #define __GFP_HIGHMEM ((__force gfp_t)___GFP_HIGHMEM) | 54 | #define __GFP_HIGHMEM ((__force gfp_t)___GFP_HIGHMEM) |
50 | #define __GFP_DMA32 ((__force gfp_t)___GFP_DMA32) | 55 | #define __GFP_DMA32 ((__force gfp_t)___GFP_DMA32) |
51 | #define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* Page is movable */ | ||
52 | #define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* ZONE_MOVABLE allowed */ | 56 | #define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* ZONE_MOVABLE allowed */ |
53 | #define GFP_ZONEMASK (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE) | 57 | #define GFP_ZONEMASK (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE) |
54 | 58 | ||
@@ -515,13 +519,7 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp); | |||
515 | void drain_all_pages(struct zone *zone); | 519 | void drain_all_pages(struct zone *zone); |
516 | void drain_local_pages(struct zone *zone); | 520 | void drain_local_pages(struct zone *zone); |
517 | 521 | ||
518 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT | ||
519 | void page_alloc_init_late(void); | 522 | void page_alloc_init_late(void); |
520 | #else | ||
521 | static inline void page_alloc_init_late(void) | ||
522 | { | ||
523 | } | ||
524 | #endif | ||
525 | 523 | ||
526 | /* | 524 | /* |
527 | * gfp_allowed_mask is set to GFP_BOOT_MASK during early boot to restrict what | 525 | * gfp_allowed_mask is set to GFP_BOOT_MASK during early boot to restrict what |
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 792c8981e633..f0c4bec6565b 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/eventfd.h> | 28 | #include <linux/eventfd.h> |
29 | #include <linux/mmzone.h> | 29 | #include <linux/mmzone.h> |
30 | #include <linux/writeback.h> | 30 | #include <linux/writeback.h> |
31 | #include <linux/page-flags.h> | ||
31 | 32 | ||
32 | struct mem_cgroup; | 33 | struct mem_cgroup; |
33 | struct page; | 34 | struct page; |
@@ -89,6 +90,10 @@ enum mem_cgroup_events_target { | |||
89 | }; | 90 | }; |
90 | 91 | ||
91 | #ifdef CONFIG_MEMCG | 92 | #ifdef CONFIG_MEMCG |
93 | |||
94 | #define MEM_CGROUP_ID_SHIFT 16 | ||
95 | #define MEM_CGROUP_ID_MAX USHRT_MAX | ||
96 | |||
92 | struct mem_cgroup_stat_cpu { | 97 | struct mem_cgroup_stat_cpu { |
93 | long count[MEMCG_NR_STAT]; | 98 | long count[MEMCG_NR_STAT]; |
94 | unsigned long events[MEMCG_NR_EVENTS]; | 99 | unsigned long events[MEMCG_NR_EVENTS]; |
@@ -265,6 +270,11 @@ struct mem_cgroup { | |||
265 | 270 | ||
266 | extern struct mem_cgroup *root_mem_cgroup; | 271 | extern struct mem_cgroup *root_mem_cgroup; |
267 | 272 | ||
273 | static inline bool mem_cgroup_disabled(void) | ||
274 | { | ||
275 | return !cgroup_subsys_enabled(memory_cgrp_subsys); | ||
276 | } | ||
277 | |||
268 | /** | 278 | /** |
269 | * mem_cgroup_events - count memory events against a cgroup | 279 | * mem_cgroup_events - count memory events against a cgroup |
270 | * @memcg: the memory cgroup | 280 | * @memcg: the memory cgroup |
@@ -291,7 +301,7 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg, | |||
291 | void mem_cgroup_uncharge(struct page *page); | 301 | void mem_cgroup_uncharge(struct page *page); |
292 | void mem_cgroup_uncharge_list(struct list_head *page_list); | 302 | void mem_cgroup_uncharge_list(struct list_head *page_list); |
293 | 303 | ||
294 | void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage); | 304 | void mem_cgroup_migrate(struct page *oldpage, struct page *newpage); |
295 | 305 | ||
296 | struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *); | 306 | struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *); |
297 | struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *); | 307 | struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *); |
@@ -312,6 +322,28 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *, | |||
312 | struct mem_cgroup_reclaim_cookie *); | 322 | struct mem_cgroup_reclaim_cookie *); |
313 | void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *); | 323 | void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *); |
314 | 324 | ||
325 | static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg) | ||
326 | { | ||
327 | if (mem_cgroup_disabled()) | ||
328 | return 0; | ||
329 | |||
330 | return memcg->css.id; | ||
331 | } | ||
332 | |||
333 | /** | ||
334 | * mem_cgroup_from_id - look up a memcg from an id | ||
335 | * @id: the id to look up | ||
336 | * | ||
337 | * Caller must hold rcu_read_lock() and use css_tryget() as necessary. | ||
338 | */ | ||
339 | static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id) | ||
340 | { | ||
341 | struct cgroup_subsys_state *css; | ||
342 | |||
343 | css = css_from_id(id, &memory_cgrp_subsys); | ||
344 | return mem_cgroup_from_css(css); | ||
345 | } | ||
346 | |||
315 | /** | 347 | /** |
316 | * parent_mem_cgroup - find the accounting parent of a memcg | 348 | * parent_mem_cgroup - find the accounting parent of a memcg |
317 | * @memcg: memcg whose parent to find | 349 | * @memcg: memcg whose parent to find |
@@ -353,11 +385,6 @@ static inline bool mm_match_cgroup(struct mm_struct *mm, | |||
353 | struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page); | 385 | struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page); |
354 | ino_t page_cgroup_ino(struct page *page); | 386 | ino_t page_cgroup_ino(struct page *page); |
355 | 387 | ||
356 | static inline bool mem_cgroup_disabled(void) | ||
357 | { | ||
358 | return !cgroup_subsys_enabled(memory_cgrp_subsys); | ||
359 | } | ||
360 | |||
361 | static inline bool mem_cgroup_online(struct mem_cgroup *memcg) | 388 | static inline bool mem_cgroup_online(struct mem_cgroup *memcg) |
362 | { | 389 | { |
363 | if (mem_cgroup_disabled()) | 390 | if (mem_cgroup_disabled()) |
@@ -429,36 +456,43 @@ bool mem_cgroup_oom_synchronize(bool wait); | |||
429 | extern int do_swap_account; | 456 | extern int do_swap_account; |
430 | #endif | 457 | #endif |
431 | 458 | ||
432 | struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page); | 459 | void lock_page_memcg(struct page *page); |
433 | void mem_cgroup_end_page_stat(struct mem_cgroup *memcg); | 460 | void unlock_page_memcg(struct page *page); |
434 | 461 | ||
435 | /** | 462 | /** |
436 | * mem_cgroup_update_page_stat - update page state statistics | 463 | * mem_cgroup_update_page_stat - update page state statistics |
437 | * @memcg: memcg to account against | 464 | * @page: the page |
438 | * @idx: page state item to account | 465 | * @idx: page state item to account |
439 | * @val: number of pages (positive or negative) | 466 | * @val: number of pages (positive or negative) |
440 | * | 467 | * |
441 | * See mem_cgroup_begin_page_stat() for locking requirements. | 468 | * The @page must be locked or the caller must use lock_page_memcg() |
469 | * to prevent double accounting when the page is concurrently being | ||
470 | * moved to another memcg: | ||
471 | * | ||
472 | * lock_page(page) or lock_page_memcg(page) | ||
473 | * if (TestClearPageState(page)) | ||
474 | * mem_cgroup_update_page_stat(page, state, -1); | ||
475 | * unlock_page(page) or unlock_page_memcg(page) | ||
442 | */ | 476 | */ |
443 | static inline void mem_cgroup_update_page_stat(struct mem_cgroup *memcg, | 477 | static inline void mem_cgroup_update_page_stat(struct page *page, |
444 | enum mem_cgroup_stat_index idx, int val) | 478 | enum mem_cgroup_stat_index idx, int val) |
445 | { | 479 | { |
446 | VM_BUG_ON(!rcu_read_lock_held()); | 480 | VM_BUG_ON(!(rcu_read_lock_held() || PageLocked(page))); |
447 | 481 | ||
448 | if (memcg) | 482 | if (page->mem_cgroup) |
449 | this_cpu_add(memcg->stat->count[idx], val); | 483 | this_cpu_add(page->mem_cgroup->stat->count[idx], val); |
450 | } | 484 | } |
451 | 485 | ||
452 | static inline void mem_cgroup_inc_page_stat(struct mem_cgroup *memcg, | 486 | static inline void mem_cgroup_inc_page_stat(struct page *page, |
453 | enum mem_cgroup_stat_index idx) | 487 | enum mem_cgroup_stat_index idx) |
454 | { | 488 | { |
455 | mem_cgroup_update_page_stat(memcg, idx, 1); | 489 | mem_cgroup_update_page_stat(page, idx, 1); |
456 | } | 490 | } |
457 | 491 | ||
458 | static inline void mem_cgroup_dec_page_stat(struct mem_cgroup *memcg, | 492 | static inline void mem_cgroup_dec_page_stat(struct page *page, |
459 | enum mem_cgroup_stat_index idx) | 493 | enum mem_cgroup_stat_index idx) |
460 | { | 494 | { |
461 | mem_cgroup_update_page_stat(memcg, idx, -1); | 495 | mem_cgroup_update_page_stat(page, idx, -1); |
462 | } | 496 | } |
463 | 497 | ||
464 | unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, | 498 | unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, |
@@ -496,8 +530,17 @@ void mem_cgroup_split_huge_fixup(struct page *head); | |||
496 | #endif | 530 | #endif |
497 | 531 | ||
498 | #else /* CONFIG_MEMCG */ | 532 | #else /* CONFIG_MEMCG */ |
533 | |||
534 | #define MEM_CGROUP_ID_SHIFT 0 | ||
535 | #define MEM_CGROUP_ID_MAX 0 | ||
536 | |||
499 | struct mem_cgroup; | 537 | struct mem_cgroup; |
500 | 538 | ||
539 | static inline bool mem_cgroup_disabled(void) | ||
540 | { | ||
541 | return true; | ||
542 | } | ||
543 | |||
501 | static inline void mem_cgroup_events(struct mem_cgroup *memcg, | 544 | static inline void mem_cgroup_events(struct mem_cgroup *memcg, |
502 | enum mem_cgroup_events_index idx, | 545 | enum mem_cgroup_events_index idx, |
503 | unsigned int nr) | 546 | unsigned int nr) |
@@ -539,7 +582,7 @@ static inline void mem_cgroup_uncharge_list(struct list_head *page_list) | |||
539 | { | 582 | { |
540 | } | 583 | } |
541 | 584 | ||
542 | static inline void mem_cgroup_replace_page(struct page *old, struct page *new) | 585 | static inline void mem_cgroup_migrate(struct page *old, struct page *new) |
543 | { | 586 | { |
544 | } | 587 | } |
545 | 588 | ||
@@ -580,9 +623,16 @@ static inline void mem_cgroup_iter_break(struct mem_cgroup *root, | |||
580 | { | 623 | { |
581 | } | 624 | } |
582 | 625 | ||
583 | static inline bool mem_cgroup_disabled(void) | 626 | static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg) |
584 | { | 627 | { |
585 | return true; | 628 | return 0; |
629 | } | ||
630 | |||
631 | static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id) | ||
632 | { | ||
633 | WARN_ON_ONCE(id); | ||
634 | /* XXX: This should always return root_mem_cgroup */ | ||
635 | return NULL; | ||
586 | } | 636 | } |
587 | 637 | ||
588 | static inline bool mem_cgroup_online(struct mem_cgroup *memcg) | 638 | static inline bool mem_cgroup_online(struct mem_cgroup *memcg) |
@@ -613,12 +663,11 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) | |||
613 | { | 663 | { |
614 | } | 664 | } |
615 | 665 | ||
616 | static inline struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page) | 666 | static inline void lock_page_memcg(struct page *page) |
617 | { | 667 | { |
618 | return NULL; | ||
619 | } | 668 | } |
620 | 669 | ||
621 | static inline void mem_cgroup_end_page_stat(struct mem_cgroup *memcg) | 670 | static inline void unlock_page_memcg(struct page *page) |
622 | { | 671 | { |
623 | } | 672 | } |
624 | 673 | ||
@@ -644,12 +693,12 @@ static inline bool mem_cgroup_oom_synchronize(bool wait) | |||
644 | return false; | 693 | return false; |
645 | } | 694 | } |
646 | 695 | ||
647 | static inline void mem_cgroup_inc_page_stat(struct mem_cgroup *memcg, | 696 | static inline void mem_cgroup_inc_page_stat(struct page *page, |
648 | enum mem_cgroup_stat_index idx) | 697 | enum mem_cgroup_stat_index idx) |
649 | { | 698 | { |
650 | } | 699 | } |
651 | 700 | ||
652 | static inline void mem_cgroup_dec_page_stat(struct mem_cgroup *memcg, | 701 | static inline void mem_cgroup_dec_page_stat(struct page *page, |
653 | enum mem_cgroup_stat_index idx) | 702 | enum mem_cgroup_stat_index idx) |
654 | { | 703 | { |
655 | } | 704 | } |
@@ -765,7 +814,7 @@ int __memcg_kmem_charge(struct page *page, gfp_t gfp, int order); | |||
765 | void __memcg_kmem_uncharge(struct page *page, int order); | 814 | void __memcg_kmem_uncharge(struct page *page, int order); |
766 | 815 | ||
767 | /* | 816 | /* |
768 | * helper for acessing a memcg's index. It will be used as an index in the | 817 | * helper for accessing a memcg's index. It will be used as an index in the |
769 | * child cache array in kmem_cache, and also to derive its name. This function | 818 | * child cache array in kmem_cache, and also to derive its name. This function |
770 | * will return -1 when this is not a kmem-limited memcg. | 819 | * will return -1 when this is not a kmem-limited memcg. |
771 | */ | 820 | */ |
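The id helpers give a compact u16 handle that can outlive a pointer to the memcg itself. A hedged usage sketch that follows the locking rule stated in the mem_cgroup_from_id() comment above (the function name is illustrative):

#include <linux/cgroup.h>
#include <linux/memcontrol.h>
#include <linux/rcupdate.h>

static struct mem_cgroup *memcg_lookup_live(unsigned short id)
{
	struct mem_cgroup *memcg;

	rcu_read_lock();
	memcg = mem_cgroup_from_id(id);
	if (memcg && !css_tryget_online(&memcg->css))
		memcg = NULL;	/* the cgroup went away under us */
	rcu_read_unlock();
	return memcg;		/* caller drops the ref with css_put() */
}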
diff --git a/include/linux/memory.h b/include/linux/memory.h index 8b8d8d12348e..82730adba950 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h | |||
@@ -109,6 +109,9 @@ extern void unregister_memory_notifier(struct notifier_block *nb); | |||
109 | extern int register_memory_isolate_notifier(struct notifier_block *nb); | 109 | extern int register_memory_isolate_notifier(struct notifier_block *nb); |
110 | extern void unregister_memory_isolate_notifier(struct notifier_block *nb); | 110 | extern void unregister_memory_isolate_notifier(struct notifier_block *nb); |
111 | extern int register_new_memory(int, struct mem_section *); | 111 | extern int register_new_memory(int, struct mem_section *); |
112 | extern int memory_block_change_state(struct memory_block *mem, | ||
113 | unsigned long to_state, | ||
114 | unsigned long from_state_req); | ||
112 | #ifdef CONFIG_MEMORY_HOTREMOVE | 115 | #ifdef CONFIG_MEMORY_HOTREMOVE |
113 | extern int unregister_memory_section(struct mem_section *); | 116 | extern int unregister_memory_section(struct mem_section *); |
114 | #endif | 117 | #endif |
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 43405992d027..adbef586e696 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h | |||
@@ -99,6 +99,8 @@ extern void __online_page_free(struct page *page); | |||
99 | 99 | ||
100 | extern int try_online_node(int nid); | 100 | extern int try_online_node(int nid); |
101 | 101 | ||
102 | extern bool memhp_auto_online; | ||
103 | |||
102 | #ifdef CONFIG_MEMORY_HOTREMOVE | 104 | #ifdef CONFIG_MEMORY_HOTREMOVE |
103 | extern bool is_pageblock_removable_nolock(struct page *page); | 105 | extern bool is_pageblock_removable_nolock(struct page *page); |
104 | extern int arch_remove_memory(u64 start, u64 size); | 106 | extern int arch_remove_memory(u64 start, u64 size); |
@@ -196,6 +198,9 @@ void put_online_mems(void); | |||
196 | void mem_hotplug_begin(void); | 198 | void mem_hotplug_begin(void); |
197 | void mem_hotplug_done(void); | 199 | void mem_hotplug_done(void); |
198 | 200 | ||
201 | extern void set_zone_contiguous(struct zone *zone); | ||
202 | extern void clear_zone_contiguous(struct zone *zone); | ||
203 | |||
199 | #else /* ! CONFIG_MEMORY_HOTPLUG */ | 204 | #else /* ! CONFIG_MEMORY_HOTPLUG */ |
200 | /* | 205 | /* |
201 | * Stub functions for when hotplug is off | 206 | * Stub functions for when hotplug is off |
@@ -267,7 +272,7 @@ static inline void remove_memory(int nid, u64 start, u64 size) {} | |||
267 | extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, | 272 | extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, |
268 | void *arg, int (*func)(struct memory_block *, void *)); | 273 | void *arg, int (*func)(struct memory_block *, void *)); |
269 | extern int add_memory(int nid, u64 start, u64 size); | 274 | extern int add_memory(int nid, u64 start, u64 size); |
270 | extern int add_memory_resource(int nid, struct resource *resource); | 275 | extern int add_memory_resource(int nid, struct resource *resource, bool online); |
271 | extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default, | 276 | extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default, |
272 | bool for_device); | 277 | bool for_device); |
273 | extern int arch_add_memory(int nid, u64 start, u64 size, bool for_device); | 278 | extern int arch_add_memory(int nid, u64 start, u64 size, bool for_device); |
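add_memory_resource() gains a bool so each caller decides whether hot-added memory is onlined immediately; presumably the hotplug entry points feed it the new memhp_auto_online knob declared above. A hedged sketch (the wrapper is illustrative):

#include <linux/ioport.h>
#include <linux/memory_hotplug.h>

static int hotplug_resource(int nid, struct resource *res)
{
	/* online right away iff the admin enabled auto-onlining */
	return add_memory_resource(nid, res, memhp_auto_online);
}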
diff --git a/include/linux/migrate.h b/include/linux/migrate.h index cac1c0904d5f..9b50325e4ddf 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h | |||
@@ -23,9 +23,13 @@ enum migrate_reason { | |||
23 | MR_SYSCALL, /* also applies to cpusets */ | 23 | MR_SYSCALL, /* also applies to cpusets */ |
24 | MR_MEMPOLICY_MBIND, | 24 | MR_MEMPOLICY_MBIND, |
25 | MR_NUMA_MISPLACED, | 25 | MR_NUMA_MISPLACED, |
26 | MR_CMA | 26 | MR_CMA, |
27 | MR_TYPES | ||
27 | }; | 28 | }; |
28 | 29 | ||
30 | /* In mm/debug.c; also keep sync with include/trace/events/migrate.h */ | ||
31 | extern char *migrate_reason_names[MR_TYPES]; | ||
32 | |||
29 | #ifdef CONFIG_MIGRATION | 33 | #ifdef CONFIG_MIGRATION |
30 | 34 | ||
31 | extern void putback_movable_pages(struct list_head *l); | 35 | extern void putback_movable_pages(struct list_head *l); |
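With MR_TYPES acting as a sentinel, lookups into migrate_reason_names can be bounds-checked. A hedged sketch (the helper is illustrative):

#include <linux/migrate.h>

static const char *migrate_reason_name(enum migrate_reason reason)
{
	return reason < MR_TYPES ? migrate_reason_names[reason] : "unknown";
}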
diff --git a/include/linux/mm.h b/include/linux/mm.h index 3579d1e2fe3a..dbf1eddab964 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -905,20 +905,11 @@ static inline struct mem_cgroup *page_memcg(struct page *page) | |||
905 | { | 905 | { |
906 | return page->mem_cgroup; | 906 | return page->mem_cgroup; |
907 | } | 907 | } |
908 | |||
909 | static inline void set_page_memcg(struct page *page, struct mem_cgroup *memcg) | ||
910 | { | ||
911 | page->mem_cgroup = memcg; | ||
912 | } | ||
913 | #else | 908 | #else |
914 | static inline struct mem_cgroup *page_memcg(struct page *page) | 909 | static inline struct mem_cgroup *page_memcg(struct page *page) |
915 | { | 910 | { |
916 | return NULL; | 911 | return NULL; |
917 | } | 912 | } |
918 | |||
919 | static inline void set_page_memcg(struct page *page, struct mem_cgroup *memcg) | ||
920 | { | ||
921 | } | ||
922 | #endif | 913 | #endif |
923 | 914 | ||
924 | /* | 915 | /* |
@@ -1300,10 +1291,9 @@ int __set_page_dirty_nobuffers(struct page *page); | |||
1300 | int __set_page_dirty_no_writeback(struct page *page); | 1291 | int __set_page_dirty_no_writeback(struct page *page); |
1301 | int redirty_page_for_writepage(struct writeback_control *wbc, | 1292 | int redirty_page_for_writepage(struct writeback_control *wbc, |
1302 | struct page *page); | 1293 | struct page *page); |
1303 | void account_page_dirtied(struct page *page, struct address_space *mapping, | 1294 | void account_page_dirtied(struct page *page, struct address_space *mapping); |
1304 | struct mem_cgroup *memcg); | ||
1305 | void account_page_cleaned(struct page *page, struct address_space *mapping, | 1295 | void account_page_cleaned(struct page *page, struct address_space *mapping, |
1306 | struct mem_cgroup *memcg, struct bdi_writeback *wb); | 1296 | struct bdi_writeback *wb); |
1307 | int set_page_dirty(struct page *page); | 1297 | int set_page_dirty(struct page *page); |
1308 | int set_page_dirty_lock(struct page *page); | 1298 | int set_page_dirty_lock(struct page *page); |
1309 | void cancel_dirty_page(struct page *page); | 1299 | void cancel_dirty_page(struct page *page); |
@@ -2178,6 +2168,17 @@ extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, | |||
2178 | unsigned long size, pte_fn_t fn, void *data); | 2168 | unsigned long size, pte_fn_t fn, void *data); |
2179 | 2169 | ||
2180 | 2170 | ||
2171 | #ifdef CONFIG_PAGE_POISONING | ||
2172 | extern bool page_poisoning_enabled(void); | ||
2173 | extern void kernel_poison_pages(struct page *page, int numpages, int enable); | ||
2174 | extern bool page_is_poisoned(struct page *page); | ||
2175 | #else | ||
2176 | static inline bool page_poisoning_enabled(void) { return false; } | ||
2177 | static inline void kernel_poison_pages(struct page *page, int numpages, | ||
2178 | int enable) { } | ||
2179 | static inline bool page_is_poisoned(struct page *page) { return false; } | ||
2180 | #endif | ||
2181 | |||
2181 | #ifdef CONFIG_DEBUG_PAGEALLOC | 2182 | #ifdef CONFIG_DEBUG_PAGEALLOC |
2182 | extern bool _debug_pagealloc_enabled; | 2183 | extern bool _debug_pagealloc_enabled; |
2183 | extern void __kernel_map_pages(struct page *page, int numpages, int enable); | 2184 | extern void __kernel_map_pages(struct page *page, int numpages, int enable); |
@@ -2197,14 +2198,18 @@ kernel_map_pages(struct page *page, int numpages, int enable) | |||
2197 | } | 2198 | } |
2198 | #ifdef CONFIG_HIBERNATION | 2199 | #ifdef CONFIG_HIBERNATION |
2199 | extern bool kernel_page_present(struct page *page); | 2200 | extern bool kernel_page_present(struct page *page); |
2200 | #endif /* CONFIG_HIBERNATION */ | 2201 | #endif /* CONFIG_HIBERNATION */ |
2201 | #else | 2202 | #else /* CONFIG_DEBUG_PAGEALLOC */ |
2202 | static inline void | 2203 | static inline void |
2203 | kernel_map_pages(struct page *page, int numpages, int enable) {} | 2204 | kernel_map_pages(struct page *page, int numpages, int enable) {} |
2204 | #ifdef CONFIG_HIBERNATION | 2205 | #ifdef CONFIG_HIBERNATION |
2205 | static inline bool kernel_page_present(struct page *page) { return true; } | 2206 | static inline bool kernel_page_present(struct page *page) { return true; } |
2206 | #endif /* CONFIG_HIBERNATION */ | 2207 | #endif /* CONFIG_HIBERNATION */ |
2207 | #endif | 2208 | static inline bool debug_pagealloc_enabled(void) |
2209 | { | ||
2210 | return false; | ||
2211 | } | ||
2212 | #endif /* CONFIG_DEBUG_PAGEALLOC */ | ||
2208 | 2213 | ||
2209 | #ifdef __HAVE_ARCH_GATE_AREA | 2214 | #ifdef __HAVE_ARCH_GATE_AREA |
2210 | extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm); | 2215 | extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm); |
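Both new stub sets in the mm.h hunks above follow the same convention: with the config option off, the inline no-op versions let callers test and invoke the hooks unconditionally, and the compiler discards the dead branches. A minimal caller sketch (the function name is hypothetical):

static void prep_pages_example(struct page *page, int numpages)
{
	/* No #ifdef needed: with CONFIG_PAGE_POISONING=n this folds away. */
	if (page_poisoning_enabled())
		kernel_poison_pages(page, numpages, 0);	/* 0 == unpoison */

	/* Likewise for CONFIG_DEBUG_PAGEALLOC via the new helper. */
	if (debug_pagealloc_enabled())
		kernel_map_pages(page, numpages, 1);
}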
diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index 053824b0a412..de7be78c6f0e 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h | |||
@@ -9,8 +9,7 @@ struct vm_area_struct; | |||
9 | struct mm_struct; | 9 | struct mm_struct; |
10 | 10 | ||
11 | extern void dump_page(struct page *page, const char *reason); | 11 | extern void dump_page(struct page *page, const char *reason); |
12 | extern void dump_page_badflags(struct page *page, const char *reason, | 12 | extern void __dump_page(struct page *page, const char *reason); |
13 | unsigned long badflags); | ||
14 | void dump_vma(const struct vm_area_struct *vma); | 13 | void dump_vma(const struct vm_area_struct *vma); |
15 | void dump_mm(const struct mm_struct *mm); | 14 | void dump_mm(const struct mm_struct *mm); |
16 | 15 | ||
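The __dump_page() split replaces the old dump_page_badflags() variant: the double-underscore function is the raw worker, and dump_page() becomes a thin wrapper free to add extra reporting. A plausible shape for the mm/debug.c side, as a sketch rather than the verbatim implementation:

void dump_page(struct page *page, const char *reason)
{
	__dump_page(page, reason);
	dump_page_owner(page);	/* the page_owner helper added further down */
}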
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 7b6c2cfee390..6de02ac378a0 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h | |||
@@ -63,6 +63,9 @@ enum { | |||
63 | MIGRATE_TYPES | 63 | MIGRATE_TYPES |
64 | }; | 64 | }; |
65 | 65 | ||
66 | /* In mm/page_alloc.c; keep in sync also with show_migration_types() there */ | ||
67 | extern char * const migratetype_names[MIGRATE_TYPES]; | ||
68 | |||
66 | #ifdef CONFIG_CMA | 69 | #ifdef CONFIG_CMA |
67 | # define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA) | 70 | # define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA) |
68 | #else | 71 | #else |
@@ -209,10 +212,12 @@ struct zone_reclaim_stat { | |||
209 | }; | 212 | }; |
210 | 213 | ||
211 | struct lruvec { | 214 | struct lruvec { |
212 | struct list_head lists[NR_LRU_LISTS]; | 215 | struct list_head lists[NR_LRU_LISTS]; |
213 | struct zone_reclaim_stat reclaim_stat; | 216 | struct zone_reclaim_stat reclaim_stat; |
217 | /* Evictions & activations on the inactive file list */ | ||
218 | atomic_long_t inactive_age; | ||
214 | #ifdef CONFIG_MEMCG | 219 | #ifdef CONFIG_MEMCG |
215 | struct zone *zone; | 220 | struct zone *zone; |
216 | #endif | 221 | #endif |
217 | }; | 222 | }; |
218 | 223 | ||
@@ -487,9 +492,6 @@ struct zone { | |||
487 | spinlock_t lru_lock; | 492 | spinlock_t lru_lock; |
488 | struct lruvec lruvec; | 493 | struct lruvec lruvec; |
489 | 494 | ||
490 | /* Evictions & activations on the inactive file list */ | ||
491 | atomic_long_t inactive_age; | ||
492 | |||
493 | /* | 495 | /* |
494 | * When free pages are below this point, additional steps are taken | 496 | * When free pages are below this point, additional steps are taken |
495 | * when reading the number of free pages to avoid per-cpu counter | 497 | * when reading the number of free pages to avoid per-cpu counter |
@@ -520,6 +522,8 @@ struct zone { | |||
520 | bool compact_blockskip_flush; | 522 | bool compact_blockskip_flush; |
521 | #endif | 523 | #endif |
522 | 524 | ||
525 | bool contiguous; | ||
526 | |||
523 | ZONE_PADDING(_pad3_) | 527 | ZONE_PADDING(_pad3_) |
524 | /* Zone statistics */ | 528 | /* Zone statistics */ |
525 | atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; | 529 | atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; |
@@ -758,6 +762,8 @@ static inline struct zone *lruvec_zone(struct lruvec *lruvec) | |||
758 | #endif | 762 | #endif |
759 | } | 763 | } |
760 | 764 | ||
765 | extern unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru); | ||
766 | |||
761 | #ifdef CONFIG_HAVE_MEMORY_PRESENT | 767 | #ifdef CONFIG_HAVE_MEMORY_PRESENT |
762 | void memory_present(int nid, unsigned long start, unsigned long end); | 768 | void memory_present(int nid, unsigned long start, unsigned long end); |
763 | #else | 769 | #else |
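Moving inactive_age from struct zone into struct lruvec, together with the newly exported lruvec_lru_size(), lets reclaim heuristics work purely in lruvec terms, which matters once a memcg's lruvec is not simply the zone's. A sketch of the kind of balance check this enables (the helper name is hypothetical):

static bool inactive_file_is_low_example(struct lruvec *lruvec)
{
	unsigned long inactive, active;

	inactive = lruvec_lru_size(lruvec, LRU_INACTIVE_FILE);
	active = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE);

	/* No direct zone access: works for global and memcg reclaim alike. */
	return inactive < active;
}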
diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h index 17f118a82854..e1fe7cf5bddf 100644 --- a/include/linux/page_ext.h +++ b/include/linux/page_ext.h | |||
@@ -45,6 +45,7 @@ struct page_ext { | |||
45 | unsigned int order; | 45 | unsigned int order; |
46 | gfp_t gfp_mask; | 46 | gfp_t gfp_mask; |
47 | unsigned int nr_entries; | 47 | unsigned int nr_entries; |
48 | int last_migrate_reason; | ||
48 | unsigned long trace_entries[8]; | 49 | unsigned long trace_entries[8]; |
49 | #endif | 50 | #endif |
50 | }; | 51 | }; |
diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h index cacaabea8a09..46f1b939948c 100644 --- a/include/linux/page_owner.h +++ b/include/linux/page_owner.h | |||
@@ -1,38 +1,54 @@ | |||
1 | #ifndef __LINUX_PAGE_OWNER_H | 1 | #ifndef __LINUX_PAGE_OWNER_H |
2 | #define __LINUX_PAGE_OWNER_H | 2 | #define __LINUX_PAGE_OWNER_H |
3 | 3 | ||
4 | #include <linux/jump_label.h> | ||
5 | |||
4 | #ifdef CONFIG_PAGE_OWNER | 6 | #ifdef CONFIG_PAGE_OWNER |
5 | extern bool page_owner_inited; | 7 | extern struct static_key_false page_owner_inited; |
6 | extern struct page_ext_operations page_owner_ops; | 8 | extern struct page_ext_operations page_owner_ops; |
7 | 9 | ||
8 | extern void __reset_page_owner(struct page *page, unsigned int order); | 10 | extern void __reset_page_owner(struct page *page, unsigned int order); |
9 | extern void __set_page_owner(struct page *page, | 11 | extern void __set_page_owner(struct page *page, |
10 | unsigned int order, gfp_t gfp_mask); | 12 | unsigned int order, gfp_t gfp_mask); |
11 | extern gfp_t __get_page_owner_gfp(struct page *page); | 13 | extern gfp_t __get_page_owner_gfp(struct page *page); |
14 | extern void __copy_page_owner(struct page *oldpage, struct page *newpage); | ||
15 | extern void __set_page_owner_migrate_reason(struct page *page, int reason); | ||
16 | extern void __dump_page_owner(struct page *page); | ||
12 | 17 | ||
13 | static inline void reset_page_owner(struct page *page, unsigned int order) | 18 | static inline void reset_page_owner(struct page *page, unsigned int order) |
14 | { | 19 | { |
15 | if (likely(!page_owner_inited)) | 20 | if (static_branch_unlikely(&page_owner_inited)) |
16 | return; | 21 | __reset_page_owner(page, order); |
17 | |||
18 | __reset_page_owner(page, order); | ||
19 | } | 22 | } |
20 | 23 | ||
21 | static inline void set_page_owner(struct page *page, | 24 | static inline void set_page_owner(struct page *page, |
22 | unsigned int order, gfp_t gfp_mask) | 25 | unsigned int order, gfp_t gfp_mask) |
23 | { | 26 | { |
24 | if (likely(!page_owner_inited)) | 27 | if (static_branch_unlikely(&page_owner_inited)) |
25 | return; | 28 | __set_page_owner(page, order, gfp_mask); |
26 | |||
27 | __set_page_owner(page, order, gfp_mask); | ||
28 | } | 29 | } |
29 | 30 | ||
30 | static inline gfp_t get_page_owner_gfp(struct page *page) | 31 | static inline gfp_t get_page_owner_gfp(struct page *page) |
31 | { | 32 | { |
32 | if (likely(!page_owner_inited)) | 33 | if (static_branch_unlikely(&page_owner_inited)) |
34 | return __get_page_owner_gfp(page); | ||
35 | else | ||
33 | return 0; | 36 | return 0; |
34 | 37 | } | |
35 | return __get_page_owner_gfp(page); | 38 | static inline void copy_page_owner(struct page *oldpage, struct page *newpage) |
39 | { | ||
40 | if (static_branch_unlikely(&page_owner_inited)) | ||
41 | __copy_page_owner(oldpage, newpage); | ||
42 | } | ||
43 | static inline void set_page_owner_migrate_reason(struct page *page, int reason) | ||
44 | { | ||
45 | if (static_branch_unlikely(&page_owner_inited)) | ||
46 | __set_page_owner_migrate_reason(page, reason); | ||
47 | } | ||
48 | static inline void dump_page_owner(struct page *page) | ||
49 | { | ||
50 | if (static_branch_unlikely(&page_owner_inited)) | ||
51 | __dump_page_owner(page); | ||
36 | } | 52 | } |
37 | #else | 53 | #else |
38 | static inline void reset_page_owner(struct page *page, unsigned int order) | 54 | static inline void reset_page_owner(struct page *page, unsigned int order) |
@@ -46,6 +62,14 @@ static inline gfp_t get_page_owner_gfp(struct page *page) | |||
46 | { | 62 | { |
47 | return 0; | 63 | return 0; |
48 | } | 64 | } |
49 | 65 | static inline void copy_page_owner(struct page *oldpage, struct page *newpage) | |
66 | { | ||
67 | } | ||
68 | static inline void set_page_owner_migrate_reason(struct page *page, int reason) | ||
69 | { | ||
70 | } | ||
71 | static inline void dump_page_owner(struct page *page) | ||
72 | { | ||
73 | } | ||
50 | #endif /* CONFIG_PAGE_OWNER */ | 74 | #endif /* CONFIG_PAGE_OWNER */ |
51 | #endif /* __LINUX_PAGE_OWNER_H */ | 75 | #endif /* __LINUX_PAGE_OWNER_H */ |
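The bool-to-static-key conversion above changes the disabled-case cost from a memory load plus conditional to a branch patched out at runtime. The other half of the pattern, flipping the key once at boot, would look roughly like this (a sketch; the real code sits in mm/page_owner.c):

#include <linux/jump_label.h>

DEFINE_STATIC_KEY_FALSE(page_owner_inited);

static void init_page_owner_example(void)
{
	/* ... allocate and initialize page_ext storage first ... */
	static_branch_enable(&page_owner_inited);
}

Note also that the sense of the fast-path test flipped: the old code branched around the work with likely(!page_owner_inited), while static_branch_unlikely() expresses the same "usually off" expectation directly.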
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 92395a0a7dc5..183b15ea052b 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h | |||
@@ -663,8 +663,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping, | |||
663 | int add_to_page_cache_lru(struct page *page, struct address_space *mapping, | 663 | int add_to_page_cache_lru(struct page *page, struct address_space *mapping, |
664 | pgoff_t index, gfp_t gfp_mask); | 664 | pgoff_t index, gfp_t gfp_mask); |
665 | extern void delete_from_page_cache(struct page *page); | 665 | extern void delete_from_page_cache(struct page *page); |
666 | extern void __delete_from_page_cache(struct page *page, void *shadow, | 666 | extern void __delete_from_page_cache(struct page *page, void *shadow); |
667 | struct mem_cgroup *memcg); | ||
668 | int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask); | 667 | int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask); |
669 | 668 | ||
670 | /* | 669 | /* |
diff --git a/include/linux/poison.h b/include/linux/poison.h index 4a27153574e2..51334edec506 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h | |||
@@ -30,7 +30,11 @@ | |||
30 | #define TIMER_ENTRY_STATIC ((void *) 0x300 + POISON_POINTER_DELTA) | 30 | #define TIMER_ENTRY_STATIC ((void *) 0x300 + POISON_POINTER_DELTA) |
31 | 31 | ||
32 | /********** mm/debug-pagealloc.c **********/ | 32 | /********** mm/debug-pagealloc.c **********/ |
33 | #ifdef CONFIG_PAGE_POISONING_ZERO | ||
34 | #define PAGE_POISON 0x00 | ||
35 | #else | ||
33 | #define PAGE_POISON 0xaa | 36 | #define PAGE_POISON 0xaa |
37 | #endif | ||
34 | 38 | ||
35 | /********** mm/page_alloc.c ************/ | 39 | /********** mm/page_alloc.c ************/ |
36 | 40 | ||
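Making PAGE_POISON configurable to 0x00 is what enables the sanitization use case: a free page whose poison survives until allocation is also already zeroed, so __GFP_ZERO allocations can skip the memset. A sketch of the check that property rests on:

/* Sketch: with PAGE_POISON == 0x00, a page that passes this check is
 * simultaneously "still poisoned" and "already zeroed". */
static bool page_still_poisoned_example(const unsigned char *addr, size_t len)
{
	size_t i;

	for (i = 0; i < len; i++)
		if (addr[i] != PAGE_POISON)
			return false;
	return true;
}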
diff --git a/include/linux/slab.h b/include/linux/slab.h index 3627d5c1bc47..e4b568738ca3 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h | |||
@@ -20,7 +20,7 @@ | |||
20 | * Flags to pass to kmem_cache_create(). | 20 | * Flags to pass to kmem_cache_create(). |
21 | * The ones marked DEBUG are only valid if CONFIG_DEBUG_SLAB is set. | 21 | * The ones marked DEBUG are only valid if CONFIG_DEBUG_SLAB is set. |
22 | */ | 22 | */ |
23 | #define SLAB_DEBUG_FREE 0x00000100UL /* DEBUG: Perform (expensive) checks on free */ | 23 | #define SLAB_CONSISTENCY_CHECKS 0x00000100UL /* DEBUG: Perform (expensive) checks on alloc/free */ |
24 | #define SLAB_RED_ZONE 0x00000400UL /* DEBUG: Red zone objs in a cache */ | 24 | #define SLAB_RED_ZONE 0x00000400UL /* DEBUG: Red zone objs in a cache */ |
25 | #define SLAB_POISON 0x00000800UL /* DEBUG: Poison objects */ | 25 | #define SLAB_POISON 0x00000800UL /* DEBUG: Poison objects */ |
26 | #define SLAB_HWCACHE_ALIGN 0x00002000UL /* Align objs on cache lines */ | 26 | #define SLAB_HWCACHE_ALIGN 0x00002000UL /* Align objs on cache lines */ |
@@ -314,7 +314,7 @@ void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags) __assume_slab_alignment | |||
314 | void kmem_cache_free(struct kmem_cache *, void *); | 314 | void kmem_cache_free(struct kmem_cache *, void *); |
315 | 315 | ||
316 | /* | 316 | /* |
317 | * Bulk allocation and freeing operations. These are accellerated in an | 317 | * Bulk allocation and freeing operations. These are accelerated in an |
318 | * allocator specific way to avoid taking locks repeatedly or building | 318 | * allocator specific way to avoid taking locks repeatedly or building |
319 | * metadata structures unnecessarily. | 319 | * metadata structures unnecessarily. |
320 | * | 320 | * |
@@ -323,6 +323,15 @@ void kmem_cache_free(struct kmem_cache *, void *); | |||
323 | void kmem_cache_free_bulk(struct kmem_cache *, size_t, void **); | 323 | void kmem_cache_free_bulk(struct kmem_cache *, size_t, void **); |
324 | int kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); | 324 | int kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); |
325 | 325 | ||
326 | /* | ||
327 | * Caller must not use kfree_bulk() on memory not originally allocated | ||
328 | * by kmalloc(), because the SLOB allocator cannot handle this. | ||
329 | */ | ||
330 | static __always_inline void kfree_bulk(size_t size, void **p) | ||
331 | { | ||
332 | kmem_cache_free_bulk(NULL, size, p); | ||
333 | } | ||
334 | |||
326 | #ifdef CONFIG_NUMA | 335 | #ifdef CONFIG_NUMA |
327 | void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment; | 336 | void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment; |
328 | void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node) __assume_slab_alignment; | 337 | void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node) __assume_slab_alignment; |
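kfree_bulk() is deliberately a thin wrapper: passing a NULL cache tells kmem_cache_free_bulk() to derive each object's cache (or kmalloc size class) from the object itself, which is exactly what SLOB cannot do, hence the restriction in the comment. A usage sketch under that constraint:

static void bulk_example(void)
{
	void *objs[8];
	size_t i;

	for (i = 0; i < ARRAY_SIZE(objs); i++) {
		objs[i] = kmalloc(64, GFP_KERNEL);
		if (!objs[i]) {
			kfree_bulk(i, objs);	/* release the partial batch */
			return;
		}
	}

	/* ... use the objects ... */

	kfree_bulk(ARRAY_SIZE(objs), objs);
}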
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index cf139d3fa513..e878ba35ae91 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h | |||
@@ -60,6 +60,9 @@ struct kmem_cache { | |||
60 | atomic_t allocmiss; | 60 | atomic_t allocmiss; |
61 | atomic_t freehit; | 61 | atomic_t freehit; |
62 | atomic_t freemiss; | 62 | atomic_t freemiss; |
63 | #ifdef CONFIG_DEBUG_SLAB_LEAK | ||
64 | atomic_t store_user_clean; | ||
65 | #endif | ||
63 | 66 | ||
64 | /* | 67 | /* |
65 | * If debugging is enabled, then the allocator can add additional | 68 | * If debugging is enabled, then the allocator can add additional |
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index b7e57927f521..ac5143f95ee6 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h | |||
@@ -81,6 +81,7 @@ struct kmem_cache { | |||
81 | int reserved; /* Reserved bytes at the end of slabs */ | 81 | int reserved; /* Reserved bytes at the end of slabs */ |
82 | const char *name; /* Name (only for display!) */ | 82 | const char *name; /* Name (only for display!) */ |
83 | struct list_head list; /* List of slab caches */ | 83 | struct list_head list; /* List of slab caches */ |
84 | int red_left_pad; /* Left redzone padding size */ | ||
84 | #ifdef CONFIG_SYSFS | 85 | #ifdef CONFIG_SYSFS |
85 | struct kobject kobj; /* For sysfs */ | 86 | struct kobject kobj; /* For sysfs */ |
86 | #endif | 87 | #endif |
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 925730bc9fc1..705df7db4482 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h | |||
@@ -15,16 +15,6 @@ struct tracer; | |||
15 | struct dentry; | 15 | struct dentry; |
16 | struct bpf_prog; | 16 | struct bpf_prog; |
17 | 17 | ||
18 | struct trace_print_flags { | ||
19 | unsigned long mask; | ||
20 | const char *name; | ||
21 | }; | ||
22 | |||
23 | struct trace_print_flags_u64 { | ||
24 | unsigned long long mask; | ||
25 | const char *name; | ||
26 | }; | ||
27 | |||
28 | const char *trace_print_flags_seq(struct trace_seq *p, const char *delim, | 18 | const char *trace_print_flags_seq(struct trace_seq *p, const char *delim, |
29 | unsigned long flags, | 19 | unsigned long flags, |
30 | const struct trace_print_flags *flag_array); | 20 | const struct trace_print_flags *flag_array); |
diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h index e1ee97c713bf..4ac89acb6136 100644 --- a/include/linux/tracepoint-defs.h +++ b/include/linux/tracepoint-defs.h | |||
@@ -3,13 +3,23 @@ | |||
3 | 3 | ||
4 | /* | 4 | /* |
5 | * File can be included directly by headers who only want to access | 5 | * File can be included directly by headers who only want to access |
6 | * tracepoint->key to guard out of line trace calls. Otherwise | 6 | * tracepoint->key to guard out of line trace calls, or the definition of |
7 | * linux/tracepoint.h should be used. | 7 | * trace_print_flags{_u64}. Otherwise linux/tracepoint.h should be used. |
8 | */ | 8 | */ |
9 | 9 | ||
10 | #include <linux/atomic.h> | 10 | #include <linux/atomic.h> |
11 | #include <linux/static_key.h> | 11 | #include <linux/static_key.h> |
12 | 12 | ||
13 | struct trace_print_flags { | ||
14 | unsigned long mask; | ||
15 | const char *name; | ||
16 | }; | ||
17 | |||
18 | struct trace_print_flags_u64 { | ||
19 | unsigned long long mask; | ||
20 | const char *name; | ||
21 | }; | ||
22 | |||
13 | struct tracepoint_func { | 23 | struct tracepoint_func { |
14 | void *func; | 24 | void *func; |
15 | void *data; | 25 | void *data; |
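Relocating the two struct definitions here means a header can declare flag-name tables with only the lightweight tracepoint-defs.h include, instead of dragging in all of linux/tracepoint.h. In sketch form (demo_flags is hypothetical):

#include <linux/tracepoint-defs.h>	/* lightweight: no tracepoint machinery */

static const struct trace_print_flags demo_flags[] = {
	{ 0x1UL, "read"  },
	{ 0x2UL, "write" },
	{ 0,     NULL    },	/* terminator, as iterating decoders expect */
};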
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index d866f21efbbf..677807f29a1c 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h | |||
@@ -6,7 +6,7 @@ | |||
6 | 6 | ||
7 | #include <linux/writeback.h> | 7 | #include <linux/writeback.h> |
8 | #include <linux/tracepoint.h> | 8 | #include <linux/tracepoint.h> |
9 | #include <trace/events/gfpflags.h> | 9 | #include <trace/events/mmflags.h> |
10 | 10 | ||
11 | struct btrfs_root; | 11 | struct btrfs_root; |
12 | struct btrfs_fs_info; | 12 | struct btrfs_fs_info; |
diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h index c92d1e1cbad9..111e5666e5eb 100644 --- a/include/trace/events/compaction.h +++ b/include/trace/events/compaction.h | |||
@@ -7,7 +7,7 @@ | |||
7 | #include <linux/types.h> | 7 | #include <linux/types.h> |
8 | #include <linux/list.h> | 8 | #include <linux/list.h> |
9 | #include <linux/tracepoint.h> | 9 | #include <linux/tracepoint.h> |
10 | #include <trace/events/gfpflags.h> | 10 | #include <trace/events/mmflags.h> |
11 | 11 | ||
12 | #define COMPACTION_STATUS \ | 12 | #define COMPACTION_STATUS \ |
13 | EM( COMPACT_DEFERRED, "deferred") \ | 13 | EM( COMPACT_DEFERRED, "deferred") \ |
diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h deleted file mode 100644 index dde6bf092c8a..000000000000 --- a/include/trace/events/gfpflags.h +++ /dev/null | |||
@@ -1,43 +0,0 @@ | |||
1 | /* | ||
2 | * The order of these masks is important. Matching masks will be seen | ||
3 | * first and the left over flags will end up showing by themselves. | ||
4 | * | ||
5 | * For example, if we have GFP_KERNEL before GFP_USER we wil get: | ||
6 | * | ||
7 | * GFP_KERNEL|GFP_HARDWALL | ||
8 | * | ||
9 | * Thus most bits set go first. | ||
10 | */ | ||
11 | #define show_gfp_flags(flags) \ | ||
12 | (flags) ? __print_flags(flags, "|", \ | ||
13 | {(unsigned long)GFP_TRANSHUGE, "GFP_TRANSHUGE"}, \ | ||
14 | {(unsigned long)GFP_HIGHUSER_MOVABLE, "GFP_HIGHUSER_MOVABLE"}, \ | ||
15 | {(unsigned long)GFP_HIGHUSER, "GFP_HIGHUSER"}, \ | ||
16 | {(unsigned long)GFP_USER, "GFP_USER"}, \ | ||
17 | {(unsigned long)GFP_TEMPORARY, "GFP_TEMPORARY"}, \ | ||
18 | {(unsigned long)GFP_KERNEL, "GFP_KERNEL"}, \ | ||
19 | {(unsigned long)GFP_NOFS, "GFP_NOFS"}, \ | ||
20 | {(unsigned long)GFP_ATOMIC, "GFP_ATOMIC"}, \ | ||
21 | {(unsigned long)GFP_NOIO, "GFP_NOIO"}, \ | ||
22 | {(unsigned long)__GFP_HIGH, "GFP_HIGH"}, \ | ||
23 | {(unsigned long)__GFP_ATOMIC, "GFP_ATOMIC"}, \ | ||
24 | {(unsigned long)__GFP_IO, "GFP_IO"}, \ | ||
25 | {(unsigned long)__GFP_COLD, "GFP_COLD"}, \ | ||
26 | {(unsigned long)__GFP_NOWARN, "GFP_NOWARN"}, \ | ||
27 | {(unsigned long)__GFP_REPEAT, "GFP_REPEAT"}, \ | ||
28 | {(unsigned long)__GFP_NOFAIL, "GFP_NOFAIL"}, \ | ||
29 | {(unsigned long)__GFP_NORETRY, "GFP_NORETRY"}, \ | ||
30 | {(unsigned long)__GFP_COMP, "GFP_COMP"}, \ | ||
31 | {(unsigned long)__GFP_ZERO, "GFP_ZERO"}, \ | ||
32 | {(unsigned long)__GFP_NOMEMALLOC, "GFP_NOMEMALLOC"}, \ | ||
33 | {(unsigned long)__GFP_MEMALLOC, "GFP_MEMALLOC"}, \ | ||
34 | {(unsigned long)__GFP_HARDWALL, "GFP_HARDWALL"}, \ | ||
35 | {(unsigned long)__GFP_THISNODE, "GFP_THISNODE"}, \ | ||
36 | {(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \ | ||
37 | {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"}, \ | ||
38 | {(unsigned long)__GFP_NOTRACK, "GFP_NOTRACK"}, \ | ||
39 | {(unsigned long)__GFP_DIRECT_RECLAIM, "GFP_DIRECT_RECLAIM"}, \ | ||
40 | {(unsigned long)__GFP_KSWAPD_RECLAIM, "GFP_KSWAPD_RECLAIM"}, \ | ||
41 | {(unsigned long)__GFP_OTHER_NODE, "GFP_OTHER_NODE"} \ | ||
42 | ) : "GFP_NOWAIT" | ||
43 | |||
diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h index 47c6212d8f3c..551ba4acde4d 100644 --- a/include/trace/events/huge_memory.h +++ b/include/trace/events/huge_memory.h | |||
@@ -6,8 +6,6 @@ | |||
6 | 6 | ||
7 | #include <linux/tracepoint.h> | 7 | #include <linux/tracepoint.h> |
8 | 8 | ||
9 | #include <trace/events/gfpflags.h> | ||
10 | |||
11 | #define SCAN_STATUS \ | 9 | #define SCAN_STATUS \ |
12 | EM( SCAN_FAIL, "failed") \ | 10 | EM( SCAN_FAIL, "failed") \ |
13 | EM( SCAN_SUCCEED, "succeeded") \ | 11 | EM( SCAN_SUCCEED, "succeeded") \ |
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index f7554fd7fc62..ca7217389067 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h | |||
@@ -6,7 +6,7 @@ | |||
6 | 6 | ||
7 | #include <linux/types.h> | 7 | #include <linux/types.h> |
8 | #include <linux/tracepoint.h> | 8 | #include <linux/tracepoint.h> |
9 | #include <trace/events/gfpflags.h> | 9 | #include <trace/events/mmflags.h> |
10 | 10 | ||
11 | DECLARE_EVENT_CLASS(kmem_alloc, | 11 | DECLARE_EVENT_CLASS(kmem_alloc, |
12 | 12 | ||
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h new file mode 100644 index 000000000000..a849185c82f0 --- /dev/null +++ b/include/trace/events/mmflags.h | |||
@@ -0,0 +1,164 @@ | |||
1 | /* | ||
2 | * The order of these masks is important. Matching masks will be seen | ||
3 | * first and the leftover flags will end up showing by themselves. | ||
3 | * first and the leftover flags will end up showing by themselves. | ||
4 | * | ||
5 | * For example, if we have GFP_KERNEL before GFP_USER we will get: | ||
6 | * | ||
7 | * GFP_KERNEL|GFP_HARDWALL | ||
8 | * | ||
9 | * Thus most bits set go first. | ||
10 | */ | ||
11 | |||
12 | #define __def_gfpflag_names \ | ||
13 | {(unsigned long)GFP_TRANSHUGE, "GFP_TRANSHUGE"}, \ | ||
14 | {(unsigned long)GFP_HIGHUSER_MOVABLE, "GFP_HIGHUSER_MOVABLE"},\ | ||
15 | {(unsigned long)GFP_HIGHUSER, "GFP_HIGHUSER"}, \ | ||
16 | {(unsigned long)GFP_USER, "GFP_USER"}, \ | ||
17 | {(unsigned long)GFP_TEMPORARY, "GFP_TEMPORARY"}, \ | ||
18 | {(unsigned long)GFP_KERNEL_ACCOUNT, "GFP_KERNEL_ACCOUNT"}, \ | ||
19 | {(unsigned long)GFP_KERNEL, "GFP_KERNEL"}, \ | ||
20 | {(unsigned long)GFP_NOFS, "GFP_NOFS"}, \ | ||
21 | {(unsigned long)GFP_ATOMIC, "GFP_ATOMIC"}, \ | ||
22 | {(unsigned long)GFP_NOIO, "GFP_NOIO"}, \ | ||
23 | {(unsigned long)GFP_NOWAIT, "GFP_NOWAIT"}, \ | ||
24 | {(unsigned long)GFP_DMA, "GFP_DMA"}, \ | ||
25 | {(unsigned long)__GFP_HIGHMEM, "__GFP_HIGHMEM"}, \ | ||
26 | {(unsigned long)GFP_DMA32, "GFP_DMA32"}, \ | ||
27 | {(unsigned long)__GFP_HIGH, "__GFP_HIGH"}, \ | ||
28 | {(unsigned long)__GFP_ATOMIC, "__GFP_ATOMIC"}, \ | ||
29 | {(unsigned long)__GFP_IO, "__GFP_IO"}, \ | ||
30 | {(unsigned long)__GFP_FS, "__GFP_FS"}, \ | ||
31 | {(unsigned long)__GFP_COLD, "__GFP_COLD"}, \ | ||
32 | {(unsigned long)__GFP_NOWARN, "__GFP_NOWARN"}, \ | ||
33 | {(unsigned long)__GFP_REPEAT, "__GFP_REPEAT"}, \ | ||
34 | {(unsigned long)__GFP_NOFAIL, "__GFP_NOFAIL"}, \ | ||
35 | {(unsigned long)__GFP_NORETRY, "__GFP_NORETRY"}, \ | ||
36 | {(unsigned long)__GFP_COMP, "__GFP_COMP"}, \ | ||
37 | {(unsigned long)__GFP_ZERO, "__GFP_ZERO"}, \ | ||
38 | {(unsigned long)__GFP_NOMEMALLOC, "__GFP_NOMEMALLOC"}, \ | ||
39 | {(unsigned long)__GFP_MEMALLOC, "__GFP_MEMALLOC"}, \ | ||
40 | {(unsigned long)__GFP_HARDWALL, "__GFP_HARDWALL"}, \ | ||
41 | {(unsigned long)__GFP_THISNODE, "__GFP_THISNODE"}, \ | ||
42 | {(unsigned long)__GFP_RECLAIMABLE, "__GFP_RECLAIMABLE"}, \ | ||
43 | {(unsigned long)__GFP_MOVABLE, "__GFP_MOVABLE"}, \ | ||
44 | {(unsigned long)__GFP_ACCOUNT, "__GFP_ACCOUNT"}, \ | ||
45 | {(unsigned long)__GFP_NOTRACK, "__GFP_NOTRACK"}, \ | ||
46 | {(unsigned long)__GFP_WRITE, "__GFP_WRITE"}, \ | ||
47 | {(unsigned long)__GFP_RECLAIM, "__GFP_RECLAIM"}, \ | ||
48 | {(unsigned long)__GFP_DIRECT_RECLAIM, "__GFP_DIRECT_RECLAIM"},\ | ||
49 | {(unsigned long)__GFP_KSWAPD_RECLAIM, "__GFP_KSWAPD_RECLAIM"},\ | ||
50 | {(unsigned long)__GFP_OTHER_NODE, "__GFP_OTHER_NODE"} \ | ||
51 | |||
52 | #define show_gfp_flags(flags) \ | ||
53 | (flags) ? __print_flags(flags, "|", \ | ||
54 | __def_gfpflag_names \ | ||
55 | ) : "none" | ||
56 | |||
57 | #ifdef CONFIG_MMU | ||
58 | #define IF_HAVE_PG_MLOCK(flag,string) ,{1UL << flag, string} | ||
59 | #else | ||
60 | #define IF_HAVE_PG_MLOCK(flag,string) | ||
61 | #endif | ||
62 | |||
63 | #ifdef CONFIG_ARCH_USES_PG_UNCACHED | ||
64 | #define IF_HAVE_PG_UNCACHED(flag,string) ,{1UL << flag, string} | ||
65 | #else | ||
66 | #define IF_HAVE_PG_UNCACHED(flag,string) | ||
67 | #endif | ||
68 | |||
69 | #ifdef CONFIG_MEMORY_FAILURE | ||
70 | #define IF_HAVE_PG_HWPOISON(flag,string) ,{1UL << flag, string} | ||
71 | #else | ||
72 | #define IF_HAVE_PG_HWPOISON(flag,string) | ||
73 | #endif | ||
74 | |||
75 | #if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT) | ||
76 | #define IF_HAVE_PG_IDLE(flag,string) ,{1UL << flag, string} | ||
77 | #else | ||
78 | #define IF_HAVE_PG_IDLE(flag,string) | ||
79 | #endif | ||
80 | |||
81 | #define __def_pageflag_names \ | ||
82 | {1UL << PG_locked, "locked" }, \ | ||
83 | {1UL << PG_error, "error" }, \ | ||
84 | {1UL << PG_referenced, "referenced" }, \ | ||
85 | {1UL << PG_uptodate, "uptodate" }, \ | ||
86 | {1UL << PG_dirty, "dirty" }, \ | ||
87 | {1UL << PG_lru, "lru" }, \ | ||
88 | {1UL << PG_active, "active" }, \ | ||
89 | {1UL << PG_slab, "slab" }, \ | ||
90 | {1UL << PG_owner_priv_1, "owner_priv_1" }, \ | ||
91 | {1UL << PG_arch_1, "arch_1" }, \ | ||
92 | {1UL << PG_reserved, "reserved" }, \ | ||
93 | {1UL << PG_private, "private" }, \ | ||
94 | {1UL << PG_private_2, "private_2" }, \ | ||
95 | {1UL << PG_writeback, "writeback" }, \ | ||
96 | {1UL << PG_head, "head" }, \ | ||
97 | {1UL << PG_swapcache, "swapcache" }, \ | ||
98 | {1UL << PG_mappedtodisk, "mappedtodisk" }, \ | ||
99 | {1UL << PG_reclaim, "reclaim" }, \ | ||
100 | {1UL << PG_swapbacked, "swapbacked" }, \ | ||
101 | {1UL << PG_unevictable, "unevictable" } \ | ||
102 | IF_HAVE_PG_MLOCK(PG_mlocked, "mlocked" ) \ | ||
103 | IF_HAVE_PG_UNCACHED(PG_uncached, "uncached" ) \ | ||
104 | IF_HAVE_PG_HWPOISON(PG_hwpoison, "hwpoison" ) \ | ||
105 | IF_HAVE_PG_IDLE(PG_young, "young" ) \ | ||
106 | IF_HAVE_PG_IDLE(PG_idle, "idle" ) | ||
107 | |||
108 | #define show_page_flags(flags) \ | ||
109 | (flags) ? __print_flags(flags, "|", \ | ||
110 | __def_pageflag_names \ | ||
111 | ) : "none" | ||
112 | |||
113 | #if defined(CONFIG_X86) | ||
114 | #define __VM_ARCH_SPECIFIC {VM_PAT, "pat" } | ||
115 | #elif defined(CONFIG_PPC) | ||
116 | #define __VM_ARCH_SPECIFIC {VM_SAO, "sao" } | ||
117 | #elif defined(CONFIG_PARISC) || defined(CONFIG_METAG) || defined(CONFIG_IA64) | ||
118 | #define __VM_ARCH_SPECIFIC {VM_GROWSUP, "growsup" } | ||
119 | #elif !defined(CONFIG_MMU) | ||
120 | #define __VM_ARCH_SPECIFIC {VM_MAPPED_COPY,"mappedcopy" } | ||
121 | #else | ||
122 | #define __VM_ARCH_SPECIFIC {VM_ARCH_1, "arch_1" } | ||
123 | #endif | ||
124 | |||
125 | #ifdef CONFIG_MEM_SOFT_DIRTY | ||
126 | #define IF_HAVE_VM_SOFTDIRTY(flag,name) {flag, name }, | ||
127 | #else | ||
128 | #define IF_HAVE_VM_SOFTDIRTY(flag,name) | ||
129 | #endif | ||
130 | |||
131 | #define __def_vmaflag_names \ | ||
132 | {VM_READ, "read" }, \ | ||
133 | {VM_WRITE, "write" }, \ | ||
134 | {VM_EXEC, "exec" }, \ | ||
135 | {VM_SHARED, "shared" }, \ | ||
136 | {VM_MAYREAD, "mayread" }, \ | ||
137 | {VM_MAYWRITE, "maywrite" }, \ | ||
138 | {VM_MAYEXEC, "mayexec" }, \ | ||
139 | {VM_MAYSHARE, "mayshare" }, \ | ||
140 | {VM_GROWSDOWN, "growsdown" }, \ | ||
141 | {VM_PFNMAP, "pfnmap" }, \ | ||
142 | {VM_DENYWRITE, "denywrite" }, \ | ||
143 | {VM_LOCKONFAULT, "lockonfault" }, \ | ||
144 | {VM_LOCKED, "locked" }, \ | ||
145 | {VM_IO, "io" }, \ | ||
146 | {VM_SEQ_READ, "seqread" }, \ | ||
147 | {VM_RAND_READ, "randread" }, \ | ||
148 | {VM_DONTCOPY, "dontcopy" }, \ | ||
149 | {VM_DONTEXPAND, "dontexpand" }, \ | ||
150 | {VM_ACCOUNT, "account" }, \ | ||
151 | {VM_NORESERVE, "noreserve" }, \ | ||
152 | {VM_HUGETLB, "hugetlb" }, \ | ||
153 | __VM_ARCH_SPECIFIC , \ | ||
154 | {VM_DONTDUMP, "dontdump" }, \ | ||
155 | IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \ | ||
156 | {VM_MIXEDMAP, "mixedmap" }, \ | ||
157 | {VM_HUGEPAGE, "hugepage" }, \ | ||
158 | {VM_NOHUGEPAGE, "nohugepage" }, \ | ||
159 | {VM_MERGEABLE, "mergeable" } \ | ||
160 | |||
161 | #define show_vma_flags(flags) \ | ||
162 | (flags) ? __print_flags(flags, "|", \ | ||
163 | __def_vmaflag_names \ | ||
164 | ) : "none" | ||
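Besides feeding vsprintf's new %pG handling (see lib/vsprintf.c below), these tables keep the established tracepoint output format working. A sketch of a tracepoint using the shared decoder (the event is hypothetical; the real users are the kmem, compaction, btrfs and vmscan events switched over in this series):

#include <trace/events/mmflags.h>

TRACE_EVENT(example_alloc,
	TP_PROTO(gfp_t gfp_flags),
	TP_ARGS(gfp_flags),
	TP_STRUCT__entry(
		__field(gfp_t, gfp_flags)
	),
	TP_fast_assign(
		__entry->gfp_flags = gfp_flags;
	),
	TP_printk("gfp_flags=%s", show_gfp_flags(__entry->gfp_flags))
);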
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index 31763dd8db1c..0101ef37f1ee 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h | |||
@@ -8,7 +8,7 @@ | |||
8 | #include <linux/tracepoint.h> | 8 | #include <linux/tracepoint.h> |
9 | #include <linux/mm.h> | 9 | #include <linux/mm.h> |
10 | #include <linux/memcontrol.h> | 10 | #include <linux/memcontrol.h> |
11 | #include <trace/events/gfpflags.h> | 11 | #include <trace/events/mmflags.h> |
12 | 12 | ||
13 | #define RECLAIM_WB_ANON 0x0001u | 13 | #define RECLAIM_WB_ANON 0x0001u |
14 | #define RECLAIM_WB_FILE 0x0002u | 14 | #define RECLAIM_WB_FILE 0x0002u |
diff --git a/include/uapi/linux/auto_fs.h b/include/uapi/linux/auto_fs.h index bb991dfe134f..9175a1b4dc69 100644 --- a/include/uapi/linux/auto_fs.h +++ b/include/uapi/linux/auto_fs.h | |||
@@ -1,7 +1,4 @@ | |||
1 | /* -*- linux-c -*- ------------------------------------------------------- * | 1 | /* |
2 | * | ||
3 | * linux/include/linux/auto_fs.h | ||
4 | * | ||
5 | * Copyright 1997 Transmeta Corporation - All Rights Reserved | 2 | * Copyright 1997 Transmeta Corporation - All Rights Reserved |
6 | * | 3 | * |
7 | * This file is part of the Linux kernel and is made available under | 4 | * This file is part of the Linux kernel and is made available under |
@@ -51,7 +48,7 @@ struct autofs_packet_hdr { | |||
51 | 48 | ||
52 | struct autofs_packet_missing { | 49 | struct autofs_packet_missing { |
53 | struct autofs_packet_hdr hdr; | 50 | struct autofs_packet_hdr hdr; |
54 | autofs_wqt_t wait_queue_token; | 51 | autofs_wqt_t wait_queue_token; |
55 | int len; | 52 | int len; |
56 | char name[NAME_MAX+1]; | 53 | char name[NAME_MAX+1]; |
57 | }; | 54 | }; |
@@ -63,12 +60,12 @@ struct autofs_packet_expire { | |||
63 | char name[NAME_MAX+1]; | 60 | char name[NAME_MAX+1]; |
64 | }; | 61 | }; |
65 | 62 | ||
66 | #define AUTOFS_IOC_READY _IO(0x93,0x60) | 63 | #define AUTOFS_IOC_READY _IO(0x93, 0x60) |
67 | #define AUTOFS_IOC_FAIL _IO(0x93,0x61) | 64 | #define AUTOFS_IOC_FAIL _IO(0x93, 0x61) |
68 | #define AUTOFS_IOC_CATATONIC _IO(0x93,0x62) | 65 | #define AUTOFS_IOC_CATATONIC _IO(0x93, 0x62) |
69 | #define AUTOFS_IOC_PROTOVER _IOR(0x93,0x63,int) | 66 | #define AUTOFS_IOC_PROTOVER _IOR(0x93, 0x63, int) |
70 | #define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93,0x64,compat_ulong_t) | 67 | #define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93, 0x64, compat_ulong_t) |
71 | #define AUTOFS_IOC_SETTIMEOUT _IOWR(0x93,0x64,unsigned long) | 68 | #define AUTOFS_IOC_SETTIMEOUT _IOWR(0x93, 0x64, unsigned long) |
72 | #define AUTOFS_IOC_EXPIRE _IOR(0x93,0x65,struct autofs_packet_expire) | 69 | #define AUTOFS_IOC_EXPIRE _IOR(0x93, 0x65, struct autofs_packet_expire) |
73 | 70 | ||
74 | #endif /* _UAPI_LINUX_AUTO_FS_H */ | 71 | #endif /* _UAPI_LINUX_AUTO_FS_H */ |
diff --git a/include/uapi/linux/auto_fs4.h b/include/uapi/linux/auto_fs4.h index e02982fa2953..8f8f1bdcca8c 100644 --- a/include/uapi/linux/auto_fs4.h +++ b/include/uapi/linux/auto_fs4.h | |||
@@ -1,6 +1,4 @@ | |||
1 | /* -*- c -*- | 1 | /* |
2 | * linux/include/linux/auto_fs4.h | ||
3 | * | ||
4 | * Copyright 1999-2000 Jeremy Fitzhardinge <jeremy@goop.org> | 2 | * Copyright 1999-2000 Jeremy Fitzhardinge <jeremy@goop.org> |
5 | * | 3 | * |
6 | * This file is part of the Linux kernel and is made available under | 4 | * This file is part of the Linux kernel and is made available under |
@@ -38,7 +36,6 @@ | |||
38 | static inline void set_autofs_type_indirect(unsigned int *type) | 36 | static inline void set_autofs_type_indirect(unsigned int *type) |
39 | { | 37 | { |
40 | *type = AUTOFS_TYPE_INDIRECT; | 38 | *type = AUTOFS_TYPE_INDIRECT; |
41 | return; | ||
42 | } | 39 | } |
43 | 40 | ||
44 | static inline unsigned int autofs_type_indirect(unsigned int type) | 41 | static inline unsigned int autofs_type_indirect(unsigned int type) |
@@ -49,7 +46,6 @@ static inline unsigned int autofs_type_indirect(unsigned int type) | |||
49 | static inline void set_autofs_type_direct(unsigned int *type) | 46 | static inline void set_autofs_type_direct(unsigned int *type) |
50 | { | 47 | { |
51 | *type = AUTOFS_TYPE_DIRECT; | 48 | *type = AUTOFS_TYPE_DIRECT; |
52 | return; | ||
53 | } | 49 | } |
54 | 50 | ||
55 | static inline unsigned int autofs_type_direct(unsigned int type) | 51 | static inline unsigned int autofs_type_direct(unsigned int type) |
@@ -60,7 +56,6 @@ static inline unsigned int autofs_type_direct(unsigned int type) | |||
60 | static inline void set_autofs_type_offset(unsigned int *type) | 56 | static inline void set_autofs_type_offset(unsigned int *type) |
61 | { | 57 | { |
62 | *type = AUTOFS_TYPE_OFFSET; | 58 | *type = AUTOFS_TYPE_OFFSET; |
63 | return; | ||
64 | } | 59 | } |
65 | 60 | ||
66 | static inline unsigned int autofs_type_offset(unsigned int type) | 61 | static inline unsigned int autofs_type_offset(unsigned int type) |
@@ -81,7 +76,6 @@ static inline unsigned int autofs_type_trigger(unsigned int type) | |||
81 | static inline void set_autofs_type_any(unsigned int *type) | 76 | static inline void set_autofs_type_any(unsigned int *type) |
82 | { | 77 | { |
83 | *type = AUTOFS_TYPE_ANY; | 78 | *type = AUTOFS_TYPE_ANY; |
84 | return; | ||
85 | } | 79 | } |
86 | 80 | ||
87 | static inline unsigned int autofs_type_any(unsigned int type) | 81 | static inline unsigned int autofs_type_any(unsigned int type) |
@@ -114,7 +108,7 @@ enum autofs_notify { | |||
114 | /* v4 multi expire (via pipe) */ | 108 | /* v4 multi expire (via pipe) */ |
115 | struct autofs_packet_expire_multi { | 109 | struct autofs_packet_expire_multi { |
116 | struct autofs_packet_hdr hdr; | 110 | struct autofs_packet_hdr hdr; |
117 | autofs_wqt_t wait_queue_token; | 111 | autofs_wqt_t wait_queue_token; |
118 | int len; | 112 | int len; |
119 | char name[NAME_MAX+1]; | 113 | char name[NAME_MAX+1]; |
120 | }; | 114 | }; |
@@ -154,11 +148,10 @@ union autofs_v5_packet_union { | |||
154 | autofs_packet_expire_direct_t expire_direct; | 148 | autofs_packet_expire_direct_t expire_direct; |
155 | }; | 149 | }; |
156 | 150 | ||
157 | #define AUTOFS_IOC_EXPIRE_MULTI _IOW(0x93,0x66,int) | 151 | #define AUTOFS_IOC_EXPIRE_MULTI _IOW(0x93, 0x66, int) |
158 | #define AUTOFS_IOC_EXPIRE_INDIRECT AUTOFS_IOC_EXPIRE_MULTI | 152 | #define AUTOFS_IOC_EXPIRE_INDIRECT AUTOFS_IOC_EXPIRE_MULTI |
159 | #define AUTOFS_IOC_EXPIRE_DIRECT AUTOFS_IOC_EXPIRE_MULTI | 153 | #define AUTOFS_IOC_EXPIRE_DIRECT AUTOFS_IOC_EXPIRE_MULTI |
160 | #define AUTOFS_IOC_PROTOSUBVER _IOR(0x93,0x67,int) | 154 | #define AUTOFS_IOC_PROTOSUBVER _IOR(0x93, 0x67, int) |
161 | #define AUTOFS_IOC_ASKUMOUNT _IOR(0x93,0x70,int) | 155 | #define AUTOFS_IOC_ASKUMOUNT _IOR(0x93, 0x70, int) |
162 | |||
163 | 156 | ||
164 | #endif /* _LINUX_AUTO_FS4_H */ | 157 | #endif /* _LINUX_AUTO_FS4_H */ |
diff --git a/init/Kconfig b/init/Kconfig index 22320804fbaf..fd664b3ab99e 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
@@ -1420,6 +1420,28 @@ config KALLSYMS_ALL | |||
1420 | 1420 | ||
1421 | Say N unless you really need all symbols. | 1421 | Say N unless you really need all symbols. |
1422 | 1422 | ||
1423 | config KALLSYMS_ABSOLUTE_PERCPU | ||
1424 | bool | ||
1425 | default X86_64 && SMP | ||
1426 | |||
1427 | config KALLSYMS_BASE_RELATIVE | ||
1428 | bool | ||
1429 | depends on KALLSYMS | ||
1430 | default !IA64 && !(TILE && 64BIT) | ||
1431 | help | ||
1432 | Instead of emitting them as absolute values in the native word size, | ||
1433 | emit the symbol references in the kallsyms table as 32-bit entries, | ||
1434 | each containing a relative value in the range [base, base + U32_MAX] | ||
1435 | or, when KALLSYMS_ABSOLUTE_PERCPU is in effect, each containing either | ||
1436 | an absolute value in the range [0, S32_MAX] or a relative value in the | ||
1437 | range [base, base + S32_MAX], where base is the lowest relative symbol | ||
1438 | address encountered in the image. | ||
1439 | |||
1440 | On 64-bit builds, this reduces the size of the address table by 50%, | ||
1441 | but more importantly, it results in entries whose values are build | ||
1442 | time constants, and no relocation pass is required at runtime to fix | ||
1443 | up the entries based on the runtime load address of the kernel. | ||
1444 | |||
1423 | config PRINTK | 1445 | config PRINTK |
1424 | default y | 1446 | default y |
1425 | bool "Enable support for printk" if EXPERT | 1447 | bool "Enable support for printk" if EXPERT |
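A worked example of the saving the KALLSYMS_BASE_RELATIVE help text describes (illustrative addresses, not taken from a real build):

/*
 * kallsyms_relative_base = 0xffffffff81000000
 * symbol address         = 0xffffffff81234567
 * stored 32-bit entry    = 0x00234567          (4 bytes instead of 8)
 */

Because each entry is a link-time-constant offset, a kernel relocated by KASLR needs no boot-time pass over the table; only the single base value moves with the image.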
diff --git a/init/main.c b/init/main.c index 8dc93df20f7f..b3c6e363ae18 100644 --- a/init/main.c +++ b/init/main.c | |||
@@ -705,7 +705,6 @@ static int __init initcall_blacklist(char *str) | |||
705 | 705 | ||
706 | static bool __init_or_module initcall_blacklisted(initcall_t fn) | 706 | static bool __init_or_module initcall_blacklisted(initcall_t fn) |
707 | { | 707 | { |
708 | struct list_head *tmp; | ||
709 | struct blacklist_entry *entry; | 708 | struct blacklist_entry *entry; |
710 | char *fn_name; | 709 | char *fn_name; |
711 | 710 | ||
@@ -713,8 +712,7 @@ static bool __init_or_module initcall_blacklisted(initcall_t fn) | |||
713 | if (!fn_name) | 712 | if (!fn_name) |
714 | return false; | 713 | return false; |
715 | 714 | ||
716 | list_for_each(tmp, &blacklisted_initcalls) { | 715 | list_for_each_entry(entry, &blacklisted_initcalls, next) { |
717 | entry = list_entry(tmp, struct blacklist_entry, next); | ||
718 | if (!strcmp(fn_name, entry->buf)) { | 716 | if (!strcmp(fn_name, entry->buf)) { |
719 | pr_debug("initcall %s blacklisted\n", fn_name); | 717 | pr_debug("initcall %s blacklisted\n", fn_name); |
720 | kfree(fn_name); | 718 | kfree(fn_name); |
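The list_for_each_entry() form folds the list_entry() container lookup into the iterator, which is what lets the scratch struct list_head pointer be dropped above. The surrounding function, reduced to a sketch:

static bool initcall_blacklisted_example(struct list_head *head,
					 const char *fn_name)
{
	struct blacklist_entry *entry;

	list_for_each_entry(entry, head, next) {
		if (!strcmp(fn_name, entry->buf))
			return true;
	}
	return false;
}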
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 5c5987f10819..fafd1a3ef0da 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c | |||
@@ -38,6 +38,7 @@ | |||
38 | * during the second link stage. | 38 | * during the second link stage. |
39 | */ | 39 | */ |
40 | extern const unsigned long kallsyms_addresses[] __weak; | 40 | extern const unsigned long kallsyms_addresses[] __weak; |
41 | extern const int kallsyms_offsets[] __weak; | ||
41 | extern const u8 kallsyms_names[] __weak; | 42 | extern const u8 kallsyms_names[] __weak; |
42 | 43 | ||
43 | /* | 44 | /* |
@@ -47,6 +48,9 @@ extern const u8 kallsyms_names[] __weak; | |||
47 | extern const unsigned long kallsyms_num_syms | 48 | extern const unsigned long kallsyms_num_syms |
48 | __attribute__((weak, section(".rodata"))); | 49 | __attribute__((weak, section(".rodata"))); |
49 | 50 | ||
51 | extern const unsigned long kallsyms_relative_base | ||
52 | __attribute__((weak, section(".rodata"))); | ||
53 | |||
50 | extern const u8 kallsyms_token_table[] __weak; | 54 | extern const u8 kallsyms_token_table[] __weak; |
51 | extern const u16 kallsyms_token_index[] __weak; | 55 | extern const u16 kallsyms_token_index[] __weak; |
52 | 56 | ||
@@ -176,6 +180,23 @@ static unsigned int get_symbol_offset(unsigned long pos) | |||
176 | return name - kallsyms_names; | 180 | return name - kallsyms_names; |
177 | } | 181 | } |
178 | 182 | ||
183 | static unsigned long kallsyms_sym_address(int idx) | ||
184 | { | ||
185 | if (!IS_ENABLED(CONFIG_KALLSYMS_BASE_RELATIVE)) | ||
186 | return kallsyms_addresses[idx]; | ||
187 | |||
188 | /* values are unsigned offsets if --absolute-percpu is not in effect */ | ||
189 | if (!IS_ENABLED(CONFIG_KALLSYMS_ABSOLUTE_PERCPU)) | ||
190 | return kallsyms_relative_base + (u32)kallsyms_offsets[idx]; | ||
191 | |||
192 | /* ...otherwise, positive offsets are absolute values */ | ||
193 | if (kallsyms_offsets[idx] >= 0) | ||
194 | return kallsyms_offsets[idx]; | ||
195 | |||
196 | /* ...and negative offsets are relative to kallsyms_relative_base - 1 */ | ||
197 | return kallsyms_relative_base - 1 - kallsyms_offsets[idx]; | ||
198 | } | ||
199 | |||
179 | /* Lookup the address for this symbol. Returns 0 if not found. */ | 200 | /* Lookup the address for this symbol. Returns 0 if not found. */ |
180 | unsigned long kallsyms_lookup_name(const char *name) | 201 | unsigned long kallsyms_lookup_name(const char *name) |
181 | { | 202 | { |
@@ -187,7 +208,7 @@ unsigned long kallsyms_lookup_name(const char *name) | |||
187 | off = kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf)); | 208 | off = kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf)); |
188 | 209 | ||
189 | if (strcmp(namebuf, name) == 0) | 210 | if (strcmp(namebuf, name) == 0) |
190 | return kallsyms_addresses[i]; | 211 | return kallsyms_sym_address(i); |
191 | } | 212 | } |
192 | return module_kallsyms_lookup_name(name); | 213 | return module_kallsyms_lookup_name(name); |
193 | } | 214 | } |
@@ -204,7 +225,7 @@ int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, | |||
204 | 225 | ||
205 | for (i = 0, off = 0; i < kallsyms_num_syms; i++) { | 226 | for (i = 0, off = 0; i < kallsyms_num_syms; i++) { |
206 | off = kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf)); | 227 | off = kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf)); |
207 | ret = fn(data, namebuf, NULL, kallsyms_addresses[i]); | 228 | ret = fn(data, namebuf, NULL, kallsyms_sym_address(i)); |
208 | if (ret != 0) | 229 | if (ret != 0) |
209 | return ret; | 230 | return ret; |
210 | } | 231 | } |
@@ -220,7 +241,10 @@ static unsigned long get_symbol_pos(unsigned long addr, | |||
220 | unsigned long i, low, high, mid; | 241 | unsigned long i, low, high, mid; |
221 | 242 | ||
222 | /* This kernel should never had been booted. */ | 243 | /* This kernel should never had been booted. */ |
223 | BUG_ON(!kallsyms_addresses); | 244 | if (!IS_ENABLED(CONFIG_KALLSYMS_BASE_RELATIVE)) |
245 | BUG_ON(!kallsyms_addresses); | ||
246 | else | ||
247 | BUG_ON(!kallsyms_offsets); | ||
224 | 248 | ||
225 | /* Do a binary search on the sorted kallsyms_addresses array. */ | 249 | /* Do a binary search on the sorted kallsyms_addresses array. */ |
226 | low = 0; | 250 | low = 0; |
@@ -228,7 +252,7 @@ static unsigned long get_symbol_pos(unsigned long addr, | |||
228 | 252 | ||
229 | while (high - low > 1) { | 253 | while (high - low > 1) { |
230 | mid = low + (high - low) / 2; | 254 | mid = low + (high - low) / 2; |
231 | if (kallsyms_addresses[mid] <= addr) | 255 | if (kallsyms_sym_address(mid) <= addr) |
232 | low = mid; | 256 | low = mid; |
233 | else | 257 | else |
234 | high = mid; | 258 | high = mid; |
@@ -238,15 +262,15 @@ static unsigned long get_symbol_pos(unsigned long addr, | |||
238 | * Search for the first aliased symbol. Aliased | 262 | * Search for the first aliased symbol. Aliased |
239 | * symbols are symbols with the same address. | 263 | * symbols are symbols with the same address. |
240 | */ | 264 | */ |
241 | while (low && kallsyms_addresses[low-1] == kallsyms_addresses[low]) | 265 | while (low && kallsyms_sym_address(low-1) == kallsyms_sym_address(low)) |
242 | --low; | 266 | --low; |
243 | 267 | ||
244 | symbol_start = kallsyms_addresses[low]; | 268 | symbol_start = kallsyms_sym_address(low); |
245 | 269 | ||
246 | /* Search for next non-aliased symbol. */ | 270 | /* Search for next non-aliased symbol. */ |
247 | for (i = low + 1; i < kallsyms_num_syms; i++) { | 271 | for (i = low + 1; i < kallsyms_num_syms; i++) { |
248 | if (kallsyms_addresses[i] > symbol_start) { | 272 | if (kallsyms_sym_address(i) > symbol_start) { |
249 | symbol_end = kallsyms_addresses[i]; | 273 | symbol_end = kallsyms_sym_address(i); |
250 | break; | 274 | break; |
251 | } | 275 | } |
252 | } | 276 | } |
@@ -470,7 +494,7 @@ static unsigned long get_ksymbol_core(struct kallsym_iter *iter) | |||
470 | unsigned off = iter->nameoff; | 494 | unsigned off = iter->nameoff; |
471 | 495 | ||
472 | iter->module_name[0] = '\0'; | 496 | iter->module_name[0] = '\0'; |
473 | iter->value = kallsyms_addresses[iter->pos]; | 497 | iter->value = kallsyms_sym_address(iter->pos); |
474 | 498 | ||
475 | iter->type = kallsyms_get_symbol_type(off); | 499 | iter->type = kallsyms_get_symbol_type(off); |
476 | 500 | ||
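kallsyms_sym_address() above is the decode side; the matching encode rule, emitted by scripts/kallsyms at link time, can be summarized as follows (a sketch of the convention, not the emitter itself):

/*
 * With base-relative and absolute-percpu both in effect:
 *   absolute (percpu) symbol:  entry = address              (entry >= 0)
 *   relative symbol:           entry = -1 - (addr - base)   (entry <  0)
 *
 * Decoding a negative entry as base - 1 - entry recovers addr, and the
 * -1 bias keeps addr == base representable (entry = -1) while reserving
 * the whole non-negative range for absolute values.
 */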
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index f894a2cd9b2a..53ab2f85d77e 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c | |||
@@ -148,8 +148,7 @@ static inline struct lock_class *hlock_class(struct held_lock *hlock) | |||
148 | } | 148 | } |
149 | 149 | ||
150 | #ifdef CONFIG_LOCK_STAT | 150 | #ifdef CONFIG_LOCK_STAT |
151 | static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], | 151 | static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], cpu_lock_stats); |
152 | cpu_lock_stats); | ||
153 | 152 | ||
154 | static inline u64 lockstat_clock(void) | 153 | static inline u64 lockstat_clock(void) |
155 | { | 154 | { |
diff --git a/kernel/memremap.c b/kernel/memremap.c index fb9b88787ebc..584febd13e2e 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c | |||
@@ -391,7 +391,7 @@ struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start) | |||
391 | /* | 391 | /* |
392 | * 'memmap_start' is the virtual address for the first "struct | 392 | * 'memmap_start' is the virtual address for the first "struct |
393 | * page" in this range of the vmemmap array. In the case of | 393 | * page" in this range of the vmemmap array. In the case of |
394 | * CONFIG_SPARSE_VMEMMAP a page_to_pfn conversion is simple | 394 | * CONFIG_SPARSEMEM_VMEMMAP a page_to_pfn conversion is simple |
395 | * pointer arithmetic, so we can perform this to_vmem_altmap() | 395 | * pointer arithmetic, so we can perform this to_vmem_altmap() |
396 | * conversion without concern for the initialization state of | 396 | * conversion without concern for the initialization state of |
397 | * the struct page fields. | 397 | * the struct page fields. |
@@ -400,7 +400,7 @@ struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start) | |||
400 | struct dev_pagemap *pgmap; | 400 | struct dev_pagemap *pgmap; |
401 | 401 | ||
402 | /* | 402 | /* |
403 | * Uncoditionally retrieve a dev_pagemap associated with the | 403 | * Unconditionally retrieve a dev_pagemap associated with the |
404 | * given physical address, this is only for use in the | 404 | * given physical address, this is only for use in the |
405 | * arch_{add|remove}_memory() for setting up and tearing down | 405 | * arch_{add|remove}_memory() for setting up and tearing down |
406 | * the memmap. | 406 | * the memmap. |
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index b7342a24f559..aa0f26b58426 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c | |||
@@ -1158,6 +1158,22 @@ static int __init kaslr_nohibernate_setup(char *str) | |||
1158 | return nohibernate_setup(str); | 1158 | return nohibernate_setup(str); |
1159 | } | 1159 | } |
1160 | 1160 | ||
1161 | static int __init page_poison_nohibernate_setup(char *str) | ||
1162 | { | ||
1163 | #ifdef CONFIG_PAGE_POISONING_ZERO | ||
1164 | /* | ||
1165 | * The zeroing option for page poison skips the checks on alloc. | ||
1166 | * Since hibernation doesn't save free pages, there's no way to | ||
1167 | * guarantee the pages will still be zeroed. | ||
1168 | */ | ||
1169 | if (!strcmp(str, "on")) { | ||
1170 | pr_info("Disabling hibernation due to page poisoning\n"); | ||
1171 | return nohibernate_setup(str); | ||
1172 | } | ||
1173 | #endif | ||
1174 | return 1; | ||
1175 | } | ||
1176 | |||
1161 | __setup("noresume", noresume_setup); | 1177 | __setup("noresume", noresume_setup); |
1162 | __setup("resume_offset=", resume_offset_setup); | 1178 | __setup("resume_offset=", resume_offset_setup); |
1163 | __setup("resume=", resume_setup); | 1179 | __setup("resume=", resume_setup); |
@@ -1166,3 +1182,4 @@ __setup("resumewait", resumewait_setup); | |||
1166 | __setup("resumedelay=", resumedelay_setup); | 1182 | __setup("resumedelay=", resumedelay_setup); |
1167 | __setup("nohibernate", nohibernate_setup); | 1183 | __setup("nohibernate", nohibernate_setup); |
1168 | __setup("kaslr", kaslr_nohibernate_setup); | 1184 | __setup("kaslr", kaslr_nohibernate_setup); |
1185 | __setup("page_poison=", page_poison_nohibernate_setup); | ||
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 65ae0e5c35da..250ea67c1615 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c | |||
@@ -130,10 +130,8 @@ static struct rcu_torture __rcu *rcu_torture_current; | |||
130 | static unsigned long rcu_torture_current_version; | 130 | static unsigned long rcu_torture_current_version; |
131 | static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN]; | 131 | static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN]; |
132 | static DEFINE_SPINLOCK(rcu_torture_lock); | 132 | static DEFINE_SPINLOCK(rcu_torture_lock); |
133 | static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], | 133 | static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_count) = { 0 }; |
134 | rcu_torture_count) = { 0 }; | 134 | static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_batch) = { 0 }; |
135 | static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], | ||
136 | rcu_torture_batch) = { 0 }; | ||
137 | static atomic_t rcu_torture_wcount[RCU_TORTURE_PIPE_LEN + 1]; | 135 | static atomic_t rcu_torture_wcount[RCU_TORTURE_PIPE_LEN + 1]; |
138 | static atomic_t n_rcu_torture_alloc; | 136 | static atomic_t n_rcu_torture_alloc; |
139 | static atomic_t n_rcu_torture_alloc_fail; | 137 | static atomic_t n_rcu_torture_alloc_fail; |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 7ff5dc7d2ac5..16e13d8628a3 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -320,8 +320,7 @@ static bool wq_debug_force_rr_cpu = false; | |||
320 | module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644); | 320 | module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644); |
321 | 321 | ||
322 | /* the per-cpu worker pools */ | 322 | /* the per-cpu worker pools */ |
323 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], | 323 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], cpu_worker_pools); |
324 | cpu_worker_pools); | ||
325 | 324 | ||
326 | static DEFINE_IDR(worker_pool_idr); /* PR: idr of all pools */ | 325 | static DEFINE_IDR(worker_pool_idr); /* PR: idr of all pools */ |
327 | 326 | ||
diff --git a/lib/test_printf.c b/lib/test_printf.c index 4f6ae60433bc..563f10e6876a 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c | |||
@@ -17,6 +17,9 @@ | |||
17 | #include <linux/socket.h> | 17 | #include <linux/socket.h> |
18 | #include <linux/in.h> | 18 | #include <linux/in.h> |
19 | 19 | ||
20 | #include <linux/gfp.h> | ||
21 | #include <linux/mm.h> | ||
22 | |||
20 | #define BUF_SIZE 256 | 23 | #define BUF_SIZE 256 |
21 | #define PAD_SIZE 16 | 24 | #define PAD_SIZE 16 |
22 | #define FILL_CHAR '$' | 25 | #define FILL_CHAR '$' |
@@ -411,6 +414,55 @@ netdev_features(void) | |||
411 | } | 414 | } |
412 | 415 | ||
413 | static void __init | 416 | static void __init |
417 | flags(void) | ||
418 | { | ||
419 | unsigned long flags; | ||
420 | gfp_t gfp; | ||
421 | char *cmp_buffer; | ||
422 | |||
423 | flags = 0; | ||
424 | test("", "%pGp", &flags); | ||
425 | |||
426 | /* Page flags should filter the zone id */ | ||
427 | flags = 1UL << NR_PAGEFLAGS; | ||
428 | test("", "%pGp", &flags); | ||
429 | |||
430 | flags |= 1UL << PG_uptodate | 1UL << PG_dirty | 1UL << PG_lru | ||
431 | | 1UL << PG_active | 1UL << PG_swapbacked; | ||
432 | test("uptodate|dirty|lru|active|swapbacked", "%pGp", &flags); | ||
433 | |||
434 | |||
435 | flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC | ||
436 | | VM_DENYWRITE; | ||
437 | test("read|exec|mayread|maywrite|mayexec|denywrite", "%pGv", &flags); | ||
438 | |||
439 | gfp = GFP_TRANSHUGE; | ||
440 | test("GFP_TRANSHUGE", "%pGg", &gfp); | ||
441 | |||
442 | gfp = GFP_ATOMIC|__GFP_DMA; | ||
443 | test("GFP_ATOMIC|GFP_DMA", "%pGg", &gfp); | ||
444 | |||
445 | gfp = __GFP_ATOMIC; | ||
446 | test("__GFP_ATOMIC", "%pGg", &gfp); | ||
447 | |||
448 | cmp_buffer = kmalloc(BUF_SIZE, GFP_KERNEL); | ||
449 | if (!cmp_buffer) | ||
450 | return; | ||
451 | |||
452 | /* Any flags not translated by the table should remain numeric */ | ||
453 | gfp = ~__GFP_BITS_MASK; | ||
454 | snprintf(cmp_buffer, BUF_SIZE, "%#lx", (unsigned long) gfp); | ||
455 | test(cmp_buffer, "%pGg", &gfp); | ||
456 | |||
457 | snprintf(cmp_buffer, BUF_SIZE, "__GFP_ATOMIC|%#lx", | ||
458 | (unsigned long) gfp); | ||
459 | gfp |= __GFP_ATOMIC; | ||
460 | test(cmp_buffer, "%pGg", &gfp); | ||
461 | |||
462 | kfree(cmp_buffer); | ||
463 | } | ||
464 | |||
465 | static void __init | ||
414 | test_pointer(void) | 466 | test_pointer(void) |
415 | { | 467 | { |
416 | plain(); | 468 | plain(); |
@@ -428,6 +480,7 @@ test_pointer(void) | |||
428 | struct_clk(); | 480 | struct_clk(); |
429 | bitmap(); | 481 | bitmap(); |
430 | netdev_features(); | 482 | netdev_features(); |
483 | flags(); | ||
431 | } | 484 | } |
432 | 485 | ||
433 | static int __init | 486 | static int __init |
diff --git a/lib/vsprintf.c b/lib/vsprintf.c index f44e178e6ede..525c8e19bda2 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c | |||
@@ -35,6 +35,8 @@ | |||
35 | #include <linux/blkdev.h> | 35 | #include <linux/blkdev.h> |
36 | #endif | 36 | #endif |
37 | 37 | ||
38 | #include "../mm/internal.h" /* For the trace_print_flags arrays */ | ||
39 | |||
38 | #include <asm/page.h> /* for PAGE_SIZE */ | 40 | #include <asm/page.h> /* for PAGE_SIZE */ |
39 | #include <asm/sections.h> /* for dereference_function_descriptor() */ | 41 | #include <asm/sections.h> /* for dereference_function_descriptor() */ |
40 | #include <asm/byteorder.h> /* cpu_to_le16 */ | 42 | #include <asm/byteorder.h> /* cpu_to_le16 */ |
@@ -1407,6 +1409,72 @@ char *clock(char *buf, char *end, struct clk *clk, struct printf_spec spec, | |||
1407 | } | 1409 | } |
1408 | } | 1410 | } |
1409 | 1411 | ||
1412 | static | ||
1413 | char *format_flags(char *buf, char *end, unsigned long flags, | ||
1414 | const struct trace_print_flags *names) | ||
1415 | { | ||
1416 | unsigned long mask; | ||
1417 | const struct printf_spec strspec = { | ||
1418 | .field_width = -1, | ||
1419 | .precision = -1, | ||
1420 | }; | ||
1421 | const struct printf_spec numspec = { | ||
1422 | .flags = SPECIAL|SMALL, | ||
1423 | .field_width = -1, | ||
1424 | .precision = -1, | ||
1425 | .base = 16, | ||
1426 | }; | ||
1427 | |||
1428 | for ( ; flags && names->name; names++) { | ||
1429 | mask = names->mask; | ||
1430 | if ((flags & mask) != mask) | ||
1431 | continue; | ||
1432 | |||
1433 | buf = string(buf, end, names->name, strspec); | ||
1434 | |||
1435 | flags &= ~mask; | ||
1436 | if (flags) { | ||
1437 | if (buf < end) | ||
1438 | *buf = '|'; | ||
1439 | buf++; | ||
1440 | } | ||
1441 | } | ||
1442 | |||
1443 | if (flags) | ||
1444 | buf = number(buf, end, flags, numspec); | ||
1445 | |||
1446 | return buf; | ||
1447 | } | ||
1448 | |||
1449 | static noinline_for_stack | ||
1450 | char *flags_string(char *buf, char *end, void *flags_ptr, const char *fmt) | ||
1451 | { | ||
1452 | unsigned long flags; | ||
1453 | const struct trace_print_flags *names; | ||
1454 | |||
1455 | switch (fmt[1]) { | ||
1456 | case 'p': | ||
1457 | flags = *(unsigned long *)flags_ptr; | ||
1458 | /* Remove zone id */ | ||
1459 | flags &= (1UL << NR_PAGEFLAGS) - 1; | ||
1460 | names = pageflag_names; | ||
1461 | break; | ||
1462 | case 'v': | ||
1463 | flags = *(unsigned long *)flags_ptr; | ||
1464 | names = vmaflag_names; | ||
1465 | break; | ||
1466 | case 'g': | ||
1467 | flags = *(gfp_t *)flags_ptr; | ||
1468 | names = gfpflag_names; | ||
1469 | break; | ||
1470 | default: | ||
1471 | WARN_ONCE(1, "Unsupported flags modifier: %c\n", fmt[1]); | ||
1472 | return buf; | ||
1473 | } | ||
1474 | |||
1475 | return format_flags(buf, end, flags, names); | ||
1476 | } | ||
1477 | |||
1410 | int kptr_restrict __read_mostly; | 1478 | int kptr_restrict __read_mostly; |
1411 | 1479 | ||
1412 | /* | 1480 | /* |
@@ -1495,6 +1563,11 @@ int kptr_restrict __read_mostly; | |||
1495 | * - 'Cn' For a clock, it prints the name (Common Clock Framework) or address | 1563 | * - 'Cn' For a clock, it prints the name (Common Clock Framework) or address |
1496 | * (legacy clock framework) of the clock | 1564 | * (legacy clock framework) of the clock |
1497 | * - 'Cr' For a clock, it prints the current rate of the clock | 1565 | * - 'Cr' For a clock, it prints the current rate of the clock |
1566 | * - 'G' For flags to be printed as a collection of symbolic strings that would | ||
1567 | * construct the specific value. Supported flags given by option: | ||
1568 | * p page flags (see struct page) given as pointer to unsigned long | ||
1569 | * g gfp flags (GFP_* and __GFP_*) given as pointer to gfp_t | ||
1570 | * v vma flags (VM_*) given as pointer to unsigned long | ||
1498 | * | 1571 | * |
1499 | * ** Please update also Documentation/printk-formats.txt when making changes ** | 1572 | * ** Please update also Documentation/printk-formats.txt when making changes ** |
1500 | * | 1573 | * |
@@ -1648,6 +1721,8 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, | |||
1648 | return bdev_name(buf, end, ptr, spec, fmt); | 1721 | return bdev_name(buf, end, ptr, spec, fmt); |
1649 | #endif | 1722 | #endif |
1650 | 1723 | ||
1724 | case 'G': | ||
1725 | return flags_string(buf, end, ptr, fmt); | ||
1651 | } | 1726 | } |
1652 | spec.flags |= SMALL; | 1727 | spec.flags |= SMALL; |
1653 | if (spec.field_width == -1) { | 1728 | if (spec.field_width == -1) { |
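The decoding loop in format_flags() is compact enough to model outside the kernel. The sketch below mirrors its behaviour with a made-up three-entry table: match each fully-set mask, clear it, separate names with '|', and print any untranslated remainder in hex, exactly as the last test cases in test_printf.c expect:

    #include <stdio.h>

    struct flag_name { unsigned long mask; const char *name; };

    /* Demo table; the kernel's real tables are defined in mm/debug.c. */
    static const struct flag_name demo_names[] = {
            { 0x1, "uptodate" },
            { 0x2, "dirty"    },
            { 0x4, "lru"      },
            { 0,   NULL      },     /* sentinel, as in trace_print_flags */
    };

    static void print_flags(unsigned long flags)
    {
            const struct flag_name *p;
            const char *sep = "";

            for (p = demo_names; flags && p->name; p++) {
                    if ((flags & p->mask) != p->mask)
                            continue;       /* only fully-set masks match */
                    printf("%s%s", sep, p->name);
                    flags &= ~p->mask;
                    sep = "|";
            }
            if (flags)                      /* leftover bits stay numeric */
                    printf("%s%#lx", sep, flags);
            putchar('\n');
    }

    int main(void)
    {
            print_flags(0x5);       /* -> uptodate|lru */
            print_flags(0x13);      /* -> uptodate|dirty|0x10 */
            return 0;
    }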
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug index 957d3da53ddd..5c50b238b770 100644 --- a/mm/Kconfig.debug +++ b/mm/Kconfig.debug | |||
@@ -16,8 +16,8 @@ config DEBUG_PAGEALLOC | |||
16 | select PAGE_POISONING if !ARCH_SUPPORTS_DEBUG_PAGEALLOC | 16 | select PAGE_POISONING if !ARCH_SUPPORTS_DEBUG_PAGEALLOC |
17 | ---help--- | 17 | ---help--- |
18 | Unmap pages from the kernel linear mapping after free_pages(). | 18 | Unmap pages from the kernel linear mapping after free_pages(). |
19 | This results in a large slowdown, but helps to find certain types | 19 | Depending on runtime enablement, this results in a small or large |
20 | of memory corruption. | 20 | slowdown, but helps to find certain types of memory corruption. |
21 | 21 | ||
22 | For architectures which don't enable ARCH_SUPPORTS_DEBUG_PAGEALLOC, | 22 | For architectures which don't enable ARCH_SUPPORTS_DEBUG_PAGEALLOC, |
23 | fill the pages with poison patterns after free_pages() and verify | 23 | fill the pages with poison patterns after free_pages() and verify |
@@ -26,5 +26,56 @@ config DEBUG_PAGEALLOC | |||
26 | that would result in incorrect warnings of memory corruption after | 26 | that would result in incorrect warnings of memory corruption after |
27 | a resume because free pages are not saved to the suspend image. | 27 | a resume because free pages are not saved to the suspend image. |
28 | 28 | ||
29 | By default this option will have a small overhead, e.g. by not | ||
30 | allowing the kernel mapping to be backed by large pages on some | ||
31 | architectures. Even bigger overhead comes when the debugging is | ||
32 | enabled by DEBUG_PAGEALLOC_ENABLE_DEFAULT or the debug_pagealloc | ||
33 | command line parameter. | ||
34 | |||
35 | config DEBUG_PAGEALLOC_ENABLE_DEFAULT | ||
36 | bool "Enable debug page memory allocations by default?" | ||
37 | default n | ||
38 | depends on DEBUG_PAGEALLOC | ||
39 | ---help--- | ||
40 | Enable debug page memory allocations by default? This value | ||
41 | can be overridden by debug_pagealloc=off|on. | ||
42 | |||
29 | config PAGE_POISONING | 43 | config PAGE_POISONING |
30 | bool | 44 | bool "Poison pages after freeing" |
45 | select PAGE_EXTENSION | ||
46 | select PAGE_POISONING_NO_SANITY if HIBERNATION | ||
47 | ---help--- | ||
48 | Fill the pages with poison patterns after free_pages() and verify | ||
49 | the patterns before alloc_pages. The filling of the memory helps | ||
50 | reduce the risk of information leaks from freed data. This does | ||
51 | have a potential performance impact. | ||
52 | |||
53 | Note that "poison" here is not the same thing as the "HWPoison" | ||
54 | for CONFIG_MEMORY_FAILURE. This is software poisoning only. | ||
55 | |||
56 | If unsure, say N | ||
57 | |||
58 | config PAGE_POISONING_NO_SANITY | ||
59 | depends on PAGE_POISONING | ||
60 | bool "Only poison, don't sanity check" | ||
61 | ---help--- | ||
62 | Skip the sanity checking on alloc, only fill the pages with | ||
63 | poison on free. This reduces some of the overhead of the | ||
64 | poisoning feature. | ||
65 | |||
66 | If you are only interested in sanitization, say Y. Otherwise | ||
67 | say N. | ||
68 | |||
69 | config PAGE_POISONING_ZERO | ||
70 | bool "Use zero for poisoning instead of random data" | ||
71 | depends on PAGE_POISONING | ||
72 | ---help--- | ||
73 | Instead of using the existing poison value, fill the pages with | ||
74 | zeros. This makes it harder to detect when errors are occurring | ||
75 | due to sanitization but the zeroing at free means that it is | ||
76 | no longer necessary to write zeros when GFP_ZERO is used on | ||
77 | allocation. | ||
78 | |||
79 | Enabling page poisoning with this option will disable hibernation. | ||
80 | |||
81 | If unsure, say N | ||
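Stripped of the Kconfig plumbing, page poisoning is a fill-on-free, verify-on-alloc cycle. A rough userspace model follows; the poison byte is illustrative (the kernel takes its value from include/linux/poison.h), PAGE_POISONING_NO_SANITY corresponds to skipping the check step, and PAGE_POISONING_ZERO corresponds to a zero pattern:

    #include <stdio.h>
    #include <string.h>

    #define DEMO_POISON     0xaa    /* illustrative, not the kernel's value */
    #define DEMO_PAGE_SIZE  4096

    static void poison_page(unsigned char *page)
    {
            memset(page, DEMO_POISON, DEMO_PAGE_SIZE);      /* on free */
    }

    static int check_page(const unsigned char *page)
    {
            size_t i;

            for (i = 0; i < DEMO_PAGE_SIZE; i++)            /* on alloc */
                    if (page[i] != DEMO_POISON)
                            return -1;      /* write-after-free detected */
            return 0;
    }

    int main(void)
    {
            static unsigned char page[DEMO_PAGE_SIZE];

            poison_page(page);
            page[100] = 0x42;       /* simulated use-after-free write */
            printf("page is %s\n", check_page(page) ? "corrupted" : "clean");
            return 0;
    }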
diff --git a/mm/Makefile b/mm/Makefile index 2ed43191fc3b..cfdd481d27a5 100644 --- a/mm/Makefile +++ b/mm/Makefile | |||
@@ -48,7 +48,7 @@ obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o | |||
48 | obj-$(CONFIG_SLOB) += slob.o | 48 | obj-$(CONFIG_SLOB) += slob.o |
49 | obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o | 49 | obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o |
50 | obj-$(CONFIG_KSM) += ksm.o | 50 | obj-$(CONFIG_KSM) += ksm.o |
51 | obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o | 51 | obj-$(CONFIG_PAGE_POISONING) += page_poison.o |
52 | obj-$(CONFIG_SLAB) += slab.o | 52 | obj-$(CONFIG_SLAB) += slab.o |
53 | obj-$(CONFIG_SLUB) += slub.o | 53 | obj-$(CONFIG_SLUB) += slub.o |
54 | obj-$(CONFIG_KMEMCHECK) += kmemcheck.o | 54 | obj-$(CONFIG_KMEMCHECK) += kmemcheck.o |
diff --git a/mm/compaction.c b/mm/compaction.c index 585de54dbe8c..93f71d968098 100644 --- a/mm/compaction.c +++ b/mm/compaction.c | |||
@@ -71,49 +71,6 @@ static inline bool migrate_async_suitable(int migratetype) | |||
71 | return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE; | 71 | return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE; |
72 | } | 72 | } |
73 | 73 | ||
74 | /* | ||
75 | * Check that the whole (or subset of) a pageblock given by the interval of | ||
76 | * [start_pfn, end_pfn) is valid and within the same zone, before scanning it | ||
77 | * with the migration of free compaction scanner. The scanners then need to | ||
78 | * use only pfn_valid_within() check for arches that allow holes within | ||
79 | * pageblocks. | ||
80 | * | ||
81 | * Return struct page pointer of start_pfn, or NULL if checks were not passed. | ||
82 | * | ||
83 | * It's possible on some configurations to have a setup like node0 node1 node0 | ||
84 | * i.e. it's possible that all pages within a zones range of pages do not | ||
85 | * belong to a single zone. We assume that a border between node0 and node1 | ||
86 | * can occur within a single pageblock, but not a node0 node1 node0 | ||
87 | * interleaving within a single pageblock. It is therefore sufficient to check | ||
88 | * the first and last page of a pageblock and avoid checking each individual | ||
89 | * page in a pageblock. | ||
90 | */ | ||
91 | static struct page *pageblock_pfn_to_page(unsigned long start_pfn, | ||
92 | unsigned long end_pfn, struct zone *zone) | ||
93 | { | ||
94 | struct page *start_page; | ||
95 | struct page *end_page; | ||
96 | |||
97 | /* end_pfn is one past the range we are checking */ | ||
98 | end_pfn--; | ||
99 | |||
100 | if (!pfn_valid(start_pfn) || !pfn_valid(end_pfn)) | ||
101 | return NULL; | ||
102 | |||
103 | start_page = pfn_to_page(start_pfn); | ||
104 | |||
105 | if (page_zone(start_page) != zone) | ||
106 | return NULL; | ||
107 | |||
108 | end_page = pfn_to_page(end_pfn); | ||
109 | |||
110 | /* This gives a shorter code than deriving page_zone(end_page) */ | ||
111 | if (page_zone_id(start_page) != page_zone_id(end_page)) | ||
112 | return NULL; | ||
113 | |||
114 | return start_page; | ||
115 | } | ||
116 | |||
117 | #ifdef CONFIG_COMPACTION | 74 | #ifdef CONFIG_COMPACTION |
118 | 75 | ||
119 | /* Do not skip compaction more than 64 times */ | 76 | /* Do not skip compaction more than 64 times */ |
@@ -200,7 +157,8 @@ static void reset_cached_positions(struct zone *zone) | |||
200 | { | 157 | { |
201 | zone->compact_cached_migrate_pfn[0] = zone->zone_start_pfn; | 158 | zone->compact_cached_migrate_pfn[0] = zone->zone_start_pfn; |
202 | zone->compact_cached_migrate_pfn[1] = zone->zone_start_pfn; | 159 | zone->compact_cached_migrate_pfn[1] = zone->zone_start_pfn; |
203 | zone->compact_cached_free_pfn = zone_end_pfn(zone); | 160 | zone->compact_cached_free_pfn = |
161 | round_down(zone_end_pfn(zone) - 1, pageblock_nr_pages); | ||
204 | } | 162 | } |
205 | 163 | ||
206 | /* | 164 | /* |
@@ -554,13 +512,17 @@ unsigned long | |||
554 | isolate_freepages_range(struct compact_control *cc, | 512 | isolate_freepages_range(struct compact_control *cc, |
555 | unsigned long start_pfn, unsigned long end_pfn) | 513 | unsigned long start_pfn, unsigned long end_pfn) |
556 | { | 514 | { |
557 | unsigned long isolated, pfn, block_end_pfn; | 515 | unsigned long isolated, pfn, block_start_pfn, block_end_pfn; |
558 | LIST_HEAD(freelist); | 516 | LIST_HEAD(freelist); |
559 | 517 | ||
560 | pfn = start_pfn; | 518 | pfn = start_pfn; |
519 | block_start_pfn = pfn & ~(pageblock_nr_pages - 1); | ||
520 | if (block_start_pfn < cc->zone->zone_start_pfn) | ||
521 | block_start_pfn = cc->zone->zone_start_pfn; | ||
561 | block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); | 522 | block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); |
562 | 523 | ||
563 | for (; pfn < end_pfn; pfn += isolated, | 524 | for (; pfn < end_pfn; pfn += isolated, |
525 | block_start_pfn = block_end_pfn, | ||
564 | block_end_pfn += pageblock_nr_pages) { | 526 | block_end_pfn += pageblock_nr_pages) { |
565 | /* Protect pfn from changing by isolate_freepages_block */ | 527 | /* Protect pfn from changing by isolate_freepages_block */ |
566 | unsigned long isolate_start_pfn = pfn; | 528 | unsigned long isolate_start_pfn = pfn; |
@@ -573,11 +535,13 @@ isolate_freepages_range(struct compact_control *cc, | |||
573 | * scanning range to the right one. | 535 | * scanning range to the right one. |
574 | */ | 536 | */ |
575 | if (pfn >= block_end_pfn) { | 537 | if (pfn >= block_end_pfn) { |
538 | block_start_pfn = pfn & ~(pageblock_nr_pages - 1); | ||
576 | block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); | 539 | block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); |
577 | block_end_pfn = min(block_end_pfn, end_pfn); | 540 | block_end_pfn = min(block_end_pfn, end_pfn); |
578 | } | 541 | } |
579 | 542 | ||
580 | if (!pageblock_pfn_to_page(pfn, block_end_pfn, cc->zone)) | 543 | if (!pageblock_pfn_to_page(block_start_pfn, |
544 | block_end_pfn, cc->zone)) | ||
581 | break; | 545 | break; |
582 | 546 | ||
583 | isolated = isolate_freepages_block(cc, &isolate_start_pfn, | 547 | isolated = isolate_freepages_block(cc, &isolate_start_pfn, |
@@ -863,18 +827,23 @@ unsigned long | |||
863 | isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn, | 827 | isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn, |
864 | unsigned long end_pfn) | 828 | unsigned long end_pfn) |
865 | { | 829 | { |
866 | unsigned long pfn, block_end_pfn; | 830 | unsigned long pfn, block_start_pfn, block_end_pfn; |
867 | 831 | ||
868 | /* Scan block by block. First and last block may be incomplete */ | 832 | /* Scan block by block. First and last block may be incomplete */ |
869 | pfn = start_pfn; | 833 | pfn = start_pfn; |
834 | block_start_pfn = pfn & ~(pageblock_nr_pages - 1); | ||
835 | if (block_start_pfn < cc->zone->zone_start_pfn) | ||
836 | block_start_pfn = cc->zone->zone_start_pfn; | ||
870 | block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); | 837 | block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); |
871 | 838 | ||
872 | for (; pfn < end_pfn; pfn = block_end_pfn, | 839 | for (; pfn < end_pfn; pfn = block_end_pfn, |
840 | block_start_pfn = block_end_pfn, | ||
873 | block_end_pfn += pageblock_nr_pages) { | 841 | block_end_pfn += pageblock_nr_pages) { |
874 | 842 | ||
875 | block_end_pfn = min(block_end_pfn, end_pfn); | 843 | block_end_pfn = min(block_end_pfn, end_pfn); |
876 | 844 | ||
877 | if (!pageblock_pfn_to_page(pfn, block_end_pfn, cc->zone)) | 845 | if (!pageblock_pfn_to_page(block_start_pfn, |
846 | block_end_pfn, cc->zone)) | ||
878 | continue; | 847 | continue; |
879 | 848 | ||
880 | pfn = isolate_migratepages_block(cc, pfn, block_end_pfn, | 849 | pfn = isolate_migratepages_block(cc, pfn, block_end_pfn, |
@@ -1103,7 +1072,9 @@ int sysctl_compact_unevictable_allowed __read_mostly = 1; | |||
1103 | static isolate_migrate_t isolate_migratepages(struct zone *zone, | 1072 | static isolate_migrate_t isolate_migratepages(struct zone *zone, |
1104 | struct compact_control *cc) | 1073 | struct compact_control *cc) |
1105 | { | 1074 | { |
1106 | unsigned long low_pfn, end_pfn; | 1075 | unsigned long block_start_pfn; |
1076 | unsigned long block_end_pfn; | ||
1077 | unsigned long low_pfn; | ||
1107 | unsigned long isolate_start_pfn; | 1078 | unsigned long isolate_start_pfn; |
1108 | struct page *page; | 1079 | struct page *page; |
1109 | const isolate_mode_t isolate_mode = | 1080 | const isolate_mode_t isolate_mode = |
@@ -1115,16 +1086,21 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, | |||
1115 | * initialized by compact_zone() | 1086 | * initialized by compact_zone() |
1116 | */ | 1087 | */ |
1117 | low_pfn = cc->migrate_pfn; | 1088 | low_pfn = cc->migrate_pfn; |
1089 | block_start_pfn = cc->migrate_pfn & ~(pageblock_nr_pages - 1); | ||
1090 | if (block_start_pfn < zone->zone_start_pfn) | ||
1091 | block_start_pfn = zone->zone_start_pfn; | ||
1118 | 1092 | ||
1119 | /* Only scan within a pageblock boundary */ | 1093 | /* Only scan within a pageblock boundary */ |
1120 | end_pfn = ALIGN(low_pfn + 1, pageblock_nr_pages); | 1094 | block_end_pfn = ALIGN(low_pfn + 1, pageblock_nr_pages); |
1121 | 1095 | ||
1122 | /* | 1096 | /* |
1123 | * Iterate over whole pageblocks until we find the first suitable. | 1097 | * Iterate over whole pageblocks until we find the first suitable. |
1124 | * Do not cross the free scanner. | 1098 | * Do not cross the free scanner. |
1125 | */ | 1099 | */ |
1126 | for (; end_pfn <= cc->free_pfn; | 1100 | for (; block_end_pfn <= cc->free_pfn; |
1127 | low_pfn = end_pfn, end_pfn += pageblock_nr_pages) { | 1101 | low_pfn = block_end_pfn, |
1102 | block_start_pfn = block_end_pfn, | ||
1103 | block_end_pfn += pageblock_nr_pages) { | ||
1128 | 1104 | ||
1129 | /* | 1105 | /* |
1130 | * This can potentially iterate a massively long zone with | 1106 | * This can potentially iterate a massively long zone with |
@@ -1135,7 +1111,8 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, | |||
1135 | && compact_should_abort(cc)) | 1111 | && compact_should_abort(cc)) |
1136 | break; | 1112 | break; |
1137 | 1113 | ||
1138 | page = pageblock_pfn_to_page(low_pfn, end_pfn, zone); | 1114 | page = pageblock_pfn_to_page(block_start_pfn, block_end_pfn, |
1115 | zone); | ||
1139 | if (!page) | 1116 | if (!page) |
1140 | continue; | 1117 | continue; |
1141 | 1118 | ||
@@ -1154,8 +1131,8 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, | |||
1154 | 1131 | ||
1155 | /* Perform the isolation */ | 1132 | /* Perform the isolation */ |
1156 | isolate_start_pfn = low_pfn; | 1133 | isolate_start_pfn = low_pfn; |
1157 | low_pfn = isolate_migratepages_block(cc, low_pfn, end_pfn, | 1134 | low_pfn = isolate_migratepages_block(cc, low_pfn, |
1158 | isolate_mode); | 1135 | block_end_pfn, isolate_mode); |
1159 | 1136 | ||
1160 | if (!low_pfn || cc->contended) { | 1137 | if (!low_pfn || cc->contended) { |
1161 | acct_isolated(zone, cc); | 1138 | acct_isolated(zone, cc); |
@@ -1371,11 +1348,11 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) | |||
1371 | */ | 1348 | */ |
1372 | cc->migrate_pfn = zone->compact_cached_migrate_pfn[sync]; | 1349 | cc->migrate_pfn = zone->compact_cached_migrate_pfn[sync]; |
1373 | cc->free_pfn = zone->compact_cached_free_pfn; | 1350 | cc->free_pfn = zone->compact_cached_free_pfn; |
1374 | if (cc->free_pfn < start_pfn || cc->free_pfn > end_pfn) { | 1351 | if (cc->free_pfn < start_pfn || cc->free_pfn >= end_pfn) { |
1375 | cc->free_pfn = end_pfn & ~(pageblock_nr_pages-1); | 1352 | cc->free_pfn = round_down(end_pfn - 1, pageblock_nr_pages); |
1376 | zone->compact_cached_free_pfn = cc->free_pfn; | 1353 | zone->compact_cached_free_pfn = cc->free_pfn; |
1377 | } | 1354 | } |
1378 | if (cc->migrate_pfn < start_pfn || cc->migrate_pfn > end_pfn) { | 1355 | if (cc->migrate_pfn < start_pfn || cc->migrate_pfn >= end_pfn) { |
1379 | cc->migrate_pfn = start_pfn; | 1356 | cc->migrate_pfn = start_pfn; |
1380 | zone->compact_cached_migrate_pfn[0] = cc->migrate_pfn; | 1357 | zone->compact_cached_migrate_pfn[0] = cc->migrate_pfn; |
1381 | zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn; | 1358 | zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn; |
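Most of this hunk is pageblock-boundary arithmetic that the three scanners now share. A standalone check of those expressions, assuming an order-9 pageblock (512 pages, as on x86-64 with 2MB huge pages):

    #include <stdio.h>

    #define pageblock_nr_pages 512UL        /* assumed, arch-dependent */
    #define round_down(x, y)   ((x) & ~((y) - 1))
    #define ALIGN(x, y)        (((x) + (y) - 1) & ~((y) - 1))

    int main(void)
    {
            unsigned long pfn = 1000, zone_end = 10000;

            /* start of the pageblock containing pfn */
            unsigned long block_start = pfn & ~(pageblock_nr_pages - 1);
            /* first pfn of the next pageblock */
            unsigned long block_end = ALIGN(pfn + 1, pageblock_nr_pages);
            /* cached free-scanner position: last full block in the zone */
            unsigned long free_pfn = round_down(zone_end - 1,
                                                pageblock_nr_pages);

            printf("pfn %lu -> block [%lu, %lu), free_pfn %lu\n",
                   pfn, block_start, block_end, free_pfn); /* 512, 1024, 9728 */
            return 0;
    }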
diff --git a/mm/debug.c b/mm/debug.c index f05b2d5d6481..df7247b0b532 100644 --- a/mm/debug.c +++ b/mm/debug.c | |||
@@ -9,75 +9,38 @@ | |||
9 | #include <linux/mm.h> | 9 | #include <linux/mm.h> |
10 | #include <linux/trace_events.h> | 10 | #include <linux/trace_events.h> |
11 | #include <linux/memcontrol.h> | 11 | #include <linux/memcontrol.h> |
12 | 12 | #include <trace/events/mmflags.h> | |
13 | static const struct trace_print_flags pageflag_names[] = { | 13 | #include <linux/migrate.h> |
14 | {1UL << PG_locked, "locked" }, | 14 | #include <linux/page_owner.h> |
15 | {1UL << PG_error, "error" }, | 15 | |
16 | {1UL << PG_referenced, "referenced" }, | 16 | #include "internal.h" |
17 | {1UL << PG_uptodate, "uptodate" }, | 17 | |
18 | {1UL << PG_dirty, "dirty" }, | 18 | char *migrate_reason_names[MR_TYPES] = { |
19 | {1UL << PG_lru, "lru" }, | 19 | "compaction", |
20 | {1UL << PG_active, "active" }, | 20 | "memory_failure", |
21 | {1UL << PG_slab, "slab" }, | 21 | "memory_hotplug", |
22 | {1UL << PG_owner_priv_1, "owner_priv_1" }, | 22 | "syscall_or_cpuset", |
23 | {1UL << PG_arch_1, "arch_1" }, | 23 | "mempolicy_mbind", |
24 | {1UL << PG_reserved, "reserved" }, | 24 | "numa_misplaced", |
25 | {1UL << PG_private, "private" }, | 25 | "cma", |
26 | {1UL << PG_private_2, "private_2" }, | ||
27 | {1UL << PG_writeback, "writeback" }, | ||
28 | {1UL << PG_head, "head" }, | ||
29 | {1UL << PG_swapcache, "swapcache" }, | ||
30 | {1UL << PG_mappedtodisk, "mappedtodisk" }, | ||
31 | {1UL << PG_reclaim, "reclaim" }, | ||
32 | {1UL << PG_swapbacked, "swapbacked" }, | ||
33 | {1UL << PG_unevictable, "unevictable" }, | ||
34 | #ifdef CONFIG_MMU | ||
35 | {1UL << PG_mlocked, "mlocked" }, | ||
36 | #endif | ||
37 | #ifdef CONFIG_ARCH_USES_PG_UNCACHED | ||
38 | {1UL << PG_uncached, "uncached" }, | ||
39 | #endif | ||
40 | #ifdef CONFIG_MEMORY_FAILURE | ||
41 | {1UL << PG_hwpoison, "hwpoison" }, | ||
42 | #endif | ||
43 | #if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT) | ||
44 | {1UL << PG_young, "young" }, | ||
45 | {1UL << PG_idle, "idle" }, | ||
46 | #endif | ||
47 | }; | 26 | }; |
48 | 27 | ||
49 | static void dump_flags(unsigned long flags, | 28 | const struct trace_print_flags pageflag_names[] = { |
50 | const struct trace_print_flags *names, int count) | 29 | __def_pageflag_names, |
51 | { | 30 | {0, NULL} |
52 | const char *delim = ""; | 31 | }; |
53 | unsigned long mask; | ||
54 | int i; | ||
55 | |||
56 | pr_emerg("flags: %#lx(", flags); | ||
57 | |||
58 | /* remove zone id */ | ||
59 | flags &= (1UL << NR_PAGEFLAGS) - 1; | ||
60 | |||
61 | for (i = 0; i < count && flags; i++) { | ||
62 | |||
63 | mask = names[i].mask; | ||
64 | if ((flags & mask) != mask) | ||
65 | continue; | ||
66 | |||
67 | flags &= ~mask; | ||
68 | pr_cont("%s%s", delim, names[i].name); | ||
69 | delim = "|"; | ||
70 | } | ||
71 | 32 | ||
72 | /* check for left over flags */ | 33 | const struct trace_print_flags gfpflag_names[] = { |
73 | if (flags) | 34 | __def_gfpflag_names, |
74 | pr_cont("%s%#lx", delim, flags); | 35 | {0, NULL} |
36 | }; | ||
75 | 37 | ||
76 | pr_cont(")\n"); | 38 | const struct trace_print_flags vmaflag_names[] = { |
77 | } | 39 | __def_vmaflag_names, |
40 | {0, NULL} | ||
41 | }; | ||
78 | 42 | ||
79 | void dump_page_badflags(struct page *page, const char *reason, | 43 | void __dump_page(struct page *page, const char *reason) |
80 | unsigned long badflags) | ||
81 | { | 44 | { |
82 | pr_emerg("page:%p count:%d mapcount:%d mapping:%p index:%#lx", | 45 | pr_emerg("page:%p count:%d mapcount:%d mapping:%p index:%#lx", |
83 | page, atomic_read(&page->_count), page_mapcount(page), | 46 | page, atomic_read(&page->_count), page_mapcount(page), |
@@ -85,15 +48,13 @@ void dump_page_badflags(struct page *page, const char *reason, | |||
85 | if (PageCompound(page)) | 48 | if (PageCompound(page)) |
86 | pr_cont(" compound_mapcount: %d", compound_mapcount(page)); | 49 | pr_cont(" compound_mapcount: %d", compound_mapcount(page)); |
87 | pr_cont("\n"); | 50 | pr_cont("\n"); |
88 | BUILD_BUG_ON(ARRAY_SIZE(pageflag_names) != __NR_PAGEFLAGS); | 51 | BUILD_BUG_ON(ARRAY_SIZE(pageflag_names) != __NR_PAGEFLAGS + 1); |
89 | dump_flags(page->flags, pageflag_names, ARRAY_SIZE(pageflag_names)); | 52 | |
53 | pr_emerg("flags: %#lx(%pGp)\n", page->flags, &page->flags); | ||
54 | |||
90 | if (reason) | 55 | if (reason) |
91 | pr_alert("page dumped because: %s\n", reason); | 56 | pr_alert("page dumped because: %s\n", reason); |
92 | if (page->flags & badflags) { | 57 | |
93 | pr_alert("bad because of flags:\n"); | ||
94 | dump_flags(page->flags & badflags, | ||
95 | pageflag_names, ARRAY_SIZE(pageflag_names)); | ||
96 | } | ||
97 | #ifdef CONFIG_MEMCG | 58 | #ifdef CONFIG_MEMCG |
98 | if (page->mem_cgroup) | 59 | if (page->mem_cgroup) |
99 | pr_alert("page->mem_cgroup:%p\n", page->mem_cgroup); | 60 | pr_alert("page->mem_cgroup:%p\n", page->mem_cgroup); |
@@ -102,67 +63,26 @@ void dump_page_badflags(struct page *page, const char *reason, | |||
102 | 63 | ||
103 | void dump_page(struct page *page, const char *reason) | 64 | void dump_page(struct page *page, const char *reason) |
104 | { | 65 | { |
105 | dump_page_badflags(page, reason, 0); | 66 | __dump_page(page, reason); |
67 | dump_page_owner(page); | ||
106 | } | 68 | } |
107 | EXPORT_SYMBOL(dump_page); | 69 | EXPORT_SYMBOL(dump_page); |
108 | 70 | ||
109 | #ifdef CONFIG_DEBUG_VM | 71 | #ifdef CONFIG_DEBUG_VM |
110 | 72 | ||
111 | static const struct trace_print_flags vmaflags_names[] = { | ||
112 | {VM_READ, "read" }, | ||
113 | {VM_WRITE, "write" }, | ||
114 | {VM_EXEC, "exec" }, | ||
115 | {VM_SHARED, "shared" }, | ||
116 | {VM_MAYREAD, "mayread" }, | ||
117 | {VM_MAYWRITE, "maywrite" }, | ||
118 | {VM_MAYEXEC, "mayexec" }, | ||
119 | {VM_MAYSHARE, "mayshare" }, | ||
120 | {VM_GROWSDOWN, "growsdown" }, | ||
121 | {VM_PFNMAP, "pfnmap" }, | ||
122 | {VM_DENYWRITE, "denywrite" }, | ||
123 | {VM_LOCKONFAULT, "lockonfault" }, | ||
124 | {VM_LOCKED, "locked" }, | ||
125 | {VM_IO, "io" }, | ||
126 | {VM_SEQ_READ, "seqread" }, | ||
127 | {VM_RAND_READ, "randread" }, | ||
128 | {VM_DONTCOPY, "dontcopy" }, | ||
129 | {VM_DONTEXPAND, "dontexpand" }, | ||
130 | {VM_ACCOUNT, "account" }, | ||
131 | {VM_NORESERVE, "noreserve" }, | ||
132 | {VM_HUGETLB, "hugetlb" }, | ||
133 | #if defined(CONFIG_X86) | ||
134 | {VM_PAT, "pat" }, | ||
135 | #elif defined(CONFIG_PPC) | ||
136 | {VM_SAO, "sao" }, | ||
137 | #elif defined(CONFIG_PARISC) || defined(CONFIG_METAG) || defined(CONFIG_IA64) | ||
138 | {VM_GROWSUP, "growsup" }, | ||
139 | #elif !defined(CONFIG_MMU) | ||
140 | {VM_MAPPED_COPY, "mappedcopy" }, | ||
141 | #else | ||
142 | {VM_ARCH_1, "arch_1" }, | ||
143 | #endif | ||
144 | {VM_DONTDUMP, "dontdump" }, | ||
145 | #ifdef CONFIG_MEM_SOFT_DIRTY | ||
146 | {VM_SOFTDIRTY, "softdirty" }, | ||
147 | #endif | ||
148 | {VM_MIXEDMAP, "mixedmap" }, | ||
149 | {VM_HUGEPAGE, "hugepage" }, | ||
150 | {VM_NOHUGEPAGE, "nohugepage" }, | ||
151 | {VM_MERGEABLE, "mergeable" }, | ||
152 | }; | ||
153 | |||
154 | void dump_vma(const struct vm_area_struct *vma) | 73 | void dump_vma(const struct vm_area_struct *vma) |
155 | { | 74 | { |
156 | pr_emerg("vma %p start %p end %p\n" | 75 | pr_emerg("vma %p start %p end %p\n" |
157 | "next %p prev %p mm %p\n" | 76 | "next %p prev %p mm %p\n" |
158 | "prot %lx anon_vma %p vm_ops %p\n" | 77 | "prot %lx anon_vma %p vm_ops %p\n" |
159 | "pgoff %lx file %p private_data %p\n", | 78 | "pgoff %lx file %p private_data %p\n" |
79 | "flags: %#lx(%pGv)\n", | ||
160 | vma, (void *)vma->vm_start, (void *)vma->vm_end, vma->vm_next, | 80 | vma, (void *)vma->vm_start, (void *)vma->vm_end, vma->vm_next, |
161 | vma->vm_prev, vma->vm_mm, | 81 | vma->vm_prev, vma->vm_mm, |
162 | (unsigned long)pgprot_val(vma->vm_page_prot), | 82 | (unsigned long)pgprot_val(vma->vm_page_prot), |
163 | vma->anon_vma, vma->vm_ops, vma->vm_pgoff, | 83 | vma->anon_vma, vma->vm_ops, vma->vm_pgoff, |
164 | vma->vm_file, vma->vm_private_data); | 84 | vma->vm_file, vma->vm_private_data, |
165 | dump_flags(vma->vm_flags, vmaflags_names, ARRAY_SIZE(vmaflags_names)); | 85 | vma->vm_flags, &vma->vm_flags); |
166 | } | 86 | } |
167 | EXPORT_SYMBOL(dump_vma); | 87 | EXPORT_SYMBOL(dump_vma); |
168 | 88 | ||
@@ -196,7 +116,7 @@ void dump_mm(const struct mm_struct *mm) | |||
196 | #if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION) | 116 | #if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION) |
197 | "tlb_flush_pending %d\n" | 117 | "tlb_flush_pending %d\n" |
198 | #endif | 118 | #endif |
199 | "%s", /* This is here to hold the comma */ | 119 | "def_flags: %#lx(%pGv)\n", |
200 | 120 | ||
201 | mm, mm->mmap, mm->vmacache_seqnum, mm->task_size, | 121 | mm, mm->mmap, mm->vmacache_seqnum, mm->task_size, |
202 | #ifdef CONFIG_MMU | 122 | #ifdef CONFIG_MMU |
@@ -230,11 +150,8 @@ void dump_mm(const struct mm_struct *mm) | |||
230 | #if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION) | 150 | #if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION) |
231 | mm->tlb_flush_pending, | 151 | mm->tlb_flush_pending, |
232 | #endif | 152 | #endif |
233 | "" /* This is here to not have a comma! */ | 153 | mm->def_flags, &mm->def_flags |
234 | ); | 154 | ); |
235 | |||
236 | dump_flags(mm->def_flags, vmaflags_names, | ||
237 | ARRAY_SIZE(vmaflags_names)); | ||
238 | } | 155 | } |
239 | 156 | ||
240 | #endif /* CONFIG_DEBUG_VM */ | 157 | #endif /* CONFIG_DEBUG_VM */ |
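The hand-maintained tables above are replaced by ones generated from the same macros the mmflags tracepoints use, with a {0, NULL} sentinel appended so the %pG parser knows where to stop; that sentinel is also why the BUILD_BUG_ON() now compares against __NR_PAGEFLAGS + 1. A reduced model of the construction, with demo names standing in for the real __def_pageflag_names macro from include/trace/events/mmflags.h:

    #include <stdio.h>

    struct trace_print_flags_demo { unsigned long mask; const char *name; };

    /* Stand-in for __def_pageflag_names: one macro, two consumers. */
    #define DEMO_PAGEFLAG_NAMES             \
            { 1UL << 0, "locked"     },     \
            { 1UL << 1, "error"      },     \
            { 1UL << 2, "referenced" }

    static const struct trace_print_flags_demo pageflag_names_demo[] = {
            DEMO_PAGEFLAG_NAMES,
            { 0, NULL }     /* sentinel consumed by the %pG code */
    };

    int main(void)
    {
            /* three flags plus the sentinel: hence the "+ 1" check */
            printf("%zu entries\n", sizeof(pageflag_names_demo) /
                                    sizeof(pageflag_names_demo[0]));
            return 0;
    }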
diff --git a/mm/failslab.c b/mm/failslab.c index 79171b4a5826..b0fac98cd938 100644 --- a/mm/failslab.c +++ b/mm/failslab.c | |||
@@ -1,5 +1,7 @@ | |||
1 | #include <linux/fault-inject.h> | 1 | #include <linux/fault-inject.h> |
2 | #include <linux/slab.h> | 2 | #include <linux/slab.h> |
3 | #include <linux/mm.h> | ||
4 | #include "slab.h" | ||
3 | 5 | ||
4 | static struct { | 6 | static struct { |
5 | struct fault_attr attr; | 7 | struct fault_attr attr; |
@@ -11,18 +13,22 @@ static struct { | |||
11 | .cache_filter = false, | 13 | .cache_filter = false, |
12 | }; | 14 | }; |
13 | 15 | ||
14 | bool should_failslab(size_t size, gfp_t gfpflags, unsigned long cache_flags) | 16 | bool should_failslab(struct kmem_cache *s, gfp_t gfpflags) |
15 | { | 17 | { |
18 | /* No fault-injection for bootstrap cache */ | ||
19 | if (unlikely(s == kmem_cache)) | ||
20 | return false; | ||
21 | |||
16 | if (gfpflags & __GFP_NOFAIL) | 22 | if (gfpflags & __GFP_NOFAIL) |
17 | return false; | 23 | return false; |
18 | 24 | ||
19 | if (failslab.ignore_gfp_reclaim && (gfpflags & __GFP_RECLAIM)) | 25 | if (failslab.ignore_gfp_reclaim && (gfpflags & __GFP_RECLAIM)) |
20 | return false; | 26 | return false; |
21 | 27 | ||
22 | if (failslab.cache_filter && !(cache_flags & SLAB_FAILSLAB)) | 28 | if (failslab.cache_filter && !(s->flags & SLAB_FAILSLAB)) |
23 | return false; | 29 | return false; |
24 | 30 | ||
25 | return should_fail(&failslab.attr, size); | 31 | return should_fail(&failslab.attr, s->object_size); |
26 | } | 32 | } |
27 | 33 | ||
28 | static int __init setup_failslab(char *str) | 34 | static int __init setup_failslab(char *str) |
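Passing the cache itself into should_failslab() lets it exempt the bootstrap kmem_cache and read the per-cache SLAB_FAILSLAB bit directly, instead of having callers forward size and flags. A reduced model of the resulting decision chain; every name here is illustrative rather than the kernel's:

    #include <stdbool.h>
    #include <stddef.h>

    struct cache_demo { unsigned long flags; size_t object_size; };

    #define DEMO_SLAB_FAILSLAB  0x1UL
    #define DEMO_GFP_NOFAIL     0x2UL
    #define DEMO_GFP_RECLAIM    0x4UL

    static bool ignore_gfp_reclaim = true;
    static bool cache_filter = true;

    static bool demo_should_fail(const struct cache_demo *s,
                                 unsigned long gfp, bool is_bootstrap_cache)
    {
            if (is_bootstrap_cache)         /* never fail kmem_cache itself */
                    return false;
            if (gfp & DEMO_GFP_NOFAIL)      /* caller cannot tolerate failure */
                    return false;
            if (ignore_gfp_reclaim && (gfp & DEMO_GFP_RECLAIM))
                    return false;
            if (cache_filter && !(s->flags & DEMO_SLAB_FAILSLAB))
                    return false;           /* cache has not opted in */
            return true;    /* the real code then defers to should_fail() */
    }

    int main(void)
    {
            struct cache_demo cache = { .flags = DEMO_SLAB_FAILSLAB,
                                        .object_size = 64 };

            return demo_should_fail(&cache, 0, false) ? 0 : 1;
    }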
diff --git a/mm/filemap.c b/mm/filemap.c index da7a35d83de7..61b441b191ad 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -101,7 +101,7 @@ | |||
101 | * ->tree_lock (page_remove_rmap->set_page_dirty) | 101 | * ->tree_lock (page_remove_rmap->set_page_dirty) |
102 | * bdi.wb->list_lock (page_remove_rmap->set_page_dirty) | 102 | * bdi.wb->list_lock (page_remove_rmap->set_page_dirty) |
103 | * ->inode->i_lock (page_remove_rmap->set_page_dirty) | 103 | * ->inode->i_lock (page_remove_rmap->set_page_dirty) |
104 | * ->memcg->move_lock (page_remove_rmap->mem_cgroup_begin_page_stat) | 104 | * ->memcg->move_lock (page_remove_rmap->lock_page_memcg) |
105 | * bdi.wb->list_lock (zap_pte_range->set_page_dirty) | 105 | * bdi.wb->list_lock (zap_pte_range->set_page_dirty) |
106 | * ->inode->i_lock (zap_pte_range->set_page_dirty) | 106 | * ->inode->i_lock (zap_pte_range->set_page_dirty) |
107 | * ->private_lock (zap_pte_range->__set_page_dirty_buffers) | 107 | * ->private_lock (zap_pte_range->__set_page_dirty_buffers) |
@@ -176,11 +176,9 @@ static void page_cache_tree_delete(struct address_space *mapping, | |||
176 | /* | 176 | /* |
177 | * Delete a page from the page cache and free it. Caller has to make | 177 | * Delete a page from the page cache and free it. Caller has to make |
178 | * sure the page is locked and that nobody else uses it - or that usage | 178 | * sure the page is locked and that nobody else uses it - or that usage |
179 | * is safe. The caller must hold the mapping's tree_lock and | 179 | * is safe. The caller must hold the mapping's tree_lock. |
180 | * mem_cgroup_begin_page_stat(). | ||
181 | */ | 180 | */ |
182 | void __delete_from_page_cache(struct page *page, void *shadow, | 181 | void __delete_from_page_cache(struct page *page, void *shadow) |
183 | struct mem_cgroup *memcg) | ||
184 | { | 182 | { |
185 | struct address_space *mapping = page->mapping; | 183 | struct address_space *mapping = page->mapping; |
186 | 184 | ||
@@ -239,8 +237,7 @@ void __delete_from_page_cache(struct page *page, void *shadow, | |||
239 | * anyway will be cleared before returning page into buddy allocator. | 237 | * anyway will be cleared before returning page into buddy allocator. |
240 | */ | 238 | */ |
241 | if (WARN_ON_ONCE(PageDirty(page))) | 239 | if (WARN_ON_ONCE(PageDirty(page))) |
242 | account_page_cleaned(page, mapping, memcg, | 240 | account_page_cleaned(page, mapping, inode_to_wb(mapping->host)); |
243 | inode_to_wb(mapping->host)); | ||
244 | } | 241 | } |
245 | 242 | ||
246 | /** | 243 | /** |
@@ -254,7 +251,6 @@ void __delete_from_page_cache(struct page *page, void *shadow, | |||
254 | void delete_from_page_cache(struct page *page) | 251 | void delete_from_page_cache(struct page *page) |
255 | { | 252 | { |
256 | struct address_space *mapping = page->mapping; | 253 | struct address_space *mapping = page->mapping; |
257 | struct mem_cgroup *memcg; | ||
258 | unsigned long flags; | 254 | unsigned long flags; |
259 | 255 | ||
260 | void (*freepage)(struct page *); | 256 | void (*freepage)(struct page *); |
@@ -263,11 +259,9 @@ void delete_from_page_cache(struct page *page) | |||
263 | 259 | ||
264 | freepage = mapping->a_ops->freepage; | 260 | freepage = mapping->a_ops->freepage; |
265 | 261 | ||
266 | memcg = mem_cgroup_begin_page_stat(page); | ||
267 | spin_lock_irqsave(&mapping->tree_lock, flags); | 262 | spin_lock_irqsave(&mapping->tree_lock, flags); |
268 | __delete_from_page_cache(page, NULL, memcg); | 263 | __delete_from_page_cache(page, NULL); |
269 | spin_unlock_irqrestore(&mapping->tree_lock, flags); | 264 | spin_unlock_irqrestore(&mapping->tree_lock, flags); |
270 | mem_cgroup_end_page_stat(memcg); | ||
271 | 265 | ||
272 | if (freepage) | 266 | if (freepage) |
273 | freepage(page); | 267 | freepage(page); |
@@ -551,7 +545,6 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) | |||
551 | if (!error) { | 545 | if (!error) { |
552 | struct address_space *mapping = old->mapping; | 546 | struct address_space *mapping = old->mapping; |
553 | void (*freepage)(struct page *); | 547 | void (*freepage)(struct page *); |
554 | struct mem_cgroup *memcg; | ||
555 | unsigned long flags; | 548 | unsigned long flags; |
556 | 549 | ||
557 | pgoff_t offset = old->index; | 550 | pgoff_t offset = old->index; |
@@ -561,9 +554,8 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) | |||
561 | new->mapping = mapping; | 554 | new->mapping = mapping; |
562 | new->index = offset; | 555 | new->index = offset; |
563 | 556 | ||
564 | memcg = mem_cgroup_begin_page_stat(old); | ||
565 | spin_lock_irqsave(&mapping->tree_lock, flags); | 557 | spin_lock_irqsave(&mapping->tree_lock, flags); |
566 | __delete_from_page_cache(old, NULL, memcg); | 558 | __delete_from_page_cache(old, NULL); |
567 | error = radix_tree_insert(&mapping->page_tree, offset, new); | 559 | error = radix_tree_insert(&mapping->page_tree, offset, new); |
568 | BUG_ON(error); | 560 | BUG_ON(error); |
569 | mapping->nrpages++; | 561 | mapping->nrpages++; |
@@ -576,8 +568,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) | |||
576 | if (PageSwapBacked(new)) | 568 | if (PageSwapBacked(new)) |
577 | __inc_zone_page_state(new, NR_SHMEM); | 569 | __inc_zone_page_state(new, NR_SHMEM); |
578 | spin_unlock_irqrestore(&mapping->tree_lock, flags); | 570 | spin_unlock_irqrestore(&mapping->tree_lock, flags); |
579 | mem_cgroup_end_page_stat(memcg); | 571 | mem_cgroup_migrate(old, new); |
580 | mem_cgroup_replace_page(old, new); | ||
581 | radix_tree_preload_end(); | 572 | radix_tree_preload_end(); |
582 | if (freepage) | 573 | if (freepage) |
583 | freepage(old); | 574 | freepage(old); |
@@ -1668,6 +1659,15 @@ find_page: | |||
1668 | index, last_index - index); | 1659 | index, last_index - index); |
1669 | } | 1660 | } |
1670 | if (!PageUptodate(page)) { | 1661 | if (!PageUptodate(page)) { |
1662 | /* | ||
1663 | * See comment in do_read_cache_page on why | ||
1664 | * wait_on_page_locked is used to avoid unnecessarily | ||
1665 | * serialisations and why it's safe. | ||
1666 | */ | ||
1667 | wait_on_page_locked_killable(page); | ||
1668 | if (PageUptodate(page)) | ||
1669 | goto page_ok; | ||
1670 | |||
1671 | if (inode->i_blkbits == PAGE_CACHE_SHIFT || | 1671 | if (inode->i_blkbits == PAGE_CACHE_SHIFT || |
1672 | !mapping->a_ops->is_partially_uptodate) | 1672 | !mapping->a_ops->is_partially_uptodate) |
1673 | goto page_not_up_to_date; | 1673 | goto page_not_up_to_date; |
@@ -2303,7 +2303,7 @@ static struct page *wait_on_page_read(struct page *page) | |||
2303 | return page; | 2303 | return page; |
2304 | } | 2304 | } |
2305 | 2305 | ||
2306 | static struct page *__read_cache_page(struct address_space *mapping, | 2306 | static struct page *do_read_cache_page(struct address_space *mapping, |
2307 | pgoff_t index, | 2307 | pgoff_t index, |
2308 | int (*filler)(void *, struct page *), | 2308 | int (*filler)(void *, struct page *), |
2309 | void *data, | 2309 | void *data, |
@@ -2325,53 +2325,74 @@ repeat: | |||
2325 | /* Presumably ENOMEM for radix tree node */ | 2325 | /* Presumably ENOMEM for radix tree node */ |
2326 | return ERR_PTR(err); | 2326 | return ERR_PTR(err); |
2327 | } | 2327 | } |
2328 | |||
2329 | filler: | ||
2328 | err = filler(data, page); | 2330 | err = filler(data, page); |
2329 | if (err < 0) { | 2331 | if (err < 0) { |
2330 | page_cache_release(page); | 2332 | page_cache_release(page); |
2331 | page = ERR_PTR(err); | 2333 | return ERR_PTR(err); |
2332 | } else { | ||
2333 | page = wait_on_page_read(page); | ||
2334 | } | 2334 | } |
2335 | } | ||
2336 | return page; | ||
2337 | } | ||
2338 | |||
2339 | static struct page *do_read_cache_page(struct address_space *mapping, | ||
2340 | pgoff_t index, | ||
2341 | int (*filler)(void *, struct page *), | ||
2342 | void *data, | ||
2343 | gfp_t gfp) | ||
2344 | 2335 | ||
2345 | { | 2336 | page = wait_on_page_read(page); |
2346 | struct page *page; | 2337 | if (IS_ERR(page)) |
2347 | int err; | 2338 | return page; |
2339 | goto out; | ||
2340 | } | ||
2341 | if (PageUptodate(page)) | ||
2342 | goto out; | ||
2348 | 2343 | ||
2349 | retry: | 2344 | /* |
2350 | page = __read_cache_page(mapping, index, filler, data, gfp); | 2345 | * Page is not up to date and may be locked due to one of the following |
2351 | if (IS_ERR(page)) | 2346 | * case a: Page is being filled and the page lock is held |
2352 | return page; | 2347 | * case b: Read/write error clearing the page uptodate status |
2348 | * case c: Truncation in progress (page locked) | ||
2349 | * case d: Reclaim in progress | ||
2350 | * | ||
2351 | * Case a, the page will be up to date when the page is unlocked. | ||
2352 | * There is no need to serialise on the page lock here as the page | ||
2353 | * is pinned so the lock gives no additional protection. Even if the | ||
2354 | * page is truncated, the data is still valid if PageUptodate, as | ||
2355 | * it's a read vs truncate race. | ||
2356 | * Case b, the page will not be up to date | ||
2357 | * Case c, the page may be truncated, but the data itself may still | ||
2358 | * be valid after IO completes, as it's a read vs truncate race. The | ||
2359 | * operation must restart if the page is not uptodate on unlock but | ||
2360 | * otherwise serialising on page lock to stabilise the mapping gives | ||
2361 | * no additional guarantees to the caller as the page lock is | ||
2362 | * released before return. | ||
2363 | * Case d, similar to truncation. If reclaim holds the page lock, it | ||
2364 | * will be a race with remove_mapping that determines if the mapping | ||
2365 | * is valid on unlock but otherwise the data is valid and there is | ||
2366 | * no need to serialise with page lock. | ||
2367 | * | ||
2368 | * As the page lock gives no additional guarantee, we optimistically | ||
2369 | * wait on the page to be unlocked and check if it's up to date and | ||
2370 | * use the page if it is. Otherwise, the page lock is required to | ||
2371 | * distinguish between the different cases. The motivation is that we | ||
2372 | * avoid spurious serialisations and wakeups when multiple processes | ||
2373 | * wait on the same page for IO to complete. | ||
2374 | */ | ||
2375 | wait_on_page_locked(page); | ||
2353 | if (PageUptodate(page)) | 2376 | if (PageUptodate(page)) |
2354 | goto out; | 2377 | goto out; |
2355 | 2378 | ||
2379 | /* Distinguish between all the cases under the safety of the lock */ | ||
2356 | lock_page(page); | 2380 | lock_page(page); |
2381 | |||
2382 | /* Case c or d, restart the operation */ | ||
2357 | if (!page->mapping) { | 2383 | if (!page->mapping) { |
2358 | unlock_page(page); | 2384 | unlock_page(page); |
2359 | page_cache_release(page); | 2385 | page_cache_release(page); |
2360 | goto retry; | 2386 | goto repeat; |
2361 | } | 2387 | } |
2388 | |||
2389 | /* Someone else locked and filled the page in a very small window */ | ||
2362 | if (PageUptodate(page)) { | 2390 | if (PageUptodate(page)) { |
2363 | unlock_page(page); | 2391 | unlock_page(page); |
2364 | goto out; | 2392 | goto out; |
2365 | } | 2393 | } |
2366 | err = filler(data, page); | 2394 | goto filler; |
2367 | if (err < 0) { | 2395 | |
2368 | page_cache_release(page); | ||
2369 | return ERR_PTR(err); | ||
2370 | } else { | ||
2371 | page = wait_on_page_read(page); | ||
2372 | if (IS_ERR(page)) | ||
2373 | return page; | ||
2374 | } | ||
2375 | out: | 2396 | out: |
2376 | mark_page_accessed(page); | 2397 | mark_page_accessed(page); |
2377 | return page; | 2398 | return page; |
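The long comment above reduces to a two-phase pattern: wait without the lock, and lock only when the lockless check is inconclusive. A kernel-context sketch of that shape (pseudocode built on the real page-lock primitives, not a drop-in replacement for do_read_cache_page()):

    #include <linux/pagemap.h>

    static void wait_for_uptodate_sketch(struct page *page)
    {
            /* Phase 1, optimistic: wait for the current holder to unlock,
             * then test the outcome without taking the lock ourselves. */
            wait_on_page_locked(page);
            if (PageUptodate(page))
                    return;

            /* Phase 2, pessimistic: the lock is needed only to tell apart
             * truncation, reclaim and read errors (cases b-d above). */
            lock_page(page);
            if (!page->mapping) {
                    /* truncated or reclaimed: caller restarts the lookup */
            }
            unlock_page(page);
    }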
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index e10a4fee88d2..1ea21e203a70 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
@@ -3220,28 +3220,26 @@ static void unfreeze_page(struct anon_vma *anon_vma, struct page *page) | |||
3220 | } | 3220 | } |
3221 | } | 3221 | } |
3222 | 3222 | ||
3223 | static int __split_huge_page_tail(struct page *head, int tail, | 3223 | static void __split_huge_page_tail(struct page *head, int tail, |
3224 | struct lruvec *lruvec, struct list_head *list) | 3224 | struct lruvec *lruvec, struct list_head *list) |
3225 | { | 3225 | { |
3226 | int mapcount; | ||
3227 | struct page *page_tail = head + tail; | 3226 | struct page *page_tail = head + tail; |
3228 | 3227 | ||
3229 | mapcount = atomic_read(&page_tail->_mapcount) + 1; | 3228 | VM_BUG_ON_PAGE(atomic_read(&page_tail->_mapcount) != -1, page_tail); |
3230 | VM_BUG_ON_PAGE(atomic_read(&page_tail->_count) != 0, page_tail); | 3229 | VM_BUG_ON_PAGE(atomic_read(&page_tail->_count) != 0, page_tail); |
3231 | 3230 | ||
3232 | /* | 3231 | /* |
3233 | * tail_page->_count is zero and not changing from under us. But | 3232 | * tail_page->_count is zero and not changing from under us. But |
3234 | * get_page_unless_zero() may be running from under us on the | 3233 | * get_page_unless_zero() may be running from under us on the |
3235 | * tail_page. If we used atomic_set() below instead of atomic_add(), we | 3234 | * tail_page. If we used atomic_set() below instead of atomic_inc(), we |
3236 | * would then run atomic_set() concurrently with | 3235 | * would then run atomic_set() concurrently with |
3237 | * get_page_unless_zero(), and atomic_set() is implemented in C not | 3236 | * get_page_unless_zero(), and atomic_set() is implemented in C not |
3238 | * using locked ops. spin_unlock on x86 sometime uses locked ops | 3237 | * using locked ops. spin_unlock on x86 sometime uses locked ops |
3239 | * because of PPro errata 66, 92, so unless somebody can guarantee | 3238 | * because of PPro errata 66, 92, so unless somebody can guarantee |
3240 | * atomic_set() here would be safe on all archs (and not only on x86), | 3239 | * atomic_set() here would be safe on all archs (and not only on x86), |
3241 | * it's safer to use atomic_add(). | 3240 | * it's safer to use atomic_inc(). |
3242 | */ | 3241 | */ |
3243 | atomic_add(mapcount + 1, &page_tail->_count); | 3242 | atomic_inc(&page_tail->_count); |
3244 | |||
3245 | 3243 | ||
3246 | page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; | 3244 | page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; |
3247 | page_tail->flags |= (head->flags & | 3245 | page_tail->flags |= (head->flags & |
@@ -3275,8 +3273,6 @@ static int __split_huge_page_tail(struct page *head, int tail, | |||
3275 | page_tail->index = head->index + tail; | 3273 | page_tail->index = head->index + tail; |
3276 | page_cpupid_xchg_last(page_tail, page_cpupid_last(head)); | 3274 | page_cpupid_xchg_last(page_tail, page_cpupid_last(head)); |
3277 | lru_add_page_tail(head, page_tail, lruvec, list); | 3275 | lru_add_page_tail(head, page_tail, lruvec, list); |
3278 | |||
3279 | return mapcount; | ||
3280 | } | 3276 | } |
3281 | 3277 | ||
3282 | static void __split_huge_page(struct page *page, struct list_head *list) | 3278 | static void __split_huge_page(struct page *page, struct list_head *list) |
@@ -3284,7 +3280,7 @@ static void __split_huge_page(struct page *page, struct list_head *list) | |||
3284 | struct page *head = compound_head(page); | 3280 | struct page *head = compound_head(page); |
3285 | struct zone *zone = page_zone(head); | 3281 | struct zone *zone = page_zone(head); |
3286 | struct lruvec *lruvec; | 3282 | struct lruvec *lruvec; |
3287 | int i, tail_mapcount; | 3283 | int i; |
3288 | 3284 | ||
3289 | /* prevent PageLRU to go away from under us, and freeze lru stats */ | 3285 | /* prevent PageLRU to go away from under us, and freeze lru stats */ |
3290 | spin_lock_irq(&zone->lru_lock); | 3286 | spin_lock_irq(&zone->lru_lock); |
@@ -3293,10 +3289,8 @@ static void __split_huge_page(struct page *page, struct list_head *list) | |||
3293 | /* complete memcg works before add pages to LRU */ | 3289 | /* complete memcg works before add pages to LRU */ |
3294 | mem_cgroup_split_huge_fixup(head); | 3290 | mem_cgroup_split_huge_fixup(head); |
3295 | 3291 | ||
3296 | tail_mapcount = 0; | ||
3297 | for (i = HPAGE_PMD_NR - 1; i >= 1; i--) | 3292 | for (i = HPAGE_PMD_NR - 1; i >= 1; i--) |
3298 | tail_mapcount += __split_huge_page_tail(head, i, lruvec, list); | 3293 | __split_huge_page_tail(head, i, lruvec, list); |
3299 | atomic_sub(tail_mapcount, &head->_count); | ||
3300 | 3294 | ||
3301 | ClearPageCompound(head); | 3295 | ClearPageCompound(head); |
3302 | spin_unlock_irq(&zone->lru_lock); | 3296 | spin_unlock_irq(&zone->lru_lock); |
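Replacing the atomic_add() of an accumulated mapcount with a bare atomic_inc() matters because get_page_unless_zero() may bump the tail page's _count at any moment; only a locked read-modify-write preserves such a concurrent increment. A C11 model of the constraint (illustrative only, not kernel code):

    #include <stdatomic.h>
    #include <stdio.h>

    /* Safe: a single RMW, like the kernel's atomic_inc(). */
    static void take_tail_ref(atomic_int *count)
    {
            atomic_fetch_add_explicit(count, 1, memory_order_relaxed);
    }

    /* Racy: a load/store pair would discard any increment that lands
     * between reading the old value and writing the new one. */
    static void broken_take_tail_ref(atomic_int *count)
    {
            int old = atomic_load_explicit(count, memory_order_relaxed);
            atomic_store_explicit(count, old + 1, memory_order_relaxed);
    }

    int main(void)
    {
            atomic_int ok = 0, racy = 0;

            take_tail_ref(&ok);
            broken_take_tail_ref(&racy);    /* same result only if unraced */
            printf("%d %d\n", atomic_load(&ok), atomic_load(&racy));
            return 0;
    }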
diff --git a/mm/internal.h b/mm/internal.h index a38a21ebddb4..ad9400d759c8 100644 --- a/mm/internal.h +++ b/mm/internal.h | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/fs.h> | 14 | #include <linux/fs.h> |
15 | #include <linux/mm.h> | 15 | #include <linux/mm.h> |
16 | #include <linux/pagemap.h> | 16 | #include <linux/pagemap.h> |
17 | #include <linux/tracepoint-defs.h> | ||
17 | 18 | ||
18 | /* | 19 | /* |
19 | * The set of flags that only affect watermark checking and reclaim | 20 | * The set of flags that only affect watermark checking and reclaim |
@@ -131,6 +132,18 @@ __find_buddy_index(unsigned long page_idx, unsigned int order) | |||
131 | return page_idx ^ (1 << order); | 132 | return page_idx ^ (1 << order); |
132 | } | 133 | } |
133 | 134 | ||
135 | extern struct page *__pageblock_pfn_to_page(unsigned long start_pfn, | ||
136 | unsigned long end_pfn, struct zone *zone); | ||
137 | |||
138 | static inline struct page *pageblock_pfn_to_page(unsigned long start_pfn, | ||
139 | unsigned long end_pfn, struct zone *zone) | ||
140 | { | ||
141 | if (zone->contiguous) | ||
142 | return pfn_to_page(start_pfn); | ||
143 | |||
144 | return __pageblock_pfn_to_page(start_pfn, end_pfn, zone); | ||
145 | } | ||
146 | |||
134 | extern int __isolate_free_page(struct page *page, unsigned int order); | 147 | extern int __isolate_free_page(struct page *page, unsigned int order); |
135 | extern void __free_pages_bootmem(struct page *page, unsigned long pfn, | 148 | extern void __free_pages_bootmem(struct page *page, unsigned long pfn, |
136 | unsigned int order); | 149 | unsigned int order); |
@@ -466,4 +479,9 @@ static inline void try_to_unmap_flush_dirty(void) | |||
466 | } | 479 | } |
467 | 480 | ||
468 | #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */ | 481 | #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */ |
482 | |||
483 | extern const struct trace_print_flags pageflag_names[]; | ||
484 | extern const struct trace_print_flags vmaflag_names[]; | ||
485 | extern const struct trace_print_flags gfpflag_names[]; | ||
486 | |||
469 | #endif /* __MM_INTERNAL_H */ | 487 | #endif /* __MM_INTERNAL_H */ |
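The inline wrapper above only pays for the full first/last-page validation when zone->contiguous is not set. The flag itself is presumably established once at init by probing every pageblock with the slow helper; a hedged sketch of that computation (the series adds a similar set_zone_contiguous() in mm/page_alloc.c, outside this excerpt):

    /* Sketch, kernel context assumed; error handling elided. */
    static void set_zone_contiguous_sketch(struct zone *zone)
    {
            unsigned long block_start_pfn = zone->zone_start_pfn;
            unsigned long block_end_pfn;

            block_end_pfn = ALIGN(block_start_pfn + 1, pageblock_nr_pages);
            for (; block_start_pfn < zone_end_pfn(zone);
                            block_start_pfn = block_end_pfn,
                            block_end_pfn += pageblock_nr_pages) {
                    block_end_pfn = min(block_end_pfn, zone_end_pfn(zone));
                    if (!__pageblock_pfn_to_page(block_start_pfn,
                                                 block_end_pfn, zone))
                            return;         /* hole found: leave flag clear */
            }
            zone->contiguous = true;
    }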
diff --git a/mm/kmemcheck.c b/mm/kmemcheck.c index cab58bb592d8..6f4f424037c0 100644 --- a/mm/kmemcheck.c +++ b/mm/kmemcheck.c | |||
@@ -60,6 +60,9 @@ void kmemcheck_free_shadow(struct page *page, int order) | |||
60 | void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object, | 60 | void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object, |
61 | size_t size) | 61 | size_t size) |
62 | { | 62 | { |
63 | if (unlikely(!object)) /* Skip object if allocation failed */ | ||
64 | return; | ||
65 | |||
63 | /* | 66 | /* |
64 | * Has already been memset(), which initializes the shadow for us | 67 | * Has already been memset(), which initializes the shadow for us |
65 | * as well. | 68 | * as well. |
diff --git a/mm/madvise.c b/mm/madvise.c index f56825b6d2e1..a01147359f3b 100644 --- a/mm/madvise.c +++ b/mm/madvise.c | |||
@@ -555,8 +555,9 @@ static int madvise_hwpoison(int bhv, unsigned long start, unsigned long end) | |||
555 | } | 555 | } |
556 | pr_info("Injecting memory failure for page %#lx at %#lx\n", | 556 | pr_info("Injecting memory failure for page %#lx at %#lx\n", |
557 | page_to_pfn(p), start); | 557 | page_to_pfn(p), start); |
558 | /* Ignore return value for now */ | 558 | ret = memory_failure(page_to_pfn(p), 0, MF_COUNT_INCREASED); |
559 | memory_failure(page_to_pfn(p), 0, MF_COUNT_INCREASED); | 559 | if (ret) |
560 | return ret; | ||
560 | } | 561 | } |
561 | return 0; | 562 | return 0; |
562 | } | 563 | } |
@@ -638,14 +639,28 @@ madvise_behavior_valid(int behavior) | |||
638 | * some pages ahead. | 639 | * some pages ahead. |
639 | * MADV_DONTNEED - the application is finished with the given range, | 640 | * MADV_DONTNEED - the application is finished with the given range, |
640 | * so the kernel can free resources associated with it. | 641 | * so the kernel can free resources associated with it. |
642 | * MADV_FREE - the application marks pages in the given range as lazy free, | ||
643 | * where actual purges are postponed until memory pressure happens. | ||
641 | * MADV_REMOVE - the application wants to free up the given range of | 644 | * MADV_REMOVE - the application wants to free up the given range of |
642 | * pages and associated backing store. | 645 | * pages and associated backing store. |
643 | * MADV_DONTFORK - omit this area from child's address space when forking: | 646 | * MADV_DONTFORK - omit this area from child's address space when forking: |
644 | * typically, to avoid COWing pages pinned by get_user_pages(). | 647 | * typically, to avoid COWing pages pinned by get_user_pages(). |
645 | * MADV_DOFORK - cancel MADV_DONTFORK: no longer omit this area when forking. | 648 | * MADV_DOFORK - cancel MADV_DONTFORK: no longer omit this area when forking. |
649 | * MADV_HWPOISON - trigger memory error handler as if the given memory range | ||
650 | * were corrupted by unrecoverable hardware memory failure. | ||
651 | * MADV_SOFT_OFFLINE - try to soft-offline the given range of memory. | ||
646 | * MADV_MERGEABLE - the application recommends that KSM try to merge pages in | 652 | * MADV_MERGEABLE - the application recommends that KSM try to merge pages in |
647 | * this area with pages of identical content from other such areas. | 653 | * this area with pages of identical content from other such areas. |
648 | * MADV_UNMERGEABLE- cancel MADV_MERGEABLE: no longer merge pages with others. | 654 | * MADV_UNMERGEABLE- cancel MADV_MERGEABLE: no longer merge pages with others. |
655 | * MADV_HUGEPAGE - the application wants to back the given range by transparent | ||
656 | * huge pages in the future. Existing pages might be coalesced and | ||
657 | * new pages might be allocated as THP. | ||
658 | * MADV_NOHUGEPAGE - mark the given range as not worth being backed by | ||
659 | * transparent huge pages so the existing pages will not be | ||
660 | * coalesced into THP and new pages will not be allocated as THP. | ||
661 | * MADV_DONTDUMP - the application wants to prevent pages in the given range | ||
662 | * from being included in its core dump. | ||
663 | * MADV_DODUMP - cancel MADV_DONTDUMP: no longer exclude from core dump. | ||
649 | * | 664 | * |
650 | * return values: | 665 | * return values: |
651 | * zero - success | 666 | * zero - success |
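With the advice list now fully documented, the newly described MADV_FREE is easy to demonstrate from userspace. A minimal example; it assumes a kernel new enough to support MADV_FREE and an anonymous private mapping (older libc headers may not define the constant yet):

    #include <stdio.h>
    #include <sys/mman.h>

    int main(void)
    {
            size_t len = 16 * 4096;
            void *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
                             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

            if (buf == MAP_FAILED)
                    return 1;

            /* Lazily free: the kernel may purge these pages under memory
             * pressure; until then the data stays readable, and a later
             * write cancels the advice for the page that was touched. */
            if (madvise(buf, len, MADV_FREE) != 0)
                    perror("madvise(MADV_FREE)");

            munmap(buf, len);
            return 0;
    }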
diff --git a/mm/memblock.c b/mm/memblock.c index dd7989929f13..fc7824fa1b42 100644 --- a/mm/memblock.c +++ b/mm/memblock.c | |||
@@ -612,14 +612,12 @@ static int __init_memblock memblock_add_region(phys_addr_t base, | |||
612 | int nid, | 612 | int nid, |
613 | unsigned long flags) | 613 | unsigned long flags) |
614 | { | 614 | { |
615 | struct memblock_type *type = &memblock.memory; | ||
616 | |||
617 | memblock_dbg("memblock_add: [%#016llx-%#016llx] flags %#02lx %pF\n", | 615 | memblock_dbg("memblock_add: [%#016llx-%#016llx] flags %#02lx %pF\n", |
618 | (unsigned long long)base, | 616 | (unsigned long long)base, |
619 | (unsigned long long)base + size - 1, | 617 | (unsigned long long)base + size - 1, |
620 | flags, (void *)_RET_IP_); | 618 | flags, (void *)_RET_IP_); |
621 | 619 | ||
622 | return memblock_add_range(type, base, size, nid, flags); | 620 | return memblock_add_range(&memblock.memory, base, size, nid, flags); |
623 | } | 621 | } |
624 | 622 | ||
625 | int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size) | 623 | int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size) |
@@ -740,14 +738,12 @@ static int __init_memblock memblock_reserve_region(phys_addr_t base, | |||
740 | int nid, | 738 | int nid, |
741 | unsigned long flags) | 739 | unsigned long flags) |
742 | { | 740 | { |
743 | struct memblock_type *type = &memblock.reserved; | ||
744 | |||
745 | memblock_dbg("memblock_reserve: [%#016llx-%#016llx] flags %#02lx %pF\n", | 741 | memblock_dbg("memblock_reserve: [%#016llx-%#016llx] flags %#02lx %pF\n", |
746 | (unsigned long long)base, | 742 | (unsigned long long)base, |
747 | (unsigned long long)base + size - 1, | 743 | (unsigned long long)base + size - 1, |
748 | flags, (void *)_RET_IP_); | 744 | flags, (void *)_RET_IP_); |
749 | 745 | ||
750 | return memblock_add_range(type, base, size, nid, flags); | 746 | return memblock_add_range(&memblock.reserved, base, size, nid, flags); |
751 | } | 747 | } |
752 | 748 | ||
753 | int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size) | 749 | int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size) |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d06cae2de783..42882c1e7fce 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -268,31 +268,6 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) | |||
268 | return (memcg == root_mem_cgroup); | 268 | return (memcg == root_mem_cgroup); |
269 | } | 269 | } |
270 | 270 | ||
271 | /* | ||
272 | * We restrict the id in the range of [1, 65535], so it can fit into | ||
273 | * an unsigned short. | ||
274 | */ | ||
275 | #define MEM_CGROUP_ID_MAX USHRT_MAX | ||
276 | |||
277 | static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg) | ||
278 | { | ||
279 | return memcg->css.id; | ||
280 | } | ||
281 | |||
282 | /* | ||
283 | * A helper function to get mem_cgroup from ID. must be called under | ||
284 | * rcu_read_lock(). The caller is responsible for calling | ||
285 | * css_tryget_online() if the mem_cgroup is used for charging. (dropping | ||
286 | * refcnt from swap can be called against removed memcg.) | ||
287 | */ | ||
288 | static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id) | ||
289 | { | ||
290 | struct cgroup_subsys_state *css; | ||
291 | |||
292 | css = css_from_id(id, &memory_cgrp_subsys); | ||
293 | return mem_cgroup_from_css(css); | ||
294 | } | ||
295 | |||
296 | #ifndef CONFIG_SLOB | 271 | #ifndef CONFIG_SLOB |
297 | /* | 272 | /* |
298 | * This will be the memcg's index in each cache's ->memcg_params.memcg_caches. | 273 | * This will be the memcg's index in each cache's ->memcg_params.memcg_caches. |
@@ -1709,19 +1684,13 @@ cleanup: | |||
1709 | } | 1684 | } |
1710 | 1685 | ||
1711 | /** | 1686 | /** |
1712 | * mem_cgroup_begin_page_stat - begin a page state statistics transaction | 1687 | * lock_page_memcg - lock a page->mem_cgroup binding |
1713 | * @page: page that is going to change accounted state | 1688 | * @page: the page |
1714 | * | ||
1715 | * This function must mark the beginning of an accounted page state | ||
1716 | * change to prevent double accounting when the page is concurrently | ||
1717 | * being moved to another memcg: | ||
1718 | * | 1689 | * |
1719 | * memcg = mem_cgroup_begin_page_stat(page); | 1690 | * This function protects unlocked LRU pages from being moved to |
1720 | * if (TestClearPageState(page)) | 1691 | * another cgroup and stabilizes their page->mem_cgroup binding. |
1721 | * mem_cgroup_update_page_stat(memcg, state, -1); | ||
1722 | * mem_cgroup_end_page_stat(memcg); | ||
1723 | */ | 1692 | */ |
1724 | struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page) | 1693 | void lock_page_memcg(struct page *page) |
1725 | { | 1694 | { |
1726 | struct mem_cgroup *memcg; | 1695 | struct mem_cgroup *memcg; |
1727 | unsigned long flags; | 1696 | unsigned long flags; |
@@ -1730,25 +1699,18 @@ struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page) | |||
1730 | * The RCU lock is held throughout the transaction. The fast | 1699 | * The RCU lock is held throughout the transaction. The fast |
1731 | * path can get away without acquiring the memcg->move_lock | 1700 | * path can get away without acquiring the memcg->move_lock |
1732 | * because page moving starts with an RCU grace period. | 1701 | * because page moving starts with an RCU grace period. |
1733 | * | ||
1734 | * The RCU lock also protects the memcg from being freed when | ||
1735 | * the page state that is going to change is the only thing | ||
1736 | * preventing the page from being uncharged. | ||
1737 | * E.g. end-writeback clearing PageWriteback(), which allows | ||
1738 | * migration to go ahead and uncharge the page before the | ||
1739 | * account transaction might be complete. | ||
1740 | */ | 1702 | */ |
1741 | rcu_read_lock(); | 1703 | rcu_read_lock(); |
1742 | 1704 | ||
1743 | if (mem_cgroup_disabled()) | 1705 | if (mem_cgroup_disabled()) |
1744 | return NULL; | 1706 | return; |
1745 | again: | 1707 | again: |
1746 | memcg = page->mem_cgroup; | 1708 | memcg = page->mem_cgroup; |
1747 | if (unlikely(!memcg)) | 1709 | if (unlikely(!memcg)) |
1748 | return NULL; | 1710 | return; |
1749 | 1711 | ||
1750 | if (atomic_read(&memcg->moving_account) <= 0) | 1712 | if (atomic_read(&memcg->moving_account) <= 0) |
1751 | return memcg; | 1713 | return; |
1752 | 1714 | ||
1753 | spin_lock_irqsave(&memcg->move_lock, flags); | 1715 | spin_lock_irqsave(&memcg->move_lock, flags); |
1754 | if (memcg != page->mem_cgroup) { | 1716 | if (memcg != page->mem_cgroup) { |
@@ -1759,21 +1721,23 @@ again: | |||
1759 | /* | 1721 | /* |
1760 | * When charge migration first begins, we can have locked and | 1722 | * When charge migration first begins, we can have locked and |
1761 | * unlocked page stat updates happening concurrently. Track | 1723 | * unlocked page stat updates happening concurrently. Track |
1762 | * the task who has the lock for mem_cgroup_end_page_stat(). | 1724 | * the task who has the lock for unlock_page_memcg(). |
1763 | */ | 1725 | */ |
1764 | memcg->move_lock_task = current; | 1726 | memcg->move_lock_task = current; |
1765 | memcg->move_lock_flags = flags; | 1727 | memcg->move_lock_flags = flags; |
1766 | 1728 | ||
1767 | return memcg; | 1729 | return; |
1768 | } | 1730 | } |
1769 | EXPORT_SYMBOL(mem_cgroup_begin_page_stat); | 1731 | EXPORT_SYMBOL(lock_page_memcg); |
1770 | 1732 | ||
1771 | /** | 1733 | /** |
1772 | * mem_cgroup_end_page_stat - finish a page state statistics transaction | 1734 | * unlock_page_memcg - unlock a page->mem_cgroup binding |
1773 | * @memcg: the memcg that was accounted against | 1735 | * @page: the page |
1774 | */ | 1736 | */ |
1775 | void mem_cgroup_end_page_stat(struct mem_cgroup *memcg) | 1737 | void unlock_page_memcg(struct page *page) |
1776 | { | 1738 | { |
1739 | struct mem_cgroup *memcg = page->mem_cgroup; | ||
1740 | |||
1777 | if (memcg && memcg->move_lock_task == current) { | 1741 | if (memcg && memcg->move_lock_task == current) { |
1778 | unsigned long flags = memcg->move_lock_flags; | 1742 | unsigned long flags = memcg->move_lock_flags; |
1779 | 1743 | ||
@@ -1785,7 +1749,7 @@ void mem_cgroup_end_page_stat(struct mem_cgroup *memcg) | |||
1785 | 1749 | ||
1786 | rcu_read_unlock(); | 1750 | rcu_read_unlock(); |
1787 | } | 1751 | } |
1788 | EXPORT_SYMBOL(mem_cgroup_end_page_stat); | 1752 | EXPORT_SYMBOL(unlock_page_memcg); |
1789 | 1753 | ||
1790 | /* | 1754 | /* |
1791 | * size of first charge trial. "32" comes from vmscan.c's magic value. | 1755 | * size of first charge trial. "32" comes from vmscan.c's magic value. |
@@ -4488,7 +4452,7 @@ static int mem_cgroup_move_account(struct page *page, | |||
4488 | VM_BUG_ON(compound && !PageTransHuge(page)); | 4452 | VM_BUG_ON(compound && !PageTransHuge(page)); |
4489 | 4453 | ||
4490 | /* | 4454 | /* |
4491 | * Prevent mem_cgroup_replace_page() from looking at | 4455 | * Prevent mem_cgroup_migrate() from looking at |
4492 | * page->mem_cgroup of its source page while we change it. | 4456 | * page->mem_cgroup of its source page while we change it. |
4493 | */ | 4457 | */ |
4494 | ret = -EBUSY; | 4458 | ret = -EBUSY; |
@@ -4923,9 +4887,9 @@ static void mem_cgroup_move_charge(struct mm_struct *mm) | |||
4923 | 4887 | ||
4924 | lru_add_drain_all(); | 4888 | lru_add_drain_all(); |
4925 | /* | 4889 | /* |
4926 | * Signal mem_cgroup_begin_page_stat() to take the memcg's | 4890 | * Signal lock_page_memcg() to take the memcg's move_lock |
4927 | * move_lock while we're moving its pages to another memcg. | 4891 | * while we're moving its pages to another memcg. Then wait |
4928 | * Then wait for already started RCU-only updates to finish. | 4892 | * for already started RCU-only updates to finish. |
4929 | */ | 4893 | */ |
4930 | atomic_inc(&mc.from->moving_account); | 4894 | atomic_inc(&mc.from->moving_account); |
4931 | synchronize_rcu(); | 4895 | synchronize_rcu(); |
@@ -5517,16 +5481,16 @@ void mem_cgroup_uncharge_list(struct list_head *page_list) | |||
5517 | } | 5481 | } |
5518 | 5482 | ||
5519 | /** | 5483 | /** |
5520 | * mem_cgroup_replace_page - migrate a charge to another page | 5484 | * mem_cgroup_migrate - charge a page's replacement |
5521 | * @oldpage: currently charged page | 5485 | * @oldpage: currently circulating page |
5522 | * @newpage: page to transfer the charge to | 5486 | * @newpage: replacement page |
5523 | * | 5487 | * |
5524 | * Migrate the charge from @oldpage to @newpage. | 5488 | * Charge @newpage as a replacement page for @oldpage. @oldpage will |
5489 | * be uncharged upon free. | ||
5525 | * | 5490 | * |
5526 | * Both pages must be locked, @newpage->mapping must be set up. | 5491 | * Both pages must be locked, @newpage->mapping must be set up. |
5527 | * Either or both pages might be on the LRU already. | ||
5528 | */ | 5492 | */ |
5529 | void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage) | 5493 | void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) |
5530 | { | 5494 | { |
5531 | struct mem_cgroup *memcg; | 5495 | struct mem_cgroup *memcg; |
5532 | unsigned int nr_pages; | 5496 | unsigned int nr_pages; |
@@ -5559,7 +5523,7 @@ void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage) | |||
5559 | page_counter_charge(&memcg->memsw, nr_pages); | 5523 | page_counter_charge(&memcg->memsw, nr_pages); |
5560 | css_get_many(&memcg->css, nr_pages); | 5524 | css_get_many(&memcg->css, nr_pages); |
5561 | 5525 | ||
5562 | commit_charge(newpage, memcg, true); | 5526 | commit_charge(newpage, memcg, false); |
5563 | 5527 | ||
5564 | local_irq_disable(); | 5528 | local_irq_disable(); |
5565 | mem_cgroup_charge_statistics(memcg, newpage, compound, nr_pages); | 5529 | mem_cgroup_charge_statistics(memcg, newpage, compound, nr_pages); |
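The memcontrol.c rename is more than cosmetic: mem_cgroup_begin_page_stat()/mem_cgroup_end_page_stat() returned and consumed a struct mem_cgroup *, while lock_page_memcg()/unlock_page_memcg() are keyed on the page itself, so callers no longer thread a memcg handle through their stat-update sections. A sketch of the converted caller pattern, taken from the page-writeback conversions later in this diff:

    lock_page_memcg(page);          /* stabilize page->mem_cgroup */
    if (TestClearPageDirty(page))
            mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_DIRTY);
    unlock_page_memcg(page);        /* drop move_lock and RCU as needed */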
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index ac595e7a3a95..67c30eb993f0 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c | |||
@@ -826,8 +826,6 @@ static struct page_state { | |||
826 | #undef lru | 826 | #undef lru |
827 | #undef swapbacked | 827 | #undef swapbacked |
828 | #undef head | 828 | #undef head |
829 | #undef tail | ||
830 | #undef compound | ||
831 | #undef slab | 829 | #undef slab |
832 | #undef reserved | 830 | #undef reserved |
833 | 831 | ||
diff --git a/mm/memory.c b/mm/memory.c index 906d8e3b42c0..0e247642ed5b 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -1897,7 +1897,9 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr, | |||
1897 | unsigned long end = addr + size; | 1897 | unsigned long end = addr + size; |
1898 | int err; | 1898 | int err; |
1899 | 1899 | ||
1900 | BUG_ON(addr >= end); | 1900 | if (WARN_ON(addr >= end)) |
1901 | return -EINVAL; | ||
1902 | |||
1901 | pgd = pgd_offset(mm, addr); | 1903 | pgd = pgd_offset(mm, addr); |
1902 | do { | 1904 | do { |
1903 | next = pgd_addr_end(addr, end); | 1905 | next = pgd_addr_end(addr, end); |
@@ -3143,8 +3145,7 @@ static int do_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
3143 | unsigned long address, pte_t *page_table, pmd_t *pmd, | 3145 | unsigned long address, pte_t *page_table, pmd_t *pmd, |
3144 | unsigned int flags, pte_t orig_pte) | 3146 | unsigned int flags, pte_t orig_pte) |
3145 | { | 3147 | { |
3146 | pgoff_t pgoff = (((address & PAGE_MASK) | 3148 | pgoff_t pgoff = linear_page_index(vma, address); |
3147 | - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; | ||
3148 | 3149 | ||
3149 | pte_unmap(page_table); | 3150 | pte_unmap(page_table); |
3150 | /* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */ | 3151 | /* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */ |
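The do_fault() hunk replaces an open-coded offset calculation with linear_page_index(). For an ordinary VMA the two are equivalent, since vm_start is page-aligned; a sketch of what the helper computes (the real helper also special-cases hugetlb VMAs, elided here):

    /* page offset into the backing object that `address` maps to */
    pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;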
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 979b18cbd343..24ea06393816 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c | |||
@@ -77,6 +77,9 @@ static struct { | |||
77 | #define memhp_lock_acquire() lock_map_acquire(&mem_hotplug.dep_map) | 77 | #define memhp_lock_acquire() lock_map_acquire(&mem_hotplug.dep_map) |
78 | #define memhp_lock_release() lock_map_release(&mem_hotplug.dep_map) | 78 | #define memhp_lock_release() lock_map_release(&mem_hotplug.dep_map) |
79 | 79 | ||
80 | bool memhp_auto_online; | ||
81 | EXPORT_SYMBOL_GPL(memhp_auto_online); | ||
82 | |||
80 | void get_online_mems(void) | 83 | void get_online_mems(void) |
81 | { | 84 | { |
82 | might_sleep(); | 85 | might_sleep(); |
@@ -509,6 +512,8 @@ int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn, | |||
509 | int start_sec, end_sec; | 512 | int start_sec, end_sec; |
510 | struct vmem_altmap *altmap; | 513 | struct vmem_altmap *altmap; |
511 | 514 | ||
515 | clear_zone_contiguous(zone); | ||
516 | |||
512 | /* during initialize mem_map, align hot-added range to section */ | 517 | /* during initialize mem_map, align hot-added range to section */ |
513 | start_sec = pfn_to_section_nr(phys_start_pfn); | 518 | start_sec = pfn_to_section_nr(phys_start_pfn); |
514 | end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1); | 519 | end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1); |
@@ -521,7 +526,8 @@ int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn, | |||
521 | if (altmap->base_pfn != phys_start_pfn | 526 | if (altmap->base_pfn != phys_start_pfn |
522 | || vmem_altmap_offset(altmap) > nr_pages) { | 527 | || vmem_altmap_offset(altmap) > nr_pages) { |
523 | pr_warn_once("memory add fail, invalid altmap\n"); | 528 | pr_warn_once("memory add fail, invalid altmap\n"); |
524 | return -EINVAL; | 529 | err = -EINVAL; |
530 | goto out; | ||
525 | } | 531 | } |
526 | altmap->alloc = 0; | 532 | altmap->alloc = 0; |
527 | } | 533 | } |
@@ -539,7 +545,8 @@ int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn, | |||
539 | err = 0; | 545 | err = 0; |
540 | } | 546 | } |
541 | vmemmap_populate_print_last(); | 547 | vmemmap_populate_print_last(); |
542 | 548 | out: | |
549 | set_zone_contiguous(zone); | ||
543 | return err; | 550 | return err; |
544 | } | 551 | } |
545 | EXPORT_SYMBOL_GPL(__add_pages); | 552 | EXPORT_SYMBOL_GPL(__add_pages); |
@@ -811,6 +818,8 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, | |||
811 | } | 818 | } |
812 | } | 819 | } |
813 | 820 | ||
821 | clear_zone_contiguous(zone); | ||
822 | |||
814 | /* | 823 | /* |
815 | * We can only remove entire sections | 824 | * We can only remove entire sections |
816 | */ | 825 | */ |
@@ -826,6 +835,9 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, | |||
826 | if (ret) | 835 | if (ret) |
827 | break; | 836 | break; |
828 | } | 837 | } |
838 | |||
839 | set_zone_contiguous(zone); | ||
840 | |||
829 | return ret; | 841 | return ret; |
830 | } | 842 | } |
831 | EXPORT_SYMBOL_GPL(__remove_pages); | 843 | EXPORT_SYMBOL_GPL(__remove_pages); |
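__add_pages() and __remove_pages() now bracket section changes with the zone-contiguity cache introduced in the page_alloc.c part of this diff: the flag is cleared before the zone's pfn range can change and recomputed afterwards, including on the error path via the new out: label. The pattern, as a sketch:

    clear_zone_contiguous(zone);    /* range may gain or lose holes */
    /* ... add or remove memory sections ... */
    set_zone_contiguous(zone);      /* rescan pageblocks; set the flag
                                     * again only if no hole was found */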
@@ -1261,8 +1273,13 @@ int zone_for_memory(int nid, u64 start, u64 size, int zone_default, | |||
1261 | return zone_default; | 1273 | return zone_default; |
1262 | } | 1274 | } |
1263 | 1275 | ||
1276 | static int online_memory_block(struct memory_block *mem, void *arg) | ||
1277 | { | ||
1278 | return memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE); | ||
1279 | } | ||
1280 | |||
1264 | /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ | 1281 | /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ |
1265 | int __ref add_memory_resource(int nid, struct resource *res) | 1282 | int __ref add_memory_resource(int nid, struct resource *res, bool online) |
1266 | { | 1283 | { |
1267 | u64 start, size; | 1284 | u64 start, size; |
1268 | pg_data_t *pgdat = NULL; | 1285 | pg_data_t *pgdat = NULL; |
@@ -1322,6 +1339,11 @@ int __ref add_memory_resource(int nid, struct resource *res) | |||
1322 | /* create new memmap entry */ | 1339 | /* create new memmap entry */ |
1323 | firmware_map_add_hotplug(start, start + size, "System RAM"); | 1340 | firmware_map_add_hotplug(start, start + size, "System RAM"); |
1324 | 1341 | ||
1342 | /* online pages if requested */ | ||
1343 | if (online) | ||
1344 | walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), | ||
1345 | NULL, online_memory_block); | ||
1346 | |||
1325 | goto out; | 1347 | goto out; |
1326 | 1348 | ||
1327 | error: | 1349 | error: |
@@ -1345,7 +1367,7 @@ int __ref add_memory(int nid, u64 start, u64 size) | |||
1345 | if (IS_ERR(res)) | 1367 | if (IS_ERR(res)) |
1346 | return PTR_ERR(res); | 1368 | return PTR_ERR(res); |
1347 | 1369 | ||
1348 | ret = add_memory_resource(nid, res); | 1370 | ret = add_memory_resource(nid, res, memhp_auto_online); |
1349 | if (ret < 0) | 1371 | if (ret < 0) |
1350 | release_memory_resource(res); | 1372 | release_memory_resource(res); |
1351 | return ret; | 1373 | return ret; |
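add_memory_resource() gains an `online` argument and add_memory() forwards the new memhp_auto_online global, so hot-added memory can be onlined by the kernel instead of waiting for a userspace agent. The caller-side difference, as a sketch (how memhp_auto_online gets toggled — presumably a sysfs knob or Kconfig default elsewhere in this series — is an assumption, not shown in this section):

    /* old: onlining always deferred to userspace */
    ret = add_memory_resource(nid, res);

    /* new: policy carried by the exported global */
    ret = add_memory_resource(nid, res, memhp_auto_online);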
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 9a3f6b90e628..8cbc74387df3 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -643,7 +643,9 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end, | |||
643 | 643 | ||
644 | if (flags & MPOL_MF_LAZY) { | 644 | if (flags & MPOL_MF_LAZY) { |
645 | /* Similar to task_numa_work, skip inaccessible VMAs */ | 645 | /* Similar to task_numa_work, skip inaccessible VMAs */ |
646 | if (vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)) | 646 | if (!is_vm_hugetlb_page(vma) && |
647 | (vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)) && | ||
648 | !(vma->vm_flags & VM_MIXEDMAP)) | ||
647 | change_prot_numa(vma, start, endvma); | 649 | change_prot_numa(vma, start, endvma); |
648 | return 1; | 650 | return 1; |
649 | } | 651 | } |
diff --git a/mm/migrate.c b/mm/migrate.c index 3ad0fea5c438..568284ec75d4 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include <linux/balloon_compaction.h> | 38 | #include <linux/balloon_compaction.h> |
39 | #include <linux/mmu_notifier.h> | 39 | #include <linux/mmu_notifier.h> |
40 | #include <linux/page_idle.h> | 40 | #include <linux/page_idle.h> |
41 | #include <linux/page_owner.h> | ||
41 | 42 | ||
42 | #include <asm/tlbflush.h> | 43 | #include <asm/tlbflush.h> |
43 | 44 | ||
@@ -325,7 +326,6 @@ int migrate_page_move_mapping(struct address_space *mapping, | |||
325 | return -EAGAIN; | 326 | return -EAGAIN; |
326 | 327 | ||
327 | /* No turning back from here */ | 328 | /* No turning back from here */ |
328 | set_page_memcg(newpage, page_memcg(page)); | ||
329 | newpage->index = page->index; | 329 | newpage->index = page->index; |
330 | newpage->mapping = page->mapping; | 330 | newpage->mapping = page->mapping; |
331 | if (PageSwapBacked(page)) | 331 | if (PageSwapBacked(page)) |
@@ -372,7 +372,6 @@ int migrate_page_move_mapping(struct address_space *mapping, | |||
372 | * Now we know that no one else is looking at the page: | 372 | * Now we know that no one else is looking at the page: |
373 | * no turning back from here. | 373 | * no turning back from here. |
374 | */ | 374 | */ |
375 | set_page_memcg(newpage, page_memcg(page)); | ||
376 | newpage->index = page->index; | 375 | newpage->index = page->index; |
377 | newpage->mapping = page->mapping; | 376 | newpage->mapping = page->mapping; |
378 | if (PageSwapBacked(page)) | 377 | if (PageSwapBacked(page)) |
@@ -457,9 +456,9 @@ int migrate_huge_page_move_mapping(struct address_space *mapping, | |||
457 | return -EAGAIN; | 456 | return -EAGAIN; |
458 | } | 457 | } |
459 | 458 | ||
460 | set_page_memcg(newpage, page_memcg(page)); | ||
461 | newpage->index = page->index; | 459 | newpage->index = page->index; |
462 | newpage->mapping = page->mapping; | 460 | newpage->mapping = page->mapping; |
461 | |||
463 | get_page(newpage); | 462 | get_page(newpage); |
464 | 463 | ||
465 | radix_tree_replace_slot(pslot, newpage); | 464 | radix_tree_replace_slot(pslot, newpage); |
@@ -467,6 +466,7 @@ int migrate_huge_page_move_mapping(struct address_space *mapping, | |||
467 | page_unfreeze_refs(page, expected_count - 1); | 466 | page_unfreeze_refs(page, expected_count - 1); |
468 | 467 | ||
469 | spin_unlock_irq(&mapping->tree_lock); | 468 | spin_unlock_irq(&mapping->tree_lock); |
469 | |||
470 | return MIGRATEPAGE_SUCCESS; | 470 | return MIGRATEPAGE_SUCCESS; |
471 | } | 471 | } |
472 | 472 | ||
@@ -578,6 +578,10 @@ void migrate_page_copy(struct page *newpage, struct page *page) | |||
578 | */ | 578 | */ |
579 | if (PageWriteback(newpage)) | 579 | if (PageWriteback(newpage)) |
580 | end_page_writeback(newpage); | 580 | end_page_writeback(newpage); |
581 | |||
582 | copy_page_owner(page, newpage); | ||
583 | |||
584 | mem_cgroup_migrate(page, newpage); | ||
581 | } | 585 | } |
582 | 586 | ||
583 | /************************************************************ | 587 | /************************************************************ |
@@ -772,7 +776,6 @@ static int move_to_new_page(struct page *newpage, struct page *page, | |||
772 | * page is freed; but stats require that PageAnon be left as PageAnon. | 776 | * page is freed; but stats require that PageAnon be left as PageAnon. |
773 | */ | 777 | */ |
774 | if (rc == MIGRATEPAGE_SUCCESS) { | 778 | if (rc == MIGRATEPAGE_SUCCESS) { |
775 | set_page_memcg(page, NULL); | ||
776 | if (!PageAnon(page)) | 779 | if (!PageAnon(page)) |
777 | page->mapping = NULL; | 780 | page->mapping = NULL; |
778 | } | 781 | } |
@@ -952,8 +955,10 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page, | |||
952 | } | 955 | } |
953 | 956 | ||
954 | rc = __unmap_and_move(page, newpage, force, mode); | 957 | rc = __unmap_and_move(page, newpage, force, mode); |
955 | if (rc == MIGRATEPAGE_SUCCESS) | 958 | if (rc == MIGRATEPAGE_SUCCESS) { |
956 | put_new_page = NULL; | 959 | put_new_page = NULL; |
960 | set_page_owner_migrate_reason(newpage, reason); | ||
961 | } | ||
957 | 962 | ||
958 | out: | 963 | out: |
959 | if (rc != -EAGAIN) { | 964 | if (rc != -EAGAIN) { |
@@ -1018,7 +1023,7 @@ out: | |||
1018 | static int unmap_and_move_huge_page(new_page_t get_new_page, | 1023 | static int unmap_and_move_huge_page(new_page_t get_new_page, |
1019 | free_page_t put_new_page, unsigned long private, | 1024 | free_page_t put_new_page, unsigned long private, |
1020 | struct page *hpage, int force, | 1025 | struct page *hpage, int force, |
1021 | enum migrate_mode mode) | 1026 | enum migrate_mode mode, int reason) |
1022 | { | 1027 | { |
1023 | int rc = -EAGAIN; | 1028 | int rc = -EAGAIN; |
1024 | int *result = NULL; | 1029 | int *result = NULL; |
@@ -1076,6 +1081,7 @@ put_anon: | |||
1076 | if (rc == MIGRATEPAGE_SUCCESS) { | 1081 | if (rc == MIGRATEPAGE_SUCCESS) { |
1077 | hugetlb_cgroup_migrate(hpage, new_hpage); | 1082 | hugetlb_cgroup_migrate(hpage, new_hpage); |
1078 | put_new_page = NULL; | 1083 | put_new_page = NULL; |
1084 | set_page_owner_migrate_reason(new_hpage, reason); | ||
1079 | } | 1085 | } |
1080 | 1086 | ||
1081 | unlock_page(hpage); | 1087 | unlock_page(hpage); |
@@ -1148,7 +1154,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page, | |||
1148 | if (PageHuge(page)) | 1154 | if (PageHuge(page)) |
1149 | rc = unmap_and_move_huge_page(get_new_page, | 1155 | rc = unmap_and_move_huge_page(get_new_page, |
1150 | put_new_page, private, page, | 1156 | put_new_page, private, page, |
1151 | pass > 2, mode); | 1157 | pass > 2, mode, reason); |
1152 | else | 1158 | else |
1153 | rc = unmap_and_move(get_new_page, put_new_page, | 1159 | rc = unmap_and_move(get_new_page, put_new_page, |
1154 | private, page, pass > 2, mode, | 1160 | private, page, pass > 2, mode, |
@@ -1836,9 +1842,8 @@ fail_putback: | |||
1836 | } | 1842 | } |
1837 | 1843 | ||
1838 | mlock_migrate_page(new_page, page); | 1844 | mlock_migrate_page(new_page, page); |
1839 | set_page_memcg(new_page, page_memcg(page)); | ||
1840 | set_page_memcg(page, NULL); | ||
1841 | page_remove_rmap(page, true); | 1845 | page_remove_rmap(page, true); |
1846 | set_page_owner_migrate_reason(new_page, MR_NUMA_MISPLACED); | ||
1842 | 1847 | ||
1843 | spin_unlock(ptl); | 1848 | spin_unlock(ptl); |
1844 | mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); | 1849 | mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); |
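The migrate.c side deletes every set_page_memcg() pairing and lets migrate_page_copy() do the work once for all migration flavors: copy_page_owner() carries the allocation-owner record across, and mem_cgroup_migrate() charges the replacement page (the old page is uncharged on free, per the reworked kernel-doc in the memcontrol.c hunks above). The consolidated copy step, abridged:

    void migrate_page_copy(struct page *newpage, struct page *page)
    {
            /* ... copy flags, index, mapping, dirty/writeback state ... */
            copy_page_owner(page, newpage);
            mem_cgroup_migrate(page, newpage);
    }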
diff --git a/mm/oom_kill.c b/mm/oom_kill.c index dc490c06941b..e97a05d9621f 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c | |||
@@ -386,10 +386,11 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask) | |||
386 | static void dump_header(struct oom_control *oc, struct task_struct *p, | 386 | static void dump_header(struct oom_control *oc, struct task_struct *p, |
387 | struct mem_cgroup *memcg) | 387 | struct mem_cgroup *memcg) |
388 | { | 388 | { |
389 | pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, " | 389 | pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), order=%d, " |
390 | "oom_score_adj=%hd\n", | 390 | "oom_score_adj=%hd\n", |
391 | current->comm, oc->gfp_mask, oc->order, | 391 | current->comm, oc->gfp_mask, &oc->gfp_mask, oc->order, |
392 | current->signal->oom_score_adj); | 392 | current->signal->oom_score_adj); |
393 | |||
393 | cpuset_print_current_mems_allowed(); | 394 | cpuset_print_current_mems_allowed(); |
394 | dump_stack(); | 395 | dump_stack(); |
395 | if (memcg) | 396 | if (memcg) |
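dump_header() moves from printing a raw hex gfp_mask to the %pGg printk specifier, which takes a *pointer* to the mask and decodes the flag names — hence the extra &oc->gfp_mask argument. A hedged usage sketch (the exact decoded output format is an assumption):

    gfp_t gfp_mask = GFP_KERNEL | __GFP_ZERO;

    /* %#x prints the raw value; %pGg decodes the named flags,
     * e.g. "(GFP_KERNEL|__GFP_ZERO)" */
    pr_warn("mask=%#x(%pGg)\n", gfp_mask, &gfp_mask);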
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 6fe7d15bd1f7..11ff8f758631 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
@@ -1169,6 +1169,7 @@ static void wb_update_dirty_ratelimit(struct dirty_throttle_control *dtc, | |||
1169 | unsigned long balanced_dirty_ratelimit; | 1169 | unsigned long balanced_dirty_ratelimit; |
1170 | unsigned long step; | 1170 | unsigned long step; |
1171 | unsigned long x; | 1171 | unsigned long x; |
1172 | unsigned long shift; | ||
1172 | 1173 | ||
1173 | /* | 1174 | /* |
1174 | * The dirty rate will match the writeout rate in long term, except | 1175 | * The dirty rate will match the writeout rate in long term, except |
@@ -1293,11 +1294,11 @@ static void wb_update_dirty_ratelimit(struct dirty_throttle_control *dtc, | |||
1293 | * rate itself is constantly fluctuating. So decrease the track speed | 1294 | * rate itself is constantly fluctuating. So decrease the track speed |
1294 | * when it gets close to the target. Helps eliminate pointless tremors. | 1295 | * when it gets close to the target. Helps eliminate pointless tremors. |
1295 | */ | 1296 | */ |
1296 | step >>= dirty_ratelimit / (2 * step + 1); | 1297 | shift = dirty_ratelimit / (2 * step + 1); |
1297 | /* | 1298 | if (shift < BITS_PER_LONG) |
1298 | * Limit the tracking speed to avoid overshooting. | 1299 | step = DIV_ROUND_UP(step >> shift, 8); |
1299 | */ | 1300 | else |
1300 | step = (step + 7) / 8; | 1301 | step = 0; |
1301 | 1302 | ||
1302 | if (dirty_ratelimit < balanced_dirty_ratelimit) | 1303 | if (dirty_ratelimit < balanced_dirty_ratelimit) |
1303 | dirty_ratelimit += step; | 1304 | dirty_ratelimit += step; |
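The wb_update_dirty_ratelimit() change fixes undefined behavior: in C, right-shifting an unsigned long by BITS_PER_LONG or more is undefined, and the old `step >>= dirty_ratelimit / (2 * step + 1)` could produce such a shift for a large ratelimit and a small step. Worked example on a 64-bit build: dirty_ratelimit = 4096 and step = 16 give shift = 4096 / 33 = 124 >= 64, so the old code's result was unspecified (x86 happens to mask the count mod 64, but nothing guarantees that), while the new code deterministically takes the else branch:

    shift = dirty_ratelimit / (2 * step + 1);
    if (shift < BITS_PER_LONG)
            step = DIV_ROUND_UP(step >> shift, 8); /* old "(step + 7) / 8" */
    else
            step = 0;   /* what an over-wide shift means mathematically */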
@@ -2409,12 +2410,11 @@ int __set_page_dirty_no_writeback(struct page *page) | |||
2409 | /* | 2410 | /* |
2410 | * Helper function for set_page_dirty family. | 2411 | * Helper function for set_page_dirty family. |
2411 | * | 2412 | * |
2412 | * Caller must hold mem_cgroup_begin_page_stat(). | 2413 | * Caller must hold lock_page_memcg(). |
2413 | * | 2414 | * |
2414 | * NOTE: This relies on being atomic wrt interrupts. | 2415 | * NOTE: This relies on being atomic wrt interrupts. |
2415 | */ | 2416 | */ |
2416 | void account_page_dirtied(struct page *page, struct address_space *mapping, | 2417 | void account_page_dirtied(struct page *page, struct address_space *mapping) |
2417 | struct mem_cgroup *memcg) | ||
2418 | { | 2418 | { |
2419 | struct inode *inode = mapping->host; | 2419 | struct inode *inode = mapping->host; |
2420 | 2420 | ||
@@ -2426,7 +2426,7 @@ void account_page_dirtied(struct page *page, struct address_space *mapping, | |||
2426 | inode_attach_wb(inode, page); | 2426 | inode_attach_wb(inode, page); |
2427 | wb = inode_to_wb(inode); | 2427 | wb = inode_to_wb(inode); |
2428 | 2428 | ||
2429 | mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_DIRTY); | 2429 | mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_DIRTY); |
2430 | __inc_zone_page_state(page, NR_FILE_DIRTY); | 2430 | __inc_zone_page_state(page, NR_FILE_DIRTY); |
2431 | __inc_zone_page_state(page, NR_DIRTIED); | 2431 | __inc_zone_page_state(page, NR_DIRTIED); |
2432 | __inc_wb_stat(wb, WB_RECLAIMABLE); | 2432 | __inc_wb_stat(wb, WB_RECLAIMABLE); |
@@ -2441,13 +2441,13 @@ EXPORT_SYMBOL(account_page_dirtied); | |||
2441 | /* | 2441 | /* |
2442 | * Helper function for deaccounting dirty page without writeback. | 2442 | * Helper function for deaccounting dirty page without writeback. |
2443 | * | 2443 | * |
2444 | * Caller must hold mem_cgroup_begin_page_stat(). | 2444 | * Caller must hold lock_page_memcg(). |
2445 | */ | 2445 | */ |
2446 | void account_page_cleaned(struct page *page, struct address_space *mapping, | 2446 | void account_page_cleaned(struct page *page, struct address_space *mapping, |
2447 | struct mem_cgroup *memcg, struct bdi_writeback *wb) | 2447 | struct bdi_writeback *wb) |
2448 | { | 2448 | { |
2449 | if (mapping_cap_account_dirty(mapping)) { | 2449 | if (mapping_cap_account_dirty(mapping)) { |
2450 | mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_DIRTY); | 2450 | mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_DIRTY); |
2451 | dec_zone_page_state(page, NR_FILE_DIRTY); | 2451 | dec_zone_page_state(page, NR_FILE_DIRTY); |
2452 | dec_wb_stat(wb, WB_RECLAIMABLE); | 2452 | dec_wb_stat(wb, WB_RECLAIMABLE); |
2453 | task_io_account_cancelled_write(PAGE_CACHE_SIZE); | 2453 | task_io_account_cancelled_write(PAGE_CACHE_SIZE); |
@@ -2468,26 +2468,24 @@ void account_page_cleaned(struct page *page, struct address_space *mapping, | |||
2468 | */ | 2468 | */ |
2469 | int __set_page_dirty_nobuffers(struct page *page) | 2469 | int __set_page_dirty_nobuffers(struct page *page) |
2470 | { | 2470 | { |
2471 | struct mem_cgroup *memcg; | 2471 | lock_page_memcg(page); |
2472 | |||
2473 | memcg = mem_cgroup_begin_page_stat(page); | ||
2474 | if (!TestSetPageDirty(page)) { | 2472 | if (!TestSetPageDirty(page)) { |
2475 | struct address_space *mapping = page_mapping(page); | 2473 | struct address_space *mapping = page_mapping(page); |
2476 | unsigned long flags; | 2474 | unsigned long flags; |
2477 | 2475 | ||
2478 | if (!mapping) { | 2476 | if (!mapping) { |
2479 | mem_cgroup_end_page_stat(memcg); | 2477 | unlock_page_memcg(page); |
2480 | return 1; | 2478 | return 1; |
2481 | } | 2479 | } |
2482 | 2480 | ||
2483 | spin_lock_irqsave(&mapping->tree_lock, flags); | 2481 | spin_lock_irqsave(&mapping->tree_lock, flags); |
2484 | BUG_ON(page_mapping(page) != mapping); | 2482 | BUG_ON(page_mapping(page) != mapping); |
2485 | WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page)); | 2483 | WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page)); |
2486 | account_page_dirtied(page, mapping, memcg); | 2484 | account_page_dirtied(page, mapping); |
2487 | radix_tree_tag_set(&mapping->page_tree, page_index(page), | 2485 | radix_tree_tag_set(&mapping->page_tree, page_index(page), |
2488 | PAGECACHE_TAG_DIRTY); | 2486 | PAGECACHE_TAG_DIRTY); |
2489 | spin_unlock_irqrestore(&mapping->tree_lock, flags); | 2487 | spin_unlock_irqrestore(&mapping->tree_lock, flags); |
2490 | mem_cgroup_end_page_stat(memcg); | 2488 | unlock_page_memcg(page); |
2491 | 2489 | ||
2492 | if (mapping->host) { | 2490 | if (mapping->host) { |
2493 | /* !PageAnon && !swapper_space */ | 2491 | /* !PageAnon && !swapper_space */ |
@@ -2495,7 +2493,7 @@ int __set_page_dirty_nobuffers(struct page *page) | |||
2495 | } | 2493 | } |
2496 | return 1; | 2494 | return 1; |
2497 | } | 2495 | } |
2498 | mem_cgroup_end_page_stat(memcg); | 2496 | unlock_page_memcg(page); |
2499 | return 0; | 2497 | return 0; |
2500 | } | 2498 | } |
2501 | EXPORT_SYMBOL(__set_page_dirty_nobuffers); | 2499 | EXPORT_SYMBOL(__set_page_dirty_nobuffers); |
@@ -2625,17 +2623,16 @@ void cancel_dirty_page(struct page *page) | |||
2625 | if (mapping_cap_account_dirty(mapping)) { | 2623 | if (mapping_cap_account_dirty(mapping)) { |
2626 | struct inode *inode = mapping->host; | 2624 | struct inode *inode = mapping->host; |
2627 | struct bdi_writeback *wb; | 2625 | struct bdi_writeback *wb; |
2628 | struct mem_cgroup *memcg; | ||
2629 | bool locked; | 2626 | bool locked; |
2630 | 2627 | ||
2631 | memcg = mem_cgroup_begin_page_stat(page); | 2628 | lock_page_memcg(page); |
2632 | wb = unlocked_inode_to_wb_begin(inode, &locked); | 2629 | wb = unlocked_inode_to_wb_begin(inode, &locked); |
2633 | 2630 | ||
2634 | if (TestClearPageDirty(page)) | 2631 | if (TestClearPageDirty(page)) |
2635 | account_page_cleaned(page, mapping, memcg, wb); | 2632 | account_page_cleaned(page, mapping, wb); |
2636 | 2633 | ||
2637 | unlocked_inode_to_wb_end(inode, locked); | 2634 | unlocked_inode_to_wb_end(inode, locked); |
2638 | mem_cgroup_end_page_stat(memcg); | 2635 | unlock_page_memcg(page); |
2639 | } else { | 2636 | } else { |
2640 | ClearPageDirty(page); | 2637 | ClearPageDirty(page); |
2641 | } | 2638 | } |
@@ -2666,7 +2663,6 @@ int clear_page_dirty_for_io(struct page *page) | |||
2666 | if (mapping && mapping_cap_account_dirty(mapping)) { | 2663 | if (mapping && mapping_cap_account_dirty(mapping)) { |
2667 | struct inode *inode = mapping->host; | 2664 | struct inode *inode = mapping->host; |
2668 | struct bdi_writeback *wb; | 2665 | struct bdi_writeback *wb; |
2669 | struct mem_cgroup *memcg; | ||
2670 | bool locked; | 2666 | bool locked; |
2671 | 2667 | ||
2672 | /* | 2668 | /* |
@@ -2704,16 +2700,14 @@ int clear_page_dirty_for_io(struct page *page) | |||
2704 | * always locked coming in here, so we get the desired | 2700 | * always locked coming in here, so we get the desired |
2705 | * exclusion. | 2701 | * exclusion. |
2706 | */ | 2702 | */ |
2707 | memcg = mem_cgroup_begin_page_stat(page); | ||
2708 | wb = unlocked_inode_to_wb_begin(inode, &locked); | 2703 | wb = unlocked_inode_to_wb_begin(inode, &locked); |
2709 | if (TestClearPageDirty(page)) { | 2704 | if (TestClearPageDirty(page)) { |
2710 | mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_DIRTY); | 2705 | mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_DIRTY); |
2711 | dec_zone_page_state(page, NR_FILE_DIRTY); | 2706 | dec_zone_page_state(page, NR_FILE_DIRTY); |
2712 | dec_wb_stat(wb, WB_RECLAIMABLE); | 2707 | dec_wb_stat(wb, WB_RECLAIMABLE); |
2713 | ret = 1; | 2708 | ret = 1; |
2714 | } | 2709 | } |
2715 | unlocked_inode_to_wb_end(inode, locked); | 2710 | unlocked_inode_to_wb_end(inode, locked); |
2716 | mem_cgroup_end_page_stat(memcg); | ||
2717 | return ret; | 2711 | return ret; |
2718 | } | 2712 | } |
2719 | return TestClearPageDirty(page); | 2713 | return TestClearPageDirty(page); |
@@ -2723,10 +2717,9 @@ EXPORT_SYMBOL(clear_page_dirty_for_io); | |||
2723 | int test_clear_page_writeback(struct page *page) | 2717 | int test_clear_page_writeback(struct page *page) |
2724 | { | 2718 | { |
2725 | struct address_space *mapping = page_mapping(page); | 2719 | struct address_space *mapping = page_mapping(page); |
2726 | struct mem_cgroup *memcg; | ||
2727 | int ret; | 2720 | int ret; |
2728 | 2721 | ||
2729 | memcg = mem_cgroup_begin_page_stat(page); | 2722 | lock_page_memcg(page); |
2730 | if (mapping) { | 2723 | if (mapping) { |
2731 | struct inode *inode = mapping->host; | 2724 | struct inode *inode = mapping->host; |
2732 | struct backing_dev_info *bdi = inode_to_bdi(inode); | 2725 | struct backing_dev_info *bdi = inode_to_bdi(inode); |
@@ -2750,21 +2743,20 @@ int test_clear_page_writeback(struct page *page) | |||
2750 | ret = TestClearPageWriteback(page); | 2743 | ret = TestClearPageWriteback(page); |
2751 | } | 2744 | } |
2752 | if (ret) { | 2745 | if (ret) { |
2753 | mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_WRITEBACK); | 2746 | mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_WRITEBACK); |
2754 | dec_zone_page_state(page, NR_WRITEBACK); | 2747 | dec_zone_page_state(page, NR_WRITEBACK); |
2755 | inc_zone_page_state(page, NR_WRITTEN); | 2748 | inc_zone_page_state(page, NR_WRITTEN); |
2756 | } | 2749 | } |
2757 | mem_cgroup_end_page_stat(memcg); | 2750 | unlock_page_memcg(page); |
2758 | return ret; | 2751 | return ret; |
2759 | } | 2752 | } |
2760 | 2753 | ||
2761 | int __test_set_page_writeback(struct page *page, bool keep_write) | 2754 | int __test_set_page_writeback(struct page *page, bool keep_write) |
2762 | { | 2755 | { |
2763 | struct address_space *mapping = page_mapping(page); | 2756 | struct address_space *mapping = page_mapping(page); |
2764 | struct mem_cgroup *memcg; | ||
2765 | int ret; | 2757 | int ret; |
2766 | 2758 | ||
2767 | memcg = mem_cgroup_begin_page_stat(page); | 2759 | lock_page_memcg(page); |
2768 | if (mapping) { | 2760 | if (mapping) { |
2769 | struct inode *inode = mapping->host; | 2761 | struct inode *inode = mapping->host; |
2770 | struct backing_dev_info *bdi = inode_to_bdi(inode); | 2762 | struct backing_dev_info *bdi = inode_to_bdi(inode); |
@@ -2792,10 +2784,10 @@ int __test_set_page_writeback(struct page *page, bool keep_write) | |||
2792 | ret = TestSetPageWriteback(page); | 2784 | ret = TestSetPageWriteback(page); |
2793 | } | 2785 | } |
2794 | if (!ret) { | 2786 | if (!ret) { |
2795 | mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_WRITEBACK); | 2787 | mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_WRITEBACK); |
2796 | inc_zone_page_state(page, NR_WRITEBACK); | 2788 | inc_zone_page_state(page, NR_WRITEBACK); |
2797 | } | 2789 | } |
2798 | mem_cgroup_end_page_stat(memcg); | 2790 | unlock_page_memcg(page); |
2799 | return ret; | 2791 | return ret; |
2800 | 2792 | ||
2801 | } | 2793 | } |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 838ca8bb64f7..c46b75d14b6f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -223,6 +223,19 @@ static char * const zone_names[MAX_NR_ZONES] = { | |||
223 | #endif | 223 | #endif |
224 | }; | 224 | }; |
225 | 225 | ||
226 | char * const migratetype_names[MIGRATE_TYPES] = { | ||
227 | "Unmovable", | ||
228 | "Movable", | ||
229 | "Reclaimable", | ||
230 | "HighAtomic", | ||
231 | #ifdef CONFIG_CMA | ||
232 | "CMA", | ||
233 | #endif | ||
234 | #ifdef CONFIG_MEMORY_ISOLATION | ||
235 | "Isolate", | ||
236 | #endif | ||
237 | }; | ||
238 | |||
226 | compound_page_dtor * const compound_page_dtors[] = { | 239 | compound_page_dtor * const compound_page_dtors[] = { |
227 | NULL, | 240 | NULL, |
228 | free_compound_page, | 241 | free_compound_page, |
@@ -247,6 +260,7 @@ static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES]; | |||
247 | static unsigned long __initdata required_kernelcore; | 260 | static unsigned long __initdata required_kernelcore; |
248 | static unsigned long __initdata required_movablecore; | 261 | static unsigned long __initdata required_movablecore; |
249 | static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES]; | 262 | static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES]; |
263 | static bool mirrored_kernelcore; | ||
250 | 264 | ||
251 | /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */ | 265 | /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */ |
252 | int movable_zone; | 266 | int movable_zone; |
@@ -416,7 +430,7 @@ static void bad_page(struct page *page, const char *reason, | |||
416 | goto out; | 430 | goto out; |
417 | } | 431 | } |
418 | if (nr_unshown) { | 432 | if (nr_unshown) { |
419 | printk(KERN_ALERT | 433 | pr_alert( |
420 | "BUG: Bad page state: %lu messages suppressed\n", | 434 | "BUG: Bad page state: %lu messages suppressed\n", |
421 | nr_unshown); | 435 | nr_unshown); |
422 | nr_unshown = 0; | 436 | nr_unshown = 0; |
@@ -426,9 +440,14 @@ static void bad_page(struct page *page, const char *reason, | |||
426 | if (nr_shown++ == 0) | 440 | if (nr_shown++ == 0) |
427 | resume = jiffies + 60 * HZ; | 441 | resume = jiffies + 60 * HZ; |
428 | 442 | ||
429 | printk(KERN_ALERT "BUG: Bad page state in process %s pfn:%05lx\n", | 443 | pr_alert("BUG: Bad page state in process %s pfn:%05lx\n", |
430 | current->comm, page_to_pfn(page)); | 444 | current->comm, page_to_pfn(page)); |
431 | dump_page_badflags(page, reason, bad_flags); | 445 | __dump_page(page, reason); |
446 | bad_flags &= page->flags; | ||
447 | if (bad_flags) | ||
448 | pr_alert("bad because of flags: %#lx(%pGp)\n", | ||
449 | bad_flags, &bad_flags); | ||
450 | dump_page_owner(page); | ||
432 | 451 | ||
433 | print_modules(); | 452 | print_modules(); |
434 | dump_stack(); | 453 | dump_stack(); |
@@ -477,7 +496,8 @@ void prep_compound_page(struct page *page, unsigned int order) | |||
477 | 496 | ||
478 | #ifdef CONFIG_DEBUG_PAGEALLOC | 497 | #ifdef CONFIG_DEBUG_PAGEALLOC |
479 | unsigned int _debug_guardpage_minorder; | 498 | unsigned int _debug_guardpage_minorder; |
480 | bool _debug_pagealloc_enabled __read_mostly; | 499 | bool _debug_pagealloc_enabled __read_mostly |
500 | = IS_ENABLED(CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT); | ||
481 | bool _debug_guardpage_enabled __read_mostly; | 501 | bool _debug_guardpage_enabled __read_mostly; |
482 | 502 | ||
483 | static int __init early_debug_pagealloc(char *buf) | 503 | static int __init early_debug_pagealloc(char *buf) |
@@ -488,6 +508,9 @@ static int __init early_debug_pagealloc(char *buf) | |||
488 | if (strcmp(buf, "on") == 0) | 508 | if (strcmp(buf, "on") == 0) |
489 | _debug_pagealloc_enabled = true; | 509 | _debug_pagealloc_enabled = true; |
490 | 510 | ||
511 | if (strcmp(buf, "off") == 0) | ||
512 | _debug_pagealloc_enabled = false; | ||
513 | |||
491 | return 0; | 514 | return 0; |
492 | } | 515 | } |
493 | early_param("debug_pagealloc", early_debug_pagealloc); | 516 | early_param("debug_pagealloc", early_debug_pagealloc); |
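With the new CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT initializer the feature can now be on by default, so the early param grows a matching "off" branch (previously only "on" was recognized; other values fall through unchanged). On the kernel command line:

    debug_pagealloc=on     # enable page allocator debugging
    debug_pagealloc=off    # override an ENABLE_DEFAULT=y build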
@@ -1002,6 +1025,7 @@ static bool free_pages_prepare(struct page *page, unsigned int order) | |||
1002 | PAGE_SIZE << order); | 1025 | PAGE_SIZE << order); |
1003 | } | 1026 | } |
1004 | arch_free_page(page, order); | 1027 | arch_free_page(page, order); |
1028 | kernel_poison_pages(page, 1 << order, 0); | ||
1005 | kernel_map_pages(page, 1 << order, 0); | 1029 | kernel_map_pages(page, 1 << order, 0); |
1006 | 1030 | ||
1007 | return true; | 1031 | return true; |
@@ -1104,6 +1128,75 @@ void __init __free_pages_bootmem(struct page *page, unsigned long pfn, | |||
1104 | return __free_pages_boot_core(page, pfn, order); | 1128 | return __free_pages_boot_core(page, pfn, order); |
1105 | } | 1129 | } |
1106 | 1130 | ||
1131 | /* | ||
1132 | * Check that the whole (or subset of) a pageblock given by the interval of | ||
1133 | * [start_pfn, end_pfn) is valid and within the same zone, before scanning it | ||
1134 | * with the migration or free compaction scanner. The scanners then need to | ||
1135 | * use only pfn_valid_within() check for arches that allow holes within | ||
1136 | * pageblocks. | ||
1137 | * | ||
1138 | * Return the struct page pointer of start_pfn, or NULL if the checks fail. | ||
1139 | * | ||
1140 | * It's possible on some configurations to have a setup like node0 node1 node0 | ||
1141 | * i.e. it's possible that all pages within a zones range of pages do not | ||
1142 | * belong to a single zone. We assume that a border between node0 and node1 | ||
1143 | * can occur within a single pageblock, but not a node0 node1 node0 | ||
1144 | * interleaving within a single pageblock. It is therefore sufficient to check | ||
1145 | * the first and last page of a pageblock and avoid checking each individual | ||
1146 | * page in a pageblock. | ||
1147 | */ | ||
1148 | struct page *__pageblock_pfn_to_page(unsigned long start_pfn, | ||
1149 | unsigned long end_pfn, struct zone *zone) | ||
1150 | { | ||
1151 | struct page *start_page; | ||
1152 | struct page *end_page; | ||
1153 | |||
1154 | /* end_pfn is one past the range we are checking */ | ||
1155 | end_pfn--; | ||
1156 | |||
1157 | if (!pfn_valid(start_pfn) || !pfn_valid(end_pfn)) | ||
1158 | return NULL; | ||
1159 | |||
1160 | start_page = pfn_to_page(start_pfn); | ||
1161 | |||
1162 | if (page_zone(start_page) != zone) | ||
1163 | return NULL; | ||
1164 | |||
1165 | end_page = pfn_to_page(end_pfn); | ||
1166 | |||
1167 | /* This gives a shorter code than deriving page_zone(end_page) */ | ||
1168 | if (page_zone_id(start_page) != page_zone_id(end_page)) | ||
1169 | return NULL; | ||
1170 | |||
1171 | return start_page; | ||
1172 | } | ||
1173 | |||
1174 | void set_zone_contiguous(struct zone *zone) | ||
1175 | { | ||
1176 | unsigned long block_start_pfn = zone->zone_start_pfn; | ||
1177 | unsigned long block_end_pfn; | ||
1178 | |||
1179 | block_end_pfn = ALIGN(block_start_pfn + 1, pageblock_nr_pages); | ||
1180 | for (; block_start_pfn < zone_end_pfn(zone); | ||
1181 | block_start_pfn = block_end_pfn, | ||
1182 | block_end_pfn += pageblock_nr_pages) { | ||
1183 | |||
1184 | block_end_pfn = min(block_end_pfn, zone_end_pfn(zone)); | ||
1185 | |||
1186 | if (!__pageblock_pfn_to_page(block_start_pfn, | ||
1187 | block_end_pfn, zone)) | ||
1188 | return; | ||
1189 | } | ||
1190 | |||
1191 | /* We confirm that there is no hole */ | ||
1192 | zone->contiguous = true; | ||
1193 | } | ||
1194 | |||
1195 | void clear_zone_contiguous(struct zone *zone) | ||
1196 | { | ||
1197 | zone->contiguous = false; | ||
1198 | } | ||
1199 | |||
1107 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT | 1200 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT |
1108 | static void __init deferred_free_range(struct page *page, | 1201 | static void __init deferred_free_range(struct page *page, |
1109 | unsigned long pfn, int nr_pages) | 1202 | unsigned long pfn, int nr_pages) |
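__pageblock_pfn_to_page() is the slow, always-correct variant; the point of zone->contiguous is to let hot callers (compaction's migration and free scanners, per the comment above) skip it entirely once a zone is known hole-free. A plausible fast-path wrapper — its actual name and header placement elsewhere in this series are assumptions:

    static inline struct page *pageblock_pfn_to_page(unsigned long start_pfn,
                            unsigned long end_pfn, struct zone *zone)
    {
            if (zone->contiguous)   /* proven hole-free by set_zone_contiguous() */
                    return pfn_to_page(start_pfn);

            return __pageblock_pfn_to_page(start_pfn, end_pfn, zone);
    }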
@@ -1254,9 +1347,13 @@ free_range: | |||
1254 | pgdat_init_report_one_done(); | 1347 | pgdat_init_report_one_done(); |
1255 | return 0; | 1348 | return 0; |
1256 | } | 1349 | } |
1350 | #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ | ||
1257 | 1351 | ||
1258 | void __init page_alloc_init_late(void) | 1352 | void __init page_alloc_init_late(void) |
1259 | { | 1353 | { |
1354 | struct zone *zone; | ||
1355 | |||
1356 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT | ||
1260 | int nid; | 1357 | int nid; |
1261 | 1358 | ||
1262 | /* There will be num_node_state(N_MEMORY) threads */ | 1359 | /* There will be num_node_state(N_MEMORY) threads */ |
@@ -1270,8 +1367,11 @@ void __init page_alloc_init_late(void) | |||
1270 | 1367 | ||
1271 | /* Reinit limits that are based on free pages after the kernel is up */ | 1368 | /* Reinit limits that are based on free pages after the kernel is up */ |
1272 | files_maxfiles_init(); | 1369 | files_maxfiles_init(); |
1370 | #endif | ||
1371 | |||
1372 | for_each_populated_zone(zone) | ||
1373 | set_zone_contiguous(zone); | ||
1273 | } | 1374 | } |
1274 | #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ | ||
1275 | 1375 | ||
1276 | #ifdef CONFIG_CMA | 1376 | #ifdef CONFIG_CMA |
1277 | /* Free whole pageblock and set its migration type to MIGRATE_CMA. */ | 1377 | /* Free whole pageblock and set its migration type to MIGRATE_CMA. */ |
@@ -1381,15 +1481,24 @@ static inline int check_new_page(struct page *page) | |||
1381 | return 0; | 1481 | return 0; |
1382 | } | 1482 | } |
1383 | 1483 | ||
1484 | static inline bool free_pages_prezeroed(bool poisoned) | ||
1485 | { | ||
1486 | return IS_ENABLED(CONFIG_PAGE_POISONING_ZERO) && | ||
1487 | page_poisoning_enabled() && poisoned; | ||
1488 | } | ||
1489 | |||
1384 | static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags, | 1490 | static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags, |
1385 | int alloc_flags) | 1491 | int alloc_flags) |
1386 | { | 1492 | { |
1387 | int i; | 1493 | int i; |
1494 | bool poisoned = true; | ||
1388 | 1495 | ||
1389 | for (i = 0; i < (1 << order); i++) { | 1496 | for (i = 0; i < (1 << order); i++) { |
1390 | struct page *p = page + i; | 1497 | struct page *p = page + i; |
1391 | if (unlikely(check_new_page(p))) | 1498 | if (unlikely(check_new_page(p))) |
1392 | return 1; | 1499 | return 1; |
1500 | if (poisoned) | ||
1501 | poisoned &= page_is_poisoned(p); | ||
1393 | } | 1502 | } |
1394 | 1503 | ||
1395 | set_page_private(page, 0); | 1504 | set_page_private(page, 0); |
@@ -1397,9 +1506,10 @@ static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags, | |||
1397 | 1506 | ||
1398 | arch_alloc_page(page, order); | 1507 | arch_alloc_page(page, order); |
1399 | kernel_map_pages(page, 1 << order, 1); | 1508 | kernel_map_pages(page, 1 << order, 1); |
1509 | kernel_poison_pages(page, 1 << order, 1); | ||
1400 | kasan_alloc_pages(page, order); | 1510 | kasan_alloc_pages(page, order); |
1401 | 1511 | ||
1402 | if (gfp_flags & __GFP_ZERO) | 1512 | if (!free_pages_prezeroed(poisoned) && (gfp_flags & __GFP_ZERO)) |
1403 | for (i = 0; i < (1 << order); i++) | 1513 | for (i = 0; i < (1 << order); i++) |
1404 | clear_highpage(page + i); | 1514 | clear_highpage(page + i); |
1405 | 1515 | ||
@@ -2690,9 +2800,8 @@ void warn_alloc_failed(gfp_t gfp_mask, unsigned int order, const char *fmt, ...) | |||
2690 | va_end(args); | 2800 | va_end(args); |
2691 | } | 2801 | } |
2692 | 2802 | ||
2693 | pr_warn("%s: page allocation failure: order:%u, mode:0x%x\n", | 2803 | pr_warn("%s: page allocation failure: order:%u, mode:%#x(%pGg)\n", |
2694 | current->comm, order, gfp_mask); | 2804 | current->comm, order, gfp_mask, &gfp_mask); |
2695 | |||
2696 | dump_stack(); | 2805 | dump_stack(); |
2697 | if (!should_suppress_show_mem()) | 2806 | if (!should_suppress_show_mem()) |
2698 | show_mem(filter); | 2807 | show_mem(filter); |
@@ -4491,6 +4600,9 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, | |||
4491 | pg_data_t *pgdat = NODE_DATA(nid); | 4600 | pg_data_t *pgdat = NODE_DATA(nid); |
4492 | unsigned long pfn; | 4601 | unsigned long pfn; |
4493 | unsigned long nr_initialised = 0; | 4602 | unsigned long nr_initialised = 0; |
4603 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP | ||
4604 | struct memblock_region *r = NULL, *tmp; | ||
4605 | #endif | ||
4494 | 4606 | ||
4495 | if (highest_memmap_pfn < end_pfn - 1) | 4607 | if (highest_memmap_pfn < end_pfn - 1) |
4496 | highest_memmap_pfn = end_pfn - 1; | 4608 | highest_memmap_pfn = end_pfn - 1; |
@@ -4504,20 +4616,51 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, | |||
4504 | 4616 | ||
4505 | for (pfn = start_pfn; pfn < end_pfn; pfn++) { | 4617 | for (pfn = start_pfn; pfn < end_pfn; pfn++) { |
4506 | /* | 4618 | /* |
4507 | * There can be holes in boot-time mem_map[]s | 4619 | * There can be holes in boot-time mem_map[]s handed to this |
4508 | * handed to this function. They do not | 4620 | * function. They do not exist on hotplugged memory. |
4509 | * exist on hotplugged memory. | ||
4510 | */ | 4621 | */ |
4511 | if (context == MEMMAP_EARLY) { | 4622 | if (context != MEMMAP_EARLY) |
4512 | if (!early_pfn_valid(pfn)) | 4623 | goto not_early; |
4624 | |||
4625 | if (!early_pfn_valid(pfn)) | ||
4626 | continue; | ||
4627 | if (!early_pfn_in_nid(pfn, nid)) | ||
4628 | continue; | ||
4629 | if (!update_defer_init(pgdat, pfn, end_pfn, &nr_initialised)) | ||
4630 | break; | ||
4631 | |||
4632 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP | ||
4633 | /* | ||
4634 | * If not mirrored_kernelcore and ZONE_MOVABLE exists, range | ||
4635 | * from zone_movable_pfn[nid] to end of each node should be | ||
4636 | * ZONE_MOVABLE not ZONE_NORMAL. skip it. | ||
4637 | */ | ||
4638 | if (!mirrored_kernelcore && zone_movable_pfn[nid]) | ||
4639 | if (zone == ZONE_NORMAL && pfn >= zone_movable_pfn[nid]) | ||
4513 | continue; | 4640 | continue; |
4514 | if (!early_pfn_in_nid(pfn, nid)) | 4641 | |
4642 | /* | ||
4643 | * Check the memblock attribute given by firmware, which can affect | ||
4644 | * kernel memory layout. If zone==ZONE_MOVABLE but memory is | ||
4645 | * mirrored, it's an overlapped memmap init. skip it. | ||
4646 | */ | ||
4647 | if (mirrored_kernelcore && zone == ZONE_MOVABLE) { | ||
4648 | if (!r || pfn >= memblock_region_memory_end_pfn(r)) { | ||
4649 | for_each_memblock(memory, tmp) | ||
4650 | if (pfn < memblock_region_memory_end_pfn(tmp)) | ||
4651 | break; | ||
4652 | r = tmp; | ||
4653 | } | ||
4654 | if (pfn >= memblock_region_memory_base_pfn(r) && | ||
4655 | memblock_is_mirror(r)) { | ||
4656 | /* already initialized as NORMAL */ | ||
4657 | pfn = memblock_region_memory_end_pfn(r); | ||
4515 | continue; | 4658 | continue; |
4516 | if (!update_defer_init(pgdat, pfn, end_pfn, | 4659 | } |
4517 | &nr_initialised)) | ||
4518 | break; | ||
4519 | } | 4660 | } |
4661 | #endif | ||
4520 | 4662 | ||
4663 | not_early: | ||
4521 | /* | 4664 | /* |
4522 | * Mark the block movable so that blocks are reserved for | 4665 | * Mark the block movable so that blocks are reserved for |
4523 | * movable at startup. This will force kernel allocations | 4666 | * movable at startup. This will force kernel allocations |
@@ -4934,11 +5077,6 @@ static void __meminit adjust_zone_range_for_zone_movable(int nid, | |||
4934 | *zone_end_pfn = min(node_end_pfn, | 5077 | *zone_end_pfn = min(node_end_pfn, |
4935 | arch_zone_highest_possible_pfn[movable_zone]); | 5078 | arch_zone_highest_possible_pfn[movable_zone]); |
4936 | 5079 | ||
4937 | /* Adjust for ZONE_MOVABLE starting within this range */ | ||
4938 | } else if (*zone_start_pfn < zone_movable_pfn[nid] && | ||
4939 | *zone_end_pfn > zone_movable_pfn[nid]) { | ||
4940 | *zone_end_pfn = zone_movable_pfn[nid]; | ||
4941 | |||
4942 | /* Check if this whole range is within ZONE_MOVABLE */ | 5080 | /* Check if this whole range is within ZONE_MOVABLE */ |
4943 | } else if (*zone_start_pfn >= zone_movable_pfn[nid]) | 5081 | } else if (*zone_start_pfn >= zone_movable_pfn[nid]) |
4944 | *zone_start_pfn = *zone_end_pfn; | 5082 | *zone_start_pfn = *zone_end_pfn; |
@@ -4953,31 +5091,31 @@ static unsigned long __meminit zone_spanned_pages_in_node(int nid, | |||
4953 | unsigned long zone_type, | 5091 | unsigned long zone_type, |
4954 | unsigned long node_start_pfn, | 5092 | unsigned long node_start_pfn, |
4955 | unsigned long node_end_pfn, | 5093 | unsigned long node_end_pfn, |
5094 | unsigned long *zone_start_pfn, | ||
5095 | unsigned long *zone_end_pfn, | ||
4956 | unsigned long *ignored) | 5096 | unsigned long *ignored) |
4957 | { | 5097 | { |
4958 | unsigned long zone_start_pfn, zone_end_pfn; | ||
4959 | |||
4960 | /* When hotadd a new node from cpu_up(), the node should be empty */ | 5098 | /* When hotadd a new node from cpu_up(), the node should be empty */ |
4961 | if (!node_start_pfn && !node_end_pfn) | 5099 | if (!node_start_pfn && !node_end_pfn) |
4962 | return 0; | 5100 | return 0; |
4963 | 5101 | ||
4964 | /* Get the start and end of the zone */ | 5102 | /* Get the start and end of the zone */ |
4965 | zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type]; | 5103 | *zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type]; |
4966 | zone_end_pfn = arch_zone_highest_possible_pfn[zone_type]; | 5104 | *zone_end_pfn = arch_zone_highest_possible_pfn[zone_type]; |
4967 | adjust_zone_range_for_zone_movable(nid, zone_type, | 5105 | adjust_zone_range_for_zone_movable(nid, zone_type, |
4968 | node_start_pfn, node_end_pfn, | 5106 | node_start_pfn, node_end_pfn, |
4969 | &zone_start_pfn, &zone_end_pfn); | 5107 | zone_start_pfn, zone_end_pfn); |
4970 | 5108 | ||
4971 | /* Check that this node has pages within the zone's required range */ | 5109 | /* Check that this node has pages within the zone's required range */ |
4972 | if (zone_end_pfn < node_start_pfn || zone_start_pfn > node_end_pfn) | 5110 | if (*zone_end_pfn < node_start_pfn || *zone_start_pfn > node_end_pfn) |
4973 | return 0; | 5111 | return 0; |
4974 | 5112 | ||
4975 | /* Move the zone boundaries inside the node if necessary */ | 5113 | /* Move the zone boundaries inside the node if necessary */ |
4976 | zone_end_pfn = min(zone_end_pfn, node_end_pfn); | 5114 | *zone_end_pfn = min(*zone_end_pfn, node_end_pfn); |
4977 | zone_start_pfn = max(zone_start_pfn, node_start_pfn); | 5115 | *zone_start_pfn = max(*zone_start_pfn, node_start_pfn); |
4978 | 5116 | ||
4979 | /* Return the spanned pages */ | 5117 | /* Return the spanned pages */ |
4980 | return zone_end_pfn - zone_start_pfn; | 5118 | return *zone_end_pfn - *zone_start_pfn; |
4981 | } | 5119 | } |
4982 | 5120 | ||
4983 | /* | 5121 | /* |
@@ -5023,6 +5161,7 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid, | |||
5023 | unsigned long zone_low = arch_zone_lowest_possible_pfn[zone_type]; | 5161 | unsigned long zone_low = arch_zone_lowest_possible_pfn[zone_type]; |
5024 | unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type]; | 5162 | unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type]; |
5025 | unsigned long zone_start_pfn, zone_end_pfn; | 5163 | unsigned long zone_start_pfn, zone_end_pfn; |
5164 | unsigned long nr_absent; | ||
5026 | 5165 | ||
5027 | /* When hotadd a new node from cpu_up(), the node should be empty */ | 5166 | /* When hotadd a new node from cpu_up(), the node should be empty */ |
5028 | if (!node_start_pfn && !node_end_pfn) | 5167 | if (!node_start_pfn && !node_end_pfn) |
@@ -5034,7 +5173,39 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid, | |||
5034 | adjust_zone_range_for_zone_movable(nid, zone_type, | 5173 | adjust_zone_range_for_zone_movable(nid, zone_type, |
5035 | node_start_pfn, node_end_pfn, | 5174 | node_start_pfn, node_end_pfn, |
5036 | &zone_start_pfn, &zone_end_pfn); | 5175 | &zone_start_pfn, &zone_end_pfn); |
5037 | return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn); | 5176 | nr_absent = __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn); |
5177 | |||
5178 | /* | ||
5179 | * ZONE_MOVABLE handling. | ||
5180 | * Treat pages to be ZONE_MOVABLE in ZONE_NORMAL as absent pages | ||
5181 | * and vice versa. | ||
5182 | */ | ||
5183 | if (zone_movable_pfn[nid]) { | ||
5184 | if (mirrored_kernelcore) { | ||
5185 | unsigned long start_pfn, end_pfn; | ||
5186 | struct memblock_region *r; | ||
5187 | |||
5188 | for_each_memblock(memory, r) { | ||
5189 | start_pfn = clamp(memblock_region_memory_base_pfn(r), | ||
5190 | zone_start_pfn, zone_end_pfn); | ||
5191 | end_pfn = clamp(memblock_region_memory_end_pfn(r), | ||
5192 | zone_start_pfn, zone_end_pfn); | ||
5193 | |||
5194 | if (zone_type == ZONE_MOVABLE && | ||
5195 | memblock_is_mirror(r)) | ||
5196 | nr_absent += end_pfn - start_pfn; | ||
5197 | |||
5198 | if (zone_type == ZONE_NORMAL && | ||
5199 | !memblock_is_mirror(r)) | ||
5200 | nr_absent += end_pfn - start_pfn; | ||
5201 | } | ||
5202 | } else { | ||
5203 | if (zone_type == ZONE_NORMAL) | ||
5204 | nr_absent += node_end_pfn - zone_movable_pfn[nid]; | ||
5205 | } | ||
5206 | } | ||
5207 | |||
5208 | return nr_absent; | ||
5038 | } | 5209 | } |
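With kernelcore=mirror, pages that will end up in the other zone are now counted as absent here: mirrored ranges are absent from ZONE_MOVABLE and unmirrored ranges are absent from ZONE_NORMAL. A rough userspace model of the per-region accounting (the region table and mirror flags are made up; memblock does the real bookkeeping):

#include <stdio.h>

struct region { unsigned long base, end; int mirror; };

#define clamp(v, lo, hi) ((v) < (lo) ? (lo) : (v) > (hi) ? (hi) : (v))

static unsigned long mirror_absent(const struct region *r, int nr,
                                   unsigned long zs, unsigned long ze,
                                   int movable_zone)
{
        unsigned long absent = 0;
        int i;

        for (i = 0; i < nr; i++) {
                unsigned long s = clamp(r[i].base, zs, ze);
                unsigned long e = clamp(r[i].end, zs, ze);

                /* a mirrored range is absent from ZONE_MOVABLE,
                 * an unmirrored one is absent from ZONE_NORMAL */
                if (movable_zone == r[i].mirror)
                        absent += e - s;
        }
        return absent;
}

int main(void)
{
        struct region map[] = { { 0x000, 0x400, 1 }, { 0x400, 0x800, 0 } };

        printf("absent in NORMAL:  %lu\n", mirror_absent(map, 2, 0, 0x800, 0));
        printf("absent in MOVABLE: %lu\n", mirror_absent(map, 2, 0, 0x800, 1));
        return 0;
}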
5039 | 5210 | ||
5040 | #else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ | 5211 | #else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ |
@@ -5042,8 +5213,18 @@ static inline unsigned long __meminit zone_spanned_pages_in_node(int nid, | |||
5042 | unsigned long zone_type, | 5213 | unsigned long zone_type, |
5043 | unsigned long node_start_pfn, | 5214 | unsigned long node_start_pfn, |
5044 | unsigned long node_end_pfn, | 5215 | unsigned long node_end_pfn, |
5216 | unsigned long *zone_start_pfn, | ||
5217 | unsigned long *zone_end_pfn, | ||
5045 | unsigned long *zones_size) | 5218 | unsigned long *zones_size) |
5046 | { | 5219 | { |
5220 | unsigned int zone; | ||
5221 | |||
5222 | *zone_start_pfn = node_start_pfn; | ||
5223 | for (zone = 0; zone < zone_type; zone++) | ||
5224 | *zone_start_pfn += zones_size[zone]; | ||
5225 | |||
5226 | *zone_end_pfn = *zone_start_pfn + zones_size[zone_type]; | ||
5227 | |||
5047 | return zones_size[zone_type]; | 5228 | return zones_size[zone_type]; |
5048 | } | 5229 | } |
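In the !CONFIG_HAVE_MEMBLOCK_NODE_MAP stub, a zone's start pfn is simply the node start plus the sizes of all lower zones. A quick check of that accumulation (zone sizes invented for the example):

#include <stdio.h>

int main(void)
{
        /* zones_size[] as an arch might pass it: DMA, NORMAL, MOVABLE */
        unsigned long zones_size[] = { 4096, 253952, 0 };
        unsigned long start = 0;                /* node_start_pfn */
        unsigned int zone, zone_type = 1;       /* compute ZONE_NORMAL */

        for (zone = 0; zone < zone_type; zone++)
                start += zones_size[zone];      /* skip the lower zones */

        printf("start pfn %lu, end pfn %lu\n",
               start, start + zones_size[zone_type]);
        return 0;
}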
5049 | 5230 | ||
@@ -5072,15 +5253,22 @@ static void __meminit calculate_node_totalpages(struct pglist_data *pgdat, | |||
5072 | 5253 | ||
5073 | for (i = 0; i < MAX_NR_ZONES; i++) { | 5254 | for (i = 0; i < MAX_NR_ZONES; i++) { |
5074 | struct zone *zone = pgdat->node_zones + i; | 5255 | struct zone *zone = pgdat->node_zones + i; |
5256 | unsigned long zone_start_pfn, zone_end_pfn; | ||
5075 | unsigned long size, real_size; | 5257 | unsigned long size, real_size; |
5076 | 5258 | ||
5077 | size = zone_spanned_pages_in_node(pgdat->node_id, i, | 5259 | size = zone_spanned_pages_in_node(pgdat->node_id, i, |
5078 | node_start_pfn, | 5260 | node_start_pfn, |
5079 | node_end_pfn, | 5261 | node_end_pfn, |
5262 | &zone_start_pfn, | ||
5263 | &zone_end_pfn, | ||
5080 | zones_size); | 5264 | zones_size); |
5081 | real_size = size - zone_absent_pages_in_node(pgdat->node_id, i, | 5265 | real_size = size - zone_absent_pages_in_node(pgdat->node_id, i, |
5082 | node_start_pfn, node_end_pfn, | 5266 | node_start_pfn, node_end_pfn, |
5083 | zholes_size); | 5267 | zholes_size); |
5268 | if (size) | ||
5269 | zone->zone_start_pfn = zone_start_pfn; | ||
5270 | else | ||
5271 | zone->zone_start_pfn = 0; | ||
5084 | zone->spanned_pages = size; | 5272 | zone->spanned_pages = size; |
5085 | zone->present_pages = real_size; | 5273 | zone->present_pages = real_size; |
5086 | 5274 | ||
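calculate_node_totalpages() now also records each zone's start pfn. The two page counts differ only by holes: spanned_pages covers the whole pfn range, while present_pages subtracts the absent pages. With invented numbers:

#include <stdio.h>

int main(void)
{
        unsigned long zone_start_pfn = 0x1000, zone_end_pfn = 0x5000;
        unsigned long absent = 0x200;   /* pfns with no backing memory */
        unsigned long spanned = zone_end_pfn - zone_start_pfn;

        printf("spanned %lu, present %lu\n", spanned, spanned - absent);
        return 0;
}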
@@ -5201,7 +5389,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat) | |||
5201 | { | 5389 | { |
5202 | enum zone_type j; | 5390 | enum zone_type j; |
5203 | int nid = pgdat->node_id; | 5391 | int nid = pgdat->node_id; |
5204 | unsigned long zone_start_pfn = pgdat->node_start_pfn; | ||
5205 | int ret; | 5392 | int ret; |
5206 | 5393 | ||
5207 | pgdat_resize_init(pgdat); | 5394 | pgdat_resize_init(pgdat); |
@@ -5222,6 +5409,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat) | |||
5222 | for (j = 0; j < MAX_NR_ZONES; j++) { | 5409 | for (j = 0; j < MAX_NR_ZONES; j++) { |
5223 | struct zone *zone = pgdat->node_zones + j; | 5410 | struct zone *zone = pgdat->node_zones + j; |
5224 | unsigned long size, realsize, freesize, memmap_pages; | 5411 | unsigned long size, realsize, freesize, memmap_pages; |
5412 | unsigned long zone_start_pfn = zone->zone_start_pfn; | ||
5225 | 5413 | ||
5226 | size = zone->spanned_pages; | 5414 | size = zone->spanned_pages; |
5227 | realsize = freesize = zone->present_pages; | 5415 | realsize = freesize = zone->present_pages; |
@@ -5290,7 +5478,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat) | |||
5290 | ret = init_currently_empty_zone(zone, zone_start_pfn, size); | 5478 | ret = init_currently_empty_zone(zone, zone_start_pfn, size); |
5291 | BUG_ON(ret); | 5479 | BUG_ON(ret); |
5292 | memmap_init(size, nid, j, zone_start_pfn); | 5480 | memmap_init(size, nid, j, zone_start_pfn); |
5293 | zone_start_pfn += size; | ||
5294 | } | 5481 | } |
5295 | } | 5482 | } |
5296 | 5483 | ||
@@ -5358,6 +5545,8 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size, | |||
5358 | pr_info("Initmem setup node %d [mem %#018Lx-%#018Lx]\n", nid, | 5545 | pr_info("Initmem setup node %d [mem %#018Lx-%#018Lx]\n", nid, |
5359 | (u64)start_pfn << PAGE_SHIFT, | 5546 | (u64)start_pfn << PAGE_SHIFT, |
5360 | end_pfn ? ((u64)end_pfn << PAGE_SHIFT) - 1 : 0); | 5547 | end_pfn ? ((u64)end_pfn << PAGE_SHIFT) - 1 : 0); |
5548 | #else | ||
5549 | start_pfn = node_start_pfn; | ||
5361 | #endif | 5550 | #endif |
5362 | calculate_node_totalpages(pgdat, start_pfn, end_pfn, | 5551 | calculate_node_totalpages(pgdat, start_pfn, end_pfn, |
5363 | zones_size, zholes_size); | 5552 | zones_size, zholes_size); |
@@ -5529,6 +5718,36 @@ static void __init find_zone_movable_pfns_for_nodes(void) | |||
5529 | } | 5718 | } |
5530 | 5719 | ||
5531 | /* | 5720 | /* |
5721 | * If kernelcore=mirror is specified, ignore movablecore option | ||
5722 | */ | ||
5723 | if (mirrored_kernelcore) { | ||
5724 | bool mem_below_4gb_not_mirrored = false; | ||
5725 | |||
5726 | for_each_memblock(memory, r) { | ||
5727 | if (memblock_is_mirror(r)) | ||
5728 | continue; | ||
5729 | |||
5730 | nid = r->nid; | ||
5731 | |||
5732 | usable_startpfn = memblock_region_memory_base_pfn(r); | ||
5733 | |||
5734 | if (usable_startpfn < 0x100000) { | ||
5735 | mem_below_4gb_not_mirrored = true; | ||
5736 | continue; | ||
5737 | } | ||
5738 | |||
5739 | zone_movable_pfn[nid] = zone_movable_pfn[nid] ? | ||
5740 | min(usable_startpfn, zone_movable_pfn[nid]) : | ||
5741 | usable_startpfn; | ||
5742 | } | ||
5743 | |||
5744 | if (mem_below_4gb_not_mirrored) | ||
5745 | pr_warn("This configuration results in unmirrored kernel memory.\n"); | ||
5746 | |||
5747 | goto out2; | ||
5748 | } | ||
5749 | |||
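The 0x100000 cutoff above is the 4 GiB boundary expressed in page frames, assuming 4 KiB pages; unmirrored memory below it triggers the warning because the kernel cannot avoid using it:

#include <stdio.h>

int main(void)
{
        unsigned long long pfn = 0x100000, page_size = 4096;

        /* 2^20 pages * 2^12 bytes = 2^32 bytes = 4 GiB */
        printf("%llu GiB\n", pfn * page_size >> 30);
        return 0;
}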
5750 | /* | ||
5532 | * If movablecore=nn[KMG] was specified, calculate what size of | 5751 | * If movablecore=nn[KMG] was specified, calculate what size of |
5533 | * kernelcore that corresponds so that memory usable for | 5752 | * kernelcore that corresponds so that memory usable for |
5534 | * any allocation type is evenly spread. If both kernelcore | 5753 | * any allocation type is evenly spread. If both kernelcore |
@@ -5788,6 +6007,12 @@ static int __init cmdline_parse_core(char *p, unsigned long *core) | |||
5788 | */ | 6007 | */ |
5789 | static int __init cmdline_parse_kernelcore(char *p) | 6008 | static int __init cmdline_parse_kernelcore(char *p) |
5790 | { | 6009 | { |
6010 | /* parse kernelcore=mirror */ | ||
6011 | if (parse_option_str(p, "mirror")) { | ||
6012 | mirrored_kernelcore = true; | ||
6013 | return 0; | ||
6014 | } | ||
6015 | |||
5791 | return cmdline_parse_core(p, &required_kernelcore); | 6016 | return cmdline_parse_core(p, &required_kernelcore); |
5792 | } | 6017 | } |
5793 | 6018 | ||
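For reference, the two mutually exclusive forms the parser above accepts; parse_option_str() matches the literal "mirror" token and everything else falls through to the size parser:

    kernelcore=mirror      # sets mirrored_kernelcore = true
    kernelcore=512M        # falls through to cmdline_parse_core()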
diff --git a/mm/page_ext.c b/mm/page_ext.c index 292ca7b8debd..2d864e64f7fe 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c | |||
@@ -106,12 +106,15 @@ struct page_ext *lookup_page_ext(struct page *page) | |||
106 | struct page_ext *base; | 106 | struct page_ext *base; |
107 | 107 | ||
108 | base = NODE_DATA(page_to_nid(page))->node_page_ext; | 108 | base = NODE_DATA(page_to_nid(page))->node_page_ext; |
109 | #ifdef CONFIG_DEBUG_VM | 109 | #if defined(CONFIG_DEBUG_VM) || defined(CONFIG_PAGE_POISONING) |
110 | /* | 110 | /* |
111 | * The sanity checks the page allocator does upon freeing a | 111 | * The sanity checks the page allocator does upon freeing a |
112 | * page can reach here before the page_ext arrays are | 112 | * page can reach here before the page_ext arrays are |
113 | * allocated when feeding a range of pages to the allocator | 113 | * allocated when feeding a range of pages to the allocator |
114 | * for the first time during bootup or memory hotplug. | 114 | * for the first time during bootup or memory hotplug. |
115 | * | ||
116 | * This check is also necessary for ensuring page poisoning | ||
117 | * works as expected when enabled | ||
115 | */ | 118 | */ |
116 | if (unlikely(!base)) | 119 | if (unlikely(!base)) |
117 | return NULL; | 120 | return NULL; |
@@ -180,12 +183,15 @@ struct page_ext *lookup_page_ext(struct page *page) | |||
180 | { | 183 | { |
181 | unsigned long pfn = page_to_pfn(page); | 184 | unsigned long pfn = page_to_pfn(page); |
182 | struct mem_section *section = __pfn_to_section(pfn); | 185 | struct mem_section *section = __pfn_to_section(pfn); |
183 | #ifdef CONFIG_DEBUG_VM | 186 | #if defined(CONFIG_DEBUG_VM) || defined(CONFIG_PAGE_POISONING) |
184 | /* | 187 | /* |
185 | * The sanity checks the page allocator does upon freeing a | 188 | * The sanity checks the page allocator does upon freeing a |
186 | * page can reach here before the page_ext arrays are | 189 | * page can reach here before the page_ext arrays are |
187 | * allocated when feeding a range of pages to the allocator | 190 | * allocated when feeding a range of pages to the allocator |
188 | * for the first time during bootup or memory hotplug. | 191 | * for the first time during bootup or memory hotplug. |
192 | * | ||
193 | * This check is also necessary for ensuring page poisoning | ||
194 | * works as expected when enabled | ||
189 | */ | 195 | */ |
190 | if (!section->page_ext) | 196 | if (!section->page_ext) |
191 | return NULL; | 197 | return NULL; |
diff --git a/mm/page_owner.c b/mm/page_owner.c index 983c3a10fa07..44ad1f00c4e1 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c | |||
@@ -5,10 +5,12 @@ | |||
5 | #include <linux/bootmem.h> | 5 | #include <linux/bootmem.h> |
6 | #include <linux/stacktrace.h> | 6 | #include <linux/stacktrace.h> |
7 | #include <linux/page_owner.h> | 7 | #include <linux/page_owner.h> |
8 | #include <linux/jump_label.h> | ||
9 | #include <linux/migrate.h> | ||
8 | #include "internal.h" | 10 | #include "internal.h" |
9 | 11 | ||
10 | static bool page_owner_disabled = true; | 12 | static bool page_owner_disabled = true; |
11 | bool page_owner_inited __read_mostly; | 13 | DEFINE_STATIC_KEY_FALSE(page_owner_inited); |
12 | 14 | ||
13 | static void init_early_allocated_pages(void); | 15 | static void init_early_allocated_pages(void); |
14 | 16 | ||
@@ -37,7 +39,7 @@ static void init_page_owner(void) | |||
37 | if (page_owner_disabled) | 39 | if (page_owner_disabled) |
38 | return; | 40 | return; |
39 | 41 | ||
40 | page_owner_inited = true; | 42 | static_branch_enable(&page_owner_inited); |
41 | init_early_allocated_pages(); | 43 | init_early_allocated_pages(); |
42 | } | 44 | } |
43 | 45 | ||
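page_owner_inited becomes a static key, so the disabled case costs a patched-out branch instead of a memory load. A userspace stand-in for the pattern (the real API is DEFINE_STATIC_KEY_FALSE()/static_branch_enable()/static_branch_unlikely(); a plain bool with a branch hint only models the semantics):

#include <stdbool.h>
#include <stdio.h>

static bool page_owner_inited;  /* models DEFINE_STATIC_KEY_FALSE() */

static int read_page_owner(void)
{
        /* models static_branch_unlikely(&page_owner_inited) */
        if (__builtin_expect(!page_owner_inited, 1))
                return -1;      /* -EINVAL in the kernel */
        return 0;
}

int main(void)
{
        printf("before init: %d\n", read_page_owner());
        page_owner_inited = true;       /* models static_branch_enable() */
        printf("after init:  %d\n", read_page_owner());
        return 0;
}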
@@ -72,10 +74,18 @@ void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) | |||
72 | page_ext->order = order; | 74 | page_ext->order = order; |
73 | page_ext->gfp_mask = gfp_mask; | 75 | page_ext->gfp_mask = gfp_mask; |
74 | page_ext->nr_entries = trace.nr_entries; | 76 | page_ext->nr_entries = trace.nr_entries; |
77 | page_ext->last_migrate_reason = -1; | ||
75 | 78 | ||
76 | __set_bit(PAGE_EXT_OWNER, &page_ext->flags); | 79 | __set_bit(PAGE_EXT_OWNER, &page_ext->flags); |
77 | } | 80 | } |
78 | 81 | ||
82 | void __set_page_owner_migrate_reason(struct page *page, int reason) | ||
83 | { | ||
84 | struct page_ext *page_ext = lookup_page_ext(page); | ||
85 | |||
86 | page_ext->last_migrate_reason = reason; | ||
87 | } | ||
88 | |||
79 | gfp_t __get_page_owner_gfp(struct page *page) | 89 | gfp_t __get_page_owner_gfp(struct page *page) |
80 | { | 90 | { |
81 | struct page_ext *page_ext = lookup_page_ext(page); | 91 | struct page_ext *page_ext = lookup_page_ext(page); |
@@ -83,6 +93,31 @@ gfp_t __get_page_owner_gfp(struct page *page) | |||
83 | return page_ext->gfp_mask; | 93 | return page_ext->gfp_mask; |
84 | } | 94 | } |
85 | 95 | ||
96 | void __copy_page_owner(struct page *oldpage, struct page *newpage) | ||
97 | { | ||
98 | struct page_ext *old_ext = lookup_page_ext(oldpage); | ||
99 | struct page_ext *new_ext = lookup_page_ext(newpage); | ||
100 | int i; | ||
101 | |||
102 | new_ext->order = old_ext->order; | ||
103 | new_ext->gfp_mask = old_ext->gfp_mask; | ||
104 | new_ext->nr_entries = old_ext->nr_entries; | ||
105 | |||
106 | for (i = 0; i < ARRAY_SIZE(new_ext->trace_entries); i++) | ||
107 | new_ext->trace_entries[i] = old_ext->trace_entries[i]; | ||
108 | |||
109 | /* | ||
110 | * We don't clear the bit on the oldpage as it's going to be freed | ||
111 | * after migration. Until then, the info can be useful in case of | ||
112 | * a bug, and the overall stats will be off a bit only temporarily. | ||
113 | * Also, migrate_misplaced_transhuge_page() can still fail the | ||
114 | * migration and then we want the oldpage to retain the info. But | ||
115 | * in that case we also don't need to explicitly clear the info from | ||
116 | * the new page, which will be freed. | ||
117 | */ | ||
118 | __set_bit(PAGE_EXT_OWNER, &new_ext->flags); | ||
119 | } | ||
120 | |||
86 | static ssize_t | 121 | static ssize_t |
87 | print_page_owner(char __user *buf, size_t count, unsigned long pfn, | 122 | print_page_owner(char __user *buf, size_t count, unsigned long pfn, |
88 | struct page *page, struct page_ext *page_ext) | 123 | struct page *page, struct page_ext *page_ext) |
@@ -100,8 +135,9 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn, | |||
100 | return -ENOMEM; | 135 | return -ENOMEM; |
101 | 136 | ||
102 | ret = snprintf(kbuf, count, | 137 | ret = snprintf(kbuf, count, |
103 | "Page allocated via order %u, mask 0x%x\n", | 138 | "Page allocated via order %u, mask %#x(%pGg)\n", |
104 | page_ext->order, page_ext->gfp_mask); | 139 | page_ext->order, page_ext->gfp_mask, |
140 | &page_ext->gfp_mask); | ||
105 | 141 | ||
106 | if (ret >= count) | 142 | if (ret >= count) |
107 | goto err; | 143 | goto err; |
@@ -110,23 +146,12 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn, | |||
110 | pageblock_mt = get_pfnblock_migratetype(page, pfn); | 146 | pageblock_mt = get_pfnblock_migratetype(page, pfn); |
111 | page_mt = gfpflags_to_migratetype(page_ext->gfp_mask); | 147 | page_mt = gfpflags_to_migratetype(page_ext->gfp_mask); |
112 | ret += snprintf(kbuf + ret, count - ret, | 148 | ret += snprintf(kbuf + ret, count - ret, |
113 | "PFN %lu Block %lu type %d %s Flags %s%s%s%s%s%s%s%s%s%s%s%s\n", | 149 | "PFN %lu type %s Block %lu type %s Flags %#lx(%pGp)\n", |
114 | pfn, | 150 | pfn, |
151 | migratetype_names[page_mt], | ||
115 | pfn >> pageblock_order, | 152 | pfn >> pageblock_order, |
116 | pageblock_mt, | 153 | migratetype_names[pageblock_mt], |
117 | pageblock_mt != page_mt ? "Fallback" : " ", | 154 | page->flags, &page->flags); |
118 | PageLocked(page) ? "K" : " ", | ||
119 | PageError(page) ? "E" : " ", | ||
120 | PageReferenced(page) ? "R" : " ", | ||
121 | PageUptodate(page) ? "U" : " ", | ||
122 | PageDirty(page) ? "D" : " ", | ||
123 | PageLRU(page) ? "L" : " ", | ||
124 | PageActive(page) ? "A" : " ", | ||
125 | PageSlab(page) ? "S" : " ", | ||
126 | PageWriteback(page) ? "W" : " ", | ||
127 | PageCompound(page) ? "C" : " ", | ||
128 | PageSwapCache(page) ? "B" : " ", | ||
129 | PageMappedToDisk(page) ? "M" : " "); | ||
130 | 155 | ||
131 | if (ret >= count) | 156 | if (ret >= count) |
132 | goto err; | 157 | goto err; |
@@ -135,6 +160,14 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn, | |||
135 | if (ret >= count) | 160 | if (ret >= count) |
136 | goto err; | 161 | goto err; |
137 | 162 | ||
163 | if (page_ext->last_migrate_reason != -1) { | ||
164 | ret += snprintf(kbuf + ret, count - ret, | ||
165 | "Page has been migrated, last migrate reason: %s\n", | ||
166 | migrate_reason_names[page_ext->last_migrate_reason]); | ||
167 | if (ret >= count) | ||
168 | goto err; | ||
169 | } | ||
170 | |||
138 | ret += snprintf(kbuf + ret, count - ret, "\n"); | 171 | ret += snprintf(kbuf + ret, count - ret, "\n"); |
139 | if (ret >= count) | 172 | if (ret >= count) |
140 | goto err; | 173 | goto err; |
@@ -150,6 +183,31 @@ err: | |||
150 | return -ENOMEM; | 183 | return -ENOMEM; |
151 | } | 184 | } |
152 | 185 | ||
186 | void __dump_page_owner(struct page *page) | ||
187 | { | ||
188 | struct page_ext *page_ext = lookup_page_ext(page); | ||
189 | struct stack_trace trace = { | ||
190 | .nr_entries = page_ext->nr_entries, | ||
191 | .entries = &page_ext->trace_entries[0], | ||
192 | }; | ||
193 | gfp_t gfp_mask = page_ext->gfp_mask; | ||
194 | int mt = gfpflags_to_migratetype(gfp_mask); | ||
195 | |||
196 | if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) { | ||
197 | pr_alert("page_owner info is not active (free page?)\n"); | ||
198 | return; | ||
199 | } | ||
200 | |||
201 | pr_alert("page allocated via order %u, migratetype %s, " | ||
202 | "gfp_mask %#x(%pGg)\n", page_ext->order, | ||
203 | migratetype_names[mt], gfp_mask, &gfp_mask); | ||
204 | print_stack_trace(&trace, 0); | ||
205 | |||
206 | if (page_ext->last_migrate_reason != -1) | ||
207 | pr_alert("page has been migrated, last migrate reason: %s\n", | ||
208 | migrate_reason_names[page_ext->last_migrate_reason]); | ||
209 | } | ||
210 | |||
153 | static ssize_t | 211 | static ssize_t |
154 | read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos) | 212 | read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos) |
155 | { | 213 | { |
@@ -157,7 +215,7 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos) | |||
157 | struct page *page; | 215 | struct page *page; |
158 | struct page_ext *page_ext; | 216 | struct page_ext *page_ext; |
159 | 217 | ||
160 | if (!page_owner_inited) | 218 | if (!static_branch_unlikely(&page_owner_inited)) |
161 | return -EINVAL; | 219 | return -EINVAL; |
162 | 220 | ||
163 | page = NULL; | 221 | page = NULL; |
@@ -305,7 +363,7 @@ static int __init pageowner_init(void) | |||
305 | { | 363 | { |
306 | struct dentry *dentry; | 364 | struct dentry *dentry; |
307 | 365 | ||
308 | if (!page_owner_inited) { | 366 | if (!static_branch_unlikely(&page_owner_inited)) { |
309 | pr_info("page_owner is disabled\n"); | 367 | pr_info("page_owner is disabled\n"); |
310 | return 0; | 368 | return 0; |
311 | } | 369 | } |
diff --git a/mm/debug-pagealloc.c b/mm/page_poison.c index 5bf5906ce13b..479e7ea2bea6 100644 --- a/mm/debug-pagealloc.c +++ b/mm/page_poison.c | |||
@@ -6,22 +6,48 @@ | |||
6 | #include <linux/poison.h> | 6 | #include <linux/poison.h> |
7 | #include <linux/ratelimit.h> | 7 | #include <linux/ratelimit.h> |
8 | 8 | ||
9 | static bool page_poisoning_enabled __read_mostly; | 9 | static bool __page_poisoning_enabled __read_mostly; |
10 | static bool want_page_poisoning __read_mostly; | ||
10 | 11 | ||
11 | static bool need_page_poisoning(void) | 12 | static int early_page_poison_param(char *buf) |
12 | { | 13 | { |
13 | if (!debug_pagealloc_enabled()) | 14 | if (!buf) |
14 | return false; | 15 | return -EINVAL; |
16 | |||
17 | if (strcmp(buf, "on") == 0) | ||
18 | want_page_poisoning = true; | ||
19 | else if (strcmp(buf, "off") == 0) | ||
20 | want_page_poisoning = false; | ||
15 | 21 | ||
16 | return true; | 22 | return 0; |
23 | } | ||
24 | early_param("page_poison", early_page_poison_param); | ||
25 | |||
26 | bool page_poisoning_enabled(void) | ||
27 | { | ||
28 | return __page_poisoning_enabled; | ||
29 | } | ||
30 | |||
31 | static bool need_page_poisoning(void) | ||
32 | { | ||
33 | return want_page_poisoning; | ||
17 | } | 34 | } |
18 | 35 | ||
19 | static void init_page_poisoning(void) | 36 | static void init_page_poisoning(void) |
20 | { | 37 | { |
21 | if (!debug_pagealloc_enabled()) | 38 | /* |
22 | return; | 39 | * page poisoning is debug page alloc for some arches. If either |
40 | * of those options is enabled, enable poisoning | ||
41 | */ | ||
42 | if (!IS_ENABLED(CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC)) { | ||
43 | if (!want_page_poisoning && !debug_pagealloc_enabled()) | ||
44 | return; | ||
45 | } else { | ||
46 | if (!want_page_poisoning) | ||
47 | return; | ||
48 | } | ||
23 | 49 | ||
24 | page_poisoning_enabled = true; | 50 | __page_poisoning_enabled = true; |
25 | } | 51 | } |
26 | 52 | ||
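The enable decision in init_page_poisoning() reads as a small truth table: on architectures without ARCH_SUPPORTS_DEBUG_PAGEALLOC, poisoning piggybacks on debug_pagealloc as before; otherwise only an explicit page_poison=on turns it on. A sketch that enumerates the cases:

#include <stdbool.h>
#include <stdio.h>

static bool poisoning_enabled(bool arch_supports, bool want, bool debug)
{
        if (!arch_supports)     /* poisoning doubles as debug_pagealloc */
                return want || debug;
        return want;            /* a real debug_pagealloc exists */
}

int main(void)
{
        int arch, want, dbg;

        for (arch = 0; arch < 2; arch++)
                for (want = 0; want < 2; want++)
                        for (dbg = 0; dbg < 2; dbg++)
                                printf("arch=%d want=%d debug=%d -> %d\n",
                                       arch, want, dbg,
                                       poisoning_enabled(arch, want, dbg));
        return 0;
}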
27 | struct page_ext_operations page_poisoning_ops = { | 53 | struct page_ext_operations page_poisoning_ops = { |
@@ -45,11 +71,14 @@ static inline void clear_page_poison(struct page *page) | |||
45 | __clear_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags); | 71 | __clear_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags); |
46 | } | 72 | } |
47 | 73 | ||
48 | static inline bool page_poison(struct page *page) | 74 | bool page_is_poisoned(struct page *page) |
49 | { | 75 | { |
50 | struct page_ext *page_ext; | 76 | struct page_ext *page_ext; |
51 | 77 | ||
52 | page_ext = lookup_page_ext(page); | 78 | page_ext = lookup_page_ext(page); |
79 | if (!page_ext) | ||
80 | return false; | ||
81 | |||
53 | return test_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags); | 82 | return test_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags); |
54 | } | 83 | } |
55 | 84 | ||
@@ -83,6 +112,9 @@ static void check_poison_mem(unsigned char *mem, size_t bytes) | |||
83 | unsigned char *start; | 112 | unsigned char *start; |
84 | unsigned char *end; | 113 | unsigned char *end; |
85 | 114 | ||
115 | if (IS_ENABLED(CONFIG_PAGE_POISONING_NO_SANITY)) | ||
116 | return; | ||
117 | |||
86 | start = memchr_inv(mem, PAGE_POISON, bytes); | 118 | start = memchr_inv(mem, PAGE_POISON, bytes); |
87 | if (!start) | 119 | if (!start) |
88 | return; | 120 | return; |
@@ -95,9 +127,9 @@ static void check_poison_mem(unsigned char *mem, size_t bytes) | |||
95 | if (!__ratelimit(&ratelimit)) | 127 | if (!__ratelimit(&ratelimit)) |
96 | return; | 128 | return; |
97 | else if (start == end && single_bit_flip(*start, PAGE_POISON)) | 129 | else if (start == end && single_bit_flip(*start, PAGE_POISON)) |
98 | printk(KERN_ERR "pagealloc: single bit error\n"); | 130 | pr_err("pagealloc: single bit error\n"); |
99 | else | 131 | else |
100 | printk(KERN_ERR "pagealloc: memory corruption\n"); | 132 | pr_err("pagealloc: memory corruption\n"); |
101 | 133 | ||
102 | print_hex_dump(KERN_ERR, "", DUMP_PREFIX_ADDRESS, 16, 1, start, | 134 | print_hex_dump(KERN_ERR, "", DUMP_PREFIX_ADDRESS, 16, 1, start, |
103 | end - start + 1, 1); | 135 | end - start + 1, 1); |
@@ -108,7 +140,7 @@ static void unpoison_page(struct page *page) | |||
108 | { | 140 | { |
109 | void *addr; | 141 | void *addr; |
110 | 142 | ||
111 | if (!page_poison(page)) | 143 | if (!page_is_poisoned(page)) |
112 | return; | 144 | return; |
113 | 145 | ||
114 | addr = kmap_atomic(page); | 146 | addr = kmap_atomic(page); |
@@ -125,9 +157,9 @@ static void unpoison_pages(struct page *page, int n) | |||
125 | unpoison_page(page + i); | 157 | unpoison_page(page + i); |
126 | } | 158 | } |
127 | 159 | ||
128 | void __kernel_map_pages(struct page *page, int numpages, int enable) | 160 | void kernel_poison_pages(struct page *page, int numpages, int enable) |
129 | { | 161 | { |
130 | if (!page_poisoning_enabled) | 162 | if (!page_poisoning_enabled()) |
131 | return; | 163 | return; |
132 | 164 | ||
133 | if (enable) | 165 | if (enable) |
@@ -135,3 +167,10 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) | |||
135 | else | 167 | else |
136 | poison_pages(page, numpages); | 168 | poison_pages(page, numpages); |
137 | } | 169 | } |
170 | |||
171 | #ifndef CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC | ||
172 | void __kernel_map_pages(struct page *page, int numpages, int enable) | ||
173 | { | ||
174 | /* This function does nothing; all work is done via poison pages */ | ||
175 | } | ||
176 | #endif | ||
@@ -1287,21 +1287,17 @@ void page_add_new_anon_rmap(struct page *page, | |||
1287 | */ | 1287 | */ |
1288 | void page_add_file_rmap(struct page *page) | 1288 | void page_add_file_rmap(struct page *page) |
1289 | { | 1289 | { |
1290 | struct mem_cgroup *memcg; | 1290 | lock_page_memcg(page); |
1291 | |||
1292 | memcg = mem_cgroup_begin_page_stat(page); | ||
1293 | if (atomic_inc_and_test(&page->_mapcount)) { | 1291 | if (atomic_inc_and_test(&page->_mapcount)) { |
1294 | __inc_zone_page_state(page, NR_FILE_MAPPED); | 1292 | __inc_zone_page_state(page, NR_FILE_MAPPED); |
1295 | mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED); | 1293 | mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED); |
1296 | } | 1294 | } |
1297 | mem_cgroup_end_page_stat(memcg); | 1295 | unlock_page_memcg(page); |
1298 | } | 1296 | } |
1299 | 1297 | ||
1300 | static void page_remove_file_rmap(struct page *page) | 1298 | static void page_remove_file_rmap(struct page *page) |
1301 | { | 1299 | { |
1302 | struct mem_cgroup *memcg; | 1300 | lock_page_memcg(page); |
1303 | |||
1304 | memcg = mem_cgroup_begin_page_stat(page); | ||
1305 | 1301 | ||
1306 | /* Hugepages are not counted in NR_FILE_MAPPED for now. */ | 1302 | /* Hugepages are not counted in NR_FILE_MAPPED for now. */ |
1307 | if (unlikely(PageHuge(page))) { | 1303 | if (unlikely(PageHuge(page))) { |
@@ -1320,12 +1316,12 @@ static void page_remove_file_rmap(struct page *page) | |||
1320 | * pte lock(a spinlock) is held, which implies preemption disabled. | 1316 | * pte lock(a spinlock) is held, which implies preemption disabled. |
1321 | */ | 1317 | */ |
1322 | __dec_zone_page_state(page, NR_FILE_MAPPED); | 1318 | __dec_zone_page_state(page, NR_FILE_MAPPED); |
1323 | mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED); | 1319 | mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED); |
1324 | 1320 | ||
1325 | if (unlikely(PageMlocked(page))) | 1321 | if (unlikely(PageMlocked(page))) |
1326 | clear_page_mlock(page); | 1322 | clear_page_mlock(page); |
1327 | out: | 1323 | out: |
1328 | mem_cgroup_end_page_stat(memcg); | 1324 | unlock_page_memcg(page); |
1329 | } | 1325 | } |
1330 | 1326 | ||
1331 | static void page_remove_anon_compound_rmap(struct page *page) | 1327 | static void page_remove_anon_compound_rmap(struct page *page) |
diff --git a/mm/shmem.c b/mm/shmem.c index 440e2a7e6c1c..1acfdbc4bd9e 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
@@ -1116,7 +1116,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, | |||
1116 | */ | 1116 | */ |
1117 | oldpage = newpage; | 1117 | oldpage = newpage; |
1118 | } else { | 1118 | } else { |
1119 | mem_cgroup_replace_page(oldpage, newpage); | 1119 | mem_cgroup_migrate(oldpage, newpage); |
1120 | lru_cache_add_anon(newpage); | 1120 | lru_cache_add_anon(newpage); |
1121 | *pagep = newpage; | 1121 | *pagep = newpage; |
1122 | } | 1122 | } |
@@ -169,12 +169,6 @@ typedef unsigned short freelist_idx_t; | |||
169 | #define SLAB_OBJ_MAX_NUM ((1 << sizeof(freelist_idx_t) * BITS_PER_BYTE) - 1) | 169 | #define SLAB_OBJ_MAX_NUM ((1 << sizeof(freelist_idx_t) * BITS_PER_BYTE) - 1) |
170 | 170 | ||
171 | /* | 171 | /* |
172 | * true if a page was allocated from pfmemalloc reserves for network-based | ||
173 | * swap | ||
174 | */ | ||
175 | static bool pfmemalloc_active __read_mostly; | ||
176 | |||
177 | /* | ||
178 | * struct array_cache | 172 | * struct array_cache |
179 | * | 173 | * |
180 | * Purpose: | 174 | * Purpose: |
@@ -195,10 +189,6 @@ struct array_cache { | |||
195 | * Must have this definition in here for the proper | 189 | * Must have this definition in here for the proper |
196 | * alignment of array_cache. Also simplifies accessing | 190 | * alignment of array_cache. Also simplifies accessing |
197 | * the entries. | 191 | * the entries. |
198 | * | ||
199 | * Entries should not be directly dereferenced as | ||
200 | * entries belonging to slabs marked pfmemalloc will | ||
201 | * have the lower bits set SLAB_OBJ_PFMEMALLOC | ||
202 | */ | 192 | */ |
203 | }; | 193 | }; |
204 | 194 | ||
@@ -207,33 +197,6 @@ struct alien_cache { | |||
207 | struct array_cache ac; | 197 | struct array_cache ac; |
208 | }; | 198 | }; |
209 | 199 | ||
210 | #define SLAB_OBJ_PFMEMALLOC 1 | ||
211 | static inline bool is_obj_pfmemalloc(void *objp) | ||
212 | { | ||
213 | return (unsigned long)objp & SLAB_OBJ_PFMEMALLOC; | ||
214 | } | ||
215 | |||
216 | static inline void set_obj_pfmemalloc(void **objp) | ||
217 | { | ||
218 | *objp = (void *)((unsigned long)*objp | SLAB_OBJ_PFMEMALLOC); | ||
219 | return; | ||
220 | } | ||
221 | |||
222 | static inline void clear_obj_pfmemalloc(void **objp) | ||
223 | { | ||
224 | *objp = (void *)((unsigned long)*objp & ~SLAB_OBJ_PFMEMALLOC); | ||
225 | } | ||
226 | |||
227 | /* | ||
228 | * bootstrap: The caches do not work without cpuarrays anymore, but the | ||
229 | * cpuarrays are allocated from the generic caches... | ||
230 | */ | ||
231 | #define BOOT_CPUCACHE_ENTRIES 1 | ||
232 | struct arraycache_init { | ||
233 | struct array_cache cache; | ||
234 | void *entries[BOOT_CPUCACHE_ENTRIES]; | ||
235 | }; | ||
236 | |||
237 | /* | 200 | /* |
238 | * Need this for bootstrapping a per node allocator. | 201 | * Need this for bootstrapping a per node allocator. |
239 | */ | 202 | */ |
@@ -280,9 +243,10 @@ static void kmem_cache_node_init(struct kmem_cache_node *parent) | |||
280 | MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \ | 243 | MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \ |
281 | } while (0) | 244 | } while (0) |
282 | 245 | ||
246 | #define CFLGS_OBJFREELIST_SLAB (0x40000000UL) | ||
283 | #define CFLGS_OFF_SLAB (0x80000000UL) | 247 | #define CFLGS_OFF_SLAB (0x80000000UL) |
248 | #define OBJFREELIST_SLAB(x) ((x)->flags & CFLGS_OBJFREELIST_SLAB) | ||
284 | #define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB) | 249 | #define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB) |
285 | #define OFF_SLAB_MIN_SIZE (max_t(size_t, PAGE_SIZE >> 5, KMALLOC_MIN_SIZE + 1)) | ||
286 | 250 | ||
287 | #define BATCHREFILL_LIMIT 16 | 251 | #define BATCHREFILL_LIMIT 16 |
288 | /* | 252 | /* |
@@ -390,36 +354,26 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp) | |||
390 | 354 | ||
391 | #endif | 355 | #endif |
392 | 356 | ||
393 | #define OBJECT_FREE (0) | ||
394 | #define OBJECT_ACTIVE (1) | ||
395 | |||
396 | #ifdef CONFIG_DEBUG_SLAB_LEAK | 357 | #ifdef CONFIG_DEBUG_SLAB_LEAK |
397 | 358 | ||
398 | static void set_obj_status(struct page *page, int idx, int val) | 359 | static inline bool is_store_user_clean(struct kmem_cache *cachep) |
399 | { | 360 | { |
400 | int freelist_size; | 361 | return atomic_read(&cachep->store_user_clean) == 1; |
401 | char *status; | ||
402 | struct kmem_cache *cachep = page->slab_cache; | ||
403 | |||
404 | freelist_size = cachep->num * sizeof(freelist_idx_t); | ||
405 | status = (char *)page->freelist + freelist_size; | ||
406 | status[idx] = val; | ||
407 | } | 362 | } |
408 | 363 | ||
409 | static inline unsigned int get_obj_status(struct page *page, int idx) | 364 | static inline void set_store_user_clean(struct kmem_cache *cachep) |
410 | { | 365 | { |
411 | int freelist_size; | 366 | atomic_set(&cachep->store_user_clean, 1); |
412 | char *status; | 367 | } |
413 | struct kmem_cache *cachep = page->slab_cache; | ||
414 | |||
415 | freelist_size = cachep->num * sizeof(freelist_idx_t); | ||
416 | status = (char *)page->freelist + freelist_size; | ||
417 | 368 | ||
418 | return status[idx]; | 369 | static inline void set_store_user_dirty(struct kmem_cache *cachep) |
370 | { | ||
371 | if (is_store_user_clean(cachep)) | ||
372 | atomic_set(&cachep->store_user_clean, 0); | ||
419 | } | 373 | } |
420 | 374 | ||
421 | #else | 375 | #else |
422 | static inline void set_obj_status(struct page *page, int idx, int val) {} | 376 | static inline void set_store_user_dirty(struct kmem_cache *cachep) {} |
423 | 377 | ||
424 | #endif | 378 | #endif |
425 | 379 | ||
@@ -457,6 +411,7 @@ static inline unsigned int obj_to_index(const struct kmem_cache *cache, | |||
457 | return reciprocal_divide(offset, cache->reciprocal_buffer_size); | 411 | return reciprocal_divide(offset, cache->reciprocal_buffer_size); |
458 | } | 412 | } |
459 | 413 | ||
414 | #define BOOT_CPUCACHE_ENTRIES 1 | ||
460 | /* internal cache of cache description objs */ | 415 | /* internal cache of cache description objs */ |
461 | static struct kmem_cache kmem_cache_boot = { | 416 | static struct kmem_cache kmem_cache_boot = { |
462 | .batchcount = 1, | 417 | .batchcount = 1, |
@@ -475,61 +430,13 @@ static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) | |||
475 | return this_cpu_ptr(cachep->cpu_cache); | 430 | return this_cpu_ptr(cachep->cpu_cache); |
476 | } | 431 | } |
477 | 432 | ||
478 | static size_t calculate_freelist_size(int nr_objs, size_t align) | ||
479 | { | ||
480 | size_t freelist_size; | ||
481 | |||
482 | freelist_size = nr_objs * sizeof(freelist_idx_t); | ||
483 | if (IS_ENABLED(CONFIG_DEBUG_SLAB_LEAK)) | ||
484 | freelist_size += nr_objs * sizeof(char); | ||
485 | |||
486 | if (align) | ||
487 | freelist_size = ALIGN(freelist_size, align); | ||
488 | |||
489 | return freelist_size; | ||
490 | } | ||
491 | |||
492 | static int calculate_nr_objs(size_t slab_size, size_t buffer_size, | ||
493 | size_t idx_size, size_t align) | ||
494 | { | ||
495 | int nr_objs; | ||
496 | size_t remained_size; | ||
497 | size_t freelist_size; | ||
498 | int extra_space = 0; | ||
499 | |||
500 | if (IS_ENABLED(CONFIG_DEBUG_SLAB_LEAK)) | ||
501 | extra_space = sizeof(char); | ||
502 | /* | ||
503 | * Ignore padding for the initial guess. The padding | ||
504 | * is at most @align-1 bytes, and @buffer_size is at | ||
505 | * least @align. In the worst case, this result will | ||
506 | * be one greater than the number of objects that fit | ||
507 | * into the memory allocation when taking the padding | ||
508 | * into account. | ||
509 | */ | ||
510 | nr_objs = slab_size / (buffer_size + idx_size + extra_space); | ||
511 | |||
512 | /* | ||
513 | * This calculated number will be either the right | ||
514 | * amount, or one greater than what we want. | ||
515 | */ | ||
516 | remained_size = slab_size - nr_objs * buffer_size; | ||
517 | freelist_size = calculate_freelist_size(nr_objs, align); | ||
518 | if (remained_size < freelist_size) | ||
519 | nr_objs--; | ||
520 | |||
521 | return nr_objs; | ||
522 | } | ||
523 | |||
524 | /* | 433 | /* |
525 | * Calculate the number of objects and left-over bytes for a given buffer size. | 434 | * Calculate the number of objects and left-over bytes for a given buffer size. |
526 | */ | 435 | */ |
527 | static void cache_estimate(unsigned long gfporder, size_t buffer_size, | 436 | static unsigned int cache_estimate(unsigned long gfporder, size_t buffer_size, |
528 | size_t align, int flags, size_t *left_over, | 437 | unsigned long flags, size_t *left_over) |
529 | unsigned int *num) | ||
530 | { | 438 | { |
531 | int nr_objs; | 439 | unsigned int num; |
532 | size_t mgmt_size; | ||
533 | size_t slab_size = PAGE_SIZE << gfporder; | 440 | size_t slab_size = PAGE_SIZE << gfporder; |
534 | 441 | ||
535 | /* | 442 | /* |
@@ -537,26 +444,28 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size, | |||
537 | * on it. For the latter case, the memory allocated for a | 444 | * on it. For the latter case, the memory allocated for a |
538 | * slab is used for: | 445 | * slab is used for: |
539 | * | 446 | * |
540 | * - One unsigned int for each object | ||
541 | * - Padding to respect alignment of @align | ||
542 | * - @buffer_size bytes for each object | 447 | * - @buffer_size bytes for each object |
448 | * - One freelist_idx_t for each object | ||
449 | * | ||
450 | * We don't need to consider alignment of freelist because | ||
451 | * freelist will be at the end of slab page. The objects will be | ||
452 | * at the correct alignment. | ||
543 | * | 453 | * |
544 | * If the slab management structure is off the slab, then the | 454 | * If the slab management structure is off the slab, then the |
545 | * alignment will already be calculated into the size. Because | 455 | * alignment will already be calculated into the size. Because |
546 | * the slabs are all page aligned, the objects will be at the | 456 | * the slabs are all page aligned, the objects will be at the |
547 | * correct alignment when allocated. | 457 | * correct alignment when allocated. |
548 | */ | 458 | */ |
549 | if (flags & CFLGS_OFF_SLAB) { | 459 | if (flags & (CFLGS_OBJFREELIST_SLAB | CFLGS_OFF_SLAB)) { |
550 | mgmt_size = 0; | 460 | num = slab_size / buffer_size; |
551 | nr_objs = slab_size / buffer_size; | 461 | *left_over = slab_size % buffer_size; |
552 | |||
553 | } else { | 462 | } else { |
554 | nr_objs = calculate_nr_objs(slab_size, buffer_size, | 463 | num = slab_size / (buffer_size + sizeof(freelist_idx_t)); |
555 | sizeof(freelist_idx_t), align); | 464 | *left_over = slab_size % |
556 | mgmt_size = calculate_freelist_size(nr_objs, align); | 465 | (buffer_size + sizeof(freelist_idx_t)); |
557 | } | 466 | } |
558 | *num = nr_objs; | 467 | |
559 | *left_over = slab_size - nr_objs*buffer_size - mgmt_size; | 468 | return num; |
560 | } | 469 | } |
561 | 470 | ||
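The simplified estimate is easy to verify by hand. For an order-0 (4096-byte) slab holding 256-byte objects, and assuming a 1-byte freelist_idx_t (its real width is configuration dependent):

#include <stdio.h>

typedef unsigned char freelist_idx_t;   /* assumed 1 byte for the example */

int main(void)
{
        unsigned long slab_size = 4096, buffer_size = 256;
        unsigned int num;
        unsigned long left;

        /* on-slab: each object also pays for one freelist index */
        num  = slab_size / (buffer_size + sizeof(freelist_idx_t));
        left = slab_size % (buffer_size + sizeof(freelist_idx_t));
        printf("on-slab:  %u objs, %lu bytes left\n", num, left);  /* 15, 241 */

        /* off-slab or objfreelist: the whole slab holds objects */
        num  = slab_size / buffer_size;
        left = slab_size % buffer_size;
        printf("off-slab: %u objs, %lu bytes left\n", num, left);  /* 16, 0 */
        return 0;
}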
562 | #if DEBUG | 471 | #if DEBUG |
@@ -687,120 +596,21 @@ static struct array_cache *alloc_arraycache(int node, int entries, | |||
687 | return ac; | 596 | return ac; |
688 | } | 597 | } |
689 | 598 | ||
690 | static inline bool is_slab_pfmemalloc(struct page *page) | 599 | static noinline void cache_free_pfmemalloc(struct kmem_cache *cachep, |
691 | { | 600 | struct page *page, void *objp) |
692 | return PageSlabPfmemalloc(page); | ||
693 | } | ||
694 | |||
695 | /* Clears pfmemalloc_active if no slabs have pfmalloc set */ | ||
696 | static void recheck_pfmemalloc_active(struct kmem_cache *cachep, | ||
697 | struct array_cache *ac) | ||
698 | { | ||
699 | struct kmem_cache_node *n = get_node(cachep, numa_mem_id()); | ||
700 | struct page *page; | ||
701 | unsigned long flags; | ||
702 | |||
703 | if (!pfmemalloc_active) | ||
704 | return; | ||
705 | |||
706 | spin_lock_irqsave(&n->list_lock, flags); | ||
707 | list_for_each_entry(page, &n->slabs_full, lru) | ||
708 | if (is_slab_pfmemalloc(page)) | ||
709 | goto out; | ||
710 | |||
711 | list_for_each_entry(page, &n->slabs_partial, lru) | ||
712 | if (is_slab_pfmemalloc(page)) | ||
713 | goto out; | ||
714 | |||
715 | list_for_each_entry(page, &n->slabs_free, lru) | ||
716 | if (is_slab_pfmemalloc(page)) | ||
717 | goto out; | ||
718 | |||
719 | pfmemalloc_active = false; | ||
720 | out: | ||
721 | spin_unlock_irqrestore(&n->list_lock, flags); | ||
722 | } | ||
723 | |||
724 | static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac, | ||
725 | gfp_t flags, bool force_refill) | ||
726 | { | 601 | { |
727 | int i; | 602 | struct kmem_cache_node *n; |
728 | void *objp = ac->entry[--ac->avail]; | 603 | int page_node; |
729 | 604 | LIST_HEAD(list); | |
730 | /* Ensure the caller is allowed to use objects from PFMEMALLOC slab */ | ||
731 | if (unlikely(is_obj_pfmemalloc(objp))) { | ||
732 | struct kmem_cache_node *n; | ||
733 | |||
734 | if (gfp_pfmemalloc_allowed(flags)) { | ||
735 | clear_obj_pfmemalloc(&objp); | ||
736 | return objp; | ||
737 | } | ||
738 | |||
739 | /* The caller cannot use PFMEMALLOC objects, find another one */ | ||
740 | for (i = 0; i < ac->avail; i++) { | ||
741 | /* If a !PFMEMALLOC object is found, swap them */ | ||
742 | if (!is_obj_pfmemalloc(ac->entry[i])) { | ||
743 | objp = ac->entry[i]; | ||
744 | ac->entry[i] = ac->entry[ac->avail]; | ||
745 | ac->entry[ac->avail] = objp; | ||
746 | return objp; | ||
747 | } | ||
748 | } | ||
749 | |||
750 | /* | ||
751 | * If there are empty slabs on the slabs_free list and we are | ||
752 | * being forced to refill the cache, mark this one !pfmemalloc. | ||
753 | */ | ||
754 | n = get_node(cachep, numa_mem_id()); | ||
755 | if (!list_empty(&n->slabs_free) && force_refill) { | ||
756 | struct page *page = virt_to_head_page(objp); | ||
757 | ClearPageSlabPfmemalloc(page); | ||
758 | clear_obj_pfmemalloc(&objp); | ||
759 | recheck_pfmemalloc_active(cachep, ac); | ||
760 | return objp; | ||
761 | } | ||
762 | |||
763 | /* No !PFMEMALLOC objects available */ | ||
764 | ac->avail++; | ||
765 | objp = NULL; | ||
766 | } | ||
767 | |||
768 | return objp; | ||
769 | } | ||
770 | |||
771 | static inline void *ac_get_obj(struct kmem_cache *cachep, | ||
772 | struct array_cache *ac, gfp_t flags, bool force_refill) | ||
773 | { | ||
774 | void *objp; | ||
775 | |||
776 | if (unlikely(sk_memalloc_socks())) | ||
777 | objp = __ac_get_obj(cachep, ac, flags, force_refill); | ||
778 | else | ||
779 | objp = ac->entry[--ac->avail]; | ||
780 | |||
781 | return objp; | ||
782 | } | ||
783 | |||
784 | static noinline void *__ac_put_obj(struct kmem_cache *cachep, | ||
785 | struct array_cache *ac, void *objp) | ||
786 | { | ||
787 | if (unlikely(pfmemalloc_active)) { | ||
788 | /* Some pfmemalloc slabs exist, check if this is one */ | ||
789 | struct page *page = virt_to_head_page(objp); | ||
790 | if (PageSlabPfmemalloc(page)) | ||
791 | set_obj_pfmemalloc(&objp); | ||
792 | } | ||
793 | 605 | ||
794 | return objp; | 606 | page_node = page_to_nid(page); |
795 | } | 607 | n = get_node(cachep, page_node); |
796 | 608 | ||
797 | static inline void ac_put_obj(struct kmem_cache *cachep, struct array_cache *ac, | 609 | spin_lock(&n->list_lock); |
798 | void *objp) | 610 | free_block(cachep, &objp, 1, page_node, &list); |
799 | { | 611 | spin_unlock(&n->list_lock); |
800 | if (unlikely(sk_memalloc_socks())) | ||
801 | objp = __ac_put_obj(cachep, ac, objp); | ||
802 | 612 | ||
803 | ac->entry[ac->avail++] = objp; | 613 | slabs_destroy(cachep, &list); |
804 | } | 614 | } |
805 | 615 | ||
806 | /* | 616 | /* |
@@ -1003,7 +813,7 @@ static int __cache_free_alien(struct kmem_cache *cachep, void *objp, | |||
1003 | STATS_INC_ACOVERFLOW(cachep); | 813 | STATS_INC_ACOVERFLOW(cachep); |
1004 | __drain_alien_cache(cachep, ac, page_node, &list); | 814 | __drain_alien_cache(cachep, ac, page_node, &list); |
1005 | } | 815 | } |
1006 | ac_put_obj(cachep, ac, objp); | 816 | ac->entry[ac->avail++] = objp; |
1007 | spin_unlock(&alien->lock); | 817 | spin_unlock(&alien->lock); |
1008 | slabs_destroy(cachep, &list); | 818 | slabs_destroy(cachep, &list); |
1009 | } else { | 819 | } else { |
@@ -1540,10 +1350,9 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) | |||
1540 | if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slab_oom_rs)) | 1350 | if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slab_oom_rs)) |
1541 | return; | 1351 | return; |
1542 | 1352 | ||
1543 | printk(KERN_WARNING | 1353 | pr_warn("SLAB: Unable to allocate memory on node %d, gfp=%#x(%pGg)\n", |
1544 | "SLAB: Unable to allocate memory on node %d (gfp=0x%x)\n", | 1354 | nodeid, gfpflags, &gfpflags); |
1545 | nodeid, gfpflags); | 1355 | pr_warn(" cache: %s, object size: %d, order: %d\n", |
1546 | printk(KERN_WARNING " cache: %s, object size: %d, order: %d\n", | ||
1547 | cachep->name, cachep->size, cachep->gfporder); | 1356 | cachep->name, cachep->size, cachep->gfporder); |
1548 | 1357 | ||
1549 | for_each_kmem_cache_node(cachep, node, n) { | 1358 | for_each_kmem_cache_node(cachep, node, n) { |
@@ -1567,8 +1376,7 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) | |||
1567 | 1376 | ||
1568 | num_slabs += active_slabs; | 1377 | num_slabs += active_slabs; |
1569 | num_objs = num_slabs * cachep->num; | 1378 | num_objs = num_slabs * cachep->num; |
1570 | printk(KERN_WARNING | 1379 | pr_warn(" node %d: slabs: %ld/%ld, objs: %ld/%ld, free: %ld\n", |
1571 | " node %d: slabs: %ld/%ld, objs: %ld/%ld, free: %ld\n", | ||
1572 | node, active_slabs, num_slabs, active_objs, num_objs, | 1380 | node, active_slabs, num_slabs, active_objs, num_objs, |
1573 | free_objects); | 1381 | free_objects); |
1574 | } | 1382 | } |
@@ -1604,10 +1412,6 @@ static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, | |||
1604 | return NULL; | 1412 | return NULL; |
1605 | } | 1413 | } |
1606 | 1414 | ||
1607 | /* Record if ALLOC_NO_WATERMARKS was set when allocating the slab */ | ||
1608 | if (page_is_pfmemalloc(page)) | ||
1609 | pfmemalloc_active = true; | ||
1610 | |||
1611 | nr_pages = (1 << cachep->gfporder); | 1415 | nr_pages = (1 << cachep->gfporder); |
1612 | if (cachep->flags & SLAB_RECLAIM_ACCOUNT) | 1416 | if (cachep->flags & SLAB_RECLAIM_ACCOUNT) |
1613 | add_zone_page_state(page_zone(page), | 1417 | add_zone_page_state(page_zone(page), |
@@ -1615,8 +1419,10 @@ static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, | |||
1615 | else | 1419 | else |
1616 | add_zone_page_state(page_zone(page), | 1420 | add_zone_page_state(page_zone(page), |
1617 | NR_SLAB_UNRECLAIMABLE, nr_pages); | 1421 | NR_SLAB_UNRECLAIMABLE, nr_pages); |
1422 | |||
1618 | __SetPageSlab(page); | 1423 | __SetPageSlab(page); |
1619 | if (page_is_pfmemalloc(page)) | 1424 | /* Record if ALLOC_NO_WATERMARKS was set when allocating the slab */ |
1425 | if (sk_memalloc_socks() && page_is_pfmemalloc(page)) | ||
1620 | SetPageSlabPfmemalloc(page); | 1426 | SetPageSlabPfmemalloc(page); |
1621 | 1427 | ||
1622 | if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) { | 1428 | if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) { |
@@ -1670,6 +1476,14 @@ static void kmem_rcu_free(struct rcu_head *head) | |||
1670 | } | 1476 | } |
1671 | 1477 | ||
1672 | #if DEBUG | 1478 | #if DEBUG |
1479 | static bool is_debug_pagealloc_cache(struct kmem_cache *cachep) | ||
1480 | { | ||
1481 | if (debug_pagealloc_enabled() && OFF_SLAB(cachep) && | ||
1482 | (cachep->size % PAGE_SIZE) == 0) | ||
1483 | return true; | ||
1484 | |||
1485 | return false; | ||
1486 | } | ||
1673 | 1487 | ||
1674 | #ifdef CONFIG_DEBUG_PAGEALLOC | 1488 | #ifdef CONFIG_DEBUG_PAGEALLOC |
1675 | static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr, | 1489 | static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr, |
@@ -1703,6 +1517,23 @@ static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr, | |||
1703 | } | 1517 | } |
1704 | *addr++ = 0x87654321; | 1518 | *addr++ = 0x87654321; |
1705 | } | 1519 | } |
1520 | |||
1521 | static void slab_kernel_map(struct kmem_cache *cachep, void *objp, | ||
1522 | int map, unsigned long caller) | ||
1523 | { | ||
1524 | if (!is_debug_pagealloc_cache(cachep)) | ||
1525 | return; | ||
1526 | |||
1527 | if (caller) | ||
1528 | store_stackinfo(cachep, objp, caller); | ||
1529 | |||
1530 | kernel_map_pages(virt_to_page(objp), cachep->size / PAGE_SIZE, map); | ||
1531 | } | ||
1532 | |||
1533 | #else | ||
1534 | static inline void slab_kernel_map(struct kmem_cache *cachep, void *objp, | ||
1535 | int map, unsigned long caller) {} | ||
1536 | |||
1706 | #endif | 1537 | #endif |
1707 | 1538 | ||
1708 | static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val) | 1539 | static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val) |
@@ -1781,6 +1612,9 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp) | |||
1781 | int size, i; | 1612 | int size, i; |
1782 | int lines = 0; | 1613 | int lines = 0; |
1783 | 1614 | ||
1615 | if (is_debug_pagealloc_cache(cachep)) | ||
1616 | return; | ||
1617 | |||
1784 | realobj = (char *)objp + obj_offset(cachep); | 1618 | realobj = (char *)objp + obj_offset(cachep); |
1785 | size = cachep->object_size; | 1619 | size = cachep->object_size; |
1786 | 1620 | ||
@@ -1842,20 +1676,18 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, | |||
1842 | struct page *page) | 1676 | struct page *page) |
1843 | { | 1677 | { |
1844 | int i; | 1678 | int i; |
1679 | |||
1680 | if (OBJFREELIST_SLAB(cachep) && cachep->flags & SLAB_POISON) { | ||
1681 | poison_obj(cachep, page->freelist - obj_offset(cachep), | ||
1682 | POISON_FREE); | ||
1683 | } | ||
1684 | |||
1845 | for (i = 0; i < cachep->num; i++) { | 1685 | for (i = 0; i < cachep->num; i++) { |
1846 | void *objp = index_to_obj(cachep, page, i); | 1686 | void *objp = index_to_obj(cachep, page, i); |
1847 | 1687 | ||
1848 | if (cachep->flags & SLAB_POISON) { | 1688 | if (cachep->flags & SLAB_POISON) { |
1849 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
1850 | if (cachep->size % PAGE_SIZE == 0 && | ||
1851 | OFF_SLAB(cachep)) | ||
1852 | kernel_map_pages(virt_to_page(objp), | ||
1853 | cachep->size / PAGE_SIZE, 1); | ||
1854 | else | ||
1855 | check_poison_obj(cachep, objp); | ||
1856 | #else | ||
1857 | check_poison_obj(cachep, objp); | 1689 | check_poison_obj(cachep, objp); |
1858 | #endif | 1690 | slab_kernel_map(cachep, objp, 1, 0); |
1859 | } | 1691 | } |
1860 | if (cachep->flags & SLAB_RED_ZONE) { | 1692 | if (cachep->flags & SLAB_RED_ZONE) { |
1861 | if (*dbg_redzone1(cachep, objp) != RED_INACTIVE) | 1693 | if (*dbg_redzone1(cachep, objp) != RED_INACTIVE) |
@@ -1916,7 +1748,6 @@ static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list) | |||
1916 | * calculate_slab_order - calculate size (page order) of slabs | 1748 | * calculate_slab_order - calculate size (page order) of slabs |
1917 | * @cachep: pointer to the cache that is being created | 1749 | * @cachep: pointer to the cache that is being created |
1918 | * @size: size of objects to be created in this cache. | 1750 | * @size: size of objects to be created in this cache. |
1919 | * @align: required alignment for the objects. | ||
1920 | * @flags: slab allocation flags | 1751 | * @flags: slab allocation flags |
1921 | * | 1752 | * |
1922 | * Also calculates the number of objects per slab. | 1753 | * Also calculates the number of objects per slab. |
@@ -1926,9 +1757,8 @@ static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list) | |||
1926 | * towards high-order requests, this should be changed. | 1757 | * towards high-order requests, this should be changed. |
1927 | */ | 1758 | */ |
1928 | static size_t calculate_slab_order(struct kmem_cache *cachep, | 1759 | static size_t calculate_slab_order(struct kmem_cache *cachep, |
1929 | size_t size, size_t align, unsigned long flags) | 1760 | size_t size, unsigned long flags) |
1930 | { | 1761 | { |
1931 | unsigned long offslab_limit; | ||
1932 | size_t left_over = 0; | 1762 | size_t left_over = 0; |
1933 | int gfporder; | 1763 | int gfporder; |
1934 | 1764 | ||
@@ -1936,7 +1766,7 @@ static size_t calculate_slab_order(struct kmem_cache *cachep, | |||
1936 | unsigned int num; | 1766 | unsigned int num; |
1937 | size_t remainder; | 1767 | size_t remainder; |
1938 | 1768 | ||
1939 | cache_estimate(gfporder, size, align, flags, &remainder, &num); | 1769 | num = cache_estimate(gfporder, size, flags, &remainder); |
1940 | if (!num) | 1770 | if (!num) |
1941 | continue; | 1771 | continue; |
1942 | 1772 | ||
@@ -1945,19 +1775,24 @@ static size_t calculate_slab_order(struct kmem_cache *cachep, | |||
1945 | break; | 1775 | break; |
1946 | 1776 | ||
1947 | if (flags & CFLGS_OFF_SLAB) { | 1777 | if (flags & CFLGS_OFF_SLAB) { |
1948 | size_t freelist_size_per_obj = sizeof(freelist_idx_t); | 1778 | struct kmem_cache *freelist_cache; |
1779 | size_t freelist_size; | ||
1780 | |||
1781 | freelist_size = num * sizeof(freelist_idx_t); | ||
1782 | freelist_cache = kmalloc_slab(freelist_size, 0u); | ||
1783 | if (!freelist_cache) | ||
1784 | continue; | ||
1785 | |||
1949 | /* | 1786 | /* |
1950 | * Max number of objs-per-slab for caches which | 1787 | * Needed to avoid a possible looping condition |
1951 | * use off-slab slabs. Needed to avoid a possible | 1788 | * in cache_grow() |
1952 | * looping condition in cache_grow(). | ||
1953 | */ | 1789 | */ |
1954 | if (IS_ENABLED(CONFIG_DEBUG_SLAB_LEAK)) | 1790 | if (OFF_SLAB(freelist_cache)) |
1955 | freelist_size_per_obj += sizeof(char); | 1791 | continue; |
1956 | offslab_limit = size; | ||
1957 | offslab_limit /= freelist_size_per_obj; | ||
1958 | 1792 | ||
1959 | if (num > offslab_limit) | 1793 | /* check if off slab has enough benefit */ |
1960 | break; | 1794 | if (freelist_cache->size > cachep->size / 2) |
1795 | continue; | ||
1961 | } | 1796 | } |
1962 | 1797 | ||
1963 | /* Found something acceptable - save it away */ | 1798 | /* Found something acceptable - save it away */ |
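The new benefit test rejects off-slab management when the kmalloc cache backing the freelist would waste more than half an object's worth of memory. With hypothetical numbers:

#include <stdio.h>

int main(void)
{
        unsigned long obj_size = 96;            /* cachep->size */
        unsigned long freelist_size = 42 * 1;   /* num objs * 1-byte index */
        /* assume kmalloc_slab() serves 42 bytes from a 64-byte cache */
        unsigned long freelist_cache_size = 64;

        printf("freelist needs %lu bytes -> %lu-byte cache\n",
               freelist_size, freelist_cache_size);
        if (freelist_cache_size > obj_size / 2)
                printf("off-slab rejected (%lu > %lu)\n",
                       freelist_cache_size, obj_size / 2);
        return 0;
}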
@@ -2075,6 +1910,79 @@ __kmem_cache_alias(const char *name, size_t size, size_t align, | |||
2075 | return cachep; | 1910 | return cachep; |
2076 | } | 1911 | } |
2077 | 1912 | ||
1913 | static bool set_objfreelist_slab_cache(struct kmem_cache *cachep, | ||
1914 | size_t size, unsigned long flags) | ||
1915 | { | ||
1916 | size_t left; | ||
1917 | |||
1918 | cachep->num = 0; | ||
1919 | |||
1920 | if (cachep->ctor || flags & SLAB_DESTROY_BY_RCU) | ||
1921 | return false; | ||
1922 | |||
1923 | left = calculate_slab_order(cachep, size, | ||
1924 | flags | CFLGS_OBJFREELIST_SLAB); | ||
1925 | if (!cachep->num) | ||
1926 | return false; | ||
1927 | |||
1928 | if (cachep->num * sizeof(freelist_idx_t) > cachep->object_size) | ||
1929 | return false; | ||
1930 | |||
1931 | cachep->colour = left / cachep->colour_off; | ||
1932 | |||
1933 | return true; | ||
1934 | } | ||
1935 | |||
1936 | static bool set_off_slab_cache(struct kmem_cache *cachep, | ||
1937 | size_t size, unsigned long flags) | ||
1938 | { | ||
1939 | size_t left; | ||
1940 | |||
1941 | cachep->num = 0; | ||
1942 | |||
1943 | /* | ||
1944 | * Always use on-slab management when SLAB_NOLEAKTRACE | ||
1945 | * to avoid recursive calls into kmemleak. | ||
1946 | */ | ||
1947 | if (flags & SLAB_NOLEAKTRACE) | ||
1948 | return false; | ||
1949 | |||
1950 | /* | ||
1951 | * Size is large, assume best to place the slab management obj | ||
1952 | * off-slab (should allow better packing of objs). | ||
1953 | */ | ||
1954 | left = calculate_slab_order(cachep, size, flags | CFLGS_OFF_SLAB); | ||
1955 | if (!cachep->num) | ||
1956 | return false; | ||
1957 | |||
1958 | /* | ||
1959 | * If the slab has been placed off-slab, and we have enough space then | ||
1960 | * move it on-slab. This is at the expense of any extra colouring. | ||
1961 | */ | ||
1962 | if (left >= cachep->num * sizeof(freelist_idx_t)) | ||
1963 | return false; | ||
1964 | |||
1965 | cachep->colour = left / cachep->colour_off; | ||
1966 | |||
1967 | return true; | ||
1968 | } | ||
1969 | |||
1970 | static bool set_on_slab_cache(struct kmem_cache *cachep, | ||
1971 | size_t size, unsigned long flags) | ||
1972 | { | ||
1973 | size_t left; | ||
1974 | |||
1975 | cachep->num = 0; | ||
1976 | |||
1977 | left = calculate_slab_order(cachep, size, flags); | ||
1978 | if (!cachep->num) | ||
1979 | return false; | ||
1980 | |||
1981 | cachep->colour = left / cachep->colour_off; | ||
1982 | |||
1983 | return true; | ||
1984 | } | ||
1985 | |||
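The three helpers are tried in a fixed order: objfreelist first, then off-slab, then plain on-slab. A runnable outline reconstructed from the visible goto-done pattern at the end of this section (the stubs stand in for the real helpers, which also set cachep->num and cachep->colour as a side effect):

#include <stdbool.h>
#include <stdio.h>

static bool set_objfreelist_slab_cache(void) { return false; }
static bool set_off_slab_cache(void)         { return true;  }
static bool set_on_slab_cache(void)          { return true;  }

int main(void)
{
        if (set_objfreelist_slab_cache())
                printf("CFLGS_OBJFREELIST_SLAB\n");
        else if (set_off_slab_cache())
                printf("CFLGS_OFF_SLAB\n");
        else if (set_on_slab_cache())
                printf("on-slab management\n");
        else
                printf("-E2BIG, nothing fits\n");
        return 0;
}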
2078 | /** | 1986 | /** |
2079 | * __kmem_cache_create - Create a cache. | 1987 | * __kmem_cache_create - Create a cache. |
2080 | * @cachep: cache management descriptor | 1988 | * @cachep: cache management descriptor |
@@ -2099,7 +2007,6 @@ __kmem_cache_alias(const char *name, size_t size, size_t align, | |||
2099 | int | 2007 | int |
2100 | __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) | 2008 | __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) |
2101 | { | 2009 | { |
2102 | size_t left_over, freelist_size; | ||
2103 | size_t ralign = BYTES_PER_WORD; | 2010 | size_t ralign = BYTES_PER_WORD; |
2104 | gfp_t gfp; | 2011 | gfp_t gfp; |
2105 | int err; | 2012 | int err; |
@@ -2119,8 +2026,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) | |||
2119 | if (!(flags & SLAB_DESTROY_BY_RCU)) | 2026 | if (!(flags & SLAB_DESTROY_BY_RCU)) |
2120 | flags |= SLAB_POISON; | 2027 | flags |= SLAB_POISON; |
2121 | #endif | 2028 | #endif |
2122 | if (flags & SLAB_DESTROY_BY_RCU) | ||
2123 | BUG_ON(flags & SLAB_POISON); | ||
2124 | #endif | 2029 | #endif |
2125 | 2030 | ||
2126 | /* | 2031 | /* |
@@ -2152,6 +2057,10 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) | |||
2152 | * 4) Store it. | 2057 | * 4) Store it. |
2153 | */ | 2058 | */ |
2154 | cachep->align = ralign; | 2059 | cachep->align = ralign; |
2060 | cachep->colour_off = cache_line_size(); | ||
2061 | /* Offset must be a multiple of the alignment. */ | ||
2062 | if (cachep->colour_off < cachep->align) | ||
2063 | cachep->colour_off = cachep->align; | ||
2155 | 2064 | ||
2156 | if (slab_is_available()) | 2065 | if (slab_is_available()) |
2157 | gfp = GFP_KERNEL; | 2066 | gfp = GFP_KERNEL; |
@@ -2179,37 +2088,8 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) | |||
2179 | else | 2088 | else |
2180 | size += BYTES_PER_WORD; | 2089 | size += BYTES_PER_WORD; |
2181 | } | 2090 | } |
2182 | #if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC) | ||
2183 | /* | ||
2184 | * To activate debug pagealloc, off-slab management is a necessary | ||
2185 | * requirement. In the early phase of initialization, small sized slabs | ||
2186 | * don't get initialized, so it would not be possible. So, we need | ||
2187 | * to check size >= 256. This guarantees that all the necessary small | ||
2188 | * sized slabs are initialized in the current slab initialization sequence. | ||
2189 | */ | ||
2190 | if (!slab_early_init && size >= kmalloc_size(INDEX_NODE) && | ||
2191 | size >= 256 && cachep->object_size > cache_line_size() && | ||
2192 | ALIGN(size, cachep->align) < PAGE_SIZE) { | ||
2193 | cachep->obj_offset += PAGE_SIZE - ALIGN(size, cachep->align); | ||
2194 | size = PAGE_SIZE; | ||
2195 | } | ||
2196 | #endif | ||
2197 | #endif | 2091 | #endif |
2198 | 2092 | ||
2199 | /* | ||
2200 | * Determine if the slab management is 'on' or 'off' slab. | ||
2201 | * (bootstrapping cannot cope with offslab caches so don't do | ||
2202 | * it too early on. Always use on-slab management when | ||
2203 | * SLAB_NOLEAKTRACE to avoid recursive calls into kmemleak) | ||
2204 | */ | ||
2205 | if (size >= OFF_SLAB_MIN_SIZE && !slab_early_init && | ||
2206 | !(flags & SLAB_NOLEAKTRACE)) | ||
2207 | /* | ||
2208 | * Size is large, assume best to place the slab management obj | ||
2209 | * off-slab (should allow better packing of objs). | ||
2210 | */ | ||
2211 | flags |= CFLGS_OFF_SLAB; | ||
2212 | |||
2213 | size = ALIGN(size, cachep->align); | 2093 | size = ALIGN(size, cachep->align); |
2214 | /* | 2094 | /* |
2215 | * We should restrict the number of objects in a slab to implement | 2095 | * We should restrict the number of objects in a slab to implement |
@@ -2218,42 +2098,46 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) | |||
2218 | if (FREELIST_BYTE_INDEX && size < SLAB_OBJ_MIN_SIZE) | 2098 | if (FREELIST_BYTE_INDEX && size < SLAB_OBJ_MIN_SIZE) |
2219 | size = ALIGN(SLAB_OBJ_MIN_SIZE, cachep->align); | 2099 | size = ALIGN(SLAB_OBJ_MIN_SIZE, cachep->align); |
2220 | 2100 | ||
2221 | left_over = calculate_slab_order(cachep, size, cachep->align, flags); | 2101 | #if DEBUG |
2222 | |||
2223 | if (!cachep->num) | ||
2224 | return -E2BIG; | ||
2225 | |||
2226 | freelist_size = calculate_freelist_size(cachep->num, cachep->align); | ||
2227 | |||
2228 | /* | 2102 | /* |
2229 | * If the slab has been placed off-slab, and we have enough space then | 2103 | * To activate debug pagealloc, off-slab management is a necessary
2230 | * move it on-slab. This is at the expense of any extra colouring. | 2104 | * requirement. In the early phase of initialization, small sized slabs
2105 | * don't get initialized, so it would not be possible. So, we need | ||
2106 | * to check size >= 256. This guarantees that all the necessary small | ||
2107 | * sized slabs are initialized in the current slab initialization sequence. | ||
2231 | */ | 2108 | */ |
2232 | if (flags & CFLGS_OFF_SLAB && left_over >= freelist_size) { | 2109 | if (debug_pagealloc_enabled() && (flags & SLAB_POISON) && |
2233 | flags &= ~CFLGS_OFF_SLAB; | 2110 | size >= 256 && cachep->object_size > cache_line_size()) { |
2234 | left_over -= freelist_size; | 2111 | if (size < PAGE_SIZE || size % PAGE_SIZE == 0) { |
2112 | size_t tmp_size = ALIGN(size, PAGE_SIZE); | ||
2113 | |||
2114 | if (set_off_slab_cache(cachep, tmp_size, flags)) { | ||
2115 | flags |= CFLGS_OFF_SLAB; | ||
2116 | cachep->obj_offset += tmp_size - size; | ||
2117 | size = tmp_size; | ||
2118 | goto done; | ||
2119 | } | ||
2120 | } | ||
2235 | } | 2121 | } |
2122 | #endif | ||
2236 | 2123 | ||
2237 | if (flags & CFLGS_OFF_SLAB) { | 2124 | if (set_objfreelist_slab_cache(cachep, size, flags)) { |
2238 | /* really off slab. No need for manual alignment */ | 2125 | flags |= CFLGS_OBJFREELIST_SLAB; |
2239 | freelist_size = calculate_freelist_size(cachep->num, 0); | 2126 | goto done; |
2127 | } | ||
2240 | 2128 | ||
2241 | #ifdef CONFIG_PAGE_POISONING | 2129 | if (set_off_slab_cache(cachep, size, flags)) { |
2242 | /* If we're going to use the generic kernel_map_pages() | 2130 | flags |= CFLGS_OFF_SLAB; |
2243 | * poisoning, then it's going to smash the contents of | 2131 | goto done; |
2244 | * the redzone and userword anyhow, so switch them off. | ||
2245 | */ | ||
2246 | if (size % PAGE_SIZE == 0 && flags & SLAB_POISON) | ||
2247 | flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); | ||
2248 | #endif | ||
2249 | } | 2132 | } |
2250 | 2133 | ||
2251 | cachep->colour_off = cache_line_size(); | 2134 | if (set_on_slab_cache(cachep, size, flags)) |
2252 | /* Offset must be a multiple of the alignment. */ | 2135 | goto done; |
2253 | if (cachep->colour_off < cachep->align) | 2136 | |
2254 | cachep->colour_off = cachep->align; | 2137 | return -E2BIG; |
2255 | cachep->colour = left_over / cachep->colour_off; | 2138 | |
2256 | cachep->freelist_size = freelist_size; | 2139 | done: |
2140 | cachep->freelist_size = cachep->num * sizeof(freelist_idx_t); | ||
2257 | cachep->flags = flags; | 2141 | cachep->flags = flags; |
2258 | cachep->allocflags = __GFP_COMP; | 2142 | cachep->allocflags = __GFP_COMP; |
2259 | if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA)) | 2143 | if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA)) |
@@ -2261,16 +2145,21 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) | |||
2261 | cachep->size = size; | 2145 | cachep->size = size; |
2262 | cachep->reciprocal_buffer_size = reciprocal_value(size); | 2146 | cachep->reciprocal_buffer_size = reciprocal_value(size); |
2263 | 2147 | ||
2264 | if (flags & CFLGS_OFF_SLAB) { | 2148 | #if DEBUG |
2265 | cachep->freelist_cache = kmalloc_slab(freelist_size, 0u); | 2149 | /* |
2266 | /* | 2150 | * If we're going to use the generic kernel_map_pages() |
2267 | * This is a possibility for one of the kmalloc_{dma,}_caches. | 2151 | * poisoning, then it's going to smash the contents of |
2268 | * But since we go off slab only for object size greater than | 2152 | * the redzone and userword anyhow, so switch them off. |
2269 | * OFF_SLAB_MIN_SIZE, and kmalloc_{dma,}_caches get created | 2153 | */ |
2270 | * in ascending order,this should not happen at all. | 2154 | if (IS_ENABLED(CONFIG_PAGE_POISONING) && |
2271 | * But leave a BUG_ON for some lucky dude. | 2155 | (cachep->flags & SLAB_POISON) && |
2272 | */ | 2156 | is_debug_pagealloc_cache(cachep)) |
2273 | BUG_ON(ZERO_OR_NULL_PTR(cachep->freelist_cache)); | 2157 | cachep->flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); |
2158 | #endif | ||
2159 | |||
2160 | if (OFF_SLAB(cachep)) { | ||
2161 | cachep->freelist_cache = | ||
2162 | kmalloc_slab(cachep->freelist_size, 0u); | ||
2274 | } | 2163 | } |
2275 | 2164 | ||
2276 | err = setup_cpu_cache(cachep, gfp); | 2165 | err = setup_cpu_cache(cachep, gfp); |
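
The rewritten __kmem_cache_create() now tries the three freelist placements in a fixed order. A compilable sketch of that fallback chain, with simple predicates standing in for the set_*_cache() helpers:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative names for the three outcomes of the goto chain above. */
enum placement { OBJFREELIST, OFF_SLAB, ON_SLAB, TOO_BIG };

static enum placement choose_placement(bool objfreelist_ok, bool off_slab_ok,
					bool on_slab_ok)
{
	if (objfreelist_ok)	/* densest: a free object holds the freelist */
		return OBJFREELIST;
	if (off_slab_ok)	/* next: freelist in a separate kmalloc buffer */
		return OFF_SLAB;
	if (on_slab_ok)		/* last: carve the freelist out of the slab */
		return ON_SLAB;
	return TOO_BIG;		/* corresponds to returning -E2BIG */
}

int main(void)
{
	printf("placement: %d\n", choose_placement(false, true, true));
	return 0;
}
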
@@ -2377,9 +2266,6 @@ static int drain_freelist(struct kmem_cache *cache, | |||
2377 | } | 2266 | } |
2378 | 2267 | ||
2379 | page = list_entry(p, struct page, lru); | 2268 | page = list_entry(p, struct page, lru); |
2380 | #if DEBUG | ||
2381 | BUG_ON(page->active); | ||
2382 | #endif | ||
2383 | list_del(&page->lru); | 2269 | list_del(&page->lru); |
2384 | /* | 2270 | /* |
2385 | * Safe to drop the lock. The slab is no longer linked | 2271 | * Safe to drop the lock. The slab is no longer linked |
@@ -2454,18 +2340,23 @@ static void *alloc_slabmgmt(struct kmem_cache *cachep, | |||
2454 | void *freelist; | 2340 | void *freelist; |
2455 | void *addr = page_address(page); | 2341 | void *addr = page_address(page); |
2456 | 2342 | ||
2457 | if (OFF_SLAB(cachep)) { | 2343 | page->s_mem = addr + colour_off; |
2344 | page->active = 0; | ||
2345 | |||
2346 | if (OBJFREELIST_SLAB(cachep)) | ||
2347 | freelist = NULL; | ||
2348 | else if (OFF_SLAB(cachep)) { | ||
2458 | /* Slab management obj is off-slab. */ | 2349 | /* Slab management obj is off-slab. */ |
2459 | freelist = kmem_cache_alloc_node(cachep->freelist_cache, | 2350 | freelist = kmem_cache_alloc_node(cachep->freelist_cache, |
2460 | local_flags, nodeid); | 2351 | local_flags, nodeid); |
2461 | if (!freelist) | 2352 | if (!freelist) |
2462 | return NULL; | 2353 | return NULL; |
2463 | } else { | 2354 | } else { |
2464 | freelist = addr + colour_off; | 2355 | /* We will use the last bytes of the slab for the freelist */
2465 | colour_off += cachep->freelist_size; | 2356 | freelist = addr + (PAGE_SIZE << cachep->gfporder) - |
2357 | cachep->freelist_size; | ||
2466 | } | 2358 | } |
2467 | page->active = 0; | 2359 | |
2468 | page->s_mem = addr + colour_off; | ||
2469 | return freelist; | 2360 | return freelist; |
2470 | } | 2361 | } |
2471 | 2362 | ||
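
A small sketch of the new on-slab freelist placement in alloc_slabmgmt(), where the index array occupies the tail of the slab; the page size, object count and index width are assumptions:

#include <stdio.h>
#include <stdlib.h>

#define PAGE_BYTES	4096u	/* assumed page size */

int main(void)
{
	unsigned int gfporder = 0;			/* order-0 slab */
	size_t slab_bytes = (size_t)PAGE_BYTES << gfporder;
	size_t freelist_size = 20 * sizeof(unsigned char); /* num * sizeof(freelist_idx_t) */
	unsigned char *addr = malloc(slab_bytes);	/* stands in for page_address() */

	if (!addr)
		return 1;

	/*
	 * As in alloc_slabmgmt() above: the on-slab freelist now occupies
	 * the last bytes of the slab, so s_mem stays at the coloured start
	 * of the page and is no longer shifted by the freelist.
	 */
	unsigned char *freelist = addr + slab_bytes - freelist_size;

	printf("objects at [%p..%p), freelist at [%p..%p)\n",
	       (void *)addr, (void *)freelist,
	       (void *)freelist, (void *)(addr + slab_bytes));
	free(addr);
	return 0;
}
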
@@ -2480,17 +2371,14 @@ static inline void set_free_obj(struct page *page, | |||
2480 | ((freelist_idx_t *)(page->freelist))[idx] = val; | 2371 | ((freelist_idx_t *)(page->freelist))[idx] = val; |
2481 | } | 2372 | } |
2482 | 2373 | ||
2483 | static void cache_init_objs(struct kmem_cache *cachep, | 2374 | static void cache_init_objs_debug(struct kmem_cache *cachep, struct page *page) |
2484 | struct page *page) | ||
2485 | { | 2375 | { |
2376 | #if DEBUG | ||
2486 | int i; | 2377 | int i; |
2487 | 2378 | ||
2488 | for (i = 0; i < cachep->num; i++) { | 2379 | for (i = 0; i < cachep->num; i++) { |
2489 | void *objp = index_to_obj(cachep, page, i); | 2380 | void *objp = index_to_obj(cachep, page, i); |
2490 | #if DEBUG | 2381 | |
2491 | /* need to poison the objs? */ | ||
2492 | if (cachep->flags & SLAB_POISON) | ||
2493 | poison_obj(cachep, objp, POISON_FREE); | ||
2494 | if (cachep->flags & SLAB_STORE_USER) | 2382 | if (cachep->flags & SLAB_STORE_USER) |
2495 | *dbg_userword(cachep, objp) = NULL; | 2383 | *dbg_userword(cachep, objp) = NULL; |
2496 | 2384 | ||
@@ -2514,15 +2402,32 @@ static void cache_init_objs(struct kmem_cache *cachep, | |||
2514 | slab_error(cachep, "constructor overwrote the" | 2402 | slab_error(cachep, "constructor overwrote the" |
2515 | " start of an object"); | 2403 | " start of an object"); |
2516 | } | 2404 | } |
2517 | if ((cachep->size % PAGE_SIZE) == 0 && | 2405 | /* need to poison the objs? */ |
2518 | OFF_SLAB(cachep) && cachep->flags & SLAB_POISON) | 2406 | if (cachep->flags & SLAB_POISON) { |
2519 | kernel_map_pages(virt_to_page(objp), | 2407 | poison_obj(cachep, objp, POISON_FREE); |
2520 | cachep->size / PAGE_SIZE, 0); | 2408 | slab_kernel_map(cachep, objp, 0, 0); |
2521 | #else | 2409 | } |
2522 | if (cachep->ctor) | 2410 | } |
2523 | cachep->ctor(objp); | ||
2524 | #endif | 2411 | #endif |
2525 | set_obj_status(page, i, OBJECT_FREE); | 2412 | } |
2413 | |||
2414 | static void cache_init_objs(struct kmem_cache *cachep, | ||
2415 | struct page *page) | ||
2416 | { | ||
2417 | int i; | ||
2418 | |||
2419 | cache_init_objs_debug(cachep, page); | ||
2420 | |||
2421 | if (OBJFREELIST_SLAB(cachep)) { | ||
2422 | page->freelist = index_to_obj(cachep, page, cachep->num - 1) + | ||
2423 | obj_offset(cachep); | ||
2424 | } | ||
2425 | |||
2426 | for (i = 0; i < cachep->num; i++) { | ||
2427 | /* constructor could break poison info */ | ||
2428 | if (DEBUG == 0 && cachep->ctor) | ||
2429 | cachep->ctor(index_to_obj(cachep, page, i)); | ||
2430 | |||
2526 | set_free_obj(page, i, i); | 2431 | set_free_obj(page, i, i); |
2527 | } | 2432 | } |
2528 | } | 2433 | } |
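
A sketch of the OBJFREELIST_SLAB initialization performed by cache_init_objs(), which parks the freelist index array inside the last, still-free object; the sizes are assumed:

#include <stdio.h>

#define NUM_OBJS	8
#define OBJ_SIZE	512	/* assumed; must be able to hold the index array */

static unsigned char slab[NUM_OBJS * OBJ_SIZE];

int main(void)
{
	unsigned int i;
	/*
	 * As in cache_init_objs() for OBJFREELIST_SLAB: the freelist index
	 * array lives inside the last, still-free object. Once that object
	 * is handed out, its space is reused for object data.
	 */
	unsigned char *freelist = slab + (NUM_OBJS - 1) * OBJ_SIZE;

	for (i = 0; i < NUM_OBJS; i++)
		freelist[i] = (unsigned char)i;	/* set_free_obj(page, i, i) */

	printf("freelist parked in object %d\n", NUM_OBJS - 1);
	return 0;
}
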
@@ -2537,30 +2442,28 @@ static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) | |||
2537 | } | 2442 | } |
2538 | } | 2443 | } |
2539 | 2444 | ||
2540 | static void *slab_get_obj(struct kmem_cache *cachep, struct page *page, | 2445 | static void *slab_get_obj(struct kmem_cache *cachep, struct page *page) |
2541 | int nodeid) | ||
2542 | { | 2446 | { |
2543 | void *objp; | 2447 | void *objp; |
2544 | 2448 | ||
2545 | objp = index_to_obj(cachep, page, get_free_obj(page, page->active)); | 2449 | objp = index_to_obj(cachep, page, get_free_obj(page, page->active)); |
2546 | page->active++; | 2450 | page->active++; |
2451 | |||
2547 | #if DEBUG | 2452 | #if DEBUG |
2548 | WARN_ON(page_to_nid(virt_to_page(objp)) != nodeid); | 2453 | if (cachep->flags & SLAB_STORE_USER) |
2454 | set_store_user_dirty(cachep); | ||
2549 | #endif | 2455 | #endif |
2550 | 2456 | ||
2551 | return objp; | 2457 | return objp; |
2552 | } | 2458 | } |
2553 | 2459 | ||
2554 | static void slab_put_obj(struct kmem_cache *cachep, struct page *page, | 2460 | static void slab_put_obj(struct kmem_cache *cachep, |
2555 | void *objp, int nodeid) | 2461 | struct page *page, void *objp) |
2556 | { | 2462 | { |
2557 | unsigned int objnr = obj_to_index(cachep, page, objp); | 2463 | unsigned int objnr = obj_to_index(cachep, page, objp); |
2558 | #if DEBUG | 2464 | #if DEBUG |
2559 | unsigned int i; | 2465 | unsigned int i; |
2560 | 2466 | ||
2561 | /* Verify that the slab belongs to the intended node */ | ||
2562 | WARN_ON(page_to_nid(virt_to_page(objp)) != nodeid); | ||
2563 | |||
2564 | /* Verify double free bug */ | 2467 | /* Verify double free bug */ |
2565 | for (i = page->active; i < cachep->num; i++) { | 2468 | for (i = page->active; i < cachep->num; i++) { |
2566 | if (get_free_obj(page, i) == objnr) { | 2469 | if (get_free_obj(page, i) == objnr) { |
@@ -2571,6 +2474,9 @@ static void slab_put_obj(struct kmem_cache *cachep, struct page *page, | |||
2571 | } | 2474 | } |
2572 | #endif | 2475 | #endif |
2573 | page->active--; | 2476 | page->active--; |
2477 | if (!page->freelist) | ||
2478 | page->freelist = objp + obj_offset(cachep); | ||
2479 | |||
2574 | set_free_obj(page, page->active, objnr); | 2480 | set_free_obj(page, page->active, objnr); |
2575 | } | 2481 | } |
2576 | 2482 | ||
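
slab_get_obj() and slab_put_obj() treat the freelist index array as a stack indexed by page->active. A compilable model, including the tail scan behind the DEBUG double-free check:

#include <assert.h>
#include <stdio.h>

#define NUM	4
static unsigned char free_idx[NUM];	/* the freelist index array */
static unsigned int active;		/* models page->active */

/* slab_get_obj(): pop the index at the current stack position. */
static unsigned int get_obj(void)
{
	return free_idx[active++];
}

/*
 * slab_put_obj(): push the freed index back. Scanning the live tail
 * [active..NUM) for a duplicate is the double-free check the kernel
 * performs under #if DEBUG above.
 */
static void put_obj(unsigned int objnr)
{
	unsigned int i;

	for (i = active; i < NUM; i++)
		assert(free_idx[i] != objnr);	/* would be a double free */
	free_idx[--active] = (unsigned char)objnr;
}

int main(void)
{
	unsigned int i, a, b;

	for (i = 0; i < NUM; i++)
		free_idx[i] = (unsigned char)i;

	a = get_obj();
	b = get_obj();
	put_obj(b);
	put_obj(a);
	printf("active is back to %u\n", active);
	return 0;
}
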
@@ -2645,7 +2551,7 @@ static int cache_grow(struct kmem_cache *cachep, | |||
2645 | /* Get slab management. */ | 2551 | /* Get slab management. */ |
2646 | freelist = alloc_slabmgmt(cachep, page, offset, | 2552 | freelist = alloc_slabmgmt(cachep, page, offset, |
2647 | local_flags & ~GFP_CONSTRAINT_MASK, nodeid); | 2553 | local_flags & ~GFP_CONSTRAINT_MASK, nodeid); |
2648 | if (!freelist) | 2554 | if (OFF_SLAB(cachep) && !freelist) |
2649 | goto opps1; | 2555 | goto opps1; |
2650 | 2556 | ||
2651 | slab_map_pages(cachep, page, freelist); | 2557 | slab_map_pages(cachep, page, freelist); |
@@ -2726,27 +2632,19 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, | |||
2726 | *dbg_redzone1(cachep, objp) = RED_INACTIVE; | 2632 | *dbg_redzone1(cachep, objp) = RED_INACTIVE; |
2727 | *dbg_redzone2(cachep, objp) = RED_INACTIVE; | 2633 | *dbg_redzone2(cachep, objp) = RED_INACTIVE; |
2728 | } | 2634 | } |
2729 | if (cachep->flags & SLAB_STORE_USER) | 2635 | if (cachep->flags & SLAB_STORE_USER) { |
2636 | set_store_user_dirty(cachep); | ||
2730 | *dbg_userword(cachep, objp) = (void *)caller; | 2637 | *dbg_userword(cachep, objp) = (void *)caller; |
2638 | } | ||
2731 | 2639 | ||
2732 | objnr = obj_to_index(cachep, page, objp); | 2640 | objnr = obj_to_index(cachep, page, objp); |
2733 | 2641 | ||
2734 | BUG_ON(objnr >= cachep->num); | 2642 | BUG_ON(objnr >= cachep->num); |
2735 | BUG_ON(objp != index_to_obj(cachep, page, objnr)); | 2643 | BUG_ON(objp != index_to_obj(cachep, page, objnr)); |
2736 | 2644 | ||
2737 | set_obj_status(page, objnr, OBJECT_FREE); | ||
2738 | if (cachep->flags & SLAB_POISON) { | 2645 | if (cachep->flags & SLAB_POISON) { |
2739 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
2740 | if ((cachep->size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) { | ||
2741 | store_stackinfo(cachep, objp, caller); | ||
2742 | kernel_map_pages(virt_to_page(objp), | ||
2743 | cachep->size / PAGE_SIZE, 0); | ||
2744 | } else { | ||
2745 | poison_obj(cachep, objp, POISON_FREE); | ||
2746 | } | ||
2747 | #else | ||
2748 | poison_obj(cachep, objp, POISON_FREE); | 2646 | poison_obj(cachep, objp, POISON_FREE); |
2749 | #endif | 2647 | slab_kernel_map(cachep, objp, 0, caller); |
2750 | } | 2648 | } |
2751 | return objp; | 2649 | return objp; |
2752 | } | 2650 | } |
@@ -2756,7 +2654,85 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, | |||
2756 | #define cache_free_debugcheck(x,objp,z) (objp) | 2654 | #define cache_free_debugcheck(x,objp,z) (objp) |
2757 | #endif | 2655 | #endif |
2758 | 2656 | ||
2759 | static struct page *get_first_slab(struct kmem_cache_node *n) | 2657 | static inline void fixup_objfreelist_debug(struct kmem_cache *cachep, |
2658 | void **list) | ||
2659 | { | ||
2660 | #if DEBUG | ||
2661 | void *next = *list; | ||
2662 | void *objp; | ||
2663 | |||
2664 | while (next) { | ||
2665 | objp = next - obj_offset(cachep); | ||
2666 | next = *(void **)next; | ||
2667 | poison_obj(cachep, objp, POISON_FREE); | ||
2668 | } | ||
2669 | #endif | ||
2670 | } | ||
2671 | |||
2672 | static inline void fixup_slab_list(struct kmem_cache *cachep, | ||
2673 | struct kmem_cache_node *n, struct page *page, | ||
2674 | void **list) | ||
2675 | { | ||
2676 | /* Move the slab to the correct slab list: */ | ||
2677 | list_del(&page->lru); | ||
2678 | if (page->active == cachep->num) { | ||
2679 | list_add(&page->lru, &n->slabs_full); | ||
2680 | if (OBJFREELIST_SLAB(cachep)) { | ||
2681 | #if DEBUG | ||
2682 | /* Poisoning will be done without holding the lock */ | ||
2683 | if (cachep->flags & SLAB_POISON) { | ||
2684 | void **objp = page->freelist; | ||
2685 | |||
2686 | *objp = *list; | ||
2687 | *list = objp; | ||
2688 | } | ||
2689 | #endif | ||
2690 | page->freelist = NULL; | ||
2691 | } | ||
2692 | } else | ||
2693 | list_add(&page->lru, &n->slabs_partial); | ||
2694 | } | ||
2695 | |||
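
The list threaded through fixup_slab_list() above and drained by fixup_objfreelist_debug() is an intrusive pointer chain: poisoning work is queued without allocating memory while the node lock is held, then replayed once the lock is dropped. A minimal model of that chaining:

#include <stdio.h>

/*
 * Each queued slot stores the previous chain head, exactly like
 * "*objp = *list; *list = objp;" in fixup_slab_list() above, so no
 * memory needs to be allocated while the node lock is held.
 */
static void chain_push(void ***head, void **slot)
{
	*slot = *head;
	*head = slot;
}

int main(void)
{
	void **list = NULL;	/* chain head, as in cache_alloc_refill() */
	void *slot_a = NULL, *slot_b = NULL;

	chain_push(&list, &slot_a);
	chain_push(&list, &slot_b);

	/* Walk after "dropping the lock", poisoning each queued entry. */
	while (list) {
		void **next = (void **)*list;

		printf("poison entry at %p\n", (void *)list);
		list = next;
	}
	return 0;
}
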
2696 | /* Try to find a non-pfmemalloc slab if needed */ | ||
2697 | static noinline struct page *get_valid_first_slab(struct kmem_cache_node *n, | ||
2698 | struct page *page, bool pfmemalloc) | ||
2699 | { | ||
2700 | if (!page) | ||
2701 | return NULL; | ||
2702 | |||
2703 | if (pfmemalloc) | ||
2704 | return page; | ||
2705 | |||
2706 | if (!PageSlabPfmemalloc(page)) | ||
2707 | return page; | ||
2708 | |||
2709 | /* No need to keep pfmemalloc slab if we have enough free objects */ | ||
2710 | if (n->free_objects > n->free_limit) { | ||
2711 | ClearPageSlabPfmemalloc(page); | ||
2712 | return page; | ||
2713 | } | ||
2714 | |||
2715 | /* Move the pfmemalloc slab to the end of the list to speed up the next search */ | ||
2716 | list_del(&page->lru); | ||
2717 | if (!page->active) | ||
2718 | list_add_tail(&page->lru, &n->slabs_free); | ||
2719 | else | ||
2720 | list_add_tail(&page->lru, &n->slabs_partial); | ||
2721 | |||
2722 | list_for_each_entry(page, &n->slabs_partial, lru) { | ||
2723 | if (!PageSlabPfmemalloc(page)) | ||
2724 | return page; | ||
2725 | } | ||
2726 | |||
2727 | list_for_each_entry(page, &n->slabs_free, lru) { | ||
2728 | if (!PageSlabPfmemalloc(page)) | ||
2729 | return page; | ||
2730 | } | ||
2731 | |||
2732 | return NULL; | ||
2733 | } | ||
2734 | |||
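
A toy model of the pfmemalloc filtering above: a slab built from emergency reserves is handed out only to callers entitled to those reserves (gfp_pfmemalloc_allowed() in the kernel); the struct and function names here are illustrative only:

#include <stdbool.h>
#include <stdio.h>

/* Toy slab with the one property that matters here (name assumed). */
struct toy_slab {
	bool pfmemalloc;	/* backed by emergency reserves */
};

static bool slab_usable(const struct toy_slab *s, bool caller_has_reserves)
{
	return caller_has_reserves || !s->pfmemalloc;
}

int main(void)
{
	struct toy_slab s = { .pfmemalloc = true };

	printf("normal alloc may use it: %d\n", slab_usable(&s, false));
	printf("reserve alloc may use it: %d\n", slab_usable(&s, true));
	return 0;
}
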
2735 | static struct page *get_first_slab(struct kmem_cache_node *n, bool pfmemalloc) | ||
2760 | { | 2736 | { |
2761 | struct page *page; | 2737 | struct page *page; |
2762 | 2738 | ||
@@ -2768,21 +2744,51 @@ static struct page *get_first_slab(struct kmem_cache_node *n) | |||
2768 | struct page, lru); | 2744 | struct page, lru); |
2769 | } | 2745 | } |
2770 | 2746 | ||
2747 | if (sk_memalloc_socks()) | ||
2748 | return get_valid_first_slab(n, page, pfmemalloc); | ||
2749 | |||
2771 | return page; | 2750 | return page; |
2772 | } | 2751 | } |
2773 | 2752 | ||
2774 | static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags, | 2753 | static noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep, |
2775 | bool force_refill) | 2754 | struct kmem_cache_node *n, gfp_t flags) |
2755 | { | ||
2756 | struct page *page; | ||
2757 | void *obj; | ||
2758 | void *list = NULL; | ||
2759 | |||
2760 | if (!gfp_pfmemalloc_allowed(flags)) | ||
2761 | return NULL; | ||
2762 | |||
2763 | spin_lock(&n->list_lock); | ||
2764 | page = get_first_slab(n, true); | ||
2765 | if (!page) { | ||
2766 | spin_unlock(&n->list_lock); | ||
2767 | return NULL; | ||
2768 | } | ||
2769 | |||
2770 | obj = slab_get_obj(cachep, page); | ||
2771 | n->free_objects--; | ||
2772 | |||
2773 | fixup_slab_list(cachep, n, page, &list); | ||
2774 | |||
2775 | spin_unlock(&n->list_lock); | ||
2776 | fixup_objfreelist_debug(cachep, &list); | ||
2777 | |||
2778 | return obj; | ||
2779 | } | ||
2780 | |||
2781 | static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) | ||
2776 | { | 2782 | { |
2777 | int batchcount; | 2783 | int batchcount; |
2778 | struct kmem_cache_node *n; | 2784 | struct kmem_cache_node *n; |
2779 | struct array_cache *ac; | 2785 | struct array_cache *ac; |
2780 | int node; | 2786 | int node; |
2787 | void *list = NULL; | ||
2781 | 2788 | ||
2782 | check_irq_off(); | 2789 | check_irq_off(); |
2783 | node = numa_mem_id(); | 2790 | node = numa_mem_id(); |
2784 | if (unlikely(force_refill)) | 2791 | |
2785 | goto force_grow; | ||
2786 | retry: | 2792 | retry: |
2787 | ac = cpu_cache_get(cachep); | 2793 | ac = cpu_cache_get(cachep); |
2788 | batchcount = ac->batchcount; | 2794 | batchcount = ac->batchcount; |
@@ -2808,7 +2814,7 @@ retry: | |||
2808 | while (batchcount > 0) { | 2814 | while (batchcount > 0) { |
2809 | struct page *page; | 2815 | struct page *page; |
2810 | /* Get the slab the allocation is to come from. */ | 2816 | /* Get the slab the allocation is to come from. */
2811 | page = get_first_slab(n); | 2817 | page = get_first_slab(n, false); |
2812 | if (!page) | 2818 | if (!page) |
2813 | goto must_grow; | 2819 | goto must_grow; |
2814 | 2820 | ||
@@ -2826,26 +2832,29 @@ retry: | |||
2826 | STATS_INC_ACTIVE(cachep); | 2832 | STATS_INC_ACTIVE(cachep); |
2827 | STATS_SET_HIGH(cachep); | 2833 | STATS_SET_HIGH(cachep); |
2828 | 2834 | ||
2829 | ac_put_obj(cachep, ac, slab_get_obj(cachep, page, | 2835 | ac->entry[ac->avail++] = slab_get_obj(cachep, page); |
2830 | node)); | ||
2831 | } | 2836 | } |
2832 | 2837 | ||
2833 | /* move slabp to correct slabp list: */ | 2838 | fixup_slab_list(cachep, n, page, &list); |
2834 | list_del(&page->lru); | ||
2835 | if (page->active == cachep->num) | ||
2836 | list_add(&page->lru, &n->slabs_full); | ||
2837 | else | ||
2838 | list_add(&page->lru, &n->slabs_partial); | ||
2839 | } | 2839 | } |
2840 | 2840 | ||
2841 | must_grow: | 2841 | must_grow: |
2842 | n->free_objects -= ac->avail; | 2842 | n->free_objects -= ac->avail; |
2843 | alloc_done: | 2843 | alloc_done: |
2844 | spin_unlock(&n->list_lock); | 2844 | spin_unlock(&n->list_lock); |
2845 | fixup_objfreelist_debug(cachep, &list); | ||
2845 | 2846 | ||
2846 | if (unlikely(!ac->avail)) { | 2847 | if (unlikely(!ac->avail)) { |
2847 | int x; | 2848 | int x; |
2848 | force_grow: | 2849 | |
2850 | /* Check if we can use obj in pfmemalloc slab */ | ||
2851 | if (sk_memalloc_socks()) { | ||
2852 | void *obj = cache_alloc_pfmemalloc(cachep, n, flags); | ||
2853 | |||
2854 | if (obj) | ||
2855 | return obj; | ||
2856 | } | ||
2857 | |||
2849 | x = cache_grow(cachep, gfp_exact_node(flags), node, NULL); | 2858 | x = cache_grow(cachep, gfp_exact_node(flags), node, NULL); |
2850 | 2859 | ||
2851 | /* cache_grow can reenable interrupts, then ac could change. */ | 2860 | /* cache_grow can reenable interrupts, then ac could change. */ |
@@ -2853,7 +2862,7 @@ force_grow: | |||
2853 | node = numa_mem_id(); | 2862 | node = numa_mem_id(); |
2854 | 2863 | ||
2855 | /* no objects in sight? abort */ | 2864 | /* no objects in sight? abort */ |
2856 | if (!x && (ac->avail == 0 || force_refill)) | 2865 | if (!x && ac->avail == 0) |
2857 | return NULL; | 2866 | return NULL; |
2858 | 2867 | ||
2859 | if (!ac->avail) /* objects refilled by interrupt? */ | 2868 | if (!ac->avail) /* objects refilled by interrupt? */ |
@@ -2861,7 +2870,7 @@ force_grow: | |||
2861 | } | 2870 | } |
2862 | ac->touched = 1; | 2871 | ac->touched = 1; |
2863 | 2872 | ||
2864 | return ac_get_obj(cachep, ac, flags, force_refill); | 2873 | return ac->entry[--ac->avail]; |
2865 | } | 2874 | } |
2866 | 2875 | ||
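
With ac_get_obj()/ac_put_obj() gone, the per-cpu array cache is a plain LIFO again. A compilable model of the two fast paths, with an assumed struct layout:

#include <stdio.h>

/* Minimal model of the per-cpu array_cache (field names assumed). */
struct toy_ac {
	unsigned int avail;	/* number of cached object pointers */
	void *entry[8];
};

/* ____cache_alloc() fast path: pop the most recently freed object. */
static void *ac_pop(struct toy_ac *ac)
{
	return ac->avail ? ac->entry[--ac->avail] : NULL;
}

/* __cache_free() fast path: push; LIFO order keeps objects cache-hot. */
static void ac_push(struct toy_ac *ac, void *obj)
{
	ac->entry[ac->avail++] = obj;
}

int main(void)
{
	struct toy_ac ac = { 0 };
	int a, b;

	ac_push(&ac, &a);
	ac_push(&ac, &b);
	printf("LIFO pop gives b: %d\n", ac_pop(&ac) == (void *)&b);
	printf("then a: %d\n", ac_pop(&ac) == (void *)&a);
	return 0;
}
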
2867 | static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep, | 2876 | static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep, |
@@ -2877,20 +2886,11 @@ static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep, | |||
2877 | static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, | 2886 | static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, |
2878 | gfp_t flags, void *objp, unsigned long caller) | 2887 | gfp_t flags, void *objp, unsigned long caller) |
2879 | { | 2888 | { |
2880 | struct page *page; | ||
2881 | |||
2882 | if (!objp) | 2889 | if (!objp) |
2883 | return objp; | 2890 | return objp; |
2884 | if (cachep->flags & SLAB_POISON) { | 2891 | if (cachep->flags & SLAB_POISON) { |
2885 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
2886 | if ((cachep->size % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) | ||
2887 | kernel_map_pages(virt_to_page(objp), | ||
2888 | cachep->size / PAGE_SIZE, 1); | ||
2889 | else | ||
2890 | check_poison_obj(cachep, objp); | ||
2891 | #else | ||
2892 | check_poison_obj(cachep, objp); | 2892 | check_poison_obj(cachep, objp); |
2893 | #endif | 2893 | slab_kernel_map(cachep, objp, 1, 0); |
2894 | poison_obj(cachep, objp, POISON_INUSE); | 2894 | poison_obj(cachep, objp, POISON_INUSE); |
2895 | } | 2895 | } |
2896 | if (cachep->flags & SLAB_STORE_USER) | 2896 | if (cachep->flags & SLAB_STORE_USER) |
@@ -2910,8 +2910,6 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, | |||
2910 | *dbg_redzone2(cachep, objp) = RED_ACTIVE; | 2910 | *dbg_redzone2(cachep, objp) = RED_ACTIVE; |
2911 | } | 2911 | } |
2912 | 2912 | ||
2913 | page = virt_to_head_page(objp); | ||
2914 | set_obj_status(page, obj_to_index(cachep, page, objp), OBJECT_ACTIVE); | ||
2915 | objp += obj_offset(cachep); | 2913 | objp += obj_offset(cachep); |
2916 | if (cachep->ctor && cachep->flags & SLAB_POISON) | 2914 | if (cachep->ctor && cachep->flags & SLAB_POISON) |
2917 | cachep->ctor(objp); | 2915 | cachep->ctor(objp); |
@@ -2926,40 +2924,24 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, | |||
2926 | #define cache_alloc_debugcheck_after(a,b,objp,d) (objp) | 2924 | #define cache_alloc_debugcheck_after(a,b,objp,d) (objp) |
2927 | #endif | 2925 | #endif |
2928 | 2926 | ||
2929 | static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags) | ||
2930 | { | ||
2931 | if (unlikely(cachep == kmem_cache)) | ||
2932 | return false; | ||
2933 | |||
2934 | return should_failslab(cachep->object_size, flags, cachep->flags); | ||
2935 | } | ||
2936 | |||
2937 | static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) | 2927 | static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) |
2938 | { | 2928 | { |
2939 | void *objp; | 2929 | void *objp; |
2940 | struct array_cache *ac; | 2930 | struct array_cache *ac; |
2941 | bool force_refill = false; | ||
2942 | 2931 | ||
2943 | check_irq_off(); | 2932 | check_irq_off(); |
2944 | 2933 | ||
2945 | ac = cpu_cache_get(cachep); | 2934 | ac = cpu_cache_get(cachep); |
2946 | if (likely(ac->avail)) { | 2935 | if (likely(ac->avail)) { |
2947 | ac->touched = 1; | 2936 | ac->touched = 1; |
2948 | objp = ac_get_obj(cachep, ac, flags, false); | 2937 | objp = ac->entry[--ac->avail]; |
2949 | 2938 | ||
2950 | /* | 2939 | STATS_INC_ALLOCHIT(cachep); |
2951 | * Allow for the possibility all avail objects are not allowed | 2940 | goto out; |
2952 | * by the current flags | ||
2953 | */ | ||
2954 | if (objp) { | ||
2955 | STATS_INC_ALLOCHIT(cachep); | ||
2956 | goto out; | ||
2957 | } | ||
2958 | force_refill = true; | ||
2959 | } | 2941 | } |
2960 | 2942 | ||
2961 | STATS_INC_ALLOCMISS(cachep); | 2943 | STATS_INC_ALLOCMISS(cachep); |
2962 | objp = cache_alloc_refill(cachep, flags, force_refill); | 2944 | objp = cache_alloc_refill(cachep, flags); |
2963 | /* | 2945 | /* |
2964 | * the 'ac' may be updated by cache_alloc_refill(), | 2946 | * the 'ac' may be updated by cache_alloc_refill(), |
2965 | * and kmemleak_erase() requires its correct value. | 2947 | * and kmemleak_erase() requires its correct value. |
@@ -3097,6 +3079,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, | |||
3097 | struct page *page; | 3079 | struct page *page; |
3098 | struct kmem_cache_node *n; | 3080 | struct kmem_cache_node *n; |
3099 | void *obj; | 3081 | void *obj; |
3082 | void *list = NULL; | ||
3100 | int x; | 3083 | int x; |
3101 | 3084 | ||
3102 | VM_BUG_ON(nodeid < 0 || nodeid >= MAX_NUMNODES); | 3085 | VM_BUG_ON(nodeid < 0 || nodeid >= MAX_NUMNODES); |
@@ -3106,7 +3089,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, | |||
3106 | retry: | 3089 | retry: |
3107 | check_irq_off(); | 3090 | check_irq_off(); |
3108 | spin_lock(&n->list_lock); | 3091 | spin_lock(&n->list_lock); |
3109 | page = get_first_slab(n); | 3092 | page = get_first_slab(n, false); |
3110 | if (!page) | 3093 | if (!page) |
3111 | goto must_grow; | 3094 | goto must_grow; |
3112 | 3095 | ||
@@ -3118,17 +3101,13 @@ retry: | |||
3118 | 3101 | ||
3119 | BUG_ON(page->active == cachep->num); | 3102 | BUG_ON(page->active == cachep->num); |
3120 | 3103 | ||
3121 | obj = slab_get_obj(cachep, page, nodeid); | 3104 | obj = slab_get_obj(cachep, page); |
3122 | n->free_objects--; | 3105 | n->free_objects--; |
3123 | /* move slabp to correct slabp list: */ | ||
3124 | list_del(&page->lru); | ||
3125 | 3106 | ||
3126 | if (page->active == cachep->num) | 3107 | fixup_slab_list(cachep, n, page, &list); |
3127 | list_add(&page->lru, &n->slabs_full); | ||
3128 | else | ||
3129 | list_add(&page->lru, &n->slabs_partial); | ||
3130 | 3108 | ||
3131 | spin_unlock(&n->list_lock); | 3109 | spin_unlock(&n->list_lock); |
3110 | fixup_objfreelist_debug(cachep, &list); | ||
3132 | goto done; | 3111 | goto done; |
3133 | 3112 | ||
3134 | must_grow: | 3113 | must_grow: |
@@ -3152,14 +3131,10 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, | |||
3152 | int slab_node = numa_mem_id(); | 3131 | int slab_node = numa_mem_id(); |
3153 | 3132 | ||
3154 | flags &= gfp_allowed_mask; | 3133 | flags &= gfp_allowed_mask; |
3155 | 3134 | cachep = slab_pre_alloc_hook(cachep, flags); | |
3156 | lockdep_trace_alloc(flags); | 3135 | if (unlikely(!cachep)) |
3157 | |||
3158 | if (slab_should_failslab(cachep, flags)) | ||
3159 | return NULL; | 3136 | return NULL; |
3160 | 3137 | ||
3161 | cachep = memcg_kmem_get_cache(cachep, flags); | ||
3162 | |||
3163 | cache_alloc_debugcheck_before(cachep, flags); | 3138 | cache_alloc_debugcheck_before(cachep, flags); |
3164 | local_irq_save(save_flags); | 3139 | local_irq_save(save_flags); |
3165 | 3140 | ||
@@ -3188,16 +3163,11 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, | |||
3188 | out: | 3163 | out: |
3189 | local_irq_restore(save_flags); | 3164 | local_irq_restore(save_flags); |
3190 | ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller); | 3165 | ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller); |
3191 | kmemleak_alloc_recursive(ptr, cachep->object_size, 1, cachep->flags, | ||
3192 | flags); | ||
3193 | 3166 | ||
3194 | if (likely(ptr)) { | 3167 | if (unlikely(flags & __GFP_ZERO) && ptr) |
3195 | kmemcheck_slab_alloc(cachep, flags, ptr, cachep->object_size); | 3168 | memset(ptr, 0, cachep->object_size); |
3196 | if (unlikely(flags & __GFP_ZERO)) | ||
3197 | memset(ptr, 0, cachep->object_size); | ||
3198 | } | ||
3199 | 3169 | ||
3200 | memcg_kmem_put_cache(cachep); | 3170 | slab_post_alloc_hook(cachep, flags, 1, &ptr); |
3201 | return ptr; | 3171 | return ptr; |
3202 | } | 3172 | } |
3203 | 3173 | ||
@@ -3240,30 +3210,21 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller) | |||
3240 | void *objp; | 3210 | void *objp; |
3241 | 3211 | ||
3242 | flags &= gfp_allowed_mask; | 3212 | flags &= gfp_allowed_mask; |
3243 | 3213 | cachep = slab_pre_alloc_hook(cachep, flags); | |
3244 | lockdep_trace_alloc(flags); | 3214 | if (unlikely(!cachep)) |
3245 | |||
3246 | if (slab_should_failslab(cachep, flags)) | ||
3247 | return NULL; | 3215 | return NULL; |
3248 | 3216 | ||
3249 | cachep = memcg_kmem_get_cache(cachep, flags); | ||
3250 | |||
3251 | cache_alloc_debugcheck_before(cachep, flags); | 3217 | cache_alloc_debugcheck_before(cachep, flags); |
3252 | local_irq_save(save_flags); | 3218 | local_irq_save(save_flags); |
3253 | objp = __do_cache_alloc(cachep, flags); | 3219 | objp = __do_cache_alloc(cachep, flags); |
3254 | local_irq_restore(save_flags); | 3220 | local_irq_restore(save_flags); |
3255 | objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller); | 3221 | objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller); |
3256 | kmemleak_alloc_recursive(objp, cachep->object_size, 1, cachep->flags, | ||
3257 | flags); | ||
3258 | prefetchw(objp); | 3222 | prefetchw(objp); |
3259 | 3223 | ||
3260 | if (likely(objp)) { | 3224 | if (unlikely(flags & __GFP_ZERO) && objp) |
3261 | kmemcheck_slab_alloc(cachep, flags, objp, cachep->object_size); | 3225 | memset(objp, 0, cachep->object_size); |
3262 | if (unlikely(flags & __GFP_ZERO)) | ||
3263 | memset(objp, 0, cachep->object_size); | ||
3264 | } | ||
3265 | 3226 | ||
3266 | memcg_kmem_put_cache(cachep); | 3227 | slab_post_alloc_hook(cachep, flags, 1, &objp); |
3267 | return objp; | 3228 | return objp; |
3268 | } | 3229 | } |
3269 | 3230 | ||
@@ -3281,13 +3242,12 @@ static void free_block(struct kmem_cache *cachep, void **objpp, | |||
3281 | void *objp; | 3242 | void *objp; |
3282 | struct page *page; | 3243 | struct page *page; |
3283 | 3244 | ||
3284 | clear_obj_pfmemalloc(&objpp[i]); | ||
3285 | objp = objpp[i]; | 3245 | objp = objpp[i]; |
3286 | 3246 | ||
3287 | page = virt_to_head_page(objp); | 3247 | page = virt_to_head_page(objp); |
3288 | list_del(&page->lru); | 3248 | list_del(&page->lru); |
3289 | check_spinlock_acquired_node(cachep, node); | 3249 | check_spinlock_acquired_node(cachep, node); |
3290 | slab_put_obj(cachep, page, objp, node); | 3250 | slab_put_obj(cachep, page, objp); |
3291 | STATS_DEC_ACTIVE(cachep); | 3251 | STATS_DEC_ACTIVE(cachep); |
3292 | n->free_objects++; | 3252 | n->free_objects++; |
3293 | 3253 | ||
@@ -3317,9 +3277,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) | |||
3317 | LIST_HEAD(list); | 3277 | LIST_HEAD(list); |
3318 | 3278 | ||
3319 | batchcount = ac->batchcount; | 3279 | batchcount = ac->batchcount; |
3320 | #if DEBUG | 3280 | |
3321 | BUG_ON(!batchcount || batchcount > ac->avail); | ||
3322 | #endif | ||
3323 | check_irq_off(); | 3281 | check_irq_off(); |
3324 | n = get_node(cachep, node); | 3282 | n = get_node(cachep, node); |
3325 | spin_lock(&n->list_lock); | 3283 | spin_lock(&n->list_lock); |
@@ -3389,7 +3347,16 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp, | |||
3389 | cache_flusharray(cachep, ac); | 3347 | cache_flusharray(cachep, ac); |
3390 | } | 3348 | } |
3391 | 3349 | ||
3392 | ac_put_obj(cachep, ac, objp); | 3350 | if (sk_memalloc_socks()) { |
3351 | struct page *page = virt_to_head_page(objp); | ||
3352 | |||
3353 | if (unlikely(PageSlabPfmemalloc(page))) { | ||
3354 | cache_free_pfmemalloc(cachep, page, objp); | ||
3355 | return; | ||
3356 | } | ||
3357 | } | ||
3358 | |||
3359 | ac->entry[ac->avail++] = objp; | ||
3393 | } | 3360 | } |
3394 | 3361 | ||
3395 | /** | 3362 | /** |
@@ -3411,16 +3378,53 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) | |||
3411 | } | 3378 | } |
3412 | EXPORT_SYMBOL(kmem_cache_alloc); | 3379 | EXPORT_SYMBOL(kmem_cache_alloc); |
3413 | 3380 | ||
3414 | void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) | 3381 | static __always_inline void |
3382 | cache_alloc_debugcheck_after_bulk(struct kmem_cache *s, gfp_t flags, | ||
3383 | size_t size, void **p, unsigned long caller) | ||
3415 | { | 3384 | { |
3416 | __kmem_cache_free_bulk(s, size, p); | 3385 | size_t i; |
3386 | |||
3387 | for (i = 0; i < size; i++) | ||
3388 | p[i] = cache_alloc_debugcheck_after(s, flags, p[i], caller); | ||
3417 | } | 3389 | } |
3418 | EXPORT_SYMBOL(kmem_cache_free_bulk); | ||
3419 | 3390 | ||
3420 | int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, | 3391 | int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, |
3421 | void **p) | 3392 | void **p) |
3422 | { | 3393 | { |
3423 | return __kmem_cache_alloc_bulk(s, flags, size, p); | 3394 | size_t i; |
3395 | |||
3396 | s = slab_pre_alloc_hook(s, flags); | ||
3397 | if (!s) | ||
3398 | return 0; | ||
3399 | |||
3400 | cache_alloc_debugcheck_before(s, flags); | ||
3401 | |||
3402 | local_irq_disable(); | ||
3403 | for (i = 0; i < size; i++) { | ||
3404 | void *objp = __do_cache_alloc(s, flags); | ||
3405 | |||
3406 | if (unlikely(!objp)) | ||
3407 | goto error; | ||
3408 | p[i] = objp; | ||
3409 | } | ||
3410 | local_irq_enable(); | ||
3411 | |||
3412 | cache_alloc_debugcheck_after_bulk(s, flags, size, p, _RET_IP_); | ||
3413 | |||
3414 | /* Clear memory outside IRQ disabled section */ | ||
3415 | if (unlikely(flags & __GFP_ZERO)) | ||
3416 | for (i = 0; i < size; i++) | ||
3417 | memset(p[i], 0, s->object_size); | ||
3418 | |||
3419 | slab_post_alloc_hook(s, flags, size, p); | ||
3420 | /* FIXME: Trace call missing. Christoph would like a bulk variant */ | ||
3421 | return size; | ||
3422 | error: | ||
3423 | local_irq_enable(); | ||
3424 | cache_alloc_debugcheck_after_bulk(s, flags, i, p, _RET_IP_); | ||
3425 | slab_post_alloc_hook(s, flags, i, p); | ||
3426 | __kmem_cache_free_bulk(s, i, p); | ||
3427 | return 0; | ||
3424 | } | 3428 | } |
3425 | EXPORT_SYMBOL(kmem_cache_alloc_bulk); | 3429 | EXPORT_SYMBOL(kmem_cache_alloc_bulk); |
3426 | 3430 | ||
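
kmem_cache_alloc_bulk() is all-or-nothing: on failure it frees whatever it already took so the caller never sees a half-filled array. The same shape in plain C, with malloc standing in for the cache:

#include <stdio.h>
#include <stdlib.h>

static size_t bulk_alloc(size_t obj_size, size_t size, void **p)
{
	size_t i;

	for (i = 0; i < size; i++) {
		p[i] = malloc(obj_size);
		if (!p[i])
			goto error;
	}
	return size;

error:
	while (i--)
		free(p[i]);	/* mirrors __kmem_cache_free_bulk(s, i, p) */
	return 0;
}

int main(void)
{
	void *p[16];
	size_t i;

	if (bulk_alloc(64, 16, p) != 16)
		return 1;
	for (i = 0; i < 16; i++)
		free(p[i]);
	puts("bulk alloc ok");
	return 0;
}
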
@@ -3567,6 +3571,32 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp) | |||
3567 | } | 3571 | } |
3568 | EXPORT_SYMBOL(kmem_cache_free); | 3572 | EXPORT_SYMBOL(kmem_cache_free); |
3569 | 3573 | ||
3574 | void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p) | ||
3575 | { | ||
3576 | struct kmem_cache *s; | ||
3577 | size_t i; | ||
3578 | |||
3579 | local_irq_disable(); | ||
3580 | for (i = 0; i < size; i++) { | ||
3581 | void *objp = p[i]; | ||
3582 | |||
3583 | if (!orig_s) /* called via kfree_bulk */ | ||
3584 | s = virt_to_cache(objp); | ||
3585 | else | ||
3586 | s = cache_from_obj(orig_s, objp); | ||
3587 | |||
3588 | debug_check_no_locks_freed(objp, s->object_size); | ||
3589 | if (!(s->flags & SLAB_DEBUG_OBJECTS)) | ||
3590 | debug_check_no_obj_freed(objp, s->object_size); | ||
3591 | |||
3592 | __cache_free(s, objp, _RET_IP_); | ||
3593 | } | ||
3594 | local_irq_enable(); | ||
3595 | |||
3596 | /* FIXME: add tracing */ | ||
3597 | } | ||
3598 | EXPORT_SYMBOL(kmem_cache_free_bulk); | ||
3599 | |||
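
A hypothetical caller of the bulk pair, not part of this patch (my_cache and process_batch are invented for illustration); passing a NULL cache to kmem_cache_free_bulk() instead resolves each pointer via virt_to_cache(), which is what kfree_bulk() relies on:

#include <linux/errno.h>
#include <linux/slab.h>

/* Hypothetical caller; my_cache would be created with kmem_cache_create(). */
static int process_batch(struct kmem_cache *my_cache)
{
	void *objs[16];
	int n;

	n = kmem_cache_alloc_bulk(my_cache, GFP_KERNEL, 16, objs);
	if (!n)
		return -ENOMEM;	/* nothing was allocated on failure */

	/* ... work on objs[0..n) ... */

	kmem_cache_free_bulk(my_cache, n, objs);
	return 0;
}
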
3570 | /** | 3600 | /** |
3571 | * kfree - free previously allocated memory | 3601 | * kfree - free previously allocated memory |
3572 | * @objp: pointer returned by kmalloc. | 3602 | * @objp: pointer returned by kmalloc. |
@@ -4102,15 +4132,34 @@ static void handle_slab(unsigned long *n, struct kmem_cache *c, | |||
4102 | struct page *page) | 4132 | struct page *page) |
4103 | { | 4133 | { |
4104 | void *p; | 4134 | void *p; |
4105 | int i; | 4135 | int i, j; |
4136 | unsigned long v; | ||
4106 | 4137 | ||
4107 | if (n[0] == n[1]) | 4138 | if (n[0] == n[1]) |
4108 | return; | 4139 | return; |
4109 | for (i = 0, p = page->s_mem; i < c->num; i++, p += c->size) { | 4140 | for (i = 0, p = page->s_mem; i < c->num; i++, p += c->size) { |
4110 | if (get_obj_status(page, i) != OBJECT_ACTIVE) | 4141 | bool active = true; |
4142 | |||
4143 | for (j = page->active; j < c->num; j++) { | ||
4144 | if (get_free_obj(page, j) == i) { | ||
4145 | active = false; | ||
4146 | break; | ||
4147 | } | ||
4148 | } | ||
4149 | |||
4150 | if (!active) | ||
4151 | continue; | ||
4152 | |||
4153 | /* | ||
4154 | * probe_kernel_read() is used for DEBUG_PAGEALLOC. The page table | ||
4155 | * mapping is established only at actual object allocation, so we | ||
4156 | * could otherwise mistakenly access an unmapped object in the cpu | ||
4157 | * cache. | ||
4158 | */ | ||
4159 | if (probe_kernel_read(&v, dbg_userword(c, p), sizeof(v))) | ||
4111 | continue; | 4160 | continue; |
4112 | 4161 | ||
4113 | if (!add_caller(n, (unsigned long)*dbg_userword(c, p))) | 4162 | if (!add_caller(n, v)) |
4114 | return; | 4163 | return; |
4115 | } | 4164 | } |
4116 | } | 4165 | } |
@@ -4146,21 +4195,31 @@ static int leaks_show(struct seq_file *m, void *p) | |||
4146 | if (!(cachep->flags & SLAB_RED_ZONE)) | 4195 | if (!(cachep->flags & SLAB_RED_ZONE)) |
4147 | return 0; | 4196 | return 0; |
4148 | 4197 | ||
4149 | /* OK, we can do it */ | 4198 | /* |
4199 | * Set store_user_clean and start to grab stored user information | ||
4200 | * for all objects in this cache. If any alloc/free requests come | ||
4201 | * in during the processing, the information would be wrong, so the | ||
4202 | * whole processing is restarted. | ||
4203 | */ | ||
4204 | do { | ||
4205 | set_store_user_clean(cachep); | ||
4206 | drain_cpu_caches(cachep); | ||
4150 | 4207 | ||
4151 | x[1] = 0; | 4208 | x[1] = 0; |
4152 | 4209 | ||
4153 | for_each_kmem_cache_node(cachep, node, n) { | 4210 | for_each_kmem_cache_node(cachep, node, n) { |
4154 | 4211 | ||
4155 | check_irq_on(); | 4212 | check_irq_on(); |
4156 | spin_lock_irq(&n->list_lock); | 4213 | spin_lock_irq(&n->list_lock); |
4214 | |||
4215 | list_for_each_entry(page, &n->slabs_full, lru) | ||
4216 | handle_slab(x, cachep, page); | ||
4217 | list_for_each_entry(page, &n->slabs_partial, lru) | ||
4218 | handle_slab(x, cachep, page); | ||
4219 | spin_unlock_irq(&n->list_lock); | ||
4220 | } | ||
4221 | } while (!is_store_user_clean(cachep)); | ||
4157 | 4222 | ||
4158 | list_for_each_entry(page, &n->slabs_full, lru) | ||
4159 | handle_slab(x, cachep, page); | ||
4160 | list_for_each_entry(page, &n->slabs_partial, lru) | ||
4161 | handle_slab(x, cachep, page); | ||
4162 | spin_unlock_irq(&n->list_lock); | ||
4163 | } | ||
4164 | name = cachep->name; | 4223 | name = cachep->name; |
4165 | if (x[0] == x[1]) { | 4224 | if (x[0] == x[1]) { |
4166 | /* Increase the buffer size */ | 4225 | /* Increase the buffer size */ |
@@ -38,6 +38,10 @@ struct kmem_cache { | |||
38 | #endif | 38 | #endif |
39 | 39 | ||
40 | #include <linux/memcontrol.h> | 40 | #include <linux/memcontrol.h> |
41 | #include <linux/fault-inject.h> | ||
42 | #include <linux/kmemcheck.h> | ||
43 | #include <linux/kasan.h> | ||
44 | #include <linux/kmemleak.h> | ||
41 | 45 | ||
42 | /* | 46 | /* |
43 | * State of the slab allocator. | 47 | * State of the slab allocator. |
@@ -121,7 +125,7 @@ static inline unsigned long kmem_cache_flags(unsigned long object_size, | |||
121 | #define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER) | 125 | #define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER) |
122 | #elif defined(CONFIG_SLUB_DEBUG) | 126 | #elif defined(CONFIG_SLUB_DEBUG) |
123 | #define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ | 127 | #define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ |
124 | SLAB_TRACE | SLAB_DEBUG_FREE) | 128 | SLAB_TRACE | SLAB_CONSISTENCY_CHECKS) |
125 | #else | 129 | #else |
126 | #define SLAB_DEBUG_FLAGS (0) | 130 | #define SLAB_DEBUG_FLAGS (0) |
127 | #endif | 131 | #endif |
@@ -168,7 +172,7 @@ ssize_t slabinfo_write(struct file *file, const char __user *buffer, | |||
168 | /* | 172 | /* |
169 | * Generic implementation of bulk operations | 173 | * Generic implementation of bulk operations |
170 | * These are useful for situations in which the allocator cannot | 174 | * These are useful for situations in which the allocator cannot |
171 | * perform optimizations. In that case segments of the objecct listed | 175 | * perform optimizations. In that case segments of the object listed |
172 | * may be allocated or freed using these operations. | 176 | * may be allocated or freed using these operations. |
173 | */ | 177 | */ |
174 | void __kmem_cache_free_bulk(struct kmem_cache *, size_t, void **); | 178 | void __kmem_cache_free_bulk(struct kmem_cache *, size_t, void **); |
@@ -307,7 +311,8 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x) | |||
307 | * to not do even the assignment. In that case, slab_equal_or_root | 311 | * to not do even the assignment. In that case, slab_equal_or_root |
308 | * will also be a constant. | 312 | * will also be a constant. |
309 | */ | 313 | */ |
310 | if (!memcg_kmem_enabled() && !unlikely(s->flags & SLAB_DEBUG_FREE)) | 314 | if (!memcg_kmem_enabled() && |
315 | !unlikely(s->flags & SLAB_CONSISTENCY_CHECKS)) | ||
311 | return s; | 316 | return s; |
312 | 317 | ||
313 | page = virt_to_head_page(x); | 318 | page = virt_to_head_page(x); |
@@ -321,6 +326,64 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x) | |||
321 | return s; | 326 | return s; |
322 | } | 327 | } |
323 | 328 | ||
329 | static inline size_t slab_ksize(const struct kmem_cache *s) | ||
330 | { | ||
331 | #ifndef CONFIG_SLUB | ||
332 | return s->object_size; | ||
333 | |||
334 | #else /* CONFIG_SLUB */ | ||
335 | # ifdef CONFIG_SLUB_DEBUG | ||
336 | /* | ||
337 | * Debugging requires use of the padding between object | ||
338 | * and whatever may come after it. | ||
339 | */ | ||
340 | if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) | ||
341 | return s->object_size; | ||
342 | # endif | ||
343 | /* | ||
344 | * If we have the need to store the freelist pointer | ||
345 | * back there or track user information then we can | ||
346 | * only use the space before that information. | ||
347 | */ | ||
348 | if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER)) | ||
349 | return s->inuse; | ||
350 | /* | ||
351 | * Else we can use all the padding etc for the allocation | ||
352 | */ | ||
353 | return s->size; | ||
354 | #endif | ||
355 | } | ||
356 | |||
357 | static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, | ||
358 | gfp_t flags) | ||
359 | { | ||
360 | flags &= gfp_allowed_mask; | ||
361 | lockdep_trace_alloc(flags); | ||
362 | might_sleep_if(gfpflags_allow_blocking(flags)); | ||
363 | |||
364 | if (should_failslab(s, flags)) | ||
365 | return NULL; | ||
366 | |||
367 | return memcg_kmem_get_cache(s, flags); | ||
368 | } | ||
369 | |||
370 | static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, | ||
371 | size_t size, void **p) | ||
372 | { | ||
373 | size_t i; | ||
374 | |||
375 | flags &= gfp_allowed_mask; | ||
376 | for (i = 0; i < size; i++) { | ||
377 | void *object = p[i]; | ||
378 | |||
379 | kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); | ||
380 | kmemleak_alloc_recursive(object, s->object_size, 1, | ||
381 | s->flags, flags); | ||
382 | kasan_slab_alloc(s, object); | ||
383 | } | ||
384 | memcg_kmem_put_cache(s); | ||
385 | } | ||
386 | |||
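
The pre/post hook split lets a single post hook cover both the single-object and bulk allocation paths. A rough userspace analogue of the pairing; the hook bodies are invented:

#include <stdio.h>
#include <stdlib.h>

/* Invented hooks; the kernel versions do fault injection, memcg lookup,
 * kmemcheck/kmemleak/kasan marking and memcg release. */
static int pre_hook(void)
{
	return 1;	/* a failslab-style pre hook could veto here */
}

static void post_hook(void **p, size_t n)
{
	size_t i;

	for (i = 0; i < n; i++)
		printf("track object %p\n", p[i]);
}

int main(void)
{
	void *p[2];

	if (!pre_hook())
		return 1;
	p[0] = malloc(32);
	p[1] = malloc(32);
	post_hook(p, 2);	/* one post hook covers single and bulk paths */
	free(p[0]);
	free(p[1]);
	return 0;
}
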
324 | #ifndef CONFIG_SLOB | 387 | #ifndef CONFIG_SLOB |
325 | /* | 388 | /* |
326 | * The slab lists for all objects. | 389 | * The slab lists for all objects. |
diff --git a/mm/slab_common.c b/mm/slab_common.c index 065b7bdabdc3..6afb2263a5c5 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c | |||
@@ -109,8 +109,12 @@ void __kmem_cache_free_bulk(struct kmem_cache *s, size_t nr, void **p) | |||
109 | { | 109 | { |
110 | size_t i; | 110 | size_t i; |
111 | 111 | ||
112 | for (i = 0; i < nr; i++) | 112 | for (i = 0; i < nr; i++) { |
113 | kmem_cache_free(s, p[i]); | 113 | if (s) |
114 | kmem_cache_free(s, p[i]); | ||
115 | else | ||
116 | kfree(p[i]); | ||
117 | } | ||
114 | } | 118 | } |
115 | 119 | ||
116 | int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr, | 120 | int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr, |
@@ -124,6 +124,14 @@ static inline int kmem_cache_debug(struct kmem_cache *s) | |||
124 | #endif | 124 | #endif |
125 | } | 125 | } |
126 | 126 | ||
127 | static inline void *fixup_red_left(struct kmem_cache *s, void *p) | ||
128 | { | ||
129 | if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) | ||
130 | p += s->red_left_pad; | ||
131 | |||
132 | return p; | ||
133 | } | ||
134 | |||
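
fixup_red_left() supports the new left redzone, where each slot starts with poisoned pad bytes and the user-visible pointer is offset past them. A compilable sketch with an assumed pad size:

#include <stdio.h>

#define RED_LEFT_PAD	16	/* assumed; SLUB derives red_left_pad per cache */

/*
 * With the left redzone, every slot is laid out as
 *   [red_left_pad][object][right redzone / metadata]
 * so buffer underflows land in poisoned pad bytes. fixup_red_left()
 * turns a slot base address into the object pointer users see:
 */
static void *fixup_red_left(void *slot)
{
	return (char *)slot + RED_LEFT_PAD;
}

/* restore_red_left() in mm/slub.c is the inverse, used for validation. */
static void *restore_red_left(void *obj)
{
	return (char *)obj - RED_LEFT_PAD;
}

int main(void)
{
	char slot[64];
	void *obj = fixup_red_left(slot);

	printf("round trip ok: %d\n", restore_red_left(obj) == (void *)slot);
	return 0;
}
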
127 | static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s) | 135 | static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s) |
128 | { | 136 | { |
129 | #ifdef CONFIG_SLUB_CPU_PARTIAL | 137 | #ifdef CONFIG_SLUB_CPU_PARTIAL |
@@ -160,10 +168,18 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s) | |||
160 | */ | 168 | */ |
161 | #define MAX_PARTIAL 10 | 169 | #define MAX_PARTIAL 10 |
162 | 170 | ||
163 | #define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \ | 171 | #define DEBUG_DEFAULT_FLAGS (SLAB_CONSISTENCY_CHECKS | SLAB_RED_ZONE | \ |
164 | SLAB_POISON | SLAB_STORE_USER) | 172 | SLAB_POISON | SLAB_STORE_USER) |
165 | 173 | ||
166 | /* | 174 | /* |
175 | * These debug flags cannot use CMPXCHG because there might be consistency | ||
176 | * issues when checking or reading debug information | ||
177 | */ | ||
178 | #define SLAB_NO_CMPXCHG (SLAB_CONSISTENCY_CHECKS | SLAB_STORE_USER | \ | ||
179 | SLAB_TRACE) | ||
180 | |||
181 | |||
182 | /* | ||
167 | * Debugging flags that require metadata to be stored in the slab. These get | 183 | * Debugging flags that require metadata to be stored in the slab. These get |
168 | * disabled when slub_debug=O is used and a cache's min order increases with | 184 | * disabled when slub_debug=O is used and a cache's min order increases with |
169 | * metadata. | 185 | * metadata. |
@@ -224,24 +240,6 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si) | |||
224 | * Core slab cache functions | 240 | * Core slab cache functions |
225 | *******************************************************************/ | 241 | *******************************************************************/ |
226 | 242 | ||
227 | /* Verify that a pointer has an address that is valid within a slab page */ | ||
228 | static inline int check_valid_pointer(struct kmem_cache *s, | ||
229 | struct page *page, const void *object) | ||
230 | { | ||
231 | void *base; | ||
232 | |||
233 | if (!object) | ||
234 | return 1; | ||
235 | |||
236 | base = page_address(page); | ||
237 | if (object < base || object >= base + page->objects * s->size || | ||
238 | (object - base) % s->size) { | ||
239 | return 0; | ||
240 | } | ||
241 | |||
242 | return 1; | ||
243 | } | ||
244 | |||
245 | static inline void *get_freepointer(struct kmem_cache *s, void *object) | 243 | static inline void *get_freepointer(struct kmem_cache *s, void *object) |
246 | { | 244 | { |
247 | return *(void **)(object + s->offset); | 245 | return *(void **)(object + s->offset); |
@@ -271,12 +269,14 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) | |||
271 | 269 | ||
272 | /* Loop over all objects in a slab */ | 270 | /* Loop over all objects in a slab */ |
273 | #define for_each_object(__p, __s, __addr, __objects) \ | 271 | #define for_each_object(__p, __s, __addr, __objects) \ |
274 | for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\ | 272 | for (__p = fixup_red_left(__s, __addr); \ |
275 | __p += (__s)->size) | 273 | __p < (__addr) + (__objects) * (__s)->size; \ |
274 | __p += (__s)->size) | ||
276 | 275 | ||
277 | #define for_each_object_idx(__p, __idx, __s, __addr, __objects) \ | 276 | #define for_each_object_idx(__p, __idx, __s, __addr, __objects) \ |
278 | for (__p = (__addr), __idx = 1; __idx <= __objects;\ | 277 | for (__p = fixup_red_left(__s, __addr), __idx = 1; \ |
279 | __p += (__s)->size, __idx++) | 278 | __idx <= __objects; \ |
279 | __p += (__s)->size, __idx++) | ||
280 | 280 | ||
281 | /* Determine object index from a given position */ | 281 | /* Determine object index from a given position */ |
282 | static inline int slab_index(void *p, struct kmem_cache *s, void *addr) | 282 | static inline int slab_index(void *p, struct kmem_cache *s, void *addr) |
@@ -284,30 +284,6 @@ static inline int slab_index(void *p, struct kmem_cache *s, void *addr) | |||
284 | return (p - addr) / s->size; | 284 | return (p - addr) / s->size; |
285 | } | 285 | } |
286 | 286 | ||
287 | static inline size_t slab_ksize(const struct kmem_cache *s) | ||
288 | { | ||
289 | #ifdef CONFIG_SLUB_DEBUG | ||
290 | /* | ||
291 | * Debugging requires use of the padding between object | ||
292 | * and whatever may come after it. | ||
293 | */ | ||
294 | if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) | ||
295 | return s->object_size; | ||
296 | |||
297 | #endif | ||
298 | /* | ||
299 | * If we have the need to store the freelist pointer | ||
300 | * back there or track user information then we can | ||
301 | * only use the space before that information. | ||
302 | */ | ||
303 | if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER)) | ||
304 | return s->inuse; | ||
305 | /* | ||
306 | * Else we can use all the padding etc for the allocation | ||
307 | */ | ||
308 | return s->size; | ||
309 | } | ||
310 | |||
311 | static inline int order_objects(int order, unsigned long size, int reserved) | 287 | static inline int order_objects(int order, unsigned long size, int reserved) |
312 | { | 288 | { |
313 | return ((PAGE_SIZE << order) - reserved) / size; | 289 | return ((PAGE_SIZE << order) - reserved) / size; |
@@ -458,6 +434,22 @@ static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map) | |||
458 | set_bit(slab_index(p, s, addr), map); | 434 | set_bit(slab_index(p, s, addr), map); |
459 | } | 435 | } |
460 | 436 | ||
437 | static inline int size_from_object(struct kmem_cache *s) | ||
438 | { | ||
439 | if (s->flags & SLAB_RED_ZONE) | ||
440 | return s->size - s->red_left_pad; | ||
441 | |||
442 | return s->size; | ||
443 | } | ||
444 | |||
445 | static inline void *restore_red_left(struct kmem_cache *s, void *p) | ||
446 | { | ||
447 | if (s->flags & SLAB_RED_ZONE) | ||
448 | p -= s->red_left_pad; | ||
449 | |||
450 | return p; | ||
451 | } | ||
452 | |||
461 | /* | 453 | /* |
462 | * Debug settings: | 454 | * Debug settings: |
463 | */ | 455 | */ |
@@ -491,6 +483,26 @@ static inline void metadata_access_disable(void) | |||
491 | /* | 483 | /* |
492 | * Object debugging | 484 | * Object debugging |
493 | */ | 485 | */ |
486 | |||
487 | /* Verify that a pointer has an address that is valid within a slab page */ | ||
488 | static inline int check_valid_pointer(struct kmem_cache *s, | ||
489 | struct page *page, void *object) | ||
490 | { | ||
491 | void *base; | ||
492 | |||
493 | if (!object) | ||
494 | return 1; | ||
495 | |||
496 | base = page_address(page); | ||
497 | object = restore_red_left(s, object); | ||
498 | if (object < base || object >= base + page->objects * s->size || | ||
499 | (object - base) % s->size) { | ||
500 | return 0; | ||
501 | } | ||
502 | |||
503 | return 1; | ||
504 | } | ||
505 | |||
494 | static void print_section(char *text, u8 *addr, unsigned int length) | 506 | static void print_section(char *text, u8 *addr, unsigned int length) |
495 | { | 507 | { |
496 | metadata_access_enable(); | 508 | metadata_access_enable(); |
@@ -630,7 +642,9 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) | |||
630 | pr_err("INFO: Object 0x%p @offset=%tu fp=0x%p\n\n", | 642 | pr_err("INFO: Object 0x%p @offset=%tu fp=0x%p\n\n", |
631 | p, p - addr, get_freepointer(s, p)); | 643 | p, p - addr, get_freepointer(s, p)); |
632 | 644 | ||
633 | if (p > addr + 16) | 645 | if (s->flags & SLAB_RED_ZONE) |
646 | print_section("Redzone ", p - s->red_left_pad, s->red_left_pad); | ||
647 | else if (p > addr + 16) | ||
634 | print_section("Bytes b4 ", p - 16, 16); | 648 | print_section("Bytes b4 ", p - 16, 16); |
635 | 649 | ||
636 | print_section("Object ", p, min_t(unsigned long, s->object_size, | 650 | print_section("Object ", p, min_t(unsigned long, s->object_size, |
@@ -647,9 +661,9 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) | |||
647 | if (s->flags & SLAB_STORE_USER) | 661 | if (s->flags & SLAB_STORE_USER) |
648 | off += 2 * sizeof(struct track); | 662 | off += 2 * sizeof(struct track); |
649 | 663 | ||
650 | if (off != s->size) | 664 | if (off != size_from_object(s)) |
651 | /* Beginning of the filler is the free pointer */ | 665 | /* Beginning of the filler is the free pointer */ |
652 | print_section("Padding ", p + off, s->size - off); | 666 | print_section("Padding ", p + off, size_from_object(s) - off); |
653 | 667 | ||
654 | dump_stack(); | 668 | dump_stack(); |
655 | } | 669 | } |
@@ -679,6 +693,9 @@ static void init_object(struct kmem_cache *s, void *object, u8 val) | |||
679 | { | 693 | { |
680 | u8 *p = object; | 694 | u8 *p = object; |
681 | 695 | ||
696 | if (s->flags & SLAB_RED_ZONE) | ||
697 | memset(p - s->red_left_pad, val, s->red_left_pad); | ||
698 | |||
682 | if (s->flags & __OBJECT_POISON) { | 699 | if (s->flags & __OBJECT_POISON) { |
683 | memset(p, POISON_FREE, s->object_size - 1); | 700 | memset(p, POISON_FREE, s->object_size - 1); |
684 | p[s->object_size - 1] = POISON_END; | 701 | p[s->object_size - 1] = POISON_END; |
@@ -771,11 +788,11 @@ static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p) | |||
771 | /* We also have user information there */ | 788 | /* We also have user information there */ |
772 | off += 2 * sizeof(struct track); | 789 | off += 2 * sizeof(struct track); |
773 | 790 | ||
774 | if (s->size == off) | 791 | if (size_from_object(s) == off) |
775 | return 1; | 792 | return 1; |
776 | 793 | ||
777 | return check_bytes_and_report(s, page, p, "Object padding", | 794 | return check_bytes_and_report(s, page, p, "Object padding", |
778 | p + off, POISON_INUSE, s->size - off); | 795 | p + off, POISON_INUSE, size_from_object(s) - off); |
779 | } | 796 | } |
780 | 797 | ||
781 | /* Check the pad bytes at the end of a slab page */ | 798 | /* Check the pad bytes at the end of a slab page */ |
@@ -820,6 +837,10 @@ static int check_object(struct kmem_cache *s, struct page *page, | |||
820 | 837 | ||
821 | if (s->flags & SLAB_RED_ZONE) { | 838 | if (s->flags & SLAB_RED_ZONE) { |
822 | if (!check_bytes_and_report(s, page, object, "Redzone", | 839 | if (!check_bytes_and_report(s, page, object, "Redzone", |
840 | object - s->red_left_pad, val, s->red_left_pad)) | ||
841 | return 0; | ||
842 | |||
843 | if (!check_bytes_and_report(s, page, object, "Redzone", | ||
823 | endobject, val, s->inuse - s->object_size)) | 844 | endobject, val, s->inuse - s->object_size)) |
824 | return 0; | 845 | return 0; |
825 | } else { | 846 | } else { |
@@ -1031,20 +1052,32 @@ static void setup_object_debug(struct kmem_cache *s, struct page *page, | |||
1031 | init_tracking(s, object); | 1052 | init_tracking(s, object); |
1032 | } | 1053 | } |
1033 | 1054 | ||
1034 | static noinline int alloc_debug_processing(struct kmem_cache *s, | 1055 | static inline int alloc_consistency_checks(struct kmem_cache *s, |
1035 | struct page *page, | 1056 | struct page *page, |
1036 | void *object, unsigned long addr) | 1057 | void *object, unsigned long addr) |
1037 | { | 1058 | { |
1038 | if (!check_slab(s, page)) | 1059 | if (!check_slab(s, page)) |
1039 | goto bad; | 1060 | return 0; |
1040 | 1061 | ||
1041 | if (!check_valid_pointer(s, page, object)) { | 1062 | if (!check_valid_pointer(s, page, object)) { |
1042 | object_err(s, page, object, "Freelist Pointer check fails"); | 1063 | object_err(s, page, object, "Freelist Pointer check fails"); |
1043 | goto bad; | 1064 | return 0; |
1044 | } | 1065 | } |
1045 | 1066 | ||
1046 | if (!check_object(s, page, object, SLUB_RED_INACTIVE)) | 1067 | if (!check_object(s, page, object, SLUB_RED_INACTIVE)) |
1047 | goto bad; | 1068 | return 0; |
1069 | |||
1070 | return 1; | ||
1071 | } | ||
1072 | |||
1073 | static noinline int alloc_debug_processing(struct kmem_cache *s, | ||
1074 | struct page *page, | ||
1075 | void *object, unsigned long addr) | ||
1076 | { | ||
1077 | if (s->flags & SLAB_CONSISTENCY_CHECKS) { | ||
1078 | if (!alloc_consistency_checks(s, page, object, addr)) | ||
1079 | goto bad; | ||
1080 | } | ||
1048 | 1081 | ||
1049 | /* Success: perform special debug activities for allocs */ | 1082 | /* Success: perform special debug activities for allocs */ |
1050 | if (s->flags & SLAB_STORE_USER) | 1083 | if (s->flags & SLAB_STORE_USER) |
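
Splitting alloc_consistency_checks() out and gating it on SLAB_CONSISTENCY_CHECKS means the costly pointer and freelist validation only runs when 'F' debugging is requested, while cheaper options such as user tracking keep working on their own. A small sketch of the gating pattern, with hypothetical flag and function names:

#include <stdio.h>

#define FLAG_CONSISTENCY_CHECKS 0x1
#define FLAG_STORE_USER         0x2

/* Stand-in for the expensive validation path */
static int consistency_checks(void *obj)
{
	return obj != NULL;
}

static int debug_processing(unsigned long flags, void *obj)
{
	if (flags & FLAG_CONSISTENCY_CHECKS)
		if (!consistency_checks(obj))
			return 0;
	if (flags & FLAG_STORE_USER)
		printf("tracking caller\n");    /* cheap bookkeeping still runs */
	return 1;
}

int main(void)
{
	printf("%d\n", debug_processing(FLAG_STORE_USER, NULL));         /* 1 */
	printf("%d\n", debug_processing(FLAG_CONSISTENCY_CHECKS, NULL)); /* 0 */
	return 0;
}
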
@@ -1067,37 +1100,21 @@ bad: | |||
1067 | return 0; | 1100 | return 0; |
1068 | } | 1101 | } |
1069 | 1102 | ||
1070 | /* Supports checking bulk free of a constructed freelist */ | 1103 | static inline int free_consistency_checks(struct kmem_cache *s, |
1071 | static noinline struct kmem_cache_node *free_debug_processing( | 1104 | struct page *page, void *object, unsigned long addr) |
1072 | struct kmem_cache *s, struct page *page, | ||
1073 | void *head, void *tail, int bulk_cnt, | ||
1074 | unsigned long addr, unsigned long *flags) | ||
1075 | { | 1105 | { |
1076 | struct kmem_cache_node *n = get_node(s, page_to_nid(page)); | ||
1077 | void *object = head; | ||
1078 | int cnt = 0; | ||
1079 | |||
1080 | spin_lock_irqsave(&n->list_lock, *flags); | ||
1081 | slab_lock(page); | ||
1082 | |||
1083 | if (!check_slab(s, page)) | ||
1084 | goto fail; | ||
1085 | |||
1086 | next_object: | ||
1087 | cnt++; | ||
1088 | |||
1089 | if (!check_valid_pointer(s, page, object)) { | 1106 | if (!check_valid_pointer(s, page, object)) { |
1090 | slab_err(s, page, "Invalid object pointer 0x%p", object); | 1107 | slab_err(s, page, "Invalid object pointer 0x%p", object); |
1091 | goto fail; | 1108 | return 0; |
1092 | } | 1109 | } |
1093 | 1110 | ||
1094 | if (on_freelist(s, page, object)) { | 1111 | if (on_freelist(s, page, object)) { |
1095 | object_err(s, page, object, "Object already free"); | 1112 | object_err(s, page, object, "Object already free"); |
1096 | goto fail; | 1113 | return 0; |
1097 | } | 1114 | } |
1098 | 1115 | ||
1099 | if (!check_object(s, page, object, SLUB_RED_ACTIVE)) | 1116 | if (!check_object(s, page, object, SLUB_RED_ACTIVE)) |
1100 | goto out; | 1117 | return 0; |
1101 | 1118 | ||
1102 | if (unlikely(s != page->slab_cache)) { | 1119 | if (unlikely(s != page->slab_cache)) { |
1103 | if (!PageSlab(page)) { | 1120 | if (!PageSlab(page)) { |
@@ -1110,7 +1127,37 @@ next_object: | |||
1110 | } else | 1127 | } else |
1111 | object_err(s, page, object, | 1128 | object_err(s, page, object, |
1112 | "page slab pointer corrupt."); | 1129 | "page slab pointer corrupt."); |
1113 | goto fail; | 1130 | return 0; |
1131 | } | ||
1132 | return 1; | ||
1133 | } | ||
1134 | |||
1135 | /* Supports checking bulk free of a constructed freelist */ | ||
1136 | static noinline int free_debug_processing( | ||
1137 | struct kmem_cache *s, struct page *page, | ||
1138 | void *head, void *tail, int bulk_cnt, | ||
1139 | unsigned long addr) | ||
1140 | { | ||
1141 | struct kmem_cache_node *n = get_node(s, page_to_nid(page)); | ||
1142 | void *object = head; | ||
1143 | int cnt = 0; | ||
1144 | unsigned long uninitialized_var(flags); | ||
1145 | int ret = 0; | ||
1146 | |||
1147 | spin_lock_irqsave(&n->list_lock, flags); | ||
1148 | slab_lock(page); | ||
1149 | |||
1150 | if (s->flags & SLAB_CONSISTENCY_CHECKS) { | ||
1151 | if (!check_slab(s, page)) | ||
1152 | goto out; | ||
1153 | } | ||
1154 | |||
1155 | next_object: | ||
1156 | cnt++; | ||
1157 | |||
1158 | if (s->flags & SLAB_CONSISTENCY_CHECKS) { | ||
1159 | if (!free_consistency_checks(s, page, object, addr)) | ||
1160 | goto out; | ||
1114 | } | 1161 | } |
1115 | 1162 | ||
1116 | if (s->flags & SLAB_STORE_USER) | 1163 | if (s->flags & SLAB_STORE_USER) |
@@ -1124,23 +1171,18 @@ next_object: | |||
1124 | object = get_freepointer(s, object); | 1171 | object = get_freepointer(s, object); |
1125 | goto next_object; | 1172 | goto next_object; |
1126 | } | 1173 | } |
1174 | ret = 1; | ||
1175 | |||
1127 | out: | 1176 | out: |
1128 | if (cnt != bulk_cnt) | 1177 | if (cnt != bulk_cnt) |
1129 | slab_err(s, page, "Bulk freelist count(%d) invalid(%d)\n", | 1178 | slab_err(s, page, "Bulk freelist count(%d) invalid(%d)\n", |
1130 | bulk_cnt, cnt); | 1179 | bulk_cnt, cnt); |
1131 | 1180 | ||
1132 | slab_unlock(page); | 1181 | slab_unlock(page); |
1133 | /* | 1182 | spin_unlock_irqrestore(&n->list_lock, flags); |
1134 | * Keep node_lock to preserve integrity | 1183 | if (!ret) |
1135 | * until the object is actually freed | 1184 | slab_fix(s, "Object at 0x%p not freed", object); |
1136 | */ | 1185 | return ret; |
1137 | return n; | ||
1138 | |||
1139 | fail: | ||
1140 | slab_unlock(page); | ||
1141 | spin_unlock_irqrestore(&n->list_lock, *flags); | ||
1142 | slab_fix(s, "Object at 0x%p not freed", object); | ||
1143 | return NULL; | ||
1144 | } | 1186 | } |
1145 | 1187 | ||
1146 | static int __init setup_slub_debug(char *str) | 1188 | static int __init setup_slub_debug(char *str) |
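
free_debug_processing() now takes and drops n->list_lock itself and reports success through a plain int, replacing the old convention of returning the still-locked node on success and NULL on failure. A sketch of the resulting single-exit locking pattern, with a pthread mutex standing in for the node's spinlock:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static int checked_operation(int value)
{
	int ret = 0;

	pthread_mutex_lock(&lock);
	if (value < 0)
		goto out;               /* failure: fall through to the unlock */
	/* ... the actual work would happen here ... */
	ret = 1;
out:
	pthread_mutex_unlock(&lock);    /* one unlock path for both outcomes */
	if (!ret)
		fprintf(stderr, "operation rejected\n");
	return ret;
}

int main(void)
{
	printf("%d\n", checked_operation(5));   /* 1 */
	printf("%d\n", checked_operation(-1));  /* 0, after the error report */
	return 0;
}
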
@@ -1172,7 +1214,7 @@ static int __init setup_slub_debug(char *str) | |||
1172 | for (; *str && *str != ','; str++) { | 1214 | for (; *str && *str != ','; str++) { |
1173 | switch (tolower(*str)) { | 1215 | switch (tolower(*str)) { |
1174 | case 'f': | 1216 | case 'f': |
1175 | slub_debug |= SLAB_DEBUG_FREE; | 1217 | slub_debug |= SLAB_CONSISTENCY_CHECKS; |
1176 | break; | 1218 | break; |
1177 | case 'z': | 1219 | case 'z': |
1178 | slub_debug |= SLAB_RED_ZONE; | 1220 | slub_debug |= SLAB_RED_ZONE; |
@@ -1231,10 +1273,10 @@ static inline void setup_object_debug(struct kmem_cache *s, | |||
1231 | static inline int alloc_debug_processing(struct kmem_cache *s, | 1273 | static inline int alloc_debug_processing(struct kmem_cache *s, |
1232 | struct page *page, void *object, unsigned long addr) { return 0; } | 1274 | struct page *page, void *object, unsigned long addr) { return 0; } |
1233 | 1275 | ||
1234 | static inline struct kmem_cache_node *free_debug_processing( | 1276 | static inline int free_debug_processing( |
1235 | struct kmem_cache *s, struct page *page, | 1277 | struct kmem_cache *s, struct page *page, |
1236 | void *head, void *tail, int bulk_cnt, | 1278 | void *head, void *tail, int bulk_cnt, |
1237 | unsigned long addr, unsigned long *flags) { return NULL; } | 1279 | unsigned long addr) { return 0; } |
1238 | 1280 | ||
1239 | static inline int slab_pad_check(struct kmem_cache *s, struct page *page) | 1281 | static inline int slab_pad_check(struct kmem_cache *s, struct page *page) |
1240 | { return 1; } | 1282 | { return 1; } |
@@ -1281,36 +1323,6 @@ static inline void kfree_hook(const void *x) | |||
1281 | kasan_kfree_large(x); | 1323 | kasan_kfree_large(x); |
1282 | } | 1324 | } |
1283 | 1325 | ||
1284 | static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, | ||
1285 | gfp_t flags) | ||
1286 | { | ||
1287 | flags &= gfp_allowed_mask; | ||
1288 | lockdep_trace_alloc(flags); | ||
1289 | might_sleep_if(gfpflags_allow_blocking(flags)); | ||
1290 | |||
1291 | if (should_failslab(s->object_size, flags, s->flags)) | ||
1292 | return NULL; | ||
1293 | |||
1294 | return memcg_kmem_get_cache(s, flags); | ||
1295 | } | ||
1296 | |||
1297 | static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, | ||
1298 | size_t size, void **p) | ||
1299 | { | ||
1300 | size_t i; | ||
1301 | |||
1302 | flags &= gfp_allowed_mask; | ||
1303 | for (i = 0; i < size; i++) { | ||
1304 | void *object = p[i]; | ||
1305 | |||
1306 | kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); | ||
1307 | kmemleak_alloc_recursive(object, s->object_size, 1, | ||
1308 | s->flags, flags); | ||
1309 | kasan_slab_alloc(s, object); | ||
1310 | } | ||
1311 | memcg_kmem_put_cache(s); | ||
1312 | } | ||
1313 | |||
1314 | static inline void slab_free_hook(struct kmem_cache *s, void *x) | 1326 | static inline void slab_free_hook(struct kmem_cache *s, void *x) |
1315 | { | 1327 | { |
1316 | kmemleak_free_recursive(x, s->flags); | 1328 | kmemleak_free_recursive(x, s->flags); |
@@ -1470,7 +1482,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) | |||
1470 | set_freepointer(s, p, NULL); | 1482 | set_freepointer(s, p, NULL); |
1471 | } | 1483 | } |
1472 | 1484 | ||
1473 | page->freelist = start; | 1485 | page->freelist = fixup_red_left(s, start); |
1474 | page->inuse = page->objects; | 1486 | page->inuse = page->objects; |
1475 | page->frozen = 1; | 1487 | page->frozen = 1; |
1476 | 1488 | ||
@@ -1506,7 +1518,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page) | |||
1506 | int order = compound_order(page); | 1518 | int order = compound_order(page); |
1507 | int pages = 1 << order; | 1519 | int pages = 1 << order; |
1508 | 1520 | ||
1509 | if (kmem_cache_debug(s)) { | 1521 | if (s->flags & SLAB_CONSISTENCY_CHECKS) { |
1510 | void *p; | 1522 | void *p; |
1511 | 1523 | ||
1512 | slab_pad_check(s, page); | 1524 | slab_pad_check(s, page); |
@@ -2224,8 +2236,8 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) | |||
2224 | if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slub_oom_rs)) | 2236 | if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slub_oom_rs)) |
2225 | return; | 2237 | return; |
2226 | 2238 | ||
2227 | pr_warn("SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n", | 2239 | pr_warn("SLUB: Unable to allocate memory on node %d, gfp=%#x(%pGg)\n", |
2228 | nid, gfpflags); | 2240 | nid, gfpflags, &gfpflags); |
2229 | pr_warn(" cache: %s, object size: %d, buffer size: %d, default order: %d, min order: %d\n", | 2241 | pr_warn(" cache: %s, object size: %d, buffer size: %d, default order: %d, min order: %d\n", |
2230 | s->name, s->object_size, s->size, oo_order(s->oo), | 2242 | s->name, s->object_size, s->size, oo_order(s->oo), |
2231 | oo_order(s->min)); | 2243 | oo_order(s->min)); |
@@ -2642,8 +2654,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, | |||
2642 | stat(s, FREE_SLOWPATH); | 2654 | stat(s, FREE_SLOWPATH); |
2643 | 2655 | ||
2644 | if (kmem_cache_debug(s) && | 2656 | if (kmem_cache_debug(s) && |
2645 | !(n = free_debug_processing(s, page, head, tail, cnt, | 2657 | !free_debug_processing(s, page, head, tail, cnt, addr)) |
2646 | addr, &flags))) | ||
2647 | return; | 2658 | return; |
2648 | 2659 | ||
2649 | do { | 2660 | do { |
@@ -2815,6 +2826,7 @@ struct detached_freelist { | |||
2815 | void *tail; | 2826 | void *tail; |
2816 | void *freelist; | 2827 | void *freelist; |
2817 | int cnt; | 2828 | int cnt; |
2829 | struct kmem_cache *s; | ||
2818 | }; | 2830 | }; |
2819 | 2831 | ||
2820 | /* | 2832 | /* |
@@ -2829,26 +2841,45 @@ struct detached_freelist { | |||
2829 | * synchronization primitive. Look ahead in the array is limited | 2841 | * synchronization primitive. Look ahead in the array is limited |
2830 | * for performance reasons. | 2842 | * for performance reasons. |
2831 | */ | 2843 | */ |
2832 | static int build_detached_freelist(struct kmem_cache *s, size_t size, | 2844 | static inline |
2833 | void **p, struct detached_freelist *df) | 2845 | int build_detached_freelist(struct kmem_cache *s, size_t size, |
2846 | void **p, struct detached_freelist *df) | ||
2834 | { | 2847 | { |
2835 | size_t first_skipped_index = 0; | 2848 | size_t first_skipped_index = 0; |
2836 | int lookahead = 3; | 2849 | int lookahead = 3; |
2837 | void *object; | 2850 | void *object; |
2851 | struct page *page; | ||
2838 | 2852 | ||
2839 | /* Always re-init detached_freelist */ | 2853 | /* Always re-init detached_freelist */ |
2840 | df->page = NULL; | 2854 | df->page = NULL; |
2841 | 2855 | ||
2842 | do { | 2856 | do { |
2843 | object = p[--size]; | 2857 | object = p[--size]; |
2858 | /* Do we need !ZERO_OR_NULL_PTR(object) here? (for kfree) */ | ||
2844 | } while (!object && size); | 2859 | } while (!object && size); |
2845 | 2860 | ||
2846 | if (!object) | 2861 | if (!object) |
2847 | return 0; | 2862 | return 0; |
2848 | 2863 | ||
2864 | page = virt_to_head_page(object); | ||
2865 | if (!s) { | ||
2866 | /* Handle kmalloc'ed objects */ ||
2867 | if (unlikely(!PageSlab(page))) { | ||
2868 | BUG_ON(!PageCompound(page)); | ||
2869 | kfree_hook(object); | ||
2870 | __free_kmem_pages(page, compound_order(page)); | ||
2871 | p[size] = NULL; /* mark object processed */ | ||
2872 | return size; | ||
2873 | } | ||
2874 | /* Derive kmem_cache from object */ | ||
2875 | df->s = page->slab_cache; | ||
2876 | } else { | ||
2877 | df->s = cache_from_obj(s, object); /* Support for memcg */ | ||
2878 | } | ||
2879 | |||
2849 | /* Start new detached freelist */ | 2880 | /* Start new detached freelist */ |
2850 | set_freepointer(s, object, NULL); | 2881 | df->page = page; |
2851 | df->page = virt_to_head_page(object); | 2882 | set_freepointer(df->s, object, NULL); |
2852 | df->tail = object; | 2883 | df->tail = object; |
2853 | df->freelist = object; | 2884 | df->freelist = object; |
2854 | p[size] = NULL; /* mark object processed */ | 2885 | p[size] = NULL; /* mark object processed */ |
@@ -2862,7 +2893,7 @@ static int build_detached_freelist(struct kmem_cache *s, size_t size, | |||
2862 | /* df->page is always set at this point */ | 2893 | /* df->page is always set at this point */ |
2863 | if (df->page == virt_to_head_page(object)) { | 2894 | if (df->page == virt_to_head_page(object)) { |
2864 | /* Opportunistically build the freelist */ | 2895 | /* Opportunistically build the freelist */ |
2865 | set_freepointer(s, object, df->freelist); | 2896 | set_freepointer(df->s, object, df->freelist); |
2866 | df->freelist = object; | 2897 | df->freelist = object; |
2867 | df->cnt++; | 2898 | df->cnt++; |
2868 | p[size] = NULL; /* mark object processed */ | 2899 | p[size] = NULL; /* mark object processed */ |
@@ -2881,25 +2912,20 @@ static int build_detached_freelist(struct kmem_cache *s, size_t size, | |||
2881 | return first_skipped_index; | 2912 | return first_skipped_index; |
2882 | } | 2913 | } |
2883 | 2914 | ||
2884 | |||
2885 | /* Note that interrupts must be enabled when calling this function. */ | 2915 | /* Note that interrupts must be enabled when calling this function. */ |
2886 | void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p) | 2916 | void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) |
2887 | { | 2917 | { |
2888 | if (WARN_ON(!size)) | 2918 | if (WARN_ON(!size)) |
2889 | return; | 2919 | return; |
2890 | 2920 | ||
2891 | do { | 2921 | do { |
2892 | struct detached_freelist df; | 2922 | struct detached_freelist df; |
2893 | struct kmem_cache *s; | ||
2894 | |||
2895 | /* Support for memcg */ | ||
2896 | s = cache_from_obj(orig_s, p[size - 1]); | ||
2897 | 2923 | ||
2898 | size = build_detached_freelist(s, size, p, &df); | 2924 | size = build_detached_freelist(s, size, p, &df); |
2899 | if (unlikely(!df.page)) | 2925 | if (unlikely(!df.page)) |
2900 | continue; | 2926 | continue; |
2901 | 2927 | ||
2902 | slab_free(s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_); | 2928 | slab_free(df.s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_); |
2903 | } while (likely(size)); | 2929 | } while (likely(size)); |
2904 | } | 2930 | } |
2905 | EXPORT_SYMBOL(kmem_cache_free_bulk); | 2931 | EXPORT_SYMBOL(kmem_cache_free_bulk); |
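
build_detached_freelist() walks the array backwards from the last unprocessed entry, chains together all objects sharing that entry's page (up to a small lookahead), and NULLs out the slots it consumed; kmem_cache_free_bulk() then repeats this until the array is empty. A userspace sketch of the grouping idea, with 4 KiB "pages" simulated by masking addresses and both the lookahead limit and the s == NULL path left out:

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

#define SIM_PAGE_MASK (~(uintptr_t)0xfff)   /* illustrative 4 KiB pages */

/* Consume every entry of p[] on one page, NULLing processed slots.
 * Returns the remaining effective size, 0 when the array is drained. */
static size_t build_group(size_t size, uintptr_t *p, uintptr_t *page)
{
	while (size && !p[size - 1])
		size--;                     /* skip already-processed slots */
	if (!size)
		return 0;

	*page = p[size - 1] & SIM_PAGE_MASK;
	for (size_t i = size; i-- > 0; )
		if (p[i] && (p[i] & SIM_PAGE_MASK) == *page)
			p[i] = 0;           /* mark object processed */
	return size;
}

int main(void)
{
	uintptr_t p[] = { 0x1010, 0x2020, 0x1040, 0x2080 };
	size_t size = sizeof(p) / sizeof(p[0]);
	uintptr_t page;

	while ((size = build_group(size, p, &page)) != 0)
		printf("freeing one batch on page %#lx\n", (unsigned long)page);
	return 0;
}
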
@@ -3285,7 +3311,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) | |||
3285 | */ | 3311 | */ |
3286 | size += 2 * sizeof(struct track); | 3312 | size += 2 * sizeof(struct track); |
3287 | 3313 | ||
3288 | if (flags & SLAB_RED_ZONE) | 3314 | if (flags & SLAB_RED_ZONE) { |
3289 | /* | 3315 | /* |
3290 | * Add some empty padding so that we can catch | 3316 | * Add some empty padding so that we can catch |
3291 | * overwrites from earlier objects rather than let | 3317 | * overwrites from earlier objects rather than let |
@@ -3294,6 +3320,11 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) | |||
3294 | * of the object. | 3320 | * of the object. |
3295 | */ | 3321 | */ |
3296 | size += sizeof(void *); | 3322 | size += sizeof(void *); |
3323 | |||
3324 | s->red_left_pad = sizeof(void *); | ||
3325 | s->red_left_pad = ALIGN(s->red_left_pad, s->align); | ||
3326 | size += s->red_left_pad; | ||
3327 | } | ||
3297 | #endif | 3328 | #endif |
3298 | 3329 | ||
3299 | /* | 3330 | /* |
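
With the left pad in place a red-zoned object is laid out as [left redzone][object][right redzone], and red_left_pad is sizeof(void *) rounded up to the cache's alignment. A quick illustration of that arithmetic with made-up numbers (the real size then goes through further alignment):

#include <stdio.h>

#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned long align = 16;            /* illustrative s->align */
	unsigned long pad = sizeof(void *);  /* 8 on LP64 */

	pad = ALIGN(pad, align);
	printf("red_left_pad = %lu\n", pad); /* 16 */

	/* footprint grows by the right-zone word plus the left pad */
	printf("size = %lu\n", (unsigned long)(64 + sizeof(void *) + pad)); /* 88 */
	return 0;
}
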
@@ -3357,7 +3388,7 @@ static int kmem_cache_open(struct kmem_cache *s, unsigned long flags) | |||
3357 | 3388 | ||
3358 | #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \ | 3389 | #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \ |
3359 | defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) | 3390 | defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) |
3360 | if (system_has_cmpxchg_double() && (s->flags & SLAB_DEBUG_FLAGS) == 0) | 3391 | if (system_has_cmpxchg_double() && (s->flags & SLAB_NO_CMPXCHG) == 0) |
3361 | /* Enable fast mode */ | 3392 | /* Enable fast mode */ |
3362 | s->flags |= __CMPXCHG_DOUBLE; | 3393 | s->flags |= __CMPXCHG_DOUBLE; |
3363 | #endif | 3394 | #endif |
@@ -4812,16 +4843,16 @@ SLAB_ATTR_RO(total_objects); | |||
4812 | 4843 | ||
4813 | static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf) | 4844 | static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf) |
4814 | { | 4845 | { |
4815 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE)); | 4846 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_CONSISTENCY_CHECKS)); |
4816 | } | 4847 | } |
4817 | 4848 | ||
4818 | static ssize_t sanity_checks_store(struct kmem_cache *s, | 4849 | static ssize_t sanity_checks_store(struct kmem_cache *s, |
4819 | const char *buf, size_t length) | 4850 | const char *buf, size_t length) |
4820 | { | 4851 | { |
4821 | s->flags &= ~SLAB_DEBUG_FREE; | 4852 | s->flags &= ~SLAB_CONSISTENCY_CHECKS; |
4822 | if (buf[0] == '1') { | 4853 | if (buf[0] == '1') { |
4823 | s->flags &= ~__CMPXCHG_DOUBLE; | 4854 | s->flags &= ~__CMPXCHG_DOUBLE; |
4824 | s->flags |= SLAB_DEBUG_FREE; | 4855 | s->flags |= SLAB_CONSISTENCY_CHECKS; |
4825 | } | 4856 | } |
4826 | return length; | 4857 | return length; |
4827 | } | 4858 | } |
@@ -4865,7 +4896,6 @@ static ssize_t red_zone_store(struct kmem_cache *s, | |||
4865 | 4896 | ||
4866 | s->flags &= ~SLAB_RED_ZONE; | 4897 | s->flags &= ~SLAB_RED_ZONE; |
4867 | if (buf[0] == '1') { | 4898 | if (buf[0] == '1') { |
4868 | s->flags &= ~__CMPXCHG_DOUBLE; | ||
4869 | s->flags |= SLAB_RED_ZONE; | 4899 | s->flags |= SLAB_RED_ZONE; |
4870 | } | 4900 | } |
4871 | calculate_sizes(s, -1); | 4901 | calculate_sizes(s, -1); |
@@ -4886,7 +4916,6 @@ static ssize_t poison_store(struct kmem_cache *s, | |||
4886 | 4916 | ||
4887 | s->flags &= ~SLAB_POISON; | 4917 | s->flags &= ~SLAB_POISON; |
4888 | if (buf[0] == '1') { | 4918 | if (buf[0] == '1') { |
4889 | s->flags &= ~__CMPXCHG_DOUBLE; | ||
4890 | s->flags |= SLAB_POISON; | 4919 | s->flags |= SLAB_POISON; |
4891 | } | 4920 | } |
4892 | calculate_sizes(s, -1); | 4921 | calculate_sizes(s, -1); |
@@ -5356,7 +5385,7 @@ static char *create_unique_id(struct kmem_cache *s) | |||
5356 | *p++ = 'd'; | 5385 | *p++ = 'd'; |
5357 | if (s->flags & SLAB_RECLAIM_ACCOUNT) | 5386 | if (s->flags & SLAB_RECLAIM_ACCOUNT) |
5358 | *p++ = 'a'; | 5387 | *p++ = 'a'; |
5359 | if (s->flags & SLAB_DEBUG_FREE) | 5388 | if (s->flags & SLAB_CONSISTENCY_CHECKS) |
5360 | *p++ = 'F'; | 5389 | *p++ = 'F'; |
5361 | if (!(s->flags & SLAB_NOTRACK)) | 5390 | if (!(s->flags & SLAB_NOTRACK)) |
5362 | *p++ = 't'; | 5391 | *p++ = 't'; |
diff --git a/mm/truncate.c b/mm/truncate.c index e3ee0e27cd17..7598b552ae03 100644 --- a/mm/truncate.c +++ b/mm/truncate.c | |||
@@ -519,7 +519,6 @@ EXPORT_SYMBOL(invalidate_mapping_pages); | |||
519 | static int | 519 | static int |
520 | invalidate_complete_page2(struct address_space *mapping, struct page *page) | 520 | invalidate_complete_page2(struct address_space *mapping, struct page *page) |
521 | { | 521 | { |
522 | struct mem_cgroup *memcg; | ||
523 | unsigned long flags; | 522 | unsigned long flags; |
524 | 523 | ||
525 | if (page->mapping != mapping) | 524 | if (page->mapping != mapping) |
@@ -528,15 +527,13 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) | |||
528 | if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL)) | 527 | if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL)) |
529 | return 0; | 528 | return 0; |
530 | 529 | ||
531 | memcg = mem_cgroup_begin_page_stat(page); | ||
532 | spin_lock_irqsave(&mapping->tree_lock, flags); | 530 | spin_lock_irqsave(&mapping->tree_lock, flags); |
533 | if (PageDirty(page)) | 531 | if (PageDirty(page)) |
534 | goto failed; | 532 | goto failed; |
535 | 533 | ||
536 | BUG_ON(page_has_private(page)); | 534 | BUG_ON(page_has_private(page)); |
537 | __delete_from_page_cache(page, NULL, memcg); | 535 | __delete_from_page_cache(page, NULL); |
538 | spin_unlock_irqrestore(&mapping->tree_lock, flags); | 536 | spin_unlock_irqrestore(&mapping->tree_lock, flags); |
539 | mem_cgroup_end_page_stat(memcg); | ||
540 | 537 | ||
541 | if (mapping->a_ops->freepage) | 538 | if (mapping->a_ops->freepage) |
542 | mapping->a_ops->freepage(page); | 539 | mapping->a_ops->freepage(page); |
@@ -545,7 +542,6 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) | |||
545 | return 1; | 542 | return 1; |
546 | failed: | 543 | failed: |
547 | spin_unlock_irqrestore(&mapping->tree_lock, flags); | 544 | spin_unlock_irqrestore(&mapping->tree_lock, flags); |
548 | mem_cgroup_end_page_stat(memcg); | ||
549 | return 0; | 545 | return 0; |
550 | } | 546 | } |
551 | 547 | ||
diff --git a/mm/vmscan.c b/mm/vmscan.c index 71b1c29948db..dd984470248f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -195,25 +195,25 @@ static unsigned long zone_reclaimable_pages(struct zone *zone) | |||
195 | { | 195 | { |
196 | unsigned long nr; | 196 | unsigned long nr; |
197 | 197 | ||
198 | nr = zone_page_state(zone, NR_ACTIVE_FILE) + | 198 | nr = zone_page_state_snapshot(zone, NR_ACTIVE_FILE) + |
199 | zone_page_state(zone, NR_INACTIVE_FILE) + | 199 | zone_page_state_snapshot(zone, NR_INACTIVE_FILE) + |
200 | zone_page_state(zone, NR_ISOLATED_FILE); | 200 | zone_page_state_snapshot(zone, NR_ISOLATED_FILE); |
201 | 201 | ||
202 | if (get_nr_swap_pages() > 0) | 202 | if (get_nr_swap_pages() > 0) |
203 | nr += zone_page_state(zone, NR_ACTIVE_ANON) + | 203 | nr += zone_page_state_snapshot(zone, NR_ACTIVE_ANON) + |
204 | zone_page_state(zone, NR_INACTIVE_ANON) + | 204 | zone_page_state_snapshot(zone, NR_INACTIVE_ANON) + |
205 | zone_page_state(zone, NR_ISOLATED_ANON); | 205 | zone_page_state_snapshot(zone, NR_ISOLATED_ANON); |
206 | 206 | ||
207 | return nr; | 207 | return nr; |
208 | } | 208 | } |
209 | 209 | ||
210 | bool zone_reclaimable(struct zone *zone) | 210 | bool zone_reclaimable(struct zone *zone) |
211 | { | 211 | { |
212 | return zone_page_state(zone, NR_PAGES_SCANNED) < | 212 | return zone_page_state_snapshot(zone, NR_PAGES_SCANNED) < |
213 | zone_reclaimable_pages(zone) * 6; | 213 | zone_reclaimable_pages(zone) * 6; |
214 | } | 214 | } |
215 | 215 | ||
216 | static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru) | 216 | unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru) |
217 | { | 217 | { |
218 | if (!mem_cgroup_disabled()) | 218 | if (!mem_cgroup_disabled()) |
219 | return mem_cgroup_get_lru_size(lruvec, lru); | 219 | return mem_cgroup_get_lru_size(lruvec, lru); |
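
The switch to the snapshot variants matters because plain zone_page_state() reads only the global counter, which can lag by whatever deltas are still batched in the per-cpu counters; zone_page_state_snapshot() folds those deltas in before the zone_reclaimable() decision is made. A sketch of the difference, using a hypothetical counter layout rather than the kernel's vmstat structures:

#include <stdio.h>

#define NR_CPUS 4

/* Global counter plus not-yet-flushed per-cpu deltas */
static long vm_stat = 100;
static signed char pcpu_diff[NR_CPUS] = { 3, -1, 2, 0 };

static long state(void)            /* like zone_page_state(): may lag */
{
	return vm_stat;
}

static long state_snapshot(void)   /* like zone_page_state_snapshot() */
{
	long x = vm_stat;

	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		x += pcpu_diff[cpu];
	return x;
}

int main(void)
{
	printf("cheap=%ld exact=%ld\n", state(), state_snapshot()); /* 100 104 */
	return 0;
}
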
@@ -228,14 +228,6 @@ int register_shrinker(struct shrinker *shrinker) | |||
228 | { | 228 | { |
229 | size_t size = sizeof(*shrinker->nr_deferred); | 229 | size_t size = sizeof(*shrinker->nr_deferred); |
230 | 230 | ||
231 | /* | ||
232 | * If we only have one possible node in the system anyway, save | ||
233 | * ourselves the trouble and disable NUMA aware behavior. This way we | ||
234 | * will save memory and some small loop time later. | ||
235 | */ | ||
236 | if (nr_node_ids == 1) | ||
237 | shrinker->flags &= ~SHRINKER_NUMA_AWARE; | ||
238 | |||
239 | if (shrinker->flags & SHRINKER_NUMA_AWARE) | 231 | if (shrinker->flags & SHRINKER_NUMA_AWARE) |
240 | size *= nr_node_ids; | 232 | size *= nr_node_ids; |
241 | 233 | ||
@@ -611,12 +603,10 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, | |||
611 | bool reclaimed) | 603 | bool reclaimed) |
612 | { | 604 | { |
613 | unsigned long flags; | 605 | unsigned long flags; |
614 | struct mem_cgroup *memcg; | ||
615 | 606 | ||
616 | BUG_ON(!PageLocked(page)); | 607 | BUG_ON(!PageLocked(page)); |
617 | BUG_ON(mapping != page_mapping(page)); | 608 | BUG_ON(mapping != page_mapping(page)); |
618 | 609 | ||
619 | memcg = mem_cgroup_begin_page_stat(page); | ||
620 | spin_lock_irqsave(&mapping->tree_lock, flags); | 610 | spin_lock_irqsave(&mapping->tree_lock, flags); |
621 | /* | 611 | /* |
622 | * The non racy check for a busy page. | 612 | * The non racy check for a busy page. |
@@ -656,7 +646,6 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, | |||
656 | mem_cgroup_swapout(page, swap); | 646 | mem_cgroup_swapout(page, swap); |
657 | __delete_from_swap_cache(page); | 647 | __delete_from_swap_cache(page); |
658 | spin_unlock_irqrestore(&mapping->tree_lock, flags); | 648 | spin_unlock_irqrestore(&mapping->tree_lock, flags); |
659 | mem_cgroup_end_page_stat(memcg); | ||
660 | swapcache_free(swap); | 649 | swapcache_free(swap); |
661 | } else { | 650 | } else { |
662 | void (*freepage)(struct page *); | 651 | void (*freepage)(struct page *); |
@@ -682,9 +671,8 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, | |||
682 | if (reclaimed && page_is_file_cache(page) && | 671 | if (reclaimed && page_is_file_cache(page) && |
683 | !mapping_exiting(mapping) && !dax_mapping(mapping)) | 672 | !mapping_exiting(mapping) && !dax_mapping(mapping)) |
684 | shadow = workingset_eviction(mapping, page); | 673 | shadow = workingset_eviction(mapping, page); |
685 | __delete_from_page_cache(page, shadow, memcg); | 674 | __delete_from_page_cache(page, shadow); |
686 | spin_unlock_irqrestore(&mapping->tree_lock, flags); | 675 | spin_unlock_irqrestore(&mapping->tree_lock, flags); |
687 | mem_cgroup_end_page_stat(memcg); | ||
688 | 676 | ||
689 | if (freepage != NULL) | 677 | if (freepage != NULL) |
690 | freepage(page); | 678 | freepage(page); |
@@ -694,7 +682,6 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, | |||
694 | 682 | ||
695 | cannot_free: | 683 | cannot_free: |
696 | spin_unlock_irqrestore(&mapping->tree_lock, flags); | 684 | spin_unlock_irqrestore(&mapping->tree_lock, flags); |
697 | mem_cgroup_end_page_stat(memcg); | ||
698 | return 0; | 685 | return 0; |
699 | } | 686 | } |
700 | 687 | ||
@@ -1931,8 +1918,8 @@ static bool inactive_file_is_low(struct lruvec *lruvec) | |||
1931 | unsigned long inactive; | 1918 | unsigned long inactive; |
1932 | unsigned long active; | 1919 | unsigned long active; |
1933 | 1920 | ||
1934 | inactive = get_lru_size(lruvec, LRU_INACTIVE_FILE); | 1921 | inactive = lruvec_lru_size(lruvec, LRU_INACTIVE_FILE); |
1935 | active = get_lru_size(lruvec, LRU_ACTIVE_FILE); | 1922 | active = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE); |
1936 | 1923 | ||
1937 | return active > inactive; | 1924 | return active > inactive; |
1938 | } | 1925 | } |
@@ -2071,7 +2058,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, | |||
2071 | * system is under heavy pressure. | 2058 | * system is under heavy pressure. |
2072 | */ | 2059 | */ |
2073 | if (!inactive_file_is_low(lruvec) && | 2060 | if (!inactive_file_is_low(lruvec) && |
2074 | get_lru_size(lruvec, LRU_INACTIVE_FILE) >> sc->priority) { | 2061 | lruvec_lru_size(lruvec, LRU_INACTIVE_FILE) >> sc->priority) { |
2075 | scan_balance = SCAN_FILE; | 2062 | scan_balance = SCAN_FILE; |
2076 | goto out; | 2063 | goto out; |
2077 | } | 2064 | } |
@@ -2097,10 +2084,10 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, | |||
2097 | * anon in [0], file in [1] | 2084 | * anon in [0], file in [1] |
2098 | */ | 2085 | */ |
2099 | 2086 | ||
2100 | anon = get_lru_size(lruvec, LRU_ACTIVE_ANON) + | 2087 | anon = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON) + |
2101 | get_lru_size(lruvec, LRU_INACTIVE_ANON); | 2088 | lruvec_lru_size(lruvec, LRU_INACTIVE_ANON); |
2102 | file = get_lru_size(lruvec, LRU_ACTIVE_FILE) + | 2089 | file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE) + |
2103 | get_lru_size(lruvec, LRU_INACTIVE_FILE); | 2090 | lruvec_lru_size(lruvec, LRU_INACTIVE_FILE); |
2104 | 2091 | ||
2105 | spin_lock_irq(&zone->lru_lock); | 2092 | spin_lock_irq(&zone->lru_lock); |
2106 | if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) { | 2093 | if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) { |
@@ -2138,7 +2125,7 @@ out: | |||
2138 | unsigned long size; | 2125 | unsigned long size; |
2139 | unsigned long scan; | 2126 | unsigned long scan; |
2140 | 2127 | ||
2141 | size = get_lru_size(lruvec, lru); | 2128 | size = lruvec_lru_size(lruvec, lru); |
2142 | scan = size >> sc->priority; | 2129 | scan = size >> sc->priority; |
2143 | 2130 | ||
2144 | if (!scan && pass && force_scan) | 2131 | if (!scan && pass && force_scan) |
diff --git a/mm/vmstat.c b/mm/vmstat.c index 084c6725b373..69ce64f7b8d7 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c | |||
@@ -924,19 +924,6 @@ static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat, | |||
924 | #endif | 924 | #endif |
925 | 925 | ||
926 | #ifdef CONFIG_PROC_FS | 926 | #ifdef CONFIG_PROC_FS |
927 | static char * const migratetype_names[MIGRATE_TYPES] = { | ||
928 | "Unmovable", | ||
929 | "Movable", | ||
930 | "Reclaimable", | ||
931 | "HighAtomic", | ||
932 | #ifdef CONFIG_CMA | ||
933 | "CMA", | ||
934 | #endif | ||
935 | #ifdef CONFIG_MEMORY_ISOLATION | ||
936 | "Isolate", | ||
937 | #endif | ||
938 | }; | ||
939 | |||
940 | static void frag_show_print(struct seq_file *m, pg_data_t *pgdat, | 927 | static void frag_show_print(struct seq_file *m, pg_data_t *pgdat, |
941 | struct zone *zone) | 928 | struct zone *zone) |
942 | { | 929 | { |
@@ -1133,7 +1120,7 @@ static void pagetypeinfo_showmixedcount(struct seq_file *m, pg_data_t *pgdat) | |||
1133 | #ifdef CONFIG_PAGE_OWNER | 1120 | #ifdef CONFIG_PAGE_OWNER |
1134 | int mtype; | 1121 | int mtype; |
1135 | 1122 | ||
1136 | if (!page_owner_inited) | 1123 | if (!static_branch_unlikely(&page_owner_inited)) |
1137 | return; | 1124 | return; |
1138 | 1125 | ||
1139 | drain_all_pages(NULL); | 1126 | drain_all_pages(NULL); |
diff --git a/mm/workingset.c b/mm/workingset.c index 61ead9e5549d..6130ba0b2641 100644 --- a/mm/workingset.c +++ b/mm/workingset.c | |||
@@ -152,8 +152,25 @@ | |||
152 | * refault distance will immediately activate the refaulting page. | 152 | * refault distance will immediately activate the refaulting page. |
153 | */ | 153 | */ |
154 | 154 | ||
155 | static void *pack_shadow(unsigned long eviction, struct zone *zone) | 155 | #define EVICTION_SHIFT (RADIX_TREE_EXCEPTIONAL_ENTRY + \ |
156 | ZONES_SHIFT + NODES_SHIFT + \ | ||
157 | MEM_CGROUP_ID_SHIFT) | ||
158 | #define EVICTION_MASK (~0UL >> EVICTION_SHIFT) | ||
159 | |||
160 | /* | ||
161 | * Eviction timestamps need to be able to cover the full range of | ||
162 | * actionable refaults. However, bits are tight in the radix tree | ||
163 | * entry, and after storing the identifier for the lruvec there might | ||
164 | * not be enough left to represent every single actionable refault. In | ||
165 | * that case, we have to sacrifice granularity for distance, and group | ||
166 | * evictions into coarser buckets by shaving off lower timestamp bits. | ||
167 | */ | ||
168 | static unsigned int bucket_order __read_mostly; | ||
169 | |||
170 | static void *pack_shadow(int memcgid, struct zone *zone, unsigned long eviction) | ||
156 | { | 171 | { |
172 | eviction >>= bucket_order; | ||
173 | eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid; | ||
157 | eviction = (eviction << NODES_SHIFT) | zone_to_nid(zone); | 174 | eviction = (eviction << NODES_SHIFT) | zone_to_nid(zone); |
158 | eviction = (eviction << ZONES_SHIFT) | zone_idx(zone); | 175 | eviction = (eviction << ZONES_SHIFT) | zone_idx(zone); |
159 | eviction = (eviction << RADIX_TREE_EXCEPTIONAL_SHIFT); | 176 | eviction = (eviction << RADIX_TREE_EXCEPTIONAL_SHIFT); |
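
pack_shadow() shifts each field in from the right (memcg ID, then node, then zone), leaving the upper bits for the eviction timestamp, and unpack_shadow() peels them off in reverse order. A self-contained round trip with stand-in field widths; the real widths come from MEM_CGROUP_ID_SHIFT, NODES_SHIFT and ZONES_SHIFT, and the radix-tree exceptional bits are left out here:

#include <stdio.h>

#define MEMCG_SHIFT 16   /* stand-in field widths */
#define NODE_SHIFT  6
#define ZONE_SHIFT  2

static unsigned long pack(int memcgid, int nid, int zid, unsigned long eviction)
{
	eviction = (eviction << MEMCG_SHIFT) | memcgid;
	eviction = (eviction << NODE_SHIFT) | nid;
	eviction = (eviction << ZONE_SHIFT) | zid;
	return eviction;
}

static void unpack(unsigned long entry, int *memcgid, int *nid, int *zid,
		   unsigned long *eviction)
{
	*zid = entry & ((1UL << ZONE_SHIFT) - 1);
	entry >>= ZONE_SHIFT;
	*nid = entry & ((1UL << NODE_SHIFT) - 1);
	entry >>= NODE_SHIFT;
	*memcgid = entry & ((1UL << MEMCG_SHIFT) - 1);
	entry >>= MEMCG_SHIFT;
	*eviction = entry;
}

int main(void)
{
	int m, n, z;
	unsigned long e;

	unpack(pack(42, 3, 1, 123456), &m, &n, &z, &e);
	printf("memcg=%d nid=%d zid=%d eviction=%lu\n", m, n, z, e);
	return 0;
}
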
@@ -161,45 +178,23 @@ static void *pack_shadow(unsigned long eviction, struct zone *zone) | |||
161 | return (void *)(eviction | RADIX_TREE_EXCEPTIONAL_ENTRY); | 178 | return (void *)(eviction | RADIX_TREE_EXCEPTIONAL_ENTRY); |
162 | } | 179 | } |
163 | 180 | ||
164 | static void unpack_shadow(void *shadow, | 181 | static void unpack_shadow(void *shadow, int *memcgidp, struct zone **zonep, |
165 | struct zone **zone, | 182 | unsigned long *evictionp) |
166 | unsigned long *distance) | ||
167 | { | 183 | { |
168 | unsigned long entry = (unsigned long)shadow; | 184 | unsigned long entry = (unsigned long)shadow; |
169 | unsigned long eviction; | 185 | int memcgid, nid, zid; |
170 | unsigned long refault; | ||
171 | unsigned long mask; | ||
172 | int zid, nid; | ||
173 | 186 | ||
174 | entry >>= RADIX_TREE_EXCEPTIONAL_SHIFT; | 187 | entry >>= RADIX_TREE_EXCEPTIONAL_SHIFT; |
175 | zid = entry & ((1UL << ZONES_SHIFT) - 1); | 188 | zid = entry & ((1UL << ZONES_SHIFT) - 1); |
176 | entry >>= ZONES_SHIFT; | 189 | entry >>= ZONES_SHIFT; |
177 | nid = entry & ((1UL << NODES_SHIFT) - 1); | 190 | nid = entry & ((1UL << NODES_SHIFT) - 1); |
178 | entry >>= NODES_SHIFT; | 191 | entry >>= NODES_SHIFT; |
179 | eviction = entry; | 192 | memcgid = entry & ((1UL << MEM_CGROUP_ID_SHIFT) - 1); |
193 | entry >>= MEM_CGROUP_ID_SHIFT; | ||
180 | 194 | ||
181 | *zone = NODE_DATA(nid)->node_zones + zid; | 195 | *memcgidp = memcgid; |
182 | 196 | *zonep = NODE_DATA(nid)->node_zones + zid; | |
183 | refault = atomic_long_read(&(*zone)->inactive_age); | 197 | *evictionp = entry << bucket_order; |
184 | mask = ~0UL >> (NODES_SHIFT + ZONES_SHIFT + | ||
185 | RADIX_TREE_EXCEPTIONAL_SHIFT); | ||
186 | /* | ||
187 | * The unsigned subtraction here gives an accurate distance | ||
188 | * across inactive_age overflows in most cases. | ||
189 | * | ||
190 | * There is a special case: usually, shadow entries have a | ||
191 | * short lifetime and are either refaulted or reclaimed along | ||
192 | * with the inode before they get too old. But it is not | ||
193 | * impossible for the inactive_age to lap a shadow entry in | ||
194 | * the field, which can then result in a false small ||
195 | * refault distance, leading to a false activation should this | ||
196 | * old entry actually refault again. However, earlier kernels | ||
197 | * used to deactivate unconditionally with *every* reclaim | ||
198 | * invocation for the longest time, so the occasional | ||
199 | * inappropriate activation leading to pressure on the active | ||
200 | * list is not a problem. | ||
201 | */ | ||
202 | *distance = (refault - eviction) & mask; | ||
203 | } | 198 | } |
204 | 199 | ||
205 | /** | 200 | /** |
@@ -212,11 +207,20 @@ static void unpack_shadow(void *shadow, | |||
212 | */ | 207 | */ |
213 | void *workingset_eviction(struct address_space *mapping, struct page *page) | 208 | void *workingset_eviction(struct address_space *mapping, struct page *page) |
214 | { | 209 | { |
210 | struct mem_cgroup *memcg = page_memcg(page); | ||
215 | struct zone *zone = page_zone(page); | 211 | struct zone *zone = page_zone(page); |
212 | int memcgid = mem_cgroup_id(memcg); | ||
216 | unsigned long eviction; | 213 | unsigned long eviction; |
214 | struct lruvec *lruvec; | ||
217 | 215 | ||
218 | eviction = atomic_long_inc_return(&zone->inactive_age); | 216 | /* Page is fully exclusive and pins page->mem_cgroup */ |
219 | return pack_shadow(eviction, zone); | 217 | VM_BUG_ON_PAGE(PageLRU(page), page); |
218 | VM_BUG_ON_PAGE(page_count(page), page); | ||
219 | VM_BUG_ON_PAGE(!PageLocked(page), page); | ||
220 | |||
221 | lruvec = mem_cgroup_zone_lruvec(zone, memcg); | ||
222 | eviction = atomic_long_inc_return(&lruvec->inactive_age); | ||
223 | return pack_shadow(memcgid, zone, eviction); | ||
220 | } | 224 | } |
221 | 225 | ||
222 | /** | 226 | /** |
@@ -231,12 +235,64 @@ void *workingset_eviction(struct address_space *mapping, struct page *page) | |||
231 | bool workingset_refault(void *shadow) | 235 | bool workingset_refault(void *shadow) |
232 | { | 236 | { |
233 | unsigned long refault_distance; | 237 | unsigned long refault_distance; |
238 | unsigned long active_file; | ||
239 | struct mem_cgroup *memcg; | ||
240 | unsigned long eviction; | ||
241 | struct lruvec *lruvec; | ||
242 | unsigned long refault; | ||
234 | struct zone *zone; | 243 | struct zone *zone; |
244 | int memcgid; | ||
245 | |||
246 | unpack_shadow(shadow, &memcgid, &zone, &eviction); | ||
247 | |||
248 | rcu_read_lock(); | ||
249 | /* | ||
250 | * Look up the memcg associated with the stored ID. It might | ||
251 | * have been deleted since the page's eviction. | ||
252 | * | ||
253 | * Note that in rare events the ID could have been recycled | ||
254 | * for a new cgroup that refaults a shared page. This is | ||
255 | * impossible to tell from the available data. However, this | ||
256 | * should be a rare and limited disturbance, and activations | ||
257 | * are always speculative anyway. Ultimately, it's the aging | ||
258 | * algorithm's job to shake out the minimum access frequency | ||
259 | * for the active cache. | ||
260 | * | ||
261 | * XXX: On !CONFIG_MEMCG, this will always return NULL; it | ||
262 | * would be better if the root_mem_cgroup existed in all | ||
263 | * configurations instead. | ||
264 | */ | ||
265 | memcg = mem_cgroup_from_id(memcgid); | ||
266 | if (!mem_cgroup_disabled() && !memcg) { | ||
267 | rcu_read_unlock(); | ||
268 | return false; | ||
269 | } | ||
270 | lruvec = mem_cgroup_zone_lruvec(zone, memcg); | ||
271 | refault = atomic_long_read(&lruvec->inactive_age); | ||
272 | active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE); | ||
273 | rcu_read_unlock(); | ||
274 | |||
275 | /* | ||
276 | * The unsigned subtraction here gives an accurate distance | ||
277 | * across inactive_age overflows in most cases. | ||
278 | * | ||
279 | * There is a special case: usually, shadow entries have a | ||
280 | * short lifetime and are either refaulted or reclaimed along | ||
281 | * with the inode before they get too old. But it is not | ||
282 | * impossible for the inactive_age to lap a shadow entry in | ||
283 | * the field, which can then result in a false small ||
284 | * refault distance, leading to a false activation should this | ||
285 | * old entry actually refault again. However, earlier kernels | ||
286 | * used to deactivate unconditionally with *every* reclaim | ||
287 | * invocation for the longest time, so the occasional | ||
288 | * inappropriate activation leading to pressure on the active | ||
289 | * list is not a problem. | ||
290 | */ | ||
291 | refault_distance = (refault - eviction) & EVICTION_MASK; | ||
235 | 292 | ||
236 | unpack_shadow(shadow, &zone, &refault_distance); | ||
237 | inc_zone_state(zone, WORKINGSET_REFAULT); | 293 | inc_zone_state(zone, WORKINGSET_REFAULT); |
238 | 294 | ||
239 | if (refault_distance <= zone_page_state(zone, NR_ACTIVE_FILE)) { | 295 | if (refault_distance <= active_file) { |
240 | inc_zone_state(zone, WORKINGSET_ACTIVATE); | 296 | inc_zone_state(zone, WORKINGSET_ACTIVATE); |
241 | return true; | 297 | return true; |
242 | } | 298 | } |
@@ -249,7 +305,22 @@ bool workingset_refault(void *shadow) | |||
249 | */ | 305 | */ |
250 | void workingset_activation(struct page *page) | 306 | void workingset_activation(struct page *page) |
251 | { | 307 | { |
252 | atomic_long_inc(&page_zone(page)->inactive_age); | 308 | struct lruvec *lruvec; |
309 | |||
310 | lock_page_memcg(page); | ||
311 | /* | ||
312 | * Filter non-memcg pages here, e.g. unmap can call | ||
313 | * mark_page_accessed() on VDSO pages. | ||
314 | * | ||
315 | * XXX: See workingset_refault() - this should return | ||
316 | * root_mem_cgroup even for !CONFIG_MEMCG. | ||
317 | */ | ||
318 | if (!mem_cgroup_disabled() && !page_memcg(page)) | ||
319 | goto out; | ||
320 | lruvec = mem_cgroup_zone_lruvec(page_zone(page), page_memcg(page)); | ||
321 | atomic_long_inc(&lruvec->inactive_age); | ||
322 | out: | ||
323 | unlock_page_memcg(page); | ||
253 | } | 324 | } |
254 | 325 | ||
255 | /* | 326 | /* |
@@ -398,8 +469,25 @@ static struct lock_class_key shadow_nodes_key; | |||
398 | 469 | ||
399 | static int __init workingset_init(void) | 470 | static int __init workingset_init(void) |
400 | { | 471 | { |
472 | unsigned int timestamp_bits; | ||
473 | unsigned int max_order; | ||
401 | int ret; | 474 | int ret; |
402 | 475 | ||
476 | BUILD_BUG_ON(BITS_PER_LONG < EVICTION_SHIFT); | ||
477 | /* | ||
478 | * Calculate the eviction bucket size to cover the longest | ||
479 | * actionable refault distance, which is currently half of | ||
480 | * memory (totalram_pages/2). However, memory hotplug may add | ||
481 | * some more pages at runtime, so keep working with up to | ||
482 | * double the initial memory by using totalram_pages as-is. | ||
483 | */ | ||
484 | timestamp_bits = BITS_PER_LONG - EVICTION_SHIFT; | ||
485 | max_order = fls_long(totalram_pages - 1); | ||
486 | if (max_order > timestamp_bits) | ||
487 | bucket_order = max_order - timestamp_bits; | ||
488 | printk("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n", | ||
489 | timestamp_bits, max_order, bucket_order); | ||
490 | |||
403 | ret = list_lru_init_key(&workingset_shadow_nodes, &shadow_nodes_key); | 491 | ret = list_lru_init_key(&workingset_shadow_nodes, &shadow_nodes_key); |
404 | if (ret) | 492 | if (ret) |
405 | goto err; | 493 | goto err; |
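
The bucket_order computation in workingset_init() trades timestamp granularity for range: once EVICTION_SHIFT bits are reserved for the lruvec identifier, the remaining bits must still express a refault distance as large as totalram_pages, so low-order timestamp bits are shaved off on large machines. A worked example with illustrative numbers and a userspace stand-in for fls_long():

#include <stdio.h>

/* Position of the most significant set bit, 1-based; 0 for x == 0 */
static unsigned int fls_long(unsigned long x)
{
	unsigned int r = 0;

	while (x) {
		x >>= 1;
		r++;
	}
	return r;
}

int main(void)
{
	unsigned int eviction_shift = 26;          /* illustrative */
	unsigned long totalram_pages = 1UL << 42;  /* illustrative page count */
	unsigned int timestamp_bits = 64 - eviction_shift;     /* 38 */
	unsigned int max_order = fls_long(totalram_pages - 1); /* 42 */
	unsigned int bucket_order = 0;

	if (max_order > timestamp_bits)
		bucket_order = max_order - timestamp_bits;      /* 4 */
	printf("evictions grouped in buckets of %lu\n", 1UL << bucket_order);
	return 0;
}
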
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 643a86c49020..2d5589b61e9f 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c | |||
@@ -50,8 +50,7 @@ static u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS] __read_mostly; | |||
50 | #define TSBITS 6 | 50 | #define TSBITS 6 |
51 | #define TSMASK (((__u32)1 << TSBITS) - 1) | 51 | #define TSMASK (((__u32)1 << TSBITS) - 1) |
52 | 52 | ||
53 | static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], | 53 | static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], ipv4_cookie_scratch); |
54 | ipv4_cookie_scratch); | ||
55 | 54 | ||
56 | static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, | 55 | static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, |
57 | u32 count, int c) | 56 | u32 count, int c) |
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 2906ef20795e..aae3e5ca63ea 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c | |||
@@ -41,8 +41,7 @@ static __u16 const msstab[] = { | |||
41 | 9000 - 60, | 41 | 9000 - 60, |
42 | }; | 42 | }; |
43 | 43 | ||
44 | static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], | 44 | static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], ipv6_cookie_scratch); |
45 | ipv6_cookie_scratch); | ||
46 | 45 | ||
47 | static u32 cookie_hash(const struct in6_addr *saddr, const struct in6_addr *daddr, | 46 | static u32 cookie_hash(const struct in6_addr *saddr, const struct in6_addr *daddr, |
48 | __be16 sport, __be16 dport, u32 count, int c) | 47 | __be16 sport, __be16 dport, u32 count, int c) |
diff --git a/net/rds/page.c b/net/rds/page.c index 5a14e6d6a926..616f21f4e7d7 100644 --- a/net/rds/page.c +++ b/net/rds/page.c | |||
@@ -42,8 +42,8 @@ struct rds_page_remainder { | |||
42 | unsigned long r_offset; | 42 | unsigned long r_offset; |
43 | }; | 43 | }; |
44 | 44 | ||
45 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_page_remainder, | 45 | static |
46 | rds_page_remainders); | 46 | DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_page_remainder, rds_page_remainders); |
47 | 47 | ||
48 | /* | 48 | /* |
49 | * returns 0 on success or -errno on failure. | 49 | * returns 0 on success or -errno on failure. |
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 874132b26d23..d574d13ba963 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl | |||
@@ -3240,6 +3240,30 @@ sub process { | |||
3240 | #ignore lines not being added | 3240 | #ignore lines not being added |
3241 | next if ($line =~ /^[^\+]/); | 3241 | next if ($line =~ /^[^\+]/); |
3242 | 3242 | ||
3243 | # check for declarations of signed or unsigned without int | ||
3244 | while ($line =~ m{($Declare)\s*(?!char\b|short\b|int\b|long\b)\s*($Ident)?\s*[=,;\[\)\(]}g) { | ||
3245 | my $type = $1; | ||
3246 | my $var = $2; | ||
3247 | $var = "" if (!defined $var); | ||
3248 | if ($type =~ /^(?:(?:$Storage|$Inline|$Attribute)\s+)*((?:un)?signed)((?:\s*\*)*)\s*$/) { | ||
3249 | my $sign = $1; | ||
3250 | my $pointer = $2; | ||
3251 | |||
3252 | $pointer = "" if (!defined $pointer); | ||
3253 | |||
3254 | if (WARN("UNSPECIFIED_INT", | ||
3255 | "Prefer '" . trim($sign) . " int" . rtrim($pointer) . "' to bare use of '$sign" . rtrim($pointer) . "'\n" . $herecurr) && | ||
3256 | $fix) { | ||
3257 | my $decl = trim($sign) . " int "; | ||
3258 | my $comp_pointer = $pointer; | ||
3259 | $comp_pointer =~ s/\s//g; | ||
3260 | $decl .= $comp_pointer; | ||
3261 | $decl = rtrim($decl) if ($var eq ""); | ||
3262 | $fixed[$fixlinenr] =~ s@\b$sign\s*\Q$pointer\E\s*$var\b@$decl$var@; | ||
3263 | } | ||
3264 | } | ||
3265 | } | ||
3266 | |||
3243 | # TEST: allow direct testing of the type matcher. | 3267 | # TEST: allow direct testing of the type matcher. |
3244 | if ($dbg_type) { | 3268 | if ($dbg_type) { |
3245 | if ($line =~ /^.\s*$Declare\s*$/) { | 3269 | if ($line =~ /^.\s*$Declare\s*$/) { |
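
The new UNSPECIFIED_INT check fires on declarations whose type is a bare "signed" or "unsigned", optionally followed by pointer stars, and the --fix mode rewrites them in place. The kind of C it targets, with hypothetical variable names:

void example(void)
{
	unsigned x;       /* warned: prefer 'unsigned int x;' */
	signed *p;        /* warned: prefer 'signed int *p;'  */
	unsigned int y;   /* fine: int is explicit            */
	unsigned long z;  /* fine: long already implies int   */

	(void)x; (void)p; (void)y; (void)z;
}
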
@@ -4109,7 +4133,7 @@ sub process { | |||
4109 | ## } | 4133 | ## } |
4110 | 4134 | ||
4111 | #need space before brace following if, while, etc | 4135 | #need space before brace following if, while, etc |
4112 | if (($line =~ /\(.*\)\{/ && $line !~ /\($Type\){/) || | 4136 | if (($line =~ /\(.*\)\{/ && $line !~ /\($Type\)\{/) || |
4113 | $line =~ /do\{/) { | 4137 | $line =~ /do\{/) { |
4114 | if (ERROR("SPACING", | 4138 | if (ERROR("SPACING", |
4115 | "space required before the open brace '{'\n" . $herecurr) && | 4139 | "space required before the open brace '{'\n" . $herecurr) && |
@@ -4561,6 +4585,9 @@ sub process { | |||
4561 | { | 4585 | { |
4562 | } | 4586 | } |
4563 | 4587 | ||
4588 | # Make asm volatile uses seem like a generic function | ||
4589 | $dstat =~ s/\b_*asm_*\s+_*volatile_*\b/asm_volatile/g; | ||
4590 | |||
4564 | my $exceptions = qr{ | 4591 | my $exceptions = qr{ |
4565 | $Declare| | 4592 | $Declare| |
4566 | module_param_named| | 4593 | module_param_named| |
diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 8fa81e84e295..638b143ee60f 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <stdlib.h> | 22 | #include <stdlib.h> |
23 | #include <string.h> | 23 | #include <string.h> |
24 | #include <ctype.h> | 24 | #include <ctype.h> |
25 | #include <limits.h> | ||
25 | 26 | ||
26 | #ifndef ARRAY_SIZE | 27 | #ifndef ARRAY_SIZE |
27 | #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) | 28 | #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) |
@@ -34,6 +35,7 @@ struct sym_entry { | |||
34 | unsigned int len; | 35 | unsigned int len; |
35 | unsigned int start_pos; | 36 | unsigned int start_pos; |
36 | unsigned char *sym; | 37 | unsigned char *sym; |
38 | unsigned int percpu_absolute; | ||
37 | }; | 39 | }; |
38 | 40 | ||
39 | struct addr_range { | 41 | struct addr_range { |
@@ -42,6 +44,7 @@ struct addr_range { | |||
42 | }; | 44 | }; |
43 | 45 | ||
44 | static unsigned long long _text; | 46 | static unsigned long long _text; |
47 | static unsigned long long relative_base; | ||
45 | static struct addr_range text_ranges[] = { | 48 | static struct addr_range text_ranges[] = { |
46 | { "_stext", "_etext" }, | 49 | { "_stext", "_etext" }, |
47 | { "_sinittext", "_einittext" }, | 50 | { "_sinittext", "_einittext" }, |
@@ -61,6 +64,7 @@ static int all_symbols = 0; | |||
61 | static int absolute_percpu = 0; | 64 | static int absolute_percpu = 0; |
62 | static char symbol_prefix_char = '\0'; | 65 | static char symbol_prefix_char = '\0'; |
63 | static unsigned long long kernel_start_addr = 0; | 66 | static unsigned long long kernel_start_addr = 0; |
67 | static int base_relative = 0; | ||
64 | 68 | ||
65 | int token_profit[0x10000]; | 69 | int token_profit[0x10000]; |
66 | 70 | ||
@@ -74,7 +78,7 @@ static void usage(void) | |||
74 | fprintf(stderr, "Usage: kallsyms [--all-symbols] " | 78 | fprintf(stderr, "Usage: kallsyms [--all-symbols] " |
75 | "[--symbol-prefix=<prefix char>] " | 79 | "[--symbol-prefix=<prefix char>] " |
76 | "[--page-offset=<CONFIG_PAGE_OFFSET>] " | 80 | "[--page-offset=<CONFIG_PAGE_OFFSET>] " |
77 | "< in.map > out.S\n"); | 81 | "[--base-relative] < in.map > out.S\n"); |
78 | exit(1); | 82 | exit(1); |
79 | } | 83 | } |
80 | 84 | ||
@@ -171,6 +175,8 @@ static int read_symbol(FILE *in, struct sym_entry *s) | |||
171 | strcpy((char *)s->sym + 1, str); | 175 | strcpy((char *)s->sym + 1, str); |
172 | s->sym[0] = stype; | 176 | s->sym[0] = stype; |
173 | 177 | ||
178 | s->percpu_absolute = 0; | ||
179 | |||
174 | /* Record if we've found __per_cpu_start/end. */ | 180 | /* Record if we've found __per_cpu_start/end. */ |
175 | check_symbol_range(sym, s->addr, &percpu_range, 1); | 181 | check_symbol_range(sym, s->addr, &percpu_range, 1); |
176 | 182 | ||
@@ -202,6 +208,8 @@ static int symbol_valid(struct sym_entry *s) | |||
202 | */ | 208 | */ |
203 | static char *special_symbols[] = { | 209 | static char *special_symbols[] = { |
204 | "kallsyms_addresses", | 210 | "kallsyms_addresses", |
211 | "kallsyms_offsets", | ||
212 | "kallsyms_relative_base", | ||
205 | "kallsyms_num_syms", | 213 | "kallsyms_num_syms", |
206 | "kallsyms_names", | 214 | "kallsyms_names", |
207 | "kallsyms_markers", | 215 | "kallsyms_markers", |
@@ -325,7 +333,7 @@ static int expand_symbol(unsigned char *data, int len, char *result) | |||
325 | 333 | ||
326 | static int symbol_absolute(struct sym_entry *s) | 334 | static int symbol_absolute(struct sym_entry *s) |
327 | { | 335 | { |
328 | return toupper(s->sym[0]) == 'A'; | 336 | return s->percpu_absolute; |
329 | } | 337 | } |
330 | 338 | ||
331 | static void write_src(void) | 339 | static void write_src(void) |
@@ -346,16 +354,48 @@ static void write_src(void) | |||
346 | 354 | ||
347 | printf("\t.section .rodata, \"a\"\n"); | 355 | printf("\t.section .rodata, \"a\"\n"); |
348 | 356 | ||
349 | /* Provide proper symbols relocatability by their '_text' | 357 | /* Provide symbol relocatability by expressing each symbol |
350 | * relativeness. The symbol names cannot be used to construct | 358 | * relative to a fixed anchor point in the runtime image, either '_text' |
351 | * normal symbol references as the list of symbols contains | 359 | * for absolute address tables, in which case the linker will |
352 | * symbols that are declared static and are private to their | 360 | * emit the final addresses at build time. Otherwise, use the |
353 | * .o files. This prevents .tmp_kallsyms.o or any other | 361 | * offset relative to the lowest value encountered of all relative |
354 | * object from referencing them. | 362 | * symbols, and emit non-relocatable fixed offsets that will be fixed |
363 | * up at runtime. | ||
364 | * | ||
365 | * The symbol names cannot be used to construct normal symbol | ||
366 | * references as the list of symbols contains symbols that are | ||
367 | * declared static and are private to their .o files. This prevents | ||
368 | * .tmp_kallsyms.o or any other object from referencing them. | ||
355 | */ | 369 | */ |
356 | output_label("kallsyms_addresses"); | 370 | if (!base_relative) |
371 | output_label("kallsyms_addresses"); | ||
372 | else | ||
373 | output_label("kallsyms_offsets"); | ||
374 | |||
357 | for (i = 0; i < table_cnt; i++) { | 375 | for (i = 0; i < table_cnt; i++) { |
358 | if (!symbol_absolute(&table[i])) { | 376 | if (base_relative) { |
377 | long long offset; | ||
378 | int overflow; | ||
379 | |||
380 | if (!absolute_percpu) { | ||
381 | offset = table[i].addr - relative_base; | ||
382 | overflow = (offset < 0 || offset > UINT_MAX); | ||
383 | } else if (symbol_absolute(&table[i])) { | ||
384 | offset = table[i].addr; | ||
385 | overflow = (offset < 0 || offset > INT_MAX); | ||
386 | } else { | ||
387 | offset = relative_base - table[i].addr - 1; | ||
388 | overflow = (offset < INT_MIN || offset >= 0); | ||
389 | } | ||
390 | if (overflow) { | ||
391 | fprintf(stderr, "kallsyms failure: " | ||
392 | "%s symbol value %#llx out of range in relative mode\n", | ||
393 | symbol_absolute(&table[i]) ? "absolute" : "relative", | ||
394 | table[i].addr); | ||
395 | exit(EXIT_FAILURE); | ||
396 | } | ||
397 | printf("\t.long\t%#x\n", (int)offset); | ||
398 | } else if (!symbol_absolute(&table[i])) { | ||
359 | if (_text <= table[i].addr) | 399 | if (_text <= table[i].addr) |
360 | printf("\tPTR\t_text + %#llx\n", | 400 | printf("\tPTR\t_text + %#llx\n", |
361 | table[i].addr - _text); | 401 | table[i].addr - _text); |
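
In --base-relative mode each table entry shrinks from a pointer-sized word to a 32-bit value. When --absolute-percpu is also in effect, the sign tells the two kinds of entry apart: absolute per-cpu symbols are stored as their raw, non-negative value, and everything else as relative_base - addr - 1, which is always negative. A compact encode/decode round trip with an illustrative base address:

#include <stdio.h>

static unsigned long long relative_base = 0xffffffff81000000ULL;

/* Encoding used when both --base-relative and --absolute-percpu are set */
static int encode(unsigned long long addr, int is_absolute)
{
	if (is_absolute)
		return (int)addr;                 /* >= 0: raw percpu value */
	return (int)(relative_base - addr - 1);   /* < 0: relative offset */
}

static unsigned long long decode(int v)
{
	if (v >= 0)
		return (unsigned long long)v;     /* absolute percpu symbol */
	return relative_base - 1 - v;             /* relative symbol */
}

int main(void)
{
	unsigned long long sym = 0xffffffff81234567ULL;

	printf("%#llx\n", decode(encode(sym, 0)));    /* round-trips to sym */
	printf("%#llx\n", decode(encode(0x4000, 1))); /* 0x4000 */
	return 0;
}
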
@@ -368,6 +408,12 @@ static void write_src(void) | |||
368 | } | 408 | } |
369 | printf("\n"); | 409 | printf("\n"); |
370 | 410 | ||
411 | if (base_relative) { | ||
412 | output_label("kallsyms_relative_base"); | ||
413 | printf("\tPTR\t_text - %#llx\n", _text - relative_base); | ||
414 | printf("\n"); | ||
415 | } | ||
416 | |||
371 | output_label("kallsyms_num_syms"); | 417 | output_label("kallsyms_num_syms"); |
372 | printf("\tPTR\t%d\n", table_cnt); | 418 | printf("\tPTR\t%d\n", table_cnt); |
373 | printf("\n"); | 419 | printf("\n"); |
@@ -681,8 +727,27 @@ static void make_percpus_absolute(void) | |||
681 | unsigned int i; | 727 | unsigned int i; |
682 | 728 | ||
683 | for (i = 0; i < table_cnt; i++) | 729 | for (i = 0; i < table_cnt; i++) |
684 | if (symbol_in_range(&table[i], &percpu_range, 1)) | 730 | if (symbol_in_range(&table[i], &percpu_range, 1)) { |
731 | /* | ||
732 | * Keep the 'A' override for percpu symbols to | ||
733 | * ensure consistent behavior compared to older | ||
734 | * versions of this tool. | ||
735 | */ | ||
685 | table[i].sym[0] = 'A'; | 736 | table[i].sym[0] = 'A'; |
737 | table[i].percpu_absolute = 1; | ||
738 | } | ||
739 | } | ||
740 | |||
741 | /* find the minimum non-absolute symbol address */ | ||
742 | static void record_relative_base(void) | ||
743 | { | ||
744 | unsigned int i; | ||
745 | |||
746 | relative_base = -1ULL; | ||
747 | for (i = 0; i < table_cnt; i++) | ||
748 | if (!symbol_absolute(&table[i]) && | ||
749 | table[i].addr < relative_base) | ||
750 | relative_base = table[i].addr; | ||
686 | } | 751 | } |
687 | 752 | ||
688 | int main(int argc, char **argv) | 753 | int main(int argc, char **argv) |
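record_relative_base() scans for the minimum address but deliberately skips absolute symbols; since main() (next hunk) calls make_percpus_absolute() before it, percpu symbols are already flagged absolute and cannot drag the base down to a near-zero per-CPU offset, which would push every text-address delta past 32 bits. A self-contained toy illustration with made-up addresses:

#include <stdint.h>
#include <stdio.h>

/* Toy version of record_relative_base(): absolute symbols (here a
 * percpu one) are skipped, so the base settles on the lowest text
 * address rather than a tiny per-CPU offset. */
struct sym { uint64_t addr; int is_absolute; };

int main(void)
{
	struct sym table[] = {
		{ 0x0000000000014940ULL, 1 },	/* percpu, made absolute */
		{ 0xffffffff81000000ULL, 0 },	/* _text */
		{ 0xffffffff81234560ULL, 0 },	/* some function */
	};
	uint64_t base = ~0ULL;			/* the tool's -1ULL sentinel */
	unsigned int i;

	for (i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		if (!table[i].is_absolute && table[i].addr < base)
			base = table[i].addr;

	printf("relative_base = %#llx\n", (unsigned long long)base);
	/* prints: relative_base = 0xffffffff81000000 */
	return 0;
}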
@@ -703,7 +768,9 @@ int main(int argc, char **argv) | |||
703 | } else if (strncmp(argv[i], "--page-offset=", 14) == 0) { | 768 | } else if (strncmp(argv[i], "--page-offset=", 14) == 0) { |
704 | const char *p = &argv[i][14]; | 769 | const char *p = &argv[i][14]; |
705 | kernel_start_addr = strtoull(p, NULL, 16); | 770 | kernel_start_addr = strtoull(p, NULL, 16); |
706 | } else | 771 | } else if (strcmp(argv[i], "--base-relative") == 0) |
772 | base_relative = 1; | ||
773 | else | ||
707 | usage(); | 774 | usage(); |
708 | } | 775 | } |
709 | } else if (argc != 1) | 776 | } else if (argc != 1) |
@@ -712,6 +779,8 @@ int main(int argc, char **argv) | |||
712 | read_map(stdin); | 779 | read_map(stdin); |
713 | if (absolute_percpu) | 780 | if (absolute_percpu) |
714 | make_percpus_absolute(); | 781 | make_percpus_absolute(); |
782 | if (base_relative) | ||
783 | record_relative_base(); | ||
715 | sort_symbols(); | 784 | sort_symbols(); |
716 | optimize_token_table(); | 785 | optimize_token_table(); |
717 | write_src(); | 786 | write_src(); |
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index ba6c34ea5429..453ede9d2f3d 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh | |||
@@ -86,10 +86,14 @@ kallsyms() | |||
86 | kallsymopt="${kallsymopt} --page-offset=$CONFIG_PAGE_OFFSET" | 86 | kallsymopt="${kallsymopt} --page-offset=$CONFIG_PAGE_OFFSET" |
87 | fi | 87 | fi |
88 | 88 | ||
89 | if [ -n "${CONFIG_X86_64}" ]; then | 89 | if [ -n "${CONFIG_KALLSYMS_ABSOLUTE_PERCPU}" ]; then |
90 | kallsymopt="${kallsymopt} --absolute-percpu" | 90 | kallsymopt="${kallsymopt} --absolute-percpu" |
91 | fi | 91 | fi |
92 | 92 | ||
93 | if [ -n "${CONFIG_KALLSYMS_BASE_RELATIVE}" ]; then | ||
94 | kallsymopt="${kallsymopt} --base-relative" | ||
95 | fi | ||
96 | |||
93 | local aflags="${KBUILD_AFLAGS} ${KBUILD_AFLAGS_KERNEL} \ | 97 | local aflags="${KBUILD_AFLAGS} ${KBUILD_AFLAGS_KERNEL} \ |
94 | ${NOSTDINC_FLAGS} ${LINUXINCLUDE} ${KBUILD_CPPFLAGS}" | 98 | ${NOSTDINC_FLAGS} ${LINUXINCLUDE} ${KBUILD_CPPFLAGS}" |
95 | 99 | ||
diff --git a/scripts/namespace.pl b/scripts/namespace.pl index a71be6b7cdec..9f3c9d47a4a5 100755 --- a/scripts/namespace.pl +++ b/scripts/namespace.pl | |||
@@ -117,6 +117,8 @@ my %nameexception = ( | |||
117 | 'kallsyms_names' => 1, | 117 | 'kallsyms_names' => 1, |
118 | 'kallsyms_num_syms' => 1, | 118 | 'kallsyms_num_syms' => 1, |
119 | 'kallsyms_addresses'=> 1, | 119 | 'kallsyms_addresses'=> 1, |
120 | 'kallsyms_offsets' => 1, | ||
121 | 'kallsyms_relative_base'=> 1, | ||
120 | '__this_module' => 1, | 122 | '__this_module' => 1, |
121 | '_etext' => 1, | 123 | '_etext' => 1, |
122 | '_edata' => 1, | 124 | '_edata' => 1, |
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 4d3340cce9a0..c9cb3be47cff 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c | |||
@@ -602,7 +602,7 @@ static int gfpcmp(const void *a, const void *b) | |||
602 | return fa->flags - fb->flags; | 602 | return fa->flags - fb->flags; |
603 | } | 603 | } |
604 | 604 | ||
605 | /* see include/trace/events/gfpflags.h */ | 605 | /* see include/trace/events/mmflags.h */ |
606 | static const struct { | 606 | static const struct { |
607 | const char *original; | 607 | const char *original; |
608 | const char *compact; | 608 | const char *compact; |
@@ -612,30 +612,39 @@ static const struct { | |||
612 | { "GFP_HIGHUSER", "HU" }, | 612 | { "GFP_HIGHUSER", "HU" }, |
613 | { "GFP_USER", "U" }, | 613 | { "GFP_USER", "U" }, |
614 | { "GFP_TEMPORARY", "TMP" }, | 614 | { "GFP_TEMPORARY", "TMP" }, |
615 | { "GFP_KERNEL_ACCOUNT", "KAC" }, | ||
615 | { "GFP_KERNEL", "K" }, | 616 | { "GFP_KERNEL", "K" }, |
616 | { "GFP_NOFS", "NF" }, | 617 | { "GFP_NOFS", "NF" }, |
617 | { "GFP_ATOMIC", "A" }, | 618 | { "GFP_ATOMIC", "A" }, |
618 | { "GFP_NOIO", "NI" }, | 619 | { "GFP_NOIO", "NI" }, |
619 | { "GFP_HIGH", "H" }, | ||
620 | { "GFP_WAIT", "W" }, | ||
621 | { "GFP_IO", "I" }, | ||
622 | { "GFP_COLD", "CO" }, | ||
623 | { "GFP_NOWARN", "NWR" }, | ||
624 | { "GFP_REPEAT", "R" }, | ||
625 | { "GFP_NOFAIL", "NF" }, | ||
626 | { "GFP_NORETRY", "NR" }, | ||
627 | { "GFP_COMP", "C" }, | ||
628 | { "GFP_ZERO", "Z" }, | ||
629 | { "GFP_NOMEMALLOC", "NMA" }, | ||
630 | { "GFP_MEMALLOC", "MA" }, | ||
631 | { "GFP_HARDWALL", "HW" }, | ||
632 | { "GFP_THISNODE", "TN" }, | ||
633 | { "GFP_RECLAIMABLE", "RC" }, | ||
634 | { "GFP_MOVABLE", "M" }, | ||
635 | { "GFP_NOTRACK", "NT" }, | ||
636 | { "GFP_NO_KSWAPD", "NK" }, | ||
637 | { "GFP_OTHER_NODE", "ON" }, | ||
638 | { "GFP_NOWAIT", "NW" }, | 620 | { "GFP_NOWAIT", "NW" }, |
621 | { "GFP_DMA", "D" }, | ||
622 | { "__GFP_HIGHMEM", "HM" }, | ||
623 | { "GFP_DMA32", "D32" }, | ||
624 | { "__GFP_HIGH", "H" }, | ||
625 | { "__GFP_ATOMIC", "_A" }, | ||
626 | { "__GFP_IO", "I" }, | ||
627 | { "__GFP_FS", "F" }, | ||
628 | { "__GFP_COLD", "CO" }, | ||
629 | { "__GFP_NOWARN", "NWR" }, | ||
630 | { "__GFP_REPEAT", "R" }, | ||
631 | { "__GFP_NOFAIL", "NF" }, | ||
632 | { "__GFP_NORETRY", "NR" }, | ||
633 | { "__GFP_COMP", "C" }, | ||
634 | { "__GFP_ZERO", "Z" }, | ||
635 | { "__GFP_NOMEMALLOC", "NMA" }, | ||
636 | { "__GFP_MEMALLOC", "MA" }, | ||
637 | { "__GFP_HARDWALL", "HW" }, | ||
638 | { "__GFP_THISNODE", "TN" }, | ||
639 | { "__GFP_RECLAIMABLE", "RC" }, | ||
640 | { "__GFP_MOVABLE", "M" }, | ||
641 | { "__GFP_ACCOUNT", "AC" }, | ||
642 | { "__GFP_NOTRACK", "NT" }, | ||
643 | { "__GFP_WRITE", "WR" }, | ||
644 | { "__GFP_RECLAIM", "R" }, | ||
645 | { "__GFP_DIRECT_RECLAIM", "DR" }, | ||
646 | { "__GFP_KSWAPD_RECLAIM", "KR" }, | ||
647 | { "__GFP_OTHER_NODE", "ON" }, | ||
639 | }; | 648 | }; |
640 | 649 | ||
641 | static size_t max_gfp_len; | 650 | static size_t max_gfp_len; |
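The rewritten table tracks the flag names the tracepoint now emits (composite GFP_* aliases first, then the individual __GFP_* bits), each paired with a short mnemonic so allocation-site listings stay narrow. A minimal, hypothetical sketch of how such a table compacts a printed flags string; compact_gfp() is illustrative, not perf's own helper, which exact-matches each '|'-separated token the same way:

#include <stdio.h>
#include <string.h>

/* Abbreviated name->mnemonic table in the style of the one above. */
static const struct { const char *original, *compact; } gfp_compact[] = {
	{ "GFP_KERNEL_ACCOUNT",	"KAC" },
	{ "GFP_KERNEL",		"K" },
	{ "__GFP_ZERO",		"Z" },
};

/* Replace each exactly-matching token of a '|'-separated flags
 * string with its compact mnemonic. */
static void compact_gfp(const char *flags, char *out, size_t outlen)
{
	const char *tok = flags;

	out[0] = '\0';
	while (*tok) {
		size_t len = strcspn(tok, "|");
		size_t i;

		for (i = 0; i < sizeof(gfp_compact) / sizeof(gfp_compact[0]); i++) {
			if (strlen(gfp_compact[i].original) != len ||
			    strncmp(tok, gfp_compact[i].original, len))
				continue;
			if (out[0])
				strncat(out, "|", outlen - strlen(out) - 1);
			strncat(out, gfp_compact[i].compact,
				outlen - strlen(out) - 1);
			break;
		}
		tok += len;
		if (*tok == '|')
			tok++;
	}
}

int main(void)
{
	char buf[64];

	compact_gfp("GFP_KERNEL|__GFP_ZERO", buf, sizeof(buf));
	puts(buf);	/* prints: K|Z */
	return 0;
}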
diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c index 86e698d07e20..1889163f2f05 100644 --- a/tools/vm/slabinfo.c +++ b/tools/vm/slabinfo.c | |||
@@ -135,7 +135,7 @@ static void usage(void) | |||
135 | "\nValid debug options (FZPUT may be combined)\n" | 135 | "\nValid debug options (FZPUT may be combined)\n" |
136 | "a / A Switch on all debug options (=FZUP)\n" | 136 | "a / A Switch on all debug options (=FZUP)\n" |
137 | "- Switch off all debug options\n" | 137 | "- Switch off all debug options\n" |
138 | "f / F Sanity Checks (SLAB_DEBUG_FREE)\n" | 138 | "f / F Sanity Checks (SLAB_CONSISTENCY_CHECKS)\n" |
139 | "z / Z Redzoning\n" | 139 | "z / Z Redzoning\n" |
140 | "p / P Poisoning\n" | 140 | "p / P Poisoning\n" |
141 | "u / U Tracking\n" | 141 | "u / U Tracking\n" |