author     Linus Torvalds <torvalds@linux-foundation.org>   2018-04-06 17:19:26 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2018-04-06 17:19:26 -0400
commit     3b54765cca23152ec0cc254b75c877c10f6e2870
tree       795785d2a9d7498df9452be138867bd996c4cea5
parent     3fd14cdcc05a682b03743683ce3a726898b20555
parent     97b1255cb27c551d7c3c5c496d787da40772da99
Merge branch 'akpm' (patches from Andrew)
Merge updates from Andrew Morton:
- a few misc things
- ocfs2 updates
- the v9fs maintainers have been missing for a long time. I've taken
over v9fs patch slinging.
- most of MM
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (116 commits)
mm,oom_reaper: check for MMF_OOM_SKIP before complaining
mm/ksm: fix interaction with THP
mm/memblock.c: cast constant ULLONG_MAX to phys_addr_t
headers: untangle kmemleak.h from mm.h
include/linux/mmdebug.h: make VM_WARN* non-rvals
mm/page_isolation.c: make start_isolate_page_range() fail if already isolated
mm: change return type to vm_fault_t
mm, oom: remove 3% bonus for CAP_SYS_ADMIN processes
mm, page_alloc: wakeup kcompactd even if kswapd cannot free more memory
kernel/fork.c: detect early free of a live mm
mm: make counting of list_lru_one::nr_items lockless
mm/swap_state.c: make bool enable_vma_readahead and swap_vma_readahead() static
block_invalidatepage(): only release page if the full page was invalidated
mm: kernel-doc: add missing parameter descriptions
mm/swap.c: remove @cold parameter description for release_pages()
mm/nommu: remove description of alloc_vm_area
zram: drop max_zpage_size and use zs_huge_class_size()
zsmalloc: introduce zs_huge_class_size()
mm: fix races between swapoff and flush dcache
fs/direct-io.c: minor cleanups in do_blockdev_direct_IO
...
151 files changed, 1601 insertions(+), 1269 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 34dac7cef4cf..3c87a69cffcb 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1840,30 +1840,29 @@ | |||
1840 | keepinitrd [HW,ARM] | 1840 | keepinitrd [HW,ARM] |
1841 | 1841 | ||
1842 | kernelcore= [KNL,X86,IA-64,PPC] | 1842 | kernelcore= [KNL,X86,IA-64,PPC] |
1843 | Format: nn[KMGTPE] | "mirror" | 1843 | Format: nn[KMGTPE] | nn% | "mirror" |
1844 | This parameter | 1844 | This parameter specifies the amount of memory usable by |
1845 | specifies the amount of memory usable by the kernel | 1845 | the kernel for non-movable allocations. The requested |
1846 | for non-movable allocations. The requested amount is | 1846 | amount is spread evenly throughout all nodes in the |
1847 | spread evenly throughout all nodes in the system. The | 1847 | system as ZONE_NORMAL. The remaining memory is used for |
1848 | remaining memory in each node is used for Movable | 1848 | movable memory in its own zone, ZONE_MOVABLE. In the |
1849 | pages. In the event, a node is too small to have both | 1849 | event, a node is too small to have both ZONE_NORMAL and |
1850 | kernelcore and Movable pages, kernelcore pages will | 1850 | ZONE_MOVABLE, kernelcore memory will take priority and |
1851 | take priority and other nodes will have a larger number | 1851 | other nodes will have a larger ZONE_MOVABLE. |
1852 | of Movable pages. The Movable zone is used for the | 1852 | |
1853 | allocation of pages that may be reclaimed or moved | 1853 | ZONE_MOVABLE is used for the allocation of pages that |
1854 | by the page migration subsystem. This means that | 1854 | may be reclaimed or moved by the page migration |
1855 | HugeTLB pages may not be allocated from this zone. | 1855 | subsystem. Note that allocations like PTEs-from-HighMem |
1856 | Note that allocations like PTEs-from-HighMem still | 1856 | still use the HighMem zone if it exists, and the Normal |
1857 | use the HighMem zone if it exists, and the Normal | ||
1858 | zone if it does not. | 1857 | zone if it does not. |
1859 | 1858 | ||
1860 | Instead of specifying the amount of memory (nn[KMGTPE]), | 1859 | It is possible to specify the exact amount of memory in |
1861 | you can specify "mirror" option. In case "mirror" | 1860 | the form of "nn[KMGTPE]", a percentage of total system |
1861 | memory in the form of "nn%", or "mirror". If "mirror" | ||
1862 | option is specified, mirrored (reliable) memory is used | 1862 | option is specified, mirrored (reliable) memory is used |
1863 | for non-movable allocations and remaining memory is used | 1863 | for non-movable allocations and remaining memory is used |
1864 | for Movable pages. nn[KMGTPE] and "mirror" are exclusive, | 1864 | for Movable pages. "nn[KMGTPE]", "nn%", and "mirror" |
1865 | so you can NOT specify nn[KMGTPE] and "mirror" at the same | 1865 | are exclusive, so you cannot specify multiple forms. |
1866 | time. | ||
1867 | 1866 | ||
1868 | kgdbdbgp= [KGDB,HW] kgdb over EHCI usb debug port. | 1867 | kgdbdbgp= [KGDB,HW] kgdb over EHCI usb debug port. |
1869 | Format: <Controller#>[,poll interval] | 1868 | Format: <Controller#>[,poll interval] |
@@ -2377,13 +2376,14 @@ | |||
2377 | mousedev.yres= [MOUSE] Vertical screen resolution, used for devices | 2376 | mousedev.yres= [MOUSE] Vertical screen resolution, used for devices |
2378 | reporting absolute coordinates, such as tablets | 2377 | reporting absolute coordinates, such as tablets |
2379 | 2378 | ||
2380 | movablecore=nn[KMG] [KNL,X86,IA-64,PPC] This parameter | 2379 | movablecore= [KNL,X86,IA-64,PPC] |
2381 | is similar to kernelcore except it specifies the | 2380 | Format: nn[KMGTPE] | nn% |
2382 | amount of memory used for migratable allocations. | 2381 | This parameter is the complement to kernelcore=, it |
2383 | If both kernelcore and movablecore is specified, | 2382 | specifies the amount of memory used for migratable |
2384 | then kernelcore will be at *least* the specified | 2383 | allocations. If both kernelcore and movablecore is |
2385 | value but may be more. If movablecore on its own | 2384 | specified, then kernelcore will be at *least* the |
2386 | is specified, the administrator must be careful | 2385 | specified value but may be more. If movablecore on its |
2386 | own is specified, the administrator must be careful | ||
2387 | that the amount of memory usable for all allocations | 2387 | that the amount of memory usable for all allocations |
2388 | is not too small. | 2388 | is not too small. |
2389 | 2389 | ||
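As an illustration (not part of this patch), the extended syntax documented above admits boot command lines using either the absolute or the percentage form, for example:

	kernelcore=512M
	kernelcore=20%
	movablecore=4G

The first keeps 512 MiB as ZONE_NORMAL spread evenly across nodes, the second keeps 20% of total system memory, and movablecore= sizes ZONE_MOVABLE directly; the values here are arbitrary examples, and as the text above states the nn[KMGTPE], nn% and "mirror" forms remain mutually exclusive.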
diff --git a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
index ba976805853a..66bfd8396877 100644
--- a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
+++ b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
@@ -111,7 +111,7 @@ my $regex_direct_begin_default = 'order=([0-9]*) may_writepage=([0-9]*) gfp_flag | |||
111 | my $regex_direct_end_default = 'nr_reclaimed=([0-9]*)'; | 111 | my $regex_direct_end_default = 'nr_reclaimed=([0-9]*)'; |
112 | my $regex_kswapd_wake_default = 'nid=([0-9]*) order=([0-9]*)'; | 112 | my $regex_kswapd_wake_default = 'nid=([0-9]*) order=([0-9]*)'; |
113 | my $regex_kswapd_sleep_default = 'nid=([0-9]*)'; | 113 | my $regex_kswapd_sleep_default = 'nid=([0-9]*)'; |
114 | my $regex_wakeup_kswapd_default = 'nid=([0-9]*) zid=([0-9]*) order=([0-9]*)'; | 114 | my $regex_wakeup_kswapd_default = 'nid=([0-9]*) zid=([0-9]*) order=([0-9]*) gfp_flags=([A-Z_|]*)'; |
115 | my $regex_lru_isolate_default = 'isolate_mode=([0-9]*) classzone_idx=([0-9]*) order=([0-9]*) nr_requested=([0-9]*) nr_scanned=([0-9]*) nr_skipped=([0-9]*) nr_taken=([0-9]*) lru=([a-z_]*)'; | 115 | my $regex_lru_isolate_default = 'isolate_mode=([0-9]*) classzone_idx=([0-9]*) order=([0-9]*) nr_requested=([0-9]*) nr_scanned=([0-9]*) nr_skipped=([0-9]*) nr_taken=([0-9]*) lru=([a-z_]*)'; |
116 | my $regex_lru_shrink_inactive_default = 'nid=([0-9]*) nr_scanned=([0-9]*) nr_reclaimed=([0-9]*) nr_dirty=([0-9]*) nr_writeback=([0-9]*) nr_congested=([0-9]*) nr_immediate=([0-9]*) nr_activate=([0-9]*) nr_ref_keep=([0-9]*) nr_unmap_fail=([0-9]*) priority=([0-9]*) flags=([A-Z_|]*)'; | 116 | my $regex_lru_shrink_inactive_default = 'nid=([0-9]*) nr_scanned=([0-9]*) nr_reclaimed=([0-9]*) nr_dirty=([0-9]*) nr_writeback=([0-9]*) nr_congested=([0-9]*) nr_immediate=([0-9]*) nr_activate=([0-9]*) nr_ref_keep=([0-9]*) nr_unmap_fail=([0-9]*) priority=([0-9]*) flags=([A-Z_|]*)'; |
117 | my $regex_lru_shrink_active_default = 'lru=([A-Z_]*) nr_scanned=([0-9]*) nr_rotated=([0-9]*) priority=([0-9]*)'; | 117 | my $regex_lru_shrink_active_default = 'lru=([A-Z_]*) nr_scanned=([0-9]*) nr_rotated=([0-9]*) priority=([0-9]*)'; |
@@ -201,7 +201,7 @@ $regex_kswapd_sleep = generate_traceevent_regex( | |||
201 | $regex_wakeup_kswapd = generate_traceevent_regex( | 201 | $regex_wakeup_kswapd = generate_traceevent_regex( |
202 | "vmscan/mm_vmscan_wakeup_kswapd", | 202 | "vmscan/mm_vmscan_wakeup_kswapd", |
203 | $regex_wakeup_kswapd_default, | 203 | $regex_wakeup_kswapd_default, |
204 | "nid", "zid", "order"); | 204 | "nid", "zid", "order", "gfp_flags"); |
205 | $regex_lru_isolate = generate_traceevent_regex( | 205 | $regex_lru_isolate = generate_traceevent_regex( |
206 | "vmscan/mm_vmscan_lru_isolate", | 206 | "vmscan/mm_vmscan_lru_isolate", |
207 | $regex_lru_isolate_default, | 207 | $regex_lru_isolate_default, |
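As an illustration (constructed from the updated regex, not copied from a real trace), a post-change mm_vmscan_wakeup_kswapd event is expected to carry a gfp_flags field, along the lines of:

	nid=0 zid=2 order=0 gfp_flags=GFP_KERNEL|__GFP_HIGHMEM

which the old three-field default regex would no longer match; the change to the "nid", "zid", "order", "gfp_flags" argument list above keeps the generated and default regexes in step.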
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index 2072f3451e9c..9dbe645ee127 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -833,7 +833,7 @@ void flush_dcache_page(struct page *page) | |||
833 | } | 833 | } |
834 | 834 | ||
835 | /* don't handle anon pages here */ | 835 | /* don't handle anon pages here */ |
836 | mapping = page_mapping(page); | 836 | mapping = page_mapping_file(page); |
837 | if (!mapping) | 837 | if (!mapping) |
838 | return; | 838 | return; |
839 | 839 | ||
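The page_mapping_file() helper used here (and in the other cache-flush hunks below) is introduced by the "mm: fix races between swapoff and flush dcache" patch from this series, whose defining hunk is not shown in this excerpt. A sketch of the assumed helper, for reference:

	/*
	 * Presumed shape of the new helper: cache-flush paths that only care
	 * about file-backed pages must not poke at a swap-cache page, whose
	 * page->mapping encoding can change under a concurrent swapoff, so
	 * they get NULL instead of the swapper address space.
	 */
	static inline struct address_space *page_mapping_file(struct page *page)
	{
		if (unlikely(PageSwapCache(page)))
			return NULL;
		return page_mapping(page);
	}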
diff --git a/arch/arm/boot/compressed/misc.c b/arch/arm/boot/compressed/misc.c
index 16a8a804e958..e8fe51f4e97a 100644
--- a/arch/arm/boot/compressed/misc.c
+++ b/arch/arm/boot/compressed/misc.c
@@ -128,12 +128,7 @@ asmlinkage void __div0(void) | |||
128 | error("Attempting division by 0!"); | 128 | error("Attempting division by 0!"); |
129 | } | 129 | } |
130 | 130 | ||
131 | unsigned long __stack_chk_guard; | 131 | const unsigned long __stack_chk_guard = 0x000a0dff; |
132 | |||
133 | void __stack_chk_guard_setup(void) | ||
134 | { | ||
135 | __stack_chk_guard = 0x000a0dff; | ||
136 | } | ||
137 | 132 | ||
138 | void __stack_chk_fail(void) | 133 | void __stack_chk_fail(void) |
139 | { | 134 | { |
@@ -150,8 +145,6 @@ decompress_kernel(unsigned long output_start, unsigned long free_mem_ptr_p, | |||
150 | { | 145 | { |
151 | int ret; | 146 | int ret; |
152 | 147 | ||
153 | __stack_chk_guard_setup(); | ||
154 | |||
155 | output_data = (unsigned char *)output_start; | 148 | output_data = (unsigned char *)output_start; |
156 | free_mem_ptr = free_mem_ptr_p; | 149 | free_mem_ptr = free_mem_ptr_p; |
157 | free_mem_end_ptr = free_mem_ptr_end_p; | 150 | free_mem_end_ptr = free_mem_ptr_end_p; |
diff --git a/arch/arm/mm/copypage-v4mc.c b/arch/arm/mm/copypage-v4mc.c
index 1267e64133b9..0224416cba3c 100644
--- a/arch/arm/mm/copypage-v4mc.c
+++ b/arch/arm/mm/copypage-v4mc.c
@@ -70,7 +70,7 @@ void v4_mc_copy_user_highpage(struct page *to, struct page *from, | |||
70 | void *kto = kmap_atomic(to); | 70 | void *kto = kmap_atomic(to); |
71 | 71 | ||
72 | if (!test_and_set_bit(PG_dcache_clean, &from->flags)) | 72 | if (!test_and_set_bit(PG_dcache_clean, &from->flags)) |
73 | __flush_dcache_page(page_mapping(from), from); | 73 | __flush_dcache_page(page_mapping_file(from), from); |
74 | 74 | ||
75 | raw_spin_lock(&minicache_lock); | 75 | raw_spin_lock(&minicache_lock); |
76 | 76 | ||
diff --git a/arch/arm/mm/copypage-v6.c b/arch/arm/mm/copypage-v6.c
index 70423345da26..a698e575e321 100644
--- a/arch/arm/mm/copypage-v6.c
+++ b/arch/arm/mm/copypage-v6.c
@@ -76,7 +76,7 @@ static void v6_copy_user_highpage_aliasing(struct page *to, | |||
76 | unsigned long kfrom, kto; | 76 | unsigned long kfrom, kto; |
77 | 77 | ||
78 | if (!test_and_set_bit(PG_dcache_clean, &from->flags)) | 78 | if (!test_and_set_bit(PG_dcache_clean, &from->flags)) |
79 | __flush_dcache_page(page_mapping(from), from); | 79 | __flush_dcache_page(page_mapping_file(from), from); |
80 | 80 | ||
81 | /* FIXME: not highmem safe */ | 81 | /* FIXME: not highmem safe */ |
82 | discard_old_kernel_data(page_address(to)); | 82 | discard_old_kernel_data(page_address(to)); |
diff --git a/arch/arm/mm/copypage-xscale.c b/arch/arm/mm/copypage-xscale.c
index 0fb85025344d..97972379f4d6 100644
--- a/arch/arm/mm/copypage-xscale.c
+++ b/arch/arm/mm/copypage-xscale.c
@@ -90,7 +90,7 @@ void xscale_mc_copy_user_highpage(struct page *to, struct page *from, | |||
90 | void *kto = kmap_atomic(to); | 90 | void *kto = kmap_atomic(to); |
91 | 91 | ||
92 | if (!test_and_set_bit(PG_dcache_clean, &from->flags)) | 92 | if (!test_and_set_bit(PG_dcache_clean, &from->flags)) |
93 | __flush_dcache_page(page_mapping(from), from); | 93 | __flush_dcache_page(page_mapping_file(from), from); |
94 | 94 | ||
95 | raw_spin_lock(&minicache_lock); | 95 | raw_spin_lock(&minicache_lock); |
96 | 96 | ||
diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c
index d9e0d00a6699..4d75dae5ac96 100644
--- a/arch/arm/mm/fault-armv.c
+++ b/arch/arm/mm/fault-armv.c
@@ -195,7 +195,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, | |||
195 | if (page == ZERO_PAGE(0)) | 195 | if (page == ZERO_PAGE(0)) |
196 | return; | 196 | return; |
197 | 197 | ||
198 | mapping = page_mapping(page); | 198 | mapping = page_mapping_file(page); |
199 | if (!test_and_set_bit(PG_dcache_clean, &page->flags)) | 199 | if (!test_and_set_bit(PG_dcache_clean, &page->flags)) |
200 | __flush_dcache_page(mapping, page); | 200 | __flush_dcache_page(mapping, page); |
201 | if (mapping) { | 201 | if (mapping) { |
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index f1e6190aa7ea..58469623b015 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -285,7 +285,7 @@ void __sync_icache_dcache(pte_t pteval) | |||
285 | 285 | ||
286 | page = pfn_to_page(pfn); | 286 | page = pfn_to_page(pfn); |
287 | if (cache_is_vipt_aliasing()) | 287 | if (cache_is_vipt_aliasing()) |
288 | mapping = page_mapping(page); | 288 | mapping = page_mapping_file(page); |
289 | else | 289 | else |
290 | mapping = NULL; | 290 | mapping = NULL; |
291 | 291 | ||
@@ -333,7 +333,7 @@ void flush_dcache_page(struct page *page) | |||
333 | return; | 333 | return; |
334 | } | 334 | } |
335 | 335 | ||
336 | mapping = page_mapping(page); | 336 | mapping = page_mapping_file(page); |
337 | 337 | ||
338 | if (!cache_ops_need_broadcast() && | 338 | if (!cache_ops_need_broadcast() && |
339 | mapping && !page_mapcount(page)) | 339 | mapping && !page_mapcount(page)) |
@@ -363,7 +363,7 @@ void flush_kernel_dcache_page(struct page *page) | |||
363 | if (cache_is_vivt() || cache_is_vipt_aliasing()) { | 363 | if (cache_is_vivt() || cache_is_vipt_aliasing()) { |
364 | struct address_space *mapping; | 364 | struct address_space *mapping; |
365 | 365 | ||
366 | mapping = page_mapping(page); | 366 | mapping = page_mapping_file(page); |
367 | 367 | ||
368 | if (!mapping || mapping_mapped(mapping)) { | 368 | if (!mapping || mapping_mapped(mapping)) { |
369 | void *addr; | 369 | void *addr; |
diff --git a/arch/mips/boot/compressed/decompress.c b/arch/mips/boot/compressed/decompress.c
index fdf99e9dd4c3..81df9047e110 100644
--- a/arch/mips/boot/compressed/decompress.c
+++ b/arch/mips/boot/compressed/decompress.c
@@ -76,12 +76,7 @@ void error(char *x) | |||
76 | #include "../../../../lib/decompress_unxz.c" | 76 | #include "../../../../lib/decompress_unxz.c" |
77 | #endif | 77 | #endif |
78 | 78 | ||
79 | unsigned long __stack_chk_guard; | 79 | const unsigned long __stack_chk_guard = 0x000a0dff; |
80 | |||
81 | void __stack_chk_guard_setup(void) | ||
82 | { | ||
83 | __stack_chk_guard = 0x000a0dff; | ||
84 | } | ||
85 | 80 | ||
86 | void __stack_chk_fail(void) | 81 | void __stack_chk_fail(void) |
87 | { | 82 | { |
@@ -92,8 +87,6 @@ void decompress_kernel(unsigned long boot_heap_start) | |||
92 | { | 87 | { |
93 | unsigned long zimage_start, zimage_size; | 88 | unsigned long zimage_start, zimage_size; |
94 | 89 | ||
95 | __stack_chk_guard_setup(); | ||
96 | |||
97 | zimage_start = (unsigned long)(&__image_begin); | 90 | zimage_start = (unsigned long)(&__image_begin); |
98 | zimage_size = (unsigned long)(&__image_end) - | 91 | zimage_size = (unsigned long)(&__image_end) - |
99 | (unsigned long)(&__image_begin); | 92 | (unsigned long)(&__image_begin); |
diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c
index 44ac64d51827..0d3c656feba0 100644
--- a/arch/mips/mm/cache.c
+++ b/arch/mips/mm/cache.c
@@ -86,7 +86,7 @@ SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, bytes, | |||
86 | 86 | ||
87 | void __flush_dcache_page(struct page *page) | 87 | void __flush_dcache_page(struct page *page) |
88 | { | 88 | { |
89 | struct address_space *mapping = page_mapping(page); | 89 | struct address_space *mapping = page_mapping_file(page); |
90 | unsigned long addr; | 90 | unsigned long addr; |
91 | 91 | ||
92 | if (mapping && !mapping_mapped(mapping)) { | 92 | if (mapping && !mapping_mapped(mapping)) { |
diff --git a/arch/nios2/mm/cacheflush.c b/arch/nios2/mm/cacheflush.c
index 87bf88ed04c6..506f6e1c86d5 100644
--- a/arch/nios2/mm/cacheflush.c
+++ b/arch/nios2/mm/cacheflush.c
@@ -180,7 +180,7 @@ void flush_dcache_page(struct page *page) | |||
180 | if (page == ZERO_PAGE(0)) | 180 | if (page == ZERO_PAGE(0)) |
181 | return; | 181 | return; |
182 | 182 | ||
183 | mapping = page_mapping(page); | 183 | mapping = page_mapping_file(page); |
184 | 184 | ||
185 | /* Flush this page if there are aliases. */ | 185 | /* Flush this page if there are aliases. */ |
186 | if (mapping && !mapping_mapped(mapping)) { | 186 | if (mapping && !mapping_mapped(mapping)) { |
@@ -215,7 +215,7 @@ void update_mmu_cache(struct vm_area_struct *vma, | |||
215 | if (page == ZERO_PAGE(0)) | 215 | if (page == ZERO_PAGE(0)) |
216 | return; | 216 | return; |
217 | 217 | ||
218 | mapping = page_mapping(page); | 218 | mapping = page_mapping_file(page); |
219 | if (!test_and_set_bit(PG_dcache_clean, &page->flags)) | 219 | if (!test_and_set_bit(PG_dcache_clean, &page->flags)) |
220 | __flush_dcache_page(mapping, page); | 220 | __flush_dcache_page(mapping, page); |
221 | 221 | ||
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index e3b45546d589..a99da95fc9fd 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -88,7 +88,8 @@ update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) | |||
88 | return; | 88 | return; |
89 | 89 | ||
90 | page = pfn_to_page(pfn); | 90 | page = pfn_to_page(pfn); |
91 | if (page_mapping(page) && test_bit(PG_dcache_dirty, &page->flags)) { | 91 | if (page_mapping_file(page) && |
92 | test_bit(PG_dcache_dirty, &page->flags)) { | ||
92 | flush_kernel_dcache_page_addr(pfn_va(pfn)); | 93 | flush_kernel_dcache_page_addr(pfn_va(pfn)); |
93 | clear_bit(PG_dcache_dirty, &page->flags); | 94 | clear_bit(PG_dcache_dirty, &page->flags); |
94 | } else if (parisc_requires_coherency()) | 95 | } else if (parisc_requires_coherency()) |
@@ -304,7 +305,7 @@ __flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, | |||
304 | 305 | ||
305 | void flush_dcache_page(struct page *page) | 306 | void flush_dcache_page(struct page *page) |
306 | { | 307 | { |
307 | struct address_space *mapping = page_mapping(page); | 308 | struct address_space *mapping = page_mapping_file(page); |
308 | struct vm_area_struct *mpnt; | 309 | struct vm_area_struct *mpnt; |
309 | unsigned long offset; | 310 | unsigned long offset; |
310 | unsigned long addr, old_addr = 0; | 311 | unsigned long addr, old_addr = 0; |
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 1a4847f67ea8..6f6751d3eba9 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -118,12 +118,6 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, | |||
118 | unsigned long ceiling); | 118 | unsigned long ceiling); |
119 | 119 | ||
120 | /* | 120 | /* |
121 | * The version of vma_mmu_pagesize() in arch/powerpc/mm/hugetlbpage.c needs | ||
122 | * to override the version in mm/hugetlb.c | ||
123 | */ | ||
124 | #define vma_mmu_pagesize vma_mmu_pagesize | ||
125 | |||
126 | /* | ||
127 | * If the arch doesn't supply something else, assume that hugepage | 121 | * If the arch doesn't supply something else, assume that hugepage |
128 | * size aligned regions are ok without further preparation. | 122 | * size aligned regions are ok without further preparation. |
129 | */ | 123 | */ |
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 876da2bc1796..3a08d211d2ee 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -568,10 +568,7 @@ unsigned long vma_mmu_pagesize(struct vm_area_struct *vma) | |||
568 | if (!radix_enabled()) | 568 | if (!radix_enabled()) |
569 | return 1UL << mmu_psize_to_shift(psize); | 569 | return 1UL << mmu_psize_to_shift(psize); |
570 | #endif | 570 | #endif |
571 | if (!is_vm_hugetlb_page(vma)) | 571 | return vma_kernel_pagesize(vma); |
572 | return PAGE_SIZE; | ||
573 | |||
574 | return huge_page_size(hstate_vma(vma)); | ||
575 | } | 572 | } |
576 | 573 | ||
577 | static inline bool is_power_of_4(unsigned long x) | 574 | static inline bool is_power_of_4(unsigned long x) |
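The powerpc override can go away because this series teaches the generic vma_kernel_pagesize() to ask the VMA itself via a new ->pagesize() hook in vm_operations_struct (the dev_dax_pagesize() hunk further down is one provider; hugetlbfs is assumed to gain an equivalent one). A sketch of the assumed generic helper, which is not part of this excerpt:

	/*
	 * VMAs whose vm_ops report a mapping granularity use it; all other
	 * VMAs are ordinary PAGE_SIZE mappings.
	 */
	unsigned long vma_kernel_pagesize(struct vm_area_struct *vma)
	{
		if (vma->vm_ops && vma->vm_ops->pagesize)
			return vma->vm_ops->pagesize(vma);
		return PAGE_SIZE;
	}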
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index e0a2d8e806ed..9a8a084e4aba 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -112,7 +112,7 @@ static int mm_iommu_move_page_from_cma(struct page *page) | |||
112 | put_page(page); /* Drop the gup reference */ | 112 | put_page(page); /* Drop the gup reference */ |
113 | 113 | ||
114 | ret = migrate_pages(&cma_migrate_pages, new_iommu_non_cma_page, | 114 | ret = migrate_pages(&cma_migrate_pages, new_iommu_non_cma_page, |
115 | NULL, 0, MIGRATE_SYNC, MR_CMA); | 115 | NULL, 0, MIGRATE_SYNC, MR_CONTIG_RANGE); |
116 | if (ret) { | 116 | if (ret) { |
117 | if (!list_empty(&cma_migrate_pages)) | 117 | if (!list_empty(&cma_migrate_pages)) |
118 | putback_movable_pages(&cma_migrate_pages); | 118 | putback_movable_pages(&cma_migrate_pages); |
diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c
index a6198d4f0f03..5ca3e22d0512 100644
--- a/arch/powerpc/sysdev/dart_iommu.c
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -38,6 +38,7 @@ | |||
38 | #include <linux/suspend.h> | 38 | #include <linux/suspend.h> |
39 | #include <linux/memblock.h> | 39 | #include <linux/memblock.h> |
40 | #include <linux/gfp.h> | 40 | #include <linux/gfp.h> |
41 | #include <linux/kmemleak.h> | ||
41 | #include <asm/io.h> | 42 | #include <asm/io.h> |
42 | #include <asm/prom.h> | 43 | #include <asm/prom.h> |
43 | #include <asm/iommu.h> | 44 | #include <asm/iommu.h> |
diff --git a/arch/powerpc/sysdev/msi_bitmap.c b/arch/powerpc/sysdev/msi_bitmap.c
index c4dae27172b3..6243a7e537d0 100644
--- a/arch/powerpc/sysdev/msi_bitmap.c
+++ b/arch/powerpc/sysdev/msi_bitmap.c
@@ -10,6 +10,7 @@ | |||
10 | 10 | ||
11 | #include <linux/slab.h> | 11 | #include <linux/slab.h> |
12 | #include <linux/kernel.h> | 12 | #include <linux/kernel.h> |
13 | #include <linux/kmemleak.h> | ||
13 | #include <linux/bitmap.h> | 14 | #include <linux/bitmap.h> |
14 | #include <linux/bootmem.h> | 15 | #include <linux/bootmem.h> |
15 | #include <asm/msi_bitmap.h> | 16 | #include <asm/msi_bitmap.h> |
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index c7a627620e5e..8c867b43c8eb 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -15,7 +15,7 @@ | |||
15 | #include <linux/hardirq.h> | 15 | #include <linux/hardirq.h> |
16 | #include <linux/log2.h> | 16 | #include <linux/log2.h> |
17 | #include <linux/kprobes.h> | 17 | #include <linux/kprobes.h> |
18 | #include <linux/slab.h> | 18 | #include <linux/kmemleak.h> |
19 | #include <linux/time.h> | 19 | #include <linux/time.h> |
20 | #include <linux/module.h> | 20 | #include <linux/module.h> |
21 | #include <linux/sched/signal.h> | 21 | #include <linux/sched/signal.h> |
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index a4a9fe1934e9..2f8f7d7dd9a8 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -27,7 +27,6 @@ | |||
27 | #include <linux/err.h> | 27 | #include <linux/err.h> |
28 | #include <linux/spinlock.h> | 28 | #include <linux/spinlock.h> |
29 | #include <linux/kernel_stat.h> | 29 | #include <linux/kernel_stat.h> |
30 | #include <linux/kmemleak.h> | ||
31 | #include <linux/delay.h> | 30 | #include <linux/delay.h> |
32 | #include <linux/interrupt.h> | 31 | #include <linux/interrupt.h> |
33 | #include <linux/irqflags.h> | 32 | #include <linux/irqflags.h> |
diff --git a/arch/sh/boot/compressed/misc.c b/arch/sh/boot/compressed/misc.c
index 627ce8e75e01..c15cac9251b9 100644
--- a/arch/sh/boot/compressed/misc.c
+++ b/arch/sh/boot/compressed/misc.c
@@ -104,12 +104,7 @@ static void error(char *x) | |||
104 | while(1); /* Halt */ | 104 | while(1); /* Halt */ |
105 | } | 105 | } |
106 | 106 | ||
107 | unsigned long __stack_chk_guard; | 107 | const unsigned long __stack_chk_guard = 0x000a0dff; |
108 | |||
109 | void __stack_chk_guard_setup(void) | ||
110 | { | ||
111 | __stack_chk_guard = 0x000a0dff; | ||
112 | } | ||
113 | 108 | ||
114 | void __stack_chk_fail(void) | 109 | void __stack_chk_fail(void) |
115 | { | 110 | { |
@@ -130,8 +125,6 @@ void decompress_kernel(void) | |||
130 | { | 125 | { |
131 | unsigned long output_addr; | 126 | unsigned long output_addr; |
132 | 127 | ||
133 | __stack_chk_guard_setup(); | ||
134 | |||
135 | #ifdef CONFIG_SUPERH64 | 128 | #ifdef CONFIG_SUPERH64 |
136 | output_addr = (CONFIG_MEMORY_START + 0x2000); | 129 | output_addr = (CONFIG_MEMORY_START + 0x2000); |
137 | #else | 130 | #else |
diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c
index 58aaa4f33b81..eee911422cf9 100644
--- a/arch/sh/mm/cache-sh4.c
+++ b/arch/sh/mm/cache-sh4.c
@@ -112,7 +112,7 @@ static void sh4_flush_dcache_page(void *arg) | |||
112 | struct page *page = arg; | 112 | struct page *page = arg; |
113 | unsigned long addr = (unsigned long)page_address(page); | 113 | unsigned long addr = (unsigned long)page_address(page); |
114 | #ifndef CONFIG_SMP | 114 | #ifndef CONFIG_SMP |
115 | struct address_space *mapping = page_mapping(page); | 115 | struct address_space *mapping = page_mapping_file(page); |
116 | 116 | ||
117 | if (mapping && !mapping_mapped(mapping)) | 117 | if (mapping && !mapping_mapped(mapping)) |
118 | clear_bit(PG_dcache_clean, &page->flags); | 118 | clear_bit(PG_dcache_clean, &page->flags); |
diff --git a/arch/sh/mm/cache-sh7705.c b/arch/sh/mm/cache-sh7705.c
index 6cd2aa395817..ed25eba80667 100644
--- a/arch/sh/mm/cache-sh7705.c
+++ b/arch/sh/mm/cache-sh7705.c
@@ -136,7 +136,7 @@ static void __flush_dcache_page(unsigned long phys) | |||
136 | static void sh7705_flush_dcache_page(void *arg) | 136 | static void sh7705_flush_dcache_page(void *arg) |
137 | { | 137 | { |
138 | struct page *page = arg; | 138 | struct page *page = arg; |
139 | struct address_space *mapping = page_mapping(page); | 139 | struct address_space *mapping = page_mapping_file(page); |
140 | 140 | ||
141 | if (mapping && !mapping_mapped(mapping)) | 141 | if (mapping && !mapping_mapped(mapping)) |
142 | clear_bit(PG_dcache_clean, &page->flags); | 142 | clear_bit(PG_dcache_clean, &page->flags); |
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index d66dde833f5e..713670e6d13d 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -22,7 +22,6 @@ | |||
22 | #include <linux/seq_file.h> | 22 | #include <linux/seq_file.h> |
23 | #include <linux/ftrace.h> | 23 | #include <linux/ftrace.h> |
24 | #include <linux/irq.h> | 24 | #include <linux/irq.h> |
25 | #include <linux/kmemleak.h> | ||
26 | 25 | ||
27 | #include <asm/ptrace.h> | 26 | #include <asm/ptrace.h> |
28 | #include <asm/processor.h> | 27 | #include <asm/processor.h> |
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index c50182cd2f64..d3ea1f3c06a0 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -929,9 +929,9 @@ static inline void __local_flush_dcache_page(struct page *page) | |||
929 | #ifdef DCACHE_ALIASING_POSSIBLE | 929 | #ifdef DCACHE_ALIASING_POSSIBLE |
930 | __flush_dcache_page(page_address(page), | 930 | __flush_dcache_page(page_address(page), |
931 | ((tlb_type == spitfire) && | 931 | ((tlb_type == spitfire) && |
932 | page_mapping(page) != NULL)); | 932 | page_mapping_file(page) != NULL)); |
933 | #else | 933 | #else |
934 | if (page_mapping(page) != NULL && | 934 | if (page_mapping_file(page) != NULL && |
935 | tlb_type == spitfire) | 935 | tlb_type == spitfire) |
936 | __flush_icache_page(__pa(page_address(page))); | 936 | __flush_icache_page(__pa(page_address(page))); |
937 | #endif | 937 | #endif |
@@ -958,7 +958,7 @@ void smp_flush_dcache_page_impl(struct page *page, int cpu) | |||
958 | 958 | ||
959 | if (tlb_type == spitfire) { | 959 | if (tlb_type == spitfire) { |
960 | data0 = ((u64)&xcall_flush_dcache_page_spitfire); | 960 | data0 = ((u64)&xcall_flush_dcache_page_spitfire); |
961 | if (page_mapping(page) != NULL) | 961 | if (page_mapping_file(page) != NULL) |
962 | data0 |= ((u64)1 << 32); | 962 | data0 |= ((u64)1 << 32); |
963 | } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { | 963 | } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { |
964 | #ifdef DCACHE_ALIASING_POSSIBLE | 964 | #ifdef DCACHE_ALIASING_POSSIBLE |
@@ -994,7 +994,7 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page) | |||
994 | pg_addr = page_address(page); | 994 | pg_addr = page_address(page); |
995 | if (tlb_type == spitfire) { | 995 | if (tlb_type == spitfire) { |
996 | data0 = ((u64)&xcall_flush_dcache_page_spitfire); | 996 | data0 = ((u64)&xcall_flush_dcache_page_spitfire); |
997 | if (page_mapping(page) != NULL) | 997 | if (page_mapping_file(page) != NULL) |
998 | data0 |= ((u64)1 << 32); | 998 | data0 |= ((u64)1 << 32); |
999 | } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { | 999 | } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { |
1000 | #ifdef DCACHE_ALIASING_POSSIBLE | 1000 | #ifdef DCACHE_ALIASING_POSSIBLE |
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index cb9ebac6663f..8aeb1aabe76e 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -206,9 +206,9 @@ inline void flush_dcache_page_impl(struct page *page) | |||
206 | #ifdef DCACHE_ALIASING_POSSIBLE | 206 | #ifdef DCACHE_ALIASING_POSSIBLE |
207 | __flush_dcache_page(page_address(page), | 207 | __flush_dcache_page(page_address(page), |
208 | ((tlb_type == spitfire) && | 208 | ((tlb_type == spitfire) && |
209 | page_mapping(page) != NULL)); | 209 | page_mapping_file(page) != NULL)); |
210 | #else | 210 | #else |
211 | if (page_mapping(page) != NULL && | 211 | if (page_mapping_file(page) != NULL && |
212 | tlb_type == spitfire) | 212 | tlb_type == spitfire) |
213 | __flush_icache_page(__pa(page_address(page))); | 213 | __flush_icache_page(__pa(page_address(page))); |
214 | #endif | 214 | #endif |
@@ -490,7 +490,7 @@ void flush_dcache_page(struct page *page) | |||
490 | 490 | ||
491 | this_cpu = get_cpu(); | 491 | this_cpu = get_cpu(); |
492 | 492 | ||
493 | mapping = page_mapping(page); | 493 | mapping = page_mapping_file(page); |
494 | if (mapping && !mapping_mapped(mapping)) { | 494 | if (mapping && !mapping_mapped(mapping)) { |
495 | int dirty = test_bit(PG_dcache_dirty, &page->flags); | 495 | int dirty = test_bit(PG_dcache_dirty, &page->flags); |
496 | if (dirty) { | 496 | if (dirty) { |
diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c
index b5cfab711651..3d72d2deb13b 100644
--- a/arch/sparc/mm/tlb.c
+++ b/arch/sparc/mm/tlb.c
@@ -128,7 +128,7 @@ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, | |||
128 | goto no_cache_flush; | 128 | goto no_cache_flush; |
129 | 129 | ||
130 | /* A real file page? */ | 130 | /* A real file page? */ |
131 | mapping = page_mapping(page); | 131 | mapping = page_mapping_file(page); |
132 | if (!mapping) | 132 | if (!mapping) |
133 | goto no_cache_flush; | 133 | goto no_cache_flush; |
134 | 134 | ||
diff --git a/arch/unicore32/mm/flush.c b/arch/unicore32/mm/flush.c
index 6d4c096ffa2a..74f4d636df2d 100644
--- a/arch/unicore32/mm/flush.c
+++ b/arch/unicore32/mm/flush.c
@@ -83,7 +83,7 @@ void flush_dcache_page(struct page *page) | |||
83 | if (page == ZERO_PAGE(0)) | 83 | if (page == ZERO_PAGE(0)) |
84 | return; | 84 | return; |
85 | 85 | ||
86 | mapping = page_mapping(page); | 86 | mapping = page_mapping_file(page); |
87 | 87 | ||
88 | if (mapping && !mapping_mapped(mapping)) | 88 | if (mapping && !mapping_mapped(mapping)) |
89 | clear_bit(PG_dcache_clean, &page->flags); | 89 | clear_bit(PG_dcache_clean, &page->flags); |
diff --git a/arch/unicore32/mm/mmu.c b/arch/unicore32/mm/mmu.c
index 4f5a532bee13..0c94b7b4514d 100644
--- a/arch/unicore32/mm/mmu.c
+++ b/arch/unicore32/mm/mmu.c
@@ -503,7 +503,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, | |||
503 | if (page == ZERO_PAGE(0)) | 503 | if (page == ZERO_PAGE(0)) |
504 | return; | 504 | return; |
505 | 505 | ||
506 | mapping = page_mapping(page); | 506 | mapping = page_mapping_file(page); |
507 | if (!test_and_set_bit(PG_dcache_clean, &page->flags)) | 507 | if (!test_and_set_bit(PG_dcache_clean, &page->flags)) |
508 | __flush_dcache_page(mapping, page); | 508 | __flush_dcache_page(mapping, page); |
509 | if (mapping) | 509 | if (mapping) |
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 14437116ffea..77625b60a510 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -6,7 +6,6 @@ | |||
6 | #include <linux/bootmem.h> | 6 | #include <linux/bootmem.h> |
7 | #include <linux/gfp.h> | 7 | #include <linux/gfp.h> |
8 | #include <linux/pci.h> | 8 | #include <linux/pci.h> |
9 | #include <linux/kmemleak.h> | ||
10 | 9 | ||
11 | #include <asm/proto.h> | 10 | #include <asm/proto.h> |
12 | #include <asm/dma.h> | 11 | #include <asm/dma.h> |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 45241de66785..dca9abf2b85c 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1328,14 +1328,39 @@ int kern_addr_valid(unsigned long addr) | |||
1328 | return pfn_valid(pte_pfn(*pte)); | 1328 | return pfn_valid(pte_pfn(*pte)); |
1329 | } | 1329 | } |
1330 | 1330 | ||
1331 | /* | ||
1332 | * Block size is the minimum amount of memory which can be hotplugged or | ||
1333 | * hotremoved. It must be power of two and must be equal or larger than | ||
1334 | * MIN_MEMORY_BLOCK_SIZE. | ||
1335 | */ | ||
1336 | #define MAX_BLOCK_SIZE (2UL << 30) | ||
1337 | |||
1338 | /* Amount of ram needed to start using large blocks */ | ||
1339 | #define MEM_SIZE_FOR_LARGE_BLOCK (64UL << 30) | ||
1340 | |||
1331 | static unsigned long probe_memory_block_size(void) | 1341 | static unsigned long probe_memory_block_size(void) |
1332 | { | 1342 | { |
1333 | unsigned long bz = MIN_MEMORY_BLOCK_SIZE; | 1343 | unsigned long boot_mem_end = max_pfn << PAGE_SHIFT; |
1344 | unsigned long bz; | ||
1334 | 1345 | ||
1335 | /* if system is UV or has 64GB of RAM or more, use large blocks */ | 1346 | /* If this is UV system, always set 2G block size */ |
1336 | if (is_uv_system() || ((max_pfn << PAGE_SHIFT) >= (64UL << 30))) | 1347 | if (is_uv_system()) { |
1337 | bz = 2UL << 30; /* 2GB */ | 1348 | bz = MAX_BLOCK_SIZE; |
1349 | goto done; | ||
1350 | } | ||
1338 | 1351 | ||
1352 | /* Use regular block if RAM is smaller than MEM_SIZE_FOR_LARGE_BLOCK */ | ||
1353 | if (boot_mem_end < MEM_SIZE_FOR_LARGE_BLOCK) { | ||
1354 | bz = MIN_MEMORY_BLOCK_SIZE; | ||
1355 | goto done; | ||
1356 | } | ||
1357 | |||
1358 | /* Find the largest allowed block size that aligns to memory end */ | ||
1359 | for (bz = MAX_BLOCK_SIZE; bz > MIN_MEMORY_BLOCK_SIZE; bz >>= 1) { | ||
1360 | if (IS_ALIGNED(boot_mem_end, bz)) | ||
1361 | break; | ||
1362 | } | ||
1363 | done: | ||
1339 | pr_info("x86/mm: Memory block size: %ldMB\n", bz >> 20); | 1364 | pr_info("x86/mm: Memory block size: %ldMB\n", bz >> 20); |
1340 | 1365 | ||
1341 | return bz; | 1366 | return bz; |
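A worked example of the selection loop above (a standalone sketch, assuming the 128 MiB MIN_MEMORY_BLOCK_SIZE that x86-64 SPARSEMEM sections use): on a machine whose boot memory ends at 65 GiB, 2 GiB does not divide the end address, so the loop settles on 1 GiB blocks instead of unconditionally using 2 GiB as the old code did for any system over 64 GiB.

	#include <stdio.h>

	#define MIN_MEMORY_BLOCK_SIZE	(128UL << 20)	/* assumed x86-64 value */
	#define MAX_BLOCK_SIZE		(2UL << 30)

	int main(void)
	{
		unsigned long boot_mem_end = 65UL << 30;	/* RAM ends at 65 GiB */
		unsigned long bz;

		/* largest power-of-two block size that aligns to the memory end */
		for (bz = MAX_BLOCK_SIZE; bz > MIN_MEMORY_BLOCK_SIZE; bz >>= 1)
			if ((boot_mem_end & (bz - 1)) == 0)	/* IS_ALIGNED() */
				break;

		printf("memory block size: %lu MiB\n", bz >> 20);	/* prints 1024 */
		return 0;
	}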
diff --git a/arch/xtensa/mm/cache.c b/arch/xtensa/mm/cache.c
index 57dc231a0709..9220dcde7520 100644
--- a/arch/xtensa/mm/cache.c
+++ b/arch/xtensa/mm/cache.c
@@ -127,7 +127,7 @@ EXPORT_SYMBOL(copy_user_highpage); | |||
127 | 127 | ||
128 | void flush_dcache_page(struct page *page) | 128 | void flush_dcache_page(struct page *page) |
129 | { | 129 | { |
130 | struct address_space *mapping = page_mapping(page); | 130 | struct address_space *mapping = page_mapping_file(page); |
131 | 131 | ||
132 | /* | 132 | /* |
133 | * If we have a mapping but the page is not mapped to user-space | 133 | * If we have a mapping but the page is not mapped to user-space |
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index fe4b24f05f6a..79fcd2bae96b 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -187,13 +187,14 @@ int memory_isolate_notify(unsigned long val, void *v) | |||
187 | } | 187 | } |
188 | 188 | ||
189 | /* | 189 | /* |
190 | * The probe routines leave the pages reserved, just as the bootmem code does. | 190 | * The probe routines leave the pages uninitialized, just as the bootmem code |
191 | * Make sure they're still that way. | 191 | * does. Make sure we do not access them, but instead use only information from |
192 | * within sections. | ||
192 | */ | 193 | */ |
193 | static bool pages_correctly_reserved(unsigned long start_pfn) | 194 | static bool pages_correctly_probed(unsigned long start_pfn) |
194 | { | 195 | { |
195 | int i, j; | 196 | unsigned long section_nr = pfn_to_section_nr(start_pfn); |
196 | struct page *page; | 197 | unsigned long section_nr_end = section_nr + sections_per_block; |
197 | unsigned long pfn = start_pfn; | 198 | unsigned long pfn = start_pfn; |
198 | 199 | ||
199 | /* | 200 | /* |
@@ -201,21 +202,24 @@ static bool pages_correctly_reserved(unsigned long start_pfn) | |||
201 | * SPARSEMEM_VMEMMAP. We lookup the page once per section | 202 | * SPARSEMEM_VMEMMAP. We lookup the page once per section |
202 | * and assume memmap is contiguous within each section | 203 | * and assume memmap is contiguous within each section |
203 | */ | 204 | */ |
204 | for (i = 0; i < sections_per_block; i++, pfn += PAGES_PER_SECTION) { | 205 | for (; section_nr < section_nr_end; section_nr++) { |
205 | if (WARN_ON_ONCE(!pfn_valid(pfn))) | 206 | if (WARN_ON_ONCE(!pfn_valid(pfn))) |
206 | return false; | 207 | return false; |
207 | page = pfn_to_page(pfn); | ||
208 | |||
209 | for (j = 0; j < PAGES_PER_SECTION; j++) { | ||
210 | if (PageReserved(page + j)) | ||
211 | continue; | ||
212 | |||
213 | printk(KERN_WARNING "section number %ld page number %d " | ||
214 | "not reserved, was it already online?\n", | ||
215 | pfn_to_section_nr(pfn), j); | ||
216 | 208 | ||
209 | if (!present_section_nr(section_nr)) { | ||
210 | pr_warn("section %ld pfn[%lx, %lx) not present", | ||
211 | section_nr, pfn, pfn + PAGES_PER_SECTION); | ||
212 | return false; | ||
213 | } else if (!valid_section_nr(section_nr)) { | ||
214 | pr_warn("section %ld pfn[%lx, %lx) no valid memmap", | ||
215 | section_nr, pfn, pfn + PAGES_PER_SECTION); | ||
216 | return false; | ||
217 | } else if (online_section_nr(section_nr)) { | ||
218 | pr_warn("section %ld pfn[%lx, %lx) is already online", | ||
219 | section_nr, pfn, pfn + PAGES_PER_SECTION); | ||
217 | return false; | 220 | return false; |
218 | } | 221 | } |
222 | pfn += PAGES_PER_SECTION; | ||
219 | } | 223 | } |
220 | 224 | ||
221 | return true; | 225 | return true; |
@@ -237,7 +241,7 @@ memory_block_action(unsigned long phys_index, unsigned long action, int online_t | |||
237 | 241 | ||
238 | switch (action) { | 242 | switch (action) { |
239 | case MEM_ONLINE: | 243 | case MEM_ONLINE: |
240 | if (!pages_correctly_reserved(start_pfn)) | 244 | if (!pages_correctly_probed(start_pfn)) |
241 | return -EBUSY; | 245 | return -EBUSY; |
242 | 246 | ||
243 | ret = online_pages(start_pfn, nr_pages, online_type); | 247 | ret = online_pages(start_pfn, nr_pages, online_type); |
@@ -708,7 +712,7 @@ static int add_memory_block(int base_section_nr) | |||
708 | * need an interface for the VM to add new memory regions, | 712 | * need an interface for the VM to add new memory regions, |
709 | * but without onlining it. | 713 | * but without onlining it. |
710 | */ | 714 | */ |
711 | int register_new_memory(int nid, struct mem_section *section) | 715 | int hotplug_memory_register(int nid, struct mem_section *section) |
712 | { | 716 | { |
713 | int ret = 0; | 717 | int ret = 0; |
714 | struct memory_block *mem; | 718 | struct memory_block *mem; |
@@ -727,7 +731,7 @@ int register_new_memory(int nid, struct mem_section *section) | |||
727 | } | 731 | } |
728 | 732 | ||
729 | if (mem->section_count == sections_per_block) | 733 | if (mem->section_count == sections_per_block) |
730 | ret = register_mem_sect_under_node(mem, nid); | 734 | ret = register_mem_sect_under_node(mem, nid, false); |
731 | out: | 735 | out: |
732 | mutex_unlock(&mem_sysfs_mutex); | 736 | mutex_unlock(&mem_sysfs_mutex); |
733 | return ret; | 737 | return ret; |
diff --git a/drivers/base/node.c b/drivers/base/node.c
index c5f81fc621ac..7a3a580821e0 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -399,13 +399,16 @@ static int __ref get_nid_for_pfn(unsigned long pfn) | |||
399 | } | 399 | } |
400 | 400 | ||
401 | /* register memory section under specified node if it spans that node */ | 401 | /* register memory section under specified node if it spans that node */ |
402 | int register_mem_sect_under_node(struct memory_block *mem_blk, int nid) | 402 | int register_mem_sect_under_node(struct memory_block *mem_blk, int nid, |
403 | bool check_nid) | ||
403 | { | 404 | { |
404 | int ret; | 405 | int ret; |
405 | unsigned long pfn, sect_start_pfn, sect_end_pfn; | 406 | unsigned long pfn, sect_start_pfn, sect_end_pfn; |
406 | 407 | ||
407 | if (!mem_blk) | 408 | if (!mem_blk) |
408 | return -EFAULT; | 409 | return -EFAULT; |
410 | |||
411 | mem_blk->nid = nid; | ||
409 | if (!node_online(nid)) | 412 | if (!node_online(nid)) |
410 | return 0; | 413 | return 0; |
411 | 414 | ||
@@ -425,11 +428,18 @@ int register_mem_sect_under_node(struct memory_block *mem_blk, int nid) | |||
425 | continue; | 428 | continue; |
426 | } | 429 | } |
427 | 430 | ||
428 | page_nid = get_nid_for_pfn(pfn); | 431 | /* |
429 | if (page_nid < 0) | 432 | * We need to check if page belongs to nid only for the boot |
430 | continue; | 433 | * case, during hotplug we know that all pages in the memory |
431 | if (page_nid != nid) | 434 | * block belong to the same node. |
432 | continue; | 435 | */ |
436 | if (check_nid) { | ||
437 | page_nid = get_nid_for_pfn(pfn); | ||
438 | if (page_nid < 0) | ||
439 | continue; | ||
440 | if (page_nid != nid) | ||
441 | continue; | ||
442 | } | ||
433 | ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj, | 443 | ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj, |
434 | &mem_blk->dev.kobj, | 444 | &mem_blk->dev.kobj, |
435 | kobject_name(&mem_blk->dev.kobj)); | 445 | kobject_name(&mem_blk->dev.kobj)); |
@@ -504,7 +514,7 @@ int link_mem_sections(int nid, unsigned long start_pfn, unsigned long nr_pages) | |||
504 | 514 | ||
505 | mem_blk = find_memory_block_hinted(mem_sect, mem_blk); | 515 | mem_blk = find_memory_block_hinted(mem_sect, mem_blk); |
506 | 516 | ||
507 | ret = register_mem_sect_under_node(mem_blk, nid); | 517 | ret = register_mem_sect_under_node(mem_blk, nid, true); |
508 | if (!err) | 518 | if (!err) |
509 | err = ret; | 519 | err = ret; |
510 | 520 | ||
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 71b449613cfa..0f3fadd71230 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -44,6 +44,11 @@ static const char *default_compressor = "lzo"; | |||
44 | 44 | ||
45 | /* Module params (documentation at end) */ | 45 | /* Module params (documentation at end) */ |
46 | static unsigned int num_devices = 1; | 46 | static unsigned int num_devices = 1; |
47 | /* | ||
48 | * Pages that compress to sizes equals or greater than this are stored | ||
49 | * uncompressed in memory. | ||
50 | */ | ||
51 | static size_t huge_class_size; | ||
47 | 52 | ||
48 | static void zram_free_page(struct zram *zram, size_t index); | 53 | static void zram_free_page(struct zram *zram, size_t index); |
49 | 54 | ||
@@ -786,6 +791,8 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize) | |||
786 | return false; | 791 | return false; |
787 | } | 792 | } |
788 | 793 | ||
794 | if (!huge_class_size) | ||
795 | huge_class_size = zs_huge_class_size(zram->mem_pool); | ||
789 | return true; | 796 | return true; |
790 | } | 797 | } |
791 | 798 | ||
@@ -965,7 +972,7 @@ compress_again: | |||
965 | return ret; | 972 | return ret; |
966 | } | 973 | } |
967 | 974 | ||
968 | if (unlikely(comp_len > max_zpage_size)) { | 975 | if (unlikely(comp_len >= huge_class_size)) { |
969 | if (zram_wb_enabled(zram) && allow_wb) { | 976 | if (zram_wb_enabled(zram) && allow_wb) { |
970 | zcomp_stream_put(zram->comp); | 977 | zcomp_stream_put(zram->comp); |
971 | ret = write_to_bdev(zram, bvec, index, bio, &element); | 978 | ret = write_to_bdev(zram, bvec, index, bio, &element); |
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 1e9bf65c0bfb..008861220723 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -21,22 +21,6 @@ | |||
21 | 21 | ||
22 | #include "zcomp.h" | 22 | #include "zcomp.h" |
23 | 23 | ||
24 | /*-- Configurable parameters */ | ||
25 | |||
26 | /* | ||
27 | * Pages that compress to size greater than this are stored | ||
28 | * uncompressed in memory. | ||
29 | */ | ||
30 | static const size_t max_zpage_size = PAGE_SIZE / 4 * 3; | ||
31 | |||
32 | /* | ||
33 | * NOTE: max_zpage_size must be less than or equal to: | ||
34 | * ZS_MAX_ALLOC_SIZE. Otherwise, zs_malloc() would | ||
35 | * always return failure. | ||
36 | */ | ||
37 | |||
38 | /*-- End of configurable params */ | ||
39 | |||
40 | #define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) | 24 | #define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) |
41 | #define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT) | 25 | #define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT) |
42 | #define ZRAM_LOGICAL_BLOCK_SHIFT 12 | 26 | #define ZRAM_LOGICAL_BLOCK_SHIFT 12 |
diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index 2137dbc29877..0b61f48f21a6 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -439,10 +439,20 @@ static int dev_dax_split(struct vm_area_struct *vma, unsigned long addr) | |||
439 | return 0; | 439 | return 0; |
440 | } | 440 | } |
441 | 441 | ||
442 | static unsigned long dev_dax_pagesize(struct vm_area_struct *vma) | ||
443 | { | ||
444 | struct file *filp = vma->vm_file; | ||
445 | struct dev_dax *dev_dax = filp->private_data; | ||
446 | struct dax_region *dax_region = dev_dax->region; | ||
447 | |||
448 | return dax_region->align; | ||
449 | } | ||
450 | |||
442 | static const struct vm_operations_struct dax_vm_ops = { | 451 | static const struct vm_operations_struct dax_vm_ops = { |
443 | .fault = dev_dax_fault, | 452 | .fault = dev_dax_fault, |
444 | .huge_fault = dev_dax_huge_fault, | 453 | .huge_fault = dev_dax_huge_fault, |
445 | .split = dev_dax_split, | 454 | .split = dev_dax_split, |
455 | .pagesize = dev_dax_pagesize, | ||
446 | }; | 456 | }; |
447 | 457 | ||
448 | static int dax_mmap(struct file *filp, struct vm_area_struct *vma) | 458 | static int dax_mmap(struct file *filp, struct vm_area_struct *vma) |
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 2138102ef611..c5f4f7691b57 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -17,6 +17,7 @@ | |||
17 | #include <linux/io.h> | 17 | #include <linux/io.h> |
18 | #include <linux/iommu.h> | 18 | #include <linux/iommu.h> |
19 | #include <linux/interrupt.h> | 19 | #include <linux/interrupt.h> |
20 | #include <linux/kmemleak.h> | ||
20 | #include <linux/list.h> | 21 | #include <linux/list.h> |
21 | #include <linux/of.h> | 22 | #include <linux/of.h> |
22 | #include <linux/of_iommu.h> | 23 | #include <linux/of_iommu.h> |
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index 542930cd183d..5a96fd14ac22 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -25,7 +25,6 @@ | |||
25 | #include <linux/io.h> | 25 | #include <linux/io.h> |
26 | #include <linux/iommu.h> | 26 | #include <linux/iommu.h> |
27 | #include <linux/iopoll.h> | 27 | #include <linux/iopoll.h> |
28 | #include <linux/kmemleak.h> | ||
29 | #include <linux/list.h> | 28 | #include <linux/list.h> |
30 | #include <linux/of_address.h> | 29 | #include <linux/of_address.h> |
31 | #include <linux/of_iommu.h> | 30 | #include <linux/of_iommu.h> |
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 1b4af54a4968..30371274409d 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -35,6 +35,7 @@ | |||
35 | #include <linux/of_net.h> | 35 | #include <linux/of_net.h> |
36 | #include <linux/of_device.h> | 36 | #include <linux/of_device.h> |
37 | #include <linux/if_vlan.h> | 37 | #include <linux/if_vlan.h> |
38 | #include <linux/kmemleak.h> | ||
38 | 39 | ||
39 | #include <linux/pinctrl/consumer.h> | 40 | #include <linux/pinctrl/consumer.h> |
40 | 41 | ||
diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c
index 2437422625bf..57bb8f049e59 100644
--- a/drivers/net/wireless/realtek/rtlwifi/pci.c
+++ b/drivers/net/wireless/realtek/rtlwifi/pci.c
@@ -31,7 +31,6 @@ | |||
31 | #include "efuse.h" | 31 | #include "efuse.h" |
32 | #include <linux/interrupt.h> | 32 | #include <linux/interrupt.h> |
33 | #include <linux/export.h> | 33 | #include <linux/export.h> |
34 | #include <linux/kmemleak.h> | ||
35 | #include <linux/module.h> | 34 | #include <linux/module.h> |
36 | 35 | ||
37 | MODULE_AUTHOR("lizhaoming <chaoming_li@realsil.com.cn>"); | 36 | MODULE_AUTHOR("lizhaoming <chaoming_li@realsil.com.cn>"); |
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192c/fw_common.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192c/fw_common.c
index 015476e3f7e5..f3bff66e85d0 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192c/fw_common.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192c/fw_common.c
@@ -32,7 +32,6 @@ | |||
32 | #include "../rtl8192ce/def.h" | 32 | #include "../rtl8192ce/def.h" |
33 | #include "fw_common.h" | 33 | #include "fw_common.h" |
34 | #include <linux/export.h> | 34 | #include <linux/export.h> |
35 | #include <linux/kmemleak.h> | ||
36 | 35 | ||
37 | static void _rtl92c_enable_fw_download(struct ieee80211_hw *hw, bool enable) | 36 | static void _rtl92c_enable_fw_download(struct ieee80211_hw *hw, bool enable) |
38 | { | 37 | { |
diff --git a/drivers/staging/rtl8188eu/hal/fw.c b/drivers/staging/rtl8188eu/hal/fw.c
index 03d091bad13a..6b67b38a6a9f 100644
--- a/drivers/staging/rtl8188eu/hal/fw.c
+++ b/drivers/staging/rtl8188eu/hal/fw.c
@@ -30,7 +30,7 @@ | |||
30 | #include "rtl8188e_hal.h" | 30 | #include "rtl8188e_hal.h" |
31 | 31 | ||
32 | #include <linux/firmware.h> | 32 | #include <linux/firmware.h> |
33 | #include <linux/kmemleak.h> | 33 | #include <linux/slab.h> |
34 | 34 | ||
35 | static void _rtl88e_enable_fw_download(struct adapter *adapt, bool enable) | 35 | static void _rtl88e_enable_fw_download(struct adapter *adapt, bool enable) |
36 | { | 36 | { |
diff --git a/drivers/staging/rtlwifi/pci.c b/drivers/staging/rtlwifi/pci.c
index 70a64a5f564a..d56810eabde7 100644
--- a/drivers/staging/rtlwifi/pci.c
+++ b/drivers/staging/rtlwifi/pci.c
@@ -31,7 +31,6 @@ | |||
31 | #include "efuse.h" | 31 | #include "efuse.h" |
32 | #include <linux/interrupt.h> | 32 | #include <linux/interrupt.h> |
33 | #include <linux/export.h> | 33 | #include <linux/export.h> |
34 | #include <linux/kmemleak.h> | ||
35 | #include <linux/module.h> | 34 | #include <linux/module.h> |
36 | 35 | ||
37 | MODULE_AUTHOR("lizhaoming <chaoming_li@realsil.com.cn>"); | 36 | MODULE_AUTHOR("lizhaoming <chaoming_li@realsil.com.cn>"); |
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 71458f493cf8..21d464a29cf8 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -23,7 +23,6 @@ | |||
23 | #include <linux/slab.h> | 23 | #include <linux/slab.h> |
24 | #include <linux/module.h> | 24 | #include <linux/module.h> |
25 | #include <linux/hrtimer.h> | 25 | #include <linux/hrtimer.h> |
26 | #include <linux/kmemleak.h> | ||
27 | #include <linux/dma-mapping.h> | 26 | #include <linux/dma-mapping.h> |
28 | #include <xen/xen.h> | 27 | #include <xen/xen.h> |
29 | 28 | ||
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 8fb89ddc6cc7..e622f0f10502 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -292,6 +292,10 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) | |||
292 | #ifdef CONFIG_9P_FSCACHE | 292 | #ifdef CONFIG_9P_FSCACHE |
293 | kfree(v9ses->cachetag); | 293 | kfree(v9ses->cachetag); |
294 | v9ses->cachetag = match_strdup(&args[0]); | 294 | v9ses->cachetag = match_strdup(&args[0]); |
295 | if (!v9ses->cachetag) { | ||
296 | ret = -ENOMEM; | ||
297 | goto free_and_return; | ||
298 | } | ||
295 | #endif | 299 | #endif |
296 | break; | 300 | break; |
297 | case Opt_cache: | 301 | case Opt_cache: |
@@ -471,6 +475,9 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, | |||
471 | return fid; | 475 | return fid; |
472 | 476 | ||
473 | err_clnt: | 477 | err_clnt: |
478 | #ifdef CONFIG_9P_FSCACHE | ||
479 | kfree(v9ses->cachetag); | ||
480 | #endif | ||
474 | p9_client_destroy(v9ses->clnt); | 481 | p9_client_destroy(v9ses->clnt); |
475 | err_names: | 482 | err_names: |
476 | kfree(v9ses->uname); | 483 | kfree(v9ses->uname); |
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index bdabb2765d1b..9ee534159cc6 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -579,6 +579,24 @@ static int v9fs_at_to_dotl_flags(int flags) | |||
579 | } | 579 | } |
580 | 580 | ||
581 | /** | 581 | /** |
582 | * v9fs_dec_count - helper functon to drop i_nlink. | ||
583 | * | ||
584 | * If a directory had nlink <= 2 (including . and ..), then we should not drop | ||
585 | * the link count, which indicates the underlying exported fs doesn't maintain | ||
586 | * nlink accurately. e.g. | ||
587 | * - overlayfs sets nlink to 1 for merged dir | ||
588 | * - ext4 (with dir_nlink feature enabled) sets nlink to 1 if a dir has more | ||
589 | * than EXT4_LINK_MAX (65000) links. | ||
590 | * | ||
591 | * @inode: inode whose nlink is being dropped | ||
592 | */ | ||
593 | static void v9fs_dec_count(struct inode *inode) | ||
594 | { | ||
595 | if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2) | ||
596 | drop_nlink(inode); | ||
597 | } | ||
598 | |||
599 | /** | ||
582 | * v9fs_remove - helper function to remove files and directories | 600 | * v9fs_remove - helper function to remove files and directories |
583 | * @dir: directory inode that is being deleted | 601 | * @dir: directory inode that is being deleted |
584 | * @dentry: dentry that is being deleted | 602 | * @dentry: dentry that is being deleted |
@@ -621,9 +639,9 @@ static int v9fs_remove(struct inode *dir, struct dentry *dentry, int flags) | |||
621 | */ | 639 | */ |
622 | if (flags & AT_REMOVEDIR) { | 640 | if (flags & AT_REMOVEDIR) { |
623 | clear_nlink(inode); | 641 | clear_nlink(inode); |
624 | drop_nlink(dir); | 642 | v9fs_dec_count(dir); |
625 | } else | 643 | } else |
626 | drop_nlink(inode); | 644 | v9fs_dec_count(inode); |
627 | 645 | ||
628 | v9fs_invalidate_inode_attr(inode); | 646 | v9fs_invalidate_inode_attr(inode); |
629 | v9fs_invalidate_inode_attr(dir); | 647 | v9fs_invalidate_inode_attr(dir); |
@@ -1024,12 +1042,12 @@ clunk_newdir: | |||
1024 | if (S_ISDIR(new_inode->i_mode)) | 1042 | if (S_ISDIR(new_inode->i_mode)) |
1025 | clear_nlink(new_inode); | 1043 | clear_nlink(new_inode); |
1026 | else | 1044 | else |
1027 | drop_nlink(new_inode); | 1045 | v9fs_dec_count(new_inode); |
1028 | } | 1046 | } |
1029 | if (S_ISDIR(old_inode->i_mode)) { | 1047 | if (S_ISDIR(old_inode->i_mode)) { |
1030 | if (!new_inode) | 1048 | if (!new_inode) |
1031 | inc_nlink(new_dir); | 1049 | inc_nlink(new_dir); |
1032 | drop_nlink(old_dir); | 1050 | v9fs_dec_count(old_dir); |
1033 | } | 1051 | } |
1034 | v9fs_invalidate_inode_attr(old_inode); | 1052 | v9fs_invalidate_inode_attr(old_inode); |
1035 | v9fs_invalidate_inode_attr(old_dir); | 1053 | v9fs_invalidate_inode_attr(old_dir); |
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index af03c2a901eb..48ce50484e80 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c | |||
@@ -94,7 +94,7 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, | |||
94 | if (v9ses->cache) | 94 | if (v9ses->cache) |
95 | sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024)/PAGE_SIZE; | 95 | sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024)/PAGE_SIZE; |
96 | 96 | ||
97 | sb->s_flags |= SB_ACTIVE | SB_DIRSYNC | SB_NOATIME; | 97 | sb->s_flags |= SB_ACTIVE | SB_DIRSYNC; |
98 | if (!v9ses->cache) | 98 | if (!v9ses->cache) |
99 | sb->s_flags |= SB_SYNCHRONOUS; | 99 | sb->s_flags |= SB_SYNCHRONOUS; |
100 | 100 | ||
diff --git a/fs/block_dev.c b/fs/block_dev.c index fe09ef9c21f3..7a506c55a993 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -1324,7 +1324,8 @@ static void flush_disk(struct block_device *bdev, bool kill_dirty) | |||
1324 | * @bdev: struct bdev to adjust. | 1324 | * @bdev: struct bdev to adjust. |
1325 | * | 1325 | * |
1326 | * This routine checks to see if the bdev size does not match the disk size | 1326 | * This routine checks to see if the bdev size does not match the disk size |
1327 | * and adjusts it if it differs. | 1327 | * and adjusts it if it differs. When shrinking the bdev size, all of its |
1328 | * caches are freed. | ||
1328 | */ | 1329 | */ |
1329 | void check_disk_size_change(struct gendisk *disk, struct block_device *bdev) | 1330 | void check_disk_size_change(struct gendisk *disk, struct block_device *bdev) |
1330 | { | 1331 | { |
@@ -1337,7 +1338,8 @@ void check_disk_size_change(struct gendisk *disk, struct block_device *bdev) | |||
1337 | "%s: detected capacity change from %lld to %lld\n", | 1338 | "%s: detected capacity change from %lld to %lld\n", |
1338 | disk->disk_name, bdev_size, disk_size); | 1339 | disk->disk_name, bdev_size, disk_size); |
1339 | i_size_write(bdev->bd_inode, disk_size); | 1340 | i_size_write(bdev->bd_inode, disk_size); |
1340 | flush_disk(bdev, false); | 1341 | if (bdev_size > disk_size) |
1342 | flush_disk(bdev, false); | ||
1341 | } | 1343 | } |
1342 | } | 1344 | } |
1343 | EXPORT_SYMBOL(check_disk_size_change); | 1345 | EXPORT_SYMBOL(check_disk_size_change); |
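The check_disk_size_change() hunk narrows the flush to the shrink case: when the device grows, previously cached data is still valid and only the inode size needs updating, whereas a shrink can leave cached pages beyond the new end of device. A hedged sketch of the resulting control flow (flush_disk() here refers to the existing static helper in fs/block_dev.c):

if (disk_size != bdev_size) {
        i_size_write(bdev->bd_inode, disk_size);  /* adopt the new capacity */
        if (bdev_size > disk_size)                /* shrink: caches may be stale */
                flush_disk(bdev, false);
}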
diff --git a/fs/buffer.c b/fs/buffer.c index 9a73924db22f..ec5dd39071e6 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -1511,7 +1511,7 @@ void block_invalidatepage(struct page *page, unsigned int offset, | |||
1511 | * The get_block cached value has been unconditionally invalidated, | 1511 | * The get_block cached value has been unconditionally invalidated, |
1512 | * so real IO is not possible anymore. | 1512 | * so real IO is not possible anymore. |
1513 | */ | 1513 | */ |
1514 | if (offset == 0) | 1514 | if (length == PAGE_SIZE) |
1515 | try_to_release_page(page, 0); | 1515 | try_to_release_page(page, 0); |
1516 | out: | 1516 | out: |
1517 | return; | 1517 | return; |
diff --git a/fs/direct-io.c b/fs/direct-io.c index ba12ee659673..874607bb6e02 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -1177,9 +1177,9 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, | |||
1177 | unsigned blkbits = i_blkbits; | 1177 | unsigned blkbits = i_blkbits; |
1178 | unsigned blocksize_mask = (1 << blkbits) - 1; | 1178 | unsigned blocksize_mask = (1 << blkbits) - 1; |
1179 | ssize_t retval = -EINVAL; | 1179 | ssize_t retval = -EINVAL; |
1180 | size_t count = iov_iter_count(iter); | 1180 | const size_t count = iov_iter_count(iter); |
1181 | loff_t offset = iocb->ki_pos; | 1181 | loff_t offset = iocb->ki_pos; |
1182 | loff_t end = offset + count; | 1182 | const loff_t end = offset + count; |
1183 | struct dio *dio; | 1183 | struct dio *dio; |
1184 | struct dio_submit sdio = { 0, }; | 1184 | struct dio_submit sdio = { 0, }; |
1185 | struct buffer_head map_bh = { 0, }; | 1185 | struct buffer_head map_bh = { 0, }; |
@@ -1200,7 +1200,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, | |||
1200 | } | 1200 | } |
1201 | 1201 | ||
1202 | /* watch out for a 0 len io from a tricksy fs */ | 1202 | /* watch out for a 0 len io from a tricksy fs */ |
1203 | if (iov_iter_rw(iter) == READ && !iov_iter_count(iter)) | 1203 | if (iov_iter_rw(iter) == READ && !count) |
1204 | return 0; | 1204 | return 0; |
1205 | 1205 | ||
1206 | dio = kmem_cache_alloc(dio_cache, GFP_KERNEL); | 1206 | dio = kmem_cache_alloc(dio_cache, GFP_KERNEL); |
@@ -1315,8 +1315,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, | |||
1315 | 1315 | ||
1316 | dio->should_dirty = (iter->type == ITER_IOVEC); | 1316 | dio->should_dirty = (iter->type == ITER_IOVEC); |
1317 | sdio.iter = iter; | 1317 | sdio.iter = iter; |
1318 | sdio.final_block_in_request = | 1318 | sdio.final_block_in_request = end >> blkbits; |
1319 | (offset + iov_iter_count(iter)) >> blkbits; | ||
1320 | 1319 | ||
1321 | /* | 1320 | /* |
1322 | * In case of non-aligned buffers, we may need 2 more | 1321 | * In case of non-aligned buffers, we may need 2 more |
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index b9a254dcc0e7..d508c7844681 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -138,10 +138,14 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) | |||
138 | 138 | ||
139 | /* | 139 | /* |
140 | * page based offset in vm_pgoff could be sufficiently large to | 140 | * page based offset in vm_pgoff could be sufficiently large to |
141 | * overflow a (l)off_t when converted to byte offset. | 141 | * overflow a loff_t when converted to byte offset. This can |
142 | * only happen on architectures where sizeof(loff_t) == | ||
143 | * sizeof(unsigned long). So, only check in those instances. | ||
142 | */ | 144 | */ |
143 | if (vma->vm_pgoff & PGOFF_LOFFT_MAX) | 145 | if (sizeof(unsigned long) == sizeof(loff_t)) { |
144 | return -EINVAL; | 146 | if (vma->vm_pgoff & PGOFF_LOFFT_MAX) |
147 | return -EINVAL; | ||
148 | } | ||
145 | 149 | ||
146 | /* must be huge page aligned */ | 150 | /* must be huge page aligned */ |
147 | if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT)) | 151 | if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT)) |
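The hugetlbfs change keeps the vm_pgoff overflow check but compiles it in only where it can matter: an unsigned long page index shifted into a byte offset can exceed what a signed loff_t holds only when the two types have the same width. A small, hedged user-space illustration of the arithmetic (PAGE_SHIFT of 12 is assumed; PGOFF_LOFFT_MAX itself is not reproduced here):

#include <limits.h>
#include <stdio.h>

int main(void)
{
        unsigned long pgoff = ULONG_MAX / 8;   /* a very large page index */

        /* Only when unsigned long is as wide as long long (loff_t) can the
         * byte offset pgoff << 12 overflow the signed 64-bit range. */
        if (sizeof(unsigned long) == sizeof(long long) &&
            pgoff > (unsigned long)(LLONG_MAX >> 12))
                printf("pgoff %lu would overflow loff_t as a byte offset\n", pgoff);
        return 0;
}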
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 9a876bb07cac..0f157bbd3e0f 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
@@ -7119,7 +7119,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, | |||
7119 | goto out_commit; | 7119 | goto out_commit; |
7120 | did_quota = 1; | 7120 | did_quota = 1; |
7121 | 7121 | ||
7122 | data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv; | 7122 | data_ac->ac_resv = &oi->ip_la_data_resv; |
7123 | 7123 | ||
7124 | ret = ocfs2_claim_clusters(handle, data_ac, 1, &bit_off, | 7124 | ret = ocfs2_claim_clusters(handle, data_ac, 1, &bit_off, |
7125 | &num); | 7125 | &num); |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index e8e205bf2e41..302cd7caa4a7 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -346,7 +346,7 @@ static int ocfs2_readpage(struct file *file, struct page *page) | |||
346 | unlock = 0; | 346 | unlock = 0; |
347 | 347 | ||
348 | out_alloc: | 348 | out_alloc: |
349 | up_read(&OCFS2_I(inode)->ip_alloc_sem); | 349 | up_read(&oi->ip_alloc_sem); |
350 | out_inode_unlock: | 350 | out_inode_unlock: |
351 | ocfs2_inode_unlock(inode, 0); | 351 | ocfs2_inode_unlock(inode, 0); |
352 | out: | 352 | out: |
@@ -2213,7 +2213,7 @@ static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock, | |||
2213 | down_write(&oi->ip_alloc_sem); | 2213 | down_write(&oi->ip_alloc_sem); |
2214 | 2214 | ||
2215 | if (first_get_block) { | 2215 | if (first_get_block) { |
2216 | if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) | 2216 | if (ocfs2_sparse_alloc(osb)) |
2217 | ret = ocfs2_zero_tail(inode, di_bh, pos); | 2217 | ret = ocfs2_zero_tail(inode, di_bh, pos); |
2218 | else | 2218 | else |
2219 | ret = ocfs2_expand_nonsparse_inode(inode, di_bh, pos, | 2219 | ret = ocfs2_expand_nonsparse_inode(inode, di_bh, pos, |
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h index 8614ff069d99..3494a62ed749 100644 --- a/fs/ocfs2/aops.h +++ b/fs/ocfs2/aops.h | |||
@@ -78,7 +78,7 @@ static inline void ocfs2_iocb_set_rw_locked(struct kiocb *iocb, int level) | |||
78 | /* | 78 | /* |
79 | * Using a named enum representing lock types in terms of #N bit stored in | 79 | * Using a named enum representing lock types in terms of #N bit stored in |
80 | * iocb->private, which is going to be used for communication between | 80 | * iocb->private, which is going to be used for communication between |
81 | * ocfs2_dio_end_io() and ocfs2_file_aio_write/read(). | 81 | * ocfs2_dio_end_io() and ocfs2_file_write/read_iter(). |
82 | */ | 82 | */ |
83 | enum ocfs2_iocb_lock_bits { | 83 | enum ocfs2_iocb_lock_bits { |
84 | OCFS2_IOCB_RW_LOCK = 0, | 84 | OCFS2_IOCB_RW_LOCK = 0, |
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index ea8c551bcd7e..91a8889abf9b 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
@@ -570,7 +570,16 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg, | |||
570 | current_page, vec_len, vec_start); | 570 | current_page, vec_len, vec_start); |
571 | 571 | ||
572 | len = bio_add_page(bio, page, vec_len, vec_start); | 572 | len = bio_add_page(bio, page, vec_len, vec_start); |
573 | if (len != vec_len) break; | 573 | if (len != vec_len) { |
574 | mlog(ML_ERROR, "Adding page[%d] to bio failed, " | ||
575 | "page %p, len %d, vec_len %u, vec_start %u, " | ||
576 | "bi_sector %llu\n", current_page, page, len, | ||
577 | vec_len, vec_start, | ||
578 | (unsigned long long)bio->bi_iter.bi_sector); | ||
579 | bio_put(bio); | ||
580 | bio = ERR_PTR(-EIO); | ||
581 | return bio; | ||
582 | } | ||
574 | 583 | ||
575 | cs += vec_len / (PAGE_SIZE/spp); | 584 | cs += vec_len / (PAGE_SIZE/spp); |
576 | vec_start = 0; | 585 | vec_start = 0; |
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 977763d4c27d..b048d4fa3959 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c | |||
@@ -3072,7 +3072,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
3072 | * We need to return the correct block within the | 3072 | * We need to return the correct block within the |
3073 | * cluster which should hold our entry. | 3073 | * cluster which should hold our entry. |
3074 | */ | 3074 | */ |
3075 | off = ocfs2_dx_dir_hash_idx(OCFS2_SB(dir->i_sb), | 3075 | off = ocfs2_dx_dir_hash_idx(osb, |
3076 | &lookup->dl_hinfo); | 3076 | &lookup->dl_hinfo); |
3077 | get_bh(dx_leaves[off]); | 3077 | get_bh(dx_leaves[off]); |
3078 | lookup->dl_dx_leaf_bh = dx_leaves[off]; | 3078 | lookup->dl_dx_leaf_bh = dx_leaves[off]; |
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c index fd6bbbbd7d78..39831fc2fd52 100644 --- a/fs/ocfs2/dlm/dlmast.c +++ b/fs/ocfs2/dlm/dlmast.c | |||
@@ -224,14 +224,12 @@ void dlm_do_local_ast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, | |||
224 | struct dlm_lock *lock) | 224 | struct dlm_lock *lock) |
225 | { | 225 | { |
226 | dlm_astlockfunc_t *fn; | 226 | dlm_astlockfunc_t *fn; |
227 | struct dlm_lockstatus *lksb; | ||
228 | 227 | ||
229 | mlog(0, "%s: res %.*s, lock %u:%llu, Local AST\n", dlm->name, | 228 | mlog(0, "%s: res %.*s, lock %u:%llu, Local AST\n", dlm->name, |
230 | res->lockname.len, res->lockname.name, | 229 | res->lockname.len, res->lockname.name, |
231 | dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), | 230 | dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), |
232 | dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie))); | 231 | dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie))); |
233 | 232 | ||
234 | lksb = lock->lksb; | ||
235 | fn = lock->ast; | 233 | fn = lock->ast; |
236 | BUG_ON(lock->ml.node != dlm->node_num); | 234 | BUG_ON(lock->ml.node != dlm->node_num); |
237 | 235 | ||
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index e9f3705c4c9f..d06e27ec4be4 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h | |||
@@ -140,6 +140,7 @@ struct dlm_ctxt | |||
140 | u8 node_num; | 140 | u8 node_num; |
141 | u32 key; | 141 | u32 key; |
142 | u8 joining_node; | 142 | u8 joining_node; |
143 | u8 migrate_done; /* set to 1 once this node has migrated all its lock resources */ | ||
143 | wait_queue_head_t dlm_join_events; | 144 | wait_queue_head_t dlm_join_events; |
144 | unsigned long live_nodes_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 145 | unsigned long live_nodes_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
145 | unsigned long domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 146 | unsigned long domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
@@ -960,13 +961,10 @@ static inline int dlm_send_proxy_ast(struct dlm_ctxt *dlm, | |||
960 | void dlm_print_one_lock_resource(struct dlm_lock_resource *res); | 961 | void dlm_print_one_lock_resource(struct dlm_lock_resource *res); |
961 | void __dlm_print_one_lock_resource(struct dlm_lock_resource *res); | 962 | void __dlm_print_one_lock_resource(struct dlm_lock_resource *res); |
962 | 963 | ||
963 | u8 dlm_nm_this_node(struct dlm_ctxt *dlm); | ||
964 | void dlm_kick_thread(struct dlm_ctxt *dlm, struct dlm_lock_resource *res); | 964 | void dlm_kick_thread(struct dlm_ctxt *dlm, struct dlm_lock_resource *res); |
965 | void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res); | 965 | void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res); |
966 | 966 | ||
967 | 967 | ||
968 | int dlm_nm_init(struct dlm_ctxt *dlm); | ||
969 | int dlm_heartbeat_init(struct dlm_ctxt *dlm); | ||
970 | void dlm_hb_node_down_cb(struct o2nm_node *node, int idx, void *data); | 968 | void dlm_hb_node_down_cb(struct o2nm_node *node, int idx, void *data); |
971 | void dlm_hb_node_up_cb(struct o2nm_node *node, int idx, void *data); | 969 | void dlm_hb_node_up_cb(struct o2nm_node *node, int idx, void *data); |
972 | 970 | ||
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index e1fea149f50b..425081be6161 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c | |||
@@ -461,6 +461,19 @@ redo_bucket: | |||
461 | cond_resched_lock(&dlm->spinlock); | 461 | cond_resched_lock(&dlm->spinlock); |
462 | num += n; | 462 | num += n; |
463 | } | 463 | } |
464 | |||
465 | if (!num) { | ||
466 | if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) { | ||
467 | mlog(0, "%s: perhaps there are more lock resources " | ||
468 | "need to be migrated after dlm recovery\n", dlm->name); | ||
469 | ret = -EAGAIN; | ||
470 | } else { | ||
471 | mlog(0, "%s: we won't do dlm recovery after migrating " | ||
472 | "all lock resources\n", dlm->name); | ||
473 | dlm->migrate_done = 1; | ||
474 | } | ||
475 | } | ||
476 | |||
464 | spin_unlock(&dlm->spinlock); | 477 | spin_unlock(&dlm->spinlock); |
465 | wake_up(&dlm->dlm_thread_wq); | 478 | wake_up(&dlm->dlm_thread_wq); |
466 | 479 | ||
@@ -675,20 +688,6 @@ static void dlm_leave_domain(struct dlm_ctxt *dlm) | |||
675 | spin_unlock(&dlm->spinlock); | 688 | spin_unlock(&dlm->spinlock); |
676 | } | 689 | } |
677 | 690 | ||
678 | int dlm_shutting_down(struct dlm_ctxt *dlm) | ||
679 | { | ||
680 | int ret = 0; | ||
681 | |||
682 | spin_lock(&dlm_domain_lock); | ||
683 | |||
684 | if (dlm->dlm_state == DLM_CTXT_IN_SHUTDOWN) | ||
685 | ret = 1; | ||
686 | |||
687 | spin_unlock(&dlm_domain_lock); | ||
688 | |||
689 | return ret; | ||
690 | } | ||
691 | |||
692 | void dlm_unregister_domain(struct dlm_ctxt *dlm) | 691 | void dlm_unregister_domain(struct dlm_ctxt *dlm) |
693 | { | 692 | { |
694 | int leave = 0; | 693 | int leave = 0; |
@@ -2052,6 +2051,8 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, | |||
2052 | dlm->joining_node = DLM_LOCK_RES_OWNER_UNKNOWN; | 2051 | dlm->joining_node = DLM_LOCK_RES_OWNER_UNKNOWN; |
2053 | init_waitqueue_head(&dlm->dlm_join_events); | 2052 | init_waitqueue_head(&dlm->dlm_join_events); |
2054 | 2053 | ||
2054 | dlm->migrate_done = 0; | ||
2055 | |||
2055 | dlm->reco.new_master = O2NM_INVALID_NODE_NUM; | 2056 | dlm->reco.new_master = O2NM_INVALID_NODE_NUM; |
2056 | dlm->reco.dead_node = O2NM_INVALID_NODE_NUM; | 2057 | dlm->reco.dead_node = O2NM_INVALID_NODE_NUM; |
2057 | 2058 | ||
diff --git a/fs/ocfs2/dlm/dlmdomain.h b/fs/ocfs2/dlm/dlmdomain.h index fd6122a38dbd..8a9281411c18 100644 --- a/fs/ocfs2/dlm/dlmdomain.h +++ b/fs/ocfs2/dlm/dlmdomain.h | |||
@@ -28,7 +28,30 @@ | |||
28 | extern spinlock_t dlm_domain_lock; | 28 | extern spinlock_t dlm_domain_lock; |
29 | extern struct list_head dlm_domains; | 29 | extern struct list_head dlm_domains; |
30 | 30 | ||
31 | int dlm_shutting_down(struct dlm_ctxt *dlm); | 31 | static inline int dlm_joined(struct dlm_ctxt *dlm) |
32 | { | ||
33 | int ret = 0; | ||
34 | |||
35 | spin_lock(&dlm_domain_lock); | ||
36 | if (dlm->dlm_state == DLM_CTXT_JOINED) | ||
37 | ret = 1; | ||
38 | spin_unlock(&dlm_domain_lock); | ||
39 | |||
40 | return ret; | ||
41 | } | ||
42 | |||
43 | static inline int dlm_shutting_down(struct dlm_ctxt *dlm) | ||
44 | { | ||
45 | int ret = 0; | ||
46 | |||
47 | spin_lock(&dlm_domain_lock); | ||
48 | if (dlm->dlm_state == DLM_CTXT_IN_SHUTDOWN) | ||
49 | ret = 1; | ||
50 | spin_unlock(&dlm_domain_lock); | ||
51 | |||
52 | return ret; | ||
53 | } | ||
54 | |||
32 | void dlm_fire_domain_eviction_callbacks(struct dlm_ctxt *dlm, | 55 | void dlm_fire_domain_eviction_callbacks(struct dlm_ctxt *dlm, |
33 | int node_num); | 56 | int node_num); |
34 | 57 | ||
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c index 66c2a491f68d..74962315794e 100644 --- a/fs/ocfs2/dlm/dlmlock.c +++ b/fs/ocfs2/dlm/dlmlock.c | |||
@@ -77,8 +77,7 @@ int dlm_init_lock_cache(void) | |||
77 | 77 | ||
78 | void dlm_destroy_lock_cache(void) | 78 | void dlm_destroy_lock_cache(void) |
79 | { | 79 | { |
80 | if (dlm_lock_cache) | 80 | kmem_cache_destroy(dlm_lock_cache); |
81 | kmem_cache_destroy(dlm_lock_cache); | ||
82 | } | 81 | } |
83 | 82 | ||
84 | /* Tell us whether we can grant a new lock request. | 83 | /* Tell us whether we can grant a new lock request. |
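This cleanup (and the matching hunks in dlmmaster.c just below) relies on kmem_cache_destroy() accepting a NULL pointer and returning early, so the wrapper's own NULL check is redundant. A short sketch of the unconditional-teardown idiom, using a hypothetical cache name:

static struct kmem_cache *example_cache;

static void example_destroy_cache(void)
{
        /* kmem_cache_destroy() is a no-op for NULL, so no guard is needed
         * even if the cache was never created. */
        kmem_cache_destroy(example_cache);
        example_cache = NULL;
}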
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index a7df226f9449..aaca0949fe53 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
@@ -414,8 +414,7 @@ int dlm_init_mle_cache(void) | |||
414 | 414 | ||
415 | void dlm_destroy_mle_cache(void) | 415 | void dlm_destroy_mle_cache(void) |
416 | { | 416 | { |
417 | if (dlm_mle_cache) | 417 | kmem_cache_destroy(dlm_mle_cache); |
418 | kmem_cache_destroy(dlm_mle_cache); | ||
419 | } | 418 | } |
420 | 419 | ||
421 | static void dlm_mle_release(struct kref *kref) | 420 | static void dlm_mle_release(struct kref *kref) |
@@ -472,15 +471,11 @@ bail: | |||
472 | 471 | ||
473 | void dlm_destroy_master_caches(void) | 472 | void dlm_destroy_master_caches(void) |
474 | { | 473 | { |
475 | if (dlm_lockname_cache) { | 474 | kmem_cache_destroy(dlm_lockname_cache); |
476 | kmem_cache_destroy(dlm_lockname_cache); | 475 | dlm_lockname_cache = NULL; |
477 | dlm_lockname_cache = NULL; | ||
478 | } | ||
479 | 476 | ||
480 | if (dlm_lockres_cache) { | 477 | kmem_cache_destroy(dlm_lockres_cache); |
481 | kmem_cache_destroy(dlm_lockres_cache); | 478 | dlm_lockres_cache = NULL; |
482 | dlm_lockres_cache = NULL; | ||
483 | } | ||
484 | } | 479 | } |
485 | 480 | ||
486 | static void dlm_lockres_release(struct kref *kref) | 481 | static void dlm_lockres_release(struct kref *kref) |
@@ -2495,13 +2490,13 @@ static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data) | |||
2495 | } | 2490 | } |
2496 | 2491 | ||
2497 | /* | 2492 | /* |
2498 | * A migrateable resource is one that is : | 2493 | * A migratable resource is one that is : |
2499 | * 1. locally mastered, and, | 2494 | * 1. locally mastered, and, |
2500 | * 2. zero local locks, and, | 2495 | * 2. zero local locks, and, |
2501 | * 3. one or more non-local locks, or, one or more references | 2496 | * 3. one or more non-local locks, or, one or more references |
2502 | * Returns 1 if yes, 0 if not. | 2497 | * Returns 1 if yes, 0 if not. |
2503 | */ | 2498 | */ |
2504 | static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, | 2499 | static int dlm_is_lockres_migratable(struct dlm_ctxt *dlm, |
2505 | struct dlm_lock_resource *res) | 2500 | struct dlm_lock_resource *res) |
2506 | { | 2501 | { |
2507 | enum dlm_lockres_list idx; | 2502 | enum dlm_lockres_list idx; |
@@ -2532,7 +2527,7 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, | |||
2532 | continue; | 2527 | continue; |
2533 | } | 2528 | } |
2534 | cookie = be64_to_cpu(lock->ml.cookie); | 2529 | cookie = be64_to_cpu(lock->ml.cookie); |
2535 | mlog(0, "%s: Not migrateable res %.*s, lock %u:%llu on " | 2530 | mlog(0, "%s: Not migratable res %.*s, lock %u:%llu on " |
2536 | "%s list\n", dlm->name, res->lockname.len, | 2531 | "%s list\n", dlm->name, res->lockname.len, |
2537 | res->lockname.name, | 2532 | res->lockname.name, |
2538 | dlm_get_lock_cookie_node(cookie), | 2533 | dlm_get_lock_cookie_node(cookie), |
@@ -2548,7 +2543,7 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, | |||
2548 | return 0; | 2543 | return 0; |
2549 | } | 2544 | } |
2550 | 2545 | ||
2551 | mlog(0, "%s: res %.*s, Migrateable\n", dlm->name, res->lockname.len, | 2546 | mlog(0, "%s: res %.*s, Migratable\n", dlm->name, res->lockname.len, |
2552 | res->lockname.name); | 2547 | res->lockname.name); |
2553 | 2548 | ||
2554 | return 1; | 2549 | return 1; |
@@ -2792,7 +2787,7 @@ int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) | |||
2792 | assert_spin_locked(&dlm->spinlock); | 2787 | assert_spin_locked(&dlm->spinlock); |
2793 | 2788 | ||
2794 | spin_lock(&res->spinlock); | 2789 | spin_lock(&res->spinlock); |
2795 | if (dlm_is_lockres_migrateable(dlm, res)) | 2790 | if (dlm_is_lockres_migratable(dlm, res)) |
2796 | target = dlm_pick_migration_target(dlm, res); | 2791 | target = dlm_pick_migration_target(dlm, res); |
2797 | spin_unlock(&res->spinlock); | 2792 | spin_unlock(&res->spinlock); |
2798 | 2793 | ||
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index ec8f75813beb..802636d50365 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
@@ -62,7 +62,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node); | |||
62 | static int dlm_init_recovery_area(struct dlm_ctxt *dlm, u8 dead_node); | 62 | static int dlm_init_recovery_area(struct dlm_ctxt *dlm, u8 dead_node); |
63 | static int dlm_request_all_locks(struct dlm_ctxt *dlm, | 63 | static int dlm_request_all_locks(struct dlm_ctxt *dlm, |
64 | u8 request_from, u8 dead_node); | 64 | u8 request_from, u8 dead_node); |
65 | static void dlm_destroy_recovery_area(struct dlm_ctxt *dlm, u8 dead_node); | 65 | static void dlm_destroy_recovery_area(struct dlm_ctxt *dlm); |
66 | 66 | ||
67 | static inline int dlm_num_locks_in_lockres(struct dlm_lock_resource *res); | 67 | static inline int dlm_num_locks_in_lockres(struct dlm_lock_resource *res); |
68 | static void dlm_init_migratable_lockres(struct dlm_migratable_lockres *mres, | 68 | static void dlm_init_migratable_lockres(struct dlm_migratable_lockres *mres, |
@@ -423,12 +423,11 @@ void dlm_wait_for_recovery(struct dlm_ctxt *dlm) | |||
423 | 423 | ||
424 | static void dlm_begin_recovery(struct dlm_ctxt *dlm) | 424 | static void dlm_begin_recovery(struct dlm_ctxt *dlm) |
425 | { | 425 | { |
426 | spin_lock(&dlm->spinlock); | 426 | assert_spin_locked(&dlm->spinlock); |
427 | BUG_ON(dlm->reco.state & DLM_RECO_STATE_ACTIVE); | 427 | BUG_ON(dlm->reco.state & DLM_RECO_STATE_ACTIVE); |
428 | printk(KERN_NOTICE "o2dlm: Begin recovery on domain %s for node %u\n", | 428 | printk(KERN_NOTICE "o2dlm: Begin recovery on domain %s for node %u\n", |
429 | dlm->name, dlm->reco.dead_node); | 429 | dlm->name, dlm->reco.dead_node); |
430 | dlm->reco.state |= DLM_RECO_STATE_ACTIVE; | 430 | dlm->reco.state |= DLM_RECO_STATE_ACTIVE; |
431 | spin_unlock(&dlm->spinlock); | ||
432 | } | 431 | } |
433 | 432 | ||
434 | static void dlm_end_recovery(struct dlm_ctxt *dlm) | 433 | static void dlm_end_recovery(struct dlm_ctxt *dlm) |
@@ -456,6 +455,13 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm) | |||
456 | 455 | ||
457 | spin_lock(&dlm->spinlock); | 456 | spin_lock(&dlm->spinlock); |
458 | 457 | ||
458 | if (dlm->migrate_done) { | ||
459 | mlog(0, "%s: no need do recovery after migrating all " | ||
460 | "lock resources\n", dlm->name); | ||
461 | spin_unlock(&dlm->spinlock); | ||
462 | return 0; | ||
463 | } | ||
464 | |||
459 | /* check to see if the new master has died */ | 465 | /* check to see if the new master has died */ |
460 | if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM && | 466 | if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM && |
461 | test_bit(dlm->reco.new_master, dlm->recovery_map)) { | 467 | test_bit(dlm->reco.new_master, dlm->recovery_map)) { |
@@ -490,12 +496,13 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm) | |||
490 | mlog(0, "%s(%d):recovery thread found node %u in the recovery map!\n", | 496 | mlog(0, "%s(%d):recovery thread found node %u in the recovery map!\n", |
491 | dlm->name, task_pid_nr(dlm->dlm_reco_thread_task), | 497 | dlm->name, task_pid_nr(dlm->dlm_reco_thread_task), |
492 | dlm->reco.dead_node); | 498 | dlm->reco.dead_node); |
493 | spin_unlock(&dlm->spinlock); | ||
494 | 499 | ||
495 | /* take write barrier */ | 500 | /* take write barrier */ |
496 | /* (stops the list reshuffling thread, proxy ast handling) */ | 501 | /* (stops the list reshuffling thread, proxy ast handling) */ |
497 | dlm_begin_recovery(dlm); | 502 | dlm_begin_recovery(dlm); |
498 | 503 | ||
504 | spin_unlock(&dlm->spinlock); | ||
505 | |||
499 | if (dlm->reco.new_master == dlm->node_num) | 506 | if (dlm->reco.new_master == dlm->node_num) |
500 | goto master_here; | 507 | goto master_here; |
501 | 508 | ||
@@ -739,7 +746,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node) | |||
739 | } | 746 | } |
740 | 747 | ||
741 | if (destroy) | 748 | if (destroy) |
742 | dlm_destroy_recovery_area(dlm, dead_node); | 749 | dlm_destroy_recovery_area(dlm); |
743 | 750 | ||
744 | return status; | 751 | return status; |
745 | } | 752 | } |
@@ -764,7 +771,7 @@ static int dlm_init_recovery_area(struct dlm_ctxt *dlm, u8 dead_node) | |||
764 | 771 | ||
765 | ndata = kzalloc(sizeof(*ndata), GFP_NOFS); | 772 | ndata = kzalloc(sizeof(*ndata), GFP_NOFS); |
766 | if (!ndata) { | 773 | if (!ndata) { |
767 | dlm_destroy_recovery_area(dlm, dead_node); | 774 | dlm_destroy_recovery_area(dlm); |
768 | return -ENOMEM; | 775 | return -ENOMEM; |
769 | } | 776 | } |
770 | ndata->node_num = num; | 777 | ndata->node_num = num; |
@@ -778,7 +785,7 @@ static int dlm_init_recovery_area(struct dlm_ctxt *dlm, u8 dead_node) | |||
778 | return 0; | 785 | return 0; |
779 | } | 786 | } |
780 | 787 | ||
781 | static void dlm_destroy_recovery_area(struct dlm_ctxt *dlm, u8 dead_node) | 788 | static void dlm_destroy_recovery_area(struct dlm_ctxt *dlm) |
782 | { | 789 | { |
783 | struct dlm_reco_node_data *ndata, *next; | 790 | struct dlm_reco_node_data *ndata, *next; |
784 | LIST_HEAD(tmplist); | 791 | LIST_HEAD(tmplist); |
@@ -1378,6 +1385,15 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data, | |||
1378 | if (!dlm_grab(dlm)) | 1385 | if (!dlm_grab(dlm)) |
1379 | return -EINVAL; | 1386 | return -EINVAL; |
1380 | 1387 | ||
1388 | if (!dlm_joined(dlm)) { | ||
1389 | mlog(ML_ERROR, "Domain %s not joined! " | ||
1390 | "lockres %.*s, master %u\n", | ||
1391 | dlm->name, mres->lockname_len, | ||
1392 | mres->lockname, mres->master); | ||
1393 | dlm_put(dlm); | ||
1394 | return -EINVAL; | ||
1395 | } | ||
1396 | |||
1381 | BUG_ON(!(mres->flags & (DLM_MRES_RECOVERY|DLM_MRES_MIGRATION))); | 1397 | BUG_ON(!(mres->flags & (DLM_MRES_RECOVERY|DLM_MRES_MIGRATION))); |
1382 | 1398 | ||
1383 | real_master = mres->master; | 1399 | real_master = mres->master; |
@@ -1807,7 +1823,6 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, | |||
1807 | int i, j, bad; | 1823 | int i, j, bad; |
1808 | struct dlm_lock *lock; | 1824 | struct dlm_lock *lock; |
1809 | u8 from = O2NM_MAX_NODES; | 1825 | u8 from = O2NM_MAX_NODES; |
1810 | unsigned int added = 0; | ||
1811 | __be64 c; | 1826 | __be64 c; |
1812 | 1827 | ||
1813 | mlog(0, "running %d locks for this lockres\n", mres->num_locks); | 1828 | mlog(0, "running %d locks for this lockres\n", mres->num_locks); |
@@ -1823,7 +1838,6 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, | |||
1823 | spin_lock(&res->spinlock); | 1838 | spin_lock(&res->spinlock); |
1824 | dlm_lockres_set_refmap_bit(dlm, res, from); | 1839 | dlm_lockres_set_refmap_bit(dlm, res, from); |
1825 | spin_unlock(&res->spinlock); | 1840 | spin_unlock(&res->spinlock); |
1826 | added++; | ||
1827 | break; | 1841 | break; |
1828 | } | 1842 | } |
1829 | BUG_ON(ml->highest_blocked != LKM_IVMODE); | 1843 | BUG_ON(ml->highest_blocked != LKM_IVMODE); |
@@ -1911,7 +1925,6 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, | |||
1911 | /* do not alter lock refcount. switching lists. */ | 1925 | /* do not alter lock refcount. switching lists. */ |
1912 | list_move_tail(&lock->list, queue); | 1926 | list_move_tail(&lock->list, queue); |
1913 | spin_unlock(&res->spinlock); | 1927 | spin_unlock(&res->spinlock); |
1914 | added++; | ||
1915 | 1928 | ||
1916 | mlog(0, "just reordered a local lock!\n"); | 1929 | mlog(0, "just reordered a local lock!\n"); |
1917 | continue; | 1930 | continue; |
@@ -2037,7 +2050,6 @@ skip_lvb: | |||
2037 | "setting refmap bit\n", dlm->name, | 2050 | "setting refmap bit\n", dlm->name, |
2038 | res->lockname.len, res->lockname.name, ml->node); | 2051 | res->lockname.len, res->lockname.name, ml->node); |
2039 | dlm_lockres_set_refmap_bit(dlm, res, ml->node); | 2052 | dlm_lockres_set_refmap_bit(dlm, res, ml->node); |
2040 | added++; | ||
2041 | } | 2053 | } |
2042 | spin_unlock(&res->spinlock); | 2054 | spin_unlock(&res->spinlock); |
2043 | } | 2055 | } |
@@ -2331,13 +2343,6 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm, | |||
2331 | __dlm_dirty_lockres(dlm, res); | 2343 | __dlm_dirty_lockres(dlm, res); |
2332 | } | 2344 | } |
2333 | 2345 | ||
2334 | /* if this node is the recovery master, and there are no | ||
2335 | * locks for a given lockres owned by this node that are in | ||
2336 | * either PR or EX mode, zero out the lvb before requesting. | ||
2337 | * | ||
2338 | */ | ||
2339 | |||
2340 | |||
2341 | static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) | 2346 | static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) |
2342 | { | 2347 | { |
2343 | struct dlm_lock_resource *res; | 2348 | struct dlm_lock_resource *res; |
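Two related adjustments run through the dlmrecovery.c hunks above: dlm_do_recovery() now bails out early once migrate_done is set, and it keeps dlm->spinlock held across dlm_begin_recovery(), which only asserts the lock instead of retaking it. A condensed, hedged sketch of that entry sequence (the function name is illustrative and the remaining steps of the real function are elided):

static int dlm_do_recovery_entry(struct dlm_ctxt *dlm)
{
        spin_lock(&dlm->spinlock);

        if (dlm->migrate_done) {          /* all lock resources already migrated */
                spin_unlock(&dlm->spinlock);
                return 0;                 /* nothing left to recover */
        }

        /* ... choose dlm->reco.dead_node / new_master under the lock ... */

        dlm_begin_recovery(dlm);          /* asserts dlm->spinlock is held */
        spin_unlock(&dlm->spinlock);

        /* ... remaster locks, then dlm_end_recovery() ... */
        return 0;
}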
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index b552d1f8508c..97a972efab83 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
@@ -1756,8 +1756,7 @@ int ocfs2_rw_lock(struct inode *inode, int write) | |||
1756 | 1756 | ||
1757 | level = write ? DLM_LOCK_EX : DLM_LOCK_PR; | 1757 | level = write ? DLM_LOCK_EX : DLM_LOCK_PR; |
1758 | 1758 | ||
1759 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0, | 1759 | status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); |
1760 | 0); | ||
1761 | if (status < 0) | 1760 | if (status < 0) |
1762 | mlog_errno(status); | 1761 | mlog_errno(status); |
1763 | 1762 | ||
@@ -1796,7 +1795,7 @@ void ocfs2_rw_unlock(struct inode *inode, int write) | |||
1796 | write ? "EXMODE" : "PRMODE"); | 1795 | write ? "EXMODE" : "PRMODE"); |
1797 | 1796 | ||
1798 | if (!ocfs2_mount_local(osb)) | 1797 | if (!ocfs2_mount_local(osb)) |
1799 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); | 1798 | ocfs2_cluster_unlock(osb, lockres, level); |
1800 | } | 1799 | } |
1801 | 1800 | ||
1802 | /* | 1801 | /* |
@@ -1816,8 +1815,7 @@ int ocfs2_open_lock(struct inode *inode) | |||
1816 | 1815 | ||
1817 | lockres = &OCFS2_I(inode)->ip_open_lockres; | 1816 | lockres = &OCFS2_I(inode)->ip_open_lockres; |
1818 | 1817 | ||
1819 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, | 1818 | status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_PR, 0, 0); |
1820 | DLM_LOCK_PR, 0, 0); | ||
1821 | if (status < 0) | 1819 | if (status < 0) |
1822 | mlog_errno(status); | 1820 | mlog_errno(status); |
1823 | 1821 | ||
@@ -1854,8 +1852,7 @@ int ocfs2_try_open_lock(struct inode *inode, int write) | |||
1854 | * other nodes and the -EAGAIN will indicate to the caller that | 1852 | * other nodes and the -EAGAIN will indicate to the caller that |
1855 | * this inode is still in use. | 1853 | * this inode is still in use. |
1856 | */ | 1854 | */ |
1857 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, | 1855 | status = ocfs2_cluster_lock(osb, lockres, level, DLM_LKF_NOQUEUE, 0); |
1858 | level, DLM_LKF_NOQUEUE, 0); | ||
1859 | 1856 | ||
1860 | out: | 1857 | out: |
1861 | return status; | 1858 | return status; |
@@ -1876,11 +1873,9 @@ void ocfs2_open_unlock(struct inode *inode) | |||
1876 | goto out; | 1873 | goto out; |
1877 | 1874 | ||
1878 | if(lockres->l_ro_holders) | 1875 | if(lockres->l_ro_holders) |
1879 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, | 1876 | ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_PR); |
1880 | DLM_LOCK_PR); | ||
1881 | if(lockres->l_ex_holders) | 1877 | if(lockres->l_ex_holders) |
1882 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, | 1878 | ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); |
1883 | DLM_LOCK_EX); | ||
1884 | 1879 | ||
1885 | out: | 1880 | out: |
1886 | return; | 1881 | return; |
@@ -2601,9 +2596,9 @@ void ocfs2_inode_unlock(struct inode *inode, | |||
2601 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 2596 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
2602 | ex ? "EXMODE" : "PRMODE"); | 2597 | ex ? "EXMODE" : "PRMODE"); |
2603 | 2598 | ||
2604 | if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) && | 2599 | if (!ocfs2_is_hard_readonly(osb) && |
2605 | !ocfs2_mount_local(osb)) | 2600 | !ocfs2_mount_local(osb)) |
2606 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); | 2601 | ocfs2_cluster_unlock(osb, lockres, level); |
2607 | } | 2602 | } |
2608 | 2603 | ||
2609 | /* | 2604 | /* |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 5d1784a365a3..6ee94bc23f5b 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -101,7 +101,7 @@ static int ocfs2_file_open(struct inode *inode, struct file *file) | |||
101 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 101 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
102 | 102 | ||
103 | trace_ocfs2_file_open(inode, file, file->f_path.dentry, | 103 | trace_ocfs2_file_open(inode, file, file->f_path.dentry, |
104 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 104 | (unsigned long long)oi->ip_blkno, |
105 | file->f_path.dentry->d_name.len, | 105 | file->f_path.dentry->d_name.len, |
106 | file->f_path.dentry->d_name.name, mode); | 106 | file->f_path.dentry->d_name.name, mode); |
107 | 107 | ||
@@ -116,7 +116,7 @@ static int ocfs2_file_open(struct inode *inode, struct file *file) | |||
116 | /* Check that the inode hasn't been wiped from disk by another | 116 | /* Check that the inode hasn't been wiped from disk by another |
117 | * node. If it hasn't then we're safe as long as we hold the | 117 | * node. If it hasn't then we're safe as long as we hold the |
118 | * spin lock until our increment of open count. */ | 118 | * spin lock until our increment of open count. */ |
119 | if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) { | 119 | if (oi->ip_flags & OCFS2_INODE_DELETED) { |
120 | spin_unlock(&oi->ip_lock); | 120 | spin_unlock(&oi->ip_lock); |
121 | 121 | ||
122 | status = -ENOENT; | 122 | status = -ENOENT; |
@@ -190,7 +190,7 @@ static int ocfs2_sync_file(struct file *file, loff_t start, loff_t end, | |||
190 | bool needs_barrier = false; | 190 | bool needs_barrier = false; |
191 | 191 | ||
192 | trace_ocfs2_sync_file(inode, file, file->f_path.dentry, | 192 | trace_ocfs2_sync_file(inode, file, file->f_path.dentry, |
193 | OCFS2_I(inode)->ip_blkno, | 193 | oi->ip_blkno, |
194 | file->f_path.dentry->d_name.len, | 194 | file->f_path.dentry->d_name.len, |
195 | file->f_path.dentry->d_name.name, | 195 | file->f_path.dentry->d_name.name, |
196 | (unsigned long long)datasync); | 196 | (unsigned long long)datasync); |
@@ -296,7 +296,7 @@ int ocfs2_update_inode_atime(struct inode *inode, | |||
296 | ocfs2_journal_dirty(handle, bh); | 296 | ocfs2_journal_dirty(handle, bh); |
297 | 297 | ||
298 | out_commit: | 298 | out_commit: |
299 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); | 299 | ocfs2_commit_trans(osb, handle); |
300 | out: | 300 | out: |
301 | return ret; | 301 | return ret; |
302 | } | 302 | } |
@@ -2257,7 +2257,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, | |||
2257 | int direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0; | 2257 | int direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0; |
2258 | int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0; | 2258 | int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0; |
2259 | 2259 | ||
2260 | trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry, | 2260 | trace_ocfs2_file_write_iter(inode, file, file->f_path.dentry, |
2261 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 2261 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
2262 | file->f_path.dentry->d_name.len, | 2262 | file->f_path.dentry->d_name.len, |
2263 | file->f_path.dentry->d_name.name, | 2263 | file->f_path.dentry->d_name.name, |
@@ -2405,7 +2405,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb, | |||
2405 | int direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0; | 2405 | int direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0; |
2406 | int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0; | 2406 | int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0; |
2407 | 2407 | ||
2408 | trace_ocfs2_file_aio_read(inode, filp, filp->f_path.dentry, | 2408 | trace_ocfs2_file_read_iter(inode, filp, filp->f_path.dentry, |
2409 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 2409 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
2410 | filp->f_path.dentry->d_name.len, | 2410 | filp->f_path.dentry->d_name.len, |
2411 | filp->f_path.dentry->d_name.name, | 2411 | filp->f_path.dentry->d_name.name, |
@@ -2448,7 +2448,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb, | |||
2448 | * | 2448 | * |
2449 | * Take and drop the meta data lock to update inode fields | 2449 | * Take and drop the meta data lock to update inode fields |
2450 | * like i_size. This allows the checks down below | 2450 | * like i_size. This allows the checks down below |
2451 | * generic_file_aio_read() a chance of actually working. | 2451 | * generic_file_read_iter() a chance of actually working. |
2452 | */ | 2452 | */ |
2453 | ret = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level, | 2453 | ret = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level, |
2454 | !nowait); | 2454 | !nowait); |
@@ -2460,7 +2460,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb, | |||
2460 | ocfs2_inode_unlock(inode, lock_level); | 2460 | ocfs2_inode_unlock(inode, lock_level); |
2461 | 2461 | ||
2462 | ret = generic_file_read_iter(iocb, to); | 2462 | ret = generic_file_read_iter(iocb, to); |
2463 | trace_generic_file_aio_read_ret(ret); | 2463 | trace_generic_file_read_iter_ret(ret); |
2464 | 2464 | ||
2465 | /* buffered aio wouldn't have proper lock coverage today */ | 2465 | /* buffered aio wouldn't have proper lock coverage today */ |
2466 | BUG_ON(ret == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT)); | 2466 | BUG_ON(ret == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT)); |
diff --git a/fs/ocfs2/filecheck.c b/fs/ocfs2/filecheck.c index 6b92cb241138..f65f2b2f594d 100644 --- a/fs/ocfs2/filecheck.c +++ b/fs/ocfs2/filecheck.c | |||
@@ -53,36 +53,6 @@ static const char * const ocfs2_filecheck_errs[] = { | |||
53 | "UNSUPPORTED" | 53 | "UNSUPPORTED" |
54 | }; | 54 | }; |
55 | 55 | ||
56 | static DEFINE_SPINLOCK(ocfs2_filecheck_sysfs_lock); | ||
57 | static LIST_HEAD(ocfs2_filecheck_sysfs_list); | ||
58 | |||
59 | struct ocfs2_filecheck { | ||
60 | struct list_head fc_head; /* File check entry list head */ | ||
61 | spinlock_t fc_lock; | ||
62 | unsigned int fc_max; /* Maximum number of entry in list */ | ||
63 | unsigned int fc_size; /* Current entry count in list */ | ||
64 | unsigned int fc_done; /* Finished entry count in list */ | ||
65 | }; | ||
66 | |||
67 | struct ocfs2_filecheck_sysfs_entry { /* sysfs entry per mounting */ | ||
68 | struct list_head fs_list; | ||
69 | atomic_t fs_count; | ||
70 | struct super_block *fs_sb; | ||
71 | struct kset *fs_devicekset; | ||
72 | struct kset *fs_fcheckkset; | ||
73 | struct ocfs2_filecheck *fs_fcheck; | ||
74 | }; | ||
75 | |||
76 | #define OCFS2_FILECHECK_MAXSIZE 100 | ||
77 | #define OCFS2_FILECHECK_MINSIZE 10 | ||
78 | |||
79 | /* File check operation type */ | ||
80 | enum { | ||
81 | OCFS2_FILECHECK_TYPE_CHK = 0, /* Check a file(inode) */ | ||
82 | OCFS2_FILECHECK_TYPE_FIX, /* Fix a file(inode) */ | ||
83 | OCFS2_FILECHECK_TYPE_SET = 100 /* Set entry list maximum size */ | ||
84 | }; | ||
85 | |||
86 | struct ocfs2_filecheck_entry { | 56 | struct ocfs2_filecheck_entry { |
87 | struct list_head fe_list; | 57 | struct list_head fe_list; |
88 | unsigned long fe_ino; | 58 | unsigned long fe_ino; |
@@ -110,35 +80,84 @@ ocfs2_filecheck_error(int errno) | |||
110 | return ocfs2_filecheck_errs[errno - OCFS2_FILECHECK_ERR_START + 1]; | 80 | return ocfs2_filecheck_errs[errno - OCFS2_FILECHECK_ERR_START + 1]; |
111 | } | 81 | } |
112 | 82 | ||
113 | static ssize_t ocfs2_filecheck_show(struct kobject *kobj, | 83 | static ssize_t ocfs2_filecheck_attr_show(struct kobject *kobj, |
114 | struct kobj_attribute *attr, | 84 | struct kobj_attribute *attr, |
115 | char *buf); | 85 | char *buf); |
116 | static ssize_t ocfs2_filecheck_store(struct kobject *kobj, | 86 | static ssize_t ocfs2_filecheck_attr_store(struct kobject *kobj, |
117 | struct kobj_attribute *attr, | 87 | struct kobj_attribute *attr, |
118 | const char *buf, size_t count); | 88 | const char *buf, size_t count); |
119 | static struct kobj_attribute ocfs2_attr_filecheck_chk = | 89 | static struct kobj_attribute ocfs2_filecheck_attr_chk = |
120 | __ATTR(check, S_IRUSR | S_IWUSR, | 90 | __ATTR(check, S_IRUSR | S_IWUSR, |
121 | ocfs2_filecheck_show, | 91 | ocfs2_filecheck_attr_show, |
122 | ocfs2_filecheck_store); | 92 | ocfs2_filecheck_attr_store); |
123 | static struct kobj_attribute ocfs2_attr_filecheck_fix = | 93 | static struct kobj_attribute ocfs2_filecheck_attr_fix = |
124 | __ATTR(fix, S_IRUSR | S_IWUSR, | 94 | __ATTR(fix, S_IRUSR | S_IWUSR, |
125 | ocfs2_filecheck_show, | 95 | ocfs2_filecheck_attr_show, |
126 | ocfs2_filecheck_store); | 96 | ocfs2_filecheck_attr_store); |
127 | static struct kobj_attribute ocfs2_attr_filecheck_set = | 97 | static struct kobj_attribute ocfs2_filecheck_attr_set = |
128 | __ATTR(set, S_IRUSR | S_IWUSR, | 98 | __ATTR(set, S_IRUSR | S_IWUSR, |
129 | ocfs2_filecheck_show, | 99 | ocfs2_filecheck_attr_show, |
130 | ocfs2_filecheck_store); | 100 | ocfs2_filecheck_attr_store); |
101 | static struct attribute *ocfs2_filecheck_attrs[] = { | ||
102 | &ocfs2_filecheck_attr_chk.attr, | ||
103 | &ocfs2_filecheck_attr_fix.attr, | ||
104 | &ocfs2_filecheck_attr_set.attr, | ||
105 | NULL | ||
106 | }; | ||
107 | |||
108 | static void ocfs2_filecheck_release(struct kobject *kobj) | ||
109 | { | ||
110 | struct ocfs2_filecheck_sysfs_entry *entry = container_of(kobj, | ||
111 | struct ocfs2_filecheck_sysfs_entry, fs_kobj); | ||
112 | |||
113 | complete(&entry->fs_kobj_unregister); | ||
114 | } | ||
115 | |||
116 | static ssize_t | ||
117 | ocfs2_filecheck_show(struct kobject *kobj, struct attribute *attr, char *buf) | ||
118 | { | ||
119 | ssize_t ret = -EIO; | ||
120 | struct kobj_attribute *kattr = container_of(attr, | ||
121 | struct kobj_attribute, attr); | ||
122 | |||
123 | kobject_get(kobj); | ||
124 | if (kattr->show) | ||
125 | ret = kattr->show(kobj, kattr, buf); | ||
126 | kobject_put(kobj); | ||
127 | return ret; | ||
128 | } | ||
129 | |||
130 | static ssize_t | ||
131 | ocfs2_filecheck_store(struct kobject *kobj, struct attribute *attr, | ||
132 | const char *buf, size_t count) | ||
133 | { | ||
134 | ssize_t ret = -EIO; | ||
135 | struct kobj_attribute *kattr = container_of(attr, | ||
136 | struct kobj_attribute, attr); | ||
137 | |||
138 | kobject_get(kobj); | ||
139 | if (kattr->store) | ||
140 | ret = kattr->store(kobj, kattr, buf, count); | ||
141 | kobject_put(kobj); | ||
142 | return ret; | ||
143 | } | ||
144 | |||
145 | static const struct sysfs_ops ocfs2_filecheck_ops = { | ||
146 | .show = ocfs2_filecheck_show, | ||
147 | .store = ocfs2_filecheck_store, | ||
148 | }; | ||
149 | |||
150 | static struct kobj_type ocfs2_ktype_filecheck = { | ||
151 | .default_attrs = ocfs2_filecheck_attrs, | ||
152 | .sysfs_ops = &ocfs2_filecheck_ops, | ||
153 | .release = ocfs2_filecheck_release, | ||
154 | }; | ||
131 | 155 | ||
132 | static void | 156 | static void |
133 | ocfs2_filecheck_sysfs_free(struct ocfs2_filecheck_sysfs_entry *entry) | 157 | ocfs2_filecheck_sysfs_free(struct ocfs2_filecheck_sysfs_entry *entry) |
134 | { | 158 | { |
135 | struct ocfs2_filecheck_entry *p; | 159 | struct ocfs2_filecheck_entry *p; |
136 | 160 | ||
137 | if (!atomic_dec_and_test(&entry->fs_count)) { | ||
138 | wait_var_event(&entry->fs_count, | ||
139 | !atomic_read(&entry->fs_count)); | ||
140 | } | ||
141 | |||
142 | spin_lock(&entry->fs_fcheck->fc_lock); | 161 | spin_lock(&entry->fs_fcheck->fc_lock); |
143 | while (!list_empty(&entry->fs_fcheck->fc_head)) { | 162 | while (!list_empty(&entry->fs_fcheck->fc_head)) { |
144 | p = list_first_entry(&entry->fs_fcheck->fc_head, | 163 | p = list_first_entry(&entry->fs_fcheck->fc_head, |
@@ -149,151 +168,48 @@ ocfs2_filecheck_sysfs_free(struct ocfs2_filecheck_sysfs_entry *entry) | |||
149 | } | 168 | } |
150 | spin_unlock(&entry->fs_fcheck->fc_lock); | 169 | spin_unlock(&entry->fs_fcheck->fc_lock); |
151 | 170 | ||
152 | kset_unregister(entry->fs_fcheckkset); | ||
153 | kset_unregister(entry->fs_devicekset); | ||
154 | kfree(entry->fs_fcheck); | 171 | kfree(entry->fs_fcheck); |
155 | kfree(entry); | 172 | entry->fs_fcheck = NULL; |
156 | } | ||
157 | |||
158 | static void | ||
159 | ocfs2_filecheck_sysfs_add(struct ocfs2_filecheck_sysfs_entry *entry) | ||
160 | { | ||
161 | spin_lock(&ocfs2_filecheck_sysfs_lock); | ||
162 | list_add_tail(&entry->fs_list, &ocfs2_filecheck_sysfs_list); | ||
163 | spin_unlock(&ocfs2_filecheck_sysfs_lock); | ||
164 | } | 173 | } |
165 | 174 | ||
166 | static int ocfs2_filecheck_sysfs_del(const char *devname) | 175 | int ocfs2_filecheck_create_sysfs(struct ocfs2_super *osb) |
167 | { | 176 | { |
168 | struct ocfs2_filecheck_sysfs_entry *p; | 177 | int ret; |
169 | 178 | struct ocfs2_filecheck *fcheck; | |
170 | spin_lock(&ocfs2_filecheck_sysfs_lock); | 179 | struct ocfs2_filecheck_sysfs_entry *entry = &osb->osb_fc_ent; |
171 | list_for_each_entry(p, &ocfs2_filecheck_sysfs_list, fs_list) { | ||
172 | if (!strcmp(p->fs_sb->s_id, devname)) { | ||
173 | list_del(&p->fs_list); | ||
174 | spin_unlock(&ocfs2_filecheck_sysfs_lock); | ||
175 | ocfs2_filecheck_sysfs_free(p); | ||
176 | return 0; | ||
177 | } | ||
178 | } | ||
179 | spin_unlock(&ocfs2_filecheck_sysfs_lock); | ||
180 | return 1; | ||
181 | } | ||
182 | |||
183 | static void | ||
184 | ocfs2_filecheck_sysfs_put(struct ocfs2_filecheck_sysfs_entry *entry) | ||
185 | { | ||
186 | if (atomic_dec_and_test(&entry->fs_count)) | ||
187 | wake_up_var(&entry->fs_count); | ||
188 | } | ||
189 | |||
190 | static struct ocfs2_filecheck_sysfs_entry * | ||
191 | ocfs2_filecheck_sysfs_get(const char *devname) | ||
192 | { | ||
193 | struct ocfs2_filecheck_sysfs_entry *p = NULL; | ||
194 | |||
195 | spin_lock(&ocfs2_filecheck_sysfs_lock); | ||
196 | list_for_each_entry(p, &ocfs2_filecheck_sysfs_list, fs_list) { | ||
197 | if (!strcmp(p->fs_sb->s_id, devname)) { | ||
198 | atomic_inc(&p->fs_count); | ||
199 | spin_unlock(&ocfs2_filecheck_sysfs_lock); | ||
200 | return p; | ||
201 | } | ||
202 | } | ||
203 | spin_unlock(&ocfs2_filecheck_sysfs_lock); | ||
204 | return NULL; | ||
205 | } | ||
206 | |||
207 | int ocfs2_filecheck_create_sysfs(struct super_block *sb) | ||
208 | { | ||
209 | int ret = 0; | ||
210 | struct kset *device_kset = NULL; | ||
211 | struct kset *fcheck_kset = NULL; | ||
212 | struct ocfs2_filecheck *fcheck = NULL; | ||
213 | struct ocfs2_filecheck_sysfs_entry *entry = NULL; | ||
214 | struct attribute **attrs = NULL; | ||
215 | struct attribute_group attrgp; | ||
216 | |||
217 | if (!ocfs2_kset) | ||
218 | return -ENOMEM; | ||
219 | |||
220 | attrs = kmalloc(sizeof(struct attribute *) * 4, GFP_NOFS); | ||
221 | if (!attrs) { | ||
222 | ret = -ENOMEM; | ||
223 | goto error; | ||
224 | } else { | ||
225 | attrs[0] = &ocfs2_attr_filecheck_chk.attr; | ||
226 | attrs[1] = &ocfs2_attr_filecheck_fix.attr; | ||
227 | attrs[2] = &ocfs2_attr_filecheck_set.attr; | ||
228 | attrs[3] = NULL; | ||
229 | memset(&attrgp, 0, sizeof(attrgp)); | ||
230 | attrgp.attrs = attrs; | ||
231 | } | ||
232 | 180 | ||
233 | fcheck = kmalloc(sizeof(struct ocfs2_filecheck), GFP_NOFS); | 181 | fcheck = kmalloc(sizeof(struct ocfs2_filecheck), GFP_NOFS); |
234 | if (!fcheck) { | 182 | if (!fcheck) |
235 | ret = -ENOMEM; | 183 | return -ENOMEM; |
236 | goto error; | ||
237 | } else { | ||
238 | INIT_LIST_HEAD(&fcheck->fc_head); | ||
239 | spin_lock_init(&fcheck->fc_lock); | ||
240 | fcheck->fc_max = OCFS2_FILECHECK_MINSIZE; | ||
241 | fcheck->fc_size = 0; | ||
242 | fcheck->fc_done = 0; | ||
243 | } | ||
244 | |||
245 | if (strlen(sb->s_id) <= 0) { | ||
246 | mlog(ML_ERROR, | ||
247 | "Cannot get device basename when create filecheck sysfs\n"); | ||
248 | ret = -ENODEV; | ||
249 | goto error; | ||
250 | } | ||
251 | |||
252 | device_kset = kset_create_and_add(sb->s_id, NULL, &ocfs2_kset->kobj); | ||
253 | if (!device_kset) { | ||
254 | ret = -ENOMEM; | ||
255 | goto error; | ||
256 | } | ||
257 | |||
258 | fcheck_kset = kset_create_and_add("filecheck", NULL, | ||
259 | &device_kset->kobj); | ||
260 | if (!fcheck_kset) { | ||
261 | ret = -ENOMEM; | ||
262 | goto error; | ||
263 | } | ||
264 | |||
265 | ret = sysfs_create_group(&fcheck_kset->kobj, &attrgp); | ||
266 | if (ret) | ||
267 | goto error; | ||
268 | 184 | ||
269 | entry = kmalloc(sizeof(struct ocfs2_filecheck_sysfs_entry), GFP_NOFS); | 185 | INIT_LIST_HEAD(&fcheck->fc_head); |
270 | if (!entry) { | 186 | spin_lock_init(&fcheck->fc_lock); |
271 | ret = -ENOMEM; | 187 | fcheck->fc_max = OCFS2_FILECHECK_MINSIZE; |
272 | goto error; | 188 | fcheck->fc_size = 0; |
273 | } else { | 189 | fcheck->fc_done = 0; |
274 | atomic_set(&entry->fs_count, 1); | 190 | |
275 | entry->fs_sb = sb; | 191 | entry->fs_kobj.kset = osb->osb_dev_kset; |
276 | entry->fs_devicekset = device_kset; | 192 | init_completion(&entry->fs_kobj_unregister); |
277 | entry->fs_fcheckkset = fcheck_kset; | 193 | ret = kobject_init_and_add(&entry->fs_kobj, &ocfs2_ktype_filecheck, |
278 | entry->fs_fcheck = fcheck; | 194 | NULL, "filecheck"); |
279 | ocfs2_filecheck_sysfs_add(entry); | 195 | if (ret) { |
196 | kfree(fcheck); | ||
197 | return ret; | ||
280 | } | 198 | } |
281 | 199 | ||
282 | kfree(attrs); | 200 | entry->fs_fcheck = fcheck; |
283 | return 0; | 201 | return 0; |
284 | |||
285 | error: | ||
286 | kfree(attrs); | ||
287 | kfree(entry); | ||
288 | kfree(fcheck); | ||
289 | kset_unregister(fcheck_kset); | ||
290 | kset_unregister(device_kset); | ||
291 | return ret; | ||
292 | } | 202 | } |
293 | 203 | ||
294 | int ocfs2_filecheck_remove_sysfs(struct super_block *sb) | 204 | void ocfs2_filecheck_remove_sysfs(struct ocfs2_super *osb) |
295 | { | 205 | { |
296 | return ocfs2_filecheck_sysfs_del(sb->s_id); | 206 | if (!osb->osb_fc_ent.fs_fcheck) |
207 | return; | ||
208 | |||
209 | kobject_del(&osb->osb_fc_ent.fs_kobj); | ||
210 | kobject_put(&osb->osb_fc_ent.fs_kobj); | ||
211 | wait_for_completion(&osb->osb_fc_ent.fs_kobj_unregister); | ||
212 | ocfs2_filecheck_sysfs_free(&osb->osb_fc_ent); | ||
297 | } | 213 | } |
298 | 214 | ||
299 | static int | 215 | static int |
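The filecheck rework above replaces the global sysfs entry list with a single ocfs2_filecheck_sysfs_entry embedded in ocfs2_super (osb_fc_ent): a kobject with its own ktype whose release callback completes fs_kobj_unregister, so removal can wait until the last sysfs reference is dropped before freeing the backing data. A minimal, hedged sketch of that embed-a-kobject-with-completion pattern, with generic names rather than the ocfs2 ones:

#include <linux/kobject.h>
#include <linux/completion.h>

struct fc_entry {
        struct kobject kobj;
        struct completion unregister_done;
};

static void fc_entry_release(struct kobject *kobj)
{
        struct fc_entry *e = container_of(kobj, struct fc_entry, kobj);

        complete(&e->unregister_done);          /* last reference dropped */
}

static struct kobj_type fc_entry_ktype = {
        .release   = fc_entry_release,
        .sysfs_ops = &kobj_sysfs_ops,           /* forwards to kobj_attribute handlers */
};

static int fc_entry_add(struct fc_entry *e, struct kset *parent_kset)
{
        e->kobj.kset = parent_kset;
        init_completion(&e->unregister_done);
        return kobject_init_and_add(&e->kobj, &fc_entry_ktype, NULL, "filecheck");
}

static void fc_entry_remove(struct fc_entry *e)
{
        kobject_del(&e->kobj);
        kobject_put(&e->kobj);
        wait_for_completion(&e->unregister_done);  /* now safe to free private data */
}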
@@ -310,7 +226,7 @@ ocfs2_filecheck_adjust_max(struct ocfs2_filecheck_sysfs_entry *ent, | |||
310 | 226 | ||
311 | spin_lock(&ent->fs_fcheck->fc_lock); | 227 | spin_lock(&ent->fs_fcheck->fc_lock); |
312 | if (len < (ent->fs_fcheck->fc_size - ent->fs_fcheck->fc_done)) { | 228 | if (len < (ent->fs_fcheck->fc_size - ent->fs_fcheck->fc_done)) { |
313 | mlog(ML_ERROR, | 229 | mlog(ML_NOTICE, |
314 | "Cannot set online file check maximum entry number " | 230 | "Cannot set online file check maximum entry number " |
315 | "to %u due to too many pending entries(%u)\n", | 231 | "to %u due to too many pending entries(%u)\n", |
316 | len, ent->fs_fcheck->fc_size - ent->fs_fcheck->fc_done); | 232 | len, ent->fs_fcheck->fc_size - ent->fs_fcheck->fc_done); |
@@ -387,7 +303,7 @@ ocfs2_filecheck_args_parse(const char *name, const char *buf, size_t count, | |||
387 | return 0; | 303 | return 0; |
388 | } | 304 | } |
389 | 305 | ||
390 | static ssize_t ocfs2_filecheck_show(struct kobject *kobj, | 306 | static ssize_t ocfs2_filecheck_attr_show(struct kobject *kobj, |
391 | struct kobj_attribute *attr, | 307 | struct kobj_attribute *attr, |
392 | char *buf) | 308 | char *buf) |
393 | { | 309 | { |
@@ -395,19 +311,12 @@ static ssize_t ocfs2_filecheck_show(struct kobject *kobj, | |||
395 | ssize_t ret = 0, total = 0, remain = PAGE_SIZE; | 311 | ssize_t ret = 0, total = 0, remain = PAGE_SIZE; |
396 | unsigned int type; | 312 | unsigned int type; |
397 | struct ocfs2_filecheck_entry *p; | 313 | struct ocfs2_filecheck_entry *p; |
398 | struct ocfs2_filecheck_sysfs_entry *ent; | 314 | struct ocfs2_filecheck_sysfs_entry *ent = container_of(kobj, |
315 | struct ocfs2_filecheck_sysfs_entry, fs_kobj); | ||
399 | 316 | ||
400 | if (ocfs2_filecheck_type_parse(attr->attr.name, &type)) | 317 | if (ocfs2_filecheck_type_parse(attr->attr.name, &type)) |
401 | return -EINVAL; | 318 | return -EINVAL; |
402 | 319 | ||
403 | ent = ocfs2_filecheck_sysfs_get(kobj->parent->name); | ||
404 | if (!ent) { | ||
405 | mlog(ML_ERROR, | ||
406 | "Cannot get the corresponding entry via device basename %s\n", | ||
407 | kobj->name); | ||
408 | return -ENODEV; | ||
409 | } | ||
410 | |||
411 | if (type == OCFS2_FILECHECK_TYPE_SET) { | 320 | if (type == OCFS2_FILECHECK_TYPE_SET) { |
412 | spin_lock(&ent->fs_fcheck->fc_lock); | 321 | spin_lock(&ent->fs_fcheck->fc_lock); |
413 | total = snprintf(buf, remain, "%u\n", ent->fs_fcheck->fc_max); | 322 | total = snprintf(buf, remain, "%u\n", ent->fs_fcheck->fc_max); |
@@ -441,11 +350,26 @@ static ssize_t ocfs2_filecheck_show(struct kobject *kobj, | |||
441 | spin_unlock(&ent->fs_fcheck->fc_lock); | 350 | spin_unlock(&ent->fs_fcheck->fc_lock); |
442 | 351 | ||
443 | exit: | 352 | exit: |
444 | ocfs2_filecheck_sysfs_put(ent); | ||
445 | return total; | 353 | return total; |
446 | } | 354 | } |
447 | 355 | ||
448 | static int | 356 | static inline int |
357 | ocfs2_filecheck_is_dup_entry(struct ocfs2_filecheck_sysfs_entry *ent, | ||
358 | unsigned long ino) | ||
359 | { | ||
360 | struct ocfs2_filecheck_entry *p; | ||
361 | |||
362 | list_for_each_entry(p, &ent->fs_fcheck->fc_head, fe_list) { | ||
363 | if (!p->fe_done) { | ||
364 | if (p->fe_ino == ino) | ||
365 | return 1; | ||
366 | } | ||
367 | } | ||
368 | |||
369 | return 0; | ||
370 | } | ||
371 | |||
372 | static inline int | ||
449 | ocfs2_filecheck_erase_entry(struct ocfs2_filecheck_sysfs_entry *ent) | 373 | ocfs2_filecheck_erase_entry(struct ocfs2_filecheck_sysfs_entry *ent) |
450 | { | 374 | { |
451 | struct ocfs2_filecheck_entry *p; | 375 | struct ocfs2_filecheck_entry *p; |
@@ -484,21 +408,21 @@ static void | |||
484 | ocfs2_filecheck_done_entry(struct ocfs2_filecheck_sysfs_entry *ent, | 408 | ocfs2_filecheck_done_entry(struct ocfs2_filecheck_sysfs_entry *ent, |
485 | struct ocfs2_filecheck_entry *entry) | 409 | struct ocfs2_filecheck_entry *entry) |
486 | { | 410 | { |
487 | entry->fe_done = 1; | ||
488 | spin_lock(&ent->fs_fcheck->fc_lock); | 411 | spin_lock(&ent->fs_fcheck->fc_lock); |
412 | entry->fe_done = 1; | ||
489 | ent->fs_fcheck->fc_done++; | 413 | ent->fs_fcheck->fc_done++; |
490 | spin_unlock(&ent->fs_fcheck->fc_lock); | 414 | spin_unlock(&ent->fs_fcheck->fc_lock); |
491 | } | 415 | } |
492 | 416 | ||
493 | static unsigned int | 417 | static unsigned int |
494 | ocfs2_filecheck_handle(struct super_block *sb, | 418 | ocfs2_filecheck_handle(struct ocfs2_super *osb, |
495 | unsigned long ino, unsigned int flags) | 419 | unsigned long ino, unsigned int flags) |
496 | { | 420 | { |
497 | unsigned int ret = OCFS2_FILECHECK_ERR_SUCCESS; | 421 | unsigned int ret = OCFS2_FILECHECK_ERR_SUCCESS; |
498 | struct inode *inode = NULL; | 422 | struct inode *inode = NULL; |
499 | int rc; | 423 | int rc; |
500 | 424 | ||
501 | inode = ocfs2_iget(OCFS2_SB(sb), ino, flags, 0); | 425 | inode = ocfs2_iget(osb, ino, flags, 0); |
502 | if (IS_ERR(inode)) { | 426 | if (IS_ERR(inode)) { |
503 | rc = (int)(-(long)inode); | 427 | rc = (int)(-(long)inode); |
504 | if (rc >= OCFS2_FILECHECK_ERR_START && | 428 | if (rc >= OCFS2_FILECHECK_ERR_START && |
@@ -516,11 +440,14 @@ static void | |||
516 | ocfs2_filecheck_handle_entry(struct ocfs2_filecheck_sysfs_entry *ent, | 440 | ocfs2_filecheck_handle_entry(struct ocfs2_filecheck_sysfs_entry *ent, |
517 | struct ocfs2_filecheck_entry *entry) | 441 | struct ocfs2_filecheck_entry *entry) |
518 | { | 442 | { |
443 | struct ocfs2_super *osb = container_of(ent, struct ocfs2_super, | ||
444 | osb_fc_ent); | ||
445 | |||
519 | if (entry->fe_type == OCFS2_FILECHECK_TYPE_CHK) | 446 | if (entry->fe_type == OCFS2_FILECHECK_TYPE_CHK) |
520 | entry->fe_status = ocfs2_filecheck_handle(ent->fs_sb, | 447 | entry->fe_status = ocfs2_filecheck_handle(osb, |
521 | entry->fe_ino, OCFS2_FI_FLAG_FILECHECK_CHK); | 448 | entry->fe_ino, OCFS2_FI_FLAG_FILECHECK_CHK); |
522 | else if (entry->fe_type == OCFS2_FILECHECK_TYPE_FIX) | 449 | else if (entry->fe_type == OCFS2_FILECHECK_TYPE_FIX) |
523 | entry->fe_status = ocfs2_filecheck_handle(ent->fs_sb, | 450 | entry->fe_status = ocfs2_filecheck_handle(osb, |
524 | entry->fe_ino, OCFS2_FI_FLAG_FILECHECK_FIX); | 451 | entry->fe_ino, OCFS2_FI_FLAG_FILECHECK_FIX); |
525 | else | 452 | else |
526 | entry->fe_status = OCFS2_FILECHECK_ERR_UNSUPPORTED; | 453 | entry->fe_status = OCFS2_FILECHECK_ERR_UNSUPPORTED; |
@@ -528,30 +455,21 @@ ocfs2_filecheck_handle_entry(struct ocfs2_filecheck_sysfs_entry *ent, | |||
528 | ocfs2_filecheck_done_entry(ent, entry); | 455 | ocfs2_filecheck_done_entry(ent, entry); |
529 | } | 456 | } |
530 | 457 | ||
531 | static ssize_t ocfs2_filecheck_store(struct kobject *kobj, | 458 | static ssize_t ocfs2_filecheck_attr_store(struct kobject *kobj, |
532 | struct kobj_attribute *attr, | 459 | struct kobj_attribute *attr, |
533 | const char *buf, size_t count) | 460 | const char *buf, size_t count) |
534 | { | 461 | { |
462 | ssize_t ret = 0; | ||
535 | struct ocfs2_filecheck_args args; | 463 | struct ocfs2_filecheck_args args; |
536 | struct ocfs2_filecheck_entry *entry; | 464 | struct ocfs2_filecheck_entry *entry; |
537 | struct ocfs2_filecheck_sysfs_entry *ent; | 465 | struct ocfs2_filecheck_sysfs_entry *ent = container_of(kobj, |
538 | ssize_t ret = 0; | 466 | struct ocfs2_filecheck_sysfs_entry, fs_kobj); |
539 | 467 | ||
540 | if (count == 0) | 468 | if (count == 0) |
541 | return count; | 469 | return count; |
542 | 470 | ||
543 | if (ocfs2_filecheck_args_parse(attr->attr.name, buf, count, &args)) { | 471 | if (ocfs2_filecheck_args_parse(attr->attr.name, buf, count, &args)) |
544 | mlog(ML_ERROR, "Invalid arguments for online file check\n"); | ||
545 | return -EINVAL; | 472 | return -EINVAL; |
546 | } | ||
547 | |||
548 | ent = ocfs2_filecheck_sysfs_get(kobj->parent->name); | ||
549 | if (!ent) { | ||
550 | mlog(ML_ERROR, | ||
551 | "Cannot get the corresponding entry via device basename %s\n", | ||
552 | kobj->parent->name); | ||
553 | return -ENODEV; | ||
554 | } | ||
555 | 473 | ||
556 | if (args.fa_type == OCFS2_FILECHECK_TYPE_SET) { | 474 | if (args.fa_type == OCFS2_FILECHECK_TYPE_SET) { |
557 | ret = ocfs2_filecheck_adjust_max(ent, args.fa_len); | 475 | ret = ocfs2_filecheck_adjust_max(ent, args.fa_len); |
@@ -565,13 +483,16 @@ static ssize_t ocfs2_filecheck_store(struct kobject *kobj, | |||
565 | } | 483 | } |
566 | 484 | ||
567 | spin_lock(&ent->fs_fcheck->fc_lock); | 485 | spin_lock(&ent->fs_fcheck->fc_lock); |
568 | if ((ent->fs_fcheck->fc_size >= ent->fs_fcheck->fc_max) && | 486 | if (ocfs2_filecheck_is_dup_entry(ent, args.fa_ino)) { |
569 | (ent->fs_fcheck->fc_done == 0)) { | 487 | ret = -EEXIST; |
570 | mlog(ML_ERROR, | 488 | kfree(entry); |
489 | } else if ((ent->fs_fcheck->fc_size >= ent->fs_fcheck->fc_max) && | ||
490 | (ent->fs_fcheck->fc_done == 0)) { | ||
491 | mlog(ML_NOTICE, | ||
571 | "Cannot do more file check " | 492 | "Cannot do more file check " |
572 | "since file check queue(%u) is full now\n", | 493 | "since file check queue(%u) is full now\n", |
573 | ent->fs_fcheck->fc_max); | 494 | ent->fs_fcheck->fc_max); |
574 | ret = -EBUSY; | 495 | ret = -EAGAIN; |
575 | kfree(entry); | 496 | kfree(entry); |
576 | } else { | 497 | } else { |
577 | if ((ent->fs_fcheck->fc_size >= ent->fs_fcheck->fc_max) && | 498 | if ((ent->fs_fcheck->fc_size >= ent->fs_fcheck->fc_max) && |
@@ -596,6 +517,5 @@ static ssize_t ocfs2_filecheck_store(struct kobject *kobj, | |||
596 | ocfs2_filecheck_handle_entry(ent, entry); | 517 | ocfs2_filecheck_handle_entry(ent, entry); |
597 | 518 | ||
598 | exit: | 519 | exit: |
599 | ocfs2_filecheck_sysfs_put(ent); | ||
600 | return (!ret ? count : ret); | 520 | return (!ret ? count : ret); |
601 | } | 521 | } |
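The filecheck.c refactor above drops the ocfs2_filecheck_sysfs_get()/put() lookups because the kobject now lives inside the per-mount ocfs2_filecheck_sysfs_entry, so the attribute callbacks can recover their context with container_of(). A minimal sketch of that pattern, using hypothetical demo_* names rather than the ocfs2 ones:

	#include <linux/kernel.h>
	#include <linux/kobject.h>
	#include <linux/sysfs.h>

	/* The kobject is embedded in the per-device state, so a show()
	 * callback can recover that state with container_of() instead of
	 * searching a global list under a lock. */
	struct demo_entry {
		struct kobject kobj;
		unsigned int max;
	};

	static ssize_t demo_max_show(struct kobject *kobj,
				     struct kobj_attribute *attr, char *buf)
	{
		struct demo_entry *ent = container_of(kobj, struct demo_entry,
						      kobj);

		return sprintf(buf, "%u\n", ent->max);
	}

	static struct kobj_attribute demo_max_attr = __ATTR_RO(demo_max);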
diff --git a/fs/ocfs2/filecheck.h b/fs/ocfs2/filecheck.h index e5cd002a2c09..6a22ee79e8d0 100644 --- a/fs/ocfs2/filecheck.h +++ b/fs/ocfs2/filecheck.h | |||
@@ -43,7 +43,32 @@ enum { | |||
43 | #define OCFS2_FILECHECK_ERR_START OCFS2_FILECHECK_ERR_FAILED | 43 | #define OCFS2_FILECHECK_ERR_START OCFS2_FILECHECK_ERR_FAILED |
44 | #define OCFS2_FILECHECK_ERR_END OCFS2_FILECHECK_ERR_UNSUPPORTED | 44 | #define OCFS2_FILECHECK_ERR_END OCFS2_FILECHECK_ERR_UNSUPPORTED |
45 | 45 | ||
46 | int ocfs2_filecheck_create_sysfs(struct super_block *sb); | 46 | struct ocfs2_filecheck { |
47 | int ocfs2_filecheck_remove_sysfs(struct super_block *sb); | 47 | struct list_head fc_head; /* File check entry list head */ |
48 | spinlock_t fc_lock; | ||
49 | unsigned int fc_max; /* Maximum number of entries in list */ | ||
50 | unsigned int fc_size; /* Current entry count in list */ | ||
51 | unsigned int fc_done; /* Finished entry count in list */ | ||
52 | }; | ||
53 | |||
54 | #define OCFS2_FILECHECK_MAXSIZE 100 | ||
55 | #define OCFS2_FILECHECK_MINSIZE 10 | ||
56 | |||
57 | /* File check operation type */ | ||
58 | enum { | ||
59 | OCFS2_FILECHECK_TYPE_CHK = 0, /* Check a file(inode) */ | ||
60 | OCFS2_FILECHECK_TYPE_FIX, /* Fix a file(inode) */ | ||
61 | OCFS2_FILECHECK_TYPE_SET = 100 /* Set entry list maximum size */ | ||
62 | }; | ||
63 | |||
64 | struct ocfs2_filecheck_sysfs_entry { /* sysfs entry per partition */ | ||
65 | struct kobject fs_kobj; | ||
66 | struct completion fs_kobj_unregister; | ||
67 | struct ocfs2_filecheck *fs_fcheck; | ||
68 | }; | ||
69 | |||
70 | |||
71 | int ocfs2_filecheck_create_sysfs(struct ocfs2_super *osb); | ||
72 | void ocfs2_filecheck_remove_sysfs(struct ocfs2_super *osb); | ||
48 | 73 | ||
49 | #endif /* FILECHECK_H */ | 74 | #endif /* FILECHECK_H */ |
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index d51b80edd972..ddc3e9470c87 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c | |||
@@ -1135,7 +1135,7 @@ static void ocfs2_clear_inode(struct inode *inode) | |||
1135 | trace_ocfs2_clear_inode((unsigned long long)oi->ip_blkno, | 1135 | trace_ocfs2_clear_inode((unsigned long long)oi->ip_blkno, |
1136 | inode->i_nlink); | 1136 | inode->i_nlink); |
1137 | 1137 | ||
1138 | mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL, | 1138 | mlog_bug_on_msg(osb == NULL, |
1139 | "Inode=%lu\n", inode->i_ino); | 1139 | "Inode=%lu\n", inode->i_ino); |
1140 | 1140 | ||
1141 | dquot_drop(inode); | 1141 | dquot_drop(inode); |
@@ -1150,7 +1150,7 @@ static void ocfs2_clear_inode(struct inode *inode) | |||
1150 | ocfs2_mark_lockres_freeing(osb, &oi->ip_inode_lockres); | 1150 | ocfs2_mark_lockres_freeing(osb, &oi->ip_inode_lockres); |
1151 | ocfs2_mark_lockres_freeing(osb, &oi->ip_open_lockres); | 1151 | ocfs2_mark_lockres_freeing(osb, &oi->ip_open_lockres); |
1152 | 1152 | ||
1153 | ocfs2_resv_discard(&OCFS2_SB(inode->i_sb)->osb_la_resmap, | 1153 | ocfs2_resv_discard(&osb->osb_la_resmap, |
1154 | &oi->ip_la_data_resv); | 1154 | &oi->ip_la_data_resv); |
1155 | ocfs2_resv_init_once(&oi->ip_la_data_resv); | 1155 | ocfs2_resv_init_once(&oi->ip_la_data_resv); |
1156 | 1156 | ||
@@ -1160,7 +1160,7 @@ static void ocfs2_clear_inode(struct inode *inode) | |||
1160 | * exception here are successfully wiped inodes - their | 1160 | * exception here are successfully wiped inodes - their |
1161 | * metadata can now be considered to be part of the system | 1161 | * metadata can now be considered to be part of the system |
1162 | * inodes from which it came. */ | 1162 | * inodes from which it came. */ |
1163 | if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED)) | 1163 | if (!(oi->ip_flags & OCFS2_INODE_DELETED)) |
1164 | ocfs2_checkpoint_inode(inode); | 1164 | ocfs2_checkpoint_inode(inode); |
1165 | 1165 | ||
1166 | mlog_bug_on_msg(!list_empty(&oi->ip_io_markers), | 1166 | mlog_bug_on_msg(!list_empty(&oi->ip_io_markers), |
@@ -1223,7 +1223,7 @@ static void ocfs2_clear_inode(struct inode *inode) | |||
1223 | * the journal is flushed before journal shutdown. Thus it is safe to | 1223 | * the journal is flushed before journal shutdown. Thus it is safe to |
1224 | * have inodes get cleaned up after journal shutdown. | 1224 | * have inodes get cleaned up after journal shutdown. |
1225 | */ | 1225 | */ |
1226 | jbd2_journal_release_jbd_inode(OCFS2_SB(inode->i_sb)->journal->j_journal, | 1226 | jbd2_journal_release_jbd_inode(osb->journal->j_journal, |
1227 | &oi->ip_jinode); | 1227 | &oi->ip_jinode); |
1228 | } | 1228 | } |
1229 | 1229 | ||
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index c801eddc4bf3..8dd6f703c819 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
@@ -525,7 +525,7 @@ static int __ocfs2_mknod_locked(struct inode *dir, | |||
525 | * these are used by the support functions here and in | 525 | * these are used by the support functions here and in |
526 | * callers. */ | 526 | * callers. */ |
527 | inode->i_ino = ino_from_blkno(osb->sb, fe_blkno); | 527 | inode->i_ino = ino_from_blkno(osb->sb, fe_blkno); |
528 | OCFS2_I(inode)->ip_blkno = fe_blkno; | 528 | oi->ip_blkno = fe_blkno; |
529 | spin_lock(&osb->osb_lock); | 529 | spin_lock(&osb->osb_lock); |
530 | inode->i_generation = osb->s_next_generation++; | 530 | inode->i_generation = osb->s_next_generation++; |
531 | spin_unlock(&osb->osb_lock); | 531 | spin_unlock(&osb->osb_lock); |
@@ -1186,8 +1186,8 @@ static int ocfs2_double_lock(struct ocfs2_super *osb, | |||
1186 | } | 1186 | } |
1187 | 1187 | ||
1188 | trace_ocfs2_double_lock_end( | 1188 | trace_ocfs2_double_lock_end( |
1189 | (unsigned long long)OCFS2_I(inode1)->ip_blkno, | 1189 | (unsigned long long)oi1->ip_blkno, |
1190 | (unsigned long long)OCFS2_I(inode2)->ip_blkno); | 1190 | (unsigned long long)oi2->ip_blkno); |
1191 | 1191 | ||
1192 | bail: | 1192 | bail: |
1193 | if (status) | 1193 | if (status) |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 6867eef2e06b..4f86ac0027b5 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
@@ -50,6 +50,8 @@ | |||
50 | 50 | ||
51 | #include "reservations.h" | 51 | #include "reservations.h" |
52 | 52 | ||
53 | #include "filecheck.h" | ||
54 | |||
53 | /* Caching of metadata buffers */ | 55 | /* Caching of metadata buffers */ |
54 | 56 | ||
55 | /* Most user visible OCFS2 inodes will have very few pieces of | 57 | /* Most user visible OCFS2 inodes will have very few pieces of |
@@ -472,6 +474,12 @@ struct ocfs2_super | |||
472 | * workqueue and schedule on our own. | 474 | * workqueue and schedule on our own. |
473 | */ | 475 | */ |
474 | struct workqueue_struct *ocfs2_wq; | 476 | struct workqueue_struct *ocfs2_wq; |
477 | |||
478 | /* sysfs directory per partition */ | ||
479 | struct kset *osb_dev_kset; | ||
480 | |||
481 | /* file check related stuff */ | ||
482 | struct ocfs2_filecheck_sysfs_entry osb_fc_ent; | ||
475 | }; | 483 | }; |
476 | 484 | ||
477 | #define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info) | 485 | #define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info) |
diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h index e2a11aaece10..2ee76a90ba8f 100644 --- a/fs/ocfs2/ocfs2_trace.h +++ b/fs/ocfs2/ocfs2_trace.h | |||
@@ -1311,11 +1311,11 @@ DEFINE_OCFS2_FILE_OPS(ocfs2_file_release); | |||
1311 | 1311 | ||
1312 | DEFINE_OCFS2_FILE_OPS(ocfs2_sync_file); | 1312 | DEFINE_OCFS2_FILE_OPS(ocfs2_sync_file); |
1313 | 1313 | ||
1314 | DEFINE_OCFS2_FILE_OPS(ocfs2_file_aio_write); | 1314 | DEFINE_OCFS2_FILE_OPS(ocfs2_file_write_iter); |
1315 | 1315 | ||
1316 | DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_write); | 1316 | DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_write); |
1317 | 1317 | ||
1318 | DEFINE_OCFS2_FILE_OPS(ocfs2_file_aio_read); | 1318 | DEFINE_OCFS2_FILE_OPS(ocfs2_file_read_iter); |
1319 | 1319 | ||
1320 | DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_truncate_file); | 1320 | DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_truncate_file); |
1321 | 1321 | ||
@@ -1467,7 +1467,7 @@ TRACE_EVENT(ocfs2_prepare_inode_for_write, | |||
1467 | __entry->saved_pos, __entry->count, __entry->wait) | 1467 | __entry->saved_pos, __entry->count, __entry->wait) |
1468 | ); | 1468 | ); |
1469 | 1469 | ||
1470 | DEFINE_OCFS2_INT_EVENT(generic_file_aio_read_ret); | 1470 | DEFINE_OCFS2_INT_EVENT(generic_file_read_iter_ret); |
1471 | 1471 | ||
1472 | /* End of trace events for fs/ocfs2/file.c. */ | 1472 | /* End of trace events for fs/ocfs2/file.c. */ |
1473 | 1473 | ||
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index ab156e35ec00..01c6b3894406 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c | |||
@@ -573,7 +573,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode, | |||
573 | BUG_ON(ocfs2_is_refcount_inode(inode)); | 573 | BUG_ON(ocfs2_is_refcount_inode(inode)); |
574 | 574 | ||
575 | trace_ocfs2_create_refcount_tree( | 575 | trace_ocfs2_create_refcount_tree( |
576 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | 576 | (unsigned long long)oi->ip_blkno); |
577 | 577 | ||
578 | ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac); | 578 | ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac); |
579 | if (ret) { | 579 | if (ret) { |
@@ -3359,7 +3359,7 @@ static int ocfs2_replace_cow(struct ocfs2_cow_context *context) | |||
3359 | unsigned int ext_flags; | 3359 | unsigned int ext_flags; |
3360 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 3360 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
3361 | 3361 | ||
3362 | if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) { | 3362 | if (!ocfs2_refcount_tree(osb)) { |
3363 | return ocfs2_error(inode->i_sb, "Inode %lu want to use refcount tree, but the feature bit is not set in the super block\n", | 3363 | return ocfs2_error(inode->i_sb, "Inode %lu want to use refcount tree, but the feature bit is not set in the super block\n", |
3364 | inode->i_ino); | 3364 | inode->i_ino); |
3365 | } | 3365 | } |
@@ -3707,7 +3707,7 @@ int ocfs2_add_refcount_flag(struct inode *inode, | |||
3707 | trace_ocfs2_add_refcount_flag(ref_blocks, credits); | 3707 | trace_ocfs2_add_refcount_flag(ref_blocks, credits); |
3708 | 3708 | ||
3709 | if (ref_blocks) { | 3709 | if (ref_blocks) { |
3710 | ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb), | 3710 | ret = ocfs2_reserve_new_metadata_blocks(osb, |
3711 | ref_blocks, &meta_ac); | 3711 | ref_blocks, &meta_ac); |
3712 | if (ret) { | 3712 | if (ret) { |
3713 | mlog_errno(ret); | 3713 | mlog_errno(ret); |
@@ -4766,8 +4766,8 @@ static int ocfs2_reflink_inodes_lock(struct inode *s_inode, | |||
4766 | *bh2 = *bh1; | 4766 | *bh2 = *bh1; |
4767 | 4767 | ||
4768 | trace_ocfs2_double_lock_end( | 4768 | trace_ocfs2_double_lock_end( |
4769 | (unsigned long long)OCFS2_I(inode1)->ip_blkno, | 4769 | (unsigned long long)oi1->ip_blkno, |
4770 | (unsigned long long)OCFS2_I(inode2)->ip_blkno); | 4770 | (unsigned long long)oi2->ip_blkno); |
4771 | 4771 | ||
4772 | return 0; | 4772 | return 0; |
4773 | 4773 | ||
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index d8f5f6ce99dc..f7c972fbed6a 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
@@ -79,8 +79,6 @@ static u64 ocfs2_group_from_res(struct ocfs2_suballoc_result *res) | |||
79 | return ocfs2_which_suballoc_group(res->sr_blkno, res->sr_bit_offset); | 79 | return ocfs2_which_suballoc_group(res->sr_blkno, res->sr_bit_offset); |
80 | } | 80 | } |
81 | 81 | ||
82 | static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg); | ||
83 | static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe); | ||
84 | static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl); | 82 | static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl); |
85 | static int ocfs2_block_group_fill(handle_t *handle, | 83 | static int ocfs2_block_group_fill(handle_t *handle, |
86 | struct inode *alloc_inode, | 84 | struct inode *alloc_inode, |
@@ -387,7 +385,7 @@ static int ocfs2_block_group_fill(handle_t *handle, | |||
387 | 385 | ||
388 | memset(bg, 0, sb->s_blocksize); | 386 | memset(bg, 0, sb->s_blocksize); |
389 | strcpy(bg->bg_signature, OCFS2_GROUP_DESC_SIGNATURE); | 387 | strcpy(bg->bg_signature, OCFS2_GROUP_DESC_SIGNATURE); |
390 | bg->bg_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation); | 388 | bg->bg_generation = cpu_to_le32(osb->fs_generation); |
391 | bg->bg_size = cpu_to_le16(ocfs2_group_bitmap_size(sb, 1, | 389 | bg->bg_size = cpu_to_le16(ocfs2_group_bitmap_size(sb, 1, |
392 | osb->s_feature_incompat)); | 390 | osb->s_feature_incompat)); |
393 | bg->bg_chain = cpu_to_le16(my_chain); | 391 | bg->bg_chain = cpu_to_le16(my_chain); |
@@ -1521,7 +1519,7 @@ static int ocfs2_cluster_group_search(struct inode *inode, | |||
1521 | OCFS2_I(inode)->ip_clusters, max_bits); | 1519 | OCFS2_I(inode)->ip_clusters, max_bits); |
1522 | } | 1520 | } |
1523 | 1521 | ||
1524 | ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), | 1522 | ret = ocfs2_block_group_find_clear_bits(osb, |
1525 | group_bh, bits_wanted, | 1523 | group_bh, bits_wanted, |
1526 | max_bits, res); | 1524 | max_bits, res); |
1527 | if (ret) | 1525 | if (ret) |
@@ -2626,53 +2624,6 @@ int ocfs2_release_clusters(handle_t *handle, | |||
2626 | _ocfs2_clear_bit); | 2624 | _ocfs2_clear_bit); |
2627 | } | 2625 | } |
2628 | 2626 | ||
2629 | static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg) | ||
2630 | { | ||
2631 | printk("Block Group:\n"); | ||
2632 | printk("bg_signature: %s\n", bg->bg_signature); | ||
2633 | printk("bg_size: %u\n", bg->bg_size); | ||
2634 | printk("bg_bits: %u\n", bg->bg_bits); | ||
2635 | printk("bg_free_bits_count: %u\n", bg->bg_free_bits_count); | ||
2636 | printk("bg_chain: %u\n", bg->bg_chain); | ||
2637 | printk("bg_generation: %u\n", le32_to_cpu(bg->bg_generation)); | ||
2638 | printk("bg_next_group: %llu\n", | ||
2639 | (unsigned long long)bg->bg_next_group); | ||
2640 | printk("bg_parent_dinode: %llu\n", | ||
2641 | (unsigned long long)bg->bg_parent_dinode); | ||
2642 | printk("bg_blkno: %llu\n", | ||
2643 | (unsigned long long)bg->bg_blkno); | ||
2644 | } | ||
2645 | |||
2646 | static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe) | ||
2647 | { | ||
2648 | int i; | ||
2649 | |||
2650 | printk("Suballoc Inode %llu:\n", (unsigned long long)fe->i_blkno); | ||
2651 | printk("i_signature: %s\n", fe->i_signature); | ||
2652 | printk("i_size: %llu\n", | ||
2653 | (unsigned long long)fe->i_size); | ||
2654 | printk("i_clusters: %u\n", fe->i_clusters); | ||
2655 | printk("i_generation: %u\n", | ||
2656 | le32_to_cpu(fe->i_generation)); | ||
2657 | printk("id1.bitmap1.i_used: %u\n", | ||
2658 | le32_to_cpu(fe->id1.bitmap1.i_used)); | ||
2659 | printk("id1.bitmap1.i_total: %u\n", | ||
2660 | le32_to_cpu(fe->id1.bitmap1.i_total)); | ||
2661 | printk("id2.i_chain.cl_cpg: %u\n", fe->id2.i_chain.cl_cpg); | ||
2662 | printk("id2.i_chain.cl_bpc: %u\n", fe->id2.i_chain.cl_bpc); | ||
2663 | printk("id2.i_chain.cl_count: %u\n", fe->id2.i_chain.cl_count); | ||
2664 | printk("id2.i_chain.cl_next_free_rec: %u\n", | ||
2665 | fe->id2.i_chain.cl_next_free_rec); | ||
2666 | for(i = 0; i < fe->id2.i_chain.cl_next_free_rec; i++) { | ||
2667 | printk("fe->id2.i_chain.cl_recs[%d].c_free: %u\n", i, | ||
2668 | fe->id2.i_chain.cl_recs[i].c_free); | ||
2669 | printk("fe->id2.i_chain.cl_recs[%d].c_total: %u\n", i, | ||
2670 | fe->id2.i_chain.cl_recs[i].c_total); | ||
2671 | printk("fe->id2.i_chain.cl_recs[%d].c_blkno: %llu\n", i, | ||
2672 | (unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno); | ||
2673 | } | ||
2674 | } | ||
2675 | |||
2676 | /* | 2627 | /* |
2677 | * For a given allocation, determine which allocators will need to be | 2628 | * For a given allocation, determine which allocators will need to be |
2678 | * accessed, and lock them, reserving the appropriate number of bits. | 2629 | * accessed, and lock them, reserving the appropriate number of bits. |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index ffa4952d432b..3415e0b09398 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -423,10 +423,10 @@ static int ocfs2_sync_fs(struct super_block *sb, int wait) | |||
423 | ocfs2_schedule_truncate_log_flush(osb, 0); | 423 | ocfs2_schedule_truncate_log_flush(osb, 0); |
424 | } | 424 | } |
425 | 425 | ||
426 | if (jbd2_journal_start_commit(OCFS2_SB(sb)->journal->j_journal, | 426 | if (jbd2_journal_start_commit(osb->journal->j_journal, |
427 | &target)) { | 427 | &target)) { |
428 | if (wait) | 428 | if (wait) |
429 | jbd2_log_wait_commit(OCFS2_SB(sb)->journal->j_journal, | 429 | jbd2_log_wait_commit(osb->journal->j_journal, |
430 | target); | 430 | target); |
431 | } | 431 | } |
432 | return 0; | 432 | return 0; |
@@ -1161,6 +1161,23 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
1161 | 1161 | ||
1162 | ocfs2_complete_mount_recovery(osb); | 1162 | ocfs2_complete_mount_recovery(osb); |
1163 | 1163 | ||
1164 | osb->osb_dev_kset = kset_create_and_add(sb->s_id, NULL, | ||
1165 | &ocfs2_kset->kobj); | ||
1166 | if (!osb->osb_dev_kset) { | ||
1167 | status = -ENOMEM; | ||
1168 | mlog(ML_ERROR, "Unable to create device kset %s.\n", sb->s_id); | ||
1169 | goto read_super_error; | ||
1170 | } | ||
1171 | |||
1172 | /* Create filecheck sysfs related directories/files at | ||
1173 | * /sys/fs/ocfs2/<devname>/filecheck */ | ||
1174 | if (ocfs2_filecheck_create_sysfs(osb)) { | ||
1175 | status = -ENOMEM; | ||
1176 | mlog(ML_ERROR, "Unable to create filecheck sysfs directory at " | ||
1177 | "/sys/fs/ocfs2/%s/filecheck.\n", sb->s_id); | ||
1178 | goto read_super_error; | ||
1179 | } | ||
1180 | |||
1164 | if (ocfs2_mount_local(osb)) | 1181 | if (ocfs2_mount_local(osb)) |
1165 | snprintf(nodestr, sizeof(nodestr), "local"); | 1182 | snprintf(nodestr, sizeof(nodestr), "local"); |
1166 | else | 1183 | else |
@@ -1199,9 +1216,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
1199 | /* Start this when the mount is almost sure of being successful */ | 1216 | /* Start this when the mount is almost sure of being successful */ |
1200 | ocfs2_orphan_scan_start(osb); | 1217 | ocfs2_orphan_scan_start(osb); |
1201 | 1218 | ||
1202 | /* Create filecheck sysfile /sys/fs/ocfs2/<devname>/filecheck */ | ||
1203 | ocfs2_filecheck_create_sysfs(sb); | ||
1204 | |||
1205 | return status; | 1219 | return status; |
1206 | 1220 | ||
1207 | read_super_error: | 1221 | read_super_error: |
@@ -1653,7 +1667,6 @@ static void ocfs2_put_super(struct super_block *sb) | |||
1653 | 1667 | ||
1654 | ocfs2_sync_blockdev(sb); | 1668 | ocfs2_sync_blockdev(sb); |
1655 | ocfs2_dismount_volume(sb, 0); | 1669 | ocfs2_dismount_volume(sb, 0); |
1656 | ocfs2_filecheck_remove_sysfs(sb); | ||
1657 | } | 1670 | } |
1658 | 1671 | ||
1659 | static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf) | 1672 | static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf) |
@@ -1768,12 +1781,9 @@ static int ocfs2_initialize_mem_caches(void) | |||
1768 | NULL); | 1781 | NULL); |
1769 | if (!ocfs2_inode_cachep || !ocfs2_dquot_cachep || | 1782 | if (!ocfs2_inode_cachep || !ocfs2_dquot_cachep || |
1770 | !ocfs2_qf_chunk_cachep) { | 1783 | !ocfs2_qf_chunk_cachep) { |
1771 | if (ocfs2_inode_cachep) | 1784 | kmem_cache_destroy(ocfs2_inode_cachep); |
1772 | kmem_cache_destroy(ocfs2_inode_cachep); | 1785 | kmem_cache_destroy(ocfs2_dquot_cachep); |
1773 | if (ocfs2_dquot_cachep) | 1786 | kmem_cache_destroy(ocfs2_qf_chunk_cachep); |
1774 | kmem_cache_destroy(ocfs2_dquot_cachep); | ||
1775 | if (ocfs2_qf_chunk_cachep) | ||
1776 | kmem_cache_destroy(ocfs2_qf_chunk_cachep); | ||
1777 | return -ENOMEM; | 1787 | return -ENOMEM; |
1778 | } | 1788 | } |
1779 | 1789 | ||
@@ -1787,16 +1797,13 @@ static void ocfs2_free_mem_caches(void) | |||
1787 | * destroy cache. | 1797 | * destroy cache. |
1788 | */ | 1798 | */ |
1789 | rcu_barrier(); | 1799 | rcu_barrier(); |
1790 | if (ocfs2_inode_cachep) | 1800 | kmem_cache_destroy(ocfs2_inode_cachep); |
1791 | kmem_cache_destroy(ocfs2_inode_cachep); | ||
1792 | ocfs2_inode_cachep = NULL; | 1801 | ocfs2_inode_cachep = NULL; |
1793 | 1802 | ||
1794 | if (ocfs2_dquot_cachep) | 1803 | kmem_cache_destroy(ocfs2_dquot_cachep); |
1795 | kmem_cache_destroy(ocfs2_dquot_cachep); | ||
1796 | ocfs2_dquot_cachep = NULL; | 1804 | ocfs2_dquot_cachep = NULL; |
1797 | 1805 | ||
1798 | if (ocfs2_qf_chunk_cachep) | 1806 | kmem_cache_destroy(ocfs2_qf_chunk_cachep); |
1799 | kmem_cache_destroy(ocfs2_qf_chunk_cachep); | ||
1800 | ocfs2_qf_chunk_cachep = NULL; | 1807 | ocfs2_qf_chunk_cachep = NULL; |
1801 | } | 1808 | } |
1802 | 1809 | ||
@@ -1899,6 +1906,12 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) | |||
1899 | osb = OCFS2_SB(sb); | 1906 | osb = OCFS2_SB(sb); |
1900 | BUG_ON(!osb); | 1907 | BUG_ON(!osb); |
1901 | 1908 | ||
1909 | /* Remove file check sysfs related directories/files, | ||
1910 | * and wait for the pending file check operations */ | ||
1911 | ocfs2_filecheck_remove_sysfs(osb); | ||
1912 | |||
1913 | kset_unregister(osb->osb_dev_kset); | ||
1914 | |||
1902 | debugfs_remove(osb->osb_ctxt); | 1915 | debugfs_remove(osb->osb_ctxt); |
1903 | 1916 | ||
1904 | /* Orphan scan should be stopped as early as possible */ | 1917 | /* Orphan scan should be stopped as early as possible */ |
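Two patterns recur in the super.c hunks above: the per-device kset is created at mount and unregistered at dismount (with the filecheck sysfs entry nested under it), and the kmem_cache cleanup loses its NULL checks because kmem_cache_destroy() is a no-op when handed a NULL pointer. A small, hypothetical init/exit sketch of the latter cleanup style:

	#include <linux/errno.h>
	#include <linux/init.h>
	#include <linux/slab.h>

	static struct kmem_cache *demo_cachep;

	static int __init demo_init(void)
	{
		demo_cachep = kmem_cache_create("demo_cache", 128, 0,
						SLAB_HWCACHE_ALIGN, NULL);
		if (!demo_cachep)
			return -ENOMEM;
		return 0;
	}

	static void __exit demo_exit(void)
	{
		/* kmem_cache_destroy(NULL) is a no-op, so no guard is needed
		 * here or on partially-failed init paths. */
		kmem_cache_destroy(demo_cachep);
		demo_cachep = NULL;
	}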
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c index 82e17b076ce7..78f09c76ab3c 100644 --- a/fs/ocfs2/uptodate.c +++ b/fs/ocfs2/uptodate.c | |||
@@ -633,6 +633,5 @@ int __init init_ocfs2_uptodate_cache(void) | |||
633 | 633 | ||
634 | void exit_ocfs2_uptodate_cache(void) | 634 | void exit_ocfs2_uptodate_cache(void) |
635 | { | 635 | { |
636 | if (ocfs2_uptodate_cachep) | 636 | kmem_cache_destroy(ocfs2_uptodate_cachep); |
637 | kmem_cache_destroy(ocfs2_uptodate_cachep); | ||
638 | } | 637 | } |
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index c261c1dfd374..3a24ce3deb01 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c | |||
@@ -3564,7 +3564,7 @@ int ocfs2_xattr_set(struct inode *inode, | |||
3564 | .not_found = -ENODATA, | 3564 | .not_found = -ENODATA, |
3565 | }; | 3565 | }; |
3566 | 3566 | ||
3567 | if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) | 3567 | if (!ocfs2_supports_xattr(osb)) |
3568 | return -EOPNOTSUPP; | 3568 | return -EOPNOTSUPP; |
3569 | 3569 | ||
3570 | /* | 3570 | /* |
diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index c3c95d18bf43..7e6c77740413 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h | |||
@@ -64,10 +64,11 @@ static inline struct dentry *fault_create_debugfs_attr(const char *name, | |||
64 | 64 | ||
65 | struct kmem_cache; | 65 | struct kmem_cache; |
66 | 66 | ||
67 | int should_failslab(struct kmem_cache *s, gfp_t gfpflags); | ||
67 | #ifdef CONFIG_FAILSLAB | 68 | #ifdef CONFIG_FAILSLAB |
68 | extern bool should_failslab(struct kmem_cache *s, gfp_t gfpflags); | 69 | extern bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags); |
69 | #else | 70 | #else |
70 | static inline bool should_failslab(struct kmem_cache *s, gfp_t gfpflags) | 71 | static inline bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) |
71 | { | 72 | { |
72 | return false; | 73 | return false; |
73 | } | 74 | } |
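The split above leaves should_failslab() declared unconditionally while the CONFIG_FAILSLAB-specific logic moves behind __should_failslab(); presumably the point is that callers always have a real symbol to invoke, with the stub inlining to false when the option is off. A hedged sketch of a caller, using a hypothetical demo_pre_alloc() rather than the real slab hot path:

	#include <linux/fault-inject.h>
	#include <linux/slab.h>

	/* Hypothetical allocation pre-hook: should_failslab() can be called
	 * unconditionally because it is always declared; without
	 * CONFIG_FAILSLAB the underlying __should_failslab() stub returns
	 * false and the branch disappears. */
	static void *demo_pre_alloc(struct kmem_cache *s, gfp_t flags)
	{
		if (should_failslab(s, flags))
			return NULL;	/* simulated allocation failure */
		return kmem_cache_alloc(s, flags);
	}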
diff --git a/include/linux/kasan.h b/include/linux/kasan.h index d6459bd1376d..de784fd11d12 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h | |||
@@ -43,7 +43,7 @@ void kasan_unpoison_stack_above_sp_to(const void *watermark); | |||
43 | void kasan_alloc_pages(struct page *page, unsigned int order); | 43 | void kasan_alloc_pages(struct page *page, unsigned int order); |
44 | void kasan_free_pages(struct page *page, unsigned int order); | 44 | void kasan_free_pages(struct page *page, unsigned int order); |
45 | 45 | ||
46 | void kasan_cache_create(struct kmem_cache *cache, size_t *size, | 46 | void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, |
47 | slab_flags_t *flags); | 47 | slab_flags_t *flags); |
48 | void kasan_cache_shrink(struct kmem_cache *cache); | 48 | void kasan_cache_shrink(struct kmem_cache *cache); |
49 | void kasan_cache_shutdown(struct kmem_cache *cache); | 49 | void kasan_cache_shutdown(struct kmem_cache *cache); |
@@ -92,7 +92,7 @@ static inline void kasan_alloc_pages(struct page *page, unsigned int order) {} | |||
92 | static inline void kasan_free_pages(struct page *page, unsigned int order) {} | 92 | static inline void kasan_free_pages(struct page *page, unsigned int order) {} |
93 | 93 | ||
94 | static inline void kasan_cache_create(struct kmem_cache *cache, | 94 | static inline void kasan_cache_create(struct kmem_cache *cache, |
95 | size_t *size, | 95 | unsigned int *size, |
96 | slab_flags_t *flags) {} | 96 | slab_flags_t *flags) {} |
97 | static inline void kasan_cache_shrink(struct kmem_cache *cache) {} | 97 | static inline void kasan_cache_shrink(struct kmem_cache *cache) {} |
98 | static inline void kasan_cache_shutdown(struct kmem_cache *cache) {} | 98 | static inline void kasan_cache_shutdown(struct kmem_cache *cache) {} |
diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h index bb8129a3474d..96def9d15b1b 100644 --- a/include/linux/list_lru.h +++ b/include/linux/list_lru.h | |||
@@ -32,6 +32,7 @@ struct list_lru_one { | |||
32 | }; | 32 | }; |
33 | 33 | ||
34 | struct list_lru_memcg { | 34 | struct list_lru_memcg { |
35 | struct rcu_head rcu; | ||
35 | /* array of per cgroup lists, indexed by memcg_cache_id */ | 36 | /* array of per cgroup lists, indexed by memcg_cache_id */ |
36 | struct list_lru_one *lru[0]; | 37 | struct list_lru_one *lru[0]; |
37 | }; | 38 | }; |
@@ -43,7 +44,7 @@ struct list_lru_node { | |||
43 | struct list_lru_one lru; | 44 | struct list_lru_one lru; |
44 | #if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB) | 45 | #if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB) |
45 | /* for cgroup aware lrus points to per cgroup lists, otherwise NULL */ | 46 | /* for cgroup aware lrus points to per cgroup lists, otherwise NULL */ |
46 | struct list_lru_memcg *memcg_lrus; | 47 | struct list_lru_memcg __rcu *memcg_lrus; |
47 | #endif | 48 | #endif |
48 | long nr_items; | 49 | long nr_items; |
49 | } ____cacheline_aligned_in_smp; | 50 | } ____cacheline_aligned_in_smp; |
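Marking memcg_lrus as __rcu (and giving struct list_lru_memcg an rcu_head for deferred freeing) lets readers walk the per-memcg array without holding the node lock. A sketch of the usual reader pattern, assuming a kernel built with CONFIG_MEMCG and without CONFIG_SLOB so the field exists; demo_count_one() itself is illustrative:

	#include <linux/list_lru.h>
	#include <linux/rcupdate.h>

	static long demo_count_one(struct list_lru_node *nlru, int memcg_idx)
	{
		struct list_lru_memcg *memcg_lrus;
		long nr_items = 0;

		rcu_read_lock();
		/* rcu_dereference() pairs with the rcu_assign_pointer() done
		 * by whoever installs or resizes the array; the pointer must
		 * not be used outside the read-side critical section. */
		memcg_lrus = rcu_dereference(nlru->memcg_lrus);
		if (memcg_lrus && memcg_lrus->lru[memcg_idx])
			nr_items = memcg_lrus->lru[memcg_idx]->nr_items;
		rcu_read_unlock();

		return nr_items;
	}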
diff --git a/include/linux/memblock.h b/include/linux/memblock.h index f92ea7783652..0257aee7ab4b 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h | |||
@@ -416,21 +416,11 @@ static inline void early_memtest(phys_addr_t start, phys_addr_t end) | |||
416 | { | 416 | { |
417 | } | 417 | } |
418 | #endif | 418 | #endif |
419 | |||
420 | extern unsigned long memblock_reserved_memory_within(phys_addr_t start_addr, | ||
421 | phys_addr_t end_addr); | ||
422 | #else | 419 | #else |
423 | static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align) | 420 | static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align) |
424 | { | 421 | { |
425 | return 0; | 422 | return 0; |
426 | } | 423 | } |
427 | |||
428 | static inline unsigned long memblock_reserved_memory_within(phys_addr_t start_addr, | ||
429 | phys_addr_t end_addr) | ||
430 | { | ||
431 | return 0; | ||
432 | } | ||
433 | |||
434 | #endif /* CONFIG_HAVE_MEMBLOCK */ | 424 | #endif /* CONFIG_HAVE_MEMBLOCK */ |
435 | 425 | ||
436 | #endif /* __KERNEL__ */ | 426 | #endif /* __KERNEL__ */ |
diff --git a/include/linux/memory.h b/include/linux/memory.h index f71e732c77b2..31ca3e28b0eb 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h | |||
@@ -33,6 +33,7 @@ struct memory_block { | |||
33 | void *hw; /* optional pointer to fw/hw data */ | 33 | void *hw; /* optional pointer to fw/hw data */ |
34 | int (*phys_callback)(struct memory_block *); | 34 | int (*phys_callback)(struct memory_block *); |
35 | struct device dev; | 35 | struct device dev; |
36 | int nid; /* NID for this memory block */ | ||
36 | }; | 37 | }; |
37 | 38 | ||
38 | int arch_get_memory_phys_device(unsigned long start_pfn); | 39 | int arch_get_memory_phys_device(unsigned long start_pfn); |
@@ -109,7 +110,7 @@ extern int register_memory_notifier(struct notifier_block *nb); | |||
109 | extern void unregister_memory_notifier(struct notifier_block *nb); | 110 | extern void unregister_memory_notifier(struct notifier_block *nb); |
110 | extern int register_memory_isolate_notifier(struct notifier_block *nb); | 111 | extern int register_memory_isolate_notifier(struct notifier_block *nb); |
111 | extern void unregister_memory_isolate_notifier(struct notifier_block *nb); | 112 | extern void unregister_memory_isolate_notifier(struct notifier_block *nb); |
112 | extern int register_new_memory(int, struct mem_section *); | 113 | int hotplug_memory_register(int nid, struct mem_section *section); |
113 | #ifdef CONFIG_MEMORY_HOTREMOVE | 114 | #ifdef CONFIG_MEMORY_HOTREMOVE |
114 | extern int unregister_memory_section(struct mem_section *); | 115 | extern int unregister_memory_section(struct mem_section *); |
115 | #endif | 116 | #endif |
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index aba5f86eb038..2b0265265c28 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h | |||
@@ -52,24 +52,6 @@ enum { | |||
52 | }; | 52 | }; |
53 | 53 | ||
54 | /* | 54 | /* |
55 | * pgdat resizing functions | ||
56 | */ | ||
57 | static inline | ||
58 | void pgdat_resize_lock(struct pglist_data *pgdat, unsigned long *flags) | ||
59 | { | ||
60 | spin_lock_irqsave(&pgdat->node_size_lock, *flags); | ||
61 | } | ||
62 | static inline | ||
63 | void pgdat_resize_unlock(struct pglist_data *pgdat, unsigned long *flags) | ||
64 | { | ||
65 | spin_unlock_irqrestore(&pgdat->node_size_lock, *flags); | ||
66 | } | ||
67 | static inline | ||
68 | void pgdat_resize_init(struct pglist_data *pgdat) | ||
69 | { | ||
70 | spin_lock_init(&pgdat->node_size_lock); | ||
71 | } | ||
72 | /* | ||
73 | * Zone resizing functions | 55 | * Zone resizing functions |
74 | * | 56 | * |
75 | * Note: any attempt to resize a zone should has pgdat_resize_lock() | 57 | * Note: any attempt to resize a zone should has pgdat_resize_lock() |
@@ -246,13 +228,6 @@ extern void clear_zone_contiguous(struct zone *zone); | |||
246 | ___page; \ | 228 | ___page; \ |
247 | }) | 229 | }) |
248 | 230 | ||
249 | /* | ||
250 | * Stub functions for when hotplug is off | ||
251 | */ | ||
252 | static inline void pgdat_resize_lock(struct pglist_data *p, unsigned long *f) {} | ||
253 | static inline void pgdat_resize_unlock(struct pglist_data *p, unsigned long *f) {} | ||
254 | static inline void pgdat_resize_init(struct pglist_data *pgdat) {} | ||
255 | |||
256 | static inline unsigned zone_span_seqbegin(struct zone *zone) | 231 | static inline unsigned zone_span_seqbegin(struct zone *zone) |
257 | { | 232 | { |
258 | return 0; | 233 | return 0; |
@@ -293,6 +268,34 @@ static inline bool movable_node_is_enabled(void) | |||
293 | } | 268 | } |
294 | #endif /* ! CONFIG_MEMORY_HOTPLUG */ | 269 | #endif /* ! CONFIG_MEMORY_HOTPLUG */ |
295 | 270 | ||
271 | #if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT) | ||
272 | /* | ||
273 | * pgdat resizing functions | ||
274 | */ | ||
275 | static inline | ||
276 | void pgdat_resize_lock(struct pglist_data *pgdat, unsigned long *flags) | ||
277 | { | ||
278 | spin_lock_irqsave(&pgdat->node_size_lock, *flags); | ||
279 | } | ||
280 | static inline | ||
281 | void pgdat_resize_unlock(struct pglist_data *pgdat, unsigned long *flags) | ||
282 | { | ||
283 | spin_unlock_irqrestore(&pgdat->node_size_lock, *flags); | ||
284 | } | ||
285 | static inline | ||
286 | void pgdat_resize_init(struct pglist_data *pgdat) | ||
287 | { | ||
288 | spin_lock_init(&pgdat->node_size_lock); | ||
289 | } | ||
290 | #else /* !(CONFIG_MEMORY_HOTPLUG || CONFIG_DEFERRED_STRUCT_PAGE_INIT) */ | ||
291 | /* | ||
292 | * Stub functions for when hotplug is off | ||
293 | */ | ||
294 | static inline void pgdat_resize_lock(struct pglist_data *p, unsigned long *f) {} | ||
295 | static inline void pgdat_resize_unlock(struct pglist_data *p, unsigned long *f) {} | ||
296 | static inline void pgdat_resize_init(struct pglist_data *pgdat) {} | ||
297 | #endif /* !(CONFIG_MEMORY_HOTPLUG || CONFIG_DEFERRED_STRUCT_PAGE_INIT) */ | ||
298 | |||
296 | #ifdef CONFIG_MEMORY_HOTREMOVE | 299 | #ifdef CONFIG_MEMORY_HOTREMOVE |
297 | 300 | ||
298 | extern bool is_mem_section_removable(unsigned long pfn, unsigned long nr_pages); | 301 | extern bool is_mem_section_removable(unsigned long pfn, unsigned long nr_pages); |
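Moving pgdat_resize_lock()/unlock() under CONFIG_MEMORY_HOTPLUG || CONFIG_DEFERRED_STRUCT_PAGE_INIT means deferred struct-page initialisation can serialise against node span changes even on kernels without hotplug; with both options off the helpers still compile away to empty stubs. Usage is unchanged, as in this small illustrative walker:

	#include <linux/memory_hotplug.h>
	#include <linux/mmzone.h>
	#include <linux/printk.h>

	/* Anything that needs node_start_pfn/node_spanned_pages to stay
	 * constant while it looks at them takes the resize lock. */
	static void demo_inspect_node(pg_data_t *pgdat)
	{
		unsigned long flags;

		pgdat_resize_lock(pgdat, &flags);
		pr_info("node %d spans %lu pages from pfn %lu\n",
			pgdat->node_id, pgdat->node_spanned_pages,
			pgdat->node_start_pfn);
		pgdat_resize_unlock(pgdat, &flags);
	}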
diff --git a/include/linux/migrate.h b/include/linux/migrate.h index a2246cf670ba..ab45f8a0d288 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h | |||
@@ -25,7 +25,7 @@ enum migrate_reason { | |||
25 | MR_SYSCALL, /* also applies to cpusets */ | 25 | MR_SYSCALL, /* also applies to cpusets */ |
26 | MR_MEMPOLICY_MBIND, | 26 | MR_MEMPOLICY_MBIND, |
27 | MR_NUMA_MISPLACED, | 27 | MR_NUMA_MISPLACED, |
28 | MR_CMA, | 28 | MR_CONTIG_RANGE, |
29 | MR_TYPES | 29 | MR_TYPES |
30 | }; | 30 | }; |
31 | 31 | ||
diff --git a/include/linux/mm.h b/include/linux/mm.h index f945dff34925..3ad632366973 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -386,17 +386,19 @@ struct vm_operations_struct { | |||
386 | void (*close)(struct vm_area_struct * area); | 386 | void (*close)(struct vm_area_struct * area); |
387 | int (*split)(struct vm_area_struct * area, unsigned long addr); | 387 | int (*split)(struct vm_area_struct * area, unsigned long addr); |
388 | int (*mremap)(struct vm_area_struct * area); | 388 | int (*mremap)(struct vm_area_struct * area); |
389 | int (*fault)(struct vm_fault *vmf); | 389 | vm_fault_t (*fault)(struct vm_fault *vmf); |
390 | int (*huge_fault)(struct vm_fault *vmf, enum page_entry_size pe_size); | 390 | vm_fault_t (*huge_fault)(struct vm_fault *vmf, |
391 | enum page_entry_size pe_size); | ||
391 | void (*map_pages)(struct vm_fault *vmf, | 392 | void (*map_pages)(struct vm_fault *vmf, |
392 | pgoff_t start_pgoff, pgoff_t end_pgoff); | 393 | pgoff_t start_pgoff, pgoff_t end_pgoff); |
394 | unsigned long (*pagesize)(struct vm_area_struct * area); | ||
393 | 395 | ||
394 | /* notification that a previously read-only page is about to become | 396 | /* notification that a previously read-only page is about to become |
395 | * writable, if an error is returned it will cause a SIGBUS */ | 397 | * writable, if an error is returned it will cause a SIGBUS */ |
396 | int (*page_mkwrite)(struct vm_fault *vmf); | 398 | vm_fault_t (*page_mkwrite)(struct vm_fault *vmf); |
397 | 399 | ||
398 | /* same as page_mkwrite when using VM_PFNMAP|VM_MIXEDMAP */ | 400 | /* same as page_mkwrite when using VM_PFNMAP|VM_MIXEDMAP */ |
399 | int (*pfn_mkwrite)(struct vm_fault *vmf); | 401 | vm_fault_t (*pfn_mkwrite)(struct vm_fault *vmf); |
400 | 402 | ||
401 | /* called by access_process_vm when get_user_pages() fails, typically | 403 | /* called by access_process_vm when get_user_pages() fails, typically |
402 | * for use by special VMAs that can switch between memory and hardware | 404 | * for use by special VMAs that can switch between memory and hardware |
@@ -903,7 +905,9 @@ extern int page_to_nid(const struct page *page); | |||
903 | #else | 905 | #else |
904 | static inline int page_to_nid(const struct page *page) | 906 | static inline int page_to_nid(const struct page *page) |
905 | { | 907 | { |
906 | return (page->flags >> NODES_PGSHIFT) & NODES_MASK; | 908 | struct page *p = (struct page *)page; |
909 | |||
910 | return (PF_POISONED_CHECK(p)->flags >> NODES_PGSHIFT) & NODES_MASK; | ||
907 | } | 911 | } |
908 | #endif | 912 | #endif |
909 | 913 | ||
@@ -1152,6 +1156,7 @@ static inline pgoff_t page_index(struct page *page) | |||
1152 | 1156 | ||
1153 | bool page_mapped(struct page *page); | 1157 | bool page_mapped(struct page *page); |
1154 | struct address_space *page_mapping(struct page *page); | 1158 | struct address_space *page_mapping(struct page *page); |
1159 | struct address_space *page_mapping_file(struct page *page); | ||
1155 | 1160 | ||
1156 | /* | 1161 | /* |
1157 | * Return true only if the page has been allocated with | 1162 | * Return true only if the page has been allocated with |
@@ -2420,6 +2425,44 @@ int vm_insert_mixed_mkwrite(struct vm_area_struct *vma, unsigned long addr, | |||
2420 | pfn_t pfn); | 2425 | pfn_t pfn); |
2421 | int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len); | 2426 | int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len); |
2422 | 2427 | ||
2428 | static inline vm_fault_t vmf_insert_page(struct vm_area_struct *vma, | ||
2429 | unsigned long addr, struct page *page) | ||
2430 | { | ||
2431 | int err = vm_insert_page(vma, addr, page); | ||
2432 | |||
2433 | if (err == -ENOMEM) | ||
2434 | return VM_FAULT_OOM; | ||
2435 | if (err < 0 && err != -EBUSY) | ||
2436 | return VM_FAULT_SIGBUS; | ||
2437 | |||
2438 | return VM_FAULT_NOPAGE; | ||
2439 | } | ||
2440 | |||
2441 | static inline vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, | ||
2442 | unsigned long addr, pfn_t pfn) | ||
2443 | { | ||
2444 | int err = vm_insert_mixed(vma, addr, pfn); | ||
2445 | |||
2446 | if (err == -ENOMEM) | ||
2447 | return VM_FAULT_OOM; | ||
2448 | if (err < 0 && err != -EBUSY) | ||
2449 | return VM_FAULT_SIGBUS; | ||
2450 | |||
2451 | return VM_FAULT_NOPAGE; | ||
2452 | } | ||
2453 | |||
2454 | static inline vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma, | ||
2455 | unsigned long addr, unsigned long pfn) | ||
2456 | { | ||
2457 | int err = vm_insert_pfn(vma, addr, pfn); | ||
2458 | |||
2459 | if (err == -ENOMEM) | ||
2460 | return VM_FAULT_OOM; | ||
2461 | if (err < 0 && err != -EBUSY) | ||
2462 | return VM_FAULT_SIGBUS; | ||
2463 | |||
2464 | return VM_FAULT_NOPAGE; | ||
2465 | } | ||
2423 | 2466 | ||
2424 | struct page *follow_page_mask(struct vm_area_struct *vma, | 2467 | struct page *follow_page_mask(struct vm_area_struct *vma, |
2425 | unsigned long address, unsigned int foll_flags, | 2468 | unsigned long address, unsigned int foll_flags, |
@@ -2589,7 +2632,7 @@ extern int get_hwpoison_page(struct page *page); | |||
2589 | extern int sysctl_memory_failure_early_kill; | 2632 | extern int sysctl_memory_failure_early_kill; |
2590 | extern int sysctl_memory_failure_recovery; | 2633 | extern int sysctl_memory_failure_recovery; |
2591 | extern void shake_page(struct page *p, int access); | 2634 | extern void shake_page(struct page *p, int access); |
2592 | extern atomic_long_t num_poisoned_pages; | 2635 | extern atomic_long_t num_poisoned_pages __read_mostly; |
2593 | extern int soft_offline_page(struct page *page, int flags); | 2636 | extern int soft_offline_page(struct page *page, int flags); |
2594 | 2637 | ||
2595 | 2638 | ||
@@ -2611,6 +2654,7 @@ enum mf_action_page_type { | |||
2611 | MF_MSG_POISONED_HUGE, | 2654 | MF_MSG_POISONED_HUGE, |
2612 | MF_MSG_HUGE, | 2655 | MF_MSG_HUGE, |
2613 | MF_MSG_FREE_HUGE, | 2656 | MF_MSG_FREE_HUGE, |
2657 | MF_MSG_NON_PMD_HUGE, | ||
2614 | MF_MSG_UNMAP_FAILED, | 2658 | MF_MSG_UNMAP_FAILED, |
2615 | MF_MSG_DIRTY_SWAPCACHE, | 2659 | MF_MSG_DIRTY_SWAPCACHE, |
2616 | MF_MSG_CLEAN_SWAPCACHE, | 2660 | MF_MSG_CLEAN_SWAPCACHE, |
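The new vmf_insert_page()/vmf_insert_mixed()/vmf_insert_pfn() helpers fold the errno-to-VM_FAULT_* translation into one place, so a ->fault handler can simply return their result now that the vm_operations_struct callbacks return vm_fault_t. A hypothetical driver fault handler (demo_dev and demo_vm_ops are made-up names):

	#include <linux/mm.h>

	struct demo_dev {
		struct page *page;	/* backing page set up at open time */
	};

	static vm_fault_t demo_fault(struct vm_fault *vmf)
	{
		struct demo_dev *dev = vmf->vma->vm_private_data;

		/* vmf_insert_page() already returns VM_FAULT_OOM, SIGBUS or
		 * NOPAGE, so no errno translation is needed in the driver. */
		return vmf_insert_page(vmf->vma, vmf->address, dev->page);
	}

	static const struct vm_operations_struct demo_vm_ops = {
		.fault = demo_fault,
	};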
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index fd1af6b9591d..21612347d311 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h | |||
@@ -22,6 +22,8 @@ | |||
22 | #endif | 22 | #endif |
23 | #define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1)) | 23 | #define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1)) |
24 | 24 | ||
25 | typedef int vm_fault_t; | ||
26 | |||
25 | struct address_space; | 27 | struct address_space; |
26 | struct mem_cgroup; | 28 | struct mem_cgroup; |
27 | struct hmm; | 29 | struct hmm; |
diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index 57b0030d3800..2ad72d2c8cc5 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h | |||
@@ -37,10 +37,10 @@ void dump_mm(const struct mm_struct *mm); | |||
37 | BUG(); \ | 37 | BUG(); \ |
38 | } \ | 38 | } \ |
39 | } while (0) | 39 | } while (0) |
40 | #define VM_WARN_ON(cond) WARN_ON(cond) | 40 | #define VM_WARN_ON(cond) (void)WARN_ON(cond) |
41 | #define VM_WARN_ON_ONCE(cond) WARN_ON_ONCE(cond) | 41 | #define VM_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond) |
42 | #define VM_WARN_ONCE(cond, format...) WARN_ONCE(cond, format) | 42 | #define VM_WARN_ONCE(cond, format...) (void)WARN_ONCE(cond, format) |
43 | #define VM_WARN(cond, format...) WARN(cond, format) | 43 | #define VM_WARN(cond, format...) (void)WARN(cond, format) |
44 | #else | 44 | #else |
45 | #define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond) | 45 | #define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond) |
46 | #define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond) | 46 | #define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond) |
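Casting the CONFIG_DEBUG_VM variants to void makes them behave like the !CONFIG_DEBUG_VM stubs, which were never usable as expressions; code that tried to branch on VM_WARN_ON() now fails to build in both configurations instead of only one. Statement-style use is unaffected, as this small sketch shows:

	#include <linux/mmdebug.h>
	#include <linux/mm_types.h>

	static void demo_check(struct page *page)
	{
		/* Fine: VM_WARN_ON() used as a statement.  Something like
		 * "if (VM_WARN_ON(!page)) return;" would now be rejected by
		 * the compiler whether or not CONFIG_DEBUG_VM is set. */
		VM_WARN_ON(!page);
	}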
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index a2db4576e499..f11ae29005f1 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h | |||
@@ -633,14 +633,15 @@ typedef struct pglist_data { | |||
633 | #ifndef CONFIG_NO_BOOTMEM | 633 | #ifndef CONFIG_NO_BOOTMEM |
634 | struct bootmem_data *bdata; | 634 | struct bootmem_data *bdata; |
635 | #endif | 635 | #endif |
636 | #ifdef CONFIG_MEMORY_HOTPLUG | 636 | #if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT) |
637 | /* | 637 | /* |
638 | * Must be held any time you expect node_start_pfn, node_present_pages | 638 | * Must be held any time you expect node_start_pfn, node_present_pages |
639 | * or node_spanned_pages stay constant. Holding this will also | 639 | * or node_spanned_pages stay constant. Holding this will also |
640 | * guarantee that any pfn_valid() stays that way. | 640 | * guarantee that any pfn_valid() stays that way. |
641 | * | 641 | * |
642 | * pgdat_resize_lock() and pgdat_resize_unlock() are provided to | 642 | * pgdat_resize_lock() and pgdat_resize_unlock() are provided to |
643 | * manipulate node_size_lock without checking for CONFIG_MEMORY_HOTPLUG. | 643 | * manipulate node_size_lock without checking for CONFIG_MEMORY_HOTPLUG |
644 | * or CONFIG_DEFERRED_STRUCT_PAGE_INIT. | ||
644 | * | 645 | * |
645 | * Nests above zone->lock and zone->span_seqlock | 646 | * Nests above zone->lock and zone->span_seqlock |
646 | */ | 647 | */ |
@@ -775,7 +776,8 @@ static inline bool is_dev_zone(const struct zone *zone) | |||
775 | #include <linux/memory_hotplug.h> | 776 | #include <linux/memory_hotplug.h> |
776 | 777 | ||
777 | void build_all_zonelists(pg_data_t *pgdat); | 778 | void build_all_zonelists(pg_data_t *pgdat); |
778 | void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx); | 779 | void wakeup_kswapd(struct zone *zone, gfp_t gfp_mask, int order, |
780 | enum zone_type classzone_idx); | ||
779 | bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, | 781 | bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, |
780 | int classzone_idx, unsigned int alloc_flags, | 782 | int classzone_idx, unsigned int alloc_flags, |
781 | long free_pages); | 783 | long free_pages); |
diff --git a/include/linux/node.h b/include/linux/node.h index 4ece0fee0ffc..41f171861dcc 100644 --- a/include/linux/node.h +++ b/include/linux/node.h | |||
@@ -67,7 +67,7 @@ extern void unregister_one_node(int nid); | |||
67 | extern int register_cpu_under_node(unsigned int cpu, unsigned int nid); | 67 | extern int register_cpu_under_node(unsigned int cpu, unsigned int nid); |
68 | extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid); | 68 | extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid); |
69 | extern int register_mem_sect_under_node(struct memory_block *mem_blk, | 69 | extern int register_mem_sect_under_node(struct memory_block *mem_blk, |
70 | int nid); | 70 | int nid, bool check_nid); |
71 | extern int unregister_mem_sect_under_nodes(struct memory_block *mem_blk, | 71 | extern int unregister_mem_sect_under_nodes(struct memory_block *mem_blk, |
72 | unsigned long phys_index); | 72 | unsigned long phys_index); |
73 | 73 | ||
@@ -97,7 +97,7 @@ static inline int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) | |||
97 | return 0; | 97 | return 0; |
98 | } | 98 | } |
99 | static inline int register_mem_sect_under_node(struct memory_block *mem_blk, | 99 | static inline int register_mem_sect_under_node(struct memory_block *mem_blk, |
100 | int nid) | 100 | int nid, bool check_nid) |
101 | { | 101 | { |
102 | return 0; | 102 | return 0; |
103 | } | 103 | } |
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 50c2b8786831..e34a27727b9a 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h | |||
@@ -156,9 +156,18 @@ static __always_inline int PageCompound(struct page *page) | |||
156 | return test_bit(PG_head, &page->flags) || PageTail(page); | 156 | return test_bit(PG_head, &page->flags) || PageTail(page); |
157 | } | 157 | } |
158 | 158 | ||
159 | #define PAGE_POISON_PATTERN -1l | ||
160 | static inline int PagePoisoned(const struct page *page) | ||
161 | { | ||
162 | return page->flags == PAGE_POISON_PATTERN; | ||
163 | } | ||
164 | |||
159 | /* | 165 | /* |
160 | * Page flags policies wrt compound pages | 166 | * Page flags policies wrt compound pages |
161 | * | 167 | * |
168 | * PF_POISONED_CHECK | ||
169 | * check if this struct page poisoned/uninitialized | ||
170 | * | ||
162 | * PF_ANY: | 171 | * PF_ANY: |
163 | * the page flag is relevant for small, head and tail pages. | 172 | * the page flag is relevant for small, head and tail pages. |
164 | * | 173 | * |
@@ -176,17 +185,20 @@ static __always_inline int PageCompound(struct page *page) | |||
176 | * PF_NO_COMPOUND: | 185 | * PF_NO_COMPOUND: |
177 | * the page flag is not relevant for compound pages. | 186 | * the page flag is not relevant for compound pages. |
178 | */ | 187 | */ |
179 | #define PF_ANY(page, enforce) page | 188 | #define PF_POISONED_CHECK(page) ({ \ |
180 | #define PF_HEAD(page, enforce) compound_head(page) | 189 | VM_BUG_ON_PGFLAGS(PagePoisoned(page), page); \ |
190 | page; }) | ||
191 | #define PF_ANY(page, enforce) PF_POISONED_CHECK(page) | ||
192 | #define PF_HEAD(page, enforce) PF_POISONED_CHECK(compound_head(page)) | ||
181 | #define PF_ONLY_HEAD(page, enforce) ({ \ | 193 | #define PF_ONLY_HEAD(page, enforce) ({ \ |
182 | VM_BUG_ON_PGFLAGS(PageTail(page), page); \ | 194 | VM_BUG_ON_PGFLAGS(PageTail(page), page); \ |
183 | page;}) | 195 | PF_POISONED_CHECK(page); }) |
184 | #define PF_NO_TAIL(page, enforce) ({ \ | 196 | #define PF_NO_TAIL(page, enforce) ({ \ |
185 | VM_BUG_ON_PGFLAGS(enforce && PageTail(page), page); \ | 197 | VM_BUG_ON_PGFLAGS(enforce && PageTail(page), page); \ |
186 | compound_head(page);}) | 198 | PF_POISONED_CHECK(compound_head(page)); }) |
187 | #define PF_NO_COMPOUND(page, enforce) ({ \ | 199 | #define PF_NO_COMPOUND(page, enforce) ({ \ |
188 | VM_BUG_ON_PGFLAGS(enforce && PageCompound(page), page); \ | 200 | VM_BUG_ON_PGFLAGS(enforce && PageCompound(page), page); \ |
189 | page;}) | 201 | PF_POISONED_CHECK(page); }) |
190 | 202 | ||
191 | /* | 203 | /* |
192 | * Macros to create function definitions for page flags | 204 | * Macros to create function definitions for page flags |
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h index 760d74a0e9a9..14d14beb1f7f 100644 --- a/include/linux/page_ref.h +++ b/include/linux/page_ref.h | |||
@@ -175,8 +175,7 @@ static inline void page_ref_unfreeze(struct page *page, int count) | |||
175 | VM_BUG_ON_PAGE(page_count(page) != 0, page); | 175 | VM_BUG_ON_PAGE(page_count(page) != 0, page); |
176 | VM_BUG_ON(count == 0); | 176 | VM_BUG_ON(count == 0); |
177 | 177 | ||
178 | smp_mb(); | 178 | atomic_set_release(&page->_refcount, count); |
179 | atomic_set(&page->_refcount, count); | ||
180 | if (page_ref_tracepoint_active(__tracepoint_page_ref_unfreeze)) | 179 | if (page_ref_tracepoint_active(__tracepoint_page_ref_unfreeze)) |
181 | __page_ref_unfreeze(page, count); | 180 | __page_ref_unfreeze(page, count); |
182 | } | 181 | } |
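Replacing the explicit smp_mb()/atomic_set() pair with atomic_set_release() keeps the ordering that matters here, namely that everything written before the refcount is published is visible to whoever observes the new count, while dropping the stronger-than-needed full barrier. A generic publish/consume sketch with hypothetical demo_* names, using the real atomic_set_release()/atomic_read_acquire() helpers:

	#include <linux/atomic.h>

	static atomic_t demo_ready = ATOMIC_INIT(0);
	static int demo_payload;

	static void demo_publish(int value)
	{
		demo_payload = value;			/* ordinary store ...          */
		atomic_set_release(&demo_ready, 1);	/* ... ordered before this set */
	}

	static int demo_consume(void)
	{
		/* The acquire read pairs with the release store above, so a
		 * reader that sees demo_ready == 1 also sees demo_payload. */
		if (atomic_read_acquire(&demo_ready))
			return demo_payload;
		return -1;
	}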
diff --git a/include/linux/slab.h b/include/linux/slab.h index 231abc8976c5..81ebd71f8c03 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h | |||
@@ -125,7 +125,6 @@ | |||
125 | #define ZERO_OR_NULL_PTR(x) ((unsigned long)(x) <= \ | 125 | #define ZERO_OR_NULL_PTR(x) ((unsigned long)(x) <= \ |
126 | (unsigned long)ZERO_SIZE_PTR) | 126 | (unsigned long)ZERO_SIZE_PTR) |
127 | 127 | ||
128 | #include <linux/kmemleak.h> | ||
129 | #include <linux/kasan.h> | 128 | #include <linux/kasan.h> |
130 | 129 | ||
131 | struct mem_cgroup; | 130 | struct mem_cgroup; |
@@ -137,12 +136,13 @@ bool slab_is_available(void); | |||
137 | 136 | ||
138 | extern bool usercopy_fallback; | 137 | extern bool usercopy_fallback; |
139 | 138 | ||
140 | struct kmem_cache *kmem_cache_create(const char *name, size_t size, | 139 | struct kmem_cache *kmem_cache_create(const char *name, unsigned int size, |
141 | size_t align, slab_flags_t flags, | 140 | unsigned int align, slab_flags_t flags, |
142 | void (*ctor)(void *)); | 141 | void (*ctor)(void *)); |
143 | struct kmem_cache *kmem_cache_create_usercopy(const char *name, | 142 | struct kmem_cache *kmem_cache_create_usercopy(const char *name, |
144 | size_t size, size_t align, slab_flags_t flags, | 143 | unsigned int size, unsigned int align, |
145 | size_t useroffset, size_t usersize, | 144 | slab_flags_t flags, |
145 | unsigned int useroffset, unsigned int usersize, | ||
146 | void (*ctor)(void *)); | 146 | void (*ctor)(void *)); |
147 | void kmem_cache_destroy(struct kmem_cache *); | 147 | void kmem_cache_destroy(struct kmem_cache *); |
148 | int kmem_cache_shrink(struct kmem_cache *); | 148 | int kmem_cache_shrink(struct kmem_cache *); |
@@ -308,7 +308,7 @@ extern struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1]; | |||
308 | * 2 = 129 .. 192 bytes | 308 | * 2 = 129 .. 192 bytes |
309 | * n = 2^(n-1)+1 .. 2^n | 309 | * n = 2^(n-1)+1 .. 2^n |
310 | */ | 310 | */ |
311 | static __always_inline int kmalloc_index(size_t size) | 311 | static __always_inline unsigned int kmalloc_index(size_t size) |
312 | { | 312 | { |
313 | if (!size) | 313 | if (!size) |
314 | return 0; | 314 | return 0; |
@@ -504,7 +504,7 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags) | |||
504 | return kmalloc_large(size, flags); | 504 | return kmalloc_large(size, flags); |
505 | #ifndef CONFIG_SLOB | 505 | #ifndef CONFIG_SLOB |
506 | if (!(flags & GFP_DMA)) { | 506 | if (!(flags & GFP_DMA)) { |
507 | int index = kmalloc_index(size); | 507 | unsigned int index = kmalloc_index(size); |
508 | 508 | ||
509 | if (!index) | 509 | if (!index) |
510 | return ZERO_SIZE_PTR; | 510 | return ZERO_SIZE_PTR; |
@@ -522,11 +522,11 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags) | |||
522 | * return size or 0 if a kmalloc cache for that | 522 | * return size or 0 if a kmalloc cache for that |
523 | * size does not exist | 523 | * size does not exist |
524 | */ | 524 | */ |
525 | static __always_inline int kmalloc_size(int n) | 525 | static __always_inline unsigned int kmalloc_size(unsigned int n) |
526 | { | 526 | { |
527 | #ifndef CONFIG_SLOB | 527 | #ifndef CONFIG_SLOB |
528 | if (n > 2) | 528 | if (n > 2) |
529 | return 1 << n; | 529 | return 1U << n; |
530 | 530 | ||
531 | if (n == 1 && KMALLOC_MIN_SIZE <= 32) | 531 | if (n == 1 && KMALLOC_MIN_SIZE <= 32) |
532 | return 96; | 532 | return 96; |
@@ -542,7 +542,7 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) | |||
542 | #ifndef CONFIG_SLOB | 542 | #ifndef CONFIG_SLOB |
543 | if (__builtin_constant_p(size) && | 543 | if (__builtin_constant_p(size) && |
544 | size <= KMALLOC_MAX_CACHE_SIZE && !(flags & GFP_DMA)) { | 544 | size <= KMALLOC_MAX_CACHE_SIZE && !(flags & GFP_DMA)) { |
545 | int i = kmalloc_index(size); | 545 | unsigned int i = kmalloc_index(size); |
546 | 546 | ||
547 | if (!i) | 547 | if (!i) |
548 | return ZERO_SIZE_PTR; | 548 | return ZERO_SIZE_PTR; |
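The index-to-size mapping that kmalloc_index() and kmalloc_size() implement is easiest to see with concrete values. The following is a standalone sketch of that mapping rather than the kernel code itself; it assumes KMALLOC_MIN_SIZE <= 32, so the 96- and 192-byte caches (indexes 1 and 2) exist.

#include <assert.h>
#include <stddef.h>

/* Standalone illustration of the kmalloc size-class mapping. */
static unsigned int kmalloc_index_sketch(size_t size)
{
	unsigned int i;

	if (!size)
		return 0;			/* maps to ZERO_SIZE_PTR */
	if (size > 64 && size <= 96)
		return 1;			/* the 96-byte cache */
	if (size > 128 && size <= 192)
		return 2;			/* the 192-byte cache */
	for (i = 3; i <= 25; i++)		/* 8 bytes .. 32 MB */
		if (size <= (1UL << i))
			return i;
	return 0;				/* beyond the kmalloc caches */
}

int main(void)
{
	assert(kmalloc_index_sketch(8)   == 3);	/* 8-byte class */
	assert(kmalloc_index_sketch(100) == 7);	/* rounded up to 128 bytes */
	assert(kmalloc_index_sketch(150) == 2);	/* rounded up to 192 bytes */
	return 0;
}

With a constant size, the inline kmalloc() fast path shown above folds the same comparison chain at compile time, so kmalloc(100, GFP_KERNEL) becomes an allocation from the 128-byte cache.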
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 7385547c04b1..d9228e4d0320 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h | |||
@@ -85,8 +85,8 @@ struct kmem_cache { | |||
85 | unsigned int *random_seq; | 85 | unsigned int *random_seq; |
86 | #endif | 86 | #endif |
87 | 87 | ||
88 | size_t useroffset; /* Usercopy region offset */ | 88 | unsigned int useroffset; /* Usercopy region offset */ |
89 | size_t usersize; /* Usercopy region size */ | 89 | unsigned int usersize; /* Usercopy region size */ |
90 | 90 | ||
91 | struct kmem_cache_node *node[MAX_NUMNODES]; | 91 | struct kmem_cache_node *node[MAX_NUMNODES]; |
92 | }; | 92 | }; |
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 8ad99c47b19c..3773e26c08c1 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h | |||
@@ -73,7 +73,7 @@ struct kmem_cache_cpu { | |||
73 | * given order would contain. | 73 | * given order would contain. |
74 | */ | 74 | */ |
75 | struct kmem_cache_order_objects { | 75 | struct kmem_cache_order_objects { |
76 | unsigned long x; | 76 | unsigned int x; |
77 | }; | 77 | }; |
78 | 78 | ||
79 | /* | 79 | /* |
@@ -84,11 +84,12 @@ struct kmem_cache { | |||
84 | /* Used for retrieving partial slabs etc */ | 84 | /* Used for retrieving partial slabs etc */ |
85 | slab_flags_t flags; | 85 | slab_flags_t flags; |
86 | unsigned long min_partial; | 86 | unsigned long min_partial; |
87 | int size; /* The size of an object including meta data */ | 87 | unsigned int size; /* The size of an object including meta data */ |
88 | int object_size; /* The size of an object without meta data */ | 88 | unsigned int object_size;/* The size of an object without meta data */ |
89 | int offset; /* Free pointer offset. */ | 89 | unsigned int offset; /* Free pointer offset. */ |
90 | #ifdef CONFIG_SLUB_CPU_PARTIAL | 90 | #ifdef CONFIG_SLUB_CPU_PARTIAL |
91 | int cpu_partial; /* Number of per cpu partial objects to keep around */ | 91 | /* Number of per cpu partial objects to keep around */ |
92 | unsigned int cpu_partial; | ||
92 | #endif | 93 | #endif |
93 | struct kmem_cache_order_objects oo; | 94 | struct kmem_cache_order_objects oo; |
94 | 95 | ||
@@ -98,10 +99,10 @@ struct kmem_cache { | |||
98 | gfp_t allocflags; /* gfp flags to use on each alloc */ | 99 | gfp_t allocflags; /* gfp flags to use on each alloc */ |
99 | int refcount; /* Refcount for slab cache destroy */ | 100 | int refcount; /* Refcount for slab cache destroy */ |
100 | void (*ctor)(void *); | 101 | void (*ctor)(void *); |
101 | int inuse; /* Offset to metadata */ | 102 | unsigned int inuse; /* Offset to metadata */ |
102 | int align; /* Alignment */ | 103 | unsigned int align; /* Alignment */ |
103 | int reserved; /* Reserved bytes at the end of slabs */ | 104 | unsigned int reserved; /* Reserved bytes at the end of slabs */ |
104 | int red_left_pad; /* Left redzone padding size */ | 105 | unsigned int red_left_pad; /* Left redzone padding size */ |
105 | const char *name; /* Name (only for display!) */ | 106 | const char *name; /* Name (only for display!) */ |
106 | struct list_head list; /* List of slab caches */ | 107 | struct list_head list; /* List of slab caches */ |
107 | #ifdef CONFIG_SYSFS | 108 | #ifdef CONFIG_SYSFS |
@@ -110,7 +111,8 @@ struct kmem_cache { | |||
110 | #endif | 111 | #endif |
111 | #ifdef CONFIG_MEMCG | 112 | #ifdef CONFIG_MEMCG |
112 | struct memcg_cache_params memcg_params; | 113 | struct memcg_cache_params memcg_params; |
113 | int max_attr_size; /* for propagation, maximum size of a stored attr */ | 114 | /* for propagation, maximum size of a stored attr */ |
115 | unsigned int max_attr_size; | ||
114 | #ifdef CONFIG_SYSFS | 116 | #ifdef CONFIG_SYSFS |
115 | struct kset *memcg_kset; | 117 | struct kset *memcg_kset; |
116 | #endif | 118 | #endif |
@@ -124,7 +126,7 @@ struct kmem_cache { | |||
124 | /* | 126 | /* |
125 | * Defragmentation by allocating from a remote node. | 127 | * Defragmentation by allocating from a remote node. |
126 | */ | 128 | */ |
127 | int remote_node_defrag_ratio; | 129 | unsigned int remote_node_defrag_ratio; |
128 | #endif | 130 | #endif |
129 | 131 | ||
130 | #ifdef CONFIG_SLAB_FREELIST_RANDOM | 132 | #ifdef CONFIG_SLAB_FREELIST_RANDOM |
@@ -135,8 +137,8 @@ struct kmem_cache { | |||
135 | struct kasan_cache kasan_info; | 137 | struct kasan_cache kasan_info; |
136 | #endif | 138 | #endif |
137 | 139 | ||
138 | size_t useroffset; /* Usercopy region offset */ | 140 | unsigned int useroffset; /* Usercopy region offset */ |
139 | size_t usersize; /* Usercopy region size */ | 141 | unsigned int usersize; /* Usercopy region size */ |
140 | 142 | ||
141 | struct kmem_cache_node *node[MAX_NUMNODES]; | 143 | struct kmem_cache_node *node[MAX_NUMNODES]; |
142 | }; | 144 | }; |
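The kmem_cache_order_objects word only ever packs a page order together with an object count, which is why shrinking it to unsigned int is safe. For reference, a sketch of the accessors mm/slub.c uses for this packing (OO_SHIFT is the value used there, but treat the snippet as illustrative rather than an exact copy of the source):

#define OO_SHIFT	16
#define OO_MASK		((1 << OO_SHIFT) - 1)

static inline unsigned int oo_order(struct kmem_cache_order_objects x)
{
	/* The page order lives in the high bits... */
	return x.x >> OO_SHIFT;
}

static inline unsigned int oo_objects(struct kmem_cache_order_objects x)
{
	/* ...and the per-slab object count in the low 16 bits. */
	return x.x & OO_MASK;
}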
diff --git a/include/linux/swap.h b/include/linux/swap.h index a1a3f4ed94ce..2417d288e016 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h | |||
@@ -400,7 +400,6 @@ int generic_swapfile_activate(struct swap_info_struct *, struct file *, | |||
400 | #define SWAP_ADDRESS_SPACE_SHIFT 14 | 400 | #define SWAP_ADDRESS_SPACE_SHIFT 14 |
401 | #define SWAP_ADDRESS_SPACE_PAGES (1 << SWAP_ADDRESS_SPACE_SHIFT) | 401 | #define SWAP_ADDRESS_SPACE_PAGES (1 << SWAP_ADDRESS_SPACE_SHIFT) |
402 | extern struct address_space *swapper_spaces[]; | 402 | extern struct address_space *swapper_spaces[]; |
403 | extern bool swap_vma_readahead; | ||
404 | #define swap_address_space(entry) \ | 403 | #define swap_address_space(entry) \ |
405 | (&swapper_spaces[swp_type(entry)][swp_offset(entry) \ | 404 | (&swapper_spaces[swp_type(entry)][swp_offset(entry) \ |
406 | >> SWAP_ADDRESS_SPACE_SHIFT]) | 405 | >> SWAP_ADDRESS_SPACE_SHIFT]) |
@@ -422,14 +421,10 @@ extern struct page *read_swap_cache_async(swp_entry_t, gfp_t, | |||
422 | extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t, | 421 | extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t, |
423 | struct vm_area_struct *vma, unsigned long addr, | 422 | struct vm_area_struct *vma, unsigned long addr, |
424 | bool *new_page_allocated); | 423 | bool *new_page_allocated); |
425 | extern struct page *swapin_readahead(swp_entry_t, gfp_t, | 424 | extern struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t flag, |
426 | struct vm_area_struct *vma, unsigned long addr); | 425 | struct vm_fault *vmf); |
427 | 426 | extern struct page *swapin_readahead(swp_entry_t entry, gfp_t flag, | |
428 | extern struct page *swap_readahead_detect(struct vm_fault *vmf, | 427 | struct vm_fault *vmf); |
429 | struct vma_swap_readahead *swap_ra); | ||
430 | extern struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask, | ||
431 | struct vm_fault *vmf, | ||
432 | struct vma_swap_readahead *swap_ra); | ||
433 | 428 | ||
434 | /* linux/mm/swapfile.c */ | 429 | /* linux/mm/swapfile.c */ |
435 | extern atomic_long_t nr_swap_pages; | 430 | extern atomic_long_t nr_swap_pages; |
@@ -437,11 +432,6 @@ extern long total_swap_pages; | |||
437 | extern atomic_t nr_rotate_swap; | 432 | extern atomic_t nr_rotate_swap; |
438 | extern bool has_usable_swap(void); | 433 | extern bool has_usable_swap(void); |
439 | 434 | ||
440 | static inline bool swap_use_vma_readahead(void) | ||
441 | { | ||
442 | return READ_ONCE(swap_vma_readahead) && !atomic_read(&nr_rotate_swap); | ||
443 | } | ||
444 | |||
445 | /* Swap 50% full? Release swapcache more aggressively.. */ | 435 | /* Swap 50% full? Release swapcache more aggressively.. */ |
446 | static inline bool vm_swap_full(void) | 436 | static inline bool vm_swap_full(void) |
447 | { | 437 | { |
@@ -537,26 +527,14 @@ static inline void put_swap_page(struct page *page, swp_entry_t swp) | |||
537 | { | 527 | { |
538 | } | 528 | } |
539 | 529 | ||
540 | static inline struct page *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask, | 530 | static inline struct page *swap_cluster_readahead(swp_entry_t entry, |
541 | struct vm_area_struct *vma, unsigned long addr) | 531 | gfp_t gfp_mask, struct vm_fault *vmf) |
542 | { | 532 | { |
543 | return NULL; | 533 | return NULL; |
544 | } | 534 | } |
545 | 535 | ||
546 | static inline bool swap_use_vma_readahead(void) | 536 | static inline struct page *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask, |
547 | { | 537 | struct vm_fault *vmf) |
548 | return false; | ||
549 | } | ||
550 | |||
551 | static inline struct page *swap_readahead_detect( | ||
552 | struct vm_fault *vmf, struct vma_swap_readahead *swap_ra) | ||
553 | { | ||
554 | return NULL; | ||
555 | } | ||
556 | |||
557 | static inline struct page *do_swap_page_readahead( | ||
558 | swp_entry_t fentry, gfp_t gfp_mask, | ||
559 | struct vm_fault *vmf, struct vma_swap_readahead *swap_ra) | ||
560 | { | 538 | { |
561 | return NULL; | 539 | return NULL; |
562 | } | 540 | } |
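With swap_readahead_detect() and do_swap_page_readahead() gone from the header, swapin_readahead() is the single entry point and can choose the readahead policy internally. A sketch of what that dispatch can look like in mm/swap_state.c, assuming swap_use_vma_readahead() and swap_vma_readahead() are now static helpers there:

struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
			      struct vm_fault *vmf)
{
	/* Pick VMA-based or physical-cluster readahead behind one interface. */
	return swap_use_vma_readahead() ?
			swap_vma_readahead(entry, gfp_mask, vmf) :
			swap_cluster_readahead(entry, gfp_mask, vmf);
}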
diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h index 57a8e98f2708..2219cce81ca4 100644 --- a/include/linux/zsmalloc.h +++ b/include/linux/zsmalloc.h | |||
@@ -47,6 +47,8 @@ void zs_destroy_pool(struct zs_pool *pool); | |||
47 | unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t flags); | 47 | unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t flags); |
48 | void zs_free(struct zs_pool *pool, unsigned long obj); | 48 | void zs_free(struct zs_pool *pool, unsigned long obj); |
49 | 49 | ||
50 | size_t zs_huge_class_size(struct zs_pool *pool); | ||
51 | |||
50 | void *zs_map_object(struct zs_pool *pool, unsigned long handle, | 52 | void *zs_map_object(struct zs_pool *pool, unsigned long handle, |
51 | enum zs_mapmode mm); | 53 | enum zs_mapmode mm); |
52 | void zs_unmap_object(struct zs_pool *pool, unsigned long handle); | 54 | void zs_unmap_object(struct zs_pool *pool, unsigned long handle); |
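zs_huge_class_size() reports the smallest object size that zsmalloc can only store in a full-page ("huge") size class, so callers no longer need to hard-code that threshold. A hedged sketch of how a compressed-storage user might consume it; create_pool_sketch() and store_compressed() are hypothetical, only the zsmalloc calls come from this header:

static size_t huge_class_size;	/* cached once at pool creation */

static struct zs_pool *create_pool_sketch(void)
{
	struct zs_pool *pool = zs_create_pool("example");

	if (pool)
		huge_class_size = zs_huge_class_size(pool);
	return pool;
}

static unsigned long store_compressed(struct zs_pool *pool, size_t clen)
{
	/*
	 * If compression did not get below the huge-class threshold,
	 * zsmalloc would round the object up to a full page anyway,
	 * so store the original PAGE_SIZE bytes instead.
	 */
	if (clen >= huge_class_size)
		clen = PAGE_SIZE;
	return zs_malloc(pool, clen, GFP_KERNEL);
}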
diff --git a/include/net/sock.h b/include/net/sock.h index 49bd2c1796b0..74d725fdbe0f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h | |||
@@ -1114,8 +1114,8 @@ struct proto { | |||
1114 | struct kmem_cache *slab; | 1114 | struct kmem_cache *slab; |
1115 | unsigned int obj_size; | 1115 | unsigned int obj_size; |
1116 | slab_flags_t slab_flags; | 1116 | slab_flags_t slab_flags; |
1117 | size_t useroffset; /* Usercopy region offset */ | 1117 | unsigned int useroffset; /* Usercopy region offset */ |
1118 | size_t usersize; /* Usercopy region size */ | 1118 | unsigned int usersize; /* Usercopy region size */ |
1119 | 1119 | ||
1120 | struct percpu_counter *orphan_count; | 1120 | struct percpu_counter *orphan_count; |
1121 | 1121 | ||
diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h index bcf4daccd6be..711372845945 100644 --- a/include/trace/events/migrate.h +++ b/include/trace/events/migrate.h | |||
@@ -20,7 +20,7 @@ | |||
20 | EM( MR_SYSCALL, "syscall_or_cpuset") \ | 20 | EM( MR_SYSCALL, "syscall_or_cpuset") \ |
21 | EM( MR_MEMPOLICY_MBIND, "mempolicy_mbind") \ | 21 | EM( MR_MEMPOLICY_MBIND, "mempolicy_mbind") \ |
22 | EM( MR_NUMA_MISPLACED, "numa_misplaced") \ | 22 | EM( MR_NUMA_MISPLACED, "numa_misplaced") \ |
23 | EMe(MR_CMA, "cma") | 23 | EMe(MR_CONTIG_RANGE, "contig_range") |
24 | 24 | ||
25 | /* | 25 | /* |
26 | * First define the enums in the above macros to be exported to userspace | 26 | * First define the enums in the above macros to be exported to userspace |
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index e0b8b9173e1c..6570c5b45ba1 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h | |||
@@ -78,26 +78,29 @@ TRACE_EVENT(mm_vmscan_kswapd_wake, | |||
78 | 78 | ||
79 | TRACE_EVENT(mm_vmscan_wakeup_kswapd, | 79 | TRACE_EVENT(mm_vmscan_wakeup_kswapd, |
80 | 80 | ||
81 | TP_PROTO(int nid, int zid, int order), | 81 | TP_PROTO(int nid, int zid, int order, gfp_t gfp_flags), |
82 | 82 | ||
83 | TP_ARGS(nid, zid, order), | 83 | TP_ARGS(nid, zid, order, gfp_flags), |
84 | 84 | ||
85 | TP_STRUCT__entry( | 85 | TP_STRUCT__entry( |
86 | __field( int, nid ) | 86 | __field( int, nid ) |
87 | __field( int, zid ) | 87 | __field( int, zid ) |
88 | __field( int, order ) | 88 | __field( int, order ) |
89 | __field( gfp_t, gfp_flags ) | ||
89 | ), | 90 | ), |
90 | 91 | ||
91 | TP_fast_assign( | 92 | TP_fast_assign( |
92 | __entry->nid = nid; | 93 | __entry->nid = nid; |
93 | __entry->zid = zid; | 94 | __entry->zid = zid; |
94 | __entry->order = order; | 95 | __entry->order = order; |
96 | __entry->gfp_flags = gfp_flags; | ||
95 | ), | 97 | ), |
96 | 98 | ||
97 | TP_printk("nid=%d zid=%d order=%d", | 99 | TP_printk("nid=%d zid=%d order=%d gfp_flags=%s", |
98 | __entry->nid, | 100 | __entry->nid, |
99 | __entry->zid, | 101 | __entry->zid, |
100 | __entry->order) | 102 | __entry->order, |
103 | show_gfp_flags(__entry->gfp_flags)) | ||
101 | ); | 104 | ); |
102 | 105 | ||
103 | DECLARE_EVENT_CLASS(mm_vmscan_direct_reclaim_begin_template, | 106 | DECLARE_EVENT_CLASS(mm_vmscan_direct_reclaim_begin_template, |
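On the caller side this means whoever wakes kswapd must hand the allocation's gfp mask through to the tracepoint. A hedged sketch of such a call site (names and surrounding logic are illustrative, not the exact mm/vmscan.c hunk):

static void wakeup_kswapd_sketch(struct zone *zone, gfp_t gfp_flags, int order)
{
	pg_data_t *pgdat = zone->zone_pgdat;

	if (!waitqueue_active(&pgdat->kswapd_wait))
		return;

	trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, zone_idx(zone),
				      order, gfp_flags);
	wake_up_interruptible(&pgdat->kswapd_wait);
}

The emitted trace line then carries a decoded mask, e.g. gfp_flags=GFP_HIGHUSER_MOVABLE, making it possible to tell which kind of allocation triggered the wakeup.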
diff --git a/kernel/fork.c b/kernel/fork.c index f71b67dc156d..242c8c93d285 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -595,6 +595,8 @@ static void check_mm(struct mm_struct *mm) | |||
595 | void __mmdrop(struct mm_struct *mm) | 595 | void __mmdrop(struct mm_struct *mm) |
596 | { | 596 | { |
597 | BUG_ON(mm == &init_mm); | 597 | BUG_ON(mm == &init_mm); |
598 | WARN_ON_ONCE(mm == current->mm); | ||
599 | WARN_ON_ONCE(mm == current->active_mm); | ||
598 | mm_free_pgd(mm); | 600 | mm_free_pgd(mm); |
599 | destroy_context(mm); | 601 | destroy_context(mm); |
600 | hmm_mm_destroy(mm); | 602 | hmm_mm_destroy(mm); |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 28b68995a417..e8afd6086f23 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -5560,6 +5560,7 @@ void idle_task_exit(void) | |||
5560 | 5560 | ||
5561 | if (mm != &init_mm) { | 5561 | if (mm != &init_mm) { |
5562 | switch_mm(mm, &init_mm, current); | 5562 | switch_mm(mm, &init_mm, current); |
5563 | current->active_mm = &init_mm; | ||
5563 | finish_arch_post_lock_switch(); | 5564 | finish_arch_post_lock_switch(); |
5564 | } | 5565 | } |
5565 | mmdrop(mm); | 5566 | mmdrop(mm); |
diff --git a/kernel/ucount.c b/kernel/ucount.c index b4eeee03934f..f48d1b6376a4 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/slab.h> | 10 | #include <linux/slab.h> |
11 | #include <linux/cred.h> | 11 | #include <linux/cred.h> |
12 | #include <linux/hash.h> | 12 | #include <linux/hash.h> |
13 | #include <linux/kmemleak.h> | ||
13 | #include <linux/user_namespace.h> | 14 | #include <linux/user_namespace.h> |
14 | 15 | ||
15 | #define UCOUNTS_HASHTABLE_BITS 10 | 16 | #define UCOUNTS_HASHTABLE_BITS 10 |
diff --git a/lib/bitmap.c b/lib/bitmap.c index 9e498c77ed0e..a42eff7e8c48 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c | |||
@@ -607,7 +607,7 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen, | |||
607 | /* if no digit is after '-', it's wrong*/ | 607 | /* if no digit is after '-', it's wrong*/ |
608 | if (at_start && in_range) | 608 | if (at_start && in_range) |
609 | return -EINVAL; | 609 | return -EINVAL; |
610 | if (!(a <= b) || !(used_size <= group_size)) | 610 | if (!(a <= b) || group_size == 0 || !(used_size <= group_size)) |
611 | return -EINVAL; | 611 | return -EINVAL; |
612 | if (b >= nmaskbits) | 612 | if (b >= nmaskbits) |
613 | return -ERANGE; | 613 | return -ERANGE; |
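The strings accepted here use the region syntax start[-end[:used/group]]; the added check rejects regions whose group size is zero, which the old used_size <= group_size test alone did not catch when used_size was also zero. For reference, a minimal sketch of a valid region string (the 256-bit map is illustrative):

static DECLARE_BITMAP(mask, 256);

static int parselist_example(void)
{
	/* Sets bits 0-3, 8-11, 16-19 and 24-27: 4 used bits per group of 8. */
	return bitmap_parselist("0-31:4/8", mask, 256);
}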
diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index b3f235baa05d..413367cf569e 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c | |||
@@ -255,6 +255,10 @@ static const struct test_bitmap_parselist parselist_tests[] __initconst = { | |||
255 | {-EINVAL, "-1", NULL, 8, 0}, | 255 | {-EINVAL, "-1", NULL, 8, 0}, |
256 | {-EINVAL, "-0", NULL, 8, 0}, | 256 | {-EINVAL, "-0", NULL, 8, 0}, |
257 | {-EINVAL, "10-1", NULL, 8, 0}, | 257 | {-EINVAL, "10-1", NULL, 8, 0}, |
258 | {-EINVAL, "0-31:", NULL, 8, 0}, | ||
259 | {-EINVAL, "0-31:0", NULL, 8, 0}, | ||
260 | {-EINVAL, "0-31:0/0", NULL, 8, 0}, | ||
261 | {-EINVAL, "0-31:1/0", NULL, 8, 0}, | ||
258 | {-EINVAL, "0-31:10/1", NULL, 8, 0}, | 262 | {-EINVAL, "0-31:10/1", NULL, 8, 0}, |
259 | }; | 263 | }; |
260 | 264 | ||
diff --git a/lib/test_firmware.c b/lib/test_firmware.c index 078a61480573..cee000ac54d8 100644 --- a/lib/test_firmware.c +++ b/lib/test_firmware.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/uaccess.h> | 21 | #include <linux/uaccess.h> |
22 | #include <linux/delay.h> | 22 | #include <linux/delay.h> |
23 | #include <linux/kthread.h> | 23 | #include <linux/kthread.h> |
24 | #include <linux/vmalloc.h> | ||
24 | 25 | ||
25 | #define TEST_FIRMWARE_NAME "test-firmware.bin" | 26 | #define TEST_FIRMWARE_NAME "test-firmware.bin" |
26 | #define TEST_FIRMWARE_NUM_REQS 4 | 27 | #define TEST_FIRMWARE_NUM_REQS 4 |
diff --git a/mm/Makefile b/mm/Makefile index e669f02c5a54..b4e54a9ae9c5 100644 --- a/mm/Makefile +++ b/mm/Makefile | |||
@@ -37,7 +37,7 @@ obj-y := filemap.o mempool.o oom_kill.o \ | |||
37 | readahead.o swap.o truncate.o vmscan.o shmem.o \ | 37 | readahead.o swap.o truncate.o vmscan.o shmem.o \ |
38 | util.o mmzone.o vmstat.o backing-dev.o \ | 38 | util.o mmzone.o vmstat.o backing-dev.o \ |
39 | mm_init.o mmu_context.o percpu.o slab_common.o \ | 39 | mm_init.o mmu_context.o percpu.o slab_common.o \ |
40 | compaction.o vmacache.o swap_slots.o \ | 40 | compaction.o vmacache.o \ |
41 | interval_tree.o list_lru.o workingset.o \ | 41 | interval_tree.o list_lru.o workingset.o \ |
42 | debug.o $(mmu-y) | 42 | debug.o $(mmu-y) |
43 | 43 | ||
@@ -55,7 +55,7 @@ ifdef CONFIG_MMU | |||
55 | endif | 55 | endif |
56 | obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o | 56 | obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o |
57 | 57 | ||
58 | obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o | 58 | obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o swap_slots.o |
59 | obj-$(CONFIG_FRONTSWAP) += frontswap.o | 59 | obj-$(CONFIG_FRONTSWAP) += frontswap.o |
60 | obj-$(CONFIG_ZSWAP) += zswap.o | 60 | obj-$(CONFIG_ZSWAP) += zswap.o |
61 | obj-$(CONFIG_HAS_DMA) += dmapool.o | 61 | obj-$(CONFIG_HAS_DMA) += dmapool.o |
diff --git a/mm/backing-dev.c b/mm/backing-dev.c index d2984e9fcf08..08b9aab631ab 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c | |||
@@ -100,18 +100,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v) | |||
100 | 100 | ||
101 | return 0; | 101 | return 0; |
102 | } | 102 | } |
103 | 103 | DEFINE_SHOW_ATTRIBUTE(bdi_debug_stats); | |
104 | static int bdi_debug_stats_open(struct inode *inode, struct file *file) | ||
105 | { | ||
106 | return single_open(file, bdi_debug_stats_show, inode->i_private); | ||
107 | } | ||
108 | |||
109 | static const struct file_operations bdi_debug_stats_fops = { | ||
110 | .open = bdi_debug_stats_open, | ||
111 | .read = seq_read, | ||
112 | .llseek = seq_lseek, | ||
113 | .release = single_release, | ||
114 | }; | ||
115 | 104 | ||
116 | static int bdi_debug_register(struct backing_dev_info *bdi, const char *name) | 105 | static int bdi_debug_register(struct backing_dev_info *bdi, const char *name) |
117 | { | 106 | { |
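DEFINE_SHOW_ATTRIBUTE(bdi_debug_stats) comes from <linux/seq_file.h> and generates the single_open() wrapper plus file_operations that used to be spelled out by hand here (and in memblock.c below). Its expansion is roughly the deleted boilerplate, shown for a generic name:

static int name_open(struct inode *inode, struct file *file)
{
	return single_open(file, name_show, inode->i_private);
}

static const struct file_operations name_fops = {
	.owner		= THIS_MODULE,
	.open		= name_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};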
diff --git a/mm/cma.c b/mm/cma.c --- a/mm/cma.c +++ b/mm/cma.c | |||
@@ -35,6 +35,7 @@ | |||
35 | #include <linux/cma.h> | 35 | #include <linux/cma.h> |
36 | #include <linux/highmem.h> | 36 | #include <linux/highmem.h> |
37 | #include <linux/io.h> | 37 | #include <linux/io.h> |
38 | #include <linux/kmemleak.h> | ||
38 | #include <trace/events/cma.h> | 39 | #include <trace/events/cma.h> |
39 | 40 | ||
40 | #include "cma.h" | 41 | #include "cma.h" |
@@ -165,6 +166,9 @@ core_initcall(cma_init_reserved_areas); | |||
165 | * @base: Base address of the reserved area | 166 | * @base: Base address of the reserved area |
166 | * @size: Size of the reserved area (in bytes), | 167 | * @size: Size of the reserved area (in bytes), |
167 | * @order_per_bit: Order of pages represented by one bit on bitmap. | 168 | * @order_per_bit: Order of pages represented by one bit on bitmap. |
169 | * @name: The name of the area. If this parameter is NULL, the name of | ||
170 | * the area will be set to "cmaN", where N is a running counter of | ||
171 | * used areas. | ||
168 | * @res_cma: Pointer to store the created cma region. | 172 | * @res_cma: Pointer to store the created cma region. |
169 | * | 173 | * |
170 | * This function creates custom contiguous area from already reserved memory. | 174 | * This function creates custom contiguous area from already reserved memory. |
@@ -227,6 +231,7 @@ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, | |||
227 | * @alignment: Alignment for the CMA area, should be power of 2 or zero | 231 | * @alignment: Alignment for the CMA area, should be power of 2 or zero |
228 | * @order_per_bit: Order of pages represented by one bit on bitmap. | 232 | * @order_per_bit: Order of pages represented by one bit on bitmap. |
229 | * @fixed: hint about where to place the reserved area | 233 | * @fixed: hint about where to place the reserved area |
234 | * @name: The name of the area. See function cma_init_reserved_mem() | ||
230 | * @res_cma: Pointer to store the created cma region. | 235 | * @res_cma: Pointer to store the created cma region. |
231 | * | 236 | * |
232 | * This function reserves memory from early allocator. It should be | 237 | * This function reserves memory from early allocator. It should be |
@@ -390,6 +395,7 @@ static inline void cma_debug_show_areas(struct cma *cma) { } | |||
390 | * @cma: Contiguous memory region for which the allocation is performed. | 395 | * @cma: Contiguous memory region for which the allocation is performed. |
391 | * @count: Requested number of pages. | 396 | * @count: Requested number of pages. |
392 | * @align: Requested alignment of pages (in PAGE_SIZE order). | 397 | * @align: Requested alignment of pages (in PAGE_SIZE order). |
398 | * @gfp_mask: GFP mask to use during compaction | ||
393 | * | 399 | * |
394 | * This function allocates part of contiguous memory on specific | 400 | * This function allocates part of contiguous memory on specific |
395 | * contiguous memory area. | 401 | * contiguous memory area. |
diff --git a/mm/compaction.c b/mm/compaction.c index 2c8999d027ab..88d01a50a015 100644 --- a/mm/compaction.c +++ b/mm/compaction.c | |||
@@ -576,6 +576,7 @@ isolate_fail: | |||
576 | 576 | ||
577 | /** | 577 | /** |
578 | * isolate_freepages_range() - isolate free pages. | 578 | * isolate_freepages_range() - isolate free pages. |
579 | * @cc: Compaction control structure. | ||
579 | * @start_pfn: The first PFN to start isolating. | 580 | * @start_pfn: The first PFN to start isolating. |
580 | * @end_pfn: The one-past-last PFN. | 581 | * @end_pfn: The one-past-last PFN. |
581 | * | 582 | * |
@@ -1988,6 +1989,14 @@ static void kcompactd_do_work(pg_data_t *pgdat) | |||
1988 | compaction_defer_reset(zone, cc.order, false); | 1989 | compaction_defer_reset(zone, cc.order, false); |
1989 | } else if (status == COMPACT_PARTIAL_SKIPPED || status == COMPACT_COMPLETE) { | 1990 | } else if (status == COMPACT_PARTIAL_SKIPPED || status == COMPACT_COMPLETE) { |
1990 | /* | 1991 | /* |
1992 | * Buddy pages may become stranded on pcps that could | ||
1993 | * otherwise coalesce on the zone's free area for | ||
1994 | * order >= cc.order. This is ratelimited by the | ||
1995 | * upcoming deferral. | ||
1996 | */ | ||
1997 | drain_all_pages(zone); | ||
1998 | |||
1999 | /* | ||
1991 | * We use sync migration mode here, so we defer like | 2000 | * We use sync migration mode here, so we defer like |
1992 | * sync direct compaction does. | 2001 | * sync direct compaction does. |
1993 | */ | 2002 | */ |
diff --git a/mm/failslab.c b/mm/failslab.c index 8087d976a809..1f2f248e3601 100644 --- a/mm/failslab.c +++ b/mm/failslab.c | |||
@@ -14,7 +14,7 @@ static struct { | |||
14 | .cache_filter = false, | 14 | .cache_filter = false, |
15 | }; | 15 | }; |
16 | 16 | ||
17 | bool should_failslab(struct kmem_cache *s, gfp_t gfpflags) | 17 | bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) |
18 | { | 18 | { |
19 | /* No fault-injection for bootstrap cache */ | 19 | /* No fault-injection for bootstrap cache */ |
20 | if (unlikely(s == kmem_cache)) | 20 | if (unlikely(s == kmem_cache)) |
diff --git a/mm/gup.c b/mm/gup.c --- a/mm/gup.c +++ b/mm/gup.c | |||
@@ -531,7 +531,7 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, | |||
531 | * reCOWed by userspace write). | 531 | * reCOWed by userspace write). |
532 | */ | 532 | */ |
533 | if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE)) | 533 | if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE)) |
534 | *flags |= FOLL_COW; | 534 | *flags |= FOLL_COW; |
535 | return 0; | 535 | return 0; |
536 | } | 536 | } |
537 | 537 | ||
@@ -1638,7 +1638,7 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, | |||
1638 | PMD_SHIFT, next, write, pages, nr)) | 1638 | PMD_SHIFT, next, write, pages, nr)) |
1639 | return 0; | 1639 | return 0; |
1640 | } else if (!gup_pte_range(pmd, addr, next, write, pages, nr)) | 1640 | } else if (!gup_pte_range(pmd, addr, next, write, pages, nr)) |
1641 | return 0; | 1641 | return 0; |
1642 | } while (pmdp++, addr = next, addr != end); | 1642 | } while (pmdp++, addr = next, addr != end); |
1643 | 1643 | ||
1644 | return 1; | 1644 | return 1; |
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 5a68730eebd6..f0ae8d1d4329 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
@@ -2356,26 +2356,13 @@ static void __split_huge_page_tail(struct page *head, int tail, | |||
2356 | struct page *page_tail = head + tail; | 2356 | struct page *page_tail = head + tail; |
2357 | 2357 | ||
2358 | VM_BUG_ON_PAGE(atomic_read(&page_tail->_mapcount) != -1, page_tail); | 2358 | VM_BUG_ON_PAGE(atomic_read(&page_tail->_mapcount) != -1, page_tail); |
2359 | VM_BUG_ON_PAGE(page_ref_count(page_tail) != 0, page_tail); | ||
2360 | 2359 | ||
2361 | /* | 2360 | /* |
2362 | * tail_page->_refcount is zero and not changing from under us. But | 2361 | * Clone page flags before unfreezing refcount. |
2363 | * get_page_unless_zero() may be running from under us on the | 2362 | * |
2364 | * tail_page. If we used atomic_set() below instead of atomic_inc() or | 2363 | * After successful get_page_unless_zero() might follow flags change, |
2365 | atomic_add(), we would then run atomic_set() concurrently with | 2364 | for example lock_page() which sets PG_waiters. |
2366 | * get_page_unless_zero(), and atomic_set() is implemented in C not | ||
2367 | * using locked ops. spin_unlock on x86 sometime uses locked ops | ||
2368 | * because of PPro errata 66, 92, so unless somebody can guarantee | ||
2369 | * atomic_set() here would be safe on all archs (and not only on x86), | ||
2370 | * it's safer to use atomic_inc()/atomic_add(). | ||
2371 | */ | 2365 | */ |
2372 | if (PageAnon(head) && !PageSwapCache(head)) { | ||
2373 | page_ref_inc(page_tail); | ||
2374 | } else { | ||
2375 | /* Additional pin to radix tree */ | ||
2376 | page_ref_add(page_tail, 2); | ||
2377 | } | ||
2378 | |||
2379 | page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; | 2366 | page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; |
2380 | page_tail->flags |= (head->flags & | 2367 | page_tail->flags |= (head->flags & |
2381 | ((1L << PG_referenced) | | 2368 | ((1L << PG_referenced) | |
@@ -2388,14 +2375,21 @@ static void __split_huge_page_tail(struct page *head, int tail, | |||
2388 | (1L << PG_unevictable) | | 2375 | (1L << PG_unevictable) | |
2389 | (1L << PG_dirty))); | 2376 | (1L << PG_dirty))); |
2390 | 2377 | ||
2391 | /* | 2378 | /* Page flags must be visible before we make the page non-compound. */ |
2392 | * After clearing PageTail the gup refcount can be released. | ||
2393 | * Page flags also must be visible before we make the page non-compound. | ||
2394 | */ | ||
2395 | smp_wmb(); | 2379 | smp_wmb(); |
2396 | 2380 | ||
2381 | /* | ||
2382 | * Clear PageTail before unfreezing page refcount. | ||
2383 | * | ||
2384 | * After successful get_page_unless_zero() might follow put_page() | ||
2385 | * which needs correct compound_head(). | ||
2386 | */ | ||
2397 | clear_compound_head(page_tail); | 2387 | clear_compound_head(page_tail); |
2398 | 2388 | ||
2389 | /* Finally unfreeze refcount. Additional reference from page cache. */ | ||
2390 | page_ref_unfreeze(page_tail, 1 + (!PageAnon(head) || | ||
2391 | PageSwapCache(head))); | ||
2392 | |||
2399 | if (page_is_young(head)) | 2393 | if (page_is_young(head)) |
2400 | set_page_young(page_tail); | 2394 | set_page_young(page_tail); |
2401 | if (page_is_idle(head)) | 2395 | if (page_is_idle(head)) |
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 976bbc5646fe..218679138255 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -637,29 +637,22 @@ EXPORT_SYMBOL_GPL(linear_hugepage_index); | |||
637 | */ | 637 | */ |
638 | unsigned long vma_kernel_pagesize(struct vm_area_struct *vma) | 638 | unsigned long vma_kernel_pagesize(struct vm_area_struct *vma) |
639 | { | 639 | { |
640 | struct hstate *hstate; | 640 | if (vma->vm_ops && vma->vm_ops->pagesize) |
641 | 641 | return vma->vm_ops->pagesize(vma); | |
642 | if (!is_vm_hugetlb_page(vma)) | 642 | return PAGE_SIZE; |
643 | return PAGE_SIZE; | ||
644 | |||
645 | hstate = hstate_vma(vma); | ||
646 | |||
647 | return 1UL << huge_page_shift(hstate); | ||
648 | } | 643 | } |
649 | EXPORT_SYMBOL_GPL(vma_kernel_pagesize); | 644 | EXPORT_SYMBOL_GPL(vma_kernel_pagesize); |
650 | 645 | ||
651 | /* | 646 | /* |
652 | * Return the page size being used by the MMU to back a VMA. In the majority | 647 | * Return the page size being used by the MMU to back a VMA. In the majority |
653 | * of cases, the page size used by the kernel matches the MMU size. On | 648 | * of cases, the page size used by the kernel matches the MMU size. On |
654 | * architectures where it differs, an architecture-specific version of this | 649 | * architectures where it differs, an architecture-specific 'strong' |
655 | * function is required. | 650 | * version of this symbol is required. |
656 | */ | 651 | */ |
657 | #ifndef vma_mmu_pagesize | 652 | __weak unsigned long vma_mmu_pagesize(struct vm_area_struct *vma) |
658 | unsigned long vma_mmu_pagesize(struct vm_area_struct *vma) | ||
659 | { | 653 | { |
660 | return vma_kernel_pagesize(vma); | 654 | return vma_kernel_pagesize(vma); |
661 | } | 655 | } |
662 | #endif | ||
663 | 656 | ||
664 | /* | 657 | /* |
665 | * Flags for MAP_PRIVATE reservations. These are stored in the bottom | 658 | * Flags for MAP_PRIVATE reservations. These are stored in the bottom |
@@ -3153,6 +3146,13 @@ static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr) | |||
3153 | return 0; | 3146 | return 0; |
3154 | } | 3147 | } |
3155 | 3148 | ||
3149 | static unsigned long hugetlb_vm_op_pagesize(struct vm_area_struct *vma) | ||
3150 | { | ||
3151 | struct hstate *hstate = hstate_vma(vma); | ||
3152 | |||
3153 | return 1UL << huge_page_shift(hstate); | ||
3154 | } | ||
3155 | |||
3156 | /* | 3156 | /* |
3157 | * We cannot handle pagefaults against hugetlb pages at all. They cause | 3157 | * We cannot handle pagefaults against hugetlb pages at all. They cause |
3158 | * handle_mm_fault() to try to instantiate regular-sized pages in the | 3158 | * handle_mm_fault() to try to instantiate regular-sized pages in the |
@@ -3170,6 +3170,7 @@ const struct vm_operations_struct hugetlb_vm_ops = { | |||
3170 | .open = hugetlb_vm_op_open, | 3170 | .open = hugetlb_vm_op_open, |
3171 | .close = hugetlb_vm_op_close, | 3171 | .close = hugetlb_vm_op_close, |
3172 | .split = hugetlb_vm_op_split, | 3172 | .split = hugetlb_vm_op_split, |
3173 | .pagesize = hugetlb_vm_op_pagesize, | ||
3173 | }; | 3174 | }; |
3174 | 3175 | ||
3175 | static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page, | 3176 | static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page, |
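With the ->pagesize() hook on vm_operations_struct, any mapping backed by larger-than-PAGE_SIZE units can report its granularity to vma_kernel_pagesize() the way hugetlbfs does above. A hypothetical driver sketch (foo_vm_fault and the fixed 2 MB backing size are assumptions, not part of this patch):

/* A driver whose VMAs are always backed by 2 MB mappings. */
static unsigned long foo_vm_pagesize(struct vm_area_struct *vma)
{
	return SZ_2M;
}

static const struct vm_operations_struct foo_vm_ops = {
	.fault	  = foo_vm_fault,	/* assumed to be defined elsewhere */
	.pagesize = foo_vm_pagesize,
};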
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index e13d911251e7..bc0e68f7dc75 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c | |||
@@ -323,9 +323,9 @@ void kasan_free_pages(struct page *page, unsigned int order) | |||
323 | * Adaptive redzone policy taken from the userspace AddressSanitizer runtime. | 323 | * Adaptive redzone policy taken from the userspace AddressSanitizer runtime. |
324 | * For larger allocations larger redzones are used. | 324 | * For larger allocations larger redzones are used. |
325 | */ | 325 | */ |
326 | static size_t optimal_redzone(size_t object_size) | 326 | static unsigned int optimal_redzone(unsigned int object_size) |
327 | { | 327 | { |
328 | int rz = | 328 | return |
329 | object_size <= 64 - 16 ? 16 : | 329 | object_size <= 64 - 16 ? 16 : |
330 | object_size <= 128 - 32 ? 32 : | 330 | object_size <= 128 - 32 ? 32 : |
331 | object_size <= 512 - 64 ? 64 : | 331 | object_size <= 512 - 64 ? 64 : |
@@ -333,14 +333,13 @@ static size_t optimal_redzone(size_t object_size) | |||
333 | object_size <= (1 << 14) - 256 ? 256 : | 333 | object_size <= (1 << 14) - 256 ? 256 : |
334 | object_size <= (1 << 15) - 512 ? 512 : | 334 | object_size <= (1 << 15) - 512 ? 512 : |
335 | object_size <= (1 << 16) - 1024 ? 1024 : 2048; | 335 | object_size <= (1 << 16) - 1024 ? 1024 : 2048; |
336 | return rz; | ||
337 | } | 336 | } |
338 | 337 | ||
339 | void kasan_cache_create(struct kmem_cache *cache, size_t *size, | 338 | void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, |
340 | slab_flags_t *flags) | 339 | slab_flags_t *flags) |
341 | { | 340 | { |
341 | unsigned int orig_size = *size; | ||
342 | int redzone_adjust; | 342 | int redzone_adjust; |
343 | int orig_size = *size; | ||
344 | 343 | ||
345 | /* Add alloc meta. */ | 344 | /* Add alloc meta. */ |
346 | cache->kasan_info.alloc_meta_offset = *size; | 345 | cache->kasan_info.alloc_meta_offset = *size; |
@@ -358,7 +357,8 @@ void kasan_cache_create(struct kmem_cache *cache, size_t *size, | |||
358 | if (redzone_adjust > 0) | 357 | if (redzone_adjust > 0) |
359 | *size += redzone_adjust; | 358 | *size += redzone_adjust; |
360 | 359 | ||
361 | *size = min(KMALLOC_MAX_SIZE, max(*size, cache->object_size + | 360 | *size = min_t(unsigned int, KMALLOC_MAX_SIZE, |
361 | max(*size, cache->object_size + | ||
362 | optimal_redzone(cache->object_size))); | 362 | optimal_redzone(cache->object_size))); |
363 | 363 | ||
364 | /* | 364 | /* |
@@ -382,7 +382,8 @@ void kasan_cache_shrink(struct kmem_cache *cache) | |||
382 | 382 | ||
383 | void kasan_cache_shutdown(struct kmem_cache *cache) | 383 | void kasan_cache_shutdown(struct kmem_cache *cache) |
384 | { | 384 | { |
385 | quarantine_remove_cache(cache); | 385 | if (!__kmem_cache_empty(cache)) |
386 | quarantine_remove_cache(cache); | ||
386 | } | 387 | } |
387 | 388 | ||
388 | size_t kasan_metadata_size(struct kmem_cache *cache) | 389 | size_t kasan_metadata_size(struct kmem_cache *cache) |
diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 46c2290a08f1..9a085d525bbc 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c | |||
@@ -1187,6 +1187,11 @@ EXPORT_SYMBOL(kmemleak_no_scan); | |||
1187 | /** | 1187 | /** |
1188 | * kmemleak_alloc_phys - similar to kmemleak_alloc but taking a physical | 1188 | * kmemleak_alloc_phys - similar to kmemleak_alloc but taking a physical |
1189 | * address argument | 1189 | * address argument |
1190 | * @phys: physical address of the object | ||
1191 | * @size: size of the object | ||
1192 | * @min_count: minimum number of references to this object. | ||
1193 | * See kmemleak_alloc() | ||
1194 | * @gfp: kmalloc() flags used for kmemleak internal memory allocations | ||
1190 | */ | 1195 | */ |
1191 | void __ref kmemleak_alloc_phys(phys_addr_t phys, size_t size, int min_count, | 1196 | void __ref kmemleak_alloc_phys(phys_addr_t phys, size_t size, int min_count, |
1192 | gfp_t gfp) | 1197 | gfp_t gfp) |
@@ -1199,6 +1204,9 @@ EXPORT_SYMBOL(kmemleak_alloc_phys); | |||
1199 | /** | 1204 | /** |
1200 | * kmemleak_free_part_phys - similar to kmemleak_free_part but taking a | 1205 | * kmemleak_free_part_phys - similar to kmemleak_free_part but taking a |
1201 | * physical address argument | 1206 | * physical address argument |
1207 | * @phys: physical address of the beginning or inside an object. This | ||
1208 | * also represents the start of the range to be freed | ||
1209 | * @size: size to be unregistered | ||
1202 | */ | 1210 | */ |
1203 | void __ref kmemleak_free_part_phys(phys_addr_t phys, size_t size) | 1211 | void __ref kmemleak_free_part_phys(phys_addr_t phys, size_t size) |
1204 | { | 1212 | { |
@@ -1210,6 +1218,7 @@ EXPORT_SYMBOL(kmemleak_free_part_phys); | |||
1210 | /** | 1218 | /** |
1211 | * kmemleak_not_leak_phys - similar to kmemleak_not_leak but taking a physical | 1219 | * kmemleak_not_leak_phys - similar to kmemleak_not_leak but taking a physical |
1212 | * address argument | 1220 | * address argument |
1221 | * @phys: physical address of the object | ||
1213 | */ | 1222 | */ |
1214 | void __ref kmemleak_not_leak_phys(phys_addr_t phys) | 1223 | void __ref kmemleak_not_leak_phys(phys_addr_t phys) |
1215 | { | 1224 | { |
@@ -1221,6 +1230,7 @@ EXPORT_SYMBOL(kmemleak_not_leak_phys); | |||
1221 | /** | 1230 | /** |
1222 | * kmemleak_ignore_phys - similar to kmemleak_ignore but taking a physical | 1231 | * kmemleak_ignore_phys - similar to kmemleak_ignore but taking a physical |
1223 | * address argument | 1232 | * address argument |
1233 | * @phys: physical address of the object | ||
1224 | */ | 1234 | */ |
1225 | void __ref kmemleak_ignore_phys(phys_addr_t phys) | 1235 | void __ref kmemleak_ignore_phys(phys_addr_t phys) |
1226 | { | 1236 | { |
@@ -1963,7 +1973,7 @@ static void kmemleak_disable(void) | |||
1963 | /* | 1973 | /* |
1964 | * Allow boot-time kmemleak disabling (enabled by default). | 1974 | * Allow boot-time kmemleak disabling (enabled by default). |
1965 | */ | 1975 | */ |
1966 | static int kmemleak_boot_config(char *str) | 1976 | static int __init kmemleak_boot_config(char *str) |
1967 | { | 1977 | { |
1968 | if (!str) | 1978 | if (!str) |
1969 | return -EINVAL; | 1979 | return -EINVAL; |
diff --git a/mm/ksm.c b/mm/ksm.c --- a/mm/ksm.c +++ b/mm/ksm.c | |||
@@ -1318,10 +1318,10 @@ bool is_page_sharing_candidate(struct stable_node *stable_node) | |||
1318 | return __is_page_sharing_candidate(stable_node, 0); | 1318 | return __is_page_sharing_candidate(stable_node, 0); |
1319 | } | 1319 | } |
1320 | 1320 | ||
1321 | struct page *stable_node_dup(struct stable_node **_stable_node_dup, | 1321 | static struct page *stable_node_dup(struct stable_node **_stable_node_dup, |
1322 | struct stable_node **_stable_node, | 1322 | struct stable_node **_stable_node, |
1323 | struct rb_root *root, | 1323 | struct rb_root *root, |
1324 | bool prune_stale_stable_nodes) | 1324 | bool prune_stale_stable_nodes) |
1325 | { | 1325 | { |
1326 | struct stable_node *dup, *found = NULL, *stable_node = *_stable_node; | 1326 | struct stable_node *dup, *found = NULL, *stable_node = *_stable_node; |
1327 | struct hlist_node *hlist_safe; | 1327 | struct hlist_node *hlist_safe; |
@@ -2082,8 +2082,22 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item) | |||
2082 | tree_rmap_item = | 2082 | tree_rmap_item = |
2083 | unstable_tree_search_insert(rmap_item, page, &tree_page); | 2083 | unstable_tree_search_insert(rmap_item, page, &tree_page); |
2084 | if (tree_rmap_item) { | 2084 | if (tree_rmap_item) { |
2085 | bool split; | ||
2086 | |||
2085 | kpage = try_to_merge_two_pages(rmap_item, page, | 2087 | kpage = try_to_merge_two_pages(rmap_item, page, |
2086 | tree_rmap_item, tree_page); | 2088 | tree_rmap_item, tree_page); |
2089 | /* | ||
2090 | * If both pages we tried to merge belong to the same compound | ||
2091 | * page, then we actually ended up increasing the reference | ||
2092 | * count of the same compound page twice, and split_huge_page | ||
2093 | * failed. | ||
2094 | * Here we set a flag if that happened, and we use it later to | ||
2095 | * try split_huge_page again. Since we call put_page right | ||
2096 | * afterwards, the reference count will be correct and | ||
2097 | * split_huge_page should succeed. | ||
2098 | */ | ||
2099 | split = PageTransCompound(page) | ||
2100 | && compound_head(page) == compound_head(tree_page); | ||
2087 | put_page(tree_page); | 2101 | put_page(tree_page); |
2088 | if (kpage) { | 2102 | if (kpage) { |
2089 | /* | 2103 | /* |
@@ -2110,6 +2124,20 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item) | |||
2110 | break_cow(tree_rmap_item); | 2124 | break_cow(tree_rmap_item); |
2111 | break_cow(rmap_item); | 2125 | break_cow(rmap_item); |
2112 | } | 2126 | } |
2127 | } else if (split) { | ||
2128 | /* | ||
2129 | * We are here if we tried to merge two pages and | ||
2130 | * failed because they both belonged to the same | ||
2131 | * compound page. We will split the page now, but no | ||
2132 | * merging will take place. | ||
2133 | * We do not want to add the cost of a full lock; if | ||
2134 | * the page is locked, it is better to skip it and | ||
2135 | * perhaps try again later. | ||
2136 | */ | ||
2137 | if (!trylock_page(page)) | ||
2138 | return; | ||
2139 | split_huge_page(page); | ||
2140 | unlock_page(page); | ||
2113 | } | 2141 | } |
2114 | } | 2142 | } |
2115 | } | 2143 | } |
diff --git a/mm/list_lru.c b/mm/list_lru.c index fd41e969ede5..fcfb6c89ed47 100644 --- a/mm/list_lru.c +++ b/mm/list_lru.c | |||
@@ -52,14 +52,15 @@ static inline bool list_lru_memcg_aware(struct list_lru *lru) | |||
52 | static inline struct list_lru_one * | 52 | static inline struct list_lru_one * |
53 | list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx) | 53 | list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx) |
54 | { | 54 | { |
55 | struct list_lru_memcg *memcg_lrus; | ||
55 | /* | 56 | /* |
56 | * The lock protects the array of per cgroup lists from relocation | 57 | * Either lock or RCU protects the array of per cgroup lists |
57 | * (see memcg_update_list_lru_node). | 58 | * from relocation (see memcg_update_list_lru_node). |
58 | */ | 59 | */ |
59 | lockdep_assert_held(&nlru->lock); | 60 | memcg_lrus = rcu_dereference_check(nlru->memcg_lrus, |
60 | if (nlru->memcg_lrus && idx >= 0) | 61 | lockdep_is_held(&nlru->lock)); |
61 | return nlru->memcg_lrus->lru[idx]; | 62 | if (memcg_lrus && idx >= 0) |
62 | 63 | return memcg_lrus->lru[idx]; | |
63 | return &nlru->lru; | 64 | return &nlru->lru; |
64 | } | 65 | } |
65 | 66 | ||
@@ -168,10 +169,10 @@ static unsigned long __list_lru_count_one(struct list_lru *lru, | |||
168 | struct list_lru_one *l; | 169 | struct list_lru_one *l; |
169 | unsigned long count; | 170 | unsigned long count; |
170 | 171 | ||
171 | spin_lock(&nlru->lock); | 172 | rcu_read_lock(); |
172 | l = list_lru_from_memcg_idx(nlru, memcg_idx); | 173 | l = list_lru_from_memcg_idx(nlru, memcg_idx); |
173 | count = l->nr_items; | 174 | count = l->nr_items; |
174 | spin_unlock(&nlru->lock); | 175 | rcu_read_unlock(); |
175 | 176 | ||
176 | return count; | 177 | return count; |
177 | } | 178 | } |
@@ -324,24 +325,41 @@ fail: | |||
324 | 325 | ||
325 | static int memcg_init_list_lru_node(struct list_lru_node *nlru) | 326 | static int memcg_init_list_lru_node(struct list_lru_node *nlru) |
326 | { | 327 | { |
328 | struct list_lru_memcg *memcg_lrus; | ||
327 | int size = memcg_nr_cache_ids; | 329 | int size = memcg_nr_cache_ids; |
328 | 330 | ||
329 | nlru->memcg_lrus = kvmalloc(size * sizeof(void *), GFP_KERNEL); | 331 | memcg_lrus = kvmalloc(sizeof(*memcg_lrus) + |
330 | if (!nlru->memcg_lrus) | 332 | size * sizeof(void *), GFP_KERNEL); |
333 | if (!memcg_lrus) | ||
331 | return -ENOMEM; | 334 | return -ENOMEM; |
332 | 335 | ||
333 | if (__memcg_init_list_lru_node(nlru->memcg_lrus, 0, size)) { | 336 | if (__memcg_init_list_lru_node(memcg_lrus, 0, size)) { |
334 | kvfree(nlru->memcg_lrus); | 337 | kvfree(memcg_lrus); |
335 | return -ENOMEM; | 338 | return -ENOMEM; |
336 | } | 339 | } |
340 | RCU_INIT_POINTER(nlru->memcg_lrus, memcg_lrus); | ||
337 | 341 | ||
338 | return 0; | 342 | return 0; |
339 | } | 343 | } |
340 | 344 | ||
341 | static void memcg_destroy_list_lru_node(struct list_lru_node *nlru) | 345 | static void memcg_destroy_list_lru_node(struct list_lru_node *nlru) |
342 | { | 346 | { |
343 | __memcg_destroy_list_lru_node(nlru->memcg_lrus, 0, memcg_nr_cache_ids); | 347 | struct list_lru_memcg *memcg_lrus; |
344 | kvfree(nlru->memcg_lrus); | 348 | /* |
349 | * This is called when shrinker has already been unregistered, | ||
350 | * and nobody can use it. So, there is no need to use kvfree_rcu(). | ||
351 | */ | ||
352 | memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus, true); | ||
353 | __memcg_destroy_list_lru_node(memcg_lrus, 0, memcg_nr_cache_ids); | ||
354 | kvfree(memcg_lrus); | ||
355 | } | ||
356 | |||
357 | static void kvfree_rcu(struct rcu_head *head) | ||
358 | { | ||
359 | struct list_lru_memcg *mlru; | ||
360 | |||
361 | mlru = container_of(head, struct list_lru_memcg, rcu); | ||
362 | kvfree(mlru); | ||
345 | } | 363 | } |
346 | 364 | ||
347 | static int memcg_update_list_lru_node(struct list_lru_node *nlru, | 365 | static int memcg_update_list_lru_node(struct list_lru_node *nlru, |
@@ -351,8 +369,9 @@ static int memcg_update_list_lru_node(struct list_lru_node *nlru, | |||
351 | 369 | ||
352 | BUG_ON(old_size > new_size); | 370 | BUG_ON(old_size > new_size); |
353 | 371 | ||
354 | old = nlru->memcg_lrus; | 372 | old = rcu_dereference_protected(nlru->memcg_lrus, |
355 | new = kvmalloc(new_size * sizeof(void *), GFP_KERNEL); | 373 | lockdep_is_held(&list_lrus_mutex)); |
374 | new = kvmalloc(sizeof(*new) + new_size * sizeof(void *), GFP_KERNEL); | ||
356 | if (!new) | 375 | if (!new) |
357 | return -ENOMEM; | 376 | return -ENOMEM; |
358 | 377 | ||
@@ -361,29 +380,33 @@ static int memcg_update_list_lru_node(struct list_lru_node *nlru, | |||
361 | return -ENOMEM; | 380 | return -ENOMEM; |
362 | } | 381 | } |
363 | 382 | ||
364 | memcpy(new, old, old_size * sizeof(void *)); | 383 | memcpy(&new->lru, &old->lru, old_size * sizeof(void *)); |
365 | 384 | ||
366 | /* | 385 | /* |
367 | * The lock guarantees that we won't race with a reader | 386 | * The locking below allows readers that hold nlru->lock avoid taking |
368 | * (see list_lru_from_memcg_idx). | 387 | * rcu_read_lock (see list_lru_from_memcg_idx). |
369 | * | 388 | * |
370 | * Since list_lru_{add,del} may be called under an IRQ-safe lock, | 389 | * Since list_lru_{add,del} may be called under an IRQ-safe lock, |
371 | * we have to use IRQ-safe primitives here to avoid deadlock. | 390 | * we have to use IRQ-safe primitives here to avoid deadlock. |
372 | */ | 391 | */ |
373 | spin_lock_irq(&nlru->lock); | 392 | spin_lock_irq(&nlru->lock); |
374 | nlru->memcg_lrus = new; | 393 | rcu_assign_pointer(nlru->memcg_lrus, new); |
375 | spin_unlock_irq(&nlru->lock); | 394 | spin_unlock_irq(&nlru->lock); |
376 | 395 | ||
377 | kvfree(old); | 396 | call_rcu(&old->rcu, kvfree_rcu); |
378 | return 0; | 397 | return 0; |
379 | } | 398 | } |
380 | 399 | ||
381 | static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru, | 400 | static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru, |
382 | int old_size, int new_size) | 401 | int old_size, int new_size) |
383 | { | 402 | { |
403 | struct list_lru_memcg *memcg_lrus; | ||
404 | |||
405 | memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus, | ||
406 | lockdep_is_held(&list_lrus_mutex)); | ||
384 | /* do not bother shrinking the array back to the old size, because we | 407 | /* do not bother shrinking the array back to the old size, because we |
385 | * cannot handle allocation failures here */ | 408 | * cannot handle allocation failures here */ |
386 | __memcg_destroy_list_lru_node(nlru->memcg_lrus, old_size, new_size); | 409 | __memcg_destroy_list_lru_node(memcg_lrus, old_size, new_size); |
387 | } | 410 | } |
388 | 411 | ||
389 | static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware) | 412 | static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware) |
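The rcu_dereference()/call_rcu() handling above implies that struct list_lru_memcg now starts with an rcu_head followed by a flexible array of per-cgroup lists; a sketch of that layout (the list_lru.h change itself is not part of this hunk):

struct list_lru_memcg {
	struct rcu_head		rcu;
	/* array of per-cgroup lists, indexed by memcg_cache_id */
	struct list_lru_one	*lru[0];
};

That is also why the allocations above are sized as sizeof(*memcg_lrus) + size * sizeof(void *) and copy into &new->lru rather than into the base pointer.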
diff --git a/mm/memblock.c b/mm/memblock.c index 48376bd33274..9b04568ad42a 100644 --- a/mm/memblock.c +++ b/mm/memblock.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <linux/poison.h> | 17 | #include <linux/poison.h> |
18 | #include <linux/pfn.h> | 18 | #include <linux/pfn.h> |
19 | #include <linux/debugfs.h> | 19 | #include <linux/debugfs.h> |
20 | #include <linux/kmemleak.h> | ||
20 | #include <linux/seq_file.h> | 21 | #include <linux/seq_file.h> |
21 | #include <linux/memblock.h> | 22 | #include <linux/memblock.h> |
22 | 23 | ||
@@ -924,7 +925,7 @@ void __init_memblock __next_mem_range(u64 *idx, int nid, ulong flags, | |||
924 | r = &type_b->regions[idx_b]; | 925 | r = &type_b->regions[idx_b]; |
925 | r_start = idx_b ? r[-1].base + r[-1].size : 0; | 926 | r_start = idx_b ? r[-1].base + r[-1].size : 0; |
926 | r_end = idx_b < type_b->cnt ? | 927 | r_end = idx_b < type_b->cnt ? |
927 | r->base : ULLONG_MAX; | 928 | r->base : (phys_addr_t)ULLONG_MAX; |
928 | 929 | ||
929 | /* | 930 | /* |
930 | * if idx_b advanced past idx_a, | 931 | * if idx_b advanced past idx_a, |
@@ -1040,7 +1041,7 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid, ulong flags, | |||
1040 | r = &type_b->regions[idx_b]; | 1041 | r = &type_b->regions[idx_b]; |
1041 | r_start = idx_b ? r[-1].base + r[-1].size : 0; | 1042 | r_start = idx_b ? r[-1].base + r[-1].size : 0; |
1042 | r_end = idx_b < type_b->cnt ? | 1043 | r_end = idx_b < type_b->cnt ? |
1043 | r->base : ULLONG_MAX; | 1044 | r->base : (phys_addr_t)ULLONG_MAX; |
1044 | /* | 1045 | /* |
1045 | * if idx_b advanced past idx_a, | 1046 | * if idx_b advanced past idx_a, |
1046 | * break out to advance idx_a | 1047 | * break out to advance idx_a |
@@ -1345,7 +1346,7 @@ void * __init memblock_virt_alloc_try_nid_raw( | |||
1345 | min_addr, max_addr, nid); | 1346 | min_addr, max_addr, nid); |
1346 | #ifdef CONFIG_DEBUG_VM | 1347 | #ifdef CONFIG_DEBUG_VM |
1347 | if (ptr && size > 0) | 1348 | if (ptr && size > 0) |
1348 | memset(ptr, 0xff, size); | 1349 | memset(ptr, PAGE_POISON_PATTERN, size); |
1349 | #endif | 1350 | #endif |
1350 | return ptr; | 1351 | return ptr; |
1351 | } | 1352 | } |
@@ -1750,29 +1751,6 @@ static void __init_memblock memblock_dump(struct memblock_type *type) | |||
1750 | } | 1751 | } |
1751 | } | 1752 | } |
1752 | 1753 | ||
1753 | extern unsigned long __init_memblock | ||
1754 | memblock_reserved_memory_within(phys_addr_t start_addr, phys_addr_t end_addr) | ||
1755 | { | ||
1756 | struct memblock_region *rgn; | ||
1757 | unsigned long size = 0; | ||
1758 | int idx; | ||
1759 | |||
1760 | for_each_memblock_type(idx, (&memblock.reserved), rgn) { | ||
1761 | phys_addr_t start, end; | ||
1762 | |||
1763 | if (rgn->base + rgn->size < start_addr) | ||
1764 | continue; | ||
1765 | if (rgn->base > end_addr) | ||
1766 | continue; | ||
1767 | |||
1768 | start = rgn->base; | ||
1769 | end = start + rgn->size; | ||
1770 | size += end - start; | ||
1771 | } | ||
1772 | |||
1773 | return size; | ||
1774 | } | ||
1775 | |||
1776 | void __init_memblock __memblock_dump_all(void) | 1754 | void __init_memblock __memblock_dump_all(void) |
1777 | { | 1755 | { |
1778 | pr_info("MEMBLOCK configuration:\n"); | 1756 | pr_info("MEMBLOCK configuration:\n"); |
@@ -1818,18 +1796,7 @@ static int memblock_debug_show(struct seq_file *m, void *private) | |||
1818 | } | 1796 | } |
1819 | return 0; | 1797 | return 0; |
1820 | } | 1798 | } |
1821 | 1799 | DEFINE_SHOW_ATTRIBUTE(memblock_debug); | |
1822 | static int memblock_debug_open(struct inode *inode, struct file *file) | ||
1823 | { | ||
1824 | return single_open(file, memblock_debug_show, inode->i_private); | ||
1825 | } | ||
1826 | |||
1827 | static const struct file_operations memblock_debug_fops = { | ||
1828 | .open = memblock_debug_open, | ||
1829 | .read = seq_read, | ||
1830 | .llseek = seq_lseek, | ||
1831 | .release = single_release, | ||
1832 | }; | ||
1833 | 1800 | ||
1834 | static int __init memblock_init_debugfs(void) | 1801 | static int __init memblock_init_debugfs(void) |
1835 | { | 1802 | { |
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 8291b75f42c8..2d4bf647cf01 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c | |||
@@ -502,6 +502,7 @@ static const char * const action_page_types[] = { | |||
502 | [MF_MSG_POISONED_HUGE] = "huge page already hardware poisoned", | 502 | [MF_MSG_POISONED_HUGE] = "huge page already hardware poisoned", |
503 | [MF_MSG_HUGE] = "huge page", | 503 | [MF_MSG_HUGE] = "huge page", |
504 | [MF_MSG_FREE_HUGE] = "free huge page", | 504 | [MF_MSG_FREE_HUGE] = "free huge page", |
505 | [MF_MSG_NON_PMD_HUGE] = "non-pmd-sized huge page", | ||
505 | [MF_MSG_UNMAP_FAILED] = "unmapping failed page", | 506 | [MF_MSG_UNMAP_FAILED] = "unmapping failed page", |
506 | [MF_MSG_DIRTY_SWAPCACHE] = "dirty swapcache page", | 507 | [MF_MSG_DIRTY_SWAPCACHE] = "dirty swapcache page", |
507 | [MF_MSG_CLEAN_SWAPCACHE] = "clean swapcache page", | 508 | [MF_MSG_CLEAN_SWAPCACHE] = "clean swapcache page", |
@@ -1084,6 +1085,21 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags) | |||
1084 | return 0; | 1085 | return 0; |
1085 | } | 1086 | } |
1086 | 1087 | ||
1088 | /* | ||
1089 | * TODO: hwpoison for pud-sized hugetlb doesn't work right now, so | ||
1090 | * simply disable it. In order to make it work properly, we need to | ||
1091 | * make sure that: | ||
1092 | * - conversion of a pud that maps an error hugetlb into hwpoison | ||
1093 | * entry properly works, and | ||
1094 | * - other mm code walking over page table is aware of pud-aligned | ||
1095 | * hwpoison entries. | ||
1096 | */ | ||
1097 | if (huge_page_size(page_hstate(head)) > PMD_SIZE) { | ||
1098 | action_result(pfn, MF_MSG_NON_PMD_HUGE, MF_IGNORED); | ||
1099 | res = -EBUSY; | ||
1100 | goto out; | ||
1101 | } | ||
1102 | |||
1087 | if (!hwpoison_user_mappings(p, pfn, flags, &head)) { | 1103 | if (!hwpoison_user_mappings(p, pfn, flags, &head)) { |
1088 | action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED); | 1104 | action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED); |
1089 | res = -EBUSY; | 1105 | res = -EBUSY; |
diff --git a/mm/memory.c b/mm/memory.c index aed37325d94e..01f5464e0fd2 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -2883,26 +2883,16 @@ EXPORT_SYMBOL(unmap_mapping_range); | |||
2883 | int do_swap_page(struct vm_fault *vmf) | 2883 | int do_swap_page(struct vm_fault *vmf) |
2884 | { | 2884 | { |
2885 | struct vm_area_struct *vma = vmf->vma; | 2885 | struct vm_area_struct *vma = vmf->vma; |
2886 | struct page *page = NULL, *swapcache = NULL; | 2886 | struct page *page = NULL, *swapcache; |
2887 | struct mem_cgroup *memcg; | 2887 | struct mem_cgroup *memcg; |
2888 | struct vma_swap_readahead swap_ra; | ||
2889 | swp_entry_t entry; | 2888 | swp_entry_t entry; |
2890 | pte_t pte; | 2889 | pte_t pte; |
2891 | int locked; | 2890 | int locked; |
2892 | int exclusive = 0; | 2891 | int exclusive = 0; |
2893 | int ret = 0; | 2892 | int ret = 0; |
2894 | bool vma_readahead = swap_use_vma_readahead(); | ||
2895 | 2893 | ||
2896 | if (vma_readahead) { | 2894 | if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte)) |
2897 | page = swap_readahead_detect(vmf, &swap_ra); | ||
2898 | swapcache = page; | ||
2899 | } | ||
2900 | |||
2901 | if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte)) { | ||
2902 | if (page) | ||
2903 | put_page(page); | ||
2904 | goto out; | 2895 | goto out; |
2905 | } | ||
2906 | 2896 | ||
2907 | entry = pte_to_swp_entry(vmf->orig_pte); | 2897 | entry = pte_to_swp_entry(vmf->orig_pte); |
2908 | if (unlikely(non_swap_entry(entry))) { | 2898 | if (unlikely(non_swap_entry(entry))) { |
@@ -2928,11 +2918,8 @@ int do_swap_page(struct vm_fault *vmf) | |||
2928 | 2918 | ||
2929 | 2919 | ||
2930 | delayacct_set_flag(DELAYACCT_PF_SWAPIN); | 2920 | delayacct_set_flag(DELAYACCT_PF_SWAPIN); |
2931 | if (!page) { | 2921 | page = lookup_swap_cache(entry, vma, vmf->address); |
2932 | page = lookup_swap_cache(entry, vma_readahead ? vma : NULL, | 2922 | swapcache = page; |
2933 | vmf->address); | ||
2934 | swapcache = page; | ||
2935 | } | ||
2936 | 2923 | ||
2937 | if (!page) { | 2924 | if (!page) { |
2938 | struct swap_info_struct *si = swp_swap_info(entry); | 2925 | struct swap_info_struct *si = swp_swap_info(entry); |
@@ -2940,7 +2927,8 @@ int do_swap_page(struct vm_fault *vmf) | |||
2940 | if (si->flags & SWP_SYNCHRONOUS_IO && | 2927 | if (si->flags & SWP_SYNCHRONOUS_IO && |
2941 | __swap_count(si, entry) == 1) { | 2928 | __swap_count(si, entry) == 1) { |
2942 | /* skip swapcache */ | 2929 | /* skip swapcache */ |
2943 | page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address); | 2930 | page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, |
2931 | vmf->address); | ||
2944 | if (page) { | 2932 | if (page) { |
2945 | __SetPageLocked(page); | 2933 | __SetPageLocked(page); |
2946 | __SetPageSwapBacked(page); | 2934 | __SetPageSwapBacked(page); |
@@ -2949,12 +2937,8 @@ int do_swap_page(struct vm_fault *vmf) | |||
2949 | swap_readpage(page, true); | 2937 | swap_readpage(page, true); |
2950 | } | 2938 | } |
2951 | } else { | 2939 | } else { |
2952 | if (vma_readahead) | 2940 | page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, |
2953 | page = do_swap_page_readahead(entry, | 2941 | vmf); |
2954 | GFP_HIGHUSER_MOVABLE, vmf, &swap_ra); | ||
2955 | else | ||
2956 | page = swapin_readahead(entry, | ||
2957 | GFP_HIGHUSER_MOVABLE, vma, vmf->address); | ||
2958 | swapcache = page; | 2942 | swapcache = page; |
2959 | } | 2943 | } |
2960 | 2944 | ||
@@ -2982,7 +2966,6 @@ int do_swap_page(struct vm_fault *vmf) | |||
2982 | */ | 2966 | */ |
2983 | ret = VM_FAULT_HWPOISON; | 2967 | ret = VM_FAULT_HWPOISON; |
2984 | delayacct_clear_flag(DELAYACCT_PF_SWAPIN); | 2968 | delayacct_clear_flag(DELAYACCT_PF_SWAPIN); |
2985 | swapcache = page; | ||
2986 | goto out_release; | 2969 | goto out_release; |
2987 | } | 2970 | } |
2988 | 2971 | ||
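For reference, the rewritten do_swap_page() path always consults lookup_swap_cache() first (the readahead policy choice now lives in swap_state.c), and only bypasses the swapcache when the backing device advertises SWP_SYNCHRONOUS_IO and the swap count is 1. A hedged userspace sketch of that decision, with made-up types standing in for the kernel structures:

/* Sketch, not kernel code: the two swap-in strategies chosen in do_swap_page(). */
#include <stdbool.h>
#include <stdio.h>

struct sketch_swap_info {
        bool synchronous_io;    /* stands in for si->flags & SWP_SYNCHRONOUS_IO */
        int  swap_count;        /* stands in for __swap_count(si, entry) */
};

/* Returns true when the fault should bypass the swapcache entirely. */
static bool skip_swapcache(const struct sketch_swap_info *si)
{
        return si->synchronous_io && si->swap_count == 1;
}

int main(void)
{
        struct sketch_swap_info zram = { .synchronous_io = true,  .swap_count = 1 };
        struct sketch_swap_info disk = { .synchronous_io = false, .swap_count = 3 };

        printf("zram-like device, count 1: %s\n",
               skip_swapcache(&zram) ? "read page directly" : "go through swapcache + readahead");
        printf("rotating disk, shared entry: %s\n",
               skip_swapcache(&disk) ? "read page directly" : "go through swapcache + readahead");
        return 0;
}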
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index b2bd52ff7605..cc6dfa5832ca 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c | |||
@@ -250,7 +250,6 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn, | |||
250 | struct vmem_altmap *altmap, bool want_memblock) | 250 | struct vmem_altmap *altmap, bool want_memblock) |
251 | { | 251 | { |
252 | int ret; | 252 | int ret; |
253 | int i; | ||
254 | 253 | ||
255 | if (pfn_valid(phys_start_pfn)) | 254 | if (pfn_valid(phys_start_pfn)) |
256 | return -EEXIST; | 255 | return -EEXIST; |
@@ -259,27 +258,10 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn, | |||
259 | if (ret < 0) | 258 | if (ret < 0) |
260 | return ret; | 259 | return ret; |
261 | 260 | ||
262 | /* | ||
263 | * Make all the pages reserved so that nobody will stumble over half | ||
264 | * initialized state. | ||
265 | * FIXME: We also have to associate it with a node because page_to_nid | ||
266 | * relies on having page with the proper node. | ||
267 | */ | ||
268 | for (i = 0; i < PAGES_PER_SECTION; i++) { | ||
269 | unsigned long pfn = phys_start_pfn + i; | ||
270 | struct page *page; | ||
271 | if (!pfn_valid(pfn)) | ||
272 | continue; | ||
273 | |||
274 | page = pfn_to_page(pfn); | ||
275 | set_page_node(page, nid); | ||
276 | SetPageReserved(page); | ||
277 | } | ||
278 | |||
279 | if (!want_memblock) | 261 | if (!want_memblock) |
280 | return 0; | 262 | return 0; |
281 | 263 | ||
282 | return register_new_memory(nid, __pfn_to_section(phys_start_pfn)); | 264 | return hotplug_memory_register(nid, __pfn_to_section(phys_start_pfn)); |
283 | } | 265 | } |
284 | 266 | ||
285 | /* | 267 | /* |
@@ -559,6 +541,7 @@ static int __remove_section(struct zone *zone, struct mem_section *ms, | |||
559 | * @zone: zone from which pages need to be removed | 541 | * @zone: zone from which pages need to be removed |
560 | * @phys_start_pfn: starting pageframe (must be aligned to start of a section) | 542 | * @phys_start_pfn: starting pageframe (must be aligned to start of a section) |
561 | * @nr_pages: number of pages to remove (must be multiple of section size) | 543 | * @nr_pages: number of pages to remove (must be multiple of section size) |
544 | * @altmap: alternative device page map or %NULL if default memmap is used | ||
562 | * | 545 | * |
563 | * Generic helper function to remove section mappings and sysfs entries | 546 | * Generic helper function to remove section mappings and sysfs entries |
564 | * for the section of the memory we are removing. Caller needs to make | 547 | * for the section of the memory we are removing. Caller needs to make |
@@ -908,8 +891,15 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ | |||
908 | int nid; | 891 | int nid; |
909 | int ret; | 892 | int ret; |
910 | struct memory_notify arg; | 893 | struct memory_notify arg; |
894 | struct memory_block *mem; | ||
895 | |||
896 | /* | ||
897 | * We can't use pfn_to_nid() because nid might be stored in struct page | ||
898 | * which is not yet initialized. Instead, we find nid from memory block. | ||
899 | */ | ||
900 | mem = find_memory_block(__pfn_to_section(pfn)); | ||
901 | nid = mem->nid; | ||
911 | 902 | ||
912 | nid = pfn_to_nid(pfn); | ||
913 | /* associate pfn range with the zone */ | 903 | /* associate pfn range with the zone */ |
914 | zone = move_pfn_range(online_type, nid, pfn, nr_pages); | 904 | zone = move_pfn_range(online_type, nid, pfn, nr_pages); |
915 | 905 | ||
@@ -1055,6 +1045,7 @@ static void rollback_node_hotadd(int nid, pg_data_t *pgdat) | |||
1055 | 1045 | ||
1056 | /** | 1046 | /** |
1057 | * try_online_node - online a node if offlined | 1047 | * try_online_node - online a node if offlined |
1048 | * @nid: the node ID | ||
1058 | * | 1049 | * |
1059 | * called by cpu_up() to online a node without onlined memory. | 1050 | * called by cpu_up() to online a node without onlined memory. |
1060 | */ | 1051 | */ |
@@ -1083,15 +1074,16 @@ out: | |||
1083 | 1074 | ||
1084 | static int check_hotplug_memory_range(u64 start, u64 size) | 1075 | static int check_hotplug_memory_range(u64 start, u64 size) |
1085 | { | 1076 | { |
1086 | u64 start_pfn = PFN_DOWN(start); | 1077 | unsigned long block_sz = memory_block_size_bytes(); |
1078 | u64 block_nr_pages = block_sz >> PAGE_SHIFT; | ||
1087 | u64 nr_pages = size >> PAGE_SHIFT; | 1079 | u64 nr_pages = size >> PAGE_SHIFT; |
1080 | u64 start_pfn = PFN_DOWN(start); | ||
1088 | 1081 | ||
1089 | /* Memory range must be aligned with section */ | 1082 | /* memory range must be block size aligned */ |
1090 | if ((start_pfn & ~PAGE_SECTION_MASK) || | 1083 | if (!nr_pages || !IS_ALIGNED(start_pfn, block_nr_pages) || |
1091 | (nr_pages % PAGES_PER_SECTION) || (!nr_pages)) { | 1084 | !IS_ALIGNED(nr_pages, block_nr_pages)) { |
1092 | pr_err("Section-unaligned hotplug range: start 0x%llx, size 0x%llx\n", | 1085 | pr_err("Block size [%#lx] unaligned hotplug range: start %#llx, size %#llx", |
1093 | (unsigned long long)start, | 1086 | block_sz, start, size); |
1094 | (unsigned long long)size); | ||
1095 | return -EINVAL; | 1087 | return -EINVAL; |
1096 | } | 1088 | } |
1097 | 1089 | ||
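The new check_hotplug_memory_range() requires alignment to the memory block size rather than to a section. A small sketch of the same IS_ALIGNED-style test in plain C; the 128 MiB block size below is an assumption for the sketch, the real value comes from memory_block_size_bytes().

/* Sketch: a hotplug range must be non-empty and block-size aligned. */
#include <stdbool.h>
#include <stdio.h>

#define SKETCH_PAGE_SHIFT 12
#define SKETCH_BLOCK_SZ   (128ULL << 20)   /* 128 MiB block size, assumed */

#define IS_ALIGNED_ULL(x, a) (((x) & ((a) - 1)) == 0)

static bool hotplug_range_ok(unsigned long long start, unsigned long long size)
{
        unsigned long long block_nr_pages = SKETCH_BLOCK_SZ >> SKETCH_PAGE_SHIFT;
        unsigned long long nr_pages = size >> SKETCH_PAGE_SHIFT;
        unsigned long long start_pfn = start >> SKETCH_PAGE_SHIFT;

        return nr_pages &&
               IS_ALIGNED_ULL(start_pfn, block_nr_pages) &&
               IS_ALIGNED_ULL(nr_pages, block_nr_pages);
}

int main(void)
{
        printf("%d\n", hotplug_range_ok(0x100000000ULL, 128ULL << 20));  /* 1: aligned   */
        printf("%d\n", hotplug_range_ok(0x100000000ULL,  64ULL << 20));  /* 0: too small */
        return 0;
}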
@@ -1814,6 +1806,7 @@ static int check_and_unmap_cpu_on_node(pg_data_t *pgdat) | |||
1814 | 1806 | ||
1815 | /** | 1807 | /** |
1816 | * try_offline_node | 1808 | * try_offline_node |
1809 | * @nid: the node ID | ||
1817 | * | 1810 | * |
1818 | * Offline a node if all memory sections and cpus of the node are removed. | 1811 | * Offline a node if all memory sections and cpus of the node are removed. |
1819 | * | 1812 | * |
@@ -1857,6 +1850,9 @@ EXPORT_SYMBOL(try_offline_node); | |||
1857 | 1850 | ||
1858 | /** | 1851 | /** |
1859 | * remove_memory | 1852 | * remove_memory |
1853 | * @nid: the node ID | ||
1854 | * @start: physical address of the region to remove | ||
1855 | * @size: size of the region to remove | ||
1860 | * | 1856 | * |
1861 | * NOTE: The caller must call lock_device_hotplug() to serialize hotplug | 1857 | * NOTE: The caller must call lock_device_hotplug() to serialize hotplug |
1862 | * and online/offline operations before this call, as required by | 1858 | * and online/offline operations before this call, as required by |
@@ -3191,13 +3191,15 @@ bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages) | |||
3191 | if (rlimit(RLIMIT_DATA) == 0 && | 3191 | if (rlimit(RLIMIT_DATA) == 0 && |
3192 | mm->data_vm + npages <= rlimit_max(RLIMIT_DATA) >> PAGE_SHIFT) | 3192 | mm->data_vm + npages <= rlimit_max(RLIMIT_DATA) >> PAGE_SHIFT) |
3193 | return true; | 3193 | return true; |
3194 | if (!ignore_rlimit_data) { | 3194 | |
3195 | pr_warn_once("%s (%d): VmData %lu exceed data ulimit %lu. Update limits or use boot option ignore_rlimit_data.\n", | 3195 | pr_warn_once("%s (%d): VmData %lu exceed data ulimit %lu. Update limits%s.\n", |
3196 | current->comm, current->pid, | 3196 | current->comm, current->pid, |
3197 | (mm->data_vm + npages) << PAGE_SHIFT, | 3197 | (mm->data_vm + npages) << PAGE_SHIFT, |
3198 | rlimit(RLIMIT_DATA)); | 3198 | rlimit(RLIMIT_DATA), |
3199 | ignore_rlimit_data ? "" : " or use boot option ignore_rlimit_data"); | ||
3200 | |||
3201 | if (!ignore_rlimit_data) | ||
3199 | return false; | 3202 | return false; |
3200 | } | ||
3201 | } | 3203 | } |
3202 | 3204 | ||
3203 | return true; | 3205 | return true; |
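The restructured may_expand_vm() hunk emits the warning whenever VmData would exceed RLIMIT_DATA, and only afterwards decides whether to fail based on ignore_rlimit_data. A compact sketch of that control flow; names are placeholders, and the kernel uses pr_warn_once() where the sketch warns every time.

/* Sketch: warn on the offender, fail only when the override is off. */
#include <stdbool.h>
#include <stdio.h>

static bool ignore_rlimit_data;   /* would come from the ignore_rlimit_data boot option */

static bool may_expand_data(unsigned long data_pages, unsigned long new_pages,
                            unsigned long rlimit_pages)
{
        if (data_pages + new_pages <= rlimit_pages)
                return true;

        fprintf(stderr, "VmData %lu pages exceed data ulimit %lu pages. Update limits%s.\n",
                data_pages + new_pages, rlimit_pages,
                ignore_rlimit_data ? "" : " or use boot option ignore_rlimit_data");

        return ignore_rlimit_data;   /* the override turns a hard failure into a warning */
}

int main(void)
{
        printf("strict:  %d\n", may_expand_data(1000, 200, 1100));
        ignore_rlimit_data = true;
        printf("ignored: %d\n", may_expand_data(1000, 200, 1100));
        return 0;
}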
diff --git a/mm/nommu.c b/mm/nommu.c index 4f8720243ae7..13723736d38f 100644 --- a/mm/nommu.c +++ b/mm/nommu.c | |||
@@ -457,18 +457,6 @@ void __weak vmalloc_sync_all(void) | |||
457 | { | 457 | { |
458 | } | 458 | } |
459 | 459 | ||
460 | /** | ||
461 | * alloc_vm_area - allocate a range of kernel address space | ||
462 | * @size: size of the area | ||
463 | * | ||
464 | * Returns: NULL on failure, vm_struct on success | ||
465 | * | ||
466 | * This function reserves a range of kernel address space, and | ||
467 | * allocates pagetables to map that range. No actual mappings | ||
468 | * are created. If the kernel address space is not shared | ||
469 | * between processes, it syncs the pagetable across all | ||
470 | * processes. | ||
471 | */ | ||
472 | struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes) | 460 | struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes) |
473 | { | 461 | { |
474 | BUG(); | 462 | BUG(); |
diff --git a/mm/oom_kill.c b/mm/oom_kill.c index f2e7dfb81eee..ff992fa8760a 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c | |||
@@ -185,6 +185,8 @@ static bool is_dump_unreclaim_slabs(void) | |||
185 | * oom_badness - heuristic function to determine which candidate task to kill | 185 | * oom_badness - heuristic function to determine which candidate task to kill |
186 | * @p: task struct of which task we should calculate | 186 | * @p: task struct of which task we should calculate |
187 | * @totalpages: total present RAM allowed for page allocation | 187 | * @totalpages: total present RAM allowed for page allocation |
188 | * @memcg: task's memory controller, if constrained | ||
189 | * @nodemask: nodemask passed to page allocator for mempolicy ooms | ||
188 | * | 190 | * |
189 | * The heuristic for determining which task to kill is made to be as simple and | 191 | * The heuristic for determining which task to kill is made to be as simple and |
190 | * predictable as possible. The goal is to return the highest value for the | 192 | * predictable as possible. The goal is to return the highest value for the |
@@ -224,13 +226,6 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, | |||
224 | mm_pgtables_bytes(p->mm) / PAGE_SIZE; | 226 | mm_pgtables_bytes(p->mm) / PAGE_SIZE; |
225 | task_unlock(p); | 227 | task_unlock(p); |
226 | 228 | ||
227 | /* | ||
228 | * Root processes get 3% bonus, just like the __vm_enough_memory() | ||
229 | * implementation used by LSMs. | ||
230 | */ | ||
231 | if (has_capability_noaudit(p, CAP_SYS_ADMIN)) | ||
232 | points -= (points * 3) / 100; | ||
233 | |||
234 | /* Normalize to oom_score_adj units */ | 229 | /* Normalize to oom_score_adj units */ |
235 | adj *= totalpages / 1000; | 230 | adj *= totalpages / 1000; |
236 | points += adj; | 231 | points += adj; |
@@ -595,7 +590,8 @@ static void oom_reap_task(struct task_struct *tsk) | |||
595 | while (attempts++ < MAX_OOM_REAP_RETRIES && !__oom_reap_task_mm(tsk, mm)) | 590 | while (attempts++ < MAX_OOM_REAP_RETRIES && !__oom_reap_task_mm(tsk, mm)) |
596 | schedule_timeout_idle(HZ/10); | 591 | schedule_timeout_idle(HZ/10); |
597 | 592 | ||
598 | if (attempts <= MAX_OOM_REAP_RETRIES) | 593 | if (attempts <= MAX_OOM_REAP_RETRIES || |
594 | test_bit(MMF_OOM_SKIP, &mm->flags)) | ||
599 | goto done; | 595 | goto done; |
600 | 596 | ||
601 | 597 | ||
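With the 3% CAP_SYS_ADMIN bonus removed, oom_badness() reduces to the task's memory footprint plus the normalized oom_score_adj. A simplified, hedged rendering of that arithmetic in userspace C; the struct fields are invented for the sketch and only mimic the counters the kernel sums.

/* Sketch of the post-patch badness heuristic: footprint + scaled oom_score_adj. */
#include <stdio.h>

struct sketch_task {
        long rss_pages;        /* get_mm_rss() */
        long swap_pages;       /* get_mm_counter(mm, MM_SWAPENTS) */
        long pgtable_pages;    /* mm_pgtables_bytes() / PAGE_SIZE */
        long oom_score_adj;    /* -1000 .. 1000 */
};

static unsigned long oom_badness(const struct sketch_task *t, unsigned long totalpages)
{
        long points = t->rss_pages + t->swap_pages + t->pgtable_pages;

        /* Normalize oom_score_adj to the same units: one adj unit is 0.1% of RAM. */
        points += t->oom_score_adj * (long)(totalpages / 1000);

        return points > 0 ? (unsigned long)points : 1;
}

int main(void)
{
        struct sketch_task victim = { 250000, 10000, 500, 0 };
        printf("badness: %lu\n", oom_badness(&victim, 4UL << 20 /* 16 GiB of 4K pages */));
        return 0;
}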
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4ea018263210..0b97b8ece4a9 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -265,17 +265,19 @@ int min_free_kbytes = 1024; | |||
265 | int user_min_free_kbytes = -1; | 265 | int user_min_free_kbytes = -1; |
266 | int watermark_scale_factor = 10; | 266 | int watermark_scale_factor = 10; |
267 | 267 | ||
268 | static unsigned long __meminitdata nr_kernel_pages; | 268 | static unsigned long nr_kernel_pages __meminitdata; |
269 | static unsigned long __meminitdata nr_all_pages; | 269 | static unsigned long nr_all_pages __meminitdata; |
270 | static unsigned long __meminitdata dma_reserve; | 270 | static unsigned long dma_reserve __meminitdata; |
271 | 271 | ||
272 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP | 272 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP |
273 | static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES]; | 273 | static unsigned long arch_zone_lowest_possible_pfn[MAX_NR_ZONES] __meminitdata; |
274 | static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES]; | 274 | static unsigned long arch_zone_highest_possible_pfn[MAX_NR_ZONES] __meminitdata; |
275 | static unsigned long __initdata required_kernelcore; | 275 | static unsigned long required_kernelcore __initdata; |
276 | static unsigned long __initdata required_movablecore; | 276 | static unsigned long required_kernelcore_percent __initdata; |
277 | static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES]; | 277 | static unsigned long required_movablecore __initdata; |
278 | static bool mirrored_kernelcore; | 278 | static unsigned long required_movablecore_percent __initdata; |
279 | static unsigned long zone_movable_pfn[MAX_NUMNODES] __meminitdata; | ||
280 | static bool mirrored_kernelcore __meminitdata; | ||
279 | 281 | ||
280 | /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */ | 282 | /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */ |
281 | int movable_zone; | 283 | int movable_zone; |
@@ -292,40 +294,6 @@ EXPORT_SYMBOL(nr_online_nodes); | |||
292 | int page_group_by_mobility_disabled __read_mostly; | 294 | int page_group_by_mobility_disabled __read_mostly; |
293 | 295 | ||
294 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT | 296 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT |
295 | |||
296 | /* | ||
297 | * Determine how many pages need to be initialized during early boot | ||
298 | * (non-deferred initialization). | ||
299 | * The value of first_deferred_pfn will be set later, once non-deferred pages | ||
300 | * are initialized, but for now set it ULONG_MAX. | ||
301 | */ | ||
302 | static inline void reset_deferred_meminit(pg_data_t *pgdat) | ||
303 | { | ||
304 | phys_addr_t start_addr, end_addr; | ||
305 | unsigned long max_pgcnt; | ||
306 | unsigned long reserved; | ||
307 | |||
308 | /* | ||
309 | * Initialise at least 2G of a node but also take into account that | ||
310 | * two large system hashes that can take up 1GB for 0.25TB/node. | ||
311 | */ | ||
312 | max_pgcnt = max(2UL << (30 - PAGE_SHIFT), | ||
313 | (pgdat->node_spanned_pages >> 8)); | ||
314 | |||
315 | /* | ||
316 | * Compensate for all the memblock reservations (e.g. crash kernel) | ||
317 | * from the initial estimation to make sure we will initialize enough | ||
318 | * memory to boot. | ||
319 | */ | ||
320 | start_addr = PFN_PHYS(pgdat->node_start_pfn); | ||
321 | end_addr = PFN_PHYS(pgdat->node_start_pfn + max_pgcnt); | ||
322 | reserved = memblock_reserved_memory_within(start_addr, end_addr); | ||
323 | max_pgcnt += PHYS_PFN(reserved); | ||
324 | |||
325 | pgdat->static_init_pgcnt = min(max_pgcnt, pgdat->node_spanned_pages); | ||
326 | pgdat->first_deferred_pfn = ULONG_MAX; | ||
327 | } | ||
328 | |||
329 | /* Returns true if the struct page for the pfn is uninitialised */ | 297 | /* Returns true if the struct page for the pfn is uninitialised */ |
330 | static inline bool __meminit early_page_uninitialised(unsigned long pfn) | 298 | static inline bool __meminit early_page_uninitialised(unsigned long pfn) |
331 | { | 299 | { |
@@ -361,10 +329,6 @@ static inline bool update_defer_init(pg_data_t *pgdat, | |||
361 | return true; | 329 | return true; |
362 | } | 330 | } |
363 | #else | 331 | #else |
364 | static inline void reset_deferred_meminit(pg_data_t *pgdat) | ||
365 | { | ||
366 | } | ||
367 | |||
368 | static inline bool early_page_uninitialised(unsigned long pfn) | 332 | static inline bool early_page_uninitialised(unsigned long pfn) |
369 | { | 333 | { |
370 | return false; | 334 | return false; |
@@ -1099,6 +1063,15 @@ static bool bulkfree_pcp_prepare(struct page *page) | |||
1099 | } | 1063 | } |
1100 | #endif /* CONFIG_DEBUG_VM */ | 1064 | #endif /* CONFIG_DEBUG_VM */ |
1101 | 1065 | ||
1066 | static inline void prefetch_buddy(struct page *page) | ||
1067 | { | ||
1068 | unsigned long pfn = page_to_pfn(page); | ||
1069 | unsigned long buddy_pfn = __find_buddy_pfn(pfn, 0); | ||
1070 | struct page *buddy = page + (buddy_pfn - pfn); | ||
1071 | |||
1072 | prefetch(buddy); | ||
1073 | } | ||
1074 | |||
1102 | /* | 1075 | /* |
1103 | * Frees a number of pages from the PCP lists | 1076 | * Frees a number of pages from the PCP lists |
1104 | * Assumes all pages on list are in same zone, and of same order. | 1077 | * Assumes all pages on list are in same zone, and of same order. |
@@ -1115,13 +1088,12 @@ static void free_pcppages_bulk(struct zone *zone, int count, | |||
1115 | { | 1088 | { |
1116 | int migratetype = 0; | 1089 | int migratetype = 0; |
1117 | int batch_free = 0; | 1090 | int batch_free = 0; |
1091 | int prefetch_nr = 0; | ||
1118 | bool isolated_pageblocks; | 1092 | bool isolated_pageblocks; |
1119 | 1093 | struct page *page, *tmp; | |
1120 | spin_lock(&zone->lock); | 1094 | LIST_HEAD(head); |
1121 | isolated_pageblocks = has_isolate_pageblock(zone); | ||
1122 | 1095 | ||
1123 | while (count) { | 1096 | while (count) { |
1124 | struct page *page; | ||
1125 | struct list_head *list; | 1097 | struct list_head *list; |
1126 | 1098 | ||
1127 | /* | 1099 | /* |
@@ -1143,26 +1115,48 @@ static void free_pcppages_bulk(struct zone *zone, int count, | |||
1143 | batch_free = count; | 1115 | batch_free = count; |
1144 | 1116 | ||
1145 | do { | 1117 | do { |
1146 | int mt; /* migratetype of the to-be-freed page */ | ||
1147 | |||
1148 | page = list_last_entry(list, struct page, lru); | 1118 | page = list_last_entry(list, struct page, lru); |
1149 | /* must delete as __free_one_page list manipulates */ | 1119 | /* must delete to avoid corrupting pcp list */ |
1150 | list_del(&page->lru); | 1120 | list_del(&page->lru); |
1151 | 1121 | pcp->count--; | |
1152 | mt = get_pcppage_migratetype(page); | ||
1153 | /* MIGRATE_ISOLATE page should not go to pcplists */ | ||
1154 | VM_BUG_ON_PAGE(is_migrate_isolate(mt), page); | ||
1155 | /* Pageblock could have been isolated meanwhile */ | ||
1156 | if (unlikely(isolated_pageblocks)) | ||
1157 | mt = get_pageblock_migratetype(page); | ||
1158 | 1122 | ||
1159 | if (bulkfree_pcp_prepare(page)) | 1123 | if (bulkfree_pcp_prepare(page)) |
1160 | continue; | 1124 | continue; |
1161 | 1125 | ||
1162 | __free_one_page(page, page_to_pfn(page), zone, 0, mt); | 1126 | list_add_tail(&page->lru, &head); |
1163 | trace_mm_page_pcpu_drain(page, 0, mt); | 1127 | |
1128 | /* | ||
1129 | * We are going to put the page back to the global | ||
1130 | * pool, prefetch its buddy to speed up later access | ||
1131 | * under zone->lock. It is believed the overhead of | ||
1132 | * an additional test and calculating buddy_pfn here | ||
1133 | * can be offset by reduced memory latency later. To | ||
1134 | * avoid excessive prefetching due to large count, only | ||
1135 | * prefetch buddy for the first pcp->batch nr of pages. | ||
1136 | */ | ||
1137 | if (prefetch_nr++ < pcp->batch) | ||
1138 | prefetch_buddy(page); | ||
1164 | } while (--count && --batch_free && !list_empty(list)); | 1139 | } while (--count && --batch_free && !list_empty(list)); |
1165 | } | 1140 | } |
1141 | |||
1142 | spin_lock(&zone->lock); | ||
1143 | isolated_pageblocks = has_isolate_pageblock(zone); | ||
1144 | |||
1145 | /* | ||
1146 | * Use safe version since after __free_one_page(), | ||
1147 | * page->lru.next will not point to original list. | ||
1148 | */ | ||
1149 | list_for_each_entry_safe(page, tmp, &head, lru) { | ||
1150 | int mt = get_pcppage_migratetype(page); | ||
1151 | /* MIGRATE_ISOLATE page should not go to pcplists */ | ||
1152 | VM_BUG_ON_PAGE(is_migrate_isolate(mt), page); | ||
1153 | /* Pageblock could have been isolated meanwhile */ | ||
1154 | if (unlikely(isolated_pageblocks)) | ||
1155 | mt = get_pageblock_migratetype(page); | ||
1156 | |||
1157 | __free_one_page(page, page_to_pfn(page), zone, 0, mt); | ||
1158 | trace_mm_page_pcpu_drain(page, 0, mt); | ||
1159 | } | ||
1166 | spin_unlock(&zone->lock); | 1160 | spin_unlock(&zone->lock); |
1167 | } | 1161 | } |
1168 | 1162 | ||
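The reworked free_pcppages_bulk() splits the work in two: pages are first detached from the per-cpu lists onto a private list, with their order-0 buddies prefetched, and only then is zone->lock taken to merge them into the buddy allocator. The buddy of a pfn at a given order is simply pfn ^ (1 << order). A self-contained sketch of both ideas, with a mutex standing in for zone->lock and a fixed array standing in for the local list:

/* Sketch: gather work without the lock, then do the expensive part under it. */
#include <pthread.h>
#include <stdio.h>

static unsigned long find_buddy_pfn(unsigned long pfn, unsigned int order)
{
        return pfn ^ (1UL << order);     /* same formula as __find_buddy_pfn() */
}

static pthread_mutex_t zone_lock = PTHREAD_MUTEX_INITIALIZER;

static void free_pages_bulk(const unsigned long *pcp_pfns, int count)
{
        unsigned long head[64];
        int n = 0, i;

        /* Phase 1: no lock held -- detach and "prefetch" the buddy. */
        for (i = 0; i < count && n < 64; i++) {
                head[n++] = pcp_pfns[i];
                (void)find_buddy_pfn(pcp_pfns[i], 0);   /* kernel: prefetch(buddy) */
        }

        /* Phase 2: merge into the global pool with the lock held. */
        pthread_mutex_lock(&zone_lock);
        for (i = 0; i < n; i++)
                printf("freeing pfn %lu (buddy %lu)\n",
                       head[i], find_buddy_pfn(head[i], 0));
        pthread_mutex_unlock(&zone_lock);
}

int main(void)
{
        unsigned long pfns[] = { 4096, 4097, 8192 };
        free_pages_bulk(pfns, 3);
        return 0;
}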
@@ -1181,10 +1175,9 @@ static void free_one_page(struct zone *zone, | |||
1181 | } | 1175 | } |
1182 | 1176 | ||
1183 | static void __meminit __init_single_page(struct page *page, unsigned long pfn, | 1177 | static void __meminit __init_single_page(struct page *page, unsigned long pfn, |
1184 | unsigned long zone, int nid, bool zero) | 1178 | unsigned long zone, int nid) |
1185 | { | 1179 | { |
1186 | if (zero) | 1180 | mm_zero_struct_page(page); |
1187 | mm_zero_struct_page(page); | ||
1188 | set_page_links(page, zone, nid, pfn); | 1181 | set_page_links(page, zone, nid, pfn); |
1189 | init_page_count(page); | 1182 | init_page_count(page); |
1190 | page_mapcount_reset(page); | 1183 | page_mapcount_reset(page); |
@@ -1198,12 +1191,6 @@ static void __meminit __init_single_page(struct page *page, unsigned long pfn, | |||
1198 | #endif | 1191 | #endif |
1199 | } | 1192 | } |
1200 | 1193 | ||
1201 | static void __meminit __init_single_pfn(unsigned long pfn, unsigned long zone, | ||
1202 | int nid, bool zero) | ||
1203 | { | ||
1204 | return __init_single_page(pfn_to_page(pfn), pfn, zone, nid, zero); | ||
1205 | } | ||
1206 | |||
1207 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT | 1194 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT |
1208 | static void __meminit init_reserved_page(unsigned long pfn) | 1195 | static void __meminit init_reserved_page(unsigned long pfn) |
1209 | { | 1196 | { |
@@ -1222,7 +1209,7 @@ static void __meminit init_reserved_page(unsigned long pfn) | |||
1222 | if (pfn >= zone->zone_start_pfn && pfn < zone_end_pfn(zone)) | 1209 | if (pfn >= zone->zone_start_pfn && pfn < zone_end_pfn(zone)) |
1223 | break; | 1210 | break; |
1224 | } | 1211 | } |
1225 | __init_single_pfn(pfn, zid, nid, true); | 1212 | __init_single_page(pfn_to_page(pfn), pfn, zid, nid); |
1226 | } | 1213 | } |
1227 | #else | 1214 | #else |
1228 | static inline void init_reserved_page(unsigned long pfn) | 1215 | static inline void init_reserved_page(unsigned long pfn) |
@@ -1506,7 +1493,7 @@ static void __init deferred_free_pages(int nid, int zid, unsigned long pfn, | |||
1506 | } else if (!(pfn & nr_pgmask)) { | 1493 | } else if (!(pfn & nr_pgmask)) { |
1507 | deferred_free_range(pfn - nr_free, nr_free); | 1494 | deferred_free_range(pfn - nr_free, nr_free); |
1508 | nr_free = 1; | 1495 | nr_free = 1; |
1509 | cond_resched(); | 1496 | touch_nmi_watchdog(); |
1510 | } else { | 1497 | } else { |
1511 | nr_free++; | 1498 | nr_free++; |
1512 | } | 1499 | } |
@@ -1535,11 +1522,11 @@ static unsigned long __init deferred_init_pages(int nid, int zid, | |||
1535 | continue; | 1522 | continue; |
1536 | } else if (!page || !(pfn & nr_pgmask)) { | 1523 | } else if (!page || !(pfn & nr_pgmask)) { |
1537 | page = pfn_to_page(pfn); | 1524 | page = pfn_to_page(pfn); |
1538 | cond_resched(); | 1525 | touch_nmi_watchdog(); |
1539 | } else { | 1526 | } else { |
1540 | page++; | 1527 | page++; |
1541 | } | 1528 | } |
1542 | __init_single_page(page, pfn, zid, nid, true); | 1529 | __init_single_page(page, pfn, zid, nid); |
1543 | nr_pages++; | 1530 | nr_pages++; |
1544 | } | 1531 | } |
1545 | return (nr_pages); | 1532 | return (nr_pages); |
@@ -1552,23 +1539,25 @@ static int __init deferred_init_memmap(void *data) | |||
1552 | int nid = pgdat->node_id; | 1539 | int nid = pgdat->node_id; |
1553 | unsigned long start = jiffies; | 1540 | unsigned long start = jiffies; |
1554 | unsigned long nr_pages = 0; | 1541 | unsigned long nr_pages = 0; |
1555 | unsigned long spfn, epfn; | 1542 | unsigned long spfn, epfn, first_init_pfn, flags; |
1556 | phys_addr_t spa, epa; | 1543 | phys_addr_t spa, epa; |
1557 | int zid; | 1544 | int zid; |
1558 | struct zone *zone; | 1545 | struct zone *zone; |
1559 | unsigned long first_init_pfn = pgdat->first_deferred_pfn; | ||
1560 | const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id); | 1546 | const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id); |
1561 | u64 i; | 1547 | u64 i; |
1562 | 1548 | ||
1549 | /* Bind memory initialisation thread to a local node if possible */ | ||
1550 | if (!cpumask_empty(cpumask)) | ||
1551 | set_cpus_allowed_ptr(current, cpumask); | ||
1552 | |||
1553 | pgdat_resize_lock(pgdat, &flags); | ||
1554 | first_init_pfn = pgdat->first_deferred_pfn; | ||
1563 | if (first_init_pfn == ULONG_MAX) { | 1555 | if (first_init_pfn == ULONG_MAX) { |
1556 | pgdat_resize_unlock(pgdat, &flags); | ||
1564 | pgdat_init_report_one_done(); | 1557 | pgdat_init_report_one_done(); |
1565 | return 0; | 1558 | return 0; |
1566 | } | 1559 | } |
1567 | 1560 | ||
1568 | /* Bind memory initialisation thread to a local node if possible */ | ||
1569 | if (!cpumask_empty(cpumask)) | ||
1570 | set_cpus_allowed_ptr(current, cpumask); | ||
1571 | |||
1572 | /* Sanity check boundaries */ | 1561 | /* Sanity check boundaries */ |
1573 | BUG_ON(pgdat->first_deferred_pfn < pgdat->node_start_pfn); | 1562 | BUG_ON(pgdat->first_deferred_pfn < pgdat->node_start_pfn); |
1574 | BUG_ON(pgdat->first_deferred_pfn > pgdat_end_pfn(pgdat)); | 1563 | BUG_ON(pgdat->first_deferred_pfn > pgdat_end_pfn(pgdat)); |
@@ -1598,6 +1587,7 @@ static int __init deferred_init_memmap(void *data) | |||
1598 | epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa)); | 1587 | epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa)); |
1599 | deferred_free_pages(nid, zid, spfn, epfn); | 1588 | deferred_free_pages(nid, zid, spfn, epfn); |
1600 | } | 1589 | } |
1590 | pgdat_resize_unlock(pgdat, &flags); | ||
1601 | 1591 | ||
1602 | /* Sanity check that the next zone really is unpopulated */ | 1592 | /* Sanity check that the next zone really is unpopulated */ |
1603 | WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone)); | 1593 | WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone)); |
@@ -1608,6 +1598,117 @@ static int __init deferred_init_memmap(void *data) | |||
1608 | pgdat_init_report_one_done(); | 1598 | pgdat_init_report_one_done(); |
1609 | return 0; | 1599 | return 0; |
1610 | } | 1600 | } |
1601 | |||
1602 | /* | ||
1603 | * During boot we initialize deferred pages on-demand, as needed, but once | ||
1604 | * page_alloc_init_late() has finished, the deferred pages are all initialized, | ||
1605 | * and we can permanently disable that path. | ||
1606 | */ | ||
1607 | static DEFINE_STATIC_KEY_TRUE(deferred_pages); | ||
1608 | |||
1609 | /* | ||
1610 | * If this zone has deferred pages, try to grow it by initializing enough | ||
1611 | * deferred pages to satisfy the allocation specified by order, rounded up to | ||
1612 | * the nearest PAGES_PER_SECTION boundary. So we're adding memory in increments | ||
1613 | * of SECTION_SIZE bytes by initializing struct pages in increments of | ||
1614 | * PAGES_PER_SECTION * sizeof(struct page) bytes. | ||
1615 | * | ||
1616 | * Return true when zone was grown, otherwise return false. We return true even | ||
1617 | * when we grow less than requested, to let the caller decide if there are | ||
1618 | * enough pages to satisfy the allocation. | ||
1619 | * | ||
1620 | * Note: We use noinline because this function is needed only during boot, and | ||
1621 | * it is called from a __ref function _deferred_grow_zone. This way we are | ||
1622 | * making sure that it is not inlined into permanent text section. | ||
1623 | */ | ||
1624 | static noinline bool __init | ||
1625 | deferred_grow_zone(struct zone *zone, unsigned int order) | ||
1626 | { | ||
1627 | int zid = zone_idx(zone); | ||
1628 | int nid = zone_to_nid(zone); | ||
1629 | pg_data_t *pgdat = NODE_DATA(nid); | ||
1630 | unsigned long nr_pages_needed = ALIGN(1 << order, PAGES_PER_SECTION); | ||
1631 | unsigned long nr_pages = 0; | ||
1632 | unsigned long first_init_pfn, spfn, epfn, t, flags; | ||
1633 | unsigned long first_deferred_pfn = pgdat->first_deferred_pfn; | ||
1634 | phys_addr_t spa, epa; | ||
1635 | u64 i; | ||
1636 | |||
1637 | /* Only the last zone may have deferred pages */ | ||
1638 | if (zone_end_pfn(zone) != pgdat_end_pfn(pgdat)) | ||
1639 | return false; | ||
1640 | |||
1641 | pgdat_resize_lock(pgdat, &flags); | ||
1642 | |||
1643 | /* | ||
1644 | * If deferred pages have been initialized while we were waiting for | ||
1645 | * the lock, return true, as the zone was grown. The caller will retry | ||
1646 | * this zone. We won't return to this function since the caller also | ||
1647 | * has this static branch. | ||
1648 | */ | ||
1649 | if (!static_branch_unlikely(&deferred_pages)) { | ||
1650 | pgdat_resize_unlock(pgdat, &flags); | ||
1651 | return true; | ||
1652 | } | ||
1653 | |||
1654 | /* | ||
1655 | * If someone grew this zone while we were waiting for spinlock, return | ||
1656 | * true, as there might be enough pages already. | ||
1657 | */ | ||
1658 | if (first_deferred_pfn != pgdat->first_deferred_pfn) { | ||
1659 | pgdat_resize_unlock(pgdat, &flags); | ||
1660 | return true; | ||
1661 | } | ||
1662 | |||
1663 | first_init_pfn = max(zone->zone_start_pfn, first_deferred_pfn); | ||
1664 | |||
1665 | if (first_init_pfn >= pgdat_end_pfn(pgdat)) { | ||
1666 | pgdat_resize_unlock(pgdat, &flags); | ||
1667 | return false; | ||
1668 | } | ||
1669 | |||
1670 | for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) { | ||
1671 | spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa)); | ||
1672 | epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa)); | ||
1673 | |||
1674 | while (spfn < epfn && nr_pages < nr_pages_needed) { | ||
1675 | t = ALIGN(spfn + PAGES_PER_SECTION, PAGES_PER_SECTION); | ||
1676 | first_deferred_pfn = min(t, epfn); | ||
1677 | nr_pages += deferred_init_pages(nid, zid, spfn, | ||
1678 | first_deferred_pfn); | ||
1679 | spfn = first_deferred_pfn; | ||
1680 | } | ||
1681 | |||
1682 | if (nr_pages >= nr_pages_needed) | ||
1683 | break; | ||
1684 | } | ||
1685 | |||
1686 | for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) { | ||
1687 | spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa)); | ||
1688 | epfn = min_t(unsigned long, first_deferred_pfn, PFN_DOWN(epa)); | ||
1689 | deferred_free_pages(nid, zid, spfn, epfn); | ||
1690 | |||
1691 | if (first_deferred_pfn == epfn) | ||
1692 | break; | ||
1693 | } | ||
1694 | pgdat->first_deferred_pfn = first_deferred_pfn; | ||
1695 | pgdat_resize_unlock(pgdat, &flags); | ||
1696 | |||
1697 | return nr_pages > 0; | ||
1698 | } | ||
1699 | |||
1700 | /* | ||
1701 | * deferred_grow_zone() is __init, but it is called from | ||
1702 | * get_page_from_freelist() during early boot until deferred_pages permanently | ||
1703 | * disables this call. This is why we have a refdata wrapper to avoid a warning, | ||
1704 | * and to ensure that the function body gets unloaded. | ||
1705 | */ | ||
1706 | static bool __ref | ||
1707 | _deferred_grow_zone(struct zone *zone, unsigned int order) | ||
1708 | { | ||
1709 | return deferred_grow_zone(zone, order); | ||
1710 | } | ||
1711 | |||
1611 | #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ | 1712 | #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ |
1612 | 1713 | ||
1613 | void __init page_alloc_init_late(void) | 1714 | void __init page_alloc_init_late(void) |
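The new deferred_grow_zone() path initializes just enough deferred struct pages, a section at a time, to satisfy an early allocation; page_alloc_init_late() later flips the static key so the check vanishes from the fast path. A rough userspace model of the growth loop; the section size, pfn values, and state variables below are stand-ins for the sketch, not the kernel's.

/* Sketch: grow the "uninitialized tail" in section-sized steps until the request fits. */
#include <stdbool.h>
#include <stdio.h>

#define SKETCH_PAGES_PER_SECTION 32768UL   /* 128 MiB of 4K pages, assumed */

static unsigned long first_deferred_pfn = 1048576;   /* start of the uninitialized tail */
static unsigned long zone_end = 4194304;

static bool deferred_grow_zone(unsigned int order)
{
        unsigned long need = 1UL << order;
        /* round the request up to whole sections, like ALIGN(1 << order, PAGES_PER_SECTION) */
        unsigned long nr_needed = (need + SKETCH_PAGES_PER_SECTION - 1) &
                                  ~(SKETCH_PAGES_PER_SECTION - 1);
        unsigned long done = 0;

        while (done < nr_needed && first_deferred_pfn < zone_end) {
                /* the kernel calls deferred_init_pages() for this section here */
                first_deferred_pfn += SKETCH_PAGES_PER_SECTION;
                done += SKETCH_PAGES_PER_SECTION;
        }
        return done > 0;   /* grown at all -> the caller retries the watermark check */
}

int main(void)
{
        printf("grew: %d, next deferred pfn: %lu\n",
               deferred_grow_zone(9), first_deferred_pfn);
        return 0;
}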
@@ -1626,6 +1727,12 @@ void __init page_alloc_init_late(void) | |||
1626 | /* Block until all are initialised */ | 1727 | /* Block until all are initialised */ |
1627 | wait_for_completion(&pgdat_init_all_done_comp); | 1728 | wait_for_completion(&pgdat_init_all_done_comp); |
1628 | 1729 | ||
1730 | /* | ||
1731 | * We initialized the rest of the deferred pages. Permanently disable | ||
1732 | * on-demand struct page initialization. | ||
1733 | */ | ||
1734 | static_branch_disable(&deferred_pages); | ||
1735 | |||
1629 | /* Reinit limits that are based on free pages after the kernel is up */ | 1736 | /* Reinit limits that are based on free pages after the kernel is up */ |
1630 | files_maxfiles_init(); | 1737 | files_maxfiles_init(); |
1631 | #endif | 1738 | #endif |
@@ -2418,10 +2525,8 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) | |||
2418 | local_irq_save(flags); | 2525 | local_irq_save(flags); |
2419 | batch = READ_ONCE(pcp->batch); | 2526 | batch = READ_ONCE(pcp->batch); |
2420 | to_drain = min(pcp->count, batch); | 2527 | to_drain = min(pcp->count, batch); |
2421 | if (to_drain > 0) { | 2528 | if (to_drain > 0) |
2422 | free_pcppages_bulk(zone, to_drain, pcp); | 2529 | free_pcppages_bulk(zone, to_drain, pcp); |
2423 | pcp->count -= to_drain; | ||
2424 | } | ||
2425 | local_irq_restore(flags); | 2530 | local_irq_restore(flags); |
2426 | } | 2531 | } |
2427 | #endif | 2532 | #endif |
@@ -2443,10 +2548,8 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone) | |||
2443 | pset = per_cpu_ptr(zone->pageset, cpu); | 2548 | pset = per_cpu_ptr(zone->pageset, cpu); |
2444 | 2549 | ||
2445 | pcp = &pset->pcp; | 2550 | pcp = &pset->pcp; |
2446 | if (pcp->count) { | 2551 | if (pcp->count) |
2447 | free_pcppages_bulk(zone, pcp->count, pcp); | 2552 | free_pcppages_bulk(zone, pcp->count, pcp); |
2448 | pcp->count = 0; | ||
2449 | } | ||
2450 | local_irq_restore(flags); | 2553 | local_irq_restore(flags); |
2451 | } | 2554 | } |
2452 | 2555 | ||
@@ -2670,7 +2773,6 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn) | |||
2670 | if (pcp->count >= pcp->high) { | 2773 | if (pcp->count >= pcp->high) { |
2671 | unsigned long batch = READ_ONCE(pcp->batch); | 2774 | unsigned long batch = READ_ONCE(pcp->batch); |
2672 | free_pcppages_bulk(zone, batch, pcp); | 2775 | free_pcppages_bulk(zone, batch, pcp); |
2673 | pcp->count -= batch; | ||
2674 | } | 2776 | } |
2675 | } | 2777 | } |
2676 | 2778 | ||
@@ -3205,6 +3307,16 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags, | |||
3205 | ac_classzone_idx(ac), alloc_flags)) { | 3307 | ac_classzone_idx(ac), alloc_flags)) { |
3206 | int ret; | 3308 | int ret; |
3207 | 3309 | ||
3310 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT | ||
3311 | /* | ||
3312 | * Watermark failed for this zone, but see if we can | ||
3313 | * grow this zone if it contains deferred pages. | ||
3314 | */ | ||
3315 | if (static_branch_unlikely(&deferred_pages)) { | ||
3316 | if (_deferred_grow_zone(zone, order)) | ||
3317 | goto try_this_zone; | ||
3318 | } | ||
3319 | #endif | ||
3208 | /* Checked here to keep the fast path fast */ | 3320 | /* Checked here to keep the fast path fast */ |
3209 | BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK); | 3321 | BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK); |
3210 | if (alloc_flags & ALLOC_NO_WATERMARKS) | 3322 | if (alloc_flags & ALLOC_NO_WATERMARKS) |
@@ -3246,6 +3358,14 @@ try_this_zone: | |||
3246 | reserve_highatomic_pageblock(page, zone, order); | 3358 | reserve_highatomic_pageblock(page, zone, order); |
3247 | 3359 | ||
3248 | return page; | 3360 | return page; |
3361 | } else { | ||
3362 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT | ||
3363 | /* Try again if zone has deferred pages */ | ||
3364 | if (static_branch_unlikely(&deferred_pages)) { | ||
3365 | if (_deferred_grow_zone(zone, order)) | ||
3366 | goto try_this_zone; | ||
3367 | } | ||
3368 | #endif | ||
3249 | } | 3369 | } |
3250 | } | 3370 | } |
3251 | 3371 | ||
@@ -3685,16 +3805,18 @@ retry: | |||
3685 | return page; | 3805 | return page; |
3686 | } | 3806 | } |
3687 | 3807 | ||
3688 | static void wake_all_kswapds(unsigned int order, const struct alloc_context *ac) | 3808 | static void wake_all_kswapds(unsigned int order, gfp_t gfp_mask, |
3809 | const struct alloc_context *ac) | ||
3689 | { | 3810 | { |
3690 | struct zoneref *z; | 3811 | struct zoneref *z; |
3691 | struct zone *zone; | 3812 | struct zone *zone; |
3692 | pg_data_t *last_pgdat = NULL; | 3813 | pg_data_t *last_pgdat = NULL; |
3814 | enum zone_type high_zoneidx = ac->high_zoneidx; | ||
3693 | 3815 | ||
3694 | for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, | 3816 | for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, high_zoneidx, |
3695 | ac->high_zoneidx, ac->nodemask) { | 3817 | ac->nodemask) { |
3696 | if (last_pgdat != zone->zone_pgdat) | 3818 | if (last_pgdat != zone->zone_pgdat) |
3697 | wakeup_kswapd(zone, order, ac->high_zoneidx); | 3819 | wakeup_kswapd(zone, gfp_mask, order, high_zoneidx); |
3698 | last_pgdat = zone->zone_pgdat; | 3820 | last_pgdat = zone->zone_pgdat; |
3699 | } | 3821 | } |
3700 | } | 3822 | } |
@@ -3973,7 +4095,7 @@ retry_cpuset: | |||
3973 | goto nopage; | 4095 | goto nopage; |
3974 | 4096 | ||
3975 | if (gfp_mask & __GFP_KSWAPD_RECLAIM) | 4097 | if (gfp_mask & __GFP_KSWAPD_RECLAIM) |
3976 | wake_all_kswapds(order, ac); | 4098 | wake_all_kswapds(order, gfp_mask, ac); |
3977 | 4099 | ||
3978 | /* | 4100 | /* |
3979 | * The adjusted alloc_flags might result in immediate success, so try | 4101 | * The adjusted alloc_flags might result in immediate success, so try |
@@ -4031,7 +4153,7 @@ retry_cpuset: | |||
4031 | retry: | 4153 | retry: |
4032 | /* Ensure kswapd doesn't accidentally go to sleep as long as we loop */ | 4154 | /* Ensure kswapd doesn't accidentally go to sleep as long as we loop */ |
4033 | if (gfp_mask & __GFP_KSWAPD_RECLAIM) | 4155 | if (gfp_mask & __GFP_KSWAPD_RECLAIM) |
4034 | wake_all_kswapds(order, ac); | 4156 | wake_all_kswapds(order, gfp_mask, ac); |
4035 | 4157 | ||
4036 | reserve_flags = __gfp_pfmemalloc_flags(gfp_mask); | 4158 | reserve_flags = __gfp_pfmemalloc_flags(gfp_mask); |
4037 | if (reserve_flags) | 4159 | if (reserve_flags) |
@@ -5334,6 +5456,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, | |||
5334 | pg_data_t *pgdat = NODE_DATA(nid); | 5456 | pg_data_t *pgdat = NODE_DATA(nid); |
5335 | unsigned long pfn; | 5457 | unsigned long pfn; |
5336 | unsigned long nr_initialised = 0; | 5458 | unsigned long nr_initialised = 0; |
5459 | struct page *page; | ||
5337 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP | 5460 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP |
5338 | struct memblock_region *r = NULL, *tmp; | 5461 | struct memblock_region *r = NULL, *tmp; |
5339 | #endif | 5462 | #endif |
@@ -5386,6 +5509,11 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, | |||
5386 | #endif | 5509 | #endif |
5387 | 5510 | ||
5388 | not_early: | 5511 | not_early: |
5512 | page = pfn_to_page(pfn); | ||
5513 | __init_single_page(page, pfn, zone, nid); | ||
5514 | if (context == MEMMAP_HOTPLUG) | ||
5515 | SetPageReserved(page); | ||
5516 | |||
5389 | /* | 5517 | /* |
5390 | * Mark the block movable so that blocks are reserved for | 5518 | * Mark the block movable so that blocks are reserved for |
5391 | * movable at startup. This will force kernel allocations | 5519 | * movable at startup. This will force kernel allocations |
@@ -5402,15 +5530,8 @@ not_early: | |||
5402 | * because this is done early in sparse_add_one_section | 5530 | * because this is done early in sparse_add_one_section |
5403 | */ | 5531 | */ |
5404 | if (!(pfn & (pageblock_nr_pages - 1))) { | 5532 | if (!(pfn & (pageblock_nr_pages - 1))) { |
5405 | struct page *page = pfn_to_page(pfn); | ||
5406 | |||
5407 | __init_single_page(page, pfn, zone, nid, | ||
5408 | context != MEMMAP_HOTPLUG); | ||
5409 | set_pageblock_migratetype(page, MIGRATE_MOVABLE); | 5533 | set_pageblock_migratetype(page, MIGRATE_MOVABLE); |
5410 | cond_resched(); | 5534 | cond_resched(); |
5411 | } else { | ||
5412 | __init_single_pfn(pfn, zone, nid, | ||
5413 | context != MEMMAP_HOTPLUG); | ||
5414 | } | 5535 | } |
5415 | } | 5536 | } |
5416 | } | 5537 | } |
@@ -6241,7 +6362,15 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size, | |||
6241 | 6362 | ||
6242 | alloc_node_mem_map(pgdat); | 6363 | alloc_node_mem_map(pgdat); |
6243 | 6364 | ||
6244 | reset_deferred_meminit(pgdat); | 6365 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT |
6366 | /* | ||
6367 | * We start only with one section of pages, more pages are added as | ||
6368 | * needed until the rest of deferred pages are initialized. | ||
6369 | */ | ||
6370 | pgdat->static_init_pgcnt = min_t(unsigned long, PAGES_PER_SECTION, | ||
6371 | pgdat->node_spanned_pages); | ||
6372 | pgdat->first_deferred_pfn = ULONG_MAX; | ||
6373 | #endif | ||
6245 | free_area_init_core(pgdat); | 6374 | free_area_init_core(pgdat); |
6246 | } | 6375 | } |
6247 | 6376 | ||
@@ -6471,7 +6600,18 @@ static void __init find_zone_movable_pfns_for_nodes(void) | |||
6471 | } | 6600 | } |
6472 | 6601 | ||
6473 | /* | 6602 | /* |
6474 | * If movablecore=nn[KMG] was specified, calculate what size of | 6603 | * If kernelcore=nn% or movablecore=nn% was specified, calculate the |
6604 | * amount of necessary memory. | ||
6605 | */ | ||
6606 | if (required_kernelcore_percent) | ||
6607 | required_kernelcore = (totalpages * 100 * required_kernelcore_percent) / | ||
6608 | 10000UL; | ||
6609 | if (required_movablecore_percent) | ||
6610 | required_movablecore = (totalpages * 100 * required_movablecore_percent) / | ||
6611 | 10000UL; | ||
6612 | |||
6613 | /* | ||
6614 | * If movablecore= was specified, calculate what size of | ||
6475 | * kernelcore that corresponds so that memory usable for | 6615 | * kernelcore that corresponds so that memory usable for |
6476 | * any allocation type is evenly spread. If both kernelcore | 6616 | * any allocation type is evenly spread. If both kernelcore |
6477 | * and movablecore are specified, then the value of kernelcore | 6617 | * and movablecore are specified, then the value of kernelcore |
@@ -6711,18 +6851,30 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) | |||
6711 | zero_resv_unavail(); | 6851 | zero_resv_unavail(); |
6712 | } | 6852 | } |
6713 | 6853 | ||
6714 | static int __init cmdline_parse_core(char *p, unsigned long *core) | 6854 | static int __init cmdline_parse_core(char *p, unsigned long *core, |
6855 | unsigned long *percent) | ||
6715 | { | 6856 | { |
6716 | unsigned long long coremem; | 6857 | unsigned long long coremem; |
6858 | char *endptr; | ||
6859 | |||
6717 | if (!p) | 6860 | if (!p) |
6718 | return -EINVAL; | 6861 | return -EINVAL; |
6719 | 6862 | ||
6720 | coremem = memparse(p, &p); | 6863 | /* Value may be a percentage of total memory, otherwise bytes */ |
6721 | *core = coremem >> PAGE_SHIFT; | 6864 | coremem = simple_strtoull(p, &endptr, 0); |
6865 | if (*endptr == '%') { | ||
6866 | /* Paranoid check for percent values greater than 100 */ | ||
6867 | WARN_ON(coremem > 100); | ||
6722 | 6868 | ||
6723 | /* Paranoid check that UL is enough for the coremem value */ | 6869 | *percent = coremem; |
6724 | WARN_ON((coremem >> PAGE_SHIFT) > ULONG_MAX); | 6870 | } else { |
6871 | coremem = memparse(p, &p); | ||
6872 | /* Paranoid check that UL is enough for the coremem value */ | ||
6873 | WARN_ON((coremem >> PAGE_SHIFT) > ULONG_MAX); | ||
6725 | 6874 | ||
6875 | *core = coremem >> PAGE_SHIFT; | ||
6876 | *percent = 0UL; | ||
6877 | } | ||
6726 | return 0; | 6878 | return 0; |
6727 | } | 6879 | } |
6728 | 6880 | ||
@@ -6738,7 +6890,8 @@ static int __init cmdline_parse_kernelcore(char *p) | |||
6738 | return 0; | 6890 | return 0; |
6739 | } | 6891 | } |
6740 | 6892 | ||
6741 | return cmdline_parse_core(p, &required_kernelcore); | 6893 | return cmdline_parse_core(p, &required_kernelcore, |
6894 | &required_kernelcore_percent); | ||
6742 | } | 6895 | } |
6743 | 6896 | ||
6744 | /* | 6897 | /* |
@@ -6747,7 +6900,8 @@ static int __init cmdline_parse_kernelcore(char *p) | |||
6747 | */ | 6900 | */ |
6748 | static int __init cmdline_parse_movablecore(char *p) | 6901 | static int __init cmdline_parse_movablecore(char *p) |
6749 | { | 6902 | { |
6750 | return cmdline_parse_core(p, &required_movablecore); | 6903 | return cmdline_parse_core(p, &required_movablecore, |
6904 | &required_movablecore_percent); | ||
6751 | } | 6905 | } |
6752 | 6906 | ||
6753 | early_param("kernelcore", cmdline_parse_kernelcore); | 6907 | early_param("kernelcore", cmdline_parse_kernelcore); |
@@ -7591,7 +7745,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc, | |||
7591 | cc->nr_migratepages -= nr_reclaimed; | 7745 | cc->nr_migratepages -= nr_reclaimed; |
7592 | 7746 | ||
7593 | ret = migrate_pages(&cc->migratepages, alloc_migrate_target, | 7747 | ret = migrate_pages(&cc->migratepages, alloc_migrate_target, |
7594 | NULL, 0, cc->mode, MR_CMA); | 7748 | NULL, 0, cc->mode, MR_CONTIG_RANGE); |
7595 | } | 7749 | } |
7596 | if (ret < 0) { | 7750 | if (ret < 0) { |
7597 | putback_movable_pages(&cc->migratepages); | 7751 | putback_movable_pages(&cc->migratepages); |
@@ -7611,11 +7765,11 @@ static int __alloc_contig_migrate_range(struct compact_control *cc, | |||
7611 | * @gfp_mask: GFP mask to use during compaction | 7765 | * @gfp_mask: GFP mask to use during compaction |
7612 | * | 7766 | * |
7613 | * The PFN range does not have to be pageblock or MAX_ORDER_NR_PAGES | 7767 | * The PFN range does not have to be pageblock or MAX_ORDER_NR_PAGES |
7614 | * aligned, however it's the caller's responsibility to guarantee that | 7768 | * aligned. The PFN range must belong to a single zone. |
7615 | * we are the only thread that changes migrate type of pageblocks the | ||
7616 | * pages fall in. | ||
7617 | * | 7769 | * |
7618 | * The PFN range must belong to a single zone. | 7770 | * The first thing this routine does is attempt to MIGRATE_ISOLATE all |
7771 | * pageblocks in the range. Once isolated, the pageblocks should not | ||
7772 | * be modified by others. | ||
7619 | * | 7773 | * |
7620 | * Returns zero on success or negative error code. On success all | 7774 | * Returns zero on success or negative error code. On success all |
7621 | * pages which PFN is in [start, end) are allocated for the caller and | 7775 | * pages which PFN is in [start, end) are allocated for the caller and |
diff --git a/mm/page_idle.c b/mm/page_idle.c index 0a49374e6931..e412a63b2b74 100644 --- a/mm/page_idle.c +++ b/mm/page_idle.c | |||
@@ -65,11 +65,15 @@ static bool page_idle_clear_pte_refs_one(struct page *page, | |||
65 | while (page_vma_mapped_walk(&pvmw)) { | 65 | while (page_vma_mapped_walk(&pvmw)) { |
66 | addr = pvmw.address; | 66 | addr = pvmw.address; |
67 | if (pvmw.pte) { | 67 | if (pvmw.pte) { |
68 | referenced = ptep_clear_young_notify(vma, addr, | 68 | /* |
69 | pvmw.pte); | 69 | * For PTE-mapped THP, one sub page is referenced, |
70 | * the whole THP is referenced. | ||
71 | */ | ||
72 | if (ptep_clear_young_notify(vma, addr, pvmw.pte)) | ||
73 | referenced = true; | ||
70 | } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { | 74 | } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { |
71 | referenced = pmdp_clear_young_notify(vma, addr, | 75 | if (pmdp_clear_young_notify(vma, addr, pvmw.pmd)) |
72 | pvmw.pmd); | 76 | referenced = true; |
73 | } else { | 77 | } else { |
74 | /* unexpected pmd-mapped page? */ | 78 | /* unexpected pmd-mapped page? */ |
75 | WARN_ON_ONCE(1); | 79 | WARN_ON_ONCE(1); |
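The page_idle change matters for PTE-mapped THPs: the rmap walk visits every mapping of the compound page, and the old code overwrote `referenced` on each step, so a young first subpage could be forgotten by the time the last one was checked. The fix accumulates with OR. A tiny sketch of the difference, with the mapping walk reduced to an array of booleans:

/* Sketch: accumulate "referenced" across all mappings instead of overwriting it. */
#include <stdbool.h>
#include <stdio.h>

static bool any_mapping_young(const bool *young, int n)
{
        bool referenced = false;
        for (int i = 0; i < n; i++)
                if (young[i])            /* like: if (ptep_clear_young_notify(...)) */
                        referenced = true;
        return referenced;
}

static bool last_mapping_young(const bool *young, int n)   /* the pre-patch bug */
{
        bool referenced = false;
        for (int i = 0; i < n; i++)
                referenced = young[i];   /* overwrites the earlier result */
        return referenced;
}

int main(void)
{
        bool young[] = { true, false, false };   /* only the first subpage was touched */
        printf("fixed: %d\n", any_mapping_young(young, 3));   /* 1 */
        printf("buggy: %d\n", last_mapping_young(young, 3));  /* 0 */
        return 0;
}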
diff --git a/mm/page_isolation.c b/mm/page_isolation.c index 165ed8117bd1..61dee77bb211 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c | |||
@@ -28,6 +28,14 @@ static int set_migratetype_isolate(struct page *page, int migratetype, | |||
28 | 28 | ||
29 | spin_lock_irqsave(&zone->lock, flags); | 29 | spin_lock_irqsave(&zone->lock, flags); |
30 | 30 | ||
31 | /* | ||
32 | * We assume the caller intended to SET migrate type to isolate. | ||
33 | * If it is already set, then someone else must have raced and | ||
34 | * set it before us. Return -EBUSY | ||
35 | */ | ||
36 | if (is_migrate_isolate_page(page)) | ||
37 | goto out; | ||
38 | |||
31 | pfn = page_to_pfn(page); | 39 | pfn = page_to_pfn(page); |
32 | arg.start_pfn = pfn; | 40 | arg.start_pfn = pfn; |
33 | arg.nr_pages = pageblock_nr_pages; | 41 | arg.nr_pages = pageblock_nr_pages; |
@@ -166,7 +174,15 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) | |||
166 | * future will not be allocated again. | 174 | * future will not be allocated again. |
167 | * | 175 | * |
168 | * start_pfn/end_pfn must be aligned to pageblock_order. | 176 | * start_pfn/end_pfn must be aligned to pageblock_order. |
169 | * Returns 0 on success and -EBUSY if any part of range cannot be isolated. | 177 | * Return 0 on success and -EBUSY if any part of range cannot be isolated. |
178 | * | ||
179 | * There is no high level synchronization mechanism that prevents two threads | ||
180 | * from trying to isolate overlapping ranges. If this happens, one thread | ||
181 | * will notice pageblocks in the overlapping range already set to isolate. | ||
182 | * This happens in set_migratetype_isolate, and set_migratetype_isolate | ||
183 | * returns an error. We then clean up by restoring the migration type on | ||
184 | * pageblocks we may have modified and return -EBUSY to caller. This | ||
185 | * prevents two threads from simultaneously working on overlapping ranges. | ||
170 | */ | 186 | */ |
171 | int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, | 187 | int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, |
172 | unsigned migratetype, bool skip_hwpoisoned_pages) | 188 | unsigned migratetype, bool skip_hwpoisoned_pages) |
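start_isolate_page_range() now treats an already-isolated pageblock as evidence of a racing caller: set_migratetype_isolate() returns -EBUSY, and the outer loop restores the migratetype of every block it had managed to isolate before failing. A sketch of that claim-or-roll-back pattern over a plain array standing in for pageblocks:

/* Sketch: two callers cannot isolate overlapping ranges; the loser rolls back. */
#include <stdio.h>

enum { MT_MOVABLE, MT_ISOLATE };
static int pageblock_mt[8];   /* all MIGRATE_MOVABLE (0) initially */

static int set_isolate(int blk)
{
        if (pageblock_mt[blk] == MT_ISOLATE)
                return -1;             /* someone else raced and set it first: -EBUSY */
        pageblock_mt[blk] = MT_ISOLATE;
        return 0;
}

static int start_isolate_range(int first, int last)
{
        for (int blk = first; blk <= last; blk++) {
                if (set_isolate(blk) == 0)
                        continue;
                /* undo everything we isolated, then report -EBUSY */
                for (int undo = first; undo < blk; undo++)
                        pageblock_mt[undo] = MT_MOVABLE;
                return -1;
        }
        return 0;
}

int main(void)
{
        printf("first caller:  %d\n", start_isolate_range(0, 3));   /*  0 */
        printf("second caller: %d\n", start_isolate_range(2, 5));   /* -1, blocks 2-3 busy */
        printf("block 4 still movable: %d\n", pageblock_mt[4]);     /*  0 */
        return 0;
}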
diff --git a/mm/page_owner.c b/mm/page_owner.c index 7172e0a80e13..75d21a2259b3 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c | |||
@@ -35,7 +35,7 @@ static depot_stack_handle_t early_handle; | |||
35 | 35 | ||
36 | static void init_early_allocated_pages(void); | 36 | static void init_early_allocated_pages(void); |
37 | 37 | ||
38 | static int early_page_owner_param(char *buf) | 38 | static int __init early_page_owner_param(char *buf) |
39 | { | 39 | { |
40 | if (!buf) | 40 | if (!buf) |
41 | return -EINVAL; | 41 | return -EINVAL; |
diff --git a/mm/page_poison.c b/mm/page_poison.c index e83fd44867de..aa2b3d34e8ea 100644 --- a/mm/page_poison.c +++ b/mm/page_poison.c | |||
@@ -9,7 +9,7 @@ | |||
9 | 9 | ||
10 | static bool want_page_poisoning __read_mostly; | 10 | static bool want_page_poisoning __read_mostly; |
11 | 11 | ||
12 | static int early_page_poison_param(char *buf) | 12 | static int __init early_page_poison_param(char *buf) |
13 | { | 13 | { |
14 | if (!buf) | 14 | if (!buf) |
15 | return -EINVAL; | 15 | return -EINVAL; |
diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 8d2da5dec1e0..c3084ff2569d 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c | |||
@@ -258,6 +258,9 @@ static int __walk_page_range(unsigned long start, unsigned long end, | |||
258 | 258 | ||
259 | /** | 259 | /** |
260 | * walk_page_range - walk page table with caller specific callbacks | 260 | * walk_page_range - walk page table with caller specific callbacks |
261 | * @start: start address of the virtual address range | ||
262 | * @end: end address of the virtual address range | ||
263 | * @walk: mm_walk structure defining the callbacks and the target address space | ||
261 | * | 264 | * |
262 | * Recursively walk the page table tree of the process represented by @walk->mm | 265 | * Recursively walk the page table tree of the process represented by @walk->mm |
263 | * within the virtual address range [@start, @end). During walking, we can do | 266 | * within the virtual address range [@start, @end). During walking, we can do |
diff --git a/mm/percpu-stats.c b/mm/percpu-stats.c index 7a58460bfd27..063ff60ecd90 100644 --- a/mm/percpu-stats.c +++ b/mm/percpu-stats.c | |||
@@ -223,18 +223,7 @@ alloc_buffer: | |||
223 | 223 | ||
224 | return 0; | 224 | return 0; |
225 | } | 225 | } |
226 | 226 | DEFINE_SHOW_ATTRIBUTE(percpu_stats); | |
227 | static int percpu_stats_open(struct inode *inode, struct file *filp) | ||
228 | { | ||
229 | return single_open(filp, percpu_stats_show, NULL); | ||
230 | } | ||
231 | |||
232 | static const struct file_operations percpu_stats_fops = { | ||
233 | .open = percpu_stats_open, | ||
234 | .read = seq_read, | ||
235 | .llseek = seq_lseek, | ||
236 | .release = single_release, | ||
237 | }; | ||
238 | 227 | ||
239 | static int __init init_percpu_stats_debugfs(void) | 228 | static int __init init_percpu_stats_debugfs(void) |
240 | { | 229 | { |
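The percpu-stats hunk drops the hand-rolled open/read/llseek/release boilerplate in favour of DEFINE_SHOW_ATTRIBUTE(percpu_stats), a seq_file helper macro that pastes together the _open wrapper and the file operations table from the _show name. A userspace analogue of that token-pasting pattern; the macro and types below are illustrative only and are not the kernel's definitions.

/* Sketch: generate a wrapper function and an ops table from a single name. */
#include <stdio.h>

struct sketch_ops {
        int (*show)(void);
};

#define DEFINE_SHOW_SKETCH(name)                                  \
        static int name##_open(void)                              \
        {                                                         \
                return name##_show();                             \
        }                                                         \
        static const struct sketch_ops name##_ops = {             \
                .show = name##_show,                              \
        }

static int percpu_stats_show(void)
{
        printf("percpu stats would be printed here\n");
        return 0;
}
DEFINE_SHOW_SKETCH(percpu_stats);

int main(void)
{
        percpu_stats_ops.show();     /* what debugfs would eventually invoke */
        return percpu_stats_open();
}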
@@ -1171,6 +1171,7 @@ void page_add_new_anon_rmap(struct page *page, | |||
1171 | /** | 1171 | /** |
1172 | * page_add_file_rmap - add pte mapping to a file page | 1172 | * page_add_file_rmap - add pte mapping to a file page |
1173 | * @page: the page to add the mapping to | 1173 | * @page: the page to add the mapping to |
1174 | * @compound: charge the page as compound or small page | ||
1174 | * | 1175 | * |
1175 | * The caller needs to hold the pte lock. | 1176 | * The caller needs to hold the pte lock. |
1176 | */ | 1177 | */ |
diff --git a/mm/shmem.c b/mm/shmem.c index b85919243399..4424fc0c33aa 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
@@ -1422,9 +1422,12 @@ static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp, | |||
1422 | { | 1422 | { |
1423 | struct vm_area_struct pvma; | 1423 | struct vm_area_struct pvma; |
1424 | struct page *page; | 1424 | struct page *page; |
1425 | struct vm_fault vmf; | ||
1425 | 1426 | ||
1426 | shmem_pseudo_vma_init(&pvma, info, index); | 1427 | shmem_pseudo_vma_init(&pvma, info, index); |
1427 | page = swapin_readahead(swap, gfp, &pvma, 0); | 1428 | vmf.vma = &pvma; |
1429 | vmf.address = 0; | ||
1430 | page = swap_cluster_readahead(swap, gfp, &vmf); | ||
1428 | shmem_pseudo_vma_destroy(&pvma); | 1431 | shmem_pseudo_vma_destroy(&pvma); |
1429 | 1432 | ||
1430 | return page; | 1433 | return page; |
@@ -1869,7 +1869,7 @@ static int __ref setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) | |||
1869 | return 0; | 1869 | return 0; |
1870 | } | 1870 | } |
1871 | 1871 | ||
1872 | slab_flags_t kmem_cache_flags(unsigned long object_size, | 1872 | slab_flags_t kmem_cache_flags(unsigned int object_size, |
1873 | slab_flags_t flags, const char *name, | 1873 | slab_flags_t flags, const char *name, |
1874 | void (*ctor)(void *)) | 1874 | void (*ctor)(void *)) |
1875 | { | 1875 | { |
@@ -1877,7 +1877,7 @@ slab_flags_t kmem_cache_flags(unsigned long object_size, | |||
1877 | } | 1877 | } |
1878 | 1878 | ||
1879 | struct kmem_cache * | 1879 | struct kmem_cache * |
1880 | __kmem_cache_alias(const char *name, size_t size, size_t align, | 1880 | __kmem_cache_alias(const char *name, unsigned int size, unsigned int align, |
1881 | slab_flags_t flags, void (*ctor)(void *)) | 1881 | slab_flags_t flags, void (*ctor)(void *)) |
1882 | { | 1882 | { |
1883 | struct kmem_cache *cachep; | 1883 | struct kmem_cache *cachep; |
@@ -1994,7 +1994,7 @@ int __kmem_cache_create(struct kmem_cache *cachep, slab_flags_t flags) | |||
1994 | size_t ralign = BYTES_PER_WORD; | 1994 | size_t ralign = BYTES_PER_WORD; |
1995 | gfp_t gfp; | 1995 | gfp_t gfp; |
1996 | int err; | 1996 | int err; |
1997 | size_t size = cachep->size; | 1997 | unsigned int size = cachep->size; |
1998 | 1998 | ||
1999 | #if DEBUG | 1999 | #if DEBUG |
2000 | #if FORCED_DEBUG | 2000 | #if FORCED_DEBUG |
@@ -2291,6 +2291,18 @@ out: | |||
2291 | return nr_freed; | 2291 | return nr_freed; |
2292 | } | 2292 | } |
2293 | 2293 | ||
2294 | bool __kmem_cache_empty(struct kmem_cache *s) | ||
2295 | { | ||
2296 | int node; | ||
2297 | struct kmem_cache_node *n; | ||
2298 | |||
2299 | for_each_kmem_cache_node(s, node, n) | ||
2300 | if (!list_empty(&n->slabs_full) || | ||
2301 | !list_empty(&n->slabs_partial)) | ||
2302 | return false; | ||
2303 | return true; | ||
2304 | } | ||
2305 | |||
2294 | int __kmem_cache_shrink(struct kmem_cache *cachep) | 2306 | int __kmem_cache_shrink(struct kmem_cache *cachep) |
2295 | { | 2307 | { |
2296 | int ret = 0; | 2308 | int ret = 0; |
@@ -22,8 +22,8 @@ struct kmem_cache { | |||
22 | unsigned int size; /* The aligned/padded/added on size */ | 22 | unsigned int size; /* The aligned/padded/added on size */ |
23 | unsigned int align; /* Alignment as calculated */ | 23 | unsigned int align; /* Alignment as calculated */ |
24 | slab_flags_t flags; /* Active flags on the slab */ | 24 | slab_flags_t flags; /* Active flags on the slab */ |
25 | size_t useroffset; /* Usercopy region offset */ | 25 | unsigned int useroffset;/* Usercopy region offset */ |
26 | size_t usersize; /* Usercopy region size */ | 26 | unsigned int usersize; /* Usercopy region size */ |
27 | const char *name; /* Slab name for sysfs */ | 27 | const char *name; /* Slab name for sysfs */ |
28 | int refcount; /* Use counter */ | 28 | int refcount; /* Use counter */ |
29 | void (*ctor)(void *); /* Called on object slot creation */ | 29 | void (*ctor)(void *); /* Called on object slot creation */ |
@@ -77,7 +77,7 @@ extern struct kmem_cache *kmem_cache; | |||
77 | /* A table of kmalloc cache names and sizes */ | 77 | /* A table of kmalloc cache names and sizes */ |
78 | extern const struct kmalloc_info_struct { | 78 | extern const struct kmalloc_info_struct { |
79 | const char *name; | 79 | const char *name; |
80 | unsigned long size; | 80 | unsigned int size; |
81 | } kmalloc_info[]; | 81 | } kmalloc_info[]; |
82 | 82 | ||
83 | #ifndef CONFIG_SLOB | 83 | #ifndef CONFIG_SLOB |
@@ -93,31 +93,31 @@ struct kmem_cache *kmalloc_slab(size_t, gfp_t); | |||
93 | /* Functions provided by the slab allocators */ | 93 | /* Functions provided by the slab allocators */ |
94 | int __kmem_cache_create(struct kmem_cache *, slab_flags_t flags); | 94 | int __kmem_cache_create(struct kmem_cache *, slab_flags_t flags); |
95 | 95 | ||
96 | extern struct kmem_cache *create_kmalloc_cache(const char *name, size_t size, | 96 | struct kmem_cache *create_kmalloc_cache(const char *name, unsigned int size, |
97 | slab_flags_t flags, size_t useroffset, | 97 | slab_flags_t flags, unsigned int useroffset, |
98 | size_t usersize); | 98 | unsigned int usersize); |
99 | extern void create_boot_cache(struct kmem_cache *, const char *name, | 99 | extern void create_boot_cache(struct kmem_cache *, const char *name, |
100 | size_t size, slab_flags_t flags, size_t useroffset, | 100 | unsigned int size, slab_flags_t flags, |
101 | size_t usersize); | 101 | unsigned int useroffset, unsigned int usersize); |
102 | 102 | ||
103 | int slab_unmergeable(struct kmem_cache *s); | 103 | int slab_unmergeable(struct kmem_cache *s); |
104 | struct kmem_cache *find_mergeable(size_t size, size_t align, | 104 | struct kmem_cache *find_mergeable(unsigned size, unsigned align, |
105 | slab_flags_t flags, const char *name, void (*ctor)(void *)); | 105 | slab_flags_t flags, const char *name, void (*ctor)(void *)); |
106 | #ifndef CONFIG_SLOB | 106 | #ifndef CONFIG_SLOB |
107 | struct kmem_cache * | 107 | struct kmem_cache * |
108 | __kmem_cache_alias(const char *name, size_t size, size_t align, | 108 | __kmem_cache_alias(const char *name, unsigned int size, unsigned int align, |
109 | slab_flags_t flags, void (*ctor)(void *)); | 109 | slab_flags_t flags, void (*ctor)(void *)); |
110 | 110 | ||
111 | slab_flags_t kmem_cache_flags(unsigned long object_size, | 111 | slab_flags_t kmem_cache_flags(unsigned int object_size, |
112 | slab_flags_t flags, const char *name, | 112 | slab_flags_t flags, const char *name, |
113 | void (*ctor)(void *)); | 113 | void (*ctor)(void *)); |
114 | #else | 114 | #else |
115 | static inline struct kmem_cache * | 115 | static inline struct kmem_cache * |
116 | __kmem_cache_alias(const char *name, size_t size, size_t align, | 116 | __kmem_cache_alias(const char *name, unsigned int size, unsigned int align, |
117 | slab_flags_t flags, void (*ctor)(void *)) | 117 | slab_flags_t flags, void (*ctor)(void *)) |
118 | { return NULL; } | 118 | { return NULL; } |
119 | 119 | ||
120 | static inline slab_flags_t kmem_cache_flags(unsigned long object_size, | 120 | static inline slab_flags_t kmem_cache_flags(unsigned int object_size, |
121 | slab_flags_t flags, const char *name, | 121 | slab_flags_t flags, const char *name, |
122 | void (*ctor)(void *)) | 122 | void (*ctor)(void *)) |
123 | { | 123 | { |
@@ -166,6 +166,7 @@ static inline slab_flags_t kmem_cache_flags(unsigned long object_size, | |||
166 | SLAB_TEMPORARY | \ | 166 | SLAB_TEMPORARY | \ |
167 | SLAB_ACCOUNT) | 167 | SLAB_ACCOUNT) |
168 | 168 | ||
169 | bool __kmem_cache_empty(struct kmem_cache *); | ||
169 | int __kmem_cache_shutdown(struct kmem_cache *); | 170 | int __kmem_cache_shutdown(struct kmem_cache *); |
170 | void __kmem_cache_release(struct kmem_cache *); | 171 | void __kmem_cache_release(struct kmem_cache *); |
171 | int __kmem_cache_shrink(struct kmem_cache *); | 172 | int __kmem_cache_shrink(struct kmem_cache *); |
diff --git a/mm/slab_common.c b/mm/slab_common.c index 10f127b2de7c..98dcdc352062 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/poison.h> | 10 | #include <linux/poison.h> |
11 | #include <linux/interrupt.h> | 11 | #include <linux/interrupt.h> |
12 | #include <linux/memory.h> | 12 | #include <linux/memory.h> |
13 | #include <linux/cache.h> | ||
13 | #include <linux/compiler.h> | 14 | #include <linux/compiler.h> |
14 | #include <linux/module.h> | 15 | #include <linux/module.h> |
15 | #include <linux/cpu.h> | 16 | #include <linux/cpu.h> |
@@ -81,38 +82,19 @@ unsigned int kmem_cache_size(struct kmem_cache *s) | |||
81 | EXPORT_SYMBOL(kmem_cache_size); | 82 | EXPORT_SYMBOL(kmem_cache_size); |
82 | 83 | ||
83 | #ifdef CONFIG_DEBUG_VM | 84 | #ifdef CONFIG_DEBUG_VM |
84 | static int kmem_cache_sanity_check(const char *name, size_t size) | 85 | static int kmem_cache_sanity_check(const char *name, unsigned int size) |
85 | { | 86 | { |
86 | struct kmem_cache *s = NULL; | ||
87 | |||
88 | if (!name || in_interrupt() || size < sizeof(void *) || | 87 | if (!name || in_interrupt() || size < sizeof(void *) || |
89 | size > KMALLOC_MAX_SIZE) { | 88 | size > KMALLOC_MAX_SIZE) { |
90 | pr_err("kmem_cache_create(%s) integrity check failed\n", name); | 89 | pr_err("kmem_cache_create(%s) integrity check failed\n", name); |
91 | return -EINVAL; | 90 | return -EINVAL; |
92 | } | 91 | } |
93 | 92 | ||
94 | list_for_each_entry(s, &slab_caches, list) { | ||
95 | char tmp; | ||
96 | int res; | ||
97 | |||
98 | /* | ||
99 | * This happens when the module gets unloaded and doesn't | ||
100 | * destroy its slab cache and no-one else reuses the vmalloc | ||
101 | * area of the module. Print a warning. | ||
102 | */ | ||
103 | res = probe_kernel_address(s->name, tmp); | ||
104 | if (res) { | ||
105 | pr_err("Slab cache with size %d has lost its name\n", | ||
106 | s->object_size); | ||
107 | continue; | ||
108 | } | ||
109 | } | ||
110 | |||
111 | WARN_ON(strchr(name, ' ')); /* It confuses parsers */ | 93 | WARN_ON(strchr(name, ' ')); /* It confuses parsers */ |
112 | return 0; | 94 | return 0; |
113 | } | 95 | } |
114 | #else | 96 | #else |
115 | static inline int kmem_cache_sanity_check(const char *name, size_t size) | 97 | static inline int kmem_cache_sanity_check(const char *name, unsigned int size) |
116 | { | 98 | { |
117 | return 0; | 99 | return 0; |
118 | } | 100 | } |
@@ -279,8 +261,8 @@ static inline void memcg_unlink_cache(struct kmem_cache *s) | |||
279 | * Figure out what the alignment of the objects will be given a set of | 261 | * Figure out what the alignment of the objects will be given a set of |
280 | * flags, a user specified alignment and the size of the objects. | 262 | * flags, a user specified alignment and the size of the objects. |
281 | */ | 263 | */ |
282 | static unsigned long calculate_alignment(unsigned long flags, | 264 | static unsigned int calculate_alignment(slab_flags_t flags, |
283 | unsigned long align, unsigned long size) | 265 | unsigned int align, unsigned int size) |
284 | { | 266 | { |
285 | /* | 267 | /* |
286 | * If the user wants hardware cache aligned objects then follow that | 268 | * If the user wants hardware cache aligned objects then follow that |
@@ -290,7 +272,7 @@ static unsigned long calculate_alignment(unsigned long flags, | |||
290 | * alignment though. If that is greater then use it. | 272 | * alignment though. If that is greater then use it. |
291 | */ | 273 | */ |
292 | if (flags & SLAB_HWCACHE_ALIGN) { | 274 | if (flags & SLAB_HWCACHE_ALIGN) { |
293 | unsigned long ralign; | 275 | unsigned int ralign; |
294 | 276 | ||
295 | ralign = cache_line_size(); | 277 | ralign = cache_line_size(); |
296 | while (size <= ralign / 2) | 278 | while (size <= ralign / 2) |
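The SLAB_HWCACHE_ALIGN branch above keeps halving the cache-line size while the object still fits in half of it (the halving body, ralign /= 2, continues past this hunk), so small objects are not padded out to a full line. A user-space sketch of the same arithmetic, assuming a 64-byte cache line:

    #include <stdio.h>

    /* Mirror of the hardware-alignment loop: halve while the object fits. */
    static unsigned int hwcache_align(unsigned int line, unsigned int size)
    {
        unsigned int ralign = line;

        while (size <= ralign / 2)
            ralign /= 2;
        return ralign;
    }

    int main(void)
    {
        printf("%u\n", hwcache_align(64, 20)); /* a 20-byte object gets 32-byte alignment */
        return 0;
    }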
@@ -330,7 +312,7 @@ int slab_unmergeable(struct kmem_cache *s) | |||
330 | return 0; | 312 | return 0; |
331 | } | 313 | } |
332 | 314 | ||
333 | struct kmem_cache *find_mergeable(size_t size, size_t align, | 315 | struct kmem_cache *find_mergeable(unsigned int size, unsigned int align, |
334 | slab_flags_t flags, const char *name, void (*ctor)(void *)) | 316 | slab_flags_t flags, const char *name, void (*ctor)(void *)) |
335 | { | 317 | { |
336 | struct kmem_cache *s; | 318 | struct kmem_cache *s; |
@@ -378,9 +360,9 @@ struct kmem_cache *find_mergeable(size_t size, size_t align, | |||
378 | } | 360 | } |
379 | 361 | ||
380 | static struct kmem_cache *create_cache(const char *name, | 362 | static struct kmem_cache *create_cache(const char *name, |
381 | size_t object_size, size_t size, size_t align, | 363 | unsigned int object_size, unsigned int align, |
382 | slab_flags_t flags, size_t useroffset, | 364 | slab_flags_t flags, unsigned int useroffset, |
383 | size_t usersize, void (*ctor)(void *), | 365 | unsigned int usersize, void (*ctor)(void *), |
384 | struct mem_cgroup *memcg, struct kmem_cache *root_cache) | 366 | struct mem_cgroup *memcg, struct kmem_cache *root_cache) |
385 | { | 367 | { |
386 | struct kmem_cache *s; | 368 | struct kmem_cache *s; |
@@ -395,8 +377,7 @@ static struct kmem_cache *create_cache(const char *name, | |||
395 | goto out; | 377 | goto out; |
396 | 378 | ||
397 | s->name = name; | 379 | s->name = name; |
398 | s->object_size = object_size; | 380 | s->size = s->object_size = object_size; |
399 | s->size = size; | ||
400 | s->align = align; | 381 | s->align = align; |
401 | s->ctor = ctor; | 382 | s->ctor = ctor; |
402 | s->useroffset = useroffset; | 383 | s->useroffset = useroffset; |
@@ -451,8 +432,10 @@ out_free_cache: | |||
451 | * as davem. | 432 | * as davem. |
452 | */ | 433 | */ |
453 | struct kmem_cache * | 434 | struct kmem_cache * |
454 | kmem_cache_create_usercopy(const char *name, size_t size, size_t align, | 435 | kmem_cache_create_usercopy(const char *name, |
455 | slab_flags_t flags, size_t useroffset, size_t usersize, | 436 | unsigned int size, unsigned int align, |
437 | slab_flags_t flags, | ||
438 | unsigned int useroffset, unsigned int usersize, | ||
456 | void (*ctor)(void *)) | 439 | void (*ctor)(void *)) |
457 | { | 440 | { |
458 | struct kmem_cache *s = NULL; | 441 | struct kmem_cache *s = NULL; |
@@ -500,7 +483,7 @@ kmem_cache_create_usercopy(const char *name, size_t size, size_t align, | |||
500 | goto out_unlock; | 483 | goto out_unlock; |
501 | } | 484 | } |
502 | 485 | ||
503 | s = create_cache(cache_name, size, size, | 486 | s = create_cache(cache_name, size, |
504 | calculate_alignment(flags, align, size), | 487 | calculate_alignment(flags, align, size), |
505 | flags, useroffset, usersize, ctor, NULL, NULL); | 488 | flags, useroffset, usersize, ctor, NULL, NULL); |
506 | if (IS_ERR(s)) { | 489 | if (IS_ERR(s)) { |
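kmem_cache_create_usercopy() now takes the size, alignment and usercopy window as unsigned int. A usage sketch with a hypothetical cache (the struct and names are illustrative, not from this diff): only the data field is whitelisted for copying to/from user space, so hardened usercopy will reject copies that touch the lock.

    struct foo {
        spinlock_t lock;
        char data[64];
    };

    static struct kmem_cache *foo_cachep;

    static int __init foo_init(void)
    {
        foo_cachep = kmem_cache_create_usercopy("foo", sizeof(struct foo),
                0, SLAB_HWCACHE_ALIGN,
                offsetof(struct foo, data),          /* useroffset */
                sizeof(((struct foo *)0)->data),     /* usersize */
                NULL);
        return foo_cachep ? 0 : -ENOMEM;
    }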
@@ -531,7 +514,7 @@ out_unlock: | |||
531 | EXPORT_SYMBOL(kmem_cache_create_usercopy); | 514 | EXPORT_SYMBOL(kmem_cache_create_usercopy); |
532 | 515 | ||
533 | struct kmem_cache * | 516 | struct kmem_cache * |
534 | kmem_cache_create(const char *name, size_t size, size_t align, | 517 | kmem_cache_create(const char *name, unsigned int size, unsigned int align, |
535 | slab_flags_t flags, void (*ctor)(void *)) | 518 | slab_flags_t flags, void (*ctor)(void *)) |
536 | { | 519 | { |
537 | return kmem_cache_create_usercopy(name, size, align, flags, 0, 0, | 520 | return kmem_cache_create_usercopy(name, size, align, flags, 0, 0, |
@@ -647,7 +630,7 @@ void memcg_create_kmem_cache(struct mem_cgroup *memcg, | |||
647 | goto out_unlock; | 630 | goto out_unlock; |
648 | 631 | ||
649 | s = create_cache(cache_name, root_cache->object_size, | 632 | s = create_cache(cache_name, root_cache->object_size, |
650 | root_cache->size, root_cache->align, | 633 | root_cache->align, |
651 | root_cache->flags & CACHE_CREATE_MASK, | 634 | root_cache->flags & CACHE_CREATE_MASK, |
652 | root_cache->useroffset, root_cache->usersize, | 635 | root_cache->useroffset, root_cache->usersize, |
653 | root_cache->ctor, memcg, root_cache); | 636 | root_cache->ctor, memcg, root_cache); |
@@ -916,8 +899,9 @@ bool slab_is_available(void) | |||
916 | 899 | ||
917 | #ifndef CONFIG_SLOB | 900 | #ifndef CONFIG_SLOB |
918 | /* Create a cache during boot when no slab services are available yet */ | 901 | /* Create a cache during boot when no slab services are available yet */ |
919 | void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size, | 902 | void __init create_boot_cache(struct kmem_cache *s, const char *name, |
920 | slab_flags_t flags, size_t useroffset, size_t usersize) | 903 | unsigned int size, slab_flags_t flags, |
904 | unsigned int useroffset, unsigned int usersize) | ||
921 | { | 905 | { |
922 | int err; | 906 | int err; |
923 | 907 | ||
@@ -932,15 +916,15 @@ void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t siz | |||
932 | err = __kmem_cache_create(s, flags); | 916 | err = __kmem_cache_create(s, flags); |
933 | 917 | ||
934 | if (err) | 918 | if (err) |
935 | panic("Creation of kmalloc slab %s size=%zu failed. Reason %d\n", | 919 | panic("Creation of kmalloc slab %s size=%u failed. Reason %d\n", |
936 | name, size, err); | 920 | name, size, err); |
937 | 921 | ||
938 | s->refcount = -1; /* Exempt from merging for now */ | 922 | s->refcount = -1; /* Exempt from merging for now */ |
939 | } | 923 | } |
940 | 924 | ||
941 | struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size, | 925 | struct kmem_cache *__init create_kmalloc_cache(const char *name, |
942 | slab_flags_t flags, size_t useroffset, | 926 | unsigned int size, slab_flags_t flags, |
943 | size_t usersize) | 927 | unsigned int useroffset, unsigned int usersize) |
944 | { | 928 | { |
945 | struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT); | 929 | struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT); |
946 | 930 | ||
@@ -954,11 +938,11 @@ struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size, | |||
954 | return s; | 938 | return s; |
955 | } | 939 | } |
956 | 940 | ||
957 | struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1]; | 941 | struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1] __ro_after_init; |
958 | EXPORT_SYMBOL(kmalloc_caches); | 942 | EXPORT_SYMBOL(kmalloc_caches); |
959 | 943 | ||
960 | #ifdef CONFIG_ZONE_DMA | 944 | #ifdef CONFIG_ZONE_DMA |
961 | struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1]; | 945 | struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1] __ro_after_init; |
962 | EXPORT_SYMBOL(kmalloc_dma_caches); | 946 | EXPORT_SYMBOL(kmalloc_dma_caches); |
963 | #endif | 947 | #endif |
964 | 948 | ||
@@ -968,7 +952,7 @@ EXPORT_SYMBOL(kmalloc_dma_caches); | |||
968 | * of two cache sizes there. The size of larger slabs can be determined using | 952 | * of two cache sizes there. The size of larger slabs can be determined using |
969 | * fls. | 953 | * fls. |
970 | */ | 954 | */ |
971 | static s8 size_index[24] = { | 955 | static u8 size_index[24] __ro_after_init = { |
972 | 3, /* 8 */ | 956 | 3, /* 8 */ |
973 | 4, /* 16 */ | 957 | 4, /* 16 */ |
974 | 5, /* 24 */ | 958 | 5, /* 24 */ |
@@ -995,7 +979,7 @@ static s8 size_index[24] = { | |||
995 | 2 /* 192 */ | 979 | 2 /* 192 */ |
996 | }; | 980 | }; |
997 | 981 | ||
998 | static inline int size_index_elem(size_t bytes) | 982 | static inline unsigned int size_index_elem(unsigned int bytes) |
999 | { | 983 | { |
1000 | return (bytes - 1) / 8; | 984 | return (bytes - 1) / 8; |
1001 | } | 985 | } |
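size_index_elem() maps a small request onto a slot of the size_index[] table above (the middle of the table is not shown in this hunk; the values reproduced below are taken from the kernel source and should be read as illustrative). Worked example: kmalloc(20) gives element (20 - 1) / 8 = 2, size_index[2] == 5, so the request is served from the 2^5 = 32-byte cache. Entries 1 and 2 are the special 96- and 192-byte caches, where the power-of-two reading does not apply.

    #include <stdio.h>

    static const unsigned char size_index[24] = {
        3, 4, 5, 5, 6, 6, 6, 6,     /* requests of   8..64  bytes */
        1, 1, 1, 1,                 /* requests of  72..96  bytes -> kmalloc-96 */
        7, 7, 7, 7,                 /* requests of 104..128 bytes */
        2, 2, 2, 2, 2, 2, 2, 2      /* requests of 136..192 bytes -> kmalloc-192 */
    };

    int main(void)
    {
        unsigned int bytes = 20;
        unsigned int elem = (bytes - 1) / 8;        /* == 2 */

        printf("kmalloc(%u) -> index %u (kmalloc-%u)\n",
               bytes, size_index[elem], 1u << size_index[elem]);
        return 0;
    }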
@@ -1006,7 +990,7 @@ static inline int size_index_elem(size_t bytes) | |||
1006 | */ | 990 | */ |
1007 | struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags) | 991 | struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags) |
1008 | { | 992 | { |
1009 | int index; | 993 | unsigned int index; |
1010 | 994 | ||
1011 | if (unlikely(size > KMALLOC_MAX_SIZE)) { | 995 | if (unlikely(size > KMALLOC_MAX_SIZE)) { |
1012 | WARN_ON_ONCE(!(flags & __GFP_NOWARN)); | 996 | WARN_ON_ONCE(!(flags & __GFP_NOWARN)); |
@@ -1064,13 +1048,13 @@ const struct kmalloc_info_struct kmalloc_info[] __initconst = { | |||
1064 | */ | 1048 | */ |
1065 | void __init setup_kmalloc_cache_index_table(void) | 1049 | void __init setup_kmalloc_cache_index_table(void) |
1066 | { | 1050 | { |
1067 | int i; | 1051 | unsigned int i; |
1068 | 1052 | ||
1069 | BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 || | 1053 | BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 || |
1070 | (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1))); | 1054 | (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1))); |
1071 | 1055 | ||
1072 | for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) { | 1056 | for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) { |
1073 | int elem = size_index_elem(i); | 1057 | unsigned int elem = size_index_elem(i); |
1074 | 1058 | ||
1075 | if (elem >= ARRAY_SIZE(size_index)) | 1059 | if (elem >= ARRAY_SIZE(size_index)) |
1076 | break; | 1060 | break; |
@@ -1137,9 +1121,9 @@ void __init create_kmalloc_caches(slab_flags_t flags) | |||
1137 | struct kmem_cache *s = kmalloc_caches[i]; | 1121 | struct kmem_cache *s = kmalloc_caches[i]; |
1138 | 1122 | ||
1139 | if (s) { | 1123 | if (s) { |
1140 | int size = kmalloc_size(i); | 1124 | unsigned int size = kmalloc_size(i); |
1141 | char *n = kasprintf(GFP_NOWAIT, | 1125 | char *n = kasprintf(GFP_NOWAIT, |
1142 | "dma-kmalloc-%d", size); | 1126 | "dma-kmalloc-%u", size); |
1143 | 1127 | ||
1144 | BUG_ON(!n); | 1128 | BUG_ON(!n); |
1145 | kmalloc_dma_caches[i] = create_kmalloc_cache(n, | 1129 | kmalloc_dma_caches[i] = create_kmalloc_cache(n, |
@@ -1182,10 +1166,10 @@ EXPORT_SYMBOL(kmalloc_order_trace); | |||
1182 | #ifdef CONFIG_SLAB_FREELIST_RANDOM | 1166 | #ifdef CONFIG_SLAB_FREELIST_RANDOM |
1183 | /* Randomize a generic freelist */ | 1167 | /* Randomize a generic freelist */ |
1184 | static void freelist_randomize(struct rnd_state *state, unsigned int *list, | 1168 | static void freelist_randomize(struct rnd_state *state, unsigned int *list, |
1185 | size_t count) | 1169 | unsigned int count) |
1186 | { | 1170 | { |
1187 | size_t i; | ||
1188 | unsigned int rand; | 1171 | unsigned int rand; |
1172 | unsigned int i; | ||
1189 | 1173 | ||
1190 | for (i = 0; i < count; i++) | 1174 | for (i = 0; i < count; i++) |
1191 | list[i] = i; | 1175 | list[i] = i; |
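freelist_randomize() first fills list[] with the identity permutation; the shuffle itself falls outside this hunk. A user-space sketch of the presumed remainder (an assumption: a Fisher-Yates shuffle, with libc rand() standing in for the kernel's seeded PRNG state):

    #include <stdlib.h>

    static void freelist_randomize_sketch(unsigned int *list, unsigned int count)
    {
        unsigned int i;

        for (i = 0; i < count; i++)
            list[i] = i;

        /* Fisher-Yates: swap each slot with a random earlier (or same) slot. */
        for (i = count - 1; i > 0; i--) {
            unsigned int j = (unsigned int)rand() % (i + 1);
            unsigned int tmp = list[i];

            list[i] = list[j];
            list[j] = tmp;
        }
    }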
@@ -1532,3 +1516,11 @@ EXPORT_TRACEPOINT_SYMBOL(kmalloc_node); | |||
1532 | EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node); | 1516 | EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node); |
1533 | EXPORT_TRACEPOINT_SYMBOL(kfree); | 1517 | EXPORT_TRACEPOINT_SYMBOL(kfree); |
1534 | EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free); | 1518 | EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free); |
1519 | |||
1520 | int should_failslab(struct kmem_cache *s, gfp_t gfpflags) | ||
1521 | { | ||
1522 | if (__should_failslab(s, gfpflags)) | ||
1523 | return -ENOMEM; | ||
1524 | return 0; | ||
1525 | } | ||
1526 | ALLOW_ERROR_INJECTION(should_failslab, ERRNO); | ||
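should_failslab() is now a real (non-inline) function marked with ALLOW_ERROR_INJECTION, so on kernels built with BPF error injection its return value can be overridden at run time to force -ENOMEM into chosen slab allocations. A simplified sketch of how the allocation path presumably consults it (the real pre-alloc hook does more than this; the shape below is an assumption):

    /* Simplified: bail out of the allocation when the hook injects failure. */
    static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
                                                         gfp_t flags)
    {
        if (should_failslab(s, flags))
            return NULL;
        return s;
    }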
@@ -311,18 +311,18 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) | |||
311 | __p += (__s)->size, __idx++) | 311 | __p += (__s)->size, __idx++) |
312 | 312 | ||
313 | /* Determine object index from a given position */ | 313 | /* Determine object index from a given position */ |
314 | static inline int slab_index(void *p, struct kmem_cache *s, void *addr) | 314 | static inline unsigned int slab_index(void *p, struct kmem_cache *s, void *addr) |
315 | { | 315 | { |
316 | return (p - addr) / s->size; | 316 | return (p - addr) / s->size; |
317 | } | 317 | } |
318 | 318 | ||
319 | static inline int order_objects(int order, unsigned long size, int reserved) | 319 | static inline unsigned int order_objects(unsigned int order, unsigned int size, unsigned int reserved) |
320 | { | 320 | { |
321 | return ((PAGE_SIZE << order) - reserved) / size; | 321 | return (((unsigned int)PAGE_SIZE << order) - reserved) / size; |
322 | } | 322 | } |
323 | 323 | ||
324 | static inline struct kmem_cache_order_objects oo_make(int order, | 324 | static inline struct kmem_cache_order_objects oo_make(unsigned int order, |
325 | unsigned long size, int reserved) | 325 | unsigned int size, unsigned int reserved) |
326 | { | 326 | { |
327 | struct kmem_cache_order_objects x = { | 327 | struct kmem_cache_order_objects x = { |
328 | (order << OO_SHIFT) + order_objects(order, size, reserved) | 328 | (order << OO_SHIFT) + order_objects(order, size, reserved) |
@@ -331,12 +331,12 @@ static inline struct kmem_cache_order_objects oo_make(int order, | |||
331 | return x; | 331 | return x; |
332 | } | 332 | } |
333 | 333 | ||
334 | static inline int oo_order(struct kmem_cache_order_objects x) | 334 | static inline unsigned int oo_order(struct kmem_cache_order_objects x) |
335 | { | 335 | { |
336 | return x.x >> OO_SHIFT; | 336 | return x.x >> OO_SHIFT; |
337 | } | 337 | } |
338 | 338 | ||
339 | static inline int oo_objects(struct kmem_cache_order_objects x) | 339 | static inline unsigned int oo_objects(struct kmem_cache_order_objects x) |
340 | { | 340 | { |
341 | return x.x & OO_MASK; | 341 | return x.x & OO_MASK; |
342 | } | 342 | } |
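order_objects() and the kmem_cache_order_objects helpers pack a slab's page order and object count into one word. Worked example as a user-space sketch (OO_SHIFT == 16 is taken from the SLUB source, not from this hunk): 256-byte objects in an order-1 slab with nothing reserved give (8192 - 0) / 256 = 32 objects.

    #include <stdio.h>

    #define OO_SHIFT   16
    #define OO_MASK    ((1u << OO_SHIFT) - 1)
    #define PAGE_SZ    4096u

    int main(void)
    {
        unsigned int order = 1, size = 256, reserved = 0;
        unsigned int objects = ((PAGE_SZ << order) - reserved) / size;
        unsigned int oo = (order << OO_SHIFT) + objects;   /* oo_make() */

        printf("order=%u objects=%u\n", oo >> OO_SHIFT, oo & OO_MASK);
        return 0;
    }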
@@ -466,7 +466,7 @@ static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map) | |||
466 | set_bit(slab_index(p, s, addr), map); | 466 | set_bit(slab_index(p, s, addr), map); |
467 | } | 467 | } |
468 | 468 | ||
469 | static inline int size_from_object(struct kmem_cache *s) | 469 | static inline unsigned int size_from_object(struct kmem_cache *s) |
470 | { | 470 | { |
471 | if (s->flags & SLAB_RED_ZONE) | 471 | if (s->flags & SLAB_RED_ZONE) |
472 | return s->size - s->red_left_pad; | 472 | return s->size - s->red_left_pad; |
@@ -598,13 +598,13 @@ static void init_tracking(struct kmem_cache *s, void *object) | |||
598 | set_track(s, object, TRACK_ALLOC, 0UL); | 598 | set_track(s, object, TRACK_ALLOC, 0UL); |
599 | } | 599 | } |
600 | 600 | ||
601 | static void print_track(const char *s, struct track *t) | 601 | static void print_track(const char *s, struct track *t, unsigned long pr_time) |
602 | { | 602 | { |
603 | if (!t->addr) | 603 | if (!t->addr) |
604 | return; | 604 | return; |
605 | 605 | ||
606 | pr_err("INFO: %s in %pS age=%lu cpu=%u pid=%d\n", | 606 | pr_err("INFO: %s in %pS age=%lu cpu=%u pid=%d\n", |
607 | s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid); | 607 | s, (void *)t->addr, pr_time - t->when, t->cpu, t->pid); |
608 | #ifdef CONFIG_STACKTRACE | 608 | #ifdef CONFIG_STACKTRACE |
609 | { | 609 | { |
610 | int i; | 610 | int i; |
@@ -619,11 +619,12 @@ static void print_track(const char *s, struct track *t) | |||
619 | 619 | ||
620 | static void print_tracking(struct kmem_cache *s, void *object) | 620 | static void print_tracking(struct kmem_cache *s, void *object) |
621 | { | 621 | { |
622 | unsigned long pr_time = jiffies; | ||
622 | if (!(s->flags & SLAB_STORE_USER)) | 623 | if (!(s->flags & SLAB_STORE_USER)) |
623 | return; | 624 | return; |
624 | 625 | ||
625 | print_track("Allocated", get_track(s, object, TRACK_ALLOC)); | 626 | print_track("Allocated", get_track(s, object, TRACK_ALLOC), pr_time); |
626 | print_track("Freed", get_track(s, object, TRACK_FREE)); | 627 | print_track("Freed", get_track(s, object, TRACK_FREE), pr_time); |
627 | } | 628 | } |
628 | 629 | ||
629 | static void print_page_info(struct page *page) | 630 | static void print_page_info(struct page *page) |
@@ -680,7 +681,7 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) | |||
680 | print_section(KERN_ERR, "Bytes b4 ", p - 16, 16); | 681 | print_section(KERN_ERR, "Bytes b4 ", p - 16, 16); |
681 | 682 | ||
682 | print_section(KERN_ERR, "Object ", p, | 683 | print_section(KERN_ERR, "Object ", p, |
683 | min_t(unsigned long, s->object_size, PAGE_SIZE)); | 684 | min_t(unsigned int, s->object_size, PAGE_SIZE)); |
684 | if (s->flags & SLAB_RED_ZONE) | 685 | if (s->flags & SLAB_RED_ZONE) |
685 | print_section(KERN_ERR, "Redzone ", p + s->object_size, | 686 | print_section(KERN_ERR, "Redzone ", p + s->object_size, |
686 | s->inuse - s->object_size); | 687 | s->inuse - s->object_size); |
@@ -1292,7 +1293,7 @@ out: | |||
1292 | 1293 | ||
1293 | __setup("slub_debug", setup_slub_debug); | 1294 | __setup("slub_debug", setup_slub_debug); |
1294 | 1295 | ||
1295 | slab_flags_t kmem_cache_flags(unsigned long object_size, | 1296 | slab_flags_t kmem_cache_flags(unsigned int object_size, |
1296 | slab_flags_t flags, const char *name, | 1297 | slab_flags_t flags, const char *name, |
1297 | void (*ctor)(void *)) | 1298 | void (*ctor)(void *)) |
1298 | { | 1299 | { |
@@ -1325,7 +1326,7 @@ static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n, | |||
1325 | struct page *page) {} | 1326 | struct page *page) {} |
1326 | static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, | 1327 | static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, |
1327 | struct page *page) {} | 1328 | struct page *page) {} |
1328 | slab_flags_t kmem_cache_flags(unsigned long object_size, | 1329 | slab_flags_t kmem_cache_flags(unsigned int object_size, |
1329 | slab_flags_t flags, const char *name, | 1330 | slab_flags_t flags, const char *name, |
1330 | void (*ctor)(void *)) | 1331 | void (*ctor)(void *)) |
1331 | { | 1332 | { |
@@ -1435,7 +1436,7 @@ static inline struct page *alloc_slab_page(struct kmem_cache *s, | |||
1435 | gfp_t flags, int node, struct kmem_cache_order_objects oo) | 1436 | gfp_t flags, int node, struct kmem_cache_order_objects oo) |
1436 | { | 1437 | { |
1437 | struct page *page; | 1438 | struct page *page; |
1438 | int order = oo_order(oo); | 1439 | unsigned int order = oo_order(oo); |
1439 | 1440 | ||
1440 | if (node == NUMA_NO_NODE) | 1441 | if (node == NUMA_NO_NODE) |
1441 | page = alloc_pages(flags, order); | 1442 | page = alloc_pages(flags, order); |
@@ -1454,8 +1455,8 @@ static inline struct page *alloc_slab_page(struct kmem_cache *s, | |||
1454 | /* Pre-initialize the random sequence cache */ | 1455 | /* Pre-initialize the random sequence cache */ |
1455 | static int init_cache_random_seq(struct kmem_cache *s) | 1456 | static int init_cache_random_seq(struct kmem_cache *s) |
1456 | { | 1457 | { |
1458 | unsigned int count = oo_objects(s->oo); | ||
1457 | int err; | 1459 | int err; |
1458 | unsigned long i, count = oo_objects(s->oo); | ||
1459 | 1460 | ||
1460 | /* Bailout if already initialised */ | 1461 | /* Bailout if already initialised */ |
1461 | if (s->random_seq) | 1462 | if (s->random_seq) |
@@ -1470,6 +1471,8 @@ static int init_cache_random_seq(struct kmem_cache *s) | |||
1470 | 1471 | ||
1471 | /* Transform to an offset on the set of pages */ | 1472 | /* Transform to an offset on the set of pages */ |
1472 | if (s->random_seq) { | 1473 | if (s->random_seq) { |
1474 | unsigned int i; | ||
1475 | |||
1473 | for (i = 0; i < count; i++) | 1476 | for (i = 0; i < count; i++) |
1474 | s->random_seq[i] *= s->size; | 1477 | s->random_seq[i] *= s->size; |
1475 | } | 1478 | } |
@@ -1811,7 +1814,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, | |||
1811 | { | 1814 | { |
1812 | struct page *page, *page2; | 1815 | struct page *page, *page2; |
1813 | void *object = NULL; | 1816 | void *object = NULL; |
1814 | int available = 0; | 1817 | unsigned int available = 0; |
1815 | int objects; | 1818 | int objects; |
1816 | 1819 | ||
1817 | /* | 1820 | /* |
@@ -2398,7 +2401,7 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) | |||
2398 | 2401 | ||
2399 | pr_warn("SLUB: Unable to allocate memory on node %d, gfp=%#x(%pGg)\n", | 2402 | pr_warn("SLUB: Unable to allocate memory on node %d, gfp=%#x(%pGg)\n", |
2400 | nid, gfpflags, &gfpflags); | 2403 | nid, gfpflags, &gfpflags); |
2401 | pr_warn(" cache: %s, object size: %d, buffer size: %d, default order: %d, min order: %d\n", | 2404 | pr_warn(" cache: %s, object size: %u, buffer size: %u, default order: %u, min order: %u\n", |
2402 | s->name, s->object_size, s->size, oo_order(s->oo), | 2405 | s->name, s->object_size, s->size, oo_order(s->oo), |
2403 | oo_order(s->min)); | 2406 | oo_order(s->min)); |
2404 | 2407 | ||
@@ -3181,9 +3184,9 @@ EXPORT_SYMBOL(kmem_cache_alloc_bulk); | |||
3181 | * and increases the number of allocations possible without having to | 3184 | * and increases the number of allocations possible without having to |
3182 | * take the list_lock. | 3185 | * take the list_lock. |
3183 | */ | 3186 | */ |
3184 | static int slub_min_order; | 3187 | static unsigned int slub_min_order; |
3185 | static int slub_max_order = PAGE_ALLOC_COSTLY_ORDER; | 3188 | static unsigned int slub_max_order = PAGE_ALLOC_COSTLY_ORDER; |
3186 | static int slub_min_objects; | 3189 | static unsigned int slub_min_objects; |
3187 | 3190 | ||
3188 | /* | 3191 | /* |
3189 | * Calculate the order of allocation given an slab object size. | 3192 | * Calculate the order of allocation given an slab object size. |
@@ -3210,20 +3213,21 @@ static int slub_min_objects; | |||
3210 | * requested a higher minimum order then we start with that one instead of | 3213 | * requested a higher minimum order then we start with that one instead of |
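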
3211 | * the smallest order which will fit the object. | 3214 | * the smallest order which will fit the object. |
3212 | */ | 3215 | */ |
3213 | static inline int slab_order(int size, int min_objects, | 3216 | static inline unsigned int slab_order(unsigned int size, |
3214 | int max_order, int fract_leftover, int reserved) | 3217 | unsigned int min_objects, unsigned int max_order, |
3218 | unsigned int fract_leftover, unsigned int reserved) | ||
3215 | { | 3219 | { |
3216 | int order; | 3220 | unsigned int min_order = slub_min_order; |
3217 | int rem; | 3221 | unsigned int order; |
3218 | int min_order = slub_min_order; | ||
3219 | 3222 | ||
3220 | if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE) | 3223 | if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE) |
3221 | return get_order(size * MAX_OBJS_PER_PAGE) - 1; | 3224 | return get_order(size * MAX_OBJS_PER_PAGE) - 1; |
3222 | 3225 | ||
3223 | for (order = max(min_order, get_order(min_objects * size + reserved)); | 3226 | for (order = max(min_order, (unsigned int)get_order(min_objects * size + reserved)); |
3224 | order <= max_order; order++) { | 3227 | order <= max_order; order++) { |
3225 | 3228 | ||
3226 | unsigned long slab_size = PAGE_SIZE << order; | 3229 | unsigned int slab_size = (unsigned int)PAGE_SIZE << order; |
3230 | unsigned int rem; | ||
3227 | 3231 | ||
3228 | rem = (slab_size - reserved) % size; | 3232 | rem = (slab_size - reserved) % size; |
3229 | 3233 | ||
@@ -3234,12 +3238,11 @@ static inline int slab_order(int size, int min_objects, | |||
3234 | return order; | 3238 | return order; |
3235 | } | 3239 | } |
3236 | 3240 | ||
3237 | static inline int calculate_order(int size, int reserved) | 3241 | static inline int calculate_order(unsigned int size, unsigned int reserved) |
3238 | { | 3242 | { |
3239 | int order; | 3243 | unsigned int order; |
3240 | int min_objects; | 3244 | unsigned int min_objects; |
3241 | int fraction; | 3245 | unsigned int max_objects; |
3242 | int max_objects; | ||
3243 | 3246 | ||
3244 | /* | 3247 | /* |
3245 | * Attempt to find best configuration for a slab. This | 3248 | * Attempt to find best configuration for a slab. This |
@@ -3256,6 +3259,8 @@ static inline int calculate_order(int size, int reserved) | |||
3256 | min_objects = min(min_objects, max_objects); | 3259 | min_objects = min(min_objects, max_objects); |
3257 | 3260 | ||
3258 | while (min_objects > 1) { | 3261 | while (min_objects > 1) { |
3262 | unsigned int fraction; | ||
3263 | |||
3259 | fraction = 16; | 3264 | fraction = 16; |
3260 | while (fraction >= 4) { | 3265 | while (fraction >= 4) { |
3261 | order = slab_order(size, min_objects, | 3266 | order = slab_order(size, min_objects, |
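slab_order() accepts the first order whose leftover space is small enough, and calculate_order() retries with progressively looser fractions (16 down to 4) and fewer objects. Worked example of the waste test as a user-space sketch (the acceptance condition, rem <= slab_size / fract_leftover, sits just past the hunk above and is stated here as an assumption): 192-byte objects in an order-0, 4096-byte slab waste 4096 % 192 = 64 bytes, and 64 <= 4096 / 16, so order 0 already passes at the strictest fraction.

    #include <stdio.h>

    int main(void)
    {
        unsigned int slab_size = 4096, size = 192, fract_leftover = 16;
        unsigned int rem = slab_size % size;            /* 64 bytes wasted */

        printf("rem=%u allowed=%u ok=%d\n",
               rem, slab_size / fract_leftover,
               rem <= slab_size / fract_leftover);      /* ok=1 */
        return 0;
    }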
@@ -3457,8 +3462,8 @@ static void set_cpu_partial(struct kmem_cache *s) | |||
3457 | static int calculate_sizes(struct kmem_cache *s, int forced_order) | 3462 | static int calculate_sizes(struct kmem_cache *s, int forced_order) |
3458 | { | 3463 | { |
3459 | slab_flags_t flags = s->flags; | 3464 | slab_flags_t flags = s->flags; |
3460 | size_t size = s->object_size; | 3465 | unsigned int size = s->object_size; |
3461 | int order; | 3466 | unsigned int order; |
3462 | 3467 | ||
3463 | /* | 3468 | /* |
3464 | * Round up object size to the next word boundary. We can only | 3469 | * Round up object size to the next word boundary. We can only |
@@ -3548,7 +3553,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) | |||
3548 | else | 3553 | else |
3549 | order = calculate_order(size, s->reserved); | 3554 | order = calculate_order(size, s->reserved); |
3550 | 3555 | ||
3551 | if (order < 0) | 3556 | if ((int)order < 0) |
3552 | return 0; | 3557 | return 0; |
3553 | 3558 | ||
3554 | s->allocflags = 0; | 3559 | s->allocflags = 0; |
@@ -3632,8 +3637,8 @@ static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags) | |||
3632 | free_kmem_cache_nodes(s); | 3637 | free_kmem_cache_nodes(s); |
3633 | error: | 3638 | error: |
3634 | if (flags & SLAB_PANIC) | 3639 | if (flags & SLAB_PANIC) |
3635 | panic("Cannot create slab %s size=%lu realsize=%u order=%u offset=%u flags=%lx\n", | 3640 | panic("Cannot create slab %s size=%u realsize=%u order=%u offset=%u flags=%lx\n", |
3636 | s->name, (unsigned long)s->size, s->size, | 3641 | s->name, s->size, s->size, |
3637 | oo_order(s->oo), s->offset, (unsigned long)flags); | 3642 | oo_order(s->oo), s->offset, (unsigned long)flags); |
3638 | return -EINVAL; | 3643 | return -EINVAL; |
3639 | } | 3644 | } |
@@ -3691,6 +3696,17 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n) | |||
3691 | discard_slab(s, page); | 3696 | discard_slab(s, page); |
3692 | } | 3697 | } |
3693 | 3698 | ||
3699 | bool __kmem_cache_empty(struct kmem_cache *s) | ||
3700 | { | ||
3701 | int node; | ||
3702 | struct kmem_cache_node *n; | ||
3703 | |||
3704 | for_each_kmem_cache_node(s, node, n) | ||
3705 | if (n->nr_partial || slabs_node(s, node)) | ||
3706 | return false; | ||
3707 | return true; | ||
3708 | } | ||
3709 | |||
3694 | /* | 3710 | /* |
3695 | * Release all resources used by a slab cache. | 3711 | * Release all resources used by a slab cache. |
3696 | */ | 3712 | */ |
@@ -3716,7 +3732,7 @@ int __kmem_cache_shutdown(struct kmem_cache *s) | |||
3716 | 3732 | ||
3717 | static int __init setup_slub_min_order(char *str) | 3733 | static int __init setup_slub_min_order(char *str) |
3718 | { | 3734 | { |
3719 | get_option(&str, &slub_min_order); | 3735 | get_option(&str, (int *)&slub_min_order); |
3720 | 3736 | ||
3721 | return 1; | 3737 | return 1; |
3722 | } | 3738 | } |
@@ -3725,8 +3741,8 @@ __setup("slub_min_order=", setup_slub_min_order); | |||
3725 | 3741 | ||
3726 | static int __init setup_slub_max_order(char *str) | 3742 | static int __init setup_slub_max_order(char *str) |
3727 | { | 3743 | { |
3728 | get_option(&str, &slub_max_order); | 3744 | get_option(&str, (int *)&slub_max_order); |
3729 | slub_max_order = min(slub_max_order, MAX_ORDER - 1); | 3745 | slub_max_order = min(slub_max_order, (unsigned int)MAX_ORDER - 1); |
3730 | 3746 | ||
3731 | return 1; | 3747 | return 1; |
3732 | } | 3748 | } |
@@ -3735,7 +3751,7 @@ __setup("slub_max_order=", setup_slub_max_order); | |||
3735 | 3751 | ||
3736 | static int __init setup_slub_min_objects(char *str) | 3752 | static int __init setup_slub_min_objects(char *str) |
3737 | { | 3753 | { |
3738 | get_option(&str, &slub_min_objects); | 3754 | get_option(&str, (int *)&slub_min_objects); |
3739 | 3755 | ||
3740 | return 1; | 3756 | return 1; |
3741 | } | 3757 | } |
@@ -3824,7 +3840,7 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page, | |||
3824 | bool to_user) | 3840 | bool to_user) |
3825 | { | 3841 | { |
3826 | struct kmem_cache *s; | 3842 | struct kmem_cache *s; |
3827 | unsigned long offset; | 3843 | unsigned int offset; |
3828 | size_t object_size; | 3844 | size_t object_size; |
3829 | 3845 | ||
3830 | /* Find object and usable object size. */ | 3846 | /* Find object and usable object size. */ |
@@ -4230,7 +4246,7 @@ void __init kmem_cache_init(void) | |||
4230 | cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", NULL, | 4246 | cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", NULL, |
4231 | slub_cpu_dead); | 4247 | slub_cpu_dead); |
4232 | 4248 | ||
4233 | pr_info("SLUB: HWalign=%d, Order=%d-%d, MinObjects=%d, CPUs=%u, Nodes=%d\n", | 4249 | pr_info("SLUB: HWalign=%d, Order=%u-%u, MinObjects=%u, CPUs=%u, Nodes=%d\n", |
4234 | cache_line_size(), | 4250 | cache_line_size(), |
4235 | slub_min_order, slub_max_order, slub_min_objects, | 4251 | slub_min_order, slub_max_order, slub_min_objects, |
4236 | nr_cpu_ids, nr_node_ids); | 4252 | nr_cpu_ids, nr_node_ids); |
@@ -4241,7 +4257,7 @@ void __init kmem_cache_init_late(void) | |||
4241 | } | 4257 | } |
4242 | 4258 | ||
4243 | struct kmem_cache * | 4259 | struct kmem_cache * |
4244 | __kmem_cache_alias(const char *name, size_t size, size_t align, | 4260 | __kmem_cache_alias(const char *name, unsigned int size, unsigned int align, |
4245 | slab_flags_t flags, void (*ctor)(void *)) | 4261 | slab_flags_t flags, void (*ctor)(void *)) |
4246 | { | 4262 | { |
4247 | struct kmem_cache *s, *c; | 4263 | struct kmem_cache *s, *c; |
@@ -4254,13 +4270,12 @@ __kmem_cache_alias(const char *name, size_t size, size_t align, | |||
4254 | * Adjust the object sizes so that we clear | 4270 | * Adjust the object sizes so that we clear |
4255 | * the complete object on kzalloc. | 4271 | * the complete object on kzalloc. |
4256 | */ | 4272 | */ |
4257 | s->object_size = max(s->object_size, (int)size); | 4273 | s->object_size = max(s->object_size, size); |
4258 | s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); | 4274 | s->inuse = max(s->inuse, ALIGN(size, sizeof(void *))); |
4259 | 4275 | ||
4260 | for_each_memcg_cache(c, s) { | 4276 | for_each_memcg_cache(c, s) { |
4261 | c->object_size = s->object_size; | 4277 | c->object_size = s->object_size; |
4262 | c->inuse = max_t(int, c->inuse, | 4278 | c->inuse = max(c->inuse, ALIGN(size, sizeof(void *))); |
4263 | ALIGN(size, sizeof(void *))); | ||
4264 | } | 4279 | } |
4265 | 4280 | ||
4266 | if (sysfs_slab_alias(s, name)) { | 4281 | if (sysfs_slab_alias(s, name)) { |
@@ -4889,35 +4904,35 @@ struct slab_attribute { | |||
4889 | 4904 | ||
4890 | static ssize_t slab_size_show(struct kmem_cache *s, char *buf) | 4905 | static ssize_t slab_size_show(struct kmem_cache *s, char *buf) |
4891 | { | 4906 | { |
4892 | return sprintf(buf, "%d\n", s->size); | 4907 | return sprintf(buf, "%u\n", s->size); |
4893 | } | 4908 | } |
4894 | SLAB_ATTR_RO(slab_size); | 4909 | SLAB_ATTR_RO(slab_size); |
4895 | 4910 | ||
4896 | static ssize_t align_show(struct kmem_cache *s, char *buf) | 4911 | static ssize_t align_show(struct kmem_cache *s, char *buf) |
4897 | { | 4912 | { |
4898 | return sprintf(buf, "%d\n", s->align); | 4913 | return sprintf(buf, "%u\n", s->align); |
4899 | } | 4914 | } |
4900 | SLAB_ATTR_RO(align); | 4915 | SLAB_ATTR_RO(align); |
4901 | 4916 | ||
4902 | static ssize_t object_size_show(struct kmem_cache *s, char *buf) | 4917 | static ssize_t object_size_show(struct kmem_cache *s, char *buf) |
4903 | { | 4918 | { |
4904 | return sprintf(buf, "%d\n", s->object_size); | 4919 | return sprintf(buf, "%u\n", s->object_size); |
4905 | } | 4920 | } |
4906 | SLAB_ATTR_RO(object_size); | 4921 | SLAB_ATTR_RO(object_size); |
4907 | 4922 | ||
4908 | static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf) | 4923 | static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf) |
4909 | { | 4924 | { |
4910 | return sprintf(buf, "%d\n", oo_objects(s->oo)); | 4925 | return sprintf(buf, "%u\n", oo_objects(s->oo)); |
4911 | } | 4926 | } |
4912 | SLAB_ATTR_RO(objs_per_slab); | 4927 | SLAB_ATTR_RO(objs_per_slab); |
4913 | 4928 | ||
4914 | static ssize_t order_store(struct kmem_cache *s, | 4929 | static ssize_t order_store(struct kmem_cache *s, |
4915 | const char *buf, size_t length) | 4930 | const char *buf, size_t length) |
4916 | { | 4931 | { |
4917 | unsigned long order; | 4932 | unsigned int order; |
4918 | int err; | 4933 | int err; |
4919 | 4934 | ||
4920 | err = kstrtoul(buf, 10, &order); | 4935 | err = kstrtouint(buf, 10, &order); |
4921 | if (err) | 4936 | if (err) |
4922 | return err; | 4937 | return err; |
4923 | 4938 | ||
@@ -4930,7 +4945,7 @@ static ssize_t order_store(struct kmem_cache *s, | |||
4930 | 4945 | ||
4931 | static ssize_t order_show(struct kmem_cache *s, char *buf) | 4946 | static ssize_t order_show(struct kmem_cache *s, char *buf) |
4932 | { | 4947 | { |
4933 | return sprintf(buf, "%d\n", oo_order(s->oo)); | 4948 | return sprintf(buf, "%u\n", oo_order(s->oo)); |
4934 | } | 4949 | } |
4935 | SLAB_ATTR(order); | 4950 | SLAB_ATTR(order); |
4936 | 4951 | ||
@@ -4962,10 +4977,10 @@ static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf) | |||
4962 | static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf, | 4977 | static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf, |
4963 | size_t length) | 4978 | size_t length) |
4964 | { | 4979 | { |
4965 | unsigned long objects; | 4980 | unsigned int objects; |
4966 | int err; | 4981 | int err; |
4967 | 4982 | ||
4968 | err = kstrtoul(buf, 10, &objects); | 4983 | err = kstrtouint(buf, 10, &objects); |
4969 | if (err) | 4984 | if (err) |
4970 | return err; | 4985 | return err; |
4971 | if (objects && !kmem_cache_has_cpu_partial(s)) | 4986 | if (objects && !kmem_cache_has_cpu_partial(s)) |
@@ -5081,7 +5096,7 @@ SLAB_ATTR_RO(cache_dma); | |||
5081 | 5096 | ||
5082 | static ssize_t usersize_show(struct kmem_cache *s, char *buf) | 5097 | static ssize_t usersize_show(struct kmem_cache *s, char *buf) |
5083 | { | 5098 | { |
5084 | return sprintf(buf, "%zu\n", s->usersize); | 5099 | return sprintf(buf, "%u\n", s->usersize); |
5085 | } | 5100 | } |
5086 | SLAB_ATTR_RO(usersize); | 5101 | SLAB_ATTR_RO(usersize); |
5087 | 5102 | ||
@@ -5093,7 +5108,7 @@ SLAB_ATTR_RO(destroy_by_rcu); | |||
5093 | 5108 | ||
5094 | static ssize_t reserved_show(struct kmem_cache *s, char *buf) | 5109 | static ssize_t reserved_show(struct kmem_cache *s, char *buf) |
5095 | { | 5110 | { |
5096 | return sprintf(buf, "%d\n", s->reserved); | 5111 | return sprintf(buf, "%u\n", s->reserved); |
5097 | } | 5112 | } |
5098 | SLAB_ATTR_RO(reserved); | 5113 | SLAB_ATTR_RO(reserved); |
5099 | 5114 | ||
@@ -5288,21 +5303,22 @@ SLAB_ATTR(shrink); | |||
5288 | #ifdef CONFIG_NUMA | 5303 | #ifdef CONFIG_NUMA |
5289 | static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf) | 5304 | static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf) |
5290 | { | 5305 | { |
5291 | return sprintf(buf, "%d\n", s->remote_node_defrag_ratio / 10); | 5306 | return sprintf(buf, "%u\n", s->remote_node_defrag_ratio / 10); |
5292 | } | 5307 | } |
5293 | 5308 | ||
5294 | static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s, | 5309 | static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s, |
5295 | const char *buf, size_t length) | 5310 | const char *buf, size_t length) |
5296 | { | 5311 | { |
5297 | unsigned long ratio; | 5312 | unsigned int ratio; |
5298 | int err; | 5313 | int err; |
5299 | 5314 | ||
5300 | err = kstrtoul(buf, 10, &ratio); | 5315 | err = kstrtouint(buf, 10, &ratio); |
5301 | if (err) | 5316 | if (err) |
5302 | return err; | 5317 | return err; |
5318 | if (ratio > 100) | ||
5319 | return -ERANGE; | ||
5303 | 5320 | ||
5304 | if (ratio <= 100) | 5321 | s->remote_node_defrag_ratio = ratio * 10; |
5305 | s->remote_node_defrag_ratio = ratio * 10; | ||
5306 | 5322 | ||
5307 | return length; | 5323 | return length; |
5308 | } | 5324 | } |
@@ -5663,7 +5679,7 @@ static char *create_unique_id(struct kmem_cache *s) | |||
5663 | *p++ = 'A'; | 5679 | *p++ = 'A'; |
5664 | if (p != name + 1) | 5680 | if (p != name + 1) |
5665 | *p++ = '-'; | 5681 | *p++ = '-'; |
5666 | p += sprintf(p, "%07d", s->size); | 5682 | p += sprintf(p, "%07u", s->size); |
5667 | 5683 | ||
5668 | BUG_ON(p > name + ID_STR_LENGTH - 1); | 5684 | BUG_ON(p > name + ID_STR_LENGTH - 1); |
5669 | return name; | 5685 | return name; |
diff --git a/mm/sparse.c b/mm/sparse.c index 58cab483e81b..62eef264a7bd 100644 --- a/mm/sparse.c +++ b/mm/sparse.c | |||
@@ -779,7 +779,13 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat, | |||
779 | goto out; | 779 | goto out; |
780 | } | 780 | } |
781 | 781 | ||
782 | memset(memmap, 0, sizeof(struct page) * PAGES_PER_SECTION); | 782 | #ifdef CONFIG_DEBUG_VM |
783 | /* | ||
784 | * Poison uninitialized struct pages in order to catch invalid flags | ||
785 | * combinations. | ||
786 | */ | ||
787 | memset(memmap, PAGE_POISON_PATTERN, sizeof(struct page) * PAGES_PER_SECTION); | ||
788 | #endif | ||
783 | 789 | ||
784 | section_mark_present(ms); | 790 | section_mark_present(ms); |
785 | 791 | ||
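With CONFIG_DEBUG_VM, the struct pages of a newly added section are filled with PAGE_POISON_PATTERN instead of being zeroed; they are expected to be initialized properly later, and the poison makes accidental use of a never-initialized struct page detectable. A sketch of the kind of check this enables (the helper name and its placement are assumptions, not part of this hunk):

    /* A struct page whose flags still hold the poison was never initialized. */
    static inline int PagePoisoned(const struct page *page)
    {
        return page->flags == PAGE_POISON_PATTERN;
    }

    /* e.g. VM_BUG_ON_PAGE(PagePoisoned(page), page) in page accessors */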
@@ -707,7 +707,6 @@ void lru_add_drain_all(void) | |||
707 | * release_pages - batched put_page() | 707 | * release_pages - batched put_page() |
708 | * @pages: array of pages to release | 708 | * @pages: array of pages to release |
709 | * @nr: number of pages | 709 | * @nr: number of pages |
710 | * @cold: whether the pages are cache cold | ||
711 | * | 710 | * |
712 | * Decrement the reference count on all the pages in @pages. If it | 711 | * Decrement the reference count on all the pages in @pages. If it |
713 | * fell to zero, remove the page from the LRU and free it. | 712 | * fell to zero, remove the page from the LRU and free it. |
diff --git a/mm/swap_slots.c b/mm/swap_slots.c index bebc19292018..f2641894f440 100644 --- a/mm/swap_slots.c +++ b/mm/swap_slots.c | |||
@@ -34,8 +34,6 @@ | |||
34 | #include <linux/mutex.h> | 34 | #include <linux/mutex.h> |
35 | #include <linux/mm.h> | 35 | #include <linux/mm.h> |
36 | 36 | ||
37 | #ifdef CONFIG_SWAP | ||
38 | |||
39 | static DEFINE_PER_CPU(struct swap_slots_cache, swp_slots); | 37 | static DEFINE_PER_CPU(struct swap_slots_cache, swp_slots); |
40 | static bool swap_slot_cache_active; | 38 | static bool swap_slot_cache_active; |
41 | bool swap_slot_cache_enabled; | 39 | bool swap_slot_cache_enabled; |
@@ -356,5 +354,3 @@ repeat: | |||
356 | 354 | ||
357 | return entry; | 355 | return entry; |
358 | } | 356 | } |
359 | |||
360 | #endif /* CONFIG_SWAP */ | ||
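Dropping the CONFIG_SWAP guard from mm/swap_slots.c is presumably safe because the file is only compiled when swap is configured in the first place (mm/Makefile builds swap_slots.o under obj-$(CONFIG_SWAP)), so the in-file #ifdef was redundant.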
diff --git a/mm/swap_state.c b/mm/swap_state.c index 39ae7cfad90f..f233dccd3b1b 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c | |||
@@ -38,7 +38,7 @@ static const struct address_space_operations swap_aops = { | |||
38 | 38 | ||
39 | struct address_space *swapper_spaces[MAX_SWAPFILES] __read_mostly; | 39 | struct address_space *swapper_spaces[MAX_SWAPFILES] __read_mostly; |
40 | static unsigned int nr_swapper_spaces[MAX_SWAPFILES] __read_mostly; | 40 | static unsigned int nr_swapper_spaces[MAX_SWAPFILES] __read_mostly; |
41 | bool swap_vma_readahead __read_mostly = true; | 41 | static bool enable_vma_readahead __read_mostly = true; |
42 | 42 | ||
43 | #define SWAP_RA_WIN_SHIFT (PAGE_SHIFT / 2) | 43 | #define SWAP_RA_WIN_SHIFT (PAGE_SHIFT / 2) |
44 | #define SWAP_RA_HITS_MASK ((1UL << SWAP_RA_WIN_SHIFT) - 1) | 44 | #define SWAP_RA_HITS_MASK ((1UL << SWAP_RA_WIN_SHIFT) - 1) |
@@ -322,6 +322,11 @@ void free_pages_and_swap_cache(struct page **pages, int nr) | |||
322 | release_pages(pagep, nr); | 322 | release_pages(pagep, nr); |
323 | } | 323 | } |
324 | 324 | ||
325 | static inline bool swap_use_vma_readahead(void) | ||
326 | { | ||
327 | return READ_ONCE(enable_vma_readahead) && !atomic_read(&nr_rotate_swap); | ||
328 | } | ||
329 | |||
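swap_use_vma_readahead() gates VMA-based readahead on both the enable_vma_readahead knob and the absence of rotational swap devices. The new swapin_readahead() entry point declared at the end of this file presumably dispatches on it per fault, roughly as sketched below (an assumption consistent with its kernel-doc; the body itself is not shown in this diff):

    struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
                                  struct vm_fault *vmf)
    {
        return swap_use_vma_readahead() ?
            swap_vma_readahead(entry, gfp_mask, vmf) :
            swap_cluster_readahead(entry, gfp_mask, vmf);
    }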
325 | /* | 330 | /* |
326 | * Lookup a swap entry in the swap cache. A found page will be returned | 331 | * Lookup a swap entry in the swap cache. A found page will be returned |
327 | * unlocked and with its refcount incremented - we rely on the kernel | 332 | * unlocked and with its refcount incremented - we rely on the kernel |
@@ -332,32 +337,43 @@ struct page *lookup_swap_cache(swp_entry_t entry, struct vm_area_struct *vma, | |||
332 | unsigned long addr) | 337 | unsigned long addr) |
333 | { | 338 | { |
334 | struct page *page; | 339 | struct page *page; |
335 | unsigned long ra_info; | ||
336 | int win, hits, readahead; | ||
337 | 340 | ||
338 | page = find_get_page(swap_address_space(entry), swp_offset(entry)); | 341 | page = find_get_page(swap_address_space(entry), swp_offset(entry)); |
339 | 342 | ||
340 | INC_CACHE_INFO(find_total); | 343 | INC_CACHE_INFO(find_total); |
341 | if (page) { | 344 | if (page) { |
345 | bool vma_ra = swap_use_vma_readahead(); | ||
346 | bool readahead; | ||
347 | |||
342 | INC_CACHE_INFO(find_success); | 348 | INC_CACHE_INFO(find_success); |
349 | /* | ||
350 | * At the moment, we don't support PG_readahead for anon THP | ||
351 | * so let's bail out rather than confusing the readahead stat. | ||
352 | */ | ||
343 | if (unlikely(PageTransCompound(page))) | 353 | if (unlikely(PageTransCompound(page))) |
344 | return page; | 354 | return page; |
355 | |||
345 | readahead = TestClearPageReadahead(page); | 356 | readahead = TestClearPageReadahead(page); |
346 | if (vma) { | 357 | if (vma && vma_ra) { |
347 | ra_info = GET_SWAP_RA_VAL(vma); | 358 | unsigned long ra_val; |
348 | win = SWAP_RA_WIN(ra_info); | 359 | int win, hits; |
349 | hits = SWAP_RA_HITS(ra_info); | 360 | |
361 | ra_val = GET_SWAP_RA_VAL(vma); | ||
362 | win = SWAP_RA_WIN(ra_val); | ||
363 | hits = SWAP_RA_HITS(ra_val); | ||
350 | if (readahead) | 364 | if (readahead) |
351 | hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX); | 365 | hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX); |
352 | atomic_long_set(&vma->swap_readahead_info, | 366 | atomic_long_set(&vma->swap_readahead_info, |
353 | SWAP_RA_VAL(addr, win, hits)); | 367 | SWAP_RA_VAL(addr, win, hits)); |
354 | } | 368 | } |
369 | |||
355 | if (readahead) { | 370 | if (readahead) { |
356 | count_vm_event(SWAP_RA_HIT); | 371 | count_vm_event(SWAP_RA_HIT); |
357 | if (!vma) | 372 | if (!vma || !vma_ra) |
358 | atomic_inc(&swapin_readahead_hits); | 373 | atomic_inc(&swapin_readahead_hits); |
359 | } | 374 | } |
360 | } | 375 | } |
376 | |||
361 | return page; | 377 | return page; |
362 | } | 378 | } |
363 | 379 | ||
@@ -533,11 +549,10 @@ static unsigned long swapin_nr_pages(unsigned long offset) | |||
533 | } | 549 | } |
534 | 550 | ||
535 | /** | 551 | /** |
536 | * swapin_readahead - swap in pages in hope we need them soon | 552 | * swap_cluster_readahead - swap in pages in hope we need them soon |
537 | * @entry: swap entry of this memory | 553 | * @entry: swap entry of this memory |
538 | * @gfp_mask: memory allocation flags | 554 | * @gfp_mask: memory allocation flags |
539 | * @vma: user vma this address belongs to | 555 | * @vmf: fault information |
540 | * @addr: target address for mempolicy | ||
541 | * | 556 | * |
542 | * Returns the struct page for entry and addr, after queueing swapin. | 557 | * Returns the struct page for entry and addr, after queueing swapin. |
543 | * | 558 | * |
@@ -549,10 +564,10 @@ static unsigned long swapin_nr_pages(unsigned long offset) | |||
549 | * This has been extended to use the NUMA policies from the mm triggering | 564 | * This has been extended to use the NUMA policies from the mm triggering |
550 | * the readahead. | 565 | * the readahead. |
551 | * | 566 | * |
552 | * Caller must hold down_read on the vma->vm_mm if vma is not NULL. | 567 | * Caller must hold down_read on the vma->vm_mm if vmf->vma is not NULL. |
553 | */ | 568 | */ |
554 | struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask, | 569 | struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask, |
555 | struct vm_area_struct *vma, unsigned long addr) | 570 | struct vm_fault *vmf) |
556 | { | 571 | { |
557 | struct page *page; | 572 | struct page *page; |
558 | unsigned long entry_offset = swp_offset(entry); | 573 | unsigned long entry_offset = swp_offset(entry); |
@@ -562,6 +577,8 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask, | |||
562 | struct swap_info_struct *si = swp_swap_info(entry); | 577 | struct swap_info_struct *si = swp_swap_info(entry); |
563 | struct blk_plug plug; | 578 | struct blk_plug plug; |
564 | bool do_poll = true, page_allocated; | 579 | bool do_poll = true, page_allocated; |
580 | struct vm_area_struct *vma = vmf->vma; | ||
581 | unsigned long addr = vmf->address; | ||
565 | 582 | ||
566 | mask = swapin_nr_pages(offset) - 1; | 583 | mask = swapin_nr_pages(offset) - 1; |
567 | if (!mask) | 584 | if (!mask) |
@@ -586,8 +603,7 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask, | |||
586 | continue; | 603 | continue; |
587 | if (page_allocated) { | 604 | if (page_allocated) { |
588 | swap_readpage(page, false); | 605 | swap_readpage(page, false); |
589 | if (offset != entry_offset && | 606 | if (offset != entry_offset) { |
590 | likely(!PageTransCompound(page))) { | ||
591 | SetPageReadahead(page); | 607 | SetPageReadahead(page); |
592 | count_vm_event(SWAP_RA); | 608 | count_vm_event(SWAP_RA); |
593 | } | 609 | } |
@@ -649,16 +665,15 @@ static inline void swap_ra_clamp_pfn(struct vm_area_struct *vma, | |||
649 | PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE)); | 665 | PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE)); |
650 | } | 666 | } |
651 | 667 | ||
652 | struct page *swap_readahead_detect(struct vm_fault *vmf, | 668 | static void swap_ra_info(struct vm_fault *vmf, |
653 | struct vma_swap_readahead *swap_ra) | 669 | struct vma_swap_readahead *ra_info) |
654 | { | 670 | { |
655 | struct vm_area_struct *vma = vmf->vma; | 671 | struct vm_area_struct *vma = vmf->vma; |
656 | unsigned long swap_ra_info; | 672 | unsigned long ra_val; |
657 | struct page *page; | ||
658 | swp_entry_t entry; | 673 | swp_entry_t entry; |
659 | unsigned long faddr, pfn, fpfn; | 674 | unsigned long faddr, pfn, fpfn; |
660 | unsigned long start, end; | 675 | unsigned long start, end; |
661 | pte_t *pte; | 676 | pte_t *pte, *orig_pte; |
662 | unsigned int max_win, hits, prev_win, win, left; | 677 | unsigned int max_win, hits, prev_win, win, left; |
663 | #ifndef CONFIG_64BIT | 678 | #ifndef CONFIG_64BIT |
664 | pte_t *tpte; | 679 | pte_t *tpte; |
@@ -667,30 +682,32 @@ struct page *swap_readahead_detect(struct vm_fault *vmf, | |||
667 | max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster), | 682 | max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster), |
668 | SWAP_RA_ORDER_CEILING); | 683 | SWAP_RA_ORDER_CEILING); |
669 | if (max_win == 1) { | 684 | if (max_win == 1) { |
670 | swap_ra->win = 1; | 685 | ra_info->win = 1; |
671 | return NULL; | 686 | return; |
672 | } | 687 | } |
673 | 688 | ||
674 | faddr = vmf->address; | 689 | faddr = vmf->address; |
675 | entry = pte_to_swp_entry(vmf->orig_pte); | 690 | orig_pte = pte = pte_offset_map(vmf->pmd, faddr); |
676 | if ((unlikely(non_swap_entry(entry)))) | 691 | entry = pte_to_swp_entry(*pte); |
677 | return NULL; | 692 | if ((unlikely(non_swap_entry(entry)))) { |
678 | page = lookup_swap_cache(entry, vma, faddr); | 693 | pte_unmap(orig_pte); |
679 | if (page) | 694 | return; |
680 | return page; | 695 | } |
681 | 696 | ||
682 | fpfn = PFN_DOWN(faddr); | 697 | fpfn = PFN_DOWN(faddr); |
683 | swap_ra_info = GET_SWAP_RA_VAL(vma); | 698 | ra_val = GET_SWAP_RA_VAL(vma); |
684 | pfn = PFN_DOWN(SWAP_RA_ADDR(swap_ra_info)); | 699 | pfn = PFN_DOWN(SWAP_RA_ADDR(ra_val)); |
685 | prev_win = SWAP_RA_WIN(swap_ra_info); | 700 | prev_win = SWAP_RA_WIN(ra_val); |
686 | hits = SWAP_RA_HITS(swap_ra_info); | 701 | hits = SWAP_RA_HITS(ra_val); |
687 | swap_ra->win = win = __swapin_nr_pages(pfn, fpfn, hits, | 702 | ra_info->win = win = __swapin_nr_pages(pfn, fpfn, hits, |
688 | max_win, prev_win); | 703 | max_win, prev_win); |
689 | atomic_long_set(&vma->swap_readahead_info, | 704 | atomic_long_set(&vma->swap_readahead_info, |
690 | SWAP_RA_VAL(faddr, win, 0)); | 705 | SWAP_RA_VAL(faddr, win, 0)); |
691 | 706 | ||
692 | if (win == 1) | 707 | if (win == 1) { |
693 | return NULL; | 708 | pte_unmap(orig_pte); |
709 | return; | ||
710 | } | ||
694 | 711 | ||
695 | /* Copy the PTEs because the page table may be unmapped */ | 712 | /* Copy the PTEs because the page table may be unmapped */ |
696 | if (fpfn == pfn + 1) | 713 | if (fpfn == pfn + 1) |
@@ -703,23 +720,21 @@ struct page *swap_readahead_detect(struct vm_fault *vmf, | |||
703 | swap_ra_clamp_pfn(vma, faddr, fpfn - left, fpfn + win - left, | 720 | swap_ra_clamp_pfn(vma, faddr, fpfn - left, fpfn + win - left, |
704 | &start, &end); | 721 | &start, &end); |
705 | } | 722 | } |
706 | swap_ra->nr_pte = end - start; | 723 | ra_info->nr_pte = end - start; |
707 | swap_ra->offset = fpfn - start; | 724 | ra_info->offset = fpfn - start; |
708 | pte = vmf->pte - swap_ra->offset; | 725 | pte -= ra_info->offset; |
709 | #ifdef CONFIG_64BIT | 726 | #ifdef CONFIG_64BIT |
710 | swap_ra->ptes = pte; | 727 | ra_info->ptes = pte; |
711 | #else | 728 | #else |
712 | tpte = swap_ra->ptes; | 729 | tpte = ra_info->ptes; |
713 | for (pfn = start; pfn != end; pfn++) | 730 | for (pfn = start; pfn != end; pfn++) |
714 | *tpte++ = *pte++; | 731 | *tpte++ = *pte++; |
715 | #endif | 732 | #endif |
716 | 733 | pte_unmap(orig_pte); | |
717 | return NULL; | ||
718 | } | 734 | } |
719 | 735 | ||
720 | struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask, | 736 | static struct page *swap_vma_readahead(swp_entry_t fentry, gfp_t gfp_mask, |
721 | struct vm_fault *vmf, | 737 | struct vm_fault *vmf) |
722 | struct vma_swap_readahead *swap_ra) | ||
723 | { | 738 | { |
724 | struct blk_plug plug; | 739 | struct blk_plug plug; |
725 | struct vm_area_struct *vma = vmf->vma; | 740 | struct vm_area_struct *vma = vmf->vma; |
@@ -728,12 +743,14 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask, | |||
728 | swp_entry_t entry; | 743 | swp_entry_t entry; |
729 | unsigned int i; | 744 | unsigned int i; |
730 | bool page_allocated; | 745 | bool page_allocated; |
746 | struct vma_swap_readahead ra_info = {0,}; | ||
731 | 747 | ||
732 | if (swap_ra->win == 1) | 748 | swap_ra_info(vmf, &ra_info); |
749 | if (ra_info.win == 1) | ||
733 | goto skip; | 750 | goto skip; |
734 | 751 | ||
735 | blk_start_plug(&plug); | 752 | blk_start_plug(&plug); |
736 | for (i = 0, pte = swap_ra->ptes; i < swap_ra->nr_pte; | 753 | for (i = 0, pte = ra_info.ptes; i < ra_info.nr_pte; |
737 | i++, pte++) { | 754 | i++, pte++) { |
738 | pentry = *pte; | 755 | pentry = *pte; |
739 | if (pte_none(pentry)) | 756 | if (pte_none(pentry)) |
@@ -749,8 +766,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask, | |||
749 | continue; | 766 | continue; |
750 | if (page_allocated) { | 767 | if (page_allocated) { |
751 | swap_readpage(page, false); | 768 | swap_readpage(page, false); |
752 | if (i != swap_ra->offset && | 769 | if (i != ra_info.offset) { |
753 | likely(!PageTransCompound(page))) { | ||
754 | SetPageReadahead(page); | 770 | SetPageReadahead(page); |
755 | count_vm_event(SWAP_RA); | 771 | count_vm_event(SWAP_RA); |
756 | } | 772 | } |
@@ -761,23 +777,43 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask, | |||
761 | lru_add_drain(); | 777 | lru_add_drain(); |
762 | skip: | 778 | skip: |
763 | return read_swap_cache_async(fentry, gfp_mask, vma, vmf->address, | 779 | return read_swap_cache_async(fentry, gfp_mask, vma, vmf->address, |
764 | swap_ra->win == 1); | 780 | ra_info.win == 1); |
781 | } | ||
782 | |||
783 | /** | ||
784 | * swapin_readahead - swap in pages in hope we need them soon | ||
785 | * @entry: swap entry of this memory | ||
786 | * @gfp_mask: memory allocation flags | ||
787 | * @vmf: fault information | ||
788 | * | ||
789 | * Returns the struct page for entry and addr, after queueing swapin. | ||
790 | * | ||
791 | * It's the main entry point for swap readahead. Depending on the | ||
792 | * configuration, it reads ahead blocks either cluster-based (ie, physical | ||
793 | * disk based) or vma-based (ie, around the faulting virtual address). | ||
794 | */ | ||
795 | struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask, | ||
796 | struct vm_fault *vmf) | ||
797 | { | ||
798 | return swap_use_vma_readahead() ? | ||
799 | swap_vma_readahead(entry, gfp_mask, vmf) : | ||
800 | swap_cluster_readahead(entry, gfp_mask, vmf); | ||
765 | } | 801 | } |
766 | 802 | ||
767 | #ifdef CONFIG_SYSFS | 803 | #ifdef CONFIG_SYSFS |
768 | static ssize_t vma_ra_enabled_show(struct kobject *kobj, | 804 | static ssize_t vma_ra_enabled_show(struct kobject *kobj, |
769 | struct kobj_attribute *attr, char *buf) | 805 | struct kobj_attribute *attr, char *buf) |
770 | { | 806 | { |
771 | return sprintf(buf, "%s\n", swap_vma_readahead ? "true" : "false"); | 807 | return sprintf(buf, "%s\n", enable_vma_readahead ? "true" : "false"); |
772 | } | 808 | } |
773 | static ssize_t vma_ra_enabled_store(struct kobject *kobj, | 809 | static ssize_t vma_ra_enabled_store(struct kobject *kobj, |
774 | struct kobj_attribute *attr, | 810 | struct kobj_attribute *attr, |
775 | const char *buf, size_t count) | 811 | const char *buf, size_t count) |
776 | { | 812 | { |
777 | if (!strncmp(buf, "true", 4) || !strncmp(buf, "1", 1)) | 813 | if (!strncmp(buf, "true", 4) || !strncmp(buf, "1", 1)) |
778 | swap_vma_readahead = true; | 814 | enable_vma_readahead = true; |
779 | else if (!strncmp(buf, "false", 5) || !strncmp(buf, "0", 1)) | 815 | else if (!strncmp(buf, "false", 5) || !strncmp(buf, "0", 1)) |
780 | swap_vma_readahead = false; | 816 | enable_vma_readahead = false; |
781 | else | 817 | else |
782 | return -EINVAL; | 818 | return -EINVAL; |
783 | 819 | ||
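
The swap_state.c changes above fold readahead-window detection into swap_ra_info() and hide the policy split behind a single entry point; whether VMA-based readahead is used at all stays controlled by the enable_vma_readahead knob handled by the sysfs code in this hunk. A minimal sketch of a caller, assuming only the new swapin_readahead() signature shown above; fault_swapin() is a hypothetical wrapper, not the real do_swap_page() call site:

	/*
	 * Illustrative only: the fault path no longer picks a readahead
	 * policy itself; swapin_readahead() dispatches to VMA-based or
	 * cluster-based readahead internally.
	 */
	static struct page *fault_swapin(swp_entry_t entry, struct vm_fault *vmf)
	{
		/* GFP_HIGHUSER_MOVABLE is the usual mask for anonymous pages */
		return swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, vmf);
	}
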
@@ -515,6 +515,16 @@ struct address_space *page_mapping(struct page *page) | |||
515 | } | 515 | } |
516 | EXPORT_SYMBOL(page_mapping); | 516 | EXPORT_SYMBOL(page_mapping); |
517 | 517 | ||
518 | /* | ||
519 | * For file cache pages, return the address_space, otherwise return NULL | ||
520 | */ | ||
521 | struct address_space *page_mapping_file(struct page *page) | ||
522 | { | ||
523 | if (unlikely(PageSwapCache(page))) | ||
524 | return NULL; | ||
525 | return page_mapping(page); | ||
526 | } | ||
527 | |||
518 | /* Slow path of page_mapcount() for compound pages */ | 528 | /* Slow path of page_mapcount() for compound pages */ |
519 | int __page_mapcount(struct page *page) | 529 | int __page_mapcount(struct page *page) |
520 | { | 530 | { |
diff --git a/mm/vmscan.c b/mm/vmscan.c index cd5dc3faaa57..4390a8d5be41 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -442,16 +442,8 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid, | |||
442 | if (memcg && (!memcg_kmem_enabled() || !mem_cgroup_online(memcg))) | 442 | if (memcg && (!memcg_kmem_enabled() || !mem_cgroup_online(memcg))) |
443 | return 0; | 443 | return 0; |
444 | 444 | ||
445 | if (!down_read_trylock(&shrinker_rwsem)) { | 445 | if (!down_read_trylock(&shrinker_rwsem)) |
446 | /* | ||
447 | * If we would return 0, our callers would understand that we | ||
448 | * have nothing else to shrink and give up trying. By returning | ||
449 | * 1 we keep it going and assume we'll be able to shrink next | ||
450 | * time. | ||
451 | */ | ||
452 | freed = 1; | ||
453 | goto out; | 446 | goto out; |
454 | } | ||
455 | 447 | ||
456 | list_for_each_entry(shrinker, &shrinker_list, list) { | 448 | list_for_each_entry(shrinker, &shrinker_list, list) { |
457 | struct shrink_control sc = { | 449 | struct shrink_control sc = { |
@@ -3547,16 +3539,21 @@ kswapd_try_sleep: | |||
3547 | } | 3539 | } |
3548 | 3540 | ||
3549 | /* | 3541 | /* |
3550 | * A zone is low on free memory, so wake its kswapd task to service it. | 3542 | * A zone is low on free memory or too fragmented for high-order memory. If |
3543 | * kswapd should reclaim (direct reclaim is deferred), wake it up for the zone's | ||
3544 | * pgdat. It will wake up kcompactd after reclaiming memory. If kswapd reclaim | ||
3545 | * has failed or is not needed, still wake up kcompactd if only compaction is | ||
3546 | * needed. | ||
3551 | */ | 3547 | */ |
3552 | void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx) | 3548 | void wakeup_kswapd(struct zone *zone, gfp_t gfp_flags, int order, |
3549 | enum zone_type classzone_idx) | ||
3553 | { | 3550 | { |
3554 | pg_data_t *pgdat; | 3551 | pg_data_t *pgdat; |
3555 | 3552 | ||
3556 | if (!managed_zone(zone)) | 3553 | if (!managed_zone(zone)) |
3557 | return; | 3554 | return; |
3558 | 3555 | ||
3559 | if (!cpuset_zone_allowed(zone, GFP_KERNEL | __GFP_HARDWALL)) | 3556 | if (!cpuset_zone_allowed(zone, gfp_flags)) |
3560 | return; | 3557 | return; |
3561 | pgdat = zone->zone_pgdat; | 3558 | pgdat = zone->zone_pgdat; |
3562 | pgdat->kswapd_classzone_idx = kswapd_classzone_idx(pgdat, | 3559 | pgdat->kswapd_classzone_idx = kswapd_classzone_idx(pgdat, |
@@ -3565,14 +3562,23 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx) | |||
3565 | if (!waitqueue_active(&pgdat->kswapd_wait)) | 3562 | if (!waitqueue_active(&pgdat->kswapd_wait)) |
3566 | return; | 3563 | return; |
3567 | 3564 | ||
3568 | /* Hopeless node, leave it to direct reclaim */ | 3565 | /* Hopeless node, leave it to direct reclaim if possible */ |
3569 | if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES) | 3566 | if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES || |
3570 | return; | 3567 | pgdat_balanced(pgdat, order, classzone_idx)) { |
3571 | 3568 | /* | |
3572 | if (pgdat_balanced(pgdat, order, classzone_idx)) | 3569 | * There may be plenty of free memory available, but it's too |
3570 | * fragmented for high-order allocations. Wake up kcompactd | ||
3571 | * and rely on compaction_suitable() to determine if it's | ||
3572 | * needed. If it fails, it will defer subsequent attempts to | ||
3573 | * ratelimit its work. | ||
3574 | */ | ||
3575 | if (!(gfp_flags & __GFP_DIRECT_RECLAIM)) | ||
3576 | wakeup_kcompactd(pgdat, order, classzone_idx); | ||
3573 | return; | 3577 | return; |
3578 | } | ||
3574 | 3579 | ||
3575 | trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, classzone_idx, order); | 3580 | trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, classzone_idx, order, |
3581 | gfp_flags); | ||
3576 | wake_up_interruptible(&pgdat->kswapd_wait); | 3582 | wake_up_interruptible(&pgdat->kswapd_wait); |
3577 | } | 3583 | } |
3578 | 3584 | ||
@@ -3877,7 +3883,13 @@ int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order) | |||
3877 | */ | 3883 | */ |
3878 | int page_evictable(struct page *page) | 3884 | int page_evictable(struct page *page) |
3879 | { | 3885 | { |
3880 | return !mapping_unevictable(page_mapping(page)) && !PageMlocked(page); | 3886 | int ret; |
3887 | |||
3888 | /* Prevent address_space of inode and swap cache from being freed */ | ||
3889 | rcu_read_lock(); | ||
3890 | ret = !mapping_unevictable(page_mapping(page)) && !PageMlocked(page); | ||
3891 | rcu_read_unlock(); | ||
3892 | return ret; | ||
3881 | } | 3893 | } |
3882 | 3894 | ||
3883 | #ifdef CONFIG_SHMEM | 3895 | #ifdef CONFIG_SHMEM |
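
The wakeup_kswapd() change above threads the caller's gfp mask through so that, when kswapd reclaim is hopeless or unnecessary but memory is too fragmented for the request, kcompactd can be woken instead. A minimal sketch of an allocator-side wakeup loop, assuming only the new signature; the real call sites live in the page allocator and differ in detail:

	static void wake_kswapds(struct zonelist *zonelist, gfp_t gfp_mask,
				 int order, enum zone_type high_zoneidx)
	{
		struct zoneref *z;
		struct zone *zone;

		/* give every eligible zone's node a chance to start reclaim
		 * (or, per the hunk above, compaction) for this allocation */
		for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
			wakeup_kswapd(zone, gfp_mask, order, high_zoneidx);
	}
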
diff --git a/mm/z3fold.c b/mm/z3fold.c index d589d318727f..f579ad4a8100 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c | |||
@@ -620,24 +620,27 @@ lookup: | |||
620 | bud = FIRST; | 620 | bud = FIRST; |
621 | } | 621 | } |
622 | 622 | ||
623 | spin_lock(&pool->stale_lock); | 623 | page = NULL; |
624 | zhdr = list_first_entry_or_null(&pool->stale, | 624 | if (can_sleep) { |
625 | struct z3fold_header, buddy); | 625 | spin_lock(&pool->stale_lock); |
626 | /* | 626 | zhdr = list_first_entry_or_null(&pool->stale, |
627 | * Before allocating a page, let's see if we can take one from the | 627 | struct z3fold_header, buddy); |
628 | * stale pages list. cancel_work_sync() can sleep so we must make | 628 | /* |
629 | * sure it won't be called in case we're in atomic context. | 629 | * Before allocating a page, let's see if we can take one from |
630 | */ | 630 | * the stale pages list. cancel_work_sync() can sleep so we |
631 | if (zhdr && (can_sleep || !work_pending(&zhdr->work))) { | 631 | * limit this case to the contexts where we can sleep |
632 | list_del(&zhdr->buddy); | 632 | */ |
633 | spin_unlock(&pool->stale_lock); | 633 | if (zhdr) { |
634 | if (can_sleep) | 634 | list_del(&zhdr->buddy); |
635 | spin_unlock(&pool->stale_lock); | ||
635 | cancel_work_sync(&zhdr->work); | 636 | cancel_work_sync(&zhdr->work); |
636 | page = virt_to_page(zhdr); | 637 | page = virt_to_page(zhdr); |
637 | } else { | 638 | } else { |
638 | spin_unlock(&pool->stale_lock); | 639 | spin_unlock(&pool->stale_lock); |
639 | page = alloc_page(gfp); | 640 | } |
640 | } | 641 | } |
642 | if (!page) | ||
643 | page = alloc_page(gfp); | ||
641 | 644 | ||
642 | if (!page) | 645 | if (!page) |
643 | return -ENOMEM; | 646 | return -ENOMEM; |
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index b7f61cd1c709..61cb05dc950c 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c | |||
@@ -193,6 +193,7 @@ static struct vfsmount *zsmalloc_mnt; | |||
193 | * (see: fix_fullness_group()) | 193 | * (see: fix_fullness_group()) |
194 | */ | 194 | */ |
195 | static const int fullness_threshold_frac = 4; | 195 | static const int fullness_threshold_frac = 4; |
196 | static size_t huge_class_size; | ||
196 | 197 | ||
197 | struct size_class { | 198 | struct size_class { |
198 | spinlock_t lock; | 199 | spinlock_t lock; |
@@ -642,18 +643,7 @@ static int zs_stats_size_show(struct seq_file *s, void *v) | |||
642 | 643 | ||
643 | return 0; | 644 | return 0; |
644 | } | 645 | } |
645 | 646 | DEFINE_SHOW_ATTRIBUTE(zs_stats_size); | |
646 | static int zs_stats_size_open(struct inode *inode, struct file *file) | ||
647 | { | ||
648 | return single_open(file, zs_stats_size_show, inode->i_private); | ||
649 | } | ||
650 | |||
651 | static const struct file_operations zs_stat_size_ops = { | ||
652 | .open = zs_stats_size_open, | ||
653 | .read = seq_read, | ||
654 | .llseek = seq_lseek, | ||
655 | .release = single_release, | ||
656 | }; | ||
657 | 647 | ||
658 | static void zs_pool_stat_create(struct zs_pool *pool, const char *name) | 648 | static void zs_pool_stat_create(struct zs_pool *pool, const char *name) |
659 | { | 649 | { |
@@ -672,7 +662,7 @@ static void zs_pool_stat_create(struct zs_pool *pool, const char *name) | |||
672 | pool->stat_dentry = entry; | 662 | pool->stat_dentry = entry; |
673 | 663 | ||
674 | entry = debugfs_create_file("classes", S_IFREG | S_IRUGO, | 664 | entry = debugfs_create_file("classes", S_IFREG | S_IRUGO, |
675 | pool->stat_dentry, pool, &zs_stat_size_ops); | 665 | pool->stat_dentry, pool, &zs_stats_size_fops); |
676 | if (!entry) { | 666 | if (!entry) { |
677 | pr_warn("%s: debugfs file entry <%s> creation failed\n", | 667 | pr_warn("%s: debugfs file entry <%s> creation failed\n", |
678 | name, "classes"); | 668 | name, "classes"); |
@@ -861,6 +851,7 @@ static struct page *get_next_page(struct page *page) | |||
861 | 851 | ||
862 | /** | 852 | /** |
863 | * obj_to_location - get (<page>, <obj_idx>) from encoded object value | 853 | * obj_to_location - get (<page>, <obj_idx>) from encoded object value |
854 | * @obj: the encoded object value | ||
864 | * @page: page object resides in zspage | 855 | * @page: page object resides in zspage |
865 | * @obj_idx: object index | 856 | * @obj_idx: object index |
866 | */ | 857 | */ |
@@ -1311,6 +1302,7 @@ EXPORT_SYMBOL_GPL(zs_get_total_pages); | |||
1311 | * zs_map_object - get address of allocated object from handle. | 1302 | * zs_map_object - get address of allocated object from handle. |
1312 | * @pool: pool from which the object was allocated | 1303 | * @pool: pool from which the object was allocated |
1313 | * @handle: handle returned from zs_malloc | 1304 | * @handle: handle returned from zs_malloc |
1305 | * @mm: mapping mode to use | ||
1314 | * | 1306 | * |
1315 | * Before using an object allocated from zs_malloc, it must be mapped using | 1307 | * Before using an object allocated from zs_malloc, it must be mapped using |
1316 | * this function. When done with the object, it must be unmapped using | 1308 | * this function. When done with the object, it must be unmapped using |
@@ -1418,6 +1410,25 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle) | |||
1418 | } | 1410 | } |
1419 | EXPORT_SYMBOL_GPL(zs_unmap_object); | 1411 | EXPORT_SYMBOL_GPL(zs_unmap_object); |
1420 | 1412 | ||
1413 | /** | ||
1414 | * zs_huge_class_size() - Returns the size (in bytes) of the first huge | ||
1415 | * zsmalloc &size_class. | ||
1416 | * @pool: zsmalloc pool to use | ||
1417 | * | ||
1418 | * The function returns the size of the first huge class - any object of equal | ||
1419 | * or bigger size will be stored in zspage consisting of a single physical | ||
1420 | * page. | ||
1421 | * | ||
1422 | * Context: Any context. | ||
1423 | * | ||
1424 | * Return: the size (in bytes) of the first huge zsmalloc &size_class. | ||
1425 | */ | ||
1426 | size_t zs_huge_class_size(struct zs_pool *pool) | ||
1427 | { | ||
1428 | return huge_class_size; | ||
1429 | } | ||
1430 | EXPORT_SYMBOL_GPL(zs_huge_class_size); | ||
1431 | |||
1421 | static unsigned long obj_malloc(struct size_class *class, | 1432 | static unsigned long obj_malloc(struct size_class *class, |
1422 | struct zspage *zspage, unsigned long handle) | 1433 | struct zspage *zspage, unsigned long handle) |
1423 | { | 1434 | { |
@@ -2375,6 +2386,27 @@ struct zs_pool *zs_create_pool(const char *name) | |||
2375 | objs_per_zspage = pages_per_zspage * PAGE_SIZE / size; | 2386 | objs_per_zspage = pages_per_zspage * PAGE_SIZE / size; |
2376 | 2387 | ||
2377 | /* | 2388 | /* |
2389 | * We iterate from biggest down to smallest classes, | ||
2390 | * so huge_class_size holds the size of the first huge | ||
2391 | * class. Any object bigger than or equal to that will | ||
2392 | * end up in the huge class. | ||
2393 | */ | ||
2394 | if (pages_per_zspage != 1 && objs_per_zspage != 1 && | ||
2395 | !huge_class_size) { | ||
2396 | huge_class_size = size; | ||
2397 | /* | ||
2398 | * The object uses ZS_HANDLE_SIZE bytes to store the | ||
2399 | * handle. We need to subtract it, because zs_malloc() | ||
2400 | * unconditionally adds handle size before it performs | ||
2401 | * size class search - so object may be smaller than | ||
2402 | * huge class size, yet it still can end up in the huge | ||
2403 | * class because it grows by ZS_HANDLE_SIZE extra bytes | ||
2404 | * right before class lookup. | ||
2405 | */ | ||
2406 | huge_class_size -= (ZS_HANDLE_SIZE - 1); | ||
2407 | } | ||
2408 | |||
2409 | /* | ||
2378 | * size_class is used for normal zsmalloc operation such | 2410 | * size_class is used for normal zsmalloc operation such |
2379 | * as alloc/free for that size. Although it is natural that we | 2411 | * as alloc/free for that size. Although it is natural that we |
2380 | * have one size_class for each size, there is a chance that we | 2412 | * have one size_class for each size, there is a chance that we |
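
zs_huge_class_size(), added above, exposes the size at which objects stop being packed and start consuming a full page each. A minimal sketch of how a compressed-storage user might apply it, in the spirit of the zram patch in this series but with hypothetical names:

	#include <linux/zsmalloc.h>

	static size_t huge_threshold;	/* cached once at pool creation */

	static struct zs_pool *my_create_pool(const char *name)
	{
		struct zs_pool *pool = zs_create_pool(name);

		if (pool)
			huge_threshold = zs_huge_class_size(pool);
		return pool;
	}

	/* Compression only saves memory if the result lands below the first
	 * huge class; at or above it, the object occupies a whole page anyway. */
	static bool worth_storing_compressed(size_t comp_len)
	{
		return comp_len < huge_threshold;
	}
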
diff --git a/net/9p/client.c b/net/9p/client.c index b433aff5ff13..21e6df1cc70f 100644 --- a/net/9p/client.c +++ b/net/9p/client.c | |||
@@ -190,7 +190,9 @@ static int parse_opts(char *opts, struct p9_client *clnt) | |||
190 | p9_debug(P9_DEBUG_ERROR, | 190 | p9_debug(P9_DEBUG_ERROR, |
191 | "problem allocating copy of trans arg\n"); | 191 | "problem allocating copy of trans arg\n"); |
192 | goto free_and_return; | 192 | goto free_and_return; |
193 | } | 193 | } |
194 | |||
195 | v9fs_put_trans(clnt->trans_mod); | ||
194 | clnt->trans_mod = v9fs_get_trans_by_name(s); | 196 | clnt->trans_mod = v9fs_get_trans_by_name(s); |
195 | if (clnt->trans_mod == NULL) { | 197 | if (clnt->trans_mod == NULL) { |
196 | pr_info("Could not find request transport: %s\n", | 198 | pr_info("Could not find request transport: %s\n", |
@@ -226,6 +228,7 @@ static int parse_opts(char *opts, struct p9_client *clnt) | |||
226 | } | 228 | } |
227 | 229 | ||
228 | free_and_return: | 230 | free_and_return: |
231 | v9fs_put_trans(clnt->trans_mod); | ||
229 | kfree(tmp_options); | 232 | kfree(tmp_options); |
230 | return ret; | 233 | return ret; |
231 | } | 234 | } |
@@ -769,7 +772,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) | |||
769 | if (err < 0) { | 772 | if (err < 0) { |
770 | if (err != -ERESTARTSYS && err != -EFAULT) | 773 | if (err != -ERESTARTSYS && err != -EFAULT) |
771 | c->status = Disconnected; | 774 | c->status = Disconnected; |
772 | goto reterr; | 775 | goto recalc_sigpending; |
773 | } | 776 | } |
774 | again: | 777 | again: |
775 | /* Wait for the response */ | 778 | /* Wait for the response */ |
@@ -804,6 +807,7 @@ again: | |||
804 | if (req->status == REQ_STATUS_RCVD) | 807 | if (req->status == REQ_STATUS_RCVD) |
805 | err = 0; | 808 | err = 0; |
806 | } | 809 | } |
810 | recalc_sigpending: | ||
807 | if (sigpending) { | 811 | if (sigpending) { |
808 | spin_lock_irqsave(¤t->sighand->siglock, flags); | 812 | spin_lock_irqsave(¤t->sighand->siglock, flags); |
809 | recalc_sigpending(); | 813 | recalc_sigpending(); |
@@ -867,7 +871,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type, | |||
867 | if (err == -EIO) | 871 | if (err == -EIO) |
868 | c->status = Disconnected; | 872 | c->status = Disconnected; |
869 | if (err != -ERESTARTSYS) | 873 | if (err != -ERESTARTSYS) |
870 | goto reterr; | 874 | goto recalc_sigpending; |
871 | } | 875 | } |
872 | if (req->status == REQ_STATUS_ERROR) { | 876 | if (req->status == REQ_STATUS_ERROR) { |
873 | p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); | 877 | p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); |
@@ -885,6 +889,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type, | |||
885 | if (req->status == REQ_STATUS_RCVD) | 889 | if (req->status == REQ_STATUS_RCVD) |
886 | err = 0; | 890 | err = 0; |
887 | } | 891 | } |
892 | recalc_sigpending: | ||
888 | if (sigpending) { | 893 | if (sigpending) { |
889 | spin_lock_irqsave(¤t->sighand->siglock, flags); | 894 | spin_lock_irqsave(¤t->sighand->siglock, flags); |
890 | recalc_sigpending(); | 895 | recalc_sigpending(); |
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index b3b609f0eeb5..b1a2c5e38530 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c | |||
@@ -15,7 +15,6 @@ | |||
15 | #include <linux/vmalloc.h> | 15 | #include <linux/vmalloc.h> |
16 | #include <linux/init.h> | 16 | #include <linux/init.h> |
17 | #include <linux/slab.h> | 17 | #include <linux/slab.h> |
18 | #include <linux/kmemleak.h> | ||
19 | 18 | ||
20 | #include <net/ip.h> | 19 | #include <net/ip.h> |
21 | #include <net/sock.h> | 20 | #include <net/sock.h> |
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8322e479f299..594a1c605c92 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -108,7 +108,6 @@ | |||
108 | #include <net/rtnetlink.h> | 108 | #include <net/rtnetlink.h> |
109 | #ifdef CONFIG_SYSCTL | 109 | #ifdef CONFIG_SYSCTL |
110 | #include <linux/sysctl.h> | 110 | #include <linux/sysctl.h> |
111 | #include <linux/kmemleak.h> | ||
112 | #endif | 111 | #endif |
113 | #include <net/secure_seq.h> | 112 | #include <net/secure_seq.h> |
114 | #include <net/ip_tunnels.h> | 113 | #include <net/ip_tunnels.h> |
diff --git a/scripts/faddr2line b/scripts/faddr2line index 7721d5b2b0c0..9e5735a4d3a5 100755 --- a/scripts/faddr2line +++ b/scripts/faddr2line | |||
@@ -163,7 +163,17 @@ __faddr2line() { | |||
163 | 163 | ||
164 | # pass real address to addr2line | 164 | # pass real address to addr2line |
165 | echo "$func+$offset/$sym_size:" | 165 | echo "$func+$offset/$sym_size:" |
166 | ${ADDR2LINE} -fpie $objfile $addr | sed "s; $dir_prefix\(\./\)*; ;" | 166 | local file_lines=$(${ADDR2LINE} -fpie $objfile $addr | sed "s; $dir_prefix\(\./\)*; ;") |
167 | [[ -z $file_lines ]] && return | ||
168 | |||
169 | # show each line with context | ||
170 | echo "$file_lines" | while read -r line | ||
171 | do | ||
172 | echo $line | ||
173 | eval $(echo $line | awk -F "[ :]" '{printf("n1=%d;n2=%d;f=%s",$NF-5, $NF+5, $(NF-1))}') | ||
174 | awk 'NR>=strtonum("'$n1'") && NR<=strtonum("'$n2'") {printf("%d\t%s\n", NR, $0)}' $f | ||
175 | done | ||
176 | |||
167 | DONE=1 | 177 | DONE=1 |
168 | 178 | ||
169 | done < <(${NM} -n $objfile | awk -v fn=$func -v end=$file_end '$3 == fn { found=1; line=$0; start=$1; next } found == 1 { found=0; print line, "0x"$1 } END {if (found == 1) print line, end; }') | 179 | done < <(${NM} -n $objfile | awk -v fn=$func -v end=$file_end '$3 == fn { found=1; line=$0; start=$1; next } found == 1 { found=0; print line, "0x"$1 } END {if (found == 1) print line, end; }') |
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 9a65eeaf7dfa..6134302c143c 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c | |||
@@ -23,7 +23,6 @@ | |||
23 | #include <linux/sysctl.h> | 23 | #include <linux/sysctl.h> |
24 | #include <linux/audit.h> | 24 | #include <linux/audit.h> |
25 | #include <linux/user_namespace.h> | 25 | #include <linux/user_namespace.h> |
26 | #include <linux/kmemleak.h> | ||
27 | #include <net/sock.h> | 26 | #include <net/sock.h> |
28 | 27 | ||
29 | #include "include/apparmor.h" | 28 | #include "include/apparmor.h" |
diff --git a/security/keys/big_key.c b/security/keys/big_key.c index fa728f662a6f..933623784ccd 100644 --- a/security/keys/big_key.c +++ b/security/keys/big_key.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/err.h> | 18 | #include <linux/err.h> |
19 | #include <linux/scatterlist.h> | 19 | #include <linux/scatterlist.h> |
20 | #include <linux/random.h> | 20 | #include <linux/random.h> |
21 | #include <linux/vmalloc.h> | ||
21 | #include <keys/user-type.h> | 22 | #include <keys/user-type.h> |
22 | #include <keys/big_key-type.h> | 23 | #include <keys/big_key-type.h> |
23 | #include <crypto/aead.h> | 24 | #include <crypto/aead.h> |