author     Linus Torvalds <torvalds@linux-foundation.org>   2017-11-15 22:42:40 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>   2017-11-15 22:42:40 -0500
commit     7c225c69f86c934e3be9be63ecde754e286838d7 (patch)
tree       ff2df419b0c4886b37407235f7d21215e4cf45e4
parent     6363b3f3ac5be096d08c8c504128befa0c033529 (diff)
parent     1b7176aea0a924ac59c6a283129d3e8eb00aa915 (diff)
Merge branch 'akpm' (patches from Andrew)
Merge updates from Andrew Morton:
- a few misc bits
- ocfs2 updates
- almost all of MM
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (131 commits)
memory hotplug: fix comments when adding section
mm: make alloc_node_mem_map a void call if we don't have CONFIG_FLAT_NODE_MEM_MAP
mm: simplify nodemask printing
mm,oom_reaper: remove pointless kthread_run() error check
mm/page_ext.c: check if page_ext is not prepared
writeback: remove unused function parameter
mm: do not rely on preempt_count in print_vma_addr
mm, sparse: do not swamp log with huge vmemmap allocation failures
mm/hmm: remove redundant variable align_end
mm/list_lru.c: mark expected switch fall-through
mm/shmem.c: mark expected switch fall-through
mm/page_alloc.c: broken deferred calculation
mm: don't warn about allocations which stall for too long
fs: fuse: account fuse_inode slab memory as reclaimable
mm, page_alloc: fix potential false positive in __zone_watermark_ok
mm: mlock: remove lru_add_drain_all()
mm, sysctl: make NUMA stats configurable
shmem: convert shmem_init_inodecache() to void
Unify migrate_pages and move_pages access checks
mm, pagevec: rename pagevec drained field
...
250 files changed, 2276 insertions, 4084 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index b74e13312fdc..00bb04972612 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt | |||
@@ -1864,13 +1864,6 @@ | |||
1864 | Built with CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF=y, | 1864 | Built with CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF=y, |
1865 | the default is off. | 1865 | the default is off. |
1866 | 1866 | ||
1867 | kmemcheck= [X86] Boot-time kmemcheck enable/disable/one-shot mode | ||
1868 | Valid arguments: 0, 1, 2 | ||
1869 | kmemcheck=0 (disabled) | ||
1870 | kmemcheck=1 (enabled) | ||
1871 | kmemcheck=2 (one-shot mode) | ||
1872 | Default: 2 (one-shot mode) | ||
1873 | |||
1874 | kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs. | 1867 | kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs. |
1875 | Default is 0 (don't ignore, but inject #GP) | 1868 | Default is 0 (don't ignore, but inject #GP) |
1876 | 1869 | ||
diff --git a/Documentation/dev-tools/index.rst b/Documentation/dev-tools/index.rst index a81787cd47d7..e313925fb0fa 100644 --- a/Documentation/dev-tools/index.rst +++ b/Documentation/dev-tools/index.rst | |||
@@ -21,7 +21,6 @@ whole; patches welcome! | |||
21 | kasan | 21 | kasan |
22 | ubsan | 22 | ubsan |
23 | kmemleak | 23 | kmemleak |
24 | kmemcheck | ||
25 | gdb-kernel-debugging | 24 | gdb-kernel-debugging |
26 | kgdb | 25 | kgdb |
27 | kselftest | 26 | kselftest |
diff --git a/Documentation/dev-tools/kmemcheck.rst b/Documentation/dev-tools/kmemcheck.rst deleted file mode 100644 index 7f3d1985de74..000000000000 --- a/Documentation/dev-tools/kmemcheck.rst +++ /dev/null | |||
@@ -1,733 +0,0 @@ | |||
1 | Getting started with kmemcheck | ||
2 | ============================== | ||
3 | |||
4 | Vegard Nossum <vegardno@ifi.uio.no> | ||
5 | |||
6 | |||
7 | Introduction | ||
8 | ------------ | ||
9 | |||
10 | kmemcheck is a debugging feature for the Linux Kernel. More specifically, it | ||
11 | is a dynamic checker that detects and warns about some uses of uninitialized | ||
12 | memory. | ||
13 | |||
14 | Userspace programmers might be familiar with Valgrind's memcheck. The main | ||
15 | difference between memcheck and kmemcheck is that memcheck works for userspace | ||
16 | programs only, and kmemcheck works for the kernel only. The implementations | ||
17 | are of course vastly different. Because of this, kmemcheck is not as accurate | ||
18 | as memcheck, but it turns out to be good enough in practice to discover real | ||
19 | programmer errors that the compiler is not able to find through static | ||
20 | analysis. | ||
21 | |||
22 | Enabling kmemcheck on a kernel will probably slow it down to the extent that | ||
23 | the machine will not be usable for normal workloads such as an | ||
24 | interactive desktop. kmemcheck will also cause the kernel to use about twice | ||
25 | as much memory as normal. For this reason, kmemcheck is strictly a debugging | ||
26 | feature. | ||
27 | |||
28 | |||
29 | Downloading | ||
30 | ----------- | ||
31 | |||
32 | As of version 2.6.31-rc1, kmemcheck is included in the mainline kernel. | ||
33 | |||
34 | |||
35 | Configuring and compiling | ||
36 | ------------------------- | ||
37 | |||
38 | kmemcheck only works for the x86 (both 32- and 64-bit) platform. A number of | ||
39 | configuration variables must have specific settings in order for the kmemcheck | ||
40 | menu to even appear in "menuconfig". These are: | ||
41 | |||
42 | - ``CONFIG_CC_OPTIMIZE_FOR_SIZE=n`` | ||
43 | This option is located under "General setup" / "Optimize for size". | ||
44 | |||
45 | Without this, gcc will use certain optimizations that usually lead to | ||
46 | false positive warnings from kmemcheck. An example of this is a 16-bit | ||
47 | field in a struct, where gcc may load 32 bits, then discard the upper | ||
48 | 16 bits. kmemcheck sees only the 32-bit load, and may trigger a | ||
49 | warning for the upper 16 bits (if they're uninitialized). A layout sketch of this case follows the list below. | ||
50 | |||
51 | - ``CONFIG_SLAB=y`` or ``CONFIG_SLUB=y`` | ||
52 | This option is located under "General setup" / "Choose SLAB | ||
53 | allocator". | ||
54 | |||
55 | - ``CONFIG_FUNCTION_TRACER=n`` | ||
56 | This option is located under "Kernel hacking" / "Tracers" / "Kernel | ||
57 | Function Tracer" | ||
58 | |||
59 | When function tracing is compiled in, gcc emits a call to another | ||
60 | function at the beginning of every function. This means that when the | ||
61 | page fault handler is called, the ftrace framework will be called | ||
62 | before kmemcheck has had a chance to handle the fault. If ftrace then | ||
63 | modifies memory that was tracked by kmemcheck, the result is an | ||
64 | endless recursive page fault. | ||
65 | |||
66 | - ``CONFIG_DEBUG_PAGEALLOC=n`` | ||
67 | This option is located under "Kernel hacking" / "Memory Debugging" | ||
68 | / "Debug page memory allocations". | ||
69 | |||
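As promised above, here is a minimal userspace sketch of the 16-bit-field false positive (the ``struct packet`` layout and all names are purely illustrative, chosen for this example only; they are not taken from any kernel code)::

  #include <stdlib.h>

  /*
   * Layout sketch only: the code initializes "small" but never "pad".
   * An optimizing compiler is free to fetch a full 32-bit word covering
   * both halves and discard the upper 16 bits; kmemcheck only sees the
   * width of the load that was emitted, so the access can look like a
   * read of the uninitialized "pad" bytes as well.
   */
  struct packet {
          unsigned short small;   /* the only field the code writes */
          unsigned short pad;     /* uninitialized neighbour in the same word */
  };

  static unsigned short read_small(const struct packet *p)
  {
          return p->small;        /* may be compiled as a wider load */
  }

  int main(void)
  {
          struct packet *p = malloc(sizeof(*p));
          int ok;

          if (!p)
                  return 1;
          p->small = 42;          /* "pad" intentionally left uninitialized */
          ok = (read_small(p) == 42);
          free(p);
          return ok ? 0 : 1;
  }

Whether the combined load is actually emitted depends on the compiler and flags; the point is only that kmemcheck judges the access by the width of the load that was generated.
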
70 | In addition, I highly recommend turning on ``CONFIG_DEBUG_INFO=y``. This is also | ||
71 | located under "Kernel hacking". With this, you will be able to get line number | ||
72 | information from the kmemcheck warnings, which is extremely valuable in | ||
73 | debugging a problem. This option is not mandatory, however, because it slows | ||
74 | down the compilation process and produces a much bigger kernel image. | ||
75 | |||
76 | Now the kmemcheck menu should be visible (under "Kernel hacking" / "Memory | ||
77 | Debugging" / "kmemcheck: trap use of uninitialized memory"). Here follows | ||
78 | a description of the kmemcheck configuration variables: | ||
79 | |||
80 | - ``CONFIG_KMEMCHECK`` | ||
81 | This must be enabled in order to use kmemcheck at all... | ||
82 | |||
83 | - ``CONFIG_KMEMCHECK_``[``DISABLED`` | ``ENABLED`` | ``ONESHOT``]``_BY_DEFAULT`` | ||
84 | This option controls the status of kmemcheck at boot-time. "Enabled" | ||
85 | will enable kmemcheck right from the start, "disabled" will boot the | ||
86 | kernel as normal (but with the kmemcheck code compiled in, so it can | ||
87 | be enabled at run-time after the kernel has booted), and "one-shot" is | ||
88 | a special mode which will turn kmemcheck off automatically after | ||
89 | detecting the first use of uninitialized memory. | ||
90 | |||
91 | If you are using kmemcheck to actively debug a problem, then you | ||
92 | probably want to choose "enabled" here. | ||
93 | |||
94 | The one-shot mode is mostly useful in automated test setups because it | ||
95 | can prevent floods of warnings and increase the chances of the machine | ||
96 | surviving in case something is really wrong. In other cases, the one- | ||
97 | shot mode could actually be counter-productive because it would turn | ||
98 | itself off at the very first error -- in the case of a false positive | ||
99 | too -- and this would get in the way of debugging the specific | ||
100 | problem you were interested in. | ||
101 | |||
102 | If you would like to use your kernel as normal, but with a chance to | ||
103 | enable kmemcheck in case of some problem, it might be a good idea to | ||
104 | choose "disabled" here. When kmemcheck is disabled, most of the run- | ||
105 | time overhead is not incurred, and the kernel will be almost as fast | ||
106 | as normal. | ||
107 | |||
108 | - ``CONFIG_KMEMCHECK_QUEUE_SIZE`` | ||
109 | Select the maximum number of error reports to store in an internal | ||
110 | (fixed-size) buffer. Since errors can occur virtually anywhere and in | ||
111 | any context, we need a temporary storage area which is guaranteed not | ||
112 | to generate any other page faults when accessed. The queue will be | ||
113 | emptied as soon as a tasklet may be scheduled. If the queue is full, | ||
114 | new error reports will be lost. | ||
115 | |||
116 | The default value of 64 is probably fine. If some code produces more | ||
117 | than 64 errors within an irqs-off section, then the code is likely to | ||
118 | produce many, many more, too, and these additional reports seldom give | ||
119 | any more information (the first report is usually the most valuable | ||
120 | anyway). | ||
121 | |||
122 | This number might have to be adjusted if you are not using serial | ||
123 | console or similar to capture the kernel log. If you are using the | ||
124 | "dmesg" command to save the log, then getting a lot of kmemcheck | ||
125 | warnings might overflow the kernel log itself, and the earlier reports | ||
126 | will get lost in that way instead. Try setting this to 10 or so on | ||
127 | such a setup. | ||
128 | |||
129 | - ``CONFIG_KMEMCHECK_SHADOW_COPY_SHIFT`` | ||
130 | Select the number of shadow bytes to save along with each entry of the | ||
131 | error-report queue. These bytes indicate what parts of an allocation | ||
132 | are initialized, uninitialized, etc. and will be displayed when an | ||
133 | error is detected to help the debugging of a particular problem. | ||
134 | |||
135 | The number entered here is actually the logarithm of the number of | ||
136 | bytes that will be saved. So if you pick for example 5 here, kmemcheck | ||
137 | will save 2^5 = 32 bytes. | ||
138 | |||
139 | The default value should be fine for debugging most problems. It also | ||
140 | fits nicely within 80 columns. | ||
141 | |||
142 | - ``CONFIG_KMEMCHECK_PARTIAL_OK`` | ||
143 | This option (when enabled) works around certain GCC optimizations that | ||
144 | produce 32-bit reads from 16-bit variables where the upper 16 bits are | ||
145 | thrown away afterwards. | ||
146 | |||
147 | The default value (enabled) is recommended. This may of course hide | ||
148 | some real errors, but disabling it would probably produce a lot of | ||
149 | false positives. | ||
150 | |||
151 | - ``CONFIG_KMEMCHECK_BITOPS_OK`` | ||
152 | This option silences warnings that would be generated for bit-field | ||
153 | accesses where not all the bits are initialized at the same time. This | ||
154 | may also hide some real bugs. | ||
155 | |||
156 | This option is probably obsolete, or it should be replaced with | ||
157 | the kmemcheck-/bitfield-annotations for the code in question. The | ||
158 | default value is therefore fine. | ||
159 | |||
160 | Now compile the kernel as usual. | ||
161 | |||
162 | |||
163 | How to use | ||
164 | ---------- | ||
165 | |||
166 | Booting | ||
167 | ~~~~~~~ | ||
168 | |||
169 | First some information about the command-line options. There is only one | ||
170 | option specific to kmemcheck, and this is called "kmemcheck". It can be used | ||
171 | to override the default mode as chosen by the ``CONFIG_KMEMCHECK_*_BY_DEFAULT`` | ||
172 | option. Its possible settings are: | ||
173 | |||
174 | - ``kmemcheck=0`` (disabled) | ||
175 | - ``kmemcheck=1`` (enabled) | ||
176 | - ``kmemcheck=2`` (one-shot mode) | ||
177 | |||
178 | If SLUB debugging has been enabled in the kernel, it may take precedence over | ||
179 | kmemcheck in such a way that the slab caches which are under SLUB debugging | ||
180 | will not be tracked by kmemcheck. In order to ensure that this doesn't happen | ||
181 | (even though it shouldn't by default), use SLUB's boot option ``slub_debug``, | ||
182 | like this: ``slub_debug=-`` | ||
183 | |||
184 | In fact, this option may also be used for fine-grained control over SLUB vs. | ||
185 | kmemcheck. For example, if the command line includes | ||
186 | ``kmemcheck=1 slub_debug=,dentry``, then SLUB debugging will be used only | ||
187 | for the "dentry" slab cache, and with kmemcheck tracking all the other | ||
188 | caches. This is advanced usage, however, and is not generally recommended. | ||
189 | |||
190 | |||
191 | Run-time enable/disable | ||
192 | ~~~~~~~~~~~~~~~~~~~~~~~ | ||
193 | |||
194 | When the kernel has booted, it is possible to enable or disable kmemcheck at | ||
195 | run-time. WARNING: This feature is still experimental and may cause false | ||
196 | positive warnings to appear. Therefore, try not to use this. If you find that | ||
197 | it doesn't work properly (e.g. you see an unreasonable amount of warnings), I | ||
198 | will be happy to take bug reports. | ||
199 | |||
200 | Use the file ``/proc/sys/kernel/kmemcheck`` for this purpose, e.g.:: | ||
201 | |||
202 | $ echo 0 > /proc/sys/kernel/kmemcheck # disables kmemcheck | ||
203 | |||
204 | The numbers are the same as for the ``kmemcheck=`` command-line option. | ||
205 | |||
206 | |||
207 | Debugging | ||
208 | ~~~~~~~~~ | ||
209 | |||
210 | A typical report will look something like this:: | ||
211 | |||
212 | WARNING: kmemcheck: Caught 32-bit read from uninitialized memory (ffff88003e4a2024) | ||
213 | 80000000000000000000000000000000000000000088ffff0000000000000000 | ||
214 | i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u | ||
215 | ^ | ||
216 | |||
217 | Pid: 1856, comm: ntpdate Not tainted 2.6.29-rc5 #264 945P-A | ||
218 | RIP: 0010:[<ffffffff8104ede8>] [<ffffffff8104ede8>] __dequeue_signal+0xc8/0x190 | ||
219 | RSP: 0018:ffff88003cdf7d98 EFLAGS: 00210002 | ||
220 | RAX: 0000000000000030 RBX: ffff88003d4ea968 RCX: 0000000000000009 | ||
221 | RDX: ffff88003e5d6018 RSI: ffff88003e5d6024 RDI: ffff88003cdf7e84 | ||
222 | RBP: ffff88003cdf7db8 R08: ffff88003e5d6000 R09: 0000000000000000 | ||
223 | R10: 0000000000000080 R11: 0000000000000000 R12: 000000000000000e | ||
224 | R13: ffff88003cdf7e78 R14: ffff88003d530710 R15: ffff88003d5a98c8 | ||
225 | FS: 0000000000000000(0000) GS:ffff880001982000(0063) knlGS:00000 | ||
226 | CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033 | ||
227 | CR2: ffff88003f806ea0 CR3: 000000003c036000 CR4: 00000000000006a0 | ||
228 | DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 | ||
229 | DR3: 0000000000000000 DR6: 00000000ffff4ff0 DR7: 0000000000000400 | ||
230 | [<ffffffff8104f04e>] dequeue_signal+0x8e/0x170 | ||
231 | [<ffffffff81050bd8>] get_signal_to_deliver+0x98/0x390 | ||
232 | [<ffffffff8100b87d>] do_notify_resume+0xad/0x7d0 | ||
233 | [<ffffffff8100c7b5>] int_signal+0x12/0x17 | ||
234 | [<ffffffffffffffff>] 0xffffffffffffffff | ||
235 | |||
236 | The single most valuable piece of information in this report is the RIP (or | ||
237 | EIP on 32-bit) value. This will help us pinpoint exactly which instruction caused | ||
238 | the warning. | ||
239 | |||
240 | If your kernel was compiled with ``CONFIG_DEBUG_INFO=y``, then all we have to do | ||
241 | is give this address to the addr2line program, like this:: | ||
242 | |||
243 | $ addr2line -e vmlinux -i ffffffff8104ede8 | ||
244 | arch/x86/include/asm/string_64.h:12 | ||
245 | include/asm-generic/siginfo.h:287 | ||
246 | kernel/signal.c:380 | ||
247 | kernel/signal.c:410 | ||
248 | |||
249 | The "``-e vmlinux``" tells addr2line which file to look in. **IMPORTANT:** | ||
250 | This must be the vmlinux of the kernel that produced the warning in the | ||
251 | first place! If not, the line number information will almost certainly be | ||
252 | wrong. | ||
253 | |||
254 | The "``-i``" tells addr2line to also print the line numbers of inlined | ||
255 | functions. In this case, the flag was very important, because otherwise, | ||
256 | it would only have printed the first line, which is just a call to | ||
257 | ``memcpy()``, which could be called from a thousand places in the kernel, and | ||
258 | is therefore not very useful. These inlined functions would not show up in | ||
259 | the stack trace above, simply because the kernel doesn't load the extra | ||
260 | debugging information. This technique can of course be used with ordinary | ||
261 | kernel oopses as well. | ||
262 | |||
263 | In this case, it's the caller of ``memcpy()`` that is interesting, and it can be | ||
264 | found in ``include/asm-generic/siginfo.h``, line 287:: | ||
265 | |||
266 | 281 static inline void copy_siginfo(struct siginfo *to, struct siginfo *from) | ||
267 | 282 { | ||
268 | 283 if (from->si_code < 0) | ||
269 | 284 memcpy(to, from, sizeof(*to)); | ||
270 | 285 else | ||
271 | 286 /* _sigchld is currently the largest know union member */ | ||
272 | 287 memcpy(to, from, __ARCH_SI_PREAMBLE_SIZE + sizeof(from->_sifields._sigchld)); | ||
273 | 288 } | ||
274 | |||
275 | Since this was a read (kmemcheck usually warns about reads only, though it can | ||
276 | warn about writes to unallocated or freed memory as well), it was probably the | ||
277 | "from" argument which contained some uninitialized bytes. Following the chain | ||
278 | of calls, we move upwards to see where "from" was allocated or initialized, | ||
279 | ``kernel/signal.c``, line 380:: | ||
280 | |||
281 | 359 static void collect_signal(int sig, struct sigpending *list, siginfo_t *info) | ||
282 | 360 { | ||
283 | ... | ||
284 | 367 list_for_each_entry(q, &list->list, list) { | ||
285 | 368 if (q->info.si_signo == sig) { | ||
286 | 369 if (first) | ||
287 | 370 goto still_pending; | ||
288 | 371 first = q; | ||
289 | ... | ||
290 | 377 if (first) { | ||
291 | 378 still_pending: | ||
292 | 379 list_del_init(&first->list); | ||
293 | 380 copy_siginfo(info, &first->info); | ||
294 | 381 __sigqueue_free(first); | ||
295 | ... | ||
296 | 392 } | ||
297 | 393 } | ||
298 | |||
299 | Here, it is ``&first->info`` that is being passed on to ``copy_siginfo()``. The | ||
300 | variable ``first`` was found on a list -- passed in as the second argument to | ||
301 | ``collect_signal()``. We continue our journey through the stack, to figure out | ||
302 | where the item on "list" was allocated or initialized. We move to line 410:: | ||
303 | |||
304 | 395 static int __dequeue_signal(struct sigpending *pending, sigset_t *mask, | ||
305 | 396 siginfo_t *info) | ||
306 | 397 { | ||
307 | ... | ||
308 | 410 collect_signal(sig, pending, info); | ||
309 | ... | ||
310 | 414 } | ||
311 | |||
312 | Now we need to follow the ``pending`` pointer, since that is being passed on to | ||
313 | ``collect_signal()`` as ``list``. At this point, we've run out of lines from the | ||
314 | "addr2line" output. Not to worry, we just paste the next addresses from the | ||
315 | kmemcheck stack dump, i.e.:: | ||
316 | |||
317 | [<ffffffff8104f04e>] dequeue_signal+0x8e/0x170 | ||
318 | [<ffffffff81050bd8>] get_signal_to_deliver+0x98/0x390 | ||
319 | [<ffffffff8100b87d>] do_notify_resume+0xad/0x7d0 | ||
320 | [<ffffffff8100c7b5>] int_signal+0x12/0x17 | ||
321 | |||
322 | $ addr2line -e vmlinux -i ffffffff8104f04e ffffffff81050bd8 \ | ||
323 | ffffffff8100b87d ffffffff8100c7b5 | ||
324 | kernel/signal.c:446 | ||
325 | kernel/signal.c:1806 | ||
326 | arch/x86/kernel/signal.c:805 | ||
327 | arch/x86/kernel/signal.c:871 | ||
328 | arch/x86/kernel/entry_64.S:694 | ||
329 | |||
330 | Remember that since these addresses were found on the stack and not as the | ||
331 | RIP value, they actually point to the _next_ instruction (they are return | ||
332 | addresses). This becomes obvious when we look at the code for line 446:: | ||
333 | |||
334 | 422 int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) | ||
335 | 423 { | ||
336 | ... | ||
337 | 431 signr = __dequeue_signal(&tsk->signal->shared_pending, | ||
338 | 432 mask, info); | ||
339 | 433 /* | ||
340 | 434 * itimer signal ? | ||
341 | 435 * | ||
342 | 436 * itimers are process shared and we restart periodic | ||
343 | 437 * itimers in the signal delivery path to prevent DoS | ||
344 | 438 * attacks in the high resolution timer case. This is | ||
345 | 439 * compliant with the old way of self restarting | ||
346 | 440 * itimers, as the SIGALRM is a legacy signal and only | ||
347 | 441 * queued once. Changing the restart behaviour to | ||
348 | 442 * restart the timer in the signal dequeue path is | ||
349 | 443 * reducing the timer noise on heavy loaded !highres | ||
350 | 444 * systems too. | ||
351 | 445 */ | ||
352 | 446 if (unlikely(signr == SIGALRM)) { | ||
353 | ... | ||
354 | 489 } | ||
355 | |||
356 | So instead of looking at 446, we should be looking at 431, which is the line | ||
357 | that executes just before 446. Here we see that what we are looking for is | ||
358 | ``&tsk->signal->shared_pending``. | ||
359 | |||
360 | Our next task is to figure out which function puts items on this | ||
361 | ``shared_pending`` list. A crude but efficient tool is ``git grep``:: | ||
362 | |||
363 | $ git grep -n 'shared_pending' kernel/ | ||
364 | ... | ||
365 | kernel/signal.c:828: pending = group ? &t->signal->shared_pending : &t->pending; | ||
366 | kernel/signal.c:1339: pending = group ? &t->signal->shared_pending : &t->pending; | ||
367 | ... | ||
368 | |||
369 | There were more results, but none of them were related to list operations, | ||
370 | and these were the only assignments. We inspect the line numbers more closely | ||
371 | and find that this is indeed where items are being added to the list:: | ||
372 | |||
373 | 816 static int send_signal(int sig, struct siginfo *info, struct task_struct *t, | ||
374 | 817 int group) | ||
375 | 818 { | ||
376 | ... | ||
377 | 828 pending = group ? &t->signal->shared_pending : &t->pending; | ||
378 | ... | ||
379 | 851 q = __sigqueue_alloc(t, GFP_ATOMIC, (sig < SIGRTMIN && | ||
380 | 852 (is_si_special(info) || | ||
381 | 853 info->si_code >= 0))); | ||
382 | 854 if (q) { | ||
383 | 855 list_add_tail(&q->list, &pending->list); | ||
384 | ... | ||
385 | 890 } | ||
386 | |||
387 | and:: | ||
388 | |||
389 | 1309 int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group) | ||
390 | 1310 { | ||
391 | .... | ||
392 | 1339 pending = group ? &t->signal->shared_pending : &t->pending; | ||
393 | 1340 list_add_tail(&q->list, &pending->list); | ||
394 | .... | ||
395 | 1347 } | ||
396 | |||
397 | In the first case, the list element we are looking for, ``q``, is being | ||
398 | returned from the function ``__sigqueue_alloc()``, which looks like an | ||
399 | allocation function. Let's take a look at it:: | ||
400 | |||
401 | 187 static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, | ||
402 | 188 int override_rlimit) | ||
403 | 189 { | ||
404 | 190 struct sigqueue *q = NULL; | ||
405 | 191 struct user_struct *user; | ||
406 | 192 | ||
407 | 193 /* | ||
408 | 194 * We won't get problems with the target's UID changing under us | ||
409 | 195 * because changing it requires RCU be used, and if t != current, the | ||
410 | 196 * caller must be holding the RCU readlock (by way of a spinlock) and | ||
411 | 197 * we use RCU protection here | ||
412 | 198 */ | ||
413 | 199 user = get_uid(__task_cred(t)->user); | ||
414 | 200 atomic_inc(&user->sigpending); | ||
415 | 201 if (override_rlimit || | ||
416 | 202 atomic_read(&user->sigpending) <= | ||
417 | 203 t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur) | ||
418 | 204 q = kmem_cache_alloc(sigqueue_cachep, flags); | ||
419 | 205 if (unlikely(q == NULL)) { | ||
420 | 206 atomic_dec(&user->sigpending); | ||
421 | 207 free_uid(user); | ||
422 | 208 } else { | ||
423 | 209 INIT_LIST_HEAD(&q->list); | ||
424 | 210 q->flags = 0; | ||
425 | 211 q->user = user; | ||
426 | 212 } | ||
427 | 213 | ||
428 | 214 return q; | ||
429 | 215 } | ||
430 | |||
431 | We see that this function initializes ``q->list``, ``q->flags``, and | ||
432 | ``q->user``. It seems that now is the time to look at the definition of | ||
433 | ``struct sigqueue``, e.g.:: | ||
434 | |||
435 | 14 struct sigqueue { | ||
436 | 15 struct list_head list; | ||
437 | 16 int flags; | ||
438 | 17 siginfo_t info; | ||
439 | 18 struct user_struct *user; | ||
440 | 19 }; | ||
441 | |||
442 | And, you might remember, it was a ``memcpy()`` on ``&first->info`` that | ||
443 | caused the warning, so this makes perfect sense. It also seems reasonable | ||
444 | to assume that it is the caller of ``__sigqueue_alloc()`` that has the | ||
445 | responsibility of filling out (initializing) this member. | ||
446 | |||
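Before answering that, it may help to see the shape of the problem in a small, self-contained userspace analogue (every name below is invented for this illustration; it is not kernel code). The allocator helper fills in some fields but deliberately leaves ``info`` to its caller, so copying the whole object reads uninitialized bytes -- the same pattern kmemcheck flagged here, and the one Valgrind's memcheck flags in userspace::

  #include <stdlib.h>
  #include <string.h>

  /*
   * Userspace analogue of the situation above; all names are made up.
   * The "allocator" initializes next/flags/user but leaves info alone,
   * just like __sigqueue_alloc() leaves q->info to its caller.
   */
  struct fake_sigqueue {
          struct fake_sigqueue *next;     /* stands in for the list head */
          int flags;
          int info[4];                    /* stands in for siginfo_t */
          void *user;
  };

  static struct fake_sigqueue *fake_sigqueue_alloc(void)
  {
          struct fake_sigqueue *q = malloc(sizeof(*q));

          if (q) {
                  q->next = NULL;
                  q->flags = 0;
                  q->user = NULL;
                  /* q->info is deliberately left for the caller */
          }
          return q;
  }

  int main(void)
  {
          int copy[4];
          struct fake_sigqueue *q = fake_sigqueue_alloc();

          if (!q)
                  return 1;
          /* Copying the whole member reads uninitialized bytes. */
          memcpy(copy, q->info, sizeof(copy));
          free(q);
          return 0;
  }
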
447 | But just which fields of the struct were uninitialized? Let's look at | ||
448 | kmemcheck's report again:: | ||
449 | |||
450 | WARNING: kmemcheck: Caught 32-bit read from uninitialized memory (ffff88003e4a2024) | ||
451 | 80000000000000000000000000000000000000000088ffff0000000000000000 | ||
452 | i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u | ||
453 | ^ | ||
454 | |||
455 | These first two lines are the memory dump of the memory object itself, and | ||
456 | the shadow bytemap, respectively. The memory object itself is in this case | ||
457 | ``&first->info``. Just beware that the start of this dump is NOT the start | ||
458 | of the object itself! The position of the caret (^) corresponds with the | ||
459 | address of the read (ffff88003e4a2024). | ||
460 | |||
461 | The shadow bytemap dump legend is as follows: | ||
462 | |||
463 | - i: initialized | ||
464 | - u: uninitialized | ||
465 | - a: unallocated (memory has been allocated by the slab layer, but has not | ||
466 | yet been handed off to anybody) | ||
467 | - f: freed (memory has been allocated by the slab layer, but has been freed | ||
468 | by the previous owner) | ||
469 | |||
470 | In order to figure out where (relative to the start of the object) the | ||
471 | uninitialized memory was located, we have to look at the disassembly. For | ||
472 | that, we'll need the RIP address again:: | ||
473 | |||
474 | RIP: 0010:[<ffffffff8104ede8>] [<ffffffff8104ede8>] __dequeue_signal+0xc8/0x190 | ||
475 | |||
476 | $ objdump -d --no-show-raw-insn vmlinux | grep -C 8 ffffffff8104ede8: | ||
477 | ffffffff8104edc8: mov %r8,0x8(%r8) | ||
478 | ffffffff8104edcc: test %r10d,%r10d | ||
479 | ffffffff8104edcf: js ffffffff8104ee88 <__dequeue_signal+0x168> | ||
480 | ffffffff8104edd5: mov %rax,%rdx | ||
481 | ffffffff8104edd8: mov $0xc,%ecx | ||
482 | ffffffff8104eddd: mov %r13,%rdi | ||
483 | ffffffff8104ede0: mov $0x30,%eax | ||
484 | ffffffff8104ede5: mov %rdx,%rsi | ||
485 | ffffffff8104ede8: rep movsl %ds:(%rsi),%es:(%rdi) | ||
486 | ffffffff8104edea: test $0x2,%al | ||
487 | ffffffff8104edec: je ffffffff8104edf0 <__dequeue_signal+0xd0> | ||
488 | ffffffff8104edee: movsw %ds:(%rsi),%es:(%rdi) | ||
489 | ffffffff8104edf0: test $0x1,%al | ||
490 | ffffffff8104edf2: je ffffffff8104edf5 <__dequeue_signal+0xd5> | ||
491 | ffffffff8104edf4: movsb %ds:(%rsi),%es:(%rdi) | ||
492 | ffffffff8104edf5: mov %r8,%rdi | ||
493 | ffffffff8104edf8: callq ffffffff8104de60 <__sigqueue_free> | ||
494 | |||
495 | As expected, it's the "``rep movsl``" instruction from the ``memcpy()`` | ||
496 | that causes the warning. We know that ``REP MOVSL`` uses the register | ||
497 | ``RCX`` to count the number of remaining iterations. By taking a look at the | ||
498 | register dump again (from the kmemcheck report), we can figure out how many | ||
499 | bytes were left to copy:: | ||
500 | |||
501 | RAX: 0000000000000030 RBX: ffff88003d4ea968 RCX: 0000000000000009 | ||
502 | |||
503 | By looking at the disassembly, we also see that ``%ecx`` is being loaded | ||
504 | with the value ``$0xc`` just before (ffffffff8104edd8), so we are very | ||
505 | lucky. Keep in mind that this is the number of iterations, not bytes. And | ||
506 | since this is a "long" operation, we need to multiply by 4 to get the | ||
507 | number of bytes. So this means that the uninitialized value was encountered | ||
508 | at 4 * (0xc - 0x9) = 12 bytes from the start of the object. | ||
509 | |||
510 | We can now try to figure out which field of the "``struct siginfo``" that | ||
511 | was not initialized. This is the beginning of the struct:: | ||
512 | |||
513 | 40 typedef struct siginfo { | ||
514 | 41 int si_signo; | ||
515 | 42 int si_errno; | ||
516 | 43 int si_code; | ||
517 | 44 | ||
518 | 45 union { | ||
519 | .. | ||
520 | 92 } _sifields; | ||
521 | 93 } siginfo_t; | ||
522 | |||
523 | On 64-bit, an int is 4 bytes long, so it must be the union member that has | ||
524 | not been initialized. We can verify this using gdb:: | ||
525 | |||
526 | $ gdb vmlinux | ||
527 | ... | ||
528 | (gdb) p &((struct siginfo *) 0)->_sifields | ||
529 | $1 = (union {...} *) 0x10 | ||
530 | |||
531 | Actually, it seems that the union member is located at offset 0x10 -- which | ||
532 | means that gcc has inserted 4 bytes of padding between the members ``si_code`` | ||
533 | and ``_sifields``. We can now get a fuller picture of the memory dump:: | ||
534 | |||
535 | _----------------------------=> si_code | ||
536 | / _--------------------=> (padding) | ||
537 | | / _------------=> _sifields(._kill._pid) | ||
538 | | | / _----=> _sifields(._kill._uid) | ||
539 | | | | / | ||
540 | -------|-------|-------|-------| | ||
541 | 80000000000000000000000000000000000000000088ffff0000000000000000 | ||
542 | i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u | ||
543 | |||
544 | This allows us to realize another important fact: ``si_code`` contains the | ||
545 | value 0x80. Remember that x86 is little endian, so the first 4 bytes | ||
546 | "80000000" are really the number 0x00000080. With a bit of research, we | ||
547 | find that this is actually the constant ``SI_KERNEL`` defined in | ||
548 | ``include/asm-generic/siginfo.h``:: | ||
549 | |||
550 | 144 #define SI_KERNEL 0x80 /* sent by the kernel from somewhere */ | ||
551 | |||
552 | This macro is used in exactly one place in the x86 kernel: In ``send_signal()`` | ||
553 | in ``kernel/signal.c``:: | ||
554 | |||
555 | 816 static int send_signal(int sig, struct siginfo *info, struct task_struct *t, | ||
556 | 817 int group) | ||
557 | 818 { | ||
558 | ... | ||
559 | 828 pending = group ? &t->signal->shared_pending : &t->pending; | ||
560 | ... | ||
561 | 851 q = __sigqueue_alloc(t, GFP_ATOMIC, (sig < SIGRTMIN && | ||
562 | 852 (is_si_special(info) || | ||
563 | 853 info->si_code >= 0))); | ||
564 | 854 if (q) { | ||
565 | 855 list_add_tail(&q->list, &pending->list); | ||
566 | 856 switch ((unsigned long) info) { | ||
567 | ... | ||
568 | 865 case (unsigned long) SEND_SIG_PRIV: | ||
569 | 866 q->info.si_signo = sig; | ||
570 | 867 q->info.si_errno = 0; | ||
571 | 868 q->info.si_code = SI_KERNEL; | ||
572 | 869 q->info.si_pid = 0; | ||
573 | 870 q->info.si_uid = 0; | ||
574 | 871 break; | ||
575 | ... | ||
576 | 890 } | ||
577 | |||
578 | Not only does this match the ``.si_code`` member, it also matches the place | ||
579 | we found earlier when looking for where siginfo_t objects are enqueued on the | ||
580 | ``shared_pending`` list. | ||
581 | |||
582 | So to sum up: It seems that it is the padding introduced by the compiler | ||
583 | between two struct fields that is uninitialized, and this gets reported when | ||
584 | we do a ``memcpy()`` on the struct. This means that we have identified a false | ||
585 | positive warning. | ||
586 | |||
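If you want to convince yourself that the compiler really does insert such padding, a tiny self-contained program using ``offsetof()`` shows it. The ``fake_siginfo`` struct below is a simplified stand-in for the layout discussed above, not the real ``siginfo_t``, and the offsets in the comments assume a typical 64-bit ABI::

  #include <stdio.h>
  #include <stddef.h>

  /* Three ints followed by an 8-byte-aligned union: on most 64-bit ABIs
   * the union lands at offset 16, leaving 4 bytes of padding after si_code. */
  struct fake_siginfo {
          int si_signo;
          int si_errno;
          int si_code;
          union {
                  long _pad;                              /* forces 8-byte alignment */
                  struct { int _pid; unsigned int _uid; } _kill;
          } _sifields;
  };

  int main(void)
  {
          size_t fields = offsetof(struct fake_siginfo, _sifields);
          size_t code_end = offsetof(struct fake_siginfo, si_code) + sizeof(int);

          printf("_sifields at offset %zu\n", fields);        /* 16, not 12 */
          printf("padding after si_code: %zu bytes\n", fields - code_end);
          return 0;
  }
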
587 | Normally, kmemcheck will not report uninitialized accesses in ``memcpy()`` calls | ||
588 | when both the source and destination addresses are tracked. (Instead, we copy | ||
589 | the shadow bytemap as well). In this case, the destination address clearly | ||
590 | was not tracked. We can dig a little deeper into the stack trace from above:: | ||
591 | |||
592 | arch/x86/kernel/signal.c:805 | ||
593 | arch/x86/kernel/signal.c:871 | ||
594 | arch/x86/kernel/entry_64.S:694 | ||
595 | |||
596 | And we clearly see that the destination siginfo object is located on the | ||
597 | stack:: | ||
598 | |||
599 | 782 static void do_signal(struct pt_regs *regs) | ||
600 | 783 { | ||
601 | 784 struct k_sigaction ka; | ||
602 | 785 siginfo_t info; | ||
603 | ... | ||
604 | 804 signr = get_signal_to_deliver(&info, &ka, regs, NULL); | ||
605 | ... | ||
606 | 854 } | ||
607 | |||
608 | And this ``&info`` is what eventually gets passed to ``copy_siginfo()`` as the | ||
609 | destination argument. | ||
610 | |||
611 | Now, even though we didn't find an actual error here, the example is still a | ||
612 | good one, because it shows how one would go about finding out what the report | ||
613 | was all about. | ||
614 | |||
615 | |||
616 | Annotating false positives | ||
617 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
618 | |||
619 | There are a few different ways to make annotations in the source code that | ||
620 | will keep kmemcheck from checking and reporting certain allocations. Here | ||
621 | they are: | ||
622 | |||
623 | - ``__GFP_NOTRACK_FALSE_POSITIVE`` | ||
624 | This flag can be passed to ``kmalloc()`` or ``kmem_cache_alloc()`` | ||
625 | (therefore also to other functions that end up calling one of | ||
626 | these) to indicate that the allocation should not be tracked | ||
627 | because it would lead to a false positive report. This is a "big | ||
628 | hammer" way of silencing kmemcheck; after all, even if the false | ||
629 | positive pertains to a particular field in a struct, for example, we | ||
630 | will now lose the ability to find (real) errors in other parts of | ||
631 | the same struct. | ||
632 | |||
633 | Example:: | ||
634 | |||
635 | /* No warnings will ever trigger on accessing any part of x */ | ||
636 | x = kmalloc(sizeof *x, GFP_KERNEL | __GFP_NOTRACK_FALSE_POSITIVE); | ||
637 | |||
638 | - ``kmemcheck_bitfield_begin(name)``/``kmemcheck_bitfield_end(name)`` and | ||
639 | ``kmemcheck_annotate_bitfield(ptr, name)`` | ||
640 | The first two of these three macros can be used inside struct | ||
641 | definitions to signal, respectively, the beginning and end of a | ||
642 | bitfield. Additionally, this will assign the bitfield a name, which | ||
643 | is given as an argument to the macros. | ||
644 | |||
645 | Having used these markers, one can later use | ||
646 | kmemcheck_annotate_bitfield() at the point of allocation, to indicate | ||
647 | which parts of the allocation are part of a bitfield. | ||
648 | |||
649 | Example:: | ||
650 | |||
651 | struct foo { | ||
652 | int x; | ||
653 | |||
654 | kmemcheck_bitfield_begin(flags); | ||
655 | int flag_a:1; | ||
656 | int flag_b:1; | ||
657 | kmemcheck_bitfield_end(flags); | ||
658 | |||
659 | int y; | ||
660 | }; | ||
661 | |||
662 | struct foo *x = kmalloc(sizeof *x, GFP_KERNEL); | ||
663 | |||
664 | /* No warnings will trigger on accessing the bitfield of x */ | ||
665 | kmemcheck_annotate_bitfield(x, flags); | ||
666 | |||
667 | Note that ``kmemcheck_annotate_bitfield()`` can be used even before the | ||
668 | return value of ``kmalloc()`` is checked -- in other words, passing NULL | ||
669 | as the first argument is legal (and will do nothing). | ||
670 | |||
671 | |||
672 | Reporting errors | ||
673 | ---------------- | ||
674 | |||
675 | As we have seen, kmemcheck will produce false positive reports. Therefore, it | ||
676 | is not very wise to blindly post kmemcheck warnings to mailing lists and | ||
677 | maintainers. Instead, I encourage maintainers and developers to find errors | ||
678 | in their own code. If you get a warning, you can try to work around it, try | ||
679 | to figure out if it's a real error or not, or simply ignore it. Most | ||
680 | developers know their own code and will quickly and efficiently determine the | ||
681 | root cause of a kmemcheck report. This is therefore also the most efficient | ||
682 | way to work with kmemcheck. | ||
683 | |||
684 | That said, we (the kmemcheck maintainers) will always be on the lookout for | ||
685 | false positives that we can annotate and silence. So whatever you find, | ||
686 | please drop us a note privately! Kernel configs and steps to reproduce (if | ||
687 | available) are of course a great help too. | ||
688 | |||
689 | Happy hacking! | ||
690 | |||
691 | |||
692 | Technical description | ||
693 | --------------------- | ||
694 | |||
695 | kmemcheck works by marking memory pages non-present. This means that whenever | ||
696 | somebody attempts to access the page, a page fault is generated. The page | ||
697 | fault handler notices that the page was in fact only hidden, and so it calls | ||
698 | on the kmemcheck code to make further investigations. | ||
699 | |||
700 | When the investigations are completed, kmemcheck "shows" the page by marking | ||
701 | it present (as it would be under normal circumstances). This way, the | ||
702 | interrupted code can continue as usual. | ||
703 | |||
704 | But after the instruction has been executed, we should hide the page again, so | ||
705 | that we can catch the next access too! Now kmemcheck makes use of a debugging | ||
706 | feature of the processor, namely single-stepping. When the processor has | ||
707 | finished the one instruction that generated the memory access, a debug | ||
708 | exception is raised. From here, we simply hide the page again and continue | ||
709 | execution, this time with the single-stepping feature turned off. | ||
710 | |||
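The hide/show/single-step dance described above looks roughly like the following sketch. This is only an illustration of the control flow -- the helpers ``address_is_tracked()``, ``check_shadow_and_maybe_report()``, ``show_page()`` and ``hide_page()`` are hypothetical names, and the real code lives in ``arch/x86/mm/kmemcheck/``::

  #include <linux/types.h>
  #include <asm/ptrace.h>
  #include <asm/processor-flags.h>

  /* Illustration only; the helper functions below are hypothetical. */
  bool kmemcheck_fault_sketch(struct pt_regs *regs, unsigned long address)
  {
          if (!address_is_tracked(address))       /* hypothetical */
                  return false;                   /* not ours: normal #PF path */

          check_shadow_and_maybe_report(regs, address);  /* hypothetical */
          show_page(address);                     /* hypothetical: mark present */
          regs->flags |= X86_EFLAGS_TF;           /* single-step the faulting insn */
          return true;
  }

  void kmemcheck_debug_sketch(struct pt_regs *regs, unsigned long address)
  {
          hide_page(address);                     /* hypothetical: non-present again */
          regs->flags &= ~X86_EFLAGS_TF;          /* stop single-stepping */
  }
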
711 | kmemcheck requires some assistance from the memory allocator in order to work. | ||
712 | The memory allocator needs to | ||
713 | |||
714 | 1. Tell kmemcheck about newly allocated pages and pages that are about to | ||
715 | be freed. This allows kmemcheck to set up and tear down the shadow memory | ||
716 | for the pages in question. The shadow memory stores the status of each | ||
717 | byte in the allocation proper, e.g. whether it is initialized or | ||
718 | uninitialized. | ||
719 | |||
720 | 2. Tell kmemcheck which parts of memory should be marked uninitialized. | ||
721 | There are actually a few more states, such as "not yet allocated" and | ||
722 | "recently freed". | ||
723 | |||
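As a sketch, the per-byte shadow state can be thought of as a small enum matching the legend shown earlier in this document (the enumerator names below are illustrative, not necessarily the in-tree ones)::

  /* Illustrative only; kmemcheck's real definitions live in arch/x86/mm/kmemcheck/. */
  enum shadow_state_sketch {
          SHADOW_UNALLOCATED,     /* slab page set up, not handed to anybody yet */
          SHADOW_UNINITIALIZED,   /* allocated, but never written by its owner */
          SHADOW_INITIALIZED,     /* written at least once; reads are fine */
          SHADOW_FREED,           /* freed by the previous owner */
  };
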
724 | If a slab cache is set up using the SLAB_NOTRACK flag, it will never return | ||
725 | memory that can take page faults because of kmemcheck. | ||
726 | |||
727 | If a slab cache is NOT set up using the SLAB_NOTRACK flag, callers can still | ||
728 | request memory with the __GFP_NOTRACK or __GFP_NOTRACK_FALSE_POSITIVE flags. | ||
729 | This does not prevent the page faults from occurring; it merely marks the | ||
730 | object in question as being initialized so that no warnings will ever be | ||
731 | produced for this object. | ||
732 | |||
733 | Currently, the SLAB and SLUB allocators are supported by kmemcheck. | ||
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index adba21b5ada7..ec571b9bb18a 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt | |||
@@ -250,7 +250,6 @@ Table 1-2: Contents of the status files (as of 4.8) | |||
250 | VmExe size of text segment | 250 | VmExe size of text segment |
251 | VmLib size of shared library code | 251 | VmLib size of shared library code |
252 | VmPTE size of page table entries | 252 | VmPTE size of page table entries |
253 | VmPMD size of second level page tables | ||
254 | VmSwap amount of swap used by anonymous private data | 253 | VmSwap amount of swap used by anonymous private data |
255 | (shmem swap usage is not included) | 254 | (shmem swap usage is not included) |
256 | HugetlbPages size of hugetlb memory portions | 255 | HugetlbPages size of hugetlb memory portions |
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 9baf66a9ef4e..055c8b3e1018 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt | |||
@@ -58,6 +58,7 @@ Currently, these files are in /proc/sys/vm: | |||
58 | - percpu_pagelist_fraction | 58 | - percpu_pagelist_fraction |
59 | - stat_interval | 59 | - stat_interval |
60 | - stat_refresh | 60 | - stat_refresh |
61 | - numa_stat | ||
61 | - swappiness | 62 | - swappiness |
62 | - user_reserve_kbytes | 63 | - user_reserve_kbytes |
63 | - vfs_cache_pressure | 64 | - vfs_cache_pressure |
@@ -157,6 +158,10 @@ Note: the minimum value allowed for dirty_bytes is two pages (in bytes); any | |||
157 | value lower than this limit will be ignored and the old configuration will be | 158 | value lower than this limit will be ignored and the old configuration will be |
158 | retained. | 159 | retained. |
159 | 160 | ||
161 | Note: the value of dirty_bytes must also be set greater than | ||
162 | dirty_background_bytes or the amount of memory corresponding to | ||
163 | dirty_background_ratio. | ||
164 | |||
160 | ============================================================== | 165 | ============================================================== |
161 | 166 | ||
162 | dirty_expire_centisecs | 167 | dirty_expire_centisecs |
@@ -176,6 +181,9 @@ generating disk writes will itself start writing out dirty data. | |||
176 | 181 | ||
177 | The total available memory is not equal to total system memory. | 182 | The total available memory is not equal to total system memory. |
178 | 183 | ||
184 | Note: dirty_ratio must be set greater than dirty_background_ratio or | ||
185 | the ratio corresponding to dirty_background_bytes. | ||
186 | |||
179 | ============================================================== | 187 | ============================================================== |
180 | 188 | ||
181 | dirty_writeback_centisecs | 189 | dirty_writeback_centisecs |
@@ -622,7 +630,7 @@ oom_dump_tasks | |||
622 | 630 | ||
623 | Enables a system-wide task dump (excluding kernel threads) to be produced | 631 | Enables a system-wide task dump (excluding kernel threads) to be produced |
624 | when the kernel performs an OOM-killing and includes such information as | 632 | when the kernel performs an OOM-killing and includes such information as |
625 | pid, uid, tgid, vm size, rss, nr_ptes, nr_pmds, swapents, oom_score_adj | 633 | pid, uid, tgid, vm size, rss, pgtables_bytes, swapents, oom_score_adj |
626 | score, and name. This is helpful to determine why the OOM killer was | 634 | score, and name. This is helpful to determine why the OOM killer was |
627 | invoked, to identify the rogue task that caused it, and to determine why | 635 | invoked, to identify the rogue task that caused it, and to determine why |
628 | the OOM killer chose the task it did to kill. | 636 | the OOM killer chose the task it did to kill. |
@@ -792,6 +800,21 @@ with no ill effects: errors and warnings on these stats are suppressed.) | |||
792 | 800 | ||
793 | ============================================================== | 801 | ============================================================== |
794 | 802 | ||
803 | numa_stat | ||
804 | |||
805 | This interface allows runtime configuration of numa statistics. | ||
806 | |||
807 | When page allocation performance becomes a bottleneck and you can tolerate | ||
808 | some possible tool breakage and decreased numa counter precision, you can | ||
809 | do: | ||
810 | echo 0 > /proc/sys/vm/numa_stat | ||
811 | |||
812 | When page allocation performance is not a bottleneck and you want all | ||
813 | tooling to work, you can do: | ||
814 | echo 1 > /proc/sys/vm/numa_stat | ||
815 | |||
816 | ============================================================== | ||
817 | |||
795 | swappiness | 818 | swappiness |
796 | 819 | ||
797 | This control is used to define how aggressive the kernel will swap | 820 | This control is used to define how aggressive the kernel will swap |
diff --git a/Documentation/vm/mmu_notifier.txt b/Documentation/vm/mmu_notifier.txt new file mode 100644 index 000000000000..23b462566bb7 --- /dev/null +++ b/Documentation/vm/mmu_notifier.txt | |||
@@ -0,0 +1,93 @@ | |||
1 | When do you need to notify inside the page table lock? | ||
2 | |||
3 | When clearing a pte/pmd we are given a choice to notify the event (the | ||
4 | notify version of *_clear_flush calls mmu_notifier_invalidate_range) under | ||
5 | the page table lock. But that notification is not necessary in all cases. | ||
6 | |||
7 | For a secondary TLB (non-CPU TLB) such as an IOMMU TLB or a device TLB (when a | ||
8 | device uses something like ATS/PASID to get the IOMMU to walk the CPU page | ||
9 | table to access a process virtual address space), there are only two cases | ||
10 | when you need to notify that secondary TLB while holding the page table lock when clearing a pte/pmd: | ||
11 | |||
12 | A) the page backing the address is freed before mmu_notifier_invalidate_range_end() | ||
13 | B) a page table entry is updated to point to a new page (COW, write fault | ||
14 | on zero page, __replace_page(), ...) | ||
15 | |||
16 | Case A is obvious: you do not want to take the risk of the device writing to | ||
17 | a page that might now be used by some completely different task. | ||
18 | |||
19 | Case B is more subtle. For correctness it requires the following sequence to | ||
20 | happen: | ||
21 | - take page table lock | ||
22 | - clear page table entry and notify ([pmd/pte]p_huge_clear_flush_notify()) | ||
23 | - set page table entry to point to new page | ||
24 | |||
25 | If clearing the page table entry is not followed by a notify before setting | ||
26 | the new pte/pmd value, then you can break the memory model (C11 or C++11) for | ||
27 | the device. | ||
28 | |||
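As a rough sketch of the ordering required for case B (assuming the 4.15-era mmu_notifier API that takes mm/start/end arguments; the function and variable names are illustrative, not copied from any in-tree path):

    #include <linux/mm.h>
    #include <linux/mmu_notifier.h>

    /* Illustration of the required ordering for case B (e.g. a COW fault). */
    static void replace_pte_sketch(struct vm_area_struct *vma,
                                   struct mm_struct *mm, pmd_t *pmd,
                                   unsigned long addr, pte_t newpte)
    {
            spinlock_t *ptl;
            pte_t *ptep;

            mmu_notifier_invalidate_range_start(mm, addr, addr + PAGE_SIZE);

            ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
            /* Clear the old entry AND notify the secondary TLB under the lock... */
            ptep_clear_flush_notify(vma, addr, ptep);
            /* ...and only then install the entry pointing at the new page. */
            set_pte_at(mm, addr, ptep, newpte);
            pte_unmap_unlock(ptep, ptl);

            mmu_notifier_invalidate_range_end(mm, addr, addr + PAGE_SIZE);
    }
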
29 | Consider the following scenario (the device uses a feature similar to ATS/PASID): | ||
30 | |||
31 | Take two addresses, addrA and addrB, such that |addrA - addrB| >= PAGE_SIZE; we | ||
32 | assume they are write protected for COW (the other cases of B apply too). | ||
33 | |||
34 | [Time N] -------------------------------------------------------------------- | ||
35 | CPU-thread-0 {try to write to addrA} | ||
36 | CPU-thread-1 {try to write to addrB} | ||
37 | CPU-thread-2 {} | ||
38 | CPU-thread-3 {} | ||
39 | DEV-thread-0 {read addrA and populate device TLB} | ||
40 | DEV-thread-2 {read addrB and populate device TLB} | ||
41 | [Time N+1] ------------------------------------------------------------------ | ||
42 | CPU-thread-0 {COW_step0: {mmu_notifier_invalidate_range_start(addrA)}} | ||
43 | CPU-thread-1 {COW_step0: {mmu_notifier_invalidate_range_start(addrB)}} | ||
44 | CPU-thread-2 {} | ||
45 | CPU-thread-3 {} | ||
46 | DEV-thread-0 {} | ||
47 | DEV-thread-2 {} | ||
48 | [Time N+2] ------------------------------------------------------------------ | ||
49 | CPU-thread-0 {COW_step1: {update page table to point to new page for addrA}} | ||
50 | CPU-thread-1 {COW_step1: {update page table to point to new page for addrB}} | ||
51 | CPU-thread-2 {} | ||
52 | CPU-thread-3 {} | ||
53 | DEV-thread-0 {} | ||
54 | DEV-thread-2 {} | ||
55 | [Time N+3] ------------------------------------------------------------------ | ||
56 | CPU-thread-0 {preempted} | ||
57 | CPU-thread-1 {preempted} | ||
58 | CPU-thread-2 {write to addrA which is a write to new page} | ||
59 | CPU-thread-3 {} | ||
60 | DEV-thread-0 {} | ||
61 | DEV-thread-2 {} | ||
62 | [Time N+3] ------------------------------------------------------------------ | ||
63 | CPU-thread-0 {preempted} | ||
64 | CPU-thread-1 {preempted} | ||
65 | CPU-thread-2 {} | ||
66 | CPU-thread-3 {write to addrB which is a write to new page} | ||
67 | DEV-thread-0 {} | ||
68 | DEV-thread-2 {} | ||
69 | [Time N+4] ------------------------------------------------------------------ | ||
70 | CPU-thread-0 {preempted} | ||
71 | CPU-thread-1 {COW_step3: {mmu_notifier_invalidate_range_end(addrB)}} | ||
72 | CPU-thread-2 {} | ||
73 | CPU-thread-3 {} | ||
74 | DEV-thread-0 {} | ||
75 | DEV-thread-2 {} | ||
76 | [Time N+5] ------------------------------------------------------------------ | ||
77 | CPU-thread-0 {preempted} | ||
78 | CPU-thread-1 {} | ||
79 | CPU-thread-2 {} | ||
80 | CPU-thread-3 {} | ||
81 | DEV-thread-0 {read addrA from old page} | ||
82 | DEV-thread-2 {read addrB from new page} | ||
83 | |||
84 | So here, because at time N+2 the clearing of the page table entry was not paired | ||
85 | with a notification to invalidate the secondary TLB, the device sees the new value | ||
86 | for addrB before seeing the new value for addrA. This breaks total memory ordering | ||
87 | for the device. | ||
88 | |||
89 | When changing a pte to write-protect it, or to point to a new write-protected page | ||
90 | with the same content (KSM), it is fine to delay the mmu_notifier_invalidate_range | ||
91 | call to mmu_notifier_invalidate_range_end() outside the page table lock. This | ||
92 | is true even if the thread doing the page table update is preempted right after | ||
93 | releasing the page table lock but before calling mmu_notifier_invalidate_range_end(). | ||
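By contrast, here is a sketch of the relaxed case described above, where only write protection changes and the secondary-TLB invalidation can be deferred to mmu_notifier_invalidate_range_end() outside the page table lock (same assumptions and illustrative names as the previous sketch):

    /* Illustration of the relaxed case: write-protecting an existing mapping. */
    static void wrprotect_pte_sketch(struct vm_area_struct *vma,
                                     struct mm_struct *mm, pmd_t *pmd,
                                     unsigned long addr)
    {
            spinlock_t *ptl;
            pte_t *ptep, entry;

            mmu_notifier_invalidate_range_start(mm, addr, addr + PAGE_SIZE);

            ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
            entry = ptep_clear_flush(vma, addr, ptep);      /* no _notify needed here */
            entry = pte_wrprotect(entry);
            set_pte_at(mm, addr, ptep, entry);
            pte_unmap_unlock(ptep, ptl);

            /* The secondary TLB is invalidated here, outside the page table lock. */
            mmu_notifier_invalidate_range_end(mm, addr, addr + PAGE_SIZE);
    }
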
diff --git a/MAINTAINERS b/MAINTAINERS index cd7e12dc6af4..b0543c223f6a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
@@ -7692,16 +7692,6 @@ F: include/linux/kdb.h | |||
7692 | F: include/linux/kgdb.h | 7692 | F: include/linux/kgdb.h |
7693 | F: kernel/debug/ | 7693 | F: kernel/debug/ |
7694 | 7694 | ||
7695 | KMEMCHECK | ||
7696 | M: Vegard Nossum <vegardno@ifi.uio.no> | ||
7697 | M: Pekka Enberg <penberg@kernel.org> | ||
7698 | S: Maintained | ||
7699 | F: Documentation/dev-tools/kmemcheck.rst | ||
7700 | F: arch/x86/include/asm/kmemcheck.h | ||
7701 | F: arch/x86/mm/kmemcheck/ | ||
7702 | F: include/linux/kmemcheck.h | ||
7703 | F: mm/kmemcheck.c | ||
7704 | |||
7705 | KMEMLEAK | 7695 | KMEMLEAK |
7706 | M: Catalin Marinas <catalin.marinas@arm.com> | 7696 | M: Catalin Marinas <catalin.marinas@arm.com> |
7707 | S: Maintained | 7697 | S: Maintained |
diff --git a/arch/arm/include/asm/dma-iommu.h b/arch/arm/include/asm/dma-iommu.h index 0722ec6be692..6821f1249300 100644 --- a/arch/arm/include/asm/dma-iommu.h +++ b/arch/arm/include/asm/dma-iommu.h | |||
@@ -7,7 +7,6 @@ | |||
7 | #include <linux/mm_types.h> | 7 | #include <linux/mm_types.h> |
8 | #include <linux/scatterlist.h> | 8 | #include <linux/scatterlist.h> |
9 | #include <linux/dma-debug.h> | 9 | #include <linux/dma-debug.h> |
10 | #include <linux/kmemcheck.h> | ||
11 | #include <linux/kref.h> | 10 | #include <linux/kref.h> |
12 | 11 | ||
13 | #define ARM_MAPPING_ERROR (~(dma_addr_t)0x0) | 12 | #define ARM_MAPPING_ERROR (~(dma_addr_t)0x0) |
diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h index b2902a5cd780..2d7344f0e208 100644 --- a/arch/arm/include/asm/pgalloc.h +++ b/arch/arm/include/asm/pgalloc.h | |||
@@ -57,7 +57,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) | |||
57 | extern pgd_t *pgd_alloc(struct mm_struct *mm); | 57 | extern pgd_t *pgd_alloc(struct mm_struct *mm); |
58 | extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); | 58 | extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); |
59 | 59 | ||
60 | #define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) | 60 | #define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO) |
61 | 61 | ||
62 | static inline void clean_pte_table(pte_t *pte) | 62 | static inline void clean_pte_table(pte_t *pte) |
63 | { | 63 | { |
diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c index c1c1a5c67da1..61e281cb29fb 100644 --- a/arch/arm/mm/pgd.c +++ b/arch/arm/mm/pgd.c | |||
@@ -141,7 +141,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd_base) | |||
141 | pte = pmd_pgtable(*pmd); | 141 | pte = pmd_pgtable(*pmd); |
142 | pmd_clear(pmd); | 142 | pmd_clear(pmd); |
143 | pte_free(mm, pte); | 143 | pte_free(mm, pte); |
144 | atomic_long_dec(&mm->nr_ptes); | 144 | mm_dec_nr_ptes(mm); |
145 | no_pmd: | 145 | no_pmd: |
146 | pud_clear(pud); | 146 | pud_clear(pud); |
147 | pmd_free(mm, pmd); | 147 | pmd_free(mm, pmd); |
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index ba6aab55d464..a93339f5178f 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig | |||
@@ -85,7 +85,7 @@ config ARM64 | |||
85 | select HAVE_ARCH_BITREVERSE | 85 | select HAVE_ARCH_BITREVERSE |
86 | select HAVE_ARCH_HUGE_VMAP | 86 | select HAVE_ARCH_HUGE_VMAP |
87 | select HAVE_ARCH_JUMP_LABEL | 87 | select HAVE_ARCH_JUMP_LABEL |
88 | select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP && !(ARM64_16K_PAGES && ARM64_VA_BITS_48) | 88 | select HAVE_ARCH_KASAN if !(ARM64_16K_PAGES && ARM64_VA_BITS_48) |
89 | select HAVE_ARCH_KGDB | 89 | select HAVE_ARCH_KGDB |
90 | select HAVE_ARCH_MMAP_RND_BITS | 90 | select HAVE_ARCH_MMAP_RND_BITS |
91 | select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT | 91 | select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT |
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index d25f4f137c2a..5ca6a573a701 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h | |||
@@ -26,7 +26,7 @@ | |||
26 | 26 | ||
27 | #define check_pgt_cache() do { } while (0) | 27 | #define check_pgt_cache() do { } while (0) |
28 | 28 | ||
29 | #define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) | 29 | #define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO) |
30 | #define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) | 30 | #define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) |
31 | 31 | ||
32 | #if CONFIG_PGTABLE_LEVELS > 2 | 32 | #if CONFIG_PGTABLE_LEVELS > 2 |
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 81f03959a4ab..acba49fb5aac 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c | |||
@@ -11,6 +11,7 @@ | |||
11 | */ | 11 | */ |
12 | 12 | ||
13 | #define pr_fmt(fmt) "kasan: " fmt | 13 | #define pr_fmt(fmt) "kasan: " fmt |
14 | #include <linux/bootmem.h> | ||
14 | #include <linux/kasan.h> | 15 | #include <linux/kasan.h> |
15 | #include <linux/kernel.h> | 16 | #include <linux/kernel.h> |
16 | #include <linux/sched/task.h> | 17 | #include <linux/sched/task.h> |
@@ -35,77 +36,117 @@ static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE); | |||
35 | * with the physical address from __pa_symbol. | 36 | * with the physical address from __pa_symbol. |
36 | */ | 37 | */ |
37 | 38 | ||
38 | static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr, | 39 | static phys_addr_t __init kasan_alloc_zeroed_page(int node) |
39 | unsigned long end) | ||
40 | { | 40 | { |
41 | pte_t *pte; | 41 | void *p = memblock_virt_alloc_try_nid(PAGE_SIZE, PAGE_SIZE, |
42 | unsigned long next; | 42 | __pa(MAX_DMA_ADDRESS), |
43 | MEMBLOCK_ALLOC_ACCESSIBLE, node); | ||
44 | return __pa(p); | ||
45 | } | ||
46 | |||
47 | static pte_t *__init kasan_pte_offset(pmd_t *pmd, unsigned long addr, int node, | ||
48 | bool early) | ||
49 | { | ||
50 | if (pmd_none(*pmd)) { | ||
51 | phys_addr_t pte_phys = early ? __pa_symbol(kasan_zero_pte) | ||
52 | : kasan_alloc_zeroed_page(node); | ||
53 | __pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE); | ||
54 | } | ||
55 | |||
56 | return early ? pte_offset_kimg(pmd, addr) | ||
57 | : pte_offset_kernel(pmd, addr); | ||
58 | } | ||
43 | 59 | ||
44 | if (pmd_none(*pmd)) | 60 | static pmd_t *__init kasan_pmd_offset(pud_t *pud, unsigned long addr, int node, |
45 | __pmd_populate(pmd, __pa_symbol(kasan_zero_pte), PMD_TYPE_TABLE); | 61 | bool early) |
62 | { | ||
63 | if (pud_none(*pud)) { | ||
64 | phys_addr_t pmd_phys = early ? __pa_symbol(kasan_zero_pmd) | ||
65 | : kasan_alloc_zeroed_page(node); | ||
66 | __pud_populate(pud, pmd_phys, PMD_TYPE_TABLE); | ||
67 | } | ||
68 | |||
69 | return early ? pmd_offset_kimg(pud, addr) : pmd_offset(pud, addr); | ||
70 | } | ||
71 | |||
72 | static pud_t *__init kasan_pud_offset(pgd_t *pgd, unsigned long addr, int node, | ||
73 | bool early) | ||
74 | { | ||
75 | if (pgd_none(*pgd)) { | ||
76 | phys_addr_t pud_phys = early ? __pa_symbol(kasan_zero_pud) | ||
77 | : kasan_alloc_zeroed_page(node); | ||
78 | __pgd_populate(pgd, pud_phys, PMD_TYPE_TABLE); | ||
79 | } | ||
80 | |||
81 | return early ? pud_offset_kimg(pgd, addr) : pud_offset(pgd, addr); | ||
82 | } | ||
83 | |||
84 | static void __init kasan_pte_populate(pmd_t *pmd, unsigned long addr, | ||
85 | unsigned long end, int node, bool early) | ||
86 | { | ||
87 | unsigned long next; | ||
88 | pte_t *pte = kasan_pte_offset(pmd, addr, node, early); | ||
46 | 89 | ||
47 | pte = pte_offset_kimg(pmd, addr); | ||
48 | do { | 90 | do { |
91 | phys_addr_t page_phys = early ? __pa_symbol(kasan_zero_page) | ||
92 | : kasan_alloc_zeroed_page(node); | ||
49 | next = addr + PAGE_SIZE; | 93 | next = addr + PAGE_SIZE; |
50 | set_pte(pte, pfn_pte(sym_to_pfn(kasan_zero_page), | 94 | set_pte(pte, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL)); |
51 | PAGE_KERNEL)); | ||
52 | } while (pte++, addr = next, addr != end && pte_none(*pte)); | 95 | } while (pte++, addr = next, addr != end && pte_none(*pte)); |
53 | } | 96 | } |
54 | 97 | ||
55 | static void __init kasan_early_pmd_populate(pud_t *pud, | 98 | static void __init kasan_pmd_populate(pud_t *pud, unsigned long addr, |
56 | unsigned long addr, | 99 | unsigned long end, int node, bool early) |
57 | unsigned long end) | ||
58 | { | 100 | { |
59 | pmd_t *pmd; | ||
60 | unsigned long next; | 101 | unsigned long next; |
102 | pmd_t *pmd = kasan_pmd_offset(pud, addr, node, early); | ||
61 | 103 | ||
62 | if (pud_none(*pud)) | ||
63 | __pud_populate(pud, __pa_symbol(kasan_zero_pmd), PMD_TYPE_TABLE); | ||
64 | |||
65 | pmd = pmd_offset_kimg(pud, addr); | ||
66 | do { | 104 | do { |
67 | next = pmd_addr_end(addr, end); | 105 | next = pmd_addr_end(addr, end); |
68 | kasan_early_pte_populate(pmd, addr, next); | 106 | kasan_pte_populate(pmd, addr, next, node, early); |
69 | } while (pmd++, addr = next, addr != end && pmd_none(*pmd)); | 107 | } while (pmd++, addr = next, addr != end && pmd_none(*pmd)); |
70 | } | 108 | } |
71 | 109 | ||
72 | static void __init kasan_early_pud_populate(pgd_t *pgd, | 110 | static void __init kasan_pud_populate(pgd_t *pgd, unsigned long addr, |
73 | unsigned long addr, | 111 | unsigned long end, int node, bool early) |
74 | unsigned long end) | ||
75 | { | 112 | { |
76 | pud_t *pud; | ||
77 | unsigned long next; | 113 | unsigned long next; |
114 | pud_t *pud = kasan_pud_offset(pgd, addr, node, early); | ||
78 | 115 | ||
79 | if (pgd_none(*pgd)) | ||
80 | __pgd_populate(pgd, __pa_symbol(kasan_zero_pud), PUD_TYPE_TABLE); | ||
81 | |||
82 | pud = pud_offset_kimg(pgd, addr); | ||
83 | do { | 116 | do { |
84 | next = pud_addr_end(addr, end); | 117 | next = pud_addr_end(addr, end); |
85 | kasan_early_pmd_populate(pud, addr, next); | 118 | kasan_pmd_populate(pud, addr, next, node, early); |
86 | } while (pud++, addr = next, addr != end && pud_none(*pud)); | 119 | } while (pud++, addr = next, addr != end && pud_none(*pud)); |
87 | } | 120 | } |
88 | 121 | ||
89 | static void __init kasan_map_early_shadow(void) | 122 | static void __init kasan_pgd_populate(unsigned long addr, unsigned long end, |
123 | int node, bool early) | ||
90 | { | 124 | { |
91 | unsigned long addr = KASAN_SHADOW_START; | ||
92 | unsigned long end = KASAN_SHADOW_END; | ||
93 | unsigned long next; | 125 | unsigned long next; |
94 | pgd_t *pgd; | 126 | pgd_t *pgd; |
95 | 127 | ||
96 | pgd = pgd_offset_k(addr); | 128 | pgd = pgd_offset_k(addr); |
97 | do { | 129 | do { |
98 | next = pgd_addr_end(addr, end); | 130 | next = pgd_addr_end(addr, end); |
99 | kasan_early_pud_populate(pgd, addr, next); | 131 | kasan_pud_populate(pgd, addr, next, node, early); |
100 | } while (pgd++, addr = next, addr != end); | 132 | } while (pgd++, addr = next, addr != end); |
101 | } | 133 | } |
102 | 134 | ||
135 | /* The early shadow maps everything to a single page of zeroes */ | ||
103 | asmlinkage void __init kasan_early_init(void) | 136 | asmlinkage void __init kasan_early_init(void) |
104 | { | 137 | { |
105 | BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_END - (1UL << 61)); | 138 | BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_END - (1UL << 61)); |
106 | BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE)); | 139 | BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE)); |
107 | BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE)); | 140 | BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE)); |
108 | kasan_map_early_shadow(); | 141 | kasan_pgd_populate(KASAN_SHADOW_START, KASAN_SHADOW_END, NUMA_NO_NODE, |
142 | true); | ||
143 | } | ||
144 | |||
145 | /* Set up full kasan mappings, ensuring that the mapped pages are zeroed */ | ||
146 | static void __init kasan_map_populate(unsigned long start, unsigned long end, | ||
147 | int node) | ||
148 | { | ||
149 | kasan_pgd_populate(start & PAGE_MASK, PAGE_ALIGN(end), node, false); | ||
109 | } | 150 | } |
110 | 151 | ||
111 | /* | 152 | /* |
@@ -142,8 +183,8 @@ void __init kasan_init(void) | |||
142 | struct memblock_region *reg; | 183 | struct memblock_region *reg; |
143 | int i; | 184 | int i; |
144 | 185 | ||
145 | kimg_shadow_start = (u64)kasan_mem_to_shadow(_text); | 186 | kimg_shadow_start = (u64)kasan_mem_to_shadow(_text) & PAGE_MASK; |
146 | kimg_shadow_end = (u64)kasan_mem_to_shadow(_end); | 187 | kimg_shadow_end = PAGE_ALIGN((u64)kasan_mem_to_shadow(_end)); |
147 | 188 | ||
148 | mod_shadow_start = (u64)kasan_mem_to_shadow((void *)MODULES_VADDR); | 189 | mod_shadow_start = (u64)kasan_mem_to_shadow((void *)MODULES_VADDR); |
149 | mod_shadow_end = (u64)kasan_mem_to_shadow((void *)MODULES_END); | 190 | mod_shadow_end = (u64)kasan_mem_to_shadow((void *)MODULES_END); |
@@ -161,19 +202,8 @@ void __init kasan_init(void) | |||
161 | 202 | ||
162 | clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); | 203 | clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); |
163 | 204 | ||
164 | vmemmap_populate(kimg_shadow_start, kimg_shadow_end, | 205 | kasan_map_populate(kimg_shadow_start, kimg_shadow_end, |
165 | pfn_to_nid(virt_to_pfn(lm_alias(_text)))); | 206 | pfn_to_nid(virt_to_pfn(lm_alias(_text)))); |
166 | |||
167 | /* | ||
168 | * vmemmap_populate() has populated the shadow region that covers the | ||
169 | * kernel image with SWAPPER_BLOCK_SIZE mappings, so we have to round | ||
170 | * the start and end addresses to SWAPPER_BLOCK_SIZE as well, to prevent | ||
171 | * kasan_populate_zero_shadow() from replacing the page table entries | ||
172 | * (PMD or PTE) at the edges of the shadow region for the kernel | ||
173 | * image. | ||
174 | */ | ||
175 | kimg_shadow_start = round_down(kimg_shadow_start, SWAPPER_BLOCK_SIZE); | ||
176 | kimg_shadow_end = round_up(kimg_shadow_end, SWAPPER_BLOCK_SIZE); | ||
177 | 207 | ||
178 | kasan_populate_zero_shadow((void *)KASAN_SHADOW_START, | 208 | kasan_populate_zero_shadow((void *)KASAN_SHADOW_START, |
179 | (void *)mod_shadow_start); | 209 | (void *)mod_shadow_start); |
@@ -191,9 +221,9 @@ void __init kasan_init(void) | |||
191 | if (start >= end) | 221 | if (start >= end) |
192 | break; | 222 | break; |
193 | 223 | ||
194 | vmemmap_populate((unsigned long)kasan_mem_to_shadow(start), | 224 | kasan_map_populate((unsigned long)kasan_mem_to_shadow(start), |
195 | (unsigned long)kasan_mem_to_shadow(end), | 225 | (unsigned long)kasan_mem_to_shadow(end), |
196 | pfn_to_nid(virt_to_pfn(start))); | 226 | pfn_to_nid(virt_to_pfn(start))); |
197 | } | 227 | } |
198 | 228 | ||
199 | /* | 229 | /* |
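Note on the arm64 hunk above: it folds the separate early and late shadow setup into one set of table walkers keyed by an "early" flag. kasan_early_init() points every shadow entry at the shared kasan_zero_pte/pmd/pud tables, while kasan_map_populate() later backs the shadow with per-node zeroed pages from memblock and maps them at page granularity, which is why only the PAGE_MASK/PAGE_ALIGN rounding of kimg_shadow_start/end remains and the old SWAPPER_BLOCK_SIZE rounding comment could go. The address arithmetic underneath is the usual KASAN scheme of one shadow byte per eight bytes of memory. A minimal stand-alone C sketch of that mapping, with a placeholder offset (the real KASAN_SHADOW_OFFSET is fixed per architecture and configuration):

#include <stdint.h>
#include <stdio.h>

/* Illustrative values only; the real shadow offset is chosen per arch/config. */
#define SHADOW_SCALE_SHIFT	3			/* one shadow byte covers 8 bytes */
#define SHADOW_OFFSET		0xdfff200000000000UL	/* placeholder value */

/* Same shape as kasan_mem_to_shadow(): shadow = (addr >> 3) + offset. */
static uintptr_t mem_to_shadow(uintptr_t addr)
{
	return (addr >> SHADOW_SCALE_SHIFT) + SHADOW_OFFSET;
}

int main(void)
{
	uintptr_t addr = 0xffff000008080000UL;	/* arbitrary kernel-style VA */

	printf("addr   = 0x%lx\n", (unsigned long)addr);
	printf("shadow = 0x%lx\n", (unsigned long)mem_to_shadow(addr));
	return 0;
}
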
diff --git a/arch/frv/mm/init.c b/arch/frv/mm/init.c index 328f0a292316..cf464100e838 100644 --- a/arch/frv/mm/init.c +++ b/arch/frv/mm/init.c | |||
@@ -42,21 +42,9 @@ | |||
42 | #undef DEBUG | 42 | #undef DEBUG |
43 | 43 | ||
44 | /* | 44 | /* |
45 | * BAD_PAGE is the page that is used for page faults when linux | ||
46 | * is out-of-memory. Older versions of linux just did a | ||
47 | * do_exit(), but using this instead means there is less risk | ||
48 | * for a process dying in kernel mode, possibly leaving a inode | ||
49 | * unused etc.. | ||
50 | * | ||
51 | * BAD_PAGETABLE is the accompanying page-table: it is initialized | ||
52 | * to point to BAD_PAGE entries. | ||
53 | * | ||
54 | * ZERO_PAGE is a special page that is used for zero-initialized | 45 | * ZERO_PAGE is a special page that is used for zero-initialized |
55 | * data and COW. | 46 | * data and COW. |
56 | */ | 47 | */ |
57 | static unsigned long empty_bad_page_table; | ||
58 | static unsigned long empty_bad_page; | ||
59 | |||
60 | unsigned long empty_zero_page; | 48 | unsigned long empty_zero_page; |
61 | EXPORT_SYMBOL(empty_zero_page); | 49 | EXPORT_SYMBOL(empty_zero_page); |
62 | 50 | ||
@@ -72,8 +60,6 @@ void __init paging_init(void) | |||
72 | unsigned long zones_size[MAX_NR_ZONES] = {0, }; | 60 | unsigned long zones_size[MAX_NR_ZONES] = {0, }; |
73 | 61 | ||
74 | /* allocate some pages for kernel housekeeping tasks */ | 62 | /* allocate some pages for kernel housekeeping tasks */ |
75 | empty_bad_page_table = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); | ||
76 | empty_bad_page = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); | ||
77 | empty_zero_page = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); | 63 | empty_zero_page = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); |
78 | 64 | ||
79 | memset((void *) empty_zero_page, 0, PAGE_SIZE); | 65 | memset((void *) empty_zero_page, 0, PAGE_SIZE); |
diff --git a/arch/h8300/mm/init.c b/arch/h8300/mm/init.c index eeead51bed2d..015287ac8ce8 100644 --- a/arch/h8300/mm/init.c +++ b/arch/h8300/mm/init.c | |||
@@ -40,20 +40,9 @@ | |||
40 | #include <asm/sections.h> | 40 | #include <asm/sections.h> |
41 | 41 | ||
42 | /* | 42 | /* |
43 | * BAD_PAGE is the page that is used for page faults when linux | ||
44 | * is out-of-memory. Older versions of linux just did a | ||
45 | * do_exit(), but using this instead means there is less risk | ||
46 | * for a process dying in kernel mode, possibly leaving a inode | ||
47 | * unused etc.. | ||
48 | * | ||
49 | * BAD_PAGETABLE is the accompanying page-table: it is initialized | ||
50 | * to point to BAD_PAGE entries. | ||
51 | * | ||
52 | * ZERO_PAGE is a special page that is used for zero-initialized | 43 | * ZERO_PAGE is a special page that is used for zero-initialized |
53 | * data and COW. | 44 | * data and COW. |
54 | */ | 45 | */ |
55 | static unsigned long empty_bad_page_table; | ||
56 | static unsigned long empty_bad_page; | ||
57 | unsigned long empty_zero_page; | 46 | unsigned long empty_zero_page; |
58 | 47 | ||
59 | /* | 48 | /* |
@@ -78,8 +67,6 @@ void __init paging_init(void) | |||
78 | * Initialize the bad page table and bad page to point | 67 | * Initialize the bad page table and bad page to point |
79 | * to a couple of allocated pages. | 68 | * to a couple of allocated pages. |
80 | */ | 69 | */ |
81 | empty_bad_page_table = (unsigned long)alloc_bootmem_pages(PAGE_SIZE); | ||
82 | empty_bad_page = (unsigned long)alloc_bootmem_pages(PAGE_SIZE); | ||
83 | empty_zero_page = (unsigned long)alloc_bootmem_pages(PAGE_SIZE); | 70 | empty_zero_page = (unsigned long)alloc_bootmem_pages(PAGE_SIZE); |
84 | memset((void *)empty_zero_page, 0, PAGE_SIZE); | 71 | memset((void *)empty_zero_page, 0, PAGE_SIZE); |
85 | 72 | ||
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig index 0d9446c37ae8..498398d915c1 100644 --- a/arch/m32r/Kconfig +++ b/arch/m32r/Kconfig | |||
@@ -196,8 +196,8 @@ config TIMER_DIVIDE | |||
196 | default "128" | 196 | default "128" |
197 | 197 | ||
198 | config CPU_BIG_ENDIAN | 198 | config CPU_BIG_ENDIAN |
199 | bool "Generate big endian code" | 199 | bool |
200 | default n | 200 | default !CPU_LITTLE_ENDIAN |
201 | 201 | ||
202 | config CPU_LITTLE_ENDIAN | 202 | config CPU_LITTLE_ENDIAN |
203 | bool "Generate little endian code" | 203 | bool "Generate little endian code" |
diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h index 67fe6dc5211c..0036ea0c7173 100644 --- a/arch/mips/include/asm/pgtable-64.h +++ b/arch/mips/include/asm/pgtable-64.h | |||
@@ -31,12 +31,7 @@ | |||
31 | * tables. Each page table is also a single 4K page, giving 512 (== | 31 | * tables. Each page table is also a single 4K page, giving 512 (== |
32 | * PTRS_PER_PTE) 8 byte ptes. Each pud entry is initialized to point to | 32 | * PTRS_PER_PTE) 8 byte ptes. Each pud entry is initialized to point to |
33 | * invalid_pmd_table, each pmd entry is initialized to point to | 33 | * invalid_pmd_table, each pmd entry is initialized to point to |
34 | * invalid_pte_table, each pte is initialized to 0. When memory is low, | 34 | * invalid_pte_table, each pte is initialized to 0. |
35 | * and a pmd table or a page table allocation fails, empty_bad_pmd_table | ||
36 | * and empty_bad_page_table is returned back to higher layer code, so | ||
37 | * that the failure is recognized later on. Linux does not seem to | ||
38 | * handle these failures very well though. The empty_bad_page_table has | ||
39 | * invalid pte entries in it, to force page faults. | ||
40 | * | 35 | * |
41 | * Kernel mappings: kernel mappings are held in the swapper_pg_table. | 36 | * Kernel mappings: kernel mappings are held in the swapper_pg_table. |
42 | * The layout is identical to userspace except it's indexed with the | 37 | * The layout is identical to userspace except it's indexed with the |
@@ -175,7 +170,6 @@ | |||
175 | printk("%s:%d: bad pgd %016lx.\n", __FILE__, __LINE__, pgd_val(e)) | 170 | printk("%s:%d: bad pgd %016lx.\n", __FILE__, __LINE__, pgd_val(e)) |
176 | 171 | ||
177 | extern pte_t invalid_pte_table[PTRS_PER_PTE]; | 172 | extern pte_t invalid_pte_table[PTRS_PER_PTE]; |
178 | extern pte_t empty_bad_page_table[PTRS_PER_PTE]; | ||
179 | 173 | ||
180 | #ifndef __PAGETABLE_PUD_FOLDED | 174 | #ifndef __PAGETABLE_PUD_FOLDED |
181 | /* | 175 | /* |
diff --git a/arch/mn10300/kernel/head.S b/arch/mn10300/kernel/head.S index 73e00fc78072..0b15f759e0d2 100644 --- a/arch/mn10300/kernel/head.S +++ b/arch/mn10300/kernel/head.S | |||
@@ -434,14 +434,6 @@ ENTRY(empty_zero_page) | |||
434 | .space PAGE_SIZE | 434 | .space PAGE_SIZE |
435 | 435 | ||
436 | .balign PAGE_SIZE | 436 | .balign PAGE_SIZE |
437 | ENTRY(empty_bad_page) | ||
438 | .space PAGE_SIZE | ||
439 | |||
440 | .balign PAGE_SIZE | ||
441 | ENTRY(empty_bad_pte_table) | ||
442 | .space PAGE_SIZE | ||
443 | |||
444 | .balign PAGE_SIZE | ||
445 | ENTRY(large_page_table) | 437 | ENTRY(large_page_table) |
446 | .space PAGE_SIZE | 438 | .space PAGE_SIZE |
447 | 439 | ||
diff --git a/arch/openrisc/include/asm/dma-mapping.h b/arch/openrisc/include/asm/dma-mapping.h index f41bd3cb76d9..e212a1f0b6d2 100644 --- a/arch/openrisc/include/asm/dma-mapping.h +++ b/arch/openrisc/include/asm/dma-mapping.h | |||
@@ -23,7 +23,6 @@ | |||
23 | */ | 23 | */ |
24 | 24 | ||
25 | #include <linux/dma-debug.h> | 25 | #include <linux/dma-debug.h> |
26 | #include <linux/kmemcheck.h> | ||
27 | #include <linux/dma-mapping.h> | 26 | #include <linux/dma-mapping.h> |
28 | 27 | ||
29 | extern const struct dma_map_ops or1k_dma_map_ops; | 28 | extern const struct dma_map_ops or1k_dma_map_ops; |
diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h index a14203c005f1..e11f03007b57 100644 --- a/arch/powerpc/include/asm/pgalloc.h +++ b/arch/powerpc/include/asm/pgalloc.h | |||
@@ -18,7 +18,7 @@ static inline gfp_t pgtable_gfp_flags(struct mm_struct *mm, gfp_t gfp) | |||
18 | } | 18 | } |
19 | #endif /* MODULE */ | 19 | #endif /* MODULE */ |
20 | 20 | ||
21 | #define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) | 21 | #define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO) |
22 | 22 | ||
23 | #ifdef CONFIG_PPC_BOOK3S | 23 | #ifdef CONFIG_PPC_BOOK3S |
24 | #include <asm/book3s/pgalloc.h> | 24 | #include <asm/book3s/pgalloc.h> |
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 1571a498a33f..a9b9083c5e49 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c | |||
@@ -433,6 +433,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, | |||
433 | pud = pud_offset(pgd, start); | 433 | pud = pud_offset(pgd, start); |
434 | pgd_clear(pgd); | 434 | pgd_clear(pgd); |
435 | pud_free_tlb(tlb, pud, start); | 435 | pud_free_tlb(tlb, pud, start); |
436 | mm_dec_nr_puds(tlb->mm); | ||
436 | } | 437 | } |
437 | 438 | ||
438 | /* | 439 | /* |
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index 05e15386d4cb..a7e998158f37 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c | |||
@@ -200,7 +200,7 @@ static void destroy_pagetable_page(struct mm_struct *mm) | |||
200 | /* We allow PTE_FRAG_NR fragments from a PTE page */ | 200 | /* We allow PTE_FRAG_NR fragments from a PTE page */ |
201 | if (page_ref_sub_and_test(page, PTE_FRAG_NR - count)) { | 201 | if (page_ref_sub_and_test(page, PTE_FRAG_NR - count)) { |
202 | pgtable_page_dtor(page); | 202 | pgtable_page_dtor(page); |
203 | free_hot_cold_page(page, 0); | 203 | free_unref_page(page); |
204 | } | 204 | } |
205 | } | 205 | } |
206 | 206 | ||
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index ac0717a90ca6..1ec3aee43624 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c | |||
@@ -404,7 +404,7 @@ void pte_fragment_free(unsigned long *table, int kernel) | |||
404 | if (put_page_testzero(page)) { | 404 | if (put_page_testzero(page)) { |
405 | if (!kernel) | 405 | if (!kernel) |
406 | pgtable_page_dtor(page); | 406 | pgtable_page_dtor(page); |
407 | free_hot_cold_page(page, 0); | 407 | free_unref_page(page); |
408 | } | 408 | } |
409 | } | 409 | } |
410 | 410 | ||
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 43607bb12cc2..cf4c1cb17dcd 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h | |||
@@ -44,6 +44,8 @@ static inline int init_new_context(struct task_struct *tsk, | |||
44 | mm->context.asce_limit = STACK_TOP_MAX; | 44 | mm->context.asce_limit = STACK_TOP_MAX; |
45 | mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | | 45 | mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | |
46 | _ASCE_USER_BITS | _ASCE_TYPE_REGION3; | 46 | _ASCE_USER_BITS | _ASCE_TYPE_REGION3; |
47 | /* pgd_alloc() did not account this pud */ | ||
48 | mm_inc_nr_puds(mm); | ||
47 | break; | 49 | break; |
48 | case -PAGE_SIZE: | 50 | case -PAGE_SIZE: |
49 | /* forked 5-level task, set new asce with new_mm->pgd */ | 51 | /* forked 5-level task, set new asce with new_mm->pgd */ |
@@ -59,7 +61,7 @@ static inline int init_new_context(struct task_struct *tsk, | |||
59 | /* forked 2-level compat task, set new asce with new mm->pgd */ | 61 | /* forked 2-level compat task, set new asce with new mm->pgd */ |
60 | mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | | 62 | mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | |
61 | _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT; | 63 | _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT; |
62 | /* pgd_alloc() did not increase mm->nr_pmds */ | 64 | /* pgd_alloc() did not account this pmd */ |
63 | mm_inc_nr_pmds(mm); | 65 | mm_inc_nr_pmds(mm); |
64 | } | 66 | } |
65 | crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); | 67 | crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); |
diff --git a/arch/sh/kernel/dwarf.c b/arch/sh/kernel/dwarf.c index e1d751ae2498..1a2526676a87 100644 --- a/arch/sh/kernel/dwarf.c +++ b/arch/sh/kernel/dwarf.c | |||
@@ -1172,11 +1172,11 @@ static int __init dwarf_unwinder_init(void) | |||
1172 | 1172 | ||
1173 | dwarf_frame_cachep = kmem_cache_create("dwarf_frames", | 1173 | dwarf_frame_cachep = kmem_cache_create("dwarf_frames", |
1174 | sizeof(struct dwarf_frame), 0, | 1174 | sizeof(struct dwarf_frame), 0, |
1175 | SLAB_PANIC | SLAB_HWCACHE_ALIGN | SLAB_NOTRACK, NULL); | 1175 | SLAB_PANIC | SLAB_HWCACHE_ALIGN, NULL); |
1176 | 1176 | ||
1177 | dwarf_reg_cachep = kmem_cache_create("dwarf_regs", | 1177 | dwarf_reg_cachep = kmem_cache_create("dwarf_regs", |
1178 | sizeof(struct dwarf_reg), 0, | 1178 | sizeof(struct dwarf_reg), 0, |
1179 | SLAB_PANIC | SLAB_HWCACHE_ALIGN | SLAB_NOTRACK, NULL); | 1179 | SLAB_PANIC | SLAB_HWCACHE_ALIGN, NULL); |
1180 | 1180 | ||
1181 | dwarf_frame_pool = mempool_create_slab_pool(DWARF_FRAME_MIN_REQ, | 1181 | dwarf_frame_pool = mempool_create_slab_pool(DWARF_FRAME_MIN_REQ, |
1182 | dwarf_frame_cachep); | 1182 | dwarf_frame_cachep); |
diff --git a/arch/sh/kernel/head_64.S b/arch/sh/kernel/head_64.S index defd851abefa..cca491397a28 100644 --- a/arch/sh/kernel/head_64.S +++ b/arch/sh/kernel/head_64.S | |||
@@ -101,14 +101,6 @@ empty_zero_page: | |||
101 | mmu_pdtp_cache: | 101 | mmu_pdtp_cache: |
102 | .space PAGE_SIZE, 0 | 102 | .space PAGE_SIZE, 0 |
103 | 103 | ||
104 | .global empty_bad_page | ||
105 | empty_bad_page: | ||
106 | .space PAGE_SIZE, 0 | ||
107 | |||
108 | .global empty_bad_pte_table | ||
109 | empty_bad_pte_table: | ||
110 | .space PAGE_SIZE, 0 | ||
111 | |||
112 | .global fpu_in_use | 104 | .global fpu_in_use |
113 | fpu_in_use: .quad 0 | 105 | fpu_in_use: .quad 0 |
114 | 106 | ||
diff --git a/arch/sh/kernel/process.c b/arch/sh/kernel/process.c index b2d9963d5978..68b1a67533ce 100644 --- a/arch/sh/kernel/process.c +++ b/arch/sh/kernel/process.c | |||
@@ -59,7 +59,7 @@ void arch_task_cache_init(void) | |||
59 | 59 | ||
60 | task_xstate_cachep = kmem_cache_create("task_xstate", xstate_size, | 60 | task_xstate_cachep = kmem_cache_create("task_xstate", xstate_size, |
61 | __alignof__(union thread_xstate), | 61 | __alignof__(union thread_xstate), |
62 | SLAB_PANIC | SLAB_NOTRACK, NULL); | 62 | SLAB_PANIC, NULL); |
63 | } | 63 | } |
64 | 64 | ||
65 | #ifdef CONFIG_SH_FPU_EMU | 65 | #ifdef CONFIG_SH_FPU_EMU |
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index fd9d9bac7cfa..5a9e96be1665 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h | |||
@@ -231,6 +231,36 @@ extern unsigned long _PAGE_ALL_SZ_BITS; | |||
231 | extern struct page *mem_map_zero; | 231 | extern struct page *mem_map_zero; |
232 | #define ZERO_PAGE(vaddr) (mem_map_zero) | 232 | #define ZERO_PAGE(vaddr) (mem_map_zero) |
233 | 233 | ||
234 | /* This macro must be updated when the size of struct page grows above 80 | ||
235 | * or reduces below 64. | ||
236 | * The idea is that the compiler optimizes out the switch() statement and | ||
237 | * leaves only clrx instructions. | ||
238 | */ | ||
239 | #define mm_zero_struct_page(pp) do { \ | ||
240 | unsigned long *_pp = (void *)(pp); \ | ||
241 | \ | ||
242 | /* Check that struct page is either 64, 72, or 80 bytes */ \ | ||
243 | BUILD_BUG_ON(sizeof(struct page) & 7); \ | ||
244 | BUILD_BUG_ON(sizeof(struct page) < 64); \ | ||
245 | BUILD_BUG_ON(sizeof(struct page) > 80); \ | ||
246 | \ | ||
247 | switch (sizeof(struct page)) { \ | ||
248 | case 80: \ | ||
249 | _pp[9] = 0; /* fallthrough */ \ | ||
250 | case 72: \ | ||
251 | _pp[8] = 0; /* fallthrough */ \ | ||
252 | default: \ | ||
253 | _pp[7] = 0; \ | ||
254 | _pp[6] = 0; \ | ||
255 | _pp[5] = 0; \ | ||
256 | _pp[4] = 0; \ | ||
257 | _pp[3] = 0; \ | ||
258 | _pp[2] = 0; \ | ||
259 | _pp[1] = 0; \ | ||
260 | _pp[0] = 0; \ | ||
261 | } \ | ||
262 | } while (0) | ||
263 | |||
234 | /* PFNs are real physical page numbers. However, mem_map only begins to record | 264 | /* PFNs are real physical page numbers. However, mem_map only begins to record |
235 | * per-page information starting at pfn_base. This is to handle systems where | 265 | * per-page information starting at pfn_base. This is to handle systems where |
236 | * the first physical page in the machine is at some huge physical address, | 266 | * the first physical page in the machine is at some huge physical address, |
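Note on the mm_zero_struct_page() macro added above: it replaces a memset() of struct page with a short run of 8-byte stores. Because the switch operates on sizeof(struct page), a compile-time constant, the compiler keeps only the stores reachable from the matching case and emits them as straight-line code (clrx on sparc64) with no runtime branch. A user-space sketch of the same trick, using a hypothetical 80-byte stand-in structure:

/* Stand-in for struct page: 80 bytes (10 x 8) in this hypothetical layout. */
struct fake_page {
	unsigned long w[10];
};

static void zero_fake_page(struct fake_page *pp)
{
	unsigned long *_pp = (unsigned long *)pp;

	/* sizeof() is constant: the switch collapses to the stores below the match. */
	switch (sizeof(struct fake_page)) {
	case 80:
		_pp[9] = 0;	/* fall through */
	case 72:
		_pp[8] = 0;	/* fall through */
	default:
		_pp[7] = 0;
		_pp[6] = 0;
		_pp[5] = 0;
		_pp[4] = 0;
		_pp[3] = 0;
		_pp[2] = 0;
		_pp[1] = 0;
		_pp[0] = 0;
	}
}

int main(void)
{
	struct fake_page p;

	zero_fake_page(&p);
	return (int)p.w[0];	/* always 0 */
}

At -O2 the stores may be merged into wider ones, but no memset() call and no branch remain, which is the point of the kernel macro.
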
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index 5078b7f68890..0112d6942288 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c | |||
@@ -397,7 +397,7 @@ static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, | |||
397 | 397 | ||
398 | pmd_clear(pmd); | 398 | pmd_clear(pmd); |
399 | pte_free_tlb(tlb, token, addr); | 399 | pte_free_tlb(tlb, token, addr); |
400 | atomic_long_dec(&tlb->mm->nr_ptes); | 400 | mm_dec_nr_ptes(tlb->mm); |
401 | } | 401 | } |
402 | 402 | ||
403 | static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, | 403 | static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, |
@@ -472,6 +472,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, | |||
472 | pud = pud_offset(pgd, start); | 472 | pud = pud_offset(pgd, start); |
473 | pgd_clear(pgd); | 473 | pgd_clear(pgd); |
474 | pud_free_tlb(tlb, pud, start); | 474 | pud_free_tlb(tlb, pud, start); |
475 | mm_dec_nr_puds(tlb->mm); | ||
475 | } | 476 | } |
476 | 477 | ||
477 | void hugetlb_free_pgd_range(struct mmu_gather *tlb, | 478 | void hugetlb_free_pgd_range(struct mmu_gather *tlb, |
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 61bdc1270d19..55ba62957e64 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c | |||
@@ -2540,10 +2540,17 @@ void __init mem_init(void) | |||
2540 | { | 2540 | { |
2541 | high_memory = __va(last_valid_pfn << PAGE_SHIFT); | 2541 | high_memory = __va(last_valid_pfn << PAGE_SHIFT); |
2542 | 2542 | ||
2543 | register_page_bootmem_info(); | ||
2544 | free_all_bootmem(); | 2543 | free_all_bootmem(); |
2545 | 2544 | ||
2546 | /* | 2545 | /* |
2546 | * Must be done after boot memory is put on freelist, because here we | ||
2547 | * might set fields in deferred struct pages that have not yet been | ||
2548 | * initialized, and free_all_bootmem() initializes all the reserved | ||
2549 | * deferred pages for us. | ||
2550 | */ | ||
2551 | register_page_bootmem_info(); | ||
2552 | |||
2553 | /* | ||
2547 | * Set up the zero page, mark it reserved, so that page count | 2554 | * Set up the zero page, mark it reserved, so that page count |
2548 | * is not manipulated when freeing the page from user ptes. | 2555 | * is not manipulated when freeing the page from user ptes. |
2549 | */ | 2556 | */ |
@@ -2637,30 +2644,19 @@ int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend, | |||
2637 | vstart = vstart & PMD_MASK; | 2644 | vstart = vstart & PMD_MASK; |
2638 | vend = ALIGN(vend, PMD_SIZE); | 2645 | vend = ALIGN(vend, PMD_SIZE); |
2639 | for (; vstart < vend; vstart += PMD_SIZE) { | 2646 | for (; vstart < vend; vstart += PMD_SIZE) { |
2640 | pgd_t *pgd = pgd_offset_k(vstart); | 2647 | pgd_t *pgd = vmemmap_pgd_populate(vstart, node); |
2641 | unsigned long pte; | 2648 | unsigned long pte; |
2642 | pud_t *pud; | 2649 | pud_t *pud; |
2643 | pmd_t *pmd; | 2650 | pmd_t *pmd; |
2644 | 2651 | ||
2645 | if (pgd_none(*pgd)) { | 2652 | if (!pgd) |
2646 | pud_t *new = vmemmap_alloc_block(PAGE_SIZE, node); | 2653 | return -ENOMEM; |
2647 | |||
2648 | if (!new) | ||
2649 | return -ENOMEM; | ||
2650 | pgd_populate(&init_mm, pgd, new); | ||
2651 | } | ||
2652 | |||
2653 | pud = pud_offset(pgd, vstart); | ||
2654 | if (pud_none(*pud)) { | ||
2655 | pmd_t *new = vmemmap_alloc_block(PAGE_SIZE, node); | ||
2656 | 2654 | ||
2657 | if (!new) | 2655 | pud = vmemmap_pud_populate(pgd, vstart, node); |
2658 | return -ENOMEM; | 2656 | if (!pud) |
2659 | pud_populate(&init_mm, pud, new); | 2657 | return -ENOMEM; |
2660 | } | ||
2661 | 2658 | ||
2662 | pmd = pmd_offset(pud, vstart); | 2659 | pmd = pmd_offset(pud, vstart); |
2663 | |||
2664 | pte = pmd_val(*pmd); | 2660 | pte = pmd_val(*pmd); |
2665 | if (!(pte & _PAGE_VALID)) { | 2661 | if (!(pte & _PAGE_VALID)) { |
2666 | void *block = vmemmap_alloc_block(PMD_SIZE, node); | 2662 | void *block = vmemmap_alloc_block(PMD_SIZE, node); |
@@ -2927,7 +2923,7 @@ void __flush_tlb_all(void) | |||
2927 | pte_t *pte_alloc_one_kernel(struct mm_struct *mm, | 2923 | pte_t *pte_alloc_one_kernel(struct mm_struct *mm, |
2928 | unsigned long address) | 2924 | unsigned long address) |
2929 | { | 2925 | { |
2930 | struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); | 2926 | struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO); |
2931 | pte_t *pte = NULL; | 2927 | pte_t *pte = NULL; |
2932 | 2928 | ||
2933 | if (page) | 2929 | if (page) |
@@ -2939,11 +2935,11 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, | |||
2939 | pgtable_t pte_alloc_one(struct mm_struct *mm, | 2935 | pgtable_t pte_alloc_one(struct mm_struct *mm, |
2940 | unsigned long address) | 2936 | unsigned long address) |
2941 | { | 2937 | { |
2942 | struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); | 2938 | struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO); |
2943 | if (!page) | 2939 | if (!page) |
2944 | return NULL; | 2940 | return NULL; |
2945 | if (!pgtable_page_ctor(page)) { | 2941 | if (!pgtable_page_ctor(page)) { |
2946 | free_hot_cold_page(page, 0); | 2942 | free_unref_page(page); |
2947 | return NULL; | 2943 | return NULL; |
2948 | } | 2944 | } |
2949 | return (pte_t *) page_address(page); | 2945 | return (pte_t *) page_address(page); |
diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c index b51cc28acd0a..4432f31e8479 100644 --- a/arch/tile/mm/homecache.c +++ b/arch/tile/mm/homecache.c | |||
@@ -409,7 +409,7 @@ void __homecache_free_pages(struct page *page, unsigned int order) | |||
409 | if (put_page_testzero(page)) { | 409 | if (put_page_testzero(page)) { |
410 | homecache_change_page_home(page, order, PAGE_HOME_HASH); | 410 | homecache_change_page_home(page, order, PAGE_HOME_HASH); |
411 | if (order == 0) { | 411 | if (order == 0) { |
412 | free_hot_cold_page(page, false); | 412 | free_unref_page(page); |
413 | } else { | 413 | } else { |
414 | init_page_count(page); | 414 | init_page_count(page); |
415 | __free_pages(page, order); | 415 | __free_pages(page, order); |
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index e7437ec62710..3c0e470ea646 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c | |||
@@ -22,8 +22,6 @@ | |||
22 | /* allocated in paging_init, zeroed in mem_init, and unchanged thereafter */ | 22 | /* allocated in paging_init, zeroed in mem_init, and unchanged thereafter */ |
23 | unsigned long *empty_zero_page = NULL; | 23 | unsigned long *empty_zero_page = NULL; |
24 | EXPORT_SYMBOL(empty_zero_page); | 24 | EXPORT_SYMBOL(empty_zero_page); |
25 | /* allocated in paging_init and unchanged thereafter */ | ||
26 | static unsigned long *empty_bad_page = NULL; | ||
27 | 25 | ||
28 | /* | 26 | /* |
29 | * Initialized during boot, and readonly for initializing page tables | 27 | * Initialized during boot, and readonly for initializing page tables |
@@ -146,7 +144,6 @@ void __init paging_init(void) | |||
146 | int i; | 144 | int i; |
147 | 145 | ||
148 | empty_zero_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE); | 146 | empty_zero_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE); |
149 | empty_bad_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE); | ||
150 | for (i = 0; i < ARRAY_SIZE(zones_size); i++) | 147 | for (i = 0; i < ARRAY_SIZE(zones_size); i++) |
151 | zones_size[i] = 0; | 148 | zones_size[i] = 0; |
152 | 149 | ||
diff --git a/arch/unicore32/include/asm/pgalloc.h b/arch/unicore32/include/asm/pgalloc.h index 26775793c204..f0fdb268f8f2 100644 --- a/arch/unicore32/include/asm/pgalloc.h +++ b/arch/unicore32/include/asm/pgalloc.h | |||
@@ -28,7 +28,7 @@ extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd); | |||
28 | #define pgd_alloc(mm) get_pgd_slow(mm) | 28 | #define pgd_alloc(mm) get_pgd_slow(mm) |
29 | #define pgd_free(mm, pgd) free_pgd_slow(mm, pgd) | 29 | #define pgd_free(mm, pgd) free_pgd_slow(mm, pgd) |
30 | 30 | ||
31 | #define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) | 31 | #define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO) |
32 | 32 | ||
33 | /* | 33 | /* |
34 | * Allocate one PTE table. | 34 | * Allocate one PTE table. |
diff --git a/arch/unicore32/mm/pgd.c b/arch/unicore32/mm/pgd.c index c572a28c76c9..a830a300aaa1 100644 --- a/arch/unicore32/mm/pgd.c +++ b/arch/unicore32/mm/pgd.c | |||
@@ -97,7 +97,7 @@ void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd) | |||
97 | pte = pmd_pgtable(*pmd); | 97 | pte = pmd_pgtable(*pmd); |
98 | pmd_clear(pmd); | 98 | pmd_clear(pmd); |
99 | pte_free(mm, pte); | 99 | pte_free(mm, pte); |
100 | atomic_long_dec(&mm->nr_ptes); | 100 | mm_dec_nr_ptes(mm); |
101 | pmd_free(mm, pmd); | 101 | pmd_free(mm, pmd); |
102 | mm_dec_nr_pmds(mm); | 102 | mm_dec_nr_pmds(mm); |
103 | free: | 103 | free: |
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f08977d82ca0..df3276d6bfe3 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -110,9 +110,8 @@ config X86 | |||
110 | select HAVE_ARCH_AUDITSYSCALL | 110 | select HAVE_ARCH_AUDITSYSCALL |
111 | select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE | 111 | select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE |
112 | select HAVE_ARCH_JUMP_LABEL | 112 | select HAVE_ARCH_JUMP_LABEL |
113 | select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP | 113 | select HAVE_ARCH_KASAN if X86_64 |
114 | select HAVE_ARCH_KGDB | 114 | select HAVE_ARCH_KGDB |
115 | select HAVE_ARCH_KMEMCHECK | ||
116 | select HAVE_ARCH_MMAP_RND_BITS if MMU | 115 | select HAVE_ARCH_MMAP_RND_BITS if MMU |
117 | select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT | 116 | select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT |
118 | select HAVE_ARCH_COMPAT_MMAP_BASES if MMU && COMPAT | 117 | select HAVE_ARCH_COMPAT_MMAP_BASES if MMU && COMPAT |
@@ -1430,7 +1429,7 @@ config ARCH_DMA_ADDR_T_64BIT | |||
1430 | 1429 | ||
1431 | config X86_DIRECT_GBPAGES | 1430 | config X86_DIRECT_GBPAGES |
1432 | def_bool y | 1431 | def_bool y |
1433 | depends on X86_64 && !DEBUG_PAGEALLOC && !KMEMCHECK | 1432 | depends on X86_64 && !DEBUG_PAGEALLOC |
1434 | ---help--- | 1433 | ---help--- |
1435 | Certain kernel features effectively disable kernel | 1434 | Certain kernel features effectively disable kernel |
1436 | linear 1 GB mappings (even if the CPU otherwise | 1435 | linear 1 GB mappings (even if the CPU otherwise |
diff --git a/arch/x86/Makefile b/arch/x86/Makefile index a20eacd9c7e9..3e73bc255e4e 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile | |||
@@ -158,11 +158,6 @@ ifdef CONFIG_X86_X32 | |||
158 | endif | 158 | endif |
159 | export CONFIG_X86_X32_ABI | 159 | export CONFIG_X86_X32_ABI |
160 | 160 | ||
161 | # Don't unroll struct assignments with kmemcheck enabled | ||
162 | ifeq ($(CONFIG_KMEMCHECK),y) | ||
163 | KBUILD_CFLAGS += $(call cc-option,-fno-builtin-memcpy) | ||
164 | endif | ||
165 | |||
166 | # | 161 | # |
167 | # If the function graph tracer is used with mcount instead of fentry, | 162 | # If the function graph tracer is used with mcount instead of fentry, |
168 | # '-maccumulate-outgoing-args' is needed to prevent a GCC bug | 163 | # '-maccumulate-outgoing-args' is needed to prevent a GCC bug |
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 43cbe843de8d..0350d99bb8fd 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h | |||
@@ -7,7 +7,6 @@ | |||
7 | * Documentation/DMA-API.txt for documentation. | 7 | * Documentation/DMA-API.txt for documentation. |
8 | */ | 8 | */ |
9 | 9 | ||
10 | #include <linux/kmemcheck.h> | ||
11 | #include <linux/scatterlist.h> | 10 | #include <linux/scatterlist.h> |
12 | #include <linux/dma-debug.h> | 11 | #include <linux/dma-debug.h> |
13 | #include <asm/io.h> | 12 | #include <asm/io.h> |
diff --git a/arch/x86/include/asm/kmemcheck.h b/arch/x86/include/asm/kmemcheck.h index 945a0337fbcf..ea32a7d3cf1b 100644 --- a/arch/x86/include/asm/kmemcheck.h +++ b/arch/x86/include/asm/kmemcheck.h | |||
@@ -1,43 +1 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | /* SPDX-License-Identifier: GPL-2.0 */ | |
2 | #ifndef ASM_X86_KMEMCHECK_H | ||
3 | #define ASM_X86_KMEMCHECK_H | ||
4 | |||
5 | #include <linux/types.h> | ||
6 | #include <asm/ptrace.h> | ||
7 | |||
8 | #ifdef CONFIG_KMEMCHECK | ||
9 | bool kmemcheck_active(struct pt_regs *regs); | ||
10 | |||
11 | void kmemcheck_show(struct pt_regs *regs); | ||
12 | void kmemcheck_hide(struct pt_regs *regs); | ||
13 | |||
14 | bool kmemcheck_fault(struct pt_regs *regs, | ||
15 | unsigned long address, unsigned long error_code); | ||
16 | bool kmemcheck_trap(struct pt_regs *regs); | ||
17 | #else | ||
18 | static inline bool kmemcheck_active(struct pt_regs *regs) | ||
19 | { | ||
20 | return false; | ||
21 | } | ||
22 | |||
23 | static inline void kmemcheck_show(struct pt_regs *regs) | ||
24 | { | ||
25 | } | ||
26 | |||
27 | static inline void kmemcheck_hide(struct pt_regs *regs) | ||
28 | { | ||
29 | } | ||
30 | |||
31 | static inline bool kmemcheck_fault(struct pt_regs *regs, | ||
32 | unsigned long address, unsigned long error_code) | ||
33 | { | ||
34 | return false; | ||
35 | } | ||
36 | |||
37 | static inline bool kmemcheck_trap(struct pt_regs *regs) | ||
38 | { | ||
39 | return false; | ||
40 | } | ||
41 | #endif /* CONFIG_KMEMCHECK */ | ||
42 | |||
43 | #endif | ||
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index f735c3016325..09f9e1e00e3b 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h | |||
@@ -667,11 +667,6 @@ static inline bool pte_accessible(struct mm_struct *mm, pte_t a) | |||
667 | return false; | 667 | return false; |
668 | } | 668 | } |
669 | 669 | ||
670 | static inline int pte_hidden(pte_t pte) | ||
671 | { | ||
672 | return pte_flags(pte) & _PAGE_HIDDEN; | ||
673 | } | ||
674 | |||
675 | static inline int pmd_present(pmd_t pmd) | 670 | static inline int pmd_present(pmd_t pmd) |
676 | { | 671 | { |
677 | /* | 672 | /* |
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 9e9b05fc4860..3696398a9475 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h | |||
@@ -32,7 +32,6 @@ | |||
32 | 32 | ||
33 | #define _PAGE_BIT_SPECIAL _PAGE_BIT_SOFTW1 | 33 | #define _PAGE_BIT_SPECIAL _PAGE_BIT_SOFTW1 |
34 | #define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1 | 34 | #define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1 |
35 | #define _PAGE_BIT_HIDDEN _PAGE_BIT_SOFTW3 /* hidden by kmemcheck */ | ||
36 | #define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */ | 35 | #define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */ |
37 | #define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4 | 36 | #define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4 |
38 | 37 | ||
@@ -79,18 +78,6 @@ | |||
79 | #define _PAGE_KNL_ERRATUM_MASK 0 | 78 | #define _PAGE_KNL_ERRATUM_MASK 0 |
80 | #endif | 79 | #endif |
81 | 80 | ||
82 | #ifdef CONFIG_KMEMCHECK | ||
83 | #define _PAGE_HIDDEN (_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN) | ||
84 | #else | ||
85 | #define _PAGE_HIDDEN (_AT(pteval_t, 0)) | ||
86 | #endif | ||
87 | |||
88 | /* | ||
89 | * The same hidden bit is used by kmemcheck, but since kmemcheck | ||
90 | * works on kernel pages while soft-dirty engine on user space, | ||
91 | * they do not conflict with each other. | ||
92 | */ | ||
93 | |||
94 | #ifdef CONFIG_MEM_SOFT_DIRTY | 81 | #ifdef CONFIG_MEM_SOFT_DIRTY |
95 | #define _PAGE_SOFT_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_SOFT_DIRTY) | 82 | #define _PAGE_SOFT_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_SOFT_DIRTY) |
96 | #else | 83 | #else |
diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h index 076502241eae..55d392c6bd29 100644 --- a/arch/x86/include/asm/string_32.h +++ b/arch/x86/include/asm/string_32.h | |||
@@ -179,8 +179,6 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len) | |||
179 | * No 3D Now! | 179 | * No 3D Now! |
180 | */ | 180 | */ |
181 | 181 | ||
182 | #ifndef CONFIG_KMEMCHECK | ||
183 | |||
184 | #if (__GNUC__ >= 4) | 182 | #if (__GNUC__ >= 4) |
185 | #define memcpy(t, f, n) __builtin_memcpy(t, f, n) | 183 | #define memcpy(t, f, n) __builtin_memcpy(t, f, n) |
186 | #else | 184 | #else |
@@ -189,13 +187,6 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len) | |||
189 | ? __constant_memcpy((t), (f), (n)) \ | 187 | ? __constant_memcpy((t), (f), (n)) \ |
190 | : __memcpy((t), (f), (n))) | 188 | : __memcpy((t), (f), (n))) |
191 | #endif | 189 | #endif |
192 | #else | ||
193 | /* | ||
194 | * kmemcheck becomes very happy if we use the REP instructions unconditionally, | ||
195 | * because it means that we know both memory operands in advance. | ||
196 | */ | ||
197 | #define memcpy(t, f, n) __memcpy((t), (f), (n)) | ||
198 | #endif | ||
199 | 190 | ||
200 | #endif | 191 | #endif |
201 | #endif /* !CONFIG_FORTIFY_SOURCE */ | 192 | #endif /* !CONFIG_FORTIFY_SOURCE */ |
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index 0b1b4445f4c5..533f74c300c2 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h | |||
@@ -33,7 +33,6 @@ extern void *memcpy(void *to, const void *from, size_t len); | |||
33 | extern void *__memcpy(void *to, const void *from, size_t len); | 33 | extern void *__memcpy(void *to, const void *from, size_t len); |
34 | 34 | ||
35 | #ifndef CONFIG_FORTIFY_SOURCE | 35 | #ifndef CONFIG_FORTIFY_SOURCE |
36 | #ifndef CONFIG_KMEMCHECK | ||
37 | #if (__GNUC__ == 4 && __GNUC_MINOR__ < 3) || __GNUC__ < 4 | 36 | #if (__GNUC__ == 4 && __GNUC_MINOR__ < 3) || __GNUC__ < 4 |
38 | #define memcpy(dst, src, len) \ | 37 | #define memcpy(dst, src, len) \ |
39 | ({ \ | 38 | ({ \ |
@@ -46,13 +45,6 @@ extern void *__memcpy(void *to, const void *from, size_t len); | |||
46 | __ret; \ | 45 | __ret; \ |
47 | }) | 46 | }) |
48 | #endif | 47 | #endif |
49 | #else | ||
50 | /* | ||
51 | * kmemcheck becomes very happy if we use the REP instructions unconditionally, | ||
52 | * because it means that we know both memory operands in advance. | ||
53 | */ | ||
54 | #define memcpy(dst, src, len) __inline_memcpy((dst), (src), (len)) | ||
55 | #endif | ||
56 | #endif /* !CONFIG_FORTIFY_SOURCE */ | 48 | #endif /* !CONFIG_FORTIFY_SOURCE */ |
57 | 49 | ||
58 | #define __HAVE_ARCH_MEMSET | 50 | #define __HAVE_ARCH_MEMSET |
diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h index 1f5c5161ead6..45c8605467f1 100644 --- a/arch/x86/include/asm/xor.h +++ b/arch/x86/include/asm/xor.h | |||
@@ -1,7 +1,4 @@ | |||
1 | #ifdef CONFIG_KMEMCHECK | 1 | #ifndef _ASM_X86_XOR_H |
2 | /* kmemcheck doesn't handle MMX/SSE/SSE2 instructions */ | ||
3 | # include <asm-generic/xor.h> | ||
4 | #elif !defined(_ASM_X86_XOR_H) | ||
5 | #define _ASM_X86_XOR_H | 2 | #define _ASM_X86_XOR_H |
6 | 3 | ||
7 | /* | 4 | /* |
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index b720dacac051..b1af22073e28 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -187,21 +187,6 @@ static void early_init_intel(struct cpuinfo_x86 *c) | |||
187 | if (c->x86 == 6 && c->x86_model < 15) | 187 | if (c->x86 == 6 && c->x86_model < 15) |
188 | clear_cpu_cap(c, X86_FEATURE_PAT); | 188 | clear_cpu_cap(c, X86_FEATURE_PAT); |
189 | 189 | ||
190 | #ifdef CONFIG_KMEMCHECK | ||
191 | /* | ||
192 | * P4s have a "fast strings" feature which causes single- | ||
193 | * stepping REP instructions to only generate a #DB on | ||
194 | * cache-line boundaries. | ||
195 | * | ||
196 | * Ingo Molnar reported a Pentium D (model 6) and a Xeon | ||
197 | * (model 2) with the same problem. | ||
198 | */ | ||
199 | if (c->x86 == 15) | ||
200 | if (msr_clear_bit(MSR_IA32_MISC_ENABLE, | ||
201 | MSR_IA32_MISC_ENABLE_FAST_STRING_BIT) > 0) | ||
202 | pr_info("kmemcheck: Disabling fast string operations\n"); | ||
203 | #endif | ||
204 | |||
205 | /* | 190 | /* |
206 | * If fast string is not enabled in IA32_MISC_ENABLE for any reason, | 191 | * If fast string is not enabled in IA32_MISC_ENABLE for any reason, |
207 | * clear the fast string and enhanced fast string CPU capabilities. | 192 | * clear the fast string and enhanced fast string CPU capabilities. |
diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index 7d7715dde901..e5ec3cafa72e 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c | |||
@@ -57,7 +57,7 @@ | |||
57 | # error "Need more virtual address space for the ESPFIX hack" | 57 | # error "Need more virtual address space for the ESPFIX hack" |
58 | #endif | 58 | #endif |
59 | 59 | ||
60 | #define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) | 60 | #define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO) |
61 | 61 | ||
62 | /* This contains the *bottom* address of the espfix stack */ | 62 | /* This contains the *bottom* address of the espfix stack */ |
63 | DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack); | 63 | DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack); |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index b7b0f74a2150..989514c94a55 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -42,7 +42,6 @@ | |||
42 | #include <linux/edac.h> | 42 | #include <linux/edac.h> |
43 | #endif | 43 | #endif |
44 | 44 | ||
45 | #include <asm/kmemcheck.h> | ||
46 | #include <asm/stacktrace.h> | 45 | #include <asm/stacktrace.h> |
47 | #include <asm/processor.h> | 46 | #include <asm/processor.h> |
48 | #include <asm/debugreg.h> | 47 | #include <asm/debugreg.h> |
@@ -749,10 +748,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) | |||
749 | if (!dr6 && user_mode(regs)) | 748 | if (!dr6 && user_mode(regs)) |
750 | user_icebp = 1; | 749 | user_icebp = 1; |
751 | 750 | ||
752 | /* Catch kmemcheck conditions! */ | ||
753 | if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) | ||
754 | goto exit; | ||
755 | |||
756 | /* Store the virtualized DR6 value */ | 751 | /* Store the virtualized DR6 value */ |
757 | tsk->thread.debugreg6 = dr6; | 752 | tsk->thread.debugreg6 = dr6; |
758 | 753 | ||
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 7ba7f3d7f477..8e13b8cc6bed 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile | |||
@@ -29,8 +29,6 @@ obj-$(CONFIG_X86_PTDUMP) += debug_pagetables.o | |||
29 | 29 | ||
30 | obj-$(CONFIG_HIGHMEM) += highmem_32.o | 30 | obj-$(CONFIG_HIGHMEM) += highmem_32.o |
31 | 31 | ||
32 | obj-$(CONFIG_KMEMCHECK) += kmemcheck/ | ||
33 | |||
34 | KASAN_SANITIZE_kasan_init_$(BITS).o := n | 32 | KASAN_SANITIZE_kasan_init_$(BITS).o := n |
35 | obj-$(CONFIG_KASAN) += kasan_init_$(BITS).o | 33 | obj-$(CONFIG_KASAN) += kasan_init_$(BITS).o |
36 | 34 | ||
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 3109ba6c6ede..78ca9a8ee454 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -20,7 +20,6 @@ | |||
20 | #include <asm/cpufeature.h> /* boot_cpu_has, ... */ | 20 | #include <asm/cpufeature.h> /* boot_cpu_has, ... */ |
21 | #include <asm/traps.h> /* dotraplinkage, ... */ | 21 | #include <asm/traps.h> /* dotraplinkage, ... */ |
22 | #include <asm/pgalloc.h> /* pgd_*(), ... */ | 22 | #include <asm/pgalloc.h> /* pgd_*(), ... */ |
23 | #include <asm/kmemcheck.h> /* kmemcheck_*(), ... */ | ||
24 | #include <asm/fixmap.h> /* VSYSCALL_ADDR */ | 23 | #include <asm/fixmap.h> /* VSYSCALL_ADDR */ |
25 | #include <asm/vsyscall.h> /* emulate_vsyscall */ | 24 | #include <asm/vsyscall.h> /* emulate_vsyscall */ |
26 | #include <asm/vm86.h> /* struct vm86 */ | 25 | #include <asm/vm86.h> /* struct vm86 */ |
@@ -1256,8 +1255,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, | |||
1256 | * Detect and handle instructions that would cause a page fault for | 1255 | * Detect and handle instructions that would cause a page fault for |
1257 | * both a tracked kernel page and a userspace page. | 1256 | * both a tracked kernel page and a userspace page. |
1258 | */ | 1257 | */ |
1259 | if (kmemcheck_active(regs)) | ||
1260 | kmemcheck_hide(regs); | ||
1261 | prefetchw(&mm->mmap_sem); | 1258 | prefetchw(&mm->mmap_sem); |
1262 | 1259 | ||
1263 | if (unlikely(kmmio_fault(regs, address))) | 1260 | if (unlikely(kmmio_fault(regs, address))) |
@@ -1280,9 +1277,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, | |||
1280 | if (!(error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) { | 1277 | if (!(error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) { |
1281 | if (vmalloc_fault(address) >= 0) | 1278 | if (vmalloc_fault(address) >= 0) |
1282 | return; | 1279 | return; |
1283 | |||
1284 | if (kmemcheck_fault(regs, address, error_code)) | ||
1285 | return; | ||
1286 | } | 1280 | } |
1287 | 1281 | ||
1288 | /* Can handle a stale RO->RW TLB: */ | 1282 | /* Can handle a stale RO->RW TLB: */ |
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index a22c2b95e513..6fdf91ef130a 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
@@ -92,8 +92,7 @@ __ref void *alloc_low_pages(unsigned int num) | |||
92 | unsigned int order; | 92 | unsigned int order; |
93 | 93 | ||
94 | order = get_order((unsigned long)num << PAGE_SHIFT); | 94 | order = get_order((unsigned long)num << PAGE_SHIFT); |
95 | return (void *)__get_free_pages(GFP_ATOMIC | __GFP_NOTRACK | | 95 | return (void *)__get_free_pages(GFP_ATOMIC | __GFP_ZERO, order); |
96 | __GFP_ZERO, order); | ||
97 | } | 96 | } |
98 | 97 | ||
99 | if ((pgt_buf_end + num) > pgt_buf_top || !can_use_brk_pgt) { | 98 | if ((pgt_buf_end + num) > pgt_buf_top || !can_use_brk_pgt) { |
@@ -164,12 +163,11 @@ static int page_size_mask; | |||
164 | static void __init probe_page_size_mask(void) | 163 | static void __init probe_page_size_mask(void) |
165 | { | 164 | { |
166 | /* | 165 | /* |
167 | * For CONFIG_KMEMCHECK or pagealloc debugging, identity mapping will | 166 | * For pagealloc debugging, identity mapping will use small pages. |
168 | * use small pages. | ||
169 | * This will simplify cpa(), which otherwise needs to support splitting | 167 | * This will simplify cpa(), which otherwise needs to support splitting |
170 | * large pages into small in interrupt context, etc. | 168 | * large pages into small in interrupt context, etc. |
171 | */ | 169 | */ |
172 | if (boot_cpu_has(X86_FEATURE_PSE) && !debug_pagealloc_enabled() && !IS_ENABLED(CONFIG_KMEMCHECK)) | 170 | if (boot_cpu_has(X86_FEATURE_PSE) && !debug_pagealloc_enabled()) |
173 | page_size_mask |= 1 << PG_LEVEL_2M; | 171 | page_size_mask |= 1 << PG_LEVEL_2M; |
174 | else | 172 | else |
175 | direct_gbpages = 0; | 173 | direct_gbpages = 0; |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index adcea90a2046..4a837289f2ad 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -184,7 +184,7 @@ static __ref void *spp_getpage(void) | |||
184 | void *ptr; | 184 | void *ptr; |
185 | 185 | ||
186 | if (after_bootmem) | 186 | if (after_bootmem) |
187 | ptr = (void *) get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK); | 187 | ptr = (void *) get_zeroed_page(GFP_ATOMIC); |
188 | else | 188 | else |
189 | ptr = alloc_bootmem_pages(PAGE_SIZE); | 189 | ptr = alloc_bootmem_pages(PAGE_SIZE); |
190 | 190 | ||
@@ -1173,12 +1173,18 @@ void __init mem_init(void) | |||
1173 | 1173 | ||
1174 | /* clear_bss() already clear the empty_zero_page */ | 1174 | /* clear_bss() already clear the empty_zero_page */ |
1175 | 1175 | ||
1176 | register_page_bootmem_info(); | ||
1177 | |||
1178 | /* this will put all memory onto the freelists */ | 1176 | /* this will put all memory onto the freelists */ |
1179 | free_all_bootmem(); | 1177 | free_all_bootmem(); |
1180 | after_bootmem = 1; | 1178 | after_bootmem = 1; |
1181 | 1179 | ||
1180 | /* | ||
1181 | * Must be done after boot memory is put on freelist, because here we | ||
1182 | * might set fields in deferred struct pages that have not yet been | ||
1183 | * initialized, and free_all_bootmem() initializes all the reserved | ||
1184 | * deferred pages for us. | ||
1185 | */ | ||
1186 | register_page_bootmem_info(); | ||
1187 | |||
1182 | /* Register memory areas for /proc/kcore */ | 1188 | /* Register memory areas for /proc/kcore */ |
1183 | kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR, | 1189 | kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR, |
1184 | PAGE_SIZE, KCORE_OTHER); | 1190 | PAGE_SIZE, KCORE_OTHER); |
@@ -1399,7 +1405,6 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start, | |||
1399 | vmemmap_verify((pte_t *)pmd, node, addr, next); | 1405 | vmemmap_verify((pte_t *)pmd, node, addr, next); |
1400 | continue; | 1406 | continue; |
1401 | } | 1407 | } |
1402 | pr_warn_once("vmemmap: falling back to regular page backing\n"); | ||
1403 | if (vmemmap_populate_basepages(addr, next, node)) | 1408 | if (vmemmap_populate_basepages(addr, next, node)) |
1404 | return -ENOMEM; | 1409 | return -ENOMEM; |
1405 | } | 1410 | } |
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 2b60dc6e64b1..99dfed6dfef8 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c | |||
@@ -4,12 +4,14 @@ | |||
4 | #include <linux/bootmem.h> | 4 | #include <linux/bootmem.h> |
5 | #include <linux/kasan.h> | 5 | #include <linux/kasan.h> |
6 | #include <linux/kdebug.h> | 6 | #include <linux/kdebug.h> |
7 | #include <linux/memblock.h> | ||
7 | #include <linux/mm.h> | 8 | #include <linux/mm.h> |
8 | #include <linux/sched.h> | 9 | #include <linux/sched.h> |
9 | #include <linux/sched/task.h> | 10 | #include <linux/sched/task.h> |
10 | #include <linux/vmalloc.h> | 11 | #include <linux/vmalloc.h> |
11 | 12 | ||
12 | #include <asm/e820/types.h> | 13 | #include <asm/e820/types.h> |
14 | #include <asm/pgalloc.h> | ||
13 | #include <asm/tlbflush.h> | 15 | #include <asm/tlbflush.h> |
14 | #include <asm/sections.h> | 16 | #include <asm/sections.h> |
15 | #include <asm/pgtable.h> | 17 | #include <asm/pgtable.h> |
@@ -18,7 +20,134 @@ extern struct range pfn_mapped[E820_MAX_ENTRIES]; | |||
18 | 20 | ||
19 | static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); | 21 | static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); |
20 | 22 | ||
21 | static int __init map_range(struct range *range) | 23 | static __init void *early_alloc(size_t size, int nid) |
24 | { | ||
25 | return memblock_virt_alloc_try_nid_nopanic(size, size, | ||
26 | __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid); | ||
27 | } | ||
28 | |||
29 | static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr, | ||
30 | unsigned long end, int nid) | ||
31 | { | ||
32 | pte_t *pte; | ||
33 | |||
34 | if (pmd_none(*pmd)) { | ||
35 | void *p; | ||
36 | |||
37 | if (boot_cpu_has(X86_FEATURE_PSE) && | ||
38 | ((end - addr) == PMD_SIZE) && | ||
39 | IS_ALIGNED(addr, PMD_SIZE)) { | ||
40 | p = early_alloc(PMD_SIZE, nid); | ||
41 | if (p && pmd_set_huge(pmd, __pa(p), PAGE_KERNEL)) | ||
42 | return; | ||
43 | else if (p) | ||
44 | memblock_free(__pa(p), PMD_SIZE); | ||
45 | } | ||
46 | |||
47 | p = early_alloc(PAGE_SIZE, nid); | ||
48 | pmd_populate_kernel(&init_mm, pmd, p); | ||
49 | } | ||
50 | |||
51 | pte = pte_offset_kernel(pmd, addr); | ||
52 | do { | ||
53 | pte_t entry; | ||
54 | void *p; | ||
55 | |||
56 | if (!pte_none(*pte)) | ||
57 | continue; | ||
58 | |||
59 | p = early_alloc(PAGE_SIZE, nid); | ||
60 | entry = pfn_pte(PFN_DOWN(__pa(p)), PAGE_KERNEL); | ||
61 | set_pte_at(&init_mm, addr, pte, entry); | ||
62 | } while (pte++, addr += PAGE_SIZE, addr != end); | ||
63 | } | ||
64 | |||
65 | static void __init kasan_populate_pud(pud_t *pud, unsigned long addr, | ||
66 | unsigned long end, int nid) | ||
67 | { | ||
68 | pmd_t *pmd; | ||
69 | unsigned long next; | ||
70 | |||
71 | if (pud_none(*pud)) { | ||
72 | void *p; | ||
73 | |||
74 | if (boot_cpu_has(X86_FEATURE_GBPAGES) && | ||
75 | ((end - addr) == PUD_SIZE) && | ||
76 | IS_ALIGNED(addr, PUD_SIZE)) { | ||
77 | p = early_alloc(PUD_SIZE, nid); | ||
78 | if (p && pud_set_huge(pud, __pa(p), PAGE_KERNEL)) | ||
79 | return; | ||
80 | else if (p) | ||
81 | memblock_free(__pa(p), PUD_SIZE); | ||
82 | } | ||
83 | |||
84 | p = early_alloc(PAGE_SIZE, nid); | ||
85 | pud_populate(&init_mm, pud, p); | ||
86 | } | ||
87 | |||
88 | pmd = pmd_offset(pud, addr); | ||
89 | do { | ||
90 | next = pmd_addr_end(addr, end); | ||
91 | if (!pmd_large(*pmd)) | ||
92 | kasan_populate_pmd(pmd, addr, next, nid); | ||
93 | } while (pmd++, addr = next, addr != end); | ||
94 | } | ||
95 | |||
96 | static void __init kasan_populate_p4d(p4d_t *p4d, unsigned long addr, | ||
97 | unsigned long end, int nid) | ||
98 | { | ||
99 | pud_t *pud; | ||
100 | unsigned long next; | ||
101 | |||
102 | if (p4d_none(*p4d)) { | ||
103 | void *p = early_alloc(PAGE_SIZE, nid); | ||
104 | |||
105 | p4d_populate(&init_mm, p4d, p); | ||
106 | } | ||
107 | |||
108 | pud = pud_offset(p4d, addr); | ||
109 | do { | ||
110 | next = pud_addr_end(addr, end); | ||
111 | if (!pud_large(*pud)) | ||
112 | kasan_populate_pud(pud, addr, next, nid); | ||
113 | } while (pud++, addr = next, addr != end); | ||
114 | } | ||
115 | |||
116 | static void __init kasan_populate_pgd(pgd_t *pgd, unsigned long addr, | ||
117 | unsigned long end, int nid) | ||
118 | { | ||
119 | void *p; | ||
120 | p4d_t *p4d; | ||
121 | unsigned long next; | ||
122 | |||
123 | if (pgd_none(*pgd)) { | ||
124 | p = early_alloc(PAGE_SIZE, nid); | ||
125 | pgd_populate(&init_mm, pgd, p); | ||
126 | } | ||
127 | |||
128 | p4d = p4d_offset(pgd, addr); | ||
129 | do { | ||
130 | next = p4d_addr_end(addr, end); | ||
131 | kasan_populate_p4d(p4d, addr, next, nid); | ||
132 | } while (p4d++, addr = next, addr != end); | ||
133 | } | ||
134 | |||
135 | static void __init kasan_populate_shadow(unsigned long addr, unsigned long end, | ||
136 | int nid) | ||
137 | { | ||
138 | pgd_t *pgd; | ||
139 | unsigned long next; | ||
140 | |||
141 | addr = addr & PAGE_MASK; | ||
142 | end = round_up(end, PAGE_SIZE); | ||
143 | pgd = pgd_offset_k(addr); | ||
144 | do { | ||
145 | next = pgd_addr_end(addr, end); | ||
146 | kasan_populate_pgd(pgd, addr, next, nid); | ||
147 | } while (pgd++, addr = next, addr != end); | ||
148 | } | ||
149 | |||
150 | static void __init map_range(struct range *range) | ||
22 | { | 151 | { |
23 | unsigned long start; | 152 | unsigned long start; |
24 | unsigned long end; | 153 | unsigned long end; |
@@ -26,7 +155,7 @@ static int __init map_range(struct range *range) | |||
26 | start = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->start)); | 155 | start = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->start)); |
27 | end = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->end)); | 156 | end = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->end)); |
28 | 157 | ||
29 | return vmemmap_populate(start, end, NUMA_NO_NODE); | 158 | kasan_populate_shadow(start, end, early_pfn_to_nid(range->start)); |
30 | } | 159 | } |
31 | 160 | ||
32 | static void __init clear_pgds(unsigned long start, | 161 | static void __init clear_pgds(unsigned long start, |
@@ -189,16 +318,16 @@ void __init kasan_init(void) | |||
189 | if (pfn_mapped[i].end == 0) | 318 | if (pfn_mapped[i].end == 0) |
190 | break; | 319 | break; |
191 | 320 | ||
192 | if (map_range(&pfn_mapped[i])) | 321 | map_range(&pfn_mapped[i]); |
193 | panic("kasan: unable to allocate shadow!"); | ||
194 | } | 322 | } |
323 | |||
195 | kasan_populate_zero_shadow( | 324 | kasan_populate_zero_shadow( |
196 | kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), | 325 | kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), |
197 | kasan_mem_to_shadow((void *)__START_KERNEL_map)); | 326 | kasan_mem_to_shadow((void *)__START_KERNEL_map)); |
198 | 327 | ||
199 | vmemmap_populate((unsigned long)kasan_mem_to_shadow(_stext), | 328 | kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext), |
200 | (unsigned long)kasan_mem_to_shadow(_end), | 329 | (unsigned long)kasan_mem_to_shadow(_end), |
201 | NUMA_NO_NODE); | 330 | early_pfn_to_nid(__pa(_stext))); |
202 | 331 | ||
203 | kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END), | 332 | kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END), |
204 | (void *)KASAN_SHADOW_END); | 333 | (void *)KASAN_SHADOW_END); |
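The new kasan_populate_*() helpers above share one pattern: each level first tries to cover the whole range with a single huge mapping (PMD- or PUD-sized) when the range is exactly that size, suitably aligned, and the CPU feature (PSE or GBPAGES) is available, and only falls back to populating the next level with 4K pages otherwise. Below is a minimal standalone sketch of that decision; try_map_huge() and map_4k_pages() are hypothetical stand-ins for pmd_set_huge() and PTE-level population, not real kernel APIs.

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL
#define PMD_SIZE  (2UL * 1024 * 1024)            /* 2 MiB on x86-64 */

/* Hypothetical stand-in for pmd_set_huge(): map the whole 2 MiB at once. */
static bool try_map_huge(unsigned long addr)
{
	printf("huge mapping at %#lx\n", addr);
	return true;
}

/* Hypothetical stand-in for PTE-level population of the same range. */
static void map_4k_pages(unsigned long addr, unsigned long end)
{
	for (; addr != end; addr += PAGE_SIZE)
		printf("4K mapping at %#lx\n", addr);
}

/* The "huge page first, 4K fallback" decision, mirroring kasan_populate_pmd(). */
static void populate(unsigned long addr, unsigned long end, bool cpu_has_pse)
{
	if (cpu_has_pse && (end - addr) == PMD_SIZE && (addr % PMD_SIZE) == 0) {
		if (try_map_huge(addr))
			return;          /* whole range covered by one PMD */
		/* the real code frees the 2 MiB block here and falls through */
	}
	map_4k_pages(addr, end);
}

int main(void)
{
	populate(0x200000, 0x400000, true);      /* aligned, exactly PMD_SIZE */
	populate(0x201000, 0x203000, true);      /* too small: 4K fallback    */
	return 0;
}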
diff --git a/arch/x86/mm/kmemcheck/Makefile b/arch/x86/mm/kmemcheck/Makefile deleted file mode 100644 index 520b3bce4095..000000000000 --- a/arch/x86/mm/kmemcheck/Makefile +++ /dev/null | |||
@@ -1 +0,0 @@ | |||
1 | obj-y := error.o kmemcheck.o opcode.o pte.o selftest.o shadow.o | ||
diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c index 872ec4159a68..cec594032515 100644 --- a/arch/x86/mm/kmemcheck/error.c +++ b/arch/x86/mm/kmemcheck/error.c | |||
@@ -1,228 +1 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | // SPDX-License-Identifier: GPL-2.0 | |
2 | #include <linux/interrupt.h> | ||
3 | #include <linux/kdebug.h> | ||
4 | #include <linux/kmemcheck.h> | ||
5 | #include <linux/kernel.h> | ||
6 | #include <linux/types.h> | ||
7 | #include <linux/ptrace.h> | ||
8 | #include <linux/stacktrace.h> | ||
9 | #include <linux/string.h> | ||
10 | |||
11 | #include "error.h" | ||
12 | #include "shadow.h" | ||
13 | |||
14 | enum kmemcheck_error_type { | ||
15 | KMEMCHECK_ERROR_INVALID_ACCESS, | ||
16 | KMEMCHECK_ERROR_BUG, | ||
17 | }; | ||
18 | |||
19 | #define SHADOW_COPY_SIZE (1 << CONFIG_KMEMCHECK_SHADOW_COPY_SHIFT) | ||
20 | |||
21 | struct kmemcheck_error { | ||
22 | enum kmemcheck_error_type type; | ||
23 | |||
24 | union { | ||
25 | /* KMEMCHECK_ERROR_INVALID_ACCESS */ | ||
26 | struct { | ||
27 | /* Kind of access that caused the error */ | ||
28 | enum kmemcheck_shadow state; | ||
29 | /* Address and size of the erroneous read */ | ||
30 | unsigned long address; | ||
31 | unsigned int size; | ||
32 | }; | ||
33 | }; | ||
34 | |||
35 | struct pt_regs regs; | ||
36 | struct stack_trace trace; | ||
37 | unsigned long trace_entries[32]; | ||
38 | |||
39 | /* We compress it to a char. */ | ||
40 | unsigned char shadow_copy[SHADOW_COPY_SIZE]; | ||
41 | unsigned char memory_copy[SHADOW_COPY_SIZE]; | ||
42 | }; | ||
43 | |||
44 | /* | ||
45 | * Create a ring queue of errors to output. We can't call printk() directly | ||
46 | * from the kmemcheck traps, since this may call the console drivers and | ||
47 | * result in a recursive fault. | ||
48 | */ | ||
49 | static struct kmemcheck_error error_fifo[CONFIG_KMEMCHECK_QUEUE_SIZE]; | ||
50 | static unsigned int error_count; | ||
51 | static unsigned int error_rd; | ||
52 | static unsigned int error_wr; | ||
53 | static unsigned int error_missed_count; | ||
54 | |||
55 | static struct kmemcheck_error *error_next_wr(void) | ||
56 | { | ||
57 | struct kmemcheck_error *e; | ||
58 | |||
59 | if (error_count == ARRAY_SIZE(error_fifo)) { | ||
60 | ++error_missed_count; | ||
61 | return NULL; | ||
62 | } | ||
63 | |||
64 | e = &error_fifo[error_wr]; | ||
65 | if (++error_wr == ARRAY_SIZE(error_fifo)) | ||
66 | error_wr = 0; | ||
67 | ++error_count; | ||
68 | return e; | ||
69 | } | ||
70 | |||
71 | static struct kmemcheck_error *error_next_rd(void) | ||
72 | { | ||
73 | struct kmemcheck_error *e; | ||
74 | |||
75 | if (error_count == 0) | ||
76 | return NULL; | ||
77 | |||
78 | e = &error_fifo[error_rd]; | ||
79 | if (++error_rd == ARRAY_SIZE(error_fifo)) | ||
80 | error_rd = 0; | ||
81 | --error_count; | ||
82 | return e; | ||
83 | } | ||
84 | |||
85 | void kmemcheck_error_recall(void) | ||
86 | { | ||
87 | static const char *desc[] = { | ||
88 | [KMEMCHECK_SHADOW_UNALLOCATED] = "unallocated", | ||
89 | [KMEMCHECK_SHADOW_UNINITIALIZED] = "uninitialized", | ||
90 | [KMEMCHECK_SHADOW_INITIALIZED] = "initialized", | ||
91 | [KMEMCHECK_SHADOW_FREED] = "freed", | ||
92 | }; | ||
93 | |||
94 | static const char short_desc[] = { | ||
95 | [KMEMCHECK_SHADOW_UNALLOCATED] = 'a', | ||
96 | [KMEMCHECK_SHADOW_UNINITIALIZED] = 'u', | ||
97 | [KMEMCHECK_SHADOW_INITIALIZED] = 'i', | ||
98 | [KMEMCHECK_SHADOW_FREED] = 'f', | ||
99 | }; | ||
100 | |||
101 | struct kmemcheck_error *e; | ||
102 | unsigned int i; | ||
103 | |||
104 | e = error_next_rd(); | ||
105 | if (!e) | ||
106 | return; | ||
107 | |||
108 | switch (e->type) { | ||
109 | case KMEMCHECK_ERROR_INVALID_ACCESS: | ||
110 | printk(KERN_WARNING "WARNING: kmemcheck: Caught %d-bit read from %s memory (%p)\n", | ||
111 | 8 * e->size, e->state < ARRAY_SIZE(desc) ? | ||
112 | desc[e->state] : "(invalid shadow state)", | ||
113 | (void *) e->address); | ||
114 | |||
115 | printk(KERN_WARNING); | ||
116 | for (i = 0; i < SHADOW_COPY_SIZE; ++i) | ||
117 | printk(KERN_CONT "%02x", e->memory_copy[i]); | ||
118 | printk(KERN_CONT "\n"); | ||
119 | |||
120 | printk(KERN_WARNING); | ||
121 | for (i = 0; i < SHADOW_COPY_SIZE; ++i) { | ||
122 | if (e->shadow_copy[i] < ARRAY_SIZE(short_desc)) | ||
123 | printk(KERN_CONT " %c", short_desc[e->shadow_copy[i]]); | ||
124 | else | ||
125 | printk(KERN_CONT " ?"); | ||
126 | } | ||
127 | printk(KERN_CONT "\n"); | ||
128 | printk(KERN_WARNING "%*c\n", 2 + 2 | ||
129 | * (int) (e->address & (SHADOW_COPY_SIZE - 1)), '^'); | ||
130 | break; | ||
131 | case KMEMCHECK_ERROR_BUG: | ||
132 | printk(KERN_EMERG "ERROR: kmemcheck: Fatal error\n"); | ||
133 | break; | ||
134 | } | ||
135 | |||
136 | __show_regs(&e->regs, 1); | ||
137 | print_stack_trace(&e->trace, 0); | ||
138 | } | ||
139 | |||
140 | static void do_wakeup(unsigned long data) | ||
141 | { | ||
142 | while (error_count > 0) | ||
143 | kmemcheck_error_recall(); | ||
144 | |||
145 | if (error_missed_count > 0) { | ||
146 | printk(KERN_WARNING "kmemcheck: Lost %d error reports because " | ||
147 | "the queue was too small\n", error_missed_count); | ||
148 | error_missed_count = 0; | ||
149 | } | ||
150 | } | ||
151 | |||
152 | static DECLARE_TASKLET(kmemcheck_tasklet, &do_wakeup, 0); | ||
153 | |||
154 | /* | ||
155 | * Save the context of an error report. | ||
156 | */ | ||
157 | void kmemcheck_error_save(enum kmemcheck_shadow state, | ||
158 | unsigned long address, unsigned int size, struct pt_regs *regs) | ||
159 | { | ||
160 | static unsigned long prev_ip; | ||
161 | |||
162 | struct kmemcheck_error *e; | ||
163 | void *shadow_copy; | ||
164 | void *memory_copy; | ||
165 | |||
166 | /* Don't report several adjacent errors from the same EIP. */ | ||
167 | if (regs->ip == prev_ip) | ||
168 | return; | ||
169 | prev_ip = regs->ip; | ||
170 | |||
171 | e = error_next_wr(); | ||
172 | if (!e) | ||
173 | return; | ||
174 | |||
175 | e->type = KMEMCHECK_ERROR_INVALID_ACCESS; | ||
176 | |||
177 | e->state = state; | ||
178 | e->address = address; | ||
179 | e->size = size; | ||
180 | |||
181 | /* Save regs */ | ||
182 | memcpy(&e->regs, regs, sizeof(*regs)); | ||
183 | |||
184 | /* Save stack trace */ | ||
185 | e->trace.nr_entries = 0; | ||
186 | e->trace.entries = e->trace_entries; | ||
187 | e->trace.max_entries = ARRAY_SIZE(e->trace_entries); | ||
188 | e->trace.skip = 0; | ||
189 | save_stack_trace_regs(regs, &e->trace); | ||
190 | |||
191 | /* Round address down to nearest 16 bytes */ | ||
192 | shadow_copy = kmemcheck_shadow_lookup(address | ||
193 | & ~(SHADOW_COPY_SIZE - 1)); | ||
194 | BUG_ON(!shadow_copy); | ||
195 | |||
196 | memcpy(e->shadow_copy, shadow_copy, SHADOW_COPY_SIZE); | ||
197 | |||
198 | kmemcheck_show_addr(address); | ||
199 | memory_copy = (void *) (address & ~(SHADOW_COPY_SIZE - 1)); | ||
200 | memcpy(e->memory_copy, memory_copy, SHADOW_COPY_SIZE); | ||
201 | kmemcheck_hide_addr(address); | ||
202 | |||
203 | tasklet_hi_schedule_first(&kmemcheck_tasklet); | ||
204 | } | ||
205 | |||
206 | /* | ||
207 | * Save the context of a kmemcheck bug. | ||
208 | */ | ||
209 | void kmemcheck_error_save_bug(struct pt_regs *regs) | ||
210 | { | ||
211 | struct kmemcheck_error *e; | ||
212 | |||
213 | e = error_next_wr(); | ||
214 | if (!e) | ||
215 | return; | ||
216 | |||
217 | e->type = KMEMCHECK_ERROR_BUG; | ||
218 | |||
219 | memcpy(&e->regs, regs, sizeof(*regs)); | ||
220 | |||
221 | e->trace.nr_entries = 0; | ||
222 | e->trace.entries = e->trace_entries; | ||
223 | e->trace.max_entries = ARRAY_SIZE(e->trace_entries); | ||
224 | e->trace.skip = 1; | ||
225 | save_stack_trace(&e->trace); | ||
226 | |||
227 | tasklet_hi_schedule_first(&kmemcheck_tasklet); | ||
228 | } | ||
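The error.c removed above defers all reporting through a fixed-size ring of error records drained later by a tasklet, because calling printk() directly from the trap path could recurse into another fault. Stripped of the kmemcheck specifics, the index handling in error_next_wr()/error_next_rd() reduces to the bounded ring buffer below; this is a standalone illustration, not kernel code.

#include <stdio.h>

#define QUEUE_SIZE 64

struct record { int data; };

static struct record fifo[QUEUE_SIZE];
static unsigned int count, rd, wr, missed;

/* Reserve the next write slot, or count a dropped report when the ring is full. */
static struct record *next_wr(void)
{
	struct record *r;

	if (count == QUEUE_SIZE) {
		++missed;
		return NULL;
	}
	r = &fifo[wr];
	if (++wr == QUEUE_SIZE)
		wr = 0;
	++count;
	return r;
}

/* Take the oldest record, or NULL when the ring is empty. */
static struct record *next_rd(void)
{
	struct record *r;

	if (count == 0)
		return NULL;
	r = &fifo[rd];
	if (++rd == QUEUE_SIZE)
		rd = 0;
	--count;
	return r;
}

int main(void)
{
	struct record *r = next_wr();

	if (r)
		r->data = 42;
	while ((r = next_rd()) != NULL)
		printf("drained %d\n", r->data);
	if (missed)
		printf("lost %u reports\n", missed);
	return 0;
}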
diff --git a/arch/x86/mm/kmemcheck/error.h b/arch/x86/mm/kmemcheck/error.h index 39f80d7a874d..ea32a7d3cf1b 100644 --- a/arch/x86/mm/kmemcheck/error.h +++ b/arch/x86/mm/kmemcheck/error.h | |||
@@ -1,16 +1 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | /* SPDX-License-Identifier: GPL-2.0 */ | |
2 | #ifndef ARCH__X86__MM__KMEMCHECK__ERROR_H | ||
3 | #define ARCH__X86__MM__KMEMCHECK__ERROR_H | ||
4 | |||
5 | #include <linux/ptrace.h> | ||
6 | |||
7 | #include "shadow.h" | ||
8 | |||
9 | void kmemcheck_error_save(enum kmemcheck_shadow state, | ||
10 | unsigned long address, unsigned int size, struct pt_regs *regs); | ||
11 | |||
12 | void kmemcheck_error_save_bug(struct pt_regs *regs); | ||
13 | |||
14 | void kmemcheck_error_recall(void); | ||
15 | |||
16 | #endif | ||
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c deleted file mode 100644 index 4515bae36bbe..000000000000 --- a/arch/x86/mm/kmemcheck/kmemcheck.c +++ /dev/null | |||
@@ -1,658 +0,0 @@ | |||
1 | /** | ||
2 | * kmemcheck - a heavyweight memory checker for the linux kernel | ||
3 | * Copyright (C) 2007, 2008 Vegard Nossum <vegardno@ifi.uio.no> | ||
4 | * (With a lot of help from Ingo Molnar and Pekka Enberg.) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License (version 2) as | ||
8 | * published by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | #include <linux/init.h> | ||
12 | #include <linux/interrupt.h> | ||
13 | #include <linux/kallsyms.h> | ||
14 | #include <linux/kernel.h> | ||
15 | #include <linux/kmemcheck.h> | ||
16 | #include <linux/mm.h> | ||
17 | #include <linux/page-flags.h> | ||
18 | #include <linux/percpu.h> | ||
19 | #include <linux/ptrace.h> | ||
20 | #include <linux/string.h> | ||
21 | #include <linux/types.h> | ||
22 | |||
23 | #include <asm/cacheflush.h> | ||
24 | #include <asm/kmemcheck.h> | ||
25 | #include <asm/pgtable.h> | ||
26 | #include <asm/tlbflush.h> | ||
27 | |||
28 | #include "error.h" | ||
29 | #include "opcode.h" | ||
30 | #include "pte.h" | ||
31 | #include "selftest.h" | ||
32 | #include "shadow.h" | ||
33 | |||
34 | |||
35 | #ifdef CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT | ||
36 | # define KMEMCHECK_ENABLED 0 | ||
37 | #endif | ||
38 | |||
39 | #ifdef CONFIG_KMEMCHECK_ENABLED_BY_DEFAULT | ||
40 | # define KMEMCHECK_ENABLED 1 | ||
41 | #endif | ||
42 | |||
43 | #ifdef CONFIG_KMEMCHECK_ONESHOT_BY_DEFAULT | ||
44 | # define KMEMCHECK_ENABLED 2 | ||
45 | #endif | ||
46 | |||
47 | int kmemcheck_enabled = KMEMCHECK_ENABLED; | ||
48 | |||
49 | int __init kmemcheck_init(void) | ||
50 | { | ||
51 | #ifdef CONFIG_SMP | ||
52 | /* | ||
53 | * Limit SMP to use a single CPU. We rely on the fact that this code | ||
54 | * runs before SMP is set up. | ||
55 | */ | ||
56 | if (setup_max_cpus > 1) { | ||
57 | printk(KERN_INFO | ||
58 | "kmemcheck: Limiting number of CPUs to 1.\n"); | ||
59 | setup_max_cpus = 1; | ||
60 | } | ||
61 | #endif | ||
62 | |||
63 | if (!kmemcheck_selftest()) { | ||
64 | printk(KERN_INFO "kmemcheck: self-tests failed; disabling\n"); | ||
65 | kmemcheck_enabled = 0; | ||
66 | return -EINVAL; | ||
67 | } | ||
68 | |||
69 | printk(KERN_INFO "kmemcheck: Initialized\n"); | ||
70 | return 0; | ||
71 | } | ||
72 | |||
73 | early_initcall(kmemcheck_init); | ||
74 | |||
75 | /* | ||
76 | * We need to parse the kmemcheck= option before any memory is allocated. | ||
77 | */ | ||
78 | static int __init param_kmemcheck(char *str) | ||
79 | { | ||
80 | int val; | ||
81 | int ret; | ||
82 | |||
83 | if (!str) | ||
84 | return -EINVAL; | ||
85 | |||
86 | ret = kstrtoint(str, 0, &val); | ||
87 | if (ret) | ||
88 | return ret; | ||
89 | kmemcheck_enabled = val; | ||
90 | return 0; | ||
91 | } | ||
92 | |||
93 | early_param("kmemcheck", param_kmemcheck); | ||
94 | |||
95 | int kmemcheck_show_addr(unsigned long address) | ||
96 | { | ||
97 | pte_t *pte; | ||
98 | |||
99 | pte = kmemcheck_pte_lookup(address); | ||
100 | if (!pte) | ||
101 | return 0; | ||
102 | |||
103 | set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); | ||
104 | __flush_tlb_one(address); | ||
105 | return 1; | ||
106 | } | ||
107 | |||
108 | int kmemcheck_hide_addr(unsigned long address) | ||
109 | { | ||
110 | pte_t *pte; | ||
111 | |||
112 | pte = kmemcheck_pte_lookup(address); | ||
113 | if (!pte) | ||
114 | return 0; | ||
115 | |||
116 | set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); | ||
117 | __flush_tlb_one(address); | ||
118 | return 1; | ||
119 | } | ||
120 | |||
121 | struct kmemcheck_context { | ||
122 | bool busy; | ||
123 | int balance; | ||
124 | |||
125 | /* | ||
126 | * There can be at most two memory operands to an instruction, but | ||
127 | * each address can cross a page boundary -- so we may need up to | ||
128 | * four addresses that must be hidden/revealed for each fault. | ||
129 | */ | ||
130 | unsigned long addr[4]; | ||
131 | unsigned long n_addrs; | ||
132 | unsigned long flags; | ||
133 | |||
134 | /* Data size of the instruction that caused a fault. */ | ||
135 | unsigned int size; | ||
136 | }; | ||
137 | |||
138 | static DEFINE_PER_CPU(struct kmemcheck_context, kmemcheck_context); | ||
139 | |||
140 | bool kmemcheck_active(struct pt_regs *regs) | ||
141 | { | ||
142 | struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); | ||
143 | |||
144 | return data->balance > 0; | ||
145 | } | ||
146 | |||
147 | /* Save an address that needs to be shown/hidden */ | ||
148 | static void kmemcheck_save_addr(unsigned long addr) | ||
149 | { | ||
150 | struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); | ||
151 | |||
152 | BUG_ON(data->n_addrs >= ARRAY_SIZE(data->addr)); | ||
153 | data->addr[data->n_addrs++] = addr; | ||
154 | } | ||
155 | |||
156 | static unsigned int kmemcheck_show_all(void) | ||
157 | { | ||
158 | struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); | ||
159 | unsigned int i; | ||
160 | unsigned int n; | ||
161 | |||
162 | n = 0; | ||
163 | for (i = 0; i < data->n_addrs; ++i) | ||
164 | n += kmemcheck_show_addr(data->addr[i]); | ||
165 | |||
166 | return n; | ||
167 | } | ||
168 | |||
169 | static unsigned int kmemcheck_hide_all(void) | ||
170 | { | ||
171 | struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); | ||
172 | unsigned int i; | ||
173 | unsigned int n; | ||
174 | |||
175 | n = 0; | ||
176 | for (i = 0; i < data->n_addrs; ++i) | ||
177 | n += kmemcheck_hide_addr(data->addr[i]); | ||
178 | |||
179 | return n; | ||
180 | } | ||
181 | |||
182 | /* | ||
183 | * Called from the #PF handler. | ||
184 | */ | ||
185 | void kmemcheck_show(struct pt_regs *regs) | ||
186 | { | ||
187 | struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); | ||
188 | |||
189 | BUG_ON(!irqs_disabled()); | ||
190 | |||
191 | if (unlikely(data->balance != 0)) { | ||
192 | kmemcheck_show_all(); | ||
193 | kmemcheck_error_save_bug(regs); | ||
194 | data->balance = 0; | ||
195 | return; | ||
196 | } | ||
197 | |||
198 | /* | ||
199 | * None of the addresses actually belonged to kmemcheck. Note that | ||
200 | * this is not an error. | ||
201 | */ | ||
202 | if (kmemcheck_show_all() == 0) | ||
203 | return; | ||
204 | |||
205 | ++data->balance; | ||
206 | |||
207 | /* | ||
208 | * The IF needs to be cleared as well, so that the faulting | ||
209 | * instruction can run "uninterrupted". Otherwise, we might take | ||
210 | * an interrupt and start executing that before we've had a chance | ||
211 | * to hide the page again. | ||
212 | * | ||
213 | * NOTE: In the rare case of multiple faults, we must not override | ||
214 | * the original flags: | ||
215 | */ | ||
216 | if (!(regs->flags & X86_EFLAGS_TF)) | ||
217 | data->flags = regs->flags; | ||
218 | |||
219 | regs->flags |= X86_EFLAGS_TF; | ||
220 | regs->flags &= ~X86_EFLAGS_IF; | ||
221 | } | ||
222 | |||
223 | /* | ||
224 | * Called from the #DB handler. | ||
225 | */ | ||
226 | void kmemcheck_hide(struct pt_regs *regs) | ||
227 | { | ||
228 | struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); | ||
229 | int n; | ||
230 | |||
231 | BUG_ON(!irqs_disabled()); | ||
232 | |||
233 | if (unlikely(data->balance != 1)) { | ||
234 | kmemcheck_show_all(); | ||
235 | kmemcheck_error_save_bug(regs); | ||
236 | data->n_addrs = 0; | ||
237 | data->balance = 0; | ||
238 | |||
239 | if (!(data->flags & X86_EFLAGS_TF)) | ||
240 | regs->flags &= ~X86_EFLAGS_TF; | ||
241 | if (data->flags & X86_EFLAGS_IF) | ||
242 | regs->flags |= X86_EFLAGS_IF; | ||
243 | return; | ||
244 | } | ||
245 | |||
246 | if (kmemcheck_enabled) | ||
247 | n = kmemcheck_hide_all(); | ||
248 | else | ||
249 | n = kmemcheck_show_all(); | ||
250 | |||
251 | if (n == 0) | ||
252 | return; | ||
253 | |||
254 | --data->balance; | ||
255 | |||
256 | data->n_addrs = 0; | ||
257 | |||
258 | if (!(data->flags & X86_EFLAGS_TF)) | ||
259 | regs->flags &= ~X86_EFLAGS_TF; | ||
260 | if (data->flags & X86_EFLAGS_IF) | ||
261 | regs->flags |= X86_EFLAGS_IF; | ||
262 | } | ||
263 | |||
264 | void kmemcheck_show_pages(struct page *p, unsigned int n) | ||
265 | { | ||
266 | unsigned int i; | ||
267 | |||
268 | for (i = 0; i < n; ++i) { | ||
269 | unsigned long address; | ||
270 | pte_t *pte; | ||
271 | unsigned int level; | ||
272 | |||
273 | address = (unsigned long) page_address(&p[i]); | ||
274 | pte = lookup_address(address, &level); | ||
275 | BUG_ON(!pte); | ||
276 | BUG_ON(level != PG_LEVEL_4K); | ||
277 | |||
278 | set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); | ||
279 | set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_HIDDEN)); | ||
280 | __flush_tlb_one(address); | ||
281 | } | ||
282 | } | ||
283 | |||
284 | bool kmemcheck_page_is_tracked(struct page *p) | ||
285 | { | ||
286 | /* This will also check the "hidden" flag of the PTE. */ | ||
287 | return kmemcheck_pte_lookup((unsigned long) page_address(p)); | ||
288 | } | ||
289 | |||
290 | void kmemcheck_hide_pages(struct page *p, unsigned int n) | ||
291 | { | ||
292 | unsigned int i; | ||
293 | |||
294 | for (i = 0; i < n; ++i) { | ||
295 | unsigned long address; | ||
296 | pte_t *pte; | ||
297 | unsigned int level; | ||
298 | |||
299 | address = (unsigned long) page_address(&p[i]); | ||
300 | pte = lookup_address(address, &level); | ||
301 | BUG_ON(!pte); | ||
302 | BUG_ON(level != PG_LEVEL_4K); | ||
303 | |||
304 | set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); | ||
305 | set_pte(pte, __pte(pte_val(*pte) | _PAGE_HIDDEN)); | ||
306 | __flush_tlb_one(address); | ||
307 | } | ||
308 | } | ||
309 | |||
310 | /* Access may NOT cross page boundary */ | ||
311 | static void kmemcheck_read_strict(struct pt_regs *regs, | ||
312 | unsigned long addr, unsigned int size) | ||
313 | { | ||
314 | void *shadow; | ||
315 | enum kmemcheck_shadow status; | ||
316 | |||
317 | shadow = kmemcheck_shadow_lookup(addr); | ||
318 | if (!shadow) | ||
319 | return; | ||
320 | |||
321 | kmemcheck_save_addr(addr); | ||
322 | status = kmemcheck_shadow_test(shadow, size); | ||
323 | if (status == KMEMCHECK_SHADOW_INITIALIZED) | ||
324 | return; | ||
325 | |||
326 | if (kmemcheck_enabled) | ||
327 | kmemcheck_error_save(status, addr, size, regs); | ||
328 | |||
329 | if (kmemcheck_enabled == 2) | ||
330 | kmemcheck_enabled = 0; | ||
331 | |||
332 | /* Don't warn about it again. */ | ||
333 | kmemcheck_shadow_set(shadow, size); | ||
334 | } | ||
335 | |||
336 | bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size) | ||
337 | { | ||
338 | enum kmemcheck_shadow status; | ||
339 | void *shadow; | ||
340 | |||
341 | shadow = kmemcheck_shadow_lookup(addr); | ||
342 | if (!shadow) | ||
343 | return true; | ||
344 | |||
345 | status = kmemcheck_shadow_test_all(shadow, size); | ||
346 | |||
347 | return status == KMEMCHECK_SHADOW_INITIALIZED; | ||
348 | } | ||
349 | |||
350 | /* Access may cross page boundary */ | ||
351 | static void kmemcheck_read(struct pt_regs *regs, | ||
352 | unsigned long addr, unsigned int size) | ||
353 | { | ||
354 | unsigned long page = addr & PAGE_MASK; | ||
355 | unsigned long next_addr = addr + size - 1; | ||
356 | unsigned long next_page = next_addr & PAGE_MASK; | ||
357 | |||
358 | if (likely(page == next_page)) { | ||
359 | kmemcheck_read_strict(regs, addr, size); | ||
360 | return; | ||
361 | } | ||
362 | |||
363 | /* | ||
364 | * What we do is basically to split the access across the | ||
365 | * two pages and handle each part separately. Yes, this means | ||
366 | * that we may now see reads that are 3 + 5 bytes, for | ||
367 | * example (and if both are uninitialized, there will be two | ||
368 | * reports), but it makes the code a lot simpler. | ||
369 | */ | ||
370 | kmemcheck_read_strict(regs, addr, next_page - addr); | ||
371 | kmemcheck_read_strict(regs, next_page, next_addr - next_page); | ||
372 | } | ||
373 | |||
374 | static void kmemcheck_write_strict(struct pt_regs *regs, | ||
375 | unsigned long addr, unsigned int size) | ||
376 | { | ||
377 | void *shadow; | ||
378 | |||
379 | shadow = kmemcheck_shadow_lookup(addr); | ||
380 | if (!shadow) | ||
381 | return; | ||
382 | |||
383 | kmemcheck_save_addr(addr); | ||
384 | kmemcheck_shadow_set(shadow, size); | ||
385 | } | ||
386 | |||
387 | static void kmemcheck_write(struct pt_regs *regs, | ||
388 | unsigned long addr, unsigned int size) | ||
389 | { | ||
390 | unsigned long page = addr & PAGE_MASK; | ||
391 | unsigned long next_addr = addr + size - 1; | ||
392 | unsigned long next_page = next_addr & PAGE_MASK; | ||
393 | |||
394 | if (likely(page == next_page)) { | ||
395 | kmemcheck_write_strict(regs, addr, size); | ||
396 | return; | ||
397 | } | ||
398 | |||
399 | /* See comment in kmemcheck_read(). */ | ||
400 | kmemcheck_write_strict(regs, addr, next_page - addr); | ||
401 | kmemcheck_write_strict(regs, next_page, next_addr - next_page); | ||
402 | } | ||
403 | |||
404 | /* | ||
405 | * Copying is hard. We have two addresses, each of which may be split across | ||
406 | * a page (and each page will have different shadow addresses). | ||
407 | */ | ||
408 | static void kmemcheck_copy(struct pt_regs *regs, | ||
409 | unsigned long src_addr, unsigned long dst_addr, unsigned int size) | ||
410 | { | ||
411 | uint8_t shadow[8]; | ||
412 | enum kmemcheck_shadow status; | ||
413 | |||
414 | unsigned long page; | ||
415 | unsigned long next_addr; | ||
416 | unsigned long next_page; | ||
417 | |||
418 | uint8_t *x; | ||
419 | unsigned int i; | ||
420 | unsigned int n; | ||
421 | |||
422 | BUG_ON(size > sizeof(shadow)); | ||
423 | |||
424 | page = src_addr & PAGE_MASK; | ||
425 | next_addr = src_addr + size - 1; | ||
426 | next_page = next_addr & PAGE_MASK; | ||
427 | |||
428 | if (likely(page == next_page)) { | ||
429 | /* Same page */ | ||
430 | x = kmemcheck_shadow_lookup(src_addr); | ||
431 | if (x) { | ||
432 | kmemcheck_save_addr(src_addr); | ||
433 | for (i = 0; i < size; ++i) | ||
434 | shadow[i] = x[i]; | ||
435 | } else { | ||
436 | for (i = 0; i < size; ++i) | ||
437 | shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
438 | } | ||
439 | } else { | ||
440 | n = next_page - src_addr; | ||
441 | BUG_ON(n > sizeof(shadow)); | ||
442 | |||
443 | /* First page */ | ||
444 | x = kmemcheck_shadow_lookup(src_addr); | ||
445 | if (x) { | ||
446 | kmemcheck_save_addr(src_addr); | ||
447 | for (i = 0; i < n; ++i) | ||
448 | shadow[i] = x[i]; | ||
449 | } else { | ||
450 | /* Not tracked */ | ||
451 | for (i = 0; i < n; ++i) | ||
452 | shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
453 | } | ||
454 | |||
455 | /* Second page */ | ||
456 | x = kmemcheck_shadow_lookup(next_page); | ||
457 | if (x) { | ||
458 | kmemcheck_save_addr(next_page); | ||
459 | for (i = n; i < size; ++i) | ||
460 | shadow[i] = x[i - n]; | ||
461 | } else { | ||
462 | /* Not tracked */ | ||
463 | for (i = n; i < size; ++i) | ||
464 | shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
465 | } | ||
466 | } | ||
467 | |||
468 | page = dst_addr & PAGE_MASK; | ||
469 | next_addr = dst_addr + size - 1; | ||
470 | next_page = next_addr & PAGE_MASK; | ||
471 | |||
472 | if (likely(page == next_page)) { | ||
473 | /* Same page */ | ||
474 | x = kmemcheck_shadow_lookup(dst_addr); | ||
475 | if (x) { | ||
476 | kmemcheck_save_addr(dst_addr); | ||
477 | for (i = 0; i < size; ++i) { | ||
478 | x[i] = shadow[i]; | ||
479 | shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
480 | } | ||
481 | } | ||
482 | } else { | ||
483 | n = next_page - dst_addr; | ||
484 | BUG_ON(n > sizeof(shadow)); | ||
485 | |||
486 | /* First page */ | ||
487 | x = kmemcheck_shadow_lookup(dst_addr); | ||
488 | if (x) { | ||
489 | kmemcheck_save_addr(dst_addr); | ||
490 | for (i = 0; i < n; ++i) { | ||
491 | x[i] = shadow[i]; | ||
492 | shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
493 | } | ||
494 | } | ||
495 | |||
496 | /* Second page */ | ||
497 | x = kmemcheck_shadow_lookup(next_page); | ||
498 | if (x) { | ||
499 | kmemcheck_save_addr(next_page); | ||
500 | for (i = n; i < size; ++i) { | ||
501 | x[i - n] = shadow[i]; | ||
502 | shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
503 | } | ||
504 | } | ||
505 | } | ||
506 | |||
507 | status = kmemcheck_shadow_test(shadow, size); | ||
508 | if (status == KMEMCHECK_SHADOW_INITIALIZED) | ||
509 | return; | ||
510 | |||
511 | if (kmemcheck_enabled) | ||
512 | kmemcheck_error_save(status, src_addr, size, regs); | ||
513 | |||
514 | if (kmemcheck_enabled == 2) | ||
515 | kmemcheck_enabled = 0; | ||
516 | } | ||
517 | |||
518 | enum kmemcheck_method { | ||
519 | KMEMCHECK_READ, | ||
520 | KMEMCHECK_WRITE, | ||
521 | }; | ||
522 | |||
523 | static void kmemcheck_access(struct pt_regs *regs, | ||
524 | unsigned long fallback_address, enum kmemcheck_method fallback_method) | ||
525 | { | ||
526 | const uint8_t *insn; | ||
527 | const uint8_t *insn_primary; | ||
528 | unsigned int size; | ||
529 | |||
530 | struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); | ||
531 | |||
532 | /* Recursive fault -- ouch. */ | ||
533 | if (data->busy) { | ||
534 | kmemcheck_show_addr(fallback_address); | ||
535 | kmemcheck_error_save_bug(regs); | ||
536 | return; | ||
537 | } | ||
538 | |||
539 | data->busy = true; | ||
540 | |||
541 | insn = (const uint8_t *) regs->ip; | ||
542 | insn_primary = kmemcheck_opcode_get_primary(insn); | ||
543 | |||
544 | kmemcheck_opcode_decode(insn, &size); | ||
545 | |||
546 | switch (insn_primary[0]) { | ||
547 | #ifdef CONFIG_KMEMCHECK_BITOPS_OK | ||
548 | /* AND, OR, XOR */ | ||
549 | /* | ||
550 | * Unfortunately, these instructions have to be excluded from | ||
551 | * our regular checking since they access only some (and not | ||
552 | * all) bits. This clears out "bogus" bitfield-access warnings. | ||
553 | */ | ||
554 | case 0x80: | ||
555 | case 0x81: | ||
556 | case 0x82: | ||
557 | case 0x83: | ||
558 | switch ((insn_primary[1] >> 3) & 7) { | ||
559 | /* OR */ | ||
560 | case 1: | ||
561 | /* AND */ | ||
562 | case 4: | ||
563 | /* XOR */ | ||
564 | case 6: | ||
565 | kmemcheck_write(regs, fallback_address, size); | ||
566 | goto out; | ||
567 | |||
568 | /* ADD */ | ||
569 | case 0: | ||
570 | /* ADC */ | ||
571 | case 2: | ||
572 | /* SBB */ | ||
573 | case 3: | ||
574 | /* SUB */ | ||
575 | case 5: | ||
576 | /* CMP */ | ||
577 | case 7: | ||
578 | break; | ||
579 | } | ||
580 | break; | ||
581 | #endif | ||
582 | |||
583 | /* MOVS, MOVSB, MOVSW, MOVSD */ | ||
584 | case 0xa4: | ||
585 | case 0xa5: | ||
586 | /* | ||
587 | * These instructions are special because they take two | ||
588 | * addresses, but we only get one page fault. | ||
589 | */ | ||
590 | kmemcheck_copy(regs, regs->si, regs->di, size); | ||
591 | goto out; | ||
592 | |||
593 | /* CMPS, CMPSB, CMPSW, CMPSD */ | ||
594 | case 0xa6: | ||
595 | case 0xa7: | ||
596 | kmemcheck_read(regs, regs->si, size); | ||
597 | kmemcheck_read(regs, regs->di, size); | ||
598 | goto out; | ||
599 | } | ||
600 | |||
601 | /* | ||
602 | * If the opcode isn't special in any way, we use the data from the | ||
603 | * page fault handler to determine the address and type of memory | ||
604 | * access. | ||
605 | */ | ||
606 | switch (fallback_method) { | ||
607 | case KMEMCHECK_READ: | ||
608 | kmemcheck_read(regs, fallback_address, size); | ||
609 | goto out; | ||
610 | case KMEMCHECK_WRITE: | ||
611 | kmemcheck_write(regs, fallback_address, size); | ||
612 | goto out; | ||
613 | } | ||
614 | |||
615 | out: | ||
616 | data->busy = false; | ||
617 | } | ||
618 | |||
619 | bool kmemcheck_fault(struct pt_regs *regs, unsigned long address, | ||
620 | unsigned long error_code) | ||
621 | { | ||
622 | pte_t *pte; | ||
623 | |||
624 | /* | ||
625 | * XXX: Is it safe to assume that memory accesses from virtual 86 | ||
626 | * mode or non-kernel code segments will _never_ access kernel | ||
627 | * memory (e.g. tracked pages)? For now, we need this to avoid | ||
628 | * invoking kmemcheck for PnP BIOS calls. | ||
629 | */ | ||
630 | if (regs->flags & X86_VM_MASK) | ||
631 | return false; | ||
632 | if (regs->cs != __KERNEL_CS) | ||
633 | return false; | ||
634 | |||
635 | pte = kmemcheck_pte_lookup(address); | ||
636 | if (!pte) | ||
637 | return false; | ||
638 | |||
639 | WARN_ON_ONCE(in_nmi()); | ||
640 | |||
641 | if (error_code & 2) | ||
642 | kmemcheck_access(regs, address, KMEMCHECK_WRITE); | ||
643 | else | ||
644 | kmemcheck_access(regs, address, KMEMCHECK_READ); | ||
645 | |||
646 | kmemcheck_show(regs); | ||
647 | return true; | ||
648 | } | ||
649 | |||
650 | bool kmemcheck_trap(struct pt_regs *regs) | ||
651 | { | ||
652 | if (!kmemcheck_active(regs)) | ||
653 | return false; | ||
654 | |||
655 | /* We're done. */ | ||
656 | kmemcheck_hide(regs); | ||
657 | return true; | ||
658 | } | ||
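One detail worth isolating from the removed kmemcheck.c: kmemcheck_read() and kmemcheck_write() split any access that crosses a page boundary into two per-page accesses, since each page has its own shadow. The sketch below shows the same splitting idea in standalone form; check_range() is a hypothetical stand-in for the *_strict() helpers, and the sizes here are computed so the two parts cover the full access.

#include <stdio.h>

#define PAGE_MASK (~4095UL)

/* Hypothetical per-page handler standing in for kmemcheck_read_strict(). */
static void check_range(unsigned long addr, unsigned int size)
{
	printf("check %#lx..%#lx\n", addr, addr + size - 1);
}

/* Split a possibly page-crossing access into at most two per-page chunks. */
static void check_access(unsigned long addr, unsigned int size)
{
	unsigned long page      = addr & PAGE_MASK;
	unsigned long last      = addr + size - 1;
	unsigned long next_page = last & PAGE_MASK;

	if (page == next_page) {
		check_range(addr, size);         /* fits in one page */
		return;
	}
	/* e.g. an 8-byte access at 0x1ffd becomes a 3-byte and a 5-byte check */
	check_range(addr, next_page - addr);
	check_range(next_page, last - next_page + 1);
}

int main(void)
{
	check_access(0x1ffd, 8);         /* crosses a page boundary */
	check_access(0x2000, 8);         /* stays within one page   */
	return 0;
}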
diff --git a/arch/x86/mm/kmemcheck/opcode.c b/arch/x86/mm/kmemcheck/opcode.c index df8109ddf7fe..cec594032515 100644 --- a/arch/x86/mm/kmemcheck/opcode.c +++ b/arch/x86/mm/kmemcheck/opcode.c | |||
@@ -1,107 +1 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | // SPDX-License-Identifier: GPL-2.0 | |
2 | #include <linux/types.h> | ||
3 | |||
4 | #include "opcode.h" | ||
5 | |||
6 | static bool opcode_is_prefix(uint8_t b) | ||
7 | { | ||
8 | return | ||
9 | /* Group 1 */ | ||
10 | b == 0xf0 || b == 0xf2 || b == 0xf3 | ||
11 | /* Group 2 */ | ||
12 | || b == 0x2e || b == 0x36 || b == 0x3e || b == 0x26 | ||
13 | || b == 0x64 || b == 0x65 | ||
14 | /* Group 3 */ | ||
15 | || b == 0x66 | ||
16 | /* Group 4 */ | ||
17 | || b == 0x67; | ||
18 | } | ||
19 | |||
20 | #ifdef CONFIG_X86_64 | ||
21 | static bool opcode_is_rex_prefix(uint8_t b) | ||
22 | { | ||
23 | return (b & 0xf0) == 0x40; | ||
24 | } | ||
25 | #else | ||
26 | static bool opcode_is_rex_prefix(uint8_t b) | ||
27 | { | ||
28 | return false; | ||
29 | } | ||
30 | #endif | ||
31 | |||
32 | #define REX_W (1 << 3) | ||
33 | |||
34 | /* | ||
35 | * This is a VERY crude opcode decoder. We only need to find the size of the | ||
36 | * load/store that caused our #PF and this should work for all the opcodes | ||
37 | * that we care about. Moreover, the ones who invented this instruction set | ||
38 | * should be shot. | ||
39 | */ | ||
40 | void kmemcheck_opcode_decode(const uint8_t *op, unsigned int *size) | ||
41 | { | ||
42 | /* Default operand size */ | ||
43 | int operand_size_override = 4; | ||
44 | |||
45 | /* prefixes */ | ||
46 | for (; opcode_is_prefix(*op); ++op) { | ||
47 | if (*op == 0x66) | ||
48 | operand_size_override = 2; | ||
49 | } | ||
50 | |||
51 | /* REX prefix */ | ||
52 | if (opcode_is_rex_prefix(*op)) { | ||
53 | uint8_t rex = *op; | ||
54 | |||
55 | ++op; | ||
56 | if (rex & REX_W) { | ||
57 | switch (*op) { | ||
58 | case 0x63: | ||
59 | *size = 4; | ||
60 | return; | ||
61 | case 0x0f: | ||
62 | ++op; | ||
63 | |||
64 | switch (*op) { | ||
65 | case 0xb6: | ||
66 | case 0xbe: | ||
67 | *size = 1; | ||
68 | return; | ||
69 | case 0xb7: | ||
70 | case 0xbf: | ||
71 | *size = 2; | ||
72 | return; | ||
73 | } | ||
74 | |||
75 | break; | ||
76 | } | ||
77 | |||
78 | *size = 8; | ||
79 | return; | ||
80 | } | ||
81 | } | ||
82 | |||
83 | /* escape opcode */ | ||
84 | if (*op == 0x0f) { | ||
85 | ++op; | ||
86 | |||
87 | /* | ||
88 | * This is move with zero-extend and sign-extend, respectively; | ||
89 | * we don't have to think about 0xb6/0xbe, because this is | ||
90 | * already handled in the conditional below. | ||
91 | */ | ||
92 | if (*op == 0xb7 || *op == 0xbf) | ||
93 | operand_size_override = 2; | ||
94 | } | ||
95 | |||
96 | *size = (*op & 1) ? operand_size_override : 1; | ||
97 | } | ||
98 | |||
99 | const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op) | ||
100 | { | ||
101 | /* skip prefixes */ | ||
102 | while (opcode_is_prefix(*op)) | ||
103 | ++op; | ||
104 | if (opcode_is_rex_prefix(*op)) | ||
105 | ++op; | ||
106 | return op; | ||
107 | } | ||
diff --git a/arch/x86/mm/kmemcheck/opcode.h b/arch/x86/mm/kmemcheck/opcode.h index 51a1ce94c24a..ea32a7d3cf1b 100644 --- a/arch/x86/mm/kmemcheck/opcode.h +++ b/arch/x86/mm/kmemcheck/opcode.h | |||
@@ -1,10 +1 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | /* SPDX-License-Identifier: GPL-2.0 */ | |
2 | #ifndef ARCH__X86__MM__KMEMCHECK__OPCODE_H | ||
3 | #define ARCH__X86__MM__KMEMCHECK__OPCODE_H | ||
4 | |||
5 | #include <linux/types.h> | ||
6 | |||
7 | void kmemcheck_opcode_decode(const uint8_t *op, unsigned int *size); | ||
8 | const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op); | ||
9 | |||
10 | #endif | ||
diff --git a/arch/x86/mm/kmemcheck/pte.c b/arch/x86/mm/kmemcheck/pte.c index 8a03be90272a..cec594032515 100644 --- a/arch/x86/mm/kmemcheck/pte.c +++ b/arch/x86/mm/kmemcheck/pte.c | |||
@@ -1,23 +1 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | // SPDX-License-Identifier: GPL-2.0 | |
2 | #include <linux/mm.h> | ||
3 | |||
4 | #include <asm/pgtable.h> | ||
5 | |||
6 | #include "pte.h" | ||
7 | |||
8 | pte_t *kmemcheck_pte_lookup(unsigned long address) | ||
9 | { | ||
10 | pte_t *pte; | ||
11 | unsigned int level; | ||
12 | |||
13 | pte = lookup_address(address, &level); | ||
14 | if (!pte) | ||
15 | return NULL; | ||
16 | if (level != PG_LEVEL_4K) | ||
17 | return NULL; | ||
18 | if (!pte_hidden(*pte)) | ||
19 | return NULL; | ||
20 | |||
21 | return pte; | ||
22 | } | ||
23 | |||
diff --git a/arch/x86/mm/kmemcheck/pte.h b/arch/x86/mm/kmemcheck/pte.h index b595612382c2..ea32a7d3cf1b 100644 --- a/arch/x86/mm/kmemcheck/pte.h +++ b/arch/x86/mm/kmemcheck/pte.h | |||
@@ -1,11 +1 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | /* SPDX-License-Identifier: GPL-2.0 */ | |
2 | #ifndef ARCH__X86__MM__KMEMCHECK__PTE_H | ||
3 | #define ARCH__X86__MM__KMEMCHECK__PTE_H | ||
4 | |||
5 | #include <linux/mm.h> | ||
6 | |||
7 | #include <asm/pgtable.h> | ||
8 | |||
9 | pte_t *kmemcheck_pte_lookup(unsigned long address); | ||
10 | |||
11 | #endif | ||
diff --git a/arch/x86/mm/kmemcheck/selftest.c b/arch/x86/mm/kmemcheck/selftest.c index 7ce0be1f99eb..cec594032515 100644 --- a/arch/x86/mm/kmemcheck/selftest.c +++ b/arch/x86/mm/kmemcheck/selftest.c | |||
@@ -1,71 +1 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | // SPDX-License-Identifier: GPL-2.0 | |
2 | #include <linux/bug.h> | ||
3 | #include <linux/kernel.h> | ||
4 | |||
5 | #include "opcode.h" | ||
6 | #include "selftest.h" | ||
7 | |||
8 | struct selftest_opcode { | ||
9 | unsigned int expected_size; | ||
10 | const uint8_t *insn; | ||
11 | const char *desc; | ||
12 | }; | ||
13 | |||
14 | static const struct selftest_opcode selftest_opcodes[] = { | ||
15 | /* REP MOVS */ | ||
16 | {1, "\xf3\xa4", "rep movsb <mem8>, <mem8>"}, | ||
17 | {4, "\xf3\xa5", "rep movsl <mem32>, <mem32>"}, | ||
18 | |||
19 | /* MOVZX / MOVZXD */ | ||
20 | {1, "\x66\x0f\xb6\x51\xf8", "movzwq <mem8>, <reg16>"}, | ||
21 | {1, "\x0f\xb6\x51\xf8", "movzwq <mem8>, <reg32>"}, | ||
22 | |||
23 | /* MOVSX / MOVSXD */ | ||
24 | {1, "\x66\x0f\xbe\x51\xf8", "movswq <mem8>, <reg16>"}, | ||
25 | {1, "\x0f\xbe\x51\xf8", "movswq <mem8>, <reg32>"}, | ||
26 | |||
27 | #ifdef CONFIG_X86_64 | ||
28 | /* MOVZX / MOVZXD */ | ||
29 | {1, "\x49\x0f\xb6\x51\xf8", "movzbq <mem8>, <reg64>"}, | ||
30 | {2, "\x49\x0f\xb7\x51\xf8", "movzbq <mem16>, <reg64>"}, | ||
31 | |||
32 | /* MOVSX / MOVSXD */ | ||
33 | {1, "\x49\x0f\xbe\x51\xf8", "movsbq <mem8>, <reg64>"}, | ||
34 | {2, "\x49\x0f\xbf\x51\xf8", "movsbq <mem16>, <reg64>"}, | ||
35 | {4, "\x49\x63\x51\xf8", "movslq <mem32>, <reg64>"}, | ||
36 | #endif | ||
37 | }; | ||
38 | |||
39 | static bool selftest_opcode_one(const struct selftest_opcode *op) | ||
40 | { | ||
41 | unsigned size; | ||
42 | |||
43 | kmemcheck_opcode_decode(op->insn, &size); | ||
44 | |||
45 | if (size == op->expected_size) | ||
46 | return true; | ||
47 | |||
48 | printk(KERN_WARNING "kmemcheck: opcode %s: expected size %d, got %d\n", | ||
49 | op->desc, op->expected_size, size); | ||
50 | return false; | ||
51 | } | ||
52 | |||
53 | static bool selftest_opcodes_all(void) | ||
54 | { | ||
55 | bool pass = true; | ||
56 | unsigned int i; | ||
57 | |||
58 | for (i = 0; i < ARRAY_SIZE(selftest_opcodes); ++i) | ||
59 | pass = pass && selftest_opcode_one(&selftest_opcodes[i]); | ||
60 | |||
61 | return pass; | ||
62 | } | ||
63 | |||
64 | bool kmemcheck_selftest(void) | ||
65 | { | ||
66 | bool pass = true; | ||
67 | |||
68 | pass = pass && selftest_opcodes_all(); | ||
69 | |||
70 | return pass; | ||
71 | } | ||
diff --git a/arch/x86/mm/kmemcheck/selftest.h b/arch/x86/mm/kmemcheck/selftest.h index 8d759aae453d..ea32a7d3cf1b 100644 --- a/arch/x86/mm/kmemcheck/selftest.h +++ b/arch/x86/mm/kmemcheck/selftest.h | |||
@@ -1,7 +1 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | /* SPDX-License-Identifier: GPL-2.0 */ | |
2 | #ifndef ARCH_X86_MM_KMEMCHECK_SELFTEST_H | ||
3 | #define ARCH_X86_MM_KMEMCHECK_SELFTEST_H | ||
4 | |||
5 | bool kmemcheck_selftest(void); | ||
6 | |||
7 | #endif | ||
diff --git a/arch/x86/mm/kmemcheck/shadow.c b/arch/x86/mm/kmemcheck/shadow.c deleted file mode 100644 index c2638a7d2c10..000000000000 --- a/arch/x86/mm/kmemcheck/shadow.c +++ /dev/null | |||
@@ -1,173 +0,0 @@ | |||
1 | #include <linux/kmemcheck.h> | ||
2 | #include <linux/export.h> | ||
3 | #include <linux/mm.h> | ||
4 | |||
5 | #include <asm/page.h> | ||
6 | #include <asm/pgtable.h> | ||
7 | |||
8 | #include "pte.h" | ||
9 | #include "shadow.h" | ||
10 | |||
11 | /* | ||
12 | * Return the shadow address for the given address. Returns NULL if the | ||
13 | * address is not tracked. | ||
14 | * | ||
15 | * We need to be extremely careful not to follow any invalid pointers, | ||
16 | * because this function can be called for *any* possible address. | ||
17 | */ | ||
18 | void *kmemcheck_shadow_lookup(unsigned long address) | ||
19 | { | ||
20 | pte_t *pte; | ||
21 | struct page *page; | ||
22 | |||
23 | if (!virt_addr_valid(address)) | ||
24 | return NULL; | ||
25 | |||
26 | pte = kmemcheck_pte_lookup(address); | ||
27 | if (!pte) | ||
28 | return NULL; | ||
29 | |||
30 | page = virt_to_page(address); | ||
31 | if (!page->shadow) | ||
32 | return NULL; | ||
33 | return page->shadow + (address & (PAGE_SIZE - 1)); | ||
34 | } | ||
35 | |||
36 | static void mark_shadow(void *address, unsigned int n, | ||
37 | enum kmemcheck_shadow status) | ||
38 | { | ||
39 | unsigned long addr = (unsigned long) address; | ||
40 | unsigned long last_addr = addr + n - 1; | ||
41 | unsigned long page = addr & PAGE_MASK; | ||
42 | unsigned long last_page = last_addr & PAGE_MASK; | ||
43 | unsigned int first_n; | ||
44 | void *shadow; | ||
45 | |||
46 | /* If the memory range crosses a page boundary, stop there. */ | ||
47 | if (page == last_page) | ||
48 | first_n = n; | ||
49 | else | ||
50 | first_n = page + PAGE_SIZE - addr; | ||
51 | |||
52 | shadow = kmemcheck_shadow_lookup(addr); | ||
53 | if (shadow) | ||
54 | memset(shadow, status, first_n); | ||
55 | |||
56 | addr += first_n; | ||
57 | n -= first_n; | ||
58 | |||
59 | /* Do full-page memset()s. */ | ||
60 | while (n >= PAGE_SIZE) { | ||
61 | shadow = kmemcheck_shadow_lookup(addr); | ||
62 | if (shadow) | ||
63 | memset(shadow, status, PAGE_SIZE); | ||
64 | |||
65 | addr += PAGE_SIZE; | ||
66 | n -= PAGE_SIZE; | ||
67 | } | ||
68 | |||
69 | /* Do the remaining page, if any. */ | ||
70 | if (n > 0) { | ||
71 | shadow = kmemcheck_shadow_lookup(addr); | ||
72 | if (shadow) | ||
73 | memset(shadow, status, n); | ||
74 | } | ||
75 | } | ||
76 | |||
77 | void kmemcheck_mark_unallocated(void *address, unsigned int n) | ||
78 | { | ||
79 | mark_shadow(address, n, KMEMCHECK_SHADOW_UNALLOCATED); | ||
80 | } | ||
81 | |||
82 | void kmemcheck_mark_uninitialized(void *address, unsigned int n) | ||
83 | { | ||
84 | mark_shadow(address, n, KMEMCHECK_SHADOW_UNINITIALIZED); | ||
85 | } | ||
86 | |||
87 | /* | ||
88 | * Fill the shadow memory of the given address such that the memory at that | ||
89 | * address is marked as being initialized. | ||
90 | */ | ||
91 | void kmemcheck_mark_initialized(void *address, unsigned int n) | ||
92 | { | ||
93 | mark_shadow(address, n, KMEMCHECK_SHADOW_INITIALIZED); | ||
94 | } | ||
95 | EXPORT_SYMBOL_GPL(kmemcheck_mark_initialized); | ||
96 | |||
97 | void kmemcheck_mark_freed(void *address, unsigned int n) | ||
98 | { | ||
99 | mark_shadow(address, n, KMEMCHECK_SHADOW_FREED); | ||
100 | } | ||
101 | |||
102 | void kmemcheck_mark_unallocated_pages(struct page *p, unsigned int n) | ||
103 | { | ||
104 | unsigned int i; | ||
105 | |||
106 | for (i = 0; i < n; ++i) | ||
107 | kmemcheck_mark_unallocated(page_address(&p[i]), PAGE_SIZE); | ||
108 | } | ||
109 | |||
110 | void kmemcheck_mark_uninitialized_pages(struct page *p, unsigned int n) | ||
111 | { | ||
112 | unsigned int i; | ||
113 | |||
114 | for (i = 0; i < n; ++i) | ||
115 | kmemcheck_mark_uninitialized(page_address(&p[i]), PAGE_SIZE); | ||
116 | } | ||
117 | |||
118 | void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n) | ||
119 | { | ||
120 | unsigned int i; | ||
121 | |||
122 | for (i = 0; i < n; ++i) | ||
123 | kmemcheck_mark_initialized(page_address(&p[i]), PAGE_SIZE); | ||
124 | } | ||
125 | |||
126 | enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size) | ||
127 | { | ||
128 | #ifdef CONFIG_KMEMCHECK_PARTIAL_OK | ||
129 | uint8_t *x; | ||
130 | unsigned int i; | ||
131 | |||
132 | x = shadow; | ||
133 | |||
134 | /* | ||
135 | * Make sure _some_ bytes are initialized. Gcc frequently generates | ||
136 | * code to access neighboring bytes. | ||
137 | */ | ||
138 | for (i = 0; i < size; ++i) { | ||
139 | if (x[i] == KMEMCHECK_SHADOW_INITIALIZED) | ||
140 | return x[i]; | ||
141 | } | ||
142 | |||
143 | return x[0]; | ||
144 | #else | ||
145 | return kmemcheck_shadow_test_all(shadow, size); | ||
146 | #endif | ||
147 | } | ||
148 | |||
149 | enum kmemcheck_shadow kmemcheck_shadow_test_all(void *shadow, unsigned int size) | ||
150 | { | ||
151 | uint8_t *x; | ||
152 | unsigned int i; | ||
153 | |||
154 | x = shadow; | ||
155 | |||
156 | /* All bytes must be initialized. */ | ||
157 | for (i = 0; i < size; ++i) { | ||
158 | if (x[i] != KMEMCHECK_SHADOW_INITIALIZED) | ||
159 | return x[i]; | ||
160 | } | ||
161 | |||
162 | return x[0]; | ||
163 | } | ||
164 | |||
165 | void kmemcheck_shadow_set(void *shadow, unsigned int size) | ||
166 | { | ||
167 | uint8_t *x; | ||
168 | unsigned int i; | ||
169 | |||
170 | x = shadow; | ||
171 | for (i = 0; i < size; ++i) | ||
172 | x[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
173 | } | ||
diff --git a/arch/x86/mm/kmemcheck/shadow.h b/arch/x86/mm/kmemcheck/shadow.h index 49768dc18664..ea32a7d3cf1b 100644 --- a/arch/x86/mm/kmemcheck/shadow.h +++ b/arch/x86/mm/kmemcheck/shadow.h | |||
@@ -1,19 +1 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | /* SPDX-License-Identifier: GPL-2.0 */ | |
2 | #ifndef ARCH__X86__MM__KMEMCHECK__SHADOW_H | ||
3 | #define ARCH__X86__MM__KMEMCHECK__SHADOW_H | ||
4 | |||
5 | enum kmemcheck_shadow { | ||
6 | KMEMCHECK_SHADOW_UNALLOCATED, | ||
7 | KMEMCHECK_SHADOW_UNINITIALIZED, | ||
8 | KMEMCHECK_SHADOW_INITIALIZED, | ||
9 | KMEMCHECK_SHADOW_FREED, | ||
10 | }; | ||
11 | |||
12 | void *kmemcheck_shadow_lookup(unsigned long address); | ||
13 | |||
14 | enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size); | ||
15 | enum kmemcheck_shadow kmemcheck_shadow_test_all(void *shadow, | ||
16 | unsigned int size); | ||
17 | void kmemcheck_shadow_set(void *shadow, unsigned int size); | ||
18 | |||
19 | #endif | ||
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 3fe68483463c..85cf12219dea 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -753,7 +753,7 @@ static int split_large_page(struct cpa_data *cpa, pte_t *kpte, | |||
753 | 753 | ||
754 | if (!debug_pagealloc_enabled()) | 754 | if (!debug_pagealloc_enabled()) |
755 | spin_unlock(&cpa_lock); | 755 | spin_unlock(&cpa_lock); |
756 | base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0); | 756 | base = alloc_pages(GFP_KERNEL, 0); |
757 | if (!debug_pagealloc_enabled()) | 757 | if (!debug_pagealloc_enabled()) |
758 | spin_lock(&cpa_lock); | 758 | spin_lock(&cpa_lock); |
759 | if (!base) | 759 | if (!base) |
@@ -904,7 +904,7 @@ static void unmap_pud_range(p4d_t *p4d, unsigned long start, unsigned long end) | |||
904 | 904 | ||
905 | static int alloc_pte_page(pmd_t *pmd) | 905 | static int alloc_pte_page(pmd_t *pmd) |
906 | { | 906 | { |
907 | pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); | 907 | pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL); |
908 | if (!pte) | 908 | if (!pte) |
909 | return -1; | 909 | return -1; |
910 | 910 | ||
@@ -914,7 +914,7 @@ static int alloc_pte_page(pmd_t *pmd) | |||
914 | 914 | ||
915 | static int alloc_pmd_page(pud_t *pud) | 915 | static int alloc_pmd_page(pud_t *pud) |
916 | { | 916 | { |
917 | pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); | 917 | pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL); |
918 | if (!pmd) | 918 | if (!pmd) |
919 | return -1; | 919 | return -1; |
920 | 920 | ||
@@ -1120,7 +1120,7 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr) | |||
1120 | pgd_entry = cpa->pgd + pgd_index(addr); | 1120 | pgd_entry = cpa->pgd + pgd_index(addr); |
1121 | 1121 | ||
1122 | if (pgd_none(*pgd_entry)) { | 1122 | if (pgd_none(*pgd_entry)) { |
1123 | p4d = (p4d_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); | 1123 | p4d = (p4d_t *)get_zeroed_page(GFP_KERNEL); |
1124 | if (!p4d) | 1124 | if (!p4d) |
1125 | return -1; | 1125 | return -1; |
1126 | 1126 | ||
@@ -1132,7 +1132,7 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr) | |||
1132 | */ | 1132 | */ |
1133 | p4d = p4d_offset(pgd_entry, addr); | 1133 | p4d = p4d_offset(pgd_entry, addr); |
1134 | if (p4d_none(*p4d)) { | 1134 | if (p4d_none(*p4d)) { |
1135 | pud = (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); | 1135 | pud = (pud_t *)get_zeroed_page(GFP_KERNEL); |
1136 | if (!pud) | 1136 | if (!pud) |
1137 | return -1; | 1137 | return -1; |
1138 | 1138 | ||
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 17ebc5a978cc..96d456a94b03 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c | |||
@@ -7,7 +7,7 @@ | |||
7 | #include <asm/fixmap.h> | 7 | #include <asm/fixmap.h> |
8 | #include <asm/mtrr.h> | 8 | #include <asm/mtrr.h> |
9 | 9 | ||
10 | #define PGALLOC_GFP (GFP_KERNEL_ACCOUNT | __GFP_NOTRACK | __GFP_ZERO) | 10 | #define PGALLOC_GFP (GFP_KERNEL_ACCOUNT | __GFP_ZERO) |
11 | 11 | ||
12 | #ifdef CONFIG_HIGHPTE | 12 | #ifdef CONFIG_HIGHPTE |
13 | #define PGALLOC_USER_GFP __GFP_HIGHMEM | 13 | #define PGALLOC_USER_GFP __GFP_HIGHMEM |
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 9e4ee5b04b2d..6a151ce70e86 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c | |||
@@ -207,7 +207,7 @@ int __init efi_alloc_page_tables(void) | |||
207 | if (efi_enabled(EFI_OLD_MEMMAP)) | 207 | if (efi_enabled(EFI_OLD_MEMMAP)) |
208 | return 0; | 208 | return 0; |
209 | 209 | ||
210 | gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO; | 210 | gfp_mask = GFP_KERNEL | __GFP_ZERO; |
211 | efi_pgd = (pgd_t *)__get_free_page(gfp_mask); | 211 | efi_pgd = (pgd_t *)__get_free_page(gfp_mask); |
212 | if (!efi_pgd) | 212 | if (!efi_pgd) |
213 | return -ENOMEM; | 213 | return -ENOMEM; |
diff --git a/block/blk-mq.c b/block/blk-mq.c index b600463791ec..11097477eeab 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c | |||
@@ -2047,7 +2047,7 @@ static int blk_mq_init_hctx(struct request_queue *q, | |||
2047 | * Allocate space for all possible cpus to avoid allocation at | 2047 | * Allocate space for all possible cpus to avoid allocation at |
2048 | * runtime | 2048 | * runtime |
2049 | */ | 2049 | */ |
2050 | hctx->ctxs = kmalloc_node(nr_cpu_ids * sizeof(void *), | 2050 | hctx->ctxs = kmalloc_array_node(nr_cpu_ids, sizeof(void *), |
2051 | GFP_KERNEL, node); | 2051 | GFP_KERNEL, node); |
2052 | if (!hctx->ctxs) | 2052 | if (!hctx->ctxs) |
2053 | goto unregister_cpu_notifier; | 2053 | goto unregister_cpu_notifier; |
diff --git a/crypto/xor.c b/crypto/xor.c index 263af9fb45ea..bce9fe7af40a 100644 --- a/crypto/xor.c +++ b/crypto/xor.c | |||
@@ -122,12 +122,7 @@ calibrate_xor_blocks(void) | |||
122 | goto out; | 122 | goto out; |
123 | } | 123 | } |
124 | 124 | ||
125 | /* | 125 | b1 = (void *) __get_free_pages(GFP_KERNEL, 2); |
126 | * Note: Since the memory is not actually used for _anything_ but to | ||
127 | * test the XOR speed, we don't really want kmemcheck to warn about | ||
128 | * reading uninitialized bytes here. | ||
129 | */ | ||
130 | b1 = (void *) __get_free_pages(GFP_KERNEL | __GFP_NOTRACK, 2); | ||
131 | if (!b1) { | 126 | if (!b1) { |
132 | printk(KERN_WARNING "xor: Yikes! No memory available.\n"); | 127 | printk(KERN_WARNING "xor: Yikes! No memory available.\n"); |
133 | return -ENOMEM; | 128 | return -ENOMEM; |
diff --git a/drivers/block/brd.c b/drivers/block/brd.c index c1cf87718c2e..588360d79fca 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/radix-tree.h> | 20 | #include <linux/radix-tree.h> |
21 | #include <linux/fs.h> | 21 | #include <linux/fs.h> |
22 | #include <linux/slab.h> | 22 | #include <linux/slab.h> |
23 | #include <linux/backing-dev.h> | ||
23 | #ifdef CONFIG_BLK_DEV_RAM_DAX | 24 | #ifdef CONFIG_BLK_DEV_RAM_DAX |
24 | #include <linux/pfn_t.h> | 25 | #include <linux/pfn_t.h> |
25 | #include <linux/dax.h> | 26 | #include <linux/dax.h> |
@@ -448,6 +449,7 @@ static struct brd_device *brd_alloc(int i) | |||
448 | disk->flags = GENHD_FL_EXT_DEVT; | 449 | disk->flags = GENHD_FL_EXT_DEVT; |
449 | sprintf(disk->disk_name, "ram%d", i); | 450 | sprintf(disk->disk_name, "ram%d", i); |
450 | set_capacity(disk, rd_size * 2); | 451 | set_capacity(disk, rd_size * 2); |
452 | disk->queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO; | ||
451 | 453 | ||
452 | #ifdef CONFIG_BLK_DEV_RAM_DAX | 454 | #ifdef CONFIG_BLK_DEV_RAM_DAX |
453 | queue_flag_set_unlocked(QUEUE_FLAG_DAX, brd->brd_queue); | 455 | queue_flag_set_unlocked(QUEUE_FLAG_DAX, brd->brd_queue); |
diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index 5b8992beffec..4ed0a78fdc09 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c | |||
@@ -23,15 +23,15 @@ static const char * const backends[] = { | |||
23 | #if IS_ENABLED(CONFIG_CRYPTO_LZ4) | 23 | #if IS_ENABLED(CONFIG_CRYPTO_LZ4) |
24 | "lz4", | 24 | "lz4", |
25 | #endif | 25 | #endif |
26 | #if IS_ENABLED(CONFIG_CRYPTO_DEFLATE) | ||
27 | "deflate", | ||
28 | #endif | ||
29 | #if IS_ENABLED(CONFIG_CRYPTO_LZ4HC) | 26 | #if IS_ENABLED(CONFIG_CRYPTO_LZ4HC) |
30 | "lz4hc", | 27 | "lz4hc", |
31 | #endif | 28 | #endif |
32 | #if IS_ENABLED(CONFIG_CRYPTO_842) | 29 | #if IS_ENABLED(CONFIG_CRYPTO_842) |
33 | "842", | 30 | "842", |
34 | #endif | 31 | #endif |
32 | #if IS_ENABLED(CONFIG_CRYPTO_ZSTD) | ||
33 | "zstd", | ||
34 | #endif | ||
35 | NULL | 35 | NULL |
36 | }; | 36 | }; |
37 | 37 | ||
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index f149d3e61234..d70eba30003a 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c | |||
@@ -122,14 +122,6 @@ static inline bool is_partial_io(struct bio_vec *bvec) | |||
122 | } | 122 | } |
123 | #endif | 123 | #endif |
124 | 124 | ||
125 | static void zram_revalidate_disk(struct zram *zram) | ||
126 | { | ||
127 | revalidate_disk(zram->disk); | ||
128 | /* revalidate_disk reset the BDI_CAP_STABLE_WRITES so set again */ | ||
129 | zram->disk->queue->backing_dev_info->capabilities |= | ||
130 | BDI_CAP_STABLE_WRITES; | ||
131 | } | ||
132 | |||
133 | /* | 125 | /* |
134 | * Check if request is within bounds and aligned on zram logical blocks. | 126 | * Check if request is within bounds and aligned on zram logical blocks. |
135 | */ | 127 | */ |
@@ -436,7 +428,7 @@ static void put_entry_bdev(struct zram *zram, unsigned long entry) | |||
436 | WARN_ON_ONCE(!was_set); | 428 | WARN_ON_ONCE(!was_set); |
437 | } | 429 | } |
438 | 430 | ||
439 | void zram_page_end_io(struct bio *bio) | 431 | static void zram_page_end_io(struct bio *bio) |
440 | { | 432 | { |
441 | struct page *page = bio->bi_io_vec[0].bv_page; | 433 | struct page *page = bio->bi_io_vec[0].bv_page; |
442 | 434 | ||
@@ -1373,7 +1365,8 @@ static ssize_t disksize_store(struct device *dev, | |||
1373 | zram->comp = comp; | 1365 | zram->comp = comp; |
1374 | zram->disksize = disksize; | 1366 | zram->disksize = disksize; |
1375 | set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); | 1367 | set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); |
1376 | zram_revalidate_disk(zram); | 1368 | |
1369 | revalidate_disk(zram->disk); | ||
1377 | up_write(&zram->init_lock); | 1370 | up_write(&zram->init_lock); |
1378 | 1371 | ||
1379 | return len; | 1372 | return len; |
@@ -1420,7 +1413,7 @@ static ssize_t reset_store(struct device *dev, | |||
1420 | /* Make sure all the pending I/O are finished */ | 1413 | /* Make sure all the pending I/O are finished */ |
1421 | fsync_bdev(bdev); | 1414 | fsync_bdev(bdev); |
1422 | zram_reset_device(zram); | 1415 | zram_reset_device(zram); |
1423 | zram_revalidate_disk(zram); | 1416 | revalidate_disk(zram->disk); |
1424 | bdput(bdev); | 1417 | bdput(bdev); |
1425 | 1418 | ||
1426 | mutex_lock(&bdev->bd_mutex); | 1419 | mutex_lock(&bdev->bd_mutex); |
@@ -1539,6 +1532,7 @@ static int zram_add(void) | |||
1539 | /* zram devices sort of resembles non-rotational disks */ | 1532 | /* zram devices sort of resembles non-rotational disks */ |
1540 | queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); | 1533 | queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); |
1541 | queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue); | 1534 | queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue); |
1535 | |||
1542 | /* | 1536 | /* |
1543 | * To ensure that we always get PAGE_SIZE aligned | 1537 | * To ensure that we always get PAGE_SIZE aligned |
1544 | * and n*PAGE_SIZED sized I/O requests. | 1538 | * and n*PAGE_SIZED sized I/O requests. |
@@ -1563,6 +1557,8 @@ static int zram_add(void) | |||
1563 | if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE) | 1557 | if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE) |
1564 | blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX); | 1558 | blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX); |
1565 | 1559 | ||
1560 | zram->disk->queue->backing_dev_info->capabilities |= | ||
1561 | (BDI_CAP_STABLE_WRITES | BDI_CAP_SYNCHRONOUS_IO); | ||
1566 | add_disk(zram->disk); | 1562 | add_disk(zram->disk); |
1567 | 1563 | ||
1568 | ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj, | 1564 | ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj, |
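Note on the zram hunks above: the zram_revalidate_disk() wrapper is replaced by a plain revalidate_disk() call, and the BDI capability bits are instead set once on the queue's backing_dev_info before add_disk() rather than re-applied after every revalidate. (The same BDI_CAP_SYNCHRONOUS_IO bit is added to brd, btt and pmem elsewhere in this merge.) A minimal sketch of the resulting pattern, error handling omitted:

    /* at device creation, before the disk goes live */
    zram->disk->queue->backing_dev_info->capabilities |=
            (BDI_CAP_STABLE_WRITES | BDI_CAP_SYNCHRONOUS_IO);
    add_disk(zram->disk);

    /* after disksize_store()/reset_store(), a plain revalidate is enough */
    revalidate_disk(zram->disk);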
diff --git a/drivers/char/random.c b/drivers/char/random.c index 6c7ccac2679e..ec42c8bb9b0d 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c | |||
@@ -259,7 +259,6 @@ | |||
259 | #include <linux/cryptohash.h> | 259 | #include <linux/cryptohash.h> |
260 | #include <linux/fips.h> | 260 | #include <linux/fips.h> |
261 | #include <linux/ptrace.h> | 261 | #include <linux/ptrace.h> |
262 | #include <linux/kmemcheck.h> | ||
263 | #include <linux/workqueue.h> | 262 | #include <linux/workqueue.h> |
264 | #include <linux/irq.h> | 263 | #include <linux/irq.h> |
265 | #include <linux/syscalls.h> | 264 | #include <linux/syscalls.h> |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 60d8bedb694d..cd664832f9e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | |||
@@ -553,8 +553,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, | |||
553 | * invalidated it. Free it and try again | 553 | * invalidated it. Free it and try again |
554 | */ | 554 | */ |
555 | release_pages(e->user_pages, | 555 | release_pages(e->user_pages, |
556 | e->robj->tbo.ttm->num_pages, | 556 | e->robj->tbo.ttm->num_pages); |
557 | false); | ||
558 | kvfree(e->user_pages); | 557 | kvfree(e->user_pages); |
559 | e->user_pages = NULL; | 558 | e->user_pages = NULL; |
560 | } | 559 | } |
@@ -691,8 +690,7 @@ error_free_pages: | |||
691 | continue; | 690 | continue; |
692 | 691 | ||
693 | release_pages(e->user_pages, | 692 | release_pages(e->user_pages, |
694 | e->robj->tbo.ttm->num_pages, | 693 | e->robj->tbo.ttm->num_pages); |
695 | false); | ||
696 | kvfree(e->user_pages); | 694 | kvfree(e->user_pages); |
697 | } | 695 | } |
698 | } | 696 | } |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 6149a47fe63d..0bda8f2a188a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | |||
@@ -347,7 +347,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, | |||
347 | return 0; | 347 | return 0; |
348 | 348 | ||
349 | free_pages: | 349 | free_pages: |
350 | release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages, false); | 350 | release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages); |
351 | 351 | ||
352 | unlock_mmap_sem: | 352 | unlock_mmap_sem: |
353 | up_read(¤t->mm->mmap_sem); | 353 | up_read(¤t->mm->mmap_sem); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index bc746131987f..d792959fac43 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | |||
@@ -659,7 +659,7 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) | |||
659 | return 0; | 659 | return 0; |
660 | 660 | ||
661 | release_pages: | 661 | release_pages: |
662 | release_pages(pages, pinned, 0); | 662 | release_pages(pages, pinned); |
663 | return r; | 663 | return r; |
664 | } | 664 | } |
665 | 665 | ||
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index 57881167ccd2..bcc8c2d7c7c9 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c | |||
@@ -779,7 +779,7 @@ static struct page **etnaviv_gem_userptr_do_get_pages( | |||
779 | up_read(&mm->mmap_sem); | 779 | up_read(&mm->mmap_sem); |
780 | 780 | ||
781 | if (ret < 0) { | 781 | if (ret < 0) { |
782 | release_pages(pvec, pinned, 0); | 782 | release_pages(pvec, pinned); |
783 | kvfree(pvec); | 783 | kvfree(pvec); |
784 | return ERR_PTR(ret); | 784 | return ERR_PTR(ret); |
785 | } | 785 | } |
@@ -852,7 +852,7 @@ static int etnaviv_gem_userptr_get_pages(struct etnaviv_gem_object *etnaviv_obj) | |||
852 | } | 852 | } |
853 | } | 853 | } |
854 | 854 | ||
855 | release_pages(pvec, pinned, 0); | 855 | release_pages(pvec, pinned); |
856 | kvfree(pvec); | 856 | kvfree(pvec); |
857 | 857 | ||
858 | work = kmalloc(sizeof(*work), GFP_KERNEL); | 858 | work = kmalloc(sizeof(*work), GFP_KERNEL); |
@@ -886,7 +886,7 @@ static void etnaviv_gem_userptr_release(struct etnaviv_gem_object *etnaviv_obj) | |||
886 | if (etnaviv_obj->pages) { | 886 | if (etnaviv_obj->pages) { |
887 | int npages = etnaviv_obj->base.size >> PAGE_SHIFT; | 887 | int npages = etnaviv_obj->base.size >> PAGE_SHIFT; |
888 | 888 | ||
889 | release_pages(etnaviv_obj->pages, npages, 0); | 889 | release_pages(etnaviv_obj->pages, npages); |
890 | kvfree(etnaviv_obj->pages); | 890 | kvfree(etnaviv_obj->pages); |
891 | } | 891 | } |
892 | put_task_struct(etnaviv_obj->userptr.task); | 892 | put_task_struct(etnaviv_obj->userptr.task); |
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index ad524cb0f6fc..7982ad817c11 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c | |||
@@ -1859,7 +1859,7 @@ static void i915_address_space_init(struct i915_address_space *vm, | |||
1859 | INIT_LIST_HEAD(&vm->unbound_list); | 1859 | INIT_LIST_HEAD(&vm->unbound_list); |
1860 | 1860 | ||
1861 | list_add_tail(&vm->global_link, &dev_priv->vm_list); | 1861 | list_add_tail(&vm->global_link, &dev_priv->vm_list); |
1862 | pagevec_init(&vm->free_pages, false); | 1862 | pagevec_init(&vm->free_pages); |
1863 | } | 1863 | } |
1864 | 1864 | ||
1865 | static void i915_address_space_fini(struct i915_address_space *vm) | 1865 | static void i915_address_space_fini(struct i915_address_space *vm) |
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 709efe2357ea..aa22361bd5a1 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c | |||
@@ -554,7 +554,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) | |||
554 | } | 554 | } |
555 | mutex_unlock(&obj->mm.lock); | 555 | mutex_unlock(&obj->mm.lock); |
556 | 556 | ||
557 | release_pages(pvec, pinned, 0); | 557 | release_pages(pvec, pinned); |
558 | kvfree(pvec); | 558 | kvfree(pvec); |
559 | 559 | ||
560 | i915_gem_object_put(obj); | 560 | i915_gem_object_put(obj); |
@@ -668,7 +668,7 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) | |||
668 | __i915_gem_userptr_set_active(obj, true); | 668 | __i915_gem_userptr_set_active(obj, true); |
669 | 669 | ||
670 | if (IS_ERR(pages)) | 670 | if (IS_ERR(pages)) |
671 | release_pages(pvec, pinned, 0); | 671 | release_pages(pvec, pinned); |
672 | kvfree(pvec); | 672 | kvfree(pvec); |
673 | 673 | ||
674 | return pages; | 674 | return pages; |
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index bf69bf9086bf..1fdfc7a46072 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c | |||
@@ -597,7 +597,7 @@ release_sg: | |||
597 | kfree(ttm->sg); | 597 | kfree(ttm->sg); |
598 | 598 | ||
599 | release_pages: | 599 | release_pages: |
600 | release_pages(ttm->pages, pinned, 0); | 600 | release_pages(ttm->pages, pinned); |
601 | return r; | 601 | return r; |
602 | } | 602 | } |
603 | 603 | ||
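The amdgpu, etnaviv, i915 and radeon hunks above all track one API change: release_pages() dropped its third "cold" parameter, so callers now pass only the page array and the count. A one-line before/after sketch (illustrative; pages/nr stand for whatever the caller pinned):

    /* before: release_pages(pages, nr, false); */
    release_pages(pages, nr);       /* drop the page references */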
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 5243ad30dfc0..85dfbba427f6 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c | |||
@@ -1667,8 +1667,9 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd) | |||
1667 | } | 1667 | } |
1668 | if (!rcd->rcvegrbuf_phys) { | 1668 | if (!rcd->rcvegrbuf_phys) { |
1669 | rcd->rcvegrbuf_phys = | 1669 | rcd->rcvegrbuf_phys = |
1670 | kmalloc_node(chunk * sizeof(rcd->rcvegrbuf_phys[0]), | 1670 | kmalloc_array_node(chunk, |
1671 | GFP_KERNEL, rcd->node_id); | 1671 | sizeof(rcd->rcvegrbuf_phys[0]), |
1672 | GFP_KERNEL, rcd->node_id); | ||
1672 | if (!rcd->rcvegrbuf_phys) | 1673 | if (!rcd->rcvegrbuf_phys) |
1673 | goto bail_rcvegrbuf; | 1674 | goto bail_rcvegrbuf; |
1674 | } | 1675 | } |
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 410025a19729..9177df60742a 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c | |||
@@ -238,7 +238,7 @@ int rvt_driver_qp_init(struct rvt_dev_info *rdi) | |||
238 | rdi->qp_dev->qp_table_size = rdi->dparms.qp_table_size; | 238 | rdi->qp_dev->qp_table_size = rdi->dparms.qp_table_size; |
239 | rdi->qp_dev->qp_table_bits = ilog2(rdi->dparms.qp_table_size); | 239 | rdi->qp_dev->qp_table_bits = ilog2(rdi->dparms.qp_table_size); |
240 | rdi->qp_dev->qp_table = | 240 | rdi->qp_dev->qp_table = |
241 | kmalloc_node(rdi->qp_dev->qp_table_size * | 241 | kmalloc_array_node(rdi->qp_dev->qp_table_size, |
242 | sizeof(*rdi->qp_dev->qp_table), | 242 | sizeof(*rdi->qp_dev->qp_table), |
243 | GFP_KERNEL, rdi->dparms.node); | 243 | GFP_KERNEL, rdi->dparms.node); |
244 | if (!rdi->qp_dev->qp_table) | 244 | if (!rdi->qp_dev->qp_table) |
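The qib and rdmavt hunks convert open-coded `count * size` allocations to the kmalloc_array_node() helper introduced in this merge, which takes the element count and element size separately and, like kmalloc_array(), is meant to fail cleanly if the multiplication would overflow. A minimal sketch with illustrative names:

    /* before: tbl = kmalloc_node(nelems * sizeof(*tbl), GFP_KERNEL, node); */
    tbl = kmalloc_array_node(nelems, sizeof(*tbl), GFP_KERNEL, node);
    if (!tbl)
            return -ENOMEM;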
diff --git a/drivers/misc/c2port/core.c b/drivers/misc/c2port/core.c index 1922cb8f6b88..1c5b7aec13d4 100644 --- a/drivers/misc/c2port/core.c +++ b/drivers/misc/c2port/core.c | |||
@@ -15,7 +15,6 @@ | |||
15 | #include <linux/errno.h> | 15 | #include <linux/errno.h> |
16 | #include <linux/err.h> | 16 | #include <linux/err.h> |
17 | #include <linux/kernel.h> | 17 | #include <linux/kernel.h> |
18 | #include <linux/kmemcheck.h> | ||
19 | #include <linux/ctype.h> | 18 | #include <linux/ctype.h> |
20 | #include <linux/delay.h> | 19 | #include <linux/delay.h> |
21 | #include <linux/idr.h> | 20 | #include <linux/idr.h> |
@@ -904,7 +903,6 @@ struct c2port_device *c2port_device_register(char *name, | |||
904 | return ERR_PTR(-EINVAL); | 903 | return ERR_PTR(-EINVAL); |
905 | 904 | ||
906 | c2dev = kmalloc(sizeof(struct c2port_device), GFP_KERNEL); | 905 | c2dev = kmalloc(sizeof(struct c2port_device), GFP_KERNEL); |
907 | kmemcheck_annotate_bitfield(c2dev, flags); | ||
908 | if (unlikely(!c2dev)) | 906 | if (unlikely(!c2dev)) |
909 | return ERR_PTR(-ENOMEM); | 907 | return ERR_PTR(-ENOMEM); |
910 | 908 | ||
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 5417e4da64ca..7451922c209d 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c | |||
@@ -517,7 +517,7 @@ static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num) | |||
517 | 517 | ||
518 | 518 | ||
519 | rc = ena_alloc_rx_page(rx_ring, rx_info, | 519 | rc = ena_alloc_rx_page(rx_ring, rx_info, |
520 | __GFP_COLD | GFP_ATOMIC | __GFP_COMP); | 520 | GFP_ATOMIC | __GFP_COMP); |
521 | if (unlikely(rc < 0)) { | 521 | if (unlikely(rc < 0)) { |
522 | netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev, | 522 | netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev, |
523 | "failed to alloc buffer for rx queue %d\n", | 523 | "failed to alloc buffer for rx queue %d\n", |
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c index 45d92304068e..cc1e4f820e64 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c | |||
@@ -295,7 +295,7 @@ again: | |||
295 | order = alloc_order; | 295 | order = alloc_order; |
296 | 296 | ||
297 | /* Try to obtain pages, decreasing order if necessary */ | 297 | /* Try to obtain pages, decreasing order if necessary */ |
298 | gfp = GFP_ATOMIC | __GFP_COLD | __GFP_COMP | __GFP_NOWARN; | 298 | gfp = GFP_ATOMIC | __GFP_COMP | __GFP_NOWARN; |
299 | while (order >= 0) { | 299 | while (order >= 0) { |
300 | pages = alloc_pages_node(node, gfp, order); | 300 | pages = alloc_pages_node(node, gfp, order); |
301 | if (pages) | 301 | if (pages) |
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 0654e0c76bc2..519ca6534b85 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c | |||
@@ -304,8 +304,7 @@ int aq_ring_rx_fill(struct aq_ring_s *self) | |||
304 | buff->flags = 0U; | 304 | buff->flags = 0U; |
305 | buff->len = AQ_CFG_RX_FRAME_MAX; | 305 | buff->len = AQ_CFG_RX_FRAME_MAX; |
306 | 306 | ||
307 | buff->page = alloc_pages(GFP_ATOMIC | __GFP_COLD | | 307 | buff->page = alloc_pages(GFP_ATOMIC | __GFP_COMP, pages_order); |
308 | __GFP_COMP, pages_order); | ||
309 | if (!buff->page) { | 308 | if (!buff->page) { |
310 | err = -ENOMEM; | 309 | err = -ENOMEM; |
311 | goto err_exit; | 310 | goto err_exit; |
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h index 433f3619de8f..f2d1a076a038 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h | |||
@@ -198,7 +198,7 @@ static inline void | |||
198 | struct sk_buff *skb; | 198 | struct sk_buff *skb; |
199 | struct octeon_skb_page_info *skb_pg_info; | 199 | struct octeon_skb_page_info *skb_pg_info; |
200 | 200 | ||
201 | page = alloc_page(GFP_ATOMIC | __GFP_COLD); | 201 | page = alloc_page(GFP_ATOMIC); |
202 | if (unlikely(!page)) | 202 | if (unlikely(!page)) |
203 | return NULL; | 203 | return NULL; |
204 | 204 | ||
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 92aec17f4b4d..85e28efcda33 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c | |||
@@ -193,7 +193,7 @@ static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv) | |||
193 | 193 | ||
194 | if (mlx4_en_prepare_rx_desc(priv, ring, | 194 | if (mlx4_en_prepare_rx_desc(priv, ring, |
195 | ring->actual_size, | 195 | ring->actual_size, |
196 | GFP_KERNEL | __GFP_COLD)) { | 196 | GFP_KERNEL)) { |
197 | if (ring->actual_size < MLX4_EN_MIN_RX_SIZE) { | 197 | if (ring->actual_size < MLX4_EN_MIN_RX_SIZE) { |
198 | en_err(priv, "Failed to allocate enough rx buffers\n"); | 198 | en_err(priv, "Failed to allocate enough rx buffers\n"); |
199 | return -ENOMEM; | 199 | return -ENOMEM; |
@@ -551,8 +551,7 @@ static void mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv, | |||
551 | do { | 551 | do { |
552 | if (mlx4_en_prepare_rx_desc(priv, ring, | 552 | if (mlx4_en_prepare_rx_desc(priv, ring, |
553 | ring->prod & ring->size_mask, | 553 | ring->prod & ring->size_mask, |
554 | GFP_ATOMIC | __GFP_COLD | | 554 | GFP_ATOMIC | __GFP_MEMALLOC)) |
555 | __GFP_MEMALLOC)) | ||
556 | break; | 555 | break; |
557 | ring->prod++; | 556 | ring->prod++; |
558 | } while (likely(--missing)); | 557 | } while (likely(--missing)); |
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 232044b1b7aa..1a603fdd9e80 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c | |||
@@ -1185,7 +1185,7 @@ static void *nfp_net_rx_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr) | |||
1185 | } else { | 1185 | } else { |
1186 | struct page *page; | 1186 | struct page *page; |
1187 | 1187 | ||
1188 | page = alloc_page(GFP_KERNEL | __GFP_COLD); | 1188 | page = alloc_page(GFP_KERNEL); |
1189 | frag = page ? page_address(page) : NULL; | 1189 | frag = page ? page_address(page) : NULL; |
1190 | } | 1190 | } |
1191 | if (!frag) { | 1191 | if (!frag) { |
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c index 29fea74bff2e..7b97a9969046 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c | |||
@@ -1092,8 +1092,7 @@ static int ql_get_next_chunk(struct ql_adapter *qdev, struct rx_ring *rx_ring, | |||
1092 | { | 1092 | { |
1093 | if (!rx_ring->pg_chunk.page) { | 1093 | if (!rx_ring->pg_chunk.page) { |
1094 | u64 map; | 1094 | u64 map; |
1095 | rx_ring->pg_chunk.page = alloc_pages(__GFP_COLD | __GFP_COMP | | 1095 | rx_ring->pg_chunk.page = alloc_pages(__GFP_COMP | GFP_ATOMIC, |
1096 | GFP_ATOMIC, | ||
1097 | qdev->lbq_buf_order); | 1096 | qdev->lbq_buf_order); |
1098 | if (unlikely(!rx_ring->pg_chunk.page)) { | 1097 | if (unlikely(!rx_ring->pg_chunk.page)) { |
1099 | netif_err(qdev, drv, qdev->ndev, | 1098 | netif_err(qdev, drv, qdev->ndev, |
diff --git a/drivers/net/ethernet/sfc/falcon/rx.c b/drivers/net/ethernet/sfc/falcon/rx.c index 382019b302db..02456ed13a7d 100644 --- a/drivers/net/ethernet/sfc/falcon/rx.c +++ b/drivers/net/ethernet/sfc/falcon/rx.c | |||
@@ -163,7 +163,7 @@ static int ef4_init_rx_buffers(struct ef4_rx_queue *rx_queue, bool atomic) | |||
163 | do { | 163 | do { |
164 | page = ef4_reuse_page(rx_queue); | 164 | page = ef4_reuse_page(rx_queue); |
165 | if (page == NULL) { | 165 | if (page == NULL) { |
166 | page = alloc_pages(__GFP_COLD | __GFP_COMP | | 166 | page = alloc_pages(__GFP_COMP | |
167 | (atomic ? GFP_ATOMIC : GFP_KERNEL), | 167 | (atomic ? GFP_ATOMIC : GFP_KERNEL), |
168 | efx->rx_buffer_order); | 168 | efx->rx_buffer_order); |
169 | if (unlikely(page == NULL)) | 169 | if (unlikely(page == NULL)) |
diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index 8cb60513dca2..cfe76aad79ee 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c | |||
@@ -163,7 +163,7 @@ static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic) | |||
163 | do { | 163 | do { |
164 | page = efx_reuse_page(rx_queue); | 164 | page = efx_reuse_page(rx_queue); |
165 | if (page == NULL) { | 165 | if (page == NULL) { |
166 | page = alloc_pages(__GFP_COLD | __GFP_COMP | | 166 | page = alloc_pages(__GFP_COMP | |
167 | (atomic ? GFP_ATOMIC : GFP_KERNEL), | 167 | (atomic ? GFP_ATOMIC : GFP_KERNEL), |
168 | efx->rx_buffer_order); | 168 | efx->rx_buffer_order); |
169 | if (unlikely(page == NULL)) | 169 | if (unlikely(page == NULL)) |
diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac-desc.c b/drivers/net/ethernet/synopsys/dwc-xlgmac-desc.c index e9672b1f9968..031cf9c3435a 100644 --- a/drivers/net/ethernet/synopsys/dwc-xlgmac-desc.c +++ b/drivers/net/ethernet/synopsys/dwc-xlgmac-desc.c | |||
@@ -335,7 +335,7 @@ static int xlgmac_alloc_pages(struct xlgmac_pdata *pdata, | |||
335 | dma_addr_t pages_dma; | 335 | dma_addr_t pages_dma; |
336 | 336 | ||
337 | /* Try to obtain pages, decreasing order if necessary */ | 337 | /* Try to obtain pages, decreasing order if necessary */ |
338 | gfp |= __GFP_COLD | __GFP_COMP | __GFP_NOWARN; | 338 | gfp |= __GFP_COMP | __GFP_NOWARN; |
339 | while (order >= 0) { | 339 | while (order >= 0) { |
340 | pages = alloc_pages(gfp, order); | 340 | pages = alloc_pages(gfp, order); |
341 | if (pages) | 341 | if (pages) |
diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 15e2e3031d36..ed58c746e4af 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c | |||
@@ -906,7 +906,7 @@ static int netcp_allocate_rx_buf(struct netcp_intf *netcp, int fdq) | |||
906 | sw_data[0] = (u32)bufptr; | 906 | sw_data[0] = (u32)bufptr; |
907 | } else { | 907 | } else { |
908 | /* Allocate a secondary receive queue entry */ | 908 | /* Allocate a secondary receive queue entry */ |
909 | page = alloc_page(GFP_ATOMIC | GFP_DMA | __GFP_COLD); | 909 | page = alloc_page(GFP_ATOMIC | GFP_DMA); |
910 | if (unlikely(!page)) { | 910 | if (unlikely(!page)) { |
911 | dev_warn_ratelimited(netcp->ndev_dev, "Secondary page alloc failed\n"); | 911 | dev_warn_ratelimited(netcp->ndev_dev, "Secondary page alloc failed\n"); |
912 | goto fail; | 912 | goto fail; |
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index edf984406ba0..19a985ef9104 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c | |||
@@ -1030,7 +1030,6 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq, | |||
1030 | int err; | 1030 | int err; |
1031 | bool oom; | 1031 | bool oom; |
1032 | 1032 | ||
1033 | gfp |= __GFP_COLD; | ||
1034 | do { | 1033 | do { |
1035 | if (vi->mergeable_rx_bufs) | 1034 | if (vi->mergeable_rx_bufs) |
1036 | err = add_recvbuf_mergeable(vi, rq, gfp); | 1035 | err = add_recvbuf_mergeable(vi, rq, gfp); |
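The networking hunks above (ena, xgbe, aquantia, liquidio, mlx4, nfp, qlge, sfc, dwc-xlgmac, netcp, virtio_net) are mechanical removals of __GFP_COLD: the series retires the allocator's cold-page hint, so the flag goes away and call sites simply stop passing it. Sketch:

    /* before: page = alloc_page(GFP_ATOMIC | __GFP_COLD); */
    page = alloc_page(GFP_ATOMIC);  /* no hot/cold placement hint anymore */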
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index d5612bd1cc81..e949e3302af4 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <linux/ndctl.h> | 23 | #include <linux/ndctl.h> |
24 | #include <linux/fs.h> | 24 | #include <linux/fs.h> |
25 | #include <linux/nd.h> | 25 | #include <linux/nd.h> |
26 | #include <linux/backing-dev.h> | ||
26 | #include "btt.h" | 27 | #include "btt.h" |
27 | #include "nd.h" | 28 | #include "nd.h" |
28 | 29 | ||
@@ -1402,6 +1403,8 @@ static int btt_blk_init(struct btt *btt) | |||
1402 | btt->btt_disk->private_data = btt; | 1403 | btt->btt_disk->private_data = btt; |
1403 | btt->btt_disk->queue = btt->btt_queue; | 1404 | btt->btt_disk->queue = btt->btt_queue; |
1404 | btt->btt_disk->flags = GENHD_FL_EXT_DEVT; | 1405 | btt->btt_disk->flags = GENHD_FL_EXT_DEVT; |
1406 | btt->btt_disk->queue->backing_dev_info->capabilities |= | ||
1407 | BDI_CAP_SYNCHRONOUS_IO; | ||
1405 | 1408 | ||
1406 | blk_queue_make_request(btt->btt_queue, btt_make_request); | 1409 | blk_queue_make_request(btt->btt_queue, btt_make_request); |
1407 | blk_queue_logical_block_size(btt->btt_queue, btt->sector_size); | 1410 | blk_queue_logical_block_size(btt->btt_queue, btt->sector_size); |
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 39dfd7affa31..7fbc5c5dc8e1 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/uio.h> | 31 | #include <linux/uio.h> |
32 | #include <linux/dax.h> | 32 | #include <linux/dax.h> |
33 | #include <linux/nd.h> | 33 | #include <linux/nd.h> |
34 | #include <linux/backing-dev.h> | ||
34 | #include "pmem.h" | 35 | #include "pmem.h" |
35 | #include "pfn.h" | 36 | #include "pfn.h" |
36 | #include "nd.h" | 37 | #include "nd.h" |
@@ -394,6 +395,7 @@ static int pmem_attach_disk(struct device *dev, | |||
394 | disk->fops = &pmem_fops; | 395 | disk->fops = &pmem_fops; |
395 | disk->queue = q; | 396 | disk->queue = q; |
396 | disk->flags = GENHD_FL_EXT_DEVT; | 397 | disk->flags = GENHD_FL_EXT_DEVT; |
398 | disk->queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO; | ||
397 | nvdimm_namespace_disk_name(ndns, disk->disk_name); | 399 | nvdimm_namespace_disk_name(ndns, disk->disk_name); |
398 | set_capacity(disk, (pmem->size - pmem->pfn_pad - pmem->data_offset) | 400 | set_capacity(disk, (pmem->size - pmem->pfn_pad - pmem->data_offset) |
399 | / 512); | 401 | / 512); |
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_request.c b/drivers/staging/lustre/lustre/mdc/mdc_request.c index 9e538a59f09d..03e55bca4ada 100644 --- a/drivers/staging/lustre/lustre/mdc/mdc_request.c +++ b/drivers/staging/lustre/lustre/mdc/mdc_request.c | |||
@@ -1152,7 +1152,7 @@ static int mdc_read_page_remote(void *data, struct page *page0) | |||
1152 | } | 1152 | } |
1153 | 1153 | ||
1154 | for (npages = 1; npages < max_pages; npages++) { | 1154 | for (npages = 1; npages < max_pages; npages++) { |
1155 | page = page_cache_alloc_cold(inode->i_mapping); | 1155 | page = page_cache_alloc(inode->i_mapping); |
1156 | if (!page) | 1156 | if (!page) |
1157 | break; | 1157 | break; |
1158 | page_pool[npages] = page; | 1158 | page_pool[npages] = page; |
diff --git a/fs/afs/write.c b/fs/afs/write.c index 106e43db1115..11dd0526b96b 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c | |||
@@ -308,7 +308,7 @@ static void afs_kill_pages(struct afs_vnode *vnode, bool error, | |||
308 | _enter("{%x:%u},%lx-%lx", | 308 | _enter("{%x:%u},%lx-%lx", |
309 | vnode->fid.vid, vnode->fid.vnode, first, last); | 309 | vnode->fid.vid, vnode->fid.vnode, first, last); |
310 | 310 | ||
311 | pagevec_init(&pv, 0); | 311 | pagevec_init(&pv); |
312 | 312 | ||
313 | do { | 313 | do { |
314 | _debug("kill %lx-%lx", first, last); | 314 | _debug("kill %lx-%lx", first, last); |
@@ -497,20 +497,13 @@ static int afs_writepages_region(struct address_space *mapping, | |||
497 | _enter(",,%lx,%lx,", index, end); | 497 | _enter(",,%lx,%lx,", index, end); |
498 | 498 | ||
499 | do { | 499 | do { |
500 | n = find_get_pages_tag(mapping, &index, PAGECACHE_TAG_DIRTY, | 500 | n = find_get_pages_range_tag(mapping, &index, end, |
501 | 1, &page); | 501 | PAGECACHE_TAG_DIRTY, 1, &page); |
502 | if (!n) | 502 | if (!n) |
503 | break; | 503 | break; |
504 | 504 | ||
505 | _debug("wback %lx", page->index); | 505 | _debug("wback %lx", page->index); |
506 | 506 | ||
507 | if (page->index > end) { | ||
508 | *_next = index; | ||
509 | put_page(page); | ||
510 | _leave(" = 0 [%lx]", *_next); | ||
511 | return 0; | ||
512 | } | ||
513 | |||
514 | /* at this point we hold neither mapping->tree_lock nor lock on | 507 | /* at this point we hold neither mapping->tree_lock nor lock on |
515 | * the page itself: the page may be truncated or invalidated | 508 | * the page itself: the page may be truncated or invalidated |
516 | * (changing page->mapping to NULL), or even swizzled back from | 509 | * (changing page->mapping to NULL), or even swizzled back from |
@@ -609,7 +602,7 @@ void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call) | |||
609 | 602 | ||
610 | ASSERT(wb != NULL); | 603 | ASSERT(wb != NULL); |
611 | 604 | ||
612 | pagevec_init(&pv, 0); | 605 | pagevec_init(&pv); |
613 | 606 | ||
614 | do { | 607 | do { |
615 | _debug("done %lx-%lx", first, last); | 608 | _debug("done %lx-%lx", first, last); |
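The afs hunks show two related conversions that repeat throughout the filesystem changes below: pagevec_init() lost its "cold" argument, and tag lookups that open-coded an end-of-range check now use the range-aware find_get_pages_range_tag(). Minimal sketches taken from the pattern above:

    struct pagevec pv;

    pagevec_init(&pv);      /* before: pagevec_init(&pv, 0); */

    /* fetch at most one dirty page with index in [index, end] */
    n = find_get_pages_range_tag(mapping, &index, end,
                                 PAGECACHE_TAG_DIRTY, 1, &page);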
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index adbbc017191c..16045ea86fc1 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -3797,7 +3797,7 @@ int btree_write_cache_pages(struct address_space *mapping, | |||
3797 | int scanned = 0; | 3797 | int scanned = 0; |
3798 | int tag; | 3798 | int tag; |
3799 | 3799 | ||
3800 | pagevec_init(&pvec, 0); | 3800 | pagevec_init(&pvec); |
3801 | if (wbc->range_cyclic) { | 3801 | if (wbc->range_cyclic) { |
3802 | index = mapping->writeback_index; /* Start from prev offset */ | 3802 | index = mapping->writeback_index; /* Start from prev offset */ |
3803 | end = -1; | 3803 | end = -1; |
@@ -3814,8 +3814,8 @@ retry: | |||
3814 | if (wbc->sync_mode == WB_SYNC_ALL) | 3814 | if (wbc->sync_mode == WB_SYNC_ALL) |
3815 | tag_pages_for_writeback(mapping, index, end); | 3815 | tag_pages_for_writeback(mapping, index, end); |
3816 | while (!done && !nr_to_write_done && (index <= end) && | 3816 | while (!done && !nr_to_write_done && (index <= end) && |
3817 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, | 3817 | (nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end, |
3818 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { | 3818 | tag))) { |
3819 | unsigned i; | 3819 | unsigned i; |
3820 | 3820 | ||
3821 | scanned = 1; | 3821 | scanned = 1; |
@@ -3825,11 +3825,6 @@ retry: | |||
3825 | if (!PagePrivate(page)) | 3825 | if (!PagePrivate(page)) |
3826 | continue; | 3826 | continue; |
3827 | 3827 | ||
3828 | if (!wbc->range_cyclic && page->index > end) { | ||
3829 | done = 1; | ||
3830 | break; | ||
3831 | } | ||
3832 | |||
3833 | spin_lock(&mapping->private_lock); | 3828 | spin_lock(&mapping->private_lock); |
3834 | if (!PagePrivate(page)) { | 3829 | if (!PagePrivate(page)) { |
3835 | spin_unlock(&mapping->private_lock); | 3830 | spin_unlock(&mapping->private_lock); |
@@ -3941,7 +3936,7 @@ static int extent_write_cache_pages(struct address_space *mapping, | |||
3941 | if (!igrab(inode)) | 3936 | if (!igrab(inode)) |
3942 | return 0; | 3937 | return 0; |
3943 | 3938 | ||
3944 | pagevec_init(&pvec, 0); | 3939 | pagevec_init(&pvec); |
3945 | if (wbc->range_cyclic) { | 3940 | if (wbc->range_cyclic) { |
3946 | index = mapping->writeback_index; /* Start from prev offset */ | 3941 | index = mapping->writeback_index; /* Start from prev offset */ |
3947 | end = -1; | 3942 | end = -1; |
@@ -3961,8 +3956,8 @@ retry: | |||
3961 | tag_pages_for_writeback(mapping, index, end); | 3956 | tag_pages_for_writeback(mapping, index, end); |
3962 | done_index = index; | 3957 | done_index = index; |
3963 | while (!done && !nr_to_write_done && (index <= end) && | 3958 | while (!done && !nr_to_write_done && (index <= end) && |
3964 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, | 3959 | (nr_pages = pagevec_lookup_range_tag(&pvec, mapping, |
3965 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { | 3960 | &index, end, tag))) { |
3966 | unsigned i; | 3961 | unsigned i; |
3967 | 3962 | ||
3968 | scanned = 1; | 3963 | scanned = 1; |
@@ -3987,12 +3982,6 @@ retry: | |||
3987 | continue; | 3982 | continue; |
3988 | } | 3983 | } |
3989 | 3984 | ||
3990 | if (!wbc->range_cyclic && page->index > end) { | ||
3991 | done = 1; | ||
3992 | unlock_page(page); | ||
3993 | continue; | ||
3994 | } | ||
3995 | |||
3996 | if (wbc->sync_mode != WB_SYNC_NONE) { | 3985 | if (wbc->sync_mode != WB_SYNC_NONE) { |
3997 | if (PageWriteback(page)) | 3986 | if (PageWriteback(page)) |
3998 | flush_fn(data); | 3987 | flush_fn(data); |
diff --git a/fs/buffer.c b/fs/buffer.c index 1c18a22a6013..0736a6a2e2f0 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -1592,7 +1592,7 @@ void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len) | |||
1592 | struct buffer_head *head; | 1592 | struct buffer_head *head; |
1593 | 1593 | ||
1594 | end = (block + len - 1) >> (PAGE_SHIFT - bd_inode->i_blkbits); | 1594 | end = (block + len - 1) >> (PAGE_SHIFT - bd_inode->i_blkbits); |
1595 | pagevec_init(&pvec, 0); | 1595 | pagevec_init(&pvec); |
1596 | while (pagevec_lookup_range(&pvec, bd_mapping, &index, end)) { | 1596 | while (pagevec_lookup_range(&pvec, bd_mapping, &index, end)) { |
1597 | count = pagevec_count(&pvec); | 1597 | count = pagevec_count(&pvec); |
1598 | for (i = 0; i < count; i++) { | 1598 | for (i = 0; i < count; i++) { |
@@ -3514,7 +3514,7 @@ page_cache_seek_hole_data(struct inode *inode, loff_t offset, loff_t length, | |||
3514 | if (length <= 0) | 3514 | if (length <= 0) |
3515 | return -ENOENT; | 3515 | return -ENOENT; |
3516 | 3516 | ||
3517 | pagevec_init(&pvec, 0); | 3517 | pagevec_init(&pvec); |
3518 | 3518 | ||
3519 | do { | 3519 | do { |
3520 | unsigned nr_pages, i; | 3520 | unsigned nr_pages, i; |
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 18d7aa61ef0f..883bc7bb12c5 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c | |||
@@ -256,8 +256,7 @@ static int cachefiles_read_backing_file_one(struct cachefiles_object *object, | |||
256 | goto backing_page_already_present; | 256 | goto backing_page_already_present; |
257 | 257 | ||
258 | if (!newpage) { | 258 | if (!newpage) { |
259 | newpage = __page_cache_alloc(cachefiles_gfp | | 259 | newpage = __page_cache_alloc(cachefiles_gfp); |
260 | __GFP_COLD); | ||
261 | if (!newpage) | 260 | if (!newpage) |
262 | goto nomem_monitor; | 261 | goto nomem_monitor; |
263 | } | 262 | } |
@@ -493,8 +492,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, | |||
493 | goto backing_page_already_present; | 492 | goto backing_page_already_present; |
494 | 493 | ||
495 | if (!newpage) { | 494 | if (!newpage) { |
496 | newpage = __page_cache_alloc(cachefiles_gfp | | 495 | newpage = __page_cache_alloc(cachefiles_gfp); |
497 | __GFP_COLD); | ||
498 | if (!newpage) | 496 | if (!newpage) |
499 | goto nomem; | 497 | goto nomem; |
500 | } | 498 | } |
@@ -710,7 +708,7 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, | |||
710 | /* calculate the shift required to use bmap */ | 708 | /* calculate the shift required to use bmap */ |
711 | shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; | 709 | shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; |
712 | 710 | ||
713 | pagevec_init(&pagevec, 0); | 711 | pagevec_init(&pagevec); |
714 | 712 | ||
715 | op->op.flags &= FSCACHE_OP_KEEP_FLAGS; | 713 | op->op.flags &= FSCACHE_OP_KEEP_FLAGS; |
716 | op->op.flags |= FSCACHE_OP_ASYNC; | 714 | op->op.flags |= FSCACHE_OP_ASYNC; |
@@ -844,7 +842,7 @@ int cachefiles_allocate_pages(struct fscache_retrieval *op, | |||
844 | 842 | ||
845 | ret = cachefiles_has_space(cache, 0, *nr_pages); | 843 | ret = cachefiles_has_space(cache, 0, *nr_pages); |
846 | if (ret == 0) { | 844 | if (ret == 0) { |
847 | pagevec_init(&pagevec, 0); | 845 | pagevec_init(&pagevec); |
848 | 846 | ||
849 | list_for_each_entry(page, pages, lru) { | 847 | list_for_each_entry(page, pages, lru) { |
850 | if (pagevec_add(&pagevec, page) == 0) | 848 | if (pagevec_add(&pagevec, page) == 0) |
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 4d622654bfbc..dbf07051aacd 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -680,7 +680,7 @@ static void ceph_release_pages(struct page **pages, int num) | |||
680 | struct pagevec pvec; | 680 | struct pagevec pvec; |
681 | int i; | 681 | int i; |
682 | 682 | ||
683 | pagevec_init(&pvec, 0); | 683 | pagevec_init(&pvec); |
684 | for (i = 0; i < num; i++) { | 684 | for (i = 0; i < num; i++) { |
685 | if (pagevec_add(&pvec, pages[i]) == 0) | 685 | if (pagevec_add(&pvec, pages[i]) == 0) |
686 | pagevec_release(&pvec); | 686 | pagevec_release(&pvec); |
@@ -811,7 +811,7 @@ static int ceph_writepages_start(struct address_space *mapping, | |||
811 | if (fsc->mount_options->wsize < wsize) | 811 | if (fsc->mount_options->wsize < wsize) |
812 | wsize = fsc->mount_options->wsize; | 812 | wsize = fsc->mount_options->wsize; |
813 | 813 | ||
814 | pagevec_init(&pvec, 0); | 814 | pagevec_init(&pvec); |
815 | 815 | ||
816 | start_index = wbc->range_cyclic ? mapping->writeback_index : 0; | 816 | start_index = wbc->range_cyclic ? mapping->writeback_index : 0; |
817 | index = start_index; | 817 | index = start_index; |
@@ -870,15 +870,10 @@ retry: | |||
870 | max_pages = wsize >> PAGE_SHIFT; | 870 | max_pages = wsize >> PAGE_SHIFT; |
871 | 871 | ||
872 | get_more_pages: | 872 | get_more_pages: |
873 | pvec_pages = min_t(unsigned, PAGEVEC_SIZE, | 873 | pvec_pages = pagevec_lookup_range_nr_tag(&pvec, mapping, &index, |
874 | max_pages - locked_pages); | 874 | end, PAGECACHE_TAG_DIRTY, |
875 | if (end - index < (u64)(pvec_pages - 1)) | 875 | max_pages - locked_pages); |
876 | pvec_pages = (unsigned)(end - index) + 1; | 876 | dout("pagevec_lookup_range_tag got %d\n", pvec_pages); |
877 | |||
878 | pvec_pages = pagevec_lookup_tag(&pvec, mapping, &index, | ||
879 | PAGECACHE_TAG_DIRTY, | ||
880 | pvec_pages); | ||
881 | dout("pagevec_lookup_tag got %d\n", pvec_pages); | ||
882 | if (!pvec_pages && !locked_pages) | 877 | if (!pvec_pages && !locked_pages) |
883 | break; | 878 | break; |
884 | for (i = 0; i < pvec_pages && locked_pages < max_pages; i++) { | 879 | for (i = 0; i < pvec_pages && locked_pages < max_pages; i++) { |
@@ -896,16 +891,6 @@ get_more_pages: | |||
896 | unlock_page(page); | 891 | unlock_page(page); |
897 | continue; | 892 | continue; |
898 | } | 893 | } |
899 | if (page->index > end) { | ||
900 | dout("end of range %p\n", page); | ||
901 | /* can't be range_cyclic (1st pass) because | ||
902 | * end == -1 in that case. */ | ||
903 | stop = true; | ||
904 | if (ceph_wbc.head_snapc) | ||
905 | done = true; | ||
906 | unlock_page(page); | ||
907 | break; | ||
908 | } | ||
909 | if (strip_unit_end && (page->index > strip_unit_end)) { | 894 | if (strip_unit_end && (page->index > strip_unit_end)) { |
910 | dout("end of strip unit %p\n", page); | 895 | dout("end of strip unit %p\n", page); |
911 | unlock_page(page); | 896 | unlock_page(page); |
@@ -1177,8 +1162,7 @@ release_pvec_pages: | |||
1177 | index = 0; | 1162 | index = 0; |
1178 | while ((index <= end) && | 1163 | while ((index <= end) && |
1179 | (nr = pagevec_lookup_tag(&pvec, mapping, &index, | 1164 | (nr = pagevec_lookup_tag(&pvec, mapping, &index, |
1180 | PAGECACHE_TAG_WRITEBACK, | 1165 | PAGECACHE_TAG_WRITEBACK))) { |
1181 | PAGEVEC_SIZE))) { | ||
1182 | for (i = 0; i < nr; i++) { | 1166 | for (i = 0; i < nr; i++) { |
1183 | page = pvec.pages[i]; | 1167 | page = pvec.pages[i]; |
1184 | if (page_snap_context(page) != snapc) | 1168 | if (page_snap_context(page) != snapc) |
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 92fdf9c35de2..df9f682708c6 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
@@ -1963,8 +1963,6 @@ wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping, | |||
1963 | pgoff_t end, pgoff_t *index, | 1963 | pgoff_t end, pgoff_t *index, |
1964 | unsigned int *found_pages) | 1964 | unsigned int *found_pages) |
1965 | { | 1965 | { |
1966 | unsigned int nr_pages; | ||
1967 | struct page **pages; | ||
1968 | struct cifs_writedata *wdata; | 1966 | struct cifs_writedata *wdata; |
1969 | 1967 | ||
1970 | wdata = cifs_writedata_alloc((unsigned int)tofind, | 1968 | wdata = cifs_writedata_alloc((unsigned int)tofind, |
@@ -1972,23 +1970,8 @@ wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping, | |||
1972 | if (!wdata) | 1970 | if (!wdata) |
1973 | return NULL; | 1971 | return NULL; |
1974 | 1972 | ||
1975 | /* | 1973 | *found_pages = find_get_pages_range_tag(mapping, index, end, |
1976 | * find_get_pages_tag seems to return a max of 256 on each | 1974 | PAGECACHE_TAG_DIRTY, tofind, wdata->pages); |
1977 | * iteration, so we must call it several times in order to | ||
1978 | * fill the array or the wsize is effectively limited to | ||
1979 | * 256 * PAGE_SIZE. | ||
1980 | */ | ||
1981 | *found_pages = 0; | ||
1982 | pages = wdata->pages; | ||
1983 | do { | ||
1984 | nr_pages = find_get_pages_tag(mapping, index, | ||
1985 | PAGECACHE_TAG_DIRTY, tofind, | ||
1986 | pages); | ||
1987 | *found_pages += nr_pages; | ||
1988 | tofind -= nr_pages; | ||
1989 | pages += nr_pages; | ||
1990 | } while (nr_pages && tofind && *index <= end); | ||
1991 | |||
1992 | return wdata; | 1975 | return wdata; |
1993 | } | 1976 | } |
1994 | 1977 | ||
diff --git a/fs/dax.c b/fs/dax.c --- a/fs/dax.c +++ b/fs/dax.c | |||
@@ -565,7 +565,7 @@ static void *dax_insert_mapping_entry(struct address_space *mapping, | |||
565 | ret = __radix_tree_lookup(page_tree, index, &node, &slot); | 565 | ret = __radix_tree_lookup(page_tree, index, &node, &slot); |
566 | WARN_ON_ONCE(ret != entry); | 566 | WARN_ON_ONCE(ret != entry); |
567 | __radix_tree_replace(page_tree, node, slot, | 567 | __radix_tree_replace(page_tree, node, slot, |
568 | new_entry, NULL, NULL); | 568 | new_entry, NULL); |
569 | entry = new_entry; | 569 | entry = new_entry; |
570 | } | 570 | } |
571 | 571 | ||
@@ -614,6 +614,13 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping, | |||
614 | if (follow_pte_pmd(vma->vm_mm, address, &start, &end, &ptep, &pmdp, &ptl)) | 614 | if (follow_pte_pmd(vma->vm_mm, address, &start, &end, &ptep, &pmdp, &ptl)) |
615 | continue; | 615 | continue; |
616 | 616 | ||
617 | /* | ||
618 | * No need to call mmu_notifier_invalidate_range() as we are | ||
619 | * downgrading page table protection not changing it to point | ||
620 | * to a new page. | ||
621 | * | ||
622 | * See Documentation/vm/mmu_notifier.txt | ||
623 | */ | ||
617 | if (pmdp) { | 624 | if (pmdp) { |
618 | #ifdef CONFIG_FS_DAX_PMD | 625 | #ifdef CONFIG_FS_DAX_PMD |
619 | pmd_t pmd; | 626 | pmd_t pmd; |
@@ -628,7 +635,6 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping, | |||
628 | pmd = pmd_wrprotect(pmd); | 635 | pmd = pmd_wrprotect(pmd); |
629 | pmd = pmd_mkclean(pmd); | 636 | pmd = pmd_mkclean(pmd); |
630 | set_pmd_at(vma->vm_mm, address, pmdp, pmd); | 637 | set_pmd_at(vma->vm_mm, address, pmdp, pmd); |
631 | mmu_notifier_invalidate_range(vma->vm_mm, start, end); | ||
632 | unlock_pmd: | 638 | unlock_pmd: |
633 | spin_unlock(ptl); | 639 | spin_unlock(ptl); |
634 | #endif | 640 | #endif |
@@ -643,7 +649,6 @@ unlock_pmd: | |||
643 | pte = pte_wrprotect(pte); | 649 | pte = pte_wrprotect(pte); |
644 | pte = pte_mkclean(pte); | 650 | pte = pte_mkclean(pte); |
645 | set_pte_at(vma->vm_mm, address, ptep, pte); | 651 | set_pte_at(vma->vm_mm, address, ptep, pte); |
646 | mmu_notifier_invalidate_range(vma->vm_mm, start, end); | ||
647 | unlock_pte: | 652 | unlock_pte: |
648 | pte_unmap_unlock(ptep, ptl); | 653 | pte_unmap_unlock(ptep, ptl); |
649 | } | 654 | } |
@@ -789,7 +794,7 @@ int dax_writeback_mapping_range(struct address_space *mapping, | |||
789 | 794 | ||
790 | tag_pages_for_writeback(mapping, start_index, end_index); | 795 | tag_pages_for_writeback(mapping, start_index, end_index); |
791 | 796 | ||
792 | pagevec_init(&pvec, 0); | 797 | pagevec_init(&pvec); |
793 | while (!done) { | 798 | while (!done) { |
794 | pvec.nr = find_get_entries_tag(mapping, start_index, | 799 | pvec.nr = find_get_entries_tag(mapping, start_index, |
795 | PAGECACHE_TAG_TOWRITE, PAGEVEC_SIZE, | 800 | PAGECACHE_TAG_TOWRITE, PAGEVEC_SIZE, |
diff --git a/fs/dcache.c b/fs/dcache.c index bcc9f6981569..5c7df1df81ff 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -2705,8 +2705,6 @@ static void swap_names(struct dentry *dentry, struct dentry *target) | |||
2705 | */ | 2705 | */ |
2706 | unsigned int i; | 2706 | unsigned int i; |
2707 | BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long))); | 2707 | BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long))); |
2708 | kmemcheck_mark_initialized(dentry->d_iname, DNAME_INLINE_LEN); | ||
2709 | kmemcheck_mark_initialized(target->d_iname, DNAME_INLINE_LEN); | ||
2710 | for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) { | 2708 | for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) { |
2711 | swap(((long *) &dentry->d_iname)[i], | 2709 | swap(((long *) &dentry->d_iname)[i], |
2712 | ((long *) &target->d_iname)[i]); | 2710 | ((long *) &target->d_iname)[i]); |
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 6b801186baa5..25aeaa7328ba 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c | |||
@@ -660,7 +660,7 @@ static struct ecryptfs_cache_info { | |||
660 | struct kmem_cache **cache; | 660 | struct kmem_cache **cache; |
661 | const char *name; | 661 | const char *name; |
662 | size_t size; | 662 | size_t size; |
663 | unsigned long flags; | 663 | slab_flags_t flags; |
664 | void (*ctor)(void *obj); | 664 | void (*ctor)(void *obj); |
665 | } ecryptfs_cache_infos[] = { | 665 | } ecryptfs_cache_infos[] = { |
666 | { | 666 | { |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 2633150e41b9..8d2b582fb141 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -1719,7 +1719,7 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd, | |||
1719 | ext4_es_remove_extent(inode, start, last - start + 1); | 1719 | ext4_es_remove_extent(inode, start, last - start + 1); |
1720 | } | 1720 | } |
1721 | 1721 | ||
1722 | pagevec_init(&pvec, 0); | 1722 | pagevec_init(&pvec); |
1723 | while (index <= end) { | 1723 | while (index <= end) { |
1724 | nr_pages = pagevec_lookup_range(&pvec, mapping, &index, end); | 1724 | nr_pages = pagevec_lookup_range(&pvec, mapping, &index, end); |
1725 | if (nr_pages == 0) | 1725 | if (nr_pages == 0) |
@@ -2345,7 +2345,7 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd) | |||
2345 | lblk = start << bpp_bits; | 2345 | lblk = start << bpp_bits; |
2346 | pblock = mpd->map.m_pblk; | 2346 | pblock = mpd->map.m_pblk; |
2347 | 2347 | ||
2348 | pagevec_init(&pvec, 0); | 2348 | pagevec_init(&pvec); |
2349 | while (start <= end) { | 2349 | while (start <= end) { |
2350 | nr_pages = pagevec_lookup_range(&pvec, inode->i_mapping, | 2350 | nr_pages = pagevec_lookup_range(&pvec, inode->i_mapping, |
2351 | &start, end); | 2351 | &start, end); |
@@ -2616,12 +2616,12 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) | |||
2616 | else | 2616 | else |
2617 | tag = PAGECACHE_TAG_DIRTY; | 2617 | tag = PAGECACHE_TAG_DIRTY; |
2618 | 2618 | ||
2619 | pagevec_init(&pvec, 0); | 2619 | pagevec_init(&pvec); |
2620 | mpd->map.m_len = 0; | 2620 | mpd->map.m_len = 0; |
2621 | mpd->next_page = index; | 2621 | mpd->next_page = index; |
2622 | while (index <= end) { | 2622 | while (index <= end) { |
2623 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, | 2623 | nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end, |
2624 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); | 2624 | tag); |
2625 | if (nr_pages == 0) | 2625 | if (nr_pages == 0) |
2626 | goto out; | 2626 | goto out; |
2627 | 2627 | ||
@@ -2629,16 +2629,6 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) | |||
2629 | struct page *page = pvec.pages[i]; | 2629 | struct page *page = pvec.pages[i]; |
2630 | 2630 | ||
2631 | /* | 2631 | /* |
2632 | * At this point, the page may be truncated or | ||
2633 | * invalidated (changing page->mapping to NULL), or | ||
2634 | * even swizzled back from swapper_space to tmpfs file | ||
2635 | * mapping. However, page->index will not change | ||
2636 | * because we have a reference on the page. | ||
2637 | */ | ||
2638 | if (page->index > end) | ||
2639 | goto out; | ||
2640 | |||
2641 | /* | ||
2642 | * Accumulated enough dirty pages? This doesn't apply | 2632 | * Accumulated enough dirty pages? This doesn't apply |
2643 | * to WB_SYNC_ALL mode. For integrity sync we have to | 2633 | * to WB_SYNC_ALL mode. For integrity sync we have to |
2644 | * keep going because someone may be concurrently | 2634 | * keep going because someone may be concurrently |
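The ext4 hunk above is the canonical shape of the conversion: pagevec_lookup_tag() plus a manual `page->index > end` bailout becomes pagevec_lookup_range_tag(), which takes the end index itself and never returns pages past it. A condensed sketch of the resulting write-cache loop (illustrative; per-page work elided):

    pagevec_init(&pvec);
    while (index <= end) {
            nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index,
                                                end, tag);
            if (nr_pages == 0)
                    break;
            for (i = 0; i < nr_pages; i++) {
                    struct page *page = pvec.pages[i];
                    /* page->index is guaranteed to be <= end here */
                    /* ... lock, map and submit the page ... */
            }
            pagevec_release(&pvec);
            cond_resched();
    }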
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 04fe1df052b2..0bb8e2c022d3 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c | |||
@@ -305,25 +305,22 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, | |||
305 | long nr_to_write, enum iostat_type io_type) | 305 | long nr_to_write, enum iostat_type io_type) |
306 | { | 306 | { |
307 | struct address_space *mapping = META_MAPPING(sbi); | 307 | struct address_space *mapping = META_MAPPING(sbi); |
308 | pgoff_t index = 0, end = ULONG_MAX, prev = ULONG_MAX; | 308 | pgoff_t index = 0, prev = ULONG_MAX; |
309 | struct pagevec pvec; | 309 | struct pagevec pvec; |
310 | long nwritten = 0; | 310 | long nwritten = 0; |
311 | int nr_pages; | ||
311 | struct writeback_control wbc = { | 312 | struct writeback_control wbc = { |
312 | .for_reclaim = 0, | 313 | .for_reclaim = 0, |
313 | }; | 314 | }; |
314 | struct blk_plug plug; | 315 | struct blk_plug plug; |
315 | 316 | ||
316 | pagevec_init(&pvec, 0); | 317 | pagevec_init(&pvec); |
317 | 318 | ||
318 | blk_start_plug(&plug); | 319 | blk_start_plug(&plug); |
319 | 320 | ||
320 | while (index <= end) { | 321 | while ((nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, |
321 | int i, nr_pages; | 322 | PAGECACHE_TAG_DIRTY))) { |
322 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | 323 | int i; |
323 | PAGECACHE_TAG_DIRTY, | ||
324 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); | ||
325 | if (unlikely(nr_pages == 0)) | ||
326 | break; | ||
327 | 324 | ||
328 | for (i = 0; i < nr_pages; i++) { | 325 | for (i = 0; i < nr_pages; i++) { |
329 | struct page *page = pvec.pages[i]; | 326 | struct page *page = pvec.pages[i]; |
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 36b535207c88..7b3ad5d8e2e9 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c | |||
@@ -1635,7 +1635,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping, | |||
1635 | int range_whole = 0; | 1635 | int range_whole = 0; |
1636 | int tag; | 1636 | int tag; |
1637 | 1637 | ||
1638 | pagevec_init(&pvec, 0); | 1638 | pagevec_init(&pvec); |
1639 | 1639 | ||
1640 | if (get_dirty_pages(mapping->host) <= | 1640 | if (get_dirty_pages(mapping->host) <= |
1641 | SM_I(F2FS_M_SB(mapping))->min_hot_blocks) | 1641 | SM_I(F2FS_M_SB(mapping))->min_hot_blocks) |
@@ -1669,8 +1669,8 @@ retry: | |||
1669 | while (!done && (index <= end)) { | 1669 | while (!done && (index <= end)) { |
1670 | int i; | 1670 | int i; |
1671 | 1671 | ||
1672 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, | 1672 | nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end, |
1673 | min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1); | 1673 | tag); |
1674 | if (nr_pages == 0) | 1674 | if (nr_pages == 0) |
1675 | break; | 1675 | break; |
1676 | 1676 | ||
@@ -1678,11 +1678,6 @@ retry: | |||
1678 | struct page *page = pvec.pages[i]; | 1678 | struct page *page = pvec.pages[i]; |
1679 | bool submitted = false; | 1679 | bool submitted = false; |
1680 | 1680 | ||
1681 | if (page->index > end) { | ||
1682 | done = 1; | ||
1683 | break; | ||
1684 | } | ||
1685 | |||
1686 | done_index = page->index; | 1681 | done_index = page->index; |
1687 | retry_write: | 1682 | retry_write: |
1688 | lock_page(page); | 1683 | lock_page(page); |
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 517e112c8a9a..f78b76ec4707 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c | |||
@@ -313,18 +313,19 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
313 | static pgoff_t __get_first_dirty_index(struct address_space *mapping, | 313 | static pgoff_t __get_first_dirty_index(struct address_space *mapping, |
314 | pgoff_t pgofs, int whence) | 314 | pgoff_t pgofs, int whence) |
315 | { | 315 | { |
316 | struct pagevec pvec; | 316 | struct page *page; |
317 | int nr_pages; | 317 | int nr_pages; |
318 | 318 | ||
319 | if (whence != SEEK_DATA) | 319 | if (whence != SEEK_DATA) |
320 | return 0; | 320 | return 0; |
321 | 321 | ||
322 | /* find first dirty page index */ | 322 | /* find first dirty page index */ |
323 | pagevec_init(&pvec, 0); | 323 | nr_pages = find_get_pages_tag(mapping, &pgofs, PAGECACHE_TAG_DIRTY, |
324 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs, | 324 | 1, &page); |
325 | PAGECACHE_TAG_DIRTY, 1); | 325 | if (!nr_pages) |
326 | pgofs = nr_pages ? pvec.pages[0]->index : ULONG_MAX; | 326 | return ULONG_MAX; |
327 | pagevec_release(&pvec); | 327 | pgofs = page->index; |
328 | put_page(page); | ||
328 | return pgofs; | 329 | return pgofs; |
329 | } | 330 | } |
330 | 331 | ||
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index fca87835a1da..b33dac9592ca 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c | |||
@@ -1277,21 +1277,17 @@ release_page: | |||
1277 | 1277 | ||
1278 | static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino) | 1278 | static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino) |
1279 | { | 1279 | { |
1280 | pgoff_t index, end; | 1280 | pgoff_t index; |
1281 | struct pagevec pvec; | 1281 | struct pagevec pvec; |
1282 | struct page *last_page = NULL; | 1282 | struct page *last_page = NULL; |
1283 | int nr_pages; | ||
1283 | 1284 | ||
1284 | pagevec_init(&pvec, 0); | 1285 | pagevec_init(&pvec); |
1285 | index = 0; | 1286 | index = 0; |
1286 | end = ULONG_MAX; | 1287 | |
1287 | 1288 | while ((nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, | |
1288 | while (index <= end) { | 1289 | PAGECACHE_TAG_DIRTY))) { |
1289 | int i, nr_pages; | 1290 | int i; |
1290 | nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, | ||
1291 | PAGECACHE_TAG_DIRTY, | ||
1292 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); | ||
1293 | if (nr_pages == 0) | ||
1294 | break; | ||
1295 | 1291 | ||
1296 | for (i = 0; i < nr_pages; i++) { | 1292 | for (i = 0; i < nr_pages; i++) { |
1297 | struct page *page = pvec.pages[i]; | 1293 | struct page *page = pvec.pages[i]; |
@@ -1425,13 +1421,14 @@ static int f2fs_write_node_page(struct page *page, | |||
1425 | int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, | 1421 | int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, |
1426 | struct writeback_control *wbc, bool atomic) | 1422 | struct writeback_control *wbc, bool atomic) |
1427 | { | 1423 | { |
1428 | pgoff_t index, end; | 1424 | pgoff_t index; |
1429 | pgoff_t last_idx = ULONG_MAX; | 1425 | pgoff_t last_idx = ULONG_MAX; |
1430 | struct pagevec pvec; | 1426 | struct pagevec pvec; |
1431 | int ret = 0; | 1427 | int ret = 0; |
1432 | struct page *last_page = NULL; | 1428 | struct page *last_page = NULL; |
1433 | bool marked = false; | 1429 | bool marked = false; |
1434 | nid_t ino = inode->i_ino; | 1430 | nid_t ino = inode->i_ino; |
1431 | int nr_pages; | ||
1435 | 1432 | ||
1436 | if (atomic) { | 1433 | if (atomic) { |
1437 | last_page = last_fsync_dnode(sbi, ino); | 1434 | last_page = last_fsync_dnode(sbi, ino); |
@@ -1439,17 +1436,12 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, | |||
1439 | return PTR_ERR_OR_ZERO(last_page); | 1436 | return PTR_ERR_OR_ZERO(last_page); |
1440 | } | 1437 | } |
1441 | retry: | 1438 | retry: |
1442 | pagevec_init(&pvec, 0); | 1439 | pagevec_init(&pvec); |
1443 | index = 0; | 1440 | index = 0; |
1444 | end = ULONG_MAX; | 1441 | |
1445 | 1442 | while ((nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, | |
1446 | while (index <= end) { | 1443 | PAGECACHE_TAG_DIRTY))) { |
1447 | int i, nr_pages; | 1444 | int i; |
1448 | nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, | ||
1449 | PAGECACHE_TAG_DIRTY, | ||
1450 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); | ||
1451 | if (nr_pages == 0) | ||
1452 | break; | ||
1453 | 1445 | ||
1454 | for (i = 0; i < nr_pages; i++) { | 1446 | for (i = 0; i < nr_pages; i++) { |
1455 | struct page *page = pvec.pages[i]; | 1447 | struct page *page = pvec.pages[i]; |
@@ -1548,25 +1540,21 @@ out: | |||
1548 | int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc, | 1540 | int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc, |
1549 | bool do_balance, enum iostat_type io_type) | 1541 | bool do_balance, enum iostat_type io_type) |
1550 | { | 1542 | { |
1551 | pgoff_t index, end; | 1543 | pgoff_t index; |
1552 | struct pagevec pvec; | 1544 | struct pagevec pvec; |
1553 | int step = 0; | 1545 | int step = 0; |
1554 | int nwritten = 0; | 1546 | int nwritten = 0; |
1555 | int ret = 0; | 1547 | int ret = 0; |
1548 | int nr_pages; | ||
1556 | 1549 | ||
1557 | pagevec_init(&pvec, 0); | 1550 | pagevec_init(&pvec); |
1558 | 1551 | ||
1559 | next_step: | 1552 | next_step: |
1560 | index = 0; | 1553 | index = 0; |
1561 | end = ULONG_MAX; | 1554 | |
1562 | 1555 | while ((nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, | |
1563 | while (index <= end) { | 1556 | PAGECACHE_TAG_DIRTY))) { |
1564 | int i, nr_pages; | 1557 | int i; |
1565 | nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, | ||
1566 | PAGECACHE_TAG_DIRTY, | ||
1567 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); | ||
1568 | if (nr_pages == 0) | ||
1569 | break; | ||
1570 | 1558 | ||
1571 | for (i = 0; i < nr_pages; i++) { | 1559 | for (i = 0; i < nr_pages; i++) { |
1572 | struct page *page = pvec.pages[i]; | 1560 | struct page *page = pvec.pages[i]; |
@@ -1655,27 +1643,20 @@ out: | |||
1655 | 1643 | ||
1656 | int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino) | 1644 | int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino) |
1657 | { | 1645 | { |
1658 | pgoff_t index = 0, end = ULONG_MAX; | 1646 | pgoff_t index = 0; |
1659 | struct pagevec pvec; | 1647 | struct pagevec pvec; |
1660 | int ret2, ret = 0; | 1648 | int ret2, ret = 0; |
1649 | int nr_pages; | ||
1661 | 1650 | ||
1662 | pagevec_init(&pvec, 0); | 1651 | pagevec_init(&pvec); |
1663 | 1652 | ||
1664 | while (index <= end) { | 1653 | while ((nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, |
1665 | int i, nr_pages; | 1654 | PAGECACHE_TAG_WRITEBACK))) { |
1666 | nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, | 1655 | int i; |
1667 | PAGECACHE_TAG_WRITEBACK, | ||
1668 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); | ||
1669 | if (nr_pages == 0) | ||
1670 | break; | ||
1671 | 1656 | ||
1672 | for (i = 0; i < nr_pages; i++) { | 1657 | for (i = 0; i < nr_pages; i++) { |
1673 | struct page *page = pvec.pages[i]; | 1658 | struct page *page = pvec.pages[i]; |
1674 | 1659 | ||
1675 | /* until radix tree lookup accepts end_index */ | ||
1676 | if (unlikely(page->index > end)) | ||
1677 | continue; | ||
1678 | |||
1679 | if (ino && ino_of_node(page) == ino) { | 1660 | if (ino && ino_of_node(page) == ino) { |
1680 | f2fs_wait_on_page_writeback(page, NODE, true); | 1661 | f2fs_wait_on_page_writeback(page, NODE, true); |
1681 | if (TestClearPageError(page)) | 1662 | if (TestClearPageError(page)) |
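Aside (not part of the patch): the three f2fs loops above converge on one idiom, because pagevec_lookup_tag() now always fetches up to PAGEVEC_SIZE pages starting at *index and advances the index itself, so callers drop the end/ULONG_MAX and batch-size bookkeeping. A minimal sketch of the resulting loop shape, with the per-page work elided (illustrative, not code from this series):

	struct pagevec pvec;
	pgoff_t index = 0;
	int i, nr_pages;

	pagevec_init(&pvec);
	while ((nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
					      PAGECACHE_TAG_DIRTY))) {
		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];
			/* per-page work: lock, write back or wait, unlock */
		}
		pagevec_release(&pvec);
		cond_resched();
	}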
diff --git a/fs/file_table.c b/fs/file_table.c index 49e1f2f1a4cb..2dc9f38bd195 100644 --- a/fs/file_table.c +++ b/fs/file_table.c | |||
@@ -312,7 +312,7 @@ void put_filp(struct file *file) | |||
312 | void __init files_init(void) | 312 | void __init files_init(void) |
313 | { | 313 | { |
314 | filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, | 314 | filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, |
315 | SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); | 315 | SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT, NULL); |
316 | percpu_counter_init(&nr_files, 0, GFP_KERNEL); | 316 | percpu_counter_init(&nr_files, 0, GFP_KERNEL); |
317 | } | 317 | } |
318 | 318 | ||
diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 0ad3fd3ad0b4..961029e04027 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c | |||
@@ -1175,7 +1175,7 @@ void __fscache_uncache_all_inode_pages(struct fscache_cookie *cookie, | |||
1175 | return; | 1175 | return; |
1176 | } | 1176 | } |
1177 | 1177 | ||
1178 | pagevec_init(&pvec, 0); | 1178 | pagevec_init(&pvec); |
1179 | next = 0; | 1179 | next = 0; |
1180 | do { | 1180 | do { |
1181 | if (!pagevec_lookup(&pvec, mapping, &next)) | 1181 | if (!pagevec_lookup(&pvec, mapping, &next)) |
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index a42d89371748..17f0d05bfd4c 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c | |||
@@ -1636,7 +1636,7 @@ out_finish: | |||
1636 | 1636 | ||
1637 | static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req) | 1637 | static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req) |
1638 | { | 1638 | { |
1639 | release_pages(req->pages, req->num_pages, false); | 1639 | release_pages(req->pages, req->num_pages); |
1640 | } | 1640 | } |
1641 | 1641 | ||
1642 | static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, | 1642 | static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, |
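Aside (not part of the patch): release_pages() loses its "cold" hint in this series, so batch page release is now just an array plus a count. A hedged sketch of the two-argument form; the wrapper name is illustrative:

	/* sketch: drop references on a batch of pages */
	static void put_page_batch(struct page **pages, int nr)
	{
		release_pages(pages, nr);	/* was: release_pages(pages, nr, false) */
	}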
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index a79e320349cd..2f504d615d92 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c | |||
@@ -1273,9 +1273,9 @@ static int __init fuse_fs_init(void) | |||
1273 | int err; | 1273 | int err; |
1274 | 1274 | ||
1275 | fuse_inode_cachep = kmem_cache_create("fuse_inode", | 1275 | fuse_inode_cachep = kmem_cache_create("fuse_inode", |
1276 | sizeof(struct fuse_inode), 0, | 1276 | sizeof(struct fuse_inode), 0, |
1277 | SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT, | 1277 | SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT|SLAB_RECLAIM_ACCOUNT, |
1278 | fuse_inode_init_once); | 1278 | fuse_inode_init_once); |
1279 | err = -ENOMEM; | 1279 | err = -ENOMEM; |
1280 | if (!fuse_inode_cachep) | 1280 | if (!fuse_inode_cachep) |
1281 | goto out; | 1281 | goto out; |
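Aside (not part of the patch): the fuse_inode cache now combines SLAB_ACCOUNT (objects charged to the allocating memory cgroup) with SLAB_RECLAIM_ACCOUNT (slab pages counted as reclaimable, which fits an inode cache that can be shrunk under memory pressure). A hedged sketch of creating a cache with the same flag set, reusing the names from the hunk above; the error handling is illustrative:

	fuse_inode_cachep = kmem_cache_create("fuse_inode",
				sizeof(struct fuse_inode), 0,
				SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT | SLAB_RECLAIM_ACCOUNT,
				fuse_inode_init_once);
	if (!fuse_inode_cachep)
		return -ENOMEM;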
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 68ed06962537..1daf15a1f00c 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c | |||
@@ -280,22 +280,6 @@ static int gfs2_write_jdata_pagevec(struct address_space *mapping, | |||
280 | for(i = 0; i < nr_pages; i++) { | 280 | for(i = 0; i < nr_pages; i++) { |
281 | struct page *page = pvec->pages[i]; | 281 | struct page *page = pvec->pages[i]; |
282 | 282 | ||
283 | /* | ||
284 | * At this point, the page may be truncated or | ||
285 | * invalidated (changing page->mapping to NULL), or | ||
286 | * even swizzled back from swapper_space to tmpfs file | ||
287 | * mapping. However, page->index will not change | ||
288 | * because we have a reference on the page. | ||
289 | */ | ||
290 | if (page->index > end) { | ||
291 | /* | ||
292 | * can't be range_cyclic (1st pass) because | ||
293 | * end == -1 in that case. | ||
294 | */ | ||
295 | ret = 1; | ||
296 | break; | ||
297 | } | ||
298 | |||
299 | *done_index = page->index; | 283 | *done_index = page->index; |
300 | 284 | ||
301 | lock_page(page); | 285 | lock_page(page); |
@@ -387,7 +371,7 @@ static int gfs2_write_cache_jdata(struct address_space *mapping, | |||
387 | int range_whole = 0; | 371 | int range_whole = 0; |
388 | int tag; | 372 | int tag; |
389 | 373 | ||
390 | pagevec_init(&pvec, 0); | 374 | pagevec_init(&pvec); |
391 | if (wbc->range_cyclic) { | 375 | if (wbc->range_cyclic) { |
392 | writeback_index = mapping->writeback_index; /* prev offset */ | 376 | writeback_index = mapping->writeback_index; /* prev offset */ |
393 | index = writeback_index; | 377 | index = writeback_index; |
@@ -413,8 +397,8 @@ retry: | |||
413 | tag_pages_for_writeback(mapping, index, end); | 397 | tag_pages_for_writeback(mapping, index, end); |
414 | done_index = index; | 398 | done_index = index; |
415 | while (!done && (index <= end)) { | 399 | while (!done && (index <= end)) { |
416 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, | 400 | nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end, |
417 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); | 401 | tag); |
418 | if (nr_pages == 0) | 402 | if (nr_pages == 0) |
419 | break; | 403 | break; |
420 | 404 | ||
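Aside (not part of the patch): pagevec_lookup_range_tag() takes the end index directly, which is why the manual "page->index > end" check earlier in this file goes away and the caller no longer clamps the batch size. A minimal sketch of a range-bounded, tag-driven scan, with declarations and per-page handling elided:

	/* sketch: gather dirty pages in [index, end] one pagevec at a time */
	while (!done && index <= end) {
		nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
						    PAGECACHE_TAG_DIRTY);
		if (nr_pages == 0)
			break;
		/* process pvec.pages[0..nr_pages-1]; the lookup never passes 'end' */
		pagevec_release(&pvec);
	}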
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index ed113ea17aff..1e76730aac0d 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -407,7 +407,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, | |||
407 | 407 | ||
408 | memset(&pseudo_vma, 0, sizeof(struct vm_area_struct)); | 408 | memset(&pseudo_vma, 0, sizeof(struct vm_area_struct)); |
409 | pseudo_vma.vm_flags = (VM_HUGETLB | VM_MAYSHARE | VM_SHARED); | 409 | pseudo_vma.vm_flags = (VM_HUGETLB | VM_MAYSHARE | VM_SHARED); |
410 | pagevec_init(&pvec, 0); | 410 | pagevec_init(&pvec); |
411 | next = start; | 411 | next = start; |
412 | while (next < end) { | 412 | while (next < end) { |
413 | /* | 413 | /* |
@@ -668,7 +668,6 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
668 | return error; | 668 | return error; |
669 | 669 | ||
670 | if (ia_valid & ATTR_SIZE) { | 670 | if (ia_valid & ATTR_SIZE) { |
671 | error = -EINVAL; | ||
672 | if (attr->ia_size & ~huge_page_mask(h)) | 671 | if (attr->ia_size & ~huge_page_mask(h)) |
673 | return -EINVAL; | 672 | return -EINVAL; |
674 | error = hugetlb_vmtruncate(inode, attr->ia_size); | 673 | error = hugetlb_vmtruncate(inode, attr->ia_size); |
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 06ffa135dfa6..16a7a67a11c9 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c | |||
@@ -2156,10 +2156,10 @@ static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree, | |||
2156 | level++) | 2156 | level++) |
2157 | INIT_LIST_HEAD(&lists[level]); | 2157 | INIT_LIST_HEAD(&lists[level]); |
2158 | 2158 | ||
2159 | pagevec_init(&pvec, 0); | 2159 | pagevec_init(&pvec); |
2160 | 2160 | ||
2161 | while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY, | 2161 | while (pagevec_lookup_tag(&pvec, btcache, &index, |
2162 | PAGEVEC_SIZE)) { | 2162 | PAGECACHE_TAG_DIRTY)) { |
2163 | for (i = 0; i < pagevec_count(&pvec); i++) { | 2163 | for (i = 0; i < pagevec_count(&pvec); i++) { |
2164 | bh = head = page_buffers(pvec.pages[i]); | 2164 | bh = head = page_buffers(pvec.pages[i]); |
2165 | do { | 2165 | do { |
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 8616c46d33da..68241512d7c1 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c | |||
@@ -255,10 +255,9 @@ int nilfs_copy_dirty_pages(struct address_space *dmap, | |||
255 | pgoff_t index = 0; | 255 | pgoff_t index = 0; |
256 | int err = 0; | 256 | int err = 0; |
257 | 257 | ||
258 | pagevec_init(&pvec, 0); | 258 | pagevec_init(&pvec); |
259 | repeat: | 259 | repeat: |
260 | if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY, | 260 | if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY)) |
261 | PAGEVEC_SIZE)) | ||
262 | return 0; | 261 | return 0; |
263 | 262 | ||
264 | for (i = 0; i < pagevec_count(&pvec); i++) { | 263 | for (i = 0; i < pagevec_count(&pvec); i++) { |
@@ -310,7 +309,7 @@ void nilfs_copy_back_pages(struct address_space *dmap, | |||
310 | pgoff_t index = 0; | 309 | pgoff_t index = 0; |
311 | int err; | 310 | int err; |
312 | 311 | ||
313 | pagevec_init(&pvec, 0); | 312 | pagevec_init(&pvec); |
314 | repeat: | 313 | repeat: |
315 | n = pagevec_lookup(&pvec, smap, &index); | 314 | n = pagevec_lookup(&pvec, smap, &index); |
316 | if (!n) | 315 | if (!n) |
@@ -374,10 +373,10 @@ void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent) | |||
374 | unsigned int i; | 373 | unsigned int i; |
375 | pgoff_t index = 0; | 374 | pgoff_t index = 0; |
376 | 375 | ||
377 | pagevec_init(&pvec, 0); | 376 | pagevec_init(&pvec); |
378 | 377 | ||
379 | while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, | 378 | while (pagevec_lookup_tag(&pvec, mapping, &index, |
380 | PAGEVEC_SIZE)) { | 379 | PAGECACHE_TAG_DIRTY)) { |
381 | for (i = 0; i < pagevec_count(&pvec); i++) { | 380 | for (i = 0; i < pagevec_count(&pvec); i++) { |
382 | struct page *page = pvec.pages[i]; | 381 | struct page *page = pvec.pages[i]; |
383 | 382 | ||
@@ -519,7 +518,7 @@ unsigned long nilfs_find_uncommitted_extent(struct inode *inode, | |||
519 | index = start_blk >> (PAGE_SHIFT - inode->i_blkbits); | 518 | index = start_blk >> (PAGE_SHIFT - inode->i_blkbits); |
520 | nblocks_in_page = 1U << (PAGE_SHIFT - inode->i_blkbits); | 519 | nblocks_in_page = 1U << (PAGE_SHIFT - inode->i_blkbits); |
521 | 520 | ||
522 | pagevec_init(&pvec, 0); | 521 | pagevec_init(&pvec); |
523 | 522 | ||
524 | repeat: | 523 | repeat: |
525 | pvec.nr = find_get_pages_contig(inode->i_mapping, index, PAGEVEC_SIZE, | 524 | pvec.nr = find_get_pages_contig(inode->i_mapping, index, PAGEVEC_SIZE, |
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 70ded52dc1dd..f65392fecb5c 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c | |||
@@ -708,21 +708,17 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode, | |||
708 | index = start >> PAGE_SHIFT; | 708 | index = start >> PAGE_SHIFT; |
709 | last = end >> PAGE_SHIFT; | 709 | last = end >> PAGE_SHIFT; |
710 | } | 710 | } |
711 | pagevec_init(&pvec, 0); | 711 | pagevec_init(&pvec); |
712 | repeat: | 712 | repeat: |
713 | if (unlikely(index > last) || | 713 | if (unlikely(index > last) || |
714 | !pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, | 714 | !pagevec_lookup_range_tag(&pvec, mapping, &index, last, |
715 | min_t(pgoff_t, last - index, | 715 | PAGECACHE_TAG_DIRTY)) |
716 | PAGEVEC_SIZE - 1) + 1)) | ||
717 | return ndirties; | 716 | return ndirties; |
718 | 717 | ||
719 | for (i = 0; i < pagevec_count(&pvec); i++) { | 718 | for (i = 0; i < pagevec_count(&pvec); i++) { |
720 | struct buffer_head *bh, *head; | 719 | struct buffer_head *bh, *head; |
721 | struct page *page = pvec.pages[i]; | 720 | struct page *page = pvec.pages[i]; |
722 | 721 | ||
723 | if (unlikely(page->index > last)) | ||
724 | break; | ||
725 | |||
726 | lock_page(page); | 722 | lock_page(page); |
727 | if (!page_has_buffers(page)) | 723 | if (!page_has_buffers(page)) |
728 | create_empty_buffers(page, i_blocksize(inode), 0); | 724 | create_empty_buffers(page, i_blocksize(inode), 0); |
@@ -757,10 +753,10 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode, | |||
757 | unsigned int i; | 753 | unsigned int i; |
758 | pgoff_t index = 0; | 754 | pgoff_t index = 0; |
759 | 755 | ||
760 | pagevec_init(&pvec, 0); | 756 | pagevec_init(&pvec); |
761 | 757 | ||
762 | while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, | 758 | while (pagevec_lookup_tag(&pvec, mapping, &index, |
763 | PAGEVEC_SIZE)) { | 759 | PAGECACHE_TAG_DIRTY)) { |
764 | for (i = 0; i < pagevec_count(&pvec); i++) { | 760 | for (i = 0; i < pagevec_count(&pvec); i++) { |
765 | bh = head = page_buffers(pvec.pages[i]); | 761 | bh = head = page_buffers(pvec.pages[i]); |
766 | do { | 762 | do { |
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index addd7c5f2d3e..ab5105f9767e 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
@@ -3585,8 +3585,6 @@ static int ocfs2_merge_rec_left(struct ocfs2_path *right_path, | |||
3585 | * The easy case - we can just plop the record right in. | 3585 | * The easy case - we can just plop the record right in. |
3586 | */ | 3586 | */ |
3587 | *left_rec = *split_rec; | 3587 | *left_rec = *split_rec; |
3588 | |||
3589 | has_empty_extent = 0; | ||
3590 | } else | 3588 | } else |
3591 | le16_add_cpu(&left_rec->e_leaf_clusters, split_clusters); | 3589 | le16_add_cpu(&left_rec->e_leaf_clusters, split_clusters); |
3592 | 3590 | ||
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 88a31e9340a0..d1516327b787 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -134,6 +134,19 @@ bail: | |||
134 | return err; | 134 | return err; |
135 | } | 135 | } |
136 | 136 | ||
137 | static int ocfs2_lock_get_block(struct inode *inode, sector_t iblock, | ||
138 | struct buffer_head *bh_result, int create) | ||
139 | { | ||
140 | int ret = 0; | ||
141 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
142 | |||
143 | down_read(&oi->ip_alloc_sem); | ||
144 | ret = ocfs2_get_block(inode, iblock, bh_result, create); | ||
145 | up_read(&oi->ip_alloc_sem); | ||
146 | |||
147 | return ret; | ||
148 | } | ||
149 | |||
137 | int ocfs2_get_block(struct inode *inode, sector_t iblock, | 150 | int ocfs2_get_block(struct inode *inode, sector_t iblock, |
138 | struct buffer_head *bh_result, int create) | 151 | struct buffer_head *bh_result, int create) |
139 | { | 152 | { |
@@ -2128,7 +2141,7 @@ static void ocfs2_dio_free_write_ctx(struct inode *inode, | |||
2128 | * called like this: dio->get_blocks(dio->inode, fs_startblk, | 2141 | * called like this: dio->get_blocks(dio->inode, fs_startblk, |
2129 | * fs_count, map_bh, dio->rw == WRITE); | 2142 | * fs_count, map_bh, dio->rw == WRITE); |
2130 | */ | 2143 | */ |
2131 | static int ocfs2_dio_get_block(struct inode *inode, sector_t iblock, | 2144 | static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock, |
2132 | struct buffer_head *bh_result, int create) | 2145 | struct buffer_head *bh_result, int create) |
2133 | { | 2146 | { |
2134 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 2147 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
@@ -2154,12 +2167,9 @@ static int ocfs2_dio_get_block(struct inode *inode, sector_t iblock, | |||
2154 | * while file size will be changed. | 2167 | * while file size will be changed. |
2155 | */ | 2168 | */ |
2156 | if (pos + total_len <= i_size_read(inode)) { | 2169 | if (pos + total_len <= i_size_read(inode)) { |
2157 | down_read(&oi->ip_alloc_sem); | ||
2158 | /* This is the fast path for re-write. */ | ||
2159 | ret = ocfs2_get_block(inode, iblock, bh_result, create); | ||
2160 | |||
2161 | up_read(&oi->ip_alloc_sem); | ||
2162 | 2170 | ||
2171 | /* This is the fast path for re-write. */ | ||
2172 | ret = ocfs2_lock_get_block(inode, iblock, bh_result, create); | ||
2163 | if (buffer_mapped(bh_result) && | 2173 | if (buffer_mapped(bh_result) && |
2164 | !buffer_new(bh_result) && | 2174 | !buffer_new(bh_result) && |
2165 | ret == 0) | 2175 | ret == 0) |
@@ -2424,9 +2434,9 @@ static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter) | |||
2424 | return 0; | 2434 | return 0; |
2425 | 2435 | ||
2426 | if (iov_iter_rw(iter) == READ) | 2436 | if (iov_iter_rw(iter) == READ) |
2427 | get_block = ocfs2_get_block; | 2437 | get_block = ocfs2_lock_get_block; |
2428 | else | 2438 | else |
2429 | get_block = ocfs2_dio_get_block; | 2439 | get_block = ocfs2_dio_wr_get_block; |
2430 | 2440 | ||
2431 | return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, | 2441 | return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, |
2432 | iter, get_block, | 2442 | iter, get_block, |
diff --git a/fs/ocfs2/buffer_head_io.h b/fs/ocfs2/buffer_head_io.h index b97bcc6dde7c..b1bb70c8ca4d 100644 --- a/fs/ocfs2/buffer_head_io.h +++ b/fs/ocfs2/buffer_head_io.h | |||
@@ -28,9 +28,6 @@ | |||
28 | 28 | ||
29 | #include <linux/buffer_head.h> | 29 | #include <linux/buffer_head.h> |
30 | 30 | ||
31 | void ocfs2_end_buffer_io_sync(struct buffer_head *bh, | ||
32 | int uptodate); | ||
33 | |||
34 | int ocfs2_write_block(struct ocfs2_super *osb, | 31 | int ocfs2_write_block(struct ocfs2_super *osb, |
35 | struct buffer_head *bh, | 32 | struct buffer_head *bh, |
36 | struct ocfs2_caching_info *ci); | 33 | struct ocfs2_caching_info *ci); |
diff --git a/fs/ocfs2/cluster/heartbeat.h b/fs/ocfs2/cluster/heartbeat.h index 3ef5137dc362..a9e67efc0004 100644 --- a/fs/ocfs2/cluster/heartbeat.h +++ b/fs/ocfs2/cluster/heartbeat.h | |||
@@ -79,10 +79,8 @@ void o2hb_fill_node_map(unsigned long *map, | |||
79 | unsigned bytes); | 79 | unsigned bytes); |
80 | void o2hb_exit(void); | 80 | void o2hb_exit(void); |
81 | int o2hb_init(void); | 81 | int o2hb_init(void); |
82 | int o2hb_check_node_heartbeating(u8 node_num); | ||
83 | int o2hb_check_node_heartbeating_no_sem(u8 node_num); | 82 | int o2hb_check_node_heartbeating_no_sem(u8 node_num); |
84 | int o2hb_check_node_heartbeating_from_callback(u8 node_num); | 83 | int o2hb_check_node_heartbeating_from_callback(u8 node_num); |
85 | int o2hb_check_local_node_heartbeating(void); | ||
86 | void o2hb_stop_all_regions(void); | 84 | void o2hb_stop_all_regions(void); |
87 | int o2hb_get_all_regions(char *region_uuids, u8 numregions); | 85 | int o2hb_get_all_regions(char *region_uuids, u8 numregions); |
88 | int o2hb_global_heartbeat_active(void); | 86 | int o2hb_global_heartbeat_active(void); |
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index a51200ece93d..da64c3a20eeb 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c | |||
@@ -40,6 +40,9 @@ char *o2nm_fence_method_desc[O2NM_FENCE_METHODS] = { | |||
40 | "panic", /* O2NM_FENCE_PANIC */ | 40 | "panic", /* O2NM_FENCE_PANIC */ |
41 | }; | 41 | }; |
42 | 42 | ||
43 | static inline void o2nm_lock_subsystem(void); | ||
44 | static inline void o2nm_unlock_subsystem(void); | ||
45 | |||
43 | struct o2nm_node *o2nm_get_node_by_num(u8 node_num) | 46 | struct o2nm_node *o2nm_get_node_by_num(u8 node_num) |
44 | { | 47 | { |
45 | struct o2nm_node *node = NULL; | 48 | struct o2nm_node *node = NULL; |
@@ -181,7 +184,10 @@ static struct o2nm_cluster *to_o2nm_cluster_from_node(struct o2nm_node *node) | |||
181 | { | 184 | { |
182 | /* through the first node_set .parent | 185 | /* through the first node_set .parent |
183 | * mycluster/nodes/mynode == o2nm_cluster->o2nm_node_group->o2nm_node */ | 186 | * mycluster/nodes/mynode == o2nm_cluster->o2nm_node_group->o2nm_node */ |
184 | return to_o2nm_cluster(node->nd_item.ci_parent->ci_parent); | 187 | if (node->nd_item.ci_parent) |
188 | return to_o2nm_cluster(node->nd_item.ci_parent->ci_parent); | ||
189 | else | ||
190 | return NULL; | ||
185 | } | 191 | } |
186 | 192 | ||
187 | enum { | 193 | enum { |
@@ -194,7 +200,7 @@ static ssize_t o2nm_node_num_store(struct config_item *item, const char *page, | |||
194 | size_t count) | 200 | size_t count) |
195 | { | 201 | { |
196 | struct o2nm_node *node = to_o2nm_node(item); | 202 | struct o2nm_node *node = to_o2nm_node(item); |
197 | struct o2nm_cluster *cluster = to_o2nm_cluster_from_node(node); | 203 | struct o2nm_cluster *cluster; |
198 | unsigned long tmp; | 204 | unsigned long tmp; |
199 | char *p = (char *)page; | 205 | char *p = (char *)page; |
200 | int ret = 0; | 206 | int ret = 0; |
@@ -214,6 +220,13 @@ static ssize_t o2nm_node_num_store(struct config_item *item, const char *page, | |||
214 | !test_bit(O2NM_NODE_ATTR_PORT, &node->nd_set_attributes)) | 220 | !test_bit(O2NM_NODE_ATTR_PORT, &node->nd_set_attributes)) |
215 | return -EINVAL; /* XXX */ | 221 | return -EINVAL; /* XXX */ |
216 | 222 | ||
223 | o2nm_lock_subsystem(); | ||
224 | cluster = to_o2nm_cluster_from_node(node); | ||
225 | if (!cluster) { | ||
226 | o2nm_unlock_subsystem(); | ||
227 | return -EINVAL; | ||
228 | } | ||
229 | |||
217 | write_lock(&cluster->cl_nodes_lock); | 230 | write_lock(&cluster->cl_nodes_lock); |
218 | if (cluster->cl_nodes[tmp]) | 231 | if (cluster->cl_nodes[tmp]) |
219 | ret = -EEXIST; | 232 | ret = -EEXIST; |
@@ -226,6 +239,8 @@ static ssize_t o2nm_node_num_store(struct config_item *item, const char *page, | |||
226 | set_bit(tmp, cluster->cl_nodes_bitmap); | 239 | set_bit(tmp, cluster->cl_nodes_bitmap); |
227 | } | 240 | } |
228 | write_unlock(&cluster->cl_nodes_lock); | 241 | write_unlock(&cluster->cl_nodes_lock); |
242 | o2nm_unlock_subsystem(); | ||
243 | |||
229 | if (ret) | 244 | if (ret) |
230 | return ret; | 245 | return ret; |
231 | 246 | ||
@@ -269,7 +284,7 @@ static ssize_t o2nm_node_ipv4_address_store(struct config_item *item, | |||
269 | size_t count) | 284 | size_t count) |
270 | { | 285 | { |
271 | struct o2nm_node *node = to_o2nm_node(item); | 286 | struct o2nm_node *node = to_o2nm_node(item); |
272 | struct o2nm_cluster *cluster = to_o2nm_cluster_from_node(node); | 287 | struct o2nm_cluster *cluster; |
273 | int ret, i; | 288 | int ret, i; |
274 | struct rb_node **p, *parent; | 289 | struct rb_node **p, *parent; |
275 | unsigned int octets[4]; | 290 | unsigned int octets[4]; |
@@ -286,6 +301,13 @@ static ssize_t o2nm_node_ipv4_address_store(struct config_item *item, | |||
286 | be32_add_cpu(&ipv4_addr, octets[i] << (i * 8)); | 301 | be32_add_cpu(&ipv4_addr, octets[i] << (i * 8)); |
287 | } | 302 | } |
288 | 303 | ||
304 | o2nm_lock_subsystem(); | ||
305 | cluster = to_o2nm_cluster_from_node(node); | ||
306 | if (!cluster) { | ||
307 | o2nm_unlock_subsystem(); | ||
308 | return -EINVAL; | ||
309 | } | ||
310 | |||
289 | ret = 0; | 311 | ret = 0; |
290 | write_lock(&cluster->cl_nodes_lock); | 312 | write_lock(&cluster->cl_nodes_lock); |
291 | if (o2nm_node_ip_tree_lookup(cluster, ipv4_addr, &p, &parent)) | 313 | if (o2nm_node_ip_tree_lookup(cluster, ipv4_addr, &p, &parent)) |
@@ -298,6 +320,8 @@ static ssize_t o2nm_node_ipv4_address_store(struct config_item *item, | |||
298 | rb_insert_color(&node->nd_ip_node, &cluster->cl_node_ip_tree); | 320 | rb_insert_color(&node->nd_ip_node, &cluster->cl_node_ip_tree); |
299 | } | 321 | } |
300 | write_unlock(&cluster->cl_nodes_lock); | 322 | write_unlock(&cluster->cl_nodes_lock); |
323 | o2nm_unlock_subsystem(); | ||
324 | |||
301 | if (ret) | 325 | if (ret) |
302 | return ret; | 326 | return ret; |
303 | 327 | ||
@@ -315,7 +339,7 @@ static ssize_t o2nm_node_local_store(struct config_item *item, const char *page, | |||
315 | size_t count) | 339 | size_t count) |
316 | { | 340 | { |
317 | struct o2nm_node *node = to_o2nm_node(item); | 341 | struct o2nm_node *node = to_o2nm_node(item); |
318 | struct o2nm_cluster *cluster = to_o2nm_cluster_from_node(node); | 342 | struct o2nm_cluster *cluster; |
319 | unsigned long tmp; | 343 | unsigned long tmp; |
320 | char *p = (char *)page; | 344 | char *p = (char *)page; |
321 | ssize_t ret; | 345 | ssize_t ret; |
@@ -333,17 +357,26 @@ static ssize_t o2nm_node_local_store(struct config_item *item, const char *page, | |||
333 | !test_bit(O2NM_NODE_ATTR_PORT, &node->nd_set_attributes)) | 357 | !test_bit(O2NM_NODE_ATTR_PORT, &node->nd_set_attributes)) |
334 | return -EINVAL; /* XXX */ | 358 | return -EINVAL; /* XXX */ |
335 | 359 | ||
360 | o2nm_lock_subsystem(); | ||
361 | cluster = to_o2nm_cluster_from_node(node); | ||
362 | if (!cluster) { | ||
363 | ret = -EINVAL; | ||
364 | goto out; | ||
365 | } | ||
366 | |||
336 | /* the only failure case is trying to set a new local node | 367 | /* the only failure case is trying to set a new local node |
337 | * when a different one is already set */ | 368 | * when a different one is already set */ |
338 | if (tmp && tmp == cluster->cl_has_local && | 369 | if (tmp && tmp == cluster->cl_has_local && |
339 | cluster->cl_local_node != node->nd_num) | 370 | cluster->cl_local_node != node->nd_num) { |
340 | return -EBUSY; | 371 | ret = -EBUSY; |
372 | goto out; | ||
373 | } | ||
341 | 374 | ||
342 | /* bring up the rx thread if we're setting the new local node. */ | 375 | /* bring up the rx thread if we're setting the new local node. */ |
343 | if (tmp && !cluster->cl_has_local) { | 376 | if (tmp && !cluster->cl_has_local) { |
344 | ret = o2net_start_listening(node); | 377 | ret = o2net_start_listening(node); |
345 | if (ret) | 378 | if (ret) |
346 | return ret; | 379 | goto out; |
347 | } | 380 | } |
348 | 381 | ||
349 | if (!tmp && cluster->cl_has_local && | 382 | if (!tmp && cluster->cl_has_local && |
@@ -358,7 +391,11 @@ static ssize_t o2nm_node_local_store(struct config_item *item, const char *page, | |||
358 | cluster->cl_local_node = node->nd_num; | 391 | cluster->cl_local_node = node->nd_num; |
359 | } | 392 | } |
360 | 393 | ||
361 | return count; | 394 | ret = count; |
395 | |||
396 | out: | ||
397 | o2nm_unlock_subsystem(); | ||
398 | return ret; | ||
362 | } | 399 | } |
363 | 400 | ||
364 | CONFIGFS_ATTR(o2nm_node_, num); | 401 | CONFIGFS_ATTR(o2nm_node_, num); |
@@ -738,6 +775,16 @@ static struct o2nm_cluster_group o2nm_cluster_group = { | |||
738 | }, | 775 | }, |
739 | }; | 776 | }; |
740 | 777 | ||
778 | static inline void o2nm_lock_subsystem(void) | ||
779 | { | ||
780 | mutex_lock(&o2nm_cluster_group.cs_subsys.su_mutex); | ||
781 | } | ||
782 | |||
783 | static inline void o2nm_unlock_subsystem(void) | ||
784 | { | ||
785 | mutex_unlock(&o2nm_cluster_group.cs_subsys.su_mutex); | ||
786 | } | ||
787 | |||
741 | int o2nm_depend_item(struct config_item *item) | 788 | int o2nm_depend_item(struct config_item *item) |
742 | { | 789 | { |
743 | return configfs_depend_item(&o2nm_cluster_group.cs_subsys, item); | 790 | return configfs_depend_item(&o2nm_cluster_group.cs_subsys, item); |
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index a2b19fbdcf46..e1fea149f50b 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c | |||
@@ -394,7 +394,6 @@ int dlm_domain_fully_joined(struct dlm_ctxt *dlm) | |||
394 | static void dlm_destroy_dlm_worker(struct dlm_ctxt *dlm) | 394 | static void dlm_destroy_dlm_worker(struct dlm_ctxt *dlm) |
395 | { | 395 | { |
396 | if (dlm->dlm_worker) { | 396 | if (dlm->dlm_worker) { |
397 | flush_workqueue(dlm->dlm_worker); | ||
398 | destroy_workqueue(dlm->dlm_worker); | 397 | destroy_workqueue(dlm->dlm_worker); |
399 | dlm->dlm_worker = NULL; | 398 | dlm->dlm_worker = NULL; |
400 | } | 399 | } |
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 3e04279446e8..9c3e0f13ca87 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
@@ -2616,7 +2616,9 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, | |||
2616 | * otherwise the assert_master from the new | 2616 | * otherwise the assert_master from the new |
2617 | * master will destroy this. | 2617 | * master will destroy this. |
2618 | */ | 2618 | */ |
2619 | dlm_get_mle_inuse(mle); | 2619 | if (ret != -EEXIST) |
2620 | dlm_get_mle_inuse(mle); | ||
2621 | |||
2620 | spin_unlock(&dlm->master_lock); | 2622 | spin_unlock(&dlm->master_lock); |
2621 | spin_unlock(&dlm->spinlock); | 2623 | spin_unlock(&dlm->spinlock); |
2622 | 2624 | ||
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 74407c6dd592..ec8f75813beb 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
@@ -2419,6 +2419,7 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) | |||
2419 | dlm_lockres_put(res); | 2419 | dlm_lockres_put(res); |
2420 | continue; | 2420 | continue; |
2421 | } | 2421 | } |
2422 | dlm_move_lockres_to_recovery_list(dlm, res); | ||
2422 | } else if (res->owner == dlm->node_num) { | 2423 | } else if (res->owner == dlm->node_num) { |
2423 | dlm_free_dead_locks(dlm, res, dead_node); | 2424 | dlm_free_dead_locks(dlm, res, dead_node); |
2424 | __dlm_lockres_calc_usage(dlm, res); | 2425 | __dlm_lockres_calc_usage(dlm, res); |
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 988137de08f5..9c7c18c0e129 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c | |||
@@ -670,7 +670,6 @@ static void __exit exit_dlmfs_fs(void) | |||
670 | { | 670 | { |
671 | unregister_filesystem(&dlmfs_fs_type); | 671 | unregister_filesystem(&dlmfs_fs_type); |
672 | 672 | ||
673 | flush_workqueue(user_dlm_worker); | ||
674 | destroy_workqueue(user_dlm_worker); | 673 | destroy_workqueue(user_dlm_worker); |
675 | 674 | ||
676 | /* | 675 | /* |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 6e41fc8fabbe..dc455d45a66a 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -1161,6 +1161,13 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
1161 | } | 1161 | } |
1162 | size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE; | 1162 | size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE; |
1163 | if (size_change) { | 1163 | if (size_change) { |
1164 | /* | ||
1165 | * Here we should wait dio to finish before inode lock | ||
1166 | * to avoid a deadlock between ocfs2_setattr() and | ||
1167 | * ocfs2_dio_end_io_write() | ||
1168 | */ | ||
1169 | inode_dio_wait(inode); | ||
1170 | |||
1164 | status = ocfs2_rw_lock(inode, 1); | 1171 | status = ocfs2_rw_lock(inode, 1); |
1165 | if (status < 0) { | 1172 | if (status < 0) { |
1166 | mlog_errno(status); | 1173 | mlog_errno(status); |
@@ -1200,8 +1207,6 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
1200 | if (status) | 1207 | if (status) |
1201 | goto bail_unlock; | 1208 | goto bail_unlock; |
1202 | 1209 | ||
1203 | inode_dio_wait(inode); | ||
1204 | |||
1205 | if (i_size_read(inode) >= attr->ia_size) { | 1210 | if (i_size_read(inode) >= attr->ia_size) { |
1206 | if (ocfs2_should_order_data(inode)) { | 1211 | if (ocfs2_should_order_data(inode)) { |
1207 | status = ocfs2_begin_ordered_truncate(inode, | 1212 | status = ocfs2_begin_ordered_truncate(inode, |
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 71f22c8fbffd..9f0b95abc09f 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
@@ -1147,12 +1147,9 @@ int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb, | |||
1147 | GLOBAL_BITMAP_SYSTEM_INODE, | 1147 | GLOBAL_BITMAP_SYSTEM_INODE, |
1148 | OCFS2_INVALID_SLOT, NULL, | 1148 | OCFS2_INVALID_SLOT, NULL, |
1149 | ALLOC_NEW_GROUP); | 1149 | ALLOC_NEW_GROUP); |
1150 | if (status < 0 && status != -ENOSPC) { | 1150 | if (status < 0 && status != -ENOSPC) |
1151 | mlog_errno(status); | 1151 | mlog_errno(status); |
1152 | goto bail; | ||
1153 | } | ||
1154 | 1152 | ||
1155 | bail: | ||
1156 | return status; | 1153 | return status; |
1157 | } | 1154 | } |
1158 | 1155 | ||
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 80733496b22a..040bbb6a6e4b 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -2521,10 +2521,8 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb) | |||
2521 | /* This function assumes that the caller has the main osb resource */ | 2521 | /* This function assumes that the caller has the main osb resource */ |
2522 | 2522 | ||
2523 | /* ocfs2_initializer_super have already created this workqueue */ | 2523 | /* ocfs2_initializer_super have already created this workqueue */ |
2524 | if (osb->ocfs2_wq) { | 2524 | if (osb->ocfs2_wq) |
2525 | flush_workqueue(osb->ocfs2_wq); | ||
2526 | destroy_workqueue(osb->ocfs2_wq); | 2525 | destroy_workqueue(osb->ocfs2_wq); |
2527 | } | ||
2528 | 2526 | ||
2529 | ocfs2_free_slot_info(osb); | 2527 | ocfs2_free_slot_info(osb); |
2530 | 2528 | ||
diff --git a/fs/ocfs2/super.h b/fs/ocfs2/super.h index b023e4f3d740..d4550c8bbc41 100644 --- a/fs/ocfs2/super.h +++ b/fs/ocfs2/super.h | |||
@@ -26,9 +26,6 @@ | |||
26 | #ifndef OCFS2_SUPER_H | 26 | #ifndef OCFS2_SUPER_H |
27 | #define OCFS2_SUPER_H | 27 | #define OCFS2_SUPER_H |
28 | 28 | ||
29 | int ocfs2_publish_get_mount_state(struct ocfs2_super *osb, | ||
30 | int node_num); | ||
31 | |||
32 | __printf(3, 4) | 29 | __printf(3, 4) |
33 | int __ocfs2_error(struct super_block *sb, const char *function, | 30 | int __ocfs2_error(struct super_block *sb, const char *function, |
34 | const char *fmt, ...); | 31 | const char *fmt, ...); |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 6744bd706ecf..875231c36cb3 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -26,7 +26,7 @@ | |||
26 | 26 | ||
27 | void task_mem(struct seq_file *m, struct mm_struct *mm) | 27 | void task_mem(struct seq_file *m, struct mm_struct *mm) |
28 | { | 28 | { |
29 | unsigned long text, lib, swap, ptes, pmds, anon, file, shmem; | 29 | unsigned long text, lib, swap, anon, file, shmem; |
30 | unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss; | 30 | unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss; |
31 | 31 | ||
32 | anon = get_mm_counter(mm, MM_ANONPAGES); | 32 | anon = get_mm_counter(mm, MM_ANONPAGES); |
@@ -50,8 +50,6 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) | |||
50 | text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10; | 50 | text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10; |
51 | lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text; | 51 | lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text; |
52 | swap = get_mm_counter(mm, MM_SWAPENTS); | 52 | swap = get_mm_counter(mm, MM_SWAPENTS); |
53 | ptes = PTRS_PER_PTE * sizeof(pte_t) * atomic_long_read(&mm->nr_ptes); | ||
54 | pmds = PTRS_PER_PMD * sizeof(pmd_t) * mm_nr_pmds(mm); | ||
55 | seq_printf(m, | 53 | seq_printf(m, |
56 | "VmPeak:\t%8lu kB\n" | 54 | "VmPeak:\t%8lu kB\n" |
57 | "VmSize:\t%8lu kB\n" | 55 | "VmSize:\t%8lu kB\n" |
@@ -67,7 +65,6 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) | |||
67 | "VmExe:\t%8lu kB\n" | 65 | "VmExe:\t%8lu kB\n" |
68 | "VmLib:\t%8lu kB\n" | 66 | "VmLib:\t%8lu kB\n" |
69 | "VmPTE:\t%8lu kB\n" | 67 | "VmPTE:\t%8lu kB\n" |
70 | "VmPMD:\t%8lu kB\n" | ||
71 | "VmSwap:\t%8lu kB\n", | 68 | "VmSwap:\t%8lu kB\n", |
72 | hiwater_vm << (PAGE_SHIFT-10), | 69 | hiwater_vm << (PAGE_SHIFT-10), |
73 | total_vm << (PAGE_SHIFT-10), | 70 | total_vm << (PAGE_SHIFT-10), |
@@ -80,8 +77,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) | |||
80 | shmem << (PAGE_SHIFT-10), | 77 | shmem << (PAGE_SHIFT-10), |
81 | mm->data_vm << (PAGE_SHIFT-10), | 78 | mm->data_vm << (PAGE_SHIFT-10), |
82 | mm->stack_vm << (PAGE_SHIFT-10), text, lib, | 79 | mm->stack_vm << (PAGE_SHIFT-10), text, lib, |
83 | ptes >> 10, | 80 | mm_pgtables_bytes(mm) >> 10, |
84 | pmds >> 10, | ||
85 | swap << (PAGE_SHIFT-10)); | 81 | swap << (PAGE_SHIFT-10)); |
86 | hugetlb_report_usage(m, mm); | 82 | hugetlb_report_usage(m, mm); |
87 | } | 83 | } |
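Aside (not part of the patch): the separate PTE and PMD counters are replaced by a single byte count covering all page-table levels, so /proc/<pid>/status keeps one VmPTE line and drops VmPMD. A hedged sketch of the replacement arithmetic:

	/* sketch: every page-table level reported as one kB figure */
	unsigned long pgtables_kb = mm_pgtables_bytes(mm) >> 10;

	seq_printf(m, "VmPTE:\t%8lu kB\n", pgtables_kb);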
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index f46d133c0949..ac9a4e65ca49 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c | |||
@@ -668,7 +668,7 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs) | |||
668 | ctx->features = octx->features; | 668 | ctx->features = octx->features; |
669 | ctx->released = false; | 669 | ctx->released = false; |
670 | ctx->mm = vma->vm_mm; | 670 | ctx->mm = vma->vm_mm; |
671 | atomic_inc(&ctx->mm->mm_count); | 671 | mmgrab(ctx->mm); |
672 | 672 | ||
673 | userfaultfd_ctx_get(octx); | 673 | userfaultfd_ctx_get(octx); |
674 | fctx->orig = octx; | 674 | fctx->orig = octx; |
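Aside (not part of the patch): mmgrab() is the named helper for taking an mm_count reference, i.e. pinning struct mm_struct itself rather than its address space; the open-coded atomic_inc() above is the pattern it replaces. A hedged sketch of the paired grab/drop, assuming <linux/sched/mm.h>:

	#include <linux/sched/mm.h>

	mmgrab(mm);	/* was: atomic_inc(&mm->mm_count) */
	/* ... safe to keep the mm_struct itself around; its pages may still go away ... */
	mmdrop(mm);	/* releases the mm_count reference */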
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index 758f37ac5ad3..4b87472f35bc 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h | |||
@@ -104,7 +104,7 @@ kmem_zone_init(int size, char *zone_name) | |||
104 | } | 104 | } |
105 | 105 | ||
106 | static inline kmem_zone_t * | 106 | static inline kmem_zone_t * |
107 | kmem_zone_init_flags(int size, char *zone_name, unsigned long flags, | 107 | kmem_zone_init_flags(int size, char *zone_name, slab_flags_t flags, |
108 | void (*construct)(void *)) | 108 | void (*construct)(void *)) |
109 | { | 109 | { |
110 | return kmem_cache_create(zone_name, size, 0, flags, construct); | 110 | return kmem_cache_create(zone_name, size, 0, flags, construct); |
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index f41ca8486e02..e54e7e0033eb 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h | |||
@@ -93,7 +93,7 @@ extern void wb_writeout_inc(struct bdi_writeback *wb); | |||
93 | /* | 93 | /* |
94 | * maximal error of a stat counter. | 94 | * maximal error of a stat counter. |
95 | */ | 95 | */ |
96 | static inline unsigned long wb_stat_error(struct bdi_writeback *wb) | 96 | static inline unsigned long wb_stat_error(void) |
97 | { | 97 | { |
98 | #ifdef CONFIG_SMP | 98 | #ifdef CONFIG_SMP |
99 | return nr_cpu_ids * WB_STAT_BATCH; | 99 | return nr_cpu_ids * WB_STAT_BATCH; |
@@ -122,6 +122,8 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); | |||
122 | * BDI_CAP_STRICTLIMIT: Keep number of dirty pages below bdi threshold. | 122 | * BDI_CAP_STRICTLIMIT: Keep number of dirty pages below bdi threshold. |
123 | * | 123 | * |
124 | * BDI_CAP_CGROUP_WRITEBACK: Supports cgroup-aware writeback. | 124 | * BDI_CAP_CGROUP_WRITEBACK: Supports cgroup-aware writeback. |
125 | * BDI_CAP_SYNCHRONOUS_IO: Device is so fast that asynchronous IO would be | ||
126 | * inefficient. | ||
125 | */ | 127 | */ |
126 | #define BDI_CAP_NO_ACCT_DIRTY 0x00000001 | 128 | #define BDI_CAP_NO_ACCT_DIRTY 0x00000001 |
127 | #define BDI_CAP_NO_WRITEBACK 0x00000002 | 129 | #define BDI_CAP_NO_WRITEBACK 0x00000002 |
@@ -129,6 +131,7 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); | |||
129 | #define BDI_CAP_STABLE_WRITES 0x00000008 | 131 | #define BDI_CAP_STABLE_WRITES 0x00000008 |
130 | #define BDI_CAP_STRICTLIMIT 0x00000010 | 132 | #define BDI_CAP_STRICTLIMIT 0x00000010 |
131 | #define BDI_CAP_CGROUP_WRITEBACK 0x00000020 | 133 | #define BDI_CAP_CGROUP_WRITEBACK 0x00000020 |
134 | #define BDI_CAP_SYNCHRONOUS_IO 0x00000040 | ||
132 | 135 | ||
133 | #define BDI_CAP_NO_ACCT_AND_WRITEBACK \ | 136 | #define BDI_CAP_NO_ACCT_AND_WRITEBACK \ |
134 | (BDI_CAP_NO_WRITEBACK | BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_ACCT_WB) | 137 | (BDI_CAP_NO_WRITEBACK | BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_ACCT_WB) |
@@ -174,6 +177,11 @@ static inline int wb_congested(struct bdi_writeback *wb, int cong_bits) | |||
174 | long congestion_wait(int sync, long timeout); | 177 | long congestion_wait(int sync, long timeout); |
175 | long wait_iff_congested(struct pglist_data *pgdat, int sync, long timeout); | 178 | long wait_iff_congested(struct pglist_data *pgdat, int sync, long timeout); |
176 | 179 | ||
180 | static inline bool bdi_cap_synchronous_io(struct backing_dev_info *bdi) | ||
181 | { | ||
182 | return bdi->capabilities & BDI_CAP_SYNCHRONOUS_IO; | ||
183 | } | ||
184 | |||
177 | static inline bool bdi_cap_stable_pages_required(struct backing_dev_info *bdi) | 185 | static inline bool bdi_cap_stable_pages_required(struct backing_dev_info *bdi) |
178 | { | 186 | { |
179 | return bdi->capabilities & BDI_CAP_STABLE_WRITES; | 187 | return bdi->capabilities & BDI_CAP_STABLE_WRITES; |
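Aside (not part of the patch): BDI_CAP_SYNCHRONOUS_IO marks backing devices fast enough that queueing asynchronous I/O costs more than it saves, and bdi_cap_synchronous_io() is the accessor callers test. A hedged usage sketch; the branch body is illustrative:

	struct backing_dev_info *bdi = inode_to_bdi(inode);

	if (bdi_cap_synchronous_io(bdi)) {
		/* e.g. complete the I/O inline instead of taking the async path */
	}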
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index fdf40ca04b3c..a53063e9d7d8 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h | |||
@@ -161,6 +161,9 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, | |||
161 | #define BOOTMEM_ALLOC_ANYWHERE (~(phys_addr_t)0) | 161 | #define BOOTMEM_ALLOC_ANYWHERE (~(phys_addr_t)0) |
162 | 162 | ||
163 | /* FIXME: Move to memblock.h at a point where we remove nobootmem.c */ | 163 | /* FIXME: Move to memblock.h at a point where we remove nobootmem.c */ |
164 | void *memblock_virt_alloc_try_nid_raw(phys_addr_t size, phys_addr_t align, | ||
165 | phys_addr_t min_addr, | ||
166 | phys_addr_t max_addr, int nid); | ||
164 | void *memblock_virt_alloc_try_nid_nopanic(phys_addr_t size, | 167 | void *memblock_virt_alloc_try_nid_nopanic(phys_addr_t size, |
165 | phys_addr_t align, phys_addr_t min_addr, | 168 | phys_addr_t align, phys_addr_t min_addr, |
166 | phys_addr_t max_addr, int nid); | 169 | phys_addr_t max_addr, int nid); |
@@ -177,6 +180,14 @@ static inline void * __init memblock_virt_alloc( | |||
177 | NUMA_NO_NODE); | 180 | NUMA_NO_NODE); |
178 | } | 181 | } |
179 | 182 | ||
183 | static inline void * __init memblock_virt_alloc_raw( | ||
184 | phys_addr_t size, phys_addr_t align) | ||
185 | { | ||
186 | return memblock_virt_alloc_try_nid_raw(size, align, BOOTMEM_LOW_LIMIT, | ||
187 | BOOTMEM_ALLOC_ACCESSIBLE, | ||
188 | NUMA_NO_NODE); | ||
189 | } | ||
190 | |||
180 | static inline void * __init memblock_virt_alloc_nopanic( | 191 | static inline void * __init memblock_virt_alloc_nopanic( |
181 | phys_addr_t size, phys_addr_t align) | 192 | phys_addr_t size, phys_addr_t align) |
182 | { | 193 | { |
@@ -258,6 +269,14 @@ static inline void * __init memblock_virt_alloc( | |||
258 | return __alloc_bootmem(size, align, BOOTMEM_LOW_LIMIT); | 269 | return __alloc_bootmem(size, align, BOOTMEM_LOW_LIMIT); |
259 | } | 270 | } |
260 | 271 | ||
272 | static inline void * __init memblock_virt_alloc_raw( | ||
273 | phys_addr_t size, phys_addr_t align) | ||
274 | { | ||
275 | if (!align) | ||
276 | align = SMP_CACHE_BYTES; | ||
277 | return __alloc_bootmem_nopanic(size, align, BOOTMEM_LOW_LIMIT); | ||
278 | } | ||
279 | |||
261 | static inline void * __init memblock_virt_alloc_nopanic( | 280 | static inline void * __init memblock_virt_alloc_nopanic( |
262 | phys_addr_t size, phys_addr_t align) | 281 | phys_addr_t size, phys_addr_t align) |
263 | { | 282 | { |
@@ -310,6 +329,14 @@ static inline void * __init memblock_virt_alloc_try_nid(phys_addr_t size, | |||
310 | min_addr); | 329 | min_addr); |
311 | } | 330 | } |
312 | 331 | ||
332 | static inline void * __init memblock_virt_alloc_try_nid_raw( | ||
333 | phys_addr_t size, phys_addr_t align, | ||
334 | phys_addr_t min_addr, phys_addr_t max_addr, int nid) | ||
335 | { | ||
336 | return ___alloc_bootmem_node_nopanic(NODE_DATA(nid), size, align, | ||
337 | min_addr, max_addr); | ||
338 | } | ||
339 | |||
313 | static inline void * __init memblock_virt_alloc_try_nid_nopanic( | 340 | static inline void * __init memblock_virt_alloc_try_nid_nopanic( |
314 | phys_addr_t size, phys_addr_t align, | 341 | phys_addr_t size, phys_addr_t align, |
315 | phys_addr_t min_addr, phys_addr_t max_addr, int nid) | 342 | phys_addr_t min_addr, phys_addr_t max_addr, int nid) |
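Aside (not part of the patch): the new *_raw variants mirror memblock_virt_alloc()/memblock_virt_alloc_try_nid() but, as far as I can tell from this series, hand back memory without clearing it, for callers that will initialize every byte themselves. A hedged sketch; 'size' is an assumed local:

	/* sketch: boot-time allocation that skips the implicit clearing done
	 * by the non-raw helpers; the caller must initialize the whole buffer */
	void *buf = memblock_virt_alloc_raw(size, SMP_CACHE_BYTES);

	if (!buf)
		return -ENOMEM;	/* illustrative: this variant may return NULL */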
diff --git a/include/linux/c2port.h b/include/linux/c2port.h index 4efabcb51347..f2736348ca26 100644 --- a/include/linux/c2port.h +++ b/include/linux/c2port.h | |||
@@ -9,8 +9,6 @@ | |||
9 | * the Free Software Foundation | 9 | * the Free Software Foundation |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #include <linux/kmemcheck.h> | ||
13 | |||
14 | #define C2PORT_NAME_LEN 32 | 12 | #define C2PORT_NAME_LEN 32 |
15 | 13 | ||
16 | struct device; | 14 | struct device; |
@@ -22,10 +20,8 @@ struct device; | |||
22 | /* Main struct */ | 20 | /* Main struct */ |
23 | struct c2port_ops; | 21 | struct c2port_ops; |
24 | struct c2port_device { | 22 | struct c2port_device { |
25 | kmemcheck_bitfield_begin(flags); | ||
26 | unsigned int access:1; | 23 | unsigned int access:1; |
27 | unsigned int flash_access:1; | 24 | unsigned int flash_access:1; |
28 | kmemcheck_bitfield_end(flags); | ||
29 | 25 | ||
30 | int id; | 26 | int id; |
31 | char name[C2PORT_NAME_LEN]; | 27 | char name[C2PORT_NAME_LEN]; |
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index eee1499db396..e8f8e8fb244d 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h | |||
@@ -9,7 +9,6 @@ | |||
9 | #include <linux/dma-debug.h> | 9 | #include <linux/dma-debug.h> |
10 | #include <linux/dma-direction.h> | 10 | #include <linux/dma-direction.h> |
11 | #include <linux/scatterlist.h> | 11 | #include <linux/scatterlist.h> |
12 | #include <linux/kmemcheck.h> | ||
13 | #include <linux/bug.h> | 12 | #include <linux/bug.h> |
14 | #include <linux/mem_encrypt.h> | 13 | #include <linux/mem_encrypt.h> |
15 | 14 | ||
@@ -232,7 +231,6 @@ static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr, | |||
232 | const struct dma_map_ops *ops = get_dma_ops(dev); | 231 | const struct dma_map_ops *ops = get_dma_ops(dev); |
233 | dma_addr_t addr; | 232 | dma_addr_t addr; |
234 | 233 | ||
235 | kmemcheck_mark_initialized(ptr, size); | ||
236 | BUG_ON(!valid_dma_direction(dir)); | 234 | BUG_ON(!valid_dma_direction(dir)); |
237 | addr = ops->map_page(dev, virt_to_page(ptr), | 235 | addr = ops->map_page(dev, virt_to_page(ptr), |
238 | offset_in_page(ptr), size, | 236 | offset_in_page(ptr), size, |
@@ -265,11 +263,8 @@ static inline int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, | |||
265 | unsigned long attrs) | 263 | unsigned long attrs) |
266 | { | 264 | { |
267 | const struct dma_map_ops *ops = get_dma_ops(dev); | 265 | const struct dma_map_ops *ops = get_dma_ops(dev); |
268 | int i, ents; | 266 | int ents; |
269 | struct scatterlist *s; | ||
270 | 267 | ||
271 | for_each_sg(sg, s, nents, i) | ||
272 | kmemcheck_mark_initialized(sg_virt(s), s->length); | ||
273 | BUG_ON(!valid_dma_direction(dir)); | 268 | BUG_ON(!valid_dma_direction(dir)); |
274 | ents = ops->map_sg(dev, sg, nents, dir, attrs); | 269 | ents = ops->map_sg(dev, sg, nents, dir, attrs); |
275 | BUG_ON(ents < 0); | 270 | BUG_ON(ents < 0); |
@@ -299,7 +294,6 @@ static inline dma_addr_t dma_map_page_attrs(struct device *dev, | |||
299 | const struct dma_map_ops *ops = get_dma_ops(dev); | 294 | const struct dma_map_ops *ops = get_dma_ops(dev); |
300 | dma_addr_t addr; | 295 | dma_addr_t addr; |
301 | 296 | ||
302 | kmemcheck_mark_initialized(page_address(page) + offset, size); | ||
303 | BUG_ON(!valid_dma_direction(dir)); | 297 | BUG_ON(!valid_dma_direction(dir)); |
304 | addr = ops->map_page(dev, page, offset, size, dir, attrs); | 298 | addr = ops->map_page(dev, page, offset, size, dir, attrs); |
305 | debug_dma_map_page(dev, page, offset, size, dir, addr, false); | 299 | debug_dma_map_page(dev, page, offset, size, dir, addr, false); |
diff --git a/include/linux/filter.h b/include/linux/filter.h index 0cd02ff4ae30..80b5b482cb46 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h | |||
@@ -454,13 +454,11 @@ struct bpf_binary_header { | |||
454 | 454 | ||
455 | struct bpf_prog { | 455 | struct bpf_prog { |
456 | u16 pages; /* Number of allocated pages */ | 456 | u16 pages; /* Number of allocated pages */ |
457 | kmemcheck_bitfield_begin(meta); | ||
458 | u16 jited:1, /* Is our filter JIT'ed? */ | 457 | u16 jited:1, /* Is our filter JIT'ed? */ |
459 | locked:1, /* Program image locked? */ | 458 | locked:1, /* Program image locked? */ |
460 | gpl_compatible:1, /* Is filter GPL compatible? */ | 459 | gpl_compatible:1, /* Is filter GPL compatible? */ |
461 | cb_access:1, /* Is control block accessed? */ | 460 | cb_access:1, /* Is control block accessed? */ |
462 | dst_needed:1; /* Do we need dst entry? */ | 461 | dst_needed:1; /* Do we need dst entry? */ |
463 | kmemcheck_bitfield_end(meta); | ||
464 | enum bpf_prog_type type; /* Type of BPF program */ | 462 | enum bpf_prog_type type; /* Type of BPF program */ |
465 | u32 len; /* Number of filter blocks */ | 463 | u32 len; /* Number of filter blocks */ |
466 | u32 jited_len; /* Size of jited insns in bytes */ | 464 | u32 jited_len; /* Size of jited insns in bytes */ |
diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 710143741eb5..1a4582b44d32 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h | |||
@@ -24,7 +24,6 @@ struct vm_area_struct; | |||
24 | #define ___GFP_HIGH 0x20u | 24 | #define ___GFP_HIGH 0x20u |
25 | #define ___GFP_IO 0x40u | 25 | #define ___GFP_IO 0x40u |
26 | #define ___GFP_FS 0x80u | 26 | #define ___GFP_FS 0x80u |
27 | #define ___GFP_COLD 0x100u | ||
28 | #define ___GFP_NOWARN 0x200u | 27 | #define ___GFP_NOWARN 0x200u |
29 | #define ___GFP_RETRY_MAYFAIL 0x400u | 28 | #define ___GFP_RETRY_MAYFAIL 0x400u |
30 | #define ___GFP_NOFAIL 0x800u | 29 | #define ___GFP_NOFAIL 0x800u |
@@ -37,7 +36,6 @@ struct vm_area_struct; | |||
37 | #define ___GFP_THISNODE 0x40000u | 36 | #define ___GFP_THISNODE 0x40000u |
38 | #define ___GFP_ATOMIC 0x80000u | 37 | #define ___GFP_ATOMIC 0x80000u |
39 | #define ___GFP_ACCOUNT 0x100000u | 38 | #define ___GFP_ACCOUNT 0x100000u |
40 | #define ___GFP_NOTRACK 0x200000u | ||
41 | #define ___GFP_DIRECT_RECLAIM 0x400000u | 39 | #define ___GFP_DIRECT_RECLAIM 0x400000u |
42 | #define ___GFP_WRITE 0x800000u | 40 | #define ___GFP_WRITE 0x800000u |
43 | #define ___GFP_KSWAPD_RECLAIM 0x1000000u | 41 | #define ___GFP_KSWAPD_RECLAIM 0x1000000u |
@@ -193,27 +191,15 @@ struct vm_area_struct; | |||
193 | /* | 191 | /* |
194 | * Action modifiers | 192 | * Action modifiers |
195 | * | 193 | * |
196 | * __GFP_COLD indicates that the caller does not expect to be used in the near | ||
197 | * future. Where possible, a cache-cold page will be returned. | ||
198 | * | ||
199 | * __GFP_NOWARN suppresses allocation failure reports. | 194 | * __GFP_NOWARN suppresses allocation failure reports. |
200 | * | 195 | * |
201 | * __GFP_COMP address compound page metadata. | 196 | * __GFP_COMP address compound page metadata. |
202 | * | 197 | * |
203 | * __GFP_ZERO returns a zeroed page on success. | 198 | * __GFP_ZERO returns a zeroed page on success. |
204 | * | ||
205 | * __GFP_NOTRACK avoids tracking with kmemcheck. | ||
206 | * | ||
207 | * __GFP_NOTRACK_FALSE_POSITIVE is an alias of __GFP_NOTRACK. It's a means of | ||
208 | * distinguishing in the source between false positives and allocations that | ||
209 | * cannot be supported (e.g. page tables). | ||
210 | */ | 199 | */ |
211 | #define __GFP_COLD ((__force gfp_t)___GFP_COLD) | ||
212 | #define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN) | 200 | #define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN) |
213 | #define __GFP_COMP ((__force gfp_t)___GFP_COMP) | 201 | #define __GFP_COMP ((__force gfp_t)___GFP_COMP) |
214 | #define __GFP_ZERO ((__force gfp_t)___GFP_ZERO) | 202 | #define __GFP_ZERO ((__force gfp_t)___GFP_ZERO) |
215 | #define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK) | ||
216 | #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK) | ||
217 | 203 | ||
218 | /* Disable lockdep for GFP context tracking */ | 204 | /* Disable lockdep for GFP context tracking */ |
219 | #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP) | 205 | #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP) |
@@ -539,8 +525,8 @@ void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask); | |||
539 | 525 | ||
540 | extern void __free_pages(struct page *page, unsigned int order); | 526 | extern void __free_pages(struct page *page, unsigned int order); |
541 | extern void free_pages(unsigned long addr, unsigned int order); | 527 | extern void free_pages(unsigned long addr, unsigned int order); |
542 | extern void free_hot_cold_page(struct page *page, bool cold); | 528 | extern void free_unref_page(struct page *page); |
543 | extern void free_hot_cold_page_list(struct list_head *list, bool cold); | 529 | extern void free_unref_page_list(struct list_head *list); |
544 | 530 | ||
545 | struct page_frag_cache; | 531 | struct page_frag_cache; |
546 | extern void __page_frag_cache_drain(struct page *page, unsigned int count); | 532 | extern void __page_frag_cache_drain(struct page *page, unsigned int count); |
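Aside (not part of the patch): with ___GFP_COLD gone there is no hot/cold hint left to pass at free time, so the per-cpu-list free helpers are renamed from free_hot_cold_page{,_list}() to free_unref_page{,_list}(). A hedged sketch of the renamed calls; 'page' and 'pages_list' are assumed locals:

	free_unref_page(page);			/* was: free_hot_cold_page(page, false) */
	free_unref_page_list(&pages_list);	/* was: free_hot_cold_page_list(&pages_list, false) */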
diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 96e69979f84d..325017ad9311 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h | |||
@@ -471,9 +471,9 @@ static inline void hmm_devmem_page_set_drvdata(struct page *page, | |||
471 | * @page: pointer to struct page | 471 | * @page: pointer to struct page |
472 | * Return: driver data value | 472 | * Return: driver data value |
473 | */ | 473 | */ |
474 | static inline unsigned long hmm_devmem_page_get_drvdata(struct page *page) | 474 | static inline unsigned long hmm_devmem_page_get_drvdata(const struct page *page) |
475 | { | 475 | { |
476 | unsigned long *drvdata = (unsigned long *)&page->pgmap; | 476 | const unsigned long *drvdata = (const unsigned long *)&page->pgmap; |
477 | 477 | ||
478 | return drvdata[1]; | 478 | return drvdata[1]; |
479 | } | 479 | } |
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index baeb872283d9..69c238210325 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h | |||
@@ -594,21 +594,6 @@ static inline void tasklet_hi_schedule(struct tasklet_struct *t) | |||
594 | __tasklet_hi_schedule(t); | 594 | __tasklet_hi_schedule(t); |
595 | } | 595 | } |
596 | 596 | ||
597 | extern void __tasklet_hi_schedule_first(struct tasklet_struct *t); | ||
598 | |||
599 | /* | ||
600 | * This version avoids touching any other tasklets. Needed for kmemcheck | ||
601 | * in order not to take any page faults while enqueueing this tasklet; | ||
602 | * consider VERY carefully whether you really need this or | ||
603 | * tasklet_hi_schedule()... | ||
604 | */ | ||
605 | static inline void tasklet_hi_schedule_first(struct tasklet_struct *t) | ||
606 | { | ||
607 | if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) | ||
608 | __tasklet_hi_schedule_first(t); | ||
609 | } | ||
610 | |||
611 | |||
612 | static inline void tasklet_disable_nosync(struct tasklet_struct *t) | 597 | static inline void tasklet_disable_nosync(struct tasklet_struct *t) |
613 | { | 598 | { |
614 | atomic_inc(&t->count); | 599 | atomic_inc(&t->count); |
diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 5017269e3f04..e3eb834c9a35 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h | |||
@@ -46,7 +46,7 @@ void kasan_alloc_pages(struct page *page, unsigned int order); | |||
46 | void kasan_free_pages(struct page *page, unsigned int order); | 46 | void kasan_free_pages(struct page *page, unsigned int order); |
47 | 47 | ||
48 | void kasan_cache_create(struct kmem_cache *cache, size_t *size, | 48 | void kasan_cache_create(struct kmem_cache *cache, size_t *size, |
49 | unsigned long *flags); | 49 | slab_flags_t *flags); |
50 | void kasan_cache_shrink(struct kmem_cache *cache); | 50 | void kasan_cache_shrink(struct kmem_cache *cache); |
51 | void kasan_cache_shutdown(struct kmem_cache *cache); | 51 | void kasan_cache_shutdown(struct kmem_cache *cache); |
52 | 52 | ||
@@ -95,7 +95,7 @@ static inline void kasan_free_pages(struct page *page, unsigned int order) {} | |||
95 | 95 | ||
96 | static inline void kasan_cache_create(struct kmem_cache *cache, | 96 | static inline void kasan_cache_create(struct kmem_cache *cache, |
97 | size_t *size, | 97 | size_t *size, |
98 | unsigned long *flags) {} | 98 | slab_flags_t *flags) {} |
99 | static inline void kasan_cache_shrink(struct kmem_cache *cache) {} | 99 | static inline void kasan_cache_shrink(struct kmem_cache *cache) {} |
100 | static inline void kasan_cache_shutdown(struct kmem_cache *cache) {} | 100 | static inline void kasan_cache_shutdown(struct kmem_cache *cache) {} |
101 | 101 | ||
diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h index 7b1d7bead7d9..ea32a7d3cf1b 100644 --- a/include/linux/kmemcheck.h +++ b/include/linux/kmemcheck.h | |||
@@ -1,172 +1 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | /* SPDX-License-Identifier: GPL-2.0 */ | |
2 | #ifndef LINUX_KMEMCHECK_H | ||
3 | #define LINUX_KMEMCHECK_H | ||
4 | |||
5 | #include <linux/mm_types.h> | ||
6 | #include <linux/types.h> | ||
7 | |||
8 | #ifdef CONFIG_KMEMCHECK | ||
9 | extern int kmemcheck_enabled; | ||
10 | |||
11 | /* The slab-related functions. */ | ||
12 | void kmemcheck_alloc_shadow(struct page *page, int order, gfp_t flags, int node); | ||
13 | void kmemcheck_free_shadow(struct page *page, int order); | ||
14 | void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object, | ||
15 | size_t size); | ||
16 | void kmemcheck_slab_free(struct kmem_cache *s, void *object, size_t size); | ||
17 | |||
18 | void kmemcheck_pagealloc_alloc(struct page *p, unsigned int order, | ||
19 | gfp_t gfpflags); | ||
20 | |||
21 | void kmemcheck_show_pages(struct page *p, unsigned int n); | ||
22 | void kmemcheck_hide_pages(struct page *p, unsigned int n); | ||
23 | |||
24 | bool kmemcheck_page_is_tracked(struct page *p); | ||
25 | |||
26 | void kmemcheck_mark_unallocated(void *address, unsigned int n); | ||
27 | void kmemcheck_mark_uninitialized(void *address, unsigned int n); | ||
28 | void kmemcheck_mark_initialized(void *address, unsigned int n); | ||
29 | void kmemcheck_mark_freed(void *address, unsigned int n); | ||
30 | |||
31 | void kmemcheck_mark_unallocated_pages(struct page *p, unsigned int n); | ||
32 | void kmemcheck_mark_uninitialized_pages(struct page *p, unsigned int n); | ||
33 | void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n); | ||
34 | |||
35 | int kmemcheck_show_addr(unsigned long address); | ||
36 | int kmemcheck_hide_addr(unsigned long address); | ||
37 | |||
38 | bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size); | ||
39 | |||
40 | /* | ||
41 | * Bitfield annotations | ||
42 | * | ||
43 | * How to use: If you have a struct using bitfields, for example | ||
44 | * | ||
45 | * struct a { | ||
46 | * int x:8, y:8; | ||
47 | * }; | ||
48 | * | ||
49 | * then this should be rewritten as | ||
50 | * | ||
51 | * struct a { | ||
52 | * kmemcheck_bitfield_begin(flags); | ||
53 | * int x:8, y:8; | ||
54 | * kmemcheck_bitfield_end(flags); | ||
55 | * }; | ||
56 | * | ||
57 | * Now the "flags_begin" and "flags_end" members may be used to refer to the | ||
58 | * beginning and end, respectively, of the bitfield (and things like | ||
59 | * &x.flags_begin is allowed). As soon as the struct is allocated, the bit- | ||
60 | * fields should be annotated: | ||
61 | * | ||
62 | * struct a *a = kmalloc(sizeof(struct a), GFP_KERNEL); | ||
63 | * kmemcheck_annotate_bitfield(a, flags); | ||
64 | */ | ||
65 | #define kmemcheck_bitfield_begin(name) \ | ||
66 | int name##_begin[0]; | ||
67 | |||
68 | #define kmemcheck_bitfield_end(name) \ | ||
69 | int name##_end[0]; | ||
70 | |||
71 | #define kmemcheck_annotate_bitfield(ptr, name) \ | ||
72 | do { \ | ||
73 | int _n; \ | ||
74 | \ | ||
75 | if (!ptr) \ | ||
76 | break; \ | ||
77 | \ | ||
78 | _n = (long) &((ptr)->name##_end) \ | ||
79 | - (long) &((ptr)->name##_begin); \ | ||
80 | BUILD_BUG_ON(_n < 0); \ | ||
81 | \ | ||
82 | kmemcheck_mark_initialized(&((ptr)->name##_begin), _n); \ | ||
83 | } while (0) | ||
84 | |||
85 | #define kmemcheck_annotate_variable(var) \ | ||
86 | do { \ | ||
87 | kmemcheck_mark_initialized(&(var), sizeof(var)); \ | ||
88 | } while (0) \ | ||
89 | |||
90 | #else | ||
91 | #define kmemcheck_enabled 0 | ||
92 | |||
93 | static inline void | ||
94 | kmemcheck_alloc_shadow(struct page *page, int order, gfp_t flags, int node) | ||
95 | { | ||
96 | } | ||
97 | |||
98 | static inline void | ||
99 | kmemcheck_free_shadow(struct page *page, int order) | ||
100 | { | ||
101 | } | ||
102 | |||
103 | static inline void | ||
104 | kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object, | ||
105 | size_t size) | ||
106 | { | ||
107 | } | ||
108 | |||
109 | static inline void kmemcheck_slab_free(struct kmem_cache *s, void *object, | ||
110 | size_t size) | ||
111 | { | ||
112 | } | ||
113 | |||
114 | static inline void kmemcheck_pagealloc_alloc(struct page *p, | ||
115 | unsigned int order, gfp_t gfpflags) | ||
116 | { | ||
117 | } | ||
118 | |||
119 | static inline bool kmemcheck_page_is_tracked(struct page *p) | ||
120 | { | ||
121 | return false; | ||
122 | } | ||
123 | |||
124 | static inline void kmemcheck_mark_unallocated(void *address, unsigned int n) | ||
125 | { | ||
126 | } | ||
127 | |||
128 | static inline void kmemcheck_mark_uninitialized(void *address, unsigned int n) | ||
129 | { | ||
130 | } | ||
131 | |||
132 | static inline void kmemcheck_mark_initialized(void *address, unsigned int n) | ||
133 | { | ||
134 | } | ||
135 | |||
136 | static inline void kmemcheck_mark_freed(void *address, unsigned int n) | ||
137 | { | ||
138 | } | ||
139 | |||
140 | static inline void kmemcheck_mark_unallocated_pages(struct page *p, | ||
141 | unsigned int n) | ||
142 | { | ||
143 | } | ||
144 | |||
145 | static inline void kmemcheck_mark_uninitialized_pages(struct page *p, | ||
146 | unsigned int n) | ||
147 | { | ||
148 | } | ||
149 | |||
150 | static inline void kmemcheck_mark_initialized_pages(struct page *p, | ||
151 | unsigned int n) | ||
152 | { | ||
153 | } | ||
154 | |||
155 | static inline bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size) | ||
156 | { | ||
157 | return true; | ||
158 | } | ||
159 | |||
160 | #define kmemcheck_bitfield_begin(name) | ||
161 | #define kmemcheck_bitfield_end(name) | ||
162 | #define kmemcheck_annotate_bitfield(ptr, name) \ | ||
163 | do { \ | ||
164 | } while (0) | ||
165 | |||
166 | #define kmemcheck_annotate_variable(var) \ | ||
167 | do { \ | ||
168 | } while (0) | ||
169 | |||
170 | #endif /* CONFIG_KMEMCHECK */ | ||
171 | |||
172 | #endif /* LINUX_KMEMCHECK_H */ | ||
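The header removed above documented (in its lines 40-64) how callers had to bracket bitfields and re-mark them after allocation. A minimal sketch of that now-retired pattern, with a hypothetical struct foo and alloc_foo() shown only to illustrate what users of kmemcheck_annotate_bitfield() have to delete along with this header:

    /* Illustrative only: the annotation pattern retired by this removal.
     * "struct foo" and alloc_foo() are hypothetical. */
    struct foo {
            kmemcheck_bitfield_begin(flags);
            unsigned int a:16, b:16;
            kmemcheck_bitfield_end(flags);
    };

    static struct foo *alloc_foo(void)
    {
            struct foo *f = kmalloc(sizeof(*f), GFP_KERNEL);

            if (f)
                    /* mark the whole bitfield word as initialized */
                    kmemcheck_annotate_bitfield(f, flags);
            return f;
    }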
diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h index 590343f6c1b1..5ac416e2d339 100644 --- a/include/linux/kmemleak.h +++ b/include/linux/kmemleak.h | |||
@@ -48,14 +48,14 @@ extern void kmemleak_not_leak_phys(phys_addr_t phys) __ref; | |||
48 | extern void kmemleak_ignore_phys(phys_addr_t phys) __ref; | 48 | extern void kmemleak_ignore_phys(phys_addr_t phys) __ref; |
49 | 49 | ||
50 | static inline void kmemleak_alloc_recursive(const void *ptr, size_t size, | 50 | static inline void kmemleak_alloc_recursive(const void *ptr, size_t size, |
51 | int min_count, unsigned long flags, | 51 | int min_count, slab_flags_t flags, |
52 | gfp_t gfp) | 52 | gfp_t gfp) |
53 | { | 53 | { |
54 | if (!(flags & SLAB_NOLEAKTRACE)) | 54 | if (!(flags & SLAB_NOLEAKTRACE)) |
55 | kmemleak_alloc(ptr, size, min_count, gfp); | 55 | kmemleak_alloc(ptr, size, min_count, gfp); |
56 | } | 56 | } |
57 | 57 | ||
58 | static inline void kmemleak_free_recursive(const void *ptr, unsigned long flags) | 58 | static inline void kmemleak_free_recursive(const void *ptr, slab_flags_t flags) |
59 | { | 59 | { |
60 | if (!(flags & SLAB_NOLEAKTRACE)) | 60 | if (!(flags & SLAB_NOLEAKTRACE)) |
61 | kmemleak_free(ptr); | 61 | kmemleak_free(ptr); |
@@ -76,7 +76,7 @@ static inline void kmemleak_alloc(const void *ptr, size_t size, int min_count, | |||
76 | { | 76 | { |
77 | } | 77 | } |
78 | static inline void kmemleak_alloc_recursive(const void *ptr, size_t size, | 78 | static inline void kmemleak_alloc_recursive(const void *ptr, size_t size, |
79 | int min_count, unsigned long flags, | 79 | int min_count, slab_flags_t flags, |
80 | gfp_t gfp) | 80 | gfp_t gfp) |
81 | { | 81 | { |
82 | } | 82 | } |
@@ -94,7 +94,7 @@ static inline void kmemleak_free(const void *ptr) | |||
94 | static inline void kmemleak_free_part(const void *ptr, size_t size) | 94 | static inline void kmemleak_free_part(const void *ptr, size_t size) |
95 | { | 95 | { |
96 | } | 96 | } |
97 | static inline void kmemleak_free_recursive(const void *ptr, unsigned long flags) | 97 | static inline void kmemleak_free_recursive(const void *ptr, slab_flags_t flags) |
98 | { | 98 | { |
99 | } | 99 | } |
100 | static inline void kmemleak_free_percpu(const void __percpu *ptr) | 100 | static inline void kmemleak_free_percpu(const void __percpu *ptr) |
diff --git a/include/linux/memblock.h b/include/linux/memblock.h index bae11c7e7bf3..7ed0f7782d16 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h | |||
@@ -237,6 +237,22 @@ unsigned long memblock_next_valid_pfn(unsigned long pfn, unsigned long max_pfn); | |||
237 | for_each_mem_range_rev(i, &memblock.memory, &memblock.reserved, \ | 237 | for_each_mem_range_rev(i, &memblock.memory, &memblock.reserved, \ |
238 | nid, flags, p_start, p_end, p_nid) | 238 | nid, flags, p_start, p_end, p_nid) |
239 | 239 | ||
240 | /** | ||
241 | * for_each_resv_unavail_range - iterate through reserved and unavailable memory | ||
242 | * @i: u64 used as loop variable | ||
243 | * @flags: pick from blocks based on memory attributes | ||
244 | * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL | ||
245 | * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL | ||
246 | * | ||
247 | * Walks over unavailable but reserved (reserved && !memory) areas of memblock. | ||
248 | * Available as soon as memblock is initialized. | ||
249 | * Note: because this memory does not belong to any physical node, flags and | ||
250 | * nid arguments do not make sense and thus are not exported as arguments. | ||
251 | */ | ||
252 | #define for_each_resv_unavail_range(i, p_start, p_end) \ | ||
253 | for_each_mem_range(i, &memblock.reserved, &memblock.memory, \ | ||
254 | NUMA_NO_NODE, MEMBLOCK_NONE, p_start, p_end, NULL) | ||
255 | |||
240 | static inline void memblock_set_region_flags(struct memblock_region *r, | 256 | static inline void memblock_set_region_flags(struct memblock_region *r, |
241 | unsigned long flags) | 257 | unsigned long flags) |
242 | { | 258 | { |
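The new for_each_resv_unavail_range() iterator above walks memblock.reserved ranges that memblock.memory does not cover; zero_resv_unavail(), declared later in this series in <linux/mm.h>, is its in-tree user. A minimal sketch of a caller, assuming a hypothetical count_resv_unavail_pages() helper:

    /* Hypothetical caller: count pages that are reserved but not backed
     * by a memblock.memory range. */
    static unsigned long count_resv_unavail_pages(void)
    {
            phys_addr_t start, end;
            unsigned long pages = 0;
            u64 i;

            for_each_resv_unavail_range(i, &start, &end)
                    pages += (end - start) >> PAGE_SHIFT;

            return pages;
    }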
@@ -389,10 +405,10 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo | |||
389 | region < (memblock.memblock_type.regions + memblock.memblock_type.cnt); \ | 405 | region < (memblock.memblock_type.regions + memblock.memblock_type.cnt); \ |
390 | region++) | 406 | region++) |
391 | 407 | ||
392 | #define for_each_memblock_type(memblock_type, rgn) \ | 408 | #define for_each_memblock_type(i, memblock_type, rgn) \ |
393 | for (idx = 0, rgn = &memblock_type->regions[0]; \ | 409 | for (i = 0, rgn = &memblock_type->regions[0]; \ |
394 | idx < memblock_type->cnt; \ | 410 | i < memblock_type->cnt; \ |
395 | idx++, rgn = &memblock_type->regions[idx]) | 411 | i++, rgn = &memblock_type->regions[i]) |
396 | 412 | ||
397 | #ifdef CONFIG_MEMTEST | 413 | #ifdef CONFIG_MEMTEST |
398 | extern void early_memtest(phys_addr_t start, phys_addr_t end); | 414 | extern void early_memtest(phys_addr_t start, phys_addr_t end); |
diff --git a/include/linux/mm.h b/include/linux/mm.h index 91b46f99b4d2..c7b1d617dff6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -96,6 +96,15 @@ extern int mmap_rnd_compat_bits __read_mostly; | |||
96 | #endif | 96 | #endif |
97 | 97 | ||
98 | /* | 98 | /* |
99 | * On some architectures it is expensive to call memset() for small sizes. | ||
100 | * Those architectures should provide their own implementation of "struct page" | ||
101 | * zeroing by defining this macro in <asm/pgtable.h>. | ||
102 | */ | ||
103 | #ifndef mm_zero_struct_page | ||
104 | #define mm_zero_struct_page(pp) ((void)memset((pp), 0, sizeof(struct page))) | ||
105 | #endif | ||
106 | |||
107 | /* | ||
99 | * Default maximum number of active map areas, this limits the number of vmas | 108 | * Default maximum number of active map areas, this limits the number of vmas |
100 | * per mm struct. Users can overwrite this number by sysctl but there is a | 109 | * per mm struct. Users can overwrite this number by sysctl but there is a |
101 | * problem. | 110 | * problem. |
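The mm_zero_struct_page() hook added above lets an architecture replace the generic memset() of struct page when small memsets are expensive. A hedged sketch of what an <asm/pgtable.h> override might look like; __my_arch_zero_struct_page() is hypothetical and the sketch assumes sizeof(struct page) is a multiple of sizeof(long):

    /* Hypothetical architecture override: unrolled word stores instead of
     * a library memset() call. */
    static inline void __my_arch_zero_struct_page(struct page *page)
    {
            unsigned long *p = (unsigned long *)page;
            unsigned int i;

            for (i = 0; i < sizeof(struct page) / sizeof(unsigned long); i++)
                    p[i] = 0;
    }
    #define mm_zero_struct_page(pp) __my_arch_zero_struct_page(pp)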
@@ -1431,7 +1440,13 @@ void account_page_cleaned(struct page *page, struct address_space *mapping, | |||
1431 | struct bdi_writeback *wb); | 1440 | struct bdi_writeback *wb); |
1432 | int set_page_dirty(struct page *page); | 1441 | int set_page_dirty(struct page *page); |
1433 | int set_page_dirty_lock(struct page *page); | 1442 | int set_page_dirty_lock(struct page *page); |
1434 | void cancel_dirty_page(struct page *page); | 1443 | void __cancel_dirty_page(struct page *page); |
1444 | static inline void cancel_dirty_page(struct page *page) | ||
1445 | { | ||
1446 | /* Avoid atomic ops, locking, etc. when not actually needed. */ | ||
1447 | if (PageDirty(page)) | ||
1448 | __cancel_dirty_page(page); | ||
1449 | } | ||
1435 | int clear_page_dirty_for_io(struct page *page); | 1450 | int clear_page_dirty_for_io(struct page *page); |
1436 | 1451 | ||
1437 | int get_cmdline(struct task_struct *task, char *buffer, int buflen); | 1452 | int get_cmdline(struct task_struct *task, char *buffer, int buflen); |
@@ -1599,26 +1614,32 @@ static inline int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, | |||
1599 | int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address); | 1614 | int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address); |
1600 | #endif | 1615 | #endif |
1601 | 1616 | ||
1602 | #ifdef __PAGETABLE_PUD_FOLDED | 1617 | #if defined(__PAGETABLE_PUD_FOLDED) || !defined(CONFIG_MMU) |
1603 | static inline int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, | 1618 | static inline int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, |
1604 | unsigned long address) | 1619 | unsigned long address) |
1605 | { | 1620 | { |
1606 | return 0; | 1621 | return 0; |
1607 | } | 1622 | } |
1623 | static inline void mm_inc_nr_puds(struct mm_struct *mm) {} | ||
1624 | static inline void mm_dec_nr_puds(struct mm_struct *mm) {} | ||
1625 | |||
1608 | #else | 1626 | #else |
1609 | int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address); | 1627 | int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address); |
1610 | #endif | ||
1611 | 1628 | ||
1612 | #if defined(__PAGETABLE_PMD_FOLDED) || !defined(CONFIG_MMU) | 1629 | static inline void mm_inc_nr_puds(struct mm_struct *mm) |
1613 | static inline int __pmd_alloc(struct mm_struct *mm, pud_t *pud, | ||
1614 | unsigned long address) | ||
1615 | { | 1630 | { |
1616 | return 0; | 1631 | atomic_long_add(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes); |
1617 | } | 1632 | } |
1618 | 1633 | ||
1619 | static inline void mm_nr_pmds_init(struct mm_struct *mm) {} | 1634 | static inline void mm_dec_nr_puds(struct mm_struct *mm) |
1635 | { | ||
1636 | atomic_long_sub(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes); | ||
1637 | } | ||
1638 | #endif | ||
1620 | 1639 | ||
1621 | static inline unsigned long mm_nr_pmds(struct mm_struct *mm) | 1640 | #if defined(__PAGETABLE_PMD_FOLDED) || !defined(CONFIG_MMU) |
1641 | static inline int __pmd_alloc(struct mm_struct *mm, pud_t *pud, | ||
1642 | unsigned long address) | ||
1622 | { | 1643 | { |
1623 | return 0; | 1644 | return 0; |
1624 | } | 1645 | } |
@@ -1629,25 +1650,47 @@ static inline void mm_dec_nr_pmds(struct mm_struct *mm) {} | |||
1629 | #else | 1650 | #else |
1630 | int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address); | 1651 | int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address); |
1631 | 1652 | ||
1632 | static inline void mm_nr_pmds_init(struct mm_struct *mm) | 1653 | static inline void mm_inc_nr_pmds(struct mm_struct *mm) |
1633 | { | 1654 | { |
1634 | atomic_long_set(&mm->nr_pmds, 0); | 1655 | atomic_long_add(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes); |
1635 | } | 1656 | } |
1636 | 1657 | ||
1637 | static inline unsigned long mm_nr_pmds(struct mm_struct *mm) | 1658 | static inline void mm_dec_nr_pmds(struct mm_struct *mm) |
1638 | { | 1659 | { |
1639 | return atomic_long_read(&mm->nr_pmds); | 1660 | atomic_long_sub(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes); |
1640 | } | 1661 | } |
1662 | #endif | ||
1641 | 1663 | ||
1642 | static inline void mm_inc_nr_pmds(struct mm_struct *mm) | 1664 | #ifdef CONFIG_MMU |
1665 | static inline void mm_pgtables_bytes_init(struct mm_struct *mm) | ||
1643 | { | 1666 | { |
1644 | atomic_long_inc(&mm->nr_pmds); | 1667 | atomic_long_set(&mm->pgtables_bytes, 0); |
1645 | } | 1668 | } |
1646 | 1669 | ||
1647 | static inline void mm_dec_nr_pmds(struct mm_struct *mm) | 1670 | static inline unsigned long mm_pgtables_bytes(const struct mm_struct *mm) |
1671 | { | ||
1672 | return atomic_long_read(&mm->pgtables_bytes); | ||
1673 | } | ||
1674 | |||
1675 | static inline void mm_inc_nr_ptes(struct mm_struct *mm) | ||
1676 | { | ||
1677 | atomic_long_add(PTRS_PER_PTE * sizeof(pte_t), &mm->pgtables_bytes); | ||
1678 | } | ||
1679 | |||
1680 | static inline void mm_dec_nr_ptes(struct mm_struct *mm) | ||
1648 | { | 1681 | { |
1649 | atomic_long_dec(&mm->nr_pmds); | 1682 | atomic_long_sub(PTRS_PER_PTE * sizeof(pte_t), &mm->pgtables_bytes); |
1650 | } | 1683 | } |
1684 | #else | ||
1685 | |||
1686 | static inline void mm_pgtables_bytes_init(struct mm_struct *mm) {} | ||
1687 | static inline unsigned long mm_pgtables_bytes(const struct mm_struct *mm) | ||
1688 | { | ||
1689 | return 0; | ||
1690 | } | ||
1691 | |||
1692 | static inline void mm_inc_nr_ptes(struct mm_struct *mm) {} | ||
1693 | static inline void mm_dec_nr_ptes(struct mm_struct *mm) {} | ||
1651 | #endif | 1694 | #endif |
1652 | 1695 | ||
1653 | int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address); | 1696 | int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address); |
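With the hunks above, the separate nr_ptes/nr_pmds counters give way to a single mm->pgtables_bytes figure that each helper adjusts by PTRS_PER_xxx * sizeof(xxx_t). A small sketch of the accounting pattern; the example_* functions are hypothetical stand-ins for the real page-table setup and teardown paths:

    /* Hypothetical allocation/teardown hooks showing the byte accounting. */
    static void example_account_new_pte_table(struct mm_struct *mm)
    {
            mm_inc_nr_ptes(mm);     /* adds PTRS_PER_PTE * sizeof(pte_t) */
    }

    static void example_account_freed_pte_table(struct mm_struct *mm)
    {
            mm_dec_nr_ptes(mm);     /* subtracts the same amount */
    }

    /* One read now covers PTE, PMD and PUD tables together, in bytes. */
    static unsigned long example_pgtables_kib(struct mm_struct *mm)
    {
            return mm_pgtables_bytes(mm) >> 10;
    }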
@@ -2002,6 +2045,12 @@ extern int __meminit __early_pfn_to_nid(unsigned long pfn, | |||
2002 | struct mminit_pfnnid_cache *state); | 2045 | struct mminit_pfnnid_cache *state); |
2003 | #endif | 2046 | #endif |
2004 | 2047 | ||
2048 | #ifdef CONFIG_HAVE_MEMBLOCK | ||
2049 | void zero_resv_unavail(void); | ||
2050 | #else | ||
2051 | static inline void zero_resv_unavail(void) {} | ||
2052 | #endif | ||
2053 | |||
2005 | extern void set_dma_reserve(unsigned long new_dma_reserve); | 2054 | extern void set_dma_reserve(unsigned long new_dma_reserve); |
2006 | extern void memmap_init_zone(unsigned long, int, unsigned long, | 2055 | extern void memmap_init_zone(unsigned long, int, unsigned long, |
2007 | unsigned long, enum memmap_context); | 2056 | unsigned long, enum memmap_context); |
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index c85f11dafd56..cfd0ac4e5e0e 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h | |||
@@ -48,8 +48,10 @@ struct page { | |||
48 | * inode address_space, or NULL. | 48 | * inode address_space, or NULL. |
49 | * If page mapped as anonymous | 49 | * If page mapped as anonymous |
50 | * memory, low bit is set, and | 50 | * memory, low bit is set, and |
51 | * it points to anon_vma object: | 51 | * it points to anon_vma object |
52 | * see PAGE_MAPPING_ANON below. | 52 | * or KSM private structure. See |
53 | * PAGE_MAPPING_ANON and | ||
54 | * PAGE_MAPPING_KSM. | ||
53 | */ | 55 | */ |
54 | void *s_mem; /* slab first object */ | 56 | void *s_mem; /* slab first object */ |
55 | atomic_t compound_mapcount; /* first tail page */ | 57 | atomic_t compound_mapcount; /* first tail page */ |
@@ -207,14 +209,6 @@ struct page { | |||
207 | not kmapped, ie. highmem) */ | 209 | not kmapped, ie. highmem) */ |
208 | #endif /* WANT_PAGE_VIRTUAL */ | 210 | #endif /* WANT_PAGE_VIRTUAL */ |
209 | 211 | ||
210 | #ifdef CONFIG_KMEMCHECK | ||
211 | /* | ||
212 | * kmemcheck wants to track the status of each byte in a page; this | ||
213 | * is a pointer to such a status block. NULL if not tracked. | ||
214 | */ | ||
215 | void *shadow; | ||
216 | #endif | ||
217 | |||
218 | #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS | 212 | #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS |
219 | int _last_cpupid; | 213 | int _last_cpupid; |
220 | #endif | 214 | #endif |
@@ -399,9 +393,8 @@ struct mm_struct { | |||
399 | */ | 393 | */ |
400 | atomic_t mm_count; | 394 | atomic_t mm_count; |
401 | 395 | ||
402 | atomic_long_t nr_ptes; /* PTE page table pages */ | 396 | #ifdef CONFIG_MMU |
403 | #if CONFIG_PGTABLE_LEVELS > 2 | 397 | atomic_long_t pgtables_bytes; /* PTE page table pages */ |
404 | atomic_long_t nr_pmds; /* PMD page table pages */ | ||
405 | #endif | 398 | #endif |
406 | int map_count; /* number of VMAs */ | 399 | int map_count; /* number of VMAs */ |
407 | 400 | ||
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 2cf1c3c807f6..b25dc9db19fc 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h | |||
@@ -156,7 +156,8 @@ struct mmu_notifier_ops { | |||
156 | * shared page-tables, it is not necessary to implement the | 156 | * shared page-tables, it is not necessary to implement the |
157 | * invalidate_range_start()/end() notifiers, as | 157 | * invalidate_range_start()/end() notifiers, as |
158 | * invalidate_range() already catches the points in time when an | 158 | * invalidate_range() already catches the points in time when an |
159 | * external TLB range needs to be flushed. | 159 | * external TLB range needs to be flushed. For more in depth |
160 | * discussion on this see Documentation/vm/mmu_notifier.txt | ||
160 | * | 161 | * |
161 | * The invalidate_range() function is called under the ptl | 162 | * The invalidate_range() function is called under the ptl |
162 | * spin-lock and not allowed to sleep. | 163 | * spin-lock and not allowed to sleep. |
@@ -213,7 +214,8 @@ extern void __mmu_notifier_change_pte(struct mm_struct *mm, | |||
213 | extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm, | 214 | extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm, |
214 | unsigned long start, unsigned long end); | 215 | unsigned long start, unsigned long end); |
215 | extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm, | 216 | extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm, |
216 | unsigned long start, unsigned long end); | 217 | unsigned long start, unsigned long end, |
218 | bool only_end); | ||
217 | extern void __mmu_notifier_invalidate_range(struct mm_struct *mm, | 219 | extern void __mmu_notifier_invalidate_range(struct mm_struct *mm, |
218 | unsigned long start, unsigned long end); | 220 | unsigned long start, unsigned long end); |
219 | 221 | ||
@@ -267,7 +269,14 @@ static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm, | |||
267 | unsigned long start, unsigned long end) | 269 | unsigned long start, unsigned long end) |
268 | { | 270 | { |
269 | if (mm_has_notifiers(mm)) | 271 | if (mm_has_notifiers(mm)) |
270 | __mmu_notifier_invalidate_range_end(mm, start, end); | 272 | __mmu_notifier_invalidate_range_end(mm, start, end, false); |
273 | } | ||
274 | |||
275 | static inline void mmu_notifier_invalidate_range_only_end(struct mm_struct *mm, | ||
276 | unsigned long start, unsigned long end) | ||
277 | { | ||
278 | if (mm_has_notifiers(mm)) | ||
279 | __mmu_notifier_invalidate_range_end(mm, start, end, true); | ||
271 | } | 280 | } |
272 | 281 | ||
273 | static inline void mmu_notifier_invalidate_range(struct mm_struct *mm, | 282 | static inline void mmu_notifier_invalidate_range(struct mm_struct *mm, |
@@ -438,6 +447,11 @@ static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm, | |||
438 | { | 447 | { |
439 | } | 448 | } |
440 | 449 | ||
450 | static inline void mmu_notifier_invalidate_range_only_end(struct mm_struct *mm, | ||
451 | unsigned long start, unsigned long end) | ||
452 | { | ||
453 | } | ||
454 | |||
441 | static inline void mmu_notifier_invalidate_range(struct mm_struct *mm, | 455 | static inline void mmu_notifier_invalidate_range(struct mm_struct *mm, |
442 | unsigned long start, unsigned long end) | 456 | unsigned long start, unsigned long end) |
443 | { | 457 | { |
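The new *_only_end variant above lets a caller that already flushed secondary TLBs via mmu_notifier_invalidate_range() under the page-table lock skip the duplicate invalidate_range() callback in the end path. A hedged sketch; example_update_range() is hypothetical and the page-table work is elided:

    /* Hypothetical caller: the range flush was issued while the ptl was
     * held, so only the invalidate_range_end() listeners still need to run. */
    static void example_update_range(struct mm_struct *mm,
                                     unsigned long start, unsigned long end)
    {
            mmu_notifier_invalidate_range_start(mm, start, end);

            /* ... modify page tables and call
             * mmu_notifier_invalidate_range(mm, start, end) under the ptl ... */

            mmu_notifier_invalidate_range_only_end(mm, start, end);
    }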
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index a507f43ad221..67f2e3c38939 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h | |||
@@ -700,7 +700,8 @@ typedef struct pglist_data { | |||
700 | * is the first PFN that needs to be initialised. | 700 | * is the first PFN that needs to be initialised. |
701 | */ | 701 | */ |
702 | unsigned long first_deferred_pfn; | 702 | unsigned long first_deferred_pfn; |
703 | unsigned long static_init_size; | 703 | /* Number of non-deferred pages */ |
704 | unsigned long static_init_pgcnt; | ||
704 | #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ | 705 | #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ |
705 | 706 | ||
706 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 707 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
@@ -712,12 +713,6 @@ typedef struct pglist_data { | |||
712 | /* Fields commonly accessed by the page reclaim scanner */ | 713 | /* Fields commonly accessed by the page reclaim scanner */ |
713 | struct lruvec lruvec; | 714 | struct lruvec lruvec; |
714 | 715 | ||
715 | /* | ||
716 | * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on | ||
717 | * this node's LRU. Maintained by the pageout code. | ||
718 | */ | ||
719 | unsigned int inactive_ratio; | ||
720 | |||
721 | unsigned long flags; | 716 | unsigned long flags; |
722 | 717 | ||
723 | ZONE_PADDING(_pad2_) | 718 | ZONE_PADDING(_pad2_) |
diff --git a/include/linux/net.h b/include/linux/net.h index d97d80d7fdf8..caeb159abda5 100644 --- a/include/linux/net.h +++ b/include/linux/net.h | |||
@@ -22,7 +22,6 @@ | |||
22 | #include <linux/random.h> | 22 | #include <linux/random.h> |
23 | #include <linux/wait.h> | 23 | #include <linux/wait.h> |
24 | #include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */ | 24 | #include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */ |
25 | #include <linux/kmemcheck.h> | ||
26 | #include <linux/rcupdate.h> | 25 | #include <linux/rcupdate.h> |
27 | #include <linux/once.h> | 26 | #include <linux/once.h> |
28 | #include <linux/fs.h> | 27 | #include <linux/fs.h> |
@@ -111,9 +110,7 @@ struct socket_wq { | |||
111 | struct socket { | 110 | struct socket { |
112 | socket_state state; | 111 | socket_state state; |
113 | 112 | ||
114 | kmemcheck_bitfield_begin(type); | ||
115 | short type; | 113 | short type; |
116 | kmemcheck_bitfield_end(type); | ||
117 | 114 | ||
118 | unsigned long flags; | 115 | unsigned long flags; |
119 | 116 | ||
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index de1c50b93c61..15cab3967d6d 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h | |||
@@ -104,7 +104,9 @@ extern nodemask_t _unused_nodemask_arg_; | |||
104 | * | 104 | * |
105 | * Can be used to provide arguments for '%*pb[l]' when printing a nodemask. | 105 | * Can be used to provide arguments for '%*pb[l]' when printing a nodemask. |
106 | */ | 106 | */ |
107 | #define nodemask_pr_args(maskp) MAX_NUMNODES, (maskp)->bits | 107 | #define nodemask_pr_args(maskp) \ |
108 | ((maskp) != NULL) ? MAX_NUMNODES : 0, \ | ||
109 | ((maskp) != NULL) ? (maskp)->bits : NULL | ||
108 | 110 | ||
109 | /* | 111 | /* |
110 | * The inline keyword gives the compiler room to decide to inline, or | 112 | * The inline keyword gives the compiler room to decide to inline, or |
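With the change above, nodemask_pr_args() tolerates a NULL mask pointer by printing a zero-width bitmap instead of dereferencing NULL. A minimal sketch of a printing site; example_print_nodes() is hypothetical:

    /* Hypothetical caller: safe even when "nodes" is NULL after this change. */
    static void example_print_nodes(const nodemask_t *nodes)
    {
            pr_info("allowed nodes: %*pbl\n", nodemask_pr_args(nodes));
    }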
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 584b14c774c1..3ec44e27aa9d 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h | |||
@@ -18,7 +18,7 @@ | |||
18 | * Various page->flags bits: | 18 | * Various page->flags bits: |
19 | * | 19 | * |
20 | * PG_reserved is set for special pages, which can never be swapped out. Some | 20 | * PG_reserved is set for special pages, which can never be swapped out. Some |
21 | * of them might not even exist (eg empty_bad_page)... | 21 | * of them might not even exist... |
22 | * | 22 | * |
23 | * The PG_private bitflag is set on pagecache pages if they contain filesystem | 23 | * The PG_private bitflag is set on pagecache pages if they contain filesystem |
24 | * specific data (which is normally at page->private). It can be used by | 24 | * specific data (which is normally at page->private). It can be used by |
diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index 05a04e603686..cdad58bbfd8b 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h | |||
@@ -31,7 +31,7 @@ static inline bool is_migrate_isolate(int migratetype) | |||
31 | #endif | 31 | #endif |
32 | 32 | ||
33 | bool has_unmovable_pages(struct zone *zone, struct page *page, int count, | 33 | bool has_unmovable_pages(struct zone *zone, struct page *page, int count, |
34 | bool skip_hwpoisoned_pages); | 34 | int migratetype, bool skip_hwpoisoned_pages); |
35 | void set_pageblock_migratetype(struct page *page, int migratetype); | 35 | void set_pageblock_migratetype(struct page *page, int migratetype); |
36 | int move_freepages_block(struct zone *zone, struct page *page, | 36 | int move_freepages_block(struct zone *zone, struct page *page, |
37 | int migratetype, int *num_movable); | 37 | int migratetype, int *num_movable); |
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index e08b5339023c..34ce3ebf97d5 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h | |||
@@ -16,6 +16,8 @@ | |||
16 | #include <linux/hardirq.h> /* for in_interrupt() */ | 16 | #include <linux/hardirq.h> /* for in_interrupt() */ |
17 | #include <linux/hugetlb_inline.h> | 17 | #include <linux/hugetlb_inline.h> |
18 | 18 | ||
19 | struct pagevec; | ||
20 | |||
19 | /* | 21 | /* |
20 | * Bits in mapping->flags. | 22 | * Bits in mapping->flags. |
21 | */ | 23 | */ |
@@ -116,7 +118,7 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) | |||
116 | m->gfp_mask = mask; | 118 | m->gfp_mask = mask; |
117 | } | 119 | } |
118 | 120 | ||
119 | void release_pages(struct page **pages, int nr, bool cold); | 121 | void release_pages(struct page **pages, int nr); |
120 | 122 | ||
121 | /* | 123 | /* |
122 | * speculatively take a reference to a page. | 124 | * speculatively take a reference to a page. |
@@ -232,15 +234,9 @@ static inline struct page *page_cache_alloc(struct address_space *x) | |||
232 | return __page_cache_alloc(mapping_gfp_mask(x)); | 234 | return __page_cache_alloc(mapping_gfp_mask(x)); |
233 | } | 235 | } |
234 | 236 | ||
235 | static inline struct page *page_cache_alloc_cold(struct address_space *x) | ||
236 | { | ||
237 | return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_COLD); | ||
238 | } | ||
239 | |||
240 | static inline gfp_t readahead_gfp_mask(struct address_space *x) | 237 | static inline gfp_t readahead_gfp_mask(struct address_space *x) |
241 | { | 238 | { |
242 | return mapping_gfp_mask(x) | | 239 | return mapping_gfp_mask(x) | __GFP_NORETRY | __GFP_NOWARN; |
243 | __GFP_COLD | __GFP_NORETRY | __GFP_NOWARN; | ||
244 | } | 240 | } |
245 | 241 | ||
246 | typedef int filler_t(void *, struct page *); | 242 | typedef int filler_t(void *, struct page *); |
@@ -366,8 +362,16 @@ static inline unsigned find_get_pages(struct address_space *mapping, | |||
366 | } | 362 | } |
367 | unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start, | 363 | unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start, |
368 | unsigned int nr_pages, struct page **pages); | 364 | unsigned int nr_pages, struct page **pages); |
369 | unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index, | 365 | unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index, |
370 | int tag, unsigned int nr_pages, struct page **pages); | 366 | pgoff_t end, int tag, unsigned int nr_pages, |
367 | struct page **pages); | ||
368 | static inline unsigned find_get_pages_tag(struct address_space *mapping, | ||
369 | pgoff_t *index, int tag, unsigned int nr_pages, | ||
370 | struct page **pages) | ||
371 | { | ||
372 | return find_get_pages_range_tag(mapping, index, (pgoff_t)-1, tag, | ||
373 | nr_pages, pages); | ||
374 | } | ||
371 | unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start, | 375 | unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start, |
372 | int tag, unsigned int nr_entries, | 376 | int tag, unsigned int nr_entries, |
373 | struct page **entries, pgoff_t *indices); | 377 | struct page **entries, pgoff_t *indices); |
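find_get_pages_tag() is now a thin wrapper that passes end = (pgoff_t)-1 to the new ranged lookup above. A short sketch of a caller that bounds the scan itself; example_gang_lookup_dirty() is hypothetical:

    /* Hypothetical caller: collect up to PAGEVEC_SIZE dirty pages within
     * [*index, end], letting the helper advance *index. */
    static unsigned example_gang_lookup_dirty(struct address_space *mapping,
                                              pgoff_t *index, pgoff_t end,
                                              struct page **pages)
    {
            return find_get_pages_range_tag(mapping, index, end,
                                            PAGECACHE_TAG_DIRTY,
                                            PAGEVEC_SIZE, pages);
    }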
@@ -616,6 +620,8 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping, | |||
616 | extern void delete_from_page_cache(struct page *page); | 620 | extern void delete_from_page_cache(struct page *page); |
617 | extern void __delete_from_page_cache(struct page *page, void *shadow); | 621 | extern void __delete_from_page_cache(struct page *page, void *shadow); |
618 | int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask); | 622 | int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask); |
623 | void delete_from_page_cache_batch(struct address_space *mapping, | ||
624 | struct pagevec *pvec); | ||
619 | 625 | ||
620 | /* | 626 | /* |
621 | * Like add_to_page_cache_locked, but used to add newly allocated pages: | 627 | * Like add_to_page_cache_locked, but used to add newly allocated pages: |
diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 2636c0c0f279..5fb6580f7f23 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h | |||
@@ -17,7 +17,7 @@ struct address_space; | |||
17 | 17 | ||
18 | struct pagevec { | 18 | struct pagevec { |
19 | unsigned long nr; | 19 | unsigned long nr; |
20 | unsigned long cold; | 20 | bool percpu_pvec_drained; |
21 | struct page *pages[PAGEVEC_SIZE]; | 21 | struct page *pages[PAGEVEC_SIZE]; |
22 | }; | 22 | }; |
23 | 23 | ||
@@ -38,14 +38,22 @@ static inline unsigned pagevec_lookup(struct pagevec *pvec, | |||
38 | return pagevec_lookup_range(pvec, mapping, start, (pgoff_t)-1); | 38 | return pagevec_lookup_range(pvec, mapping, start, (pgoff_t)-1); |
39 | } | 39 | } |
40 | 40 | ||
41 | unsigned pagevec_lookup_tag(struct pagevec *pvec, | 41 | unsigned pagevec_lookup_range_tag(struct pagevec *pvec, |
42 | struct address_space *mapping, pgoff_t *index, int tag, | 42 | struct address_space *mapping, pgoff_t *index, pgoff_t end, |
43 | unsigned nr_pages); | 43 | int tag); |
44 | unsigned pagevec_lookup_range_nr_tag(struct pagevec *pvec, | ||
45 | struct address_space *mapping, pgoff_t *index, pgoff_t end, | ||
46 | int tag, unsigned max_pages); | ||
47 | static inline unsigned pagevec_lookup_tag(struct pagevec *pvec, | ||
48 | struct address_space *mapping, pgoff_t *index, int tag) | ||
49 | { | ||
50 | return pagevec_lookup_range_tag(pvec, mapping, index, (pgoff_t)-1, tag); | ||
51 | } | ||
44 | 52 | ||
45 | static inline void pagevec_init(struct pagevec *pvec, int cold) | 53 | static inline void pagevec_init(struct pagevec *pvec) |
46 | { | 54 | { |
47 | pvec->nr = 0; | 55 | pvec->nr = 0; |
48 | pvec->cold = cold; | 56 | pvec->percpu_pvec_drained = false; |
49 | } | 57 | } |
50 | 58 | ||
51 | static inline void pagevec_reinit(struct pagevec *pvec) | 59 | static inline void pagevec_reinit(struct pagevec *pvec) |
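Two caller-visible changes land here: pagevec_init() loses its "cold" argument and tagged lookups become range-bound. A hedged sketch of a writeback-style loop using the updated API; example_scan_dirty() and its processing body are hypothetical:

    /* Hypothetical scan loop: the ranged tag lookup stops at "end" on its
     * own, so no manual bounds check is needed per page. */
    static void example_scan_dirty(struct address_space *mapping,
                                   pgoff_t index, pgoff_t end)
    {
            struct pagevec pvec;
            unsigned int nr, i;

            pagevec_init(&pvec);
            while ((nr = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
                                                  PAGECACHE_TAG_DIRTY))) {
                    for (i = 0; i < nr; i++) {
                            /* ... process pvec.pages[i] ... */
                    }
                    pagevec_release(&pvec);
                    cond_resched();
            }
    }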
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 567ebb5eaab0..0ca448c1cb42 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h | |||
@@ -301,18 +301,17 @@ void *__radix_tree_lookup(const struct radix_tree_root *, unsigned long index, | |||
301 | void *radix_tree_lookup(const struct radix_tree_root *, unsigned long); | 301 | void *radix_tree_lookup(const struct radix_tree_root *, unsigned long); |
302 | void __rcu **radix_tree_lookup_slot(const struct radix_tree_root *, | 302 | void __rcu **radix_tree_lookup_slot(const struct radix_tree_root *, |
303 | unsigned long index); | 303 | unsigned long index); |
304 | typedef void (*radix_tree_update_node_t)(struct radix_tree_node *, void *); | 304 | typedef void (*radix_tree_update_node_t)(struct radix_tree_node *); |
305 | void __radix_tree_replace(struct radix_tree_root *, struct radix_tree_node *, | 305 | void __radix_tree_replace(struct radix_tree_root *, struct radix_tree_node *, |
306 | void __rcu **slot, void *entry, | 306 | void __rcu **slot, void *entry, |
307 | radix_tree_update_node_t update_node, void *private); | 307 | radix_tree_update_node_t update_node); |
308 | void radix_tree_iter_replace(struct radix_tree_root *, | 308 | void radix_tree_iter_replace(struct radix_tree_root *, |
309 | const struct radix_tree_iter *, void __rcu **slot, void *entry); | 309 | const struct radix_tree_iter *, void __rcu **slot, void *entry); |
310 | void radix_tree_replace_slot(struct radix_tree_root *, | 310 | void radix_tree_replace_slot(struct radix_tree_root *, |
311 | void __rcu **slot, void *entry); | 311 | void __rcu **slot, void *entry); |
312 | void __radix_tree_delete_node(struct radix_tree_root *, | 312 | void __radix_tree_delete_node(struct radix_tree_root *, |
313 | struct radix_tree_node *, | 313 | struct radix_tree_node *, |
314 | radix_tree_update_node_t update_node, | 314 | radix_tree_update_node_t update_node); |
315 | void *private); | ||
316 | void radix_tree_iter_delete(struct radix_tree_root *, | 315 | void radix_tree_iter_delete(struct radix_tree_root *, |
317 | struct radix_tree_iter *iter, void __rcu **slot); | 316 | struct radix_tree_iter *iter, void __rcu **slot); |
318 | void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *); | 317 | void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *); |
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index fa6ace66fea5..289e4d54e3e0 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h | |||
@@ -2,7 +2,6 @@ | |||
2 | #ifndef _LINUX_RING_BUFFER_H | 2 | #ifndef _LINUX_RING_BUFFER_H |
3 | #define _LINUX_RING_BUFFER_H | 3 | #define _LINUX_RING_BUFFER_H |
4 | 4 | ||
5 | #include <linux/kmemcheck.h> | ||
6 | #include <linux/mm.h> | 5 | #include <linux/mm.h> |
7 | #include <linux/seq_file.h> | 6 | #include <linux/seq_file.h> |
8 | #include <linux/poll.h> | 7 | #include <linux/poll.h> |
@@ -14,9 +13,7 @@ struct ring_buffer_iter; | |||
14 | * Don't refer to this struct directly, use functions below. | 13 | * Don't refer to this struct directly, use functions below. |
15 | */ | 14 | */ |
16 | struct ring_buffer_event { | 15 | struct ring_buffer_event { |
17 | kmemcheck_bitfield_begin(bitfield); | ||
18 | u32 type_len:5, time_delta:27; | 16 | u32 type_len:5, time_delta:27; |
19 | kmemcheck_bitfield_end(bitfield); | ||
20 | 17 | ||
21 | u32 array[]; | 18 | u32 array[]; |
22 | }; | 19 | }; |
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 54fe91183a8e..ed06e1c28fc7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h | |||
@@ -15,7 +15,6 @@ | |||
15 | #define _LINUX_SKBUFF_H | 15 | #define _LINUX_SKBUFF_H |
16 | 16 | ||
17 | #include <linux/kernel.h> | 17 | #include <linux/kernel.h> |
18 | #include <linux/kmemcheck.h> | ||
19 | #include <linux/compiler.h> | 18 | #include <linux/compiler.h> |
20 | #include <linux/time.h> | 19 | #include <linux/time.h> |
21 | #include <linux/bug.h> | 20 | #include <linux/bug.h> |
@@ -711,7 +710,6 @@ struct sk_buff { | |||
711 | /* Following fields are _not_ copied in __copy_skb_header() | 710 | /* Following fields are _not_ copied in __copy_skb_header() |
712 | * Note that queue_mapping is here mostly to fill a hole. | 711 | * Note that queue_mapping is here mostly to fill a hole. |
713 | */ | 712 | */ |
714 | kmemcheck_bitfield_begin(flags1); | ||
715 | __u16 queue_mapping; | 713 | __u16 queue_mapping; |
716 | 714 | ||
717 | /* if you move cloned around you also must adapt those constants */ | 715 | /* if you move cloned around you also must adapt those constants */ |
@@ -730,7 +728,6 @@ struct sk_buff { | |||
730 | head_frag:1, | 728 | head_frag:1, |
731 | xmit_more:1, | 729 | xmit_more:1, |
732 | __unused:1; /* one bit hole */ | 730 | __unused:1; /* one bit hole */ |
733 | kmemcheck_bitfield_end(flags1); | ||
734 | 731 | ||
735 | /* fields enclosed in headers_start/headers_end are copied | 732 | /* fields enclosed in headers_start/headers_end are copied |
736 | * using a single memcpy() in __copy_skb_header() | 733 | * using a single memcpy() in __copy_skb_header() |
@@ -2664,7 +2661,7 @@ static inline struct page *__dev_alloc_pages(gfp_t gfp_mask, | |||
2664 | * 4. __GFP_MEMALLOC is ignored if __GFP_NOMEMALLOC is set due to | 2661 | * 4. __GFP_MEMALLOC is ignored if __GFP_NOMEMALLOC is set due to |
2665 | * code in gfp_to_alloc_flags that should be enforcing this. | 2662 | * code in gfp_to_alloc_flags that should be enforcing this. |
2666 | */ | 2663 | */ |
2667 | gfp_mask |= __GFP_COLD | __GFP_COMP | __GFP_MEMALLOC; | 2664 | gfp_mask |= __GFP_COMP | __GFP_MEMALLOC; |
2668 | 2665 | ||
2669 | return alloc_pages_node(NUMA_NO_NODE, gfp_mask, order); | 2666 | return alloc_pages_node(NUMA_NO_NODE, gfp_mask, order); |
2670 | } | 2667 | } |
diff --git a/include/linux/slab.h b/include/linux/slab.h index af5aa65c7c18..50697a1d6621 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h | |||
@@ -21,13 +21,20 @@ | |||
21 | * Flags to pass to kmem_cache_create(). | 21 | * Flags to pass to kmem_cache_create(). |
22 | * The ones marked DEBUG are only valid if CONFIG_DEBUG_SLAB is set. | 22 | * The ones marked DEBUG are only valid if CONFIG_DEBUG_SLAB is set. |
23 | */ | 23 | */ |
24 | #define SLAB_CONSISTENCY_CHECKS 0x00000100UL /* DEBUG: Perform (expensive) checks on alloc/free */ | 24 | /* DEBUG: Perform (expensive) checks on alloc/free */ |
25 | #define SLAB_RED_ZONE 0x00000400UL /* DEBUG: Red zone objs in a cache */ | 25 | #define SLAB_CONSISTENCY_CHECKS ((slab_flags_t __force)0x00000100U) |
26 | #define SLAB_POISON 0x00000800UL /* DEBUG: Poison objects */ | 26 | /* DEBUG: Red zone objs in a cache */ |
27 | #define SLAB_HWCACHE_ALIGN 0x00002000UL /* Align objs on cache lines */ | 27 | #define SLAB_RED_ZONE ((slab_flags_t __force)0x00000400U) |
28 | #define SLAB_CACHE_DMA 0x00004000UL /* Use GFP_DMA memory */ | 28 | /* DEBUG: Poison objects */ |
29 | #define SLAB_STORE_USER 0x00010000UL /* DEBUG: Store the last owner for bug hunting */ | 29 | #define SLAB_POISON ((slab_flags_t __force)0x00000800U) |
30 | #define SLAB_PANIC 0x00040000UL /* Panic if kmem_cache_create() fails */ | 30 | /* Align objs on cache lines */ |
31 | #define SLAB_HWCACHE_ALIGN ((slab_flags_t __force)0x00002000U) | ||
32 | /* Use GFP_DMA memory */ | ||
33 | #define SLAB_CACHE_DMA ((slab_flags_t __force)0x00004000U) | ||
34 | /* DEBUG: Store the last owner for bug hunting */ | ||
35 | #define SLAB_STORE_USER ((slab_flags_t __force)0x00010000U) | ||
36 | /* Panic if kmem_cache_create() fails */ | ||
37 | #define SLAB_PANIC ((slab_flags_t __force)0x00040000U) | ||
31 | /* | 38 | /* |
32 | * SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS! | 39 | * SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS! |
33 | * | 40 | * |
@@ -65,44 +72,45 @@ | |||
65 | * | 72 | * |
66 | * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU. | 73 | * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU. |
67 | */ | 74 | */ |
68 | #define SLAB_TYPESAFE_BY_RCU 0x00080000UL /* Defer freeing slabs to RCU */ | 75 | /* Defer freeing slabs to RCU */ |
69 | #define SLAB_MEM_SPREAD 0x00100000UL /* Spread some memory over cpuset */ | 76 | #define SLAB_TYPESAFE_BY_RCU ((slab_flags_t __force)0x00080000U) |
70 | #define SLAB_TRACE 0x00200000UL /* Trace allocations and frees */ | 77 | /* Spread some memory over cpuset */ |
78 | #define SLAB_MEM_SPREAD ((slab_flags_t __force)0x00100000U) | ||
79 | /* Trace allocations and frees */ | ||
80 | #define SLAB_TRACE ((slab_flags_t __force)0x00200000U) | ||
71 | 81 | ||
72 | /* Flag to prevent checks on free */ | 82 | /* Flag to prevent checks on free */ |
73 | #ifdef CONFIG_DEBUG_OBJECTS | 83 | #ifdef CONFIG_DEBUG_OBJECTS |
74 | # define SLAB_DEBUG_OBJECTS 0x00400000UL | 84 | # define SLAB_DEBUG_OBJECTS ((slab_flags_t __force)0x00400000U) |
75 | #else | 85 | #else |
76 | # define SLAB_DEBUG_OBJECTS 0x00000000UL | 86 | # define SLAB_DEBUG_OBJECTS 0 |
77 | #endif | 87 | #endif |
78 | 88 | ||
79 | #define SLAB_NOLEAKTRACE 0x00800000UL /* Avoid kmemleak tracing */ | 89 | /* Avoid kmemleak tracing */ |
90 | #define SLAB_NOLEAKTRACE ((slab_flags_t __force)0x00800000U) | ||
80 | 91 | ||
81 | /* Don't track use of uninitialized memory */ | 92 | /* Fault injection mark */ |
82 | #ifdef CONFIG_KMEMCHECK | ||
83 | # define SLAB_NOTRACK 0x01000000UL | ||
84 | #else | ||
85 | # define SLAB_NOTRACK 0x00000000UL | ||
86 | #endif | ||
87 | #ifdef CONFIG_FAILSLAB | 93 | #ifdef CONFIG_FAILSLAB |
88 | # define SLAB_FAILSLAB 0x02000000UL /* Fault injection mark */ | 94 | # define SLAB_FAILSLAB ((slab_flags_t __force)0x02000000U) |
89 | #else | 95 | #else |
90 | # define SLAB_FAILSLAB 0x00000000UL | 96 | # define SLAB_FAILSLAB 0 |
91 | #endif | 97 | #endif |
98 | /* Account to memcg */ | ||
92 | #if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB) | 99 | #if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB) |
93 | # define SLAB_ACCOUNT 0x04000000UL /* Account to memcg */ | 100 | # define SLAB_ACCOUNT ((slab_flags_t __force)0x04000000U) |
94 | #else | 101 | #else |
95 | # define SLAB_ACCOUNT 0x00000000UL | 102 | # define SLAB_ACCOUNT 0 |
96 | #endif | 103 | #endif |
97 | 104 | ||
98 | #ifdef CONFIG_KASAN | 105 | #ifdef CONFIG_KASAN |
99 | #define SLAB_KASAN 0x08000000UL | 106 | #define SLAB_KASAN ((slab_flags_t __force)0x08000000U) |
100 | #else | 107 | #else |
101 | #define SLAB_KASAN 0x00000000UL | 108 | #define SLAB_KASAN 0 |
102 | #endif | 109 | #endif |
103 | 110 | ||
104 | /* The following flags affect the page allocator grouping pages by mobility */ | 111 | /* The following flags affect the page allocator grouping pages by mobility */ |
105 | #define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ | 112 | /* Objects are reclaimable */ |
113 | #define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0x00020000U) | ||
106 | #define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */ | 114 | #define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */ |
107 | /* | 115 | /* |
108 | * ZERO_SIZE_PTR will be returned for zero sized kmalloc requests. | 116 | * ZERO_SIZE_PTR will be returned for zero sized kmalloc requests. |
@@ -128,7 +136,7 @@ void __init kmem_cache_init(void); | |||
128 | bool slab_is_available(void); | 136 | bool slab_is_available(void); |
129 | 137 | ||
130 | struct kmem_cache *kmem_cache_create(const char *, size_t, size_t, | 138 | struct kmem_cache *kmem_cache_create(const char *, size_t, size_t, |
131 | unsigned long, | 139 | slab_flags_t, |
132 | void (*)(void *)); | 140 | void (*)(void *)); |
133 | void kmem_cache_destroy(struct kmem_cache *); | 141 | void kmem_cache_destroy(struct kmem_cache *); |
134 | int kmem_cache_shrink(struct kmem_cache *); | 142 | int kmem_cache_shrink(struct kmem_cache *); |
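Because every SLAB_* macro above now carries its own (slab_flags_t __force) cast, existing kmem_cache_create() callers keep passing the same OR'd flags; only code that stored them in a plain unsigned long needs the new type. A small sketch; struct my_object and my_cache_init() are hypothetical:

    /* Hypothetical cache creation: unchanged call shape, flags are now
     * slab_flags_t under the hood. */
    struct my_object {
            struct list_head list;
            unsigned long data;
    };

    static struct kmem_cache *my_cache;

    static int __init my_cache_init(void)
    {
            my_cache = kmem_cache_create("my_object_cache",
                                         sizeof(struct my_object), 0,
                                         SLAB_HWCACHE_ALIGN | SLAB_PANIC,
                                         NULL);
            return 0;
    }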
@@ -459,9 +467,6 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags) | |||
459 | * Also it is possible to set different flags by OR'ing | 467 | * Also it is possible to set different flags by OR'ing |
460 | * in one or more of the following additional @flags: | 468 | * in one or more of the following additional @flags: |
461 | * | 469 | * |
462 | * %__GFP_COLD - Request cache-cold pages instead of | ||
463 | * trying to return cache-warm pages. | ||
464 | * | ||
465 | * %__GFP_HIGH - This allocation has high priority and may use emergency pools. | 470 | * %__GFP_HIGH - This allocation has high priority and may use emergency pools. |
466 | * | 471 | * |
467 | * %__GFP_NOFAIL - Indicate that this allocation is in no way allowed to fail | 472 | * %__GFP_NOFAIL - Indicate that this allocation is in no way allowed to fail |
@@ -636,6 +641,22 @@ extern void *__kmalloc_track_caller(size_t, gfp_t, unsigned long); | |||
636 | #define kmalloc_track_caller(size, flags) \ | 641 | #define kmalloc_track_caller(size, flags) \ |
637 | __kmalloc_track_caller(size, flags, _RET_IP_) | 642 | __kmalloc_track_caller(size, flags, _RET_IP_) |
638 | 643 | ||
644 | static inline void *kmalloc_array_node(size_t n, size_t size, gfp_t flags, | ||
645 | int node) | ||
646 | { | ||
647 | if (size != 0 && n > SIZE_MAX / size) | ||
648 | return NULL; | ||
649 | if (__builtin_constant_p(n) && __builtin_constant_p(size)) | ||
650 | return kmalloc_node(n * size, flags, node); | ||
651 | return __kmalloc_node(n * size, flags, node); | ||
652 | } | ||
653 | |||
654 | static inline void *kcalloc_node(size_t n, size_t size, gfp_t flags, int node) | ||
655 | { | ||
656 | return kmalloc_array_node(n, size, flags | __GFP_ZERO, node); | ||
657 | } | ||
658 | |||
659 | |||
639 | #ifdef CONFIG_NUMA | 660 | #ifdef CONFIG_NUMA |
640 | extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, unsigned long); | 661 | extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, unsigned long); |
641 | #define kmalloc_node_track_caller(size, flags, node) \ | 662 | #define kmalloc_node_track_caller(size, flags, node) \ |
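kmalloc_array_node() and kcalloc_node() added above are the node-aware counterparts of kmalloc_array()/kcalloc(), with the same n * size overflow check returning NULL. A one-liner usage sketch; example_alloc_counters() is hypothetical:

    /* Hypothetical per-node table: zeroed, overflow-checked allocation. */
    static int *example_alloc_counters(unsigned int nr, int node)
    {
            return kcalloc_node(nr, sizeof(int), GFP_KERNEL, node);
    }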
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 8f7d2b1656d2..072e46e9e1d5 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h | |||
@@ -20,7 +20,7 @@ struct kmem_cache { | |||
20 | struct reciprocal_value reciprocal_buffer_size; | 20 | struct reciprocal_value reciprocal_buffer_size; |
21 | /* 2) touched by every alloc & free from the backend */ | 21 | /* 2) touched by every alloc & free from the backend */ |
22 | 22 | ||
23 | unsigned int flags; /* constant flags */ | 23 | slab_flags_t flags; /* constant flags */ |
24 | unsigned int num; /* # of objs per slab */ | 24 | unsigned int num; /* # of objs per slab */ |
25 | 25 | ||
26 | /* 3) cache_grow/shrink */ | 26 | /* 3) cache_grow/shrink */ |
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 39fa09bcde23..0adae162dc8f 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h | |||
@@ -82,7 +82,7 @@ struct kmem_cache_order_objects { | |||
82 | struct kmem_cache { | 82 | struct kmem_cache { |
83 | struct kmem_cache_cpu __percpu *cpu_slab; | 83 | struct kmem_cache_cpu __percpu *cpu_slab; |
84 | /* Used for retrieving partial slabs etc */ | 84 | /* Used for retrieving partial slabs etc */ |
85 | unsigned long flags; | 85 | slab_flags_t flags; |
86 | unsigned long min_partial; | 86 | unsigned long min_partial; |
87 | int size; /* The size of an object including meta data */ | 87 | int size; /* The size of an object including meta data */ |
88 | int object_size; /* The size of an object without meta data */ | 88 | int object_size; /* The size of an object without meta data */ |
diff --git a/include/linux/swap.h b/include/linux/swap.h index f02fb5db8914..c2b8128799c1 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h | |||
@@ -171,8 +171,9 @@ enum { | |||
171 | SWP_AREA_DISCARD = (1 << 8), /* single-time swap area discards */ | 171 | SWP_AREA_DISCARD = (1 << 8), /* single-time swap area discards */ |
172 | SWP_PAGE_DISCARD = (1 << 9), /* freed swap page-cluster discards */ | 172 | SWP_PAGE_DISCARD = (1 << 9), /* freed swap page-cluster discards */ |
173 | SWP_STABLE_WRITES = (1 << 10), /* no overwrite PG_writeback pages */ | 173 | SWP_STABLE_WRITES = (1 << 10), /* no overwrite PG_writeback pages */ |
174 | SWP_SYNCHRONOUS_IO = (1 << 11), /* synchronous IO is efficient */ | ||
174 | /* add others here before... */ | 175 | /* add others here before... */ |
175 | SWP_SCANNING = (1 << 11), /* refcount in scan_swap_map */ | 176 | SWP_SCANNING = (1 << 12), /* refcount in scan_swap_map */ |
176 | }; | 177 | }; |
177 | 178 | ||
178 | #define SWAP_CLUSTER_MAX 32UL | 179 | #define SWAP_CLUSTER_MAX 32UL |
@@ -297,7 +298,18 @@ struct vma_swap_readahead { | |||
297 | void *workingset_eviction(struct address_space *mapping, struct page *page); | 298 | void *workingset_eviction(struct address_space *mapping, struct page *page); |
298 | bool workingset_refault(void *shadow); | 299 | bool workingset_refault(void *shadow); |
299 | void workingset_activation(struct page *page); | 300 | void workingset_activation(struct page *page); |
300 | void workingset_update_node(struct radix_tree_node *node, void *private); | 301 | |
302 | /* Do not use directly, use workingset_lookup_update */ | ||
303 | void workingset_update_node(struct radix_tree_node *node); | ||
304 | |||
305 | /* Returns workingset_update_node() if the mapping has shadow entries. */ | ||
306 | #define workingset_lookup_update(mapping) \ | ||
307 | ({ \ | ||
308 | radix_tree_update_node_t __helper = workingset_update_node; \ | ||
309 | if (dax_mapping(mapping) || shmem_mapping(mapping)) \ | ||
310 | __helper = NULL; \ | ||
311 | __helper; \ | ||
312 | }) | ||
301 | 313 | ||
302 | /* linux/mm/page_alloc.c */ | 314 | /* linux/mm/page_alloc.c */ |
303 | extern unsigned long totalram_pages; | 315 | extern unsigned long totalram_pages; |
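The workingset_lookup_update() macro above replaces the old node+private pair: page-cache callers hand the resulting callback (or NULL for DAX and shmem mappings) straight to the radix-tree helpers whose signatures changed earlier in this series. A hedged sketch modeled on the page-cache deletion path; example_replace_slot() is hypothetical and the mapping->page_tree member name is assumed from the pre-i_pages struct address_space layout:

    /* Sketch of a page-cache style caller after the API change. */
    static void example_replace_slot(struct address_space *mapping,
                                     void __rcu **slot,
                                     struct radix_tree_node *node,
                                     void *shadow)
    {
            __radix_tree_replace(&mapping->page_tree, node, slot, shadow,
                                 workingset_lookup_update(mapping));
    }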
@@ -462,9 +474,11 @@ extern unsigned int count_swap_pages(int, int); | |||
462 | extern sector_t map_swap_page(struct page *, struct block_device **); | 474 | extern sector_t map_swap_page(struct page *, struct block_device **); |
463 | extern sector_t swapdev_block(int, pgoff_t); | 475 | extern sector_t swapdev_block(int, pgoff_t); |
464 | extern int page_swapcount(struct page *); | 476 | extern int page_swapcount(struct page *); |
477 | extern int __swap_count(struct swap_info_struct *si, swp_entry_t entry); | ||
465 | extern int __swp_swapcount(swp_entry_t entry); | 478 | extern int __swp_swapcount(swp_entry_t entry); |
466 | extern int swp_swapcount(swp_entry_t entry); | 479 | extern int swp_swapcount(swp_entry_t entry); |
467 | extern struct swap_info_struct *page_swap_info(struct page *); | 480 | extern struct swap_info_struct *page_swap_info(struct page *); |
481 | extern struct swap_info_struct *swp_swap_info(swp_entry_t entry); | ||
468 | extern bool reuse_swap_page(struct page *, int *); | 482 | extern bool reuse_swap_page(struct page *, int *); |
469 | extern int try_to_free_swap(struct page *); | 483 | extern int try_to_free_swap(struct page *); |
470 | struct backing_dev_info; | 484 | struct backing_dev_info; |
@@ -473,6 +487,16 @@ extern void exit_swap_address_space(unsigned int type); | |||
473 | 487 | ||
474 | #else /* CONFIG_SWAP */ | 488 | #else /* CONFIG_SWAP */ |
475 | 489 | ||
490 | static inline int swap_readpage(struct page *page, bool do_poll) | ||
491 | { | ||
492 | return 0; | ||
493 | } | ||
494 | |||
495 | static inline struct swap_info_struct *swp_swap_info(swp_entry_t entry) | ||
496 | { | ||
497 | return NULL; | ||
498 | } | ||
499 | |||
476 | #define swap_address_space(entry) (NULL) | 500 | #define swap_address_space(entry) (NULL) |
477 | #define get_nr_swap_pages() 0L | 501 | #define get_nr_swap_pages() 0L |
478 | #define total_swap_pages 0L | 502 | #define total_swap_pages 0L |
@@ -486,7 +510,7 @@ extern void exit_swap_address_space(unsigned int type); | |||
486 | #define free_page_and_swap_cache(page) \ | 510 | #define free_page_and_swap_cache(page) \ |
487 | put_page(page) | 511 | put_page(page) |
488 | #define free_pages_and_swap_cache(pages, nr) \ | 512 | #define free_pages_and_swap_cache(pages, nr) \ |
489 | release_pages((pages), (nr), false); | 513 | release_pages((pages), (nr)); |
490 | 514 | ||
491 | static inline void show_swap_cache_info(void) | 515 | static inline void show_swap_cache_info(void) |
492 | { | 516 | { |
@@ -577,6 +601,11 @@ static inline int page_swapcount(struct page *page) | |||
577 | return 0; | 601 | return 0; |
578 | } | 602 | } |
579 | 603 | ||
604 | static inline int __swap_count(struct swap_info_struct *si, swp_entry_t entry) | ||
605 | { | ||
606 | return 0; | ||
607 | } | ||
608 | |||
580 | static inline int __swp_swapcount(swp_entry_t entry) | 609 | static inline int __swp_swapcount(swp_entry_t entry) |
581 | { | 610 | { |
582 | return 0; | 611 | return 0; |
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 4bcdf00c110f..34f053a150a9 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h | |||
@@ -44,10 +44,9 @@ enum { | |||
44 | #endif | 44 | #endif |
45 | 45 | ||
46 | #if IS_ENABLED(CONFIG_DEBUG_STACK_USAGE) || IS_ENABLED(CONFIG_DEBUG_KMEMLEAK) | 46 | #if IS_ENABLED(CONFIG_DEBUG_STACK_USAGE) || IS_ENABLED(CONFIG_DEBUG_KMEMLEAK) |
47 | # define THREADINFO_GFP (GFP_KERNEL_ACCOUNT | __GFP_NOTRACK | \ | 47 | # define THREADINFO_GFP (GFP_KERNEL_ACCOUNT | __GFP_ZERO) |
48 | __GFP_ZERO) | ||
49 | #else | 48 | #else |
50 | # define THREADINFO_GFP (GFP_KERNEL_ACCOUNT | __GFP_NOTRACK) | 49 | # define THREADINFO_GFP (GFP_KERNEL_ACCOUNT) |
51 | #endif | 50 | #endif |
52 | 51 | ||
53 | /* | 52 | /* |
diff --git a/include/linux/types.h b/include/linux/types.h index 34fce54e4f1b..c94d59ef96cc 100644 --- a/include/linux/types.h +++ b/include/linux/types.h | |||
@@ -156,6 +156,7 @@ typedef u32 dma_addr_t; | |||
156 | #endif | 156 | #endif |
157 | 157 | ||
158 | typedef unsigned __bitwise gfp_t; | 158 | typedef unsigned __bitwise gfp_t; |
159 | typedef unsigned __bitwise slab_flags_t; | ||
159 | typedef unsigned __bitwise fmode_t; | 160 | typedef unsigned __bitwise fmode_t; |
160 | 161 | ||
161 | #ifdef CONFIG_PHYS_ADDR_T_64BIT | 162 | #ifdef CONFIG_PHYS_ADDR_T_64BIT |
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 1e0cb72e0598..1779c9817b39 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h | |||
@@ -7,9 +7,19 @@ | |||
7 | #include <linux/mmzone.h> | 7 | #include <linux/mmzone.h> |
8 | #include <linux/vm_event_item.h> | 8 | #include <linux/vm_event_item.h> |
9 | #include <linux/atomic.h> | 9 | #include <linux/atomic.h> |
10 | #include <linux/static_key.h> | ||
10 | 11 | ||
11 | extern int sysctl_stat_interval; | 12 | extern int sysctl_stat_interval; |
12 | 13 | ||
14 | #ifdef CONFIG_NUMA | ||
15 | #define ENABLE_NUMA_STAT 1 | ||
16 | #define DISABLE_NUMA_STAT 0 | ||
17 | extern int sysctl_vm_numa_stat; | ||
18 | DECLARE_STATIC_KEY_TRUE(vm_numa_stat_key); | ||
19 | extern int sysctl_vm_numa_stat_handler(struct ctl_table *table, | ||
20 | int write, void __user *buffer, size_t *length, loff_t *ppos); | ||
21 | #endif | ||
22 | |||
13 | #ifdef CONFIG_VM_EVENT_COUNTERS | 23 | #ifdef CONFIG_VM_EVENT_COUNTERS |
14 | /* | 24 | /* |
15 | * Light weight per cpu counter implementation. | 25 | * Light weight per cpu counter implementation. |
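The NUMA-stat sysctl above is gated by a static key, so counting sites can make the disabled case a patched-out branch rather than a load and test. A hedged sketch of how a fast-path hook might guard itself; example_count_numa_hit() is hypothetical and the actual counter update is elided:

    #ifdef CONFIG_NUMA
    /* Hypothetical fast-path hook: skip NUMA hit/miss accounting entirely
     * when sysctl_vm_numa_stat has disabled it. */
    static inline void example_count_numa_hit(struct zone *zone)
    {
            if (!static_branch_likely(&vm_numa_stat_key))
                    return;

            /* ... bump the per-CPU NUMA_HIT counter for this zone ... */
    }
    #endif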
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 2135c9ba6ac3..39efb968b7a4 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h | |||
@@ -17,7 +17,6 @@ | |||
17 | #define _INET_SOCK_H | 17 | #define _INET_SOCK_H |
18 | 18 | ||
19 | #include <linux/bitops.h> | 19 | #include <linux/bitops.h> |
20 | #include <linux/kmemcheck.h> | ||
21 | #include <linux/string.h> | 20 | #include <linux/string.h> |
22 | #include <linux/types.h> | 21 | #include <linux/types.h> |
23 | #include <linux/jhash.h> | 22 | #include <linux/jhash.h> |
@@ -84,7 +83,6 @@ struct inet_request_sock { | |||
84 | #define ireq_state req.__req_common.skc_state | 83 | #define ireq_state req.__req_common.skc_state |
85 | #define ireq_family req.__req_common.skc_family | 84 | #define ireq_family req.__req_common.skc_family |
86 | 85 | ||
87 | kmemcheck_bitfield_begin(flags); | ||
88 | u16 snd_wscale : 4, | 86 | u16 snd_wscale : 4, |
89 | rcv_wscale : 4, | 87 | rcv_wscale : 4, |
90 | tstamp_ok : 1, | 88 | tstamp_ok : 1, |
@@ -94,7 +92,6 @@ struct inet_request_sock { | |||
94 | acked : 1, | 92 | acked : 1, |
95 | no_srccheck: 1, | 93 | no_srccheck: 1, |
96 | smc_ok : 1; | 94 | smc_ok : 1; |
97 | kmemcheck_bitfield_end(flags); | ||
98 | u32 ir_mark; | 95 | u32 ir_mark; |
99 | union { | 96 | union { |
100 | struct ip_options_rcu __rcu *ireq_opt; | 97 | struct ip_options_rcu __rcu *ireq_opt; |
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 6a75d67a30fd..1356fa6a7566 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h | |||
@@ -15,8 +15,6 @@ | |||
15 | #ifndef _INET_TIMEWAIT_SOCK_ | 15 | #ifndef _INET_TIMEWAIT_SOCK_ |
16 | #define _INET_TIMEWAIT_SOCK_ | 16 | #define _INET_TIMEWAIT_SOCK_ |
17 | 17 | ||
18 | |||
19 | #include <linux/kmemcheck.h> | ||
20 | #include <linux/list.h> | 18 | #include <linux/list.h> |
21 | #include <linux/timer.h> | 19 | #include <linux/timer.h> |
22 | #include <linux/types.h> | 20 | #include <linux/types.h> |
@@ -69,14 +67,12 @@ struct inet_timewait_sock { | |||
69 | /* Socket demultiplex comparisons on incoming packets. */ | 67 | /* Socket demultiplex comparisons on incoming packets. */ |
70 | /* these three are in inet_sock */ | 68 | /* these three are in inet_sock */ |
71 | __be16 tw_sport; | 69 | __be16 tw_sport; |
72 | kmemcheck_bitfield_begin(flags); | ||
73 | /* And these are ours. */ | 70 | /* And these are ours. */ |
74 | unsigned int tw_kill : 1, | 71 | unsigned int tw_kill : 1, |
75 | tw_transparent : 1, | 72 | tw_transparent : 1, |
76 | tw_flowlabel : 20, | 73 | tw_flowlabel : 20, |
77 | tw_pad : 2, /* 2 bits hole */ | 74 | tw_pad : 2, /* 2 bits hole */ |
78 | tw_tos : 8; | 75 | tw_tos : 8; |
79 | kmemcheck_bitfield_end(flags); | ||
80 | struct timer_list tw_timer; | 76 | struct timer_list tw_timer; |
81 | struct inet_bind_bucket *tw_tb; | 77 | struct inet_bind_bucket *tw_tb; |
82 | }; | 78 | }; |
diff --git a/include/net/sock.h b/include/net/sock.h index f8715c5af37d..79e1a2c7912c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h | |||
@@ -440,7 +440,6 @@ struct sock { | |||
440 | #define SK_FL_TYPE_MASK 0xffff0000 | 440 | #define SK_FL_TYPE_MASK 0xffff0000 |
441 | #endif | 441 | #endif |
442 | 442 | ||
443 | kmemcheck_bitfield_begin(flags); | ||
444 | unsigned int sk_padding : 1, | 443 | unsigned int sk_padding : 1, |
445 | sk_kern_sock : 1, | 444 | sk_kern_sock : 1, |
446 | sk_no_check_tx : 1, | 445 | sk_no_check_tx : 1, |
@@ -449,8 +448,6 @@ struct sock { | |||
449 | sk_protocol : 8, | 448 | sk_protocol : 8, |
450 | sk_type : 16; | 449 | sk_type : 16; |
451 | #define SK_PROTOCOL_MAX U8_MAX | 450 | #define SK_PROTOCOL_MAX U8_MAX |
452 | kmemcheck_bitfield_end(flags); | ||
453 | |||
454 | u16 sk_gso_max_segs; | 451 | u16 sk_gso_max_segs; |
455 | u8 sk_pacing_shift; | 452 | u8 sk_pacing_shift; |
456 | unsigned long sk_lingertime; | 453 | unsigned long sk_lingertime; |
@@ -1114,7 +1111,7 @@ struct proto { | |||
1114 | 1111 | ||
1115 | struct kmem_cache *slab; | 1112 | struct kmem_cache *slab; |
1116 | unsigned int obj_size; | 1113 | unsigned int obj_size; |
1117 | int slab_flags; | 1114 | slab_flags_t slab_flags; |
1118 | 1115 | ||
1119 | struct percpu_counter *orphan_count; | 1116 | struct percpu_counter *orphan_count; |
1120 | 1117 | ||
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index 285feeadac39..eb57e3037deb 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h | |||
@@ -172,24 +172,21 @@ TRACE_EVENT(mm_page_free, | |||
172 | 172 | ||
173 | TRACE_EVENT(mm_page_free_batched, | 173 | TRACE_EVENT(mm_page_free_batched, |
174 | 174 | ||
175 | TP_PROTO(struct page *page, int cold), | 175 | TP_PROTO(struct page *page), |
176 | 176 | ||
177 | TP_ARGS(page, cold), | 177 | TP_ARGS(page), |
178 | 178 | ||
179 | TP_STRUCT__entry( | 179 | TP_STRUCT__entry( |
180 | __field( unsigned long, pfn ) | 180 | __field( unsigned long, pfn ) |
181 | __field( int, cold ) | ||
182 | ), | 181 | ), |
183 | 182 | ||
184 | TP_fast_assign( | 183 | TP_fast_assign( |
185 | __entry->pfn = page_to_pfn(page); | 184 | __entry->pfn = page_to_pfn(page); |
186 | __entry->cold = cold; | ||
187 | ), | 185 | ), |
188 | 186 | ||
189 | TP_printk("page=%p pfn=%lu order=0 cold=%d", | 187 | TP_printk("page=%p pfn=%lu order=0", |
190 | pfn_to_page(__entry->pfn), | 188 | pfn_to_page(__entry->pfn), |
191 | __entry->pfn, | 189 | __entry->pfn) |
192 | __entry->cold) | ||
193 | ); | 190 | ); |
194 | 191 | ||
195 | TRACE_EVENT(mm_page_alloc, | 192 | TRACE_EVENT(mm_page_alloc, |
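Note: with the cold hint removed, the generated trace_mm_page_free_batched() helper takes only the page. A sketch of a call-site shape; the surrounding list walk and function name are illustrative:

	static void example_trace_batched_free(struct list_head *pages)
	{
		struct page *page, *next;

		list_for_each_entry_safe(page, next, pages, lru)
			trace_mm_page_free_batched(page);	/* no cold argument any more */
	}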
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 648cbf603736..dbe1bb058c09 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h | |||
@@ -32,7 +32,6 @@ | |||
32 | {(unsigned long)__GFP_ATOMIC, "__GFP_ATOMIC"}, \ | 32 | {(unsigned long)__GFP_ATOMIC, "__GFP_ATOMIC"}, \ |
33 | {(unsigned long)__GFP_IO, "__GFP_IO"}, \ | 33 | {(unsigned long)__GFP_IO, "__GFP_IO"}, \ |
34 | {(unsigned long)__GFP_FS, "__GFP_FS"}, \ | 34 | {(unsigned long)__GFP_FS, "__GFP_FS"}, \ |
35 | {(unsigned long)__GFP_COLD, "__GFP_COLD"}, \ | ||
36 | {(unsigned long)__GFP_NOWARN, "__GFP_NOWARN"}, \ | 35 | {(unsigned long)__GFP_NOWARN, "__GFP_NOWARN"}, \ |
37 | {(unsigned long)__GFP_RETRY_MAYFAIL, "__GFP_RETRY_MAYFAIL"}, \ | 36 | {(unsigned long)__GFP_RETRY_MAYFAIL, "__GFP_RETRY_MAYFAIL"}, \ |
38 | {(unsigned long)__GFP_NOFAIL, "__GFP_NOFAIL"}, \ | 37 | {(unsigned long)__GFP_NOFAIL, "__GFP_NOFAIL"}, \ |
@@ -46,7 +45,6 @@ | |||
46 | {(unsigned long)__GFP_RECLAIMABLE, "__GFP_RECLAIMABLE"}, \ | 45 | {(unsigned long)__GFP_RECLAIMABLE, "__GFP_RECLAIMABLE"}, \ |
47 | {(unsigned long)__GFP_MOVABLE, "__GFP_MOVABLE"}, \ | 46 | {(unsigned long)__GFP_MOVABLE, "__GFP_MOVABLE"}, \ |
48 | {(unsigned long)__GFP_ACCOUNT, "__GFP_ACCOUNT"}, \ | 47 | {(unsigned long)__GFP_ACCOUNT, "__GFP_ACCOUNT"}, \ |
49 | {(unsigned long)__GFP_NOTRACK, "__GFP_NOTRACK"}, \ | ||
50 | {(unsigned long)__GFP_WRITE, "__GFP_WRITE"}, \ | 48 | {(unsigned long)__GFP_WRITE, "__GFP_WRITE"}, \ |
51 | {(unsigned long)__GFP_RECLAIM, "__GFP_RECLAIM"}, \ | 49 | {(unsigned long)__GFP_RECLAIM, "__GFP_RECLAIM"}, \ |
52 | {(unsigned long)__GFP_DIRECT_RECLAIM, "__GFP_DIRECT_RECLAIM"},\ | 50 | {(unsigned long)__GFP_DIRECT_RECLAIM, "__GFP_DIRECT_RECLAIM"},\ |
diff --git a/init/Kconfig b/init/Kconfig index 5327146db9b5..7d5a6fbac56a 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
@@ -1655,12 +1655,6 @@ config HAVE_GENERIC_DMA_COHERENT | |||
1655 | bool | 1655 | bool |
1656 | default n | 1656 | default n |
1657 | 1657 | ||
1658 | config SLABINFO | ||
1659 | bool | ||
1660 | depends on PROC_FS | ||
1661 | depends on SLAB || SLUB_DEBUG | ||
1662 | default y | ||
1663 | |||
1664 | config RT_MUTEXES | 1658 | config RT_MUTEXES |
1665 | bool | 1659 | bool |
1666 | 1660 | ||
diff --git a/init/do_mounts.c b/init/do_mounts.c index f6d4dd764a52..7cf4f6dafd5f 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c | |||
@@ -380,8 +380,7 @@ static int __init do_mount_root(char *name, char *fs, int flags, void *data) | |||
380 | 380 | ||
381 | void __init mount_block_root(char *name, int flags) | 381 | void __init mount_block_root(char *name, int flags) |
382 | { | 382 | { |
383 | struct page *page = alloc_page(GFP_KERNEL | | 383 | struct page *page = alloc_page(GFP_KERNEL); |
384 | __GFP_NOTRACK_FALSE_POSITIVE); | ||
385 | char *fs_names = page_address(page); | 384 | char *fs_names = page_address(page); |
386 | char *p; | 385 | char *p; |
387 | #ifdef CONFIG_BLOCK | 386 | #ifdef CONFIG_BLOCK |
diff --git a/init/main.c b/init/main.c index 3bdd8da90f69..859a786f7c0a 100644 --- a/init/main.c +++ b/init/main.c | |||
@@ -70,7 +70,6 @@ | |||
70 | #include <linux/kgdb.h> | 70 | #include <linux/kgdb.h> |
71 | #include <linux/ftrace.h> | 71 | #include <linux/ftrace.h> |
72 | #include <linux/async.h> | 72 | #include <linux/async.h> |
73 | #include <linux/kmemcheck.h> | ||
74 | #include <linux/sfi.h> | 73 | #include <linux/sfi.h> |
75 | #include <linux/shmem_fs.h> | 74 | #include <linux/shmem_fs.h> |
76 | #include <linux/slab.h> | 75 | #include <linux/slab.h> |
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 8a6c37762330..b9f8686a84cf 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c | |||
@@ -85,8 +85,6 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) | |||
85 | if (fp == NULL) | 85 | if (fp == NULL) |
86 | return NULL; | 86 | return NULL; |
87 | 87 | ||
88 | kmemcheck_annotate_bitfield(fp, meta); | ||
89 | |||
90 | aux = kzalloc(sizeof(*aux), GFP_KERNEL | gfp_extra_flags); | 88 | aux = kzalloc(sizeof(*aux), GFP_KERNEL | gfp_extra_flags); |
91 | if (aux == NULL) { | 89 | if (aux == NULL) { |
92 | vfree(fp); | 90 | vfree(fp); |
@@ -127,8 +125,6 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, | |||
127 | if (fp == NULL) { | 125 | if (fp == NULL) { |
128 | __bpf_prog_uncharge(fp_old->aux->user, delta); | 126 | __bpf_prog_uncharge(fp_old->aux->user, delta); |
129 | } else { | 127 | } else { |
130 | kmemcheck_annotate_bitfield(fp, meta); | ||
131 | |||
132 | memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE); | 128 | memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE); |
133 | fp->pages = pages; | 129 | fp->pages = pages; |
134 | fp->aux->prog = fp; | 130 | fp->aux->prog = fp; |
@@ -675,8 +671,6 @@ static struct bpf_prog *bpf_prog_clone_create(struct bpf_prog *fp_other, | |||
675 | 671 | ||
676 | fp = __vmalloc(fp_other->pages * PAGE_SIZE, gfp_flags, PAGE_KERNEL); | 672 | fp = __vmalloc(fp_other->pages * PAGE_SIZE, gfp_flags, PAGE_KERNEL); |
677 | if (fp != NULL) { | 673 | if (fp != NULL) { |
678 | kmemcheck_annotate_bitfield(fp, meta); | ||
679 | |||
680 | /* aux->prog still points to the fp_other one, so | 674 | /* aux->prog still points to the fp_other one, so |
681 | * when promoting the clone to the real program, | 675 | * when promoting the clone to the real program, |
682 | * this still needs to be adapted. | 676 | * this still needs to be adapted. |
diff --git a/kernel/fork.c b/kernel/fork.c index 07cc743698d3..4e55eedba8d6 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -469,7 +469,7 @@ void __init fork_init(void) | |||
469 | /* create a slab on which task_structs can be allocated */ | 469 | /* create a slab on which task_structs can be allocated */ |
470 | task_struct_cachep = kmem_cache_create("task_struct", | 470 | task_struct_cachep = kmem_cache_create("task_struct", |
471 | arch_task_struct_size, align, | 471 | arch_task_struct_size, align, |
472 | SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT, NULL); | 472 | SLAB_PANIC|SLAB_ACCOUNT, NULL); |
473 | #endif | 473 | #endif |
474 | 474 | ||
475 | /* do the arch specific task caches init */ | 475 | /* do the arch specific task caches init */ |
@@ -817,8 +817,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, | |||
817 | init_rwsem(&mm->mmap_sem); | 817 | init_rwsem(&mm->mmap_sem); |
818 | INIT_LIST_HEAD(&mm->mmlist); | 818 | INIT_LIST_HEAD(&mm->mmlist); |
819 | mm->core_state = NULL; | 819 | mm->core_state = NULL; |
820 | atomic_long_set(&mm->nr_ptes, 0); | 820 | mm_pgtables_bytes_init(mm); |
821 | mm_nr_pmds_init(mm); | ||
822 | mm->map_count = 0; | 821 | mm->map_count = 0; |
823 | mm->locked_vm = 0; | 822 | mm->locked_vm = 0; |
824 | mm->pinned_vm = 0; | 823 | mm->pinned_vm = 0; |
@@ -872,12 +871,9 @@ static void check_mm(struct mm_struct *mm) | |||
872 | "mm:%p idx:%d val:%ld\n", mm, i, x); | 871 | "mm:%p idx:%d val:%ld\n", mm, i, x); |
873 | } | 872 | } |
874 | 873 | ||
875 | if (atomic_long_read(&mm->nr_ptes)) | 874 | if (mm_pgtables_bytes(mm)) |
876 | pr_alert("BUG: non-zero nr_ptes on freeing mm: %ld\n", | 875 | pr_alert("BUG: non-zero pgtables_bytes on freeing mm: %ld\n", |
877 | atomic_long_read(&mm->nr_ptes)); | 876 | mm_pgtables_bytes(mm)); |
878 | if (mm_nr_pmds(mm)) | ||
879 | pr_alert("BUG: non-zero nr_pmds on freeing mm: %ld\n", | ||
880 | mm_nr_pmds(mm)); | ||
881 | 877 | ||
882 | #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS | 878 | #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS |
883 | VM_BUG_ON_MM(mm->pmd_huge_pte, mm); | 879 | VM_BUG_ON_MM(mm->pmd_huge_pte, mm); |
@@ -2209,18 +2205,18 @@ void __init proc_caches_init(void) | |||
2209 | sighand_cachep = kmem_cache_create("sighand_cache", | 2205 | sighand_cachep = kmem_cache_create("sighand_cache", |
2210 | sizeof(struct sighand_struct), 0, | 2206 | sizeof(struct sighand_struct), 0, |
2211 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU| | 2207 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU| |
2212 | SLAB_NOTRACK|SLAB_ACCOUNT, sighand_ctor); | 2208 | SLAB_ACCOUNT, sighand_ctor); |
2213 | signal_cachep = kmem_cache_create("signal_cache", | 2209 | signal_cachep = kmem_cache_create("signal_cache", |
2214 | sizeof(struct signal_struct), 0, | 2210 | sizeof(struct signal_struct), 0, |
2215 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT, | 2211 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, |
2216 | NULL); | 2212 | NULL); |
2217 | files_cachep = kmem_cache_create("files_cache", | 2213 | files_cachep = kmem_cache_create("files_cache", |
2218 | sizeof(struct files_struct), 0, | 2214 | sizeof(struct files_struct), 0, |
2219 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT, | 2215 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, |
2220 | NULL); | 2216 | NULL); |
2221 | fs_cachep = kmem_cache_create("fs_cache", | 2217 | fs_cachep = kmem_cache_create("fs_cache", |
2222 | sizeof(struct fs_struct), 0, | 2218 | sizeof(struct fs_struct), 0, |
2223 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT, | 2219 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, |
2224 | NULL); | 2220 | NULL); |
2225 | /* | 2221 | /* |
2226 | * FIXME! The "sizeof(struct mm_struct)" currently includes the | 2222 | * FIXME! The "sizeof(struct mm_struct)" currently includes the |
@@ -2231,7 +2227,7 @@ void __init proc_caches_init(void) | |||
2231 | */ | 2227 | */ |
2232 | mm_cachep = kmem_cache_create("mm_struct", | 2228 | mm_cachep = kmem_cache_create("mm_struct", |
2233 | sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, | 2229 | sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, |
2234 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT, | 2230 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, |
2235 | NULL); | 2231 | NULL); |
2236 | vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT); | 2232 | vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT); |
2237 | mmap_init(); | 2233 | mmap_init(); |
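Note: fork.c now initialises and sanity-checks a single pgtables_bytes counter instead of the separate nr_ptes/nr_pmds pair. A minimal sketch of the replacement helpers as they are used elsewhere in this merge (mm_inc_nr_ptes() appears in the huge_memory.c hunks below); the wrapper function itself is illustrative:

	static void example_account_new_pte_page(struct mm_struct *mm)
	{
		mm_inc_nr_ptes(mm);	/* replaces atomic_long_inc(&mm->nr_ptes) */
		pr_debug("mm %p: %ld bytes of page tables\n",
			 mm, mm_pgtables_bytes(mm));
	}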
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index db933d063bfc..9776da8db180 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c | |||
@@ -47,7 +47,6 @@ | |||
47 | #include <linux/stringify.h> | 47 | #include <linux/stringify.h> |
48 | #include <linux/bitops.h> | 48 | #include <linux/bitops.h> |
49 | #include <linux/gfp.h> | 49 | #include <linux/gfp.h> |
50 | #include <linux/kmemcheck.h> | ||
51 | #include <linux/random.h> | 50 | #include <linux/random.h> |
52 | #include <linux/jhash.h> | 51 | #include <linux/jhash.h> |
53 | 52 | ||
@@ -3238,8 +3237,6 @@ static void __lockdep_init_map(struct lockdep_map *lock, const char *name, | |||
3238 | { | 3237 | { |
3239 | int i; | 3238 | int i; |
3240 | 3239 | ||
3241 | kmemcheck_mark_initialized(lock, sizeof(*lock)); | ||
3242 | |||
3243 | for (i = 0; i < NR_LOCKDEP_CACHING_CLASSES; i++) | 3240 | for (i = 0; i < NR_LOCKDEP_CACHING_CLASSES; i++) |
3244 | lock->class_cache[i] = NULL; | 3241 | lock->class_cache[i] = NULL; |
3245 | 3242 | ||
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index a917a301e201..bce0464524d8 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c | |||
@@ -1884,7 +1884,7 @@ static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem) | |||
1884 | */ | 1884 | */ |
1885 | static inline int get_highmem_buffer(int safe_needed) | 1885 | static inline int get_highmem_buffer(int safe_needed) |
1886 | { | 1886 | { |
1887 | buffer = get_image_page(GFP_ATOMIC | __GFP_COLD, safe_needed); | 1887 | buffer = get_image_page(GFP_ATOMIC, safe_needed); |
1888 | return buffer ? 0 : -ENOMEM; | 1888 | return buffer ? 0 : -ENOMEM; |
1889 | } | 1889 | } |
1890 | 1890 | ||
@@ -1945,7 +1945,7 @@ static int swsusp_alloc(struct memory_bitmap *copy_bm, | |||
1945 | while (nr_pages-- > 0) { | 1945 | while (nr_pages-- > 0) { |
1946 | struct page *page; | 1946 | struct page *page; |
1947 | 1947 | ||
1948 | page = alloc_image_page(GFP_ATOMIC | __GFP_COLD); | 1948 | page = alloc_image_page(GFP_ATOMIC); |
1949 | if (!page) | 1949 | if (!page) |
1950 | goto err_out; | 1950 | goto err_out; |
1951 | memory_bm_set_bit(copy_bm, page_to_pfn(page)); | 1951 | memory_bm_set_bit(copy_bm, page_to_pfn(page)); |
diff --git a/kernel/signal.c b/kernel/signal.c index 8dcd8825b2de..aa1fb9f905db 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -1036,8 +1036,7 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, | |||
1036 | else | 1036 | else |
1037 | override_rlimit = 0; | 1037 | override_rlimit = 0; |
1038 | 1038 | ||
1039 | q = __sigqueue_alloc(sig, t, GFP_ATOMIC | __GFP_NOTRACK_FALSE_POSITIVE, | 1039 | q = __sigqueue_alloc(sig, t, GFP_ATOMIC, override_rlimit); |
1040 | override_rlimit); | ||
1041 | if (q) { | 1040 | if (q) { |
1042 | list_add_tail(&q->list, &pending->list); | 1041 | list_add_tail(&q->list, &pending->list); |
1043 | switch ((unsigned long) info) { | 1042 | switch ((unsigned long) info) { |
diff --git a/kernel/softirq.c b/kernel/softirq.c index 662f7b1b7a78..2f5e87f1bae2 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -486,16 +486,6 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) | |||
486 | } | 486 | } |
487 | EXPORT_SYMBOL(__tasklet_hi_schedule); | 487 | EXPORT_SYMBOL(__tasklet_hi_schedule); |
488 | 488 | ||
489 | void __tasklet_hi_schedule_first(struct tasklet_struct *t) | ||
490 | { | ||
491 | lockdep_assert_irqs_disabled(); | ||
492 | |||
493 | t->next = __this_cpu_read(tasklet_hi_vec.head); | ||
494 | __this_cpu_write(tasklet_hi_vec.head, t); | ||
495 | __raise_softirq_irqoff(HI_SOFTIRQ); | ||
496 | } | ||
497 | EXPORT_SYMBOL(__tasklet_hi_schedule_first); | ||
498 | |||
499 | static __latent_entropy void tasklet_action(struct softirq_action *a) | 489 | static __latent_entropy void tasklet_action(struct softirq_action *a) |
500 | { | 490 | { |
501 | struct tasklet_struct *list; | 491 | struct tasklet_struct *list; |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 9576bd582d4a..4a13a389e99b 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -30,7 +30,6 @@ | |||
30 | #include <linux/proc_fs.h> | 30 | #include <linux/proc_fs.h> |
31 | #include <linux/security.h> | 31 | #include <linux/security.h> |
32 | #include <linux/ctype.h> | 32 | #include <linux/ctype.h> |
33 | #include <linux/kmemcheck.h> | ||
34 | #include <linux/kmemleak.h> | 33 | #include <linux/kmemleak.h> |
35 | #include <linux/fs.h> | 34 | #include <linux/fs.h> |
36 | #include <linux/init.h> | 35 | #include <linux/init.h> |
@@ -1174,15 +1173,6 @@ static struct ctl_table kern_table[] = { | |||
1174 | .extra2 = &one_thousand, | 1173 | .extra2 = &one_thousand, |
1175 | }, | 1174 | }, |
1176 | #endif | 1175 | #endif |
1177 | #ifdef CONFIG_KMEMCHECK | ||
1178 | { | ||
1179 | .procname = "kmemcheck", | ||
1180 | .data = &kmemcheck_enabled, | ||
1181 | .maxlen = sizeof(int), | ||
1182 | .mode = 0644, | ||
1183 | .proc_handler = proc_dointvec, | ||
1184 | }, | ||
1185 | #endif | ||
1186 | { | 1176 | { |
1187 | .procname = "panic_on_warn", | 1177 | .procname = "panic_on_warn", |
1188 | .data = &panic_on_warn, | 1178 | .data = &panic_on_warn, |
@@ -1366,6 +1356,15 @@ static struct ctl_table vm_table[] = { | |||
1366 | .mode = 0644, | 1356 | .mode = 0644, |
1367 | .proc_handler = &hugetlb_mempolicy_sysctl_handler, | 1357 | .proc_handler = &hugetlb_mempolicy_sysctl_handler, |
1368 | }, | 1358 | }, |
1359 | { | ||
1360 | .procname = "numa_stat", | ||
1361 | .data = &sysctl_vm_numa_stat, | ||
1362 | .maxlen = sizeof(int), | ||
1363 | .mode = 0644, | ||
1364 | .proc_handler = sysctl_vm_numa_stat_handler, | ||
1365 | .extra1 = &zero, | ||
1366 | .extra2 = &one, | ||
1367 | }, | ||
1369 | #endif | 1368 | #endif |
1370 | { | 1369 | { |
1371 | .procname = "hugetlb_shm_group", | 1370 | .procname = "hugetlb_shm_group", |
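Note: the new vm.numa_stat entry points at sysctl_vm_numa_stat_handler(), whose prototype is declared in the vmstat.h hunk above. A skeleton consistent with that prototype; the real handler in mm/vmstat.c is not part of this excerpt and also has to toggle vm_numa_stat_key, which is omitted here:

	static int example_numa_stat_handler(struct ctl_table *table, int write,
					     void __user *buffer, size_t *length,
					     loff_t *ppos)
	{
		int ret = proc_dointvec_minmax(table, write, buffer, length, ppos);

		if (ret || !write)
			return ret;
		/* react to the updated sysctl_vm_numa_stat value here */
		return 0;
	}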
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 845f3805c73d..d57fede84b38 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
@@ -13,7 +13,6 @@ | |||
13 | #include <linux/uaccess.h> | 13 | #include <linux/uaccess.h> |
14 | #include <linux/hardirq.h> | 14 | #include <linux/hardirq.h> |
15 | #include <linux/kthread.h> /* for self test */ | 15 | #include <linux/kthread.h> /* for self test */ |
16 | #include <linux/kmemcheck.h> | ||
17 | #include <linux/module.h> | 16 | #include <linux/module.h> |
18 | #include <linux/percpu.h> | 17 | #include <linux/percpu.h> |
19 | #include <linux/mutex.h> | 18 | #include <linux/mutex.h> |
@@ -2055,7 +2054,6 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
2055 | } | 2054 | } |
2056 | 2055 | ||
2057 | event = __rb_page_index(tail_page, tail); | 2056 | event = __rb_page_index(tail_page, tail); |
2058 | kmemcheck_annotate_bitfield(event, bitfield); | ||
2059 | 2057 | ||
2060 | /* account for padding bytes */ | 2058 | /* account for padding bytes */ |
2061 | local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes); | 2059 | local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes); |
@@ -2686,7 +2684,6 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
2686 | /* We reserved something on the buffer */ | 2684 | /* We reserved something on the buffer */ |
2687 | 2685 | ||
2688 | event = __rb_page_index(tail_page, tail); | 2686 | event = __rb_page_index(tail_page, tail); |
2689 | kmemcheck_annotate_bitfield(event, bitfield); | ||
2690 | rb_update_event(cpu_buffer, event, info); | 2687 | rb_update_event(cpu_buffer, event, info); |
2691 | 2688 | ||
2692 | local_inc(&tail_page->entries); | 2689 | local_inc(&tail_page->entries); |
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 07ce7449765a..5402e3954659 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug | |||
@@ -504,7 +504,7 @@ config DEBUG_OBJECTS_ENABLE_DEFAULT | |||
504 | 504 | ||
505 | config DEBUG_SLAB | 505 | config DEBUG_SLAB |
506 | bool "Debug slab memory allocations" | 506 | bool "Debug slab memory allocations" |
507 | depends on DEBUG_KERNEL && SLAB && !KMEMCHECK | 507 | depends on DEBUG_KERNEL && SLAB |
508 | help | 508 | help |
509 | Say Y here to have the kernel do limited verification on memory | 509 | Say Y here to have the kernel do limited verification on memory |
510 | allocation as well as poisoning memory on free to catch use of freed | 510 | allocation as well as poisoning memory on free to catch use of freed |
@@ -516,7 +516,7 @@ config DEBUG_SLAB_LEAK | |||
516 | 516 | ||
517 | config SLUB_DEBUG_ON | 517 | config SLUB_DEBUG_ON |
518 | bool "SLUB debugging on by default" | 518 | bool "SLUB debugging on by default" |
519 | depends on SLUB && SLUB_DEBUG && !KMEMCHECK | 519 | depends on SLUB && SLUB_DEBUG |
520 | default n | 520 | default n |
521 | help | 521 | help |
522 | Boot with debugging on by default. SLUB boots by default with | 522 | Boot with debugging on by default. SLUB boots by default with |
@@ -730,8 +730,6 @@ config DEBUG_STACKOVERFLOW | |||
730 | 730 | ||
731 | If in doubt, say "N". | 731 | If in doubt, say "N". |
732 | 732 | ||
733 | source "lib/Kconfig.kmemcheck" | ||
734 | |||
735 | source "lib/Kconfig.kasan" | 733 | source "lib/Kconfig.kasan" |
736 | 734 | ||
737 | endmenu # "Memory Debugging" | 735 | endmenu # "Memory Debugging" |
diff --git a/lib/Kconfig.kmemcheck b/lib/Kconfig.kmemcheck deleted file mode 100644 index 846e039a86b4..000000000000 --- a/lib/Kconfig.kmemcheck +++ /dev/null | |||
@@ -1,94 +0,0 @@ | |||
1 | config HAVE_ARCH_KMEMCHECK | ||
2 | bool | ||
3 | |||
4 | if HAVE_ARCH_KMEMCHECK | ||
5 | |||
6 | menuconfig KMEMCHECK | ||
7 | bool "kmemcheck: trap use of uninitialized memory" | ||
8 | depends on DEBUG_KERNEL | ||
9 | depends on !X86_USE_3DNOW | ||
10 | depends on SLUB || SLAB | ||
11 | depends on !CC_OPTIMIZE_FOR_SIZE | ||
12 | depends on !FUNCTION_TRACER | ||
13 | select FRAME_POINTER | ||
14 | select STACKTRACE | ||
15 | default n | ||
16 | help | ||
17 | This option enables tracing of dynamically allocated kernel memory | ||
18 | to see if memory is used before it has been given an initial value. | ||
19 | Be aware that this requires half of your memory for bookkeeping and | ||
20 | will insert extra code at *every* read and write to tracked memory | ||
21 | thus slow down the kernel code (but user code is unaffected). | ||
22 | |||
23 | The kernel may be started with kmemcheck=0 or kmemcheck=1 to disable | ||
24 | or enable kmemcheck at boot-time. If the kernel is started with | ||
25 | kmemcheck=0, the large memory and CPU overhead is not incurred. | ||
26 | |||
27 | choice | ||
28 | prompt "kmemcheck: default mode at boot" | ||
29 | depends on KMEMCHECK | ||
30 | default KMEMCHECK_ONESHOT_BY_DEFAULT | ||
31 | help | ||
32 | This option controls the default behaviour of kmemcheck when the | ||
33 | kernel boots and no kmemcheck= parameter is given. | ||
34 | |||
35 | config KMEMCHECK_DISABLED_BY_DEFAULT | ||
36 | bool "disabled" | ||
37 | depends on KMEMCHECK | ||
38 | |||
39 | config KMEMCHECK_ENABLED_BY_DEFAULT | ||
40 | bool "enabled" | ||
41 | depends on KMEMCHECK | ||
42 | |||
43 | config KMEMCHECK_ONESHOT_BY_DEFAULT | ||
44 | bool "one-shot" | ||
45 | depends on KMEMCHECK | ||
46 | help | ||
47 | In one-shot mode, only the first error detected is reported before | ||
48 | kmemcheck is disabled. | ||
49 | |||
50 | endchoice | ||
51 | |||
52 | config KMEMCHECK_QUEUE_SIZE | ||
53 | int "kmemcheck: error queue size" | ||
54 | depends on KMEMCHECK | ||
55 | default 64 | ||
56 | help | ||
57 | Select the maximum number of errors to store in the queue. Since | ||
58 | errors can occur virtually anywhere and in any context, we need a | ||
59 | temporary storage area which is guarantueed not to generate any | ||
60 | other faults. The queue will be emptied as soon as a tasklet may | ||
61 | be scheduled. If the queue is full, new error reports will be | ||
62 | lost. | ||
63 | |||
64 | config KMEMCHECK_SHADOW_COPY_SHIFT | ||
65 | int "kmemcheck: shadow copy size (5 => 32 bytes, 6 => 64 bytes)" | ||
66 | depends on KMEMCHECK | ||
67 | range 2 8 | ||
68 | default 5 | ||
69 | help | ||
70 | Select the number of shadow bytes to save along with each entry of | ||
71 | the queue. These bytes indicate what parts of an allocation are | ||
72 | initialized, uninitialized, etc. and will be displayed when an | ||
73 | error is detected to help the debugging of a particular problem. | ||
74 | |||
75 | config KMEMCHECK_PARTIAL_OK | ||
76 | bool "kmemcheck: allow partially uninitialized memory" | ||
77 | depends on KMEMCHECK | ||
78 | default y | ||
79 | help | ||
80 | This option works around certain GCC optimizations that produce | ||
81 | 32-bit reads from 16-bit variables where the upper 16 bits are | ||
82 | thrown away afterwards. This may of course also hide some real | ||
83 | bugs. | ||
84 | |||
85 | config KMEMCHECK_BITOPS_OK | ||
86 | bool "kmemcheck: allow bit-field manipulation" | ||
87 | depends on KMEMCHECK | ||
88 | default n | ||
89 | help | ||
90 | This option silences warnings that would be generated for bit-field | ||
91 | accesses where not all the bits are initialized at the same time. | ||
92 | This may also hide some real bugs. | ||
93 | |||
94 | endif | ||
@@ -171,7 +171,7 @@ void *idr_replace_ext(struct idr *idr, void *ptr, unsigned long id) | |||
171 | if (!slot || radix_tree_tag_get(&idr->idr_rt, id, IDR_FREE)) | 171 | if (!slot || radix_tree_tag_get(&idr->idr_rt, id, IDR_FREE)) |
172 | return ERR_PTR(-ENOENT); | 172 | return ERR_PTR(-ENOENT); |
173 | 173 | ||
174 | __radix_tree_replace(&idr->idr_rt, node, slot, ptr, NULL, NULL); | 174 | __radix_tree_replace(&idr->idr_rt, node, slot, ptr, NULL); |
175 | 175 | ||
176 | return entry; | 176 | return entry; |
177 | } | 177 | } |
diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 8b1feca1230a..c8d55565fafa 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c | |||
@@ -677,8 +677,7 @@ out: | |||
677 | * @root radix tree root | 677 | * @root radix tree root |
678 | */ | 678 | */ |
679 | static inline bool radix_tree_shrink(struct radix_tree_root *root, | 679 | static inline bool radix_tree_shrink(struct radix_tree_root *root, |
680 | radix_tree_update_node_t update_node, | 680 | radix_tree_update_node_t update_node) |
681 | void *private) | ||
682 | { | 681 | { |
683 | bool shrunk = false; | 682 | bool shrunk = false; |
684 | 683 | ||
@@ -739,7 +738,7 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root, | |||
739 | if (!radix_tree_is_internal_node(child)) { | 738 | if (!radix_tree_is_internal_node(child)) { |
740 | node->slots[0] = (void __rcu *)RADIX_TREE_RETRY; | 739 | node->slots[0] = (void __rcu *)RADIX_TREE_RETRY; |
741 | if (update_node) | 740 | if (update_node) |
742 | update_node(node, private); | 741 | update_node(node); |
743 | } | 742 | } |
744 | 743 | ||
745 | WARN_ON_ONCE(!list_empty(&node->private_list)); | 744 | WARN_ON_ONCE(!list_empty(&node->private_list)); |
@@ -752,7 +751,7 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root, | |||
752 | 751 | ||
753 | static bool delete_node(struct radix_tree_root *root, | 752 | static bool delete_node(struct radix_tree_root *root, |
754 | struct radix_tree_node *node, | 753 | struct radix_tree_node *node, |
755 | radix_tree_update_node_t update_node, void *private) | 754 | radix_tree_update_node_t update_node) |
756 | { | 755 | { |
757 | bool deleted = false; | 756 | bool deleted = false; |
758 | 757 | ||
@@ -762,8 +761,8 @@ static bool delete_node(struct radix_tree_root *root, | |||
762 | if (node->count) { | 761 | if (node->count) { |
763 | if (node_to_entry(node) == | 762 | if (node_to_entry(node) == |
764 | rcu_dereference_raw(root->rnode)) | 763 | rcu_dereference_raw(root->rnode)) |
765 | deleted |= radix_tree_shrink(root, update_node, | 764 | deleted |= radix_tree_shrink(root, |
766 | private); | 765 | update_node); |
767 | return deleted; | 766 | return deleted; |
768 | } | 767 | } |
769 | 768 | ||
@@ -1173,7 +1172,6 @@ static int calculate_count(struct radix_tree_root *root, | |||
1173 | * @slot: pointer to slot in @node | 1172 | * @slot: pointer to slot in @node |
1174 | * @item: new item to store in the slot. | 1173 | * @item: new item to store in the slot. |
1175 | * @update_node: callback for changing leaf nodes | 1174 | * @update_node: callback for changing leaf nodes |
1176 | * @private: private data to pass to @update_node | ||
1177 | * | 1175 | * |
1178 | * For use with __radix_tree_lookup(). Caller must hold tree write locked | 1176 | * For use with __radix_tree_lookup(). Caller must hold tree write locked |
1179 | * across slot lookup and replacement. | 1177 | * across slot lookup and replacement. |
@@ -1181,7 +1179,7 @@ static int calculate_count(struct radix_tree_root *root, | |||
1181 | void __radix_tree_replace(struct radix_tree_root *root, | 1179 | void __radix_tree_replace(struct radix_tree_root *root, |
1182 | struct radix_tree_node *node, | 1180 | struct radix_tree_node *node, |
1183 | void __rcu **slot, void *item, | 1181 | void __rcu **slot, void *item, |
1184 | radix_tree_update_node_t update_node, void *private) | 1182 | radix_tree_update_node_t update_node) |
1185 | { | 1183 | { |
1186 | void *old = rcu_dereference_raw(*slot); | 1184 | void *old = rcu_dereference_raw(*slot); |
1187 | int exceptional = !!radix_tree_exceptional_entry(item) - | 1185 | int exceptional = !!radix_tree_exceptional_entry(item) - |
@@ -1201,9 +1199,9 @@ void __radix_tree_replace(struct radix_tree_root *root, | |||
1201 | return; | 1199 | return; |
1202 | 1200 | ||
1203 | if (update_node) | 1201 | if (update_node) |
1204 | update_node(node, private); | 1202 | update_node(node); |
1205 | 1203 | ||
1206 | delete_node(root, node, update_node, private); | 1204 | delete_node(root, node, update_node); |
1207 | } | 1205 | } |
1208 | 1206 | ||
1209 | /** | 1207 | /** |
@@ -1225,7 +1223,7 @@ void __radix_tree_replace(struct radix_tree_root *root, | |||
1225 | void radix_tree_replace_slot(struct radix_tree_root *root, | 1223 | void radix_tree_replace_slot(struct radix_tree_root *root, |
1226 | void __rcu **slot, void *item) | 1224 | void __rcu **slot, void *item) |
1227 | { | 1225 | { |
1228 | __radix_tree_replace(root, NULL, slot, item, NULL, NULL); | 1226 | __radix_tree_replace(root, NULL, slot, item, NULL); |
1229 | } | 1227 | } |
1230 | EXPORT_SYMBOL(radix_tree_replace_slot); | 1228 | EXPORT_SYMBOL(radix_tree_replace_slot); |
1231 | 1229 | ||
@@ -1242,7 +1240,7 @@ void radix_tree_iter_replace(struct radix_tree_root *root, | |||
1242 | const struct radix_tree_iter *iter, | 1240 | const struct radix_tree_iter *iter, |
1243 | void __rcu **slot, void *item) | 1241 | void __rcu **slot, void *item) |
1244 | { | 1242 | { |
1245 | __radix_tree_replace(root, iter->node, slot, item, NULL, NULL); | 1243 | __radix_tree_replace(root, iter->node, slot, item, NULL); |
1246 | } | 1244 | } |
1247 | 1245 | ||
1248 | #ifdef CONFIG_RADIX_TREE_MULTIORDER | 1246 | #ifdef CONFIG_RADIX_TREE_MULTIORDER |
@@ -1972,7 +1970,6 @@ EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); | |||
1972 | * @root: radix tree root | 1970 | * @root: radix tree root |
1973 | * @node: node containing @index | 1971 | * @node: node containing @index |
1974 | * @update_node: callback for changing leaf nodes | 1972 | * @update_node: callback for changing leaf nodes |
1975 | * @private: private data to pass to @update_node | ||
1976 | * | 1973 | * |
1977 | * After clearing the slot at @index in @node from radix tree | 1974 | * After clearing the slot at @index in @node from radix tree |
1978 | * rooted at @root, call this function to attempt freeing the | 1975 | * rooted at @root, call this function to attempt freeing the |
@@ -1980,10 +1977,9 @@ EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); | |||
1980 | */ | 1977 | */ |
1981 | void __radix_tree_delete_node(struct radix_tree_root *root, | 1978 | void __radix_tree_delete_node(struct radix_tree_root *root, |
1982 | struct radix_tree_node *node, | 1979 | struct radix_tree_node *node, |
1983 | radix_tree_update_node_t update_node, | 1980 | radix_tree_update_node_t update_node) |
1984 | void *private) | ||
1985 | { | 1981 | { |
1986 | delete_node(root, node, update_node, private); | 1982 | delete_node(root, node, update_node); |
1987 | } | 1983 | } |
1988 | 1984 | ||
1989 | static bool __radix_tree_delete(struct radix_tree_root *root, | 1985 | static bool __radix_tree_delete(struct radix_tree_root *root, |
@@ -2001,7 +1997,7 @@ static bool __radix_tree_delete(struct radix_tree_root *root, | |||
2001 | node_tag_clear(root, node, tag, offset); | 1997 | node_tag_clear(root, node, tag, offset); |
2002 | 1998 | ||
2003 | replace_slot(slot, NULL, node, -1, exceptional); | 1999 | replace_slot(slot, NULL, node, -1, exceptional); |
2004 | return node && delete_node(root, node, NULL, NULL); | 2000 | return node && delete_node(root, node, NULL); |
2005 | } | 2001 | } |
2006 | 2002 | ||
2007 | /** | 2003 | /** |
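Note: with the private pointer dropped, radix_tree_update_node_t callbacks receive only the node, and __radix_tree_replace()/__radix_tree_delete_node() callers lose the trailing NULL. A sketch of the new callback and call shape; the names are illustrative:

	static void example_update_node(struct radix_tree_node *node)
	{
		/* e.g. track shadow-entry nodes via node->private_list */
	}

	static void example_replace_slot(struct radix_tree_root *root,
					 struct radix_tree_node *node,
					 void __rcu **slot, void *item)
	{
		__radix_tree_replace(root, node, slot, item, example_update_node);
	}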
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug index 5b0adf1435de..e5e606ee5f71 100644 --- a/mm/Kconfig.debug +++ b/mm/Kconfig.debug | |||
@@ -11,7 +11,6 @@ config DEBUG_PAGEALLOC | |||
11 | bool "Debug page memory allocations" | 11 | bool "Debug page memory allocations" |
12 | depends on DEBUG_KERNEL | 12 | depends on DEBUG_KERNEL |
13 | depends on !HIBERNATION || ARCH_SUPPORTS_DEBUG_PAGEALLOC && !PPC && !SPARC | 13 | depends on !HIBERNATION || ARCH_SUPPORTS_DEBUG_PAGEALLOC && !PPC && !SPARC |
14 | depends on !KMEMCHECK | ||
15 | select PAGE_EXTENSION | 14 | select PAGE_EXTENSION |
16 | select PAGE_POISONING if !ARCH_SUPPORTS_DEBUG_PAGEALLOC | 15 | select PAGE_POISONING if !ARCH_SUPPORTS_DEBUG_PAGEALLOC |
17 | ---help--- | 16 | ---help--- |
diff --git a/mm/Makefile b/mm/Makefile index 4659b93cba43..e7ebd176fb93 100644 --- a/mm/Makefile +++ b/mm/Makefile | |||
@@ -17,7 +17,6 @@ KCOV_INSTRUMENT_slub.o := n | |||
17 | KCOV_INSTRUMENT_page_alloc.o := n | 17 | KCOV_INSTRUMENT_page_alloc.o := n |
18 | KCOV_INSTRUMENT_debug-pagealloc.o := n | 18 | KCOV_INSTRUMENT_debug-pagealloc.o := n |
19 | KCOV_INSTRUMENT_kmemleak.o := n | 19 | KCOV_INSTRUMENT_kmemleak.o := n |
20 | KCOV_INSTRUMENT_kmemcheck.o := n | ||
21 | KCOV_INSTRUMENT_memcontrol.o := n | 20 | KCOV_INSTRUMENT_memcontrol.o := n |
22 | KCOV_INSTRUMENT_mmzone.o := n | 21 | KCOV_INSTRUMENT_mmzone.o := n |
23 | KCOV_INSTRUMENT_vmstat.o := n | 22 | KCOV_INSTRUMENT_vmstat.o := n |
@@ -70,7 +69,6 @@ obj-$(CONFIG_KSM) += ksm.o | |||
70 | obj-$(CONFIG_PAGE_POISONING) += page_poison.o | 69 | obj-$(CONFIG_PAGE_POISONING) += page_poison.o |
71 | obj-$(CONFIG_SLAB) += slab.o | 70 | obj-$(CONFIG_SLAB) += slab.o |
72 | obj-$(CONFIG_SLUB) += slub.o | 71 | obj-$(CONFIG_SLUB) += slub.o |
73 | obj-$(CONFIG_KMEMCHECK) += kmemcheck.o | ||
74 | obj-$(CONFIG_KASAN) += kasan/ | 72 | obj-$(CONFIG_KASAN) += kasan/ |
75 | obj-$(CONFIG_FAILSLAB) += failslab.o | 73 | obj-$(CONFIG_FAILSLAB) += failslab.o |
76 | obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o | 74 | obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o |
@@ -461,7 +461,7 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align, | |||
461 | trace_cma_alloc(pfn, page, count, align); | 461 | trace_cma_alloc(pfn, page, count, align); |
462 | 462 | ||
463 | if (ret && !(gfp_mask & __GFP_NOWARN)) { | 463 | if (ret && !(gfp_mask & __GFP_NOWARN)) { |
464 | pr_info("%s: alloc failed, req-size: %zu pages, ret: %d\n", | 464 | pr_err("%s: alloc failed, req-size: %zu pages, ret: %d\n", |
465 | __func__, count, ret); | 465 | __func__, count, ret); |
466 | cma_debug_show_areas(cma); | 466 | cma_debug_show_areas(cma); |
467 | } | 467 | } |
diff --git a/mm/debug.c b/mm/debug.c index 6726bec731c9..d947f3e03b0d 100644 --- a/mm/debug.c +++ b/mm/debug.c | |||
@@ -105,7 +105,7 @@ void dump_mm(const struct mm_struct *mm) | |||
105 | "get_unmapped_area %p\n" | 105 | "get_unmapped_area %p\n" |
106 | #endif | 106 | #endif |
107 | "mmap_base %lu mmap_legacy_base %lu highest_vm_end %lu\n" | 107 | "mmap_base %lu mmap_legacy_base %lu highest_vm_end %lu\n" |
108 | "pgd %p mm_users %d mm_count %d nr_ptes %lu nr_pmds %lu map_count %d\n" | 108 | "pgd %p mm_users %d mm_count %d pgtables_bytes %lu map_count %d\n" |
109 | "hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n" | 109 | "hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n" |
110 | "pinned_vm %lx data_vm %lx exec_vm %lx stack_vm %lx\n" | 110 | "pinned_vm %lx data_vm %lx exec_vm %lx stack_vm %lx\n" |
111 | "start_code %lx end_code %lx start_data %lx end_data %lx\n" | 111 | "start_code %lx end_code %lx start_data %lx end_data %lx\n" |
@@ -135,8 +135,7 @@ void dump_mm(const struct mm_struct *mm) | |||
135 | mm->mmap_base, mm->mmap_legacy_base, mm->highest_vm_end, | 135 | mm->mmap_base, mm->mmap_legacy_base, mm->highest_vm_end, |
136 | mm->pgd, atomic_read(&mm->mm_users), | 136 | mm->pgd, atomic_read(&mm->mm_users), |
137 | atomic_read(&mm->mm_count), | 137 | atomic_read(&mm->mm_count), |
138 | atomic_long_read((atomic_long_t *)&mm->nr_ptes), | 138 | mm_pgtables_bytes(mm), |
139 | mm_nr_pmds((struct mm_struct *)mm), | ||
140 | mm->map_count, | 139 | mm->map_count, |
141 | mm->hiwater_rss, mm->hiwater_vm, mm->total_vm, mm->locked_vm, | 140 | mm->hiwater_rss, mm->hiwater_vm, mm->total_vm, mm->locked_vm, |
142 | mm->pinned_vm, mm->data_vm, mm->exec_vm, mm->stack_vm, | 141 | mm->pinned_vm, mm->data_vm, mm->exec_vm, mm->stack_vm, |
diff --git a/mm/filemap.c b/mm/filemap.c index 594d73fef8b4..923fc2ebd74a 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -35,6 +35,7 @@ | |||
35 | #include <linux/hugetlb.h> | 35 | #include <linux/hugetlb.h> |
36 | #include <linux/memcontrol.h> | 36 | #include <linux/memcontrol.h> |
37 | #include <linux/cleancache.h> | 37 | #include <linux/cleancache.h> |
38 | #include <linux/shmem_fs.h> | ||
38 | #include <linux/rmap.h> | 39 | #include <linux/rmap.h> |
39 | #include "internal.h" | 40 | #include "internal.h" |
40 | 41 | ||
@@ -134,7 +135,7 @@ static int page_cache_tree_insert(struct address_space *mapping, | |||
134 | *shadowp = p; | 135 | *shadowp = p; |
135 | } | 136 | } |
136 | __radix_tree_replace(&mapping->page_tree, node, slot, page, | 137 | __radix_tree_replace(&mapping->page_tree, node, slot, page, |
137 | workingset_update_node, mapping); | 138 | workingset_lookup_update(mapping)); |
138 | mapping->nrpages++; | 139 | mapping->nrpages++; |
139 | return 0; | 140 | return 0; |
140 | } | 141 | } |
@@ -162,9 +163,12 @@ static void page_cache_tree_delete(struct address_space *mapping, | |||
162 | 163 | ||
163 | radix_tree_clear_tags(&mapping->page_tree, node, slot); | 164 | radix_tree_clear_tags(&mapping->page_tree, node, slot); |
164 | __radix_tree_replace(&mapping->page_tree, node, slot, shadow, | 165 | __radix_tree_replace(&mapping->page_tree, node, slot, shadow, |
165 | workingset_update_node, mapping); | 166 | workingset_lookup_update(mapping)); |
166 | } | 167 | } |
167 | 168 | ||
169 | page->mapping = NULL; | ||
170 | /* Leave page->index set: truncation lookup relies upon it */ | ||
171 | |||
168 | if (shadow) { | 172 | if (shadow) { |
169 | mapping->nrexceptional += nr; | 173 | mapping->nrexceptional += nr; |
170 | /* | 174 | /* |
@@ -178,17 +182,11 @@ static void page_cache_tree_delete(struct address_space *mapping, | |||
178 | mapping->nrpages -= nr; | 182 | mapping->nrpages -= nr; |
179 | } | 183 | } |
180 | 184 | ||
181 | /* | 185 | static void unaccount_page_cache_page(struct address_space *mapping, |
182 | * Delete a page from the page cache and free it. Caller has to make | 186 | struct page *page) |
183 | * sure the page is locked and that nobody else uses it - or that usage | ||
184 | * is safe. The caller must hold the mapping's tree_lock. | ||
185 | */ | ||
186 | void __delete_from_page_cache(struct page *page, void *shadow) | ||
187 | { | 187 | { |
188 | struct address_space *mapping = page->mapping; | 188 | int nr; |
189 | int nr = hpage_nr_pages(page); | ||
190 | 189 | ||
191 | trace_mm_filemap_delete_from_page_cache(page); | ||
192 | /* | 190 | /* |
193 | * if we're uptodate, flush out into the cleancache, otherwise | 191 | * if we're uptodate, flush out into the cleancache, otherwise |
194 | * invalidate any existing cleancache entries. We can't leave | 192 | * invalidate any existing cleancache entries. We can't leave |
@@ -224,15 +222,12 @@ void __delete_from_page_cache(struct page *page, void *shadow) | |||
224 | } | 222 | } |
225 | } | 223 | } |
226 | 224 | ||
227 | page_cache_tree_delete(mapping, page, shadow); | ||
228 | |||
229 | page->mapping = NULL; | ||
230 | /* Leave page->index set: truncation lookup relies upon it */ | ||
231 | |||
232 | /* hugetlb pages do not participate in page cache accounting. */ | 225 | /* hugetlb pages do not participate in page cache accounting. */ |
233 | if (PageHuge(page)) | 226 | if (PageHuge(page)) |
234 | return; | 227 | return; |
235 | 228 | ||
229 | nr = hpage_nr_pages(page); | ||
230 | |||
236 | __mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr); | 231 | __mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr); |
237 | if (PageSwapBacked(page)) { | 232 | if (PageSwapBacked(page)) { |
238 | __mod_node_page_state(page_pgdat(page), NR_SHMEM, -nr); | 233 | __mod_node_page_state(page_pgdat(page), NR_SHMEM, -nr); |
@@ -243,17 +238,51 @@ void __delete_from_page_cache(struct page *page, void *shadow) | |||
243 | } | 238 | } |
244 | 239 | ||
245 | /* | 240 | /* |
246 | * At this point page must be either written or cleaned by truncate. | 241 | * At this point page must be either written or cleaned by |
247 | * Dirty page here signals a bug and loss of unwritten data. | 242 | * truncate. Dirty page here signals a bug and loss of |
243 | * unwritten data. | ||
248 | * | 244 | * |
249 | * This fixes dirty accounting after removing the page entirely but | 245 | * This fixes dirty accounting after removing the page entirely |
250 | * leaves PageDirty set: it has no effect for truncated page and | 246 | * but leaves PageDirty set: it has no effect for truncated |
251 | * anyway will be cleared before returning page into buddy allocator. | 247 | * page and anyway will be cleared before returning page into |
248 | * buddy allocator. | ||
252 | */ | 249 | */ |
253 | if (WARN_ON_ONCE(PageDirty(page))) | 250 | if (WARN_ON_ONCE(PageDirty(page))) |
254 | account_page_cleaned(page, mapping, inode_to_wb(mapping->host)); | 251 | account_page_cleaned(page, mapping, inode_to_wb(mapping->host)); |
255 | } | 252 | } |
256 | 253 | ||
254 | /* | ||
255 | * Delete a page from the page cache and free it. Caller has to make | ||
256 | * sure the page is locked and that nobody else uses it - or that usage | ||
257 | * is safe. The caller must hold the mapping's tree_lock. | ||
258 | */ | ||
259 | void __delete_from_page_cache(struct page *page, void *shadow) | ||
260 | { | ||
261 | struct address_space *mapping = page->mapping; | ||
262 | |||
263 | trace_mm_filemap_delete_from_page_cache(page); | ||
264 | |||
265 | unaccount_page_cache_page(mapping, page); | ||
266 | page_cache_tree_delete(mapping, page, shadow); | ||
267 | } | ||
268 | |||
269 | static void page_cache_free_page(struct address_space *mapping, | ||
270 | struct page *page) | ||
271 | { | ||
272 | void (*freepage)(struct page *); | ||
273 | |||
274 | freepage = mapping->a_ops->freepage; | ||
275 | if (freepage) | ||
276 | freepage(page); | ||
277 | |||
278 | if (PageTransHuge(page) && !PageHuge(page)) { | ||
279 | page_ref_sub(page, HPAGE_PMD_NR); | ||
280 | VM_BUG_ON_PAGE(page_count(page) <= 0, page); | ||
281 | } else { | ||
282 | put_page(page); | ||
283 | } | ||
284 | } | ||
285 | |||
257 | /** | 286 | /** |
258 | * delete_from_page_cache - delete page from page cache | 287 | * delete_from_page_cache - delete page from page cache |
259 | * @page: the page which the kernel is trying to remove from page cache | 288 | * @page: the page which the kernel is trying to remove from page cache |
@@ -266,27 +295,98 @@ void delete_from_page_cache(struct page *page) | |||
266 | { | 295 | { |
267 | struct address_space *mapping = page_mapping(page); | 296 | struct address_space *mapping = page_mapping(page); |
268 | unsigned long flags; | 297 | unsigned long flags; |
269 | void (*freepage)(struct page *); | ||
270 | 298 | ||
271 | BUG_ON(!PageLocked(page)); | 299 | BUG_ON(!PageLocked(page)); |
272 | |||
273 | freepage = mapping->a_ops->freepage; | ||
274 | |||
275 | spin_lock_irqsave(&mapping->tree_lock, flags); | 300 | spin_lock_irqsave(&mapping->tree_lock, flags); |
276 | __delete_from_page_cache(page, NULL); | 301 | __delete_from_page_cache(page, NULL); |
277 | spin_unlock_irqrestore(&mapping->tree_lock, flags); | 302 | spin_unlock_irqrestore(&mapping->tree_lock, flags); |
278 | 303 | ||
279 | if (freepage) | 304 | page_cache_free_page(mapping, page); |
280 | freepage(page); | 305 | } |
306 | EXPORT_SYMBOL(delete_from_page_cache); | ||
281 | 307 | ||
282 | if (PageTransHuge(page) && !PageHuge(page)) { | 308 | /* |
283 | page_ref_sub(page, HPAGE_PMD_NR); | 309 | * page_cache_tree_delete_batch - delete several pages from page cache |
284 | VM_BUG_ON_PAGE(page_count(page) <= 0, page); | 310 | * @mapping: the mapping to which pages belong |
285 | } else { | 311 | * @pvec: pagevec with pages to delete |
286 | put_page(page); | 312 | * |
313 | * The function walks over mapping->page_tree and removes pages passed in @pvec | ||
314 | * from the radix tree. The function expects @pvec to be sorted by page index. | ||
315 | * It tolerates holes in @pvec (radix tree entries at those indices are not | ||
316 | * modified). The function expects only THP head pages to be present in the | ||
317 | * @pvec and takes care to delete all corresponding tail pages from the radix | ||
318 | * tree as well. | ||
319 | * | ||
320 | * The function expects mapping->tree_lock to be held. | ||
321 | */ | ||
322 | static void | ||
323 | page_cache_tree_delete_batch(struct address_space *mapping, | ||
324 | struct pagevec *pvec) | ||
325 | { | ||
326 | struct radix_tree_iter iter; | ||
327 | void **slot; | ||
328 | int total_pages = 0; | ||
329 | int i = 0, tail_pages = 0; | ||
330 | struct page *page; | ||
331 | pgoff_t start; | ||
332 | |||
333 | start = pvec->pages[0]->index; | ||
334 | radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) { | ||
335 | if (i >= pagevec_count(pvec) && !tail_pages) | ||
336 | break; | ||
337 | page = radix_tree_deref_slot_protected(slot, | ||
338 | &mapping->tree_lock); | ||
339 | if (radix_tree_exceptional_entry(page)) | ||
340 | continue; | ||
341 | if (!tail_pages) { | ||
342 | /* | ||
343 | * Some page got inserted in our range? Skip it. We | ||
344 | * have our pages locked so they are protected from | ||
345 | * being removed. | ||
346 | */ | ||
347 | if (page != pvec->pages[i]) | ||
348 | continue; | ||
349 | WARN_ON_ONCE(!PageLocked(page)); | ||
350 | if (PageTransHuge(page) && !PageHuge(page)) | ||
351 | tail_pages = HPAGE_PMD_NR - 1; | ||
352 | page->mapping = NULL; | ||
353 | /* | ||
354 | * Leave page->index set: truncation lookup relies | ||
355 | * upon it | ||
356 | */ | ||
357 | i++; | ||
358 | } else { | ||
359 | tail_pages--; | ||
360 | } | ||
361 | radix_tree_clear_tags(&mapping->page_tree, iter.node, slot); | ||
362 | __radix_tree_replace(&mapping->page_tree, iter.node, slot, NULL, | ||
363 | workingset_lookup_update(mapping)); | ||
364 | total_pages++; | ||
287 | } | 365 | } |
366 | mapping->nrpages -= total_pages; | ||
367 | } | ||
368 | |||
369 | void delete_from_page_cache_batch(struct address_space *mapping, | ||
370 | struct pagevec *pvec) | ||
371 | { | ||
372 | int i; | ||
373 | unsigned long flags; | ||
374 | |||
375 | if (!pagevec_count(pvec)) | ||
376 | return; | ||
377 | |||
378 | spin_lock_irqsave(&mapping->tree_lock, flags); | ||
379 | for (i = 0; i < pagevec_count(pvec); i++) { | ||
380 | trace_mm_filemap_delete_from_page_cache(pvec->pages[i]); | ||
381 | |||
382 | unaccount_page_cache_page(mapping, pvec->pages[i]); | ||
383 | } | ||
384 | page_cache_tree_delete_batch(mapping, pvec); | ||
385 | spin_unlock_irqrestore(&mapping->tree_lock, flags); | ||
386 | |||
387 | for (i = 0; i < pagevec_count(pvec); i++) | ||
388 | page_cache_free_page(mapping, pvec->pages[i]); | ||
288 | } | 389 | } |
289 | EXPORT_SYMBOL(delete_from_page_cache); | ||
290 | 390 | ||
291 | int filemap_check_errors(struct address_space *mapping) | 391 | int filemap_check_errors(struct address_space *mapping) |
292 | { | 392 | { |
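Note: delete_from_page_cache_batch() expects a pagevec of locked pages belonging to the mapping, sorted by index, and unaccounts, unlinks and frees them under one tree_lock acquisition. A hedged sketch of a caller; a real truncate path does more per-page work than shown:

	static void example_remove_locked_pages(struct address_space *mapping,
						struct pagevec *pvec)
	{
		int i;

		delete_from_page_cache_batch(mapping, pvec);
		for (i = 0; i < pagevec_count(pvec); i++)
			unlock_page(pvec->pages[i]);	/* pvec still holds references */
		pagevec_release(pvec);
	}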
@@ -419,20 +519,18 @@ static void __filemap_fdatawait_range(struct address_space *mapping, | |||
419 | if (end_byte < start_byte) | 519 | if (end_byte < start_byte) |
420 | return; | 520 | return; |
421 | 521 | ||
422 | pagevec_init(&pvec, 0); | 522 | pagevec_init(&pvec); |
423 | while ((index <= end) && | 523 | while (index <= end) { |
424 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | ||
425 | PAGECACHE_TAG_WRITEBACK, | ||
426 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) { | ||
427 | unsigned i; | 524 | unsigned i; |
428 | 525 | ||
526 | nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, | ||
527 | end, PAGECACHE_TAG_WRITEBACK); | ||
528 | if (!nr_pages) | ||
529 | break; | ||
530 | |||
429 | for (i = 0; i < nr_pages; i++) { | 531 | for (i = 0; i < nr_pages; i++) { |
430 | struct page *page = pvec.pages[i]; | 532 | struct page *page = pvec.pages[i]; |
431 | 533 | ||
432 | /* until radix tree lookup accepts end_index */ | ||
433 | if (page->index > end) | ||
434 | continue; | ||
435 | |||
436 | wait_on_page_writeback(page); | 534 | wait_on_page_writeback(page); |
437 | ClearPageError(page); | 535 | ClearPageError(page); |
438 | } | 536 | } |
@@ -1754,9 +1852,10 @@ repeat: | |||
1754 | EXPORT_SYMBOL(find_get_pages_contig); | 1852 | EXPORT_SYMBOL(find_get_pages_contig); |
1755 | 1853 | ||
1756 | /** | 1854 | /** |
1757 | * find_get_pages_tag - find and return pages that match @tag | 1855 | * find_get_pages_range_tag - find and return pages in given range matching @tag |
1758 | * @mapping: the address_space to search | 1856 | * @mapping: the address_space to search |
1759 | * @index: the starting page index | 1857 | * @index: the starting page index |
1858 | * @end: The final page index (inclusive) | ||
1760 | * @tag: the tag index | 1859 | * @tag: the tag index |
1761 | * @nr_pages: the maximum number of pages | 1860 | * @nr_pages: the maximum number of pages |
1762 | * @pages: where the resulting pages are placed | 1861 | * @pages: where the resulting pages are placed |
@@ -1764,8 +1863,9 @@ EXPORT_SYMBOL(find_get_pages_contig); | |||
1764 | * Like find_get_pages, except we only return pages which are tagged with | 1863 | * Like find_get_pages, except we only return pages which are tagged with |
1765 | * @tag. We update @index to index the next page for the traversal. | 1864 | * @tag. We update @index to index the next page for the traversal. |
1766 | */ | 1865 | */ |
1767 | unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index, | 1866 | unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index, |
1768 | int tag, unsigned int nr_pages, struct page **pages) | 1867 | pgoff_t end, int tag, unsigned int nr_pages, |
1868 | struct page **pages) | ||
1769 | { | 1869 | { |
1770 | struct radix_tree_iter iter; | 1870 | struct radix_tree_iter iter; |
1771 | void **slot; | 1871 | void **slot; |
@@ -1778,6 +1878,9 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index, | |||
1778 | radix_tree_for_each_tagged(slot, &mapping->page_tree, | 1878 | radix_tree_for_each_tagged(slot, &mapping->page_tree, |
1779 | &iter, *index, tag) { | 1879 | &iter, *index, tag) { |
1780 | struct page *head, *page; | 1880 | struct page *head, *page; |
1881 | |||
1882 | if (iter.index > end) | ||
1883 | break; | ||
1781 | repeat: | 1884 | repeat: |
1782 | page = radix_tree_deref_slot(slot); | 1885 | page = radix_tree_deref_slot(slot); |
1783 | if (unlikely(!page)) | 1886 | if (unlikely(!page)) |
@@ -1819,18 +1922,28 @@ repeat: | |||
1819 | } | 1922 | } |
1820 | 1923 | ||
1821 | pages[ret] = page; | 1924 | pages[ret] = page; |
1822 | if (++ret == nr_pages) | 1925 | if (++ret == nr_pages) { |
1823 | break; | 1926 | *index = pages[ret - 1]->index + 1; |
1927 | goto out; | ||
1928 | } | ||
1824 | } | 1929 | } |
1825 | 1930 | ||
1931 | /* | ||
1932 | * We come here when we got at @end. We take care to not overflow the | ||
1933 | * index @index as it confuses some of the callers. This breaks the | ||
1934 | * iteration when there is page at index -1 but that is already broken | ||
1935 | * anyway. | ||
1936 | */ | ||
1937 | if (end == (pgoff_t)-1) | ||
1938 | *index = (pgoff_t)-1; | ||
1939 | else | ||
1940 | *index = end + 1; | ||
1941 | out: | ||
1826 | rcu_read_unlock(); | 1942 | rcu_read_unlock(); |
1827 | 1943 | ||
1828 | if (ret) | ||
1829 | *index = pages[ret - 1]->index + 1; | ||
1830 | |||
1831 | return ret; | 1944 | return ret; |
1832 | } | 1945 | } |
1833 | EXPORT_SYMBOL(find_get_pages_tag); | 1946 | EXPORT_SYMBOL(find_get_pages_range_tag); |
1834 | 1947 | ||
1835 | /** | 1948 | /** |
1836 | * find_get_entries_tag - find and return entries that match @tag | 1949 | * find_get_entries_tag - find and return entries that match @tag |
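Note: callers of the renamed find_get_pages_range_tag(), or of the pagevec_lookup_range_tag() wrapper used in the fdatawait hunk above, pass an inclusive end index and no longer re-check page->index against end themselves. A condensed sketch of the new loop shape, mirroring that hunk:

	static void example_wait_on_range(struct address_space *mapping,
					  pgoff_t start, pgoff_t end)
	{
		struct pagevec pvec;
		unsigned int i, nr;

		pagevec_init(&pvec);
		while ((nr = pagevec_lookup_range_tag(&pvec, mapping, &start, end,
						      PAGECACHE_TAG_WRITEBACK))) {
			for (i = 0; i < nr; i++)
				wait_on_page_writeback(pvec.pages[i]);
			pagevec_release(&pvec);
			cond_resched();
		}
	}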
@@ -2159,7 +2272,7 @@ no_cached_page: | |||
2159 | * Ok, it wasn't cached, so we need to create a new | 2272 | * Ok, it wasn't cached, so we need to create a new |
2160 | * page.. | 2273 | * page.. |
2161 | */ | 2274 | */ |
2162 | page = page_cache_alloc_cold(mapping); | 2275 | page = page_cache_alloc(mapping); |
2163 | if (!page) { | 2276 | if (!page) { |
2164 | error = -ENOMEM; | 2277 | error = -ENOMEM; |
2165 | goto out; | 2278 | goto out; |
@@ -2271,7 +2384,7 @@ static int page_cache_read(struct file *file, pgoff_t offset, gfp_t gfp_mask) | |||
2271 | int ret; | 2384 | int ret; |
2272 | 2385 | ||
2273 | do { | 2386 | do { |
2274 | page = __page_cache_alloc(gfp_mask|__GFP_COLD); | 2387 | page = __page_cache_alloc(gfp_mask); |
2275 | if (!page) | 2388 | if (!page) |
2276 | return -ENOMEM; | 2389 | return -ENOMEM; |
2277 | 2390 | ||
@@ -2675,7 +2788,7 @@ static struct page *do_read_cache_page(struct address_space *mapping, | |||
2675 | repeat: | 2788 | repeat: |
2676 | page = find_get_page(mapping, index); | 2789 | page = find_get_page(mapping, index); |
2677 | if (!page) { | 2790 | if (!page) { |
2678 | page = __page_cache_alloc(gfp | __GFP_COLD); | 2791 | page = __page_cache_alloc(gfp); |
2679 | if (!page) | 2792 | if (!page) |
2680 | return ERR_PTR(-ENOMEM); | 2793 | return ERR_PTR(-ENOMEM); |
2681 | err = add_to_page_cache_lru(page, mapping, index, gfp); | 2794 | err = add_to_page_cache_lru(page, mapping, index, gfp); |
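The filemap.c hunks above add an explicit @end bound to the tag lookup and change how *index is advanced: a full batch resumes right after the last returned page, while an exhausted range parks *index at end + 1, clamped so it cannot wrap past (pgoff_t)-1. Below is a minimal user-space model of that index-update contract; the names and the toy array standing in for the radix tree are purely illustrative, not the kernel API.

/* Simplified model of the contract used by find_get_pages_range_tag():
 * scan tagged slots in [*index, end], fill at most nr_max results, and
 * leave *index just past the last position scanned without letting it
 * wrap past (pgoff_t)-1. Illustrative only; not kernel code. */
#include <stdio.h>
#include <stdbool.h>

typedef unsigned long pgoff_t;

struct cache { bool tagged[64]; };	/* toy "page cache": tag bits only */

static unsigned scan_tagged_range(struct cache *c, pgoff_t *index, pgoff_t end,
				  unsigned nr_max, pgoff_t *out)
{
	unsigned ret = 0;

	for (pgoff_t i = *index; i <= end && i < 64; i++) {
		if (!c->tagged[i])
			continue;
		out[ret++] = i;
		if (ret == nr_max) {
			*index = i + 1;		/* resume right after the last hit */
			return ret;
		}
	}
	/* Scanned the whole range: park *index after @end, but avoid
	 * overflowing when end == (pgoff_t)-1. */
	*index = (end == (pgoff_t)-1) ? (pgoff_t)-1 : end + 1;
	return ret;
}

int main(void)
{
	struct cache c = { .tagged = { [3] = true, [7] = true, [9] = true } };
	pgoff_t index = 0, out[2];
	unsigned n = scan_tagged_range(&c, &index, 10, 2, out);

	printf("found %u, next index %lu\n", n, index);	/* found 2, next index 8 */
	return 0;
}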
@@ -803,11 +803,10 @@ static RADIX_TREE(hmm_devmem_radix, GFP_KERNEL); | |||
803 | 803 | ||
804 | static void hmm_devmem_radix_release(struct resource *resource) | 804 | static void hmm_devmem_radix_release(struct resource *resource) |
805 | { | 805 | { |
806 | resource_size_t key, align_start, align_size, align_end; | 806 | resource_size_t key, align_start, align_size; |
807 | 807 | ||
808 | align_start = resource->start & ~(PA_SECTION_SIZE - 1); | 808 | align_start = resource->start & ~(PA_SECTION_SIZE - 1); |
809 | align_size = ALIGN(resource_size(resource), PA_SECTION_SIZE); | 809 | align_size = ALIGN(resource_size(resource), PA_SECTION_SIZE); |
810 | align_end = align_start + align_size - 1; | ||
811 | 810 | ||
812 | mutex_lock(&hmm_devmem_lock); | 811 | mutex_lock(&hmm_devmem_lock); |
813 | for (key = resource->start; | 812 | for (key = resource->start; |
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 003f7bcd0952..86fe697e8bfb 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
@@ -606,7 +606,7 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page, | |||
606 | pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable); | 606 | pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable); |
607 | set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry); | 607 | set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry); |
608 | add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR); | 608 | add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR); |
609 | atomic_long_inc(&vma->vm_mm->nr_ptes); | 609 | mm_inc_nr_ptes(vma->vm_mm); |
610 | spin_unlock(vmf->ptl); | 610 | spin_unlock(vmf->ptl); |
611 | count_vm_event(THP_FAULT_ALLOC); | 611 | count_vm_event(THP_FAULT_ALLOC); |
612 | } | 612 | } |
@@ -662,7 +662,7 @@ static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm, | |||
662 | if (pgtable) | 662 | if (pgtable) |
663 | pgtable_trans_huge_deposit(mm, pmd, pgtable); | 663 | pgtable_trans_huge_deposit(mm, pmd, pgtable); |
664 | set_pmd_at(mm, haddr, pmd, entry); | 664 | set_pmd_at(mm, haddr, pmd, entry); |
665 | atomic_long_inc(&mm->nr_ptes); | 665 | mm_inc_nr_ptes(mm); |
666 | return true; | 666 | return true; |
667 | } | 667 | } |
668 | 668 | ||
@@ -747,7 +747,7 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, | |||
747 | 747 | ||
748 | if (pgtable) { | 748 | if (pgtable) { |
749 | pgtable_trans_huge_deposit(mm, pmd, pgtable); | 749 | pgtable_trans_huge_deposit(mm, pmd, pgtable); |
750 | atomic_long_inc(&mm->nr_ptes); | 750 | mm_inc_nr_ptes(mm); |
751 | } | 751 | } |
752 | 752 | ||
753 | set_pmd_at(mm, addr, pmd, entry); | 753 | set_pmd_at(mm, addr, pmd, entry); |
@@ -942,7 +942,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, | |||
942 | set_pmd_at(src_mm, addr, src_pmd, pmd); | 942 | set_pmd_at(src_mm, addr, src_pmd, pmd); |
943 | } | 943 | } |
944 | add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR); | 944 | add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR); |
945 | atomic_long_inc(&dst_mm->nr_ptes); | 945 | mm_inc_nr_ptes(dst_mm); |
946 | pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable); | 946 | pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable); |
947 | set_pmd_at(dst_mm, addr, dst_pmd, pmd); | 947 | set_pmd_at(dst_mm, addr, dst_pmd, pmd); |
948 | ret = 0; | 948 | ret = 0; |
@@ -978,7 +978,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, | |||
978 | get_page(src_page); | 978 | get_page(src_page); |
979 | page_dup_rmap(src_page, true); | 979 | page_dup_rmap(src_page, true); |
980 | add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR); | 980 | add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR); |
981 | atomic_long_inc(&dst_mm->nr_ptes); | 981 | mm_inc_nr_ptes(dst_mm); |
982 | pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable); | 982 | pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable); |
983 | 983 | ||
984 | pmdp_set_wrprotect(src_mm, addr, src_pmd); | 984 | pmdp_set_wrprotect(src_mm, addr, src_pmd); |
@@ -1189,8 +1189,15 @@ static int do_huge_pmd_wp_page_fallback(struct vm_fault *vmf, pmd_t orig_pmd, | |||
1189 | goto out_free_pages; | 1189 | goto out_free_pages; |
1190 | VM_BUG_ON_PAGE(!PageHead(page), page); | 1190 | VM_BUG_ON_PAGE(!PageHead(page), page); |
1191 | 1191 | ||
1192 | /* | ||
1193 | * Leave pmd empty until pte is filled. Note we must notify here, as a | ||
1194 | * concurrent CPU thread might write to the new page before the call to | ||
1195 | * mmu_notifier_invalidate_range_end() happens, which can lead to a | ||
1196 | * device seeing the memory write in a different order than the CPU. | ||
1197 | * | ||
1198 | * See Documentation/vm/mmu_notifier.txt | ||
1199 | */ | ||
1192 | pmdp_huge_clear_flush_notify(vma, haddr, vmf->pmd); | 1200 | pmdp_huge_clear_flush_notify(vma, haddr, vmf->pmd); |
1193 | /* leave pmd empty until pte is filled */ | ||
1194 | 1201 | ||
1195 | pgtable = pgtable_trans_huge_withdraw(vma->vm_mm, vmf->pmd); | 1202 | pgtable = pgtable_trans_huge_withdraw(vma->vm_mm, vmf->pmd); |
1196 | pmd_populate(vma->vm_mm, &_pmd, pgtable); | 1203 | pmd_populate(vma->vm_mm, &_pmd, pgtable); |
@@ -1216,7 +1223,12 @@ static int do_huge_pmd_wp_page_fallback(struct vm_fault *vmf, pmd_t orig_pmd, | |||
1216 | page_remove_rmap(page, true); | 1223 | page_remove_rmap(page, true); |
1217 | spin_unlock(vmf->ptl); | 1224 | spin_unlock(vmf->ptl); |
1218 | 1225 | ||
1219 | mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end); | 1226 | /* |
1227 | * No need to double call mmu_notifier->invalidate_range() callback as | ||
1228 | * the above pmdp_huge_clear_flush_notify() did already call it. | ||
1229 | */ | ||
1230 | mmu_notifier_invalidate_range_only_end(vma->vm_mm, mmun_start, | ||
1231 | mmun_end); | ||
1220 | 1232 | ||
1221 | ret |= VM_FAULT_WRITE; | 1233 | ret |= VM_FAULT_WRITE; |
1222 | put_page(page); | 1234 | put_page(page); |
@@ -1365,7 +1377,12 @@ alloc: | |||
1365 | } | 1377 | } |
1366 | spin_unlock(vmf->ptl); | 1378 | spin_unlock(vmf->ptl); |
1367 | out_mn: | 1379 | out_mn: |
1368 | mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end); | 1380 | /* |
1381 | * No need to double call mmu_notifier->invalidate_range() callback as | ||
1382 | * the above pmdp_huge_clear_flush_notify() did already call it. | ||
1383 | */ | ||
1384 | mmu_notifier_invalidate_range_only_end(vma->vm_mm, mmun_start, | ||
1385 | mmun_end); | ||
1369 | out: | 1386 | out: |
1370 | return ret; | 1387 | return ret; |
1371 | out_unlock: | 1388 | out_unlock: |
@@ -1678,7 +1695,7 @@ static inline void zap_deposited_table(struct mm_struct *mm, pmd_t *pmd) | |||
1678 | 1695 | ||
1679 | pgtable = pgtable_trans_huge_withdraw(mm, pmd); | 1696 | pgtable = pgtable_trans_huge_withdraw(mm, pmd); |
1680 | pte_free(mm, pgtable); | 1697 | pte_free(mm, pgtable); |
1681 | atomic_long_dec(&mm->nr_ptes); | 1698 | mm_dec_nr_ptes(mm); |
1682 | } | 1699 | } |
1683 | 1700 | ||
1684 | int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, | 1701 | int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, |
@@ -2017,7 +2034,12 @@ void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud, | |||
2017 | 2034 | ||
2018 | out: | 2035 | out: |
2019 | spin_unlock(ptl); | 2036 | spin_unlock(ptl); |
2020 | mmu_notifier_invalidate_range_end(mm, haddr, haddr + HPAGE_PUD_SIZE); | 2037 | /* |
2038 | * No need to double call mmu_notifier->invalidate_range() callback as | ||
2039 | * the above pudp_huge_clear_flush_notify() did already call it. | ||
2040 | */ | ||
2041 | mmu_notifier_invalidate_range_only_end(mm, haddr, haddr + | ||
2042 | HPAGE_PUD_SIZE); | ||
2021 | } | 2043 | } |
2022 | #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ | 2044 | #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ |
2023 | 2045 | ||
@@ -2029,8 +2051,15 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma, | |||
2029 | pmd_t _pmd; | 2051 | pmd_t _pmd; |
2030 | int i; | 2052 | int i; |
2031 | 2053 | ||
2032 | /* leave pmd empty until pte is filled */ | 2054 | /* |
2033 | pmdp_huge_clear_flush_notify(vma, haddr, pmd); | 2055 | * Leave pmd empty until pte is filled. Note that it is fine to delay |
2056 | * notification until mmu_notifier_invalidate_range_end() as we are | ||
2057 | * replacing a zero pmd write protected page with a zero pte write | ||
2058 | * protected page. | ||
2059 | * | ||
2060 | * See Documentation/vm/mmu_notifier.txt | ||
2061 | */ | ||
2062 | pmdp_huge_clear_flush(vma, haddr, pmd); | ||
2034 | 2063 | ||
2035 | pgtable = pgtable_trans_huge_withdraw(mm, pmd); | 2064 | pgtable = pgtable_trans_huge_withdraw(mm, pmd); |
2036 | pmd_populate(mm, &_pmd, pgtable); | 2065 | pmd_populate(mm, &_pmd, pgtable); |
@@ -2085,6 +2114,15 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, | |||
2085 | add_mm_counter(mm, MM_FILEPAGES, -HPAGE_PMD_NR); | 2114 | add_mm_counter(mm, MM_FILEPAGES, -HPAGE_PMD_NR); |
2086 | return; | 2115 | return; |
2087 | } else if (is_huge_zero_pmd(*pmd)) { | 2116 | } else if (is_huge_zero_pmd(*pmd)) { |
2117 | /* | ||
2118 | * FIXME: Do we want to invalidate secondary mmu by calling | ||
2119 | * mmu_notifier_invalidate_range()? See comments below inside | ||
2120 | * __split_huge_pmd(). | ||
2121 | * | ||
2122 | * We are going from a zero huge page, write protected, to a zero | ||
2123 | * small page, also write protected, so it does not seem useful | ||
2124 | * to invalidate secondary mmu at this time. | ||
2125 | */ | ||
2088 | return __split_huge_zero_page_pmd(vma, haddr, pmd); | 2126 | return __split_huge_zero_page_pmd(vma, haddr, pmd); |
2089 | } | 2127 | } |
2090 | 2128 | ||
@@ -2220,7 +2258,21 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, | |||
2220 | __split_huge_pmd_locked(vma, pmd, haddr, freeze); | 2258 | __split_huge_pmd_locked(vma, pmd, haddr, freeze); |
2221 | out: | 2259 | out: |
2222 | spin_unlock(ptl); | 2260 | spin_unlock(ptl); |
2223 | mmu_notifier_invalidate_range_end(mm, haddr, haddr + HPAGE_PMD_SIZE); | 2261 | /* |
2262 | * No need to double call mmu_notifier->invalidate_range() callback. | ||
2263 | * There are 3 cases to consider inside __split_huge_pmd_locked(): | ||
2264 | * 1) pmdp_huge_clear_flush_notify() calls invalidate_range(), obviously | ||
2265 | * 2) __split_huge_zero_page_pmd() leaves a read-only zero page, and any | ||
2266 | * write fault will trigger a flush_notify before pointing to a new page | ||
2267 | * (it is fine if the secondary mmu keeps pointing to the old zero | ||
2268 | * page in the meantime) | ||
2269 | * 3) Split a huge pmd into ptes pointing to the same page. No need | ||
2270 | * to invalidate secondary tlb entries; they are all still valid. | ||
2271 | * Any further changes to individual ptes will notify. So no need | ||
2272 | * to call mmu_notifier->invalidate_range() | ||
2273 | */ | ||
2274 | mmu_notifier_invalidate_range_only_end(mm, haddr, haddr + | ||
2275 | HPAGE_PMD_SIZE); | ||
2224 | } | 2276 | } |
2225 | 2277 | ||
2226 | void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address, | 2278 | void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address, |
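The huge_memory.c hunks (and the later khugepaged.c and memory.c ones) replace open-coded atomic_long_inc()/atomic_long_dec() on mm->nr_ptes with mm_inc_nr_ptes()/mm_dec_nr_ptes(). The real helpers live in a header that is not part of the hunks shown here; the sketch below only illustrates the accessor pattern with standard C11 atomics and a made-up mm_model type, so the counter's representation can change without touching every call site.

/* Hedged user-space sketch of the accessor pattern; not the kernel helpers. */
#include <stdatomic.h>
#include <stdio.h>

struct mm_model {
	atomic_long nr_ptes;	/* page-table page count, as in the old field */
};

static inline void mm_inc_nr_ptes(struct mm_model *mm)
{
	atomic_fetch_add_explicit(&mm->nr_ptes, 1, memory_order_relaxed);
}

static inline void mm_dec_nr_ptes(struct mm_model *mm)
{
	atomic_fetch_sub_explicit(&mm->nr_ptes, 1, memory_order_relaxed);
}

int main(void)
{
	struct mm_model mm = { .nr_ptes = 0 };

	mm_inc_nr_ptes(&mm);
	mm_inc_nr_ptes(&mm);
	mm_dec_nr_ptes(&mm);
	printf("nr_ptes = %ld\n", atomic_load(&mm.nr_ptes));	/* prints 1 */
	return 0;
}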
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 2d2ff5e8bf2b..681b300185c0 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -3256,9 +3256,14 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, | |||
3256 | set_huge_swap_pte_at(dst, addr, dst_pte, entry, sz); | 3256 | set_huge_swap_pte_at(dst, addr, dst_pte, entry, sz); |
3257 | } else { | 3257 | } else { |
3258 | if (cow) { | 3258 | if (cow) { |
3259 | /* | ||
3260 | * No need to notify as we are downgrading page | ||
3261 | * table protection, not changing it to point | ||
3262 | * to a new page. | ||
3263 | * | ||
3264 | * See Documentation/vm/mmu_notifier.txt | ||
3265 | */ | ||
3259 | huge_ptep_set_wrprotect(src, addr, src_pte); | 3266 | huge_ptep_set_wrprotect(src, addr, src_pte); |
3260 | mmu_notifier_invalidate_range(src, mmun_start, | ||
3261 | mmun_end); | ||
3262 | } | 3267 | } |
3263 | entry = huge_ptep_get(src_pte); | 3268 | entry = huge_ptep_get(src_pte); |
3264 | ptepage = pte_page(entry); | 3269 | ptepage = pte_page(entry); |
@@ -4318,7 +4323,12 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, | |||
4318 | * and that page table be reused and filled with junk. | 4323 | * and that page table be reused and filled with junk. |
4319 | */ | 4324 | */ |
4320 | flush_hugetlb_tlb_range(vma, start, end); | 4325 | flush_hugetlb_tlb_range(vma, start, end); |
4321 | mmu_notifier_invalidate_range(mm, start, end); | 4326 | /* |
4327 | * No need to call mmu_notifier_invalidate_range(); we are downgrading | ||
4328 | * page table protection, not changing it to point to a new page. | ||
4329 | * | ||
4330 | * See Documentation/vm/mmu_notifier.txt | ||
4331 | */ | ||
4322 | i_mmap_unlock_write(vma->vm_file->f_mapping); | 4332 | i_mmap_unlock_write(vma->vm_file->f_mapping); |
4323 | mmu_notifier_invalidate_range_end(mm, start, end); | 4333 | mmu_notifier_invalidate_range_end(mm, start, end); |
4324 | 4334 | ||
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 6f319fb81718..405bba487df5 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c | |||
@@ -337,7 +337,7 @@ static size_t optimal_redzone(size_t object_size) | |||
337 | } | 337 | } |
338 | 338 | ||
339 | void kasan_cache_create(struct kmem_cache *cache, size_t *size, | 339 | void kasan_cache_create(struct kmem_cache *cache, size_t *size, |
340 | unsigned long *flags) | 340 | slab_flags_t *flags) |
341 | { | 341 | { |
342 | int redzone_adjust; | 342 | int redzone_adjust; |
343 | int orig_size = *size; | 343 | int orig_size = *size; |
diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 43cb3043311b..ea4ff259b671 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c | |||
@@ -1270,7 +1270,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) | |||
1270 | _pmd = pmdp_collapse_flush(vma, addr, pmd); | 1270 | _pmd = pmdp_collapse_flush(vma, addr, pmd); |
1271 | spin_unlock(ptl); | 1271 | spin_unlock(ptl); |
1272 | up_write(&vma->vm_mm->mmap_sem); | 1272 | up_write(&vma->vm_mm->mmap_sem); |
1273 | atomic_long_dec(&vma->vm_mm->nr_ptes); | 1273 | mm_dec_nr_ptes(vma->vm_mm); |
1274 | pte_free(vma->vm_mm, pmd_pgtable(_pmd)); | 1274 | pte_free(vma->vm_mm, pmd_pgtable(_pmd)); |
1275 | } | 1275 | } |
1276 | } | 1276 | } |
diff --git a/mm/kmemcheck.c b/mm/kmemcheck.c index 800d64b854ea..cec594032515 100644 --- a/mm/kmemcheck.c +++ b/mm/kmemcheck.c | |||
@@ -1,126 +1 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | // SPDX-License-Identifier: GPL-2.0 | |
2 | #include <linux/gfp.h> | ||
3 | #include <linux/mm_types.h> | ||
4 | #include <linux/mm.h> | ||
5 | #include <linux/slab.h> | ||
6 | #include "slab.h" | ||
7 | #include <linux/kmemcheck.h> | ||
8 | |||
9 | void kmemcheck_alloc_shadow(struct page *page, int order, gfp_t flags, int node) | ||
10 | { | ||
11 | struct page *shadow; | ||
12 | int pages; | ||
13 | int i; | ||
14 | |||
15 | pages = 1 << order; | ||
16 | |||
17 | /* | ||
18 | * With kmemcheck enabled, we need to allocate a memory area for the | ||
19 | * shadow bits as well. | ||
20 | */ | ||
21 | shadow = alloc_pages_node(node, flags | __GFP_NOTRACK, order); | ||
22 | if (!shadow) { | ||
23 | if (printk_ratelimit()) | ||
24 | pr_err("kmemcheck: failed to allocate shadow bitmap\n"); | ||
25 | return; | ||
26 | } | ||
27 | |||
28 | for(i = 0; i < pages; ++i) | ||
29 | page[i].shadow = page_address(&shadow[i]); | ||
30 | |||
31 | /* | ||
32 | * Mark it as non-present for the MMU so that our accesses to | ||
33 | * this memory will trigger a page fault and let us analyze | ||
34 | * the memory accesses. | ||
35 | */ | ||
36 | kmemcheck_hide_pages(page, pages); | ||
37 | } | ||
38 | |||
39 | void kmemcheck_free_shadow(struct page *page, int order) | ||
40 | { | ||
41 | struct page *shadow; | ||
42 | int pages; | ||
43 | int i; | ||
44 | |||
45 | if (!kmemcheck_page_is_tracked(page)) | ||
46 | return; | ||
47 | |||
48 | pages = 1 << order; | ||
49 | |||
50 | kmemcheck_show_pages(page, pages); | ||
51 | |||
52 | shadow = virt_to_page(page[0].shadow); | ||
53 | |||
54 | for(i = 0; i < pages; ++i) | ||
55 | page[i].shadow = NULL; | ||
56 | |||
57 | __free_pages(shadow, order); | ||
58 | } | ||
59 | |||
60 | void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object, | ||
61 | size_t size) | ||
62 | { | ||
63 | if (unlikely(!object)) /* Skip object if allocation failed */ | ||
64 | return; | ||
65 | |||
66 | /* | ||
67 | * Has already been memset(), which initializes the shadow for us | ||
68 | * as well. | ||
69 | */ | ||
70 | if (gfpflags & __GFP_ZERO) | ||
71 | return; | ||
72 | |||
73 | /* No need to initialize the shadow of a non-tracked slab. */ | ||
74 | if (s->flags & SLAB_NOTRACK) | ||
75 | return; | ||
76 | |||
77 | if (!kmemcheck_enabled || gfpflags & __GFP_NOTRACK) { | ||
78 | /* | ||
79 | * Allow notracked objects to be allocated from | ||
80 | * tracked caches. Note however that these objects | ||
81 | * will still get page faults on access, they just | ||
82 | * won't ever be flagged as uninitialized. If page | ||
83 | * faults are not acceptable, the slab cache itself | ||
84 | * should be marked NOTRACK. | ||
85 | */ | ||
86 | kmemcheck_mark_initialized(object, size); | ||
87 | } else if (!s->ctor) { | ||
88 | /* | ||
89 | * New objects should be marked uninitialized before | ||
90 | * they're returned to the called. | ||
91 | */ | ||
92 | kmemcheck_mark_uninitialized(object, size); | ||
93 | } | ||
94 | } | ||
95 | |||
96 | void kmemcheck_slab_free(struct kmem_cache *s, void *object, size_t size) | ||
97 | { | ||
98 | /* TODO: RCU freeing is unsupported for now; hide false positives. */ | ||
99 | if (!s->ctor && !(s->flags & SLAB_TYPESAFE_BY_RCU)) | ||
100 | kmemcheck_mark_freed(object, size); | ||
101 | } | ||
102 | |||
103 | void kmemcheck_pagealloc_alloc(struct page *page, unsigned int order, | ||
104 | gfp_t gfpflags) | ||
105 | { | ||
106 | int pages; | ||
107 | |||
108 | if (gfpflags & (__GFP_HIGHMEM | __GFP_NOTRACK)) | ||
109 | return; | ||
110 | |||
111 | pages = 1 << order; | ||
112 | |||
113 | /* | ||
114 | * NOTE: We choose to track GFP_ZERO pages too; in fact, they | ||
115 | * can become uninitialized by copying uninitialized memory | ||
116 | * into them. | ||
117 | */ | ||
118 | |||
119 | /* XXX: Can use zone->node for node? */ | ||
120 | kmemcheck_alloc_shadow(page, order, gfpflags, -1); | ||
121 | |||
122 | if (gfpflags & __GFP_ZERO) | ||
123 | kmemcheck_mark_initialized_pages(page, pages); | ||
124 | else | ||
125 | kmemcheck_mark_uninitialized_pages(page, pages); | ||
126 | } | ||
diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 7780cd83a495..e4738d5e9b8c 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c | |||
@@ -110,7 +110,6 @@ | |||
110 | #include <linux/atomic.h> | 110 | #include <linux/atomic.h> |
111 | 111 | ||
112 | #include <linux/kasan.h> | 112 | #include <linux/kasan.h> |
113 | #include <linux/kmemcheck.h> | ||
114 | #include <linux/kmemleak.h> | 113 | #include <linux/kmemleak.h> |
115 | #include <linux/memory_hotplug.h> | 114 | #include <linux/memory_hotplug.h> |
116 | 115 | ||
@@ -1238,9 +1237,6 @@ static bool update_checksum(struct kmemleak_object *object) | |||
1238 | { | 1237 | { |
1239 | u32 old_csum = object->checksum; | 1238 | u32 old_csum = object->checksum; |
1240 | 1239 | ||
1241 | if (!kmemcheck_is_obj_initialized(object->pointer, object->size)) | ||
1242 | return false; | ||
1243 | |||
1244 | kasan_disable_current(); | 1240 | kasan_disable_current(); |
1245 | object->checksum = crc32(0, (void *)object->pointer, object->size); | 1241 | object->checksum = crc32(0, (void *)object->pointer, object->size); |
1246 | kasan_enable_current(); | 1242 | kasan_enable_current(); |
@@ -1314,11 +1310,6 @@ static void scan_block(void *_start, void *_end, | |||
1314 | if (scan_should_stop()) | 1310 | if (scan_should_stop()) |
1315 | break; | 1311 | break; |
1316 | 1312 | ||
1317 | /* don't scan uninitialized memory */ | ||
1318 | if (!kmemcheck_is_obj_initialized((unsigned long)ptr, | ||
1319 | BYTES_PER_POINTER)) | ||
1320 | continue; | ||
1321 | |||
1322 | kasan_disable_current(); | 1313 | kasan_disable_current(); |
1323 | pointer = *ptr; | 1314 | pointer = *ptr; |
1324 | kasan_enable_current(); | 1315 | kasan_enable_current(); |
@@ -2104,7 +2095,7 @@ static int __init kmemleak_late_init(void) | |||
2104 | return -ENOMEM; | 2095 | return -ENOMEM; |
2105 | } | 2096 | } |
2106 | 2097 | ||
2107 | dentry = debugfs_create_file("kmemleak", S_IRUGO, NULL, NULL, | 2098 | dentry = debugfs_create_file("kmemleak", 0644, NULL, NULL, |
2108 | &kmemleak_fops); | 2099 | &kmemleak_fops); |
2109 | if (!dentry) | 2100 | if (!dentry) |
2110 | pr_warn("Failed to create the debugfs kmemleak file\n"); | 2101 | pr_warn("Failed to create the debugfs kmemleak file\n"); |
@@ -1052,8 +1052,13 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page, | |||
1052 | * So we clear the pte and flush the tlb before the check | 1052 | * So we clear the pte and flush the tlb before the check |
1053 | * this assure us that no O_DIRECT can happen after the check | 1053 | * this assure us that no O_DIRECT can happen after the check |
1054 | * or in the middle of the check. | 1054 | * or in the middle of the check. |
1055 | * | ||
1056 | * No need to notify as we are downgrading the page table to read | ||
1057 | * only, not changing it to point to a new page. | ||
1058 | * | ||
1059 | * See Documentation/vm/mmu_notifier.txt | ||
1055 | */ | 1060 | */ |
1056 | entry = ptep_clear_flush_notify(vma, pvmw.address, pvmw.pte); | 1061 | entry = ptep_clear_flush(vma, pvmw.address, pvmw.pte); |
1057 | /* | 1062 | /* |
1058 | * Check that no O_DIRECT or similar I/O is in progress on the | 1063 | * Check that no O_DIRECT or similar I/O is in progress on the |
1059 | * page | 1064 | * page |
@@ -1136,7 +1141,13 @@ static int replace_page(struct vm_area_struct *vma, struct page *page, | |||
1136 | } | 1141 | } |
1137 | 1142 | ||
1138 | flush_cache_page(vma, addr, pte_pfn(*ptep)); | 1143 | flush_cache_page(vma, addr, pte_pfn(*ptep)); |
1139 | ptep_clear_flush_notify(vma, addr, ptep); | 1144 | /* |
1145 | * No need to notify as we are replacing a read only page with another | ||
1146 | * read only page with the same content. | ||
1147 | * | ||
1148 | * See Documentation/vm/mmu_notifier.txt | ||
1149 | */ | ||
1150 | ptep_clear_flush(vma, addr, ptep); | ||
1140 | set_pte_at_notify(mm, addr, ptep, newpte); | 1151 | set_pte_at_notify(mm, addr, ptep, newpte); |
1141 | 1152 | ||
1142 | page_remove_rmap(page, false); | 1153 | page_remove_rmap(page, false); |
diff --git a/mm/list_lru.c b/mm/list_lru.c index f141f0c80ff3..fd41e969ede5 100644 --- a/mm/list_lru.c +++ b/mm/list_lru.c | |||
@@ -221,6 +221,7 @@ restart: | |||
221 | switch (ret) { | 221 | switch (ret) { |
222 | case LRU_REMOVED_RETRY: | 222 | case LRU_REMOVED_RETRY: |
223 | assert_spin_locked(&nlru->lock); | 223 | assert_spin_locked(&nlru->lock); |
224 | /* fall through */ | ||
224 | case LRU_REMOVED: | 225 | case LRU_REMOVED: |
225 | isolated++; | 226 | isolated++; |
226 | nlru->nr_items--; | 227 | nlru->nr_items--; |
diff --git a/mm/memblock.c b/mm/memblock.c index 91205780e6b1..46aacdfa4f4d 100644 --- a/mm/memblock.c +++ b/mm/memblock.c | |||
@@ -533,7 +533,7 @@ repeat: | |||
533 | base = obase; | 533 | base = obase; |
534 | nr_new = 0; | 534 | nr_new = 0; |
535 | 535 | ||
536 | for_each_memblock_type(type, rgn) { | 536 | for_each_memblock_type(idx, type, rgn) { |
537 | phys_addr_t rbase = rgn->base; | 537 | phys_addr_t rbase = rgn->base; |
538 | phys_addr_t rend = rbase + rgn->size; | 538 | phys_addr_t rend = rbase + rgn->size; |
539 | 539 | ||
@@ -637,7 +637,7 @@ static int __init_memblock memblock_isolate_range(struct memblock_type *type, | |||
637 | if (memblock_double_array(type, base, size) < 0) | 637 | if (memblock_double_array(type, base, size) < 0) |
638 | return -ENOMEM; | 638 | return -ENOMEM; |
639 | 639 | ||
640 | for_each_memblock_type(type, rgn) { | 640 | for_each_memblock_type(idx, type, rgn) { |
641 | phys_addr_t rbase = rgn->base; | 641 | phys_addr_t rbase = rgn->base; |
642 | phys_addr_t rend = rbase + rgn->size; | 642 | phys_addr_t rend = rbase + rgn->size; |
643 | 643 | ||
@@ -1327,7 +1327,6 @@ again: | |||
1327 | return NULL; | 1327 | return NULL; |
1328 | done: | 1328 | done: |
1329 | ptr = phys_to_virt(alloc); | 1329 | ptr = phys_to_virt(alloc); |
1330 | memset(ptr, 0, size); | ||
1331 | 1330 | ||
1332 | /* | 1331 | /* |
1333 | * The min_count is set to 0 so that bootmem allocated blocks | 1332 | * The min_count is set to 0 so that bootmem allocated blocks |
@@ -1341,6 +1340,45 @@ done: | |||
1341 | } | 1340 | } |
1342 | 1341 | ||
1343 | /** | 1342 | /** |
1343 | * memblock_virt_alloc_try_nid_raw - allocate boot memory block without zeroing | ||
1344 | * memory and without panicking | ||
1345 | * @size: size of memory block to be allocated in bytes | ||
1346 | * @align: alignment of the region and block's size | ||
1347 | * @min_addr: the lower bound of the memory region from where the allocation | ||
1348 | * is preferred (phys address) | ||
1349 | * @max_addr: the upper bound of the memory region from where the allocation | ||
1350 | * is preferred (phys address), or %BOOTMEM_ALLOC_ACCESSIBLE to | ||
1351 | * allocate only from memory limited by memblock.current_limit value | ||
1352 | * @nid: nid of the free area to find, %NUMA_NO_NODE for any node | ||
1353 | * | ||
1354 | * Public function, provides additional debug information (including caller | ||
1355 | * info), if enabled. Does not zero allocated memory, does not panic if request | ||
1356 | * cannot be satisfied. | ||
1357 | * | ||
1358 | * RETURNS: | ||
1359 | * Virtual address of allocated memory block on success, NULL on failure. | ||
1360 | */ | ||
1361 | void * __init memblock_virt_alloc_try_nid_raw( | ||
1362 | phys_addr_t size, phys_addr_t align, | ||
1363 | phys_addr_t min_addr, phys_addr_t max_addr, | ||
1364 | int nid) | ||
1365 | { | ||
1366 | void *ptr; | ||
1367 | |||
1368 | memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=0x%llx max_addr=0x%llx %pF\n", | ||
1369 | __func__, (u64)size, (u64)align, nid, (u64)min_addr, | ||
1370 | (u64)max_addr, (void *)_RET_IP_); | ||
1371 | |||
1372 | ptr = memblock_virt_alloc_internal(size, align, | ||
1373 | min_addr, max_addr, nid); | ||
1374 | #ifdef CONFIG_DEBUG_VM | ||
1375 | if (ptr && size > 0) | ||
1376 | memset(ptr, 0xff, size); | ||
1377 | #endif | ||
1378 | return ptr; | ||
1379 | } | ||
1380 | |||
1381 | /** | ||
1344 | * memblock_virt_alloc_try_nid_nopanic - allocate boot memory block | 1382 | * memblock_virt_alloc_try_nid_nopanic - allocate boot memory block |
1345 | * @size: size of memory block to be allocated in bytes | 1383 | * @size: size of memory block to be allocated in bytes |
1346 | * @align: alignment of the region and block's size | 1384 | * @align: alignment of the region and block's size |
@@ -1351,8 +1389,8 @@ done: | |||
1351 | * allocate only from memory limited by memblock.current_limit value | 1389 | * allocate only from memory limited by memblock.current_limit value |
1352 | * @nid: nid of the free area to find, %NUMA_NO_NODE for any node | 1390 | * @nid: nid of the free area to find, %NUMA_NO_NODE for any node |
1353 | * | 1391 | * |
1354 | * Public version of _memblock_virt_alloc_try_nid_nopanic() which provides | 1392 | * Public function, provides additional debug information (including caller |
1355 | * additional debug information (including caller info), if enabled. | 1393 | * info), if enabled. This function zeroes the allocated memory. |
1356 | * | 1394 | * |
1357 | * RETURNS: | 1395 | * RETURNS: |
1358 | * Virtual address of allocated memory block on success, NULL on failure. | 1396 | * Virtual address of allocated memory block on success, NULL on failure. |
@@ -1362,11 +1400,17 @@ void * __init memblock_virt_alloc_try_nid_nopanic( | |||
1362 | phys_addr_t min_addr, phys_addr_t max_addr, | 1400 | phys_addr_t min_addr, phys_addr_t max_addr, |
1363 | int nid) | 1401 | int nid) |
1364 | { | 1402 | { |
1403 | void *ptr; | ||
1404 | |||
1365 | memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=0x%llx max_addr=0x%llx %pF\n", | 1405 | memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=0x%llx max_addr=0x%llx %pF\n", |
1366 | __func__, (u64)size, (u64)align, nid, (u64)min_addr, | 1406 | __func__, (u64)size, (u64)align, nid, (u64)min_addr, |
1367 | (u64)max_addr, (void *)_RET_IP_); | 1407 | (u64)max_addr, (void *)_RET_IP_); |
1368 | return memblock_virt_alloc_internal(size, align, min_addr, | 1408 | |
1369 | max_addr, nid); | 1409 | ptr = memblock_virt_alloc_internal(size, align, |
1410 | min_addr, max_addr, nid); | ||
1411 | if (ptr) | ||
1412 | memset(ptr, 0, size); | ||
1413 | return ptr; | ||
1370 | } | 1414 | } |
1371 | 1415 | ||
1372 | /** | 1416 | /** |
@@ -1380,7 +1424,7 @@ void * __init memblock_virt_alloc_try_nid_nopanic( | |||
1380 | * allocate only from memory limited by memblock.current_limit value | 1424 | * allocate only from memory limited by memblock.current_limit value |
1381 | * @nid: nid of the free area to find, %NUMA_NO_NODE for any node | 1425 | * @nid: nid of the free area to find, %NUMA_NO_NODE for any node |
1382 | * | 1426 | * |
1383 | * Public panicking version of _memblock_virt_alloc_try_nid_nopanic() | 1427 | * Public panicking version of memblock_virt_alloc_try_nid_nopanic() |
1384 | * which provides debug information (including caller info), if enabled, | 1428 | * which provides debug information (including caller info), if enabled, |
1385 | * and panics if the request can not be satisfied. | 1429 | * and panics if the request can not be satisfied. |
1386 | * | 1430 | * |
@@ -1399,8 +1443,10 @@ void * __init memblock_virt_alloc_try_nid( | |||
1399 | (u64)max_addr, (void *)_RET_IP_); | 1443 | (u64)max_addr, (void *)_RET_IP_); |
1400 | ptr = memblock_virt_alloc_internal(size, align, | 1444 | ptr = memblock_virt_alloc_internal(size, align, |
1401 | min_addr, max_addr, nid); | 1445 | min_addr, max_addr, nid); |
1402 | if (ptr) | 1446 | if (ptr) { |
1447 | memset(ptr, 0, size); | ||
1403 | return ptr; | 1448 | return ptr; |
1449 | } | ||
1404 | 1450 | ||
1405 | panic("%s: Failed to allocate %llu bytes align=0x%llx nid=%d from=0x%llx max_addr=0x%llx\n", | 1451 | panic("%s: Failed to allocate %llu bytes align=0x%llx nid=%d from=0x%llx max_addr=0x%llx\n", |
1406 | __func__, (u64)size, (u64)align, nid, (u64)min_addr, | 1452 | __func__, (u64)size, (u64)align, nid, (u64)min_addr, |
@@ -1715,7 +1761,7 @@ static void __init_memblock memblock_dump(struct memblock_type *type) | |||
1715 | 1761 | ||
1716 | pr_info(" %s.cnt = 0x%lx\n", type->name, type->cnt); | 1762 | pr_info(" %s.cnt = 0x%lx\n", type->name, type->cnt); |
1717 | 1763 | ||
1718 | for_each_memblock_type(type, rgn) { | 1764 | for_each_memblock_type(idx, type, rgn) { |
1719 | char nid_buf[32] = ""; | 1765 | char nid_buf[32] = ""; |
1720 | 1766 | ||
1721 | base = rgn->base; | 1767 | base = rgn->base; |
@@ -1739,7 +1785,7 @@ memblock_reserved_memory_within(phys_addr_t start_addr, phys_addr_t end_addr) | |||
1739 | unsigned long size = 0; | 1785 | unsigned long size = 0; |
1740 | int idx; | 1786 | int idx; |
1741 | 1787 | ||
1742 | for_each_memblock_type((&memblock.reserved), rgn) { | 1788 | for_each_memblock_type(idx, (&memblock.reserved), rgn) { |
1743 | phys_addr_t start, end; | 1789 | phys_addr_t start, end; |
1744 | 1790 | ||
1745 | if (rgn->base + rgn->size < start_addr) | 1791 | if (rgn->base + rgn->size < start_addr) |
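The memblock.c hunks above move zeroing out of memblock_virt_alloc_internal(): the new memblock_virt_alloc_try_nid_raw() returns unzeroed memory (poisoned with 0xff under CONFIG_DEBUG_VM so stale reads stand out), while the nopanic and panicking variants now memset() the block themselves after a successful allocation. A small user-space model of that split follows; malloc() stands in for the boot allocator and DEBUG_POISON for CONFIG_DEBUG_VM, so the names are illustrative only.

/* Model of the raw/zeroed allocation split; not the memblock API. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define DEBUG_POISON 1	/* stand-in for CONFIG_DEBUG_VM */

static void *boot_alloc_raw(size_t size)
{
	void *ptr = malloc(size);

#if DEBUG_POISON
	if (ptr && size > 0)
		memset(ptr, 0xff, size);	/* make "uninitialized" reads obvious */
#endif
	return ptr;
}

static void *boot_alloc_zeroed(size_t size)
{
	void *ptr = boot_alloc_raw(size);

	if (ptr)
		memset(ptr, 0, size);		/* callers of this variant expect zeroes */
	return ptr;
}

int main(void)
{
	unsigned char *raw = boot_alloc_raw(4);
	unsigned char *zeroed = boot_alloc_zeroed(4);

	if (!raw || !zeroed)
		return 1;
	printf("raw[0]=0x%02x zeroed[0]=0x%02x\n", raw[0], zeroed[0]);	/* 0xff, 0x00 */
	free(raw);
	free(zeroed);
	return 0;
}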
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 661f046ad318..50e6906314f8 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -4049,7 +4049,7 @@ static struct cftype mem_cgroup_legacy_files[] = { | |||
4049 | .write = mem_cgroup_reset, | 4049 | .write = mem_cgroup_reset, |
4050 | .read_u64 = mem_cgroup_read_u64, | 4050 | .read_u64 = mem_cgroup_read_u64, |
4051 | }, | 4051 | }, |
4052 | #ifdef CONFIG_SLABINFO | 4052 | #if defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG) |
4053 | { | 4053 | { |
4054 | .name = "kmem.slabinfo", | 4054 | .name = "kmem.slabinfo", |
4055 | .seq_start = memcg_slab_start, | 4055 | .seq_start = memcg_slab_start, |
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 88366626c0b7..4acdf393a801 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c | |||
@@ -1587,7 +1587,7 @@ static int soft_offline_huge_page(struct page *page, int flags) | |||
1587 | ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL, | 1587 | ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL, |
1588 | MIGRATE_SYNC, MR_MEMORY_FAILURE); | 1588 | MIGRATE_SYNC, MR_MEMORY_FAILURE); |
1589 | if (ret) { | 1589 | if (ret) { |
1590 | pr_info("soft offline: %#lx: migration failed %d, type %lx (%pGp)\n", | 1590 | pr_info("soft offline: %#lx: hugepage migration failed %d, type %lx (%pGp)\n", |
1591 | pfn, ret, page->flags, &page->flags); | 1591 | pfn, ret, page->flags, &page->flags); |
1592 | if (!list_empty(&pagelist)) | 1592 | if (!list_empty(&pagelist)) |
1593 | putback_movable_pages(&pagelist); | 1593 | putback_movable_pages(&pagelist); |
diff --git a/mm/memory.c b/mm/memory.c index cae514e7dcfc..85e7a87da79f 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -438,7 +438,7 @@ static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, | |||
438 | pgtable_t token = pmd_pgtable(*pmd); | 438 | pgtable_t token = pmd_pgtable(*pmd); |
439 | pmd_clear(pmd); | 439 | pmd_clear(pmd); |
440 | pte_free_tlb(tlb, token, addr); | 440 | pte_free_tlb(tlb, token, addr); |
441 | atomic_long_dec(&tlb->mm->nr_ptes); | 441 | mm_dec_nr_ptes(tlb->mm); |
442 | } | 442 | } |
443 | 443 | ||
444 | static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud, | 444 | static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud, |
@@ -506,6 +506,7 @@ static inline void free_pud_range(struct mmu_gather *tlb, p4d_t *p4d, | |||
506 | pud = pud_offset(p4d, start); | 506 | pud = pud_offset(p4d, start); |
507 | p4d_clear(p4d); | 507 | p4d_clear(p4d); |
508 | pud_free_tlb(tlb, pud, start); | 508 | pud_free_tlb(tlb, pud, start); |
509 | mm_dec_nr_puds(tlb->mm); | ||
509 | } | 510 | } |
510 | 511 | ||
511 | static inline void free_p4d_range(struct mmu_gather *tlb, pgd_t *pgd, | 512 | static inline void free_p4d_range(struct mmu_gather *tlb, pgd_t *pgd, |
@@ -665,7 +666,7 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address) | |||
665 | 666 | ||
666 | ptl = pmd_lock(mm, pmd); | 667 | ptl = pmd_lock(mm, pmd); |
667 | if (likely(pmd_none(*pmd))) { /* Has another populated it ? */ | 668 | if (likely(pmd_none(*pmd))) { /* Has another populated it ? */ |
668 | atomic_long_inc(&mm->nr_ptes); | 669 | mm_inc_nr_ptes(mm); |
669 | pmd_populate(mm, pmd, new); | 670 | pmd_populate(mm, pmd, new); |
670 | new = NULL; | 671 | new = NULL; |
671 | } | 672 | } |
@@ -2554,7 +2555,11 @@ static int wp_page_copy(struct vm_fault *vmf) | |||
2554 | put_page(new_page); | 2555 | put_page(new_page); |
2555 | 2556 | ||
2556 | pte_unmap_unlock(vmf->pte, vmf->ptl); | 2557 | pte_unmap_unlock(vmf->pte, vmf->ptl); |
2557 | mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); | 2558 | /* |
2559 | * No need to double call mmu_notifier->invalidate_range() callback as | ||
2560 | * the above ptep_clear_flush_notify() did already call it. | ||
2561 | */ | ||
2562 | mmu_notifier_invalidate_range_only_end(mm, mmun_start, mmun_end); | ||
2558 | if (old_page) { | 2563 | if (old_page) { |
2559 | /* | 2564 | /* |
2560 | * Don't let another task, with possibly unlocked vma, | 2565 | * Don't let another task, with possibly unlocked vma, |
@@ -2842,7 +2847,7 @@ EXPORT_SYMBOL(unmap_mapping_range); | |||
2842 | int do_swap_page(struct vm_fault *vmf) | 2847 | int do_swap_page(struct vm_fault *vmf) |
2843 | { | 2848 | { |
2844 | struct vm_area_struct *vma = vmf->vma; | 2849 | struct vm_area_struct *vma = vmf->vma; |
2845 | struct page *page = NULL, *swapcache; | 2850 | struct page *page = NULL, *swapcache = NULL; |
2846 | struct mem_cgroup *memcg; | 2851 | struct mem_cgroup *memcg; |
2847 | struct vma_swap_readahead swap_ra; | 2852 | struct vma_swap_readahead swap_ra; |
2848 | swp_entry_t entry; | 2853 | swp_entry_t entry; |
@@ -2881,17 +2886,36 @@ int do_swap_page(struct vm_fault *vmf) | |||
2881 | } | 2886 | } |
2882 | goto out; | 2887 | goto out; |
2883 | } | 2888 | } |
2889 | |||
2890 | |||
2884 | delayacct_set_flag(DELAYACCT_PF_SWAPIN); | 2891 | delayacct_set_flag(DELAYACCT_PF_SWAPIN); |
2885 | if (!page) | 2892 | if (!page) |
2886 | page = lookup_swap_cache(entry, vma_readahead ? vma : NULL, | 2893 | page = lookup_swap_cache(entry, vma_readahead ? vma : NULL, |
2887 | vmf->address); | 2894 | vmf->address); |
2888 | if (!page) { | 2895 | if (!page) { |
2889 | if (vma_readahead) | 2896 | struct swap_info_struct *si = swp_swap_info(entry); |
2890 | page = do_swap_page_readahead(entry, | 2897 | |
2891 | GFP_HIGHUSER_MOVABLE, vmf, &swap_ra); | 2898 | if (si->flags & SWP_SYNCHRONOUS_IO && |
2892 | else | 2899 | __swap_count(si, entry) == 1) { |
2893 | page = swapin_readahead(entry, | 2900 | /* skip swapcache */ |
2894 | GFP_HIGHUSER_MOVABLE, vma, vmf->address); | 2901 | page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address); |
2902 | if (page) { | ||
2903 | __SetPageLocked(page); | ||
2904 | __SetPageSwapBacked(page); | ||
2905 | set_page_private(page, entry.val); | ||
2906 | lru_cache_add_anon(page); | ||
2907 | swap_readpage(page, true); | ||
2908 | } | ||
2909 | } else { | ||
2910 | if (vma_readahead) | ||
2911 | page = do_swap_page_readahead(entry, | ||
2912 | GFP_HIGHUSER_MOVABLE, vmf, &swap_ra); | ||
2913 | else | ||
2914 | page = swapin_readahead(entry, | ||
2915 | GFP_HIGHUSER_MOVABLE, vma, vmf->address); | ||
2916 | swapcache = page; | ||
2917 | } | ||
2918 | |||
2895 | if (!page) { | 2919 | if (!page) { |
2896 | /* | 2920 | /* |
2897 | * Back out if somebody else faulted in this pte | 2921 | * Back out if somebody else faulted in this pte |
@@ -2920,7 +2944,6 @@ int do_swap_page(struct vm_fault *vmf) | |||
2920 | goto out_release; | 2944 | goto out_release; |
2921 | } | 2945 | } |
2922 | 2946 | ||
2923 | swapcache = page; | ||
2924 | locked = lock_page_or_retry(page, vma->vm_mm, vmf->flags); | 2947 | locked = lock_page_or_retry(page, vma->vm_mm, vmf->flags); |
2925 | 2948 | ||
2926 | delayacct_clear_flag(DELAYACCT_PF_SWAPIN); | 2949 | delayacct_clear_flag(DELAYACCT_PF_SWAPIN); |
@@ -2935,7 +2958,8 @@ int do_swap_page(struct vm_fault *vmf) | |||
2935 | * test below, are not enough to exclude that. Even if it is still | 2958 | * test below, are not enough to exclude that. Even if it is still |
2936 | * swapcache, we need to check that the page's swap has not changed. | 2959 | * swapcache, we need to check that the page's swap has not changed. |
2937 | */ | 2960 | */ |
2938 | if (unlikely(!PageSwapCache(page) || page_private(page) != entry.val)) | 2961 | if (unlikely((!PageSwapCache(page) || |
2962 | page_private(page) != entry.val)) && swapcache) | ||
2939 | goto out_page; | 2963 | goto out_page; |
2940 | 2964 | ||
2941 | page = ksm_might_need_to_copy(page, vma, vmf->address); | 2965 | page = ksm_might_need_to_copy(page, vma, vmf->address); |
@@ -2988,14 +3012,16 @@ int do_swap_page(struct vm_fault *vmf) | |||
2988 | pte = pte_mksoft_dirty(pte); | 3012 | pte = pte_mksoft_dirty(pte); |
2989 | set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte); | 3013 | set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte); |
2990 | vmf->orig_pte = pte; | 3014 | vmf->orig_pte = pte; |
2991 | if (page == swapcache) { | 3015 | |
2992 | do_page_add_anon_rmap(page, vma, vmf->address, exclusive); | 3016 | /* ksm created a completely new copy */ |
2993 | mem_cgroup_commit_charge(page, memcg, true, false); | 3017 | if (unlikely(page != swapcache && swapcache)) { |
2994 | activate_page(page); | ||
2995 | } else { /* ksm created a completely new copy */ | ||
2996 | page_add_new_anon_rmap(page, vma, vmf->address, false); | 3018 | page_add_new_anon_rmap(page, vma, vmf->address, false); |
2997 | mem_cgroup_commit_charge(page, memcg, false, false); | 3019 | mem_cgroup_commit_charge(page, memcg, false, false); |
2998 | lru_cache_add_active_or_unevictable(page, vma); | 3020 | lru_cache_add_active_or_unevictable(page, vma); |
3021 | } else { | ||
3022 | do_page_add_anon_rmap(page, vma, vmf->address, exclusive); | ||
3023 | mem_cgroup_commit_charge(page, memcg, true, false); | ||
3024 | activate_page(page); | ||
2999 | } | 3025 | } |
3000 | 3026 | ||
3001 | swap_free(entry); | 3027 | swap_free(entry); |
@@ -3003,7 +3029,7 @@ int do_swap_page(struct vm_fault *vmf) | |||
3003 | (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) | 3029 | (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) |
3004 | try_to_free_swap(page); | 3030 | try_to_free_swap(page); |
3005 | unlock_page(page); | 3031 | unlock_page(page); |
3006 | if (page != swapcache) { | 3032 | if (page != swapcache && swapcache) { |
3007 | /* | 3033 | /* |
3008 | * Hold the lock to avoid the swap entry to be reused | 3034 | * Hold the lock to avoid the swap entry to be reused |
3009 | * until we take the PT lock for the pte_same() check | 3035 | * until we take the PT lock for the pte_same() check |
@@ -3036,7 +3062,7 @@ out_page: | |||
3036 | unlock_page(page); | 3062 | unlock_page(page); |
3037 | out_release: | 3063 | out_release: |
3038 | put_page(page); | 3064 | put_page(page); |
3039 | if (page != swapcache) { | 3065 | if (page != swapcache && swapcache) { |
3040 | unlock_page(swapcache); | 3066 | unlock_page(swapcache); |
3041 | put_page(swapcache); | 3067 | put_page(swapcache); |
3042 | } | 3068 | } |
@@ -3212,7 +3238,7 @@ static int pte_alloc_one_map(struct vm_fault *vmf) | |||
3212 | goto map_pte; | 3238 | goto map_pte; |
3213 | } | 3239 | } |
3214 | 3240 | ||
3215 | atomic_long_inc(&vma->vm_mm->nr_ptes); | 3241 | mm_inc_nr_ptes(vma->vm_mm); |
3216 | pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte); | 3242 | pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte); |
3217 | spin_unlock(vmf->ptl); | 3243 | spin_unlock(vmf->ptl); |
3218 | vmf->prealloc_pte = NULL; | 3244 | vmf->prealloc_pte = NULL; |
@@ -3271,7 +3297,7 @@ static void deposit_prealloc_pte(struct vm_fault *vmf) | |||
3271 | * We are going to consume the prealloc table, | 3297 | * We are going to consume the prealloc table, |
3272 | * count that as nr_ptes. | 3298 | * count that as nr_ptes. |
3273 | */ | 3299 | */ |
3274 | atomic_long_inc(&vma->vm_mm->nr_ptes); | 3300 | mm_inc_nr_ptes(vma->vm_mm); |
3275 | vmf->prealloc_pte = NULL; | 3301 | vmf->prealloc_pte = NULL; |
3276 | } | 3302 | } |
3277 | 3303 | ||
@@ -4124,15 +4150,17 @@ int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address) | |||
4124 | 4150 | ||
4125 | spin_lock(&mm->page_table_lock); | 4151 | spin_lock(&mm->page_table_lock); |
4126 | #ifndef __ARCH_HAS_5LEVEL_HACK | 4152 | #ifndef __ARCH_HAS_5LEVEL_HACK |
4127 | if (p4d_present(*p4d)) /* Another has populated it */ | 4153 | if (!p4d_present(*p4d)) { |
4128 | pud_free(mm, new); | 4154 | mm_inc_nr_puds(mm); |
4129 | else | ||
4130 | p4d_populate(mm, p4d, new); | 4155 | p4d_populate(mm, p4d, new); |
4131 | #else | 4156 | } else /* Another has populated it */ |
4132 | if (pgd_present(*p4d)) /* Another has populated it */ | ||
4133 | pud_free(mm, new); | 4157 | pud_free(mm, new); |
4134 | else | 4158 | #else |
4159 | if (!pgd_present(*p4d)) { | ||
4160 | mm_inc_nr_puds(mm); | ||
4135 | pgd_populate(mm, p4d, new); | 4161 | pgd_populate(mm, p4d, new); |
4162 | } else /* Another has populated it */ | ||
4163 | pud_free(mm, new); | ||
4136 | #endif /* __ARCH_HAS_5LEVEL_HACK */ | 4164 | #endif /* __ARCH_HAS_5LEVEL_HACK */ |
4137 | spin_unlock(&mm->page_table_lock); | 4165 | spin_unlock(&mm->page_table_lock); |
4138 | return 0; | 4166 | return 0; |
@@ -4457,17 +4485,15 @@ void print_vma_addr(char *prefix, unsigned long ip) | |||
4457 | struct vm_area_struct *vma; | 4485 | struct vm_area_struct *vma; |
4458 | 4486 | ||
4459 | /* | 4487 | /* |
4460 | * Do not print if we are in atomic | 4488 | * We might be running in an atomic context, so we cannot sleep |
4461 | * contexts (in exception stacks, etc.): | ||
4462 | */ | 4489 | */ |
4463 | if (preempt_count()) | 4490 | if (!down_read_trylock(&mm->mmap_sem)) |
4464 | return; | 4491 | return; |
4465 | 4492 | ||
4466 | down_read(&mm->mmap_sem); | ||
4467 | vma = find_vma(mm, ip); | 4493 | vma = find_vma(mm, ip); |
4468 | if (vma && vma->vm_file) { | 4494 | if (vma && vma->vm_file) { |
4469 | struct file *f = vma->vm_file; | 4495 | struct file *f = vma->vm_file; |
4470 | char *buf = (char *)__get_free_page(GFP_KERNEL); | 4496 | char *buf = (char *)__get_free_page(GFP_NOWAIT); |
4471 | if (buf) { | 4497 | if (buf) { |
4472 | char *p; | 4498 | char *p; |
4473 | 4499 | ||
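In the do_swap_page() hunks above, swapcache now starts out NULL and is only set on the readahead path, so the later page != swapcache tests gain an extra && swapcache to stay quiet when the swap cache was skipped entirely (SWP_SYNCHRONOUS_IO with a single swap count). A toy model of that predicate follows; need_swapcache_cleanup() is a hypothetical name used only for illustration.

/* Model of the "page != swapcache && swapcache" test; illustrative only. */
#include <stdio.h>
#include <stdbool.h>

struct page { int id; };

static bool need_swapcache_cleanup(struct page *page, struct page *swapcache)
{
	/* True only if we went through the swap cache *and* ended up holding
	 * a different page than the cached one (e.g. a ksm copy). */
	return page != swapcache && swapcache != NULL;
}

int main(void)
{
	struct page cached = { 1 }, copy = { 2 };

	printf("%d\n", need_swapcache_cleanup(&copy, NULL));		/* 0: swapcache skipped */
	printf("%d\n", need_swapcache_cleanup(&cached, &cached));	/* 0: same page */
	printf("%d\n", need_swapcache_cleanup(&copy, &cached));		/* 1: cleanup needed */
	return 0;
}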
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index d4b5f29906b9..c52aa05b106c 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c | |||
@@ -265,7 +265,7 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn, | |||
265 | /* | 265 | /* |
266 | * Make all the pages reserved so that nobody will stumble over half | 266 | * Make all the pages reserved so that nobody will stumble over half |
267 | * initialized state. | 267 | * initialized state. |
268 | * FIXME: We also have to associate it with a node because pfn_to_node | 268 | * FIXME: We also have to associate it with a node because page_to_nid |
269 | * relies on having page with the proper node. | 269 | * relies on having page with the proper node. |
270 | */ | 270 | */ |
271 | for (i = 0; i < PAGES_PER_SECTION; i++) { | 271 | for (i = 0; i < PAGES_PER_SECTION; i++) { |
@@ -1590,11 +1590,11 @@ static void node_states_clear_node(int node, struct memory_notify *arg) | |||
1590 | } | 1590 | } |
1591 | 1591 | ||
1592 | static int __ref __offline_pages(unsigned long start_pfn, | 1592 | static int __ref __offline_pages(unsigned long start_pfn, |
1593 | unsigned long end_pfn, unsigned long timeout) | 1593 | unsigned long end_pfn) |
1594 | { | 1594 | { |
1595 | unsigned long pfn, nr_pages, expire; | 1595 | unsigned long pfn, nr_pages; |
1596 | long offlined_pages; | 1596 | long offlined_pages; |
1597 | int ret, drain, retry_max, node; | 1597 | int ret, node; |
1598 | unsigned long flags; | 1598 | unsigned long flags; |
1599 | unsigned long valid_start, valid_end; | 1599 | unsigned long valid_start, valid_end; |
1600 | struct zone *zone; | 1600 | struct zone *zone; |
@@ -1630,44 +1630,22 @@ static int __ref __offline_pages(unsigned long start_pfn, | |||
1630 | goto failed_removal; | 1630 | goto failed_removal; |
1631 | 1631 | ||
1632 | pfn = start_pfn; | 1632 | pfn = start_pfn; |
1633 | expire = jiffies + timeout; | ||
1634 | drain = 0; | ||
1635 | retry_max = 5; | ||
1636 | repeat: | 1633 | repeat: |
1637 | /* start memory hot removal */ | 1634 | /* start memory hot removal */ |
1638 | ret = -EAGAIN; | ||
1639 | if (time_after(jiffies, expire)) | ||
1640 | goto failed_removal; | ||
1641 | ret = -EINTR; | 1635 | ret = -EINTR; |
1642 | if (signal_pending(current)) | 1636 | if (signal_pending(current)) |
1643 | goto failed_removal; | 1637 | goto failed_removal; |
1644 | ret = 0; | 1638 | |
1645 | if (drain) { | 1639 | cond_resched(); |
1646 | lru_add_drain_all_cpuslocked(); | 1640 | lru_add_drain_all_cpuslocked(); |
1647 | cond_resched(); | 1641 | drain_all_pages(zone); |
1648 | drain_all_pages(zone); | ||
1649 | } | ||
1650 | 1642 | ||
1651 | pfn = scan_movable_pages(start_pfn, end_pfn); | 1643 | pfn = scan_movable_pages(start_pfn, end_pfn); |
1652 | if (pfn) { /* We have movable pages */ | 1644 | if (pfn) { /* We have movable pages */ |
1653 | ret = do_migrate_range(pfn, end_pfn); | 1645 | ret = do_migrate_range(pfn, end_pfn); |
1654 | if (!ret) { | 1646 | goto repeat; |
1655 | drain = 1; | ||
1656 | goto repeat; | ||
1657 | } else { | ||
1658 | if (ret < 0) | ||
1659 | if (--retry_max == 0) | ||
1660 | goto failed_removal; | ||
1661 | yield(); | ||
1662 | drain = 1; | ||
1663 | goto repeat; | ||
1664 | } | ||
1665 | } | 1647 | } |
1666 | /* drain all zone's lru pagevec, this is asynchronous... */ | 1648 | |
1667 | lru_add_drain_all_cpuslocked(); | ||
1668 | yield(); | ||
1669 | /* drain pcp pages, this is synchronous. */ | ||
1670 | drain_all_pages(zone); | ||
1671 | /* | 1649 | /* |
1672 | * dissolve free hugepages in the memory block before doing offlining | 1650 | * dissolve free hugepages in the memory block before doing offlining |
1673 | * actually in order to make hugetlbfs's object counting consistent. | 1651 | * actually in order to make hugetlbfs's object counting consistent. |
@@ -1677,10 +1655,8 @@ repeat: | |||
1677 | goto failed_removal; | 1655 | goto failed_removal; |
1678 | /* check again */ | 1656 | /* check again */ |
1679 | offlined_pages = check_pages_isolated(start_pfn, end_pfn); | 1657 | offlined_pages = check_pages_isolated(start_pfn, end_pfn); |
1680 | if (offlined_pages < 0) { | 1658 | if (offlined_pages < 0) |
1681 | ret = -EBUSY; | 1659 | goto repeat; |
1682 | goto failed_removal; | ||
1683 | } | ||
1684 | pr_info("Offlined Pages %ld\n", offlined_pages); | 1660 | pr_info("Offlined Pages %ld\n", offlined_pages); |
1685 | /* Ok, all of our target is isolated. | 1661 | /* Ok, all of our target is isolated. |
1686 | We cannot do rollback at this point. */ | 1662 | We cannot do rollback at this point. */ |
@@ -1728,7 +1704,7 @@ failed_removal: | |||
1728 | /* Must be protected by mem_hotplug_begin() or a device_lock */ | 1704 | /* Must be protected by mem_hotplug_begin() or a device_lock */ |
1729 | int offline_pages(unsigned long start_pfn, unsigned long nr_pages) | 1705 | int offline_pages(unsigned long start_pfn, unsigned long nr_pages) |
1730 | { | 1706 | { |
1731 | return __offline_pages(start_pfn, start_pfn + nr_pages, 120 * HZ); | 1707 | return __offline_pages(start_pfn, start_pfn + nr_pages); |
1732 | } | 1708 | } |
1733 | #endif /* CONFIG_MEMORY_HOTREMOVE */ | 1709 | #endif /* CONFIG_MEMORY_HOTREMOVE */ |
1734 | 1710 | ||
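The __offline_pages() rework above drops the jiffies timeout, the retry_max counter and the drain flag, leaving a plain loop: check for a pending signal, drain, scan for movable pages, migrate, and repeat until the range is clean. A compact user-space sketch of that control flow; the stub helpers only stand in for the kernel ones.

/* Sketch of the simplified offline retry loop; stubs, not kernel code. */
#include <stdio.h>
#include <stdbool.h>

static int movable_left = 3;		/* pretend three batches still need migrating */

static bool signal_pending(void)	{ return false; }
static void drain_caches(void)		{ /* lru_add_drain_all() + drain_all_pages() */ }
static bool scan_movable_pages(void)	{ return movable_left > 0; }
static void do_migrate_range(void)	{ movable_left--; }

static int offline_range(void)
{
	for (;;) {
		if (signal_pending())
			return -1;	/* -EINTR in the kernel */

		drain_caches();
		if (!scan_movable_pages())
			return 0;	/* nothing left to move: done */

		do_migrate_range();	/* errors simply lead to another pass */
	}
}

int main(void)
{
	printf("offline_range() = %d\n", offline_range());	/* 0 after three passes */
	return 0;
}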
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index a2af6d58a68f..4ce44d3ff03d 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -85,6 +85,7 @@ | |||
85 | #include <linux/interrupt.h> | 85 | #include <linux/interrupt.h> |
86 | #include <linux/init.h> | 86 | #include <linux/init.h> |
87 | #include <linux/compat.h> | 87 | #include <linux/compat.h> |
88 | #include <linux/ptrace.h> | ||
88 | #include <linux/swap.h> | 89 | #include <linux/swap.h> |
89 | #include <linux/seq_file.h> | 90 | #include <linux/seq_file.h> |
90 | #include <linux/proc_fs.h> | 91 | #include <linux/proc_fs.h> |
@@ -1365,7 +1366,6 @@ SYSCALL_DEFINE4(migrate_pages, pid_t, pid, unsigned long, maxnode, | |||
1365 | const unsigned long __user *, old_nodes, | 1366 | const unsigned long __user *, old_nodes, |
1366 | const unsigned long __user *, new_nodes) | 1367 | const unsigned long __user *, new_nodes) |
1367 | { | 1368 | { |
1368 | const struct cred *cred = current_cred(), *tcred; | ||
1369 | struct mm_struct *mm = NULL; | 1369 | struct mm_struct *mm = NULL; |
1370 | struct task_struct *task; | 1370 | struct task_struct *task; |
1371 | nodemask_t task_nodes; | 1371 | nodemask_t task_nodes; |
@@ -1401,15 +1401,10 @@ SYSCALL_DEFINE4(migrate_pages, pid_t, pid, unsigned long, maxnode, | |||
1401 | err = -EINVAL; | 1401 | err = -EINVAL; |
1402 | 1402 | ||
1403 | /* | 1403 | /* |
1404 | * Check if this process has the right to modify the specified | 1404 | * Check if this process has the right to modify the specified process. |
1405 | * process. The right exists if the process has administrative | 1405 | * Use the regular "ptrace_may_access()" checks. |
1406 | * capabilities, superuser privileges or the same | ||
1407 | * userid as the target process. | ||
1408 | */ | 1406 | */ |
1409 | tcred = __task_cred(task); | 1407 | if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) { |
1410 | if (!uid_eq(cred->euid, tcred->suid) && !uid_eq(cred->euid, tcred->uid) && | ||
1411 | !uid_eq(cred->uid, tcred->suid) && !uid_eq(cred->uid, tcred->uid) && | ||
1412 | !capable(CAP_SYS_NICE)) { | ||
1413 | rcu_read_unlock(); | 1408 | rcu_read_unlock(); |
1414 | err = -EPERM; | 1409 | err = -EPERM; |
1415 | goto out_put; | 1410 | goto out_put; |
@@ -1920,6 +1915,9 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, | |||
1920 | struct page *page; | 1915 | struct page *page; |
1921 | 1916 | ||
1922 | page = __alloc_pages(gfp, order, nid); | 1917 | page = __alloc_pages(gfp, order, nid); |
1918 | /* skip NUMA_INTERLEAVE_HIT counter update if numa stats is disabled */ | ||
1919 | if (!static_branch_likely(&vm_numa_stat_key)) | ||
1920 | return page; | ||
1923 | if (page && page_to_nid(page) == nid) { | 1921 | if (page && page_to_nid(page) == nid) { |
1924 | preempt_disable(); | 1922 | preempt_disable(); |
1925 | __inc_numa_state(page_zone(page), NUMA_INTERLEAVE_HIT); | 1923 | __inc_numa_state(page_zone(page), NUMA_INTERLEAVE_HIT); |
diff --git a/mm/mempool.c b/mm/mempool.c index c4a23cdae3f0..7d8c5a0010a2 100644 --- a/mm/mempool.c +++ b/mm/mempool.c | |||
@@ -189,7 +189,7 @@ mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn, | |||
189 | pool = kzalloc_node(sizeof(*pool), gfp_mask, node_id); | 189 | pool = kzalloc_node(sizeof(*pool), gfp_mask, node_id); |
190 | if (!pool) | 190 | if (!pool) |
191 | return NULL; | 191 | return NULL; |
192 | pool->elements = kmalloc_node(min_nr * sizeof(void *), | 192 | pool->elements = kmalloc_array_node(min_nr, sizeof(void *), |
193 | gfp_mask, node_id); | 193 | gfp_mask, node_id); |
194 | if (!pool->elements) { | 194 | if (!pool->elements) { |
195 | kfree(pool); | 195 | kfree(pool); |
diff --git a/mm/migrate.c b/mm/migrate.c index 1236449b4777..4d0be47a322a 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
@@ -2089,7 +2089,11 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, | |||
2089 | set_page_owner_migrate_reason(new_page, MR_NUMA_MISPLACED); | 2089 | set_page_owner_migrate_reason(new_page, MR_NUMA_MISPLACED); |
2090 | 2090 | ||
2091 | spin_unlock(ptl); | 2091 | spin_unlock(ptl); |
2092 | mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); | 2092 | /* |
2093 | * No need to double call mmu_notifier->invalidate_range() callback as | ||
2094 | * the above pmdp_huge_clear_flush_notify() did already call it. | ||
2095 | */ | ||
2096 | mmu_notifier_invalidate_range_only_end(mm, mmun_start, mmun_end); | ||
2093 | 2097 | ||
2094 | /* Take an "isolate" reference and put new page on the LRU. */ | 2098 | /* Take an "isolate" reference and put new page on the LRU. */ |
2095 | get_page(new_page); | 2099 | get_page(new_page); |
@@ -2805,9 +2809,14 @@ static void migrate_vma_pages(struct migrate_vma *migrate) | |||
2805 | migrate->src[i] &= ~MIGRATE_PFN_MIGRATE; | 2809 | migrate->src[i] &= ~MIGRATE_PFN_MIGRATE; |
2806 | } | 2810 | } |
2807 | 2811 | ||
2812 | /* | ||
2813 | * No need to double call mmu_notifier->invalidate_range() callback as | ||
2814 | * the above ptep_clear_flush_notify() inside migrate_vma_insert_page() | ||
2815 | * did already call it. | ||
2816 | */ | ||
2808 | if (notified) | 2817 | if (notified) |
2809 | mmu_notifier_invalidate_range_end(mm, mmu_start, | 2818 | mmu_notifier_invalidate_range_only_end(mm, mmu_start, |
2810 | migrate->end); | 2819 | migrate->end); |
2811 | } | 2820 | } |
2812 | 2821 | ||
2813 | /* | 2822 | /* |
diff --git a/mm/mlock.c b/mm/mlock.c index 46af369c13e5..30472d438794 100644 --- a/mm/mlock.c +++ b/mm/mlock.c | |||
@@ -289,7 +289,7 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) | |||
289 | struct pagevec pvec_putback; | 289 | struct pagevec pvec_putback; |
290 | int pgrescued = 0; | 290 | int pgrescued = 0; |
291 | 291 | ||
292 | pagevec_init(&pvec_putback, 0); | 292 | pagevec_init(&pvec_putback); |
293 | 293 | ||
294 | /* Phase 1: page isolation */ | 294 | /* Phase 1: page isolation */ |
295 | spin_lock_irq(zone_lru_lock(zone)); | 295 | spin_lock_irq(zone_lru_lock(zone)); |
@@ -448,7 +448,7 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, | |||
448 | struct pagevec pvec; | 448 | struct pagevec pvec; |
449 | struct zone *zone; | 449 | struct zone *zone; |
450 | 450 | ||
451 | pagevec_init(&pvec, 0); | 451 | pagevec_init(&pvec); |
452 | /* | 452 | /* |
453 | * Although FOLL_DUMP is intended for get_dump_page(), | 453 | * Although FOLL_DUMP is intended for get_dump_page(), |
454 | * it just so happens that its special treatment of the | 454 | * it just so happens that its special treatment of the |
@@ -670,8 +670,6 @@ static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t fla | |||
670 | if (!can_do_mlock()) | 670 | if (!can_do_mlock()) |
671 | return -EPERM; | 671 | return -EPERM; |
672 | 672 | ||
673 | lru_add_drain_all(); /* flush pagevec */ | ||
674 | |||
675 | len = PAGE_ALIGN(len + (offset_in_page(start))); | 673 | len = PAGE_ALIGN(len + (offset_in_page(start))); |
676 | start &= PAGE_MASK; | 674 | start &= PAGE_MASK; |
677 | 675 | ||
@@ -798,9 +796,6 @@ SYSCALL_DEFINE1(mlockall, int, flags) | |||
798 | if (!can_do_mlock()) | 796 | if (!can_do_mlock()) |
799 | return -EPERM; | 797 | return -EPERM; |
800 | 798 | ||
801 | if (flags & MCL_CURRENT) | ||
802 | lru_add_drain_all(); /* flush pagevec */ | ||
803 | |||
804 | lock_limit = rlimit(RLIMIT_MEMLOCK); | 799 | lock_limit = rlimit(RLIMIT_MEMLOCK); |
805 | lock_limit >>= PAGE_SHIFT; | 800 | lock_limit >>= PAGE_SHIFT; |
806 | 801 | ||
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index 314285284e6e..96edb33fd09a 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c | |||
@@ -190,7 +190,9 @@ void __mmu_notifier_invalidate_range_start(struct mm_struct *mm, | |||
190 | EXPORT_SYMBOL_GPL(__mmu_notifier_invalidate_range_start); | 190 | EXPORT_SYMBOL_GPL(__mmu_notifier_invalidate_range_start); |
191 | 191 | ||
192 | void __mmu_notifier_invalidate_range_end(struct mm_struct *mm, | 192 | void __mmu_notifier_invalidate_range_end(struct mm_struct *mm, |
193 | unsigned long start, unsigned long end) | 193 | unsigned long start, |
194 | unsigned long end, | ||
195 | bool only_end) | ||
194 | { | 196 | { |
195 | struct mmu_notifier *mn; | 197 | struct mmu_notifier *mn; |
196 | int id; | 198 | int id; |
@@ -204,8 +206,13 @@ void __mmu_notifier_invalidate_range_end(struct mm_struct *mm, | |||
204 | * subsystem registers either invalidate_range_start()/end() or | 206 | * subsystem registers either invalidate_range_start()/end() or |
205 | * invalidate_range(), so this will be no additional overhead | 207 | * invalidate_range(), so this will be no additional overhead |
206 | * (besides the pointer check). | 208 | * (besides the pointer check). |
209 | * | ||
210 | * We skip the call to invalidate_range() if we know it is safe, i.e. | ||
211 | * the call site used mmu_notifier_invalidate_range_only_end(), which | ||
212 | * is safe to do when we know that a call to invalidate_range() | ||
213 | * already happened under the page table lock. | ||
207 | */ | 214 | */ |
208 | if (mn->ops->invalidate_range) | 215 | if (!only_end && mn->ops->invalidate_range) |
209 | mn->ops->invalidate_range(mn, mm, start, end); | 216 | mn->ops->invalidate_range(mn, mm, start, end); |
210 | if (mn->ops->invalidate_range_end) | 217 | if (mn->ops->invalidate_range_end) |
211 | mn->ops->invalidate_range_end(mn, mm, start, end); | 218 | mn->ops->invalidate_range_end(mn, mm, start, end); |
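The new only_end flag exists for call sites that have already flushed and notified under the page table lock: they still get ->invalidate_range_end(), but the redundant ->invalidate_range() is skipped. A hedged caller-side sketch (the wrapper function and its arguments are hypothetical; the notifier and pmd helpers are the in-tree ones):

    #include <linux/mm.h>
    #include <linux/mmu_notifier.h>

    static void collapse_example(struct vm_area_struct *vma, struct mm_struct *mm,
                                 pmd_t *pmd, unsigned long address,
                                 unsigned long start, unsigned long end)
    {
        spinlock_t *ptl;

        mmu_notifier_invalidate_range_start(mm, start, end);

        ptl = pmd_lock(mm, pmd);
        /* Flushes the TLB and calls ->invalidate_range() under the PTL */
        pmdp_huge_clear_flush_notify(vma, address, pmd);
        /* ... install the replacement mapping here ... */
        spin_unlock(ptl);

        /*
         * Delivers ->invalidate_range_end() but skips the second
         * ->invalidate_range() that the plain _end() would issue.
         */
        mmu_notifier_invalidate_range_only_end(mm, start, end);
    }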
diff --git a/mm/oom_kill.c b/mm/oom_kill.c index dee0f75c3013..c86fbd1b590e 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c | |||
@@ -44,6 +44,7 @@ | |||
44 | 44 | ||
45 | #include <asm/tlb.h> | 45 | #include <asm/tlb.h> |
46 | #include "internal.h" | 46 | #include "internal.h" |
47 | #include "slab.h" | ||
47 | 48 | ||
48 | #define CREATE_TRACE_POINTS | 49 | #define CREATE_TRACE_POINTS |
49 | #include <trace/events/oom.h> | 50 | #include <trace/events/oom.h> |
@@ -161,6 +162,25 @@ static bool oom_unkillable_task(struct task_struct *p, | |||
161 | return false; | 162 | return false; |
162 | } | 163 | } |
163 | 164 | ||
165 | /* | ||
166 | * Print out unreclaimable slabs info when the unreclaimable slabs amount is greater | ||
167 | * than all user memory (LRU pages) | ||
168 | */ | ||
169 | static bool is_dump_unreclaim_slabs(void) | ||
170 | { | ||
171 | unsigned long nr_lru; | ||
172 | |||
173 | nr_lru = global_node_page_state(NR_ACTIVE_ANON) + | ||
174 | global_node_page_state(NR_INACTIVE_ANON) + | ||
175 | global_node_page_state(NR_ACTIVE_FILE) + | ||
176 | global_node_page_state(NR_INACTIVE_FILE) + | ||
177 | global_node_page_state(NR_ISOLATED_ANON) + | ||
178 | global_node_page_state(NR_ISOLATED_FILE) + | ||
179 | global_node_page_state(NR_UNEVICTABLE); | ||
180 | |||
181 | return (global_node_page_state(NR_SLAB_UNRECLAIMABLE) > nr_lru); | ||
182 | } | ||
183 | |||
164 | /** | 184 | /** |
165 | * oom_badness - heuristic function to determine which candidate task to kill | 185 | * oom_badness - heuristic function to determine which candidate task to kill |
166 | * @p: task struct of which task we should calculate | 186 | * @p: task struct of which task we should calculate |
@@ -201,7 +221,7 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, | |||
201 | * task's rss, pagetable and swap space use. | 221 | * task's rss, pagetable and swap space use. |
202 | */ | 222 | */ |
203 | points = get_mm_rss(p->mm) + get_mm_counter(p->mm, MM_SWAPENTS) + | 223 | points = get_mm_rss(p->mm) + get_mm_counter(p->mm, MM_SWAPENTS) + |
204 | atomic_long_read(&p->mm->nr_ptes) + mm_nr_pmds(p->mm); | 224 | mm_pgtables_bytes(p->mm) / PAGE_SIZE; |
205 | task_unlock(p); | 225 | task_unlock(p); |
206 | 226 | ||
207 | /* | 227 | /* |
@@ -369,15 +389,15 @@ static void select_bad_process(struct oom_control *oc) | |||
369 | * Dumps the current memory state of all eligible tasks. Tasks not in the same | 389 | * Dumps the current memory state of all eligible tasks. Tasks not in the same |
370 | * memcg, not in the same cpuset, or bound to a disjoint set of mempolicy nodes | 390 | * memcg, not in the same cpuset, or bound to a disjoint set of mempolicy nodes |
371 | * are not shown. | 391 | * are not shown. |
372 | * State information includes task's pid, uid, tgid, vm size, rss, nr_ptes, | 392 | * State information includes task's pid, uid, tgid, vm size, rss, |
373 | * swapents, oom_score_adj value, and name. | 393 | * pgtables_bytes, swapents, oom_score_adj value, and name. |
374 | */ | 394 | */ |
375 | static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask) | 395 | static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask) |
376 | { | 396 | { |
377 | struct task_struct *p; | 397 | struct task_struct *p; |
378 | struct task_struct *task; | 398 | struct task_struct *task; |
379 | 399 | ||
380 | pr_info("[ pid ] uid tgid total_vm rss nr_ptes nr_pmds swapents oom_score_adj name\n"); | 400 | pr_info("[ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name\n"); |
381 | rcu_read_lock(); | 401 | rcu_read_lock(); |
382 | for_each_process(p) { | 402 | for_each_process(p) { |
383 | if (oom_unkillable_task(p, memcg, nodemask)) | 403 | if (oom_unkillable_task(p, memcg, nodemask)) |
@@ -393,11 +413,10 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask) | |||
393 | continue; | 413 | continue; |
394 | } | 414 | } |
395 | 415 | ||
396 | pr_info("[%5d] %5d %5d %8lu %8lu %7ld %7ld %8lu %5hd %s\n", | 416 | pr_info("[%5d] %5d %5d %8lu %8lu %8ld %8lu %5hd %s\n", |
397 | task->pid, from_kuid(&init_user_ns, task_uid(task)), | 417 | task->pid, from_kuid(&init_user_ns, task_uid(task)), |
398 | task->tgid, task->mm->total_vm, get_mm_rss(task->mm), | 418 | task->tgid, task->mm->total_vm, get_mm_rss(task->mm), |
399 | atomic_long_read(&task->mm->nr_ptes), | 419 | mm_pgtables_bytes(task->mm), |
400 | mm_nr_pmds(task->mm), | ||
401 | get_mm_counter(task->mm, MM_SWAPENTS), | 420 | get_mm_counter(task->mm, MM_SWAPENTS), |
402 | task->signal->oom_score_adj, task->comm); | 421 | task->signal->oom_score_adj, task->comm); |
403 | task_unlock(task); | 422 | task_unlock(task); |
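Both oom_badness() and dump_tasks() above move from the separate nr_ptes/nr_pmds counters to the single byte-based page-table counter introduced in this series. A short sketch of the new accounting (the helper wrapper is illustrative; mm_pgtables_bytes() is the real accessor):

    #include <linux/mm.h>
    #include <linux/mm_types.h>

    /* Rough memory footprint of a task in pages: RSS + swap + page tables */
    static unsigned long oom_footprint_pages(struct mm_struct *mm)
    {
        /* mm_pgtables_bytes() covers every page-table level, in bytes */
        return get_mm_rss(mm) + get_mm_counter(mm, MM_SWAPENTS) +
               mm_pgtables_bytes(mm) / PAGE_SIZE;
    }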
@@ -407,23 +426,22 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask) | |||
407 | 426 | ||
408 | static void dump_header(struct oom_control *oc, struct task_struct *p) | 427 | static void dump_header(struct oom_control *oc, struct task_struct *p) |
409 | { | 428 | { |
410 | pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), nodemask=", | 429 | pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), nodemask=%*pbl, order=%d, oom_score_adj=%hd\n", |
411 | current->comm, oc->gfp_mask, &oc->gfp_mask); | 430 | current->comm, oc->gfp_mask, &oc->gfp_mask, |
412 | if (oc->nodemask) | 431 | nodemask_pr_args(oc->nodemask), oc->order, |
413 | pr_cont("%*pbl", nodemask_pr_args(oc->nodemask)); | 432 | current->signal->oom_score_adj); |
414 | else | ||
415 | pr_cont("(null)"); | ||
416 | pr_cont(", order=%d, oom_score_adj=%hd\n", | ||
417 | oc->order, current->signal->oom_score_adj); | ||
418 | if (!IS_ENABLED(CONFIG_COMPACTION) && oc->order) | 433 | if (!IS_ENABLED(CONFIG_COMPACTION) && oc->order) |
419 | pr_warn("COMPACTION is disabled!!!\n"); | 434 | pr_warn("COMPACTION is disabled!!!\n"); |
420 | 435 | ||
421 | cpuset_print_current_mems_allowed(); | 436 | cpuset_print_current_mems_allowed(); |
422 | dump_stack(); | 437 | dump_stack(); |
423 | if (oc->memcg) | 438 | if (is_memcg_oom(oc)) |
424 | mem_cgroup_print_oom_info(oc->memcg, p); | 439 | mem_cgroup_print_oom_info(oc->memcg, p); |
425 | else | 440 | else { |
426 | show_mem(SHOW_MEM_FILTER_NODES, oc->nodemask); | 441 | show_mem(SHOW_MEM_FILTER_NODES, oc->nodemask); |
442 | if (is_dump_unreclaim_slabs()) | ||
443 | dump_unreclaimable_slab(); | ||
444 | } | ||
427 | if (sysctl_oom_dump_tasks) | 445 | if (sysctl_oom_dump_tasks) |
428 | dump_tasks(oc->memcg, oc->nodemask); | 446 | dump_tasks(oc->memcg, oc->nodemask); |
429 | } | 447 | } |
@@ -618,9 +636,6 @@ static int oom_reaper(void *unused) | |||
618 | 636 | ||
619 | static void wake_oom_reaper(struct task_struct *tsk) | 637 | static void wake_oom_reaper(struct task_struct *tsk) |
620 | { | 638 | { |
621 | if (!oom_reaper_th) | ||
622 | return; | ||
623 | |||
624 | /* tsk is already queued? */ | 639 | /* tsk is already queued? */ |
625 | if (tsk == oom_reaper_list || tsk->oom_reaper_list) | 640 | if (tsk == oom_reaper_list || tsk->oom_reaper_list) |
626 | return; | 641 | return; |
@@ -638,11 +653,6 @@ static void wake_oom_reaper(struct task_struct *tsk) | |||
638 | static int __init oom_init(void) | 653 | static int __init oom_init(void) |
639 | { | 654 | { |
640 | oom_reaper_th = kthread_run(oom_reaper, NULL, "oom_reaper"); | 655 | oom_reaper_th = kthread_run(oom_reaper, NULL, "oom_reaper"); |
641 | if (IS_ERR(oom_reaper_th)) { | ||
642 | pr_err("Unable to start OOM reaper %ld. Continuing regardless\n", | ||
643 | PTR_ERR(oom_reaper_th)); | ||
644 | oom_reaper_th = NULL; | ||
645 | } | ||
646 | return 0; | 656 | return 0; |
647 | } | 657 | } |
648 | subsys_initcall(oom_init) | 658 | subsys_initcall(oom_init) |
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index c518c845f202..8a1551154285 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
@@ -433,8 +433,11 @@ static void domain_dirty_limits(struct dirty_throttle_control *dtc) | |||
433 | else | 433 | else |
434 | bg_thresh = (bg_ratio * available_memory) / PAGE_SIZE; | 434 | bg_thresh = (bg_ratio * available_memory) / PAGE_SIZE; |
435 | 435 | ||
436 | if (bg_thresh >= thresh) | 436 | if (unlikely(bg_thresh >= thresh)) { |
437 | pr_warn("vm direct limit must be set greater than background limit.\n"); | ||
437 | bg_thresh = thresh / 2; | 438 | bg_thresh = thresh / 2; |
439 | } | ||
440 | |||
438 | tsk = current; | 441 | tsk = current; |
439 | if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) { | 442 | if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) { |
440 | bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32; | 443 | bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32; |
@@ -625,9 +628,9 @@ EXPORT_SYMBOL_GPL(wb_writeout_inc); | |||
625 | * On idle system, we can be called long after we scheduled because we use | 628 | * On idle system, we can be called long after we scheduled because we use |
626 | * deferred timers so count with missed periods. | 629 | * deferred timers so count with missed periods. |
627 | */ | 630 | */ |
628 | static void writeout_period(unsigned long t) | 631 | static void writeout_period(struct timer_list *t) |
629 | { | 632 | { |
630 | struct wb_domain *dom = (void *)t; | 633 | struct wb_domain *dom = from_timer(dom, t, period_timer); |
631 | int miss_periods = (jiffies - dom->period_time) / | 634 | int miss_periods = (jiffies - dom->period_time) / |
632 | VM_COMPLETIONS_PERIOD_LEN; | 635 | VM_COMPLETIONS_PERIOD_LEN; |
633 | 636 | ||
@@ -650,8 +653,7 @@ int wb_domain_init(struct wb_domain *dom, gfp_t gfp) | |||
650 | 653 | ||
651 | spin_lock_init(&dom->lock); | 654 | spin_lock_init(&dom->lock); |
652 | 655 | ||
653 | setup_deferrable_timer(&dom->period_timer, writeout_period, | 656 | timer_setup(&dom->period_timer, writeout_period, TIMER_DEFERRABLE); |
654 | (unsigned long)dom); | ||
655 | 657 | ||
656 | dom->dirty_limit_tstamp = jiffies; | 658 | dom->dirty_limit_tstamp = jiffies; |
657 | 659 | ||
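The two hunks above are the standard timer API conversion: the callback now receives the struct timer_list pointer and recovers its container with from_timer(), and timer_setup() replaces the function-plus-unsigned-long pairing. A self-contained sketch with a hypothetical container type:

    #include <linux/timer.h>
    #include <linux/jiffies.h>

    struct my_domain {                      /* hypothetical container */
        struct timer_list period_timer;
        unsigned long period_time;
    };

    static void my_period_fn(struct timer_list *t)
    {
        /* from_timer() is container_of() keyed on the timer member */
        struct my_domain *dom = from_timer(dom, t, period_timer);

        dom->period_time = jiffies;
    }

    static void my_domain_init(struct my_domain *dom)
    {
        /* Replaces setup_deferrable_timer(&timer, fn, (unsigned long)dom) */
        timer_setup(&dom->period_timer, my_period_fn, TIMER_DEFERRABLE);
        dom->period_time = jiffies;
    }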
@@ -1543,7 +1545,7 @@ static inline void wb_dirty_limits(struct dirty_throttle_control *dtc) | |||
1543 | * actually dirty; with m+n sitting in the percpu | 1545 | * actually dirty; with m+n sitting in the percpu |
1544 | * deltas. | 1546 | * deltas. |
1545 | */ | 1547 | */ |
1546 | if (dtc->wb_thresh < 2 * wb_stat_error(wb)) { | 1548 | if (dtc->wb_thresh < 2 * wb_stat_error()) { |
1547 | wb_reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE); | 1549 | wb_reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE); |
1548 | dtc->wb_dirty = wb_reclaimable + wb_stat_sum(wb, WB_WRITEBACK); | 1550 | dtc->wb_dirty = wb_reclaimable + wb_stat_sum(wb, WB_WRITEBACK); |
1549 | } else { | 1551 | } else { |
@@ -1559,8 +1561,7 @@ static inline void wb_dirty_limits(struct dirty_throttle_control *dtc) | |||
1559 | * If we're over `background_thresh' then the writeback threads are woken to | 1561 | * If we're over `background_thresh' then the writeback threads are woken to |
1560 | * perform some writeout. | 1562 | * perform some writeout. |
1561 | */ | 1563 | */ |
1562 | static void balance_dirty_pages(struct address_space *mapping, | 1564 | static void balance_dirty_pages(struct bdi_writeback *wb, |
1563 | struct bdi_writeback *wb, | ||
1564 | unsigned long pages_dirtied) | 1565 | unsigned long pages_dirtied) |
1565 | { | 1566 | { |
1566 | struct dirty_throttle_control gdtc_stor = { GDTC_INIT(wb) }; | 1567 | struct dirty_throttle_control gdtc_stor = { GDTC_INIT(wb) }; |
@@ -1802,7 +1803,7 @@ pause: | |||
1802 | * more page. However wb_dirty has accounting errors. So use | 1803 | * more page. However wb_dirty has accounting errors. So use |
1803 | * the larger and more IO friendly wb_stat_error. | 1804 | * the larger and more IO friendly wb_stat_error. |
1804 | */ | 1805 | */ |
1805 | if (sdtc->wb_dirty <= wb_stat_error(wb)) | 1806 | if (sdtc->wb_dirty <= wb_stat_error()) |
1806 | break; | 1807 | break; |
1807 | 1808 | ||
1808 | if (fatal_signal_pending(current)) | 1809 | if (fatal_signal_pending(current)) |
@@ -1910,7 +1911,7 @@ void balance_dirty_pages_ratelimited(struct address_space *mapping) | |||
1910 | preempt_enable(); | 1911 | preempt_enable(); |
1911 | 1912 | ||
1912 | if (unlikely(current->nr_dirtied >= ratelimit)) | 1913 | if (unlikely(current->nr_dirtied >= ratelimit)) |
1913 | balance_dirty_pages(mapping, wb, current->nr_dirtied); | 1914 | balance_dirty_pages(wb, current->nr_dirtied); |
1914 | 1915 | ||
1915 | wb_put(wb); | 1916 | wb_put(wb); |
1916 | } | 1917 | } |
@@ -2167,7 +2168,7 @@ int write_cache_pages(struct address_space *mapping, | |||
2167 | int range_whole = 0; | 2168 | int range_whole = 0; |
2168 | int tag; | 2169 | int tag; |
2169 | 2170 | ||
2170 | pagevec_init(&pvec, 0); | 2171 | pagevec_init(&pvec); |
2171 | if (wbc->range_cyclic) { | 2172 | if (wbc->range_cyclic) { |
2172 | writeback_index = mapping->writeback_index; /* prev offset */ | 2173 | writeback_index = mapping->writeback_index; /* prev offset */ |
2173 | index = writeback_index; | 2174 | index = writeback_index; |
@@ -2194,30 +2195,14 @@ retry: | |||
2194 | while (!done && (index <= end)) { | 2195 | while (!done && (index <= end)) { |
2195 | int i; | 2196 | int i; |
2196 | 2197 | ||
2197 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, | 2198 | nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end, |
2198 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); | 2199 | tag); |
2199 | if (nr_pages == 0) | 2200 | if (nr_pages == 0) |
2200 | break; | 2201 | break; |
2201 | 2202 | ||
2202 | for (i = 0; i < nr_pages; i++) { | 2203 | for (i = 0; i < nr_pages; i++) { |
2203 | struct page *page = pvec.pages[i]; | 2204 | struct page *page = pvec.pages[i]; |
2204 | 2205 | ||
2205 | /* | ||
2206 | * At this point, the page may be truncated or | ||
2207 | * invalidated (changing page->mapping to NULL), or | ||
2208 | * even swizzled back from swapper_space to tmpfs file | ||
2209 | * mapping. However, page->index will not change | ||
2210 | * because we have a reference on the page. | ||
2211 | */ | ||
2212 | if (page->index > end) { | ||
2213 | /* | ||
2214 | * can't be range_cyclic (1st pass) because | ||
2215 | * end == -1 in that case. | ||
2216 | */ | ||
2217 | done = 1; | ||
2218 | break; | ||
2219 | } | ||
2220 | |||
2221 | done_index = page->index; | 2206 | done_index = page->index; |
2222 | 2207 | ||
2223 | lock_page(page); | 2208 | lock_page(page); |
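The write_cache_pages() hunk above can drop the manual page->index > end check because pagevec_lookup_range_tag() is bounded by the end index itself; pagevec_init() also loses its cold argument in this series. A sketch of the resulting loop shape (the walker function and its body are illustrative, not upstream code):

    #include <linux/pagevec.h>
    #include <linux/pagemap.h>
    #include <linux/sched.h>

    static void walk_tagged_pages(struct address_space *mapping,
                                  pgoff_t index, pgoff_t end, int tag)
    {
        struct pagevec pvec;
        unsigned int i, nr_pages;

        pagevec_init(&pvec);                /* no "cold" flag any more */
        while (index <= end) {
            /* Bounded lookup: never returns pages past "end" */
            nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index,
                                                end, tag);
            if (!nr_pages)
                break;
            for (i = 0; i < nr_pages; i++) {
                struct page *page = pvec.pages[i];

                /* ... lock, write back and unlock "page" here ... */
                (void)page;
            }
            pagevec_release(&pvec);
            cond_resched();
        }
    }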
@@ -2623,7 +2608,7 @@ EXPORT_SYMBOL(set_page_dirty_lock); | |||
2623 | * page without actually doing it through the VM. Can you say "ext3 is | 2608 | * page without actually doing it through the VM. Can you say "ext3 is |
2624 | * horribly ugly"? Thought you could. | 2609 | * horribly ugly"? Thought you could. |
2625 | */ | 2610 | */ |
2626 | void cancel_dirty_page(struct page *page) | 2611 | void __cancel_dirty_page(struct page *page) |
2627 | { | 2612 | { |
2628 | struct address_space *mapping = page_mapping(page); | 2613 | struct address_space *mapping = page_mapping(page); |
2629 | 2614 | ||
@@ -2644,7 +2629,7 @@ void cancel_dirty_page(struct page *page) | |||
2644 | ClearPageDirty(page); | 2629 | ClearPageDirty(page); |
2645 | } | 2630 | } |
2646 | } | 2631 | } |
2647 | EXPORT_SYMBOL(cancel_dirty_page); | 2632 | EXPORT_SYMBOL(__cancel_dirty_page); |
2648 | 2633 | ||
2649 | /* | 2634 | /* |
2650 | * Clear a page's dirty flag, while caring for dirty memory accounting. | 2635 | * Clear a page's dirty flag, while caring for dirty memory accounting. |
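The rename to __cancel_dirty_page() above pairs with an inline wrapper (presumably in include/linux/mm.h) so that clean pages avoid the out-of-line call entirely. A sketch of the likely shape, not copied from the tree:

    /* Likely shape of the wrapper (sketch) */
    static inline void cancel_dirty_page(struct page *page)
    {
        /* Most pages are clean; skip the expensive path for them */
        if (PageDirty(page))
            __cancel_dirty_page(page);
    }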
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 77e4d3c5c57b..55ded92f9809 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -24,7 +24,6 @@ | |||
24 | #include <linux/memblock.h> | 24 | #include <linux/memblock.h> |
25 | #include <linux/compiler.h> | 25 | #include <linux/compiler.h> |
26 | #include <linux/kernel.h> | 26 | #include <linux/kernel.h> |
27 | #include <linux/kmemcheck.h> | ||
28 | #include <linux/kasan.h> | 27 | #include <linux/kasan.h> |
29 | #include <linux/module.h> | 28 | #include <linux/module.h> |
30 | #include <linux/suspend.h> | 29 | #include <linux/suspend.h> |
@@ -83,6 +82,8 @@ DEFINE_PER_CPU(int, numa_node); | |||
83 | EXPORT_PER_CPU_SYMBOL(numa_node); | 82 | EXPORT_PER_CPU_SYMBOL(numa_node); |
84 | #endif | 83 | #endif |
85 | 84 | ||
85 | DEFINE_STATIC_KEY_TRUE(vm_numa_stat_key); | ||
86 | |||
86 | #ifdef CONFIG_HAVE_MEMORYLESS_NODES | 87 | #ifdef CONFIG_HAVE_MEMORYLESS_NODES |
87 | /* | 88 | /* |
88 | * N.B., Do NOT reference the '_numa_mem_' per cpu variable directly. | 89 | * N.B., Do NOT reference the '_numa_mem_' per cpu variable directly. |
@@ -290,28 +291,37 @@ EXPORT_SYMBOL(nr_online_nodes); | |||
290 | int page_group_by_mobility_disabled __read_mostly; | 291 | int page_group_by_mobility_disabled __read_mostly; |
291 | 292 | ||
292 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT | 293 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT |
294 | |||
295 | /* | ||
296 | * Determine how many pages need to be initialized during early boot | ||
297 | * (non-deferred initialization). | ||
298 | * The value of first_deferred_pfn will be set later, once non-deferred pages | ||
299 | * are initialized, but for now set it to ULONG_MAX. | ||
300 | */ | ||
293 | static inline void reset_deferred_meminit(pg_data_t *pgdat) | 301 | static inline void reset_deferred_meminit(pg_data_t *pgdat) |
294 | { | 302 | { |
295 | unsigned long max_initialise; | 303 | phys_addr_t start_addr, end_addr; |
296 | unsigned long reserved_lowmem; | 304 | unsigned long max_pgcnt; |
305 | unsigned long reserved; | ||
297 | 306 | ||
298 | /* | 307 | /* |
299 | * Initialise at least 2G of a node but also take into account that | 308 | * Initialise at least 2G of a node but also take into account that |
300 | * two large system hashes that can take up 1GB for 0.25TB/node. | 309 | * two large system hashes that can take up 1GB for 0.25TB/node. |
301 | */ | 310 | */ |
302 | max_initialise = max(2UL << (30 - PAGE_SHIFT), | 311 | max_pgcnt = max(2UL << (30 - PAGE_SHIFT), |
303 | (pgdat->node_spanned_pages >> 8)); | 312 | (pgdat->node_spanned_pages >> 8)); |
304 | 313 | ||
305 | /* | 314 | /* |
307 | * Compensate for all the memblock reservations (e.g. crash kernel) | 316 | * Compensate for all the memblock reservations (e.g. crash kernel) |
307 | * from the initial estimation to make sure we will initialize enough | 316 | * from the initial estimation to make sure we will initialize enough |
308 | * memory to boot. | 317 | * memory to boot. |
309 | */ | 318 | */ |
310 | reserved_lowmem = memblock_reserved_memory_within(pgdat->node_start_pfn, | 319 | start_addr = PFN_PHYS(pgdat->node_start_pfn); |
311 | pgdat->node_start_pfn + max_initialise); | 320 | end_addr = PFN_PHYS(pgdat->node_start_pfn + max_pgcnt); |
312 | max_initialise += reserved_lowmem; | 321 | reserved = memblock_reserved_memory_within(start_addr, end_addr); |
322 | max_pgcnt += PHYS_PFN(reserved); | ||
313 | 323 | ||
314 | pgdat->static_init_size = min(max_initialise, pgdat->node_spanned_pages); | 324 | pgdat->static_init_pgcnt = min(max_pgcnt, pgdat->node_spanned_pages); |
315 | pgdat->first_deferred_pfn = ULONG_MAX; | 325 | pgdat->first_deferred_pfn = ULONG_MAX; |
316 | } | 326 | } |
317 | 327 | ||
@@ -338,7 +348,7 @@ static inline bool update_defer_init(pg_data_t *pgdat, | |||
338 | if (zone_end < pgdat_end_pfn(pgdat)) | 348 | if (zone_end < pgdat_end_pfn(pgdat)) |
339 | return true; | 349 | return true; |
340 | (*nr_initialised)++; | 350 | (*nr_initialised)++; |
341 | if ((*nr_initialised > pgdat->static_init_size) && | 351 | if ((*nr_initialised > pgdat->static_init_pgcnt) && |
342 | (pfn & (PAGES_PER_SECTION - 1)) == 0) { | 352 | (pfn & (PAGES_PER_SECTION - 1)) == 0) { |
343 | pgdat->first_deferred_pfn = pfn; | 353 | pgdat->first_deferred_pfn = pfn; |
344 | return false; | 354 | return false; |
@@ -1013,7 +1023,6 @@ static __always_inline bool free_pages_prepare(struct page *page, | |||
1013 | VM_BUG_ON_PAGE(PageTail(page), page); | 1023 | VM_BUG_ON_PAGE(PageTail(page), page); |
1014 | 1024 | ||
1015 | trace_mm_page_free(page, order); | 1025 | trace_mm_page_free(page, order); |
1016 | kmemcheck_free_shadow(page, order); | ||
1017 | 1026 | ||
1018 | /* | 1027 | /* |
1019 | * Check tail pages before head page information is cleared to | 1028 | * Check tail pages before head page information is cleared to |
@@ -1170,6 +1179,7 @@ static void free_one_page(struct zone *zone, | |||
1170 | static void __meminit __init_single_page(struct page *page, unsigned long pfn, | 1179 | static void __meminit __init_single_page(struct page *page, unsigned long pfn, |
1171 | unsigned long zone, int nid) | 1180 | unsigned long zone, int nid) |
1172 | { | 1181 | { |
1182 | mm_zero_struct_page(page); | ||
1173 | set_page_links(page, zone, nid, pfn); | 1183 | set_page_links(page, zone, nid, pfn); |
1174 | init_page_count(page); | 1184 | init_page_count(page); |
1175 | page_mapcount_reset(page); | 1185 | page_mapcount_reset(page); |
@@ -1410,14 +1420,17 @@ void clear_zone_contiguous(struct zone *zone) | |||
1410 | } | 1420 | } |
1411 | 1421 | ||
1412 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT | 1422 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT |
1413 | static void __init deferred_free_range(struct page *page, | 1423 | static void __init deferred_free_range(unsigned long pfn, |
1414 | unsigned long pfn, int nr_pages) | 1424 | unsigned long nr_pages) |
1415 | { | 1425 | { |
1416 | int i; | 1426 | struct page *page; |
1427 | unsigned long i; | ||
1417 | 1428 | ||
1418 | if (!page) | 1429 | if (!nr_pages) |
1419 | return; | 1430 | return; |
1420 | 1431 | ||
1432 | page = pfn_to_page(pfn); | ||
1433 | |||
1421 | /* Free a large naturally-aligned chunk if possible */ | 1434 | /* Free a large naturally-aligned chunk if possible */ |
1422 | if (nr_pages == pageblock_nr_pages && | 1435 | if (nr_pages == pageblock_nr_pages && |
1423 | (pfn & (pageblock_nr_pages - 1)) == 0) { | 1436 | (pfn & (pageblock_nr_pages - 1)) == 0) { |
@@ -1443,19 +1456,109 @@ static inline void __init pgdat_init_report_one_done(void) | |||
1443 | complete(&pgdat_init_all_done_comp); | 1456 | complete(&pgdat_init_all_done_comp); |
1444 | } | 1457 | } |
1445 | 1458 | ||
1459 | /* | ||
1460 | * Helper for deferred_init_range(): free the given range, reset the counters, | ||
1461 | * and return the number of pages freed. | ||
1462 | */ | ||
1463 | static inline unsigned long __init __def_free(unsigned long *nr_free, | ||
1464 | unsigned long *free_base_pfn, | ||
1465 | struct page **page) | ||
1466 | { | ||
1467 | unsigned long nr = *nr_free; | ||
1468 | |||
1469 | deferred_free_range(*free_base_pfn, nr); | ||
1470 | *free_base_pfn = 0; | ||
1471 | *nr_free = 0; | ||
1472 | *page = NULL; | ||
1473 | |||
1474 | return nr; | ||
1475 | } | ||
1476 | |||
1477 | static unsigned long __init deferred_init_range(int nid, int zid, | ||
1478 | unsigned long start_pfn, | ||
1479 | unsigned long end_pfn) | ||
1480 | { | ||
1481 | struct mminit_pfnnid_cache nid_init_state = { }; | ||
1482 | unsigned long nr_pgmask = pageblock_nr_pages - 1; | ||
1483 | unsigned long free_base_pfn = 0; | ||
1484 | unsigned long nr_pages = 0; | ||
1485 | unsigned long nr_free = 0; | ||
1486 | struct page *page = NULL; | ||
1487 | unsigned long pfn; | ||
1488 | |||
1489 | /* | ||
1490 | * First we check if pfn is valid on architectures where it is possible | ||
1491 | * to have holes within pageblock_nr_pages. On systems where it is not | ||
1492 | * possible, this function is optimized out. | ||
1493 | * | ||
1494 | * Then, we check if a current large page is valid by only checking the | ||
1495 | * validity of the head pfn. | ||
1496 | * | ||
1497 | * meminit_pfn_in_nid is checked on systems where pfns can interleave | ||
1498 | * within a node: a pfn is between start and end of a node, but does not | ||
1499 | * belong to this memory node. | ||
1500 | * | ||
1501 | * Finally, we minimize pfn page lookups and scheduler checks by | ||
1502 | * performing it only once every pageblock_nr_pages. | ||
1503 | * | ||
1504 | * We do it in two loops: first we initialize struct page, then free to | ||
1505 | * the buddy allocator, because while we are freeing pages we can access | ||
1506 | * pages that are ahead (computing buddy page in __free_one_page()). | ||
1507 | */ | ||
1508 | for (pfn = start_pfn; pfn < end_pfn; pfn++) { | ||
1509 | if (!pfn_valid_within(pfn)) | ||
1510 | continue; | ||
1511 | if ((pfn & nr_pgmask) || pfn_valid(pfn)) { | ||
1512 | if (meminit_pfn_in_nid(pfn, nid, &nid_init_state)) { | ||
1513 | if (page && (pfn & nr_pgmask)) | ||
1514 | page++; | ||
1515 | else | ||
1516 | page = pfn_to_page(pfn); | ||
1517 | __init_single_page(page, pfn, zid, nid); | ||
1518 | cond_resched(); | ||
1519 | } | ||
1520 | } | ||
1521 | } | ||
1522 | |||
1523 | page = NULL; | ||
1524 | for (pfn = start_pfn; pfn < end_pfn; pfn++) { | ||
1525 | if (!pfn_valid_within(pfn)) { | ||
1526 | nr_pages += __def_free(&nr_free, &free_base_pfn, &page); | ||
1527 | } else if (!(pfn & nr_pgmask) && !pfn_valid(pfn)) { | ||
1528 | nr_pages += __def_free(&nr_free, &free_base_pfn, &page); | ||
1529 | } else if (!meminit_pfn_in_nid(pfn, nid, &nid_init_state)) { | ||
1530 | nr_pages += __def_free(&nr_free, &free_base_pfn, &page); | ||
1531 | } else if (page && (pfn & nr_pgmask)) { | ||
1532 | page++; | ||
1533 | nr_free++; | ||
1534 | } else { | ||
1535 | nr_pages += __def_free(&nr_free, &free_base_pfn, &page); | ||
1536 | page = pfn_to_page(pfn); | ||
1537 | free_base_pfn = pfn; | ||
1538 | nr_free = 1; | ||
1539 | cond_resched(); | ||
1540 | } | ||
1541 | } | ||
1542 | /* Free the last block of pages to allocator */ | ||
1543 | nr_pages += __def_free(&nr_free, &free_base_pfn, &page); | ||
1544 | |||
1545 | return nr_pages; | ||
1546 | } | ||
1547 | |||
1446 | /* Initialise remaining memory on a node */ | 1548 | /* Initialise remaining memory on a node */ |
1447 | static int __init deferred_init_memmap(void *data) | 1549 | static int __init deferred_init_memmap(void *data) |
1448 | { | 1550 | { |
1449 | pg_data_t *pgdat = data; | 1551 | pg_data_t *pgdat = data; |
1450 | int nid = pgdat->node_id; | 1552 | int nid = pgdat->node_id; |
1451 | struct mminit_pfnnid_cache nid_init_state = { }; | ||
1452 | unsigned long start = jiffies; | 1553 | unsigned long start = jiffies; |
1453 | unsigned long nr_pages = 0; | 1554 | unsigned long nr_pages = 0; |
1454 | unsigned long walk_start, walk_end; | 1555 | unsigned long spfn, epfn; |
1455 | int i, zid; | 1556 | phys_addr_t spa, epa; |
1557 | int zid; | ||
1456 | struct zone *zone; | 1558 | struct zone *zone; |
1457 | unsigned long first_init_pfn = pgdat->first_deferred_pfn; | 1559 | unsigned long first_init_pfn = pgdat->first_deferred_pfn; |
1458 | const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id); | 1560 | const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id); |
1561 | u64 i; | ||
1459 | 1562 | ||
1460 | if (first_init_pfn == ULONG_MAX) { | 1563 | if (first_init_pfn == ULONG_MAX) { |
1461 | pgdat_init_report_one_done(); | 1564 | pgdat_init_report_one_done(); |
@@ -1477,83 +1580,12 @@ static int __init deferred_init_memmap(void *data) | |||
1477 | if (first_init_pfn < zone_end_pfn(zone)) | 1580 | if (first_init_pfn < zone_end_pfn(zone)) |
1478 | break; | 1581 | break; |
1479 | } | 1582 | } |
1583 | first_init_pfn = max(zone->zone_start_pfn, first_init_pfn); | ||
1480 | 1584 | ||
1481 | for_each_mem_pfn_range(i, nid, &walk_start, &walk_end, NULL) { | 1585 | for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) { |
1482 | unsigned long pfn, end_pfn; | 1586 | spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa)); |
1483 | struct page *page = NULL; | 1587 | epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa)); |
1484 | struct page *free_base_page = NULL; | 1588 | nr_pages += deferred_init_range(nid, zid, spfn, epfn); |
1485 | unsigned long free_base_pfn = 0; | ||
1486 | int nr_to_free = 0; | ||
1487 | |||
1488 | end_pfn = min(walk_end, zone_end_pfn(zone)); | ||
1489 | pfn = first_init_pfn; | ||
1490 | if (pfn < walk_start) | ||
1491 | pfn = walk_start; | ||
1492 | if (pfn < zone->zone_start_pfn) | ||
1493 | pfn = zone->zone_start_pfn; | ||
1494 | |||
1495 | for (; pfn < end_pfn; pfn++) { | ||
1496 | if (!pfn_valid_within(pfn)) | ||
1497 | goto free_range; | ||
1498 | |||
1499 | /* | ||
1500 | * Ensure pfn_valid is checked every | ||
1501 | * pageblock_nr_pages for memory holes | ||
1502 | */ | ||
1503 | if ((pfn & (pageblock_nr_pages - 1)) == 0) { | ||
1504 | if (!pfn_valid(pfn)) { | ||
1505 | page = NULL; | ||
1506 | goto free_range; | ||
1507 | } | ||
1508 | } | ||
1509 | |||
1510 | if (!meminit_pfn_in_nid(pfn, nid, &nid_init_state)) { | ||
1511 | page = NULL; | ||
1512 | goto free_range; | ||
1513 | } | ||
1514 | |||
1515 | /* Minimise pfn page lookups and scheduler checks */ | ||
1516 | if (page && (pfn & (pageblock_nr_pages - 1)) != 0) { | ||
1517 | page++; | ||
1518 | } else { | ||
1519 | nr_pages += nr_to_free; | ||
1520 | deferred_free_range(free_base_page, | ||
1521 | free_base_pfn, nr_to_free); | ||
1522 | free_base_page = NULL; | ||
1523 | free_base_pfn = nr_to_free = 0; | ||
1524 | |||
1525 | page = pfn_to_page(pfn); | ||
1526 | cond_resched(); | ||
1527 | } | ||
1528 | |||
1529 | if (page->flags) { | ||
1530 | VM_BUG_ON(page_zone(page) != zone); | ||
1531 | goto free_range; | ||
1532 | } | ||
1533 | |||
1534 | __init_single_page(page, pfn, zid, nid); | ||
1535 | if (!free_base_page) { | ||
1536 | free_base_page = page; | ||
1537 | free_base_pfn = pfn; | ||
1538 | nr_to_free = 0; | ||
1539 | } | ||
1540 | nr_to_free++; | ||
1541 | |||
1542 | /* Where possible, batch up pages for a single free */ | ||
1543 | continue; | ||
1544 | free_range: | ||
1545 | /* Free the current block of pages to allocator */ | ||
1546 | nr_pages += nr_to_free; | ||
1547 | deferred_free_range(free_base_page, free_base_pfn, | ||
1548 | nr_to_free); | ||
1549 | free_base_page = NULL; | ||
1550 | free_base_pfn = nr_to_free = 0; | ||
1551 | } | ||
1552 | /* Free the last block of pages to allocator */ | ||
1553 | nr_pages += nr_to_free; | ||
1554 | deferred_free_range(free_base_page, free_base_pfn, nr_to_free); | ||
1555 | |||
1556 | first_init_pfn = max(end_pfn, first_init_pfn); | ||
1557 | } | 1589 | } |
1558 | 1590 | ||
1559 | /* Sanity check that the next zone really is unpopulated */ | 1591 | /* Sanity check that the next zone really is unpopulated */ |
@@ -1792,7 +1824,7 @@ static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags | |||
1792 | * Go through the free lists for the given migratetype and remove | 1824 | * Go through the free lists for the given migratetype and remove |
1793 | * the smallest available page from the freelists | 1825 | * the smallest available page from the freelists |
1794 | */ | 1826 | */ |
1795 | static inline | 1827 | static __always_inline |
1796 | struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, | 1828 | struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, |
1797 | int migratetype) | 1829 | int migratetype) |
1798 | { | 1830 | { |
@@ -1836,7 +1868,7 @@ static int fallbacks[MIGRATE_TYPES][4] = { | |||
1836 | }; | 1868 | }; |
1837 | 1869 | ||
1838 | #ifdef CONFIG_CMA | 1870 | #ifdef CONFIG_CMA |
1839 | static struct page *__rmqueue_cma_fallback(struct zone *zone, | 1871 | static __always_inline struct page *__rmqueue_cma_fallback(struct zone *zone, |
1840 | unsigned int order) | 1872 | unsigned int order) |
1841 | { | 1873 | { |
1842 | return __rmqueue_smallest(zone, order, MIGRATE_CMA); | 1874 | return __rmqueue_smallest(zone, order, MIGRATE_CMA); |
@@ -2217,7 +2249,7 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, | |||
2217 | * deviation from the rest of this file, to make the for loop | 2249 | * deviation from the rest of this file, to make the for loop |
2218 | * condition simpler. | 2250 | * condition simpler. |
2219 | */ | 2251 | */ |
2220 | static inline bool | 2252 | static __always_inline bool |
2221 | __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) | 2253 | __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) |
2222 | { | 2254 | { |
2223 | struct free_area *area; | 2255 | struct free_area *area; |
@@ -2289,8 +2321,8 @@ do_steal: | |||
2289 | * Do the hard work of removing an element from the buddy allocator. | 2321 | * Do the hard work of removing an element from the buddy allocator. |
2290 | * Call me with the zone->lock already held. | 2322 | * Call me with the zone->lock already held. |
2291 | */ | 2323 | */ |
2292 | static struct page *__rmqueue(struct zone *zone, unsigned int order, | 2324 | static __always_inline struct page * |
2293 | int migratetype) | 2325 | __rmqueue(struct zone *zone, unsigned int order, int migratetype) |
2294 | { | 2326 | { |
2295 | struct page *page; | 2327 | struct page *page; |
2296 | 2328 | ||
@@ -2315,7 +2347,7 @@ retry: | |||
2315 | */ | 2347 | */ |
2316 | static int rmqueue_bulk(struct zone *zone, unsigned int order, | 2348 | static int rmqueue_bulk(struct zone *zone, unsigned int order, |
2317 | unsigned long count, struct list_head *list, | 2349 | unsigned long count, struct list_head *list, |
2318 | int migratetype, bool cold) | 2350 | int migratetype) |
2319 | { | 2351 | { |
2320 | int i, alloced = 0; | 2352 | int i, alloced = 0; |
2321 | 2353 | ||
@@ -2329,19 +2361,16 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, | |||
2329 | continue; | 2361 | continue; |
2330 | 2362 | ||
2331 | /* | 2363 | /* |
2332 | * Split buddy pages returned by expand() are received here | 2364 | * Split buddy pages returned by expand() are received here in |
2333 | * in physical page order. The page is added to the callers and | 2365 | * physical page order. The page is added to the tail of |
2334 | * list and the list head then moves forward. From the callers | 2366 | * caller's list. From the callers perspective, the linked list |
2335 | * perspective, the linked list is ordered by page number in | 2367 | * is ordered by page number under some conditions. This is |
2336 | * some conditions. This is useful for IO devices that can | 2368 | * useful for IO devices that can forward direction from the |
2337 | * merge IO requests if the physical pages are ordered | 2369 | * head, thus also in the physical page order. This is useful |
2338 | * properly. | 2370 | * for IO devices that can merge IO requests if the physical |
2371 | * pages are ordered properly. | ||
2339 | */ | 2372 | */ |
2340 | if (likely(!cold)) | 2373 | list_add_tail(&page->lru, list); |
2341 | list_add(&page->lru, list); | ||
2342 | else | ||
2343 | list_add_tail(&page->lru, list); | ||
2344 | list = &page->lru; | ||
2345 | alloced++; | 2374 | alloced++; |
2346 | if (is_migrate_cma(get_pcppage_migratetype(page))) | 2375 | if (is_migrate_cma(get_pcppage_migratetype(page))) |
2347 | __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, | 2376 | __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, |
@@ -2590,24 +2619,25 @@ void mark_free_pages(struct zone *zone) | |||
2590 | } | 2619 | } |
2591 | #endif /* CONFIG_PM */ | 2620 | #endif /* CONFIG_PM */ |
2592 | 2621 | ||
2593 | /* | 2622 | static bool free_unref_page_prepare(struct page *page, unsigned long pfn) |
2594 | * Free a 0-order page | ||
2595 | * cold == true ? free a cold page : free a hot page | ||
2596 | */ | ||
2597 | void free_hot_cold_page(struct page *page, bool cold) | ||
2598 | { | 2623 | { |
2599 | struct zone *zone = page_zone(page); | ||
2600 | struct per_cpu_pages *pcp; | ||
2601 | unsigned long flags; | ||
2602 | unsigned long pfn = page_to_pfn(page); | ||
2603 | int migratetype; | 2624 | int migratetype; |
2604 | 2625 | ||
2605 | if (!free_pcp_prepare(page)) | 2626 | if (!free_pcp_prepare(page)) |
2606 | return; | 2627 | return false; |
2607 | 2628 | ||
2608 | migratetype = get_pfnblock_migratetype(page, pfn); | 2629 | migratetype = get_pfnblock_migratetype(page, pfn); |
2609 | set_pcppage_migratetype(page, migratetype); | 2630 | set_pcppage_migratetype(page, migratetype); |
2610 | local_irq_save(flags); | 2631 | return true; |
2632 | } | ||
2633 | |||
2634 | static void free_unref_page_commit(struct page *page, unsigned long pfn) | ||
2635 | { | ||
2636 | struct zone *zone = page_zone(page); | ||
2637 | struct per_cpu_pages *pcp; | ||
2638 | int migratetype; | ||
2639 | |||
2640 | migratetype = get_pcppage_migratetype(page); | ||
2611 | __count_vm_event(PGFREE); | 2641 | __count_vm_event(PGFREE); |
2612 | 2642 | ||
2613 | /* | 2643 | /* |
@@ -2620,38 +2650,62 @@ void free_hot_cold_page(struct page *page, bool cold) | |||
2620 | if (migratetype >= MIGRATE_PCPTYPES) { | 2650 | if (migratetype >= MIGRATE_PCPTYPES) { |
2621 | if (unlikely(is_migrate_isolate(migratetype))) { | 2651 | if (unlikely(is_migrate_isolate(migratetype))) { |
2622 | free_one_page(zone, page, pfn, 0, migratetype); | 2652 | free_one_page(zone, page, pfn, 0, migratetype); |
2623 | goto out; | 2653 | return; |
2624 | } | 2654 | } |
2625 | migratetype = MIGRATE_MOVABLE; | 2655 | migratetype = MIGRATE_MOVABLE; |
2626 | } | 2656 | } |
2627 | 2657 | ||
2628 | pcp = &this_cpu_ptr(zone->pageset)->pcp; | 2658 | pcp = &this_cpu_ptr(zone->pageset)->pcp; |
2629 | if (!cold) | 2659 | list_add(&page->lru, &pcp->lists[migratetype]); |
2630 | list_add(&page->lru, &pcp->lists[migratetype]); | ||
2631 | else | ||
2632 | list_add_tail(&page->lru, &pcp->lists[migratetype]); | ||
2633 | pcp->count++; | 2660 | pcp->count++; |
2634 | if (pcp->count >= pcp->high) { | 2661 | if (pcp->count >= pcp->high) { |
2635 | unsigned long batch = READ_ONCE(pcp->batch); | 2662 | unsigned long batch = READ_ONCE(pcp->batch); |
2636 | free_pcppages_bulk(zone, batch, pcp); | 2663 | free_pcppages_bulk(zone, batch, pcp); |
2637 | pcp->count -= batch; | 2664 | pcp->count -= batch; |
2638 | } | 2665 | } |
2666 | } | ||
2639 | 2667 | ||
2640 | out: | 2668 | /* |
2669 | * Free a 0-order page | ||
2670 | */ | ||
2671 | void free_unref_page(struct page *page) | ||
2672 | { | ||
2673 | unsigned long flags; | ||
2674 | unsigned long pfn = page_to_pfn(page); | ||
2675 | |||
2676 | if (!free_unref_page_prepare(page, pfn)) | ||
2677 | return; | ||
2678 | |||
2679 | local_irq_save(flags); | ||
2680 | free_unref_page_commit(page, pfn); | ||
2641 | local_irq_restore(flags); | 2681 | local_irq_restore(flags); |
2642 | } | 2682 | } |
2643 | 2683 | ||
2644 | /* | 2684 | /* |
2645 | * Free a list of 0-order pages | 2685 | * Free a list of 0-order pages |
2646 | */ | 2686 | */ |
2647 | void free_hot_cold_page_list(struct list_head *list, bool cold) | 2687 | void free_unref_page_list(struct list_head *list) |
2648 | { | 2688 | { |
2649 | struct page *page, *next; | 2689 | struct page *page, *next; |
2690 | unsigned long flags, pfn; | ||
2691 | |||
2692 | /* Prepare pages for freeing */ | ||
2693 | list_for_each_entry_safe(page, next, list, lru) { | ||
2694 | pfn = page_to_pfn(page); | ||
2695 | if (!free_unref_page_prepare(page, pfn)) | ||
2696 | list_del(&page->lru); | ||
2697 | set_page_private(page, pfn); | ||
2698 | } | ||
2650 | 2699 | ||
2700 | local_irq_save(flags); | ||
2651 | list_for_each_entry_safe(page, next, list, lru) { | 2701 | list_for_each_entry_safe(page, next, list, lru) { |
2652 | trace_mm_page_free_batched(page, cold); | 2702 | unsigned long pfn = page_private(page); |
2653 | free_hot_cold_page(page, cold); | 2703 | |
2704 | set_page_private(page, 0); | ||
2705 | trace_mm_page_free_batched(page); | ||
2706 | free_unref_page_commit(page, pfn); | ||
2654 | } | 2707 | } |
2708 | local_irq_restore(flags); | ||
2655 | } | 2709 | } |
2656 | 2710 | ||
2657 | /* | 2711 | /* |
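The free_unref_page()/free_unref_page_list() split above keeps the preparation checks outside the IRQ-disabled region and commits a whole batch of pages under a single local_irq_save(). An illustrative caller of this mm-internal interface (the function name is hypothetical; release_pages() is the real in-tree user of the pattern):

    #include <linux/list.h>
    #include <linux/mm.h>

    /* Drop the last reference on a batch of order-0 pages and free them */
    static void drop_page_batch(struct page **pages, int nr)
    {
        LIST_HEAD(pages_to_free);
        int i;

        for (i = 0; i < nr; i++) {
            if (put_page_testzero(pages[i]))
                list_add(&pages[i]->lru, &pages_to_free);
        }

        /* One IRQ-off section and one per-cpu list pass for the whole batch */
        free_unref_page_list(&pages_to_free);
    }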
@@ -2669,15 +2723,6 @@ void split_page(struct page *page, unsigned int order) | |||
2669 | VM_BUG_ON_PAGE(PageCompound(page), page); | 2723 | VM_BUG_ON_PAGE(PageCompound(page), page); |
2670 | VM_BUG_ON_PAGE(!page_count(page), page); | 2724 | VM_BUG_ON_PAGE(!page_count(page), page); |
2671 | 2725 | ||
2672 | #ifdef CONFIG_KMEMCHECK | ||
2673 | /* | ||
2674 | * Split shadow pages too, because free(page[0]) would | ||
2675 | * otherwise free the whole shadow. | ||
2676 | */ | ||
2677 | if (kmemcheck_page_is_tracked(page)) | ||
2678 | split_page(virt_to_page(page[0].shadow), order); | ||
2679 | #endif | ||
2680 | |||
2681 | for (i = 1; i < (1 << order); i++) | 2726 | for (i = 1; i < (1 << order); i++) |
2682 | set_page_refcounted(page + i); | 2727 | set_page_refcounted(page + i); |
2683 | split_page_owner(page, order); | 2728 | split_page_owner(page, order); |
@@ -2743,6 +2788,10 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z) | |||
2743 | #ifdef CONFIG_NUMA | 2788 | #ifdef CONFIG_NUMA |
2744 | enum numa_stat_item local_stat = NUMA_LOCAL; | 2789 | enum numa_stat_item local_stat = NUMA_LOCAL; |
2745 | 2790 | ||
2791 | /* skip numa counters update if numa stats is disabled */ | ||
2792 | if (!static_branch_likely(&vm_numa_stat_key)) | ||
2793 | return; | ||
2794 | |||
2746 | if (z->node != numa_node_id()) | 2795 | if (z->node != numa_node_id()) |
2747 | local_stat = NUMA_OTHER; | 2796 | local_stat = NUMA_OTHER; |
2748 | 2797 | ||
@@ -2758,7 +2807,7 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z) | |||
2758 | 2807 | ||
2759 | /* Remove page from the per-cpu list, caller must protect the list */ | 2808 | /* Remove page from the per-cpu list, caller must protect the list */ |
2760 | static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype, | 2809 | static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype, |
2761 | bool cold, struct per_cpu_pages *pcp, | 2810 | struct per_cpu_pages *pcp, |
2762 | struct list_head *list) | 2811 | struct list_head *list) |
2763 | { | 2812 | { |
2764 | struct page *page; | 2813 | struct page *page; |
@@ -2767,16 +2816,12 @@ static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype, | |||
2767 | if (list_empty(list)) { | 2816 | if (list_empty(list)) { |
2768 | pcp->count += rmqueue_bulk(zone, 0, | 2817 | pcp->count += rmqueue_bulk(zone, 0, |
2769 | pcp->batch, list, | 2818 | pcp->batch, list, |
2770 | migratetype, cold); | 2819 | migratetype); |
2771 | if (unlikely(list_empty(list))) | 2820 | if (unlikely(list_empty(list))) |
2772 | return NULL; | 2821 | return NULL; |
2773 | } | 2822 | } |
2774 | 2823 | ||
2775 | if (cold) | 2824 | page = list_first_entry(list, struct page, lru); |
2776 | page = list_last_entry(list, struct page, lru); | ||
2777 | else | ||
2778 | page = list_first_entry(list, struct page, lru); | ||
2779 | |||
2780 | list_del(&page->lru); | 2825 | list_del(&page->lru); |
2781 | pcp->count--; | 2826 | pcp->count--; |
2782 | } while (check_new_pcp(page)); | 2827 | } while (check_new_pcp(page)); |
@@ -2791,14 +2836,13 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone, | |||
2791 | { | 2836 | { |
2792 | struct per_cpu_pages *pcp; | 2837 | struct per_cpu_pages *pcp; |
2793 | struct list_head *list; | 2838 | struct list_head *list; |
2794 | bool cold = ((gfp_flags & __GFP_COLD) != 0); | ||
2795 | struct page *page; | 2839 | struct page *page; |
2796 | unsigned long flags; | 2840 | unsigned long flags; |
2797 | 2841 | ||
2798 | local_irq_save(flags); | 2842 | local_irq_save(flags); |
2799 | pcp = &this_cpu_ptr(zone->pageset)->pcp; | 2843 | pcp = &this_cpu_ptr(zone->pageset)->pcp; |
2800 | list = &pcp->lists[migratetype]; | 2844 | list = &pcp->lists[migratetype]; |
2801 | page = __rmqueue_pcplist(zone, migratetype, cold, pcp, list); | 2845 | page = __rmqueue_pcplist(zone, migratetype, pcp, list); |
2802 | if (page) { | 2846 | if (page) { |
2803 | __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); | 2847 | __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); |
2804 | zone_statistics(preferred_zone, zone); | 2848 | zone_statistics(preferred_zone, zone); |
@@ -3006,9 +3050,6 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, | |||
3006 | if (!area->nr_free) | 3050 | if (!area->nr_free) |
3007 | continue; | 3051 | continue; |
3008 | 3052 | ||
3009 | if (alloc_harder) | ||
3010 | return true; | ||
3011 | |||
3012 | for (mt = 0; mt < MIGRATE_PCPTYPES; mt++) { | 3053 | for (mt = 0; mt < MIGRATE_PCPTYPES; mt++) { |
3013 | if (!list_empty(&area->free_list[mt])) | 3054 | if (!list_empty(&area->free_list[mt])) |
3014 | return true; | 3055 | return true; |
@@ -3020,6 +3061,9 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, | |||
3020 | return true; | 3061 | return true; |
3021 | } | 3062 | } |
3022 | #endif | 3063 | #endif |
3064 | if (alloc_harder && | ||
3065 | !list_empty(&area->free_list[MIGRATE_HIGHATOMIC])) | ||
3066 | return true; | ||
3023 | } | 3067 | } |
3024 | return false; | 3068 | return false; |
3025 | } | 3069 | } |
@@ -3235,20 +3279,14 @@ void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...) | |||
3235 | if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs)) | 3279 | if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs)) |
3236 | return; | 3280 | return; |
3237 | 3281 | ||
3238 | pr_warn("%s: ", current->comm); | ||
3239 | |||
3240 | va_start(args, fmt); | 3282 | va_start(args, fmt); |
3241 | vaf.fmt = fmt; | 3283 | vaf.fmt = fmt; |
3242 | vaf.va = &args; | 3284 | vaf.va = &args; |
3243 | pr_cont("%pV", &vaf); | 3285 | pr_warn("%s: %pV, mode:%#x(%pGg), nodemask=%*pbl\n", |
3286 | current->comm, &vaf, gfp_mask, &gfp_mask, | ||
3287 | nodemask_pr_args(nodemask)); | ||
3244 | va_end(args); | 3288 | va_end(args); |
3245 | 3289 | ||
3246 | pr_cont(", mode:%#x(%pGg), nodemask=", gfp_mask, &gfp_mask); | ||
3247 | if (nodemask) | ||
3248 | pr_cont("%*pbl\n", nodemask_pr_args(nodemask)); | ||
3249 | else | ||
3250 | pr_cont("(null)\n"); | ||
3251 | |||
3252 | cpuset_print_current_mems_allowed(); | 3290 | cpuset_print_current_mems_allowed(); |
3253 | 3291 | ||
3254 | dump_stack(); | 3292 | dump_stack(); |
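The consolidated warn_alloc() message above leans on extended printk specifiers: %pV expands the caller's format through struct va_format, %pGg decodes gfp flag names, and %*pbl prints the nodemask as a range list; with the companion nodemask_pr_args() change a NULL mask now prints as "(null)" directly, so the old if/else branches go away. A minimal sketch of the same pattern under a hypothetical wrapper name:

    #include <linux/kernel.h>
    #include <linux/printk.h>
    #include <linux/nodemask.h>
    #include <linux/gfp.h>
    #include <linux/sched.h>

    static void report_alloc_failure(gfp_t gfp_mask, nodemask_t *nodemask,
                                     const char *fmt, ...)
    {
        struct va_format vaf;
        va_list args;

        va_start(args, fmt);
        vaf.fmt = fmt;
        vaf.va = &args;
        /* %pV nests the caller's message; %pGg and %*pbl decode flags/mask */
        pr_warn("%s: %pV, mode:%#x(%pGg), nodemask=%*pbl\n",
                current->comm, &vaf, gfp_mask, &gfp_mask,
                nodemask_pr_args(nodemask));
        va_end(args);
    }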
@@ -3868,8 +3906,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, | |||
3868 | enum compact_result compact_result; | 3906 | enum compact_result compact_result; |
3869 | int compaction_retries; | 3907 | int compaction_retries; |
3870 | int no_progress_loops; | 3908 | int no_progress_loops; |
3871 | unsigned long alloc_start = jiffies; | ||
3872 | unsigned int stall_timeout = 10 * HZ; | ||
3873 | unsigned int cpuset_mems_cookie; | 3909 | unsigned int cpuset_mems_cookie; |
3874 | int reserve_flags; | 3910 | int reserve_flags; |
3875 | 3911 | ||
@@ -4001,14 +4037,6 @@ retry: | |||
4001 | if (!can_direct_reclaim) | 4037 | if (!can_direct_reclaim) |
4002 | goto nopage; | 4038 | goto nopage; |
4003 | 4039 | ||
4004 | /* Make sure we know about allocations which stall for too long */ | ||
4005 | if (time_after(jiffies, alloc_start + stall_timeout)) { | ||
4006 | warn_alloc(gfp_mask & ~__GFP_NOWARN, ac->nodemask, | ||
4007 | "page allocation stalls for %ums, order:%u", | ||
4008 | jiffies_to_msecs(jiffies-alloc_start), order); | ||
4009 | stall_timeout += 10 * HZ; | ||
4010 | } | ||
4011 | |||
4012 | /* Avoid recursion of direct reclaim */ | 4040 | /* Avoid recursion of direct reclaim */ |
4013 | if (current->flags & PF_MEMALLOC) | 4041 | if (current->flags & PF_MEMALLOC) |
4014 | goto nopage; | 4042 | goto nopage; |
@@ -4223,9 +4251,6 @@ out: | |||
4223 | page = NULL; | 4251 | page = NULL; |
4224 | } | 4252 | } |
4225 | 4253 | ||
4226 | if (kmemcheck_enabled && page) | ||
4227 | kmemcheck_pagealloc_alloc(page, order, gfp_mask); | ||
4228 | |||
4229 | trace_mm_page_alloc(page, order, alloc_mask, ac.migratetype); | 4254 | trace_mm_page_alloc(page, order, alloc_mask, ac.migratetype); |
4230 | 4255 | ||
4231 | return page; | 4256 | return page; |
@@ -4262,7 +4287,7 @@ void __free_pages(struct page *page, unsigned int order) | |||
4262 | { | 4287 | { |
4263 | if (put_page_testzero(page)) { | 4288 | if (put_page_testzero(page)) { |
4264 | if (order == 0) | 4289 | if (order == 0) |
4265 | free_hot_cold_page(page, false); | 4290 | free_unref_page(page); |
4266 | else | 4291 | else |
4267 | __free_pages_ok(page, order); | 4292 | __free_pages_ok(page, order); |
4268 | } | 4293 | } |
@@ -4320,7 +4345,7 @@ void __page_frag_cache_drain(struct page *page, unsigned int count) | |||
4320 | unsigned int order = compound_order(page); | 4345 | unsigned int order = compound_order(page); |
4321 | 4346 | ||
4322 | if (order == 0) | 4347 | if (order == 0) |
4323 | free_hot_cold_page(page, false); | 4348 | free_unref_page(page); |
4324 | else | 4349 | else |
4325 | __free_pages_ok(page, order); | 4350 | __free_pages_ok(page, order); |
4326 | } | 4351 | } |
@@ -6126,6 +6151,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat) | |||
6126 | } | 6151 | } |
6127 | } | 6152 | } |
6128 | 6153 | ||
6154 | #ifdef CONFIG_FLAT_NODE_MEM_MAP | ||
6129 | static void __ref alloc_node_mem_map(struct pglist_data *pgdat) | 6155 | static void __ref alloc_node_mem_map(struct pglist_data *pgdat) |
6130 | { | 6156 | { |
6131 | unsigned long __maybe_unused start = 0; | 6157 | unsigned long __maybe_unused start = 0; |
@@ -6135,7 +6161,6 @@ static void __ref alloc_node_mem_map(struct pglist_data *pgdat) | |||
6135 | if (!pgdat->node_spanned_pages) | 6161 | if (!pgdat->node_spanned_pages) |
6136 | return; | 6162 | return; |
6137 | 6163 | ||
6138 | #ifdef CONFIG_FLAT_NODE_MEM_MAP | ||
6139 | start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1); | 6164 | start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1); |
6140 | offset = pgdat->node_start_pfn - start; | 6165 | offset = pgdat->node_start_pfn - start; |
6141 | /* ia64 gets its own node_mem_map, before this, without bootmem */ | 6166 | /* ia64 gets its own node_mem_map, before this, without bootmem */ |
@@ -6157,6 +6182,9 @@ static void __ref alloc_node_mem_map(struct pglist_data *pgdat) | |||
6157 | pgdat->node_id); | 6182 | pgdat->node_id); |
6158 | pgdat->node_mem_map = map + offset; | 6183 | pgdat->node_mem_map = map + offset; |
6159 | } | 6184 | } |
6185 | pr_debug("%s: node %d, pgdat %08lx, node_mem_map %08lx\n", | ||
6186 | __func__, pgdat->node_id, (unsigned long)pgdat, | ||
6187 | (unsigned long)pgdat->node_mem_map); | ||
6160 | #ifndef CONFIG_NEED_MULTIPLE_NODES | 6188 | #ifndef CONFIG_NEED_MULTIPLE_NODES |
6161 | /* | 6189 | /* |
6162 | * With no DISCONTIG, the global mem_map is just set as node 0's | 6190 | * With no DISCONTIG, the global mem_map is just set as node 0's |
@@ -6169,8 +6197,10 @@ static void __ref alloc_node_mem_map(struct pglist_data *pgdat) | |||
6169 | #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ | 6197 | #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ |
6170 | } | 6198 | } |
6171 | #endif | 6199 | #endif |
6172 | #endif /* CONFIG_FLAT_NODE_MEM_MAP */ | ||
6173 | } | 6200 | } |
6201 | #else | ||
6202 | static void __ref alloc_node_mem_map(struct pglist_data *pgdat) { } | ||
6203 | #endif /* CONFIG_FLAT_NODE_MEM_MAP */ | ||
6174 | 6204 | ||
6175 | void __paginginit free_area_init_node(int nid, unsigned long *zones_size, | 6205 | void __paginginit free_area_init_node(int nid, unsigned long *zones_size, |
6176 | unsigned long node_start_pfn, unsigned long *zholes_size) | 6206 | unsigned long node_start_pfn, unsigned long *zholes_size) |
@@ -6197,16 +6227,49 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size, | |||
6197 | zones_size, zholes_size); | 6227 | zones_size, zholes_size); |
6198 | 6228 | ||
6199 | alloc_node_mem_map(pgdat); | 6229 | alloc_node_mem_map(pgdat); |
6200 | #ifdef CONFIG_FLAT_NODE_MEM_MAP | ||
6201 | printk(KERN_DEBUG "free_area_init_node: node %d, pgdat %08lx, node_mem_map %08lx\n", | ||
6202 | nid, (unsigned long)pgdat, | ||
6203 | (unsigned long)pgdat->node_mem_map); | ||
6204 | #endif | ||
6205 | 6230 | ||
6206 | reset_deferred_meminit(pgdat); | 6231 | reset_deferred_meminit(pgdat); |
6207 | free_area_init_core(pgdat); | 6232 | free_area_init_core(pgdat); |
6208 | } | 6233 | } |
6209 | 6234 | ||
6235 | #ifdef CONFIG_HAVE_MEMBLOCK | ||
6236 | /* | ||
6237 | * Only struct pages that are backed by physical memory are zeroed and | ||
6238 | * initialized by going through __init_single_page(). But, there are some | ||
6239 | * struct pages which are reserved in memblock allocator and their fields | ||
6240 | * may be accessed (for example page_to_pfn() on some configuration accesses | ||
6241 | * flags). We must explicitly zero those struct pages. | ||
6242 | */ | ||
6243 | void __paginginit zero_resv_unavail(void) | ||
6244 | { | ||
6245 | phys_addr_t start, end; | ||
6246 | unsigned long pfn; | ||
6247 | u64 i, pgcnt; | ||
6248 | |||
6249 | /* | ||
6250 | * Loop through ranges that are reserved, but do not have reported | ||
6251 | * physical memory backing. | ||
6252 | */ | ||
6253 | pgcnt = 0; | ||
6254 | for_each_resv_unavail_range(i, &start, &end) { | ||
6255 | for (pfn = PFN_DOWN(start); pfn < PFN_UP(end); pfn++) { | ||
6256 | mm_zero_struct_page(pfn_to_page(pfn)); | ||
6257 | pgcnt++; | ||
6258 | } | ||
6259 | } | ||
6260 | |||
6261 | /* | ||
6262 | * Struct pages that do not have backing memory. This could be because | ||
6263 | * firmware is using some of this memory, or for some other reasons. | ||
6264 | * Once memblock is changed so that such behaviour is not allowed, i.e. | ||
6265 | * the list of "reserved" memory must be a subset of the list of "memory", | ||
6266 | * then this code can be removed. | ||
6267 | */ | ||
6268 | if (pgcnt) | ||
6269 | pr_info("Reserved but unavailable: %lld pages", pgcnt); | ||
6270 | } | ||
6271 | #endif /* CONFIG_HAVE_MEMBLOCK */ | ||
6272 | |||
6210 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP | 6273 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP |
6211 | 6274 | ||
6212 | #if MAX_NUMNODES > 1 | 6275 | #if MAX_NUMNODES > 1 |
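zero_resv_unavail() above iterates pfn from PFN_DOWN(start) to PFN_UP(end) so that every page frame intersecting a reserved range gets its struct page zeroed. A small stand-alone illustration of that rounding; the macros are re-derived here for a 4 KiB page size rather than taken from the kernel headers:

    #include <stdio.h>

    #define PAGE_SHIFT   12
    #define PAGE_SIZE    (1UL << PAGE_SHIFT)
    #define PFN_DOWN(x)  ((x) >> PAGE_SHIFT)                    /* frame containing x */
    #define PFN_UP(x)    (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)  /* first frame past x-1 */

    int main(void)
    {
            unsigned long start = 0x1800, end = 0x5200;  /* byte range, not page aligned */
            unsigned long pfn;

            /* every frame that intersects [start, end) is visited exactly once */
            for (pfn = PFN_DOWN(start); pfn < PFN_UP(end); pfn++)
                    printf("touch pfn %lu\n", pfn);          /* prints pfns 1..5 */
            return 0;
    }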
@@ -6630,6 +6693,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) | |||
6630 | node_set_state(nid, N_MEMORY); | 6693 | node_set_state(nid, N_MEMORY); |
6631 | check_for_memory(pgdat, nid); | 6694 | check_for_memory(pgdat, nid); |
6632 | } | 6695 | } |
6696 | zero_resv_unavail(); | ||
6633 | } | 6697 | } |
6634 | 6698 | ||
6635 | static int __init cmdline_parse_core(char *p, unsigned long *core) | 6699 | static int __init cmdline_parse_core(char *p, unsigned long *core) |
@@ -6793,6 +6857,7 @@ void __init free_area_init(unsigned long *zones_size) | |||
6793 | { | 6857 | { |
6794 | free_area_init_node(0, zones_size, | 6858 | free_area_init_node(0, zones_size, |
6795 | __pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL); | 6859 | __pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL); |
6860 | zero_resv_unavail(); | ||
6796 | } | 6861 | } |
6797 | 6862 | ||
6798 | static int page_alloc_cpu_dead(unsigned int cpu) | 6863 | static int page_alloc_cpu_dead(unsigned int cpu) |
@@ -7305,18 +7370,17 @@ void *__init alloc_large_system_hash(const char *tablename, | |||
7305 | 7370 | ||
7306 | log2qty = ilog2(numentries); | 7371 | log2qty = ilog2(numentries); |
7307 | 7372 | ||
7308 | /* | ||
7309 | * memblock allocator returns zeroed memory already, so HASH_ZERO is | ||
7310 | * currently not used when HASH_EARLY is specified. | ||
7311 | */ | ||
7312 | gfp_flags = (flags & HASH_ZERO) ? GFP_ATOMIC | __GFP_ZERO : GFP_ATOMIC; | 7373 | gfp_flags = (flags & HASH_ZERO) ? GFP_ATOMIC | __GFP_ZERO : GFP_ATOMIC; |
7313 | do { | 7374 | do { |
7314 | size = bucketsize << log2qty; | 7375 | size = bucketsize << log2qty; |
7315 | if (flags & HASH_EARLY) | 7376 | if (flags & HASH_EARLY) { |
7316 | table = memblock_virt_alloc_nopanic(size, 0); | 7377 | if (flags & HASH_ZERO) |
7317 | else if (hashdist) | 7378 | table = memblock_virt_alloc_nopanic(size, 0); |
7379 | else | ||
7380 | table = memblock_virt_alloc_raw(size, 0); | ||
7381 | } else if (hashdist) { | ||
7318 | table = __vmalloc(size, gfp_flags, PAGE_KERNEL); | 7382 | table = __vmalloc(size, gfp_flags, PAGE_KERNEL); |
7319 | else { | 7383 | } else { |
7320 | /* | 7384 | /* |
7321 | * If bucketsize is not a power-of-two, we may free | 7385 | * If bucketsize is not a power-of-two, we may free |
7322 | * some pages at the end of hash table which | 7386 | * some pages at the end of hash table which |
@@ -7353,10 +7417,10 @@ void *__init alloc_large_system_hash(const char *tablename, | |||
7353 | * race condition. So you can't expect this function should be exact. | 7417 | * race condition. So you can't expect this function should be exact. |
7354 | */ | 7418 | */ |
7355 | bool has_unmovable_pages(struct zone *zone, struct page *page, int count, | 7419 | bool has_unmovable_pages(struct zone *zone, struct page *page, int count, |
7420 | int migratetype, | ||
7356 | bool skip_hwpoisoned_pages) | 7421 | bool skip_hwpoisoned_pages) |
7357 | { | 7422 | { |
7358 | unsigned long pfn, iter, found; | 7423 | unsigned long pfn, iter, found; |
7359 | int mt; | ||
7360 | 7424 | ||
7361 | /* | 7425 | /* |
7362 | * For avoiding noise data, lru_add_drain_all() should be called | 7426 | * For avoiding noise data, lru_add_drain_all() should be called |
@@ -7364,8 +7428,14 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count, | |||
7364 | */ | 7428 | */ |
7365 | if (zone_idx(zone) == ZONE_MOVABLE) | 7429 | if (zone_idx(zone) == ZONE_MOVABLE) |
7366 | return false; | 7430 | return false; |
7367 | mt = get_pageblock_migratetype(page); | 7431 | |
7368 | if (mt == MIGRATE_MOVABLE || is_migrate_cma(mt)) | 7432 | /* |
7433 | * CMA allocations (alloc_contig_range) really need to mark isolate | ||
7434 | * CMA pageblocks even when they are not movable in fact so consider | ||
7435 | * them movable here. | ||
7436 | */ | ||
7437 | if (is_migrate_cma(migratetype) && | ||
7438 | is_migrate_cma(get_pageblock_migratetype(page))) | ||
7369 | return false; | 7439 | return false; |
7370 | 7440 | ||
7371 | pfn = page_to_pfn(page); | 7441 | pfn = page_to_pfn(page); |
@@ -7377,6 +7447,9 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count, | |||
7377 | 7447 | ||
7378 | page = pfn_to_page(check); | 7448 | page = pfn_to_page(check); |
7379 | 7449 | ||
7450 | if (PageReserved(page)) | ||
7451 | return true; | ||
7452 | |||
7380 | /* | 7453 | /* |
7381 | * Hugepages are not in LRU lists, but they're movable. | 7454 | * Hugepages are not in LRU lists, but they're movable. |
7382 | * We need not scan over tail pages because we don't | 7455 | * We need not scan over tail pages because we don't |
@@ -7450,7 +7523,7 @@ bool is_pageblock_removable_nolock(struct page *page) | |||
7450 | if (!zone_spans_pfn(zone, pfn)) | 7523 | if (!zone_spans_pfn(zone, pfn)) |
7451 | return false; | 7524 | return false; |
7452 | 7525 | ||
7453 | return !has_unmovable_pages(zone, page, 0, true); | 7526 | return !has_unmovable_pages(zone, page, 0, MIGRATE_MOVABLE, true); |
7454 | } | 7527 | } |
7455 | 7528 | ||
7456 | #if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA) | 7529 | #if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA) |
diff --git a/mm/page_ext.c b/mm/page_ext.c index 4f0367d472c4..2c16216c29b6 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c | |||
@@ -125,7 +125,6 @@ struct page_ext *lookup_page_ext(struct page *page) | |||
125 | struct page_ext *base; | 125 | struct page_ext *base; |
126 | 126 | ||
127 | base = NODE_DATA(page_to_nid(page))->node_page_ext; | 127 | base = NODE_DATA(page_to_nid(page))->node_page_ext; |
128 | #if defined(CONFIG_DEBUG_VM) | ||
129 | /* | 128 | /* |
130 | * The sanity checks the page allocator does upon freeing a | 129 | * The sanity checks the page allocator does upon freeing a |
131 | * page can reach here before the page_ext arrays are | 130 | * page can reach here before the page_ext arrays are |
@@ -134,7 +133,6 @@ struct page_ext *lookup_page_ext(struct page *page) | |||
134 | */ | 133 | */ |
135 | if (unlikely(!base)) | 134 | if (unlikely(!base)) |
136 | return NULL; | 135 | return NULL; |
137 | #endif | ||
138 | index = pfn - round_down(node_start_pfn(page_to_nid(page)), | 136 | index = pfn - round_down(node_start_pfn(page_to_nid(page)), |
139 | MAX_ORDER_NR_PAGES); | 137 | MAX_ORDER_NR_PAGES); |
140 | return get_entry(base, index); | 138 | return get_entry(base, index); |
@@ -199,7 +197,6 @@ struct page_ext *lookup_page_ext(struct page *page) | |||
199 | { | 197 | { |
200 | unsigned long pfn = page_to_pfn(page); | 198 | unsigned long pfn = page_to_pfn(page); |
201 | struct mem_section *section = __pfn_to_section(pfn); | 199 | struct mem_section *section = __pfn_to_section(pfn); |
202 | #if defined(CONFIG_DEBUG_VM) | ||
203 | /* | 200 | /* |
204 | * The sanity checks the page allocator does upon freeing a | 201 | * The sanity checks the page allocator does upon freeing a |
205 | * page can reach here before the page_ext arrays are | 202 | * page can reach here before the page_ext arrays are |
@@ -208,7 +205,6 @@ struct page_ext *lookup_page_ext(struct page *page) | |||
208 | */ | 205 | */ |
209 | if (!section->page_ext) | 206 | if (!section->page_ext) |
210 | return NULL; | 207 | return NULL; |
211 | #endif | ||
212 | return get_entry(section->page_ext, pfn); | 208 | return get_entry(section->page_ext, pfn); |
213 | } | 209 | } |
214 | 210 | ||
diff --git a/mm/page_io.c b/mm/page_io.c index cd52b9cc169b..e93f1a4cacd7 100644 --- a/mm/page_io.c +++ b/mm/page_io.c | |||
@@ -347,7 +347,7 @@ out: | |||
347 | return ret; | 347 | return ret; |
348 | } | 348 | } |
349 | 349 | ||
350 | int swap_readpage(struct page *page, bool do_poll) | 350 | int swap_readpage(struct page *page, bool synchronous) |
351 | { | 351 | { |
352 | struct bio *bio; | 352 | struct bio *bio; |
353 | int ret = 0; | 353 | int ret = 0; |
@@ -355,7 +355,7 @@ int swap_readpage(struct page *page, bool do_poll) | |||
355 | blk_qc_t qc; | 355 | blk_qc_t qc; |
356 | struct gendisk *disk; | 356 | struct gendisk *disk; |
357 | 357 | ||
358 | VM_BUG_ON_PAGE(!PageSwapCache(page), page); | 358 | VM_BUG_ON_PAGE(!PageSwapCache(page) && !synchronous, page); |
359 | VM_BUG_ON_PAGE(!PageLocked(page), page); | 359 | VM_BUG_ON_PAGE(!PageLocked(page), page); |
360 | VM_BUG_ON_PAGE(PageUptodate(page), page); | 360 | VM_BUG_ON_PAGE(PageUptodate(page), page); |
361 | if (frontswap_load(page) == 0) { | 361 | if (frontswap_load(page) == 0) { |
@@ -403,7 +403,7 @@ int swap_readpage(struct page *page, bool do_poll) | |||
403 | count_vm_event(PSWPIN); | 403 | count_vm_event(PSWPIN); |
404 | bio_get(bio); | 404 | bio_get(bio); |
405 | qc = submit_bio(bio); | 405 | qc = submit_bio(bio); |
406 | while (do_poll) { | 406 | while (synchronous) { |
407 | set_current_state(TASK_UNINTERRUPTIBLE); | 407 | set_current_state(TASK_UNINTERRUPTIBLE); |
408 | if (!READ_ONCE(bio->bi_private)) | 408 | if (!READ_ONCE(bio->bi_private)) |
409 | break; | 409 | break; |
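The renamed synchronous path above waits for the completion handler to clear bio->bi_private, re-checking the flag in a loop instead of sleeping on a waitqueue. A rough user-space analogue of that wait-for-flag pattern using C11 atomics; worker(), in_flight and the sched_yield() stand-in are inventions for the example, not kernel interfaces:

    #include <stdatomic.h>
    #include <pthread.h>
    #include <sched.h>
    #include <stdio.h>

    static atomic_int in_flight = 1;        /* plays the role of bio->bi_private */

    static void *worker(void *arg)
    {
            (void)arg;
            /* completion side: clear the flag once the "I/O" is done */
            atomic_store_explicit(&in_flight, 0, memory_order_release);
            return NULL;
    }

    int main(void)
    {
            pthread_t t;
            pthread_create(&t, NULL, worker, NULL);

            /* synchronous side: re-check the flag until the worker clears it */
            while (atomic_load_explicit(&in_flight, memory_order_acquire))
                    sched_yield();          /* stand-in for blk_poll()/io_schedule() */

            pthread_join(t, NULL);
            puts("read completed");
            return 0;
    }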
diff --git a/mm/page_isolation.c b/mm/page_isolation.c index 44f213935bf6..165ed8117bd1 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c | |||
@@ -15,7 +15,7 @@ | |||
15 | #define CREATE_TRACE_POINTS | 15 | #define CREATE_TRACE_POINTS |
16 | #include <trace/events/page_isolation.h> | 16 | #include <trace/events/page_isolation.h> |
17 | 17 | ||
18 | static int set_migratetype_isolate(struct page *page, | 18 | static int set_migratetype_isolate(struct page *page, int migratetype, |
19 | bool skip_hwpoisoned_pages) | 19 | bool skip_hwpoisoned_pages) |
20 | { | 20 | { |
21 | struct zone *zone; | 21 | struct zone *zone; |
@@ -52,7 +52,7 @@ static int set_migratetype_isolate(struct page *page, | |||
52 | * FIXME: Now, memory hotplug doesn't call shrink_slab() by itself. | 52 | * FIXME: Now, memory hotplug doesn't call shrink_slab() by itself. |
53 | * We just check MOVABLE pages. | 53 | * We just check MOVABLE pages. |
54 | */ | 54 | */ |
55 | if (!has_unmovable_pages(zone, page, arg.pages_found, | 55 | if (!has_unmovable_pages(zone, page, arg.pages_found, migratetype, |
56 | skip_hwpoisoned_pages)) | 56 | skip_hwpoisoned_pages)) |
57 | ret = 0; | 57 | ret = 0; |
58 | 58 | ||
@@ -64,14 +64,14 @@ static int set_migratetype_isolate(struct page *page, | |||
64 | out: | 64 | out: |
65 | if (!ret) { | 65 | if (!ret) { |
66 | unsigned long nr_pages; | 66 | unsigned long nr_pages; |
67 | int migratetype = get_pageblock_migratetype(page); | 67 | int mt = get_pageblock_migratetype(page); |
68 | 68 | ||
69 | set_pageblock_migratetype(page, MIGRATE_ISOLATE); | 69 | set_pageblock_migratetype(page, MIGRATE_ISOLATE); |
70 | zone->nr_isolate_pageblock++; | 70 | zone->nr_isolate_pageblock++; |
71 | nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE, | 71 | nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE, |
72 | NULL); | 72 | NULL); |
73 | 73 | ||
74 | __mod_zone_freepage_state(zone, -nr_pages, migratetype); | 74 | __mod_zone_freepage_state(zone, -nr_pages, mt); |
75 | } | 75 | } |
76 | 76 | ||
77 | spin_unlock_irqrestore(&zone->lock, flags); | 77 | spin_unlock_irqrestore(&zone->lock, flags); |
@@ -183,7 +183,7 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, | |||
183 | pfn += pageblock_nr_pages) { | 183 | pfn += pageblock_nr_pages) { |
184 | page = __first_valid_page(pfn, pageblock_nr_pages); | 184 | page = __first_valid_page(pfn, pageblock_nr_pages); |
185 | if (page && | 185 | if (page && |
186 | set_migratetype_isolate(page, skip_hwpoisoned_pages)) { | 186 | set_migratetype_isolate(page, migratetype, skip_hwpoisoned_pages)) { |
187 | undo_pfn = pfn; | 187 | undo_pfn = pfn; |
188 | goto undo; | 188 | goto undo; |
189 | } | 189 | } |
diff --git a/mm/page_owner.c b/mm/page_owner.c index 4f44b95b9d1e..8592543a0f15 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c | |||
@@ -20,9 +20,9 @@ | |||
20 | #define PAGE_OWNER_STACK_DEPTH (16) | 20 | #define PAGE_OWNER_STACK_DEPTH (16) |
21 | 21 | ||
22 | struct page_owner { | 22 | struct page_owner { |
23 | unsigned int order; | 23 | unsigned short order; |
24 | short last_migrate_reason; | ||
24 | gfp_t gfp_mask; | 25 | gfp_t gfp_mask; |
25 | int last_migrate_reason; | ||
26 | depot_stack_handle_t handle; | 26 | depot_stack_handle_t handle; |
27 | }; | 27 | }; |
28 | 28 | ||
diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c index 15dab691ea70..9158e5a81391 100644 --- a/mm/percpu-vm.c +++ b/mm/percpu-vm.c | |||
@@ -81,7 +81,7 @@ static void pcpu_free_pages(struct pcpu_chunk *chunk, | |||
81 | static int pcpu_alloc_pages(struct pcpu_chunk *chunk, | 81 | static int pcpu_alloc_pages(struct pcpu_chunk *chunk, |
82 | struct page **pages, int page_start, int page_end) | 82 | struct page **pages, int page_start, int page_end) |
83 | { | 83 | { |
84 | const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD; | 84 | const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM; |
85 | unsigned int cpu, tcpu; | 85 | unsigned int cpu, tcpu; |
86 | int i; | 86 | int i; |
87 | 87 | ||
diff --git a/mm/rmap.c b/mm/rmap.c --- a/mm/rmap.c +++ b/mm/rmap.c | |||
@@ -899,7 +899,7 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, | |||
899 | mmu_notifier_invalidate_range_start(vma->vm_mm, start, end); | 899 | mmu_notifier_invalidate_range_start(vma->vm_mm, start, end); |
900 | 900 | ||
901 | while (page_vma_mapped_walk(&pvmw)) { | 901 | while (page_vma_mapped_walk(&pvmw)) { |
902 | unsigned long cstart, cend; | 902 | unsigned long cstart; |
903 | int ret = 0; | 903 | int ret = 0; |
904 | 904 | ||
905 | cstart = address = pvmw.address; | 905 | cstart = address = pvmw.address; |
@@ -915,7 +915,6 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, | |||
915 | entry = pte_wrprotect(entry); | 915 | entry = pte_wrprotect(entry); |
916 | entry = pte_mkclean(entry); | 916 | entry = pte_mkclean(entry); |
917 | set_pte_at(vma->vm_mm, address, pte, entry); | 917 | set_pte_at(vma->vm_mm, address, pte, entry); |
918 | cend = cstart + PAGE_SIZE; | ||
919 | ret = 1; | 918 | ret = 1; |
920 | } else { | 919 | } else { |
921 | #ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE | 920 | #ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE |
@@ -931,7 +930,6 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, | |||
931 | entry = pmd_mkclean(entry); | 930 | entry = pmd_mkclean(entry); |
932 | set_pmd_at(vma->vm_mm, address, pmd, entry); | 931 | set_pmd_at(vma->vm_mm, address, pmd, entry); |
933 | cstart &= PMD_MASK; | 932 | cstart &= PMD_MASK; |
934 | cend = cstart + PMD_SIZE; | ||
935 | ret = 1; | 933 | ret = 1; |
936 | #else | 934 | #else |
937 | /* unexpected pmd-mapped page? */ | 935 | /* unexpected pmd-mapped page? */ |
@@ -939,10 +937,15 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, | |||
939 | #endif | 937 | #endif |
940 | } | 938 | } |
941 | 939 | ||
942 | if (ret) { | 940 | /* |
943 | mmu_notifier_invalidate_range(vma->vm_mm, cstart, cend); | 941 | * No need to call mmu_notifier_invalidate_range() as we are |
942 | * downgrading page table protection, not changing it to point | ||
943 | * to a new page. | ||
944 | * | ||
945 | * See Documentation/vm/mmu_notifier.txt | ||
946 | */ | ||
947 | if (ret) | ||
944 | (*cleaned)++; | 948 | (*cleaned)++; |
945 | } | ||
946 | } | 949 | } |
947 | 950 | ||
948 | mmu_notifier_invalidate_range_end(vma->vm_mm, start, end); | 951 | mmu_notifier_invalidate_range_end(vma->vm_mm, start, end); |
@@ -1318,7 +1321,7 @@ void page_remove_rmap(struct page *page, bool compound) | |||
1318 | * It would be tidy to reset the PageAnon mapping here, | 1321 | * It would be tidy to reset the PageAnon mapping here, |
1319 | * but that might overwrite a racing page_add_anon_rmap | 1322 | * but that might overwrite a racing page_add_anon_rmap |
1320 | * which increments mapcount after us but sets mapping | 1323 | * which increments mapcount after us but sets mapping |
1321 | * before us: so leave the reset to free_hot_cold_page, | 1324 | * before us: so leave the reset to free_unref_page, |
1322 | * and remember that it's only reliable while mapped. | 1325 | * and remember that it's only reliable while mapped. |
1323 | * Leaving it set also helps swapoff to reinstate ptes | 1326 | * Leaving it set also helps swapoff to reinstate ptes |
1324 | * faster for those pages still in swapcache. | 1327 | * faster for those pages still in swapcache. |
@@ -1426,6 +1429,10 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, | |||
1426 | if (pte_soft_dirty(pteval)) | 1429 | if (pte_soft_dirty(pteval)) |
1427 | swp_pte = pte_swp_mksoft_dirty(swp_pte); | 1430 | swp_pte = pte_swp_mksoft_dirty(swp_pte); |
1428 | set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte); | 1431 | set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte); |
1432 | /* | ||
1433 | * No need to invalidate here it will synchronize on | ||
1434 | * against the special swap migration pte. | ||
1435 | */ | ||
1429 | goto discard; | 1436 | goto discard; |
1430 | } | 1437 | } |
1431 | 1438 | ||
@@ -1483,6 +1490,9 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, | |||
1483 | * will take care of the rest. | 1490 | * will take care of the rest. |
1484 | */ | 1491 | */ |
1485 | dec_mm_counter(mm, mm_counter(page)); | 1492 | dec_mm_counter(mm, mm_counter(page)); |
1493 | /* We have to invalidate as we cleared the pte */ | ||
1494 | mmu_notifier_invalidate_range(mm, address, | ||
1495 | address + PAGE_SIZE); | ||
1486 | } else if (IS_ENABLED(CONFIG_MIGRATION) && | 1496 | } else if (IS_ENABLED(CONFIG_MIGRATION) && |
1487 | (flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE))) { | 1497 | (flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE))) { |
1488 | swp_entry_t entry; | 1498 | swp_entry_t entry; |
@@ -1498,6 +1508,10 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, | |||
1498 | if (pte_soft_dirty(pteval)) | 1508 | if (pte_soft_dirty(pteval)) |
1499 | swp_pte = pte_swp_mksoft_dirty(swp_pte); | 1509 | swp_pte = pte_swp_mksoft_dirty(swp_pte); |
1500 | set_pte_at(mm, address, pvmw.pte, swp_pte); | 1510 | set_pte_at(mm, address, pvmw.pte, swp_pte); |
1511 | /* | ||
1512 | * No need to invalidate here, it will synchronize | ||
1513 | * against the special swap migration pte. | ||
1514 | */ | ||
1501 | } else if (PageAnon(page)) { | 1515 | } else if (PageAnon(page)) { |
1502 | swp_entry_t entry = { .val = page_private(subpage) }; | 1516 | swp_entry_t entry = { .val = page_private(subpage) }; |
1503 | pte_t swp_pte; | 1517 | pte_t swp_pte; |
@@ -1509,6 +1523,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, | |||
1509 | WARN_ON_ONCE(1); | 1523 | WARN_ON_ONCE(1); |
1510 | ret = false; | 1524 | ret = false; |
1511 | /* We have to invalidate as we cleared the pte */ | 1525 | /* We have to invalidate as we cleared the pte */ |
1526 | mmu_notifier_invalidate_range(mm, address, | ||
1527 | address + PAGE_SIZE); | ||
1512 | page_vma_mapped_walk_done(&pvmw); | 1528 | page_vma_mapped_walk_done(&pvmw); |
1513 | break; | 1529 | break; |
1514 | } | 1530 | } |
@@ -1516,6 +1532,9 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, | |||
1516 | /* MADV_FREE page check */ | 1532 | /* MADV_FREE page check */ |
1517 | if (!PageSwapBacked(page)) { | 1533 | if (!PageSwapBacked(page)) { |
1518 | if (!PageDirty(page)) { | 1534 | if (!PageDirty(page)) { |
1535 | /* Invalidate as we cleared the pte */ | ||
1536 | mmu_notifier_invalidate_range(mm, | ||
1537 | address, address + PAGE_SIZE); | ||
1519 | dec_mm_counter(mm, MM_ANONPAGES); | 1538 | dec_mm_counter(mm, MM_ANONPAGES); |
1520 | goto discard; | 1539 | goto discard; |
1521 | } | 1540 | } |
@@ -1549,13 +1568,39 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, | |||
1549 | if (pte_soft_dirty(pteval)) | 1568 | if (pte_soft_dirty(pteval)) |
1550 | swp_pte = pte_swp_mksoft_dirty(swp_pte); | 1569 | swp_pte = pte_swp_mksoft_dirty(swp_pte); |
1551 | set_pte_at(mm, address, pvmw.pte, swp_pte); | 1570 | set_pte_at(mm, address, pvmw.pte, swp_pte); |
1552 | } else | 1571 | /* Invalidate as we cleared the pte */ |
1572 | mmu_notifier_invalidate_range(mm, address, | ||
1573 | address + PAGE_SIZE); | ||
1574 | } else { | ||
1575 | /* | ||
1576 | * We should not need to notify here as we reach this | ||
1577 | * case only from freeze_page(), which is itself only | ||
1578 | * called from split_huge_page_to_list(), so everything | ||
1579 | * below must be true: | ||
1580 | * - page is not anonymous | ||
1581 | * - page is locked | ||
1582 | * | ||
1583 | * As it is a locked file-backed page it cannot be | ||
1584 | * removed from the page cache and replaced by a new | ||
1585 | * page before mmu_notifier_invalidate_range_end, so no | ||
1586 | * concurrent thread can update its page table to | ||
1587 | * point at a new page while a device is still using | ||
1588 | * this page. | ||
1589 | * | ||
1590 | * See Documentation/vm/mmu_notifier.txt | ||
1591 | */ | ||
1553 | dec_mm_counter(mm, mm_counter_file(page)); | 1592 | dec_mm_counter(mm, mm_counter_file(page)); |
1593 | } | ||
1554 | discard: | 1594 | discard: |
1595 | /* | ||
1596 | * No need to call mmu_notifier_invalidate_range() as it has been | ||
1597 | * done above for all cases requiring it to happen under page | ||
1598 | * table lock before mmu_notifier_invalidate_range_end() | ||
1599 | * | ||
1600 | * See Documentation/vm/mmu_notifier.txt | ||
1601 | */ | ||
1555 | page_remove_rmap(subpage, PageHuge(page)); | 1602 | page_remove_rmap(subpage, PageHuge(page)); |
1556 | put_page(page); | 1603 | put_page(page); |
1557 | mmu_notifier_invalidate_range(mm, address, | ||
1558 | address + PAGE_SIZE); | ||
1559 | } | 1604 | } |
1560 | 1605 | ||
1561 | mmu_notifier_invalidate_range_end(vma->vm_mm, start, end); | 1606 | mmu_notifier_invalidate_range_end(vma->vm_mm, start, end); |
diff --git a/mm/shmem.c b/mm/shmem.c index 07a1d22807be..ab22eaa2412e 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
@@ -338,7 +338,7 @@ static int shmem_radix_tree_replace(struct address_space *mapping, | |||
338 | if (item != expected) | 338 | if (item != expected) |
339 | return -ENOENT; | 339 | return -ENOENT; |
340 | __radix_tree_replace(&mapping->page_tree, node, pslot, | 340 | __radix_tree_replace(&mapping->page_tree, node, pslot, |
341 | replacement, NULL, NULL); | 341 | replacement, NULL); |
342 | return 0; | 342 | return 0; |
343 | } | 343 | } |
344 | 344 | ||
@@ -747,7 +747,7 @@ void shmem_unlock_mapping(struct address_space *mapping) | |||
747 | pgoff_t indices[PAGEVEC_SIZE]; | 747 | pgoff_t indices[PAGEVEC_SIZE]; |
748 | pgoff_t index = 0; | 748 | pgoff_t index = 0; |
749 | 749 | ||
750 | pagevec_init(&pvec, 0); | 750 | pagevec_init(&pvec); |
751 | /* | 751 | /* |
752 | * Minor point, but we might as well stop if someone else SHM_LOCKs it. | 752 | * Minor point, but we might as well stop if someone else SHM_LOCKs it. |
753 | */ | 753 | */ |
@@ -790,7 +790,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, | |||
790 | if (lend == -1) | 790 | if (lend == -1) |
791 | end = -1; /* unsigned, so actually very big */ | 791 | end = -1; /* unsigned, so actually very big */ |
792 | 792 | ||
793 | pagevec_init(&pvec, 0); | 793 | pagevec_init(&pvec); |
794 | index = start; | 794 | index = start; |
795 | while (index < end) { | 795 | while (index < end) { |
796 | pvec.nr = find_get_entries(mapping, index, | 796 | pvec.nr = find_get_entries(mapping, index, |
@@ -2528,7 +2528,7 @@ static pgoff_t shmem_seek_hole_data(struct address_space *mapping, | |||
2528 | bool done = false; | 2528 | bool done = false; |
2529 | int i; | 2529 | int i; |
2530 | 2530 | ||
2531 | pagevec_init(&pvec, 0); | 2531 | pagevec_init(&pvec); |
2532 | pvec.nr = 1; /* start small: we may be there already */ | 2532 | pvec.nr = 1; /* start small: we may be there already */ |
2533 | while (!done) { | 2533 | while (!done) { |
2534 | pvec.nr = find_get_entries(mapping, index, | 2534 | pvec.nr = find_get_entries(mapping, index, |
@@ -3862,12 +3862,11 @@ static void shmem_init_inode(void *foo) | |||
3862 | inode_init_once(&info->vfs_inode); | 3862 | inode_init_once(&info->vfs_inode); |
3863 | } | 3863 | } |
3864 | 3864 | ||
3865 | static int shmem_init_inodecache(void) | 3865 | static void shmem_init_inodecache(void) |
3866 | { | 3866 | { |
3867 | shmem_inode_cachep = kmem_cache_create("shmem_inode_cache", | 3867 | shmem_inode_cachep = kmem_cache_create("shmem_inode_cache", |
3868 | sizeof(struct shmem_inode_info), | 3868 | sizeof(struct shmem_inode_info), |
3869 | 0, SLAB_PANIC|SLAB_ACCOUNT, shmem_init_inode); | 3869 | 0, SLAB_PANIC|SLAB_ACCOUNT, shmem_init_inode); |
3870 | return 0; | ||
3871 | } | 3870 | } |
3872 | 3871 | ||
3873 | static void shmem_destroy_inodecache(void) | 3872 | static void shmem_destroy_inodecache(void) |
@@ -3991,9 +3990,7 @@ int __init shmem_init(void) | |||
3991 | if (shmem_inode_cachep) | 3990 | if (shmem_inode_cachep) |
3992 | return 0; | 3991 | return 0; |
3993 | 3992 | ||
3994 | error = shmem_init_inodecache(); | 3993 | shmem_init_inodecache(); |
3995 | if (error) | ||
3996 | goto out3; | ||
3997 | 3994 | ||
3998 | error = register_filesystem(&shmem_fs_type); | 3995 | error = register_filesystem(&shmem_fs_type); |
3999 | if (error) { | 3996 | if (error) { |
@@ -4020,7 +4017,6 @@ out1: | |||
4020 | unregister_filesystem(&shmem_fs_type); | 4017 | unregister_filesystem(&shmem_fs_type); |
4021 | out2: | 4018 | out2: |
4022 | shmem_destroy_inodecache(); | 4019 | shmem_destroy_inodecache(); |
4023 | out3: | ||
4024 | shm_mnt = ERR_PTR(error); | 4020 | shm_mnt = ERR_PTR(error); |
4025 | return error; | 4021 | return error; |
4026 | } | 4022 | } |
@@ -4102,6 +4098,7 @@ bool shmem_huge_enabled(struct vm_area_struct *vma) | |||
4102 | if (i_size >= HPAGE_PMD_SIZE && | 4098 | if (i_size >= HPAGE_PMD_SIZE && |
4103 | i_size >> PAGE_SHIFT >= off) | 4099 | i_size >> PAGE_SHIFT >= off) |
4104 | return true; | 4100 | return true; |
4101 | /* fall through */ | ||
4105 | case SHMEM_HUGE_ADVISE: | 4102 | case SHMEM_HUGE_ADVISE: |
4106 | /* TODO: implement fadvise() hints */ | 4103 | /* TODO: implement fadvise() hints */ |
4107 | return (vma->vm_flags & VM_HUGEPAGE); | 4104 | return (vma->vm_flags & VM_HUGEPAGE); |
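The added /* fall through */ comment documents that SHMEM_HUGE_WITHIN_SIZE is meant to continue into SHMEM_HUGE_ADVISE, and it also satisfies compilers that check such annotations (GCC's -Wimplicit-fallthrough, for instance). A minimal sketch of the pattern; classify() and its cases are invented for the example:

    /* Illustrative only; classify() and its cases are made up. */
    int classify(int kind, int hint)
    {
            switch (kind) {
            case 2:
                    if (hint)
                            return 1;       /* handled: no fall-through on this path */
                    /* fall through */
            case 1:
                    return hint ? 1 : 0;    /* shared handling for cases 1 and 2 */
            default:
                    return 0;
            }
    }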
diff --git a/mm/slab.c b/mm/slab.c --- a/mm/slab.c +++ b/mm/slab.c | |||
@@ -114,7 +114,6 @@ | |||
114 | #include <linux/rtmutex.h> | 114 | #include <linux/rtmutex.h> |
115 | #include <linux/reciprocal_div.h> | 115 | #include <linux/reciprocal_div.h> |
116 | #include <linux/debugobjects.h> | 116 | #include <linux/debugobjects.h> |
117 | #include <linux/kmemcheck.h> | ||
118 | #include <linux/memory.h> | 117 | #include <linux/memory.h> |
119 | #include <linux/prefetch.h> | 118 | #include <linux/prefetch.h> |
120 | #include <linux/sched/task_stack.h> | 119 | #include <linux/sched/task_stack.h> |
@@ -252,8 +251,8 @@ static void kmem_cache_node_init(struct kmem_cache_node *parent) | |||
252 | MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \ | 251 | MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \ |
253 | } while (0) | 252 | } while (0) |
254 | 253 | ||
255 | #define CFLGS_OBJFREELIST_SLAB (0x40000000UL) | 254 | #define CFLGS_OBJFREELIST_SLAB ((slab_flags_t __force)0x40000000U) |
256 | #define CFLGS_OFF_SLAB (0x80000000UL) | 255 | #define CFLGS_OFF_SLAB ((slab_flags_t __force)0x80000000U) |
257 | #define OBJFREELIST_SLAB(x) ((x)->flags & CFLGS_OBJFREELIST_SLAB) | 256 | #define OBJFREELIST_SLAB(x) ((x)->flags & CFLGS_OBJFREELIST_SLAB) |
258 | #define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB) | 257 | #define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB) |
259 | 258 | ||
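The CFLGS_* recast above is part of the switch from unsigned long to slab_flags_t, which is presumably declared like gfp_t as a sparse __bitwise type so that mixing cache flags with plain integers gets flagged during static analysis. A hedged sketch of how such a type is typically set up; foo_flags_t and the FOO_* constants are stand-ins, not the kernel's definitions:

    /* Under sparse, __bitwise makes the typedef a distinct type and __force
     * silences the warning where a raw constant is deliberately converted.
     * With a normal compiler both attributes expand to nothing. */
    #ifdef __CHECKER__
    #define __bitwise __attribute__((bitwise))
    #define __force   __attribute__((force))
    #else
    #define __bitwise
    #define __force
    #endif

    typedef unsigned int __bitwise foo_flags_t;

    #define FOO_RECLAIM  ((foo_flags_t __force)0x00000001U)
    #define FOO_ACCOUNT  ((foo_flags_t __force)0x00000002U)

    static int foo_is_accounted(foo_flags_t flags)
    {
            return (flags & FOO_ACCOUNT) != 0;   /* flag arithmetic stays typed */
    }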
@@ -441,7 +440,7 @@ static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) | |||
441 | * Calculate the number of objects and left-over bytes for a given buffer size. | 440 | * Calculate the number of objects and left-over bytes for a given buffer size. |
442 | */ | 441 | */ |
443 | static unsigned int cache_estimate(unsigned long gfporder, size_t buffer_size, | 442 | static unsigned int cache_estimate(unsigned long gfporder, size_t buffer_size, |
444 | unsigned long flags, size_t *left_over) | 443 | slab_flags_t flags, size_t *left_over) |
445 | { | 444 | { |
446 | unsigned int num; | 445 | unsigned int num; |
447 | size_t slab_size = PAGE_SIZE << gfporder; | 446 | size_t slab_size = PAGE_SIZE << gfporder; |
@@ -1410,10 +1409,8 @@ static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, | |||
1410 | int nr_pages; | 1409 | int nr_pages; |
1411 | 1410 | ||
1412 | flags |= cachep->allocflags; | 1411 | flags |= cachep->allocflags; |
1413 | if (cachep->flags & SLAB_RECLAIM_ACCOUNT) | ||
1414 | flags |= __GFP_RECLAIMABLE; | ||
1415 | 1412 | ||
1416 | page = __alloc_pages_node(nodeid, flags | __GFP_NOTRACK, cachep->gfporder); | 1413 | page = __alloc_pages_node(nodeid, flags, cachep->gfporder); |
1417 | if (!page) { | 1414 | if (!page) { |
1418 | slab_out_of_memory(cachep, flags, nodeid); | 1415 | slab_out_of_memory(cachep, flags, nodeid); |
1419 | return NULL; | 1416 | return NULL; |
@@ -1435,15 +1432,6 @@ static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, | |||
1435 | if (sk_memalloc_socks() && page_is_pfmemalloc(page)) | 1432 | if (sk_memalloc_socks() && page_is_pfmemalloc(page)) |
1436 | SetPageSlabPfmemalloc(page); | 1433 | SetPageSlabPfmemalloc(page); |
1437 | 1434 | ||
1438 | if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) { | ||
1439 | kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid); | ||
1440 | |||
1441 | if (cachep->ctor) | ||
1442 | kmemcheck_mark_uninitialized_pages(page, nr_pages); | ||
1443 | else | ||
1444 | kmemcheck_mark_unallocated_pages(page, nr_pages); | ||
1445 | } | ||
1446 | |||
1447 | return page; | 1435 | return page; |
1448 | } | 1436 | } |
1449 | 1437 | ||
@@ -1455,8 +1443,6 @@ static void kmem_freepages(struct kmem_cache *cachep, struct page *page) | |||
1455 | int order = cachep->gfporder; | 1443 | int order = cachep->gfporder; |
1456 | unsigned long nr_freed = (1 << order); | 1444 | unsigned long nr_freed = (1 << order); |
1457 | 1445 | ||
1458 | kmemcheck_free_shadow(page, order); | ||
1459 | |||
1460 | if (cachep->flags & SLAB_RECLAIM_ACCOUNT) | 1446 | if (cachep->flags & SLAB_RECLAIM_ACCOUNT) |
1461 | mod_lruvec_page_state(page, NR_SLAB_RECLAIMABLE, -nr_freed); | 1447 | mod_lruvec_page_state(page, NR_SLAB_RECLAIMABLE, -nr_freed); |
1462 | else | 1448 | else |
@@ -1761,7 +1747,7 @@ static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list) | |||
1761 | * towards high-order requests, this should be changed. | 1747 | * towards high-order requests, this should be changed. |
1762 | */ | 1748 | */ |
1763 | static size_t calculate_slab_order(struct kmem_cache *cachep, | 1749 | static size_t calculate_slab_order(struct kmem_cache *cachep, |
1764 | size_t size, unsigned long flags) | 1750 | size_t size, slab_flags_t flags) |
1765 | { | 1751 | { |
1766 | size_t left_over = 0; | 1752 | size_t left_over = 0; |
1767 | int gfporder; | 1753 | int gfporder; |
@@ -1888,8 +1874,8 @@ static int __ref setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) | |||
1888 | return 0; | 1874 | return 0; |
1889 | } | 1875 | } |
1890 | 1876 | ||
1891 | unsigned long kmem_cache_flags(unsigned long object_size, | 1877 | slab_flags_t kmem_cache_flags(unsigned long object_size, |
1892 | unsigned long flags, const char *name, | 1878 | slab_flags_t flags, const char *name, |
1893 | void (*ctor)(void *)) | 1879 | void (*ctor)(void *)) |
1894 | { | 1880 | { |
1895 | return flags; | 1881 | return flags; |
@@ -1897,7 +1883,7 @@ unsigned long kmem_cache_flags(unsigned long object_size, | |||
1897 | 1883 | ||
1898 | struct kmem_cache * | 1884 | struct kmem_cache * |
1899 | __kmem_cache_alias(const char *name, size_t size, size_t align, | 1885 | __kmem_cache_alias(const char *name, size_t size, size_t align, |
1900 | unsigned long flags, void (*ctor)(void *)) | 1886 | slab_flags_t flags, void (*ctor)(void *)) |
1901 | { | 1887 | { |
1902 | struct kmem_cache *cachep; | 1888 | struct kmem_cache *cachep; |
1903 | 1889 | ||
@@ -1915,7 +1901,7 @@ __kmem_cache_alias(const char *name, size_t size, size_t align, | |||
1915 | } | 1901 | } |
1916 | 1902 | ||
1917 | static bool set_objfreelist_slab_cache(struct kmem_cache *cachep, | 1903 | static bool set_objfreelist_slab_cache(struct kmem_cache *cachep, |
1918 | size_t size, unsigned long flags) | 1904 | size_t size, slab_flags_t flags) |
1919 | { | 1905 | { |
1920 | size_t left; | 1906 | size_t left; |
1921 | 1907 | ||
@@ -1938,7 +1924,7 @@ static bool set_objfreelist_slab_cache(struct kmem_cache *cachep, | |||
1938 | } | 1924 | } |
1939 | 1925 | ||
1940 | static bool set_off_slab_cache(struct kmem_cache *cachep, | 1926 | static bool set_off_slab_cache(struct kmem_cache *cachep, |
1941 | size_t size, unsigned long flags) | 1927 | size_t size, slab_flags_t flags) |
1942 | { | 1928 | { |
1943 | size_t left; | 1929 | size_t left; |
1944 | 1930 | ||
@@ -1972,7 +1958,7 @@ static bool set_off_slab_cache(struct kmem_cache *cachep, | |||
1972 | } | 1958 | } |
1973 | 1959 | ||
1974 | static bool set_on_slab_cache(struct kmem_cache *cachep, | 1960 | static bool set_on_slab_cache(struct kmem_cache *cachep, |
1975 | size_t size, unsigned long flags) | 1961 | size_t size, slab_flags_t flags) |
1976 | { | 1962 | { |
1977 | size_t left; | 1963 | size_t left; |
1978 | 1964 | ||
@@ -2008,8 +1994,7 @@ static bool set_on_slab_cache(struct kmem_cache *cachep, | |||
2008 | * cacheline. This can be beneficial if you're counting cycles as closely | 1994 | * cacheline. This can be beneficial if you're counting cycles as closely |
2009 | * as davem. | 1995 | * as davem. |
2010 | */ | 1996 | */ |
2011 | int | 1997 | int __kmem_cache_create(struct kmem_cache *cachep, slab_flags_t flags) |
2012 | __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) | ||
2013 | { | 1998 | { |
2014 | size_t ralign = BYTES_PER_WORD; | 1999 | size_t ralign = BYTES_PER_WORD; |
2015 | gfp_t gfp; | 2000 | gfp_t gfp; |
@@ -2144,6 +2129,8 @@ done: | |||
2144 | cachep->allocflags = __GFP_COMP; | 2129 | cachep->allocflags = __GFP_COMP; |
2145 | if (flags & SLAB_CACHE_DMA) | 2130 | if (flags & SLAB_CACHE_DMA) |
2146 | cachep->allocflags |= GFP_DMA; | 2131 | cachep->allocflags |= GFP_DMA; |
2132 | if (flags & SLAB_RECLAIM_ACCOUNT) | ||
2133 | cachep->allocflags |= __GFP_RECLAIMABLE; | ||
2147 | cachep->size = size; | 2134 | cachep->size = size; |
2148 | cachep->reciprocal_buffer_size = reciprocal_value(size); | 2135 | cachep->reciprocal_buffer_size = reciprocal_value(size); |
2149 | 2136 | ||
@@ -3516,8 +3503,6 @@ void ___cache_free(struct kmem_cache *cachep, void *objp, | |||
3516 | kmemleak_free_recursive(objp, cachep->flags); | 3503 | kmemleak_free_recursive(objp, cachep->flags); |
3517 | objp = cache_free_debugcheck(cachep, objp, caller); | 3504 | objp = cache_free_debugcheck(cachep, objp, caller); |
3518 | 3505 | ||
3519 | kmemcheck_slab_free(cachep, objp, cachep->object_size); | ||
3520 | |||
3521 | /* | 3506 | /* |
3522 | * Skip calling cache_free_alien() when the platform is not numa. | 3507 | * Skip calling cache_free_alien() when the platform is not numa. |
3523 | * This will avoid cache misses that happen while accessing slabp (which | 3508 | * This will avoid cache misses that happen while accessing slabp (which |
@@ -4097,7 +4082,6 @@ out: | |||
4097 | schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_AC)); | 4082 | schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_AC)); |
4098 | } | 4083 | } |
4099 | 4084 | ||
4100 | #ifdef CONFIG_SLABINFO | ||
4101 | void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo) | 4085 | void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo) |
4102 | { | 4086 | { |
4103 | unsigned long active_objs, num_objs, active_slabs; | 4087 | unsigned long active_objs, num_objs, active_slabs; |
@@ -4405,7 +4389,6 @@ static int __init slab_proc_init(void) | |||
4405 | return 0; | 4389 | return 0; |
4406 | } | 4390 | } |
4407 | module_init(slab_proc_init); | 4391 | module_init(slab_proc_init); |
4408 | #endif | ||
4409 | 4392 | ||
4410 | #ifdef CONFIG_HARDENED_USERCOPY | 4393 | #ifdef CONFIG_HARDENED_USERCOPY |
4411 | /* | 4394 | /* |
diff --git a/mm/slab.h b/mm/slab.h --- a/mm/slab.h +++ b/mm/slab.h | |||
@@ -21,7 +21,7 @@ struct kmem_cache { | |||
21 | unsigned int object_size;/* The original size of the object */ | 21 | unsigned int object_size;/* The original size of the object */ |
22 | unsigned int size; /* The aligned/padded/added on size */ | 22 | unsigned int size; /* The aligned/padded/added on size */ |
23 | unsigned int align; /* Alignment as calculated */ | 23 | unsigned int align; /* Alignment as calculated */ |
24 | unsigned long flags; /* Active flags on the slab */ | 24 | slab_flags_t flags; /* Active flags on the slab */ |
25 | const char *name; /* Slab name for sysfs */ | 25 | const char *name; /* Slab name for sysfs */ |
26 | int refcount; /* Use counter */ | 26 | int refcount; /* Use counter */ |
27 | void (*ctor)(void *); /* Called on object slot creation */ | 27 | void (*ctor)(void *); /* Called on object slot creation */ |
@@ -40,7 +40,6 @@ struct kmem_cache { | |||
40 | 40 | ||
41 | #include <linux/memcontrol.h> | 41 | #include <linux/memcontrol.h> |
42 | #include <linux/fault-inject.h> | 42 | #include <linux/fault-inject.h> |
43 | #include <linux/kmemcheck.h> | ||
44 | #include <linux/kasan.h> | 43 | #include <linux/kasan.h> |
45 | #include <linux/kmemleak.h> | 44 | #include <linux/kmemleak.h> |
46 | #include <linux/random.h> | 45 | #include <linux/random.h> |
@@ -79,13 +78,13 @@ extern const struct kmalloc_info_struct { | |||
79 | unsigned long size; | 78 | unsigned long size; |
80 | } kmalloc_info[]; | 79 | } kmalloc_info[]; |
81 | 80 | ||
82 | unsigned long calculate_alignment(unsigned long flags, | 81 | unsigned long calculate_alignment(slab_flags_t flags, |
83 | unsigned long align, unsigned long size); | 82 | unsigned long align, unsigned long size); |
84 | 83 | ||
85 | #ifndef CONFIG_SLOB | 84 | #ifndef CONFIG_SLOB |
86 | /* Kmalloc array related functions */ | 85 | /* Kmalloc array related functions */ |
87 | void setup_kmalloc_cache_index_table(void); | 86 | void setup_kmalloc_cache_index_table(void); |
88 | void create_kmalloc_caches(unsigned long); | 87 | void create_kmalloc_caches(slab_flags_t); |
89 | 88 | ||
90 | /* Find the kmalloc slab corresponding for a certain size */ | 89 | /* Find the kmalloc slab corresponding for a certain size */ |
91 | struct kmem_cache *kmalloc_slab(size_t, gfp_t); | 90 | struct kmem_cache *kmalloc_slab(size_t, gfp_t); |
@@ -93,32 +92,32 @@ struct kmem_cache *kmalloc_slab(size_t, gfp_t); | |||
93 | 92 | ||
94 | 93 | ||
95 | /* Functions provided by the slab allocators */ | 94 | /* Functions provided by the slab allocators */ |
96 | extern int __kmem_cache_create(struct kmem_cache *, unsigned long flags); | 95 | int __kmem_cache_create(struct kmem_cache *, slab_flags_t flags); |
97 | 96 | ||
98 | extern struct kmem_cache *create_kmalloc_cache(const char *name, size_t size, | 97 | extern struct kmem_cache *create_kmalloc_cache(const char *name, size_t size, |
99 | unsigned long flags); | 98 | slab_flags_t flags); |
100 | extern void create_boot_cache(struct kmem_cache *, const char *name, | 99 | extern void create_boot_cache(struct kmem_cache *, const char *name, |
101 | size_t size, unsigned long flags); | 100 | size_t size, slab_flags_t flags); |
102 | 101 | ||
103 | int slab_unmergeable(struct kmem_cache *s); | 102 | int slab_unmergeable(struct kmem_cache *s); |
104 | struct kmem_cache *find_mergeable(size_t size, size_t align, | 103 | struct kmem_cache *find_mergeable(size_t size, size_t align, |
105 | unsigned long flags, const char *name, void (*ctor)(void *)); | 104 | slab_flags_t flags, const char *name, void (*ctor)(void *)); |
106 | #ifndef CONFIG_SLOB | 105 | #ifndef CONFIG_SLOB |
107 | struct kmem_cache * | 106 | struct kmem_cache * |
108 | __kmem_cache_alias(const char *name, size_t size, size_t align, | 107 | __kmem_cache_alias(const char *name, size_t size, size_t align, |
109 | unsigned long flags, void (*ctor)(void *)); | 108 | slab_flags_t flags, void (*ctor)(void *)); |
110 | 109 | ||
111 | unsigned long kmem_cache_flags(unsigned long object_size, | 110 | slab_flags_t kmem_cache_flags(unsigned long object_size, |
112 | unsigned long flags, const char *name, | 111 | slab_flags_t flags, const char *name, |
113 | void (*ctor)(void *)); | 112 | void (*ctor)(void *)); |
114 | #else | 113 | #else |
115 | static inline struct kmem_cache * | 114 | static inline struct kmem_cache * |
116 | __kmem_cache_alias(const char *name, size_t size, size_t align, | 115 | __kmem_cache_alias(const char *name, size_t size, size_t align, |
117 | unsigned long flags, void (*ctor)(void *)) | 116 | slab_flags_t flags, void (*ctor)(void *)) |
118 | { return NULL; } | 117 | { return NULL; } |
119 | 118 | ||
120 | static inline unsigned long kmem_cache_flags(unsigned long object_size, | 119 | static inline slab_flags_t kmem_cache_flags(unsigned long object_size, |
121 | unsigned long flags, const char *name, | 120 | slab_flags_t flags, const char *name, |
122 | void (*ctor)(void *)) | 121 | void (*ctor)(void *)) |
123 | { | 122 | { |
124 | return flags; | 123 | return flags; |
@@ -142,10 +141,10 @@ static inline unsigned long kmem_cache_flags(unsigned long object_size, | |||
142 | #if defined(CONFIG_SLAB) | 141 | #if defined(CONFIG_SLAB) |
143 | #define SLAB_CACHE_FLAGS (SLAB_MEM_SPREAD | SLAB_NOLEAKTRACE | \ | 142 | #define SLAB_CACHE_FLAGS (SLAB_MEM_SPREAD | SLAB_NOLEAKTRACE | \ |
144 | SLAB_RECLAIM_ACCOUNT | SLAB_TEMPORARY | \ | 143 | SLAB_RECLAIM_ACCOUNT | SLAB_TEMPORARY | \ |
145 | SLAB_NOTRACK | SLAB_ACCOUNT) | 144 | SLAB_ACCOUNT) |
146 | #elif defined(CONFIG_SLUB) | 145 | #elif defined(CONFIG_SLUB) |
147 | #define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \ | 146 | #define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \ |
148 | SLAB_TEMPORARY | SLAB_NOTRACK | SLAB_ACCOUNT) | 147 | SLAB_TEMPORARY | SLAB_ACCOUNT) |
149 | #else | 148 | #else |
150 | #define SLAB_CACHE_FLAGS (0) | 149 | #define SLAB_CACHE_FLAGS (0) |
151 | #endif | 150 | #endif |
@@ -164,7 +163,6 @@ static inline unsigned long kmem_cache_flags(unsigned long object_size, | |||
164 | SLAB_NOLEAKTRACE | \ | 163 | SLAB_NOLEAKTRACE | \ |
165 | SLAB_RECLAIM_ACCOUNT | \ | 164 | SLAB_RECLAIM_ACCOUNT | \ |
166 | SLAB_TEMPORARY | \ | 165 | SLAB_TEMPORARY | \ |
167 | SLAB_NOTRACK | \ | ||
168 | SLAB_ACCOUNT) | 166 | SLAB_ACCOUNT) |
169 | 167 | ||
170 | int __kmem_cache_shutdown(struct kmem_cache *); | 168 | int __kmem_cache_shutdown(struct kmem_cache *); |
@@ -439,7 +437,6 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, | |||
439 | for (i = 0; i < size; i++) { | 437 | for (i = 0; i < size; i++) { |
440 | void *object = p[i]; | 438 | void *object = p[i]; |
441 | 439 | ||
442 | kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); | ||
443 | kmemleak_alloc_recursive(object, s->object_size, 1, | 440 | kmemleak_alloc_recursive(object, s->object_size, 1, |
444 | s->flags, flags); | 441 | s->flags, flags); |
445 | kasan_slab_alloc(s, object, flags); | 442 | kasan_slab_alloc(s, object, flags); |
@@ -506,6 +503,14 @@ void *memcg_slab_next(struct seq_file *m, void *p, loff_t *pos); | |||
506 | void memcg_slab_stop(struct seq_file *m, void *p); | 503 | void memcg_slab_stop(struct seq_file *m, void *p); |
507 | int memcg_slab_show(struct seq_file *m, void *p); | 504 | int memcg_slab_show(struct seq_file *m, void *p); |
508 | 505 | ||
506 | #if defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG) | ||
507 | void dump_unreclaimable_slab(void); | ||
508 | #else | ||
509 | static inline void dump_unreclaimable_slab(void) | ||
510 | { | ||
511 | } | ||
512 | #endif | ||
513 | |||
509 | void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr); | 514 | void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr); |
510 | 515 | ||
511 | #ifdef CONFIG_SLAB_FREELIST_RANDOM | 516 | #ifdef CONFIG_SLAB_FREELIST_RANDOM |
diff --git a/mm/slab_common.c b/mm/slab_common.c index 0d7fe71ff5e4..c8cb36774ba1 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c | |||
@@ -44,7 +44,7 @@ static DECLARE_WORK(slab_caches_to_rcu_destroy_work, | |||
44 | SLAB_FAILSLAB | SLAB_KASAN) | 44 | SLAB_FAILSLAB | SLAB_KASAN) |
45 | 45 | ||
46 | #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \ | 46 | #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \ |
47 | SLAB_NOTRACK | SLAB_ACCOUNT) | 47 | SLAB_ACCOUNT) |
48 | 48 | ||
49 | /* | 49 | /* |
50 | * Merge control. If this is set then no merging of slab caches will occur. | 50 | * Merge control. If this is set then no merging of slab caches will occur. |
@@ -291,7 +291,7 @@ int slab_unmergeable(struct kmem_cache *s) | |||
291 | } | 291 | } |
292 | 292 | ||
293 | struct kmem_cache *find_mergeable(size_t size, size_t align, | 293 | struct kmem_cache *find_mergeable(size_t size, size_t align, |
294 | unsigned long flags, const char *name, void (*ctor)(void *)) | 294 | slab_flags_t flags, const char *name, void (*ctor)(void *)) |
295 | { | 295 | { |
296 | struct kmem_cache *s; | 296 | struct kmem_cache *s; |
297 | 297 | ||
@@ -341,7 +341,7 @@ struct kmem_cache *find_mergeable(size_t size, size_t align, | |||
341 | * Figure out what the alignment of the objects will be given a set of | 341 | * Figure out what the alignment of the objects will be given a set of |
342 | * flags, a user specified alignment and the size of the objects. | 342 | * flags, a user specified alignment and the size of the objects. |
343 | */ | 343 | */ |
344 | unsigned long calculate_alignment(unsigned long flags, | 344 | unsigned long calculate_alignment(slab_flags_t flags, |
345 | unsigned long align, unsigned long size) | 345 | unsigned long align, unsigned long size) |
346 | { | 346 | { |
347 | /* | 347 | /* |
@@ -366,7 +366,7 @@ unsigned long calculate_alignment(unsigned long flags, | |||
366 | 366 | ||
367 | static struct kmem_cache *create_cache(const char *name, | 367 | static struct kmem_cache *create_cache(const char *name, |
368 | size_t object_size, size_t size, size_t align, | 368 | size_t object_size, size_t size, size_t align, |
369 | unsigned long flags, void (*ctor)(void *), | 369 | slab_flags_t flags, void (*ctor)(void *), |
370 | struct mem_cgroup *memcg, struct kmem_cache *root_cache) | 370 | struct mem_cgroup *memcg, struct kmem_cache *root_cache) |
371 | { | 371 | { |
372 | struct kmem_cache *s; | 372 | struct kmem_cache *s; |
@@ -431,7 +431,7 @@ out_free_cache: | |||
431 | */ | 431 | */ |
432 | struct kmem_cache * | 432 | struct kmem_cache * |
433 | kmem_cache_create(const char *name, size_t size, size_t align, | 433 | kmem_cache_create(const char *name, size_t size, size_t align, |
434 | unsigned long flags, void (*ctor)(void *)) | 434 | slab_flags_t flags, void (*ctor)(void *)) |
435 | { | 435 | { |
436 | struct kmem_cache *s = NULL; | 436 | struct kmem_cache *s = NULL; |
437 | const char *cache_name; | 437 | const char *cache_name; |
@@ -879,7 +879,7 @@ bool slab_is_available(void) | |||
879 | #ifndef CONFIG_SLOB | 879 | #ifndef CONFIG_SLOB |
880 | /* Create a cache during boot when no slab services are available yet */ | 880 | /* Create a cache during boot when no slab services are available yet */ |
881 | void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size, | 881 | void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size, |
882 | unsigned long flags) | 882 | slab_flags_t flags) |
883 | { | 883 | { |
884 | int err; | 884 | int err; |
885 | 885 | ||
@@ -899,7 +899,7 @@ void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t siz | |||
899 | } | 899 | } |
900 | 900 | ||
901 | struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size, | 901 | struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size, |
902 | unsigned long flags) | 902 | slab_flags_t flags) |
903 | { | 903 | { |
904 | struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT); | 904 | struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT); |
905 | 905 | ||
@@ -1057,7 +1057,7 @@ void __init setup_kmalloc_cache_index_table(void) | |||
1057 | } | 1057 | } |
1058 | } | 1058 | } |
1059 | 1059 | ||
1060 | static void __init new_kmalloc_cache(int idx, unsigned long flags) | 1060 | static void __init new_kmalloc_cache(int idx, slab_flags_t flags) |
1061 | { | 1061 | { |
1062 | kmalloc_caches[idx] = create_kmalloc_cache(kmalloc_info[idx].name, | 1062 | kmalloc_caches[idx] = create_kmalloc_cache(kmalloc_info[idx].name, |
1063 | kmalloc_info[idx].size, flags); | 1063 | kmalloc_info[idx].size, flags); |
@@ -1068,7 +1068,7 @@ static void __init new_kmalloc_cache(int idx, unsigned long flags) | |||
1068 | * may already have been created because they were needed to | 1068 | * may already have been created because they were needed to |
1069 | * enable allocations for slab creation. | 1069 | * enable allocations for slab creation. |
1070 | */ | 1070 | */ |
1071 | void __init create_kmalloc_caches(unsigned long flags) | 1071 | void __init create_kmalloc_caches(slab_flags_t flags) |
1072 | { | 1072 | { |
1073 | int i; | 1073 | int i; |
1074 | 1074 | ||
@@ -1184,8 +1184,7 @@ void cache_random_seq_destroy(struct kmem_cache *cachep) | |||
1184 | } | 1184 | } |
1185 | #endif /* CONFIG_SLAB_FREELIST_RANDOM */ | 1185 | #endif /* CONFIG_SLAB_FREELIST_RANDOM */ |
1186 | 1186 | ||
1187 | #ifdef CONFIG_SLABINFO | 1187 | #if defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG) |
1188 | |||
1189 | #ifdef CONFIG_SLAB | 1188 | #ifdef CONFIG_SLAB |
1190 | #define SLABINFO_RIGHTS (S_IWUSR | S_IRUSR) | 1189 | #define SLABINFO_RIGHTS (S_IWUSR | S_IRUSR) |
1191 | #else | 1190 | #else |
@@ -1281,7 +1280,41 @@ static int slab_show(struct seq_file *m, void *p) | |||
1281 | return 0; | 1280 | return 0; |
1282 | } | 1281 | } |
1283 | 1282 | ||
1284 | #if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB) | 1283 | void dump_unreclaimable_slab(void) |
1284 | { | ||
1285 | struct kmem_cache *s, *s2; | ||
1286 | struct slabinfo sinfo; | ||
1287 | |||
1288 | /* | ||
1289 | * Acquiring slab_mutex here is risky since we do not want to | ||
1290 | * sleep in the OOM path, but without holding the mutex we risk | ||
1291 | * a crash. | ||
1292 | * Use mutex_trylock to protect the list traversal, and dump | ||
1293 | * nothing if the mutex cannot be acquired. | ||
1294 | */ | ||
1295 | if (!mutex_trylock(&slab_mutex)) { | ||
1296 | pr_warn("excessive unreclaimable slab but cannot dump stats\n"); | ||
1297 | return; | ||
1298 | } | ||
1299 | |||
1300 | pr_info("Unreclaimable slab info:\n"); | ||
1301 | pr_info("Name Used Total\n"); | ||
1302 | |||
1303 | list_for_each_entry_safe(s, s2, &slab_caches, list) { | ||
1304 | if (!is_root_cache(s) || (s->flags & SLAB_RECLAIM_ACCOUNT)) | ||
1305 | continue; | ||
1306 | |||
1307 | get_slabinfo(s, &sinfo); | ||
1308 | |||
1309 | if (sinfo.num_objs > 0) | ||
1310 | pr_info("%-17s %10luKB %10luKB\n", cache_name(s), | ||
1311 | (sinfo.active_objs * s->size) / 1024, | ||
1312 | (sinfo.num_objs * s->size) / 1024); | ||
1313 | } | ||
1314 | mutex_unlock(&slab_mutex); | ||
1315 | } | ||
1316 | |||
1317 | #if defined(CONFIG_MEMCG) | ||
1285 | void *memcg_slab_start(struct seq_file *m, loff_t *pos) | 1318 | void *memcg_slab_start(struct seq_file *m, loff_t *pos) |
1286 | { | 1319 | { |
1287 | struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); | 1320 | struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); |
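dump_unreclaimable_slab() above deliberately uses mutex_trylock(): the OOM path must not sleep on slab_mutex, so when the lock is contended it warns and returns instead of blocking. The same report-if-you-can-but-never-block pattern in plain pthreads; dump_stats() and stats_lock are invented for the example:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t stats_lock = PTHREAD_MUTEX_INITIALIZER;

    static void dump_stats(void)
    {
            /* Must not block: called from a path that cannot afford to sleep. */
            if (pthread_mutex_trylock(&stats_lock) != 0) {
                    fprintf(stderr, "stats busy, skipping dump\n");
                    return;
            }
            puts("...stats would be printed here...");
            pthread_mutex_unlock(&stats_lock);
    }

    int main(void)
    {
            dump_stats();
            return 0;
    }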
@@ -1355,7 +1388,7 @@ static int __init slab_proc_init(void) | |||
1355 | return 0; | 1388 | return 0; |
1356 | } | 1389 | } |
1357 | module_init(slab_proc_init); | 1390 | module_init(slab_proc_init); |
1358 | #endif /* CONFIG_SLABINFO */ | 1391 | #endif /* CONFIG_SLAB || CONFIG_SLUB_DEBUG */ |
1359 | 1392 | ||
1360 | static __always_inline void *__do_krealloc(const void *p, size_t new_size, | 1393 | static __always_inline void *__do_krealloc(const void *p, size_t new_size, |
1361 | gfp_t flags) | 1394 | gfp_t flags) |
diff --git a/mm/slob.c b/mm/slob.c --- a/mm/slob.c +++ b/mm/slob.c | |||
@@ -330,7 +330,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node) | |||
330 | BUG_ON(!b); | 330 | BUG_ON(!b); |
331 | spin_unlock_irqrestore(&slob_lock, flags); | 331 | spin_unlock_irqrestore(&slob_lock, flags); |
332 | } | 332 | } |
333 | if (unlikely((gfp & __GFP_ZERO) && b)) | 333 | if (unlikely(gfp & __GFP_ZERO)) |
334 | memset(b, 0, size); | 334 | memset(b, 0, size); |
335 | return b; | 335 | return b; |
336 | } | 336 | } |
@@ -524,7 +524,7 @@ size_t ksize(const void *block) | |||
524 | } | 524 | } |
525 | EXPORT_SYMBOL(ksize); | 525 | EXPORT_SYMBOL(ksize); |
526 | 526 | ||
527 | int __kmem_cache_create(struct kmem_cache *c, unsigned long flags) | 527 | int __kmem_cache_create(struct kmem_cache *c, slab_flags_t flags) |
528 | { | 528 | { |
529 | if (flags & SLAB_TYPESAFE_BY_RCU) { | 529 | if (flags & SLAB_TYPESAFE_BY_RCU) { |
530 | /* leave room for rcu footer at the end of object */ | 530 | /* leave room for rcu footer at the end of object */ |
diff --git a/mm/slub.c b/mm/slub.c --- a/mm/slub.c +++ b/mm/slub.c | |||
@@ -22,7 +22,6 @@ | |||
22 | #include <linux/notifier.h> | 22 | #include <linux/notifier.h> |
23 | #include <linux/seq_file.h> | 23 | #include <linux/seq_file.h> |
24 | #include <linux/kasan.h> | 24 | #include <linux/kasan.h> |
25 | #include <linux/kmemcheck.h> | ||
26 | #include <linux/cpu.h> | 25 | #include <linux/cpu.h> |
27 | #include <linux/cpuset.h> | 26 | #include <linux/cpuset.h> |
28 | #include <linux/mempolicy.h> | 27 | #include <linux/mempolicy.h> |
@@ -193,8 +192,10 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s) | |||
193 | #define MAX_OBJS_PER_PAGE 32767 /* since page.objects is u15 */ | 192 | #define MAX_OBJS_PER_PAGE 32767 /* since page.objects is u15 */ |
194 | 193 | ||
195 | /* Internal SLUB flags */ | 194 | /* Internal SLUB flags */ |
196 | #define __OBJECT_POISON 0x80000000UL /* Poison object */ | 195 | /* Poison object */ |
197 | #define __CMPXCHG_DOUBLE 0x40000000UL /* Use cmpxchg_double */ | 196 | #define __OBJECT_POISON ((slab_flags_t __force)0x80000000U) |
197 | /* Use cmpxchg_double */ | ||
198 | #define __CMPXCHG_DOUBLE ((slab_flags_t __force)0x40000000U) | ||
198 | 199 | ||
199 | /* | 200 | /* |
200 | * Tracking user of a slab. | 201 | * Tracking user of a slab. |
@@ -485,9 +486,9 @@ static inline void *restore_red_left(struct kmem_cache *s, void *p) | |||
485 | * Debug settings: | 486 | * Debug settings: |
486 | */ | 487 | */ |
487 | #if defined(CONFIG_SLUB_DEBUG_ON) | 488 | #if defined(CONFIG_SLUB_DEBUG_ON) |
488 | static int slub_debug = DEBUG_DEFAULT_FLAGS; | 489 | static slab_flags_t slub_debug = DEBUG_DEFAULT_FLAGS; |
489 | #else | 490 | #else |
490 | static int slub_debug; | 491 | static slab_flags_t slub_debug; |
491 | #endif | 492 | #endif |
492 | 493 | ||
493 | static char *slub_debug_slabs; | 494 | static char *slub_debug_slabs; |
@@ -1289,8 +1290,8 @@ out: | |||
1289 | 1290 | ||
1290 | __setup("slub_debug", setup_slub_debug); | 1291 | __setup("slub_debug", setup_slub_debug); |
1291 | 1292 | ||
1292 | unsigned long kmem_cache_flags(unsigned long object_size, | 1293 | slab_flags_t kmem_cache_flags(unsigned long object_size, |
1293 | unsigned long flags, const char *name, | 1294 | slab_flags_t flags, const char *name, |
1294 | void (*ctor)(void *)) | 1295 | void (*ctor)(void *)) |
1295 | { | 1296 | { |
1296 | /* | 1297 | /* |
@@ -1322,8 +1323,8 @@ static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n, | |||
1322 | struct page *page) {} | 1323 | struct page *page) {} |
1323 | static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, | 1324 | static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, |
1324 | struct page *page) {} | 1325 | struct page *page) {} |
1325 | unsigned long kmem_cache_flags(unsigned long object_size, | 1326 | slab_flags_t kmem_cache_flags(unsigned long object_size, |
1326 | unsigned long flags, const char *name, | 1327 | slab_flags_t flags, const char *name, |
1327 | void (*ctor)(void *)) | 1328 | void (*ctor)(void *)) |
1328 | { | 1329 | { |
1329 | return flags; | 1330 | return flags; |
@@ -1370,12 +1371,11 @@ static inline void *slab_free_hook(struct kmem_cache *s, void *x) | |||
1370 | * So in order to make the debug calls that expect irqs to be | 1371 | * So in order to make the debug calls that expect irqs to be |
1371 | * disabled we need to disable interrupts temporarily. | 1372 | * disabled we need to disable interrupts temporarily. |
1372 | */ | 1373 | */ |
1373 | #if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP) | 1374 | #ifdef CONFIG_LOCKDEP |
1374 | { | 1375 | { |
1375 | unsigned long flags; | 1376 | unsigned long flags; |
1376 | 1377 | ||
1377 | local_irq_save(flags); | 1378 | local_irq_save(flags); |
1378 | kmemcheck_slab_free(s, x, s->object_size); | ||
1379 | debug_check_no_locks_freed(x, s->object_size); | 1379 | debug_check_no_locks_freed(x, s->object_size); |
1380 | local_irq_restore(flags); | 1380 | local_irq_restore(flags); |
1381 | } | 1381 | } |
@@ -1399,8 +1399,7 @@ static inline void slab_free_freelist_hook(struct kmem_cache *s, | |||
1399 | * Compiler cannot detect this function can be removed if slab_free_hook() | 1399 | * Compiler cannot detect this function can be removed if slab_free_hook() |
1400 | * evaluates to nothing. Thus, catch all relevant config debug options here. | 1400 | * evaluates to nothing. Thus, catch all relevant config debug options here. |
1401 | */ | 1401 | */ |
1402 | #if defined(CONFIG_KMEMCHECK) || \ | 1402 | #if defined(CONFIG_LOCKDEP) || \ |
1403 | defined(CONFIG_LOCKDEP) || \ | ||
1404 | defined(CONFIG_DEBUG_KMEMLEAK) || \ | 1403 | defined(CONFIG_DEBUG_KMEMLEAK) || \ |
1405 | defined(CONFIG_DEBUG_OBJECTS_FREE) || \ | 1404 | defined(CONFIG_DEBUG_OBJECTS_FREE) || \ |
1406 | defined(CONFIG_KASAN) | 1405 | defined(CONFIG_KASAN) |
@@ -1436,8 +1435,6 @@ static inline struct page *alloc_slab_page(struct kmem_cache *s, | |||
1436 | struct page *page; | 1435 | struct page *page; |
1437 | int order = oo_order(oo); | 1436 | int order = oo_order(oo); |
1438 | 1437 | ||
1439 | flags |= __GFP_NOTRACK; | ||
1440 | |||
1441 | if (node == NUMA_NO_NODE) | 1438 | if (node == NUMA_NO_NODE) |
1442 | page = alloc_pages(flags, order); | 1439 | page = alloc_pages(flags, order); |
1443 | else | 1440 | else |
@@ -1596,22 +1593,6 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) | |||
1596 | stat(s, ORDER_FALLBACK); | 1593 | stat(s, ORDER_FALLBACK); |
1597 | } | 1594 | } |
1598 | 1595 | ||
1599 | if (kmemcheck_enabled && | ||
1600 | !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) { | ||
1601 | int pages = 1 << oo_order(oo); | ||
1602 | |||
1603 | kmemcheck_alloc_shadow(page, oo_order(oo), alloc_gfp, node); | ||
1604 | |||
1605 | /* | ||
1606 | * Objects from caches that have a constructor don't get | ||
1607 | * cleared when they're allocated, so we need to do it here. | ||
1608 | */ | ||
1609 | if (s->ctor) | ||
1610 | kmemcheck_mark_uninitialized_pages(page, pages); | ||
1611 | else | ||
1612 | kmemcheck_mark_unallocated_pages(page, pages); | ||
1613 | } | ||
1614 | |||
1615 | page->objects = oo_objects(oo); | 1596 | page->objects = oo_objects(oo); |
1616 | 1597 | ||
1617 | order = compound_order(page); | 1598 | order = compound_order(page); |
@@ -1687,8 +1668,6 @@ static void __free_slab(struct kmem_cache *s, struct page *page) | |||
1687 | check_object(s, page, p, SLUB_RED_INACTIVE); | 1668 | check_object(s, page, p, SLUB_RED_INACTIVE); |
1688 | } | 1669 | } |
1689 | 1670 | ||
1690 | kmemcheck_free_shadow(page, compound_order(page)); | ||
1691 | |||
1692 | mod_lruvec_page_state(page, | 1671 | mod_lruvec_page_state(page, |
1693 | (s->flags & SLAB_RECLAIM_ACCOUNT) ? | 1672 | (s->flags & SLAB_RECLAIM_ACCOUNT) ? |
1694 | NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, | 1673 | NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, |
@@ -3477,7 +3456,7 @@ static void set_cpu_partial(struct kmem_cache *s) | |||
3477 | */ | 3456 | */ |
3478 | static int calculate_sizes(struct kmem_cache *s, int forced_order) | 3457 | static int calculate_sizes(struct kmem_cache *s, int forced_order) |
3479 | { | 3458 | { |
3480 | unsigned long flags = s->flags; | 3459 | slab_flags_t flags = s->flags; |
3481 | size_t size = s->object_size; | 3460 | size_t size = s->object_size; |
3482 | int order; | 3461 | int order; |
3483 | 3462 | ||
@@ -3593,7 +3572,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) | |||
3593 | return !!oo_objects(s->oo); | 3572 | return !!oo_objects(s->oo); |
3594 | } | 3573 | } |
3595 | 3574 | ||
3596 | static int kmem_cache_open(struct kmem_cache *s, unsigned long flags) | 3575 | static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags) |
3597 | { | 3576 | { |
3598 | s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor); | 3577 | s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor); |
3599 | s->reserved = 0; | 3578 | s->reserved = 0; |
@@ -3655,7 +3634,7 @@ error: | |||
3655 | if (flags & SLAB_PANIC) | 3634 | if (flags & SLAB_PANIC) |
3656 | panic("Cannot create slab %s size=%lu realsize=%u order=%u offset=%u flags=%lx\n", | 3635 | panic("Cannot create slab %s size=%lu realsize=%u order=%u offset=%u flags=%lx\n", |
3657 | s->name, (unsigned long)s->size, s->size, | 3636 | s->name, (unsigned long)s->size, s->size, |
3658 | oo_order(s->oo), s->offset, flags); | 3637 | oo_order(s->oo), s->offset, (unsigned long)flags); |
3659 | return -EINVAL; | 3638 | return -EINVAL; |
3660 | } | 3639 | } |
3661 | 3640 | ||
@@ -3792,7 +3771,7 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node) | |||
3792 | struct page *page; | 3771 | struct page *page; |
3793 | void *ptr = NULL; | 3772 | void *ptr = NULL; |
3794 | 3773 | ||
3795 | flags |= __GFP_COMP | __GFP_NOTRACK; | 3774 | flags |= __GFP_COMP; |
3796 | page = alloc_pages_node(node, flags, get_order(size)); | 3775 | page = alloc_pages_node(node, flags, get_order(size)); |
3797 | if (page) | 3776 | if (page) |
3798 | ptr = page_address(page); | 3777 | ptr = page_address(page); |
@@ -4245,7 +4224,7 @@ void __init kmem_cache_init_late(void) | |||
4245 | 4224 | ||
4246 | struct kmem_cache * | 4225 | struct kmem_cache * |
4247 | __kmem_cache_alias(const char *name, size_t size, size_t align, | 4226 | __kmem_cache_alias(const char *name, size_t size, size_t align, |
4248 | unsigned long flags, void (*ctor)(void *)) | 4227 | slab_flags_t flags, void (*ctor)(void *)) |
4249 | { | 4228 | { |
4250 | struct kmem_cache *s, *c; | 4229 | struct kmem_cache *s, *c; |
4251 | 4230 | ||
@@ -4275,7 +4254,7 @@ __kmem_cache_alias(const char *name, size_t size, size_t align, | |||
4275 | return s; | 4254 | return s; |
4276 | } | 4255 | } |
4277 | 4256 | ||
4278 | int __kmem_cache_create(struct kmem_cache *s, unsigned long flags) | 4257 | int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags) |
4279 | { | 4258 | { |
4280 | int err; | 4259 | int err; |
4281 | 4260 | ||
@@ -5655,8 +5634,6 @@ static char *create_unique_id(struct kmem_cache *s) | |||
5655 | *p++ = 'a'; | 5634 | *p++ = 'a'; |
5656 | if (s->flags & SLAB_CONSISTENCY_CHECKS) | 5635 | if (s->flags & SLAB_CONSISTENCY_CHECKS) |
5657 | *p++ = 'F'; | 5636 | *p++ = 'F'; |
5658 | if (!(s->flags & SLAB_NOTRACK)) | ||
5659 | *p++ = 't'; | ||
5660 | if (s->flags & SLAB_ACCOUNT) | 5637 | if (s->flags & SLAB_ACCOUNT) |
5661 | *p++ = 'A'; | 5638 | *p++ = 'A'; |
5662 | if (p != name + 1) | 5639 | if (p != name + 1) |
@@ -5704,6 +5681,10 @@ static int sysfs_slab_add(struct kmem_cache *s) | |||
5704 | return 0; | 5681 | return 0; |
5705 | } | 5682 | } |
5706 | 5683 | ||
5684 | if (!unmergeable && disable_higher_order_debug && | ||
5685 | (slub_debug & DEBUG_METADATA_FLAGS)) | ||
5686 | unmergeable = 1; | ||
5687 | |||
5707 | if (unmergeable) { | 5688 | if (unmergeable) { |
5708 | /* | 5689 | /* |
5709 | * Slabcache can never be merged so we can use the name proper. | 5690 | * Slabcache can never be merged so we can use the name proper. |
@@ -5852,7 +5833,7 @@ __initcall(slab_sysfs_init); | |||
5852 | /* | 5833 | /* |
5853 | * The /proc/slabinfo ABI | 5834 | * The /proc/slabinfo ABI |
5854 | */ | 5835 | */ |
5855 | #ifdef CONFIG_SLABINFO | 5836 | #ifdef CONFIG_SLUB_DEBUG |
5856 | void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo) | 5837 | void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo) |
5857 | { | 5838 | { |
5858 | unsigned long nr_slabs = 0; | 5839 | unsigned long nr_slabs = 0; |
@@ -5884,4 +5865,4 @@ ssize_t slabinfo_write(struct file *file, const char __user *buffer, | |||
5884 | { | 5865 | { |
5885 | return -EIO; | 5866 | return -EIO; |
5886 | } | 5867 | } |
5887 | #endif /* CONFIG_SLABINFO */ | 5868 | #endif /* CONFIG_SLUB_DEBUG */ |
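The mm/slub.c hunks above are the SLUB side of the slab_flags_t conversion: cache-creation flags become a dedicated __bitwise type instead of a bare unsigned long, internal flags such as __OBJECT_POISON are force-cast to it, and misuse can then be flagged by sparse. A minimal caller sketch under the new typing follows; struct foo, foo_cachep and foo_cache_init() are illustrative names, not part of this series.

#include <linux/init.h>
#include <linux/slab.h>

struct foo {                                    /* hypothetical payload type */
        unsigned long cookie;
};

static struct kmem_cache *foo_cachep;

static int __init foo_cache_init(void)
{
        slab_flags_t flags = SLAB_HWCACHE_ALIGN;        /* typed, not unsigned long */

        foo_cachep = kmem_cache_create("foo_cache", sizeof(struct foo),
                                       0, flags, NULL);
        return foo_cachep ? 0 : -ENOMEM;
}

Passing an untyped integer where slab_flags_t is expected now produces a sparse warning, which is the point of the conversion; callers that already used the SLAB_* constants need no source change.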
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 478ce6d4a2c4..17acf01791fa 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c | |||
@@ -42,7 +42,7 @@ static void * __ref __earlyonly_bootmem_alloc(int node, | |||
42 | unsigned long align, | 42 | unsigned long align, |
43 | unsigned long goal) | 43 | unsigned long goal) |
44 | { | 44 | { |
45 | return memblock_virt_alloc_try_nid(size, align, goal, | 45 | return memblock_virt_alloc_try_nid_raw(size, align, goal, |
46 | BOOTMEM_ALLOC_ACCESSIBLE, node); | 46 | BOOTMEM_ALLOC_ACCESSIBLE, node); |
47 | } | 47 | } |
48 | 48 | ||
@@ -53,13 +53,20 @@ void * __meminit vmemmap_alloc_block(unsigned long size, int node) | |||
53 | { | 53 | { |
54 | /* If the main allocator is up use that, fallback to bootmem. */ | 54 | /* If the main allocator is up use that, fallback to bootmem. */ |
55 | if (slab_is_available()) { | 55 | if (slab_is_available()) { |
56 | gfp_t gfp_mask = GFP_KERNEL|__GFP_RETRY_MAYFAIL|__GFP_NOWARN; | ||
57 | int order = get_order(size); | ||
58 | static bool warned; | ||
56 | struct page *page; | 59 | struct page *page; |
57 | 60 | ||
58 | page = alloc_pages_node(node, | 61 | page = alloc_pages_node(node, gfp_mask, order); |
59 | GFP_KERNEL | __GFP_ZERO | __GFP_RETRY_MAYFAIL, | ||
60 | get_order(size)); | ||
61 | if (page) | 62 | if (page) |
62 | return page_address(page); | 63 | return page_address(page); |
64 | |||
65 | if (!warned) { | ||
66 | warn_alloc(gfp_mask & ~__GFP_NOWARN, NULL, | ||
67 | "vmemmap alloc failure: order:%u", order); | ||
68 | warned = true; | ||
69 | } | ||
63 | return NULL; | 70 | return NULL; |
64 | } else | 71 | } else |
65 | return __earlyonly_bootmem_alloc(node, size, size, | 72 | return __earlyonly_bootmem_alloc(node, size, size, |
@@ -180,11 +187,22 @@ pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node) | |||
180 | return pte; | 187 | return pte; |
181 | } | 188 | } |
182 | 189 | ||
190 | static void * __meminit vmemmap_alloc_block_zero(unsigned long size, int node) | ||
191 | { | ||
192 | void *p = vmemmap_alloc_block(size, node); | ||
193 | |||
194 | if (!p) | ||
195 | return NULL; | ||
196 | memset(p, 0, size); | ||
197 | |||
198 | return p; | ||
199 | } | ||
200 | |||
183 | pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node) | 201 | pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node) |
184 | { | 202 | { |
185 | pmd_t *pmd = pmd_offset(pud, addr); | 203 | pmd_t *pmd = pmd_offset(pud, addr); |
186 | if (pmd_none(*pmd)) { | 204 | if (pmd_none(*pmd)) { |
187 | void *p = vmemmap_alloc_block(PAGE_SIZE, node); | 205 | void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node); |
188 | if (!p) | 206 | if (!p) |
189 | return NULL; | 207 | return NULL; |
190 | pmd_populate_kernel(&init_mm, pmd, p); | 208 | pmd_populate_kernel(&init_mm, pmd, p); |
@@ -196,7 +214,7 @@ pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node) | |||
196 | { | 214 | { |
197 | pud_t *pud = pud_offset(p4d, addr); | 215 | pud_t *pud = pud_offset(p4d, addr); |
198 | if (pud_none(*pud)) { | 216 | if (pud_none(*pud)) { |
199 | void *p = vmemmap_alloc_block(PAGE_SIZE, node); | 217 | void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node); |
200 | if (!p) | 218 | if (!p) |
201 | return NULL; | 219 | return NULL; |
202 | pud_populate(&init_mm, pud, p); | 220 | pud_populate(&init_mm, pud, p); |
@@ -208,7 +226,7 @@ p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node) | |||
208 | { | 226 | { |
209 | p4d_t *p4d = p4d_offset(pgd, addr); | 227 | p4d_t *p4d = p4d_offset(pgd, addr); |
210 | if (p4d_none(*p4d)) { | 228 | if (p4d_none(*p4d)) { |
211 | void *p = vmemmap_alloc_block(PAGE_SIZE, node); | 229 | void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node); |
212 | if (!p) | 230 | if (!p) |
213 | return NULL; | 231 | return NULL; |
214 | p4d_populate(&init_mm, p4d, p); | 232 | p4d_populate(&init_mm, p4d, p); |
@@ -220,7 +238,7 @@ pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node) | |||
220 | { | 238 | { |
221 | pgd_t *pgd = pgd_offset_k(addr); | 239 | pgd_t *pgd = pgd_offset_k(addr); |
222 | if (pgd_none(*pgd)) { | 240 | if (pgd_none(*pgd)) { |
223 | void *p = vmemmap_alloc_block(PAGE_SIZE, node); | 241 | void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node); |
224 | if (!p) | 242 | if (!p) |
225 | return NULL; | 243 | return NULL; |
226 | pgd_populate(&init_mm, pgd, p); | 244 | pgd_populate(&init_mm, pgd, p); |
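The switch to memblock_virt_alloc_try_nid_raw() and the new vmemmap_alloc_block_zero() helper split the callers into two groups: the struct page backing store may come back uninitialized because every byte of it is rewritten later, while page-table pages handed to pmd/pud/p4d/pgd_populate() must be zero-filled before use. A rough illustration of the distinction; node is a placeholder value, and since vmemmap_alloc_block_zero() is static to this file the snippet is conceptual rather than a real external caller.

int node = NUMA_NO_NODE;                        /* placeholder */

/* Page-table page: interpreted immediately, so it must be zero-filled. */
void *pgtable = vmemmap_alloc_block_zero(PAGE_SIZE, node);

/* vmemmap backing page: fully rewritten when struct pages are initialized,
 * so uninitialized (raw) memory is acceptable here. */
void *backing = vmemmap_alloc_block(PAGE_SIZE, node);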
diff --git a/mm/sparse.c b/mm/sparse.c index 60805abf98af..7a5dacaa06e3 100644 --- a/mm/sparse.c +++ b/mm/sparse.c | |||
@@ -453,9 +453,9 @@ void __init sparse_mem_maps_populate_node(struct page **map_map, | |||
453 | } | 453 | } |
454 | 454 | ||
455 | size = PAGE_ALIGN(size); | 455 | size = PAGE_ALIGN(size); |
456 | map = memblock_virt_alloc_try_nid(size * map_count, | 456 | map = memblock_virt_alloc_try_nid_raw(size * map_count, |
457 | PAGE_SIZE, __pa(MAX_DMA_ADDRESS), | 457 | PAGE_SIZE, __pa(MAX_DMA_ADDRESS), |
458 | BOOTMEM_ALLOC_ACCESSIBLE, nodeid); | 458 | BOOTMEM_ALLOC_ACCESSIBLE, nodeid); |
459 | if (map) { | 459 | if (map) { |
460 | for (pnum = pnum_begin; pnum < pnum_end; pnum++) { | 460 | for (pnum = pnum_begin; pnum < pnum_end; pnum++) { |
461 | if (!present_section_nr(pnum)) | 461 | if (!present_section_nr(pnum)) |
diff --git a/mm/swap.c b/mm/swap.c --- a/mm/swap.c +++ b/mm/swap.c | |||
@@ -76,7 +76,7 @@ static void __page_cache_release(struct page *page) | |||
76 | static void __put_single_page(struct page *page) | 76 | static void __put_single_page(struct page *page) |
77 | { | 77 | { |
78 | __page_cache_release(page); | 78 | __page_cache_release(page); |
79 | free_hot_cold_page(page, false); | 79 | free_unref_page(page); |
80 | } | 80 | } |
81 | 81 | ||
82 | static void __put_compound_page(struct page *page) | 82 | static void __put_compound_page(struct page *page) |
@@ -210,7 +210,7 @@ static void pagevec_lru_move_fn(struct pagevec *pvec, | |||
210 | } | 210 | } |
211 | if (pgdat) | 211 | if (pgdat) |
212 | spin_unlock_irqrestore(&pgdat->lru_lock, flags); | 212 | spin_unlock_irqrestore(&pgdat->lru_lock, flags); |
213 | release_pages(pvec->pages, pvec->nr, pvec->cold); | 213 | release_pages(pvec->pages, pvec->nr); |
214 | pagevec_reinit(pvec); | 214 | pagevec_reinit(pvec); |
215 | } | 215 | } |
216 | 216 | ||
@@ -740,7 +740,7 @@ void lru_add_drain_all(void) | |||
740 | * Decrement the reference count on all the pages in @pages. If it | 740 | * Decrement the reference count on all the pages in @pages. If it |
741 | * fell to zero, remove the page from the LRU and free it. | 741 | * fell to zero, remove the page from the LRU and free it. |
742 | */ | 742 | */ |
743 | void release_pages(struct page **pages, int nr, bool cold) | 743 | void release_pages(struct page **pages, int nr) |
744 | { | 744 | { |
745 | int i; | 745 | int i; |
746 | LIST_HEAD(pages_to_free); | 746 | LIST_HEAD(pages_to_free); |
@@ -817,7 +817,7 @@ void release_pages(struct page **pages, int nr, bool cold) | |||
817 | spin_unlock_irqrestore(&locked_pgdat->lru_lock, flags); | 817 | spin_unlock_irqrestore(&locked_pgdat->lru_lock, flags); |
818 | 818 | ||
819 | mem_cgroup_uncharge_list(&pages_to_free); | 819 | mem_cgroup_uncharge_list(&pages_to_free); |
820 | free_hot_cold_page_list(&pages_to_free, cold); | 820 | free_unref_page_list(&pages_to_free); |
821 | } | 821 | } |
822 | EXPORT_SYMBOL(release_pages); | 822 | EXPORT_SYMBOL(release_pages); |
823 | 823 | ||
@@ -833,8 +833,11 @@ EXPORT_SYMBOL(release_pages); | |||
833 | */ | 833 | */ |
834 | void __pagevec_release(struct pagevec *pvec) | 834 | void __pagevec_release(struct pagevec *pvec) |
835 | { | 835 | { |
836 | lru_add_drain(); | 836 | if (!pvec->percpu_pvec_drained) { |
837 | release_pages(pvec->pages, pagevec_count(pvec), pvec->cold); | 837 | lru_add_drain(); |
838 | pvec->percpu_pvec_drained = true; | ||
839 | } | ||
840 | release_pages(pvec->pages, pagevec_count(pvec)); | ||
838 | pagevec_reinit(pvec); | 841 | pagevec_reinit(pvec); |
839 | } | 842 | } |
840 | EXPORT_SYMBOL(__pagevec_release); | 843 | EXPORT_SYMBOL(__pagevec_release); |
@@ -986,15 +989,25 @@ unsigned pagevec_lookup_range(struct pagevec *pvec, | |||
986 | } | 989 | } |
987 | EXPORT_SYMBOL(pagevec_lookup_range); | 990 | EXPORT_SYMBOL(pagevec_lookup_range); |
988 | 991 | ||
989 | unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping, | 992 | unsigned pagevec_lookup_range_tag(struct pagevec *pvec, |
990 | pgoff_t *index, int tag, unsigned nr_pages) | 993 | struct address_space *mapping, pgoff_t *index, pgoff_t end, |
994 | int tag) | ||
991 | { | 995 | { |
992 | pvec->nr = find_get_pages_tag(mapping, index, tag, | 996 | pvec->nr = find_get_pages_range_tag(mapping, index, end, tag, |
993 | nr_pages, pvec->pages); | 997 | PAGEVEC_SIZE, pvec->pages); |
994 | return pagevec_count(pvec); | 998 | return pagevec_count(pvec); |
995 | } | 999 | } |
996 | EXPORT_SYMBOL(pagevec_lookup_tag); | 1000 | EXPORT_SYMBOL(pagevec_lookup_range_tag); |
997 | 1001 | ||
1002 | unsigned pagevec_lookup_range_nr_tag(struct pagevec *pvec, | ||
1003 | struct address_space *mapping, pgoff_t *index, pgoff_t end, | ||
1004 | int tag, unsigned max_pages) | ||
1005 | { | ||
1006 | pvec->nr = find_get_pages_range_tag(mapping, index, end, tag, | ||
1007 | min_t(unsigned int, max_pages, PAGEVEC_SIZE), pvec->pages); | ||
1008 | return pagevec_count(pvec); | ||
1009 | } | ||
1010 | EXPORT_SYMBOL(pagevec_lookup_range_nr_tag); | ||
998 | /* | 1011 | /* |
999 | * Perform any setup for the swap system | 1012 | * Perform any setup for the swap system |
1000 | */ | 1013 | */ |
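pagevec_lookup_range_tag() folds the page count into the API (it always fills up to PAGEVEC_SIZE pages) and adds an end index, so writeback-style loops can stop at a range boundary without over-fetching. Below is a sketch of a converted caller; process_dirty_page() is a hypothetical per-page callback, and the single-argument pagevec_init() reflects the cold-flag removal done elsewhere in this series.

static void demo_walk_dirty(struct address_space *mapping,
                            pgoff_t index, pgoff_t end)
{
        struct pagevec pvec;
        unsigned int i;

        pagevec_init(&pvec);
        while (index <= end &&
               pagevec_lookup_range_tag(&pvec, mapping, &index, end,
                                        PAGECACHE_TAG_DIRTY)) {
                for (i = 0; i < pagevec_count(&pvec); i++)
                        process_dirty_page(pvec.pages[i]);      /* hypothetical */
                pagevec_release(&pvec);
                cond_resched();
        }
}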
diff --git a/mm/swap_slots.c b/mm/swap_slots.c index d81cfc5a43d5..bebc19292018 100644 --- a/mm/swap_slots.c +++ b/mm/swap_slots.c | |||
@@ -149,6 +149,13 @@ static int alloc_swap_slot_cache(unsigned int cpu) | |||
149 | cache->nr = 0; | 149 | cache->nr = 0; |
150 | cache->cur = 0; | 150 | cache->cur = 0; |
151 | cache->n_ret = 0; | 151 | cache->n_ret = 0; |
152 | /* | ||
153 | * We initialized alloc_lock and free_lock earlier. We use | ||
154 | * !cache->slots or !cache->slots_ret to know if it is safe to acquire | ||
155 | * the corresponding lock and use the cache. Memory barrier below | ||
156 | * ensures the assumption. | ||
157 | */ | ||
158 | mb(); | ||
152 | cache->slots = slots; | 159 | cache->slots = slots; |
153 | slots = NULL; | 160 | slots = NULL; |
154 | cache->slots_ret = slots_ret; | 161 | cache->slots_ret = slots_ret; |
@@ -275,7 +282,7 @@ int free_swap_slot(swp_entry_t entry) | |||
275 | struct swap_slots_cache *cache; | 282 | struct swap_slots_cache *cache; |
276 | 283 | ||
277 | cache = raw_cpu_ptr(&swp_slots); | 284 | cache = raw_cpu_ptr(&swp_slots); |
278 | if (use_swap_slot_cache && cache->slots_ret) { | 285 | if (likely(use_swap_slot_cache && cache->slots_ret)) { |
279 | spin_lock_irq(&cache->free_lock); | 286 | spin_lock_irq(&cache->free_lock); |
280 | /* Swap slots cache may be deactivated before acquiring lock */ | 287 | /* Swap slots cache may be deactivated before acquiring lock */ |
281 | if (!use_swap_slot_cache || !cache->slots_ret) { | 288 | if (!use_swap_slot_cache || !cache->slots_ret) { |
@@ -326,7 +333,7 @@ swp_entry_t get_swap_page(struct page *page) | |||
326 | */ | 333 | */ |
327 | cache = raw_cpu_ptr(&swp_slots); | 334 | cache = raw_cpu_ptr(&swp_slots); |
328 | 335 | ||
329 | if (check_cache_active()) { | 336 | if (likely(check_cache_active() && cache->slots)) { |
330 | mutex_lock(&cache->alloc_lock); | 337 | mutex_lock(&cache->alloc_lock); |
331 | if (cache->slots) { | 338 | if (cache->slots) { |
332 | repeat: | 339 | repeat: |
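The new comment and mb() in alloc_swap_slot_cache() implement a publication pattern: the per-cpu locks are initialized first, the full barrier orders that initialization before cache->slots/slots_ret become visible, and the fast paths then treat a non-NULL pointer as proof that the locks are usable (hence the likely() hints). The same ordering written in the canonical store-release/load-acquire form, as a hedged sketch with illustrative demo_* names rather than the real swap_slots structures:

struct demo_cache {
        spinlock_t      lock;
        void            **slots;        /* NULL until the cache is usable */
};

static void demo_publish(struct demo_cache *c, void **slots)
{
        spin_lock_init(&c->lock);
        /* Order the lock init before the pointer becomes visible
         * (the patch uses a full mb() for the same purpose). */
        smp_store_release(&c->slots, slots);
}

static bool demo_try_use(struct demo_cache *c)
{
        if (!smp_load_acquire(&c->slots))
                return false;           /* not published yet */
        spin_lock(&c->lock);            /* guaranteed initialized here */
        spin_unlock(&c->lock);
        return true;
}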
diff --git a/mm/swap_state.c b/mm/swap_state.c index 326439428daf..39ae7cfad90f 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c | |||
@@ -36,9 +36,9 @@ static const struct address_space_operations swap_aops = { | |||
36 | #endif | 36 | #endif |
37 | }; | 37 | }; |
38 | 38 | ||
39 | struct address_space *swapper_spaces[MAX_SWAPFILES]; | 39 | struct address_space *swapper_spaces[MAX_SWAPFILES] __read_mostly; |
40 | static unsigned int nr_swapper_spaces[MAX_SWAPFILES]; | 40 | static unsigned int nr_swapper_spaces[MAX_SWAPFILES] __read_mostly; |
41 | bool swap_vma_readahead = true; | 41 | bool swap_vma_readahead __read_mostly = true; |
42 | 42 | ||
43 | #define SWAP_RA_WIN_SHIFT (PAGE_SHIFT / 2) | 43 | #define SWAP_RA_WIN_SHIFT (PAGE_SHIFT / 2) |
44 | #define SWAP_RA_HITS_MASK ((1UL << SWAP_RA_WIN_SHIFT) - 1) | 44 | #define SWAP_RA_HITS_MASK ((1UL << SWAP_RA_WIN_SHIFT) - 1) |
@@ -319,7 +319,7 @@ void free_pages_and_swap_cache(struct page **pages, int nr) | |||
319 | lru_add_drain(); | 319 | lru_add_drain(); |
320 | for (i = 0; i < nr; i++) | 320 | for (i = 0; i < nr; i++) |
321 | free_swap_cache(pagep[i]); | 321 | free_swap_cache(pagep[i]); |
322 | release_pages(pagep, nr, false); | 322 | release_pages(pagep, nr); |
323 | } | 323 | } |
324 | 324 | ||
325 | /* | 325 | /* |
@@ -559,6 +559,7 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask, | |||
559 | unsigned long offset = entry_offset; | 559 | unsigned long offset = entry_offset; |
560 | unsigned long start_offset, end_offset; | 560 | unsigned long start_offset, end_offset; |
561 | unsigned long mask; | 561 | unsigned long mask; |
562 | struct swap_info_struct *si = swp_swap_info(entry); | ||
562 | struct blk_plug plug; | 563 | struct blk_plug plug; |
563 | bool do_poll = true, page_allocated; | 564 | bool do_poll = true, page_allocated; |
564 | 565 | ||
@@ -572,6 +573,8 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask, | |||
572 | end_offset = offset | mask; | 573 | end_offset = offset | mask; |
573 | if (!start_offset) /* First page is swap header. */ | 574 | if (!start_offset) /* First page is swap header. */ |
574 | start_offset++; | 575 | start_offset++; |
576 | if (end_offset >= si->max) | ||
577 | end_offset = si->max - 1; | ||
575 | 578 | ||
576 | blk_start_plug(&plug); | 579 | blk_start_plug(&plug); |
577 | for (offset = start_offset; offset <= end_offset ; offset++) { | 580 | for (offset = start_offset; offset <= end_offset ; offset++) { |
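The added clamp keeps the readahead window inside the swap device: the window is built by rounding the faulting offset down and up to a power-of-two boundary, and without the check its upper edge could point past the last slot of a small swap area. Worked numbers for the clamp, with values chosen purely for illustration:

unsigned long mask = 7;                         /* 8-slot readahead window */
unsigned long offset = 1029;                    /* faulting swap offset */
unsigned long si_max = 1030;                    /* slots in this swap area */

unsigned long start_offset = offset & ~mask;    /* 1024 */
unsigned long end_offset   = offset | mask;     /* 1031: past the device */

if (!start_offset)
        start_offset++;                         /* slot 0 holds the swap header */
if (end_offset >= si_max)
        end_offset = si_max - 1;                /* clamped to 1029 */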
diff --git a/mm/swapfile.c b/mm/swapfile.c index e47a21e64764..3074b02eaa09 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
@@ -1328,6 +1328,13 @@ int page_swapcount(struct page *page) | |||
1328 | return count; | 1328 | return count; |
1329 | } | 1329 | } |
1330 | 1330 | ||
1331 | int __swap_count(struct swap_info_struct *si, swp_entry_t entry) | ||
1332 | { | ||
1333 | pgoff_t offset = swp_offset(entry); | ||
1334 | |||
1335 | return swap_count(si->swap_map[offset]); | ||
1336 | } | ||
1337 | |||
1331 | static int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry) | 1338 | static int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry) |
1332 | { | 1339 | { |
1333 | int count = 0; | 1340 | int count = 0; |
@@ -3169,6 +3176,9 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) | |||
3169 | if (bdi_cap_stable_pages_required(inode_to_bdi(inode))) | 3176 | if (bdi_cap_stable_pages_required(inode_to_bdi(inode))) |
3170 | p->flags |= SWP_STABLE_WRITES; | 3177 | p->flags |= SWP_STABLE_WRITES; |
3171 | 3178 | ||
3179 | if (bdi_cap_synchronous_io(inode_to_bdi(inode))) | ||
3180 | p->flags |= SWP_SYNCHRONOUS_IO; | ||
3181 | |||
3172 | if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) { | 3182 | if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) { |
3173 | int cpu; | 3183 | int cpu; |
3174 | unsigned long ci, nr_cluster; | 3184 | unsigned long ci, nr_cluster; |
@@ -3452,10 +3462,15 @@ int swapcache_prepare(swp_entry_t entry) | |||
3452 | return __swap_duplicate(entry, SWAP_HAS_CACHE); | 3462 | return __swap_duplicate(entry, SWAP_HAS_CACHE); |
3453 | } | 3463 | } |
3454 | 3464 | ||
3465 | struct swap_info_struct *swp_swap_info(swp_entry_t entry) | ||
3466 | { | ||
3467 | return swap_info[swp_type(entry)]; | ||
3468 | } | ||
3469 | |||
3455 | struct swap_info_struct *page_swap_info(struct page *page) | 3470 | struct swap_info_struct *page_swap_info(struct page *page) |
3456 | { | 3471 | { |
3457 | swp_entry_t swap = { .val = page_private(page) }; | 3472 | swp_entry_t entry = { .val = page_private(page) }; |
3458 | return swap_info[swp_type(swap)]; | 3473 | return swp_swap_info(entry); |
3459 | } | 3474 | } |
3460 | 3475 | ||
3461 | /* | 3476 | /* |
@@ -3463,7 +3478,6 @@ struct swap_info_struct *page_swap_info(struct page *page) | |||
3463 | */ | 3478 | */ |
3464 | struct address_space *__page_file_mapping(struct page *page) | 3479 | struct address_space *__page_file_mapping(struct page *page) |
3465 | { | 3480 | { |
3466 | VM_BUG_ON_PAGE(!PageSwapCache(page), page); | ||
3467 | return page_swap_info(page)->swap_file->f_mapping; | 3481 | return page_swap_info(page)->swap_file->f_mapping; |
3468 | } | 3482 | } |
3469 | EXPORT_SYMBOL_GPL(__page_file_mapping); | 3483 | EXPORT_SYMBOL_GPL(__page_file_mapping); |
@@ -3471,7 +3485,6 @@ EXPORT_SYMBOL_GPL(__page_file_mapping); | |||
3471 | pgoff_t __page_file_index(struct page *page) | 3485 | pgoff_t __page_file_index(struct page *page) |
3472 | { | 3486 | { |
3473 | swp_entry_t swap = { .val = page_private(page) }; | 3487 | swp_entry_t swap = { .val = page_private(page) }; |
3474 | VM_BUG_ON_PAGE(!PageSwapCache(page), page); | ||
3475 | return swp_offset(swap); | 3488 | return swp_offset(swap); |
3476 | } | 3489 | } |
3477 | EXPORT_SYMBOL_GPL(__page_file_index); | 3490 | EXPORT_SYMBOL_GPL(__page_file_index); |
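swp_swap_info() and __swap_count() give callers a cheap way to go from a swap entry to its swap_info_struct and to read the map count without taking si->lock; together with the new SWP_SYNCHRONOUS_IO flag they feed the synchronous swap-in fast path added elsewhere in this series. A small sketch of how the two helpers compose; demo_swap_count() is illustrative and assumes page is a swap-cache page whose private field holds the entry:

static int demo_swap_count(struct page *page)
{
        swp_entry_t entry = { .val = page_private(page) };
        struct swap_info_struct *si = swp_swap_info(entry);

        return __swap_count(si, entry);         /* no si->lock taken */
}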
diff --git a/mm/truncate.c b/mm/truncate.c index 2330223841fb..e4b4cf0f4070 100644 --- a/mm/truncate.c +++ b/mm/truncate.c | |||
@@ -25,44 +25,85 @@ | |||
25 | #include <linux/rmap.h> | 25 | #include <linux/rmap.h> |
26 | #include "internal.h" | 26 | #include "internal.h" |
27 | 27 | ||
28 | static void clear_shadow_entry(struct address_space *mapping, pgoff_t index, | 28 | /* |
29 | void *entry) | 29 | * Regular page slots are stabilized by the page lock even without the tree |
30 | * itself locked. These unlocked entries need verification under the tree | ||
31 | * lock. | ||
32 | */ | ||
33 | static inline void __clear_shadow_entry(struct address_space *mapping, | ||
34 | pgoff_t index, void *entry) | ||
30 | { | 35 | { |
31 | struct radix_tree_node *node; | 36 | struct radix_tree_node *node; |
32 | void **slot; | 37 | void **slot; |
33 | 38 | ||
34 | spin_lock_irq(&mapping->tree_lock); | ||
35 | /* | ||
36 | * Regular page slots are stabilized by the page lock even | ||
37 | * without the tree itself locked. These unlocked entries | ||
38 | * need verification under the tree lock. | ||
39 | */ | ||
40 | if (!__radix_tree_lookup(&mapping->page_tree, index, &node, &slot)) | 39 | if (!__radix_tree_lookup(&mapping->page_tree, index, &node, &slot)) |
41 | goto unlock; | 40 | return; |
42 | if (*slot != entry) | 41 | if (*slot != entry) |
43 | goto unlock; | 42 | return; |
44 | __radix_tree_replace(&mapping->page_tree, node, slot, NULL, | 43 | __radix_tree_replace(&mapping->page_tree, node, slot, NULL, |
45 | workingset_update_node, mapping); | 44 | workingset_update_node); |
46 | mapping->nrexceptional--; | 45 | mapping->nrexceptional--; |
47 | unlock: | 46 | } |
47 | |||
48 | static void clear_shadow_entry(struct address_space *mapping, pgoff_t index, | ||
49 | void *entry) | ||
50 | { | ||
51 | spin_lock_irq(&mapping->tree_lock); | ||
52 | __clear_shadow_entry(mapping, index, entry); | ||
48 | spin_unlock_irq(&mapping->tree_lock); | 53 | spin_unlock_irq(&mapping->tree_lock); |
49 | } | 54 | } |
50 | 55 | ||
51 | /* | 56 | /* |
52 | * Unconditionally remove exceptional entry. Usually called from truncate path. | 57 | * Unconditionally remove exceptional entries. Usually called from truncate |
58 | * path. Note that the pagevec may be altered by this function by removing | ||
59 | * exceptional entries similar to what pagevec_remove_exceptionals does. | ||
53 | */ | 60 | */ |
54 | static void truncate_exceptional_entry(struct address_space *mapping, | 61 | static void truncate_exceptional_pvec_entries(struct address_space *mapping, |
55 | pgoff_t index, void *entry) | 62 | struct pagevec *pvec, pgoff_t *indices, |
63 | pgoff_t end) | ||
56 | { | 64 | { |
65 | int i, j; | ||
66 | bool dax, lock; | ||
67 | |||
57 | /* Handled by shmem itself */ | 68 | /* Handled by shmem itself */ |
58 | if (shmem_mapping(mapping)) | 69 | if (shmem_mapping(mapping)) |
59 | return; | 70 | return; |
60 | 71 | ||
61 | if (dax_mapping(mapping)) { | 72 | for (j = 0; j < pagevec_count(pvec); j++) |
62 | dax_delete_mapping_entry(mapping, index); | 73 | if (radix_tree_exceptional_entry(pvec->pages[j])) |
74 | break; | ||
75 | |||
76 | if (j == pagevec_count(pvec)) | ||
63 | return; | 77 | return; |
78 | |||
79 | dax = dax_mapping(mapping); | ||
80 | lock = !dax && indices[j] < end; | ||
81 | if (lock) | ||
82 | spin_lock_irq(&mapping->tree_lock); | ||
83 | |||
84 | for (i = j; i < pagevec_count(pvec); i++) { | ||
85 | struct page *page = pvec->pages[i]; | ||
86 | pgoff_t index = indices[i]; | ||
87 | |||
88 | if (!radix_tree_exceptional_entry(page)) { | ||
89 | pvec->pages[j++] = page; | ||
90 | continue; | ||
91 | } | ||
92 | |||
93 | if (index >= end) | ||
94 | continue; | ||
95 | |||
96 | if (unlikely(dax)) { | ||
97 | dax_delete_mapping_entry(mapping, index); | ||
98 | continue; | ||
99 | } | ||
100 | |||
101 | __clear_shadow_entry(mapping, index, page); | ||
64 | } | 102 | } |
65 | clear_shadow_entry(mapping, index, entry); | 103 | |
104 | if (lock) | ||
105 | spin_unlock_irq(&mapping->tree_lock); | ||
106 | pvec->nr = j; | ||
66 | } | 107 | } |
67 | 108 | ||
68 | /* | 109 | /* |
@@ -134,11 +175,17 @@ void do_invalidatepage(struct page *page, unsigned int offset, | |||
134 | * its lock, b) when a concurrent invalidate_mapping_pages got there first and | 175 | * its lock, b) when a concurrent invalidate_mapping_pages got there first and |
135 | * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space. | 176 | * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space. |
136 | */ | 177 | */ |
137 | static int | 178 | static void |
138 | truncate_complete_page(struct address_space *mapping, struct page *page) | 179 | truncate_cleanup_page(struct address_space *mapping, struct page *page) |
139 | { | 180 | { |
140 | if (page->mapping != mapping) | 181 | if (page_mapped(page)) { |
141 | return -EIO; | 182 | loff_t holelen; |
183 | |||
184 | holelen = PageTransHuge(page) ? HPAGE_PMD_SIZE : PAGE_SIZE; | ||
185 | unmap_mapping_range(mapping, | ||
186 | (loff_t)page->index << PAGE_SHIFT, | ||
187 | holelen, 0); | ||
188 | } | ||
142 | 189 | ||
143 | if (page_has_private(page)) | 190 | if (page_has_private(page)) |
144 | do_invalidatepage(page, 0, PAGE_SIZE); | 191 | do_invalidatepage(page, 0, PAGE_SIZE); |
@@ -150,8 +197,6 @@ truncate_complete_page(struct address_space *mapping, struct page *page) | |||
150 | */ | 197 | */ |
151 | cancel_dirty_page(page); | 198 | cancel_dirty_page(page); |
152 | ClearPageMappedToDisk(page); | 199 | ClearPageMappedToDisk(page); |
153 | delete_from_page_cache(page); | ||
154 | return 0; | ||
155 | } | 200 | } |
156 | 201 | ||
157 | /* | 202 | /* |
@@ -180,16 +225,14 @@ invalidate_complete_page(struct address_space *mapping, struct page *page) | |||
180 | 225 | ||
181 | int truncate_inode_page(struct address_space *mapping, struct page *page) | 226 | int truncate_inode_page(struct address_space *mapping, struct page *page) |
182 | { | 227 | { |
183 | loff_t holelen; | ||
184 | VM_BUG_ON_PAGE(PageTail(page), page); | 228 | VM_BUG_ON_PAGE(PageTail(page), page); |
185 | 229 | ||
186 | holelen = PageTransHuge(page) ? HPAGE_PMD_SIZE : PAGE_SIZE; | 230 | if (page->mapping != mapping) |
187 | if (page_mapped(page)) { | 231 | return -EIO; |
188 | unmap_mapping_range(mapping, | 232 | |
189 | (loff_t)page->index << PAGE_SHIFT, | 233 | truncate_cleanup_page(mapping, page); |
190 | holelen, 0); | 234 | delete_from_page_cache(page); |
191 | } | 235 | return 0; |
192 | return truncate_complete_page(mapping, page); | ||
193 | } | 236 | } |
194 | 237 | ||
195 | /* | 238 | /* |
@@ -287,11 +330,19 @@ void truncate_inode_pages_range(struct address_space *mapping, | |||
287 | else | 330 | else |
288 | end = (lend + 1) >> PAGE_SHIFT; | 331 | end = (lend + 1) >> PAGE_SHIFT; |
289 | 332 | ||
290 | pagevec_init(&pvec, 0); | 333 | pagevec_init(&pvec); |
291 | index = start; | 334 | index = start; |
292 | while (index < end && pagevec_lookup_entries(&pvec, mapping, index, | 335 | while (index < end && pagevec_lookup_entries(&pvec, mapping, index, |
293 | min(end - index, (pgoff_t)PAGEVEC_SIZE), | 336 | min(end - index, (pgoff_t)PAGEVEC_SIZE), |
294 | indices)) { | 337 | indices)) { |
338 | /* | ||
339 | * Pagevec array has exceptional entries and we may also fail | ||
340 | * to lock some pages. So we store pages that can be deleted | ||
341 | * in a new pagevec. | ||
342 | */ | ||
343 | struct pagevec locked_pvec; | ||
344 | |||
345 | pagevec_init(&locked_pvec); | ||
295 | for (i = 0; i < pagevec_count(&pvec); i++) { | 346 | for (i = 0; i < pagevec_count(&pvec); i++) { |
296 | struct page *page = pvec.pages[i]; | 347 | struct page *page = pvec.pages[i]; |
297 | 348 | ||
@@ -300,11 +351,8 @@ void truncate_inode_pages_range(struct address_space *mapping, | |||
300 | if (index >= end) | 351 | if (index >= end) |
301 | break; | 352 | break; |
302 | 353 | ||
303 | if (radix_tree_exceptional_entry(page)) { | 354 | if (radix_tree_exceptional_entry(page)) |
304 | truncate_exceptional_entry(mapping, index, | ||
305 | page); | ||
306 | continue; | 355 | continue; |
307 | } | ||
308 | 356 | ||
309 | if (!trylock_page(page)) | 357 | if (!trylock_page(page)) |
310 | continue; | 358 | continue; |
@@ -313,15 +361,22 @@ void truncate_inode_pages_range(struct address_space *mapping, | |||
313 | unlock_page(page); | 361 | unlock_page(page); |
314 | continue; | 362 | continue; |
315 | } | 363 | } |
316 | truncate_inode_page(mapping, page); | 364 | if (page->mapping != mapping) { |
317 | unlock_page(page); | 365 | unlock_page(page); |
366 | continue; | ||
367 | } | ||
368 | pagevec_add(&locked_pvec, page); | ||
318 | } | 369 | } |
319 | pagevec_remove_exceptionals(&pvec); | 370 | for (i = 0; i < pagevec_count(&locked_pvec); i++) |
371 | truncate_cleanup_page(mapping, locked_pvec.pages[i]); | ||
372 | delete_from_page_cache_batch(mapping, &locked_pvec); | ||
373 | for (i = 0; i < pagevec_count(&locked_pvec); i++) | ||
374 | unlock_page(locked_pvec.pages[i]); | ||
375 | truncate_exceptional_pvec_entries(mapping, &pvec, indices, end); | ||
320 | pagevec_release(&pvec); | 376 | pagevec_release(&pvec); |
321 | cond_resched(); | 377 | cond_resched(); |
322 | index++; | 378 | index++; |
323 | } | 379 | } |
324 | |||
325 | if (partial_start) { | 380 | if (partial_start) { |
326 | struct page *page = find_lock_page(mapping, start - 1); | 381 | struct page *page = find_lock_page(mapping, start - 1); |
327 | if (page) { | 382 | if (page) { |
@@ -379,6 +434,7 @@ void truncate_inode_pages_range(struct address_space *mapping, | |||
379 | pagevec_release(&pvec); | 434 | pagevec_release(&pvec); |
380 | break; | 435 | break; |
381 | } | 436 | } |
437 | |||
382 | for (i = 0; i < pagevec_count(&pvec); i++) { | 438 | for (i = 0; i < pagevec_count(&pvec); i++) { |
383 | struct page *page = pvec.pages[i]; | 439 | struct page *page = pvec.pages[i]; |
384 | 440 | ||
@@ -390,11 +446,8 @@ void truncate_inode_pages_range(struct address_space *mapping, | |||
390 | break; | 446 | break; |
391 | } | 447 | } |
392 | 448 | ||
393 | if (radix_tree_exceptional_entry(page)) { | 449 | if (radix_tree_exceptional_entry(page)) |
394 | truncate_exceptional_entry(mapping, index, | ||
395 | page); | ||
396 | continue; | 450 | continue; |
397 | } | ||
398 | 451 | ||
399 | lock_page(page); | 452 | lock_page(page); |
400 | WARN_ON(page_to_index(page) != index); | 453 | WARN_ON(page_to_index(page) != index); |
@@ -402,7 +455,7 @@ void truncate_inode_pages_range(struct address_space *mapping, | |||
402 | truncate_inode_page(mapping, page); | 455 | truncate_inode_page(mapping, page); |
403 | unlock_page(page); | 456 | unlock_page(page); |
404 | } | 457 | } |
405 | pagevec_remove_exceptionals(&pvec); | 458 | truncate_exceptional_pvec_entries(mapping, &pvec, indices, end); |
406 | pagevec_release(&pvec); | 459 | pagevec_release(&pvec); |
407 | index++; | 460 | index++; |
408 | } | 461 | } |
@@ -500,7 +553,7 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping, | |||
500 | unsigned long count = 0; | 553 | unsigned long count = 0; |
501 | int i; | 554 | int i; |
502 | 555 | ||
503 | pagevec_init(&pvec, 0); | 556 | pagevec_init(&pvec); |
504 | while (index <= end && pagevec_lookup_entries(&pvec, mapping, index, | 557 | while (index <= end && pagevec_lookup_entries(&pvec, mapping, index, |
505 | min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, | 558 | min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, |
506 | indices)) { | 559 | indices)) { |
@@ -630,7 +683,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping, | |||
630 | if (mapping->nrpages == 0 && mapping->nrexceptional == 0) | 683 | if (mapping->nrpages == 0 && mapping->nrexceptional == 0) |
631 | goto out; | 684 | goto out; |
632 | 685 | ||
633 | pagevec_init(&pvec, 0); | 686 | pagevec_init(&pvec); |
634 | index = start; | 687 | index = start; |
635 | while (index <= end && pagevec_lookup_entries(&pvec, mapping, index, | 688 | while (index <= end && pagevec_lookup_entries(&pvec, mapping, index, |
636 | min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, | 689 | min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, |
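truncate_exceptional_pvec_entries() above consumes the exceptional entries while compacting the surviving pages down in place and trimming pvec->nr, so the caller can keep using the same pagevec. The idiom, reduced to plain C over an integer array as a generic illustration rather than kernel code:

/* Keep the elements matching a predicate, preserving order, in place. */
static void compact_in_place(int *a, int *nr)
{
        int i, j = 0;

        for (i = 0; i < *nr; i++) {
                if (a[i] & 1)           /* "keep" predicate: odd values */
                        a[j++] = a[i];
                /* consumed entries (even values) are simply skipped */
        }
        *nr = j;                        /* trim to what was kept */
}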
diff --git a/mm/vmscan.c b/mm/vmscan.c index 15b483ef6440..c02c850ea349 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -1349,7 +1349,7 @@ keep: | |||
1349 | 1349 | ||
1350 | mem_cgroup_uncharge_list(&free_pages); | 1350 | mem_cgroup_uncharge_list(&free_pages); |
1351 | try_to_unmap_flush(); | 1351 | try_to_unmap_flush(); |
1352 | free_hot_cold_page_list(&free_pages, true); | 1352 | free_unref_page_list(&free_pages); |
1353 | 1353 | ||
1354 | list_splice(&ret_pages, page_list); | 1354 | list_splice(&ret_pages, page_list); |
1355 | count_vm_events(PGACTIVATE, pgactivate); | 1355 | count_vm_events(PGACTIVATE, pgactivate); |
@@ -1824,7 +1824,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, | |||
1824 | spin_unlock_irq(&pgdat->lru_lock); | 1824 | spin_unlock_irq(&pgdat->lru_lock); |
1825 | 1825 | ||
1826 | mem_cgroup_uncharge_list(&page_list); | 1826 | mem_cgroup_uncharge_list(&page_list); |
1827 | free_hot_cold_page_list(&page_list, true); | 1827 | free_unref_page_list(&page_list); |
1828 | 1828 | ||
1829 | /* | 1829 | /* |
1830 | * If reclaim is isolating dirty pages under writeback, it implies | 1830 | * If reclaim is isolating dirty pages under writeback, it implies |
@@ -2063,7 +2063,7 @@ static void shrink_active_list(unsigned long nr_to_scan, | |||
2063 | spin_unlock_irq(&pgdat->lru_lock); | 2063 | spin_unlock_irq(&pgdat->lru_lock); |
2064 | 2064 | ||
2065 | mem_cgroup_uncharge_list(&l_hold); | 2065 | mem_cgroup_uncharge_list(&l_hold); |
2066 | free_hot_cold_page_list(&l_hold, true); | 2066 | free_unref_page_list(&l_hold); |
2067 | trace_mm_vmscan_lru_shrink_active(pgdat->node_id, nr_taken, nr_activate, | 2067 | trace_mm_vmscan_lru_shrink_active(pgdat->node_id, nr_taken, nr_activate, |
2068 | nr_deactivate, nr_rotated, sc->priority, file); | 2068 | nr_deactivate, nr_rotated, sc->priority, file); |
2069 | } | 2069 | } |
@@ -2082,7 +2082,7 @@ static void shrink_active_list(unsigned long nr_to_scan, | |||
2082 | * If that fails and refaulting is observed, the inactive list grows. | 2082 | * If that fails and refaulting is observed, the inactive list grows. |
2083 | * | 2083 | * |
2084 | * The inactive_ratio is the target ratio of ACTIVE to INACTIVE pages | 2084 | * The inactive_ratio is the target ratio of ACTIVE to INACTIVE pages |
2085 | * on this LRU, maintained by the pageout code. A zone->inactive_ratio | 2085 | * on this LRU, maintained by the pageout code. An inactive_ratio |
2086 | * of 3 means 3:1 or 25% of the pages are kept on the inactive list. | 2086 | * of 3 means 3:1 or 25% of the pages are kept on the inactive list. |
2087 | * | 2087 | * |
2088 | * total target max | 2088 | * total target max |
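The reworded comment keeps the same semantics now that the ratio is no longer read from a stored per-node field: an inactive_ratio of N allows the active list to be N times the inactive one, so about 1/(N+1) of the pages stay inactive. Worked numbers, assuming 4 KiB pages and figures chosen only for illustration:

unsigned long file_pages = 1UL << 20;           /* ~4 GiB of file LRU pages */
unsigned int inactive_ratio = 3;                /* active:inactive target 3:1 */

unsigned long inactive_target = file_pages / (inactive_ratio + 1);
                                                /* ~256K pages, i.e. ~1 GiB */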
diff --git a/mm/vmstat.c b/mm/vmstat.c index 4bb13e72ac97..40b2db6db6b1 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c | |||
@@ -32,6 +32,77 @@ | |||
32 | 32 | ||
33 | #define NUMA_STATS_THRESHOLD (U16_MAX - 2) | 33 | #define NUMA_STATS_THRESHOLD (U16_MAX - 2) |
34 | 34 | ||
35 | #ifdef CONFIG_NUMA | ||
36 | int sysctl_vm_numa_stat = ENABLE_NUMA_STAT; | ||
37 | |||
38 | /* zero numa counters within a zone */ | ||
39 | static void zero_zone_numa_counters(struct zone *zone) | ||
40 | { | ||
41 | int item, cpu; | ||
42 | |||
43 | for (item = 0; item < NR_VM_NUMA_STAT_ITEMS; item++) { | ||
44 | atomic_long_set(&zone->vm_numa_stat[item], 0); | ||
45 | for_each_online_cpu(cpu) | ||
46 | per_cpu_ptr(zone->pageset, cpu)->vm_numa_stat_diff[item] | ||
47 | = 0; | ||
48 | } | ||
49 | } | ||
50 | |||
51 | /* zero numa counters of all the populated zones */ | ||
52 | static void zero_zones_numa_counters(void) | ||
53 | { | ||
54 | struct zone *zone; | ||
55 | |||
56 | for_each_populated_zone(zone) | ||
57 | zero_zone_numa_counters(zone); | ||
58 | } | ||
59 | |||
60 | /* zero global numa counters */ | ||
61 | static void zero_global_numa_counters(void) | ||
62 | { | ||
63 | int item; | ||
64 | |||
65 | for (item = 0; item < NR_VM_NUMA_STAT_ITEMS; item++) | ||
66 | atomic_long_set(&vm_numa_stat[item], 0); | ||
67 | } | ||
68 | |||
69 | static void invalid_numa_statistics(void) | ||
70 | { | ||
71 | zero_zones_numa_counters(); | ||
72 | zero_global_numa_counters(); | ||
73 | } | ||
74 | |||
75 | static DEFINE_MUTEX(vm_numa_stat_lock); | ||
76 | |||
77 | int sysctl_vm_numa_stat_handler(struct ctl_table *table, int write, | ||
78 | void __user *buffer, size_t *length, loff_t *ppos) | ||
79 | { | ||
80 | int ret, oldval; | ||
81 | |||
82 | mutex_lock(&vm_numa_stat_lock); | ||
83 | if (write) | ||
84 | oldval = sysctl_vm_numa_stat; | ||
85 | ret = proc_dointvec_minmax(table, write, buffer, length, ppos); | ||
86 | if (ret || !write) | ||
87 | goto out; | ||
88 | |||
89 | if (oldval == sysctl_vm_numa_stat) | ||
90 | goto out; | ||
91 | else if (sysctl_vm_numa_stat == ENABLE_NUMA_STAT) { | ||
92 | static_branch_enable(&vm_numa_stat_key); | ||
93 | pr_info("enable numa statistics\n"); | ||
94 | } else { | ||
95 | static_branch_disable(&vm_numa_stat_key); | ||
96 | invalid_numa_statistics(); | ||
97 | pr_info("disable numa statistics, and clear numa counters\n"); | ||
98 | } | ||
99 | |||
100 | out: | ||
101 | mutex_unlock(&vm_numa_stat_lock); | ||
102 | return ret; | ||
103 | } | ||
104 | #endif | ||
105 | |||
35 | #ifdef CONFIG_VM_EVENT_COUNTERS | 106 | #ifdef CONFIG_VM_EVENT_COUNTERS |
36 | DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}}; | 107 | DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}}; |
37 | EXPORT_PER_CPU_SYMBOL(vm_event_states); | 108 | EXPORT_PER_CPU_SYMBOL(vm_event_states); |
@@ -1564,11 +1635,9 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, | |||
1564 | } | 1635 | } |
1565 | seq_printf(m, | 1636 | seq_printf(m, |
1566 | "\n node_unreclaimable: %u" | 1637 | "\n node_unreclaimable: %u" |
1567 | "\n start_pfn: %lu" | 1638 | "\n start_pfn: %lu", |
1568 | "\n node_inactive_ratio: %u", | ||
1569 | pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES, | 1639 | pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES, |
1570 | zone->zone_start_pfn, | 1640 | zone->zone_start_pfn); |
1571 | zone->zone_pgdat->inactive_ratio); | ||
1572 | seq_putc(m, '\n'); | 1641 | seq_putc(m, '\n'); |
1573 | } | 1642 | } |
1574 | 1643 | ||
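sysctl_vm_numa_stat_handler() is wired up as a vm sysctl, so the NUMA allocation counters can be switched off (and zeroed) at run time with sysctl -w vm.numa_stat=0 and re-enabled with vm.numa_stat=1; the static branch keeps the disabled case free of per-allocation counter updates. The matching ctl_table entry lives in kernel/sysctl.c; the shape below is a reconstruction for illustration, not text quoted from the patch:

#ifdef CONFIG_NUMA
        {
                .procname       = "numa_stat",
                .data           = &sysctl_vm_numa_stat,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = sysctl_vm_numa_stat_handler,
                .extra1         = &zero,        /* clamp values to 0..1 */
                .extra2         = &one,
        },
#endif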
diff --git a/mm/workingset.c b/mm/workingset.c index b997c9de28f6..b7d616a3bbbe 100644 --- a/mm/workingset.c +++ b/mm/workingset.c | |||
@@ -340,14 +340,8 @@ out: | |||
340 | 340 | ||
341 | static struct list_lru shadow_nodes; | 341 | static struct list_lru shadow_nodes; |
342 | 342 | ||
343 | void workingset_update_node(struct radix_tree_node *node, void *private) | 343 | void workingset_update_node(struct radix_tree_node *node) |
344 | { | 344 | { |
345 | struct address_space *mapping = private; | ||
346 | |||
347 | /* Only regular page cache has shadow entries */ | ||
348 | if (dax_mapping(mapping) || shmem_mapping(mapping)) | ||
349 | return; | ||
350 | |||
351 | /* | 345 | /* |
352 | * Track non-empty nodes that contain only shadow entries; | 346 | * Track non-empty nodes that contain only shadow entries; |
353 | * unlink those that contain pages or are being freed. | 347 | * unlink those that contain pages or are being freed. |
@@ -475,7 +469,7 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, | |||
475 | goto out_invalid; | 469 | goto out_invalid; |
476 | inc_lruvec_page_state(virt_to_page(node), WORKINGSET_NODERECLAIM); | 470 | inc_lruvec_page_state(virt_to_page(node), WORKINGSET_NODERECLAIM); |
477 | __radix_tree_delete_node(&mapping->page_tree, node, | 471 | __radix_tree_delete_node(&mapping->page_tree, node, |
478 | workingset_update_node, mapping); | 472 | workingset_lookup_update(mapping)); |
479 | 473 | ||
480 | out_invalid: | 474 | out_invalid: |
481 | spin_unlock(&mapping->tree_lock); | 475 | spin_unlock(&mapping->tree_lock); |
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 7c38e850a8fc..685049a9048d 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c | |||
@@ -1349,7 +1349,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle, | |||
1349 | * pools/users, we can't allow mapping in interrupt context | 1349 | * pools/users, we can't allow mapping in interrupt context |
1350 | * because it can corrupt another users mappings. | 1350 | * because it can corrupt another users mappings. |
1351 | */ | 1351 | */ |
1352 | WARN_ON_ONCE(in_interrupt()); | 1352 | BUG_ON(in_interrupt()); |
1353 | 1353 | ||
1354 | /* From now on, migration cannot move the object */ | 1354 | /* From now on, migration cannot move the object */ |
1355 | pin_tag(handle); | 1355 | pin_tag(handle); |
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8134c00df6c2..6b0ff396fa9d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c | |||
@@ -41,7 +41,6 @@ | |||
41 | #include <linux/module.h> | 41 | #include <linux/module.h> |
42 | #include <linux/types.h> | 42 | #include <linux/types.h> |
43 | #include <linux/kernel.h> | 43 | #include <linux/kernel.h> |
44 | #include <linux/kmemcheck.h> | ||
45 | #include <linux/mm.h> | 44 | #include <linux/mm.h> |
46 | #include <linux/interrupt.h> | 45 | #include <linux/interrupt.h> |
47 | #include <linux/in.h> | 46 | #include <linux/in.h> |
@@ -234,14 +233,12 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, | |||
234 | shinfo = skb_shinfo(skb); | 233 | shinfo = skb_shinfo(skb); |
235 | memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); | 234 | memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); |
236 | atomic_set(&shinfo->dataref, 1); | 235 | atomic_set(&shinfo->dataref, 1); |
237 | kmemcheck_annotate_variable(shinfo->destructor_arg); | ||
238 | 236 | ||
239 | if (flags & SKB_ALLOC_FCLONE) { | 237 | if (flags & SKB_ALLOC_FCLONE) { |
240 | struct sk_buff_fclones *fclones; | 238 | struct sk_buff_fclones *fclones; |
241 | 239 | ||
242 | fclones = container_of(skb, struct sk_buff_fclones, skb1); | 240 | fclones = container_of(skb, struct sk_buff_fclones, skb1); |
243 | 241 | ||
244 | kmemcheck_annotate_bitfield(&fclones->skb2, flags1); | ||
245 | skb->fclone = SKB_FCLONE_ORIG; | 242 | skb->fclone = SKB_FCLONE_ORIG; |
246 | refcount_set(&fclones->fclone_ref, 1); | 243 | refcount_set(&fclones->fclone_ref, 1); |
247 | 244 | ||
@@ -301,7 +298,6 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size) | |||
301 | shinfo = skb_shinfo(skb); | 298 | shinfo = skb_shinfo(skb); |
302 | memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); | 299 | memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); |
303 | atomic_set(&shinfo->dataref, 1); | 300 | atomic_set(&shinfo->dataref, 1); |
304 | kmemcheck_annotate_variable(shinfo->destructor_arg); | ||
305 | 301 | ||
306 | return skb; | 302 | return skb; |
307 | } | 303 | } |
@@ -357,7 +353,7 @@ static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) | |||
357 | */ | 353 | */ |
358 | void *netdev_alloc_frag(unsigned int fragsz) | 354 | void *netdev_alloc_frag(unsigned int fragsz) |
359 | { | 355 | { |
360 | return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD); | 356 | return __netdev_alloc_frag(fragsz, GFP_ATOMIC); |
361 | } | 357 | } |
362 | EXPORT_SYMBOL(netdev_alloc_frag); | 358 | EXPORT_SYMBOL(netdev_alloc_frag); |
363 | 359 | ||
@@ -370,7 +366,7 @@ static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) | |||
370 | 366 | ||
371 | void *napi_alloc_frag(unsigned int fragsz) | 367 | void *napi_alloc_frag(unsigned int fragsz) |
372 | { | 368 | { |
373 | return __napi_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD); | 369 | return __napi_alloc_frag(fragsz, GFP_ATOMIC); |
374 | } | 370 | } |
375 | EXPORT_SYMBOL(napi_alloc_frag); | 371 | EXPORT_SYMBOL(napi_alloc_frag); |
376 | 372 | ||
@@ -1283,7 +1279,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) | |||
1283 | if (!n) | 1279 | if (!n) |
1284 | return NULL; | 1280 | return NULL; |
1285 | 1281 | ||
1286 | kmemcheck_annotate_bitfield(n, flags1); | ||
1287 | n->fclone = SKB_FCLONE_UNAVAILABLE; | 1282 | n->fclone = SKB_FCLONE_UNAVAILABLE; |
1288 | } | 1283 | } |
1289 | 1284 | ||
diff --git a/net/core/sock.c b/net/core/sock.c index 13719af7b4e3..c0b5b2f17412 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
@@ -1469,8 +1469,6 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, | |||
1469 | sk = kmalloc(prot->obj_size, priority); | 1469 | sk = kmalloc(prot->obj_size, priority); |
1470 | 1470 | ||
1471 | if (sk != NULL) { | 1471 | if (sk != NULL) { |
1472 | kmemcheck_annotate_bitfield(sk, flags); | ||
1473 | |||
1474 | if (security_sk_alloc(sk, family, priority)) | 1472 | if (security_sk_alloc(sk, family, priority)) |
1475 | goto out_free; | 1473 | goto out_free; |
1476 | 1474 | ||
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index a4bab81f1462..c690cd0d9b3f 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c | |||
@@ -9,7 +9,6 @@ | |||
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <linux/kernel.h> | 11 | #include <linux/kernel.h> |
12 | #include <linux/kmemcheck.h> | ||
13 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
14 | #include <linux/module.h> | 13 | #include <linux/module.h> |
15 | #include <net/inet_hashtables.h> | 14 | #include <net/inet_hashtables.h> |
@@ -167,8 +166,6 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, | |||
167 | if (tw) { | 166 | if (tw) { |
168 | const struct inet_sock *inet = inet_sk(sk); | 167 | const struct inet_sock *inet = inet_sk(sk); |
169 | 168 | ||
170 | kmemcheck_annotate_bitfield(tw, flags); | ||
171 | |||
172 | tw->tw_dr = dr; | 169 | tw->tw_dr = dr; |
173 | /* Give us an identity. */ | 170 | /* Give us an identity. */ |
174 | tw->tw_daddr = inet->inet_daddr; | 171 | tw->tw_daddr = inet->inet_daddr; |
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index dabbf1d392fb..f844c06c0676 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -6130,7 +6130,6 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, | |||
6130 | if (req) { | 6130 | if (req) { |
6131 | struct inet_request_sock *ireq = inet_rsk(req); | 6131 | struct inet_request_sock *ireq = inet_rsk(req); |
6132 | 6132 | ||
6133 | kmemcheck_annotate_bitfield(ireq, flags); | ||
6134 | ireq->ireq_opt = NULL; | 6133 | ireq->ireq_opt = NULL; |
6135 | #if IS_ENABLED(CONFIG_IPV6) | 6134 | #if IS_ENABLED(CONFIG_IPV6) |
6136 | ireq->pktopts = NULL; | 6135 | ireq->pktopts = NULL; |
diff --git a/net/rds/ib_fmr.c b/net/rds/ib_fmr.c index 86ef907067bb..e0f70c4051b6 100644 --- a/net/rds/ib_fmr.c +++ b/net/rds/ib_fmr.c | |||
@@ -139,8 +139,8 @@ static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, | |||
139 | return -EINVAL; | 139 | return -EINVAL; |
140 | } | 140 | } |
141 | 141 | ||
142 | dma_pages = kmalloc_node(sizeof(u64) * page_cnt, GFP_ATOMIC, | 142 | dma_pages = kmalloc_array_node(sizeof(u64), page_cnt, GFP_ATOMIC, |
143 | rdsibdev_to_node(rds_ibdev)); | 143 | rdsibdev_to_node(rds_ibdev)); |
144 | if (!dma_pages) { | 144 | if (!dma_pages) { |
145 | ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL); | 145 | ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL); |
146 | return -ENOMEM; | 146 | return -ENOMEM; |
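kmalloc_array_node() is the node-aware counterpart of kmalloc_array() introduced by this series: it multiplies the element count and size with overflow checking and allocates on the requested NUMA node, replacing the open-coded sizeof(u64) * page_cnt above. A hedged usage sketch, with nents and node as placeholder parameters:

static u64 *demo_alloc_dma_pages(int nents, int node)
{
        u64 *pages = kmalloc_array_node(nents, sizeof(*pages),
                                        GFP_KERNEL, node);

        if (!pages)
                return NULL;            /* allocation failure or n * size overflow */
        return pages;
}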
diff --git a/net/socket.c b/net/socket.c index c729625eb5d3..42d8e9c9ccd5 100644 --- a/net/socket.c +++ b/net/socket.c | |||
@@ -568,7 +568,6 @@ struct socket *sock_alloc(void) | |||
568 | 568 | ||
569 | sock = SOCKET_I(inode); | 569 | sock = SOCKET_I(inode); |
570 | 570 | ||
571 | kmemcheck_annotate_bitfield(sock, type); | ||
572 | inode->i_ino = get_next_ino(); | 571 | inode->i_ino = get_next_ino(); |
573 | inode->i_mode = S_IFSOCK | S_IRWXUGO; | 572 | inode->i_mode = S_IFSOCK | S_IRWXUGO; |
574 | inode->i_uid = current_fsuid(); | 573 | inode->i_uid = current_fsuid(); |
diff --git a/scripts/bloat-o-meter b/scripts/bloat-o-meter index a27677146410..6f099f915dcf 100755 --- a/scripts/bloat-o-meter +++ b/scripts/bloat-o-meter | |||
@@ -12,18 +12,22 @@ from signal import signal, SIGPIPE, SIG_DFL | |||
12 | 12 | ||
13 | signal(SIGPIPE, SIG_DFL) | 13 | signal(SIGPIPE, SIG_DFL) |
14 | 14 | ||
15 | if len(sys.argv) != 3: | 15 | if len(sys.argv) < 3: |
16 | sys.stderr.write("usage: %s file1 file2\n" % sys.argv[0]) | 16 | sys.stderr.write("usage: %s [option] file1 file2\n" % sys.argv[0]) |
17 | sys.stderr.write("The options are:\n") | ||
18 | sys.stderr.write("-c cateogrize output based on symbole type\n") | ||
19 | sys.stderr.write("-d Show delta of Data Section\n") | ||
20 | sys.stderr.write("-t Show delta of text Section\n") | ||
17 | sys.exit(-1) | 21 | sys.exit(-1) |
18 | 22 | ||
19 | re_NUMBER = re.compile(r'\.[0-9]+') | 23 | re_NUMBER = re.compile(r'\.[0-9]+') |
20 | 24 | ||
21 | def getsizes(file): | 25 | def getsizes(file, format): |
22 | sym = {} | 26 | sym = {} |
23 | with os.popen("nm --size-sort " + file) as f: | 27 | with os.popen("nm --size-sort " + file) as f: |
24 | for line in f: | 28 | for line in f: |
25 | size, type, name = line.split() | 29 | size, type, name = line.split() |
26 | if type in "tTdDbBrR": | 30 | if type in format: |
27 | # strip generated symbols | 31 | # strip generated symbols |
28 | if name.startswith("__mod_"): continue | 32 | if name.startswith("__mod_"): continue |
29 | if name.startswith("SyS_"): continue | 33 | if name.startswith("SyS_"): continue |
@@ -34,44 +38,61 @@ def getsizes(file): | |||
34 | sym[name] = sym.get(name, 0) + int(size, 16) | 38 | sym[name] = sym.get(name, 0) + int(size, 16) |
35 | return sym | 39 | return sym |
36 | 40 | ||
37 | old = getsizes(sys.argv[1]) | 41 | def calc(oldfile, newfile, format): |
38 | new = getsizes(sys.argv[2]) | 42 | old = getsizes(oldfile, format) |
39 | grow, shrink, add, remove, up, down = 0, 0, 0, 0, 0, 0 | 43 | new = getsizes(newfile, format) |
40 | delta, common = [], {} | 44 | grow, shrink, add, remove, up, down = 0, 0, 0, 0, 0, 0 |
41 | otot, ntot = 0, 0 | 45 | delta, common = [], {} |
46 | otot, ntot = 0, 0 | ||
42 | 47 | ||
43 | for a in old: | 48 | for a in old: |
44 | if a in new: | 49 | if a in new: |
45 | common[a] = 1 | 50 | common[a] = 1 |
46 | 51 | ||
47 | for name in old: | 52 | for name in old: |
48 | otot += old[name] | 53 | otot += old[name] |
49 | if name not in common: | 54 | if name not in common: |
50 | remove += 1 | 55 | remove += 1 |
51 | down += old[name] | 56 | down += old[name] |
52 | delta.append((-old[name], name)) | 57 | delta.append((-old[name], name)) |
53 | 58 | ||
54 | for name in new: | 59 | for name in new: |
55 | ntot += new[name] | 60 | ntot += new[name] |
56 | if name not in common: | 61 | if name not in common: |
57 | add += 1 | 62 | add += 1 |
58 | up += new[name] | 63 | up += new[name] |
59 | delta.append((new[name], name)) | 64 | delta.append((new[name], name)) |
60 | 65 | ||
61 | for name in common: | 66 | for name in common: |
62 | d = new.get(name, 0) - old.get(name, 0) | 67 | d = new.get(name, 0) - old.get(name, 0) |
63 | if d>0: grow, up = grow+1, up+d | 68 | if d>0: grow, up = grow+1, up+d |
64 | if d<0: shrink, down = shrink+1, down-d | 69 | if d<0: shrink, down = shrink+1, down-d |
65 | delta.append((d, name)) | 70 | delta.append((d, name)) |
66 | 71 | ||
67 | delta.sort() | 72 | delta.sort() |
68 | delta.reverse() | 73 | delta.reverse() |
74 | return grow, shrink, add, remove, up, down, delta, old, new, otot, ntot | ||
69 | 75 | ||
70 | print("add/remove: %s/%s grow/shrink: %s/%s up/down: %s/%s (%s)" % \ | 76 | def print_result(symboltype, symbolformat, argc): |
71 | (add, remove, grow, shrink, up, -down, up-down)) | 77 | grow, shrink, add, remove, up, down, delta, old, new, otot, ntot = \ |
72 | print("%-40s %7s %7s %+7s" % ("function", "old", "new", "delta")) | 78 | calc(sys.argv[argc - 1], sys.argv[argc], symbolformat) |
73 | for d, n in delta: | ||
74 | if d: print("%-40s %7s %7s %+7d" % (n, old.get(n,"-"), new.get(n,"-"), d)) | ||
75 | 79 | ||
76 | print("Total: Before=%d, After=%d, chg %+.2f%%" % \ | 80 | print("add/remove: %s/%s grow/shrink: %s/%s up/down: %s/%s (%s)" % \ |
77 | (otot, ntot, (ntot - otot)*100.0/otot)) | 81 | (add, remove, grow, shrink, up, -down, up-down)) |
82 | print("%-40s %7s %7s %+7s" % (symboltype, "old", "new", "delta")) | ||
83 | for d, n in delta: | ||
84 | if d: print("%-40s %7s %7s %+7d" % (n, old.get(n,"-"), new.get(n,"-"), d)) | ||
85 | |||
86 | print("Total: Before=%d, After=%d, chg %+.2f%%" % \ | ||
87 | (otot, ntot, (ntot - otot)*100.0/otot)) | ||
88 | |||
89 | if sys.argv[1] == "-c": | ||
90 | print_result("Function", "tT", 3) | ||
91 | print_result("Data", "dDbB", 3) | ||
92 | print_result("RO Data", "rR", 3) | ||
93 | elif sys.argv[1] == "-d": | ||
94 | print_result("Data", "dDbBrR", 3) | ||
95 | elif sys.argv[1] == "-t": | ||
96 | print_result("Function", "tT", 3) | ||
97 | else: | ||
98 | print_result("Function", "tTdDbBrR", 2) | ||
diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 67d051edd615..7bd52b8f63d4 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc | |||
@@ -2182,8 +2182,6 @@ sub dump_struct($$) { | |||
2182 | # strip comments: | 2182 | # strip comments: |
2183 | $members =~ s/\/\*.*?\*\///gos; | 2183 | $members =~ s/\/\*.*?\*\///gos; |
2184 | $nested =~ s/\/\*.*?\*\///gos; | 2184 | $nested =~ s/\/\*.*?\*\///gos; |
2185 | # strip kmemcheck_bitfield_{begin,end}.*; | ||
2186 | $members =~ s/kmemcheck_bitfield_.*?;//gos; | ||
2187 | # strip attributes | 2185 | # strip attributes |
2188 | $members =~ s/__attribute__\s*\(\([a-z,_\*\s\(\)]*\)\)//i; | 2186 | $members =~ s/__attribute__\s*\(\([a-z,_\*\s\(\)]*\)\)//i; |
2189 | $members =~ s/__aligned\s*\([^;]*\)//gos; | 2187 | $members =~ s/__aligned\s*\([^;]*\)//gos; |
diff --git a/tools/include/linux/kmemcheck.h b/tools/include/linux/kmemcheck.h index 2bccd2c7b897..ea32a7d3cf1b 100644 --- a/tools/include/linux/kmemcheck.h +++ b/tools/include/linux/kmemcheck.h | |||
@@ -1,9 +1 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | /* SPDX-License-Identifier: GPL-2.0 */ | |
2 | #ifndef _LIBLOCKDEP_LINUX_KMEMCHECK_H_ | ||
3 | #define _LIBLOCKDEP_LINUX_KMEMCHECK_H_ | ||
4 | |||
5 | static inline void kmemcheck_mark_initialized(void *address, unsigned int n) | ||
6 | { | ||
7 | } | ||
8 | |||
9 | #endif | ||
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 557d391f564a..ae11e4c3516a 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c | |||
@@ -641,7 +641,6 @@ static const struct { | |||
641 | { "__GFP_ATOMIC", "_A" }, | 641 | { "__GFP_ATOMIC", "_A" }, |
642 | { "__GFP_IO", "I" }, | 642 | { "__GFP_IO", "I" }, |
643 | { "__GFP_FS", "F" }, | 643 | { "__GFP_FS", "F" }, |
644 | { "__GFP_COLD", "CO" }, | ||
645 | { "__GFP_NOWARN", "NWR" }, | 644 | { "__GFP_NOWARN", "NWR" }, |
646 | { "__GFP_RETRY_MAYFAIL", "R" }, | 645 | { "__GFP_RETRY_MAYFAIL", "R" }, |
647 | { "__GFP_NOFAIL", "NF" }, | 646 | { "__GFP_NOFAIL", "NF" }, |
@@ -655,7 +654,6 @@ static const struct { | |||
655 | { "__GFP_RECLAIMABLE", "RC" }, | 654 | { "__GFP_RECLAIMABLE", "RC" }, |
656 | { "__GFP_MOVABLE", "M" }, | 655 | { "__GFP_MOVABLE", "M" }, |
657 | { "__GFP_ACCOUNT", "AC" }, | 656 | { "__GFP_ACCOUNT", "AC" }, |
658 | { "__GFP_NOTRACK", "NT" }, | ||
659 | { "__GFP_WRITE", "WR" }, | 657 | { "__GFP_WRITE", "WR" }, |
660 | { "__GFP_RECLAIM", "R" }, | 658 | { "__GFP_RECLAIM", "R" }, |
661 | { "__GFP_DIRECT_RECLAIM", "DR" }, | 659 | { "__GFP_DIRECT_RECLAIM", "DR" }, |
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index 06c71178d07d..59245b3d587c 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c | |||
@@ -618,7 +618,7 @@ static void multiorder_account(void) | |||
618 | __radix_tree_insert(&tree, 1 << 5, 5, (void *)0x12); | 618 | __radix_tree_insert(&tree, 1 << 5, 5, (void *)0x12); |
619 | __radix_tree_lookup(&tree, 1 << 5, &node, &slot); | 619 | __radix_tree_lookup(&tree, 1 << 5, &node, &slot); |
620 | assert(node->count == node->exceptional * 2); | 620 | assert(node->count == node->exceptional * 2); |
621 | __radix_tree_replace(&tree, node, slot, NULL, NULL, NULL); | 621 | __radix_tree_replace(&tree, node, slot, NULL, NULL); |
622 | assert(node->exceptional == 0); | 622 | assert(node->exceptional == 0); |
623 | 623 | ||
624 | item_kill_tree(&tree); | 624 | item_kill_tree(&tree); |
diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c index b0b7ef6d0de1..f82c2eaa859d 100644 --- a/tools/vm/slabinfo.c +++ b/tools/vm/slabinfo.c | |||
@@ -84,6 +84,7 @@ int output_lines = -1; | |||
84 | int sort_loss; | 84 | int sort_loss; |
85 | int extended_totals; | 85 | int extended_totals; |
86 | int show_bytes; | 86 | int show_bytes; |
87 | int unreclaim_only; | ||
87 | 88 | ||
88 | /* Debug options */ | 89 | /* Debug options */ |
89 | int sanity; | 90 | int sanity; |
@@ -133,6 +134,7 @@ static void usage(void) | |||
133 | "-L|--Loss Sort by loss\n" | 134 | "-L|--Loss Sort by loss\n" |
134 | "-X|--Xtotals Show extended summary information\n" | 135 | "-X|--Xtotals Show extended summary information\n" |
135 | "-B|--Bytes Show size in bytes\n" | 136 | "-B|--Bytes Show size in bytes\n" |
137 | "-U|--Unreclaim Show unreclaimable slabs only\n" | ||
136 | "\nValid debug options (FZPUT may be combined)\n" | 138 | "\nValid debug options (FZPUT may be combined)\n" |
137 | "a / A Switch on all debug options (=FZUP)\n" | 139 | "a / A Switch on all debug options (=FZUP)\n" |
138 | "- Switch off all debug options\n" | 140 | "- Switch off all debug options\n" |
@@ -569,6 +571,9 @@ static void slabcache(struct slabinfo *s) | |||
569 | if (strcmp(s->name, "*") == 0) | 571 | if (strcmp(s->name, "*") == 0) |
570 | return; | 572 | return; |
571 | 573 | ||
574 | if (unreclaim_only && s->reclaim_account) | ||
575 | return; | ||
576 | |||
572 | if (actual_slabs == 1) { | 577 | if (actual_slabs == 1) { |
573 | report(s); | 578 | report(s); |
574 | return; | 579 | return; |
@@ -1347,6 +1352,7 @@ struct option opts[] = { | |||
1347 | { "Loss", no_argument, NULL, 'L'}, | 1352 | { "Loss", no_argument, NULL, 'L'}, |
1348 | { "Xtotals", no_argument, NULL, 'X'}, | 1353 | { "Xtotals", no_argument, NULL, 'X'}, |
1349 | { "Bytes", no_argument, NULL, 'B'}, | 1354 | { "Bytes", no_argument, NULL, 'B'}, |
1355 | { "Unreclaim", no_argument, NULL, 'U'}, | ||
1350 | { NULL, 0, NULL, 0 } | 1356 | { NULL, 0, NULL, 0 } |
1351 | }; | 1357 | }; |
1352 | 1358 | ||
@@ -1358,7 +1364,7 @@ int main(int argc, char *argv[]) | |||
1358 | 1364 | ||
1359 | page_size = getpagesize(); | 1365 | page_size = getpagesize(); |
1360 | 1366 | ||
1361 | while ((c = getopt_long(argc, argv, "aAd::Defhil1noprstvzTSN:LXB", | 1367 | while ((c = getopt_long(argc, argv, "aAd::Defhil1noprstvzTSN:LXBU", |
1362 | opts, NULL)) != -1) | 1368 | opts, NULL)) != -1) |
1363 | switch (c) { | 1369 | switch (c) { |
1364 | case '1': | 1370 | case '1': |
@@ -1439,6 +1445,9 @@ int main(int argc, char *argv[]) | |||
1439 | case 'B': | 1445 | case 'B': |
1440 | show_bytes = 1; | 1446 | show_bytes = 1; |
1441 | break; | 1447 | break; |
1448 | case 'U': | ||
1449 | unreclaim_only = 1; | ||
1450 | break; | ||
1442 | default: | 1451 | default: |
1443 | fatal("%s: Invalid option '%c'\n", argv[0], optopt); | 1452 | fatal("%s: Invalid option '%c'\n", argv[0], optopt); |
1444 | 1453 | ||
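The new -U/--Unreclaim switch makes slabcache() skip any cache whose reclaim_account flag is set, so only unreclaimable slab caches are reported. For illustration only, roughly the same filter can be approximated from SLUB's per-cache sysfs attributes; this sketch assumes a SLUB kernel with /sys/kernel/slab populated and is not part of the patch.

#!/usr/bin/env python3
# Approximation of the "slabinfo -U" filter: list caches whose
# reclaim_account attribute is 0, i.e. caches not flagged
# SLAB_RECLAIM_ACCOUNT (assumed sysfs layout: /sys/kernel/slab/<cache>/).
import os

SLAB_SYSFS = "/sys/kernel/slab"

for cache in sorted(os.listdir(SLAB_SYSFS)):
    attr = os.path.join(SLAB_SYSFS, cache, "reclaim_account")
    try:
        with open(attr) as f:
            reclaimable = int(f.read().strip())
    except (OSError, ValueError):
        continue                      # skip entries without the attribute
    if not reclaimable:
        print(cache)                  # unreclaimable cache, as -U would show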