author     Levin, Alexander (Sasha Levin) <alexander.levin@verizon.com>    2017-11-15 20:36:02 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>                  2017-11-15 21:21:05 -0500
commit     4675ff05de2d76d167336b368bd07f3fef6ed5a6
tree       212d8adf40e13c2a27ac7834d14ca4900923b98c
parent     d8be75663cec0069b85f80191abd2682ce4a512f
kmemcheck: rip it out
Fix up makefiles, remove references, and git rm kmemcheck.

Link: http://lkml.kernel.org/r/20171007030159.22241-4-alexander.levin@verizon.com
Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Vegard Nossum <vegardno@ifi.uio.no>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Tim Hansen <devtimhansen@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--  Documentation/admin-guide/kernel-parameters.txt | 7
-rw-r--r--  Documentation/dev-tools/index.rst | 1
-rw-r--r--  Documentation/dev-tools/kmemcheck.rst | 733
-rw-r--r--  MAINTAINERS | 10
-rw-r--r--  arch/x86/Kconfig | 3
-rw-r--r--  arch/x86/include/asm/kmemcheck.h | 42
-rw-r--r--  arch/x86/include/asm/string_32.h | 9
-rw-r--r--  arch/x86/include/asm/string_64.h | 8
-rw-r--r--  arch/x86/kernel/cpu/intel.c | 15
-rw-r--r--  arch/x86/mm/Makefile | 2
-rw-r--r--  arch/x86/mm/init.c | 5
-rw-r--r--  arch/x86/mm/kmemcheck/Makefile | 1
-rw-r--r--  arch/x86/mm/kmemcheck/error.c | 227
-rw-r--r--  arch/x86/mm/kmemcheck/error.h | 15
-rw-r--r--  arch/x86/mm/kmemcheck/kmemcheck.c | 658
-rw-r--r--  arch/x86/mm/kmemcheck/opcode.c | 106
-rw-r--r--  arch/x86/mm/kmemcheck/opcode.h | 9
-rw-r--r--  arch/x86/mm/kmemcheck/pte.c | 22
-rw-r--r--  arch/x86/mm/kmemcheck/pte.h | 10
-rw-r--r--  arch/x86/mm/kmemcheck/selftest.c | 70
-rw-r--r--  arch/x86/mm/kmemcheck/selftest.h | 6
-rw-r--r--  arch/x86/mm/kmemcheck/shadow.c | 173
-rw-r--r--  arch/x86/mm/kmemcheck/shadow.h | 18
-rw-r--r--  include/linux/interrupt.h | 15
-rw-r--r--  include/linux/kmemcheck.h | 171
-rw-r--r--  kernel/softirq.c | 10
-rw-r--r--  kernel/sysctl.c | 10
-rw-r--r--  lib/Kconfig.debug | 6
-rw-r--r--  lib/Kconfig.kmemcheck | 94
-rw-r--r--  mm/Kconfig.debug | 1
-rw-r--r--  mm/Makefile | 2
-rw-r--r--  mm/kmemcheck.c | 125
-rw-r--r--  mm/slub.c | 5
-rwxr-xr-x  scripts/kernel-doc | 2
-rw-r--r--  tools/include/linux/kmemcheck.h | 8
35 files changed, 7 insertions, 2592 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index b74e13312fdc..00bb04972612 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1864,13 +1864,6 @@
1864 Built with CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF=y,
1865 the default is off.
1866
1867 kmemcheck= [X86] Boot-time kmemcheck enable/disable/one-shot mode
1868 Valid arguments: 0, 1, 2
1869 kmemcheck=0 (disabled)
1870 kmemcheck=1 (enabled)
1871 kmemcheck=2 (one-shot mode)
1872 Default: 2 (one-shot mode)
1873
1874 kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs.
1875 Default is 0 (don't ignore, but inject #GP)
1876
diff --git a/Documentation/dev-tools/index.rst b/Documentation/dev-tools/index.rst
index a81787cd47d7..e313925fb0fa 100644
--- a/Documentation/dev-tools/index.rst
+++ b/Documentation/dev-tools/index.rst
@@ -21,7 +21,6 @@ whole; patches welcome!
21 kasan
22 ubsan
23 kmemleak
24 kmemcheck
25 gdb-kernel-debugging
26 kgdb
27 kselftest
diff --git a/Documentation/dev-tools/kmemcheck.rst b/Documentation/dev-tools/kmemcheck.rst
deleted file mode 100644
index 7f3d1985de74..000000000000
--- a/Documentation/dev-tools/kmemcheck.rst
+++ /dev/null
@@ -1,733 +0,0 @@
1Getting started with kmemcheck
2==============================
3
4Vegard Nossum <vegardno@ifi.uio.no>
5
6
7Introduction
8------------
9
10kmemcheck is a debugging feature for the Linux Kernel. More specifically, it
11is a dynamic checker that detects and warns about some uses of uninitialized
12memory.
13
14Userspace programmers might be familiar with Valgrind's memcheck. The main
15difference between memcheck and kmemcheck is that memcheck works for userspace
16programs only, and kmemcheck works for the kernel only. The implementations
17are of course vastly different. Because of this, kmemcheck is not as accurate
18as memcheck, but it turns out to be good enough in practice to discover real
19programmer errors that the compiler is not able to find through static
20analysis.
21
22Enabling kmemcheck on a kernel will probably slow it down to the extent that
23the machine will not be usable for normal workloads such as an
24interactive desktop. kmemcheck will also cause the kernel to use about twice
25as much memory as normal. For this reason, kmemcheck is strictly a debugging
26feature.
27
28
29Downloading
30-----------
31
32As of version 2.6.31-rc1, kmemcheck is included in the mainline kernel.
33
34
35Configuring and compiling
36-------------------------
37
38kmemcheck only works for the x86 (both 32- and 64-bit) platform. A number of
39configuration variables must have specific settings in order for the kmemcheck
40menu to even appear in "menuconfig". These are:
41
42- ``CONFIG_CC_OPTIMIZE_FOR_SIZE=n``
43 This option is located under "General setup" / "Optimize for size".
44
45 Without this, gcc will use certain optimizations that usually lead to
46 false positive warnings from kmemcheck. An example of this is a 16-bit
47 field in a struct, where gcc may load 32 bits, then discard the upper
48 16 bits. kmemcheck sees only the 32-bit load, and may trigger a
49 warning for the upper 16 bits (if they're uninitialized).
50
51- ``CONFIG_SLAB=y`` or ``CONFIG_SLUB=y``
52 This option is located under "General setup" / "Choose SLAB
53 allocator".
54
55- ``CONFIG_FUNCTION_TRACER=n``
56 This option is located under "Kernel hacking" / "Tracers" / "Kernel
57 Function Tracer"
58
59 When function tracing is compiled in, gcc emits a call to another
60 function at the beginning of every function. This means that when the
61 page fault handler is called, the ftrace framework will be called
62 before kmemcheck has had a chance to handle the fault. If ftrace then
63 modifies memory that was tracked by kmemcheck, the result is an
64 endless recursive page fault.
65
66- ``CONFIG_DEBUG_PAGEALLOC=n``
67 This option is located under "Kernel hacking" / "Memory Debugging"
68 / "Debug page memory allocations".
69
70In addition, I highly recommend turning on ``CONFIG_DEBUG_INFO=y``. This is also
71located under "Kernel hacking". With this, you will be able to get line number
72information from the kmemcheck warnings, which is extremely valuable in
73debugging a problem. This option is not mandatory, however, because it slows
74down the compilation process and produces a much bigger kernel image.
75
76Now the kmemcheck menu should be visible (under "Kernel hacking" / "Memory
77Debugging" / "kmemcheck: trap use of uninitialized memory"). Here follows
78a description of the kmemcheck configuration variables:
79
80- ``CONFIG_KMEMCHECK``
81 This must be enabled in order to use kmemcheck at all...
82
83- ``CONFIG_KMEMCHECK_``[``DISABLED`` | ``ENABLED`` | ``ONESHOT``]``_BY_DEFAULT``
84 This option controls the status of kmemcheck at boot-time. "Enabled"
85 will enable kmemcheck right from the start, "disabled" will boot the
86 kernel as normal (but with the kmemcheck code compiled in, so it can
87 be enabled at run-time after the kernel has booted), and "one-shot" is
88 a special mode which will turn kmemcheck off automatically after
89 detecting the first use of uninitialized memory.
90
91 If you are using kmemcheck to actively debug a problem, then you
92 probably want to choose "enabled" here.
93
94 The one-shot mode is mostly useful in automated test setups because it
95 can prevent floods of warnings and increase the chances of the machine
96 surviving in case something is really wrong. In other cases, the one-
97 shot mode could actually be counter-productive because it would turn
98 itself off at the very first error -- in the case of a false positive
99 too -- and this would get in the way of debugging the specific
100 problem you were interested in.
101
102 If you would like to use your kernel as normal, but with a chance to
103 enable kmemcheck in case of some problem, it might be a good idea to
104 choose "disabled" here. When kmemcheck is disabled, most of the run-
105 time overhead is not incurred, and the kernel will be almost as fast
106 as normal.
107
108- ``CONFIG_KMEMCHECK_QUEUE_SIZE``
109 Select the maximum number of error reports to store in an internal
110 (fixed-size) buffer. Since errors can occur virtually anywhere and in
111 any context, we need a temporary storage area which is guaranteed not
112 to generate any other page faults when accessed. The queue will be
113 emptied as soon as a tasklet may be scheduled. If the queue is full,
114 new error reports will be lost.
115
116 The default value of 64 is probably fine. If some code produces more
117 than 64 errors within an irqs-off section, then the code is likely to
118 produce many, many more, too, and these additional reports seldom give
119 any more information (the first report is usually the most valuable
120 anyway).
121
122 This number might have to be adjusted if you are not using serial
123 console or similar to capture the kernel log. If you are using the
124 "dmesg" command to save the log, then getting a lot of kmemcheck
125 warnings might overflow the kernel log itself, and the earlier reports
126 will get lost in that way instead. Try setting this to 10 or so on
127 such a setup.
128
129- ``CONFIG_KMEMCHECK_SHADOW_COPY_SHIFT``
130 Select the number of shadow bytes to save along with each entry of the
131 error-report queue. These bytes indicate what parts of an allocation
132 are initialized, uninitialized, etc. and will be displayed when an
133 error is detected to help the debugging of a particular problem.
134
135 The number entered here is actually the logarithm of the number of
136 bytes that will be saved. So if you pick for example 5 here, kmemcheck
137 will save 2^5 = 32 bytes.
138
139 The default value should be fine for debugging most problems. It also
140 fits nicely within 80 columns.
141
142- ``CONFIG_KMEMCHECK_PARTIAL_OK``
143 This option (when enabled) works around certain GCC optimizations that
144 produce 32-bit reads from 16-bit variables where the upper 16 bits are
145 thrown away afterwards.
146
147 The default value (enabled) is recommended. This may of course hide
148 some real errors, but disabling it would probably produce a lot of
149 false positives.
150
151- ``CONFIG_KMEMCHECK_BITOPS_OK``
152 This option silences warnings that would be generated for bit-field
153 accesses where not all the bits are initialized at the same time. This
154 may also hide some real bugs.
155
156 This option is probably obsolete, or it should be replaced with
157 the kmemcheck-/bitfield-annotations for the code in question. The
158 default value is therefore fine.
159
160Now compile the kernel as usual.
161
162
163How to use
164----------
165
166Booting
167~~~~~~~
168
169First some information about the command-line options. There is only one
170option specific to kmemcheck, and this is called "kmemcheck". It can be used
171to override the default mode as chosen by the ``CONFIG_KMEMCHECK_*_BY_DEFAULT``
172option. Its possible settings are:
173
174- ``kmemcheck=0`` (disabled)
175- ``kmemcheck=1`` (enabled)
176- ``kmemcheck=2`` (one-shot mode)
177
178If SLUB debugging has been enabled in the kernel, it may take precedence over
179kmemcheck in such a way that the slab caches which are under SLUB debugging
180will not be tracked by kmemcheck. In order to ensure that this doesn't happen
181(even though it shouldn't by default), use SLUB's boot option ``slub_debug``,
182like this: ``slub_debug=-``
183
184In fact, this option may also be used for fine-grained control over SLUB vs.
185kmemcheck. For example, if the command line includes
186``kmemcheck=1 slub_debug=,dentry``, then SLUB debugging will be used only
187for the "dentry" slab cache, and with kmemcheck tracking all the other
188caches. This is advanced usage, however, and is not generally recommended.
189
190
191Run-time enable/disable
192~~~~~~~~~~~~~~~~~~~~~~~
193
194When the kernel has booted, it is possible to enable or disable kmemcheck at
195run-time. WARNING: This feature is still experimental and may cause false
196positive warnings to appear. Therefore, try not to use this. If you find that
197it doesn't work properly (e.g. you see an unreasonable amount of warnings), I
198will be happy to take bug reports.
199
200Use the file ``/proc/sys/kernel/kmemcheck`` for this purpose, e.g.::
201
202 $ echo 0 > /proc/sys/kernel/kmemcheck # disables kmemcheck
203
204The numbers are the same as for the ``kmemcheck=`` command-line option.
205
206
207Debugging
208~~~~~~~~~
209
210A typical report will look something like this::
211
212 WARNING: kmemcheck: Caught 32-bit read from uninitialized memory (ffff88003e4a2024)
213 80000000000000000000000000000000000000000088ffff0000000000000000
214 i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u
215 ^
216
217 Pid: 1856, comm: ntpdate Not tainted 2.6.29-rc5 #264 945P-A
218 RIP: 0010:[<ffffffff8104ede8>] [<ffffffff8104ede8>] __dequeue_signal+0xc8/0x190
219 RSP: 0018:ffff88003cdf7d98 EFLAGS: 00210002
220 RAX: 0000000000000030 RBX: ffff88003d4ea968 RCX: 0000000000000009
221 RDX: ffff88003e5d6018 RSI: ffff88003e5d6024 RDI: ffff88003cdf7e84
222 RBP: ffff88003cdf7db8 R08: ffff88003e5d6000 R09: 0000000000000000
223 R10: 0000000000000080 R11: 0000000000000000 R12: 000000000000000e
224 R13: ffff88003cdf7e78 R14: ffff88003d530710 R15: ffff88003d5a98c8
225 FS: 0000000000000000(0000) GS:ffff880001982000(0063) knlGS:00000
226 CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033
227 CR2: ffff88003f806ea0 CR3: 000000003c036000 CR4: 00000000000006a0
228 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
229 DR3: 0000000000000000 DR6: 00000000ffff4ff0 DR7: 0000000000000400
230 [<ffffffff8104f04e>] dequeue_signal+0x8e/0x170
231 [<ffffffff81050bd8>] get_signal_to_deliver+0x98/0x390
232 [<ffffffff8100b87d>] do_notify_resume+0xad/0x7d0
233 [<ffffffff8100c7b5>] int_signal+0x12/0x17
234 [<ffffffffffffffff>] 0xffffffffffffffff
235
236The single most valuable piece of information in this report is the RIP (or EIP on 32-
237bit) value. This will help us pinpoint exactly which instruction caused
238the warning.
239
240If your kernel was compiled with ``CONFIG_DEBUG_INFO=y``, then all we have to do
241is give this address to the addr2line program, like this::
242
243 $ addr2line -e vmlinux -i ffffffff8104ede8
244 arch/x86/include/asm/string_64.h:12
245 include/asm-generic/siginfo.h:287
246 kernel/signal.c:380
247 kernel/signal.c:410
248
249The "``-e vmlinux``" tells addr2line which file to look in. **IMPORTANT:**
250This must be the vmlinux of the kernel that produced the warning in the
251first place! If not, the line number information will almost certainly be
252wrong.
253
254The "``-i``" tells addr2line to also print the line numbers of inlined
255functions. In this case, the flag was very important, because otherwise,
256it would only have printed the first line, which is just a call to
257``memcpy()``, which could be called from a thousand places in the kernel, and
258is therefore not very useful. These inlined functions would not show up in
259the stack trace above, simply because the kernel doesn't load the extra
260debugging information. This technique can of course be used with ordinary
261kernel oopses as well.
262
263In this case, it's the caller of ``memcpy()`` that is interesting, and it can be
264found in ``include/asm-generic/siginfo.h``, line 287::
265
266 281 static inline void copy_siginfo(struct siginfo *to, struct siginfo *from)
267 282 {
268 283 if (from->si_code < 0)
269 284 memcpy(to, from, sizeof(*to));
270 285 else
271 286 /* _sigchld is currently the largest know union member */
272 287 memcpy(to, from, __ARCH_SI_PREAMBLE_SIZE + sizeof(from->_sifields._sigchld));
273 288 }
274
275Since this was a read (kmemcheck usually warns about reads only, though it can
276warn about writes to unallocated or freed memory as well), it was probably the
277"from" argument which contained some uninitialized bytes. Following the chain
278of calls, we move upwards to see where "from" was allocated or initialized,
279``kernel/signal.c``, line 380::
280
281 359 static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
282 360 {
283 ...
284 367 list_for_each_entry(q, &list->list, list) {
285 368 if (q->info.si_signo == sig) {
286 369 if (first)
287 370 goto still_pending;
288 371 first = q;
289 ...
290 377 if (first) {
291 378 still_pending:
292 379 list_del_init(&first->list);
293 380 copy_siginfo(info, &first->info);
294 381 __sigqueue_free(first);
295 ...
296 392 }
297 393 }
298
299Here, it is ``&first->info`` that is being passed on to ``copy_siginfo()``. The
300variable ``first`` was found on a list -- passed in as the second argument to
301``collect_signal()``. We continue our journey through the stack, to figure out
302where the item on "list" was allocated or initialized. We move to line 410::
303
304 395 static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
305 396 siginfo_t *info)
306 397 {
307 ...
308 410 collect_signal(sig, pending, info);
309 ...
310 414 }
311
312Now we need to follow the ``pending`` pointer, since that is being passed on to
313``collect_signal()`` as ``list``. At this point, we've run out of lines from the
314"addr2line" output. Not to worry, we just paste the next addresses from the
315kmemcheck stack dump, i.e.::
316
317 [<ffffffff8104f04e>] dequeue_signal+0x8e/0x170
318 [<ffffffff81050bd8>] get_signal_to_deliver+0x98/0x390
319 [<ffffffff8100b87d>] do_notify_resume+0xad/0x7d0
320 [<ffffffff8100c7b5>] int_signal+0x12/0x17
321
322 $ addr2line -e vmlinux -i ffffffff8104f04e ffffffff81050bd8 \
323 ffffffff8100b87d ffffffff8100c7b5
324 kernel/signal.c:446
325 kernel/signal.c:1806
326 arch/x86/kernel/signal.c:805
327 arch/x86/kernel/signal.c:871
328 arch/x86/kernel/entry_64.S:694
329
330Remember that since these addresses were found on the stack and not as the
331RIP value, they actually point to the _next_ instruction (they are return
332addresses). This becomes obvious when we look at the code for line 446::
333
334 422 int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
335 423 {
336 ...
337 431 signr = __dequeue_signal(&tsk->signal->shared_pending,
338 432 mask, info);
339 433 /*
340 434 * itimer signal ?
341 435 *
342 436 * itimers are process shared and we restart periodic
343 437 * itimers in the signal delivery path to prevent DoS
344 438 * attacks in the high resolution timer case. This is
345 439 * compliant with the old way of self restarting
346 440 * itimers, as the SIGALRM is a legacy signal and only
347 441 * queued once. Changing the restart behaviour to
348 442 * restart the timer in the signal dequeue path is
349 443 * reducing the timer noise on heavy loaded !highres
350 444 * systems too.
351 445 */
352 446 if (unlikely(signr == SIGALRM)) {
353 ...
354 489 }
355
356So instead of looking at 446, we should be looking at 431, which is the line
357that executes just before 446. Here we see that what we are looking for is
358``&tsk->signal->shared_pending``.
359
360Our next task is to figure out which function puts items on this
361``shared_pending`` list. A crude but efficient tool is ``git grep``::
362
363 $ git grep -n 'shared_pending' kernel/
364 ...
365 kernel/signal.c:828: pending = group ? &t->signal->shared_pending : &t->pending;
366 kernel/signal.c:1339: pending = group ? &t->signal->shared_pending : &t->pending;
367 ...
368
369There were more results, but none of them were related to list operations,
370and these were the only assignments. We inspect the line numbers more closely
371and find that this is indeed where items are being added to the list::
372
373 816 static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
374 817 int group)
375 818 {
376 ...
377 828 pending = group ? &t->signal->shared_pending : &t->pending;
378 ...
379 851 q = __sigqueue_alloc(t, GFP_ATOMIC, (sig < SIGRTMIN &&
380 852 (is_si_special(info) ||
381 853 info->si_code >= 0)));
382 854 if (q) {
383 855 list_add_tail(&q->list, &pending->list);
384 ...
385 890 }
386
387and::
388
389 1309 int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
390 1310 {
391 ....
392 1339 pending = group ? &t->signal->shared_pending : &t->pending;
393 1340 list_add_tail(&q->list, &pending->list);
394 ....
395 1347 }
396
397In the first case, the list element we are looking for, ``q``, is being
398returned from the function ``__sigqueue_alloc()``, which looks like an
399allocation function. Let's take a look at it::
400
401 187 static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags,
402 188 int override_rlimit)
403 189 {
404 190 struct sigqueue *q = NULL;
405 191 struct user_struct *user;
406 192
407 193 /*
408 194 * We won't get problems with the target's UID changing under us
409 195 * because changing it requires RCU be used, and if t != current, the
410 196 * caller must be holding the RCU readlock (by way of a spinlock) and
411 197 * we use RCU protection here
412 198 */
413 199 user = get_uid(__task_cred(t)->user);
414 200 atomic_inc(&user->sigpending);
415 201 if (override_rlimit ||
416 202 atomic_read(&user->sigpending) <=
417 203 t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur)
418 204 q = kmem_cache_alloc(sigqueue_cachep, flags);
419 205 if (unlikely(q == NULL)) {
420 206 atomic_dec(&user->sigpending);
421 207 free_uid(user);
422 208 } else {
423 209 INIT_LIST_HEAD(&q->list);
424 210 q->flags = 0;
425 211 q->user = user;
426 212 }
427 213
428 214 return q;
429 215 }
430
431We see that this function initializes ``q->list``, ``q->flags``, and
432``q->user``. It seems that now is the time to look at the definition of
433``struct sigqueue``, e.g.::
434
435 14 struct sigqueue {
436 15 struct list_head list;
437 16 int flags;
438 17 siginfo_t info;
439 18 struct user_struct *user;
440 19 };
441
442And, you might remember, it was a ``memcpy()`` on ``&first->info`` that
443caused the warning, so this makes perfect sense. It also seems reasonable
444to assume that it is the caller of ``__sigqueue_alloc()`` that has the
445responsibility of filling out (initializing) this member.
446
447But just which fields of the struct were uninitialized? Let's look at
448kmemcheck's report again::
449
450 WARNING: kmemcheck: Caught 32-bit read from uninitialized memory (ffff88003e4a2024)
451 80000000000000000000000000000000000000000088ffff0000000000000000
452 i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u
453 ^
454
455These first two lines are the memory dump of the memory object itself, and
456the shadow bytemap, respectively. The memory object itself is in this case
457``&first->info``. Just beware that the start of this dump is NOT the start
458of the object itself! The position of the caret (^) corresponds with the
459address of the read (ffff88003e4a2024).
460
461The shadow bytemap dump legend is as follows:
462
463- i: initialized
464- u: uninitialized
465- a: unallocated (memory has been allocated by the slab layer, but has not
466 yet been handed off to anybody)
467- f: freed (memory has been allocated by the slab layer, but has been freed
468 by the previous owner)
469
470In order to figure out where (relative to the start of the object) the
471uninitialized memory was located, we have to look at the disassembly. For
472that, we'll need the RIP address again::
473
474 RIP: 0010:[<ffffffff8104ede8>] [<ffffffff8104ede8>] __dequeue_signal+0xc8/0x190
475
476 $ objdump -d --no-show-raw-insn vmlinux | grep -C 8 ffffffff8104ede8:
477 ffffffff8104edc8: mov %r8,0x8(%r8)
478 ffffffff8104edcc: test %r10d,%r10d
479 ffffffff8104edcf: js ffffffff8104ee88 <__dequeue_signal+0x168>
480 ffffffff8104edd5: mov %rax,%rdx
481 ffffffff8104edd8: mov $0xc,%ecx
482 ffffffff8104eddd: mov %r13,%rdi
483 ffffffff8104ede0: mov $0x30,%eax
484 ffffffff8104ede5: mov %rdx,%rsi
485 ffffffff8104ede8: rep movsl %ds:(%rsi),%es:(%rdi)
486 ffffffff8104edea: test $0x2,%al
487 ffffffff8104edec: je ffffffff8104edf0 <__dequeue_signal+0xd0>
488 ffffffff8104edee: movsw %ds:(%rsi),%es:(%rdi)
489 ffffffff8104edf0: test $0x1,%al
490 ffffffff8104edf2: je ffffffff8104edf5 <__dequeue_signal+0xd5>
491 ffffffff8104edf4: movsb %ds:(%rsi),%es:(%rdi)
492 ffffffff8104edf5: mov %r8,%rdi
493 ffffffff8104edf8: callq ffffffff8104de60 <__sigqueue_free>
494
495As expected, it's the "``rep movsl``" instruction from the ``memcpy()``
496that causes the warning. We know that ``REP MOVSL`` uses the register
497``RCX`` to count the number of remaining iterations. By taking a look at the
498register dump again (from the kmemcheck report), we can figure out how many
499bytes were left to copy::
500
501 RAX: 0000000000000030 RBX: ffff88003d4ea968 RCX: 0000000000000009
502
503By looking at the disassembly, we also see that ``%ecx`` is being loaded
504with the value ``$0xc`` just before (ffffffff8104edd8), so we are very
505lucky. Keep in mind that this is the number of iterations, not bytes. And
506since this is a "long" operation, we need to multiply by 4 to get the
507number of bytes. So this means that the uninitialized value was encountered
508at 4 * (0xc - 0x9) = 12 bytes from the start of the object.
509
510We can now try to figure out which field of the "``struct siginfo``"
511was not initialized. This is the beginning of the struct::
512
513 40 typedef struct siginfo {
514 41 int si_signo;
515 42 int si_errno;
516 43 int si_code;
517 44
518 45 union {
519 ..
520 92 } _sifields;
521 93 } siginfo_t;
522
523On 64-bit, an int is 4 bytes long, so it must be the union member that has
524not been initialized. We can verify this using gdb::
525
526 $ gdb vmlinux
527 ...
528 (gdb) p &((struct siginfo *) 0)->_sifields
529 $1 = (union {...} *) 0x10
530
531Actually, it seems that the union member is located at offset 0x10 -- which
532means that gcc has inserted 4 bytes of padding between the members ``si_code``
533and ``_sifields``. We can now get a fuller picture of the memory dump::
534
535 _----------------------------=> si_code
536 / _--------------------=> (padding)
537 | / _------------=> _sifields(._kill._pid)
538 | | / _----=> _sifields(._kill._uid)
539 | | | /
540 -------|-------|-------|-------|
541 80000000000000000000000000000000000000000088ffff0000000000000000
542 i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u
543
544This allows us to realize another important fact: ``si_code`` contains the
545value 0x80. Remember that x86 is little endian, so the first 4 bytes
546"80000000" are really the number 0x00000080. With a bit of research, we
547find that this is actually the constant ``SI_KERNEL`` defined in
548``include/asm-generic/siginfo.h``::
549
550 144 #define SI_KERNEL 0x80 /* sent by the kernel from somewhere */
551
552This macro is used in exactly one place in the x86 kernel: In ``send_signal()``
553in ``kernel/signal.c``::
554
555 816 static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
556 817 int group)
557 818 {
558 ...
559 828 pending = group ? &t->signal->shared_pending : &t->pending;
560 ...
561 851 q = __sigqueue_alloc(t, GFP_ATOMIC, (sig < SIGRTMIN &&
562 852 (is_si_special(info) ||
563 853 info->si_code >= 0)));
564 854 if (q) {
565 855 list_add_tail(&q->list, &pending->list);
566 856 switch ((unsigned long) info) {
567 ...
568 865 case (unsigned long) SEND_SIG_PRIV:
569 866 q->info.si_signo = sig;
570 867 q->info.si_errno = 0;
571 868 q->info.si_code = SI_KERNEL;
572 869 q->info.si_pid = 0;
573 870 q->info.si_uid = 0;
574 871 break;
575 ...
576 890 }
577
578Not only does this match with the ``.si_code`` member, it also matches the place
579we found earlier when looking for where siginfo_t objects are enqueued on the
580``shared_pending`` list.
581
582So to sum up: It seems that it is the padding introduced by the compiler
583between two struct fields that is uninitialized, and this gets reported when
584we do a ``memcpy()`` on the struct. This means that we have identified a false
585positive warning.
586
587Normally, kmemcheck will not report uninitialized accesses in ``memcpy()`` calls
588when both the source and destination addresses are tracked. (Instead, we copy
589the shadow bytemap as well). In this case, the destination address clearly
590was not tracked. We can dig a little deeper into the stack trace from above::
591
592 arch/x86/kernel/signal.c:805
593 arch/x86/kernel/signal.c:871
594 arch/x86/kernel/entry_64.S:694
595
596And we clearly see that the destination siginfo object is located on the
597stack::
598
599 782 static void do_signal(struct pt_regs *regs)
600 783 {
601 784 struct k_sigaction ka;
602 785 siginfo_t info;
603 ...
604 804 signr = get_signal_to_deliver(&info, &ka, regs, NULL);
605 ...
606 854 }
607
608And this ``&info`` is what eventually gets passed to ``copy_siginfo()`` as the
609destination argument.
610
611Now, even though we didn't find an actual error here, the example is still a
612good one, because it shows how one would go about finding out what the report
613was all about.
614
615
616Annotating false positives
617~~~~~~~~~~~~~~~~~~~~~~~~~~
618
619There are a few different ways to make annotations in the source code that
620will keep kmemcheck from checking and reporting certain allocations. Here
621they are:
622
623- ``__GFP_NOTRACK_FALSE_POSITIVE``
624 This flag can be passed to ``kmalloc()`` or ``kmem_cache_alloc()``
625 (therefore also to other functions that end up calling one of
626 these) to indicate that the allocation should not be tracked
627 because it would lead to a false positive report. This is a "big
628 hammer" way of silencing kmemcheck; after all, even if the false
629 positive pertains to a particular field in a struct, for example, we
630 will now lose the ability to find (real) errors in other parts of
631 the same struct.
632
633 Example::
634
635 /* No warnings will ever trigger on accessing any part of x */
636 x = kmalloc(sizeof *x, GFP_KERNEL | __GFP_NOTRACK_FALSE_POSITIVE);
637
638- ``kmemcheck_bitfield_begin(name)``/``kmemcheck_bitfield_end(name)`` and
639 ``kmemcheck_annotate_bitfield(ptr, name)``
640 The first two of these three macros can be used inside struct
641 definitions to signal, respectively, the beginning and end of a
642 bitfield. Additionally, this will assign the bitfield a name, which
643 is given as an argument to the macros.
644
645 Having used these markers, one can later use
646 kmemcheck_annotate_bitfield() at the point of allocation, to indicate
647 which parts of the allocation is part of a bitfield.
648
649 Example::
650
651 struct foo {
652 int x;
653
654 kmemcheck_bitfield_begin(flags);
655 int flag_a:1;
656 int flag_b:1;
657 kmemcheck_bitfield_end(flags);
658
659 int y;
660 };
661
662 struct foo *x = kmalloc(sizeof *x, GFP_KERNEL);
663
664 /* No warnings will trigger on accessing the bitfield of x */
665 kmemcheck_annotate_bitfield(x, flags);
666
667 Note that ``kmemcheck_annotate_bitfield()`` can be used even before the
668 return value of ``kmalloc()`` is checked -- in other words, passing NULL
669 as the first argument is legal (and will do nothing).
670
671
672Reporting errors
673----------------
674
675As we have seen, kmemcheck will produce false positive reports. Therefore, it
676is not very wise to blindly post kmemcheck warnings to mailing lists and
677maintainers. Instead, I encourage maintainers and developers to find errors
678in their own code. If you get a warning, you can try to work around it, try
679to figure out if it's a real error or not, or simply ignore it. Most
680developers know their own code and will quickly and efficiently determine the
681root cause of a kmemcheck report. This is therefore also the most efficient
682way to work with kmemcheck.
683
684That said, we (the kmemcheck maintainers) will always be on the lookout for
685false positives that we can annotate and silence. So whatever you find,
686please drop us a note privately! Kernel configs and steps to reproduce (if
687available) are of course a great help too.
688
689Happy hacking!
690
691
692Technical description
693---------------------
694
695kmemcheck works by marking memory pages non-present. This means that whenever
696somebody attempts to access the page, a page fault is generated. The page
697fault handler notices that the page was in fact only hidden, and so it calls
698on the kmemcheck code to make further investigations.
699
700When the investigations are completed, kmemcheck "shows" the page by marking
701it present (as it would be under normal circumstances). This way, the
702interrupted code can continue as usual.
703
704But after the instruction has been executed, we should hide the page again, so
705that we can catch the next access too! Now kmemcheck makes use of a debugging
706feature of the processor, namely single-stepping. When the processor has
707finished the one instruction that generated the memory access, a debug
708exception is raised. From here, we simply hide the page again and continue
709execution, this time with the single-stepping feature turned off.
710
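As a rough, illustrative sketch (not the exact code removed by this patch),
the cycle above maps onto the hooks declared in
``arch/x86/include/asm/kmemcheck.h`` like this::

    /* In the page fault (#PF) handler, for a fault on a kernel address: */
    if (kmemcheck_fault(regs, address, error_code))
            return;   /* The page was one kmemcheck had hidden: it has now
                         been shown again and TF has been set, so the
                         faulting instruction is single-stepped. */

    /* In the debug exception (#DB) handler, after that single step: */
    if (kmemcheck_trap(regs))
            return;   /* The page has been hidden again and the saved flags
                         restored; normal execution resumes. */

How the real handlers wire these calls in is simplified away here; the point
is only the show/step/hide rhythm described above.
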
711kmemcheck requires some assistance from the memory allocator in order to work.
712The memory allocator needs to
713
714 1. Tell kmemcheck about newly allocated pages and pages that are about to
715 be freed. This allows kmemcheck to set up and tear down the shadow memory
716 for the pages in question. The shadow memory stores the status of each
717 byte in the allocation proper, e.g. whether it is initialized or
718 uninitialized.
719
720 2. Tell kmemcheck which parts of memory should be marked uninitialized.
721 There are actually a few more states, such as "not yet allocated" and
722 "recently freed".
723
724If a slab cache is set up using the SLAB_NOTRACK flag, it will never return
725memory that can take page faults because of kmemcheck.
726
727If a slab cache is NOT set up using the SLAB_NOTRACK flag, callers can still
728request memory with the __GFP_NOTRACK or __GFP_NOTRACK_FALSE_POSITIVE flags.
729This does not prevent the page faults from occurring, however, but marks the
730object in question as being initialized so that no warnings will ever be
731produced for this object.
732
733Currently, the SLAB and SLUB allocators are supported by kmemcheck.
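
To make the allocator assistance described above a bit more concrete, here is
an illustrative sketch of a slab allocation hook. It is not the removed code
itself; the ``kmemcheck_mark_*()`` names are modelled on the removed
``include/linux/kmemcheck.h``, and the exact signatures may differ::

    /* Illustrative only -- a tracked slab allocation, roughly as an
       allocator might call into kmemcheck (points 1 and 2 above). */
    static void *slab_alloc_tracked(struct kmem_cache *cache, gfp_t flags,
                                    size_t size)
    {
            void *object = kmem_cache_alloc(cache, flags);

            if (!object)
                    return NULL;

            /* The shadow pages for this slab were set up when its backing
               pages were allocated (point 1), so every byte already has a
               shadow state.  Point 2: zeroed memory is safe to read,
               anything else starts out "uninitialized" until written. */
            if (flags & __GFP_ZERO)
                    kmemcheck_mark_initialized(object, size);
            else
                    kmemcheck_mark_uninitialized(object, size);

            return object;
    }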
diff --git a/MAINTAINERS b/MAINTAINERS
index 7e9c887ad951..ac814d3dd1c1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7688,16 +7688,6 @@ F: include/linux/kdb.h
7688F: include/linux/kgdb.h
7689F: kernel/debug/
7690
7691KMEMCHECK
7692M: Vegard Nossum <vegardno@ifi.uio.no>
7693M: Pekka Enberg <penberg@kernel.org>
7694S: Maintained
7695F: Documentation/dev-tools/kmemcheck.rst
7696F: arch/x86/include/asm/kmemcheck.h
7697F: arch/x86/mm/kmemcheck/
7698F: include/linux/kmemcheck.h
7699F: mm/kmemcheck.c
7700
7701KMEMLEAK
7702M: Catalin Marinas <catalin.marinas@arm.com>
7703S: Maintained
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f08977d82ca0..cb678192da4a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -112,7 +112,6 @@ config X86
112 select HAVE_ARCH_JUMP_LABEL
113 select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP
114 select HAVE_ARCH_KGDB
115 select HAVE_ARCH_KMEMCHECK
116 select HAVE_ARCH_MMAP_RND_BITS if MMU
117 select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT
118 select HAVE_ARCH_COMPAT_MMAP_BASES if MMU && COMPAT
@@ -1430,7 +1429,7 @@ config ARCH_DMA_ADDR_T_64BIT
1430
1431config X86_DIRECT_GBPAGES
1432 def_bool y
1433 depends on X86_64 && !DEBUG_PAGEALLOC && !KMEMCHECK 1432 depends on X86_64 && !DEBUG_PAGEALLOC
1434 ---help---
1435 Certain kernel features effectively disable kernel
1436 linear 1 GB mappings (even if the CPU otherwise
diff --git a/arch/x86/include/asm/kmemcheck.h b/arch/x86/include/asm/kmemcheck.h
index 945a0337fbcf..ea32a7d3cf1b 100644
--- a/arch/x86/include/asm/kmemcheck.h
+++ b/arch/x86/include/asm/kmemcheck.h
@@ -1,43 +1 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef ASM_X86_KMEMCHECK_H
3#define ASM_X86_KMEMCHECK_H
4
5#include <linux/types.h>
6#include <asm/ptrace.h>
7
8#ifdef CONFIG_KMEMCHECK
9bool kmemcheck_active(struct pt_regs *regs);
10
11void kmemcheck_show(struct pt_regs *regs);
12void kmemcheck_hide(struct pt_regs *regs);
13
14bool kmemcheck_fault(struct pt_regs *regs,
15 unsigned long address, unsigned long error_code);
16bool kmemcheck_trap(struct pt_regs *regs);
17#else
18static inline bool kmemcheck_active(struct pt_regs *regs)
19{
20 return false;
21}
22
23static inline void kmemcheck_show(struct pt_regs *regs)
24{
25}
26
27static inline void kmemcheck_hide(struct pt_regs *regs)
28{
29}
30
31static inline bool kmemcheck_fault(struct pt_regs *regs,
32 unsigned long address, unsigned long error_code)
33{
34 return false;
35}
36
37static inline bool kmemcheck_trap(struct pt_regs *regs)
38{
39 return false;
40}
41#endif /* CONFIG_KMEMCHECK */
42
43#endif
diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h
index 076502241eae..55d392c6bd29 100644
--- a/arch/x86/include/asm/string_32.h
+++ b/arch/x86/include/asm/string_32.h
@@ -179,8 +179,6 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len)
179 * No 3D Now!
180 */
181
182#ifndef CONFIG_KMEMCHECK
183
184#if (__GNUC__ >= 4)
185#define memcpy(t, f, n) __builtin_memcpy(t, f, n)
186#else
@@ -189,13 +187,6 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len)
189 ? __constant_memcpy((t), (f), (n)) \
190 : __memcpy((t), (f), (n)))
191#endif
192#else
193/*
194 * kmemcheck becomes very happy if we use the REP instructions unconditionally,
195 * because it means that we know both memory operands in advance.
196 */
197#define memcpy(t, f, n) __memcpy((t), (f), (n))
198#endif
199
200#endif
201#endif /* !CONFIG_FORTIFY_SOURCE */
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 0b1b4445f4c5..533f74c300c2 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -33,7 +33,6 @@ extern void *memcpy(void *to, const void *from, size_t len);
33extern void *__memcpy(void *to, const void *from, size_t len);
34
35#ifndef CONFIG_FORTIFY_SOURCE
36#ifndef CONFIG_KMEMCHECK
37#if (__GNUC__ == 4 && __GNUC_MINOR__ < 3) || __GNUC__ < 4
38#define memcpy(dst, src, len) \
39({ \
@@ -46,13 +45,6 @@ extern void *__memcpy(void *to, const void *from, size_t len);
46 __ret; \
47})
48#endif
49#else
50/*
51 * kmemcheck becomes very happy if we use the REP instructions unconditionally,
52 * because it means that we know both memory operands in advance.
53 */
54#define memcpy(dst, src, len) __inline_memcpy((dst), (src), (len))
55#endif
56#endif /* !CONFIG_FORTIFY_SOURCE */
57
58#define __HAVE_ARCH_MEMSET
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index b720dacac051..b1af22073e28 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -187,21 +187,6 @@ static void early_init_intel(struct cpuinfo_x86 *c)
187 if (c->x86 == 6 && c->x86_model < 15)
188 clear_cpu_cap(c, X86_FEATURE_PAT);
189
190#ifdef CONFIG_KMEMCHECK
191 /*
192 * P4s have a "fast strings" feature which causes single-
193 * stepping REP instructions to only generate a #DB on
194 * cache-line boundaries.
195 *
196 * Ingo Molnar reported a Pentium D (model 6) and a Xeon
197 * (model 2) with the same problem.
198 */
199 if (c->x86 == 15)
200 if (msr_clear_bit(MSR_IA32_MISC_ENABLE,
201 MSR_IA32_MISC_ENABLE_FAST_STRING_BIT) > 0)
202 pr_info("kmemcheck: Disabling fast string operations\n");
203#endif
204
205 /*
206 * If fast string is not enabled in IA32_MISC_ENABLE for any reason,
207 * clear the fast string and enhanced fast string CPU capabilities.
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 7ba7f3d7f477..8e13b8cc6bed 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -29,8 +29,6 @@ obj-$(CONFIG_X86_PTDUMP) += debug_pagetables.o
29
30obj-$(CONFIG_HIGHMEM) += highmem_32.o
31
32obj-$(CONFIG_KMEMCHECK) += kmemcheck/
33
34KASAN_SANITIZE_kasan_init_$(BITS).o := n
35obj-$(CONFIG_KASAN) += kasan_init_$(BITS).o
36
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index ef94620ceb8a..6fdf91ef130a 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -163,12 +163,11 @@ static int page_size_mask;
163static void __init probe_page_size_mask(void)
164{
165 /*
166 * For CONFIG_KMEMCHECK or pagealloc debugging, identity mapping will 166 * For pagealloc debugging, identity mapping will use small pages.
167 * use small pages.
168 * This will simplify cpa(), which otherwise needs to support splitting
169 * large pages into small in interrupt context, etc.
170 */
171 if (boot_cpu_has(X86_FEATURE_PSE) && !debug_pagealloc_enabled() && !IS_ENABLED(CONFIG_KMEMCHECK)) 170 if (boot_cpu_has(X86_FEATURE_PSE) && !debug_pagealloc_enabled())
172 page_size_mask |= 1 << PG_LEVEL_2M;
173 else
174 direct_gbpages = 0;
diff --git a/arch/x86/mm/kmemcheck/Makefile b/arch/x86/mm/kmemcheck/Makefile
deleted file mode 100644
index 520b3bce4095..000000000000
--- a/arch/x86/mm/kmemcheck/Makefile
+++ /dev/null
@@ -1 +0,0 @@
1obj-y := error.o kmemcheck.o opcode.o pte.o selftest.o shadow.o
diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c
index 872ec4159a68..cec594032515 100644
--- a/arch/x86/mm/kmemcheck/error.c
+++ b/arch/x86/mm/kmemcheck/error.c
@@ -1,228 +1 @@
1// SPDX-License-Identifier: GPL-2.0
2#include <linux/interrupt.h>
3#include <linux/kdebug.h>
4#include <linux/kmemcheck.h>
5#include <linux/kernel.h>
6#include <linux/types.h>
7#include <linux/ptrace.h>
8#include <linux/stacktrace.h>
9#include <linux/string.h>
10
11#include "error.h"
12#include "shadow.h"
13
14enum kmemcheck_error_type {
15 KMEMCHECK_ERROR_INVALID_ACCESS,
16 KMEMCHECK_ERROR_BUG,
17};
18
19#define SHADOW_COPY_SIZE (1 << CONFIG_KMEMCHECK_SHADOW_COPY_SHIFT)
20
21struct kmemcheck_error {
22 enum kmemcheck_error_type type;
23
24 union {
25 /* KMEMCHECK_ERROR_INVALID_ACCESS */
26 struct {
27 /* Kind of access that caused the error */
28 enum kmemcheck_shadow state;
29 /* Address and size of the erroneous read */
30 unsigned long address;
31 unsigned int size;
32 };
33 };
34
35 struct pt_regs regs;
36 struct stack_trace trace;
37 unsigned long trace_entries[32];
38
39 /* We compress it to a char. */
40 unsigned char shadow_copy[SHADOW_COPY_SIZE];
41 unsigned char memory_copy[SHADOW_COPY_SIZE];
42};
43
44/*
45 * Create a ring queue of errors to output. We can't call printk() directly
46 * from the kmemcheck traps, since this may call the console drivers and
47 * result in a recursive fault.
48 */
49static struct kmemcheck_error error_fifo[CONFIG_KMEMCHECK_QUEUE_SIZE];
50static unsigned int error_count;
51static unsigned int error_rd;
52static unsigned int error_wr;
53static unsigned int error_missed_count;
54
55static struct kmemcheck_error *error_next_wr(void)
56{
57 struct kmemcheck_error *e;
58
59 if (error_count == ARRAY_SIZE(error_fifo)) {
60 ++error_missed_count;
61 return NULL;
62 }
63
64 e = &error_fifo[error_wr];
65 if (++error_wr == ARRAY_SIZE(error_fifo))
66 error_wr = 0;
67 ++error_count;
68 return e;
69}
70
71static struct kmemcheck_error *error_next_rd(void)
72{
73 struct kmemcheck_error *e;
74
75 if (error_count == 0)
76 return NULL;
77
78 e = &error_fifo[error_rd];
79 if (++error_rd == ARRAY_SIZE(error_fifo))
80 error_rd = 0;
81 --error_count;
82 return e;
83}
84
85void kmemcheck_error_recall(void)
86{
87 static const char *desc[] = {
88 [KMEMCHECK_SHADOW_UNALLOCATED] = "unallocated",
89 [KMEMCHECK_SHADOW_UNINITIALIZED] = "uninitialized",
90 [KMEMCHECK_SHADOW_INITIALIZED] = "initialized",
91 [KMEMCHECK_SHADOW_FREED] = "freed",
92 };
93
94 static const char short_desc[] = {
95 [KMEMCHECK_SHADOW_UNALLOCATED] = 'a',
96 [KMEMCHECK_SHADOW_UNINITIALIZED] = 'u',
97 [KMEMCHECK_SHADOW_INITIALIZED] = 'i',
98 [KMEMCHECK_SHADOW_FREED] = 'f',
99 };
100
101 struct kmemcheck_error *e;
102 unsigned int i;
103
104 e = error_next_rd();
105 if (!e)
106 return;
107
108 switch (e->type) {
109 case KMEMCHECK_ERROR_INVALID_ACCESS:
110 printk(KERN_WARNING "WARNING: kmemcheck: Caught %d-bit read from %s memory (%p)\n",
111 8 * e->size, e->state < ARRAY_SIZE(desc) ?
112 desc[e->state] : "(invalid shadow state)",
113 (void *) e->address);
114
115 printk(KERN_WARNING);
116 for (i = 0; i < SHADOW_COPY_SIZE; ++i)
117 printk(KERN_CONT "%02x", e->memory_copy[i]);
118 printk(KERN_CONT "\n");
119
120 printk(KERN_WARNING);
121 for (i = 0; i < SHADOW_COPY_SIZE; ++i) {
122 if (e->shadow_copy[i] < ARRAY_SIZE(short_desc))
123 printk(KERN_CONT " %c", short_desc[e->shadow_copy[i]]);
124 else
125 printk(KERN_CONT " ?");
126 }
127 printk(KERN_CONT "\n");
128 printk(KERN_WARNING "%*c\n", 2 + 2
129 * (int) (e->address & (SHADOW_COPY_SIZE - 1)), '^');
130 break;
131 case KMEMCHECK_ERROR_BUG:
132 printk(KERN_EMERG "ERROR: kmemcheck: Fatal error\n");
133 break;
134 }
135
136 __show_regs(&e->regs, 1);
137 print_stack_trace(&e->trace, 0);
138}
139
140static void do_wakeup(unsigned long data)
141{
142 while (error_count > 0)
143 kmemcheck_error_recall();
144
145 if (error_missed_count > 0) {
146 printk(KERN_WARNING "kmemcheck: Lost %d error reports because "
147 "the queue was too small\n", error_missed_count);
148 error_missed_count = 0;
149 }
150}
151
152static DECLARE_TASKLET(kmemcheck_tasklet, &do_wakeup, 0);
153
154/*
155 * Save the context of an error report.
156 */
157void kmemcheck_error_save(enum kmemcheck_shadow state,
158 unsigned long address, unsigned int size, struct pt_regs *regs)
159{
160 static unsigned long prev_ip;
161
162 struct kmemcheck_error *e;
163 void *shadow_copy;
164 void *memory_copy;
165
166 /* Don't report several adjacent errors from the same EIP. */
167 if (regs->ip == prev_ip)
168 return;
169 prev_ip = regs->ip;
170
171 e = error_next_wr();
172 if (!e)
173 return;
174
175 e->type = KMEMCHECK_ERROR_INVALID_ACCESS;
176
177 e->state = state;
178 e->address = address;
179 e->size = size;
180
181 /* Save regs */
182 memcpy(&e->regs, regs, sizeof(*regs));
183
184 /* Save stack trace */
185 e->trace.nr_entries = 0;
186 e->trace.entries = e->trace_entries;
187 e->trace.max_entries = ARRAY_SIZE(e->trace_entries);
188 e->trace.skip = 0;
189 save_stack_trace_regs(regs, &e->trace);
190
191 /* Round address down to nearest 16 bytes */
192 shadow_copy = kmemcheck_shadow_lookup(address
193 & ~(SHADOW_COPY_SIZE - 1));
194 BUG_ON(!shadow_copy);
195
196 memcpy(e->shadow_copy, shadow_copy, SHADOW_COPY_SIZE);
197
198 kmemcheck_show_addr(address);
199 memory_copy = (void *) (address & ~(SHADOW_COPY_SIZE - 1));
200 memcpy(e->memory_copy, memory_copy, SHADOW_COPY_SIZE);
201 kmemcheck_hide_addr(address);
202
203 tasklet_hi_schedule_first(&kmemcheck_tasklet);
204}
205
206/*
207 * Save the context of a kmemcheck bug.
208 */
209void kmemcheck_error_save_bug(struct pt_regs *regs)
210{
211 struct kmemcheck_error *e;
212
213 e = error_next_wr();
214 if (!e)
215 return;
216
217 e->type = KMEMCHECK_ERROR_BUG;
218
219 memcpy(&e->regs, regs, sizeof(*regs));
220
221 e->trace.nr_entries = 0;
222 e->trace.entries = e->trace_entries;
223 e->trace.max_entries = ARRAY_SIZE(e->trace_entries);
224 e->trace.skip = 1;
225 save_stack_trace(&e->trace);
226
227 tasklet_hi_schedule_first(&kmemcheck_tasklet);
228}
diff --git a/arch/x86/mm/kmemcheck/error.h b/arch/x86/mm/kmemcheck/error.h
index 39f80d7a874d..ea32a7d3cf1b 100644
--- a/arch/x86/mm/kmemcheck/error.h
+++ b/arch/x86/mm/kmemcheck/error.h
@@ -1,16 +1 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef ARCH__X86__MM__KMEMCHECK__ERROR_H
3#define ARCH__X86__MM__KMEMCHECK__ERROR_H
4
5#include <linux/ptrace.h>
6
7#include "shadow.h"
8
9void kmemcheck_error_save(enum kmemcheck_shadow state,
10 unsigned long address, unsigned int size, struct pt_regs *regs);
11
12void kmemcheck_error_save_bug(struct pt_regs *regs);
13
14void kmemcheck_error_recall(void);
15
16#endif
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c
deleted file mode 100644
index 4515bae36bbe..000000000000
--- a/arch/x86/mm/kmemcheck/kmemcheck.c
+++ /dev/null
@@ -1,658 +0,0 @@
1/**
2 * kmemcheck - a heavyweight memory checker for the linux kernel
3 * Copyright (C) 2007, 2008 Vegard Nossum <vegardno@ifi.uio.no>
4 * (With a lot of help from Ingo Molnar and Pekka Enberg.)
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2) as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/init.h>
12#include <linux/interrupt.h>
13#include <linux/kallsyms.h>
14#include <linux/kernel.h>
15#include <linux/kmemcheck.h>
16#include <linux/mm.h>
17#include <linux/page-flags.h>
18#include <linux/percpu.h>
19#include <linux/ptrace.h>
20#include <linux/string.h>
21#include <linux/types.h>
22
23#include <asm/cacheflush.h>
24#include <asm/kmemcheck.h>
25#include <asm/pgtable.h>
26#include <asm/tlbflush.h>
27
28#include "error.h"
29#include "opcode.h"
30#include "pte.h"
31#include "selftest.h"
32#include "shadow.h"
33
34
35#ifdef CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT
36# define KMEMCHECK_ENABLED 0
37#endif
38
39#ifdef CONFIG_KMEMCHECK_ENABLED_BY_DEFAULT
40# define KMEMCHECK_ENABLED 1
41#endif
42
43#ifdef CONFIG_KMEMCHECK_ONESHOT_BY_DEFAULT
44# define KMEMCHECK_ENABLED 2
45#endif
46
47int kmemcheck_enabled = KMEMCHECK_ENABLED;
48
49int __init kmemcheck_init(void)
50{
51#ifdef CONFIG_SMP
52 /*
53 * Limit SMP to use a single CPU. We rely on the fact that this code
54 * runs before SMP is set up.
55 */
56 if (setup_max_cpus > 1) {
57 printk(KERN_INFO
58 "kmemcheck: Limiting number of CPUs to 1.\n");
59 setup_max_cpus = 1;
60 }
61#endif
62
63 if (!kmemcheck_selftest()) {
64 printk(KERN_INFO "kmemcheck: self-tests failed; disabling\n");
65 kmemcheck_enabled = 0;
66 return -EINVAL;
67 }
68
69 printk(KERN_INFO "kmemcheck: Initialized\n");
70 return 0;
71}
72
73early_initcall(kmemcheck_init);
74
75/*
76 * We need to parse the kmemcheck= option before any memory is allocated.
77 */
78static int __init param_kmemcheck(char *str)
79{
80 int val;
81 int ret;
82
83 if (!str)
84 return -EINVAL;
85
86 ret = kstrtoint(str, 0, &val);
87 if (ret)
88 return ret;
89 kmemcheck_enabled = val;
90 return 0;
91}
92
93early_param("kmemcheck", param_kmemcheck);
94
95int kmemcheck_show_addr(unsigned long address)
96{
97 pte_t *pte;
98
99 pte = kmemcheck_pte_lookup(address);
100 if (!pte)
101 return 0;
102
103 set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
104 __flush_tlb_one(address);
105 return 1;
106}
107
108int kmemcheck_hide_addr(unsigned long address)
109{
110 pte_t *pte;
111
112 pte = kmemcheck_pte_lookup(address);
113 if (!pte)
114 return 0;
115
116 set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
117 __flush_tlb_one(address);
118 return 1;
119}
120
121struct kmemcheck_context {
122 bool busy;
123 int balance;
124
125 /*
126 * There can be at most two memory operands to an instruction, but
127 * each address can cross a page boundary -- so we may need up to
128 * four addresses that must be hidden/revealed for each fault.
129 */
130 unsigned long addr[4];
131 unsigned long n_addrs;
132 unsigned long flags;
133
134 /* Data size of the instruction that caused a fault. */
135 unsigned int size;
136};
137
138static DEFINE_PER_CPU(struct kmemcheck_context, kmemcheck_context);
139
140bool kmemcheck_active(struct pt_regs *regs)
141{
142 struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);
143
144 return data->balance > 0;
145}
146
147/* Save an address that needs to be shown/hidden */
148static void kmemcheck_save_addr(unsigned long addr)
149{
150 struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);
151
152 BUG_ON(data->n_addrs >= ARRAY_SIZE(data->addr));
153 data->addr[data->n_addrs++] = addr;
154}
155
156static unsigned int kmemcheck_show_all(void)
157{
158 struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);
159 unsigned int i;
160 unsigned int n;
161
162 n = 0;
163 for (i = 0; i < data->n_addrs; ++i)
164 n += kmemcheck_show_addr(data->addr[i]);
165
166 return n;
167}
168
169static unsigned int kmemcheck_hide_all(void)
170{
171 struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);
172 unsigned int i;
173 unsigned int n;
174
175 n = 0;
176 for (i = 0; i < data->n_addrs; ++i)
177 n += kmemcheck_hide_addr(data->addr[i]);
178
179 return n;
180}
181
182/*
183 * Called from the #PF handler.
184 */
185void kmemcheck_show(struct pt_regs *regs)
186{
187 struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);
188
189 BUG_ON(!irqs_disabled());
190
191 if (unlikely(data->balance != 0)) {
192 kmemcheck_show_all();
193 kmemcheck_error_save_bug(regs);
194 data->balance = 0;
195 return;
196 }
197
198 /*
199 * None of the addresses actually belonged to kmemcheck. Note that
200 * this is not an error.
201 */
202 if (kmemcheck_show_all() == 0)
203 return;
204
205 ++data->balance;
206
207 /*
208 * The IF needs to be cleared as well, so that the faulting
209 * instruction can run "uninterrupted". Otherwise, we might take
210 * an interrupt and start executing that before we've had a chance
211 * to hide the page again.
212 *
213 * NOTE: In the rare case of multiple faults, we must not override
214 * the original flags:
215 */
216 if (!(regs->flags & X86_EFLAGS_TF))
217 data->flags = regs->flags;
218
219 regs->flags |= X86_EFLAGS_TF;
220 regs->flags &= ~X86_EFLAGS_IF;
221}
222
223/*
224 * Called from the #DB handler.
225 */
226void kmemcheck_hide(struct pt_regs *regs)
227{
228 struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);
229 int n;
230
231 BUG_ON(!irqs_disabled());
232
233 if (unlikely(data->balance != 1)) {
234 kmemcheck_show_all();
235 kmemcheck_error_save_bug(regs);
236 data->n_addrs = 0;
237 data->balance = 0;
238
239 if (!(data->flags & X86_EFLAGS_TF))
240 regs->flags &= ~X86_EFLAGS_TF;
241 if (data->flags & X86_EFLAGS_IF)
242 regs->flags |= X86_EFLAGS_IF;
243 return;
244 }
245
246 if (kmemcheck_enabled)
247 n = kmemcheck_hide_all();
248 else
249 n = kmemcheck_show_all();
250
251 if (n == 0)
252 return;
253
254 --data->balance;
255
256 data->n_addrs = 0;
257
258 if (!(data->flags & X86_EFLAGS_TF))
259 regs->flags &= ~X86_EFLAGS_TF;
260 if (data->flags & X86_EFLAGS_IF)
261 regs->flags |= X86_EFLAGS_IF;
262}
263
264void kmemcheck_show_pages(struct page *p, unsigned int n)
265{
266 unsigned int i;
267
268 for (i = 0; i < n; ++i) {
269 unsigned long address;
270 pte_t *pte;
271 unsigned int level;
272
273 address = (unsigned long) page_address(&p[i]);
274 pte = lookup_address(address, &level);
275 BUG_ON(!pte);
276 BUG_ON(level != PG_LEVEL_4K);
277
278 set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
279 set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_HIDDEN));
280 __flush_tlb_one(address);
281 }
282}
283
284bool kmemcheck_page_is_tracked(struct page *p)
285{
286 /* This will also check the "hidden" flag of the PTE. */
287 return kmemcheck_pte_lookup((unsigned long) page_address(p));
288}
289
290void kmemcheck_hide_pages(struct page *p, unsigned int n)
291{
292 unsigned int i;
293
294 for (i = 0; i < n; ++i) {
295 unsigned long address;
296 pte_t *pte;
297 unsigned int level;
298
299 address = (unsigned long) page_address(&p[i]);
300 pte = lookup_address(address, &level);
301 BUG_ON(!pte);
302 BUG_ON(level != PG_LEVEL_4K);
303
304 set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
305 set_pte(pte, __pte(pte_val(*pte) | _PAGE_HIDDEN));
306 __flush_tlb_one(address);
307 }
308}
309
310/* Access may NOT cross page boundary */
311static void kmemcheck_read_strict(struct pt_regs *regs,
312 unsigned long addr, unsigned int size)
313{
314 void *shadow;
315 enum kmemcheck_shadow status;
316
317 shadow = kmemcheck_shadow_lookup(addr);
318 if (!shadow)
319 return;
320
321 kmemcheck_save_addr(addr);
322 status = kmemcheck_shadow_test(shadow, size);
323 if (status == KMEMCHECK_SHADOW_INITIALIZED)
324 return;
325
326 if (kmemcheck_enabled)
327 kmemcheck_error_save(status, addr, size, regs);
328
329 if (kmemcheck_enabled == 2)
330 kmemcheck_enabled = 0;
331
332 /* Don't warn about it again. */
333 kmemcheck_shadow_set(shadow, size);
334}
335
336bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
337{
338 enum kmemcheck_shadow status;
339 void *shadow;
340
341 shadow = kmemcheck_shadow_lookup(addr);
342 if (!shadow)
343 return true;
344
345 status = kmemcheck_shadow_test_all(shadow, size);
346
347 return status == KMEMCHECK_SHADOW_INITIALIZED;
348}
349
350/* Access may cross page boundary */
351static void kmemcheck_read(struct pt_regs *regs,
352 unsigned long addr, unsigned int size)
353{
354 unsigned long page = addr & PAGE_MASK;
355 unsigned long next_addr = addr + size - 1;
356 unsigned long next_page = next_addr & PAGE_MASK;
357
358 if (likely(page == next_page)) {
359 kmemcheck_read_strict(regs, addr, size);
360 return;
361 }
362
363 /*
364 * What we do is basically to split the access across the
365 * two pages and handle each part separately. Yes, this means
366 * that we may now see reads that are 3 + 5 bytes, for
367 * example (and if both are uninitialized, there will be two
368 * reports), but it makes the code a lot simpler.
369 */
370 kmemcheck_read_strict(regs, addr, next_page - addr);
371 kmemcheck_read_strict(regs, next_page, next_addr - next_page);
372}
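
The boundary arithmetic above is easy to check in isolation. The stand-alone snippet below (an illustration only, with the page size hard-coded to 4 KiB) shows how an 8-byte access starting three bytes before a page boundary decomposes into the 3 + 5 byte pair mentioned in the comment:

#include <stdio.h>

#define MY_PAGE_SIZE 4096UL
#define MY_PAGE_MASK (~(MY_PAGE_SIZE - 1))

/* Print how an access of 'size' bytes at 'addr' splits at a page boundary. */
static void show_split(unsigned long addr, unsigned long size)
{
	unsigned long page      = addr & MY_PAGE_MASK;
	unsigned long next_addr = addr + size - 1;
	unsigned long next_page = next_addr & MY_PAGE_MASK;
	unsigned long first;

	if (page == next_page) {
		printf("%#lx: one piece of %lu bytes\n", addr, size);
		return;
	}
	first = next_page - addr;
	printf("%#lx: %lu bytes, then %lu bytes at %#lx\n",
	       addr, first, size - first, next_page);
}

int main(void)
{
	show_split(0x1ffd, 8);	/* crosses 0x2000: 3 + 5 bytes */
	show_split(0x3000, 4);	/* stays within one page */
	return 0;
}
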
373
374static void kmemcheck_write_strict(struct pt_regs *regs,
375 unsigned long addr, unsigned int size)
376{
377 void *shadow;
378
379 shadow = kmemcheck_shadow_lookup(addr);
380 if (!shadow)
381 return;
382
383 kmemcheck_save_addr(addr);
384 kmemcheck_shadow_set(shadow, size);
385}
386
387static void kmemcheck_write(struct pt_regs *regs,
388 unsigned long addr, unsigned int size)
389{
390 unsigned long page = addr & PAGE_MASK;
391 unsigned long next_addr = addr + size - 1;
392 unsigned long next_page = next_addr & PAGE_MASK;
393
394 if (likely(page == next_page)) {
395 kmemcheck_write_strict(regs, addr, size);
396 return;
397 }
398
399 /* See comment in kmemcheck_read(). */
400 kmemcheck_write_strict(regs, addr, next_page - addr);
401 kmemcheck_write_strict(regs, next_page, next_addr - next_page);
402}
403
404/*
405 * Copying is hard. We have two addresses, each of which may be split across
406 * a page (and each page will have different shadow addresses).
407 */
408static void kmemcheck_copy(struct pt_regs *regs,
409 unsigned long src_addr, unsigned long dst_addr, unsigned int size)
410{
411 uint8_t shadow[8];
412 enum kmemcheck_shadow status;
413
414 unsigned long page;
415 unsigned long next_addr;
416 unsigned long next_page;
417
418 uint8_t *x;
419 unsigned int i;
420 unsigned int n;
421
422 BUG_ON(size > sizeof(shadow));
423
424 page = src_addr & PAGE_MASK;
425 next_addr = src_addr + size - 1;
426 next_page = next_addr & PAGE_MASK;
427
428 if (likely(page == next_page)) {
429 /* Same page */
430 x = kmemcheck_shadow_lookup(src_addr);
431 if (x) {
432 kmemcheck_save_addr(src_addr);
433 for (i = 0; i < size; ++i)
434 shadow[i] = x[i];
435 } else {
436 for (i = 0; i < size; ++i)
437 shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
438 }
439 } else {
440 n = next_page - src_addr;
441 BUG_ON(n > sizeof(shadow));
442
443 /* First page */
444 x = kmemcheck_shadow_lookup(src_addr);
445 if (x) {
446 kmemcheck_save_addr(src_addr);
447 for (i = 0; i < n; ++i)
448 shadow[i] = x[i];
449 } else {
450 /* Not tracked */
451 for (i = 0; i < n; ++i)
452 shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
453 }
454
455 /* Second page */
456 x = kmemcheck_shadow_lookup(next_page);
457 if (x) {
458 kmemcheck_save_addr(next_page);
459 for (i = n; i < size; ++i)
460 shadow[i] = x[i - n];
461 } else {
462 /* Not tracked */
463 for (i = n; i < size; ++i)
464 shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
465 }
466 }
467
468 page = dst_addr & PAGE_MASK;
469 next_addr = dst_addr + size - 1;
470 next_page = next_addr & PAGE_MASK;
471
472 if (likely(page == next_page)) {
473 /* Same page */
474 x = kmemcheck_shadow_lookup(dst_addr);
475 if (x) {
476 kmemcheck_save_addr(dst_addr);
477 for (i = 0; i < size; ++i) {
478 x[i] = shadow[i];
479 shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
480 }
481 }
482 } else {
483 n = next_page - dst_addr;
484 BUG_ON(n > sizeof(shadow));
485
486 /* First page */
487 x = kmemcheck_shadow_lookup(dst_addr);
488 if (x) {
489 kmemcheck_save_addr(dst_addr);
490 for (i = 0; i < n; ++i) {
491 x[i] = shadow[i];
492 shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
493 }
494 }
495
496 /* Second page */
497 x = kmemcheck_shadow_lookup(next_page);
498 if (x) {
499 kmemcheck_save_addr(next_page);
500 for (i = n; i < size; ++i) {
501 x[i - n] = shadow[i];
502 shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
503 }
504 }
505 }
506
507 status = kmemcheck_shadow_test(shadow, size);
508 if (status == KMEMCHECK_SHADOW_INITIALIZED)
509 return;
510
511 if (kmemcheck_enabled)
512 kmemcheck_error_save(status, src_addr, size, regs);
513
514 if (kmemcheck_enabled == 2)
515 kmemcheck_enabled = 0;
516}
517
518enum kmemcheck_method {
519 KMEMCHECK_READ,
520 KMEMCHECK_WRITE,
521};
522
523static void kmemcheck_access(struct pt_regs *regs,
524 unsigned long fallback_address, enum kmemcheck_method fallback_method)
525{
526 const uint8_t *insn;
527 const uint8_t *insn_primary;
528 unsigned int size;
529
530 struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);
531
532 /* Recursive fault -- ouch. */
533 if (data->busy) {
534 kmemcheck_show_addr(fallback_address);
535 kmemcheck_error_save_bug(regs);
536 return;
537 }
538
539 data->busy = true;
540
541 insn = (const uint8_t *) regs->ip;
542 insn_primary = kmemcheck_opcode_get_primary(insn);
543
544 kmemcheck_opcode_decode(insn, &size);
545
546 switch (insn_primary[0]) {
547#ifdef CONFIG_KMEMCHECK_BITOPS_OK
548 /* AND, OR, XOR */
549 /*
550 * Unfortunately, these instructions have to be excluded from
551 * our regular checking since they access only some (and not
552 * all) bits. This clears out "bogus" bitfield-access warnings.
553 */
554 case 0x80:
555 case 0x81:
556 case 0x82:
557 case 0x83:
558 switch ((insn_primary[1] >> 3) & 7) {
559 /* OR */
560 case 1:
561 /* AND */
562 case 4:
563 /* XOR */
564 case 6:
565 kmemcheck_write(regs, fallback_address, size);
566 goto out;
567
568 /* ADD */
569 case 0:
570 /* ADC */
571 case 2:
572 /* SBB */
573 case 3:
574 /* SUB */
575 case 5:
576 /* CMP */
577 case 7:
578 break;
579 }
580 break;
581#endif
582
583 /* MOVS, MOVSB, MOVSW, MOVSD */
584 case 0xa4:
585 case 0xa5:
586 /*
587 * These instructions are special because they take two
588 * addresses, but we only get one page fault.
589 */
590 kmemcheck_copy(regs, regs->si, regs->di, size);
591 goto out;
592
593 /* CMPS, CMPSB, CMPSW, CMPSD */
594 case 0xa6:
595 case 0xa7:
596 kmemcheck_read(regs, regs->si, size);
597 kmemcheck_read(regs, regs->di, size);
598 goto out;
599 }
600
601 /*
602 * If the opcode isn't special in any way, we use the data from the
603 * page fault handler to determine the address and type of memory
604 * access.
605 */
606 switch (fallback_method) {
607 case KMEMCHECK_READ:
608 kmemcheck_read(regs, fallback_address, size);
609 goto out;
610 case KMEMCHECK_WRITE:
611 kmemcheck_write(regs, fallback_address, size);
612 goto out;
613 }
614
615out:
616 data->busy = false;
617}
618
619bool kmemcheck_fault(struct pt_regs *regs, unsigned long address,
620 unsigned long error_code)
621{
622 pte_t *pte;
623
624 /*
625 * XXX: Is it safe to assume that memory accesses from virtual 86
626 * mode or non-kernel code segments will _never_ access kernel
627 * memory (e.g. tracked pages)? For now, we need this to avoid
628 * invoking kmemcheck for PnP BIOS calls.
629 */
630 if (regs->flags & X86_VM_MASK)
631 return false;
632 if (regs->cs != __KERNEL_CS)
633 return false;
634
635 pte = kmemcheck_pte_lookup(address);
636 if (!pte)
637 return false;
638
639 WARN_ON_ONCE(in_nmi());
640
641 if (error_code & 2)
642 kmemcheck_access(regs, address, KMEMCHECK_WRITE);
643 else
644 kmemcheck_access(regs, address, KMEMCHECK_READ);
645
646 kmemcheck_show(regs);
647 return true;
648}
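
The error_code & 2 test relies on the x86 page-fault error code layout: bit 0 is set for a protection fault on a present page, bit 1 when the access was a write, and bit 2 when the fault came from user mode. A tiny stand-alone decoder (the PF_* names here are ad hoc, not taken from this file) makes the check self-documenting:

#include <stdio.h>

#define PF_PROT  (1UL << 0)	/* protection fault on a present page */
#define PF_WRITE (1UL << 1)	/* the faulting access was a write */
#define PF_USER  (1UL << 2)	/* the fault came from user mode */

int main(void)
{
	/* e.g. a kernel-mode write hitting a "hidden" (non-present) page */
	unsigned long error_code = PF_WRITE;

	printf("%s %s access, page %s\n",
	       (error_code & PF_USER)  ? "user"  : "kernel",
	       (error_code & PF_WRITE) ? "write" : "read",
	       (error_code & PF_PROT)  ? "present" : "not present");
	return 0;
}
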
649
650bool kmemcheck_trap(struct pt_regs *regs)
651{
652 if (!kmemcheck_active(regs))
653 return false;
654
655 /* We're done. */
656 kmemcheck_hide(regs);
657 return true;
658}
diff --git a/arch/x86/mm/kmemcheck/opcode.c b/arch/x86/mm/kmemcheck/opcode.c
index df8109ddf7fe..cec594032515 100644
--- a/arch/x86/mm/kmemcheck/opcode.c
+++ b/arch/x86/mm/kmemcheck/opcode.c
@@ -1,107 +1 @@
1// SPDX-License-Identifier: GPL-2.0
2#include <linux/types.h>
3
4#include "opcode.h"
5
6static bool opcode_is_prefix(uint8_t b)
7{
8 return
9 /* Group 1 */
10 b == 0xf0 || b == 0xf2 || b == 0xf3
11 /* Group 2 */
12 || b == 0x2e || b == 0x36 || b == 0x3e || b == 0x26
13 || b == 0x64 || b == 0x65
14 /* Group 3 */
15 || b == 0x66
16 /* Group 4 */
17 || b == 0x67;
18}
19
20#ifdef CONFIG_X86_64
21static bool opcode_is_rex_prefix(uint8_t b)
22{
23 return (b & 0xf0) == 0x40;
24}
25#else
26static bool opcode_is_rex_prefix(uint8_t b)
27{
28 return false;
29}
30#endif
31
32#define REX_W (1 << 3)
33
34/*
35 * This is a VERY crude opcode decoder. We only need to find the size of the
36 * load/store that caused our #PF and this should work for all the opcodes
37 * that we care about. Moreover, the ones who invented this instruction set
38 * should be shot.
39 */
40void kmemcheck_opcode_decode(const uint8_t *op, unsigned int *size)
41{
42 /* Default operand size */
43 int operand_size_override = 4;
44
45 /* prefixes */
46 for (; opcode_is_prefix(*op); ++op) {
47 if (*op == 0x66)
48 operand_size_override = 2;
49 }
50
51 /* REX prefix */
52 if (opcode_is_rex_prefix(*op)) {
53 uint8_t rex = *op;
54
55 ++op;
56 if (rex & REX_W) {
57 switch (*op) {
58 case 0x63:
59 *size = 4;
60 return;
61 case 0x0f:
62 ++op;
63
64 switch (*op) {
65 case 0xb6:
66 case 0xbe:
67 *size = 1;
68 return;
69 case 0xb7:
70 case 0xbf:
71 *size = 2;
72 return;
73 }
74
75 break;
76 }
77
78 *size = 8;
79 return;
80 }
81 }
82
83 /* escape opcode */
84 if (*op == 0x0f) {
85 ++op;
86
87 /*
88 * This is move with zero-extend and sign-extend, respectively;
89 * we don't have to think about 0xb6/0xbe, because this is
90 * already handled in the conditional below.
91 */
92 if (*op == 0xb7 || *op == 0xbf)
93 operand_size_override = 2;
94 }
95
96 *size = (*op & 1) ? operand_size_override : 1;
97}
98
99const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op)
100{
101 /* skip prefixes */
102 while (opcode_is_prefix(*op))
103 ++op;
104 if (opcode_is_rex_prefix(*op))
105 ++op;
106 return op;
107}
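
To play with the heuristic outside the kernel, the two simplest rules (skip prefixes, honour the 0x66 operand-size override, then treat an odd opcode as a full-size access and an even one as a byte access) can be lifted into a stand-alone test program. The sketch below deliberately ignores REX and 0x0f escapes, so it is a simplification of the decoder above, not a substitute:

#include <stdint.h>
#include <stdio.h>

static int is_prefix(uint8_t b)
{
	return b == 0xf0 || b == 0xf2 || b == 0xf3 ||		/* group 1 */
	       b == 0x2e || b == 0x36 || b == 0x3e || b == 0x26 ||
	       b == 0x64 || b == 0x65 ||			/* group 2 */
	       b == 0x66 ||					/* group 3 */
	       b == 0x67;					/* group 4 */
}

/* Simplified size decode: prefixes plus the low opcode bit only (no REX, no 0x0f). */
static unsigned int decode_size(const uint8_t *op)
{
	unsigned int operand_size = 4;		/* default operand size */

	for (; is_prefix(*op); ++op) {
		if (*op == 0x66)
			operand_size = 2;
	}
	return (*op & 1) ? operand_size : 1;
}

int main(void)
{
	printf("%u\n", decode_size((const uint8_t *)"\x8b\x45\xfc"));		/* mov eax, [ebp-4] -> 4 */
	printf("%u\n", decode_size((const uint8_t *)"\x66\x8b\x45\xfc"));	/* mov ax, [ebp-4]  -> 2 */
	printf("%u\n", decode_size((const uint8_t *)"\x8a\x45\xff"));		/* mov al, [ebp-1]  -> 1 */
	return 0;
}
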
diff --git a/arch/x86/mm/kmemcheck/opcode.h b/arch/x86/mm/kmemcheck/opcode.h
index 51a1ce94c24a..ea32a7d3cf1b 100644
--- a/arch/x86/mm/kmemcheck/opcode.h
+++ b/arch/x86/mm/kmemcheck/opcode.h
@@ -1,10 +1 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef ARCH__X86__MM__KMEMCHECK__OPCODE_H
3#define ARCH__X86__MM__KMEMCHECK__OPCODE_H
4
5#include <linux/types.h>
6
7void kmemcheck_opcode_decode(const uint8_t *op, unsigned int *size);
8const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op);
9
10#endif
diff --git a/arch/x86/mm/kmemcheck/pte.c b/arch/x86/mm/kmemcheck/pte.c
index 8a03be90272a..cec594032515 100644
--- a/arch/x86/mm/kmemcheck/pte.c
+++ b/arch/x86/mm/kmemcheck/pte.c
@@ -1,23 +1 @@
1// SPDX-License-Identifier: GPL-2.0
2#include <linux/mm.h>
3
4#include <asm/pgtable.h>
5
6#include "pte.h"
7
8pte_t *kmemcheck_pte_lookup(unsigned long address)
9{
10 pte_t *pte;
11 unsigned int level;
12
13 pte = lookup_address(address, &level);
14 if (!pte)
15 return NULL;
16 if (level != PG_LEVEL_4K)
17 return NULL;
18 if (!pte_hidden(*pte))
19 return NULL;
20
21 return pte;
22}
23
diff --git a/arch/x86/mm/kmemcheck/pte.h b/arch/x86/mm/kmemcheck/pte.h
index b595612382c2..ea32a7d3cf1b 100644
--- a/arch/x86/mm/kmemcheck/pte.h
+++ b/arch/x86/mm/kmemcheck/pte.h
@@ -1,11 +1 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef ARCH__X86__MM__KMEMCHECK__PTE_H
3#define ARCH__X86__MM__KMEMCHECK__PTE_H
4
5#include <linux/mm.h>
6
7#include <asm/pgtable.h>
8
9pte_t *kmemcheck_pte_lookup(unsigned long address);
10
11#endif
diff --git a/arch/x86/mm/kmemcheck/selftest.c b/arch/x86/mm/kmemcheck/selftest.c
index 7ce0be1f99eb..cec594032515 100644
--- a/arch/x86/mm/kmemcheck/selftest.c
+++ b/arch/x86/mm/kmemcheck/selftest.c
@@ -1,71 +1 @@
1// SPDX-License-Identifier: GPL-2.0
2#include <linux/bug.h>
3#include <linux/kernel.h>
4
5#include "opcode.h"
6#include "selftest.h"
7
8struct selftest_opcode {
9 unsigned int expected_size;
10 const uint8_t *insn;
11 const char *desc;
12};
13
14static const struct selftest_opcode selftest_opcodes[] = {
15 /* REP MOVS */
16 {1, "\xf3\xa4", "rep movsb <mem8>, <mem8>"},
17 {4, "\xf3\xa5", "rep movsl <mem32>, <mem32>"},
18
19 /* MOVZX / MOVZXD */
20 {1, "\x66\x0f\xb6\x51\xf8", "movzwq <mem8>, <reg16>"},
21 {1, "\x0f\xb6\x51\xf8", "movzwq <mem8>, <reg32>"},
22
23 /* MOVSX / MOVSXD */
24 {1, "\x66\x0f\xbe\x51\xf8", "movswq <mem8>, <reg16>"},
25 {1, "\x0f\xbe\x51\xf8", "movswq <mem8>, <reg32>"},
26
27#ifdef CONFIG_X86_64
28 /* MOVZX / MOVZXD */
29 {1, "\x49\x0f\xb6\x51\xf8", "movzbq <mem8>, <reg64>"},
30 {2, "\x49\x0f\xb7\x51\xf8", "movzbq <mem16>, <reg64>"},
31
32 /* MOVSX / MOVSXD */
33 {1, "\x49\x0f\xbe\x51\xf8", "movsbq <mem8>, <reg64>"},
34 {2, "\x49\x0f\xbf\x51\xf8", "movsbq <mem16>, <reg64>"},
35 {4, "\x49\x63\x51\xf8", "movslq <mem32>, <reg64>"},
36#endif
37};
38
39static bool selftest_opcode_one(const struct selftest_opcode *op)
40{
41 unsigned size;
42
43 kmemcheck_opcode_decode(op->insn, &size);
44
45 if (size == op->expected_size)
46 return true;
47
48 printk(KERN_WARNING "kmemcheck: opcode %s: expected size %d, got %d\n",
49 op->desc, op->expected_size, size);
50 return false;
51}
52
53static bool selftest_opcodes_all(void)
54{
55 bool pass = true;
56 unsigned int i;
57
58 for (i = 0; i < ARRAY_SIZE(selftest_opcodes); ++i)
59 pass = pass && selftest_opcode_one(&selftest_opcodes[i]);
60
61 return pass;
62}
63
64bool kmemcheck_selftest(void)
65{
66 bool pass = true;
67
68 pass = pass && selftest_opcodes_all();
69
70 return pass;
71}
diff --git a/arch/x86/mm/kmemcheck/selftest.h b/arch/x86/mm/kmemcheck/selftest.h
index 8d759aae453d..ea32a7d3cf1b 100644
--- a/arch/x86/mm/kmemcheck/selftest.h
+++ b/arch/x86/mm/kmemcheck/selftest.h
@@ -1,7 +1 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef ARCH_X86_MM_KMEMCHECK_SELFTEST_H
3#define ARCH_X86_MM_KMEMCHECK_SELFTEST_H
4
5bool kmemcheck_selftest(void);
6
7#endif
diff --git a/arch/x86/mm/kmemcheck/shadow.c b/arch/x86/mm/kmemcheck/shadow.c
deleted file mode 100644
index c2638a7d2c10..000000000000
--- a/arch/x86/mm/kmemcheck/shadow.c
+++ /dev/null
@@ -1,173 +0,0 @@
1#include <linux/kmemcheck.h>
2#include <linux/export.h>
3#include <linux/mm.h>
4
5#include <asm/page.h>
6#include <asm/pgtable.h>
7
8#include "pte.h"
9#include "shadow.h"
10
11/*
12 * Return the shadow address for the given address. Returns NULL if the
13 * address is not tracked.
14 *
15 * We need to be extremely careful not to follow any invalid pointers,
16 * because this function can be called for *any* possible address.
17 */
18void *kmemcheck_shadow_lookup(unsigned long address)
19{
20 pte_t *pte;
21 struct page *page;
22
23 if (!virt_addr_valid(address))
24 return NULL;
25
26 pte = kmemcheck_pte_lookup(address);
27 if (!pte)
28 return NULL;
29
30 page = virt_to_page(address);
31 if (!page->shadow)
32 return NULL;
33 return page->shadow + (address & (PAGE_SIZE - 1));
34}
35
36static void mark_shadow(void *address, unsigned int n,
37 enum kmemcheck_shadow status)
38{
39 unsigned long addr = (unsigned long) address;
40 unsigned long last_addr = addr + n - 1;
41 unsigned long page = addr & PAGE_MASK;
42 unsigned long last_page = last_addr & PAGE_MASK;
43 unsigned int first_n;
44 void *shadow;
45
46 /* If the memory range crosses a page boundary, stop there. */
47 if (page == last_page)
48 first_n = n;
49 else
50 first_n = page + PAGE_SIZE - addr;
51
52 shadow = kmemcheck_shadow_lookup(addr);
53 if (shadow)
54 memset(shadow, status, first_n);
55
56 addr += first_n;
57 n -= first_n;
58
59 /* Do full-page memset()s. */
60 while (n >= PAGE_SIZE) {
61 shadow = kmemcheck_shadow_lookup(addr);
62 if (shadow)
63 memset(shadow, status, PAGE_SIZE);
64
65 addr += PAGE_SIZE;
66 n -= PAGE_SIZE;
67 }
68
69 /* Do the remaining page, if any. */
70 if (n > 0) {
71 shadow = kmemcheck_shadow_lookup(addr);
72 if (shadow)
73 memset(shadow, status, n);
74 }
75}
76
77void kmemcheck_mark_unallocated(void *address, unsigned int n)
78{
79 mark_shadow(address, n, KMEMCHECK_SHADOW_UNALLOCATED);
80}
81
82void kmemcheck_mark_uninitialized(void *address, unsigned int n)
83{
84 mark_shadow(address, n, KMEMCHECK_SHADOW_UNINITIALIZED);
85}
86
87/*
88 * Fill the shadow memory of the given address such that the memory at that
89 * address is marked as being initialized.
90 */
91void kmemcheck_mark_initialized(void *address, unsigned int n)
92{
93 mark_shadow(address, n, KMEMCHECK_SHADOW_INITIALIZED);
94}
95EXPORT_SYMBOL_GPL(kmemcheck_mark_initialized);
96
97void kmemcheck_mark_freed(void *address, unsigned int n)
98{
99 mark_shadow(address, n, KMEMCHECK_SHADOW_FREED);
100}
101
102void kmemcheck_mark_unallocated_pages(struct page *p, unsigned int n)
103{
104 unsigned int i;
105
106 for (i = 0; i < n; ++i)
107 kmemcheck_mark_unallocated(page_address(&p[i]), PAGE_SIZE);
108}
109
110void kmemcheck_mark_uninitialized_pages(struct page *p, unsigned int n)
111{
112 unsigned int i;
113
114 for (i = 0; i < n; ++i)
115 kmemcheck_mark_uninitialized(page_address(&p[i]), PAGE_SIZE);
116}
117
118void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n)
119{
120 unsigned int i;
121
122 for (i = 0; i < n; ++i)
123 kmemcheck_mark_initialized(page_address(&p[i]), PAGE_SIZE);
124}
125
126enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size)
127{
128#ifdef CONFIG_KMEMCHECK_PARTIAL_OK
129 uint8_t *x;
130 unsigned int i;
131
132 x = shadow;
133
134 /*
135 * Make sure _some_ bytes are initialized. Gcc frequently generates
136 * code to access neighboring bytes.
137 */
138 for (i = 0; i < size; ++i) {
139 if (x[i] == KMEMCHECK_SHADOW_INITIALIZED)
140 return x[i];
141 }
142
143 return x[0];
144#else
145 return kmemcheck_shadow_test_all(shadow, size);
146#endif
147}
148
149enum kmemcheck_shadow kmemcheck_shadow_test_all(void *shadow, unsigned int size)
150{
151 uint8_t *x;
152 unsigned int i;
153
154 x = shadow;
155
156 /* All bytes must be initialized. */
157 for (i = 0; i < size; ++i) {
158 if (x[i] != KMEMCHECK_SHADOW_INITIALIZED)
159 return x[i];
160 }
161
162 return x[0];
163}
164
165void kmemcheck_shadow_set(void *shadow, unsigned int size)
166{
167 uint8_t *x;
168 unsigned int i;
169
170 x = shadow;
171 for (i = 0; i < size; ++i)
172 x[i] = KMEMCHECK_SHADOW_INITIALIZED;
173}
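
Stripped of the page-table plumbing, the shadow machinery above is a parallel byte array: one state byte per data byte, a memset() to change the state of a range, and a scan to test a range. A self-contained user-space sketch of that core idea (a fixed-size buffer stands in for the per-page shadow lookup):

#include <stdio.h>
#include <string.h>

enum shadow_state { S_UNALLOCATED, S_UNINITIALIZED, S_INITIALIZED, S_FREED };

#define BUF_SIZE 64

static unsigned char data[BUF_SIZE];
static unsigned char shadow[BUF_SIZE];	/* one state byte per data byte */

static void mark(unsigned int off, unsigned int n, enum shadow_state s)
{
	memset(shadow + off, s, n);
}

/* Return 1 only if every byte in the range has been marked initialized. */
static int range_is_initialized(unsigned int off, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; ++i)
		if (shadow[off + i] != S_INITIALIZED)
			return 0;
	return 1;
}

int main(void)
{
	mark(0, BUF_SIZE, S_UNINITIALIZED);	/* freshly "allocated" */

	data[4] = 0xaa;				/* a write initializes the byte... */
	mark(4, 1, S_INITIALIZED);		/* ...and the shadow records it */

	printf("byte 4 initialized:     %d\n", range_is_initialized(4, 1));
	printf("bytes 0..7 initialized: %d\n", range_is_initialized(0, 8));
	return 0;
}
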
diff --git a/arch/x86/mm/kmemcheck/shadow.h b/arch/x86/mm/kmemcheck/shadow.h
index 49768dc18664..ea32a7d3cf1b 100644
--- a/arch/x86/mm/kmemcheck/shadow.h
+++ b/arch/x86/mm/kmemcheck/shadow.h
@@ -1,19 +1 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef ARCH__X86__MM__KMEMCHECK__SHADOW_H
3#define ARCH__X86__MM__KMEMCHECK__SHADOW_H
4
5enum kmemcheck_shadow {
6 KMEMCHECK_SHADOW_UNALLOCATED,
7 KMEMCHECK_SHADOW_UNINITIALIZED,
8 KMEMCHECK_SHADOW_INITIALIZED,
9 KMEMCHECK_SHADOW_FREED,
10};
11
12void *kmemcheck_shadow_lookup(unsigned long address);
13
14enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size);
15enum kmemcheck_shadow kmemcheck_shadow_test_all(void *shadow,
16 unsigned int size);
17void kmemcheck_shadow_set(void *shadow, unsigned int size);
18
19#endif
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index baeb872283d9..69c238210325 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -594,21 +594,6 @@ static inline void tasklet_hi_schedule(struct tasklet_struct *t)
594 __tasklet_hi_schedule(t);
595}
596
597extern void __tasklet_hi_schedule_first(struct tasklet_struct *t);
598
599/*
600 * This version avoids touching any other tasklets. Needed for kmemcheck
601 * in order not to take any page faults while enqueueing this tasklet;
602 * consider VERY carefully whether you really need this or
603 * tasklet_hi_schedule()...
604 */
605static inline void tasklet_hi_schedule_first(struct tasklet_struct *t)
606{
607 if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
608 __tasklet_hi_schedule_first(t);
609}
610
611
612static inline void tasklet_disable_nosync(struct tasklet_struct *t)
613{
614 atomic_inc(&t->count);
diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h
index 7b1d7bead7d9..ea32a7d3cf1b 100644
--- a/include/linux/kmemcheck.h
+++ b/include/linux/kmemcheck.h
@@ -1,172 +1 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef LINUX_KMEMCHECK_H
3#define LINUX_KMEMCHECK_H
4
5#include <linux/mm_types.h>
6#include <linux/types.h>
7
8#ifdef CONFIG_KMEMCHECK
9extern int kmemcheck_enabled;
10
11/* The slab-related functions. */
12void kmemcheck_alloc_shadow(struct page *page, int order, gfp_t flags, int node);
13void kmemcheck_free_shadow(struct page *page, int order);
14void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object,
15 size_t size);
16void kmemcheck_slab_free(struct kmem_cache *s, void *object, size_t size);
17
18void kmemcheck_pagealloc_alloc(struct page *p, unsigned int order,
19 gfp_t gfpflags);
20
21void kmemcheck_show_pages(struct page *p, unsigned int n);
22void kmemcheck_hide_pages(struct page *p, unsigned int n);
23
24bool kmemcheck_page_is_tracked(struct page *p);
25
26void kmemcheck_mark_unallocated(void *address, unsigned int n);
27void kmemcheck_mark_uninitialized(void *address, unsigned int n);
28void kmemcheck_mark_initialized(void *address, unsigned int n);
29void kmemcheck_mark_freed(void *address, unsigned int n);
30
31void kmemcheck_mark_unallocated_pages(struct page *p, unsigned int n);
32void kmemcheck_mark_uninitialized_pages(struct page *p, unsigned int n);
33void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n);
34
35int kmemcheck_show_addr(unsigned long address);
36int kmemcheck_hide_addr(unsigned long address);
37
38bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size);
39
40/*
41 * Bitfield annotations
42 *
43 * How to use: If you have a struct using bitfields, for example
44 *
45 * struct a {
46 * int x:8, y:8;
47 * };
48 *
49 * then this should be rewritten as
50 *
51 * struct a {
52 * kmemcheck_bitfield_begin(flags);
53 * int x:8, y:8;
54 * kmemcheck_bitfield_end(flags);
55 * };
56 *
57 * Now the "flags_begin" and "flags_end" members may be used to refer to the
58 * beginning and end, respectively, of the bitfield (and things like
59 * &x.flags_begin is allowed). As soon as the struct is allocated, the bit-
60 * fields should be annotated:
61 *
62 * struct a *a = kmalloc(sizeof(struct a), GFP_KERNEL);
63 * kmemcheck_annotate_bitfield(a, flags);
64 */
65#define kmemcheck_bitfield_begin(name) \
66 int name##_begin[0];
67
68#define kmemcheck_bitfield_end(name) \
69 int name##_end[0];
70
71#define kmemcheck_annotate_bitfield(ptr, name) \
72 do { \
73 int _n; \
74 \
75 if (!ptr) \
76 break; \
77 \
78 _n = (long) &((ptr)->name##_end) \
79 - (long) &((ptr)->name##_begin); \
80 BUILD_BUG_ON(_n < 0); \
81 \
82 kmemcheck_mark_initialized(&((ptr)->name##_begin), _n); \
83 } while (0)
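
The begin/end markers are zero-length arrays, so they occupy no storage but still yield the addresses that bracket the bitfield, and the annotation then covers exactly that span. A user-space sketch of the address arithmetic (it relies on the GCC/Clang zero-length array extension; the struct mirrors the example in the comment above):

#include <stdio.h>
#include <string.h>

struct a {
	int flags_begin[0];	/* what kmemcheck_bitfield_begin(flags) expands to */
	int x:8, y:8;
	int flags_end[0];	/* what kmemcheck_bitfield_end(flags) expands to */
};

int main(void)
{
	struct a a;
	long n = (long)&a.flags_end - (long)&a.flags_begin;

	printf("bitfield region covers %ld byte(s)\n", n);
	/* The real macro hands this span to kmemcheck_mark_initialized(). */
	memset(&a.flags_begin, 0, (size_t)n);
	return 0;
}
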
84
85#define kmemcheck_annotate_variable(var) \
86 do { \
87 kmemcheck_mark_initialized(&(var), sizeof(var)); \
88 } while (0) \
89
90#else
91#define kmemcheck_enabled 0
92
93static inline void
94kmemcheck_alloc_shadow(struct page *page, int order, gfp_t flags, int node)
95{
96}
97
98static inline void
99kmemcheck_free_shadow(struct page *page, int order)
100{
101}
102
103static inline void
104kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object,
105 size_t size)
106{
107}
108
109static inline void kmemcheck_slab_free(struct kmem_cache *s, void *object,
110 size_t size)
111{
112}
113
114static inline void kmemcheck_pagealloc_alloc(struct page *p,
115 unsigned int order, gfp_t gfpflags)
116{
117}
118
119static inline bool kmemcheck_page_is_tracked(struct page *p)
120{
121 return false;
122}
123
124static inline void kmemcheck_mark_unallocated(void *address, unsigned int n)
125{
126}
127
128static inline void kmemcheck_mark_uninitialized(void *address, unsigned int n)
129{
130}
131
132static inline void kmemcheck_mark_initialized(void *address, unsigned int n)
133{
134}
135
136static inline void kmemcheck_mark_freed(void *address, unsigned int n)
137{
138}
139
140static inline void kmemcheck_mark_unallocated_pages(struct page *p,
141 unsigned int n)
142{
143}
144
145static inline void kmemcheck_mark_uninitialized_pages(struct page *p,
146 unsigned int n)
147{
148}
149
150static inline void kmemcheck_mark_initialized_pages(struct page *p,
151 unsigned int n)
152{
153}
154
155static inline bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
156{
157 return true;
158}
159
160#define kmemcheck_bitfield_begin(name)
161#define kmemcheck_bitfield_end(name)
162#define kmemcheck_annotate_bitfield(ptr, name) \
163 do { \
164 } while (0)
165
166#define kmemcheck_annotate_variable(var) \
167 do { \
168 } while (0)
169
170#endif /* CONFIG_KMEMCHECK */
171
172#endif /* LINUX_KMEMCHECK_H */
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 662f7b1b7a78..2f5e87f1bae2 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -486,16 +486,6 @@ void __tasklet_hi_schedule(struct tasklet_struct *t)
486}
487EXPORT_SYMBOL(__tasklet_hi_schedule);
488
489void __tasklet_hi_schedule_first(struct tasklet_struct *t)
490{
491 lockdep_assert_irqs_disabled();
492
493 t->next = __this_cpu_read(tasklet_hi_vec.head);
494 __this_cpu_write(tasklet_hi_vec.head, t);
495 __raise_softirq_irqoff(HI_SOFTIRQ);
496}
497EXPORT_SYMBOL(__tasklet_hi_schedule_first);
498
499static __latent_entropy void tasklet_action(struct softirq_action *a)
500{
501 struct tasklet_struct *list;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 9576bd582d4a..7638e2f7fff8 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -30,7 +30,6 @@
30#include <linux/proc_fs.h>
31#include <linux/security.h>
32#include <linux/ctype.h>
33#include <linux/kmemcheck.h>
34#include <linux/kmemleak.h>
35#include <linux/fs.h>
36#include <linux/init.h>
@@ -1174,15 +1173,6 @@ static struct ctl_table kern_table[] = {
1174 .extra2 = &one_thousand,
1175 },
1176#endif
1177#ifdef CONFIG_KMEMCHECK
1178 {
1179 .procname = "kmemcheck",
1180 .data = &kmemcheck_enabled,
1181 .maxlen = sizeof(int),
1182 .mode = 0644,
1183 .proc_handler = proc_dointvec,
1184 },
1185#endif
1186 {
1187 .procname = "panic_on_warn",
1188 .data = &panic_on_warn,
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 07ce7449765a..5402e3954659 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -504,7 +504,7 @@ config DEBUG_OBJECTS_ENABLE_DEFAULT
504
505config DEBUG_SLAB
506 bool "Debug slab memory allocations"
507 depends on DEBUG_KERNEL && SLAB && !KMEMCHECK
507 depends on DEBUG_KERNEL && SLAB
508 help
509 Say Y here to have the kernel do limited verification on memory
510 allocation as well as poisoning memory on free to catch use of freed
@@ -516,7 +516,7 @@ config DEBUG_SLAB_LEAK
516
517config SLUB_DEBUG_ON
518 bool "SLUB debugging on by default"
519 depends on SLUB && SLUB_DEBUG && !KMEMCHECK
519 depends on SLUB && SLUB_DEBUG
520 default n
521 help
522 Boot with debugging on by default. SLUB boots by default with
@@ -730,8 +730,6 @@ config DEBUG_STACKOVERFLOW
730
731 If in doubt, say "N".
732
733source "lib/Kconfig.kmemcheck"
734
735source "lib/Kconfig.kasan"
736
737endmenu # "Memory Debugging"
diff --git a/lib/Kconfig.kmemcheck b/lib/Kconfig.kmemcheck
deleted file mode 100644
index 846e039a86b4..000000000000
--- a/lib/Kconfig.kmemcheck
+++ /dev/null
@@ -1,94 +0,0 @@
1config HAVE_ARCH_KMEMCHECK
2 bool
3
4if HAVE_ARCH_KMEMCHECK
5
6menuconfig KMEMCHECK
7 bool "kmemcheck: trap use of uninitialized memory"
8 depends on DEBUG_KERNEL
9 depends on !X86_USE_3DNOW
10 depends on SLUB || SLAB
11 depends on !CC_OPTIMIZE_FOR_SIZE
12 depends on !FUNCTION_TRACER
13 select FRAME_POINTER
14 select STACKTRACE
15 default n
16 help
17 This option enables tracing of dynamically allocated kernel memory
18 to see if memory is used before it has been given an initial value.
19 Be aware that this requires half of your memory for bookkeeping and
20 will insert extra code at *every* read and write to tracked memory,
21 thus slowing down the kernel code (but user code is unaffected).
22
23 The kernel may be started with kmemcheck=0 or kmemcheck=1 to disable
24 or enable kmemcheck at boot-time. If the kernel is started with
25 kmemcheck=0, the large memory and CPU overhead is not incurred.
26
27choice
28 prompt "kmemcheck: default mode at boot"
29 depends on KMEMCHECK
30 default KMEMCHECK_ONESHOT_BY_DEFAULT
31 help
32 This option controls the default behaviour of kmemcheck when the
33 kernel boots and no kmemcheck= parameter is given.
34
35config KMEMCHECK_DISABLED_BY_DEFAULT
36 bool "disabled"
37 depends on KMEMCHECK
38
39config KMEMCHECK_ENABLED_BY_DEFAULT
40 bool "enabled"
41 depends on KMEMCHECK
42
43config KMEMCHECK_ONESHOT_BY_DEFAULT
44 bool "one-shot"
45 depends on KMEMCHECK
46 help
47 In one-shot mode, only the first error detected is reported before
48 kmemcheck is disabled.
49
50endchoice
51
52config KMEMCHECK_QUEUE_SIZE
53 int "kmemcheck: error queue size"
54 depends on KMEMCHECK
55 default 64
56 help
57 Select the maximum number of errors to store in the queue. Since
58 errors can occur virtually anywhere and in any context, we need a
59 temporary storage area which is guaranteed not to generate any
60 other faults. The queue will be emptied as soon as a tasklet may
61 be scheduled. If the queue is full, new error reports will be
62 lost.
63
64config KMEMCHECK_SHADOW_COPY_SHIFT
65 int "kmemcheck: shadow copy size (5 => 32 bytes, 6 => 64 bytes)"
66 depends on KMEMCHECK
67 range 2 8
68 default 5
69 help
70 Select the number of shadow bytes to save along with each entry of
71 the queue. These bytes indicate what parts of an allocation are
72 initialized, uninitialized, etc. and will be displayed when an
73 error is detected to help the debugging of a particular problem.
74
75config KMEMCHECK_PARTIAL_OK
76 bool "kmemcheck: allow partially uninitialized memory"
77 depends on KMEMCHECK
78 default y
79 help
80 This option works around certain GCC optimizations that produce
81 32-bit reads from 16-bit variables where the upper 16 bits are
82 thrown away afterwards. This may of course also hide some real
83 bugs.
84
85config KMEMCHECK_BITOPS_OK
86 bool "kmemcheck: allow bit-field manipulation"
87 depends on KMEMCHECK
88 default n
89 help
90 This option silences warnings that would be generated for bit-field
91 accesses where not all the bits are initialized at the same time.
92 This may also hide some real bugs.
93
94endif
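
Besides the kmemcheck= boot parameter mentioned in the help text, the mode could also be changed at run time through the "kmemcheck" sysctl that this patch removes from kernel/sysctl.c below, exposed as /proc/sys/kernel/kmemcheck on kernels that still carry this code. A minimal sketch of flipping it from C (root required; 0 disables, 1 enables and 2 selects one-shot mode, matching the checks on kmemcheck_enabled in the code above):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/proc/sys/kernel/kmemcheck", O_WRONLY);

	if (fd < 0) {
		perror("open");		/* kernel built without CONFIG_KMEMCHECK, or not root */
		return 1;
	}
	if (write(fd, "1\n", 2) != 2)	/* 0 = disabled, 1 = enabled, 2 = one-shot */
		perror("write");
	close(fd);
	return 0;
}
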
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index 5b0adf1435de..e5e606ee5f71 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -11,7 +11,6 @@ config DEBUG_PAGEALLOC
11 bool "Debug page memory allocations"
12 depends on DEBUG_KERNEL
13 depends on !HIBERNATION || ARCH_SUPPORTS_DEBUG_PAGEALLOC && !PPC && !SPARC
14 depends on !KMEMCHECK
15 select PAGE_EXTENSION
16 select PAGE_POISONING if !ARCH_SUPPORTS_DEBUG_PAGEALLOC
17 ---help---
diff --git a/mm/Makefile b/mm/Makefile
index 4659b93cba43..e7ebd176fb93 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -17,7 +17,6 @@ KCOV_INSTRUMENT_slub.o := n
17KCOV_INSTRUMENT_page_alloc.o := n
18KCOV_INSTRUMENT_debug-pagealloc.o := n
19KCOV_INSTRUMENT_kmemleak.o := n
20KCOV_INSTRUMENT_kmemcheck.o := n
21KCOV_INSTRUMENT_memcontrol.o := n
22KCOV_INSTRUMENT_mmzone.o := n
23KCOV_INSTRUMENT_vmstat.o := n
@@ -70,7 +69,6 @@ obj-$(CONFIG_KSM) += ksm.o
70obj-$(CONFIG_PAGE_POISONING) += page_poison.o
71obj-$(CONFIG_SLAB) += slab.o
72obj-$(CONFIG_SLUB) += slub.o
73obj-$(CONFIG_KMEMCHECK) += kmemcheck.o
74obj-$(CONFIG_KASAN) += kasan/
75obj-$(CONFIG_FAILSLAB) += failslab.o
76obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
diff --git a/mm/kmemcheck.c b/mm/kmemcheck.c
index b3a4d61d341c..cec594032515 100644
--- a/mm/kmemcheck.c
+++ b/mm/kmemcheck.c
@@ -1,126 +1 @@
1// SPDX-License-Identifier: GPL-2.0
2#include <linux/gfp.h>
3#include <linux/mm_types.h>
4#include <linux/mm.h>
5#include <linux/slab.h>
6#include "slab.h"
7#include <linux/kmemcheck.h>
8
9void kmemcheck_alloc_shadow(struct page *page, int order, gfp_t flags, int node)
10{
11 struct page *shadow;
12 int pages;
13 int i;
14
15 pages = 1 << order;
16
17 /*
18 * With kmemcheck enabled, we need to allocate a memory area for the
19 * shadow bits as well.
20 */
21 shadow = alloc_pages_node(node, flags, order);
22 if (!shadow) {
23 if (printk_ratelimit())
24 pr_err("kmemcheck: failed to allocate shadow bitmap\n");
25 return;
26 }
27
28 for(i = 0; i < pages; ++i)
29 page[i].shadow = page_address(&shadow[i]);
30
31 /*
32 * Mark it as non-present for the MMU so that our accesses to
33 * this memory will trigger a page fault and let us analyze
34 * the memory accesses.
35 */
36 kmemcheck_hide_pages(page, pages);
37}
38
39void kmemcheck_free_shadow(struct page *page, int order)
40{
41 struct page *shadow;
42 int pages;
43 int i;
44
45 if (!kmemcheck_page_is_tracked(page))
46 return;
47
48 pages = 1 << order;
49
50 kmemcheck_show_pages(page, pages);
51
52 shadow = virt_to_page(page[0].shadow);
53
54 for(i = 0; i < pages; ++i)
55 page[i].shadow = NULL;
56
57 __free_pages(shadow, order);
58}
59
60void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object,
61 size_t size)
62{
63 if (unlikely(!object)) /* Skip object if allocation failed */
64 return;
65
66 /*
67 * Has already been memset(), which initializes the shadow for us
68 * as well.
69 */
70 if (gfpflags & __GFP_ZERO)
71 return;
72
73 /* No need to initialize the shadow of a non-tracked slab. */
74 if (s->flags & SLAB_NOTRACK)
75 return;
76
77 if (!kmemcheck_enabled || gfpflags & __GFP_NOTRACK) {
78 /*
79 * Allow notracked objects to be allocated from
80 * tracked caches. Note however that these objects
81 * will still get page faults on access, they just
82 * won't ever be flagged as uninitialized. If page
83 * faults are not acceptable, the slab cache itself
84 * should be marked NOTRACK.
85 */
86 kmemcheck_mark_initialized(object, size);
87 } else if (!s->ctor) {
88 /*
89 * New objects should be marked uninitialized before
90 * they're returned to the caller.
91 */
92 kmemcheck_mark_uninitialized(object, size);
93 }
94}
95
96void kmemcheck_slab_free(struct kmem_cache *s, void *object, size_t size)
97{
98 /* TODO: RCU freeing is unsupported for now; hide false positives. */
99 if (!s->ctor && !(s->flags & SLAB_TYPESAFE_BY_RCU))
100 kmemcheck_mark_freed(object, size);
101}
102
103void kmemcheck_pagealloc_alloc(struct page *page, unsigned int order,
104 gfp_t gfpflags)
105{
106 int pages;
107
108 if (gfpflags & (__GFP_HIGHMEM | __GFP_NOTRACK))
109 return;
110
111 pages = 1 << order;
112
113 /*
114 * NOTE: We choose to track GFP_ZERO pages too; in fact, they
115 * can become uninitialized by copying uninitialized memory
116 * into them.
117 */
118
119 /* XXX: Can use zone->node for node? */
120 kmemcheck_alloc_shadow(page, order, gfpflags, -1);
121
122 if (gfpflags & __GFP_ZERO)
123 kmemcheck_mark_initialized_pages(page, pages);
124 else
125 kmemcheck_mark_uninitialized_pages(page, pages);
126}
diff --git a/mm/slub.c b/mm/slub.c
index c2c41e178acf..cfd56e5a35fb 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1371,7 +1371,7 @@ static inline void *slab_free_hook(struct kmem_cache *s, void *x)
1371 * So in order to make the debug calls that expect irqs to be
1372 * disabled we need to disable interrupts temporarily.
1373 */
1374#if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP)
1374#ifdef CONFIG_LOCKDEP
1375 {
1376 unsigned long flags;
1377
@@ -1399,8 +1399,7 @@ static inline void slab_free_freelist_hook(struct kmem_cache *s,
1399 * Compiler cannot detect this function can be removed if slab_free_hook()
1400 * evaluates to nothing. Thus, catch all relevant config debug options here.
1401 */
1402#if defined(CONFIG_KMEMCHECK) || \
1403 defined(CONFIG_LOCKDEP) || \
1402#if defined(CONFIG_LOCKDEP) || \
1404 defined(CONFIG_DEBUG_KMEMLEAK) || \
1405 defined(CONFIG_DEBUG_OBJECTS_FREE) || \
1406 defined(CONFIG_KASAN)
diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index 67d051edd615..7bd52b8f63d4 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -2182,8 +2182,6 @@ sub dump_struct($$) {
2182 # strip comments:
2183 $members =~ s/\/\*.*?\*\///gos;
2184 $nested =~ s/\/\*.*?\*\///gos;
2185 # strip kmemcheck_bitfield_{begin,end}.*;
2186 $members =~ s/kmemcheck_bitfield_.*?;//gos;
2187 # strip attributes
2188 $members =~ s/__attribute__\s*\(\([a-z,_\*\s\(\)]*\)\)//i;
2189 $members =~ s/__aligned\s*\([^;]*\)//gos;
diff --git a/tools/include/linux/kmemcheck.h b/tools/include/linux/kmemcheck.h
index 2bccd2c7b897..ea32a7d3cf1b 100644
--- a/tools/include/linux/kmemcheck.h
+++ b/tools/include/linux/kmemcheck.h
@@ -1,9 +1 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef _LIBLOCKDEP_LINUX_KMEMCHECK_H_
3#define _LIBLOCKDEP_LINUX_KMEMCHECK_H_
4
5static inline void kmemcheck_mark_initialized(void *address, unsigned int n)
6{
7}
8
9#endif