author     Levin, Alexander (Sasha Levin) <alexander.levin@verizon.com>  2017-11-15 20:36:02 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>                2017-11-15 21:21:05 -0500
commit     4675ff05de2d76d167336b368bd07f3fef6ed5a6
tree       212d8adf40e13c2a27ac7834d14ca4900923b98c
parent     d8be75663cec0069b85f80191abd2682ce4a512f
kmemcheck: rip it out
Fix up makefiles, remove references, and git rm kmemcheck.
Link: http://lkml.kernel.org/r/20171007030159.22241-4-alexander.levin@verizon.com
Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Vegard Nossum <vegardno@ifi.uio.no>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Tim Hansen <devtimhansen@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
35 files changed, 7 insertions(+), 2592 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index b74e13312fdc..00bb04972612 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1864,13 +1864,6 @@
1864 | Built with CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF=y, | 1864 | Built with CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF=y, |
1865 | the default is off. | 1865 | the default is off. |
1866 | 1866 | ||
1867 | kmemcheck= [X86] Boot-time kmemcheck enable/disable/one-shot mode | ||
1868 | Valid arguments: 0, 1, 2 | ||
1869 | kmemcheck=0 (disabled) | ||
1870 | kmemcheck=1 (enabled) | ||
1871 | kmemcheck=2 (one-shot mode) | ||
1872 | Default: 2 (one-shot mode) | ||
1873 | |||
1874 | kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs. | 1867 | kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs. |
1875 | Default is 0 (don't ignore, but inject #GP) | 1868 | Default is 0 (don't ignore, but inject #GP) |
1876 | 1869 | ||
diff --git a/Documentation/dev-tools/index.rst b/Documentation/dev-tools/index.rst
index a81787cd47d7..e313925fb0fa 100644
--- a/Documentation/dev-tools/index.rst
+++ b/Documentation/dev-tools/index.rst
@@ -21,7 +21,6 @@ whole; patches welcome!
21 | kasan | 21 | kasan |
22 | ubsan | 22 | ubsan |
23 | kmemleak | 23 | kmemleak |
24 | kmemcheck | ||
25 | gdb-kernel-debugging | 24 | gdb-kernel-debugging |
26 | kgdb | 25 | kgdb |
27 | kselftest | 26 | kselftest |
diff --git a/Documentation/dev-tools/kmemcheck.rst b/Documentation/dev-tools/kmemcheck.rst
deleted file mode 100644
index 7f3d1985de74..000000000000
--- a/Documentation/dev-tools/kmemcheck.rst
+++ /dev/null
@@ -1,733 +0,0 @@
1 | Getting started with kmemcheck | ||
2 | ============================== | ||
3 | |||
4 | Vegard Nossum <vegardno@ifi.uio.no> | ||
5 | |||
6 | |||
7 | Introduction | ||
8 | ------------ | ||
9 | |||
10 | kmemcheck is a debugging feature for the Linux Kernel. More specifically, it | ||
11 | is a dynamic checker that detects and warns about some uses of uninitialized | ||
12 | memory. | ||
13 | |||
14 | Userspace programmers might be familiar with Valgrind's memcheck. The main | ||
15 | difference between memcheck and kmemcheck is that memcheck works for userspace | ||
16 | programs only, and kmemcheck works for the kernel only. The implementations | ||
17 | are of course vastly different. Because of this, kmemcheck is not as accurate | ||
18 | as memcheck, but it turns out to be good enough in practice to discover real | ||
19 | programmer errors that the compiler is not able to find through static | ||
20 | analysis. | ||
21 | |||
22 | Enabling kmemcheck on a kernel will probably slow it down to the extent that | ||
23 | the machine will not be usable for normal workloads such as e.g. an | ||
24 | interactive desktop. kmemcheck will also cause the kernel to use about twice | ||
25 | as much memory as normal. For this reason, kmemcheck is strictly a debugging | ||
26 | feature. | ||
27 | |||
28 | |||
29 | Downloading | ||
30 | ----------- | ||
31 | |||
32 | As of version 2.6.31-rc1, kmemcheck is included in the mainline kernel. | ||
33 | |||
34 | |||
35 | Configuring and compiling | ||
36 | ------------------------- | ||
37 | |||
38 | kmemcheck only works for the x86 (both 32- and 64-bit) platform. A number of | ||
39 | configuration variables must have specific settings in order for the kmemcheck | ||
40 | menu to even appear in "menuconfig". These are: | ||
41 | |||
42 | - ``CONFIG_CC_OPTIMIZE_FOR_SIZE=n`` | ||
43 | This option is located under "General setup" / "Optimize for size". | ||
44 | |||
45 | Without this, gcc will use certain optimizations that usually lead to | ||
46 | false positive warnings from kmemcheck. An example of this is a 16-bit | ||
47 | field in a struct, where gcc may load 32 bits, then discard the upper | ||
48 | 16 bits. kmemcheck sees only the 32-bit load, and may trigger a | ||
49 | warning for the upper 16 bits (if they're uninitialized). | ||
50 | |||
51 | - ``CONFIG_SLAB=y`` or ``CONFIG_SLUB=y`` | ||
52 | This option is located under "General setup" / "Choose SLAB | ||
53 | allocator". | ||
54 | |||
55 | - ``CONFIG_FUNCTION_TRACER=n`` | ||
56 | This option is located under "Kernel hacking" / "Tracers" / "Kernel | ||
57 | Function Tracer" | ||
58 | |||
59 | When function tracing is compiled in, gcc emits a call to another | ||
60 | function at the beginning of every function. This means that when the | ||
61 | page fault handler is called, the ftrace framework will be called | ||
62 | before kmemcheck has had a chance to handle the fault. If ftrace then | ||
63 | modifies memory that was tracked by kmemcheck, the result is an | ||
64 | endless recursive page fault. | ||
65 | |||
66 | - ``CONFIG_DEBUG_PAGEALLOC=n`` | ||
67 | This option is located under "Kernel hacking" / "Memory Debugging" | ||
68 | / "Debug page memory allocations". | ||
69 | |||
70 | In addition, I highly recommend turning on ``CONFIG_DEBUG_INFO=y``. This is also | ||
71 | located under "Kernel hacking". With this, you will be able to get line number | ||
72 | information from the kmemcheck warnings, which is extremely valuable in | ||
73 | debugging a problem. This option is not mandatory, however, because it slows | ||
74 | down the compilation process and produces a much bigger kernel image. | ||
75 | |||
76 | Now the kmemcheck menu should be visible (under "Kernel hacking" / "Memory | ||
77 | Debugging" / "kmemcheck: trap use of uninitialized memory"). Here follows | ||
78 | a description of the kmemcheck configuration variables: | ||
79 | |||
80 | - ``CONFIG_KMEMCHECK`` | ||
81 | This must be enabled in order to use kmemcheck at all... | ||
82 | |||
83 | - ``CONFIG_KMEMCHECK_``[``DISABLED`` | ``ENABLED`` | ``ONESHOT``]``_BY_DEFAULT`` | ||
84 | This option controls the status of kmemcheck at boot-time. "Enabled" | ||
85 | will enable kmemcheck right from the start, "disabled" will boot the | ||
86 | kernel as normal (but with the kmemcheck code compiled in, so it can | ||
87 | be enabled at run-time after the kernel has booted), and "one-shot" is | ||
88 | a special mode which will turn kmemcheck off automatically after | ||
89 | detecting the first use of uninitialized memory. | ||
90 | |||
91 | If you are using kmemcheck to actively debug a problem, then you | ||
92 | probably want to choose "enabled" here. | ||
93 | |||
94 | The one-shot mode is mostly useful in automated test setups because it | ||
95 | can prevent floods of warnings and increase the chances of the machine | ||
96 | surviving in case something is really wrong. In other cases, the one- | ||
97 | shot mode could actually be counter-productive because it would turn | ||
98 | itself off at the very first error -- in the case of a false positive | ||
99 | too -- and this would get in the way of debugging the specific | ||
100 | problem you were interested in. | ||
101 | |||
102 | If you would like to use your kernel as normal, but with a chance to | ||
103 | enable kmemcheck in case of some problem, it might be a good idea to | ||
104 | choose "disabled" here. When kmemcheck is disabled, most of the run- | ||
105 | time overhead is not incurred, and the kernel will be almost as fast | ||
106 | as normal. | ||
107 | |||
108 | - ``CONFIG_KMEMCHECK_QUEUE_SIZE`` | ||
109 | Select the maximum number of error reports to store in an internal | ||
110 | (fixed-size) buffer. Since errors can occur virtually anywhere and in | ||
111 | any context, we need a temporary storage area which is guaranteed not | ||
112 | to generate any other page faults when accessed. The queue will be | ||
113 | emptied as soon as a tasklet may be scheduled. If the queue is full, | ||
114 | new error reports will be lost. | ||
115 | |||
116 | The default value of 64 is probably fine. If some code produces more | ||
117 | than 64 errors within an irqs-off section, then the code is likely to | ||
118 | produce many, many more, too, and these additional reports seldom give | ||
119 | any more information (the first report is usually the most valuable | ||
120 | anyway). | ||
121 | |||
122 | This number might have to be adjusted if you are not using serial | ||
123 | console or similar to capture the kernel log. If you are using the | ||
124 | "dmesg" command to save the log, then getting a lot of kmemcheck | ||
125 | warnings might overflow the kernel log itself, and the earlier reports | ||
126 | will get lost in that way instead. Try setting this to 10 or so on | ||
127 | such a setup. | ||
128 | |||
129 | - ``CONFIG_KMEMCHECK_SHADOW_COPY_SHIFT`` | ||
130 | Select the number of shadow bytes to save along with each entry of the | ||
131 | error-report queue. These bytes indicate what parts of an allocation | ||
132 | are initialized, uninitialized, etc. and will be displayed when an | ||
133 | error is detected to help the debugging of a particular problem. | ||
134 | |||
135 | The number entered here is actually the logarithm of the number of | ||
136 | bytes that will be saved. So if you pick for example 5 here, kmemcheck | ||
137 | will save 2^5 = 32 bytes. | ||
138 | |||
139 | The default value should be fine for debugging most problems. It also | ||
140 | fits nicely within 80 columns. | ||
141 | |||
142 | - ``CONFIG_KMEMCHECK_PARTIAL_OK`` | ||
143 | This option (when enabled) works around certain GCC optimizations that | ||
144 | produce 32-bit reads from 16-bit variables where the upper 16 bits are | ||
145 | thrown away afterwards. | ||
146 | |||
147 | The default value (enabled) is recommended. This may of course hide | ||
148 | some real errors, but disabling it would probably produce a lot of | ||
149 | false positives. | ||
150 | |||
151 | - ``CONFIG_KMEMCHECK_BITOPS_OK`` | ||
152 | This option silences warnings that would be generated for bit-field | ||
153 | accesses where not all the bits are initialized at the same time. This | ||
154 | may also hide some real bugs. | ||
155 | |||
156 | This option is probably obsolete, or it should be replaced with | ||
157 | the kmemcheck-/bitfield-annotations for the code in question. The | ||
158 | default value is therefore fine. | ||
159 | |||
160 | Now compile the kernel as usual. | ||
161 | |||
162 | |||
163 | How to use | ||
164 | ---------- | ||
165 | |||
166 | Booting | ||
167 | ~~~~~~~ | ||
168 | |||
169 | First some information about the command-line options. There is only one | ||
170 | option specific to kmemcheck, and this is called "kmemcheck". It can be used | ||
171 | to override the default mode as chosen by the ``CONFIG_KMEMCHECK_*_BY_DEFAULT`` | ||
172 | option. Its possible settings are: | ||
173 | |||
174 | - ``kmemcheck=0`` (disabled) | ||
175 | - ``kmemcheck=1`` (enabled) | ||
176 | - ``kmemcheck=2`` (one-shot mode) | ||
177 | |||
178 | If SLUB debugging has been enabled in the kernel, it may take precedence over | ||
179 | kmemcheck in such a way that the slab caches which are under SLUB debugging | ||
180 | will not be tracked by kmemcheck. In order to ensure that this doesn't happen | ||
181 | (even though it shouldn't by default), use SLUB's boot option ``slub_debug``, | ||
182 | like this: ``slub_debug=-`` | ||
183 | |||
184 | In fact, this option may also be used for fine-grained control over SLUB vs. | ||
185 | kmemcheck. For example, if the command line includes | ||
186 | ``kmemcheck=1 slub_debug=,dentry``, then SLUB debugging will be used only | ||
187 | for the "dentry" slab cache, and with kmemcheck tracking all the other | ||
188 | caches. This is advanced usage, however, and is not generally recommended. | ||
189 | |||
190 | |||
191 | Run-time enable/disable | ||
192 | ~~~~~~~~~~~~~~~~~~~~~~~ | ||
193 | |||
194 | When the kernel has booted, it is possible to enable or disable kmemcheck at | ||
195 | run-time. WARNING: This feature is still experimental and may cause false | ||
196 | positive warnings to appear. Therefore, try not to use this. If you find that | ||
197 | it doesn't work properly (e.g. you see an unreasonable amount of warnings), I | ||
198 | will be happy to take bug reports. | ||
199 | |||
200 | Use the file ``/proc/sys/kernel/kmemcheck`` for this purpose, e.g.:: | ||
201 | |||
202 | $ echo 0 > /proc/sys/kernel/kmemcheck # disables kmemcheck | ||
203 | |||
204 | The numbers are the same as for the ``kmemcheck=`` command-line option. | ||
205 | |||
206 | |||
207 | Debugging | ||
208 | ~~~~~~~~~ | ||
209 | |||
210 | A typical report will look something like this:: | ||
211 | |||
212 | WARNING: kmemcheck: Caught 32-bit read from uninitialized memory (ffff88003e4a2024) | ||
213 | 80000000000000000000000000000000000000000088ffff0000000000000000 | ||
214 | i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u | ||
215 | ^ | ||
216 | |||
217 | Pid: 1856, comm: ntpdate Not tainted 2.6.29-rc5 #264 945P-A | ||
218 | RIP: 0010:[<ffffffff8104ede8>] [<ffffffff8104ede8>] __dequeue_signal+0xc8/0x190 | ||
219 | RSP: 0018:ffff88003cdf7d98 EFLAGS: 00210002 | ||
220 | RAX: 0000000000000030 RBX: ffff88003d4ea968 RCX: 0000000000000009 | ||
221 | RDX: ffff88003e5d6018 RSI: ffff88003e5d6024 RDI: ffff88003cdf7e84 | ||
222 | RBP: ffff88003cdf7db8 R08: ffff88003e5d6000 R09: 0000000000000000 | ||
223 | R10: 0000000000000080 R11: 0000000000000000 R12: 000000000000000e | ||
224 | R13: ffff88003cdf7e78 R14: ffff88003d530710 R15: ffff88003d5a98c8 | ||
225 | FS: 0000000000000000(0000) GS:ffff880001982000(0063) knlGS:00000 | ||
226 | CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033 | ||
227 | CR2: ffff88003f806ea0 CR3: 000000003c036000 CR4: 00000000000006a0 | ||
228 | DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 | ||
229 | DR3: 0000000000000000 DR6: 00000000ffff4ff0 DR7: 0000000000000400 | ||
230 | [<ffffffff8104f04e>] dequeue_signal+0x8e/0x170 | ||
231 | [<ffffffff81050bd8>] get_signal_to_deliver+0x98/0x390 | ||
232 | [<ffffffff8100b87d>] do_notify_resume+0xad/0x7d0 | ||
233 | [<ffffffff8100c7b5>] int_signal+0x12/0x17 | ||
234 | [<ffffffffffffffff>] 0xffffffffffffffff | ||
235 | |||
236 | The single most valuable information in this report is the RIP (or EIP on 32- | ||
237 | bit) value. This will help us pinpoint exactly which instruction caused | ||
238 | the warning. | ||
239 | |||
240 | If your kernel was compiled with ``CONFIG_DEBUG_INFO=y``, then all we have to do | ||
241 | is give this address to the addr2line program, like this:: | ||
242 | |||
243 | $ addr2line -e vmlinux -i ffffffff8104ede8 | ||
244 | arch/x86/include/asm/string_64.h:12 | ||
245 | include/asm-generic/siginfo.h:287 | ||
246 | kernel/signal.c:380 | ||
247 | kernel/signal.c:410 | ||
248 | |||
249 | The "``-e vmlinux``" tells addr2line which file to look in. **IMPORTANT:** | ||
250 | This must be the vmlinux of the kernel that produced the warning in the | ||
251 | first place! If not, the line number information will almost certainly be | ||
252 | wrong. | ||
253 | |||
254 | The "``-i``" tells addr2line to also print the line numbers of inlined | ||
255 | functions. In this case, the flag was very important, because otherwise, | ||
256 | it would only have printed the first line, which is just a call to | ||
257 | ``memcpy()``, which could be called from a thousand places in the kernel, and | ||
258 | is therefore not very useful. These inlined functions would not show up in | ||
259 | the stack trace above, simply because the kernel doesn't load the extra | ||
260 | debugging information. This technique can of course be used with ordinary | ||
261 | kernel oopses as well. | ||
262 | |||
263 | In this case, it's the caller of ``memcpy()`` that is interesting, and it can be | ||
264 | found in ``include/asm-generic/siginfo.h``, line 287:: | ||
265 | |||
266 | 281 static inline void copy_siginfo(struct siginfo *to, struct siginfo *from) | ||
267 | 282 { | ||
268 | 283 if (from->si_code < 0) | ||
269 | 284 memcpy(to, from, sizeof(*to)); | ||
270 | 285 else | ||
271 | 286 /* _sigchld is currently the largest know union member */ | ||
272 | 287 memcpy(to, from, __ARCH_SI_PREAMBLE_SIZE + sizeof(from->_sifields._sigchld)); | ||
273 | 288 } | ||
274 | |||
275 | Since this was a read (kmemcheck usually warns about reads only, though it can | ||
276 | warn about writes to unallocated or freed memory as well), it was probably the | ||
277 | "from" argument which contained some uninitialized bytes. Following the chain | ||
278 | of calls, we move upwards to see where "from" was allocated or initialized, | ||
279 | ``kernel/signal.c``, line 380:: | ||
280 | |||
281 | 359 static void collect_signal(int sig, struct sigpending *list, siginfo_t *info) | ||
282 | 360 { | ||
283 | ... | ||
284 | 367 list_for_each_entry(q, &list->list, list) { | ||
285 | 368 if (q->info.si_signo == sig) { | ||
286 | 369 if (first) | ||
287 | 370 goto still_pending; | ||
288 | 371 first = q; | ||
289 | ... | ||
290 | 377 if (first) { | ||
291 | 378 still_pending: | ||
292 | 379 list_del_init(&first->list); | ||
293 | 380 copy_siginfo(info, &first->info); | ||
294 | 381 __sigqueue_free(first); | ||
295 | ... | ||
296 | 392 } | ||
297 | 393 } | ||
298 | |||
299 | Here, it is ``&first->info`` that is being passed on to ``copy_siginfo()``. The | ||
300 | variable ``first`` was found on a list -- passed in as the second argument to | ||
301 | ``collect_signal()``. We continue our journey through the stack, to figure out | ||
302 | where the item on "list" was allocated or initialized. We move to line 410:: | ||
303 | |||
304 | 395 static int __dequeue_signal(struct sigpending *pending, sigset_t *mask, | ||
305 | 396 siginfo_t *info) | ||
306 | 397 { | ||
307 | ... | ||
308 | 410 collect_signal(sig, pending, info); | ||
309 | ... | ||
310 | 414 } | ||
311 | |||
312 | Now we need to follow the ``pending`` pointer, since that is being passed on to | ||
313 | ``collect_signal()`` as ``list``. At this point, we've run out of lines from the | ||
314 | "addr2line" output. Not to worry, we just paste the next addresses from the | ||
315 | kmemcheck stack dump, i.e.:: | ||
316 | |||
317 | [<ffffffff8104f04e>] dequeue_signal+0x8e/0x170 | ||
318 | [<ffffffff81050bd8>] get_signal_to_deliver+0x98/0x390 | ||
319 | [<ffffffff8100b87d>] do_notify_resume+0xad/0x7d0 | ||
320 | [<ffffffff8100c7b5>] int_signal+0x12/0x17 | ||
321 | |||
322 | $ addr2line -e vmlinux -i ffffffff8104f04e ffffffff81050bd8 \ | ||
323 | ffffffff8100b87d ffffffff8100c7b5 | ||
324 | kernel/signal.c:446 | ||
325 | kernel/signal.c:1806 | ||
326 | arch/x86/kernel/signal.c:805 | ||
327 | arch/x86/kernel/signal.c:871 | ||
328 | arch/x86/kernel/entry_64.S:694 | ||
329 | |||
330 | Remember that since these addresses were found on the stack and not as the | ||
331 | RIP value, they actually point to the _next_ instruction (they are return | ||
332 | addresses). This becomes obvious when we look at the code for line 446:: | ||
333 | |||
334 | 422 int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) | ||
335 | 423 { | ||
336 | ... | ||
337 | 431 signr = __dequeue_signal(&tsk->signal->shared_pending, | ||
338 | 432 mask, info); | ||
339 | 433 /* | ||
340 | 434 * itimer signal ? | ||
341 | 435 * | ||
342 | 436 * itimers are process shared and we restart periodic | ||
343 | 437 * itimers in the signal delivery path to prevent DoS | ||
344 | 438 * attacks in the high resolution timer case. This is | ||
345 | 439 * compliant with the old way of self restarting | ||
346 | 440 * itimers, as the SIGALRM is a legacy signal and only | ||
347 | 441 * queued once. Changing the restart behaviour to | ||
348 | 442 * restart the timer in the signal dequeue path is | ||
349 | 443 * reducing the timer noise on heavy loaded !highres | ||
350 | 444 * systems too. | ||
351 | 445 */ | ||
352 | 446 if (unlikely(signr == SIGALRM)) { | ||
353 | ... | ||
354 | 489 } | ||
355 | |||
356 | So instead of looking at 446, we should be looking at 431, which is the line | ||
357 | that executes just before 446. Here we see that what we are looking for is | ||
358 | ``&tsk->signal->shared_pending``. | ||
359 | |||
360 | Our next task is now to figure out which function puts items on this | ||
361 | ``shared_pending`` list. A crude, but efficient tool, is ``git grep``:: | ||
362 | |||
363 | $ git grep -n 'shared_pending' kernel/ | ||
364 | ... | ||
365 | kernel/signal.c:828: pending = group ? &t->signal->shared_pending : &t->pending; | ||
366 | kernel/signal.c:1339: pending = group ? &t->signal->shared_pending : &t->pending; | ||
367 | ... | ||
368 | |||
369 | There were more results, but none of them were related to list operations, | ||
370 | and these were the only assignments. We inspect the line numbers more closely | ||
371 | and find that this is indeed where items are being added to the list:: | ||
372 | |||
373 | 816 static int send_signal(int sig, struct siginfo *info, struct task_struct *t, | ||
374 | 817 int group) | ||
375 | 818 { | ||
376 | ... | ||
377 | 828 pending = group ? &t->signal->shared_pending : &t->pending; | ||
378 | ... | ||
379 | 851 q = __sigqueue_alloc(t, GFP_ATOMIC, (sig < SIGRTMIN && | ||
380 | 852 (is_si_special(info) || | ||
381 | 853 info->si_code >= 0))); | ||
382 | 854 if (q) { | ||
383 | 855 list_add_tail(&q->list, &pending->list); | ||
384 | ... | ||
385 | 890 } | ||
386 | |||
387 | and:: | ||
388 | |||
389 | 1309 int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group) | ||
390 | 1310 { | ||
391 | .... | ||
392 | 1339 pending = group ? &t->signal->shared_pending : &t->pending; | ||
393 | 1340 list_add_tail(&q->list, &pending->list); | ||
394 | .... | ||
395 | 1347 } | ||
396 | |||
397 | In the first case, the list element we are looking for, ``q``, is being | ||
398 | returned from the function ``__sigqueue_alloc()``, which looks like an | ||
399 | allocation function. Let's take a look at it:: | ||
400 | |||
401 | 187 static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, | ||
402 | 188 int override_rlimit) | ||
403 | 189 { | ||
404 | 190 struct sigqueue *q = NULL; | ||
405 | 191 struct user_struct *user; | ||
406 | 192 | ||
407 | 193 /* | ||
408 | 194 * We won't get problems with the target's UID changing under us | ||
409 | 195 * because changing it requires RCU be used, and if t != current, the | ||
410 | 196 * caller must be holding the RCU readlock (by way of a spinlock) and | ||
411 | 197 * we use RCU protection here | ||
412 | 198 */ | ||
413 | 199 user = get_uid(__task_cred(t)->user); | ||
414 | 200 atomic_inc(&user->sigpending); | ||
415 | 201 if (override_rlimit || | ||
416 | 202 atomic_read(&user->sigpending) <= | ||
417 | 203 t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur) | ||
418 | 204 q = kmem_cache_alloc(sigqueue_cachep, flags); | ||
419 | 205 if (unlikely(q == NULL)) { | ||
420 | 206 atomic_dec(&user->sigpending); | ||
421 | 207 free_uid(user); | ||
422 | 208 } else { | ||
423 | 209 INIT_LIST_HEAD(&q->list); | ||
424 | 210 q->flags = 0; | ||
425 | 211 q->user = user; | ||
426 | 212 } | ||
427 | 213 | ||
428 | 214 return q; | ||
429 | 215 } | ||
430 | |||
431 | We see that this function initializes ``q->list``, ``q->flags``, and | ||
432 | ``q->user``. It seems that now is the time to look at the definition of | ||
433 | ``struct sigqueue``, e.g.:: | ||
434 | |||
435 | 14 struct sigqueue { | ||
436 | 15 struct list_head list; | ||
437 | 16 int flags; | ||
438 | 17 siginfo_t info; | ||
439 | 18 struct user_struct *user; | ||
440 | 19 }; | ||
441 | |||
442 | And, you might remember, it was a ``memcpy()`` on ``&first->info`` that | ||
443 | caused the warning, so this makes perfect sense. It also seems reasonable | ||
444 | to assume that it is the caller of ``__sigqueue_alloc()`` that has the | ||
445 | responsibility of filling out (initializing) this member. | ||
446 | |||
447 | But just which fields of the struct were uninitialized? Let's look at | ||
448 | kmemcheck's report again:: | ||
449 | |||
450 | WARNING: kmemcheck: Caught 32-bit read from uninitialized memory (ffff88003e4a2024) | ||
451 | 80000000000000000000000000000000000000000088ffff0000000000000000 | ||
452 | i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u | ||
453 | ^ | ||
454 | |||
455 | These first two lines are the memory dump of the memory object itself, and | ||
456 | the shadow bytemap, respectively. The memory object itself is in this case | ||
457 | ``&first->info``. Just beware that the start of this dump is NOT the start | ||
458 | of the object itself! The position of the caret (^) corresponds with the | ||
459 | address of the read (ffff88003e4a2024). | ||
460 | |||
461 | The shadow bytemap dump legend is as follows: | ||
462 | |||
463 | - i: initialized | ||
464 | - u: uninitialized | ||
465 | - a: unallocated (memory has been allocated by the slab layer, but has not | ||
466 | yet been handed off to anybody) | ||
467 | - f: freed (memory has been allocated by the slab layer, but has been freed | ||
468 | by the previous owner) | ||
469 | |||
470 | In order to figure out where (relative to the start of the object) the | ||
471 | uninitialized memory was located, we have to look at the disassembly. For | ||
472 | that, we'll need the RIP address again:: | ||
473 | |||
474 | RIP: 0010:[<ffffffff8104ede8>] [<ffffffff8104ede8>] __dequeue_signal+0xc8/0x190 | ||
475 | |||
476 | $ objdump -d --no-show-raw-insn vmlinux | grep -C 8 ffffffff8104ede8: | ||
477 | ffffffff8104edc8: mov %r8,0x8(%r8) | ||
478 | ffffffff8104edcc: test %r10d,%r10d | ||
479 | ffffffff8104edcf: js ffffffff8104ee88 <__dequeue_signal+0x168> | ||
480 | ffffffff8104edd5: mov %rax,%rdx | ||
481 | ffffffff8104edd8: mov $0xc,%ecx | ||
482 | ffffffff8104eddd: mov %r13,%rdi | ||
483 | ffffffff8104ede0: mov $0x30,%eax | ||
484 | ffffffff8104ede5: mov %rdx,%rsi | ||
485 | ffffffff8104ede8: rep movsl %ds:(%rsi),%es:(%rdi) | ||
486 | ffffffff8104edea: test $0x2,%al | ||
487 | ffffffff8104edec: je ffffffff8104edf0 <__dequeue_signal+0xd0> | ||
488 | ffffffff8104edee: movsw %ds:(%rsi),%es:(%rdi) | ||
489 | ffffffff8104edf0: test $0x1,%al | ||
490 | ffffffff8104edf2: je ffffffff8104edf5 <__dequeue_signal+0xd5> | ||
491 | ffffffff8104edf4: movsb %ds:(%rsi),%es:(%rdi) | ||
492 | ffffffff8104edf5: mov %r8,%rdi | ||
493 | ffffffff8104edf8: callq ffffffff8104de60 <__sigqueue_free> | ||
494 | |||
495 | As expected, it's the "``rep movsl``" instruction from the ``memcpy()`` | ||
496 | that causes the warning. We know that ``REP MOVSL`` uses the register | ||
497 | ``RCX`` to count the number of remaining iterations. By taking a look at the | ||
498 | register dump again (from the kmemcheck report), we can figure out how many | ||
499 | bytes were left to copy:: | ||
500 | |||
501 | RAX: 0000000000000030 RBX: ffff88003d4ea968 RCX: 0000000000000009 | ||
502 | |||
503 | By looking at the disassembly, we also see that ``%ecx`` is being loaded | ||
504 | with the value ``$0xc`` just before (ffffffff8104edd8), so we are very | ||
505 | lucky. Keep in mind that this is the number of iterations, not bytes. And | ||
506 | since this is a "long" operation, we need to multiply by 4 to get the | ||
507 | number of bytes. So this means that the uninitialized value was encountered | ||
508 | at 4 * (0xc - 0x9) = 12 bytes from the start of the object. | ||
509 | |||
510 | We can now try to figure out which field of the "``struct siginfo``" that | ||
511 | was not initialized. This is the beginning of the struct:: | ||
512 | |||
513 | 40 typedef struct siginfo { | ||
514 | 41 int si_signo; | ||
515 | 42 int si_errno; | ||
516 | 43 int si_code; | ||
517 | 44 | ||
518 | 45 union { | ||
519 | .. | ||
520 | 92 } _sifields; | ||
521 | 93 } siginfo_t; | ||
522 | |||
523 | On 64-bit, the int is 4 bytes long, so it must be the union member that has | ||
524 | not been initialized. We can verify this using gdb:: | ||
525 | |||
526 | $ gdb vmlinux | ||
527 | ... | ||
528 | (gdb) p &((struct siginfo *) 0)->_sifields | ||
529 | $1 = (union {...} *) 0x10 | ||
530 | |||
531 | Actually, it seems that the union member is located at offset 0x10 -- which | ||
532 | means that gcc has inserted 4 bytes of padding between the members ``si_code`` | ||
533 | and ``_sifields``. We can now get a fuller picture of the memory dump:: | ||
534 | |||
535 | _----------------------------=> si_code | ||
536 | / _--------------------=> (padding) | ||
537 | | / _------------=> _sifields(._kill._pid) | ||
538 | | | / _----=> _sifields(._kill._uid) | ||
539 | | | | / | ||
540 | -------|-------|-------|-------| | ||
541 | 80000000000000000000000000000000000000000088ffff0000000000000000 | ||
542 | i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u | ||
543 | |||
544 | This allows us to realize another important fact: ``si_code`` contains the | ||
545 | value 0x80. Remember that x86 is little endian, so the first 4 bytes | ||
546 | "80000000" are really the number 0x00000080. With a bit of research, we | ||
547 | find that this is actually the constant ``SI_KERNEL`` defined in | ||
548 | ``include/asm-generic/siginfo.h``:: | ||
549 | |||
550 | 144 #define SI_KERNEL 0x80 /* sent by the kernel from somewhere */ | ||
551 | |||
552 | This macro is used in exactly one place in the x86 kernel: In ``send_signal()`` | ||
553 | in ``kernel/signal.c``:: | ||
554 | |||
555 | 816 static int send_signal(int sig, struct siginfo *info, struct task_struct *t, | ||
556 | 817 int group) | ||
557 | 818 { | ||
558 | ... | ||
559 | 828 pending = group ? &t->signal->shared_pending : &t->pending; | ||
560 | ... | ||
561 | 851 q = __sigqueue_alloc(t, GFP_ATOMIC, (sig < SIGRTMIN && | ||
562 | 852 (is_si_special(info) || | ||
563 | 853 info->si_code >= 0))); | ||
564 | 854 if (q) { | ||
565 | 855 list_add_tail(&q->list, &pending->list); | ||
566 | 856 switch ((unsigned long) info) { | ||
567 | ... | ||
568 | 865 case (unsigned long) SEND_SIG_PRIV: | ||
569 | 866 q->info.si_signo = sig; | ||
570 | 867 q->info.si_errno = 0; | ||
571 | 868 q->info.si_code = SI_KERNEL; | ||
572 | 869 q->info.si_pid = 0; | ||
573 | 870 q->info.si_uid = 0; | ||
574 | 871 break; | ||
575 | ... | ||
576 | 890 } | ||
577 | |||
578 | Not only does this match with the ``.si_code`` member, it also matches the place | ||
579 | we found earlier when looking for where siginfo_t objects are enqueued on the | ||
580 | ``shared_pending`` list. | ||
581 | |||
582 | So to sum up: It seems that it is the padding introduced by the compiler | ||
583 | between two struct fields that is uninitialized, and this gets reported when | ||
584 | we do a ``memcpy()`` on the struct. This means that we have identified a false | ||
585 | positive warning. | ||
586 | |||
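
The padding conclusion is easy to sanity-check from ordinary userspace C. Below is a minimal, self-contained sketch (it uses a stand-in struct, not the kernel's real ``siginfo_t``) of how a union member with 8-byte alignment forces 4 bytes of padding after ``si_code`` on a 64-bit build::

    #include <stddef.h>
    #include <stdio.h>

    /* Stand-in for the layout discussed above: three ints followed by a
     * union whose strictest member has 8-byte alignment. */
    struct siginfo_like {
            int si_signo;
            int si_errno;
            int si_code;
            union {
                    void *align8;   /* forces 8-byte alignment of the union */
                    int pid;
            } _sifields;
    };

    int main(void)
    {
            /* Prints 16 (0x10) on LP64: 12 bytes of data + 4 bytes of padding. */
            printf("offsetof(_sifields) = %zu\n",
                   offsetof(struct siginfo_like, _sifields));
            return 0;
    }

This mirrors the gdb result above (offset 0x10) without needing a vmlinux with debugging information.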
587 | Normally, kmemcheck will not report uninitialized accesses in ``memcpy()`` calls | ||
588 | when both the source and destination addresses are tracked. (Instead, we copy | ||
589 | the shadow bytemap as well). In this case, the destination address clearly | ||
590 | was not tracked. We can dig a little deeper into the stack trace from above:: | ||
591 | |||
592 | arch/x86/kernel/signal.c:805 | ||
593 | arch/x86/kernel/signal.c:871 | ||
594 | arch/x86/kernel/entry_64.S:694 | ||
595 | |||
596 | And we clearly see that the destination siginfo object is located on the | ||
597 | stack:: | ||
598 | |||
599 | 782 static void do_signal(struct pt_regs *regs) | ||
600 | 783 { | ||
601 | 784 struct k_sigaction ka; | ||
602 | 785 siginfo_t info; | ||
603 | ... | ||
604 | 804 signr = get_signal_to_deliver(&info, &ka, regs, NULL); | ||
605 | ... | ||
606 | 854 } | ||
607 | |||
608 | And this ``&info`` is what eventually gets passed to ``copy_siginfo()`` as the | ||
609 | destination argument. | ||
610 | |||
611 | Now, even though we didn't find an actual error here, the example is still a | ||
612 | good one, because it shows how one would go about to find out what the report | ||
613 | was all about. | ||
614 | |||
615 | |||
616 | Annotating false positives | ||
617 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
618 | |||
619 | There are a few different ways to make annotations in the source code that | ||
620 | will keep kmemcheck from checking and reporting certain allocations. Here | ||
621 | they are: | ||
622 | |||
623 | - ``__GFP_NOTRACK_FALSE_POSITIVE`` | ||
624 | This flag can be passed to ``kmalloc()`` or ``kmem_cache_alloc()`` | ||
625 | (therefore also to other functions that end up calling one of | ||
626 | these) to indicate that the allocation should not be tracked | ||
627 | because it would lead to a false positive report. This is a "big | ||
628 | hammer" way of silencing kmemcheck; after all, even if the false | ||
629 | positive pertains to particular field in a struct, for example, we | ||
630 | will now lose the ability to find (real) errors in other parts of | ||
631 | the same struct. | ||
632 | |||
633 | Example:: | ||
634 | |||
635 | /* No warnings will ever trigger on accessing any part of x */ | ||
636 | x = kmalloc(sizeof *x, GFP_KERNEL | __GFP_NOTRACK_FALSE_POSITIVE); | ||
637 | |||
638 | - ``kmemcheck_bitfield_begin(name)``/``kmemcheck_bitfield_end(name)`` and | ||
639 | ``kmemcheck_annotate_bitfield(ptr, name)`` | ||
640 | The first two of these three macros can be used inside struct | ||
641 | definitions to signal, respectively, the beginning and end of a | ||
642 | bitfield. Additionally, this will assign the bitfield a name, which | ||
643 | is given as an argument to the macros. | ||
644 | |||
645 | Having used these markers, one can later use | ||
646 | kmemcheck_annotate_bitfield() at the point of allocation, to indicate | ||
647 | which parts of the allocation is part of a bitfield. | ||
648 | |||
649 | Example:: | ||
650 | |||
651 | struct foo { | ||
652 | int x; | ||
653 | |||
654 | kmemcheck_bitfield_begin(flags); | ||
655 | int flag_a:1; | ||
656 | int flag_b:1; | ||
657 | kmemcheck_bitfield_end(flags); | ||
658 | |||
659 | int y; | ||
660 | }; | ||
661 | |||
662 | struct foo *x = kmalloc(sizeof *x, GFP_KERNEL); | ||
663 | |||
664 | /* No warnings will trigger on accessing the bitfield of x */ | ||
665 | kmemcheck_annotate_bitfield(x, flags); | ||
666 | |||
667 | Note that ``kmemcheck_annotate_bitfield()`` can be used even before the | ||
668 | return value of ``kmalloc()`` is checked -- in other words, passing NULL | ||
669 | as the first argument is legal (and will do nothing). | ||
670 | |||
671 | |||
672 | Reporting errors | ||
673 | ---------------- | ||
674 | |||
675 | As we have seen, kmemcheck will produce false positive reports. Therefore, it | ||
676 | is not very wise to blindly post kmemcheck warnings to mailing lists and | ||
677 | maintainers. Instead, I encourage maintainers and developers to find errors | ||
678 | in their own code. If you get a warning, you can try to work around it, try | ||
679 | to figure out if it's a real error or not, or simply ignore it. Most | ||
680 | developers know their own code and will quickly and efficiently determine the | ||
681 | root cause of a kmemcheck report. This is therefore also the most efficient | ||
682 | way to work with kmemcheck. | ||
683 | |||
684 | That said, we (the kmemcheck maintainers) will always be on the lookout for | ||
685 | false positives that we can annotate and silence. So whatever you find, | ||
686 | please drop us a note privately! Kernel configs and steps to reproduce (if | ||
687 | available) are of course a great help too. | ||
688 | |||
689 | Happy hacking! | ||
690 | |||
691 | |||
692 | Technical description | ||
693 | --------------------- | ||
694 | |||
695 | kmemcheck works by marking memory pages non-present. This means that whenever | ||
696 | somebody attempts to access the page, a page fault is generated. The page | ||
697 | fault handler notices that the page was in fact only hidden, and so it calls | ||
698 | on the kmemcheck code to make further investigations. | ||
699 | |||
700 | When the investigations are completed, kmemcheck "shows" the page by marking | ||
701 | it present (as it would be under normal circumstances). This way, the | ||
702 | interrupted code can continue as usual. | ||
703 | |||
704 | But after the instruction has been executed, we should hide the page again, so | ||
705 | that we can catch the next access too! Now kmemcheck makes use of a debugging | ||
706 | feature of the processor, namely single-stepping. When the processor has | ||
707 | finished the one instruction that generated the memory access, a debug | ||
708 | exception is raised. From here, we simply hide the page again and continue | ||
709 | execution, this time with the single-stepping feature turned off. | ||
710 | |||
711 | kmemcheck requires some assistance from the memory allocator in order to work. | ||
712 | The memory allocator needs to | ||
713 | |||
714 | 1. Tell kmemcheck about newly allocated pages and pages that are about to | ||
715 | be freed. This allows kmemcheck to set up and tear down the shadow memory | ||
716 | for the pages in question. The shadow memory stores the status of each | ||
717 | byte in the allocation proper, e.g. whether it is initialized or | ||
718 | uninitialized. | ||
719 | |||
720 | 2. Tell kmemcheck which parts of memory should be marked uninitialized. | ||
721 | There are actually a few more states, such as "not yet allocated" and | ||
722 | "recently freed". | ||
723 | |||
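
The per-byte states just listed correspond to the shadow legend shown earlier (``i``, ``u``, ``a``, ``f``). Here is a small, self-contained sketch of such a bytemap, with state names modelled on the ones that appear in ``arch/x86/mm/kmemcheck/error.c`` further down in this patch::

    #include <stdio.h>

    /* One shadow byte per byte of the allocation; names mirror error.c. */
    enum shadow_state {
            SHADOW_UNALLOCATED,     /* 'a': owned by the slab layer, not handed out yet */
            SHADOW_UNINITIALIZED,   /* 'u': handed out, never written */
            SHADOW_INITIALIZED,     /* 'i': written at least once */
            SHADOW_FREED,           /* 'f': returned to the slab layer */
    };

    int main(void)
    {
            static const char legend[] = { 'a', 'u', 'i', 'f' };
            enum shadow_state shadow[8] = {
                    SHADOW_INITIALIZED, SHADOW_INITIALIZED,
                    SHADOW_INITIALIZED, SHADOW_INITIALIZED,
                    SHADOW_UNINITIALIZED, SHADOW_UNINITIALIZED,
                    SHADOW_UNINITIALIZED, SHADOW_UNINITIALIZED,
            };
            int i;

            for (i = 0; i < 8; i++)
                    printf("%c ", legend[shadow[i]]);
            printf("\n");   /* prints: i i i i u u u u */
            return 0;
    }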
724 | If a slab cache is set up using the SLAB_NOTRACK flag, it will never return | ||
725 | memory that can take page faults because of kmemcheck. | ||
726 | |||
727 | If a slab cache is NOT set up using the SLAB_NOTRACK flag, callers can still | ||
728 | request memory with the __GFP_NOTRACK or __GFP_NOTRACK_FALSE_POSITIVE flags. | ||
729 | This does not prevent the page faults from occurring, however, but marks the | ||
730 | object in question as being initialized so that no warnings will ever be | ||
731 | produced for this object. | ||
732 | |||
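
For illustration (the cache and object names here are made up), a caller could exempt a single allocation from tracking like this::

    /* Hypothetical cache; __GFP_NOTRACK is the per-allocation opt-out described above. */
    struct foo *obj = kmem_cache_alloc(foo_cachep, GFP_KERNEL | __GFP_NOTRACK);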
733 | Currently, the SLAB and SLUB allocators are supported by kmemcheck. | ||
diff --git a/MAINTAINERS b/MAINTAINERS
index 7e9c887ad951..ac814d3dd1c1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7688,16 +7688,6 @@ F: include/linux/kdb.h
7688 | F: include/linux/kgdb.h | 7688 | F: include/linux/kgdb.h |
7689 | F: kernel/debug/ | 7689 | F: kernel/debug/ |
7690 | 7690 | ||
7691 | KMEMCHECK | ||
7692 | M: Vegard Nossum <vegardno@ifi.uio.no> | ||
7693 | M: Pekka Enberg <penberg@kernel.org> | ||
7694 | S: Maintained | ||
7695 | F: Documentation/dev-tools/kmemcheck.rst | ||
7696 | F: arch/x86/include/asm/kmemcheck.h | ||
7697 | F: arch/x86/mm/kmemcheck/ | ||
7698 | F: include/linux/kmemcheck.h | ||
7699 | F: mm/kmemcheck.c | ||
7700 | |||
7701 | KMEMLEAK | 7691 | KMEMLEAK |
7702 | M: Catalin Marinas <catalin.marinas@arm.com> | 7692 | M: Catalin Marinas <catalin.marinas@arm.com> |
7703 | S: Maintained | 7693 | S: Maintained |
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f08977d82ca0..cb678192da4a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -112,7 +112,6 @@ config X86
112 | select HAVE_ARCH_JUMP_LABEL | 112 | select HAVE_ARCH_JUMP_LABEL |
113 | select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP | 113 | select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP |
114 | select HAVE_ARCH_KGDB | 114 | select HAVE_ARCH_KGDB |
115 | select HAVE_ARCH_KMEMCHECK | ||
116 | select HAVE_ARCH_MMAP_RND_BITS if MMU | 115 | select HAVE_ARCH_MMAP_RND_BITS if MMU |
117 | select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT | 116 | select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT |
118 | select HAVE_ARCH_COMPAT_MMAP_BASES if MMU && COMPAT | 117 | select HAVE_ARCH_COMPAT_MMAP_BASES if MMU && COMPAT |
@@ -1430,7 +1429,7 @@ config ARCH_DMA_ADDR_T_64BIT
1430 | 1429 | ||
1431 | config X86_DIRECT_GBPAGES | 1430 | config X86_DIRECT_GBPAGES |
1432 | def_bool y | 1431 | def_bool y |
1433 | depends on X86_64 && !DEBUG_PAGEALLOC && !KMEMCHECK | 1432 | depends on X86_64 && !DEBUG_PAGEALLOC |
1434 | ---help--- | 1433 | ---help--- |
1435 | Certain kernel features effectively disable kernel | 1434 | Certain kernel features effectively disable kernel |
1436 | linear 1 GB mappings (even if the CPU otherwise | 1435 | linear 1 GB mappings (even if the CPU otherwise |
diff --git a/arch/x86/include/asm/kmemcheck.h b/arch/x86/include/asm/kmemcheck.h
index 945a0337fbcf..ea32a7d3cf1b 100644
--- a/arch/x86/include/asm/kmemcheck.h
+++ b/arch/x86/include/asm/kmemcheck.h
@@ -1,43 +1 @@
1 | /* SPDX-License-Identifier: GPL-2.0 */ | /* SPDX-License-Identifier: GPL-2.0 */ | |
2 | #ifndef ASM_X86_KMEMCHECK_H | ||
3 | #define ASM_X86_KMEMCHECK_H | ||
4 | |||
5 | #include <linux/types.h> | ||
6 | #include <asm/ptrace.h> | ||
7 | |||
8 | #ifdef CONFIG_KMEMCHECK | ||
9 | bool kmemcheck_active(struct pt_regs *regs); | ||
10 | |||
11 | void kmemcheck_show(struct pt_regs *regs); | ||
12 | void kmemcheck_hide(struct pt_regs *regs); | ||
13 | |||
14 | bool kmemcheck_fault(struct pt_regs *regs, | ||
15 | unsigned long address, unsigned long error_code); | ||
16 | bool kmemcheck_trap(struct pt_regs *regs); | ||
17 | #else | ||
18 | static inline bool kmemcheck_active(struct pt_regs *regs) | ||
19 | { | ||
20 | return false; | ||
21 | } | ||
22 | |||
23 | static inline void kmemcheck_show(struct pt_regs *regs) | ||
24 | { | ||
25 | } | ||
26 | |||
27 | static inline void kmemcheck_hide(struct pt_regs *regs) | ||
28 | { | ||
29 | } | ||
30 | |||
31 | static inline bool kmemcheck_fault(struct pt_regs *regs, | ||
32 | unsigned long address, unsigned long error_code) | ||
33 | { | ||
34 | return false; | ||
35 | } | ||
36 | |||
37 | static inline bool kmemcheck_trap(struct pt_regs *regs) | ||
38 | { | ||
39 | return false; | ||
40 | } | ||
41 | #endif /* CONFIG_KMEMCHECK */ | ||
42 | |||
43 | #endif | ||
diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h
index 076502241eae..55d392c6bd29 100644
--- a/arch/x86/include/asm/string_32.h
+++ b/arch/x86/include/asm/string_32.h
@@ -179,8 +179,6 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len)
179 | * No 3D Now! | 179 | * No 3D Now! |
180 | */ | 180 | */ |
181 | 181 | ||
182 | #ifndef CONFIG_KMEMCHECK | ||
183 | |||
184 | #if (__GNUC__ >= 4) | 182 | #if (__GNUC__ >= 4) |
185 | #define memcpy(t, f, n) __builtin_memcpy(t, f, n) | 183 | #define memcpy(t, f, n) __builtin_memcpy(t, f, n) |
186 | #else | 184 | #else |
@@ -189,13 +187,6 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len)
189 | ? __constant_memcpy((t), (f), (n)) \ | 187 | ? __constant_memcpy((t), (f), (n)) \ |
190 | : __memcpy((t), (f), (n))) | 188 | : __memcpy((t), (f), (n))) |
191 | #endif | 189 | #endif |
192 | #else | ||
193 | /* | ||
194 | * kmemcheck becomes very happy if we use the REP instructions unconditionally, | ||
195 | * because it means that we know both memory operands in advance. | ||
196 | */ | ||
197 | #define memcpy(t, f, n) __memcpy((t), (f), (n)) | ||
198 | #endif | ||
199 | 190 | ||
200 | #endif | 191 | #endif |
201 | #endif /* !CONFIG_FORTIFY_SOURCE */ | 192 | #endif /* !CONFIG_FORTIFY_SOURCE */ |
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 0b1b4445f4c5..533f74c300c2 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -33,7 +33,6 @@ extern void *memcpy(void *to, const void *from, size_t len);
33 | extern void *__memcpy(void *to, const void *from, size_t len); | 33 | extern void *__memcpy(void *to, const void *from, size_t len); |
34 | 34 | ||
35 | #ifndef CONFIG_FORTIFY_SOURCE | 35 | #ifndef CONFIG_FORTIFY_SOURCE |
36 | #ifndef CONFIG_KMEMCHECK | ||
37 | #if (__GNUC__ == 4 && __GNUC_MINOR__ < 3) || __GNUC__ < 4 | 36 | #if (__GNUC__ == 4 && __GNUC_MINOR__ < 3) || __GNUC__ < 4 |
38 | #define memcpy(dst, src, len) \ | 37 | #define memcpy(dst, src, len) \ |
39 | ({ \ | 38 | ({ \ |
@@ -46,13 +45,6 @@ extern void *__memcpy(void *to, const void *from, size_t len);
46 | __ret; \ | 45 | __ret; \ |
47 | }) | 46 | }) |
48 | #endif | 47 | #endif |
49 | #else | ||
50 | /* | ||
51 | * kmemcheck becomes very happy if we use the REP instructions unconditionally, | ||
52 | * because it means that we know both memory operands in advance. | ||
53 | */ | ||
54 | #define memcpy(dst, src, len) __inline_memcpy((dst), (src), (len)) | ||
55 | #endif | ||
56 | #endif /* !CONFIG_FORTIFY_SOURCE */ | 48 | #endif /* !CONFIG_FORTIFY_SOURCE */ |
57 | 49 | ||
58 | #define __HAVE_ARCH_MEMSET | 50 | #define __HAVE_ARCH_MEMSET |
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index b720dacac051..b1af22073e28 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -187,21 +187,6 @@ static void early_init_intel(struct cpuinfo_x86 *c)
187 | if (c->x86 == 6 && c->x86_model < 15) | 187 | if (c->x86 == 6 && c->x86_model < 15) |
188 | clear_cpu_cap(c, X86_FEATURE_PAT); | 188 | clear_cpu_cap(c, X86_FEATURE_PAT); |
189 | 189 | ||
190 | #ifdef CONFIG_KMEMCHECK | ||
191 | /* | ||
192 | * P4s have a "fast strings" feature which causes single- | ||
193 | * stepping REP instructions to only generate a #DB on | ||
194 | * cache-line boundaries. | ||
195 | * | ||
196 | * Ingo Molnar reported a Pentium D (model 6) and a Xeon | ||
197 | * (model 2) with the same problem. | ||
198 | */ | ||
199 | if (c->x86 == 15) | ||
200 | if (msr_clear_bit(MSR_IA32_MISC_ENABLE, | ||
201 | MSR_IA32_MISC_ENABLE_FAST_STRING_BIT) > 0) | ||
202 | pr_info("kmemcheck: Disabling fast string operations\n"); | ||
203 | #endif | ||
204 | |||
205 | /* | 190 | /* |
206 | * If fast string is not enabled in IA32_MISC_ENABLE for any reason, | 191 | * If fast string is not enabled in IA32_MISC_ENABLE for any reason, |
207 | * clear the fast string and enhanced fast string CPU capabilities. | 192 | * clear the fast string and enhanced fast string CPU capabilities. |
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 7ba7f3d7f477..8e13b8cc6bed 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -29,8 +29,6 @@ obj-$(CONFIG_X86_PTDUMP) += debug_pagetables.o
29 | 29 | ||
30 | obj-$(CONFIG_HIGHMEM) += highmem_32.o | 30 | obj-$(CONFIG_HIGHMEM) += highmem_32.o |
31 | 31 | ||
32 | obj-$(CONFIG_KMEMCHECK) += kmemcheck/ | ||
33 | |||
34 | KASAN_SANITIZE_kasan_init_$(BITS).o := n | 32 | KASAN_SANITIZE_kasan_init_$(BITS).o := n |
35 | obj-$(CONFIG_KASAN) += kasan_init_$(BITS).o | 33 | obj-$(CONFIG_KASAN) += kasan_init_$(BITS).o |
36 | 34 | ||
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index ef94620ceb8a..6fdf91ef130a 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -163,12 +163,11 @@ static int page_size_mask;
163 | static void __init probe_page_size_mask(void) | 163 | static void __init probe_page_size_mask(void) |
164 | { | 164 | { |
165 | /* | 165 | /* |
166 | * For CONFIG_KMEMCHECK or pagealloc debugging, identity mapping will | 166 | * For pagealloc debugging, identity mapping will use small pages. |
167 | * use small pages. | ||
168 | * This will simplify cpa(), which otherwise needs to support splitting | 167 | * This will simplify cpa(), which otherwise needs to support splitting |
169 | * large pages into small in interrupt context, etc. | 168 | * large pages into small in interrupt context, etc. |
170 | */ | 169 | */ |
171 | if (boot_cpu_has(X86_FEATURE_PSE) && !debug_pagealloc_enabled() && !IS_ENABLED(CONFIG_KMEMCHECK)) | 170 | if (boot_cpu_has(X86_FEATURE_PSE) && !debug_pagealloc_enabled()) |
172 | page_size_mask |= 1 << PG_LEVEL_2M; | 171 | page_size_mask |= 1 << PG_LEVEL_2M; |
173 | else | 172 | else |
174 | direct_gbpages = 0; | 173 | direct_gbpages = 0; |
diff --git a/arch/x86/mm/kmemcheck/Makefile b/arch/x86/mm/kmemcheck/Makefile
deleted file mode 100644
index 520b3bce4095..000000000000
--- a/arch/x86/mm/kmemcheck/Makefile
+++ /dev/null
@@ -1 +0,0 @@
1 | obj-y := error.o kmemcheck.o opcode.o pte.o selftest.o shadow.o | ||
diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c
index 872ec4159a68..cec594032515 100644
--- a/arch/x86/mm/kmemcheck/error.c
+++ b/arch/x86/mm/kmemcheck/error.c
@@ -1,228 +1 @@
1 | // SPDX-License-Identifier: GPL-2.0 | // SPDX-License-Identifier: GPL-2.0 | |
2 | #include <linux/interrupt.h> | ||
3 | #include <linux/kdebug.h> | ||
4 | #include <linux/kmemcheck.h> | ||
5 | #include <linux/kernel.h> | ||
6 | #include <linux/types.h> | ||
7 | #include <linux/ptrace.h> | ||
8 | #include <linux/stacktrace.h> | ||
9 | #include <linux/string.h> | ||
10 | |||
11 | #include "error.h" | ||
12 | #include "shadow.h" | ||
13 | |||
14 | enum kmemcheck_error_type { | ||
15 | KMEMCHECK_ERROR_INVALID_ACCESS, | ||
16 | KMEMCHECK_ERROR_BUG, | ||
17 | }; | ||
18 | |||
19 | #define SHADOW_COPY_SIZE (1 << CONFIG_KMEMCHECK_SHADOW_COPY_SHIFT) | ||
20 | |||
21 | struct kmemcheck_error { | ||
22 | enum kmemcheck_error_type type; | ||
23 | |||
24 | union { | ||
25 | /* KMEMCHECK_ERROR_INVALID_ACCESS */ | ||
26 | struct { | ||
27 | /* Kind of access that caused the error */ | ||
28 | enum kmemcheck_shadow state; | ||
29 | /* Address and size of the erroneous read */ | ||
30 | unsigned long address; | ||
31 | unsigned int size; | ||
32 | }; | ||
33 | }; | ||
34 | |||
35 | struct pt_regs regs; | ||
36 | struct stack_trace trace; | ||
37 | unsigned long trace_entries[32]; | ||
38 | |||
39 | /* We compress it to a char. */ | ||
40 | unsigned char shadow_copy[SHADOW_COPY_SIZE]; | ||
41 | unsigned char memory_copy[SHADOW_COPY_SIZE]; | ||
42 | }; | ||
43 | |||
44 | /* | ||
45 | * Create a ring queue of errors to output. We can't call printk() directly | ||
46 | * from the kmemcheck traps, since this may call the console drivers and | ||
47 | * result in a recursive fault. | ||
48 | */ | ||
49 | static struct kmemcheck_error error_fifo[CONFIG_KMEMCHECK_QUEUE_SIZE]; | ||
50 | static unsigned int error_count; | ||
51 | static unsigned int error_rd; | ||
52 | static unsigned int error_wr; | ||
53 | static unsigned int error_missed_count; | ||
54 | |||
55 | static struct kmemcheck_error *error_next_wr(void) | ||
56 | { | ||
57 | struct kmemcheck_error *e; | ||
58 | |||
59 | if (error_count == ARRAY_SIZE(error_fifo)) { | ||
60 | ++error_missed_count; | ||
61 | return NULL; | ||
62 | } | ||
63 | |||
64 | e = &error_fifo[error_wr]; | ||
65 | if (++error_wr == ARRAY_SIZE(error_fifo)) | ||
66 | error_wr = 0; | ||
67 | ++error_count; | ||
68 | return e; | ||
69 | } | ||
70 | |||
71 | static struct kmemcheck_error *error_next_rd(void) | ||
72 | { | ||
73 | struct kmemcheck_error *e; | ||
74 | |||
75 | if (error_count == 0) | ||
76 | return NULL; | ||
77 | |||
78 | e = &error_fifo[error_rd]; | ||
79 | if (++error_rd == ARRAY_SIZE(error_fifo)) | ||
80 | error_rd = 0; | ||
81 | --error_count; | ||
82 | return e; | ||
83 | } | ||
84 | |||
85 | void kmemcheck_error_recall(void) | ||
86 | { | ||
87 | static const char *desc[] = { | ||
88 | [KMEMCHECK_SHADOW_UNALLOCATED] = "unallocated", | ||
89 | [KMEMCHECK_SHADOW_UNINITIALIZED] = "uninitialized", | ||
90 | [KMEMCHECK_SHADOW_INITIALIZED] = "initialized", | ||
91 | [KMEMCHECK_SHADOW_FREED] = "freed", | ||
92 | }; | ||
93 | |||
94 | static const char short_desc[] = { | ||
95 | [KMEMCHECK_SHADOW_UNALLOCATED] = 'a', | ||
96 | [KMEMCHECK_SHADOW_UNINITIALIZED] = 'u', | ||
97 | [KMEMCHECK_SHADOW_INITIALIZED] = 'i', | ||
98 | [KMEMCHECK_SHADOW_FREED] = 'f', | ||
99 | }; | ||
100 | |||
101 | struct kmemcheck_error *e; | ||
102 | unsigned int i; | ||
103 | |||
104 | e = error_next_rd(); | ||
105 | if (!e) | ||
106 | return; | ||
107 | |||
108 | switch (e->type) { | ||
109 | case KMEMCHECK_ERROR_INVALID_ACCESS: | ||
110 | printk(KERN_WARNING "WARNING: kmemcheck: Caught %d-bit read from %s memory (%p)\n", | ||
111 | 8 * e->size, e->state < ARRAY_SIZE(desc) ? | ||
112 | desc[e->state] : "(invalid shadow state)", | ||
113 | (void *) e->address); | ||
114 | |||
115 | printk(KERN_WARNING); | ||
116 | for (i = 0; i < SHADOW_COPY_SIZE; ++i) | ||
117 | printk(KERN_CONT "%02x", e->memory_copy[i]); | ||
118 | printk(KERN_CONT "\n"); | ||
119 | |||
120 | printk(KERN_WARNING); | ||
121 | for (i = 0; i < SHADOW_COPY_SIZE; ++i) { | ||
122 | if (e->shadow_copy[i] < ARRAY_SIZE(short_desc)) | ||
123 | printk(KERN_CONT " %c", short_desc[e->shadow_copy[i]]); | ||
124 | else | ||
125 | printk(KERN_CONT " ?"); | ||
126 | } | ||
127 | printk(KERN_CONT "\n"); | ||
128 | printk(KERN_WARNING "%*c\n", 2 + 2 | ||
129 | * (int) (e->address & (SHADOW_COPY_SIZE - 1)), '^'); | ||
130 | break; | ||
131 | case KMEMCHECK_ERROR_BUG: | ||
132 | printk(KERN_EMERG "ERROR: kmemcheck: Fatal error\n"); | ||
133 | break; | ||
134 | } | ||
135 | |||
136 | __show_regs(&e->regs, 1); | ||
137 | print_stack_trace(&e->trace, 0); | ||
138 | } | ||
139 | |||
140 | static void do_wakeup(unsigned long data) | ||
141 | { | ||
142 | while (error_count > 0) | ||
143 | kmemcheck_error_recall(); | ||
144 | |||
145 | if (error_missed_count > 0) { | ||
146 | printk(KERN_WARNING "kmemcheck: Lost %d error reports because " | ||
147 | "the queue was too small\n", error_missed_count); | ||
148 | error_missed_count = 0; | ||
149 | } | ||
150 | } | ||
151 | |||
152 | static DECLARE_TASKLET(kmemcheck_tasklet, &do_wakeup, 0); | ||
153 | |||
154 | /* | ||
155 | * Save the context of an error report. | ||
156 | */ | ||
157 | void kmemcheck_error_save(enum kmemcheck_shadow state, | ||
158 | unsigned long address, unsigned int size, struct pt_regs *regs) | ||
159 | { | ||
160 | static unsigned long prev_ip; | ||
161 | |||
162 | struct kmemcheck_error *e; | ||
163 | void *shadow_copy; | ||
164 | void *memory_copy; | ||
165 | |||
166 | /* Don't report several adjacent errors from the same EIP. */ | ||
167 | if (regs->ip == prev_ip) | ||
168 | return; | ||
169 | prev_ip = regs->ip; | ||
170 | |||
171 | e = error_next_wr(); | ||
172 | if (!e) | ||
173 | return; | ||
174 | |||
175 | e->type = KMEMCHECK_ERROR_INVALID_ACCESS; | ||
176 | |||
177 | e->state = state; | ||
178 | e->address = address; | ||
179 | e->size = size; | ||
180 | |||
181 | /* Save regs */ | ||
182 | memcpy(&e->regs, regs, sizeof(*regs)); | ||
183 | |||
184 | /* Save stack trace */ | ||
185 | e->trace.nr_entries = 0; | ||
186 | e->trace.entries = e->trace_entries; | ||
187 | e->trace.max_entries = ARRAY_SIZE(e->trace_entries); | ||
188 | e->trace.skip = 0; | ||
189 | save_stack_trace_regs(regs, &e->trace); | ||
190 | |||
191 | /* Round address down to nearest 16 bytes */ | ||
192 | shadow_copy = kmemcheck_shadow_lookup(address | ||
193 | & ~(SHADOW_COPY_SIZE - 1)); | ||
194 | BUG_ON(!shadow_copy); | ||
195 | |||
196 | memcpy(e->shadow_copy, shadow_copy, SHADOW_COPY_SIZE); | ||
197 | |||
198 | kmemcheck_show_addr(address); | ||
199 | memory_copy = (void *) (address & ~(SHADOW_COPY_SIZE - 1)); | ||
200 | memcpy(e->memory_copy, memory_copy, SHADOW_COPY_SIZE); | ||
201 | kmemcheck_hide_addr(address); | ||
202 | |||
203 | tasklet_hi_schedule_first(&kmemcheck_tasklet); | ||
204 | } | ||
205 | |||
206 | /* | ||
207 | * Save the context of a kmemcheck bug. | ||
208 | */ | ||
209 | void kmemcheck_error_save_bug(struct pt_regs *regs) | ||
210 | { | ||
211 | struct kmemcheck_error *e; | ||
212 | |||
213 | e = error_next_wr(); | ||
214 | if (!e) | ||
215 | return; | ||
216 | |||
217 | e->type = KMEMCHECK_ERROR_BUG; | ||
218 | |||
219 | memcpy(&e->regs, regs, sizeof(*regs)); | ||
220 | |||
221 | e->trace.nr_entries = 0; | ||
222 | e->trace.entries = e->trace_entries; | ||
223 | e->trace.max_entries = ARRAY_SIZE(e->trace_entries); | ||
224 | e->trace.skip = 1; | ||
225 | save_stack_trace(&e->trace); | ||
226 | |||
227 | tasklet_hi_schedule_first(&kmemcheck_tasklet); | ||
228 | } | ||
diff --git a/arch/x86/mm/kmemcheck/error.h b/arch/x86/mm/kmemcheck/error.h
index 39f80d7a874d..ea32a7d3cf1b 100644
--- a/arch/x86/mm/kmemcheck/error.h
+++ b/arch/x86/mm/kmemcheck/error.h
@@ -1,16 +1 @@
1 | /* SPDX-License-Identifier: GPL-2.0 */ | /* SPDX-License-Identifier: GPL-2.0 */ | |
2 | #ifndef ARCH__X86__MM__KMEMCHECK__ERROR_H | ||
3 | #define ARCH__X86__MM__KMEMCHECK__ERROR_H | ||
4 | |||
5 | #include <linux/ptrace.h> | ||
6 | |||
7 | #include "shadow.h" | ||
8 | |||
9 | void kmemcheck_error_save(enum kmemcheck_shadow state, | ||
10 | unsigned long address, unsigned int size, struct pt_regs *regs); | ||
11 | |||
12 | void kmemcheck_error_save_bug(struct pt_regs *regs); | ||
13 | |||
14 | void kmemcheck_error_recall(void); | ||
15 | |||
16 | #endif | ||
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c deleted file mode 100644 index 4515bae36bbe..000000000000 --- a/arch/x86/mm/kmemcheck/kmemcheck.c +++ /dev/null | |||
@@ -1,658 +0,0 @@ | |||
1 | /** | ||
2 | * kmemcheck - a heavyweight memory checker for the linux kernel | ||
3 | * Copyright (C) 2007, 2008 Vegard Nossum <vegardno@ifi.uio.no> | ||
4 | * (With a lot of help from Ingo Molnar and Pekka Enberg.) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License (version 2) as | ||
8 | * published by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | #include <linux/init.h> | ||
12 | #include <linux/interrupt.h> | ||
13 | #include <linux/kallsyms.h> | ||
14 | #include <linux/kernel.h> | ||
15 | #include <linux/kmemcheck.h> | ||
16 | #include <linux/mm.h> | ||
17 | #include <linux/page-flags.h> | ||
18 | #include <linux/percpu.h> | ||
19 | #include <linux/ptrace.h> | ||
20 | #include <linux/string.h> | ||
21 | #include <linux/types.h> | ||
22 | |||
23 | #include <asm/cacheflush.h> | ||
24 | #include <asm/kmemcheck.h> | ||
25 | #include <asm/pgtable.h> | ||
26 | #include <asm/tlbflush.h> | ||
27 | |||
28 | #include "error.h" | ||
29 | #include "opcode.h" | ||
30 | #include "pte.h" | ||
31 | #include "selftest.h" | ||
32 | #include "shadow.h" | ||
33 | |||
34 | |||
35 | #ifdef CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT | ||
36 | # define KMEMCHECK_ENABLED 0 | ||
37 | #endif | ||
38 | |||
39 | #ifdef CONFIG_KMEMCHECK_ENABLED_BY_DEFAULT | ||
40 | # define KMEMCHECK_ENABLED 1 | ||
41 | #endif | ||
42 | |||
43 | #ifdef CONFIG_KMEMCHECK_ONESHOT_BY_DEFAULT | ||
44 | # define KMEMCHECK_ENABLED 2 | ||
45 | #endif | ||
46 | |||
47 | int kmemcheck_enabled = KMEMCHECK_ENABLED; | ||
48 | |||
49 | int __init kmemcheck_init(void) | ||
50 | { | ||
51 | #ifdef CONFIG_SMP | ||
52 | /* | ||
53 | * Limit SMP to use a single CPU. We rely on the fact that this code | ||
54 | * runs before SMP is set up. | ||
55 | */ | ||
56 | if (setup_max_cpus > 1) { | ||
57 | printk(KERN_INFO | ||
58 | "kmemcheck: Limiting number of CPUs to 1.\n"); | ||
59 | setup_max_cpus = 1; | ||
60 | } | ||
61 | #endif | ||
62 | |||
63 | if (!kmemcheck_selftest()) { | ||
64 | printk(KERN_INFO "kmemcheck: self-tests failed; disabling\n"); | ||
65 | kmemcheck_enabled = 0; | ||
66 | return -EINVAL; | ||
67 | } | ||
68 | |||
69 | printk(KERN_INFO "kmemcheck: Initialized\n"); | ||
70 | return 0; | ||
71 | } | ||
72 | |||
73 | early_initcall(kmemcheck_init); | ||
74 | |||
75 | /* | ||
76 | * We need to parse the kmemcheck= option before any memory is allocated. | ||
77 | */ | ||
78 | static int __init param_kmemcheck(char *str) | ||
79 | { | ||
80 | int val; | ||
81 | int ret; | ||
82 | |||
83 | if (!str) | ||
84 | return -EINVAL; | ||
85 | |||
86 | ret = kstrtoint(str, 0, &val); | ||
87 | if (ret) | ||
88 | return ret; | ||
89 | kmemcheck_enabled = val; | ||
90 | return 0; | ||
91 | } | ||
92 | |||
93 | early_param("kmemcheck", param_kmemcheck); | ||
94 | |||
95 | int kmemcheck_show_addr(unsigned long address) | ||
96 | { | ||
97 | pte_t *pte; | ||
98 | |||
99 | pte = kmemcheck_pte_lookup(address); | ||
100 | if (!pte) | ||
101 | return 0; | ||
102 | |||
103 | set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); | ||
104 | __flush_tlb_one(address); | ||
105 | return 1; | ||
106 | } | ||
107 | |||
108 | int kmemcheck_hide_addr(unsigned long address) | ||
109 | { | ||
110 | pte_t *pte; | ||
111 | |||
112 | pte = kmemcheck_pte_lookup(address); | ||
113 | if (!pte) | ||
114 | return 0; | ||
115 | |||
116 | set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); | ||
117 | __flush_tlb_one(address); | ||
118 | return 1; | ||
119 | } | ||
120 | |||
121 | struct kmemcheck_context { | ||
122 | bool busy; | ||
123 | int balance; | ||
124 | |||
125 | /* | ||
126 | * There can be at most two memory operands to an instruction, but | ||
127 | * each address can cross a page boundary -- so we may need up to | ||
128 | * four addresses that must be hidden/revealed for each fault. | ||
129 | */ | ||
130 | unsigned long addr[4]; | ||
131 | unsigned long n_addrs; | ||
132 | unsigned long flags; | ||
133 | |||
134 | /* Data size of the instruction that caused a fault. */ | ||
135 | unsigned int size; | ||
136 | }; | ||
137 | |||
138 | static DEFINE_PER_CPU(struct kmemcheck_context, kmemcheck_context); | ||
139 | |||
140 | bool kmemcheck_active(struct pt_regs *regs) | ||
141 | { | ||
142 | struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); | ||
143 | |||
144 | return data->balance > 0; | ||
145 | } | ||
146 | |||
147 | /* Save an address that needs to be shown/hidden */ | ||
148 | static void kmemcheck_save_addr(unsigned long addr) | ||
149 | { | ||
150 | struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); | ||
151 | |||
152 | BUG_ON(data->n_addrs >= ARRAY_SIZE(data->addr)); | ||
153 | data->addr[data->n_addrs++] = addr; | ||
154 | } | ||
155 | |||
156 | static unsigned int kmemcheck_show_all(void) | ||
157 | { | ||
158 | struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); | ||
159 | unsigned int i; | ||
160 | unsigned int n; | ||
161 | |||
162 | n = 0; | ||
163 | for (i = 0; i < data->n_addrs; ++i) | ||
164 | n += kmemcheck_show_addr(data->addr[i]); | ||
165 | |||
166 | return n; | ||
167 | } | ||
168 | |||
169 | static unsigned int kmemcheck_hide_all(void) | ||
170 | { | ||
171 | struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); | ||
172 | unsigned int i; | ||
173 | unsigned int n; | ||
174 | |||
175 | n = 0; | ||
176 | for (i = 0; i < data->n_addrs; ++i) | ||
177 | n += kmemcheck_hide_addr(data->addr[i]); | ||
178 | |||
179 | return n; | ||
180 | } | ||
181 | |||
182 | /* | ||
183 | * Called from the #PF handler. | ||
184 | */ | ||
185 | void kmemcheck_show(struct pt_regs *regs) | ||
186 | { | ||
187 | struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); | ||
188 | |||
189 | BUG_ON(!irqs_disabled()); | ||
190 | |||
191 | if (unlikely(data->balance != 0)) { | ||
192 | kmemcheck_show_all(); | ||
193 | kmemcheck_error_save_bug(regs); | ||
194 | data->balance = 0; | ||
195 | return; | ||
196 | } | ||
197 | |||
198 | /* | ||
199 | * None of the addresses actually belonged to kmemcheck. Note that | ||
200 | * this is not an error. | ||
201 | */ | ||
202 | if (kmemcheck_show_all() == 0) | ||
203 | return; | ||
204 | |||
205 | ++data->balance; | ||
206 | |||
207 | /* | ||
208 | * The IF needs to be cleared as well, so that the faulting | ||
209 | * instruction can run "uninterrupted". Otherwise, we might take | ||
210 | * an interrupt and start executing that before we've had a chance | ||
211 | * to hide the page again. | ||
212 | * | ||
213 | * NOTE: In the rare case of multiple faults, we must not override | ||
214 | * the original flags: | ||
215 | */ | ||
216 | if (!(regs->flags & X86_EFLAGS_TF)) | ||
217 | data->flags = regs->flags; | ||
218 | |||
219 | regs->flags |= X86_EFLAGS_TF; | ||
220 | regs->flags &= ~X86_EFLAGS_IF; | ||
221 | } | ||
222 | |||
223 | /* | ||
224 | * Called from the #DB handler. | ||
225 | */ | ||
226 | void kmemcheck_hide(struct pt_regs *regs) | ||
227 | { | ||
228 | struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); | ||
229 | int n; | ||
230 | |||
231 | BUG_ON(!irqs_disabled()); | ||
232 | |||
233 | if (unlikely(data->balance != 1)) { | ||
234 | kmemcheck_show_all(); | ||
235 | kmemcheck_error_save_bug(regs); | ||
236 | data->n_addrs = 0; | ||
237 | data->balance = 0; | ||
238 | |||
239 | if (!(data->flags & X86_EFLAGS_TF)) | ||
240 | regs->flags &= ~X86_EFLAGS_TF; | ||
241 | if (data->flags & X86_EFLAGS_IF) | ||
242 | regs->flags |= X86_EFLAGS_IF; | ||
243 | return; | ||
244 | } | ||
245 | |||
246 | if (kmemcheck_enabled) | ||
247 | n = kmemcheck_hide_all(); | ||
248 | else | ||
249 | n = kmemcheck_show_all(); | ||
250 | |||
251 | if (n == 0) | ||
252 | return; | ||
253 | |||
254 | --data->balance; | ||
255 | |||
256 | data->n_addrs = 0; | ||
257 | |||
258 | if (!(data->flags & X86_EFLAGS_TF)) | ||
259 | regs->flags &= ~X86_EFLAGS_TF; | ||
260 | if (data->flags & X86_EFLAGS_IF) | ||
261 | regs->flags |= X86_EFLAGS_IF; | ||
262 | } | ||
263 | |||
264 | void kmemcheck_show_pages(struct page *p, unsigned int n) | ||
265 | { | ||
266 | unsigned int i; | ||
267 | |||
268 | for (i = 0; i < n; ++i) { | ||
269 | unsigned long address; | ||
270 | pte_t *pte; | ||
271 | unsigned int level; | ||
272 | |||
273 | address = (unsigned long) page_address(&p[i]); | ||
274 | pte = lookup_address(address, &level); | ||
275 | BUG_ON(!pte); | ||
276 | BUG_ON(level != PG_LEVEL_4K); | ||
277 | |||
278 | set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); | ||
279 | set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_HIDDEN)); | ||
280 | __flush_tlb_one(address); | ||
281 | } | ||
282 | } | ||
283 | |||
284 | bool kmemcheck_page_is_tracked(struct page *p) | ||
285 | { | ||
286 | /* This will also check the "hidden" flag of the PTE. */ | ||
287 | return kmemcheck_pte_lookup((unsigned long) page_address(p)); | ||
288 | } | ||
289 | |||
290 | void kmemcheck_hide_pages(struct page *p, unsigned int n) | ||
291 | { | ||
292 | unsigned int i; | ||
293 | |||
294 | for (i = 0; i < n; ++i) { | ||
295 | unsigned long address; | ||
296 | pte_t *pte; | ||
297 | unsigned int level; | ||
298 | |||
299 | address = (unsigned long) page_address(&p[i]); | ||
300 | pte = lookup_address(address, &level); | ||
301 | BUG_ON(!pte); | ||
302 | BUG_ON(level != PG_LEVEL_4K); | ||
303 | |||
304 | set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); | ||
305 | set_pte(pte, __pte(pte_val(*pte) | _PAGE_HIDDEN)); | ||
306 | __flush_tlb_one(address); | ||
307 | } | ||
308 | } | ||
309 | |||
310 | /* Access may NOT cross page boundary */ | ||
311 | static void kmemcheck_read_strict(struct pt_regs *regs, | ||
312 | unsigned long addr, unsigned int size) | ||
313 | { | ||
314 | void *shadow; | ||
315 | enum kmemcheck_shadow status; | ||
316 | |||
317 | shadow = kmemcheck_shadow_lookup(addr); | ||
318 | if (!shadow) | ||
319 | return; | ||
320 | |||
321 | kmemcheck_save_addr(addr); | ||
322 | status = kmemcheck_shadow_test(shadow, size); | ||
323 | if (status == KMEMCHECK_SHADOW_INITIALIZED) | ||
324 | return; | ||
325 | |||
326 | if (kmemcheck_enabled) | ||
327 | kmemcheck_error_save(status, addr, size, regs); | ||
328 | |||
329 | if (kmemcheck_enabled == 2) | ||
330 | kmemcheck_enabled = 0; | ||
331 | |||
332 | /* Don't warn about it again. */ | ||
333 | kmemcheck_shadow_set(shadow, size); | ||
334 | } | ||
335 | |||
336 | bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size) | ||
337 | { | ||
338 | enum kmemcheck_shadow status; | ||
339 | void *shadow; | ||
340 | |||
341 | shadow = kmemcheck_shadow_lookup(addr); | ||
342 | if (!shadow) | ||
343 | return true; | ||
344 | |||
345 | status = kmemcheck_shadow_test_all(shadow, size); | ||
346 | |||
347 | return status == KMEMCHECK_SHADOW_INITIALIZED; | ||
348 | } | ||
349 | |||
350 | /* Access may cross page boundary */ | ||
351 | static void kmemcheck_read(struct pt_regs *regs, | ||
352 | unsigned long addr, unsigned int size) | ||
353 | { | ||
354 | unsigned long page = addr & PAGE_MASK; | ||
355 | unsigned long next_addr = addr + size - 1; | ||
356 | unsigned long next_page = next_addr & PAGE_MASK; | ||
357 | |||
358 | if (likely(page == next_page)) { | ||
359 | kmemcheck_read_strict(regs, addr, size); | ||
360 | return; | ||
361 | } | ||
362 | |||
363 | /* | ||
364 | * What we do is basically to split the access across the | ||
365 | * two pages and handle each part separately. Yes, this means | ||
366 | * that we may now see reads that are 3 + 5 bytes, for | ||
367 | * example (and if both are uninitialized, there will be two | ||
368 | * reports), but it makes the code a lot simpler. | ||
369 | */ | ||
370 | kmemcheck_read_strict(regs, addr, next_page - addr); | ||
371 | kmemcheck_read_strict(regs, next_page, next_addr - next_page); | ||
372 | } | ||
373 | |||
374 | static void kmemcheck_write_strict(struct pt_regs *regs, | ||
375 | unsigned long addr, unsigned int size) | ||
376 | { | ||
377 | void *shadow; | ||
378 | |||
379 | shadow = kmemcheck_shadow_lookup(addr); | ||
380 | if (!shadow) | ||
381 | return; | ||
382 | |||
383 | kmemcheck_save_addr(addr); | ||
384 | kmemcheck_shadow_set(shadow, size); | ||
385 | } | ||
386 | |||
387 | static void kmemcheck_write(struct pt_regs *regs, | ||
388 | unsigned long addr, unsigned int size) | ||
389 | { | ||
390 | unsigned long page = addr & PAGE_MASK; | ||
391 | unsigned long next_addr = addr + size - 1; | ||
392 | unsigned long next_page = next_addr & PAGE_MASK; | ||
393 | |||
394 | if (likely(page == next_page)) { | ||
395 | kmemcheck_write_strict(regs, addr, size); | ||
396 | return; | ||
397 | } | ||
398 | |||
399 | /* See comment in kmemcheck_read(). */ | ||
400 | kmemcheck_write_strict(regs, addr, next_page - addr); | ||
401 | kmemcheck_write_strict(regs, next_page, next_addr - next_page); | ||
402 | } | ||
403 | |||
404 | /* | ||
405 | * Copying is hard. We have two addresses, each of which may be split across | ||
406 | * a page (and each page will have different shadow addresses). | ||
407 | */ | ||
408 | static void kmemcheck_copy(struct pt_regs *regs, | ||
409 | unsigned long src_addr, unsigned long dst_addr, unsigned int size) | ||
410 | { | ||
411 | uint8_t shadow[8]; | ||
412 | enum kmemcheck_shadow status; | ||
413 | |||
414 | unsigned long page; | ||
415 | unsigned long next_addr; | ||
416 | unsigned long next_page; | ||
417 | |||
418 | uint8_t *x; | ||
419 | unsigned int i; | ||
420 | unsigned int n; | ||
421 | |||
422 | BUG_ON(size > sizeof(shadow)); | ||
423 | |||
424 | page = src_addr & PAGE_MASK; | ||
425 | next_addr = src_addr + size - 1; | ||
426 | next_page = next_addr & PAGE_MASK; | ||
427 | |||
428 | if (likely(page == next_page)) { | ||
429 | /* Same page */ | ||
430 | x = kmemcheck_shadow_lookup(src_addr); | ||
431 | if (x) { | ||
432 | kmemcheck_save_addr(src_addr); | ||
433 | for (i = 0; i < size; ++i) | ||
434 | shadow[i] = x[i]; | ||
435 | } else { | ||
436 | for (i = 0; i < size; ++i) | ||
437 | shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
438 | } | ||
439 | } else { | ||
440 | n = next_page - src_addr; | ||
441 | BUG_ON(n > sizeof(shadow)); | ||
442 | |||
443 | /* First page */ | ||
444 | x = kmemcheck_shadow_lookup(src_addr); | ||
445 | if (x) { | ||
446 | kmemcheck_save_addr(src_addr); | ||
447 | for (i = 0; i < n; ++i) | ||
448 | shadow[i] = x[i]; | ||
449 | } else { | ||
450 | /* Not tracked */ | ||
451 | for (i = 0; i < n; ++i) | ||
452 | shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
453 | } | ||
454 | |||
455 | /* Second page */ | ||
456 | x = kmemcheck_shadow_lookup(next_page); | ||
457 | if (x) { | ||
458 | kmemcheck_save_addr(next_page); | ||
459 | for (i = n; i < size; ++i) | ||
460 | shadow[i] = x[i - n]; | ||
461 | } else { | ||
462 | /* Not tracked */ | ||
463 | for (i = n; i < size; ++i) | ||
464 | shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
465 | } | ||
466 | } | ||
467 | |||
468 | page = dst_addr & PAGE_MASK; | ||
469 | next_addr = dst_addr + size - 1; | ||
470 | next_page = next_addr & PAGE_MASK; | ||
471 | |||
472 | if (likely(page == next_page)) { | ||
473 | /* Same page */ | ||
474 | x = kmemcheck_shadow_lookup(dst_addr); | ||
475 | if (x) { | ||
476 | kmemcheck_save_addr(dst_addr); | ||
477 | for (i = 0; i < size; ++i) { | ||
478 | x[i] = shadow[i]; | ||
479 | shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
480 | } | ||
481 | } | ||
482 | } else { | ||
483 | n = next_page - dst_addr; | ||
484 | BUG_ON(n > sizeof(shadow)); | ||
485 | |||
486 | /* First page */ | ||
487 | x = kmemcheck_shadow_lookup(dst_addr); | ||
488 | if (x) { | ||
489 | kmemcheck_save_addr(dst_addr); | ||
490 | for (i = 0; i < n; ++i) { | ||
491 | x[i] = shadow[i]; | ||
492 | shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
493 | } | ||
494 | } | ||
495 | |||
496 | /* Second page */ | ||
497 | x = kmemcheck_shadow_lookup(next_page); | ||
498 | if (x) { | ||
499 | kmemcheck_save_addr(next_page); | ||
500 | for (i = n; i < size; ++i) { | ||
501 | x[i - n] = shadow[i]; | ||
502 | shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
503 | } | ||
504 | } | ||
505 | } | ||
506 | |||
507 | status = kmemcheck_shadow_test(shadow, size); | ||
508 | if (status == KMEMCHECK_SHADOW_INITIALIZED) | ||
509 | return; | ||
510 | |||
511 | if (kmemcheck_enabled) | ||
512 | kmemcheck_error_save(status, src_addr, size, regs); | ||
513 | |||
514 | if (kmemcheck_enabled == 2) | ||
515 | kmemcheck_enabled = 0; | ||
516 | } | ||
517 | |||
518 | enum kmemcheck_method { | ||
519 | KMEMCHECK_READ, | ||
520 | KMEMCHECK_WRITE, | ||
521 | }; | ||
522 | |||
523 | static void kmemcheck_access(struct pt_regs *regs, | ||
524 | unsigned long fallback_address, enum kmemcheck_method fallback_method) | ||
525 | { | ||
526 | const uint8_t *insn; | ||
527 | const uint8_t *insn_primary; | ||
528 | unsigned int size; | ||
529 | |||
530 | struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); | ||
531 | |||
532 | /* Recursive fault -- ouch. */ | ||
533 | if (data->busy) { | ||
534 | kmemcheck_show_addr(fallback_address); | ||
535 | kmemcheck_error_save_bug(regs); | ||
536 | return; | ||
537 | } | ||
538 | |||
539 | data->busy = true; | ||
540 | |||
541 | insn = (const uint8_t *) regs->ip; | ||
542 | insn_primary = kmemcheck_opcode_get_primary(insn); | ||
543 | |||
544 | kmemcheck_opcode_decode(insn, &size); | ||
545 | |||
546 | switch (insn_primary[0]) { | ||
547 | #ifdef CONFIG_KMEMCHECK_BITOPS_OK | ||
548 | /* AND, OR, XOR */ | ||
549 | /* | ||
550 | * Unfortunately, these instructions have to be excluded from | ||
551 | * our regular checking since they access only some (and not | ||
552 | * all) bits. This clears out "bogus" bitfield-access warnings. | ||
553 | */ | ||
554 | case 0x80: | ||
555 | case 0x81: | ||
556 | case 0x82: | ||
557 | case 0x83: | ||
558 | switch ((insn_primary[1] >> 3) & 7) { | ||
559 | /* OR */ | ||
560 | case 1: | ||
561 | /* AND */ | ||
562 | case 4: | ||
563 | /* XOR */ | ||
564 | case 6: | ||
565 | kmemcheck_write(regs, fallback_address, size); | ||
566 | goto out; | ||
567 | |||
568 | /* ADD */ | ||
569 | case 0: | ||
570 | /* ADC */ | ||
571 | case 2: | ||
572 | /* SBB */ | ||
573 | case 3: | ||
574 | /* SUB */ | ||
575 | case 5: | ||
576 | /* CMP */ | ||
577 | case 7: | ||
578 | break; | ||
579 | } | ||
580 | break; | ||
581 | #endif | ||
582 | |||
583 | /* MOVS, MOVSB, MOVSW, MOVSD */ | ||
584 | case 0xa4: | ||
585 | case 0xa5: | ||
586 | /* | ||
587 | * These instructions are special because they take two | ||
588 | * addresses, but we only get one page fault. | ||
589 | */ | ||
590 | kmemcheck_copy(regs, regs->si, regs->di, size); | ||
591 | goto out; | ||
592 | |||
593 | /* CMPS, CMPSB, CMPSW, CMPSD */ | ||
594 | case 0xa6: | ||
595 | case 0xa7: | ||
596 | kmemcheck_read(regs, regs->si, size); | ||
597 | kmemcheck_read(regs, regs->di, size); | ||
598 | goto out; | ||
599 | } | ||
600 | |||
601 | /* | ||
602 | * If the opcode isn't special in any way, we use the data from the | ||
603 | * page fault handler to determine the address and type of memory | ||
604 | * access. | ||
605 | */ | ||
606 | switch (fallback_method) { | ||
607 | case KMEMCHECK_READ: | ||
608 | kmemcheck_read(regs, fallback_address, size); | ||
609 | goto out; | ||
610 | case KMEMCHECK_WRITE: | ||
611 | kmemcheck_write(regs, fallback_address, size); | ||
612 | goto out; | ||
613 | } | ||
614 | |||
615 | out: | ||
616 | data->busy = false; | ||
617 | } | ||
618 | |||
619 | bool kmemcheck_fault(struct pt_regs *regs, unsigned long address, | ||
620 | unsigned long error_code) | ||
621 | { | ||
622 | pte_t *pte; | ||
623 | |||
624 | /* | ||
625 | * XXX: Is it safe to assume that memory accesses from virtual 86 | ||
626 | * mode or non-kernel code segments will _never_ access kernel | ||
627 | * memory (e.g. tracked pages)? For now, we need this to avoid | ||
628 | * invoking kmemcheck for PnP BIOS calls. | ||
629 | */ | ||
630 | if (regs->flags & X86_VM_MASK) | ||
631 | return false; | ||
632 | if (regs->cs != __KERNEL_CS) | ||
633 | return false; | ||
634 | |||
635 | pte = kmemcheck_pte_lookup(address); | ||
636 | if (!pte) | ||
637 | return false; | ||
638 | |||
639 | WARN_ON_ONCE(in_nmi()); | ||
640 | |||
641 | if (error_code & 2) | ||
642 | kmemcheck_access(regs, address, KMEMCHECK_WRITE); | ||
643 | else | ||
644 | kmemcheck_access(regs, address, KMEMCHECK_READ); | ||
645 | |||
646 | kmemcheck_show(regs); | ||
647 | return true; | ||
648 | } | ||
649 | |||
650 | bool kmemcheck_trap(struct pt_regs *regs) | ||
651 | { | ||
652 | if (!kmemcheck_active(regs)) | ||
653 | return false; | ||
654 | |||
655 | /* We're done. */ | ||
656 | kmemcheck_hide(regs); | ||
657 | return true; | ||
658 | } | ||
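
The comment in kmemcheck_read() above explains that an access straddling a page boundary is simply split into two per-page pieces, each checked against its own shadow page. A minimal userspace sketch of that splitting rule follows; the 4 KiB page size is an assumption, and the hiding/revealing of the underlying pages is not modelled.

    /*
     * Userspace sketch: how an access that crosses a page boundary is
     * split into two per-page pieces. PAGE_SIZE of 4096 is an assumption
     * here; the real code checks each piece against its own shadow page.
     */
    #include <stdio.h>

    #define PAGE_SIZE 4096UL
    #define PAGE_MASK (~(PAGE_SIZE - 1))

    static void split_access(unsigned long addr, unsigned int size)
    {
        unsigned long page      = addr & PAGE_MASK;
        unsigned long last      = addr + size - 1;
        unsigned long next_page = last & PAGE_MASK;

        if (page == next_page) {
            printf("one piece:   %#lx, %u byte(s)\n", addr, size);
            return;
        }

        /* Two pieces, e.g. an 8-byte read at offset 4093 becomes 3 + 5. */
        printf("first page:  %#lx, %lu byte(s)\n", addr, next_page - addr);
        printf("second page: %#lx, %lu byte(s)\n",
               next_page, (unsigned long) size - (next_page - addr));
    }

    int main(void)
    {
        split_access(0x1000, 4);        /* stays within one page */
        split_access(0x1ffd, 8);        /* 4093 into the page: 3 + 5 */
        return 0;
    }
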
diff --git a/arch/x86/mm/kmemcheck/opcode.c b/arch/x86/mm/kmemcheck/opcode.c index df8109ddf7fe..cec594032515 100644 --- a/arch/x86/mm/kmemcheck/opcode.c +++ b/arch/x86/mm/kmemcheck/opcode.c | |||
@@ -1,107 +1 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | // SPDX-License-Identifier: GPL-2.0 | |
2 | #include <linux/types.h> | ||
3 | |||
4 | #include "opcode.h" | ||
5 | |||
6 | static bool opcode_is_prefix(uint8_t b) | ||
7 | { | ||
8 | return | ||
9 | /* Group 1 */ | ||
10 | b == 0xf0 || b == 0xf2 || b == 0xf3 | ||
11 | /* Group 2 */ | ||
12 | || b == 0x2e || b == 0x36 || b == 0x3e || b == 0x26 | ||
13 | || b == 0x64 || b == 0x65 | ||
14 | /* Group 3 */ | ||
15 | || b == 0x66 | ||
16 | /* Group 4 */ | ||
17 | || b == 0x67; | ||
18 | } | ||
19 | |||
20 | #ifdef CONFIG_X86_64 | ||
21 | static bool opcode_is_rex_prefix(uint8_t b) | ||
22 | { | ||
23 | return (b & 0xf0) == 0x40; | ||
24 | } | ||
25 | #else | ||
26 | static bool opcode_is_rex_prefix(uint8_t b) | ||
27 | { | ||
28 | return false; | ||
29 | } | ||
30 | #endif | ||
31 | |||
32 | #define REX_W (1 << 3) | ||
33 | |||
34 | /* | ||
35 | * This is a VERY crude opcode decoder. We only need to find the size of the | ||
36 | * load/store that caused our #PF and this should work for all the opcodes | ||
37 | * that we care about. Moreover, the ones who invented this instruction set | ||
38 | * should be shot. | ||
39 | */ | ||
40 | void kmemcheck_opcode_decode(const uint8_t *op, unsigned int *size) | ||
41 | { | ||
42 | /* Default operand size */ | ||
43 | int operand_size_override = 4; | ||
44 | |||
45 | /* prefixes */ | ||
46 | for (; opcode_is_prefix(*op); ++op) { | ||
47 | if (*op == 0x66) | ||
48 | operand_size_override = 2; | ||
49 | } | ||
50 | |||
51 | /* REX prefix */ | ||
52 | if (opcode_is_rex_prefix(*op)) { | ||
53 | uint8_t rex = *op; | ||
54 | |||
55 | ++op; | ||
56 | if (rex & REX_W) { | ||
57 | switch (*op) { | ||
58 | case 0x63: | ||
59 | *size = 4; | ||
60 | return; | ||
61 | case 0x0f: | ||
62 | ++op; | ||
63 | |||
64 | switch (*op) { | ||
65 | case 0xb6: | ||
66 | case 0xbe: | ||
67 | *size = 1; | ||
68 | return; | ||
69 | case 0xb7: | ||
70 | case 0xbf: | ||
71 | *size = 2; | ||
72 | return; | ||
73 | } | ||
74 | |||
75 | break; | ||
76 | } | ||
77 | |||
78 | *size = 8; | ||
79 | return; | ||
80 | } | ||
81 | } | ||
82 | |||
83 | /* escape opcode */ | ||
84 | if (*op == 0x0f) { | ||
85 | ++op; | ||
86 | |||
87 | /* | ||
88 | * This is move with zero-extend and sign-extend, respectively; | ||
89 | * we don't have to think about 0xb6/0xbe, because this is | ||
90 | * already handled in the conditional below. | ||
91 | */ | ||
92 | if (*op == 0xb7 || *op == 0xbf) | ||
93 | operand_size_override = 2; | ||
94 | } | ||
95 | |||
96 | *size = (*op & 1) ? operand_size_override : 1; | ||
97 | } | ||
98 | |||
99 | const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op) | ||
100 | { | ||
101 | /* skip prefixes */ | ||
102 | while (opcode_is_prefix(*op)) | ||
103 | ++op; | ||
104 | if (opcode_is_rex_prefix(*op)) | ||
105 | ++op; | ||
106 | return op; | ||
107 | } | ||
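
The decoder removed above only has to recover the operand size of the faulting load or store. The sketch below is a trimmed userspace approximation of that rule -- a legacy 0x66 prefix selects 16 bits, REX.W selects 64 bits, and the low opcode bit distinguishes byte from full-width operands. The 0x0f escape and the movzx/movsx special cases are deliberately omitted, so this is not a copy of the removed function.

    /*
     * Userspace sketch of the operand-size rule: 0x66 prefix, REX.W and
     * the "opcode bit 0 selects byte vs. full operand" convention only.
     */
    #include <stdint.h>
    #include <stdio.h>

    static int is_prefix(uint8_t b)
    {
        return b == 0xf0 || b == 0xf2 || b == 0xf3 ||   /* group 1 */
               b == 0x2e || b == 0x36 || b == 0x3e ||   /* group 2 */
               b == 0x26 || b == 0x64 || b == 0x65 ||
               b == 0x66 ||                             /* group 3 */
               b == 0x67;                               /* group 4 */
    }

    static unsigned int decode_size(const uint8_t *op)
    {
        unsigned int operand_size = 4;          /* default operand size */

        for (; is_prefix(*op); ++op)
            if (*op == 0x66)                    /* operand-size override */
                operand_size = 2;

        if ((*op & 0xf0) == 0x40) {             /* REX prefix (64-bit) */
            if (*op & 0x08)                     /* REX.W: 64-bit operand */
                return 8;
            ++op;
        }

        return (*op & 1) ? operand_size : 1;
    }

    int main(void)
    {
        const uint8_t mov_byte[] = { 0x88, 0x08 };        /* mov %cl,(%rax)  -> 1 */
        const uint8_t mov_long[] = { 0x89, 0x08 };        /* mov %ecx,(%rax) -> 4 */
        const uint8_t mov_word[] = { 0x66, 0x89, 0x08 };  /* mov %cx,(%rax)  -> 2 */
        const uint8_t mov_quad[] = { 0x48, 0x89, 0x08 };  /* mov %rcx,(%rax) -> 8 */

        printf("%u %u %u %u\n",
               decode_size(mov_byte), decode_size(mov_long),
               decode_size(mov_word), decode_size(mov_quad));
        return 0;
    }
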
diff --git a/arch/x86/mm/kmemcheck/opcode.h b/arch/x86/mm/kmemcheck/opcode.h index 51a1ce94c24a..ea32a7d3cf1b 100644 --- a/arch/x86/mm/kmemcheck/opcode.h +++ b/arch/x86/mm/kmemcheck/opcode.h | |||
@@ -1,10 +1 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | /* SPDX-License-Identifier: GPL-2.0 */ | |
2 | #ifndef ARCH__X86__MM__KMEMCHECK__OPCODE_H | ||
3 | #define ARCH__X86__MM__KMEMCHECK__OPCODE_H | ||
4 | |||
5 | #include <linux/types.h> | ||
6 | |||
7 | void kmemcheck_opcode_decode(const uint8_t *op, unsigned int *size); | ||
8 | const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op); | ||
9 | |||
10 | #endif | ||
diff --git a/arch/x86/mm/kmemcheck/pte.c b/arch/x86/mm/kmemcheck/pte.c index 8a03be90272a..cec594032515 100644 --- a/arch/x86/mm/kmemcheck/pte.c +++ b/arch/x86/mm/kmemcheck/pte.c | |||
@@ -1,23 +1 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | // SPDX-License-Identifier: GPL-2.0 | |
2 | #include <linux/mm.h> | ||
3 | |||
4 | #include <asm/pgtable.h> | ||
5 | |||
6 | #include "pte.h" | ||
7 | |||
8 | pte_t *kmemcheck_pte_lookup(unsigned long address) | ||
9 | { | ||
10 | pte_t *pte; | ||
11 | unsigned int level; | ||
12 | |||
13 | pte = lookup_address(address, &level); | ||
14 | if (!pte) | ||
15 | return NULL; | ||
16 | if (level != PG_LEVEL_4K) | ||
17 | return NULL; | ||
18 | if (!pte_hidden(*pte)) | ||
19 | return NULL; | ||
20 | |||
21 | return pte; | ||
22 | } | ||
23 | |||
diff --git a/arch/x86/mm/kmemcheck/pte.h b/arch/x86/mm/kmemcheck/pte.h index b595612382c2..ea32a7d3cf1b 100644 --- a/arch/x86/mm/kmemcheck/pte.h +++ b/arch/x86/mm/kmemcheck/pte.h | |||
@@ -1,11 +1 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | /* SPDX-License-Identifier: GPL-2.0 */ | |
2 | #ifndef ARCH__X86__MM__KMEMCHECK__PTE_H | ||
3 | #define ARCH__X86__MM__KMEMCHECK__PTE_H | ||
4 | |||
5 | #include <linux/mm.h> | ||
6 | |||
7 | #include <asm/pgtable.h> | ||
8 | |||
9 | pte_t *kmemcheck_pte_lookup(unsigned long address); | ||
10 | |||
11 | #endif | ||
diff --git a/arch/x86/mm/kmemcheck/selftest.c b/arch/x86/mm/kmemcheck/selftest.c index 7ce0be1f99eb..cec594032515 100644 --- a/arch/x86/mm/kmemcheck/selftest.c +++ b/arch/x86/mm/kmemcheck/selftest.c | |||
@@ -1,71 +1 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | // SPDX-License-Identifier: GPL-2.0 | |
2 | #include <linux/bug.h> | ||
3 | #include <linux/kernel.h> | ||
4 | |||
5 | #include "opcode.h" | ||
6 | #include "selftest.h" | ||
7 | |||
8 | struct selftest_opcode { | ||
9 | unsigned int expected_size; | ||
10 | const uint8_t *insn; | ||
11 | const char *desc; | ||
12 | }; | ||
13 | |||
14 | static const struct selftest_opcode selftest_opcodes[] = { | ||
15 | /* REP MOVS */ | ||
16 | {1, "\xf3\xa4", "rep movsb <mem8>, <mem8>"}, | ||
17 | {4, "\xf3\xa5", "rep movsl <mem32>, <mem32>"}, | ||
18 | |||
19 | /* MOVZX / MOVZXD */ | ||
20 | {1, "\x66\x0f\xb6\x51\xf8", "movzwq <mem8>, <reg16>"}, | ||
21 | {1, "\x0f\xb6\x51\xf8", "movzwq <mem8>, <reg32>"}, | ||
22 | |||
23 | /* MOVSX / MOVSXD */ | ||
24 | {1, "\x66\x0f\xbe\x51\xf8", "movswq <mem8>, <reg16>"}, | ||
25 | {1, "\x0f\xbe\x51\xf8", "movswq <mem8>, <reg32>"}, | ||
26 | |||
27 | #ifdef CONFIG_X86_64 | ||
28 | /* MOVZX / MOVZXD */ | ||
29 | {1, "\x49\x0f\xb6\x51\xf8", "movzbq <mem8>, <reg64>"}, | ||
30 | {2, "\x49\x0f\xb7\x51\xf8", "movzbq <mem16>, <reg64>"}, | ||
31 | |||
32 | /* MOVSX / MOVSXD */ | ||
33 | {1, "\x49\x0f\xbe\x51\xf8", "movsbq <mem8>, <reg64>"}, | ||
34 | {2, "\x49\x0f\xbf\x51\xf8", "movsbq <mem16>, <reg64>"}, | ||
35 | {4, "\x49\x63\x51\xf8", "movslq <mem32>, <reg64>"}, | ||
36 | #endif | ||
37 | }; | ||
38 | |||
39 | static bool selftest_opcode_one(const struct selftest_opcode *op) | ||
40 | { | ||
41 | unsigned size; | ||
42 | |||
43 | kmemcheck_opcode_decode(op->insn, &size); | ||
44 | |||
45 | if (size == op->expected_size) | ||
46 | return true; | ||
47 | |||
48 | printk(KERN_WARNING "kmemcheck: opcode %s: expected size %d, got %d\n", | ||
49 | op->desc, op->expected_size, size); | ||
50 | return false; | ||
51 | } | ||
52 | |||
53 | static bool selftest_opcodes_all(void) | ||
54 | { | ||
55 | bool pass = true; | ||
56 | unsigned int i; | ||
57 | |||
58 | for (i = 0; i < ARRAY_SIZE(selftest_opcodes); ++i) | ||
59 | pass = pass && selftest_opcode_one(&selftest_opcodes[i]); | ||
60 | |||
61 | return pass; | ||
62 | } | ||
63 | |||
64 | bool kmemcheck_selftest(void) | ||
65 | { | ||
66 | bool pass = true; | ||
67 | |||
68 | pass = pass && selftest_opcodes_all(); | ||
69 | |||
70 | return pass; | ||
71 | } | ||
diff --git a/arch/x86/mm/kmemcheck/selftest.h b/arch/x86/mm/kmemcheck/selftest.h index 8d759aae453d..ea32a7d3cf1b 100644 --- a/arch/x86/mm/kmemcheck/selftest.h +++ b/arch/x86/mm/kmemcheck/selftest.h | |||
@@ -1,7 +1 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | /* SPDX-License-Identifier: GPL-2.0 */ | |
2 | #ifndef ARCH_X86_MM_KMEMCHECK_SELFTEST_H | ||
3 | #define ARCH_X86_MM_KMEMCHECK_SELFTEST_H | ||
4 | |||
5 | bool kmemcheck_selftest(void); | ||
6 | |||
7 | #endif | ||
diff --git a/arch/x86/mm/kmemcheck/shadow.c b/arch/x86/mm/kmemcheck/shadow.c deleted file mode 100644 index c2638a7d2c10..000000000000 --- a/arch/x86/mm/kmemcheck/shadow.c +++ /dev/null | |||
@@ -1,173 +0,0 @@ | |||
1 | #include <linux/kmemcheck.h> | ||
2 | #include <linux/export.h> | ||
3 | #include <linux/mm.h> | ||
4 | |||
5 | #include <asm/page.h> | ||
6 | #include <asm/pgtable.h> | ||
7 | |||
8 | #include "pte.h" | ||
9 | #include "shadow.h" | ||
10 | |||
11 | /* | ||
12 | * Return the shadow address for the given address. Returns NULL if the | ||
13 | * address is not tracked. | ||
14 | * | ||
15 | * We need to be extremely careful not to follow any invalid pointers, | ||
16 | * because this function can be called for *any* possible address. | ||
17 | */ | ||
18 | void *kmemcheck_shadow_lookup(unsigned long address) | ||
19 | { | ||
20 | pte_t *pte; | ||
21 | struct page *page; | ||
22 | |||
23 | if (!virt_addr_valid(address)) | ||
24 | return NULL; | ||
25 | |||
26 | pte = kmemcheck_pte_lookup(address); | ||
27 | if (!pte) | ||
28 | return NULL; | ||
29 | |||
30 | page = virt_to_page(address); | ||
31 | if (!page->shadow) | ||
32 | return NULL; | ||
33 | return page->shadow + (address & (PAGE_SIZE - 1)); | ||
34 | } | ||
35 | |||
36 | static void mark_shadow(void *address, unsigned int n, | ||
37 | enum kmemcheck_shadow status) | ||
38 | { | ||
39 | unsigned long addr = (unsigned long) address; | ||
40 | unsigned long last_addr = addr + n - 1; | ||
41 | unsigned long page = addr & PAGE_MASK; | ||
42 | unsigned long last_page = last_addr & PAGE_MASK; | ||
43 | unsigned int first_n; | ||
44 | void *shadow; | ||
45 | |||
46 | /* If the memory range crosses a page boundary, stop there. */ | ||
47 | if (page == last_page) | ||
48 | first_n = n; | ||
49 | else | ||
50 | first_n = page + PAGE_SIZE - addr; | ||
51 | |||
52 | shadow = kmemcheck_shadow_lookup(addr); | ||
53 | if (shadow) | ||
54 | memset(shadow, status, first_n); | ||
55 | |||
56 | addr += first_n; | ||
57 | n -= first_n; | ||
58 | |||
59 | /* Do full-page memset()s. */ | ||
60 | while (n >= PAGE_SIZE) { | ||
61 | shadow = kmemcheck_shadow_lookup(addr); | ||
62 | if (shadow) | ||
63 | memset(shadow, status, PAGE_SIZE); | ||
64 | |||
65 | addr += PAGE_SIZE; | ||
66 | n -= PAGE_SIZE; | ||
67 | } | ||
68 | |||
69 | /* Do the remaining page, if any. */ | ||
70 | if (n > 0) { | ||
71 | shadow = kmemcheck_shadow_lookup(addr); | ||
72 | if (shadow) | ||
73 | memset(shadow, status, n); | ||
74 | } | ||
75 | } | ||
76 | |||
77 | void kmemcheck_mark_unallocated(void *address, unsigned int n) | ||
78 | { | ||
79 | mark_shadow(address, n, KMEMCHECK_SHADOW_UNALLOCATED); | ||
80 | } | ||
81 | |||
82 | void kmemcheck_mark_uninitialized(void *address, unsigned int n) | ||
83 | { | ||
84 | mark_shadow(address, n, KMEMCHECK_SHADOW_UNINITIALIZED); | ||
85 | } | ||
86 | |||
87 | /* | ||
88 | * Fill the shadow memory of the given address such that the memory at that | ||
89 | * address is marked as being initialized. | ||
90 | */ | ||
91 | void kmemcheck_mark_initialized(void *address, unsigned int n) | ||
92 | { | ||
93 | mark_shadow(address, n, KMEMCHECK_SHADOW_INITIALIZED); | ||
94 | } | ||
95 | EXPORT_SYMBOL_GPL(kmemcheck_mark_initialized); | ||
96 | |||
97 | void kmemcheck_mark_freed(void *address, unsigned int n) | ||
98 | { | ||
99 | mark_shadow(address, n, KMEMCHECK_SHADOW_FREED); | ||
100 | } | ||
101 | |||
102 | void kmemcheck_mark_unallocated_pages(struct page *p, unsigned int n) | ||
103 | { | ||
104 | unsigned int i; | ||
105 | |||
106 | for (i = 0; i < n; ++i) | ||
107 | kmemcheck_mark_unallocated(page_address(&p[i]), PAGE_SIZE); | ||
108 | } | ||
109 | |||
110 | void kmemcheck_mark_uninitialized_pages(struct page *p, unsigned int n) | ||
111 | { | ||
112 | unsigned int i; | ||
113 | |||
114 | for (i = 0; i < n; ++i) | ||
115 | kmemcheck_mark_uninitialized(page_address(&p[i]), PAGE_SIZE); | ||
116 | } | ||
117 | |||
118 | void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n) | ||
119 | { | ||
120 | unsigned int i; | ||
121 | |||
122 | for (i = 0; i < n; ++i) | ||
123 | kmemcheck_mark_initialized(page_address(&p[i]), PAGE_SIZE); | ||
124 | } | ||
125 | |||
126 | enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size) | ||
127 | { | ||
128 | #ifdef CONFIG_KMEMCHECK_PARTIAL_OK | ||
129 | uint8_t *x; | ||
130 | unsigned int i; | ||
131 | |||
132 | x = shadow; | ||
133 | |||
134 | /* | ||
135 | * Make sure _some_ bytes are initialized. Gcc frequently generates | ||
136 | * code to access neighboring bytes. | ||
137 | */ | ||
138 | for (i = 0; i < size; ++i) { | ||
139 | if (x[i] == KMEMCHECK_SHADOW_INITIALIZED) | ||
140 | return x[i]; | ||
141 | } | ||
142 | |||
143 | return x[0]; | ||
144 | #else | ||
145 | return kmemcheck_shadow_test_all(shadow, size); | ||
146 | #endif | ||
147 | } | ||
148 | |||
149 | enum kmemcheck_shadow kmemcheck_shadow_test_all(void *shadow, unsigned int size) | ||
150 | { | ||
151 | uint8_t *x; | ||
152 | unsigned int i; | ||
153 | |||
154 | x = shadow; | ||
155 | |||
156 | /* All bytes must be initialized. */ | ||
157 | for (i = 0; i < size; ++i) { | ||
158 | if (x[i] != KMEMCHECK_SHADOW_INITIALIZED) | ||
159 | return x[i]; | ||
160 | } | ||
161 | |||
162 | return x[0]; | ||
163 | } | ||
164 | |||
165 | void kmemcheck_shadow_set(void *shadow, unsigned int size) | ||
166 | { | ||
167 | uint8_t *x; | ||
168 | unsigned int i; | ||
169 | |||
170 | x = shadow; | ||
171 | for (i = 0; i < size; ++i) | ||
172 | x[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
173 | } | ||
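
kmemcheck_shadow_test() differs from kmemcheck_shadow_test_all() only in strictness: with CONFIG_KMEMCHECK_PARTIAL_OK, a single initialized byte in the accessed range is accepted, which tolerates the compiler reading a few bytes past a small variable. The userspace sketch below puts the two policies side by side; the enum values simply mirror the ordering in shadow.h.

    /*
     * Userspace sketch contrasting the PARTIAL_OK test (any initialized
     * byte is enough) with the strict test (every byte must be
     * initialized).
     */
    #include <stdint.h>
    #include <stdio.h>

    enum shadow { UNALLOCATED, UNINITIALIZED, INITIALIZED, FREED };

    static enum shadow test_partial_ok(const uint8_t *x, unsigned int size)
    {
        unsigned int i;

        /* Some bytes initialized is enough (PARTIAL_OK). */
        for (i = 0; i < size; ++i)
            if (x[i] == INITIALIZED)
                return x[i];
        return x[0];
    }

    static enum shadow test_all(const uint8_t *x, unsigned int size)
    {
        unsigned int i;

        /* Every byte must be initialized. */
        for (i = 0; i < size; ++i)
            if (x[i] != INITIALIZED)
                return x[i];
        return x[0];
    }

    int main(void)
    {
        /* A 4-byte read where only the low 2 bytes were ever written. */
        uint8_t shadow[4] = { INITIALIZED, INITIALIZED,
                              UNINITIALIZED, UNINITIALIZED };

        printf("partial-ok: %d, strict: %d\n",
               test_partial_ok(shadow, 4), test_all(shadow, 4));
        return 0;
    }
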
diff --git a/arch/x86/mm/kmemcheck/shadow.h b/arch/x86/mm/kmemcheck/shadow.h index 49768dc18664..ea32a7d3cf1b 100644 --- a/arch/x86/mm/kmemcheck/shadow.h +++ b/arch/x86/mm/kmemcheck/shadow.h | |||
@@ -1,19 +1 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | /* SPDX-License-Identifier: GPL-2.0 */ | |
2 | #ifndef ARCH__X86__MM__KMEMCHECK__SHADOW_H | ||
3 | #define ARCH__X86__MM__KMEMCHECK__SHADOW_H | ||
4 | |||
5 | enum kmemcheck_shadow { | ||
6 | KMEMCHECK_SHADOW_UNALLOCATED, | ||
7 | KMEMCHECK_SHADOW_UNINITIALIZED, | ||
8 | KMEMCHECK_SHADOW_INITIALIZED, | ||
9 | KMEMCHECK_SHADOW_FREED, | ||
10 | }; | ||
11 | |||
12 | void *kmemcheck_shadow_lookup(unsigned long address); | ||
13 | |||
14 | enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size); | ||
15 | enum kmemcheck_shadow kmemcheck_shadow_test_all(void *shadow, | ||
16 | unsigned int size); | ||
17 | void kmemcheck_shadow_set(void *shadow, unsigned int size); | ||
18 | |||
19 | #endif | ||
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index baeb872283d9..69c238210325 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h | |||
@@ -594,21 +594,6 @@ static inline void tasklet_hi_schedule(struct tasklet_struct *t) | |||
594 | __tasklet_hi_schedule(t); | 594 | __tasklet_hi_schedule(t); |
595 | } | 595 | } |
596 | 596 | ||
597 | extern void __tasklet_hi_schedule_first(struct tasklet_struct *t); | ||
598 | |||
599 | /* | ||
600 | * This version avoids touching any other tasklets. Needed for kmemcheck | ||
601 | * in order not to take any page faults while enqueueing this tasklet; | ||
602 | * consider VERY carefully whether you really need this or | ||
603 | * tasklet_hi_schedule()... | ||
604 | */ | ||
605 | static inline void tasklet_hi_schedule_first(struct tasklet_struct *t) | ||
606 | { | ||
607 | if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) | ||
608 | __tasklet_hi_schedule_first(t); | ||
609 | } | ||
610 | |||
611 | |||
612 | static inline void tasklet_disable_nosync(struct tasklet_struct *t) | 597 | static inline void tasklet_disable_nosync(struct tasklet_struct *t) |
613 | { | 598 | { |
614 | atomic_inc(&t->count); | 599 | atomic_inc(&t->count); |
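
The removed comment spells out why kmemcheck needed its own enqueue helper: putting the tasklet at the head of the list touches only the new tasklet and the list head, so no other (possibly hidden) tasklet's memory is dereferenced while a fault is being handled. The userspace sketch below contrasts the two enqueue styles on a plain singly linked list; it is an illustration of the idea, not the kernel's per-CPU tasklet vector.

    /*
     * Userspace sketch: a tail enqueue writes through the tail pointer,
     * i.e. into the previously queued node's ->next field when the list
     * is non-empty, while a head enqueue only writes the new node and
     * the list head.
     */
    #include <stddef.h>
    #include <stdio.h>

    struct node {
        struct node *next;
        int id;
    };

    struct list {
        struct node *head;
        struct node **tail;     /* points at the last node's ->next */
    };

    /* Ordinary enqueue: dereferences 'tail', touching the last queued node. */
    static void enqueue_tail(struct list *l, struct node *n)
    {
        n->next = NULL;
        *l->tail = n;
        l->tail = &n->next;
    }

    /* "Schedule first" style: only the new node and the head are written. */
    static void enqueue_head(struct list *l, struct node *n)
    {
        n->next = l->head;
        l->head = n;
    }

    int main(void)
    {
        struct node a = { NULL, 1 }, b = { NULL, 2 }, c = { NULL, 3 };
        struct list l = { NULL, &l.head };

        enqueue_tail(&l, &a);
        enqueue_tail(&l, &b);
        enqueue_head(&l, &c);   /* jumps the queue without touching a or b */

        for (struct node *n = l.head; n; n = n->next)
            printf("%d ", n->id);
        printf("\n");           /* prints: 3 1 2 */
        return 0;
    }
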
diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h index 7b1d7bead7d9..ea32a7d3cf1b 100644 --- a/include/linux/kmemcheck.h +++ b/include/linux/kmemcheck.h | |||
@@ -1,172 +1 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | /* SPDX-License-Identifier: GPL-2.0 */ | |
2 | #ifndef LINUX_KMEMCHECK_H | ||
3 | #define LINUX_KMEMCHECK_H | ||
4 | |||
5 | #include <linux/mm_types.h> | ||
6 | #include <linux/types.h> | ||
7 | |||
8 | #ifdef CONFIG_KMEMCHECK | ||
9 | extern int kmemcheck_enabled; | ||
10 | |||
11 | /* The slab-related functions. */ | ||
12 | void kmemcheck_alloc_shadow(struct page *page, int order, gfp_t flags, int node); | ||
13 | void kmemcheck_free_shadow(struct page *page, int order); | ||
14 | void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object, | ||
15 | size_t size); | ||
16 | void kmemcheck_slab_free(struct kmem_cache *s, void *object, size_t size); | ||
17 | |||
18 | void kmemcheck_pagealloc_alloc(struct page *p, unsigned int order, | ||
19 | gfp_t gfpflags); | ||
20 | |||
21 | void kmemcheck_show_pages(struct page *p, unsigned int n); | ||
22 | void kmemcheck_hide_pages(struct page *p, unsigned int n); | ||
23 | |||
24 | bool kmemcheck_page_is_tracked(struct page *p); | ||
25 | |||
26 | void kmemcheck_mark_unallocated(void *address, unsigned int n); | ||
27 | void kmemcheck_mark_uninitialized(void *address, unsigned int n); | ||
28 | void kmemcheck_mark_initialized(void *address, unsigned int n); | ||
29 | void kmemcheck_mark_freed(void *address, unsigned int n); | ||
30 | |||
31 | void kmemcheck_mark_unallocated_pages(struct page *p, unsigned int n); | ||
32 | void kmemcheck_mark_uninitialized_pages(struct page *p, unsigned int n); | ||
33 | void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n); | ||
34 | |||
35 | int kmemcheck_show_addr(unsigned long address); | ||
36 | int kmemcheck_hide_addr(unsigned long address); | ||
37 | |||
38 | bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size); | ||
39 | |||
40 | /* | ||
41 | * Bitfield annotations | ||
42 | * | ||
43 | * How to use: If you have a struct using bitfields, for example | ||
44 | * | ||
45 | * struct a { | ||
46 | * int x:8, y:8; | ||
47 | * }; | ||
48 | * | ||
49 | * then this should be rewritten as | ||
50 | * | ||
51 | * struct a { | ||
52 | * kmemcheck_bitfield_begin(flags); | ||
53 | * int x:8, y:8; | ||
54 | * kmemcheck_bitfield_end(flags); | ||
55 | * }; | ||
56 | * | ||
57 | * Now the "flags_begin" and "flags_end" members may be used to refer to the | ||
58 | * beginning and end, respectively, of the bitfield (and things like | ||
59 | * &x.flags_begin is allowed). As soon as the struct is allocated, the bit- | ||
60 | * fields should be annotated: | ||
61 | * | ||
62 | * struct a *a = kmalloc(sizeof(struct a), GFP_KERNEL); | ||
63 | * kmemcheck_annotate_bitfield(a, flags); | ||
64 | */ | ||
65 | #define kmemcheck_bitfield_begin(name) \ | ||
66 | int name##_begin[0]; | ||
67 | |||
68 | #define kmemcheck_bitfield_end(name) \ | ||
69 | int name##_end[0]; | ||
70 | |||
71 | #define kmemcheck_annotate_bitfield(ptr, name) \ | ||
72 | do { \ | ||
73 | int _n; \ | ||
74 | \ | ||
75 | if (!ptr) \ | ||
76 | break; \ | ||
77 | \ | ||
78 | _n = (long) &((ptr)->name##_end) \ | ||
79 | - (long) &((ptr)->name##_begin); \ | ||
80 | BUILD_BUG_ON(_n < 0); \ | ||
81 | \ | ||
82 | kmemcheck_mark_initialized(&((ptr)->name##_begin), _n); \ | ||
83 | } while (0) | ||
84 | |||
85 | #define kmemcheck_annotate_variable(var) \ | ||
86 | do { \ | ||
87 | kmemcheck_mark_initialized(&(var), sizeof(var)); \ | ||
88 | } while (0) \ | ||
89 | |||
90 | #else | ||
91 | #define kmemcheck_enabled 0 | ||
92 | |||
93 | static inline void | ||
94 | kmemcheck_alloc_shadow(struct page *page, int order, gfp_t flags, int node) | ||
95 | { | ||
96 | } | ||
97 | |||
98 | static inline void | ||
99 | kmemcheck_free_shadow(struct page *page, int order) | ||
100 | { | ||
101 | } | ||
102 | |||
103 | static inline void | ||
104 | kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object, | ||
105 | size_t size) | ||
106 | { | ||
107 | } | ||
108 | |||
109 | static inline void kmemcheck_slab_free(struct kmem_cache *s, void *object, | ||
110 | size_t size) | ||
111 | { | ||
112 | } | ||
113 | |||
114 | static inline void kmemcheck_pagealloc_alloc(struct page *p, | ||
115 | unsigned int order, gfp_t gfpflags) | ||
116 | { | ||
117 | } | ||
118 | |||
119 | static inline bool kmemcheck_page_is_tracked(struct page *p) | ||
120 | { | ||
121 | return false; | ||
122 | } | ||
123 | |||
124 | static inline void kmemcheck_mark_unallocated(void *address, unsigned int n) | ||
125 | { | ||
126 | } | ||
127 | |||
128 | static inline void kmemcheck_mark_uninitialized(void *address, unsigned int n) | ||
129 | { | ||
130 | } | ||
131 | |||
132 | static inline void kmemcheck_mark_initialized(void *address, unsigned int n) | ||
133 | { | ||
134 | } | ||
135 | |||
136 | static inline void kmemcheck_mark_freed(void *address, unsigned int n) | ||
137 | { | ||
138 | } | ||
139 | |||
140 | static inline void kmemcheck_mark_unallocated_pages(struct page *p, | ||
141 | unsigned int n) | ||
142 | { | ||
143 | } | ||
144 | |||
145 | static inline void kmemcheck_mark_uninitialized_pages(struct page *p, | ||
146 | unsigned int n) | ||
147 | { | ||
148 | } | ||
149 | |||
150 | static inline void kmemcheck_mark_initialized_pages(struct page *p, | ||
151 | unsigned int n) | ||
152 | { | ||
153 | } | ||
154 | |||
155 | static inline bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size) | ||
156 | { | ||
157 | return true; | ||
158 | } | ||
159 | |||
160 | #define kmemcheck_bitfield_begin(name) | ||
161 | #define kmemcheck_bitfield_end(name) | ||
162 | #define kmemcheck_annotate_bitfield(ptr, name) \ | ||
163 | do { \ | ||
164 | } while (0) | ||
165 | |||
166 | #define kmemcheck_annotate_variable(var) \ | ||
167 | do { \ | ||
168 | } while (0) | ||
169 | |||
170 | #endif /* CONFIG_KMEMCHECK */ | ||
171 | |||
172 | #endif /* LINUX_KMEMCHECK_H */ | ||
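
The bitfield comment above already walks through the intended usage; the following is a self-contained userspace version of the same trick, with kmemcheck_mark_initialized() stubbed out to a printf so the computed span is visible. The BUILD_BUG_ON sanity check is dropped here, and zero-length arrays are a GNU extension accepted by gcc and clang.

    /*
     * Userspace sketch of the bitfield annotation: zero-length
     * "begin"/"end" members bracket the bitfield so its extent can be
     * computed and marked initialized after allocation.
     */
    #include <stdio.h>
    #include <stdlib.h>

    static void kmemcheck_mark_initialized(void *address, unsigned int n)
    {
        printf("marking %u byte(s) at %p as initialized\n", n, address);
    }

    #define kmemcheck_bitfield_begin(name)  int name##_begin[0];
    #define kmemcheck_bitfield_end(name)    int name##_end[0];

    #define kmemcheck_annotate_bitfield(ptr, name)                      \
        do {                                                            \
            int _n;                                                     \
                                                                        \
            if (!ptr)                                                   \
                break;                                                  \
                                                                        \
            _n = (long) &((ptr)->name##_end)                            \
                - (long) &((ptr)->name##_begin);                        \
            kmemcheck_mark_initialized(&((ptr)->name##_begin), _n);     \
        } while (0)

    struct a {
        kmemcheck_bitfield_begin(flags);
        int x:8, y:8;
        kmemcheck_bitfield_end(flags);
    };

    int main(void)
    {
        struct a *a = malloc(sizeof(*a));

        kmemcheck_annotate_bitfield(a, flags);  /* marks the 4-byte bitfield word */
        free(a);
        return 0;
    }
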
diff --git a/kernel/softirq.c b/kernel/softirq.c index 662f7b1b7a78..2f5e87f1bae2 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -486,16 +486,6 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) | |||
486 | } | 486 | } |
487 | EXPORT_SYMBOL(__tasklet_hi_schedule); | 487 | EXPORT_SYMBOL(__tasklet_hi_schedule); |
488 | 488 | ||
489 | void __tasklet_hi_schedule_first(struct tasklet_struct *t) | ||
490 | { | ||
491 | lockdep_assert_irqs_disabled(); | ||
492 | |||
493 | t->next = __this_cpu_read(tasklet_hi_vec.head); | ||
494 | __this_cpu_write(tasklet_hi_vec.head, t); | ||
495 | __raise_softirq_irqoff(HI_SOFTIRQ); | ||
496 | } | ||
497 | EXPORT_SYMBOL(__tasklet_hi_schedule_first); | ||
498 | |||
499 | static __latent_entropy void tasklet_action(struct softirq_action *a) | 489 | static __latent_entropy void tasklet_action(struct softirq_action *a) |
500 | { | 490 | { |
501 | struct tasklet_struct *list; | 491 | struct tasklet_struct *list; |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 9576bd582d4a..7638e2f7fff8 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -30,7 +30,6 @@ | |||
30 | #include <linux/proc_fs.h> | 30 | #include <linux/proc_fs.h> |
31 | #include <linux/security.h> | 31 | #include <linux/security.h> |
32 | #include <linux/ctype.h> | 32 | #include <linux/ctype.h> |
33 | #include <linux/kmemcheck.h> | ||
34 | #include <linux/kmemleak.h> | 33 | #include <linux/kmemleak.h> |
35 | #include <linux/fs.h> | 34 | #include <linux/fs.h> |
36 | #include <linux/init.h> | 35 | #include <linux/init.h> |
@@ -1174,15 +1173,6 @@ static struct ctl_table kern_table[] = { | |||
1174 | .extra2 = &one_thousand, | 1173 | .extra2 = &one_thousand, |
1175 | }, | 1174 | }, |
1176 | #endif | 1175 | #endif |
1177 | #ifdef CONFIG_KMEMCHECK | ||
1178 | { | ||
1179 | .procname = "kmemcheck", | ||
1180 | .data = &kmemcheck_enabled, | ||
1181 | .maxlen = sizeof(int), | ||
1182 | .mode = 0644, | ||
1183 | .proc_handler = proc_dointvec, | ||
1184 | }, | ||
1185 | #endif | ||
1186 | { | 1176 | { |
1187 | .procname = "panic_on_warn", | 1177 | .procname = "panic_on_warn", |
1188 | .data = &panic_on_warn, | 1178 | .data = &panic_on_warn, |
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 07ce7449765a..5402e3954659 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug | |||
@@ -504,7 +504,7 @@ config DEBUG_OBJECTS_ENABLE_DEFAULT | |||
504 | 504 | ||
505 | config DEBUG_SLAB | 505 | config DEBUG_SLAB |
506 | bool "Debug slab memory allocations" | 506 | bool "Debug slab memory allocations" |
507 | depends on DEBUG_KERNEL && SLAB && !KMEMCHECK | 507 | depends on DEBUG_KERNEL && SLAB |
508 | help | 508 | help |
509 | Say Y here to have the kernel do limited verification on memory | 509 | Say Y here to have the kernel do limited verification on memory |
510 | allocation as well as poisoning memory on free to catch use of freed | 510 | allocation as well as poisoning memory on free to catch use of freed |
@@ -516,7 +516,7 @@ config DEBUG_SLAB_LEAK | |||
516 | 516 | ||
517 | config SLUB_DEBUG_ON | 517 | config SLUB_DEBUG_ON |
518 | bool "SLUB debugging on by default" | 518 | bool "SLUB debugging on by default" |
519 | depends on SLUB && SLUB_DEBUG && !KMEMCHECK | 519 | depends on SLUB && SLUB_DEBUG |
520 | default n | 520 | default n |
521 | help | 521 | help |
522 | Boot with debugging on by default. SLUB boots by default with | 522 | Boot with debugging on by default. SLUB boots by default with |
@@ -730,8 +730,6 @@ config DEBUG_STACKOVERFLOW | |||
730 | 730 | ||
731 | If in doubt, say "N". | 731 | If in doubt, say "N". |
732 | 732 | ||
733 | source "lib/Kconfig.kmemcheck" | ||
734 | |||
735 | source "lib/Kconfig.kasan" | 733 | source "lib/Kconfig.kasan" |
736 | 734 | ||
737 | endmenu # "Memory Debugging" | 735 | endmenu # "Memory Debugging" |
diff --git a/lib/Kconfig.kmemcheck b/lib/Kconfig.kmemcheck deleted file mode 100644 index 846e039a86b4..000000000000 --- a/lib/Kconfig.kmemcheck +++ /dev/null | |||
@@ -1,94 +0,0 @@ | |||
1 | config HAVE_ARCH_KMEMCHECK | ||
2 | bool | ||
3 | |||
4 | if HAVE_ARCH_KMEMCHECK | ||
5 | |||
6 | menuconfig KMEMCHECK | ||
7 | bool "kmemcheck: trap use of uninitialized memory" | ||
8 | depends on DEBUG_KERNEL | ||
9 | depends on !X86_USE_3DNOW | ||
10 | depends on SLUB || SLAB | ||
11 | depends on !CC_OPTIMIZE_FOR_SIZE | ||
12 | depends on !FUNCTION_TRACER | ||
13 | select FRAME_POINTER | ||
14 | select STACKTRACE | ||
15 | default n | ||
16 | help | ||
17 | This option enables tracing of dynamically allocated kernel memory | ||
18 | to see if memory is used before it has been given an initial value. | ||
19 | Be aware that this requires half of your memory for bookkeeping and | ||
20 | will insert extra code at *every* read and write to tracked memory | ||
21 | thus slowing down the kernel code (but user code is unaffected). | ||
22 | |||
23 | The kernel may be started with kmemcheck=0 or kmemcheck=1 to disable | ||
24 | or enable kmemcheck at boot-time. If the kernel is started with | ||
25 | kmemcheck=0, the large memory and CPU overhead is not incurred. | ||
26 | |||
27 | choice | ||
28 | prompt "kmemcheck: default mode at boot" | ||
29 | depends on KMEMCHECK | ||
30 | default KMEMCHECK_ONESHOT_BY_DEFAULT | ||
31 | help | ||
32 | This option controls the default behaviour of kmemcheck when the | ||
33 | kernel boots and no kmemcheck= parameter is given. | ||
34 | |||
35 | config KMEMCHECK_DISABLED_BY_DEFAULT | ||
36 | bool "disabled" | ||
37 | depends on KMEMCHECK | ||
38 | |||
39 | config KMEMCHECK_ENABLED_BY_DEFAULT | ||
40 | bool "enabled" | ||
41 | depends on KMEMCHECK | ||
42 | |||
43 | config KMEMCHECK_ONESHOT_BY_DEFAULT | ||
44 | bool "one-shot" | ||
45 | depends on KMEMCHECK | ||
46 | help | ||
47 | In one-shot mode, only the first error detected is reported before | ||
48 | kmemcheck is disabled. | ||
49 | |||
50 | endchoice | ||
51 | |||
52 | config KMEMCHECK_QUEUE_SIZE | ||
53 | int "kmemcheck: error queue size" | ||
54 | depends on KMEMCHECK | ||
55 | default 64 | ||
56 | help | ||
57 | Select the maximum number of errors to store in the queue. Since | ||
58 | errors can occur virtually anywhere and in any context, we need a | ||
59 | temporary storage area which is guaranteed not to generate any | ||
60 | other faults. The queue will be emptied as soon as a tasklet may | ||
61 | be scheduled. If the queue is full, new error reports will be | ||
62 | lost. | ||
63 | |||
64 | config KMEMCHECK_SHADOW_COPY_SHIFT | ||
65 | int "kmemcheck: shadow copy size (5 => 32 bytes, 6 => 64 bytes)" | ||
66 | depends on KMEMCHECK | ||
67 | range 2 8 | ||
68 | default 5 | ||
69 | help | ||
70 | Select the number of shadow bytes to save along with each entry of | ||
71 | the queue. These bytes indicate what parts of an allocation are | ||
72 | initialized, uninitialized, etc. and will be displayed when an | ||
73 | error is detected to help the debugging of a particular problem. | ||
74 | |||
75 | config KMEMCHECK_PARTIAL_OK | ||
76 | bool "kmemcheck: allow partially uninitialized memory" | ||
77 | depends on KMEMCHECK | ||
78 | default y | ||
79 | help | ||
80 | This option works around certain GCC optimizations that produce | ||
81 | 32-bit reads from 16-bit variables where the upper 16 bits are | ||
82 | thrown away afterwards. This may of course also hide some real | ||
83 | bugs. | ||
84 | |||
85 | config KMEMCHECK_BITOPS_OK | ||
86 | bool "kmemcheck: allow bit-field manipulation" | ||
87 | depends on KMEMCHECK | ||
88 | default n | ||
89 | help | ||
90 | This option silences warnings that would be generated for bit-field | ||
91 | accesses where not all the bits are initialized at the same time. | ||
92 | This may also hide some real bugs. | ||
93 | |||
94 | endif | ||
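
The three modes described in the help text map onto the integer kmemcheck_enabled (0 disabled, 1 enabled, 2 one-shot). Below is a tiny userspace sketch of the one-shot behaviour -- the first report flips the value to 0, so later hits stay silent.

    /*
     * Userspace sketch of the three modes: 0 = disabled, 1 = enabled,
     * 2 = one-shot (disable itself after the first report).
     */
    #include <stdio.h>

    static int kmemcheck_enabled = 2;   /* one-shot, the default */

    static void report_uninitialized(unsigned long addr)
    {
        if (kmemcheck_enabled)
            printf("kmemcheck: uninitialized read at %#lx\n", addr);

        if (kmemcheck_enabled == 2)     /* one-shot: disable after the first hit */
            kmemcheck_enabled = 0;
    }

    int main(void)
    {
        report_uninitialized(0x1000);   /* reported */
        report_uninitialized(0x2000);   /* silent: one-shot already fired */
        return 0;
    }
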
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug index 5b0adf1435de..e5e606ee5f71 100644 --- a/mm/Kconfig.debug +++ b/mm/Kconfig.debug | |||
@@ -11,7 +11,6 @@ config DEBUG_PAGEALLOC | |||
11 | bool "Debug page memory allocations" | 11 | bool "Debug page memory allocations" |
12 | depends on DEBUG_KERNEL | 12 | depends on DEBUG_KERNEL |
13 | depends on !HIBERNATION || ARCH_SUPPORTS_DEBUG_PAGEALLOC && !PPC && !SPARC | 13 | depends on !HIBERNATION || ARCH_SUPPORTS_DEBUG_PAGEALLOC && !PPC && !SPARC |
14 | depends on !KMEMCHECK | ||
15 | select PAGE_EXTENSION | 14 | select PAGE_EXTENSION |
16 | select PAGE_POISONING if !ARCH_SUPPORTS_DEBUG_PAGEALLOC | 15 | select PAGE_POISONING if !ARCH_SUPPORTS_DEBUG_PAGEALLOC |
17 | ---help--- | 16 | ---help--- |
diff --git a/mm/Makefile b/mm/Makefile index 4659b93cba43..e7ebd176fb93 100644 --- a/mm/Makefile +++ b/mm/Makefile | |||
@@ -17,7 +17,6 @@ KCOV_INSTRUMENT_slub.o := n | |||
17 | KCOV_INSTRUMENT_page_alloc.o := n | 17 | KCOV_INSTRUMENT_page_alloc.o := n |
18 | KCOV_INSTRUMENT_debug-pagealloc.o := n | 18 | KCOV_INSTRUMENT_debug-pagealloc.o := n |
19 | KCOV_INSTRUMENT_kmemleak.o := n | 19 | KCOV_INSTRUMENT_kmemleak.o := n |
20 | KCOV_INSTRUMENT_kmemcheck.o := n | ||
21 | KCOV_INSTRUMENT_memcontrol.o := n | 20 | KCOV_INSTRUMENT_memcontrol.o := n |
22 | KCOV_INSTRUMENT_mmzone.o := n | 21 | KCOV_INSTRUMENT_mmzone.o := n |
23 | KCOV_INSTRUMENT_vmstat.o := n | 22 | KCOV_INSTRUMENT_vmstat.o := n |
@@ -70,7 +69,6 @@ obj-$(CONFIG_KSM) += ksm.o | |||
70 | obj-$(CONFIG_PAGE_POISONING) += page_poison.o | 69 | obj-$(CONFIG_PAGE_POISONING) += page_poison.o |
71 | obj-$(CONFIG_SLAB) += slab.o | 70 | obj-$(CONFIG_SLAB) += slab.o |
72 | obj-$(CONFIG_SLUB) += slub.o | 71 | obj-$(CONFIG_SLUB) += slub.o |
73 | obj-$(CONFIG_KMEMCHECK) += kmemcheck.o | ||
74 | obj-$(CONFIG_KASAN) += kasan/ | 72 | obj-$(CONFIG_KASAN) += kasan/ |
75 | obj-$(CONFIG_FAILSLAB) += failslab.o | 73 | obj-$(CONFIG_FAILSLAB) += failslab.o |
76 | obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o | 74 | obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o |
diff --git a/mm/kmemcheck.c b/mm/kmemcheck.c index b3a4d61d341c..cec594032515 100644 --- a/mm/kmemcheck.c +++ b/mm/kmemcheck.c | |||
@@ -1,126 +1 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | // SPDX-License-Identifier: GPL-2.0 | |
2 | #include <linux/gfp.h> | ||
3 | #include <linux/mm_types.h> | ||
4 | #include <linux/mm.h> | ||
5 | #include <linux/slab.h> | ||
6 | #include "slab.h" | ||
7 | #include <linux/kmemcheck.h> | ||
8 | |||
9 | void kmemcheck_alloc_shadow(struct page *page, int order, gfp_t flags, int node) | ||
10 | { | ||
11 | struct page *shadow; | ||
12 | int pages; | ||
13 | int i; | ||
14 | |||
15 | pages = 1 << order; | ||
16 | |||
17 | /* | ||
18 | * With kmemcheck enabled, we need to allocate a memory area for the | ||
19 | * shadow bits as well. | ||
20 | */ | ||
21 | shadow = alloc_pages_node(node, flags, order); | ||
22 | if (!shadow) { | ||
23 | if (printk_ratelimit()) | ||
24 | pr_err("kmemcheck: failed to allocate shadow bitmap\n"); | ||
25 | return; | ||
26 | } | ||
27 | |||
28 | for(i = 0; i < pages; ++i) | ||
29 | page[i].shadow = page_address(&shadow[i]); | ||
30 | |||
31 | /* | ||
32 | * Mark it as non-present for the MMU so that our accesses to | ||
33 | * this memory will trigger a page fault and let us analyze | ||
34 | * the memory accesses. | ||
35 | */ | ||
36 | kmemcheck_hide_pages(page, pages); | ||
37 | } | ||
38 | |||
39 | void kmemcheck_free_shadow(struct page *page, int order) | ||
40 | { | ||
41 | struct page *shadow; | ||
42 | int pages; | ||
43 | int i; | ||
44 | |||
45 | if (!kmemcheck_page_is_tracked(page)) | ||
46 | return; | ||
47 | |||
48 | pages = 1 << order; | ||
49 | |||
50 | kmemcheck_show_pages(page, pages); | ||
51 | |||
52 | shadow = virt_to_page(page[0].shadow); | ||
53 | |||
54 | for(i = 0; i < pages; ++i) | ||
55 | page[i].shadow = NULL; | ||
56 | |||
57 | __free_pages(shadow, order); | ||
58 | } | ||
59 | |||
60 | void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object, | ||
61 | size_t size) | ||
62 | { | ||
63 | if (unlikely(!object)) /* Skip object if allocation failed */ | ||
64 | return; | ||
65 | |||
66 | /* | ||
67 | * Has already been memset(), which initializes the shadow for us | ||
68 | * as well. | ||
69 | */ | ||
70 | if (gfpflags & __GFP_ZERO) | ||
71 | return; | ||
72 | |||
73 | /* No need to initialize the shadow of a non-tracked slab. */ | ||
74 | if (s->flags & SLAB_NOTRACK) | ||
75 | return; | ||
76 | |||
77 | if (!kmemcheck_enabled || gfpflags & __GFP_NOTRACK) { | ||
78 | /* | ||
79 | * Allow notracked objects to be allocated from | ||
80 | * tracked caches. Note however that these objects | ||
81 | * will still get page faults on access, they just | ||
82 | * won't ever be flagged as uninitialized. If page | ||
83 | * faults are not acceptable, the slab cache itself | ||
84 | * should be marked NOTRACK. | ||
85 | */ | ||
86 | kmemcheck_mark_initialized(object, size); | ||
87 | } else if (!s->ctor) { | ||
88 | /* | ||
89 | * New objects should be marked uninitialized before | ||
90 | * they're returned to the caller. | ||
91 | */ | ||
92 | kmemcheck_mark_uninitialized(object, size); | ||
93 | } | ||
94 | } | ||
95 | |||
96 | void kmemcheck_slab_free(struct kmem_cache *s, void *object, size_t size) | ||
97 | { | ||
98 | /* TODO: RCU freeing is unsupported for now; hide false positives. */ | ||
99 | if (!s->ctor && !(s->flags & SLAB_TYPESAFE_BY_RCU)) | ||
100 | kmemcheck_mark_freed(object, size); | ||
101 | } | ||
102 | |||
103 | void kmemcheck_pagealloc_alloc(struct page *page, unsigned int order, | ||
104 | gfp_t gfpflags) | ||
105 | { | ||
106 | int pages; | ||
107 | |||
108 | if (gfpflags & (__GFP_HIGHMEM | __GFP_NOTRACK)) | ||
109 | return; | ||
110 | |||
111 | pages = 1 << order; | ||
112 | |||
113 | /* | ||
114 | * NOTE: We choose to track GFP_ZERO pages too; in fact, they | ||
115 | * can become uninitialized by copying uninitialized memory | ||
116 | * into them. | ||
117 | */ | ||
118 | |||
119 | /* XXX: Can use zone->node for node? */ | ||
120 | kmemcheck_alloc_shadow(page, order, gfpflags, -1); | ||
121 | |||
122 | if (gfpflags & __GFP_ZERO) | ||
123 | kmemcheck_mark_initialized_pages(page, pages); | ||
124 | else | ||
125 | kmemcheck_mark_uninitialized_pages(page, pages); | ||
126 | } | ||
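
kmemcheck_slab_alloc() above decides, per allocation, whether a new object's shadow should read as initialized (no warnings) or uninitialized (the first read warns). The sketch below models that decision order in userspace; the flag values are stand-ins, not the real gfp or slab bits.

    /*
     * Userspace sketch of the slab-allocation decision: which freshly
     * allocated objects end up marked initialized and which stay
     * uninitialized so the first read trips a warning.
     */
    #include <stdbool.h>
    #include <stdio.h>

    #define GFP_ZERO    0x1     /* stand-in for __GFP_ZERO    */
    #define GFP_NOTRACK 0x2     /* stand-in for __GFP_NOTRACK */

    static const char *shadow_after_alloc(bool kmemcheck_enabled,
                                          bool cache_notrack,
                                          bool cache_has_ctor,
                                          unsigned int gfpflags)
    {
        if (gfpflags & GFP_ZERO)
            return "initialized (object was memset to zero)";
        if (cache_notrack)
            return "untouched (cache is not tracked)";
        if (!kmemcheck_enabled || (gfpflags & GFP_NOTRACK))
            return "initialized (tracking bypassed for this object)";
        if (!cache_has_ctor)
            return "uninitialized (first read will be flagged)";
        return "untouched (constructor already ran)";
    }

    int main(void)
    {
        printf("kmalloc:       %s\n", shadow_after_alloc(true, false, false, 0));
        printf("kzalloc:       %s\n", shadow_after_alloc(true, false, false, GFP_ZERO));
        printf("notrack alloc: %s\n", shadow_after_alloc(true, false, false, GFP_NOTRACK));
        return 0;
    }
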
@@ -1371,7 +1371,7 @@ static inline void *slab_free_hook(struct kmem_cache *s, void *x) | |||
1371 | * So in order to make the debug calls that expect irqs to be | 1371 | * So in order to make the debug calls that expect irqs to be |
1372 | * disabled we need to disable interrupts temporarily. | 1372 | * disabled we need to disable interrupts temporarily. |
1373 | */ | 1373 | */ |
1374 | #if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP) | 1374 | #ifdef CONFIG_LOCKDEP |
1375 | { | 1375 | { |
1376 | unsigned long flags; | 1376 | unsigned long flags; |
1377 | 1377 | ||
@@ -1399,8 +1399,7 @@ static inline void slab_free_freelist_hook(struct kmem_cache *s, | |||
1399 | * Compiler cannot detect this function can be removed if slab_free_hook() | 1399 | * Compiler cannot detect this function can be removed if slab_free_hook() |
1400 | * evaluates to nothing. Thus, catch all relevant config debug options here. | 1400 | * evaluates to nothing. Thus, catch all relevant config debug options here. |
1401 | */ | 1401 | */ |
1402 | #if defined(CONFIG_KMEMCHECK) || \ | 1402 | #if defined(CONFIG_LOCKDEP) || \ |
1403 | defined(CONFIG_LOCKDEP) || \ | ||
1404 | defined(CONFIG_DEBUG_KMEMLEAK) || \ | 1403 | defined(CONFIG_DEBUG_KMEMLEAK) || \ |
1405 | defined(CONFIG_DEBUG_OBJECTS_FREE) || \ | 1404 | defined(CONFIG_DEBUG_OBJECTS_FREE) || \ |
1406 | defined(CONFIG_KASAN) | 1405 | defined(CONFIG_KASAN) |
diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 67d051edd615..7bd52b8f63d4 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc | |||
@@ -2182,8 +2182,6 @@ sub dump_struct($$) { | |||
2182 | # strip comments: | 2182 | # strip comments: |
2183 | $members =~ s/\/\*.*?\*\///gos; | 2183 | $members =~ s/\/\*.*?\*\///gos; |
2184 | $nested =~ s/\/\*.*?\*\///gos; | 2184 | $nested =~ s/\/\*.*?\*\///gos; |
2185 | # strip kmemcheck_bitfield_{begin,end}.*; | ||
2186 | $members =~ s/kmemcheck_bitfield_.*?;//gos; | ||
2187 | # strip attributes | 2185 | # strip attributes |
2188 | $members =~ s/__attribute__\s*\(\([a-z,_\*\s\(\)]*\)\)//i; | 2186 | $members =~ s/__attribute__\s*\(\([a-z,_\*\s\(\)]*\)\)//i; |
2189 | $members =~ s/__aligned\s*\([^;]*\)//gos; | 2187 | $members =~ s/__aligned\s*\([^;]*\)//gos; |
diff --git a/tools/include/linux/kmemcheck.h b/tools/include/linux/kmemcheck.h index 2bccd2c7b897..ea32a7d3cf1b 100644 --- a/tools/include/linux/kmemcheck.h +++ b/tools/include/linux/kmemcheck.h | |||
@@ -1,9 +1 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | /* SPDX-License-Identifier: GPL-2.0 */ | |
2 | #ifndef _LIBLOCKDEP_LINUX_KMEMCHECK_H_ | ||
3 | #define _LIBLOCKDEP_LINUX_KMEMCHECK_H_ | ||
4 | |||
5 | static inline void kmemcheck_mark_initialized(void *address, unsigned int n) | ||
6 | { | ||
7 | } | ||
8 | |||
9 | #endif | ||