71 files changed, 2899 insertions, 128 deletions
diff --git a/Documentation/kmemcheck.txt b/Documentation/kmemcheck.txt
new file mode 100644
index 000000000000..363044609dad
--- /dev/null
+++ b/Documentation/kmemcheck.txt
| @@ -0,0 +1,773 @@ | |||
| 1 | GETTING STARTED WITH KMEMCHECK | ||
| 2 | ============================== | ||
| 3 | |||
| 4 | Vegard Nossum <vegardno@ifi.uio.no> | ||
| 5 | |||
| 6 | |||
| 7 | Contents | ||
| 8 | ======== | ||
| 9 | 0. Introduction | ||
| 10 | 1. Downloading | ||
| 11 | 2. Configuring and compiling | ||
| 12 | 3. How to use | ||
| 13 | 3.1. Booting | ||
| 14 | 3.2. Run-time enable/disable | ||
| 15 | 3.3. Debugging | ||
| 16 | 3.4. Annotating false positives | ||
| 17 | 4. Reporting errors | ||
| 18 | 5. Technical description | ||
| 19 | |||
| 20 | |||
| 21 | 0. Introduction | ||
| 22 | =============== | ||
| 23 | |||
| 24 | kmemcheck is a debugging feature for the Linux Kernel. More specifically, it | ||
| 25 | is a dynamic checker that detects and warns about some uses of uninitialized | ||
| 26 | memory. | ||
| 27 | |||
| 28 | Userspace programmers might be familiar with Valgrind's memcheck. The main | ||
| 29 | difference between memcheck and kmemcheck is that memcheck works for userspace | ||
| 30 | programs only, and kmemcheck works for the kernel only. The implementations | ||
| 31 | are of course vastly different. Because of this, kmemcheck is not as accurate | ||
| 32 | as memcheck, but it turns out to be good enough in practice to discover real | ||
| 33 | programmer errors that the compiler is not able to find through static | ||
| 34 | analysis. | ||
| 35 | |||
| 36 | Enabling kmemcheck on a kernel will probably slow it down to the extent that | ||
| 37 | the machine will not be usable for normal workloads such as an | ||
| 38 | interactive desktop. kmemcheck will also cause the kernel to use about twice | ||
| 39 | as much memory as normal. For this reason, kmemcheck is strictly a debugging | ||
| 40 | feature. | ||
| 41 | |||
| 42 | |||
| 43 | 1. Downloading | ||
| 44 | ============== | ||
| 45 | |||
| 46 | kmemcheck can only be downloaded using git. If you want to write patches | ||
| 47 | against the current code, you should use the kmemcheck development branch of | ||
| 48 | the tip tree. It is also possible to use the linux-next tree, which | ||
| 49 | includes the latest version of kmemcheck. | ||
| 50 | |||
| 51 | Assuming that you've already cloned the linux-2.6.git repository, all you | ||
| 52 | have to do is add the -tip tree as a remote, like this: | ||
| 53 | |||
| 54 | $ git remote add tip git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git | ||
| 55 | |||
| 56 | To actually download the tree, fetch the remote: | ||
| 57 | |||
| 58 | $ git fetch tip | ||
| 59 | |||
| 60 | And to check out a new local branch with the kmemcheck code: | ||
| 61 | |||
| 62 | $ git checkout -b kmemcheck tip/kmemcheck | ||
| 63 | |||
| 64 | General instructions for the -tip tree can be found here: | ||
| 65 | http://people.redhat.com/mingo/tip.git/readme.txt | ||
| 66 | |||
| 67 | |||
| 68 | 2. Configuring and compiling | ||
| 69 | ============================ | ||
| 70 | |||
| 71 | kmemcheck only works for the x86 (both 32- and 64-bit) platform. A number of | ||
| 72 | configuration variables must have specific settings in order for the kmemcheck | ||
| 73 | menu to even appear in "menuconfig". These are: | ||
| 74 | |||
| 75 | o CONFIG_CC_OPTIMIZE_FOR_SIZE=n | ||
| 76 | |||
| 77 | This option is located under "General setup" / "Optimize for size". | ||
| 78 | |||
| 79 | Without this, gcc will use certain optimizations that usually lead to | ||
| 80 | false positive warnings from kmemcheck. An example of this is a 16-bit | ||
| 81 | field in a struct, where gcc may load 32 bits, then discard the upper | ||
| 82 | 16 bits. kmemcheck sees only the 32-bit load, and may trigger a | ||
| 83 | warning for the upper 16 bits (if they're uninitialized); see the sketch after this list. | ||
| 84 | |||
| 85 | o CONFIG_SLAB=y or CONFIG_SLUB=y | ||
| 86 | |||
| 87 | This option is located under "General setup" / "Choose SLAB | ||
| 88 | allocator". | ||
| 89 | |||
| 90 | o CONFIG_FUNCTION_TRACER=n | ||
| 91 | |||
| 92 | This option is located under "Kernel hacking" / "Tracers" / "Kernel | ||
| 93 | Function Tracer" | ||
| 94 | |||
| 95 | When function tracing is compiled in, gcc emits a call to another | ||
| 96 | function at the beginning of every function. This means that when the | ||
| 97 | page fault handler is called, the ftrace framework will be called | ||
| 98 | before kmemcheck has had a chance to handle the fault. If ftrace then | ||
| 99 | modifies memory that was tracked by kmemcheck, the result is an | ||
| 100 | endless recursive page fault. | ||
| 101 | |||
| 102 | o CONFIG_DEBUG_PAGEALLOC=n | ||
| 103 | |||
| 104 | This option is located under "Kernel hacking" / "Debug page memory | ||
| 105 | allocations". | ||
| 106 | |||
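| | As referenced under the first option above, here is a minimal sketch of the | ||
| | kind of code that trips this up (the struct and names are hypothetical, and | ||
| | kernel-style types are assumed): | ||
| | |||
| | 	struct packet { | ||
| | 		u16 len;	/* initialized by the driver */ | ||
| | 		u16 pad;	/* never written */ | ||
| | 	}; | ||
| | |||
| | 	u16 get_len(const struct packet *p) | ||
| | 	{ | ||
| | 		/* With -Os, gcc may fetch "len" with a single 32-bit | ||
| | 		 * load that also covers "pad"; kmemcheck then sees a | ||
| | 		 * 32-bit read whose upper 16 bits are uninitialized. */ | ||
| | 		return p->len; | ||
| | 	} | ||
| | |||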
| 107 | In addition, I highly recommend turning on CONFIG_DEBUG_INFO=y. This is also | ||
| 108 | located under "Kernel hacking". With this, you will be able to get line number | ||
| 109 | information from the kmemcheck warnings, which is extremely valuable in | ||
| 110 | debugging a problem. This option is not mandatory, however, because it slows | ||
| 111 | down the compilation process and produces a much bigger kernel image. | ||
| 112 | |||
| 113 | Now the kmemcheck menu should be visible (under "Kernel hacking" / "kmemcheck: | ||
| 114 | trap use of uninitialized memory"). Here follows a description of the | ||
| 115 | kmemcheck configuration variables: | ||
| 116 | |||
| 117 | o CONFIG_KMEMCHECK | ||
| 118 | |||
| 119 | This must be enabled in order to use kmemcheck at all... | ||
| 120 | |||
| 121 | o CONFIG_KMEMCHECK_[DISABLED | ENABLED | ONESHOT]_BY_DEFAULT | ||
| 122 | |||
| 123 | This option controls the status of kmemcheck at boot-time. "Enabled" | ||
| 124 | will enable kmemcheck right from the start, "disabled" will boot the | ||
| 125 | kernel as normal (but with the kmemcheck code compiled in, so it can | ||
| 126 | be enabled at run-time after the kernel has booted), and "one-shot" is | ||
| 127 | a special mode which will turn kmemcheck off automatically after | ||
| 128 | detecting the first use of uninitialized memory. | ||
| 129 | |||
| 130 | If you are using kmemcheck to actively debug a problem, then you | ||
| 131 | probably want to choose "enabled" here. | ||
| 132 | |||
| 133 | The one-shot mode is mostly useful in automated test setups because it | ||
| 134 | can prevent floods of warnings and increase the chances of the machine | ||
| 135 | surviving in case something is really wrong. In other cases, the one- | ||
| 136 | shot mode could actually be counter-productive because it would turn | ||
| 137 | itself off at the very first error -- in the case of a false positive | ||
| 138 | too -- and this would get in the way of debugging the specific | ||
| 139 | problem you were interested in. | ||
| 140 | |||
| 141 | If you would like to use your kernel as normal, but with a chance to | ||
| 142 | enable kmemcheck in case of some problem, it might be a good idea to | ||
| 143 | choose "disabled" here. When kmemcheck is disabled, most of the run- | ||
| 144 | time overhead is not incurred, and the kernel will be almost as fast | ||
| 145 | as normal. | ||
| 146 | |||
| 147 | o CONFIG_KMEMCHECK_QUEUE_SIZE | ||
| 148 | |||
| 149 | Select the maximum number of error reports to store in an internal | ||
| 150 | (fixed-size) buffer. Since errors can occur virtually anywhere and in | ||
| 151 | any context, we need a temporary storage area which is guaranteed not | ||
| 152 | to generate any other page faults when accessed. The queue will be | ||
| 153 | emptied as soon as a tasklet may be scheduled. If the queue is full, | ||
| 154 | new error reports will be lost. | ||
| 155 | |||
| 156 | The default value of 64 is probably fine. If some code produces more | ||
| 157 | than 64 errors within an irqs-off section, then the code is likely to | ||
| 158 | produce many, many more, too, and these additional reports seldom give | ||
| 159 | any more information (the first report is usually the most valuable | ||
| 160 | anyway). | ||
| 161 | |||
| 162 | This number might have to be adjusted if you are not using serial | ||
| 163 | console or similar to capture the kernel log. If you are using the | ||
| 164 | "dmesg" command to save the log, then getting a lot of kmemcheck | ||
| 165 | warnings might overflow the kernel log itself, and the earlier reports | ||
| 166 | will get lost in that way instead. Try setting this to 10 or so on | ||
| 167 | such a setup. | ||
| 168 | |||
| 169 | o CONFIG_KMEMCHECK_SHADOW_COPY_SHIFT | ||
| 170 | |||
| 171 | Select the number of shadow bytes to save along with each entry of the | ||
| 172 | error-report queue. These bytes indicate what parts of an allocation | ||
| 173 | are initialized, uninitialized, etc. and will be displayed when an | ||
| 174 | error is detected to help the debugging of a particular problem. | ||
| 175 | |||
| 176 | The number entered here is actually the base-2 logarithm of the number of | ||
| 177 | bytes that will be saved. So if you pick for example 5 here, kmemcheck | ||
| 178 | will save 2^5 = 32 bytes. | ||
| 179 | |||
| 180 | The default value should be fine for debugging most problems. It also | ||
| 181 | fits nicely within 80 columns. | ||
| 182 | |||
| 183 | o CONFIG_KMEMCHECK_PARTIAL_OK | ||
| 184 | |||
| 185 | This option (when enabled) works around certain GCC optimizations that | ||
| 186 | produce 32-bit reads from 16-bit variables where the upper 16 bits are | ||
| 187 | thrown away afterwards. | ||
| 188 | |||
| 189 | The default value (enabled) is recommended. This may of course hide | ||
| 190 | some real errors, but disabling it would probably produce a lot of | ||
| 191 | false positives. | ||
| 192 | |||
| 193 | o CONFIG_KMEMCHECK_BITOPS_OK | ||
| 194 | |||
| 195 | This option silences warnings that would be generated for bit-field | ||
| 196 | accesses where not all the bits are initialized at the same time. This | ||
| 197 | may also hide some real bugs. | ||
| 198 | |||
| 199 | This option is probably obsolete, or it should be replaced with | ||
| 200 | the kmemcheck bitfield annotations (see section 3.4) for the code in question. The | ||
| 201 | default value is therefore fine. | ||
| 202 | |||
| 203 | Now compile the kernel as usual. | ||
| 204 | |||
| 205 | |||
| 206 | 3. How to use | ||
| 207 | ============= | ||
| 208 | |||
| 209 | 3.1. Booting | ||
| 210 | ============ | ||
| 211 | |||
| 212 | First some information about the command-line options. There is only one | ||
| 213 | option specific to kmemcheck, and this is called "kmemcheck". It can be used | ||
| 214 | to override the default mode as chosen by the CONFIG_KMEMCHECK_*_BY_DEFAULT | ||
| 215 | option. Its possible settings are: | ||
| 216 | |||
| 217 | o kmemcheck=0 (disabled) | ||
| 218 | o kmemcheck=1 (enabled) | ||
| 219 | o kmemcheck=2 (one-shot mode) | ||
| 220 | |||
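| | For example, to boot straight into one-shot mode, append the parameter to | ||
| | the kernel line in the boot loader configuration (the image name and root | ||
| | device below are hypothetical): | ||
| | |||
| | 	kernel /boot/vmlinuz root=/dev/sda1 ro kmemcheck=2 | ||
| | |||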
| 221 | If SLUB debugging has been enabled in the kernel, it may take precedence over | ||
| 222 | kmemcheck in such a way that the slab caches which are under SLUB debugging | ||
| 223 | will not be tracked by kmemcheck. In order to ensure that this doesn't happen | ||
| 224 | (even though it shouldn't by default), use SLUB's boot option "slub_debug", | ||
| 225 | like this: slub_debug=- | ||
| 226 | |||
| 227 | In fact, this option may also be used for fine-grained control over SLUB vs. | ||
| 228 | kmemcheck. For example, if the command line includes "kmemcheck=1 | ||
| 229 | slub_debug=,dentry", then SLUB debugging will be used only for the "dentry" | ||
| 230 | slab cache, and with kmemcheck tracking all the other caches. This is advanced | ||
| 231 | usage, however, and is not generally recommended. | ||
| 232 | |||
| 233 | |||
| 234 | 3.2. Run-time enable/disable | ||
| 235 | ============================ | ||
| 236 | |||
| 237 | When the kernel has booted, it is possible to enable or disable kmemcheck at | ||
| 238 | run-time. WARNING: This feature is still experimental and may cause false | ||
| 239 | positive warnings to appear. Therefore, try not to use this. If you find that | ||
| 240 | it doesn't work properly (e.g. you see an unreasonable amount of warnings), I | ||
| 241 | will be happy to take bug reports. | ||
| 242 | |||
| 243 | Use the file /proc/sys/kernel/kmemcheck for this purpose, e.g.: | ||
| 244 | |||
| 245 | $ echo 0 > /proc/sys/kernel/kmemcheck # disables kmemcheck | ||
| 246 | |||
| 247 | The numbers are the same as for the kmemcheck= command-line option. | ||
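| | |||
| | The same file accepts the other modes as well, and reading it back should | ||
| | print the currently active mode: | ||
| | |||
| | 	$ echo 1 > /proc/sys/kernel/kmemcheck	# enables kmemcheck | ||
| | 	$ echo 2 > /proc/sys/kernel/kmemcheck	# one-shot mode | ||
| | 	$ cat /proc/sys/kernel/kmemcheck	# show the current mode | ||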
| 248 | |||
| 249 | |||
| 250 | 3.3. Debugging | ||
| 251 | ============== | ||
| 252 | |||
| 253 | A typical report will look something like this: | ||
| 254 | |||
| 255 | WARNING: kmemcheck: Caught 32-bit read from uninitialized memory (ffff88003e4a2024) | ||
| 256 | 80000000000000000000000000000000000000000088ffff0000000000000000 | ||
| 257 | i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u | ||
| 258 | ^ | ||
| 259 | |||
| 260 | Pid: 1856, comm: ntpdate Not tainted 2.6.29-rc5 #264 945P-A | ||
| 261 | RIP: 0010:[<ffffffff8104ede8>] [<ffffffff8104ede8>] __dequeue_signal+0xc8/0x190 | ||
| 262 | RSP: 0018:ffff88003cdf7d98 EFLAGS: 00210002 | ||
| 263 | RAX: 0000000000000030 RBX: ffff88003d4ea968 RCX: 0000000000000009 | ||
| 264 | RDX: ffff88003e5d6018 RSI: ffff88003e5d6024 RDI: ffff88003cdf7e84 | ||
| 265 | RBP: ffff88003cdf7db8 R08: ffff88003e5d6000 R09: 0000000000000000 | ||
| 266 | R10: 0000000000000080 R11: 0000000000000000 R12: 000000000000000e | ||
| 267 | R13: ffff88003cdf7e78 R14: ffff88003d530710 R15: ffff88003d5a98c8 | ||
| 268 | FS: 0000000000000000(0000) GS:ffff880001982000(0063) knlGS:00000 | ||
| 269 | CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033 | ||
| 270 | CR2: ffff88003f806ea0 CR3: 000000003c036000 CR4: 00000000000006a0 | ||
| 271 | DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 | ||
| 272 | DR3: 0000000000000000 DR6: 00000000ffff4ff0 DR7: 0000000000000400 | ||
| 273 | [<ffffffff8104f04e>] dequeue_signal+0x8e/0x170 | ||
| 274 | [<ffffffff81050bd8>] get_signal_to_deliver+0x98/0x390 | ||
| 275 | [<ffffffff8100b87d>] do_notify_resume+0xad/0x7d0 | ||
| 276 | [<ffffffff8100c7b5>] int_signal+0x12/0x17 | ||
| 277 | [<ffffffffffffffff>] 0xffffffffffffffff | ||
| 278 | |||
| 279 | The single most valuable piece of information in this report is the RIP (or | ||
| 280 | EIP on 32-bit) value. This will help us pinpoint exactly which instruction | ||
| 281 | caused the warning. | ||
| 282 | |||
| 283 | If your kernel was compiled with CONFIG_DEBUG_INFO=y, then all we have to do | ||
| 284 | is give this address to the addr2line program, like this: | ||
| 285 | |||
| 286 | $ addr2line -e vmlinux -i ffffffff8104ede8 | ||
| 287 | arch/x86/include/asm/string_64.h:12 | ||
| 288 | include/asm-generic/siginfo.h:287 | ||
| 289 | kernel/signal.c:380 | ||
| 290 | kernel/signal.c:410 | ||
| 291 | |||
| 292 | The "-e vmlinux" tells addr2line which file to look in. IMPORTANT: This must | ||
| 293 | be the vmlinux of the kernel that produced the warning in the first place! If | ||
| 294 | not, the line number information will almost certainly be wrong. | ||
| 295 | |||
| 296 | The "-i" tells addr2line to also print the line numbers of inlined functions. | ||
| 297 | In this case, the flag was very important, because otherwise, it would only | ||
| 298 | have printed the first line, a call to memcpy(), which could have been | ||
| 299 | called from a thousand places in the kernel and is therefore not very useful. | ||
| 300 | These inlined functions would not show up in the stack trace above, simply | ||
| 301 | because the kernel doesn't load the extra debugging information. This | ||
| 302 | technique can of course be used with ordinary kernel oopses as well. | ||
| 303 | |||
| 304 | In this case, it's the caller of memcpy() that is interesting, and it can be | ||
| 305 | found in include/asm-generic/siginfo.h, line 287: | ||
| 306 | |||
| 307 | 281 static inline void copy_siginfo(struct siginfo *to, struct siginfo *from) | ||
| 308 | 282 { | ||
| 309 | 283 if (from->si_code < 0) | ||
| 310 | 284 memcpy(to, from, sizeof(*to)); | ||
| 311 | 285 else | ||
| 312 | 286 /* _sigchld is currently the largest know union member */ | ||
| 313 | 287 memcpy(to, from, __ARCH_SI_PREAMBLE_SIZE + sizeof(from->_sifields._sigchld)); | ||
| 314 | 288 } | ||
| 315 | |||
| 316 | Since this was a read (kmemcheck usually warns about reads only, though it can | ||
| 317 | warn about writes to unallocated or freed memory as well), it was probably the | ||
| 318 | "from" argument which contained some uninitialized bytes. Following the chain | ||
| 319 | of calls, we move upwards to see where "from" was allocated or initialized, | ||
| 320 | kernel/signal.c, line 380: | ||
| 321 | |||
| 322 | 359 static void collect_signal(int sig, struct sigpending *list, siginfo_t *info) | ||
| 323 | 360 { | ||
| 324 | ... | ||
| 325 | 367 list_for_each_entry(q, &list->list, list) { | ||
| 326 | 368 if (q->info.si_signo == sig) { | ||
| 327 | 369 if (first) | ||
| 328 | 370 goto still_pending; | ||
| 329 | 371 first = q; | ||
| 330 | ... | ||
| 331 | 377 if (first) { | ||
| 332 | 378 still_pending: | ||
| 333 | 379 list_del_init(&first->list); | ||
| 334 | 380 copy_siginfo(info, &first->info); | ||
| 335 | 381 __sigqueue_free(first); | ||
| 336 | ... | ||
| 337 | 392 } | ||
| 338 | 393 } | ||
| 339 | |||
| 340 | Here, it is &first->info that is being passed on to copy_siginfo(). The | ||
| 341 | variable "first" was found on a list -- passed in as the second argument to | ||
| 342 | collect_signal(). We continue our journey through the stack, to figure out | ||
| 343 | where the item on "list" was allocated or initialized. We move to line 410: | ||
| 344 | |||
| 345 | 395 static int __dequeue_signal(struct sigpending *pending, sigset_t *mask, | ||
| 346 | 396 siginfo_t *info) | ||
| 347 | 397 { | ||
| 348 | ... | ||
| 349 | 410 collect_signal(sig, pending, info); | ||
| 350 | ... | ||
| 351 | 414 } | ||
| 352 | |||
| 353 | Now we need to follow the "pending" pointer, since that is being passed on to | ||
| 354 | collect_signal() as "list". At this point, we've run out of lines from the | ||
| 355 | "addr2line" output. Not to worry, we just paste the next addresses from the | ||
| 356 | kmemcheck stack dump, i.e.: | ||
| 357 | |||
| 358 | [<ffffffff8104f04e>] dequeue_signal+0x8e/0x170 | ||
| 359 | [<ffffffff81050bd8>] get_signal_to_deliver+0x98/0x390 | ||
| 360 | [<ffffffff8100b87d>] do_notify_resume+0xad/0x7d0 | ||
| 361 | [<ffffffff8100c7b5>] int_signal+0x12/0x17 | ||
| 362 | |||
| 363 | $ addr2line -e vmlinux -i ffffffff8104f04e ffffffff81050bd8 \ | ||
| 364 | ffffffff8100b87d ffffffff8100c7b5 | ||
| 365 | kernel/signal.c:446 | ||
| 366 | kernel/signal.c:1806 | ||
| 367 | arch/x86/kernel/signal.c:805 | ||
| 368 | arch/x86/kernel/signal.c:871 | ||
| 369 | arch/x86/kernel/entry_64.S:694 | ||
| 370 | |||
| 371 | Remember that since these addresses were found on the stack and not as the | ||
| 372 | RIP value, they actually point to the _next_ instruction (they are return | ||
| 373 | addresses). This becomes obvious when we look at the code for line 446: | ||
| 374 | |||
| 375 | 422 int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) | ||
| 376 | 423 { | ||
| 377 | ... | ||
| 378 | 431 signr = __dequeue_signal(&tsk->signal->shared_pending, | ||
| 379 | 432 mask, info); | ||
| 380 | 433 /* | ||
| 381 | 434 * itimer signal ? | ||
| 382 | 435 * | ||
| 383 | 436 * itimers are process shared and we restart periodic | ||
| 384 | 437 * itimers in the signal delivery path to prevent DoS | ||
| 385 | 438 * attacks in the high resolution timer case. This is | ||
| 386 | 439 * compliant with the old way of self restarting | ||
| 387 | 440 * itimers, as the SIGALRM is a legacy signal and only | ||
| 388 | 441 * queued once. Changing the restart behaviour to | ||
| 389 | 442 * restart the timer in the signal dequeue path is | ||
| 390 | 443 * reducing the timer noise on heavy loaded !highres | ||
| 391 | 444 * systems too. | ||
| 392 | 445 */ | ||
| 393 | 446 if (unlikely(signr == SIGALRM)) { | ||
| 394 | ... | ||
| 395 | 489 } | ||
| 396 | |||
| 397 | So instead of looking at 446, we should be looking at 431, which is the line | ||
| 398 | that executes just before 446. Here we see that what we are looking for is | ||
| 399 | &tsk->signal->shared_pending. | ||
| 400 | |||
| 401 | Our next task is to figure out which function puts items on this | ||
| 402 | "shared_pending" list. A crude, but efficient tool, is git grep: | ||
| 403 | |||
| 404 | $ git grep -n 'shared_pending' kernel/ | ||
| 405 | ... | ||
| 406 | kernel/signal.c:828: pending = group ? &t->signal->shared_pending : &t->pending; | ||
| 407 | kernel/signal.c:1339: pending = group ? &t->signal->shared_pending : &t->pending; | ||
| 408 | ... | ||
| 409 | |||
| 410 | There were more results, but none of them were related to list operations, | ||
| 411 | and these were the only assignments. We inspect the line numbers more closely | ||
| 412 | and find that this is indeed where items are being added to the list: | ||
| 413 | |||
| 414 | 816 static int send_signal(int sig, struct siginfo *info, struct task_struct *t, | ||
| 415 | 817 int group) | ||
| 416 | 818 { | ||
| 417 | ... | ||
| 418 | 828 pending = group ? &t->signal->shared_pending : &t->pending; | ||
| 419 | ... | ||
| 420 | 851 q = __sigqueue_alloc(t, GFP_ATOMIC, (sig < SIGRTMIN && | ||
| 421 | 852 (is_si_special(info) || | ||
| 422 | 853 info->si_code >= 0))); | ||
| 423 | 854 if (q) { | ||
| 424 | 855 list_add_tail(&q->list, &pending->list); | ||
| 425 | ... | ||
| 426 | 890 } | ||
| 427 | |||
| 428 | and: | ||
| 429 | |||
| 430 | 1309 int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group) | ||
| 431 | 1310 { | ||
| 432 | .... | ||
| 433 | 1339 pending = group ? &t->signal->shared_pending : &t->pending; | ||
| 434 | 1340 list_add_tail(&q->list, &pending->list); | ||
| 435 | .... | ||
| 436 | 1347 } | ||
| 437 | |||
| 438 | In the first case, the list element we are looking for, "q", is being returned | ||
| 439 | from the function __sigqueue_alloc(), which looks like an allocation function. | ||
| 440 | Let's take a look at it: | ||
| 441 | |||
| 442 | 187 static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, | ||
| 443 | 188 int override_rlimit) | ||
| 444 | 189 { | ||
| 445 | 190 struct sigqueue *q = NULL; | ||
| 446 | 191 struct user_struct *user; | ||
| 447 | 192 | ||
| 448 | 193 /* | ||
| 449 | 194 * We won't get problems with the target's UID changing under us | ||
| 450 | 195 * because changing it requires RCU be used, and if t != current, the | ||
| 451 | 196 * caller must be holding the RCU readlock (by way of a spinlock) and | ||
| 452 | 197 * we use RCU protection here | ||
| 453 | 198 */ | ||
| 454 | 199 user = get_uid(__task_cred(t)->user); | ||
| 455 | 200 atomic_inc(&user->sigpending); | ||
| 456 | 201 if (override_rlimit || | ||
| 457 | 202 atomic_read(&user->sigpending) <= | ||
| 458 | 203 t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur) | ||
| 459 | 204 q = kmem_cache_alloc(sigqueue_cachep, flags); | ||
| 460 | 205 if (unlikely(q == NULL)) { | ||
| 461 | 206 atomic_dec(&user->sigpending); | ||
| 462 | 207 free_uid(user); | ||
| 463 | 208 } else { | ||
| 464 | 209 INIT_LIST_HEAD(&q->list); | ||
| 465 | 210 q->flags = 0; | ||
| 466 | 211 q->user = user; | ||
| 467 | 212 } | ||
| 468 | 213 | ||
| 469 | 214 return q; | ||
| 470 | 215 } | ||
| 471 | |||
| 472 | We see that this function initializes q->list, q->flags, and q->user. It seems | ||
| 473 | that now is the time to look at the definition of "struct sigqueue", e.g.: | ||
| 474 | |||
| 475 | 14 struct sigqueue { | ||
| 476 | 15 struct list_head list; | ||
| 477 | 16 int flags; | ||
| 478 | 17 siginfo_t info; | ||
| 479 | 18 struct user_struct *user; | ||
| 480 | 19 }; | ||
| 481 | |||
| 482 | And, you might remember, it was a memcpy() on &first->info that caused the | ||
| 483 | warning, so this makes perfect sense. It also seems reasonable to assume that | ||
| 484 | it is the caller of __sigqueue_alloc() that has the responsibility of filling | ||
| 485 | out (initializing) this member. | ||
| 486 | |||
| 487 | But just which fields of the struct were uninitialized? Let's look at | ||
| 488 | kmemcheck's report again: | ||
| 489 | |||
| 490 | WARNING: kmemcheck: Caught 32-bit read from uninitialized memory (ffff88003e4a2024) | ||
| 491 | 80000000000000000000000000000000000000000088ffff0000000000000000 | ||
| 492 | i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u | ||
| 493 | ^ | ||
| 494 | |||
| 495 | These first two lines are the memory dump of the memory object itself, and the | ||
| 496 | shadow bytemap, respectively. The memory object itself is in this case | ||
| 497 | &first->info. Just beware that the start of this dump is NOT the start of the | ||
| 498 | object itself! The position of the caret (^) corresponds with the address of | ||
| 499 | the read (ffff88003e4a2024). | ||
| 500 | |||
| 501 | The shadow bytemap dump legend is as follows: | ||
| 502 | |||
| 503 | i - initialized | ||
| 504 | u - uninitialized | ||
| 505 | a - unallocated (memory has been allocated by the slab layer, but has not | ||
| 506 | yet been handed off to anybody) | ||
| 507 | f - freed (memory has been allocated by the slab layer, but has been freed | ||
| 508 | by the previous owner) | ||
| 509 | |||
| 510 | In order to figure out where (relative to the start of the object) the | ||
| 511 | uninitialized memory was located, we have to look at the disassembly. For | ||
| 512 | that, we'll need the RIP address again: | ||
| 513 | |||
| 514 | RIP: 0010:[<ffffffff8104ede8>] [<ffffffff8104ede8>] __dequeue_signal+0xc8/0x190 | ||
| 515 | |||
| 516 | $ objdump -d --no-show-raw-insn vmlinux | grep -C 8 ffffffff8104ede8: | ||
| 517 | ffffffff8104edc8: mov %r8,0x8(%r8) | ||
| 518 | ffffffff8104edcc: test %r10d,%r10d | ||
| 519 | ffffffff8104edcf: js ffffffff8104ee88 <__dequeue_signal+0x168> | ||
| 520 | ffffffff8104edd5: mov %rax,%rdx | ||
| 521 | ffffffff8104edd8: mov $0xc,%ecx | ||
| 522 | ffffffff8104eddd: mov %r13,%rdi | ||
| 523 | ffffffff8104ede0: mov $0x30,%eax | ||
| 524 | ffffffff8104ede5: mov %rdx,%rsi | ||
| 525 | ffffffff8104ede8: rep movsl %ds:(%rsi),%es:(%rdi) | ||
| 526 | ffffffff8104edea: test $0x2,%al | ||
| 527 | ffffffff8104edec: je ffffffff8104edf0 <__dequeue_signal+0xd0> | ||
| 528 | ffffffff8104edee: movsw %ds:(%rsi),%es:(%rdi) | ||
| 529 | ffffffff8104edf0: test $0x1,%al | ||
| 530 | ffffffff8104edf2: je ffffffff8104edf5 <__dequeue_signal+0xd5> | ||
| 531 | ffffffff8104edf4: movsb %ds:(%rsi),%es:(%rdi) | ||
| 532 | ffffffff8104edf5: mov %r8,%rdi | ||
| 533 | ffffffff8104edf8: callq ffffffff8104de60 <__sigqueue_free> | ||
| 534 | |||
| 535 | As expected, it's the "rep movsl" instruction from the memcpy() that causes | ||
| 536 | the warning. REP MOVSL uses the register RCX to count | ||
| 537 | the number of remaining iterations. By taking a look at the register dump | ||
| 538 | again (from the kmemcheck report), we can figure out how many bytes were left | ||
| 539 | to copy: | ||
| 540 | |||
| 541 | RAX: 0000000000000030 RBX: ffff88003d4ea968 RCX: 0000000000000009 | ||
| 542 | |||
| 543 | By looking at the disassembly, we also see that %ecx is being loaded with the | ||
| 544 | value $0xc just before (ffffffff8104edd8), so we are very lucky. Keep in mind | ||
| 545 | that this is the number of iterations, not bytes. And since this is a "long" | ||
| 546 | operation, we need to multiply by 4 to get the number of bytes. So this means | ||
| 547 | that the uninitialized value was encountered at 4 * (0xc - 0x9) = 12 bytes | ||
| 548 | from the start of the object. | ||
| 549 | |||
| 550 | We can now try to figure out which field of the "struct siginfo" was not | ||
| 551 | initialized. This is the beginning of the struct: | ||
| 552 | |||
| 553 | 40 typedef struct siginfo { | ||
| 554 | 41 int si_signo; | ||
| 555 | 42 int si_errno; | ||
| 556 | 43 int si_code; | ||
| 557 | 44 | ||
| 558 | 45 union { | ||
| 559 | .. | ||
| 560 | 92 } _sifields; | ||
| 561 | 93 } siginfo_t; | ||
| 562 | |||
| 563 | On 64-bit, an int is 4 bytes long, so it must be the union member that has | ||
| 564 | not been initialized. We can verify this using gdb: | ||
| 565 | |||
| 566 | $ gdb vmlinux | ||
| 567 | ... | ||
| 568 | (gdb) p &((struct siginfo *) 0)->_sifields | ||
| 569 | $1 = (union {...} *) 0x10 | ||
| 570 | |||
| 571 | Actually, it seems that the union member is located at offset 0x10 -- which | ||
| 572 | means that gcc has inserted 4 bytes of padding between the members si_code | ||
| 573 | and _sifields. We can now get a fuller picture of the memory dump: | ||
| 574 | |||
| 575 | _----------------------------=> si_code | ||
| 576 | / _--------------------=> (padding) | ||
| 577 | | / _------------=> _sifields(._kill._pid) | ||
| 578 | | | / _----=> _sifields(._kill._uid) | ||
| 579 | | | | / | ||
| 580 | -------|-------|-------|-------| | ||
| 581 | 80000000000000000000000000000000000000000088ffff0000000000000000 | ||
| 582 | i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u | ||
| 583 | |||
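| | The same padding can be double-checked with a small userspace program (a | ||
| | sketch only; the struct below is a simplified stand-in for the kernel's | ||
| | siginfo_t, assuming the same x86-64 ABI): | ||
| | |||
| | 	#include <stddef.h> | ||
| | 	#include <stdio.h> | ||
| | |||
| | 	struct siginfo_like { | ||
| | 		int si_signo; | ||
| | 		int si_errno; | ||
| | 		int si_code; | ||
| | 		union { | ||
| | 			/* the real union holds pointer-sized members, | ||
| | 			 * which is what forces the padding */ | ||
| | 			void *align; | ||
| | 		} _sifields; | ||
| | 	}; | ||
| | |||
| | 	int main(void) | ||
| | 	{ | ||
| | 		/* expect 0x10: the 8-byte union alignment inserts | ||
| | 		 * 4 bytes of padding after si_code */ | ||
| | 		printf("%#zx\n", offsetof(struct siginfo_like, _sifields)); | ||
| | 		return 0; | ||
| | 	} | ||
| | |||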
| 584 | This allows us to realize another important fact: si_code contains the value | ||
| 585 | 0x80. Remember that x86 is little endian, so the first 4 bytes "80000000" are | ||
| 586 | really the number 0x00000080. With a bit of research, we find that this is | ||
| 587 | actually the constant SI_KERNEL defined in include/asm-generic/siginfo.h: | ||
| 588 | |||
| 589 | 144 #define SI_KERNEL 0x80 /* sent by the kernel from somewhere */ | ||
| 590 | |||
| 591 | This macro is used in exactly one place in the x86 kernel: In send_signal() | ||
| 592 | in kernel/signal.c: | ||
| 593 | |||
| 594 | 816 static int send_signal(int sig, struct siginfo *info, struct task_struct *t, | ||
| 595 | 817 int group) | ||
| 596 | 818 { | ||
| 597 | ... | ||
| 598 | 828 pending = group ? &t->signal->shared_pending : &t->pending; | ||
| 599 | ... | ||
| 600 | 851 q = __sigqueue_alloc(t, GFP_ATOMIC, (sig < SIGRTMIN && | ||
| 601 | 852 (is_si_special(info) || | ||
| 602 | 853 info->si_code >= 0))); | ||
| 603 | 854 if (q) { | ||
| 604 | 855 list_add_tail(&q->list, &pending->list); | ||
| 605 | 856 switch ((unsigned long) info) { | ||
| 606 | ... | ||
| 607 | 865 case (unsigned long) SEND_SIG_PRIV: | ||
| 608 | 866 q->info.si_signo = sig; | ||
| 609 | 867 q->info.si_errno = 0; | ||
| 610 | 868 q->info.si_code = SI_KERNEL; | ||
| 611 | 869 q->info.si_pid = 0; | ||
| 612 | 870 q->info.si_uid = 0; | ||
| 613 | 871 break; | ||
| 614 | ... | ||
| 615 | 890 } | ||
| 616 | |||
| 617 | Not only does this match the .si_code member, it also matches the place | ||
| 618 | we found earlier when looking for where siginfo_t objects are enqueued on the | ||
| 619 | "shared_pending" list. | ||
| 620 | |||
| 621 | So to sum up: It seems that it is the padding introduced by the compiler | ||
| 622 | between two struct fields that is uninitialized, and this gets reported when | ||
| 623 | we do a memcpy() on the struct. This means that we have identified a false | ||
| 624 | positive warning. | ||
| 625 | |||
| 626 | Normally, kmemcheck will not report uninitialized accesses in memcpy() calls | ||
| 627 | when both the source and destination addresses are tracked. (Instead, we copy | ||
| 628 | the shadow bytemap as well). In this case, the destination address clearly | ||
| 629 | was not tracked. We can dig a little deeper into the stack trace from above: | ||
| 630 | |||
| 631 | arch/x86/kernel/signal.c:805 | ||
| 632 | arch/x86/kernel/signal.c:871 | ||
| 633 | arch/x86/kernel/entry_64.S:694 | ||
| 634 | |||
| 635 | And we clearly see that the destination siginfo object is located on the | ||
| 636 | stack: | ||
| 637 | |||
| 638 | 782 static void do_signal(struct pt_regs *regs) | ||
| 639 | 783 { | ||
| 640 | 784 struct k_sigaction ka; | ||
| 641 | 785 siginfo_t info; | ||
| 642 | ... | ||
| 643 | 804 signr = get_signal_to_deliver(&info, &ka, regs, NULL); | ||
| 644 | ... | ||
| 645 | 854 } | ||
| 646 | |||
| 647 | And this &info is what eventually gets passed to copy_siginfo() as the | ||
| 648 | destination argument. | ||
| 649 | |||
| 650 | Now, even though we didn't find an actual error here, the example is still a | ||
| 651 | good one, because it shows how one would go about finding out what the | ||
| 652 | report was all about. | ||
| 653 | |||
| 654 | |||
| 655 | 3.4. Annotating false positives | ||
| 656 | =============================== | ||
| 657 | |||
| 658 | There are a few different ways to make annotations in the source code that | ||
| 659 | will keep kmemcheck from checking and reporting certain allocations. Here | ||
| 660 | they are: | ||
| 661 | |||
| 662 | o __GFP_NOTRACK_FALSE_POSITIVE | ||
| 663 | |||
| 664 | This flag can be passed to kmalloc() or kmem_cache_alloc() (therefore | ||
| 665 | also to other functions that end up calling one of these) to indicate | ||
| 666 | that the allocation should not be tracked because it would lead to | ||
| 667 | a false positive report. This is a "big hammer" way of silencing | ||
| 668 | kmemcheck; after all, even if the false positive pertains to a | ||
| 669 | particular field in a struct, for example, we will now lose the | ||
| 670 | ability to find (real) errors in other parts of the same struct. | ||
| 671 | |||
| 672 | Example: | ||
| 673 | |||
| 674 | /* No warnings will ever trigger on accessing any part of x */ | ||
| 675 | x = kmalloc(sizeof *x, GFP_KERNEL | __GFP_NOTRACK_FALSE_POSITIVE); | ||
| 676 | |||
| 677 | o kmemcheck_bitfield_begin(name)/kmemcheck_bitfield_end(name) and | ||
| 678 | kmemcheck_annotate_bitfield(ptr, name) | ||
| 679 | |||
| 680 | The first two of these three macros can be used inside struct | ||
| 681 | definitions to signal, respectively, the beginning and end of a | ||
| 682 | bitfield. Additionally, this will assign the bitfield a name, which | ||
| 683 | is given as an argument to the macros. | ||
| 684 | |||
| 685 | Having used these markers, one can later use | ||
| 686 | kmemcheck_annotate_bitfield() at the point of allocation, to indicate | ||
| 687 | which parts of the allocation are part of a bitfield. | ||
| 688 | |||
| 689 | Example: | ||
| 690 | |||
| 691 | struct foo { | ||
| 692 | int x; | ||
| 693 | |||
| 694 | kmemcheck_bitfield_begin(flags); | ||
| 695 | int flag_a:1; | ||
| 696 | int flag_b:1; | ||
| 697 | kmemcheck_bitfield_end(flags); | ||
| 698 | |||
| 699 | int y; | ||
| 700 | }; | ||
| 701 | |||
| 702 | struct foo *x = kmalloc(sizeof *x, GFP_KERNEL); | ||
| 703 | |||
| 704 | /* No warnings will trigger on accessing the bitfield of x */ | ||
| 705 | kmemcheck_annotate_bitfield(x, flags); | ||
| 706 | |||
| 707 | Note that kmemcheck_annotate_bitfield() can be used even before the | ||
| 708 | return value of kmalloc() is checked -- in other words, passing NULL | ||
| 709 | as the first argument is legal (and will do nothing). | ||
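| | |||
| | In other words, the following allocation pattern is safe (a sketch, reusing | ||
| | the hypothetical "struct foo" from above): | ||
| | |||
| | 	struct foo *x = kmalloc(sizeof *x, GFP_KERNEL); | ||
| | |||
| | 	/* annotate first; a NULL "x" is simply ignored */ | ||
| | 	kmemcheck_annotate_bitfield(x, flags); | ||
| | |||
| | 	if (!x) | ||
| | 		return -ENOMEM; | ||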
| 710 | |||
| 711 | |||
| 712 | 4. Reporting errors | ||
| 713 | =================== | ||
| 714 | |||
| 715 | As we have seen, kmemcheck will produce false positive reports. Therefore, it | ||
| 716 | is not very wise to blindly post kmemcheck warnings to mailing lists and | ||
| 717 | maintainers. Instead, I encourage maintainers and developers to find errors | ||
| 718 | in their own code. If you get a warning, you can try to work around it, try | ||
| 719 | to figure out if it's a real error or not, or simply ignore it. Most | ||
| 720 | developers know their own code and will quickly and efficiently determine the | ||
| 721 | root cause of a kmemcheck report. This is therefore also the most efficient | ||
| 722 | way to work with kmemcheck. | ||
| 723 | |||
| 724 | That said, we (the kmemcheck maintainers) will always be on the lookout for | ||
| 725 | false positives that we can annotate and silence. So whatever you find, | ||
| 726 | please drop us a note privately! Kernel configs and steps to reproduce (if | ||
| 727 | available) are of course a great help too. | ||
| 728 | |||
| 729 | Happy hacking! | ||
| 730 | |||
| 731 | |||
| 732 | 5. Technical description | ||
| 733 | ======================== | ||
| 734 | |||
| 735 | kmemcheck works by marking memory pages non-present. This means that whenever | ||
| 736 | somebody attempts to access the page, a page fault is generated. The page | ||
| 737 | fault handler notices that the page was in fact only hidden, and so it calls | ||
| 738 | on the kmemcheck code to make further investigations. | ||
| 739 | |||
| 740 | When the investigations are completed, kmemcheck "shows" the page by marking | ||
| 741 | it present (as it would be under normal circumstances). This way, the | ||
| 742 | interrupted code can continue as usual. | ||
| 743 | |||
| 744 | But after the instruction has been executed, we should hide the page again, so | ||
| 745 | that we can catch the next access too! Now kmemcheck makes use of a debugging | ||
| 746 | feature of the processor, namely single-stepping. When the processor has | ||
| 747 | finished the one instruction that generated the memory access, a debug | ||
| 748 | exception is raised. From here, we simply hide the page again and continue | ||
| 749 | execution, this time with the single-stepping feature turned off. | ||
| 750 | |||
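| | In rough pseudocode -- a conceptual sketch only, where the helpers marked | ||
| | "hypothetical" do not exist under these names -- the cycle looks like this: | ||
| | |||
| | 	/* page fault path (cf. kmemcheck_fault() in this patch) */ | ||
| | 	if (page_is_hidden(address)) {		/* hypothetical */ | ||
| | 		check_shadow(regs, address);	/* hypothetical; warns on | ||
| | 						 * uninitialized bytes */ | ||
| | 		show_page(address);		/* mark the PTE present */ | ||
| | 		regs->flags |= X86_EFLAGS_TF;	/* single-step */ | ||
| | 		return;		/* re-execute the faulting instruction */ | ||
| | 	} | ||
| | |||
| | 	/* debug exception path (cf. kmemcheck_trap() in this patch) */ | ||
| | 	if (kmemcheck_active(regs)) { | ||
| | 		hide_page(address);		/* catch the next access */ | ||
| | 		regs->flags &= ~X86_EFLAGS_TF;	/* stop single-stepping */ | ||
| | 		return; | ||
| | 	} | ||
| | |||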
| 751 | kmemcheck requires some assistance from the memory allocator in order to work. | ||
| 752 | The memory allocator needs to | ||
| 753 | |||
| 754 | 1. Tell kmemcheck about newly allocated pages and pages that are about to | ||
| 755 | be freed. This allows kmemcheck to set up and tear down the shadow memory | ||
| 756 | for the pages in question. The shadow memory stores the status of each | ||
| 757 | byte in the allocation proper, e.g. whether it is initialized or | ||
| 758 | uninitialized. | ||
| 759 | |||
| 760 | 2. Tell kmemcheck which parts of memory should be marked uninitialized. | ||
| 761 | There are actually a few more states, such as "not yet allocated" and | ||
| 762 | "recently freed". | ||
| 763 | |||
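| | As a concrete example of point 2, the DMA mapping code later in this patch | ||
| | marks whole buffers as initialized at map time: | ||
| | |||
| | 	kmemcheck_mark_initialized(ptr, size); | ||
| | |||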
| 764 | If a slab cache is set up using the SLAB_NOTRACK flag, it will never return | ||
| 765 | memory that can take page faults because of kmemcheck. | ||
| 766 | |||
| 767 | If a slab cache is NOT set up using the SLAB_NOTRACK flag, callers can still | ||
| 768 | request memory with the __GFP_NOTRACK or __GFP_NOTRACK_FALSE_POSITIVE flags. | ||
| 769 | This does not prevent the page faults from occurring, but it marks the | ||
| 770 | object in question as initialized so that no warnings will ever be | ||
| 771 | produced for this object. | ||
| 772 | |||
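| | Both mechanisms appear elsewhere in this patch: the task_xstate slab cache | ||
| | opts out of tracking wholesale, and thread stacks are allocated with | ||
| | __GFP_NOTRACK: | ||
| | |||
| | 	task_xstate_cachep = | ||
| | 		kmem_cache_create("task_xstate", xstate_size, | ||
| | 			__alignof__(union thread_xstate), | ||
| | 			SLAB_PANIC | SLAB_NOTRACK, NULL); | ||
| | |||
| | 	#define THREAD_FLAGS (GFP_KERNEL | __GFP_NOTRACK) | ||
| | |||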
| 773 | Currently, the SLAB and SLUB allocators are supported by kmemcheck. | ||
diff --git a/MAINTAINERS b/MAINTAINERS
index 685784cc023b..af8ef6527f22 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
| @@ -3406,6 +3406,14 @@ F: drivers/serial/kgdboc.c | |||
| 3406 | F: include/linux/kgdb.h | 3406 | F: include/linux/kgdb.h |
| 3407 | F: kernel/kgdb.c | 3407 | F: kernel/kgdb.c |
| 3408 | 3408 | ||
| 3409 | KMEMCHECK | ||
| 3410 | P: Vegard Nossum | ||
| 3411 | M: vegardno@ifi.uio.no | ||
| 3412 | P: Pekka Enberg | ||
| 3413 | M: penberg@cs.helsinki.fi | ||
| 3414 | L: linux-kernel@vger.kernel.org | ||
| 3415 | S: Maintained | ||
| 3416 | |||
| 3409 | KMEMLEAK | 3417 | KMEMLEAK |
| 3410 | P: Catalin Marinas | 3418 | P: Catalin Marinas |
| 3411 | M: catalin.marinas@arm.com | 3419 | M: catalin.marinas@arm.com |
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 356d2ec8e2fb..cf42fc305419 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
| @@ -46,6 +46,7 @@ config X86 | |||
| 46 | select HAVE_KERNEL_GZIP | 46 | select HAVE_KERNEL_GZIP |
| 47 | select HAVE_KERNEL_BZIP2 | 47 | select HAVE_KERNEL_BZIP2 |
| 48 | select HAVE_KERNEL_LZMA | 48 | select HAVE_KERNEL_LZMA |
| 49 | select HAVE_ARCH_KMEMCHECK | ||
| 49 | 50 | ||
| 50 | config OUTPUT_FORMAT | 51 | config OUTPUT_FORMAT |
| 51 | string | 52 | string |
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index edbd0ca62067..1b68659c41b4 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
| @@ -81,6 +81,11 @@ ifdef CONFIG_CC_STACKPROTECTOR | |||
| 81 | endif | 81 | endif |
| 82 | endif | 82 | endif |
| 83 | 83 | ||
| 84 | # Don't unroll struct assignments with kmemcheck enabled | ||
| 85 | ifeq ($(CONFIG_KMEMCHECK),y) | ||
| 86 | KBUILD_CFLAGS += $(call cc-option,-fno-builtin-memcpy) | ||
| 87 | endif | ||
| 88 | |||
| 84 | # Stackpointer is addressed different for 32 bit and 64 bit x86 | 89 | # Stackpointer is addressed different for 32 bit and 64 bit x86 |
| 85 | sp-$(CONFIG_X86_32) := esp | 90 | sp-$(CONFIG_X86_32) := esp |
| 86 | sp-$(CONFIG_X86_64) := rsp | 91 | sp-$(CONFIG_X86_64) := rsp |
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index f82fdc412c64..b93405b228b4 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
| @@ -6,6 +6,7 @@ | |||
| 6 | * Documentation/DMA-API.txt for documentation. | 6 | * Documentation/DMA-API.txt for documentation. |
| 7 | */ | 7 | */ |
| 8 | 8 | ||
| 9 | #include <linux/kmemcheck.h> | ||
| 9 | #include <linux/scatterlist.h> | 10 | #include <linux/scatterlist.h> |
| 10 | #include <linux/dma-debug.h> | 11 | #include <linux/dma-debug.h> |
| 11 | #include <linux/dma-attrs.h> | 12 | #include <linux/dma-attrs.h> |
| @@ -60,6 +61,7 @@ dma_map_single(struct device *hwdev, void *ptr, size_t size, | |||
| 60 | dma_addr_t addr; | 61 | dma_addr_t addr; |
| 61 | 62 | ||
| 62 | BUG_ON(!valid_dma_direction(dir)); | 63 | BUG_ON(!valid_dma_direction(dir)); |
| 64 | kmemcheck_mark_initialized(ptr, size); | ||
| 63 | addr = ops->map_page(hwdev, virt_to_page(ptr), | 65 | addr = ops->map_page(hwdev, virt_to_page(ptr), |
| 64 | (unsigned long)ptr & ~PAGE_MASK, size, | 66 | (unsigned long)ptr & ~PAGE_MASK, size, |
| 65 | dir, NULL); | 67 | dir, NULL); |
| @@ -87,8 +89,12 @@ dma_map_sg(struct device *hwdev, struct scatterlist *sg, | |||
| 87 | { | 89 | { |
| 88 | struct dma_map_ops *ops = get_dma_ops(hwdev); | 90 | struct dma_map_ops *ops = get_dma_ops(hwdev); |
| 89 | int ents; | 91 | int ents; |
| 92 | struct scatterlist *s; | ||
| 93 | int i; | ||
| 90 | 94 | ||
| 91 | BUG_ON(!valid_dma_direction(dir)); | 95 | BUG_ON(!valid_dma_direction(dir)); |
| 96 | for_each_sg(sg, s, nents, i) | ||
| 97 | kmemcheck_mark_initialized(sg_virt(s), s->length); | ||
| 92 | ents = ops->map_sg(hwdev, sg, nents, dir, NULL); | 98 | ents = ops->map_sg(hwdev, sg, nents, dir, NULL); |
| 93 | debug_dma_map_sg(hwdev, sg, nents, ents, dir); | 99 | debug_dma_map_sg(hwdev, sg, nents, ents, dir); |
| 94 | 100 | ||
| @@ -200,6 +206,7 @@ static inline dma_addr_t dma_map_page(struct device *dev, struct page *page, | |||
| 200 | dma_addr_t addr; | 206 | dma_addr_t addr; |
| 201 | 207 | ||
| 202 | BUG_ON(!valid_dma_direction(dir)); | 208 | BUG_ON(!valid_dma_direction(dir)); |
| 209 | kmemcheck_mark_initialized(page_address(page) + offset, size); | ||
| 203 | addr = ops->map_page(dev, page, offset, size, dir, NULL); | 210 | addr = ops->map_page(dev, page, offset, size, dir, NULL); |
| 204 | debug_dma_map_page(dev, page, offset, size, dir, addr, false); | 211 | debug_dma_map_page(dev, page, offset, size, dir, addr, false); |
| 205 | 212 | ||
diff --git a/arch/x86/include/asm/kmemcheck.h b/arch/x86/include/asm/kmemcheck.h
new file mode 100644
index 000000000000..ed01518f297e
--- /dev/null
+++ b/arch/x86/include/asm/kmemcheck.h
| @@ -0,0 +1,42 @@ | |||
| 1 | #ifndef ASM_X86_KMEMCHECK_H | ||
| 2 | #define ASM_X86_KMEMCHECK_H | ||
| 3 | |||
| 4 | #include <linux/types.h> | ||
| 5 | #include <asm/ptrace.h> | ||
| 6 | |||
| 7 | #ifdef CONFIG_KMEMCHECK | ||
| 8 | bool kmemcheck_active(struct pt_regs *regs); | ||
| 9 | |||
| 10 | void kmemcheck_show(struct pt_regs *regs); | ||
| 11 | void kmemcheck_hide(struct pt_regs *regs); | ||
| 12 | |||
| 13 | bool kmemcheck_fault(struct pt_regs *regs, | ||
| 14 | unsigned long address, unsigned long error_code); | ||
| 15 | bool kmemcheck_trap(struct pt_regs *regs); | ||
| 16 | #else | ||
| 17 | static inline bool kmemcheck_active(struct pt_regs *regs) | ||
| 18 | { | ||
| 19 | return false; | ||
| 20 | } | ||
| 21 | |||
| 22 | static inline void kmemcheck_show(struct pt_regs *regs) | ||
| 23 | { | ||
| 24 | } | ||
| 25 | |||
| 26 | static inline void kmemcheck_hide(struct pt_regs *regs) | ||
| 27 | { | ||
| 28 | } | ||
| 29 | |||
| 30 | static inline bool kmemcheck_fault(struct pt_regs *regs, | ||
| 31 | unsigned long address, unsigned long error_code) | ||
| 32 | { | ||
| 33 | return false; | ||
| 34 | } | ||
| 35 | |||
| 36 | static inline bool kmemcheck_trap(struct pt_regs *regs) | ||
| 37 | { | ||
| 38 | return false; | ||
| 39 | } | ||
| 40 | #endif /* CONFIG_KMEMCHECK */ | ||
| 41 | |||
| 42 | #endif | ||
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 18ef7ebf2631..3cc06e3fceb8 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
| @@ -317,6 +317,11 @@ static inline int pte_present(pte_t a) | |||
| 317 | return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); | 317 | return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); |
| 318 | } | 318 | } |
| 319 | 319 | ||
| 320 | static inline int pte_hidden(pte_t pte) | ||
| 321 | { | ||
| 322 | return pte_flags(pte) & _PAGE_HIDDEN; | ||
| 323 | } | ||
| 324 | |||
| 320 | static inline int pmd_present(pmd_t pmd) | 325 | static inline int pmd_present(pmd_t pmd) |
| 321 | { | 326 | { |
| 322 | return pmd_flags(pmd) & _PAGE_PRESENT; | 327 | return pmd_flags(pmd) & _PAGE_PRESENT; |
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 4d258ad76a0f..54cb697f4900 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
| @@ -18,7 +18,7 @@ | |||
| 18 | #define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ | 18 | #define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ |
| 19 | #define _PAGE_BIT_UNUSED1 9 /* available for programmer */ | 19 | #define _PAGE_BIT_UNUSED1 9 /* available for programmer */ |
| 20 | #define _PAGE_BIT_IOMAP 10 /* flag used to indicate IO mapping */ | 20 | #define _PAGE_BIT_IOMAP 10 /* flag used to indicate IO mapping */ |
| 21 | #define _PAGE_BIT_UNUSED3 11 | 21 | #define _PAGE_BIT_HIDDEN 11 /* hidden by kmemcheck */ |
| 22 | #define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ | 22 | #define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ |
| 23 | #define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1 | 23 | #define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1 |
| 24 | #define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1 | 24 | #define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1 |
| @@ -41,13 +41,18 @@ | |||
| 41 | #define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) | 41 | #define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) |
| 42 | #define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1) | 42 | #define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1) |
| 43 | #define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP) | 43 | #define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP) |
| 44 | #define _PAGE_UNUSED3 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED3) | ||
| 45 | #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) | 44 | #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) |
| 46 | #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) | 45 | #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) |
| 47 | #define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) | 46 | #define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) |
| 48 | #define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST) | 47 | #define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST) |
| 49 | #define __HAVE_ARCH_PTE_SPECIAL | 48 | #define __HAVE_ARCH_PTE_SPECIAL |
| 50 | 49 | ||
| 50 | #ifdef CONFIG_KMEMCHECK | ||
| 51 | #define _PAGE_HIDDEN (_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN) | ||
| 52 | #else | ||
| 53 | #define _PAGE_HIDDEN (_AT(pteval_t, 0)) | ||
| 54 | #endif | ||
| 55 | |||
| 51 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) | 56 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) |
| 52 | #define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) | 57 | #define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) |
| 53 | #else | 58 | #else |
diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h
index 0e0e3ba827f7..c86f452256de 100644
--- a/arch/x86/include/asm/string_32.h
+++ b/arch/x86/include/asm/string_32.h
| @@ -177,10 +177,18 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len) | |||
| 177 | * No 3D Now! | 177 | * No 3D Now! |
| 178 | */ | 178 | */ |
| 179 | 179 | ||
| 180 | #ifndef CONFIG_KMEMCHECK | ||
| 180 | #define memcpy(t, f, n) \ | 181 | #define memcpy(t, f, n) \ |
| 181 | (__builtin_constant_p((n)) \ | 182 | (__builtin_constant_p((n)) \ |
| 182 | ? __constant_memcpy((t), (f), (n)) \ | 183 | ? __constant_memcpy((t), (f), (n)) \ |
| 183 | : __memcpy((t), (f), (n))) | 184 | : __memcpy((t), (f), (n))) |
| 185 | #else | ||
| 186 | /* | ||
| 187 | * kmemcheck becomes very happy if we use the REP instructions unconditionally, | ||
| 188 | * because it means that we know both memory operands in advance. | ||
| 189 | */ | ||
| 190 | #define memcpy(t, f, n) __memcpy((t), (f), (n)) | ||
| 191 | #endif | ||
| 184 | 192 | ||
| 185 | #endif | 193 | #endif |
| 186 | 194 | ||
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 2afe164bf1e6..19e2c468fc2c 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
| @@ -27,6 +27,7 @@ static __always_inline void *__inline_memcpy(void *to, const void *from, size_t | |||
| 27 | function. */ | 27 | function. */ |
| 28 | 28 | ||
| 29 | #define __HAVE_ARCH_MEMCPY 1 | 29 | #define __HAVE_ARCH_MEMCPY 1 |
| 30 | #ifndef CONFIG_KMEMCHECK | ||
| 30 | #if (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4 | 31 | #if (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4 |
| 31 | extern void *memcpy(void *to, const void *from, size_t len); | 32 | extern void *memcpy(void *to, const void *from, size_t len); |
| 32 | #else | 33 | #else |
| @@ -42,6 +43,13 @@ extern void *__memcpy(void *to, const void *from, size_t len); | |||
| 42 | __ret; \ | 43 | __ret; \ |
| 43 | }) | 44 | }) |
| 44 | #endif | 45 | #endif |
| 46 | #else | ||
| 47 | /* | ||
| 48 | * kmemcheck becomes very happy if we use the REP instructions unconditionally, | ||
| 49 | * because it means that we know both memory operands in advance. | ||
| 50 | */ | ||
| 51 | #define memcpy(dst, src, len) __inline_memcpy((dst), (src), (len)) | ||
| 52 | #endif | ||
| 45 | 53 | ||
| 46 | #define __HAVE_ARCH_MEMSET | 54 | #define __HAVE_ARCH_MEMSET |
| 47 | void *memset(void *s, int c, size_t n); | 55 | void *memset(void *s, int c, size_t n); |
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 602c769fc98c..b0783520988b 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
| @@ -154,9 +154,9 @@ struct thread_info { | |||
| 154 | 154 | ||
| 155 | /* thread information allocation */ | 155 | /* thread information allocation */ |
| 156 | #ifdef CONFIG_DEBUG_STACK_USAGE | 156 | #ifdef CONFIG_DEBUG_STACK_USAGE |
| 157 | #define THREAD_FLAGS (GFP_KERNEL | __GFP_ZERO) | 157 | #define THREAD_FLAGS (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) |
| 158 | #else | 158 | #else |
| 159 | #define THREAD_FLAGS GFP_KERNEL | 159 | #define THREAD_FLAGS (GFP_KERNEL | __GFP_NOTRACK) |
| 160 | #endif | 160 | #endif |
| 161 | 161 | ||
| 162 | #define __HAVE_ARCH_THREAD_INFO_ALLOCATOR | 162 | #define __HAVE_ARCH_THREAD_INFO_ALLOCATOR |
diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h
index 11b3bb86e17b..7fcf6f3dbcc3 100644
--- a/arch/x86/include/asm/xor.h
+++ b/arch/x86/include/asm/xor.h
| @@ -1,5 +1,10 @@ | |||
| 1 | #ifdef CONFIG_KMEMCHECK | ||
| 2 | /* kmemcheck doesn't handle MMX/SSE/SSE2 instructions */ | ||
| 3 | # include <asm-generic/xor.h> | ||
| 4 | #else | ||
| 1 | #ifdef CONFIG_X86_32 | 5 | #ifdef CONFIG_X86_32 |
| 2 | # include "xor_32.h" | 6 | # include "xor_32.h" |
| 3 | #else | 7 | #else |
| 4 | # include "xor_64.h" | 8 | # include "xor_64.h" |
| 5 | #endif | 9 | #endif |
| 10 | #endif | ||
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index daed39ba2614..3260ab044996 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
| @@ -86,6 +86,29 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | |||
| 86 | */ | 86 | */ |
| 87 | if (c->x86 == 6 && c->x86_model < 15) | 87 | if (c->x86 == 6 && c->x86_model < 15) |
| 88 | clear_cpu_cap(c, X86_FEATURE_PAT); | 88 | clear_cpu_cap(c, X86_FEATURE_PAT); |
| 89 | |||
| 90 | #ifdef CONFIG_KMEMCHECK | ||
| 91 | /* | ||
| 92 | * P4s have a "fast strings" feature which causes single- | ||
| 93 | * stepping REP instructions to only generate a #DB on | ||
| 94 | * cache-line boundaries. | ||
| 95 | * | ||
| 96 | * Ingo Molnar reported a Pentium D (model 6) and a Xeon | ||
| 97 | * (model 2) with the same problem. | ||
| 98 | */ | ||
| 99 | if (c->x86 == 15) { | ||
| 100 | u64 misc_enable; | ||
| 101 | |||
| 102 | rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); | ||
| 103 | |||
| 104 | if (misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING) { | ||
| 105 | printk(KERN_INFO "kmemcheck: Disabling fast string operations\n"); | ||
| 106 | |||
| 107 | misc_enable &= ~MSR_IA32_MISC_ENABLE_FAST_STRING; | ||
| 108 | wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable); | ||
| 109 | } | ||
| 110 | } | ||
| 111 | #endif | ||
| 89 | } | 112 | } |
| 90 | 113 | ||
| 91 | #ifdef CONFIG_X86_32 | 114 | #ifdef CONFIG_X86_32 |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 3bb2be1649bd..994dd6a4a2a0 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
| @@ -63,7 +63,7 @@ void arch_task_cache_init(void) | |||
| 63 | task_xstate_cachep = | 63 | task_xstate_cachep = |
| 64 | kmem_cache_create("task_xstate", xstate_size, | 64 | kmem_cache_create("task_xstate", xstate_size, |
| 65 | __alignof__(union thread_xstate), | 65 | __alignof__(union thread_xstate), |
| 66 | SLAB_PANIC, NULL); | 66 | SLAB_PANIC | SLAB_NOTRACK, NULL); |
| 67 | } | 67 | } |
| 68 | 68 | ||
| 69 | /* | 69 | /* |
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 4aaf7e48394f..c3eb207181fe 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
| @@ -77,6 +77,13 @@ void save_stack_trace(struct stack_trace *trace) | |||
| 77 | } | 77 | } |
| 78 | EXPORT_SYMBOL_GPL(save_stack_trace); | 78 | EXPORT_SYMBOL_GPL(save_stack_trace); |
| 79 | 79 | ||
| 80 | void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp) | ||
| 81 | { | ||
| 82 | dump_trace(current, NULL, NULL, bp, &save_stack_ops, trace); | ||
| 83 | if (trace->nr_entries < trace->max_entries) | ||
| 84 | trace->entries[trace->nr_entries++] = ULONG_MAX; | ||
| 85 | } | ||
| 86 | |||
| 80 | void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) | 87 | void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) |
| 81 | { | 88 | { |
| 82 | dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace); | 89 | dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace); |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 1e1e27b7d438..5f935f0d5861 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
| @@ -45,6 +45,7 @@ | |||
| 45 | #include <linux/edac.h> | 45 | #include <linux/edac.h> |
| 46 | #endif | 46 | #endif |
| 47 | 47 | ||
| 48 | #include <asm/kmemcheck.h> | ||
| 48 | #include <asm/stacktrace.h> | 49 | #include <asm/stacktrace.h> |
| 49 | #include <asm/processor.h> | 50 | #include <asm/processor.h> |
| 50 | #include <asm/debugreg.h> | 51 | #include <asm/debugreg.h> |
| @@ -534,6 +535,10 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
| 534 | 535 | ||
| 535 | get_debugreg(condition, 6); | 536 | get_debugreg(condition, 6); |
| 536 | 537 | ||
| 538 | /* Catch kmemcheck conditions first of all! */ | ||
| 539 | if (condition & DR_STEP && kmemcheck_trap(regs)) | ||
| 540 | return; | ||
| 541 | |||
| 537 | /* | 542 | /* |
| 538 | * The processor cleared BTF, so don't mark that we need it set. | 543 | * The processor cleared BTF, so don't mark that we need it set. |
| 539 | */ | 544 | */ |
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index fdd30d08ab52..eefdeee8a871 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile | |||
| @@ -10,6 +10,8 @@ obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o | |||
| 10 | 10 | ||
| 11 | obj-$(CONFIG_HIGHMEM) += highmem_32.o | 11 | obj-$(CONFIG_HIGHMEM) += highmem_32.o |
| 12 | 12 | ||
| 13 | obj-$(CONFIG_KMEMCHECK) += kmemcheck/ | ||
| 14 | |||
| 13 | obj-$(CONFIG_MMIOTRACE) += mmiotrace.o | 15 | obj-$(CONFIG_MMIOTRACE) += mmiotrace.o |
| 14 | mmiotrace-y := kmmio.o pf_in.o mmio-mod.o | 16 | mmiotrace-y := kmmio.o pf_in.o mmio-mod.o |
| 15 | obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o | 17 | obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o |
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index c6acc6326374..baa0e86adfbc 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | 14 | ||
| 15 | #include <asm/traps.h> /* dotraplinkage, ... */ | 15 | #include <asm/traps.h> /* dotraplinkage, ... */ |
| 16 | #include <asm/pgalloc.h> /* pgd_*(), ... */ | 16 | #include <asm/pgalloc.h> /* pgd_*(), ... */ |
| 17 | #include <asm/kmemcheck.h> /* kmemcheck_*(), ... */ | ||
| 17 | 18 | ||
| 18 | /* | 19 | /* |
| 19 | * Page fault error code bits: | 20 | * Page fault error code bits: |
| @@ -956,6 +957,13 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
| 956 | /* Get the faulting address: */ | 957 | /* Get the faulting address: */ |
| 957 | address = read_cr2(); | 958 | address = read_cr2(); |
| 958 | 959 | ||
| 960 | /* | ||
| 961 | * Detect and handle instructions that would cause a page fault for | ||
| 962 | * both a tracked kernel page and a userspace page. | ||
| 963 | */ | ||
| 964 | if (kmemcheck_active(regs)) | ||
| 965 | kmemcheck_hide(regs); | ||
| 966 | |||
| 959 | if (unlikely(kmmio_fault(regs, address))) | 967 | if (unlikely(kmmio_fault(regs, address))) |
| 960 | return; | 968 | return; |
| 961 | 969 | ||
| @@ -973,9 +981,13 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
| 973 | * protection error (error_code & 9) == 0. | 981 | * protection error (error_code & 9) == 0. |
| 974 | */ | 982 | */ |
| 975 | if (unlikely(fault_in_kernel_space(address))) { | 983 | if (unlikely(fault_in_kernel_space(address))) { |
| 976 | if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) && | 984 | if (!(error_code & (PF_RSVD | PF_USER | PF_PROT))) { |
| 977 | vmalloc_fault(address) >= 0) | 985 | if (vmalloc_fault(address) >= 0) |
| 978 | return; | 986 | return; |
| 987 | |||
| 988 | if (kmemcheck_fault(regs, address, error_code)) | ||
| 989 | return; | ||
| 990 | } | ||
| 979 | 991 | ||
| 980 | /* Can handle a stale RO->RW TLB: */ | 992 | /* Can handle a stale RO->RW TLB: */ |
| 981 | if (spurious_fault(error_code, address)) | 993 | if (spurious_fault(error_code, address)) |
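
The guard added here only attempts vmalloc and kmemcheck handling when PF_RSVD, PF_USER and PF_PROT are all clear, i.e. for a non-present fault taken in kernel mode; kmemcheck_fault() (later in this patch) further uses bit 1 of the error code to tell writes from reads. A small standalone decoder of the x86 #PF error-code bits, for reference:

    #include <stdio.h>

    #define PF_PROT  (1 << 0)       /* protection violation, not non-present */
    #define PF_WRITE (1 << 1)       /* fault was a write */
    #define PF_USER  (1 << 2)       /* fault taken in user mode */
    #define PF_RSVD  (1 << 3)       /* reserved bit set in a paging entry */
    #define PF_INSTR (1 << 4)       /* fault on an instruction fetch */

    int main(void)
    {
            /* Example: kernel-mode write to a non-present page. */
            unsigned long error_code = PF_WRITE;

            printf("%s, %s\n",
                   (error_code & PF_WRITE) ? "write" : "read",
                   (error_code & (PF_RSVD | PF_USER | PF_PROT)) ?
                            "not a vmalloc/kmemcheck candidate" :
                            "vmalloc/kmemcheck candidate");
            return 0;
    }
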
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 34c1bfb64f1c..f53b57e4086f 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
| @@ -213,7 +213,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
| 213 | if (!after_bootmem) | 213 | if (!after_bootmem) |
| 214 | init_gbpages(); | 214 | init_gbpages(); |
| 215 | 215 | ||
| 216 | #ifdef CONFIG_DEBUG_PAGEALLOC | 216 | #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK) |
| 217 | /* | 217 | /* |
| 218 | * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. | 218 | * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. |
| 219 | * This will simplify cpa(), which otherwise needs to support splitting | 219 | * This will simplify cpa(), which otherwise needs to support splitting |
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 9ff3c0816d15..3cd7711bb949 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
| @@ -111,7 +111,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) | |||
| 111 | pte_t *page_table = NULL; | 111 | pte_t *page_table = NULL; |
| 112 | 112 | ||
| 113 | if (after_bootmem) { | 113 | if (after_bootmem) { |
| 114 | #ifdef CONFIG_DEBUG_PAGEALLOC | 114 | #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK) |
| 115 | page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); | 115 | page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); |
| 116 | #endif | 116 | #endif |
| 117 | if (!page_table) | 117 | if (!page_table) |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 52bb9519bb86..9c543290a813 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
| @@ -104,7 +104,7 @@ static __ref void *spp_getpage(void) | |||
| 104 | void *ptr; | 104 | void *ptr; |
| 105 | 105 | ||
| 106 | if (after_bootmem) | 106 | if (after_bootmem) |
| 107 | ptr = (void *) get_zeroed_page(GFP_ATOMIC); | 107 | ptr = (void *) get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK); |
| 108 | else | 108 | else |
| 109 | ptr = alloc_bootmem_pages(PAGE_SIZE); | 109 | ptr = alloc_bootmem_pages(PAGE_SIZE); |
| 110 | 110 | ||
| @@ -281,7 +281,7 @@ static __ref void *alloc_low_page(unsigned long *phys) | |||
| 281 | void *adr; | 281 | void *adr; |
| 282 | 282 | ||
| 283 | if (after_bootmem) { | 283 | if (after_bootmem) { |
| 284 | adr = (void *)get_zeroed_page(GFP_ATOMIC); | 284 | adr = (void *)get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK); |
| 285 | *phys = __pa(adr); | 285 | *phys = __pa(adr); |
| 286 | 286 | ||
| 287 | return adr; | 287 | return adr; |
diff --git a/arch/x86/mm/kmemcheck/Makefile b/arch/x86/mm/kmemcheck/Makefile new file mode 100644 index 000000000000..520b3bce4095 --- /dev/null +++ b/arch/x86/mm/kmemcheck/Makefile | |||
| @@ -0,0 +1 @@ | |||
| 1 | obj-y := error.o kmemcheck.o opcode.o pte.o selftest.o shadow.o | ||
diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c new file mode 100644 index 000000000000..4901d0dafda6 --- /dev/null +++ b/arch/x86/mm/kmemcheck/error.c | |||
| @@ -0,0 +1,228 @@ | |||
| 1 | #include <linux/interrupt.h> | ||
| 2 | #include <linux/kdebug.h> | ||
| 3 | #include <linux/kmemcheck.h> | ||
| 4 | #include <linux/kernel.h> | ||
| 5 | #include <linux/types.h> | ||
| 6 | #include <linux/ptrace.h> | ||
| 7 | #include <linux/stacktrace.h> | ||
| 8 | #include <linux/string.h> | ||
| 9 | |||
| 10 | #include "error.h" | ||
| 11 | #include "shadow.h" | ||
| 12 | |||
| 13 | enum kmemcheck_error_type { | ||
| 14 | KMEMCHECK_ERROR_INVALID_ACCESS, | ||
| 15 | KMEMCHECK_ERROR_BUG, | ||
| 16 | }; | ||
| 17 | |||
| 18 | #define SHADOW_COPY_SIZE (1 << CONFIG_KMEMCHECK_SHADOW_COPY_SHIFT) | ||
| 19 | |||
| 20 | struct kmemcheck_error { | ||
| 21 | enum kmemcheck_error_type type; | ||
| 22 | |||
| 23 | union { | ||
| 24 | /* KMEMCHECK_ERROR_INVALID_ACCESS */ | ||
| 25 | struct { | ||
| 26 | /* Kind of access that caused the error */ | ||
| 27 | enum kmemcheck_shadow state; | ||
| 28 | /* Address and size of the erroneous read */ | ||
| 29 | unsigned long address; | ||
| 30 | unsigned int size; | ||
| 31 | }; | ||
| 32 | }; | ||
| 33 | |||
| 34 | struct pt_regs regs; | ||
| 35 | struct stack_trace trace; | ||
| 36 | unsigned long trace_entries[32]; | ||
| 37 | |||
| 38 | /* We compress it to a char. */ | ||
| 39 | unsigned char shadow_copy[SHADOW_COPY_SIZE]; | ||
| 40 | unsigned char memory_copy[SHADOW_COPY_SIZE]; | ||
| 41 | }; | ||
| 42 | |||
| 43 | /* | ||
| 44 | * Create a ring queue of errors to output. We can't call printk() directly | ||
| 45 | * from the kmemcheck traps, since this may call the console drivers and | ||
| 46 | * result in a recursive fault. | ||
| 47 | */ | ||
| 48 | static struct kmemcheck_error error_fifo[CONFIG_KMEMCHECK_QUEUE_SIZE]; | ||
| 49 | static unsigned int error_count; | ||
| 50 | static unsigned int error_rd; | ||
| 51 | static unsigned int error_wr; | ||
| 52 | static unsigned int error_missed_count; | ||
| 53 | |||
| 54 | static struct kmemcheck_error *error_next_wr(void) | ||
| 55 | { | ||
| 56 | struct kmemcheck_error *e; | ||
| 57 | |||
| 58 | if (error_count == ARRAY_SIZE(error_fifo)) { | ||
| 59 | ++error_missed_count; | ||
| 60 | return NULL; | ||
| 61 | } | ||
| 62 | |||
| 63 | e = &error_fifo[error_wr]; | ||
| 64 | if (++error_wr == ARRAY_SIZE(error_fifo)) | ||
| 65 | error_wr = 0; | ||
| 66 | ++error_count; | ||
| 67 | return e; | ||
| 68 | } | ||
| 69 | |||
| 70 | static struct kmemcheck_error *error_next_rd(void) | ||
| 71 | { | ||
| 72 | struct kmemcheck_error *e; | ||
| 73 | |||
| 74 | if (error_count == 0) | ||
| 75 | return NULL; | ||
| 76 | |||
| 77 | e = &error_fifo[error_rd]; | ||
| 78 | if (++error_rd == ARRAY_SIZE(error_fifo)) | ||
| 79 | error_rd = 0; | ||
| 80 | --error_count; | ||
| 81 | return e; | ||
| 82 | } | ||
| 83 | |||
| 84 | void kmemcheck_error_recall(void) | ||
| 85 | { | ||
| 86 | static const char *desc[] = { | ||
| 87 | [KMEMCHECK_SHADOW_UNALLOCATED] = "unallocated", | ||
| 88 | [KMEMCHECK_SHADOW_UNINITIALIZED] = "uninitialized", | ||
| 89 | [KMEMCHECK_SHADOW_INITIALIZED] = "initialized", | ||
| 90 | [KMEMCHECK_SHADOW_FREED] = "freed", | ||
| 91 | }; | ||
| 92 | |||
| 93 | static const char short_desc[] = { | ||
| 94 | [KMEMCHECK_SHADOW_UNALLOCATED] = 'a', | ||
| 95 | [KMEMCHECK_SHADOW_UNINITIALIZED] = 'u', | ||
| 96 | [KMEMCHECK_SHADOW_INITIALIZED] = 'i', | ||
| 97 | [KMEMCHECK_SHADOW_FREED] = 'f', | ||
| 98 | }; | ||
| 99 | |||
| 100 | struct kmemcheck_error *e; | ||
| 101 | unsigned int i; | ||
| 102 | |||
| 103 | e = error_next_rd(); | ||
| 104 | if (!e) | ||
| 105 | return; | ||
| 106 | |||
| 107 | switch (e->type) { | ||
| 108 | case KMEMCHECK_ERROR_INVALID_ACCESS: | ||
| 109 | printk(KERN_ERR "WARNING: kmemcheck: Caught %d-bit read " | ||
| 110 | "from %s memory (%p)\n", | ||
| 111 | 8 * e->size, e->state < ARRAY_SIZE(desc) ? | ||
| 112 | desc[e->state] : "(invalid shadow state)", | ||
| 113 | (void *) e->address); | ||
| 114 | |||
| 115 | printk(KERN_INFO); | ||
| 116 | for (i = 0; i < SHADOW_COPY_SIZE; ++i) | ||
| 117 | printk("%02x", e->memory_copy[i]); | ||
| 118 | printk("\n"); | ||
| 119 | |||
| 120 | printk(KERN_INFO); | ||
| 121 | for (i = 0; i < SHADOW_COPY_SIZE; ++i) { | ||
| 122 | if (e->shadow_copy[i] < ARRAY_SIZE(short_desc)) | ||
| 123 | printk(" %c", short_desc[e->shadow_copy[i]]); | ||
| 124 | else | ||
| 125 | printk(" ?"); | ||
| 126 | } | ||
| 127 | printk("\n"); | ||
| 128 | printk(KERN_INFO "%*c\n", 2 + 2 | ||
| 129 | * (int) (e->address & (SHADOW_COPY_SIZE - 1)), '^'); | ||
| 130 | break; | ||
| 131 | case KMEMCHECK_ERROR_BUG: | ||
| 132 | printk(KERN_EMERG "ERROR: kmemcheck: Fatal error\n"); | ||
| 133 | break; | ||
| 134 | } | ||
| 135 | |||
| 136 | __show_regs(&e->regs, 1); | ||
| 137 | print_stack_trace(&e->trace, 0); | ||
| 138 | } | ||
| 139 | |||
| 140 | static void do_wakeup(unsigned long data) | ||
| 141 | { | ||
| 142 | while (error_count > 0) | ||
| 143 | kmemcheck_error_recall(); | ||
| 144 | |||
| 145 | if (error_missed_count > 0) { | ||
| 146 | printk(KERN_WARNING "kmemcheck: Lost %d error reports because " | ||
| 147 | "the queue was too small\n", error_missed_count); | ||
| 148 | error_missed_count = 0; | ||
| 149 | } | ||
| 150 | } | ||
| 151 | |||
| 152 | static DECLARE_TASKLET(kmemcheck_tasklet, &do_wakeup, 0); | ||
| 153 | |||
| 154 | /* | ||
| 155 | * Save the context of an error report. | ||
| 156 | */ | ||
| 157 | void kmemcheck_error_save(enum kmemcheck_shadow state, | ||
| 158 | unsigned long address, unsigned int size, struct pt_regs *regs) | ||
| 159 | { | ||
| 160 | static unsigned long prev_ip; | ||
| 161 | |||
| 162 | struct kmemcheck_error *e; | ||
| 163 | void *shadow_copy; | ||
| 164 | void *memory_copy; | ||
| 165 | |||
| 166 | /* Don't report several adjacent errors from the same EIP. */ | ||
| 167 | if (regs->ip == prev_ip) | ||
| 168 | return; | ||
| 169 | prev_ip = regs->ip; | ||
| 170 | |||
| 171 | e = error_next_wr(); | ||
| 172 | if (!e) | ||
| 173 | return; | ||
| 174 | |||
| 175 | e->type = KMEMCHECK_ERROR_INVALID_ACCESS; | ||
| 176 | |||
| 177 | e->state = state; | ||
| 178 | e->address = address; | ||
| 179 | e->size = size; | ||
| 180 | |||
| 181 | /* Save regs */ | ||
| 182 | memcpy(&e->regs, regs, sizeof(*regs)); | ||
| 183 | |||
| 184 | /* Save stack trace */ | ||
| 185 | e->trace.nr_entries = 0; | ||
| 186 | e->trace.entries = e->trace_entries; | ||
| 187 | e->trace.max_entries = ARRAY_SIZE(e->trace_entries); | ||
| 188 | e->trace.skip = 0; | ||
| 189 | save_stack_trace_bp(&e->trace, regs->bp); | ||
| 190 | |||
| 191 | /* Round address down to nearest 16 bytes */ | ||
| 192 | shadow_copy = kmemcheck_shadow_lookup(address | ||
| 193 | & ~(SHADOW_COPY_SIZE - 1)); | ||
| 194 | BUG_ON(!shadow_copy); | ||
| 195 | |||
| 196 | memcpy(e->shadow_copy, shadow_copy, SHADOW_COPY_SIZE); | ||
| 197 | |||
| 198 | kmemcheck_show_addr(address); | ||
| 199 | memory_copy = (void *) (address & ~(SHADOW_COPY_SIZE - 1)); | ||
| 200 | memcpy(e->memory_copy, memory_copy, SHADOW_COPY_SIZE); | ||
| 201 | kmemcheck_hide_addr(address); | ||
| 202 | |||
| 203 | tasklet_hi_schedule_first(&kmemcheck_tasklet); | ||
| 204 | } | ||
| 205 | |||
| 206 | /* | ||
| 207 | * Save the context of a kmemcheck bug. | ||
| 208 | */ | ||
| 209 | void kmemcheck_error_save_bug(struct pt_regs *regs) | ||
| 210 | { | ||
| 211 | struct kmemcheck_error *e; | ||
| 212 | |||
| 213 | e = error_next_wr(); | ||
| 214 | if (!e) | ||
| 215 | return; | ||
| 216 | |||
| 217 | e->type = KMEMCHECK_ERROR_BUG; | ||
| 218 | |||
| 219 | memcpy(&e->regs, regs, sizeof(*regs)); | ||
| 220 | |||
| 221 | e->trace.nr_entries = 0; | ||
| 222 | e->trace.entries = e->trace_entries; | ||
| 223 | e->trace.max_entries = ARRAY_SIZE(e->trace_entries); | ||
| 224 | e->trace.skip = 1; | ||
| 225 | save_stack_trace(&e->trace); | ||
| 226 | |||
| 227 | tasklet_hi_schedule_first(&kmemcheck_tasklet); | ||
| 228 | } | ||
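
error.c defers all reporting through a fixed-size ring (error_next_wr()/error_next_rd()): the trap path must not printk() directly, since a console driver could fault recursively, so entries are queued and drained from a tasklet, with overflow counted in error_missed_count. No locking is needed because producer and consumer run on the same (single) CPU. A standalone model of the same ring, illustrative only; QUEUE_SIZE stands in for CONFIG_KMEMCHECK_QUEUE_SIZE:

    #include <stdio.h>

    #define QUEUE_SIZE 64

    static int fifo[QUEUE_SIZE];
    static unsigned int count, rd, wr, missed;

    static int *next_wr(void)
    {
            int *e;

            if (count == QUEUE_SIZE) {
                    ++missed;       /* full: drop, like error_missed_count */
                    return NULL;
            }
            e = &fifo[wr];
            if (++wr == QUEUE_SIZE)
                    wr = 0;
            ++count;
            return e;
    }

    static int *next_rd(void)
    {
            int *e;

            if (count == 0)
                    return NULL;
            e = &fifo[rd];
            if (++rd == QUEUE_SIZE)
                    rd = 0;
            --count;
            return e;
    }

    int main(void)
    {
            int i, *e;

            for (i = 0; i < 70; ++i)        /* overfill on purpose */
                    if ((e = next_wr()))
                            *e = i;
            while ((e = next_rd()))
                    printf("%d ", *e);
            printf("\nmissed: %u\n", missed);
            return 0;
    }
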
diff --git a/arch/x86/mm/kmemcheck/error.h b/arch/x86/mm/kmemcheck/error.h new file mode 100644 index 000000000000..0efc2e8d0a20 --- /dev/null +++ b/arch/x86/mm/kmemcheck/error.h | |||
| @@ -0,0 +1,15 @@ | |||
| 1 | #ifndef ARCH__X86__MM__KMEMCHECK__ERROR_H | ||
| 2 | #define ARCH__X86__MM__KMEMCHECK__ERROR_H | ||
| 3 | |||
| 4 | #include <linux/ptrace.h> | ||
| 5 | |||
| 6 | #include "shadow.h" | ||
| 7 | |||
| 8 | void kmemcheck_error_save(enum kmemcheck_shadow state, | ||
| 9 | unsigned long address, unsigned int size, struct pt_regs *regs); | ||
| 10 | |||
| 11 | void kmemcheck_error_save_bug(struct pt_regs *regs); | ||
| 12 | |||
| 13 | void kmemcheck_error_recall(void); | ||
| 14 | |||
| 15 | #endif | ||
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c new file mode 100644 index 000000000000..2c55ed098654 --- /dev/null +++ b/arch/x86/mm/kmemcheck/kmemcheck.c | |||
| @@ -0,0 +1,640 @@ | |||
| 1 | /** | ||
| 2 | * kmemcheck - a heavyweight memory checker for the linux kernel | ||
| 3 | * Copyright (C) 2007, 2008 Vegard Nossum <vegardno@ifi.uio.no> | ||
| 4 | * (With a lot of help from Ingo Molnar and Pekka Enberg.) | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify | ||
| 7 | * it under the terms of the GNU General Public License (version 2) as | ||
| 8 | * published by the Free Software Foundation. | ||
| 9 | */ | ||
| 10 | |||
| 11 | #include <linux/init.h> | ||
| 12 | #include <linux/interrupt.h> | ||
| 13 | #include <linux/kallsyms.h> | ||
| 14 | #include <linux/kernel.h> | ||
| 15 | #include <linux/kmemcheck.h> | ||
| 16 | #include <linux/mm.h> | ||
| 17 | #include <linux/module.h> | ||
| 18 | #include <linux/page-flags.h> | ||
| 19 | #include <linux/percpu.h> | ||
| 20 | #include <linux/ptrace.h> | ||
| 21 | #include <linux/string.h> | ||
| 22 | #include <linux/types.h> | ||
| 23 | |||
| 24 | #include <asm/cacheflush.h> | ||
| 25 | #include <asm/kmemcheck.h> | ||
| 26 | #include <asm/pgtable.h> | ||
| 27 | #include <asm/tlbflush.h> | ||
| 28 | |||
| 29 | #include "error.h" | ||
| 30 | #include "opcode.h" | ||
| 31 | #include "pte.h" | ||
| 32 | #include "selftest.h" | ||
| 33 | #include "shadow.h" | ||
| 34 | |||
| 35 | |||
| 36 | #ifdef CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT | ||
| 37 | # define KMEMCHECK_ENABLED 0 | ||
| 38 | #endif | ||
| 39 | |||
| 40 | #ifdef CONFIG_KMEMCHECK_ENABLED_BY_DEFAULT | ||
| 41 | # define KMEMCHECK_ENABLED 1 | ||
| 42 | #endif | ||
| 43 | |||
| 44 | #ifdef CONFIG_KMEMCHECK_ONESHOT_BY_DEFAULT | ||
| 45 | # define KMEMCHECK_ENABLED 2 | ||
| 46 | #endif | ||
| 47 | |||
| 48 | int kmemcheck_enabled = KMEMCHECK_ENABLED; | ||
| 49 | |||
| 50 | int __init kmemcheck_init(void) | ||
| 51 | { | ||
| 52 | #ifdef CONFIG_SMP | ||
| 53 | /* | ||
| 54 | * Limit SMP to use a single CPU. We rely on the fact that this code | ||
| 55 | * runs before SMP is set up. | ||
| 56 | */ | ||
| 57 | if (setup_max_cpus > 1) { | ||
| 58 | printk(KERN_INFO | ||
| 59 | "kmemcheck: Limiting number of CPUs to 1.\n"); | ||
| 60 | setup_max_cpus = 1; | ||
| 61 | } | ||
| 62 | #endif | ||
| 63 | |||
| 64 | if (!kmemcheck_selftest()) { | ||
| 65 | printk(KERN_INFO "kmemcheck: self-tests failed; disabling\n"); | ||
| 66 | kmemcheck_enabled = 0; | ||
| 67 | return -EINVAL; | ||
| 68 | } | ||
| 69 | |||
| 70 | printk(KERN_INFO "kmemcheck: Initialized\n"); | ||
| 71 | return 0; | ||
| 72 | } | ||
| 73 | |||
| 74 | early_initcall(kmemcheck_init); | ||
| 75 | |||
| 76 | /* | ||
| 77 | * We need to parse the kmemcheck= option before any memory is allocated. | ||
| 78 | */ | ||
| 79 | static int __init param_kmemcheck(char *str) | ||
| 80 | { | ||
| 81 | if (!str) | ||
| 82 | return -EINVAL; | ||
| 83 | |||
| 84 | sscanf(str, "%d", &kmemcheck_enabled); | ||
| 85 | return 0; | ||
| 86 | } | ||
| 87 | |||
| 88 | early_param("kmemcheck", param_kmemcheck); | ||
| 89 | |||
| 90 | int kmemcheck_show_addr(unsigned long address) | ||
| 91 | { | ||
| 92 | pte_t *pte; | ||
| 93 | |||
| 94 | pte = kmemcheck_pte_lookup(address); | ||
| 95 | if (!pte) | ||
| 96 | return 0; | ||
| 97 | |||
| 98 | set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); | ||
| 99 | __flush_tlb_one(address); | ||
| 100 | return 1; | ||
| 101 | } | ||
| 102 | |||
| 103 | int kmemcheck_hide_addr(unsigned long address) | ||
| 104 | { | ||
| 105 | pte_t *pte; | ||
| 106 | |||
| 107 | pte = kmemcheck_pte_lookup(address); | ||
| 108 | if (!pte) | ||
| 109 | return 0; | ||
| 110 | |||
| 111 | set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); | ||
| 112 | __flush_tlb_one(address); | ||
| 113 | return 1; | ||
| 114 | } | ||
| 115 | |||
| 116 | struct kmemcheck_context { | ||
| 117 | bool busy; | ||
| 118 | int balance; | ||
| 119 | |||
| 120 | /* | ||
| 121 | * There can be at most two memory operands to an instruction, but | ||
| 122 | * each address can cross a page boundary -- so we may need up to | ||
| 123 | * four addresses that must be hidden/revealed for each fault. | ||
| 124 | */ | ||
| 125 | unsigned long addr[4]; | ||
| 126 | unsigned long n_addrs; | ||
| 127 | unsigned long flags; | ||
| 128 | |||
| 129 | /* Data size of the instruction that caused a fault. */ | ||
| 130 | unsigned int size; | ||
| 131 | }; | ||
| 132 | |||
| 133 | static DEFINE_PER_CPU(struct kmemcheck_context, kmemcheck_context); | ||
| 134 | |||
| 135 | bool kmemcheck_active(struct pt_regs *regs) | ||
| 136 | { | ||
| 137 | struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); | ||
| 138 | |||
| 139 | return data->balance > 0; | ||
| 140 | } | ||
| 141 | |||
| 142 | /* Save an address that needs to be shown/hidden */ | ||
| 143 | static void kmemcheck_save_addr(unsigned long addr) | ||
| 144 | { | ||
| 145 | struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); | ||
| 146 | |||
| 147 | BUG_ON(data->n_addrs >= ARRAY_SIZE(data->addr)); | ||
| 148 | data->addr[data->n_addrs++] = addr; | ||
| 149 | } | ||
| 150 | |||
| 151 | static unsigned int kmemcheck_show_all(void) | ||
| 152 | { | ||
| 153 | struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); | ||
| 154 | unsigned int i; | ||
| 155 | unsigned int n; | ||
| 156 | |||
| 157 | n = 0; | ||
| 158 | for (i = 0; i < data->n_addrs; ++i) | ||
| 159 | n += kmemcheck_show_addr(data->addr[i]); | ||
| 160 | |||
| 161 | return n; | ||
| 162 | } | ||
| 163 | |||
| 164 | static unsigned int kmemcheck_hide_all(void) | ||
| 165 | { | ||
| 166 | struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); | ||
| 167 | unsigned int i; | ||
| 168 | unsigned int n; | ||
| 169 | |||
| 170 | n = 0; | ||
| 171 | for (i = 0; i < data->n_addrs; ++i) | ||
| 172 | n += kmemcheck_hide_addr(data->addr[i]); | ||
| 173 | |||
| 174 | return n; | ||
| 175 | } | ||
| 176 | |||
| 177 | /* | ||
| 178 | * Called from the #PF handler. | ||
| 179 | */ | ||
| 180 | void kmemcheck_show(struct pt_regs *regs) | ||
| 181 | { | ||
| 182 | struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); | ||
| 183 | |||
| 184 | BUG_ON(!irqs_disabled()); | ||
| 185 | |||
| 186 | if (unlikely(data->balance != 0)) { | ||
| 187 | kmemcheck_show_all(); | ||
| 188 | kmemcheck_error_save_bug(regs); | ||
| 189 | data->balance = 0; | ||
| 190 | return; | ||
| 191 | } | ||
| 192 | |||
| 193 | /* | ||
| 194 | * None of the addresses actually belonged to kmemcheck. Note that | ||
| 195 | * this is not an error. | ||
| 196 | */ | ||
| 197 | if (kmemcheck_show_all() == 0) | ||
| 198 | return; | ||
| 199 | |||
| 200 | ++data->balance; | ||
| 201 | |||
| 202 | /* | ||
| 203 | * The IF needs to be cleared as well, so that the faulting | ||
| 204 | * instruction can run "uninterrupted". Otherwise, we might take | ||
| 205 | * an interrupt and start executing that before we've had a chance | ||
| 206 | * to hide the page again. | ||
| 207 | * | ||
| 208 | * NOTE: In the rare case of multiple faults, we must not override | ||
| 209 | * the original flags: | ||
| 210 | */ | ||
| 211 | if (!(regs->flags & X86_EFLAGS_TF)) | ||
| 212 | data->flags = regs->flags; | ||
| 213 | |||
| 214 | regs->flags |= X86_EFLAGS_TF; | ||
| 215 | regs->flags &= ~X86_EFLAGS_IF; | ||
| 216 | } | ||
| 217 | |||
| 218 | /* | ||
| 219 | * Called from the #DB handler. | ||
| 220 | */ | ||
| 221 | void kmemcheck_hide(struct pt_regs *regs) | ||
| 222 | { | ||
| 223 | struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); | ||
| 224 | int n; | ||
| 225 | |||
| 226 | BUG_ON(!irqs_disabled()); | ||
| 227 | |||
| 228 | if (data->balance == 0) | ||
| 229 | return; | ||
| 230 | |||
| 231 | if (unlikely(data->balance != 1)) { | ||
| 232 | kmemcheck_show_all(); | ||
| 233 | kmemcheck_error_save_bug(regs); | ||
| 234 | data->n_addrs = 0; | ||
| 235 | data->balance = 0; | ||
| 236 | |||
| 237 | if (!(data->flags & X86_EFLAGS_TF)) | ||
| 238 | regs->flags &= ~X86_EFLAGS_TF; | ||
| 239 | if (data->flags & X86_EFLAGS_IF) | ||
| 240 | regs->flags |= X86_EFLAGS_IF; | ||
| 241 | return; | ||
| 242 | } | ||
| 243 | |||
| 244 | if (kmemcheck_enabled) | ||
| 245 | n = kmemcheck_hide_all(); | ||
| 246 | else | ||
| 247 | n = kmemcheck_show_all(); | ||
| 248 | |||
| 249 | if (n == 0) | ||
| 250 | return; | ||
| 251 | |||
| 252 | --data->balance; | ||
| 253 | |||
| 254 | data->n_addrs = 0; | ||
| 255 | |||
| 256 | if (!(data->flags & X86_EFLAGS_TF)) | ||
| 257 | regs->flags &= ~X86_EFLAGS_TF; | ||
| 258 | if (data->flags & X86_EFLAGS_IF) | ||
| 259 | regs->flags |= X86_EFLAGS_IF; | ||
| 260 | } | ||
| 261 | |||
| 262 | void kmemcheck_show_pages(struct page *p, unsigned int n) | ||
| 263 | { | ||
| 264 | unsigned int i; | ||
| 265 | |||
| 266 | for (i = 0; i < n; ++i) { | ||
| 267 | unsigned long address; | ||
| 268 | pte_t *pte; | ||
| 269 | unsigned int level; | ||
| 270 | |||
| 271 | address = (unsigned long) page_address(&p[i]); | ||
| 272 | pte = lookup_address(address, &level); | ||
| 273 | BUG_ON(!pte); | ||
| 274 | BUG_ON(level != PG_LEVEL_4K); | ||
| 275 | |||
| 276 | set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); | ||
| 277 | set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_HIDDEN)); | ||
| 278 | __flush_tlb_one(address); | ||
| 279 | } | ||
| 280 | } | ||
| 281 | |||
| 282 | bool kmemcheck_page_is_tracked(struct page *p) | ||
| 283 | { | ||
| 284 | /* This will also check the "hidden" flag of the PTE. */ | ||
| 285 | return kmemcheck_pte_lookup((unsigned long) page_address(p)); | ||
| 286 | } | ||
| 287 | |||
| 288 | void kmemcheck_hide_pages(struct page *p, unsigned int n) | ||
| 289 | { | ||
| 290 | unsigned int i; | ||
| 291 | |||
| 292 | for (i = 0; i < n; ++i) { | ||
| 293 | unsigned long address; | ||
| 294 | pte_t *pte; | ||
| 295 | unsigned int level; | ||
| 296 | |||
| 297 | address = (unsigned long) page_address(&p[i]); | ||
| 298 | pte = lookup_address(address, &level); | ||
| 299 | BUG_ON(!pte); | ||
| 300 | BUG_ON(level != PG_LEVEL_4K); | ||
| 301 | |||
| 302 | set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); | ||
| 303 | set_pte(pte, __pte(pte_val(*pte) | _PAGE_HIDDEN)); | ||
| 304 | __flush_tlb_one(address); | ||
| 305 | } | ||
| 306 | } | ||
| 307 | |||
| 308 | /* Access may NOT cross page boundary */ | ||
| 309 | static void kmemcheck_read_strict(struct pt_regs *regs, | ||
| 310 | unsigned long addr, unsigned int size) | ||
| 311 | { | ||
| 312 | void *shadow; | ||
| 313 | enum kmemcheck_shadow status; | ||
| 314 | |||
| 315 | shadow = kmemcheck_shadow_lookup(addr); | ||
| 316 | if (!shadow) | ||
| 317 | return; | ||
| 318 | |||
| 319 | kmemcheck_save_addr(addr); | ||
| 320 | status = kmemcheck_shadow_test(shadow, size); | ||
| 321 | if (status == KMEMCHECK_SHADOW_INITIALIZED) | ||
| 322 | return; | ||
| 323 | |||
| 324 | if (kmemcheck_enabled) | ||
| 325 | kmemcheck_error_save(status, addr, size, regs); | ||
| 326 | |||
| 327 | if (kmemcheck_enabled == 2) | ||
| 328 | kmemcheck_enabled = 0; | ||
| 329 | |||
| 330 | /* Don't warn about it again. */ | ||
| 331 | kmemcheck_shadow_set(shadow, size); | ||
| 332 | } | ||
| 333 | |||
| 334 | /* Access may cross page boundary */ | ||
| 335 | static void kmemcheck_read(struct pt_regs *regs, | ||
| 336 | unsigned long addr, unsigned int size) | ||
| 337 | { | ||
| 338 | unsigned long page = addr & PAGE_MASK; | ||
| 339 | unsigned long next_addr = addr + size - 1; | ||
| 340 | unsigned long next_page = next_addr & PAGE_MASK; | ||
| 341 | |||
| 342 | if (likely(page == next_page)) { | ||
| 343 | kmemcheck_read_strict(regs, addr, size); | ||
| 344 | return; | ||
| 345 | } | ||
| 346 | |||
| 347 | /* | ||
| 348 | * What we do is basically to split the access across the | ||
| 349 | * two pages and handle each part separately. Yes, this means | ||
| 350 | * that we may now see reads that are 3 + 5 bytes, for | ||
| 351 | * example (and if both are uninitialized, there will be two | ||
| 352 | * reports), but it makes the code a lot simpler. | ||
| 353 | */ | ||
| 354 | kmemcheck_read_strict(regs, addr, next_page - addr); | ||
| 355 | kmemcheck_read_strict(regs, next_page, next_addr - next_page); | ||
| 356 | } | ||
| 357 | |||
| 358 | static void kmemcheck_write_strict(struct pt_regs *regs, | ||
| 359 | unsigned long addr, unsigned int size) | ||
| 360 | { | ||
| 361 | void *shadow; | ||
| 362 | |||
| 363 | shadow = kmemcheck_shadow_lookup(addr); | ||
| 364 | if (!shadow) | ||
| 365 | return; | ||
| 366 | |||
| 367 | kmemcheck_save_addr(addr); | ||
| 368 | kmemcheck_shadow_set(shadow, size); | ||
| 369 | } | ||
| 370 | |||
| 371 | static void kmemcheck_write(struct pt_regs *regs, | ||
| 372 | unsigned long addr, unsigned int size) | ||
| 373 | { | ||
| 374 | unsigned long page = addr & PAGE_MASK; | ||
| 375 | unsigned long next_addr = addr + size - 1; | ||
| 376 | unsigned long next_page = next_addr & PAGE_MASK; | ||
| 377 | |||
| 378 | if (likely(page == next_page)) { | ||
| 379 | kmemcheck_write_strict(regs, addr, size); | ||
| 380 | return; | ||
| 381 | } | ||
| 382 | |||
| 383 | /* See comment in kmemcheck_read(). */ | ||
| 384 | kmemcheck_write_strict(regs, addr, next_page - addr); | ||
| 385 | kmemcheck_write_strict(regs, next_page, next_addr - next_page); | ||
| 386 | } | ||
| 387 | |||
| 388 | /* | ||
| 389 | * Copying is hard. We have two addresses, each of which may be split across | ||
| 390 | * a page (and each page will have different shadow addresses). | ||
| 391 | */ | ||
| 392 | static void kmemcheck_copy(struct pt_regs *regs, | ||
| 393 | unsigned long src_addr, unsigned long dst_addr, unsigned int size) | ||
| 394 | { | ||
| 395 | uint8_t shadow[8]; | ||
| 396 | enum kmemcheck_shadow status; | ||
| 397 | |||
| 398 | unsigned long page; | ||
| 399 | unsigned long next_addr; | ||
| 400 | unsigned long next_page; | ||
| 401 | |||
| 402 | uint8_t *x; | ||
| 403 | unsigned int i; | ||
| 404 | unsigned int n; | ||
| 405 | |||
| 406 | BUG_ON(size > sizeof(shadow)); | ||
| 407 | |||
| 408 | page = src_addr & PAGE_MASK; | ||
| 409 | next_addr = src_addr + size - 1; | ||
| 410 | next_page = next_addr & PAGE_MASK; | ||
| 411 | |||
| 412 | if (likely(page == next_page)) { | ||
| 413 | /* Same page */ | ||
| 414 | x = kmemcheck_shadow_lookup(src_addr); | ||
| 415 | if (x) { | ||
| 416 | kmemcheck_save_addr(src_addr); | ||
| 417 | for (i = 0; i < size; ++i) | ||
| 418 | shadow[i] = x[i]; | ||
| 419 | } else { | ||
| 420 | for (i = 0; i < size; ++i) | ||
| 421 | shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
| 422 | } | ||
| 423 | } else { | ||
| 424 | n = next_page - src_addr; | ||
| 425 | BUG_ON(n > sizeof(shadow)); | ||
| 426 | |||
| 427 | /* First page */ | ||
| 428 | x = kmemcheck_shadow_lookup(src_addr); | ||
| 429 | if (x) { | ||
| 430 | kmemcheck_save_addr(src_addr); | ||
| 431 | for (i = 0; i < n; ++i) | ||
| 432 | shadow[i] = x[i]; | ||
| 433 | } else { | ||
| 434 | /* Not tracked */ | ||
| 435 | for (i = 0; i < n; ++i) | ||
| 436 | shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
| 437 | } | ||
| 438 | |||
| 439 | /* Second page */ | ||
| 440 | x = kmemcheck_shadow_lookup(next_page); | ||
| 441 | if (x) { | ||
| 442 | kmemcheck_save_addr(next_page); | ||
| 443 | for (i = n; i < size; ++i) | ||
| 444 | shadow[i] = x[i - n]; | ||
| 445 | } else { | ||
| 446 | /* Not tracked */ | ||
| 447 | for (i = n; i < size; ++i) | ||
| 448 | shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
| 449 | } | ||
| 450 | } | ||
| 451 | |||
| 452 | page = dst_addr & PAGE_MASK; | ||
| 453 | next_addr = dst_addr + size - 1; | ||
| 454 | next_page = next_addr & PAGE_MASK; | ||
| 455 | |||
| 456 | if (likely(page == next_page)) { | ||
| 457 | /* Same page */ | ||
| 458 | x = kmemcheck_shadow_lookup(dst_addr); | ||
| 459 | if (x) { | ||
| 460 | kmemcheck_save_addr(dst_addr); | ||
| 461 | for (i = 0; i < size; ++i) { | ||
| 462 | x[i] = shadow[i]; | ||
| 463 | shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
| 464 | } | ||
| 465 | } | ||
| 466 | } else { | ||
| 467 | n = next_page - dst_addr; | ||
| 468 | BUG_ON(n > sizeof(shadow)); | ||
| 469 | |||
| 470 | /* First page */ | ||
| 471 | x = kmemcheck_shadow_lookup(dst_addr); | ||
| 472 | if (x) { | ||
| 473 | kmemcheck_save_addr(dst_addr); | ||
| 474 | for (i = 0; i < n; ++i) { | ||
| 475 | x[i] = shadow[i]; | ||
| 476 | shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
| 477 | } | ||
| 478 | } | ||
| 479 | |||
| 480 | /* Second page */ | ||
| 481 | x = kmemcheck_shadow_lookup(next_page); | ||
| 482 | if (x) { | ||
| 483 | kmemcheck_save_addr(next_page); | ||
| 484 | for (i = n; i < size; ++i) { | ||
| 485 | x[i - n] = shadow[i]; | ||
| 486 | shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
| 487 | } | ||
| 488 | } | ||
| 489 | } | ||
| 490 | |||
| 491 | status = kmemcheck_shadow_test(shadow, size); | ||
| 492 | if (status == KMEMCHECK_SHADOW_INITIALIZED) | ||
| 493 | return; | ||
| 494 | |||
| 495 | if (kmemcheck_enabled) | ||
| 496 | kmemcheck_error_save(status, src_addr, size, regs); | ||
| 497 | |||
| 498 | if (kmemcheck_enabled == 2) | ||
| 499 | kmemcheck_enabled = 0; | ||
| 500 | } | ||
| 501 | |||
| 502 | enum kmemcheck_method { | ||
| 503 | KMEMCHECK_READ, | ||
| 504 | KMEMCHECK_WRITE, | ||
| 505 | }; | ||
| 506 | |||
| 507 | static void kmemcheck_access(struct pt_regs *regs, | ||
| 508 | unsigned long fallback_address, enum kmemcheck_method fallback_method) | ||
| 509 | { | ||
| 510 | const uint8_t *insn; | ||
| 511 | const uint8_t *insn_primary; | ||
| 512 | unsigned int size; | ||
| 513 | |||
| 514 | struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); | ||
| 515 | |||
| 516 | /* Recursive fault -- ouch. */ | ||
| 517 | if (data->busy) { | ||
| 518 | kmemcheck_show_addr(fallback_address); | ||
| 519 | kmemcheck_error_save_bug(regs); | ||
| 520 | return; | ||
| 521 | } | ||
| 522 | |||
| 523 | data->busy = true; | ||
| 524 | |||
| 525 | insn = (const uint8_t *) regs->ip; | ||
| 526 | insn_primary = kmemcheck_opcode_get_primary(insn); | ||
| 527 | |||
| 528 | kmemcheck_opcode_decode(insn, &size); | ||
| 529 | |||
| 530 | switch (insn_primary[0]) { | ||
| 531 | #ifdef CONFIG_KMEMCHECK_BITOPS_OK | ||
| 532 | /* AND, OR, XOR */ | ||
| 533 | /* | ||
| 534 | * Unfortunately, these instructions have to be excluded from | ||
| 535 | * our regular checking since they access only some (and not | ||
| 536 | * all) bits. This clears out "bogus" bitfield-access warnings. | ||
| 537 | */ | ||
| 538 | case 0x80: | ||
| 539 | case 0x81: | ||
| 540 | case 0x82: | ||
| 541 | case 0x83: | ||
| 542 | switch ((insn_primary[1] >> 3) & 7) { | ||
| 543 | /* OR */ | ||
| 544 | case 1: | ||
| 545 | /* AND */ | ||
| 546 | case 4: | ||
| 547 | /* XOR */ | ||
| 548 | case 6: | ||
| 549 | kmemcheck_write(regs, fallback_address, size); | ||
| 550 | goto out; | ||
| 551 | |||
| 552 | /* ADD */ | ||
| 553 | case 0: | ||
| 554 | /* ADC */ | ||
| 555 | case 2: | ||
| 556 | /* SBB */ | ||
| 557 | case 3: | ||
| 558 | /* SUB */ | ||
| 559 | case 5: | ||
| 560 | /* CMP */ | ||
| 561 | case 7: | ||
| 562 | break; | ||
| 563 | } | ||
| 564 | break; | ||
| 565 | #endif | ||
| 566 | |||
| 567 | /* MOVS, MOVSB, MOVSW, MOVSD */ | ||
| 568 | case 0xa4: | ||
| 569 | case 0xa5: | ||
| 570 | /* | ||
| 571 | * These instructions are special because they take two | ||
| 572 | * addresses, but we only get one page fault. | ||
| 573 | */ | ||
| 574 | kmemcheck_copy(regs, regs->si, regs->di, size); | ||
| 575 | goto out; | ||
| 576 | |||
| 577 | /* CMPS, CMPSB, CMPSW, CMPSD */ | ||
| 578 | case 0xa6: | ||
| 579 | case 0xa7: | ||
| 580 | kmemcheck_read(regs, regs->si, size); | ||
| 581 | kmemcheck_read(regs, regs->di, size); | ||
| 582 | goto out; | ||
| 583 | } | ||
| 584 | |||
| 585 | /* | ||
| 586 | * If the opcode isn't special in any way, we use the data from the | ||
| 587 | * page fault handler to determine the address and type of memory | ||
| 588 | * access. | ||
| 589 | */ | ||
| 590 | switch (fallback_method) { | ||
| 591 | case KMEMCHECK_READ: | ||
| 592 | kmemcheck_read(regs, fallback_address, size); | ||
| 593 | goto out; | ||
| 594 | case KMEMCHECK_WRITE: | ||
| 595 | kmemcheck_write(regs, fallback_address, size); | ||
| 596 | goto out; | ||
| 597 | } | ||
| 598 | |||
| 599 | out: | ||
| 600 | data->busy = false; | ||
| 601 | } | ||
| 602 | |||
| 603 | bool kmemcheck_fault(struct pt_regs *regs, unsigned long address, | ||
| 604 | unsigned long error_code) | ||
| 605 | { | ||
| 606 | pte_t *pte; | ||
| 607 | |||
| 608 | /* | ||
| 609 | * XXX: Is it safe to assume that memory accesses from virtual 86 | ||
| 610 | * mode or non-kernel code segments will _never_ access kernel | ||
| 611 | * memory (e.g. tracked pages)? For now, we need this to avoid | ||
| 612 | * invoking kmemcheck for PnP BIOS calls. | ||
| 613 | */ | ||
| 614 | if (regs->flags & X86_VM_MASK) | ||
| 615 | return false; | ||
| 616 | if (regs->cs != __KERNEL_CS) | ||
| 617 | return false; | ||
| 618 | |||
| 619 | pte = kmemcheck_pte_lookup(address); | ||
| 620 | if (!pte) | ||
| 621 | return false; | ||
| 622 | |||
| 623 | if (error_code & 2) | ||
| 624 | kmemcheck_access(regs, address, KMEMCHECK_WRITE); | ||
| 625 | else | ||
| 626 | kmemcheck_access(regs, address, KMEMCHECK_READ); | ||
| 627 | |||
| 628 | kmemcheck_show(regs); | ||
| 629 | return true; | ||
| 630 | } | ||
| 631 | |||
| 632 | bool kmemcheck_trap(struct pt_regs *regs) | ||
| 633 | { | ||
| 634 | if (!kmemcheck_active(regs)) | ||
| 635 | return false; | ||
| 636 | |||
| 637 | /* We're done. */ | ||
| 638 | kmemcheck_hide(regs); | ||
| 639 | return true; | ||
| 640 | } | ||
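
kmemcheck_read() and kmemcheck_write() split an access that straddles a page boundary and check each page's part against its own shadow, since the two pages have independent shadow mappings (hence the possible "3 + 5" byte pair mentioned in the comment). A standalone sketch of the split arithmetic; note that this version counts the second chunk inclusively, so the two lengths always sum to size:

    #include <stdio.h>

    #define PAGE_SIZE 4096UL
    #define PAGE_MASK (~(PAGE_SIZE - 1))

    int main(void)
    {
            unsigned long addr = 0x1ffd;    /* 3 bytes before a boundary */
            unsigned int size = 8;

            unsigned long page = addr & PAGE_MASK;
            unsigned long next_addr = addr + size - 1;  /* last byte, inclusive */
            unsigned long next_page = next_addr & PAGE_MASK;

            if (page == next_page)
                    printf("single check: %#lx, %u bytes\n", addr, size);
            else
                    printf("split: %#lx (%lu bytes) + %#lx (%lu bytes)\n",
                           addr, next_page - addr,
                           next_page, next_addr - next_page + 1);
            return 0;
    }
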
diff --git a/arch/x86/mm/kmemcheck/opcode.c b/arch/x86/mm/kmemcheck/opcode.c new file mode 100644 index 000000000000..63c19e27aa6f --- /dev/null +++ b/arch/x86/mm/kmemcheck/opcode.c | |||
| @@ -0,0 +1,106 @@ | |||
| 1 | #include <linux/types.h> | ||
| 2 | |||
| 3 | #include "opcode.h" | ||
| 4 | |||
| 5 | static bool opcode_is_prefix(uint8_t b) | ||
| 6 | { | ||
| 7 | return | ||
| 8 | /* Group 1 */ | ||
| 9 | b == 0xf0 || b == 0xf2 || b == 0xf3 | ||
| 10 | /* Group 2 */ | ||
| 11 | || b == 0x2e || b == 0x36 || b == 0x3e || b == 0x26 | ||
| 12 | || b == 0x64 || b == 0x65 || b == 0x2e || b == 0x3e | ||
| 13 | /* Group 3 */ | ||
| 14 | || b == 0x66 | ||
| 15 | /* Group 4 */ | ||
| 16 | || b == 0x67; | ||
| 17 | } | ||
| 18 | |||
| 19 | #ifdef CONFIG_X86_64 | ||
| 20 | static bool opcode_is_rex_prefix(uint8_t b) | ||
| 21 | { | ||
| 22 | return (b & 0xf0) == 0x40; | ||
| 23 | } | ||
| 24 | #else | ||
| 25 | static bool opcode_is_rex_prefix(uint8_t b) | ||
| 26 | { | ||
| 27 | return false; | ||
| 28 | } | ||
| 29 | #endif | ||
| 30 | |||
| 31 | #define REX_W (1 << 3) | ||
| 32 | |||
| 33 | /* | ||
| 34 | * This is a VERY crude opcode decoder. We only need to find the size of the | ||
| 35 | * load/store that caused our #PF and this should work for all the opcodes | ||
| 36 | * that we care about. Moreover, the ones who invented this instruction set | ||
| 37 | * should be shot. | ||
| 38 | */ | ||
| 39 | void kmemcheck_opcode_decode(const uint8_t *op, unsigned int *size) | ||
| 40 | { | ||
| 41 | /* Default operand size */ | ||
| 42 | int operand_size_override = 4; | ||
| 43 | |||
| 44 | /* prefixes */ | ||
| 45 | for (; opcode_is_prefix(*op); ++op) { | ||
| 46 | if (*op == 0x66) | ||
| 47 | operand_size_override = 2; | ||
| 48 | } | ||
| 49 | |||
| 50 | /* REX prefix */ | ||
| 51 | if (opcode_is_rex_prefix(*op)) { | ||
| 52 | uint8_t rex = *op; | ||
| 53 | |||
| 54 | ++op; | ||
| 55 | if (rex & REX_W) { | ||
| 56 | switch (*op) { | ||
| 57 | case 0x63: | ||
| 58 | *size = 4; | ||
| 59 | return; | ||
| 60 | case 0x0f: | ||
| 61 | ++op; | ||
| 62 | |||
| 63 | switch (*op) { | ||
| 64 | case 0xb6: | ||
| 65 | case 0xbe: | ||
| 66 | *size = 1; | ||
| 67 | return; | ||
| 68 | case 0xb7: | ||
| 69 | case 0xbf: | ||
| 70 | *size = 2; | ||
| 71 | return; | ||
| 72 | } | ||
| 73 | |||
| 74 | break; | ||
| 75 | } | ||
| 76 | |||
| 77 | *size = 8; | ||
| 78 | return; | ||
| 79 | } | ||
| 80 | } | ||
| 81 | |||
| 82 | /* escape opcode */ | ||
| 83 | if (*op == 0x0f) { | ||
| 84 | ++op; | ||
| 85 | |||
| 86 | /* | ||
| 87 | * This is move with zero-extend and sign-extend, respectively; | ||
| 88 | * we don't have to think about 0xb6/0xbe, because this is | ||
| 89 | * already handled in the conditional below. | ||
| 90 | */ | ||
| 91 | if (*op == 0xb7 || *op == 0xbf) | ||
| 92 | operand_size_override = 2; | ||
| 93 | } | ||
| 94 | |||
| 95 | *size = (*op & 1) ? operand_size_override : 1; | ||
| 96 | } | ||
| 97 | |||
| 98 | const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op) | ||
| 99 | { | ||
| 100 | /* skip prefixes */ | ||
| 101 | while (opcode_is_prefix(*op)) | ||
| 102 | ++op; | ||
| 103 | if (opcode_is_rex_prefix(*op)) | ||
| 104 | ++op; | ||
| 105 | return op; | ||
| 106 | } | ||
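
After the prefixes are consumed, the decoder's core rule is that the low bit of the primary opcode distinguishes byte operations (bit clear) from full-width ones (bit set), with 0x66 narrowing full width to 16 bits and REX.W widening it to 64. A simplified userspace restatement of that rule, under the stated assumptions (no 0x0f escape handling; only lock/rep and operand-size prefixes):

    #include <assert.h>
    #include <stdint.h>

    static unsigned int insn_size(const uint8_t *op)
    {
            unsigned int size = 4;                  /* default operand size */

            for (;; ++op) {
                    if (*op == 0x66)                /* operand-size override */
                            size = 2;
                    else if (*op == 0xf0 || *op == 0xf2 || *op == 0xf3)
                            ;                       /* lock/rep: skip */
                    else
                            break;
            }
            if ((*op & 0xf0) == 0x40) {             /* REX prefix */
                    if (*op & 0x08)                 /* REX.W */
                            return 8;
                    ++op;
            }
            return (*op & 1) ? size : 1;    /* low bit: byte vs full width */
    }

    int main(void)
    {
            assert(insn_size((const uint8_t *)"\xa4") == 1);        /* movsb */
            assert(insn_size((const uint8_t *)"\xf3\xa5") == 4);    /* rep movsl */
            assert(insn_size((const uint8_t *)"\x66\x89\x10") == 2);
            assert(insn_size((const uint8_t *)"\x48\x89\x10") == 8);
            return 0;
    }
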
diff --git a/arch/x86/mm/kmemcheck/opcode.h b/arch/x86/mm/kmemcheck/opcode.h new file mode 100644 index 000000000000..6956aad66b5b --- /dev/null +++ b/arch/x86/mm/kmemcheck/opcode.h | |||
| @@ -0,0 +1,9 @@ | |||
| 1 | #ifndef ARCH__X86__MM__KMEMCHECK__OPCODE_H | ||
| 2 | #define ARCH__X86__MM__KMEMCHECK__OPCODE_H | ||
| 3 | |||
| 4 | #include <linux/types.h> | ||
| 5 | |||
| 6 | void kmemcheck_opcode_decode(const uint8_t *op, unsigned int *size); | ||
| 7 | const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op); | ||
| 8 | |||
| 9 | #endif | ||
diff --git a/arch/x86/mm/kmemcheck/pte.c b/arch/x86/mm/kmemcheck/pte.c new file mode 100644 index 000000000000..4ead26eeaf96 --- /dev/null +++ b/arch/x86/mm/kmemcheck/pte.c | |||
| @@ -0,0 +1,22 @@ | |||
| 1 | #include <linux/mm.h> | ||
| 2 | |||
| 3 | #include <asm/pgtable.h> | ||
| 4 | |||
| 5 | #include "pte.h" | ||
| 6 | |||
| 7 | pte_t *kmemcheck_pte_lookup(unsigned long address) | ||
| 8 | { | ||
| 9 | pte_t *pte; | ||
| 10 | unsigned int level; | ||
| 11 | |||
| 12 | pte = lookup_address(address, &level); | ||
| 13 | if (!pte) | ||
| 14 | return NULL; | ||
| 15 | if (level != PG_LEVEL_4K) | ||
| 16 | return NULL; | ||
| 17 | if (!pte_hidden(*pte)) | ||
| 18 | return NULL; | ||
| 19 | |||
| 20 | return pte; | ||
| 21 | } | ||
| 22 | |||
diff --git a/arch/x86/mm/kmemcheck/pte.h b/arch/x86/mm/kmemcheck/pte.h new file mode 100644 index 000000000000..9f5966456492 --- /dev/null +++ b/arch/x86/mm/kmemcheck/pte.h | |||
| @@ -0,0 +1,10 @@ | |||
| 1 | #ifndef ARCH__X86__MM__KMEMCHECK__PTE_H | ||
| 2 | #define ARCH__X86__MM__KMEMCHECK__PTE_H | ||
| 3 | |||
| 4 | #include <linux/mm.h> | ||
| 5 | |||
| 6 | #include <asm/pgtable.h> | ||
| 7 | |||
| 8 | pte_t *kmemcheck_pte_lookup(unsigned long address); | ||
| 9 | |||
| 10 | #endif | ||
diff --git a/arch/x86/mm/kmemcheck/selftest.c b/arch/x86/mm/kmemcheck/selftest.c new file mode 100644 index 000000000000..036efbea8b28 --- /dev/null +++ b/arch/x86/mm/kmemcheck/selftest.c | |||
| @@ -0,0 +1,69 @@ | |||
| 1 | #include <linux/kernel.h> | ||
| 2 | |||
| 3 | #include "opcode.h" | ||
| 4 | #include "selftest.h" | ||
| 5 | |||
| 6 | struct selftest_opcode { | ||
| 7 | unsigned int expected_size; | ||
| 8 | const uint8_t *insn; | ||
| 9 | const char *desc; | ||
| 10 | }; | ||
| 11 | |||
| 12 | static const struct selftest_opcode selftest_opcodes[] = { | ||
| 13 | /* REP MOVS */ | ||
| 14 | {1, "\xf3\xa4", "rep movsb <mem8>, <mem8>"}, | ||
| 15 | {4, "\xf3\xa5", "rep movsl <mem32>, <mem32>"}, | ||
| 16 | |||
| 17 | /* MOVZX / MOVZXD */ | ||
| 18 | {1, "\x66\x0f\xb6\x51\xf8", "movzwq <mem8>, <reg16>"}, | ||
| 19 | {1, "\x0f\xb6\x51\xf8", "movzwq <mem8>, <reg32>"}, | ||
| 20 | |||
| 21 | /* MOVSX / MOVSXD */ | ||
| 22 | {1, "\x66\x0f\xbe\x51\xf8", "movswq <mem8>, <reg16>"}, | ||
| 23 | {1, "\x0f\xbe\x51\xf8", "movswq <mem8>, <reg32>"}, | ||
| 24 | |||
| 25 | #ifdef CONFIG_X86_64 | ||
| 26 | /* MOVZX / MOVZXD */ | ||
| 27 | {1, "\x49\x0f\xb6\x51\xf8", "movzbq <mem8>, <reg64>"}, | ||
| 28 | {2, "\x49\x0f\xb7\x51\xf8", "movzbq <mem16>, <reg64>"}, | ||
| 29 | |||
| 30 | /* MOVSX / MOVSXD */ | ||
| 31 | {1, "\x49\x0f\xbe\x51\xf8", "movsbq <mem8>, <reg64>"}, | ||
| 32 | {2, "\x49\x0f\xbf\x51\xf8", "movsbq <mem16>, <reg64>"}, | ||
| 33 | {4, "\x49\x63\x51\xf8", "movslq <mem32>, <reg64>"}, | ||
| 34 | #endif | ||
| 35 | }; | ||
| 36 | |||
| 37 | static bool selftest_opcode_one(const struct selftest_opcode *op) | ||
| 38 | { | ||
| 39 | unsigned size; | ||
| 40 | |||
| 41 | kmemcheck_opcode_decode(op->insn, &size); | ||
| 42 | |||
| 43 | if (size == op->expected_size) | ||
| 44 | return true; | ||
| 45 | |||
| 46 | printk(KERN_WARNING "kmemcheck: opcode %s: expected size %d, got %d\n", | ||
| 47 | op->desc, op->expected_size, size); | ||
| 48 | return false; | ||
| 49 | } | ||
| 50 | |||
| 51 | static bool selftest_opcodes_all(void) | ||
| 52 | { | ||
| 53 | bool pass = true; | ||
| 54 | unsigned int i; | ||
| 55 | |||
| 56 | for (i = 0; i < ARRAY_SIZE(selftest_opcodes); ++i) | ||
| 57 | pass = pass && selftest_opcode_one(&selftest_opcodes[i]); | ||
| 58 | |||
| 59 | return pass; | ||
| 60 | } | ||
| 61 | |||
| 62 | bool kmemcheck_selftest(void) | ||
| 63 | { | ||
| 64 | bool pass = true; | ||
| 65 | |||
| 66 | pass = pass && selftest_opcodes_all(); | ||
| 67 | |||
| 68 | return pass; | ||
| 69 | } | ||
diff --git a/arch/x86/mm/kmemcheck/selftest.h b/arch/x86/mm/kmemcheck/selftest.h new file mode 100644 index 000000000000..8fed4fe11f95 --- /dev/null +++ b/arch/x86/mm/kmemcheck/selftest.h | |||
| @@ -0,0 +1,6 @@ | |||
| 1 | #ifndef ARCH_X86_MM_KMEMCHECK_SELFTEST_H | ||
| 2 | #define ARCH_X86_MM_KMEMCHECK_SELFTEST_H | ||
| 3 | |||
| 4 | bool kmemcheck_selftest(void); | ||
| 5 | |||
| 6 | #endif | ||
diff --git a/arch/x86/mm/kmemcheck/shadow.c b/arch/x86/mm/kmemcheck/shadow.c new file mode 100644 index 000000000000..e773b6bd0079 --- /dev/null +++ b/arch/x86/mm/kmemcheck/shadow.c | |||
| @@ -0,0 +1,162 @@ | |||
| 1 | #include <linux/kmemcheck.h> | ||
| 2 | #include <linux/module.h> | ||
| 3 | #include <linux/mm.h> | ||
| 5 | |||
| 6 | #include <asm/page.h> | ||
| 7 | #include <asm/pgtable.h> | ||
| 8 | |||
| 9 | #include "pte.h" | ||
| 10 | #include "shadow.h" | ||
| 11 | |||
| 12 | /* | ||
| 13 | * Return the shadow address for the given address. Returns NULL if the | ||
| 14 | * address is not tracked. | ||
| 15 | * | ||
| 16 | * We need to be extremely careful not to follow any invalid pointers, | ||
| 17 | * because this function can be called for *any* possible address. | ||
| 18 | */ | ||
| 19 | void *kmemcheck_shadow_lookup(unsigned long address) | ||
| 20 | { | ||
| 21 | pte_t *pte; | ||
| 22 | struct page *page; | ||
| 23 | |||
| 24 | if (!virt_addr_valid(address)) | ||
| 25 | return NULL; | ||
| 26 | |||
| 27 | pte = kmemcheck_pte_lookup(address); | ||
| 28 | if (!pte) | ||
| 29 | return NULL; | ||
| 30 | |||
| 31 | page = virt_to_page(address); | ||
| 32 | if (!page->shadow) | ||
| 33 | return NULL; | ||
| 34 | return page->shadow + (address & (PAGE_SIZE - 1)); | ||
| 35 | } | ||
| 36 | |||
| 37 | static void mark_shadow(void *address, unsigned int n, | ||
| 38 | enum kmemcheck_shadow status) | ||
| 39 | { | ||
| 40 | unsigned long addr = (unsigned long) address; | ||
| 41 | unsigned long last_addr = addr + n - 1; | ||
| 42 | unsigned long page = addr & PAGE_MASK; | ||
| 43 | unsigned long last_page = last_addr & PAGE_MASK; | ||
| 44 | unsigned int first_n; | ||
| 45 | void *shadow; | ||
| 46 | |||
| 47 | /* If the memory range crosses a page boundary, stop there. */ | ||
| 48 | if (page == last_page) | ||
| 49 | first_n = n; | ||
| 50 | else | ||
| 51 | first_n = page + PAGE_SIZE - addr; | ||
| 52 | |||
| 53 | shadow = kmemcheck_shadow_lookup(addr); | ||
| 54 | if (shadow) | ||
| 55 | memset(shadow, status, first_n); | ||
| 56 | |||
| 57 | addr += first_n; | ||
| 58 | n -= first_n; | ||
| 59 | |||
| 60 | /* Do full-page memset()s. */ | ||
| 61 | while (n >= PAGE_SIZE) { | ||
| 62 | shadow = kmemcheck_shadow_lookup(addr); | ||
| 63 | if (shadow) | ||
| 64 | memset(shadow, status, PAGE_SIZE); | ||
| 65 | |||
| 66 | addr += PAGE_SIZE; | ||
| 67 | n -= PAGE_SIZE; | ||
| 68 | } | ||
| 69 | |||
| 70 | /* Do the remaining page, if any. */ | ||
| 71 | if (n > 0) { | ||
| 72 | shadow = kmemcheck_shadow_lookup(addr); | ||
| 73 | if (shadow) | ||
| 74 | memset(shadow, status, n); | ||
| 75 | } | ||
| 76 | } | ||
| 77 | |||
| 78 | void kmemcheck_mark_unallocated(void *address, unsigned int n) | ||
| 79 | { | ||
| 80 | mark_shadow(address, n, KMEMCHECK_SHADOW_UNALLOCATED); | ||
| 81 | } | ||
| 82 | |||
| 83 | void kmemcheck_mark_uninitialized(void *address, unsigned int n) | ||
| 84 | { | ||
| 85 | mark_shadow(address, n, KMEMCHECK_SHADOW_UNINITIALIZED); | ||
| 86 | } | ||
| 87 | |||
| 88 | /* | ||
| 89 | * Fill the shadow memory of the given address such that the memory at that | ||
| 90 | * address is marked as being initialized. | ||
| 91 | */ | ||
| 92 | void kmemcheck_mark_initialized(void *address, unsigned int n) | ||
| 93 | { | ||
| 94 | mark_shadow(address, n, KMEMCHECK_SHADOW_INITIALIZED); | ||
| 95 | } | ||
| 96 | EXPORT_SYMBOL_GPL(kmemcheck_mark_initialized); | ||
| 97 | |||
| 98 | void kmemcheck_mark_freed(void *address, unsigned int n) | ||
| 99 | { | ||
| 100 | mark_shadow(address, n, KMEMCHECK_SHADOW_FREED); | ||
| 101 | } | ||
| 102 | |||
| 103 | void kmemcheck_mark_unallocated_pages(struct page *p, unsigned int n) | ||
| 104 | { | ||
| 105 | unsigned int i; | ||
| 106 | |||
| 107 | for (i = 0; i < n; ++i) | ||
| 108 | kmemcheck_mark_unallocated(page_address(&p[i]), PAGE_SIZE); | ||
| 109 | } | ||
| 110 | |||
| 111 | void kmemcheck_mark_uninitialized_pages(struct page *p, unsigned int n) | ||
| 112 | { | ||
| 113 | unsigned int i; | ||
| 114 | |||
| 115 | for (i = 0; i < n; ++i) | ||
| 116 | kmemcheck_mark_uninitialized(page_address(&p[i]), PAGE_SIZE); | ||
| 117 | } | ||
| 118 | |||
| 119 | void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n) | ||
| 120 | { | ||
| 121 | unsigned int i; | ||
| 122 | |||
| 123 | for (i = 0; i < n; ++i) | ||
| 124 | kmemcheck_mark_initialized(page_address(&p[i]), PAGE_SIZE); | ||
| 125 | } | ||
| 126 | |||
| 127 | enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size) | ||
| 128 | { | ||
| 129 | uint8_t *x; | ||
| 130 | unsigned int i; | ||
| 131 | |||
| 132 | x = shadow; | ||
| 133 | |||
| 134 | #ifdef CONFIG_KMEMCHECK_PARTIAL_OK | ||
| 135 | /* | ||
| 136 | * Make sure _some_ bytes are initialized. Gcc frequently generates | ||
| 137 | * code to access neighboring bytes. | ||
| 138 | */ | ||
| 139 | for (i = 0; i < size; ++i) { | ||
| 140 | if (x[i] == KMEMCHECK_SHADOW_INITIALIZED) | ||
| 141 | return x[i]; | ||
| 142 | } | ||
| 143 | #else | ||
| 144 | /* All bytes must be initialized. */ | ||
| 145 | for (i = 0; i < size; ++i) { | ||
| 146 | if (x[i] != KMEMCHECK_SHADOW_INITIALIZED) | ||
| 147 | return x[i]; | ||
| 148 | } | ||
| 149 | #endif | ||
| 150 | |||
| 151 | return x[0]; | ||
| 152 | } | ||
| 153 | |||
| 154 | void kmemcheck_shadow_set(void *shadow, unsigned int size) | ||
| 155 | { | ||
| 156 | uint8_t *x; | ||
| 157 | unsigned int i; | ||
| 158 | |||
| 159 | x = shadow; | ||
| 160 | for (i = 0; i < size; ++i) | ||
| 161 | x[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
| 162 | } | ||
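
mark_shadow() has to look up the shadow page by page, because consecutive virtual pages do not have consecutive shadow pages: it handles a partial head chunk up to the first page boundary, then whole pages, then a partial tail. A standalone model of that chunking, illustrative only:

    #include <stdio.h>

    #define PAGE_SIZE 4096UL
    #define PAGE_MASK (~(PAGE_SIZE - 1))

    static void mark(unsigned long addr, unsigned int n)
    {
            unsigned long last_addr = addr + n - 1;
            unsigned long page = addr & PAGE_MASK;
            unsigned long last_page = last_addr & PAGE_MASK;
            unsigned int first_n;

            /* Head: up to the first page boundary (or the whole range). */
            first_n = (page == last_page) ? n : page + PAGE_SIZE - addr;
            printf("head: %#lx, %u bytes\n", addr, first_n);
            addr += first_n;
            n -= first_n;

            while (n >= PAGE_SIZE) {        /* whole pages */
                    printf("page: %#lx, %lu bytes\n", addr, PAGE_SIZE);
                    addr += PAGE_SIZE;
                    n -= PAGE_SIZE;
            }
            if (n > 0)                      /* tail */
                    printf("tail: %#lx, %u bytes\n", addr, n);
    }

    int main(void)
    {
            mark(0x1f00, 2 * 4096 + 512);   /* range spanning several pages */
            return 0;
    }
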
diff --git a/arch/x86/mm/kmemcheck/shadow.h b/arch/x86/mm/kmemcheck/shadow.h new file mode 100644 index 000000000000..af46d9ab9d86 --- /dev/null +++ b/arch/x86/mm/kmemcheck/shadow.h | |||
| @@ -0,0 +1,16 @@ | |||
| 1 | #ifndef ARCH__X86__MM__KMEMCHECK__SHADOW_H | ||
| 2 | #define ARCH__X86__MM__KMEMCHECK__SHADOW_H | ||
| 3 | |||
| 4 | enum kmemcheck_shadow { | ||
| 5 | KMEMCHECK_SHADOW_UNALLOCATED, | ||
| 6 | KMEMCHECK_SHADOW_UNINITIALIZED, | ||
| 7 | KMEMCHECK_SHADOW_INITIALIZED, | ||
| 8 | KMEMCHECK_SHADOW_FREED, | ||
| 9 | }; | ||
| 10 | |||
| 11 | void *kmemcheck_shadow_lookup(unsigned long address); | ||
| 12 | |||
| 13 | enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size); | ||
| 14 | void kmemcheck_shadow_set(void *shadow, unsigned int size); | ||
| 15 | |||
| 16 | #endif | ||
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 6ce9518fe2ac..3cfe9ced8a4c 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
| @@ -470,7 +470,7 @@ static int split_large_page(pte_t *kpte, unsigned long address) | |||
| 470 | 470 | ||
| 471 | if (!debug_pagealloc) | 471 | if (!debug_pagealloc) |
| 472 | spin_unlock(&cpa_lock); | 472 | spin_unlock(&cpa_lock); |
| 473 | base = alloc_pages(GFP_KERNEL, 0); | 473 | base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0); |
| 474 | if (!debug_pagealloc) | 474 | if (!debug_pagealloc) |
| 475 | spin_lock(&cpa_lock); | 475 | spin_lock(&cpa_lock); |
| 476 | if (!base) | 476 | if (!base) |
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 7aa03a5389f5..8e43bdd45456 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c | |||
| @@ -4,9 +4,11 @@ | |||
| 4 | #include <asm/tlb.h> | 4 | #include <asm/tlb.h> |
| 5 | #include <asm/fixmap.h> | 5 | #include <asm/fixmap.h> |
| 6 | 6 | ||
| 7 | #define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO | ||
| 8 | |||
| 7 | pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) | 9 | pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) |
| 8 | { | 10 | { |
| 9 | return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); | 11 | return (pte_t *)__get_free_page(PGALLOC_GFP); |
| 10 | } | 12 | } |
| 11 | 13 | ||
| 12 | pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) | 14 | pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) |
| @@ -14,9 +16,9 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) | |||
| 14 | struct page *pte; | 16 | struct page *pte; |
| 15 | 17 | ||
| 16 | #ifdef CONFIG_HIGHPTE | 18 | #ifdef CONFIG_HIGHPTE |
| 17 | pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0); | 19 | pte = alloc_pages(PGALLOC_GFP | __GFP_HIGHMEM, 0); |
| 18 | #else | 20 | #else |
| 19 | pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); | 21 | pte = alloc_pages(PGALLOC_GFP, 0); |
| 20 | #endif | 22 | #endif |
| 21 | if (pte) | 23 | if (pte) |
| 22 | pgtable_page_ctor(pte); | 24 | pgtable_page_ctor(pte); |
| @@ -161,7 +163,7 @@ static int preallocate_pmds(pmd_t *pmds[]) | |||
| 161 | bool failed = false; | 163 | bool failed = false; |
| 162 | 164 | ||
| 163 | for(i = 0; i < PREALLOCATED_PMDS; i++) { | 165 | for(i = 0; i < PREALLOCATED_PMDS; i++) { |
| 164 | pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); | 166 | pmd_t *pmd = (pmd_t *)__get_free_page(PGALLOC_GFP); |
| 165 | if (pmd == NULL) | 167 | if (pmd == NULL) |
| 166 | failed = true; | 168 | failed = true; |
| 167 | pmds[i] = pmd; | 169 | pmds[i] = pmd; |
| @@ -228,7 +230,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) | |||
| 228 | pmd_t *pmds[PREALLOCATED_PMDS]; | 230 | pmd_t *pmds[PREALLOCATED_PMDS]; |
| 229 | unsigned long flags; | 231 | unsigned long flags; |
| 230 | 232 | ||
| 231 | pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); | 233 | pgd = (pgd_t *)__get_free_page(PGALLOC_GFP); |
| 232 | 234 | ||
| 233 | if (pgd == NULL) | 235 | if (pgd == NULL) |
| 234 | goto out; | 236 | goto out; |
diff --git a/crypto/xor.c b/crypto/xor.c index 996b6ee57d9e..fc5b836f3430 100644 --- a/crypto/xor.c +++ b/crypto/xor.c | |||
| @@ -101,7 +101,12 @@ calibrate_xor_blocks(void) | |||
| 101 | void *b1, *b2; | 101 | void *b1, *b2; |
| 102 | struct xor_block_template *f, *fastest; | 102 | struct xor_block_template *f, *fastest; |
| 103 | 103 | ||
| 104 | b1 = (void *) __get_free_pages(GFP_KERNEL, 2); | 104 | /* |
| 105 | * Note: Since the memory is not actually used for _anything_ but to | ||
| 106 | * test the XOR speed, we don't really want kmemcheck to warn about | ||
| 107 | * reading uninitialized bytes here. | ||
| 108 | */ | ||
| 109 | b1 = (void *) __get_free_pages(GFP_KERNEL | __GFP_NOTRACK, 2); | ||
| 105 | if (!b1) { | 110 | if (!b1) { |
| 106 | printk(KERN_WARNING "xor: Yikes! No memory available.\n"); | 111 | printk(KERN_WARNING "xor: Yikes! No memory available.\n"); |
| 107 | return -ENOMEM; | 112 | return -ENOMEM; |
diff --git a/drivers/ieee1394/csr1212.c b/drivers/ieee1394/csr1212.c index a6dfeb0b3372..e76cac64c533 100644 --- a/drivers/ieee1394/csr1212.c +++ b/drivers/ieee1394/csr1212.c | |||
| @@ -35,6 +35,7 @@ | |||
| 35 | 35 | ||
| 36 | #include <linux/errno.h> | 36 | #include <linux/errno.h> |
| 37 | #include <linux/kernel.h> | 37 | #include <linux/kernel.h> |
| 38 | #include <linux/kmemcheck.h> | ||
| 38 | #include <linux/string.h> | 39 | #include <linux/string.h> |
| 39 | #include <asm/bug.h> | 40 | #include <asm/bug.h> |
| 40 | #include <asm/byteorder.h> | 41 | #include <asm/byteorder.h> |
| @@ -387,6 +388,7 @@ csr1212_new_descriptor_leaf(u8 dtype, u32 specifier_id, | |||
| 387 | if (!kv) | 388 | if (!kv) |
| 388 | return NULL; | 389 | return NULL; |
| 389 | 390 | ||
| 391 | kmemcheck_annotate_variable(kv->value.leaf.data[0]); | ||
| 390 | CSR1212_DESCRIPTOR_LEAF_SET_TYPE(kv, dtype); | 392 | CSR1212_DESCRIPTOR_LEAF_SET_TYPE(kv, dtype); |
| 391 | CSR1212_DESCRIPTOR_LEAF_SET_SPECIFIER_ID(kv, specifier_id); | 393 | CSR1212_DESCRIPTOR_LEAF_SET_SPECIFIER_ID(kv, specifier_id); |
| 392 | 394 | ||
diff --git a/drivers/ieee1394/nodemgr.c b/drivers/ieee1394/nodemgr.c index a6d55bebe61a..5122b5a8aa2d 100644 --- a/drivers/ieee1394/nodemgr.c +++ b/drivers/ieee1394/nodemgr.c | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | 10 | ||
| 11 | #include <linux/bitmap.h> | 11 | #include <linux/bitmap.h> |
| 12 | #include <linux/kernel.h> | 12 | #include <linux/kernel.h> |
| 13 | #include <linux/kmemcheck.h> | ||
| 13 | #include <linux/list.h> | 14 | #include <linux/list.h> |
| 14 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
| 15 | #include <linux/delay.h> | 16 | #include <linux/delay.h> |
| @@ -39,7 +40,10 @@ struct nodemgr_csr_info { | |||
| 39 | struct hpsb_host *host; | 40 | struct hpsb_host *host; |
| 40 | nodeid_t nodeid; | 41 | nodeid_t nodeid; |
| 41 | unsigned int generation; | 42 | unsigned int generation; |
| 43 | |||
| 44 | kmemcheck_bitfield_begin(flags); | ||
| 42 | unsigned int speed_unverified:1; | 45 | unsigned int speed_unverified:1; |
| 46 | kmemcheck_bitfield_end(flags); | ||
| 43 | }; | 47 | }; |
| 44 | 48 | ||
| 45 | 49 | ||
| @@ -1293,6 +1297,7 @@ static void nodemgr_node_scan_one(struct hpsb_host *host, | |||
| 1293 | u8 *speed; | 1297 | u8 *speed; |
| 1294 | 1298 | ||
| 1295 | ci = kmalloc(sizeof(*ci), GFP_KERNEL); | 1299 | ci = kmalloc(sizeof(*ci), GFP_KERNEL); |
| 1300 | kmemcheck_annotate_bitfield(ci, flags); | ||
| 1296 | if (!ci) | 1301 | if (!ci) |
| 1297 | return; | 1302 | return; |
| 1298 | 1303 | ||
diff --git a/drivers/misc/c2port/core.c b/drivers/misc/c2port/core.c index 0207dd59090d..b5346b4db91a 100644 --- a/drivers/misc/c2port/core.c +++ b/drivers/misc/c2port/core.c | |||
| @@ -15,6 +15,7 @@ | |||
| 15 | #include <linux/errno.h> | 15 | #include <linux/errno.h> |
| 16 | #include <linux/err.h> | 16 | #include <linux/err.h> |
| 17 | #include <linux/kernel.h> | 17 | #include <linux/kernel.h> |
| 18 | #include <linux/kmemcheck.h> | ||
| 18 | #include <linux/ctype.h> | 19 | #include <linux/ctype.h> |
| 19 | #include <linux/delay.h> | 20 | #include <linux/delay.h> |
| 20 | #include <linux/idr.h> | 21 | #include <linux/idr.h> |
| @@ -891,6 +892,7 @@ struct c2port_device *c2port_device_register(char *name, | |||
| 891 | return ERR_PTR(-EINVAL); | 892 | return ERR_PTR(-EINVAL); |
| 892 | 893 | ||
| 893 | c2dev = kmalloc(sizeof(struct c2port_device), GFP_KERNEL); | 894 | c2dev = kmalloc(sizeof(struct c2port_device), GFP_KERNEL); |
| 895 | kmemcheck_annotate_bitfield(c2dev, flags); | ||
| 894 | if (unlikely(!c2dev)) | 896 | if (unlikely(!c2dev)) |
| 895 | return ERR_PTR(-ENOMEM); | 897 | return ERR_PTR(-ENOMEM); |
| 896 | 898 | ||
diff --git a/include/linux/c2port.h b/include/linux/c2port.h index 7b5a2388ba67..2a5cd867c365 100644 --- a/include/linux/c2port.h +++ b/include/linux/c2port.h | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | */ | 10 | */ |
| 11 | 11 | ||
| 12 | #include <linux/device.h> | 12 | #include <linux/device.h> |
| 13 | #include <linux/kmemcheck.h> | ||
| 13 | 14 | ||
| 14 | #define C2PORT_NAME_LEN 32 | 15 | #define C2PORT_NAME_LEN 32 |
| 15 | 16 | ||
| @@ -20,8 +21,10 @@ | |||
| 20 | /* Main struct */ | 21 | /* Main struct */ |
| 21 | struct c2port_ops; | 22 | struct c2port_ops; |
| 22 | struct c2port_device { | 23 | struct c2port_device { |
| 24 | kmemcheck_bitfield_begin(flags); | ||
| 23 | unsigned int access:1; | 25 | unsigned int access:1; |
| 24 | unsigned int flash_access:1; | 26 | unsigned int flash_access:1; |
| 27 | kmemcheck_bitfield_end(flags); | ||
| 25 | 28 | ||
| 26 | int id; | 29 | int id; |
| 27 | char name[C2PORT_NAME_LEN]; | 30 | char name[C2PORT_NAME_LEN]; |
diff --git a/include/linux/fs.h b/include/linux/fs.h index ede84fa7da5d..6d12174fbe11 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
| @@ -1919,8 +1919,9 @@ extern void __init vfs_caches_init(unsigned long); | |||
| 1919 | 1919 | ||
| 1920 | extern struct kmem_cache *names_cachep; | 1920 | extern struct kmem_cache *names_cachep; |
| 1921 | 1921 | ||
| 1922 | #define __getname() kmem_cache_alloc(names_cachep, GFP_KERNEL) | 1922 | #define __getname_gfp(gfp) kmem_cache_alloc(names_cachep, (gfp)) |
| 1923 | #define __putname(name) kmem_cache_free(names_cachep, (void *)(name)) | 1923 | #define __getname() __getname_gfp(GFP_KERNEL) |
| 1924 | #define __putname(name) kmem_cache_free(names_cachep, (void *)(name)) | ||
| 1924 | #ifndef CONFIG_AUDITSYSCALL | 1925 | #ifndef CONFIG_AUDITSYSCALL |
| 1925 | #define putname(name) __putname(name) | 1926 | #define putname(name) __putname(name) |
| 1926 | #else | 1927 | #else |
diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 3760e7c5de02..80e14b8c2e78 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h | |||
| @@ -52,7 +52,19 @@ struct vm_area_struct; | |||
| 52 | #define __GFP_RECLAIMABLE ((__force gfp_t)0x80000u) /* Page is reclaimable */ | 52 | #define __GFP_RECLAIMABLE ((__force gfp_t)0x80000u) /* Page is reclaimable */ |
| 53 | #define __GFP_MOVABLE ((__force gfp_t)0x100000u) /* Page is movable */ | 53 | #define __GFP_MOVABLE ((__force gfp_t)0x100000u) /* Page is movable */ |
| 54 | 54 | ||
| 55 | #define __GFP_BITS_SHIFT 21 /* Room for 21 __GFP_FOO bits */ | 55 | #ifdef CONFIG_KMEMCHECK |
| 56 | #define __GFP_NOTRACK ((__force gfp_t)0x200000u) /* Don't track with kmemcheck */ | ||
| 57 | #else | ||
| 58 | #define __GFP_NOTRACK ((__force gfp_t)0) | ||
| 59 | #endif | ||
| 60 | |||
| 61 | /* | ||
| 62 | * This may seem redundant, but it's a way of annotating false positives vs. | ||
| 63 | * allocations that simply cannot be supported (e.g. page tables). | ||
| 64 | */ | ||
| 65 | #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK) | ||
| 66 | |||
| 67 | #define __GFP_BITS_SHIFT 22 /* Room for 22 __GFP_FOO bits */ | ||
| 56 | #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) | 68 | #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) |
| 57 | 69 | ||
| 58 | /* This equals 0, but use constants in case they ever change */ | 70 | /* This equals 0, but use constants in case they ever change */ |
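The new flag is meant to be OR'd into the gfp mask at individual call
sites. A minimal sketch of a caller opting out of tracking (the function
below is hypothetical, not part of this patch):

	/* Scratch memory; kmemcheck need not track reads from it. */
	static unsigned long alloc_scratch_page(void)
	{
		return __get_free_page(GFP_KERNEL | __GFP_NOTRACK);
	}
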
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index c41e812e9d5e..2721f07e9354 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h | |||
| @@ -472,6 +472,20 @@ static inline void tasklet_hi_schedule(struct tasklet_struct *t) | |||
| 472 | __tasklet_hi_schedule(t); | 472 | __tasklet_hi_schedule(t); |
| 473 | } | 473 | } |
| 474 | 474 | ||
| 475 | extern void __tasklet_hi_schedule_first(struct tasklet_struct *t); | ||
| 476 | |||
| 477 | /* | ||
| 478 | * This version avoids touching any other tasklets. Needed for kmemcheck | ||
| 479 | * in order not to take any page faults while enqueueing this tasklet; | ||
| 480 | * consider VERY carefully whether you really need this or | ||
| 481 | * tasklet_hi_schedule()... | ||
| 482 | */ | ||
| 483 | static inline void tasklet_hi_schedule_first(struct tasklet_struct *t) | ||
| 484 | { | ||
| 485 | if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) | ||
| 486 | __tasklet_hi_schedule_first(t); | ||
| 487 | } | ||
| 488 | |||
| 475 | 489 | ||
| 476 | static inline void tasklet_disable_nosync(struct tasklet_struct *t) | 490 | static inline void tasklet_disable_nosync(struct tasklet_struct *t) |
| 477 | { | 491 | { |
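The new helper may only be called with interrupts disabled (enforced by
the BUG_ON added to __tasklet_hi_schedule_first() in kernel/softirq.c
later in this series). A hedged sketch of the calling convention, with a
hypothetical caller:

	static void defer_error_report(struct tasklet_struct *t)
	{
		unsigned long flags;

		local_irq_save(flags);
		/* Enqueue at the head without touching other tasklets. */
		tasklet_hi_schedule_first(t);
		local_irq_restore(flags);
	}
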
diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h new file mode 100644 index 000000000000..47b39b7c7e84 --- /dev/null +++ b/include/linux/kmemcheck.h | |||
| @@ -0,0 +1,153 @@ | |||
| 1 | #ifndef LINUX_KMEMCHECK_H | ||
| 2 | #define LINUX_KMEMCHECK_H | ||
| 3 | |||
| 4 | #include <linux/mm_types.h> | ||
| 5 | #include <linux/types.h> | ||
| 6 | |||
| 7 | #ifdef CONFIG_KMEMCHECK | ||
| 8 | extern int kmemcheck_enabled; | ||
| 9 | |||
| 10 | /* The slab-related functions. */ | ||
| 11 | void kmemcheck_alloc_shadow(struct page *page, int order, gfp_t flags, int node); | ||
| 12 | void kmemcheck_free_shadow(struct page *page, int order); | ||
| 13 | void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object, | ||
| 14 | size_t size); | ||
| 15 | void kmemcheck_slab_free(struct kmem_cache *s, void *object, size_t size); | ||
| 16 | |||
| 17 | void kmemcheck_pagealloc_alloc(struct page *p, unsigned int order, | ||
| 18 | gfp_t gfpflags); | ||
| 19 | |||
| 20 | void kmemcheck_show_pages(struct page *p, unsigned int n); | ||
| 21 | void kmemcheck_hide_pages(struct page *p, unsigned int n); | ||
| 22 | |||
| 23 | bool kmemcheck_page_is_tracked(struct page *p); | ||
| 24 | |||
| 25 | void kmemcheck_mark_unallocated(void *address, unsigned int n); | ||
| 26 | void kmemcheck_mark_uninitialized(void *address, unsigned int n); | ||
| 27 | void kmemcheck_mark_initialized(void *address, unsigned int n); | ||
| 28 | void kmemcheck_mark_freed(void *address, unsigned int n); | ||
| 29 | |||
| 30 | void kmemcheck_mark_unallocated_pages(struct page *p, unsigned int n); | ||
| 31 | void kmemcheck_mark_uninitialized_pages(struct page *p, unsigned int n); | ||
| 32 | void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n); | ||
| 33 | |||
| 34 | int kmemcheck_show_addr(unsigned long address); | ||
| 35 | int kmemcheck_hide_addr(unsigned long address); | ||
| 36 | |||
| 37 | #else | ||
| 38 | #define kmemcheck_enabled 0 | ||
| 39 | |||
| 40 | static inline void | ||
| 41 | kmemcheck_alloc_shadow(struct page *page, int order, gfp_t flags, int node) | ||
| 42 | { | ||
| 43 | } | ||
| 44 | |||
| 45 | static inline void | ||
| 46 | kmemcheck_free_shadow(struct page *page, int order) | ||
| 47 | { | ||
| 48 | } | ||
| 49 | |||
| 50 | static inline void | ||
| 51 | kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object, | ||
| 52 | size_t size) | ||
| 53 | { | ||
| 54 | } | ||
| 55 | |||
| 56 | static inline void kmemcheck_slab_free(struct kmem_cache *s, void *object, | ||
| 57 | size_t size) | ||
| 58 | { | ||
| 59 | } | ||
| 60 | |||
| 61 | static inline void kmemcheck_pagealloc_alloc(struct page *p, | ||
| 62 | unsigned int order, gfp_t gfpflags) | ||
| 63 | { | ||
| 64 | } | ||
| 65 | |||
| 66 | static inline bool kmemcheck_page_is_tracked(struct page *p) | ||
| 67 | { | ||
| 68 | return false; | ||
| 69 | } | ||
| 70 | |||
| 71 | static inline void kmemcheck_mark_unallocated(void *address, unsigned int n) | ||
| 72 | { | ||
| 73 | } | ||
| 74 | |||
| 75 | static inline void kmemcheck_mark_uninitialized(void *address, unsigned int n) | ||
| 76 | { | ||
| 77 | } | ||
| 78 | |||
| 79 | static inline void kmemcheck_mark_initialized(void *address, unsigned int n) | ||
| 80 | { | ||
| 81 | } | ||
| 82 | |||
| 83 | static inline void kmemcheck_mark_freed(void *address, unsigned int n) | ||
| 84 | { | ||
| 85 | } | ||
| 86 | |||
| 87 | static inline void kmemcheck_mark_unallocated_pages(struct page *p, | ||
| 88 | unsigned int n) | ||
| 89 | { | ||
| 90 | } | ||
| 91 | |||
| 92 | static inline void kmemcheck_mark_uninitialized_pages(struct page *p, | ||
| 93 | unsigned int n) | ||
| 94 | { | ||
| 95 | } | ||
| 96 | |||
| 97 | static inline void kmemcheck_mark_initialized_pages(struct page *p, | ||
| 98 | unsigned int n) | ||
| 99 | { | ||
| 100 | } | ||
| 101 | |||
| 102 | #endif /* CONFIG_KMEMCHECK */ | ||
| 103 | |||
| 104 | /* | ||
| 105 | * Bitfield annotations | ||
| 106 | * | ||
| 107 | * How to use: If you have a struct using bitfields, for example | ||
| 108 | * | ||
| 109 | * struct a { | ||
| 110 | * int x:8, y:8; | ||
| 111 | * }; | ||
| 112 | * | ||
| 113 | * then this should be rewritten as | ||
| 114 | * | ||
| 115 | * struct a { | ||
| 116 | * kmemcheck_bitfield_begin(flags); | ||
| 117 | * int x:8, y:8; | ||
| 118 | * kmemcheck_bitfield_end(flags); | ||
| 119 | * }; | ||
| 120 | * | ||
| 121 | * Now the "flags_begin" and "flags_end" members may be used to refer to the | ||
| 122 | * beginning and end, respectively, of the bitfield (and things like | ||
| 123 | * &x.flags_begin is allowed). As soon as the struct is allocated, the bit- | ||
| 124 | * fields should be annotated: | ||
| 125 | * | ||
| 126 | * struct a *a = kmalloc(sizeof(struct a), GFP_KERNEL); | ||
| 127 | * kmemcheck_annotate_bitfield(a, flags); | ||
| 128 | * | ||
| 129 | * Note: We provide the same definitions for both kmemcheck and non- | ||
| 130 | * kmemcheck kernels. This makes it harder to introduce accidental errors. | ||
| 131 | * Passing a NULL pointer to kmemcheck_annotate_bitfield() is also allowed. | ||
| 132 | */ | ||
| 133 | #define kmemcheck_bitfield_begin(name) \ | ||
| 134 | int name##_begin[0]; | ||
| 135 | |||
| 136 | #define kmemcheck_bitfield_end(name) \ | ||
| 137 | int name##_end[0]; | ||
| 138 | |||
| 139 | #define kmemcheck_annotate_bitfield(ptr, name) \ | ||
| 140 | do if (ptr) { \ | ||
| 141 | int _n = (long) &((ptr)->name##_end) \ | ||
| 142 | - (long) &((ptr)->name##_begin); \ | ||
| 143 | BUILD_BUG_ON(_n < 0); \ | ||
| 144 | \ | ||
| 145 | kmemcheck_mark_initialized(&((ptr)->name##_begin), _n); \ | ||
| 146 | } while (0) | ||
| 147 | |||
| 148 | #define kmemcheck_annotate_variable(var) \ | ||
| 149 | do { \ | ||
| 150 | kmemcheck_mark_initialized(&(var), sizeof(var)); \ | ||
| 151 | } while (0) \ | ||
| 152 | |||
| 153 | #endif /* LINUX_KMEMCHECK_H */ | ||
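Besides bitfields, kmemcheck_annotate_variable() handles the case where a
single member may legitimately be read before being fully written. A
sketch with hypothetical structure and member names:

	struct leaf *leaf = kmalloc(sizeof(*leaf), GFP_KERNEL);
	if (leaf)
		kmemcheck_annotate_variable(leaf->data[0]);
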
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 0e80e26ecf21..0042090a4d70 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h | |||
| @@ -98,6 +98,14 @@ struct page { | |||
| 98 | #ifdef CONFIG_WANT_PAGE_DEBUG_FLAGS | 98 | #ifdef CONFIG_WANT_PAGE_DEBUG_FLAGS |
| 99 | unsigned long debug_flags; /* Use atomic bitops on this */ | 99 | unsigned long debug_flags; /* Use atomic bitops on this */ |
| 100 | #endif | 100 | #endif |
| 101 | |||
| 102 | #ifdef CONFIG_KMEMCHECK | ||
| 103 | /* | ||
| 104 | * kmemcheck wants to track the status of each byte in a page; this | ||
| 105 | * is a pointer to such a status block. NULL if not tracked. | ||
| 106 | */ | ||
| 107 | void *shadow; | ||
| 108 | #endif | ||
| 101 | }; | 109 | }; |
| 102 | 110 | ||
| 103 | /* | 111 | /* |
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 8670f1575fe1..29f8599e6bea 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h | |||
| @@ -1,6 +1,7 @@ | |||
| 1 | #ifndef _LINUX_RING_BUFFER_H | 1 | #ifndef _LINUX_RING_BUFFER_H |
| 2 | #define _LINUX_RING_BUFFER_H | 2 | #define _LINUX_RING_BUFFER_H |
| 3 | 3 | ||
| 4 | #include <linux/kmemcheck.h> | ||
| 4 | #include <linux/mm.h> | 5 | #include <linux/mm.h> |
| 5 | #include <linux/seq_file.h> | 6 | #include <linux/seq_file.h> |
| 6 | 7 | ||
| @@ -11,7 +12,10 @@ struct ring_buffer_iter; | |||
| 11 | * Don't refer to this struct directly, use functions below. | 12 | * Don't refer to this struct directly, use functions below. |
| 12 | */ | 13 | */ |
| 13 | struct ring_buffer_event { | 14 | struct ring_buffer_event { |
| 15 | kmemcheck_bitfield_begin(bitfield); | ||
| 14 | u32 type_len:5, time_delta:27; | 16 | u32 type_len:5, time_delta:27; |
| 17 | kmemcheck_bitfield_end(bitfield); | ||
| 18 | |||
| 15 | u32 array[]; | 19 | u32 array[]; |
| 16 | }; | 20 | }; |
| 17 | 21 | ||
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index fa51293f2708..63ef24bc01d0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h | |||
| @@ -15,6 +15,7 @@ | |||
| 15 | #define _LINUX_SKBUFF_H | 15 | #define _LINUX_SKBUFF_H |
| 16 | 16 | ||
| 17 | #include <linux/kernel.h> | 17 | #include <linux/kernel.h> |
| 18 | #include <linux/kmemcheck.h> | ||
| 18 | #include <linux/compiler.h> | 19 | #include <linux/compiler.h> |
| 19 | #include <linux/time.h> | 20 | #include <linux/time.h> |
| 20 | #include <linux/cache.h> | 21 | #include <linux/cache.h> |
| @@ -343,6 +344,7 @@ struct sk_buff { | |||
| 343 | }; | 344 | }; |
| 344 | }; | 345 | }; |
| 345 | __u32 priority; | 346 | __u32 priority; |
| 347 | kmemcheck_bitfield_begin(flags1); | ||
| 346 | __u8 local_df:1, | 348 | __u8 local_df:1, |
| 347 | cloned:1, | 349 | cloned:1, |
| 348 | ip_summed:2, | 350 | ip_summed:2, |
| @@ -353,6 +355,7 @@ struct sk_buff { | |||
| 353 | ipvs_property:1, | 355 | ipvs_property:1, |
| 354 | peeked:1, | 356 | peeked:1, |
| 355 | nf_trace:1; | 357 | nf_trace:1; |
| 358 | kmemcheck_bitfield_end(flags1); | ||
| 356 | __be16 protocol; | 359 | __be16 protocol; |
| 357 | 360 | ||
| 358 | void (*destructor)(struct sk_buff *skb); | 361 | void (*destructor)(struct sk_buff *skb); |
| @@ -372,12 +375,16 @@ struct sk_buff { | |||
| 372 | __u16 tc_verd; /* traffic control verdict */ | 375 | __u16 tc_verd; /* traffic control verdict */ |
| 373 | #endif | 376 | #endif |
| 374 | #endif | 377 | #endif |
| 378 | |||
| 379 | kmemcheck_bitfield_begin(flags2); | ||
| 375 | #ifdef CONFIG_IPV6_NDISC_NODETYPE | 380 | #ifdef CONFIG_IPV6_NDISC_NODETYPE |
| 376 | __u8 ndisc_nodetype:2; | 381 | __u8 ndisc_nodetype:2; |
| 377 | #endif | 382 | #endif |
| 378 | #if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE) | 383 | #if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE) |
| 379 | __u8 do_not_encrypt:1; | 384 | __u8 do_not_encrypt:1; |
| 380 | #endif | 385 | #endif |
| 386 | kmemcheck_bitfield_end(flags2); | ||
| 387 | |||
| 381 | /* 0/13/14 bit hole */ | 388 | /* 0/13/14 bit hole */ |
| 382 | 389 | ||
| 383 | #ifdef CONFIG_NET_DMA | 390 | #ifdef CONFIG_NET_DMA |
diff --git a/include/linux/slab.h b/include/linux/slab.h index 219b8fb4651d..2da8372519f5 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h | |||
| @@ -64,6 +64,13 @@ | |||
| 64 | 64 | ||
| 65 | #define SLAB_NOLEAKTRACE 0x00800000UL /* Avoid kmemleak tracing */ | 65 | #define SLAB_NOLEAKTRACE 0x00800000UL /* Avoid kmemleak tracing */ |
| 66 | 66 | ||
| 67 | /* Don't track use of uninitialized memory */ | ||
| 68 | #ifdef CONFIG_KMEMCHECK | ||
| 69 | # define SLAB_NOTRACK 0x01000000UL | ||
| 70 | #else | ||
| 71 | # define SLAB_NOTRACK 0x00000000UL | ||
| 72 | #endif | ||
| 73 | |||
| 67 | /* The following flags affect the page allocator grouping pages by mobility */ | 74 | /* The following flags affect the page allocator grouping pages by mobility */ |
| 68 | #define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ | 75 | #define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ |
| 69 | #define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */ | 76 | #define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */ |
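Whole caches can be opted out at creation time, which also avoids the
page faults that per-allocation __GFP_NOTRACK still incurs (see the
comment in mm/kmemcheck.c below). A sketch with hypothetical names:

	my_cachep = kmem_cache_create("my_cache", sizeof(struct my_obj),
				      0, SLAB_HWCACHE_ALIGN | SLAB_NOTRACK,
				      NULL);
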
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 713f841ecaa9..850d057500de 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h | |||
| @@ -16,6 +16,87 @@ | |||
| 16 | #include <linux/compiler.h> | 16 | #include <linux/compiler.h> |
| 17 | #include <linux/kmemtrace.h> | 17 | #include <linux/kmemtrace.h> |
| 18 | 18 | ||
| 19 | /* | ||
| 20 | * struct kmem_cache | ||
| 21 | * | ||
| 22 | * manages a cache. | ||
| 23 | */ | ||
| 24 | |||
| 25 | struct kmem_cache { | ||
| 26 | /* 1) per-cpu data, touched during every alloc/free */ | ||
| 27 | struct array_cache *array[NR_CPUS]; | ||
| 28 | /* 2) Cache tunables. Protected by cache_chain_mutex */ | ||
| 29 | unsigned int batchcount; | ||
| 30 | unsigned int limit; | ||
| 31 | unsigned int shared; | ||
| 32 | |||
| 33 | unsigned int buffer_size; | ||
| 34 | u32 reciprocal_buffer_size; | ||
| 35 | /* 3) touched by every alloc & free from the backend */ | ||
| 36 | |||
| 37 | unsigned int flags; /* constant flags */ | ||
| 38 | unsigned int num; /* # of objs per slab */ | ||
| 39 | |||
| 40 | /* 4) cache_grow/shrink */ | ||
| 41 | /* order of pgs per slab (2^n) */ | ||
| 42 | unsigned int gfporder; | ||
| 43 | |||
| 44 | /* force GFP flags, e.g. GFP_DMA */ | ||
| 45 | gfp_t gfpflags; | ||
| 46 | |||
| 47 | size_t colour; /* cache colouring range */ | ||
| 48 | unsigned int colour_off; /* colour offset */ | ||
| 49 | struct kmem_cache *slabp_cache; | ||
| 50 | unsigned int slab_size; | ||
| 51 | unsigned int dflags; /* dynamic flags */ | ||
| 52 | |||
| 53 | /* constructor func */ | ||
| 54 | void (*ctor)(void *obj); | ||
| 55 | |||
| 56 | /* 5) cache creation/removal */ | ||
| 57 | const char *name; | ||
| 58 | struct list_head next; | ||
| 59 | |||
| 60 | /* 6) statistics */ | ||
| 61 | #ifdef CONFIG_DEBUG_SLAB | ||
| 62 | unsigned long num_active; | ||
| 63 | unsigned long num_allocations; | ||
| 64 | unsigned long high_mark; | ||
| 65 | unsigned long grown; | ||
| 66 | unsigned long reaped; | ||
| 67 | unsigned long errors; | ||
| 68 | unsigned long max_freeable; | ||
| 69 | unsigned long node_allocs; | ||
| 70 | unsigned long node_frees; | ||
| 71 | unsigned long node_overflow; | ||
| 72 | atomic_t allochit; | ||
| 73 | atomic_t allocmiss; | ||
| 74 | atomic_t freehit; | ||
| 75 | atomic_t freemiss; | ||
| 76 | |||
| 77 | /* | ||
| 78 | * If debugging is enabled, then the allocator can add additional | ||
| 79 | * fields and/or padding to every object. buffer_size contains the total | ||
| 80 | * object size including these internal fields, the following two | ||
| 81 | * variables contain the offset to the user object and its size. | ||
| 82 | */ | ||
| 83 | int obj_offset; | ||
| 84 | int obj_size; | ||
| 85 | #endif /* CONFIG_DEBUG_SLAB */ | ||
| 86 | |||
| 87 | /* | ||
| 88 | * We put nodelists[] at the end of kmem_cache, because we want to size | ||
| 89 | * this array to nr_node_ids slots instead of MAX_NUMNODES | ||
| 90 | * (see kmem_cache_init()) | ||
| 91 | * We still use [MAX_NUMNODES] and not [1] or [0] because cache_cache | ||
| 92 | * is statically defined, so we reserve the max number of nodes. | ||
| 93 | */ | ||
| 94 | struct kmem_list3 *nodelists[MAX_NUMNODES]; | ||
| 95 | /* | ||
| 96 | * Do not add fields after nodelists[] | ||
| 97 | */ | ||
| 98 | }; | ||
| 99 | |||
| 19 | /* Size description struct for general caches. */ | 100 | /* Size description struct for general caches. */ |
| 20 | struct cache_sizes { | 101 | struct cache_sizes { |
| 21 | size_t cs_size; | 102 | size_t cs_size; |
diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index 1a8cecc4f38c..51efbef38fb0 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h | |||
| @@ -4,6 +4,8 @@ | |||
| 4 | struct task_struct; | 4 | struct task_struct; |
| 5 | 5 | ||
| 6 | #ifdef CONFIG_STACKTRACE | 6 | #ifdef CONFIG_STACKTRACE |
| 7 | struct task_struct; | ||
| 8 | |||
| 7 | struct stack_trace { | 9 | struct stack_trace { |
| 8 | unsigned int nr_entries, max_entries; | 10 | unsigned int nr_entries, max_entries; |
| 9 | unsigned long *entries; | 11 | unsigned long *entries; |
| @@ -11,6 +13,7 @@ struct stack_trace { | |||
| 11 | }; | 13 | }; |
| 12 | 14 | ||
| 13 | extern void save_stack_trace(struct stack_trace *trace); | 15 | extern void save_stack_trace(struct stack_trace *trace); |
| 16 | extern void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp); | ||
| 14 | extern void save_stack_trace_tsk(struct task_struct *tsk, | 17 | extern void save_stack_trace_tsk(struct task_struct *tsk, |
| 15 | struct stack_trace *trace); | 18 | struct stack_trace *trace); |
| 16 | 19 | ||
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 20a6957af870..47004f35cc7e 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | #define _INET_SOCK_H | 17 | #define _INET_SOCK_H |
| 18 | 18 | ||
| 19 | 19 | ||
| 20 | #include <linux/kmemcheck.h> | ||
| 20 | #include <linux/string.h> | 21 | #include <linux/string.h> |
| 21 | #include <linux/types.h> | 22 | #include <linux/types.h> |
| 22 | #include <linux/jhash.h> | 23 | #include <linux/jhash.h> |
| @@ -66,14 +67,16 @@ struct inet_request_sock { | |||
| 66 | __be32 loc_addr; | 67 | __be32 loc_addr; |
| 67 | __be32 rmt_addr; | 68 | __be32 rmt_addr; |
| 68 | __be16 rmt_port; | 69 | __be16 rmt_port; |
| 69 | u16 snd_wscale : 4, | 70 | kmemcheck_bitfield_begin(flags); |
| 70 | rcv_wscale : 4, | 71 | u16 snd_wscale : 4, |
| 72 | rcv_wscale : 4, | ||
| 71 | tstamp_ok : 1, | 73 | tstamp_ok : 1, |
| 72 | sack_ok : 1, | 74 | sack_ok : 1, |
| 73 | wscale_ok : 1, | 75 | wscale_ok : 1, |
| 74 | ecn_ok : 1, | 76 | ecn_ok : 1, |
| 75 | acked : 1, | 77 | acked : 1, |
| 76 | no_srccheck: 1; | 78 | no_srccheck: 1; |
| 79 | kmemcheck_bitfield_end(flags); | ||
| 77 | struct ip_options *opt; | 80 | struct ip_options *opt; |
| 78 | }; | 81 | }; |
| 79 | 82 | ||
| @@ -199,9 +202,12 @@ static inline int inet_sk_ehashfn(const struct sock *sk) | |||
| 199 | static inline struct request_sock *inet_reqsk_alloc(struct request_sock_ops *ops) | 202 | static inline struct request_sock *inet_reqsk_alloc(struct request_sock_ops *ops) |
| 200 | { | 203 | { |
| 201 | struct request_sock *req = reqsk_alloc(ops); | 204 | struct request_sock *req = reqsk_alloc(ops); |
| 205 | struct inet_request_sock *ireq = inet_rsk(req); | ||
| 202 | 206 | ||
| 203 | if (req != NULL) | 207 | if (req != NULL) { |
| 204 | inet_rsk(req)->opt = NULL; | 208 | kmemcheck_annotate_bitfield(ireq, flags); |
| 209 | ireq->opt = NULL; | ||
| 210 | } | ||
| 205 | 211 | ||
| 206 | return req; | 212 | return req; |
| 207 | } | 213 | } |
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 4b8ece22b8e9..b63b80fac567 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | #define _INET_TIMEWAIT_SOCK_ | 16 | #define _INET_TIMEWAIT_SOCK_ |
| 17 | 17 | ||
| 18 | 18 | ||
| 19 | #include <linux/kmemcheck.h> | ||
| 19 | #include <linux/list.h> | 20 | #include <linux/list.h> |
| 20 | #include <linux/module.h> | 21 | #include <linux/module.h> |
| 21 | #include <linux/timer.h> | 22 | #include <linux/timer.h> |
| @@ -127,10 +128,12 @@ struct inet_timewait_sock { | |||
| 127 | __be32 tw_rcv_saddr; | 128 | __be32 tw_rcv_saddr; |
| 128 | __be16 tw_dport; | 129 | __be16 tw_dport; |
| 129 | __u16 tw_num; | 130 | __u16 tw_num; |
| 131 | kmemcheck_bitfield_begin(flags); | ||
| 130 | /* And these are ours. */ | 132 | /* And these are ours. */ |
| 131 | __u8 tw_ipv6only:1, | 133 | __u8 tw_ipv6only:1, |
| 132 | tw_transparent:1; | 134 | tw_transparent:1; |
| 133 | /* 15 bits hole, try to pack */ | 135 | /* 14 bits hole, try to pack */ |
| 136 | kmemcheck_bitfield_end(flags); | ||
| 134 | __u16 tw_ipv6_offset; | 137 | __u16 tw_ipv6_offset; |
| 135 | unsigned long tw_ttd; | 138 | unsigned long tw_ttd; |
| 136 | struct inet_bind_bucket *tw_tb; | 139 | struct inet_bind_bucket *tw_tb; |
diff --git a/include/net/sock.h b/include/net/sock.h index 010e14a93c92..95bd3fd75f94 100644 --- a/include/net/sock.h +++ b/include/net/sock.h | |||
| @@ -218,9 +218,11 @@ struct sock { | |||
| 218 | #define sk_hash __sk_common.skc_hash | 218 | #define sk_hash __sk_common.skc_hash |
| 219 | #define sk_prot __sk_common.skc_prot | 219 | #define sk_prot __sk_common.skc_prot |
| 220 | #define sk_net __sk_common.skc_net | 220 | #define sk_net __sk_common.skc_net |
| 221 | kmemcheck_bitfield_begin(flags); | ||
| 221 | unsigned char sk_shutdown : 2, | 222 | unsigned char sk_shutdown : 2, |
| 222 | sk_no_check : 2, | 223 | sk_no_check : 2, |
| 223 | sk_userlocks : 4; | 224 | sk_userlocks : 4; |
| 225 | kmemcheck_bitfield_end(flags); | ||
| 224 | unsigned char sk_protocol; | 226 | unsigned char sk_protocol; |
| 225 | unsigned short sk_type; | 227 | unsigned short sk_type; |
| 226 | int sk_rcvbuf; | 228 | int sk_rcvbuf; |
diff --git a/init/do_mounts.c b/init/do_mounts.c index dd7ee5f203f3..093f65915501 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c | |||
| @@ -231,7 +231,8 @@ static int __init do_mount_root(char *name, char *fs, int flags, void *data) | |||
| 231 | 231 | ||
| 232 | void __init mount_block_root(char *name, int flags) | 232 | void __init mount_block_root(char *name, int flags) |
| 233 | { | 233 | { |
| 234 | char *fs_names = __getname(); | 234 | char *fs_names = __getname_gfp(GFP_KERNEL |
| 235 | | __GFP_NOTRACK_FALSE_POSITIVE); | ||
| 235 | char *p; | 236 | char *p; |
| 236 | #ifdef CONFIG_BLOCK | 237 | #ifdef CONFIG_BLOCK |
| 237 | char b[BDEVNAME_SIZE]; | 238 | char b[BDEVNAME_SIZE]; |
diff --git a/init/main.c b/init/main.c index f6204f712e7c..7becd8b5c5bf 100644 --- a/init/main.c +++ b/init/main.c | |||
| @@ -65,6 +65,7 @@ | |||
| 65 | #include <linux/idr.h> | 65 | #include <linux/idr.h> |
| 66 | #include <linux/ftrace.h> | 66 | #include <linux/ftrace.h> |
| 67 | #include <linux/async.h> | 67 | #include <linux/async.h> |
| 68 | #include <linux/kmemcheck.h> | ||
| 68 | #include <linux/kmemtrace.h> | 69 | #include <linux/kmemtrace.h> |
| 69 | #include <trace/boot.h> | 70 | #include <trace/boot.h> |
| 70 | 71 | ||
diff --git a/kernel/fork.c b/kernel/fork.c index 4430eb1376f2..be022c200da6 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
| @@ -178,7 +178,7 @@ void __init fork_init(unsigned long mempages) | |||
| 178 | /* create a slab on which task_structs can be allocated */ | 178 | /* create a slab on which task_structs can be allocated */ |
| 179 | task_struct_cachep = | 179 | task_struct_cachep = |
| 180 | kmem_cache_create("task_struct", sizeof(struct task_struct), | 180 | kmem_cache_create("task_struct", sizeof(struct task_struct), |
| 181 | ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL); | 181 | ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL); |
| 182 | #endif | 182 | #endif |
| 183 | 183 | ||
| 184 | /* do the arch specific task caches init */ | 184 | /* do the arch specific task caches init */ |
| @@ -1470,20 +1470,20 @@ void __init proc_caches_init(void) | |||
| 1470 | { | 1470 | { |
| 1471 | sighand_cachep = kmem_cache_create("sighand_cache", | 1471 | sighand_cachep = kmem_cache_create("sighand_cache", |
| 1472 | sizeof(struct sighand_struct), 0, | 1472 | sizeof(struct sighand_struct), 0, |
| 1473 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU, | 1473 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU| |
| 1474 | sighand_ctor); | 1474 | SLAB_NOTRACK, sighand_ctor); |
| 1475 | signal_cachep = kmem_cache_create("signal_cache", | 1475 | signal_cachep = kmem_cache_create("signal_cache", |
| 1476 | sizeof(struct signal_struct), 0, | 1476 | sizeof(struct signal_struct), 0, |
| 1477 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); | 1477 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); |
| 1478 | files_cachep = kmem_cache_create("files_cache", | 1478 | files_cachep = kmem_cache_create("files_cache", |
| 1479 | sizeof(struct files_struct), 0, | 1479 | sizeof(struct files_struct), 0, |
| 1480 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); | 1480 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); |
| 1481 | fs_cachep = kmem_cache_create("fs_cache", | 1481 | fs_cachep = kmem_cache_create("fs_cache", |
| 1482 | sizeof(struct fs_struct), 0, | 1482 | sizeof(struct fs_struct), 0, |
| 1483 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); | 1483 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); |
| 1484 | mm_cachep = kmem_cache_create("mm_struct", | 1484 | mm_cachep = kmem_cache_create("mm_struct", |
| 1485 | sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, | 1485 | sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, |
| 1486 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); | 1486 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); |
| 1487 | vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC); | 1487 | vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC); |
| 1488 | mmap_init(); | 1488 | mmap_init(); |
| 1489 | } | 1489 | } |
diff --git a/kernel/signal.c b/kernel/signal.c index 809a228019ad..d81f4952eebb 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
| @@ -832,6 +832,7 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, | |||
| 832 | { | 832 | { |
| 833 | struct sigpending *pending; | 833 | struct sigpending *pending; |
| 834 | struct sigqueue *q; | 834 | struct sigqueue *q; |
| 835 | int override_rlimit; | ||
| 835 | 836 | ||
| 836 | trace_sched_signal_send(sig, t); | 837 | trace_sched_signal_send(sig, t); |
| 837 | 838 | ||
| @@ -863,9 +864,13 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, | |||
| 863 | make sure at least one signal gets delivered and don't | 864 | make sure at least one signal gets delivered and don't |
| 864 | pass on the info struct. */ | 865 | pass on the info struct. */ |
| 865 | 866 | ||
| 866 | q = __sigqueue_alloc(t, GFP_ATOMIC, (sig < SIGRTMIN && | 867 | if (sig < SIGRTMIN) |
| 867 | (is_si_special(info) || | 868 | override_rlimit = (is_si_special(info) || info->si_code >= 0); |
| 868 | info->si_code >= 0))); | 869 | else |
| 870 | override_rlimit = 0; | ||
| 871 | |||
| 872 | q = __sigqueue_alloc(t, GFP_ATOMIC | __GFP_NOTRACK_FALSE_POSITIVE, | ||
| 873 | override_rlimit); | ||
| 869 | if (q) { | 874 | if (q) { |
| 870 | list_add_tail(&q->list, &pending->list); | 875 | list_add_tail(&q->list, &pending->list); |
| 871 | switch ((unsigned long) info) { | 876 | switch ((unsigned long) info) { |
diff --git a/kernel/softirq.c b/kernel/softirq.c index 258885a543db..b41fb710e114 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
| @@ -382,6 +382,17 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) | |||
| 382 | 382 | ||
| 383 | EXPORT_SYMBOL(__tasklet_hi_schedule); | 383 | EXPORT_SYMBOL(__tasklet_hi_schedule); |
| 384 | 384 | ||
| 385 | void __tasklet_hi_schedule_first(struct tasklet_struct *t) | ||
| 386 | { | ||
| 387 | BUG_ON(!irqs_disabled()); | ||
| 388 | |||
| 389 | t->next = __get_cpu_var(tasklet_hi_vec).head; | ||
| 390 | __get_cpu_var(tasklet_hi_vec).head = t; | ||
| 391 | __raise_softirq_irqoff(HI_SOFTIRQ); | ||
| 392 | } | ||
| 393 | |||
| 394 | EXPORT_SYMBOL(__tasklet_hi_schedule_first); | ||
| 395 | |||
| 385 | static void tasklet_action(struct softirq_action *a) | 396 | static void tasklet_action(struct softirq_action *a) |
| 386 | { | 397 | { |
| 387 | struct tasklet_struct *list; | 398 | struct tasklet_struct *list; |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 0e51a35a4486..f5c76b6cd616 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
| @@ -27,6 +27,7 @@ | |||
| 27 | #include <linux/security.h> | 27 | #include <linux/security.h> |
| 28 | #include <linux/ctype.h> | 28 | #include <linux/ctype.h> |
| 29 | #include <linux/utsname.h> | 29 | #include <linux/utsname.h> |
| 30 | #include <linux/kmemcheck.h> | ||
| 30 | #include <linux/smp_lock.h> | 31 | #include <linux/smp_lock.h> |
| 31 | #include <linux/fs.h> | 32 | #include <linux/fs.h> |
| 32 | #include <linux/init.h> | 33 | #include <linux/init.h> |
| @@ -967,6 +968,17 @@ static struct ctl_table kern_table[] = { | |||
| 967 | .proc_handler = &proc_dointvec, | 968 | .proc_handler = &proc_dointvec, |
| 968 | }, | 969 | }, |
| 969 | #endif | 970 | #endif |
| 971 | #ifdef CONFIG_KMEMCHECK | ||
| 972 | { | ||
| 973 | .ctl_name = CTL_UNNUMBERED, | ||
| 974 | .procname = "kmemcheck", | ||
| 975 | .data = &kmemcheck_enabled, | ||
| 976 | .maxlen = sizeof(int), | ||
| 977 | .mode = 0644, | ||
| 978 | .proc_handler = &proc_dointvec, | ||
| 979 | }, | ||
| 980 | #endif | ||
| 981 | |||
| 970 | /* | 982 | /* |
| 971 | * NOTE: do not add new entries to this table unless you have read | 983 | * NOTE: do not add new entries to this table unless you have read |
| 972 | * Documentation/sysctl/ctl_unnumbered.txt | 984 | * Documentation/sysctl/ctl_unnumbered.txt |
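With this table entry in place, kmemcheck can be toggled at run time
without rebooting, e.g. (as root, assuming procfs is mounted at /proc):

	$ echo 0 > /proc/sys/kernel/kmemcheck		# disable
	$ echo 1 > /proc/sys/kernel/kmemcheck		# re-enable
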
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 2e642b2b7253..dc4dc70171ce 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include <linux/debugfs.h> | 10 | #include <linux/debugfs.h> |
| 11 | #include <linux/uaccess.h> | 11 | #include <linux/uaccess.h> |
| 12 | #include <linux/hardirq.h> | 12 | #include <linux/hardirq.h> |
| 13 | #include <linux/kmemcheck.h> | ||
| 13 | #include <linux/module.h> | 14 | #include <linux/module.h> |
| 14 | #include <linux/percpu.h> | 15 | #include <linux/percpu.h> |
| 15 | #include <linux/mutex.h> | 16 | #include <linux/mutex.h> |
| @@ -1270,6 +1271,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 1270 | if (tail < BUF_PAGE_SIZE) { | 1271 | if (tail < BUF_PAGE_SIZE) { |
| 1271 | /* Mark the rest of the page with padding */ | 1272 | /* Mark the rest of the page with padding */ |
| 1272 | event = __rb_page_index(tail_page, tail); | 1273 | event = __rb_page_index(tail_page, tail); |
| 1274 | kmemcheck_annotate_bitfield(event, bitfield); | ||
| 1273 | rb_event_set_padding(event); | 1275 | rb_event_set_padding(event); |
| 1274 | } | 1276 | } |
| 1275 | 1277 | ||
| @@ -1327,6 +1329,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
| 1327 | return NULL; | 1329 | return NULL; |
| 1328 | 1330 | ||
| 1329 | event = __rb_page_index(tail_page, tail); | 1331 | event = __rb_page_index(tail_page, tail); |
| 1332 | kmemcheck_annotate_bitfield(event, bitfield); | ||
| 1330 | rb_update_event(event, type, length); | 1333 | rb_update_event(event, type, length); |
| 1331 | 1334 | ||
| 1332 | /* The passed in type is zero for DATA */ | 1335 | /* The passed in type is zero for DATA */ |
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 116a35051be6..6b0c2d8a2129 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug | |||
| @@ -300,7 +300,7 @@ config DEBUG_OBJECTS_ENABLE_DEFAULT | |||
| 300 | 300 | ||
| 301 | config DEBUG_SLAB | 301 | config DEBUG_SLAB |
| 302 | bool "Debug slab memory allocations" | 302 | bool "Debug slab memory allocations" |
| 303 | depends on DEBUG_KERNEL && SLAB | 303 | depends on DEBUG_KERNEL && SLAB && !KMEMCHECK |
| 304 | help | 304 | help |
| 305 | Say Y here to have the kernel do limited verification on memory | 305 | Say Y here to have the kernel do limited verification on memory |
| 306 | allocation as well as poisoning memory on free to catch use of freed | 306 | allocation as well as poisoning memory on free to catch use of freed |
| @@ -312,7 +312,7 @@ config DEBUG_SLAB_LEAK | |||
| 312 | 312 | ||
| 313 | config SLUB_DEBUG_ON | 313 | config SLUB_DEBUG_ON |
| 314 | bool "SLUB debugging on by default" | 314 | bool "SLUB debugging on by default" |
| 315 | depends on SLUB && SLUB_DEBUG | 315 | depends on SLUB && SLUB_DEBUG && !KMEMCHECK |
| 316 | default n | 316 | default n |
| 317 | help | 317 | help |
| 318 | Boot with debugging on by default. SLUB boots by default with | 318 | Boot with debugging on by default. SLUB boots by default with |
| @@ -996,3 +996,5 @@ config DMA_API_DEBUG | |||
| 996 | source "samples/Kconfig" | 996 | source "samples/Kconfig" |
| 997 | 997 | ||
| 998 | source "lib/Kconfig.kgdb" | 998 | source "lib/Kconfig.kgdb" |
| 999 | |||
| 1000 | source "lib/Kconfig.kmemcheck" | ||
diff --git a/lib/Kconfig.kmemcheck b/lib/Kconfig.kmemcheck new file mode 100644 index 000000000000..603c81b66549 --- /dev/null +++ b/lib/Kconfig.kmemcheck | |||
| @@ -0,0 +1,91 @@ | |||
| 1 | config HAVE_ARCH_KMEMCHECK | ||
| 2 | bool | ||
| 3 | |||
| 4 | menuconfig KMEMCHECK | ||
| 5 | bool "kmemcheck: trap use of uninitialized memory" | ||
| 6 | depends on DEBUG_KERNEL | ||
| 7 | depends on !X86_USE_3DNOW | ||
| 8 | depends on SLUB || SLAB | ||
| 9 | depends on !CC_OPTIMIZE_FOR_SIZE | ||
| 10 | depends on !FUNCTION_TRACER | ||
| 11 | select FRAME_POINTER | ||
| 12 | select STACKTRACE | ||
| 13 | default n | ||
| 14 | help | ||
| 15 | This option enables tracing of dynamically allocated kernel memory | ||
| 16 | to see if memory is used before it has been given an initial value. | ||
| 17 | Be aware that this requires half of your memory for bookkeeping and | ||
| 18 | will insert extra code at *every* read and write to tracked memory | ||
| 19 | thus slow down the kernel code (but user code is unaffected). | ||
| 20 | |||
| 21 | The kernel may be started with kmemcheck=0 or kmemcheck=1 to disable | ||
| 22 | or enable kmemcheck at boot-time. If the kernel is started with | ||
| 23 | kmemcheck=0, the large memory and CPU overhead is not incurred. | ||
| 24 | |||
| 25 | choice | ||
| 26 | prompt "kmemcheck: default mode at boot" | ||
| 27 | depends on KMEMCHECK | ||
| 28 | default KMEMCHECK_ONESHOT_BY_DEFAULT | ||
| 29 | help | ||
| 30 | This option controls the default behaviour of kmemcheck when the | ||
| 31 | kernel boots and no kmemcheck= parameter is given. | ||
| 32 | |||
| 33 | config KMEMCHECK_DISABLED_BY_DEFAULT | ||
| 34 | bool "disabled" | ||
| 35 | depends on KMEMCHECK | ||
| 36 | |||
| 37 | config KMEMCHECK_ENABLED_BY_DEFAULT | ||
| 38 | bool "enabled" | ||
| 39 | depends on KMEMCHECK | ||
| 40 | |||
| 41 | config KMEMCHECK_ONESHOT_BY_DEFAULT | ||
| 42 | bool "one-shot" | ||
| 43 | depends on KMEMCHECK | ||
| 44 | help | ||
| 45 | In one-shot mode, only the first error detected is reported; | ||
| 46 | kmemcheck then disables itself. | ||
| 47 | |||
| 48 | endchoice | ||
| 49 | |||
| 50 | config KMEMCHECK_QUEUE_SIZE | ||
| 51 | int "kmemcheck: error queue size" | ||
| 52 | depends on KMEMCHECK | ||
| 53 | default 64 | ||
| 54 | help | ||
| 55 | Select the maximum number of errors to store in the queue. Since | ||
| 56 | errors can occur virtually anywhere and in any context, we need a | ||
| 57 | temporary storage area which is guaranteed not to generate any | ||
| 58 | other faults. The queue will be emptied as soon as a tasklet may | ||
| 59 | be scheduled. If the queue is full, new error reports will be | ||
| 60 | lost. | ||
| 61 | |||
| 62 | config KMEMCHECK_SHADOW_COPY_SHIFT | ||
| 63 | int "kmemcheck: shadow copy size (5 => 32 bytes, 6 => 64 bytes)" | ||
| 64 | depends on KMEMCHECK | ||
| 65 | range 2 8 | ||
| 66 | default 5 | ||
| 67 | help | ||
| 68 | Select the number of shadow bytes to save along with each entry of | ||
| 69 | the queue. These bytes indicate what parts of an allocation are | ||
| 70 | initialized, uninitialized, etc. and will be displayed when an | ||
| 71 | error is detected to help the debugging of a particular problem. | ||
| 72 | |||
| 73 | config KMEMCHECK_PARTIAL_OK | ||
| 74 | bool "kmemcheck: allow partially uninitialized memory" | ||
| 75 | depends on KMEMCHECK | ||
| 76 | default y | ||
| 77 | help | ||
| 78 | This option works around certain GCC optimizations that produce | ||
| 79 | 32-bit reads from 16-bit variables where the upper 16 bits are | ||
| 80 | thrown away afterwards. This may of course also hide some real | ||
| 81 | bugs. | ||
| 82 | |||
| 83 | config KMEMCHECK_BITOPS_OK | ||
| 84 | bool "kmemcheck: allow bit-field manipulation" | ||
| 85 | depends on KMEMCHECK | ||
| 86 | default n | ||
| 87 | help | ||
| 88 | This option silences warnings that would be generated for bit-field | ||
| 89 | accesses where not all the bits are initialized at the same time. | ||
| 90 | This may also hide some real bugs. | ||
| 91 | |||
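As the KMEMCHECK help text above notes, the boot-time default chosen here
can always be overridden on the kernel command line, e.g. (hypothetical
bootloader entry):

	kernel /boot/vmlinuz root=/dev/sda1 kmemcheck=0
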
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug index bb01e298f260..aa99fd1f7109 100644 --- a/mm/Kconfig.debug +++ b/mm/Kconfig.debug | |||
| @@ -2,6 +2,7 @@ config DEBUG_PAGEALLOC | |||
| 2 | bool "Debug page memory allocations" | 2 | bool "Debug page memory allocations" |
| 3 | depends on DEBUG_KERNEL && ARCH_SUPPORTS_DEBUG_PAGEALLOC | 3 | depends on DEBUG_KERNEL && ARCH_SUPPORTS_DEBUG_PAGEALLOC |
| 4 | depends on !HIBERNATION || !PPC && !SPARC | 4 | depends on !HIBERNATION || !PPC && !SPARC |
| 5 | depends on !KMEMCHECK | ||
| 5 | ---help--- | 6 | ---help--- |
| 6 | Unmap pages from the kernel linear mapping after free_pages(). | 7 | Unmap pages from the kernel linear mapping after free_pages(). |
| 7 | This results in a large slowdown, but helps to find certain types | 8 | This results in a large slowdown, but helps to find certain types |
diff --git a/mm/Makefile b/mm/Makefile index e89acb090b4d..c379ce08354a 100644 --- a/mm/Makefile +++ b/mm/Makefile | |||
| @@ -27,6 +27,7 @@ obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o | |||
| 27 | obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o | 27 | obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o |
| 28 | obj-$(CONFIG_SLAB) += slab.o | 28 | obj-$(CONFIG_SLAB) += slab.o |
| 29 | obj-$(CONFIG_SLUB) += slub.o | 29 | obj-$(CONFIG_SLUB) += slub.o |
| 30 | obj-$(CONFIG_KMEMCHECK) += kmemcheck.o | ||
| 30 | obj-$(CONFIG_FAILSLAB) += failslab.o | 31 | obj-$(CONFIG_FAILSLAB) += failslab.o |
| 31 | obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o | 32 | obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o |
| 32 | obj-$(CONFIG_FS_XIP) += filemap_xip.o | 33 | obj-$(CONFIG_FS_XIP) += filemap_xip.o |
diff --git a/mm/kmemcheck.c b/mm/kmemcheck.c new file mode 100644 index 000000000000..fd814fd61319 --- /dev/null +++ b/mm/kmemcheck.c | |||
| @@ -0,0 +1,122 @@ | |||
| 1 | #include <linux/gfp.h> | ||
| 2 | #include <linux/mm_types.h> | ||
| 3 | #include <linux/mm.h> | ||
| 4 | #include <linux/slab.h> | ||
| 5 | #include <linux/kmemcheck.h> | ||
| 6 | |||
| 7 | void kmemcheck_alloc_shadow(struct page *page, int order, gfp_t flags, int node) | ||
| 8 | { | ||
| 9 | struct page *shadow; | ||
| 10 | int pages; | ||
| 11 | int i; | ||
| 12 | |||
| 13 | pages = 1 << order; | ||
| 14 | |||
| 15 | /* | ||
| 16 | * With kmemcheck enabled, we need to allocate a memory area for the | ||
| 17 | * shadow bits as well. | ||
| 18 | */ | ||
| 19 | shadow = alloc_pages_node(node, flags | __GFP_NOTRACK, order); | ||
| 20 | if (!shadow) { | ||
| 21 | if (printk_ratelimit()) | ||
| 22 | printk(KERN_ERR "kmemcheck: failed to allocate " | ||
| 23 | "shadow bitmap\n"); | ||
| 24 | return; | ||
| 25 | } | ||
| 26 | |||
| 27 | for (i = 0; i < pages; ++i) | ||
| 28 | page[i].shadow = page_address(&shadow[i]); | ||
| 29 | |||
| 30 | /* | ||
| 31 | * Mark it as non-present for the MMU so that our accesses to | ||
| 32 | * this memory will trigger a page fault and let us analyze | ||
| 33 | * the memory accesses. | ||
| 34 | */ | ||
| 35 | kmemcheck_hide_pages(page, pages); | ||
| 36 | } | ||
| 37 | |||
| 38 | void kmemcheck_free_shadow(struct page *page, int order) | ||
| 39 | { | ||
| 40 | struct page *shadow; | ||
| 41 | int pages; | ||
| 42 | int i; | ||
| 43 | |||
| 44 | if (!kmemcheck_page_is_tracked(page)) | ||
| 45 | return; | ||
| 46 | |||
| 47 | pages = 1 << order; | ||
| 48 | |||
| 49 | kmemcheck_show_pages(page, pages); | ||
| 50 | |||
| 51 | shadow = virt_to_page(page[0].shadow); | ||
| 52 | |||
| 53 | for (i = 0; i < pages; ++i) | ||
| 54 | page[i].shadow = NULL; | ||
| 55 | |||
| 56 | __free_pages(shadow, order); | ||
| 57 | } | ||
| 58 | |||
| 59 | void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object, | ||
| 60 | size_t size) | ||
| 61 | { | ||
| 62 | /* | ||
| 63 | * Has already been memset(), which initializes the shadow for us | ||
| 64 | * as well. | ||
| 65 | */ | ||
| 66 | if (gfpflags & __GFP_ZERO) | ||
| 67 | return; | ||
| 68 | |||
| 69 | /* No need to initialize the shadow of a non-tracked slab. */ | ||
| 70 | if (s->flags & SLAB_NOTRACK) | ||
| 71 | return; | ||
| 72 | |||
| 73 | if (!kmemcheck_enabled || gfpflags & __GFP_NOTRACK) { | ||
| 74 | /* | ||
| 75 | * Allow notracked objects to be allocated from | ||
| 76 | * tracked caches. Note however that these objects | ||
| 77 | * will still get page faults on access, they just | ||
| 78 | * won't ever be flagged as uninitialized. If page | ||
| 79 | * faults are not acceptable, the slab cache itself | ||
| 80 | * should be marked NOTRACK. | ||
| 81 | */ | ||
| 82 | kmemcheck_mark_initialized(object, size); | ||
| 83 | } else if (!s->ctor) { | ||
| 84 | /* | ||
| 85 | * New objects should be marked uninitialized before | ||
| 86 | * they're returned to the caller. | ||
| 87 | */ | ||
| 88 | kmemcheck_mark_uninitialized(object, size); | ||
| 89 | } | ||
| 90 | } | ||
| 91 | |||
| 92 | void kmemcheck_slab_free(struct kmem_cache *s, void *object, size_t size) | ||
| 93 | { | ||
| 94 | /* TODO: RCU freeing is unsupported for now; hide false positives. */ | ||
| 95 | if (!s->ctor && !(s->flags & SLAB_DESTROY_BY_RCU)) | ||
| 96 | kmemcheck_mark_freed(object, size); | ||
| 97 | } | ||
| 98 | |||
| 99 | void kmemcheck_pagealloc_alloc(struct page *page, unsigned int order, | ||
| 100 | gfp_t gfpflags) | ||
| 101 | { | ||
| 102 | int pages; | ||
| 103 | |||
| 104 | if (gfpflags & (__GFP_HIGHMEM | __GFP_NOTRACK)) | ||
| 105 | return; | ||
| 106 | |||
| 107 | pages = 1 << order; | ||
| 108 | |||
| 109 | /* | ||
| 110 | * NOTE: We choose to track GFP_ZERO pages too; in fact, they | ||
| 111 | * can become uninitialized by copying uninitialized memory | ||
| 112 | * into them. | ||
| 113 | */ | ||
| 114 | |||
| 115 | /* XXX: Can use zone->node for node? */ | ||
| 116 | kmemcheck_alloc_shadow(page, order, gfpflags, -1); | ||
| 117 | |||
| 118 | if (gfpflags & __GFP_ZERO) | ||
| 119 | kmemcheck_mark_initialized_pages(page, pages); | ||
| 120 | else | ||
| 121 | kmemcheck_mark_uninitialized_pages(page, pages); | ||
| 122 | } | ||
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 17d5f539a9aa..0727896a88ac 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
| @@ -23,6 +23,7 @@ | |||
| 23 | #include <linux/bootmem.h> | 23 | #include <linux/bootmem.h> |
| 24 | #include <linux/compiler.h> | 24 | #include <linux/compiler.h> |
| 25 | #include <linux/kernel.h> | 25 | #include <linux/kernel.h> |
| 26 | #include <linux/kmemcheck.h> | ||
| 26 | #include <linux/module.h> | 27 | #include <linux/module.h> |
| 27 | #include <linux/suspend.h> | 28 | #include <linux/suspend.h> |
| 28 | #include <linux/pagevec.h> | 29 | #include <linux/pagevec.h> |
| @@ -546,6 +547,8 @@ static void __free_pages_ok(struct page *page, unsigned int order) | |||
| 546 | int i; | 547 | int i; |
| 547 | int bad = 0; | 548 | int bad = 0; |
| 548 | 549 | ||
| 550 | kmemcheck_free_shadow(page, order); | ||
| 551 | |||
| 549 | for (i = 0 ; i < (1 << order) ; ++i) | 552 | for (i = 0 ; i < (1 << order) ; ++i) |
| 550 | bad += free_pages_check(page + i); | 553 | bad += free_pages_check(page + i); |
| 551 | if (bad) | 554 | if (bad) |
| @@ -994,6 +997,8 @@ static void free_hot_cold_page(struct page *page, int cold) | |||
| 994 | struct per_cpu_pages *pcp; | 997 | struct per_cpu_pages *pcp; |
| 995 | unsigned long flags; | 998 | unsigned long flags; |
| 996 | 999 | ||
| 1000 | kmemcheck_free_shadow(page, 0); | ||
| 1001 | |||
| 997 | if (PageAnon(page)) | 1002 | if (PageAnon(page)) |
| 998 | page->mapping = NULL; | 1003 | page->mapping = NULL; |
| 999 | if (free_pages_check(page)) | 1004 | if (free_pages_check(page)) |
| @@ -1047,6 +1052,16 @@ void split_page(struct page *page, unsigned int order) | |||
| 1047 | 1052 | ||
| 1048 | VM_BUG_ON(PageCompound(page)); | 1053 | VM_BUG_ON(PageCompound(page)); |
| 1049 | VM_BUG_ON(!page_count(page)); | 1054 | VM_BUG_ON(!page_count(page)); |
| 1055 | |||
| 1056 | #ifdef CONFIG_KMEMCHECK | ||
| 1057 | /* | ||
| 1058 | * Split shadow pages too, because free(page[0]) would | ||
| 1059 | * otherwise free the whole shadow. | ||
| 1060 | */ | ||
| 1061 | if (kmemcheck_page_is_tracked(page)) | ||
| 1062 | split_page(virt_to_page(page[0].shadow), order); | ||
| 1063 | #endif | ||
| 1064 | |||
| 1050 | for (i = 1; i < (1 << order); i++) | 1065 | for (i = 1; i < (1 << order); i++) |
| 1051 | set_page_refcounted(page + i); | 1066 | set_page_refcounted(page + i); |
| 1052 | } | 1067 | } |
| @@ -1667,7 +1682,10 @@ nopage: | |||
| 1667 | dump_stack(); | 1682 | dump_stack(); |
| 1668 | show_mem(); | 1683 | show_mem(); |
| 1669 | } | 1684 | } |
| 1685 | return page; | ||
| 1670 | got_pg: | 1686 | got_pg: |
| 1687 | if (kmemcheck_enabled) | ||
| 1688 | kmemcheck_pagealloc_alloc(page, order, gfp_mask); | ||
| 1671 | return page; | 1689 | return page; |
| 1672 | } | 1690 | } |
| 1673 | EXPORT_SYMBOL(__alloc_pages_internal); | 1691 | EXPORT_SYMBOL(__alloc_pages_internal); |
diff --git a/mm/slab.c b/mm/slab.c --- a/mm/slab.c +++ b/mm/slab.c | |||
| @@ -114,6 +114,7 @@ | |||
| 114 | #include <linux/rtmutex.h> | 114 | #include <linux/rtmutex.h> |
| 115 | #include <linux/reciprocal_div.h> | 115 | #include <linux/reciprocal_div.h> |
| 116 | #include <linux/debugobjects.h> | 116 | #include <linux/debugobjects.h> |
| 117 | #include <linux/kmemcheck.h> | ||
| 117 | 118 | ||
| 118 | #include <asm/cacheflush.h> | 119 | #include <asm/cacheflush.h> |
| 119 | #include <asm/tlbflush.h> | 120 | #include <asm/tlbflush.h> |
| @@ -179,13 +180,13 @@ | |||
| 179 | SLAB_STORE_USER | \ | 180 | SLAB_STORE_USER | \ |
| 180 | SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ | 181 | SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ |
| 181 | SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \ | 182 | SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \ |
| 182 | SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE) | 183 | SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE | SLAB_NOTRACK) |
| 183 | #else | 184 | #else |
| 184 | # define CREATE_MASK (SLAB_HWCACHE_ALIGN | \ | 185 | # define CREATE_MASK (SLAB_HWCACHE_ALIGN | \ |
| 185 | SLAB_CACHE_DMA | \ | 186 | SLAB_CACHE_DMA | \ |
| 186 | SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ | 187 | SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ |
| 187 | SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \ | 188 | SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \ |
| 188 | SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE) | 189 | SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE | SLAB_NOTRACK) |
| 189 | #endif | 190 | #endif |
| 190 | 191 | ||
| 191 | /* | 192 | /* |
| @@ -380,87 +381,6 @@ static void kmem_list3_init(struct kmem_list3 *parent) | |||
| 380 | MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \ | 381 | MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \ |
| 381 | } while (0) | 382 | } while (0) |
| 382 | 383 | ||
| 383 | /* | ||
| 384 | * struct kmem_cache | ||
| 385 | * | ||
| 386 | * manages a cache. | ||
| 387 | */ | ||
| 388 | |||
| 389 | struct kmem_cache { | ||
| 390 | /* 1) per-cpu data, touched during every alloc/free */ | ||
| 391 | struct array_cache *array[NR_CPUS]; | ||
| 392 | /* 2) Cache tunables. Protected by cache_chain_mutex */ | ||
| 393 | unsigned int batchcount; | ||
| 394 | unsigned int limit; | ||
| 395 | unsigned int shared; | ||
| 396 | |||
| 397 | unsigned int buffer_size; | ||
| 398 | u32 reciprocal_buffer_size; | ||
| 399 | /* 3) touched by every alloc & free from the backend */ | ||
| 400 | |||
| 401 | unsigned int flags; /* constant flags */ | ||
| 402 | unsigned int num; /* # of objs per slab */ | ||
| 403 | |||
| 404 | /* 4) cache_grow/shrink */ | ||
| 405 | /* order of pgs per slab (2^n) */ | ||
| 406 | unsigned int gfporder; | ||
| 407 | |||
| 408 | /* force GFP flags, e.g. GFP_DMA */ | ||
| 409 | gfp_t gfpflags; | ||
| 410 | |||
| 411 | size_t colour; /* cache colouring range */ | ||
| 412 | unsigned int colour_off; /* colour offset */ | ||
| 413 | struct kmem_cache *slabp_cache; | ||
| 414 | unsigned int slab_size; | ||
| 415 | unsigned int dflags; /* dynamic flags */ | ||
| 416 | |||
| 417 | /* constructor func */ | ||
| 418 | void (*ctor)(void *obj); | ||
| 419 | |||
| 420 | /* 5) cache creation/removal */ | ||
| 421 | const char *name; | ||
| 422 | struct list_head next; | ||
| 423 | |||
| 424 | /* 6) statistics */ | ||
| 425 | #if STATS | ||
| 426 | unsigned long num_active; | ||
| 427 | unsigned long num_allocations; | ||
| 428 | unsigned long high_mark; | ||
| 429 | unsigned long grown; | ||
| 430 | unsigned long reaped; | ||
| 431 | unsigned long errors; | ||
| 432 | unsigned long max_freeable; | ||
| 433 | unsigned long node_allocs; | ||
| 434 | unsigned long node_frees; | ||
| 435 | unsigned long node_overflow; | ||
| 436 | atomic_t allochit; | ||
| 437 | atomic_t allocmiss; | ||
| 438 | atomic_t freehit; | ||
| 439 | atomic_t freemiss; | ||
| 440 | #endif | ||
| 441 | #if DEBUG | ||
| 442 | /* | ||
| 443 | * If debugging is enabled, then the allocator can add additional | ||
| 444 | * fields and/or padding to every object. buffer_size contains the total | ||
| 445 | * object size including these internal fields, the following two | ||
| 446 | * variables contain the offset to the user object and its size. | ||
| 447 | */ | ||
| 448 | int obj_offset; | ||
| 449 | int obj_size; | ||
| 450 | #endif | ||
| 451 | /* | ||
| 452 | * We put nodelists[] at the end of kmem_cache, because we want to size | ||
| 453 | * this array to nr_node_ids slots instead of MAX_NUMNODES | ||
| 454 | * (see kmem_cache_init()) | ||
| 455 | * We still use [MAX_NUMNODES] and not [1] or [0] because cache_cache | ||
| 456 | * is statically defined, so we reserve the max number of nodes. | ||
| 457 | */ | ||
| 458 | struct kmem_list3 *nodelists[MAX_NUMNODES]; | ||
| 459 | /* | ||
| 460 | * Do not add fields after nodelists[] | ||
| 461 | */ | ||
| 462 | }; | ||
| 463 | |||
| 464 | #define CFLGS_OFF_SLAB (0x80000000UL) | 384 | #define CFLGS_OFF_SLAB (0x80000000UL) |
| 465 | #define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB) | 385 | #define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB) |
| 466 | 386 | ||
| @@ -1707,7 +1627,7 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) | |||
| 1707 | if (cachep->flags & SLAB_RECLAIM_ACCOUNT) | 1627 | if (cachep->flags & SLAB_RECLAIM_ACCOUNT) |
| 1708 | flags |= __GFP_RECLAIMABLE; | 1628 | flags |= __GFP_RECLAIMABLE; |
| 1709 | 1629 | ||
| 1710 | page = alloc_pages_node(nodeid, flags, cachep->gfporder); | 1630 | page = alloc_pages_node(nodeid, flags | __GFP_NOTRACK, cachep->gfporder); |
| 1711 | if (!page) | 1631 | if (!page) |
| 1712 | return NULL; | 1632 | return NULL; |
| 1713 | 1633 | ||
| @@ -1720,6 +1640,16 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) | |||
| 1720 | NR_SLAB_UNRECLAIMABLE, nr_pages); | 1640 | NR_SLAB_UNRECLAIMABLE, nr_pages); |
| 1721 | for (i = 0; i < nr_pages; i++) | 1641 | for (i = 0; i < nr_pages; i++) |
| 1722 | __SetPageSlab(page + i); | 1642 | __SetPageSlab(page + i); |
| 1643 | |||
| 1644 | if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) { | ||
| 1645 | kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid); | ||
| 1646 | |||
| 1647 | if (cachep->ctor) | ||
| 1648 | kmemcheck_mark_uninitialized_pages(page, nr_pages); | ||
| 1649 | else | ||
| 1650 | kmemcheck_mark_unallocated_pages(page, nr_pages); | ||
| 1651 | } | ||
| 1652 | |||
| 1723 | return page_address(page); | 1653 | return page_address(page); |
| 1724 | } | 1654 | } |
| 1725 | 1655 | ||
| @@ -1732,6 +1662,8 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr) | |||
| 1732 | struct page *page = virt_to_page(addr); | 1662 | struct page *page = virt_to_page(addr); |
| 1733 | const unsigned long nr_freed = i; | 1663 | const unsigned long nr_freed = i; |
| 1734 | 1664 | ||
| 1665 | kmemcheck_free_shadow(page, cachep->gfporder); | ||
| 1666 | |||
| 1735 | if (cachep->flags & SLAB_RECLAIM_ACCOUNT) | 1667 | if (cachep->flags & SLAB_RECLAIM_ACCOUNT) |
| 1736 | sub_zone_page_state(page_zone(page), | 1668 | sub_zone_page_state(page_zone(page), |
| 1737 | NR_SLAB_RECLAIMABLE, nr_freed); | 1669 | NR_SLAB_RECLAIMABLE, nr_freed); |
| @@ -3407,6 +3339,9 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, | |||
| 3407 | kmemleak_alloc_recursive(ptr, obj_size(cachep), 1, cachep->flags, | 3339 | kmemleak_alloc_recursive(ptr, obj_size(cachep), 1, cachep->flags, |
| 3408 | flags); | 3340 | flags); |
| 3409 | 3341 | ||
| 3342 | if (likely(ptr)) | ||
| 3343 | kmemcheck_slab_alloc(cachep, flags, ptr, obj_size(cachep)); | ||
| 3344 | |||
| 3410 | if (unlikely((flags & __GFP_ZERO) && ptr)) | 3345 | if (unlikely((flags & __GFP_ZERO) && ptr)) |
| 3411 | memset(ptr, 0, obj_size(cachep)); | 3346 | memset(ptr, 0, obj_size(cachep)); |
| 3412 | 3347 | ||
| @@ -3467,6 +3402,9 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) | |||
| 3467 | flags); | 3402 | flags); |
| 3468 | prefetchw(objp); | 3403 | prefetchw(objp); |
| 3469 | 3404 | ||
| 3405 | if (likely(objp)) | ||
| 3406 | kmemcheck_slab_alloc(cachep, flags, objp, obj_size(cachep)); | ||
| 3407 | |||
| 3470 | if (unlikely((flags & __GFP_ZERO) && objp)) | 3408 | if (unlikely((flags & __GFP_ZERO) && objp)) |
| 3471 | memset(objp, 0, obj_size(cachep)); | 3409 | memset(objp, 0, obj_size(cachep)); |
| 3472 | 3410 | ||
| @@ -3583,6 +3521,8 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp) | |||
| 3583 | kmemleak_free_recursive(objp, cachep->flags); | 3521 | kmemleak_free_recursive(objp, cachep->flags); |
| 3584 | objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0)); | 3522 | objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0)); |
| 3585 | 3523 | ||
| 3524 | kmemcheck_slab_free(cachep, objp, obj_size(cachep)); | ||
| 3525 | |||
| 3586 | /* | 3526 | /* |
| 3587 | * Skip calling cache_free_alien() when the platform is not numa. | 3527 | * Skip calling cache_free_alien() when the platform is not numa. |
| 3588 | * This will avoid cache misses that happen while accessing slabp (which | 3528 | * This will avoid cache misses that happen while accessing slabp (which |
diff --git a/mm/slub.c b/mm/slub.c --- a/mm/slub.c +++ b/mm/slub.c | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | #include <linux/proc_fs.h> | 18 | #include <linux/proc_fs.h> |
| 19 | #include <linux/seq_file.h> | 19 | #include <linux/seq_file.h> |
| 20 | #include <linux/kmemtrace.h> | 20 | #include <linux/kmemtrace.h> |
| 21 | #include <linux/kmemcheck.h> | ||
| 21 | #include <linux/cpu.h> | 22 | #include <linux/cpu.h> |
| 22 | #include <linux/cpuset.h> | 23 | #include <linux/cpuset.h> |
| 23 | #include <linux/kmemleak.h> | 24 | #include <linux/kmemleak.h> |
| @@ -147,7 +148,7 @@ | |||
| 147 | SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE) | 148 | SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE) |
| 148 | 149 | ||
| 149 | #define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \ | 150 | #define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \ |
| 150 | SLAB_CACHE_DMA) | 151 | SLAB_CACHE_DMA | SLAB_NOTRACK) |
| 151 | 152 | ||
| 152 | #ifndef ARCH_KMALLOC_MINALIGN | 153 | #ifndef ARCH_KMALLOC_MINALIGN |
| 153 | #define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) | 154 | #define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) |
| @@ -1071,6 +1072,8 @@ static inline struct page *alloc_slab_page(gfp_t flags, int node, | |||
| 1071 | { | 1072 | { |
| 1072 | int order = oo_order(oo); | 1073 | int order = oo_order(oo); |
| 1073 | 1074 | ||
| 1075 | flags |= __GFP_NOTRACK; | ||
| 1076 | |||
| 1074 | if (node == -1) | 1077 | if (node == -1) |
| 1075 | return alloc_pages(flags, order); | 1078 | return alloc_pages(flags, order); |
| 1076 | else | 1079 | else |
| @@ -1098,6 +1101,24 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) | |||
| 1098 | 1101 | ||
| 1099 | stat(get_cpu_slab(s, raw_smp_processor_id()), ORDER_FALLBACK); | 1102 | stat(get_cpu_slab(s, raw_smp_processor_id()), ORDER_FALLBACK); |
| 1100 | } | 1103 | } |
| 1104 | |||
| 1105 | if (kmemcheck_enabled && | ||
| 1106 | !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) { | ||
| 1107 | | ||
| 1108 | int pages = 1 << oo_order(oo); | ||
| 1109 | |||
| 1110 | kmemcheck_alloc_shadow(page, oo_order(oo), flags, node); | ||
| 1111 | |||
| 1112 | /* | ||
| 1113 | * Objects from caches that have a constructor don't get | ||
| 1114 | * cleared when they're allocated, so we need to do it here. | ||
| 1115 | */ | ||
| 1116 | if (s->ctor) | ||
| 1117 | kmemcheck_mark_uninitialized_pages(page, pages); | ||
| 1118 | else | ||
| 1119 | kmemcheck_mark_unallocated_pages(page, pages); | ||
| 1120 | } | ||
| 1121 | |||
| 1101 | page->objects = oo_objects(oo); | 1122 | page->objects = oo_objects(oo); |
| 1102 | mod_zone_page_state(page_zone(page), | 1123 | mod_zone_page_state(page_zone(page), |
| 1103 | (s->flags & SLAB_RECLAIM_ACCOUNT) ? | 1124 | (s->flags & SLAB_RECLAIM_ACCOUNT) ? |
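The comment in this hunk is the SLUB side of the same decision: objects from ctor caches are not cleared at allocation time, so their pages must be marked uninitialized rather than unallocated, otherwise the constructor's own writes would themselves be flagged. A hypothetical cache showing the kind of constructor involved (struct foo, foo_ctor and foo_cache are invented for illustration):

    #include <linux/init.h>
    #include <linux/slab.h>
    #include <linux/spinlock.h>

    struct foo {
        spinlock_t lock;
        int refcount;
    };

    /* Runs once when a slab page is populated, not on every
     * kmem_cache_alloc(); what it writes persists across alloc/free. */
    static void foo_ctor(void *obj)
    {
        struct foo *f = obj;

        spin_lock_init(&f->lock);
    }

    static struct kmem_cache *foo_cache;

    static int __init foo_init(void)
    {
        foo_cache = kmem_cache_create("foo", sizeof(struct foo), 0,
                                      SLAB_HWCACHE_ALIGN, foo_ctor);
        return foo_cache ? 0 : -ENOMEM;
    }

Because foo_ctor() writes f->lock at page-population time, the pages must already be in a state where writes are legal; reads of bytes the ctor never touched (refcount here) are still caught.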
| @@ -1171,6 +1192,8 @@ static void __free_slab(struct kmem_cache *s, struct page *page) | |||
| 1171 | __ClearPageSlubDebug(page); | 1192 | __ClearPageSlubDebug(page); |
| 1172 | } | 1193 | } |
| 1173 | 1194 | ||
| 1195 | kmemcheck_free_shadow(page, compound_order(page)); | ||
| 1196 | |||
| 1174 | mod_zone_page_state(page_zone(page), | 1197 | mod_zone_page_state(page_zone(page), |
| 1175 | (s->flags & SLAB_RECLAIM_ACCOUNT) ? | 1198 | (s->flags & SLAB_RECLAIM_ACCOUNT) ? |
| 1176 | NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, | 1199 | NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, |
| @@ -1626,7 +1649,9 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, | |||
| 1626 | if (unlikely((gfpflags & __GFP_ZERO) && object)) | 1649 | if (unlikely((gfpflags & __GFP_ZERO) && object)) |
| 1627 | memset(object, 0, objsize); | 1650 | memset(object, 0, objsize); |
| 1628 | 1651 | ||
| 1652 | kmemcheck_slab_alloc(s, gfpflags, object, c->objsize); | ||
| 1629 | kmemleak_alloc_recursive(object, objsize, 1, s->flags, gfpflags); | 1653 | kmemleak_alloc_recursive(object, objsize, 1, s->flags, gfpflags); |
| 1654 | |||
| 1630 | return object; | 1655 | return object; |
| 1631 | } | 1656 | } |
| 1632 | 1657 | ||
| @@ -1759,6 +1784,7 @@ static __always_inline void slab_free(struct kmem_cache *s, | |||
| 1759 | kmemleak_free_recursive(x, s->flags); | 1784 | kmemleak_free_recursive(x, s->flags); |
| 1760 | local_irq_save(flags); | 1785 | local_irq_save(flags); |
| 1761 | c = get_cpu_slab(s, smp_processor_id()); | 1786 | c = get_cpu_slab(s, smp_processor_id()); |
| 1787 | kmemcheck_slab_free(s, object, c->objsize); | ||
| 1762 | debug_check_no_locks_freed(object, c->objsize); | 1788 | debug_check_no_locks_freed(object, c->objsize); |
| 1763 | if (!(s->flags & SLAB_DEBUG_OBJECTS)) | 1789 | if (!(s->flags & SLAB_DEBUG_OBJECTS)) |
| 1764 | debug_check_no_obj_freed(object, c->objsize); | 1790 | debug_check_no_obj_freed(object, c->objsize); |
| @@ -2633,7 +2659,8 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags) | |||
| 2633 | 2659 | ||
| 2634 | if (!s || !text || !kmem_cache_open(s, flags, text, | 2660 | if (!s || !text || !kmem_cache_open(s, flags, text, |
| 2635 | realsize, ARCH_KMALLOC_MINALIGN, | 2661 | realsize, ARCH_KMALLOC_MINALIGN, |
| 2636 | SLAB_CACHE_DMA|__SYSFS_ADD_DEFERRED, NULL)) { | 2662 | SLAB_CACHE_DMA|SLAB_NOTRACK|__SYSFS_ADD_DEFERRED, |
| 2663 | NULL)) { | ||
| 2637 | kfree(s); | 2664 | kfree(s); |
| 2638 | kfree(text); | 2665 | kfree(text); |
| 2639 | goto unlock_out; | 2666 | goto unlock_out; |
| @@ -2727,9 +2754,10 @@ EXPORT_SYMBOL(__kmalloc); | |||
| 2727 | 2754 | ||
| 2728 | static void *kmalloc_large_node(size_t size, gfp_t flags, int node) | 2755 | static void *kmalloc_large_node(size_t size, gfp_t flags, int node) |
| 2729 | { | 2756 | { |
| 2730 | struct page *page = alloc_pages_node(node, flags | __GFP_COMP, | 2757 | struct page *page; |
| 2731 | get_order(size)); | ||
| 2732 | 2758 | ||
| 2759 | flags |= __GFP_COMP | __GFP_NOTRACK; | ||
| 2760 | page = alloc_pages_node(node, flags, get_order(size)); | ||
| 2733 | if (page) | 2761 | if (page) |
| 2734 | return page_address(page); | 2762 | return page_address(page); |
| 2735 | else | 2763 | else |
| @@ -4412,6 +4440,8 @@ static char *create_unique_id(struct kmem_cache *s) | |||
| 4412 | *p++ = 'a'; | 4440 | *p++ = 'a'; |
| 4413 | if (s->flags & SLAB_DEBUG_FREE) | 4441 | if (s->flags & SLAB_DEBUG_FREE) |
| 4414 | *p++ = 'F'; | 4442 | *p++ = 'F'; |
| 4443 | if (!(s->flags & SLAB_NOTRACK)) | ||
| 4444 | *p++ = 't'; | ||
| 4415 | if (p != name + 1) | 4445 | if (p != name + 1) |
| 4416 | *p++ = '-'; | 4446 | *p++ = '-'; |
| 4417 | p += sprintf(p, "%07d", s->size); | 4447 | p += sprintf(p, "%07d", s->size); |
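The create_unique_id() hunk above folds kmemcheck tracking into a SLUB cache's merge identity: every cache without SLAB_NOTRACK gets a 't' in its generated alias, so a tracked cache is never merged with an untracked one (the SLUB_MERGE_SAME change earlier serves the same end). Opting a cache out of tracking might look like the sketch below; the cache and structure names are invented, and SLAB_NOTRACK is the flag this series adds:

    #include <linux/init.h>
    #include <linux/slab.h>

    /* Hardware DMAs receive descriptors into these objects behind the
     * CPU's back, so to kmemcheck they would look forever
     * uninitialized; suppress tracking for this cache. */
    struct nic_rx_desc {
        unsigned int status;
        unsigned int len;
    };

    static struct kmem_cache *rx_cache;

    static int __init nic_caches_init(void)
    {
        rx_cache = kmem_cache_create("nic_rx_desc",
                                     sizeof(struct nic_rx_desc),
                                     0, SLAB_NOTRACK, NULL);
        return rx_cache ? 0 : -ENOMEM;
    }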
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1a94a3037370..5c93435b0347 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c | |||
| @@ -39,6 +39,7 @@ | |||
| 39 | #include <linux/module.h> | 39 | #include <linux/module.h> |
| 40 | #include <linux/types.h> | 40 | #include <linux/types.h> |
| 41 | #include <linux/kernel.h> | 41 | #include <linux/kernel.h> |
| 42 | #include <linux/kmemcheck.h> | ||
| 42 | #include <linux/mm.h> | 43 | #include <linux/mm.h> |
| 43 | #include <linux/interrupt.h> | 44 | #include <linux/interrupt.h> |
| 44 | #include <linux/in.h> | 45 | #include <linux/in.h> |
| @@ -201,6 +202,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, | |||
| 201 | skb->data = data; | 202 | skb->data = data; |
| 202 | skb_reset_tail_pointer(skb); | 203 | skb_reset_tail_pointer(skb); |
| 203 | skb->end = skb->tail + size; | 204 | skb->end = skb->tail + size; |
| 205 | kmemcheck_annotate_bitfield(skb, flags1); | ||
| 206 | kmemcheck_annotate_bitfield(skb, flags2); | ||
| 204 | /* make sure we initialize shinfo sequentially */ | 207 | /* make sure we initialize shinfo sequentially */ |
| 205 | shinfo = skb_shinfo(skb); | 208 | shinfo = skb_shinfo(skb); |
| 206 | atomic_set(&shinfo->dataref, 1); | 209 | atomic_set(&shinfo->dataref, 1); |
| @@ -217,6 +220,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, | |||
| 217 | struct sk_buff *child = skb + 1; | 220 | struct sk_buff *child = skb + 1; |
| 218 | atomic_t *fclone_ref = (atomic_t *) (child + 1); | 221 | atomic_t *fclone_ref = (atomic_t *) (child + 1); |
| 219 | 222 | ||
| 223 | kmemcheck_annotate_bitfield(child, flags1); | ||
| 224 | kmemcheck_annotate_bitfield(child, flags2); | ||
| 220 | skb->fclone = SKB_FCLONE_ORIG; | 225 | skb->fclone = SKB_FCLONE_ORIG; |
| 221 | atomic_set(fclone_ref, 1); | 226 | atomic_set(fclone_ref, 1); |
| 222 | 227 | ||
| @@ -635,6 +640,9 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) | |||
| 635 | n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); | 640 | n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); |
| 636 | if (!n) | 641 | if (!n) |
| 637 | return NULL; | 642 | return NULL; |
| 643 | |||
| 644 | kmemcheck_annotate_bitfield(n, flags1); | ||
| 645 | kmemcheck_annotate_bitfield(n, flags2); | ||
| 638 | n->fclone = SKB_FCLONE_UNAVAILABLE; | 646 | n->fclone = SKB_FCLONE_UNAVAILABLE; |
| 639 | } | 647 | } |
| 640 | 648 | ||
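The annotations added to skbuff.c pair with kmemcheck_bitfield_begin()/kmemcheck_bitfield_end() markers placed around the flags1 and flags2 bit groups in the struct sk_buff definition elsewhere in this series. A condensed, hypothetical example of how the two halves cooperate (struct my_packet and my_packet_alloc() are invented; the kmemcheck macros are the API from <linux/kmemcheck.h>):

    #include <linux/kmemcheck.h>
    #include <linux/slab.h>

    struct my_packet {
        kmemcheck_bitfield_begin(flags);
        unsigned int    cloned:1,
                        nohdr:1;
        kmemcheck_bitfield_end(flags);
    };

    static struct my_packet *my_packet_alloc(gfp_t gfp)
    {
        struct my_packet *p = kmalloc(sizeof(*p), gfp);

        if (!p)
            return NULL;
        /*
         * Setting one bit is a read-modify-write of the underlying
         * word, which kmemcheck would report as a read of
         * uninitialized memory. Marking the delimited group
         * initialized first avoids that false positive, exactly as
         * __alloc_skb() does for flags1/flags2 above.
         */
        kmemcheck_annotate_bitfield(p, flags);
        p->cloned = 1;
        return p;
    }

The same pattern explains the sk_prot_alloc() and inet_twsk_alloc() annotations below: each marks a bitfield group immediately after the containing object is allocated.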
diff --git a/net/core/sock.c b/net/core/sock.c index 06e26b77ad9e..b0ba569bc973 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
| @@ -945,6 +945,8 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, | |||
| 945 | sk = kmalloc(prot->obj_size, priority); | 945 | sk = kmalloc(prot->obj_size, priority); |
| 946 | 946 | ||
| 947 | if (sk != NULL) { | 947 | if (sk != NULL) { |
| 948 | kmemcheck_annotate_bitfield(sk, flags); | ||
| 949 | |||
| 948 | if (security_sk_alloc(sk, family, priority)) | 950 | if (security_sk_alloc(sk, family, priority)) |
| 949 | goto out_free; | 951 | goto out_free; |
| 950 | 952 | ||
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 68a8d892c711..61283f928825 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | */ | 9 | */ |
| 10 | 10 | ||
| 11 | #include <linux/kernel.h> | 11 | #include <linux/kernel.h> |
| 12 | #include <linux/kmemcheck.h> | ||
| 12 | #include <net/inet_hashtables.h> | 13 | #include <net/inet_hashtables.h> |
| 13 | #include <net/inet_timewait_sock.h> | 14 | #include <net/inet_timewait_sock.h> |
| 14 | #include <net/ip.h> | 15 | #include <net/ip.h> |
| @@ -120,6 +121,8 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat | |||
| 120 | if (tw != NULL) { | 121 | if (tw != NULL) { |
| 121 | const struct inet_sock *inet = inet_sk(sk); | 122 | const struct inet_sock *inet = inet_sk(sk); |
| 122 | 123 | ||
| 124 | kmemcheck_annotate_bitfield(tw, flags); | ||
| 125 | |||
| 123 | /* Give us an identity. */ | 126 | /* Give us an identity. */ |
| 124 | tw->tw_daddr = inet->daddr; | 127 | tw->tw_daddr = inet->daddr; |
| 125 | tw->tw_rcv_saddr = inet->rcv_saddr; | 128 | tw->tw_rcv_saddr = inet->rcv_saddr; |
