commit    c4ef144a9d0803eb0a2d4110ae87e7f34e667ded
tree      b4b5e472bafb3d5d0d8ea26680e1d8cc87365c30
parent    a1be621dfacbef0fd374d8acd553d71e07bf29ac
parent    efed792d6738964f399a508ef9e831cd60fa4657
author    Ingo Molnar <mingo@elte.hu>  2009-03-05 04:20:47 -0500
committer Ingo Molnar <mingo@elte.hu>  2009-03-05 04:20:47 -0500

Merge branch 'tracing/ftrace' into tracing/core
Diffstat (limited to 'kernel'):

 -rw-r--r--  kernel/lockdep.c                     | 531
 -rw-r--r--  kernel/lockdep_internals.h           |  45
 -rw-r--r--  kernel/lockdep_proc.c                |  22
 -rw-r--r--  kernel/lockdep_states.h              |   9
 -rw-r--r--  kernel/mutex-debug.c                 |   9
 -rw-r--r--  kernel/mutex-debug.h                 |  18
 -rw-r--r--  kernel/mutex.c                       | 121
 -rw-r--r--  kernel/mutex.h                       |  22
 -rw-r--r--  kernel/sched.c                       |  71
 -rw-r--r--  kernel/sched_features.h              |   1
 -rw-r--r--  kernel/timer.c                       |  68
 -rw-r--r--  kernel/trace/ring_buffer.c           | 118
 -rw-r--r--  kernel/trace/trace.c                 | 288
 -rw-r--r--  kernel/trace/trace.h                 |   1
 -rw-r--r--  kernel/trace/trace_events_stage_3.h  |   4

 15 files changed, 961 insertions(+), 367 deletions(-)
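
Taken together, the diffs below cover four areas: templating of the lockdep irq states (plus a new RECLAIM_FS state), adaptive spinning for mutexes, lockdep annotations for timers, and lockdep tracepoints. Most of the lockdep churn replaces hand-written HARDIRQ/SOFTIRQ cases with a small "x-macro" template: kernel/lockdep_states.h lists the states, and lockdep.c / lockdep_internals.h re-include it under different LOCKDEP_STATE() definitions to generate the usage-bit enum, the usage strings and the per-state verbose hooks. The stand-alone sketch below only illustrates that expansion pattern; it is not part of the patch, and it uses a local list macro as a stand-in for the re-included header.

/* Illustrative sketch of the LOCKDEP_STATE() expansion pattern (not part of
 * the patch). Build with: cc -o lockdep_states_demo lockdep_states_demo.c */
#include <stdio.h>

/* Stand-in for re-including kernel/lockdep_states.h: one entry per state. */
#define LOCKDEP_STATES(X)	\
	X(HARDIRQ)		\
	X(SOFTIRQ)		\
	X(RECLAIM_FS)

/* First expansion: four usage bits per state, as in lockdep_internals.h. */
enum lock_usage_bit {
#define LOCKDEP_STATE(s)		\
	LOCK_USED_IN_##s,		\
	LOCK_USED_IN_##s##_READ,	\
	LOCK_ENABLED_##s,		\
	LOCK_ENABLED_##s##_READ,
	LOCKDEP_STATES(LOCKDEP_STATE)
#undef LOCKDEP_STATE
	LOCK_USED,
	LOCK_USAGE_STATES
};

/* Second expansion: printable state names, as state_names[] in lockdep.c. */
static const char *state_names[] = {
#define LOCKDEP_STATE(s) #s,
	LOCKDEP_STATES(LOCKDEP_STATE)
#undef LOCKDEP_STATE
};

int main(void)
{
	/* bit >> 2 recovers the state index, exactly as state_name() does. */
	printf("bit %d belongs to state %s\n",
	       LOCK_ENABLED_RECLAIM_FS,
	       state_names[LOCK_ENABLED_RECLAIM_FS >> 2]);
	printf("%d usage states in total\n", LOCK_USAGE_STATES);
	return 0;
}
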
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 06b0c3568f0b..cb70c1db85d0 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -41,6 +41,8 @@
41#include <linux/utsname.h> 41#include <linux/utsname.h>
42#include <linux/hash.h> 42#include <linux/hash.h>
43#include <linux/ftrace.h> 43#include <linux/ftrace.h>
44#include <linux/stringify.h>
45#include <trace/lockdep.h>
44 46
45#include <asm/sections.h> 47#include <asm/sections.h>
46 48
@@ -310,12 +312,14 @@ EXPORT_SYMBOL(lockdep_on);
310#if VERBOSE 312#if VERBOSE
311# define HARDIRQ_VERBOSE 1 313# define HARDIRQ_VERBOSE 1
312# define SOFTIRQ_VERBOSE 1 314# define SOFTIRQ_VERBOSE 1
315# define RECLAIM_VERBOSE 1
313#else 316#else
314# define HARDIRQ_VERBOSE 0 317# define HARDIRQ_VERBOSE 0
315# define SOFTIRQ_VERBOSE 0 318# define SOFTIRQ_VERBOSE 0
319# define RECLAIM_VERBOSE 0
316#endif 320#endif
317 321
318#if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE 322#if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE || RECLAIM_VERBOSE
319/* 323/*
320 * Quick filtering for interesting events: 324 * Quick filtering for interesting events:
321 */ 325 */
@@ -443,17 +447,18 @@ atomic_t nr_find_usage_backwards_recursions;
443 * Locking printouts: 447 * Locking printouts:
444 */ 448 */
445 449
450#define __USAGE(__STATE) \
451 [LOCK_USED_IN_##__STATE] = "IN-"__stringify(__STATE)"-W", \
452 [LOCK_ENABLED_##__STATE] = __stringify(__STATE)"-ON-W", \
453 [LOCK_USED_IN_##__STATE##_READ] = "IN-"__stringify(__STATE)"-R",\
454 [LOCK_ENABLED_##__STATE##_READ] = __stringify(__STATE)"-ON-R",
455
446static const char *usage_str[] = 456static const char *usage_str[] =
447{ 457{
448 [LOCK_USED] = "initial-use ", 458#define LOCKDEP_STATE(__STATE) __USAGE(__STATE)
449 [LOCK_USED_IN_HARDIRQ] = "in-hardirq-W", 459#include "lockdep_states.h"
450 [LOCK_USED_IN_SOFTIRQ] = "in-softirq-W", 460#undef LOCKDEP_STATE
451 [LOCK_ENABLED_SOFTIRQS] = "softirq-on-W", 461 [LOCK_USED] = "INITIAL USE",
452 [LOCK_ENABLED_HARDIRQS] = "hardirq-on-W",
453 [LOCK_USED_IN_HARDIRQ_READ] = "in-hardirq-R",
454 [LOCK_USED_IN_SOFTIRQ_READ] = "in-softirq-R",
455 [LOCK_ENABLED_SOFTIRQS_READ] = "softirq-on-R",
456 [LOCK_ENABLED_HARDIRQS_READ] = "hardirq-on-R",
457}; 462};
458 463
459const char * __get_key_name(struct lockdep_subclass_key *key, char *str) 464const char * __get_key_name(struct lockdep_subclass_key *key, char *str)
@@ -461,46 +466,45 @@ const char * __get_key_name(struct lockdep_subclass_key *key, char *str)
461 return kallsyms_lookup((unsigned long)key, NULL, NULL, NULL, str); 466 return kallsyms_lookup((unsigned long)key, NULL, NULL, NULL, str);
462} 467}
463 468
464void 469static inline unsigned long lock_flag(enum lock_usage_bit bit)
465get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4)
466{ 470{
467 *c1 = '.', *c2 = '.', *c3 = '.', *c4 = '.'; 471 return 1UL << bit;
468 472}
469 if (class->usage_mask & LOCKF_USED_IN_HARDIRQ)
470 *c1 = '+';
471 else
472 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS)
473 *c1 = '-';
474 473
475 if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ) 474static char get_usage_char(struct lock_class *class, enum lock_usage_bit bit)
476 *c2 = '+'; 475{
477 else 476 char c = '.';
478 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS)
479 *c2 = '-';
480 477
481 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ) 478 if (class->usage_mask & lock_flag(bit + 2))
482 *c3 = '-'; 479 c = '+';
483 if (class->usage_mask & LOCKF_USED_IN_HARDIRQ_READ) { 480 if (class->usage_mask & lock_flag(bit)) {
484 *c3 = '+'; 481 c = '-';
485 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ) 482 if (class->usage_mask & lock_flag(bit + 2))
486 *c3 = '?'; 483 c = '?';
487 } 484 }
488 485
489 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ) 486 return c;
490 *c4 = '-'; 487}
491 if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ) { 488
492 *c4 = '+'; 489void get_usage_chars(struct lock_class *class, char usage[LOCK_USAGE_CHARS])
493 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ) 490{
494 *c4 = '?'; 491 int i = 0;
495 } 492
493#define LOCKDEP_STATE(__STATE) \
494 usage[i++] = get_usage_char(class, LOCK_USED_IN_##__STATE); \
495 usage[i++] = get_usage_char(class, LOCK_USED_IN_##__STATE##_READ);
496#include "lockdep_states.h"
497#undef LOCKDEP_STATE
498
499 usage[i] = '\0';
496} 500}
497 501
498static void print_lock_name(struct lock_class *class) 502static void print_lock_name(struct lock_class *class)
499{ 503{
500 char str[KSYM_NAME_LEN], c1, c2, c3, c4; 504 char str[KSYM_NAME_LEN], usage[LOCK_USAGE_CHARS];
501 const char *name; 505 const char *name;
502 506
503 get_usage_chars(class, &c1, &c2, &c3, &c4); 507 get_usage_chars(class, usage);
504 508
505 name = class->name; 509 name = class->name;
506 if (!name) { 510 if (!name) {
@@ -513,7 +517,7 @@ static void print_lock_name(struct lock_class *class)
513 if (class->subclass) 517 if (class->subclass)
514 printk("/%d", class->subclass); 518 printk("/%d", class->subclass);
515 } 519 }
516 printk("){%c%c%c%c}", c1, c2, c3, c4); 520 printk("){%s}", usage);
517} 521}
518 522
519static void print_lockdep_cache(struct lockdep_map *lock) 523static void print_lockdep_cache(struct lockdep_map *lock)
@@ -1263,9 +1267,49 @@ check_usage(struct task_struct *curr, struct held_lock *prev,
1263 bit_backwards, bit_forwards, irqclass); 1267 bit_backwards, bit_forwards, irqclass);
1264} 1268}
1265 1269
1266static int 1270static const char *state_names[] = {
1267check_prev_add_irq(struct task_struct *curr, struct held_lock *prev, 1271#define LOCKDEP_STATE(__STATE) \
1268 struct held_lock *next) 1272 __stringify(__STATE),
1273#include "lockdep_states.h"
1274#undef LOCKDEP_STATE
1275};
1276
1277static const char *state_rnames[] = {
1278#define LOCKDEP_STATE(__STATE) \
1279 __stringify(__STATE)"-READ",
1280#include "lockdep_states.h"
1281#undef LOCKDEP_STATE
1282};
1283
1284static inline const char *state_name(enum lock_usage_bit bit)
1285{
1286 return (bit & 1) ? state_rnames[bit >> 2] : state_names[bit >> 2];
1287}
1288
1289static int exclusive_bit(int new_bit)
1290{
1291 /*
1292 * USED_IN
1293 * USED_IN_READ
1294 * ENABLED
1295 * ENABLED_READ
1296 *
1297 * bit 0 - write/read
1298 * bit 1 - used_in/enabled
1299 * bit 2+ state
1300 */
1301
1302 int state = new_bit & ~3;
1303 int dir = new_bit & 2;
1304
1305 /*
1306 * keep state, bit flip the direction and strip read.
1307 */
1308 return state | (dir ^ 2);
1309}
1310
1311static int check_irq_usage(struct task_struct *curr, struct held_lock *prev,
1312 struct held_lock *next, enum lock_usage_bit bit)
1269{ 1313{
1270 /* 1314 /*
1271 * Prove that the new dependency does not connect a hardirq-safe 1315 * Prove that the new dependency does not connect a hardirq-safe
@@ -1273,38 +1317,34 @@ check_prev_add_irq(struct task_struct *curr, struct held_lock *prev,
1273 * the backwards-subgraph starting at <prev>, and the 1317 * the backwards-subgraph starting at <prev>, and the
1274 * forwards-subgraph starting at <next>: 1318 * forwards-subgraph starting at <next>:
1275 */ 1319 */
1276 if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ, 1320 if (!check_usage(curr, prev, next, bit,
1277 LOCK_ENABLED_HARDIRQS, "hard")) 1321 exclusive_bit(bit), state_name(bit)))
1278 return 0; 1322 return 0;
1279 1323
1324 bit++; /* _READ */
1325
1280 /* 1326 /*
1281 * Prove that the new dependency does not connect a hardirq-safe-read 1327 * Prove that the new dependency does not connect a hardirq-safe-read
1282 * lock with a hardirq-unsafe lock - to achieve this we search 1328 * lock with a hardirq-unsafe lock - to achieve this we search
1283 * the backwards-subgraph starting at <prev>, and the 1329 * the backwards-subgraph starting at <prev>, and the
1284 * forwards-subgraph starting at <next>: 1330 * forwards-subgraph starting at <next>:
1285 */ 1331 */
1286 if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ_READ, 1332 if (!check_usage(curr, prev, next, bit,
1287 LOCK_ENABLED_HARDIRQS, "hard-read")) 1333 exclusive_bit(bit), state_name(bit)))
1288 return 0; 1334 return 0;
1289 1335
1290 /* 1336 return 1;
1291 * Prove that the new dependency does not connect a softirq-safe 1337}
1292 * lock with a softirq-unsafe lock - to achieve this we search 1338
1293 * the backwards-subgraph starting at <prev>, and the 1339static int
1294 * forwards-subgraph starting at <next>: 1340check_prev_add_irq(struct task_struct *curr, struct held_lock *prev,
1295 */ 1341 struct held_lock *next)
1296 if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ, 1342{
1297 LOCK_ENABLED_SOFTIRQS, "soft")) 1343#define LOCKDEP_STATE(__STATE) \
1298 return 0; 1344 if (!check_irq_usage(curr, prev, next, LOCK_USED_IN_##__STATE)) \
1299 /*
1300 * Prove that the new dependency does not connect a softirq-safe-read
1301 * lock with a softirq-unsafe lock - to achieve this we search
1302 * the backwards-subgraph starting at <prev>, and the
1303 * forwards-subgraph starting at <next>:
1304 */
1305 if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ_READ,
1306 LOCK_ENABLED_SOFTIRQS, "soft"))
1307 return 0; 1345 return 0;
1346#include "lockdep_states.h"
1347#undef LOCKDEP_STATE
1308 1348
1309 return 1; 1349 return 1;
1310} 1350}
@@ -1861,9 +1901,9 @@ print_irq_inversion_bug(struct task_struct *curr, struct lock_class *other,
1861 curr->comm, task_pid_nr(curr)); 1901 curr->comm, task_pid_nr(curr));
1862 print_lock(this); 1902 print_lock(this);
1863 if (forwards) 1903 if (forwards)
1864 printk("but this lock took another, %s-irq-unsafe lock in the past:\n", irqclass); 1904 printk("but this lock took another, %s-unsafe lock in the past:\n", irqclass);
1865 else 1905 else
1866 printk("but this lock was taken by another, %s-irq-safe lock in the past:\n", irqclass); 1906 printk("but this lock was taken by another, %s-safe lock in the past:\n", irqclass);
1867 print_lock_name(other); 1907 print_lock_name(other);
1868 printk("\n\nand interrupts could create inverse lock ordering between them.\n\n"); 1908 printk("\n\nand interrupts could create inverse lock ordering between them.\n\n");
1869 1909
@@ -1933,7 +1973,7 @@ void print_irqtrace_events(struct task_struct *curr)
1933 print_ip_sym(curr->softirq_disable_ip); 1973 print_ip_sym(curr->softirq_disable_ip);
1934} 1974}
1935 1975
1936static int hardirq_verbose(struct lock_class *class) 1976static int HARDIRQ_verbose(struct lock_class *class)
1937{ 1977{
1938#if HARDIRQ_VERBOSE 1978#if HARDIRQ_VERBOSE
1939 return class_filter(class); 1979 return class_filter(class);
@@ -1941,7 +1981,7 @@ static int hardirq_verbose(struct lock_class *class)
1941 return 0; 1981 return 0;
1942} 1982}
1943 1983
1944static int softirq_verbose(struct lock_class *class) 1984static int SOFTIRQ_verbose(struct lock_class *class)
1945{ 1985{
1946#if SOFTIRQ_VERBOSE 1986#if SOFTIRQ_VERBOSE
1947 return class_filter(class); 1987 return class_filter(class);
@@ -1949,185 +1989,95 @@ static int softirq_verbose(struct lock_class *class)
1949 return 0; 1989 return 0;
1950} 1990}
1951 1991
1992static int RECLAIM_FS_verbose(struct lock_class *class)
1993{
1994#if RECLAIM_VERBOSE
1995 return class_filter(class);
1996#endif
1997 return 0;
1998}
1999
1952#define STRICT_READ_CHECKS 1 2000#define STRICT_READ_CHECKS 1
1953 2001
1954static int mark_lock_irq(struct task_struct *curr, struct held_lock *this, 2002static int (*state_verbose_f[])(struct lock_class *class) = {
2003#define LOCKDEP_STATE(__STATE) \
2004 __STATE##_verbose,
2005#include "lockdep_states.h"
2006#undef LOCKDEP_STATE
2007};
2008
2009static inline int state_verbose(enum lock_usage_bit bit,
2010 struct lock_class *class)
2011{
2012 return state_verbose_f[bit >> 2](class);
2013}
2014
2015typedef int (*check_usage_f)(struct task_struct *, struct held_lock *,
2016 enum lock_usage_bit bit, const char *name);
2017
2018static int
2019mark_lock_irq(struct task_struct *curr, struct held_lock *this,
1955 enum lock_usage_bit new_bit) 2020 enum lock_usage_bit new_bit)
1956{ 2021{
1957 int ret = 1; 2022 int excl_bit = exclusive_bit(new_bit);
2023 int read = new_bit & 1;
2024 int dir = new_bit & 2;
1958 2025
1959 switch(new_bit) { 2026 /*
1960 case LOCK_USED_IN_HARDIRQ: 2027 * mark USED_IN has to look forwards -- to ensure no dependency
1961 if (!valid_state(curr, this, new_bit, LOCK_ENABLED_HARDIRQS)) 2028 * has ENABLED state, which would allow recursion deadlocks.
1962 return 0; 2029 *
1963 if (!valid_state(curr, this, new_bit, 2030 * mark ENABLED has to look backwards -- to ensure no dependee
1964 LOCK_ENABLED_HARDIRQS_READ)) 2031 * has USED_IN state, which, again, would allow recursion deadlocks.
1965 return 0; 2032 */
1966 /* 2033 check_usage_f usage = dir ?
1967 * just marked it hardirq-safe, check that this lock 2034 check_usage_backwards : check_usage_forwards;
1968 * took no hardirq-unsafe lock in the past: 2035
1969 */ 2036 /*
1970 if (!check_usage_forwards(curr, this, 2037 * Validate that this particular lock does not have conflicting
1971 LOCK_ENABLED_HARDIRQS, "hard")) 2038 * usage states.
1972 return 0; 2039 */
1973#if STRICT_READ_CHECKS 2040 if (!valid_state(curr, this, new_bit, excl_bit))
1974 /* 2041 return 0;
1975 * just marked it hardirq-safe, check that this lock 2042
1976 * took no hardirq-unsafe-read lock in the past: 2043 /*
1977 */ 2044 * Validate that the lock dependencies don't have conflicting usage
1978 if (!check_usage_forwards(curr, this, 2045 * states.
1979 LOCK_ENABLED_HARDIRQS_READ, "hard-read")) 2046 */
1980 return 0; 2047 if ((!read || !dir || STRICT_READ_CHECKS) &&
1981#endif 2048 !usage(curr, this, excl_bit, state_name(new_bit & ~1)))
1982 if (hardirq_verbose(hlock_class(this))) 2049 return 0;
1983 ret = 2; 2050
1984 break; 2051 /*
1985 case LOCK_USED_IN_SOFTIRQ: 2052 * Check for read in write conflicts
1986 if (!valid_state(curr, this, new_bit, LOCK_ENABLED_SOFTIRQS)) 2053 */
1987 return 0; 2054 if (!read) {
1988 if (!valid_state(curr, this, new_bit, 2055 if (!valid_state(curr, this, new_bit, excl_bit + 1))
1989 LOCK_ENABLED_SOFTIRQS_READ))
1990 return 0;
1991 /*
1992 * just marked it softirq-safe, check that this lock
1993 * took no softirq-unsafe lock in the past:
1994 */
1995 if (!check_usage_forwards(curr, this,
1996 LOCK_ENABLED_SOFTIRQS, "soft"))
1997 return 0;
1998#if STRICT_READ_CHECKS
1999 /*
2000 * just marked it softirq-safe, check that this lock
2001 * took no softirq-unsafe-read lock in the past:
2002 */
2003 if (!check_usage_forwards(curr, this,
2004 LOCK_ENABLED_SOFTIRQS_READ, "soft-read"))
2005 return 0;
2006#endif
2007 if (softirq_verbose(hlock_class(this)))
2008 ret = 2;
2009 break;
2010 case LOCK_USED_IN_HARDIRQ_READ:
2011 if (!valid_state(curr, this, new_bit, LOCK_ENABLED_HARDIRQS))
2012 return 0;
2013 /*
2014 * just marked it hardirq-read-safe, check that this lock
2015 * took no hardirq-unsafe lock in the past:
2016 */
2017 if (!check_usage_forwards(curr, this,
2018 LOCK_ENABLED_HARDIRQS, "hard"))
2019 return 0;
2020 if (hardirq_verbose(hlock_class(this)))
2021 ret = 2;
2022 break;
2023 case LOCK_USED_IN_SOFTIRQ_READ:
2024 if (!valid_state(curr, this, new_bit, LOCK_ENABLED_SOFTIRQS))
2025 return 0;
2026 /*
2027 * just marked it softirq-read-safe, check that this lock
2028 * took no softirq-unsafe lock in the past:
2029 */
2030 if (!check_usage_forwards(curr, this,
2031 LOCK_ENABLED_SOFTIRQS, "soft"))
2032 return 0;
2033 if (softirq_verbose(hlock_class(this)))
2034 ret = 2;
2035 break;
2036 case LOCK_ENABLED_HARDIRQS:
2037 if (!valid_state(curr, this, new_bit, LOCK_USED_IN_HARDIRQ))
2038 return 0;
2039 if (!valid_state(curr, this, new_bit,
2040 LOCK_USED_IN_HARDIRQ_READ))
2041 return 0;
2042 /*
2043 * just marked it hardirq-unsafe, check that no hardirq-safe
2044 * lock in the system ever took it in the past:
2045 */
2046 if (!check_usage_backwards(curr, this,
2047 LOCK_USED_IN_HARDIRQ, "hard"))
2048 return 0;
2049#if STRICT_READ_CHECKS
2050 /*
2051 * just marked it hardirq-unsafe, check that no
2052 * hardirq-safe-read lock in the system ever took
2053 * it in the past:
2054 */
2055 if (!check_usage_backwards(curr, this,
2056 LOCK_USED_IN_HARDIRQ_READ, "hard-read"))
2057 return 0;
2058#endif
2059 if (hardirq_verbose(hlock_class(this)))
2060 ret = 2;
2061 break;
2062 case LOCK_ENABLED_SOFTIRQS:
2063 if (!valid_state(curr, this, new_bit, LOCK_USED_IN_SOFTIRQ))
2064 return 0;
2065 if (!valid_state(curr, this, new_bit,
2066 LOCK_USED_IN_SOFTIRQ_READ))
2067 return 0;
2068 /*
2069 * just marked it softirq-unsafe, check that no softirq-safe
2070 * lock in the system ever took it in the past:
2071 */
2072 if (!check_usage_backwards(curr, this,
2073 LOCK_USED_IN_SOFTIRQ, "soft"))
2074 return 0;
2075#if STRICT_READ_CHECKS
2076 /*
2077 * just marked it softirq-unsafe, check that no
2078 * softirq-safe-read lock in the system ever took
2079 * it in the past:
2080 */
2081 if (!check_usage_backwards(curr, this,
2082 LOCK_USED_IN_SOFTIRQ_READ, "soft-read"))
2083 return 0;
2084#endif
2085 if (softirq_verbose(hlock_class(this)))
2086 ret = 2;
2087 break;
2088 case LOCK_ENABLED_HARDIRQS_READ:
2089 if (!valid_state(curr, this, new_bit, LOCK_USED_IN_HARDIRQ))
2090 return 0;
2091#if STRICT_READ_CHECKS
2092 /*
2093 * just marked it hardirq-read-unsafe, check that no
2094 * hardirq-safe lock in the system ever took it in the past:
2095 */
2096 if (!check_usage_backwards(curr, this,
2097 LOCK_USED_IN_HARDIRQ, "hard"))
2098 return 0;
2099#endif
2100 if (hardirq_verbose(hlock_class(this)))
2101 ret = 2;
2102 break;
2103 case LOCK_ENABLED_SOFTIRQS_READ:
2104 if (!valid_state(curr, this, new_bit, LOCK_USED_IN_SOFTIRQ))
2105 return 0; 2056 return 0;
2106#if STRICT_READ_CHECKS 2057
2107 /* 2058 if (STRICT_READ_CHECKS &&
2108 * just marked it softirq-read-unsafe, check that no 2059 !usage(curr, this, excl_bit + 1,
2109 * softirq-safe lock in the system ever took it in the past: 2060 state_name(new_bit + 1)))
2110 */
2111 if (!check_usage_backwards(curr, this,
2112 LOCK_USED_IN_SOFTIRQ, "soft"))
2113 return 0; 2061 return 0;
2114#endif
2115 if (softirq_verbose(hlock_class(this)))
2116 ret = 2;
2117 break;
2118 default:
2119 WARN_ON(1);
2120 break;
2121 } 2062 }
2122 2063
2123 return ret; 2064 if (state_verbose(new_bit, hlock_class(this)))
2065 return 2;
2066
2067 return 1;
2124} 2068}
2125 2069
2070enum mark_type {
2071#define LOCKDEP_STATE(__STATE) __STATE,
2072#include "lockdep_states.h"
2073#undef LOCKDEP_STATE
2074};
2075
2126/* 2076/*
2127 * Mark all held locks with a usage bit: 2077 * Mark all held locks with a usage bit:
2128 */ 2078 */
2129static int 2079static int
2130mark_held_locks(struct task_struct *curr, int hardirq) 2080mark_held_locks(struct task_struct *curr, enum mark_type mark)
2131{ 2081{
2132 enum lock_usage_bit usage_bit; 2082 enum lock_usage_bit usage_bit;
2133 struct held_lock *hlock; 2083 struct held_lock *hlock;
@@ -2136,17 +2086,12 @@ mark_held_locks(struct task_struct *curr, int hardirq)
2136 for (i = 0; i < curr->lockdep_depth; i++) { 2086 for (i = 0; i < curr->lockdep_depth; i++) {
2137 hlock = curr->held_locks + i; 2087 hlock = curr->held_locks + i;
2138 2088
2139 if (hardirq) { 2089 usage_bit = 2 + (mark << 2); /* ENABLED */
2140 if (hlock->read) 2090 if (hlock->read)
2141 usage_bit = LOCK_ENABLED_HARDIRQS_READ; 2091 usage_bit += 1; /* READ */
2142 else 2092
2143 usage_bit = LOCK_ENABLED_HARDIRQS; 2093 BUG_ON(usage_bit >= LOCK_USAGE_STATES);
2144 } else { 2094
2145 if (hlock->read)
2146 usage_bit = LOCK_ENABLED_SOFTIRQS_READ;
2147 else
2148 usage_bit = LOCK_ENABLED_SOFTIRQS;
2149 }
2150 if (!mark_lock(curr, hlock, usage_bit)) 2095 if (!mark_lock(curr, hlock, usage_bit))
2151 return 0; 2096 return 0;
2152 } 2097 }
@@ -2200,7 +2145,7 @@ void trace_hardirqs_on_caller(unsigned long ip)
2200 * We are going to turn hardirqs on, so set the 2145 * We are going to turn hardirqs on, so set the
2201 * usage bit for all held locks: 2146 * usage bit for all held locks:
2202 */ 2147 */
2203 if (!mark_held_locks(curr, 1)) 2148 if (!mark_held_locks(curr, HARDIRQ))
2204 return; 2149 return;
2205 /* 2150 /*
2206 * If we have softirqs enabled, then set the usage 2151 * If we have softirqs enabled, then set the usage
@@ -2208,7 +2153,7 @@ void trace_hardirqs_on_caller(unsigned long ip)
2208 * this bit from being set before) 2153 * this bit from being set before)
2209 */ 2154 */
2210 if (curr->softirqs_enabled) 2155 if (curr->softirqs_enabled)
2211 if (!mark_held_locks(curr, 0)) 2156 if (!mark_held_locks(curr, SOFTIRQ))
2212 return; 2157 return;
2213 2158
2214 curr->hardirq_enable_ip = ip; 2159 curr->hardirq_enable_ip = ip;
@@ -2288,7 +2233,7 @@ void trace_softirqs_on(unsigned long ip)
2288 * enabled too: 2233 * enabled too:
2289 */ 2234 */
2290 if (curr->hardirqs_enabled) 2235 if (curr->hardirqs_enabled)
2291 mark_held_locks(curr, 0); 2236 mark_held_locks(curr, SOFTIRQ);
2292} 2237}
2293 2238
2294/* 2239/*
@@ -2317,6 +2262,31 @@ void trace_softirqs_off(unsigned long ip)
2317 debug_atomic_inc(&redundant_softirqs_off); 2262 debug_atomic_inc(&redundant_softirqs_off);
2318} 2263}
2319 2264
2265void lockdep_trace_alloc(gfp_t gfp_mask)
2266{
2267 struct task_struct *curr = current;
2268
2269 if (unlikely(!debug_locks))
2270 return;
2271
2272 /* no reclaim without waiting on it */
2273 if (!(gfp_mask & __GFP_WAIT))
2274 return;
2275
2276 /* this guy won't enter reclaim */
2277 if ((curr->flags & PF_MEMALLOC) && !(gfp_mask & __GFP_NOMEMALLOC))
2278 return;
2279
2280 /* We're only interested __GFP_FS allocations for now */
2281 if (!(gfp_mask & __GFP_FS))
2282 return;
2283
2284 if (DEBUG_LOCKS_WARN_ON(irqs_disabled()))
2285 return;
2286
2287 mark_held_locks(curr, RECLAIM_FS);
2288}
2289
2320static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock) 2290static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock)
2321{ 2291{
2322 /* 2292 /*
@@ -2345,19 +2315,35 @@ static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock)
2345 if (!hlock->hardirqs_off) { 2315 if (!hlock->hardirqs_off) {
2346 if (hlock->read) { 2316 if (hlock->read) {
2347 if (!mark_lock(curr, hlock, 2317 if (!mark_lock(curr, hlock,
2348 LOCK_ENABLED_HARDIRQS_READ)) 2318 LOCK_ENABLED_HARDIRQ_READ))
2349 return 0; 2319 return 0;
2350 if (curr->softirqs_enabled) 2320 if (curr->softirqs_enabled)
2351 if (!mark_lock(curr, hlock, 2321 if (!mark_lock(curr, hlock,
2352 LOCK_ENABLED_SOFTIRQS_READ)) 2322 LOCK_ENABLED_SOFTIRQ_READ))
2353 return 0; 2323 return 0;
2354 } else { 2324 } else {
2355 if (!mark_lock(curr, hlock, 2325 if (!mark_lock(curr, hlock,
2356 LOCK_ENABLED_HARDIRQS)) 2326 LOCK_ENABLED_HARDIRQ))
2357 return 0; 2327 return 0;
2358 if (curr->softirqs_enabled) 2328 if (curr->softirqs_enabled)
2359 if (!mark_lock(curr, hlock, 2329 if (!mark_lock(curr, hlock,
2360 LOCK_ENABLED_SOFTIRQS)) 2330 LOCK_ENABLED_SOFTIRQ))
2331 return 0;
2332 }
2333 }
2334
2335 /*
2336 * We reuse the irq context infrastructure more broadly as a general
2337 * context checking code. This tests GFP_FS recursion (a lock taken
2338 * during reclaim for a GFP_FS allocation is held over a GFP_FS
2339 * allocation).
2340 */
2341 if (!hlock->trylock && (curr->lockdep_reclaim_gfp & __GFP_FS)) {
2342 if (hlock->read) {
2343 if (!mark_lock(curr, hlock, LOCK_USED_IN_RECLAIM_FS_READ))
2344 return 0;
2345 } else {
2346 if (!mark_lock(curr, hlock, LOCK_USED_IN_RECLAIM_FS))
2361 return 0; 2347 return 0;
2362 } 2348 }
2363 } 2349 }
@@ -2412,6 +2398,10 @@ static inline int separate_irq_context(struct task_struct *curr,
2412 return 0; 2398 return 0;
2413} 2399}
2414 2400
2401void lockdep_trace_alloc(gfp_t gfp_mask)
2402{
2403}
2404
2415#endif 2405#endif
2416 2406
2417/* 2407/*
@@ -2445,14 +2435,13 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
2445 return 0; 2435 return 0;
2446 2436
2447 switch (new_bit) { 2437 switch (new_bit) {
2448 case LOCK_USED_IN_HARDIRQ: 2438#define LOCKDEP_STATE(__STATE) \
2449 case LOCK_USED_IN_SOFTIRQ: 2439 case LOCK_USED_IN_##__STATE: \
2450 case LOCK_USED_IN_HARDIRQ_READ: 2440 case LOCK_USED_IN_##__STATE##_READ: \
2451 case LOCK_USED_IN_SOFTIRQ_READ: 2441 case LOCK_ENABLED_##__STATE: \
2452 case LOCK_ENABLED_HARDIRQS: 2442 case LOCK_ENABLED_##__STATE##_READ:
2453 case LOCK_ENABLED_SOFTIRQS: 2443#include "lockdep_states.h"
2454 case LOCK_ENABLED_HARDIRQS_READ: 2444#undef LOCKDEP_STATE
2455 case LOCK_ENABLED_SOFTIRQS_READ:
2456 ret = mark_lock_irq(curr, this, new_bit); 2445 ret = mark_lock_irq(curr, this, new_bit);
2457 if (!ret) 2446 if (!ret)
2458 return 0; 2447 return 0;
@@ -2925,6 +2914,8 @@ void lock_set_class(struct lockdep_map *lock, const char *name,
2925} 2914}
2926EXPORT_SYMBOL_GPL(lock_set_class); 2915EXPORT_SYMBOL_GPL(lock_set_class);
2927 2916
2917DEFINE_TRACE(lock_acquire);
2918
2928/* 2919/*
2929 * We are not always called with irqs disabled - do that here, 2920 * We are not always called with irqs disabled - do that here,
2930 * and also avoid lockdep recursion: 2921 * and also avoid lockdep recursion:
@@ -2935,6 +2926,8 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
2935{ 2926{
2936 unsigned long flags; 2927 unsigned long flags;
2937 2928
2929 trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip);
2930
2938 if (unlikely(current->lockdep_recursion)) 2931 if (unlikely(current->lockdep_recursion))
2939 return; 2932 return;
2940 2933
@@ -2949,11 +2942,15 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
2949} 2942}
2950EXPORT_SYMBOL_GPL(lock_acquire); 2943EXPORT_SYMBOL_GPL(lock_acquire);
2951 2944
2945DEFINE_TRACE(lock_release);
2946
2952void lock_release(struct lockdep_map *lock, int nested, 2947void lock_release(struct lockdep_map *lock, int nested,
2953 unsigned long ip) 2948 unsigned long ip)
2954{ 2949{
2955 unsigned long flags; 2950 unsigned long flags;
2956 2951
2952 trace_lock_release(lock, nested, ip);
2953
2957 if (unlikely(current->lockdep_recursion)) 2954 if (unlikely(current->lockdep_recursion))
2958 return; 2955 return;
2959 2956
@@ -2966,6 +2963,16 @@ void lock_release(struct lockdep_map *lock, int nested,
2966} 2963}
2967EXPORT_SYMBOL_GPL(lock_release); 2964EXPORT_SYMBOL_GPL(lock_release);
2968 2965
2966void lockdep_set_current_reclaim_state(gfp_t gfp_mask)
2967{
2968 current->lockdep_reclaim_gfp = gfp_mask;
2969}
2970
2971void lockdep_clear_current_reclaim_state(void)
2972{
2973 current->lockdep_reclaim_gfp = 0;
2974}
2975
2969#ifdef CONFIG_LOCK_STAT 2976#ifdef CONFIG_LOCK_STAT
2970static int 2977static int
2971print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock, 2978print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock,
@@ -3092,10 +3099,14 @@ found_it:
3092 lock->ip = ip; 3099 lock->ip = ip;
3093} 3100}
3094 3101
3102DEFINE_TRACE(lock_contended);
3103
3095void lock_contended(struct lockdep_map *lock, unsigned long ip) 3104void lock_contended(struct lockdep_map *lock, unsigned long ip)
3096{ 3105{
3097 unsigned long flags; 3106 unsigned long flags;
3098 3107
3108 trace_lock_contended(lock, ip);
3109
3099 if (unlikely(!lock_stat)) 3110 if (unlikely(!lock_stat))
3100 return; 3111 return;
3101 3112
@@ -3111,10 +3122,14 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
3111} 3122}
3112EXPORT_SYMBOL_GPL(lock_contended); 3123EXPORT_SYMBOL_GPL(lock_contended);
3113 3124
3125DEFINE_TRACE(lock_acquired);
3126
3114void lock_acquired(struct lockdep_map *lock, unsigned long ip) 3127void lock_acquired(struct lockdep_map *lock, unsigned long ip)
3115{ 3128{
3116 unsigned long flags; 3129 unsigned long flags;
3117 3130
3131 trace_lock_acquired(lock, ip);
3132
3118 if (unlikely(!lock_stat)) 3133 if (unlikely(!lock_stat))
3119 return; 3134 return;
3120 3135
diff --git a/kernel/lockdep_internals.h b/kernel/lockdep_internals.h
index 56b196932c08..a2cc7e9a6e84 100644
--- a/kernel/lockdep_internals.h
+++ b/kernel/lockdep_internals.h
@@ -7,6 +7,45 @@
7 */ 7 */
8 8
9/* 9/*
10 * Lock-class usage-state bits:
11 */
12enum lock_usage_bit {
13#define LOCKDEP_STATE(__STATE) \
14 LOCK_USED_IN_##__STATE, \
15 LOCK_USED_IN_##__STATE##_READ, \
16 LOCK_ENABLED_##__STATE, \
17 LOCK_ENABLED_##__STATE##_READ,
18#include "lockdep_states.h"
19#undef LOCKDEP_STATE
20 LOCK_USED,
21 LOCK_USAGE_STATES
22};
23
24/*
25 * Usage-state bitmasks:
26 */
27#define __LOCKF(__STATE) LOCKF_##__STATE = (1 << LOCK_##__STATE),
28
29enum {
30#define LOCKDEP_STATE(__STATE) \
31 __LOCKF(USED_IN_##__STATE) \
32 __LOCKF(USED_IN_##__STATE##_READ) \
33 __LOCKF(ENABLED_##__STATE) \
34 __LOCKF(ENABLED_##__STATE##_READ)
35#include "lockdep_states.h"
36#undef LOCKDEP_STATE
37 __LOCKF(USED)
38};
39
40#define LOCKF_ENABLED_IRQ (LOCKF_ENABLED_HARDIRQ | LOCKF_ENABLED_SOFTIRQ)
41#define LOCKF_USED_IN_IRQ (LOCKF_USED_IN_HARDIRQ | LOCKF_USED_IN_SOFTIRQ)
42
43#define LOCKF_ENABLED_IRQ_READ \
44 (LOCKF_ENABLED_HARDIRQ_READ | LOCKF_ENABLED_SOFTIRQ_READ)
45#define LOCKF_USED_IN_IRQ_READ \
46 (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ)
47
48/*
10 * MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies 49 * MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies
11 * we track. 50 * we track.
12 * 51 *
@@ -31,8 +70,10 @@
31extern struct list_head all_lock_classes; 70extern struct list_head all_lock_classes;
32extern struct lock_chain lock_chains[]; 71extern struct lock_chain lock_chains[];
33 72
34extern void 73#define LOCK_USAGE_CHARS (1+LOCK_USAGE_STATES/2)
35get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4); 74
75extern void get_usage_chars(struct lock_class *class,
76 char usage[LOCK_USAGE_CHARS]);
36 77
37extern const char * __get_key_name(struct lockdep_subclass_key *key, char *str); 78extern const char * __get_key_name(struct lockdep_subclass_key *key, char *str);
38 79
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index 13716b813896..d7135aa2d2c4 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -84,7 +84,7 @@ static int l_show(struct seq_file *m, void *v)
84{ 84{
85 struct lock_class *class = v; 85 struct lock_class *class = v;
86 struct lock_list *entry; 86 struct lock_list *entry;
87 char c1, c2, c3, c4; 87 char usage[LOCK_USAGE_CHARS];
88 88
89 if (v == SEQ_START_TOKEN) { 89 if (v == SEQ_START_TOKEN) {
90 seq_printf(m, "all lock classes:\n"); 90 seq_printf(m, "all lock classes:\n");
@@ -100,8 +100,8 @@ static int l_show(struct seq_file *m, void *v)
100 seq_printf(m, " BD:%5ld", lockdep_count_backward_deps(class)); 100 seq_printf(m, " BD:%5ld", lockdep_count_backward_deps(class));
101#endif 101#endif
102 102
103 get_usage_chars(class, &c1, &c2, &c3, &c4); 103 get_usage_chars(class, usage);
104 seq_printf(m, " %c%c%c%c", c1, c2, c3, c4); 104 seq_printf(m, " %s", usage);
105 105
106 seq_printf(m, ": "); 106 seq_printf(m, ": ");
107 print_name(m, class); 107 print_name(m, class);
@@ -300,27 +300,27 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
300 nr_uncategorized++; 300 nr_uncategorized++;
301 if (class->usage_mask & LOCKF_USED_IN_IRQ) 301 if (class->usage_mask & LOCKF_USED_IN_IRQ)
302 nr_irq_safe++; 302 nr_irq_safe++;
303 if (class->usage_mask & LOCKF_ENABLED_IRQS) 303 if (class->usage_mask & LOCKF_ENABLED_IRQ)
304 nr_irq_unsafe++; 304 nr_irq_unsafe++;
305 if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ) 305 if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ)
306 nr_softirq_safe++; 306 nr_softirq_safe++;
307 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS) 307 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQ)
308 nr_softirq_unsafe++; 308 nr_softirq_unsafe++;
309 if (class->usage_mask & LOCKF_USED_IN_HARDIRQ) 309 if (class->usage_mask & LOCKF_USED_IN_HARDIRQ)
310 nr_hardirq_safe++; 310 nr_hardirq_safe++;
311 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS) 311 if (class->usage_mask & LOCKF_ENABLED_HARDIRQ)
312 nr_hardirq_unsafe++; 312 nr_hardirq_unsafe++;
313 if (class->usage_mask & LOCKF_USED_IN_IRQ_READ) 313 if (class->usage_mask & LOCKF_USED_IN_IRQ_READ)
314 nr_irq_read_safe++; 314 nr_irq_read_safe++;
315 if (class->usage_mask & LOCKF_ENABLED_IRQS_READ) 315 if (class->usage_mask & LOCKF_ENABLED_IRQ_READ)
316 nr_irq_read_unsafe++; 316 nr_irq_read_unsafe++;
317 if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ) 317 if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ)
318 nr_softirq_read_safe++; 318 nr_softirq_read_safe++;
319 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ) 319 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQ_READ)
320 nr_softirq_read_unsafe++; 320 nr_softirq_read_unsafe++;
321 if (class->usage_mask & LOCKF_USED_IN_HARDIRQ_READ) 321 if (class->usage_mask & LOCKF_USED_IN_HARDIRQ_READ)
322 nr_hardirq_read_safe++; 322 nr_hardirq_read_safe++;
323 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ) 323 if (class->usage_mask & LOCKF_ENABLED_HARDIRQ_READ)
324 nr_hardirq_read_unsafe++; 324 nr_hardirq_read_unsafe++;
325 325
326#ifdef CONFIG_PROVE_LOCKING 326#ifdef CONFIG_PROVE_LOCKING
@@ -601,6 +601,10 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
601static void seq_header(struct seq_file *m) 601static void seq_header(struct seq_file *m)
602{ 602{
603 seq_printf(m, "lock_stat version 0.3\n"); 603 seq_printf(m, "lock_stat version 0.3\n");
604
605 if (unlikely(!debug_locks))
606 seq_printf(m, "*WARNING* lock debugging disabled!! - possibly due to a lockdep warning\n");
607
604 seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1)); 608 seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1));
605 seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s " 609 seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s "
606 "%14s %14s\n", 610 "%14s %14s\n",
diff --git a/kernel/lockdep_states.h b/kernel/lockdep_states.h
new file mode 100644
index 000000000000..995b0cc2b84c
--- /dev/null
+++ b/kernel/lockdep_states.h
@@ -0,0 +1,9 @@
1/*
2 * Lockdep states,
3 *
4 * please update XXX_LOCK_USAGE_STATES in include/linux/lockdep.h whenever
5 * you add one, or come up with a nice dynamic solution.
6 */
7LOCKDEP_STATE(HARDIRQ)
8LOCKDEP_STATE(SOFTIRQ)
9LOCKDEP_STATE(RECLAIM_FS)
diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c
index 1d94160eb532..50d022e5a560 100644
--- a/kernel/mutex-debug.c
+++ b/kernel/mutex-debug.c
@@ -26,11 +26,6 @@
26/* 26/*
27 * Must be called with lock->wait_lock held. 27 * Must be called with lock->wait_lock held.
28 */ 28 */
29void debug_mutex_set_owner(struct mutex *lock, struct thread_info *new_owner)
30{
31 lock->owner = new_owner;
32}
33
34void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter) 29void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
35{ 30{
36 memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter)); 31 memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter));
@@ -59,7 +54,6 @@ void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
59 54
60 /* Mark the current thread as blocked on the lock: */ 55 /* Mark the current thread as blocked on the lock: */
61 ti->task->blocked_on = waiter; 56 ti->task->blocked_on = waiter;
62 waiter->lock = lock;
63} 57}
64 58
65void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, 59void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
@@ -82,7 +76,7 @@ void debug_mutex_unlock(struct mutex *lock)
82 DEBUG_LOCKS_WARN_ON(lock->magic != lock); 76 DEBUG_LOCKS_WARN_ON(lock->magic != lock);
83 DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info()); 77 DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info());
84 DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); 78 DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next);
85 DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info()); 79 mutex_clear_owner(lock);
86} 80}
87 81
88void debug_mutex_init(struct mutex *lock, const char *name, 82void debug_mutex_init(struct mutex *lock, const char *name,
@@ -95,7 +89,6 @@ void debug_mutex_init(struct mutex *lock, const char *name,
95 debug_check_no_locks_freed((void *)lock, sizeof(*lock)); 89 debug_check_no_locks_freed((void *)lock, sizeof(*lock));
96 lockdep_init_map(&lock->dep_map, name, key, 0); 90 lockdep_init_map(&lock->dep_map, name, key, 0);
97#endif 91#endif
98 lock->owner = NULL;
99 lock->magic = lock; 92 lock->magic = lock;
100} 93}
101 94
diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h
index babfbdfc534b..6b2d735846a5 100644
--- a/kernel/mutex-debug.h
+++ b/kernel/mutex-debug.h
@@ -13,14 +13,6 @@
13/* 13/*
14 * This must be called with lock->wait_lock held. 14 * This must be called with lock->wait_lock held.
15 */ 15 */
16extern void
17debug_mutex_set_owner(struct mutex *lock, struct thread_info *new_owner);
18
19static inline void debug_mutex_clear_owner(struct mutex *lock)
20{
21 lock->owner = NULL;
22}
23
24extern void debug_mutex_lock_common(struct mutex *lock, 16extern void debug_mutex_lock_common(struct mutex *lock,
25 struct mutex_waiter *waiter); 17 struct mutex_waiter *waiter);
26extern void debug_mutex_wake_waiter(struct mutex *lock, 18extern void debug_mutex_wake_waiter(struct mutex *lock,
@@ -35,6 +27,16 @@ extern void debug_mutex_unlock(struct mutex *lock);
35extern void debug_mutex_init(struct mutex *lock, const char *name, 27extern void debug_mutex_init(struct mutex *lock, const char *name,
36 struct lock_class_key *key); 28 struct lock_class_key *key);
37 29
30static inline void mutex_set_owner(struct mutex *lock)
31{
32 lock->owner = current_thread_info();
33}
34
35static inline void mutex_clear_owner(struct mutex *lock)
36{
37 lock->owner = NULL;
38}
39
38#define spin_lock_mutex(lock, flags) \ 40#define spin_lock_mutex(lock, flags) \
39 do { \ 41 do { \
40 struct mutex *l = container_of(lock, struct mutex, wait_lock); \ 42 struct mutex *l = container_of(lock, struct mutex, wait_lock); \
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 4f45d4b658ef..5d79781394a3 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -10,6 +10,11 @@
10 * Many thanks to Arjan van de Ven, Thomas Gleixner, Steven Rostedt and 10 * Many thanks to Arjan van de Ven, Thomas Gleixner, Steven Rostedt and
11 * David Howells for suggestions and improvements. 11 * David Howells for suggestions and improvements.
12 * 12 *
13 * - Adaptive spinning for mutexes by Peter Zijlstra. (Ported to mainline
14 * from the -rt tree, where it was originally implemented for rtmutexes
15 * by Steven Rostedt, based on work by Gregory Haskins, Peter Morreale
16 * and Sven Dietrich.
17 *
13 * Also see Documentation/mutex-design.txt. 18 * Also see Documentation/mutex-design.txt.
14 */ 19 */
15#include <linux/mutex.h> 20#include <linux/mutex.h>
@@ -46,6 +51,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
46 atomic_set(&lock->count, 1); 51 atomic_set(&lock->count, 1);
47 spin_lock_init(&lock->wait_lock); 52 spin_lock_init(&lock->wait_lock);
48 INIT_LIST_HEAD(&lock->wait_list); 53 INIT_LIST_HEAD(&lock->wait_list);
54 mutex_clear_owner(lock);
49 55
50 debug_mutex_init(lock, name, key); 56 debug_mutex_init(lock, name, key);
51} 57}
@@ -91,6 +97,7 @@ void inline __sched mutex_lock(struct mutex *lock)
91 * 'unlocked' into 'locked' state. 97 * 'unlocked' into 'locked' state.
92 */ 98 */
93 __mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath); 99 __mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath);
100 mutex_set_owner(lock);
94} 101}
95 102
96EXPORT_SYMBOL(mutex_lock); 103EXPORT_SYMBOL(mutex_lock);
@@ -115,6 +122,14 @@ void __sched mutex_unlock(struct mutex *lock)
115 * The unlocking fastpath is the 0->1 transition from 'locked' 122 * The unlocking fastpath is the 0->1 transition from 'locked'
116 * into 'unlocked' state: 123 * into 'unlocked' state:
117 */ 124 */
125#ifndef CONFIG_DEBUG_MUTEXES
126 /*
127 * When debugging is enabled we must not clear the owner before time,
128 * the slow path will always be taken, and that clears the owner field
129 * after verifying that it was indeed current.
130 */
131 mutex_clear_owner(lock);
132#endif
118 __mutex_fastpath_unlock(&lock->count, __mutex_unlock_slowpath); 133 __mutex_fastpath_unlock(&lock->count, __mutex_unlock_slowpath);
119} 134}
120 135
@@ -129,21 +144,75 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
129{ 144{
130 struct task_struct *task = current; 145 struct task_struct *task = current;
131 struct mutex_waiter waiter; 146 struct mutex_waiter waiter;
132 unsigned int old_val;
133 unsigned long flags; 147 unsigned long flags;
134 148
149 preempt_disable();
150 mutex_acquire(&lock->dep_map, subclass, 0, ip);
151#if defined(CONFIG_SMP) && !defined(CONFIG_DEBUG_MUTEXES)
152 /*
153 * Optimistic spinning.
154 *
155 * We try to spin for acquisition when we find that there are no
156 * pending waiters and the lock owner is currently running on a
157 * (different) CPU.
158 *
159 * The rationale is that if the lock owner is running, it is likely to
160 * release the lock soon.
161 *
162 * Since this needs the lock owner, and this mutex implementation
163 * doesn't track the owner atomically in the lock field, we need to
164 * track it non-atomically.
165 *
166 * We can't do this for DEBUG_MUTEXES because that relies on wait_lock
167 * to serialize everything.
168 */
169
170 for (;;) {
171 struct thread_info *owner;
172
173 /*
174 * If there's an owner, wait for it to either
175 * release the lock or go to sleep.
176 */
177 owner = ACCESS_ONCE(lock->owner);
178 if (owner && !mutex_spin_on_owner(lock, owner))
179 break;
180
181 if (atomic_cmpxchg(&lock->count, 1, 0) == 1) {
182 lock_acquired(&lock->dep_map, ip);
183 mutex_set_owner(lock);
184 preempt_enable();
185 return 0;
186 }
187
188 /*
189 * When there's no owner, we might have preempted between the
190 * owner acquiring the lock and setting the owner field. If
191 * we're an RT task that will live-lock because we won't let
192 * the owner complete.
193 */
194 if (!owner && (need_resched() || rt_task(task)))
195 break;
196
197 /*
198 * The cpu_relax() call is a compiler barrier which forces
199 * everything in this loop to be re-loaded. We don't need
200 * memory barriers as we'll eventually observe the right
201 * values at the cost of a few extra spins.
202 */
203 cpu_relax();
204 }
205#endif
135 spin_lock_mutex(&lock->wait_lock, flags); 206 spin_lock_mutex(&lock->wait_lock, flags);
136 207
137 debug_mutex_lock_common(lock, &waiter); 208 debug_mutex_lock_common(lock, &waiter);
138 mutex_acquire(&lock->dep_map, subclass, 0, ip);
139 debug_mutex_add_waiter(lock, &waiter, task_thread_info(task)); 209 debug_mutex_add_waiter(lock, &waiter, task_thread_info(task));
140 210
141 /* add waiting tasks to the end of the waitqueue (FIFO): */ 211 /* add waiting tasks to the end of the waitqueue (FIFO): */
142 list_add_tail(&waiter.list, &lock->wait_list); 212 list_add_tail(&waiter.list, &lock->wait_list);
143 waiter.task = task; 213 waiter.task = task;
144 214
145 old_val = atomic_xchg(&lock->count, -1); 215 if (atomic_xchg(&lock->count, -1) == 1)
146 if (old_val == 1)
147 goto done; 216 goto done;
148 217
149 lock_contended(&lock->dep_map, ip); 218 lock_contended(&lock->dep_map, ip);
@@ -158,8 +227,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
158 * that when we release the lock, we properly wake up the 227 * that when we release the lock, we properly wake up the
159 * other waiters: 228 * other waiters:
160 */ 229 */
161 old_val = atomic_xchg(&lock->count, -1); 230 if (atomic_xchg(&lock->count, -1) == 1)
162 if (old_val == 1)
163 break; 231 break;
164 232
165 /* 233 /*
@@ -173,21 +241,22 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
173 spin_unlock_mutex(&lock->wait_lock, flags); 241 spin_unlock_mutex(&lock->wait_lock, flags);
174 242
175 debug_mutex_free_waiter(&waiter); 243 debug_mutex_free_waiter(&waiter);
244 preempt_enable();
176 return -EINTR; 245 return -EINTR;
177 } 246 }
178 __set_task_state(task, state); 247 __set_task_state(task, state);
179 248
180 /* didnt get the lock, go to sleep: */ 249 /* didnt get the lock, go to sleep: */
181 spin_unlock_mutex(&lock->wait_lock, flags); 250 spin_unlock_mutex(&lock->wait_lock, flags);
182 schedule(); 251 __schedule();
183 spin_lock_mutex(&lock->wait_lock, flags); 252 spin_lock_mutex(&lock->wait_lock, flags);
184 } 253 }
185 254
186done: 255done:
187 lock_acquired(&lock->dep_map, ip); 256 lock_acquired(&lock->dep_map, ip);
188 /* got the lock - rejoice! */ 257 /* got the lock - rejoice! */
189 mutex_remove_waiter(lock, &waiter, task_thread_info(task)); 258 mutex_remove_waiter(lock, &waiter, current_thread_info());
190 debug_mutex_set_owner(lock, task_thread_info(task)); 259 mutex_set_owner(lock);
191 260
192 /* set it to 0 if there are no waiters left: */ 261 /* set it to 0 if there are no waiters left: */
193 if (likely(list_empty(&lock->wait_list))) 262 if (likely(list_empty(&lock->wait_list)))
@@ -196,6 +265,7 @@ done:
196 spin_unlock_mutex(&lock->wait_lock, flags); 265 spin_unlock_mutex(&lock->wait_lock, flags);
197 266
198 debug_mutex_free_waiter(&waiter); 267 debug_mutex_free_waiter(&waiter);
268 preempt_enable();
199 269
200 return 0; 270 return 0;
201} 271}
@@ -222,7 +292,8 @@ int __sched
222mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass) 292mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass)
223{ 293{
224 might_sleep(); 294 might_sleep();
225 return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass, _RET_IP_); 295 return __mutex_lock_common(lock, TASK_INTERRUPTIBLE,
296 subclass, _RET_IP_);
226} 297}
227 298
228EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested); 299EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested);
@@ -260,8 +331,6 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested)
260 wake_up_process(waiter->task); 331 wake_up_process(waiter->task);
261 } 332 }
262 333
263 debug_mutex_clear_owner(lock);
264
265 spin_unlock_mutex(&lock->wait_lock, flags); 334 spin_unlock_mutex(&lock->wait_lock, flags);
266} 335}
267 336
@@ -298,18 +367,30 @@ __mutex_lock_interruptible_slowpath(atomic_t *lock_count);
298 */ 367 */
299int __sched mutex_lock_interruptible(struct mutex *lock) 368int __sched mutex_lock_interruptible(struct mutex *lock)
300{ 369{
370 int ret;
371
301 might_sleep(); 372 might_sleep();
302 return __mutex_fastpath_lock_retval 373 ret = __mutex_fastpath_lock_retval
303 (&lock->count, __mutex_lock_interruptible_slowpath); 374 (&lock->count, __mutex_lock_interruptible_slowpath);
375 if (!ret)
376 mutex_set_owner(lock);
377
378 return ret;
304} 379}
305 380
306EXPORT_SYMBOL(mutex_lock_interruptible); 381EXPORT_SYMBOL(mutex_lock_interruptible);
307 382
308int __sched mutex_lock_killable(struct mutex *lock) 383int __sched mutex_lock_killable(struct mutex *lock)
309{ 384{
385 int ret;
386
310 might_sleep(); 387 might_sleep();
311 return __mutex_fastpath_lock_retval 388 ret = __mutex_fastpath_lock_retval
312 (&lock->count, __mutex_lock_killable_slowpath); 389 (&lock->count, __mutex_lock_killable_slowpath);
390 if (!ret)
391 mutex_set_owner(lock);
392
393 return ret;
313} 394}
314EXPORT_SYMBOL(mutex_lock_killable); 395EXPORT_SYMBOL(mutex_lock_killable);
315 396
@@ -352,9 +433,10 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
352 433
353 prev = atomic_xchg(&lock->count, -1); 434 prev = atomic_xchg(&lock->count, -1);
354 if (likely(prev == 1)) { 435 if (likely(prev == 1)) {
355 debug_mutex_set_owner(lock, current_thread_info()); 436 mutex_set_owner(lock);
356 mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); 437 mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
357 } 438 }
439
358 /* Set it back to 0 if there are no waiters: */ 440 /* Set it back to 0 if there are no waiters: */
359 if (likely(list_empty(&lock->wait_list))) 441 if (likely(list_empty(&lock->wait_list)))
360 atomic_set(&lock->count, 0); 442 atomic_set(&lock->count, 0);
@@ -380,8 +462,13 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
380 */ 462 */
381int __sched mutex_trylock(struct mutex *lock) 463int __sched mutex_trylock(struct mutex *lock)
382{ 464{
383 return __mutex_fastpath_trylock(&lock->count, 465 int ret;
384 __mutex_trylock_slowpath); 466
467 ret = __mutex_fastpath_trylock(&lock->count, __mutex_trylock_slowpath);
468 if (ret)
469 mutex_set_owner(lock);
470
471 return ret;
385} 472}
386 473
387EXPORT_SYMBOL(mutex_trylock); 474EXPORT_SYMBOL(mutex_trylock);
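
The mutex.c hunks above add optimistic spinning: before queueing itself as a waiter, a locker keeps spinning as long as the current owner is running on another CPU, on the theory that a running owner will release the lock soon; it gives up and blocks once the owner sleeps, or once there is no recorded owner and a reschedule is due (or the task is RT). The user-space model below is a rough sketch of that decision loop only; toy_mutex, owner_running_elsewhere(), should_stop_spinning() and the slow-path stub are invented stand-ins, not kernel APIs.

/* Rough user-space model of the adaptive-spin decision in
 * __mutex_lock_common() (illustrative only, every name here is a stand-in). */
#include <sched.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct toy_mutex {
	atomic_int count;	/* 1: unlocked, 0: locked, -1: locked + waiters */
	atomic_int owner_tid;	/* 0 means no owner recorded yet */
};

/* Stand-ins for the questions the kernel asks (mutex_spin_on_owner() etc.). */
static bool owner_running_elsewhere(int owner_tid) { (void)owner_tid; return false; }
static bool should_stop_spinning(void) { return false; }
static void toy_mutex_lock_slow(struct toy_mutex *m) { (void)m; /* sleep... */ }

/* Returns true if the lock was taken on the spinning fast path. */
static bool toy_mutex_lock(struct toy_mutex *m, int my_tid)
{
	for (;;) {
		int owner = atomic_load(&m->owner_tid);

		/* Owner exists but is not running: spinning is pointless. */
		if (owner && !owner_running_elsewhere(owner))
			break;

		int expected = 1;
		if (atomic_compare_exchange_strong(&m->count, &expected, 0)) {
			atomic_store(&m->owner_tid, my_tid);
			return true;	/* got it without ever sleeping */
		}

		/* No owner recorded (we raced its store); stop if rescheduling is due. */
		if (!owner && should_stop_spinning())
			break;

		sched_yield();		/* stand-in for cpu_relax() */
	}
	toy_mutex_lock_slow(m);		/* fall back to the sleeping path */
	return false;
}

int main(void)
{
	struct toy_mutex m = { 1, 0 };
	printf("fast-path acquire: %s\n", toy_mutex_lock(&m, 42) ? "yes" : "no");
	return 0;
}

Note that the patch itself only compiles this spin path for CONFIG_SMP without CONFIG_DEBUG_MUTEXES, since the debug variant relies on wait_lock to serialize the owner field.
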
diff --git a/kernel/mutex.h b/kernel/mutex.h
index a075dafbb290..67578ca48f94 100644
--- a/kernel/mutex.h
+++ b/kernel/mutex.h
@@ -16,8 +16,26 @@
16#define mutex_remove_waiter(lock, waiter, ti) \ 16#define mutex_remove_waiter(lock, waiter, ti) \
17 __list_del((waiter)->list.prev, (waiter)->list.next) 17 __list_del((waiter)->list.prev, (waiter)->list.next)
18 18
19#define debug_mutex_set_owner(lock, new_owner) do { } while (0) 19#ifdef CONFIG_SMP
20#define debug_mutex_clear_owner(lock) do { } while (0) 20static inline void mutex_set_owner(struct mutex *lock)
21{
22 lock->owner = current_thread_info();
23}
24
25static inline void mutex_clear_owner(struct mutex *lock)
26{
27 lock->owner = NULL;
28}
29#else
30static inline void mutex_set_owner(struct mutex *lock)
31{
32}
33
34static inline void mutex_clear_owner(struct mutex *lock)
35{
36}
37#endif
38
21#define debug_mutex_wake_waiter(lock, waiter) do { } while (0) 39#define debug_mutex_wake_waiter(lock, waiter) do { } while (0)
22#define debug_mutex_free_waiter(waiter) do { } while (0) 40#define debug_mutex_free_waiter(waiter) do { } while (0)
23#define debug_mutex_add_waiter(lock, waiter, ti) do { } while (0) 41#define debug_mutex_add_waiter(lock, waiter, ti) do { } while (0)
diff --git a/kernel/sched.c b/kernel/sched.c
index 328f9c7448a5..e1f676e20119 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4543,15 +4543,13 @@ pick_next_task(struct rq *rq, struct task_struct *prev)
4543/* 4543/*
4544 * schedule() is the main scheduler function. 4544 * schedule() is the main scheduler function.
4545 */ 4545 */
4546asmlinkage void __sched schedule(void) 4546asmlinkage void __sched __schedule(void)
4547{ 4547{
4548 struct task_struct *prev, *next; 4548 struct task_struct *prev, *next;
4549 unsigned long *switch_count; 4549 unsigned long *switch_count;
4550 struct rq *rq; 4550 struct rq *rq;
4551 int cpu; 4551 int cpu;
4552 4552
4553need_resched:
4554 preempt_disable();
4555 cpu = smp_processor_id(); 4553 cpu = smp_processor_id();
4556 rq = cpu_rq(cpu); 4554 rq = cpu_rq(cpu);
4557 rcu_qsctr_inc(cpu); 4555 rcu_qsctr_inc(cpu);
@@ -4608,13 +4606,80 @@ need_resched_nonpreemptible:
4608 4606
4609 if (unlikely(reacquire_kernel_lock(current) < 0)) 4607 if (unlikely(reacquire_kernel_lock(current) < 0))
4610 goto need_resched_nonpreemptible; 4608 goto need_resched_nonpreemptible;
4609}
4611 4610
4611asmlinkage void __sched schedule(void)
4612{
4613need_resched:
4614 preempt_disable();
4615 __schedule();
4612 preempt_enable_no_resched(); 4616 preempt_enable_no_resched();
4613 if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) 4617 if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
4614 goto need_resched; 4618 goto need_resched;
4615} 4619}
4616EXPORT_SYMBOL(schedule); 4620EXPORT_SYMBOL(schedule);
4617 4621
4622#ifdef CONFIG_SMP
4623/*
4624 * Look out! "owner" is an entirely speculative pointer
4625 * access and not reliable.
4626 */
4627int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
4628{
4629 unsigned int cpu;
4630 struct rq *rq;
4631
4632 if (!sched_feat(OWNER_SPIN))
4633 return 0;
4634
4635#ifdef CONFIG_DEBUG_PAGEALLOC
4636 /*
4637 * Need to access the cpu field knowing that
4638 * DEBUG_PAGEALLOC could have unmapped it if
4639 * the mutex owner just released it and exited.
4640 */
4641 if (probe_kernel_address(&owner->cpu, cpu))
4642 goto out;
4643#else
4644 cpu = owner->cpu;
4645#endif
4646
4647 /*
4648 * Even if the access succeeded (likely case),
4649 * the cpu field may no longer be valid.
4650 */
4651 if (cpu >= nr_cpumask_bits)
4652 goto out;
4653
4654 /*
4655 * We need to validate that we can do a
4656 * get_cpu() and that we have the percpu area.
4657 */
4658 if (!cpu_online(cpu))
4659 goto out;
4660
4661 rq = cpu_rq(cpu);
4662
4663 for (;;) {
4664 /*
4665 * Owner changed, break to re-assess state.
4666 */
4667 if (lock->owner != owner)
4668 break;
4669
4670 /*
4671 * Is that owner really running on that cpu?
4672 */
4673 if (task_thread_info(rq->curr) != owner || need_resched())
4674 return 0;
4675
4676 cpu_relax();
4677 }
4678out:
4679 return 1;
4680}
4681#endif
4682
4618#ifdef CONFIG_PREEMPT 4683#ifdef CONFIG_PREEMPT
4619/* 4684/*
4620 * this is the entry point to schedule() from in-kernel preemption 4685 * this is the entry point to schedule() from in-kernel preemption
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index da5d93b5d2c6..07bc02e99ab1 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -13,3 +13,4 @@ SCHED_FEAT(LB_WAKEUP_UPDATE, 1)
13SCHED_FEAT(ASYM_EFF_LOAD, 1) 13SCHED_FEAT(ASYM_EFF_LOAD, 1)
14SCHED_FEAT(WAKEUP_OVERLAP, 0) 14SCHED_FEAT(WAKEUP_OVERLAP, 0)
15SCHED_FEAT(LAST_BUDDY, 1) 15SCHED_FEAT(LAST_BUDDY, 1)
16SCHED_FEAT(OWNER_SPIN, 1)
diff --git a/kernel/timer.c b/kernel/timer.c
index 13dd64fe143d..ef1c385bc572 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -491,14 +491,18 @@ static inline void debug_timer_free(struct timer_list *timer)
491 debug_object_free(timer, &timer_debug_descr); 491 debug_object_free(timer, &timer_debug_descr);
492} 492}
493 493
494static void __init_timer(struct timer_list *timer); 494static void __init_timer(struct timer_list *timer,
495 const char *name,
496 struct lock_class_key *key);
495 497
496void init_timer_on_stack(struct timer_list *timer) 498void init_timer_on_stack_key(struct timer_list *timer,
499 const char *name,
500 struct lock_class_key *key)
497{ 501{
498 debug_object_init_on_stack(timer, &timer_debug_descr); 502 debug_object_init_on_stack(timer, &timer_debug_descr);
499 __init_timer(timer); 503 __init_timer(timer, name, key);
500} 504}
501EXPORT_SYMBOL_GPL(init_timer_on_stack); 505EXPORT_SYMBOL_GPL(init_timer_on_stack_key);
502 506
503void destroy_timer_on_stack(struct timer_list *timer) 507void destroy_timer_on_stack(struct timer_list *timer)
504{ 508{
@@ -512,7 +516,9 @@ static inline void debug_timer_activate(struct timer_list *timer) { }
512static inline void debug_timer_deactivate(struct timer_list *timer) { } 516static inline void debug_timer_deactivate(struct timer_list *timer) { }
513#endif 517#endif
514 518
515static void __init_timer(struct timer_list *timer) 519static void __init_timer(struct timer_list *timer,
520 const char *name,
521 struct lock_class_key *key)
516{ 522{
517 timer->entry.next = NULL; 523 timer->entry.next = NULL;
518 timer->base = __raw_get_cpu_var(tvec_bases); 524 timer->base = __raw_get_cpu_var(tvec_bases);
@@ -521,6 +527,7 @@ static void __init_timer(struct timer_list *timer)
521 timer->start_pid = -1; 527 timer->start_pid = -1;
522 memset(timer->start_comm, 0, TASK_COMM_LEN); 528 memset(timer->start_comm, 0, TASK_COMM_LEN);
523#endif 529#endif
530 lockdep_init_map(&timer->lockdep_map, name, key, 0);
524} 531}
525 532
526/** 533/**
@@ -530,19 +537,23 @@ static void __init_timer(struct timer_list *timer)
530 * init_timer() must be done to a timer prior calling *any* of the 537 * init_timer() must be done to a timer prior calling *any* of the
531 * other timer functions. 538 * other timer functions.
532 */ 539 */
533void init_timer(struct timer_list *timer) 540void init_timer_key(struct timer_list *timer,
541 const char *name,
542 struct lock_class_key *key)
534{ 543{
535 debug_timer_init(timer); 544 debug_timer_init(timer);
536 __init_timer(timer); 545 __init_timer(timer, name, key);
537} 546}
538EXPORT_SYMBOL(init_timer); 547EXPORT_SYMBOL(init_timer_key);
539 548
540void init_timer_deferrable(struct timer_list *timer) 549void init_timer_deferrable_key(struct timer_list *timer,
550 const char *name,
551 struct lock_class_key *key)
541{ 552{
542 init_timer(timer); 553 init_timer_key(timer, name, key);
543 timer_set_deferrable(timer); 554 timer_set_deferrable(timer);
544} 555}
545EXPORT_SYMBOL(init_timer_deferrable); 556EXPORT_SYMBOL(init_timer_deferrable_key);
546 557
547static inline void detach_timer(struct timer_list *timer, 558static inline void detach_timer(struct timer_list *timer,
548 int clear_pending) 559 int clear_pending)
@@ -789,6 +800,15 @@ EXPORT_SYMBOL(try_to_del_timer_sync);
789 */ 800 */
790int del_timer_sync(struct timer_list *timer) 801int del_timer_sync(struct timer_list *timer)
791{ 802{
803#ifdef CONFIG_LOCKDEP
804 unsigned long flags;
805
806 local_irq_save(flags);
807 lock_map_acquire(&timer->lockdep_map);
808 lock_map_release(&timer->lockdep_map);
809 local_irq_restore(flags);
810#endif
811
792 for (;;) { 812 for (;;) {
793 int ret = try_to_del_timer_sync(timer); 813 int ret = try_to_del_timer_sync(timer);
794 if (ret >= 0) 814 if (ret >= 0)
@@ -861,10 +881,36 @@ static inline void __run_timers(struct tvec_base *base)
861 881
862 set_running_timer(base, timer); 882 set_running_timer(base, timer);
863 detach_timer(timer, 1); 883 detach_timer(timer, 1);
884
864 spin_unlock_irq(&base->lock); 885 spin_unlock_irq(&base->lock);
865 { 886 {
866 int preempt_count = preempt_count(); 887 int preempt_count = preempt_count();
888
889#ifdef CONFIG_LOCKDEP
890 /*
891 * It is permissible to free the timer from
892 * inside the function that is called from
893 * it, this we need to take into account for
894 * lockdep too. To avoid bogus "held lock
895 * freed" warnings as well as problems when
896 * looking into timer->lockdep_map, make a
897 * copy and use that here.
898 */
899 struct lockdep_map lockdep_map =
900 timer->lockdep_map;
901#endif
902 /*
903 * Couple the lock chain with the lock chain at
904 * del_timer_sync() by acquiring the lock_map
905 * around the fn() call here and in
906 * del_timer_sync().
907 */
908 lock_map_acquire(&lockdep_map);
909
867 fn(data); 910 fn(data);
911
912 lock_map_release(&lockdep_map);
913
868 if (preempt_count != preempt_count()) { 914 if (preempt_count != preempt_count()) {
869 printk(KERN_ERR "huh, entered %p " 915 printk(KERN_ERR "huh, entered %p "
870 "with preempt_count %08x, exited" 916 "with preempt_count %08x, exited"
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index a8c275c01e83..f2a163db52f9 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -61,6 +61,8 @@ enum {
61 61
62static unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON; 62static unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;
63 63
64#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data)
65
64/** 66/**
65 * tracing_on - enable all tracing buffers 67 * tracing_on - enable all tracing buffers
66 * 68 *
@@ -132,7 +134,7 @@ void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
132} 134}
133EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); 135EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
134 136
135#define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event)) 137#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
136#define RB_ALIGNMENT 4U 138#define RB_ALIGNMENT 4U
137#define RB_MAX_SMALL_DATA 28 139#define RB_MAX_SMALL_DATA 28
138 140
@@ -234,6 +236,18 @@ static void rb_init_page(struct buffer_data_page *bpage)
234 local_set(&bpage->commit, 0); 236 local_set(&bpage->commit, 0);
235} 237}
236 238
239/**
240 * ring_buffer_page_len - the size of data on the page.
241 * @page: The page to read
242 *
243 * Returns the amount of data on the page, including buffer page header.
244 */
245size_t ring_buffer_page_len(void *page)
246{
247 return local_read(&((struct buffer_data_page *)page)->commit)
248 + BUF_PAGE_HDR_SIZE;
249}
250
237/* 251/*
238 * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing 252 * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
239 * this issue out. 253 * this issue out.
@@ -254,7 +268,7 @@ static inline int test_time_stamp(u64 delta)
254 return 0; 268 return 0;
255} 269}
256 270
257#define BUF_PAGE_SIZE (PAGE_SIZE - offsetof(struct buffer_data_page, data)) 271#define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE)
258 272
259/* 273/*
260 * head_page == tail_page && head == tail then buffer is empty. 274 * head_page == tail_page && head == tail then buffer is empty.
@@ -2378,8 +2392,8 @@ static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer,
2378 */ 2392 */
2379void *ring_buffer_alloc_read_page(struct ring_buffer *buffer) 2393void *ring_buffer_alloc_read_page(struct ring_buffer *buffer)
2380{ 2394{
2381 unsigned long addr;
2382 struct buffer_data_page *bpage; 2395 struct buffer_data_page *bpage;
2396 unsigned long addr;
2383 2397
2384 addr = __get_free_page(GFP_KERNEL); 2398 addr = __get_free_page(GFP_KERNEL);
2385 if (!addr) 2399 if (!addr)
@@ -2387,6 +2401,8 @@ void *ring_buffer_alloc_read_page(struct ring_buffer *buffer)
2387 2401
2388 bpage = (void *)addr; 2402 bpage = (void *)addr;
2389 2403
2404 rb_init_page(bpage);
2405
2390 return bpage; 2406 return bpage;
2391} 2407}
2392 2408
@@ -2406,6 +2422,7 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
2406 * ring_buffer_read_page - extract a page from the ring buffer 2422 * ring_buffer_read_page - extract a page from the ring buffer
2407 * @buffer: buffer to extract from 2423 * @buffer: buffer to extract from
2408 * @data_page: the page to use allocated from ring_buffer_alloc_read_page 2424 * @data_page: the page to use allocated from ring_buffer_alloc_read_page
2425 * @len: amount to extract
2409 * @cpu: the cpu of the buffer to extract 2426 * @cpu: the cpu of the buffer to extract
2410 * @full: should the extraction only happen when the page is full. 2427 * @full: should the extraction only happen when the page is full.
2411 * 2428 *
@@ -2418,7 +2435,7 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
2418 * rpage = ring_buffer_alloc_read_page(buffer); 2435 * rpage = ring_buffer_alloc_read_page(buffer);
2419 * if (!rpage) 2436 * if (!rpage)
2420 * return error; 2437 * return error;
2421 * ret = ring_buffer_read_page(buffer, &rpage, cpu, 0); 2438 * ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0);
2422 * if (ret >= 0) 2439 * if (ret >= 0)
2423 * process_page(rpage, ret); 2440 * process_page(rpage, ret);
2424 * 2441 *
@@ -2435,70 +2452,103 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
2435 * <0 if no data has been transferred. 2452 * <0 if no data has been transferred.
2436 */ 2453 */
2437int ring_buffer_read_page(struct ring_buffer *buffer, 2454int ring_buffer_read_page(struct ring_buffer *buffer,
2438 void **data_page, int cpu, int full) 2455 void **data_page, size_t len, int cpu, int full)
2439{ 2456{
2440 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; 2457 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
2441 struct ring_buffer_event *event; 2458 struct ring_buffer_event *event;
2442 struct buffer_data_page *bpage; 2459 struct buffer_data_page *bpage;
2460 struct buffer_page *reader;
2443 unsigned long flags; 2461 unsigned long flags;
2462 unsigned int commit;
2444 unsigned int read; 2463 unsigned int read;
2445 int ret = -1; 2464 int ret = -1;
2446 2465
2466 /*
2467 * If len is not big enough to hold the page header, then
2468 * we can not copy anything.
2469 */
2470 if (len <= BUF_PAGE_HDR_SIZE)
2471 return -1;
2472
2473 len -= BUF_PAGE_HDR_SIZE;
2474
2447 if (!data_page) 2475 if (!data_page)
2448 return 0; 2476 return -1;
2449 2477
2450 bpage = *data_page; 2478 bpage = *data_page;
2451 if (!bpage) 2479 if (!bpage)
2452 return 0; 2480 return -1;
2453 2481
2454 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 2482 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2455 2483
2456 /* 2484 reader = rb_get_reader_page(cpu_buffer);
2457 * rb_buffer_peek will get the next ring buffer if 2485 if (!reader)
2458 * the current reader page is empty.
2459 */
2460 event = rb_buffer_peek(buffer, cpu, NULL);
2461 if (!event)
2462 goto out; 2486 goto out;
2463 2487
2464 /* check for data */ 2488 event = rb_reader_event(cpu_buffer);
2465 if (!local_read(&cpu_buffer->reader_page->page->commit)) 2489
2466 goto out; 2490 read = reader->read;
2491 commit = rb_page_commit(reader);
2467 2492
2468 read = cpu_buffer->reader_page->read;
2469 /* 2493 /*
2470 * If the writer is already off of the read page, then simply 2494 * If this page has been partially read or
2471 * switch the read page with the given page. Otherwise 2495 * if len is not big enough to read the rest of the page or
2472 * we need to copy the data from the reader to the writer. 2496 * a writer is still on the page, then
2497 * we must copy the data from the page to the buffer.
2498 * Otherwise, we can simply swap the page with the one passed in.
2473 */ 2499 */
2474 if (cpu_buffer->reader_page == cpu_buffer->commit_page) { 2500 if (read || (len < (commit - read)) ||
2475 unsigned int commit = rb_page_commit(cpu_buffer->reader_page); 2501 cpu_buffer->reader_page == cpu_buffer->commit_page) {
2476 struct buffer_data_page *rpage = cpu_buffer->reader_page->page; 2502 struct buffer_data_page *rpage = cpu_buffer->reader_page->page;
2503 unsigned int rpos = read;
2504 unsigned int pos = 0;
2505 unsigned int size;
2477 2506
2478 if (full) 2507 if (full)
2479 goto out; 2508 goto out;
2480 /* The writer is still on the reader page, we must copy */
2481 memcpy(bpage->data + read, rpage->data + read, commit - read);
2482 2509
2483 /* consume what was read */ 2510 if (len > (commit - read))
2484 cpu_buffer->reader_page->read = commit; 2511 len = (commit - read);
2512
2513 size = rb_event_length(event);
2514
2515 if (len < size)
2516 goto out;
2517
2518 /* Need to copy one event at a time */
2519 do {
2520 memcpy(bpage->data + pos, rpage->data + rpos, size);
2521
2522 len -= size;
2523
2524 rb_advance_reader(cpu_buffer);
2525 rpos = reader->read;
2526 pos += size;
2527
2528 event = rb_reader_event(cpu_buffer);
2529 size = rb_event_length(event);
2530 } while (len > size);
2485 2531
2486 /* update bpage */ 2532 /* update bpage */
2487 local_set(&bpage->commit, commit); 2533 local_set(&bpage->commit, pos);
2488 if (!read) 2534 bpage->time_stamp = rpage->time_stamp;
2489 bpage->time_stamp = rpage->time_stamp; 2535
2536 /* we copied everything to the beginning */
2537 read = 0;
2490 } else { 2538 } else {
2491 /* swap the pages */ 2539 /* swap the pages */
2492 rb_init_page(bpage); 2540 rb_init_page(bpage);
2493 bpage = cpu_buffer->reader_page->page; 2541 bpage = reader->page;
2494 cpu_buffer->reader_page->page = *data_page; 2542 reader->page = *data_page;
2495 cpu_buffer->reader_page->read = 0; 2543 local_set(&reader->write, 0);
2544 reader->read = 0;
2496 *data_page = bpage; 2545 *data_page = bpage;
2546
2547 /* update the entry counter */
2548 rb_remove_entries(cpu_buffer, bpage, read);
2497 } 2549 }
2498 ret = read; 2550 ret = read;
2499 2551
2500 /* update the entry counter */
2501 rb_remove_entries(cpu_buffer, bpage, read);
2502 out: 2552 out:
2503 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 2553 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2504 2554
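With the new len parameter, ring_buffer_read_page() either swaps a full reader page into the caller's spare page or copies as many complete events as fit into len bytes; the return value is the offset of data that was already consumed earlier, or negative when nothing was transferred. A rough in-kernel consumer loop under those assumptions (process_page() is a placeholder, not a kernel API):

#include <linux/mm.h>
#include <linux/ring_buffer.h>

/* hypothetical consumer of one page worth of events */
static void process_page(void *page, int already_read);

/* drain one CPU's buffer a page at a time */
static void drain_cpu_buffer(struct ring_buffer *buffer, int cpu)
{
        void *page = ring_buffer_alloc_read_page(buffer);
        int ret;

        if (!page)
                return;

        for (;;) {
                /* full == 0: accept partially filled pages too */
                ret = ring_buffer_read_page(buffer, &page, PAGE_SIZE, cpu, 0);
                if (ret < 0)
                        break;          /* nothing was transferred */
                /* events before offset 'ret' were consumed earlier */
                process_page(page, ret);
        }

        ring_buffer_free_read_page(buffer, page);
}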
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ea055aa21cd9..c8abbb0c8397 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -11,31 +11,30 @@
11 * Copyright (C) 2004-2006 Ingo Molnar 11 * Copyright (C) 2004-2006 Ingo Molnar
12 * Copyright (C) 2004 William Lee Irwin III 12 * Copyright (C) 2004 William Lee Irwin III
13 */ 13 */
14#include <linux/ring_buffer.h>
14#include <linux/utsrelease.h> 15#include <linux/utsrelease.h>
16#include <linux/stacktrace.h>
17#include <linux/writeback.h>
15#include <linux/kallsyms.h> 18#include <linux/kallsyms.h>
16#include <linux/seq_file.h> 19#include <linux/seq_file.h>
17#include <linux/notifier.h> 20#include <linux/notifier.h>
21#include <linux/irqflags.h>
18#include <linux/debugfs.h> 22#include <linux/debugfs.h>
19#include <linux/pagemap.h> 23#include <linux/pagemap.h>
20#include <linux/hardirq.h> 24#include <linux/hardirq.h>
21#include <linux/linkage.h> 25#include <linux/linkage.h>
22#include <linux/uaccess.h> 26#include <linux/uaccess.h>
27#include <linux/kprobes.h>
23#include <linux/ftrace.h> 28#include <linux/ftrace.h>
24#include <linux/module.h> 29#include <linux/module.h>
25#include <linux/percpu.h> 30#include <linux/percpu.h>
31#include <linux/splice.h>
26#include <linux/kdebug.h> 32#include <linux/kdebug.h>
27#include <linux/ctype.h> 33#include <linux/ctype.h>
28#include <linux/init.h> 34#include <linux/init.h>
29#include <linux/poll.h> 35#include <linux/poll.h>
30#include <linux/gfp.h> 36#include <linux/gfp.h>
31#include <linux/fs.h> 37#include <linux/fs.h>
32#include <linux/kprobes.h>
33#include <linux/writeback.h>
34#include <linux/splice.h>
35
36#include <linux/stacktrace.h>
37#include <linux/ring_buffer.h>
38#include <linux/irqflags.h>
39 38
40#include "trace.h" 39#include "trace.h"
41#include "trace_output.h" 40#include "trace_output.h"
@@ -624,7 +623,7 @@ static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
624static unsigned map_cmdline_to_pid[SAVED_CMDLINES]; 623static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
625static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN]; 624static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
626static int cmdline_idx; 625static int cmdline_idx;
627static DEFINE_SPINLOCK(trace_cmdline_lock); 626static raw_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED;
628 627
629/* temporary disable recording */ 628/* temporary disable recording */
630static atomic_t trace_record_cmdline_disabled __read_mostly; 629static atomic_t trace_record_cmdline_disabled __read_mostly;
@@ -736,7 +735,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
736 * nor do we want to disable interrupts, 735 * nor do we want to disable interrupts,
737 * so if we miss here, then better luck next time. 736 * so if we miss here, then better luck next time.
738 */ 737 */
739 if (!spin_trylock(&trace_cmdline_lock)) 738 if (!__raw_spin_trylock(&trace_cmdline_lock))
740 return; 739 return;
741 740
742 idx = map_pid_to_cmdline[tsk->pid]; 741 idx = map_pid_to_cmdline[tsk->pid];
@@ -754,7 +753,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
754 753
755 memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN); 754 memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
756 755
757 spin_unlock(&trace_cmdline_lock); 756 __raw_spin_unlock(&trace_cmdline_lock);
758} 757}
759 758
760char *trace_find_cmdline(int pid) 759char *trace_find_cmdline(int pid)
@@ -3005,6 +3004,246 @@ static struct file_operations tracing_mark_fops = {
3005 .write = tracing_mark_write, 3004 .write = tracing_mark_write,
3006}; 3005};
3007 3006
3007struct ftrace_buffer_info {
3008 struct trace_array *tr;
3009 void *spare;
3010 int cpu;
3011 unsigned int read;
3012};
3013
3014static int tracing_buffers_open(struct inode *inode, struct file *filp)
3015{
3016 int cpu = (int)(long)inode->i_private;
3017 struct ftrace_buffer_info *info;
3018
3019 if (tracing_disabled)
3020 return -ENODEV;
3021
3022 info = kzalloc(sizeof(*info), GFP_KERNEL);
3023 if (!info)
3024 return -ENOMEM;
3025
3026 info->tr = &global_trace;
3027 info->cpu = cpu;
3028 info->spare = ring_buffer_alloc_read_page(info->tr->buffer);
3029 /* Force reading ring buffer for first read */
3030 info->read = (unsigned int)-1;
3031 if (!info->spare)
3032 goto out;
3033
3034 filp->private_data = info;
3035
3036 return 0;
3037
3038 out:
3039 kfree(info);
3040 return -ENOMEM;
3041}
3042
3043static ssize_t
3044tracing_buffers_read(struct file *filp, char __user *ubuf,
3045 size_t count, loff_t *ppos)
3046{
3047 struct ftrace_buffer_info *info = filp->private_data;
3048 unsigned int pos;
3049 ssize_t ret;
3050 size_t size;
3051
3052 /* Do we have previous read data to read? */
3053 if (info->read < PAGE_SIZE)
3054 goto read;
3055
3056 info->read = 0;
3057
3058 ret = ring_buffer_read_page(info->tr->buffer,
3059 &info->spare,
3060 count,
3061 info->cpu, 0);
3062 if (ret < 0)
3063 return 0;
3064
3065 pos = ring_buffer_page_len(info->spare);
3066
3067 if (pos < PAGE_SIZE)
3068 memset(info->spare + pos, 0, PAGE_SIZE - pos);
3069
3070read:
3071 size = PAGE_SIZE - info->read;
3072 if (size > count)
3073 size = count;
3074
3075 ret = copy_to_user(ubuf, info->spare + info->read, size);
3076 if (ret)
3077 return -EFAULT;
3078 *ppos += size;
3079 info->read += size;
3080
3081 return size;
3082}
3083
3084static int tracing_buffers_release(struct inode *inode, struct file *file)
3085{
3086 struct ftrace_buffer_info *info = file->private_data;
3087
3088 ring_buffer_free_read_page(info->tr->buffer, info->spare);
3089 kfree(info);
3090
3091 return 0;
3092}
3093
3094struct buffer_ref {
3095 struct ring_buffer *buffer;
3096 void *page;
3097 int ref;
3098};
3099
3100static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
3101 struct pipe_buffer *buf)
3102{
3103 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
3104
3105 if (--ref->ref)
3106 return;
3107
3108 ring_buffer_free_read_page(ref->buffer, ref->page);
3109 kfree(ref);
3110 buf->private = 0;
3111}
3112
3113static int buffer_pipe_buf_steal(struct pipe_inode_info *pipe,
3114 struct pipe_buffer *buf)
3115{
3116 return 1;
3117}
3118
3119static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
3120 struct pipe_buffer *buf)
3121{
3122 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
3123
3124 ref->ref++;
3125}
3126
3127/* Pipe buffer operations for a buffer. */
3128static struct pipe_buf_operations buffer_pipe_buf_ops = {
3129 .can_merge = 0,
3130 .map = generic_pipe_buf_map,
3131 .unmap = generic_pipe_buf_unmap,
3132 .confirm = generic_pipe_buf_confirm,
3133 .release = buffer_pipe_buf_release,
3134 .steal = buffer_pipe_buf_steal,
3135 .get = buffer_pipe_buf_get,
3136};
3137
3138/*
3139 * Callback from splice_to_pipe(), if we need to release some pages
3140 * at the end of the spd in case we error'ed out in filling the pipe.
3141 */
3142static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
3143{
3144 struct buffer_ref *ref =
3145 (struct buffer_ref *)spd->partial[i].private;
3146
3147 if (--ref->ref)
3148 return;
3149
3150 ring_buffer_free_read_page(ref->buffer, ref->page);
3151 kfree(ref);
3152 spd->partial[i].private = 0;
3153}
3154
3155static ssize_t
3156tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3157 struct pipe_inode_info *pipe, size_t len,
3158 unsigned int flags)
3159{
3160 struct ftrace_buffer_info *info = file->private_data;
3161 struct partial_page partial[PIPE_BUFFERS];
3162 struct page *pages[PIPE_BUFFERS];
3163 struct splice_pipe_desc spd = {
3164 .pages = pages,
3165 .partial = partial,
3166 .flags = flags,
3167 .ops = &buffer_pipe_buf_ops,
3168 .spd_release = buffer_spd_release,
3169 };
3170 struct buffer_ref *ref;
3171 int size, i;
3172 size_t ret;
3173
3174 /*
3175 * We can't seek on a buffer input
3176 */
3177 if (unlikely(*ppos))
3178 return -ESPIPE;
3179
3180
3181 for (i = 0; i < PIPE_BUFFERS && len; i++, len -= size) {
3182 struct page *page;
3183 int r;
3184
3185 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
3186 if (!ref)
3187 break;
3188
3189 ref->buffer = info->tr->buffer;
3190 ref->page = ring_buffer_alloc_read_page(ref->buffer);
3191 if (!ref->page) {
3192 kfree(ref);
3193 break;
3194 }
3195
3196 r = ring_buffer_read_page(ref->buffer, &ref->page,
3197 len, info->cpu, 0);
3198 if (r < 0) {
3199 ring_buffer_free_read_page(ref->buffer,
3200 ref->page);
3201 kfree(ref);
3202 break;
3203 }
3204
3205 /*
3206 * zero out any left over data, this is going to
3207 * user land.
3208 */
3209 size = ring_buffer_page_len(ref->page);
3210 if (size < PAGE_SIZE)
3211 memset(ref->page + size, 0, PAGE_SIZE - size);
3212
3213 page = virt_to_page(ref->page);
3214
3215 spd.pages[i] = page;
3216 spd.partial[i].len = PAGE_SIZE;
3217 spd.partial[i].offset = 0;
3218 spd.partial[i].private = (unsigned long)ref;
3219 spd.nr_pages++;
3220 }
3221
3222 spd.nr_pages = i;
3223
3224 /* did we read anything? */
3225 if (!spd.nr_pages) {
3226 if (flags & SPLICE_F_NONBLOCK)
3227 ret = -EAGAIN;
3228 else
3229 ret = 0;
3230 /* TODO: block */
3231 return ret;
3232 }
3233
3234 ret = splice_to_pipe(pipe, &spd);
3235
3236 return ret;
3237}
3238
3239static const struct file_operations tracing_buffers_fops = {
3240 .open = tracing_buffers_open,
3241 .read = tracing_buffers_read,
3242 .release = tracing_buffers_release,
3243 .splice_read = tracing_buffers_splice_read,
3244 .llseek = no_llseek,
3245};
3246
3008#ifdef CONFIG_DYNAMIC_FTRACE 3247#ifdef CONFIG_DYNAMIC_FTRACE
3009 3248
3010int __weak ftrace_arch_read_dyn_info(char *buf, int size) 3249int __weak ftrace_arch_read_dyn_info(char *buf, int size)
@@ -3399,6 +3638,7 @@ static __init void create_trace_options_dir(void)
3399static __init int tracer_init_debugfs(void) 3638static __init int tracer_init_debugfs(void)
3400{ 3639{
3401 struct dentry *d_tracer; 3640 struct dentry *d_tracer;
3641 struct dentry *buffers;
3402 struct dentry *entry; 3642 struct dentry *entry;
3403 int cpu; 3643 int cpu;
3404 3644
@@ -3471,6 +3711,26 @@ static __init int tracer_init_debugfs(void)
3471 pr_warning("Could not create debugfs " 3711 pr_warning("Could not create debugfs "
3472 "'trace_marker' entry\n"); 3712 "'trace_marker' entry\n");
3473 3713
3714 buffers = debugfs_create_dir("binary_buffers", d_tracer);
3715
3716 if (!buffers)
3717 pr_warning("Could not create buffers directory\n");
3718 else {
3719 int cpu;
3720 char buf[64];
3721
3722 for_each_tracing_cpu(cpu) {
3723 sprintf(buf, "%d", cpu);
3724
3725 entry = debugfs_create_file(buf, 0444, buffers,
3726 (void *)(long)cpu,
3727 &tracing_buffers_fops);
3728 if (!entry)
3729 pr_warning("Could not create debugfs buffers "
3730 "'%s' entry\n", buf);
3731 }
3732 }
3733
3474#ifdef CONFIG_DYNAMIC_FTRACE 3734#ifdef CONFIG_DYNAMIC_FTRACE
3475 entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, 3735 entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
3476 &ftrace_update_tot_cnt, 3736 &ftrace_update_tot_cnt,
@@ -3491,7 +3751,7 @@ static __init int tracer_init_debugfs(void)
3491 3751
3492int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) 3752int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
3493{ 3753{
3494 static DEFINE_SPINLOCK(trace_buf_lock); 3754 static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED;
3495 static char trace_buf[TRACE_BUF_SIZE]; 3755 static char trace_buf[TRACE_BUF_SIZE];
3496 3756
3497 struct ring_buffer_event *event; 3757 struct ring_buffer_event *event;
@@ -3513,7 +3773,8 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
3513 goto out; 3773 goto out;
3514 3774
3515 pause_graph_tracing(); 3775 pause_graph_tracing();
3516 spin_lock_irqsave(&trace_buf_lock, irq_flags); 3776 raw_local_irq_save(irq_flags);
3777 __raw_spin_lock(&trace_buf_lock);
3517 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); 3778 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
3518 3779
3519 len = min(len, TRACE_BUF_SIZE-1); 3780 len = min(len, TRACE_BUF_SIZE-1);
@@ -3532,7 +3793,8 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
3532 ring_buffer_unlock_commit(tr->buffer, event); 3793 ring_buffer_unlock_commit(tr->buffer, event);
3533 3794
3534 out_unlock: 3795 out_unlock:
3535 spin_unlock_irqrestore(&trace_buf_lock, irq_flags); 3796 __raw_spin_unlock(&trace_buf_lock);
3797 raw_local_irq_restore(irq_flags);
3536 unpause_graph_tracing(); 3798 unpause_graph_tracing();
3537 out: 3799 out:
3538 preempt_enable_notrace(); 3800 preempt_enable_notrace();
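The switch from DEFINE_SPINLOCK() to raw_spinlock_t for trace_cmdline_lock and trace_buf_lock keeps the tracer out of its own way: ordinary spinlocks go through lockdep and can themselves be traced, which recurses when trace_vprintk() is called from those paths. A bare sketch of the pattern, with a hypothetical lock name:

#include <linux/spinlock.h>
#include <linux/irqflags.h>

static raw_spinlock_t demo_buf_lock = __RAW_SPIN_LOCK_UNLOCKED;

static void demo_trace_side(void)
{
        unsigned long flags;

        raw_local_irq_save(flags);              /* no lockdep irq tracking */
        __raw_spin_lock(&demo_buf_lock);        /* no lockdep/tracer hooks */
        /* ... fill the shared static buffer ... */
        __raw_spin_unlock(&demo_buf_lock);
        raw_local_irq_restore(flags);
}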
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index e606633fb498..561bb5c5d988 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -217,6 +217,7 @@ enum trace_flag_type {
217 */ 217 */
218struct trace_array_cpu { 218struct trace_array_cpu {
219 atomic_t disabled; 219 atomic_t disabled;
220 void *buffer_page; /* ring buffer spare */
220 221
221 /* these fields get copied into max-trace: */ 222 /* these fields get copied into max-trace: */
222 unsigned long trace_idx; 223 unsigned long trace_idx;
diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h
index 041789ffbac1..2c8d76c7dbed 100644
--- a/kernel/trace/trace_events_stage_3.h
+++ b/kernel/trace/trace_events_stage_3.h
@@ -5,7 +5,7 @@
5 * 5 *
6 * static void ftrace_event_<call>(proto) 6 * static void ftrace_event_<call>(proto)
7 * { 7 * {
8 * event_trace_printk(_RET_IP_, "(<call>) " <fmt>); 8 * event_trace_printk(_RET_IP_, "<call>: " <fmt>);
9 * } 9 * }
10 * 10 *
11 * static int ftrace_reg_event_<call>(void) 11 * static int ftrace_reg_event_<call>(void)
@@ -112,7 +112,7 @@
112#define _TRACE_FORMAT(call, proto, args, fmt) \ 112#define _TRACE_FORMAT(call, proto, args, fmt) \
113static void ftrace_event_##call(proto) \ 113static void ftrace_event_##call(proto) \
114{ \ 114{ \
115 event_trace_printk(_RET_IP_, "(" #call ") " fmt); \ 115 event_trace_printk(_RET_IP_, #call ": " fmt); \
116} \ 116} \
117 \ 117 \
118static int ftrace_reg_event_##call(void) \ 118static int ftrace_reg_event_##call(void) \
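For orientation, roughly what _TRACE_FORMAT now generates for a hypothetical event named demo_event with format "val=%d", val; only the printk prefix changes, from "(demo_event) " to "demo_event: " (the event name and prototype here are made up):

static void ftrace_event_demo_event(int val)
{
        event_trace_printk(_RET_IP_, "demo_event" ": " "val=%d\n", val);
}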