author    Jens Axboe <jaxboe@fusionio.com>    2010-11-16 04:09:42 -0500
committer Jens Axboe <jaxboe@fusionio.com>    2010-11-16 04:09:42 -0500
commit    a02056349cdea2252cd2b21643ebf025e83a29f2 (patch)
tree      b7c889d6cbe8e7188d07d99a5c9da858c53a5b6c /kernel/trace
parent    34db1d595ef6f183fbc1e42cda45a3dfa0035258 (diff)
parent    e53beacd23d9cb47590da6a7a7f6d417b941a994 (diff)
Merge branch 'v2.6.37-rc2' into for-2.6.38/core
Diffstat (limited to 'kernel/trace')
-rw-r--r--  kernel/trace/Kconfig                 |   7
-rw-r--r--  kernel/trace/blktrace.c              |  20
-rw-r--r--  kernel/trace/ftrace.c                | 129
-rw-r--r--  kernel/trace/ring_buffer.c           | 357
-rw-r--r--  kernel/trace/trace.c                 |  10
-rw-r--r--  kernel/trace/trace.h                 |   4
-rw-r--r--  kernel/trace/trace_event_perf.c      |  28
-rw-r--r--  kernel/trace/trace_events.c          |  61
-rw-r--r--  kernel/trace/trace_functions_graph.c | 209
-rw-r--r--  kernel/trace/trace_irqsoff.c         | 152
-rw-r--r--  kernel/trace/trace_kdb.c             |   1
-rw-r--r--  kernel/trace/trace_kprobe.c          |   3
-rw-r--r--  kernel/trace/trace_sched_wakeup.c    | 256
-rw-r--r--  kernel/trace/trace_stack.c           |   1
-rw-r--r--  kernel/trace/trace_workqueue.c       |  10
15 files changed, 825 insertions, 423 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 538501c6ea50..e04b8bcdef88 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -49,6 +49,11 @@ config HAVE_SYSCALL_TRACEPOINTS
49 help 49 help
50 See Documentation/trace/ftrace-design.txt 50 See Documentation/trace/ftrace-design.txt
51 51
52config HAVE_C_RECORDMCOUNT
53 bool
54 help
55 C version of recordmcount available?
56
52config TRACER_MAX_TRACE 57config TRACER_MAX_TRACE
53 bool 58 bool
54 59
@@ -121,7 +126,7 @@ if FTRACE
121config FUNCTION_TRACER 126config FUNCTION_TRACER
122 bool "Kernel Function Tracer" 127 bool "Kernel Function Tracer"
123 depends on HAVE_FUNCTION_TRACER 128 depends on HAVE_FUNCTION_TRACER
124 select FRAME_POINTER 129 select FRAME_POINTER if (!ARM_UNWIND)
125 select KALLSYMS 130 select KALLSYMS
126 select GENERIC_TRACER 131 select GENERIC_TRACER
127 select CONTEXT_SWITCH_TRACER 132 select CONTEXT_SWITCH_TRACER
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 959f8d6c8cc1..7b8ec0281548 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -23,7 +23,6 @@
23#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/debugfs.h> 25#include <linux/debugfs.h>
26#include <linux/smp_lock.h>
27#include <linux/time.h> 26#include <linux/time.h>
28#include <linux/uaccess.h> 27#include <linux/uaccess.h>
29 28
@@ -169,7 +168,6 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
169static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), 168static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ),
170 BLK_TC_ACT(BLK_TC_WRITE) }; 169 BLK_TC_ACT(BLK_TC_WRITE) };
171 170
172#define BLK_TC_HARDBARRIER BLK_TC_BARRIER
173#define BLK_TC_RAHEAD BLK_TC_AHEAD 171#define BLK_TC_RAHEAD BLK_TC_AHEAD
174 172
175/* The ilog2() calls fall out because they're constant */ 173/* The ilog2() calls fall out because they're constant */
@@ -197,7 +195,6 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
197 return; 195 return;
198 196
199 what |= ddir_act[rw & WRITE]; 197 what |= ddir_act[rw & WRITE];
200 what |= MASK_TC_BIT(rw, HARDBARRIER);
201 what |= MASK_TC_BIT(rw, SYNC); 198 what |= MASK_TC_BIT(rw, SYNC);
202 what |= MASK_TC_BIT(rw, RAHEAD); 199 what |= MASK_TC_BIT(rw, RAHEAD);
203 what |= MASK_TC_BIT(rw, META); 200 what |= MASK_TC_BIT(rw, META);
@@ -326,6 +323,7 @@ static const struct file_operations blk_dropped_fops = {
326 .owner = THIS_MODULE, 323 .owner = THIS_MODULE,
327 .open = blk_dropped_open, 324 .open = blk_dropped_open,
328 .read = blk_dropped_read, 325 .read = blk_dropped_read,
326 .llseek = default_llseek,
329}; 327};
330 328
331static int blk_msg_open(struct inode *inode, struct file *filp) 329static int blk_msg_open(struct inode *inode, struct file *filp)
@@ -365,6 +363,7 @@ static const struct file_operations blk_msg_fops = {
365 .owner = THIS_MODULE, 363 .owner = THIS_MODULE,
366 .open = blk_msg_open, 364 .open = blk_msg_open,
367 .write = blk_msg_write, 365 .write = blk_msg_write,
366 .llseek = noop_llseek,
368}; 367};
369 368
370/* 369/*
@@ -639,7 +638,6 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
639 if (!q) 638 if (!q)
640 return -ENXIO; 639 return -ENXIO;
641 640
642 lock_kernel();
643 mutex_lock(&bdev->bd_mutex); 641 mutex_lock(&bdev->bd_mutex);
644 642
645 switch (cmd) { 643 switch (cmd) {
@@ -667,7 +665,6 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
667 } 665 }
668 666
669 mutex_unlock(&bdev->bd_mutex); 667 mutex_unlock(&bdev->bd_mutex);
670 unlock_kernel();
671 return ret; 668 return ret;
672} 669}
673 670
@@ -1652,10 +1649,9 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
1652 struct block_device *bdev; 1649 struct block_device *bdev;
1653 ssize_t ret = -ENXIO; 1650 ssize_t ret = -ENXIO;
1654 1651
1655 lock_kernel();
1656 bdev = bdget(part_devt(p)); 1652 bdev = bdget(part_devt(p));
1657 if (bdev == NULL) 1653 if (bdev == NULL)
1658 goto out_unlock_kernel; 1654 goto out;
1659 1655
1660 q = blk_trace_get_queue(bdev); 1656 q = blk_trace_get_queue(bdev);
1661 if (q == NULL) 1657 if (q == NULL)
@@ -1683,8 +1679,7 @@ out_unlock_bdev:
1683 mutex_unlock(&bdev->bd_mutex); 1679 mutex_unlock(&bdev->bd_mutex);
1684out_bdput: 1680out_bdput:
1685 bdput(bdev); 1681 bdput(bdev);
1686out_unlock_kernel: 1682out:
1687 unlock_kernel();
1688 return ret; 1683 return ret;
1689} 1684}
1690 1685
@@ -1714,11 +1709,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
1714 1709
1715 ret = -ENXIO; 1710 ret = -ENXIO;
1716 1711
1717 lock_kernel();
1718 p = dev_to_part(dev); 1712 p = dev_to_part(dev);
1719 bdev = bdget(part_devt(p)); 1713 bdev = bdget(part_devt(p));
1720 if (bdev == NULL) 1714 if (bdev == NULL)
1721 goto out_unlock_kernel; 1715 goto out;
1722 1716
1723 q = blk_trace_get_queue(bdev); 1717 q = blk_trace_get_queue(bdev);
1724 if (q == NULL) 1718 if (q == NULL)
@@ -1753,8 +1747,6 @@ out_unlock_bdev:
1753 mutex_unlock(&bdev->bd_mutex); 1747 mutex_unlock(&bdev->bd_mutex);
1754out_bdput: 1748out_bdput:
1755 bdput(bdev); 1749 bdput(bdev);
1756out_unlock_kernel:
1757 unlock_kernel();
1758out: 1750out:
1759 return ret ? ret : count; 1751 return ret ? ret : count;
1760} 1752}
@@ -1813,8 +1805,6 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
1813 1805
1814 if (rw & REQ_RAHEAD) 1806 if (rw & REQ_RAHEAD)
1815 rwbs[i++] = 'A'; 1807 rwbs[i++] = 'A';
1816 if (rw & REQ_HARDBARRIER)
1817 rwbs[i++] = 'B';
1818 if (rw & REQ_SYNC) 1808 if (rw & REQ_SYNC)
1819 rwbs[i++] = 'S'; 1809 rwbs[i++] = 'S';
1820 if (rw & REQ_META) 1810 if (rw & REQ_META)
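
The blktrace.c hunks above do three things: they drop the big kernel lock (lock_kernel/unlock_kernel and smp_lock.h), drop the REQ_HARDBARRIER/BLK_TC_HARDBARRIER handling, and name an .llseek handler explicitly in blk_dropped_fops and blk_msg_fops instead of relying on the old implicit default. A minimal sketch of that last pattern, using a hypothetical debugfs attribute rather than anything from this patch:

#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/module.h>

static ssize_t demo_read(struct file *filp, char __user *ubuf,
                         size_t count, loff_t *ppos)
{
        static const char msg[] = "demo\n";

        return simple_read_from_buffer(ubuf, count, ppos, msg, sizeof(msg) - 1);
}

static const struct file_operations demo_fops = {
        .owner  = THIS_MODULE,
        .read   = demo_read,
        .llseek = default_llseek,       /* stated explicitly, as blk_dropped_fops now does */
};

static struct dentry *demo_dentry;

static int __init demo_init(void)
{
        demo_dentry = debugfs_create_file("blktrace_demo", 0444, NULL, NULL, &demo_fops);
        return demo_dentry ? 0 : -ENOMEM;
}

static void __exit demo_exit(void)
{
        debugfs_remove(demo_dentry);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

Files where seeking is meaningless take noop_llseek instead, which is what blk_msg_fops picks up above.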
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index fa7ece649fe1..f3dadae83883 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -800,6 +800,7 @@ static const struct file_operations ftrace_profile_fops = {
800 .open = tracing_open_generic, 800 .open = tracing_open_generic,
801 .read = ftrace_profile_read, 801 .read = ftrace_profile_read,
802 .write = ftrace_profile_write, 802 .write = ftrace_profile_write,
803 .llseek = default_llseek,
803}; 804};
804 805
805/* used to initialize the real stat files */ 806/* used to initialize the real stat files */
@@ -884,10 +885,8 @@ enum {
884 FTRACE_ENABLE_CALLS = (1 << 0), 885 FTRACE_ENABLE_CALLS = (1 << 0),
885 FTRACE_DISABLE_CALLS = (1 << 1), 886 FTRACE_DISABLE_CALLS = (1 << 1),
886 FTRACE_UPDATE_TRACE_FUNC = (1 << 2), 887 FTRACE_UPDATE_TRACE_FUNC = (1 << 2),
887 FTRACE_ENABLE_MCOUNT = (1 << 3), 888 FTRACE_START_FUNC_RET = (1 << 3),
888 FTRACE_DISABLE_MCOUNT = (1 << 4), 889 FTRACE_STOP_FUNC_RET = (1 << 4),
889 FTRACE_START_FUNC_RET = (1 << 5),
890 FTRACE_STOP_FUNC_RET = (1 << 6),
891}; 890};
892 891
893static int ftrace_filtered; 892static int ftrace_filtered;
@@ -1226,8 +1225,6 @@ static void ftrace_shutdown(int command)
1226 1225
1227static void ftrace_startup_sysctl(void) 1226static void ftrace_startup_sysctl(void)
1228{ 1227{
1229 int command = FTRACE_ENABLE_MCOUNT;
1230
1231 if (unlikely(ftrace_disabled)) 1228 if (unlikely(ftrace_disabled))
1232 return; 1229 return;
1233 1230
@@ -1235,23 +1232,17 @@ static void ftrace_startup_sysctl(void)
1235 saved_ftrace_func = NULL; 1232 saved_ftrace_func = NULL;
1236 /* ftrace_start_up is true if we want ftrace running */ 1233 /* ftrace_start_up is true if we want ftrace running */
1237 if (ftrace_start_up) 1234 if (ftrace_start_up)
1238 command |= FTRACE_ENABLE_CALLS; 1235 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
1239
1240 ftrace_run_update_code(command);
1241} 1236}
1242 1237
1243static void ftrace_shutdown_sysctl(void) 1238static void ftrace_shutdown_sysctl(void)
1244{ 1239{
1245 int command = FTRACE_DISABLE_MCOUNT;
1246
1247 if (unlikely(ftrace_disabled)) 1240 if (unlikely(ftrace_disabled))
1248 return; 1241 return;
1249 1242
1250 /* ftrace_start_up is true if ftrace is running */ 1243 /* ftrace_start_up is true if ftrace is running */
1251 if (ftrace_start_up) 1244 if (ftrace_start_up)
1252 command |= FTRACE_DISABLE_CALLS; 1245 ftrace_run_update_code(FTRACE_DISABLE_CALLS);
1253
1254 ftrace_run_update_code(command);
1255} 1246}
1256 1247
1257static cycle_t ftrace_update_time; 1248static cycle_t ftrace_update_time;
@@ -1368,24 +1359,29 @@ enum {
1368#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ 1359#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
1369 1360
1370struct ftrace_iterator { 1361struct ftrace_iterator {
1371 struct ftrace_page *pg; 1362 loff_t pos;
1372 int hidx; 1363 loff_t func_pos;
1373 int idx; 1364 struct ftrace_page *pg;
1374 unsigned flags; 1365 struct dyn_ftrace *func;
1375 struct trace_parser parser; 1366 struct ftrace_func_probe *probe;
1367 struct trace_parser parser;
1368 int hidx;
1369 int idx;
1370 unsigned flags;
1376}; 1371};
1377 1372
1378static void * 1373static void *
1379t_hash_next(struct seq_file *m, void *v, loff_t *pos) 1374t_hash_next(struct seq_file *m, loff_t *pos)
1380{ 1375{
1381 struct ftrace_iterator *iter = m->private; 1376 struct ftrace_iterator *iter = m->private;
1382 struct hlist_node *hnd = v; 1377 struct hlist_node *hnd = NULL;
1383 struct hlist_head *hhd; 1378 struct hlist_head *hhd;
1384 1379
1385 WARN_ON(!(iter->flags & FTRACE_ITER_HASH));
1386
1387 (*pos)++; 1380 (*pos)++;
1381 iter->pos = *pos;
1388 1382
1383 if (iter->probe)
1384 hnd = &iter->probe->node;
1389 retry: 1385 retry:
1390 if (iter->hidx >= FTRACE_FUNC_HASHSIZE) 1386 if (iter->hidx >= FTRACE_FUNC_HASHSIZE)
1391 return NULL; 1387 return NULL;
@@ -1408,7 +1404,12 @@ t_hash_next(struct seq_file *m, void *v, loff_t *pos)
1408 } 1404 }
1409 } 1405 }
1410 1406
1411 return hnd; 1407 if (WARN_ON_ONCE(!hnd))
1408 return NULL;
1409
1410 iter->probe = hlist_entry(hnd, struct ftrace_func_probe, node);
1411
1412 return iter;
1412} 1413}
1413 1414
1414static void *t_hash_start(struct seq_file *m, loff_t *pos) 1415static void *t_hash_start(struct seq_file *m, loff_t *pos)
@@ -1417,26 +1418,32 @@ static void *t_hash_start(struct seq_file *m, loff_t *pos)
1417 void *p = NULL; 1418 void *p = NULL;
1418 loff_t l; 1419 loff_t l;
1419 1420
1420 if (!(iter->flags & FTRACE_ITER_HASH)) 1421 if (iter->func_pos > *pos)
1421 *pos = 0; 1422 return NULL;
1422
1423 iter->flags |= FTRACE_ITER_HASH;
1424 1423
1425 iter->hidx = 0; 1424 iter->hidx = 0;
1426 for (l = 0; l <= *pos; ) { 1425 for (l = 0; l <= (*pos - iter->func_pos); ) {
1427 p = t_hash_next(m, p, &l); 1426 p = t_hash_next(m, &l);
1428 if (!p) 1427 if (!p)
1429 break; 1428 break;
1430 } 1429 }
1431 return p; 1430 if (!p)
1431 return NULL;
1432
1433 /* Only set this if we have an item */
1434 iter->flags |= FTRACE_ITER_HASH;
1435
1436 return iter;
1432} 1437}
1433 1438
1434static int t_hash_show(struct seq_file *m, void *v) 1439static int
1440t_hash_show(struct seq_file *m, struct ftrace_iterator *iter)
1435{ 1441{
1436 struct ftrace_func_probe *rec; 1442 struct ftrace_func_probe *rec;
1437 struct hlist_node *hnd = v;
1438 1443
1439 rec = hlist_entry(hnd, struct ftrace_func_probe, node); 1444 rec = iter->probe;
1445 if (WARN_ON_ONCE(!rec))
1446 return -EIO;
1440 1447
1441 if (rec->ops->print) 1448 if (rec->ops->print)
1442 return rec->ops->print(m, rec->ip, rec->ops, rec->data); 1449 return rec->ops->print(m, rec->ip, rec->ops, rec->data);
@@ -1457,12 +1464,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
1457 struct dyn_ftrace *rec = NULL; 1464 struct dyn_ftrace *rec = NULL;
1458 1465
1459 if (iter->flags & FTRACE_ITER_HASH) 1466 if (iter->flags & FTRACE_ITER_HASH)
1460 return t_hash_next(m, v, pos); 1467 return t_hash_next(m, pos);
1461 1468
1462 (*pos)++; 1469 (*pos)++;
1470 iter->pos = *pos;
1463 1471
1464 if (iter->flags & FTRACE_ITER_PRINTALL) 1472 if (iter->flags & FTRACE_ITER_PRINTALL)
1465 return NULL; 1473 return t_hash_start(m, pos);
1466 1474
1467 retry: 1475 retry:
1468 if (iter->idx >= iter->pg->index) { 1476 if (iter->idx >= iter->pg->index) {
@@ -1491,7 +1499,20 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
1491 } 1499 }
1492 } 1500 }
1493 1501
1494 return rec; 1502 if (!rec)
1503 return t_hash_start(m, pos);
1504
1505 iter->func_pos = *pos;
1506 iter->func = rec;
1507
1508 return iter;
1509}
1510
1511static void reset_iter_read(struct ftrace_iterator *iter)
1512{
1513 iter->pos = 0;
1514 iter->func_pos = 0;
1515 iter->flags &= ~(FTRACE_ITER_PRINTALL & FTRACE_ITER_HASH);
1495} 1516}
1496 1517
1497static void *t_start(struct seq_file *m, loff_t *pos) 1518static void *t_start(struct seq_file *m, loff_t *pos)
@@ -1502,6 +1523,12 @@ static void *t_start(struct seq_file *m, loff_t *pos)
1502 1523
1503 mutex_lock(&ftrace_lock); 1524 mutex_lock(&ftrace_lock);
1504 /* 1525 /*
1526 * If an lseek was done, then reset and start from beginning.
1527 */
1528 if (*pos < iter->pos)
1529 reset_iter_read(iter);
1530
1531 /*
1505 * For set_ftrace_filter reading, if we have the filter 1532 * For set_ftrace_filter reading, if we have the filter
1506 * off, we can short cut and just print out that all 1533 * off, we can short cut and just print out that all
1507 * functions are enabled. 1534 * functions are enabled.
@@ -1518,6 +1545,11 @@ static void *t_start(struct seq_file *m, loff_t *pos)
1518 if (iter->flags & FTRACE_ITER_HASH) 1545 if (iter->flags & FTRACE_ITER_HASH)
1519 return t_hash_start(m, pos); 1546 return t_hash_start(m, pos);
1520 1547
1548 /*
1549 * Unfortunately, we need to restart at ftrace_pages_start
1550 * every time we let go of the ftrace_mutex. This is because
1551 * those pointers can change without the lock.
1552 */
1521 iter->pg = ftrace_pages_start; 1553 iter->pg = ftrace_pages_start;
1522 iter->idx = 0; 1554 iter->idx = 0;
1523 for (l = 0; l <= *pos; ) { 1555 for (l = 0; l <= *pos; ) {
@@ -1526,10 +1558,14 @@ static void *t_start(struct seq_file *m, loff_t *pos)
1526 break; 1558 break;
1527 } 1559 }
1528 1560
1529 if (!p && iter->flags & FTRACE_ITER_FILTER) 1561 if (!p) {
1530 return t_hash_start(m, pos); 1562 if (iter->flags & FTRACE_ITER_FILTER)
1563 return t_hash_start(m, pos);
1564
1565 return NULL;
1566 }
1531 1567
1532 return p; 1568 return iter;
1533} 1569}
1534 1570
1535static void t_stop(struct seq_file *m, void *p) 1571static void t_stop(struct seq_file *m, void *p)
@@ -1540,16 +1576,18 @@ static void t_stop(struct seq_file *m, void *p)
1540static int t_show(struct seq_file *m, void *v) 1576static int t_show(struct seq_file *m, void *v)
1541{ 1577{
1542 struct ftrace_iterator *iter = m->private; 1578 struct ftrace_iterator *iter = m->private;
1543 struct dyn_ftrace *rec = v; 1579 struct dyn_ftrace *rec;
1544 1580
1545 if (iter->flags & FTRACE_ITER_HASH) 1581 if (iter->flags & FTRACE_ITER_HASH)
1546 return t_hash_show(m, v); 1582 return t_hash_show(m, iter);
1547 1583
1548 if (iter->flags & FTRACE_ITER_PRINTALL) { 1584 if (iter->flags & FTRACE_ITER_PRINTALL) {
1549 seq_printf(m, "#### all functions enabled ####\n"); 1585 seq_printf(m, "#### all functions enabled ####\n");
1550 return 0; 1586 return 0;
1551 } 1587 }
1552 1588
1589 rec = iter->func;
1590
1553 if (!rec) 1591 if (!rec)
1554 return 0; 1592 return 0;
1555 1593
@@ -1601,8 +1639,8 @@ ftrace_failures_open(struct inode *inode, struct file *file)
1601 1639
1602 ret = ftrace_avail_open(inode, file); 1640 ret = ftrace_avail_open(inode, file);
1603 if (!ret) { 1641 if (!ret) {
1604 m = (struct seq_file *)file->private_data; 1642 m = file->private_data;
1605 iter = (struct ftrace_iterator *)m->private; 1643 iter = m->private;
1606 iter->flags = FTRACE_ITER_FAILURES; 1644 iter->flags = FTRACE_ITER_FAILURES;
1607 } 1645 }
1608 1646
@@ -2418,7 +2456,7 @@ static const struct file_operations ftrace_filter_fops = {
2418 .open = ftrace_filter_open, 2456 .open = ftrace_filter_open,
2419 .read = seq_read, 2457 .read = seq_read,
2420 .write = ftrace_filter_write, 2458 .write = ftrace_filter_write,
2421 .llseek = no_llseek, 2459 .llseek = ftrace_regex_lseek,
2422 .release = ftrace_filter_release, 2460 .release = ftrace_filter_release,
2423}; 2461};
2424 2462
@@ -2632,6 +2670,7 @@ static const struct file_operations ftrace_graph_fops = {
2632 .read = seq_read, 2670 .read = seq_read,
2633 .write = ftrace_graph_write, 2671 .write = ftrace_graph_write,
2634 .release = ftrace_graph_release, 2672 .release = ftrace_graph_release,
2673 .llseek = seq_lseek,
2635}; 2674};
2636#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 2675#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
2637 2676
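
The ftrace.c changes above rework the set_ftrace_filter seq_file iterator: the ftrace_iterator now remembers pos and func_pos, t_next()/t_hash_next() return the iterator itself rather than a raw record, and t_start() rewinds via reset_iter_read() whenever *pos has moved backwards. That is what lets ftrace_filter_fops switch from no_llseek to ftrace_regex_lseek. A small userspace sketch of that shape, with hypothetical names and a plain array as the backing store:

#include <stdio.h>
#include <stddef.h>

/* The iterator owns the cursor and the last position it handed out. */
struct demo_iter {
        long            pos;    /* last *pos produced, like iter->pos above */
        size_t          idx;    /* cursor into the backing array */
        const int       *data;
        size_t          len;
};

static void *demo_next(struct demo_iter *iter, long *pos)
{
        (*pos)++;
        iter->pos = *pos;
        if (iter->idx >= iter->len)
                return NULL;
        iter->idx++;
        return iter;            /* hand back the iterator, not the element */
}

static void *demo_start(struct demo_iter *iter, long *pos)
{
        void *p = iter;
        long l;

        /* If a seek went backwards, reset and walk forward again. */
        if (*pos < iter->pos) {
                iter->pos = 0;
                iter->idx = 0;
        }
        for (l = 0; l <= *pos; ) {
                p = demo_next(iter, &l);
                if (!p)
                        break;
        }
        return p;
}

int main(void)
{
        static const int vals[] = { 10, 20, 30 };
        struct demo_iter iter = { .data = vals, .len = 3 };
        long pos = 0;
        void *p;

        for (p = demo_start(&iter, &pos); p; p = demo_next(&iter, &pos))
                printf("%d\n", vals[iter.idx - 1]);
        return 0;
}

The sketch only shows the return-the-iterator and rewind-on-seek shape; the real code additionally tracks func_pos so the walk can continue into the function-probe hash after the last filter entry.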
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index bca96377fd4e..9ed509a015d8 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -224,6 +224,9 @@ enum {
224 RB_LEN_TIME_STAMP = 16, 224 RB_LEN_TIME_STAMP = 16,
225}; 225};
226 226
227#define skip_time_extend(event) \
228 ((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND))
229
227static inline int rb_null_event(struct ring_buffer_event *event) 230static inline int rb_null_event(struct ring_buffer_event *event)
228{ 231{
229 return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta; 232 return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
@@ -248,8 +251,12 @@ rb_event_data_length(struct ring_buffer_event *event)
248 return length + RB_EVNT_HDR_SIZE; 251 return length + RB_EVNT_HDR_SIZE;
249} 252}
250 253
251/* inline for ring buffer fast paths */ 254/*
252static unsigned 255 * Return the length of the given event. Will return
256 * the length of the time extend if the event is a
257 * time extend.
258 */
259static inline unsigned
253rb_event_length(struct ring_buffer_event *event) 260rb_event_length(struct ring_buffer_event *event)
254{ 261{
255 switch (event->type_len) { 262 switch (event->type_len) {
@@ -274,13 +281,41 @@ rb_event_length(struct ring_buffer_event *event)
274 return 0; 281 return 0;
275} 282}
276 283
284/*
285 * Return total length of time extend and data,
286 * or just the event length for all other events.
287 */
288static inline unsigned
289rb_event_ts_length(struct ring_buffer_event *event)
290{
291 unsigned len = 0;
292
293 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
294 /* time extends include the data event after it */
295 len = RB_LEN_TIME_EXTEND;
296 event = skip_time_extend(event);
297 }
298 return len + rb_event_length(event);
299}
300
277/** 301/**
278 * ring_buffer_event_length - return the length of the event 302 * ring_buffer_event_length - return the length of the event
279 * @event: the event to get the length of 303 * @event: the event to get the length of
304 *
305 * Returns the size of the data load of a data event.
306 * If the event is something other than a data event, it
307 * returns the size of the event itself. With the exception
308 * of a TIME EXTEND, where it still returns the size of the
309 * data load of the data event after it.
280 */ 310 */
281unsigned ring_buffer_event_length(struct ring_buffer_event *event) 311unsigned ring_buffer_event_length(struct ring_buffer_event *event)
282{ 312{
283 unsigned length = rb_event_length(event); 313 unsigned length;
314
315 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
316 event = skip_time_extend(event);
317
318 length = rb_event_length(event);
284 if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX) 319 if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
285 return length; 320 return length;
286 length -= RB_EVNT_HDR_SIZE; 321 length -= RB_EVNT_HDR_SIZE;
@@ -294,6 +329,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length);
294static void * 329static void *
295rb_event_data(struct ring_buffer_event *event) 330rb_event_data(struct ring_buffer_event *event)
296{ 331{
332 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
333 event = skip_time_extend(event);
297 BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); 334 BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
298 /* If length is in len field, then array[0] has the data */ 335 /* If length is in len field, then array[0] has the data */
299 if (event->type_len) 336 if (event->type_len)
@@ -404,9 +441,6 @@ static inline int test_time_stamp(u64 delta)
404/* Max payload is BUF_PAGE_SIZE - header (8bytes) */ 441/* Max payload is BUF_PAGE_SIZE - header (8bytes) */
405#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2)) 442#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))
406 443
407/* Max number of timestamps that can fit on a page */
408#define RB_TIMESTAMPS_PER_PAGE (BUF_PAGE_SIZE / RB_LEN_TIME_EXTEND)
409
410int ring_buffer_print_page_header(struct trace_seq *s) 444int ring_buffer_print_page_header(struct trace_seq *s)
411{ 445{
412 struct buffer_data_page field; 446 struct buffer_data_page field;
@@ -1546,6 +1580,25 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
1546 iter->head = 0; 1580 iter->head = 0;
1547} 1581}
1548 1582
1583/* Slow path, do not inline */
1584static noinline struct ring_buffer_event *
1585rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
1586{
1587 event->type_len = RINGBUF_TYPE_TIME_EXTEND;
1588
1589 /* Not the first event on the page? */
1590 if (rb_event_index(event)) {
1591 event->time_delta = delta & TS_MASK;
1592 event->array[0] = delta >> TS_SHIFT;
1593 } else {
1594 /* nope, just zero it */
1595 event->time_delta = 0;
1596 event->array[0] = 0;
1597 }
1598
1599 return skip_time_extend(event);
1600}
1601
1549/** 1602/**
1550 * ring_buffer_update_event - update event type and data 1603 * ring_buffer_update_event - update event type and data
1551 * @event: the even to update 1604 * @event: the even to update
@@ -1558,28 +1611,31 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
1558 * data field. 1611 * data field.
1559 */ 1612 */
1560static void 1613static void
1561rb_update_event(struct ring_buffer_event *event, 1614rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
1562 unsigned type, unsigned length) 1615 struct ring_buffer_event *event, unsigned length,
1616 int add_timestamp, u64 delta)
1563{ 1617{
1564 event->type_len = type; 1618 /* Only a commit updates the timestamp */
1565 1619 if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
1566 switch (type) { 1620 delta = 0;
1567
1568 case RINGBUF_TYPE_PADDING:
1569 case RINGBUF_TYPE_TIME_EXTEND:
1570 case RINGBUF_TYPE_TIME_STAMP:
1571 break;
1572 1621
1573 case 0: 1622 /*
1574 length -= RB_EVNT_HDR_SIZE; 1623 * If we need to add a timestamp, then we
1575 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) 1624 * add it to the start of the resevered space.
1576 event->array[0] = length; 1625 */
1577 else 1626 if (unlikely(add_timestamp)) {
1578 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); 1627 event = rb_add_time_stamp(event, delta);
1579 break; 1628 length -= RB_LEN_TIME_EXTEND;
1580 default: 1629 delta = 0;
1581 BUG();
1582 } 1630 }
1631
1632 event->time_delta = delta;
1633 length -= RB_EVNT_HDR_SIZE;
1634 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
1635 event->type_len = 0;
1636 event->array[0] = length;
1637 } else
1638 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
1583} 1639}
1584 1640
1585/* 1641/*
@@ -1823,10 +1879,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
1823 local_sub(length, &tail_page->write); 1879 local_sub(length, &tail_page->write);
1824} 1880}
1825 1881
1826static struct ring_buffer_event * 1882/*
1883 * This is the slow path, force gcc not to inline it.
1884 */
1885static noinline struct ring_buffer_event *
1827rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, 1886rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1828 unsigned long length, unsigned long tail, 1887 unsigned long length, unsigned long tail,
1829 struct buffer_page *tail_page, u64 *ts) 1888 struct buffer_page *tail_page, u64 ts)
1830{ 1889{
1831 struct buffer_page *commit_page = cpu_buffer->commit_page; 1890 struct buffer_page *commit_page = cpu_buffer->commit_page;
1832 struct ring_buffer *buffer = cpu_buffer->buffer; 1891 struct ring_buffer *buffer = cpu_buffer->buffer;
@@ -1909,8 +1968,8 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1909 * Nested commits always have zero deltas, so 1968 * Nested commits always have zero deltas, so
1910 * just reread the time stamp 1969 * just reread the time stamp
1911 */ 1970 */
1912 *ts = rb_time_stamp(buffer); 1971 ts = rb_time_stamp(buffer);
1913 next_page->page->time_stamp = *ts; 1972 next_page->page->time_stamp = ts;
1914 } 1973 }
1915 1974
1916 out_again: 1975 out_again:
@@ -1929,12 +1988,21 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1929 1988
1930static struct ring_buffer_event * 1989static struct ring_buffer_event *
1931__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, 1990__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1932 unsigned type, unsigned long length, u64 *ts) 1991 unsigned long length, u64 ts,
1992 u64 delta, int add_timestamp)
1933{ 1993{
1934 struct buffer_page *tail_page; 1994 struct buffer_page *tail_page;
1935 struct ring_buffer_event *event; 1995 struct ring_buffer_event *event;
1936 unsigned long tail, write; 1996 unsigned long tail, write;
1937 1997
1998 /*
1999 * If the time delta since the last event is too big to
2000 * hold in the time field of the event, then we append a
2001 * TIME EXTEND event ahead of the data event.
2002 */
2003 if (unlikely(add_timestamp))
2004 length += RB_LEN_TIME_EXTEND;
2005
1938 tail_page = cpu_buffer->tail_page; 2006 tail_page = cpu_buffer->tail_page;
1939 write = local_add_return(length, &tail_page->write); 2007 write = local_add_return(length, &tail_page->write);
1940 2008
@@ -1943,7 +2011,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1943 tail = write - length; 2011 tail = write - length;
1944 2012
1945 /* See if we shot pass the end of this buffer page */ 2013 /* See if we shot pass the end of this buffer page */
1946 if (write > BUF_PAGE_SIZE) 2014 if (unlikely(write > BUF_PAGE_SIZE))
1947 return rb_move_tail(cpu_buffer, length, tail, 2015 return rb_move_tail(cpu_buffer, length, tail,
1948 tail_page, ts); 2016 tail_page, ts);
1949 2017
@@ -1951,18 +2019,16 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1951 2019
1952 event = __rb_page_index(tail_page, tail); 2020 event = __rb_page_index(tail_page, tail);
1953 kmemcheck_annotate_bitfield(event, bitfield); 2021 kmemcheck_annotate_bitfield(event, bitfield);
1954 rb_update_event(event, type, length); 2022 rb_update_event(cpu_buffer, event, length, add_timestamp, delta);
1955 2023
1956 /* The passed in type is zero for DATA */ 2024 local_inc(&tail_page->entries);
1957 if (likely(!type))
1958 local_inc(&tail_page->entries);
1959 2025
1960 /* 2026 /*
1961 * If this is the first commit on the page, then update 2027 * If this is the first commit on the page, then update
1962 * its timestamp. 2028 * its timestamp.
1963 */ 2029 */
1964 if (!tail) 2030 if (!tail)
1965 tail_page->page->time_stamp = *ts; 2031 tail_page->page->time_stamp = ts;
1966 2032
1967 return event; 2033 return event;
1968} 2034}
@@ -1977,7 +2043,7 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
1977 unsigned long addr; 2043 unsigned long addr;
1978 2044
1979 new_index = rb_event_index(event); 2045 new_index = rb_event_index(event);
1980 old_index = new_index + rb_event_length(event); 2046 old_index = new_index + rb_event_ts_length(event);
1981 addr = (unsigned long)event; 2047 addr = (unsigned long)event;
1982 addr &= PAGE_MASK; 2048 addr &= PAGE_MASK;
1983 2049
@@ -2003,76 +2069,13 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
2003 return 0; 2069 return 0;
2004} 2070}
2005 2071
2006static int
2007rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
2008 u64 *ts, u64 *delta)
2009{
2010 struct ring_buffer_event *event;
2011 int ret;
2012
2013 WARN_ONCE(*delta > (1ULL << 59),
2014 KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
2015 (unsigned long long)*delta,
2016 (unsigned long long)*ts,
2017 (unsigned long long)cpu_buffer->write_stamp);
2018
2019 /*
2020 * The delta is too big, we to add a
2021 * new timestamp.
2022 */
2023 event = __rb_reserve_next(cpu_buffer,
2024 RINGBUF_TYPE_TIME_EXTEND,
2025 RB_LEN_TIME_EXTEND,
2026 ts);
2027 if (!event)
2028 return -EBUSY;
2029
2030 if (PTR_ERR(event) == -EAGAIN)
2031 return -EAGAIN;
2032
2033 /* Only a commited time event can update the write stamp */
2034 if (rb_event_is_commit(cpu_buffer, event)) {
2035 /*
2036 * If this is the first on the page, then it was
2037 * updated with the page itself. Try to discard it
2038 * and if we can't just make it zero.
2039 */
2040 if (rb_event_index(event)) {
2041 event->time_delta = *delta & TS_MASK;
2042 event->array[0] = *delta >> TS_SHIFT;
2043 } else {
2044 /* try to discard, since we do not need this */
2045 if (!rb_try_to_discard(cpu_buffer, event)) {
2046 /* nope, just zero it */
2047 event->time_delta = 0;
2048 event->array[0] = 0;
2049 }
2050 }
2051 cpu_buffer->write_stamp = *ts;
2052 /* let the caller know this was the commit */
2053 ret = 1;
2054 } else {
2055 /* Try to discard the event */
2056 if (!rb_try_to_discard(cpu_buffer, event)) {
2057 /* Darn, this is just wasted space */
2058 event->time_delta = 0;
2059 event->array[0] = 0;
2060 }
2061 ret = 0;
2062 }
2063
2064 *delta = 0;
2065
2066 return ret;
2067}
2068
2069static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer) 2072static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
2070{ 2073{
2071 local_inc(&cpu_buffer->committing); 2074 local_inc(&cpu_buffer->committing);
2072 local_inc(&cpu_buffer->commits); 2075 local_inc(&cpu_buffer->commits);
2073} 2076}
2074 2077
2075static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) 2078static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
2076{ 2079{
2077 unsigned long commits; 2080 unsigned long commits;
2078 2081
@@ -2110,9 +2113,10 @@ rb_reserve_next_event(struct ring_buffer *buffer,
2110 unsigned long length) 2113 unsigned long length)
2111{ 2114{
2112 struct ring_buffer_event *event; 2115 struct ring_buffer_event *event;
2113 u64 ts, delta = 0; 2116 u64 ts, delta;
2114 int commit = 0;
2115 int nr_loops = 0; 2117 int nr_loops = 0;
2118 int add_timestamp;
2119 u64 diff;
2116 2120
2117 rb_start_commit(cpu_buffer); 2121 rb_start_commit(cpu_buffer);
2118 2122
@@ -2133,6 +2137,9 @@ rb_reserve_next_event(struct ring_buffer *buffer,
2133 2137
2134 length = rb_calculate_event_length(length); 2138 length = rb_calculate_event_length(length);
2135 again: 2139 again:
2140 add_timestamp = 0;
2141 delta = 0;
2142
2136 /* 2143 /*
2137 * We allow for interrupts to reenter here and do a trace. 2144 * We allow for interrupts to reenter here and do a trace.
2138 * If one does, it will cause this original code to loop 2145 * If one does, it will cause this original code to loop
@@ -2146,56 +2153,32 @@ rb_reserve_next_event(struct ring_buffer *buffer,
2146 goto out_fail; 2153 goto out_fail;
2147 2154
2148 ts = rb_time_stamp(cpu_buffer->buffer); 2155 ts = rb_time_stamp(cpu_buffer->buffer);
2156 diff = ts - cpu_buffer->write_stamp;
2149 2157
2150 /* 2158 /* make sure this diff is calculated here */
2151 * Only the first commit can update the timestamp. 2159 barrier();
2152 * Yes there is a race here. If an interrupt comes in
2153 * just after the conditional and it traces too, then it
2154 * will also check the deltas. More than one timestamp may
2155 * also be made. But only the entry that did the actual
2156 * commit will be something other than zero.
2157 */
2158 if (likely(cpu_buffer->tail_page == cpu_buffer->commit_page &&
2159 rb_page_write(cpu_buffer->tail_page) ==
2160 rb_commit_index(cpu_buffer))) {
2161 u64 diff;
2162
2163 diff = ts - cpu_buffer->write_stamp;
2164
2165 /* make sure this diff is calculated here */
2166 barrier();
2167
2168 /* Did the write stamp get updated already? */
2169 if (unlikely(ts < cpu_buffer->write_stamp))
2170 goto get_event;
2171 2160
2161 /* Did the write stamp get updated already? */
2162 if (likely(ts >= cpu_buffer->write_stamp)) {
2172 delta = diff; 2163 delta = diff;
2173 if (unlikely(test_time_stamp(delta))) { 2164 if (unlikely(test_time_stamp(delta))) {
2174 2165 WARN_ONCE(delta > (1ULL << 59),
2175 commit = rb_add_time_stamp(cpu_buffer, &ts, &delta); 2166 KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
2176 if (commit == -EBUSY) 2167 (unsigned long long)delta,
2177 goto out_fail; 2168 (unsigned long long)ts,
2178 2169 (unsigned long long)cpu_buffer->write_stamp);
2179 if (commit == -EAGAIN) 2170 add_timestamp = 1;
2180 goto again;
2181
2182 RB_WARN_ON(cpu_buffer, commit < 0);
2183 } 2171 }
2184 } 2172 }
2185 2173
2186 get_event: 2174 event = __rb_reserve_next(cpu_buffer, length, ts,
2187 event = __rb_reserve_next(cpu_buffer, 0, length, &ts); 2175 delta, add_timestamp);
2188 if (unlikely(PTR_ERR(event) == -EAGAIN)) 2176 if (unlikely(PTR_ERR(event) == -EAGAIN))
2189 goto again; 2177 goto again;
2190 2178
2191 if (!event) 2179 if (!event)
2192 goto out_fail; 2180 goto out_fail;
2193 2181
2194 if (!rb_event_is_commit(cpu_buffer, event))
2195 delta = 0;
2196
2197 event->time_delta = delta;
2198
2199 return event; 2182 return event;
2200 2183
2201 out_fail: 2184 out_fail:
@@ -2207,13 +2190,9 @@ rb_reserve_next_event(struct ring_buffer *buffer,
2207 2190
2208#define TRACE_RECURSIVE_DEPTH 16 2191#define TRACE_RECURSIVE_DEPTH 16
2209 2192
2210static int trace_recursive_lock(void) 2193/* Keep this code out of the fast path cache */
2194static noinline void trace_recursive_fail(void)
2211{ 2195{
2212 current->trace_recursion++;
2213
2214 if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
2215 return 0;
2216
2217 /* Disable all tracing before we do anything else */ 2196 /* Disable all tracing before we do anything else */
2218 tracing_off_permanent(); 2197 tracing_off_permanent();
2219 2198
@@ -2225,10 +2204,21 @@ static int trace_recursive_lock(void)
2225 in_nmi()); 2204 in_nmi());
2226 2205
2227 WARN_ON_ONCE(1); 2206 WARN_ON_ONCE(1);
2207}
2208
2209static inline int trace_recursive_lock(void)
2210{
2211 current->trace_recursion++;
2212
2213 if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
2214 return 0;
2215
2216 trace_recursive_fail();
2217
2228 return -1; 2218 return -1;
2229} 2219}
2230 2220
2231static void trace_recursive_unlock(void) 2221static inline void trace_recursive_unlock(void)
2232{ 2222{
2233 WARN_ON_ONCE(!current->trace_recursion); 2223 WARN_ON_ONCE(!current->trace_recursion);
2234 2224
@@ -2308,12 +2298,28 @@ static void
2308rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer, 2298rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
2309 struct ring_buffer_event *event) 2299 struct ring_buffer_event *event)
2310{ 2300{
2301 u64 delta;
2302
2311 /* 2303 /*
2312 * The event first in the commit queue updates the 2304 * The event first in the commit queue updates the
2313 * time stamp. 2305 * time stamp.
2314 */ 2306 */
2315 if (rb_event_is_commit(cpu_buffer, event)) 2307 if (rb_event_is_commit(cpu_buffer, event)) {
2316 cpu_buffer->write_stamp += event->time_delta; 2308 /*
2309 * A commit event that is first on a page
2310 * updates the write timestamp with the page stamp
2311 */
2312 if (!rb_event_index(event))
2313 cpu_buffer->write_stamp =
2314 cpu_buffer->commit_page->page->time_stamp;
2315 else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
2316 delta = event->array[0];
2317 delta <<= TS_SHIFT;
2318 delta += event->time_delta;
2319 cpu_buffer->write_stamp += delta;
2320 } else
2321 cpu_buffer->write_stamp += event->time_delta;
2322 }
2317} 2323}
2318 2324
2319static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, 2325static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
@@ -2353,6 +2359,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
2353 2359
2354static inline void rb_event_discard(struct ring_buffer_event *event) 2360static inline void rb_event_discard(struct ring_buffer_event *event)
2355{ 2361{
2362 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
2363 event = skip_time_extend(event);
2364
2356 /* array[0] holds the actual length for the discarded event */ 2365 /* array[0] holds the actual length for the discarded event */
2357 event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE; 2366 event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
2358 event->type_len = RINGBUF_TYPE_PADDING; 2367 event->type_len = RINGBUF_TYPE_PADDING;
@@ -2606,6 +2615,19 @@ void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
2606} 2615}
2607EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); 2616EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
2608 2617
2618/*
2619 * The total entries in the ring buffer is the running counter
2620 * of entries entered into the ring buffer, minus the sum of
2621 * the entries read from the ring buffer and the number of
2622 * entries that were overwritten.
2623 */
2624static inline unsigned long
2625rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer)
2626{
2627 return local_read(&cpu_buffer->entries) -
2628 (local_read(&cpu_buffer->overrun) + cpu_buffer->read);
2629}
2630
2609/** 2631/**
2610 * ring_buffer_entries_cpu - get the number of entries in a cpu buffer 2632 * ring_buffer_entries_cpu - get the number of entries in a cpu buffer
2611 * @buffer: The ring buffer 2633 * @buffer: The ring buffer
@@ -2614,16 +2636,13 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
2614unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) 2636unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
2615{ 2637{
2616 struct ring_buffer_per_cpu *cpu_buffer; 2638 struct ring_buffer_per_cpu *cpu_buffer;
2617 unsigned long ret;
2618 2639
2619 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 2640 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2620 return 0; 2641 return 0;
2621 2642
2622 cpu_buffer = buffer->buffers[cpu]; 2643 cpu_buffer = buffer->buffers[cpu];
2623 ret = (local_read(&cpu_buffer->entries) - local_read(&cpu_buffer->overrun))
2624 - cpu_buffer->read;
2625 2644
2626 return ret; 2645 return rb_num_of_entries(cpu_buffer);
2627} 2646}
2628EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); 2647EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu);
2629 2648
@@ -2684,8 +2703,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer)
2684 /* if you care about this being correct, lock the buffer */ 2703 /* if you care about this being correct, lock the buffer */
2685 for_each_buffer_cpu(buffer, cpu) { 2704 for_each_buffer_cpu(buffer, cpu) {
2686 cpu_buffer = buffer->buffers[cpu]; 2705 cpu_buffer = buffer->buffers[cpu];
2687 entries += (local_read(&cpu_buffer->entries) - 2706 entries += rb_num_of_entries(cpu_buffer);
2688 local_read(&cpu_buffer->overrun)) - cpu_buffer->read;
2689 } 2707 }
2690 2708
2691 return entries; 2709 return entries;
@@ -3040,12 +3058,12 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
3040 3058
3041 again: 3059 again:
3042 /* 3060 /*
3043 * We repeat when a timestamp is encountered. It is possible 3061 * We repeat when a time extend is encountered.
3044 * to get multiple timestamps from an interrupt entering just 3062 * Since the time extend is always attached to a data event,
3045 * as one timestamp is about to be written, or from discarded 3063 * we should never loop more than once.
3046 * commits. The most that we can have is the number on a single page. 3064 * (We never hit the following condition more than twice).
3047 */ 3065 */
3048 if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) 3066 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
3049 return NULL; 3067 return NULL;
3050 3068
3051 reader = rb_get_reader_page(cpu_buffer); 3069 reader = rb_get_reader_page(cpu_buffer);
@@ -3121,14 +3139,12 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3121 return NULL; 3139 return NULL;
3122 3140
3123 /* 3141 /*
3124 * We repeat when a timestamp is encountered. 3142 * We repeat when a time extend is encountered.
3125 * We can get multiple timestamps by nested interrupts or also 3143 * Since the time extend is always attached to a data event,
3126 * if filtering is on (discarding commits). Since discarding 3144 * we should never loop more than once.
3127 * commits can be frequent we can get a lot of timestamps. 3145 * (We never hit the following condition more than twice).
3128 * But we limit them by not adding timestamps if they begin
3129 * at the start of a page.
3130 */ 3146 */
3131 if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) 3147 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
3132 return NULL; 3148 return NULL;
3133 3149
3134 if (rb_per_cpu_empty(cpu_buffer)) 3150 if (rb_per_cpu_empty(cpu_buffer))
@@ -3826,7 +3842,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3826 if (len > (commit - read)) 3842 if (len > (commit - read))
3827 len = (commit - read); 3843 len = (commit - read);
3828 3844
3829 size = rb_event_length(event); 3845 /* Always keep the time extend and data together */
3846 size = rb_event_ts_length(event);
3830 3847
3831 if (len < size) 3848 if (len < size)
3832 goto out_unlock; 3849 goto out_unlock;
@@ -3848,7 +3865,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3848 break; 3865 break;
3849 3866
3850 event = rb_reader_event(cpu_buffer); 3867 event = rb_reader_event(cpu_buffer);
3851 size = rb_event_length(event); 3868 /* Always keep the time extend and data together */
3869 size = rb_event_ts_length(event);
3852 } while (len > size); 3870 } while (len > size);
3853 3871
3854 /* update bpage */ 3872 /* update bpage */
@@ -3965,6 +3983,7 @@ static const struct file_operations rb_simple_fops = {
3965 .open = tracing_open_generic, 3983 .open = tracing_open_generic,
3966 .read = rb_simple_read, 3984 .read = rb_simple_read,
3967 .write = rb_simple_write, 3985 .write = rb_simple_write,
3986 .llseek = default_llseek,
3968}; 3987};
3969 3988
3970 3989
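
The core of the ring_buffer.c rework above is that an oversized timestamp delta is no longer committed as a standalone event: rb_add_time_stamp() now writes a TIME_EXTEND record at the start of the reserved space and the data event follows it, with the delta split between the event header's small time_delta field and array[0]. A standalone sketch of that split and of the reassembly done in rb_update_write_stamp(), assuming the usual TS_SHIFT value of 27:

#include <stdio.h>
#include <stdint.h>

#define TS_SHIFT        27                      /* assumed value of the kernel's TS_SHIFT */
#define TS_MASK         ((1ULL << TS_SHIFT) - 1)

struct demo_extend {
        uint32_t time_delta;    /* low bits, the header's delta field */
        uint32_t array0;        /* high bits, stored in array[0] */
};

/* Split an oversized delta the way rb_add_time_stamp() does. */
static void demo_encode(struct demo_extend *e, uint64_t delta)
{
        e->time_delta = delta & TS_MASK;
        e->array0 = delta >> TS_SHIFT;
}

/* Reassemble it the way rb_update_write_stamp() does. */
static uint64_t demo_decode(const struct demo_extend *e)
{
        return ((uint64_t)e->array0 << TS_SHIFT) + e->time_delta;
}

int main(void)
{
        struct demo_extend e;
        uint64_t delta = (1ULL << 40) + 12345;  /* far too big for the delta field */

        demo_encode(&e, delta);
        printf("round trip ok: %d\n", demo_decode(&e) == delta);
        return 0;
}

Readers treat the pair as one unit: rb_event_ts_length() always returns the time-extend length plus the data event behind it, which is why the peek loops above can now bound their retries at two instead of RB_TIMESTAMPS_PER_PAGE.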
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 9ec59f541156..82d9b8106cd0 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2196,7 +2196,7 @@ int tracing_open_generic(struct inode *inode, struct file *filp)
2196 2196
2197static int tracing_release(struct inode *inode, struct file *file) 2197static int tracing_release(struct inode *inode, struct file *file)
2198{ 2198{
2199 struct seq_file *m = (struct seq_file *)file->private_data; 2199 struct seq_file *m = file->private_data;
2200 struct trace_iterator *iter; 2200 struct trace_iterator *iter;
2201 int cpu; 2201 int cpu;
2202 2202
@@ -3996,13 +3996,9 @@ static void tracing_init_debugfs_percpu(long cpu)
3996{ 3996{
3997 struct dentry *d_percpu = tracing_dentry_percpu(); 3997 struct dentry *d_percpu = tracing_dentry_percpu();
3998 struct dentry *d_cpu; 3998 struct dentry *d_cpu;
3999 /* strlen(cpu) + MAX(log10(cpu)) + '\0' */ 3999 char cpu_dir[30]; /* 30 characters should be more than enough */
4000 char cpu_dir[7];
4001 4000
4002 if (cpu > 999 || cpu < 0) 4001 snprintf(cpu_dir, 30, "cpu%ld", cpu);
4003 return;
4004
4005 sprintf(cpu_dir, "cpu%ld", cpu);
4006 d_cpu = debugfs_create_dir(cpu_dir, d_percpu); 4002 d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
4007 if (!d_cpu) { 4003 if (!d_cpu) {
4008 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir); 4004 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
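
The trace.c hunk above replaces a sprintf() into a hand-sized 7-byte buffer (and its cpu > 999 guard) with snprintf() into a 30-byte one. A trivial sketch of the difference, using a deliberately large hypothetical value:

#include <stdio.h>

int main(void)
{
        char cpu_dir[30];       /* "30 characters should be more than enough" */
        long cpu = 1234567;     /* hypothetical; the point is that %ld can be wide */

        snprintf(cpu_dir, sizeof(cpu_dir), "cpu%ld", cpu);      /* bounded, never overflows */
        printf("%s\n", cpu_dir);
        return 0;
}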
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index d39b3c5454a5..9021f8c0c0c3 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -343,6 +343,10 @@ void trace_function(struct trace_array *tr,
343 unsigned long ip, 343 unsigned long ip,
344 unsigned long parent_ip, 344 unsigned long parent_ip,
345 unsigned long flags, int pc); 345 unsigned long flags, int pc);
346void trace_graph_function(struct trace_array *tr,
347 unsigned long ip,
348 unsigned long parent_ip,
349 unsigned long flags, int pc);
346void trace_default_header(struct seq_file *m); 350void trace_default_header(struct seq_file *m);
347void print_trace_header(struct seq_file *m, struct trace_iterator *iter); 351void print_trace_header(struct seq_file *m, struct trace_iterator *iter);
348int trace_empty(struct trace_iterator *iter); 352int trace_empty(struct trace_iterator *iter);
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 31cc4cb0dbf2..39c059ca670e 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -9,7 +9,7 @@
9#include <linux/kprobes.h> 9#include <linux/kprobes.h>
10#include "trace.h" 10#include "trace.h"
11 11
12static char *perf_trace_buf[4]; 12static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS];
13 13
14/* 14/*
15 * Force it to be aligned to unsigned long to avoid misaligned accesses 15 * Force it to be aligned to unsigned long to avoid misaligned accesses
@@ -24,7 +24,7 @@ static int total_ref_count;
24static int perf_trace_event_init(struct ftrace_event_call *tp_event, 24static int perf_trace_event_init(struct ftrace_event_call *tp_event,
25 struct perf_event *p_event) 25 struct perf_event *p_event)
26{ 26{
27 struct hlist_head *list; 27 struct hlist_head __percpu *list;
28 int ret = -ENOMEM; 28 int ret = -ENOMEM;
29 int cpu; 29 int cpu;
30 30
@@ -42,11 +42,11 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event,
42 tp_event->perf_events = list; 42 tp_event->perf_events = list;
43 43
44 if (!total_ref_count) { 44 if (!total_ref_count) {
45 char *buf; 45 char __percpu *buf;
46 int i; 46 int i;
47 47
48 for (i = 0; i < 4; i++) { 48 for (i = 0; i < PERF_NR_CONTEXTS; i++) {
49 buf = (char *)alloc_percpu(perf_trace_t); 49 buf = (char __percpu *)alloc_percpu(perf_trace_t);
50 if (!buf) 50 if (!buf)
51 goto fail; 51 goto fail;
52 52
@@ -65,7 +65,7 @@ fail:
65 if (!total_ref_count) { 65 if (!total_ref_count) {
66 int i; 66 int i;
67 67
68 for (i = 0; i < 4; i++) { 68 for (i = 0; i < PERF_NR_CONTEXTS; i++) {
69 free_percpu(perf_trace_buf[i]); 69 free_percpu(perf_trace_buf[i]);
70 perf_trace_buf[i] = NULL; 70 perf_trace_buf[i] = NULL;
71 } 71 }
@@ -101,22 +101,26 @@ int perf_trace_init(struct perf_event *p_event)
101 return ret; 101 return ret;
102} 102}
103 103
104int perf_trace_enable(struct perf_event *p_event) 104int perf_trace_add(struct perf_event *p_event, int flags)
105{ 105{
106 struct ftrace_event_call *tp_event = p_event->tp_event; 106 struct ftrace_event_call *tp_event = p_event->tp_event;
107 struct hlist_head __percpu *pcpu_list;
107 struct hlist_head *list; 108 struct hlist_head *list;
108 109
109 list = tp_event->perf_events; 110 pcpu_list = tp_event->perf_events;
110 if (WARN_ON_ONCE(!list)) 111 if (WARN_ON_ONCE(!pcpu_list))
111 return -EINVAL; 112 return -EINVAL;
112 113
113 list = this_cpu_ptr(list); 114 if (!(flags & PERF_EF_START))
115 p_event->hw.state = PERF_HES_STOPPED;
116
117 list = this_cpu_ptr(pcpu_list);
114 hlist_add_head_rcu(&p_event->hlist_entry, list); 118 hlist_add_head_rcu(&p_event->hlist_entry, list);
115 119
116 return 0; 120 return 0;
117} 121}
118 122
119void perf_trace_disable(struct perf_event *p_event) 123void perf_trace_del(struct perf_event *p_event, int flags)
120{ 124{
121 hlist_del_rcu(&p_event->hlist_entry); 125 hlist_del_rcu(&p_event->hlist_entry);
122} 126}
@@ -142,7 +146,7 @@ void perf_trace_destroy(struct perf_event *p_event)
142 tp_event->perf_events = NULL; 146 tp_event->perf_events = NULL;
143 147
144 if (!--total_ref_count) { 148 if (!--total_ref_count) {
145 for (i = 0; i < 4; i++) { 149 for (i = 0; i < PERF_NR_CONTEXTS; i++) {
146 free_percpu(perf_trace_buf[i]); 150 free_percpu(perf_trace_buf[i]);
147 perf_trace_buf[i] = NULL; 151 perf_trace_buf[i] = NULL;
148 } 152 }
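
trace_event_perf.c above sizes the scratch buffers by PERF_NR_CONTEXTS instead of a bare 4 and annotates the per-cpu pointers with __percpu so sparse can check them. A minimal kernel-style sketch of the alloc_percpu()/this_cpu_ptr() pattern, using a hypothetical counter rather than the perf hlists; the caller is assumed to run with preemption disabled, as perf_trace_add() does:

#include <linux/percpu.h>
#include <linux/errno.h>

static unsigned long __percpu *demo_hits;

static int demo_alloc(void)
{
        demo_hits = alloc_percpu(unsigned long);        /* one counter per possible CPU */
        if (!demo_hits)
                return -ENOMEM;
        return 0;
}

static void demo_record(void)
{
        unsigned long *hits = this_cpu_ptr(demo_hits);  /* this CPU's slot */

        (*hits)++;
}

static void demo_free(void)
{
        free_percpu(demo_hits);
}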
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 4c758f146328..0725eeab1937 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -600,21 +600,29 @@ out:
600 600
601enum { 601enum {
602 FORMAT_HEADER = 1, 602 FORMAT_HEADER = 1,
603 FORMAT_PRINTFMT = 2, 603 FORMAT_FIELD_SEPERATOR = 2,
604 FORMAT_PRINTFMT = 3,
604}; 605};
605 606
606static void *f_next(struct seq_file *m, void *v, loff_t *pos) 607static void *f_next(struct seq_file *m, void *v, loff_t *pos)
607{ 608{
608 struct ftrace_event_call *call = m->private; 609 struct ftrace_event_call *call = m->private;
609 struct ftrace_event_field *field; 610 struct ftrace_event_field *field;
610 struct list_head *head; 611 struct list_head *common_head = &ftrace_common_fields;
612 struct list_head *head = trace_get_fields(call);
611 613
612 (*pos)++; 614 (*pos)++;
613 615
614 switch ((unsigned long)v) { 616 switch ((unsigned long)v) {
615 case FORMAT_HEADER: 617 case FORMAT_HEADER:
616 head = &ftrace_common_fields; 618 if (unlikely(list_empty(common_head)))
619 return NULL;
620
621 field = list_entry(common_head->prev,
622 struct ftrace_event_field, link);
623 return field;
617 624
625 case FORMAT_FIELD_SEPERATOR:
618 if (unlikely(list_empty(head))) 626 if (unlikely(list_empty(head)))
619 return NULL; 627 return NULL;
620 628
@@ -626,31 +634,10 @@ static void *f_next(struct seq_file *m, void *v, loff_t *pos)
626 return NULL; 634 return NULL;
627 } 635 }
628 636
629 head = trace_get_fields(call);
630
631 /*
632 * To separate common fields from event fields, the
633 * LSB is set on the first event field. Clear it in case.
634 */
635 v = (void *)((unsigned long)v & ~1L);
636
637 field = v; 637 field = v;
638 /* 638 if (field->link.prev == common_head)
639 * If this is a common field, and at the end of the list, then 639 return (void *)FORMAT_FIELD_SEPERATOR;
640 * continue with main list. 640 else if (field->link.prev == head)
641 */
642 if (field->link.prev == &ftrace_common_fields) {
643 if (unlikely(list_empty(head)))
644 return NULL;
645 field = list_entry(head->prev, struct ftrace_event_field, link);
646 /* Set the LSB to notify f_show to print an extra newline */
647 field = (struct ftrace_event_field *)
648 ((unsigned long)field | 1);
649 return field;
650 }
651
652 /* If we are done tell f_show to print the format */
653 if (field->link.prev == head)
654 return (void *)FORMAT_PRINTFMT; 641 return (void *)FORMAT_PRINTFMT;
655 642
656 field = list_entry(field->link.prev, struct ftrace_event_field, link); 643 field = list_entry(field->link.prev, struct ftrace_event_field, link);
@@ -688,22 +675,16 @@ static int f_show(struct seq_file *m, void *v)
688 seq_printf(m, "format:\n"); 675 seq_printf(m, "format:\n");
689 return 0; 676 return 0;
690 677
678 case FORMAT_FIELD_SEPERATOR:
679 seq_putc(m, '\n');
680 return 0;
681
691 case FORMAT_PRINTFMT: 682 case FORMAT_PRINTFMT:
692 seq_printf(m, "\nprint fmt: %s\n", 683 seq_printf(m, "\nprint fmt: %s\n",
693 call->print_fmt); 684 call->print_fmt);
694 return 0; 685 return 0;
695 } 686 }
696 687
697 /*
698 * To separate common fields from event fields, the
699 * LSB is set on the first event field. Clear it and
700 * print a newline if it is set.
701 */
702 if ((unsigned long)v & 1) {
703 seq_putc(m, '\n');
704 v = (void *)((unsigned long)v & ~1L);
705 }
706
707 field = v; 688 field = v;
708 689
709 /* 690 /*
@@ -951,6 +932,7 @@ static const struct file_operations ftrace_enable_fops = {
951 .open = tracing_open_generic, 932 .open = tracing_open_generic,
952 .read = event_enable_read, 933 .read = event_enable_read,
953 .write = event_enable_write, 934 .write = event_enable_write,
935 .llseek = default_llseek,
954}; 936};
955 937
956static const struct file_operations ftrace_event_format_fops = { 938static const struct file_operations ftrace_event_format_fops = {
@@ -963,29 +945,34 @@ static const struct file_operations ftrace_event_format_fops = {
963static const struct file_operations ftrace_event_id_fops = { 945static const struct file_operations ftrace_event_id_fops = {
964 .open = tracing_open_generic, 946 .open = tracing_open_generic,
965 .read = event_id_read, 947 .read = event_id_read,
948 .llseek = default_llseek,
966}; 949};
967 950
968static const struct file_operations ftrace_event_filter_fops = { 951static const struct file_operations ftrace_event_filter_fops = {
969 .open = tracing_open_generic, 952 .open = tracing_open_generic,
970 .read = event_filter_read, 953 .read = event_filter_read,
971 .write = event_filter_write, 954 .write = event_filter_write,
955 .llseek = default_llseek,
972}; 956};
973 957
974static const struct file_operations ftrace_subsystem_filter_fops = { 958static const struct file_operations ftrace_subsystem_filter_fops = {
975 .open = tracing_open_generic, 959 .open = tracing_open_generic,
976 .read = subsystem_filter_read, 960 .read = subsystem_filter_read,
977 .write = subsystem_filter_write, 961 .write = subsystem_filter_write,
962 .llseek = default_llseek,
978}; 963};
979 964
980static const struct file_operations ftrace_system_enable_fops = { 965static const struct file_operations ftrace_system_enable_fops = {
981 .open = tracing_open_generic, 966 .open = tracing_open_generic,
982 .read = system_enable_read, 967 .read = system_enable_read,
983 .write = system_enable_write, 968 .write = system_enable_write,
969 .llseek = default_llseek,
984}; 970};
985 971
986static const struct file_operations ftrace_show_header_fops = { 972static const struct file_operations ftrace_show_header_fops = {
987 .open = tracing_open_generic, 973 .open = tracing_open_generic,
988 .read = show_header, 974 .read = show_header,
975 .llseek = default_llseek,
989}; 976};
990 977
991static struct dentry *event_trace_events_dir(void) 978static struct dentry *event_trace_events_dir(void)
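
The trace_events.c hunks above drop the old trick of tagging the first event field with the pointer's LSB and instead walk the "format" file through sentinel positions (FORMAT_HEADER, FORMAT_FIELD_SEPERATOR, FORMAT_PRINTFMT) that f_show() recognizes by value. A userspace sketch of that sentinel-value pattern with hypothetical names; small constants cast to void * can never collide with real pointers:

#include <stdio.h>

enum { POS_HEADER = 1, POS_SEPARATOR = 2, POS_PRINTFMT = 3 };

static void demo_show(void *v)
{
        switch ((unsigned long)v) {
        case POS_HEADER:
                printf("format:\n");
                return;
        case POS_SEPARATOR:
                printf("\n");
                return;
        case POS_PRINTFMT:
                printf("print fmt: ...\n");
                return;
        default:
                printf("\tfield:%s;\n", (const char *)v);
        }
}

int main(void)
{
        void *walk[] = {
                (void *)(unsigned long)POS_HEADER, "common_pid",
                (void *)(unsigned long)POS_SEPARATOR, "arg1",
                (void *)(unsigned long)POS_PRINTFMT,
        };
        unsigned int i;

        for (i = 0; i < sizeof(walk) / sizeof(walk[0]); i++)
                demo_show(walk[i]);
        return 0;
}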
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 6f233698518e..76b05980225c 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -15,15 +15,19 @@
15#include "trace.h" 15#include "trace.h"
16#include "trace_output.h" 16#include "trace_output.h"
17 17
18/* When set, irq functions will be ignored */
19static int ftrace_graph_skip_irqs;
20
18struct fgraph_cpu_data { 21struct fgraph_cpu_data {
19 pid_t last_pid; 22 pid_t last_pid;
20 int depth; 23 int depth;
24 int depth_irq;
21 int ignore; 25 int ignore;
22 unsigned long enter_funcs[FTRACE_RETFUNC_DEPTH]; 26 unsigned long enter_funcs[FTRACE_RETFUNC_DEPTH];
23}; 27};
24 28
25struct fgraph_data { 29struct fgraph_data {
26 struct fgraph_cpu_data *cpu_data; 30 struct fgraph_cpu_data __percpu *cpu_data;
27 31
28 /* Place to preserve last processed entry. */ 32 /* Place to preserve last processed entry. */
29 struct ftrace_graph_ent_entry ent; 33 struct ftrace_graph_ent_entry ent;
@@ -41,6 +45,7 @@ struct fgraph_data {
41#define TRACE_GRAPH_PRINT_PROC 0x8 45#define TRACE_GRAPH_PRINT_PROC 0x8
42#define TRACE_GRAPH_PRINT_DURATION 0x10 46#define TRACE_GRAPH_PRINT_DURATION 0x10
43#define TRACE_GRAPH_PRINT_ABS_TIME 0x20 47#define TRACE_GRAPH_PRINT_ABS_TIME 0x20
48#define TRACE_GRAPH_PRINT_IRQS 0x40
44 49
45static struct tracer_opt trace_opts[] = { 50static struct tracer_opt trace_opts[] = {
46 /* Display overruns? (for self-debug purpose) */ 51 /* Display overruns? (for self-debug purpose) */
@@ -55,13 +60,15 @@ static struct tracer_opt trace_opts[] = {
55 { TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) }, 60 { TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) },
56 /* Display absolute time of an entry */ 61 /* Display absolute time of an entry */
57 { TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) }, 62 { TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) },
63 /* Display interrupts */
64 { TRACER_OPT(funcgraph-irqs, TRACE_GRAPH_PRINT_IRQS) },
58 { } /* Empty entry */ 65 { } /* Empty entry */
59}; 66};
60 67
61static struct tracer_flags tracer_flags = { 68static struct tracer_flags tracer_flags = {
62 /* Don't display overruns and proc by default */ 69 /* Don't display overruns and proc by default */
63 .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD | 70 .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD |
64 TRACE_GRAPH_PRINT_DURATION, 71 TRACE_GRAPH_PRINT_DURATION | TRACE_GRAPH_PRINT_IRQS,
65 .opts = trace_opts 72 .opts = trace_opts
66}; 73};
67 74
@@ -204,6 +211,14 @@ int __trace_graph_entry(struct trace_array *tr,
204 return 1; 211 return 1;
205} 212}
206 213
214static inline int ftrace_graph_ignore_irqs(void)
215{
216 if (!ftrace_graph_skip_irqs)
217 return 0;
218
219 return in_irq();
220}
221
207int trace_graph_entry(struct ftrace_graph_ent *trace) 222int trace_graph_entry(struct ftrace_graph_ent *trace)
208{ 223{
209 struct trace_array *tr = graph_array; 224 struct trace_array *tr = graph_array;
@@ -218,7 +233,8 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
218 return 0; 233 return 0;
219 234
220 /* trace it when it is-nested-in or is a function enabled. */ 235 /* trace it when it is-nested-in or is a function enabled. */
221 if (!(trace->depth || ftrace_graph_addr(trace->func))) 236 if (!(trace->depth || ftrace_graph_addr(trace->func)) ||
237 ftrace_graph_ignore_irqs())
222 return 0; 238 return 0;
223 239
224 local_irq_save(flags); 240 local_irq_save(flags);
@@ -246,6 +262,34 @@ int trace_graph_thresh_entry(struct ftrace_graph_ent *trace)
246 return trace_graph_entry(trace); 262 return trace_graph_entry(trace);
247} 263}
248 264
265static void
266__trace_graph_function(struct trace_array *tr,
267 unsigned long ip, unsigned long flags, int pc)
268{
269 u64 time = trace_clock_local();
270 struct ftrace_graph_ent ent = {
271 .func = ip,
272 .depth = 0,
273 };
274 struct ftrace_graph_ret ret = {
275 .func = ip,
276 .depth = 0,
277 .calltime = time,
278 .rettime = time,
279 };
280
281 __trace_graph_entry(tr, &ent, flags, pc);
282 __trace_graph_return(tr, &ret, flags, pc);
283}
284
285void
286trace_graph_function(struct trace_array *tr,
287 unsigned long ip, unsigned long parent_ip,
288 unsigned long flags, int pc)
289{
290 __trace_graph_function(tr, ip, flags, pc);
291}
292
249void __trace_graph_return(struct trace_array *tr, 293void __trace_graph_return(struct trace_array *tr,
250 struct ftrace_graph_ret *trace, 294 struct ftrace_graph_ret *trace,
251 unsigned long flags, 295 unsigned long flags,
@@ -649,8 +693,9 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s)
649 693
650 /* Print nsecs (we don't want to exceed 7 numbers) */ 694 /* Print nsecs (we don't want to exceed 7 numbers) */
651 if (len < 7) { 695 if (len < 7) {
652 snprintf(nsecs_str, min(sizeof(nsecs_str), 8UL - len), "%03lu", 696 size_t slen = min_t(size_t, sizeof(nsecs_str), 8UL - len);
653 nsecs_rem); 697
698 snprintf(nsecs_str, slen, "%03lu", nsecs_rem);
654 ret = trace_seq_printf(s, ".%s", nsecs_str); 699 ret = trace_seq_printf(s, ".%s", nsecs_str);
655 if (!ret) 700 if (!ret)
656 return TRACE_TYPE_PARTIAL_LINE; 701 return TRACE_TYPE_PARTIAL_LINE;
@@ -855,6 +900,108 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,
855 return 0; 900 return 0;
856} 901}
857 902
903/*
904 * Entry check for irq code
905 *
906 * returns 1 if
907 * - we are inside irq code
 908 * - we just entered irq code
909 *
 910 * returns 0 if
911 * - funcgraph-interrupts option is set
912 * - we are not inside irq code
913 */
914static int
915check_irq_entry(struct trace_iterator *iter, u32 flags,
916 unsigned long addr, int depth)
917{
918 int cpu = iter->cpu;
919 int *depth_irq;
920 struct fgraph_data *data = iter->private;
921
922 /*
923 * If we are either displaying irqs, or we got called as
924 * a graph event and private data does not exist,
925 * then we bypass the irq check.
926 */
927 if ((flags & TRACE_GRAPH_PRINT_IRQS) ||
928 (!data))
929 return 0;
930
931 depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq);
932
933 /*
934 * We are inside the irq code
935 */
936 if (*depth_irq >= 0)
937 return 1;
938
939 if ((addr < (unsigned long)__irqentry_text_start) ||
940 (addr >= (unsigned long)__irqentry_text_end))
941 return 0;
942
943 /*
944 * We are entering irq code.
945 */
946 *depth_irq = depth;
947 return 1;
948}
949
950/*
951 * Return check for irq code
952 *
953 * returns 1 if
954 * - we are inside irq code
955 * - we just left irq code
956 *
957 * returns 0 if
958 * - funcgraph-interrupts option is set
959 * - we are not inside irq code
960 */
961static int
962check_irq_return(struct trace_iterator *iter, u32 flags, int depth)
963{
964 int cpu = iter->cpu;
965 int *depth_irq;
966 struct fgraph_data *data = iter->private;
967
968 /*
969 * If we are either displaying irqs, or we got called as
970 * a graph event and private data does not exist,
971 * then we bypass the irq check.
972 */
973 if ((flags & TRACE_GRAPH_PRINT_IRQS) ||
974 (!data))
975 return 0;
976
977 depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq);
978
979 /*
980 * We are not inside the irq code.
981 */
982 if (*depth_irq == -1)
983 return 0;
984
985 /*
986 * We are inside the irq code, and this is returning entry.
987 * Let's not trace it and clear the entry depth, since
988 * we are out of irq code.
989 *
990 * This condition ensures that we 'leave the irq code' once
991 * we are out of the entry depth. Thus protecting us from
992 * the RETURN entry loss.
993 */
994 if (*depth_irq >= depth) {
995 *depth_irq = -1;
996 return 1;
997 }
998
999 /*
1000 * We are inside the irq code, and this is not the entry.
1001 */
1002 return 1;
1003}
1004
858static enum print_line_t 1005static enum print_line_t
859print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, 1006print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
860 struct trace_iterator *iter, u32 flags) 1007 struct trace_iterator *iter, u32 flags)
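
check_irq_entry() and check_irq_return() above implement a small per-CPU state machine: depth_irq stays at -1 while outside interrupt code, records the call depth at which execution entered the __irqentry_text section, and every entry or return at or below that depth is swallowed until a return pops back past the recorded depth. A userspace model of that state machine, a sketch only: the address window and the event stream are invented, and a plain global stands in for the per-CPU field.

#include <stdio.h>

#define IRQ_LO 0x1000UL   /* stand-in for __irqentry_text_start */
#define IRQ_HI 0x2000UL   /* stand-in for __irqentry_text_end   */

static int depth_irq = -1;   /* -1: not inside irq code */

/* returns 1 if the entry event should be hidden */
static int check_irq_entry(unsigned long addr, int depth)
{
        if (depth_irq >= 0)
                return 1;                 /* already inside irq code */
        if (addr < IRQ_LO || addr >= IRQ_HI)
                return 0;                 /* ordinary function, print it */
        depth_irq = depth;                /* entering irq code */
        return 1;
}

/* returns 1 if the return event should be hidden */
static int check_irq_return(int depth)
{
        if (depth_irq == -1)
                return 0;                 /* not inside irq code */
        if (depth_irq >= depth) {         /* popping back past the entry */
                depth_irq = -1;
                return 1;
        }
        return 1;                         /* still inside irq code */
}

int main(void)
{
        struct { char type; unsigned long addr; int depth; } ev[] = {
                { 'E', 0x4000, 0 },   /* normal function, printed  */
                { 'E', 0x1234, 1 },   /* irq handler entry, hidden */
                { 'E', 0x4100, 2 },   /* nested inside irq, hidden */
                { 'R', 0,      2 },   /* hidden                    */
                { 'R', 0,      1 },   /* leaves irq code, hidden   */
                { 'R', 0,      0 },   /* back to normal, printed   */
        };
        for (unsigned i = 0; i < sizeof(ev) / sizeof(ev[0]); i++) {
                int hide = (ev[i].type == 'E')
                        ? check_irq_entry(ev[i].addr, ev[i].depth)
                        : check_irq_return(ev[i].depth);
                printf("%c depth=%d %s\n", ev[i].type, ev[i].depth,
                       hide ? "hidden" : "printed");
        }
        return 0;
}
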
@@ -865,6 +1012,9 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
865 static enum print_line_t ret; 1012 static enum print_line_t ret;
866 int cpu = iter->cpu; 1013 int cpu = iter->cpu;
867 1014
1015 if (check_irq_entry(iter, flags, call->func, call->depth))
1016 return TRACE_TYPE_HANDLED;
1017
868 if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func, flags)) 1018 if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func, flags))
869 return TRACE_TYPE_PARTIAL_LINE; 1019 return TRACE_TYPE_PARTIAL_LINE;
870 1020
@@ -902,6 +1052,9 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
902 int ret; 1052 int ret;
903 int i; 1053 int i;
904 1054
1055 if (check_irq_return(iter, flags, trace->depth))
1056 return TRACE_TYPE_HANDLED;
1057
905 if (data) { 1058 if (data) {
906 struct fgraph_cpu_data *cpu_data; 1059 struct fgraph_cpu_data *cpu_data;
907 int cpu = iter->cpu; 1060 int cpu = iter->cpu;
@@ -1054,7 +1207,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
1054 1207
1055 1208
1056enum print_line_t 1209enum print_line_t
1057print_graph_function_flags(struct trace_iterator *iter, u32 flags) 1210__print_graph_function_flags(struct trace_iterator *iter, u32 flags)
1058{ 1211{
1059 struct ftrace_graph_ent_entry *field; 1212 struct ftrace_graph_ent_entry *field;
1060 struct fgraph_data *data = iter->private; 1213 struct fgraph_data *data = iter->private;
@@ -1117,7 +1270,18 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags)
1117static enum print_line_t 1270static enum print_line_t
1118print_graph_function(struct trace_iterator *iter) 1271print_graph_function(struct trace_iterator *iter)
1119{ 1272{
1120 return print_graph_function_flags(iter, tracer_flags.val); 1273 return __print_graph_function_flags(iter, tracer_flags.val);
1274}
1275
1276enum print_line_t print_graph_function_flags(struct trace_iterator *iter,
1277 u32 flags)
1278{
1279 if (trace_flags & TRACE_ITER_LATENCY_FMT)
1280 flags |= TRACE_GRAPH_PRINT_DURATION;
1281 else
1282 flags |= TRACE_GRAPH_PRINT_ABS_TIME;
1283
1284 return __print_graph_function_flags(iter, flags);
1121} 1285}
1122 1286
1123static enum print_line_t 1287static enum print_line_t
@@ -1149,7 +1313,7 @@ static void print_lat_header(struct seq_file *s, u32 flags)
1149 seq_printf(s, "#%.*s|||| / \n", size, spaces); 1313 seq_printf(s, "#%.*s|||| / \n", size, spaces);
1150} 1314}
1151 1315
1152void print_graph_headers_flags(struct seq_file *s, u32 flags) 1316static void __print_graph_headers_flags(struct seq_file *s, u32 flags)
1153{ 1317{
1154 int lat = trace_flags & TRACE_ITER_LATENCY_FMT; 1318 int lat = trace_flags & TRACE_ITER_LATENCY_FMT;
1155 1319
@@ -1190,6 +1354,23 @@ void print_graph_headers(struct seq_file *s)
1190 print_graph_headers_flags(s, tracer_flags.val); 1354 print_graph_headers_flags(s, tracer_flags.val);
1191} 1355}
1192 1356
1357void print_graph_headers_flags(struct seq_file *s, u32 flags)
1358{
1359 struct trace_iterator *iter = s->private;
1360
1361 if (trace_flags & TRACE_ITER_LATENCY_FMT) {
1362 /* print nothing if the buffers are empty */
1363 if (trace_empty(iter))
1364 return;
1365
1366 print_trace_header(s, iter);
1367 flags |= TRACE_GRAPH_PRINT_DURATION;
1368 } else
1369 flags |= TRACE_GRAPH_PRINT_ABS_TIME;
1370
1371 __print_graph_headers_flags(s, flags);
1372}
1373
1193void graph_trace_open(struct trace_iterator *iter) 1374void graph_trace_open(struct trace_iterator *iter)
1194{ 1375{
1195 /* pid and depth on the last trace processed */ 1376 /* pid and depth on the last trace processed */
@@ -1210,9 +1391,12 @@ void graph_trace_open(struct trace_iterator *iter)
1210 pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid); 1391 pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
1211 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth); 1392 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
1212 int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore); 1393 int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore);
1394 int *depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq);
1395
1213 *pid = -1; 1396 *pid = -1;
1214 *depth = 0; 1397 *depth = 0;
1215 *ignore = 0; 1398 *ignore = 0;
1399 *depth_irq = -1;
1216 } 1400 }
1217 1401
1218 iter->private = data; 1402 iter->private = data;
@@ -1235,6 +1419,14 @@ void graph_trace_close(struct trace_iterator *iter)
1235 } 1419 }
1236} 1420}
1237 1421
1422static int func_graph_set_flag(u32 old_flags, u32 bit, int set)
1423{
1424 if (bit == TRACE_GRAPH_PRINT_IRQS)
1425 ftrace_graph_skip_irqs = !set;
1426
1427 return 0;
1428}
1429
1238static struct trace_event_functions graph_functions = { 1430static struct trace_event_functions graph_functions = {
1239 .trace = print_graph_function_event, 1431 .trace = print_graph_function_event,
1240}; 1432};
@@ -1261,6 +1453,7 @@ static struct tracer graph_trace __read_mostly = {
1261 .print_line = print_graph_function, 1453 .print_line = print_graph_function,
1262 .print_header = print_graph_headers, 1454 .print_header = print_graph_headers,
1263 .flags = &tracer_flags, 1455 .flags = &tracer_flags,
1456 .set_flag = func_graph_set_flag,
1264#ifdef CONFIG_FTRACE_SELFTEST 1457#ifdef CONFIG_FTRACE_SELFTEST
1265 .selftest = trace_selftest_startup_function_graph, 1458 .selftest = trace_selftest_startup_function_graph,
1266#endif 1459#endif
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 73a6b0601f2e..5cf8c602b880 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -87,14 +87,22 @@ static __cacheline_aligned_in_smp unsigned long max_sequence;
87 87
88#ifdef CONFIG_FUNCTION_TRACER 88#ifdef CONFIG_FUNCTION_TRACER
89/* 89/*
90 * irqsoff uses its own tracer function to keep the overhead down: 90 * Prologue for the preempt and irqs off function tracers.
91 *
92 * Returns 1 if it is OK to continue, and data->disabled is
93 * incremented.
94 * 0 if the trace is to be ignored, and data->disabled
95 * is kept the same.
96 *
97 * Note, this function is also used outside this ifdef but
98 * inside the #ifdef of the function graph tracer below.
99 * This is OK, since the function graph tracer is
100 * dependent on the function tracer.
91 */ 101 */
92static void 102static int func_prolog_dec(struct trace_array *tr,
93irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) 103 struct trace_array_cpu **data,
104 unsigned long *flags)
94{ 105{
95 struct trace_array *tr = irqsoff_trace;
96 struct trace_array_cpu *data;
97 unsigned long flags;
98 long disabled; 106 long disabled;
99 int cpu; 107 int cpu;
100 108
@@ -106,18 +114,38 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
106 */ 114 */
107 cpu = raw_smp_processor_id(); 115 cpu = raw_smp_processor_id();
108 if (likely(!per_cpu(tracing_cpu, cpu))) 116 if (likely(!per_cpu(tracing_cpu, cpu)))
109 return; 117 return 0;
110 118
111 local_save_flags(flags); 119 local_save_flags(*flags);
112 /* slight chance to get a false positive on tracing_cpu */ 120 /* slight chance to get a false positive on tracing_cpu */
113 if (!irqs_disabled_flags(flags)) 121 if (!irqs_disabled_flags(*flags))
114 return; 122 return 0;
115 123
116 data = tr->data[cpu]; 124 *data = tr->data[cpu];
117 disabled = atomic_inc_return(&data->disabled); 125 disabled = atomic_inc_return(&(*data)->disabled);
118 126
119 if (likely(disabled == 1)) 127 if (likely(disabled == 1))
120 trace_function(tr, ip, parent_ip, flags, preempt_count()); 128 return 1;
129
130 atomic_dec(&(*data)->disabled);
131
132 return 0;
133}
134
135/*
136 * irqsoff uses its own tracer function to keep the overhead down:
137 */
138static void
139irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
140{
141 struct trace_array *tr = irqsoff_trace;
142 struct trace_array_cpu *data;
143 unsigned long flags;
144
145 if (!func_prolog_dec(tr, &data, &flags))
146 return;
147
148 trace_function(tr, ip, parent_ip, flags, preempt_count());
121 149
122 atomic_dec(&data->disabled); 150 atomic_dec(&data->disabled);
123} 151}
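
func_prolog_dec() above factors out the boilerplate that was previously duplicated in irqsoff_tracer_call(), irqsoff_graph_entry() and irqsoff_graph_return(): check that this CPU is the one being traced, re-check that interrupts really are off, then take a recursion guard by bumping data->disabled and proceed only as its first user. A compact userspace model of just the guard part, a sketch in which a single C11 atomic stands in for the per-CPU trace_array_cpu and the function names are illustrative only.

#include <stdatomic.h>
#include <stdio.h>

static atomic_long disabled;          /* models data->disabled */

/* returns 1 if the caller may trace; on success the guard stays held */
static int func_prolog_dec(void)
{
        long d = atomic_fetch_add(&disabled, 1) + 1;  /* atomic_inc_return() */

        if (d == 1)
                return 1;                     /* first user: go ahead */

        atomic_fetch_sub(&disabled, 1);       /* nested: back off */
        return 0;
}

static void tracer_call(const char *what)
{
        if (!func_prolog_dec())
                return;
        printf("traced: %s\n", what);
        atomic_fetch_sub(&disabled, 1);       /* drop the guard */
}

int main(void)
{
        tracer_call("outer call");            /* traced */
        /* a re-entrant call while the guard is held is dropped: */
        atomic_fetch_add(&disabled, 1);
        tracer_call("nested call");           /* ignored, disabled != 1 */
        atomic_fetch_sub(&disabled, 1);
        tracer_call("later call");            /* traced again */
        return 0;
}
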
@@ -155,30 +183,16 @@ static int irqsoff_graph_entry(struct ftrace_graph_ent *trace)
155 struct trace_array *tr = irqsoff_trace; 183 struct trace_array *tr = irqsoff_trace;
156 struct trace_array_cpu *data; 184 struct trace_array_cpu *data;
157 unsigned long flags; 185 unsigned long flags;
158 long disabled;
159 int ret; 186 int ret;
160 int cpu;
161 int pc; 187 int pc;
162 188
163 cpu = raw_smp_processor_id(); 189 if (!func_prolog_dec(tr, &data, &flags))
164 if (likely(!per_cpu(tracing_cpu, cpu)))
165 return 0; 190 return 0;
166 191
167 local_save_flags(flags); 192 pc = preempt_count();
168 /* slight chance to get a false positive on tracing_cpu */ 193 ret = __trace_graph_entry(tr, trace, flags, pc);
169 if (!irqs_disabled_flags(flags))
170 return 0;
171
172 data = tr->data[cpu];
173 disabled = atomic_inc_return(&data->disabled);
174
175 if (likely(disabled == 1)) {
176 pc = preempt_count();
177 ret = __trace_graph_entry(tr, trace, flags, pc);
178 } else
179 ret = 0;
180
181 atomic_dec(&data->disabled); 194 atomic_dec(&data->disabled);
195
182 return ret; 196 return ret;
183} 197}
184 198
@@ -187,27 +201,13 @@ static void irqsoff_graph_return(struct ftrace_graph_ret *trace)
187 struct trace_array *tr = irqsoff_trace; 201 struct trace_array *tr = irqsoff_trace;
188 struct trace_array_cpu *data; 202 struct trace_array_cpu *data;
189 unsigned long flags; 203 unsigned long flags;
190 long disabled;
191 int cpu;
192 int pc; 204 int pc;
193 205
194 cpu = raw_smp_processor_id(); 206 if (!func_prolog_dec(tr, &data, &flags))
195 if (likely(!per_cpu(tracing_cpu, cpu)))
196 return; 207 return;
197 208
198 local_save_flags(flags); 209 pc = preempt_count();
199 /* slight chance to get a false positive on tracing_cpu */ 210 __trace_graph_return(tr, trace, flags, pc);
200 if (!irqs_disabled_flags(flags))
201 return;
202
203 data = tr->data[cpu];
204 disabled = atomic_inc_return(&data->disabled);
205
206 if (likely(disabled == 1)) {
207 pc = preempt_count();
208 __trace_graph_return(tr, trace, flags, pc);
209 }
210
211 atomic_dec(&data->disabled); 211 atomic_dec(&data->disabled);
212} 212}
213 213
@@ -229,75 +229,33 @@ static void irqsoff_trace_close(struct trace_iterator *iter)
229 229
230static enum print_line_t irqsoff_print_line(struct trace_iterator *iter) 230static enum print_line_t irqsoff_print_line(struct trace_iterator *iter)
231{ 231{
232 u32 flags = GRAPH_TRACER_FLAGS;
233
234 if (trace_flags & TRACE_ITER_LATENCY_FMT)
235 flags |= TRACE_GRAPH_PRINT_DURATION;
236 else
237 flags |= TRACE_GRAPH_PRINT_ABS_TIME;
238
239 /* 232 /*
240 * In graph mode call the graph tracer output function, 233 * In graph mode call the graph tracer output function,
241 * otherwise go with the TRACE_FN event handler 234 * otherwise go with the TRACE_FN event handler
242 */ 235 */
243 if (is_graph()) 236 if (is_graph())
244 return print_graph_function_flags(iter, flags); 237 return print_graph_function_flags(iter, GRAPH_TRACER_FLAGS);
245 238
246 return TRACE_TYPE_UNHANDLED; 239 return TRACE_TYPE_UNHANDLED;
247} 240}
248 241
249static void irqsoff_print_header(struct seq_file *s) 242static void irqsoff_print_header(struct seq_file *s)
250{ 243{
251 if (is_graph()) { 244 if (is_graph())
252 struct trace_iterator *iter = s->private; 245 print_graph_headers_flags(s, GRAPH_TRACER_FLAGS);
253 u32 flags = GRAPH_TRACER_FLAGS; 246 else
254
255 if (trace_flags & TRACE_ITER_LATENCY_FMT) {
256 /* print nothing if the buffers are empty */
257 if (trace_empty(iter))
258 return;
259
260 print_trace_header(s, iter);
261 flags |= TRACE_GRAPH_PRINT_DURATION;
262 } else
263 flags |= TRACE_GRAPH_PRINT_ABS_TIME;
264
265 print_graph_headers_flags(s, flags);
266 } else
267 trace_default_header(s); 247 trace_default_header(s);
268} 248}
269 249
270static void 250static void
271trace_graph_function(struct trace_array *tr,
272 unsigned long ip, unsigned long flags, int pc)
273{
274 u64 time = trace_clock_local();
275 struct ftrace_graph_ent ent = {
276 .func = ip,
277 .depth = 0,
278 };
279 struct ftrace_graph_ret ret = {
280 .func = ip,
281 .depth = 0,
282 .calltime = time,
283 .rettime = time,
284 };
285
286 __trace_graph_entry(tr, &ent, flags, pc);
287 __trace_graph_return(tr, &ret, flags, pc);
288}
289
290static void
291__trace_function(struct trace_array *tr, 251__trace_function(struct trace_array *tr,
292 unsigned long ip, unsigned long parent_ip, 252 unsigned long ip, unsigned long parent_ip,
293 unsigned long flags, int pc) 253 unsigned long flags, int pc)
294{ 254{
295 if (!is_graph()) 255 if (is_graph())
256 trace_graph_function(tr, ip, parent_ip, flags, pc);
257 else
296 trace_function(tr, ip, parent_ip, flags, pc); 258 trace_function(tr, ip, parent_ip, flags, pc);
297 else {
298 trace_graph_function(tr, parent_ip, flags, pc);
299 trace_graph_function(tr, ip, flags, pc);
300 }
301} 259}
302 260
303#else 261#else
diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c
index 7b8ecd751d93..3c5c5dfea0b3 100644
--- a/kernel/trace/trace_kdb.c
+++ b/kernel/trace/trace_kdb.c
@@ -13,7 +13,6 @@
13#include <linux/kdb.h> 13#include <linux/kdb.h>
14#include <linux/ftrace.h> 14#include <linux/ftrace.h>
15 15
16#include "../debug/kdb/kdb_private.h"
17#include "trace.h" 16#include "trace.h"
18#include "trace_output.h" 17#include "trace_output.h"
19 18
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 544301d29dee..2dec9bcde8b4 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -31,7 +31,6 @@
31#include <linux/perf_event.h> 31#include <linux/perf_event.h>
32#include <linux/stringify.h> 32#include <linux/stringify.h>
33#include <linux/limits.h> 33#include <linux/limits.h>
34#include <linux/uaccess.h>
35#include <asm/bitsperlong.h> 34#include <asm/bitsperlong.h>
36 35
37#include "trace.h" 36#include "trace.h"
@@ -648,7 +647,7 @@ static int register_trace_probe(struct trace_probe *tp)
648 } 647 }
649 ret = register_probe_event(tp); 648 ret = register_probe_event(tp);
650 if (ret) { 649 if (ret) {
651 pr_warning("Faild to register probe event(%d)\n", ret); 650 pr_warning("Failed to register probe event(%d)\n", ret);
652 goto end; 651 goto end;
653 } 652 }
654 653
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 4086eae6e81b..7319559ed59f 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -31,48 +31,98 @@ static int wakeup_rt;
31static arch_spinlock_t wakeup_lock = 31static arch_spinlock_t wakeup_lock =
32 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; 32 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
33 33
34static void wakeup_reset(struct trace_array *tr);
34static void __wakeup_reset(struct trace_array *tr); 35static void __wakeup_reset(struct trace_array *tr);
36static int wakeup_graph_entry(struct ftrace_graph_ent *trace);
37static void wakeup_graph_return(struct ftrace_graph_ret *trace);
35 38
36static int save_lat_flag; 39static int save_lat_flag;
37 40
41#define TRACE_DISPLAY_GRAPH 1
42
43static struct tracer_opt trace_opts[] = {
44#ifdef CONFIG_FUNCTION_GRAPH_TRACER
45 /* display latency trace as call graph */
46 { TRACER_OPT(display-graph, TRACE_DISPLAY_GRAPH) },
47#endif
48 { } /* Empty entry */
49};
50
51static struct tracer_flags tracer_flags = {
52 .val = 0,
53 .opts = trace_opts,
54};
55
56#define is_graph() (tracer_flags.val & TRACE_DISPLAY_GRAPH)
57
38#ifdef CONFIG_FUNCTION_TRACER 58#ifdef CONFIG_FUNCTION_TRACER
59
39/* 60/*
40 * irqsoff uses its own tracer function to keep the overhead down: 61 * Prologue for the wakeup function tracers.
62 *
63 * Returns 1 if it is OK to continue, and preemption
64 * is disabled and data->disabled is incremented.
65 * 0 if the trace is to be ignored, and preemption
66 * is not disabled and data->disabled is
67 * kept the same.
68 *
69 * Note, this function is also used outside this ifdef but
70 * inside the #ifdef of the function graph tracer below.
71 * This is OK, since the function graph tracer is
72 * dependent on the function tracer.
41 */ 73 */
42static void 74static int
43wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) 75func_prolog_preempt_disable(struct trace_array *tr,
76 struct trace_array_cpu **data,
77 int *pc)
44{ 78{
45 struct trace_array *tr = wakeup_trace;
46 struct trace_array_cpu *data;
47 unsigned long flags;
48 long disabled; 79 long disabled;
49 int cpu; 80 int cpu;
50 int pc;
51 81
52 if (likely(!wakeup_task)) 82 if (likely(!wakeup_task))
53 return; 83 return 0;
54 84
55 pc = preempt_count(); 85 *pc = preempt_count();
56 preempt_disable_notrace(); 86 preempt_disable_notrace();
57 87
58 cpu = raw_smp_processor_id(); 88 cpu = raw_smp_processor_id();
59 if (cpu != wakeup_current_cpu) 89 if (cpu != wakeup_current_cpu)
60 goto out_enable; 90 goto out_enable;
61 91
62 data = tr->data[cpu]; 92 *data = tr->data[cpu];
63 disabled = atomic_inc_return(&data->disabled); 93 disabled = atomic_inc_return(&(*data)->disabled);
64 if (unlikely(disabled != 1)) 94 if (unlikely(disabled != 1))
65 goto out; 95 goto out;
66 96
67 local_irq_save(flags); 97 return 1;
68 98
69 trace_function(tr, ip, parent_ip, flags, pc); 99out:
100 atomic_dec(&(*data)->disabled);
101
102out_enable:
103 preempt_enable_notrace();
104 return 0;
105}
70 106
107/*
108 * wakeup uses its own tracer function to keep the overhead down:
109 */
110static void
111wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
112{
113 struct trace_array *tr = wakeup_trace;
114 struct trace_array_cpu *data;
115 unsigned long flags;
116 int pc;
117
118 if (!func_prolog_preempt_disable(tr, &data, &pc))
119 return;
120
121 local_irq_save(flags);
122 trace_function(tr, ip, parent_ip, flags, pc);
71 local_irq_restore(flags); 123 local_irq_restore(flags);
72 124
73 out:
74 atomic_dec(&data->disabled); 125 atomic_dec(&data->disabled);
75 out_enable:
76 preempt_enable_notrace(); 126 preempt_enable_notrace();
77} 127}
78 128
@@ -82,6 +132,156 @@ static struct ftrace_ops trace_ops __read_mostly =
82}; 132};
83#endif /* CONFIG_FUNCTION_TRACER */ 133#endif /* CONFIG_FUNCTION_TRACER */
84 134
135static int start_func_tracer(int graph)
136{
137 int ret;
138
139 if (!graph)
140 ret = register_ftrace_function(&trace_ops);
141 else
142 ret = register_ftrace_graph(&wakeup_graph_return,
143 &wakeup_graph_entry);
144
145 if (!ret && tracing_is_enabled())
146 tracer_enabled = 1;
147 else
148 tracer_enabled = 0;
149
150 return ret;
151}
152
153static void stop_func_tracer(int graph)
154{
155 tracer_enabled = 0;
156
157 if (!graph)
158 unregister_ftrace_function(&trace_ops);
159 else
160 unregister_ftrace_graph();
161}
162
163#ifdef CONFIG_FUNCTION_GRAPH_TRACER
164static int wakeup_set_flag(u32 old_flags, u32 bit, int set)
165{
166
167 if (!(bit & TRACE_DISPLAY_GRAPH))
168 return -EINVAL;
169
170 if (!(is_graph() ^ set))
171 return 0;
172
173 stop_func_tracer(!set);
174
175 wakeup_reset(wakeup_trace);
176 tracing_max_latency = 0;
177
178 return start_func_tracer(set);
179}
180
181static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
182{
183 struct trace_array *tr = wakeup_trace;
184 struct trace_array_cpu *data;
185 unsigned long flags;
186 int pc, ret = 0;
187
188 if (!func_prolog_preempt_disable(tr, &data, &pc))
189 return 0;
190
191 local_save_flags(flags);
192 ret = __trace_graph_entry(tr, trace, flags, pc);
193 atomic_dec(&data->disabled);
194 preempt_enable_notrace();
195
196 return ret;
197}
198
199static void wakeup_graph_return(struct ftrace_graph_ret *trace)
200{
201 struct trace_array *tr = wakeup_trace;
202 struct trace_array_cpu *data;
203 unsigned long flags;
204 int pc;
205
206 if (!func_prolog_preempt_disable(tr, &data, &pc))
207 return;
208
209 local_save_flags(flags);
210 __trace_graph_return(tr, trace, flags, pc);
211 atomic_dec(&data->disabled);
212
213 preempt_enable_notrace();
214 return;
215}
216
217static void wakeup_trace_open(struct trace_iterator *iter)
218{
219 if (is_graph())
220 graph_trace_open(iter);
221}
222
223static void wakeup_trace_close(struct trace_iterator *iter)
224{
225 if (iter->private)
226 graph_trace_close(iter);
227}
228
229#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_PROC)
230
231static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
232{
233 /*
234 * In graph mode call the graph tracer output function,
235 * otherwise go with the TRACE_FN event handler
236 */
237 if (is_graph())
238 return print_graph_function_flags(iter, GRAPH_TRACER_FLAGS);
239
240 return TRACE_TYPE_UNHANDLED;
241}
242
243static void wakeup_print_header(struct seq_file *s)
244{
245 if (is_graph())
246 print_graph_headers_flags(s, GRAPH_TRACER_FLAGS);
247 else
248 trace_default_header(s);
249}
250
251static void
252__trace_function(struct trace_array *tr,
253 unsigned long ip, unsigned long parent_ip,
254 unsigned long flags, int pc)
255{
256 if (is_graph())
257 trace_graph_function(tr, ip, parent_ip, flags, pc);
258 else
259 trace_function(tr, ip, parent_ip, flags, pc);
260}
261#else
262#define __trace_function trace_function
263
264static int wakeup_set_flag(u32 old_flags, u32 bit, int set)
265{
266 return -EINVAL;
267}
268
269static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
270{
271 return -1;
272}
273
274static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
275{
276 return TRACE_TYPE_UNHANDLED;
277}
278
279static void wakeup_graph_return(struct ftrace_graph_ret *trace) { }
280static void wakeup_print_header(struct seq_file *s) { }
281static void wakeup_trace_open(struct trace_iterator *iter) { }
282static void wakeup_trace_close(struct trace_iterator *iter) { }
283#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
284
85/* 285/*
86 * Should this new latency be reported/recorded? 286 * Should this new latency be reported/recorded?
87 */ 287 */
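
wakeup_set_flag() above turns the new display-graph option into a live switch: the is_graph() ^ set test drops writes that do not change the state, the currently registered callback is torn down with stop_func_tracer(), the max-latency snapshot is reset, and the other flavour is started with start_func_tracer(). A toy userspace model of that toggle logic, a sketch only: the two tracer functions are stubbed with printf() and, unlike the kernel, the flag word is updated inside the handler.

#include <stdio.h>

#define TRACE_DISPLAY_GRAPH 1

static unsigned int tracer_flags_val;          /* models tracer_flags.val */
#define is_graph() (tracer_flags_val & TRACE_DISPLAY_GRAPH)

static int start_func_tracer(int graph)
{
        printf("register %s tracer\n", graph ? "function-graph" : "function");
        return 0;
}

static void stop_func_tracer(int graph)
{
        printf("unregister %s tracer\n", graph ? "function-graph" : "function");
}

static int wakeup_set_flag(unsigned int bit, int set)
{
        if (!(bit & TRACE_DISPLAY_GRAPH))
                return -1;                     /* -EINVAL in the kernel */

        if (!(is_graph() ^ set))
                return 0;                      /* no state change, nothing to do */

        stop_func_tracer(!set);                /* stop the current flavour */
        /* kernel also does: wakeup_reset(); tracing_max_latency = 0; */

        if (set)                               /* in the kernel the tracing core */
                tracer_flags_val |= TRACE_DISPLAY_GRAPH;   /* updates the flag; */
        else                                   /* folded in here for the model  */
                tracer_flags_val &= ~TRACE_DISPLAY_GRAPH;

        return start_func_tracer(set);         /* start the other flavour */
}

int main(void)
{
        wakeup_set_flag(TRACE_DISPLAY_GRAPH, 1);   /* function -> graph */
        wakeup_set_flag(TRACE_DISPLAY_GRAPH, 1);   /* no-op, already set */
        wakeup_set_flag(TRACE_DISPLAY_GRAPH, 0);   /* graph -> function */
        return 0;
}
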
@@ -152,7 +352,7 @@ probe_wakeup_sched_switch(void *ignore,
152 /* The task we are waiting for is waking up */ 352 /* The task we are waiting for is waking up */
153 data = wakeup_trace->data[wakeup_cpu]; 353 data = wakeup_trace->data[wakeup_cpu];
154 354
155 trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); 355 __trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc);
156 tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); 356 tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);
157 357
158 T0 = data->preempt_timestamp; 358 T0 = data->preempt_timestamp;
@@ -252,7 +452,7 @@ probe_wakeup(void *ignore, struct task_struct *p, int success)
252 * is not called by an assembly function (where as schedule is) 452 * is not called by an assembly function (where as schedule is)
253 * it should be safe to use it here. 453 * it should be safe to use it here.
254 */ 454 */
255 trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); 455 __trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
256 456
257out_locked: 457out_locked:
258 arch_spin_unlock(&wakeup_lock); 458 arch_spin_unlock(&wakeup_lock);
@@ -303,12 +503,8 @@ static void start_wakeup_tracer(struct trace_array *tr)
303 */ 503 */
304 smp_wmb(); 504 smp_wmb();
305 505
306 register_ftrace_function(&trace_ops); 506 if (start_func_tracer(is_graph()))
307 507 printk(KERN_ERR "failed to start wakeup tracer\n");
308 if (tracing_is_enabled())
309 tracer_enabled = 1;
310 else
311 tracer_enabled = 0;
312 508
313 return; 509 return;
314fail_deprobe_wake_new: 510fail_deprobe_wake_new:
@@ -320,7 +516,7 @@ fail_deprobe:
320static void stop_wakeup_tracer(struct trace_array *tr) 516static void stop_wakeup_tracer(struct trace_array *tr)
321{ 517{
322 tracer_enabled = 0; 518 tracer_enabled = 0;
323 unregister_ftrace_function(&trace_ops); 519 stop_func_tracer(is_graph());
324 unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL); 520 unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL);
325 unregister_trace_sched_wakeup_new(probe_wakeup, NULL); 521 unregister_trace_sched_wakeup_new(probe_wakeup, NULL);
326 unregister_trace_sched_wakeup(probe_wakeup, NULL); 522 unregister_trace_sched_wakeup(probe_wakeup, NULL);
@@ -379,9 +575,15 @@ static struct tracer wakeup_tracer __read_mostly =
379 .start = wakeup_tracer_start, 575 .start = wakeup_tracer_start,
380 .stop = wakeup_tracer_stop, 576 .stop = wakeup_tracer_stop,
381 .print_max = 1, 577 .print_max = 1,
578 .print_header = wakeup_print_header,
579 .print_line = wakeup_print_line,
580 .flags = &tracer_flags,
581 .set_flag = wakeup_set_flag,
382#ifdef CONFIG_FTRACE_SELFTEST 582#ifdef CONFIG_FTRACE_SELFTEST
383 .selftest = trace_selftest_startup_wakeup, 583 .selftest = trace_selftest_startup_wakeup,
384#endif 584#endif
585 .open = wakeup_trace_open,
586 .close = wakeup_trace_close,
385 .use_max_tr = 1, 587 .use_max_tr = 1,
386}; 588};
387 589
@@ -394,9 +596,15 @@ static struct tracer wakeup_rt_tracer __read_mostly =
394 .stop = wakeup_tracer_stop, 596 .stop = wakeup_tracer_stop,
395 .wait_pipe = poll_wait_pipe, 597 .wait_pipe = poll_wait_pipe,
396 .print_max = 1, 598 .print_max = 1,
599 .print_header = wakeup_print_header,
600 .print_line = wakeup_print_line,
601 .flags = &tracer_flags,
602 .set_flag = wakeup_set_flag,
397#ifdef CONFIG_FTRACE_SELFTEST 603#ifdef CONFIG_FTRACE_SELFTEST
398 .selftest = trace_selftest_startup_wakeup, 604 .selftest = trace_selftest_startup_wakeup,
399#endif 605#endif
606 .open = wakeup_trace_open,
607 .close = wakeup_trace_close,
400 .use_max_tr = 1, 608 .use_max_tr = 1,
401}; 609};
402 610
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index a6b7e0e0f3eb..4c5dead0c239 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -195,6 +195,7 @@ static const struct file_operations stack_max_size_fops = {
195 .open = tracing_open_generic, 195 .open = tracing_open_generic,
196 .read = stack_max_size_read, 196 .read = stack_max_size_read,
197 .write = stack_max_size_write, 197 .write = stack_max_size_write,
198 .llseek = default_llseek,
198}; 199};
199 200
200static void * 201static void *
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index a7cc3793baf6..209b379a4721 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -263,6 +263,11 @@ int __init trace_workqueue_early_init(void)
263{ 263{
264 int ret, cpu; 264 int ret, cpu;
265 265
266 for_each_possible_cpu(cpu) {
267 spin_lock_init(&workqueue_cpu_stat(cpu)->lock);
268 INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list);
269 }
270
266 ret = register_trace_workqueue_insertion(probe_workqueue_insertion, NULL); 271 ret = register_trace_workqueue_insertion(probe_workqueue_insertion, NULL);
267 if (ret) 272 if (ret)
268 goto out; 273 goto out;
@@ -279,11 +284,6 @@ int __init trace_workqueue_early_init(void)
279 if (ret) 284 if (ret)
280 goto no_creation; 285 goto no_creation;
281 286
282 for_each_possible_cpu(cpu) {
283 spin_lock_init(&workqueue_cpu_stat(cpu)->lock);
284 INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list);
285 }
286
287 return 0; 287 return 0;
288 288
289no_creation: 289no_creation:
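
The final hunk moves the per-CPU spinlock and list initialisation ahead of the tracepoint registrations, so a probe that fires while the later register_trace_*() calls are still in progress can no longer touch an uninitialised workqueue_cpu_stat. The general pattern, initialise shared state fully before publishing the callback that uses it, in a short userspace sketch with invented names:

#include <pthread.h>
#include <stdio.h>

struct cpu_stat {
        pthread_mutex_t lock;
        int inserted;
};

static struct cpu_stat stat_demo;
static void (*probe)(void);                    /* the "registered" callback */

static void probe_insertion(void)
{
        pthread_mutex_lock(&stat_demo.lock);   /* safe only if init ran first */
        stat_demo.inserted++;
        pthread_mutex_unlock(&stat_demo.lock);
}

int main(void)
{
        /* 1. initialise the data the probe will touch ... */
        pthread_mutex_init(&stat_demo.lock, NULL);
        stat_demo.inserted = 0;

        /* 2. ... and only then publish/register the probe */
        probe = probe_insertion;

        probe();                               /* a tracepoint firing */
        printf("insertions seen: %d\n", stat_demo.inserted);
        return 0;
}
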