aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c4
-rw-r--r--arch/x86/kernel/process.c16
-rw-r--r--block/Kconfig1
-rw-r--r--block/blk-core.c46
-rw-r--r--block/blktrace.c332
-rw-r--r--block/elevator.c12
-rw-r--r--drivers/md/dm.c8
-rw-r--r--fs/bio.c5
-rw-r--r--include/linux/blktrace_api.h172
-rw-r--r--include/linux/ftrace.h29
-rw-r--r--include/trace/block.h76
-rw-r--r--kernel/trace/Kconfig11
-rw-r--r--kernel/trace/Makefile1
-rw-r--r--kernel/trace/trace.h7
-rw-r--r--kernel/trace/trace_power.c179
-rw-r--r--mm/bounce.c5
-rwxr-xr-xscripts/recordmcount.pl2
-rw-r--r--scripts/trace/power.pl108
18 files changed, 815 insertions, 199 deletions
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 8e48c5d4467d..88ea02dcb622 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -33,6 +33,7 @@
33#include <linux/cpufreq.h> 33#include <linux/cpufreq.h>
34#include <linux/compiler.h> 34#include <linux/compiler.h>
35#include <linux/dmi.h> 35#include <linux/dmi.h>
36#include <linux/ftrace.h>
36 37
37#include <linux/acpi.h> 38#include <linux/acpi.h>
38#include <acpi/processor.h> 39#include <acpi/processor.h>
@@ -391,6 +392,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
391 unsigned int next_perf_state = 0; /* Index into perf table */ 392 unsigned int next_perf_state = 0; /* Index into perf table */
392 unsigned int i; 393 unsigned int i;
393 int result = 0; 394 int result = 0;
395 struct power_trace it;
394 396
395 dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); 397 dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu);
396 398
@@ -427,6 +429,8 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
427 } 429 }
428 } 430 }
429 431
432 trace_power_mark(&it, POWER_PSTATE, next_perf_state);
433
430 switch (data->cpu_feature) { 434 switch (data->cpu_feature) {
431 case SYSTEM_INTEL_MSR_CAPABLE: 435 case SYSTEM_INTEL_MSR_CAPABLE:
432 cmd.type = SYSTEM_INTEL_MSR_CAPABLE; 436 cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c622772744d8..c27af49a4ede 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -7,6 +7,7 @@
7#include <linux/module.h> 7#include <linux/module.h>
8#include <linux/pm.h> 8#include <linux/pm.h>
9#include <linux/clockchips.h> 9#include <linux/clockchips.h>
10#include <linux/ftrace.h>
10#include <asm/system.h> 11#include <asm/system.h>
11 12
12unsigned long idle_halt; 13unsigned long idle_halt;
@@ -100,6 +101,9 @@ static inline int hlt_use_halt(void)
100void default_idle(void) 101void default_idle(void)
101{ 102{
102 if (hlt_use_halt()) { 103 if (hlt_use_halt()) {
104 struct power_trace it;
105
106 trace_power_start(&it, POWER_CSTATE, 1);
103 current_thread_info()->status &= ~TS_POLLING; 107 current_thread_info()->status &= ~TS_POLLING;
104 /* 108 /*
105 * TS_POLLING-cleared state must be visible before we 109 * TS_POLLING-cleared state must be visible before we
@@ -112,6 +116,7 @@ void default_idle(void)
112 else 116 else
113 local_irq_enable(); 117 local_irq_enable();
114 current_thread_info()->status |= TS_POLLING; 118 current_thread_info()->status |= TS_POLLING;
119 trace_power_end(&it);
115 } else { 120 } else {
116 local_irq_enable(); 121 local_irq_enable();
117 /* loop is done by the caller */ 122 /* loop is done by the caller */
@@ -154,24 +159,31 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
154 */ 159 */
155void mwait_idle_with_hints(unsigned long ax, unsigned long cx) 160void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
156{ 161{
162 struct power_trace it;
163
164 trace_power_start(&it, POWER_CSTATE, (ax>>4)+1);
157 if (!need_resched()) { 165 if (!need_resched()) {
158 __monitor((void *)&current_thread_info()->flags, 0, 0); 166 __monitor((void *)&current_thread_info()->flags, 0, 0);
159 smp_mb(); 167 smp_mb();
160 if (!need_resched()) 168 if (!need_resched())
161 __mwait(ax, cx); 169 __mwait(ax, cx);
162 } 170 }
171 trace_power_end(&it);
163} 172}
164 173
165/* Default MONITOR/MWAIT with no hints, used for default C1 state */ 174/* Default MONITOR/MWAIT with no hints, used for default C1 state */
166static void mwait_idle(void) 175static void mwait_idle(void)
167{ 176{
177 struct power_trace it;
168 if (!need_resched()) { 178 if (!need_resched()) {
179 trace_power_start(&it, POWER_CSTATE, 1);
169 __monitor((void *)&current_thread_info()->flags, 0, 0); 180 __monitor((void *)&current_thread_info()->flags, 0, 0);
170 smp_mb(); 181 smp_mb();
171 if (!need_resched()) 182 if (!need_resched())
172 __sti_mwait(0, 0); 183 __sti_mwait(0, 0);
173 else 184 else
174 local_irq_enable(); 185 local_irq_enable();
186 trace_power_end(&it);
175 } else 187 } else
176 local_irq_enable(); 188 local_irq_enable();
177} 189}
@@ -183,9 +195,13 @@ static void mwait_idle(void)
183 */ 195 */
184static void poll_idle(void) 196static void poll_idle(void)
185{ 197{
198 struct power_trace it;
199
200 trace_power_start(&it, POWER_CSTATE, 0);
186 local_irq_enable(); 201 local_irq_enable();
187 while (!need_resched()) 202 while (!need_resched())
188 cpu_relax(); 203 cpu_relax();
204 trace_power_end(&it);
189} 205}
190 206
191/* 207/*
diff --git a/block/Kconfig b/block/Kconfig
index 1ab7c15c8d7a..290b219fad9c 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -47,6 +47,7 @@ config BLK_DEV_IO_TRACE
47 depends on SYSFS 47 depends on SYSFS
48 select RELAY 48 select RELAY
49 select DEBUG_FS 49 select DEBUG_FS
50 select TRACEPOINTS
50 help 51 help
51 Say Y here if you want to be able to trace the block layer actions 52 Say Y here if you want to be able to trace the block layer actions
52 on a given queue. Tracing allows you to see any traffic happening 53 on a given queue. Tracing allows you to see any traffic happening
diff --git a/block/blk-core.c b/block/blk-core.c
index 10e8a64a5a5b..0c06cf5aaaf8 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -28,9 +28,23 @@
28#include <linux/task_io_accounting_ops.h> 28#include <linux/task_io_accounting_ops.h>
29#include <linux/blktrace_api.h> 29#include <linux/blktrace_api.h>
30#include <linux/fault-inject.h> 30#include <linux/fault-inject.h>
31#include <trace/block.h>
31 32
32#include "blk.h" 33#include "blk.h"
33 34
35DEFINE_TRACE(block_plug);
36DEFINE_TRACE(block_unplug_io);
37DEFINE_TRACE(block_unplug_timer);
38DEFINE_TRACE(block_getrq);
39DEFINE_TRACE(block_sleeprq);
40DEFINE_TRACE(block_rq_requeue);
41DEFINE_TRACE(block_bio_backmerge);
42DEFINE_TRACE(block_bio_frontmerge);
43DEFINE_TRACE(block_bio_queue);
44DEFINE_TRACE(block_rq_complete);
45DEFINE_TRACE(block_remap); /* Also used in drivers/md/dm.c */
46EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap);
47
34static int __make_request(struct request_queue *q, struct bio *bio); 48static int __make_request(struct request_queue *q, struct bio *bio);
35 49
36/* 50/*
@@ -205,7 +219,7 @@ void blk_plug_device(struct request_queue *q)
205 219
206 if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) { 220 if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) {
207 mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); 221 mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
208 blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG); 222 trace_block_plug(q);
209 } 223 }
210} 224}
211EXPORT_SYMBOL(blk_plug_device); 225EXPORT_SYMBOL(blk_plug_device);
@@ -292,9 +306,7 @@ void blk_unplug_work(struct work_struct *work)
292 struct request_queue *q = 306 struct request_queue *q =
293 container_of(work, struct request_queue, unplug_work); 307 container_of(work, struct request_queue, unplug_work);
294 308
295 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, 309 trace_block_unplug_io(q);
296 q->rq.count[READ] + q->rq.count[WRITE]);
297
298 q->unplug_fn(q); 310 q->unplug_fn(q);
299} 311}
300 312
@@ -302,9 +314,7 @@ void blk_unplug_timeout(unsigned long data)
302{ 314{
303 struct request_queue *q = (struct request_queue *)data; 315 struct request_queue *q = (struct request_queue *)data;
304 316
305 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL, 317 trace_block_unplug_timer(q);
306 q->rq.count[READ] + q->rq.count[WRITE]);
307
308 kblockd_schedule_work(q, &q->unplug_work); 318 kblockd_schedule_work(q, &q->unplug_work);
309} 319}
310 320
@@ -314,9 +324,7 @@ void blk_unplug(struct request_queue *q)
314 * devices don't necessarily have an ->unplug_fn defined 324 * devices don't necessarily have an ->unplug_fn defined
315 */ 325 */
316 if (q->unplug_fn) { 326 if (q->unplug_fn) {
317 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, 327 trace_block_unplug_io(q);
318 q->rq.count[READ] + q->rq.count[WRITE]);
319
320 q->unplug_fn(q); 328 q->unplug_fn(q);
321 } 329 }
322} 330}
@@ -822,7 +830,7 @@ rq_starved:
822 if (ioc_batching(q, ioc)) 830 if (ioc_batching(q, ioc))
823 ioc->nr_batch_requests--; 831 ioc->nr_batch_requests--;
824 832
825 blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ); 833 trace_block_getrq(q, bio, rw);
826out: 834out:
827 return rq; 835 return rq;
828} 836}
@@ -848,7 +856,7 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
848 prepare_to_wait_exclusive(&rl->wait[rw], &wait, 856 prepare_to_wait_exclusive(&rl->wait[rw], &wait,
849 TASK_UNINTERRUPTIBLE); 857 TASK_UNINTERRUPTIBLE);
850 858
851 blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ); 859 trace_block_sleeprq(q, bio, rw);
852 860
853 __generic_unplug_device(q); 861 __generic_unplug_device(q);
854 spin_unlock_irq(q->queue_lock); 862 spin_unlock_irq(q->queue_lock);
@@ -928,7 +936,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
928{ 936{
929 blk_delete_timer(rq); 937 blk_delete_timer(rq);
930 blk_clear_rq_complete(rq); 938 blk_clear_rq_complete(rq);
931 blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); 939 trace_block_rq_requeue(q, rq);
932 940
933 if (blk_rq_tagged(rq)) 941 if (blk_rq_tagged(rq))
934 blk_queue_end_tag(q, rq); 942 blk_queue_end_tag(q, rq);
@@ -1167,7 +1175,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
1167 if (!ll_back_merge_fn(q, req, bio)) 1175 if (!ll_back_merge_fn(q, req, bio))
1168 break; 1176 break;
1169 1177
1170 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); 1178 trace_block_bio_backmerge(q, bio);
1171 1179
1172 req->biotail->bi_next = bio; 1180 req->biotail->bi_next = bio;
1173 req->biotail = bio; 1181 req->biotail = bio;
@@ -1186,7 +1194,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
1186 if (!ll_front_merge_fn(q, req, bio)) 1194 if (!ll_front_merge_fn(q, req, bio))
1187 break; 1195 break;
1188 1196
1189 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); 1197 trace_block_bio_frontmerge(q, bio);
1190 1198
1191 bio->bi_next = req->bio; 1199 bio->bi_next = req->bio;
1192 req->bio = bio; 1200 req->bio = bio;
@@ -1269,7 +1277,7 @@ static inline void blk_partition_remap(struct bio *bio)
1269 bio->bi_sector += p->start_sect; 1277 bio->bi_sector += p->start_sect;
1270 bio->bi_bdev = bdev->bd_contains; 1278 bio->bi_bdev = bdev->bd_contains;
1271 1279
1272 blk_add_trace_remap(bdev_get_queue(bio->bi_bdev), bio, 1280 trace_block_remap(bdev_get_queue(bio->bi_bdev), bio,
1273 bdev->bd_dev, bio->bi_sector, 1281 bdev->bd_dev, bio->bi_sector,
1274 bio->bi_sector - p->start_sect); 1282 bio->bi_sector - p->start_sect);
1275 } 1283 }
@@ -1441,10 +1449,10 @@ end_io:
1441 goto end_io; 1449 goto end_io;
1442 1450
1443 if (old_sector != -1) 1451 if (old_sector != -1)
1444 blk_add_trace_remap(q, bio, old_dev, bio->bi_sector, 1452 trace_block_remap(q, bio, old_dev, bio->bi_sector,
1445 old_sector); 1453 old_sector);
1446 1454
1447 blk_add_trace_bio(q, bio, BLK_TA_QUEUE); 1455 trace_block_bio_queue(q, bio);
1448 1456
1449 old_sector = bio->bi_sector; 1457 old_sector = bio->bi_sector;
1450 old_dev = bio->bi_bdev->bd_dev; 1458 old_dev = bio->bi_bdev->bd_dev;
@@ -1656,7 +1664,7 @@ static int __end_that_request_first(struct request *req, int error,
1656 int total_bytes, bio_nbytes, next_idx = 0; 1664 int total_bytes, bio_nbytes, next_idx = 0;
1657 struct bio *bio; 1665 struct bio *bio;
1658 1666
1659 blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE); 1667 trace_block_rq_complete(req->q, req);
1660 1668
1661 /* 1669 /*
1662 * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual 1670 * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual
diff --git a/block/blktrace.c b/block/blktrace.c
index 85049a7e7a17..b0a2cae886db 100644
--- a/block/blktrace.c
+++ b/block/blktrace.c
@@ -23,10 +23,18 @@
23#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/debugfs.h> 24#include <linux/debugfs.h>
25#include <linux/time.h> 25#include <linux/time.h>
26#include <trace/block.h>
26#include <asm/uaccess.h> 27#include <asm/uaccess.h>
27 28
28static unsigned int blktrace_seq __read_mostly = 1; 29static unsigned int blktrace_seq __read_mostly = 1;
29 30
31/* Global reference count of probes */
32static DEFINE_MUTEX(blk_probe_mutex);
33static atomic_t blk_probes_ref = ATOMIC_INIT(0);
34
35static int blk_register_tracepoints(void);
36static void blk_unregister_tracepoints(void);
37
30/* 38/*
31 * Send out a notify message. 39 * Send out a notify message.
32 */ 40 */
@@ -119,7 +127,7 @@ static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK
119 * The worker for the various blk_add_trace*() types. Fills out a 127 * The worker for the various blk_add_trace*() types. Fills out a
120 * blk_io_trace structure and places it in a per-cpu subbuffer. 128 * blk_io_trace structure and places it in a per-cpu subbuffer.
121 */ 129 */
122void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, 130static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
123 int rw, u32 what, int error, int pdu_len, void *pdu_data) 131 int rw, u32 what, int error, int pdu_len, void *pdu_data)
124{ 132{
125 struct task_struct *tsk = current; 133 struct task_struct *tsk = current;
@@ -177,8 +185,6 @@ void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
177 local_irq_restore(flags); 185 local_irq_restore(flags);
178} 186}
179 187
180EXPORT_SYMBOL_GPL(__blk_add_trace);
181
182static struct dentry *blk_tree_root; 188static struct dentry *blk_tree_root;
183static DEFINE_MUTEX(blk_tree_mutex); 189static DEFINE_MUTEX(blk_tree_mutex);
184static unsigned int root_users; 190static unsigned int root_users;
@@ -237,6 +243,10 @@ static void blk_trace_cleanup(struct blk_trace *bt)
237 free_percpu(bt->sequence); 243 free_percpu(bt->sequence);
238 free_percpu(bt->msg_data); 244 free_percpu(bt->msg_data);
239 kfree(bt); 245 kfree(bt);
246 mutex_lock(&blk_probe_mutex);
247 if (atomic_dec_and_test(&blk_probes_ref))
248 blk_unregister_tracepoints();
249 mutex_unlock(&blk_probe_mutex);
240} 250}
241 251
242int blk_trace_remove(struct request_queue *q) 252int blk_trace_remove(struct request_queue *q)
@@ -428,6 +438,14 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
428 bt->pid = buts->pid; 438 bt->pid = buts->pid;
429 bt->trace_state = Blktrace_setup; 439 bt->trace_state = Blktrace_setup;
430 440
441 mutex_lock(&blk_probe_mutex);
442 if (atomic_add_return(1, &blk_probes_ref) == 1) {
443 ret = blk_register_tracepoints();
444 if (ret)
445 goto probe_err;
446 }
447 mutex_unlock(&blk_probe_mutex);
448
431 ret = -EBUSY; 449 ret = -EBUSY;
432 old_bt = xchg(&q->blk_trace, bt); 450 old_bt = xchg(&q->blk_trace, bt);
433 if (old_bt) { 451 if (old_bt) {
@@ -436,6 +454,9 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
436 } 454 }
437 455
438 return 0; 456 return 0;
457probe_err:
458 atomic_dec(&blk_probes_ref);
459 mutex_unlock(&blk_probe_mutex);
439err: 460err:
440 if (dir) 461 if (dir)
441 blk_remove_tree(dir); 462 blk_remove_tree(dir);
@@ -562,3 +583,308 @@ void blk_trace_shutdown(struct request_queue *q)
562 blk_trace_remove(q); 583 blk_trace_remove(q);
563 } 584 }
564} 585}
586
587/*
588 * blktrace probes
589 */
590
591/**
592 * blk_add_trace_rq - Add a trace for a request oriented action
593 * @q: queue the io is for
594 * @rq: the source request
595 * @what: the action
596 *
597 * Description:
598 * Records an action against a request. Will log the bio offset + size.
599 *
600 **/
601static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
602 u32 what)
603{
604 struct blk_trace *bt = q->blk_trace;
605 int rw = rq->cmd_flags & 0x03;
606
607 if (likely(!bt))
608 return;
609
610 if (blk_discard_rq(rq))
611 rw |= (1 << BIO_RW_DISCARD);
612
613 if (blk_pc_request(rq)) {
614 what |= BLK_TC_ACT(BLK_TC_PC);
615 __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors,
616 sizeof(rq->cmd), rq->cmd);
617 } else {
618 what |= BLK_TC_ACT(BLK_TC_FS);
619 __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
620 rw, what, rq->errors, 0, NULL);
621 }
622}
623
624static void blk_add_trace_rq_abort(struct request_queue *q, struct request *rq)
625{
626 blk_add_trace_rq(q, rq, BLK_TA_ABORT);
627}
628
629static void blk_add_trace_rq_insert(struct request_queue *q, struct request *rq)
630{
631 blk_add_trace_rq(q, rq, BLK_TA_INSERT);
632}
633
634static void blk_add_trace_rq_issue(struct request_queue *q, struct request *rq)
635{
636 blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
637}
638
639static void blk_add_trace_rq_requeue(struct request_queue *q, struct request *rq)
640{
641 blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
642}
643
644static void blk_add_trace_rq_complete(struct request_queue *q, struct request *rq)
645{
646 blk_add_trace_rq(q, rq, BLK_TA_COMPLETE);
647}
648
649/**
650 * blk_add_trace_bio - Add a trace for a bio oriented action
651 * @q: queue the io is for
652 * @bio: the source bio
653 * @what: the action
654 *
655 * Description:
656 * Records an action against a bio. Will log the bio offset + size.
657 *
658 **/
659static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
660 u32 what)
661{
662 struct blk_trace *bt = q->blk_trace;
663
664 if (likely(!bt))
665 return;
666
667 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what,
668 !bio_flagged(bio, BIO_UPTODATE), 0, NULL);
669}
670
671static void blk_add_trace_bio_bounce(struct request_queue *q, struct bio *bio)
672{
673 blk_add_trace_bio(q, bio, BLK_TA_BOUNCE);
674}
675
676static void blk_add_trace_bio_complete(struct request_queue *q, struct bio *bio)
677{
678 blk_add_trace_bio(q, bio, BLK_TA_COMPLETE);
679}
680
681static void blk_add_trace_bio_backmerge(struct request_queue *q, struct bio *bio)
682{
683 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
684}
685
686static void blk_add_trace_bio_frontmerge(struct request_queue *q, struct bio *bio)
687{
688 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
689}
690
691static void blk_add_trace_bio_queue(struct request_queue *q, struct bio *bio)
692{
693 blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
694}
695
696static void blk_add_trace_getrq(struct request_queue *q, struct bio *bio, int rw)
697{
698 if (bio)
699 blk_add_trace_bio(q, bio, BLK_TA_GETRQ);
700 else {
701 struct blk_trace *bt = q->blk_trace;
702
703 if (bt)
704 __blk_add_trace(bt, 0, 0, rw, BLK_TA_GETRQ, 0, 0, NULL);
705 }
706}
707
708
709static void blk_add_trace_sleeprq(struct request_queue *q, struct bio *bio, int rw)
710{
711 if (bio)
712 blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ);
713 else {
714 struct blk_trace *bt = q->blk_trace;
715
716 if (bt)
717 __blk_add_trace(bt, 0, 0, rw, BLK_TA_SLEEPRQ, 0, 0, NULL);
718 }
719}
720
721static void blk_add_trace_plug(struct request_queue *q)
722{
723 struct blk_trace *bt = q->blk_trace;
724
725 if (bt)
726 __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL);
727}
728
729static void blk_add_trace_unplug_io(struct request_queue *q)
730{
731 struct blk_trace *bt = q->blk_trace;
732
733 if (bt) {
734 unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE];
735 __be64 rpdu = cpu_to_be64(pdu);
736
737 __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_IO, 0,
738 sizeof(rpdu), &rpdu);
739 }
740}
741
742static void blk_add_trace_unplug_timer(struct request_queue *q)
743{
744 struct blk_trace *bt = q->blk_trace;
745
746 if (bt) {
747 unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE];
748 __be64 rpdu = cpu_to_be64(pdu);
749
750 __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_TIMER, 0,
751 sizeof(rpdu), &rpdu);
752 }
753}
754
755static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
756 unsigned int pdu)
757{
758 struct blk_trace *bt = q->blk_trace;
759
760 if (bt) {
761 __be64 rpdu = cpu_to_be64(pdu);
762
763 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw,
764 BLK_TA_SPLIT, !bio_flagged(bio, BIO_UPTODATE),
765 sizeof(rpdu), &rpdu);
766 }
767}
768
769/**
770 * blk_add_trace_remap - Add a trace for a remap operation
771 * @q: queue the io is for
772 * @bio: the source bio
773 * @dev: target device
774 * @from: source sector
775 * @to: target sector
776 *
777 * Description:
778 * Device mapper or raid target sometimes need to split a bio because
779 * it spans a stripe (or similar). Add a trace for that action.
780 *
781 **/
782static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
783 dev_t dev, sector_t from, sector_t to)
784{
785 struct blk_trace *bt = q->blk_trace;
786 struct blk_io_trace_remap r;
787
788 if (likely(!bt))
789 return;
790
791 r.device = cpu_to_be32(dev);
792 r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev);
793 r.sector = cpu_to_be64(to);
794
795 __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP,
796 !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
797}
798
799/**
800 * blk_add_driver_data - Add binary message with driver-specific data
801 * @q: queue the io is for
802 * @rq: io request
803 * @data: driver-specific data
804 * @len: length of driver-specific data
805 *
806 * Description:
807 * Some drivers might want to write driver-specific data per request.
808 *
809 **/
810void blk_add_driver_data(struct request_queue *q,
811 struct request *rq,
812 void *data, size_t len)
813{
814 struct blk_trace *bt = q->blk_trace;
815
816 if (likely(!bt))
817 return;
818
819 if (blk_pc_request(rq))
820 __blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA,
821 rq->errors, len, data);
822 else
823 __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
824 0, BLK_TA_DRV_DATA, rq->errors, len, data);
825}
826EXPORT_SYMBOL_GPL(blk_add_driver_data);
827
828static int blk_register_tracepoints(void)
829{
830 int ret;
831
832 ret = register_trace_block_rq_abort(blk_add_trace_rq_abort);
833 WARN_ON(ret);
834 ret = register_trace_block_rq_insert(blk_add_trace_rq_insert);
835 WARN_ON(ret);
836 ret = register_trace_block_rq_issue(blk_add_trace_rq_issue);
837 WARN_ON(ret);
838 ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue);
839 WARN_ON(ret);
840 ret = register_trace_block_rq_complete(blk_add_trace_rq_complete);
841 WARN_ON(ret);
842 ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce);
843 WARN_ON(ret);
844 ret = register_trace_block_bio_complete(blk_add_trace_bio_complete);
845 WARN_ON(ret);
846 ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge);
847 WARN_ON(ret);
848 ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge);
849 WARN_ON(ret);
850 ret = register_trace_block_bio_queue(blk_add_trace_bio_queue);
851 WARN_ON(ret);
852 ret = register_trace_block_getrq(blk_add_trace_getrq);
853 WARN_ON(ret);
854 ret = register_trace_block_sleeprq(blk_add_trace_sleeprq);
855 WARN_ON(ret);
856 ret = register_trace_block_plug(blk_add_trace_plug);
857 WARN_ON(ret);
858 ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer);
859 WARN_ON(ret);
860 ret = register_trace_block_unplug_io(blk_add_trace_unplug_io);
861 WARN_ON(ret);
862 ret = register_trace_block_split(blk_add_trace_split);
863 WARN_ON(ret);
864 ret = register_trace_block_remap(blk_add_trace_remap);
865 WARN_ON(ret);
866 return 0;
867}
868
869static void blk_unregister_tracepoints(void)
870{
871 unregister_trace_block_remap(blk_add_trace_remap);
872 unregister_trace_block_split(blk_add_trace_split);
873 unregister_trace_block_unplug_io(blk_add_trace_unplug_io);
874 unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer);
875 unregister_trace_block_plug(blk_add_trace_plug);
876 unregister_trace_block_sleeprq(blk_add_trace_sleeprq);
877 unregister_trace_block_getrq(blk_add_trace_getrq);
878 unregister_trace_block_bio_queue(blk_add_trace_bio_queue);
879 unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge);
880 unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge);
881 unregister_trace_block_bio_complete(blk_add_trace_bio_complete);
882 unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce);
883 unregister_trace_block_rq_complete(blk_add_trace_rq_complete);
884 unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue);
885 unregister_trace_block_rq_issue(blk_add_trace_rq_issue);
886 unregister_trace_block_rq_insert(blk_add_trace_rq_insert);
887 unregister_trace_block_rq_abort(blk_add_trace_rq_abort);
888
889 tracepoint_synchronize_unregister();
890}
diff --git a/block/elevator.c b/block/elevator.c
index 9ac82dde99dd..e5677fe4f412 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -33,6 +33,7 @@
33#include <linux/compiler.h> 33#include <linux/compiler.h>
34#include <linux/delay.h> 34#include <linux/delay.h>
35#include <linux/blktrace_api.h> 35#include <linux/blktrace_api.h>
36#include <trace/block.h>
36#include <linux/hash.h> 37#include <linux/hash.h>
37#include <linux/uaccess.h> 38#include <linux/uaccess.h>
38 39
@@ -41,6 +42,8 @@
41static DEFINE_SPINLOCK(elv_list_lock); 42static DEFINE_SPINLOCK(elv_list_lock);
42static LIST_HEAD(elv_list); 43static LIST_HEAD(elv_list);
43 44
45DEFINE_TRACE(block_rq_abort);
46
44/* 47/*
45 * Merge hash stuff. 48 * Merge hash stuff.
46 */ 49 */
@@ -52,6 +55,9 @@ static const int elv_hash_shift = 6;
52#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) 55#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors)
53#define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) 56#define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash))
54 57
58DEFINE_TRACE(block_rq_insert);
59DEFINE_TRACE(block_rq_issue);
60
55/* 61/*
56 * Query io scheduler to see if the current process issuing bio may be 62 * Query io scheduler to see if the current process issuing bio may be
57 * merged with rq. 63 * merged with rq.
@@ -586,7 +592,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
586 unsigned ordseq; 592 unsigned ordseq;
587 int unplug_it = 1; 593 int unplug_it = 1;
588 594
589 blk_add_trace_rq(q, rq, BLK_TA_INSERT); 595 trace_block_rq_insert(q, rq);
590 596
591 rq->q = q; 597 rq->q = q;
592 598
@@ -772,7 +778,7 @@ struct request *elv_next_request(struct request_queue *q)
772 * not be passed by new incoming requests 778 * not be passed by new incoming requests
773 */ 779 */
774 rq->cmd_flags |= REQ_STARTED; 780 rq->cmd_flags |= REQ_STARTED;
775 blk_add_trace_rq(q, rq, BLK_TA_ISSUE); 781 trace_block_rq_issue(q, rq);
776 } 782 }
777 783
778 if (!q->boundary_rq || q->boundary_rq == rq) { 784 if (!q->boundary_rq || q->boundary_rq == rq) {
@@ -921,7 +927,7 @@ void elv_abort_queue(struct request_queue *q)
921 while (!list_empty(&q->queue_head)) { 927 while (!list_empty(&q->queue_head)) {
922 rq = list_entry_rq(q->queue_head.next); 928 rq = list_entry_rq(q->queue_head.next);
923 rq->cmd_flags |= REQ_QUIET; 929 rq->cmd_flags |= REQ_QUIET;
924 blk_add_trace_rq(q, rq, BLK_TA_ABORT); 930 trace_block_rq_abort(q, rq);
925 __blk_end_request(rq, -EIO, blk_rq_bytes(rq)); 931 __blk_end_request(rq, -EIO, blk_rq_bytes(rq));
926 } 932 }
927} 933}
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index c99e4728ff41..343094c3feeb 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -21,6 +21,7 @@
21#include <linux/idr.h> 21#include <linux/idr.h>
22#include <linux/hdreg.h> 22#include <linux/hdreg.h>
23#include <linux/blktrace_api.h> 23#include <linux/blktrace_api.h>
24#include <trace/block.h>
24 25
25#define DM_MSG_PREFIX "core" 26#define DM_MSG_PREFIX "core"
26 27
@@ -51,6 +52,8 @@ struct dm_target_io {
51 union map_info info; 52 union map_info info;
52}; 53};
53 54
55DEFINE_TRACE(block_bio_complete);
56
54union map_info *dm_get_mapinfo(struct bio *bio) 57union map_info *dm_get_mapinfo(struct bio *bio)
55{ 58{
56 if (bio && bio->bi_private) 59 if (bio && bio->bi_private)
@@ -504,8 +507,7 @@ static void dec_pending(struct dm_io *io, int error)
504 end_io_acct(io); 507 end_io_acct(io);
505 508
506 if (io->error != DM_ENDIO_REQUEUE) { 509 if (io->error != DM_ENDIO_REQUEUE) {
507 blk_add_trace_bio(io->md->queue, io->bio, 510 trace_block_bio_complete(io->md->queue, io->bio);
508 BLK_TA_COMPLETE);
509 511
510 bio_endio(io->bio, io->error); 512 bio_endio(io->bio, io->error);
511 } 513 }
@@ -598,7 +600,7 @@ static void __map_bio(struct dm_target *ti, struct bio *clone,
598 if (r == DM_MAPIO_REMAPPED) { 600 if (r == DM_MAPIO_REMAPPED) {
599 /* the bio has been remapped so dispatch it */ 601 /* the bio has been remapped so dispatch it */
600 602
601 blk_add_trace_remap(bdev_get_queue(clone->bi_bdev), clone, 603 trace_block_remap(bdev_get_queue(clone->bi_bdev), clone,
602 tio->io->bio->bi_bdev->bd_dev, 604 tio->io->bio->bi_bdev->bd_dev,
603 clone->bi_sector, sector); 605 clone->bi_sector, sector);
604 606
diff --git a/fs/bio.c b/fs/bio.c
index 77a55bcceedb..df99c882b807 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -26,8 +26,11 @@
26#include <linux/mempool.h> 26#include <linux/mempool.h>
27#include <linux/workqueue.h> 27#include <linux/workqueue.h>
28#include <linux/blktrace_api.h> 28#include <linux/blktrace_api.h>
29#include <trace/block.h>
29#include <scsi/sg.h> /* for struct sg_iovec */ 30#include <scsi/sg.h> /* for struct sg_iovec */
30 31
32DEFINE_TRACE(block_split);
33
31static struct kmem_cache *bio_slab __read_mostly; 34static struct kmem_cache *bio_slab __read_mostly;
32 35
33static mempool_t *bio_split_pool __read_mostly; 36static mempool_t *bio_split_pool __read_mostly;
@@ -1263,7 +1266,7 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors)
1263 if (!bp) 1266 if (!bp)
1264 return bp; 1267 return bp;
1265 1268
1266 blk_add_trace_pdu_int(bdev_get_queue(bi->bi_bdev), BLK_TA_SPLIT, bi, 1269 trace_block_split(bdev_get_queue(bi->bi_bdev), bi,
1267 bi->bi_sector + first_sectors); 1270 bi->bi_sector + first_sectors);
1268 1271
1269 BUG_ON(bi->bi_vcnt != 1); 1272 BUG_ON(bi->bi_vcnt != 1);
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index bdf505d33e77..1dba3493d520 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -160,7 +160,6 @@ struct blk_trace {
160 160
161extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *); 161extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *);
162extern void blk_trace_shutdown(struct request_queue *); 162extern void blk_trace_shutdown(struct request_queue *);
163extern void __blk_add_trace(struct blk_trace *, sector_t, int, int, u32, int, int, void *);
164extern int do_blk_trace_setup(struct request_queue *q, 163extern int do_blk_trace_setup(struct request_queue *q,
165 char *name, dev_t dev, struct blk_user_trace_setup *buts); 164 char *name, dev_t dev, struct blk_user_trace_setup *buts);
166extern void __trace_note_message(struct blk_trace *, const char *fmt, ...); 165extern void __trace_note_message(struct blk_trace *, const char *fmt, ...);
@@ -186,168 +185,8 @@ extern void __trace_note_message(struct blk_trace *, const char *fmt, ...);
186 } while (0) 185 } while (0)
187#define BLK_TN_MAX_MSG 128 186#define BLK_TN_MAX_MSG 128
188 187
189/** 188extern void blk_add_driver_data(struct request_queue *q, struct request *rq,
190 * blk_add_trace_rq - Add a trace for a request oriented action 189 void *data, size_t len);
191 * @q: queue the io is for
192 * @rq: the source request
193 * @what: the action
194 *
195 * Description:
196 * Records an action against a request. Will log the bio offset + size.
197 *
198 **/
199static inline void blk_add_trace_rq(struct request_queue *q, struct request *rq,
200 u32 what)
201{
202 struct blk_trace *bt = q->blk_trace;
203 int rw = rq->cmd_flags & 0x03;
204
205 if (likely(!bt))
206 return;
207
208 if (blk_discard_rq(rq))
209 rw |= (1 << BIO_RW_DISCARD);
210
211 if (blk_pc_request(rq)) {
212 what |= BLK_TC_ACT(BLK_TC_PC);
213 __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, sizeof(rq->cmd), rq->cmd);
214 } else {
215 what |= BLK_TC_ACT(BLK_TC_FS);
216 __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, rw, what, rq->errors, 0, NULL);
217 }
218}
219
220/**
221 * blk_add_trace_bio - Add a trace for a bio oriented action
222 * @q: queue the io is for
223 * @bio: the source bio
224 * @what: the action
225 *
226 * Description:
227 * Records an action against a bio. Will log the bio offset + size.
228 *
229 **/
230static inline void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
231 u32 what)
232{
233 struct blk_trace *bt = q->blk_trace;
234
235 if (likely(!bt))
236 return;
237
238 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, !bio_flagged(bio, BIO_UPTODATE), 0, NULL);
239}
240
241/**
242 * blk_add_trace_generic - Add a trace for a generic action
243 * @q: queue the io is for
244 * @bio: the source bio
245 * @rw: the data direction
246 * @what: the action
247 *
248 * Description:
249 * Records a simple trace
250 *
251 **/
252static inline void blk_add_trace_generic(struct request_queue *q,
253 struct bio *bio, int rw, u32 what)
254{
255 struct blk_trace *bt = q->blk_trace;
256
257 if (likely(!bt))
258 return;
259
260 if (bio)
261 blk_add_trace_bio(q, bio, what);
262 else
263 __blk_add_trace(bt, 0, 0, rw, what, 0, 0, NULL);
264}
265
266/**
267 * blk_add_trace_pdu_int - Add a trace for a bio with an integer payload
268 * @q: queue the io is for
269 * @what: the action
270 * @bio: the source bio
271 * @pdu: the integer payload
272 *
273 * Description:
274 * Adds a trace with some integer payload. This might be an unplug
275 * option given as the action, with the depth at unplug time given
276 * as the payload
277 *
278 **/
279static inline void blk_add_trace_pdu_int(struct request_queue *q, u32 what,
280 struct bio *bio, unsigned int pdu)
281{
282 struct blk_trace *bt = q->blk_trace;
283 __be64 rpdu = cpu_to_be64(pdu);
284
285 if (likely(!bt))
286 return;
287
288 if (bio)
289 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, !bio_flagged(bio, BIO_UPTODATE), sizeof(rpdu), &rpdu);
290 else
291 __blk_add_trace(bt, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu);
292}
293
294/**
295 * blk_add_trace_remap - Add a trace for a remap operation
296 * @q: queue the io is for
297 * @bio: the source bio
298 * @dev: target device
299 * @from: source sector
300 * @to: target sector
301 *
302 * Description:
303 * Device mapper or raid target sometimes need to split a bio because
304 * it spans a stripe (or similar). Add a trace for that action.
305 *
306 **/
307static inline void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
308 dev_t dev, sector_t from, sector_t to)
309{
310 struct blk_trace *bt = q->blk_trace;
311 struct blk_io_trace_remap r;
312
313 if (likely(!bt))
314 return;
315
316 r.device = cpu_to_be32(dev);
317 r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev);
318 r.sector = cpu_to_be64(to);
319
320 __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
321}
322
323/**
324 * blk_add_driver_data - Add binary message with driver-specific data
325 * @q: queue the io is for
326 * @rq: io request
327 * @data: driver-specific data
328 * @len: length of driver-specific data
329 *
330 * Description:
331 * Some drivers might want to write driver-specific data per request.
332 *
333 **/
334static inline void blk_add_driver_data(struct request_queue *q,
335 struct request *rq,
336 void *data, size_t len)
337{
338 struct blk_trace *bt = q->blk_trace;
339
340 if (likely(!bt))
341 return;
342
343 if (blk_pc_request(rq))
344 __blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA,
345 rq->errors, len, data);
346 else
347 __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
348 0, BLK_TA_DRV_DATA, rq->errors, len, data);
349}
350
351extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, 190extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
352 char __user *arg); 191 char __user *arg);
353extern int blk_trace_startstop(struct request_queue *q, int start); 192extern int blk_trace_startstop(struct request_queue *q, int start);
@@ -356,13 +195,8 @@ extern int blk_trace_remove(struct request_queue *q);
356#else /* !CONFIG_BLK_DEV_IO_TRACE */ 195#else /* !CONFIG_BLK_DEV_IO_TRACE */
357#define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) 196#define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY)
358#define blk_trace_shutdown(q) do { } while (0) 197#define blk_trace_shutdown(q) do { } while (0)
359#define blk_add_trace_rq(q, rq, what) do { } while (0)
360#define blk_add_trace_bio(q, rq, what) do { } while (0)
361#define blk_add_trace_generic(q, rq, rw, what) do { } while (0)
362#define blk_add_trace_pdu_int(q, what, bio, pdu) do { } while (0)
363#define blk_add_trace_remap(q, bio, dev, f, t) do {} while (0)
364#define blk_add_driver_data(q, rq, data, len) do {} while (0)
365#define do_blk_trace_setup(q, name, dev, buts) (-ENOTTY) 198#define do_blk_trace_setup(q, name, dev, buts) (-ENOTTY)
199#define blk_add_driver_data(q, rq, data, len) do {} while (0)
366#define blk_trace_setup(q, name, dev, arg) (-ENOTTY) 200#define blk_trace_setup(q, name, dev, arg) (-ENOTTY)
367#define blk_trace_startstop(q, start) (-ENOTTY) 201#define blk_trace_startstop(q, start) (-ENOTTY)
368#define blk_trace_remove(q) (-ENOTTY) 202#define blk_trace_remove(q) (-ENOTTY)
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index f9792c0d73f6..afba918c623c 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -316,6 +316,35 @@ ftrace_init_module(struct module *mod,
316 unsigned long *start, unsigned long *end) { } 316 unsigned long *start, unsigned long *end) { }
317#endif 317#endif
318 318
319enum {
320 POWER_NONE = 0,
321 POWER_CSTATE = 1,
322 POWER_PSTATE = 2,
323};
324
325struct power_trace {
326#ifdef CONFIG_POWER_TRACER
327 ktime_t stamp;
328 ktime_t end;
329 int type;
330 int state;
331#endif
332};
333
334#ifdef CONFIG_POWER_TRACER
335extern void trace_power_start(struct power_trace *it, unsigned int type,
336 unsigned int state);
337extern void trace_power_mark(struct power_trace *it, unsigned int type,
338 unsigned int state);
339extern void trace_power_end(struct power_trace *it);
340#else
341static inline void trace_power_start(struct power_trace *it, unsigned int type,
342 unsigned int state) { }
343static inline void trace_power_mark(struct power_trace *it, unsigned int type,
344 unsigned int state) { }
345static inline void trace_power_end(struct power_trace *it) { }
346#endif
347
319 348
320/* 349/*
321 * Structure that defines an entry function trace. 350 * Structure that defines an entry function trace.
diff --git a/include/trace/block.h b/include/trace/block.h
new file mode 100644
index 000000000000..25c6a1fd5b77
--- /dev/null
+++ b/include/trace/block.h
@@ -0,0 +1,76 @@
1#ifndef _TRACE_BLOCK_H
2#define _TRACE_BLOCK_H
3
4#include <linux/blkdev.h>
5#include <linux/tracepoint.h>
6
7DECLARE_TRACE(block_rq_abort,
8 TPPROTO(struct request_queue *q, struct request *rq),
9 TPARGS(q, rq));
10
11DECLARE_TRACE(block_rq_insert,
12 TPPROTO(struct request_queue *q, struct request *rq),
13 TPARGS(q, rq));
14
15DECLARE_TRACE(block_rq_issue,
16 TPPROTO(struct request_queue *q, struct request *rq),
17 TPARGS(q, rq));
18
19DECLARE_TRACE(block_rq_requeue,
20 TPPROTO(struct request_queue *q, struct request *rq),
21 TPARGS(q, rq));
22
23DECLARE_TRACE(block_rq_complete,
24 TPPROTO(struct request_queue *q, struct request *rq),
25 TPARGS(q, rq));
26
27DECLARE_TRACE(block_bio_bounce,
28 TPPROTO(struct request_queue *q, struct bio *bio),
29 TPARGS(q, bio));
30
31DECLARE_TRACE(block_bio_complete,
32 TPPROTO(struct request_queue *q, struct bio *bio),
33 TPARGS(q, bio));
34
35DECLARE_TRACE(block_bio_backmerge,
36 TPPROTO(struct request_queue *q, struct bio *bio),
37 TPARGS(q, bio));
38
39DECLARE_TRACE(block_bio_frontmerge,
40 TPPROTO(struct request_queue *q, struct bio *bio),
41 TPARGS(q, bio));
42
43DECLARE_TRACE(block_bio_queue,
44 TPPROTO(struct request_queue *q, struct bio *bio),
45 TPARGS(q, bio));
46
47DECLARE_TRACE(block_getrq,
48 TPPROTO(struct request_queue *q, struct bio *bio, int rw),
49 TPARGS(q, bio, rw));
50
51DECLARE_TRACE(block_sleeprq,
52 TPPROTO(struct request_queue *q, struct bio *bio, int rw),
53 TPARGS(q, bio, rw));
54
55DECLARE_TRACE(block_plug,
56 TPPROTO(struct request_queue *q),
57 TPARGS(q));
58
59DECLARE_TRACE(block_unplug_timer,
60 TPPROTO(struct request_queue *q),
61 TPARGS(q));
62
63DECLARE_TRACE(block_unplug_io,
64 TPPROTO(struct request_queue *q),
65 TPARGS(q));
66
67DECLARE_TRACE(block_split,
68 TPPROTO(struct request_queue *q, struct bio *bio, unsigned int pdu),
69 TPARGS(q, bio, pdu));
70
71DECLARE_TRACE(block_remap,
72 TPPROTO(struct request_queue *q, struct bio *bio, dev_t dev,
73 sector_t from, sector_t to),
74 TPARGS(q, bio, dev, from, to));
75
76#endif
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index eb9b901e0777..8b6b673b4d6c 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -220,6 +220,17 @@ config BRANCH_TRACER
220 220
221 Say N if unsure. 221 Say N if unsure.
222 222
223config POWER_TRACER
224 bool "Trace power consumption behavior"
225 depends on DEBUG_KERNEL
226 depends on X86
227 select TRACING
228 help
229 This tracer helps developers to analyze and optimize the kernel's
230 power management decisions, specifically the C-state and P-state
231 behavior.
232
233
223config STACK_TRACER 234config STACK_TRACER
224 bool "Trace max stack" 235 bool "Trace max stack"
225 depends on HAVE_FUNCTION_TRACER 236 depends on HAVE_FUNCTION_TRACER
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 08c5fe6ddc09..62dc561b6676 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -32,5 +32,6 @@ obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
32obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o 32obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
33obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o 33obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
34obj-$(CONFIG_BTS_TRACER) += trace_bts.o 34obj-$(CONFIG_BTS_TRACER) += trace_bts.o
35obj-$(CONFIG_POWER_TRACER) += trace_power.o
35 36
36libftrace-y := ftrace.o 37libftrace-y := ftrace.o
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 7adacf349ef7..f96f4e787ff3 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -29,6 +29,7 @@ enum trace_type {
29 TRACE_GRAPH_ENT, 29 TRACE_GRAPH_ENT,
30 TRACE_USER_STACK, 30 TRACE_USER_STACK,
31 TRACE_BTS, 31 TRACE_BTS,
32 TRACE_POWER,
32 33
33 __TRACE_LAST_TYPE 34 __TRACE_LAST_TYPE
34}; 35};
@@ -163,6 +164,11 @@ struct bts_entry {
163 unsigned long to; 164 unsigned long to;
164}; 165};
165 166
167struct trace_power {
168 struct trace_entry ent;
169 struct power_trace state_data;
170};
171
166/* 172/*
167 * trace_flag_type is an enumeration that holds different 173 * trace_flag_type is an enumeration that holds different
168 * states when a trace occurs. These are: 174 * states when a trace occurs. These are:
@@ -272,6 +278,7 @@ extern void __ftrace_bad_type(void);
272 IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \ 278 IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \
273 TRACE_GRAPH_RET); \ 279 TRACE_GRAPH_RET); \
274 IF_ASSIGN(var, ent, struct bts_entry, TRACE_BTS);\ 280 IF_ASSIGN(var, ent, struct bts_entry, TRACE_BTS);\
281 IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
275 __ftrace_bad_type(); \ 282 __ftrace_bad_type(); \
276 } while (0) 283 } while (0)
277 284
diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c
new file mode 100644
index 000000000000..a7172a352f62
--- /dev/null
+++ b/kernel/trace/trace_power.c
@@ -0,0 +1,179 @@
1/*
2 * ring buffer based C-state tracer
3 *
4 * Arjan van de Ven <arjan@linux.intel.com>
5 * Copyright (C) 2008 Intel Corporation
6 *
7 * Much is borrowed from trace_boot.c which is
8 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
9 *
10 */
11
12#include <linux/init.h>
13#include <linux/debugfs.h>
14#include <linux/ftrace.h>
15#include <linux/kallsyms.h>
16#include <linux/module.h>
17
18#include "trace.h"
19
20static struct trace_array *power_trace;
21static int __read_mostly trace_power_enabled;
22
23
24static void start_power_trace(struct trace_array *tr)
25{
26 trace_power_enabled = 1;
27}
28
29static void stop_power_trace(struct trace_array *tr)
30{
31 trace_power_enabled = 0;
32}
33
34
35static int power_trace_init(struct trace_array *tr)
36{
37 int cpu;
38 power_trace = tr;
39
40 trace_power_enabled = 1;
41
42 for_each_cpu_mask(cpu, cpu_possible_map)
43 tracing_reset(tr, cpu);
44 return 0;
45}
46
47static enum print_line_t power_print_line(struct trace_iterator *iter)
48{
49 int ret = 0;
50 struct trace_entry *entry = iter->ent;
51 struct trace_power *field ;
52 struct power_trace *it;
53 struct trace_seq *s = &iter->seq;
54 struct timespec stamp;
55 struct timespec duration;
56
57 trace_assign_type(field, entry);
58 it = &field->state_data;
59 stamp = ktime_to_timespec(it->stamp);
60 duration = ktime_to_timespec(ktime_sub(it->end, it->stamp));
61
62 if (entry->type == TRACE_POWER) {
63 if (it->type == POWER_CSTATE)
64 ret = trace_seq_printf(s, "[%5ld.%09ld] CSTATE: Going to C%i on cpu %i for %ld.%09ld\n",
65 stamp.tv_sec,
66 stamp.tv_nsec,
67 it->state, iter->cpu,
68 duration.tv_sec,
69 duration.tv_nsec);
70 if (it->type == POWER_PSTATE)
71 ret = trace_seq_printf(s, "[%5ld.%09ld] PSTATE: Going to P%i on cpu %i\n",
72 stamp.tv_sec,
73 stamp.tv_nsec,
74 it->state, iter->cpu);
75 if (!ret)
76 return TRACE_TYPE_PARTIAL_LINE;
77 return TRACE_TYPE_HANDLED;
78 }
79 return TRACE_TYPE_UNHANDLED;
80}
81
82static struct tracer power_tracer __read_mostly =
83{
84 .name = "power",
85 .init = power_trace_init,
86 .start = start_power_trace,
87 .stop = stop_power_trace,
88 .reset = stop_power_trace,
89 .print_line = power_print_line,
90};
91
92static int init_power_trace(void)
93{
94 return register_tracer(&power_tracer);
95}
96device_initcall(init_power_trace);
97
98void trace_power_start(struct power_trace *it, unsigned int type,
99 unsigned int level)
100{
101 if (!trace_power_enabled)
102 return;
103
104 memset(it, 0, sizeof(struct power_trace));
105 it->state = level;
106 it->type = type;
107 it->stamp = ktime_get();
108}
109EXPORT_SYMBOL_GPL(trace_power_start);
110
111
112void trace_power_end(struct power_trace *it)
113{
114 struct ring_buffer_event *event;
115 struct trace_power *entry;
116 struct trace_array_cpu *data;
117 unsigned long irq_flags;
118 struct trace_array *tr = power_trace;
119
120 if (!trace_power_enabled)
121 return;
122
123 preempt_disable();
124 it->end = ktime_get();
125 data = tr->data[smp_processor_id()];
126
127 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
128 &irq_flags);
129 if (!event)
130 goto out;
131 entry = ring_buffer_event_data(event);
132 tracing_generic_entry_update(&entry->ent, 0, 0);
133 entry->ent.type = TRACE_POWER;
134 entry->state_data = *it;
135 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
136
137 trace_wake_up();
138
139 out:
140 preempt_enable();
141}
142EXPORT_SYMBOL_GPL(trace_power_end);
143
144void trace_power_mark(struct power_trace *it, unsigned int type,
145 unsigned int level)
146{
147 struct ring_buffer_event *event;
148 struct trace_power *entry;
149 struct trace_array_cpu *data;
150 unsigned long irq_flags;
151 struct trace_array *tr = power_trace;
152
153 if (!trace_power_enabled)
154 return;
155
156 memset(it, 0, sizeof(struct power_trace));
157 it->state = level;
158 it->type = type;
159 it->stamp = ktime_get();
160 preempt_disable();
161 it->end = it->stamp;
162 data = tr->data[smp_processor_id()];
163
164 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
165 &irq_flags);
166 if (!event)
167 goto out;
168 entry = ring_buffer_event_data(event);
169 tracing_generic_entry_update(&entry->ent, 0, 0);
170 entry->ent.type = TRACE_POWER;
171 entry->state_data = *it;
172 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
173
174 trace_wake_up();
175
176 out:
177 preempt_enable();
178}
179EXPORT_SYMBOL_GPL(trace_power_mark);
diff --git a/mm/bounce.c b/mm/bounce.c
index 06722c403058..bf0cf7c8387b 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -14,6 +14,7 @@
14#include <linux/hash.h> 14#include <linux/hash.h>
15#include <linux/highmem.h> 15#include <linux/highmem.h>
16#include <linux/blktrace_api.h> 16#include <linux/blktrace_api.h>
17#include <trace/block.h>
17#include <asm/tlbflush.h> 18#include <asm/tlbflush.h>
18 19
19#define POOL_SIZE 64 20#define POOL_SIZE 64
@@ -21,6 +22,8 @@
21 22
22static mempool_t *page_pool, *isa_page_pool; 23static mempool_t *page_pool, *isa_page_pool;
23 24
25DEFINE_TRACE(block_bio_bounce);
26
24#ifdef CONFIG_HIGHMEM 27#ifdef CONFIG_HIGHMEM
25static __init int init_emergency_pool(void) 28static __init int init_emergency_pool(void)
26{ 29{
@@ -222,7 +225,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
222 if (!bio) 225 if (!bio)
223 return; 226 return;
224 227
225 blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE); 228 trace_block_bio_bounce(q, *bio_orig);
226 229
227 /* 230 /*
228 * at least one page was bounced, fill in possible non-highmem 231 * at least one page was bounced, fill in possible non-highmem
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 0197e2f6b544..0b1dc9f9bb06 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -112,6 +112,8 @@ my ($arch, $bits, $objdump, $objcopy, $cc,
112# Acceptable sections to record. 112# Acceptable sections to record.
113my %text_sections = ( 113my %text_sections = (
114 ".text" => 1, 114 ".text" => 1,
115 ".sched.text" => 1,
116 ".spinlock.text" => 1,
115); 117);
116 118
117$objdump = "objdump" if ((length $objdump) == 0); 119$objdump = "objdump" if ((length $objdump) == 0);
diff --git a/scripts/trace/power.pl b/scripts/trace/power.pl
new file mode 100644
index 000000000000..4f729b3501e0
--- /dev/null
+++ b/scripts/trace/power.pl
@@ -0,0 +1,108 @@
1#!/usr/bin/perl
2
3# Copyright 2008, Intel Corporation
4#
5# This file is part of the Linux kernel
6#
7# This program file is free software; you can redistribute it and/or modify it
8# under the terms of the GNU General Public License as published by the
9# Free Software Foundation; version 2 of the License.
10#
11# This program is distributed in the hope that it will be useful, but WITHOUT
12# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14# for more details.
15#
16# You should have received a copy of the GNU General Public License
17# along with this program in a file named COPYING; if not, write to the
18# Free Software Foundation, Inc.,
19# 51 Franklin Street, Fifth Floor,
20# Boston, MA 02110-1301 USA
21#
22# Authors:
23# Arjan van de Ven <arjan@linux.intel.com>
24
25
26#
27# This script turns a cstate ftrace output into an SVG graphic that shows
28# historic C-state information
29#
30#
31# cat /sys/kernel/debug/tracing/trace | perl power.pl > out.svg
32#
33
34my @styles;
35my $base = 0;
36
37my @pstate_last;
38my @pstate_level;
39
40$styles[0] = "fill:rgb(0,0,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
41$styles[1] = "fill:rgb(0,255,0);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
42$styles[2] = "fill:rgb(255,0,20);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
43$styles[3] = "fill:rgb(255,255,20);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
44$styles[4] = "fill:rgb(255,0,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
45$styles[5] = "fill:rgb(0,255,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
46$styles[6] = "fill:rgb(0,128,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
47$styles[7] = "fill:rgb(0,255,128);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
48$styles[8] = "fill:rgb(0,25,20);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
49
50
51print "<?xml version=\"1.0\" standalone=\"no\"?> \n";
52print "<svg width=\"10000\" height=\"100%\" version=\"1.1\" xmlns=\"http://www.w3.org/2000/svg\">\n";
53
54my $scale = 30000.0;
55while (<>) {
56 my $line = $_;
57 if ($line =~ /([0-9\.]+)\] CSTATE: Going to C([0-9]) on cpu ([0-9]+) for ([0-9\.]+)/) {
58 if ($base == 0) {
59 $base = $1;
60 }
61 my $time = $1 - $base;
62 $time = $time * $scale;
63 my $C = $2;
64 my $cpu = $3;
65 my $y = 400 * $cpu;
66 my $duration = $4 * $scale;
67 my $msec = int($4 * 100000)/100.0;
68 my $height = $C * 20;
69 $style = $styles[$C];
70
71 $y = $y + 140 - $height;
72
73 $x2 = $time + 4;
74 $y2 = $y + 4;
75
76
77 print "<rect x=\"$time\" width=\"$duration\" y=\"$y\" height=\"$height\" style=\"$style\"/>\n";
78 print "<text transform=\"translate($x2,$y2) rotate(90)\">C$C $msec</text>\n";
79 }
80 if ($line =~ /([0-9\.]+)\] PSTATE: Going to P([0-9]) on cpu ([0-9]+)/) {
81 my $time = $1 - $base;
82 my $state = $2;
83 my $cpu = $3;
84
85 if (defined($pstate_last[$cpu])) {
86 my $from = $pstate_last[$cpu];
87 my $oldstate = $pstate_state[$cpu];
88 my $duration = ($time-$from) * $scale;
89
90 $from = $from * $scale;
91 my $to = $from + $duration;
92 my $height = 140 - ($oldstate * (140/8));
93
94 my $y = 400 * $cpu + 200 + $height;
95 my $y2 = $y+4;
96 my $style = $styles[8];
97
98 print "<rect x=\"$from\" y=\"$y\" width=\"$duration\" height=\"5\" style=\"$style\"/>\n";
99 print "<text transform=\"translate($from,$y2)\">P$oldstate (cpu $cpu)</text>\n";
100 };
101
102 $pstate_last[$cpu] = $time;
103 $pstate_state[$cpu] = $state;
104 }
105}
106
107
108print "</svg>\n";