author	Linus Torvalds <torvalds@linux-foundation.org>	2013-11-13 22:08:14 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-11-13 22:08:14 -0500
commit	0910c0bdf7c291a41bc21e40a97389c9d4c1960d (patch)
tree	177c4cb22ece78b18f64f548ae82b9a15edbb99c /kernel/trace
parent	2821fe6b00a1e902fd399bb4b7e40bc3041f4d44 (diff)
parent	e37459b8e2c7db6735e39e019e448b76e5e77647 (diff)
Merge branch 'for-3.13/core' of git://git.kernel.dk/linux-block
Pull block IO core updates from Jens Axboe:
 "This is the pull request for the core changes in the block layer for 3.13.  It contains:

   - The new blk-mq request interface.  This is a new and more scalable queueing model that marries the best parts of the request-based interface we currently have (which is fully featured, but scales poorly) and the bio-based "interface" which the new drivers for high-IOPS devices end up using because it's much faster than the request-based one.

     The bio interface has no block layer support, since it taps into the stack much earlier.  This means that drivers end up having to implement a lot of functionality on their own, like tagging, timeout handling, requeue, etc.  The blk-mq interface provides all of these.  Some drivers even provide a switch to select bio or rq and carry code to handle both, since things like merging only work in the rq model and are hence faster for some workloads.  This is a huge mess.  Conversion of these drivers nets us a substantial code reduction.

     Initial results on converting SCSI to this model even show an 8x improvement on single-queue devices.  So while the model was intended to work on the newer multiqueue devices, it has substantial improvements for "classic" hardware as well.

     This code has gone through extensive testing and development; it's now ready to go.  A pull request to convert virtio-blk to this model will be coming as well, with more drivers scheduled for 3.14 conversion.

   - Two blktrace fixes from Jan and Chen Gang.

   - A plug merge fix from Alireza Haghdoost.

   - Conversion of __get_cpu_var() from Christoph Lameter.

   - Fix for sector_div() with a 64-bit divisor from Geert Uytterhoeven.

   - A fix for a race between request completion and the timeout handling from Jeff Moyer.  This is what caused the merge conflict with blk-mq/core, in case you are looking at that.

   - A dm stacking fix from Mike Snitzer.

   - A code consolidation fix and duplicated code removal from Kent Overstreet.

   - A handful of block bug fixes from Mikulas Patocka, fixing a loop crash and memory corruption on blkcg.

   - Elevator switch bug fix from Tomoki Sekiyama.

  A heads-up that I had to rebase this branch.  Initially the immutable bio_vecs had been queued up for inclusion, but a week later it became clear that they weren't fully cooked yet.  So the decision was made to pull this out and postpone it until 3.14.  It was a straightforward rebase, just pruning out the immutable series and the later fixes of problems with it.  The rest of the patches applied directly and no further changes were made"

* 'for-3.13/core' of git://git.kernel.dk/linux-block: (31 commits)
  block: replace IS_ERR and PTR_ERR with PTR_ERR_OR_ZERO
  block: replace IS_ERR and PTR_ERR with PTR_ERR_OR_ZERO
  block: Do not call sector_div() with a 64-bit divisor
  kernel: trace: blktrace: remove redundent memcpy() in compat_blk_trace_setup()
  block: Consolidate duplicated bio_trim() implementations
  block: Use rw_copy_check_uvector()
  block: Enable sysfs nomerge control for I/O requests in the plug list
  block: properly stack underlying max_segment_size to DM device
  elevator: acquire q->sysfs_lock in elevator_change()
  elevator: Fix a race in elevator switching and md device initialization
  block: Replace __get_cpu_var uses
  bdi: test bdi_init failure
  block: fix a probe argument to blk_register_region
  loop: fix crash if blk_alloc_queue fails
  blk-core: Fix memory corruption if blkcg_init_queue fails
  block: fix race between request completion and timeout handling
  blktrace: Send BLK_TN_PROCESS events to all running traces
  blk-mq: don't disallow request merges for req->special being set
  blk-mq: mq plug list breakage
  blk-mq: fix for flush deadlock
  ...
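For context on the blk-mq interface described in the pull request above, here is a rough driver-side sketch of what a converted driver looks like, loosely modeled on the null_blk driver added in this cycle.  All my_* names are hypothetical, and the registration API shown (struct blk_mq_reg, blk_mq_init_queue(), blk_mq_end_io(), rq->special for per-request data) is the 3.13-era form, which was reworked in later releases (blk_mq_tag_set, blk_mq_end_request, blk_mq_rq_to_pdu).  Treat it as an illustration of the model - blk-mq owns tagging, timeouts and requeueing, while the driver mainly supplies a queue_rq handler - not as a reference implementation.

#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/err.h>

/* Per-request driver data; blk-mq allocates cmd_size bytes per tag. */
struct my_cmd {
	struct request *rq;
};

struct my_dev {
	struct request_queue *queue;
};

/* Called by blk-mq with a tagged request on the hardware queue it mapped to. */
static int my_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq)
{
	struct my_cmd *cmd = rq->special;

	cmd->rq = rq;
	/*
	 * A real driver would kick the hardware here and complete the
	 * request later from its IRQ path; this sketch completes inline.
	 */
	blk_mq_end_io(rq, 0);
	return BLK_MQ_RQ_QUEUE_OK;
}

static struct blk_mq_ops my_mq_ops = {
	.queue_rq	= my_queue_rq,
	.map_queue	= blk_mq_map_queue,	/* default ctx -> hctx mapping */
};

static struct blk_mq_reg my_mq_reg = {
	.ops		= &my_mq_ops,
	.nr_hw_queues	= 1,
	.queue_depth	= 64,			/* tag space managed by blk-mq */
	.cmd_size	= sizeof(struct my_cmd),
	.flags		= BLK_MQ_F_SHOULD_MERGE,
};

static int my_dev_init(struct my_dev *dev)
{
	/* Tagging, timeout handling and requeueing come with the queue. */
	dev->queue = blk_mq_init_queue(&my_mq_reg, dev);
	if (IS_ERR(dev->queue))
		return PTR_ERR(dev->queue);
	return 0;
}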
Diffstat (limited to 'kernel/trace')
-rw-r--r--	kernel/trace/blktrace.c	36
1 file changed, 28 insertions(+), 8 deletions(-)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index b8b8560bfb95..f785aef65799 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -26,6 +26,7 @@
 #include <linux/export.h>
 #include <linux/time.h>
 #include <linux/uaccess.h>
+#include <linux/list.h>
 
 #include <trace/events/block.h>
 
@@ -38,6 +39,9 @@ static unsigned int blktrace_seq __read_mostly = 1;
 static struct trace_array *blk_tr;
 static bool blk_tracer_enabled __read_mostly;
 
+static LIST_HEAD(running_trace_list);
+static __cacheline_aligned_in_smp DEFINE_SPINLOCK(running_trace_lock);
+
 /* Select an alternative, minimalistic output than the original one */
 #define TRACE_BLK_OPT_CLASSIC	0x1
 
@@ -107,10 +111,18 @@ record_it:
  * Send out a notify for this process, if we haven't done so since a trace
  * started
  */
-static void trace_note_tsk(struct blk_trace *bt, struct task_struct *tsk)
+static void trace_note_tsk(struct task_struct *tsk)
 {
+	unsigned long flags;
+	struct blk_trace *bt;
+
 	tsk->btrace_seq = blktrace_seq;
-	trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm, sizeof(tsk->comm));
+	spin_lock_irqsave(&running_trace_lock, flags);
+	list_for_each_entry(bt, &running_trace_list, running_list) {
+		trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm,
+			   sizeof(tsk->comm));
+	}
+	spin_unlock_irqrestore(&running_trace_lock, flags);
 }
 
 static void trace_note_time(struct blk_trace *bt)
@@ -229,16 +241,15 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
 		goto record_it;
 	}
 
+	if (unlikely(tsk->btrace_seq != blktrace_seq))
+		trace_note_tsk(tsk);
+
 	/*
 	 * A word about the locking here - we disable interrupts to reserve
 	 * some space in the relay per-cpu buffer, to prevent an irq
 	 * from coming in and stepping on our toes.
	 */
 	local_irq_save(flags);
-
-	if (unlikely(tsk->btrace_seq != blktrace_seq))
-		trace_note_tsk(bt, tsk);
-
 	t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len);
 	if (t) {
 		sequence = per_cpu_ptr(bt->sequence, cpu);
@@ -477,6 +488,7 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
 	bt->dir = dir;
 	bt->dev = dev;
 	atomic_set(&bt->dropped, 0);
+	INIT_LIST_HEAD(&bt->running_list);
 
 	ret = -EIO;
 	bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt,
@@ -567,13 +579,12 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name,
 		.end_lba = cbuts.end_lba,
 		.pid = cbuts.pid,
 	};
-	memcpy(&buts.name, &cbuts.name, 32);
 
 	ret = do_blk_trace_setup(q, name, dev, bdev, &buts);
 	if (ret)
 		return ret;
 
-	if (copy_to_user(arg, &buts.name, 32)) {
+	if (copy_to_user(arg, &buts.name, ARRAY_SIZE(buts.name))) {
 		blk_trace_remove(q);
 		return -EFAULT;
 	}
@@ -601,6 +612,9 @@ int blk_trace_startstop(struct request_queue *q, int start)
 			blktrace_seq++;
 			smp_mb();
 			bt->trace_state = Blktrace_running;
+			spin_lock_irq(&running_trace_lock);
+			list_add(&bt->running_list, &running_trace_list);
+			spin_unlock_irq(&running_trace_lock);
 
 			trace_note_time(bt);
 			ret = 0;
@@ -608,6 +622,9 @@ int blk_trace_startstop(struct request_queue *q, int start)
 	} else {
 		if (bt->trace_state == Blktrace_running) {
 			bt->trace_state = Blktrace_stopped;
+			spin_lock_irq(&running_trace_lock);
+			list_del_init(&bt->running_list);
+			spin_unlock_irq(&running_trace_lock);
 			relay_flush(bt->rchan);
 			ret = 0;
 		}
@@ -1472,6 +1489,9 @@ static int blk_trace_remove_queue(struct request_queue *q)
 	if (atomic_dec_and_test(&blk_probes_ref))
 		blk_unregister_tracepoints();
 
+	spin_lock_irq(&running_trace_lock);
+	list_del(&bt->running_list);
+	spin_unlock_irq(&running_trace_lock);
 	blk_trace_free(bt);
 	return 0;
 }