From 548bc8e1b38e48653a90f48f636f8d253504f8a2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 9 Jan 2013 08:05:13 -0800 Subject: block: RCU free request_queue RCU free request_queue so that blkcg_gq->q can be dereferenced under RCU lock. This will be used to implement hierarchical stats. Signed-off-by: Tejun Heo Acked-by: Vivek Goyal --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f94bc83011ed..406343c43cda 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -437,6 +438,7 @@ struct request_queue { /* Throttle data */ struct throtl_data *td; #endif + struct rcu_head rcu_head; }; #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ -- cgit v1.2.2 From 242d98f077ac0ab80920219769eb095503b93f61 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Mon, 17 Dec 2012 10:01:27 -0500 Subject: block,elevator: use new hashtable implementation Switch elevator to use the new hashtable implementation. This reduces the amount of generic unrelated code in the elevator. This also removes the dymanic allocation of the hash table. The size of the table is constant so there's no point in paying the price of an extra dereference when accessing it. This patch depends on d9b482c ("hashtable: introduce a small and naive hashtable") which was merged in v3.6. Signed-off-by: Sasha Levin Signed-off-by: Jens Axboe --- include/linux/elevator.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/elevator.h b/include/linux/elevator.h index c03af7687bb4..7c5a7c9789ee 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -2,6 +2,7 @@ #define _LINUX_ELEVATOR_H #include +#include #ifdef CONFIG_BLOCK @@ -96,6 +97,8 @@ struct elevator_type struct list_head list; }; +#define ELV_HASH_BITS 6 + /* * each queue has an elevator_queue associated with it */ @@ -105,8 +108,8 @@ struct elevator_queue void *elevator_data; struct kobject kobj; struct mutex sysfs_lock; - struct hlist_head *hash; unsigned int registered:1; + DECLARE_HASHTABLE(hash, ELV_HASH_BITS); }; /* -- cgit v1.2.2 From 422765c2638924da10ff363b5eed77924911bdc7 Mon Sep 17 00:00:00 2001 From: Jianpeng Ma Date: Fri, 11 Jan 2013 14:46:09 +0100 Subject: block: Remove should_sort judgement when flush blk_plug In commit 975927b942c932,it add blk_rq_pos to sort rq when flushing. Although this commit was used for the situation which blk_plug handled multi devices on the same time like md device. I think there must be some situations like this but only single device. So remove the should_sort judgement. Because the parameter should_sort is only for this purpose,it can delete should_sort from blk_plug. CC: Shaohua Li Signed-off-by: Jianpeng Ma Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f94bc83011ed..dbe74279f3d6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -974,7 +974,6 @@ struct blk_plug { unsigned long magic; /* detect uninitialized use-cases */ struct list_head list; /* requests */ struct list_head cb_list; /* md requires an unplug callback */ - unsigned int should_sort; /* list to be sorted before flushing? */ }; #define BLK_MAX_REQUEST_COUNT 16 -- cgit v1.2.2 From 3a366e614d0837d9fc23f78cdb1a1186ebc3387f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 11 Jan 2013 13:06:33 -0800 Subject: block: add missing block_bio_complete() tracepoint bio completion didn't kick block_bio_complete TP. Only dm was explicitly triggering the TP on IO completion. This makes block_bio_complete TP useless for tracers which want to know about bios, and all other bio based drivers skip generating blktrace completion events. This patch makes all bio completions via bio_endio() generate block_bio_complete TP. * Explicit trace_block_bio_complete() invocation removed from dm and the trace point is unexported. * @rq dropped from trace_block_bio_complete(). bios may fly around w/o queue associated. Verifying and accessing the assocaited queue belongs to TP probes. * blktrace now gets both request and bio completions. Make it ignore bio completions if request completion path is happening. This makes all bio based drivers generate blktrace completion events properly and makes the block_bio_complete TP actually useful. v2: With this change, block_bio_complete TP could be invoked on sg commands which have bio's with %NULL bi_bdev. Update TP assignment code to check whether bio->bi_bdev is %NULL before dereferencing. Signed-off-by: Tejun Heo Original-patch-by: Namhyung Kim Cc: Tejun Heo Cc: Steven Rostedt Cc: Alasdair Kergon Cc: dm-devel@redhat.com Cc: Neil Brown Signed-off-by: Jens Axboe --- include/linux/blktrace_api.h | 1 + include/trace/events/block.h | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 7c2e030e72f1..0ea61e07a91c 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -12,6 +12,7 @@ struct blk_trace { int trace_state; + bool rq_based; struct rchan *rchan; unsigned long __percpu *sequence; unsigned char __percpu *msg_data; diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 05c5e61f0a7c..8a168db9a645 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -206,7 +206,6 @@ TRACE_EVENT(block_bio_bounce, /** * block_bio_complete - completed all work on the block operation - * @q: queue holding the block operation * @bio: block operation completed * @error: io error value * @@ -215,9 +214,9 @@ TRACE_EVENT(block_bio_bounce, */ TRACE_EVENT(block_bio_complete, - TP_PROTO(struct request_queue *q, struct bio *bio, int error), + TP_PROTO(struct bio *bio, int error), - TP_ARGS(q, bio, error), + TP_ARGS(bio, error), TP_STRUCT__entry( __field( dev_t, dev ) @@ -228,7 +227,8 @@ TRACE_EVENT(block_bio_complete, ), TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; + __entry->dev = bio->bi_bdev ? + bio->bi_bdev->bd_dev : 0; __entry->sector = bio->bi_sector; __entry->nr_sector = bio->bi_size >> 9; __entry->error = error; -- cgit v1.2.2 From 8c1cf6bb02fda79b0a4b9bd121f6be6d4ce7a15a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 11 Jan 2013 13:06:34 -0800 Subject: block: add @req to bio_{front|back}_merge tracepoints bio_{front|back}_merge tracepoints report a bio merging into an existing request but didn't specify which request the bio is being merged into. Add @req to it. This makes it impossible to share the event template with block_bio_queue - split it out. @req isn't used or exported to userland at this point and there is no userland visible behavior change. Later changes will make use of the extra parameter. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/trace/events/block.h | 45 +++++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 8a168db9a645..b408f518a819 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -241,11 +241,11 @@ TRACE_EVENT(block_bio_complete, __entry->nr_sector, __entry->error) ); -DECLARE_EVENT_CLASS(block_bio, +DECLARE_EVENT_CLASS(block_bio_merge, - TP_PROTO(struct request_queue *q, struct bio *bio), + TP_PROTO(struct request_queue *q, struct request *rq, struct bio *bio), - TP_ARGS(q, bio), + TP_ARGS(q, rq, bio), TP_STRUCT__entry( __field( dev_t, dev ) @@ -272,31 +272,33 @@ DECLARE_EVENT_CLASS(block_bio, /** * block_bio_backmerge - merging block operation to the end of an existing operation * @q: queue holding operation + * @rq: request bio is being merged into * @bio: new block operation to merge * * Merging block request @bio to the end of an existing block request * in queue @q. */ -DEFINE_EVENT(block_bio, block_bio_backmerge, +DEFINE_EVENT(block_bio_merge, block_bio_backmerge, - TP_PROTO(struct request_queue *q, struct bio *bio), + TP_PROTO(struct request_queue *q, struct request *rq, struct bio *bio), - TP_ARGS(q, bio) + TP_ARGS(q, rq, bio) ); /** * block_bio_frontmerge - merging block operation to the beginning of an existing operation * @q: queue holding operation + * @rq: request bio is being merged into * @bio: new block operation to merge * * Merging block IO operation @bio to the beginning of an existing block * operation in queue @q. */ -DEFINE_EVENT(block_bio, block_bio_frontmerge, +DEFINE_EVENT(block_bio_merge, block_bio_frontmerge, - TP_PROTO(struct request_queue *q, struct bio *bio), + TP_PROTO(struct request_queue *q, struct request *rq, struct bio *bio), - TP_ARGS(q, bio) + TP_ARGS(q, rq, bio) ); /** @@ -306,11 +308,32 @@ DEFINE_EVENT(block_bio, block_bio_frontmerge, * * About to place the block IO operation @bio into queue @q. */ -DEFINE_EVENT(block_bio, block_bio_queue, +TRACE_EVENT(block_bio_queue, TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio) + TP_ARGS(q, bio), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __array( char, rwbs, RWBS_LEN ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm) ); DECLARE_EVENT_CLASS(block_get_rq, -- cgit v1.2.2 From f0059afd3e6e7aa1a0ffc23468b74c43d47660b8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 11 Jan 2013 13:06:35 -0800 Subject: buffer: make touch_buffer() an exported function We want to add a trace point to touch_buffer() but macros and inline functions defined in header files can't have tracing points. Move touch_buffer() to fs/buffer.c and make it a proper function. The new exported function is also declared inline. As most uses of touch_buffer() are inside buffer.c with nilfs2 as the only other user, the effect of this change should be negligible. Signed-off-by: Tejun Heo Cc: Steven Rostedt Signed-off-by: Jens Axboe --- include/linux/buffer_head.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 458f497738a4..5afc4f94d110 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -126,7 +126,6 @@ BUFFER_FNS(Write_EIO, write_io_error) BUFFER_FNS(Unwritten, unwritten) #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) -#define touch_buffer(bh) mark_page_accessed(bh->b_page) /* If we *know* page->private refers to buffer_heads */ #define page_buffers(page) \ @@ -142,6 +141,7 @@ BUFFER_FNS(Unwritten, unwritten) void mark_buffer_dirty(struct buffer_head *bh); void init_buffer(struct buffer_head *, bh_end_io_t *, void *); +void touch_buffer(struct buffer_head *bh); void set_bh_page(struct buffer_head *bh, struct page *page, unsigned long offset); int try_to_free_buffers(struct page *); -- cgit v1.2.2 From 5305cb830834549b9203ad4d009ad5483c5e293f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 11 Jan 2013 13:06:36 -0800 Subject: block: add block_{touch|dirty}_buffer tracepoint The former is triggered from touch_buffer() and the latter mark_buffer_dirty(). This is part of tracepoint additions to improve visiblity into dirtying / writeback operations for io tracer and userland. v2: Transformed writeback_dirty_buffer to block_dirty_buffer and made it share TP definition with block_touch_buffer. Signed-off-by: Tejun Heo Cc: Fengguang Wu Signed-off-by: Jens Axboe --- include/trace/events/block.h | 51 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) (limited to 'include') diff --git a/include/trace/events/block.h b/include/trace/events/block.h index b408f518a819..9961726523d0 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -6,10 +6,61 @@ #include #include +#include #include #define RWBS_LEN 8 +DECLARE_EVENT_CLASS(block_buffer, + + TP_PROTO(struct buffer_head *bh), + + TP_ARGS(bh), + + TP_STRUCT__entry ( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( size_t, size ) + ), + + TP_fast_assign( + __entry->dev = bh->b_bdev->bd_dev; + __entry->sector = bh->b_blocknr; + __entry->size = bh->b_size; + ), + + TP_printk("%d,%d sector=%llu size=%zu", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->sector, __entry->size + ) +); + +/** + * block_touch_buffer - mark a buffer accessed + * @bh: buffer_head being touched + * + * Called from touch_buffer(). + */ +DEFINE_EVENT(block_buffer, block_touch_buffer, + + TP_PROTO(struct buffer_head *bh), + + TP_ARGS(bh) +); + +/** + * block_dirty_buffer - mark a buffer dirty + * @bh: buffer_head being dirtied + * + * Called from mark_buffer_dirty(). + */ +DEFINE_EVENT(block_buffer, block_dirty_buffer, + + TP_PROTO(struct buffer_head *bh), + + TP_ARGS(bh) +); + DECLARE_EVENT_CLASS(block_rq_with_error, TP_PROTO(struct request_queue *q, struct request *rq), -- cgit v1.2.2 From 9fb0a7da0c528d9bd49b597aa63b1fe2216c7203 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 11 Jan 2013 13:06:37 -0800 Subject: writeback: add more tracepoints Add tracepoints for page dirtying, writeback_single_inode start, inode dirtying and writeback. For the latter two inode events, a pair of events are defined to denote start and end of the operations (the starting one has _start suffix and the one w/o suffix happens after the operation is complete). These inode ops are FS specific and can be non-trivial and having enclosing tracepoints is useful for external tracers. This is part of tracepoint additions to improve visiblity into dirtying / writeback operations for io tracer and userland. v2: writeback_dirty_inode[_start] TPs may be called for files on pseudo FSes w/ unregistered bdi. Check whether bdi->dev is %NULL before dereferencing. v3: buffer dirtying moved to a block TP. Signed-off-by: Tejun Heo Reviewed-by: Jan Kara Signed-off-by: Jens Axboe --- include/trace/events/writeback.h | 116 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) (limited to 'include') diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index b453d92c2253..6a16fd2e70ed 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -32,6 +32,115 @@ struct wb_writeback_work; +TRACE_EVENT(writeback_dirty_page, + + TP_PROTO(struct page *page, struct address_space *mapping), + + TP_ARGS(page, mapping), + + TP_STRUCT__entry ( + __array(char, name, 32) + __field(unsigned long, ino) + __field(pgoff_t, index) + ), + + TP_fast_assign( + strncpy(__entry->name, + mapping ? dev_name(mapping->backing_dev_info->dev) : "(unknown)", 32); + __entry->ino = mapping ? mapping->host->i_ino : 0; + __entry->index = page->index; + ), + + TP_printk("bdi %s: ino=%lu index=%lu", + __entry->name, + __entry->ino, + __entry->index + ) +); + +DECLARE_EVENT_CLASS(writeback_dirty_inode_template, + + TP_PROTO(struct inode *inode, int flags), + + TP_ARGS(inode, flags), + + TP_STRUCT__entry ( + __array(char, name, 32) + __field(unsigned long, ino) + __field(unsigned long, flags) + ), + + TP_fast_assign( + struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info; + + /* may be called for files on pseudo FSes w/ unregistered bdi */ + strncpy(__entry->name, + bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32); + __entry->ino = inode->i_ino; + __entry->flags = flags; + ), + + TP_printk("bdi %s: ino=%lu flags=%s", + __entry->name, + __entry->ino, + show_inode_state(__entry->flags) + ) +); + +DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode_start, + + TP_PROTO(struct inode *inode, int flags), + + TP_ARGS(inode, flags) +); + +DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode, + + TP_PROTO(struct inode *inode, int flags), + + TP_ARGS(inode, flags) +); + +DECLARE_EVENT_CLASS(writeback_write_inode_template, + + TP_PROTO(struct inode *inode, struct writeback_control *wbc), + + TP_ARGS(inode, wbc), + + TP_STRUCT__entry ( + __array(char, name, 32) + __field(unsigned long, ino) + __field(int, sync_mode) + ), + + TP_fast_assign( + strncpy(__entry->name, + dev_name(inode->i_mapping->backing_dev_info->dev), 32); + __entry->ino = inode->i_ino; + __entry->sync_mode = wbc->sync_mode; + ), + + TP_printk("bdi %s: ino=%lu sync_mode=%d", + __entry->name, + __entry->ino, + __entry->sync_mode + ) +); + +DEFINE_EVENT(writeback_write_inode_template, writeback_write_inode_start, + + TP_PROTO(struct inode *inode, struct writeback_control *wbc), + + TP_ARGS(inode, wbc) +); + +DEFINE_EVENT(writeback_write_inode_template, writeback_write_inode, + + TP_PROTO(struct inode *inode, struct writeback_control *wbc), + + TP_ARGS(inode, wbc) +); + DECLARE_EVENT_CLASS(writeback_work_class, TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_work *work), TP_ARGS(bdi, work), @@ -479,6 +588,13 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template, ) ); +DEFINE_EVENT(writeback_single_inode_template, writeback_single_inode_start, + TP_PROTO(struct inode *inode, + struct writeback_control *wbc, + unsigned long nr_to_write), + TP_ARGS(inode, wbc, nr_to_write) +); + DEFINE_EVENT(writeback_single_inode_template, writeback_single_inode, TP_PROTO(struct inode *inode, struct writeback_control *wbc, -- cgit v1.2.2 From 686855f5d833178e518d79e7912cdb3268a9fa69 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 14 Feb 2013 18:19:58 +0400 Subject: sched: add wait_for_completion_io[_timeout] The only difference between wait_for_completion[_timeout]() and wait_for_completion_io[_timeout]() is that the latter calls io_schedule_timeout() instead of schedule_timeout() so that the caller is accounted as waiting for IO, not just sleeping. These functions can be used for correct iowait time accounting when the completion struct is actually used for waiting for IO (e.g. completion of a bio request in the block layer). Signed-off-by: Vladimir Davydov Acked-by: Ingo Molnar Signed-off-by: Jens Axboe --- include/linux/completion.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/completion.h b/include/linux/completion.h index 51494e6b5548..33f0280fd533 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -77,10 +77,13 @@ static inline void init_completion(struct completion *x) } extern void wait_for_completion(struct completion *); +extern void wait_for_completion_io(struct completion *); extern int wait_for_completion_interruptible(struct completion *x); extern int wait_for_completion_killable(struct completion *x); extern unsigned long wait_for_completion_timeout(struct completion *x, unsigned long timeout); +extern unsigned long wait_for_completion_io_timeout(struct completion *x, + unsigned long timeout); extern long wait_for_completion_interruptible_timeout( struct completion *x, unsigned long timeout); extern long wait_for_completion_killable_timeout( -- cgit v1.2.2