From e2e1a148bc45855816ae6b4692ce29d0020fa22e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 9 Jun 2010 10:42:09 +0200 Subject: block: add sysfs knob for turning off disk entropy contributions There are two reasons for doing this: - On SSD disks, the completion times aren't as random as they are for rotational drives. So it's questionable whether they should contribute to the random pool in the first place. - Calling add_disk_randomness() has a lot of overhead. This adds /sys/block//queue/add_random that will allow you to switch off on a per-device basis. The default setting is on, so there should be no functional changes from this patch. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 09a840264d6f..b8224ea4a5de 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -467,11 +467,13 @@ struct request_queue #define QUEUE_FLAG_IO_STAT 15 /* do IO stats */ #define QUEUE_FLAG_DISCARD 16 /* supports DISCARD */ #define QUEUE_FLAG_NOXMERGES 17 /* No extended merges */ +#define QUEUE_FLAG_ADD_RANDOM 18 /* Contributes to random pool */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_CLUSTER) | \ (1 << QUEUE_FLAG_STACKABLE) | \ - (1 << QUEUE_FLAG_SAME_COMP)) + (1 << QUEUE_FLAG_SAME_COMP) | \ + (1 << QUEUE_FLAG_ADD_RANDOM)) static inline int queue_is_locked(struct request_queue *q) { @@ -596,6 +598,7 @@ enum { test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) +#define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) #define blk_queue_flushing(q) ((q)->ordseq) #define blk_queue_stackable(q) \ test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) -- cgit v1.2.2 From 41f2df62894bfcd3bf868af916b32b90aa7168dc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 17 Jun 2010 08:54:16 +0200 Subject: block: BARRIER request should imply SYNC A barrier request should by defintion have priority in get_request and let the queue be unplugged immediately as it's blocking all forward progress due to the queue draining. Most filesystems already get this implicitly by the way how submit_bh treats the buffer_ordered flag, and gfs2 sets it explicitly. But btrfs and XFS are still forgetting to set the flag, as is blkdev_issue_flush and some places in DM/MD. For XFS on metadata heavy workloads this gives a consistent speedup in the 2-3% range. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 68ca1b0491af..598878831497 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -136,7 +136,7 @@ struct inodes_stat_t { * SWRITE_SYNC * SWRITE_SYNC_PLUG Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer. * See SWRITE. - * WRITE_BARRIER Like WRITE, but tells the block layer that all + * WRITE_BARRIER Like WRITE_SYNC, but tells the block layer that all * previously submitted writes must be safely on storage * before this one is started. Also guarantees that when * this write is complete, it itself is also safely on @@ -159,7 +159,7 @@ struct inodes_stat_t { #define SWRITE_SYNC_PLUG \ (SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) #define SWRITE_SYNC (SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) -#define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER)) +#define WRITE_BARRIER (WRITE_SYNC | (1 << BIO_RW_BARRIER)) /* * These aren't really reads or writes, they pass down information about -- cgit v1.2.2 From bfe172310e58225f0d07f9354b683abacbd6a0d8 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 31 May 2010 15:59:03 +0900 Subject: block: kill ISA_DMA_THRESHOLD usage block uses ISA_DMA_THRESHOLD for BLK_BOUNCE_ISA. Only SCSI uses ISA_DMA_THRESHOLD for ancient drivers with non-zero unchecked_isa_dma. Nowadays drivers (and subsystems) use dma_mask properly instead of ISA_DMA_THRESHOLD. Documentation/scsi/scsi_mid_low_api.txt says: unchecked_isa_dma - 1=>only use bottom 16 MB of ram (ISA DMA addressing restriction), 0=>can use full 32 bit (or better) DMA address space So block simply uses DMA_BIT_MASK(24) for BLK_BOUNCE_ISA for SCSI. Signed-off-by: FUJITA Tomonori Acked-by: James Bottomley Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b8224ea4a5de..d7ae241a9e55 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -712,7 +712,7 @@ extern unsigned long blk_max_low_pfn, blk_max_pfn; #define BLK_BOUNCE_HIGH -1ULL #endif #define BLK_BOUNCE_ANY (-1ULL) -#define BLK_BOUNCE_ISA (ISA_DMA_THRESHOLD) +#define BLK_BOUNCE_ISA (DMA_BIT_MASK(24)) /* * default timeout for SG_IO if none specified -- cgit v1.2.2 From 33659ebbae262228eef4e0fe990f393d1f0ed941 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 7 Aug 2010 18:17:56 +0200 Subject: block: remove wrappers for request type/flags Remove all the trivial wrappers for the cmd_type and cmd_flags fields in struct requests. This allows much easier grepping for different request types instead of unwinding through macros. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 41 +++++++++++++---------------------------- include/linux/blktrace_api.h | 2 +- 2 files changed, 14 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d7ae241a9e55..3ecd28ef9ba4 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -604,33 +604,20 @@ enum { test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) #define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) -#define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS) -#define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC) -#define blk_special_request(rq) ((rq)->cmd_type == REQ_TYPE_SPECIAL) -#define blk_sense_request(rq) ((rq)->cmd_type == REQ_TYPE_SENSE) - -#define blk_failfast_dev(rq) ((rq)->cmd_flags & REQ_FAILFAST_DEV) -#define blk_failfast_transport(rq) ((rq)->cmd_flags & REQ_FAILFAST_TRANSPORT) -#define blk_failfast_driver(rq) ((rq)->cmd_flags & REQ_FAILFAST_DRIVER) -#define blk_noretry_request(rq) (blk_failfast_dev(rq) || \ - blk_failfast_transport(rq) || \ - blk_failfast_driver(rq)) -#define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED) -#define blk_rq_io_stat(rq) ((rq)->cmd_flags & REQ_IO_STAT) -#define blk_rq_quiet(rq) ((rq)->cmd_flags & REQ_QUIET) - -#define blk_account_rq(rq) (blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq))) - -#define blk_pm_suspend_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND) -#define blk_pm_resume_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_RESUME) +#define blk_noretry_request(rq) \ + ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \ + REQ_FAILFAST_DRIVER)) + +#define blk_account_rq(rq) \ + (((rq)->cmd_flags & REQ_STARTED) && \ + ((rq)->cmd_type == REQ_TYPE_FS || \ + ((rq)->cmd_flags & REQ_DISCARD))) + #define blk_pm_request(rq) \ - (blk_pm_suspend_request(rq) || blk_pm_resume_request(rq)) + ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND || \ + (rq)->cmd_type == REQ_TYPE_PM_RESUME) #define blk_rq_cpu_valid(rq) ((rq)->cpu != -1) -#define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED) -#define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER) -#define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA) -#define blk_discard_rq(rq) ((rq)->cmd_flags & REQ_DISCARD) #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) /* rq->queuelist of dequeued request must be list_empty() */ #define blk_queued_rq(rq) (!list_empty(&(rq)->queuelist)) @@ -652,9 +639,6 @@ static inline bool rq_is_sync(struct request *rq) return rw_is_sync(rq->cmd_flags); } -#define rq_is_meta(rq) ((rq)->cmd_flags & REQ_RW_META) -#define rq_noidle(rq) ((rq)->cmd_flags & REQ_NOIDLE) - static inline int blk_queue_full(struct request_queue *q, int sync) { if (sync) @@ -687,7 +671,8 @@ static inline void blk_clear_queue_full(struct request_queue *q, int sync) (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) #define rq_mergeable(rq) \ (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ - (blk_discard_rq(rq) || blk_fs_request((rq)))) + (((rq)->cmd_flags & REQ_DISCARD) || \ + (rq)->cmd_type == REQ_TYPE_FS)) /* * q->prep_rq_fn return values diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 416bf62d6d46..23faa67e8022 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -224,7 +224,7 @@ static inline int blk_trace_init_sysfs(struct device *dev) static inline int blk_cmd_buf_len(struct request *rq) { - return blk_pc_request(rq) ? rq->cmd_len * 3 : 1; + return (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? rq->cmd_len * 3 : 1; } extern void blk_dump_cmd(char *buf, struct request *rq); -- cgit v1.2.2 From 7b6d91daee5cac6402186ff224c3af39d79f4a0e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 7 Aug 2010 18:20:39 +0200 Subject: block: unify flags for struct bio and struct request Remove the current bio flags and reuse the request flags for the bio, too. This allows to more easily trace the type of I/O from the filesystem down to the block driver. There were two flags in the bio that were missing in the requests: BIO_RW_UNPLUG and BIO_RW_AHEAD. Also I've renamed two request flags that had a superflous RW in them. Note that the flags are in bio.h despite having the REQ_ name - as blkdev.h includes bio.h that is the only way to go for now. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 125 ++++++++++++++++++++++++++++++------------------- include/linux/blkdev.h | 66 +------------------------- include/linux/fs.h | 38 ++++++++------- 3 files changed, 99 insertions(+), 130 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 7fc5606e6ea5..4d379c8250ae 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -138,55 +138,83 @@ struct bio { #define BIO_POOL_IDX(bio) ((bio)->bi_flags >> BIO_POOL_OFFSET) /* - * bio bi_rw flags - * - * bit 0 -- data direction - * If not set, bio is a read from device. If set, it's a write to device. - * bit 1 -- fail fast device errors - * bit 2 -- fail fast transport errors - * bit 3 -- fail fast driver errors - * bit 4 -- rw-ahead when set - * bit 5 -- barrier - * Insert a serialization point in the IO queue, forcing previously - * submitted IO to be completed before this one is issued. - * bit 6 -- synchronous I/O hint. - * bit 7 -- Unplug the device immediately after submitting this bio. - * bit 8 -- metadata request - * Used for tracing to differentiate metadata and data IO. May also - * get some preferential treatment in the IO scheduler - * bit 9 -- discard sectors - * Informs the lower level device that this range of sectors is no longer - * used by the file system and may thus be freed by the device. Used - * for flash based storage. - * Don't want driver retries for any fast fail whatever the reason. - * bit 10 -- Tell the IO scheduler not to wait for more requests after this - one has been submitted, even if it is a SYNC request. + * Request flags. For use in the cmd_flags field of struct request, and in + * bi_rw of struct bio. Note that some flags are only valid in either one. */ -enum bio_rw_flags { - BIO_RW, - BIO_RW_FAILFAST_DEV, - BIO_RW_FAILFAST_TRANSPORT, - BIO_RW_FAILFAST_DRIVER, - /* above flags must match REQ_* */ - BIO_RW_AHEAD, - BIO_RW_BARRIER, - BIO_RW_SYNCIO, - BIO_RW_UNPLUG, - BIO_RW_META, - BIO_RW_DISCARD, - BIO_RW_NOIDLE, +enum rq_flag_bits { + /* common flags */ + __REQ_WRITE, /* not set, read. set, write */ + __REQ_FAILFAST_DEV, /* no driver retries of device errors */ + __REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */ + __REQ_FAILFAST_DRIVER, /* no driver retries of driver errors */ + + __REQ_HARDBARRIER, /* may not be passed by drive either */ + __REQ_SYNC, /* request is sync (sync write or read) */ + __REQ_META, /* metadata io request */ + __REQ_DISCARD, /* request to discard sectors */ + __REQ_NOIDLE, /* don't anticipate more IO after this one */ + + /* bio only flags */ + __REQ_UNPLUG, /* unplug the immediately after submission */ + __REQ_RAHEAD, /* read ahead, can fail anytime */ + + /* request only flags */ + __REQ_SORTED, /* elevator knows about this request */ + __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ + __REQ_FUA, /* forced unit access */ + __REQ_NOMERGE, /* don't touch this for merging */ + __REQ_STARTED, /* drive already may have started this one */ + __REQ_DONTPREP, /* don't call prep for this one */ + __REQ_QUEUED, /* uses queueing */ + __REQ_ELVPRIV, /* elevator private data attached */ + __REQ_FAILED, /* set if the request failed */ + __REQ_QUIET, /* don't worry about errors */ + __REQ_PREEMPT, /* set for "ide_preempt" requests */ + __REQ_ORDERED_COLOR, /* is before or after barrier */ + __REQ_ALLOCED, /* request came from our alloc pool */ + __REQ_COPY_USER, /* contains copies of user pages */ + __REQ_INTEGRITY, /* integrity metadata has been remapped */ + __REQ_IO_STAT, /* account I/O stat */ + __REQ_MIXED_MERGE, /* merge of different types, fail separately */ + __REQ_NR_BITS, /* stops here */ }; -/* - * First four bits must match between bio->bi_rw and rq->cmd_flags, make - * that explicit here. - */ -#define BIO_RW_RQ_MASK 0xf - -static inline bool bio_rw_flagged(struct bio *bio, enum bio_rw_flags flag) -{ - return (bio->bi_rw & (1 << flag)) != 0; -} +#define REQ_WRITE (1 << __REQ_WRITE) +#define REQ_FAILFAST_DEV (1 << __REQ_FAILFAST_DEV) +#define REQ_FAILFAST_TRANSPORT (1 << __REQ_FAILFAST_TRANSPORT) +#define REQ_FAILFAST_DRIVER (1 << __REQ_FAILFAST_DRIVER) +#define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER) +#define REQ_SYNC (1 << __REQ_SYNC) +#define REQ_META (1 << __REQ_META) +#define REQ_DISCARD (1 << __REQ_DISCARD) +#define REQ_NOIDLE (1 << __REQ_NOIDLE) + +#define REQ_FAILFAST_MASK \ + (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) +#define REQ_COMMON_MASK \ + (REQ_WRITE | REQ_FAILFAST_MASK | REQ_HARDBARRIER | REQ_SYNC | \ + REQ_META| REQ_DISCARD | REQ_NOIDLE) + +#define REQ_UNPLUG (1 << __REQ_UNPLUG) +#define REQ_RAHEAD (1 << __REQ_RAHEAD) + +#define REQ_SORTED (1 << __REQ_SORTED) +#define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) +#define REQ_FUA (1 << __REQ_FUA) +#define REQ_NOMERGE (1 << __REQ_NOMERGE) +#define REQ_STARTED (1 << __REQ_STARTED) +#define REQ_DONTPREP (1 << __REQ_DONTPREP) +#define REQ_QUEUED (1 << __REQ_QUEUED) +#define REQ_ELVPRIV (1 << __REQ_ELVPRIV) +#define REQ_FAILED (1 << __REQ_FAILED) +#define REQ_QUIET (1 << __REQ_QUIET) +#define REQ_PREEMPT (1 << __REQ_PREEMPT) +#define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) +#define REQ_ALLOCED (1 << __REQ_ALLOCED) +#define REQ_COPY_USER (1 << __REQ_COPY_USER) +#define REQ_INTEGRITY (1 << __REQ_INTEGRITY) +#define REQ_IO_STAT (1 << __REQ_IO_STAT) +#define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE) /* * upper 16 bits of bi_rw define the io priority of this bio @@ -211,7 +239,10 @@ static inline bool bio_rw_flagged(struct bio *bio, enum bio_rw_flags flag) #define bio_offset(bio) bio_iovec((bio))->bv_offset #define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx) #define bio_sectors(bio) ((bio)->bi_size >> 9) -#define bio_empty_barrier(bio) (bio_rw_flagged(bio, BIO_RW_BARRIER) && !bio_has_data(bio) && !bio_rw_flagged(bio, BIO_RW_DISCARD)) +#define bio_empty_barrier(bio) \ + ((bio->bi_rw & REQ_HARDBARRIER) && \ + !bio_has_data(bio) && \ + !(bio->bi_rw & REQ_DISCARD)) static inline unsigned int bio_cur_bytes(struct bio *bio) { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3ecd28ef9ba4..3fc0f5908619 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -84,70 +84,6 @@ enum { REQ_LB_OP_FLUSH = 0x41, /* flush request */ }; -/* - * request type modified bits. first four bits match BIO_RW* bits, important - */ -enum rq_flag_bits { - __REQ_RW, /* not set, read. set, write */ - __REQ_FAILFAST_DEV, /* no driver retries of device errors */ - __REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */ - __REQ_FAILFAST_DRIVER, /* no driver retries of driver errors */ - /* above flags must match BIO_RW_* */ - __REQ_DISCARD, /* request to discard sectors */ - __REQ_SORTED, /* elevator knows about this request */ - __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ - __REQ_HARDBARRIER, /* may not be passed by drive either */ - __REQ_FUA, /* forced unit access */ - __REQ_NOMERGE, /* don't touch this for merging */ - __REQ_STARTED, /* drive already may have started this one */ - __REQ_DONTPREP, /* don't call prep for this one */ - __REQ_QUEUED, /* uses queueing */ - __REQ_ELVPRIV, /* elevator private data attached */ - __REQ_FAILED, /* set if the request failed */ - __REQ_QUIET, /* don't worry about errors */ - __REQ_PREEMPT, /* set for "ide_preempt" requests */ - __REQ_ORDERED_COLOR, /* is before or after barrier */ - __REQ_RW_SYNC, /* request is sync (sync write or read) */ - __REQ_ALLOCED, /* request came from our alloc pool */ - __REQ_RW_META, /* metadata io request */ - __REQ_COPY_USER, /* contains copies of user pages */ - __REQ_INTEGRITY, /* integrity metadata has been remapped */ - __REQ_NOIDLE, /* Don't anticipate more IO after this one */ - __REQ_IO_STAT, /* account I/O stat */ - __REQ_MIXED_MERGE, /* merge of different types, fail separately */ - __REQ_NR_BITS, /* stops here */ -}; - -#define REQ_RW (1 << __REQ_RW) -#define REQ_FAILFAST_DEV (1 << __REQ_FAILFAST_DEV) -#define REQ_FAILFAST_TRANSPORT (1 << __REQ_FAILFAST_TRANSPORT) -#define REQ_FAILFAST_DRIVER (1 << __REQ_FAILFAST_DRIVER) -#define REQ_DISCARD (1 << __REQ_DISCARD) -#define REQ_SORTED (1 << __REQ_SORTED) -#define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) -#define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER) -#define REQ_FUA (1 << __REQ_FUA) -#define REQ_NOMERGE (1 << __REQ_NOMERGE) -#define REQ_STARTED (1 << __REQ_STARTED) -#define REQ_DONTPREP (1 << __REQ_DONTPREP) -#define REQ_QUEUED (1 << __REQ_QUEUED) -#define REQ_ELVPRIV (1 << __REQ_ELVPRIV) -#define REQ_FAILED (1 << __REQ_FAILED) -#define REQ_QUIET (1 << __REQ_QUIET) -#define REQ_PREEMPT (1 << __REQ_PREEMPT) -#define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) -#define REQ_RW_SYNC (1 << __REQ_RW_SYNC) -#define REQ_ALLOCED (1 << __REQ_ALLOCED) -#define REQ_RW_META (1 << __REQ_RW_META) -#define REQ_COPY_USER (1 << __REQ_COPY_USER) -#define REQ_INTEGRITY (1 << __REQ_INTEGRITY) -#define REQ_NOIDLE (1 << __REQ_NOIDLE) -#define REQ_IO_STAT (1 << __REQ_IO_STAT) -#define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE) - -#define REQ_FAILFAST_MASK (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | \ - REQ_FAILFAST_DRIVER) - #define BLK_MAX_CDB 16 /* @@ -631,7 +567,7 @@ enum { */ static inline bool rw_is_sync(unsigned int rw_flags) { - return !(rw_flags & REQ_RW) || (rw_flags & REQ_RW_SYNC); + return !(rw_flags & REQ_WRITE) || (rw_flags & REQ_SYNC); } static inline bool rq_is_sync(struct request *rq) diff --git a/include/linux/fs.h b/include/linux/fs.h index 598878831497..c5c92943c767 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -144,29 +144,31 @@ struct inodes_stat_t { * of this IO. * */ -#define RW_MASK 1 -#define RWA_MASK 2 -#define READ 0 -#define WRITE 1 -#define READA 2 /* read-ahead - don't block if no resources */ -#define SWRITE 3 /* for ll_rw_block() - wait for buffer lock */ -#define READ_SYNC (READ | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG)) -#define READ_META (READ | (1 << BIO_RW_META)) -#define WRITE_SYNC_PLUG (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) -#define WRITE_SYNC (WRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) -#define WRITE_ODIRECT_PLUG (WRITE | (1 << BIO_RW_SYNCIO)) -#define WRITE_META (WRITE | (1 << BIO_RW_META)) -#define SWRITE_SYNC_PLUG \ - (SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) -#define SWRITE_SYNC (SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) -#define WRITE_BARRIER (WRITE_SYNC | (1 << BIO_RW_BARRIER)) +#define RW_MASK 1 +#define RWA_MASK 2 + +#define READ 0 +#define WRITE 1 +#define READA 2 /* readahead - don't block if no resources */ +#define SWRITE 3 /* for ll_rw_block() - wait for buffer lock */ + +#define READ_SYNC (READ | REQ_SYNC | REQ_UNPLUG) +#define READ_META (READ | REQ_META) +#define WRITE_SYNC_PLUG (WRITE | REQ_SYNC | REQ_NOIDLE) +#define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG) +#define WRITE_ODIRECT_PLUG (WRITE | REQ_SYNC) +#define WRITE_META (WRITE | REQ_META) +#define WRITE_BARRIER (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ + REQ_HARDBARRIER) +#define SWRITE_SYNC_PLUG (SWRITE | REQ_SYNC | REQ_NOIDLE) +#define SWRITE_SYNC (SWRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG) /* * These aren't really reads or writes, they pass down information about * parts of device that are now unused by the file system. */ -#define DISCARD_NOBARRIER (WRITE | (1 << BIO_RW_DISCARD)) -#define DISCARD_BARRIER (DISCARD_NOBARRIER | (1 << BIO_RW_BARRIER)) +#define DISCARD_NOBARRIER (WRITE | REQ_DISCARD) +#define DISCARD_BARRIER (WRITE | REQ_DISCARD | REQ_HARDBARRIER) #define SEL_IN 1 #define SEL_OUT 2 -- cgit v1.2.2 From c1955ce32fdb0877b7a1b22feb2669358f65be76 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 19 Jun 2010 23:08:06 +0200 Subject: writeback: remove wb_list The wb_list member of struct backing_device_info always has exactly one element. Just use the direct bdi->wb pointer instead and simplify some code. Also remove bdi_task_init which is now trivial to prepare for the next patch. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index e9aec0d099df..50f146146169 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -45,8 +45,6 @@ enum bdi_stat_item { #define BDI_STAT_BATCH (8*(1+ilog2(nr_cpu_ids))) struct bdi_writeback { - struct list_head list; /* hangs off the bdi */ - struct backing_dev_info *bdi; /* our parent bdi */ unsigned int nr; @@ -80,8 +78,7 @@ struct backing_dev_info { unsigned int max_ratio, max_prop_frac; struct bdi_writeback wb; /* default writeback info for this bdi */ - spinlock_t wb_lock; /* protects update side of wb_list */ - struct list_head wb_list; /* the flusher threads hanging off this bdi */ + spinlock_t wb_lock; /* protects work_list */ struct list_head work_list; -- cgit v1.2.2 From 082439004b31adc146e96e5f1c574dd2b57dcd93 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 19 Jun 2010 23:08:22 +0200 Subject: writeback: merge bdi_writeback_task and bdi_start_fn Move all code for the writeback thread into fs/fs-writeback.c instead of splitting it over two functions in two files. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 50f146146169..e536f3a74e60 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -102,7 +102,7 @@ void bdi_unregister(struct backing_dev_info *bdi); int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int); void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages); void bdi_start_background_writeback(struct backing_dev_info *bdi); -int bdi_writeback_task(struct bdi_writeback *wb); +int bdi_writeback_thread(void *data); int bdi_has_dirty_io(struct backing_dev_info *bdi); void bdi_arm_supers_timer(void); -- cgit v1.2.2 From 66ac0280197981f88774e74b60c8e5f9f07c1dba Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 18 Jun 2010 16:59:42 +0200 Subject: block: don't allocate a payload for discard request Allocating a fixed payload for discard requests always was a horrible hack, and it's not coming to byte us when adding support for discard in DM/MD. So change the code to leave the allocation of a payload to the lowlevel driver. Unfortunately that means we'll need another hack, which allows us to update the various block layer length fields indicating that we have a payload. Instead of hiding this in sd.c, which we already partially do for UNMAP support add a documented helper in the core block layer for it. Signed-off-by: Christoph Hellwig Acked-by: Mike Snitzer Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3fc0f5908619..204fbe22354d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -705,6 +705,8 @@ extern struct request *blk_make_request(struct request_queue *, struct bio *, gfp_t); extern void blk_insert_request(struct request_queue *, struct request *, int, void *); extern void blk_requeue_request(struct request_queue *, struct request *); +extern void blk_add_request_payload(struct request *rq, struct page *page, + unsigned int len); extern int blk_rq_check_limits(struct request_queue *q, struct request *rq); extern int blk_lld_busy(struct request_queue *q); extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, -- cgit v1.2.2 From 1676effca4cd2a6b32e6e8e0ecaa91522dfda6fa Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 21 Jun 2010 11:02:48 +0200 Subject: gcc-4.6: fs: fix unused but set warnings No real bugs I believe, just some dead code, and some shut up code. Signed-off-by: Andi Kleen Cc: Eric Paris Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- include/linux/audit.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index f391d45c8aea..e24afabc548f 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -544,7 +544,7 @@ extern int audit_signals; #define audit_putname(n) do { ; } while (0) #define __audit_inode(n,d) do { ; } while (0) #define __audit_inode_child(i,p) do { ; } while (0) -#define audit_inode(n,d) do { ; } while (0) +#define audit_inode(n,d) do { (void)(d); } while (0) #define audit_inode_child(i,p) do { ; } while (0) #define audit_core_dumps(i) do { ; } while (0) #define auditsc_get_stamp(c,t,s) (0) -- cgit v1.2.2 From 28018c242a4ec7017bbbf81d2d3952f820a27118 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Thu, 1 Jul 2010 19:49:17 +0900 Subject: block: implement an unprep function corresponding directly to prep Reviewed-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 204fbe22354d..6bba04c7ec48 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -200,6 +200,7 @@ struct request_pm_state typedef void (request_fn_proc) (struct request_queue *q); typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); typedef int (prep_rq_fn) (struct request_queue *, struct request *); +typedef void (unprep_rq_fn) (struct request_queue *, struct request *); typedef void (unplug_fn) (struct request_queue *); struct bio_vec; @@ -282,6 +283,7 @@ struct request_queue request_fn_proc *request_fn; make_request_fn *make_request_fn; prep_rq_fn *prep_rq_fn; + unprep_rq_fn *unprep_rq_fn; unplug_fn *unplug_fn; merge_bvec_fn *merge_bvec_fn; prepare_flush_fn *prepare_flush_fn; @@ -841,6 +843,7 @@ extern void blk_complete_request(struct request *); extern void __blk_complete_request(struct request *); extern void blk_abort_request(struct request *); extern void blk_abort_queue(struct request_queue *); +extern void blk_unprep_request(struct request *); /* * Access functions for manipulating queue properties @@ -885,6 +888,7 @@ extern int blk_queue_dma_drain(struct request_queue *q, extern void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn); extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn); +extern void blk_queue_unprep_rq(struct request_queue *, unprep_rq_fn *ufn); extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *); extern void blk_queue_dma_alignment(struct request_queue *, int); extern void blk_queue_update_dma_alignment(struct request_queue *, int); -- cgit v1.2.2 From 8749534fe6826596b71bc409c872b047a8e2755b Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sat, 3 Jul 2010 17:45:32 +0900 Subject: block: introduce REQ_FLUSH flag SCSI-ml needs a way to mark a request as flush request in q->prepare_flush_fn because it needs to identify them later (e.g. in q->request_fn or prep_rq_fn). queue_flush sets REQ_HARDBARRIER in rq->cmd_flags however the block layer also sends normal REQ_TYPE_FS requests with REQ_HARDBARRIER. So SCSI-ml can't use REQ_HARDBARRIER to identify flush requests. We could change the block layer to clear REQ_HARDBARRIER bit before sending non flush requests to the lower layers. However, intorudcing the new flag looks cleaner (surely easier). Signed-off-by: FUJITA Tomonori Cc: James Bottomley Cc: David S. Miller Cc: Rusty Russell Cc: Alasdair G Kergon Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 4d379c8250ae..f655b54c9ef3 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -174,6 +174,7 @@ enum rq_flag_bits { __REQ_ALLOCED, /* request came from our alloc pool */ __REQ_COPY_USER, /* contains copies of user pages */ __REQ_INTEGRITY, /* integrity metadata has been remapped */ + __REQ_FLUSH, /* request for cache flush */ __REQ_IO_STAT, /* account I/O stat */ __REQ_MIXED_MERGE, /* merge of different types, fail separately */ __REQ_NR_BITS, /* stops here */ @@ -213,6 +214,7 @@ enum rq_flag_bits { #define REQ_ALLOCED (1 << __REQ_ALLOCED) #define REQ_COPY_USER (1 << __REQ_COPY_USER) #define REQ_INTEGRITY (1 << __REQ_INTEGRITY) +#define REQ_FLUSH (1 << __REQ_FLUSH) #define REQ_IO_STAT (1 << __REQ_IO_STAT) #define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE) -- cgit v1.2.2 From 00fff26539bfe3fad21c164fc4002d9ede056fb0 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sat, 3 Jul 2010 17:45:40 +0900 Subject: block: remove q->prepare_flush_fn completely This removes q->prepare_flush_fn completely (changes the blk_queue_ordered API). Signed-off-by: FUJITA Tomonori Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6bba04c7ec48..3a2c5d9a9288 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -212,7 +212,6 @@ struct bvec_merge_data { }; typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *, struct bio_vec *); -typedef void (prepare_flush_fn) (struct request_queue *, struct request *); typedef void (softirq_done_fn)(struct request *); typedef int (dma_drain_needed_fn)(struct request *); typedef int (lld_busy_fn) (struct request_queue *q); @@ -286,7 +285,6 @@ struct request_queue unprep_rq_fn *unprep_rq_fn; unplug_fn *unplug_fn; merge_bvec_fn *merge_bvec_fn; - prepare_flush_fn *prepare_flush_fn; softirq_done_fn *softirq_done_fn; rq_timed_out_fn *rq_timed_out_fn; dma_drain_needed_fn *dma_drain_needed; @@ -896,7 +894,7 @@ extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); -extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *); +extern int blk_queue_ordered(struct request_queue *, unsigned); extern bool blk_do_ordered(struct request_queue *, struct request **); extern unsigned blk_ordered_cur_seq(struct request_queue *); extern unsigned blk_ordered_req_seq(struct request *); -- cgit v1.2.2 From a89f5c899db3c6be4bb426e4efb72ecee29a93b5 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 6 Jul 2010 09:03:18 +0200 Subject: block: remove unused REQ_TYPE_LINUX_BLOCK Nobody uses REQ_TYPE_LINUX_BLOCK (and its REQ_LB_OP_*). Signed-off-by: FUJITA Tomonori Acked-by: Jeff Garzik Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3a2c5d9a9288..baf5258f5985 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -60,7 +60,6 @@ enum rq_cmd_type_bits { REQ_TYPE_PM_RESUME, /* resume request */ REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ REQ_TYPE_SPECIAL, /* driver defined type */ - REQ_TYPE_LINUX_BLOCK, /* generic block layer message */ /* * for ATA/ATAPI devices. this really doesn't belong here, ide should * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver @@ -70,20 +69,6 @@ enum rq_cmd_type_bits { REQ_TYPE_ATA_PC, }; -/* - * For request of type REQ_TYPE_LINUX_BLOCK, rq->cmd[0] is the opcode being - * sent down (similar to how REQ_TYPE_BLOCK_PC means that ->cmd[] holds a - * SCSI cdb. - * - * 0x00 -> 0x3f are driver private, to be used for whatever purpose they need, - * typically to differentiate REQ_TYPE_SPECIAL requests. - * - */ -enum { - REQ_LB_OP_EJECT = 0x40, /* eject request */ - REQ_LB_OP_FLUSH = 0x41, /* flush request */ -}; - #define BLK_MAX_CDB 16 /* -- cgit v1.2.2 From 8a6cfeb6deca3a8fefd639d898b0d163c0b5d368 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 8 Jul 2010 10:18:46 +0200 Subject: block: push down BKL into .locked_ioctl As a preparation for the removal of the big kernel lock in the block layer, this removes the BKL from the common ioctl handling code, moving it into every single driver still using it. Signed-off-by: Arnd Bergmann Acked-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index baf5258f5985..a8b05fc80c6d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1246,7 +1246,6 @@ static inline int blk_integrity_rq(struct request *rq) struct block_device_operations { int (*open) (struct block_device *, fmode_t); int (*release) (struct gendisk *, fmode_t); - int (*locked_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*direct_access) (struct block_device *, sector_t, -- cgit v1.2.2 From 62c2a7d969f30163f733c81158254b3095b23e72 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 7 Jul 2010 16:51:26 +0200 Subject: block: push BKL into blktrace ioctls The blktrace driver currently needs the BKL, but we should not need to take that in the block layer, so just push it down into the driver itself. It is quite likely that the BKL is not actually required in blktrace code and could be removed in a follow-on patch. Signed-off-by: Arnd Bergmann Acked-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blktrace_api.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 23faa67e8022..07c698621ad0 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -5,6 +5,7 @@ #ifdef __KERNEL__ #include #include +#include #endif /* @@ -203,6 +204,17 @@ extern int blk_trace_init_sysfs(struct device *dev); extern struct attribute_group blk_trace_attr_group; +struct compat_blk_user_trace_setup { + char name[32]; + u16 act_mask; + u32 buf_size; + u32 buf_nr; + compat_u64 start_lba; + compat_u64 end_lba; + u32 pid; +}; +#define BLKTRACESETUP32 _IOWR(0x12, 115, struct compat_blk_user_trace_setup) + #else /* !CONFIG_BLK_DEV_IO_TRACE */ # define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) # define blk_trace_shutdown(q) do { } while (0) -- cgit v1.2.2 From 2669b19fa4debcdd6a660ace1a124c0900f113e6 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Fri, 9 Jul 2010 14:24:38 +1000 Subject: block: fix for block tracing build error block/compat_ioctl.c: In function 'compat_blkdev_ioctl': block/compat_ioctl.c:754: error: 'BLKTRACESETUP32' undeclared (first use in this function) Signed-off-by: Stephen Rothwell Acked-by: Arnd Bergmann Signed-off-by: Jens Axboe --- include/linux/blktrace_api.h | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 07c698621ad0..3395cf7130f5 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -204,17 +204,6 @@ extern int blk_trace_init_sysfs(struct device *dev); extern struct attribute_group blk_trace_attr_group; -struct compat_blk_user_trace_setup { - char name[32]; - u16 act_mask; - u32 buf_size; - u32 buf_nr; - compat_u64 start_lba; - compat_u64 end_lba; - u32 pid; -}; -#define BLKTRACESETUP32 _IOWR(0x12, 115, struct compat_blk_user_trace_setup) - #else /* !CONFIG_BLK_DEV_IO_TRACE */ # define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) # define blk_trace_shutdown(q) do { } while (0) @@ -232,6 +221,21 @@ static inline int blk_trace_init_sysfs(struct device *dev) #endif /* CONFIG_BLK_DEV_IO_TRACE */ +#ifdef CONFIG_COMPAT + +struct compat_blk_user_trace_setup { + char name[32]; + u16 act_mask; + u32 buf_size; + u32 buf_nr; + compat_u64 start_lba; + compat_u64 end_lba; + u32 pid; +}; +#define BLKTRACESETUP32 _IOWR(0x12, 115, struct compat_blk_user_trace_setup) + +#endif + #if defined(CONFIG_EVENT_TRACING) && defined(CONFIG_BLOCK) static inline int blk_cmd_buf_len(struct request *rq) -- cgit v1.2.2 From edca4a380584a65a16839bdee33ec82244f0f88e Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 3 Aug 2010 12:54:51 +0200 Subject: block: disallow FS recursion from sb_issue_discard allocation Filesystems can call sb_issue_discard on a memory reclaim path (e.g. ext4 calls sb_issue_discard during journal commit). Use GFP_NOFS in sb_issue_discard to avoid recursing back into the FS. Reported-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a8b05fc80c6d..89c855c5655c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -933,7 +933,7 @@ static inline int sb_issue_discard(struct super_block *sb, { block <<= (sb->s_blocksize_bits - 9); nr_blocks <<= (sb->s_blocksize_bits - 9); - return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_KERNEL, + return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_NOFS, BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); } -- cgit v1.2.2 From aca27ba9618276dd2f777bcd5a1419589ccf1ca8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 3 Aug 2010 13:14:33 +0200 Subject: bio, fs: update RWA_MASK, READA and SWRITE to match the corresponding BIO_RW_* bits Commit a82afdf (block: use the same failfast bits for bio and request) moved BIO_RW_* bits around such that they match up with REQ_* bits. Unfortunately, fs.h hard coded RW_MASK, RWA_MASK, READ, WRITE, READA and SWRITE as 0, 1, 2 and 3, and expected them to match with BIO_RW_* bits. READ/WRITE didn't change but BIO_RW_AHEAD was moved to bit 4 instead of bit 1, breaking RWA_MASK, READA and SWRITE. This patch updates RWA_MASK, READA and SWRITE such that they match the BIO_RW_* bits again. A follow up patch will update the definitions to directly use BIO_RW_* bits so that this kind of breakage won't happen again. Neil also spotted missing RWA_MASK conversion. Stable: The offending commit a82afdf was released with v2.6.32, so this patch should be applied to all kernels since then but it must _NOT_ be applied to kernels earlier than that. Signed-off-by: Tejun Heo Reported-and-bisected-by: Vladislav Bolkhovitin Root-caused-by: Neil Brown Cc: stable@kernel.org Signed-off-by: Jens Axboe --- include/linux/fs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index c5c92943c767..55dad7bca25b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -145,12 +145,12 @@ struct inodes_stat_t { * */ #define RW_MASK 1 -#define RWA_MASK 2 +#define RWA_MASK 16 #define READ 0 #define WRITE 1 -#define READA 2 /* readahead - don't block if no resources */ -#define SWRITE 3 /* for ll_rw_block() - wait for buffer lock */ +#define READA 16 /* readahead - don't block if no resources */ +#define SWRITE 17 /* for ll_rw_block(), wait for buffer lock */ #define READ_SYNC (READ | REQ_SYNC | REQ_UNPLUG) #define READ_META (READ | REQ_META) -- cgit v1.2.2 From 7cc015811ef8992dfcce314d0ed9642bc18143d1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 3 Aug 2010 13:14:58 +0200 Subject: bio, fs: separate out bio_types.h and define READ/WRITE constants in terms of BIO_RW_* flags linux/fs.h hard coded READ/WRITE constants which should match BIO_RW_* flags. This is fragile and caused breakage during BIO_RW_* flag rearrangement. The hardcoding is to avoid include dependency hell. Create linux/bio_types.h which contatins definitions for bio data structures and flags and include it from bio.h and fs.h, and make fs.h define all READ/WRITE related constants in terms of BIO_RW_* flags. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/bio.h | 183 +------------------------------------------ include/linux/blk_types.h | 193 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/fs.h | 15 ++-- 3 files changed, 204 insertions(+), 187 deletions(-) create mode 100644 include/linux/blk_types.h (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index f655b54c9ef3..5274103434ad 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -9,7 +9,7 @@ * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of - + * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * @@ -28,6 +28,9 @@ #include +/* struct bio, bio_vec and BIO_* flags are defined in blk_types.h */ +#include + #define BIO_DEBUG #ifdef BIO_DEBUG @@ -40,184 +43,6 @@ #define BIO_MAX_SIZE (BIO_MAX_PAGES << PAGE_CACHE_SHIFT) #define BIO_MAX_SECTORS (BIO_MAX_SIZE >> 9) -/* - * was unsigned short, but we might as well be ready for > 64kB I/O pages - */ -struct bio_vec { - struct page *bv_page; - unsigned int bv_len; - unsigned int bv_offset; -}; - -struct bio_set; -struct bio; -struct bio_integrity_payload; -typedef void (bio_end_io_t) (struct bio *, int); -typedef void (bio_destructor_t) (struct bio *); - -/* - * main unit of I/O for the block layer and lower layers (ie drivers and - * stacking drivers) - */ -struct bio { - sector_t bi_sector; /* device address in 512 byte - sectors */ - struct bio *bi_next; /* request queue link */ - struct block_device *bi_bdev; - unsigned long bi_flags; /* status, command, etc */ - unsigned long bi_rw; /* bottom bits READ/WRITE, - * top bits priority - */ - - unsigned short bi_vcnt; /* how many bio_vec's */ - unsigned short bi_idx; /* current index into bvl_vec */ - - /* Number of segments in this BIO after - * physical address coalescing is performed. - */ - unsigned int bi_phys_segments; - - unsigned int bi_size; /* residual I/O count */ - - /* - * To keep track of the max segment size, we account for the - * sizes of the first and last mergeable segments in this bio. - */ - unsigned int bi_seg_front_size; - unsigned int bi_seg_back_size; - - unsigned int bi_max_vecs; /* max bvl_vecs we can hold */ - - unsigned int bi_comp_cpu; /* completion CPU */ - - atomic_t bi_cnt; /* pin count */ - - struct bio_vec *bi_io_vec; /* the actual vec list */ - - bio_end_io_t *bi_end_io; - - void *bi_private; -#if defined(CONFIG_BLK_DEV_INTEGRITY) - struct bio_integrity_payload *bi_integrity; /* data integrity */ -#endif - - bio_destructor_t *bi_destructor; /* destructor */ - - /* - * We can inline a number of vecs at the end of the bio, to avoid - * double allocations for a small number of bio_vecs. This member - * MUST obviously be kept at the very end of the bio. - */ - struct bio_vec bi_inline_vecs[0]; -}; - -/* - * bio flags - */ -#define BIO_UPTODATE 0 /* ok after I/O completion */ -#define BIO_RW_BLOCK 1 /* RW_AHEAD set, and read/write would block */ -#define BIO_EOF 2 /* out-out-bounds error */ -#define BIO_SEG_VALID 3 /* bi_phys_segments valid */ -#define BIO_CLONED 4 /* doesn't own data */ -#define BIO_BOUNCED 5 /* bio is a bounce bio */ -#define BIO_USER_MAPPED 6 /* contains user pages */ -#define BIO_EOPNOTSUPP 7 /* not supported */ -#define BIO_CPU_AFFINE 8 /* complete bio on same CPU as submitted */ -#define BIO_NULL_MAPPED 9 /* contains invalid user pages */ -#define BIO_FS_INTEGRITY 10 /* fs owns integrity data, not block layer */ -#define BIO_QUIET 11 /* Make BIO Quiet */ -#define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) - -/* - * top 4 bits of bio flags indicate the pool this bio came from - */ -#define BIO_POOL_BITS (4) -#define BIO_POOL_NONE ((1UL << BIO_POOL_BITS) - 1) -#define BIO_POOL_OFFSET (BITS_PER_LONG - BIO_POOL_BITS) -#define BIO_POOL_MASK (1UL << BIO_POOL_OFFSET) -#define BIO_POOL_IDX(bio) ((bio)->bi_flags >> BIO_POOL_OFFSET) - -/* - * Request flags. For use in the cmd_flags field of struct request, and in - * bi_rw of struct bio. Note that some flags are only valid in either one. - */ -enum rq_flag_bits { - /* common flags */ - __REQ_WRITE, /* not set, read. set, write */ - __REQ_FAILFAST_DEV, /* no driver retries of device errors */ - __REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */ - __REQ_FAILFAST_DRIVER, /* no driver retries of driver errors */ - - __REQ_HARDBARRIER, /* may not be passed by drive either */ - __REQ_SYNC, /* request is sync (sync write or read) */ - __REQ_META, /* metadata io request */ - __REQ_DISCARD, /* request to discard sectors */ - __REQ_NOIDLE, /* don't anticipate more IO after this one */ - - /* bio only flags */ - __REQ_UNPLUG, /* unplug the immediately after submission */ - __REQ_RAHEAD, /* read ahead, can fail anytime */ - - /* request only flags */ - __REQ_SORTED, /* elevator knows about this request */ - __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ - __REQ_FUA, /* forced unit access */ - __REQ_NOMERGE, /* don't touch this for merging */ - __REQ_STARTED, /* drive already may have started this one */ - __REQ_DONTPREP, /* don't call prep for this one */ - __REQ_QUEUED, /* uses queueing */ - __REQ_ELVPRIV, /* elevator private data attached */ - __REQ_FAILED, /* set if the request failed */ - __REQ_QUIET, /* don't worry about errors */ - __REQ_PREEMPT, /* set for "ide_preempt" requests */ - __REQ_ORDERED_COLOR, /* is before or after barrier */ - __REQ_ALLOCED, /* request came from our alloc pool */ - __REQ_COPY_USER, /* contains copies of user pages */ - __REQ_INTEGRITY, /* integrity metadata has been remapped */ - __REQ_FLUSH, /* request for cache flush */ - __REQ_IO_STAT, /* account I/O stat */ - __REQ_MIXED_MERGE, /* merge of different types, fail separately */ - __REQ_NR_BITS, /* stops here */ -}; - -#define REQ_WRITE (1 << __REQ_WRITE) -#define REQ_FAILFAST_DEV (1 << __REQ_FAILFAST_DEV) -#define REQ_FAILFAST_TRANSPORT (1 << __REQ_FAILFAST_TRANSPORT) -#define REQ_FAILFAST_DRIVER (1 << __REQ_FAILFAST_DRIVER) -#define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER) -#define REQ_SYNC (1 << __REQ_SYNC) -#define REQ_META (1 << __REQ_META) -#define REQ_DISCARD (1 << __REQ_DISCARD) -#define REQ_NOIDLE (1 << __REQ_NOIDLE) - -#define REQ_FAILFAST_MASK \ - (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) -#define REQ_COMMON_MASK \ - (REQ_WRITE | REQ_FAILFAST_MASK | REQ_HARDBARRIER | REQ_SYNC | \ - REQ_META| REQ_DISCARD | REQ_NOIDLE) - -#define REQ_UNPLUG (1 << __REQ_UNPLUG) -#define REQ_RAHEAD (1 << __REQ_RAHEAD) - -#define REQ_SORTED (1 << __REQ_SORTED) -#define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) -#define REQ_FUA (1 << __REQ_FUA) -#define REQ_NOMERGE (1 << __REQ_NOMERGE) -#define REQ_STARTED (1 << __REQ_STARTED) -#define REQ_DONTPREP (1 << __REQ_DONTPREP) -#define REQ_QUEUED (1 << __REQ_QUEUED) -#define REQ_ELVPRIV (1 << __REQ_ELVPRIV) -#define REQ_FAILED (1 << __REQ_FAILED) -#define REQ_QUIET (1 << __REQ_QUIET) -#define REQ_PREEMPT (1 << __REQ_PREEMPT) -#define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) -#define REQ_ALLOCED (1 << __REQ_ALLOCED) -#define REQ_COPY_USER (1 << __REQ_COPY_USER) -#define REQ_INTEGRITY (1 << __REQ_INTEGRITY) -#define REQ_FLUSH (1 << __REQ_FLUSH) -#define REQ_IO_STAT (1 << __REQ_IO_STAT) -#define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE) - /* * upper 16 bits of bi_rw define the io priority of this bio */ diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h new file mode 100644 index 000000000000..118523734af0 --- /dev/null +++ b/include/linux/blk_types.h @@ -0,0 +1,193 @@ +/* + * Block data types and constants. Directly include this file only to + * break include dependency loop. + */ +#ifndef __LINUX_BLK_TYPES_H +#define __LINUX_BLK_TYPES_H + +#ifdef CONFIG_BLOCK + +#include + +struct bio_set; +struct bio; +struct bio_integrity_payload; +struct page; +struct block_device; +typedef void (bio_end_io_t) (struct bio *, int); +typedef void (bio_destructor_t) (struct bio *); + +/* + * was unsigned short, but we might as well be ready for > 64kB I/O pages + */ +struct bio_vec { + struct page *bv_page; + unsigned int bv_len; + unsigned int bv_offset; +}; + +/* + * main unit of I/O for the block layer and lower layers (ie drivers and + * stacking drivers) + */ +struct bio { + sector_t bi_sector; /* device address in 512 byte + sectors */ + struct bio *bi_next; /* request queue link */ + struct block_device *bi_bdev; + unsigned long bi_flags; /* status, command, etc */ + unsigned long bi_rw; /* bottom bits READ/WRITE, + * top bits priority + */ + + unsigned short bi_vcnt; /* how many bio_vec's */ + unsigned short bi_idx; /* current index into bvl_vec */ + + /* Number of segments in this BIO after + * physical address coalescing is performed. + */ + unsigned int bi_phys_segments; + + unsigned int bi_size; /* residual I/O count */ + + /* + * To keep track of the max segment size, we account for the + * sizes of the first and last mergeable segments in this bio. + */ + unsigned int bi_seg_front_size; + unsigned int bi_seg_back_size; + + unsigned int bi_max_vecs; /* max bvl_vecs we can hold */ + + unsigned int bi_comp_cpu; /* completion CPU */ + + atomic_t bi_cnt; /* pin count */ + + struct bio_vec *bi_io_vec; /* the actual vec list */ + + bio_end_io_t *bi_end_io; + + void *bi_private; +#if defined(CONFIG_BLK_DEV_INTEGRITY) + struct bio_integrity_payload *bi_integrity; /* data integrity */ +#endif + + bio_destructor_t *bi_destructor; /* destructor */ + + /* + * We can inline a number of vecs at the end of the bio, to avoid + * double allocations for a small number of bio_vecs. This member + * MUST obviously be kept at the very end of the bio. + */ + struct bio_vec bi_inline_vecs[0]; +}; + +/* + * bio flags + */ +#define BIO_UPTODATE 0 /* ok after I/O completion */ +#define BIO_RW_BLOCK 1 /* RW_AHEAD set, and read/write would block */ +#define BIO_EOF 2 /* out-out-bounds error */ +#define BIO_SEG_VALID 3 /* bi_phys_segments valid */ +#define BIO_CLONED 4 /* doesn't own data */ +#define BIO_BOUNCED 5 /* bio is a bounce bio */ +#define BIO_USER_MAPPED 6 /* contains user pages */ +#define BIO_EOPNOTSUPP 7 /* not supported */ +#define BIO_CPU_AFFINE 8 /* complete bio on same CPU as submitted */ +#define BIO_NULL_MAPPED 9 /* contains invalid user pages */ +#define BIO_FS_INTEGRITY 10 /* fs owns integrity data, not block layer */ +#define BIO_QUIET 11 /* Make BIO Quiet */ +#define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) + +/* + * top 4 bits of bio flags indicate the pool this bio came from + */ +#define BIO_POOL_BITS (4) +#define BIO_POOL_NONE ((1UL << BIO_POOL_BITS) - 1) +#define BIO_POOL_OFFSET (BITS_PER_LONG - BIO_POOL_BITS) +#define BIO_POOL_MASK (1UL << BIO_POOL_OFFSET) +#define BIO_POOL_IDX(bio) ((bio)->bi_flags >> BIO_POOL_OFFSET) + +/* + * Request flags. For use in the cmd_flags field of struct request, and in + * bi_rw of struct bio. Note that some flags are only valid in either one. + */ +enum rq_flag_bits { + /* common flags */ + __REQ_WRITE, /* not set, read. set, write */ + __REQ_FAILFAST_DEV, /* no driver retries of device errors */ + __REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */ + __REQ_FAILFAST_DRIVER, /* no driver retries of driver errors */ + + __REQ_HARDBARRIER, /* may not be passed by drive either */ + __REQ_SYNC, /* request is sync (sync write or read) */ + __REQ_META, /* metadata io request */ + __REQ_DISCARD, /* request to discard sectors */ + __REQ_NOIDLE, /* don't anticipate more IO after this one */ + + /* bio only flags */ + __REQ_UNPLUG, /* unplug the immediately after submission */ + __REQ_RAHEAD, /* read ahead, can fail anytime */ + + /* request only flags */ + __REQ_SORTED, /* elevator knows about this request */ + __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ + __REQ_FUA, /* forced unit access */ + __REQ_NOMERGE, /* don't touch this for merging */ + __REQ_STARTED, /* drive already may have started this one */ + __REQ_DONTPREP, /* don't call prep for this one */ + __REQ_QUEUED, /* uses queueing */ + __REQ_ELVPRIV, /* elevator private data attached */ + __REQ_FAILED, /* set if the request failed */ + __REQ_QUIET, /* don't worry about errors */ + __REQ_PREEMPT, /* set for "ide_preempt" requests */ + __REQ_ORDERED_COLOR, /* is before or after barrier */ + __REQ_ALLOCED, /* request came from our alloc pool */ + __REQ_COPY_USER, /* contains copies of user pages */ + __REQ_INTEGRITY, /* integrity metadata has been remapped */ + __REQ_FLUSH, /* request for cache flush */ + __REQ_IO_STAT, /* account I/O stat */ + __REQ_MIXED_MERGE, /* merge of different types, fail separately */ + __REQ_NR_BITS, /* stops here */ +}; + +#define REQ_WRITE (1 << __REQ_WRITE) +#define REQ_FAILFAST_DEV (1 << __REQ_FAILFAST_DEV) +#define REQ_FAILFAST_TRANSPORT (1 << __REQ_FAILFAST_TRANSPORT) +#define REQ_FAILFAST_DRIVER (1 << __REQ_FAILFAST_DRIVER) +#define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER) +#define REQ_SYNC (1 << __REQ_SYNC) +#define REQ_META (1 << __REQ_META) +#define REQ_DISCARD (1 << __REQ_DISCARD) +#define REQ_NOIDLE (1 << __REQ_NOIDLE) + +#define REQ_FAILFAST_MASK \ + (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) +#define REQ_COMMON_MASK \ + (REQ_WRITE | REQ_FAILFAST_MASK | REQ_HARDBARRIER | REQ_SYNC | \ + REQ_META| REQ_DISCARD | REQ_NOIDLE) + +#define REQ_UNPLUG (1 << __REQ_UNPLUG) +#define REQ_RAHEAD (1 << __REQ_RAHEAD) + +#define REQ_SORTED (1 << __REQ_SORTED) +#define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) +#define REQ_FUA (1 << __REQ_FUA) +#define REQ_NOMERGE (1 << __REQ_NOMERGE) +#define REQ_STARTED (1 << __REQ_STARTED) +#define REQ_DONTPREP (1 << __REQ_DONTPREP) +#define REQ_QUEUED (1 << __REQ_QUEUED) +#define REQ_ELVPRIV (1 << __REQ_ELVPRIV) +#define REQ_FAILED (1 << __REQ_FAILED) +#define REQ_QUIET (1 << __REQ_QUIET) +#define REQ_PREEMPT (1 << __REQ_PREEMPT) +#define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) +#define REQ_ALLOCED (1 << __REQ_ALLOCED) +#define REQ_COPY_USER (1 << __REQ_COPY_USER) +#define REQ_INTEGRITY (1 << __REQ_INTEGRITY) +#define REQ_FLUSH (1 << __REQ_FLUSH) +#define REQ_IO_STAT (1 << __REQ_IO_STAT) +#define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE) + +#endif /* CONFIG_BLOCK */ +#endif /* __LINUX_BLK_TYPES_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 55dad7bca25b..c53911277210 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -8,6 +8,7 @@ #include #include +#include /* * It's silly to have NR_OPEN bigger than NR_FILE, but you can change @@ -117,7 +118,7 @@ struct inodes_stat_t { * immediately wait on this read without caring about * unplugging. * READA Used for read-ahead operations. Lower priority, and the - * block layer could (in theory) choose to ignore this + * block layer could (in theory) choose to ignore this * request if it runs into resource problems. * WRITE A normal async write. Device will be plugged. * SWRITE Like WRITE, but a special case for ll_rw_block() that @@ -144,13 +145,13 @@ struct inodes_stat_t { * of this IO. * */ -#define RW_MASK 1 -#define RWA_MASK 16 +#define RW_MASK REQ_WRITE +#define RWA_MASK REQ_RAHEAD #define READ 0 -#define WRITE 1 -#define READA 16 /* readahead - don't block if no resources */ -#define SWRITE 17 /* for ll_rw_block(), wait for buffer lock */ +#define WRITE RW_MASK +#define READA RWA_MASK +#define SWRITE (WRITE | READA) #define READ_SYNC (READ | REQ_SYNC | REQ_UNPLUG) #define READ_META (READ | REQ_META) @@ -2200,7 +2201,6 @@ static inline void insert_inode_hash(struct inode *inode) { extern void file_move(struct file *f, struct list_head *list); extern void file_kill(struct file *f); #ifdef CONFIG_BLOCK -struct bio; extern void submit_bio(int, struct bio *); extern int bdev_read_only(struct block_device *); #endif @@ -2267,7 +2267,6 @@ static inline int xip_truncate_page(struct address_space *mapping, loff_t from) #endif #ifdef CONFIG_BLOCK -struct bio; typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode, loff_t file_offset); void dio_end_io(struct bio *bio, int error); -- cgit v1.2.2 From 4aeefdc69f7b6f3f287e6fd8d4b213953b9e92d8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 3 Aug 2010 13:22:51 +0200 Subject: coda: fixup clash with block layer REQ_* defines CODA should not be using defines in the global name space of that nature, prefix them with CODA_. Signed-off-by: Jens Axboe --- include/linux/coda_psdev.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/coda_psdev.h b/include/linux/coda_psdev.h index 8859e2ede9fe..284b520934a0 100644 --- a/include/linux/coda_psdev.h +++ b/include/linux/coda_psdev.h @@ -86,9 +86,9 @@ struct upc_req { wait_queue_head_t uc_sleep; /* process' wait queue */ }; -#define REQ_ASYNC 0x1 -#define REQ_READ 0x2 -#define REQ_WRITE 0x4 -#define REQ_ABORT 0x8 +#define CODA_REQ_ASYNC 0x1 +#define CODA_REQ_READ 0x2 +#define CODA_REQ_WRITE 0x4 +#define CODA_REQ_ABORT 0x8 #endif -- cgit v1.2.2 From 6f904ff0e39ea88f81eb77e8dfb4e1238492f0a8 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sun, 25 Jul 2010 14:29:11 +0300 Subject: writeback: harmonize writeback threads naming The write-back code mixes words "thread" and "task" for the same things. This is not a big deal, but still an inconsistency. hch: a convention I tend to use and I've seen in various places is to always use _task for the storage of the task_struct pointer, and thread everywhere else. This especially helps with having foo_thread for the actual thread and foo_task for a global variable keeping the task_struct pointer This patch renames: * 'bdi_add_default_flusher_task()' -> 'bdi_add_default_flusher_thread()' * 'bdi_forker_task()' -> 'bdi_forker_thread()' because bdi threads are 'bdi_writeback_thread()', so these names are more consistent. This patch also amends commentaries and makes them refer the forker and bdi threads as "thread", not "task". Also, while on it, make 'bdi_add_default_flusher_thread()' declaration use 'static void' instead of 'void static' and make checkpatch.pl happy. Signed-off-by: Artem Bityutskiy Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index e536f3a74e60..f0936f5f85dd 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -50,7 +50,7 @@ struct bdi_writeback { unsigned long last_old_flush; /* last old data flush */ - struct task_struct *task; /* writeback task */ + struct task_struct *task; /* writeback thread */ struct list_head b_dirty; /* dirty inodes */ struct list_head b_io; /* parked for writeback */ struct list_head b_more_io; /* parked for more writeback */ -- cgit v1.2.2 From 080dcec41709be72613133f695be75b98dd43e88 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sun, 25 Jul 2010 14:29:16 +0300 Subject: writeback: simplify bdi code a little This patch simplifies bdi code a little by removing the 'pending_list' which is redundant. Indeed, currently the forker thread ('bdi_forker_thread()') is working like this: 1. In a loop, fetch all bdi's which have works but have no writeback thread and move them to the 'pending_list'. 2. If the list is empty, sleep for 5 sec. 3. Otherwise, take one bdi from the list, fork the writeback thread for this bdi, and repeat the loop. IOW, it first moves everything to the 'pending_list', then process only one element, and so on. This patch simplifies the algorithm, which is now as follows. 1. Find the first bdi which has a work and remove it from the global list of bdi's (bdi_list). 2. If there was not such bdi, sleep 5 sec. 3. Fork the writeback thread for this bdi and repeat the loop. IOW, now we find the first bdi to process, process it, and so on. This is simpler and involves less lists. The bonus now is that we can get rid of a couple of functions, as well as remove complications which involve 'rcu_call()' and 'bdi->rcu_head'. This patch also makes sure we use 'list_add_tail_rcu()', instead of plain 'list_add_tail()', but this piece of code is going to be removed in the next patch anyway. Signed-off-by: Artem Bityutskiy Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index f0936f5f85dd..95ecb2bebca8 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -58,7 +58,6 @@ struct bdi_writeback { struct backing_dev_info { struct list_head bdi_list; - struct rcu_head rcu_head; unsigned long ra_pages; /* max readahead in PAGE_CACHE_SIZE units */ unsigned long state; /* Always use atomic bitops on this */ unsigned int capabilities; /* Device capabilities */ -- cgit v1.2.2 From ecd584030da67ede1bf17955746a6ce834d9fc6b Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sun, 25 Jul 2010 14:29:18 +0300 Subject: writeback: move last_active to bdi Currently bdi threads use local variable 'last_active' which stores last time when the bdi thread did some useful work. Move this local variable to 'struct bdi_writeback'. This is just a preparation for the further patches which will make the forker thread decide when bdi threads should be killed. Signed-off-by: Artem Bityutskiy Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 95ecb2bebca8..71b6223e0a77 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -45,15 +45,16 @@ enum bdi_stat_item { #define BDI_STAT_BATCH (8*(1+ilog2(nr_cpu_ids))) struct bdi_writeback { - struct backing_dev_info *bdi; /* our parent bdi */ + struct backing_dev_info *bdi; /* our parent bdi */ unsigned int nr; - unsigned long last_old_flush; /* last old data flush */ + unsigned long last_old_flush; /* last old data flush */ + unsigned long last_active; /* last time bdi thread was active */ - struct task_struct *task; /* writeback thread */ - struct list_head b_dirty; /* dirty inodes */ - struct list_head b_io; /* parked for writeback */ - struct list_head b_more_io; /* parked for more writeback */ + struct task_struct *task; /* writeback thread */ + struct list_head b_dirty; /* dirty inodes */ + struct list_head b_io; /* parked for writeback */ + struct list_head b_more_io; /* parked for more writeback */ }; struct backing_dev_info { -- cgit v1.2.2 From 6467716a37673e8d47b4984eb19839bdad0a8353 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sun, 25 Jul 2010 14:29:22 +0300 Subject: writeback: optimize periodic bdi thread wakeups Whe the first inode for a bdi is marked dirty, we wake up the bdi thread which should take care of the periodic background write-out. However, the write-out will actually start only 'dirty_writeback_interval' centisecs later, so we can delay the wake-up. This change was requested by Nick Piggin who pointed out that if we delay the wake-up, we weed out 2 unnecessary contex switches, which matters because '__mark_inode_dirty()' is a hot-path function. This patch introduces a new function - 'bdi_wakeup_thread_delayed()', which sets up a timer to wake-up the bdi thread and returns. So the wake-up is delayed. We also delete the timer in bdi threads just before writing-back. And synchronously delete it when unregistering bdi. At the unregister point the bdi does not have any users, so no one can arm it again. Since now we take 'bdi->wb_lock' in the timer, which can execute in softirq context, we have to use 'spin_lock_bh()' for 'bdi->wb_lock'. This patch makes this change as well. This patch also moves the 'bdi_wb_init()' function down in the file to avoid forward-declaration of 'bdi_wakeup_thread_delayed()'. Signed-off-by: Artem Bityutskiy Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 71b6223e0a77..7628219e5386 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -52,6 +52,7 @@ struct bdi_writeback { unsigned long last_active; /* last time bdi thread was active */ struct task_struct *task; /* writeback thread */ + struct timer_list wakeup_timer; /* used for delayed bdi thread wakeup */ struct list_head b_dirty; /* dirty inodes */ struct list_head b_io; /* parked for writeback */ struct list_head b_more_io; /* parked for more writeback */ @@ -105,6 +106,7 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi); int bdi_writeback_thread(void *data); int bdi_has_dirty_io(struct backing_dev_info *bdi); void bdi_arm_supers_timer(void); +void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi); extern spinlock_t bdi_lock; extern struct list_head bdi_list; -- cgit v1.2.2 From e7f52dfb4f378ea1bbfd4476f4e8ba42f5fb332c Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 3 Aug 2010 20:20:20 +0200 Subject: drbd: revert "delay probes", feature is being re-implemented differently It was a now abandoned attempt to throttle resync bandwidth based on the delay it causes on the bulk data socket. It has no userbase yet, and has been disabled by 9173465ccb51c09cc3102a10af93e9f469a0af6f already. This removes the now unused code. The basic feature, namely using up "idle" bandwith of network and disk IO subsystem, with minimal impact to application IO, is being reimplemented differently. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- include/linux/drbd.h | 2 +- include/linux/drbd_nl.h | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/drbd.h b/include/linux/drbd.h index b8d2516668aa..479ee3a1d901 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -53,7 +53,7 @@ extern const char *drbd_buildtag(void); -#define REL_VERSION "8.3.8" +#define REL_VERSION "8.3.8.1" #define API_VERSION 88 #define PRO_VERSION_MIN 86 #define PRO_VERSION_MAX 94 diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h index ce77a746fc9d..5f042810a56c 100644 --- a/include/linux/drbd_nl.h +++ b/include/linux/drbd_nl.h @@ -78,10 +78,11 @@ NL_PACKET(syncer_conf, 8, NL_INTEGER( 30, T_MAY_IGNORE, rate) NL_INTEGER( 31, T_MAY_IGNORE, after) NL_INTEGER( 32, T_MAY_IGNORE, al_extents) - NL_INTEGER( 71, T_MAY_IGNORE, dp_volume) - NL_INTEGER( 72, T_MAY_IGNORE, dp_interval) - NL_INTEGER( 73, T_MAY_IGNORE, throttle_th) - NL_INTEGER( 74, T_MAY_IGNORE, hold_off_th) +/* NL_INTEGER( 71, T_MAY_IGNORE, dp_volume) + * NL_INTEGER( 72, T_MAY_IGNORE, dp_interval) + * NL_INTEGER( 73, T_MAY_IGNORE, throttle_th) + * NL_INTEGER( 74, T_MAY_IGNORE, hold_off_th) + * feature will be reimplemented differently with 8.3.9 */ NL_STRING( 52, T_MAY_IGNORE, verify_alg, SHARED_SECRET_MAX) NL_STRING( 51, T_MAY_IGNORE, cpu_mask, 32) NL_STRING( 64, T_MAY_IGNORE, csums_alg, SHARED_SECRET_MAX) -- cgit v1.2.2 From 387ac089361fbe5ef287e6950c5c40f6b18e5c55 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 5 Aug 2010 08:34:13 +0200 Subject: block: fix missing export of blk_types.h Stephen reports: After merging the block tree, today's linux-next build (x86_64 allmodconfig) failed like this: usr/include/linux/fs.h:11: included file 'linux/blk_types.h' is not exported Caused by commit 9d3dbbcd9a84518ff5e32ffe671d06a48cf84fd9 ("bio, fs: separate out bio_types.h and define READ/WRITE constants in terms of BIO_RW_* flags"). Reported-by: Stephen Rothwell Signed-off-by: Jens Axboe --- include/linux/Kbuild | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 2fc8e14cc24a..671715b869fc 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -39,6 +39,7 @@ header-y += ax25.h header-y += b1lli.h header-y += baycom.h header-y += bfs_fs.h +header-y += blk_types.h header-y += blkpg.h header-y += bpqether.h header-y += bsg.h -- cgit v1.2.2 From de75d60d5ea235e6e09f4962ab22541ce0fe176a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 10 Aug 2010 12:14:27 -0400 Subject: block: make sure that REQ_* types are seen even with CONFIG_BLOCK=n These form the basis of the basic WRITE etc primitives, so we need them to be always visible. Otherwise we see errors like: mm/filemap.c:2164: error: 'REQ_WRITE' undeclared fs/read_write.c:362: error: 'REQ_WRITE' undeclared fs/splice.c:1108: error: 'REQ_WRITE' undeclared fs/aio.c:1496: error: 'REQ_WRITE' undeclared Reported-by: Randy Dunlap Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 118523734af0..53691774d34e 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -108,6 +108,8 @@ struct bio { #define BIO_POOL_MASK (1UL << BIO_POOL_OFFSET) #define BIO_POOL_IDX(bio) ((bio)->bi_flags >> BIO_POOL_OFFSET) +#endif /* CONFIG_BLOCK */ + /* * Request flags. For use in the cmd_flags field of struct request, and in * bi_rw of struct bio. Note that some flags are only valid in either one. @@ -189,5 +191,4 @@ enum rq_flag_bits { #define REQ_IO_STAT (1 << __REQ_IO_STAT) #define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE) -#endif /* CONFIG_BLOCK */ #endif /* __LINUX_BLK_TYPES_H */ -- cgit v1.2.2