diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-08-01 12:02:41 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-08-01 12:02:41 -0400 |
commit | 8cf1a3fce0b95050b63d451c9d561da0da2aa4d6 (patch) | |
tree | 0dc7f93474c3be601a5893900db1418dfd60ba5d /include | |
parent | fcff06c438b60f415af5983efe92811d6aa02ad1 (diff) | |
parent | 80799fbb7d10c30df78015b3fa21f7ffcfc0eb2c (diff) |
Merge branch 'for-3.6/core' of git://git.kernel.dk/linux-block
Pull core block IO bits from Jens Axboe:
"The most complicated part of this is the request allocation rework by
Tejun, which has been queued up for a long time and has been in
for-next ditto as well.
There are a few commits from yesterday and today, mostly trivial and
obvious fixes. So I'm pretty confident that it is sound. It's also
smaller than usual."
* 'for-3.6/core' of git://git.kernel.dk/linux-block:
block: remove dead func declaration
block: add partition resize function to blkpg ioctl
block: uninitialized ioc->nr_tasks triggers WARN_ON
block: do not artificially constrain max_sectors for stacking drivers
blkcg: implement per-blkg request allocation
block: prepare for multiple request_lists
block: add q->nr_rqs[] and move q->rq.elvpriv to q->nr_rqs_elvpriv
blkcg: inline bio_blkcg() and friends
block: allocate io_context upfront
block: refactor get_request[_wait]()
block: drop custom queue draining used by scsi_transport_{iscsi|fc}
mempool: add @gfp_mask to mempool_create_node()
blkcg: make root blkcg allocation use %GFP_KERNEL
blkcg: __blkg_lookup_create() doesn't need radix preload
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/blkdev.h | 53 | ||||
-rw-r--r-- | include/linux/blkpg.h | 1 | ||||
-rw-r--r-- | include/linux/bsg-lib.h | 1 | ||||
-rw-r--r-- | include/linux/genhd.h | 57 | ||||
-rw-r--r-- | include/linux/mempool.h | 3 |
5 files changed, 92 insertions, 23 deletions
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 07954b05b86c..3816ce8a08fc 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h | |||
@@ -46,16 +46,23 @@ struct blkcg_gq; | |||
46 | struct request; | 46 | struct request; |
47 | typedef void (rq_end_io_fn)(struct request *, int); | 47 | typedef void (rq_end_io_fn)(struct request *, int); |
48 | 48 | ||
49 | #define BLK_RL_SYNCFULL (1U << 0) | ||
50 | #define BLK_RL_ASYNCFULL (1U << 1) | ||
51 | |||
49 | struct request_list { | 52 | struct request_list { |
53 | struct request_queue *q; /* the queue this rl belongs to */ | ||
54 | #ifdef CONFIG_BLK_CGROUP | ||
55 | struct blkcg_gq *blkg; /* blkg this request pool belongs to */ | ||
56 | #endif | ||
50 | /* | 57 | /* |
51 | * count[], starved[], and wait[] are indexed by | 58 | * count[], starved[], and wait[] are indexed by |
52 | * BLK_RW_SYNC/BLK_RW_ASYNC | 59 | * BLK_RW_SYNC/BLK_RW_ASYNC |
53 | */ | 60 | */ |
54 | int count[2]; | 61 | int count[2]; |
55 | int starved[2]; | 62 | int starved[2]; |
56 | int elvpriv; | 63 | mempool_t *rq_pool; |
57 | mempool_t *rq_pool; | 64 | wait_queue_head_t wait[2]; |
58 | wait_queue_head_t wait[2]; | 65 | unsigned int flags; |
59 | }; | 66 | }; |
60 | 67 | ||
61 | /* | 68 | /* |
@@ -138,6 +145,7 @@ struct request { | |||
138 | struct hd_struct *part; | 145 | struct hd_struct *part; |
139 | unsigned long start_time; | 146 | unsigned long start_time; |
140 | #ifdef CONFIG_BLK_CGROUP | 147 | #ifdef CONFIG_BLK_CGROUP |
148 | struct request_list *rl; /* rl this rq is alloced from */ | ||
141 | unsigned long long start_time_ns; | 149 | unsigned long long start_time_ns; |
142 | unsigned long long io_start_time_ns; /* when passed to hardware */ | 150 | unsigned long long io_start_time_ns; /* when passed to hardware */ |
143 | #endif | 151 | #endif |
@@ -282,11 +290,16 @@ struct request_queue { | |||
282 | struct list_head queue_head; | 290 | struct list_head queue_head; |
283 | struct request *last_merge; | 291 | struct request *last_merge; |
284 | struct elevator_queue *elevator; | 292 | struct elevator_queue *elevator; |
293 | int nr_rqs[2]; /* # allocated [a]sync rqs */ | ||
294 | int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */ | ||
285 | 295 | ||
286 | /* | 296 | /* |
287 | * the queue request freelist, one for reads and one for writes | 297 | * If blkcg is not used, @q->root_rl serves all requests. If blkcg |
298 | * is used, root blkg allocates from @q->root_rl and all other | ||
299 | * blkgs from their own blkg->rl. Which one to use should be | ||
300 | * determined using bio_request_list(). | ||
288 | */ | 301 | */ |
289 | struct request_list rq; | 302 | struct request_list root_rl; |
290 | 303 | ||
291 | request_fn_proc *request_fn; | 304 | request_fn_proc *request_fn; |
292 | make_request_fn *make_request_fn; | 305 | make_request_fn *make_request_fn; |
@@ -561,27 +574,25 @@ static inline bool rq_is_sync(struct request *rq) | |||
561 | return rw_is_sync(rq->cmd_flags); | 574 | return rw_is_sync(rq->cmd_flags); |
562 | } | 575 | } |
563 | 576 | ||
564 | static inline int blk_queue_full(struct request_queue *q, int sync) | 577 | static inline bool blk_rl_full(struct request_list *rl, bool sync) |
565 | { | 578 | { |
566 | if (sync) | 579 | unsigned int flag = sync ? BLK_RL_SYNCFULL : BLK_RL_ASYNCFULL; |
567 | return test_bit(QUEUE_FLAG_SYNCFULL, &q->queue_flags); | 580 | |
568 | return test_bit(QUEUE_FLAG_ASYNCFULL, &q->queue_flags); | 581 | return rl->flags & flag; |
569 | } | 582 | } |
570 | 583 | ||
571 | static inline void blk_set_queue_full(struct request_queue *q, int sync) | 584 | static inline void blk_set_rl_full(struct request_list *rl, bool sync) |
572 | { | 585 | { |
573 | if (sync) | 586 | unsigned int flag = sync ? BLK_RL_SYNCFULL : BLK_RL_ASYNCFULL; |
574 | queue_flag_set(QUEUE_FLAG_SYNCFULL, q); | 587 | |
575 | else | 588 | rl->flags |= flag; |
576 | queue_flag_set(QUEUE_FLAG_ASYNCFULL, q); | ||
577 | } | 589 | } |
578 | 590 | ||
579 | static inline void blk_clear_queue_full(struct request_queue *q, int sync) | 591 | static inline void blk_clear_rl_full(struct request_list *rl, bool sync) |
580 | { | 592 | { |
581 | if (sync) | 593 | unsigned int flag = sync ? BLK_RL_SYNCFULL : BLK_RL_ASYNCFULL; |
582 | queue_flag_clear(QUEUE_FLAG_SYNCFULL, q); | 594 | |
583 | else | 595 | rl->flags &= ~flag; |
584 | queue_flag_clear(QUEUE_FLAG_ASYNCFULL, q); | ||
585 | } | 596 | } |
586 | 597 | ||
587 | 598 | ||
diff --git a/include/linux/blkpg.h b/include/linux/blkpg.h index faf8a45af210..a8519446c111 100644 --- a/include/linux/blkpg.h +++ b/include/linux/blkpg.h | |||
@@ -40,6 +40,7 @@ struct blkpg_ioctl_arg { | |||
40 | /* The subfunctions (for the op field) */ | 40 | /* The subfunctions (for the op field) */ |
41 | #define BLKPG_ADD_PARTITION 1 | 41 | #define BLKPG_ADD_PARTITION 1 |
42 | #define BLKPG_DEL_PARTITION 2 | 42 | #define BLKPG_DEL_PARTITION 2 |
43 | #define BLKPG_RESIZE_PARTITION 3 | ||
43 | 44 | ||
44 | /* Sizes of name fields. Unused at present. */ | 45 | /* Sizes of name fields. Unused at present. */ |
45 | #define BLKPG_DEVNAMELTH 64 | 46 | #define BLKPG_DEVNAMELTH 64 |
diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h index f55ab8cdc106..4d0fb3df2f4a 100644 --- a/include/linux/bsg-lib.h +++ b/include/linux/bsg-lib.h | |||
@@ -67,7 +67,6 @@ void bsg_job_done(struct bsg_job *job, int result, | |||
67 | int bsg_setup_queue(struct device *dev, struct request_queue *q, char *name, | 67 | int bsg_setup_queue(struct device *dev, struct request_queue *q, char *name, |
68 | bsg_job_fn *job_fn, int dd_job_size); | 68 | bsg_job_fn *job_fn, int dd_job_size); |
69 | void bsg_request_fn(struct request_queue *q); | 69 | void bsg_request_fn(struct request_queue *q); |
70 | void bsg_remove_queue(struct request_queue *q); | ||
71 | void bsg_goose_queue(struct request_queue *q); | 70 | void bsg_goose_queue(struct request_queue *q); |
72 | 71 | ||
73 | #endif | 72 | #endif |
diff --git a/include/linux/genhd.h b/include/linux/genhd.h index ae0aaa9d42fa..4f440b3e89fe 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h | |||
@@ -97,7 +97,13 @@ struct partition_meta_info { | |||
97 | 97 | ||
98 | struct hd_struct { | 98 | struct hd_struct { |
99 | sector_t start_sect; | 99 | sector_t start_sect; |
100 | /* | ||
101 | * nr_sects is protected by sequence counter. One might extend a | ||
102 | * partition while IO is happening to it and update of nr_sects | ||
103 | * can be non-atomic on 32bit machines with 64bit sector_t. | ||
104 | */ | ||
100 | sector_t nr_sects; | 105 | sector_t nr_sects; |
106 | seqcount_t nr_sects_seq; | ||
101 | sector_t alignment_offset; | 107 | sector_t alignment_offset; |
102 | unsigned int discard_alignment; | 108 | unsigned int discard_alignment; |
103 | struct device __dev; | 109 | struct device __dev; |
@@ -647,6 +653,57 @@ static inline void hd_struct_put(struct hd_struct *part) | |||
647 | __delete_partition(part); | 653 | __delete_partition(part); |
648 | } | 654 | } |
649 | 655 | ||
656 | /* | ||
657 | * Any access of part->nr_sects which is not protected by partition | ||
658 | * bd_mutex or gendisk bdev bd_mutex, should be done using this | ||
659 | * accessor function. | ||
660 | * | ||
661 | * Code written along the lines of i_size_read() and i_size_write(). | ||
662 | * CONFIG_PREEMPT case optimizes the case of UP kernel with preemption | ||
663 | * on. | ||
664 | */ | ||
665 | static inline sector_t part_nr_sects_read(struct hd_struct *part) | ||
666 | { | ||
667 | #if BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_SMP) | ||
668 | sector_t nr_sects; | ||
669 | unsigned seq; | ||
670 | do { | ||
671 | seq = read_seqcount_begin(&part->nr_sects_seq); | ||
672 | nr_sects = part->nr_sects; | ||
673 | } while (read_seqcount_retry(&part->nr_sects_seq, seq)); | ||
674 | return nr_sects; | ||
675 | #elif BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_PREEMPT) | ||
676 | sector_t nr_sects; | ||
677 | |||
678 | preempt_disable(); | ||
679 | nr_sects = part->nr_sects; | ||
680 | preempt_enable(); | ||
681 | return nr_sects; | ||
682 | #else | ||
683 | return part->nr_sects; | ||
684 | #endif | ||
685 | } | ||
686 | |||
687 | /* | ||
688 | * Should be called with mutex lock held (typically bd_mutex) of partition | ||
689 | * to provide mutual exclusion among writers otherwise seqcount might be | ||
690 | * left in wrong state leaving the readers spinning infinitely. | ||
691 | */ | ||
692 | static inline void part_nr_sects_write(struct hd_struct *part, sector_t size) | ||
693 | { | ||
694 | #if BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_SMP) | ||
695 | write_seqcount_begin(&part->nr_sects_seq); | ||
696 | part->nr_sects = size; | ||
697 | write_seqcount_end(&part->nr_sects_seq); | ||
698 | #elif BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_PREEMPT) | ||
699 | preempt_disable(); | ||
700 | part->nr_sects = size; | ||
701 | preempt_enable(); | ||
702 | #else | ||
703 | part->nr_sects = size; | ||
704 | #endif | ||
705 | } | ||
706 | |||
650 | #else /* CONFIG_BLOCK */ | 707 | #else /* CONFIG_BLOCK */ |
651 | 708 | ||
652 | static inline void printk_all_partitions(void) { } | 709 | static inline void printk_all_partitions(void) { } |
diff --git a/include/linux/mempool.h b/include/linux/mempool.h index 7c08052e3321..39ed62ab5b8a 100644 --- a/include/linux/mempool.h +++ b/include/linux/mempool.h | |||
@@ -26,7 +26,8 @@ typedef struct mempool_s { | |||
26 | extern mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn, | 26 | extern mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn, |
27 | mempool_free_t *free_fn, void *pool_data); | 27 | mempool_free_t *free_fn, void *pool_data); |
28 | extern mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn, | 28 | extern mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn, |
29 | mempool_free_t *free_fn, void *pool_data, int nid); | 29 | mempool_free_t *free_fn, void *pool_data, |
30 | gfp_t gfp_mask, int nid); | ||
30 | 31 | ||
31 | extern int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask); | 32 | extern int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask); |
32 | extern void mempool_destroy(mempool_t *pool); | 33 | extern void mempool_destroy(mempool_t *pool); |