author     Linus Torvalds <torvalds@linux-foundation.org>  2012-08-01 12:02:41 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-08-01 12:02:41 -0400
commit     8cf1a3fce0b95050b63d451c9d561da0da2aa4d6
tree       0dc7f93474c3be601a5893900db1418dfd60ba5d /block
parent     fcff06c438b60f415af5983efe92811d6aa02ad1
parent     80799fbb7d10c30df78015b3fa21f7ffcfc0eb2c
Merge branch 'for-3.6/core' of git://git.kernel.dk/linux-block
Pull core block IO bits from Jens Axboe:
"The most complicated part if this is the request allocation rework by
Tejun, which has been queued up for a long time and has been in
for-next ditto as well.
There are a few commits from yesterday and today, mostly trivial and
obvious fixes. So I'm pretty confident that it is sound. It's also
smaller than usual."
* 'for-3.6/core' of git://git.kernel.dk/linux-block:
block: remove dead func declaration
block: add partition resize function to blkpg ioctl
block: uninitialized ioc->nr_tasks triggers WARN_ON
block: do not artificially constrain max_sectors for stacking drivers
blkcg: implement per-blkg request allocation
block: prepare for multiple request_lists
block: add q->nr_rqs[] and move q->rq.elvpriv to q->nr_rqs_elvpriv
blkcg: inline bio_blkcg() and friends
block: allocate io_context upfront
block: refactor get_request[_wait]()
block: drop custom queue draining used by scsi_transport_{iscsi|fc}
mempool: add @gfp_mask to mempool_create_node()
blkcg: make root blkcg allocation use %GFP_KERNEL
blkcg: __blkg_lookup_create() doesn't need radix preload
Diffstat (limited to 'block')
-rw-r--r--  block/blk-cgroup.c          139
-rw-r--r--  block/blk-cgroup.h          128
-rw-r--r--  block/blk-core.c            209
-rw-r--r--  block/blk-ioc.c               1
-rw-r--r--  block/blk-settings.c          3
-rw-r--r--  block/blk-sysfs.c            34
-rw-r--r--  block/blk-throttle.c          3
-rw-r--r--  block/blk.h                   4
-rw-r--r--  block/bsg-lib.c              53
-rw-r--r--  block/genhd.c                20
-rw-r--r--  block/ioctl.c                59
-rw-r--r--  block/partition-generic.c     4
12 files changed, 423 insertions, 234 deletions
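
The headline change in this pull is Tejun's per-blkg request allocation: the single request_list that used to live at q->rq is replaced by one request_list per blkcg-queue pair, with the root blkcg served by q->root_rl. The sketch below is only a condensed illustration of how the helpers introduced in this series (blk_get_rl(), blk_rq_set_rl(), blk_rq_rl(), blk_put_rl()) bracket request allocation and freeing; the real logic is in the block/blk-core.c hunks further down, and the function names here are invented for the example.

#include <linux/blkdev.h>
#include "blk-cgroup.h"

/*
 * Condensed sketch of the new request_list flow -- not the actual
 * blk-core.c code.  Both paths run under q->queue_lock in the real tree.
 */
static struct request *example_alloc_request(struct request_queue *q,
					     struct bio *bio, gfp_t gfp_mask)
{
	/* pick the request_list of @bio's blkcg, falling back to q->root_rl */
	struct request_list *rl = blk_get_rl(q, bio);
	struct request *rq;

	rq = mempool_alloc(rl->rq_pool, gfp_mask);
	if (!rq) {
		blk_put_rl(rl);
		return NULL;
	}

	blk_rq_init(q, rq);
	blk_rq_set_rl(rq, rl);		/* remember which rl @rq came from */
	return rq;
}

static void example_free_request(struct request *rq)
{
	struct request_list *rl = blk_rq_rl(rq);

	mempool_free(rq, rl->rq_pool);
	blk_put_rl(rl);			/* drop the reference from blk_get_rl() */
}
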
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index e7dee617358e..f3b44a65fc7a 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -31,27 +31,6 @@ EXPORT_SYMBOL_GPL(blkcg_root); | |||
31 | 31 | ||
32 | static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS]; | 32 | static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS]; |
33 | 33 | ||
34 | struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) | ||
35 | { | ||
36 | return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id), | ||
37 | struct blkcg, css); | ||
38 | } | ||
39 | EXPORT_SYMBOL_GPL(cgroup_to_blkcg); | ||
40 | |||
41 | static struct blkcg *task_blkcg(struct task_struct *tsk) | ||
42 | { | ||
43 | return container_of(task_subsys_state(tsk, blkio_subsys_id), | ||
44 | struct blkcg, css); | ||
45 | } | ||
46 | |||
47 | struct blkcg *bio_blkcg(struct bio *bio) | ||
48 | { | ||
49 | if (bio && bio->bi_css) | ||
50 | return container_of(bio->bi_css, struct blkcg, css); | ||
51 | return task_blkcg(current); | ||
52 | } | ||
53 | EXPORT_SYMBOL_GPL(bio_blkcg); | ||
54 | |||
55 | static bool blkcg_policy_enabled(struct request_queue *q, | 34 | static bool blkcg_policy_enabled(struct request_queue *q, |
56 | const struct blkcg_policy *pol) | 35 | const struct blkcg_policy *pol) |
57 | { | 36 | { |
@@ -84,6 +63,7 @@ static void blkg_free(struct blkcg_gq *blkg) | |||
84 | kfree(pd); | 63 | kfree(pd); |
85 | } | 64 | } |
86 | 65 | ||
66 | blk_exit_rl(&blkg->rl); | ||
87 | kfree(blkg); | 67 | kfree(blkg); |
88 | } | 68 | } |
89 | 69 | ||
@@ -91,16 +71,18 @@ static void blkg_free(struct blkcg_gq *blkg) | |||
91 | * blkg_alloc - allocate a blkg | 71 | * blkg_alloc - allocate a blkg |
92 | * @blkcg: block cgroup the new blkg is associated with | 72 | * @blkcg: block cgroup the new blkg is associated with |
93 | * @q: request_queue the new blkg is associated with | 73 | * @q: request_queue the new blkg is associated with |
74 | * @gfp_mask: allocation mask to use | ||
94 | * | 75 | * |
95 | * Allocate a new blkg assocating @blkcg and @q. | 76 | * Allocate a new blkg assocating @blkcg and @q. |
96 | */ | 77 | */ |
97 | static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q) | 78 | static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q, |
79 | gfp_t gfp_mask) | ||
98 | { | 80 | { |
99 | struct blkcg_gq *blkg; | 81 | struct blkcg_gq *blkg; |
100 | int i; | 82 | int i; |
101 | 83 | ||
102 | /* alloc and init base part */ | 84 | /* alloc and init base part */ |
103 | blkg = kzalloc_node(sizeof(*blkg), GFP_ATOMIC, q->node); | 85 | blkg = kzalloc_node(sizeof(*blkg), gfp_mask, q->node); |
104 | if (!blkg) | 86 | if (!blkg) |
105 | return NULL; | 87 | return NULL; |
106 | 88 | ||
@@ -109,6 +91,13 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q) | |||
109 | blkg->blkcg = blkcg; | 91 | blkg->blkcg = blkcg; |
110 | blkg->refcnt = 1; | 92 | blkg->refcnt = 1; |
111 | 93 | ||
94 | /* root blkg uses @q->root_rl, init rl only for !root blkgs */ | ||
95 | if (blkcg != &blkcg_root) { | ||
96 | if (blk_init_rl(&blkg->rl, q, gfp_mask)) | ||
97 | goto err_free; | ||
98 | blkg->rl.blkg = blkg; | ||
99 | } | ||
100 | |||
112 | for (i = 0; i < BLKCG_MAX_POLS; i++) { | 101 | for (i = 0; i < BLKCG_MAX_POLS; i++) { |
113 | struct blkcg_policy *pol = blkcg_policy[i]; | 102 | struct blkcg_policy *pol = blkcg_policy[i]; |
114 | struct blkg_policy_data *pd; | 103 | struct blkg_policy_data *pd; |
@@ -117,11 +106,9 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q) | |||
117 | continue; | 106 | continue; |
118 | 107 | ||
119 | /* alloc per-policy data and attach it to blkg */ | 108 | /* alloc per-policy data and attach it to blkg */ |
120 | pd = kzalloc_node(pol->pd_size, GFP_ATOMIC, q->node); | 109 | pd = kzalloc_node(pol->pd_size, gfp_mask, q->node); |
121 | if (!pd) { | 110 | if (!pd) |
122 | blkg_free(blkg); | 111 | goto err_free; |
123 | return NULL; | ||
124 | } | ||
125 | 112 | ||
126 | blkg->pd[i] = pd; | 113 | blkg->pd[i] = pd; |
127 | pd->blkg = blkg; | 114 | pd->blkg = blkg; |
@@ -132,6 +119,10 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q) | |||
132 | } | 119 | } |
133 | 120 | ||
134 | return blkg; | 121 | return blkg; |
122 | |||
123 | err_free: | ||
124 | blkg_free(blkg); | ||
125 | return NULL; | ||
135 | } | 126 | } |
136 | 127 | ||
137 | static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, | 128 | static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, |
@@ -175,9 +166,13 @@ struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q) | |||
175 | } | 166 | } |
176 | EXPORT_SYMBOL_GPL(blkg_lookup); | 167 | EXPORT_SYMBOL_GPL(blkg_lookup); |
177 | 168 | ||
169 | /* | ||
170 | * If @new_blkg is %NULL, this function tries to allocate a new one as | ||
171 | * necessary using %GFP_ATOMIC. @new_blkg is always consumed on return. | ||
172 | */ | ||
178 | static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, | 173 | static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, |
179 | struct request_queue *q) | 174 | struct request_queue *q, |
180 | __releases(q->queue_lock) __acquires(q->queue_lock) | 175 | struct blkcg_gq *new_blkg) |
181 | { | 176 | { |
182 | struct blkcg_gq *blkg; | 177 | struct blkcg_gq *blkg; |
183 | int ret; | 178 | int ret; |
@@ -189,24 +184,26 @@ static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, | |||
189 | blkg = __blkg_lookup(blkcg, q); | 184 | blkg = __blkg_lookup(blkcg, q); |
190 | if (blkg) { | 185 | if (blkg) { |
191 | rcu_assign_pointer(blkcg->blkg_hint, blkg); | 186 | rcu_assign_pointer(blkcg->blkg_hint, blkg); |
192 | return blkg; | 187 | goto out_free; |
193 | } | 188 | } |
194 | 189 | ||
195 | /* blkg holds a reference to blkcg */ | 190 | /* blkg holds a reference to blkcg */ |
196 | if (!css_tryget(&blkcg->css)) | 191 | if (!css_tryget(&blkcg->css)) { |
197 | return ERR_PTR(-EINVAL); | 192 | blkg = ERR_PTR(-EINVAL); |
193 | goto out_free; | ||
194 | } | ||
198 | 195 | ||
199 | /* allocate */ | 196 | /* allocate */ |
200 | ret = -ENOMEM; | 197 | if (!new_blkg) { |
201 | blkg = blkg_alloc(blkcg, q); | 198 | new_blkg = blkg_alloc(blkcg, q, GFP_ATOMIC); |
202 | if (unlikely(!blkg)) | 199 | if (unlikely(!new_blkg)) { |
203 | goto err_put; | 200 | blkg = ERR_PTR(-ENOMEM); |
201 | goto out_put; | ||
202 | } | ||
203 | } | ||
204 | blkg = new_blkg; | ||
204 | 205 | ||
205 | /* insert */ | 206 | /* insert */ |
206 | ret = radix_tree_preload(GFP_ATOMIC); | ||
207 | if (ret) | ||
208 | goto err_free; | ||
209 | |||
210 | spin_lock(&blkcg->lock); | 207 | spin_lock(&blkcg->lock); |
211 | ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg); | 208 | ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg); |
212 | if (likely(!ret)) { | 209 | if (likely(!ret)) { |
@@ -215,15 +212,15 @@ static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, | |||
215 | } | 212 | } |
216 | spin_unlock(&blkcg->lock); | 213 | spin_unlock(&blkcg->lock); |
217 | 214 | ||
218 | radix_tree_preload_end(); | ||
219 | |||
220 | if (!ret) | 215 | if (!ret) |
221 | return blkg; | 216 | return blkg; |
222 | err_free: | 217 | |
223 | blkg_free(blkg); | 218 | blkg = ERR_PTR(ret); |
224 | err_put: | 219 | out_put: |
225 | css_put(&blkcg->css); | 220 | css_put(&blkcg->css); |
226 | return ERR_PTR(ret); | 221 | out_free: |
222 | blkg_free(new_blkg); | ||
223 | return blkg; | ||
227 | } | 224 | } |
228 | 225 | ||
229 | struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, | 226 | struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, |
@@ -235,7 +232,7 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, | |||
235 | */ | 232 | */ |
236 | if (unlikely(blk_queue_bypass(q))) | 233 | if (unlikely(blk_queue_bypass(q))) |
237 | return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY); | 234 | return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY); |
238 | return __blkg_lookup_create(blkcg, q); | 235 | return __blkg_lookup_create(blkcg, q, NULL); |
239 | } | 236 | } |
240 | EXPORT_SYMBOL_GPL(blkg_lookup_create); | 237 | EXPORT_SYMBOL_GPL(blkg_lookup_create); |
241 | 238 | ||
@@ -313,6 +310,38 @@ void __blkg_release(struct blkcg_gq *blkg) | |||
313 | } | 310 | } |
314 | EXPORT_SYMBOL_GPL(__blkg_release); | 311 | EXPORT_SYMBOL_GPL(__blkg_release); |
315 | 312 | ||
313 | /* | ||
314 | * The next function used by blk_queue_for_each_rl(). It's a bit tricky | ||
315 | * because the root blkg uses @q->root_rl instead of its own rl. | ||
316 | */ | ||
317 | struct request_list *__blk_queue_next_rl(struct request_list *rl, | ||
318 | struct request_queue *q) | ||
319 | { | ||
320 | struct list_head *ent; | ||
321 | struct blkcg_gq *blkg; | ||
322 | |||
323 | /* | ||
324 | * Determine the current blkg list_head. The first entry is | ||
325 | * root_rl which is off @q->blkg_list and mapped to the head. | ||
326 | */ | ||
327 | if (rl == &q->root_rl) { | ||
328 | ent = &q->blkg_list; | ||
329 | } else { | ||
330 | blkg = container_of(rl, struct blkcg_gq, rl); | ||
331 | ent = &blkg->q_node; | ||
332 | } | ||
333 | |||
334 | /* walk to the next list_head, skip root blkcg */ | ||
335 | ent = ent->next; | ||
336 | if (ent == &q->root_blkg->q_node) | ||
337 | ent = ent->next; | ||
338 | if (ent == &q->blkg_list) | ||
339 | return NULL; | ||
340 | |||
341 | blkg = container_of(ent, struct blkcg_gq, q_node); | ||
342 | return &blkg->rl; | ||
343 | } | ||
344 | |||
316 | static int blkcg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, | 345 | static int blkcg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, |
317 | u64 val) | 346 | u64 val) |
318 | { | 347 | { |
@@ -734,24 +763,36 @@ int blkcg_activate_policy(struct request_queue *q, | |||
734 | struct blkcg_gq *blkg; | 763 | struct blkcg_gq *blkg; |
735 | struct blkg_policy_data *pd, *n; | 764 | struct blkg_policy_data *pd, *n; |
736 | int cnt = 0, ret; | 765 | int cnt = 0, ret; |
766 | bool preloaded; | ||
737 | 767 | ||
738 | if (blkcg_policy_enabled(q, pol)) | 768 | if (blkcg_policy_enabled(q, pol)) |
739 | return 0; | 769 | return 0; |
740 | 770 | ||
771 | /* preallocations for root blkg */ | ||
772 | blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL); | ||
773 | if (!blkg) | ||
774 | return -ENOMEM; | ||
775 | |||
776 | preloaded = !radix_tree_preload(GFP_KERNEL); | ||
777 | |||
741 | blk_queue_bypass_start(q); | 778 | blk_queue_bypass_start(q); |
742 | 779 | ||
743 | /* make sure the root blkg exists and count the existing blkgs */ | 780 | /* make sure the root blkg exists and count the existing blkgs */ |
744 | spin_lock_irq(q->queue_lock); | 781 | spin_lock_irq(q->queue_lock); |
745 | 782 | ||
746 | rcu_read_lock(); | 783 | rcu_read_lock(); |
747 | blkg = __blkg_lookup_create(&blkcg_root, q); | 784 | blkg = __blkg_lookup_create(&blkcg_root, q, blkg); |
748 | rcu_read_unlock(); | 785 | rcu_read_unlock(); |
749 | 786 | ||
787 | if (preloaded) | ||
788 | radix_tree_preload_end(); | ||
789 | |||
750 | if (IS_ERR(blkg)) { | 790 | if (IS_ERR(blkg)) { |
751 | ret = PTR_ERR(blkg); | 791 | ret = PTR_ERR(blkg); |
752 | goto out_unlock; | 792 | goto out_unlock; |
753 | } | 793 | } |
754 | q->root_blkg = blkg; | 794 | q->root_blkg = blkg; |
795 | q->root_rl.blkg = blkg; | ||
755 | 796 | ||
756 | list_for_each_entry(blkg, &q->blkg_list, q_node) | 797 | list_for_each_entry(blkg, &q->blkg_list, q_node) |
757 | cnt++; | 798 | cnt++; |
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 8ac457ce7783..24597309e23d 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -17,6 +17,7 @@ | |||
17 | #include <linux/u64_stats_sync.h> | 17 | #include <linux/u64_stats_sync.h> |
18 | #include <linux/seq_file.h> | 18 | #include <linux/seq_file.h> |
19 | #include <linux/radix-tree.h> | 19 | #include <linux/radix-tree.h> |
20 | #include <linux/blkdev.h> | ||
20 | 21 | ||
21 | /* Max limits for throttle policy */ | 22 | /* Max limits for throttle policy */ |
22 | #define THROTL_IOPS_MAX UINT_MAX | 23 | #define THROTL_IOPS_MAX UINT_MAX |
@@ -93,6 +94,8 @@ struct blkcg_gq { | |||
93 | struct list_head q_node; | 94 | struct list_head q_node; |
94 | struct hlist_node blkcg_node; | 95 | struct hlist_node blkcg_node; |
95 | struct blkcg *blkcg; | 96 | struct blkcg *blkcg; |
97 | /* request allocation list for this blkcg-q pair */ | ||
98 | struct request_list rl; | ||
96 | /* reference count */ | 99 | /* reference count */ |
97 | int refcnt; | 100 | int refcnt; |
98 | 101 | ||
@@ -120,8 +123,6 @@ struct blkcg_policy { | |||
120 | 123 | ||
121 | extern struct blkcg blkcg_root; | 124 | extern struct blkcg blkcg_root; |
122 | 125 | ||
123 | struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup); | ||
124 | struct blkcg *bio_blkcg(struct bio *bio); | ||
125 | struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q); | 126 | struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q); |
126 | struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, | 127 | struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, |
127 | struct request_queue *q); | 128 | struct request_queue *q); |
@@ -160,6 +161,25 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, | |||
160 | void blkg_conf_finish(struct blkg_conf_ctx *ctx); | 161 | void blkg_conf_finish(struct blkg_conf_ctx *ctx); |
161 | 162 | ||
162 | 163 | ||
164 | static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) | ||
165 | { | ||
166 | return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id), | ||
167 | struct blkcg, css); | ||
168 | } | ||
169 | |||
170 | static inline struct blkcg *task_blkcg(struct task_struct *tsk) | ||
171 | { | ||
172 | return container_of(task_subsys_state(tsk, blkio_subsys_id), | ||
173 | struct blkcg, css); | ||
174 | } | ||
175 | |||
176 | static inline struct blkcg *bio_blkcg(struct bio *bio) | ||
177 | { | ||
178 | if (bio && bio->bi_css) | ||
179 | return container_of(bio->bi_css, struct blkcg, css); | ||
180 | return task_blkcg(current); | ||
181 | } | ||
182 | |||
163 | /** | 183 | /** |
164 | * blkg_to_pdata - get policy private data | 184 | * blkg_to_pdata - get policy private data |
165 | * @blkg: blkg of interest | 185 | * @blkg: blkg of interest |
@@ -234,6 +254,95 @@ static inline void blkg_put(struct blkcg_gq *blkg) | |||
234 | } | 254 | } |
235 | 255 | ||
236 | /** | 256 | /** |
257 | * blk_get_rl - get request_list to use | ||
258 | * @q: request_queue of interest | ||
259 | * @bio: bio which will be attached to the allocated request (may be %NULL) | ||
260 | * | ||
261 | * The caller wants to allocate a request from @q to use for @bio. Find | ||
262 | * the request_list to use and obtain a reference on it. Should be called | ||
263 | * under queue_lock. This function is guaranteed to return non-%NULL | ||
264 | * request_list. | ||
265 | */ | ||
266 | static inline struct request_list *blk_get_rl(struct request_queue *q, | ||
267 | struct bio *bio) | ||
268 | { | ||
269 | struct blkcg *blkcg; | ||
270 | struct blkcg_gq *blkg; | ||
271 | |||
272 | rcu_read_lock(); | ||
273 | |||
274 | blkcg = bio_blkcg(bio); | ||
275 | |||
276 | /* bypass blkg lookup and use @q->root_rl directly for root */ | ||
277 | if (blkcg == &blkcg_root) | ||
278 | goto root_rl; | ||
279 | |||
280 | /* | ||
281 | * Try to use blkg->rl. blkg lookup may fail under memory pressure | ||
282 | * or if either the blkcg or queue is going away. Fall back to | ||
283 | * root_rl in such cases. | ||
284 | */ | ||
285 | blkg = blkg_lookup_create(blkcg, q); | ||
286 | if (unlikely(IS_ERR(blkg))) | ||
287 | goto root_rl; | ||
288 | |||
289 | blkg_get(blkg); | ||
290 | rcu_read_unlock(); | ||
291 | return &blkg->rl; | ||
292 | root_rl: | ||
293 | rcu_read_unlock(); | ||
294 | return &q->root_rl; | ||
295 | } | ||
296 | |||
297 | /** | ||
298 | * blk_put_rl - put request_list | ||
299 | * @rl: request_list to put | ||
300 | * | ||
301 | * Put the reference acquired by blk_get_rl(). Should be called under | ||
302 | * queue_lock. | ||
303 | */ | ||
304 | static inline void blk_put_rl(struct request_list *rl) | ||
305 | { | ||
306 | /* root_rl may not have blkg set */ | ||
307 | if (rl->blkg && rl->blkg->blkcg != &blkcg_root) | ||
308 | blkg_put(rl->blkg); | ||
309 | } | ||
310 | |||
311 | /** | ||
312 | * blk_rq_set_rl - associate a request with a request_list | ||
313 | * @rq: request of interest | ||
314 | * @rl: target request_list | ||
315 | * | ||
316 | * Associate @rq with @rl so that accounting and freeing can know the | ||
317 | * request_list @rq came from. | ||
318 | */ | ||
319 | static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) | ||
320 | { | ||
321 | rq->rl = rl; | ||
322 | } | ||
323 | |||
324 | /** | ||
325 | * blk_rq_rl - return the request_list a request came from | ||
326 | * @rq: request of interest | ||
327 | * | ||
328 | * Return the request_list @rq is allocated from. | ||
329 | */ | ||
330 | static inline struct request_list *blk_rq_rl(struct request *rq) | ||
331 | { | ||
332 | return rq->rl; | ||
333 | } | ||
334 | |||
335 | struct request_list *__blk_queue_next_rl(struct request_list *rl, | ||
336 | struct request_queue *q); | ||
337 | /** | ||
338 | * blk_queue_for_each_rl - iterate through all request_lists of a request_queue | ||
339 | * | ||
340 | * Should be used under queue_lock. | ||
341 | */ | ||
342 | #define blk_queue_for_each_rl(rl, q) \ | ||
343 | for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q))) | ||
344 | |||
345 | /** | ||
237 | * blkg_stat_add - add a value to a blkg_stat | 346 | * blkg_stat_add - add a value to a blkg_stat |
238 | * @stat: target blkg_stat | 347 | * @stat: target blkg_stat |
239 | * @val: value to add | 348 | * @val: value to add |
@@ -351,6 +460,7 @@ static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat) | |||
351 | #else /* CONFIG_BLK_CGROUP */ | 460 | #else /* CONFIG_BLK_CGROUP */ |
352 | 461 | ||
353 | struct cgroup; | 462 | struct cgroup; |
463 | struct blkcg; | ||
354 | 464 | ||
355 | struct blkg_policy_data { | 465 | struct blkg_policy_data { |
356 | }; | 466 | }; |
@@ -361,8 +471,6 @@ struct blkcg_gq { | |||
361 | struct blkcg_policy { | 471 | struct blkcg_policy { |
362 | }; | 472 | }; |
363 | 473 | ||
364 | static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) { return NULL; } | ||
365 | static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; } | ||
366 | static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; } | 474 | static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; } |
367 | static inline int blkcg_init_queue(struct request_queue *q) { return 0; } | 475 | static inline int blkcg_init_queue(struct request_queue *q) { return 0; } |
368 | static inline void blkcg_drain_queue(struct request_queue *q) { } | 476 | static inline void blkcg_drain_queue(struct request_queue *q) { } |
@@ -374,6 +482,9 @@ static inline int blkcg_activate_policy(struct request_queue *q, | |||
374 | static inline void blkcg_deactivate_policy(struct request_queue *q, | 482 | static inline void blkcg_deactivate_policy(struct request_queue *q, |
375 | const struct blkcg_policy *pol) { } | 483 | const struct blkcg_policy *pol) { } |
376 | 484 | ||
485 | static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) { return NULL; } | ||
486 | static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; } | ||
487 | |||
377 | static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, | 488 | static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, |
378 | struct blkcg_policy *pol) { return NULL; } | 489 | struct blkcg_policy *pol) { return NULL; } |
379 | static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; } | 490 | static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; } |
@@ -381,5 +492,14 @@ static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; } | |||
381 | static inline void blkg_get(struct blkcg_gq *blkg) { } | 492 | static inline void blkg_get(struct blkcg_gq *blkg) { } |
382 | static inline void blkg_put(struct blkcg_gq *blkg) { } | 493 | static inline void blkg_put(struct blkcg_gq *blkg) { } |
383 | 494 | ||
495 | static inline struct request_list *blk_get_rl(struct request_queue *q, | ||
496 | struct bio *bio) { return &q->root_rl; } | ||
497 | static inline void blk_put_rl(struct request_list *rl) { } | ||
498 | static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { } | ||
499 | static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; } | ||
500 | |||
501 | #define blk_queue_for_each_rl(rl, q) \ | ||
502 | for ((rl) = &(q)->root_rl; (rl); (rl) = NULL) | ||
503 | |||
384 | #endif /* CONFIG_BLK_CGROUP */ | 504 | #endif /* CONFIG_BLK_CGROUP */ |
385 | #endif /* _BLK_CGROUP_H */ | 505 | #endif /* _BLK_CGROUP_H */ |
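
Besides the per-request helpers, the header above adds blk_queue_for_each_rl() so code that used to poke the single q->rq can visit every request_list on a queue (q->root_rl first, then one per non-root blkg). A minimal hypothetical caller, holding q->queue_lock as the comment requires, could look like the following; count_allocated_requests() is an invented name for illustration only.

#include <linux/blkdev.h>
#include "blk-cgroup.h"

/* hypothetical helper: total allocated requests across all request_lists */
static int count_allocated_requests(struct request_queue *q)
{
	struct request_list *rl;
	int total = 0;

	lockdep_assert_held(q->queue_lock);

	blk_queue_for_each_rl(rl, q)
		total += rl->count[BLK_RW_SYNC] + rl->count[BLK_RW_ASYNC];

	return total;
}
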
diff --git a/block/blk-core.c b/block/blk-core.c
index 93eb3e4f88ce..dd134d834d58 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -387,7 +387,7 @@ void blk_drain_queue(struct request_queue *q, bool drain_all) | |||
387 | if (!list_empty(&q->queue_head) && q->request_fn) | 387 | if (!list_empty(&q->queue_head) && q->request_fn) |
388 | __blk_run_queue(q); | 388 | __blk_run_queue(q); |
389 | 389 | ||
390 | drain |= q->rq.elvpriv; | 390 | drain |= q->nr_rqs_elvpriv; |
391 | 391 | ||
392 | /* | 392 | /* |
393 | * Unfortunately, requests are queued at and tracked from | 393 | * Unfortunately, requests are queued at and tracked from |
@@ -397,7 +397,7 @@ void blk_drain_queue(struct request_queue *q, bool drain_all) | |||
397 | if (drain_all) { | 397 | if (drain_all) { |
398 | drain |= !list_empty(&q->queue_head); | 398 | drain |= !list_empty(&q->queue_head); |
399 | for (i = 0; i < 2; i++) { | 399 | for (i = 0; i < 2; i++) { |
400 | drain |= q->rq.count[i]; | 400 | drain |= q->nr_rqs[i]; |
401 | drain |= q->in_flight[i]; | 401 | drain |= q->in_flight[i]; |
402 | drain |= !list_empty(&q->flush_queue[i]); | 402 | drain |= !list_empty(&q->flush_queue[i]); |
403 | } | 403 | } |
@@ -416,9 +416,14 @@ void blk_drain_queue(struct request_queue *q, bool drain_all) | |||
416 | * left with hung waiters. We need to wake up those waiters. | 416 | * left with hung waiters. We need to wake up those waiters. |
417 | */ | 417 | */ |
418 | if (q->request_fn) { | 418 | if (q->request_fn) { |
419 | struct request_list *rl; | ||
420 | |||
419 | spin_lock_irq(q->queue_lock); | 421 | spin_lock_irq(q->queue_lock); |
420 | for (i = 0; i < ARRAY_SIZE(q->rq.wait); i++) | 422 | |
421 | wake_up_all(&q->rq.wait[i]); | 423 | blk_queue_for_each_rl(rl, q) |
424 | for (i = 0; i < ARRAY_SIZE(rl->wait); i++) | ||
425 | wake_up_all(&rl->wait[i]); | ||
426 | |||
422 | spin_unlock_irq(q->queue_lock); | 427 | spin_unlock_irq(q->queue_lock); |
423 | } | 428 | } |
424 | } | 429 | } |
@@ -517,28 +522,33 @@ void blk_cleanup_queue(struct request_queue *q) | |||
517 | } | 522 | } |
518 | EXPORT_SYMBOL(blk_cleanup_queue); | 523 | EXPORT_SYMBOL(blk_cleanup_queue); |
519 | 524 | ||
520 | static int blk_init_free_list(struct request_queue *q) | 525 | int blk_init_rl(struct request_list *rl, struct request_queue *q, |
526 | gfp_t gfp_mask) | ||
521 | { | 527 | { |
522 | struct request_list *rl = &q->rq; | ||
523 | |||
524 | if (unlikely(rl->rq_pool)) | 528 | if (unlikely(rl->rq_pool)) |
525 | return 0; | 529 | return 0; |
526 | 530 | ||
531 | rl->q = q; | ||
527 | rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0; | 532 | rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0; |
528 | rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0; | 533 | rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0; |
529 | rl->elvpriv = 0; | ||
530 | init_waitqueue_head(&rl->wait[BLK_RW_SYNC]); | 534 | init_waitqueue_head(&rl->wait[BLK_RW_SYNC]); |
531 | init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]); | 535 | init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]); |
532 | 536 | ||
533 | rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, | 537 | rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, |
534 | mempool_free_slab, request_cachep, q->node); | 538 | mempool_free_slab, request_cachep, |
535 | 539 | gfp_mask, q->node); | |
536 | if (!rl->rq_pool) | 540 | if (!rl->rq_pool) |
537 | return -ENOMEM; | 541 | return -ENOMEM; |
538 | 542 | ||
539 | return 0; | 543 | return 0; |
540 | } | 544 | } |
541 | 545 | ||
546 | void blk_exit_rl(struct request_list *rl) | ||
547 | { | ||
548 | if (rl->rq_pool) | ||
549 | mempool_destroy(rl->rq_pool); | ||
550 | } | ||
551 | |||
542 | struct request_queue *blk_alloc_queue(gfp_t gfp_mask) | 552 | struct request_queue *blk_alloc_queue(gfp_t gfp_mask) |
543 | { | 553 | { |
544 | return blk_alloc_queue_node(gfp_mask, -1); | 554 | return blk_alloc_queue_node(gfp_mask, -1); |
@@ -680,7 +690,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, | |||
680 | if (!q) | 690 | if (!q) |
681 | return NULL; | 691 | return NULL; |
682 | 692 | ||
683 | if (blk_init_free_list(q)) | 693 | if (blk_init_rl(&q->root_rl, q, GFP_KERNEL)) |
684 | return NULL; | 694 | return NULL; |
685 | 695 | ||
686 | q->request_fn = rfn; | 696 | q->request_fn = rfn; |
@@ -722,15 +732,15 @@ bool blk_get_queue(struct request_queue *q) | |||
722 | } | 732 | } |
723 | EXPORT_SYMBOL(blk_get_queue); | 733 | EXPORT_SYMBOL(blk_get_queue); |
724 | 734 | ||
725 | static inline void blk_free_request(struct request_queue *q, struct request *rq) | 735 | static inline void blk_free_request(struct request_list *rl, struct request *rq) |
726 | { | 736 | { |
727 | if (rq->cmd_flags & REQ_ELVPRIV) { | 737 | if (rq->cmd_flags & REQ_ELVPRIV) { |
728 | elv_put_request(q, rq); | 738 | elv_put_request(rl->q, rq); |
729 | if (rq->elv.icq) | 739 | if (rq->elv.icq) |
730 | put_io_context(rq->elv.icq->ioc); | 740 | put_io_context(rq->elv.icq->ioc); |
731 | } | 741 | } |
732 | 742 | ||
733 | mempool_free(rq, q->rq.rq_pool); | 743 | mempool_free(rq, rl->rq_pool); |
734 | } | 744 | } |
735 | 745 | ||
736 | /* | 746 | /* |
@@ -767,18 +777,23 @@ static void ioc_set_batching(struct request_queue *q, struct io_context *ioc) | |||
767 | ioc->last_waited = jiffies; | 777 | ioc->last_waited = jiffies; |
768 | } | 778 | } |
769 | 779 | ||
770 | static void __freed_request(struct request_queue *q, int sync) | 780 | static void __freed_request(struct request_list *rl, int sync) |
771 | { | 781 | { |
772 | struct request_list *rl = &q->rq; | 782 | struct request_queue *q = rl->q; |
773 | 783 | ||
774 | if (rl->count[sync] < queue_congestion_off_threshold(q)) | 784 | /* |
785 | * bdi isn't aware of blkcg yet. As all async IOs end up root | ||
786 | * blkcg anyway, just use root blkcg state. | ||
787 | */ | ||
788 | if (rl == &q->root_rl && | ||
789 | rl->count[sync] < queue_congestion_off_threshold(q)) | ||
775 | blk_clear_queue_congested(q, sync); | 790 | blk_clear_queue_congested(q, sync); |
776 | 791 | ||
777 | if (rl->count[sync] + 1 <= q->nr_requests) { | 792 | if (rl->count[sync] + 1 <= q->nr_requests) { |
778 | if (waitqueue_active(&rl->wait[sync])) | 793 | if (waitqueue_active(&rl->wait[sync])) |
779 | wake_up(&rl->wait[sync]); | 794 | wake_up(&rl->wait[sync]); |
780 | 795 | ||
781 | blk_clear_queue_full(q, sync); | 796 | blk_clear_rl_full(rl, sync); |
782 | } | 797 | } |
783 | } | 798 | } |
784 | 799 | ||
@@ -786,19 +801,20 @@ static void __freed_request(struct request_queue *q, int sync) | |||
786 | * A request has just been released. Account for it, update the full and | 801 | * A request has just been released. Account for it, update the full and |
787 | * congestion status, wake up any waiters. Called under q->queue_lock. | 802 | * congestion status, wake up any waiters. Called under q->queue_lock. |
788 | */ | 803 | */ |
789 | static void freed_request(struct request_queue *q, unsigned int flags) | 804 | static void freed_request(struct request_list *rl, unsigned int flags) |
790 | { | 805 | { |
791 | struct request_list *rl = &q->rq; | 806 | struct request_queue *q = rl->q; |
792 | int sync = rw_is_sync(flags); | 807 | int sync = rw_is_sync(flags); |
793 | 808 | ||
809 | q->nr_rqs[sync]--; | ||
794 | rl->count[sync]--; | 810 | rl->count[sync]--; |
795 | if (flags & REQ_ELVPRIV) | 811 | if (flags & REQ_ELVPRIV) |
796 | rl->elvpriv--; | 812 | q->nr_rqs_elvpriv--; |
797 | 813 | ||
798 | __freed_request(q, sync); | 814 | __freed_request(rl, sync); |
799 | 815 | ||
800 | if (unlikely(rl->starved[sync ^ 1])) | 816 | if (unlikely(rl->starved[sync ^ 1])) |
801 | __freed_request(q, sync ^ 1); | 817 | __freed_request(rl, sync ^ 1); |
802 | } | 818 | } |
803 | 819 | ||
804 | /* | 820 | /* |
@@ -837,8 +853,8 @@ static struct io_context *rq_ioc(struct bio *bio) | |||
837 | } | 853 | } |
838 | 854 | ||
839 | /** | 855 | /** |
840 | * get_request - get a free request | 856 | * __get_request - get a free request |
841 | * @q: request_queue to allocate request from | 857 | * @rl: request list to allocate from |
842 | * @rw_flags: RW and SYNC flags | 858 | * @rw_flags: RW and SYNC flags |
843 | * @bio: bio to allocate request for (can be %NULL) | 859 | * @bio: bio to allocate request for (can be %NULL) |
844 | * @gfp_mask: allocation mask | 860 | * @gfp_mask: allocation mask |
@@ -850,20 +866,16 @@ static struct io_context *rq_ioc(struct bio *bio) | |||
850 | * Returns %NULL on failure, with @q->queue_lock held. | 866 | * Returns %NULL on failure, with @q->queue_lock held. |
851 | * Returns !%NULL on success, with @q->queue_lock *not held*. | 867 | * Returns !%NULL on success, with @q->queue_lock *not held*. |
852 | */ | 868 | */ |
853 | static struct request *get_request(struct request_queue *q, int rw_flags, | 869 | static struct request *__get_request(struct request_list *rl, int rw_flags, |
854 | struct bio *bio, gfp_t gfp_mask) | 870 | struct bio *bio, gfp_t gfp_mask) |
855 | { | 871 | { |
872 | struct request_queue *q = rl->q; | ||
856 | struct request *rq; | 873 | struct request *rq; |
857 | struct request_list *rl = &q->rq; | 874 | struct elevator_type *et = q->elevator->type; |
858 | struct elevator_type *et; | 875 | struct io_context *ioc = rq_ioc(bio); |
859 | struct io_context *ioc; | ||
860 | struct io_cq *icq = NULL; | 876 | struct io_cq *icq = NULL; |
861 | const bool is_sync = rw_is_sync(rw_flags) != 0; | 877 | const bool is_sync = rw_is_sync(rw_flags) != 0; |
862 | bool retried = false; | ||
863 | int may_queue; | 878 | int may_queue; |
864 | retry: | ||
865 | et = q->elevator->type; | ||
866 | ioc = rq_ioc(bio); | ||
867 | 879 | ||
868 | if (unlikely(blk_queue_dead(q))) | 880 | if (unlikely(blk_queue_dead(q))) |
869 | return NULL; | 881 | return NULL; |
@@ -875,28 +887,14 @@ retry: | |||
875 | if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) { | 887 | if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) { |
876 | if (rl->count[is_sync]+1 >= q->nr_requests) { | 888 | if (rl->count[is_sync]+1 >= q->nr_requests) { |
877 | /* | 889 | /* |
878 | * We want ioc to record batching state. If it's | ||
879 | * not already there, creating a new one requires | ||
880 | * dropping queue_lock, which in turn requires | ||
881 | * retesting conditions to avoid queue hang. | ||
882 | */ | ||
883 | if (!ioc && !retried) { | ||
884 | spin_unlock_irq(q->queue_lock); | ||
885 | create_io_context(gfp_mask, q->node); | ||
886 | spin_lock_irq(q->queue_lock); | ||
887 | retried = true; | ||
888 | goto retry; | ||
889 | } | ||
890 | |||
891 | /* | ||
892 | * The queue will fill after this allocation, so set | 890 | * The queue will fill after this allocation, so set |
893 | * it as full, and mark this process as "batching". | 891 | * it as full, and mark this process as "batching". |
894 | * This process will be allowed to complete a batch of | 892 | * This process will be allowed to complete a batch of |
895 | * requests, others will be blocked. | 893 | * requests, others will be blocked. |
896 | */ | 894 | */ |
897 | if (!blk_queue_full(q, is_sync)) { | 895 | if (!blk_rl_full(rl, is_sync)) { |
898 | ioc_set_batching(q, ioc); | 896 | ioc_set_batching(q, ioc); |
899 | blk_set_queue_full(q, is_sync); | 897 | blk_set_rl_full(rl, is_sync); |
900 | } else { | 898 | } else { |
901 | if (may_queue != ELV_MQUEUE_MUST | 899 | if (may_queue != ELV_MQUEUE_MUST |
902 | && !ioc_batching(q, ioc)) { | 900 | && !ioc_batching(q, ioc)) { |
@@ -909,7 +907,12 @@ retry: | |||
909 | } | 907 | } |
910 | } | 908 | } |
911 | } | 909 | } |
912 | blk_set_queue_congested(q, is_sync); | 910 | /* |
911 | * bdi isn't aware of blkcg yet. As all async IOs end up | ||
912 | * root blkcg anyway, just use root blkcg state. | ||
913 | */ | ||
914 | if (rl == &q->root_rl) | ||
915 | blk_set_queue_congested(q, is_sync); | ||
913 | } | 916 | } |
914 | 917 | ||
915 | /* | 918 | /* |
@@ -920,6 +923,7 @@ retry: | |||
920 | if (rl->count[is_sync] >= (3 * q->nr_requests / 2)) | 923 | if (rl->count[is_sync] >= (3 * q->nr_requests / 2)) |
921 | return NULL; | 924 | return NULL; |
922 | 925 | ||
926 | q->nr_rqs[is_sync]++; | ||
923 | rl->count[is_sync]++; | 927 | rl->count[is_sync]++; |
924 | rl->starved[is_sync] = 0; | 928 | rl->starved[is_sync] = 0; |
925 | 929 | ||
@@ -935,7 +939,7 @@ retry: | |||
935 | */ | 939 | */ |
936 | if (blk_rq_should_init_elevator(bio) && !blk_queue_bypass(q)) { | 940 | if (blk_rq_should_init_elevator(bio) && !blk_queue_bypass(q)) { |
937 | rw_flags |= REQ_ELVPRIV; | 941 | rw_flags |= REQ_ELVPRIV; |
938 | rl->elvpriv++; | 942 | q->nr_rqs_elvpriv++; |
939 | if (et->icq_cache && ioc) | 943 | if (et->icq_cache && ioc) |
940 | icq = ioc_lookup_icq(ioc, q); | 944 | icq = ioc_lookup_icq(ioc, q); |
941 | } | 945 | } |
@@ -945,22 +949,19 @@ retry: | |||
945 | spin_unlock_irq(q->queue_lock); | 949 | spin_unlock_irq(q->queue_lock); |
946 | 950 | ||
947 | /* allocate and init request */ | 951 | /* allocate and init request */ |
948 | rq = mempool_alloc(q->rq.rq_pool, gfp_mask); | 952 | rq = mempool_alloc(rl->rq_pool, gfp_mask); |
949 | if (!rq) | 953 | if (!rq) |
950 | goto fail_alloc; | 954 | goto fail_alloc; |
951 | 955 | ||
952 | blk_rq_init(q, rq); | 956 | blk_rq_init(q, rq); |
957 | blk_rq_set_rl(rq, rl); | ||
953 | rq->cmd_flags = rw_flags | REQ_ALLOCED; | 958 | rq->cmd_flags = rw_flags | REQ_ALLOCED; |
954 | 959 | ||
955 | /* init elvpriv */ | 960 | /* init elvpriv */ |
956 | if (rw_flags & REQ_ELVPRIV) { | 961 | if (rw_flags & REQ_ELVPRIV) { |
957 | if (unlikely(et->icq_cache && !icq)) { | 962 | if (unlikely(et->icq_cache && !icq)) { |
958 | create_io_context(gfp_mask, q->node); | 963 | if (ioc) |
959 | ioc = rq_ioc(bio); | 964 | icq = ioc_create_icq(ioc, q, gfp_mask); |
960 | if (!ioc) | ||
961 | goto fail_elvpriv; | ||
962 | |||
963 | icq = ioc_create_icq(ioc, q, gfp_mask); | ||
964 | if (!icq) | 965 | if (!icq) |
965 | goto fail_elvpriv; | 966 | goto fail_elvpriv; |
966 | } | 967 | } |
@@ -1000,7 +1001,7 @@ fail_elvpriv: | |||
1000 | rq->elv.icq = NULL; | 1001 | rq->elv.icq = NULL; |
1001 | 1002 | ||
1002 | spin_lock_irq(q->queue_lock); | 1003 | spin_lock_irq(q->queue_lock); |
1003 | rl->elvpriv--; | 1004 | q->nr_rqs_elvpriv--; |
1004 | spin_unlock_irq(q->queue_lock); | 1005 | spin_unlock_irq(q->queue_lock); |
1005 | goto out; | 1006 | goto out; |
1006 | 1007 | ||
@@ -1013,7 +1014,7 @@ fail_alloc: | |||
1013 | * queue, but this is pretty rare. | 1014 | * queue, but this is pretty rare. |
1014 | */ | 1015 | */ |
1015 | spin_lock_irq(q->queue_lock); | 1016 | spin_lock_irq(q->queue_lock); |
1016 | freed_request(q, rw_flags); | 1017 | freed_request(rl, rw_flags); |
1017 | 1018 | ||
1018 | /* | 1019 | /* |
1019 | * in the very unlikely event that allocation failed and no | 1020 | * in the very unlikely event that allocation failed and no |
@@ -1029,56 +1030,58 @@ rq_starved: | |||
1029 | } | 1030 | } |
1030 | 1031 | ||
1031 | /** | 1032 | /** |
1032 | * get_request_wait - get a free request with retry | 1033 | * get_request - get a free request |
1033 | * @q: request_queue to allocate request from | 1034 | * @q: request_queue to allocate request from |
1034 | * @rw_flags: RW and SYNC flags | 1035 | * @rw_flags: RW and SYNC flags |
1035 | * @bio: bio to allocate request for (can be %NULL) | 1036 | * @bio: bio to allocate request for (can be %NULL) |
1037 | * @gfp_mask: allocation mask | ||
1036 | * | 1038 | * |
1037 | * Get a free request from @q. This function keeps retrying under memory | 1039 | * Get a free request from @q. If %__GFP_WAIT is set in @gfp_mask, this |
1038 | * pressure and fails iff @q is dead. | 1040 | * function keeps retrying under memory pressure and fails iff @q is dead. |
1039 | * | 1041 | * |
1040 | * Must be callled with @q->queue_lock held and, | 1042 | * Must be callled with @q->queue_lock held and, |
1041 | * Returns %NULL on failure, with @q->queue_lock held. | 1043 | * Returns %NULL on failure, with @q->queue_lock held. |
1042 | * Returns !%NULL on success, with @q->queue_lock *not held*. | 1044 | * Returns !%NULL on success, with @q->queue_lock *not held*. |
1043 | */ | 1045 | */ |
1044 | static struct request *get_request_wait(struct request_queue *q, int rw_flags, | 1046 | static struct request *get_request(struct request_queue *q, int rw_flags, |
1045 | struct bio *bio) | 1047 | struct bio *bio, gfp_t gfp_mask) |
1046 | { | 1048 | { |
1047 | const bool is_sync = rw_is_sync(rw_flags) != 0; | 1049 | const bool is_sync = rw_is_sync(rw_flags) != 0; |
1050 | DEFINE_WAIT(wait); | ||
1051 | struct request_list *rl; | ||
1048 | struct request *rq; | 1052 | struct request *rq; |
1049 | 1053 | ||
1050 | rq = get_request(q, rw_flags, bio, GFP_NOIO); | 1054 | rl = blk_get_rl(q, bio); /* transferred to @rq on success */ |
1051 | while (!rq) { | 1055 | retry: |
1052 | DEFINE_WAIT(wait); | 1056 | rq = __get_request(rl, rw_flags, bio, gfp_mask); |
1053 | struct request_list *rl = &q->rq; | 1057 | if (rq) |
1054 | 1058 | return rq; | |
1055 | if (unlikely(blk_queue_dead(q))) | ||
1056 | return NULL; | ||
1057 | 1059 | ||
1058 | prepare_to_wait_exclusive(&rl->wait[is_sync], &wait, | 1060 | if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dead(q))) { |
1059 | TASK_UNINTERRUPTIBLE); | 1061 | blk_put_rl(rl); |
1062 | return NULL; | ||
1063 | } | ||
1060 | 1064 | ||
1061 | trace_block_sleeprq(q, bio, rw_flags & 1); | 1065 | /* wait on @rl and retry */ |
1066 | prepare_to_wait_exclusive(&rl->wait[is_sync], &wait, | ||
1067 | TASK_UNINTERRUPTIBLE); | ||
1062 | 1068 | ||
1063 | spin_unlock_irq(q->queue_lock); | 1069 | trace_block_sleeprq(q, bio, rw_flags & 1); |
1064 | io_schedule(); | ||
1065 | 1070 | ||
1066 | /* | 1071 | spin_unlock_irq(q->queue_lock); |
1067 | * After sleeping, we become a "batching" process and | 1072 | io_schedule(); |
1068 | * will be able to allocate at least one request, and | ||
1069 | * up to a big batch of them for a small period time. | ||
1070 | * See ioc_batching, ioc_set_batching | ||
1071 | */ | ||
1072 | create_io_context(GFP_NOIO, q->node); | ||
1073 | ioc_set_batching(q, current->io_context); | ||
1074 | 1073 | ||
1075 | spin_lock_irq(q->queue_lock); | 1074 | /* |
1076 | finish_wait(&rl->wait[is_sync], &wait); | 1075 | * After sleeping, we become a "batching" process and will be able |
1076 | * to allocate at least one request, and up to a big batch of them | ||
1077 | * for a small period time. See ioc_batching, ioc_set_batching | ||
1078 | */ | ||
1079 | ioc_set_batching(q, current->io_context); | ||
1077 | 1080 | ||
1078 | rq = get_request(q, rw_flags, bio, GFP_NOIO); | 1081 | spin_lock_irq(q->queue_lock); |
1079 | }; | 1082 | finish_wait(&rl->wait[is_sync], &wait); |
1080 | 1083 | ||
1081 | return rq; | 1084 | goto retry; |
1082 | } | 1085 | } |
1083 | 1086 | ||
1084 | struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) | 1087 | struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) |
@@ -1087,11 +1090,11 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) | |||
1087 | 1090 | ||
1088 | BUG_ON(rw != READ && rw != WRITE); | 1091 | BUG_ON(rw != READ && rw != WRITE); |
1089 | 1092 | ||
1093 | /* create ioc upfront */ | ||
1094 | create_io_context(gfp_mask, q->node); | ||
1095 | |||
1090 | spin_lock_irq(q->queue_lock); | 1096 | spin_lock_irq(q->queue_lock); |
1091 | if (gfp_mask & __GFP_WAIT) | 1097 | rq = get_request(q, rw, NULL, gfp_mask); |
1092 | rq = get_request_wait(q, rw, NULL); | ||
1093 | else | ||
1094 | rq = get_request(q, rw, NULL, gfp_mask); | ||
1095 | if (!rq) | 1098 | if (!rq) |
1096 | spin_unlock_irq(q->queue_lock); | 1099 | spin_unlock_irq(q->queue_lock); |
1097 | /* q->queue_lock is unlocked at this point */ | 1100 | /* q->queue_lock is unlocked at this point */ |
@@ -1248,12 +1251,14 @@ void __blk_put_request(struct request_queue *q, struct request *req) | |||
1248 | */ | 1251 | */ |
1249 | if (req->cmd_flags & REQ_ALLOCED) { | 1252 | if (req->cmd_flags & REQ_ALLOCED) { |
1250 | unsigned int flags = req->cmd_flags; | 1253 | unsigned int flags = req->cmd_flags; |
1254 | struct request_list *rl = blk_rq_rl(req); | ||
1251 | 1255 | ||
1252 | BUG_ON(!list_empty(&req->queuelist)); | 1256 | BUG_ON(!list_empty(&req->queuelist)); |
1253 | BUG_ON(!hlist_unhashed(&req->hash)); | 1257 | BUG_ON(!hlist_unhashed(&req->hash)); |
1254 | 1258 | ||
1255 | blk_free_request(q, req); | 1259 | blk_free_request(rl, req); |
1256 | freed_request(q, flags); | 1260 | freed_request(rl, flags); |
1261 | blk_put_rl(rl); | ||
1257 | } | 1262 | } |
1258 | } | 1263 | } |
1259 | EXPORT_SYMBOL_GPL(__blk_put_request); | 1264 | EXPORT_SYMBOL_GPL(__blk_put_request); |
@@ -1481,7 +1486,7 @@ get_rq: | |||
1481 | * Grab a free request. This is might sleep but can not fail. | 1486 | * Grab a free request. This is might sleep but can not fail. |
1482 | * Returns with the queue unlocked. | 1487 | * Returns with the queue unlocked. |
1483 | */ | 1488 | */ |
1484 | req = get_request_wait(q, rw_flags, bio); | 1489 | req = get_request(q, rw_flags, bio, GFP_NOIO); |
1485 | if (unlikely(!req)) { | 1490 | if (unlikely(!req)) { |
1486 | bio_endio(bio, -ENODEV); /* @q is dead */ | 1491 | bio_endio(bio, -ENODEV); /* @q is dead */ |
1487 | goto out_unlock; | 1492 | goto out_unlock; |
@@ -1702,6 +1707,14 @@ generic_make_request_checks(struct bio *bio) | |||
1702 | goto end_io; | 1707 | goto end_io; |
1703 | } | 1708 | } |
1704 | 1709 | ||
1710 | /* | ||
1711 | * Various block parts want %current->io_context and lazy ioc | ||
1712 | * allocation ends up trading a lot of pain for a small amount of | ||
1713 | * memory. Just allocate it upfront. This may fail and block | ||
1714 | * layer knows how to live with it. | ||
1715 | */ | ||
1716 | create_io_context(GFP_ATOMIC, q->node); | ||
1717 | |||
1705 | if (blk_throtl_bio(q, bio)) | 1718 | if (blk_throtl_bio(q, bio)) |
1706 | return false; /* throttled, will be resubmitted later */ | 1719 | return false; /* throttled, will be resubmitted later */ |
1707 | 1720 | ||
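
With get_request_wait() folded into get_request() above, whether blk_get_request() blocks is now decided purely by its gfp mask: a mask containing __GFP_WAIT retries under memory pressure and only fails once the queue is dead, anything else fails fast. A hedged driver-side sketch of the resulting calling convention (the surrounding function is invented for illustration):

#include <linux/blkdev.h>

/* illustration of the gfp-driven behaviour described in the comment above */
static struct request *example_get_request(struct request_queue *q,
					   bool can_sleep)
{
	gfp_t gfp = can_sleep ? GFP_NOIO : GFP_ATOMIC;
	struct request *rq;

	/*
	 * GFP_NOIO contains __GFP_WAIT, so this retries until a request is
	 * available or the queue dies; GFP_ATOMIC returns NULL immediately
	 * when no request can be allocated.
	 */
	rq = blk_get_request(q, WRITE, gfp);
	return rq;	/* NULL means queue dead or no request available */
}
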
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 893b8007c657..fab4cdd3f7bb 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -244,6 +244,7 @@ int create_task_io_context(struct task_struct *task, gfp_t gfp_flags, int node) | |||
244 | 244 | ||
245 | /* initialize */ | 245 | /* initialize */ |
246 | atomic_long_set(&ioc->refcount, 1); | 246 | atomic_long_set(&ioc->refcount, 1); |
247 | atomic_set(&ioc->nr_tasks, 1); | ||
247 | atomic_set(&ioc->active_ref, 1); | 248 | atomic_set(&ioc->active_ref, 1); |
248 | spin_lock_init(&ioc->lock); | 249 | spin_lock_init(&ioc->lock); |
249 | INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC | __GFP_HIGH); | 250 | INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC | __GFP_HIGH); |
diff --git a/block/blk-settings.c b/block/blk-settings.c
index d3234fc494ad..565a6786032f 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -143,8 +143,7 @@ void blk_set_stacking_limits(struct queue_limits *lim) | |||
143 | lim->discard_zeroes_data = 1; | 143 | lim->discard_zeroes_data = 1; |
144 | lim->max_segments = USHRT_MAX; | 144 | lim->max_segments = USHRT_MAX; |
145 | lim->max_hw_sectors = UINT_MAX; | 145 | lim->max_hw_sectors = UINT_MAX; |
146 | 146 | lim->max_sectors = UINT_MAX; | |
147 | lim->max_sectors = BLK_DEF_MAX_SECTORS; | ||
148 | } | 147 | } |
149 | EXPORT_SYMBOL(blk_set_stacking_limits); | 148 | EXPORT_SYMBOL(blk_set_stacking_limits); |
150 | 149 | ||
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index aa41b47c22d2..9628b291f960 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -40,7 +40,7 @@ static ssize_t queue_requests_show(struct request_queue *q, char *page) | |||
40 | static ssize_t | 40 | static ssize_t |
41 | queue_requests_store(struct request_queue *q, const char *page, size_t count) | 41 | queue_requests_store(struct request_queue *q, const char *page, size_t count) |
42 | { | 42 | { |
43 | struct request_list *rl = &q->rq; | 43 | struct request_list *rl; |
44 | unsigned long nr; | 44 | unsigned long nr; |
45 | int ret; | 45 | int ret; |
46 | 46 | ||
@@ -55,6 +55,9 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count) | |||
55 | q->nr_requests = nr; | 55 | q->nr_requests = nr; |
56 | blk_queue_congestion_threshold(q); | 56 | blk_queue_congestion_threshold(q); |
57 | 57 | ||
58 | /* congestion isn't cgroup aware and follows root blkcg for now */ | ||
59 | rl = &q->root_rl; | ||
60 | |||
58 | if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q)) | 61 | if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q)) |
59 | blk_set_queue_congested(q, BLK_RW_SYNC); | 62 | blk_set_queue_congested(q, BLK_RW_SYNC); |
60 | else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q)) | 63 | else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q)) |
@@ -65,19 +68,22 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count) | |||
65 | else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q)) | 68 | else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q)) |
66 | blk_clear_queue_congested(q, BLK_RW_ASYNC); | 69 | blk_clear_queue_congested(q, BLK_RW_ASYNC); |
67 | 70 | ||
68 | if (rl->count[BLK_RW_SYNC] >= q->nr_requests) { | 71 | blk_queue_for_each_rl(rl, q) { |
69 | blk_set_queue_full(q, BLK_RW_SYNC); | 72 | if (rl->count[BLK_RW_SYNC] >= q->nr_requests) { |
70 | } else { | 73 | blk_set_rl_full(rl, BLK_RW_SYNC); |
71 | blk_clear_queue_full(q, BLK_RW_SYNC); | 74 | } else { |
72 | wake_up(&rl->wait[BLK_RW_SYNC]); | 75 | blk_clear_rl_full(rl, BLK_RW_SYNC); |
76 | wake_up(&rl->wait[BLK_RW_SYNC]); | ||
77 | } | ||
78 | |||
79 | if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) { | ||
80 | blk_set_rl_full(rl, BLK_RW_ASYNC); | ||
81 | } else { | ||
82 | blk_clear_rl_full(rl, BLK_RW_ASYNC); | ||
83 | wake_up(&rl->wait[BLK_RW_ASYNC]); | ||
84 | } | ||
73 | } | 85 | } |
74 | 86 | ||
75 | if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) { | ||
76 | blk_set_queue_full(q, BLK_RW_ASYNC); | ||
77 | } else { | ||
78 | blk_clear_queue_full(q, BLK_RW_ASYNC); | ||
79 | wake_up(&rl->wait[BLK_RW_ASYNC]); | ||
80 | } | ||
81 | spin_unlock_irq(q->queue_lock); | 87 | spin_unlock_irq(q->queue_lock); |
82 | return ret; | 88 | return ret; |
83 | } | 89 | } |
@@ -476,7 +482,6 @@ static void blk_release_queue(struct kobject *kobj) | |||
476 | { | 482 | { |
477 | struct request_queue *q = | 483 | struct request_queue *q = |
478 | container_of(kobj, struct request_queue, kobj); | 484 | container_of(kobj, struct request_queue, kobj); |
479 | struct request_list *rl = &q->rq; | ||
480 | 485 | ||
481 | blk_sync_queue(q); | 486 | blk_sync_queue(q); |
482 | 487 | ||
@@ -489,8 +494,7 @@ static void blk_release_queue(struct kobject *kobj) | |||
489 | elevator_exit(q->elevator); | 494 | elevator_exit(q->elevator); |
490 | } | 495 | } |
491 | 496 | ||
492 | if (rl->rq_pool) | 497 | blk_exit_rl(&q->root_rl); |
493 | mempool_destroy(rl->rq_pool); | ||
494 | 498 | ||
495 | if (q->queue_tags) | 499 | if (q->queue_tags) |
496 | __blk_queue_free_tags(q); | 500 | __blk_queue_free_tags(q); |
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 5b0659512047..e287c19908c8 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -1123,9 +1123,6 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio) | |||
1123 | goto out; | 1123 | goto out; |
1124 | } | 1124 | } |
1125 | 1125 | ||
1126 | /* bio_associate_current() needs ioc, try creating */ | ||
1127 | create_io_context(GFP_ATOMIC, q->node); | ||
1128 | |||
1129 | /* | 1126 | /* |
1130 | * A throtl_grp pointer retrieved under rcu can be used to access | 1127 | * A throtl_grp pointer retrieved under rcu can be used to access |
1131 | * basic fields like stats and io rates. If a group has no rules, | 1128 | * basic fields like stats and io rates. If a group has no rules, |
diff --git a/block/blk.h b/block/blk.h
index 85f6ae42f7d3..2a0ea32d249f 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -18,6 +18,9 @@ static inline void __blk_get_queue(struct request_queue *q) | |||
18 | kobject_get(&q->kobj); | 18 | kobject_get(&q->kobj); |
19 | } | 19 | } |
20 | 20 | ||
21 | int blk_init_rl(struct request_list *rl, struct request_queue *q, | ||
22 | gfp_t gfp_mask); | ||
23 | void blk_exit_rl(struct request_list *rl); | ||
21 | void init_request_from_bio(struct request *req, struct bio *bio); | 24 | void init_request_from_bio(struct request *req, struct bio *bio); |
22 | void blk_rq_bio_prep(struct request_queue *q, struct request *rq, | 25 | void blk_rq_bio_prep(struct request_queue *q, struct request *rq, |
23 | struct bio *bio); | 26 | struct bio *bio); |
@@ -33,7 +36,6 @@ bool __blk_end_bidi_request(struct request *rq, int error, | |||
33 | void blk_rq_timed_out_timer(unsigned long data); | 36 | void blk_rq_timed_out_timer(unsigned long data); |
34 | void blk_delete_timer(struct request *); | 37 | void blk_delete_timer(struct request *); |
35 | void blk_add_timer(struct request *); | 38 | void blk_add_timer(struct request *); |
36 | void __generic_unplug_device(struct request_queue *); | ||
37 | 39 | ||
38 | /* | 40 | /* |
39 | * Internal atomic flags for request handling | 41 | * Internal atomic flags for request handling |
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index 7ad49c88f6b1..deee61fbb741 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -243,56 +243,3 @@ int bsg_setup_queue(struct device *dev, struct request_queue *q, | |||
243 | return 0; | 243 | return 0; |
244 | } | 244 | } |
245 | EXPORT_SYMBOL_GPL(bsg_setup_queue); | 245 | EXPORT_SYMBOL_GPL(bsg_setup_queue); |
246 | |||
247 | /** | ||
248 | * bsg_remove_queue - Deletes the bsg dev from the q | ||
249 | * @q: the request_queue that is to be torn down. | ||
250 | * | ||
251 | * Notes: | ||
252 | * Before unregistering the queue empty any requests that are blocked | ||
253 | */ | ||
254 | void bsg_remove_queue(struct request_queue *q) | ||
255 | { | ||
256 | struct request *req; /* block request */ | ||
257 | int counts; /* totals for request_list count and starved */ | ||
258 | |||
259 | if (!q) | ||
260 | return; | ||
261 | |||
262 | /* Stop taking in new requests */ | ||
263 | spin_lock_irq(q->queue_lock); | ||
264 | blk_stop_queue(q); | ||
265 | |||
266 | /* drain all requests in the queue */ | ||
267 | while (1) { | ||
268 | /* need the lock to fetch a request | ||
269 | * this may fetch the same reqeust as the previous pass | ||
270 | */ | ||
271 | req = blk_fetch_request(q); | ||
272 | /* save requests in use and starved */ | ||
273 | counts = q->rq.count[0] + q->rq.count[1] + | ||
274 | q->rq.starved[0] + q->rq.starved[1]; | ||
275 | spin_unlock_irq(q->queue_lock); | ||
276 | /* any requests still outstanding? */ | ||
277 | if (counts == 0) | ||
278 | break; | ||
279 | |||
280 | /* This may be the same req as the previous iteration, | ||
281 | * always send the blk_end_request_all after a prefetch. | ||
282 | * It is not okay to not end the request because the | ||
283 | * prefetch started the request. | ||
284 | */ | ||
285 | if (req) { | ||
286 | /* return -ENXIO to indicate that this queue is | ||
287 | * going away | ||
288 | */ | ||
289 | req->errors = -ENXIO; | ||
290 | blk_end_request_all(req, -ENXIO); | ||
291 | } | ||
292 | |||
293 | msleep(200); /* allow bsg to possibly finish */ | ||
294 | spin_lock_irq(q->queue_lock); | ||
295 | } | ||
296 | bsg_unregister_queue(q); | ||
297 | } | ||
298 | EXPORT_SYMBOL_GPL(bsg_remove_queue); | ||
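
bsg_remove_queue() and its hand-rolled drain loop go away because blk_cleanup_queue() now drains reliably (see "block: drop custom queue draining used by scsi_transport_{iscsi|fc}" in the shortlog). The former callers in the SCSI transport classes are presumably left with something close to the sketch below; treat it as an approximation rather than the exact replacement code.

#include <linux/blkdev.h>
#include <linux/bsg.h>

/* approximate replacement for the removed bsg_remove_queue() */
static void example_bsg_teardown(struct request_queue *q)
{
	bsg_unregister_queue(q);	/* remove the bsg device node */
	blk_cleanup_queue(q);		/* generic draining replaces the open-coded loop */
}
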
diff --git a/block/genhd.c b/block/genhd.c
index 9cf5583c90ff..cac7366957c3 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -154,7 +154,7 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) | |||
154 | part = rcu_dereference(ptbl->part[piter->idx]); | 154 | part = rcu_dereference(ptbl->part[piter->idx]); |
155 | if (!part) | 155 | if (!part) |
156 | continue; | 156 | continue; |
157 | if (!part->nr_sects && | 157 | if (!part_nr_sects_read(part) && |
158 | !(piter->flags & DISK_PITER_INCL_EMPTY) && | 158 | !(piter->flags & DISK_PITER_INCL_EMPTY) && |
159 | !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 && | 159 | !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 && |
160 | piter->idx == 0)) | 160 | piter->idx == 0)) |
@@ -191,7 +191,7 @@ EXPORT_SYMBOL_GPL(disk_part_iter_exit); | |||
191 | static inline int sector_in_part(struct hd_struct *part, sector_t sector) | 191 | static inline int sector_in_part(struct hd_struct *part, sector_t sector) |
192 | { | 192 | { |
193 | return part->start_sect <= sector && | 193 | return part->start_sect <= sector && |
194 | sector < part->start_sect + part->nr_sects; | 194 | sector < part->start_sect + part_nr_sects_read(part); |
195 | } | 195 | } |
196 | 196 | ||
197 | /** | 197 | /** |
@@ -769,8 +769,8 @@ void __init printk_all_partitions(void) | |||
769 | 769 | ||
770 | printk("%s%s %10llu %s %s", is_part0 ? "" : " ", | 770 | printk("%s%s %10llu %s %s", is_part0 ? "" : " ", |
771 | bdevt_str(part_devt(part), devt_buf), | 771 | bdevt_str(part_devt(part), devt_buf), |
772 | (unsigned long long)part->nr_sects >> 1, | 772 | (unsigned long long)part_nr_sects_read(part) >> 1 |
773 | disk_name(disk, part->partno, name_buf), | 773 | , disk_name(disk, part->partno, name_buf), |
774 | uuid_buf); | 774 | uuid_buf); |
775 | if (is_part0) { | 775 | if (is_part0) { |
776 | if (disk->driverfs_dev != NULL && | 776 | if (disk->driverfs_dev != NULL && |
@@ -862,7 +862,7 @@ static int show_partition(struct seq_file *seqf, void *v) | |||
862 | while ((part = disk_part_iter_next(&piter))) | 862 | while ((part = disk_part_iter_next(&piter))) |
863 | seq_printf(seqf, "%4d %7d %10llu %s\n", | 863 | seq_printf(seqf, "%4d %7d %10llu %s\n", |
864 | MAJOR(part_devt(part)), MINOR(part_devt(part)), | 864 | MAJOR(part_devt(part)), MINOR(part_devt(part)), |
865 | (unsigned long long)part->nr_sects >> 1, | 865 | (unsigned long long)part_nr_sects_read(part) >> 1, |
866 | disk_name(sgp, part->partno, buf)); | 866 | disk_name(sgp, part->partno, buf)); |
867 | disk_part_iter_exit(&piter); | 867 | disk_part_iter_exit(&piter); |
868 | 868 | ||
@@ -1268,6 +1268,16 @@ struct gendisk *alloc_disk_node(int minors, int node_id) | |||
1268 | } | 1268 | } |
1269 | disk->part_tbl->part[0] = &disk->part0; | 1269 | disk->part_tbl->part[0] = &disk->part0; |
1270 | 1270 | ||
1271 | /* | ||
1272 | * set_capacity() and get_capacity() currently don't use | ||
1273 | * seqcounter to read/update the part0->nr_sects. Still init | ||
1274 | * the counter as we can read the sectors in IO submission | ||
1275 | * patch using seqence counters. | ||
1276 | * | ||
1277 | * TODO: Ideally set_capacity() and get_capacity() should be | ||
1278 | * converted to make use of bd_mutex and sequence counters. | ||
1279 | */ | ||
1280 | seqcount_init(&disk->part0.nr_sects_seq); | ||
1271 | hd_ref_init(&disk->part0); | 1281 | hd_ref_init(&disk->part0); |
1272 | 1282 | ||
1273 | disk->minors = minors; | 1283 | disk->minors = minors; |
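
The genhd.c hunks above stop reading part->nr_sects directly and go through part_nr_sects_read(), paired with the nr_sects_seq seqcount initialised for part0. The accessor itself lands in include/linux/genhd.h (outside this block/-limited diffstat); assuming it follows the usual seqcount reader pattern on 32-bit kernels, it looks roughly like this sketch:

#include <linux/genhd.h>
#include <linux/seqlock.h>

/* rough 32-bit reader shape; the real helper also special-cases 64-bit builds */
static inline sector_t example_part_nr_sects_read(struct hd_struct *part)
{
	sector_t nr_sects;
	unsigned int seq;

	do {
		seq = read_seqcount_begin(&part->nr_sects_seq);
		nr_sects = part->nr_sects;
	} while (read_seqcount_retry(&part->nr_sects_seq, seq));

	return nr_sects;
}
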
diff --git a/block/ioctl.c b/block/ioctl.c
index ba15b2dbfb98..4476e0e85d16 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -13,7 +13,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user | |||
13 | { | 13 | { |
14 | struct block_device *bdevp; | 14 | struct block_device *bdevp; |
15 | struct gendisk *disk; | 15 | struct gendisk *disk; |
16 | struct hd_struct *part; | 16 | struct hd_struct *part, *lpart; |
17 | struct blkpg_ioctl_arg a; | 17 | struct blkpg_ioctl_arg a; |
18 | struct blkpg_partition p; | 18 | struct blkpg_partition p; |
19 | struct disk_part_iter piter; | 19 | struct disk_part_iter piter; |
@@ -36,8 +36,8 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user | |||
36 | case BLKPG_ADD_PARTITION: | 36 | case BLKPG_ADD_PARTITION: |
37 | start = p.start >> 9; | 37 | start = p.start >> 9; |
38 | length = p.length >> 9; | 38 | length = p.length >> 9; |
39 | /* check for fit in a hd_struct */ | 39 | /* check for fit in a hd_struct */ |
40 | if (sizeof(sector_t) == sizeof(long) && | 40 | if (sizeof(sector_t) == sizeof(long) && |
41 | sizeof(long long) > sizeof(long)) { | 41 | sizeof(long long) > sizeof(long)) { |
42 | long pstart = start, plength = length; | 42 | long pstart = start, plength = length; |
43 | if (pstart != start || plength != length | 43 | if (pstart != start || plength != length |
@@ -92,6 +92,59 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user | |||
92 | bdput(bdevp); | 92 | bdput(bdevp); |
93 | 93 | ||
94 | return 0; | 94 | return 0; |
95 | case BLKPG_RESIZE_PARTITION: | ||
96 | start = p.start >> 9; | ||
97 | /* new length of partition in bytes */ | ||
98 | length = p.length >> 9; | ||
99 | /* check for fit in a hd_struct */ | ||
100 | if (sizeof(sector_t) == sizeof(long) && | ||
101 | sizeof(long long) > sizeof(long)) { | ||
102 | long pstart = start, plength = length; | ||
103 | if (pstart != start || plength != length | ||
104 | || pstart < 0 || plength < 0) | ||
105 | return -EINVAL; | ||
106 | } | ||
107 | part = disk_get_part(disk, partno); | ||
108 | if (!part) | ||
109 | return -ENXIO; | ||
110 | bdevp = bdget(part_devt(part)); | ||
111 | if (!bdevp) { | ||
112 | disk_put_part(part); | ||
113 | return -ENOMEM; | ||
114 | } | ||
115 | mutex_lock(&bdevp->bd_mutex); | ||
116 | mutex_lock_nested(&bdev->bd_mutex, 1); | ||
117 | if (start != part->start_sect) { | ||
118 | mutex_unlock(&bdevp->bd_mutex); | ||
119 | mutex_unlock(&bdev->bd_mutex); | ||
120 | bdput(bdevp); | ||
121 | disk_put_part(part); | ||
122 | return -EINVAL; | ||
123 | } | ||
124 | /* overlap? */ | ||
125 | disk_part_iter_init(&piter, disk, | ||
126 | DISK_PITER_INCL_EMPTY); | ||
127 | while ((lpart = disk_part_iter_next(&piter))) { | ||
128 | if (lpart->partno != partno && | ||
129 | !(start + length <= lpart->start_sect || | ||
130 | start >= lpart->start_sect + lpart->nr_sects) | ||
131 | ) { | ||
132 | disk_part_iter_exit(&piter); | ||
133 | mutex_unlock(&bdevp->bd_mutex); | ||
134 | mutex_unlock(&bdev->bd_mutex); | ||
135 | bdput(bdevp); | ||
136 | disk_put_part(part); | ||
137 | return -EBUSY; | ||
138 | } | ||
139 | } | ||
140 | disk_part_iter_exit(&piter); | ||
141 | part_nr_sects_write(part, (sector_t)length); | ||
142 | i_size_write(bdevp->bd_inode, p.length); | ||
143 | mutex_unlock(&bdevp->bd_mutex); | ||
144 | mutex_unlock(&bdev->bd_mutex); | ||
145 | bdput(bdevp); | ||
146 | disk_put_part(part); | ||
147 | return 0; | ||
95 | default: | 148 | default: |
96 | return -EINVAL; | 149 | return -EINVAL; |
97 | } | 150 | } |
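
From userspace, the new resize operation rides on the existing BLKPG ioctl against the whole-disk node: pass BLKPG_RESIZE_PARTITION with the partition number, its unchanged start offset and the new length, both in bytes (the kernel shifts them down to sectors and returns -EBUSY if the new size would overlap a neighbour). A minimal sketch, with error handling trimmed:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/blkpg.h>

/* grow/shrink partition @pno of @disk_path to @new_bytes, keeping its start */
static int resize_partition(const char *disk_path, int pno,
			    long long start_bytes, long long new_bytes)
{
	struct blkpg_partition part;
	struct blkpg_ioctl_arg arg;
	int fd, ret;

	memset(&part, 0, sizeof(part));
	part.pno = pno;
	part.start = start_bytes;	/* must match the partition's current start */
	part.length = new_bytes;	/* new size in bytes */

	memset(&arg, 0, sizeof(arg));
	arg.op = BLKPG_RESIZE_PARTITION;
	arg.datalen = sizeof(part);
	arg.data = &part;

	fd = open(disk_path, O_RDWR);
	if (fd < 0)
		return -1;

	ret = ioctl(fd, BLKPG, &arg);
	close(fd);
	return ret;
}
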
diff --git a/block/partition-generic.c b/block/partition-generic.c
index 6df5d6928a44..f1d14519cc04 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -84,7 +84,7 @@ ssize_t part_size_show(struct device *dev, | |||
84 | struct device_attribute *attr, char *buf) | 84 | struct device_attribute *attr, char *buf) |
85 | { | 85 | { |
86 | struct hd_struct *p = dev_to_part(dev); | 86 | struct hd_struct *p = dev_to_part(dev); |
87 | return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects); | 87 | return sprintf(buf, "%llu\n",(unsigned long long)part_nr_sects_read(p)); |
88 | } | 88 | } |
89 | 89 | ||
90 | static ssize_t part_ro_show(struct device *dev, | 90 | static ssize_t part_ro_show(struct device *dev, |
@@ -294,6 +294,8 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, | |||
294 | err = -ENOMEM; | 294 | err = -ENOMEM; |
295 | goto out_free; | 295 | goto out_free; |
296 | } | 296 | } |
297 | |||
298 | seqcount_init(&p->nr_sects_seq); | ||
297 | pdev = part_to_dev(p); | 299 | pdev = part_to_dev(p); |
298 | 300 | ||
299 | p->start_sect = start; | 301 | p->start_sect = start; |