author     Linus Torvalds <torvalds@linux-foundation.org>  2012-08-01 12:02:41 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-08-01 12:02:41 -0400
commit     8cf1a3fce0b95050b63d451c9d561da0da2aa4d6 (patch)
tree       0dc7f93474c3be601a5893900db1418dfd60ba5d
parent     fcff06c438b60f415af5983efe92811d6aa02ad1 (diff)
parent     80799fbb7d10c30df78015b3fa21f7ffcfc0eb2c (diff)
Merge branch 'for-3.6/core' of git://git.kernel.dk/linux-block
Pull core block IO bits from Jens Axboe:
"The most complicated part if this is the request allocation rework by
Tejun, which has been queued up for a long time and has been in
for-next ditto as well.
There are a few commits from yesterday and today, mostly trivial and
obvious fixes. So I'm pretty confident that it is sound. It's also
smaller than usual."
* 'for-3.6/core' of git://git.kernel.dk/linux-block:
block: remove dead func declaration
block: add partition resize function to blkpg ioctl
block: uninitialized ioc->nr_tasks triggers WARN_ON
block: do not artificially constrain max_sectors for stacking drivers
blkcg: implement per-blkg request allocation
block: prepare for multiple request_lists
block: add q->nr_rqs[] and move q->rq.elvpriv to q->nr_rqs_elvpriv
blkcg: inline bio_blkcg() and friends
block: allocate io_context upfront
block: refactor get_request[_wait]()
block: drop custom queue draining used by scsi_transport_{iscsi|fc}
mempool: add @gfp_mask to mempool_create_node()
blkcg: make root blkcg allocation use %GFP_KERNEL
blkcg: __blkg_lookup_create() doesn't need radix preload
-rw-r--r--  Documentation/block/queue-sysfs.txt  |   7
-rw-r--r--  block/blk-cgroup.c                   | 139
-rw-r--r--  block/blk-cgroup.h                   | 128
-rw-r--r--  block/blk-core.c                     | 209
-rw-r--r--  block/blk-ioc.c                      |   1
-rw-r--r--  block/blk-settings.c                 |   3
-rw-r--r--  block/blk-sysfs.c                    |  34
-rw-r--r--  block/blk-throttle.c                 |   3
-rw-r--r--  block/blk.h                          |   4
-rw-r--r--  block/bsg-lib.c                      |  53
-rw-r--r--  block/genhd.c                        |  20
-rw-r--r--  block/ioctl.c                        |  59
-rw-r--r--  block/partition-generic.c            |   4
-rw-r--r--  drivers/scsi/scsi_transport_fc.c     |  38
-rw-r--r--  drivers/scsi/scsi_transport_iscsi.c  |   2
-rw-r--r--  include/linux/blkdev.h               |  53
-rw-r--r--  include/linux/blkpg.h                |   1
-rw-r--r--  include/linux/bsg-lib.h              |   1
-rw-r--r--  include/linux/genhd.h                |  57
-rw-r--r--  include/linux/mempool.h              |   3
-rw-r--r--  mm/mempool.c                         |  12
21 files changed, 530 insertions, 301 deletions
diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt
index d8147b336c35..6518a55273e7 100644
--- a/Documentation/block/queue-sysfs.txt
+++ b/Documentation/block/queue-sysfs.txt
| @@ -38,6 +38,13 @@ read or write requests. Note that the total allocated number may be twice | |||
| 38 | this amount, since it applies only to reads or writes (not the accumulated | 38 | this amount, since it applies only to reads or writes (not the accumulated |
| 39 | sum). | 39 | sum). |
| 40 | 40 | ||
| 41 | To avoid priority inversion through request starvation, a request | ||
| 42 | queue maintains a separate request pool per each cgroup when | ||
| 43 | CONFIG_BLK_CGROUP is enabled, and this parameter applies to each such | ||
| 44 | per-block-cgroup request pool. IOW, if there are N block cgroups, | ||
| 45 | each request queue may have upto N request pools, each independently | ||
| 46 | regulated by nr_requests. | ||
| 47 | |||
| 41 | read_ahead_kb (RW) | 48 | read_ahead_kb (RW) |
| 42 | ------------------ | 49 | ------------------ |
| 43 | Maximum number of kilobytes to read-ahead for filesystems on this block | 50 | Maximum number of kilobytes to read-ahead for filesystems on this block |
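Editorial aside, not part of the patch: the documentation hunk above says nr_requests now governs one request pool per block cgroup rather than a single per-queue pool. A minimal sketch of what that means for code that used to poke the lone q->rq pool, modeled on the blk_drain_queue() change later in this series and assuming the blk_queue_for_each_rl() helper and struct request_list layout it introduces:

/*
 * Illustrative sketch only (not from the patch): with per-blkcg request
 * pools, waking sleepers means visiting every request_list instead of
 * the old single q->rq.  Relies on blk_queue_for_each_rl() and the
 * request_list fields added by this series (block/blk-cgroup.h,
 * include/linux/blkdev.h); the caller must hold q->queue_lock.
 */
static void wake_all_rl_waiters(struct request_queue *q)
{
	struct request_list *rl;
	int i;

	blk_queue_for_each_rl(rl, q)
		for (i = 0; i < ARRAY_SIZE(rl->wait); i++)
			wake_up_all(&rl->wait[i]);
}

This mirrors the blk_drain_queue() hunk in block/blk-core.c below, where the old loop over q->rq.wait[] becomes a walk over every request_list.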
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index e7dee617358e..f3b44a65fc7a 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
| @@ -31,27 +31,6 @@ EXPORT_SYMBOL_GPL(blkcg_root); | |||
| 31 | 31 | ||
| 32 | static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS]; | 32 | static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS]; |
| 33 | 33 | ||
| 34 | struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) | ||
| 35 | { | ||
| 36 | return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id), | ||
| 37 | struct blkcg, css); | ||
| 38 | } | ||
| 39 | EXPORT_SYMBOL_GPL(cgroup_to_blkcg); | ||
| 40 | |||
| 41 | static struct blkcg *task_blkcg(struct task_struct *tsk) | ||
| 42 | { | ||
| 43 | return container_of(task_subsys_state(tsk, blkio_subsys_id), | ||
| 44 | struct blkcg, css); | ||
| 45 | } | ||
| 46 | |||
| 47 | struct blkcg *bio_blkcg(struct bio *bio) | ||
| 48 | { | ||
| 49 | if (bio && bio->bi_css) | ||
| 50 | return container_of(bio->bi_css, struct blkcg, css); | ||
| 51 | return task_blkcg(current); | ||
| 52 | } | ||
| 53 | EXPORT_SYMBOL_GPL(bio_blkcg); | ||
| 54 | |||
| 55 | static bool blkcg_policy_enabled(struct request_queue *q, | 34 | static bool blkcg_policy_enabled(struct request_queue *q, |
| 56 | const struct blkcg_policy *pol) | 35 | const struct blkcg_policy *pol) |
| 57 | { | 36 | { |
| @@ -84,6 +63,7 @@ static void blkg_free(struct blkcg_gq *blkg) | |||
| 84 | kfree(pd); | 63 | kfree(pd); |
| 85 | } | 64 | } |
| 86 | 65 | ||
| 66 | blk_exit_rl(&blkg->rl); | ||
| 87 | kfree(blkg); | 67 | kfree(blkg); |
| 88 | } | 68 | } |
| 89 | 69 | ||
| @@ -91,16 +71,18 @@ static void blkg_free(struct blkcg_gq *blkg) | |||
| 91 | * blkg_alloc - allocate a blkg | 71 | * blkg_alloc - allocate a blkg |
| 92 | * @blkcg: block cgroup the new blkg is associated with | 72 | * @blkcg: block cgroup the new blkg is associated with |
| 93 | * @q: request_queue the new blkg is associated with | 73 | * @q: request_queue the new blkg is associated with |
| 74 | * @gfp_mask: allocation mask to use | ||
| 94 | * | 75 | * |
| 95 | * Allocate a new blkg assocating @blkcg and @q. | 76 | * Allocate a new blkg assocating @blkcg and @q. |
| 96 | */ | 77 | */ |
| 97 | static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q) | 78 | static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q, |
| 79 | gfp_t gfp_mask) | ||
| 98 | { | 80 | { |
| 99 | struct blkcg_gq *blkg; | 81 | struct blkcg_gq *blkg; |
| 100 | int i; | 82 | int i; |
| 101 | 83 | ||
| 102 | /* alloc and init base part */ | 84 | /* alloc and init base part */ |
| 103 | blkg = kzalloc_node(sizeof(*blkg), GFP_ATOMIC, q->node); | 85 | blkg = kzalloc_node(sizeof(*blkg), gfp_mask, q->node); |
| 104 | if (!blkg) | 86 | if (!blkg) |
| 105 | return NULL; | 87 | return NULL; |
| 106 | 88 | ||
| @@ -109,6 +91,13 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q) | |||
| 109 | blkg->blkcg = blkcg; | 91 | blkg->blkcg = blkcg; |
| 110 | blkg->refcnt = 1; | 92 | blkg->refcnt = 1; |
| 111 | 93 | ||
| 94 | /* root blkg uses @q->root_rl, init rl only for !root blkgs */ | ||
| 95 | if (blkcg != &blkcg_root) { | ||
| 96 | if (blk_init_rl(&blkg->rl, q, gfp_mask)) | ||
| 97 | goto err_free; | ||
| 98 | blkg->rl.blkg = blkg; | ||
| 99 | } | ||
| 100 | |||
| 112 | for (i = 0; i < BLKCG_MAX_POLS; i++) { | 101 | for (i = 0; i < BLKCG_MAX_POLS; i++) { |
| 113 | struct blkcg_policy *pol = blkcg_policy[i]; | 102 | struct blkcg_policy *pol = blkcg_policy[i]; |
| 114 | struct blkg_policy_data *pd; | 103 | struct blkg_policy_data *pd; |
| @@ -117,11 +106,9 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q) | |||
| 117 | continue; | 106 | continue; |
| 118 | 107 | ||
| 119 | /* alloc per-policy data and attach it to blkg */ | 108 | /* alloc per-policy data and attach it to blkg */ |
| 120 | pd = kzalloc_node(pol->pd_size, GFP_ATOMIC, q->node); | 109 | pd = kzalloc_node(pol->pd_size, gfp_mask, q->node); |
| 121 | if (!pd) { | 110 | if (!pd) |
| 122 | blkg_free(blkg); | 111 | goto err_free; |
| 123 | return NULL; | ||
| 124 | } | ||
| 125 | 112 | ||
| 126 | blkg->pd[i] = pd; | 113 | blkg->pd[i] = pd; |
| 127 | pd->blkg = blkg; | 114 | pd->blkg = blkg; |
| @@ -132,6 +119,10 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q) | |||
| 132 | } | 119 | } |
| 133 | 120 | ||
| 134 | return blkg; | 121 | return blkg; |
| 122 | |||
| 123 | err_free: | ||
| 124 | blkg_free(blkg); | ||
| 125 | return NULL; | ||
| 135 | } | 126 | } |
| 136 | 127 | ||
| 137 | static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, | 128 | static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, |
| @@ -175,9 +166,13 @@ struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q) | |||
| 175 | } | 166 | } |
| 176 | EXPORT_SYMBOL_GPL(blkg_lookup); | 167 | EXPORT_SYMBOL_GPL(blkg_lookup); |
| 177 | 168 | ||
| 169 | /* | ||
| 170 | * If @new_blkg is %NULL, this function tries to allocate a new one as | ||
| 171 | * necessary using %GFP_ATOMIC. @new_blkg is always consumed on return. | ||
| 172 | */ | ||
| 178 | static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, | 173 | static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, |
| 179 | struct request_queue *q) | 174 | struct request_queue *q, |
| 180 | __releases(q->queue_lock) __acquires(q->queue_lock) | 175 | struct blkcg_gq *new_blkg) |
| 181 | { | 176 | { |
| 182 | struct blkcg_gq *blkg; | 177 | struct blkcg_gq *blkg; |
| 183 | int ret; | 178 | int ret; |
| @@ -189,24 +184,26 @@ static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, | |||
| 189 | blkg = __blkg_lookup(blkcg, q); | 184 | blkg = __blkg_lookup(blkcg, q); |
| 190 | if (blkg) { | 185 | if (blkg) { |
| 191 | rcu_assign_pointer(blkcg->blkg_hint, blkg); | 186 | rcu_assign_pointer(blkcg->blkg_hint, blkg); |
| 192 | return blkg; | 187 | goto out_free; |
| 193 | } | 188 | } |
| 194 | 189 | ||
| 195 | /* blkg holds a reference to blkcg */ | 190 | /* blkg holds a reference to blkcg */ |
| 196 | if (!css_tryget(&blkcg->css)) | 191 | if (!css_tryget(&blkcg->css)) { |
| 197 | return ERR_PTR(-EINVAL); | 192 | blkg = ERR_PTR(-EINVAL); |
| 193 | goto out_free; | ||
| 194 | } | ||
| 198 | 195 | ||
| 199 | /* allocate */ | 196 | /* allocate */ |
| 200 | ret = -ENOMEM; | 197 | if (!new_blkg) { |
| 201 | blkg = blkg_alloc(blkcg, q); | 198 | new_blkg = blkg_alloc(blkcg, q, GFP_ATOMIC); |
| 202 | if (unlikely(!blkg)) | 199 | if (unlikely(!new_blkg)) { |
| 203 | goto err_put; | 200 | blkg = ERR_PTR(-ENOMEM); |
| 201 | goto out_put; | ||
| 202 | } | ||
| 203 | } | ||
| 204 | blkg = new_blkg; | ||
| 204 | 205 | ||
| 205 | /* insert */ | 206 | /* insert */ |
| 206 | ret = radix_tree_preload(GFP_ATOMIC); | ||
| 207 | if (ret) | ||
| 208 | goto err_free; | ||
| 209 | |||
| 210 | spin_lock(&blkcg->lock); | 207 | spin_lock(&blkcg->lock); |
| 211 | ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg); | 208 | ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg); |
| 212 | if (likely(!ret)) { | 209 | if (likely(!ret)) { |
| @@ -215,15 +212,15 @@ static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, | |||
| 215 | } | 212 | } |
| 216 | spin_unlock(&blkcg->lock); | 213 | spin_unlock(&blkcg->lock); |
| 217 | 214 | ||
| 218 | radix_tree_preload_end(); | ||
| 219 | |||
| 220 | if (!ret) | 215 | if (!ret) |
| 221 | return blkg; | 216 | return blkg; |
| 222 | err_free: | 217 | |
| 223 | blkg_free(blkg); | 218 | blkg = ERR_PTR(ret); |
| 224 | err_put: | 219 | out_put: |
| 225 | css_put(&blkcg->css); | 220 | css_put(&blkcg->css); |
| 226 | return ERR_PTR(ret); | 221 | out_free: |
| 222 | blkg_free(new_blkg); | ||
| 223 | return blkg; | ||
| 227 | } | 224 | } |
| 228 | 225 | ||
| 229 | struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, | 226 | struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, |
| @@ -235,7 +232,7 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, | |||
| 235 | */ | 232 | */ |
| 236 | if (unlikely(blk_queue_bypass(q))) | 233 | if (unlikely(blk_queue_bypass(q))) |
| 237 | return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY); | 234 | return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY); |
| 238 | return __blkg_lookup_create(blkcg, q); | 235 | return __blkg_lookup_create(blkcg, q, NULL); |
| 239 | } | 236 | } |
| 240 | EXPORT_SYMBOL_GPL(blkg_lookup_create); | 237 | EXPORT_SYMBOL_GPL(blkg_lookup_create); |
| 241 | 238 | ||
| @@ -313,6 +310,38 @@ void __blkg_release(struct blkcg_gq *blkg) | |||
| 313 | } | 310 | } |
| 314 | EXPORT_SYMBOL_GPL(__blkg_release); | 311 | EXPORT_SYMBOL_GPL(__blkg_release); |
| 315 | 312 | ||
| 313 | /* | ||
| 314 | * The next function used by blk_queue_for_each_rl(). It's a bit tricky | ||
| 315 | * because the root blkg uses @q->root_rl instead of its own rl. | ||
| 316 | */ | ||
| 317 | struct request_list *__blk_queue_next_rl(struct request_list *rl, | ||
| 318 | struct request_queue *q) | ||
| 319 | { | ||
| 320 | struct list_head *ent; | ||
| 321 | struct blkcg_gq *blkg; | ||
| 322 | |||
| 323 | /* | ||
| 324 | * Determine the current blkg list_head. The first entry is | ||
| 325 | * root_rl which is off @q->blkg_list and mapped to the head. | ||
| 326 | */ | ||
| 327 | if (rl == &q->root_rl) { | ||
| 328 | ent = &q->blkg_list; | ||
| 329 | } else { | ||
| 330 | blkg = container_of(rl, struct blkcg_gq, rl); | ||
| 331 | ent = &blkg->q_node; | ||
| 332 | } | ||
| 333 | |||
| 334 | /* walk to the next list_head, skip root blkcg */ | ||
| 335 | ent = ent->next; | ||
| 336 | if (ent == &q->root_blkg->q_node) | ||
| 337 | ent = ent->next; | ||
| 338 | if (ent == &q->blkg_list) | ||
| 339 | return NULL; | ||
| 340 | |||
| 341 | blkg = container_of(ent, struct blkcg_gq, q_node); | ||
| 342 | return &blkg->rl; | ||
| 343 | } | ||
| 344 | |||
| 316 | static int blkcg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, | 345 | static int blkcg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, |
| 317 | u64 val) | 346 | u64 val) |
| 318 | { | 347 | { |
| @@ -734,24 +763,36 @@ int blkcg_activate_policy(struct request_queue *q, | |||
| 734 | struct blkcg_gq *blkg; | 763 | struct blkcg_gq *blkg; |
| 735 | struct blkg_policy_data *pd, *n; | 764 | struct blkg_policy_data *pd, *n; |
| 736 | int cnt = 0, ret; | 765 | int cnt = 0, ret; |
| 766 | bool preloaded; | ||
| 737 | 767 | ||
| 738 | if (blkcg_policy_enabled(q, pol)) | 768 | if (blkcg_policy_enabled(q, pol)) |
| 739 | return 0; | 769 | return 0; |
| 740 | 770 | ||
| 771 | /* preallocations for root blkg */ | ||
| 772 | blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL); | ||
| 773 | if (!blkg) | ||
| 774 | return -ENOMEM; | ||
| 775 | |||
| 776 | preloaded = !radix_tree_preload(GFP_KERNEL); | ||
| 777 | |||
| 741 | blk_queue_bypass_start(q); | 778 | blk_queue_bypass_start(q); |
| 742 | 779 | ||
| 743 | /* make sure the root blkg exists and count the existing blkgs */ | 780 | /* make sure the root blkg exists and count the existing blkgs */ |
| 744 | spin_lock_irq(q->queue_lock); | 781 | spin_lock_irq(q->queue_lock); |
| 745 | 782 | ||
| 746 | rcu_read_lock(); | 783 | rcu_read_lock(); |
| 747 | blkg = __blkg_lookup_create(&blkcg_root, q); | 784 | blkg = __blkg_lookup_create(&blkcg_root, q, blkg); |
| 748 | rcu_read_unlock(); | 785 | rcu_read_unlock(); |
| 749 | 786 | ||
| 787 | if (preloaded) | ||
| 788 | radix_tree_preload_end(); | ||
| 789 | |||
| 750 | if (IS_ERR(blkg)) { | 790 | if (IS_ERR(blkg)) { |
| 751 | ret = PTR_ERR(blkg); | 791 | ret = PTR_ERR(blkg); |
| 752 | goto out_unlock; | 792 | goto out_unlock; |
| 753 | } | 793 | } |
| 754 | q->root_blkg = blkg; | 794 | q->root_blkg = blkg; |
| 795 | q->root_rl.blkg = blkg; | ||
| 755 | 796 | ||
| 756 | list_for_each_entry(blkg, &q->blkg_list, q_node) | 797 | list_for_each_entry(blkg, &q->blkg_list, q_node) |
| 757 | cnt++; | 798 | cnt++; |
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 8ac457ce7783..24597309e23d 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
| @@ -17,6 +17,7 @@ | |||
| 17 | #include <linux/u64_stats_sync.h> | 17 | #include <linux/u64_stats_sync.h> |
| 18 | #include <linux/seq_file.h> | 18 | #include <linux/seq_file.h> |
| 19 | #include <linux/radix-tree.h> | 19 | #include <linux/radix-tree.h> |
| 20 | #include <linux/blkdev.h> | ||
| 20 | 21 | ||
| 21 | /* Max limits for throttle policy */ | 22 | /* Max limits for throttle policy */ |
| 22 | #define THROTL_IOPS_MAX UINT_MAX | 23 | #define THROTL_IOPS_MAX UINT_MAX |
| @@ -93,6 +94,8 @@ struct blkcg_gq { | |||
| 93 | struct list_head q_node; | 94 | struct list_head q_node; |
| 94 | struct hlist_node blkcg_node; | 95 | struct hlist_node blkcg_node; |
| 95 | struct blkcg *blkcg; | 96 | struct blkcg *blkcg; |
| 97 | /* request allocation list for this blkcg-q pair */ | ||
| 98 | struct request_list rl; | ||
| 96 | /* reference count */ | 99 | /* reference count */ |
| 97 | int refcnt; | 100 | int refcnt; |
| 98 | 101 | ||
| @@ -120,8 +123,6 @@ struct blkcg_policy { | |||
| 120 | 123 | ||
| 121 | extern struct blkcg blkcg_root; | 124 | extern struct blkcg blkcg_root; |
| 122 | 125 | ||
| 123 | struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup); | ||
| 124 | struct blkcg *bio_blkcg(struct bio *bio); | ||
| 125 | struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q); | 126 | struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q); |
| 126 | struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, | 127 | struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, |
| 127 | struct request_queue *q); | 128 | struct request_queue *q); |
| @@ -160,6 +161,25 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, | |||
| 160 | void blkg_conf_finish(struct blkg_conf_ctx *ctx); | 161 | void blkg_conf_finish(struct blkg_conf_ctx *ctx); |
| 161 | 162 | ||
| 162 | 163 | ||
| 164 | static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) | ||
| 165 | { | ||
| 166 | return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id), | ||
| 167 | struct blkcg, css); | ||
| 168 | } | ||
| 169 | |||
| 170 | static inline struct blkcg *task_blkcg(struct task_struct *tsk) | ||
| 171 | { | ||
| 172 | return container_of(task_subsys_state(tsk, blkio_subsys_id), | ||
| 173 | struct blkcg, css); | ||
| 174 | } | ||
| 175 | |||
| 176 | static inline struct blkcg *bio_blkcg(struct bio *bio) | ||
| 177 | { | ||
| 178 | if (bio && bio->bi_css) | ||
| 179 | return container_of(bio->bi_css, struct blkcg, css); | ||
| 180 | return task_blkcg(current); | ||
| 181 | } | ||
| 182 | |||
| 163 | /** | 183 | /** |
| 164 | * blkg_to_pdata - get policy private data | 184 | * blkg_to_pdata - get policy private data |
| 165 | * @blkg: blkg of interest | 185 | * @blkg: blkg of interest |
| @@ -234,6 +254,95 @@ static inline void blkg_put(struct blkcg_gq *blkg) | |||
| 234 | } | 254 | } |
| 235 | 255 | ||
| 236 | /** | 256 | /** |
| 257 | * blk_get_rl - get request_list to use | ||
| 258 | * @q: request_queue of interest | ||
| 259 | * @bio: bio which will be attached to the allocated request (may be %NULL) | ||
| 260 | * | ||
| 261 | * The caller wants to allocate a request from @q to use for @bio. Find | ||
| 262 | * the request_list to use and obtain a reference on it. Should be called | ||
| 263 | * under queue_lock. This function is guaranteed to return non-%NULL | ||
| 264 | * request_list. | ||
| 265 | */ | ||
| 266 | static inline struct request_list *blk_get_rl(struct request_queue *q, | ||
| 267 | struct bio *bio) | ||
| 268 | { | ||
| 269 | struct blkcg *blkcg; | ||
| 270 | struct blkcg_gq *blkg; | ||
| 271 | |||
| 272 | rcu_read_lock(); | ||
| 273 | |||
| 274 | blkcg = bio_blkcg(bio); | ||
| 275 | |||
| 276 | /* bypass blkg lookup and use @q->root_rl directly for root */ | ||
| 277 | if (blkcg == &blkcg_root) | ||
| 278 | goto root_rl; | ||
| 279 | |||
| 280 | /* | ||
| 281 | * Try to use blkg->rl. blkg lookup may fail under memory pressure | ||
| 282 | * or if either the blkcg or queue is going away. Fall back to | ||
| 283 | * root_rl in such cases. | ||
| 284 | */ | ||
| 285 | blkg = blkg_lookup_create(blkcg, q); | ||
| 286 | if (unlikely(IS_ERR(blkg))) | ||
| 287 | goto root_rl; | ||
| 288 | |||
| 289 | blkg_get(blkg); | ||
| 290 | rcu_read_unlock(); | ||
| 291 | return &blkg->rl; | ||
| 292 | root_rl: | ||
| 293 | rcu_read_unlock(); | ||
| 294 | return &q->root_rl; | ||
| 295 | } | ||
| 296 | |||
| 297 | /** | ||
| 298 | * blk_put_rl - put request_list | ||
| 299 | * @rl: request_list to put | ||
| 300 | * | ||
| 301 | * Put the reference acquired by blk_get_rl(). Should be called under | ||
| 302 | * queue_lock. | ||
| 303 | */ | ||
| 304 | static inline void blk_put_rl(struct request_list *rl) | ||
| 305 | { | ||
| 306 | /* root_rl may not have blkg set */ | ||
| 307 | if (rl->blkg && rl->blkg->blkcg != &blkcg_root) | ||
| 308 | blkg_put(rl->blkg); | ||
| 309 | } | ||
| 310 | |||
| 311 | /** | ||
| 312 | * blk_rq_set_rl - associate a request with a request_list | ||
| 313 | * @rq: request of interest | ||
| 314 | * @rl: target request_list | ||
| 315 | * | ||
| 316 | * Associate @rq with @rl so that accounting and freeing can know the | ||
| 317 | * request_list @rq came from. | ||
| 318 | */ | ||
| 319 | static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) | ||
| 320 | { | ||
| 321 | rq->rl = rl; | ||
| 322 | } | ||
| 323 | |||
| 324 | /** | ||
| 325 | * blk_rq_rl - return the request_list a request came from | ||
| 326 | * @rq: request of interest | ||
| 327 | * | ||
| 328 | * Return the request_list @rq is allocated from. | ||
| 329 | */ | ||
| 330 | static inline struct request_list *blk_rq_rl(struct request *rq) | ||
| 331 | { | ||
| 332 | return rq->rl; | ||
| 333 | } | ||
| 334 | |||
| 335 | struct request_list *__blk_queue_next_rl(struct request_list *rl, | ||
| 336 | struct request_queue *q); | ||
| 337 | /** | ||
| 338 | * blk_queue_for_each_rl - iterate through all request_lists of a request_queue | ||
| 339 | * | ||
| 340 | * Should be used under queue_lock. | ||
| 341 | */ | ||
| 342 | #define blk_queue_for_each_rl(rl, q) \ | ||
| 343 | for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q))) | ||
| 344 | |||
| 345 | /** | ||
| 237 | * blkg_stat_add - add a value to a blkg_stat | 346 | * blkg_stat_add - add a value to a blkg_stat |
| 238 | * @stat: target blkg_stat | 347 | * @stat: target blkg_stat |
| 239 | * @val: value to add | 348 | * @val: value to add |
| @@ -351,6 +460,7 @@ static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat) | |||
| 351 | #else /* CONFIG_BLK_CGROUP */ | 460 | #else /* CONFIG_BLK_CGROUP */ |
| 352 | 461 | ||
| 353 | struct cgroup; | 462 | struct cgroup; |
| 463 | struct blkcg; | ||
| 354 | 464 | ||
| 355 | struct blkg_policy_data { | 465 | struct blkg_policy_data { |
| 356 | }; | 466 | }; |
| @@ -361,8 +471,6 @@ struct blkcg_gq { | |||
| 361 | struct blkcg_policy { | 471 | struct blkcg_policy { |
| 362 | }; | 472 | }; |
| 363 | 473 | ||
| 364 | static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) { return NULL; } | ||
| 365 | static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; } | ||
| 366 | static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; } | 474 | static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; } |
| 367 | static inline int blkcg_init_queue(struct request_queue *q) { return 0; } | 475 | static inline int blkcg_init_queue(struct request_queue *q) { return 0; } |
| 368 | static inline void blkcg_drain_queue(struct request_queue *q) { } | 476 | static inline void blkcg_drain_queue(struct request_queue *q) { } |
| @@ -374,6 +482,9 @@ static inline int blkcg_activate_policy(struct request_queue *q, | |||
| 374 | static inline void blkcg_deactivate_policy(struct request_queue *q, | 482 | static inline void blkcg_deactivate_policy(struct request_queue *q, |
| 375 | const struct blkcg_policy *pol) { } | 483 | const struct blkcg_policy *pol) { } |
| 376 | 484 | ||
| 485 | static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) { return NULL; } | ||
| 486 | static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; } | ||
| 487 | |||
| 377 | static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, | 488 | static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, |
| 378 | struct blkcg_policy *pol) { return NULL; } | 489 | struct blkcg_policy *pol) { return NULL; } |
| 379 | static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; } | 490 | static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; } |
| @@ -381,5 +492,14 @@ static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; } | |||
| 381 | static inline void blkg_get(struct blkcg_gq *blkg) { } | 492 | static inline void blkg_get(struct blkcg_gq *blkg) { } |
| 382 | static inline void blkg_put(struct blkcg_gq *blkg) { } | 493 | static inline void blkg_put(struct blkcg_gq *blkg) { } |
| 383 | 494 | ||
| 495 | static inline struct request_list *blk_get_rl(struct request_queue *q, | ||
| 496 | struct bio *bio) { return &q->root_rl; } | ||
| 497 | static inline void blk_put_rl(struct request_list *rl) { } | ||
| 498 | static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { } | ||
| 499 | static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; } | ||
| 500 | |||
| 501 | #define blk_queue_for_each_rl(rl, q) \ | ||
| 502 | for ((rl) = &(q)->root_rl; (rl); (rl) = NULL) | ||
| 503 | |||
| 384 | #endif /* CONFIG_BLK_CGROUP */ | 504 | #endif /* CONFIG_BLK_CGROUP */ |
| 385 | #endif /* _BLK_CGROUP_H */ | 505 | #endif /* _BLK_CGROUP_H */ |
diff --git a/block/blk-core.c b/block/blk-core.c
index 93eb3e4f88ce..dd134d834d58 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
| @@ -387,7 +387,7 @@ void blk_drain_queue(struct request_queue *q, bool drain_all) | |||
| 387 | if (!list_empty(&q->queue_head) && q->request_fn) | 387 | if (!list_empty(&q->queue_head) && q->request_fn) |
| 388 | __blk_run_queue(q); | 388 | __blk_run_queue(q); |
| 389 | 389 | ||
| 390 | drain |= q->rq.elvpriv; | 390 | drain |= q->nr_rqs_elvpriv; |
| 391 | 391 | ||
| 392 | /* | 392 | /* |
| 393 | * Unfortunately, requests are queued at and tracked from | 393 | * Unfortunately, requests are queued at and tracked from |
| @@ -397,7 +397,7 @@ void blk_drain_queue(struct request_queue *q, bool drain_all) | |||
| 397 | if (drain_all) { | 397 | if (drain_all) { |
| 398 | drain |= !list_empty(&q->queue_head); | 398 | drain |= !list_empty(&q->queue_head); |
| 399 | for (i = 0; i < 2; i++) { | 399 | for (i = 0; i < 2; i++) { |
| 400 | drain |= q->rq.count[i]; | 400 | drain |= q->nr_rqs[i]; |
| 401 | drain |= q->in_flight[i]; | 401 | drain |= q->in_flight[i]; |
| 402 | drain |= !list_empty(&q->flush_queue[i]); | 402 | drain |= !list_empty(&q->flush_queue[i]); |
| 403 | } | 403 | } |
| @@ -416,9 +416,14 @@ void blk_drain_queue(struct request_queue *q, bool drain_all) | |||
| 416 | * left with hung waiters. We need to wake up those waiters. | 416 | * left with hung waiters. We need to wake up those waiters. |
| 417 | */ | 417 | */ |
| 418 | if (q->request_fn) { | 418 | if (q->request_fn) { |
| 419 | struct request_list *rl; | ||
| 420 | |||
| 419 | spin_lock_irq(q->queue_lock); | 421 | spin_lock_irq(q->queue_lock); |
| 420 | for (i = 0; i < ARRAY_SIZE(q->rq.wait); i++) | 422 | |
| 421 | wake_up_all(&q->rq.wait[i]); | 423 | blk_queue_for_each_rl(rl, q) |
| 424 | for (i = 0; i < ARRAY_SIZE(rl->wait); i++) | ||
| 425 | wake_up_all(&rl->wait[i]); | ||
| 426 | |||
| 422 | spin_unlock_irq(q->queue_lock); | 427 | spin_unlock_irq(q->queue_lock); |
| 423 | } | 428 | } |
| 424 | } | 429 | } |
| @@ -517,28 +522,33 @@ void blk_cleanup_queue(struct request_queue *q) | |||
| 517 | } | 522 | } |
| 518 | EXPORT_SYMBOL(blk_cleanup_queue); | 523 | EXPORT_SYMBOL(blk_cleanup_queue); |
| 519 | 524 | ||
| 520 | static int blk_init_free_list(struct request_queue *q) | 525 | int blk_init_rl(struct request_list *rl, struct request_queue *q, |
| 526 | gfp_t gfp_mask) | ||
| 521 | { | 527 | { |
| 522 | struct request_list *rl = &q->rq; | ||
| 523 | |||
| 524 | if (unlikely(rl->rq_pool)) | 528 | if (unlikely(rl->rq_pool)) |
| 525 | return 0; | 529 | return 0; |
| 526 | 530 | ||
| 531 | rl->q = q; | ||
| 527 | rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0; | 532 | rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0; |
| 528 | rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0; | 533 | rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0; |
| 529 | rl->elvpriv = 0; | ||
| 530 | init_waitqueue_head(&rl->wait[BLK_RW_SYNC]); | 534 | init_waitqueue_head(&rl->wait[BLK_RW_SYNC]); |
| 531 | init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]); | 535 | init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]); |
| 532 | 536 | ||
| 533 | rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, | 537 | rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, |
| 534 | mempool_free_slab, request_cachep, q->node); | 538 | mempool_free_slab, request_cachep, |
| 535 | 539 | gfp_mask, q->node); | |
| 536 | if (!rl->rq_pool) | 540 | if (!rl->rq_pool) |
| 537 | return -ENOMEM; | 541 | return -ENOMEM; |
| 538 | 542 | ||
| 539 | return 0; | 543 | return 0; |
| 540 | } | 544 | } |
| 541 | 545 | ||
| 546 | void blk_exit_rl(struct request_list *rl) | ||
| 547 | { | ||
| 548 | if (rl->rq_pool) | ||
| 549 | mempool_destroy(rl->rq_pool); | ||
| 550 | } | ||
| 551 | |||
| 542 | struct request_queue *blk_alloc_queue(gfp_t gfp_mask) | 552 | struct request_queue *blk_alloc_queue(gfp_t gfp_mask) |
| 543 | { | 553 | { |
| 544 | return blk_alloc_queue_node(gfp_mask, -1); | 554 | return blk_alloc_queue_node(gfp_mask, -1); |
| @@ -680,7 +690,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, | |||
| 680 | if (!q) | 690 | if (!q) |
| 681 | return NULL; | 691 | return NULL; |
| 682 | 692 | ||
| 683 | if (blk_init_free_list(q)) | 693 | if (blk_init_rl(&q->root_rl, q, GFP_KERNEL)) |
| 684 | return NULL; | 694 | return NULL; |
| 685 | 695 | ||
| 686 | q->request_fn = rfn; | 696 | q->request_fn = rfn; |
| @@ -722,15 +732,15 @@ bool blk_get_queue(struct request_queue *q) | |||
| 722 | } | 732 | } |
| 723 | EXPORT_SYMBOL(blk_get_queue); | 733 | EXPORT_SYMBOL(blk_get_queue); |
| 724 | 734 | ||
| 725 | static inline void blk_free_request(struct request_queue *q, struct request *rq) | 735 | static inline void blk_free_request(struct request_list *rl, struct request *rq) |
| 726 | { | 736 | { |
| 727 | if (rq->cmd_flags & REQ_ELVPRIV) { | 737 | if (rq->cmd_flags & REQ_ELVPRIV) { |
| 728 | elv_put_request(q, rq); | 738 | elv_put_request(rl->q, rq); |
| 729 | if (rq->elv.icq) | 739 | if (rq->elv.icq) |
| 730 | put_io_context(rq->elv.icq->ioc); | 740 | put_io_context(rq->elv.icq->ioc); |
| 731 | } | 741 | } |
| 732 | 742 | ||
| 733 | mempool_free(rq, q->rq.rq_pool); | 743 | mempool_free(rq, rl->rq_pool); |
| 734 | } | 744 | } |
| 735 | 745 | ||
| 736 | /* | 746 | /* |
| @@ -767,18 +777,23 @@ static void ioc_set_batching(struct request_queue *q, struct io_context *ioc) | |||
| 767 | ioc->last_waited = jiffies; | 777 | ioc->last_waited = jiffies; |
| 768 | } | 778 | } |
| 769 | 779 | ||
| 770 | static void __freed_request(struct request_queue *q, int sync) | 780 | static void __freed_request(struct request_list *rl, int sync) |
| 771 | { | 781 | { |
| 772 | struct request_list *rl = &q->rq; | 782 | struct request_queue *q = rl->q; |
| 773 | 783 | ||
| 774 | if (rl->count[sync] < queue_congestion_off_threshold(q)) | 784 | /* |
| 785 | * bdi isn't aware of blkcg yet. As all async IOs end up root | ||
| 786 | * blkcg anyway, just use root blkcg state. | ||
| 787 | */ | ||
| 788 | if (rl == &q->root_rl && | ||
| 789 | rl->count[sync] < queue_congestion_off_threshold(q)) | ||
| 775 | blk_clear_queue_congested(q, sync); | 790 | blk_clear_queue_congested(q, sync); |
| 776 | 791 | ||
| 777 | if (rl->count[sync] + 1 <= q->nr_requests) { | 792 | if (rl->count[sync] + 1 <= q->nr_requests) { |
| 778 | if (waitqueue_active(&rl->wait[sync])) | 793 | if (waitqueue_active(&rl->wait[sync])) |
| 779 | wake_up(&rl->wait[sync]); | 794 | wake_up(&rl->wait[sync]); |
| 780 | 795 | ||
| 781 | blk_clear_queue_full(q, sync); | 796 | blk_clear_rl_full(rl, sync); |
| 782 | } | 797 | } |
| 783 | } | 798 | } |
| 784 | 799 | ||
| @@ -786,19 +801,20 @@ static void __freed_request(struct request_queue *q, int sync) | |||
| 786 | * A request has just been released. Account for it, update the full and | 801 | * A request has just been released. Account for it, update the full and |
| 787 | * congestion status, wake up any waiters. Called under q->queue_lock. | 802 | * congestion status, wake up any waiters. Called under q->queue_lock. |
| 788 | */ | 803 | */ |
| 789 | static void freed_request(struct request_queue *q, unsigned int flags) | 804 | static void freed_request(struct request_list *rl, unsigned int flags) |
| 790 | { | 805 | { |
| 791 | struct request_list *rl = &q->rq; | 806 | struct request_queue *q = rl->q; |
| 792 | int sync = rw_is_sync(flags); | 807 | int sync = rw_is_sync(flags); |
| 793 | 808 | ||
| 809 | q->nr_rqs[sync]--; | ||
| 794 | rl->count[sync]--; | 810 | rl->count[sync]--; |
| 795 | if (flags & REQ_ELVPRIV) | 811 | if (flags & REQ_ELVPRIV) |
| 796 | rl->elvpriv--; | 812 | q->nr_rqs_elvpriv--; |
| 797 | 813 | ||
| 798 | __freed_request(q, sync); | 814 | __freed_request(rl, sync); |
| 799 | 815 | ||
| 800 | if (unlikely(rl->starved[sync ^ 1])) | 816 | if (unlikely(rl->starved[sync ^ 1])) |
| 801 | __freed_request(q, sync ^ 1); | 817 | __freed_request(rl, sync ^ 1); |
| 802 | } | 818 | } |
| 803 | 819 | ||
| 804 | /* | 820 | /* |
| @@ -837,8 +853,8 @@ static struct io_context *rq_ioc(struct bio *bio) | |||
| 837 | } | 853 | } |
| 838 | 854 | ||
| 839 | /** | 855 | /** |
| 840 | * get_request - get a free request | 856 | * __get_request - get a free request |
| 841 | * @q: request_queue to allocate request from | 857 | * @rl: request list to allocate from |
| 842 | * @rw_flags: RW and SYNC flags | 858 | * @rw_flags: RW and SYNC flags |
| 843 | * @bio: bio to allocate request for (can be %NULL) | 859 | * @bio: bio to allocate request for (can be %NULL) |
| 844 | * @gfp_mask: allocation mask | 860 | * @gfp_mask: allocation mask |
| @@ -850,20 +866,16 @@ static struct io_context *rq_ioc(struct bio *bio) | |||
| 850 | * Returns %NULL on failure, with @q->queue_lock held. | 866 | * Returns %NULL on failure, with @q->queue_lock held. |
| 851 | * Returns !%NULL on success, with @q->queue_lock *not held*. | 867 | * Returns !%NULL on success, with @q->queue_lock *not held*. |
| 852 | */ | 868 | */ |
| 853 | static struct request *get_request(struct request_queue *q, int rw_flags, | 869 | static struct request *__get_request(struct request_list *rl, int rw_flags, |
| 854 | struct bio *bio, gfp_t gfp_mask) | 870 | struct bio *bio, gfp_t gfp_mask) |
| 855 | { | 871 | { |
| 872 | struct request_queue *q = rl->q; | ||
| 856 | struct request *rq; | 873 | struct request *rq; |
| 857 | struct request_list *rl = &q->rq; | 874 | struct elevator_type *et = q->elevator->type; |
| 858 | struct elevator_type *et; | 875 | struct io_context *ioc = rq_ioc(bio); |
| 859 | struct io_context *ioc; | ||
| 860 | struct io_cq *icq = NULL; | 876 | struct io_cq *icq = NULL; |
| 861 | const bool is_sync = rw_is_sync(rw_flags) != 0; | 877 | const bool is_sync = rw_is_sync(rw_flags) != 0; |
| 862 | bool retried = false; | ||
| 863 | int may_queue; | 878 | int may_queue; |
| 864 | retry: | ||
| 865 | et = q->elevator->type; | ||
| 866 | ioc = rq_ioc(bio); | ||
| 867 | 879 | ||
| 868 | if (unlikely(blk_queue_dead(q))) | 880 | if (unlikely(blk_queue_dead(q))) |
| 869 | return NULL; | 881 | return NULL; |
| @@ -875,28 +887,14 @@ retry: | |||
| 875 | if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) { | 887 | if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) { |
| 876 | if (rl->count[is_sync]+1 >= q->nr_requests) { | 888 | if (rl->count[is_sync]+1 >= q->nr_requests) { |
| 877 | /* | 889 | /* |
| 878 | * We want ioc to record batching state. If it's | ||
| 879 | * not already there, creating a new one requires | ||
| 880 | * dropping queue_lock, which in turn requires | ||
| 881 | * retesting conditions to avoid queue hang. | ||
| 882 | */ | ||
| 883 | if (!ioc && !retried) { | ||
| 884 | spin_unlock_irq(q->queue_lock); | ||
| 885 | create_io_context(gfp_mask, q->node); | ||
| 886 | spin_lock_irq(q->queue_lock); | ||
| 887 | retried = true; | ||
| 888 | goto retry; | ||
| 889 | } | ||
| 890 | |||
| 891 | /* | ||
| 892 | * The queue will fill after this allocation, so set | 890 | * The queue will fill after this allocation, so set |
| 893 | * it as full, and mark this process as "batching". | 891 | * it as full, and mark this process as "batching". |
| 894 | * This process will be allowed to complete a batch of | 892 | * This process will be allowed to complete a batch of |
| 895 | * requests, others will be blocked. | 893 | * requests, others will be blocked. |
| 896 | */ | 894 | */ |
| 897 | if (!blk_queue_full(q, is_sync)) { | 895 | if (!blk_rl_full(rl, is_sync)) { |
| 898 | ioc_set_batching(q, ioc); | 896 | ioc_set_batching(q, ioc); |
| 899 | blk_set_queue_full(q, is_sync); | 897 | blk_set_rl_full(rl, is_sync); |
| 900 | } else { | 898 | } else { |
| 901 | if (may_queue != ELV_MQUEUE_MUST | 899 | if (may_queue != ELV_MQUEUE_MUST |
| 902 | && !ioc_batching(q, ioc)) { | 900 | && !ioc_batching(q, ioc)) { |
| @@ -909,7 +907,12 @@ retry: | |||
| 909 | } | 907 | } |
| 910 | } | 908 | } |
| 911 | } | 909 | } |
| 912 | blk_set_queue_congested(q, is_sync); | 910 | /* |
| 911 | * bdi isn't aware of blkcg yet. As all async IOs end up | ||
| 912 | * root blkcg anyway, just use root blkcg state. | ||
| 913 | */ | ||
| 914 | if (rl == &q->root_rl) | ||
| 915 | blk_set_queue_congested(q, is_sync); | ||
| 913 | } | 916 | } |
| 914 | 917 | ||
| 915 | /* | 918 | /* |
| @@ -920,6 +923,7 @@ retry: | |||
| 920 | if (rl->count[is_sync] >= (3 * q->nr_requests / 2)) | 923 | if (rl->count[is_sync] >= (3 * q->nr_requests / 2)) |
| 921 | return NULL; | 924 | return NULL; |
| 922 | 925 | ||
| 926 | q->nr_rqs[is_sync]++; | ||
| 923 | rl->count[is_sync]++; | 927 | rl->count[is_sync]++; |
| 924 | rl->starved[is_sync] = 0; | 928 | rl->starved[is_sync] = 0; |
| 925 | 929 | ||
| @@ -935,7 +939,7 @@ retry: | |||
| 935 | */ | 939 | */ |
| 936 | if (blk_rq_should_init_elevator(bio) && !blk_queue_bypass(q)) { | 940 | if (blk_rq_should_init_elevator(bio) && !blk_queue_bypass(q)) { |
| 937 | rw_flags |= REQ_ELVPRIV; | 941 | rw_flags |= REQ_ELVPRIV; |
| 938 | rl->elvpriv++; | 942 | q->nr_rqs_elvpriv++; |
| 939 | if (et->icq_cache && ioc) | 943 | if (et->icq_cache && ioc) |
| 940 | icq = ioc_lookup_icq(ioc, q); | 944 | icq = ioc_lookup_icq(ioc, q); |
| 941 | } | 945 | } |
| @@ -945,22 +949,19 @@ retry: | |||
| 945 | spin_unlock_irq(q->queue_lock); | 949 | spin_unlock_irq(q->queue_lock); |
| 946 | 950 | ||
| 947 | /* allocate and init request */ | 951 | /* allocate and init request */ |
| 948 | rq = mempool_alloc(q->rq.rq_pool, gfp_mask); | 952 | rq = mempool_alloc(rl->rq_pool, gfp_mask); |
| 949 | if (!rq) | 953 | if (!rq) |
| 950 | goto fail_alloc; | 954 | goto fail_alloc; |
| 951 | 955 | ||
| 952 | blk_rq_init(q, rq); | 956 | blk_rq_init(q, rq); |
| 957 | blk_rq_set_rl(rq, rl); | ||
| 953 | rq->cmd_flags = rw_flags | REQ_ALLOCED; | 958 | rq->cmd_flags = rw_flags | REQ_ALLOCED; |
| 954 | 959 | ||
| 955 | /* init elvpriv */ | 960 | /* init elvpriv */ |
| 956 | if (rw_flags & REQ_ELVPRIV) { | 961 | if (rw_flags & REQ_ELVPRIV) { |
| 957 | if (unlikely(et->icq_cache && !icq)) { | 962 | if (unlikely(et->icq_cache && !icq)) { |
| 958 | create_io_context(gfp_mask, q->node); | 963 | if (ioc) |
| 959 | ioc = rq_ioc(bio); | 964 | icq = ioc_create_icq(ioc, q, gfp_mask); |
| 960 | if (!ioc) | ||
| 961 | goto fail_elvpriv; | ||
| 962 | |||
| 963 | icq = ioc_create_icq(ioc, q, gfp_mask); | ||
| 964 | if (!icq) | 965 | if (!icq) |
| 965 | goto fail_elvpriv; | 966 | goto fail_elvpriv; |
| 966 | } | 967 | } |
| @@ -1000,7 +1001,7 @@ fail_elvpriv: | |||
| 1000 | rq->elv.icq = NULL; | 1001 | rq->elv.icq = NULL; |
| 1001 | 1002 | ||
| 1002 | spin_lock_irq(q->queue_lock); | 1003 | spin_lock_irq(q->queue_lock); |
| 1003 | rl->elvpriv--; | 1004 | q->nr_rqs_elvpriv--; |
| 1004 | spin_unlock_irq(q->queue_lock); | 1005 | spin_unlock_irq(q->queue_lock); |
| 1005 | goto out; | 1006 | goto out; |
| 1006 | 1007 | ||
| @@ -1013,7 +1014,7 @@ fail_alloc: | |||
| 1013 | * queue, but this is pretty rare. | 1014 | * queue, but this is pretty rare. |
| 1014 | */ | 1015 | */ |
| 1015 | spin_lock_irq(q->queue_lock); | 1016 | spin_lock_irq(q->queue_lock); |
| 1016 | freed_request(q, rw_flags); | 1017 | freed_request(rl, rw_flags); |
| 1017 | 1018 | ||
| 1018 | /* | 1019 | /* |
| 1019 | * in the very unlikely event that allocation failed and no | 1020 | * in the very unlikely event that allocation failed and no |
| @@ -1029,56 +1030,58 @@ rq_starved: | |||
| 1029 | } | 1030 | } |
| 1030 | 1031 | ||
| 1031 | /** | 1032 | /** |
| 1032 | * get_request_wait - get a free request with retry | 1033 | * get_request - get a free request |
| 1033 | * @q: request_queue to allocate request from | 1034 | * @q: request_queue to allocate request from |
| 1034 | * @rw_flags: RW and SYNC flags | 1035 | * @rw_flags: RW and SYNC flags |
| 1035 | * @bio: bio to allocate request for (can be %NULL) | 1036 | * @bio: bio to allocate request for (can be %NULL) |
| 1037 | * @gfp_mask: allocation mask | ||
| 1036 | * | 1038 | * |
| 1037 | * Get a free request from @q. This function keeps retrying under memory | 1039 | * Get a free request from @q. If %__GFP_WAIT is set in @gfp_mask, this |
| 1038 | * pressure and fails iff @q is dead. | 1040 | * function keeps retrying under memory pressure and fails iff @q is dead. |
| 1039 | * | 1041 | * |
| 1040 | * Must be callled with @q->queue_lock held and, | 1042 | * Must be callled with @q->queue_lock held and, |
| 1041 | * Returns %NULL on failure, with @q->queue_lock held. | 1043 | * Returns %NULL on failure, with @q->queue_lock held. |
| 1042 | * Returns !%NULL on success, with @q->queue_lock *not held*. | 1044 | * Returns !%NULL on success, with @q->queue_lock *not held*. |
| 1043 | */ | 1045 | */ |
| 1044 | static struct request *get_request_wait(struct request_queue *q, int rw_flags, | 1046 | static struct request *get_request(struct request_queue *q, int rw_flags, |
| 1045 | struct bio *bio) | 1047 | struct bio *bio, gfp_t gfp_mask) |
| 1046 | { | 1048 | { |
| 1047 | const bool is_sync = rw_is_sync(rw_flags) != 0; | 1049 | const bool is_sync = rw_is_sync(rw_flags) != 0; |
| 1050 | DEFINE_WAIT(wait); | ||
| 1051 | struct request_list *rl; | ||
| 1048 | struct request *rq; | 1052 | struct request *rq; |
| 1049 | 1053 | ||
| 1050 | rq = get_request(q, rw_flags, bio, GFP_NOIO); | 1054 | rl = blk_get_rl(q, bio); /* transferred to @rq on success */ |
| 1051 | while (!rq) { | 1055 | retry: |
| 1052 | DEFINE_WAIT(wait); | 1056 | rq = __get_request(rl, rw_flags, bio, gfp_mask); |
| 1053 | struct request_list *rl = &q->rq; | 1057 | if (rq) |
| 1054 | 1058 | return rq; | |
| 1055 | if (unlikely(blk_queue_dead(q))) | ||
| 1056 | return NULL; | ||
| 1057 | 1059 | ||
| 1058 | prepare_to_wait_exclusive(&rl->wait[is_sync], &wait, | 1060 | if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dead(q))) { |
| 1059 | TASK_UNINTERRUPTIBLE); | 1061 | blk_put_rl(rl); |
| 1062 | return NULL; | ||
| 1063 | } | ||
| 1060 | 1064 | ||
| 1061 | trace_block_sleeprq(q, bio, rw_flags & 1); | 1065 | /* wait on @rl and retry */ |
| 1066 | prepare_to_wait_exclusive(&rl->wait[is_sync], &wait, | ||
| 1067 | TASK_UNINTERRUPTIBLE); | ||
| 1062 | 1068 | ||
| 1063 | spin_unlock_irq(q->queue_lock); | 1069 | trace_block_sleeprq(q, bio, rw_flags & 1); |
| 1064 | io_schedule(); | ||
| 1065 | 1070 | ||
| 1066 | /* | 1071 | spin_unlock_irq(q->queue_lock); |
| 1067 | * After sleeping, we become a "batching" process and | 1072 | io_schedule(); |
| 1068 | * will be able to allocate at least one request, and | ||
| 1069 | * up to a big batch of them for a small period time. | ||
| 1070 | * See ioc_batching, ioc_set_batching | ||
| 1071 | */ | ||
| 1072 | create_io_context(GFP_NOIO, q->node); | ||
| 1073 | ioc_set_batching(q, current->io_context); | ||
| 1074 | 1073 | ||
| 1075 | spin_lock_irq(q->queue_lock); | 1074 | /* |
| 1076 | finish_wait(&rl->wait[is_sync], &wait); | 1075 | * After sleeping, we become a "batching" process and will be able |
| 1076 | * to allocate at least one request, and up to a big batch of them | ||
| 1077 | * for a small period time. See ioc_batching, ioc_set_batching | ||
| 1078 | */ | ||
| 1079 | ioc_set_batching(q, current->io_context); | ||
| 1077 | 1080 | ||
| 1078 | rq = get_request(q, rw_flags, bio, GFP_NOIO); | 1081 | spin_lock_irq(q->queue_lock); |
| 1079 | }; | 1082 | finish_wait(&rl->wait[is_sync], &wait); |
| 1080 | 1083 | ||
| 1081 | return rq; | 1084 | goto retry; |
| 1082 | } | 1085 | } |
| 1083 | 1086 | ||
| 1084 | struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) | 1087 | struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) |
| @@ -1087,11 +1090,11 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) | |||
| 1087 | 1090 | ||
| 1088 | BUG_ON(rw != READ && rw != WRITE); | 1091 | BUG_ON(rw != READ && rw != WRITE); |
| 1089 | 1092 | ||
| 1093 | /* create ioc upfront */ | ||
| 1094 | create_io_context(gfp_mask, q->node); | ||
| 1095 | |||
| 1090 | spin_lock_irq(q->queue_lock); | 1096 | spin_lock_irq(q->queue_lock); |
| 1091 | if (gfp_mask & __GFP_WAIT) | 1097 | rq = get_request(q, rw, NULL, gfp_mask); |
| 1092 | rq = get_request_wait(q, rw, NULL); | ||
| 1093 | else | ||
| 1094 | rq = get_request(q, rw, NULL, gfp_mask); | ||
| 1095 | if (!rq) | 1098 | if (!rq) |
| 1096 | spin_unlock_irq(q->queue_lock); | 1099 | spin_unlock_irq(q->queue_lock); |
| 1097 | /* q->queue_lock is unlocked at this point */ | 1100 | /* q->queue_lock is unlocked at this point */ |
| @@ -1248,12 +1251,14 @@ void __blk_put_request(struct request_queue *q, struct request *req) | |||
| 1248 | */ | 1251 | */ |
| 1249 | if (req->cmd_flags & REQ_ALLOCED) { | 1252 | if (req->cmd_flags & REQ_ALLOCED) { |
| 1250 | unsigned int flags = req->cmd_flags; | 1253 | unsigned int flags = req->cmd_flags; |
| 1254 | struct request_list *rl = blk_rq_rl(req); | ||
| 1251 | 1255 | ||
| 1252 | BUG_ON(!list_empty(&req->queuelist)); | 1256 | BUG_ON(!list_empty(&req->queuelist)); |
| 1253 | BUG_ON(!hlist_unhashed(&req->hash)); | 1257 | BUG_ON(!hlist_unhashed(&req->hash)); |
| 1254 | 1258 | ||
| 1255 | blk_free_request(q, req); | 1259 | blk_free_request(rl, req); |
| 1256 | freed_request(q, flags); | 1260 | freed_request(rl, flags); |
| 1261 | blk_put_rl(rl); | ||
| 1257 | } | 1262 | } |
| 1258 | } | 1263 | } |
| 1259 | EXPORT_SYMBOL_GPL(__blk_put_request); | 1264 | EXPORT_SYMBOL_GPL(__blk_put_request); |
| @@ -1481,7 +1486,7 @@ get_rq: | |||
| 1481 | * Grab a free request. This is might sleep but can not fail. | 1486 | * Grab a free request. This is might sleep but can not fail. |
| 1482 | * Returns with the queue unlocked. | 1487 | * Returns with the queue unlocked. |
| 1483 | */ | 1488 | */ |
| 1484 | req = get_request_wait(q, rw_flags, bio); | 1489 | req = get_request(q, rw_flags, bio, GFP_NOIO); |
| 1485 | if (unlikely(!req)) { | 1490 | if (unlikely(!req)) { |
| 1486 | bio_endio(bio, -ENODEV); /* @q is dead */ | 1491 | bio_endio(bio, -ENODEV); /* @q is dead */ |
| 1487 | goto out_unlock; | 1492 | goto out_unlock; |
| @@ -1702,6 +1707,14 @@ generic_make_request_checks(struct bio *bio) | |||
| 1702 | goto end_io; | 1707 | goto end_io; |
| 1703 | } | 1708 | } |
| 1704 | 1709 | ||
| 1710 | /* | ||
| 1711 | * Various block parts want %current->io_context and lazy ioc | ||
| 1712 | * allocation ends up trading a lot of pain for a small amount of | ||
| 1713 | * memory. Just allocate it upfront. This may fail and block | ||
| 1714 | * layer knows how to live with it. | ||
| 1715 | */ | ||
| 1716 | create_io_context(GFP_ATOMIC, q->node); | ||
| 1717 | |||
| 1705 | if (blk_throtl_bio(q, bio)) | 1718 | if (blk_throtl_bio(q, bio)) |
| 1706 | return false; /* throttled, will be resubmitted later */ | 1719 | return false; /* throttled, will be resubmitted later */ |
| 1707 | 1720 | ||
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 893b8007c657..fab4cdd3f7bb 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
| @@ -244,6 +244,7 @@ int create_task_io_context(struct task_struct *task, gfp_t gfp_flags, int node) | |||
| 244 | 244 | ||
| 245 | /* initialize */ | 245 | /* initialize */ |
| 246 | atomic_long_set(&ioc->refcount, 1); | 246 | atomic_long_set(&ioc->refcount, 1); |
| 247 | atomic_set(&ioc->nr_tasks, 1); | ||
| 247 | atomic_set(&ioc->active_ref, 1); | 248 | atomic_set(&ioc->active_ref, 1); |
| 248 | spin_lock_init(&ioc->lock); | 249 | spin_lock_init(&ioc->lock); |
| 249 | INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC | __GFP_HIGH); | 250 | INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC | __GFP_HIGH); |
diff --git a/block/blk-settings.c b/block/blk-settings.c
index d3234fc494ad..565a6786032f 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
| @@ -143,8 +143,7 @@ void blk_set_stacking_limits(struct queue_limits *lim) | |||
| 143 | lim->discard_zeroes_data = 1; | 143 | lim->discard_zeroes_data = 1; |
| 144 | lim->max_segments = USHRT_MAX; | 144 | lim->max_segments = USHRT_MAX; |
| 145 | lim->max_hw_sectors = UINT_MAX; | 145 | lim->max_hw_sectors = UINT_MAX; |
| 146 | 146 | lim->max_sectors = UINT_MAX; | |
| 147 | lim->max_sectors = BLK_DEF_MAX_SECTORS; | ||
| 148 | } | 147 | } |
| 149 | EXPORT_SYMBOL(blk_set_stacking_limits); | 148 | EXPORT_SYMBOL(blk_set_stacking_limits); |
| 150 | 149 | ||
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index aa41b47c22d2..9628b291f960 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
| @@ -40,7 +40,7 @@ static ssize_t queue_requests_show(struct request_queue *q, char *page) | |||
| 40 | static ssize_t | 40 | static ssize_t |
| 41 | queue_requests_store(struct request_queue *q, const char *page, size_t count) | 41 | queue_requests_store(struct request_queue *q, const char *page, size_t count) |
| 42 | { | 42 | { |
| 43 | struct request_list *rl = &q->rq; | 43 | struct request_list *rl; |
| 44 | unsigned long nr; | 44 | unsigned long nr; |
| 45 | int ret; | 45 | int ret; |
| 46 | 46 | ||
| @@ -55,6 +55,9 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count) | |||
| 55 | q->nr_requests = nr; | 55 | q->nr_requests = nr; |
| 56 | blk_queue_congestion_threshold(q); | 56 | blk_queue_congestion_threshold(q); |
| 57 | 57 | ||
| 58 | /* congestion isn't cgroup aware and follows root blkcg for now */ | ||
| 59 | rl = &q->root_rl; | ||
| 60 | |||
| 58 | if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q)) | 61 | if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q)) |
| 59 | blk_set_queue_congested(q, BLK_RW_SYNC); | 62 | blk_set_queue_congested(q, BLK_RW_SYNC); |
| 60 | else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q)) | 63 | else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q)) |
| @@ -65,19 +68,22 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count) | |||
| 65 | else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q)) | 68 | else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q)) |
| 66 | blk_clear_queue_congested(q, BLK_RW_ASYNC); | 69 | blk_clear_queue_congested(q, BLK_RW_ASYNC); |
| 67 | 70 | ||
| 68 | if (rl->count[BLK_RW_SYNC] >= q->nr_requests) { | 71 | blk_queue_for_each_rl(rl, q) { |
| 69 | blk_set_queue_full(q, BLK_RW_SYNC); | 72 | if (rl->count[BLK_RW_SYNC] >= q->nr_requests) { |
| 70 | } else { | 73 | blk_set_rl_full(rl, BLK_RW_SYNC); |
| 71 | blk_clear_queue_full(q, BLK_RW_SYNC); | 74 | } else { |
| 72 | wake_up(&rl->wait[BLK_RW_SYNC]); | 75 | blk_clear_rl_full(rl, BLK_RW_SYNC); |
| 76 | wake_up(&rl->wait[BLK_RW_SYNC]); | ||
| 77 | } | ||
| 78 | |||
| 79 | if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) { | ||
| 80 | blk_set_rl_full(rl, BLK_RW_ASYNC); | ||
| 81 | } else { | ||
| 82 | blk_clear_rl_full(rl, BLK_RW_ASYNC); | ||
| 83 | wake_up(&rl->wait[BLK_RW_ASYNC]); | ||
| 84 | } | ||
| 73 | } | 85 | } |
| 74 | 86 | ||
| 75 | if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) { | ||
| 76 | blk_set_queue_full(q, BLK_RW_ASYNC); | ||
| 77 | } else { | ||
| 78 | blk_clear_queue_full(q, BLK_RW_ASYNC); | ||
| 79 | wake_up(&rl->wait[BLK_RW_ASYNC]); | ||
| 80 | } | ||
| 81 | spin_unlock_irq(q->queue_lock); | 87 | spin_unlock_irq(q->queue_lock); |
| 82 | return ret; | 88 | return ret; |
| 83 | } | 89 | } |
| @@ -476,7 +482,6 @@ static void blk_release_queue(struct kobject *kobj) | |||
| 476 | { | 482 | { |
| 477 | struct request_queue *q = | 483 | struct request_queue *q = |
| 478 | container_of(kobj, struct request_queue, kobj); | 484 | container_of(kobj, struct request_queue, kobj); |
| 479 | struct request_list *rl = &q->rq; | ||
| 480 | 485 | ||
| 481 | blk_sync_queue(q); | 486 | blk_sync_queue(q); |
| 482 | 487 | ||
| @@ -489,8 +494,7 @@ static void blk_release_queue(struct kobject *kobj) | |||
| 489 | elevator_exit(q->elevator); | 494 | elevator_exit(q->elevator); |
| 490 | } | 495 | } |
| 491 | 496 | ||
| 492 | if (rl->rq_pool) | 497 | blk_exit_rl(&q->root_rl); |
| 493 | mempool_destroy(rl->rq_pool); | ||
| 494 | 498 | ||
| 495 | if (q->queue_tags) | 499 | if (q->queue_tags) |
| 496 | __blk_queue_free_tags(q); | 500 | __blk_queue_free_tags(q); |
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 5b0659512047..e287c19908c8 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
| @@ -1123,9 +1123,6 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio) | |||
| 1123 | goto out; | 1123 | goto out; |
| 1124 | } | 1124 | } |
| 1125 | 1125 | ||
| 1126 | /* bio_associate_current() needs ioc, try creating */ | ||
| 1127 | create_io_context(GFP_ATOMIC, q->node); | ||
| 1128 | |||
| 1129 | /* | 1126 | /* |
| 1130 | * A throtl_grp pointer retrieved under rcu can be used to access | 1127 | * A throtl_grp pointer retrieved under rcu can be used to access |
| 1131 | * basic fields like stats and io rates. If a group has no rules, | 1128 | * basic fields like stats and io rates. If a group has no rules, |
diff --git a/block/blk.h b/block/blk.h
index 85f6ae42f7d3..2a0ea32d249f 100644
--- a/block/blk.h
+++ b/block/blk.h
| @@ -18,6 +18,9 @@ static inline void __blk_get_queue(struct request_queue *q) | |||
| 18 | kobject_get(&q->kobj); | 18 | kobject_get(&q->kobj); |
| 19 | } | 19 | } |
| 20 | 20 | ||
| 21 | int blk_init_rl(struct request_list *rl, struct request_queue *q, | ||
| 22 | gfp_t gfp_mask); | ||
| 23 | void blk_exit_rl(struct request_list *rl); | ||
| 21 | void init_request_from_bio(struct request *req, struct bio *bio); | 24 | void init_request_from_bio(struct request *req, struct bio *bio); |
| 22 | void blk_rq_bio_prep(struct request_queue *q, struct request *rq, | 25 | void blk_rq_bio_prep(struct request_queue *q, struct request *rq, |
| 23 | struct bio *bio); | 26 | struct bio *bio); |
| @@ -33,7 +36,6 @@ bool __blk_end_bidi_request(struct request *rq, int error, | |||
| 33 | void blk_rq_timed_out_timer(unsigned long data); | 36 | void blk_rq_timed_out_timer(unsigned long data); |
| 34 | void blk_delete_timer(struct request *); | 37 | void blk_delete_timer(struct request *); |
| 35 | void blk_add_timer(struct request *); | 38 | void blk_add_timer(struct request *); |
| 36 | void __generic_unplug_device(struct request_queue *); | ||
| 37 | 39 | ||
| 38 | /* | 40 | /* |
| 39 | * Internal atomic flags for request handling | 41 | * Internal atomic flags for request handling |
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index 7ad49c88f6b1..deee61fbb741 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
| @@ -243,56 +243,3 @@ int bsg_setup_queue(struct device *dev, struct request_queue *q, | |||
| 243 | return 0; | 243 | return 0; |
| 244 | } | 244 | } |
| 245 | EXPORT_SYMBOL_GPL(bsg_setup_queue); | 245 | EXPORT_SYMBOL_GPL(bsg_setup_queue); |
| 246 | |||
| 247 | /** | ||
| 248 | * bsg_remove_queue - Deletes the bsg dev from the q | ||
| 249 | * @q: the request_queue that is to be torn down. | ||
| 250 | * | ||
| 251 | * Notes: | ||
| 252 | * Before unregistering the queue empty any requests that are blocked | ||
| 253 | */ | ||
| 254 | void bsg_remove_queue(struct request_queue *q) | ||
| 255 | { | ||
| 256 | struct request *req; /* block request */ | ||
| 257 | int counts; /* totals for request_list count and starved */ | ||
| 258 | |||
| 259 | if (!q) | ||
| 260 | return; | ||
| 261 | |||
| 262 | /* Stop taking in new requests */ | ||
| 263 | spin_lock_irq(q->queue_lock); | ||
| 264 | blk_stop_queue(q); | ||
| 265 | |||
| 266 | /* drain all requests in the queue */ | ||
| 267 | while (1) { | ||
| 268 | /* need the lock to fetch a request | ||
| 269 | * this may fetch the same reqeust as the previous pass | ||
| 270 | */ | ||
| 271 | req = blk_fetch_request(q); | ||
| 272 | /* save requests in use and starved */ | ||
| 273 | counts = q->rq.count[0] + q->rq.count[1] + | ||
| 274 | q->rq.starved[0] + q->rq.starved[1]; | ||
| 275 | spin_unlock_irq(q->queue_lock); | ||
| 276 | /* any requests still outstanding? */ | ||
| 277 | if (counts == 0) | ||
| 278 | break; | ||
| 279 | |||
| 280 | /* This may be the same req as the previous iteration, | ||
| 281 | * always send the blk_end_request_all after a prefetch. | ||
| 282 | * It is not okay to not end the request because the | ||
| 283 | * prefetch started the request. | ||
| 284 | */ | ||
| 285 | if (req) { | ||
| 286 | /* return -ENXIO to indicate that this queue is | ||
| 287 | * going away | ||
| 288 | */ | ||
| 289 | req->errors = -ENXIO; | ||
| 290 | blk_end_request_all(req, -ENXIO); | ||
| 291 | } | ||
| 292 | |||
| 293 | msleep(200); /* allow bsg to possibly finish */ | ||
| 294 | spin_lock_irq(q->queue_lock); | ||
| 295 | } | ||
| 296 | bsg_unregister_queue(q); | ||
| 297 | } | ||
| 298 | EXPORT_SYMBOL_GPL(bsg_remove_queue); | ||
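With bsg_remove_queue() and its hand-rolled drain loop removed, a transport that owns a bsg queue now tears it down with bsg_unregister_queue() followed by blk_cleanup_queue(), which takes care of draining outstanding requests itself; the scsi_transport_fc and scsi_transport_iscsi hunks further down follow exactly this pattern. A minimal sketch of the replacement path (the wrapper name is hypothetical):

```c
#include <linux/blkdev.h>
#include <linux/bsg.h>

/* Sketch of the teardown now expected of bsg users. */
static void example_bsg_teardown(struct request_queue *q)
{
	if (!q)
		return;
	bsg_unregister_queue(q);	/* remove the bsg device node */
	blk_cleanup_queue(q);		/* drains and releases the queue */
}
```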
diff --git a/block/genhd.c b/block/genhd.c index 9cf5583c90ff..cac7366957c3 100644 --- a/block/genhd.c +++ b/block/genhd.c | |||
| @@ -154,7 +154,7 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) | |||
| 154 | part = rcu_dereference(ptbl->part[piter->idx]); | 154 | part = rcu_dereference(ptbl->part[piter->idx]); |
| 155 | if (!part) | 155 | if (!part) |
| 156 | continue; | 156 | continue; |
| 157 | if (!part->nr_sects && | 157 | if (!part_nr_sects_read(part) && |
| 158 | !(piter->flags & DISK_PITER_INCL_EMPTY) && | 158 | !(piter->flags & DISK_PITER_INCL_EMPTY) && |
| 159 | !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 && | 159 | !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 && |
| 160 | piter->idx == 0)) | 160 | piter->idx == 0)) |
| @@ -191,7 +191,7 @@ EXPORT_SYMBOL_GPL(disk_part_iter_exit); | |||
| 191 | static inline int sector_in_part(struct hd_struct *part, sector_t sector) | 191 | static inline int sector_in_part(struct hd_struct *part, sector_t sector) |
| 192 | { | 192 | { |
| 193 | return part->start_sect <= sector && | 193 | return part->start_sect <= sector && |
| 194 | sector < part->start_sect + part->nr_sects; | 194 | sector < part->start_sect + part_nr_sects_read(part); |
| 195 | } | 195 | } |
| 196 | 196 | ||
| 197 | /** | 197 | /** |
| @@ -769,8 +769,8 @@ void __init printk_all_partitions(void) | |||
| 769 | 769 | ||
| 770 | printk("%s%s %10llu %s %s", is_part0 ? "" : " ", | 770 | printk("%s%s %10llu %s %s", is_part0 ? "" : " ", |
| 771 | bdevt_str(part_devt(part), devt_buf), | 771 | bdevt_str(part_devt(part), devt_buf), |
| 772 | (unsigned long long)part->nr_sects >> 1, | 772 | (unsigned long long)part_nr_sects_read(part) >> 1, |
| 773 | disk_name(disk, part->partno, name_buf), | 773 | disk_name(disk, part->partno, name_buf), |
| 774 | uuid_buf); | 774 | uuid_buf); |
| 775 | if (is_part0) { | 775 | if (is_part0) { |
| 776 | if (disk->driverfs_dev != NULL && | 776 | if (disk->driverfs_dev != NULL && |
| @@ -862,7 +862,7 @@ static int show_partition(struct seq_file *seqf, void *v) | |||
| 862 | while ((part = disk_part_iter_next(&piter))) | 862 | while ((part = disk_part_iter_next(&piter))) |
| 863 | seq_printf(seqf, "%4d %7d %10llu %s\n", | 863 | seq_printf(seqf, "%4d %7d %10llu %s\n", |
| 864 | MAJOR(part_devt(part)), MINOR(part_devt(part)), | 864 | MAJOR(part_devt(part)), MINOR(part_devt(part)), |
| 865 | (unsigned long long)part->nr_sects >> 1, | 865 | (unsigned long long)part_nr_sects_read(part) >> 1, |
| 866 | disk_name(sgp, part->partno, buf)); | 866 | disk_name(sgp, part->partno, buf)); |
| 867 | disk_part_iter_exit(&piter); | 867 | disk_part_iter_exit(&piter); |
| 868 | 868 | ||
| @@ -1268,6 +1268,16 @@ struct gendisk *alloc_disk_node(int minors, int node_id) | |||
| 1268 | } | 1268 | } |
| 1269 | disk->part_tbl->part[0] = &disk->part0; | 1269 | disk->part_tbl->part[0] = &disk->part0; |
| 1270 | 1270 | ||
| 1271 | /* | ||
| 1272 | * set_capacity() and get_capacity() currently don't use | ||
| 1273 | * seqcounter to read/update the part0->nr_sects. Still init | ||
| 1274 | * the counter as we can read the sectors in IO submission | ||
| 1275 | * path using sequence counters. | ||
| 1276 | * | ||
| 1277 | * TODO: Ideally set_capacity() and get_capacity() should be | ||
| 1278 | * converted to make use of bd_mutex and sequence counters. | ||
| 1279 | */ | ||
| 1280 | seqcount_init(&disk->part0.nr_sects_seq); | ||
| 1271 | hd_ref_init(&disk->part0); | 1281 | hd_ref_init(&disk->part0); |
| 1272 | 1282 | ||
| 1273 | disk->minors = minors; | 1283 | disk->minors = minors; |
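Every genhd.c reader above now samples the partition size through part_nr_sects_read() instead of dereferencing part->nr_sects directly, so a torn 64-bit read on 32-bit kernels can no longer slip through. A hedged reader-side sketch in the same spirit as the converted sector_in_part() (the helper name is hypothetical):

```c
#include <linux/genhd.h>

/* Hypothetical containment check: sample the size once through the
 * seqcount-protected accessor, then compare against it. */
static inline bool example_sector_in_part(struct hd_struct *part,
					  sector_t sector)
{
	sector_t nr_sects = part_nr_sects_read(part);

	return part->start_sect <= sector &&
	       sector < part->start_sect + nr_sects;
}
```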
diff --git a/block/ioctl.c b/block/ioctl.c index ba15b2dbfb98..4476e0e85d16 100644 --- a/block/ioctl.c +++ b/block/ioctl.c | |||
| @@ -13,7 +13,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user | |||
| 13 | { | 13 | { |
| 14 | struct block_device *bdevp; | 14 | struct block_device *bdevp; |
| 15 | struct gendisk *disk; | 15 | struct gendisk *disk; |
| 16 | struct hd_struct *part; | 16 | struct hd_struct *part, *lpart; |
| 17 | struct blkpg_ioctl_arg a; | 17 | struct blkpg_ioctl_arg a; |
| 18 | struct blkpg_partition p; | 18 | struct blkpg_partition p; |
| 19 | struct disk_part_iter piter; | 19 | struct disk_part_iter piter; |
| @@ -36,8 +36,8 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user | |||
| 36 | case BLKPG_ADD_PARTITION: | 36 | case BLKPG_ADD_PARTITION: |
| 37 | start = p.start >> 9; | 37 | start = p.start >> 9; |
| 38 | length = p.length >> 9; | 38 | length = p.length >> 9; |
| 39 | /* check for fit in a hd_struct */ | 39 | /* check for fit in a hd_struct */ |
| 40 | if (sizeof(sector_t) == sizeof(long) && | 40 | if (sizeof(sector_t) == sizeof(long) && |
| 41 | sizeof(long long) > sizeof(long)) { | 41 | sizeof(long long) > sizeof(long)) { |
| 42 | long pstart = start, plength = length; | 42 | long pstart = start, plength = length; |
| 43 | if (pstart != start || plength != length | 43 | if (pstart != start || plength != length |
| @@ -92,6 +92,59 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user | |||
| 92 | bdput(bdevp); | 92 | bdput(bdevp); |
| 93 | 93 | ||
| 94 | return 0; | 94 | return 0; |
| 95 | case BLKPG_RESIZE_PARTITION: | ||
| 96 | start = p.start >> 9; | ||
| 97 | /* new length of partition, given in bytes (converted to sectors below) */ | ||
| 98 | length = p.length >> 9; | ||
| 99 | /* check for fit in a hd_struct */ | ||
| 100 | if (sizeof(sector_t) == sizeof(long) && | ||
| 101 | sizeof(long long) > sizeof(long)) { | ||
| 102 | long pstart = start, plength = length; | ||
| 103 | if (pstart != start || plength != length | ||
| 104 | || pstart < 0 || plength < 0) | ||
| 105 | return -EINVAL; | ||
| 106 | } | ||
| 107 | part = disk_get_part(disk, partno); | ||
| 108 | if (!part) | ||
| 109 | return -ENXIO; | ||
| 110 | bdevp = bdget(part_devt(part)); | ||
| 111 | if (!bdevp) { | ||
| 112 | disk_put_part(part); | ||
| 113 | return -ENOMEM; | ||
| 114 | } | ||
| 115 | mutex_lock(&bdevp->bd_mutex); | ||
| 116 | mutex_lock_nested(&bdev->bd_mutex, 1); | ||
| 117 | if (start != part->start_sect) { | ||
| 118 | mutex_unlock(&bdevp->bd_mutex); | ||
| 119 | mutex_unlock(&bdev->bd_mutex); | ||
| 120 | bdput(bdevp); | ||
| 121 | disk_put_part(part); | ||
| 122 | return -EINVAL; | ||
| 123 | } | ||
| 124 | /* overlap? */ | ||
| 125 | disk_part_iter_init(&piter, disk, | ||
| 126 | DISK_PITER_INCL_EMPTY); | ||
| 127 | while ((lpart = disk_part_iter_next(&piter))) { | ||
| 128 | if (lpart->partno != partno && | ||
| 129 | !(start + length <= lpart->start_sect || | ||
| 130 | start >= lpart->start_sect + lpart->nr_sects) | ||
| 131 | ) { | ||
| 132 | disk_part_iter_exit(&piter); | ||
| 133 | mutex_unlock(&bdevp->bd_mutex); | ||
| 134 | mutex_unlock(&bdev->bd_mutex); | ||
| 135 | bdput(bdevp); | ||
| 136 | disk_put_part(part); | ||
| 137 | return -EBUSY; | ||
| 138 | } | ||
| 139 | } | ||
| 140 | disk_part_iter_exit(&piter); | ||
| 141 | part_nr_sects_write(part, (sector_t)length); | ||
| 142 | i_size_write(bdevp->bd_inode, p.length); | ||
| 143 | mutex_unlock(&bdevp->bd_mutex); | ||
| 144 | mutex_unlock(&bdev->bd_mutex); | ||
| 145 | bdput(bdevp); | ||
| 146 | disk_put_part(part); | ||
| 147 | return 0; | ||
| 95 | default: | 148 | default: |
| 96 | return -EINVAL; | 149 | return -EINVAL; |
| 97 | } | 150 | } |
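BLKPG_RESIZE_PARTITION deliberately refuses to move a partition: the caller passes the existing start and the new length (both in bytes), the kernel checks the new extent against every other partition for overlap, and then publishes the new size through part_nr_sects_write() and i_size_write() under both bd_mutex locks. A hedged userspace sketch of driving the ioctl; the helper name is illustrative, and the struct fields come from <linux/blkpg.h>.

```c
#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/blkpg.h>

/* Grow partition 'partno' of the whole-disk node 'disk' to 'new_bytes'.
 * 'start_bytes' must equal the partition's current byte offset, since the
 * kernel rejects any attempt to move the start. */
static int resize_partition(const char *disk, int partno,
			    long long start_bytes, long long new_bytes)
{
	struct blkpg_partition p;
	struct blkpg_ioctl_arg a;
	int fd, ret;

	fd = open(disk, O_RDWR);
	if (fd < 0)
		return -1;

	memset(&p, 0, sizeof(p));
	p.pno = partno;
	p.start = start_bytes;		/* unchanged start, in bytes */
	p.length = new_bytes;		/* new size, in bytes */

	memset(&a, 0, sizeof(a));
	a.op = BLKPG_RESIZE_PARTITION;
	a.datalen = sizeof(p);
	a.data = &p;

	ret = ioctl(fd, BLKPG, &a);
	close(fd);
	return ret;
}
```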
diff --git a/block/partition-generic.c b/block/partition-generic.c index 6df5d6928a44..f1d14519cc04 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c | |||
| @@ -84,7 +84,7 @@ ssize_t part_size_show(struct device *dev, | |||
| 84 | struct device_attribute *attr, char *buf) | 84 | struct device_attribute *attr, char *buf) |
| 85 | { | 85 | { |
| 86 | struct hd_struct *p = dev_to_part(dev); | 86 | struct hd_struct *p = dev_to_part(dev); |
| 87 | return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects); | 87 | return sprintf(buf, "%llu\n",(unsigned long long)part_nr_sects_read(p)); |
| 88 | } | 88 | } |
| 89 | 89 | ||
| 90 | static ssize_t part_ro_show(struct device *dev, | 90 | static ssize_t part_ro_show(struct device *dev, |
| @@ -294,6 +294,8 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, | |||
| 294 | err = -ENOMEM; | 294 | err = -ENOMEM; |
| 295 | goto out_free; | 295 | goto out_free; |
| 296 | } | 296 | } |
| 297 | |||
| 298 | seqcount_init(&p->nr_sects_seq); | ||
| 297 | pdev = part_to_dev(p); | 299 | pdev = part_to_dev(p); |
| 298 | 300 | ||
| 299 | p->start_sect = start; | 301 | p->start_sect = start; |
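Both add_partition() here and alloc_disk_node() in the genhd.c hunk initialize nr_sects_seq before the partition becomes visible to readers; any code path that builds an hd_struct by hand would need the same treatment before the genhd.h accessors are used on it. A hedged sketch with a hypothetical helper name:

```c
#include <linux/genhd.h>

/* Hypothetical initializer for a freshly allocated hd_struct: the
 * seqcount must be valid before part_nr_sects_read/write ever run. */
static void example_init_hd_struct(struct hd_struct *p,
				   sector_t start, sector_t nr_sects)
{
	seqcount_init(&p->nr_sects_seq);
	p->start_sect = start;
	p->nr_sects = nr_sects;	/* no readers yet, so a plain store is fine */
}
```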
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 2d1e68db9b3f..e894ca7b54c0 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c | |||
| @@ -4146,45 +4146,7 @@ fc_bsg_rportadd(struct Scsi_Host *shost, struct fc_rport *rport) | |||
| 4146 | static void | 4146 | static void |
| 4147 | fc_bsg_remove(struct request_queue *q) | 4147 | fc_bsg_remove(struct request_queue *q) |
| 4148 | { | 4148 | { |
| 4149 | struct request *req; /* block request */ | ||
| 4150 | int counts; /* totals for request_list count and starved */ | ||
| 4151 | |||
| 4152 | if (q) { | 4149 | if (q) { |
| 4153 | /* Stop taking in new requests */ | ||
| 4154 | spin_lock_irq(q->queue_lock); | ||
| 4155 | blk_stop_queue(q); | ||
| 4156 | |||
| 4157 | /* drain all requests in the queue */ | ||
| 4158 | while (1) { | ||
| 4159 | /* need the lock to fetch a request | ||
| 4160 | * this may fetch the same reqeust as the previous pass | ||
| 4161 | */ | ||
| 4162 | req = blk_fetch_request(q); | ||
| 4163 | /* save requests in use and starved */ | ||
| 4164 | counts = q->rq.count[0] + q->rq.count[1] + | ||
| 4165 | q->rq.starved[0] + q->rq.starved[1]; | ||
| 4166 | spin_unlock_irq(q->queue_lock); | ||
| 4167 | /* any requests still outstanding? */ | ||
| 4168 | if (counts == 0) | ||
| 4169 | break; | ||
| 4170 | |||
| 4171 | /* This may be the same req as the previous iteration, | ||
| 4172 | * always send the blk_end_request_all after a prefetch. | ||
| 4173 | * It is not okay to not end the request because the | ||
| 4174 | * prefetch started the request. | ||
| 4175 | */ | ||
| 4176 | if (req) { | ||
| 4177 | /* return -ENXIO to indicate that this queue is | ||
| 4178 | * going away | ||
| 4179 | */ | ||
| 4180 | req->errors = -ENXIO; | ||
| 4181 | blk_end_request_all(req, -ENXIO); | ||
| 4182 | } | ||
| 4183 | |||
| 4184 | msleep(200); /* allow bsg to possibly finish */ | ||
| 4185 | spin_lock_irq(q->queue_lock); | ||
| 4186 | } | ||
| 4187 | |||
| 4188 | bsg_unregister_queue(q); | 4150 | bsg_unregister_queue(q); |
| 4189 | blk_cleanup_queue(q); | 4151 | blk_cleanup_queue(q); |
| 4190 | } | 4152 | } |
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 09809d06eccb..fa1dfaa83e32 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c | |||
| @@ -575,7 +575,7 @@ static int iscsi_remove_host(struct transport_container *tc, | |||
| 575 | struct iscsi_cls_host *ihost = shost->shost_data; | 575 | struct iscsi_cls_host *ihost = shost->shost_data; |
| 576 | 576 | ||
| 577 | if (ihost->bsg_q) { | 577 | if (ihost->bsg_q) { |
| 578 | bsg_remove_queue(ihost->bsg_q); | 578 | bsg_unregister_queue(ihost->bsg_q); |
| 579 | blk_cleanup_queue(ihost->bsg_q); | 579 | blk_cleanup_queue(ihost->bsg_q); |
| 580 | } | 580 | } |
| 581 | return 0; | 581 | return 0; |
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 07954b05b86c..3816ce8a08fc 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h | |||
| @@ -46,16 +46,23 @@ struct blkcg_gq; | |||
| 46 | struct request; | 46 | struct request; |
| 47 | typedef void (rq_end_io_fn)(struct request *, int); | 47 | typedef void (rq_end_io_fn)(struct request *, int); |
| 48 | 48 | ||
| 49 | #define BLK_RL_SYNCFULL (1U << 0) | ||
| 50 | #define BLK_RL_ASYNCFULL (1U << 1) | ||
| 51 | |||
| 49 | struct request_list { | 52 | struct request_list { |
| 53 | struct request_queue *q; /* the queue this rl belongs to */ | ||
| 54 | #ifdef CONFIG_BLK_CGROUP | ||
| 55 | struct blkcg_gq *blkg; /* blkg this request pool belongs to */ | ||
| 56 | #endif | ||
| 50 | /* | 57 | /* |
| 51 | * count[], starved[], and wait[] are indexed by | 58 | * count[], starved[], and wait[] are indexed by |
| 52 | * BLK_RW_SYNC/BLK_RW_ASYNC | 59 | * BLK_RW_SYNC/BLK_RW_ASYNC |
| 53 | */ | 60 | */ |
| 54 | int count[2]; | 61 | int count[2]; |
| 55 | int starved[2]; | 62 | int starved[2]; |
| 56 | int elvpriv; | 63 | mempool_t *rq_pool; |
| 57 | mempool_t *rq_pool; | 64 | wait_queue_head_t wait[2]; |
| 58 | wait_queue_head_t wait[2]; | 65 | unsigned int flags; |
| 59 | }; | 66 | }; |
| 60 | 67 | ||
| 61 | /* | 68 | /* |
| @@ -138,6 +145,7 @@ struct request { | |||
| 138 | struct hd_struct *part; | 145 | struct hd_struct *part; |
| 139 | unsigned long start_time; | 146 | unsigned long start_time; |
| 140 | #ifdef CONFIG_BLK_CGROUP | 147 | #ifdef CONFIG_BLK_CGROUP |
| 148 | struct request_list *rl; /* rl this rq is alloced from */ | ||
| 141 | unsigned long long start_time_ns; | 149 | unsigned long long start_time_ns; |
| 142 | unsigned long long io_start_time_ns; /* when passed to hardware */ | 150 | unsigned long long io_start_time_ns; /* when passed to hardware */ |
| 143 | #endif | 151 | #endif |
| @@ -282,11 +290,16 @@ struct request_queue { | |||
| 282 | struct list_head queue_head; | 290 | struct list_head queue_head; |
| 283 | struct request *last_merge; | 291 | struct request *last_merge; |
| 284 | struct elevator_queue *elevator; | 292 | struct elevator_queue *elevator; |
| 293 | int nr_rqs[2]; /* # allocated [a]sync rqs */ | ||
| 294 | int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */ | ||
| 285 | 295 | ||
| 286 | /* | 296 | /* |
| 287 | * the queue request freelist, one for reads and one for writes | 297 | * If blkcg is not used, @q->root_rl serves all requests. If blkcg |
| 298 | * is used, root blkg allocates from @q->root_rl and all other | ||
| 299 | * blkgs from their own blkg->rl. Which one to use should be | ||
| 300 | * determined using bio_request_list(). | ||
| 288 | */ | 301 | */ |
| 289 | struct request_list rq; | 302 | struct request_list root_rl; |
| 290 | 303 | ||
| 291 | request_fn_proc *request_fn; | 304 | request_fn_proc *request_fn; |
| 292 | make_request_fn *make_request_fn; | 305 | make_request_fn *make_request_fn; |
| @@ -561,27 +574,25 @@ static inline bool rq_is_sync(struct request *rq) | |||
| 561 | return rw_is_sync(rq->cmd_flags); | 574 | return rw_is_sync(rq->cmd_flags); |
| 562 | } | 575 | } |
| 563 | 576 | ||
| 564 | static inline int blk_queue_full(struct request_queue *q, int sync) | 577 | static inline bool blk_rl_full(struct request_list *rl, bool sync) |
| 565 | { | 578 | { |
| 566 | if (sync) | 579 | unsigned int flag = sync ? BLK_RL_SYNCFULL : BLK_RL_ASYNCFULL; |
| 567 | return test_bit(QUEUE_FLAG_SYNCFULL, &q->queue_flags); | 580 | |
| 568 | return test_bit(QUEUE_FLAG_ASYNCFULL, &q->queue_flags); | 581 | return rl->flags & flag; |
| 569 | } | 582 | } |
| 570 | 583 | ||
| 571 | static inline void blk_set_queue_full(struct request_queue *q, int sync) | 584 | static inline void blk_set_rl_full(struct request_list *rl, bool sync) |
| 572 | { | 585 | { |
| 573 | if (sync) | 586 | unsigned int flag = sync ? BLK_RL_SYNCFULL : BLK_RL_ASYNCFULL; |
| 574 | queue_flag_set(QUEUE_FLAG_SYNCFULL, q); | 587 | |
| 575 | else | 588 | rl->flags |= flag; |
| 576 | queue_flag_set(QUEUE_FLAG_ASYNCFULL, q); | ||
| 577 | } | 589 | } |
| 578 | 590 | ||
| 579 | static inline void blk_clear_queue_full(struct request_queue *q, int sync) | 591 | static inline void blk_clear_rl_full(struct request_list *rl, bool sync) |
| 580 | { | 592 | { |
| 581 | if (sync) | 593 | unsigned int flag = sync ? BLK_RL_SYNCFULL : BLK_RL_ASYNCFULL; |
| 582 | queue_flag_clear(QUEUE_FLAG_SYNCFULL, q); | 594 | |
| 583 | else | 595 | rl->flags &= ~flag; |
| 584 | queue_flag_clear(QUEUE_FLAG_ASYNCFULL, q); | ||
| 585 | } | 596 | } |
| 586 | 597 | ||
| 587 | 598 | ||
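The queue-wide QUEUE_FLAG_{SYNC,ASYNC}FULL bits are replaced by per-request_list BLK_RL_{SYNC,ASYNC}FULL flags, so congestion state now tracks the pool a request was actually allocated from. A hedged sketch of how the new helpers pair up around a pool's limit; the real accounting lives in get_request()/freed_request() in blk-core.c and is more involved than this.

```c
#include <linux/blkdev.h>

/* Illustrative only: flip a request_list's "full" state around the
 * queue's nr_requests limit for the given direction (sync/async). */
static void example_track_rl_full(struct request_list *rl, bool sync)
{
	struct request_queue *q = rl->q;	/* back-pointer added above */

	if (rl->count[sync] >= q->nr_requests)
		blk_set_rl_full(rl, sync);
	else
		blk_clear_rl_full(rl, sync);
}
```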
diff --git a/include/linux/blkpg.h b/include/linux/blkpg.h index faf8a45af210..a8519446c111 100644 --- a/include/linux/blkpg.h +++ b/include/linux/blkpg.h | |||
| @@ -40,6 +40,7 @@ struct blkpg_ioctl_arg { | |||
| 40 | /* The subfunctions (for the op field) */ | 40 | /* The subfunctions (for the op field) */ |
| 41 | #define BLKPG_ADD_PARTITION 1 | 41 | #define BLKPG_ADD_PARTITION 1 |
| 42 | #define BLKPG_DEL_PARTITION 2 | 42 | #define BLKPG_DEL_PARTITION 2 |
| 43 | #define BLKPG_RESIZE_PARTITION 3 | ||
| 43 | 44 | ||
| 44 | /* Sizes of name fields. Unused at present. */ | 45 | /* Sizes of name fields. Unused at present. */ |
| 45 | #define BLKPG_DEVNAMELTH 64 | 46 | #define BLKPG_DEVNAMELTH 64 |
diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h index f55ab8cdc106..4d0fb3df2f4a 100644 --- a/include/linux/bsg-lib.h +++ b/include/linux/bsg-lib.h | |||
| @@ -67,7 +67,6 @@ void bsg_job_done(struct bsg_job *job, int result, | |||
| 67 | int bsg_setup_queue(struct device *dev, struct request_queue *q, char *name, | 67 | int bsg_setup_queue(struct device *dev, struct request_queue *q, char *name, |
| 68 | bsg_job_fn *job_fn, int dd_job_size); | 68 | bsg_job_fn *job_fn, int dd_job_size); |
| 69 | void bsg_request_fn(struct request_queue *q); | 69 | void bsg_request_fn(struct request_queue *q); |
| 70 | void bsg_remove_queue(struct request_queue *q); | ||
| 71 | void bsg_goose_queue(struct request_queue *q); | 70 | void bsg_goose_queue(struct request_queue *q); |
| 72 | 71 | ||
| 73 | #endif | 72 | #endif |
diff --git a/include/linux/genhd.h b/include/linux/genhd.h index ae0aaa9d42fa..4f440b3e89fe 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h | |||
| @@ -97,7 +97,13 @@ struct partition_meta_info { | |||
| 97 | 97 | ||
| 98 | struct hd_struct { | 98 | struct hd_struct { |
| 99 | sector_t start_sect; | 99 | sector_t start_sect; |
| 100 | /* | ||
| 101 | * nr_sects is protected by a sequence counter. A partition may be | ||
| 102 | * extended while IO is in flight to it, and the update of nr_sects | ||
| 103 | * can be non-atomic on 32bit machines with a 64bit sector_t. | ||
| 104 | */ | ||
| 100 | sector_t nr_sects; | 105 | sector_t nr_sects; |
| 106 | seqcount_t nr_sects_seq; | ||
| 101 | sector_t alignment_offset; | 107 | sector_t alignment_offset; |
| 102 | unsigned int discard_alignment; | 108 | unsigned int discard_alignment; |
| 103 | struct device __dev; | 109 | struct device __dev; |
| @@ -647,6 +653,57 @@ static inline void hd_struct_put(struct hd_struct *part) | |||
| 647 | __delete_partition(part); | 653 | __delete_partition(part); |
| 648 | } | 654 | } |
| 649 | 655 | ||
| 656 | /* | ||
| 657 | * Any access to part->nr_sects that is not protected by the partition's | ||
| 658 | * bd_mutex or the gendisk's bdev bd_mutex should be done through this | ||
| 659 | * accessor function. | ||
| 660 | * | ||
| 661 | * Written along the lines of i_size_read() and i_size_write(). The | ||
| 662 | * CONFIG_PREEMPT case optimizes for a UP kernel with preemption | ||
| 663 | * enabled. | ||
| 664 | */ | ||
| 665 | static inline sector_t part_nr_sects_read(struct hd_struct *part) | ||
| 666 | { | ||
| 667 | #if BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_SMP) | ||
| 668 | sector_t nr_sects; | ||
| 669 | unsigned seq; | ||
| 670 | do { | ||
| 671 | seq = read_seqcount_begin(&part->nr_sects_seq); | ||
| 672 | nr_sects = part->nr_sects; | ||
| 673 | } while (read_seqcount_retry(&part->nr_sects_seq, seq)); | ||
| 674 | return nr_sects; | ||
| 675 | #elif BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_PREEMPT) | ||
| 676 | sector_t nr_sects; | ||
| 677 | |||
| 678 | preempt_disable(); | ||
| 679 | nr_sects = part->nr_sects; | ||
| 680 | preempt_enable(); | ||
| 681 | return nr_sects; | ||
| 682 | #else | ||
| 683 | return part->nr_sects; | ||
| 684 | #endif | ||
| 685 | } | ||
| 686 | |||
| 687 | /* | ||
| 688 | * Should be called with the partition's mutex (typically bd_mutex) held | ||
| 689 | * to provide mutual exclusion among writers; otherwise the seqcount might | ||
| 690 | * be left in a wrong state, leaving readers spinning indefinitely. | ||
| 691 | */ | ||
| 692 | static inline void part_nr_sects_write(struct hd_struct *part, sector_t size) | ||
| 693 | { | ||
| 694 | #if BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_SMP) | ||
| 695 | write_seqcount_begin(&part->nr_sects_seq); | ||
| 696 | part->nr_sects = size; | ||
| 697 | write_seqcount_end(&part->nr_sects_seq); | ||
| 698 | #elif BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_PREEMPT) | ||
| 699 | preempt_disable(); | ||
| 700 | part->nr_sects = size; | ||
| 701 | preempt_enable(); | ||
| 702 | #else | ||
| 703 | part->nr_sects = size; | ||
| 704 | #endif | ||
| 705 | } | ||
| 706 | |||
| 650 | #else /* CONFIG_BLOCK */ | 707 | #else /* CONFIG_BLOCK */ |
| 651 | 708 | ||
| 652 | static inline void printk_all_partitions(void) { } | 709 | static inline void printk_all_partitions(void) { } |
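part_nr_sects_write() leaves the seqcount odd for the duration of the store, so writers must serialize externally (the resize ioctl takes both bd_mutex locks for exactly this reason), while readers in the IO path stay lockless and simply retry. A hedged writer-side sketch with a hypothetical function name:

```c
#include <linux/fs.h>
#include <linux/genhd.h>
#include <linux/mutex.h>

/* Hypothetical resize helper in the spirit of BLKPG_RESIZE_PARTITION:
 * hold the partition's bd_mutex so concurrent writers cannot interleave
 * seqcount updates, then publish the new size through the accessor. */
static void example_set_part_size(struct block_device *part_bdev,
				  struct hd_struct *part, sector_t new_sects)
{
	mutex_lock(&part_bdev->bd_mutex);
	part_nr_sects_write(part, new_sects);
	i_size_write(part_bdev->bd_inode, (loff_t)new_sects << 9);
	mutex_unlock(&part_bdev->bd_mutex);
}
```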
diff --git a/include/linux/mempool.h b/include/linux/mempool.h index 7c08052e3321..39ed62ab5b8a 100644 --- a/include/linux/mempool.h +++ b/include/linux/mempool.h | |||
| @@ -26,7 +26,8 @@ typedef struct mempool_s { | |||
| 26 | extern mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn, | 26 | extern mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn, |
| 27 | mempool_free_t *free_fn, void *pool_data); | 27 | mempool_free_t *free_fn, void *pool_data); |
| 28 | extern mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn, | 28 | extern mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn, |
| 29 | mempool_free_t *free_fn, void *pool_data, int nid); | 29 | mempool_free_t *free_fn, void *pool_data, |
| 30 | gfp_t gfp_mask, int nid); | ||
| 30 | 31 | ||
| 31 | extern int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask); | 32 | extern int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask); |
| 32 | extern void mempool_destroy(mempool_t *pool); | 33 | extern void mempool_destroy(mempool_t *pool); |
diff --git a/mm/mempool.c b/mm/mempool.c index d9049811f352..54990476c049 100644 --- a/mm/mempool.c +++ b/mm/mempool.c | |||
| @@ -63,19 +63,21 @@ EXPORT_SYMBOL(mempool_destroy); | |||
| 63 | mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn, | 63 | mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn, |
| 64 | mempool_free_t *free_fn, void *pool_data) | 64 | mempool_free_t *free_fn, void *pool_data) |
| 65 | { | 65 | { |
| 66 | return mempool_create_node(min_nr,alloc_fn,free_fn, pool_data,-1); | 66 | return mempool_create_node(min_nr,alloc_fn,free_fn, pool_data, |
| 67 | GFP_KERNEL, NUMA_NO_NODE); | ||
| 67 | } | 68 | } |
| 68 | EXPORT_SYMBOL(mempool_create); | 69 | EXPORT_SYMBOL(mempool_create); |
| 69 | 70 | ||
| 70 | mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn, | 71 | mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn, |
| 71 | mempool_free_t *free_fn, void *pool_data, int node_id) | 72 | mempool_free_t *free_fn, void *pool_data, |
| 73 | gfp_t gfp_mask, int node_id) | ||
| 72 | { | 74 | { |
| 73 | mempool_t *pool; | 75 | mempool_t *pool; |
| 74 | pool = kmalloc_node(sizeof(*pool), GFP_KERNEL | __GFP_ZERO, node_id); | 76 | pool = kmalloc_node(sizeof(*pool), gfp_mask | __GFP_ZERO, node_id); |
| 75 | if (!pool) | 77 | if (!pool) |
| 76 | return NULL; | 78 | return NULL; |
| 77 | pool->elements = kmalloc_node(min_nr * sizeof(void *), | 79 | pool->elements = kmalloc_node(min_nr * sizeof(void *), |
| 78 | GFP_KERNEL, node_id); | 80 | gfp_mask, node_id); |
| 79 | if (!pool->elements) { | 81 | if (!pool->elements) { |
| 80 | kfree(pool); | 82 | kfree(pool); |
| 81 | return NULL; | 83 | return NULL; |
| @@ -93,7 +95,7 @@ mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn, | |||
| 93 | while (pool->curr_nr < pool->min_nr) { | 95 | while (pool->curr_nr < pool->min_nr) { |
| 94 | void *element; | 96 | void *element; |
| 95 | 97 | ||
| 96 | element = pool->alloc(GFP_KERNEL, pool->pool_data); | 98 | element = pool->alloc(gfp_mask, pool->pool_data); |
| 97 | if (unlikely(!element)) { | 99 | if (unlikely(!element)) { |
| 98 | mempool_destroy(pool); | 100 | mempool_destroy(pool); |
| 99 | return NULL; | 101 | return NULL; |
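With gfp_mask threaded through mempool_create_node(), the pool header, the element array, and the pre-filled elements are all allocated under the caller's constraints instead of a hard-coded GFP_KERNEL, matching the gfp_mask parameter of the new blk_init_rl() declaration earlier in this diff. A hedged example of the updated call, using the stock slab helpers from <linux/mempool.h>; the cache pointer is illustrative and BLKDEV_MIN_RQ (4) is simply the usual minimum reserve for request pools.

```c
#include <linux/blkdev.h>
#include <linux/mempool.h>
#include <linux/slab.h>

/* Sketch: build a node-local pool of objects from an existing kmem_cache
 * with the new signature. 'my_cachep' stands in for whatever cache the
 * caller owns. */
static mempool_t *example_create_pool(struct kmem_cache *my_cachep,
				      gfp_t gfp_mask, int node)
{
	return mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
				   mempool_free_slab, my_cachep,
				   gfp_mask, node);
}
```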
