author     Linus Torvalds <torvalds@linux-foundation.org>  2011-11-04 20:06:58 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2011-11-04 20:06:58 -0400
commit     b4fdcb02f1e39c27058a885905bd0277370ba441
tree       fd4cfd1994f21f44afe5e7904681fb5ac09f81b8 /block
parent     044595d4e448305fbaec472eb7d22636d24e7d8c
parent     6dd9ad7df2019b1e33a372a501907db293ebcd0d
Merge branch 'for-3.2/core' of git://git.kernel.dk/linux-block
* 'for-3.2/core' of git://git.kernel.dk/linux-block: (29 commits)
block: don't call blk_drain_queue() if elevator is not up
blk-throttle: use queue_is_locked() instead of lockdep_is_held()
blk-throttle: Take blkcg->lock while traversing blkcg->policy_list
blk-throttle: Free up policy node associated with deleted rule
block: warn if tag is greater than real_max_depth.
block: make gendisk hold a reference to its queue
blk-flush: move the queue kick into
blk-flush: fix invalid BUG_ON in blk_insert_flush
block: Remove the control of complete cpu from bio.
block: fix a typo in the blk-cgroup.h file
block: initialize the bounce pool if high memory may be added later
block: fix request_queue lifetime handling by making blk_queue_cleanup() properly shutdown
block: drop @tsk from attempt_plug_merge() and explain sync rules
block: make get_request[_wait]() fail if queue is dead
block: reorganize throtl_get_tg() and blk_throtl_bio()
block: reorganize queue draining
block: drop unnecessary blk_get/put_queue() in scsi_cmd_ioctl() and blk_get_tg()
block: pass around REQ_* flags instead of broken down booleans during request alloc/free
block: move blk_throtl prototypes to block/blk.h
block: fix genhd refcounting in blkio_policy_parse_and_set()
...
Fix up trivial conflicts due to "mddev_t" -> "struct mddev" conversion
and making the request functions be of type "void" instead of "int" in
- drivers/md/{faulty.c,linear.c,md.c,md.h,multipath.c,raid0.c,raid1.c,raid10.c,raid5.c}
- drivers/staging/zram/zram_drv.c
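The "int" to "void" conversion noted above only changes the prototype of bio-handling entry points such as the md and zram request functions; a minimal sketch of what such a hook looks like after this merge (the function name is hypothetical, not taken from this diff):

	/*
	 * Hypothetical driver hook, shown only to illustrate the prototype
	 * change: make_request-style functions no longer return an int.
	 */
	static void example_make_request(struct request_queue *q, struct bio *bio)
	{
		/* a real driver would remap or queue the bio; complete it here */
		bio_endio(bio, 0);
	}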
Diffstat (limited to 'block')
 block/blk-cgroup.c   | 111
 block/blk-cgroup.h   |   2
 block/blk-core.c     | 461
 block/blk-flush.c    |   3
 block/blk-sysfs.c    |   7
 block/blk-tag.c      |   6
 block/blk-throttle.c | 106
 block/blk.h          |  20
 block/elevator.c     |  39
 block/genhd.c        |   8
 block/scsi_ioctl.c   |   3
 11 files changed, 439 insertions(+), 327 deletions(-)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index b596e54ddd71..8f630cec906e 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -768,25 +768,14 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg, | |||
768 | return disk_total; | 768 | return disk_total; |
769 | } | 769 | } |
770 | 770 | ||
771 | static int blkio_check_dev_num(dev_t dev) | ||
772 | { | ||
773 | int part = 0; | ||
774 | struct gendisk *disk; | ||
775 | |||
776 | disk = get_gendisk(dev, &part); | ||
777 | if (!disk || part) | ||
778 | return -ENODEV; | ||
779 | |||
780 | return 0; | ||
781 | } | ||
782 | |||
783 | static int blkio_policy_parse_and_set(char *buf, | 771 | static int blkio_policy_parse_and_set(char *buf, |
784 | struct blkio_policy_node *newpn, enum blkio_policy_id plid, int fileid) | 772 | struct blkio_policy_node *newpn, enum blkio_policy_id plid, int fileid) |
785 | { | 773 | { |
774 | struct gendisk *disk = NULL; | ||
786 | char *s[4], *p, *major_s = NULL, *minor_s = NULL; | 775 | char *s[4], *p, *major_s = NULL, *minor_s = NULL; |
787 | int ret; | ||
788 | unsigned long major, minor; | 776 | unsigned long major, minor; |
789 | int i = 0; | 777 | int i = 0, ret = -EINVAL; |
778 | int part; | ||
790 | dev_t dev; | 779 | dev_t dev; |
791 | u64 temp; | 780 | u64 temp; |
792 | 781 | ||
@@ -804,37 +793,36 @@ static int blkio_policy_parse_and_set(char *buf, | |||
804 | } | 793 | } |
805 | 794 | ||
806 | if (i != 2) | 795 | if (i != 2) |
807 | return -EINVAL; | 796 | goto out; |
808 | 797 | ||
809 | p = strsep(&s[0], ":"); | 798 | p = strsep(&s[0], ":"); |
810 | if (p != NULL) | 799 | if (p != NULL) |
811 | major_s = p; | 800 | major_s = p; |
812 | else | 801 | else |
813 | return -EINVAL; | 802 | goto out; |
814 | 803 | ||
815 | minor_s = s[0]; | 804 | minor_s = s[0]; |
816 | if (!minor_s) | 805 | if (!minor_s) |
817 | return -EINVAL; | 806 | goto out; |
818 | 807 | ||
819 | ret = strict_strtoul(major_s, 10, &major); | 808 | if (strict_strtoul(major_s, 10, &major)) |
820 | if (ret) | 809 | goto out; |
821 | return -EINVAL; | ||
822 | 810 | ||
823 | ret = strict_strtoul(minor_s, 10, &minor); | 811 | if (strict_strtoul(minor_s, 10, &minor)) |
824 | if (ret) | 812 | goto out; |
825 | return -EINVAL; | ||
826 | 813 | ||
827 | dev = MKDEV(major, minor); | 814 | dev = MKDEV(major, minor); |
828 | 815 | ||
829 | ret = strict_strtoull(s[1], 10, &temp); | 816 | if (strict_strtoull(s[1], 10, &temp)) |
830 | if (ret) | 817 | goto out; |
831 | return -EINVAL; | ||
832 | 818 | ||
833 | /* For rule removal, do not check for device presence. */ | 819 | /* For rule removal, do not check for device presence. */ |
834 | if (temp) { | 820 | if (temp) { |
835 | ret = blkio_check_dev_num(dev); | 821 | disk = get_gendisk(dev, &part); |
836 | if (ret) | 822 | if (!disk || part) { |
837 | return ret; | 823 | ret = -ENODEV; |
824 | goto out; | ||
825 | } | ||
838 | } | 826 | } |
839 | 827 | ||
840 | newpn->dev = dev; | 828 | newpn->dev = dev; |
@@ -843,7 +831,7 @@ static int blkio_policy_parse_and_set(char *buf, | |||
843 | case BLKIO_POLICY_PROP: | 831 | case BLKIO_POLICY_PROP: |
844 | if ((temp < BLKIO_WEIGHT_MIN && temp > 0) || | 832 | if ((temp < BLKIO_WEIGHT_MIN && temp > 0) || |
845 | temp > BLKIO_WEIGHT_MAX) | 833 | temp > BLKIO_WEIGHT_MAX) |
846 | return -EINVAL; | 834 | goto out; |
847 | 835 | ||
848 | newpn->plid = plid; | 836 | newpn->plid = plid; |
849 | newpn->fileid = fileid; | 837 | newpn->fileid = fileid; |
@@ -860,7 +848,7 @@ static int blkio_policy_parse_and_set(char *buf, | |||
860 | case BLKIO_THROTL_read_iops_device: | 848 | case BLKIO_THROTL_read_iops_device: |
861 | case BLKIO_THROTL_write_iops_device: | 849 | case BLKIO_THROTL_write_iops_device: |
862 | if (temp > THROTL_IOPS_MAX) | 850 | if (temp > THROTL_IOPS_MAX) |
863 | return -EINVAL; | 851 | goto out; |
864 | 852 | ||
865 | newpn->plid = plid; | 853 | newpn->plid = plid; |
866 | newpn->fileid = fileid; | 854 | newpn->fileid = fileid; |
@@ -871,68 +859,96 @@ static int blkio_policy_parse_and_set(char *buf, | |||
871 | default: | 859 | default: |
872 | BUG(); | 860 | BUG(); |
873 | } | 861 | } |
874 | 862 | ret = 0; | |
875 | return 0; | 863 | out: |
864 | put_disk(disk); | ||
865 | return ret; | ||
876 | } | 866 | } |
877 | 867 | ||
878 | unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg, | 868 | unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg, |
879 | dev_t dev) | 869 | dev_t dev) |
880 | { | 870 | { |
881 | struct blkio_policy_node *pn; | 871 | struct blkio_policy_node *pn; |
872 | unsigned long flags; | ||
873 | unsigned int weight; | ||
874 | |||
875 | spin_lock_irqsave(&blkcg->lock, flags); | ||
882 | 876 | ||
883 | pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_PROP, | 877 | pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_PROP, |
884 | BLKIO_PROP_weight_device); | 878 | BLKIO_PROP_weight_device); |
885 | if (pn) | 879 | if (pn) |
886 | return pn->val.weight; | 880 | weight = pn->val.weight; |
887 | else | 881 | else |
888 | return blkcg->weight; | 882 | weight = blkcg->weight; |
883 | |||
884 | spin_unlock_irqrestore(&blkcg->lock, flags); | ||
885 | |||
886 | return weight; | ||
889 | } | 887 | } |
890 | EXPORT_SYMBOL_GPL(blkcg_get_weight); | 888 | EXPORT_SYMBOL_GPL(blkcg_get_weight); |
891 | 889 | ||
892 | uint64_t blkcg_get_read_bps(struct blkio_cgroup *blkcg, dev_t dev) | 890 | uint64_t blkcg_get_read_bps(struct blkio_cgroup *blkcg, dev_t dev) |
893 | { | 891 | { |
894 | struct blkio_policy_node *pn; | 892 | struct blkio_policy_node *pn; |
893 | unsigned long flags; | ||
894 | uint64_t bps = -1; | ||
895 | 895 | ||
896 | spin_lock_irqsave(&blkcg->lock, flags); | ||
896 | pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL, | 897 | pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL, |
897 | BLKIO_THROTL_read_bps_device); | 898 | BLKIO_THROTL_read_bps_device); |
898 | if (pn) | 899 | if (pn) |
899 | return pn->val.bps; | 900 | bps = pn->val.bps; |
900 | else | 901 | spin_unlock_irqrestore(&blkcg->lock, flags); |
901 | return -1; | 902 | |
903 | return bps; | ||
902 | } | 904 | } |
903 | 905 | ||
904 | uint64_t blkcg_get_write_bps(struct blkio_cgroup *blkcg, dev_t dev) | 906 | uint64_t blkcg_get_write_bps(struct blkio_cgroup *blkcg, dev_t dev) |
905 | { | 907 | { |
906 | struct blkio_policy_node *pn; | 908 | struct blkio_policy_node *pn; |
909 | unsigned long flags; | ||
910 | uint64_t bps = -1; | ||
911 | |||
912 | spin_lock_irqsave(&blkcg->lock, flags); | ||
907 | pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL, | 913 | pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL, |
908 | BLKIO_THROTL_write_bps_device); | 914 | BLKIO_THROTL_write_bps_device); |
909 | if (pn) | 915 | if (pn) |
910 | return pn->val.bps; | 916 | bps = pn->val.bps; |
911 | else | 917 | spin_unlock_irqrestore(&blkcg->lock, flags); |
912 | return -1; | 918 | |
919 | return bps; | ||
913 | } | 920 | } |
914 | 921 | ||
915 | unsigned int blkcg_get_read_iops(struct blkio_cgroup *blkcg, dev_t dev) | 922 | unsigned int blkcg_get_read_iops(struct blkio_cgroup *blkcg, dev_t dev) |
916 | { | 923 | { |
917 | struct blkio_policy_node *pn; | 924 | struct blkio_policy_node *pn; |
925 | unsigned long flags; | ||
926 | unsigned int iops = -1; | ||
918 | 927 | ||
928 | spin_lock_irqsave(&blkcg->lock, flags); | ||
919 | pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL, | 929 | pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL, |
920 | BLKIO_THROTL_read_iops_device); | 930 | BLKIO_THROTL_read_iops_device); |
921 | if (pn) | 931 | if (pn) |
922 | return pn->val.iops; | 932 | iops = pn->val.iops; |
923 | else | 933 | spin_unlock_irqrestore(&blkcg->lock, flags); |
924 | return -1; | 934 | |
935 | return iops; | ||
925 | } | 936 | } |
926 | 937 | ||
927 | unsigned int blkcg_get_write_iops(struct blkio_cgroup *blkcg, dev_t dev) | 938 | unsigned int blkcg_get_write_iops(struct blkio_cgroup *blkcg, dev_t dev) |
928 | { | 939 | { |
929 | struct blkio_policy_node *pn; | 940 | struct blkio_policy_node *pn; |
941 | unsigned long flags; | ||
942 | unsigned int iops = -1; | ||
943 | |||
944 | spin_lock_irqsave(&blkcg->lock, flags); | ||
930 | pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL, | 945 | pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL, |
931 | BLKIO_THROTL_write_iops_device); | 946 | BLKIO_THROTL_write_iops_device); |
932 | if (pn) | 947 | if (pn) |
933 | return pn->val.iops; | 948 | iops = pn->val.iops; |
934 | else | 949 | spin_unlock_irqrestore(&blkcg->lock, flags); |
935 | return -1; | 950 | |
951 | return iops; | ||
936 | } | 952 | } |
937 | 953 | ||
938 | /* Checks whether user asked for deleting a policy rule */ | 954 | /* Checks whether user asked for deleting a policy rule */ |
@@ -1085,6 +1101,7 @@ static int blkiocg_file_write(struct cgroup *cgrp, struct cftype *cft, | |||
1085 | 1101 | ||
1086 | if (blkio_delete_rule_command(newpn)) { | 1102 | if (blkio_delete_rule_command(newpn)) { |
1087 | blkio_policy_delete_node(pn); | 1103 | blkio_policy_delete_node(pn); |
1104 | kfree(pn); | ||
1088 | spin_unlock_irq(&blkcg->lock); | 1105 | spin_unlock_irq(&blkcg->lock); |
1089 | goto update_io_group; | 1106 | goto update_io_group; |
1090 | } | 1107 | } |
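The blk-cgroup.c hunks above converge on one cleanup pattern: look up the gendisk once, route every failure through a single exit label, and drop the reference exactly once. A distilled sketch under that assumption (the function name is hypothetical; put_disk() tolerates a NULL argument, which the unconditional call at the "out" label relies on):

	static int example_check_dev(dev_t dev)
	{
		struct gendisk *disk = NULL;
		int part, ret = -ENODEV;

		disk = get_gendisk(dev, &part);
		if (!disk || part)
			goto out;	/* not an existing whole device */

		ret = 0;		/* device present, the rule may be applied */
	out:
		put_disk(disk);		/* safe even when disk is NULL */
		return ret;
	}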
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index a71d2904ffb9..6f3ace7e792f 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -188,7 +188,7 @@ struct blkio_policy_node { | |||
188 | union { | 188 | union { |
189 | unsigned int weight; | 189 | unsigned int weight; |
190 | /* | 190 | /* |
191 | * Rate read/write in terms of byptes per second | 191 | * Rate read/write in terms of bytes per second |
192 | * Whether this rate represents read or write is determined | 192 | * Whether this rate represents read or write is determined |
193 | * by file type "fileid". | 193 | * by file type "fileid". |
194 | */ | 194 | */ |
diff --git a/block/blk-core.c b/block/blk-core.c
index d34433ae7917..f43c8a5840ae 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/task_io_accounting_ops.h> | 28 | #include <linux/task_io_accounting_ops.h> |
29 | #include <linux/fault-inject.h> | 29 | #include <linux/fault-inject.h> |
30 | #include <linux/list_sort.h> | 30 | #include <linux/list_sort.h> |
31 | #include <linux/delay.h> | ||
31 | 32 | ||
32 | #define CREATE_TRACE_POINTS | 33 | #define CREATE_TRACE_POINTS |
33 | #include <trace/events/block.h> | 34 | #include <trace/events/block.h> |
@@ -38,8 +39,6 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); | |||
38 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); | 39 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); |
39 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); | 40 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); |
40 | 41 | ||
41 | static int __make_request(struct request_queue *q, struct bio *bio); | ||
42 | |||
43 | /* | 42 | /* |
44 | * For the allocated request tables | 43 | * For the allocated request tables |
45 | */ | 44 | */ |
@@ -347,30 +346,80 @@ void blk_put_queue(struct request_queue *q) | |||
347 | } | 346 | } |
348 | EXPORT_SYMBOL(blk_put_queue); | 347 | EXPORT_SYMBOL(blk_put_queue); |
349 | 348 | ||
350 | /* | 349 | /** |
351 | * Note: If a driver supplied the queue lock, it is disconnected | 350 | * blk_drain_queue - drain requests from request_queue |
352 | * by this function. The actual state of the lock doesn't matter | 351 | * @q: queue to drain |
353 | * here as the request_queue isn't accessible after this point | 352 | * @drain_all: whether to drain all requests or only the ones w/ ELVPRIV |
354 | * (QUEUE_FLAG_DEAD is set) and no other requests will be queued. | 353 | * |
354 | * Drain requests from @q. If @drain_all is set, all requests are drained. | ||
355 | * If not, only ELVPRIV requests are drained. The caller is responsible | ||
356 | * for ensuring that no new requests which need to be drained are queued. | ||
357 | */ | ||
358 | void blk_drain_queue(struct request_queue *q, bool drain_all) | ||
359 | { | ||
360 | while (true) { | ||
361 | int nr_rqs; | ||
362 | |||
363 | spin_lock_irq(q->queue_lock); | ||
364 | |||
365 | elv_drain_elevator(q); | ||
366 | if (drain_all) | ||
367 | blk_throtl_drain(q); | ||
368 | |||
369 | __blk_run_queue(q); | ||
370 | |||
371 | if (drain_all) | ||
372 | nr_rqs = q->rq.count[0] + q->rq.count[1]; | ||
373 | else | ||
374 | nr_rqs = q->rq.elvpriv; | ||
375 | |||
376 | spin_unlock_irq(q->queue_lock); | ||
377 | |||
378 | if (!nr_rqs) | ||
379 | break; | ||
380 | msleep(10); | ||
381 | } | ||
382 | } | ||
383 | |||
384 | /** | ||
385 | * blk_cleanup_queue - shutdown a request queue | ||
386 | * @q: request queue to shutdown | ||
387 | * | ||
388 | * Mark @q DEAD, drain all pending requests, destroy and put it. All | ||
389 | * future requests will be failed immediately with -ENODEV. | ||
355 | */ | 390 | */ |
356 | void blk_cleanup_queue(struct request_queue *q) | 391 | void blk_cleanup_queue(struct request_queue *q) |
357 | { | 392 | { |
358 | /* | 393 | spinlock_t *lock = q->queue_lock; |
359 | * We know we have process context here, so we can be a little | ||
360 | * cautious and ensure that pending block actions on this device | ||
361 | * are done before moving on. Going into this function, we should | ||
362 | * not have processes doing IO to this device. | ||
363 | */ | ||
364 | blk_sync_queue(q); | ||
365 | 394 | ||
366 | del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer); | 395 | /* mark @q DEAD, no new request or merges will be allowed afterwards */ |
367 | mutex_lock(&q->sysfs_lock); | 396 | mutex_lock(&q->sysfs_lock); |
368 | queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q); | 397 | queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q); |
369 | mutex_unlock(&q->sysfs_lock); | 398 | |
399 | spin_lock_irq(lock); | ||
400 | queue_flag_set(QUEUE_FLAG_NOMERGES, q); | ||
401 | queue_flag_set(QUEUE_FLAG_NOXMERGES, q); | ||
402 | queue_flag_set(QUEUE_FLAG_DEAD, q); | ||
370 | 403 | ||
371 | if (q->queue_lock != &q->__queue_lock) | 404 | if (q->queue_lock != &q->__queue_lock) |
372 | q->queue_lock = &q->__queue_lock; | 405 | q->queue_lock = &q->__queue_lock; |
373 | 406 | ||
407 | spin_unlock_irq(lock); | ||
408 | mutex_unlock(&q->sysfs_lock); | ||
409 | |||
410 | /* | ||
411 | * Drain all requests queued before DEAD marking. The caller might | ||
412 | * be trying to tear down @q before its elevator is initialized, in | ||
413 | * which case we don't want to call into draining. | ||
414 | */ | ||
415 | if (q->elevator) | ||
416 | blk_drain_queue(q, true); | ||
417 | |||
418 | /* @q won't process any more request, flush async actions */ | ||
419 | del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer); | ||
420 | blk_sync_queue(q); | ||
421 | |||
422 | /* @q is and will stay empty, shutdown and put */ | ||
374 | blk_put_queue(q); | 423 | blk_put_queue(q); |
375 | } | 424 | } |
376 | EXPORT_SYMBOL(blk_cleanup_queue); | 425 | EXPORT_SYMBOL(blk_cleanup_queue); |
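From a driver's point of view, the reworked blk_cleanup_queue() above means the queue is marked dead and fully drained by the time the call returns. A hedged sketch of a typical removal path under that assumption (the structure and function names are hypothetical, not from this diff):

	struct example_dev {			/* hypothetical driver state */
		struct gendisk *disk;
		struct request_queue *queue;
	};

	static void example_remove(struct example_dev *dev)
	{
		del_gendisk(dev->disk);		/* stop new I/O submitters */
		blk_cleanup_queue(dev->queue);	/* mark DEAD, drain, put the queue */
		put_disk(dev->disk);
		kfree(dev);			/* no in-flight requests remain */
	}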
@@ -541,7 +590,7 @@ blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn, | |||
541 | /* | 590 | /* |
542 | * This also sets hw/phys segments, boundary and size | 591 | * This also sets hw/phys segments, boundary and size |
543 | */ | 592 | */ |
544 | blk_queue_make_request(q, __make_request); | 593 | blk_queue_make_request(q, blk_queue_bio); |
545 | 594 | ||
546 | q->sg_reserved_size = INT_MAX; | 595 | q->sg_reserved_size = INT_MAX; |
547 | 596 | ||
@@ -576,7 +625,7 @@ static inline void blk_free_request(struct request_queue *q, struct request *rq) | |||
576 | } | 625 | } |
577 | 626 | ||
578 | static struct request * | 627 | static struct request * |
579 | blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask) | 628 | blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask) |
580 | { | 629 | { |
581 | struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); | 630 | struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); |
582 | 631 | ||
@@ -587,12 +636,10 @@ blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask) | |||
587 | 636 | ||
588 | rq->cmd_flags = flags | REQ_ALLOCED; | 637 | rq->cmd_flags = flags | REQ_ALLOCED; |
589 | 638 | ||
590 | if (priv) { | 639 | if ((flags & REQ_ELVPRIV) && |
591 | if (unlikely(elv_set_request(q, rq, gfp_mask))) { | 640 | unlikely(elv_set_request(q, rq, gfp_mask))) { |
592 | mempool_free(rq, q->rq.rq_pool); | 641 | mempool_free(rq, q->rq.rq_pool); |
593 | return NULL; | 642 | return NULL; |
594 | } | ||
595 | rq->cmd_flags |= REQ_ELVPRIV; | ||
596 | } | 643 | } |
597 | 644 | ||
598 | return rq; | 645 | return rq; |
@@ -651,12 +698,13 @@ static void __freed_request(struct request_queue *q, int sync) | |||
651 | * A request has just been released. Account for it, update the full and | 698 | * A request has just been released. Account for it, update the full and |
652 | * congestion status, wake up any waiters. Called under q->queue_lock. | 699 | * congestion status, wake up any waiters. Called under q->queue_lock. |
653 | */ | 700 | */ |
654 | static void freed_request(struct request_queue *q, int sync, int priv) | 701 | static void freed_request(struct request_queue *q, unsigned int flags) |
655 | { | 702 | { |
656 | struct request_list *rl = &q->rq; | 703 | struct request_list *rl = &q->rq; |
704 | int sync = rw_is_sync(flags); | ||
657 | 705 | ||
658 | rl->count[sync]--; | 706 | rl->count[sync]--; |
659 | if (priv) | 707 | if (flags & REQ_ELVPRIV) |
660 | rl->elvpriv--; | 708 | rl->elvpriv--; |
661 | 709 | ||
662 | __freed_request(q, sync); | 710 | __freed_request(q, sync); |
@@ -684,10 +732,19 @@ static bool blk_rq_should_init_elevator(struct bio *bio) | |||
684 | return true; | 732 | return true; |
685 | } | 733 | } |
686 | 734 | ||
687 | /* | 735 | /** |
688 | * Get a free request, queue_lock must be held. | 736 | * get_request - get a free request |
689 | * Returns NULL on failure, with queue_lock held. | 737 | * @q: request_queue to allocate request from |
690 | * Returns !NULL on success, with queue_lock *not held*. | 738 | * @rw_flags: RW and SYNC flags |
739 | * @bio: bio to allocate request for (can be %NULL) | ||
740 | * @gfp_mask: allocation mask | ||
741 | * | ||
742 | * Get a free request from @q. This function may fail under memory | ||
743 | * pressure or if @q is dead. | ||
744 | * | ||
745 | * Must be callled with @q->queue_lock held and, | ||
746 | * Returns %NULL on failure, with @q->queue_lock held. | ||
747 | * Returns !%NULL on success, with @q->queue_lock *not held*. | ||
691 | */ | 748 | */ |
692 | static struct request *get_request(struct request_queue *q, int rw_flags, | 749 | static struct request *get_request(struct request_queue *q, int rw_flags, |
693 | struct bio *bio, gfp_t gfp_mask) | 750 | struct bio *bio, gfp_t gfp_mask) |
@@ -696,7 +753,10 @@ static struct request *get_request(struct request_queue *q, int rw_flags, | |||
696 | struct request_list *rl = &q->rq; | 753 | struct request_list *rl = &q->rq; |
697 | struct io_context *ioc = NULL; | 754 | struct io_context *ioc = NULL; |
698 | const bool is_sync = rw_is_sync(rw_flags) != 0; | 755 | const bool is_sync = rw_is_sync(rw_flags) != 0; |
699 | int may_queue, priv = 0; | 756 | int may_queue; |
757 | |||
758 | if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) | ||
759 | return NULL; | ||
700 | 760 | ||
701 | may_queue = elv_may_queue(q, rw_flags); | 761 | may_queue = elv_may_queue(q, rw_flags); |
702 | if (may_queue == ELV_MQUEUE_NO) | 762 | if (may_queue == ELV_MQUEUE_NO) |
@@ -740,17 +800,17 @@ static struct request *get_request(struct request_queue *q, int rw_flags, | |||
740 | rl->count[is_sync]++; | 800 | rl->count[is_sync]++; |
741 | rl->starved[is_sync] = 0; | 801 | rl->starved[is_sync] = 0; |
742 | 802 | ||
743 | if (blk_rq_should_init_elevator(bio)) { | 803 | if (blk_rq_should_init_elevator(bio) && |
744 | priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); | 804 | !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags)) { |
745 | if (priv) | 805 | rw_flags |= REQ_ELVPRIV; |
746 | rl->elvpriv++; | 806 | rl->elvpriv++; |
747 | } | 807 | } |
748 | 808 | ||
749 | if (blk_queue_io_stat(q)) | 809 | if (blk_queue_io_stat(q)) |
750 | rw_flags |= REQ_IO_STAT; | 810 | rw_flags |= REQ_IO_STAT; |
751 | spin_unlock_irq(q->queue_lock); | 811 | spin_unlock_irq(q->queue_lock); |
752 | 812 | ||
753 | rq = blk_alloc_request(q, rw_flags, priv, gfp_mask); | 813 | rq = blk_alloc_request(q, rw_flags, gfp_mask); |
754 | if (unlikely(!rq)) { | 814 | if (unlikely(!rq)) { |
755 | /* | 815 | /* |
756 | * Allocation failed presumably due to memory. Undo anything | 816 | * Allocation failed presumably due to memory. Undo anything |
@@ -760,7 +820,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags, | |||
760 | * wait queue, but this is pretty rare. | 820 | * wait queue, but this is pretty rare. |
761 | */ | 821 | */ |
762 | spin_lock_irq(q->queue_lock); | 822 | spin_lock_irq(q->queue_lock); |
763 | freed_request(q, is_sync, priv); | 823 | freed_request(q, rw_flags); |
764 | 824 | ||
765 | /* | 825 | /* |
766 | * in the very unlikely event that allocation failed and no | 826 | * in the very unlikely event that allocation failed and no |
@@ -790,11 +850,18 @@ out: | |||
790 | return rq; | 850 | return rq; |
791 | } | 851 | } |
792 | 852 | ||
793 | /* | 853 | /** |
794 | * No available requests for this queue, wait for some requests to become | 854 | * get_request_wait - get a free request with retry |
795 | * available. | 855 | * @q: request_queue to allocate request from |
856 | * @rw_flags: RW and SYNC flags | ||
857 | * @bio: bio to allocate request for (can be %NULL) | ||
858 | * | ||
859 | * Get a free request from @q. This function keeps retrying under memory | ||
860 | * pressure and fails iff @q is dead. | ||
796 | * | 861 | * |
797 | * Called with q->queue_lock held, and returns with it unlocked. | 862 | * Must be callled with @q->queue_lock held and, |
863 | * Returns %NULL on failure, with @q->queue_lock held. | ||
864 | * Returns !%NULL on success, with @q->queue_lock *not held*. | ||
798 | */ | 865 | */ |
799 | static struct request *get_request_wait(struct request_queue *q, int rw_flags, | 866 | static struct request *get_request_wait(struct request_queue *q, int rw_flags, |
800 | struct bio *bio) | 867 | struct bio *bio) |
@@ -808,6 +875,9 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags, | |||
808 | struct io_context *ioc; | 875 | struct io_context *ioc; |
809 | struct request_list *rl = &q->rq; | 876 | struct request_list *rl = &q->rq; |
810 | 877 | ||
878 | if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) | ||
879 | return NULL; | ||
880 | |||
811 | prepare_to_wait_exclusive(&rl->wait[is_sync], &wait, | 881 | prepare_to_wait_exclusive(&rl->wait[is_sync], &wait, |
812 | TASK_UNINTERRUPTIBLE); | 882 | TASK_UNINTERRUPTIBLE); |
813 | 883 | ||
@@ -838,19 +908,15 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) | |||
838 | { | 908 | { |
839 | struct request *rq; | 909 | struct request *rq; |
840 | 910 | ||
841 | if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) | ||
842 | return NULL; | ||
843 | |||
844 | BUG_ON(rw != READ && rw != WRITE); | 911 | BUG_ON(rw != READ && rw != WRITE); |
845 | 912 | ||
846 | spin_lock_irq(q->queue_lock); | 913 | spin_lock_irq(q->queue_lock); |
847 | if (gfp_mask & __GFP_WAIT) { | 914 | if (gfp_mask & __GFP_WAIT) |
848 | rq = get_request_wait(q, rw, NULL); | 915 | rq = get_request_wait(q, rw, NULL); |
849 | } else { | 916 | else |
850 | rq = get_request(q, rw, NULL, gfp_mask); | 917 | rq = get_request(q, rw, NULL, gfp_mask); |
851 | if (!rq) | 918 | if (!rq) |
852 | spin_unlock_irq(q->queue_lock); | 919 | spin_unlock_irq(q->queue_lock); |
853 | } | ||
854 | /* q->queue_lock is unlocked at this point */ | 920 | /* q->queue_lock is unlocked at this point */ |
855 | 921 | ||
856 | return rq; | 922 | return rq; |
@@ -1052,14 +1118,13 @@ void __blk_put_request(struct request_queue *q, struct request *req) | |||
1052 | * it didn't come out of our reserved rq pools | 1118 | * it didn't come out of our reserved rq pools |
1053 | */ | 1119 | */ |
1054 | if (req->cmd_flags & REQ_ALLOCED) { | 1120 | if (req->cmd_flags & REQ_ALLOCED) { |
1055 | int is_sync = rq_is_sync(req) != 0; | 1121 | unsigned int flags = req->cmd_flags; |
1056 | int priv = req->cmd_flags & REQ_ELVPRIV; | ||
1057 | 1122 | ||
1058 | BUG_ON(!list_empty(&req->queuelist)); | 1123 | BUG_ON(!list_empty(&req->queuelist)); |
1059 | BUG_ON(!hlist_unhashed(&req->hash)); | 1124 | BUG_ON(!hlist_unhashed(&req->hash)); |
1060 | 1125 | ||
1061 | blk_free_request(q, req); | 1126 | blk_free_request(q, req); |
1062 | freed_request(q, is_sync, priv); | 1127 | freed_request(q, flags); |
1063 | } | 1128 | } |
1064 | } | 1129 | } |
1065 | EXPORT_SYMBOL_GPL(__blk_put_request); | 1130 | EXPORT_SYMBOL_GPL(__blk_put_request); |
@@ -1161,18 +1226,32 @@ static bool bio_attempt_front_merge(struct request_queue *q, | |||
1161 | return true; | 1226 | return true; |
1162 | } | 1227 | } |
1163 | 1228 | ||
1164 | /* | 1229 | /** |
1165 | * Attempts to merge with the plugged list in the current process. Returns | 1230 | * attempt_plug_merge - try to merge with %current's plugged list |
1166 | * true if merge was successful, otherwise false. | 1231 | * @q: request_queue new bio is being queued at |
1232 | * @bio: new bio being queued | ||
1233 | * @request_count: out parameter for number of traversed plugged requests | ||
1234 | * | ||
1235 | * Determine whether @bio being queued on @q can be merged with a request | ||
1236 | * on %current's plugged list. Returns %true if merge was successful, | ||
1237 | * otherwise %false. | ||
1238 | * | ||
1239 | * This function is called without @q->queue_lock; however, elevator is | ||
1240 | * accessed iff there already are requests on the plugged list which in | ||
1241 | * turn guarantees validity of the elevator. | ||
1242 | * | ||
1243 | * Note that, on successful merge, elevator operation | ||
1244 | * elevator_bio_merged_fn() will be called without queue lock. Elevator | ||
1245 | * must be ready for this. | ||
1167 | */ | 1246 | */ |
1168 | static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q, | 1247 | static bool attempt_plug_merge(struct request_queue *q, struct bio *bio, |
1169 | struct bio *bio, unsigned int *request_count) | 1248 | unsigned int *request_count) |
1170 | { | 1249 | { |
1171 | struct blk_plug *plug; | 1250 | struct blk_plug *plug; |
1172 | struct request *rq; | 1251 | struct request *rq; |
1173 | bool ret = false; | 1252 | bool ret = false; |
1174 | 1253 | ||
1175 | plug = tsk->plug; | 1254 | plug = current->plug; |
1176 | if (!plug) | 1255 | if (!plug) |
1177 | goto out; | 1256 | goto out; |
1178 | *request_count = 0; | 1257 | *request_count = 0; |
@@ -1202,7 +1281,6 @@ out: | |||
1202 | 1281 | ||
1203 | void init_request_from_bio(struct request *req, struct bio *bio) | 1282 | void init_request_from_bio(struct request *req, struct bio *bio) |
1204 | { | 1283 | { |
1205 | req->cpu = bio->bi_comp_cpu; | ||
1206 | req->cmd_type = REQ_TYPE_FS; | 1284 | req->cmd_type = REQ_TYPE_FS; |
1207 | 1285 | ||
1208 | req->cmd_flags |= bio->bi_rw & REQ_COMMON_MASK; | 1286 | req->cmd_flags |= bio->bi_rw & REQ_COMMON_MASK; |
@@ -1215,7 +1293,7 @@ void init_request_from_bio(struct request *req, struct bio *bio) | |||
1215 | blk_rq_bio_prep(req->q, req, bio); | 1293 | blk_rq_bio_prep(req->q, req, bio); |
1216 | } | 1294 | } |
1217 | 1295 | ||
1218 | static int __make_request(struct request_queue *q, struct bio *bio) | 1296 | void blk_queue_bio(struct request_queue *q, struct bio *bio) |
1219 | { | 1297 | { |
1220 | const bool sync = !!(bio->bi_rw & REQ_SYNC); | 1298 | const bool sync = !!(bio->bi_rw & REQ_SYNC); |
1221 | struct blk_plug *plug; | 1299 | struct blk_plug *plug; |
@@ -1240,8 +1318,8 @@ static int __make_request(struct request_queue *q, struct bio *bio) | |||
1240 | * Check if we can merge with the plugged list before grabbing | 1318 | * Check if we can merge with the plugged list before grabbing |
1241 | * any locks. | 1319 | * any locks. |
1242 | */ | 1320 | */ |
1243 | if (attempt_plug_merge(current, q, bio, &request_count)) | 1321 | if (attempt_plug_merge(q, bio, &request_count)) |
1244 | goto out; | 1322 | return; |
1245 | 1323 | ||
1246 | spin_lock_irq(q->queue_lock); | 1324 | spin_lock_irq(q->queue_lock); |
1247 | 1325 | ||
@@ -1275,6 +1353,10 @@ get_rq: | |||
1275 | * Returns with the queue unlocked. | 1353 | * Returns with the queue unlocked. |
1276 | */ | 1354 | */ |
1277 | req = get_request_wait(q, rw_flags, bio); | 1355 | req = get_request_wait(q, rw_flags, bio); |
1356 | if (unlikely(!req)) { | ||
1357 | bio_endio(bio, -ENODEV); /* @q is dead */ | ||
1358 | goto out_unlock; | ||
1359 | } | ||
1278 | 1360 | ||
1279 | /* | 1361 | /* |
1280 | * After dropping the lock and possibly sleeping here, our request | 1362 | * After dropping the lock and possibly sleeping here, our request |
@@ -1284,8 +1366,7 @@ get_rq: | |||
1284 | */ | 1366 | */ |
1285 | init_request_from_bio(req, bio); | 1367 | init_request_from_bio(req, bio); |
1286 | 1368 | ||
1287 | if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) || | 1369 | if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags)) |
1288 | bio_flagged(bio, BIO_CPU_AFFINE)) | ||
1289 | req->cpu = raw_smp_processor_id(); | 1370 | req->cpu = raw_smp_processor_id(); |
1290 | 1371 | ||
1291 | plug = current->plug; | 1372 | plug = current->plug; |
@@ -1316,9 +1397,8 @@ get_rq: | |||
1316 | out_unlock: | 1397 | out_unlock: |
1317 | spin_unlock_irq(q->queue_lock); | 1398 | spin_unlock_irq(q->queue_lock); |
1318 | } | 1399 | } |
1319 | out: | ||
1320 | return 0; | ||
1321 | } | 1400 | } |
1401 | EXPORT_SYMBOL_GPL(blk_queue_bio); /* for device mapper only */ | ||
1322 | 1402 | ||
1323 | /* | 1403 | /* |
1324 | * If bio->bi_dev is a partition, remap the location | 1404 | * If bio->bi_dev is a partition, remap the location |
@@ -1417,165 +1497,135 @@ static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors) | |||
1417 | return 0; | 1497 | return 0; |
1418 | } | 1498 | } |
1419 | 1499 | ||
1420 | /** | 1500 | static noinline_for_stack bool |
1421 | * generic_make_request - hand a buffer to its device driver for I/O | 1501 | generic_make_request_checks(struct bio *bio) |
1422 | * @bio: The bio describing the location in memory and on the device. | ||
1423 | * | ||
1424 | * generic_make_request() is used to make I/O requests of block | ||
1425 | * devices. It is passed a &struct bio, which describes the I/O that needs | ||
1426 | * to be done. | ||
1427 | * | ||
1428 | * generic_make_request() does not return any status. The | ||
1429 | * success/failure status of the request, along with notification of | ||
1430 | * completion, is delivered asynchronously through the bio->bi_end_io | ||
1431 | * function described (one day) else where. | ||
1432 | * | ||
1433 | * The caller of generic_make_request must make sure that bi_io_vec | ||
1434 | * are set to describe the memory buffer, and that bi_dev and bi_sector are | ||
1435 | * set to describe the device address, and the | ||
1436 | * bi_end_io and optionally bi_private are set to describe how | ||
1437 | * completion notification should be signaled. | ||
1438 | * | ||
1439 | * generic_make_request and the drivers it calls may use bi_next if this | ||
1440 | * bio happens to be merged with someone else, and may change bi_dev and | ||
1441 | * bi_sector for remaps as it sees fit. So the values of these fields | ||
1442 | * should NOT be depended on after the call to generic_make_request. | ||
1443 | */ | ||
1444 | static inline void __generic_make_request(struct bio *bio) | ||
1445 | { | 1502 | { |
1446 | struct request_queue *q; | 1503 | struct request_queue *q; |
1447 | sector_t old_sector; | 1504 | int nr_sectors = bio_sectors(bio); |
1448 | int ret, nr_sectors = bio_sectors(bio); | ||
1449 | dev_t old_dev; | ||
1450 | int err = -EIO; | 1505 | int err = -EIO; |
1506 | char b[BDEVNAME_SIZE]; | ||
1507 | struct hd_struct *part; | ||
1451 | 1508 | ||
1452 | might_sleep(); | 1509 | might_sleep(); |
1453 | 1510 | ||
1454 | if (bio_check_eod(bio, nr_sectors)) | 1511 | if (bio_check_eod(bio, nr_sectors)) |
1455 | goto end_io; | 1512 | goto end_io; |
1456 | 1513 | ||
1457 | /* | 1514 | q = bdev_get_queue(bio->bi_bdev); |
1458 | * Resolve the mapping until finished. (drivers are | 1515 | if (unlikely(!q)) { |
1459 | * still free to implement/resolve their own stacking | 1516 | printk(KERN_ERR |
1460 | * by explicitly returning 0) | 1517 | "generic_make_request: Trying to access " |
1461 | * | 1518 | "nonexistent block-device %s (%Lu)\n", |
1462 | * NOTE: we don't repeat the blk_size check for each new device. | 1519 | bdevname(bio->bi_bdev, b), |
1463 | * Stacking drivers are expected to know what they are doing. | 1520 | (long long) bio->bi_sector); |
1464 | */ | 1521 | goto end_io; |
1465 | old_sector = -1; | 1522 | } |
1466 | old_dev = 0; | ||
1467 | do { | ||
1468 | char b[BDEVNAME_SIZE]; | ||
1469 | struct hd_struct *part; | ||
1470 | |||
1471 | q = bdev_get_queue(bio->bi_bdev); | ||
1472 | if (unlikely(!q)) { | ||
1473 | printk(KERN_ERR | ||
1474 | "generic_make_request: Trying to access " | ||
1475 | "nonexistent block-device %s (%Lu)\n", | ||
1476 | bdevname(bio->bi_bdev, b), | ||
1477 | (long long) bio->bi_sector); | ||
1478 | goto end_io; | ||
1479 | } | ||
1480 | |||
1481 | if (unlikely(!(bio->bi_rw & REQ_DISCARD) && | ||
1482 | nr_sectors > queue_max_hw_sectors(q))) { | ||
1483 | printk(KERN_ERR "bio too big device %s (%u > %u)\n", | ||
1484 | bdevname(bio->bi_bdev, b), | ||
1485 | bio_sectors(bio), | ||
1486 | queue_max_hw_sectors(q)); | ||
1487 | goto end_io; | ||
1488 | } | ||
1489 | |||
1490 | if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) | ||
1491 | goto end_io; | ||
1492 | |||
1493 | part = bio->bi_bdev->bd_part; | ||
1494 | if (should_fail_request(part, bio->bi_size) || | ||
1495 | should_fail_request(&part_to_disk(part)->part0, | ||
1496 | bio->bi_size)) | ||
1497 | goto end_io; | ||
1498 | |||
1499 | /* | ||
1500 | * If this device has partitions, remap block n | ||
1501 | * of partition p to block n+start(p) of the disk. | ||
1502 | */ | ||
1503 | blk_partition_remap(bio); | ||
1504 | 1523 | ||
1505 | if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) | 1524 | if (unlikely(!(bio->bi_rw & REQ_DISCARD) && |
1506 | goto end_io; | 1525 | nr_sectors > queue_max_hw_sectors(q))) { |
1526 | printk(KERN_ERR "bio too big device %s (%u > %u)\n", | ||
1527 | bdevname(bio->bi_bdev, b), | ||
1528 | bio_sectors(bio), | ||
1529 | queue_max_hw_sectors(q)); | ||
1530 | goto end_io; | ||
1531 | } | ||
1507 | 1532 | ||
1508 | if (old_sector != -1) | 1533 | part = bio->bi_bdev->bd_part; |
1509 | trace_block_bio_remap(q, bio, old_dev, old_sector); | 1534 | if (should_fail_request(part, bio->bi_size) || |
1535 | should_fail_request(&part_to_disk(part)->part0, | ||
1536 | bio->bi_size)) | ||
1537 | goto end_io; | ||
1510 | 1538 | ||
1511 | old_sector = bio->bi_sector; | 1539 | /* |
1512 | old_dev = bio->bi_bdev->bd_dev; | 1540 | * If this device has partitions, remap block n |
1541 | * of partition p to block n+start(p) of the disk. | ||
1542 | */ | ||
1543 | blk_partition_remap(bio); | ||
1513 | 1544 | ||
1514 | if (bio_check_eod(bio, nr_sectors)) | 1545 | if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) |
1515 | goto end_io; | 1546 | goto end_io; |
1516 | 1547 | ||
1517 | /* | 1548 | if (bio_check_eod(bio, nr_sectors)) |
1518 | * Filter flush bio's early so that make_request based | 1549 | goto end_io; |
1519 | * drivers without flush support don't have to worry | ||
1520 | * about them. | ||
1521 | */ | ||
1522 | if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) { | ||
1523 | bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA); | ||
1524 | if (!nr_sectors) { | ||
1525 | err = 0; | ||
1526 | goto end_io; | ||
1527 | } | ||
1528 | } | ||
1529 | 1550 | ||
1530 | if ((bio->bi_rw & REQ_DISCARD) && | 1551 | /* |
1531 | (!blk_queue_discard(q) || | 1552 | * Filter flush bio's early so that make_request based |
1532 | ((bio->bi_rw & REQ_SECURE) && | 1553 | * drivers without flush support don't have to worry |
1533 | !blk_queue_secdiscard(q)))) { | 1554 | * about them. |
1534 | err = -EOPNOTSUPP; | 1555 | */ |
1556 | if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) { | ||
1557 | bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA); | ||
1558 | if (!nr_sectors) { | ||
1559 | err = 0; | ||
1535 | goto end_io; | 1560 | goto end_io; |
1536 | } | 1561 | } |
1562 | } | ||
1537 | 1563 | ||
1538 | if (blk_throtl_bio(q, &bio)) | 1564 | if ((bio->bi_rw & REQ_DISCARD) && |
1539 | goto end_io; | 1565 | (!blk_queue_discard(q) || |
1540 | 1566 | ((bio->bi_rw & REQ_SECURE) && | |
1541 | /* | 1567 | !blk_queue_secdiscard(q)))) { |
1542 | * If bio = NULL, bio has been throttled and will be submitted | 1568 | err = -EOPNOTSUPP; |
1543 | * later. | 1569 | goto end_io; |
1544 | */ | 1570 | } |
1545 | if (!bio) | ||
1546 | break; | ||
1547 | |||
1548 | trace_block_bio_queue(q, bio); | ||
1549 | 1571 | ||
1550 | ret = q->make_request_fn(q, bio); | 1572 | if (blk_throtl_bio(q, bio)) |
1551 | } while (ret); | 1573 | return false; /* throttled, will be resubmitted later */ |
1552 | 1574 | ||
1553 | return; | 1575 | trace_block_bio_queue(q, bio); |
1576 | return true; | ||
1554 | 1577 | ||
1555 | end_io: | 1578 | end_io: |
1556 | bio_endio(bio, err); | 1579 | bio_endio(bio, err); |
1580 | return false; | ||
1557 | } | 1581 | } |
1558 | 1582 | ||
1559 | /* | 1583 | /** |
1560 | * We only want one ->make_request_fn to be active at a time, | 1584 | * generic_make_request - hand a buffer to its device driver for I/O |
1561 | * else stack usage with stacked devices could be a problem. | 1585 | * @bio: The bio describing the location in memory and on the device. |
1562 | * So use current->bio_list to keep a list of requests | 1586 | * |
1563 | * submited by a make_request_fn function. | 1587 | * generic_make_request() is used to make I/O requests of block |
1564 | * current->bio_list is also used as a flag to say if | 1588 | * devices. It is passed a &struct bio, which describes the I/O that needs |
1565 | * generic_make_request is currently active in this task or not. | 1589 | * to be done. |
1566 | * If it is NULL, then no make_request is active. If it is non-NULL, | 1590 | * |
1567 | * then a make_request is active, and new requests should be added | 1591 | * generic_make_request() does not return any status. The |
1568 | * at the tail | 1592 | * success/failure status of the request, along with notification of |
1593 | * completion, is delivered asynchronously through the bio->bi_end_io | ||
1594 | * function described (one day) else where. | ||
1595 | * | ||
1596 | * The caller of generic_make_request must make sure that bi_io_vec | ||
1597 | * are set to describe the memory buffer, and that bi_dev and bi_sector are | ||
1598 | * set to describe the device address, and the | ||
1599 | * bi_end_io and optionally bi_private are set to describe how | ||
1600 | * completion notification should be signaled. | ||
1601 | * | ||
1602 | * generic_make_request and the drivers it calls may use bi_next if this | ||
1603 | * bio happens to be merged with someone else, and may resubmit the bio to | ||
1604 | * a lower device by calling into generic_make_request recursively, which | ||
1605 | * means the bio should NOT be touched after the call to ->make_request_fn. | ||
1569 | */ | 1606 | */ |
1570 | void generic_make_request(struct bio *bio) | 1607 | void generic_make_request(struct bio *bio) |
1571 | { | 1608 | { |
1572 | struct bio_list bio_list_on_stack; | 1609 | struct bio_list bio_list_on_stack; |
1573 | 1610 | ||
1611 | if (!generic_make_request_checks(bio)) | ||
1612 | return; | ||
1613 | |||
1614 | /* | ||
1615 | * We only want one ->make_request_fn to be active at a time, else | ||
1616 | * stack usage with stacked devices could be a problem. So use | ||
1617 | * current->bio_list to keep a list of requests submited by a | ||
1618 | * make_request_fn function. current->bio_list is also used as a | ||
1619 | * flag to say if generic_make_request is currently active in this | ||
1620 | * task or not. If it is NULL, then no make_request is active. If | ||
1621 | * it is non-NULL, then a make_request is active, and new requests | ||
1622 | * should be added at the tail | ||
1623 | */ | ||
1574 | if (current->bio_list) { | 1624 | if (current->bio_list) { |
1575 | /* make_request is active */ | ||
1576 | bio_list_add(current->bio_list, bio); | 1625 | bio_list_add(current->bio_list, bio); |
1577 | return; | 1626 | return; |
1578 | } | 1627 | } |
1628 | |||
1579 | /* following loop may be a bit non-obvious, and so deserves some | 1629 | /* following loop may be a bit non-obvious, and so deserves some |
1580 | * explanation. | 1630 | * explanation. |
1581 | * Before entering the loop, bio->bi_next is NULL (as all callers | 1631 | * Before entering the loop, bio->bi_next is NULL (as all callers |
@@ -1583,22 +1633,21 @@ void generic_make_request(struct bio *bio) | |||
1583 | * We pretend that we have just taken it off a longer list, so | 1633 | * We pretend that we have just taken it off a longer list, so |
1584 | * we assign bio_list to a pointer to the bio_list_on_stack, | 1634 | * we assign bio_list to a pointer to the bio_list_on_stack, |
1585 | * thus initialising the bio_list of new bios to be | 1635 | * thus initialising the bio_list of new bios to be |
1586 | * added. __generic_make_request may indeed add some more bios | 1636 | * added. ->make_request() may indeed add some more bios |
1587 | * through a recursive call to generic_make_request. If it | 1637 | * through a recursive call to generic_make_request. If it |
1588 | * did, we find a non-NULL value in bio_list and re-enter the loop | 1638 | * did, we find a non-NULL value in bio_list and re-enter the loop |
1589 | * from the top. In this case we really did just take the bio | 1639 | * from the top. In this case we really did just take the bio |
1590 | * of the top of the list (no pretending) and so remove it from | 1640 | * of the top of the list (no pretending) and so remove it from |
1591 | * bio_list, and call into __generic_make_request again. | 1641 | * bio_list, and call into ->make_request() again. |
1592 | * | ||
1593 | * The loop was structured like this to make only one call to | ||
1594 | * __generic_make_request (which is important as it is large and | ||
1595 | * inlined) and to keep the structure simple. | ||
1596 | */ | 1642 | */ |
1597 | BUG_ON(bio->bi_next); | 1643 | BUG_ON(bio->bi_next); |
1598 | bio_list_init(&bio_list_on_stack); | 1644 | bio_list_init(&bio_list_on_stack); |
1599 | current->bio_list = &bio_list_on_stack; | 1645 | current->bio_list = &bio_list_on_stack; |
1600 | do { | 1646 | do { |
1601 | __generic_make_request(bio); | 1647 | struct request_queue *q = bdev_get_queue(bio->bi_bdev); |
1648 | |||
1649 | q->make_request_fn(q, bio); | ||
1650 | |||
1602 | bio = bio_list_pop(current->bio_list); | 1651 | bio = bio_list_pop(current->bio_list); |
1603 | } while (bio); | 1652 | } while (bio); |
1604 | current->bio_list = NULL; /* deactivate */ | 1653 | current->bio_list = NULL; /* deactivate */ |
@@ -1725,6 +1774,8 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq) | |||
1725 | where = ELEVATOR_INSERT_FLUSH; | 1774 | where = ELEVATOR_INSERT_FLUSH; |
1726 | 1775 | ||
1727 | add_acct_request(q, rq, where); | 1776 | add_acct_request(q, rq, where); |
1777 | if (where == ELEVATOR_INSERT_FLUSH) | ||
1778 | __blk_run_queue(q); | ||
1728 | spin_unlock_irqrestore(q->queue_lock, flags); | 1779 | spin_unlock_irqrestore(q->queue_lock, flags); |
1729 | 1780 | ||
1730 | return 0; | 1781 | return 0; |
@@ -2628,6 +2679,20 @@ EXPORT_SYMBOL(kblockd_schedule_delayed_work); | |||
2628 | 2679 | ||
2629 | #define PLUG_MAGIC 0x91827364 | 2680 | #define PLUG_MAGIC 0x91827364 |
2630 | 2681 | ||
2682 | /** | ||
2683 | * blk_start_plug - initialize blk_plug and track it inside the task_struct | ||
2684 | * @plug: The &struct blk_plug that needs to be initialized | ||
2685 | * | ||
2686 | * Description: | ||
2687 | * Tracking blk_plug inside the task_struct will help with auto-flushing the | ||
2688 | * pending I/O should the task end up blocking between blk_start_plug() and | ||
2689 | * blk_finish_plug(). This is important from a performance perspective, but | ||
2690 | * also ensures that we don't deadlock. For instance, if the task is blocking | ||
2691 | * for a memory allocation, memory reclaim could end up wanting to free a | ||
2692 | * page belonging to that request that is currently residing in our private | ||
2693 | * plug. By flushing the pending I/O when the process goes to sleep, we avoid | ||
2694 | * this kind of deadlock. | ||
2695 | */ | ||
2631 | void blk_start_plug(struct blk_plug *plug) | 2696 | void blk_start_plug(struct blk_plug *plug) |
2632 | { | 2697 | { |
2633 | struct task_struct *tsk = current; | 2698 | struct task_struct *tsk = current; |
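One caller-visible consequence of the blk-core.c changes above: the dead-queue check now lives in get_request[_wait]() itself, so blk_get_request() returns NULL once a queue has been marked dead, even for __GFP_WAIT allocations. A minimal illustration of the check a call site needs (the helper name is hypothetical):

	/* Hypothetical call site, shown only to illustrate the failure mode. */
	static int example_send_rq(struct request_queue *q)
	{
		struct request *rq;

		rq = blk_get_request(q, READ, GFP_KERNEL);
		if (!rq)
			return -ENODEV;	/* queue is dead or being torn down */

		blk_put_request(rq);	/* a real caller would set it up and issue it */
		return 0;
	}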
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 491eb30a242d..720ad607ff91 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -320,7 +320,7 @@ void blk_insert_flush(struct request *rq) | |||
320 | return; | 320 | return; |
321 | } | 321 | } |
322 | 322 | ||
323 | BUG_ON(!rq->bio || rq->bio != rq->biotail); | 323 | BUG_ON(rq->bio != rq->biotail); /*assumes zero or single bio rq */ |
324 | 324 | ||
325 | /* | 325 | /* |
326 | * If there's data but flush is not necessary, the request can be | 326 | * If there's data but flush is not necessary, the request can be |
@@ -330,7 +330,6 @@ void blk_insert_flush(struct request *rq) | |||
330 | if ((policy & REQ_FSEQ_DATA) && | 330 | if ((policy & REQ_FSEQ_DATA) && |
331 | !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { | 331 | !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { |
332 | list_add_tail(&rq->queuelist, &q->queue_head); | 332 | list_add_tail(&rq->queuelist, &q->queue_head); |
333 | blk_run_queue_async(q); | ||
334 | return; | 333 | return; |
335 | } | 334 | } |
336 | 335 | ||
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 60fda88c57f0..e7f9f657f105 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -457,11 +457,11 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr, | |||
457 | } | 457 | } |
458 | 458 | ||
459 | /** | 459 | /** |
460 | * blk_cleanup_queue: - release a &struct request_queue when it is no longer needed | 460 | * blk_release_queue: - release a &struct request_queue when it is no longer needed |
461 | * @kobj: the kobj belonging of the request queue to be released | 461 | * @kobj: the kobj belonging to the request queue to be released |
462 | * | 462 | * |
463 | * Description: | 463 | * Description: |
464 | * blk_cleanup_queue is the pair to blk_init_queue() or | 464 | * blk_release_queue is the pair to blk_init_queue() or |
465 | * blk_queue_make_request(). It should be called when a request queue is | 465 | * blk_queue_make_request(). It should be called when a request queue is |
466 | * being released; typically when a block device is being de-registered. | 466 | * being released; typically when a block device is being de-registered. |
467 | * Currently, its primary task it to free all the &struct request | 467 | * Currently, its primary task it to free all the &struct request |
@@ -490,6 +490,7 @@ static void blk_release_queue(struct kobject *kobj) | |||
490 | if (q->queue_tags) | 490 | if (q->queue_tags) |
491 | __blk_queue_free_tags(q); | 491 | __blk_queue_free_tags(q); |
492 | 492 | ||
493 | blk_throtl_release(q); | ||
493 | blk_trace_shutdown(q); | 494 | blk_trace_shutdown(q); |
494 | 495 | ||
495 | bdi_destroy(&q->backing_dev_info); | 496 | bdi_destroy(&q->backing_dev_info); |
diff --git a/block/blk-tag.c b/block/blk-tag.c
index ece65fc4c79b..e74d6d13838f 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -286,12 +286,14 @@ void blk_queue_end_tag(struct request_queue *q, struct request *rq) | |||
286 | 286 | ||
287 | BUG_ON(tag == -1); | 287 | BUG_ON(tag == -1); |
288 | 288 | ||
289 | if (unlikely(tag >= bqt->real_max_depth)) | 289 | if (unlikely(tag >= bqt->max_depth)) { |
290 | /* | 290 | /* |
291 | * This can happen after tag depth has been reduced. | 291 | * This can happen after tag depth has been reduced. |
292 | * FIXME: how about a warning or info message here? | 292 | * But tag shouldn't be larger than real_max_depth. |
293 | */ | 293 | */ |
294 | WARN_ON(tag >= bqt->real_max_depth); | ||
294 | return; | 295 | return; |
296 | } | ||
295 | 297 | ||
296 | list_del_init(&rq->queuelist); | 298 | list_del_init(&rq->queuelist); |
297 | rq->cmd_flags &= ~REQ_QUEUED; | 299 | rq->cmd_flags &= ~REQ_QUEUED; |
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index a19f58c6fc3a..4553245d9317 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/bio.h> | 10 | #include <linux/bio.h> |
11 | #include <linux/blktrace_api.h> | 11 | #include <linux/blktrace_api.h> |
12 | #include "blk-cgroup.h" | 12 | #include "blk-cgroup.h" |
13 | #include "blk.h" | ||
13 | 14 | ||
14 | /* Max dispatch from a group in 1 round */ | 15 | /* Max dispatch from a group in 1 round */ |
15 | static int throtl_grp_quantum = 8; | 16 | static int throtl_grp_quantum = 8; |
@@ -302,16 +303,16 @@ throtl_grp *throtl_find_tg(struct throtl_data *td, struct blkio_cgroup *blkcg) | |||
302 | return tg; | 303 | return tg; |
303 | } | 304 | } |
304 | 305 | ||
305 | /* | ||
306 | * This function returns with queue lock unlocked in case of error, like | ||
307 | * request queue is no more | ||
308 | */ | ||
309 | static struct throtl_grp * throtl_get_tg(struct throtl_data *td) | 306 | static struct throtl_grp * throtl_get_tg(struct throtl_data *td) |
310 | { | 307 | { |
311 | struct throtl_grp *tg = NULL, *__tg = NULL; | 308 | struct throtl_grp *tg = NULL, *__tg = NULL; |
312 | struct blkio_cgroup *blkcg; | 309 | struct blkio_cgroup *blkcg; |
313 | struct request_queue *q = td->queue; | 310 | struct request_queue *q = td->queue; |
314 | 311 | ||
312 | /* no throttling for dead queue */ | ||
313 | if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) | ||
314 | return NULL; | ||
315 | |||
315 | rcu_read_lock(); | 316 | rcu_read_lock(); |
316 | blkcg = task_blkio_cgroup(current); | 317 | blkcg = task_blkio_cgroup(current); |
317 | tg = throtl_find_tg(td, blkcg); | 318 | tg = throtl_find_tg(td, blkcg); |
@@ -323,32 +324,22 @@ static struct throtl_grp * throtl_get_tg(struct throtl_data *td) | |||
323 | /* | 324 | /* |
324 | * Need to allocate a group. Allocation of group also needs allocation | 325 | * Need to allocate a group. Allocation of group also needs allocation |
325 | * of per cpu stats which in-turn takes a mutex() and can block. Hence | 326 | * of per cpu stats which in-turn takes a mutex() and can block. Hence |
326 | * we need to drop rcu lock and queue_lock before we call alloc | 327 | * we need to drop rcu lock and queue_lock before we call alloc. |
327 | * | ||
328 | * Take the request queue reference to make sure queue does not | ||
329 | * go away once we return from allocation. | ||
330 | */ | 328 | */ |
331 | blk_get_queue(q); | ||
332 | rcu_read_unlock(); | 329 | rcu_read_unlock(); |
333 | spin_unlock_irq(q->queue_lock); | 330 | spin_unlock_irq(q->queue_lock); |
334 | 331 | ||
335 | tg = throtl_alloc_tg(td); | 332 | tg = throtl_alloc_tg(td); |
336 | /* | ||
337 | * We might have slept in group allocation. Make sure queue is not | ||
338 | * dead | ||
339 | */ | ||
340 | if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { | ||
341 | blk_put_queue(q); | ||
342 | if (tg) | ||
343 | kfree(tg); | ||
344 | |||
345 | return ERR_PTR(-ENODEV); | ||
346 | } | ||
347 | blk_put_queue(q); | ||
348 | 333 | ||
349 | /* Group allocated and queue is still alive. take the lock */ | 334 | /* Group allocated and queue is still alive. take the lock */ |
350 | spin_lock_irq(q->queue_lock); | 335 | spin_lock_irq(q->queue_lock); |
351 | 336 | ||
337 | /* Make sure @q is still alive */ | ||
338 | if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { | ||
339 | kfree(tg); | ||
340 | return NULL; | ||
341 | } | ||
342 | |||
352 | /* | 343 | /* |
353 | * Initialize the new group. After sleeping, read the blkcg again. | 344 | * Initialize the new group. After sleeping, read the blkcg again. |
354 | */ | 345 | */ |
@@ -1014,11 +1005,6 @@ static void throtl_release_tgs(struct throtl_data *td) | |||
1014 | } | 1005 | } |
1015 | } | 1006 | } |
1016 | 1007 | ||
1017 | static void throtl_td_free(struct throtl_data *td) | ||
1018 | { | ||
1019 | kfree(td); | ||
1020 | } | ||
1021 | |||
1022 | /* | 1008 | /* |
1023 | * Blk cgroup controller notification saying that blkio_group object is being | 1009 | * Blk cgroup controller notification saying that blkio_group object is being |
1024 | * delinked as associated cgroup object is going away. That also means that | 1010 | * delinked as associated cgroup object is going away. That also means that |
@@ -1123,17 +1109,17 @@ static struct blkio_policy_type blkio_policy_throtl = { | |||
1123 | .plid = BLKIO_POLICY_THROTL, | 1109 | .plid = BLKIO_POLICY_THROTL, |
1124 | }; | 1110 | }; |
1125 | 1111 | ||
1126 | int blk_throtl_bio(struct request_queue *q, struct bio **biop) | 1112 | bool blk_throtl_bio(struct request_queue *q, struct bio *bio) |
1127 | { | 1113 | { |
1128 | struct throtl_data *td = q->td; | 1114 | struct throtl_data *td = q->td; |
1129 | struct throtl_grp *tg; | 1115 | struct throtl_grp *tg; |
1130 | struct bio *bio = *biop; | ||
1131 | bool rw = bio_data_dir(bio), update_disptime = true; | 1116 | bool rw = bio_data_dir(bio), update_disptime = true; |
1132 | struct blkio_cgroup *blkcg; | 1117 | struct blkio_cgroup *blkcg; |
1118 | bool throttled = false; | ||
1133 | 1119 | ||
1134 | if (bio->bi_rw & REQ_THROTTLED) { | 1120 | if (bio->bi_rw & REQ_THROTTLED) { |
1135 | bio->bi_rw &= ~REQ_THROTTLED; | 1121 | bio->bi_rw &= ~REQ_THROTTLED; |
1136 | return 0; | 1122 | goto out; |
1137 | } | 1123 | } |
1138 | 1124 | ||
1139 | /* | 1125 | /* |
@@ -1152,7 +1138,7 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop) | |||
1152 | blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size, | 1138 | blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size, |
1153 | rw, rw_is_sync(bio->bi_rw)); | 1139 | rw, rw_is_sync(bio->bi_rw)); |
1154 | rcu_read_unlock(); | 1140 | rcu_read_unlock(); |
1155 | return 0; | 1141 | goto out; |
1156 | } | 1142 | } |
1157 | } | 1143 | } |
1158 | rcu_read_unlock(); | 1144 | rcu_read_unlock(); |
@@ -1161,18 +1147,10 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop) | |||
1161 | * Either group has not been allocated yet or it is not an unlimited | 1147 | * Either group has not been allocated yet or it is not an unlimited |
1162 | * IO group | 1148 | * IO group |
1163 | */ | 1149 | */ |
1164 | |||
1165 | spin_lock_irq(q->queue_lock); | 1150 | spin_lock_irq(q->queue_lock); |
1166 | tg = throtl_get_tg(td); | 1151 | tg = throtl_get_tg(td); |
1167 | 1152 | if (unlikely(!tg)) | |
1168 | if (IS_ERR(tg)) { | 1153 | goto out_unlock; |
1169 | if (PTR_ERR(tg) == -ENODEV) { | ||
1170 | /* | ||
1171 | * Queue is gone. No queue lock held here. | ||
1172 | */ | ||
1173 | return -ENODEV; | ||
1174 | } | ||
1175 | } | ||
1176 | 1154 | ||
1177 | if (tg->nr_queued[rw]) { | 1155 | if (tg->nr_queued[rw]) { |
1178 | /* | 1156 | /* |
@@ -1200,7 +1178,7 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop) | |||
1200 | * So keep on trimming slice even if bio is not queued. | 1178 | * So keep on trimming slice even if bio is not queued. |
1201 | */ | 1179 | */ |
1202 | throtl_trim_slice(td, tg, rw); | 1180 | throtl_trim_slice(td, tg, rw); |
1203 | goto out; | 1181 | goto out_unlock; |
1204 | } | 1182 | } |
1205 | 1183 | ||
1206 | queue_bio: | 1184 | queue_bio: |
@@ -1212,16 +1190,52 @@ queue_bio: | |||
1212 | tg->nr_queued[READ], tg->nr_queued[WRITE]); | 1190 | tg->nr_queued[READ], tg->nr_queued[WRITE]); |
1213 | 1191 | ||
1214 | throtl_add_bio_tg(q->td, tg, bio); | 1192 | throtl_add_bio_tg(q->td, tg, bio); |
1215 | *biop = NULL; | 1193 | throttled = true; |
1216 | 1194 | ||
1217 | if (update_disptime) { | 1195 | if (update_disptime) { |
1218 | tg_update_disptime(td, tg); | 1196 | tg_update_disptime(td, tg); |
1219 | throtl_schedule_next_dispatch(td); | 1197 | throtl_schedule_next_dispatch(td); |
1220 | } | 1198 | } |
1221 | 1199 | ||
1200 | out_unlock: | ||
1201 | spin_unlock_irq(q->queue_lock); | ||
1222 | out: | 1202 | out: |
1203 | return throttled; | ||
1204 | } | ||
1205 | |||
1206 | /** | ||
1207 | * blk_throtl_drain - drain throttled bios | ||
1208 | * @q: request_queue to drain throttled bios for | ||
1209 | * | ||
1210 | * Dispatch all currently throttled bios on @q through ->make_request_fn(). | ||
1211 | */ | ||
1212 | void blk_throtl_drain(struct request_queue *q) | ||
1213 | __releases(q->queue_lock) __acquires(q->queue_lock) | ||
1214 | { | ||
1215 | struct throtl_data *td = q->td; | ||
1216 | struct throtl_rb_root *st = &td->tg_service_tree; | ||
1217 | struct throtl_grp *tg; | ||
1218 | struct bio_list bl; | ||
1219 | struct bio *bio; | ||
1220 | |||
1221 | WARN_ON_ONCE(!queue_is_locked(q)); | ||
1222 | |||
1223 | bio_list_init(&bl); | ||
1224 | |||
1225 | while ((tg = throtl_rb_first(st))) { | ||
1226 | throtl_dequeue_tg(td, tg); | ||
1227 | |||
1228 | while ((bio = bio_list_peek(&tg->bio_lists[READ]))) | ||
1229 | tg_dispatch_one_bio(td, tg, bio_data_dir(bio), &bl); | ||
1230 | while ((bio = bio_list_peek(&tg->bio_lists[WRITE]))) | ||
1231 | tg_dispatch_one_bio(td, tg, bio_data_dir(bio), &bl); | ||
1232 | } | ||
1223 | spin_unlock_irq(q->queue_lock); | 1233 | spin_unlock_irq(q->queue_lock); |
1224 | return 0; | 1234 | |
1235 | while ((bio = bio_list_pop(&bl))) | ||
1236 | generic_make_request(bio); | ||
1237 | |||
1238 | spin_lock_irq(q->queue_lock); | ||
1225 | } | 1239 | } |
1226 | 1240 | ||
1227 | int blk_throtl_init(struct request_queue *q) | 1241 | int blk_throtl_init(struct request_queue *q) |
@@ -1296,7 +1310,11 @@ void blk_throtl_exit(struct request_queue *q) | |||
1296 | * it. | 1310 | * it. |
1297 | */ | 1311 | */ |
1298 | throtl_shutdown_wq(q); | 1312 | throtl_shutdown_wq(q); |
1299 | throtl_td_free(td); | 1313 | } |
1314 | |||
1315 | void blk_throtl_release(struct request_queue *q) | ||
1316 | { | ||
1317 | kfree(q->td); | ||
1300 | } | 1318 | } |
1301 | 1319 | ||
1302 | static int __init throtl_init(void) | 1320 | static int __init throtl_init(void) |
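The remaining blk-throttle.c hunks split teardown in two (and drop the throtl_td_free() wrapper, which was just a kfree()): blk_throtl_exit() now only shuts down the throttle workqueue, while the actual freeing of q->td moves to the new blk_throtl_release(). Condensed from the new column:

	void blk_throtl_exit(struct request_queue *q)
	{
		/* ... existing shutdown/flush logic, unchanged ... */
		throtl_shutdown_wq(q);
	}			/* q->td is no longer freed here */

	void blk_throtl_release(struct request_queue *q)
	{
		kfree(q->td);
	}

Deferring the kfree() matches the new request_queue lifetime model in this series ("block: fix request_queue lifetime handling ..." in the merge log): blk_throtl_exit() runs at cleanup time while the queue can still be reached, so freeing q->td there would leave a window for a late bio to dereference freed throttle data; the release hook runs only when the last queue reference is dropped.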
diff --git a/block/blk.h b/block/blk.h index 20b900a377c9..3f6551b3c92d 100644 --- a/block/blk.h +++ b/block/blk.h | |||
@@ -15,6 +15,7 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq, | |||
15 | struct bio *bio); | 15 | struct bio *bio); |
16 | int blk_rq_append_bio(struct request_queue *q, struct request *rq, | 16 | int blk_rq_append_bio(struct request_queue *q, struct request *rq, |
17 | struct bio *bio); | 17 | struct bio *bio); |
18 | void blk_drain_queue(struct request_queue *q, bool drain_all); | ||
18 | void blk_dequeue_request(struct request *rq); | 19 | void blk_dequeue_request(struct request *rq); |
19 | void __blk_queue_free_tags(struct request_queue *q); | 20 | void __blk_queue_free_tags(struct request_queue *q); |
20 | bool __blk_end_bidi_request(struct request *rq, int error, | 21 | bool __blk_end_bidi_request(struct request *rq, int error, |
@@ -188,4 +189,21 @@ static inline int blk_do_io_stat(struct request *rq) | |||
188 | (rq->cmd_flags & REQ_DISCARD)); | 189 | (rq->cmd_flags & REQ_DISCARD)); |
189 | } | 190 | } |
190 | 191 | ||
191 | #endif | 192 | #ifdef CONFIG_BLK_DEV_THROTTLING |
193 | extern bool blk_throtl_bio(struct request_queue *q, struct bio *bio); | ||
194 | extern void blk_throtl_drain(struct request_queue *q); | ||
195 | extern int blk_throtl_init(struct request_queue *q); | ||
196 | extern void blk_throtl_exit(struct request_queue *q); | ||
197 | extern void blk_throtl_release(struct request_queue *q); | ||
198 | #else /* CONFIG_BLK_DEV_THROTTLING */ | ||
199 | static inline bool blk_throtl_bio(struct request_queue *q, struct bio *bio) | ||
200 | { | ||
201 | return false; | ||
202 | } | ||
203 | static inline void blk_throtl_drain(struct request_queue *q) { } | ||
204 | static inline int blk_throtl_init(struct request_queue *q) { return 0; } | ||
205 | static inline void blk_throtl_exit(struct request_queue *q) { } | ||
206 | static inline void blk_throtl_release(struct request_queue *q) { } | ||
207 | #endif /* CONFIG_BLK_DEV_THROTTLING */ | ||
208 | |||
209 | #endif /* BLK_INTERNAL_H */ | ||
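blk.h now carries the blk_throtl_* prototypes (moved here from include/linux/blkdev.h, per "block: move blk_throtl prototypes to block/blk.h" in the merge log) together with inline no-op stubs for !CONFIG_BLK_DEV_THROTTLING, plus the new blk_drain_queue() declaration. The stubs let block-layer code call into throttling unconditionally; for example, a minimal hypothetical helper (not part of the patch) compiles the same in both configurations:

	static void drain_throttled(struct request_queue *q)
	{
		spin_lock_irq(q->queue_lock);
		blk_throtl_drain(q);	/* no-op when throttling is compiled out */
		spin_unlock_irq(q->queue_lock);
	}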
diff --git a/block/elevator.c b/block/elevator.c index a3b64bc71d88..66343d6917d0 100644 --- a/block/elevator.c +++ b/block/elevator.c | |||
@@ -31,7 +31,6 @@ | |||
31 | #include <linux/slab.h> | 31 | #include <linux/slab.h> |
32 | #include <linux/init.h> | 32 | #include <linux/init.h> |
33 | #include <linux/compiler.h> | 33 | #include <linux/compiler.h> |
34 | #include <linux/delay.h> | ||
35 | #include <linux/blktrace_api.h> | 34 | #include <linux/blktrace_api.h> |
36 | #include <linux/hash.h> | 35 | #include <linux/hash.h> |
37 | #include <linux/uaccess.h> | 36 | #include <linux/uaccess.h> |
@@ -182,7 +181,7 @@ static void elevator_attach(struct request_queue *q, struct elevator_queue *eq, | |||
182 | eq->elevator_data = data; | 181 | eq->elevator_data = data; |
183 | } | 182 | } |
184 | 183 | ||
185 | static char chosen_elevator[16]; | 184 | static char chosen_elevator[ELV_NAME_MAX]; |
186 | 185 | ||
187 | static int __init elevator_setup(char *str) | 186 | static int __init elevator_setup(char *str) |
188 | { | 187 | { |
@@ -606,43 +605,35 @@ void elv_requeue_request(struct request_queue *q, struct request *rq) | |||
606 | void elv_drain_elevator(struct request_queue *q) | 605 | void elv_drain_elevator(struct request_queue *q) |
607 | { | 606 | { |
608 | static int printed; | 607 | static int printed; |
608 | |||
609 | lockdep_assert_held(q->queue_lock); | ||
610 | |||
609 | while (q->elevator->ops->elevator_dispatch_fn(q, 1)) | 611 | while (q->elevator->ops->elevator_dispatch_fn(q, 1)) |
610 | ; | 612 | ; |
611 | if (q->nr_sorted == 0) | 613 | if (q->nr_sorted && printed++ < 10) { |
612 | return; | ||
613 | if (printed++ < 10) { | ||
614 | printk(KERN_ERR "%s: forced dispatching is broken " | 614 | printk(KERN_ERR "%s: forced dispatching is broken " |
615 | "(nr_sorted=%u), please report this\n", | 615 | "(nr_sorted=%u), please report this\n", |
616 | q->elevator->elevator_type->elevator_name, q->nr_sorted); | 616 | q->elevator->elevator_type->elevator_name, q->nr_sorted); |
617 | } | 617 | } |
618 | } | 618 | } |
619 | 619 | ||
620 | /* | ||
621 | * Call with queue lock held, interrupts disabled | ||
622 | */ | ||
623 | void elv_quiesce_start(struct request_queue *q) | 620 | void elv_quiesce_start(struct request_queue *q) |
624 | { | 621 | { |
625 | if (!q->elevator) | 622 | if (!q->elevator) |
626 | return; | 623 | return; |
627 | 624 | ||
625 | spin_lock_irq(q->queue_lock); | ||
628 | queue_flag_set(QUEUE_FLAG_ELVSWITCH, q); | 626 | queue_flag_set(QUEUE_FLAG_ELVSWITCH, q); |
627 | spin_unlock_irq(q->queue_lock); | ||
629 | 628 | ||
630 | /* | 629 | blk_drain_queue(q, false); |
631 | * make sure we don't have any requests in flight | ||
632 | */ | ||
633 | elv_drain_elevator(q); | ||
634 | while (q->rq.elvpriv) { | ||
635 | __blk_run_queue(q); | ||
636 | spin_unlock_irq(q->queue_lock); | ||
637 | msleep(10); | ||
638 | spin_lock_irq(q->queue_lock); | ||
639 | elv_drain_elevator(q); | ||
640 | } | ||
641 | } | 630 | } |
642 | 631 | ||
643 | void elv_quiesce_end(struct request_queue *q) | 632 | void elv_quiesce_end(struct request_queue *q) |
644 | { | 633 | { |
634 | spin_lock_irq(q->queue_lock); | ||
645 | queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q); | 635 | queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q); |
636 | spin_unlock_irq(q->queue_lock); | ||
646 | } | 637 | } |
647 | 638 | ||
648 | void __elv_add_request(struct request_queue *q, struct request *rq, int where) | 639 | void __elv_add_request(struct request_queue *q, struct request *rq, int where) |
@@ -972,7 +963,6 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) | |||
972 | /* | 963 | /* |
973 | * Turn on BYPASS and drain all requests w/ elevator private data | 964 | * Turn on BYPASS and drain all requests w/ elevator private data |
974 | */ | 965 | */ |
975 | spin_lock_irq(q->queue_lock); | ||
976 | elv_quiesce_start(q); | 966 | elv_quiesce_start(q); |
977 | 967 | ||
978 | /* | 968 | /* |
@@ -983,8 +973,8 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) | |||
983 | /* | 973 | /* |
984 | * attach and start new elevator | 974 | * attach and start new elevator |
985 | */ | 975 | */ |
976 | spin_lock_irq(q->queue_lock); | ||
986 | elevator_attach(q, e, data); | 977 | elevator_attach(q, e, data); |
987 | |||
988 | spin_unlock_irq(q->queue_lock); | 978 | spin_unlock_irq(q->queue_lock); |
989 | 979 | ||
990 | if (old_elevator->registered) { | 980 | if (old_elevator->registered) { |
@@ -999,9 +989,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) | |||
999 | * finally exit old elevator and turn off BYPASS. | 989 | * finally exit old elevator and turn off BYPASS. |
1000 | */ | 990 | */ |
1001 | elevator_exit(old_elevator); | 991 | elevator_exit(old_elevator); |
1002 | spin_lock_irq(q->queue_lock); | ||
1003 | elv_quiesce_end(q); | 992 | elv_quiesce_end(q); |
1004 | spin_unlock_irq(q->queue_lock); | ||
1005 | 993 | ||
1006 | blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name); | 994 | blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name); |
1007 | 995 | ||
@@ -1015,10 +1003,7 @@ fail_register: | |||
1015 | elevator_exit(e); | 1003 | elevator_exit(e); |
1016 | q->elevator = old_elevator; | 1004 | q->elevator = old_elevator; |
1017 | elv_register_queue(q); | 1005 | elv_register_queue(q); |
1018 | 1006 | elv_quiesce_end(q); | |
1019 | spin_lock_irq(q->queue_lock); | ||
1020 | queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q); | ||
1021 | spin_unlock_irq(q->queue_lock); | ||
1022 | 1007 | ||
1023 | return err; | 1008 | return err; |
1024 | } | 1009 | } |
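The elevator.c changes fold the locking into the quiesce helpers and delegate request draining to the new blk_drain_queue(): elv_quiesce_start()/elv_quiesce_end() now take queue_lock themselves, so elevator_switch() no longer wraps them in spin_lock_irq()/spin_unlock_irq(), and the open-coded "run queue, msleep(10), retry" loop (and the <linux/delay.h> include it needed) is gone. Reassembled from the new column:

	void elv_quiesce_start(struct request_queue *q)
	{
		if (!q->elevator)
			return;

		spin_lock_irq(q->queue_lock);
		queue_flag_set(QUEUE_FLAG_ELVSWITCH, q);
		spin_unlock_irq(q->queue_lock);

		blk_drain_queue(q, false);	/* replaces the msleep() polling loop */
	}

	void elv_quiesce_end(struct request_queue *q)
	{
		spin_lock_irq(q->queue_lock);
		queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q);
		spin_unlock_irq(q->queue_lock);
	}

elv_drain_elevator() also gains a lockdep_assert_held(q->queue_lock), the fail_register path of elevator_switch() now calls elv_quiesce_end() instead of clearing QUEUE_FLAG_ELVSWITCH by hand, and chosen_elevator is sized with ELV_NAME_MAX rather than a bare 16.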
diff --git a/block/genhd.c b/block/genhd.c index 94855a9717de..024fc3944fb5 100644 --- a/block/genhd.c +++ b/block/genhd.c | |||
@@ -612,6 +612,12 @@ void add_disk(struct gendisk *disk) | |||
612 | register_disk(disk); | 612 | register_disk(disk); |
613 | blk_register_queue(disk); | 613 | blk_register_queue(disk); |
614 | 614 | ||
615 | /* | ||
616 | * Take an extra ref on queue which will be put on disk_release() | ||
617 | * so that it sticks around as long as @disk is there. | ||
618 | */ | ||
619 | WARN_ON_ONCE(blk_get_queue(disk->queue)); | ||
620 | |||
615 | retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj, | 621 | retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj, |
616 | "bdi"); | 622 | "bdi"); |
617 | WARN_ON(retval); | 623 | WARN_ON(retval); |
@@ -1166,6 +1172,8 @@ static void disk_release(struct device *dev) | |||
1166 | disk_replace_part_tbl(disk, NULL); | 1172 | disk_replace_part_tbl(disk, NULL); |
1167 | free_part_stats(&disk->part0); | 1173 | free_part_stats(&disk->part0); |
1168 | free_part_info(&disk->part0); | 1174 | free_part_info(&disk->part0); |
1175 | if (disk->queue) | ||
1176 | blk_put_queue(disk->queue); | ||
1169 | kfree(disk); | 1177 | kfree(disk); |
1170 | } | 1178 | } |
1171 | struct class block_class = { | 1179 | struct class block_class = { |
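In genhd.c, add_disk() now pins the request_queue for the lifetime of the gendisk and disk_release() drops that reference, so the queue cannot be freed while the disk is still reachable. Condensed from the two hunks above:

	/* add_disk(): pin the queue for as long as the disk exists */
	WARN_ON_ONCE(blk_get_queue(disk->queue));

	/* disk_release(): drop it when the last disk reference goes away */
	if (disk->queue)
		blk_put_queue(disk->queue);
	kfree(disk);

blk_get_queue() fails only when the queue is already marked dead, which should never be the case while add_disk() is registering it, hence the WARN_ON_ONCE() rather than real error handling.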
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 4f4230b79bb6..fbdf0d802ec4 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c | |||
@@ -565,7 +565,7 @@ int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mod | |||
565 | { | 565 | { |
566 | int err; | 566 | int err; |
567 | 567 | ||
568 | if (!q || blk_get_queue(q)) | 568 | if (!q) |
569 | return -ENXIO; | 569 | return -ENXIO; |
570 | 570 | ||
571 | switch (cmd) { | 571 | switch (cmd) { |
@@ -686,7 +686,6 @@ int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mod | |||
686 | err = -ENOTTY; | 686 | err = -ENOTTY; |
687 | } | 687 | } |
688 | 688 | ||
689 | blk_put_queue(q); | ||
690 | return err; | 689 | return err; |
691 | } | 690 | } |
692 | EXPORT_SYMBOL(scsi_cmd_ioctl); | 691 | EXPORT_SYMBOL(scsi_cmd_ioctl); |
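With the gendisk now holding a queue reference (genhd.c hunks above), scsi_cmd_ioctl() no longer needs to take and drop its own reference around every ioctl; a caller that reached this function through a disk already has a live queue. The entry check therefore reduces to a plain NULL test:

	if (!q)		/* queue lifetime is pinned by the gendisk */
		return -ENXIO;
	/* ... dispatch on cmd; no blk_put_queue() on the way out ... */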