Diffstat (limited to 'block')
-rw-r--r--  block/blk-core.c      113
-rw-r--r--  block/blk-merge.c      31
-rw-r--r--  block/blk-settings.c    2
-rw-r--r--  block/blk-softirq.c     2
-rw-r--r--  block/blk-sysfs.c      44
-rw-r--r--  block/blk.h            16
-rw-r--r--  block/bsg.c            12
-rw-r--r--  block/cfq-iosched.c   206
-rw-r--r--  block/cmd-filter.c      1
-rw-r--r--  block/elevator.c       44
-rw-r--r--  block/scsi_ioctl.c     21
11 files changed, 264 insertions, 228 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index 859879d0a0bf..07ab75403e1a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -64,12 +64,11 @@ static struct workqueue_struct *kblockd_workqueue;
64 64
65static void drive_stat_acct(struct request *rq, int new_io) 65static void drive_stat_acct(struct request *rq, int new_io)
66{ 66{
67 struct gendisk *disk = rq->rq_disk;
68 struct hd_struct *part; 67 struct hd_struct *part;
69 int rw = rq_data_dir(rq); 68 int rw = rq_data_dir(rq);
70 int cpu; 69 int cpu;
71 70
72 if (!blk_fs_request(rq) || !disk || !blk_do_io_stat(disk->queue)) 71 if (!blk_fs_request(rq) || !blk_do_io_stat(rq))
73 return; 72 return;
74 73
75 cpu = part_stat_lock(); 74 cpu = part_stat_lock();
@@ -485,11 +484,11 @@ static int blk_init_free_list(struct request_queue *q)
485{ 484{
486 struct request_list *rl = &q->rq; 485 struct request_list *rl = &q->rq;
487 486
488 rl->count[READ] = rl->count[WRITE] = 0; 487 rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;
489 rl->starved[READ] = rl->starved[WRITE] = 0; 488 rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;
490 rl->elvpriv = 0; 489 rl->elvpriv = 0;
491 init_waitqueue_head(&rl->wait[READ]); 490 init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);
492 init_waitqueue_head(&rl->wait[WRITE]); 491 init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);
493 492
494 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, 493 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
495 mempool_free_slab, request_cachep, q->node); 494 mempool_free_slab, request_cachep, q->node);
@@ -604,13 +603,10 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
604 q->queue_flags = QUEUE_FLAG_DEFAULT; 603 q->queue_flags = QUEUE_FLAG_DEFAULT;
605 q->queue_lock = lock; 604 q->queue_lock = lock;
606 605
607 blk_queue_segment_boundary(q, BLK_SEG_BOUNDARY_MASK); 606 /*
608 607 * This also sets hw/phys segments, boundary and size
608 */
609 blk_queue_make_request(q, __make_request); 609 blk_queue_make_request(q, __make_request);
610 blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE);
611
612 blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
613 blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
614 610
615 q->sg_reserved_size = INT_MAX; 611 q->sg_reserved_size = INT_MAX;
616 612
@@ -703,18 +699,18 @@ static void ioc_set_batching(struct request_queue *q, struct io_context *ioc)
703 ioc->last_waited = jiffies; 699 ioc->last_waited = jiffies;
704} 700}
705 701
706static void __freed_request(struct request_queue *q, int rw) 702static void __freed_request(struct request_queue *q, int sync)
707{ 703{
708 struct request_list *rl = &q->rq; 704 struct request_list *rl = &q->rq;
709 705
710 if (rl->count[rw] < queue_congestion_off_threshold(q)) 706 if (rl->count[sync] < queue_congestion_off_threshold(q))
711 blk_clear_queue_congested(q, rw); 707 blk_clear_queue_congested(q, sync);
712 708
713 if (rl->count[rw] + 1 <= q->nr_requests) { 709 if (rl->count[sync] + 1 <= q->nr_requests) {
714 if (waitqueue_active(&rl->wait[rw])) 710 if (waitqueue_active(&rl->wait[sync]))
715 wake_up(&rl->wait[rw]); 711 wake_up(&rl->wait[sync]);
716 712
717 blk_clear_queue_full(q, rw); 713 blk_clear_queue_full(q, sync);
718 } 714 }
719} 715}
720 716
@@ -722,21 +718,20 @@ static void __freed_request(struct request_queue *q, int rw)
722 * A request has just been released. Account for it, update the full and 718 * A request has just been released. Account for it, update the full and
723 * congestion status, wake up any waiters. Called under q->queue_lock. 719 * congestion status, wake up any waiters. Called under q->queue_lock.
724 */ 720 */
725static void freed_request(struct request_queue *q, int rw, int priv) 721static void freed_request(struct request_queue *q, int sync, int priv)
726{ 722{
727 struct request_list *rl = &q->rq; 723 struct request_list *rl = &q->rq;
728 724
729 rl->count[rw]--; 725 rl->count[sync]--;
730 if (priv) 726 if (priv)
731 rl->elvpriv--; 727 rl->elvpriv--;
732 728
733 __freed_request(q, rw); 729 __freed_request(q, sync);
734 730
735 if (unlikely(rl->starved[rw ^ 1])) 731 if (unlikely(rl->starved[sync ^ 1]))
736 __freed_request(q, rw ^ 1); 732 __freed_request(q, sync ^ 1);
737} 733}
738 734
739#define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist)
740/* 735/*
741 * Get a free request, queue_lock must be held. 736 * Get a free request, queue_lock must be held.
742 * Returns NULL on failure, with queue_lock held. 737 * Returns NULL on failure, with queue_lock held.
@@ -748,15 +743,15 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
748 struct request *rq = NULL; 743 struct request *rq = NULL;
749 struct request_list *rl = &q->rq; 744 struct request_list *rl = &q->rq;
750 struct io_context *ioc = NULL; 745 struct io_context *ioc = NULL;
751 const int rw = rw_flags & 0x01; 746 const bool is_sync = rw_is_sync(rw_flags) != 0;
752 int may_queue, priv; 747 int may_queue, priv;
753 748
754 may_queue = elv_may_queue(q, rw_flags); 749 may_queue = elv_may_queue(q, rw_flags);
755 if (may_queue == ELV_MQUEUE_NO) 750 if (may_queue == ELV_MQUEUE_NO)
756 goto rq_starved; 751 goto rq_starved;
757 752
758 if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) { 753 if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
759 if (rl->count[rw]+1 >= q->nr_requests) { 754 if (rl->count[is_sync]+1 >= q->nr_requests) {
760 ioc = current_io_context(GFP_ATOMIC, q->node); 755 ioc = current_io_context(GFP_ATOMIC, q->node);
761 /* 756 /*
762 * The queue will fill after this allocation, so set 757 * The queue will fill after this allocation, so set
@@ -764,9 +759,9 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
764 * This process will be allowed to complete a batch of 759 * This process will be allowed to complete a batch of
765 * requests, others will be blocked. 760 * requests, others will be blocked.
766 */ 761 */
767 if (!blk_queue_full(q, rw)) { 762 if (!blk_queue_full(q, is_sync)) {
768 ioc_set_batching(q, ioc); 763 ioc_set_batching(q, ioc);
769 blk_set_queue_full(q, rw); 764 blk_set_queue_full(q, is_sync);
770 } else { 765 } else {
771 if (may_queue != ELV_MQUEUE_MUST 766 if (may_queue != ELV_MQUEUE_MUST
772 && !ioc_batching(q, ioc)) { 767 && !ioc_batching(q, ioc)) {
@@ -779,7 +774,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
779 } 774 }
780 } 775 }
781 } 776 }
782 blk_set_queue_congested(q, rw); 777 blk_set_queue_congested(q, is_sync);
783 } 778 }
784 779
785 /* 780 /*
@@ -787,11 +782,11 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
787 * limit of requests, otherwise we could have thousands of requests 782 * limit of requests, otherwise we could have thousands of requests
788 * allocated with any setting of ->nr_requests 783 * allocated with any setting of ->nr_requests
789 */ 784 */
790 if (rl->count[rw] >= (3 * q->nr_requests / 2)) 785 if (rl->count[is_sync] >= (3 * q->nr_requests / 2))
791 goto out; 786 goto out;
792 787
793 rl->count[rw]++; 788 rl->count[is_sync]++;
794 rl->starved[rw] = 0; 789 rl->starved[is_sync] = 0;
795 790
796 priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); 791 priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
797 if (priv) 792 if (priv)
@@ -809,7 +804,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
809 * wait queue, but this is pretty rare. 804 * wait queue, but this is pretty rare.
810 */ 805 */
811 spin_lock_irq(q->queue_lock); 806 spin_lock_irq(q->queue_lock);
812 freed_request(q, rw, priv); 807 freed_request(q, is_sync, priv);
813 808
814 /* 809 /*
815 * in the very unlikely event that allocation failed and no 810 * in the very unlikely event that allocation failed and no
@@ -819,8 +814,8 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
819 * rq mempool into READ and WRITE 814 * rq mempool into READ and WRITE
820 */ 815 */
821rq_starved: 816rq_starved:
822 if (unlikely(rl->count[rw] == 0)) 817 if (unlikely(rl->count[is_sync] == 0))
823 rl->starved[rw] = 1; 818 rl->starved[is_sync] = 1;
824 819
825 goto out; 820 goto out;
826 } 821 }
@@ -834,7 +829,7 @@ rq_starved:
834 if (ioc_batching(q, ioc)) 829 if (ioc_batching(q, ioc))
835 ioc->nr_batch_requests--; 830 ioc->nr_batch_requests--;
836 831
837 trace_block_getrq(q, bio, rw); 832 trace_block_getrq(q, bio, rw_flags & 1);
838out: 833out:
839 return rq; 834 return rq;
840} 835}
@@ -848,7 +843,7 @@ out:
848static struct request *get_request_wait(struct request_queue *q, int rw_flags, 843static struct request *get_request_wait(struct request_queue *q, int rw_flags,
849 struct bio *bio) 844 struct bio *bio)
850{ 845{
851 const int rw = rw_flags & 0x01; 846 const bool is_sync = rw_is_sync(rw_flags) != 0;
852 struct request *rq; 847 struct request *rq;
853 848
854 rq = get_request(q, rw_flags, bio, GFP_NOIO); 849 rq = get_request(q, rw_flags, bio, GFP_NOIO);
@@ -857,10 +852,10 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
857 struct io_context *ioc; 852 struct io_context *ioc;
858 struct request_list *rl = &q->rq; 853 struct request_list *rl = &q->rq;
859 854
860 prepare_to_wait_exclusive(&rl->wait[rw], &wait, 855 prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
861 TASK_UNINTERRUPTIBLE); 856 TASK_UNINTERRUPTIBLE);
862 857
863 trace_block_sleeprq(q, bio, rw); 858 trace_block_sleeprq(q, bio, rw_flags & 1);
864 859
865 __generic_unplug_device(q); 860 __generic_unplug_device(q);
866 spin_unlock_irq(q->queue_lock); 861 spin_unlock_irq(q->queue_lock);
@@ -876,7 +871,7 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
876 ioc_set_batching(q, ioc); 871 ioc_set_batching(q, ioc);
877 872
878 spin_lock_irq(q->queue_lock); 873 spin_lock_irq(q->queue_lock);
879 finish_wait(&rl->wait[rw], &wait); 874 finish_wait(&rl->wait[is_sync], &wait);
880 875
881 rq = get_request(q, rw_flags, bio, GFP_NOIO); 876 rq = get_request(q, rw_flags, bio, GFP_NOIO);
882 }; 877 };
@@ -1067,19 +1062,22 @@ void __blk_put_request(struct request_queue *q, struct request *req)
1067 1062
1068 elv_completed_request(q, req); 1063 elv_completed_request(q, req);
1069 1064
1065 /* this is a bio leak */
1066 WARN_ON(req->bio != NULL);
1067
1070 /* 1068 /*
1071 * Request may not have originated from ll_rw_blk. if not, 1069 * Request may not have originated from ll_rw_blk. if not,
1072 * it didn't come out of our reserved rq pools 1070 * it didn't come out of our reserved rq pools
1073 */ 1071 */
1074 if (req->cmd_flags & REQ_ALLOCED) { 1072 if (req->cmd_flags & REQ_ALLOCED) {
1075 int rw = rq_data_dir(req); 1073 int is_sync = rq_is_sync(req) != 0;
1076 int priv = req->cmd_flags & REQ_ELVPRIV; 1074 int priv = req->cmd_flags & REQ_ELVPRIV;
1077 1075
1078 BUG_ON(!list_empty(&req->queuelist)); 1076 BUG_ON(!list_empty(&req->queuelist));
1079 BUG_ON(!hlist_unhashed(&req->hash)); 1077 BUG_ON(!hlist_unhashed(&req->hash));
1080 1078
1081 blk_free_request(q, req); 1079 blk_free_request(q, req);
1082 freed_request(q, rw, priv); 1080 freed_request(q, is_sync, priv);
1083 } 1081 }
1084} 1082}
1085EXPORT_SYMBOL_GPL(__blk_put_request); 1083EXPORT_SYMBOL_GPL(__blk_put_request);
@@ -1126,10 +1124,10 @@ void init_request_from_bio(struct request *req, struct bio *bio)
1126 1124
1127 if (bio_sync(bio)) 1125 if (bio_sync(bio))
1128 req->cmd_flags |= REQ_RW_SYNC; 1126 req->cmd_flags |= REQ_RW_SYNC;
1129 if (bio_unplug(bio))
1130 req->cmd_flags |= REQ_UNPLUG;
1131 if (bio_rw_meta(bio)) 1127 if (bio_rw_meta(bio))
1132 req->cmd_flags |= REQ_RW_META; 1128 req->cmd_flags |= REQ_RW_META;
1129 if (bio_noidle(bio))
1130 req->cmd_flags |= REQ_NOIDLE;
1133 1131
1134 req->errors = 0; 1132 req->errors = 0;
1135 req->hard_sector = req->sector = bio->bi_sector; 1133 req->hard_sector = req->sector = bio->bi_sector;
@@ -1138,6 +1136,15 @@ void init_request_from_bio(struct request *req, struct bio *bio)
1138 blk_rq_bio_prep(req->q, req, bio); 1136 blk_rq_bio_prep(req->q, req, bio);
1139} 1137}
1140 1138
1139/*
1140 * Only disabling plugging for non-rotational devices if it does tagging
1141 * as well, otherwise we do need the proper merging
1142 */
1143static inline bool queue_should_plug(struct request_queue *q)
1144{
1145 return !(blk_queue_nonrot(q) && blk_queue_tagged(q));
1146}
1147
1141static int __make_request(struct request_queue *q, struct bio *bio) 1148static int __make_request(struct request_queue *q, struct bio *bio)
1142{ 1149{
1143 struct request *req; 1150 struct request *req;
@@ -1244,11 +1251,11 @@ get_rq:
1244 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) || 1251 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
1245 bio_flagged(bio, BIO_CPU_AFFINE)) 1252 bio_flagged(bio, BIO_CPU_AFFINE))
1246 req->cpu = blk_cpu_to_group(smp_processor_id()); 1253 req->cpu = blk_cpu_to_group(smp_processor_id());
1247 if (!blk_queue_nonrot(q) && elv_queue_empty(q)) 1254 if (queue_should_plug(q) && elv_queue_empty(q))
1248 blk_plug_device(q); 1255 blk_plug_device(q);
1249 add_request(q, req); 1256 add_request(q, req);
1250out: 1257out:
1251 if (unplug || blk_queue_nonrot(q)) 1258 if (unplug || !queue_should_plug(q))
1252 __generic_unplug_device(q); 1259 __generic_unplug_device(q);
1253 spin_unlock_irq(q->queue_lock); 1260 spin_unlock_irq(q->queue_lock);
1254 return 0; 1261 return 0;
@@ -1666,9 +1673,7 @@ EXPORT_SYMBOL(blkdev_dequeue_request);
1666 1673
1667static void blk_account_io_completion(struct request *req, unsigned int bytes) 1674static void blk_account_io_completion(struct request *req, unsigned int bytes)
1668{ 1675{
1669 struct gendisk *disk = req->rq_disk; 1676 if (!blk_do_io_stat(req))
1670
1671 if (!disk || !blk_do_io_stat(disk->queue))
1672 return; 1677 return;
1673 1678
1674 if (blk_fs_request(req)) { 1679 if (blk_fs_request(req)) {
@@ -1685,9 +1690,7 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes)
1685 1690
1686static void blk_account_io_done(struct request *req) 1691static void blk_account_io_done(struct request *req)
1687{ 1692{
1688 struct gendisk *disk = req->rq_disk; 1693 if (!blk_do_io_stat(req))
1689
1690 if (!disk || !blk_do_io_stat(disk->queue))
1691 return; 1694 return;
1692 1695
1693 /* 1696 /*
@@ -1702,7 +1705,7 @@ static void blk_account_io_done(struct request *req)
1702 int cpu; 1705 int cpu;
1703 1706
1704 cpu = part_stat_lock(); 1707 cpu = part_stat_lock();
1705 part = disk_map_sector_rcu(disk, req->sector); 1708 part = disk_map_sector_rcu(req->rq_disk, req->sector);
1706 1709
1707 part_stat_inc(cpu, part, ios[rw]); 1710 part_stat_inc(cpu, part, ios[rw]);
1708 part_stat_add(cpu, part, ticks[rw], duration); 1711 part_stat_add(cpu, part, ticks[rw], duration);
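Several of the blk-core.c hunks above reindex the per-queue request lists by sync/async instead of READ/WRITE. A standalone sketch of that mapping follows (plain C, not kernel code: the real helpers are rw_is_sync()/rq_is_sync(), and it assumes BLK_RW_ASYNC == 0 and BLK_RW_SYNC == 1, so that a boolean can double as the list index the way the get_request() hunks use it):

	/*
	 * Sketch of the sync/async request-list indexing. Reads are treated
	 * as synchronous; writes only when explicitly marked sync.
	 */
	#include <stdbool.h>

	enum { SKETCH_BLK_RW_ASYNC = 0, SKETCH_BLK_RW_SYNC = 1 };

	static inline bool sketch_rw_is_sync(bool is_write, bool marked_sync)
	{
		return !is_write || marked_sync;
	}

	static inline int sketch_rw_list_index(bool is_write, bool marked_sync)
	{
		return sketch_rw_is_sync(is_write, marked_sync) ?
			SKETCH_BLK_RW_SYNC : SKETCH_BLK_RW_ASYNC;
	}

With that mapping, rl->count[BLK_RW_SYNC] tracks synchronous requests (all reads plus writes marked sync) and rl->count[BLK_RW_ASYNC] tracks ordinary writes.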
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 5a244f05360f..63760ca3da0f 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -338,6 +338,22 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
338 return 1; 338 return 1;
339} 339}
340 340
341static void blk_account_io_merge(struct request *req)
342{
343 if (blk_do_io_stat(req)) {
344 struct hd_struct *part;
345 int cpu;
346
347 cpu = part_stat_lock();
348 part = disk_map_sector_rcu(req->rq_disk, req->sector);
349
350 part_round_stats(cpu, part);
351 part_dec_in_flight(part);
352
353 part_stat_unlock();
354 }
355}
356
341/* 357/*
342 * Has to be called with the request spinlock acquired 358 * Has to be called with the request spinlock acquired
343 */ 359 */
@@ -386,23 +402,14 @@ static int attempt_merge(struct request_queue *q, struct request *req,
386 402
387 elv_merge_requests(q, req, next); 403 elv_merge_requests(q, req, next);
388 404
389 if (req->rq_disk) { 405 blk_account_io_merge(req);
390 struct hd_struct *part;
391 int cpu;
392
393 cpu = part_stat_lock();
394 part = disk_map_sector_rcu(req->rq_disk, req->sector);
395
396 part_round_stats(cpu, part);
397 part_dec_in_flight(part);
398
399 part_stat_unlock();
400 }
401 406
402 req->ioprio = ioprio_best(req->ioprio, next->ioprio); 407 req->ioprio = ioprio_best(req->ioprio, next->ioprio);
403 if (blk_rq_cpu_valid(next)) 408 if (blk_rq_cpu_valid(next))
404 req->cpu = next->cpu; 409 req->cpu = next->cpu;
405 410
411 /* owner-ship of bio passed from next to req */
412 next->bio = NULL;
406 __blk_put_request(q, next); 413 __blk_put_request(q, next);
407 return 1; 414 return 1;
408} 415}
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 59fd05d9f1d5..69c42adde52b 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -431,7 +431,7 @@ EXPORT_SYMBOL(blk_queue_segment_boundary);
431 * 431 *
432 * description: 432 * description:
433 * set required memory and length alignment for direct dma transactions. 433 * set required memory and length alignment for direct dma transactions.
434 * this is used when buiding direct io requests for the queue. 434 * this is used when building direct io requests for the queue.
435 * 435 *
436 **/ 436 **/
437void blk_queue_dma_alignment(struct request_queue *q, int mask) 437void blk_queue_dma_alignment(struct request_queue *q, int mask)
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index ce0efc6b26dc..ee9c21602228 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -64,7 +64,7 @@ static int raise_blk_irq(int cpu, struct request *rq)
64 data->info = rq; 64 data->info = rq;
65 data->flags = 0; 65 data->flags = 0;
66 66
67 __smp_call_function_single(cpu, data); 67 __smp_call_function_single(cpu, data, 0);
68 return 0; 68 return 0;
69 } 69 }
70 70
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index e29ddfc73cf4..73f36beff5cd 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -48,28 +48,28 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
48 q->nr_requests = nr; 48 q->nr_requests = nr;
49 blk_queue_congestion_threshold(q); 49 blk_queue_congestion_threshold(q);
50 50
51 if (rl->count[READ] >= queue_congestion_on_threshold(q)) 51 if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q))
52 blk_set_queue_congested(q, READ); 52 blk_set_queue_congested(q, BLK_RW_SYNC);
53 else if (rl->count[READ] < queue_congestion_off_threshold(q)) 53 else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q))
54 blk_clear_queue_congested(q, READ); 54 blk_clear_queue_congested(q, BLK_RW_SYNC);
55 55
56 if (rl->count[WRITE] >= queue_congestion_on_threshold(q)) 56 if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q))
57 blk_set_queue_congested(q, WRITE); 57 blk_set_queue_congested(q, BLK_RW_ASYNC);
58 else if (rl->count[WRITE] < queue_congestion_off_threshold(q)) 58 else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q))
59 blk_clear_queue_congested(q, WRITE); 59 blk_clear_queue_congested(q, BLK_RW_ASYNC);
60 60
61 if (rl->count[READ] >= q->nr_requests) { 61 if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
62 blk_set_queue_full(q, READ); 62 blk_set_queue_full(q, BLK_RW_SYNC);
63 } else if (rl->count[READ]+1 <= q->nr_requests) { 63 } else if (rl->count[BLK_RW_SYNC]+1 <= q->nr_requests) {
64 blk_clear_queue_full(q, READ); 64 blk_clear_queue_full(q, BLK_RW_SYNC);
65 wake_up(&rl->wait[READ]); 65 wake_up(&rl->wait[BLK_RW_SYNC]);
66 } 66 }
67 67
68 if (rl->count[WRITE] >= q->nr_requests) { 68 if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) {
69 blk_set_queue_full(q, WRITE); 69 blk_set_queue_full(q, BLK_RW_ASYNC);
70 } else if (rl->count[WRITE]+1 <= q->nr_requests) { 70 } else if (rl->count[BLK_RW_ASYNC]+1 <= q->nr_requests) {
71 blk_clear_queue_full(q, WRITE); 71 blk_clear_queue_full(q, BLK_RW_ASYNC);
72 wake_up(&rl->wait[WRITE]); 72 wake_up(&rl->wait[BLK_RW_ASYNC]);
73 } 73 }
74 spin_unlock_irq(q->queue_lock); 74 spin_unlock_irq(q->queue_lock);
75 return ret; 75 return ret;
@@ -209,10 +209,14 @@ static ssize_t queue_iostats_store(struct request_queue *q, const char *page,
209 ssize_t ret = queue_var_store(&stats, page, count); 209 ssize_t ret = queue_var_store(&stats, page, count);
210 210
211 spin_lock_irq(q->queue_lock); 211 spin_lock_irq(q->queue_lock);
212 elv_quisce_start(q);
213
212 if (stats) 214 if (stats)
213 queue_flag_set(QUEUE_FLAG_IO_STAT, q); 215 queue_flag_set(QUEUE_FLAG_IO_STAT, q);
214 else 216 else
215 queue_flag_clear(QUEUE_FLAG_IO_STAT, q); 217 queue_flag_clear(QUEUE_FLAG_IO_STAT, q);
218
219 elv_quisce_end(q);
216 spin_unlock_irq(q->queue_lock); 220 spin_unlock_irq(q->queue_lock);
217 221
218 return ret; 222 return ret;
diff --git a/block/blk.h b/block/blk.h
index 0dce92c37496..24fcaeeaf620 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -70,6 +70,10 @@ void blk_queue_congestion_threshold(struct request_queue *q);
70 70
71int blk_dev_init(void); 71int blk_dev_init(void);
72 72
73void elv_quisce_start(struct request_queue *q);
74void elv_quisce_end(struct request_queue *q);
75
76
73/* 77/*
74 * Return the threshold (number of used requests) at which the queue is 78 * Return the threshold (number of used requests) at which the queue is
75 * considered to be congested. It include a little hysteresis to keep the 79 * considered to be congested. It include a little hysteresis to keep the
@@ -102,18 +106,20 @@ static inline int blk_cpu_to_group(int cpu)
102 const struct cpumask *mask = cpu_coregroup_mask(cpu); 106 const struct cpumask *mask = cpu_coregroup_mask(cpu);
103 return cpumask_first(mask); 107 return cpumask_first(mask);
104#elif defined(CONFIG_SCHED_SMT) 108#elif defined(CONFIG_SCHED_SMT)
105 return first_cpu(per_cpu(cpu_sibling_map, cpu)); 109 return cpumask_first(topology_thread_cpumask(cpu));
106#else 110#else
107 return cpu; 111 return cpu;
108#endif 112#endif
109} 113}
110 114
111static inline int blk_do_io_stat(struct request_queue *q) 115static inline int blk_do_io_stat(struct request *rq)
112{ 116{
113 if (q) 117 struct gendisk *disk = rq->rq_disk;
114 return blk_queue_io_stat(q);
115 118
116 return 0; 119 if (!disk || !disk->queue)
120 return 0;
121
122 return blk_queue_io_stat(disk->queue) && (rq->cmd_flags & REQ_ELVPRIV);
117} 123}
118 124
119#endif 125#endif
diff --git a/block/bsg.c b/block/bsg.c
index 0ce8806dd0c1..206060e795da 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -218,9 +218,6 @@ bsg_validate_sgv4_hdr(struct request_queue *q, struct sg_io_v4 *hdr, int *rw)
218 218
219 if (hdr->guard != 'Q') 219 if (hdr->guard != 'Q')
220 return -EINVAL; 220 return -EINVAL;
221 if (hdr->dout_xfer_len > (q->max_sectors << 9) ||
222 hdr->din_xfer_len > (q->max_sectors << 9))
223 return -EIO;
224 221
225 switch (hdr->protocol) { 222 switch (hdr->protocol) {
226 case BSG_PROTOCOL_SCSI: 223 case BSG_PROTOCOL_SCSI:
@@ -353,6 +350,8 @@ static void bsg_rq_end_io(struct request *rq, int uptodate)
353static void bsg_add_command(struct bsg_device *bd, struct request_queue *q, 350static void bsg_add_command(struct bsg_device *bd, struct request_queue *q,
354 struct bsg_command *bc, struct request *rq) 351 struct bsg_command *bc, struct request *rq)
355{ 352{
353 int at_head = (0 == (bc->hdr.flags & BSG_FLAG_Q_AT_TAIL));
354
356 /* 355 /*
357 * add bc command to busy queue and submit rq for io 356 * add bc command to busy queue and submit rq for io
358 */ 357 */
@@ -368,7 +367,7 @@ static void bsg_add_command(struct bsg_device *bd, struct request_queue *q,
368 dprintk("%s: queueing rq %p, bc %p\n", bd->name, rq, bc); 367 dprintk("%s: queueing rq %p, bc %p\n", bd->name, rq, bc);
369 368
370 rq->end_io_data = bc; 369 rq->end_io_data = bc;
371 blk_execute_rq_nowait(q, NULL, rq, 1, bsg_rq_end_io); 370 blk_execute_rq_nowait(q, NULL, rq, at_head, bsg_rq_end_io);
372} 371}
373 372
374static struct bsg_command *bsg_next_done_cmd(struct bsg_device *bd) 373static struct bsg_command *bsg_next_done_cmd(struct bsg_device *bd)
@@ -924,6 +923,7 @@ static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
924 struct request *rq; 923 struct request *rq;
925 struct bio *bio, *bidi_bio = NULL; 924 struct bio *bio, *bidi_bio = NULL;
926 struct sg_io_v4 hdr; 925 struct sg_io_v4 hdr;
926 int at_head;
927 u8 sense[SCSI_SENSE_BUFFERSIZE]; 927 u8 sense[SCSI_SENSE_BUFFERSIZE];
928 928
929 if (copy_from_user(&hdr, uarg, sizeof(hdr))) 929 if (copy_from_user(&hdr, uarg, sizeof(hdr)))
@@ -936,7 +936,9 @@ static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
936 bio = rq->bio; 936 bio = rq->bio;
937 if (rq->next_rq) 937 if (rq->next_rq)
938 bidi_bio = rq->next_rq->bio; 938 bidi_bio = rq->next_rq->bio;
939 blk_execute_rq(bd->queue, NULL, rq, 0); 939
940 at_head = (0 == (hdr.flags & BSG_FLAG_Q_AT_TAIL));
941 blk_execute_rq(bd->queue, NULL, rq, at_head);
940 ret = blk_complete_sgv4_hdr_rq(rq, &hdr, bio, bidi_bio); 942 ret = blk_complete_sgv4_hdr_rq(rq, &hdr, bio, bidi_bio);
941 943
942 if (copy_to_user(uarg, &hdr, sizeof(hdr))) 944 if (copy_to_user(uarg, &hdr, sizeof(hdr)))
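The bsg.c hunks above start honoring a queue-position flag from userspace instead of always inserting at the head. A minimal sketch of that mapping (the flag name comes from the hunks; its numeric value below is an assumption for illustration only):

	/* bsg queues at the head by default; BSG_FLAG_Q_AT_TAIL asks for tail insertion. */
	#define SKETCH_BSG_FLAG_Q_AT_TAIL 0x10	/* assumed value */

	static inline int sketch_bsg_at_head(unsigned int hdr_flags)
	{
		return (hdr_flags & SKETCH_BSG_FLAG_Q_AT_TAIL) == 0;
	}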
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 664ebfd092ec..a4809de6fea6 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -160,6 +160,7 @@ struct cfq_queue {
160 160
161 unsigned long slice_end; 161 unsigned long slice_end;
162 long slice_resid; 162 long slice_resid;
163 unsigned int slice_dispatch;
163 164
164 /* pending metadata requests */ 165 /* pending metadata requests */
165 int meta_pending; 166 int meta_pending;
@@ -176,13 +177,12 @@ struct cfq_queue {
176enum cfqq_state_flags { 177enum cfqq_state_flags {
177 CFQ_CFQQ_FLAG_on_rr = 0, /* on round-robin busy list */ 178 CFQ_CFQQ_FLAG_on_rr = 0, /* on round-robin busy list */
178 CFQ_CFQQ_FLAG_wait_request, /* waiting for a request */ 179 CFQ_CFQQ_FLAG_wait_request, /* waiting for a request */
180 CFQ_CFQQ_FLAG_must_dispatch, /* must be allowed a dispatch */
179 CFQ_CFQQ_FLAG_must_alloc, /* must be allowed rq alloc */ 181 CFQ_CFQQ_FLAG_must_alloc, /* must be allowed rq alloc */
180 CFQ_CFQQ_FLAG_must_alloc_slice, /* per-slice must_alloc flag */ 182 CFQ_CFQQ_FLAG_must_alloc_slice, /* per-slice must_alloc flag */
181 CFQ_CFQQ_FLAG_must_dispatch, /* must dispatch, even if expired */
182 CFQ_CFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */ 183 CFQ_CFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */
183 CFQ_CFQQ_FLAG_idle_window, /* slice idling enabled */ 184 CFQ_CFQQ_FLAG_idle_window, /* slice idling enabled */
184 CFQ_CFQQ_FLAG_prio_changed, /* task priority has changed */ 185 CFQ_CFQQ_FLAG_prio_changed, /* task priority has changed */
185 CFQ_CFQQ_FLAG_queue_new, /* queue never been serviced */
186 CFQ_CFQQ_FLAG_slice_new, /* no requests dispatched in slice */ 186 CFQ_CFQQ_FLAG_slice_new, /* no requests dispatched in slice */
187 CFQ_CFQQ_FLAG_sync, /* synchronous queue */ 187 CFQ_CFQQ_FLAG_sync, /* synchronous queue */
188}; 188};
@@ -203,13 +203,12 @@ static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq) \
203 203
204CFQ_CFQQ_FNS(on_rr); 204CFQ_CFQQ_FNS(on_rr);
205CFQ_CFQQ_FNS(wait_request); 205CFQ_CFQQ_FNS(wait_request);
206CFQ_CFQQ_FNS(must_dispatch);
206CFQ_CFQQ_FNS(must_alloc); 207CFQ_CFQQ_FNS(must_alloc);
207CFQ_CFQQ_FNS(must_alloc_slice); 208CFQ_CFQQ_FNS(must_alloc_slice);
208CFQ_CFQQ_FNS(must_dispatch);
209CFQ_CFQQ_FNS(fifo_expire); 209CFQ_CFQQ_FNS(fifo_expire);
210CFQ_CFQQ_FNS(idle_window); 210CFQ_CFQQ_FNS(idle_window);
211CFQ_CFQQ_FNS(prio_changed); 211CFQ_CFQQ_FNS(prio_changed);
212CFQ_CFQQ_FNS(queue_new);
213CFQ_CFQQ_FNS(slice_new); 212CFQ_CFQQ_FNS(slice_new);
214CFQ_CFQQ_FNS(sync); 213CFQ_CFQQ_FNS(sync);
215#undef CFQ_CFQQ_FNS 214#undef CFQ_CFQQ_FNS
@@ -774,10 +773,15 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
774 if (cfqq) { 773 if (cfqq) {
775 cfq_log_cfqq(cfqd, cfqq, "set_active"); 774 cfq_log_cfqq(cfqd, cfqq, "set_active");
776 cfqq->slice_end = 0; 775 cfqq->slice_end = 0;
776 cfqq->slice_dispatch = 0;
777
778 cfq_clear_cfqq_wait_request(cfqq);
779 cfq_clear_cfqq_must_dispatch(cfqq);
777 cfq_clear_cfqq_must_alloc_slice(cfqq); 780 cfq_clear_cfqq_must_alloc_slice(cfqq);
778 cfq_clear_cfqq_fifo_expire(cfqq); 781 cfq_clear_cfqq_fifo_expire(cfqq);
779 cfq_mark_cfqq_slice_new(cfqq); 782 cfq_mark_cfqq_slice_new(cfqq);
780 cfq_clear_cfqq_queue_new(cfqq); 783
784 del_timer(&cfqd->idle_slice_timer);
781 } 785 }
782 786
783 cfqd->active_queue = cfqq; 787 cfqd->active_queue = cfqq;
@@ -795,7 +799,6 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
795 if (cfq_cfqq_wait_request(cfqq)) 799 if (cfq_cfqq_wait_request(cfqq))
796 del_timer(&cfqd->idle_slice_timer); 800 del_timer(&cfqd->idle_slice_timer);
797 801
798 cfq_clear_cfqq_must_dispatch(cfqq);
799 cfq_clear_cfqq_wait_request(cfqq); 802 cfq_clear_cfqq_wait_request(cfqq);
800 803
801 /* 804 /*
@@ -924,7 +927,6 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
924 (sample_valid(cic->ttime_samples) && cic->ttime_mean > 2)) 927 (sample_valid(cic->ttime_samples) && cic->ttime_mean > 2))
925 return; 928 return;
926 929
927 cfq_mark_cfqq_must_dispatch(cfqq);
928 cfq_mark_cfqq_wait_request(cfqq); 930 cfq_mark_cfqq_wait_request(cfqq);
929 931
930 /* 932 /*
@@ -1010,7 +1012,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
1010 /* 1012 /*
1011 * The active queue has run out of time, expire it and select new. 1013 * The active queue has run out of time, expire it and select new.
1012 */ 1014 */
1013 if (cfq_slice_used(cfqq)) 1015 if (cfq_slice_used(cfqq) && !cfq_cfqq_must_dispatch(cfqq))
1014 goto expire; 1016 goto expire;
1015 1017
1016 /* 1018 /*
@@ -1053,66 +1055,6 @@ keep_queue:
1053 return cfqq; 1055 return cfqq;
1054} 1056}
1055 1057
1056/*
1057 * Dispatch some requests from cfqq, moving them to the request queue
1058 * dispatch list.
1059 */
1060static int
1061__cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1062 int max_dispatch)
1063{
1064 int dispatched = 0;
1065
1066 BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list));
1067
1068 do {
1069 struct request *rq;
1070
1071 /*
1072 * follow expired path, else get first next available
1073 */
1074 rq = cfq_check_fifo(cfqq);
1075 if (rq == NULL)
1076 rq = cfqq->next_rq;
1077
1078 /*
1079 * finally, insert request into driver dispatch list
1080 */
1081 cfq_dispatch_insert(cfqd->queue, rq);
1082
1083 dispatched++;
1084
1085 if (!cfqd->active_cic) {
1086 atomic_inc(&RQ_CIC(rq)->ioc->refcount);
1087 cfqd->active_cic = RQ_CIC(rq);
1088 }
1089
1090 if (RB_EMPTY_ROOT(&cfqq->sort_list))
1091 break;
1092
1093 /*
1094 * If there is a non-empty RT cfqq waiting for current
1095 * cfqq's timeslice to complete, pre-empt this cfqq
1096 */
1097 if (!cfq_class_rt(cfqq) && cfqd->busy_rt_queues)
1098 break;
1099
1100 } while (dispatched < max_dispatch);
1101
1102 /*
1103 * expire an async queue immediately if it has used up its slice. idle
1104 * queue always expire after 1 dispatch round.
1105 */
1106 if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) &&
1107 dispatched >= cfq_prio_to_maxrq(cfqd, cfqq)) ||
1108 cfq_class_idle(cfqq))) {
1109 cfqq->slice_end = jiffies + 1;
1110 cfq_slice_expired(cfqd, 0);
1111 }
1112
1113 return dispatched;
1114}
1115
1116static int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq) 1058static int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq)
1117{ 1059{
1118 int dispatched = 0; 1060 int dispatched = 0;
@@ -1146,11 +1088,45 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd)
1146 return dispatched; 1088 return dispatched;
1147} 1089}
1148 1090
1091/*
1092 * Dispatch a request from cfqq, moving them to the request queue
1093 * dispatch list.
1094 */
1095static void cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1096{
1097 struct request *rq;
1098
1099 BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list));
1100
1101 /*
1102 * follow expired path, else get first next available
1103 */
1104 rq = cfq_check_fifo(cfqq);
1105 if (!rq)
1106 rq = cfqq->next_rq;
1107
1108 /*
1109 * insert request into driver dispatch list
1110 */
1111 cfq_dispatch_insert(cfqd->queue, rq);
1112
1113 if (!cfqd->active_cic) {
1114 struct cfq_io_context *cic = RQ_CIC(rq);
1115
1116 atomic_inc(&cic->ioc->refcount);
1117 cfqd->active_cic = cic;
1118 }
1119}
1120
1121/*
1122 * Find the cfqq that we need to service and move a request from that to the
1123 * dispatch list
1124 */
1149static int cfq_dispatch_requests(struct request_queue *q, int force) 1125static int cfq_dispatch_requests(struct request_queue *q, int force)
1150{ 1126{
1151 struct cfq_data *cfqd = q->elevator->elevator_data; 1127 struct cfq_data *cfqd = q->elevator->elevator_data;
1152 struct cfq_queue *cfqq; 1128 struct cfq_queue *cfqq;
1153 int dispatched; 1129 unsigned int max_dispatch;
1154 1130
1155 if (!cfqd->busy_queues) 1131 if (!cfqd->busy_queues)
1156 return 0; 1132 return 0;
@@ -1158,29 +1134,63 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
1158 if (unlikely(force)) 1134 if (unlikely(force))
1159 return cfq_forced_dispatch(cfqd); 1135 return cfq_forced_dispatch(cfqd);
1160 1136
1161 dispatched = 0; 1137 cfqq = cfq_select_queue(cfqd);
1162 while ((cfqq = cfq_select_queue(cfqd)) != NULL) { 1138 if (!cfqq)
1163 int max_dispatch; 1139 return 0;
1140
1141 /*
1142 * If this is an async queue and we have sync IO in flight, let it wait
1143 */
1144 if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq))
1145 return 0;
1146
1147 max_dispatch = cfqd->cfq_quantum;
1148 if (cfq_class_idle(cfqq))
1149 max_dispatch = 1;
1164 1150
1165 max_dispatch = cfqd->cfq_quantum; 1151 /*
1152 * Does this cfqq already have too much IO in flight?
1153 */
1154 if (cfqq->dispatched >= max_dispatch) {
1155 /*
1156 * idle queue must always only have a single IO in flight
1157 */
1166 if (cfq_class_idle(cfqq)) 1158 if (cfq_class_idle(cfqq))
1167 max_dispatch = 1; 1159 return 0;
1168 1160
1169 if (cfqq->dispatched >= max_dispatch && cfqd->busy_queues > 1) 1161 /*
1170 break; 1162 * We have other queues, don't allow more IO from this one
1163 */
1164 if (cfqd->busy_queues > 1)
1165 return 0;
1171 1166
1172 if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq)) 1167 /*
1173 break; 1168 * we are the only queue, allow up to 4 times of 'quantum'
1169 */
1170 if (cfqq->dispatched >= 4 * max_dispatch)
1171 return 0;
1172 }
1174 1173
1175 cfq_clear_cfqq_must_dispatch(cfqq); 1174 /*
1176 cfq_clear_cfqq_wait_request(cfqq); 1175 * Dispatch a request from this cfqq
1177 del_timer(&cfqd->idle_slice_timer); 1176 */
1177 cfq_dispatch_request(cfqd, cfqq);
1178 cfqq->slice_dispatch++;
1179 cfq_clear_cfqq_must_dispatch(cfqq);
1178 1180
1179 dispatched += __cfq_dispatch_requests(cfqd, cfqq, max_dispatch); 1181 /*
1182 * expire an async queue immediately if it has used up its slice. idle
1183 * queue always expire after 1 dispatch round.
1184 */
1185 if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) &&
1186 cfqq->slice_dispatch >= cfq_prio_to_maxrq(cfqd, cfqq)) ||
1187 cfq_class_idle(cfqq))) {
1188 cfqq->slice_end = jiffies + 1;
1189 cfq_slice_expired(cfqd, 0);
1180 } 1190 }
1181 1191
1182 cfq_log(cfqd, "dispatched=%d", dispatched); 1192 cfq_log(cfqd, "dispatched a request");
1183 return dispatched; 1193 return 1;
1184} 1194}
1185 1195
1186/* 1196/*
@@ -1506,7 +1516,6 @@ retry:
1506 cfqq->cfqd = cfqd; 1516 cfqq->cfqd = cfqd;
1507 1517
1508 cfq_mark_cfqq_prio_changed(cfqq); 1518 cfq_mark_cfqq_prio_changed(cfqq);
1509 cfq_mark_cfqq_queue_new(cfqq);
1510 1519
1511 cfq_init_prio_data(cfqq, ioc); 1520 cfq_init_prio_data(cfqq, ioc);
1512 1521
@@ -1893,15 +1902,13 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1893 1902
1894 if (cfqq == cfqd->active_queue) { 1903 if (cfqq == cfqd->active_queue) {
1895 /* 1904 /*
1896 * if we are waiting for a request for this queue, let it rip 1905 * Remember that we saw a request from this process, but
1897 * immediately and flag that we must not expire this queue 1906 * don't start queuing just yet. Otherwise we risk seeing lots
1898 * just now 1907 * of tiny requests, because we disrupt the normal plugging
1908 * and merging.
1899 */ 1909 */
1900 if (cfq_cfqq_wait_request(cfqq)) { 1910 if (cfq_cfqq_wait_request(cfqq))
1901 cfq_mark_cfqq_must_dispatch(cfqq); 1911 cfq_mark_cfqq_must_dispatch(cfqq);
1902 del_timer(&cfqd->idle_slice_timer);
1903 blk_start_queueing(cfqd->queue);
1904 }
1905 } else if (cfq_should_preempt(cfqd, cfqq, rq)) { 1912 } else if (cfq_should_preempt(cfqd, cfqq, rq)) {
1906 /* 1913 /*
1907 * not the active queue - expire current slice if it is 1914 * not the active queue - expire current slice if it is
@@ -1910,7 +1917,6 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1910 * this new queue is RT and the current one is BE 1917 * this new queue is RT and the current one is BE
1911 */ 1918 */
1912 cfq_preempt_queue(cfqd, cfqq); 1919 cfq_preempt_queue(cfqd, cfqq);
1913 cfq_mark_cfqq_must_dispatch(cfqq);
1914 blk_start_queueing(cfqd->queue); 1920 blk_start_queueing(cfqd->queue);
1915 } 1921 }
1916} 1922}
@@ -1992,8 +1998,10 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
1992 } 1998 }
1993 if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq)) 1999 if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq))
1994 cfq_slice_expired(cfqd, 1); 2000 cfq_slice_expired(cfqd, 1);
1995 else if (sync && RB_EMPTY_ROOT(&cfqq->sort_list)) 2001 else if (sync && !rq_noidle(rq) &&
2002 RB_EMPTY_ROOT(&cfqq->sort_list)) {
1996 cfq_arm_slice_timer(cfqd); 2003 cfq_arm_slice_timer(cfqd);
2004 }
1997 } 2005 }
1998 2006
1999 if (!cfqd->rq_in_driver) 2007 if (!cfqd->rq_in_driver)
@@ -2170,6 +2178,12 @@ static void cfq_idle_slice_timer(unsigned long data)
2170 timed_out = 0; 2178 timed_out = 0;
2171 2179
2172 /* 2180 /*
2181 * We saw a request before the queue expired, let it through
2182 */
2183 if (cfq_cfqq_must_dispatch(cfqq))
2184 goto out_kick;
2185
2186 /*
2173 * expired 2187 * expired
2174 */ 2188 */
2175 if (cfq_slice_used(cfqq)) 2189 if (cfq_slice_used(cfqq))
@@ -2185,10 +2199,8 @@ static void cfq_idle_slice_timer(unsigned long data)
2185 /* 2199 /*
2186 * not expired and it has a request pending, let it dispatch 2200 * not expired and it has a request pending, let it dispatch
2187 */ 2201 */
2188 if (!RB_EMPTY_ROOT(&cfqq->sort_list)) { 2202 if (!RB_EMPTY_ROOT(&cfqq->sort_list))
2189 cfq_mark_cfqq_must_dispatch(cfqq);
2190 goto out_kick; 2203 goto out_kick;
2191 }
2192 } 2204 }
2193expire: 2205expire:
2194 cfq_slice_expired(cfqd, timed_out); 2206 cfq_slice_expired(cfqd, timed_out);
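The cfq-iosched.c hunks above replace the old inner dispatch loop with a one-request-per-call model, tracked by the new slice_dispatch counter. A standalone model of the per-call decision (plain C, not kernel code; the struct and parameter names are invented for illustration):

	#include <stdbool.h>

	struct cfqq_model {
		int dispatched;		/* requests from this queue still in flight */
		bool is_sync;
		bool is_idle_class;
	};

	/* Returns how many requests one dispatch call moves: 0 or 1. */
	static int model_dispatch(const struct cfqq_model *q, int busy_queues,
				  bool sync_in_flight, int quantum)
	{
		int max_dispatch = q->is_idle_class ? 1 : quantum;

		if (sync_in_flight && !q->is_sync)
			return 0;		/* let sync IO drain first */

		if (q->dispatched >= max_dispatch) {
			if (q->is_idle_class)
				return 0;	/* idle class: strictly one at a time */
			if (busy_queues > 1)
				return 0;	/* other queues are waiting */
			if (q->dispatched >= 4 * max_dispatch)
				return 0;	/* sole busy queue, capped at 4x quantum */
		}
		return 1;			/* dispatch exactly one request */
	}

Async and idle-class queues that have used up their slice are still expired immediately after a dispatch, as before, but the check is now driven by slice_dispatch rather than the count accumulated inside one dispatch loop.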
diff --git a/block/cmd-filter.c b/block/cmd-filter.c
index 504b275e1b90..572bbc2f900d 100644
--- a/block/cmd-filter.c
+++ b/block/cmd-filter.c
@@ -22,6 +22,7 @@
22#include <linux/spinlock.h> 22#include <linux/spinlock.h>
23#include <linux/capability.h> 23#include <linux/capability.h>
24#include <linux/bitops.h> 24#include <linux/bitops.h>
25#include <linux/blkdev.h>
25 26
26#include <scsi/scsi.h> 27#include <scsi/scsi.h>
27#include <linux/cdrom.h> 28#include <linux/cdrom.h>
diff --git a/block/elevator.c b/block/elevator.c
index 98259eda0ef6..fb81bcc14a8c 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -573,7 +573,7 @@ void elv_requeue_request(struct request_queue *q, struct request *rq)
573 elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE); 573 elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
574} 574}
575 575
576static void elv_drain_elevator(struct request_queue *q) 576void elv_drain_elevator(struct request_queue *q)
577{ 577{
578 static int printed; 578 static int printed;
579 while (q->elevator->ops->elevator_dispatch_fn(q, 1)) 579 while (q->elevator->ops->elevator_dispatch_fn(q, 1))
@@ -587,6 +587,31 @@ static void elv_drain_elevator(struct request_queue *q)
587 } 587 }
588} 588}
589 589
590/*
591 * Call with queue lock held, interrupts disabled
592 */
593void elv_quisce_start(struct request_queue *q)
594{
595 queue_flag_set(QUEUE_FLAG_ELVSWITCH, q);
596
597 /*
598 * make sure we don't have any requests in flight
599 */
600 elv_drain_elevator(q);
601 while (q->rq.elvpriv) {
602 blk_start_queueing(q);
603 spin_unlock_irq(q->queue_lock);
604 msleep(10);
605 spin_lock_irq(q->queue_lock);
606 elv_drain_elevator(q);
607 }
608}
609
610void elv_quisce_end(struct request_queue *q)
611{
612 queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q);
613}
614
590void elv_insert(struct request_queue *q, struct request *rq, int where) 615void elv_insert(struct request_queue *q, struct request *rq, int where)
591{ 616{
592 struct list_head *pos; 617 struct list_head *pos;
@@ -677,7 +702,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
677 } 702 }
678 703
679 if (unplug_it && blk_queue_plugged(q)) { 704 if (unplug_it && blk_queue_plugged(q)) {
680 int nrq = q->rq.count[READ] + q->rq.count[WRITE] 705 int nrq = q->rq.count[BLK_RW_SYNC] + q->rq.count[BLK_RW_ASYNC]
681 - q->in_flight; 706 - q->in_flight;
682 707
683 if (nrq >= q->unplug_thresh) 708 if (nrq >= q->unplug_thresh)
@@ -1101,18 +1126,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
1101 * Turn on BYPASS and drain all requests w/ elevator private data 1126 * Turn on BYPASS and drain all requests w/ elevator private data
1102 */ 1127 */
1103 spin_lock_irq(q->queue_lock); 1128 spin_lock_irq(q->queue_lock);
1104 1129 elv_quisce_start(q);
1105 queue_flag_set(QUEUE_FLAG_ELVSWITCH, q);
1106
1107 elv_drain_elevator(q);
1108
1109 while (q->rq.elvpriv) {
1110 blk_start_queueing(q);
1111 spin_unlock_irq(q->queue_lock);
1112 msleep(10);
1113 spin_lock_irq(q->queue_lock);
1114 elv_drain_elevator(q);
1115 }
1116 1130
1117 /* 1131 /*
1118 * Remember old elevator. 1132 * Remember old elevator.
@@ -1136,7 +1150,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
1136 */ 1150 */
1137 elevator_exit(old_elevator); 1151 elevator_exit(old_elevator);
1138 spin_lock_irq(q->queue_lock); 1152 spin_lock_irq(q->queue_lock);
1139 queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q); 1153 elv_quisce_end(q);
1140 spin_unlock_irq(q->queue_lock); 1154 spin_unlock_irq(q->queue_lock);
1141 1155
1142 blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name); 1156 blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name);
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index ee9c67d7e1be..626ee274c5c4 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -214,21 +214,10 @@ static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq,
214 return 0; 214 return 0;
215} 215}
216 216
217/*
218 * unmap a request that was previously mapped to this sg_io_hdr. handles
219 * both sg and non-sg sg_io_hdr.
220 */
221static int blk_unmap_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr)
222{
223 blk_rq_unmap_user(rq->bio);
224 blk_put_request(rq);
225 return 0;
226}
227
228static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr, 217static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr,
229 struct bio *bio) 218 struct bio *bio)
230{ 219{
231 int r, ret = 0; 220 int ret = 0;
232 221
233 /* 222 /*
234 * fill in all the output members 223 * fill in all the output members
@@ -253,12 +242,10 @@ static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr,
253 ret = -EFAULT; 242 ret = -EFAULT;
254 } 243 }
255 244
256 rq->bio = bio; 245 blk_rq_unmap_user(bio);
257 r = blk_unmap_sghdr_rq(rq, hdr); 246 blk_put_request(rq);
258 if (ret)
259 r = ret;
260 247
261 return r; 248 return ret;
262} 249}
263 250
264static int sg_io(struct request_queue *q, struct gendisk *bd_disk, 251static int sg_io(struct request_queue *q, struct gendisk *bd_disk,