author     Takashi Iwai <tiwai@suse.de>  2008-12-19 02:22:57 -0500
committer  Takashi Iwai <tiwai@suse.de>  2008-12-19 02:22:57 -0500
commit     0ff555192a8d20385d49d1c420e2e8d409b3c0da (patch)
tree       b6e4b6cae1028a310a3488ebf745954c51694bfc /block/blk-core.c
parent     3218c178b41b420cb7e0d120c7a137a3969242e5 (diff)
parent     9e43f0de690211cf7153b5f3ec251bc315647ada (diff)
Merge branch 'fix/hda' into topic/hda
Diffstat (limited to 'block/blk-core.c')
-rw-r--r--  block/blk-core.c  |  666
1 file changed, 377 insertions(+), 289 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index 2cba5ef97b2b..c36aa98fafa3 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -26,8 +26,6 @@
26#include <linux/swap.h> 26#include <linux/swap.h>
27#include <linux/writeback.h> 27#include <linux/writeback.h>
28#include <linux/task_io_accounting_ops.h> 28#include <linux/task_io_accounting_ops.h>
29#include <linux/interrupt.h>
30#include <linux/cpu.h>
31#include <linux/blktrace_api.h> 29#include <linux/blktrace_api.h>
32#include <linux/fault-inject.h> 30#include <linux/fault-inject.h>
33 31
@@ -50,27 +48,26 @@ struct kmem_cache *blk_requestq_cachep;
50 */ 48 */
51static struct workqueue_struct *kblockd_workqueue; 49static struct workqueue_struct *kblockd_workqueue;
52 50
53static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
54
55static void drive_stat_acct(struct request *rq, int new_io) 51static void drive_stat_acct(struct request *rq, int new_io)
56{ 52{
57 struct hd_struct *part; 53 struct hd_struct *part;
58 int rw = rq_data_dir(rq); 54 int rw = rq_data_dir(rq);
55 int cpu;
59 56
60 if (!blk_fs_request(rq) || !rq->rq_disk) 57 if (!blk_fs_request(rq) || !rq->rq_disk)
61 return; 58 return;
62 59
63 part = get_part(rq->rq_disk, rq->sector); 60 cpu = part_stat_lock();
61 part = disk_map_sector_rcu(rq->rq_disk, rq->sector);
62
64 if (!new_io) 63 if (!new_io)
65 __all_stat_inc(rq->rq_disk, part, merges[rw], rq->sector); 64 part_stat_inc(cpu, part, merges[rw]);
66 else { 65 else {
67 disk_round_stats(rq->rq_disk); 66 part_round_stats(cpu, part);
68 rq->rq_disk->in_flight++; 67 part_inc_in_flight(part);
69 if (part) {
70 part_round_stats(part);
71 part->in_flight++;
72 }
73 } 68 }
69
70 part_stat_unlock();
74} 71}
75 72
76void blk_queue_congestion_threshold(struct request_queue *q) 73void blk_queue_congestion_threshold(struct request_queue *q)
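For reference, the per-partition accounting pattern introduced in drive_stat_acct() above reads, in isolation, roughly as follows. This is a sketch only; example_account_io() is a hypothetical driver-side helper, while the part_stat_lock()/disk_map_sector_rcu()/part_stat_*() calls are exactly the ones this diff adds (the same sequence appears again in the completion path further down).

    #include <linux/blkdev.h>
    #include <linux/genhd.h>

    /* Hypothetical helper: account one completed request against its partition. */
    static void example_account_io(struct request *rq)
    {
            const int rw = rq_data_dir(rq);
            struct hd_struct *part;
            int cpu;

            cpu = part_stat_lock();         /* pins the CPU used by the per-cpu counters */
            part = disk_map_sector_rcu(rq->rq_disk, rq->sector);

            part_stat_inc(cpu, part, ios[rw]);      /* one more I/O in this direction */
            part_round_stats(cpu, part);            /* fold elapsed time into the queue stats */
            part_dec_in_flight(part);               /* request is no longer in flight */

            part_stat_unlock();
    }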
@@ -113,7 +110,8 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
113 memset(rq, 0, sizeof(*rq)); 110 memset(rq, 0, sizeof(*rq));
114 111
115 INIT_LIST_HEAD(&rq->queuelist); 112 INIT_LIST_HEAD(&rq->queuelist);
116 INIT_LIST_HEAD(&rq->donelist); 113 INIT_LIST_HEAD(&rq->timeout_list);
114 rq->cpu = -1;
117 rq->q = q; 115 rq->q = q;
118 rq->sector = rq->hard_sector = (sector_t) -1; 116 rq->sector = rq->hard_sector = (sector_t) -1;
119 INIT_HLIST_NODE(&rq->hash); 117 INIT_HLIST_NODE(&rq->hash);
@@ -259,7 +257,6 @@ void __generic_unplug_device(struct request_queue *q)
259 257
260 q->request_fn(q); 258 q->request_fn(q);
261} 259}
262EXPORT_SYMBOL(__generic_unplug_device);
263 260
264/** 261/**
265 * generic_unplug_device - fire a request queue 262 * generic_unplug_device - fire a request queue
@@ -308,7 +305,7 @@ void blk_unplug_timeout(unsigned long data)
308 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL, 305 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL,
309 q->rq.count[READ] + q->rq.count[WRITE]); 306 q->rq.count[READ] + q->rq.count[WRITE]);
310 307
311 kblockd_schedule_work(&q->unplug_work); 308 kblockd_schedule_work(q, &q->unplug_work);
312} 309}
313 310
314void blk_unplug(struct request_queue *q) 311void blk_unplug(struct request_queue *q)
@@ -325,6 +322,24 @@ void blk_unplug(struct request_queue *q)
325} 322}
326EXPORT_SYMBOL(blk_unplug); 323EXPORT_SYMBOL(blk_unplug);
327 324
325static void blk_invoke_request_fn(struct request_queue *q)
326{
327 if (unlikely(blk_queue_stopped(q)))
328 return;
329
330 /*
331 * one level of recursion is ok and is much faster than kicking
332 * the unplug handling
333 */
334 if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
335 q->request_fn(q);
336 queue_flag_clear(QUEUE_FLAG_REENTER, q);
337 } else {
338 queue_flag_set(QUEUE_FLAG_PLUGGED, q);
339 kblockd_schedule_work(q, &q->unplug_work);
340 }
341}
342
328/** 343/**
329 * blk_start_queue - restart a previously stopped queue 344 * blk_start_queue - restart a previously stopped queue
330 * @q: The &struct request_queue in question 345 * @q: The &struct request_queue in question
@@ -339,18 +354,7 @@ void blk_start_queue(struct request_queue *q)
339 WARN_ON(!irqs_disabled()); 354 WARN_ON(!irqs_disabled());
340 355
341 queue_flag_clear(QUEUE_FLAG_STOPPED, q); 356 queue_flag_clear(QUEUE_FLAG_STOPPED, q);
342 357 blk_invoke_request_fn(q);
343 /*
344 * one level of recursion is ok and is much faster than kicking
345 * the unplug handling
346 */
347 if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
348 q->request_fn(q);
349 queue_flag_clear(QUEUE_FLAG_REENTER, q);
350 } else {
351 blk_plug_device(q);
352 kblockd_schedule_work(&q->unplug_work);
353 }
354} 358}
355EXPORT_SYMBOL(blk_start_queue); 359EXPORT_SYMBOL(blk_start_queue);
356 360
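As a usage note for the hunk above: blk_start_queue() still expects the caller to hold the queue lock with interrupts disabled (hence the WARN_ON(!irqs_disabled())). A minimal sketch of the usual stop/start pattern follows; the example_* functions are hypothetical, and blk_stop_queue() is the long-standing counterpart rather than part of this diff.

    #include <linux/blkdev.h>

    /* Illustrative only: pause dispatch while the hardware is saturated. */
    static void example_pause_queue(struct request_queue *q)
    {
            unsigned long flags;

            spin_lock_irqsave(q->queue_lock, flags);
            blk_stop_queue(q);      /* sets QUEUE_FLAG_STOPPED; request_fn is not re-invoked */
            spin_unlock_irqrestore(q->queue_lock, flags);
    }

    /* ...and resume once the hardware can take requests again. */
    static void example_resume_queue(struct request_queue *q)
    {
            unsigned long flags;

            spin_lock_irqsave(q->queue_lock, flags);
            blk_start_queue(q);     /* clears STOPPED and re-runs the request_fn */
            spin_unlock_irqrestore(q->queue_lock, flags);
    }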
@@ -397,8 +401,13 @@ void blk_sync_queue(struct request_queue *q)
397EXPORT_SYMBOL(blk_sync_queue); 401EXPORT_SYMBOL(blk_sync_queue);
398 402
399/** 403/**
400 * blk_run_queue - run a single device queue 404 * __blk_run_queue - run a single device queue
401 * @q: The queue to run 405 * @q: The queue to run
406 *
407 * Description:
408 * See @blk_run_queue. This variant must be called with the queue lock
409 * held and interrupts disabled.
410 *
402 */ 411 */
403void __blk_run_queue(struct request_queue *q) 412void __blk_run_queue(struct request_queue *q)
404{ 413{
@@ -408,21 +417,20 @@ void __blk_run_queue(struct request_queue *q)
408 * Only recurse once to avoid overrunning the stack, let the unplug 417 * Only recurse once to avoid overrunning the stack, let the unplug
409 * handling reinvoke the handler shortly if we already got there. 418 * handling reinvoke the handler shortly if we already got there.
410 */ 419 */
411 if (!elv_queue_empty(q)) { 420 if (!elv_queue_empty(q))
412 if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { 421 blk_invoke_request_fn(q);
413 q->request_fn(q);
414 queue_flag_clear(QUEUE_FLAG_REENTER, q);
415 } else {
416 blk_plug_device(q);
417 kblockd_schedule_work(&q->unplug_work);
418 }
419 }
420} 422}
421EXPORT_SYMBOL(__blk_run_queue); 423EXPORT_SYMBOL(__blk_run_queue);
422 424
423/** 425/**
424 * blk_run_queue - run a single device queue 426 * blk_run_queue - run a single device queue
425 * @q: The queue to run 427 * @q: The queue to run
428 *
429 * Description:
430 * Invoke request handling on this queue, if it has pending work to do.
431 * May be used to restart queueing when a request has completed. Also
432 * See @blk_start_queueing.
433 *
426 */ 434 */
427void blk_run_queue(struct request_queue *q) 435void blk_run_queue(struct request_queue *q)
428{ 436{
@@ -441,6 +449,14 @@ void blk_put_queue(struct request_queue *q)
441 449
442void blk_cleanup_queue(struct request_queue *q) 450void blk_cleanup_queue(struct request_queue *q)
443{ 451{
452 /*
453 * We know we have process context here, so we can be a little
454 * cautious and ensure that pending block actions on this device
455 * are done before moving on. Going into this function, we should
456 * not have processes doing IO to this device.
457 */
458 blk_sync_queue(q);
459
444 mutex_lock(&q->sysfs_lock); 460 mutex_lock(&q->sysfs_lock);
445 queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q); 461 queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
446 mutex_unlock(&q->sysfs_lock); 462 mutex_unlock(&q->sysfs_lock);
@@ -496,6 +512,9 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
496 } 512 }
497 513
498 init_timer(&q->unplug_timer); 514 init_timer(&q->unplug_timer);
515 setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
516 INIT_LIST_HEAD(&q->timeout_list);
517 INIT_WORK(&q->unplug_work, blk_unplug_work);
499 518
500 kobject_init(&q->kobj, &blk_queue_ktype); 519 kobject_init(&q->kobj, &blk_queue_ktype);
501 520
@@ -531,7 +550,7 @@ EXPORT_SYMBOL(blk_alloc_queue_node);
531 * request queue; this lock will be taken also from interrupt context, so irq 550 * request queue; this lock will be taken also from interrupt context, so irq
532 * disabling is needed for it. 551 * disabling is needed for it.
533 * 552 *
534 * Function returns a pointer to the initialized request queue, or NULL if 553 * Function returns a pointer to the initialized request queue, or %NULL if
535 * it didn't succeed. 554 * it didn't succeed.
536 * 555 *
537 * Note: 556 * Note:
@@ -569,10 +588,11 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
569 q->request_fn = rfn; 588 q->request_fn = rfn;
570 q->prep_rq_fn = NULL; 589 q->prep_rq_fn = NULL;
571 q->unplug_fn = generic_unplug_device; 590 q->unplug_fn = generic_unplug_device;
572 q->queue_flags = (1 << QUEUE_FLAG_CLUSTER); 591 q->queue_flags = (1 << QUEUE_FLAG_CLUSTER |
592 1 << QUEUE_FLAG_STACKABLE);
573 q->queue_lock = lock; 593 q->queue_lock = lock;
574 594
575 blk_queue_segment_boundary(q, 0xffffffff); 595 blk_queue_segment_boundary(q, BLK_SEG_BOUNDARY_MASK);
576 596
577 blk_queue_make_request(q, __make_request); 597 blk_queue_make_request(q, __make_request);
578 blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE); 598 blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE);
@@ -624,10 +644,6 @@ blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask)
624 644
625 blk_rq_init(q, rq); 645 blk_rq_init(q, rq);
626 646
627 /*
628 * first three bits are identical in rq->cmd_flags and bio->bi_rw,
629 * see bio.h and blkdev.h
630 */
631 rq->cmd_flags = rw | REQ_ALLOCED; 647 rq->cmd_flags = rw | REQ_ALLOCED;
632 648
633 if (priv) { 649 if (priv) {
@@ -882,15 +898,18 @@ EXPORT_SYMBOL(blk_get_request);
882 * 898 *
883 * This is basically a helper to remove the need to know whether a queue 899 * This is basically a helper to remove the need to know whether a queue
884 * is plugged or not if someone just wants to initiate dispatch of requests 900 * is plugged or not if someone just wants to initiate dispatch of requests
885 * for this queue. 901 * for this queue. Should be used to start queueing on a device outside
902 * of ->request_fn() context. Also see @blk_run_queue.
886 * 903 *
887 * The queue lock must be held with interrupts disabled. 904 * The queue lock must be held with interrupts disabled.
888 */ 905 */
889void blk_start_queueing(struct request_queue *q) 906void blk_start_queueing(struct request_queue *q)
890{ 907{
891 if (!blk_queue_plugged(q)) 908 if (!blk_queue_plugged(q)) {
909 if (unlikely(blk_queue_stopped(q)))
910 return;
892 q->request_fn(q); 911 q->request_fn(q);
893 else 912 } else
894 __generic_unplug_device(q); 913 __generic_unplug_device(q);
895} 914}
896EXPORT_SYMBOL(blk_start_queueing); 915EXPORT_SYMBOL(blk_start_queueing);
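The reworded kernel-doc above tightens the blk_start_queueing() contract: call it with the queue lock held and interrupts disabled, from outside ->request_fn() context, and (new with this hunk) it simply returns if the queue is stopped. A hedged sketch, with example_kick_dispatch() being a hypothetical driver function:

    #include <linux/blkdev.h>

    /* Illustrative: restart dispatch after the driver freed an internal resource. */
    static void example_kick_dispatch(struct request_queue *q)
    {
            unsigned long flags;

            spin_lock_irqsave(q->queue_lock, flags);
            /* Runs request_fn directly, or unplugs the queue if it is plugged. */
            blk_start_queueing(q);
            spin_unlock_irqrestore(q->queue_lock, flags);
    }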
@@ -907,6 +926,8 @@ EXPORT_SYMBOL(blk_start_queueing);
907 */ 926 */
908void blk_requeue_request(struct request_queue *q, struct request *rq) 927void blk_requeue_request(struct request_queue *q, struct request *rq)
909{ 928{
929 blk_delete_timer(rq);
930 blk_clear_rq_complete(rq);
910 blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); 931 blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
911 932
912 if (blk_rq_tagged(rq)) 933 if (blk_rq_tagged(rq))
@@ -917,7 +938,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
917EXPORT_SYMBOL(blk_requeue_request); 938EXPORT_SYMBOL(blk_requeue_request);
918 939
919/** 940/**
920 * blk_insert_request - insert a special request in to a request queue 941 * blk_insert_request - insert a special request into a request queue
921 * @q: request queue where request should be inserted 942 * @q: request queue where request should be inserted
922 * @rq: request to be inserted 943 * @rq: request to be inserted
923 * @at_head: insert request at head or tail of queue 944 * @at_head: insert request at head or tail of queue
@@ -927,8 +948,8 @@ EXPORT_SYMBOL(blk_requeue_request);
927 * Many block devices need to execute commands asynchronously, so they don't 948 * Many block devices need to execute commands asynchronously, so they don't
928 * block the whole kernel from preemption during request execution. This is 949 * block the whole kernel from preemption during request execution. This is
929 * accomplished normally by inserting aritficial requests tagged as 950 * accomplished normally by inserting aritficial requests tagged as
930 * REQ_SPECIAL in to the corresponding request queue, and letting them be 951 * REQ_TYPE_SPECIAL in to the corresponding request queue, and letting them
931 * scheduled for actual execution by the request queue. 952 * be scheduled for actual execution by the request queue.
932 * 953 *
933 * We have the option of inserting the head or the tail of the queue. 954 * We have the option of inserting the head or the tail of the queue.
934 * Typically we use the tail for new ioctls and so forth. We use the head 955 * Typically we use the tail for new ioctls and so forth. We use the head
@@ -982,9 +1003,24 @@ static inline void add_request(struct request_queue *q, struct request *req)
982 __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); 1003 __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0);
983} 1004}
984 1005
985/* 1006static void part_round_stats_single(int cpu, struct hd_struct *part,
986 * disk_round_stats() - Round off the performance stats on a struct 1007 unsigned long now)
987 * disk_stats. 1008{
1009 if (now == part->stamp)
1010 return;
1011
1012 if (part->in_flight) {
1013 __part_stat_add(cpu, part, time_in_queue,
1014 part->in_flight * (now - part->stamp));
1015 __part_stat_add(cpu, part, io_ticks, (now - part->stamp));
1016 }
1017 part->stamp = now;
1018}
1019
1020/**
1021 * part_round_stats() - Round off the performance stats on a struct disk_stats.
1022 * @cpu: cpu number for stats access
1023 * @part: target partition
988 * 1024 *
989 * The average IO queue length and utilisation statistics are maintained 1025 * The average IO queue length and utilisation statistics are maintained
990 * by observing the current state of the queue length and the amount of 1026 * by observing the current state of the queue length and the amount of
@@ -997,36 +1033,15 @@ static inline void add_request(struct request_queue *q, struct request *req)
997 * /proc/diskstats. This accounts immediately for all queue usage up to 1033 * /proc/diskstats. This accounts immediately for all queue usage up to
998 * the current jiffies and restarts the counters again. 1034 * the current jiffies and restarts the counters again.
999 */ 1035 */
1000void disk_round_stats(struct gendisk *disk) 1036void part_round_stats(int cpu, struct hd_struct *part)
1001{ 1037{
1002 unsigned long now = jiffies; 1038 unsigned long now = jiffies;
1003 1039
1004 if (now == disk->stamp) 1040 if (part->partno)
1005 return; 1041 part_round_stats_single(cpu, &part_to_disk(part)->part0, now);
1006 1042 part_round_stats_single(cpu, part, now);
1007 if (disk->in_flight) {
1008 __disk_stat_add(disk, time_in_queue,
1009 disk->in_flight * (now - disk->stamp));
1010 __disk_stat_add(disk, io_ticks, (now - disk->stamp));
1011 }
1012 disk->stamp = now;
1013}
1014EXPORT_SYMBOL_GPL(disk_round_stats);
1015
1016void part_round_stats(struct hd_struct *part)
1017{
1018 unsigned long now = jiffies;
1019
1020 if (now == part->stamp)
1021 return;
1022
1023 if (part->in_flight) {
1024 __part_stat_add(part, time_in_queue,
1025 part->in_flight * (now - part->stamp));
1026 __part_stat_add(part, io_ticks, (now - part->stamp));
1027 }
1028 part->stamp = now;
1029} 1043}
1044EXPORT_SYMBOL_GPL(part_round_stats);
1030 1045
1031/* 1046/*
1032 * queue lock must be held 1047 * queue lock must be held
@@ -1070,18 +1085,31 @@ EXPORT_SYMBOL(blk_put_request);
1070 1085
1071void init_request_from_bio(struct request *req, struct bio *bio) 1086void init_request_from_bio(struct request *req, struct bio *bio)
1072{ 1087{
1088 req->cpu = bio->bi_comp_cpu;
1073 req->cmd_type = REQ_TYPE_FS; 1089 req->cmd_type = REQ_TYPE_FS;
1074 1090
1075 /* 1091 /*
1076 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST) 1092 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST)
1077 */ 1093 */
1078 if (bio_rw_ahead(bio) || bio_failfast(bio)) 1094 if (bio_rw_ahead(bio))
1079 req->cmd_flags |= REQ_FAILFAST; 1095 req->cmd_flags |= (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
1096 REQ_FAILFAST_DRIVER);
1097 if (bio_failfast_dev(bio))
1098 req->cmd_flags |= REQ_FAILFAST_DEV;
1099 if (bio_failfast_transport(bio))
1100 req->cmd_flags |= REQ_FAILFAST_TRANSPORT;
1101 if (bio_failfast_driver(bio))
1102 req->cmd_flags |= REQ_FAILFAST_DRIVER;
1080 1103
1081 /* 1104 /*
1082 * REQ_BARRIER implies no merging, but lets make it explicit 1105 * REQ_BARRIER implies no merging, but lets make it explicit
1083 */ 1106 */
1084 if (unlikely(bio_barrier(bio))) 1107 if (unlikely(bio_discard(bio))) {
1108 req->cmd_flags |= REQ_DISCARD;
1109 if (bio_barrier(bio))
1110 req->cmd_flags |= REQ_SOFTBARRIER;
1111 req->q->prepare_discard_fn(req->q, req);
1112 } else if (unlikely(bio_barrier(bio)))
1085 req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE); 1113 req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
1086 1114
1087 if (bio_sync(bio)) 1115 if (bio_sync(bio))
@@ -1099,7 +1127,7 @@ void init_request_from_bio(struct request *req, struct bio *bio)
1099static int __make_request(struct request_queue *q, struct bio *bio) 1127static int __make_request(struct request_queue *q, struct bio *bio)
1100{ 1128{
1101 struct request *req; 1129 struct request *req;
1102 int el_ret, nr_sectors, barrier, err; 1130 int el_ret, nr_sectors, barrier, discard, err;
1103 const unsigned short prio = bio_prio(bio); 1131 const unsigned short prio = bio_prio(bio);
1104 const int sync = bio_sync(bio); 1132 const int sync = bio_sync(bio);
1105 int rw_flags; 1133 int rw_flags;
@@ -1114,7 +1142,14 @@ static int __make_request(struct request_queue *q, struct bio *bio)
1114 blk_queue_bounce(q, &bio); 1142 blk_queue_bounce(q, &bio);
1115 1143
1116 barrier = bio_barrier(bio); 1144 barrier = bio_barrier(bio);
1117 if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) { 1145 if (unlikely(barrier) && bio_has_data(bio) &&
1146 (q->next_ordered == QUEUE_ORDERED_NONE)) {
1147 err = -EOPNOTSUPP;
1148 goto end_io;
1149 }
1150
1151 discard = bio_discard(bio);
1152 if (unlikely(discard) && !q->prepare_discard_fn) {
1118 err = -EOPNOTSUPP; 1153 err = -EOPNOTSUPP;
1119 goto end_io; 1154 goto end_io;
1120 } 1155 }
@@ -1138,6 +1173,8 @@ static int __make_request(struct request_queue *q, struct bio *bio)
1138 req->biotail = bio; 1173 req->biotail = bio;
1139 req->nr_sectors = req->hard_nr_sectors += nr_sectors; 1174 req->nr_sectors = req->hard_nr_sectors += nr_sectors;
1140 req->ioprio = ioprio_best(req->ioprio, prio); 1175 req->ioprio = ioprio_best(req->ioprio, prio);
1176 if (!blk_rq_cpu_valid(req))
1177 req->cpu = bio->bi_comp_cpu;
1141 drive_stat_acct(req, 0); 1178 drive_stat_acct(req, 0);
1142 if (!attempt_back_merge(q, req)) 1179 if (!attempt_back_merge(q, req))
1143 elv_merged_request(q, req, el_ret); 1180 elv_merged_request(q, req, el_ret);
@@ -1165,6 +1202,8 @@ static int __make_request(struct request_queue *q, struct bio *bio)
1165 req->sector = req->hard_sector = bio->bi_sector; 1202 req->sector = req->hard_sector = bio->bi_sector;
1166 req->nr_sectors = req->hard_nr_sectors += nr_sectors; 1203 req->nr_sectors = req->hard_nr_sectors += nr_sectors;
1167 req->ioprio = ioprio_best(req->ioprio, prio); 1204 req->ioprio = ioprio_best(req->ioprio, prio);
1205 if (!blk_rq_cpu_valid(req))
1206 req->cpu = bio->bi_comp_cpu;
1168 drive_stat_acct(req, 0); 1207 drive_stat_acct(req, 0);
1169 if (!attempt_front_merge(q, req)) 1208 if (!attempt_front_merge(q, req))
1170 elv_merged_request(q, req, el_ret); 1209 elv_merged_request(q, req, el_ret);
@@ -1200,13 +1239,15 @@ get_rq:
1200 init_request_from_bio(req, bio); 1239 init_request_from_bio(req, bio);
1201 1240
1202 spin_lock_irq(q->queue_lock); 1241 spin_lock_irq(q->queue_lock);
1242 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
1243 bio_flagged(bio, BIO_CPU_AFFINE))
1244 req->cpu = blk_cpu_to_group(smp_processor_id());
1203 if (elv_queue_empty(q)) 1245 if (elv_queue_empty(q))
1204 blk_plug_device(q); 1246 blk_plug_device(q);
1205 add_request(q, req); 1247 add_request(q, req);
1206out: 1248out:
1207 if (sync) 1249 if (sync)
1208 __generic_unplug_device(q); 1250 __generic_unplug_device(q);
1209
1210 spin_unlock_irq(q->queue_lock); 1251 spin_unlock_irq(q->queue_lock);
1211 return 0; 1252 return 0;
1212 1253
@@ -1260,8 +1301,9 @@ __setup("fail_make_request=", setup_fail_make_request);
1260 1301
1261static int should_fail_request(struct bio *bio) 1302static int should_fail_request(struct bio *bio)
1262{ 1303{
1263 if ((bio->bi_bdev->bd_disk->flags & GENHD_FL_FAIL) || 1304 struct hd_struct *part = bio->bi_bdev->bd_part;
1264 (bio->bi_bdev->bd_part && bio->bi_bdev->bd_part->make_it_fail)) 1305
1306 if (part_to_disk(part)->part0.make_it_fail || part->make_it_fail)
1265 return should_fail(&fail_make_request, bio->bi_size); 1307 return should_fail(&fail_make_request, bio->bi_size);
1266 1308
1267 return 0; 1309 return 0;
@@ -1314,7 +1356,7 @@ static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
1314} 1356}
1315 1357
1316/** 1358/**
1317 * generic_make_request: hand a buffer to its device driver for I/O 1359 * generic_make_request - hand a buffer to its device driver for I/O
1318 * @bio: The bio describing the location in memory and on the device. 1360 * @bio: The bio describing the location in memory and on the device.
1319 * 1361 *
1320 * generic_make_request() is used to make I/O requests of block 1362 * generic_make_request() is used to make I/O requests of block
@@ -1409,7 +1451,8 @@ end_io:
1409 1451
1410 if (bio_check_eod(bio, nr_sectors)) 1452 if (bio_check_eod(bio, nr_sectors))
1411 goto end_io; 1453 goto end_io;
1412 if (bio_empty_barrier(bio) && !q->prepare_flush_fn) { 1454 if ((bio_empty_barrier(bio) && !q->prepare_flush_fn) ||
1455 (bio_discard(bio) && !q->prepare_discard_fn)) {
1413 err = -EOPNOTSUPP; 1456 err = -EOPNOTSUPP;
1414 goto end_io; 1457 goto end_io;
1415 } 1458 }
@@ -1471,13 +1514,13 @@ void generic_make_request(struct bio *bio)
1471EXPORT_SYMBOL(generic_make_request); 1514EXPORT_SYMBOL(generic_make_request);
1472 1515
1473/** 1516/**
1474 * submit_bio: submit a bio to the block device layer for I/O 1517 * submit_bio - submit a bio to the block device layer for I/O
1475 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) 1518 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
1476 * @bio: The &struct bio which describes the I/O 1519 * @bio: The &struct bio which describes the I/O
1477 * 1520 *
1478 * submit_bio() is very similar in purpose to generic_make_request(), and 1521 * submit_bio() is very similar in purpose to generic_make_request(), and
1479 * uses that function to do most of the work. Both are fairly rough 1522 * uses that function to do most of the work. Both are fairly rough
1480 * interfaces, @bio must be presetup and ready for I/O. 1523 * interfaces; @bio must be presetup and ready for I/O.
1481 * 1524 *
1482 */ 1525 */
1483void submit_bio(int rw, struct bio *bio) 1526void submit_bio(int rw, struct bio *bio)
@@ -1490,11 +1533,7 @@ void submit_bio(int rw, struct bio *bio)
1490 * If it's a regular read/write or a barrier with data attached, 1533 * If it's a regular read/write or a barrier with data attached,
1491 * go through the normal accounting stuff before submission. 1534 * go through the normal accounting stuff before submission.
1492 */ 1535 */
1493 if (!bio_empty_barrier(bio)) { 1536 if (bio_has_data(bio)) {
1494
1495 BIO_BUG_ON(!bio->bi_size);
1496 BIO_BUG_ON(!bio->bi_io_vec);
1497
1498 if (rw & WRITE) { 1537 if (rw & WRITE) {
1499 count_vm_events(PGPGOUT, count); 1538 count_vm_events(PGPGOUT, count);
1500 } else { 1539 } else {
@@ -1517,9 +1556,112 @@ void submit_bio(int rw, struct bio *bio)
1517EXPORT_SYMBOL(submit_bio); 1556EXPORT_SYMBOL(submit_bio);
1518 1557
1519/** 1558/**
1559 * blk_rq_check_limits - Helper function to check a request for the queue limit
1560 * @q: the queue
1561 * @rq: the request being checked
1562 *
1563 * Description:
1564 * @rq may have been made based on weaker limitations of upper-level queues
1565 * in request stacking drivers, and it may violate the limitation of @q.
1566 * Since the block layer and the underlying device driver trust @rq
1567 * after it is inserted to @q, it should be checked against @q before
1568 * the insertion using this generic function.
1569 *
1570 * This function should also be useful for request stacking drivers
1571 * in some cases below, so export this function.
1572 * Request stacking drivers like request-based dm may change the queue
1573 * limits while requests are in the queue (e.g. dm's table swapping).
1574 * Such request stacking drivers should check those requests against
1575 * the new queue limits again when they dispatch those requests,
1576 * although such checks are also done against the old queue limits
1577 * when submitting requests.
1578 */
1579int blk_rq_check_limits(struct request_queue *q, struct request *rq)
1580{
1581 if (rq->nr_sectors > q->max_sectors ||
1582 rq->data_len > q->max_hw_sectors << 9) {
1583 printk(KERN_ERR "%s: over max size limit.\n", __func__);
1584 return -EIO;
1585 }
1586
1587 /*
1588 * queue's settings related to segment counting like q->bounce_pfn
1589 * may differ from that of other stacking queues.
1590 * Recalculate it to check the request correctly on this queue's
1591 * limitation.
1592 */
1593 blk_recalc_rq_segments(rq);
1594 if (rq->nr_phys_segments > q->max_phys_segments ||
1595 rq->nr_phys_segments > q->max_hw_segments) {
1596 printk(KERN_ERR "%s: over max segments limit.\n", __func__);
1597 return -EIO;
1598 }
1599
1600 return 0;
1601}
1602EXPORT_SYMBOL_GPL(blk_rq_check_limits);
1603
1604/**
1605 * blk_insert_cloned_request - Helper for stacking drivers to submit a request
1606 * @q: the queue to submit the request
1607 * @rq: the request being queued
1608 */
1609int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
1610{
1611 unsigned long flags;
1612
1613 if (blk_rq_check_limits(q, rq))
1614 return -EIO;
1615
1616#ifdef CONFIG_FAIL_MAKE_REQUEST
1617 if (rq->rq_disk && rq->rq_disk->part0.make_it_fail &&
1618 should_fail(&fail_make_request, blk_rq_bytes(rq)))
1619 return -EIO;
1620#endif
1621
1622 spin_lock_irqsave(q->queue_lock, flags);
1623
1624 /*
1625 * Submitting request must be dequeued before calling this function
1626 * because it will be linked to another request_queue
1627 */
1628 BUG_ON(blk_queued_rq(rq));
1629
1630 drive_stat_acct(rq, 1);
1631 __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
1632
1633 spin_unlock_irqrestore(q->queue_lock, flags);
1634
1635 return 0;
1636}
1637EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
1638
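A hedged sketch of how a request stacking driver (the kernel-doc above names request-based dm) might use the new helper; example_dispatch_clone(), lower_q and clone are assumptions standing in for the stacking driver's own code, and only blk_insert_cloned_request() and blk_rq_check_limits() come from this diff.

    #include <linux/blkdev.h>

    /*
     * Illustrative dispatch path: @clone has already been prepared for the
     * underlying device whose queue is @lower_q.
     */
    static int example_dispatch_clone(struct request_queue *lower_q,
                                      struct request *clone)
    {
            int ret;

            /*
             * blk_insert_cloned_request() re-validates the clone against
             * @lower_q via blk_rq_check_limits(), accounts it and queues it
             * at the back of the elevator for normal dispatch.
             */
            ret = blk_insert_cloned_request(lower_q, clone);
            if (ret)
                    printk(KERN_ERR "example: clone rejected by lower queue\n");

            return ret;
    }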
1639/**
1640 * blkdev_dequeue_request - dequeue request and start timeout timer
1641 * @req: request to dequeue
1642 *
1643 * Dequeue @req and start timeout timer on it. This hands off the
1644 * request to the driver.
1645 *
1646 * Block internal functions which don't want to start timer should
1647 * call elv_dequeue_request().
1648 */
1649void blkdev_dequeue_request(struct request *req)
1650{
1651 elv_dequeue_request(req->q, req);
1652
1653 /*
1654 * We are now handing the request to the hardware, add the
1655 * timeout handler.
1656 */
1657 blk_add_timer(req);
1658}
1659EXPORT_SYMBOL(blkdev_dequeue_request);
1660
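For orientation, a minimal ->request_fn() sketch showing where the new timeout-arming blkdev_dequeue_request() sits. example_request_fn() and example_hw_submit() are hypothetical, and elv_next_request() is assumed to be the contemporary peek helper; it is not part of this diff.

    #include <linux/blkdev.h>

    /* Assumed driver-private submission hook; returns 0 on success. */
    static int example_hw_submit(struct request *rq);

    /* Called by the block layer with q->queue_lock held. */
    static void example_request_fn(struct request_queue *q)
    {
            struct request *rq;

            while ((rq = elv_next_request(q)) != NULL) {
                    /* Hand the request to the hardware and start its timeout timer. */
                    blkdev_dequeue_request(rq);

                    if (example_hw_submit(rq) != 0)
                            /* Fail the whole request; the queue lock is held, so use __blk_end_request(). */
                            __blk_end_request(rq, -EIO, blk_rq_bytes(rq));
            }
    }

Completion from interrupt context would instead use blk_end_request(), which takes the queue lock itself.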
1661/**
1520 * __end_that_request_first - end I/O on a request 1662 * __end_that_request_first - end I/O on a request
1521 * @req: the request being processed 1663 * @req: the request being processed
1522 * @error: 0 for success, < 0 for error 1664 * @error: %0 for success, < %0 for error
1523 * @nr_bytes: number of bytes to complete 1665 * @nr_bytes: number of bytes to complete
1524 * 1666 *
1525 * Description: 1667 * Description:
@@ -1527,8 +1669,8 @@ EXPORT_SYMBOL(submit_bio);
1527 * for the next range of segments (if any) in the cluster. 1669 * for the next range of segments (if any) in the cluster.
1528 * 1670 *
1529 * Return: 1671 * Return:
1530 * 0 - we are done with this request, call end_that_request_last() 1672 * %0 - we are done with this request, call end_that_request_last()
1531 * 1 - still buffers pending for this request 1673 * %1 - still buffers pending for this request
1532 **/ 1674 **/
1533static int __end_that_request_first(struct request *req, int error, 1675static int __end_that_request_first(struct request *req, int error,
1534 int nr_bytes) 1676 int nr_bytes)
@@ -1539,7 +1681,7 @@ static int __end_that_request_first(struct request *req, int error,
1539 blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE); 1681 blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE);
1540 1682
1541 /* 1683 /*
1542 * for a REQ_BLOCK_PC request, we want to carry any eventual 1684 * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual
1543 * sense key with us all the way through 1685 * sense key with us all the way through
1544 */ 1686 */
1545 if (!blk_pc_request(req)) 1687 if (!blk_pc_request(req))
@@ -1552,11 +1694,14 @@ static int __end_that_request_first(struct request *req, int error,
1552 } 1694 }
1553 1695
1554 if (blk_fs_request(req) && req->rq_disk) { 1696 if (blk_fs_request(req) && req->rq_disk) {
1555 struct hd_struct *part = get_part(req->rq_disk, req->sector);
1556 const int rw = rq_data_dir(req); 1697 const int rw = rq_data_dir(req);
1698 struct hd_struct *part;
1699 int cpu;
1557 1700
1558 all_stat_add(req->rq_disk, part, sectors[rw], 1701 cpu = part_stat_lock();
1559 nr_bytes >> 9, req->sector); 1702 part = disk_map_sector_rcu(req->rq_disk, req->sector);
1703 part_stat_add(cpu, part, sectors[rw], nr_bytes >> 9);
1704 part_stat_unlock();
1560 } 1705 }
1561 1706
1562 total_bytes = bio_nbytes = 0; 1707 total_bytes = bio_nbytes = 0;
@@ -1641,82 +1786,6 @@ static int __end_that_request_first(struct request *req, int error,
1641} 1786}
1642 1787
1643/* 1788/*
1644 * splice the completion data to a local structure and hand off to
1645 * process_completion_queue() to complete the requests
1646 */
1647static void blk_done_softirq(struct softirq_action *h)
1648{
1649 struct list_head *cpu_list, local_list;
1650
1651 local_irq_disable();
1652 cpu_list = &__get_cpu_var(blk_cpu_done);
1653 list_replace_init(cpu_list, &local_list);
1654 local_irq_enable();
1655
1656 while (!list_empty(&local_list)) {
1657 struct request *rq;
1658
1659 rq = list_entry(local_list.next, struct request, donelist);
1660 list_del_init(&rq->donelist);
1661 rq->q->softirq_done_fn(rq);
1662 }
1663}
1664
1665static int __cpuinit blk_cpu_notify(struct notifier_block *self,
1666 unsigned long action, void *hcpu)
1667{
1668 /*
1669 * If a CPU goes away, splice its entries to the current CPU
1670 * and trigger a run of the softirq
1671 */
1672 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
1673 int cpu = (unsigned long) hcpu;
1674
1675 local_irq_disable();
1676 list_splice_init(&per_cpu(blk_cpu_done, cpu),
1677 &__get_cpu_var(blk_cpu_done));
1678 raise_softirq_irqoff(BLOCK_SOFTIRQ);
1679 local_irq_enable();
1680 }
1681
1682 return NOTIFY_OK;
1683}
1684
1685
1686static struct notifier_block blk_cpu_notifier __cpuinitdata = {
1687 .notifier_call = blk_cpu_notify,
1688};
1689
1690/**
1691 * blk_complete_request - end I/O on a request
1692 * @req: the request being processed
1693 *
1694 * Description:
1695 * Ends all I/O on a request. It does not handle partial completions,
1696 * unless the driver actually implements this in its completion callback
1697 * through requeueing. The actual completion happens out-of-order,
1698 * through a softirq handler. The user must have registered a completion
1699 * callback through blk_queue_softirq_done().
1700 **/
1701
1702void blk_complete_request(struct request *req)
1703{
1704 struct list_head *cpu_list;
1705 unsigned long flags;
1706
1707 BUG_ON(!req->q->softirq_done_fn);
1708
1709 local_irq_save(flags);
1710
1711 cpu_list = &__get_cpu_var(blk_cpu_done);
1712 list_add_tail(&req->donelist, cpu_list);
1713 raise_softirq_irqoff(BLOCK_SOFTIRQ);
1714
1715 local_irq_restore(flags);
1716}
1717EXPORT_SYMBOL(blk_complete_request);
1718
1719/*
1720 * queue lock must be held 1789 * queue lock must be held
1721 */ 1790 */
1722static void end_that_request_last(struct request *req, int error) 1791static void end_that_request_last(struct request *req, int error)
@@ -1727,11 +1796,13 @@ static void end_that_request_last(struct request *req, int error)
1727 blk_queue_end_tag(req->q, req); 1796 blk_queue_end_tag(req->q, req);
1728 1797
1729 if (blk_queued_rq(req)) 1798 if (blk_queued_rq(req))
1730 blkdev_dequeue_request(req); 1799 elv_dequeue_request(req->q, req);
1731 1800
1732 if (unlikely(laptop_mode) && blk_fs_request(req)) 1801 if (unlikely(laptop_mode) && blk_fs_request(req))
1733 laptop_io_completion(); 1802 laptop_io_completion();
1734 1803
1804 blk_delete_timer(req);
1805
1735 /* 1806 /*
1736 * Account IO completion. bar_rq isn't accounted as a normal 1807 * Account IO completion. bar_rq isn't accounted as a normal
1737 * IO on queueing nor completion. Accounting the containing 1808 * IO on queueing nor completion. Accounting the containing
@@ -1740,16 +1811,18 @@ static void end_that_request_last(struct request *req, int error)
1740 if (disk && blk_fs_request(req) && req != &req->q->bar_rq) { 1811 if (disk && blk_fs_request(req) && req != &req->q->bar_rq) {
1741 unsigned long duration = jiffies - req->start_time; 1812 unsigned long duration = jiffies - req->start_time;
1742 const int rw = rq_data_dir(req); 1813 const int rw = rq_data_dir(req);
1743 struct hd_struct *part = get_part(disk, req->sector); 1814 struct hd_struct *part;
1744 1815 int cpu;
1745 __all_stat_inc(disk, part, ios[rw], req->sector); 1816
1746 __all_stat_add(disk, part, ticks[rw], duration, req->sector); 1817 cpu = part_stat_lock();
1747 disk_round_stats(disk); 1818 part = disk_map_sector_rcu(disk, req->sector);
1748 disk->in_flight--; 1819
1749 if (part) { 1820 part_stat_inc(cpu, part, ios[rw]);
1750 part_round_stats(part); 1821 part_stat_add(cpu, part, ticks[rw], duration);
1751 part->in_flight--; 1822 part_round_stats(cpu, part);
1752 } 1823 part_dec_in_flight(part);
1824
1825 part_stat_unlock();
1753 } 1826 }
1754 1827
1755 if (req->end_io) 1828 if (req->end_io)
@@ -1762,17 +1835,6 @@ static void end_that_request_last(struct request *req, int error)
1762 } 1835 }
1763} 1836}
1764 1837
1765static inline void __end_request(struct request *rq, int uptodate,
1766 unsigned int nr_bytes)
1767{
1768 int error = 0;
1769
1770 if (uptodate <= 0)
1771 error = uptodate ? uptodate : -EIO;
1772
1773 __blk_end_request(rq, error, nr_bytes);
1774}
1775
1776/** 1838/**
1777 * blk_rq_bytes - Returns bytes left to complete in the entire request 1839 * blk_rq_bytes - Returns bytes left to complete in the entire request
1778 * @rq: the request being processed 1840 * @rq: the request being processed
@@ -1803,74 +1865,57 @@ unsigned int blk_rq_cur_bytes(struct request *rq)
1803EXPORT_SYMBOL_GPL(blk_rq_cur_bytes); 1865EXPORT_SYMBOL_GPL(blk_rq_cur_bytes);
1804 1866
1805/** 1867/**
1806 * end_queued_request - end all I/O on a queued request
1807 * @rq: the request being processed
1808 * @uptodate: error value or 0/1 uptodate flag
1809 *
1810 * Description:
1811 * Ends all I/O on a request, and removes it from the block layer queues.
1812 * Not suitable for normal IO completion, unless the driver still has
1813 * the request attached to the block layer.
1814 *
1815 **/
1816void end_queued_request(struct request *rq, int uptodate)
1817{
1818 __end_request(rq, uptodate, blk_rq_bytes(rq));
1819}
1820EXPORT_SYMBOL(end_queued_request);
1821
1822/**
1823 * end_dequeued_request - end all I/O on a dequeued request
1824 * @rq: the request being processed
1825 * @uptodate: error value or 0/1 uptodate flag
1826 *
1827 * Description:
1828 * Ends all I/O on a request. The request must already have been
1829 * dequeued using blkdev_dequeue_request(), as is normally the case
1830 * for most drivers.
1831 *
1832 **/
1833void end_dequeued_request(struct request *rq, int uptodate)
1834{
1835 __end_request(rq, uptodate, blk_rq_bytes(rq));
1836}
1837EXPORT_SYMBOL(end_dequeued_request);
1838
1839
1840/**
1841 * end_request - end I/O on the current segment of the request 1868 * end_request - end I/O on the current segment of the request
1842 * @req: the request being processed 1869 * @req: the request being processed
1843 * @uptodate: error value or 0/1 uptodate flag 1870 * @uptodate: error value or %0/%1 uptodate flag
1844 * 1871 *
1845 * Description: 1872 * Description:
1846 * Ends I/O on the current segment of a request. If that is the only 1873 * Ends I/O on the current segment of a request. If that is the only
1847 * remaining segment, the request is also completed and freed. 1874 * remaining segment, the request is also completed and freed.
1848 * 1875 *
1849 * This is a remnant of how older block drivers handled IO completions. 1876 * This is a remnant of how older block drivers handled I/O completions.
1850 * Modern drivers typically end IO on the full request in one go, unless 1877 * Modern drivers typically end I/O on the full request in one go, unless
1851 * they have a residual value to account for. For that case this function 1878 * they have a residual value to account for. For that case this function
1852 * isn't really useful, unless the residual just happens to be the 1879 * isn't really useful, unless the residual just happens to be the
1853 * full current segment. In other words, don't use this function in new 1880 * full current segment. In other words, don't use this function in new
1854 * code. Either use end_request_completely(), or the 1881 * code. Use blk_end_request() or __blk_end_request() to end a request.
1855 * end_that_request_chunk() (along with end_that_request_last()) for
1856 * partial completions.
1857 *
1858 **/ 1882 **/
1859void end_request(struct request *req, int uptodate) 1883void end_request(struct request *req, int uptodate)
1860{ 1884{
1861 __end_request(req, uptodate, req->hard_cur_sectors << 9); 1885 int error = 0;
1886
1887 if (uptodate <= 0)
1888 error = uptodate ? uptodate : -EIO;
1889
1890 __blk_end_request(req, error, req->hard_cur_sectors << 9);
1862} 1891}
1863EXPORT_SYMBOL(end_request); 1892EXPORT_SYMBOL(end_request);
1864 1893
1894static int end_that_request_data(struct request *rq, int error,
1895 unsigned int nr_bytes, unsigned int bidi_bytes)
1896{
1897 if (rq->bio) {
1898 if (__end_that_request_first(rq, error, nr_bytes))
1899 return 1;
1900
1901 /* Bidi request must be completed as a whole */
1902 if (blk_bidi_rq(rq) &&
1903 __end_that_request_first(rq->next_rq, error, bidi_bytes))
1904 return 1;
1905 }
1906
1907 return 0;
1908}
1909
1865/** 1910/**
1866 * blk_end_io - Generic end_io function to complete a request. 1911 * blk_end_io - Generic end_io function to complete a request.
1867 * @rq: the request being processed 1912 * @rq: the request being processed
1868 * @error: 0 for success, < 0 for error 1913 * @error: %0 for success, < %0 for error
1869 * @nr_bytes: number of bytes to complete @rq 1914 * @nr_bytes: number of bytes to complete @rq
1870 * @bidi_bytes: number of bytes to complete @rq->next_rq 1915 * @bidi_bytes: number of bytes to complete @rq->next_rq
1871 * @drv_callback: function called between completion of bios in the request 1916 * @drv_callback: function called between completion of bios in the request
1872 * and completion of the request. 1917 * and completion of the request.
1873 * If the callback returns non 0, this helper returns without 1918 * If the callback returns non %0, this helper returns without
1874 * completion of the request. 1919 * completion of the request.
1875 * 1920 *
1876 * Description: 1921 * Description:
@@ -1878,8 +1923,8 @@ EXPORT_SYMBOL(end_request);
1878 * If @rq has leftover, sets it up for the next range of segments. 1923 * If @rq has leftover, sets it up for the next range of segments.
1879 * 1924 *
1880 * Return: 1925 * Return:
1881 * 0 - we are done with this request 1926 * %0 - we are done with this request
1882 * 1 - this request is not freed yet, it still has pending buffers. 1927 * %1 - this request is not freed yet, it still has pending buffers.
1883 **/ 1928 **/
1884static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes, 1929static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes,
1885 unsigned int bidi_bytes, 1930 unsigned int bidi_bytes,
@@ -1888,15 +1933,8 @@ static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes,
1888 struct request_queue *q = rq->q; 1933 struct request_queue *q = rq->q;
1889 unsigned long flags = 0UL; 1934 unsigned long flags = 0UL;
1890 1935
1891 if (blk_fs_request(rq) || blk_pc_request(rq)) { 1936 if (end_that_request_data(rq, error, nr_bytes, bidi_bytes))
1892 if (__end_that_request_first(rq, error, nr_bytes)) 1937 return 1;
1893 return 1;
1894
1895 /* Bidi request must be completed as a whole */
1896 if (blk_bidi_rq(rq) &&
1897 __end_that_request_first(rq->next_rq, error, bidi_bytes))
1898 return 1;
1899 }
1900 1938
1901 /* Special feature for tricky drivers */ 1939 /* Special feature for tricky drivers */
1902 if (drv_callback && drv_callback(rq)) 1940 if (drv_callback && drv_callback(rq))
@@ -1914,7 +1952,7 @@ static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes,
1914/** 1952/**
1915 * blk_end_request - Helper function for drivers to complete the request. 1953 * blk_end_request - Helper function for drivers to complete the request.
1916 * @rq: the request being processed 1954 * @rq: the request being processed
1917 * @error: 0 for success, < 0 for error 1955 * @error: %0 for success, < %0 for error
1918 * @nr_bytes: number of bytes to complete 1956 * @nr_bytes: number of bytes to complete
1919 * 1957 *
1920 * Description: 1958 * Description:
@@ -1922,8 +1960,8 @@ static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes,
1922 * If @rq has leftover, sets it up for the next range of segments. 1960 * If @rq has leftover, sets it up for the next range of segments.
1923 * 1961 *
1924 * Return: 1962 * Return:
1925 * 0 - we are done with this request 1963 * %0 - we are done with this request
1926 * 1 - still buffers pending for this request 1964 * %1 - still buffers pending for this request
1927 **/ 1965 **/
1928int blk_end_request(struct request *rq, int error, unsigned int nr_bytes) 1966int blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
1929{ 1967{
@@ -1934,22 +1972,20 @@ EXPORT_SYMBOL_GPL(blk_end_request);
1934/** 1972/**
1935 * __blk_end_request - Helper function for drivers to complete the request. 1973 * __blk_end_request - Helper function for drivers to complete the request.
1936 * @rq: the request being processed 1974 * @rq: the request being processed
1937 * @error: 0 for success, < 0 for error 1975 * @error: %0 for success, < %0 for error
1938 * @nr_bytes: number of bytes to complete 1976 * @nr_bytes: number of bytes to complete
1939 * 1977 *
1940 * Description: 1978 * Description:
1941 * Must be called with queue lock held unlike blk_end_request(). 1979 * Must be called with queue lock held unlike blk_end_request().
1942 * 1980 *
1943 * Return: 1981 * Return:
1944 * 0 - we are done with this request 1982 * %0 - we are done with this request
1945 * 1 - still buffers pending for this request 1983 * %1 - still buffers pending for this request
1946 **/ 1984 **/
1947int __blk_end_request(struct request *rq, int error, unsigned int nr_bytes) 1985int __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
1948{ 1986{
1949 if (blk_fs_request(rq) || blk_pc_request(rq)) { 1987 if (rq->bio && __end_that_request_first(rq, error, nr_bytes))
1950 if (__end_that_request_first(rq, error, nr_bytes)) 1988 return 1;
1951 return 1;
1952 }
1953 1989
1954 add_disk_randomness(rq->rq_disk); 1990 add_disk_randomness(rq->rq_disk);
1955 1991
@@ -1962,7 +1998,7 @@ EXPORT_SYMBOL_GPL(__blk_end_request);
1962/** 1998/**
1963 * blk_end_bidi_request - Helper function for drivers to complete bidi request. 1999 * blk_end_bidi_request - Helper function for drivers to complete bidi request.
1964 * @rq: the bidi request being processed 2000 * @rq: the bidi request being processed
1965 * @error: 0 for success, < 0 for error 2001 * @error: %0 for success, < %0 for error
1966 * @nr_bytes: number of bytes to complete @rq 2002 * @nr_bytes: number of bytes to complete @rq
1967 * @bidi_bytes: number of bytes to complete @rq->next_rq 2003 * @bidi_bytes: number of bytes to complete @rq->next_rq
1968 * 2004 *
@@ -1970,8 +2006,8 @@ EXPORT_SYMBOL_GPL(__blk_end_request);
1970 * Ends I/O on a number of bytes attached to @rq and @rq->next_rq. 2006 * Ends I/O on a number of bytes attached to @rq and @rq->next_rq.
1971 * 2007 *
1972 * Return: 2008 * Return:
1973 * 0 - we are done with this request 2009 * %0 - we are done with this request
1974 * 1 - still buffers pending for this request 2010 * %1 - still buffers pending for this request
1975 **/ 2011 **/
1976int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes, 2012int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes,
1977 unsigned int bidi_bytes) 2013 unsigned int bidi_bytes)
@@ -1981,13 +2017,43 @@ int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes,
1981EXPORT_SYMBOL_GPL(blk_end_bidi_request); 2017EXPORT_SYMBOL_GPL(blk_end_bidi_request);
1982 2018
1983/** 2019/**
2020 * blk_update_request - Special helper function for request stacking drivers
2021 * @rq: the request being processed
2022 * @error: %0 for success, < %0 for error
2023 * @nr_bytes: number of bytes to complete @rq
2024 *
2025 * Description:
2026 * Ends I/O on a number of bytes attached to @rq, but doesn't complete
2027 * the request structure even if @rq doesn't have leftover.
2028 * If @rq has leftover, sets it up for the next range of segments.
2029 *
2030 * This special helper function is only for request stacking drivers
2031 * (e.g. request-based dm) so that they can handle partial completion.
2032 * Actual device drivers should use blk_end_request instead.
2033 */
2034void blk_update_request(struct request *rq, int error, unsigned int nr_bytes)
2035{
2036 if (!end_that_request_data(rq, error, nr_bytes, 0)) {
2037 /*
2038 * These members are not updated in end_that_request_data()
2039 * when all bios are completed.
2040 * Update them so that the request stacking driver can find
2041 * how many bytes remain in the request later.
2042 */
2043 rq->nr_sectors = rq->hard_nr_sectors = 0;
2044 rq->current_nr_sectors = rq->hard_cur_sectors = 0;
2045 }
2046}
2047EXPORT_SYMBOL_GPL(blk_update_request);
2048
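A hedged sketch of the partial-completion case blk_update_request() is meant for: a stacking driver that has seen good_bytes of a request complete on a lower device but wants to keep the request structure alive for its own bookkeeping. example_note_partial_completion() and good_bytes are hypothetical.

    #include <linux/blkdev.h>

    static void example_note_partial_completion(struct request *rq,
                                                unsigned int good_bytes)
    {
            /*
             * Completes up to @good_bytes worth of bios on @rq but never calls
             * end_that_request_last(), so @rq stays allocated even when nothing
             * is left and the stacking driver can finish or retry it later.
             */
            blk_update_request(rq, 0, good_bytes);

            /*
             * On full completion the code above zeroes rq->nr_sectors and
             * friends, so the driver can tell how much (if anything) remains.
             */
            if (rq->nr_sectors == 0)
                    printk(KERN_DEBUG "example: request fully transferred\n");
    }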
2049/**
1984 * blk_end_request_callback - Special helper function for tricky drivers 2050 * blk_end_request_callback - Special helper function for tricky drivers
1985 * @rq: the request being processed 2051 * @rq: the request being processed
1986 * @error: 0 for success, < 0 for error 2052 * @error: %0 for success, < %0 for error
1987 * @nr_bytes: number of bytes to complete 2053 * @nr_bytes: number of bytes to complete
1988 * @drv_callback: function called between completion of bios in the request 2054 * @drv_callback: function called between completion of bios in the request
1989 * and completion of the request. 2055 * and completion of the request.
1990 * If the callback returns non 0, this helper returns without 2056 * If the callback returns non %0, this helper returns without
1991 * completion of the request. 2057 * completion of the request.
1992 * 2058 *
1993 * Description: 2059 * Description:
@@ -2000,10 +2066,10 @@ EXPORT_SYMBOL_GPL(blk_end_bidi_request);
2000 * Don't use this interface in other places anymore. 2066 * Don't use this interface in other places anymore.
2001 * 2067 *
2002 * Return: 2068 * Return:
2003 * 0 - we are done with this request 2069 * %0 - we are done with this request
2004 * 1 - this request is not freed yet. 2070 * %1 - this request is not freed yet.
2005 * this request still has pending buffers or 2071 * this request still has pending buffers or
2006 * the driver doesn't want to finish this request yet. 2072 * the driver doesn't want to finish this request yet.
2007 **/ 2073 **/
2008int blk_end_request_callback(struct request *rq, int error, 2074int blk_end_request_callback(struct request *rq, int error,
2009 unsigned int nr_bytes, 2075 unsigned int nr_bytes,
@@ -2016,15 +2082,17 @@ EXPORT_SYMBOL_GPL(blk_end_request_callback);
2016void blk_rq_bio_prep(struct request_queue *q, struct request *rq, 2082void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
2017 struct bio *bio) 2083 struct bio *bio)
2018{ 2084{
2019 /* first two bits are identical in rq->cmd_flags and bio->bi_rw */ 2085 /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw, and
2086 we want BIO_RW_AHEAD (bit 1) to imply REQ_FAILFAST (bit 1). */
2020 rq->cmd_flags |= (bio->bi_rw & 3); 2087 rq->cmd_flags |= (bio->bi_rw & 3);
2021 2088
2022 rq->nr_phys_segments = bio_phys_segments(q, bio); 2089 if (bio_has_data(bio)) {
2023 rq->nr_hw_segments = bio_hw_segments(q, bio); 2090 rq->nr_phys_segments = bio_phys_segments(q, bio);
2091 rq->buffer = bio_data(bio);
2092 }
2024 rq->current_nr_sectors = bio_cur_sectors(bio); 2093 rq->current_nr_sectors = bio_cur_sectors(bio);
2025 rq->hard_cur_sectors = rq->current_nr_sectors; 2094 rq->hard_cur_sectors = rq->current_nr_sectors;
2026 rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio); 2095 rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio);
2027 rq->buffer = bio_data(bio);
2028 rq->data_len = bio->bi_size; 2096 rq->data_len = bio->bi_size;
2029 2097
2030 rq->bio = rq->biotail = bio; 2098 rq->bio = rq->biotail = bio;
@@ -2033,7 +2101,35 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
2033 rq->rq_disk = bio->bi_bdev->bd_disk; 2101 rq->rq_disk = bio->bi_bdev->bd_disk;
2034} 2102}
2035 2103
2036int kblockd_schedule_work(struct work_struct *work) 2104/**
2105 * blk_lld_busy - Check if underlying low-level drivers of a device are busy
2106 * @q : the queue of the device being checked
2107 *
2108 * Description:
2109 * Check if underlying low-level drivers of a device are busy.
2110 * If the drivers want to export their busy state, they must set own
2111 * exporting function using blk_queue_lld_busy() first.
2112 *
2113 * Basically, this function is used only by request stacking drivers
2114 * to stop dispatching requests to underlying devices when underlying
2115 * devices are busy. This behavior helps more I/O merging on the queue
2116 * of the request stacking driver and prevents I/O throughput regression
2117 * on burst I/O load.
2118 *
2119 * Return:
2120 * 0 - Not busy (The request stacking driver should dispatch request)
2121 * 1 - Busy (The request stacking driver should stop dispatching request)
2122 */
2123int blk_lld_busy(struct request_queue *q)
2124{
2125 if (q->lld_busy_fn)
2126 return q->lld_busy_fn(q);
2127
2128 return 0;
2129}
2130EXPORT_SYMBOL_GPL(blk_lld_busy);
2131
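A sketch of the busy-state export that blk_lld_busy() consults. The example_* names are hypothetical; blk_queue_lld_busy() is only mentioned in the kernel-doc above, so its prototype here is an assumption.

    #include <linux/blkdev.h>

    /* Assumed driver-private state. */
    struct example_hw {
            struct request_queue    *queue;
            int                     outstanding;
            int                     max_outstanding;
    };

    /* Busy hook: non-zero means "stop dispatching to me for now". */
    static int example_lld_busy(struct request_queue *q)
    {
            struct example_hw *hw = q->queuedata;

            return hw->outstanding >= hw->max_outstanding;
    }

    static void example_register_busy_fn(struct example_hw *hw)
    {
            /* Prototype assumed: register the hook so blk_lld_busy(q) can call it. */
            blk_queue_lld_busy(hw->queue, example_lld_busy);
    }

A stacking driver would then call blk_lld_busy() on the lower queue before dispatching and hold back while it returns non-zero.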
2132int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
2037{ 2133{
2038 return queue_work(kblockd_workqueue, work); 2134 return queue_work(kblockd_workqueue, work);
2039} 2135}
@@ -2047,8 +2143,6 @@ EXPORT_SYMBOL(kblockd_flush_work);
2047 2143
2048int __init blk_dev_init(void) 2144int __init blk_dev_init(void)
2049{ 2145{
2050 int i;
2051
2052 kblockd_workqueue = create_workqueue("kblockd"); 2146 kblockd_workqueue = create_workqueue("kblockd");
2053 if (!kblockd_workqueue) 2147 if (!kblockd_workqueue)
2054 panic("Failed to create kblockd\n"); 2148 panic("Failed to create kblockd\n");
@@ -2059,12 +2153,6 @@ int __init blk_dev_init(void)
2059 blk_requestq_cachep = kmem_cache_create("blkdev_queue", 2153 blk_requestq_cachep = kmem_cache_create("blkdev_queue",
2060 sizeof(struct request_queue), 0, SLAB_PANIC, NULL); 2154 sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
2061 2155
2062 for_each_possible_cpu(i)
2063 INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
2064
2065 open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
2066 register_hotcpu_notifier(&blk_cpu_notifier);
2067
2068 return 0; 2156 return 0;
2069} 2157}
2070 2158