Diffstat (limited to 'block/blk-core.c')
-rw-r--r--  block/blk-core.c | 605
1 file changed, 324 insertions, 281 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index 2cba5ef97b2b..2d053b584410 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -26,8 +26,6 @@
 #include <linux/swap.h>
 #include <linux/writeback.h>
 #include <linux/task_io_accounting_ops.h>
-#include <linux/interrupt.h>
-#include <linux/cpu.h>
 #include <linux/blktrace_api.h>
 #include <linux/fault-inject.h>
 
@@ -50,27 +48,26 @@ struct kmem_cache *blk_requestq_cachep;
  */
 static struct workqueue_struct *kblockd_workqueue;
 
-static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
-
 static void drive_stat_acct(struct request *rq, int new_io)
 {
 	struct hd_struct *part;
 	int rw = rq_data_dir(rq);
+	int cpu;
 
 	if (!blk_fs_request(rq) || !rq->rq_disk)
 		return;
 
-	part = get_part(rq->rq_disk, rq->sector);
+	cpu = part_stat_lock();
+	part = disk_map_sector_rcu(rq->rq_disk, rq->sector);
+
 	if (!new_io)
-		__all_stat_inc(rq->rq_disk, part, merges[rw], rq->sector);
+		part_stat_inc(cpu, part, merges[rw]);
 	else {
-		disk_round_stats(rq->rq_disk);
-		rq->rq_disk->in_flight++;
-		if (part) {
-			part_round_stats(part);
-			part->in_flight++;
-		}
+		part_round_stats(cpu, part);
+		part_inc_in_flight(part);
 	}
+
+	part_stat_unlock();
 }
 
 void blk_queue_congestion_threshold(struct request_queue *q)
@@ -113,7 +110,8 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
 	memset(rq, 0, sizeof(*rq));
 
 	INIT_LIST_HEAD(&rq->queuelist);
-	INIT_LIST_HEAD(&rq->donelist);
+	INIT_LIST_HEAD(&rq->timeout_list);
+	rq->cpu = -1;
 	rq->q = q;
 	rq->sector = rq->hard_sector = (sector_t) -1;
 	INIT_HLIST_NODE(&rq->hash);
@@ -308,7 +306,7 @@ void blk_unplug_timeout(unsigned long data)
 	blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL,
 				q->rq.count[READ] + q->rq.count[WRITE]);
 
-	kblockd_schedule_work(&q->unplug_work);
+	kblockd_schedule_work(q, &q->unplug_work);
 }
 
 void blk_unplug(struct request_queue *q)
@@ -325,6 +323,21 @@ void blk_unplug(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_unplug);
 
+static void blk_invoke_request_fn(struct request_queue *q)
+{
+	/*
+	 * one level of recursion is ok and is much faster than kicking
+	 * the unplug handling
+	 */
+	if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
+		q->request_fn(q);
+		queue_flag_clear(QUEUE_FLAG_REENTER, q);
+	} else {
+		queue_flag_set(QUEUE_FLAG_PLUGGED, q);
+		kblockd_schedule_work(q, &q->unplug_work);
+	}
+}
+
 /**
  * blk_start_queue - restart a previously stopped queue
  * @q: The &struct request_queue in question
@@ -339,18 +352,7 @@ void blk_start_queue(struct request_queue *q)
 	WARN_ON(!irqs_disabled());
 
 	queue_flag_clear(QUEUE_FLAG_STOPPED, q);
-
-	/*
-	 * one level of recursion is ok and is much faster than kicking
-	 * the unplug handling
-	 */
-	if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
-		q->request_fn(q);
-		queue_flag_clear(QUEUE_FLAG_REENTER, q);
-	} else {
-		blk_plug_device(q);
-		kblockd_schedule_work(&q->unplug_work);
-	}
+	blk_invoke_request_fn(q);
 }
 EXPORT_SYMBOL(blk_start_queue);
 
@@ -408,15 +410,8 @@ void __blk_run_queue(struct request_queue *q)
 	 * Only recurse once to avoid overrunning the stack, let the unplug
 	 * handling reinvoke the handler shortly if we already got there.
 	 */
-	if (!elv_queue_empty(q)) {
-		if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
-			q->request_fn(q);
-			queue_flag_clear(QUEUE_FLAG_REENTER, q);
-		} else {
-			blk_plug_device(q);
-			kblockd_schedule_work(&q->unplug_work);
-		}
-	}
+	if (!elv_queue_empty(q))
+		blk_invoke_request_fn(q);
 }
 EXPORT_SYMBOL(__blk_run_queue);
 
@@ -441,6 +436,14 @@ void blk_put_queue(struct request_queue *q)
 
 void blk_cleanup_queue(struct request_queue *q)
 {
+	/*
+	 * We know we have process context here, so we can be a little
+	 * cautious and ensure that pending block actions on this device
+	 * are done before moving on. Going into this function, we should
+	 * not have processes doing IO to this device.
+	 */
+	blk_sync_queue(q);
+
 	mutex_lock(&q->sysfs_lock);
 	queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
 	mutex_unlock(&q->sysfs_lock);
@@ -496,6 +499,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	}
 
 	init_timer(&q->unplug_timer);
+	setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
+	INIT_LIST_HEAD(&q->timeout_list);
 
 	kobject_init(&q->kobj, &blk_queue_ktype);
 
@@ -531,7 +536,7 @@ EXPORT_SYMBOL(blk_alloc_queue_node);
  * request queue; this lock will be taken also from interrupt context, so irq
  * disabling is needed for it.
  *
- * Function returns a pointer to the initialized request queue, or NULL if
+ * Function returns a pointer to the initialized request queue, or %NULL if
  * it didn't succeed.
  *
  * Note:
@@ -569,7 +574,8 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
 	q->request_fn = rfn;
 	q->prep_rq_fn = NULL;
 	q->unplug_fn = generic_unplug_device;
-	q->queue_flags = (1 << QUEUE_FLAG_CLUSTER);
+	q->queue_flags = (1 << QUEUE_FLAG_CLUSTER |
+			  1 << QUEUE_FLAG_STACKABLE);
 	q->queue_lock = lock;
 
 	blk_queue_segment_boundary(q, 0xffffffff);
@@ -624,10 +630,6 @@ blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask)
 
 	blk_rq_init(q, rq);
 
-	/*
-	 * first three bits are identical in rq->cmd_flags and bio->bi_rw,
-	 * see bio.h and blkdev.h
-	 */
 	rq->cmd_flags = rw | REQ_ALLOCED;
 
 	if (priv) {
@@ -888,9 +890,11 @@ EXPORT_SYMBOL(blk_get_request);
  */
 void blk_start_queueing(struct request_queue *q)
 {
-	if (!blk_queue_plugged(q))
+	if (!blk_queue_plugged(q)) {
+		if (unlikely(blk_queue_stopped(q)))
+			return;
 		q->request_fn(q);
-	else
+	} else
 		__generic_unplug_device(q);
 }
 EXPORT_SYMBOL(blk_start_queueing);
@@ -907,6 +911,8 @@ EXPORT_SYMBOL(blk_start_queueing);
  */
 void blk_requeue_request(struct request_queue *q, struct request *rq)
 {
+	blk_delete_timer(rq);
+	blk_clear_rq_complete(rq);
 	blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
 
 	if (blk_rq_tagged(rq))
@@ -917,7 +923,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
 EXPORT_SYMBOL(blk_requeue_request);
 
 /**
- * blk_insert_request - insert a special request in to a request queue
+ * blk_insert_request - insert a special request into a request queue
  * @q: request queue where request should be inserted
  * @rq: request to be inserted
  * @at_head: insert request at head or tail of queue
@@ -927,8 +933,8 @@ EXPORT_SYMBOL(blk_requeue_request);
  * Many block devices need to execute commands asynchronously, so they don't
  * block the whole kernel from preemption during request execution. This is
  * accomplished normally by inserting aritficial requests tagged as
- * REQ_SPECIAL in to the corresponding request queue, and letting them be
- * scheduled for actual execution by the request queue.
+ * REQ_TYPE_SPECIAL in to the corresponding request queue, and letting them
+ * be scheduled for actual execution by the request queue.
  *
  * We have the option of inserting the head or the tail of the queue.
  * Typically we use the tail for new ioctls and so forth. We use the head
@@ -982,8 +988,22 @@ static inline void add_request(struct request_queue *q, struct request *req)
 	__elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0);
 }
 
-/*
- * disk_round_stats() - Round off the performance stats on a struct
+static void part_round_stats_single(int cpu, struct hd_struct *part,
+				    unsigned long now)
+{
+	if (now == part->stamp)
+		return;
+
+	if (part->in_flight) {
+		__part_stat_add(cpu, part, time_in_queue,
+				part->in_flight * (now - part->stamp));
+		__part_stat_add(cpu, part, io_ticks, (now - part->stamp));
+	}
+	part->stamp = now;
+}
+
+/**
+ * part_round_stats() - Round off the performance stats on a struct
  * disk_stats.
  *
  * The average IO queue length and utilisation statistics are maintained
@@ -997,36 +1017,15 @@ static inline void add_request(struct request_queue *q, struct request *req)
  * /proc/diskstats. This accounts immediately for all queue usage up to
  * the current jiffies and restarts the counters again.
  */
-void disk_round_stats(struct gendisk *disk)
+void part_round_stats(int cpu, struct hd_struct *part)
 {
 	unsigned long now = jiffies;
 
-	if (now == disk->stamp)
-		return;
-
-	if (disk->in_flight) {
-		__disk_stat_add(disk, time_in_queue,
-				disk->in_flight * (now - disk->stamp));
-		__disk_stat_add(disk, io_ticks, (now - disk->stamp));
-	}
-	disk->stamp = now;
-}
-EXPORT_SYMBOL_GPL(disk_round_stats);
-
-void part_round_stats(struct hd_struct *part)
-{
-	unsigned long now = jiffies;
-
-	if (now == part->stamp)
-		return;
-
-	if (part->in_flight) {
-		__part_stat_add(part, time_in_queue,
-				part->in_flight * (now - part->stamp));
-		__part_stat_add(part, io_ticks, (now - part->stamp));
-	}
-	part->stamp = now;
+	if (part->partno)
+		part_round_stats_single(cpu, &part_to_disk(part)->part0, now);
+	part_round_stats_single(cpu, part, now);
 }
+EXPORT_SYMBOL_GPL(part_round_stats);
 
 /*
  * queue lock must be held
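
As an aside (not part of the patch): the two hunks above replace the old per-disk disk_round_stats()/in_flight bookkeeping with per-partition, per-CPU statistics. A minimal sketch of the new calling convention, mirroring how drive_stat_acct() earlier in this diff uses it; the helper name my_account_merge() is hypothetical:

static void my_account_merge(struct request *rq)
{
	struct hd_struct *part;
	int cpu;

	/* part_stat_lock() returns the current CPU and pairs with part_stat_unlock() */
	cpu = part_stat_lock();
	part = disk_map_sector_rcu(rq->rq_disk, rq->sector);

	/* fold the elapsed time into io_ticks/time_in_queue, then count the merge */
	part_round_stats(cpu, part);
	part_stat_inc(cpu, part, merges[rq_data_dir(rq)]);

	part_stat_unlock();
}
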
@@ -1070,6 +1069,7 @@ EXPORT_SYMBOL(blk_put_request);
 
 void init_request_from_bio(struct request *req, struct bio *bio)
 {
+	req->cpu = bio->bi_comp_cpu;
 	req->cmd_type = REQ_TYPE_FS;
 
 	/*
@@ -1081,7 +1081,12 @@ void init_request_from_bio(struct request *req, struct bio *bio)
 	/*
 	 * REQ_BARRIER implies no merging, but lets make it explicit
 	 */
-	if (unlikely(bio_barrier(bio)))
+	if (unlikely(bio_discard(bio))) {
+		req->cmd_flags |= REQ_DISCARD;
+		if (bio_barrier(bio))
+			req->cmd_flags |= REQ_SOFTBARRIER;
+		req->q->prepare_discard_fn(req->q, req);
+	} else if (unlikely(bio_barrier(bio)))
 		req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
 
 	if (bio_sync(bio))
@@ -1099,7 +1104,7 @@ void init_request_from_bio(struct request *req, struct bio *bio)
 static int __make_request(struct request_queue *q, struct bio *bio)
 {
 	struct request *req;
-	int el_ret, nr_sectors, barrier, err;
+	int el_ret, nr_sectors, barrier, discard, err;
 	const unsigned short prio = bio_prio(bio);
 	const int sync = bio_sync(bio);
 	int rw_flags;
@@ -1114,7 +1119,14 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 	blk_queue_bounce(q, &bio);
 
 	barrier = bio_barrier(bio);
-	if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) {
+	if (unlikely(barrier) && bio_has_data(bio) &&
+	    (q->next_ordered == QUEUE_ORDERED_NONE)) {
+		err = -EOPNOTSUPP;
+		goto end_io;
+	}
+
+	discard = bio_discard(bio);
+	if (unlikely(discard) && !q->prepare_discard_fn) {
 		err = -EOPNOTSUPP;
 		goto end_io;
 	}
@@ -1138,6 +1150,8 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 		req->biotail = bio;
 		req->nr_sectors = req->hard_nr_sectors += nr_sectors;
 		req->ioprio = ioprio_best(req->ioprio, prio);
+		if (!blk_rq_cpu_valid(req))
+			req->cpu = bio->bi_comp_cpu;
 		drive_stat_acct(req, 0);
 		if (!attempt_back_merge(q, req))
 			elv_merged_request(q, req, el_ret);
@@ -1165,6 +1179,8 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 		req->sector = req->hard_sector = bio->bi_sector;
 		req->nr_sectors = req->hard_nr_sectors += nr_sectors;
 		req->ioprio = ioprio_best(req->ioprio, prio);
+		if (!blk_rq_cpu_valid(req))
+			req->cpu = bio->bi_comp_cpu;
 		drive_stat_acct(req, 0);
 		if (!attempt_front_merge(q, req))
 			elv_merged_request(q, req, el_ret);
@@ -1200,13 +1216,15 @@ get_rq:
 	init_request_from_bio(req, bio);
 
 	spin_lock_irq(q->queue_lock);
+	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
+	    bio_flagged(bio, BIO_CPU_AFFINE))
+		req->cpu = blk_cpu_to_group(smp_processor_id());
 	if (elv_queue_empty(q))
 		blk_plug_device(q);
 	add_request(q, req);
 out:
 	if (sync)
 		__generic_unplug_device(q);
-
 	spin_unlock_irq(q->queue_lock);
 	return 0;
 
@@ -1260,8 +1278,9 @@ __setup("fail_make_request=", setup_fail_make_request);
 
 static int should_fail_request(struct bio *bio)
 {
-	if ((bio->bi_bdev->bd_disk->flags & GENHD_FL_FAIL) ||
-	    (bio->bi_bdev->bd_part && bio->bi_bdev->bd_part->make_it_fail))
+	struct hd_struct *part = bio->bi_bdev->bd_part;
+
+	if (part_to_disk(part)->part0.make_it_fail || part->make_it_fail)
 		return should_fail(&fail_make_request, bio->bi_size);
 
 	return 0;
@@ -1314,7 +1333,7 @@ static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
 }
 
 /**
- * generic_make_request: hand a buffer to its device driver for I/O
+ * generic_make_request - hand a buffer to its device driver for I/O
  * @bio: The bio describing the location in memory and on the device.
  *
  * generic_make_request() is used to make I/O requests of block
@@ -1409,7 +1428,8 @@ end_io:
 
 		if (bio_check_eod(bio, nr_sectors))
 			goto end_io;
-		if (bio_empty_barrier(bio) && !q->prepare_flush_fn) {
+		if ((bio_empty_barrier(bio) && !q->prepare_flush_fn) ||
+		    (bio_discard(bio) && !q->prepare_discard_fn)) {
 			err = -EOPNOTSUPP;
 			goto end_io;
 		}
@@ -1471,13 +1491,13 @@ void generic_make_request(struct bio *bio)
 EXPORT_SYMBOL(generic_make_request);
 
 /**
- * submit_bio: submit a bio to the block device layer for I/O
+ * submit_bio - submit a bio to the block device layer for I/O
  * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
  * @bio: The &struct bio which describes the I/O
  *
  * submit_bio() is very similar in purpose to generic_make_request(), and
  * uses that function to do most of the work. Both are fairly rough
- * interfaces, @bio must be presetup and ready for I/O.
+ * interfaces; @bio must be presetup and ready for I/O.
  *
  */
 void submit_bio(int rw, struct bio *bio)
@@ -1490,11 +1510,7 @@ void submit_bio(int rw, struct bio *bio)
 	 * If it's a regular read/write or a barrier with data attached,
 	 * go through the normal accounting stuff before submission.
 	 */
-	if (!bio_empty_barrier(bio)) {
-
-		BIO_BUG_ON(!bio->bi_size);
-		BIO_BUG_ON(!bio->bi_io_vec);
-
+	if (bio_has_data(bio)) {
 		if (rw & WRITE) {
 			count_vm_events(PGPGOUT, count);
 		} else {
@@ -1517,9 +1533,90 @@ void submit_bio(int rw, struct bio *bio)
 EXPORT_SYMBOL(submit_bio);
 
 /**
+ * blk_rq_check_limits - Helper function to check a request for the queue limit
+ * @q: the queue
+ * @rq: the request being checked
+ *
+ * Description:
+ * @rq may have been made based on weaker limitations of upper-level queues
+ * in request stacking drivers, and it may violate the limitation of @q.
+ * Since the block layer and the underlying device driver trust @rq
+ * after it is inserted to @q, it should be checked against @q before
+ * the insertion using this generic function.
+ *
+ * This function should also be useful for request stacking drivers
+ * in some cases below, so export this fuction.
+ * Request stacking drivers like request-based dm may change the queue
+ * limits while requests are in the queue (e.g. dm's table swapping).
+ * Such request stacking drivers should check those requests agaist
+ * the new queue limits again when they dispatch those requests,
+ * although such checkings are also done against the old queue limits
+ * when submitting requests.
+ */
+int blk_rq_check_limits(struct request_queue *q, struct request *rq)
+{
+	if (rq->nr_sectors > q->max_sectors ||
+	    rq->data_len > q->max_hw_sectors << 9) {
+		printk(KERN_ERR "%s: over max size limit.\n", __func__);
+		return -EIO;
+	}
+
+	/*
+	 * queue's settings related to segment counting like q->bounce_pfn
+	 * may differ from that of other stacking queues.
+	 * Recalculate it to check the request correctly on this queue's
+	 * limitation.
+	 */
+	blk_recalc_rq_segments(rq);
+	if (rq->nr_phys_segments > q->max_phys_segments ||
+	    rq->nr_phys_segments > q->max_hw_segments) {
+		printk(KERN_ERR "%s: over max segments limit.\n", __func__);
+		return -EIO;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(blk_rq_check_limits);
+
+/**
+ * blk_insert_cloned_request - Helper for stacking drivers to submit a request
+ * @q: the queue to submit the request
+ * @rq: the request being queued
+ */
+int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
+{
+	unsigned long flags;
+
+	if (blk_rq_check_limits(q, rq))
+		return -EIO;
+
+#ifdef CONFIG_FAIL_MAKE_REQUEST
+	if (rq->rq_disk && rq->rq_disk->part0.make_it_fail &&
+	    should_fail(&fail_make_request, blk_rq_bytes(rq)))
+		return -EIO;
+#endif
+
+	spin_lock_irqsave(q->queue_lock, flags);
+
+	/*
+	 * Submitting request must be dequeued before calling this function
+	 * because it will be linked to another request_queue
+	 */
+	BUG_ON(blk_queued_rq(rq));
+
+	drive_stat_acct(rq, 1);
+	__elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
+
+	spin_unlock_irqrestore(q->queue_lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
+
+/**
  * __end_that_request_first - end I/O on a request
  * @req: the request being processed
- * @error: 0 for success, < 0 for error
+ * @error: %0 for success, < %0 for error
  * @nr_bytes: number of bytes to complete
  *
  * Description:
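
As an aside (not part of the patch): the kernel-doc above spells out how a request stacking driver such as request-based dm is expected to use these new exports. A hedged sketch of the submission path on the lower device's queue; the function name and the way the clone is obtained are hypothetical:

static int my_dispatch_clone(struct request_queue *lower_q, struct request *clone)
{
	int ret;

	/*
	 * The clone must already be dequeued from the stacking queue;
	 * blk_insert_cloned_request() re-checks it against lower_q's
	 * limits (via blk_rq_check_limits()) before queueing it there.
	 */
	ret = blk_insert_cloned_request(lower_q, clone);
	if (ret)
		return ret;	/* e.g. -EIO: fail the original request */

	return 0;
}
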
@@ -1527,8 +1624,8 @@ EXPORT_SYMBOL(submit_bio);
  * for the next range of segments (if any) in the cluster.
  *
  * Return:
- * 0 - we are done with this request, call end_that_request_last()
- * 1 - still buffers pending for this request
+ * %0 - we are done with this request, call end_that_request_last()
+ * %1 - still buffers pending for this request
  **/
 static int __end_that_request_first(struct request *req, int error,
 				    int nr_bytes)
@@ -1539,7 +1636,7 @@ static int __end_that_request_first(struct request *req, int error,
 	blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE);
 
 	/*
-	 * for a REQ_BLOCK_PC request, we want to carry any eventual
+	 * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual
 	 * sense key with us all the way through
 	 */
 	if (!blk_pc_request(req))
@@ -1552,11 +1649,14 @@ static int __end_that_request_first(struct request *req, int error,
 	}
 
 	if (blk_fs_request(req) && req->rq_disk) {
-		struct hd_struct *part = get_part(req->rq_disk, req->sector);
 		const int rw = rq_data_dir(req);
+		struct hd_struct *part;
+		int cpu;
 
-		all_stat_add(req->rq_disk, part, sectors[rw],
-			     nr_bytes >> 9, req->sector);
+		cpu = part_stat_lock();
+		part = disk_map_sector_rcu(req->rq_disk, req->sector);
+		part_stat_add(cpu, part, sectors[rw], nr_bytes >> 9);
+		part_stat_unlock();
 	}
 
 	total_bytes = bio_nbytes = 0;
@@ -1641,88 +1741,14 @@ static int __end_that_request_first(struct request *req, int error,
 }
 
 /*
- * splice the completion data to a local structure and hand off to
- * process_completion_queue() to complete the requests
- */
-static void blk_done_softirq(struct softirq_action *h)
-{
-	struct list_head *cpu_list, local_list;
-
-	local_irq_disable();
-	cpu_list = &__get_cpu_var(blk_cpu_done);
-	list_replace_init(cpu_list, &local_list);
-	local_irq_enable();
-
-	while (!list_empty(&local_list)) {
-		struct request *rq;
-
-		rq = list_entry(local_list.next, struct request, donelist);
-		list_del_init(&rq->donelist);
-		rq->q->softirq_done_fn(rq);
-	}
-}
-
-static int __cpuinit blk_cpu_notify(struct notifier_block *self,
-				    unsigned long action, void *hcpu)
-{
-	/*
-	 * If a CPU goes away, splice its entries to the current CPU
-	 * and trigger a run of the softirq
-	 */
-	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
-		int cpu = (unsigned long) hcpu;
-
-		local_irq_disable();
-		list_splice_init(&per_cpu(blk_cpu_done, cpu),
-				 &__get_cpu_var(blk_cpu_done));
-		raise_softirq_irqoff(BLOCK_SOFTIRQ);
-		local_irq_enable();
-	}
-
-	return NOTIFY_OK;
-}
-
-
-static struct notifier_block blk_cpu_notifier __cpuinitdata = {
-	.notifier_call	= blk_cpu_notify,
-};
-
-/**
- * blk_complete_request - end I/O on a request
- * @req: the request being processed
- *
- * Description:
- * Ends all I/O on a request. It does not handle partial completions,
- * unless the driver actually implements this in its completion callback
- * through requeueing. The actual completion happens out-of-order,
- * through a softirq handler. The user must have registered a completion
- * callback through blk_queue_softirq_done().
- **/
-
-void blk_complete_request(struct request *req)
-{
-	struct list_head *cpu_list;
-	unsigned long flags;
-
-	BUG_ON(!req->q->softirq_done_fn);
-
-	local_irq_save(flags);
-
-	cpu_list = &__get_cpu_var(blk_cpu_done);
-	list_add_tail(&req->donelist, cpu_list);
-	raise_softirq_irqoff(BLOCK_SOFTIRQ);
-
-	local_irq_restore(flags);
-}
-EXPORT_SYMBOL(blk_complete_request);
-
-/*
  * queue lock must be held
  */
 static void end_that_request_last(struct request *req, int error)
 {
 	struct gendisk *disk = req->rq_disk;
 
+	blk_delete_timer(req);
+
 	if (blk_rq_tagged(req))
 		blk_queue_end_tag(req->q, req);
 
@@ -1740,16 +1766,18 @@ static void end_that_request_last(struct request *req, int error)
 	if (disk && blk_fs_request(req) && req != &req->q->bar_rq) {
 		unsigned long duration = jiffies - req->start_time;
 		const int rw = rq_data_dir(req);
-		struct hd_struct *part = get_part(disk, req->sector);
-
-		__all_stat_inc(disk, part, ios[rw], req->sector);
-		__all_stat_add(disk, part, ticks[rw], duration, req->sector);
-		disk_round_stats(disk);
-		disk->in_flight--;
-		if (part) {
-			part_round_stats(part);
-			part->in_flight--;
-		}
+		struct hd_struct *part;
+		int cpu;
+
+		cpu = part_stat_lock();
+		part = disk_map_sector_rcu(disk, req->sector);
+
+		part_stat_inc(cpu, part, ios[rw]);
+		part_stat_add(cpu, part, ticks[rw], duration);
+		part_round_stats(cpu, part);
+		part_dec_in_flight(part);
+
+		part_stat_unlock();
 	}
 
 	if (req->end_io)
@@ -1762,17 +1790,6 @@ static void end_that_request_last(struct request *req, int error)
 	}
 }
 
-static inline void __end_request(struct request *rq, int uptodate,
-				 unsigned int nr_bytes)
-{
-	int error = 0;
-
-	if (uptodate <= 0)
-		error = uptodate ? uptodate : -EIO;
-
-	__blk_end_request(rq, error, nr_bytes);
-}
-
 /**
  * blk_rq_bytes - Returns bytes left to complete in the entire request
  * @rq: the request being processed
@@ -1803,74 +1820,57 @@ unsigned int blk_rq_cur_bytes(struct request *rq)
 EXPORT_SYMBOL_GPL(blk_rq_cur_bytes);
 
 /**
- * end_queued_request - end all I/O on a queued request
- * @rq: the request being processed
- * @uptodate: error value or 0/1 uptodate flag
- *
- * Description:
- * Ends all I/O on a request, and removes it from the block layer queues.
- * Not suitable for normal IO completion, unless the driver still has
- * the request attached to the block layer.
- *
- **/
-void end_queued_request(struct request *rq, int uptodate)
-{
-	__end_request(rq, uptodate, blk_rq_bytes(rq));
-}
-EXPORT_SYMBOL(end_queued_request);
-
-/**
- * end_dequeued_request - end all I/O on a dequeued request
- * @rq: the request being processed
- * @uptodate: error value or 0/1 uptodate flag
- *
- * Description:
- * Ends all I/O on a request. The request must already have been
- * dequeued using blkdev_dequeue_request(), as is normally the case
- * for most drivers.
- *
- **/
-void end_dequeued_request(struct request *rq, int uptodate)
-{
-	__end_request(rq, uptodate, blk_rq_bytes(rq));
-}
-EXPORT_SYMBOL(end_dequeued_request);
-
-
-/**
  * end_request - end I/O on the current segment of the request
  * @req: the request being processed
- * @uptodate: error value or 0/1 uptodate flag
+ * @uptodate: error value or %0/%1 uptodate flag
  *
  * Description:
  * Ends I/O on the current segment of a request. If that is the only
  * remaining segment, the request is also completed and freed.
  *
- * This is a remnant of how older block drivers handled IO completions.
- * Modern drivers typically end IO on the full request in one go, unless
+ * This is a remnant of how older block drivers handled I/O completions.
+ * Modern drivers typically end I/O on the full request in one go, unless
  * they have a residual value to account for. For that case this function
  * isn't really useful, unless the residual just happens to be the
  * full current segment. In other words, don't use this function in new
- * code. Either use end_request_completely(), or the
- * end_that_request_chunk() (along with end_that_request_last()) for
- * partial completions.
- *
+ * code. Use blk_end_request() or __blk_end_request() to end a request.
  **/
 void end_request(struct request *req, int uptodate)
 {
-	__end_request(req, uptodate, req->hard_cur_sectors << 9);
+	int error = 0;
+
+	if (uptodate <= 0)
+		error = uptodate ? uptodate : -EIO;
+
+	__blk_end_request(req, error, req->hard_cur_sectors << 9);
 }
 EXPORT_SYMBOL(end_request);
 
+static int end_that_request_data(struct request *rq, int error,
+				 unsigned int nr_bytes, unsigned int bidi_bytes)
+{
+	if (rq->bio) {
+		if (__end_that_request_first(rq, error, nr_bytes))
+			return 1;
+
+		/* Bidi request must be completed as a whole */
+		if (blk_bidi_rq(rq) &&
+		    __end_that_request_first(rq->next_rq, error, bidi_bytes))
+			return 1;
+	}
+
+	return 0;
+}
+
 /**
  * blk_end_io - Generic end_io function to complete a request.
  * @rq: the request being processed
- * @error: 0 for success, < 0 for error
+ * @error: %0 for success, < %0 for error
  * @nr_bytes: number of bytes to complete @rq
  * @bidi_bytes: number of bytes to complete @rq->next_rq
  * @drv_callback: function called between completion of bios in the request
  *                and completion of the request.
- *                If the callback returns non 0, this helper returns without
+ *                If the callback returns non %0, this helper returns without
  *                completion of the request.
  *
  * Description:
@@ -1878,8 +1878,8 @@ EXPORT_SYMBOL(end_request);
  * If @rq has leftover, sets it up for the next range of segments.
  *
  * Return:
- * 0 - we are done with this request
- * 1 - this request is not freed yet, it still has pending buffers.
+ * %0 - we are done with this request
+ * %1 - this request is not freed yet, it still has pending buffers.
  **/
 static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes,
 		      unsigned int bidi_bytes,
@@ -1888,15 +1888,8 @@ static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes,
 	struct request_queue *q = rq->q;
 	unsigned long flags = 0UL;
 
-	if (blk_fs_request(rq) || blk_pc_request(rq)) {
-		if (__end_that_request_first(rq, error, nr_bytes))
-			return 1;
-
-		/* Bidi request must be completed as a whole */
-		if (blk_bidi_rq(rq) &&
-		    __end_that_request_first(rq->next_rq, error, bidi_bytes))
-			return 1;
-	}
+	if (end_that_request_data(rq, error, nr_bytes, bidi_bytes))
+		return 1;
 
 	/* Special feature for tricky drivers */
 	if (drv_callback && drv_callback(rq))
@@ -1914,7 +1907,7 @@ static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes,
 /**
  * blk_end_request - Helper function for drivers to complete the request.
  * @rq: the request being processed
- * @error: 0 for success, < 0 for error
+ * @error: %0 for success, < %0 for error
  * @nr_bytes: number of bytes to complete
  *
  * Description:
@@ -1922,8 +1915,8 @@ static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes,
  * If @rq has leftover, sets it up for the next range of segments.
  *
  * Return:
- * 0 - we are done with this request
- * 1 - still buffers pending for this request
+ * %0 - we are done with this request
+ * %1 - still buffers pending for this request
  **/
 int blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
 {
@@ -1934,22 +1927,20 @@ EXPORT_SYMBOL_GPL(blk_end_request);
 /**
  * __blk_end_request - Helper function for drivers to complete the request.
  * @rq: the request being processed
- * @error: 0 for success, < 0 for error
+ * @error: %0 for success, < %0 for error
  * @nr_bytes: number of bytes to complete
  *
  * Description:
  * Must be called with queue lock held unlike blk_end_request().
  *
  * Return:
- * 0 - we are done with this request
- * 1 - still buffers pending for this request
+ * %0 - we are done with this request
+ * %1 - still buffers pending for this request
  **/
 int __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
 {
-	if (blk_fs_request(rq) || blk_pc_request(rq)) {
-		if (__end_that_request_first(rq, error, nr_bytes))
-			return 1;
-	}
+	if (rq->bio && __end_that_request_first(rq, error, nr_bytes))
+		return 1;
 
 	add_disk_randomness(rq->rq_disk);
 
@@ -1962,7 +1953,7 @@ EXPORT_SYMBOL_GPL(__blk_end_request);
 /**
  * blk_end_bidi_request - Helper function for drivers to complete bidi request.
  * @rq: the bidi request being processed
- * @error: 0 for success, < 0 for error
+ * @error: %0 for success, < %0 for error
  * @nr_bytes: number of bytes to complete @rq
  * @bidi_bytes: number of bytes to complete @rq->next_rq
  *
@@ -1970,8 +1961,8 @@ EXPORT_SYMBOL_GPL(__blk_end_request);
  * Ends I/O on a number of bytes attached to @rq and @rq->next_rq.
  *
  * Return:
- * 0 - we are done with this request
- * 1 - still buffers pending for this request
+ * %0 - we are done with this request
+ * %1 - still buffers pending for this request
  **/
 int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes,
 			 unsigned int bidi_bytes)
@@ -1981,13 +1972,43 @@ int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes,
 EXPORT_SYMBOL_GPL(blk_end_bidi_request);
 
 /**
+ * blk_update_request - Special helper function for request stacking drivers
+ * @rq: the request being processed
+ * @error: %0 for success, < %0 for error
+ * @nr_bytes: number of bytes to complete @rq
+ *
+ * Description:
+ * Ends I/O on a number of bytes attached to @rq, but doesn't complete
+ * the request structure even if @rq doesn't have leftover.
+ * If @rq has leftover, sets it up for the next range of segments.
+ *
+ * This special helper function is only for request stacking drivers
+ * (e.g. request-based dm) so that they can handle partial completion.
+ * Actual device drivers should use blk_end_request instead.
+ */
+void blk_update_request(struct request *rq, int error, unsigned int nr_bytes)
+{
+	if (!end_that_request_data(rq, error, nr_bytes, 0)) {
+		/*
+		 * These members are not updated in end_that_request_data()
+		 * when all bios are completed.
+		 * Update them so that the request stacking driver can find
+		 * how many bytes remain in the request later.
+		 */
+		rq->nr_sectors = rq->hard_nr_sectors = 0;
+		rq->current_nr_sectors = rq->hard_cur_sectors = 0;
+	}
+}
+EXPORT_SYMBOL_GPL(blk_update_request);
+
+/**
  * blk_end_request_callback - Special helper function for tricky drivers
  * @rq: the request being processed
- * @error: 0 for success, < 0 for error
+ * @error: %0 for success, < %0 for error
  * @nr_bytes: number of bytes to complete
  * @drv_callback: function called between completion of bios in the request
  *                and completion of the request.
- *                If the callback returns non 0, this helper returns without
+ *                If the callback returns non %0, this helper returns without
  *                completion of the request.
  *
  * Description:
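
As an aside (not part of the patch): a brief sketch of the partial-completion case blk_update_request() is documented for, as a request stacking driver might use it when the lower device reports progress on a clone (the function and parameter names are hypothetical):

static void my_clone_progress(struct request *orig, unsigned int bytes_done)
{
	/*
	 * Account bytes_done on the original request without freeing it;
	 * rq->nr_sectors and friends are updated so the stacking driver
	 * can later see how much of the request is still outstanding.
	 */
	blk_update_request(orig, 0, bytes_done);
}
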
@@ -2000,10 +2021,10 @@ EXPORT_SYMBOL_GPL(blk_end_bidi_request);
  * Don't use this interface in other places anymore.
  *
  * Return:
- * 0 - we are done with this request
- * 1 - this request is not freed yet.
+ * %0 - we are done with this request
+ * %1 - this request is not freed yet.
  *     this request still has pending buffers or
  *     the driver doesn't want to finish this request yet.
  **/
 int blk_end_request_callback(struct request *rq, int error,
 			     unsigned int nr_bytes,
@@ -2016,15 +2037,17 @@ EXPORT_SYMBOL_GPL(blk_end_request_callback);
 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 		     struct bio *bio)
 {
-	/* first two bits are identical in rq->cmd_flags and bio->bi_rw */
+	/* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw, and
+	   we want BIO_RW_AHEAD (bit 1) to imply REQ_FAILFAST (bit 1). */
 	rq->cmd_flags |= (bio->bi_rw & 3);
 
-	rq->nr_phys_segments = bio_phys_segments(q, bio);
-	rq->nr_hw_segments = bio_hw_segments(q, bio);
+	if (bio_has_data(bio)) {
+		rq->nr_phys_segments = bio_phys_segments(q, bio);
+		rq->buffer = bio_data(bio);
+	}
 	rq->current_nr_sectors = bio_cur_sectors(bio);
 	rq->hard_cur_sectors = rq->current_nr_sectors;
 	rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio);
-	rq->buffer = bio_data(bio);
 	rq->data_len = bio->bi_size;
 
 	rq->bio = rq->biotail = bio;
@@ -2033,7 +2056,35 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 	rq->rq_disk = bio->bi_bdev->bd_disk;
 }
 
-int kblockd_schedule_work(struct work_struct *work)
+/**
+ * blk_lld_busy - Check if underlying low-level drivers of a device are busy
+ * @q : the queue of the device being checked
+ *
+ * Description:
+ * Check if underlying low-level drivers of a device are busy.
+ * If the drivers want to export their busy state, they must set own
+ * exporting function using blk_queue_lld_busy() first.
+ *
+ * Basically, this function is used only by request stacking drivers
+ * to stop dispatching requests to underlying devices when underlying
+ * devices are busy. This behavior helps more I/O merging on the queue
+ * of the request stacking driver and prevents I/O throughput regression
+ * on burst I/O load.
+ *
+ * Return:
+ * 0 - Not busy (The request stacking driver should dispatch request)
+ * 1 - Busy (The request stacking driver should stop dispatching request)
+ */
+int blk_lld_busy(struct request_queue *q)
+{
+	if (q->lld_busy_fn)
+		return q->lld_busy_fn(q);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(blk_lld_busy);
+
+int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
 {
 	return queue_work(kblockd_workqueue, work);
 }
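
As an aside (not part of the patch): the blk_lld_busy() kernel-doc above describes a dispatch-time check for stacking drivers. A hedged sketch, assuming the lower driver has registered a callback with blk_queue_lld_busy(); the function name is hypothetical:

static int my_may_dispatch(struct request_queue *lower_q)
{
	/*
	 * If the underlying device says it is busy, keep requests on the
	 * stacking queue so they can still be merged there.
	 */
	if (blk_lld_busy(lower_q))
		return 0;

	return 1;
}
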
@@ -2047,8 +2098,6 @@ EXPORT_SYMBOL(kblockd_flush_work);
 
 int __init blk_dev_init(void)
 {
-	int i;
-
 	kblockd_workqueue = create_workqueue("kblockd");
 	if (!kblockd_workqueue)
 		panic("Failed to create kblockd\n");
@@ -2059,12 +2108,6 @@ int __init blk_dev_init(void)
 	blk_requestq_cachep = kmem_cache_create("blkdev_queue",
 			sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
 
-	for_each_possible_cpu(i)
-		INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
-
-	open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
-	register_hotcpu_notifier(&blk_cpu_notifier);
-
 	return 0;
 }
 
2070 2113