Diffstat (limited to 'block')
-rw-r--r--  block/Kconfig            |   2
-rw-r--r--  block/as-iosched.c       | 144
-rw-r--r--  block/cfq-iosched.c      |  16
-rw-r--r--  block/deadline-iosched.c |   8
-rw-r--r--  block/elevator.c         | 100
-rw-r--r--  block/genhd.c            | 154
-rw-r--r--  block/ioctl.c            |  24
-rw-r--r--  block/ll_rw_blk.c        | 699
-rw-r--r--  block/scsi_ioctl.c       |  63
9 files changed, 742 insertions, 468 deletions
diff --git a/block/Kconfig b/block/Kconfig
index eb48edb80c1d..377f6dd20e17 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -5,7 +5,7 @@
5#for instance. 5#for instance.
6config LBD 6config LBD
7 bool "Support for Large Block Devices" 7 bool "Support for Large Block Devices"
8 depends on X86 || (MIPS && 32BIT) || PPC32 || ARCH_S390_31 || SUPERH || UML 8 depends on X86 || (MIPS && 32BIT) || PPC32 || (S390 && !64BIT) || SUPERH || UML
9 help 9 help
10 Say Y here if you want to attach large (bigger than 2TB) discs to 10 Say Y here if you want to attach large (bigger than 2TB) discs to
11 your machine, or if you want to have a raid or loopback device 11 your machine, or if you want to have a raid or loopback device
diff --git a/block/as-iosched.c b/block/as-iosched.c
index 43fa20495688..8da3cf66894c 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -182,6 +182,9 @@ struct as_rq {
182 182
183static kmem_cache_t *arq_pool; 183static kmem_cache_t *arq_pool;
184 184
185static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq);
186static void as_antic_stop(struct as_data *ad);
187
185/* 188/*
186 * IO Context helper functions 189 * IO Context helper functions
187 */ 190 */
@@ -370,7 +373,7 @@ static struct as_rq *as_find_first_arq(struct as_data *ad, int data_dir)
370 * existing request against the same sector), which can happen when using 373 * existing request against the same sector), which can happen when using
371 * direct IO, then return the alias. 374 * direct IO, then return the alias.
372 */ 375 */
373static struct as_rq *as_add_arq_rb(struct as_data *ad, struct as_rq *arq) 376static struct as_rq *__as_add_arq_rb(struct as_data *ad, struct as_rq *arq)
374{ 377{
375 struct rb_node **p = &ARQ_RB_ROOT(ad, arq)->rb_node; 378 struct rb_node **p = &ARQ_RB_ROOT(ad, arq)->rb_node;
376 struct rb_node *parent = NULL; 379 struct rb_node *parent = NULL;
@@ -397,6 +400,16 @@ static struct as_rq *as_add_arq_rb(struct as_data *ad, struct as_rq *arq)
397 return NULL; 400 return NULL;
398} 401}
399 402
403static void as_add_arq_rb(struct as_data *ad, struct as_rq *arq)
404{
405 struct as_rq *alias;
406
407 while ((unlikely(alias = __as_add_arq_rb(ad, arq)))) {
408 as_move_to_dispatch(ad, alias);
409 as_antic_stop(ad);
410 }
411}
412
400static inline void as_del_arq_rb(struct as_data *ad, struct as_rq *arq) 413static inline void as_del_arq_rb(struct as_data *ad, struct as_rq *arq)
401{ 414{
402 if (!ON_RB(&arq->rb_node)) { 415 if (!ON_RB(&arq->rb_node)) {
@@ -1133,23 +1146,6 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
1133 /* 1146 /*
1134 * take it off the sort and fifo list, add to dispatch queue 1147 * take it off the sort and fifo list, add to dispatch queue
1135 */ 1148 */
1136 while (!list_empty(&rq->queuelist)) {
1137 struct request *__rq = list_entry_rq(rq->queuelist.next);
1138 struct as_rq *__arq = RQ_DATA(__rq);
1139
1140 list_del(&__rq->queuelist);
1141
1142 elv_dispatch_add_tail(ad->q, __rq);
1143
1144 if (__arq->io_context && __arq->io_context->aic)
1145 atomic_inc(&__arq->io_context->aic->nr_dispatched);
1146
1147 WARN_ON(__arq->state != AS_RQ_QUEUED);
1148 __arq->state = AS_RQ_DISPATCHED;
1149
1150 ad->nr_dispatched++;
1151 }
1152
1153 as_remove_queued_request(ad->q, rq); 1149 as_remove_queued_request(ad->q, rq);
1154 WARN_ON(arq->state != AS_RQ_QUEUED); 1150 WARN_ON(arq->state != AS_RQ_QUEUED);
1155 1151
@@ -1326,49 +1322,12 @@ fifo_expired:
1326} 1322}
1327 1323
1328/* 1324/*
1329 * Add arq to a list behind alias
1330 */
1331static inline void
1332as_add_aliased_request(struct as_data *ad, struct as_rq *arq,
1333 struct as_rq *alias)
1334{
1335 struct request *req = arq->request;
1336 struct list_head *insert = alias->request->queuelist.prev;
1337
1338 /*
1339 * Transfer list of aliases
1340 */
1341 while (!list_empty(&req->queuelist)) {
1342 struct request *__rq = list_entry_rq(req->queuelist.next);
1343 struct as_rq *__arq = RQ_DATA(__rq);
1344
1345 list_move_tail(&__rq->queuelist, &alias->request->queuelist);
1346
1347 WARN_ON(__arq->state != AS_RQ_QUEUED);
1348 }
1349
1350 /*
1351 * Another request with the same start sector on the rbtree.
1352 * Link this request to that sector. They are untangled in
1353 * as_move_to_dispatch
1354 */
1355 list_add(&arq->request->queuelist, insert);
1356
1357 /*
1358 * Don't want to have to handle merges.
1359 */
1360 as_del_arq_hash(arq);
1361 arq->request->flags |= REQ_NOMERGE;
1362}
1363
1364/*
1365 * add arq to rbtree and fifo 1325 * add arq to rbtree and fifo
1366 */ 1326 */
1367static void as_add_request(request_queue_t *q, struct request *rq) 1327static void as_add_request(request_queue_t *q, struct request *rq)
1368{ 1328{
1369 struct as_data *ad = q->elevator->elevator_data; 1329 struct as_data *ad = q->elevator->elevator_data;
1370 struct as_rq *arq = RQ_DATA(rq); 1330 struct as_rq *arq = RQ_DATA(rq);
1371 struct as_rq *alias;
1372 int data_dir; 1331 int data_dir;
1373 1332
1374 arq->state = AS_RQ_NEW; 1333 arq->state = AS_RQ_NEW;
@@ -1387,33 +1346,17 @@ static void as_add_request(request_queue_t *q, struct request *rq)
1387 atomic_inc(&arq->io_context->aic->nr_queued); 1346 atomic_inc(&arq->io_context->aic->nr_queued);
1388 } 1347 }
1389 1348
1390 alias = as_add_arq_rb(ad, arq); 1349 as_add_arq_rb(ad, arq);
1391 if (!alias) { 1350 if (rq_mergeable(arq->request))
1392 /* 1351 as_add_arq_hash(ad, arq);
1393 * set expire time (only used for reads) and add to fifo list
1394 */
1395 arq->expires = jiffies + ad->fifo_expire[data_dir];
1396 list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]);
1397 1352
1398 if (rq_mergeable(arq->request)) 1353 /*
1399 as_add_arq_hash(ad, arq); 1354 * set expire time (only used for reads) and add to fifo list
1400 as_update_arq(ad, arq); /* keep state machine up to date */ 1355 */
1401 1356 arq->expires = jiffies + ad->fifo_expire[data_dir];
1402 } else { 1357 list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]);
1403 as_add_aliased_request(ad, arq, alias);
1404
1405 /*
1406 * have we been anticipating this request?
1407 * or does it come from the same process as the one we are
1408 * anticipating for?
1409 */
1410 if (ad->antic_status == ANTIC_WAIT_REQ
1411 || ad->antic_status == ANTIC_WAIT_NEXT) {
1412 if (as_can_break_anticipation(ad, arq))
1413 as_antic_stop(ad);
1414 }
1415 }
1416 1358
1359 as_update_arq(ad, arq); /* keep state machine up to date */
1417 arq->state = AS_RQ_QUEUED; 1360 arq->state = AS_RQ_QUEUED;
1418} 1361}
1419 1362
@@ -1536,23 +1479,8 @@ static void as_merged_request(request_queue_t *q, struct request *req)
1536 * if the merge was a front merge, we need to reposition request 1479 * if the merge was a front merge, we need to reposition request
1537 */ 1480 */
1538 if (rq_rb_key(req) != arq->rb_key) { 1481 if (rq_rb_key(req) != arq->rb_key) {
1539 struct as_rq *alias, *next_arq = NULL;
1540
1541 if (ad->next_arq[arq->is_sync] == arq)
1542 next_arq = as_find_next_arq(ad, arq);
1543
1544 /*
1545 * Note! We should really be moving any old aliased requests
1546 * off this request and try to insert them into the rbtree. We
1547 * currently don't bother. Ditto the next function.
1548 */
1549 as_del_arq_rb(ad, arq); 1482 as_del_arq_rb(ad, arq);
1550 if ((alias = as_add_arq_rb(ad, arq))) { 1483 as_add_arq_rb(ad, arq);
1551 list_del_init(&arq->fifo);
1552 as_add_aliased_request(ad, arq, alias);
1553 if (next_arq)
1554 ad->next_arq[arq->is_sync] = next_arq;
1555 }
1556 /* 1484 /*
1557 * Note! At this stage of this and the next function, our next 1485 * Note! At this stage of this and the next function, our next
1558 * request may not be optimal - eg the request may have "grown" 1486 * request may not be optimal - eg the request may have "grown"
@@ -1579,18 +1507,8 @@ static void as_merged_requests(request_queue_t *q, struct request *req,
1579 as_add_arq_hash(ad, arq); 1507 as_add_arq_hash(ad, arq);
1580 1508
1581 if (rq_rb_key(req) != arq->rb_key) { 1509 if (rq_rb_key(req) != arq->rb_key) {
1582 struct as_rq *alias, *next_arq = NULL;
1583
1584 if (ad->next_arq[arq->is_sync] == arq)
1585 next_arq = as_find_next_arq(ad, arq);
1586
1587 as_del_arq_rb(ad, arq); 1510 as_del_arq_rb(ad, arq);
1588 if ((alias = as_add_arq_rb(ad, arq))) { 1511 as_add_arq_rb(ad, arq);
1589 list_del_init(&arq->fifo);
1590 as_add_aliased_request(ad, arq, alias);
1591 if (next_arq)
1592 ad->next_arq[arq->is_sync] = next_arq;
1593 }
1594 } 1512 }
1595 1513
1596 /* 1514 /*
@@ -1610,18 +1528,6 @@ static void as_merged_requests(request_queue_t *q, struct request *req,
1610 } 1528 }
1611 1529
1612 /* 1530 /*
1613 * Transfer list of aliases
1614 */
1615 while (!list_empty(&next->queuelist)) {
1616 struct request *__rq = list_entry_rq(next->queuelist.next);
1617 struct as_rq *__arq = RQ_DATA(__rq);
1618
1619 list_move_tail(&__rq->queuelist, &req->queuelist);
1620
1621 WARN_ON(__arq->state != AS_RQ_QUEUED);
1622 }
1623
1624 /*
1625 * kill knowledge of next, this one is a goner 1531 * kill knowledge of next, this one is a goner
1626 */ 1532 */
1627 as_remove_queued_request(q, next); 1533 as_remove_queued_request(q, next);
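[Annotation] The as-iosched hunks above drop the alias chaining (as_add_aliased_request() and the queuelist transfers) in favour of a small wrapper: __as_add_arq_rb() reports a colliding request, and as_add_arq_rb() keeps dispatching that collider until the insert succeeds. Below is a minimal, userspace-only sketch of the same pattern, with a sorted list standing in for the kernel rbtree; every name in it is illustrative, not part of the patch.

    /* Sketch: dispatch aliases instead of chaining them (assumptions above). */
    #include <stdio.h>
    #include <stdlib.h>

    struct rq {
            long key;               /* the "sector" the tree is sorted on */
            struct rq *next;
    };

    static struct rq *tree;         /* sorted, unique keys */
    static struct rq *dispatch;     /* requests handed to the driver */

    /* Insert unless the key already exists; on collision return the owner. */
    static struct rq *tree_insert(struct rq *rq)
    {
            struct rq **p = &tree;

            while (*p && (*p)->key < rq->key)
                    p = &(*p)->next;
            if (*p && (*p)->key == rq->key)
                    return *p;                      /* alias */
            rq->next = *p;
            *p = rq;
            return NULL;
    }

    static void tree_remove(struct rq *rq)
    {
            struct rq **p = &tree;

            while (*p && *p != rq)
                    p = &(*p)->next;
            if (*p)
                    *p = rq->next;
    }

    /* Mirror of the new as_add_arq_rb(): dispatch aliases until insert wins. */
    static void add_rq(struct rq *rq)
    {
            struct rq *alias;

            while ((alias = tree_insert(rq)) != NULL) {
                    tree_remove(alias);             /* plays as_move_to_dispatch() */
                    alias->next = dispatch;
                    dispatch = alias;
            }
    }

    int main(void)
    {
            long keys[] = { 8, 16, 8, 8 };          /* two aliases of sector 8 */

            for (unsigned i = 0; i < sizeof(keys) / sizeof(keys[0]); i++) {
                    struct rq *rq = calloc(1, sizeof(*rq));
                    rq->key = keys[i];
                    add_rq(rq);
            }
            for (struct rq *rq = dispatch; rq; rq = rq->next)
                    printf("dispatched sector %ld\n", rq->key);
            return 0;
    }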
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index ee0bb41694b0..74fae2daf87e 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -25,15 +25,15 @@
25/* 25/*
26 * tunables 26 * tunables
27 */ 27 */
28static int cfq_quantum = 4; /* max queue in one round of service */ 28static const int cfq_quantum = 4; /* max queue in one round of service */
29static int cfq_queued = 8; /* minimum rq allocate limit per-queue*/ 29static const int cfq_queued = 8; /* minimum rq allocate limit per-queue*/
30static int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; 30static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
31static int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */ 31static const int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */
32static int cfq_back_penalty = 2; /* penalty of a backwards seek */ 32static const int cfq_back_penalty = 2; /* penalty of a backwards seek */
33 33
34static int cfq_slice_sync = HZ / 10; 34static const int cfq_slice_sync = HZ / 10;
35static int cfq_slice_async = HZ / 25; 35static int cfq_slice_async = HZ / 25;
36static int cfq_slice_async_rq = 2; 36static const int cfq_slice_async_rq = 2;
37static int cfq_slice_idle = HZ / 100; 37static int cfq_slice_idle = HZ / 100;
38 38
39#define CFQ_IDLE_GRACE (HZ / 10) 39#define CFQ_IDLE_GRACE (HZ / 10)
@@ -45,7 +45,7 @@ static int cfq_slice_idle = HZ / 100;
45/* 45/*
46 * disable queueing at the driver/hardware level 46 * disable queueing at the driver/hardware level
47 */ 47 */
48static int cfq_max_depth = 2; 48static const int cfq_max_depth = 2;
49 49
50/* 50/*
51 * for the hash of cfqq inside the cfqd 51 * for the hash of cfqq inside the cfqd
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index 9cbec09e8415..27e494b1bf97 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -19,10 +19,10 @@
19/* 19/*
20 * See Documentation/block/deadline-iosched.txt 20 * See Documentation/block/deadline-iosched.txt
21 */ 21 */
22static int read_expire = HZ / 2; /* max time before a read is submitted. */ 22static const int read_expire = HZ / 2; /* max time before a read is submitted. */
23static int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */ 23static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */
24static int writes_starved = 2; /* max times reads can starve a write */ 24static const int writes_starved = 2; /* max times reads can starve a write */
25static int fifo_batch = 16; /* # of sequential requests treated as one 25static const int fifo_batch = 16; /* # of sequential requests treated as one
26 by the above parameters. For throughput. */ 26 by the above parameters. For throughput. */
27 27
28static const int deadline_hash_shift = 5; 28static const int deadline_hash_shift = 5;
diff --git a/block/elevator.c b/block/elevator.c
index 6c3fc8a10bf2..c9f424d5399c 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -64,7 +64,7 @@ inline int elv_rq_merge_ok(struct request *rq, struct bio *bio)
64} 64}
65EXPORT_SYMBOL(elv_rq_merge_ok); 65EXPORT_SYMBOL(elv_rq_merge_ok);
66 66
67inline int elv_try_merge(struct request *__rq, struct bio *bio) 67static inline int elv_try_merge(struct request *__rq, struct bio *bio)
68{ 68{
69 int ret = ELEVATOR_NO_MERGE; 69 int ret = ELEVATOR_NO_MERGE;
70 70
@@ -80,7 +80,6 @@ inline int elv_try_merge(struct request *__rq, struct bio *bio)
80 80
81 return ret; 81 return ret;
82} 82}
83EXPORT_SYMBOL(elv_try_merge);
84 83
85static struct elevator_type *elevator_find(const char *name) 84static struct elevator_type *elevator_find(const char *name)
86{ 85{
@@ -150,13 +149,20 @@ static void elevator_setup_default(void)
150 if (!chosen_elevator[0]) 149 if (!chosen_elevator[0])
151 strcpy(chosen_elevator, CONFIG_DEFAULT_IOSCHED); 150 strcpy(chosen_elevator, CONFIG_DEFAULT_IOSCHED);
152 151
152 /*
153 * Be backwards-compatible with previous kernels, so users
154 * won't get the wrong elevator.
155 */
156 if (!strcmp(chosen_elevator, "as"))
157 strcpy(chosen_elevator, "anticipatory");
158
153 /* 159 /*
154 * If the given scheduler is not available, fall back to no-op. 160 * If the given scheduler is not available, fall back to the default
155 */ 161 */
156 if ((e = elevator_find(chosen_elevator))) 162 if ((e = elevator_find(chosen_elevator)))
157 elevator_put(e); 163 elevator_put(e);
158 else 164 else
159 strcpy(chosen_elevator, "noop"); 165 strcpy(chosen_elevator, CONFIG_DEFAULT_IOSCHED);
160} 166}
161 167
162static int __init elevator_setup(char *str) 168static int __init elevator_setup(char *str)
@@ -304,15 +310,7 @@ void elv_requeue_request(request_queue_t *q, struct request *rq)
304 310
305 rq->flags &= ~REQ_STARTED; 311 rq->flags &= ~REQ_STARTED;
306 312
307 /* 313 __elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE, 0);
308 * if this is the flush, requeue the original instead and drop the flush
309 */
310 if (rq->flags & REQ_BAR_FLUSH) {
311 clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
312 rq = rq->end_io_data;
313 }
314
315 __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
316} 314}
317 315
318static void elv_drain_elevator(request_queue_t *q) 316static void elv_drain_elevator(request_queue_t *q)
@@ -332,8 +330,19 @@ static void elv_drain_elevator(request_queue_t *q)
332void __elv_add_request(request_queue_t *q, struct request *rq, int where, 330void __elv_add_request(request_queue_t *q, struct request *rq, int where,
333 int plug) 331 int plug)
334{ 332{
333 struct list_head *pos;
334 unsigned ordseq;
335
336 if (q->ordcolor)
337 rq->flags |= REQ_ORDERED_COLOR;
338
335 if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { 339 if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
336 /* 340 /*
341 * toggle ordered color
342 */
343 q->ordcolor ^= 1;
344
345 /*
337 * barriers implicitly indicate back insertion 346 * barriers implicitly indicate back insertion
338 */ 347 */
339 if (where == ELEVATOR_INSERT_SORT) 348 if (where == ELEVATOR_INSERT_SORT)
@@ -393,6 +402,30 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where,
393 q->elevator->ops->elevator_add_req_fn(q, rq); 402 q->elevator->ops->elevator_add_req_fn(q, rq);
394 break; 403 break;
395 404
405 case ELEVATOR_INSERT_REQUEUE:
406 /*
407 * If ordered flush isn't in progress, we do front
408 * insertion; otherwise, requests should be requeued
409 * in ordseq order.
410 */
411 rq->flags |= REQ_SOFTBARRIER;
412
413 if (q->ordseq == 0) {
414 list_add(&rq->queuelist, &q->queue_head);
415 break;
416 }
417
418 ordseq = blk_ordered_req_seq(rq);
419
420 list_for_each(pos, &q->queue_head) {
421 struct request *pos_rq = list_entry_rq(pos);
422 if (ordseq <= blk_ordered_req_seq(pos_rq))
423 break;
424 }
425
426 list_add_tail(&rq->queuelist, pos);
427 break;
428
396 default: 429 default:
397 printk(KERN_ERR "%s: bad insertion point %d\n", 430 printk(KERN_ERR "%s: bad insertion point %d\n",
398 __FUNCTION__, where); 431 __FUNCTION__, where);
@@ -422,25 +455,16 @@ static inline struct request *__elv_next_request(request_queue_t *q)
422{ 455{
423 struct request *rq; 456 struct request *rq;
424 457
425 if (unlikely(list_empty(&q->queue_head) && 458 while (1) {
426 !q->elevator->ops->elevator_dispatch_fn(q, 0))) 459 while (!list_empty(&q->queue_head)) {
427 return NULL; 460 rq = list_entry_rq(q->queue_head.next);
428 461 if (blk_do_ordered(q, &rq))
429 rq = list_entry_rq(q->queue_head.next); 462 return rq;
430 463 }
431 /*
432 * if this is a barrier write and the device has to issue a
433 * flush sequence to support it, check how far we are
434 */
435 if (blk_fs_request(rq) && blk_barrier_rq(rq)) {
436 BUG_ON(q->ordered == QUEUE_ORDERED_NONE);
437 464
438 if (q->ordered == QUEUE_ORDERED_FLUSH && 465 if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
439 !blk_barrier_preflush(rq)) 466 return NULL;
440 rq = blk_start_pre_flush(q, rq);
441 } 467 }
442
443 return rq;
444} 468}
445 469
446struct request *elv_next_request(request_queue_t *q) 470struct request *elv_next_request(request_queue_t *q)
@@ -498,7 +522,7 @@ struct request *elv_next_request(request_queue_t *q)
498 blkdev_dequeue_request(rq); 522 blkdev_dequeue_request(rq);
499 rq->flags |= REQ_QUIET; 523 rq->flags |= REQ_QUIET;
500 end_that_request_chunk(rq, 0, nr_bytes); 524 end_that_request_chunk(rq, 0, nr_bytes);
501 end_that_request_last(rq); 525 end_that_request_last(rq, 0);
502 } else { 526 } else {
503 printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__, 527 printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__,
504 ret); 528 ret);
@@ -597,6 +621,20 @@ void elv_completed_request(request_queue_t *q, struct request *rq)
597 if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn) 621 if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
598 e->ops->elevator_completed_req_fn(q, rq); 622 e->ops->elevator_completed_req_fn(q, rq);
599 } 623 }
624
625 /*
626 * Check if the queue is waiting for fs requests to be
627 * drained for flush sequence.
628 */
629 if (unlikely(q->ordseq)) {
630 struct request *first_rq = list_entry_rq(q->queue_head.next);
631 if (q->in_flight == 0 &&
632 blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
633 blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) {
634 blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
635 q->request_fn(q);
636 }
637 }
600} 638}
601 639
602int elv_register_queue(struct request_queue *q) 640int elv_register_queue(struct request_queue *q)
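[Annotation] The new ELEVATOR_INSERT_REQUEUE case above front-inserts a requeued request unless an ordered flush is in flight, in which case it walks queue_head and slots the request in ordseq order so PREFLUSH -> BAR -> POSTFLUSH survives requeues. A runnable, userspace-only sketch of that insertion rule; a plain array stands in for the request list and the sequence values are illustrative.

    /* Sketch: keep requeued requests sorted by ordered-sequence number. */
    #include <stdio.h>

    #define MAX 16

    static unsigned queue[MAX];
    static int qlen;

    static void requeue(unsigned seq)
    {
            int pos = 0;

            while (pos < qlen && queue[pos] < seq)  /* first rq with seq >= ours */
                    pos++;

            for (int i = qlen; i > pos; i--)        /* shift tail, insert before it */
                    queue[i] = queue[i - 1];
            queue[pos] = seq;
            qlen++;
    }

    int main(void)
    {
            /* pretend BAR (0x08) and POSTFLUSH (0x10) are queued and the
             * PREFLUSH (0x04) failed in the driver and is being requeued */
            requeue(0x08);
            requeue(0x10);
            requeue(0x04);

            for (int i = 0; i < qlen; i++)
                    printf("0x%02x ", queue[i]);
            printf("\n");                           /* 0x04 0x08 0x10 */
            return 0;
    }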
diff --git a/block/genhd.c b/block/genhd.c
index f04609d553b8..db57546a709d 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -38,34 +38,100 @@ static inline int major_to_index(int major)
38 return major % MAX_PROBE_HASH; 38 return major % MAX_PROBE_HASH;
39} 39}
40 40
41#ifdef CONFIG_PROC_FS 41struct blkdev_info {
42/* get block device names in somewhat random order */ 42 int index;
43int get_blkdev_list(char *p, int used) 43 struct blk_major_name *bd;
44};
45
46/*
47 * iterate over a list of blkdev_info structures. allows
48 * the major_names array to be iterated over from outside this file
49 * must be called with the block_subsys_sem held
50 */
51void *get_next_blkdev(void *dev)
52{
53 struct blkdev_info *info;
54
55 if (dev == NULL) {
56 info = kmalloc(sizeof(*info), GFP_KERNEL);
57 if (!info)
58 goto out;
59 info->index=0;
60 info->bd = major_names[info->index];
61 if (info->bd)
62 goto out;
63 } else {
64 info = dev;
65 }
66
67 while (info->index < ARRAY_SIZE(major_names)) {
68 if (info->bd)
69 info->bd = info->bd->next;
70 if (info->bd)
71 goto out;
72 /*
73 * No devices on this chain, move to the next
74 */
75 info->index++;
76 info->bd = (info->index < ARRAY_SIZE(major_names)) ?
77 major_names[info->index] : NULL;
78 if (info->bd)
79 goto out;
80 }
81
82out:
83 return info;
84}
85
86void *acquire_blkdev_list(void)
87{
88 down(&block_subsys_sem);
89 return get_next_blkdev(NULL);
90}
91
92void release_blkdev_list(void *dev)
93{
94 up(&block_subsys_sem);
95 kfree(dev);
96}
97
98
99/*
100 * Count the number of records in the blkdev_list.
101 * must be called with the block_subsys_sem held
102 */
103int count_blkdev_list(void)
44{ 104{
45 struct blk_major_name *n; 105 struct blk_major_name *n;
46 int i, len; 106 int i, count;
47 107
48 len = snprintf(p, (PAGE_SIZE-used), "\nBlock devices:\n"); 108 count = 0;
49 109
50 down(&block_subsys_sem);
51 for (i = 0; i < ARRAY_SIZE(major_names); i++) { 110 for (i = 0; i < ARRAY_SIZE(major_names); i++) {
52 for (n = major_names[i]; n; n = n->next) { 111 for (n = major_names[i]; n; n = n->next)
53 /* 112 count++;
54 * If the curent string plus the 5 extra characters
55 * in the line would run us off the page, then we're done
56 */
57 if ((len + used + strlen(n->name) + 5) >= PAGE_SIZE)
58 goto page_full;
59 len += sprintf(p+len, "%3d %s\n",
60 n->major, n->name);
61 }
62 } 113 }
63page_full:
64 up(&block_subsys_sem);
65 114
66 return len; 115 return count;
67} 116}
68#endif 117
118/*
119 * extract the major and name values from a blkdev_info struct
120 * passed in as a void to *dev. Must be called with
121 * block_subsys_sem held
122 */
123int get_blkdev_info(void *dev, int *major, char **name)
124{
125 struct blkdev_info *info = dev;
126
127 if (info->bd == NULL)
128 return 1;
129
130 *major = info->bd->major;
131 *name = info->bd->name;
132 return 0;
133}
134
69 135
70int register_blkdev(unsigned int major, const char *name) 136int register_blkdev(unsigned int major, const char *name)
71{ 137{
@@ -358,7 +424,7 @@ static struct sysfs_ops disk_sysfs_ops = {
358static ssize_t disk_uevent_store(struct gendisk * disk, 424static ssize_t disk_uevent_store(struct gendisk * disk,
359 const char *buf, size_t count) 425 const char *buf, size_t count)
360{ 426{
361 kobject_hotplug(&disk->kobj, KOBJ_ADD); 427 kobject_uevent(&disk->kobj, KOBJ_ADD);
362 return count; 428 return count;
363} 429}
364static ssize_t disk_dev_read(struct gendisk * disk, char *page) 430static ssize_t disk_dev_read(struct gendisk * disk, char *page)
@@ -455,14 +521,14 @@ static struct kobj_type ktype_block = {
455 521
456extern struct kobj_type ktype_part; 522extern struct kobj_type ktype_part;
457 523
458static int block_hotplug_filter(struct kset *kset, struct kobject *kobj) 524static int block_uevent_filter(struct kset *kset, struct kobject *kobj)
459{ 525{
460 struct kobj_type *ktype = get_ktype(kobj); 526 struct kobj_type *ktype = get_ktype(kobj);
461 527
462 return ((ktype == &ktype_block) || (ktype == &ktype_part)); 528 return ((ktype == &ktype_block) || (ktype == &ktype_part));
463} 529}
464 530
465static int block_hotplug(struct kset *kset, struct kobject *kobj, char **envp, 531static int block_uevent(struct kset *kset, struct kobject *kobj, char **envp,
466 int num_envp, char *buffer, int buffer_size) 532 int num_envp, char *buffer, int buffer_size)
467{ 533{
468 struct kobj_type *ktype = get_ktype(kobj); 534 struct kobj_type *ktype = get_ktype(kobj);
@@ -474,40 +540,40 @@ static int block_hotplug(struct kset *kset, struct kobject *kobj, char **envp,
474 540
475 if (ktype == &ktype_block) { 541 if (ktype == &ktype_block) {
476 disk = container_of(kobj, struct gendisk, kobj); 542 disk = container_of(kobj, struct gendisk, kobj);
477 add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, 543 add_uevent_var(envp, num_envp, &i, buffer, buffer_size,
478 &length, "MINOR=%u", disk->first_minor); 544 &length, "MINOR=%u", disk->first_minor);
479 } else if (ktype == &ktype_part) { 545 } else if (ktype == &ktype_part) {
480 disk = container_of(kobj->parent, struct gendisk, kobj); 546 disk = container_of(kobj->parent, struct gendisk, kobj);
481 part = container_of(kobj, struct hd_struct, kobj); 547 part = container_of(kobj, struct hd_struct, kobj);
482 add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, 548 add_uevent_var(envp, num_envp, &i, buffer, buffer_size,
483 &length, "MINOR=%u", 549 &length, "MINOR=%u",
484 disk->first_minor + part->partno); 550 disk->first_minor + part->partno);
485 } else 551 } else
486 return 0; 552 return 0;
487 553
488 add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, &length, 554 add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
489 "MAJOR=%u", disk->major); 555 "MAJOR=%u", disk->major);
490 556
491 /* add physical device, backing this device */ 557 /* add physical device, backing this device */
492 physdev = disk->driverfs_dev; 558 physdev = disk->driverfs_dev;
493 if (physdev) { 559 if (physdev) {
494 char *path = kobject_get_path(&physdev->kobj, GFP_KERNEL); 560 char *path = kobject_get_path(&physdev->kobj, GFP_KERNEL);
495 561
496 add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, 562 add_uevent_var(envp, num_envp, &i, buffer, buffer_size,
497 &length, "PHYSDEVPATH=%s", path); 563 &length, "PHYSDEVPATH=%s", path);
498 kfree(path); 564 kfree(path);
499 565
500 if (physdev->bus) 566 if (physdev->bus)
501 add_hotplug_env_var(envp, num_envp, &i, 567 add_uevent_var(envp, num_envp, &i,
502 buffer, buffer_size, &length, 568 buffer, buffer_size, &length,
503 "PHYSDEVBUS=%s", 569 "PHYSDEVBUS=%s",
504 physdev->bus->name); 570 physdev->bus->name);
505 571
506 if (physdev->driver) 572 if (physdev->driver)
507 add_hotplug_env_var(envp, num_envp, &i, 573 add_uevent_var(envp, num_envp, &i,
508 buffer, buffer_size, &length, 574 buffer, buffer_size, &length,
509 "PHYSDEVDRIVER=%s", 575 "PHYSDEVDRIVER=%s",
510 physdev->driver->name); 576 physdev->driver->name);
511 } 577 }
512 578
513 /* terminate, set to next free slot, shrink available space */ 579 /* terminate, set to next free slot, shrink available space */
@@ -520,13 +586,13 @@ static int block_hotplug(struct kset *kset, struct kobject *kobj, char **envp,
520 return 0; 586 return 0;
521} 587}
522 588
523static struct kset_hotplug_ops block_hotplug_ops = { 589static struct kset_uevent_ops block_uevent_ops = {
524 .filter = block_hotplug_filter, 590 .filter = block_uevent_filter,
525 .hotplug = block_hotplug, 591 .uevent = block_uevent,
526}; 592};
527 593
528/* declare block_subsys. */ 594/* declare block_subsys. */
529static decl_subsys(block, &ktype_block, &block_hotplug_ops); 595static decl_subsys(block, &ktype_block, &block_uevent_ops);
530 596
531 597
532/* 598/*
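[Annotation] get_blkdev_list() is replaced above by a small cursor API (acquire_blkdev_list, get_next_blkdev, get_blkdev_info, count_blkdev_list, release_blkdev_list) so callers outside genhd.c can walk major_names under block_subsys_sem. A hedged sketch of a consumer; the real caller is the /proc code, which is not part of this diff, and the extern declarations are repeated only to keep the sketch self-contained.

    #include <linux/kernel.h>

    extern void *acquire_blkdev_list(void);
    extern void *get_next_blkdev(void *dev);
    extern void release_blkdev_list(void *dev);
    extern int get_blkdev_info(void *dev, int *major, char **name);

    static void dump_blkdevs(void)
    {
            void *dev;
            int major;
            char *name;

            dev = acquire_blkdev_list();            /* takes block_subsys_sem */
            while (dev && !get_blkdev_info(dev, &major, &name)) {
                    printk("%3d %s\n", major, name);
                    dev = get_next_blkdev(dev);
            }
            release_blkdev_list(dev);               /* drops the sem, frees cursor */
    }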
diff --git a/block/ioctl.c b/block/ioctl.c
index 6e278474f9a8..e1109491c234 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -1,6 +1,7 @@
1#include <linux/sched.h> /* for capable() */ 1#include <linux/capability.h>
2#include <linux/blkdev.h> 2#include <linux/blkdev.h>
3#include <linux/blkpg.h> 3#include <linux/blkpg.h>
4#include <linux/hdreg.h>
4#include <linux/backing-dev.h> 5#include <linux/backing-dev.h>
5#include <linux/buffer_head.h> 6#include <linux/buffer_head.h>
6#include <linux/smp_lock.h> 7#include <linux/smp_lock.h>
@@ -245,6 +246,27 @@ int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd,
245 set_device_ro(bdev, n); 246 set_device_ro(bdev, n);
246 unlock_kernel(); 247 unlock_kernel();
247 return 0; 248 return 0;
249 case HDIO_GETGEO: {
250 struct hd_geometry geo;
251
252 if (!arg)
253 return -EINVAL;
254 if (!disk->fops->getgeo)
255 return -ENOTTY;
256
257 /*
258 * We need to set the startsect first, the driver may
259 * want to override it.
260 */
261 geo.start = get_start_sect(bdev);
262 ret = disk->fops->getgeo(bdev, &geo);
263 if (ret)
264 return ret;
265 if (copy_to_user((struct hd_geometry __user *)arg, &geo,
266 sizeof(geo)))
267 return -EFAULT;
268 return 0;
269 }
248 } 270 }
249 271
250 lock_kernel(); 272 lock_kernel();
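[Annotation] The new HDIO_GETGEO branch above fills geo.start from get_start_sect() and then defers to the disk's ->getgeo() hook. A hedged sketch of the driver side of that contract; the driver name and geometry values are fabricated.

    #include <linux/module.h>
    #include <linux/fs.h>
    #include <linux/genhd.h>
    #include <linux/hdreg.h>

    static int mydrv_getgeo(struct block_device *bdev, struct hd_geometry *geo)
    {
            /* geo->start was already filled in by blkdev_ioctl(); a driver
             * may override it here if it wants to */
            geo->heads = 4;
            geo->sectors = 16;
            geo->cylinders = get_capacity(bdev->bd_disk) >> 6;  /* / (4 * 16) */
            return 0;
    }

    static struct block_device_operations mydrv_fops = {
            .owner  = THIS_MODULE,
            .getgeo = mydrv_getgeo,
    };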
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 99c9ca6d5992..8e27d0ab0d7c 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -26,7 +26,8 @@
26#include <linux/slab.h> 26#include <linux/slab.h>
27#include <linux/swap.h> 27#include <linux/swap.h>
28#include <linux/writeback.h> 28#include <linux/writeback.h>
29#include <linux/blkdev.h> 29#include <linux/interrupt.h>
30#include <linux/cpu.h>
30 31
31/* 32/*
32 * for max sense size 33 * for max sense size
@@ -36,6 +37,8 @@
36static void blk_unplug_work(void *data); 37static void blk_unplug_work(void *data);
37static void blk_unplug_timeout(unsigned long data); 38static void blk_unplug_timeout(unsigned long data);
38static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io); 39static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io);
40static void init_request_from_bio(struct request *req, struct bio *bio);
41static int __make_request(request_queue_t *q, struct bio *bio);
39 42
40/* 43/*
41 * For the allocated request tables 44 * For the allocated request tables
@@ -60,13 +63,15 @@ static wait_queue_head_t congestion_wqh[2] = {
60/* 63/*
61 * Controlling structure to kblockd 64 * Controlling structure to kblockd
62 */ 65 */
63static struct workqueue_struct *kblockd_workqueue; 66static struct workqueue_struct *kblockd_workqueue;
64 67
65unsigned long blk_max_low_pfn, blk_max_pfn; 68unsigned long blk_max_low_pfn, blk_max_pfn;
66 69
67EXPORT_SYMBOL(blk_max_low_pfn); 70EXPORT_SYMBOL(blk_max_low_pfn);
68EXPORT_SYMBOL(blk_max_pfn); 71EXPORT_SYMBOL(blk_max_pfn);
69 72
73static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
74
70/* Amount of time in which a process may batch requests */ 75/* Amount of time in which a process may batch requests */
71#define BLK_BATCH_TIME (HZ/50UL) 76#define BLK_BATCH_TIME (HZ/50UL)
72 77
@@ -205,6 +210,13 @@ void blk_queue_merge_bvec(request_queue_t *q, merge_bvec_fn *mbfn)
205 210
206EXPORT_SYMBOL(blk_queue_merge_bvec); 211EXPORT_SYMBOL(blk_queue_merge_bvec);
207 212
213void blk_queue_softirq_done(request_queue_t *q, softirq_done_fn *fn)
214{
215 q->softirq_done_fn = fn;
216}
217
218EXPORT_SYMBOL(blk_queue_softirq_done);
219
208/** 220/**
209 * blk_queue_make_request - define an alternate make_request function for a device 221 * blk_queue_make_request - define an alternate make_request function for a device
210 * @q: the request queue for the device to be affected 222 * @q: the request queue for the device to be affected
@@ -239,7 +251,7 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
239 q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; 251 q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
240 q->backing_dev_info.state = 0; 252 q->backing_dev_info.state = 0;
241 q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY; 253 q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
242 blk_queue_max_sectors(q, MAX_SECTORS); 254 blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
243 blk_queue_hardsect_size(q, 512); 255 blk_queue_hardsect_size(q, 512);
244 blk_queue_dma_alignment(q, 511); 256 blk_queue_dma_alignment(q, 511);
245 blk_queue_congestion_threshold(q); 257 blk_queue_congestion_threshold(q);
@@ -268,6 +280,7 @@ EXPORT_SYMBOL(blk_queue_make_request);
268static inline void rq_init(request_queue_t *q, struct request *rq) 280static inline void rq_init(request_queue_t *q, struct request *rq)
269{ 281{
270 INIT_LIST_HEAD(&rq->queuelist); 282 INIT_LIST_HEAD(&rq->queuelist);
283 INIT_LIST_HEAD(&rq->donelist);
271 284
272 rq->errors = 0; 285 rq->errors = 0;
273 rq->rq_status = RQ_ACTIVE; 286 rq->rq_status = RQ_ACTIVE;
@@ -284,12 +297,13 @@ static inline void rq_init(request_queue_t *q, struct request *rq)
284 rq->sense = NULL; 297 rq->sense = NULL;
285 rq->end_io = NULL; 298 rq->end_io = NULL;
286 rq->end_io_data = NULL; 299 rq->end_io_data = NULL;
300 rq->completion_data = NULL;
287} 301}
288 302
289/** 303/**
290 * blk_queue_ordered - does this queue support ordered writes 304 * blk_queue_ordered - does this queue support ordered writes
291 * @q: the request queue 305 * @q: the request queue
292 * @flag: see below 306 * @ordered: one of QUEUE_ORDERED_*
293 * 307 *
294 * Description: 308 * Description:
295 * For journalled file systems, doing ordered writes on a commit 309 * For journalled file systems, doing ordered writes on a commit
@@ -298,28 +312,30 @@ static inline void rq_init(request_queue_t *q, struct request *rq)
298 * feature should call this function and indicate so. 312 * feature should call this function and indicate so.
299 * 313 *
300 **/ 314 **/
301void blk_queue_ordered(request_queue_t *q, int flag) 315int blk_queue_ordered(request_queue_t *q, unsigned ordered,
302{ 316 prepare_flush_fn *prepare_flush_fn)
303 switch (flag) { 317{
304 case QUEUE_ORDERED_NONE: 318 if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) &&
305 if (q->flush_rq) 319 prepare_flush_fn == NULL) {
306 kmem_cache_free(request_cachep, q->flush_rq); 320 printk(KERN_ERR "blk_queue_ordered: prepare_flush_fn required\n");
307 q->flush_rq = NULL; 321 return -EINVAL;
308 q->ordered = flag;
309 break;
310 case QUEUE_ORDERED_TAG:
311 q->ordered = flag;
312 break;
313 case QUEUE_ORDERED_FLUSH:
314 q->ordered = flag;
315 if (!q->flush_rq)
316 q->flush_rq = kmem_cache_alloc(request_cachep,
317 GFP_KERNEL);
318 break;
319 default:
320 printk("blk_queue_ordered: bad value %d\n", flag);
321 break;
322 } 322 }
323
324 if (ordered != QUEUE_ORDERED_NONE &&
325 ordered != QUEUE_ORDERED_DRAIN &&
326 ordered != QUEUE_ORDERED_DRAIN_FLUSH &&
327 ordered != QUEUE_ORDERED_DRAIN_FUA &&
328 ordered != QUEUE_ORDERED_TAG &&
329 ordered != QUEUE_ORDERED_TAG_FLUSH &&
330 ordered != QUEUE_ORDERED_TAG_FUA) {
331 printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered);
332 return -EINVAL;
333 }
334
335 q->next_ordered = ordered;
336 q->prepare_flush_fn = prepare_flush_fn;
337
338 return 0;
323} 339}
324 340
325EXPORT_SYMBOL(blk_queue_ordered); 341EXPORT_SYMBOL(blk_queue_ordered);
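[Annotation] blk_queue_ordered() now takes one of the QUEUE_ORDERED_* modes plus a prepare_flush_fn and can fail, instead of the old flag-only form. A hedged sketch of how a driver with a volatile write cache might call it; the opcode, command layout and driver names are made up.

    #include <linux/kernel.h>
    #include <linux/blkdev.h>

    #define MYDRV_FLUSH_CACHE 0x35          /* hypothetical opcode */

    static void mydrv_prepare_flush(request_queue_t *q, struct request *rq)
    {
            memset(rq->cmd, 0, sizeof(rq->cmd));
            rq->cmd[0] = MYDRV_FLUSH_CACHE;
            rq->cmd_len = 1;
            rq->flags |= REQ_BLOCK_PC;
    }

    static void mydrv_setup_queue(request_queue_t *q)
    {
            /* drain the queue, flush before and after the barrier payload */
            if (blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH,
                                  mydrv_prepare_flush))
                    printk(KERN_WARNING "mydrv: barriers not enabled\n");
    }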
@@ -344,167 +360,265 @@ EXPORT_SYMBOL(blk_queue_issue_flush_fn);
344/* 360/*
345 * Cache flushing for ordered writes handling 361 * Cache flushing for ordered writes handling
346 */ 362 */
347static void blk_pre_flush_end_io(struct request *flush_rq) 363inline unsigned blk_ordered_cur_seq(request_queue_t *q)
348{ 364{
349 struct request *rq = flush_rq->end_io_data; 365 if (!q->ordseq)
350 request_queue_t *q = rq->q; 366 return 0;
351 367 return 1 << ffz(q->ordseq);
352 elv_completed_request(q, flush_rq);
353
354 rq->flags |= REQ_BAR_PREFLUSH;
355
356 if (!flush_rq->errors)
357 elv_requeue_request(q, rq);
358 else {
359 q->end_flush_fn(q, flush_rq);
360 clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
361 q->request_fn(q);
362 }
363} 368}
364 369
365static void blk_post_flush_end_io(struct request *flush_rq) 370unsigned blk_ordered_req_seq(struct request *rq)
366{ 371{
367 struct request *rq = flush_rq->end_io_data;
368 request_queue_t *q = rq->q; 372 request_queue_t *q = rq->q;
369 373
370 elv_completed_request(q, flush_rq); 374 BUG_ON(q->ordseq == 0);
371 375
372 rq->flags |= REQ_BAR_POSTFLUSH; 376 if (rq == &q->pre_flush_rq)
377 return QUEUE_ORDSEQ_PREFLUSH;
378 if (rq == &q->bar_rq)
379 return QUEUE_ORDSEQ_BAR;
380 if (rq == &q->post_flush_rq)
381 return QUEUE_ORDSEQ_POSTFLUSH;
373 382
374 q->end_flush_fn(q, flush_rq); 383 if ((rq->flags & REQ_ORDERED_COLOR) ==
375 clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags); 384 (q->orig_bar_rq->flags & REQ_ORDERED_COLOR))
376 q->request_fn(q); 385 return QUEUE_ORDSEQ_DRAIN;
386 else
387 return QUEUE_ORDSEQ_DONE;
377} 388}
378 389
379struct request *blk_start_pre_flush(request_queue_t *q, struct request *rq) 390void blk_ordered_complete_seq(request_queue_t *q, unsigned seq, int error)
380{ 391{
381 struct request *flush_rq = q->flush_rq; 392 struct request *rq;
382 393 int uptodate;
383 BUG_ON(!blk_barrier_rq(rq));
384 394
385 if (test_and_set_bit(QUEUE_FLAG_FLUSH, &q->queue_flags)) 395 if (error && !q->orderr)
386 return NULL; 396 q->orderr = error;
387 397
388 rq_init(q, flush_rq); 398 BUG_ON(q->ordseq & seq);
389 flush_rq->elevator_private = NULL; 399 q->ordseq |= seq;
390 flush_rq->flags = REQ_BAR_FLUSH;
391 flush_rq->rq_disk = rq->rq_disk;
392 flush_rq->rl = NULL;
393 400
394 /* 401 if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
395 * prepare_flush returns 0 if no flush is needed, just mark both 402 return;
396 * pre and post flush as done in that case
397 */
398 if (!q->prepare_flush_fn(q, flush_rq)) {
399 rq->flags |= REQ_BAR_PREFLUSH | REQ_BAR_POSTFLUSH;
400 clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
401 return rq;
402 }
403 403
404 /* 404 /*
405 * some drivers dequeue requests right away, some only after io 405 * Okay, sequence complete.
406 * completion. make sure the request is dequeued.
407 */ 406 */
408 if (!list_empty(&rq->queuelist)) 407 rq = q->orig_bar_rq;
409 blkdev_dequeue_request(rq); 408 uptodate = q->orderr ? q->orderr : 1;
410 409
411 flush_rq->end_io_data = rq; 410 q->ordseq = 0;
412 flush_rq->end_io = blk_pre_flush_end_io;
413 411
414 __elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0); 412 end_that_request_first(rq, uptodate, rq->hard_nr_sectors);
415 return flush_rq; 413 end_that_request_last(rq, uptodate);
416} 414}
417 415
418static void blk_start_post_flush(request_queue_t *q, struct request *rq) 416static void pre_flush_end_io(struct request *rq, int error)
419{ 417{
420 struct request *flush_rq = q->flush_rq; 418 elv_completed_request(rq->q, rq);
419 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error);
420}
421 421
422 BUG_ON(!blk_barrier_rq(rq)); 422static void bar_end_io(struct request *rq, int error)
423{
424 elv_completed_request(rq->q, rq);
425 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error);
426}
423 427
424 rq_init(q, flush_rq); 428static void post_flush_end_io(struct request *rq, int error)
425 flush_rq->elevator_private = NULL; 429{
426 flush_rq->flags = REQ_BAR_FLUSH; 430 elv_completed_request(rq->q, rq);
427 flush_rq->rq_disk = rq->rq_disk; 431 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
428 flush_rq->rl = NULL; 432}
429 433
430 if (q->prepare_flush_fn(q, flush_rq)) { 434static void queue_flush(request_queue_t *q, unsigned which)
431 flush_rq->end_io_data = rq; 435{
432 flush_rq->end_io = blk_post_flush_end_io; 436 struct request *rq;
437 rq_end_io_fn *end_io;
433 438
434 __elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0); 439 if (which == QUEUE_ORDERED_PREFLUSH) {
435 q->request_fn(q); 440 rq = &q->pre_flush_rq;
441 end_io = pre_flush_end_io;
442 } else {
443 rq = &q->post_flush_rq;
444 end_io = post_flush_end_io;
436 } 445 }
446
447 rq_init(q, rq);
448 rq->flags = REQ_HARDBARRIER;
449 rq->elevator_private = NULL;
450 rq->rq_disk = q->bar_rq.rq_disk;
451 rq->rl = NULL;
452 rq->end_io = end_io;
453 q->prepare_flush_fn(q, rq);
454
455 __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
437} 456}
438 457
439static inline int blk_check_end_barrier(request_queue_t *q, struct request *rq, 458static inline struct request *start_ordered(request_queue_t *q,
440 int sectors) 459 struct request *rq)
441{ 460{
442 if (sectors > rq->nr_sectors) 461 q->bi_size = 0;
443 sectors = rq->nr_sectors; 462 q->orderr = 0;
463 q->ordered = q->next_ordered;
464 q->ordseq |= QUEUE_ORDSEQ_STARTED;
465
466 /*
467 * Prep proxy barrier request.
468 */
469 blkdev_dequeue_request(rq);
470 q->orig_bar_rq = rq;
471 rq = &q->bar_rq;
472 rq_init(q, rq);
473 rq->flags = bio_data_dir(q->orig_bar_rq->bio);
474 rq->flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0;
475 rq->elevator_private = NULL;
476 rq->rl = NULL;
477 init_request_from_bio(rq, q->orig_bar_rq->bio);
478 rq->end_io = bar_end_io;
479
480 /*
481 * Queue ordered sequence. As we stack them at the head, we
482 * need to queue in reverse order. Note that we rely on that
483 * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
484 * request gets inbetween ordered sequence.
485 */
486 if (q->ordered & QUEUE_ORDERED_POSTFLUSH)
487 queue_flush(q, QUEUE_ORDERED_POSTFLUSH);
488 else
489 q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;
490
491 __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
444 492
445 rq->nr_sectors -= sectors; 493 if (q->ordered & QUEUE_ORDERED_PREFLUSH) {
446 return rq->nr_sectors; 494 queue_flush(q, QUEUE_ORDERED_PREFLUSH);
495 rq = &q->pre_flush_rq;
496 } else
497 q->ordseq |= QUEUE_ORDSEQ_PREFLUSH;
498
499 if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0)
500 q->ordseq |= QUEUE_ORDSEQ_DRAIN;
501 else
502 rq = NULL;
503
504 return rq;
447} 505}
448 506
449static int __blk_complete_barrier_rq(request_queue_t *q, struct request *rq, 507int blk_do_ordered(request_queue_t *q, struct request **rqp)
450 int sectors, int queue_locked)
451{ 508{
452 if (q->ordered != QUEUE_ORDERED_FLUSH) 509 struct request *rq = *rqp, *allowed_rq;
453 return 0; 510 int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);
454 if (!blk_fs_request(rq) || !blk_barrier_rq(rq))
455 return 0;
456 if (blk_barrier_postflush(rq))
457 return 0;
458 511
459 if (!blk_check_end_barrier(q, rq, sectors)) { 512 if (!q->ordseq) {
460 unsigned long flags = 0; 513 if (!is_barrier)
514 return 1;
461 515
462 if (!queue_locked) 516 if (q->next_ordered != QUEUE_ORDERED_NONE) {
463 spin_lock_irqsave(q->queue_lock, flags); 517 *rqp = start_ordered(q, rq);
518 return 1;
519 } else {
520 /*
521 * This can happen when the queue switches to
522 * ORDERED_NONE while this request is on it.
523 */
524 blkdev_dequeue_request(rq);
525 end_that_request_first(rq, -EOPNOTSUPP,
526 rq->hard_nr_sectors);
527 end_that_request_last(rq, -EOPNOTSUPP);
528 *rqp = NULL;
529 return 0;
530 }
531 }
464 532
465 blk_start_post_flush(q, rq); 533 if (q->ordered & QUEUE_ORDERED_TAG) {
534 if (is_barrier && rq != &q->bar_rq)
535 *rqp = NULL;
536 return 1;
537 }
466 538
467 if (!queue_locked) 539 switch (blk_ordered_cur_seq(q)) {
468 spin_unlock_irqrestore(q->queue_lock, flags); 540 case QUEUE_ORDSEQ_PREFLUSH:
541 allowed_rq = &q->pre_flush_rq;
542 break;
543 case QUEUE_ORDSEQ_BAR:
544 allowed_rq = &q->bar_rq;
545 break;
546 case QUEUE_ORDSEQ_POSTFLUSH:
547 allowed_rq = &q->post_flush_rq;
548 break;
549 default:
550 allowed_rq = NULL;
551 break;
469 } 552 }
470 553
554 if (rq != allowed_rq &&
555 (blk_fs_request(rq) || rq == &q->pre_flush_rq ||
556 rq == &q->post_flush_rq))
557 *rqp = NULL;
558
471 return 1; 559 return 1;
472} 560}
473 561
474/** 562static int flush_dry_bio_endio(struct bio *bio, unsigned int bytes, int error)
475 * blk_complete_barrier_rq - complete possible barrier request
476 * @q: the request queue for the device
477 * @rq: the request
478 * @sectors: number of sectors to complete
479 *
480 * Description:
481 * Used in driver end_io handling to determine whether to postpone
482 * completion of a barrier request until a post flush has been done. This
483 * is the unlocked variant, used if the caller doesn't already hold the
484 * queue lock.
485 **/
486int blk_complete_barrier_rq(request_queue_t *q, struct request *rq, int sectors)
487{ 563{
488 return __blk_complete_barrier_rq(q, rq, sectors, 0); 564 request_queue_t *q = bio->bi_private;
565 struct bio_vec *bvec;
566 int i;
567
568 /*
569 * This is dry run, restore bio_sector and size. We'll finish
570 * this request again with the original bi_end_io after an
571 * error occurs or post flush is complete.
572 */
573 q->bi_size += bytes;
574
575 if (bio->bi_size)
576 return 1;
577
578 /* Rewind bvec's */
579 bio->bi_idx = 0;
580 bio_for_each_segment(bvec, bio, i) {
581 bvec->bv_len += bvec->bv_offset;
582 bvec->bv_offset = 0;
583 }
584
585 /* Reset bio */
586 set_bit(BIO_UPTODATE, &bio->bi_flags);
587 bio->bi_size = q->bi_size;
588 bio->bi_sector -= (q->bi_size >> 9);
589 q->bi_size = 0;
590
591 return 0;
489} 592}
490EXPORT_SYMBOL(blk_complete_barrier_rq);
491 593
492/** 594static inline int ordered_bio_endio(struct request *rq, struct bio *bio,
493 * blk_complete_barrier_rq_locked - complete possible barrier request 595 unsigned int nbytes, int error)
494 * @q: the request queue for the device
495 * @rq: the request
496 * @sectors: number of sectors to complete
497 *
498 * Description:
499 * See blk_complete_barrier_rq(). This variant must be used if the caller
500 * holds the queue lock.
501 **/
502int blk_complete_barrier_rq_locked(request_queue_t *q, struct request *rq,
503 int sectors)
504{ 596{
505 return __blk_complete_barrier_rq(q, rq, sectors, 1); 597 request_queue_t *q = rq->q;
598 bio_end_io_t *endio;
599 void *private;
600
601 if (&q->bar_rq != rq)
602 return 0;
603
604 /*
605 * Okay, this is the barrier request in progress, dry finish it.
606 */
607 if (error && !q->orderr)
608 q->orderr = error;
609
610 endio = bio->bi_end_io;
611 private = bio->bi_private;
612 bio->bi_end_io = flush_dry_bio_endio;
613 bio->bi_private = q;
614
615 bio_endio(bio, nbytes, error);
616
617 bio->bi_end_io = endio;
618 bio->bi_private = private;
619
620 return 1;
506} 621}
507EXPORT_SYMBOL(blk_complete_barrier_rq_locked);
508 622
509/** 623/**
510 * blk_queue_bounce_limit - set bounce buffer limit for queue 624 * blk_queue_bounce_limit - set bounce buffer limit for queue
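[Annotation] The ordered-write machinery above tracks progress as a bitmask: each phase sets its QUEUE_ORDSEQ_* bit as it completes, and blk_ordered_cur_seq() returns 1 << ffz(q->ordseq), i.e. the lowest phase still pending. A runnable, userspace-only walk-through of that accounting; the flag values mirror what blkdev.h is assumed to define and are not taken from this excerpt.

    #include <stdio.h>

    enum {
            ORDSEQ_STARTED   = 0x01,
            ORDSEQ_DRAIN     = 0x02,
            ORDSEQ_PREFLUSH  = 0x04,
            ORDSEQ_BAR       = 0x08,
            ORDSEQ_POSTFLUSH = 0x10,
            ORDSEQ_DONE      = 0x20,
    };

    static unsigned cur_seq(unsigned ordseq)
    {
            unsigned bit = 1;

            while (ordseq & bit)            /* find first zero bit, like ffz() */
                    bit <<= 1;
            return bit;
    }

    int main(void)
    {
            unsigned ordseq = ORDSEQ_STARTED;       /* start_ordered() */

            ordseq |= ORDSEQ_DRAIN;                 /* queue was already idle */
            printf("current phase: 0x%02x\n", cur_seq(ordseq)); /* PREFLUSH */

            ordseq |= ORDSEQ_PREFLUSH;              /* pre_flush_end_io() */
            ordseq |= ORDSEQ_BAR;                   /* bar_end_io() */
            printf("current phase: 0x%02x\n", cur_seq(ordseq)); /* POSTFLUSH */
            return 0;
    }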
@@ -555,7 +669,12 @@ void blk_queue_max_sectors(request_queue_t *q, unsigned short max_sectors)
555 printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors); 669 printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors);
556 } 670 }
557 671
558 q->max_sectors = q->max_hw_sectors = max_sectors; 672 if (BLK_DEF_MAX_SECTORS > max_sectors)
673 q->max_hw_sectors = q->max_sectors = max_sectors;
674 else {
675 q->max_sectors = BLK_DEF_MAX_SECTORS;
676 q->max_hw_sectors = max_sectors;
677 }
559} 678}
560 679
561EXPORT_SYMBOL(blk_queue_max_sectors); 680EXPORT_SYMBOL(blk_queue_max_sectors);
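[Annotation] blk_queue_max_sectors() above starts keeping two limits: the hardware maximum and a softer default used for file system requests (the ll_*_merge_fn and blk_rq_map_* hunks later pick whichever applies). A hedged, caller-side illustration, assuming BLK_DEF_MAX_SECTORS is 1024; the driver is made up.

    #include <linux/blkdev.h>

    static void mydrv_set_limits(request_queue_t *q)
    {
            blk_queue_max_sectors(q, 2048);

            /*
             * After the call above:
             *   q->max_hw_sectors == 2048  what the controller really accepts,
             *                              used for BLOCK_PC / SG_IO requests
             *   q->max_sectors    == 1024  capped at BLK_DEF_MAX_SECTORS and
             *                              used to size normal fs requests
             */
    }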
@@ -657,8 +776,8 @@ EXPORT_SYMBOL(blk_queue_hardsect_size);
657void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b) 776void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b)
658{ 777{
659 /* zero is "infinity" */ 778 /* zero is "infinity" */
660 t->max_sectors = t->max_hw_sectors = 779 t->max_sectors = min_not_zero(t->max_sectors,b->max_sectors);
661 min_not_zero(t->max_sectors,b->max_sectors); 780 t->max_hw_sectors = min_not_zero(t->max_hw_sectors,b->max_hw_sectors);
662 781
663 t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments); 782 t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments);
664 t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments); 783 t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments);
@@ -1034,12 +1153,13 @@ void blk_queue_invalidate_tags(request_queue_t *q)
1034 1153
1035EXPORT_SYMBOL(blk_queue_invalidate_tags); 1154EXPORT_SYMBOL(blk_queue_invalidate_tags);
1036 1155
1037static char *rq_flags[] = { 1156static const char * const rq_flags[] = {
1038 "REQ_RW", 1157 "REQ_RW",
1039 "REQ_FAILFAST", 1158 "REQ_FAILFAST",
1040 "REQ_SORTED", 1159 "REQ_SORTED",
1041 "REQ_SOFTBARRIER", 1160 "REQ_SOFTBARRIER",
1042 "REQ_HARDBARRIER", 1161 "REQ_HARDBARRIER",
1162 "REQ_FUA",
1043 "REQ_CMD", 1163 "REQ_CMD",
1044 "REQ_NOMERGE", 1164 "REQ_NOMERGE",
1045 "REQ_STARTED", 1165 "REQ_STARTED",
@@ -1059,6 +1179,7 @@ static char *rq_flags[] = {
1059 "REQ_PM_SUSPEND", 1179 "REQ_PM_SUSPEND",
1060 "REQ_PM_RESUME", 1180 "REQ_PM_RESUME",
1061 "REQ_PM_SHUTDOWN", 1181 "REQ_PM_SHUTDOWN",
1182 "REQ_ORDERED_COLOR",
1062}; 1183};
1063 1184
1064void blk_dump_rq_flags(struct request *rq, char *msg) 1185void blk_dump_rq_flags(struct request *rq, char *msg)
@@ -1293,9 +1414,15 @@ static inline int ll_new_hw_segment(request_queue_t *q,
1293static int ll_back_merge_fn(request_queue_t *q, struct request *req, 1414static int ll_back_merge_fn(request_queue_t *q, struct request *req,
1294 struct bio *bio) 1415 struct bio *bio)
1295{ 1416{
1417 unsigned short max_sectors;
1296 int len; 1418 int len;
1297 1419
1298 if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) { 1420 if (unlikely(blk_pc_request(req)))
1421 max_sectors = q->max_hw_sectors;
1422 else
1423 max_sectors = q->max_sectors;
1424
1425 if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
1299 req->flags |= REQ_NOMERGE; 1426 req->flags |= REQ_NOMERGE;
1300 if (req == q->last_merge) 1427 if (req == q->last_merge)
1301 q->last_merge = NULL; 1428 q->last_merge = NULL;
@@ -1325,9 +1452,16 @@ static int ll_back_merge_fn(request_queue_t *q, struct request *req,
1325static int ll_front_merge_fn(request_queue_t *q, struct request *req, 1452static int ll_front_merge_fn(request_queue_t *q, struct request *req,
1326 struct bio *bio) 1453 struct bio *bio)
1327{ 1454{
1455 unsigned short max_sectors;
1328 int len; 1456 int len;
1329 1457
1330 if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) { 1458 if (unlikely(blk_pc_request(req)))
1459 max_sectors = q->max_hw_sectors;
1460 else
1461 max_sectors = q->max_sectors;
1462
1463
1464 if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
1331 req->flags |= REQ_NOMERGE; 1465 req->flags |= REQ_NOMERGE;
1332 if (req == q->last_merge) 1466 if (req == q->last_merge)
1333 q->last_merge = NULL; 1467 q->last_merge = NULL;
@@ -1623,8 +1757,6 @@ void blk_cleanup_queue(request_queue_t * q)
1623 if (q->queue_tags) 1757 if (q->queue_tags)
1624 __blk_queue_free_tags(q); 1758 __blk_queue_free_tags(q);
1625 1759
1626 blk_queue_ordered(q, QUEUE_ORDERED_NONE);
1627
1628 kmem_cache_free(requestq_cachep, q); 1760 kmem_cache_free(requestq_cachep, q);
1629} 1761}
1630 1762
@@ -1649,8 +1781,6 @@ static int blk_init_free_list(request_queue_t *q)
1649 return 0; 1781 return 0;
1650} 1782}
1651 1783
1652static int __make_request(request_queue_t *, struct bio *);
1653
1654request_queue_t *blk_alloc_queue(gfp_t gfp_mask) 1784request_queue_t *blk_alloc_queue(gfp_t gfp_mask)
1655{ 1785{
1656 return blk_alloc_queue_node(gfp_mask, -1); 1786 return blk_alloc_queue_node(gfp_mask, -1);
@@ -1890,40 +2020,40 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio,
1890{ 2020{
1891 struct request *rq = NULL; 2021 struct request *rq = NULL;
1892 struct request_list *rl = &q->rq; 2022 struct request_list *rl = &q->rq;
1893 struct io_context *ioc = current_io_context(GFP_ATOMIC); 2023 struct io_context *ioc = NULL;
1894 int priv; 2024 int may_queue, priv;
1895
1896 if (rl->count[rw]+1 >= q->nr_requests) {
1897 /*
1898 * The queue will fill after this allocation, so set it as
1899 * full, and mark this process as "batching". This process
1900 * will be allowed to complete a batch of requests, others
1901 * will be blocked.
1902 */
1903 if (!blk_queue_full(q, rw)) {
1904 ioc_set_batching(q, ioc);
1905 blk_set_queue_full(q, rw);
1906 }
1907 }
1908 2025
1909 switch (elv_may_queue(q, rw, bio)) { 2026 may_queue = elv_may_queue(q, rw, bio);
1910 case ELV_MQUEUE_NO: 2027 if (may_queue == ELV_MQUEUE_NO)
1911 goto rq_starved; 2028 goto rq_starved;
1912 case ELV_MQUEUE_MAY:
1913 break;
1914 case ELV_MQUEUE_MUST:
1915 goto get_rq;
1916 }
1917 2029
1918 if (blk_queue_full(q, rw) && !ioc_batching(q, ioc)) { 2030 if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) {
1919 /* 2031 if (rl->count[rw]+1 >= q->nr_requests) {
1920 * The queue is full and the allocating process is not a 2032 ioc = current_io_context(GFP_ATOMIC);
1921 * "batcher", and not exempted by the IO scheduler 2033 /*
1922 */ 2034 * The queue will fill after this allocation, so set
1923 goto out; 2035 * it as full, and mark this process as "batching".
2036 * This process will be allowed to complete a batch of
2037 * requests, others will be blocked.
2038 */
2039 if (!blk_queue_full(q, rw)) {
2040 ioc_set_batching(q, ioc);
2041 blk_set_queue_full(q, rw);
2042 } else {
2043 if (may_queue != ELV_MQUEUE_MUST
2044 && !ioc_batching(q, ioc)) {
2045 /*
2046 * The queue is full and the allocating
2047 * process is not a "batcher", and not
2048 * exempted by the IO scheduler
2049 */
2050 goto out;
2051 }
2052 }
2053 }
2054 set_queue_congested(q, rw);
1924 } 2055 }
1925 2056
1926get_rq:
1927 /* 2057 /*
1928 * Only allow batching queuers to allocate up to 50% over the defined 2058 * Only allow batching queuers to allocate up to 50% over the defined
1929 * limit of requests, otherwise we could have thousands of requests 2059 * limit of requests, otherwise we could have thousands of requests
@@ -1934,8 +2064,6 @@ get_rq:
1934 2064
1935 rl->count[rw]++; 2065 rl->count[rw]++;
1936 rl->starved[rw] = 0; 2066 rl->starved[rw] = 0;
1937 if (rl->count[rw] >= queue_congestion_on_threshold(q))
1938 set_queue_congested(q, rw);
1939 2067
1940 priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); 2068 priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
1941 if (priv) 2069 if (priv)
@@ -1944,7 +2072,7 @@ get_rq:
1944 spin_unlock_irq(q->queue_lock); 2072 spin_unlock_irq(q->queue_lock);
1945 2073
1946 rq = blk_alloc_request(q, rw, bio, priv, gfp_mask); 2074 rq = blk_alloc_request(q, rw, bio, priv, gfp_mask);
1947 if (!rq) { 2075 if (unlikely(!rq)) {
1948 /* 2076 /*
1949 * Allocation failed presumably due to memory. Undo anything 2077 * Allocation failed presumably due to memory. Undo anything
1950 * we might have messed up. 2078 * we might have messed up.
@@ -1969,6 +2097,12 @@ rq_starved:
1969 goto out; 2097 goto out;
1970 } 2098 }
1971 2099
2100 /*
2101 * ioc may be NULL here, and ioc_batching will be false. That's
2102 * OK, if the queue is under the request limit then requests need
2103 * not count toward the nr_batch_requests limit. There will always
2104 * be some limit enforced by BLK_BATCH_TIME.
2105 */
1972 if (ioc_batching(q, ioc)) 2106 if (ioc_batching(q, ioc))
1973 ioc->nr_batch_requests--; 2107 ioc->nr_batch_requests--;
1974 2108
@@ -2144,7 +2278,7 @@ int blk_rq_map_user(request_queue_t *q, struct request *rq, void __user *ubuf,
2144 struct bio *bio; 2278 struct bio *bio;
2145 int reading; 2279 int reading;
2146 2280
2147 if (len > (q->max_sectors << 9)) 2281 if (len > (q->max_hw_sectors << 9))
2148 return -EINVAL; 2282 return -EINVAL;
2149 if (!len || !ubuf) 2283 if (!len || !ubuf)
2150 return -EINVAL; 2284 return -EINVAL;
@@ -2259,7 +2393,7 @@ int blk_rq_map_kern(request_queue_t *q, struct request *rq, void *kbuf,
2259{ 2393{
2260 struct bio *bio; 2394 struct bio *bio;
2261 2395
2262 if (len > (q->max_sectors << 9)) 2396 if (len > (q->max_hw_sectors << 9))
2263 return -EINVAL; 2397 return -EINVAL;
2264 if (!len || !kbuf) 2398 if (!len || !kbuf)
2265 return -EINVAL; 2399 return -EINVAL;
@@ -2295,7 +2429,7 @@ EXPORT_SYMBOL(blk_rq_map_kern);
2295 */ 2429 */
2296void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk, 2430void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk,
2297 struct request *rq, int at_head, 2431 struct request *rq, int at_head,
2298 void (*done)(struct request *)) 2432 rq_end_io_fn *done)
2299{ 2433{
2300 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; 2434 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
2301 2435
@@ -2306,6 +2440,8 @@ void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk,
2306 generic_unplug_device(q); 2440 generic_unplug_device(q);
2307} 2441}
2308 2442
2443EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
2444
2309/** 2445/**
2310 * blk_execute_rq - insert a request into queue for execution 2446 * blk_execute_rq - insert a request into queue for execution
2311 * @q: queue to insert the request in 2447 * @q: queue to insert the request in
@@ -2444,7 +2580,7 @@ void disk_round_stats(struct gendisk *disk)
2444/* 2580/*
2445 * queue lock must be held 2581 * queue lock must be held
2446 */ 2582 */
2447static void __blk_put_request(request_queue_t *q, struct request *req) 2583void __blk_put_request(request_queue_t *q, struct request *req)
2448{ 2584{
2449 struct request_list *rl = req->rl; 2585 struct request_list *rl = req->rl;
2450 2586
@@ -2473,6 +2609,8 @@ static void __blk_put_request(request_queue_t *q, struct request *req)
2473 } 2609 }
2474} 2610}
2475 2611
2612EXPORT_SYMBOL_GPL(__blk_put_request);
2613
2476void blk_put_request(struct request *req) 2614void blk_put_request(struct request *req)
2477{ 2615{
2478 unsigned long flags; 2616 unsigned long flags;
@@ -2495,7 +2633,7 @@ EXPORT_SYMBOL(blk_put_request);
2495 * blk_end_sync_rq - executes a completion event on a request 2633 * blk_end_sync_rq - executes a completion event on a request
2496 * @rq: request to complete 2634 * @rq: request to complete
2497 */ 2635 */
2498void blk_end_sync_rq(struct request *rq) 2636void blk_end_sync_rq(struct request *rq, int error)
2499{ 2637{
2500 struct completion *waiting = rq->waiting; 2638 struct completion *waiting = rq->waiting;
2501 2639
@@ -2609,29 +2747,35 @@ static inline int attempt_front_merge(request_queue_t *q, struct request *rq)
2609 return 0; 2747 return 0;
2610} 2748}
2611 2749
2612/** 2750static void init_request_from_bio(struct request *req, struct bio *bio)
2613 * blk_attempt_remerge - attempt to remerge active head with next request
2614 * @q: The &request_queue_t belonging to the device
2615 * @rq: The head request (usually)
2616 *
2617 * Description:
2618 * For head-active devices, the queue can easily be unplugged so quickly
2619 * that proper merging is not done on the front request. This may hurt
2620 * performance greatly for some devices. The block layer cannot safely
2621 * do merging on that first request for these queues, but the driver can
2622 * call this function and make it happen any way. Only the driver knows
2623 * when it is safe to do so.
2624 **/
2625void blk_attempt_remerge(request_queue_t *q, struct request *rq)
2626{ 2751{
2627 unsigned long flags; 2752 req->flags |= REQ_CMD;
2628 2753
2629 spin_lock_irqsave(q->queue_lock, flags); 2754 /*
2630 attempt_back_merge(q, rq); 2755 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST)
2631 spin_unlock_irqrestore(q->queue_lock, flags); 2756 */
2632} 2757 if (bio_rw_ahead(bio) || bio_failfast(bio))
2758 req->flags |= REQ_FAILFAST;
2633 2759
2634EXPORT_SYMBOL(blk_attempt_remerge); 2760 /*
 2761 * REQ_BARRIER implies no merging, but let's make it explicit
2762 */
2763 if (unlikely(bio_barrier(bio)))
2764 req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
2765
2766 req->errors = 0;
2767 req->hard_sector = req->sector = bio->bi_sector;
2768 req->hard_nr_sectors = req->nr_sectors = bio_sectors(bio);
2769 req->current_nr_sectors = req->hard_cur_sectors = bio_cur_sectors(bio);
2770 req->nr_phys_segments = bio_phys_segments(req->q, bio);
2771 req->nr_hw_segments = bio_hw_segments(req->q, bio);
2772 req->buffer = bio_data(bio); /* see ->buffer comment above */
2773 req->waiting = NULL;
2774 req->bio = req->biotail = bio;
2775 req->ioprio = bio_prio(bio);
2776 req->rq_disk = bio->bi_bdev->bd_disk;
2777 req->start_time = jiffies;
2778}
2635 2779
2636static int __make_request(request_queue_t *q, struct bio *bio) 2780static int __make_request(request_queue_t *q, struct bio *bio)
2637{ 2781{
@@ -2658,7 +2802,7 @@ static int __make_request(request_queue_t *q, struct bio *bio)
2658 spin_lock_prefetch(q->queue_lock); 2802 spin_lock_prefetch(q->queue_lock);
2659 2803
2660 barrier = bio_barrier(bio); 2804 barrier = bio_barrier(bio);
2661 if (unlikely(barrier) && (q->ordered == QUEUE_ORDERED_NONE)) { 2805 if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) {
2662 err = -EOPNOTSUPP; 2806 err = -EOPNOTSUPP;
2663 goto end_io; 2807 goto end_io;
2664 } 2808 }
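
The test now reads q->next_ordered, part of the reworked ordered/flush state machine, but the visible behaviour is unchanged: a barrier bio on a queue that advertises no ordering fails with -EOPNOTSUPP. A hedged sketch of the equivalent guard in a bio-based (make_request_fn) driver that cannot order writes; only bio_barrier() and the three-argument bio_endio() used elsewhere in this patch are assumed:

static int example_make_request(request_queue_t *q, struct bio *bio)
{
        if (unlikely(bio_barrier(bio))) {
                /* no ordering guarantee available: refuse, as __make_request does */
                bio_endio(bio, bio->bi_size, -EOPNOTSUPP);
                return 0;
        }
        /* ...normal remapping/submission of the bio... */
        return 0;
}
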
@@ -2728,33 +2872,7 @@ get_rq:
2728 * We don't worry about that case for efficiency. It won't happen 2872 * We don't worry about that case for efficiency. It won't happen
2729 * often, and the elevators are able to handle it. 2873 * often, and the elevators are able to handle it.
2730 */ 2874 */
2731 2875 init_request_from_bio(req, bio);
2732 req->flags |= REQ_CMD;
2733
2734 /*
2735 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST)
2736 */
2737 if (bio_rw_ahead(bio) || bio_failfast(bio))
2738 req->flags |= REQ_FAILFAST;
2739
2740 /*
2741 * REQ_BARRIER implies no merging, but lets make it explicit
2742 */
2743 if (unlikely(barrier))
2744 req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
2745
2746 req->errors = 0;
2747 req->hard_sector = req->sector = sector;
2748 req->hard_nr_sectors = req->nr_sectors = nr_sectors;
2749 req->current_nr_sectors = req->hard_cur_sectors = cur_nr_sectors;
2750 req->nr_phys_segments = bio_phys_segments(q, bio);
2751 req->nr_hw_segments = bio_hw_segments(q, bio);
2752 req->buffer = bio_data(bio); /* see ->buffer comment above */
2753 req->waiting = NULL;
2754 req->bio = req->biotail = bio;
2755 req->ioprio = prio;
2756 req->rq_disk = bio->bi_bdev->bd_disk;
2757 req->start_time = jiffies;
2758 2876
2759 spin_lock_irq(q->queue_lock); 2877 spin_lock_irq(q->queue_lock);
2760 if (elv_queue_empty(q)) 2878 if (elv_queue_empty(q))
@@ -3045,7 +3163,8 @@ static int __end_that_request_first(struct request *req, int uptodate,
3045 if (nr_bytes >= bio->bi_size) { 3163 if (nr_bytes >= bio->bi_size) {
3046 req->bio = bio->bi_next; 3164 req->bio = bio->bi_next;
3047 nbytes = bio->bi_size; 3165 nbytes = bio->bi_size;
3048 bio_endio(bio, nbytes, error); 3166 if (!ordered_bio_endio(req, bio, nbytes, error))
3167 bio_endio(bio, nbytes, error);
3049 next_idx = 0; 3168 next_idx = 0;
3050 bio_nbytes = 0; 3169 bio_nbytes = 0;
3051 } else { 3170 } else {
@@ -3100,7 +3219,8 @@ static int __end_that_request_first(struct request *req, int uptodate,
3100 * if the request wasn't completed, update state 3219 * if the request wasn't completed, update state
3101 */ 3220 */
3102 if (bio_nbytes) { 3221 if (bio_nbytes) {
3103 bio_endio(bio, bio_nbytes, error); 3222 if (!ordered_bio_endio(req, bio, bio_nbytes, error))
3223 bio_endio(bio, bio_nbytes, error);
3104 bio->bi_idx += next_idx; 3224 bio->bi_idx += next_idx;
3105 bio_iovec(bio)->bv_offset += nr_bytes; 3225 bio_iovec(bio)->bv_offset += nr_bytes;
3106 bio_iovec(bio)->bv_len -= nr_bytes; 3226 bio_iovec(bio)->bv_len -= nr_bytes;
@@ -3155,11 +3275,100 @@ int end_that_request_chunk(struct request *req, int uptodate, int nr_bytes)
3155EXPORT_SYMBOL(end_that_request_chunk); 3275EXPORT_SYMBOL(end_that_request_chunk);
3156 3276
3157/* 3277/*
3278 * splice the completion data to a local structure and hand off to
3279 * process_completion_queue() to complete the requests
3280 */
3281static void blk_done_softirq(struct softirq_action *h)
3282{
3283 struct list_head *cpu_list;
3284 LIST_HEAD(local_list);
3285
3286 local_irq_disable();
3287 cpu_list = &__get_cpu_var(blk_cpu_done);
3288 list_splice_init(cpu_list, &local_list);
3289 local_irq_enable();
3290
3291 while (!list_empty(&local_list)) {
3292 struct request *rq = list_entry(local_list.next, struct request, donelist);
3293
3294 list_del_init(&rq->donelist);
3295 rq->q->softirq_done_fn(rq);
3296 }
3297}
3298
3299#ifdef CONFIG_HOTPLUG_CPU
3300
3301static int blk_cpu_notify(struct notifier_block *self, unsigned long action,
3302 void *hcpu)
3303{
3304 /*
3305 * If a CPU goes away, splice its entries to the current CPU
3306 * and trigger a run of the softirq
3307 */
3308 if (action == CPU_DEAD) {
3309 int cpu = (unsigned long) hcpu;
3310
3311 local_irq_disable();
3312 list_splice_init(&per_cpu(blk_cpu_done, cpu),
3313 &__get_cpu_var(blk_cpu_done));
3314 raise_softirq_irqoff(BLOCK_SOFTIRQ);
3315 local_irq_enable();
3316 }
3317
3318 return NOTIFY_OK;
3319}
3320
3321
3322static struct notifier_block __devinitdata blk_cpu_notifier = {
3323 .notifier_call = blk_cpu_notify,
3324};
3325
3326#endif /* CONFIG_HOTPLUG_CPU */
3327
3328/**
3329 * blk_complete_request - end I/O on a request
3330 * @req: the request being processed
3331 *
3332 * Description:
3333 * Ends all I/O on a request. It does not handle partial completions,
 3334 * unless the driver actually implements this in its completion callback
 3335 * through requeueing. The actual completion happens out-of-order,
3336 * through a softirq handler. The user must have registered a completion
3337 * callback through blk_queue_softirq_done().
3338 **/
3339
3340void blk_complete_request(struct request *req)
3341{
3342 struct list_head *cpu_list;
3343 unsigned long flags;
3344
3345 BUG_ON(!req->q->softirq_done_fn);
3346
3347 local_irq_save(flags);
3348
3349 cpu_list = &__get_cpu_var(blk_cpu_done);
3350 list_add_tail(&req->donelist, cpu_list);
3351 raise_softirq_irqoff(BLOCK_SOFTIRQ);
3352
3353 local_irq_restore(flags);
3354}
3355
3356EXPORT_SYMBOL(blk_complete_request);
3357
3358/*
3158 * queue lock must be held 3359 * queue lock must be held
3159 */ 3360 */
3160void end_that_request_last(struct request *req) 3361void end_that_request_last(struct request *req, int uptodate)
3161{ 3362{
3162 struct gendisk *disk = req->rq_disk; 3363 struct gendisk *disk = req->rq_disk;
3364 int error;
3365
3366 /*
3367 * extend uptodate bool to allow < 0 value to be direct io error
3368 */
3369 error = 0;
3370 if (end_io_error(uptodate))
3371 error = !uptodate ? -EIO : uptodate;
3163 3372
3164 if (unlikely(laptop_mode) && blk_fs_request(req)) 3373 if (unlikely(laptop_mode) && blk_fs_request(req))
3165 laptop_io_completion(); 3374 laptop_io_completion();
@@ -3174,7 +3383,7 @@ void end_that_request_last(struct request *req)
3174 disk->in_flight--; 3383 disk->in_flight--;
3175 } 3384 }
3176 if (req->end_io) 3385 if (req->end_io)
3177 req->end_io(req); 3386 req->end_io(req, error);
3178 else 3387 else
3179 __blk_put_request(req->q, req); 3388 __blk_put_request(req->q, req);
3180} 3389}
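
Taken together, the softirq pieces split completion in two: the driver's interrupt handler only queues the finished request on the per-CPU blk_cpu_done list via blk_complete_request(), and the softirq_done_fn registered through blk_queue_softirq_done() (the helper named in the kerneldoc above) later finishes it in BLOCK_SOFTIRQ context. A hedged driver-side sketch, with all example_* names hypothetical and the request assumed to have been dequeued when it was issued to the hardware:

#include <linux/blkdev.h>

/* runs later in BLOCK_SOFTIRQ context, not in the hardware interrupt */
static void example_softirq_done(struct request *rq)
{
        request_queue_t *q = rq->q;
        int uptodate = !rq->errors;
        unsigned long flags;

        /* complete all remaining bytes; a nonzero return means partial completion */
        if (end_that_request_chunk(rq, uptodate, rq->hard_nr_sectors << 9))
                return;

        add_disk_randomness(rq->rq_disk);

        spin_lock_irqsave(q->queue_lock, flags);
        end_that_request_last(rq, uptodate);    /* queue lock must be held */
        spin_unlock_irqrestore(q->queue_lock, flags);
}

/* hardware interrupt handler: just hand the request to the per-CPU list */
static void example_hw_irq_done(struct request *rq)
{
        blk_complete_request(rq);
}

/* once, when the queue is set up */
static void example_init_queue(request_queue_t *q)
{
        blk_queue_softirq_done(q, example_softirq_done);
}
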
@@ -3186,7 +3395,7 @@ void end_request(struct request *req, int uptodate)
3186 if (!end_that_request_first(req, uptodate, req->hard_cur_sectors)) { 3395 if (!end_that_request_first(req, uptodate, req->hard_cur_sectors)) {
3187 add_disk_randomness(req->rq_disk); 3396 add_disk_randomness(req->rq_disk);
3188 blkdev_dequeue_request(req); 3397 blkdev_dequeue_request(req);
3189 end_that_request_last(req); 3398 end_that_request_last(req, uptodate);
3190 } 3399 }
3191} 3400}
3192 3401
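
Since end_request() now forwards uptodate, even the simplest drivers propagate an error indication into end_that_request_last() and the end_io callback; with the extension above, a negative value carries a specific -errno instead of the blanket -EIO. A minimal request_fn in the style of the classic example drivers (example_request_fn is hypothetical; end_request() completes only the current segment, so a partially finished request stays at the head of the queue and is returned again):

static void example_request_fn(request_queue_t *q)
{
        struct request *rq;

        while ((rq = elv_next_request(q)) != NULL) {
                if (!blk_fs_request(rq)) {
                        end_request(rq, 0);     /* 0: fail the segment with -EIO */
                        continue;
                }
                /* ...transfer rq->current_nr_sectors sectors at rq->buffer... */
                end_request(rq, 1);             /* 1: this segment succeeded */
        }
}
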
@@ -3224,6 +3433,8 @@ EXPORT_SYMBOL(kblockd_flush);
3224 3433
3225int __init blk_dev_init(void) 3434int __init blk_dev_init(void)
3226{ 3435{
3436 int i;
3437
3227 kblockd_workqueue = create_workqueue("kblockd"); 3438 kblockd_workqueue = create_workqueue("kblockd");
3228 if (!kblockd_workqueue) 3439 if (!kblockd_workqueue)
3229 panic("Failed to create kblockd\n"); 3440 panic("Failed to create kblockd\n");
@@ -3237,6 +3448,14 @@ int __init blk_dev_init(void)
3237 iocontext_cachep = kmem_cache_create("blkdev_ioc", 3448 iocontext_cachep = kmem_cache_create("blkdev_ioc",
3238 sizeof(struct io_context), 0, SLAB_PANIC, NULL, NULL); 3449 sizeof(struct io_context), 0, SLAB_PANIC, NULL, NULL);
3239 3450
3451 for (i = 0; i < NR_CPUS; i++)
3452 INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
3453
3454 open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL);
3455#ifdef CONFIG_HOTPLUG_CPU
3456 register_cpu_notifier(&blk_cpu_notifier);
3457#endif
3458
3240 blk_max_low_pfn = max_low_pfn; 3459 blk_max_low_pfn = max_low_pfn;
3241 blk_max_pfn = max_pfn; 3460 blk_max_pfn = max_pfn;
3242 3461
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 382dea7b224c..cc72210687eb 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -21,6 +21,7 @@
21#include <linux/string.h> 21#include <linux/string.h>
22#include <linux/module.h> 22#include <linux/module.h>
23#include <linux/blkdev.h> 23#include <linux/blkdev.h>
24#include <linux/capability.h>
24#include <linux/completion.h> 25#include <linux/completion.h>
25#include <linux/cdrom.h> 26#include <linux/cdrom.h>
26#include <linux/slab.h> 27#include <linux/slab.h>
@@ -46,7 +47,7 @@ EXPORT_SYMBOL(scsi_command_size);
46 47
47static int sg_get_version(int __user *p) 48static int sg_get_version(int __user *p)
48{ 49{
49 static int sg_version_num = 30527; 50 static const int sg_version_num = 30527;
50 return put_user(sg_version_num, p); 51 return put_user(sg_version_num, p);
51} 52}
52 53
@@ -190,16 +191,21 @@ static int verify_command(struct file *file, unsigned char *cmd)
190 safe_for_write(GPCMD_SET_STREAMING), 191 safe_for_write(GPCMD_SET_STREAMING),
191 }; 192 };
192 unsigned char type = cmd_type[cmd[0]]; 193 unsigned char type = cmd_type[cmd[0]];
194 int has_write_perm = 0;
193 195
194 /* Anybody who can open the device can do a read-safe command */ 196 /* Anybody who can open the device can do a read-safe command */
195 if (type & CMD_READ_SAFE) 197 if (type & CMD_READ_SAFE)
196 return 0; 198 return 0;
197 199
200 /*
201 * file can be NULL from ioctl_by_bdev()...
202 */
203 if (file)
204 has_write_perm = file->f_mode & FMODE_WRITE;
205
198 /* Write-safe commands just require a writable open.. */ 206 /* Write-safe commands just require a writable open.. */
199 if (type & CMD_WRITE_SAFE) { 207 if ((type & CMD_WRITE_SAFE) && has_write_perm)
200 if (file->f_mode & FMODE_WRITE) 208 return 0;
201 return 0;
202 }
203 209
204 /* And root can do any command.. */ 210 /* And root can do any command.. */
205 if (capable(CAP_SYS_RAWIO)) 211 if (capable(CAP_SYS_RAWIO))
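
The NULL check matters because scsi_cmd_ioctl() can be reached with no struct file at all, via ioctl_by_bdev() from inside the kernel; with no file there is no FMODE_WRITE to inherit, so has_write_perm stays 0 and only read-safe commands (or a CAP_SYS_RAWIO caller) get through. A hedged sketch of such an in-kernel caller; the example_* function is hypothetical, ioctl_by_bdev() and SG_IO are real:

#include <linux/fs.h>
#include <scsi/sg.h>

/* in-kernel issue of an SG_IO request: this path has file == NULL */
static int example_in_kernel_sg_io(struct block_device *bdev,
                                   struct sg_io_hdr *hdr)
{
        return ioctl_by_bdev(bdev, SG_IO, (unsigned long) hdr);
}
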
@@ -233,7 +239,7 @@ static int sg_io(struct file *file, request_queue_t *q,
233 if (verify_command(file, cmd)) 239 if (verify_command(file, cmd))
234 return -EPERM; 240 return -EPERM;
235 241
236 if (hdr->dxfer_len > (q->max_sectors << 9)) 242 if (hdr->dxfer_len > (q->max_hw_sectors << 9))
237 return -EIO; 243 return -EIO;
238 244
239 if (hdr->dxfer_len) 245 if (hdr->dxfer_len)
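
The same relaxation applies to SG_IO: hdr->dxfer_len, which comes straight from user space, is now allowed up to the hardware limit. For reference, a minimal user-space sketch of the SG_IO call whose dxfer_len this check bounds; the device path is an example and the command is a plain INQUIRY:

/* user-space example: issue INQUIRY through SG_IO */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <scsi/sg.h>

int main(void)
{
        unsigned char cdb[6] = { 0x12, 0, 0, 0, 96, 0 };        /* INQUIRY, 96 bytes */
        unsigned char buf[96], sense[32];
        struct sg_io_hdr hdr;
        int fd = open("/dev/sda", O_RDONLY);

        if (fd < 0)
                return 1;

        memset(&hdr, 0, sizeof(hdr));
        hdr.interface_id = 'S';
        hdr.cmdp = cdb;
        hdr.cmd_len = sizeof(cdb);
        hdr.dxfer_direction = SG_DXFER_FROM_DEV;
        hdr.dxferp = buf;
        hdr.dxfer_len = sizeof(buf);            /* checked against max_hw_sectors << 9 */
        hdr.sbp = sense;
        hdr.mx_sb_len = sizeof(sense);
        hdr.timeout = 5000;                     /* milliseconds */

        if (ioctl(fd, SG_IO, &hdr) < 0)
                perror("SG_IO");
        else
                printf("vendor: %.8s\n", (char *) buf + 8);

        close(fd);
        return 0;
}
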
@@ -442,11 +448,37 @@ error:
442 return err; 448 return err;
443} 449}
444 450
451
452/* Send basic block requests */
453static int __blk_send_generic(request_queue_t *q, struct gendisk *bd_disk, int cmd, int data)
454{
455 struct request *rq;
456 int err;
457
458 rq = blk_get_request(q, WRITE, __GFP_WAIT);
459 rq->flags |= REQ_BLOCK_PC;
460 rq->data = NULL;
461 rq->data_len = 0;
462 rq->timeout = BLK_DEFAULT_TIMEOUT;
463 memset(rq->cmd, 0, sizeof(rq->cmd));
464 rq->cmd[0] = cmd;
465 rq->cmd[4] = data;
466 rq->cmd_len = 6;
467 err = blk_execute_rq(q, bd_disk, rq, 0);
468 blk_put_request(rq);
469
470 return err;
471}
472
473static inline int blk_send_start_stop(request_queue_t *q, struct gendisk *bd_disk, int data)
474{
475 return __blk_send_generic(q, bd_disk, GPCMD_START_STOP_UNIT, data);
476}
477
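
__blk_send_generic() builds a six-byte CDB with the caller's data byte in cmd[4]; for GPCMD_START_STOP_UNIT (0x1b) that byte is the Start/LoEj pair, which is why the ioctl cases further down pass 0x02 (LoEj only: eject) and 0x03 (LoEj + Start: load/close the tray). A small annotated sketch of the same CDB; the SSU_* macros and the helper name are illustrative, not from the patch:

#include <linux/cdrom.h>
#include <linux/string.h>

#define SSU_START       0x01    /* bit 0: start the unit / make medium accessible */
#define SSU_LOEJ        0x02    /* bit 1: load or eject the medium */

static void example_fill_start_stop(unsigned char cmd[6], int load)
{
        memset(cmd, 0, 6);
        cmd[0] = GPCMD_START_STOP_UNIT;                 /* 0x1b */
        cmd[4] = load ? (SSU_LOEJ | SSU_START)          /* 0x03: close tray */
                      : SSU_LOEJ;                       /* 0x02: eject */
}

User space reaches these through the CDROMCLOSETRAY and CDROMEJECT ioctls handled below.
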
445int scsi_cmd_ioctl(struct file *file, struct gendisk *bd_disk, unsigned int cmd, void __user *arg) 478int scsi_cmd_ioctl(struct file *file, struct gendisk *bd_disk, unsigned int cmd, void __user *arg)
446{ 479{
447 request_queue_t *q; 480 request_queue_t *q;
448 struct request *rq; 481 int err;
449 int close = 0, err;
450 482
451 q = bd_disk->queue; 483 q = bd_disk->queue;
452 if (!q) 484 if (!q)
@@ -564,19 +596,10 @@ int scsi_cmd_ioctl(struct file *file, struct gendisk *bd_disk, unsigned int cmd,
564 err = sg_scsi_ioctl(file, q, bd_disk, arg); 596 err = sg_scsi_ioctl(file, q, bd_disk, arg);
565 break; 597 break;
566 case CDROMCLOSETRAY: 598 case CDROMCLOSETRAY:
567 close = 1; 599 err = blk_send_start_stop(q, bd_disk, 0x03);
600 break;
568 case CDROMEJECT: 601 case CDROMEJECT:
569 rq = blk_get_request(q, WRITE, __GFP_WAIT); 602 err = blk_send_start_stop(q, bd_disk, 0x02);
570 rq->flags |= REQ_BLOCK_PC;
571 rq->data = NULL;
572 rq->data_len = 0;
573 rq->timeout = BLK_DEFAULT_TIMEOUT;
574 memset(rq->cmd, 0, sizeof(rq->cmd));
575 rq->cmd[0] = GPCMD_START_STOP_UNIT;
576 rq->cmd[4] = 0x02 + (close != 0);
577 rq->cmd_len = 6;
578 err = blk_execute_rq(q, bd_disk, rq, 0);
579 blk_put_request(rq);
580 break; 603 break;
581 default: 604 default:
582 err = -ENOTTY; 605 err = -ENOTTY;