aboutsummaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/Kconfig2
-rw-r--r--block/as-iosched.c144
-rw-r--r--block/cfq-iosched.c16
-rw-r--r--block/deadline-iosched.c8
-rw-r--r--block/elevator.c86
-rw-r--r--block/genhd.c48
-rw-r--r--block/ll_rw_blk.c576
-rw-r--r--block/scsi_ioctl.c4
8 files changed, 481 insertions, 403 deletions
diff --git a/block/Kconfig b/block/Kconfig
index eb48edb80c1d..377f6dd20e17 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -5,7 +5,7 @@
5#for instance. 5#for instance.
6config LBD 6config LBD
7 bool "Support for Large Block Devices" 7 bool "Support for Large Block Devices"
8 depends on X86 || (MIPS && 32BIT) || PPC32 || ARCH_S390_31 || SUPERH || UML 8 depends on X86 || (MIPS && 32BIT) || PPC32 || (S390 && !64BIT) || SUPERH || UML
9 help 9 help
10 Say Y here if you want to attach large (bigger than 2TB) discs to 10 Say Y here if you want to attach large (bigger than 2TB) discs to
11 your machine, or if you want to have a raid or loopback device 11 your machine, or if you want to have a raid or loopback device
diff --git a/block/as-iosched.c b/block/as-iosched.c
index 43fa20495688..8da3cf66894c 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -182,6 +182,9 @@ struct as_rq {
182 182
183static kmem_cache_t *arq_pool; 183static kmem_cache_t *arq_pool;
184 184
185static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq);
186static void as_antic_stop(struct as_data *ad);
187
185/* 188/*
186 * IO Context helper functions 189 * IO Context helper functions
187 */ 190 */
@@ -370,7 +373,7 @@ static struct as_rq *as_find_first_arq(struct as_data *ad, int data_dir)
370 * existing request against the same sector), which can happen when using 373 * existing request against the same sector), which can happen when using
371 * direct IO, then return the alias. 374 * direct IO, then return the alias.
372 */ 375 */
373static struct as_rq *as_add_arq_rb(struct as_data *ad, struct as_rq *arq) 376static struct as_rq *__as_add_arq_rb(struct as_data *ad, struct as_rq *arq)
374{ 377{
375 struct rb_node **p = &ARQ_RB_ROOT(ad, arq)->rb_node; 378 struct rb_node **p = &ARQ_RB_ROOT(ad, arq)->rb_node;
376 struct rb_node *parent = NULL; 379 struct rb_node *parent = NULL;
@@ -397,6 +400,16 @@ static struct as_rq *as_add_arq_rb(struct as_data *ad, struct as_rq *arq)
397 return NULL; 400 return NULL;
398} 401}
399 402
403static void as_add_arq_rb(struct as_data *ad, struct as_rq *arq)
404{
405 struct as_rq *alias;
406
407 while ((unlikely(alias = __as_add_arq_rb(ad, arq)))) {
408 as_move_to_dispatch(ad, alias);
409 as_antic_stop(ad);
410 }
411}
412
400static inline void as_del_arq_rb(struct as_data *ad, struct as_rq *arq) 413static inline void as_del_arq_rb(struct as_data *ad, struct as_rq *arq)
401{ 414{
402 if (!ON_RB(&arq->rb_node)) { 415 if (!ON_RB(&arq->rb_node)) {
@@ -1133,23 +1146,6 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
1133 /* 1146 /*
1134 * take it off the sort and fifo list, add to dispatch queue 1147 * take it off the sort and fifo list, add to dispatch queue
1135 */ 1148 */
1136 while (!list_empty(&rq->queuelist)) {
1137 struct request *__rq = list_entry_rq(rq->queuelist.next);
1138 struct as_rq *__arq = RQ_DATA(__rq);
1139
1140 list_del(&__rq->queuelist);
1141
1142 elv_dispatch_add_tail(ad->q, __rq);
1143
1144 if (__arq->io_context && __arq->io_context->aic)
1145 atomic_inc(&__arq->io_context->aic->nr_dispatched);
1146
1147 WARN_ON(__arq->state != AS_RQ_QUEUED);
1148 __arq->state = AS_RQ_DISPATCHED;
1149
1150 ad->nr_dispatched++;
1151 }
1152
1153 as_remove_queued_request(ad->q, rq); 1149 as_remove_queued_request(ad->q, rq);
1154 WARN_ON(arq->state != AS_RQ_QUEUED); 1150 WARN_ON(arq->state != AS_RQ_QUEUED);
1155 1151
@@ -1326,49 +1322,12 @@ fifo_expired:
1326} 1322}
1327 1323
1328/* 1324/*
1329 * Add arq to a list behind alias
1330 */
1331static inline void
1332as_add_aliased_request(struct as_data *ad, struct as_rq *arq,
1333 struct as_rq *alias)
1334{
1335 struct request *req = arq->request;
1336 struct list_head *insert = alias->request->queuelist.prev;
1337
1338 /*
1339 * Transfer list of aliases
1340 */
1341 while (!list_empty(&req->queuelist)) {
1342 struct request *__rq = list_entry_rq(req->queuelist.next);
1343 struct as_rq *__arq = RQ_DATA(__rq);
1344
1345 list_move_tail(&__rq->queuelist, &alias->request->queuelist);
1346
1347 WARN_ON(__arq->state != AS_RQ_QUEUED);
1348 }
1349
1350 /*
1351 * Another request with the same start sector on the rbtree.
1352 * Link this request to that sector. They are untangled in
1353 * as_move_to_dispatch
1354 */
1355 list_add(&arq->request->queuelist, insert);
1356
1357 /*
1358 * Don't want to have to handle merges.
1359 */
1360 as_del_arq_hash(arq);
1361 arq->request->flags |= REQ_NOMERGE;
1362}
1363
1364/*
1365 * add arq to rbtree and fifo 1325 * add arq to rbtree and fifo
1366 */ 1326 */
1367static void as_add_request(request_queue_t *q, struct request *rq) 1327static void as_add_request(request_queue_t *q, struct request *rq)
1368{ 1328{
1369 struct as_data *ad = q->elevator->elevator_data; 1329 struct as_data *ad = q->elevator->elevator_data;
1370 struct as_rq *arq = RQ_DATA(rq); 1330 struct as_rq *arq = RQ_DATA(rq);
1371 struct as_rq *alias;
1372 int data_dir; 1331 int data_dir;
1373 1332
1374 arq->state = AS_RQ_NEW; 1333 arq->state = AS_RQ_NEW;
@@ -1387,33 +1346,17 @@ static void as_add_request(request_queue_t *q, struct request *rq)
1387 atomic_inc(&arq->io_context->aic->nr_queued); 1346 atomic_inc(&arq->io_context->aic->nr_queued);
1388 } 1347 }
1389 1348
1390 alias = as_add_arq_rb(ad, arq); 1349 as_add_arq_rb(ad, arq);
1391 if (!alias) { 1350 if (rq_mergeable(arq->request))
1392 /* 1351 as_add_arq_hash(ad, arq);
1393 * set expire time (only used for reads) and add to fifo list
1394 */
1395 arq->expires = jiffies + ad->fifo_expire[data_dir];
1396 list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]);
1397 1352
1398 if (rq_mergeable(arq->request)) 1353 /*
1399 as_add_arq_hash(ad, arq); 1354 * set expire time (only used for reads) and add to fifo list
1400 as_update_arq(ad, arq); /* keep state machine up to date */ 1355 */
1401 1356 arq->expires = jiffies + ad->fifo_expire[data_dir];
1402 } else { 1357 list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]);
1403 as_add_aliased_request(ad, arq, alias);
1404
1405 /*
1406 * have we been anticipating this request?
1407 * or does it come from the same process as the one we are
1408 * anticipating for?
1409 */
1410 if (ad->antic_status == ANTIC_WAIT_REQ
1411 || ad->antic_status == ANTIC_WAIT_NEXT) {
1412 if (as_can_break_anticipation(ad, arq))
1413 as_antic_stop(ad);
1414 }
1415 }
1416 1358
1359 as_update_arq(ad, arq); /* keep state machine up to date */
1417 arq->state = AS_RQ_QUEUED; 1360 arq->state = AS_RQ_QUEUED;
1418} 1361}
1419 1362
@@ -1536,23 +1479,8 @@ static void as_merged_request(request_queue_t *q, struct request *req)
1536 * if the merge was a front merge, we need to reposition request 1479 * if the merge was a front merge, we need to reposition request
1537 */ 1480 */
1538 if (rq_rb_key(req) != arq->rb_key) { 1481 if (rq_rb_key(req) != arq->rb_key) {
1539 struct as_rq *alias, *next_arq = NULL;
1540
1541 if (ad->next_arq[arq->is_sync] == arq)
1542 next_arq = as_find_next_arq(ad, arq);
1543
1544 /*
1545 * Note! We should really be moving any old aliased requests
1546 * off this request and try to insert them into the rbtree. We
1547 * currently don't bother. Ditto the next function.
1548 */
1549 as_del_arq_rb(ad, arq); 1482 as_del_arq_rb(ad, arq);
1550 if ((alias = as_add_arq_rb(ad, arq))) { 1483 as_add_arq_rb(ad, arq);
1551 list_del_init(&arq->fifo);
1552 as_add_aliased_request(ad, arq, alias);
1553 if (next_arq)
1554 ad->next_arq[arq->is_sync] = next_arq;
1555 }
1556 /* 1484 /*
1557 * Note! At this stage of this and the next function, our next 1485 * Note! At this stage of this and the next function, our next
1558 * request may not be optimal - eg the request may have "grown" 1486 * request may not be optimal - eg the request may have "grown"
@@ -1579,18 +1507,8 @@ static void as_merged_requests(request_queue_t *q, struct request *req,
1579 as_add_arq_hash(ad, arq); 1507 as_add_arq_hash(ad, arq);
1580 1508
1581 if (rq_rb_key(req) != arq->rb_key) { 1509 if (rq_rb_key(req) != arq->rb_key) {
1582 struct as_rq *alias, *next_arq = NULL;
1583
1584 if (ad->next_arq[arq->is_sync] == arq)
1585 next_arq = as_find_next_arq(ad, arq);
1586
1587 as_del_arq_rb(ad, arq); 1510 as_del_arq_rb(ad, arq);
1588 if ((alias = as_add_arq_rb(ad, arq))) { 1511 as_add_arq_rb(ad, arq);
1589 list_del_init(&arq->fifo);
1590 as_add_aliased_request(ad, arq, alias);
1591 if (next_arq)
1592 ad->next_arq[arq->is_sync] = next_arq;
1593 }
1594 } 1512 }
1595 1513
1596 /* 1514 /*
@@ -1610,18 +1528,6 @@ static void as_merged_requests(request_queue_t *q, struct request *req,
1610 } 1528 }
1611 1529
1612 /* 1530 /*
1613 * Transfer list of aliases
1614 */
1615 while (!list_empty(&next->queuelist)) {
1616 struct request *__rq = list_entry_rq(next->queuelist.next);
1617 struct as_rq *__arq = RQ_DATA(__rq);
1618
1619 list_move_tail(&__rq->queuelist, &req->queuelist);
1620
1621 WARN_ON(__arq->state != AS_RQ_QUEUED);
1622 }
1623
1624 /*
1625 * kill knowledge of next, this one is a goner 1531 * kill knowledge of next, this one is a goner
1626 */ 1532 */
1627 as_remove_queued_request(q, next); 1533 as_remove_queued_request(q, next);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index ee0bb41694b0..74fae2daf87e 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -25,15 +25,15 @@
25/* 25/*
26 * tunables 26 * tunables
27 */ 27 */
28static int cfq_quantum = 4; /* max queue in one round of service */ 28static const int cfq_quantum = 4; /* max queue in one round of service */
29static int cfq_queued = 8; /* minimum rq allocate limit per-queue*/ 29static const int cfq_queued = 8; /* minimum rq allocate limit per-queue*/
30static int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; 30static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
31static int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */ 31static const int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */
32static int cfq_back_penalty = 2; /* penalty of a backwards seek */ 32static const int cfq_back_penalty = 2; /* penalty of a backwards seek */
33 33
34static int cfq_slice_sync = HZ / 10; 34static const int cfq_slice_sync = HZ / 10;
35static int cfq_slice_async = HZ / 25; 35static int cfq_slice_async = HZ / 25;
36static int cfq_slice_async_rq = 2; 36static const int cfq_slice_async_rq = 2;
37static int cfq_slice_idle = HZ / 100; 37static int cfq_slice_idle = HZ / 100;
38 38
39#define CFQ_IDLE_GRACE (HZ / 10) 39#define CFQ_IDLE_GRACE (HZ / 10)
@@ -45,7 +45,7 @@ static int cfq_slice_idle = HZ / 100;
45/* 45/*
46 * disable queueing at the driver/hardware level 46 * disable queueing at the driver/hardware level
47 */ 47 */
48static int cfq_max_depth = 2; 48static const int cfq_max_depth = 2;
49 49
50/* 50/*
51 * for the hash of cfqq inside the cfqd 51 * for the hash of cfqq inside the cfqd
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index 9cbec09e8415..27e494b1bf97 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -19,10 +19,10 @@
19/* 19/*
20 * See Documentation/block/deadline-iosched.txt 20 * See Documentation/block/deadline-iosched.txt
21 */ 21 */
22static int read_expire = HZ / 2; /* max time before a read is submitted. */ 22static const int read_expire = HZ / 2; /* max time before a read is submitted. */
23static int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */ 23static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */
24static int writes_starved = 2; /* max times reads can starve a write */ 24static const int writes_starved = 2; /* max times reads can starve a write */
25static int fifo_batch = 16; /* # of sequential requests treated as one 25static const int fifo_batch = 16; /* # of sequential requests treated as one
26 by the above parameters. For throughput. */ 26 by the above parameters. For throughput. */
27 27
28static const int deadline_hash_shift = 5; 28static const int deadline_hash_shift = 5;
diff --git a/block/elevator.c b/block/elevator.c
index 6c3fc8a10bf2..39dcccc82ada 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -304,15 +304,7 @@ void elv_requeue_request(request_queue_t *q, struct request *rq)
304 304
305 rq->flags &= ~REQ_STARTED; 305 rq->flags &= ~REQ_STARTED;
306 306
307 /* 307 __elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE, 0);
308 * if this is the flush, requeue the original instead and drop the flush
309 */
310 if (rq->flags & REQ_BAR_FLUSH) {
311 clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
312 rq = rq->end_io_data;
313 }
314
315 __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
316} 308}
317 309
318static void elv_drain_elevator(request_queue_t *q) 310static void elv_drain_elevator(request_queue_t *q)
@@ -332,8 +324,19 @@ static void elv_drain_elevator(request_queue_t *q)
332void __elv_add_request(request_queue_t *q, struct request *rq, int where, 324void __elv_add_request(request_queue_t *q, struct request *rq, int where,
333 int plug) 325 int plug)
334{ 326{
327 struct list_head *pos;
328 unsigned ordseq;
329
330 if (q->ordcolor)
331 rq->flags |= REQ_ORDERED_COLOR;
332
335 if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { 333 if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
336 /* 334 /*
335 * toggle ordered color
336 */
337 q->ordcolor ^= 1;
338
339 /*
337 * barriers implicitly indicate back insertion 340 * barriers implicitly indicate back insertion
338 */ 341 */
339 if (where == ELEVATOR_INSERT_SORT) 342 if (where == ELEVATOR_INSERT_SORT)
@@ -393,6 +396,30 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where,
393 q->elevator->ops->elevator_add_req_fn(q, rq); 396 q->elevator->ops->elevator_add_req_fn(q, rq);
394 break; 397 break;
395 398
399 case ELEVATOR_INSERT_REQUEUE:
400 /*
401 * If ordered flush isn't in progress, we do front
402 * insertion; otherwise, requests should be requeued
403 * in ordseq order.
404 */
405 rq->flags |= REQ_SOFTBARRIER;
406
407 if (q->ordseq == 0) {
408 list_add(&rq->queuelist, &q->queue_head);
409 break;
410 }
411
412 ordseq = blk_ordered_req_seq(rq);
413
414 list_for_each(pos, &q->queue_head) {
415 struct request *pos_rq = list_entry_rq(pos);
416 if (ordseq <= blk_ordered_req_seq(pos_rq))
417 break;
418 }
419
420 list_add_tail(&rq->queuelist, pos);
421 break;
422
396 default: 423 default:
397 printk(KERN_ERR "%s: bad insertion point %d\n", 424 printk(KERN_ERR "%s: bad insertion point %d\n",
398 __FUNCTION__, where); 425 __FUNCTION__, where);
@@ -422,25 +449,16 @@ static inline struct request *__elv_next_request(request_queue_t *q)
422{ 449{
423 struct request *rq; 450 struct request *rq;
424 451
425 if (unlikely(list_empty(&q->queue_head) && 452 while (1) {
426 !q->elevator->ops->elevator_dispatch_fn(q, 0))) 453 while (!list_empty(&q->queue_head)) {
427 return NULL; 454 rq = list_entry_rq(q->queue_head.next);
428 455 if (blk_do_ordered(q, &rq))
429 rq = list_entry_rq(q->queue_head.next); 456 return rq;
430 457 }
431 /*
432 * if this is a barrier write and the device has to issue a
433 * flush sequence to support it, check how far we are
434 */
435 if (blk_fs_request(rq) && blk_barrier_rq(rq)) {
436 BUG_ON(q->ordered == QUEUE_ORDERED_NONE);
437 458
438 if (q->ordered == QUEUE_ORDERED_FLUSH && 459 if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
439 !blk_barrier_preflush(rq)) 460 return NULL;
440 rq = blk_start_pre_flush(q, rq);
441 } 461 }
442
443 return rq;
444} 462}
445 463
446struct request *elv_next_request(request_queue_t *q) 464struct request *elv_next_request(request_queue_t *q)
@@ -498,7 +516,7 @@ struct request *elv_next_request(request_queue_t *q)
498 blkdev_dequeue_request(rq); 516 blkdev_dequeue_request(rq);
499 rq->flags |= REQ_QUIET; 517 rq->flags |= REQ_QUIET;
500 end_that_request_chunk(rq, 0, nr_bytes); 518 end_that_request_chunk(rq, 0, nr_bytes);
501 end_that_request_last(rq); 519 end_that_request_last(rq, 0);
502 } else { 520 } else {
503 printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__, 521 printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__,
504 ret); 522 ret);
@@ -593,7 +611,21 @@ void elv_completed_request(request_queue_t *q, struct request *rq)
593 * request is released from the driver, io must be done 611 * request is released from the driver, io must be done
594 */ 612 */
595 if (blk_account_rq(rq)) { 613 if (blk_account_rq(rq)) {
614 struct request *first_rq = list_entry_rq(q->queue_head.next);
615
596 q->in_flight--; 616 q->in_flight--;
617
618 /*
619 * Check if the queue is waiting for fs requests to be
620 * drained for flush sequence.
621 */
622 if (q->ordseq && q->in_flight == 0 &&
623 blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
624 blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) {
625 blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
626 q->request_fn(q);
627 }
628
597 if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn) 629 if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
598 e->ops->elevator_completed_req_fn(q, rq); 630 e->ops->elevator_completed_req_fn(q, rq);
599 } 631 }
diff --git a/block/genhd.c b/block/genhd.c
index f04609d553b8..f1ed83f3f083 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -358,7 +358,7 @@ static struct sysfs_ops disk_sysfs_ops = {
358static ssize_t disk_uevent_store(struct gendisk * disk, 358static ssize_t disk_uevent_store(struct gendisk * disk,
359 const char *buf, size_t count) 359 const char *buf, size_t count)
360{ 360{
361 kobject_hotplug(&disk->kobj, KOBJ_ADD); 361 kobject_uevent(&disk->kobj, KOBJ_ADD);
362 return count; 362 return count;
363} 363}
364static ssize_t disk_dev_read(struct gendisk * disk, char *page) 364static ssize_t disk_dev_read(struct gendisk * disk, char *page)
@@ -455,14 +455,14 @@ static struct kobj_type ktype_block = {
455 455
456extern struct kobj_type ktype_part; 456extern struct kobj_type ktype_part;
457 457
458static int block_hotplug_filter(struct kset *kset, struct kobject *kobj) 458static int block_uevent_filter(struct kset *kset, struct kobject *kobj)
459{ 459{
460 struct kobj_type *ktype = get_ktype(kobj); 460 struct kobj_type *ktype = get_ktype(kobj);
461 461
462 return ((ktype == &ktype_block) || (ktype == &ktype_part)); 462 return ((ktype == &ktype_block) || (ktype == &ktype_part));
463} 463}
464 464
465static int block_hotplug(struct kset *kset, struct kobject *kobj, char **envp, 465static int block_uevent(struct kset *kset, struct kobject *kobj, char **envp,
466 int num_envp, char *buffer, int buffer_size) 466 int num_envp, char *buffer, int buffer_size)
467{ 467{
468 struct kobj_type *ktype = get_ktype(kobj); 468 struct kobj_type *ktype = get_ktype(kobj);
@@ -474,40 +474,40 @@ static int block_hotplug(struct kset *kset, struct kobject *kobj, char **envp,
474 474
475 if (ktype == &ktype_block) { 475 if (ktype == &ktype_block) {
476 disk = container_of(kobj, struct gendisk, kobj); 476 disk = container_of(kobj, struct gendisk, kobj);
477 add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, 477 add_uevent_var(envp, num_envp, &i, buffer, buffer_size,
478 &length, "MINOR=%u", disk->first_minor); 478 &length, "MINOR=%u", disk->first_minor);
479 } else if (ktype == &ktype_part) { 479 } else if (ktype == &ktype_part) {
480 disk = container_of(kobj->parent, struct gendisk, kobj); 480 disk = container_of(kobj->parent, struct gendisk, kobj);
481 part = container_of(kobj, struct hd_struct, kobj); 481 part = container_of(kobj, struct hd_struct, kobj);
482 add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, 482 add_uevent_var(envp, num_envp, &i, buffer, buffer_size,
483 &length, "MINOR=%u", 483 &length, "MINOR=%u",
484 disk->first_minor + part->partno); 484 disk->first_minor + part->partno);
485 } else 485 } else
486 return 0; 486 return 0;
487 487
488 add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, &length, 488 add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
489 "MAJOR=%u", disk->major); 489 "MAJOR=%u", disk->major);
490 490
491 /* add physical device, backing this device */ 491 /* add physical device, backing this device */
492 physdev = disk->driverfs_dev; 492 physdev = disk->driverfs_dev;
493 if (physdev) { 493 if (physdev) {
494 char *path = kobject_get_path(&physdev->kobj, GFP_KERNEL); 494 char *path = kobject_get_path(&physdev->kobj, GFP_KERNEL);
495 495
496 add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, 496 add_uevent_var(envp, num_envp, &i, buffer, buffer_size,
497 &length, "PHYSDEVPATH=%s", path); 497 &length, "PHYSDEVPATH=%s", path);
498 kfree(path); 498 kfree(path);
499 499
500 if (physdev->bus) 500 if (physdev->bus)
501 add_hotplug_env_var(envp, num_envp, &i, 501 add_uevent_var(envp, num_envp, &i,
502 buffer, buffer_size, &length, 502 buffer, buffer_size, &length,
503 "PHYSDEVBUS=%s", 503 "PHYSDEVBUS=%s",
504 physdev->bus->name); 504 physdev->bus->name);
505 505
506 if (physdev->driver) 506 if (physdev->driver)
507 add_hotplug_env_var(envp, num_envp, &i, 507 add_uevent_var(envp, num_envp, &i,
508 buffer, buffer_size, &length, 508 buffer, buffer_size, &length,
509 "PHYSDEVDRIVER=%s", 509 "PHYSDEVDRIVER=%s",
510 physdev->driver->name); 510 physdev->driver->name);
511 } 511 }
512 512
513 /* terminate, set to next free slot, shrink available space */ 513 /* terminate, set to next free slot, shrink available space */
@@ -520,13 +520,13 @@ static int block_hotplug(struct kset *kset, struct kobject *kobj, char **envp,
520 return 0; 520 return 0;
521} 521}
522 522
523static struct kset_hotplug_ops block_hotplug_ops = { 523static struct kset_uevent_ops block_uevent_ops = {
524 .filter = block_hotplug_filter, 524 .filter = block_uevent_filter,
525 .hotplug = block_hotplug, 525 .uevent = block_uevent,
526}; 526};
527 527
528/* declare block_subsys. */ 528/* declare block_subsys. */
529static decl_subsys(block, &ktype_block, &block_hotplug_ops); 529static decl_subsys(block, &ktype_block, &block_uevent_ops);
530 530
531 531
532/* 532/*
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 99c9ca6d5992..91d3b4828c49 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -36,6 +36,8 @@
36static void blk_unplug_work(void *data); 36static void blk_unplug_work(void *data);
37static void blk_unplug_timeout(unsigned long data); 37static void blk_unplug_timeout(unsigned long data);
38static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io); 38static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io);
39static void init_request_from_bio(struct request *req, struct bio *bio);
40static int __make_request(request_queue_t *q, struct bio *bio);
39 41
40/* 42/*
41 * For the allocated request tables 43 * For the allocated request tables
@@ -239,7 +241,7 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
239 q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; 241 q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
240 q->backing_dev_info.state = 0; 242 q->backing_dev_info.state = 0;
241 q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY; 243 q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
242 blk_queue_max_sectors(q, MAX_SECTORS); 244 blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
243 blk_queue_hardsect_size(q, 512); 245 blk_queue_hardsect_size(q, 512);
244 blk_queue_dma_alignment(q, 511); 246 blk_queue_dma_alignment(q, 511);
245 blk_queue_congestion_threshold(q); 247 blk_queue_congestion_threshold(q);
@@ -288,8 +290,8 @@ static inline void rq_init(request_queue_t *q, struct request *rq)
288 290
289/** 291/**
290 * blk_queue_ordered - does this queue support ordered writes 292 * blk_queue_ordered - does this queue support ordered writes
291 * @q: the request queue 293 * @q: the request queue
292 * @flag: see below 294 * @ordered: one of QUEUE_ORDERED_*
293 * 295 *
294 * Description: 296 * Description:
295 * For journalled file systems, doing ordered writes on a commit 297 * For journalled file systems, doing ordered writes on a commit
@@ -298,28 +300,30 @@ static inline void rq_init(request_queue_t *q, struct request *rq)
298 * feature should call this function and indicate so. 300 * feature should call this function and indicate so.
299 * 301 *
300 **/ 302 **/
301void blk_queue_ordered(request_queue_t *q, int flag) 303int blk_queue_ordered(request_queue_t *q, unsigned ordered,
302{ 304 prepare_flush_fn *prepare_flush_fn)
303 switch (flag) { 305{
304 case QUEUE_ORDERED_NONE: 306 if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) &&
305 if (q->flush_rq) 307 prepare_flush_fn == NULL) {
306 kmem_cache_free(request_cachep, q->flush_rq); 308 printk(KERN_ERR "blk_queue_ordered: prepare_flush_fn required\n");
307 q->flush_rq = NULL; 309 return -EINVAL;
308 q->ordered = flag; 310 }
309 break; 311
310 case QUEUE_ORDERED_TAG: 312 if (ordered != QUEUE_ORDERED_NONE &&
311 q->ordered = flag; 313 ordered != QUEUE_ORDERED_DRAIN &&
312 break; 314 ordered != QUEUE_ORDERED_DRAIN_FLUSH &&
313 case QUEUE_ORDERED_FLUSH: 315 ordered != QUEUE_ORDERED_DRAIN_FUA &&
314 q->ordered = flag; 316 ordered != QUEUE_ORDERED_TAG &&
315 if (!q->flush_rq) 317 ordered != QUEUE_ORDERED_TAG_FLUSH &&
316 q->flush_rq = kmem_cache_alloc(request_cachep, 318 ordered != QUEUE_ORDERED_TAG_FUA) {
317 GFP_KERNEL); 319 printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered);
318 break; 320 return -EINVAL;
319 default:
320 printk("blk_queue_ordered: bad value %d\n", flag);
321 break;
322 } 321 }
322
323 q->next_ordered = ordered;
324 q->prepare_flush_fn = prepare_flush_fn;
325
326 return 0;
323} 327}
324 328
325EXPORT_SYMBOL(blk_queue_ordered); 329EXPORT_SYMBOL(blk_queue_ordered);
@@ -344,167 +348,265 @@ EXPORT_SYMBOL(blk_queue_issue_flush_fn);
344/* 348/*
345 * Cache flushing for ordered writes handling 349 * Cache flushing for ordered writes handling
346 */ 350 */
347static void blk_pre_flush_end_io(struct request *flush_rq) 351inline unsigned blk_ordered_cur_seq(request_queue_t *q)
348{ 352{
349 struct request *rq = flush_rq->end_io_data; 353 if (!q->ordseq)
350 request_queue_t *q = rq->q; 354 return 0;
351 355 return 1 << ffz(q->ordseq);
352 elv_completed_request(q, flush_rq);
353
354 rq->flags |= REQ_BAR_PREFLUSH;
355
356 if (!flush_rq->errors)
357 elv_requeue_request(q, rq);
358 else {
359 q->end_flush_fn(q, flush_rq);
360 clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
361 q->request_fn(q);
362 }
363} 356}
364 357
365static void blk_post_flush_end_io(struct request *flush_rq) 358unsigned blk_ordered_req_seq(struct request *rq)
366{ 359{
367 struct request *rq = flush_rq->end_io_data;
368 request_queue_t *q = rq->q; 360 request_queue_t *q = rq->q;
369 361
370 elv_completed_request(q, flush_rq); 362 BUG_ON(q->ordseq == 0);
371 363
372 rq->flags |= REQ_BAR_POSTFLUSH; 364 if (rq == &q->pre_flush_rq)
365 return QUEUE_ORDSEQ_PREFLUSH;
366 if (rq == &q->bar_rq)
367 return QUEUE_ORDSEQ_BAR;
368 if (rq == &q->post_flush_rq)
369 return QUEUE_ORDSEQ_POSTFLUSH;
373 370
374 q->end_flush_fn(q, flush_rq); 371 if ((rq->flags & REQ_ORDERED_COLOR) ==
375 clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags); 372 (q->orig_bar_rq->flags & REQ_ORDERED_COLOR))
376 q->request_fn(q); 373 return QUEUE_ORDSEQ_DRAIN;
374 else
375 return QUEUE_ORDSEQ_DONE;
377} 376}
378 377
379struct request *blk_start_pre_flush(request_queue_t *q, struct request *rq) 378void blk_ordered_complete_seq(request_queue_t *q, unsigned seq, int error)
380{ 379{
381 struct request *flush_rq = q->flush_rq; 380 struct request *rq;
382 381 int uptodate;
383 BUG_ON(!blk_barrier_rq(rq));
384 382
385 if (test_and_set_bit(QUEUE_FLAG_FLUSH, &q->queue_flags)) 383 if (error && !q->orderr)
386 return NULL; 384 q->orderr = error;
387 385
388 rq_init(q, flush_rq); 386 BUG_ON(q->ordseq & seq);
389 flush_rq->elevator_private = NULL; 387 q->ordseq |= seq;
390 flush_rq->flags = REQ_BAR_FLUSH;
391 flush_rq->rq_disk = rq->rq_disk;
392 flush_rq->rl = NULL;
393 388
394 /* 389 if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
395 * prepare_flush returns 0 if no flush is needed, just mark both 390 return;
396 * pre and post flush as done in that case
397 */
398 if (!q->prepare_flush_fn(q, flush_rq)) {
399 rq->flags |= REQ_BAR_PREFLUSH | REQ_BAR_POSTFLUSH;
400 clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
401 return rq;
402 }
403 391
404 /* 392 /*
405 * some drivers dequeue requests right away, some only after io 393 * Okay, sequence complete.
406 * completion. make sure the request is dequeued.
407 */ 394 */
408 if (!list_empty(&rq->queuelist)) 395 rq = q->orig_bar_rq;
409 blkdev_dequeue_request(rq); 396 uptodate = q->orderr ? q->orderr : 1;
410 397
411 flush_rq->end_io_data = rq; 398 q->ordseq = 0;
412 flush_rq->end_io = blk_pre_flush_end_io;
413 399
414 __elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0); 400 end_that_request_first(rq, uptodate, rq->hard_nr_sectors);
415 return flush_rq; 401 end_that_request_last(rq, uptodate);
416} 402}
417 403
418static void blk_start_post_flush(request_queue_t *q, struct request *rq) 404static void pre_flush_end_io(struct request *rq, int error)
419{ 405{
420 struct request *flush_rq = q->flush_rq; 406 elv_completed_request(rq->q, rq);
407 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error);
408}
421 409
422 BUG_ON(!blk_barrier_rq(rq)); 410static void bar_end_io(struct request *rq, int error)
411{
412 elv_completed_request(rq->q, rq);
413 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error);
414}
423 415
424 rq_init(q, flush_rq); 416static void post_flush_end_io(struct request *rq, int error)
425 flush_rq->elevator_private = NULL; 417{
426 flush_rq->flags = REQ_BAR_FLUSH; 418 elv_completed_request(rq->q, rq);
427 flush_rq->rq_disk = rq->rq_disk; 419 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
428 flush_rq->rl = NULL; 420}
429 421
430 if (q->prepare_flush_fn(q, flush_rq)) { 422static void queue_flush(request_queue_t *q, unsigned which)
431 flush_rq->end_io_data = rq; 423{
432 flush_rq->end_io = blk_post_flush_end_io; 424 struct request *rq;
425 rq_end_io_fn *end_io;
433 426
434 __elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0); 427 if (which == QUEUE_ORDERED_PREFLUSH) {
435 q->request_fn(q); 428 rq = &q->pre_flush_rq;
429 end_io = pre_flush_end_io;
430 } else {
431 rq = &q->post_flush_rq;
432 end_io = post_flush_end_io;
436 } 433 }
434
435 rq_init(q, rq);
436 rq->flags = REQ_HARDBARRIER;
437 rq->elevator_private = NULL;
438 rq->rq_disk = q->bar_rq.rq_disk;
439 rq->rl = NULL;
440 rq->end_io = end_io;
441 q->prepare_flush_fn(q, rq);
442
443 __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
437} 444}
438 445
439static inline int blk_check_end_barrier(request_queue_t *q, struct request *rq, 446static inline struct request *start_ordered(request_queue_t *q,
440 int sectors) 447 struct request *rq)
441{ 448{
442 if (sectors > rq->nr_sectors) 449 q->bi_size = 0;
443 sectors = rq->nr_sectors; 450 q->orderr = 0;
451 q->ordered = q->next_ordered;
452 q->ordseq |= QUEUE_ORDSEQ_STARTED;
453
454 /*
455 * Prep proxy barrier request.
456 */
457 blkdev_dequeue_request(rq);
458 q->orig_bar_rq = rq;
459 rq = &q->bar_rq;
460 rq_init(q, rq);
461 rq->flags = bio_data_dir(q->orig_bar_rq->bio);
462 rq->flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0;
463 rq->elevator_private = NULL;
464 rq->rl = NULL;
465 init_request_from_bio(rq, q->orig_bar_rq->bio);
466 rq->end_io = bar_end_io;
467
468 /*
469 * Queue ordered sequence. As we stack them at the head, we
470 * need to queue in reverse order. Note that we rely on that
471 * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
472 * request gets inbetween ordered sequence.
473 */
474 if (q->ordered & QUEUE_ORDERED_POSTFLUSH)
475 queue_flush(q, QUEUE_ORDERED_POSTFLUSH);
476 else
477 q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;
444 478
445 rq->nr_sectors -= sectors; 479 __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
446 return rq->nr_sectors; 480
481 if (q->ordered & QUEUE_ORDERED_PREFLUSH) {
482 queue_flush(q, QUEUE_ORDERED_PREFLUSH);
483 rq = &q->pre_flush_rq;
484 } else
485 q->ordseq |= QUEUE_ORDSEQ_PREFLUSH;
486
487 if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0)
488 q->ordseq |= QUEUE_ORDSEQ_DRAIN;
489 else
490 rq = NULL;
491
492 return rq;
447} 493}
448 494
449static int __blk_complete_barrier_rq(request_queue_t *q, struct request *rq, 495int blk_do_ordered(request_queue_t *q, struct request **rqp)
450 int sectors, int queue_locked)
451{ 496{
452 if (q->ordered != QUEUE_ORDERED_FLUSH) 497 struct request *rq = *rqp, *allowed_rq;
453 return 0; 498 int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);
454 if (!blk_fs_request(rq) || !blk_barrier_rq(rq))
455 return 0;
456 if (blk_barrier_postflush(rq))
457 return 0;
458 499
459 if (!blk_check_end_barrier(q, rq, sectors)) { 500 if (!q->ordseq) {
460 unsigned long flags = 0; 501 if (!is_barrier)
502 return 1;
461 503
462 if (!queue_locked) 504 if (q->next_ordered != QUEUE_ORDERED_NONE) {
463 spin_lock_irqsave(q->queue_lock, flags); 505 *rqp = start_ordered(q, rq);
506 return 1;
507 } else {
508 /*
509 * This can happen when the queue switches to
510 * ORDERED_NONE while this request is on it.
511 */
512 blkdev_dequeue_request(rq);
513 end_that_request_first(rq, -EOPNOTSUPP,
514 rq->hard_nr_sectors);
515 end_that_request_last(rq, -EOPNOTSUPP);
516 *rqp = NULL;
517 return 0;
518 }
519 }
464 520
465 blk_start_post_flush(q, rq); 521 if (q->ordered & QUEUE_ORDERED_TAG) {
522 if (is_barrier && rq != &q->bar_rq)
523 *rqp = NULL;
524 return 1;
525 }
466 526
467 if (!queue_locked) 527 switch (blk_ordered_cur_seq(q)) {
468 spin_unlock_irqrestore(q->queue_lock, flags); 528 case QUEUE_ORDSEQ_PREFLUSH:
529 allowed_rq = &q->pre_flush_rq;
530 break;
531 case QUEUE_ORDSEQ_BAR:
532 allowed_rq = &q->bar_rq;
533 break;
534 case QUEUE_ORDSEQ_POSTFLUSH:
535 allowed_rq = &q->post_flush_rq;
536 break;
537 default:
538 allowed_rq = NULL;
539 break;
469 } 540 }
470 541
542 if (rq != allowed_rq &&
543 (blk_fs_request(rq) || rq == &q->pre_flush_rq ||
544 rq == &q->post_flush_rq))
545 *rqp = NULL;
546
471 return 1; 547 return 1;
472} 548}
473 549
474/** 550static int flush_dry_bio_endio(struct bio *bio, unsigned int bytes, int error)
475 * blk_complete_barrier_rq - complete possible barrier request
476 * @q: the request queue for the device
477 * @rq: the request
478 * @sectors: number of sectors to complete
479 *
480 * Description:
481 * Used in driver end_io handling to determine whether to postpone
482 * completion of a barrier request until a post flush has been done. This
483 * is the unlocked variant, used if the caller doesn't already hold the
484 * queue lock.
485 **/
486int blk_complete_barrier_rq(request_queue_t *q, struct request *rq, int sectors)
487{ 551{
488 return __blk_complete_barrier_rq(q, rq, sectors, 0); 552 request_queue_t *q = bio->bi_private;
553 struct bio_vec *bvec;
554 int i;
555
556 /*
557 * This is dry run, restore bio_sector and size. We'll finish
558 * this request again with the original bi_end_io after an
559 * error occurs or post flush is complete.
560 */
561 q->bi_size += bytes;
562
563 if (bio->bi_size)
564 return 1;
565
566 /* Rewind bvec's */
567 bio->bi_idx = 0;
568 bio_for_each_segment(bvec, bio, i) {
569 bvec->bv_len += bvec->bv_offset;
570 bvec->bv_offset = 0;
571 }
572
573 /* Reset bio */
574 set_bit(BIO_UPTODATE, &bio->bi_flags);
575 bio->bi_size = q->bi_size;
576 bio->bi_sector -= (q->bi_size >> 9);
577 q->bi_size = 0;
578
579 return 0;
489} 580}
490EXPORT_SYMBOL(blk_complete_barrier_rq);
491 581
492/** 582static inline int ordered_bio_endio(struct request *rq, struct bio *bio,
493 * blk_complete_barrier_rq_locked - complete possible barrier request 583 unsigned int nbytes, int error)
494 * @q: the request queue for the device
495 * @rq: the request
496 * @sectors: number of sectors to complete
497 *
498 * Description:
499 * See blk_complete_barrier_rq(). This variant must be used if the caller
500 * holds the queue lock.
501 **/
502int blk_complete_barrier_rq_locked(request_queue_t *q, struct request *rq,
503 int sectors)
504{ 584{
505 return __blk_complete_barrier_rq(q, rq, sectors, 1); 585 request_queue_t *q = rq->q;
586 bio_end_io_t *endio;
587 void *private;
588
589 if (&q->bar_rq != rq)
590 return 0;
591
592 /*
593 * Okay, this is the barrier request in progress, dry finish it.
594 */
595 if (error && !q->orderr)
596 q->orderr = error;
597
598 endio = bio->bi_end_io;
599 private = bio->bi_private;
600 bio->bi_end_io = flush_dry_bio_endio;
601 bio->bi_private = q;
602
603 bio_endio(bio, nbytes, error);
604
605 bio->bi_end_io = endio;
606 bio->bi_private = private;
607
608 return 1;
506} 609}
507EXPORT_SYMBOL(blk_complete_barrier_rq_locked);
508 610
509/** 611/**
510 * blk_queue_bounce_limit - set bounce buffer limit for queue 612 * blk_queue_bounce_limit - set bounce buffer limit for queue
@@ -555,7 +657,12 @@ void blk_queue_max_sectors(request_queue_t *q, unsigned short max_sectors)
555 printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors); 657 printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors);
556 } 658 }
557 659
558 q->max_sectors = q->max_hw_sectors = max_sectors; 660 if (BLK_DEF_MAX_SECTORS > max_sectors)
661 q->max_hw_sectors = q->max_sectors = max_sectors;
662 else {
663 q->max_sectors = BLK_DEF_MAX_SECTORS;
664 q->max_hw_sectors = max_sectors;
665 }
559} 666}
560 667
561EXPORT_SYMBOL(blk_queue_max_sectors); 668EXPORT_SYMBOL(blk_queue_max_sectors);
@@ -657,8 +764,8 @@ EXPORT_SYMBOL(blk_queue_hardsect_size);
657void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b) 764void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b)
658{ 765{
659 /* zero is "infinity" */ 766 /* zero is "infinity" */
660 t->max_sectors = t->max_hw_sectors = 767 t->max_sectors = min_not_zero(t->max_sectors,b->max_sectors);
661 min_not_zero(t->max_sectors,b->max_sectors); 768 t->max_hw_sectors = min_not_zero(t->max_hw_sectors,b->max_hw_sectors);
662 769
663 t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments); 770 t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments);
664 t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments); 771 t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments);
@@ -1034,12 +1141,13 @@ void blk_queue_invalidate_tags(request_queue_t *q)
1034 1141
1035EXPORT_SYMBOL(blk_queue_invalidate_tags); 1142EXPORT_SYMBOL(blk_queue_invalidate_tags);
1036 1143
1037static char *rq_flags[] = { 1144static const char * const rq_flags[] = {
1038 "REQ_RW", 1145 "REQ_RW",
1039 "REQ_FAILFAST", 1146 "REQ_FAILFAST",
1040 "REQ_SORTED", 1147 "REQ_SORTED",
1041 "REQ_SOFTBARRIER", 1148 "REQ_SOFTBARRIER",
1042 "REQ_HARDBARRIER", 1149 "REQ_HARDBARRIER",
1150 "REQ_FUA",
1043 "REQ_CMD", 1151 "REQ_CMD",
1044 "REQ_NOMERGE", 1152 "REQ_NOMERGE",
1045 "REQ_STARTED", 1153 "REQ_STARTED",
@@ -1059,6 +1167,7 @@ static char *rq_flags[] = {
1059 "REQ_PM_SUSPEND", 1167 "REQ_PM_SUSPEND",
1060 "REQ_PM_RESUME", 1168 "REQ_PM_RESUME",
1061 "REQ_PM_SHUTDOWN", 1169 "REQ_PM_SHUTDOWN",
1170 "REQ_ORDERED_COLOR",
1062}; 1171};
1063 1172
1064void blk_dump_rq_flags(struct request *rq, char *msg) 1173void blk_dump_rq_flags(struct request *rq, char *msg)
@@ -1293,9 +1402,15 @@ static inline int ll_new_hw_segment(request_queue_t *q,
1293static int ll_back_merge_fn(request_queue_t *q, struct request *req, 1402static int ll_back_merge_fn(request_queue_t *q, struct request *req,
1294 struct bio *bio) 1403 struct bio *bio)
1295{ 1404{
1405 unsigned short max_sectors;
1296 int len; 1406 int len;
1297 1407
1298 if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) { 1408 if (unlikely(blk_pc_request(req)))
1409 max_sectors = q->max_hw_sectors;
1410 else
1411 max_sectors = q->max_sectors;
1412
1413 if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
1299 req->flags |= REQ_NOMERGE; 1414 req->flags |= REQ_NOMERGE;
1300 if (req == q->last_merge) 1415 if (req == q->last_merge)
1301 q->last_merge = NULL; 1416 q->last_merge = NULL;
@@ -1325,9 +1440,16 @@ static int ll_back_merge_fn(request_queue_t *q, struct request *req,
1325static int ll_front_merge_fn(request_queue_t *q, struct request *req, 1440static int ll_front_merge_fn(request_queue_t *q, struct request *req,
1326 struct bio *bio) 1441 struct bio *bio)
1327{ 1442{
1443 unsigned short max_sectors;
1328 int len; 1444 int len;
1329 1445
1330 if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) { 1446 if (unlikely(blk_pc_request(req)))
1447 max_sectors = q->max_hw_sectors;
1448 else
1449 max_sectors = q->max_sectors;
1450
1451
1452 if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
1331 req->flags |= REQ_NOMERGE; 1453 req->flags |= REQ_NOMERGE;
1332 if (req == q->last_merge) 1454 if (req == q->last_merge)
1333 q->last_merge = NULL; 1455 q->last_merge = NULL;
@@ -1623,8 +1745,6 @@ void blk_cleanup_queue(request_queue_t * q)
1623 if (q->queue_tags) 1745 if (q->queue_tags)
1624 __blk_queue_free_tags(q); 1746 __blk_queue_free_tags(q);
1625 1747
1626 blk_queue_ordered(q, QUEUE_ORDERED_NONE);
1627
1628 kmem_cache_free(requestq_cachep, q); 1748 kmem_cache_free(requestq_cachep, q);
1629} 1749}
1630 1750
@@ -1649,8 +1769,6 @@ static int blk_init_free_list(request_queue_t *q)
1649 return 0; 1769 return 0;
1650} 1770}
1651 1771
1652static int __make_request(request_queue_t *, struct bio *);
1653
1654request_queue_t *blk_alloc_queue(gfp_t gfp_mask) 1772request_queue_t *blk_alloc_queue(gfp_t gfp_mask)
1655{ 1773{
1656 return blk_alloc_queue_node(gfp_mask, -1); 1774 return blk_alloc_queue_node(gfp_mask, -1);
@@ -1890,40 +2008,40 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio,
1890{ 2008{
1891 struct request *rq = NULL; 2009 struct request *rq = NULL;
1892 struct request_list *rl = &q->rq; 2010 struct request_list *rl = &q->rq;
1893 struct io_context *ioc = current_io_context(GFP_ATOMIC); 2011 struct io_context *ioc = NULL;
1894 int priv; 2012 int may_queue, priv;
1895 2013
1896 if (rl->count[rw]+1 >= q->nr_requests) { 2014 may_queue = elv_may_queue(q, rw, bio);
1897 /* 2015 if (may_queue == ELV_MQUEUE_NO)
1898 * The queue will fill after this allocation, so set it as 2016 goto rq_starved;
1899 * full, and mark this process as "batching". This process
1900 * will be allowed to complete a batch of requests, others
1901 * will be blocked.
1902 */
1903 if (!blk_queue_full(q, rw)) {
1904 ioc_set_batching(q, ioc);
1905 blk_set_queue_full(q, rw);
1906 }
1907 }
1908 2017
1909 switch (elv_may_queue(q, rw, bio)) { 2018 if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) {
1910 case ELV_MQUEUE_NO: 2019 if (rl->count[rw]+1 >= q->nr_requests) {
1911 goto rq_starved; 2020 ioc = current_io_context(GFP_ATOMIC);
1912 case ELV_MQUEUE_MAY: 2021 /*
1913 break; 2022 * The queue will fill after this allocation, so set
1914 case ELV_MQUEUE_MUST: 2023 * it as full, and mark this process as "batching".
1915 goto get_rq; 2024 * This process will be allowed to complete a batch of
1916 } 2025 * requests, others will be blocked.
1917 2026 */
1918 if (blk_queue_full(q, rw) && !ioc_batching(q, ioc)) { 2027 if (!blk_queue_full(q, rw)) {
1919 /* 2028 ioc_set_batching(q, ioc);
1920 * The queue is full and the allocating process is not a 2029 blk_set_queue_full(q, rw);
1921 * "batcher", and not exempted by the IO scheduler 2030 } else {
1922 */ 2031 if (may_queue != ELV_MQUEUE_MUST
1923 goto out; 2032 && !ioc_batching(q, ioc)) {
2033 /*
2034 * The queue is full and the allocating
2035 * process is not a "batcher", and not
2036 * exempted by the IO scheduler
2037 */
2038 goto out;
2039 }
2040 }
2041 }
2042 set_queue_congested(q, rw);
1924 } 2043 }
1925 2044
1926get_rq:
1927 /* 2045 /*
1928 * Only allow batching queuers to allocate up to 50% over the defined 2046 * Only allow batching queuers to allocate up to 50% over the defined
1929 * limit of requests, otherwise we could have thousands of requests 2047 * limit of requests, otherwise we could have thousands of requests
@@ -1934,8 +2052,6 @@ get_rq:
1934 2052
1935 rl->count[rw]++; 2053 rl->count[rw]++;
1936 rl->starved[rw] = 0; 2054 rl->starved[rw] = 0;
1937 if (rl->count[rw] >= queue_congestion_on_threshold(q))
1938 set_queue_congested(q, rw);
1939 2055
1940 priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); 2056 priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
1941 if (priv) 2057 if (priv)
@@ -1944,7 +2060,7 @@ get_rq:
1944 spin_unlock_irq(q->queue_lock); 2060 spin_unlock_irq(q->queue_lock);
1945 2061
1946 rq = blk_alloc_request(q, rw, bio, priv, gfp_mask); 2062 rq = blk_alloc_request(q, rw, bio, priv, gfp_mask);
1947 if (!rq) { 2063 if (unlikely(!rq)) {
1948 /* 2064 /*
1949 * Allocation failed presumably due to memory. Undo anything 2065 * Allocation failed presumably due to memory. Undo anything
1950 * we might have messed up. 2066 * we might have messed up.
@@ -1969,6 +2085,12 @@ rq_starved:
1969 goto out; 2085 goto out;
1970 } 2086 }
1971 2087
2088 /*
2089 * ioc may be NULL here, and ioc_batching will be false. That's
2090 * OK, if the queue is under the request limit then requests need
2091 * not count toward the nr_batch_requests limit. There will always
2092 * be some limit enforced by BLK_BATCH_TIME.
2093 */
1972 if (ioc_batching(q, ioc)) 2094 if (ioc_batching(q, ioc))
1973 ioc->nr_batch_requests--; 2095 ioc->nr_batch_requests--;
1974 2096
@@ -2144,7 +2266,7 @@ int blk_rq_map_user(request_queue_t *q, struct request *rq, void __user *ubuf,
2144 struct bio *bio; 2266 struct bio *bio;
2145 int reading; 2267 int reading;
2146 2268
2147 if (len > (q->max_sectors << 9)) 2269 if (len > (q->max_hw_sectors << 9))
2148 return -EINVAL; 2270 return -EINVAL;
2149 if (!len || !ubuf) 2271 if (!len || !ubuf)
2150 return -EINVAL; 2272 return -EINVAL;
@@ -2259,7 +2381,7 @@ int blk_rq_map_kern(request_queue_t *q, struct request *rq, void *kbuf,
2259{ 2381{
2260 struct bio *bio; 2382 struct bio *bio;
2261 2383
2262 if (len > (q->max_sectors << 9)) 2384 if (len > (q->max_hw_sectors << 9))
2263 return -EINVAL; 2385 return -EINVAL;
2264 if (!len || !kbuf) 2386 if (!len || !kbuf)
2265 return -EINVAL; 2387 return -EINVAL;
@@ -2295,7 +2417,7 @@ EXPORT_SYMBOL(blk_rq_map_kern);
2295 */ 2417 */
2296void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk, 2418void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk,
2297 struct request *rq, int at_head, 2419 struct request *rq, int at_head,
2298 void (*done)(struct request *)) 2420 rq_end_io_fn *done)
2299{ 2421{
2300 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; 2422 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
2301 2423
@@ -2306,6 +2428,8 @@ void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk,
2306 generic_unplug_device(q); 2428 generic_unplug_device(q);
2307} 2429}
2308 2430
2431EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
2432
2309/** 2433/**
2310 * blk_execute_rq - insert a request into queue for execution 2434 * blk_execute_rq - insert a request into queue for execution
2311 * @q: queue to insert the request in 2435 * @q: queue to insert the request in
@@ -2444,7 +2568,7 @@ void disk_round_stats(struct gendisk *disk)
2444/* 2568/*
2445 * queue lock must be held 2569 * queue lock must be held
2446 */ 2570 */
2447static void __blk_put_request(request_queue_t *q, struct request *req) 2571void __blk_put_request(request_queue_t *q, struct request *req)
2448{ 2572{
2449 struct request_list *rl = req->rl; 2573 struct request_list *rl = req->rl;
2450 2574
@@ -2473,6 +2597,8 @@ static void __blk_put_request(request_queue_t *q, struct request *req)
2473 } 2597 }
2474} 2598}
2475 2599
2600EXPORT_SYMBOL_GPL(__blk_put_request);
2601
2476void blk_put_request(struct request *req) 2602void blk_put_request(struct request *req)
2477{ 2603{
2478 unsigned long flags; 2604 unsigned long flags;
@@ -2495,7 +2621,7 @@ EXPORT_SYMBOL(blk_put_request);
2495 * blk_end_sync_rq - executes a completion event on a request 2621 * blk_end_sync_rq - executes a completion event on a request
2496 * @rq: request to complete 2622 * @rq: request to complete
2497 */ 2623 */
2498void blk_end_sync_rq(struct request *rq) 2624void blk_end_sync_rq(struct request *rq, int error)
2499{ 2625{
2500 struct completion *waiting = rq->waiting; 2626 struct completion *waiting = rq->waiting;
2501 2627
@@ -2633,6 +2759,36 @@ void blk_attempt_remerge(request_queue_t *q, struct request *rq)
2633 2759
2634EXPORT_SYMBOL(blk_attempt_remerge); 2760EXPORT_SYMBOL(blk_attempt_remerge);
2635 2761
2762static void init_request_from_bio(struct request *req, struct bio *bio)
2763{
2764 req->flags |= REQ_CMD;
2765
2766 /*
2767 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST)
2768 */
2769 if (bio_rw_ahead(bio) || bio_failfast(bio))
2770 req->flags |= REQ_FAILFAST;
2771
2772 /*
2773 * REQ_BARRIER implies no merging, but lets make it explicit
2774 */
2775 if (unlikely(bio_barrier(bio)))
2776 req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
2777
2778 req->errors = 0;
2779 req->hard_sector = req->sector = bio->bi_sector;
2780 req->hard_nr_sectors = req->nr_sectors = bio_sectors(bio);
2781 req->current_nr_sectors = req->hard_cur_sectors = bio_cur_sectors(bio);
2782 req->nr_phys_segments = bio_phys_segments(req->q, bio);
2783 req->nr_hw_segments = bio_hw_segments(req->q, bio);
2784 req->buffer = bio_data(bio); /* see ->buffer comment above */
2785 req->waiting = NULL;
2786 req->bio = req->biotail = bio;
2787 req->ioprio = bio_prio(bio);
2788 req->rq_disk = bio->bi_bdev->bd_disk;
2789 req->start_time = jiffies;
2790}
2791
2636static int __make_request(request_queue_t *q, struct bio *bio) 2792static int __make_request(request_queue_t *q, struct bio *bio)
2637{ 2793{
2638 struct request *req; 2794 struct request *req;
@@ -2658,7 +2814,7 @@ static int __make_request(request_queue_t *q, struct bio *bio)
2658 spin_lock_prefetch(q->queue_lock); 2814 spin_lock_prefetch(q->queue_lock);
2659 2815
2660 barrier = bio_barrier(bio); 2816 barrier = bio_barrier(bio);
2661 if (unlikely(barrier) && (q->ordered == QUEUE_ORDERED_NONE)) { 2817 if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) {
2662 err = -EOPNOTSUPP; 2818 err = -EOPNOTSUPP;
2663 goto end_io; 2819 goto end_io;
2664 } 2820 }
@@ -2728,33 +2884,7 @@ get_rq:
2728 * We don't worry about that case for efficiency. It won't happen 2884 * We don't worry about that case for efficiency. It won't happen
2729 * often, and the elevators are able to handle it. 2885 * often, and the elevators are able to handle it.
2730 */ 2886 */
2731 2887 init_request_from_bio(req, bio);
2732 req->flags |= REQ_CMD;
2733
2734 /*
2735 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST)
2736 */
2737 if (bio_rw_ahead(bio) || bio_failfast(bio))
2738 req->flags |= REQ_FAILFAST;
2739
2740 /*
2741 * REQ_BARRIER implies no merging, but lets make it explicit
2742 */
2743 if (unlikely(barrier))
2744 req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
2745
2746 req->errors = 0;
2747 req->hard_sector = req->sector = sector;
2748 req->hard_nr_sectors = req->nr_sectors = nr_sectors;
2749 req->current_nr_sectors = req->hard_cur_sectors = cur_nr_sectors;
2750 req->nr_phys_segments = bio_phys_segments(q, bio);
2751 req->nr_hw_segments = bio_hw_segments(q, bio);
2752 req->buffer = bio_data(bio); /* see ->buffer comment above */
2753 req->waiting = NULL;
2754 req->bio = req->biotail = bio;
2755 req->ioprio = prio;
2756 req->rq_disk = bio->bi_bdev->bd_disk;
2757 req->start_time = jiffies;
2758 2888
2759 spin_lock_irq(q->queue_lock); 2889 spin_lock_irq(q->queue_lock);
2760 if (elv_queue_empty(q)) 2890 if (elv_queue_empty(q))
@@ -3045,7 +3175,8 @@ static int __end_that_request_first(struct request *req, int uptodate,
3045 if (nr_bytes >= bio->bi_size) { 3175 if (nr_bytes >= bio->bi_size) {
3046 req->bio = bio->bi_next; 3176 req->bio = bio->bi_next;
3047 nbytes = bio->bi_size; 3177 nbytes = bio->bi_size;
3048 bio_endio(bio, nbytes, error); 3178 if (!ordered_bio_endio(req, bio, nbytes, error))
3179 bio_endio(bio, nbytes, error);
3049 next_idx = 0; 3180 next_idx = 0;
3050 bio_nbytes = 0; 3181 bio_nbytes = 0;
3051 } else { 3182 } else {
@@ -3100,7 +3231,8 @@ static int __end_that_request_first(struct request *req, int uptodate,
3100 * if the request wasn't completed, update state 3231 * if the request wasn't completed, update state
3101 */ 3232 */
3102 if (bio_nbytes) { 3233 if (bio_nbytes) {
3103 bio_endio(bio, bio_nbytes, error); 3234 if (!ordered_bio_endio(req, bio, bio_nbytes, error))
3235 bio_endio(bio, bio_nbytes, error);
3104 bio->bi_idx += next_idx; 3236 bio->bi_idx += next_idx;
3105 bio_iovec(bio)->bv_offset += nr_bytes; 3237 bio_iovec(bio)->bv_offset += nr_bytes;
3106 bio_iovec(bio)->bv_len -= nr_bytes; 3238 bio_iovec(bio)->bv_len -= nr_bytes;
@@ -3157,9 +3289,17 @@ EXPORT_SYMBOL(end_that_request_chunk);
3157/* 3289/*
3158 * queue lock must be held 3290 * queue lock must be held
3159 */ 3291 */
3160void end_that_request_last(struct request *req) 3292void end_that_request_last(struct request *req, int uptodate)
3161{ 3293{
3162 struct gendisk *disk = req->rq_disk; 3294 struct gendisk *disk = req->rq_disk;
3295 int error;
3296
3297 /*
3298 * extend uptodate bool to allow < 0 value to be direct io error
3299 */
3300 error = 0;
3301 if (end_io_error(uptodate))
3302 error = !uptodate ? -EIO : uptodate;
3163 3303
3164 if (unlikely(laptop_mode) && blk_fs_request(req)) 3304 if (unlikely(laptop_mode) && blk_fs_request(req))
3165 laptop_io_completion(); 3305 laptop_io_completion();
@@ -3174,7 +3314,7 @@ void end_that_request_last(struct request *req)
3174 disk->in_flight--; 3314 disk->in_flight--;
3175 } 3315 }
3176 if (req->end_io) 3316 if (req->end_io)
3177 req->end_io(req); 3317 req->end_io(req, error);
3178 else 3318 else
3179 __blk_put_request(req->q, req); 3319 __blk_put_request(req->q, req);
3180} 3320}
@@ -3186,7 +3326,7 @@ void end_request(struct request *req, int uptodate)
3186 if (!end_that_request_first(req, uptodate, req->hard_cur_sectors)) { 3326 if (!end_that_request_first(req, uptodate, req->hard_cur_sectors)) {
3187 add_disk_randomness(req->rq_disk); 3327 add_disk_randomness(req->rq_disk);
3188 blkdev_dequeue_request(req); 3328 blkdev_dequeue_request(req);
3189 end_that_request_last(req); 3329 end_that_request_last(req, uptodate);
3190 } 3330 }
3191} 3331}
3192 3332
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 6e7db2e79f42..c2ac36dfe4f3 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -46,7 +46,7 @@ EXPORT_SYMBOL(scsi_command_size);
46 46
47static int sg_get_version(int __user *p) 47static int sg_get_version(int __user *p)
48{ 48{
49 static int sg_version_num = 30527; 49 static const int sg_version_num = 30527;
50 return put_user(sg_version_num, p); 50 return put_user(sg_version_num, p);
51} 51}
52 52
@@ -233,7 +233,7 @@ static int sg_io(struct file *file, request_queue_t *q,
233 if (verify_command(file, cmd)) 233 if (verify_command(file, cmd))
234 return -EPERM; 234 return -EPERM;
235 235
236 if (hdr->dxfer_len > (q->max_sectors << 9)) 236 if (hdr->dxfer_len > (q->max_hw_sectors << 9))
237 return -EIO; 237 return -EIO;
238 238
239 if (hdr->dxfer_len) 239 if (hdr->dxfer_len)