Diffstat (limited to 'block')
-rw-r--r--  block/Kconfig               2
-rw-r--r--  block/as-iosched.c        150
-rw-r--r--  block/cfq-iosched.c        76
-rw-r--r--  block/deadline-iosched.c   10
-rw-r--r--  block/elevator.c          160
-rw-r--r--  block/genhd.c             168
-rw-r--r--  block/ioctl.c              24
-rw-r--r--  block/ll_rw_blk.c         701
-rw-r--r--  block/noop-iosched.c       85
-rw-r--r--  block/scsi_ioctl.c         63
10 files changed, 908 insertions, 531 deletions
diff --git a/block/Kconfig b/block/Kconfig
index eb48edb80c1d..377f6dd20e17 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -5,7 +5,7 @@
5#for instance. 5#for instance.
6config LBD 6config LBD
7 bool "Support for Large Block Devices" 7 bool "Support for Large Block Devices"
8 depends on X86 || (MIPS && 32BIT) || PPC32 || ARCH_S390_31 || SUPERH || UML 8 depends on X86 || (MIPS && 32BIT) || PPC32 || (S390 && !64BIT) || SUPERH || UML
9 help 9 help
10 Say Y here if you want to attach large (bigger than 2TB) discs to 10 Say Y here if you want to attach large (bigger than 2TB) discs to
11 your machine, or if you want to have a raid or loopback device 11 your machine, or if you want to have a raid or loopback device
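For context on the CONFIG_LBD dependency tweak above: on these 32-bit targets sector_t is otherwise a 32-bit quantity, which caps addressable capacity at 2^32 sectors of 512 bytes, i.e. 2 TiB. A quick arithmetic check in plain userspace C (not kernel code, just the numbers):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Assumed model: 32-bit sector index, 512-byte hardware sectors. */
	uint64_t max_sectors = UINT64_C(1) << 32;
	uint64_t bytes = max_sectors * 512;

	printf("capacity limit without LBD: %llu bytes (%llu GiB)\n",
	       (unsigned long long)bytes,
	       (unsigned long long)(bytes >> 30));
	return 0;
}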
diff --git a/block/as-iosched.c b/block/as-iosched.c
index a78e160b59a3..8da3cf66894c 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * linux/drivers/block/as-iosched.c
3 *
4 * Anticipatory & deadline i/o scheduler. 2 * Anticipatory & deadline i/o scheduler.
5 * 3 *
6 * Copyright (C) 2002 Jens Axboe <axboe@suse.de> 4 * Copyright (C) 2002 Jens Axboe <axboe@suse.de>
@@ -184,6 +182,9 @@ struct as_rq {
184 182
185static kmem_cache_t *arq_pool; 183static kmem_cache_t *arq_pool;
186 184
185static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq);
186static void as_antic_stop(struct as_data *ad);
187
187/* 188/*
188 * IO Context helper functions 189 * IO Context helper functions
189 */ 190 */
@@ -372,7 +373,7 @@ static struct as_rq *as_find_first_arq(struct as_data *ad, int data_dir)
372 * existing request against the same sector), which can happen when using 373 * existing request against the same sector), which can happen when using
373 * direct IO, then return the alias. 374 * direct IO, then return the alias.
374 */ 375 */
375static struct as_rq *as_add_arq_rb(struct as_data *ad, struct as_rq *arq) 376static struct as_rq *__as_add_arq_rb(struct as_data *ad, struct as_rq *arq)
376{ 377{
377 struct rb_node **p = &ARQ_RB_ROOT(ad, arq)->rb_node; 378 struct rb_node **p = &ARQ_RB_ROOT(ad, arq)->rb_node;
378 struct rb_node *parent = NULL; 379 struct rb_node *parent = NULL;
@@ -399,6 +400,16 @@ static struct as_rq *as_add_arq_rb(struct as_data *ad, struct as_rq *arq)
399 return NULL; 400 return NULL;
400} 401}
401 402
403static void as_add_arq_rb(struct as_data *ad, struct as_rq *arq)
404{
405 struct as_rq *alias;
406
407 while ((unlikely(alias = __as_add_arq_rb(ad, arq)))) {
408 as_move_to_dispatch(ad, alias);
409 as_antic_stop(ad);
410 }
411}
412
402static inline void as_del_arq_rb(struct as_data *ad, struct as_rq *arq) 413static inline void as_del_arq_rb(struct as_data *ad, struct as_rq *arq)
403{ 414{
404 if (!ON_RB(&arq->rb_node)) { 415 if (!ON_RB(&arq->rb_node)) {
@@ -1135,23 +1146,6 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
1135 /* 1146 /*
1136 * take it off the sort and fifo list, add to dispatch queue 1147 * take it off the sort and fifo list, add to dispatch queue
1137 */ 1148 */
1138 while (!list_empty(&rq->queuelist)) {
1139 struct request *__rq = list_entry_rq(rq->queuelist.next);
1140 struct as_rq *__arq = RQ_DATA(__rq);
1141
1142 list_del(&__rq->queuelist);
1143
1144 elv_dispatch_add_tail(ad->q, __rq);
1145
1146 if (__arq->io_context && __arq->io_context->aic)
1147 atomic_inc(&__arq->io_context->aic->nr_dispatched);
1148
1149 WARN_ON(__arq->state != AS_RQ_QUEUED);
1150 __arq->state = AS_RQ_DISPATCHED;
1151
1152 ad->nr_dispatched++;
1153 }
1154
1155 as_remove_queued_request(ad->q, rq); 1149 as_remove_queued_request(ad->q, rq);
1156 WARN_ON(arq->state != AS_RQ_QUEUED); 1150 WARN_ON(arq->state != AS_RQ_QUEUED);
1157 1151
@@ -1328,55 +1322,14 @@ fifo_expired:
1328} 1322}
1329 1323
1330/* 1324/*
1331 * Add arq to a list behind alias
1332 */
1333static inline void
1334as_add_aliased_request(struct as_data *ad, struct as_rq *arq,
1335 struct as_rq *alias)
1336{
1337 struct request *req = arq->request;
1338 struct list_head *insert = alias->request->queuelist.prev;
1339
1340 /*
1341 * Transfer list of aliases
1342 */
1343 while (!list_empty(&req->queuelist)) {
1344 struct request *__rq = list_entry_rq(req->queuelist.next);
1345 struct as_rq *__arq = RQ_DATA(__rq);
1346
1347 list_move_tail(&__rq->queuelist, &alias->request->queuelist);
1348
1349 WARN_ON(__arq->state != AS_RQ_QUEUED);
1350 }
1351
1352 /*
1353 * Another request with the same start sector on the rbtree.
1354 * Link this request to that sector. They are untangled in
1355 * as_move_to_dispatch
1356 */
1357 list_add(&arq->request->queuelist, insert);
1358
1359 /*
1360 * Don't want to have to handle merges.
1361 */
1362 as_del_arq_hash(arq);
1363 arq->request->flags |= REQ_NOMERGE;
1364}
1365
1366/*
1367 * add arq to rbtree and fifo 1325 * add arq to rbtree and fifo
1368 */ 1326 */
1369static void as_add_request(request_queue_t *q, struct request *rq) 1327static void as_add_request(request_queue_t *q, struct request *rq)
1370{ 1328{
1371 struct as_data *ad = q->elevator->elevator_data; 1329 struct as_data *ad = q->elevator->elevator_data;
1372 struct as_rq *arq = RQ_DATA(rq); 1330 struct as_rq *arq = RQ_DATA(rq);
1373 struct as_rq *alias;
1374 int data_dir; 1331 int data_dir;
1375 1332
1376 if (arq->state != AS_RQ_PRESCHED) {
1377 printk("arq->state: %d\n", arq->state);
1378 WARN_ON(1);
1379 }
1380 arq->state = AS_RQ_NEW; 1333 arq->state = AS_RQ_NEW;
1381 1334
1382 if (rq_data_dir(arq->request) == READ 1335 if (rq_data_dir(arq->request) == READ
@@ -1393,33 +1346,17 @@ static void as_add_request(request_queue_t *q, struct request *rq)
1393 atomic_inc(&arq->io_context->aic->nr_queued); 1346 atomic_inc(&arq->io_context->aic->nr_queued);
1394 } 1347 }
1395 1348
1396 alias = as_add_arq_rb(ad, arq); 1349 as_add_arq_rb(ad, arq);
1397 if (!alias) { 1350 if (rq_mergeable(arq->request))
1398 /* 1351 as_add_arq_hash(ad, arq);
1399 * set expire time (only used for reads) and add to fifo list
1400 */
1401 arq->expires = jiffies + ad->fifo_expire[data_dir];
1402 list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]);
1403
1404 if (rq_mergeable(arq->request))
1405 as_add_arq_hash(ad, arq);
1406 as_update_arq(ad, arq); /* keep state machine up to date */
1407 1352
1408 } else { 1353 /*
1409 as_add_aliased_request(ad, arq, alias); 1354 * set expire time (only used for reads) and add to fifo list
1410 1355 */
1411 /* 1356 arq->expires = jiffies + ad->fifo_expire[data_dir];
1412 * have we been anticipating this request? 1357 list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]);
1413 * or does it come from the same process as the one we are
1414 * anticipating for?
1415 */
1416 if (ad->antic_status == ANTIC_WAIT_REQ
1417 || ad->antic_status == ANTIC_WAIT_NEXT) {
1418 if (as_can_break_anticipation(ad, arq))
1419 as_antic_stop(ad);
1420 }
1421 }
1422 1358
1359 as_update_arq(ad, arq); /* keep state machine up to date */
1423 arq->state = AS_RQ_QUEUED; 1360 arq->state = AS_RQ_QUEUED;
1424} 1361}
1425 1362
@@ -1542,23 +1479,8 @@ static void as_merged_request(request_queue_t *q, struct request *req)
1542 * if the merge was a front merge, we need to reposition request 1479 * if the merge was a front merge, we need to reposition request
1543 */ 1480 */
1544 if (rq_rb_key(req) != arq->rb_key) { 1481 if (rq_rb_key(req) != arq->rb_key) {
1545 struct as_rq *alias, *next_arq = NULL;
1546
1547 if (ad->next_arq[arq->is_sync] == arq)
1548 next_arq = as_find_next_arq(ad, arq);
1549
1550 /*
1551 * Note! We should really be moving any old aliased requests
1552 * off this request and try to insert them into the rbtree. We
1553 * currently don't bother. Ditto the next function.
1554 */
1555 as_del_arq_rb(ad, arq); 1482 as_del_arq_rb(ad, arq);
1556 if ((alias = as_add_arq_rb(ad, arq))) { 1483 as_add_arq_rb(ad, arq);
1557 list_del_init(&arq->fifo);
1558 as_add_aliased_request(ad, arq, alias);
1559 if (next_arq)
1560 ad->next_arq[arq->is_sync] = next_arq;
1561 }
1562 /* 1484 /*
1563 * Note! At this stage of this and the next function, our next 1485 * Note! At this stage of this and the next function, our next
1564 * request may not be optimal - eg the request may have "grown" 1486 * request may not be optimal - eg the request may have "grown"
@@ -1585,18 +1507,8 @@ static void as_merged_requests(request_queue_t *q, struct request *req,
1585 as_add_arq_hash(ad, arq); 1507 as_add_arq_hash(ad, arq);
1586 1508
1587 if (rq_rb_key(req) != arq->rb_key) { 1509 if (rq_rb_key(req) != arq->rb_key) {
1588 struct as_rq *alias, *next_arq = NULL;
1589
1590 if (ad->next_arq[arq->is_sync] == arq)
1591 next_arq = as_find_next_arq(ad, arq);
1592
1593 as_del_arq_rb(ad, arq); 1510 as_del_arq_rb(ad, arq);
1594 if ((alias = as_add_arq_rb(ad, arq))) { 1511 as_add_arq_rb(ad, arq);
1595 list_del_init(&arq->fifo);
1596 as_add_aliased_request(ad, arq, alias);
1597 if (next_arq)
1598 ad->next_arq[arq->is_sync] = next_arq;
1599 }
1600 } 1512 }
1601 1513
1602 /* 1514 /*
@@ -1616,18 +1528,6 @@ static void as_merged_requests(request_queue_t *q, struct request *req,
1616 } 1528 }
1617 1529
1618 /* 1530 /*
1619 * Transfer list of aliases
1620 */
1621 while (!list_empty(&next->queuelist)) {
1622 struct request *__rq = list_entry_rq(next->queuelist.next);
1623 struct as_rq *__arq = RQ_DATA(__rq);
1624
1625 list_move_tail(&__rq->queuelist, &req->queuelist);
1626
1627 WARN_ON(__arq->state != AS_RQ_QUEUED);
1628 }
1629
1630 /*
1631 * kill knowledge of next, this one is a goner 1531 * kill knowledge of next, this one is a goner
1632 */ 1532 */
1633 as_remove_queued_request(q, next); 1533 as_remove_queued_request(q, next);
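The as-iosched hunks above drop the aliased-request bookkeeping: __as_add_arq_rb() reports a collision ("alias") on the sort key, and the new as_add_arq_rb() wrapper dispatches the alias and retries the insert instead of chaining it on a queuelist. Below is a loose userspace model of that eviction loop, with a sorted list standing in for the rbtree; the names and the dispatch list are illustrative, not kernel API.

#include <stdio.h>

struct rq {
	long key;         /* sort key (start sector)                            */
	struct rq *next;  /* link: sorted list while queued, dispatch list after */
};

/* Insert into a key-sorted list; return the existing entry on a collision. */
static struct rq *sorted_insert(struct rq **head, struct rq *rq)
{
	while (*head && (*head)->key < rq->key)
		head = &(*head)->next;
	if (*head && (*head)->key == rq->key)
		return *head;                    /* alias with the same key */
	rq->next = *head;
	*head = rq;
	return NULL;
}

/* Unlink an entry (stand-in for pulling the alias off the rbtree). */
static void sorted_remove(struct rq **head, struct rq *rq)
{
	while (*head && *head != rq)
		head = &(*head)->next;
	if (*head)
		*head = rq->next;
}

/* Loose analogue of the new as_add_arq_rb(): evict aliases to dispatch. */
static void add_request(struct rq **sort, struct rq **dispatch, struct rq *rq)
{
	struct rq *alias;

	while ((alias = sorted_insert(sort, rq)) != NULL) {
		sorted_remove(sort, alias);      /* as_move_to_dispatch() analogue */
		alias->next = *dispatch;
		*dispatch = alias;
	}
}

int main(void)
{
	struct rq a = { .key = 8 }, b = { .key = 8 }, c = { .key = 16 };
	struct rq *sort = NULL, *dispatch = NULL;

	add_request(&sort, &dispatch, &a);
	add_request(&sort, &dispatch, &b);       /* alias of a: a gets dispatched */
	add_request(&sort, &dispatch, &c);

	printf("dispatched first: key %ld\n", dispatch ? dispatch->key : -1L);
	return 0;
}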
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index ecacca9c877e..74fae2daf87e 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * linux/drivers/block/cfq-iosched.c
3 *
4 * CFQ, or complete fairness queueing, disk scheduler. 2 * CFQ, or complete fairness queueing, disk scheduler.
5 * 3 *
6 * Based on ideas from a previously unfinished io 4 * Based on ideas from a previously unfinished io
@@ -27,15 +25,15 @@
27/* 25/*
28 * tunables 26 * tunables
29 */ 27 */
30static int cfq_quantum = 4; /* max queue in one round of service */ 28static const int cfq_quantum = 4; /* max queue in one round of service */
31static int cfq_queued = 8; /* minimum rq allocate limit per-queue*/ 29static const int cfq_queued = 8; /* minimum rq allocate limit per-queue*/
32static int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; 30static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
33static int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */ 31static const int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */
34static int cfq_back_penalty = 2; /* penalty of a backwards seek */ 32static const int cfq_back_penalty = 2; /* penalty of a backwards seek */
35 33
36static int cfq_slice_sync = HZ / 10; 34static const int cfq_slice_sync = HZ / 10;
37static int cfq_slice_async = HZ / 25; 35static int cfq_slice_async = HZ / 25;
38static int cfq_slice_async_rq = 2; 36static const int cfq_slice_async_rq = 2;
39static int cfq_slice_idle = HZ / 100; 37static int cfq_slice_idle = HZ / 100;
40 38
41#define CFQ_IDLE_GRACE (HZ / 10) 39#define CFQ_IDLE_GRACE (HZ / 10)
@@ -47,7 +45,7 @@ static int cfq_slice_idle = HZ / 100;
47/* 45/*
48 * disable queueing at the driver/hardware level 46 * disable queueing at the driver/hardware level
49 */ 47 */
50static int cfq_max_depth = 2; 48static const int cfq_max_depth = 2;
51 49
52/* 50/*
53 * for the hash of cfqq inside the cfqd 51 * for the hash of cfqq inside the cfqd
@@ -861,8 +859,8 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
861 * store what was left of this slice, if the queue idled out 859 * store what was left of this slice, if the queue idled out
862 * or was preempted 860 * or was preempted
863 */ 861 */
864 if (time_after(now, cfqq->slice_end)) 862 if (time_after(cfqq->slice_end, now))
865 cfqq->slice_left = now - cfqq->slice_end; 863 cfqq->slice_left = cfqq->slice_end - now;
866 else 864 else
867 cfqq->slice_left = 0; 865 cfqq->slice_left = 0;
868 866
@@ -999,7 +997,7 @@ cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
999/* 997/*
1000 * get next queue for service 998 * get next queue for service
1001 */ 999 */
1002static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd, int force) 1000static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
1003{ 1001{
1004 unsigned long now = jiffies; 1002 unsigned long now = jiffies;
1005 struct cfq_queue *cfqq; 1003 struct cfq_queue *cfqq;
@@ -1023,7 +1021,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd, int force)
1023 */ 1021 */
1024 if (!RB_EMPTY(&cfqq->sort_list)) 1022 if (!RB_EMPTY(&cfqq->sort_list))
1025 goto keep_queue; 1023 goto keep_queue;
1026 else if (!force && cfq_cfqq_class_sync(cfqq) && 1024 else if (cfq_cfqq_class_sync(cfqq) &&
1027 time_before(now, cfqq->slice_end)) { 1025 time_before(now, cfqq->slice_end)) {
1028 if (cfq_arm_slice_timer(cfqd, cfqq)) 1026 if (cfq_arm_slice_timer(cfqd, cfqq))
1029 return NULL; 1027 return NULL;
@@ -1092,6 +1090,42 @@ __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1092} 1090}
1093 1091
1094static int 1092static int
1093cfq_forced_dispatch_cfqqs(struct list_head *list)
1094{
1095 int dispatched = 0;
1096 struct cfq_queue *cfqq, *next;
1097 struct cfq_rq *crq;
1098
1099 list_for_each_entry_safe(cfqq, next, list, cfq_list) {
1100 while ((crq = cfqq->next_crq)) {
1101 cfq_dispatch_insert(cfqq->cfqd->queue, crq);
1102 dispatched++;
1103 }
1104 BUG_ON(!list_empty(&cfqq->fifo));
1105 }
1106 return dispatched;
1107}
1108
1109static int
1110cfq_forced_dispatch(struct cfq_data *cfqd)
1111{
1112 int i, dispatched = 0;
1113
1114 for (i = 0; i < CFQ_PRIO_LISTS; i++)
1115 dispatched += cfq_forced_dispatch_cfqqs(&cfqd->rr_list[i]);
1116
1117 dispatched += cfq_forced_dispatch_cfqqs(&cfqd->busy_rr);
1118 dispatched += cfq_forced_dispatch_cfqqs(&cfqd->cur_rr);
1119 dispatched += cfq_forced_dispatch_cfqqs(&cfqd->idle_rr);
1120
1121 cfq_slice_expired(cfqd, 0);
1122
1123 BUG_ON(cfqd->busy_queues);
1124
1125 return dispatched;
1126}
1127
1128static int
1095cfq_dispatch_requests(request_queue_t *q, int force) 1129cfq_dispatch_requests(request_queue_t *q, int force)
1096{ 1130{
1097 struct cfq_data *cfqd = q->elevator->elevator_data; 1131 struct cfq_data *cfqd = q->elevator->elevator_data;
@@ -1100,7 +1134,10 @@ cfq_dispatch_requests(request_queue_t *q, int force)
1100 if (!cfqd->busy_queues) 1134 if (!cfqd->busy_queues)
1101 return 0; 1135 return 0;
1102 1136
1103 cfqq = cfq_select_queue(cfqd, force); 1137 if (unlikely(force))
1138 return cfq_forced_dispatch(cfqd);
1139
1140 cfqq = cfq_select_queue(cfqd);
1104 if (cfqq) { 1141 if (cfqq) {
1105 int max_dispatch; 1142 int max_dispatch;
1106 1143
@@ -1115,12 +1152,9 @@ cfq_dispatch_requests(request_queue_t *q, int force)
1115 cfq_clear_cfqq_wait_request(cfqq); 1152 cfq_clear_cfqq_wait_request(cfqq);
1116 del_timer(&cfqd->idle_slice_timer); 1153 del_timer(&cfqd->idle_slice_timer);
1117 1154
1118 if (!force) { 1155 max_dispatch = cfqd->cfq_quantum;
1119 max_dispatch = cfqd->cfq_quantum; 1156 if (cfq_class_idle(cfqq))
1120 if (cfq_class_idle(cfqq)) 1157 max_dispatch = 1;
1121 max_dispatch = 1;
1122 } else
1123 max_dispatch = INT_MAX;
1124 1158
1125 return __cfq_dispatch_requests(cfqd, cfqq, max_dispatch); 1159 return __cfq_dispatch_requests(cfqd, cfqq, max_dispatch);
1126 } 1160 }
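Besides the new forced-dispatch path, the cfq hunk above fixes the leftover-slice computation in __cfq_slice_expired(): the remainder must be slice_end - now, and only when the deadline still lies in the future. A minimal userspace sketch of the corrected, wrap-safe arithmetic (time_after() re-implemented locally for illustration):

#include <stdio.h>

typedef unsigned long jiffies_t;

/* Local re-implementation of the kernel's time_after(a, b): true if a is
 * later than b, robust against counter wrap-around. */
static int time_after(jiffies_t a, jiffies_t b)
{
	return (long)(b - a) < 0;
}

/* Remaining slice to carry over when a queue idles out or is preempted. */
static jiffies_t slice_left(jiffies_t now, jiffies_t slice_end)
{
	if (time_after(slice_end, now))
		return slice_end - now;    /* slice not yet used up  */
	return 0;                          /* slice already expired  */
}

int main(void)
{
	printf("%lu\n", slice_left(100, 130)); /* 30 jiffies remaining         */
	printf("%lu\n", slice_left(130, 100)); /* expired: 0, not a huge value */
	return 0;
}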
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index 7929471d7df7..27e494b1bf97 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * linux/drivers/block/deadline-iosched.c
3 *
4 * Deadline i/o scheduler. 2 * Deadline i/o scheduler.
5 * 3 *
6 * Copyright (C) 2002 Jens Axboe <axboe@suse.de> 4 * Copyright (C) 2002 Jens Axboe <axboe@suse.de>
@@ -21,10 +19,10 @@
21/* 19/*
22 * See Documentation/block/deadline-iosched.txt 20 * See Documentation/block/deadline-iosched.txt
23 */ 21 */
24static int read_expire = HZ / 2; /* max time before a read is submitted. */ 22static const int read_expire = HZ / 2; /* max time before a read is submitted. */
25static int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */ 23static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */
26static int writes_starved = 2; /* max times reads can starve a write */ 24static const int writes_starved = 2; /* max times reads can starve a write */
27static int fifo_batch = 16; /* # of sequential requests treated as one 25static const int fifo_batch = 16; /* # of sequential requests treated as one
28 by the above parameters. For throughput. */ 26 by the above parameters. For throughput. */
29 27
30static const int deadline_hash_shift = 5; 28static const int deadline_hash_shift = 5;
diff --git a/block/elevator.c b/block/elevator.c
index d4a49a3df829..c9f424d5399c 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * linux/drivers/block/elevator.c
3 *
4 * Block device elevator/IO-scheduler. 2 * Block device elevator/IO-scheduler.
5 * 3 *
6 * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE 4 * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
@@ -66,7 +64,7 @@ inline int elv_rq_merge_ok(struct request *rq, struct bio *bio)
66} 64}
67EXPORT_SYMBOL(elv_rq_merge_ok); 65EXPORT_SYMBOL(elv_rq_merge_ok);
68 66
69inline int elv_try_merge(struct request *__rq, struct bio *bio) 67static inline int elv_try_merge(struct request *__rq, struct bio *bio)
70{ 68{
71 int ret = ELEVATOR_NO_MERGE; 69 int ret = ELEVATOR_NO_MERGE;
72 70
@@ -82,7 +80,6 @@ inline int elv_try_merge(struct request *__rq, struct bio *bio)
82 80
83 return ret; 81 return ret;
84} 82}
85EXPORT_SYMBOL(elv_try_merge);
86 83
87static struct elevator_type *elevator_find(const char *name) 84static struct elevator_type *elevator_find(const char *name)
88{ 85{
@@ -152,12 +149,20 @@ static void elevator_setup_default(void)
152 if (!chosen_elevator[0]) 149 if (!chosen_elevator[0])
153 strcpy(chosen_elevator, CONFIG_DEFAULT_IOSCHED); 150 strcpy(chosen_elevator, CONFIG_DEFAULT_IOSCHED);
154 151
152 /*
153 * Be backwards-compatible with previous kernels, so users
154 * won't get the wrong elevator.
155 */
156 if (!strcmp(chosen_elevator, "as"))
157 strcpy(chosen_elevator, "anticipatory");
158
155 /* 159 /*
156 * If the given scheduler is not available, fall back to no-op. 160 * If the given scheduler is not available, fall back to the default
157 */ 161 */
158 if (!(e = elevator_find(chosen_elevator))) 162 if ((e = elevator_find(chosen_elevator)))
159 strcpy(chosen_elevator, "noop"); 163 elevator_put(e);
160 elevator_put(e); 164 else
165 strcpy(chosen_elevator, CONFIG_DEFAULT_IOSCHED);
161} 166}
162 167
163static int __init elevator_setup(char *str) 168static int __init elevator_setup(char *str)
@@ -190,14 +195,14 @@ int elevator_init(request_queue_t *q, char *name)
190 195
191 eq = kmalloc(sizeof(struct elevator_queue), GFP_KERNEL); 196 eq = kmalloc(sizeof(struct elevator_queue), GFP_KERNEL);
192 if (!eq) { 197 if (!eq) {
193 elevator_put(e->elevator_type); 198 elevator_put(e);
194 return -ENOMEM; 199 return -ENOMEM;
195 } 200 }
196 201
197 ret = elevator_attach(q, e, eq); 202 ret = elevator_attach(q, e, eq);
198 if (ret) { 203 if (ret) {
199 kfree(eq); 204 kfree(eq);
200 elevator_put(e->elevator_type); 205 elevator_put(e);
201 } 206 }
202 207
203 return ret; 208 return ret;
@@ -225,6 +230,7 @@ void elv_dispatch_sort(request_queue_t *q, struct request *rq)
225 230
226 if (q->last_merge == rq) 231 if (q->last_merge == rq)
227 q->last_merge = NULL; 232 q->last_merge = NULL;
233 q->nr_sorted--;
228 234
229 boundary = q->end_sector; 235 boundary = q->end_sector;
230 236
@@ -283,6 +289,7 @@ void elv_merge_requests(request_queue_t *q, struct request *rq,
283 289
284 if (e->ops->elevator_merge_req_fn) 290 if (e->ops->elevator_merge_req_fn)
285 e->ops->elevator_merge_req_fn(q, rq, next); 291 e->ops->elevator_merge_req_fn(q, rq, next);
292 q->nr_sorted--;
286 293
287 q->last_merge = rq; 294 q->last_merge = rq;
288} 295}
@@ -303,22 +310,39 @@ void elv_requeue_request(request_queue_t *q, struct request *rq)
303 310
304 rq->flags &= ~REQ_STARTED; 311 rq->flags &= ~REQ_STARTED;
305 312
306 /* 313 __elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE, 0);
307 * if this is the flush, requeue the original instead and drop the flush 314}
308 */
309 if (rq->flags & REQ_BAR_FLUSH) {
310 clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
311 rq = rq->end_io_data;
312 }
313 315
314 __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0); 316static void elv_drain_elevator(request_queue_t *q)
317{
318 static int printed;
319 while (q->elevator->ops->elevator_dispatch_fn(q, 1))
320 ;
321 if (q->nr_sorted == 0)
322 return;
323 if (printed++ < 10) {
324 printk(KERN_ERR "%s: forced dispatching is broken "
325 "(nr_sorted=%u), please report this\n",
326 q->elevator->elevator_type->elevator_name, q->nr_sorted);
327 }
315} 328}
316 329
317void __elv_add_request(request_queue_t *q, struct request *rq, int where, 330void __elv_add_request(request_queue_t *q, struct request *rq, int where,
318 int plug) 331 int plug)
319{ 332{
333 struct list_head *pos;
334 unsigned ordseq;
335
336 if (q->ordcolor)
337 rq->flags |= REQ_ORDERED_COLOR;
338
320 if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { 339 if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
321 /* 340 /*
341 * toggle ordered color
342 */
343 q->ordcolor ^= 1;
344
345 /*
322 * barriers implicitly indicate back insertion 346 * barriers implicitly indicate back insertion
323 */ 347 */
324 if (where == ELEVATOR_INSERT_SORT) 348 if (where == ELEVATOR_INSERT_SORT)
@@ -348,9 +372,7 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where,
348 372
349 case ELEVATOR_INSERT_BACK: 373 case ELEVATOR_INSERT_BACK:
350 rq->flags |= REQ_SOFTBARRIER; 374 rq->flags |= REQ_SOFTBARRIER;
351 375 elv_drain_elevator(q);
352 while (q->elevator->ops->elevator_dispatch_fn(q, 1))
353 ;
354 list_add_tail(&rq->queuelist, &q->queue_head); 376 list_add_tail(&rq->queuelist, &q->queue_head);
355 /* 377 /*
356 * We kick the queue here for the following reasons. 378 * We kick the queue here for the following reasons.
@@ -369,6 +391,7 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where,
369 case ELEVATOR_INSERT_SORT: 391 case ELEVATOR_INSERT_SORT:
370 BUG_ON(!blk_fs_request(rq)); 392 BUG_ON(!blk_fs_request(rq));
371 rq->flags |= REQ_SORTED; 393 rq->flags |= REQ_SORTED;
394 q->nr_sorted++;
372 if (q->last_merge == NULL && rq_mergeable(rq)) 395 if (q->last_merge == NULL && rq_mergeable(rq))
373 q->last_merge = rq; 396 q->last_merge = rq;
374 /* 397 /*
@@ -379,6 +402,30 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where,
379 q->elevator->ops->elevator_add_req_fn(q, rq); 402 q->elevator->ops->elevator_add_req_fn(q, rq);
380 break; 403 break;
381 404
405 case ELEVATOR_INSERT_REQUEUE:
406 /*
407 * If ordered flush isn't in progress, we do front
408 * insertion; otherwise, requests should be requeued
409 * in ordseq order.
410 */
411 rq->flags |= REQ_SOFTBARRIER;
412
413 if (q->ordseq == 0) {
414 list_add(&rq->queuelist, &q->queue_head);
415 break;
416 }
417
418 ordseq = blk_ordered_req_seq(rq);
419
420 list_for_each(pos, &q->queue_head) {
421 struct request *pos_rq = list_entry_rq(pos);
422 if (ordseq <= blk_ordered_req_seq(pos_rq))
423 break;
424 }
425
426 list_add_tail(&rq->queuelist, pos);
427 break;
428
382 default: 429 default:
383 printk(KERN_ERR "%s: bad insertion point %d\n", 430 printk(KERN_ERR "%s: bad insertion point %d\n",
384 __FUNCTION__, where); 431 __FUNCTION__, where);
@@ -408,25 +455,16 @@ static inline struct request *__elv_next_request(request_queue_t *q)
408{ 455{
409 struct request *rq; 456 struct request *rq;
410 457
411 if (unlikely(list_empty(&q->queue_head) && 458 while (1) {
412 !q->elevator->ops->elevator_dispatch_fn(q, 0))) 459 while (!list_empty(&q->queue_head)) {
413 return NULL; 460 rq = list_entry_rq(q->queue_head.next);
414 461 if (blk_do_ordered(q, &rq))
415 rq = list_entry_rq(q->queue_head.next); 462 return rq;
416 463 }
417 /*
418 * if this is a barrier write and the device has to issue a
419 * flush sequence to support it, check how far we are
420 */
421 if (blk_fs_request(rq) && blk_barrier_rq(rq)) {
422 BUG_ON(q->ordered == QUEUE_ORDERED_NONE);
423 464
424 if (q->ordered == QUEUE_ORDERED_FLUSH && 465 if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
425 !blk_barrier_preflush(rq)) 466 return NULL;
426 rq = blk_start_pre_flush(q, rq);
427 } 467 }
428
429 return rq;
430} 468}
431 469
432struct request *elv_next_request(request_queue_t *q) 470struct request *elv_next_request(request_queue_t *q)
@@ -484,7 +522,7 @@ struct request *elv_next_request(request_queue_t *q)
484 blkdev_dequeue_request(rq); 522 blkdev_dequeue_request(rq);
485 rq->flags |= REQ_QUIET; 523 rq->flags |= REQ_QUIET;
486 end_that_request_chunk(rq, 0, nr_bytes); 524 end_that_request_chunk(rq, 0, nr_bytes);
487 end_that_request_last(rq); 525 end_that_request_last(rq, 0);
488 } else { 526 } else {
489 printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__, 527 printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__,
490 ret); 528 ret);
@@ -525,33 +563,19 @@ int elv_queue_empty(request_queue_t *q)
525 563
526struct request *elv_latter_request(request_queue_t *q, struct request *rq) 564struct request *elv_latter_request(request_queue_t *q, struct request *rq)
527{ 565{
528 struct list_head *next;
529
530 elevator_t *e = q->elevator; 566 elevator_t *e = q->elevator;
531 567
532 if (e->ops->elevator_latter_req_fn) 568 if (e->ops->elevator_latter_req_fn)
533 return e->ops->elevator_latter_req_fn(q, rq); 569 return e->ops->elevator_latter_req_fn(q, rq);
534
535 next = rq->queuelist.next;
536 if (next != &q->queue_head && next != &rq->queuelist)
537 return list_entry_rq(next);
538
539 return NULL; 570 return NULL;
540} 571}
541 572
542struct request *elv_former_request(request_queue_t *q, struct request *rq) 573struct request *elv_former_request(request_queue_t *q, struct request *rq)
543{ 574{
544 struct list_head *prev;
545
546 elevator_t *e = q->elevator; 575 elevator_t *e = q->elevator;
547 576
548 if (e->ops->elevator_former_req_fn) 577 if (e->ops->elevator_former_req_fn)
549 return e->ops->elevator_former_req_fn(q, rq); 578 return e->ops->elevator_former_req_fn(q, rq);
550
551 prev = rq->queuelist.prev;
552 if (prev != &q->queue_head && prev != &rq->queuelist)
553 return list_entry_rq(prev);
554
555 return NULL; 579 return NULL;
556} 580}
557 581
@@ -597,6 +621,20 @@ void elv_completed_request(request_queue_t *q, struct request *rq)
597 if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn) 621 if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
598 e->ops->elevator_completed_req_fn(q, rq); 622 e->ops->elevator_completed_req_fn(q, rq);
599 } 623 }
624
625 /*
626 * Check if the queue is waiting for fs requests to be
627 * drained for flush sequence.
628 */
629 if (unlikely(q->ordseq)) {
630 struct request *first_rq = list_entry_rq(q->queue_head.next);
631 if (q->in_flight == 0 &&
632 blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
633 blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) {
634 blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
635 q->request_fn(q);
636 }
637 }
600} 638}
601 639
602int elv_register_queue(struct request_queue *q) 640int elv_register_queue(struct request_queue *q)
@@ -691,13 +729,15 @@ static void elevator_switch(request_queue_t *q, struct elevator_type *new_e)
691 729
692 set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); 730 set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
693 731
694 while (q->elevator->ops->elevator_dispatch_fn(q, 1)) 732 elv_drain_elevator(q);
695 ;
696 733
697 while (q->rq.elvpriv) { 734 while (q->rq.elvpriv) {
735 blk_remove_plug(q);
736 q->request_fn(q);
698 spin_unlock_irq(q->queue_lock); 737 spin_unlock_irq(q->queue_lock);
699 msleep(10); 738 msleep(10);
700 spin_lock_irq(q->queue_lock); 739 spin_lock_irq(q->queue_lock);
740 elv_drain_elevator(q);
701 } 741 }
702 742
703 spin_unlock_irq(q->queue_lock); 743 spin_unlock_irq(q->queue_lock);
@@ -744,13 +784,15 @@ error:
744ssize_t elv_iosched_store(request_queue_t *q, const char *name, size_t count) 784ssize_t elv_iosched_store(request_queue_t *q, const char *name, size_t count)
745{ 785{
746 char elevator_name[ELV_NAME_MAX]; 786 char elevator_name[ELV_NAME_MAX];
787 size_t len;
747 struct elevator_type *e; 788 struct elevator_type *e;
748 789
749 memset(elevator_name, 0, sizeof(elevator_name)); 790 elevator_name[sizeof(elevator_name) - 1] = '\0';
750 strncpy(elevator_name, name, sizeof(elevator_name)); 791 strncpy(elevator_name, name, sizeof(elevator_name) - 1);
792 len = strlen(elevator_name);
751 793
752 if (elevator_name[strlen(elevator_name) - 1] == '\n') 794 if (len && elevator_name[len - 1] == '\n')
753 elevator_name[strlen(elevator_name) - 1] = '\0'; 795 elevator_name[len - 1] = '\0';
754 796
755 e = elevator_get(elevator_name); 797 e = elevator_get(elevator_name);
756 if (!e) { 798 if (!e) {
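The last elevator.c hunk hardens the name parsing in elv_iosched_store(): the destination is NUL-terminated up front, strncpy() is bounded one byte short of the buffer, and the trailing newline from "echo name > scheduler" is stripped only when the string is non-empty. A standalone userspace sketch of the same parsing, with an illustrative function name:

#include <stdio.h>
#include <string.h>

#define ELV_NAME_MAX 16

/* Models the parsing in elv_iosched_store() above; name is illustrative. */
static void parse_elevator_name(char *dst, const char *src)
{
	size_t len;

	dst[ELV_NAME_MAX - 1] = '\0';          /* guarantee termination      */
	strncpy(dst, src, ELV_NAME_MAX - 1);   /* bounded copy, last byte spared */

	len = strlen(dst);
	if (len && dst[len - 1] == '\n')       /* strip newline, empty-safe  */
		dst[len - 1] = '\0';
}

int main(void)
{
	char name[ELV_NAME_MAX];

	parse_elevator_name(name, "deadline\n");
	printf("[%s]\n", name);                /* prints [deadline]          */

	parse_elevator_name(name, "");         /* empty input: no underflow  */
	printf("[%s]\n", name);                /* prints []                  */
	return 0;
}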
diff --git a/block/genhd.c b/block/genhd.c
index 54aec4a1ae13..db57546a709d 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -38,34 +38,100 @@ static inline int major_to_index(int major)
38 return major % MAX_PROBE_HASH; 38 return major % MAX_PROBE_HASH;
39} 39}
40 40
41#ifdef CONFIG_PROC_FS 41struct blkdev_info {
42/* get block device names in somewhat random order */ 42 int index;
43int get_blkdev_list(char *p, int used) 43 struct blk_major_name *bd;
44};
45
46/*
47 * iterate over a list of blkdev_info structures. allows
48 * the major_names array to be iterated over from outside this file
49 * must be called with the block_subsys_sem held
50 */
51void *get_next_blkdev(void *dev)
52{
53 struct blkdev_info *info;
54
55 if (dev == NULL) {
56 info = kmalloc(sizeof(*info), GFP_KERNEL);
57 if (!info)
58 goto out;
59 info->index=0;
60 info->bd = major_names[info->index];
61 if (info->bd)
62 goto out;
63 } else {
64 info = dev;
65 }
66
67 while (info->index < ARRAY_SIZE(major_names)) {
68 if (info->bd)
69 info->bd = info->bd->next;
70 if (info->bd)
71 goto out;
72 /*
73 * No devices on this chain, move to the next
74 */
75 info->index++;
76 info->bd = (info->index < ARRAY_SIZE(major_names)) ?
77 major_names[info->index] : NULL;
78 if (info->bd)
79 goto out;
80 }
81
82out:
83 return info;
84}
85
86void *acquire_blkdev_list(void)
87{
88 down(&block_subsys_sem);
89 return get_next_blkdev(NULL);
90}
91
92void release_blkdev_list(void *dev)
93{
94 up(&block_subsys_sem);
95 kfree(dev);
96}
97
98
99/*
100 * Count the number of records in the blkdev_list.
101 * must be called with the block_subsys_sem held
102 */
103int count_blkdev_list(void)
44{ 104{
45 struct blk_major_name *n; 105 struct blk_major_name *n;
46 int i, len; 106 int i, count;
47 107
48 len = snprintf(p, (PAGE_SIZE-used), "\nBlock devices:\n"); 108 count = 0;
49 109
50 down(&block_subsys_sem);
51 for (i = 0; i < ARRAY_SIZE(major_names); i++) { 110 for (i = 0; i < ARRAY_SIZE(major_names); i++) {
52 for (n = major_names[i]; n; n = n->next) { 111 for (n = major_names[i]; n; n = n->next)
53 /* 112 count++;
54 * If the curent string plus the 5 extra characters
55 * in the line would run us off the page, then we're done
56 */
57 if ((len + used + strlen(n->name) + 5) >= PAGE_SIZE)
58 goto page_full;
59 len += sprintf(p+len, "%3d %s\n",
60 n->major, n->name);
61 }
62 } 113 }
63page_full:
64 up(&block_subsys_sem);
65 114
66 return len; 115 return count;
67} 116}
68#endif 117
118/*
119 * extract the major and name values from a blkdev_info struct
120 * passed in as a void to *dev. Must be called with
121 * block_subsys_sem held
122 */
123int get_blkdev_info(void *dev, int *major, char **name)
124{
125 struct blkdev_info *info = dev;
126
127 if (info->bd == NULL)
128 return 1;
129
130 *major = info->bd->major;
131 *name = info->bd->name;
132 return 0;
133}
134
69 135
70int register_blkdev(unsigned int major, const char *name) 136int register_blkdev(unsigned int major, const char *name)
71{ 137{
@@ -358,7 +424,7 @@ static struct sysfs_ops disk_sysfs_ops = {
358static ssize_t disk_uevent_store(struct gendisk * disk, 424static ssize_t disk_uevent_store(struct gendisk * disk,
359 const char *buf, size_t count) 425 const char *buf, size_t count)
360{ 426{
361 kobject_hotplug(&disk->kobj, KOBJ_ADD); 427 kobject_uevent(&disk->kobj, KOBJ_ADD);
362 return count; 428 return count;
363} 429}
364static ssize_t disk_dev_read(struct gendisk * disk, char *page) 430static ssize_t disk_dev_read(struct gendisk * disk, char *page)
@@ -391,12 +457,14 @@ static ssize_t disk_stats_read(struct gendisk * disk, char *page)
391 "%8u %8u %8llu %8u " 457 "%8u %8u %8llu %8u "
392 "%8u %8u %8u" 458 "%8u %8u %8u"
393 "\n", 459 "\n",
394 disk_stat_read(disk, ios[0]), disk_stat_read(disk, merges[0]), 460 disk_stat_read(disk, ios[READ]),
395 (unsigned long long)disk_stat_read(disk, sectors[0]), 461 disk_stat_read(disk, merges[READ]),
396 jiffies_to_msecs(disk_stat_read(disk, ticks[0])), 462 (unsigned long long)disk_stat_read(disk, sectors[READ]),
397 disk_stat_read(disk, ios[1]), disk_stat_read(disk, merges[1]), 463 jiffies_to_msecs(disk_stat_read(disk, ticks[READ])),
398 (unsigned long long)disk_stat_read(disk, sectors[1]), 464 disk_stat_read(disk, ios[WRITE]),
399 jiffies_to_msecs(disk_stat_read(disk, ticks[1])), 465 disk_stat_read(disk, merges[WRITE]),
466 (unsigned long long)disk_stat_read(disk, sectors[WRITE]),
467 jiffies_to_msecs(disk_stat_read(disk, ticks[WRITE])),
400 disk->in_flight, 468 disk->in_flight,
401 jiffies_to_msecs(disk_stat_read(disk, io_ticks)), 469 jiffies_to_msecs(disk_stat_read(disk, io_ticks)),
402 jiffies_to_msecs(disk_stat_read(disk, time_in_queue))); 470 jiffies_to_msecs(disk_stat_read(disk, time_in_queue)));
@@ -453,14 +521,14 @@ static struct kobj_type ktype_block = {
453 521
454extern struct kobj_type ktype_part; 522extern struct kobj_type ktype_part;
455 523
456static int block_hotplug_filter(struct kset *kset, struct kobject *kobj) 524static int block_uevent_filter(struct kset *kset, struct kobject *kobj)
457{ 525{
458 struct kobj_type *ktype = get_ktype(kobj); 526 struct kobj_type *ktype = get_ktype(kobj);
459 527
460 return ((ktype == &ktype_block) || (ktype == &ktype_part)); 528 return ((ktype == &ktype_block) || (ktype == &ktype_part));
461} 529}
462 530
463static int block_hotplug(struct kset *kset, struct kobject *kobj, char **envp, 531static int block_uevent(struct kset *kset, struct kobject *kobj, char **envp,
464 int num_envp, char *buffer, int buffer_size) 532 int num_envp, char *buffer, int buffer_size)
465{ 533{
466 struct kobj_type *ktype = get_ktype(kobj); 534 struct kobj_type *ktype = get_ktype(kobj);
@@ -472,40 +540,40 @@ static int block_hotplug(struct kset *kset, struct kobject *kobj, char **envp,
472 540
473 if (ktype == &ktype_block) { 541 if (ktype == &ktype_block) {
474 disk = container_of(kobj, struct gendisk, kobj); 542 disk = container_of(kobj, struct gendisk, kobj);
475 add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, 543 add_uevent_var(envp, num_envp, &i, buffer, buffer_size,
476 &length, "MINOR=%u", disk->first_minor); 544 &length, "MINOR=%u", disk->first_minor);
477 } else if (ktype == &ktype_part) { 545 } else if (ktype == &ktype_part) {
478 disk = container_of(kobj->parent, struct gendisk, kobj); 546 disk = container_of(kobj->parent, struct gendisk, kobj);
479 part = container_of(kobj, struct hd_struct, kobj); 547 part = container_of(kobj, struct hd_struct, kobj);
480 add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, 548 add_uevent_var(envp, num_envp, &i, buffer, buffer_size,
481 &length, "MINOR=%u", 549 &length, "MINOR=%u",
482 disk->first_minor + part->partno); 550 disk->first_minor + part->partno);
483 } else 551 } else
484 return 0; 552 return 0;
485 553
486 add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, &length, 554 add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
487 "MAJOR=%u", disk->major); 555 "MAJOR=%u", disk->major);
488 556
489 /* add physical device, backing this device */ 557 /* add physical device, backing this device */
490 physdev = disk->driverfs_dev; 558 physdev = disk->driverfs_dev;
491 if (physdev) { 559 if (physdev) {
492 char *path = kobject_get_path(&physdev->kobj, GFP_KERNEL); 560 char *path = kobject_get_path(&physdev->kobj, GFP_KERNEL);
493 561
494 add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, 562 add_uevent_var(envp, num_envp, &i, buffer, buffer_size,
495 &length, "PHYSDEVPATH=%s", path); 563 &length, "PHYSDEVPATH=%s", path);
496 kfree(path); 564 kfree(path);
497 565
498 if (physdev->bus) 566 if (physdev->bus)
499 add_hotplug_env_var(envp, num_envp, &i, 567 add_uevent_var(envp, num_envp, &i,
500 buffer, buffer_size, &length, 568 buffer, buffer_size, &length,
501 "PHYSDEVBUS=%s", 569 "PHYSDEVBUS=%s",
502 physdev->bus->name); 570 physdev->bus->name);
503 571
504 if (physdev->driver) 572 if (physdev->driver)
505 add_hotplug_env_var(envp, num_envp, &i, 573 add_uevent_var(envp, num_envp, &i,
506 buffer, buffer_size, &length, 574 buffer, buffer_size, &length,
507 "PHYSDEVDRIVER=%s", 575 "PHYSDEVDRIVER=%s",
508 physdev->driver->name); 576 physdev->driver->name);
509 } 577 }
510 578
511 /* terminate, set to next free slot, shrink available space */ 579 /* terminate, set to next free slot, shrink available space */
@@ -518,13 +586,13 @@ static int block_hotplug(struct kset *kset, struct kobject *kobj, char **envp,
518 return 0; 586 return 0;
519} 587}
520 588
521static struct kset_hotplug_ops block_hotplug_ops = { 589static struct kset_uevent_ops block_uevent_ops = {
522 .filter = block_hotplug_filter, 590 .filter = block_uevent_filter,
523 .hotplug = block_hotplug, 591 .uevent = block_uevent,
524}; 592};
525 593
526/* declare block_subsys. */ 594/* declare block_subsys. */
527static decl_subsys(block, &ktype_block, &block_hotplug_ops); 595static decl_subsys(block, &ktype_block, &block_uevent_ops);
528 596
529 597
530/* 598/*
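The bulk of the genhd.c change is the get_next_blkdev()/acquire_blkdev_list() iterator, which walks the major_names[] hash one chain entry at a time and then skips ahead to the next non-empty bucket, so the block-device listing can be produced via callbacks from outside this file. Below is a self-contained userspace model of that bucket-chain walk; the types are stand-ins, not the kernel structures.

#include <stdio.h>
#include <stddef.h>

#define NBUCKETS 4

struct node {                /* stand-in for struct blk_major_name        */
	int major;
	const char *name;
	struct node *next;
};

struct iter {                /* stand-in for struct blkdev_info           */
	size_t bucket;
	struct node *pos;
};

static struct node *buckets[NBUCKETS];   /* stand-in for major_names[]    */

/* Advance to the next entry: step within the chain, then skip empty buckets. */
static void iter_next(struct iter *it)
{
	if (it->pos)
		it->pos = it->pos->next;

	while (!it->pos && ++it->bucket < NBUCKETS)
		it->pos = buckets[it->bucket];
}

static void iter_first(struct iter *it)
{
	it->bucket = 0;
	it->pos = buckets[0];
	if (!it->pos)
		iter_next(it);
}

int main(void)
{
	struct node loop = { 7, "loop", NULL };
	struct node sd   = { 8, "sd", &loop };   /* chain: sd -> loop         */
	struct node md   = { 9, "md", NULL };
	struct iter it;

	buckets[0] = &sd;
	buckets[1] = &md;

	for (iter_first(&it); it.pos; iter_next(&it))
		printf("%3d %s\n", it.pos->major, it.pos->name);
	return 0;
}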
diff --git a/block/ioctl.c b/block/ioctl.c
index 6e278474f9a8..e1109491c234 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -1,6 +1,7 @@
1#include <linux/sched.h> /* for capable() */ 1#include <linux/capability.h>
2#include <linux/blkdev.h> 2#include <linux/blkdev.h>
3#include <linux/blkpg.h> 3#include <linux/blkpg.h>
4#include <linux/hdreg.h>
4#include <linux/backing-dev.h> 5#include <linux/backing-dev.h>
5#include <linux/buffer_head.h> 6#include <linux/buffer_head.h>
6#include <linux/smp_lock.h> 7#include <linux/smp_lock.h>
@@ -245,6 +246,27 @@ int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd,
245 set_device_ro(bdev, n); 246 set_device_ro(bdev, n);
246 unlock_kernel(); 247 unlock_kernel();
247 return 0; 248 return 0;
249 case HDIO_GETGEO: {
250 struct hd_geometry geo;
251
252 if (!arg)
253 return -EINVAL;
254 if (!disk->fops->getgeo)
255 return -ENOTTY;
256
257 /*
258 * We need to set the startsect first, the driver may
259 * want to override it.
260 */
261 geo.start = get_start_sect(bdev);
262 ret = disk->fops->getgeo(bdev, &geo);
263 if (ret)
264 return ret;
265 if (copy_to_user((struct hd_geometry __user *)arg, &geo,
266 sizeof(geo)))
267 return -EFAULT;
268 return 0;
269 }
248 } 270 }
249 271
250 lock_kernel(); 272 lock_kernel();
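With the HDIO_GETGEO case added above, blkdev_ioctl() can answer geometry queries generically for any driver that implements ->getgeo, pre-seeding geo.start with the partition offset before the driver fills in the rest. From userspace the ioctl is unchanged; a minimal caller (the device path is only an example, run it against a device you can open):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/hdreg.h>      /* struct hd_geometry, HDIO_GETGEO */

int main(int argc, char **argv)
{
	const char *dev = argc > 1 ? argv[1] : "/dev/sda";  /* example path */
	struct hd_geometry geo;
	int fd = open(dev, O_RDONLY | O_NONBLOCK);

	if (fd < 0) {
		perror(dev);
		return 1;
	}
	if (ioctl(fd, HDIO_GETGEO, &geo) < 0) {
		perror("HDIO_GETGEO");
		close(fd);
		return 1;
	}
	printf("%s: heads=%u sectors=%u cylinders=%u start=%lu\n", dev,
	       (unsigned)geo.heads, (unsigned)geo.sectors,
	       (unsigned)geo.cylinders, geo.start);
	close(fd);
	return 0;
}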
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 5f52e30b43f8..8e27d0ab0d7c 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * linux/drivers/block/ll_rw_blk.c
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds 2 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 1994, Karl Keyte: Added support for disk statistics 3 * Copyright (C) 1994, Karl Keyte: Added support for disk statistics
6 * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE 4 * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
@@ -28,7 +26,8 @@
28#include <linux/slab.h> 26#include <linux/slab.h>
29#include <linux/swap.h> 27#include <linux/swap.h>
30#include <linux/writeback.h> 28#include <linux/writeback.h>
31#include <linux/blkdev.h> 29#include <linux/interrupt.h>
30#include <linux/cpu.h>
32 31
33/* 32/*
34 * for max sense size 33 * for max sense size
@@ -38,6 +37,8 @@
38static void blk_unplug_work(void *data); 37static void blk_unplug_work(void *data);
39static void blk_unplug_timeout(unsigned long data); 38static void blk_unplug_timeout(unsigned long data);
40static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io); 39static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io);
40static void init_request_from_bio(struct request *req, struct bio *bio);
41static int __make_request(request_queue_t *q, struct bio *bio);
41 42
42/* 43/*
43 * For the allocated request tables 44 * For the allocated request tables
@@ -62,13 +63,15 @@ static wait_queue_head_t congestion_wqh[2] = {
62/* 63/*
63 * Controlling structure to kblockd 64 * Controlling structure to kblockd
64 */ 65 */
65static struct workqueue_struct *kblockd_workqueue; 66static struct workqueue_struct *kblockd_workqueue;
66 67
67unsigned long blk_max_low_pfn, blk_max_pfn; 68unsigned long blk_max_low_pfn, blk_max_pfn;
68 69
69EXPORT_SYMBOL(blk_max_low_pfn); 70EXPORT_SYMBOL(blk_max_low_pfn);
70EXPORT_SYMBOL(blk_max_pfn); 71EXPORT_SYMBOL(blk_max_pfn);
71 72
73static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
74
72/* Amount of time in which a process may batch requests */ 75/* Amount of time in which a process may batch requests */
73#define BLK_BATCH_TIME (HZ/50UL) 76#define BLK_BATCH_TIME (HZ/50UL)
74 77
@@ -207,6 +210,13 @@ void blk_queue_merge_bvec(request_queue_t *q, merge_bvec_fn *mbfn)
207 210
208EXPORT_SYMBOL(blk_queue_merge_bvec); 211EXPORT_SYMBOL(blk_queue_merge_bvec);
209 212
213void blk_queue_softirq_done(request_queue_t *q, softirq_done_fn *fn)
214{
215 q->softirq_done_fn = fn;
216}
217
218EXPORT_SYMBOL(blk_queue_softirq_done);
219
210/** 220/**
211 * blk_queue_make_request - define an alternate make_request function for a device 221 * blk_queue_make_request - define an alternate make_request function for a device
212 * @q: the request queue for the device to be affected 222 * @q: the request queue for the device to be affected
@@ -241,7 +251,7 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
241 q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; 251 q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
242 q->backing_dev_info.state = 0; 252 q->backing_dev_info.state = 0;
243 q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY; 253 q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
244 blk_queue_max_sectors(q, MAX_SECTORS); 254 blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
245 blk_queue_hardsect_size(q, 512); 255 blk_queue_hardsect_size(q, 512);
246 blk_queue_dma_alignment(q, 511); 256 blk_queue_dma_alignment(q, 511);
247 blk_queue_congestion_threshold(q); 257 blk_queue_congestion_threshold(q);
@@ -270,6 +280,7 @@ EXPORT_SYMBOL(blk_queue_make_request);
270static inline void rq_init(request_queue_t *q, struct request *rq) 280static inline void rq_init(request_queue_t *q, struct request *rq)
271{ 281{
272 INIT_LIST_HEAD(&rq->queuelist); 282 INIT_LIST_HEAD(&rq->queuelist);
283 INIT_LIST_HEAD(&rq->donelist);
273 284
274 rq->errors = 0; 285 rq->errors = 0;
275 rq->rq_status = RQ_ACTIVE; 286 rq->rq_status = RQ_ACTIVE;
@@ -286,12 +297,13 @@ static inline void rq_init(request_queue_t *q, struct request *rq)
286 rq->sense = NULL; 297 rq->sense = NULL;
287 rq->end_io = NULL; 298 rq->end_io = NULL;
288 rq->end_io_data = NULL; 299 rq->end_io_data = NULL;
300 rq->completion_data = NULL;
289} 301}
290 302
291/** 303/**
292 * blk_queue_ordered - does this queue support ordered writes 304 * blk_queue_ordered - does this queue support ordered writes
293 * @q: the request queue 305 * @q: the request queue
294 * @flag: see below 306 * @ordered: one of QUEUE_ORDERED_*
295 * 307 *
296 * Description: 308 * Description:
297 * For journalled file systems, doing ordered writes on a commit 309 * For journalled file systems, doing ordered writes on a commit
@@ -300,28 +312,30 @@ static inline void rq_init(request_queue_t *q, struct request *rq)
300 * feature should call this function and indicate so. 312 * feature should call this function and indicate so.
301 * 313 *
302 **/ 314 **/
303void blk_queue_ordered(request_queue_t *q, int flag) 315int blk_queue_ordered(request_queue_t *q, unsigned ordered,
304{ 316 prepare_flush_fn *prepare_flush_fn)
305 switch (flag) { 317{
306 case QUEUE_ORDERED_NONE: 318 if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) &&
307 if (q->flush_rq) 319 prepare_flush_fn == NULL) {
308 kmem_cache_free(request_cachep, q->flush_rq); 320 printk(KERN_ERR "blk_queue_ordered: prepare_flush_fn required\n");
309 q->flush_rq = NULL; 321 return -EINVAL;
310 q->ordered = flag;
311 break;
312 case QUEUE_ORDERED_TAG:
313 q->ordered = flag;
314 break;
315 case QUEUE_ORDERED_FLUSH:
316 q->ordered = flag;
317 if (!q->flush_rq)
318 q->flush_rq = kmem_cache_alloc(request_cachep,
319 GFP_KERNEL);
320 break;
321 default:
322 printk("blk_queue_ordered: bad value %d\n", flag);
323 break;
324 } 322 }
323
324 if (ordered != QUEUE_ORDERED_NONE &&
325 ordered != QUEUE_ORDERED_DRAIN &&
326 ordered != QUEUE_ORDERED_DRAIN_FLUSH &&
327 ordered != QUEUE_ORDERED_DRAIN_FUA &&
328 ordered != QUEUE_ORDERED_TAG &&
329 ordered != QUEUE_ORDERED_TAG_FLUSH &&
330 ordered != QUEUE_ORDERED_TAG_FUA) {
331 printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered);
332 return -EINVAL;
333 }
334
335 q->next_ordered = ordered;
336 q->prepare_flush_fn = prepare_flush_fn;
337
338 return 0;
325} 339}
326 340
327EXPORT_SYMBOL(blk_queue_ordered); 341EXPORT_SYMBOL(blk_queue_ordered);
@@ -346,167 +360,265 @@ EXPORT_SYMBOL(blk_queue_issue_flush_fn);
346/* 360/*
347 * Cache flushing for ordered writes handling 361 * Cache flushing for ordered writes handling
348 */ 362 */
349static void blk_pre_flush_end_io(struct request *flush_rq) 363inline unsigned blk_ordered_cur_seq(request_queue_t *q)
350{ 364{
351 struct request *rq = flush_rq->end_io_data; 365 if (!q->ordseq)
352 request_queue_t *q = rq->q; 366 return 0;
353 367 return 1 << ffz(q->ordseq);
354 elv_completed_request(q, flush_rq);
355
356 rq->flags |= REQ_BAR_PREFLUSH;
357
358 if (!flush_rq->errors)
359 elv_requeue_request(q, rq);
360 else {
361 q->end_flush_fn(q, flush_rq);
362 clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
363 q->request_fn(q);
364 }
365} 368}
366 369
367static void blk_post_flush_end_io(struct request *flush_rq) 370unsigned blk_ordered_req_seq(struct request *rq)
368{ 371{
369 struct request *rq = flush_rq->end_io_data;
370 request_queue_t *q = rq->q; 372 request_queue_t *q = rq->q;
371 373
372 elv_completed_request(q, flush_rq); 374 BUG_ON(q->ordseq == 0);
373 375
374 rq->flags |= REQ_BAR_POSTFLUSH; 376 if (rq == &q->pre_flush_rq)
377 return QUEUE_ORDSEQ_PREFLUSH;
378 if (rq == &q->bar_rq)
379 return QUEUE_ORDSEQ_BAR;
380 if (rq == &q->post_flush_rq)
381 return QUEUE_ORDSEQ_POSTFLUSH;
375 382
376 q->end_flush_fn(q, flush_rq); 383 if ((rq->flags & REQ_ORDERED_COLOR) ==
377 clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags); 384 (q->orig_bar_rq->flags & REQ_ORDERED_COLOR))
378 q->request_fn(q); 385 return QUEUE_ORDSEQ_DRAIN;
386 else
387 return QUEUE_ORDSEQ_DONE;
379} 388}
380 389
381struct request *blk_start_pre_flush(request_queue_t *q, struct request *rq) 390void blk_ordered_complete_seq(request_queue_t *q, unsigned seq, int error)
382{ 391{
383 struct request *flush_rq = q->flush_rq; 392 struct request *rq;
384 393 int uptodate;
385 BUG_ON(!blk_barrier_rq(rq));
386 394
387 if (test_and_set_bit(QUEUE_FLAG_FLUSH, &q->queue_flags)) 395 if (error && !q->orderr)
388 return NULL; 396 q->orderr = error;
389 397
390 rq_init(q, flush_rq); 398 BUG_ON(q->ordseq & seq);
391 flush_rq->elevator_private = NULL; 399 q->ordseq |= seq;
392 flush_rq->flags = REQ_BAR_FLUSH;
393 flush_rq->rq_disk = rq->rq_disk;
394 flush_rq->rl = NULL;
395 400
396 /* 401 if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
397 * prepare_flush returns 0 if no flush is needed, just mark both 402 return;
398 * pre and post flush as done in that case
399 */
400 if (!q->prepare_flush_fn(q, flush_rq)) {
401 rq->flags |= REQ_BAR_PREFLUSH | REQ_BAR_POSTFLUSH;
402 clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
403 return rq;
404 }
405 403
406 /* 404 /*
407 * some drivers dequeue requests right away, some only after io 405 * Okay, sequence complete.
408 * completion. make sure the request is dequeued.
409 */ 406 */
410 if (!list_empty(&rq->queuelist)) 407 rq = q->orig_bar_rq;
411 blkdev_dequeue_request(rq); 408 uptodate = q->orderr ? q->orderr : 1;
412 409
413 flush_rq->end_io_data = rq; 410 q->ordseq = 0;
414 flush_rq->end_io = blk_pre_flush_end_io;
415 411
416 __elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0); 412 end_that_request_first(rq, uptodate, rq->hard_nr_sectors);
417 return flush_rq; 413 end_that_request_last(rq, uptodate);
418} 414}
419 415
420static void blk_start_post_flush(request_queue_t *q, struct request *rq) 416static void pre_flush_end_io(struct request *rq, int error)
421{ 417{
422 struct request *flush_rq = q->flush_rq; 418 elv_completed_request(rq->q, rq);
419 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error);
420}
423 421
424 BUG_ON(!blk_barrier_rq(rq)); 422static void bar_end_io(struct request *rq, int error)
423{
424 elv_completed_request(rq->q, rq);
425 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error);
426}
425 427
426 rq_init(q, flush_rq); 428static void post_flush_end_io(struct request *rq, int error)
427 flush_rq->elevator_private = NULL; 429{
428 flush_rq->flags = REQ_BAR_FLUSH; 430 elv_completed_request(rq->q, rq);
429 flush_rq->rq_disk = rq->rq_disk; 431 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
430 flush_rq->rl = NULL; 432}
431 433
432 if (q->prepare_flush_fn(q, flush_rq)) { 434static void queue_flush(request_queue_t *q, unsigned which)
433 flush_rq->end_io_data = rq; 435{
434 flush_rq->end_io = blk_post_flush_end_io; 436 struct request *rq;
437 rq_end_io_fn *end_io;
435 438
436 __elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0); 439 if (which == QUEUE_ORDERED_PREFLUSH) {
437 q->request_fn(q); 440 rq = &q->pre_flush_rq;
441 end_io = pre_flush_end_io;
442 } else {
443 rq = &q->post_flush_rq;
444 end_io = post_flush_end_io;
438 } 445 }
446
447 rq_init(q, rq);
448 rq->flags = REQ_HARDBARRIER;
449 rq->elevator_private = NULL;
450 rq->rq_disk = q->bar_rq.rq_disk;
451 rq->rl = NULL;
452 rq->end_io = end_io;
453 q->prepare_flush_fn(q, rq);
454
455 __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
439} 456}
440 457
441static inline int blk_check_end_barrier(request_queue_t *q, struct request *rq, 458static inline struct request *start_ordered(request_queue_t *q,
442 int sectors) 459 struct request *rq)
443{ 460{
444 if (sectors > rq->nr_sectors) 461 q->bi_size = 0;
445 sectors = rq->nr_sectors; 462 q->orderr = 0;
463 q->ordered = q->next_ordered;
464 q->ordseq |= QUEUE_ORDSEQ_STARTED;
465
466 /*
467 * Prep proxy barrier request.
468 */
469 blkdev_dequeue_request(rq);
470 q->orig_bar_rq = rq;
471 rq = &q->bar_rq;
472 rq_init(q, rq);
473 rq->flags = bio_data_dir(q->orig_bar_rq->bio);
474 rq->flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0;
475 rq->elevator_private = NULL;
476 rq->rl = NULL;
477 init_request_from_bio(rq, q->orig_bar_rq->bio);
478 rq->end_io = bar_end_io;
479
480 /*
481 * Queue ordered sequence. As we stack them at the head, we
482 * need to queue in reverse order. Note that we rely on that
483 * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
484 * request gets inbetween ordered sequence.
485 */
486 if (q->ordered & QUEUE_ORDERED_POSTFLUSH)
487 queue_flush(q, QUEUE_ORDERED_POSTFLUSH);
488 else
489 q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;
446 490
447 rq->nr_sectors -= sectors; 491 __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
448 return rq->nr_sectors; 492
493 if (q->ordered & QUEUE_ORDERED_PREFLUSH) {
494 queue_flush(q, QUEUE_ORDERED_PREFLUSH);
495 rq = &q->pre_flush_rq;
496 } else
497 q->ordseq |= QUEUE_ORDSEQ_PREFLUSH;
498
499 if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0)
500 q->ordseq |= QUEUE_ORDSEQ_DRAIN;
501 else
502 rq = NULL;
503
504 return rq;
449} 505}
450 506
451static int __blk_complete_barrier_rq(request_queue_t *q, struct request *rq, 507int blk_do_ordered(request_queue_t *q, struct request **rqp)
452 int sectors, int queue_locked)
453{ 508{
454 if (q->ordered != QUEUE_ORDERED_FLUSH) 509 struct request *rq = *rqp, *allowed_rq;
455 return 0; 510 int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);
456 if (!blk_fs_request(rq) || !blk_barrier_rq(rq))
457 return 0;
458 if (blk_barrier_postflush(rq))
459 return 0;
460 511
461 if (!blk_check_end_barrier(q, rq, sectors)) { 512 if (!q->ordseq) {
462 unsigned long flags = 0; 513 if (!is_barrier)
514 return 1;
463 515
464 if (!queue_locked) 516 if (q->next_ordered != QUEUE_ORDERED_NONE) {
465 spin_lock_irqsave(q->queue_lock, flags); 517 *rqp = start_ordered(q, rq);
518 return 1;
519 } else {
520 /*
521 * This can happen when the queue switches to
522 * ORDERED_NONE while this request is on it.
523 */
524 blkdev_dequeue_request(rq);
525 end_that_request_first(rq, -EOPNOTSUPP,
526 rq->hard_nr_sectors);
527 end_that_request_last(rq, -EOPNOTSUPP);
528 *rqp = NULL;
529 return 0;
530 }
531 }
466 532
467 blk_start_post_flush(q, rq); 533 if (q->ordered & QUEUE_ORDERED_TAG) {
534 if (is_barrier && rq != &q->bar_rq)
535 *rqp = NULL;
536 return 1;
537 }
468 538
469 if (!queue_locked) 539 switch (blk_ordered_cur_seq(q)) {
470 spin_unlock_irqrestore(q->queue_lock, flags); 540 case QUEUE_ORDSEQ_PREFLUSH:
541 allowed_rq = &q->pre_flush_rq;
542 break;
543 case QUEUE_ORDSEQ_BAR:
544 allowed_rq = &q->bar_rq;
545 break;
546 case QUEUE_ORDSEQ_POSTFLUSH:
547 allowed_rq = &q->post_flush_rq;
548 break;
549 default:
550 allowed_rq = NULL;
551 break;
471 } 552 }
472 553
554 if (rq != allowed_rq &&
555 (blk_fs_request(rq) || rq == &q->pre_flush_rq ||
556 rq == &q->post_flush_rq))
557 *rqp = NULL;
558
473 return 1; 559 return 1;
474} 560}
475 561
476/** 562static int flush_dry_bio_endio(struct bio *bio, unsigned int bytes, int error)
477 * blk_complete_barrier_rq - complete possible barrier request
478 * @q: the request queue for the device
479 * @rq: the request
480 * @sectors: number of sectors to complete
481 *
482 * Description:
483 * Used in driver end_io handling to determine whether to postpone
484 * completion of a barrier request until a post flush has been done. This
485 * is the unlocked variant, used if the caller doesn't already hold the
486 * queue lock.
487 **/
488int blk_complete_barrier_rq(request_queue_t *q, struct request *rq, int sectors)
489{ 563{
490 return __blk_complete_barrier_rq(q, rq, sectors, 0); 564 request_queue_t *q = bio->bi_private;
565 struct bio_vec *bvec;
566 int i;
567
568 /*
569 * This is dry run, restore bio_sector and size. We'll finish
570 * this request again with the original bi_end_io after an
571 * error occurs or post flush is complete.
572 */
573 q->bi_size += bytes;
574
575 if (bio->bi_size)
576 return 1;
577
578 /* Rewind bvec's */
579 bio->bi_idx = 0;
580 bio_for_each_segment(bvec, bio, i) {
581 bvec->bv_len += bvec->bv_offset;
582 bvec->bv_offset = 0;
583 }
584
585 /* Reset bio */
586 set_bit(BIO_UPTODATE, &bio->bi_flags);
587 bio->bi_size = q->bi_size;
588 bio->bi_sector -= (q->bi_size >> 9);
589 q->bi_size = 0;
590
591 return 0;
491} 592}
492EXPORT_SYMBOL(blk_complete_barrier_rq);
493 593
494/** 594static inline int ordered_bio_endio(struct request *rq, struct bio *bio,
495 * blk_complete_barrier_rq_locked - complete possible barrier request 595 unsigned int nbytes, int error)
496 * @q: the request queue for the device
497 * @rq: the request
498 * @sectors: number of sectors to complete
499 *
500 * Description:
501 * See blk_complete_barrier_rq(). This variant must be used if the caller
502 * holds the queue lock.
503 **/
504int blk_complete_barrier_rq_locked(request_queue_t *q, struct request *rq,
505 int sectors)
506{ 596{
507 return __blk_complete_barrier_rq(q, rq, sectors, 1); 597 request_queue_t *q = rq->q;
598 bio_end_io_t *endio;
599 void *private;
600
601 if (&q->bar_rq != rq)
602 return 0;
603
604 /*
605 * Okay, this is the barrier request in progress, dry finish it.
606 */
607 if (error && !q->orderr)
608 q->orderr = error;
609
610 endio = bio->bi_end_io;
611 private = bio->bi_private;
612 bio->bi_end_io = flush_dry_bio_endio;
613 bio->bi_private = q;
614
615 bio_endio(bio, nbytes, error);
616
617 bio->bi_end_io = endio;
618 bio->bi_private = private;
619
620 return 1;
508} 621}
509EXPORT_SYMBOL(blk_complete_barrier_rq_locked);
510 622
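ordered_bio_endio() saves bi_end_io/bi_private, points them at flush_dry_bio_endio() and the queue for one bio_endio() call, then restores them so the original completion can still run after the post flush. The same save/replace/call/restore pattern in a minimal standalone form (all names here are hypothetical):

#include <stdio.h>

typedef void (*endio_fn)(void *priv);

struct mini_bio { endio_fn bi_end_io; void *bi_private; };

static void real_endio(void *priv) { printf("real completion for %s\n", (char *)priv); }
static void dry_endio(void *priv)  { printf("dry completion, queue %s\n", (char *)priv); }

/* Temporarily redirect the completion, then put the original handler back. */
static void dry_finish(struct mini_bio *bio, void *queue)
{
	endio_fn saved_fn = bio->bi_end_io;
	void *saved_priv  = bio->bi_private;

	bio->bi_end_io  = dry_endio;
	bio->bi_private = queue;
	bio->bi_end_io(bio->bi_private);

	bio->bi_end_io  = saved_fn;
	bio->bi_private = saved_priv;
}

int main(void)
{
	struct mini_bio b = { real_endio, "bio0" };

	dry_finish(&b, "q0");
	b.bi_end_io(b.bi_private);	/* the original handler is intact */
	return 0;
}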
511/** 623/**
512 * blk_queue_bounce_limit - set bounce buffer limit for queue 624 * blk_queue_bounce_limit - set bounce buffer limit for queue
@@ -557,7 +669,12 @@ void blk_queue_max_sectors(request_queue_t *q, unsigned short max_sectors)
557 printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors); 669 printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors);
558 } 670 }
559 671
560 q->max_sectors = q->max_hw_sectors = max_sectors; 672 if (BLK_DEF_MAX_SECTORS > max_sectors)
673 q->max_hw_sectors = q->max_sectors = max_sectors;
674 else {
675 q->max_sectors = BLK_DEF_MAX_SECTORS;
676 q->max_hw_sectors = max_sectors;
677 }
561} 678}
562 679
563EXPORT_SYMBOL(blk_queue_max_sectors); 680EXPORT_SYMBOL(blk_queue_max_sectors);
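The blk_queue_max_sectors() change above splits one limit into two: max_sectors is the soft per-request limit (never above BLK_DEF_MAX_SECTORS), while max_hw_sectors records what the hardware can actually take. A small standalone sketch of the clamp; the value 1024 used for BLK_DEF_MAX_SECTORS here is only an assumption for illustration:

#include <stdio.h>

#define BLK_DEF_MAX_SECTORS 1024	/* assumed value, for illustration only */

struct limits { unsigned short max_sectors, max_hw_sectors; };

/* Mirror of the new logic: the soft limit is capped at the default,
 * the hardware limit keeps whatever the driver reported. */
static void set_max_sectors(struct limits *q, unsigned short max_sectors)
{
	if (BLK_DEF_MAX_SECTORS > max_sectors) {
		q->max_hw_sectors = q->max_sectors = max_sectors;
	} else {
		q->max_sectors = BLK_DEF_MAX_SECTORS;
		q->max_hw_sectors = max_sectors;
	}
}

int main(void)
{
	struct limits q;

	set_max_sectors(&q, 8192);	/* large controller limit */
	printf("soft=%u hw=%u\n", q.max_sectors, q.max_hw_sectors);	/* soft=1024 hw=8192 */

	set_max_sectors(&q, 256);	/* small controller limit */
	printf("soft=%u hw=%u\n", q.max_sectors, q.max_hw_sectors);	/* soft=256 hw=256 */
	return 0;
}

This split is also why blk_rq_map_user(), blk_rq_map_kern() and sg_io() later in the diff check lengths against max_hw_sectors << 9 rather than max_sectors << 9: pass-through requests are not split by the block layer, so only the hardware limit applies to them.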
@@ -659,8 +776,8 @@ EXPORT_SYMBOL(blk_queue_hardsect_size);
659void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b) 776void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b)
660{ 777{
661 /* zero is "infinity" */ 778 /* zero is "infinity" */
662 t->max_sectors = t->max_hw_sectors = 779 t->max_sectors = min_not_zero(t->max_sectors,b->max_sectors);
663 min_not_zero(t->max_sectors,b->max_sectors); 780 t->max_hw_sectors = min_not_zero(t->max_hw_sectors,b->max_hw_sectors);
664 781
665 t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments); 782 t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments);
666 t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments); 783 t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments);
@@ -1036,12 +1153,13 @@ void blk_queue_invalidate_tags(request_queue_t *q)
1036 1153
1037EXPORT_SYMBOL(blk_queue_invalidate_tags); 1154EXPORT_SYMBOL(blk_queue_invalidate_tags);
1038 1155
1039static char *rq_flags[] = { 1156static const char * const rq_flags[] = {
1040 "REQ_RW", 1157 "REQ_RW",
1041 "REQ_FAILFAST", 1158 "REQ_FAILFAST",
1042 "REQ_SORTED", 1159 "REQ_SORTED",
1043 "REQ_SOFTBARRIER", 1160 "REQ_SOFTBARRIER",
1044 "REQ_HARDBARRIER", 1161 "REQ_HARDBARRIER",
1162 "REQ_FUA",
1045 "REQ_CMD", 1163 "REQ_CMD",
1046 "REQ_NOMERGE", 1164 "REQ_NOMERGE",
1047 "REQ_STARTED", 1165 "REQ_STARTED",
@@ -1061,6 +1179,7 @@ static char *rq_flags[] = {
1061 "REQ_PM_SUSPEND", 1179 "REQ_PM_SUSPEND",
1062 "REQ_PM_RESUME", 1180 "REQ_PM_RESUME",
1063 "REQ_PM_SHUTDOWN", 1181 "REQ_PM_SHUTDOWN",
1182 "REQ_ORDERED_COLOR",
1064}; 1183};
1065 1184
1066void blk_dump_rq_flags(struct request *rq, char *msg) 1185void blk_dump_rq_flags(struct request *rq, char *msg)
@@ -1295,9 +1414,15 @@ static inline int ll_new_hw_segment(request_queue_t *q,
1295static int ll_back_merge_fn(request_queue_t *q, struct request *req, 1414static int ll_back_merge_fn(request_queue_t *q, struct request *req,
1296 struct bio *bio) 1415 struct bio *bio)
1297{ 1416{
1417 unsigned short max_sectors;
1298 int len; 1418 int len;
1299 1419
1300 if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) { 1420 if (unlikely(blk_pc_request(req)))
1421 max_sectors = q->max_hw_sectors;
1422 else
1423 max_sectors = q->max_sectors;
1424
1425 if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
1301 req->flags |= REQ_NOMERGE; 1426 req->flags |= REQ_NOMERGE;
1302 if (req == q->last_merge) 1427 if (req == q->last_merge)
1303 q->last_merge = NULL; 1428 q->last_merge = NULL;
@@ -1327,9 +1452,16 @@ static int ll_back_merge_fn(request_queue_t *q, struct request *req,
1327static int ll_front_merge_fn(request_queue_t *q, struct request *req, 1452static int ll_front_merge_fn(request_queue_t *q, struct request *req,
1328 struct bio *bio) 1453 struct bio *bio)
1329{ 1454{
1455 unsigned short max_sectors;
1330 int len; 1456 int len;
1331 1457
1332 if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) { 1458 if (unlikely(blk_pc_request(req)))
1459 max_sectors = q->max_hw_sectors;
1460 else
1461 max_sectors = q->max_sectors;
1462
1463
1464 if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
1333 req->flags |= REQ_NOMERGE; 1465 req->flags |= REQ_NOMERGE;
1334 if (req == q->last_merge) 1466 if (req == q->last_merge)
1335 q->last_merge = NULL; 1467 q->last_merge = NULL;
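Both merge functions now pick their limit by request type: file-system requests stay under the soft max_sectors, while pass-through (blk_pc_request) requests may grow up to max_hw_sectors. A compact standalone sketch of that selection (types and names are illustrative):

#include <stdio.h>
#include <stdbool.h>

struct q_limits { unsigned short max_sectors, max_hw_sectors; };

/* True if adding bio_sectors to a request currently req_sectors long
 * still fits under the limit that applies to this kind of request. */
static bool merge_fits(const struct q_limits *q, bool is_pc_request,
		       unsigned long req_sectors, unsigned long bio_sectors)
{
	unsigned short max = is_pc_request ? q->max_hw_sectors : q->max_sectors;

	return req_sectors + bio_sectors <= max;
}

int main(void)
{
	struct q_limits q = { .max_sectors = 1024, .max_hw_sectors = 8192 };

	printf("%d\n", merge_fits(&q, false, 1000, 100));	/* 0: fs request, soft limit */
	printf("%d\n", merge_fits(&q, true,  1000, 100));	/* 1: pc request, hw limit */
	return 0;
}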
@@ -1625,8 +1757,6 @@ void blk_cleanup_queue(request_queue_t * q)
1625 if (q->queue_tags) 1757 if (q->queue_tags)
1626 __blk_queue_free_tags(q); 1758 __blk_queue_free_tags(q);
1627 1759
1628 blk_queue_ordered(q, QUEUE_ORDERED_NONE);
1629
1630 kmem_cache_free(requestq_cachep, q); 1760 kmem_cache_free(requestq_cachep, q);
1631} 1761}
1632 1762
@@ -1651,8 +1781,6 @@ static int blk_init_free_list(request_queue_t *q)
1651 return 0; 1781 return 0;
1652} 1782}
1653 1783
1654static int __make_request(request_queue_t *, struct bio *);
1655
1656request_queue_t *blk_alloc_queue(gfp_t gfp_mask) 1784request_queue_t *blk_alloc_queue(gfp_t gfp_mask)
1657{ 1785{
1658 return blk_alloc_queue_node(gfp_mask, -1); 1786 return blk_alloc_queue_node(gfp_mask, -1);
@@ -1892,40 +2020,40 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio,
1892{ 2020{
1893 struct request *rq = NULL; 2021 struct request *rq = NULL;
1894 struct request_list *rl = &q->rq; 2022 struct request_list *rl = &q->rq;
1895 struct io_context *ioc = current_io_context(GFP_ATOMIC); 2023 struct io_context *ioc = NULL;
1896 int priv; 2024 int may_queue, priv;
1897 2025
1898 if (rl->count[rw]+1 >= q->nr_requests) { 2026 may_queue = elv_may_queue(q, rw, bio);
1899 /* 2027 if (may_queue == ELV_MQUEUE_NO)
1900 * The queue will fill after this allocation, so set it as 2028 goto rq_starved;
1901 * full, and mark this process as "batching". This process
1902 * will be allowed to complete a batch of requests, others
1903 * will be blocked.
1904 */
1905 if (!blk_queue_full(q, rw)) {
1906 ioc_set_batching(q, ioc);
1907 blk_set_queue_full(q, rw);
1908 }
1909 }
1910
1911 switch (elv_may_queue(q, rw, bio)) {
1912 case ELV_MQUEUE_NO:
1913 goto rq_starved;
1914 case ELV_MQUEUE_MAY:
1915 break;
1916 case ELV_MQUEUE_MUST:
1917 goto get_rq;
1918 }
1919 2029
1920 if (blk_queue_full(q, rw) && !ioc_batching(q, ioc)) { 2030 if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) {
1921 /* 2031 if (rl->count[rw]+1 >= q->nr_requests) {
1922 * The queue is full and the allocating process is not a 2032 ioc = current_io_context(GFP_ATOMIC);
1923 * "batcher", and not exempted by the IO scheduler 2033 /*
1924 */ 2034 * The queue will fill after this allocation, so set
1925 goto out; 2035 * it as full, and mark this process as "batching".
2036 * This process will be allowed to complete a batch of
2037 * requests, others will be blocked.
2038 */
2039 if (!blk_queue_full(q, rw)) {
2040 ioc_set_batching(q, ioc);
2041 blk_set_queue_full(q, rw);
2042 } else {
2043 if (may_queue != ELV_MQUEUE_MUST
2044 && !ioc_batching(q, ioc)) {
2045 /*
2046 * The queue is full and the allocating
2047 * process is not a "batcher", and not
2048 * exempted by the IO scheduler
2049 */
2050 goto out;
2051 }
2052 }
2053 }
2054 set_queue_congested(q, rw);
1926 } 2055 }
1927 2056
1928get_rq:
1929 /* 2057 /*
1930 * Only allow batching queuers to allocate up to 50% over the defined 2058 * Only allow batching queuers to allocate up to 50% over the defined
1931 * limit of requests, otherwise we could have thousands of requests 2059 * limit of requests, otherwise we could have thousands of requests
@@ -1936,8 +2064,6 @@ get_rq:
1936 2064
1937 rl->count[rw]++; 2065 rl->count[rw]++;
1938 rl->starved[rw] = 0; 2066 rl->starved[rw] = 0;
1939 if (rl->count[rw] >= queue_congestion_on_threshold(q))
1940 set_queue_congested(q, rw);
1941 2067
1942 priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); 2068 priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
1943 if (priv) 2069 if (priv)
@@ -1946,7 +2072,7 @@ get_rq:
1946 spin_unlock_irq(q->queue_lock); 2072 spin_unlock_irq(q->queue_lock);
1947 2073
1948 rq = blk_alloc_request(q, rw, bio, priv, gfp_mask); 2074 rq = blk_alloc_request(q, rw, bio, priv, gfp_mask);
1949 if (!rq) { 2075 if (unlikely(!rq)) {
1950 /* 2076 /*
1951 * Allocation failed presumably due to memory. Undo anything 2077 * Allocation failed presumably due to memory. Undo anything
1952 * we might have messed up. 2078 * we might have messed up.
@@ -1971,6 +2097,12 @@ rq_starved:
1971 goto out; 2097 goto out;
1972 } 2098 }
1973 2099
2100 /*
2101 * ioc may be NULL here, and ioc_batching will be false. That's
2102 * OK, if the queue is under the request limit then requests need
2103 * not count toward the nr_batch_requests limit. There will always
2104 * be some limit enforced by BLK_BATCH_TIME.
2105 */
1974 if (ioc_batching(q, ioc)) 2106 if (ioc_batching(q, ioc))
1975 ioc->nr_batch_requests--; 2107 ioc->nr_batch_requests--;
1976 2108
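The reworked get_request() above folds the congestion and full-queue bookkeeping into one branch: the elevator is consulted first through elv_may_queue(), the queue is marked congested once the count crosses the congestion threshold, and when the queue is actually full only batching tasks (or tasks the elevator marks ELV_MQUEUE_MUST) may keep allocating. A standalone model of that admission decision, with plain integers and booleans in place of the kernel structures:

#include <stdio.h>
#include <stdbool.h>

enum may_queue { MQ_NO, MQ_MAY, MQ_MUST };	/* stand-ins for ELV_MQUEUE_* */

struct rl_state {
	int count, nr_requests, congestion_on;
	bool queue_full, congested;
};

/* Decide whether one more request may be allocated right now. */
static bool may_allocate(struct rl_state *rl, enum may_queue mq, bool batching)
{
	if (mq == MQ_NO)
		return false;

	if (rl->count + 1 >= rl->congestion_on) {
		if (rl->count + 1 >= rl->nr_requests) {
			if (!rl->queue_full)
				rl->queue_full = true;	/* kernel also makes this task a "batcher" */
			else if (mq != MQ_MUST && !batching)
				return false;		/* full, not exempt, not batching */
		}
		rl->congested = true;
	}
	return true;
}

int main(void)
{
	struct rl_state rl = { .count = 127, .nr_requests = 128, .congestion_on = 113 };

	printf("%d congested=%d\n", may_allocate(&rl, MQ_MAY, false), rl.congested);
	printf("%d\n", may_allocate(&rl, MQ_MAY, false));	/* 0: full, not batching */
	printf("%d\n", may_allocate(&rl, MQ_MAY, true));	/* 1: batcher may continue */
	return 0;
}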
@@ -2146,7 +2278,7 @@ int blk_rq_map_user(request_queue_t *q, struct request *rq, void __user *ubuf,
2146 struct bio *bio; 2278 struct bio *bio;
2147 int reading; 2279 int reading;
2148 2280
2149 if (len > (q->max_sectors << 9)) 2281 if (len > (q->max_hw_sectors << 9))
2150 return -EINVAL; 2282 return -EINVAL;
2151 if (!len || !ubuf) 2283 if (!len || !ubuf)
2152 return -EINVAL; 2284 return -EINVAL;
@@ -2261,7 +2393,7 @@ int blk_rq_map_kern(request_queue_t *q, struct request *rq, void *kbuf,
2261{ 2393{
2262 struct bio *bio; 2394 struct bio *bio;
2263 2395
2264 if (len > (q->max_sectors << 9)) 2396 if (len > (q->max_hw_sectors << 9))
2265 return -EINVAL; 2397 return -EINVAL;
2266 if (!len || !kbuf) 2398 if (!len || !kbuf)
2267 return -EINVAL; 2399 return -EINVAL;
@@ -2297,7 +2429,7 @@ EXPORT_SYMBOL(blk_rq_map_kern);
2297 */ 2429 */
2298void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk, 2430void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk,
2299 struct request *rq, int at_head, 2431 struct request *rq, int at_head,
2300 void (*done)(struct request *)) 2432 rq_end_io_fn *done)
2301{ 2433{
2302 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; 2434 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
2303 2435
@@ -2308,6 +2440,8 @@ void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk,
2308 generic_unplug_device(q); 2440 generic_unplug_device(q);
2309} 2441}
2310 2442
2443EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
2444
2311/** 2445/**
2312 * blk_execute_rq - insert a request into queue for execution 2446 * blk_execute_rq - insert a request into queue for execution
2313 * @q: queue to insert the request in 2447 * @q: queue to insert the request in
@@ -2446,7 +2580,7 @@ void disk_round_stats(struct gendisk *disk)
2446/* 2580/*
2447 * queue lock must be held 2581 * queue lock must be held
2448 */ 2582 */
2449static void __blk_put_request(request_queue_t *q, struct request *req) 2583void __blk_put_request(request_queue_t *q, struct request *req)
2450{ 2584{
2451 struct request_list *rl = req->rl; 2585 struct request_list *rl = req->rl;
2452 2586
@@ -2475,6 +2609,8 @@ static void __blk_put_request(request_queue_t *q, struct request *req)
2475 } 2609 }
2476} 2610}
2477 2611
2612EXPORT_SYMBOL_GPL(__blk_put_request);
2613
2478void blk_put_request(struct request *req) 2614void blk_put_request(struct request *req)
2479{ 2615{
2480 unsigned long flags; 2616 unsigned long flags;
@@ -2497,7 +2633,7 @@ EXPORT_SYMBOL(blk_put_request);
2497 * blk_end_sync_rq - executes a completion event on a request 2633 * blk_end_sync_rq - executes a completion event on a request
2498 * @rq: request to complete 2634 * @rq: request to complete
2499 */ 2635 */
2500void blk_end_sync_rq(struct request *rq) 2636void blk_end_sync_rq(struct request *rq, int error)
2501{ 2637{
2502 struct completion *waiting = rq->waiting; 2638 struct completion *waiting = rq->waiting;
2503 2639
@@ -2611,29 +2747,35 @@ static inline int attempt_front_merge(request_queue_t *q, struct request *rq)
2611 return 0; 2747 return 0;
2612} 2748}
2613 2749
2614/** 2750static void init_request_from_bio(struct request *req, struct bio *bio)
2615 * blk_attempt_remerge - attempt to remerge active head with next request
2616 * @q: The &request_queue_t belonging to the device
2617 * @rq: The head request (usually)
2618 *
2619 * Description:
2620 * For head-active devices, the queue can easily be unplugged so quickly
2621 * that proper merging is not done on the front request. This may hurt
2622 * performance greatly for some devices. The block layer cannot safely
2623 * do merging on that first request for these queues, but the driver can
2624 * call this function and make it happen any way. Only the driver knows
2625 * when it is safe to do so.
2626 **/
2627void blk_attempt_remerge(request_queue_t *q, struct request *rq)
2628{ 2751{
2629 unsigned long flags; 2752 req->flags |= REQ_CMD;
2630 2753
2631 spin_lock_irqsave(q->queue_lock, flags); 2754 /*
2632 attempt_back_merge(q, rq); 2755 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST)
2633 spin_unlock_irqrestore(q->queue_lock, flags); 2756 */
2634} 2757 if (bio_rw_ahead(bio) || bio_failfast(bio))
2758 req->flags |= REQ_FAILFAST;
2635 2759
2636EXPORT_SYMBOL(blk_attempt_remerge); 2760 /*
2761 * REQ_BARRIER implies no merging, but let's make it explicit
2762 */
2763 if (unlikely(bio_barrier(bio)))
2764 req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
2765
2766 req->errors = 0;
2767 req->hard_sector = req->sector = bio->bi_sector;
2768 req->hard_nr_sectors = req->nr_sectors = bio_sectors(bio);
2769 req->current_nr_sectors = req->hard_cur_sectors = bio_cur_sectors(bio);
2770 req->nr_phys_segments = bio_phys_segments(req->q, bio);
2771 req->nr_hw_segments = bio_hw_segments(req->q, bio);
2772 req->buffer = bio_data(bio); /* see ->buffer comment above */
2773 req->waiting = NULL;
2774 req->bio = req->biotail = bio;
2775 req->ioprio = bio_prio(bio);
2776 req->rq_disk = bio->bi_bdev->bd_disk;
2777 req->start_time = jiffies;
2778}
2637 2779
2638static int __make_request(request_queue_t *q, struct bio *bio) 2780static int __make_request(request_queue_t *q, struct bio *bio)
2639{ 2781{
@@ -2660,7 +2802,7 @@ static int __make_request(request_queue_t *q, struct bio *bio)
2660 spin_lock_prefetch(q->queue_lock); 2802 spin_lock_prefetch(q->queue_lock);
2661 2803
2662 barrier = bio_barrier(bio); 2804 barrier = bio_barrier(bio);
2663 if (unlikely(barrier) && (q->ordered == QUEUE_ORDERED_NONE)) { 2805 if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) {
2664 err = -EOPNOTSUPP; 2806 err = -EOPNOTSUPP;
2665 goto end_io; 2807 goto end_io;
2666 } 2808 }
@@ -2730,33 +2872,7 @@ get_rq:
2730 * We don't worry about that case for efficiency. It won't happen 2872 * We don't worry about that case for efficiency. It won't happen
2731 * often, and the elevators are able to handle it. 2873 * often, and the elevators are able to handle it.
2732 */ 2874 */
2733 2875 init_request_from_bio(req, bio);
2734 req->flags |= REQ_CMD;
2735
2736 /*
2737 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST)
2738 */
2739 if (bio_rw_ahead(bio) || bio_failfast(bio))
2740 req->flags |= REQ_FAILFAST;
2741
2742 /*
2743 * REQ_BARRIER implies no merging, but lets make it explicit
2744 */
2745 if (unlikely(barrier))
2746 req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
2747
2748 req->errors = 0;
2749 req->hard_sector = req->sector = sector;
2750 req->hard_nr_sectors = req->nr_sectors = nr_sectors;
2751 req->current_nr_sectors = req->hard_cur_sectors = cur_nr_sectors;
2752 req->nr_phys_segments = bio_phys_segments(q, bio);
2753 req->nr_hw_segments = bio_hw_segments(q, bio);
2754 req->buffer = bio_data(bio); /* see ->buffer comment above */
2755 req->waiting = NULL;
2756 req->bio = req->biotail = bio;
2757 req->ioprio = prio;
2758 req->rq_disk = bio->bi_bdev->bd_disk;
2759 req->start_time = jiffies;
2760 2876
2761 spin_lock_irq(q->queue_lock); 2877 spin_lock_irq(q->queue_lock);
2762 if (elv_queue_empty(q)) 2878 if (elv_queue_empty(q))
@@ -3047,7 +3163,8 @@ static int __end_that_request_first(struct request *req, int uptodate,
3047 if (nr_bytes >= bio->bi_size) { 3163 if (nr_bytes >= bio->bi_size) {
3048 req->bio = bio->bi_next; 3164 req->bio = bio->bi_next;
3049 nbytes = bio->bi_size; 3165 nbytes = bio->bi_size;
3050 bio_endio(bio, nbytes, error); 3166 if (!ordered_bio_endio(req, bio, nbytes, error))
3167 bio_endio(bio, nbytes, error);
3051 next_idx = 0; 3168 next_idx = 0;
3052 bio_nbytes = 0; 3169 bio_nbytes = 0;
3053 } else { 3170 } else {
@@ -3102,7 +3219,8 @@ static int __end_that_request_first(struct request *req, int uptodate,
3102 * if the request wasn't completed, update state 3219 * if the request wasn't completed, update state
3103 */ 3220 */
3104 if (bio_nbytes) { 3221 if (bio_nbytes) {
3105 bio_endio(bio, bio_nbytes, error); 3222 if (!ordered_bio_endio(req, bio, bio_nbytes, error))
3223 bio_endio(bio, bio_nbytes, error);
3106 bio->bi_idx += next_idx; 3224 bio->bi_idx += next_idx;
3107 bio_iovec(bio)->bv_offset += nr_bytes; 3225 bio_iovec(bio)->bv_offset += nr_bytes;
3108 bio_iovec(bio)->bv_len -= nr_bytes; 3226 bio_iovec(bio)->bv_len -= nr_bytes;
@@ -3157,11 +3275,100 @@ int end_that_request_chunk(struct request *req, int uptodate, int nr_bytes)
3157EXPORT_SYMBOL(end_that_request_chunk); 3275EXPORT_SYMBOL(end_that_request_chunk);
3158 3276
3159/* 3277/*
3278 * splice the completion data to a local structure and complete
3279 * the requests from the local list
3280 */
3281static void blk_done_softirq(struct softirq_action *h)
3282{
3283 struct list_head *cpu_list;
3284 LIST_HEAD(local_list);
3285
3286 local_irq_disable();
3287 cpu_list = &__get_cpu_var(blk_cpu_done);
3288 list_splice_init(cpu_list, &local_list);
3289 local_irq_enable();
3290
3291 while (!list_empty(&local_list)) {
3292 struct request *rq = list_entry(local_list.next, struct request, donelist);
3293
3294 list_del_init(&rq->donelist);
3295 rq->q->softirq_done_fn(rq);
3296 }
3297}
3298
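blk_done_softirq() drains the per-CPU completion list by splicing it onto a local list head with interrupts disabled, then walks the local copy with interrupts enabled again so hard-IRQ handlers can keep queueing new completions behind it. The splice-then-process idea in a standalone sketch; comments mark where local_irq_disable()/enable() would sit, and a plain singly linked list stands in for the kernel list:

#include <stdio.h>
#include <stddef.h>

struct node { int id; struct node *next; };

/* Per-"CPU" pending list, normally appended to from interrupt context. */
static struct node *pending;

/* Grab the whole shared list in one cheap step, then process the
 * private copy without blocking new additions. */
static void done_softirq(void)
{
	struct node *local;

	/* local_irq_disable() would go here in the kernel */
	local = pending;
	pending = NULL;
	/* local_irq_enable() would go here */

	while (local) {
		struct node *n = local;

		local = local->next;
		printf("completing request %d\n", n->id);	/* softirq_done_fn(rq) */
	}
}

int main(void)
{
	static struct node r2 = { 2, NULL }, r1 = { 1, &r2 };

	pending = &r1;
	done_softirq();
	return 0;
}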
3299#ifdef CONFIG_HOTPLUG_CPU
3300
3301static int blk_cpu_notify(struct notifier_block *self, unsigned long action,
3302 void *hcpu)
3303{
3304 /*
3305 * If a CPU goes away, splice its entries to the current CPU
3306 * and trigger a run of the softirq
3307 */
3308 if (action == CPU_DEAD) {
3309 int cpu = (unsigned long) hcpu;
3310
3311 local_irq_disable();
3312 list_splice_init(&per_cpu(blk_cpu_done, cpu),
3313 &__get_cpu_var(blk_cpu_done));
3314 raise_softirq_irqoff(BLOCK_SOFTIRQ);
3315 local_irq_enable();
3316 }
3317
3318 return NOTIFY_OK;
3319}
3320
3321
3322static struct notifier_block __devinitdata blk_cpu_notifier = {
3323 .notifier_call = blk_cpu_notify,
3324};
3325
3326#endif /* CONFIG_HOTPLUG_CPU */
3327
3328/**
3329 * blk_complete_request - end I/O on a request
3330 * @req: the request being processed
3331 *
3332 * Description:
3333 * Ends all I/O on a request. It does not handle partial completions,
3334 * unless the driver actually implements this in its completion callback
3335 * through requeueing. The actual completion happens out-of-order,
3336 * through a softirq handler. The user must have registered a completion
3337 * callback through blk_queue_softirq_done().
3338 **/
3339
3340void blk_complete_request(struct request *req)
3341{
3342 struct list_head *cpu_list;
3343 unsigned long flags;
3344
3345 BUG_ON(!req->q->softirq_done_fn);
3346
3347 local_irq_save(flags);
3348
3349 cpu_list = &__get_cpu_var(blk_cpu_done);
3350 list_add_tail(&req->donelist, cpu_list);
3351 raise_softirq_irqoff(BLOCK_SOFTIRQ);
3352
3353 local_irq_restore(flags);
3354}
3355
3356EXPORT_SYMBOL(blk_complete_request);
3357
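From a driver's point of view the new path has two halves: register a softirq completion handler on the queue, then call blk_complete_request() from the hardware interrupt handler instead of doing the end_that_request_*() work there. A minimal driver-side sketch, assuming blk_queue_softirq_done() takes the queue and a void (*)(struct request *) handler as the softirq_done_fn call above implies, and assuming the driver already dequeued the request when it started it; everything named my_* is hypothetical:

/* Runs in BLOCK_SOFTIRQ context, so the heavy completion work stays
 * off the hard-IRQ path. */
static void my_softirq_done(struct request *rq)
{
	int uptodate = (rq->errors == 0);
	unsigned long flags;

	if (!end_that_request_first(rq, uptodate, rq->hard_nr_sectors)) {
		spin_lock_irqsave(rq->q->queue_lock, flags);
		end_that_request_last(rq, uptodate);	/* needs the queue lock */
		spin_unlock_irqrestore(rq->q->queue_lock, flags);
	}
}

static void my_setup_queue(request_queue_t *q)
{
	blk_queue_softirq_done(q, my_softirq_done);
}

/* ...and in the interrupt handler, once the hardware reports the
 * request done, simply: blk_complete_request(rq); */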
3358/*
3160 * queue lock must be held 3359 * queue lock must be held
3161 */ 3360 */
3162void end_that_request_last(struct request *req) 3361void end_that_request_last(struct request *req, int uptodate)
3163{ 3362{
3164 struct gendisk *disk = req->rq_disk; 3363 struct gendisk *disk = req->rq_disk;
3364 int error;
3365
3366 /*
3367 * extend uptodate bool to allow < 0 value to be direct io error
3368 */
3369 error = 0;
3370 if (end_io_error(uptodate))
3371 error = !uptodate ? -EIO : uptodate;
3165 3372
3166 if (unlikely(laptop_mode) && blk_fs_request(req)) 3373 if (unlikely(laptop_mode) && blk_fs_request(req))
3167 laptop_io_completion(); 3374 laptop_io_completion();
@@ -3176,7 +3383,7 @@ void end_that_request_last(struct request *req)
3176 disk->in_flight--; 3383 disk->in_flight--;
3177 } 3384 }
3178 if (req->end_io) 3385 if (req->end_io)
3179 req->end_io(req); 3386 req->end_io(req, error);
3180 else 3387 else
3181 __blk_put_request(req->q, req); 3388 __blk_put_request(req->q, req);
3182} 3389}
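end_that_request_last() now takes the same uptodate convention as end_that_request_first(): 1 means success, 0 means a generic failure, and a negative value is passed straight through as the errno handed to req->end_io(). A tiny standalone sketch of that conversion:

#include <stdio.h>

#define EIO 5

/* 1 = ok, 0 = generic error, < 0 = specific -errno (mirrors the code above). */
static int uptodate_to_error(int uptodate)
{
	if (uptodate > 0)
		return 0;
	return uptodate ? uptodate : -EIO;
}

int main(void)
{
	printf("%d %d %d\n",
	       uptodate_to_error(1),	/* 0 */
	       uptodate_to_error(0),	/* -5 */
	       uptodate_to_error(-95));	/* -95, e.g. -EOPNOTSUPP as used above */
	return 0;
}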
@@ -3188,7 +3395,7 @@ void end_request(struct request *req, int uptodate)
3188 if (!end_that_request_first(req, uptodate, req->hard_cur_sectors)) { 3395 if (!end_that_request_first(req, uptodate, req->hard_cur_sectors)) {
3189 add_disk_randomness(req->rq_disk); 3396 add_disk_randomness(req->rq_disk);
3190 blkdev_dequeue_request(req); 3397 blkdev_dequeue_request(req);
3191 end_that_request_last(req); 3398 end_that_request_last(req, uptodate);
3192 } 3399 }
3193} 3400}
3194 3401
@@ -3226,6 +3433,8 @@ EXPORT_SYMBOL(kblockd_flush);
3226 3433
3227int __init blk_dev_init(void) 3434int __init blk_dev_init(void)
3228{ 3435{
3436 int i;
3437
3229 kblockd_workqueue = create_workqueue("kblockd"); 3438 kblockd_workqueue = create_workqueue("kblockd");
3230 if (!kblockd_workqueue) 3439 if (!kblockd_workqueue)
3231 panic("Failed to create kblockd\n"); 3440 panic("Failed to create kblockd\n");
@@ -3239,6 +3448,14 @@ int __init blk_dev_init(void)
3239 iocontext_cachep = kmem_cache_create("blkdev_ioc", 3448 iocontext_cachep = kmem_cache_create("blkdev_ioc",
3240 sizeof(struct io_context), 0, SLAB_PANIC, NULL, NULL); 3449 sizeof(struct io_context), 0, SLAB_PANIC, NULL, NULL);
3241 3450
3451 for (i = 0; i < NR_CPUS; i++)
3452 INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
3453
3454 open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL);
3455#ifdef CONFIG_HOTPLUG_CPU
3456 register_cpu_notifier(&blk_cpu_notifier);
3457#endif
3458
3242 blk_max_low_pfn = max_low_pfn; 3459 blk_max_low_pfn = max_low_pfn;
3243 blk_max_pfn = max_pfn; 3460 blk_max_pfn = max_pfn;
3244 3461
diff --git a/block/noop-iosched.c b/block/noop-iosched.c
index e54f006e7e60..f370e4a7fe6d 100644
--- a/block/noop-iosched.c
+++ b/block/noop-iosched.c
@@ -7,21 +7,94 @@
7#include <linux/module.h> 7#include <linux/module.h>
8#include <linux/init.h> 8#include <linux/init.h>
9 9
10static void elevator_noop_add_request(request_queue_t *q, struct request *rq) 10struct noop_data {
11 struct list_head queue;
12};
13
14static void noop_merged_requests(request_queue_t *q, struct request *rq,
15 struct request *next)
16{
17 list_del_init(&next->queuelist);
18}
19
20static int noop_dispatch(request_queue_t *q, int force)
21{
22 struct noop_data *nd = q->elevator->elevator_data;
23
24 if (!list_empty(&nd->queue)) {
25 struct request *rq;
26 rq = list_entry(nd->queue.next, struct request, queuelist);
27 list_del_init(&rq->queuelist);
28 elv_dispatch_sort(q, rq);
29 return 1;
30 }
31 return 0;
32}
33
34static void noop_add_request(request_queue_t *q, struct request *rq)
35{
36 struct noop_data *nd = q->elevator->elevator_data;
37
38 list_add_tail(&rq->queuelist, &nd->queue);
39}
40
41static int noop_queue_empty(request_queue_t *q)
11{ 42{
12 rq->flags |= REQ_NOMERGE; 43 struct noop_data *nd = q->elevator->elevator_data;
13 elv_dispatch_add_tail(q, rq); 44
45 return list_empty(&nd->queue);
46}
47
48static struct request *
49noop_former_request(request_queue_t *q, struct request *rq)
50{
51 struct noop_data *nd = q->elevator->elevator_data;
52
53 if (rq->queuelist.prev == &nd->queue)
54 return NULL;
55 return list_entry(rq->queuelist.prev, struct request, queuelist);
56}
57
58static struct request *
59noop_latter_request(request_queue_t *q, struct request *rq)
60{
61 struct noop_data *nd = q->elevator->elevator_data;
62
63 if (rq->queuelist.next == &nd->queue)
64 return NULL;
65 return list_entry(rq->queuelist.next, struct request, queuelist);
14} 66}
15 67
16static int elevator_noop_dispatch(request_queue_t *q, int force) 68static int noop_init_queue(request_queue_t *q, elevator_t *e)
17{ 69{
70 struct noop_data *nd;
71
72 nd = kmalloc(sizeof(*nd), GFP_KERNEL);
73 if (!nd)
74 return -ENOMEM;
75 INIT_LIST_HEAD(&nd->queue);
76 e->elevator_data = nd;
18 return 0; 77 return 0;
19} 78}
20 79
80static void noop_exit_queue(elevator_t *e)
81{
82 struct noop_data *nd = e->elevator_data;
83
84 BUG_ON(!list_empty(&nd->queue));
85 kfree(nd);
86}
87
21static struct elevator_type elevator_noop = { 88static struct elevator_type elevator_noop = {
22 .ops = { 89 .ops = {
23 .elevator_dispatch_fn = elevator_noop_dispatch, 90 .elevator_merge_req_fn = noop_merged_requests,
24 .elevator_add_req_fn = elevator_noop_add_request, 91 .elevator_dispatch_fn = noop_dispatch,
92 .elevator_add_req_fn = noop_add_request,
93 .elevator_queue_empty_fn = noop_queue_empty,
94 .elevator_former_req_fn = noop_former_request,
95 .elevator_latter_req_fn = noop_latter_request,
96 .elevator_init_fn = noop_init_queue,
97 .elevator_exit_fn = noop_exit_queue,
25 }, 98 },
26 .elevator_name = "noop", 99 .elevator_name = "noop",
27 .elevator_owner = THIS_MODULE, 100 .elevator_owner = THIS_MODULE,
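The rewritten noop elevator keeps one FIFO list per queue and wires the standard elevator hooks to it. The rest of the file, outside this hunk, presumably registers the type with the elevator core in the usual module boilerplate; a minimal sketch of that, assuming the elv_register()/elv_unregister() interface of this kernel:

static int __init noop_init(void)
{
	return elv_register(&elevator_noop);
}

static void __exit noop_exit(void)
{
	elv_unregister(&elevator_noop);
}

module_init(noop_init);
module_exit(noop_exit);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("No-op IO scheduler");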
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 382dea7b224c..cc72210687eb 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -21,6 +21,7 @@
21#include <linux/string.h> 21#include <linux/string.h>
22#include <linux/module.h> 22#include <linux/module.h>
23#include <linux/blkdev.h> 23#include <linux/blkdev.h>
24#include <linux/capability.h>
24#include <linux/completion.h> 25#include <linux/completion.h>
25#include <linux/cdrom.h> 26#include <linux/cdrom.h>
26#include <linux/slab.h> 27#include <linux/slab.h>
@@ -46,7 +47,7 @@ EXPORT_SYMBOL(scsi_command_size);
46 47
47static int sg_get_version(int __user *p) 48static int sg_get_version(int __user *p)
48{ 49{
49 static int sg_version_num = 30527; 50 static const int sg_version_num = 30527;
50 return put_user(sg_version_num, p); 51 return put_user(sg_version_num, p);
51} 52}
52 53
@@ -190,16 +191,21 @@ static int verify_command(struct file *file, unsigned char *cmd)
190 safe_for_write(GPCMD_SET_STREAMING), 191 safe_for_write(GPCMD_SET_STREAMING),
191 }; 192 };
192 unsigned char type = cmd_type[cmd[0]]; 193 unsigned char type = cmd_type[cmd[0]];
194 int has_write_perm = 0;
193 195
194 /* Anybody who can open the device can do a read-safe command */ 196 /* Anybody who can open the device can do a read-safe command */
195 if (type & CMD_READ_SAFE) 197 if (type & CMD_READ_SAFE)
196 return 0; 198 return 0;
197 199
200 /*
201 * file can be NULL from ioctl_by_bdev()...
202 */
203 if (file)
204 has_write_perm = file->f_mode & FMODE_WRITE;
205
198 /* Write-safe commands just require a writable open.. */ 206 /* Write-safe commands just require a writable open.. */
199 if (type & CMD_WRITE_SAFE) { 207 if ((type & CMD_WRITE_SAFE) && has_write_perm)
200 if (file->f_mode & FMODE_WRITE) 208 return 0;
201 return 0;
202 }
203 209
204 /* And root can do any command.. */ 210 /* And root can do any command.. */
205 if (capable(CAP_SYS_RAWIO)) 211 if (capable(CAP_SYS_RAWIO))
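The verify_command() change makes the write-permission check tolerate a NULL file pointer (callers coming through ioctl_by_bdev()) and flattens the ladder: read-safe commands are open to anyone, write-safe commands need a writable open, and CAP_SYS_RAWIO covers everything else. A standalone model of that ladder; the flag values and names are illustrative:

#include <stdio.h>
#include <stdbool.h>

#define CMD_READ_SAFE	0x01
#define CMD_WRITE_SAFE	0x02
#define EPERM		1

/* has_write_perm is false when there is no struct file at all. */
static int verify(unsigned char type, bool has_write_perm, bool rawio_capable)
{
	if (type & CMD_READ_SAFE)
		return 0;
	if ((type & CMD_WRITE_SAFE) && has_write_perm)
		return 0;
	if (rawio_capable)
		return 0;
	return -EPERM;	/* otherwise refuse the command */
}

int main(void)
{
	printf("%d\n", verify(CMD_READ_SAFE, false, false));	/* 0 */
	printf("%d\n", verify(CMD_WRITE_SAFE, false, false));	/* -1 */
	printf("%d\n", verify(CMD_WRITE_SAFE, true, false));	/* 0 */
	printf("%d\n", verify(0, false, true));			/* 0 */
	return 0;
}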
@@ -233,7 +239,7 @@ static int sg_io(struct file *file, request_queue_t *q,
233 if (verify_command(file, cmd)) 239 if (verify_command(file, cmd))
234 return -EPERM; 240 return -EPERM;
235 241
236 if (hdr->dxfer_len > (q->max_sectors << 9)) 242 if (hdr->dxfer_len > (q->max_hw_sectors << 9))
237 return -EIO; 243 return -EIO;
238 244
239 if (hdr->dxfer_len) 245 if (hdr->dxfer_len)
@@ -442,11 +448,37 @@ error:
442 return err; 448 return err;
443} 449}
444 450
451
452/* Send basic block requests */
453static int __blk_send_generic(request_queue_t *q, struct gendisk *bd_disk, int cmd, int data)
454{
455 struct request *rq;
456 int err;
457
458 rq = blk_get_request(q, WRITE, __GFP_WAIT);
459 rq->flags |= REQ_BLOCK_PC;
460 rq->data = NULL;
461 rq->data_len = 0;
462 rq->timeout = BLK_DEFAULT_TIMEOUT;
463 memset(rq->cmd, 0, sizeof(rq->cmd));
464 rq->cmd[0] = cmd;
465 rq->cmd[4] = data;
466 rq->cmd_len = 6;
467 err = blk_execute_rq(q, bd_disk, rq, 0);
468 blk_put_request(rq);
469
470 return err;
471}
472
473static inline int blk_send_start_stop(request_queue_t *q, struct gendisk *bd_disk, int data)
474{
475 return __blk_send_generic(q, bd_disk, GPCMD_START_STOP_UNIT, data);
476}
477
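__blk_send_generic() builds a bare 6-byte packet command with the opcode in byte 0 and the payload in byte 4, which is exactly what the tray ioctls below need: GPCMD_START_STOP_UNIT with 0x02 for eject or 0x03 for close tray. A small sketch of the CDB that ends up in rq->cmd; the 0x1b opcode is the standard START STOP UNIT code, stated here as an assumption since the header defining GPCMD_START_STOP_UNIT is not part of this diff:

#include <stdio.h>
#include <string.h>

#define GPCMD_START_STOP_UNIT 0x1b	/* assumed opcode value */

/* Build the 6-byte CDB the helper places in rq->cmd. */
static void build_start_stop(unsigned char cmd[6], unsigned char data)
{
	memset(cmd, 0, 6);
	cmd[0] = GPCMD_START_STOP_UNIT;
	cmd[4] = data;	/* 0x02 = eject, 0x03 = close tray */
}

int main(void)
{
	unsigned char cdb[6];

	build_start_stop(cdb, 0x02);	/* what CDROMEJECT sends */
	for (int i = 0; i < 6; i++)
		printf("%02x ", cdb[i]);
	printf("\n");	/* 1b 00 00 00 02 00 */
	return 0;
}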
445int scsi_cmd_ioctl(struct file *file, struct gendisk *bd_disk, unsigned int cmd, void __user *arg) 478int scsi_cmd_ioctl(struct file *file, struct gendisk *bd_disk, unsigned int cmd, void __user *arg)
446{ 479{
447 request_queue_t *q; 480 request_queue_t *q;
448 struct request *rq; 481 int err;
449 int close = 0, err;
450 482
451 q = bd_disk->queue; 483 q = bd_disk->queue;
452 if (!q) 484 if (!q)
@@ -564,19 +596,10 @@ int scsi_cmd_ioctl(struct file *file, struct gendisk *bd_disk, unsigned int cmd,
564 err = sg_scsi_ioctl(file, q, bd_disk, arg); 596 err = sg_scsi_ioctl(file, q, bd_disk, arg);
565 break; 597 break;
566 case CDROMCLOSETRAY: 598 case CDROMCLOSETRAY:
567 close = 1; 599 err = blk_send_start_stop(q, bd_disk, 0x03);
600 break;
568 case CDROMEJECT: 601 case CDROMEJECT:
569 rq = blk_get_request(q, WRITE, __GFP_WAIT); 602 err = blk_send_start_stop(q, bd_disk, 0x02);
570 rq->flags |= REQ_BLOCK_PC;
571 rq->data = NULL;
572 rq->data_len = 0;
573 rq->timeout = BLK_DEFAULT_TIMEOUT;
574 memset(rq->cmd, 0, sizeof(rq->cmd));
575 rq->cmd[0] = GPCMD_START_STOP_UNIT;
576 rq->cmd[4] = 0x02 + (close != 0);
577 rq->cmd_len = 6;
578 err = blk_execute_rq(q, bd_disk, rq, 0);
579 blk_put_request(rq);
580 break; 603 break;
581 default: 604 default:
582 err = -ENOTTY; 605 err = -ENOTTY;