Diffstat (limited to 'block')
-rw-r--r--  block/Kconfig            |   2
-rw-r--r--  block/as-iosched.c       | 150
-rw-r--r--  block/cfq-iosched.c      |  76
-rw-r--r--  block/deadline-iosched.c |  10
-rw-r--r--  block/elevator.c         | 160
-rw-r--r--  block/genhd.c            | 168
-rw-r--r--  block/ioctl.c            |  24
-rw-r--r--  block/ll_rw_blk.c        | 701
-rw-r--r--  block/noop-iosched.c     |  85
-rw-r--r--  block/scsi_ioctl.c       |  63
10 files changed, 908 insertions, 531 deletions
diff --git a/block/Kconfig b/block/Kconfig
index eb48edb80c1d..377f6dd20e17 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -5,7 +5,7 @@ | |||
5 | #for instance. | 5 | #for instance. |
6 | config LBD | 6 | config LBD |
7 | bool "Support for Large Block Devices" | 7 | bool "Support for Large Block Devices" |
8 | depends on X86 || (MIPS && 32BIT) || PPC32 || ARCH_S390_31 || SUPERH || UML | 8 | depends on X86 || (MIPS && 32BIT) || PPC32 || (S390 && !64BIT) || SUPERH || UML |
9 | help | 9 | help |
10 | Say Y here if you want to attach large (bigger than 2TB) discs to | 10 | Say Y here if you want to attach large (bigger than 2TB) discs to |
11 | your machine, or if you want to have a raid or loopback device | 11 | your machine, or if you want to have a raid or loopback device |
diff --git a/block/as-iosched.c b/block/as-iosched.c
index a78e160b59a3..8da3cf66894c 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -1,6 +1,4 @@ | |||
1 | /* | 1 | /* |
2 | * linux/drivers/block/as-iosched.c | ||
3 | * | ||
4 | * Anticipatory & deadline i/o scheduler. | 2 | * Anticipatory & deadline i/o scheduler. |
5 | * | 3 | * |
6 | * Copyright (C) 2002 Jens Axboe <axboe@suse.de> | 4 | * Copyright (C) 2002 Jens Axboe <axboe@suse.de> |
@@ -184,6 +182,9 @@ struct as_rq { | |||
184 | 182 | ||
185 | static kmem_cache_t *arq_pool; | 183 | static kmem_cache_t *arq_pool; |
186 | 184 | ||
185 | static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq); | ||
186 | static void as_antic_stop(struct as_data *ad); | ||
187 | |||
187 | /* | 188 | /* |
188 | * IO Context helper functions | 189 | * IO Context helper functions |
189 | */ | 190 | */ |
@@ -372,7 +373,7 @@ static struct as_rq *as_find_first_arq(struct as_data *ad, int data_dir) | |||
372 | * existing request against the same sector), which can happen when using | 373 | * existing request against the same sector), which can happen when using |
373 | * direct IO, then return the alias. | 374 | * direct IO, then return the alias. |
374 | */ | 375 | */ |
375 | static struct as_rq *as_add_arq_rb(struct as_data *ad, struct as_rq *arq) | 376 | static struct as_rq *__as_add_arq_rb(struct as_data *ad, struct as_rq *arq) |
376 | { | 377 | { |
377 | struct rb_node **p = &ARQ_RB_ROOT(ad, arq)->rb_node; | 378 | struct rb_node **p = &ARQ_RB_ROOT(ad, arq)->rb_node; |
378 | struct rb_node *parent = NULL; | 379 | struct rb_node *parent = NULL; |
@@ -399,6 +400,16 @@ static struct as_rq *as_add_arq_rb(struct as_data *ad, struct as_rq *arq) | |||
399 | return NULL; | 400 | return NULL; |
400 | } | 401 | } |
401 | 402 | ||
403 | static void as_add_arq_rb(struct as_data *ad, struct as_rq *arq) | ||
404 | { | ||
405 | struct as_rq *alias; | ||
406 | |||
407 | while ((unlikely(alias = __as_add_arq_rb(ad, arq)))) { | ||
408 | as_move_to_dispatch(ad, alias); | ||
409 | as_antic_stop(ad); | ||
410 | } | ||
411 | } | ||
412 | |||
402 | static inline void as_del_arq_rb(struct as_data *ad, struct as_rq *arq) | 413 | static inline void as_del_arq_rb(struct as_data *ad, struct as_rq *arq) |
403 | { | 414 | { |
404 | if (!ON_RB(&arq->rb_node)) { | 415 | if (!ON_RB(&arq->rb_node)) { |
@@ -1135,23 +1146,6 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq) | |||
1135 | /* | 1146 | /* |
1136 | * take it off the sort and fifo list, add to dispatch queue | 1147 | * take it off the sort and fifo list, add to dispatch queue |
1137 | */ | 1148 | */ |
1138 | while (!list_empty(&rq->queuelist)) { | ||
1139 | struct request *__rq = list_entry_rq(rq->queuelist.next); | ||
1140 | struct as_rq *__arq = RQ_DATA(__rq); | ||
1141 | |||
1142 | list_del(&__rq->queuelist); | ||
1143 | |||
1144 | elv_dispatch_add_tail(ad->q, __rq); | ||
1145 | |||
1146 | if (__arq->io_context && __arq->io_context->aic) | ||
1147 | atomic_inc(&__arq->io_context->aic->nr_dispatched); | ||
1148 | |||
1149 | WARN_ON(__arq->state != AS_RQ_QUEUED); | ||
1150 | __arq->state = AS_RQ_DISPATCHED; | ||
1151 | |||
1152 | ad->nr_dispatched++; | ||
1153 | } | ||
1154 | |||
1155 | as_remove_queued_request(ad->q, rq); | 1149 | as_remove_queued_request(ad->q, rq); |
1156 | WARN_ON(arq->state != AS_RQ_QUEUED); | 1150 | WARN_ON(arq->state != AS_RQ_QUEUED); |
1157 | 1151 | ||
@@ -1328,55 +1322,14 @@ fifo_expired: | |||
1328 | } | 1322 | } |
1329 | 1323 | ||
1330 | /* | 1324 | /* |
1331 | * Add arq to a list behind alias | ||
1332 | */ | ||
1333 | static inline void | ||
1334 | as_add_aliased_request(struct as_data *ad, struct as_rq *arq, | ||
1335 | struct as_rq *alias) | ||
1336 | { | ||
1337 | struct request *req = arq->request; | ||
1338 | struct list_head *insert = alias->request->queuelist.prev; | ||
1339 | |||
1340 | /* | ||
1341 | * Transfer list of aliases | ||
1342 | */ | ||
1343 | while (!list_empty(&req->queuelist)) { | ||
1344 | struct request *__rq = list_entry_rq(req->queuelist.next); | ||
1345 | struct as_rq *__arq = RQ_DATA(__rq); | ||
1346 | |||
1347 | list_move_tail(&__rq->queuelist, &alias->request->queuelist); | ||
1348 | |||
1349 | WARN_ON(__arq->state != AS_RQ_QUEUED); | ||
1350 | } | ||
1351 | |||
1352 | /* | ||
1353 | * Another request with the same start sector on the rbtree. | ||
1354 | * Link this request to that sector. They are untangled in | ||
1355 | * as_move_to_dispatch | ||
1356 | */ | ||
1357 | list_add(&arq->request->queuelist, insert); | ||
1358 | |||
1359 | /* | ||
1360 | * Don't want to have to handle merges. | ||
1361 | */ | ||
1362 | as_del_arq_hash(arq); | ||
1363 | arq->request->flags |= REQ_NOMERGE; | ||
1364 | } | ||
1365 | |||
1366 | /* | ||
1367 | * add arq to rbtree and fifo | 1325 | * add arq to rbtree and fifo |
1368 | */ | 1326 | */ |
1369 | static void as_add_request(request_queue_t *q, struct request *rq) | 1327 | static void as_add_request(request_queue_t *q, struct request *rq) |
1370 | { | 1328 | { |
1371 | struct as_data *ad = q->elevator->elevator_data; | 1329 | struct as_data *ad = q->elevator->elevator_data; |
1372 | struct as_rq *arq = RQ_DATA(rq); | 1330 | struct as_rq *arq = RQ_DATA(rq); |
1373 | struct as_rq *alias; | ||
1374 | int data_dir; | 1331 | int data_dir; |
1375 | 1332 | ||
1376 | if (arq->state != AS_RQ_PRESCHED) { | ||
1377 | printk("arq->state: %d\n", arq->state); | ||
1378 | WARN_ON(1); | ||
1379 | } | ||
1380 | arq->state = AS_RQ_NEW; | 1333 | arq->state = AS_RQ_NEW; |
1381 | 1334 | ||
1382 | if (rq_data_dir(arq->request) == READ | 1335 | if (rq_data_dir(arq->request) == READ |
@@ -1393,33 +1346,17 @@ static void as_add_request(request_queue_t *q, struct request *rq) | |||
1393 | atomic_inc(&arq->io_context->aic->nr_queued); | 1346 | atomic_inc(&arq->io_context->aic->nr_queued); |
1394 | } | 1347 | } |
1395 | 1348 | ||
1396 | alias = as_add_arq_rb(ad, arq); | 1349 | as_add_arq_rb(ad, arq); |
1397 | if (!alias) { | 1350 | if (rq_mergeable(arq->request)) |
1398 | /* | 1351 | as_add_arq_hash(ad, arq); |
1399 | * set expire time (only used for reads) and add to fifo list | ||
1400 | */ | ||
1401 | arq->expires = jiffies + ad->fifo_expire[data_dir]; | ||
1402 | list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]); | ||
1403 | |||
1404 | if (rq_mergeable(arq->request)) | ||
1405 | as_add_arq_hash(ad, arq); | ||
1406 | as_update_arq(ad, arq); /* keep state machine up to date */ | ||
1407 | 1352 | ||
1408 | } else { | 1353 | /* |
1409 | as_add_aliased_request(ad, arq, alias); | 1354 | * set expire time (only used for reads) and add to fifo list |
1410 | 1355 | */ | |
1411 | /* | 1356 | arq->expires = jiffies + ad->fifo_expire[data_dir]; |
1412 | * have we been anticipating this request? | 1357 | list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]); |
1413 | * or does it come from the same process as the one we are | ||
1414 | * anticipating for? | ||
1415 | */ | ||
1416 | if (ad->antic_status == ANTIC_WAIT_REQ | ||
1417 | || ad->antic_status == ANTIC_WAIT_NEXT) { | ||
1418 | if (as_can_break_anticipation(ad, arq)) | ||
1419 | as_antic_stop(ad); | ||
1420 | } | ||
1421 | } | ||
1422 | 1358 | ||
1359 | as_update_arq(ad, arq); /* keep state machine up to date */ | ||
1423 | arq->state = AS_RQ_QUEUED; | 1360 | arq->state = AS_RQ_QUEUED; |
1424 | } | 1361 | } |
1425 | 1362 | ||
@@ -1542,23 +1479,8 @@ static void as_merged_request(request_queue_t *q, struct request *req) | |||
1542 | * if the merge was a front merge, we need to reposition request | 1479 | * if the merge was a front merge, we need to reposition request |
1543 | */ | 1480 | */ |
1544 | if (rq_rb_key(req) != arq->rb_key) { | 1481 | if (rq_rb_key(req) != arq->rb_key) { |
1545 | struct as_rq *alias, *next_arq = NULL; | ||
1546 | |||
1547 | if (ad->next_arq[arq->is_sync] == arq) | ||
1548 | next_arq = as_find_next_arq(ad, arq); | ||
1549 | |||
1550 | /* | ||
1551 | * Note! We should really be moving any old aliased requests | ||
1552 | * off this request and try to insert them into the rbtree. We | ||
1553 | * currently don't bother. Ditto the next function. | ||
1554 | */ | ||
1555 | as_del_arq_rb(ad, arq); | 1482 | as_del_arq_rb(ad, arq); |
1556 | if ((alias = as_add_arq_rb(ad, arq))) { | 1483 | as_add_arq_rb(ad, arq); |
1557 | list_del_init(&arq->fifo); | ||
1558 | as_add_aliased_request(ad, arq, alias); | ||
1559 | if (next_arq) | ||
1560 | ad->next_arq[arq->is_sync] = next_arq; | ||
1561 | } | ||
1562 | /* | 1484 | /* |
1563 | * Note! At this stage of this and the next function, our next | 1485 | * Note! At this stage of this and the next function, our next |
1564 | * request may not be optimal - eg the request may have "grown" | 1486 | * request may not be optimal - eg the request may have "grown" |
@@ -1585,18 +1507,8 @@ static void as_merged_requests(request_queue_t *q, struct request *req, | |||
1585 | as_add_arq_hash(ad, arq); | 1507 | as_add_arq_hash(ad, arq); |
1586 | 1508 | ||
1587 | if (rq_rb_key(req) != arq->rb_key) { | 1509 | if (rq_rb_key(req) != arq->rb_key) { |
1588 | struct as_rq *alias, *next_arq = NULL; | ||
1589 | |||
1590 | if (ad->next_arq[arq->is_sync] == arq) | ||
1591 | next_arq = as_find_next_arq(ad, arq); | ||
1592 | |||
1593 | as_del_arq_rb(ad, arq); | 1510 | as_del_arq_rb(ad, arq); |
1594 | if ((alias = as_add_arq_rb(ad, arq))) { | 1511 | as_add_arq_rb(ad, arq); |
1595 | list_del_init(&arq->fifo); | ||
1596 | as_add_aliased_request(ad, arq, alias); | ||
1597 | if (next_arq) | ||
1598 | ad->next_arq[arq->is_sync] = next_arq; | ||
1599 | } | ||
1600 | } | 1512 | } |
1601 | 1513 | ||
1602 | /* | 1514 | /* |
@@ -1616,18 +1528,6 @@ static void as_merged_requests(request_queue_t *q, struct request *req, | |||
1616 | } | 1528 | } |
1617 | 1529 | ||
1618 | /* | 1530 | /* |
1619 | * Transfer list of aliases | ||
1620 | */ | ||
1621 | while (!list_empty(&next->queuelist)) { | ||
1622 | struct request *__rq = list_entry_rq(next->queuelist.next); | ||
1623 | struct as_rq *__arq = RQ_DATA(__rq); | ||
1624 | |||
1625 | list_move_tail(&__rq->queuelist, &req->queuelist); | ||
1626 | |||
1627 | WARN_ON(__arq->state != AS_RQ_QUEUED); | ||
1628 | } | ||
1629 | |||
1630 | /* | ||
1631 | * kill knowledge of next, this one is a goner | 1531 | * kill knowledge of next, this one is a goner |
1632 | */ | 1532 | */ |
1633 | as_remove_queued_request(q, next); | 1533 | as_remove_queued_request(q, next); |
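
The as-iosched.c hunks above drop the aliased-request side lists: instead of chaining a request that collides with an existing one at the same start sector (as as_add_aliased_request() did), the new as_add_arq_rb() wrapper pushes the colliding request straight to the dispatch queue and retries the insert. The userspace sketch below shows only that insert-or-dispatch retry shape; the sorted-array "tree" and all names are hypothetical, not kernel code.

#include <stdio.h>
#include <stddef.h>

struct toy_rq {
        long sector;
};

#define NR_SLOTS 8
static struct toy_rq *tree[NR_SLOTS];   /* stand-in for the per-direction sector rbtree */

/*
 * Insert rq; if a request with the same start sector is already present,
 * return that request (the "alias") and leave the tree untouched, the way
 * __as_add_arq_rb() does above.
 */
static struct toy_rq *toy_add_rq(struct toy_rq *rq)
{
        int i, free_slot = -1;

        for (i = 0; i < NR_SLOTS; i++) {
                if (tree[i] && tree[i]->sector == rq->sector)
                        return tree[i];                 /* alias found */
                if (!tree[i] && free_slot < 0)
                        free_slot = i;
        }
        if (free_slot >= 0)
                tree[free_slot] = rq;
        return NULL;
}

/* remove the request from the "tree" and hand it to the dispatch queue */
static void toy_move_to_dispatch(struct toy_rq *rq)
{
        int i;

        for (i = 0; i < NR_SLOTS; i++)
                if (tree[i] == rq)
                        tree[i] = NULL;
        printf("dispatched request at sector %ld\n", rq->sector);
}

int main(void)
{
        struct toy_rq a = { 100 }, b = { 100 }, *alias;

        toy_add_rq(&a);
        /* same shape as the new as_add_arq_rb(): dispatch aliases until the insert sticks */
        while ((alias = toy_add_rq(&b)))
                toy_move_to_dispatch(alias);
        return 0;
}
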
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index ecacca9c877e..74fae2daf87e 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1,6 +1,4 @@ | |||
1 | /* | 1 | /* |
2 | * linux/drivers/block/cfq-iosched.c | ||
3 | * | ||
4 | * CFQ, or complete fairness queueing, disk scheduler. | 2 | * CFQ, or complete fairness queueing, disk scheduler. |
5 | * | 3 | * |
6 | * Based on ideas from a previously unfinished io | 4 | * Based on ideas from a previously unfinished io |
@@ -27,15 +25,15 @@ | |||
27 | /* | 25 | /* |
28 | * tunables | 26 | * tunables |
29 | */ | 27 | */ |
30 | static int cfq_quantum = 4; /* max queue in one round of service */ | 28 | static const int cfq_quantum = 4; /* max queue in one round of service */ |
31 | static int cfq_queued = 8; /* minimum rq allocate limit per-queue*/ | 29 | static const int cfq_queued = 8; /* minimum rq allocate limit per-queue*/ |
32 | static int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; | 30 | static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; |
33 | static int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */ | 31 | static const int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */ |
34 | static int cfq_back_penalty = 2; /* penalty of a backwards seek */ | 32 | static const int cfq_back_penalty = 2; /* penalty of a backwards seek */ |
35 | 33 | ||
36 | static int cfq_slice_sync = HZ / 10; | 34 | static const int cfq_slice_sync = HZ / 10; |
37 | static int cfq_slice_async = HZ / 25; | 35 | static int cfq_slice_async = HZ / 25; |
38 | static int cfq_slice_async_rq = 2; | 36 | static const int cfq_slice_async_rq = 2; |
39 | static int cfq_slice_idle = HZ / 100; | 37 | static int cfq_slice_idle = HZ / 100; |
40 | 38 | ||
41 | #define CFQ_IDLE_GRACE (HZ / 10) | 39 | #define CFQ_IDLE_GRACE (HZ / 10) |
@@ -47,7 +45,7 @@ static int cfq_slice_idle = HZ / 100; | |||
47 | /* | 45 | /* |
48 | * disable queueing at the driver/hardware level | 46 | * disable queueing at the driver/hardware level |
49 | */ | 47 | */ |
50 | static int cfq_max_depth = 2; | 48 | static const int cfq_max_depth = 2; |
51 | 49 | ||
52 | /* | 50 | /* |
53 | * for the hash of cfqq inside the cfqd | 51 | * for the hash of cfqq inside the cfqd |
@@ -861,8 +859,8 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
861 | * store what was left of this slice, if the queue idled out | 859 | * store what was left of this slice, if the queue idled out |
862 | * or was preempted | 860 | * or was preempted |
863 | */ | 861 | */ |
864 | if (time_after(now, cfqq->slice_end)) | 862 | if (time_after(cfqq->slice_end, now)) |
865 | cfqq->slice_left = now - cfqq->slice_end; | 863 | cfqq->slice_left = cfqq->slice_end - now; |
866 | else | 864 | else |
867 | cfqq->slice_left = 0; | 865 | cfqq->slice_left = 0; |
868 | 866 | ||
@@ -999,7 +997,7 @@ cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
999 | /* | 997 | /* |
1000 | * get next queue for service | 998 | * get next queue for service |
1001 | */ | 999 | */ |
1002 | static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd, int force) | 1000 | static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) |
1003 | { | 1001 | { |
1004 | unsigned long now = jiffies; | 1002 | unsigned long now = jiffies; |
1005 | struct cfq_queue *cfqq; | 1003 | struct cfq_queue *cfqq; |
@@ -1023,7 +1021,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd, int force) | |||
1023 | */ | 1021 | */ |
1024 | if (!RB_EMPTY(&cfqq->sort_list)) | 1022 | if (!RB_EMPTY(&cfqq->sort_list)) |
1025 | goto keep_queue; | 1023 | goto keep_queue; |
1026 | else if (!force && cfq_cfqq_class_sync(cfqq) && | 1024 | else if (cfq_cfqq_class_sync(cfqq) && |
1027 | time_before(now, cfqq->slice_end)) { | 1025 | time_before(now, cfqq->slice_end)) { |
1028 | if (cfq_arm_slice_timer(cfqd, cfqq)) | 1026 | if (cfq_arm_slice_timer(cfqd, cfqq)) |
1029 | return NULL; | 1027 | return NULL; |
@@ -1092,6 +1090,42 @@ __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
1092 | } | 1090 | } |
1093 | 1091 | ||
1094 | static int | 1092 | static int |
1093 | cfq_forced_dispatch_cfqqs(struct list_head *list) | ||
1094 | { | ||
1095 | int dispatched = 0; | ||
1096 | struct cfq_queue *cfqq, *next; | ||
1097 | struct cfq_rq *crq; | ||
1098 | |||
1099 | list_for_each_entry_safe(cfqq, next, list, cfq_list) { | ||
1100 | while ((crq = cfqq->next_crq)) { | ||
1101 | cfq_dispatch_insert(cfqq->cfqd->queue, crq); | ||
1102 | dispatched++; | ||
1103 | } | ||
1104 | BUG_ON(!list_empty(&cfqq->fifo)); | ||
1105 | } | ||
1106 | return dispatched; | ||
1107 | } | ||
1108 | |||
1109 | static int | ||
1110 | cfq_forced_dispatch(struct cfq_data *cfqd) | ||
1111 | { | ||
1112 | int i, dispatched = 0; | ||
1113 | |||
1114 | for (i = 0; i < CFQ_PRIO_LISTS; i++) | ||
1115 | dispatched += cfq_forced_dispatch_cfqqs(&cfqd->rr_list[i]); | ||
1116 | |||
1117 | dispatched += cfq_forced_dispatch_cfqqs(&cfqd->busy_rr); | ||
1118 | dispatched += cfq_forced_dispatch_cfqqs(&cfqd->cur_rr); | ||
1119 | dispatched += cfq_forced_dispatch_cfqqs(&cfqd->idle_rr); | ||
1120 | |||
1121 | cfq_slice_expired(cfqd, 0); | ||
1122 | |||
1123 | BUG_ON(cfqd->busy_queues); | ||
1124 | |||
1125 | return dispatched; | ||
1126 | } | ||
1127 | |||
1128 | static int | ||
1095 | cfq_dispatch_requests(request_queue_t *q, int force) | 1129 | cfq_dispatch_requests(request_queue_t *q, int force) |
1096 | { | 1130 | { |
1097 | struct cfq_data *cfqd = q->elevator->elevator_data; | 1131 | struct cfq_data *cfqd = q->elevator->elevator_data; |
@@ -1100,7 +1134,10 @@ cfq_dispatch_requests(request_queue_t *q, int force) | |||
1100 | if (!cfqd->busy_queues) | 1134 | if (!cfqd->busy_queues) |
1101 | return 0; | 1135 | return 0; |
1102 | 1136 | ||
1103 | cfqq = cfq_select_queue(cfqd, force); | 1137 | if (unlikely(force)) |
1138 | return cfq_forced_dispatch(cfqd); | ||
1139 | |||
1140 | cfqq = cfq_select_queue(cfqd); | ||
1104 | if (cfqq) { | 1141 | if (cfqq) { |
1105 | int max_dispatch; | 1142 | int max_dispatch; |
1106 | 1143 | ||
@@ -1115,12 +1152,9 @@ cfq_dispatch_requests(request_queue_t *q, int force) | |||
1115 | cfq_clear_cfqq_wait_request(cfqq); | 1152 | cfq_clear_cfqq_wait_request(cfqq); |
1116 | del_timer(&cfqd->idle_slice_timer); | 1153 | del_timer(&cfqd->idle_slice_timer); |
1117 | 1154 | ||
1118 | if (!force) { | 1155 | max_dispatch = cfqd->cfq_quantum; |
1119 | max_dispatch = cfqd->cfq_quantum; | 1156 | if (cfq_class_idle(cfqq)) |
1120 | if (cfq_class_idle(cfqq)) | 1157 | max_dispatch = 1; |
1121 | max_dispatch = 1; | ||
1122 | } else | ||
1123 | max_dispatch = INT_MAX; | ||
1124 | 1158 | ||
1125 | return __cfq_dispatch_requests(cfqd, cfqq, max_dispatch); | 1159 | return __cfq_dispatch_requests(cfqd, cfqq, max_dispatch); |
1126 | } | 1160 | } |
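
Besides the forced-dispatch split, the cfq-iosched.c hunk in __cfq_slice_expired() fixes the leftover-slice accounting: the old test was true only once the slice had already expired and then subtracted in the wrong direction, while the fixed ordering stores the time actually remaining when a queue is preempted or idles out. A small userspace sketch of the wrap-safe comparison involved follows; time_after() mirrors the kernel macro minus its type checks, and the "jiffies" values are simulated.

#include <stdio.h>

/* same definition as the kernel's time_after(), without the type checks */
#define time_after(a, b)        ((long)((b) - (a)) < 0)

int main(void)
{
        unsigned long now = 1000;               /* simulated jiffies */
        unsigned long slice_end = 1040;         /* slice has 40 ticks left */
        unsigned long slice_left;

        /*
         * The old code tested time_after(now, slice_end), which is true only
         * once the slice is already gone, and then subtracted the wrong way
         * around.  The fixed ordering keeps the remaining time:
         */
        if (time_after(slice_end, now))
                slice_left = slice_end - now;
        else
                slice_left = 0;

        printf("slice_left = %lu\n", slice_left);       /* 40 */
        return 0;
}
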
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index 7929471d7df7..27e494b1bf97 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -1,6 +1,4 @@ | |||
1 | /* | 1 | /* |
2 | * linux/drivers/block/deadline-iosched.c | ||
3 | * | ||
4 | * Deadline i/o scheduler. | 2 | * Deadline i/o scheduler. |
5 | * | 3 | * |
6 | * Copyright (C) 2002 Jens Axboe <axboe@suse.de> | 4 | * Copyright (C) 2002 Jens Axboe <axboe@suse.de> |
@@ -21,10 +19,10 @@ | |||
21 | /* | 19 | /* |
22 | * See Documentation/block/deadline-iosched.txt | 20 | * See Documentation/block/deadline-iosched.txt |
23 | */ | 21 | */ |
24 | static int read_expire = HZ / 2; /* max time before a read is submitted. */ | 22 | static const int read_expire = HZ / 2; /* max time before a read is submitted. */ |
25 | static int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */ | 23 | static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */ |
26 | static int writes_starved = 2; /* max times reads can starve a write */ | 24 | static const int writes_starved = 2; /* max times reads can starve a write */ |
27 | static int fifo_batch = 16; /* # of sequential requests treated as one | 25 | static const int fifo_batch = 16; /* # of sequential requests treated as one |
28 | by the above parameters. For throughput. */ | 26 | by the above parameters. For throughput. */ |
29 | 27 | ||
30 | static const int deadline_hash_shift = 5; | 28 | static const int deadline_hash_shift = 5; |
diff --git a/block/elevator.c b/block/elevator.c
index d4a49a3df829..c9f424d5399c 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -1,6 +1,4 @@ | |||
1 | /* | 1 | /* |
2 | * linux/drivers/block/elevator.c | ||
3 | * | ||
4 | * Block device elevator/IO-scheduler. | 2 | * Block device elevator/IO-scheduler. |
5 | * | 3 | * |
6 | * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE | 4 | * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE |
@@ -66,7 +64,7 @@ inline int elv_rq_merge_ok(struct request *rq, struct bio *bio) | |||
66 | } | 64 | } |
67 | EXPORT_SYMBOL(elv_rq_merge_ok); | 65 | EXPORT_SYMBOL(elv_rq_merge_ok); |
68 | 66 | ||
69 | inline int elv_try_merge(struct request *__rq, struct bio *bio) | 67 | static inline int elv_try_merge(struct request *__rq, struct bio *bio) |
70 | { | 68 | { |
71 | int ret = ELEVATOR_NO_MERGE; | 69 | int ret = ELEVATOR_NO_MERGE; |
72 | 70 | ||
@@ -82,7 +80,6 @@ inline int elv_try_merge(struct request *__rq, struct bio *bio) | |||
82 | 80 | ||
83 | return ret; | 81 | return ret; |
84 | } | 82 | } |
85 | EXPORT_SYMBOL(elv_try_merge); | ||
86 | 83 | ||
87 | static struct elevator_type *elevator_find(const char *name) | 84 | static struct elevator_type *elevator_find(const char *name) |
88 | { | 85 | { |
@@ -152,12 +149,20 @@ static void elevator_setup_default(void) | |||
152 | if (!chosen_elevator[0]) | 149 | if (!chosen_elevator[0]) |
153 | strcpy(chosen_elevator, CONFIG_DEFAULT_IOSCHED); | 150 | strcpy(chosen_elevator, CONFIG_DEFAULT_IOSCHED); |
154 | 151 | ||
152 | /* | ||
153 | * Be backwards-compatible with previous kernels, so users | ||
154 | * won't get the wrong elevator. | ||
155 | */ | ||
156 | if (!strcmp(chosen_elevator, "as")) | ||
157 | strcpy(chosen_elevator, "anticipatory"); | ||
158 | |||
155 | /* | 159 | /* |
156 | * If the given scheduler is not available, fall back to no-op. | 160 | * If the given scheduler is not available, fall back to the default |
157 | */ | 161 | */ |
158 | if (!(e = elevator_find(chosen_elevator))) | 162 | if ((e = elevator_find(chosen_elevator))) |
159 | strcpy(chosen_elevator, "noop"); | 163 | elevator_put(e); |
160 | elevator_put(e); | 164 | else |
165 | strcpy(chosen_elevator, CONFIG_DEFAULT_IOSCHED); | ||
161 | } | 166 | } |
162 | 167 | ||
163 | static int __init elevator_setup(char *str) | 168 | static int __init elevator_setup(char *str) |
@@ -190,14 +195,14 @@ int elevator_init(request_queue_t *q, char *name) | |||
190 | 195 | ||
191 | eq = kmalloc(sizeof(struct elevator_queue), GFP_KERNEL); | 196 | eq = kmalloc(sizeof(struct elevator_queue), GFP_KERNEL); |
192 | if (!eq) { | 197 | if (!eq) { |
193 | elevator_put(e->elevator_type); | 198 | elevator_put(e); |
194 | return -ENOMEM; | 199 | return -ENOMEM; |
195 | } | 200 | } |
196 | 201 | ||
197 | ret = elevator_attach(q, e, eq); | 202 | ret = elevator_attach(q, e, eq); |
198 | if (ret) { | 203 | if (ret) { |
199 | kfree(eq); | 204 | kfree(eq); |
200 | elevator_put(e->elevator_type); | 205 | elevator_put(e); |
201 | } | 206 | } |
202 | 207 | ||
203 | return ret; | 208 | return ret; |
@@ -225,6 +230,7 @@ void elv_dispatch_sort(request_queue_t *q, struct request *rq) | |||
225 | 230 | ||
226 | if (q->last_merge == rq) | 231 | if (q->last_merge == rq) |
227 | q->last_merge = NULL; | 232 | q->last_merge = NULL; |
233 | q->nr_sorted--; | ||
228 | 234 | ||
229 | boundary = q->end_sector; | 235 | boundary = q->end_sector; |
230 | 236 | ||
@@ -283,6 +289,7 @@ void elv_merge_requests(request_queue_t *q, struct request *rq, | |||
283 | 289 | ||
284 | if (e->ops->elevator_merge_req_fn) | 290 | if (e->ops->elevator_merge_req_fn) |
285 | e->ops->elevator_merge_req_fn(q, rq, next); | 291 | e->ops->elevator_merge_req_fn(q, rq, next); |
292 | q->nr_sorted--; | ||
286 | 293 | ||
287 | q->last_merge = rq; | 294 | q->last_merge = rq; |
288 | } | 295 | } |
@@ -303,22 +310,39 @@ void elv_requeue_request(request_queue_t *q, struct request *rq) | |||
303 | 310 | ||
304 | rq->flags &= ~REQ_STARTED; | 311 | rq->flags &= ~REQ_STARTED; |
305 | 312 | ||
306 | /* | 313 | __elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE, 0); |
307 | * if this is the flush, requeue the original instead and drop the flush | 314 | } |
308 | */ | ||
309 | if (rq->flags & REQ_BAR_FLUSH) { | ||
310 | clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags); | ||
311 | rq = rq->end_io_data; | ||
312 | } | ||
313 | 315 | ||
314 | __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0); | 316 | static void elv_drain_elevator(request_queue_t *q) |
317 | { | ||
318 | static int printed; | ||
319 | while (q->elevator->ops->elevator_dispatch_fn(q, 1)) | ||
320 | ; | ||
321 | if (q->nr_sorted == 0) | ||
322 | return; | ||
323 | if (printed++ < 10) { | ||
324 | printk(KERN_ERR "%s: forced dispatching is broken " | ||
325 | "(nr_sorted=%u), please report this\n", | ||
326 | q->elevator->elevator_type->elevator_name, q->nr_sorted); | ||
327 | } | ||
315 | } | 328 | } |
316 | 329 | ||
317 | void __elv_add_request(request_queue_t *q, struct request *rq, int where, | 330 | void __elv_add_request(request_queue_t *q, struct request *rq, int where, |
318 | int plug) | 331 | int plug) |
319 | { | 332 | { |
333 | struct list_head *pos; | ||
334 | unsigned ordseq; | ||
335 | |||
336 | if (q->ordcolor) | ||
337 | rq->flags |= REQ_ORDERED_COLOR; | ||
338 | |||
320 | if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { | 339 | if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { |
321 | /* | 340 | /* |
341 | * toggle ordered color | ||
342 | */ | ||
343 | q->ordcolor ^= 1; | ||
344 | |||
345 | /* | ||
322 | * barriers implicitly indicate back insertion | 346 | * barriers implicitly indicate back insertion |
323 | */ | 347 | */ |
324 | if (where == ELEVATOR_INSERT_SORT) | 348 | if (where == ELEVATOR_INSERT_SORT) |
@@ -348,9 +372,7 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where, | |||
348 | 372 | ||
349 | case ELEVATOR_INSERT_BACK: | 373 | case ELEVATOR_INSERT_BACK: |
350 | rq->flags |= REQ_SOFTBARRIER; | 374 | rq->flags |= REQ_SOFTBARRIER; |
351 | 375 | elv_drain_elevator(q); | |
352 | while (q->elevator->ops->elevator_dispatch_fn(q, 1)) | ||
353 | ; | ||
354 | list_add_tail(&rq->queuelist, &q->queue_head); | 376 | list_add_tail(&rq->queuelist, &q->queue_head); |
355 | /* | 377 | /* |
356 | * We kick the queue here for the following reasons. | 378 | * We kick the queue here for the following reasons. |
@@ -369,6 +391,7 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where, | |||
369 | case ELEVATOR_INSERT_SORT: | 391 | case ELEVATOR_INSERT_SORT: |
370 | BUG_ON(!blk_fs_request(rq)); | 392 | BUG_ON(!blk_fs_request(rq)); |
371 | rq->flags |= REQ_SORTED; | 393 | rq->flags |= REQ_SORTED; |
394 | q->nr_sorted++; | ||
372 | if (q->last_merge == NULL && rq_mergeable(rq)) | 395 | if (q->last_merge == NULL && rq_mergeable(rq)) |
373 | q->last_merge = rq; | 396 | q->last_merge = rq; |
374 | /* | 397 | /* |
@@ -379,6 +402,30 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where, | |||
379 | q->elevator->ops->elevator_add_req_fn(q, rq); | 402 | q->elevator->ops->elevator_add_req_fn(q, rq); |
380 | break; | 403 | break; |
381 | 404 | ||
405 | case ELEVATOR_INSERT_REQUEUE: | ||
406 | /* | ||
407 | * If ordered flush isn't in progress, we do front | ||
408 | * insertion; otherwise, requests should be requeued | ||
409 | * in ordseq order. | ||
410 | */ | ||
411 | rq->flags |= REQ_SOFTBARRIER; | ||
412 | |||
413 | if (q->ordseq == 0) { | ||
414 | list_add(&rq->queuelist, &q->queue_head); | ||
415 | break; | ||
416 | } | ||
417 | |||
418 | ordseq = blk_ordered_req_seq(rq); | ||
419 | |||
420 | list_for_each(pos, &q->queue_head) { | ||
421 | struct request *pos_rq = list_entry_rq(pos); | ||
422 | if (ordseq <= blk_ordered_req_seq(pos_rq)) | ||
423 | break; | ||
424 | } | ||
425 | |||
426 | list_add_tail(&rq->queuelist, pos); | ||
427 | break; | ||
428 | |||
382 | default: | 429 | default: |
383 | printk(KERN_ERR "%s: bad insertion point %d\n", | 430 | printk(KERN_ERR "%s: bad insertion point %d\n", |
384 | __FUNCTION__, where); | 431 | __FUNCTION__, where); |
@@ -408,25 +455,16 @@ static inline struct request *__elv_next_request(request_queue_t *q) | |||
408 | { | 455 | { |
409 | struct request *rq; | 456 | struct request *rq; |
410 | 457 | ||
411 | if (unlikely(list_empty(&q->queue_head) && | 458 | while (1) { |
412 | !q->elevator->ops->elevator_dispatch_fn(q, 0))) | 459 | while (!list_empty(&q->queue_head)) { |
413 | return NULL; | 460 | rq = list_entry_rq(q->queue_head.next); |
414 | 461 | if (blk_do_ordered(q, &rq)) | |
415 | rq = list_entry_rq(q->queue_head.next); | 462 | return rq; |
416 | 463 | } | |
417 | /* | ||
418 | * if this is a barrier write and the device has to issue a | ||
419 | * flush sequence to support it, check how far we are | ||
420 | */ | ||
421 | if (blk_fs_request(rq) && blk_barrier_rq(rq)) { | ||
422 | BUG_ON(q->ordered == QUEUE_ORDERED_NONE); | ||
423 | 464 | ||
424 | if (q->ordered == QUEUE_ORDERED_FLUSH && | 465 | if (!q->elevator->ops->elevator_dispatch_fn(q, 0)) |
425 | !blk_barrier_preflush(rq)) | 466 | return NULL; |
426 | rq = blk_start_pre_flush(q, rq); | ||
427 | } | 467 | } |
428 | |||
429 | return rq; | ||
430 | } | 468 | } |
431 | 469 | ||
432 | struct request *elv_next_request(request_queue_t *q) | 470 | struct request *elv_next_request(request_queue_t *q) |
@@ -484,7 +522,7 @@ struct request *elv_next_request(request_queue_t *q) | |||
484 | blkdev_dequeue_request(rq); | 522 | blkdev_dequeue_request(rq); |
485 | rq->flags |= REQ_QUIET; | 523 | rq->flags |= REQ_QUIET; |
486 | end_that_request_chunk(rq, 0, nr_bytes); | 524 | end_that_request_chunk(rq, 0, nr_bytes); |
487 | end_that_request_last(rq); | 525 | end_that_request_last(rq, 0); |
488 | } else { | 526 | } else { |
489 | printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__, | 527 | printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__, |
490 | ret); | 528 | ret); |
@@ -525,33 +563,19 @@ int elv_queue_empty(request_queue_t *q) | |||
525 | 563 | ||
526 | struct request *elv_latter_request(request_queue_t *q, struct request *rq) | 564 | struct request *elv_latter_request(request_queue_t *q, struct request *rq) |
527 | { | 565 | { |
528 | struct list_head *next; | ||
529 | |||
530 | elevator_t *e = q->elevator; | 566 | elevator_t *e = q->elevator; |
531 | 567 | ||
532 | if (e->ops->elevator_latter_req_fn) | 568 | if (e->ops->elevator_latter_req_fn) |
533 | return e->ops->elevator_latter_req_fn(q, rq); | 569 | return e->ops->elevator_latter_req_fn(q, rq); |
534 | |||
535 | next = rq->queuelist.next; | ||
536 | if (next != &q->queue_head && next != &rq->queuelist) | ||
537 | return list_entry_rq(next); | ||
538 | |||
539 | return NULL; | 570 | return NULL; |
540 | } | 571 | } |
541 | 572 | ||
542 | struct request *elv_former_request(request_queue_t *q, struct request *rq) | 573 | struct request *elv_former_request(request_queue_t *q, struct request *rq) |
543 | { | 574 | { |
544 | struct list_head *prev; | ||
545 | |||
546 | elevator_t *e = q->elevator; | 575 | elevator_t *e = q->elevator; |
547 | 576 | ||
548 | if (e->ops->elevator_former_req_fn) | 577 | if (e->ops->elevator_former_req_fn) |
549 | return e->ops->elevator_former_req_fn(q, rq); | 578 | return e->ops->elevator_former_req_fn(q, rq); |
550 | |||
551 | prev = rq->queuelist.prev; | ||
552 | if (prev != &q->queue_head && prev != &rq->queuelist) | ||
553 | return list_entry_rq(prev); | ||
554 | |||
555 | return NULL; | 579 | return NULL; |
556 | } | 580 | } |
557 | 581 | ||
@@ -597,6 +621,20 @@ void elv_completed_request(request_queue_t *q, struct request *rq) | |||
597 | if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn) | 621 | if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn) |
598 | e->ops->elevator_completed_req_fn(q, rq); | 622 | e->ops->elevator_completed_req_fn(q, rq); |
599 | } | 623 | } |
624 | |||
625 | /* | ||
626 | * Check if the queue is waiting for fs requests to be | ||
627 | * drained for flush sequence. | ||
628 | */ | ||
629 | if (unlikely(q->ordseq)) { | ||
630 | struct request *first_rq = list_entry_rq(q->queue_head.next); | ||
631 | if (q->in_flight == 0 && | ||
632 | blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN && | ||
633 | blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) { | ||
634 | blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0); | ||
635 | q->request_fn(q); | ||
636 | } | ||
637 | } | ||
600 | } | 638 | } |
601 | 639 | ||
602 | int elv_register_queue(struct request_queue *q) | 640 | int elv_register_queue(struct request_queue *q) |
@@ -691,13 +729,15 @@ static void elevator_switch(request_queue_t *q, struct elevator_type *new_e) | |||
691 | 729 | ||
692 | set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); | 730 | set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); |
693 | 731 | ||
694 | while (q->elevator->ops->elevator_dispatch_fn(q, 1)) | 732 | elv_drain_elevator(q); |
695 | ; | ||
696 | 733 | ||
697 | while (q->rq.elvpriv) { | 734 | while (q->rq.elvpriv) { |
735 | blk_remove_plug(q); | ||
736 | q->request_fn(q); | ||
698 | spin_unlock_irq(q->queue_lock); | 737 | spin_unlock_irq(q->queue_lock); |
699 | msleep(10); | 738 | msleep(10); |
700 | spin_lock_irq(q->queue_lock); | 739 | spin_lock_irq(q->queue_lock); |
740 | elv_drain_elevator(q); | ||
701 | } | 741 | } |
702 | 742 | ||
703 | spin_unlock_irq(q->queue_lock); | 743 | spin_unlock_irq(q->queue_lock); |
@@ -744,13 +784,15 @@ error: | |||
744 | ssize_t elv_iosched_store(request_queue_t *q, const char *name, size_t count) | 784 | ssize_t elv_iosched_store(request_queue_t *q, const char *name, size_t count) |
745 | { | 785 | { |
746 | char elevator_name[ELV_NAME_MAX]; | 786 | char elevator_name[ELV_NAME_MAX]; |
787 | size_t len; | ||
747 | struct elevator_type *e; | 788 | struct elevator_type *e; |
748 | 789 | ||
749 | memset(elevator_name, 0, sizeof(elevator_name)); | 790 | elevator_name[sizeof(elevator_name) - 1] = '\0'; |
750 | strncpy(elevator_name, name, sizeof(elevator_name)); | 791 | strncpy(elevator_name, name, sizeof(elevator_name) - 1); |
792 | len = strlen(elevator_name); | ||
751 | 793 | ||
752 | if (elevator_name[strlen(elevator_name) - 1] == '\n') | 794 | if (len && elevator_name[len - 1] == '\n') |
753 | elevator_name[strlen(elevator_name) - 1] = '\0'; | 795 | elevator_name[len - 1] = '\0'; |
754 | 796 | ||
755 | e = elevator_get(elevator_name); | 797 | e = elevator_get(elevator_name); |
756 | if (!e) { | 798 | if (!e) { |
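
The elv_iosched_store() hunk above replaces a potentially unterminated strncpy() and an unconditional elevator_name[strlen(...) - 1] access (which indexes before the buffer on an empty write) with an explicitly terminated copy and a length check before stripping the trailing newline. A minimal userspace sketch of the same parsing pattern, with hypothetical names:

#include <stdio.h>
#include <string.h>

#define ELV_NAME_MAX 16

static void parse_elevator_name(const char *input, char out[ELV_NAME_MAX])
{
        size_t len;

        /* bounded copy that is always NUL-terminated */
        out[ELV_NAME_MAX - 1] = '\0';
        strncpy(out, input, ELV_NAME_MAX - 1);
        len = strlen(out);

        /* sysfs writes usually carry a trailing newline; strip it only if present */
        if (len && out[len - 1] == '\n')
                out[len - 1] = '\0';
}

int main(void)
{
        char name[ELV_NAME_MAX];

        parse_elevator_name("deadline\n", name);
        printf("'%s'\n", name);                 /* 'deadline' */

        parse_elevator_name("", name);          /* no out-of-bounds access on empty input */
        printf("'%s'\n", name);
        return 0;
}
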
diff --git a/block/genhd.c b/block/genhd.c
index 54aec4a1ae13..db57546a709d 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -38,34 +38,100 @@ static inline int major_to_index(int major) | |||
38 | return major % MAX_PROBE_HASH; | 38 | return major % MAX_PROBE_HASH; |
39 | } | 39 | } |
40 | 40 | ||
41 | #ifdef CONFIG_PROC_FS | 41 | struct blkdev_info { |
42 | /* get block device names in somewhat random order */ | 42 | int index; |
43 | int get_blkdev_list(char *p, int used) | 43 | struct blk_major_name *bd; |
44 | }; | ||
45 | |||
46 | /* | ||
47 | * iterate over a list of blkdev_info structures. allows | ||
48 | * the major_names array to be iterated over from outside this file | ||
49 | * must be called with the block_subsys_sem held | ||
50 | */ | ||
51 | void *get_next_blkdev(void *dev) | ||
52 | { | ||
53 | struct blkdev_info *info; | ||
54 | |||
55 | if (dev == NULL) { | ||
56 | info = kmalloc(sizeof(*info), GFP_KERNEL); | ||
57 | if (!info) | ||
58 | goto out; | ||
59 | info->index=0; | ||
60 | info->bd = major_names[info->index]; | ||
61 | if (info->bd) | ||
62 | goto out; | ||
63 | } else { | ||
64 | info = dev; | ||
65 | } | ||
66 | |||
67 | while (info->index < ARRAY_SIZE(major_names)) { | ||
68 | if (info->bd) | ||
69 | info->bd = info->bd->next; | ||
70 | if (info->bd) | ||
71 | goto out; | ||
72 | /* | ||
73 | * No devices on this chain, move to the next | ||
74 | */ | ||
75 | info->index++; | ||
76 | info->bd = (info->index < ARRAY_SIZE(major_names)) ? | ||
77 | major_names[info->index] : NULL; | ||
78 | if (info->bd) | ||
79 | goto out; | ||
80 | } | ||
81 | |||
82 | out: | ||
83 | return info; | ||
84 | } | ||
85 | |||
86 | void *acquire_blkdev_list(void) | ||
87 | { | ||
88 | down(&block_subsys_sem); | ||
89 | return get_next_blkdev(NULL); | ||
90 | } | ||
91 | |||
92 | void release_blkdev_list(void *dev) | ||
93 | { | ||
94 | up(&block_subsys_sem); | ||
95 | kfree(dev); | ||
96 | } | ||
97 | |||
98 | |||
99 | /* | ||
100 | * Count the number of records in the blkdev_list. | ||
101 | * must be called with the block_subsys_sem held | ||
102 | */ | ||
103 | int count_blkdev_list(void) | ||
44 | { | 104 | { |
45 | struct blk_major_name *n; | 105 | struct blk_major_name *n; |
46 | int i, len; | 106 | int i, count; |
47 | 107 | ||
48 | len = snprintf(p, (PAGE_SIZE-used), "\nBlock devices:\n"); | 108 | count = 0; |
49 | 109 | ||
50 | down(&block_subsys_sem); | ||
51 | for (i = 0; i < ARRAY_SIZE(major_names); i++) { | 110 | for (i = 0; i < ARRAY_SIZE(major_names); i++) { |
52 | for (n = major_names[i]; n; n = n->next) { | 111 | for (n = major_names[i]; n; n = n->next) |
53 | /* | 112 | count++; |
54 | * If the curent string plus the 5 extra characters | ||
55 | * in the line would run us off the page, then we're done | ||
56 | */ | ||
57 | if ((len + used + strlen(n->name) + 5) >= PAGE_SIZE) | ||
58 | goto page_full; | ||
59 | len += sprintf(p+len, "%3d %s\n", | ||
60 | n->major, n->name); | ||
61 | } | ||
62 | } | 113 | } |
63 | page_full: | ||
64 | up(&block_subsys_sem); | ||
65 | 114 | ||
66 | return len; | 115 | return count; |
67 | } | 116 | } |
68 | #endif | 117 | |
118 | /* | ||
119 | * extract the major and name values from a blkdev_info struct | ||
120 | * passed in as a void to *dev. Must be called with | ||
121 | * block_subsys_sem held | ||
122 | */ | ||
123 | int get_blkdev_info(void *dev, int *major, char **name) | ||
124 | { | ||
125 | struct blkdev_info *info = dev; | ||
126 | |||
127 | if (info->bd == NULL) | ||
128 | return 1; | ||
129 | |||
130 | *major = info->bd->major; | ||
131 | *name = info->bd->name; | ||
132 | return 0; | ||
133 | } | ||
134 | |||
69 | 135 | ||
70 | int register_blkdev(unsigned int major, const char *name) | 136 | int register_blkdev(unsigned int major, const char *name) |
71 | { | 137 | { |
@@ -358,7 +424,7 @@ static struct sysfs_ops disk_sysfs_ops = { | |||
358 | static ssize_t disk_uevent_store(struct gendisk * disk, | 424 | static ssize_t disk_uevent_store(struct gendisk * disk, |
359 | const char *buf, size_t count) | 425 | const char *buf, size_t count) |
360 | { | 426 | { |
361 | kobject_hotplug(&disk->kobj, KOBJ_ADD); | 427 | kobject_uevent(&disk->kobj, KOBJ_ADD); |
362 | return count; | 428 | return count; |
363 | } | 429 | } |
364 | static ssize_t disk_dev_read(struct gendisk * disk, char *page) | 430 | static ssize_t disk_dev_read(struct gendisk * disk, char *page) |
@@ -391,12 +457,14 @@ static ssize_t disk_stats_read(struct gendisk * disk, char *page) | |||
391 | "%8u %8u %8llu %8u " | 457 | "%8u %8u %8llu %8u " |
392 | "%8u %8u %8u" | 458 | "%8u %8u %8u" |
393 | "\n", | 459 | "\n", |
394 | disk_stat_read(disk, ios[0]), disk_stat_read(disk, merges[0]), | 460 | disk_stat_read(disk, ios[READ]), |
395 | (unsigned long long)disk_stat_read(disk, sectors[0]), | 461 | disk_stat_read(disk, merges[READ]), |
396 | jiffies_to_msecs(disk_stat_read(disk, ticks[0])), | 462 | (unsigned long long)disk_stat_read(disk, sectors[READ]), |
397 | disk_stat_read(disk, ios[1]), disk_stat_read(disk, merges[1]), | 463 | jiffies_to_msecs(disk_stat_read(disk, ticks[READ])), |
398 | (unsigned long long)disk_stat_read(disk, sectors[1]), | 464 | disk_stat_read(disk, ios[WRITE]), |
399 | jiffies_to_msecs(disk_stat_read(disk, ticks[1])), | 465 | disk_stat_read(disk, merges[WRITE]), |
466 | (unsigned long long)disk_stat_read(disk, sectors[WRITE]), | ||
467 | jiffies_to_msecs(disk_stat_read(disk, ticks[WRITE])), | ||
400 | disk->in_flight, | 468 | disk->in_flight, |
401 | jiffies_to_msecs(disk_stat_read(disk, io_ticks)), | 469 | jiffies_to_msecs(disk_stat_read(disk, io_ticks)), |
402 | jiffies_to_msecs(disk_stat_read(disk, time_in_queue))); | 470 | jiffies_to_msecs(disk_stat_read(disk, time_in_queue))); |
@@ -453,14 +521,14 @@ static struct kobj_type ktype_block = { | |||
453 | 521 | ||
454 | extern struct kobj_type ktype_part; | 522 | extern struct kobj_type ktype_part; |
455 | 523 | ||
456 | static int block_hotplug_filter(struct kset *kset, struct kobject *kobj) | 524 | static int block_uevent_filter(struct kset *kset, struct kobject *kobj) |
457 | { | 525 | { |
458 | struct kobj_type *ktype = get_ktype(kobj); | 526 | struct kobj_type *ktype = get_ktype(kobj); |
459 | 527 | ||
460 | return ((ktype == &ktype_block) || (ktype == &ktype_part)); | 528 | return ((ktype == &ktype_block) || (ktype == &ktype_part)); |
461 | } | 529 | } |
462 | 530 | ||
463 | static int block_hotplug(struct kset *kset, struct kobject *kobj, char **envp, | 531 | static int block_uevent(struct kset *kset, struct kobject *kobj, char **envp, |
464 | int num_envp, char *buffer, int buffer_size) | 532 | int num_envp, char *buffer, int buffer_size) |
465 | { | 533 | { |
466 | struct kobj_type *ktype = get_ktype(kobj); | 534 | struct kobj_type *ktype = get_ktype(kobj); |
@@ -472,40 +540,40 @@ static int block_hotplug(struct kset *kset, struct kobject *kobj, char **envp, | |||
472 | 540 | ||
473 | if (ktype == &ktype_block) { | 541 | if (ktype == &ktype_block) { |
474 | disk = container_of(kobj, struct gendisk, kobj); | 542 | disk = container_of(kobj, struct gendisk, kobj); |
475 | add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, | 543 | add_uevent_var(envp, num_envp, &i, buffer, buffer_size, |
476 | &length, "MINOR=%u", disk->first_minor); | 544 | &length, "MINOR=%u", disk->first_minor); |
477 | } else if (ktype == &ktype_part) { | 545 | } else if (ktype == &ktype_part) { |
478 | disk = container_of(kobj->parent, struct gendisk, kobj); | 546 | disk = container_of(kobj->parent, struct gendisk, kobj); |
479 | part = container_of(kobj, struct hd_struct, kobj); | 547 | part = container_of(kobj, struct hd_struct, kobj); |
480 | add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, | 548 | add_uevent_var(envp, num_envp, &i, buffer, buffer_size, |
481 | &length, "MINOR=%u", | 549 | &length, "MINOR=%u", |
482 | disk->first_minor + part->partno); | 550 | disk->first_minor + part->partno); |
483 | } else | 551 | } else |
484 | return 0; | 552 | return 0; |
485 | 553 | ||
486 | add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, &length, | 554 | add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, |
487 | "MAJOR=%u", disk->major); | 555 | "MAJOR=%u", disk->major); |
488 | 556 | ||
489 | /* add physical device, backing this device */ | 557 | /* add physical device, backing this device */ |
490 | physdev = disk->driverfs_dev; | 558 | physdev = disk->driverfs_dev; |
491 | if (physdev) { | 559 | if (physdev) { |
492 | char *path = kobject_get_path(&physdev->kobj, GFP_KERNEL); | 560 | char *path = kobject_get_path(&physdev->kobj, GFP_KERNEL); |
493 | 561 | ||
494 | add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, | 562 | add_uevent_var(envp, num_envp, &i, buffer, buffer_size, |
495 | &length, "PHYSDEVPATH=%s", path); | 563 | &length, "PHYSDEVPATH=%s", path); |
496 | kfree(path); | 564 | kfree(path); |
497 | 565 | ||
498 | if (physdev->bus) | 566 | if (physdev->bus) |
499 | add_hotplug_env_var(envp, num_envp, &i, | 567 | add_uevent_var(envp, num_envp, &i, |
500 | buffer, buffer_size, &length, | 568 | buffer, buffer_size, &length, |
501 | "PHYSDEVBUS=%s", | 569 | "PHYSDEVBUS=%s", |
502 | physdev->bus->name); | 570 | physdev->bus->name); |
503 | 571 | ||
504 | if (physdev->driver) | 572 | if (physdev->driver) |
505 | add_hotplug_env_var(envp, num_envp, &i, | 573 | add_uevent_var(envp, num_envp, &i, |
506 | buffer, buffer_size, &length, | 574 | buffer, buffer_size, &length, |
507 | "PHYSDEVDRIVER=%s", | 575 | "PHYSDEVDRIVER=%s", |
508 | physdev->driver->name); | 576 | physdev->driver->name); |
509 | } | 577 | } |
510 | 578 | ||
511 | /* terminate, set to next free slot, shrink available space */ | 579 | /* terminate, set to next free slot, shrink available space */ |
@@ -518,13 +586,13 @@ static int block_hotplug(struct kset *kset, struct kobject *kobj, char **envp, | |||
518 | return 0; | 586 | return 0; |
519 | } | 587 | } |
520 | 588 | ||
521 | static struct kset_hotplug_ops block_hotplug_ops = { | 589 | static struct kset_uevent_ops block_uevent_ops = { |
522 | .filter = block_hotplug_filter, | 590 | .filter = block_uevent_filter, |
523 | .hotplug = block_hotplug, | 591 | .uevent = block_uevent, |
524 | }; | 592 | }; |
525 | 593 | ||
526 | /* declare block_subsys. */ | 594 | /* declare block_subsys. */ |
527 | static decl_subsys(block, &ktype_block, &block_hotplug_ops); | 595 | static decl_subsys(block, &ktype_block, &block_uevent_ops); |
528 | 596 | ||
529 | 597 | ||
530 | /* | 598 | /* |
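
The genhd.c changes swap the page-buffer get_blkdev_list() for an iterator API (acquire_blkdev_list(), get_next_blkdev(), get_blkdev_info(), release_blkdev_list()) plus count_blkdev_list(), so the /proc code can walk the major_names hash from outside this file without the old PAGE_SIZE truncation. The counting walk itself is just a chained-hash traversal; a userspace toy version, with hypothetical majors and names:

#include <stdio.h>
#include <stddef.h>

#define MAX_PROBE_HASH 4                        /* the kernel table is larger */

struct toy_major_name {
        struct toy_major_name *next;
        int major;
        const char *name;
};

/* majors 4 and 8 hash to bucket 0 (major % MAX_PROBE_HASH), major 9 to bucket 1 */
static struct toy_major_name m8 = { NULL, 8, "diskb" };
static struct toy_major_name m4 = { &m8,  4, "diska" };
static struct toy_major_name m9 = { NULL, 9, "diskc" };

static struct toy_major_name *major_names[MAX_PROBE_HASH] = { &m4, &m9 };

/* walk every bucket and follow each chain, as count_blkdev_list() does */
static int count_blkdev_list(void)
{
        struct toy_major_name *n;
        int i, count = 0;

        for (i = 0; i < MAX_PROBE_HASH; i++)
                for (n = major_names[i]; n; n = n->next)
                        count++;
        return count;
}

int main(void)
{
        printf("%d registered block majors\n", count_blkdev_list());   /* 3 */
        return 0;
}
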
diff --git a/block/ioctl.c b/block/ioctl.c
index 6e278474f9a8..e1109491c234 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -1,6 +1,7 @@ | |||
1 | #include <linux/sched.h> /* for capable() */ | 1 | #include <linux/capability.h> |
2 | #include <linux/blkdev.h> | 2 | #include <linux/blkdev.h> |
3 | #include <linux/blkpg.h> | 3 | #include <linux/blkpg.h> |
4 | #include <linux/hdreg.h> | ||
4 | #include <linux/backing-dev.h> | 5 | #include <linux/backing-dev.h> |
5 | #include <linux/buffer_head.h> | 6 | #include <linux/buffer_head.h> |
6 | #include <linux/smp_lock.h> | 7 | #include <linux/smp_lock.h> |
@@ -245,6 +246,27 @@ int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd, | |||
245 | set_device_ro(bdev, n); | 246 | set_device_ro(bdev, n); |
246 | unlock_kernel(); | 247 | unlock_kernel(); |
247 | return 0; | 248 | return 0; |
249 | case HDIO_GETGEO: { | ||
250 | struct hd_geometry geo; | ||
251 | |||
252 | if (!arg) | ||
253 | return -EINVAL; | ||
254 | if (!disk->fops->getgeo) | ||
255 | return -ENOTTY; | ||
256 | |||
257 | /* | ||
258 | * We need to set the startsect first, the driver may | ||
259 | * want to override it. | ||
260 | */ | ||
261 | geo.start = get_start_sect(bdev); | ||
262 | ret = disk->fops->getgeo(bdev, &geo); | ||
263 | if (ret) | ||
264 | return ret; | ||
265 | if (copy_to_user((struct hd_geometry __user *)arg, &geo, | ||
266 | sizeof(geo))) | ||
267 | return -EFAULT; | ||
268 | return 0; | ||
269 | } | ||
248 | } | 270 | } |
249 | 271 | ||
250 | lock_kernel(); | 272 | lock_kernel(); |
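
The ioctl.c hunk moves HDIO_GETGEO handling into the generic blkdev_ioctl() path: the block layer pre-fills geo.start from the partition offset, the driver's new getgeo() method fills in the rest, and -ENOTTY is returned when a driver provides none. The userspace side of the call is unchanged; a minimal caller looks like the sketch below (the device path is only an example and the node must be readable).

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/hdreg.h>

int main(int argc, char **argv)
{
        const char *dev = argc > 1 ? argv[1] : "/dev/sda";      /* example path */
        struct hd_geometry geo;
        int fd = open(dev, O_RDONLY | O_NONBLOCK);

        if (fd < 0) {
                perror(dev);
                return 1;
        }
        if (ioctl(fd, HDIO_GETGEO, &geo) < 0) {
                perror("HDIO_GETGEO");
                close(fd);
                return 1;
        }
        printf("%s: heads=%u sectors=%u cylinders=%u start=%lu\n", dev,
               (unsigned)geo.heads, (unsigned)geo.sectors,
               (unsigned)geo.cylinders, geo.start);
        close(fd);
        return 0;
}
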
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 5f52e30b43f8..8e27d0ab0d7c 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -1,6 +1,4 @@ | |||
1 | /* | 1 | /* |
2 | * linux/drivers/block/ll_rw_blk.c | ||
3 | * | ||
4 | * Copyright (C) 1991, 1992 Linus Torvalds | 2 | * Copyright (C) 1991, 1992 Linus Torvalds |
5 | * Copyright (C) 1994, Karl Keyte: Added support for disk statistics | 3 | * Copyright (C) 1994, Karl Keyte: Added support for disk statistics |
6 | * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE | 4 | * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE |
@@ -28,7 +26,8 @@ | |||
28 | #include <linux/slab.h> | 26 | #include <linux/slab.h> |
29 | #include <linux/swap.h> | 27 | #include <linux/swap.h> |
30 | #include <linux/writeback.h> | 28 | #include <linux/writeback.h> |
31 | #include <linux/blkdev.h> | 29 | #include <linux/interrupt.h> |
30 | #include <linux/cpu.h> | ||
32 | 31 | ||
33 | /* | 32 | /* |
34 | * for max sense size | 33 | * for max sense size |
@@ -38,6 +37,8 @@ | |||
38 | static void blk_unplug_work(void *data); | 37 | static void blk_unplug_work(void *data); |
39 | static void blk_unplug_timeout(unsigned long data); | 38 | static void blk_unplug_timeout(unsigned long data); |
40 | static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io); | 39 | static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io); |
40 | static void init_request_from_bio(struct request *req, struct bio *bio); | ||
41 | static int __make_request(request_queue_t *q, struct bio *bio); | ||
41 | 42 | ||
42 | /* | 43 | /* |
43 | * For the allocated request tables | 44 | * For the allocated request tables |
@@ -62,13 +63,15 @@ static wait_queue_head_t congestion_wqh[2] = { | |||
62 | /* | 63 | /* |
63 | * Controlling structure to kblockd | 64 | * Controlling structure to kblockd |
64 | */ | 65 | */ |
65 | static struct workqueue_struct *kblockd_workqueue; | 66 | static struct workqueue_struct *kblockd_workqueue; |
66 | 67 | ||
67 | unsigned long blk_max_low_pfn, blk_max_pfn; | 68 | unsigned long blk_max_low_pfn, blk_max_pfn; |
68 | 69 | ||
69 | EXPORT_SYMBOL(blk_max_low_pfn); | 70 | EXPORT_SYMBOL(blk_max_low_pfn); |
70 | EXPORT_SYMBOL(blk_max_pfn); | 71 | EXPORT_SYMBOL(blk_max_pfn); |
71 | 72 | ||
73 | static DEFINE_PER_CPU(struct list_head, blk_cpu_done); | ||
74 | |||
72 | /* Amount of time in which a process may batch requests */ | 75 | /* Amount of time in which a process may batch requests */ |
73 | #define BLK_BATCH_TIME (HZ/50UL) | 76 | #define BLK_BATCH_TIME (HZ/50UL) |
74 | 77 | ||
@@ -207,6 +210,13 @@ void blk_queue_merge_bvec(request_queue_t *q, merge_bvec_fn *mbfn) | |||
207 | 210 | ||
208 | EXPORT_SYMBOL(blk_queue_merge_bvec); | 211 | EXPORT_SYMBOL(blk_queue_merge_bvec); |
209 | 212 | ||
213 | void blk_queue_softirq_done(request_queue_t *q, softirq_done_fn *fn) | ||
214 | { | ||
215 | q->softirq_done_fn = fn; | ||
216 | } | ||
217 | |||
218 | EXPORT_SYMBOL(blk_queue_softirq_done); | ||
219 | |||
210 | /** | 220 | /** |
211 | * blk_queue_make_request - define an alternate make_request function for a device | 221 | * blk_queue_make_request - define an alternate make_request function for a device |
212 | * @q: the request queue for the device to be affected | 222 | * @q: the request queue for the device to be affected |
@@ -241,7 +251,7 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) | |||
241 | q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; | 251 | q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; |
242 | q->backing_dev_info.state = 0; | 252 | q->backing_dev_info.state = 0; |
243 | q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY; | 253 | q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY; |
244 | blk_queue_max_sectors(q, MAX_SECTORS); | 254 | blk_queue_max_sectors(q, SAFE_MAX_SECTORS); |
245 | blk_queue_hardsect_size(q, 512); | 255 | blk_queue_hardsect_size(q, 512); |
246 | blk_queue_dma_alignment(q, 511); | 256 | blk_queue_dma_alignment(q, 511); |
247 | blk_queue_congestion_threshold(q); | 257 | blk_queue_congestion_threshold(q); |
@@ -270,6 +280,7 @@ EXPORT_SYMBOL(blk_queue_make_request); | |||
270 | static inline void rq_init(request_queue_t *q, struct request *rq) | 280 | static inline void rq_init(request_queue_t *q, struct request *rq) |
271 | { | 281 | { |
272 | INIT_LIST_HEAD(&rq->queuelist); | 282 | INIT_LIST_HEAD(&rq->queuelist); |
283 | INIT_LIST_HEAD(&rq->donelist); | ||
273 | 284 | ||
274 | rq->errors = 0; | 285 | rq->errors = 0; |
275 | rq->rq_status = RQ_ACTIVE; | 286 | rq->rq_status = RQ_ACTIVE; |
@@ -286,12 +297,13 @@ static inline void rq_init(request_queue_t *q, struct request *rq) | |||
286 | rq->sense = NULL; | 297 | rq->sense = NULL; |
287 | rq->end_io = NULL; | 298 | rq->end_io = NULL; |
288 | rq->end_io_data = NULL; | 299 | rq->end_io_data = NULL; |
300 | rq->completion_data = NULL; | ||
289 | } | 301 | } |
290 | 302 | ||
291 | /** | 303 | /** |
292 | * blk_queue_ordered - does this queue support ordered writes | 304 | * blk_queue_ordered - does this queue support ordered writes |
293 | * @q: the request queue | 305 | * @q: the request queue |
294 | * @flag: see below | 306 | * @ordered: one of QUEUE_ORDERED_* |
295 | * | 307 | * |
296 | * Description: | 308 | * Description: |
297 | * For journalled file systems, doing ordered writes on a commit | 309 | * For journalled file systems, doing ordered writes on a commit |
@@ -300,28 +312,30 @@ static inline void rq_init(request_queue_t *q, struct request *rq) | |||
300 | * feature should call this function and indicate so. | 312 | * feature should call this function and indicate so. |
301 | * | 313 | * |
302 | **/ | 314 | **/ |
303 | void blk_queue_ordered(request_queue_t *q, int flag) | 315 | int blk_queue_ordered(request_queue_t *q, unsigned ordered, |
304 | { | 316 | prepare_flush_fn *prepare_flush_fn) |
305 | switch (flag) { | 317 | { |
306 | case QUEUE_ORDERED_NONE: | 318 | if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) && |
307 | if (q->flush_rq) | 319 | prepare_flush_fn == NULL) { |
308 | kmem_cache_free(request_cachep, q->flush_rq); | 320 | printk(KERN_ERR "blk_queue_ordered: prepare_flush_fn required\n"); |
309 | q->flush_rq = NULL; | 321 | return -EINVAL; |
310 | q->ordered = flag; | ||
311 | break; | ||
312 | case QUEUE_ORDERED_TAG: | ||
313 | q->ordered = flag; | ||
314 | break; | ||
315 | case QUEUE_ORDERED_FLUSH: | ||
316 | q->ordered = flag; | ||
317 | if (!q->flush_rq) | ||
318 | q->flush_rq = kmem_cache_alloc(request_cachep, | ||
319 | GFP_KERNEL); | ||
320 | break; | ||
321 | default: | ||
322 | printk("blk_queue_ordered: bad value %d\n", flag); | ||
323 | break; | ||
324 | } | 322 | } |
323 | |||
324 | if (ordered != QUEUE_ORDERED_NONE && | ||
325 | ordered != QUEUE_ORDERED_DRAIN && | ||
326 | ordered != QUEUE_ORDERED_DRAIN_FLUSH && | ||
327 | ordered != QUEUE_ORDERED_DRAIN_FUA && | ||
328 | ordered != QUEUE_ORDERED_TAG && | ||
329 | ordered != QUEUE_ORDERED_TAG_FLUSH && | ||
330 | ordered != QUEUE_ORDERED_TAG_FUA) { | ||
331 | printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered); | ||
332 | return -EINVAL; | ||
333 | } | ||
334 | |||
335 | q->next_ordered = ordered; | ||
336 | q->prepare_flush_fn = prepare_flush_fn; | ||
337 | |||
338 | return 0; | ||
325 | } | 339 | } |
326 | 340 | ||
327 | EXPORT_SYMBOL(blk_queue_ordered); | 341 | EXPORT_SYMBOL(blk_queue_ordered); |
@@ -346,167 +360,265 @@ EXPORT_SYMBOL(blk_queue_issue_flush_fn); | |||
346 | /* | 360 | /* |
347 | * Cache flushing for ordered writes handling | 361 | * Cache flushing for ordered writes handling |
348 | */ | 362 | */ |
349 | static void blk_pre_flush_end_io(struct request *flush_rq) | 363 | inline unsigned blk_ordered_cur_seq(request_queue_t *q) |
350 | { | 364 | { |
351 | struct request *rq = flush_rq->end_io_data; | 365 | if (!q->ordseq) |
352 | request_queue_t *q = rq->q; | 366 | return 0; |
353 | 367 | return 1 << ffz(q->ordseq); | |
354 | elv_completed_request(q, flush_rq); | ||
355 | |||
356 | rq->flags |= REQ_BAR_PREFLUSH; | ||
357 | |||
358 | if (!flush_rq->errors) | ||
359 | elv_requeue_request(q, rq); | ||
360 | else { | ||
361 | q->end_flush_fn(q, flush_rq); | ||
362 | clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags); | ||
363 | q->request_fn(q); | ||
364 | } | ||
365 | } | 368 | } |
366 | 369 | ||
367 | static void blk_post_flush_end_io(struct request *flush_rq) | 370 | unsigned blk_ordered_req_seq(struct request *rq) |
368 | { | 371 | { |
369 | struct request *rq = flush_rq->end_io_data; | ||
370 | request_queue_t *q = rq->q; | 372 | request_queue_t *q = rq->q; |
371 | 373 | ||
372 | elv_completed_request(q, flush_rq); | 374 | BUG_ON(q->ordseq == 0); |
373 | 375 | ||
374 | rq->flags |= REQ_BAR_POSTFLUSH; | 376 | if (rq == &q->pre_flush_rq) |
377 | return QUEUE_ORDSEQ_PREFLUSH; | ||
378 | if (rq == &q->bar_rq) | ||
379 | return QUEUE_ORDSEQ_BAR; | ||
380 | if (rq == &q->post_flush_rq) | ||
381 | return QUEUE_ORDSEQ_POSTFLUSH; | ||
375 | 382 | ||
376 | q->end_flush_fn(q, flush_rq); | 383 | if ((rq->flags & REQ_ORDERED_COLOR) == |
377 | clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags); | 384 | (q->orig_bar_rq->flags & REQ_ORDERED_COLOR)) |
378 | q->request_fn(q); | 385 | return QUEUE_ORDSEQ_DRAIN; |
386 | else | ||
387 | return QUEUE_ORDSEQ_DONE; | ||
379 | } | 388 | } |
380 | 389 | ||
381 | struct request *blk_start_pre_flush(request_queue_t *q, struct request *rq) | 390 | void blk_ordered_complete_seq(request_queue_t *q, unsigned seq, int error) |
382 | { | 391 | { |
383 | struct request *flush_rq = q->flush_rq; | 392 | struct request *rq; |
384 | 393 | int uptodate; | |
385 | BUG_ON(!blk_barrier_rq(rq)); | ||
386 | 394 | ||
387 | if (test_and_set_bit(QUEUE_FLAG_FLUSH, &q->queue_flags)) | 395 | if (error && !q->orderr) |
388 | return NULL; | 396 | q->orderr = error; |
389 | 397 | ||
390 | rq_init(q, flush_rq); | 398 | BUG_ON(q->ordseq & seq); |
391 | flush_rq->elevator_private = NULL; | 399 | q->ordseq |= seq; |
392 | flush_rq->flags = REQ_BAR_FLUSH; | ||
393 | flush_rq->rq_disk = rq->rq_disk; | ||
394 | flush_rq->rl = NULL; | ||
395 | 400 | ||
396 | /* | 401 | if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) |
397 | * prepare_flush returns 0 if no flush is needed, just mark both | 402 | return; |
398 | * pre and post flush as done in that case | ||
399 | */ | ||
400 | if (!q->prepare_flush_fn(q, flush_rq)) { | ||
401 | rq->flags |= REQ_BAR_PREFLUSH | REQ_BAR_POSTFLUSH; | ||
402 | clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags); | ||
403 | return rq; | ||
404 | } | ||
405 | 403 | ||
406 | /* | 404 | /* |
407 | * some drivers dequeue requests right away, some only after io | 405 | * Okay, sequence complete. |
408 | * completion. make sure the request is dequeued. | ||
409 | */ | 406 | */ |
410 | if (!list_empty(&rq->queuelist)) | 407 | rq = q->orig_bar_rq; |
411 | blkdev_dequeue_request(rq); | 408 | uptodate = q->orderr ? q->orderr : 1; |
412 | 409 | ||
413 | flush_rq->end_io_data = rq; | 410 | q->ordseq = 0; |
414 | flush_rq->end_io = blk_pre_flush_end_io; | ||
415 | 411 | ||
416 | __elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0); | 412 | end_that_request_first(rq, uptodate, rq->hard_nr_sectors); |
417 | return flush_rq; | 413 | end_that_request_last(rq, uptodate); |
418 | } | 414 | } |
419 | 415 | ||
420 | static void blk_start_post_flush(request_queue_t *q, struct request *rq) | 416 | static void pre_flush_end_io(struct request *rq, int error) |
421 | { | 417 | { |
422 | struct request *flush_rq = q->flush_rq; | 418 | elv_completed_request(rq->q, rq); |
419 | blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error); | ||
420 | } | ||
423 | 421 | ||
424 | BUG_ON(!blk_barrier_rq(rq)); | 422 | static void bar_end_io(struct request *rq, int error) |
423 | { | ||
424 | elv_completed_request(rq->q, rq); | ||
425 | blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error); | ||
426 | } | ||
425 | 427 | ||
426 | rq_init(q, flush_rq); | 428 | static void post_flush_end_io(struct request *rq, int error) |
427 | flush_rq->elevator_private = NULL; | 429 | { |
428 | flush_rq->flags = REQ_BAR_FLUSH; | 430 | elv_completed_request(rq->q, rq); |
429 | flush_rq->rq_disk = rq->rq_disk; | 431 | blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error); |
430 | flush_rq->rl = NULL; | 432 | } |
431 | 433 | ||
432 | if (q->prepare_flush_fn(q, flush_rq)) { | 434 | static void queue_flush(request_queue_t *q, unsigned which) |
433 | flush_rq->end_io_data = rq; | 435 | { |
434 | flush_rq->end_io = blk_post_flush_end_io; | 436 | struct request *rq; |
437 | rq_end_io_fn *end_io; | ||
435 | 438 | ||
436 | __elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0); | 439 | if (which == QUEUE_ORDERED_PREFLUSH) { |
437 | q->request_fn(q); | 440 | rq = &q->pre_flush_rq; |
441 | end_io = pre_flush_end_io; | ||
442 | } else { | ||
443 | rq = &q->post_flush_rq; | ||
444 | end_io = post_flush_end_io; | ||
438 | } | 445 | } |
446 | |||
447 | rq_init(q, rq); | ||
448 | rq->flags = REQ_HARDBARRIER; | ||
449 | rq->elevator_private = NULL; | ||
450 | rq->rq_disk = q->bar_rq.rq_disk; | ||
451 | rq->rl = NULL; | ||
452 | rq->end_io = end_io; | ||
453 | q->prepare_flush_fn(q, rq); | ||
454 | |||
455 | __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0); | ||
439 | } | 456 | } |
440 | 457 | ||
441 | static inline int blk_check_end_barrier(request_queue_t *q, struct request *rq, | 458 | static inline struct request *start_ordered(request_queue_t *q, |
442 | int sectors) | 459 | struct request *rq) |
443 | { | 460 | { |
444 | if (sectors > rq->nr_sectors) | 461 | q->bi_size = 0; |
445 | sectors = rq->nr_sectors; | 462 | q->orderr = 0; |
463 | q->ordered = q->next_ordered; | ||
464 | q->ordseq |= QUEUE_ORDSEQ_STARTED; | ||
465 | |||
466 | /* | ||
467 | * Prep proxy barrier request. | ||
468 | */ | ||
469 | blkdev_dequeue_request(rq); | ||
470 | q->orig_bar_rq = rq; | ||
471 | rq = &q->bar_rq; | ||
472 | rq_init(q, rq); | ||
473 | rq->flags = bio_data_dir(q->orig_bar_rq->bio); | ||
474 | rq->flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0; | ||
475 | rq->elevator_private = NULL; | ||
476 | rq->rl = NULL; | ||
477 | init_request_from_bio(rq, q->orig_bar_rq->bio); | ||
478 | rq->end_io = bar_end_io; | ||
479 | |||
480 | /* | ||
481 | * Queue ordered sequence. As we stack them at the head, we | ||
482 | * need to queue in reverse order. Note that we rely on the fact | ||
483 | * that no fs request uses ELEVATOR_INSERT_FRONT, and thus no fs | ||
484 | * request gets in between the ordered sequence. | ||
485 | */ | ||
486 | if (q->ordered & QUEUE_ORDERED_POSTFLUSH) | ||
487 | queue_flush(q, QUEUE_ORDERED_POSTFLUSH); | ||
488 | else | ||
489 | q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH; | ||
446 | 490 | ||
447 | rq->nr_sectors -= sectors; | 491 | __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0); |
448 | return rq->nr_sectors; | 492 | |
493 | if (q->ordered & QUEUE_ORDERED_PREFLUSH) { | ||
494 | queue_flush(q, QUEUE_ORDERED_PREFLUSH); | ||
495 | rq = &q->pre_flush_rq; | ||
496 | } else | ||
497 | q->ordseq |= QUEUE_ORDSEQ_PREFLUSH; | ||
498 | |||
499 | if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0) | ||
500 | q->ordseq |= QUEUE_ORDSEQ_DRAIN; | ||
501 | else | ||
502 | rq = NULL; | ||
503 | |||
504 | return rq; | ||
449 | } | 505 | } |
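[Editor's note] Because every piece of the sequence is added with ELEVATOR_INSERT_FRONT, start_ordered() has to queue them back to front, as the comment above says. Tracing the insertions for a mode with both flushes enabled makes this concrete:

    /*
     * insert post_flush_rq   ->  queue head: post_flush
     * insert bar_rq          ->  queue head: bar, post_flush
     * insert pre_flush_rq    ->  queue head: pre_flush, bar, post_flush
     *
     * Dispatch order is therefore pre-flush -> barrier write -> post-flush,
     * preceded by draining the requests already in flight unless the queue
     * uses ordered tags (QUEUE_ORDERED_TAG).
     */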
450 | 506 | ||
451 | static int __blk_complete_barrier_rq(request_queue_t *q, struct request *rq, | 507 | int blk_do_ordered(request_queue_t *q, struct request **rqp) |
452 | int sectors, int queue_locked) | ||
453 | { | 508 | { |
454 | if (q->ordered != QUEUE_ORDERED_FLUSH) | 509 | struct request *rq = *rqp, *allowed_rq; |
455 | return 0; | 510 | int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq); |
456 | if (!blk_fs_request(rq) || !blk_barrier_rq(rq)) | ||
457 | return 0; | ||
458 | if (blk_barrier_postflush(rq)) | ||
459 | return 0; | ||
460 | 511 | ||
461 | if (!blk_check_end_barrier(q, rq, sectors)) { | 512 | if (!q->ordseq) { |
462 | unsigned long flags = 0; | 513 | if (!is_barrier) |
514 | return 1; | ||
463 | 515 | ||
464 | if (!queue_locked) | 516 | if (q->next_ordered != QUEUE_ORDERED_NONE) { |
465 | spin_lock_irqsave(q->queue_lock, flags); | 517 | *rqp = start_ordered(q, rq); |
518 | return 1; | ||
519 | } else { | ||
520 | /* | ||
521 | * This can happen when the queue switches to | ||
522 | * ORDERED_NONE while this request is on it. | ||
523 | */ | ||
524 | blkdev_dequeue_request(rq); | ||
525 | end_that_request_first(rq, -EOPNOTSUPP, | ||
526 | rq->hard_nr_sectors); | ||
527 | end_that_request_last(rq, -EOPNOTSUPP); | ||
528 | *rqp = NULL; | ||
529 | return 0; | ||
530 | } | ||
531 | } | ||
466 | 532 | ||
467 | blk_start_post_flush(q, rq); | 533 | if (q->ordered & QUEUE_ORDERED_TAG) { |
534 | if (is_barrier && rq != &q->bar_rq) | ||
535 | *rqp = NULL; | ||
536 | return 1; | ||
537 | } | ||
468 | 538 | ||
469 | if (!queue_locked) | 539 | switch (blk_ordered_cur_seq(q)) { |
470 | spin_unlock_irqrestore(q->queue_lock, flags); | 540 | case QUEUE_ORDSEQ_PREFLUSH: |
541 | allowed_rq = &q->pre_flush_rq; | ||
542 | break; | ||
543 | case QUEUE_ORDSEQ_BAR: | ||
544 | allowed_rq = &q->bar_rq; | ||
545 | break; | ||
546 | case QUEUE_ORDSEQ_POSTFLUSH: | ||
547 | allowed_rq = &q->post_flush_rq; | ||
548 | break; | ||
549 | default: | ||
550 | allowed_rq = NULL; | ||
551 | break; | ||
471 | } | 552 | } |
472 | 553 | ||
554 | if (rq != allowed_rq && | ||
555 | (blk_fs_request(rq) || rq == &q->pre_flush_rq || | ||
556 | rq == &q->post_flush_rq)) | ||
557 | *rqp = NULL; | ||
558 | |||
473 | return 1; | 559 | return 1; |
474 | } | 560 | } |
475 | 561 | ||
476 | /** | 562 | static int flush_dry_bio_endio(struct bio *bio, unsigned int bytes, int error) |
477 | * blk_complete_barrier_rq - complete possible barrier request | ||
478 | * @q: the request queue for the device | ||
479 | * @rq: the request | ||
480 | * @sectors: number of sectors to complete | ||
481 | * | ||
482 | * Description: | ||
483 | * Used in driver end_io handling to determine whether to postpone | ||
484 | * completion of a barrier request until a post flush has been done. This | ||
485 | * is the unlocked variant, used if the caller doesn't already hold the | ||
486 | * queue lock. | ||
487 | **/ | ||
488 | int blk_complete_barrier_rq(request_queue_t *q, struct request *rq, int sectors) | ||
489 | { | 563 | { |
490 | return __blk_complete_barrier_rq(q, rq, sectors, 0); | 564 | request_queue_t *q = bio->bi_private; |
565 | struct bio_vec *bvec; | ||
566 | int i; | ||
567 | |||
568 | /* | ||
569 | * This is a dry run; restore bi_sector and size. We'll finish | ||
570 | * this request again with the original bi_end_io after an | ||
571 | * error occurs or post flush is complete. | ||
572 | */ | ||
573 | q->bi_size += bytes; | ||
574 | |||
575 | if (bio->bi_size) | ||
576 | return 1; | ||
577 | |||
578 | /* Rewind bvec's */ | ||
579 | bio->bi_idx = 0; | ||
580 | bio_for_each_segment(bvec, bio, i) { | ||
581 | bvec->bv_len += bvec->bv_offset; | ||
582 | bvec->bv_offset = 0; | ||
583 | } | ||
584 | |||
585 | /* Reset bio */ | ||
586 | set_bit(BIO_UPTODATE, &bio->bi_flags); | ||
587 | bio->bi_size = q->bi_size; | ||
588 | bio->bi_sector -= (q->bi_size >> 9); | ||
589 | q->bi_size = 0; | ||
590 | |||
591 | return 0; | ||
491 | } | 592 | } |
492 | EXPORT_SYMBOL(blk_complete_barrier_rq); | ||
493 | 593 | ||
494 | /** | 594 | static inline int ordered_bio_endio(struct request *rq, struct bio *bio, |
495 | * blk_complete_barrier_rq_locked - complete possible barrier request | 595 | unsigned int nbytes, int error) |
496 | * @q: the request queue for the device | ||
497 | * @rq: the request | ||
498 | * @sectors: number of sectors to complete | ||
499 | * | ||
500 | * Description: | ||
501 | * See blk_complete_barrier_rq(). This variant must be used if the caller | ||
502 | * holds the queue lock. | ||
503 | **/ | ||
504 | int blk_complete_barrier_rq_locked(request_queue_t *q, struct request *rq, | ||
505 | int sectors) | ||
506 | { | 596 | { |
507 | return __blk_complete_barrier_rq(q, rq, sectors, 1); | 597 | request_queue_t *q = rq->q; |
598 | bio_end_io_t *endio; | ||
599 | void *private; | ||
600 | |||
601 | if (&q->bar_rq != rq) | ||
602 | return 0; | ||
603 | |||
604 | /* | ||
605 | * Okay, this is the barrier request in progress, dry finish it. | ||
606 | */ | ||
607 | if (error && !q->orderr) | ||
608 | q->orderr = error; | ||
609 | |||
610 | endio = bio->bi_end_io; | ||
611 | private = bio->bi_private; | ||
612 | bio->bi_end_io = flush_dry_bio_endio; | ||
613 | bio->bi_private = q; | ||
614 | |||
615 | bio_endio(bio, nbytes, error); | ||
616 | |||
617 | bio->bi_end_io = endio; | ||
618 | bio->bi_private = private; | ||
619 | |||
620 | return 1; | ||
508 | } | 621 | } |
509 | EXPORT_SYMBOL(blk_complete_barrier_rq_locked); | ||
510 | 622 | ||
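[Editor's note] flush_dry_bio_endio() and ordered_bio_endio() let the barrier bio be completed once "dry", purely to advance the ordered sequence, and then reused for the real completion. A worked example of the rewind arithmetic, assuming a single 4 KiB bio starting at sector 1000:

    /*
     * Before the dry completion:  bi_sector = 1000, bi_size = 4096
     * bio_endio() advances the bio as usual; flush_dry_bio_endio() collects
     * the completed bytes in q->bi_size and waits for bi_size to reach 0.
     *
     * Rewind:  bi_idx = 0, each bvec's offset/len restored,
     *          bi_size   = q->bi_size          = 4096
     *          bi_sector = 1008 - (4096 >> 9)  = 1008 - 8 = 1000
     *
     * ordered_bio_endio() then restores the original bi_end_io/bi_private,
     * so the bio can be finished for real when blk_ordered_complete_seq()
     * ends the original barrier request.
     */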
511 | /** | 623 | /** |
512 | * blk_queue_bounce_limit - set bounce buffer limit for queue | 624 | * blk_queue_bounce_limit - set bounce buffer limit for queue |
@@ -557,7 +669,12 @@ void blk_queue_max_sectors(request_queue_t *q, unsigned short max_sectors) | |||
557 | printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors); | 669 | printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors); |
558 | } | 670 | } |
559 | 671 | ||
560 | q->max_sectors = q->max_hw_sectors = max_sectors; | 672 | if (BLK_DEF_MAX_SECTORS > max_sectors) |
673 | q->max_hw_sectors = q->max_sectors = max_sectors; | ||
674 | else { | ||
675 | q->max_sectors = BLK_DEF_MAX_SECTORS; | ||
676 | q->max_hw_sectors = max_sectors; | ||
677 | } | ||
561 | } | 678 | } |
562 | 679 | ||
563 | EXPORT_SYMBOL(blk_queue_max_sectors); | 680 | EXPORT_SYMBOL(blk_queue_max_sectors); |
@@ -659,8 +776,8 @@ EXPORT_SYMBOL(blk_queue_hardsect_size); | |||
659 | void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b) | 776 | void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b) |
660 | { | 777 | { |
661 | /* zero is "infinity" */ | 778 | /* zero is "infinity" */ |
662 | t->max_sectors = t->max_hw_sectors = | 779 | t->max_sectors = min_not_zero(t->max_sectors,b->max_sectors); |
663 | min_not_zero(t->max_sectors,b->max_sectors); | 780 | t->max_hw_sectors = min_not_zero(t->max_hw_sectors,b->max_hw_sectors); |
664 | 781 | ||
665 | t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments); | 782 | t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments); |
666 | t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments); | 783 | t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments); |
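[Editor's note] Keeping max_sectors and max_hw_sectors separate matters both for blk_queue_max_sectors() above and for stacking: the soft limit is capped at BLK_DEF_MAX_SECTORS while the hardware limit survives, and the SG_IO/blk_rq_map_* paths later in this patch check against max_hw_sectors. A numeric illustration (BLK_DEF_MAX_SECTORS of 1024 is assumed; the other values are made up):

    /*
     * Bottom driver:   blk_queue_max_sectors(b, 2048);
     *                    b->max_sectors    = BLK_DEF_MAX_SECTORS (1024)
     *                    b->max_hw_sectors = 2048
     *
     * Stacking driver: blk_queue_stack_limits(t, b);
     *                    t->max_sectors    = min_not_zero(t->max_sectors, 1024)
     *                    t->max_hw_sectors = min_not_zero(t->max_hw_sectors, 2048)
     *
     * A 1 MiB SG_IO transfer (2048 sectors) still passes the max_hw_sectors
     * checks even though regular fs requests stay at 512 KiB.
     */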
@@ -1036,12 +1153,13 @@ void blk_queue_invalidate_tags(request_queue_t *q) | |||
1036 | 1153 | ||
1037 | EXPORT_SYMBOL(blk_queue_invalidate_tags); | 1154 | EXPORT_SYMBOL(blk_queue_invalidate_tags); |
1038 | 1155 | ||
1039 | static char *rq_flags[] = { | 1156 | static const char * const rq_flags[] = { |
1040 | "REQ_RW", | 1157 | "REQ_RW", |
1041 | "REQ_FAILFAST", | 1158 | "REQ_FAILFAST", |
1042 | "REQ_SORTED", | 1159 | "REQ_SORTED", |
1043 | "REQ_SOFTBARRIER", | 1160 | "REQ_SOFTBARRIER", |
1044 | "REQ_HARDBARRIER", | 1161 | "REQ_HARDBARRIER", |
1162 | "REQ_FUA", | ||
1045 | "REQ_CMD", | 1163 | "REQ_CMD", |
1046 | "REQ_NOMERGE", | 1164 | "REQ_NOMERGE", |
1047 | "REQ_STARTED", | 1165 | "REQ_STARTED", |
@@ -1061,6 +1179,7 @@ static char *rq_flags[] = { | |||
1061 | "REQ_PM_SUSPEND", | 1179 | "REQ_PM_SUSPEND", |
1062 | "REQ_PM_RESUME", | 1180 | "REQ_PM_RESUME", |
1063 | "REQ_PM_SHUTDOWN", | 1181 | "REQ_PM_SHUTDOWN", |
1182 | "REQ_ORDERED_COLOR", | ||
1064 | }; | 1183 | }; |
1065 | 1184 | ||
1066 | void blk_dump_rq_flags(struct request *rq, char *msg) | 1185 | void blk_dump_rq_flags(struct request *rq, char *msg) |
@@ -1295,9 +1414,15 @@ static inline int ll_new_hw_segment(request_queue_t *q, | |||
1295 | static int ll_back_merge_fn(request_queue_t *q, struct request *req, | 1414 | static int ll_back_merge_fn(request_queue_t *q, struct request *req, |
1296 | struct bio *bio) | 1415 | struct bio *bio) |
1297 | { | 1416 | { |
1417 | unsigned short max_sectors; | ||
1298 | int len; | 1418 | int len; |
1299 | 1419 | ||
1300 | if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) { | 1420 | if (unlikely(blk_pc_request(req))) |
1421 | max_sectors = q->max_hw_sectors; | ||
1422 | else | ||
1423 | max_sectors = q->max_sectors; | ||
1424 | |||
1425 | if (req->nr_sectors + bio_sectors(bio) > max_sectors) { | ||
1301 | req->flags |= REQ_NOMERGE; | 1426 | req->flags |= REQ_NOMERGE; |
1302 | if (req == q->last_merge) | 1427 | if (req == q->last_merge) |
1303 | q->last_merge = NULL; | 1428 | q->last_merge = NULL; |
@@ -1327,9 +1452,16 @@ static int ll_back_merge_fn(request_queue_t *q, struct request *req, | |||
1327 | static int ll_front_merge_fn(request_queue_t *q, struct request *req, | 1452 | static int ll_front_merge_fn(request_queue_t *q, struct request *req, |
1328 | struct bio *bio) | 1453 | struct bio *bio) |
1329 | { | 1454 | { |
1455 | unsigned short max_sectors; | ||
1330 | int len; | 1456 | int len; |
1331 | 1457 | ||
1332 | if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) { | 1458 | if (unlikely(blk_pc_request(req))) |
1459 | max_sectors = q->max_hw_sectors; | ||
1460 | else | ||
1461 | max_sectors = q->max_sectors; | ||
1462 | |||
1463 | |||
1464 | if (req->nr_sectors + bio_sectors(bio) > max_sectors) { | ||
1333 | req->flags |= REQ_NOMERGE; | 1465 | req->flags |= REQ_NOMERGE; |
1334 | if (req == q->last_merge) | 1466 | if (req == q->last_merge) |
1335 | q->last_merge = NULL; | 1467 | q->last_merge = NULL; |
@@ -1625,8 +1757,6 @@ void blk_cleanup_queue(request_queue_t * q) | |||
1625 | if (q->queue_tags) | 1757 | if (q->queue_tags) |
1626 | __blk_queue_free_tags(q); | 1758 | __blk_queue_free_tags(q); |
1627 | 1759 | ||
1628 | blk_queue_ordered(q, QUEUE_ORDERED_NONE); | ||
1629 | |||
1630 | kmem_cache_free(requestq_cachep, q); | 1760 | kmem_cache_free(requestq_cachep, q); |
1631 | } | 1761 | } |
1632 | 1762 | ||
@@ -1651,8 +1781,6 @@ static int blk_init_free_list(request_queue_t *q) | |||
1651 | return 0; | 1781 | return 0; |
1652 | } | 1782 | } |
1653 | 1783 | ||
1654 | static int __make_request(request_queue_t *, struct bio *); | ||
1655 | |||
1656 | request_queue_t *blk_alloc_queue(gfp_t gfp_mask) | 1784 | request_queue_t *blk_alloc_queue(gfp_t gfp_mask) |
1657 | { | 1785 | { |
1658 | return blk_alloc_queue_node(gfp_mask, -1); | 1786 | return blk_alloc_queue_node(gfp_mask, -1); |
@@ -1892,40 +2020,40 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio, | |||
1892 | { | 2020 | { |
1893 | struct request *rq = NULL; | 2021 | struct request *rq = NULL; |
1894 | struct request_list *rl = &q->rq; | 2022 | struct request_list *rl = &q->rq; |
1895 | struct io_context *ioc = current_io_context(GFP_ATOMIC); | 2023 | struct io_context *ioc = NULL; |
1896 | int priv; | 2024 | int may_queue, priv; |
1897 | 2025 | ||
1898 | if (rl->count[rw]+1 >= q->nr_requests) { | 2026 | may_queue = elv_may_queue(q, rw, bio); |
1899 | /* | 2027 | if (may_queue == ELV_MQUEUE_NO) |
1900 | * The queue will fill after this allocation, so set it as | 2028 | goto rq_starved; |
1901 | * full, and mark this process as "batching". This process | ||
1902 | * will be allowed to complete a batch of requests, others | ||
1903 | * will be blocked. | ||
1904 | */ | ||
1905 | if (!blk_queue_full(q, rw)) { | ||
1906 | ioc_set_batching(q, ioc); | ||
1907 | blk_set_queue_full(q, rw); | ||
1908 | } | ||
1909 | } | ||
1910 | |||
1911 | switch (elv_may_queue(q, rw, bio)) { | ||
1912 | case ELV_MQUEUE_NO: | ||
1913 | goto rq_starved; | ||
1914 | case ELV_MQUEUE_MAY: | ||
1915 | break; | ||
1916 | case ELV_MQUEUE_MUST: | ||
1917 | goto get_rq; | ||
1918 | } | ||
1919 | 2029 | ||
1920 | if (blk_queue_full(q, rw) && !ioc_batching(q, ioc)) { | 2030 | if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) { |
1921 | /* | 2031 | if (rl->count[rw]+1 >= q->nr_requests) { |
1922 | * The queue is full and the allocating process is not a | 2032 | ioc = current_io_context(GFP_ATOMIC); |
1923 | * "batcher", and not exempted by the IO scheduler | 2033 | /* |
1924 | */ | 2034 | * The queue will fill after this allocation, so set |
1925 | goto out; | 2035 | * it as full, and mark this process as "batching". |
2036 | * This process will be allowed to complete a batch of | ||
2037 | * requests, others will be blocked. | ||
2038 | */ | ||
2039 | if (!blk_queue_full(q, rw)) { | ||
2040 | ioc_set_batching(q, ioc); | ||
2041 | blk_set_queue_full(q, rw); | ||
2042 | } else { | ||
2043 | if (may_queue != ELV_MQUEUE_MUST | ||
2044 | && !ioc_batching(q, ioc)) { | ||
2045 | /* | ||
2046 | * The queue is full and the allocating | ||
2047 | * process is not a "batcher", and not | ||
2048 | * exempted by the IO scheduler | ||
2049 | */ | ||
2050 | goto out; | ||
2051 | } | ||
2052 | } | ||
2053 | } | ||
2054 | set_queue_congested(q, rw); | ||
1926 | } | 2055 | } |
1927 | 2056 | ||
1928 | get_rq: | ||
1929 | /* | 2057 | /* |
1930 | * Only allow batching queuers to allocate up to 50% over the defined | 2058 | * Only allow batching queuers to allocate up to 50% over the defined |
1931 | * limit of requests, otherwise we could have thousands of requests | 2059 | * limit of requests, otherwise we could have thousands of requests |
@@ -1936,8 +2064,6 @@ get_rq: | |||
1936 | 2064 | ||
1937 | rl->count[rw]++; | 2065 | rl->count[rw]++; |
1938 | rl->starved[rw] = 0; | 2066 | rl->starved[rw] = 0; |
1939 | if (rl->count[rw] >= queue_congestion_on_threshold(q)) | ||
1940 | set_queue_congested(q, rw); | ||
1941 | 2067 | ||
1942 | priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); | 2068 | priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); |
1943 | if (priv) | 2069 | if (priv) |
@@ -1946,7 +2072,7 @@ get_rq: | |||
1946 | spin_unlock_irq(q->queue_lock); | 2072 | spin_unlock_irq(q->queue_lock); |
1947 | 2073 | ||
1948 | rq = blk_alloc_request(q, rw, bio, priv, gfp_mask); | 2074 | rq = blk_alloc_request(q, rw, bio, priv, gfp_mask); |
1949 | if (!rq) { | 2075 | if (unlikely(!rq)) { |
1950 | /* | 2076 | /* |
1951 | * Allocation failed presumably due to memory. Undo anything | 2077 | * Allocation failed presumably due to memory. Undo anything |
1952 | * we might have messed up. | 2078 | * we might have messed up. |
@@ -1971,6 +2097,12 @@ rq_starved: | |||
1971 | goto out; | 2097 | goto out; |
1972 | } | 2098 | } |
1973 | 2099 | ||
2100 | /* | ||
2101 | * ioc may be NULL here, and ioc_batching will be false. That's | ||
2102 | * OK, if the queue is under the request limit then requests need | ||
2103 | * not count toward the nr_batch_requests limit. There will always | ||
2104 | * be some limit enforced by BLK_BATCH_TIME. | ||
2105 | */ | ||
1974 | if (ioc_batching(q, ioc)) | 2106 | if (ioc_batching(q, ioc)) |
1975 | ioc->nr_batch_requests--; | 2107 | ioc->nr_batch_requests--; |
1976 | 2108 | ||
@@ -2146,7 +2278,7 @@ int blk_rq_map_user(request_queue_t *q, struct request *rq, void __user *ubuf, | |||
2146 | struct bio *bio; | 2278 | struct bio *bio; |
2147 | int reading; | 2279 | int reading; |
2148 | 2280 | ||
2149 | if (len > (q->max_sectors << 9)) | 2281 | if (len > (q->max_hw_sectors << 9)) |
2150 | return -EINVAL; | 2282 | return -EINVAL; |
2151 | if (!len || !ubuf) | 2283 | if (!len || !ubuf) |
2152 | return -EINVAL; | 2284 | return -EINVAL; |
@@ -2261,7 +2393,7 @@ int blk_rq_map_kern(request_queue_t *q, struct request *rq, void *kbuf, | |||
2261 | { | 2393 | { |
2262 | struct bio *bio; | 2394 | struct bio *bio; |
2263 | 2395 | ||
2264 | if (len > (q->max_sectors << 9)) | 2396 | if (len > (q->max_hw_sectors << 9)) |
2265 | return -EINVAL; | 2397 | return -EINVAL; |
2266 | if (!len || !kbuf) | 2398 | if (!len || !kbuf) |
2267 | return -EINVAL; | 2399 | return -EINVAL; |
@@ -2297,7 +2429,7 @@ EXPORT_SYMBOL(blk_rq_map_kern); | |||
2297 | */ | 2429 | */ |
2298 | void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk, | 2430 | void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk, |
2299 | struct request *rq, int at_head, | 2431 | struct request *rq, int at_head, |
2300 | void (*done)(struct request *)) | 2432 | rq_end_io_fn *done) |
2301 | { | 2433 | { |
2302 | int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; | 2434 | int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; |
2303 | 2435 | ||
@@ -2308,6 +2440,8 @@ void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk, | |||
2308 | generic_unplug_device(q); | 2440 | generic_unplug_device(q); |
2309 | } | 2441 | } |
2310 | 2442 | ||
2443 | EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); | ||
2444 | |||
2311 | /** | 2445 | /** |
2312 | * blk_execute_rq - insert a request into queue for execution | 2446 | * blk_execute_rq - insert a request into queue for execution |
2313 | * @q: queue to insert the request in | 2447 | * @q: queue to insert the request in |
@@ -2446,7 +2580,7 @@ void disk_round_stats(struct gendisk *disk) | |||
2446 | /* | 2580 | /* |
2447 | * queue lock must be held | 2581 | * queue lock must be held |
2448 | */ | 2582 | */ |
2449 | static void __blk_put_request(request_queue_t *q, struct request *req) | 2583 | void __blk_put_request(request_queue_t *q, struct request *req) |
2450 | { | 2584 | { |
2451 | struct request_list *rl = req->rl; | 2585 | struct request_list *rl = req->rl; |
2452 | 2586 | ||
@@ -2475,6 +2609,8 @@ static void __blk_put_request(request_queue_t *q, struct request *req) | |||
2475 | } | 2609 | } |
2476 | } | 2610 | } |
2477 | 2611 | ||
2612 | EXPORT_SYMBOL_GPL(__blk_put_request); | ||
2613 | |||
2478 | void blk_put_request(struct request *req) | 2614 | void blk_put_request(struct request *req) |
2479 | { | 2615 | { |
2480 | unsigned long flags; | 2616 | unsigned long flags; |
@@ -2497,7 +2633,7 @@ EXPORT_SYMBOL(blk_put_request); | |||
2497 | * blk_end_sync_rq - executes a completion event on a request | 2633 | * blk_end_sync_rq - executes a completion event on a request |
2498 | * @rq: request to complete | 2634 | * @rq: request to complete |
2499 | */ | 2635 | */ |
2500 | void blk_end_sync_rq(struct request *rq) | 2636 | void blk_end_sync_rq(struct request *rq, int error) |
2501 | { | 2637 | { |
2502 | struct completion *waiting = rq->waiting; | 2638 | struct completion *waiting = rq->waiting; |
2503 | 2639 | ||
@@ -2611,29 +2747,35 @@ static inline int attempt_front_merge(request_queue_t *q, struct request *rq) | |||
2611 | return 0; | 2747 | return 0; |
2612 | } | 2748 | } |
2613 | 2749 | ||
2614 | /** | 2750 | static void init_request_from_bio(struct request *req, struct bio *bio) |
2615 | * blk_attempt_remerge - attempt to remerge active head with next request | ||
2616 | * @q: The &request_queue_t belonging to the device | ||
2617 | * @rq: The head request (usually) | ||
2618 | * | ||
2619 | * Description: | ||
2620 | * For head-active devices, the queue can easily be unplugged so quickly | ||
2621 | * that proper merging is not done on the front request. This may hurt | ||
2622 | * performance greatly for some devices. The block layer cannot safely | ||
2623 | * do merging on that first request for these queues, but the driver can | ||
2624 | * call this function and make it happen any way. Only the driver knows | ||
2625 | * when it is safe to do so. | ||
2626 | **/ | ||
2627 | void blk_attempt_remerge(request_queue_t *q, struct request *rq) | ||
2628 | { | 2751 | { |
2629 | unsigned long flags; | 2752 | req->flags |= REQ_CMD; |
2630 | 2753 | ||
2631 | spin_lock_irqsave(q->queue_lock, flags); | 2754 | /* |
2632 | attempt_back_merge(q, rq); | 2755 | * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST) |
2633 | spin_unlock_irqrestore(q->queue_lock, flags); | 2756 | */ |
2634 | } | 2757 | if (bio_rw_ahead(bio) || bio_failfast(bio)) |
2758 | req->flags |= REQ_FAILFAST; | ||
2635 | 2759 | ||
2636 | EXPORT_SYMBOL(blk_attempt_remerge); | 2760 | /* |
2761 | * REQ_BARRIER implies no merging, but let's make it explicit | ||
2762 | */ | ||
2763 | if (unlikely(bio_barrier(bio))) | ||
2764 | req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE); | ||
2765 | |||
2766 | req->errors = 0; | ||
2767 | req->hard_sector = req->sector = bio->bi_sector; | ||
2768 | req->hard_nr_sectors = req->nr_sectors = bio_sectors(bio); | ||
2769 | req->current_nr_sectors = req->hard_cur_sectors = bio_cur_sectors(bio); | ||
2770 | req->nr_phys_segments = bio_phys_segments(req->q, bio); | ||
2771 | req->nr_hw_segments = bio_hw_segments(req->q, bio); | ||
2772 | req->buffer = bio_data(bio); /* see ->buffer comment above */ | ||
2773 | req->waiting = NULL; | ||
2774 | req->bio = req->biotail = bio; | ||
2775 | req->ioprio = bio_prio(bio); | ||
2776 | req->rq_disk = bio->bi_bdev->bd_disk; | ||
2777 | req->start_time = jiffies; | ||
2778 | } | ||
2637 | 2779 | ||
2638 | static int __make_request(request_queue_t *q, struct bio *bio) | 2780 | static int __make_request(request_queue_t *q, struct bio *bio) |
2639 | { | 2781 | { |
@@ -2660,7 +2802,7 @@ static int __make_request(request_queue_t *q, struct bio *bio) | |||
2660 | spin_lock_prefetch(q->queue_lock); | 2802 | spin_lock_prefetch(q->queue_lock); |
2661 | 2803 | ||
2662 | barrier = bio_barrier(bio); | 2804 | barrier = bio_barrier(bio); |
2663 | if (unlikely(barrier) && (q->ordered == QUEUE_ORDERED_NONE)) { | 2805 | if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) { |
2664 | err = -EOPNOTSUPP; | 2806 | err = -EOPNOTSUPP; |
2665 | goto end_io; | 2807 | goto end_io; |
2666 | } | 2808 | } |
@@ -2730,33 +2872,7 @@ get_rq: | |||
2730 | * We don't worry about that case for efficiency. It won't happen | 2872 | * We don't worry about that case for efficiency. It won't happen |
2731 | * often, and the elevators are able to handle it. | 2873 | * often, and the elevators are able to handle it. |
2732 | */ | 2874 | */ |
2733 | 2875 | init_request_from_bio(req, bio); | |
2734 | req->flags |= REQ_CMD; | ||
2735 | |||
2736 | /* | ||
2737 | * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST) | ||
2738 | */ | ||
2739 | if (bio_rw_ahead(bio) || bio_failfast(bio)) | ||
2740 | req->flags |= REQ_FAILFAST; | ||
2741 | |||
2742 | /* | ||
2743 | * REQ_BARRIER implies no merging, but lets make it explicit | ||
2744 | */ | ||
2745 | if (unlikely(barrier)) | ||
2746 | req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE); | ||
2747 | |||
2748 | req->errors = 0; | ||
2749 | req->hard_sector = req->sector = sector; | ||
2750 | req->hard_nr_sectors = req->nr_sectors = nr_sectors; | ||
2751 | req->current_nr_sectors = req->hard_cur_sectors = cur_nr_sectors; | ||
2752 | req->nr_phys_segments = bio_phys_segments(q, bio); | ||
2753 | req->nr_hw_segments = bio_hw_segments(q, bio); | ||
2754 | req->buffer = bio_data(bio); /* see ->buffer comment above */ | ||
2755 | req->waiting = NULL; | ||
2756 | req->bio = req->biotail = bio; | ||
2757 | req->ioprio = prio; | ||
2758 | req->rq_disk = bio->bi_bdev->bd_disk; | ||
2759 | req->start_time = jiffies; | ||
2760 | 2876 | ||
2761 | spin_lock_irq(q->queue_lock); | 2877 | spin_lock_irq(q->queue_lock); |
2762 | if (elv_queue_empty(q)) | 2878 | if (elv_queue_empty(q)) |
@@ -3047,7 +3163,8 @@ static int __end_that_request_first(struct request *req, int uptodate, | |||
3047 | if (nr_bytes >= bio->bi_size) { | 3163 | if (nr_bytes >= bio->bi_size) { |
3048 | req->bio = bio->bi_next; | 3164 | req->bio = bio->bi_next; |
3049 | nbytes = bio->bi_size; | 3165 | nbytes = bio->bi_size; |
3050 | bio_endio(bio, nbytes, error); | 3166 | if (!ordered_bio_endio(req, bio, nbytes, error)) |
3167 | bio_endio(bio, nbytes, error); | ||
3051 | next_idx = 0; | 3168 | next_idx = 0; |
3052 | bio_nbytes = 0; | 3169 | bio_nbytes = 0; |
3053 | } else { | 3170 | } else { |
@@ -3102,7 +3219,8 @@ static int __end_that_request_first(struct request *req, int uptodate, | |||
3102 | * if the request wasn't completed, update state | 3219 | * if the request wasn't completed, update state |
3103 | */ | 3220 | */ |
3104 | if (bio_nbytes) { | 3221 | if (bio_nbytes) { |
3105 | bio_endio(bio, bio_nbytes, error); | 3222 | if (!ordered_bio_endio(req, bio, bio_nbytes, error)) |
3223 | bio_endio(bio, bio_nbytes, error); | ||
3106 | bio->bi_idx += next_idx; | 3224 | bio->bi_idx += next_idx; |
3107 | bio_iovec(bio)->bv_offset += nr_bytes; | 3225 | bio_iovec(bio)->bv_offset += nr_bytes; |
3108 | bio_iovec(bio)->bv_len -= nr_bytes; | 3226 | bio_iovec(bio)->bv_len -= nr_bytes; |
@@ -3157,11 +3275,100 @@ int end_that_request_chunk(struct request *req, int uptodate, int nr_bytes) | |||
3157 | EXPORT_SYMBOL(end_that_request_chunk); | 3275 | EXPORT_SYMBOL(end_that_request_chunk); |
3158 | 3276 | ||
3159 | /* | 3277 | /* |
3278 | * splice the completion data to a local structure and hand off to | ||
3279 | * the queue's softirq_done_fn() to complete the requests | ||
3280 | */ | ||
3281 | static void blk_done_softirq(struct softirq_action *h) | ||
3282 | { | ||
3283 | struct list_head *cpu_list; | ||
3284 | LIST_HEAD(local_list); | ||
3285 | |||
3286 | local_irq_disable(); | ||
3287 | cpu_list = &__get_cpu_var(blk_cpu_done); | ||
3288 | list_splice_init(cpu_list, &local_list); | ||
3289 | local_irq_enable(); | ||
3290 | |||
3291 | while (!list_empty(&local_list)) { | ||
3292 | struct request *rq = list_entry(local_list.next, struct request, donelist); | ||
3293 | |||
3294 | list_del_init(&rq->donelist); | ||
3295 | rq->q->softirq_done_fn(rq); | ||
3296 | } | ||
3297 | } | ||
3298 | |||
3299 | #ifdef CONFIG_HOTPLUG_CPU | ||
3300 | |||
3301 | static int blk_cpu_notify(struct notifier_block *self, unsigned long action, | ||
3302 | void *hcpu) | ||
3303 | { | ||
3304 | /* | ||
3305 | * If a CPU goes away, splice its entries to the current CPU | ||
3306 | * and trigger a run of the softirq | ||
3307 | */ | ||
3308 | if (action == CPU_DEAD) { | ||
3309 | int cpu = (unsigned long) hcpu; | ||
3310 | |||
3311 | local_irq_disable(); | ||
3312 | list_splice_init(&per_cpu(blk_cpu_done, cpu), | ||
3313 | &__get_cpu_var(blk_cpu_done)); | ||
3314 | raise_softirq_irqoff(BLOCK_SOFTIRQ); | ||
3315 | local_irq_enable(); | ||
3316 | } | ||
3317 | |||
3318 | return NOTIFY_OK; | ||
3319 | } | ||
3320 | |||
3321 | |||
3322 | static struct notifier_block __devinitdata blk_cpu_notifier = { | ||
3323 | .notifier_call = blk_cpu_notify, | ||
3324 | }; | ||
3325 | |||
3326 | #endif /* CONFIG_HOTPLUG_CPU */ | ||
3327 | |||
3328 | /** | ||
3329 | * blk_complete_request - end I/O on a request | ||
3330 | * @req: the request being processed | ||
3331 | * | ||
3332 | * Description: | ||
3333 | * Ends all I/O on a request. It does not handle partial completions, | ||
3334 | * unless the driver actually implements this in its completion callback | ||
3335 | * through requeueing. The actual completion happens out-of-order, | ||
3336 | * through a softirq handler. The user must have registered a completion | ||
3337 | * callback through blk_queue_softirq_done(). | ||
3338 | **/ | ||
3339 | |||
3340 | void blk_complete_request(struct request *req) | ||
3341 | { | ||
3342 | struct list_head *cpu_list; | ||
3343 | unsigned long flags; | ||
3344 | |||
3345 | BUG_ON(!req->q->softirq_done_fn); | ||
3346 | |||
3347 | local_irq_save(flags); | ||
3348 | |||
3349 | cpu_list = &__get_cpu_var(blk_cpu_done); | ||
3350 | list_add_tail(&req->donelist, cpu_list); | ||
3351 | raise_softirq_irqoff(BLOCK_SOFTIRQ); | ||
3352 | |||
3353 | local_irq_restore(flags); | ||
3354 | } | ||
3355 | |||
3356 | EXPORT_SYMBOL(blk_complete_request); | ||
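[Editor's note] The softirq completion path is opt-in: the driver registers a softirq done callback (via the blk_queue_softirq_done() named in the kerneldoc above) and calls blk_complete_request() from its interrupt handler. A hedged sketch; the mydrv_* names and mydrv_fetch_done() are purely illustrative:

    #include <linux/blkdev.h>
    #include <linux/interrupt.h>

    /* Softirq half: finish the request outside hard-IRQ context. */
    static void mydrv_softirq_done(struct request *rq)
    {
        request_queue_t *q = rq->q;
        int uptodate = rq->errors ? 0 : 1;

        end_that_request_chunk(rq, uptodate, rq->hard_nr_sectors << 9);

        spin_lock_irq(q->queue_lock);
        end_that_request_last(rq, uptodate);    /* needs the queue lock, see below */
        spin_unlock_irq(q->queue_lock);
    }

    /* Hard-IRQ half: just hand the request to the BLOCK_SOFTIRQ. */
    static irqreturn_t mydrv_isr(int irq, void *dev_id, struct pt_regs *regs)
    {
        struct request *rq = mydrv_fetch_done(dev_id);  /* hypothetical helper */

        blk_complete_request(rq);
        return IRQ_HANDLED;
    }

    /* At init time: blk_queue_softirq_done(q, mydrv_softirq_done); */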
3357 | |||
3358 | /* | ||
3160 | * queue lock must be held | 3359 | * queue lock must be held |
3161 | */ | 3360 | */ |
3162 | void end_that_request_last(struct request *req) | 3361 | void end_that_request_last(struct request *req, int uptodate) |
3163 | { | 3362 | { |
3164 | struct gendisk *disk = req->rq_disk; | 3363 | struct gendisk *disk = req->rq_disk; |
3364 | int error; | ||
3365 | |||
3366 | /* | ||
3367 | * extend uptodate bool to allow < 0 value to be direct io error | ||
3368 | */ | ||
3369 | error = 0; | ||
3370 | if (end_io_error(uptodate)) | ||
3371 | error = !uptodate ? -EIO : uptodate; | ||
3165 | 3372 | ||
3166 | if (unlikely(laptop_mode) && blk_fs_request(req)) | 3373 | if (unlikely(laptop_mode) && blk_fs_request(req)) |
3167 | laptop_io_completion(); | 3374 | laptop_io_completion(); |
@@ -3176,7 +3383,7 @@ void end_that_request_last(struct request *req) | |||
3176 | disk->in_flight--; | 3383 | disk->in_flight--; |
3177 | } | 3384 | } |
3178 | if (req->end_io) | 3385 | if (req->end_io) |
3179 | req->end_io(req); | 3386 | req->end_io(req, error); |
3180 | else | 3387 | else |
3181 | __blk_put_request(req->q, req); | 3388 | __blk_put_request(req->q, req); |
3182 | } | 3389 | } |
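[Editor's note] end_that_request_last() now takes the extended uptodate convention, where zero means failure and a negative value carries the exact error code. The mapping applied before ->end_io() is called:

    /*
     *   uptodate == 1             ->  error = 0
     *   uptodate == 0             ->  error = -EIO
     *   uptodate == -EOPNOTSUPP   ->  error = -EOPNOTSUPP (passed through)
     *
     * This is how blk_do_ordered() fails unsupported barriers and how
     * blk_end_sync_rq() and other end_io callbacks learn the reason.
     */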
@@ -3188,7 +3395,7 @@ void end_request(struct request *req, int uptodate) | |||
3188 | if (!end_that_request_first(req, uptodate, req->hard_cur_sectors)) { | 3395 | if (!end_that_request_first(req, uptodate, req->hard_cur_sectors)) { |
3189 | add_disk_randomness(req->rq_disk); | 3396 | add_disk_randomness(req->rq_disk); |
3190 | blkdev_dequeue_request(req); | 3397 | blkdev_dequeue_request(req); |
3191 | end_that_request_last(req); | 3398 | end_that_request_last(req, uptodate); |
3192 | } | 3399 | } |
3193 | } | 3400 | } |
3194 | 3401 | ||
@@ -3226,6 +3433,8 @@ EXPORT_SYMBOL(kblockd_flush); | |||
3226 | 3433 | ||
3227 | int __init blk_dev_init(void) | 3434 | int __init blk_dev_init(void) |
3228 | { | 3435 | { |
3436 | int i; | ||
3437 | |||
3229 | kblockd_workqueue = create_workqueue("kblockd"); | 3438 | kblockd_workqueue = create_workqueue("kblockd"); |
3230 | if (!kblockd_workqueue) | 3439 | if (!kblockd_workqueue) |
3231 | panic("Failed to create kblockd\n"); | 3440 | panic("Failed to create kblockd\n"); |
@@ -3239,6 +3448,14 @@ int __init blk_dev_init(void) | |||
3239 | iocontext_cachep = kmem_cache_create("blkdev_ioc", | 3448 | iocontext_cachep = kmem_cache_create("blkdev_ioc", |
3240 | sizeof(struct io_context), 0, SLAB_PANIC, NULL, NULL); | 3449 | sizeof(struct io_context), 0, SLAB_PANIC, NULL, NULL); |
3241 | 3450 | ||
3451 | for (i = 0; i < NR_CPUS; i++) | ||
3452 | INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i)); | ||
3453 | |||
3454 | open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL); | ||
3455 | #ifdef CONFIG_HOTPLUG_CPU | ||
3456 | register_cpu_notifier(&blk_cpu_notifier); | ||
3457 | #endif | ||
3458 | |||
3242 | blk_max_low_pfn = max_low_pfn; | 3459 | blk_max_low_pfn = max_low_pfn; |
3243 | blk_max_pfn = max_pfn; | 3460 | blk_max_pfn = max_pfn; |
3244 | 3461 | ||
diff --git a/block/noop-iosched.c b/block/noop-iosched.c index e54f006e7e60..f370e4a7fe6d 100644 --- a/block/noop-iosched.c +++ b/block/noop-iosched.c | |||
@@ -7,21 +7,94 @@ | |||
7 | #include <linux/module.h> | 7 | #include <linux/module.h> |
8 | #include <linux/init.h> | 8 | #include <linux/init.h> |
9 | 9 | ||
10 | static void elevator_noop_add_request(request_queue_t *q, struct request *rq) | 10 | struct noop_data { |
11 | struct list_head queue; | ||
12 | }; | ||
13 | |||
14 | static void noop_merged_requests(request_queue_t *q, struct request *rq, | ||
15 | struct request *next) | ||
16 | { | ||
17 | list_del_init(&next->queuelist); | ||
18 | } | ||
19 | |||
20 | static int noop_dispatch(request_queue_t *q, int force) | ||
21 | { | ||
22 | struct noop_data *nd = q->elevator->elevator_data; | ||
23 | |||
24 | if (!list_empty(&nd->queue)) { | ||
25 | struct request *rq; | ||
26 | rq = list_entry(nd->queue.next, struct request, queuelist); | ||
27 | list_del_init(&rq->queuelist); | ||
28 | elv_dispatch_sort(q, rq); | ||
29 | return 1; | ||
30 | } | ||
31 | return 0; | ||
32 | } | ||
33 | |||
34 | static void noop_add_request(request_queue_t *q, struct request *rq) | ||
35 | { | ||
36 | struct noop_data *nd = q->elevator->elevator_data; | ||
37 | |||
38 | list_add_tail(&rq->queuelist, &nd->queue); | ||
39 | } | ||
40 | |||
41 | static int noop_queue_empty(request_queue_t *q) | ||
11 | { | 42 | { |
12 | rq->flags |= REQ_NOMERGE; | 43 | struct noop_data *nd = q->elevator->elevator_data; |
13 | elv_dispatch_add_tail(q, rq); | 44 | |
45 | return list_empty(&nd->queue); | ||
46 | } | ||
47 | |||
48 | static struct request * | ||
49 | noop_former_request(request_queue_t *q, struct request *rq) | ||
50 | { | ||
51 | struct noop_data *nd = q->elevator->elevator_data; | ||
52 | |||
53 | if (rq->queuelist.prev == &nd->queue) | ||
54 | return NULL; | ||
55 | return list_entry(rq->queuelist.prev, struct request, queuelist); | ||
56 | } | ||
57 | |||
58 | static struct request * | ||
59 | noop_latter_request(request_queue_t *q, struct request *rq) | ||
60 | { | ||
61 | struct noop_data *nd = q->elevator->elevator_data; | ||
62 | |||
63 | if (rq->queuelist.next == &nd->queue) | ||
64 | return NULL; | ||
65 | return list_entry(rq->queuelist.next, struct request, queuelist); | ||
14 | } | 66 | } |
15 | 67 | ||
16 | static int elevator_noop_dispatch(request_queue_t *q, int force) | 68 | static int noop_init_queue(request_queue_t *q, elevator_t *e) |
17 | { | 69 | { |
70 | struct noop_data *nd; | ||
71 | |||
72 | nd = kmalloc(sizeof(*nd), GFP_KERNEL); | ||
73 | if (!nd) | ||
74 | return -ENOMEM; | ||
75 | INIT_LIST_HEAD(&nd->queue); | ||
76 | e->elevator_data = nd; | ||
18 | return 0; | 77 | return 0; |
19 | } | 78 | } |
20 | 79 | ||
80 | static void noop_exit_queue(elevator_t *e) | ||
81 | { | ||
82 | struct noop_data *nd = e->elevator_data; | ||
83 | |||
84 | BUG_ON(!list_empty(&nd->queue)); | ||
85 | kfree(nd); | ||
86 | } | ||
87 | |||
21 | static struct elevator_type elevator_noop = { | 88 | static struct elevator_type elevator_noop = { |
22 | .ops = { | 89 | .ops = { |
23 | .elevator_dispatch_fn = elevator_noop_dispatch, | 90 | .elevator_merge_req_fn = noop_merged_requests, |
24 | .elevator_add_req_fn = elevator_noop_add_request, | 91 | .elevator_dispatch_fn = noop_dispatch, |
92 | .elevator_add_req_fn = noop_add_request, | ||
93 | .elevator_queue_empty_fn = noop_queue_empty, | ||
94 | .elevator_former_req_fn = noop_former_request, | ||
95 | .elevator_latter_req_fn = noop_latter_request, | ||
96 | .elevator_init_fn = noop_init_queue, | ||
97 | .elevator_exit_fn = noop_exit_queue, | ||
25 | }, | 98 | }, |
26 | .elevator_name = "noop", | 99 | .elevator_name = "noop", |
27 | .elevator_owner = THIS_MODULE, | 100 | .elevator_owner = THIS_MODULE, |
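[Editor's note] The rewritten noop elevator is just a per-queue FIFO list, so its hooks can be traced by hand; with three requests A, B and C added in that order:

    /*
     * noop_add_request(A), (B), (C)  ->  nd->queue: A - B - C
     * noop_dispatch(q, 0)            ->  removes A, elv_dispatch_sort(q, A), returns 1
     * noop_former_request(q, C)      ->  B     (previous neighbour in the list)
     * noop_latter_request(q, C)      ->  NULL  (C is the tail)
     * noop_merged_requests(q, B, C)  ->  drops C from the list once the core
     *                                    has merged it into B
     */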
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 382dea7b224c..cc72210687eb 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/string.h> | 21 | #include <linux/string.h> |
22 | #include <linux/module.h> | 22 | #include <linux/module.h> |
23 | #include <linux/blkdev.h> | 23 | #include <linux/blkdev.h> |
24 | #include <linux/capability.h> | ||
24 | #include <linux/completion.h> | 25 | #include <linux/completion.h> |
25 | #include <linux/cdrom.h> | 26 | #include <linux/cdrom.h> |
26 | #include <linux/slab.h> | 27 | #include <linux/slab.h> |
@@ -46,7 +47,7 @@ EXPORT_SYMBOL(scsi_command_size); | |||
46 | 47 | ||
47 | static int sg_get_version(int __user *p) | 48 | static int sg_get_version(int __user *p) |
48 | { | 49 | { |
49 | static int sg_version_num = 30527; | 50 | static const int sg_version_num = 30527; |
50 | return put_user(sg_version_num, p); | 51 | return put_user(sg_version_num, p); |
51 | } | 52 | } |
52 | 53 | ||
@@ -190,16 +191,21 @@ static int verify_command(struct file *file, unsigned char *cmd) | |||
190 | safe_for_write(GPCMD_SET_STREAMING), | 191 | safe_for_write(GPCMD_SET_STREAMING), |
191 | }; | 192 | }; |
192 | unsigned char type = cmd_type[cmd[0]]; | 193 | unsigned char type = cmd_type[cmd[0]]; |
194 | int has_write_perm = 0; | ||
193 | 195 | ||
194 | /* Anybody who can open the device can do a read-safe command */ | 196 | /* Anybody who can open the device can do a read-safe command */ |
195 | if (type & CMD_READ_SAFE) | 197 | if (type & CMD_READ_SAFE) |
196 | return 0; | 198 | return 0; |
197 | 199 | ||
200 | /* | ||
201 | * file can be NULL from ioctl_by_bdev()... | ||
202 | */ | ||
203 | if (file) | ||
204 | has_write_perm = file->f_mode & FMODE_WRITE; | ||
205 | |||
198 | /* Write-safe commands just require a writable open.. */ | 206 | /* Write-safe commands just require a writable open.. */ |
199 | if (type & CMD_WRITE_SAFE) { | 207 | if ((type & CMD_WRITE_SAFE) && has_write_perm) |
200 | if (file->f_mode & FMODE_WRITE) | 208 | return 0; |
201 | return 0; | ||
202 | } | ||
203 | 209 | ||
204 | /* And root can do any command.. */ | 210 | /* And root can do any command.. */ |
205 | if (capable(CAP_SYS_RAWIO)) | 211 | if (capable(CAP_SYS_RAWIO)) |
@@ -233,7 +239,7 @@ static int sg_io(struct file *file, request_queue_t *q, | |||
233 | if (verify_command(file, cmd)) | 239 | if (verify_command(file, cmd)) |
234 | return -EPERM; | 240 | return -EPERM; |
235 | 241 | ||
236 | if (hdr->dxfer_len > (q->max_sectors << 9)) | 242 | if (hdr->dxfer_len > (q->max_hw_sectors << 9)) |
237 | return -EIO; | 243 | return -EIO; |
238 | 244 | ||
239 | if (hdr->dxfer_len) | 245 | if (hdr->dxfer_len) |
@@ -442,11 +448,37 @@ error: | |||
442 | return err; | 448 | return err; |
443 | } | 449 | } |
444 | 450 | ||
451 | |||
452 | /* Send basic block requests */ | ||
453 | static int __blk_send_generic(request_queue_t *q, struct gendisk *bd_disk, int cmd, int data) | ||
454 | { | ||
455 | struct request *rq; | ||
456 | int err; | ||
457 | |||
458 | rq = blk_get_request(q, WRITE, __GFP_WAIT); | ||
459 | rq->flags |= REQ_BLOCK_PC; | ||
460 | rq->data = NULL; | ||
461 | rq->data_len = 0; | ||
462 | rq->timeout = BLK_DEFAULT_TIMEOUT; | ||
463 | memset(rq->cmd, 0, sizeof(rq->cmd)); | ||
464 | rq->cmd[0] = cmd; | ||
465 | rq->cmd[4] = data; | ||
466 | rq->cmd_len = 6; | ||
467 | err = blk_execute_rq(q, bd_disk, rq, 0); | ||
468 | blk_put_request(rq); | ||
469 | |||
470 | return err; | ||
471 | } | ||
472 | |||
473 | static inline int blk_send_start_stop(request_queue_t *q, struct gendisk *bd_disk, int data) | ||
474 | { | ||
475 | return __blk_send_generic(q, bd_disk, GPCMD_START_STOP_UNIT, data); | ||
476 | } | ||
477 | |||
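[Editor's note] __blk_send_generic() builds a 6-byte packet command with the payload in cmd[4]; the blk_send_start_stop() wrapper above and its two callers below reproduce what the old open-coded version computed as 0x02 + (close != 0). For reference, byte 4 of START STOP UNIT encodes the LoEj and Start bits:

    /*
     * CDROMEJECT:      { GPCMD_START_STOP_UNIT, 0, 0, 0, 0x02, 0 }
     *                    0x02 = LoEj=1, Start=0  -> stop the unit and eject
     * CDROMCLOSETRAY:  { GPCMD_START_STOP_UNIT, 0, 0, 0, 0x03, 0 }
     *                    0x03 = LoEj=1, Start=1  -> load the medium / close the tray
     */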
445 | int scsi_cmd_ioctl(struct file *file, struct gendisk *bd_disk, unsigned int cmd, void __user *arg) | 478 | int scsi_cmd_ioctl(struct file *file, struct gendisk *bd_disk, unsigned int cmd, void __user *arg) |
446 | { | 479 | { |
447 | request_queue_t *q; | 480 | request_queue_t *q; |
448 | struct request *rq; | 481 | int err; |
449 | int close = 0, err; | ||
450 | 482 | ||
451 | q = bd_disk->queue; | 483 | q = bd_disk->queue; |
452 | if (!q) | 484 | if (!q) |
@@ -564,19 +596,10 @@ int scsi_cmd_ioctl(struct file *file, struct gendisk *bd_disk, unsigned int cmd, | |||
564 | err = sg_scsi_ioctl(file, q, bd_disk, arg); | 596 | err = sg_scsi_ioctl(file, q, bd_disk, arg); |
565 | break; | 597 | break; |
566 | case CDROMCLOSETRAY: | 598 | case CDROMCLOSETRAY: |
567 | close = 1; | 599 | err = blk_send_start_stop(q, bd_disk, 0x03); |
600 | break; | ||
568 | case CDROMEJECT: | 601 | case CDROMEJECT: |
569 | rq = blk_get_request(q, WRITE, __GFP_WAIT); | 602 | err = blk_send_start_stop(q, bd_disk, 0x02); |
570 | rq->flags |= REQ_BLOCK_PC; | ||
571 | rq->data = NULL; | ||
572 | rq->data_len = 0; | ||
573 | rq->timeout = BLK_DEFAULT_TIMEOUT; | ||
574 | memset(rq->cmd, 0, sizeof(rq->cmd)); | ||
575 | rq->cmd[0] = GPCMD_START_STOP_UNIT; | ||
576 | rq->cmd[4] = 0x02 + (close != 0); | ||
577 | rq->cmd_len = 6; | ||
578 | err = blk_execute_rq(q, bd_disk, rq, 0); | ||
579 | blk_put_request(rq); | ||
580 | break; | 603 | break; |
581 | default: | 604 | default: |
582 | err = -ENOTTY; | 605 | err = -ENOTTY; |